mirror of
https://github.com/gsi-upm/sitc
synced 2024-11-21 22:12:30 +00:00
Add Beatles introduction
This commit is contained in:
parent
9937490213
commit
1f5318a357
1870
lod/01_SPARQL_Introduction.ipynb
Normal file
1870
lod/01_SPARQL_Introduction.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1880
lod/SPARQL.ipynb
1880
lod/SPARQL.ipynb
File diff suppressed because it is too large
Load Diff
@ -1,49 +0,0 @@
|
|||||||
|
|
||||||
import sys

# python-future alias shim so this script also runs under Python 2.
from future.standard_library import install_aliases
install_aliases()

from urllib import request, parse
from rdflib import Graph, term
from lxml import etree

# Require exactly one argument: the URL to scrape for annotations.
if len(sys.argv) < 2:
    print('Usage: python {} <URL>'.format(sys.argv[0]))
    print('')
    print('Extract rdfa, microdata and json-ld annotations from a website')
    # sys.exit instead of the bare `exit` builtin: `exit` comes from the
    # optional site module and is not guaranteed in non-interactive runs.
    sys.exit(1)

url = sys.argv[1]

# Collect RDFa and microdata annotations directly from the URL.
g = Graph()
g.parse(url, format='rdfa')
g.parse(url, format='microdata')
|
|
||||||
|
|
||||||
|
|
||||||
def sanitize_triple(t):
    """Remove bad URIs from the graph that would otherwise make the
    serialization fail, returning a cleaned copy of triple *t*."""
    def _quoted(item):
        # A URIRef with no '/' at all is taken to be malformed; percent-quote
        # it so the serializer accepts it. Everything else passes through.
        is_bad_uri = isinstance(item, term.URIRef) and '/' not in item
        return term.URIRef(parse.quote(str(item))) if is_bad_uri else item

    subject, predicate, obj = t[0], t[1], t[2]
    return (_quoted(subject), _quoted(predicate), _quoted(obj))
|
|
||||||
|
|
||||||
|
|
||||||
with request.urlopen(url) as response:
    # Get all json-ld objects embedded in the html file.
    html = response.read().decode('utf-8', errors='ignore')
    # recover=True lets lxml parse real-world (non-well-formed) HTML.
    parser = etree.XMLParser(recover=True)
    root = etree.fromstring(html, parser=parser)
    # Bug fix: `if root:` is deprecated for lxml elements and evaluates
    # False for an element with no children; test against None instead.
    if root is not None:
        for jsonld in root.findall(".//script[@type='application/ld+json']"):
            g.parse(data=jsonld.text, publicID=url, format='json-ld')


# Build a copy of the graph with malformed URIs percent-quoted so the
# serialization below cannot fail on them.
fixedgraph = Graph()
fixedgraph += [sanitize_triple(s) for s in g]

# Bug fix: serialize the sanitized graph. The original printed `g`,
# leaving `fixedgraph` unused and defeating the sanitization step.
print(fixedgraph.serialize(format='turtle').decode('utf-8', errors='ignore'))
|
|
@ -1,12 +1,22 @@
|
|||||||
|
'''
|
||||||
|
Helper functions and ipython magic for the SPARQL exercises.
|
||||||
|
|
||||||
|
The tests in the notebooks rely on the `LAST_QUERY` variable, which is updated by the `%%sparql` magic after every query.
|
||||||
|
This variable contains the full query used (`LAST_QUERY["query"]`), the endpoint it was sent to (`LAST_QUERY["endpoint"]`), and a dictionary with the response of the endpoint (`LAST_QUERY["results"]`).
|
||||||
|
For convenience, the results are also given as tuples (`LAST_QUERY["tuples"]`), and as a dictionary of `{column:[values]}` (`LAST_QUERY["columns"]`).
|
||||||
|
'''
|
||||||
from IPython.core.magic import (register_line_magic, register_cell_magic,
|
from IPython.core.magic import (register_line_magic, register_cell_magic,
|
||||||
register_line_cell_magic)
|
register_line_cell_magic)
|
||||||
|
from IPython.display import HTML, display, Image, display_javascript
|
||||||
from IPython.display import HTML, display, Image
|
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
from urllib.parse import quote_plus, urlencode
|
from urllib.parse import quote_plus, urlencode
|
||||||
from urllib.error import HTTPError
|
from urllib.error import HTTPError
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
js = "IPython.CodeCell.options_default.highlight_modes['magic_sparql'] = {'reg':[/^%%sparql/]};"
|
||||||
|
display_javascript(js, raw=True)
|
||||||
|
|
||||||
|
|
||||||
def send_query(query, endpoint):
|
def send_query(query, endpoint):
|
||||||
@ -20,7 +30,11 @@ def send_query(query, endpoint):
|
|||||||
headers={'content-type': 'application/x-www-form-urlencoded',
|
headers={'content-type': 'application/x-www-form-urlencoded',
|
||||||
'accept': FORMATS},
|
'accept': FORMATS},
|
||||||
method='POST')
|
method='POST')
|
||||||
return json.loads(urlopen(r).read().decode('utf-8'));
|
res = urlopen(r)
|
||||||
|
data = res.read().decode('utf-8')
|
||||||
|
if res.getcode() == 200:
|
||||||
|
return json.loads(data)
|
||||||
|
raise Exception('Error getting results: {}'.format(data))
|
||||||
|
|
||||||
|
|
||||||
def tabulate(tuples, header=None):
|
def tabulate(tuples, header=None):
|
||||||
@ -39,11 +53,14 @@ def tabulate(tuples, header=None):
|
|||||||
|
|
||||||
LAST_QUERY = {}
|
LAST_QUERY = {}
|
||||||
|
|
||||||
|
def solution():
    """Return the module-level LAST_QUERY record of the most recent
    SPARQL query (query text, endpoint, and parsed results)."""
    return LAST_QUERY
|
||||||
|
|
||||||
|
|
||||||
def query(query, endpoint=None, print_table=False):
|
def query(query, endpoint=None, print_table=False):
|
||||||
global LAST_QUERY
|
global LAST_QUERY
|
||||||
|
|
||||||
endpoint = endpoint or "http://dbpedia.org/sparql"
|
endpoint = endpoint or "http://fuseki.cluster.gsi.dit.upm.es/sitc/"
|
||||||
results = send_query(query, endpoint)
|
results = send_query(query, endpoint)
|
||||||
tuples = to_table(results)
|
tuples = to_table(results)
|
||||||
|
|
||||||
@ -80,12 +97,30 @@ def to_table(results):
|
|||||||
|
|
||||||
@register_cell_magic
def sparql(line, cell):
    '''
    Sparql magic command for ipython. It can be used in a cell like this:

    ```
    %%sparql

    ... Your SPARQL query ...

    ```

    by default, it will use the default endpoint configured in `query`,
    but you can use a different endpoint like this:

    ```
    %%sparql http://my-sparql-endpoint...

    ... Your SPARQL query ...
    ```
    '''
    try:
        # `line` carries the optional endpoint; `cell` is the query body.
        return query(cell, endpoint=line, print_table=True)
    except HTTPError as ex:
        # The endpoint's error page usually explains what went wrong.
        error_message = ex.read().decode('utf-8')
        # ex.code instead of ex.status: `status` only exists on Python 3.9+.
        print('Error {}. Reason: {}'.format(ex.code, ex.reason))
        print(error_message, file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
def show_photos(values):
|
def show_photos(values):
|
||||||
|
@ -1,29 +0,0 @@
|
|||||||
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <http://schema.org/> .


# A fictitious hotel, described with schema.org vocabulary.
_:Hotel1 a schema:Hotel ;
    schema:description "A fictitious hotel" .


# First review of the hotel: rating 0.7, authored by an identified
# person (a URI).
_:Review1 a schema:Review ;
    schema:reviewBody "This is a great review" ;
    schema:reviewRating [
        a schema:Rating ;
        schema:author <http://jfernando.es/me> ;
        schema:ratingValue "0.7"

    ] ;
    schema:itemReviewed _:Hotel1 .


# Second review: rating 0.3, authored by an anonymous person modelled
# as a nested blank node.
_:Review2 a schema:Review ;
    schema:reviewBody "This is a not so great review" ;
    schema:reviewRating [
        a schema:Rating ;
        schema:author [ a schema:Person ;
                        schema:givenName "anonymous" ] ;
        schema:ratingValue "0.3"
    ] ;
    schema:itemReviewed _:Hotel1 .
|
|
@ -1,23 +0,0 @@
|
|||||||
#!/usr/bin/env python
# Example of SPARQL queries over a Turtle file.
# Usage: python consultas.py [dataset.ttl]
import rdflib
import sys

# Dataset path from the command line, with a sensible default.
dataset = sys.argv[1] if len(sys.argv) > 1 else 'reviews.ttl'
g = rdflib.Graph()

schema = rdflib.Namespace("http://schema.org/")

# Read Turtle file.
g.parse(dataset, format='turtle')

# All properties of every schema:Review in the dataset.
results = g.query(
    """SELECT DISTINCT ?review ?p ?o
    WHERE {
    ?review a schema:Review.
    ?review ?p ?o.
    }""", initNs={'schema': schema})

for row in results:
    print("%s %s %s" % row)
|
|
@ -1,6 +0,0 @@
|
|||||||
# Load an RDF dataset (N3/Turtle) and echo it back to stdout,
# re-serialized as N3.
import rdflib
import sys

# Dataset path from the command line, defaulting to reviews.ttl.
dataset = sys.argv[1] if len(sys.argv) > 1 else 'reviews.ttl'

graph = rdflib.Graph()
graph.parse(dataset, format="n3")
print(graph.serialize(format="n3").decode('utf-8'))
|
|
Loading…
Reference in New Issue
Block a user