mirror of https://github.com/gsi-upm/sitc
Add Beatles introduction
parent
9937490213
commit
1f5318a357
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,49 +0,0 @@
|
|||||||
|
|
||||||
"""Extract rdfa, microdata and json-ld annotations from a website."""
import sys

# Python 2/3 compatibility shim: makes `from urllib import request, parse`
# work under Python 2 as well.
from future.standard_library import install_aliases

install_aliases()

from urllib import request, parse

from rdflib import Graph, term
from lxml import etree

# A target URL is required on the command line.
if len(sys.argv) < 2:
    print('Usage: python {} <URL>'.format(sys.argv[0]))
    print('')
    print('Extract rdfa, microdata and json-ld annotations from a website')
    # sys.exit instead of the site-provided exit(): the latter is only
    # guaranteed to exist in interactive sessions.
    sys.exit(1)

url = sys.argv[1]

# Accumulate RDFa and microdata annotations from the page into one graph.
g = Graph()
g.parse(url, format='rdfa')
g.parse(url, format='microdata')
|
|
||||||
|
|
||||||
|
|
||||||
def sanitize_triple(t):
    """Return a copy of triple *t* that is safe to serialize.

    URIRefs containing no '/' would otherwise make the serialization
    fail, so those are percent-quoted; every other item passes through
    unchanged.
    """
    def _clean(node):
        # Only quote URIRefs that look malformed (no '/' anywhere).
        is_bad_uri = isinstance(node, term.URIRef) and '/' not in node
        return term.URIRef(parse.quote(str(node))) if is_bad_uri else node

    return tuple(_clean(node) for node in t)
|
|
||||||
|
|
||||||
|
|
||||||
with request.urlopen(url) as response:
    # Get all json-ld objects embedded in the html file.
    html = response.read().decode('utf-8', errors='ignore')
    # recover=True lets lxml parse real-world (non-well-formed) HTML.
    parser = etree.XMLParser(recover=True)
    root = etree.fromstring(html, parser=parser)
    # BUG FIX: `if root:` tests lxml Element truthiness, which is False
    # for an element with no children (and deprecated); a parse failure
    # is signalled by None, so compare explicitly.
    if root is not None:
        for jsonld in root.findall(".//script[@type='application/ld+json']"):
            g.parse(data=jsonld.text, publicID=url, format='json-ld')

# Rebuild the graph with sanitized URIs so serialization cannot fail.
fixedgraph = Graph()
fixedgraph += [sanitize_triple(s) for s in g]

# BUG FIX: serialize the sanitized graph. The original printed `g`,
# leaving `fixedgraph` unused and defeating the sanitization step.
print(fixedgraph.serialize(format='turtle').decode('utf-8', errors='ignore'))
|
|
@ -1,29 +0,0 @@
|
|||||||
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <http://schema.org/> .

_:Hotel1 a schema:Hotel ;
    schema:description "A fictitious hotel" .

_:Review1 a schema:Review ;
    schema:reviewBody "This is a great review" ;
    schema:reviewRating [
        a schema:Rating ;
        schema:author <http://jfernando.es/me> ;
        schema:ratingValue "0.7"
    ] ;
    schema:itemReviewed _:Hotel1 .

_:Review2 a schema:Review ;
    schema:reviewBody "This is a not so great review" ;
    schema:reviewRating [
        a schema:Rating ;
        schema:author [ a schema:Person ;
            schema:givenName "anonymous" ] ;
        schema:ratingValue "0.3"
    ] ;
    schema:itemReviewed _:Hotel1 .
|
|
@ -1,23 +0,0 @@
|
|||||||
#!/usr/bin/env python
# Example of SPARQL queries over a Turtle dataset.
# Usage: python consultas.py [dataset]
import sys

import rdflib

# Dataset path comes from the command line, defaulting to reviews.ttl.
source = sys.argv[1] if len(sys.argv) > 1 else 'reviews.ttl'
graph = rdflib.Graph()

# Namespace bound under the prefix "schema" inside the query below.
schema_ns = rdflib.Namespace("http://schema.org/")

# Read the Turtle file into the graph.
graph.parse(source, format='turtle')

# Every (review, property, value) triple for each schema:Review.
rows = graph.query(
    """SELECT DISTINCT ?review ?p ?o
    WHERE {
        ?review a schema:Review.
        ?review ?p ?o.
    }""", initNs={'schema': schema_ns})

for triple in rows:
    print("%s %s %s" % triple)
|
|
@ -1,6 +0,0 @@
|
|||||||
# Load an N3/Turtle file and echo it back, re-serialized as N3.
# Usage: python <script> [dataset]
import rdflib
import sys

# Optional command-line dataset; reviews.ttl by default.
path = sys.argv[1] if len(sys.argv) > 1 else 'reviews.ttl'
graph = rdflib.Graph()
graph.parse(path, format="n3")

# NOTE(review): assumes serialize() returns bytes (rdflib < 6),
# hence the explicit decode — confirm against the pinned rdflib.
print(graph.serialize(format="n3").decode('utf-8'))
|
|
Loading…
Reference in New Issue