mirror of https://github.com/gsi-upm/sitc
synced 2025-10-30 23:18:18 +00:00

Add Beatles introduction

Changed files:
  lod/01_SPARQL_Introduction.ipynb (1870 lines, normal file; file diff suppressed because it is too large)
  lod/SPARQL.ipynb (1880 lines; file diff suppressed because it is too large)
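Both notebook diffs are suppressed, so the new Beatles material itself is not visible here. As a rough sketch of the kind of cell such an introduction might contain, the %%sparql magic defined in the helper diff below could be pointed at DBpedia like this (the endpoint, the query, and the LIMIT are illustrative assumptions, not taken from the notebook):

%%sparql http://dbpedia.org/sparql

SELECT ?property ?value
WHERE {
  <http://dbpedia.org/resource/The_Beatles> ?property ?value .
}
LIMIT 10

Per the helper's docstring, the magic sends the cell body to the endpoint given on the first line (or to the default endpoint when none is given) and renders the results as a table.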
@@ -1,49 +0,0 @@

import sys
from future.standard_library import install_aliases
install_aliases()

from urllib import request, parse
from rdflib import Graph, term
from lxml import etree

if len(sys.argv) < 2:
    print('Usage: python {} <URL>'.format(sys.argv[0]))
    print('')
    print('Extract rdfa, microdata and json-ld annotations from a website')
    exit(1)

url = sys.argv[1]

g = Graph()
g.parse(url, format='rdfa')
g.parse(url, format='microdata')


def sanitize_triple(t):
    """Function to remove bad URIs from the graph that would otherwise
    make the serialization fail."""
    def sanitize_triple_item(item):
        if isinstance(item, term.URIRef) and '/' not in item:
            return term.URIRef(parse.quote(str(item)))
        return item

    return (sanitize_triple_item(t[0]),
            sanitize_triple_item(t[1]),
            sanitize_triple_item(t[2]))


with request.urlopen(url) as response:
    # Get all json-ld objects embedded in the html file
    html = response.read().decode('utf-8', errors='ignore')
    parser = etree.XMLParser(recover=True)
    root = etree.fromstring(html, parser=parser)
    if root:
        for jsonld in root.findall(".//script[@type='application/ld+json']"):
            g.parse(data=jsonld.text, publicID=url, format='json-ld')


fixedgraph = Graph()
fixedgraph += [sanitize_triple(s) for s in g]

print(g.serialize(format='turtle').decode('utf-8', errors='ignore'))
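For reference, a minimal standalone sketch of what the sanitize_triple() helper in the deleted script above does: any URIRef that contains no '/' is percent-encoded so that serialization does not fail. The mailto IRI below is a made-up example, not taken from the repository.

from urllib.parse import quote
from rdflib import term

# Hypothetical IRI with a space and no '/': sanitize_triple_item() would
# percent-encode it before the graph is serialized.
bad = term.URIRef("mailto:someone example.com")
fixed = term.URIRef(quote(str(bad)))
print(fixed)  # mailto%3Asomeone%20example.com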
@@ -1,12 +1,22 @@

'''
Helper functions and ipython magic for the SPARQL exercises.

The tests in the notebooks rely on the `LAST_QUERY` variable, which is updated by the `%%sparql` magic after every query.
This variable contains the full query used (`LAST_QUERY["query"]`), the endpoint it was sent to (`LAST_QUERY["endpoint"]`), and a dictionary with the response of the endpoint (`LAST_QUERY["results"]`).
For convenience, the results are also given as tuples (`LAST_QUERY["tuples"]`) and as a dictionary of `{column: [values]}` (`LAST_QUERY["columns"]`).
'''
from IPython.core.magic import (register_line_magic, register_cell_magic,
                                register_line_cell_magic)

from IPython.display import HTML, display, Image
from IPython.display import HTML, display, Image, display_javascript
from urllib.request import Request, urlopen
from urllib.parse import quote_plus, urlencode
from urllib.error import HTTPError

import json
import sys

js = "IPython.CodeCell.options_default.highlight_modes['magic_sparql'] = {'reg':[/^%%sparql/]};"
display_javascript(js, raw=True)


def send_query(query, endpoint):

@@ -20,7 +30,11 @@ def send_query(query, endpoint):
                headers={'content-type': 'application/x-www-form-urlencoded',
                         'accept': FORMATS},
                method='POST')
    return json.loads(urlopen(r).read().decode('utf-8'));
    res = urlopen(r)
    data = res.read().decode('utf-8')
    if res.getcode() == 200:
        return json.loads(data)
    raise Exception('Error getting results: {}'.format(data))


def tabulate(tuples, header=None):

@@ -39,11 +53,14 @@ def tabulate(tuples, header=None):

LAST_QUERY = {}

def solution():
    return LAST_QUERY


def query(query, endpoint=None, print_table=False):
    global LAST_QUERY

    endpoint = endpoint or "http://dbpedia.org/sparql"
    endpoint = endpoint or "http://fuseki.cluster.gsi.dit.upm.es/sitc/"
    results = send_query(query, endpoint)
    tuples = to_table(results)

@@ -80,12 +97,30 @@ def to_table(results):

@register_cell_magic
def sparql(line, cell):
    '''
    SPARQL magic command for IPython. It can be used in a cell like this:

    ```
    %%sparql

    ... Your SPARQL query ...

    ```

    By default, it will use the DBpedia endpoint, but you can use a different endpoint like this:

    ```
    %%sparql http://my-sparql-endpoint...

    ... Your SPARQL query ...
    ```
    '''
    try:
        return query(cell, endpoint=line, print_table=True)
    except HTTPError as ex:
        error_message = ex.read().decode('utf-8')
        print('Error {}. Reason: {}'.format(ex.status, ex.reason))
        print(error_message)
        print(error_message, file=sys.stderr)


def show_photos(values):

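The module docstring added at the top of this helper file explains that the notebook tests read LAST_QUERY after each %%sparql cell. A minimal sketch of how such a check could look, assuming the helper module has been imported into the notebook namespace (the module name and the column name below are hypothetical):

from helpers import solution    # hypothetical module name; the diff does not show the file name

last = solution()               # same dictionary as LAST_QUERY
print(last["endpoint"])         # endpoint the query was sent to
print(last["query"])            # full text of the last query
rows = last["tuples"]           # results as a list of tuples
cols = last["columns"]          # results as {column: [values]}
assert "name" in cols           # hypothetical check on a column called 'name'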
@@ -1,29 +0,0 @@
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <http://schema.org/> .


_:Hotel1 a schema:Hotel ;
         schema:description "A fictitious hotel" .


_:Review1 a schema:Review ;
          schema:reviewBody "This is a great review" ;
          schema:reviewRating [
           a schema:Rating ;
           schema:author <http://jfernando.es/me> ;
           schema:ratingValue "0.7"
          ] ;
          schema:itemReviewed _:Hotel1 .


_:Review2 a schema:Review ;
          schema:reviewBody "This is a not so great review" ;
          schema:reviewRating [
           a schema:Rating ;
           schema:author [ a schema:Person ;
           schema:givenName "anonymous" ] ;
           schema:ratingValue "0.3"
          ] ;
          schema:itemReviewed _:Hotel1 .
@@ -1,23 +0,0 @@
#!/usr/bin/env python
# Example of SPARQL queries over a Turtle file
# Usage: python consultas.py
import rdflib
import sys

dataset = sys.argv[1] if len(sys.argv) > 1 else 'reviews.ttl'
g = rdflib.Graph()

schema = rdflib.Namespace("http://schema.org/")

# Read Turtle file
g.parse(dataset, format='turtle')

results = g.query(
    """SELECT DISTINCT ?review ?p ?o
       WHERE {
          ?review a schema:Review.
          ?review ?p ?o.
       }""", initNs={'schema': schema})

for row in results:
    print("%s %s %s" % row)
@@ -1,6 +0,0 @@
import rdflib
import sys
g = rdflib.Graph()
dataset = sys.argv[1] if len(sys.argv) > 1 else 'reviews.ttl'
g.parse(dataset, format="n3")
print(g.serialize(format="n3").decode('utf-8'))