mirror of
				https://github.com/gsi-upm/sitc
				synced 2025-10-30 15:08:19 +00:00 
			
		
		
		
	Compare commits
	
		
			3 Commits
		
	
	
		
			9937490213
			...
			a4f8f69b19
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | a4f8f69b19 | ||
|  | fc07718ae8 | ||
|  | 1f5318a357 | 
							
								
								
									
										1870
									
								
								lod/01_SPARQL_Introduction.ipynb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1870
									
								
								lod/01_SPARQL_Introduction.ipynb
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										459
									
								
								lod/02_SPARQL_Custom_Endpoint.ipynb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										459
									
								
								lod/02_SPARQL_Custom_Endpoint.ipynb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,459 @@ | |||||||
|  | { | ||||||
|  |  "cells": [ | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": { | ||||||
|  |     "deletable": false, | ||||||
|  |     "editable": false, | ||||||
|  |     "nbgrader": { | ||||||
|  |      "checksum": "7276f055a8c504d3c80098c62ed41a4f", | ||||||
|  |      "grade": false, | ||||||
|  |      "grade_id": "cell-0bfe38f97f6ab2d2", | ||||||
|  |      "locked": true, | ||||||
|  |      "schema_version": 1, | ||||||
|  |      "solution": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "source": [ | ||||||
|  |     "<header style=\"width:100%;position:relative\">\n", | ||||||
|  |     "  <div style=\"width:80%;float:right;\">\n", | ||||||
|  |     "    <h1>Course Notes for Learning Intelligent Systems</h1>\n", | ||||||
|  |     "    <h3>Department of Telematic Engineering Systems</h3>\n", | ||||||
|  |     "    <h5>Universidad Politécnica de Madrid</h5>\n", | ||||||
|  |     "  </div>\n", | ||||||
|  |     "        <img style=\"width:15%;\" src=\"../logo.jpg\" alt=\"UPM\" />\n", | ||||||
|  |     "</header>" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": { | ||||||
|  |     "deletable": false, | ||||||
|  |     "editable": false, | ||||||
|  |     "nbgrader": { | ||||||
|  |      "checksum": "42642609861283bc33914d16750b7efa", | ||||||
|  |      "grade": false, | ||||||
|  |      "grade_id": "cell-0cd673883ee592d1", | ||||||
|  |      "locked": true, | ||||||
|  |      "schema_version": 1, | ||||||
|  |      "solution": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "source": [ | ||||||
|  |     "## Introduction\n", | ||||||
|  |     "\n", | ||||||
|  |     "In the previous notebook, we learnt how to use SPARQL by querying DBpedia.\n", | ||||||
|  |     "\n", | ||||||
|  |     "In this notebook, we will use SPARQL on manually annotated data. The data was collected as part of a [previous exercise](../lod/).\n", | ||||||
|  |     "\n", | ||||||
|  |     "The goal is to try SPARQL with data annotated by users with limited knowledge of vocabularies and semantics, and to compare the experience with similar queries to a more structured dataset.\n", | ||||||
|  |     "\n", | ||||||
|  |     "Hence, there are two parts.\n", | ||||||
|  |     "First, you will query a set of graphs annotated by students of this course.\n", | ||||||
|  |     "Then, you will query a synthetic dataset that contains similar information." | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": { | ||||||
|  |     "deletable": false, | ||||||
|  |     "editable": false, | ||||||
|  |     "nbgrader": { | ||||||
|  |      "checksum": "a3ecb4b300a5ab82376a4a8cb01f7e6b", | ||||||
|  |      "grade": false, | ||||||
|  |      "grade_id": "cell-10264483046abcc4", | ||||||
|  |      "locked": true, | ||||||
|  |      "schema_version": 1, | ||||||
|  |      "solution": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "source": [ | ||||||
|  |     "## Objectives\n", | ||||||
|  |     "\n", | ||||||
|  |     "* Experiencing the usefulness of the Linked Open Data initiative by querying data from different RDF graphs and endpoints\n", | ||||||
|  |     "* Understanding the challenges in querying multiple sources, with different annotators.\n" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": { | ||||||
|  |     "deletable": false, | ||||||
|  |     "editable": false, | ||||||
|  |     "nbgrader": { | ||||||
|  |      "checksum": "2fedf0d73fc90104d1ab72c3413dfc83", | ||||||
|  |      "grade": false, | ||||||
|  |      "grade_id": "cell-4f8492996e74bf20", | ||||||
|  |      "locked": true, | ||||||
|  |      "schema_version": 1, | ||||||
|  |      "solution": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "source": [ | ||||||
|  |     "## Tools\n", | ||||||
|  |     "\n", | ||||||
|  |     "See [the SPARQL notebook](./01_SPARQL_Introduction.ipynb#Tools)" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": { | ||||||
|  |     "deletable": false, | ||||||
|  |     "editable": false, | ||||||
|  |     "nbgrader": { | ||||||
|  |      "checksum": "c5f8646518bd832a47d71f9d3218237a", | ||||||
|  |      "grade": false, | ||||||
|  |      "grade_id": "cell-eb13908482825e42", | ||||||
|  |      "locked": true, | ||||||
|  |      "schema_version": 1, | ||||||
|  |      "solution": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "source": [ | ||||||
|  |     "Run this line to enable the `%%sparql` magic command." | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "code", | ||||||
|  |    "execution_count": null, | ||||||
|  |    "metadata": {}, | ||||||
|  |    "outputs": [], | ||||||
|  |    "source": [ | ||||||
|  |     "from helpers import *" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "## Exercises\n" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "Querying the manually annotated dataset will be slightly different from querying DBpedia.\n", | ||||||
|  |     "The main difference is that this dataset uses different graphs to separate the annotations from different students.\n", | ||||||
|  |     "\n", | ||||||
|  |     "**Each graph is a separate set of triples**.\n", | ||||||
|  |     "For this exercise, you could think of graphs as individual endpoints.\n", | ||||||
|  |     "\n", | ||||||
|  |     "\n", | ||||||
|  |     "First, let us get a list of graphs available:" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "code", | ||||||
|  |    "execution_count": null, | ||||||
|  |    "metadata": {}, | ||||||
|  |    "outputs": [], | ||||||
|  |    "source": [ | ||||||
|  |     "%%sparql http://fuseki.cluster.gsi.dit.upm.es/hotels\n", | ||||||
|  |     "    \n", | ||||||
|  |     "SELECT ?g (COUNT(?s) as ?count) WHERE {\n", | ||||||
|  |     "    GRAPH ?g {\n", | ||||||
|  |     "        ?s ?p ?o\n", | ||||||
|  |     "    }\n", | ||||||
|  |     "}\n", | ||||||
|  |     "GROUP BY ?g\n", | ||||||
|  |     "ORDER BY desc(?count)" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "You should see many graphs, with different triple counts.\n", | ||||||
|  |     "\n", | ||||||
|  |     "The biggest one should be http://fuseki.cluster.gsi.dit.upm.es/synthetic" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "Once you have this list, you can query specific graphs like so:" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "code", | ||||||
|  |    "execution_count": null, | ||||||
|  |    "metadata": {}, | ||||||
|  |    "outputs": [], | ||||||
|  |    "source": [ | ||||||
|  |     "%%sparql http://fuseki.cluster.gsi.dit.upm.es/hotels\n", | ||||||
|  |     "    \n", | ||||||
|  |     "SELECT *\n", | ||||||
|  |     "WHERE {\n", | ||||||
|  |     "    GRAPH <http://fuseki.cluster.gsi.dit.upm.es/synthetic>{\n", | ||||||
|  |     "    ?s ?p ?o .\n", | ||||||
|  |     "    }\n", | ||||||
|  |     "}\n", | ||||||
|  |     "LIMIT 10" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "There are two exercises in this notebook.\n", | ||||||
|  |     "\n", | ||||||
|  |     "In each of them, you are asked to run five queries, to answer the following questions:\n", | ||||||
|  |     "\n", | ||||||
|  |     "* Number of hotels (or entities) with reviews\n", | ||||||
|  |     "* Number of reviews\n", | ||||||
|  |     "* The hotel with the lowest average score\n", | ||||||
|  |     "* The hotel with the highest average score\n", | ||||||
|  |     "* A list of hotels with their addresses and telephone numbers" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "### Manually annotated data" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "Your task is to design five queries to answer the questions in the description, and run each of them in at least three graphs, other than the `synthetic` graph.\n", | ||||||
|  |     "\n", | ||||||
|  |     "To design the queries, you can either use what you know about the schema.org vocabularies, or explore subjects, predicates and objects in each of the graphs.\n", | ||||||
|  |     "\n", | ||||||
|  |     "You will get a better understanding if you follow the exploratory path.\n", | ||||||
|  |     "\n", | ||||||
|  |     "Here's an example to get the entities and their types in a graph:" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "code", | ||||||
|  |    "execution_count": null, | ||||||
|  |    "metadata": {}, | ||||||
|  |    "outputs": [], | ||||||
|  |    "source": [ | ||||||
|  |     "%%sparql http://fuseki.cluster.gsi.dit.upm.es/hotels\n", | ||||||
|  |     "\n", | ||||||
|  |     "PREFIX schema: <http://schema.org/>\n", | ||||||
|  |     "    \n", | ||||||
|  |     "SELECT ?s ?o\n", | ||||||
|  |     "WHERE {\n", | ||||||
|  |     "    GRAPH <http://fuseki.cluster.gsi.dit.upm.es/35c20a49f8c6581be1cf7bd56d12d131>{\n", | ||||||
|  |     "        ?s a ?o .\n", | ||||||
|  |     "    }\n", | ||||||
|  |     "\n", | ||||||
|  |     "}\n", | ||||||
|  |     "LIMIT 40" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "### Synthetic dataset\n", | ||||||
|  |     "\n", | ||||||
|  |     "Now, run the same queries in the synthetic dataset.\n", | ||||||
|  |     "\n", | ||||||
|  |     "The query below should get you started:" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "code", | ||||||
|  |    "execution_count": null, | ||||||
|  |    "metadata": {}, | ||||||
|  |    "outputs": [], | ||||||
|  |    "source": [ | ||||||
|  |     "%%sparql http://fuseki.cluster.gsi.dit.upm.es/hotels\n", | ||||||
|  |     "    \n", | ||||||
|  |     "SELECT *\n", | ||||||
|  |     "WHERE {\n", | ||||||
|  |     "    GRAPH <http://fuseki.cluster.gsi.dit.upm.es/synthetic>{\n", | ||||||
|  |     "    ?s ?p ?o .\n", | ||||||
|  |     "    }\n", | ||||||
|  |     "}\n", | ||||||
|  |     "LIMIT 10" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "### Optional exercise\n", | ||||||
|  |     "\n", | ||||||
|  |     "\n", | ||||||
|  |     "Explore the graphs and find the most typical mistakes (e.g. using `http://schema.org/Hotel/Hotel`).\n", | ||||||
|  |     "\n", | ||||||
|  |     "Tip: You can use normal SPARQL queries with `BOUND` and `REGEX` to check if the annotations are correct.\n", | ||||||
|  |     "\n", | ||||||
|  |     "You can also query all the graphs at the same time. e.g. to get all types used:" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "code", | ||||||
|  |    "execution_count": null, | ||||||
|  |    "metadata": {}, | ||||||
|  |    "outputs": [], | ||||||
|  |    "source": [ | ||||||
|  |     "%%sparql http://fuseki.cluster.gsi.dit.upm.es/hotels\n", | ||||||
|  |     "\n", | ||||||
|  |     "PREFIX schema: <http://schema.org/>\n", | ||||||
|  |     "    \n", | ||||||
|  |     "SELECT DISTINCT ?o\n", | ||||||
|  |     "WHERE {\n", | ||||||
|  |     "    GRAPH ?g {\n", | ||||||
|  |     "        ?s a ?o .\n", | ||||||
|  |     "    }\n", | ||||||
|  |     "    {\n", | ||||||
|  |     "        SELECT ?g\n", | ||||||
|  |     "        WHERE {\n", | ||||||
|  |     "           GRAPH ?g {}\n", | ||||||
|  |     "           FILTER (str(?g) != 'http://fuseki.cluster.gsi.dit.upm.es/synthetic')\n", | ||||||
|  |     "        }\n", | ||||||
|  |     "    }\n", | ||||||
|  |     "\n", | ||||||
|  |     "\n", | ||||||
|  |     "}\n", | ||||||
|  |     "LIMIT 50" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "### Discussion\n", | ||||||
|  |     "\n", | ||||||
|  |     "Compare the results of the synthetic and the manual dataset, and answer these questions:" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "Both datasets should use the same schema. Are there any differences when it comes to using them?" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "code", | ||||||
|  |    "execution_count": null, | ||||||
|  |    "metadata": { | ||||||
|  |     "deletable": false, | ||||||
|  |     "nbgrader": { | ||||||
|  |      "checksum": "860c3977cd06736f1342d535944dbb63", | ||||||
|  |      "grade": true, | ||||||
|  |      "grade_id": "cell-9bd08e4f5842cb89", | ||||||
|  |      "locked": false, | ||||||
|  |      "points": 0, | ||||||
|  |      "schema_version": 1, | ||||||
|  |      "solution": true | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "outputs": [], | ||||||
|  |    "source": [ | ||||||
|  |     "# YOUR ANSWER HERE" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "Are the annotations used correctly in every graph?" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "code", | ||||||
|  |    "execution_count": null, | ||||||
|  |    "metadata": { | ||||||
|  |     "deletable": false, | ||||||
|  |     "nbgrader": { | ||||||
|  |      "checksum": "1946a7ed4aba8d168bb3fad898c05651", | ||||||
|  |      "grade": true, | ||||||
|  |      "grade_id": "cell-9dc1c9033198bb18", | ||||||
|  |      "locked": false, | ||||||
|  |      "points": 0, | ||||||
|  |      "schema_version": 1, | ||||||
|  |      "solution": true | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "outputs": [], | ||||||
|  |    "source": [ | ||||||
|  |     "# YOUR ANSWER HERE" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "Has any of the datasets been harder to query? If so, why?" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "code", | ||||||
|  |    "execution_count": null, | ||||||
|  |    "metadata": { | ||||||
|  |     "deletable": false, | ||||||
|  |     "nbgrader": { | ||||||
|  |      "checksum": "6714abc5226618b76dc4c1aaed6d1a49", | ||||||
|  |      "grade": true, | ||||||
|  |      "grade_id": "cell-6c18003ced54be23", | ||||||
|  |      "locked": false, | ||||||
|  |      "points": 0, | ||||||
|  |      "schema_version": 1, | ||||||
|  |      "solution": true | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "outputs": [], | ||||||
|  |    "source": [ | ||||||
|  |     "# YOUR ANSWER HERE" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "## References" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "* [RDFLib documentation](https://rdflib.readthedocs.io/en/stable/).\n", | ||||||
|  |     "* [Wikidata Query Service query examples](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|  |    "source": [ | ||||||
|  |     "## Licence\n", | ||||||
|  |     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||||
|  |     "\n", | ||||||
|  |     "© 2018 Universidad Politécnica de Madrid." | ||||||
|  |    ] | ||||||
|  |   } | ||||||
|  |  ], | ||||||
|  |  "metadata": { | ||||||
|  |   "kernelspec": { | ||||||
|  |    "display_name": "Python 3", | ||||||
|  |    "language": "python", | ||||||
|  |    "name": "python3" | ||||||
|  |   }, | ||||||
|  |   "language_info": { | ||||||
|  |    "codemirror_mode": { | ||||||
|  |     "name": "ipython", | ||||||
|  |     "version": 3 | ||||||
|  |    }, | ||||||
|  |    "file_extension": ".py", | ||||||
|  |    "mimetype": "text/x-python", | ||||||
|  |    "name": "python", | ||||||
|  |    "nbconvert_exporter": "python", | ||||||
|  |    "pygments_lexer": "ipython3", | ||||||
|  |    "version": "3.7.2" | ||||||
|  |   } | ||||||
|  |  }, | ||||||
|  |  "nbformat": 4, | ||||||
|  |  "nbformat_minor": 2 | ||||||
|  | } | ||||||
| @@ -1,11 +0,0 @@ | |||||||
| # Files included # |  | ||||||
|  |  | ||||||
| * `validate.py` validates and serializes a turtle dataset |  | ||||||
| * `sparql.py` runs a custom sparql query on a given dataset (by default, `reviews.ttl`) |  | ||||||
| * `extract_data.py` extracts RDFa, micro-data and JSON-LD data from a given URL |  | ||||||
|  |  | ||||||
| # Installation # |  | ||||||
|  |  | ||||||
| ``` |  | ||||||
| pip install --user -r requirements.txt |  | ||||||
| ``` |  | ||||||
							
								
								
									
										1880
									
								
								lod/SPARQL.ipynb
									
									
									
									
									
								
							
							
						
						
									
										1880
									
								
								lod/SPARQL.ipynb
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,49 +0,0 @@ | |||||||
|  |  | ||||||
#!/usr/bin/env python
"""Extract RDFa, microdata and JSON-LD annotations from a web page.

Usage: python extract_data.py <URL>

Parses the page with rdflib (rdfa + microdata) plus any embedded
``application/ld+json`` script tags, sanitizes malformed URIs, and prints
the resulting graph as Turtle on stdout.
"""
import sys
from future.standard_library import install_aliases
install_aliases()

from urllib import request, parse
from rdflib import Graph, term
from lxml import etree

if len(sys.argv) < 2:
    print('Usage: python {} <URL>'.format(sys.argv[0]))
    print('')
    print('Extract rdfa, microdata and json-ld annotations from a website')
    exit(1)

url = sys.argv[1]

g = Graph()
g.parse(url, format='rdfa')
g.parse(url, format='microdata')


def sanitize_triple(t):
    """Return triple *t* with invalid URIRefs percent-encoded.

    Bad URIs (no '/' at all) would otherwise make serialization fail.
    """
    def sanitize_triple_item(item):
        if isinstance(item, term.URIRef) and '/' not in item:
            return term.URIRef(parse.quote(str(item)))
        return item

    return (sanitize_triple_item(t[0]),
            sanitize_triple_item(t[1]),
            sanitize_triple_item(t[2]))


with request.urlopen(url) as response:
    # Get all json-ld objects embedded in the html file.
    html = response.read().decode('utf-8', errors='ignore')
    parser = etree.XMLParser(recover=True)
    root = etree.fromstring(html, parser=parser)
    # lxml element truthiness reflects the number of children, so an empty
    # but valid root would be skipped by `if root:` — compare against None.
    if root is not None:
        for jsonld in root.findall(".//script[@type='application/ld+json']"):
            g.parse(data=jsonld.text, publicID=url, format='json-ld')


fixedgraph = Graph()
fixedgraph += [sanitize_triple(s) for s in g]

# Bug fix: serialize the sanitized graph; previously the raw graph `g` was
# printed, making the sanitization step dead code.
print(fixedgraph.serialize(format='turtle').decode('utf-8', errors='ignore'))
| @@ -1,12 +1,22 @@ | |||||||
|  | ''' | ||||||
|  | Helper functions and ipython magic for the SPARQL exercises. | ||||||
|  |  | ||||||
|  | The tests in the notebooks rely on the `LAST_QUERY` variable, which is updated by the `%%sparql` magic after every query. | ||||||
|  | This variable contains the full query used (`LAST_QUERY["query"]`), the endpoint it was sent to (`LAST_QUERY["endpoint"]`), and a dictionary with the response of the endpoint (`LAST_QUERY["results"]`). | ||||||
|  | For convenience, the results are also given as tuples (`LAST_QUERY["tuples"]`), and as a dictionary of `{column:[values]}` (`LAST_QUERY["columns"]`). | ||||||
|  | ''' | ||||||
| from IPython.core.magic import (register_line_magic, register_cell_magic, | from IPython.core.magic import (register_line_magic, register_cell_magic, | ||||||
|                                 register_line_cell_magic) |                                 register_line_cell_magic) | ||||||
|  | from IPython.display import HTML, display, Image, display_javascript | ||||||
| from IPython.display import HTML, display, Image |  | ||||||
| from urllib.request import Request, urlopen | from urllib.request import Request, urlopen | ||||||
| from urllib.parse import quote_plus, urlencode | from urllib.parse import quote_plus, urlencode | ||||||
| from urllib.error import HTTPError | from urllib.error import HTTPError | ||||||
|  |  | ||||||
| import json | import json | ||||||
|  | import sys | ||||||
|  |  | ||||||
|  | js = "IPython.CodeCell.options_default.highlight_modes['magic_sparql'] = {'reg':[/^%%sparql/]};" | ||||||
|  | display_javascript(js, raw=True) | ||||||
|  |  | ||||||
|  |  | ||||||
| def send_query(query, endpoint): | def send_query(query, endpoint): | ||||||
| @@ -20,7 +30,11 @@ def send_query(query, endpoint): | |||||||
|                 headers={'content-type': 'application/x-www-form-urlencoded', |                 headers={'content-type': 'application/x-www-form-urlencoded', | ||||||
|                          'accept': FORMATS}, |                          'accept': FORMATS}, | ||||||
|                 method='POST') |                 method='POST') | ||||||
|     return json.loads(urlopen(r).read().decode('utf-8')); |     res = urlopen(r) | ||||||
|  |     data = res.read().decode('utf-8') | ||||||
|  |     if res.getcode() == 200: | ||||||
|  |         return json.loads(data) | ||||||
|  |     raise Exception('Error getting results: {}'.format(data)) | ||||||
|  |  | ||||||
|  |  | ||||||
| def tabulate(tuples, header=None): | def tabulate(tuples, header=None): | ||||||
| @@ -39,11 +53,14 @@ def tabulate(tuples, header=None): | |||||||
|  |  | ||||||
| LAST_QUERY = {} | LAST_QUERY = {} | ||||||
|  |  | ||||||
|  | def solution(): | ||||||
|  |     return LAST_QUERY | ||||||
|  |  | ||||||
|  |  | ||||||
| def query(query, endpoint=None, print_table=False): | def query(query, endpoint=None, print_table=False): | ||||||
|     global LAST_QUERY |     global LAST_QUERY | ||||||
|  |  | ||||||
|     endpoint = endpoint or "http://dbpedia.org/sparql" |     endpoint = endpoint or "http://fuseki.cluster.gsi.dit.upm.es/sitc/" | ||||||
|     results = send_query(query, endpoint) |     results = send_query(query, endpoint) | ||||||
|     tuples = to_table(results) |     tuples = to_table(results) | ||||||
|  |  | ||||||
| @@ -80,12 +97,30 @@ def to_table(results): | |||||||
|  |  | ||||||
| @register_cell_magic | @register_cell_magic | ||||||
| def sparql(line, cell): | def sparql(line, cell): | ||||||
|  |     ''' | ||||||
|  |     Sparql magic command for ipython. It can be used in a cell like this: | ||||||
|  |      | ||||||
|  |     ``` | ||||||
|  |     %%sparql | ||||||
|  |      | ||||||
|  |     ... Your SPARQL query ... | ||||||
|  |      | ||||||
|  |     ``` | ||||||
|  |      | ||||||
|  |     by default, it will use the DBpedia endpoint, but you can use a different endpoint like this: | ||||||
|  |      | ||||||
|  |     ``` | ||||||
|  |     %%sparql http://my-sparql-endpoint... | ||||||
|  |      | ||||||
|  |     ... Your SPARQL query ... | ||||||
|  |     ``` | ||||||
|  |     ''' | ||||||
|     try: |     try: | ||||||
|         return query(cell, endpoint=line, print_table=True) |         return query(cell, endpoint=line, print_table=True) | ||||||
|     except HTTPError as ex: |     except HTTPError as ex: | ||||||
|         error_message = ex.read().decode('utf-8') |         error_message = ex.read().decode('utf-8') | ||||||
|         print('Error {}. Reason: {}'.format(ex.status, ex.reason)) |         print('Error {}. Reason: {}'.format(ex.status, ex.reason)) | ||||||
|         print(error_message) |         print(error_message, file=sys.stderr) | ||||||
|  |  | ||||||
|  |  | ||||||
| def show_photos(values): | def show_photos(values): | ||||||
|   | |||||||
| @@ -1,29 +0,0 @@ | |||||||
| @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . |  | ||||||
| @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . |  | ||||||
| @prefix schema: <http://schema.org/> . |  | ||||||
|  |  | ||||||
|  |  | ||||||
| _:Hotel1 a schema:Hotel ; |  | ||||||
|          schema:description "A fictitious hotel" . |  | ||||||
|  |  | ||||||
|  |  | ||||||
| _:Review1 a schema:Review ; |  | ||||||
|           schema:reviewBody "This is a great review" ; |  | ||||||
|           schema:reviewRating [ |  | ||||||
|            a schema:Rating ; |  | ||||||
|            schema:author <http://jfernando.es/me> ; |  | ||||||
|            schema:ratingValue "0.7" |  | ||||||
|             |  | ||||||
|           ] ; |  | ||||||
|           schema:itemReviewed _:Hotel1 . |  | ||||||
|  |  | ||||||
|  |  | ||||||
| _:Review2 a schema:Review ; |  | ||||||
|           schema:reviewBody "This is a not so great review" ; |  | ||||||
|           schema:reviewRating [ |  | ||||||
|            a schema:Rating ; |  | ||||||
|            schema:author [ a schema:Person ; |  | ||||||
|            schema:givenName "anonymous" ] ; |  | ||||||
|            schema:ratingValue "0.3" |  | ||||||
|           ] ; |  | ||||||
|           schema:itemReviewed _:Hotel1 . |  | ||||||
| @@ -1,23 +0,0 @@ | |||||||
#!/usr/bin/env python
"""Run a sample SPARQL query over a Turtle dataset.

Usage: python sparql.py [dataset.ttl]

The dataset defaults to ``reviews.ttl``. Prints every (predicate, object)
pair of each ``schema:Review`` resource, one triple per line.
"""
import rdflib
import sys

# Dataset to query; first CLI argument, falling back to the bundled example.
dataset = sys.argv[1] if len(sys.argv) > 1 else 'reviews.ttl'
g = rdflib.Graph()

schema = rdflib.Namespace("http://schema.org/")

# Read the Turtle file into the graph.
g.parse(dataset, format='turtle')

results = g.query(
    """SELECT DISTINCT ?review ?p ?o
       WHERE {
          ?review a schema:Review.
          ?review ?p ?o.
       }""", initNs={'schema': schema})

for row in results:
    print("%s %s %s" % row)
| @@ -1,6 +0,0 @@ | |||||||
"""Validate an N3/Turtle dataset by parsing it and echoing the serialization."""
import rdflib
import sys

# Use the dataset given on the command line, or the bundled example.
if len(sys.argv) > 1:
    dataset = sys.argv[1]
else:
    dataset = 'reviews.ttl'

g = rdflib.Graph()
g.parse(dataset, format="n3")
print(g.serialize(format="n3").decode('utf-8'))
		Reference in New Issue
	
	Block a user