1
0
mirror of https://github.com/gsi-upm/sitc synced 2024-11-24 23:42:29 +00:00

Modify RDF exercises

This commit is contained in:
J. Fernando Sánchez 2019-02-14 11:38:48 +01:00
parent 33d79e27b1
commit 332802176f
3 changed files with 231 additions and 238 deletions

View File

@ -31,7 +31,7 @@
"deletable": false, "deletable": false,
"editable": false, "editable": false,
"nbgrader": { "nbgrader": {
"checksum": "59c5cb46c9d722f691206e766e5af557", "checksum": "845cf125f1c5eb7aa3653ef461bffc67",
"grade": false, "grade": false,
"grade_id": "cell-51338a0933103db9", "grade_id": "cell-51338a0933103db9",
"locked": true, "locked": true,
@ -62,7 +62,7 @@
"# Tools\n", "# Tools\n",
"\n", "\n",
"This notebook is self-contained, but it requires some python libraries.\n", "This notebook is self-contained, but it requires some python libraries.\n",
"To install them, simply run the following line" "To install them, simply run the following line:"
] ]
}, },
{ {
@ -265,7 +265,7 @@
"deletable": false, "deletable": false,
"editable": false, "editable": false,
"nbgrader": { "nbgrader": {
"checksum": "e0b6464bce9263fb35543acf4acb31da", "checksum": "32f1f607adb584aaea9fb90ae4d805b5",
"grade": false, "grade": false,
"grade_id": "cell-bb418e9bae1fef1a", "grade_id": "cell-bb418e9bae1fef1a",
"locked": true, "locked": true,
@ -274,18 +274,17 @@
} }
}, },
"source": [ "source": [
"First of all, run the line below.\n", "First of all, run the line below to import everything you need for the exercises."
"It will import everything you need for the exercises."
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": null,
"metadata": { "metadata": {
"deletable": false, "deletable": false,
"editable": false, "editable": false,
"nbgrader": { "nbgrader": {
"checksum": "bf98cea45f42e3d0f1ab158693b40da7", "checksum": "892f8491591c25defdea5fdcdd289489",
"grade": false, "grade": false,
"grade_id": "cell-4a1b60bd9974bbb1", "grade_id": "cell-4a1b60bd9974bbb1",
"locked": true, "locked": true,
@ -293,20 +292,9 @@
"solution": false "solution": false
} }
}, },
"outputs": [ "outputs": [],
{
"data": {
"application/javascript": [
"IPython.CodeCell.options_default.highlight_modes['magic_turtle'] = {'reg':[/^%%ttl/]};"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [ "source": [
"from helpers import *\n", "from helpers import *"
"from rdflib import term, RDF, Namespace"
] ]
}, },
{ {
@ -314,9 +302,9 @@
"metadata": { "metadata": {
"deletable": false, "deletable": false,
"nbgrader": { "nbgrader": {
"checksum": "76c052d3f1117f07468068a90f969760", "checksum": "a855d3d63be5ea7f73fd85d645b21bfe",
"grade": true, "grade": true,
"grade_id": "cell-57f67d1e662b7f09", "grade_id": "cell-9ac392294d5708a1",
"locked": false, "locked": false,
"points": 0, "points": 0,
"schema_version": 1, "schema_version": 1,
@ -330,7 +318,7 @@
"# YOUR ANSWER HERE\n", "# YOUR ANSWER HERE\n",
"```\n", "```\n",
"\n", "\n",
"Execute the following line without any modification. It simple fills 'example' with the code written below. It should produce the output 'File loaded!\"." "Depending on the exercise, you might need to fill that part with a Turtle definition (first exercise), some python code (second exercise), or plain text."
] ]
}, },
{ {
@ -339,21 +327,65 @@
"deletable": false, "deletable": false,
"editable": false, "editable": false,
"nbgrader": { "nbgrader": {
"checksum": "f6eb0c6c19c1756d7705f52866b00f82", "checksum": "9a73f79f8f282874fb60011e6019e387",
"grade": false, "grade": false,
"grade_id": "cell-a4ed500079ba36ca", "grade_id": "cell-57f67d1e662b7f09",
"locked": true, "locked": true,
"schema_version": 1, "schema_version": 1,
"solution": false "solution": false
} }
}, },
"source": [ "source": [
"Now run the tests:" "Turtle is usually written in standalone files (e.g. `mydefinition.ttl`).\n",
"To write Turtle definitions inside notebook cells we will use a special magic command: `%%ttl`.\n",
"The command will check the Turtle syntax of your definition, and provide syntax highlighting.\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"checksum": "18ad887c2f326ee59139b96860ce8893",
"grade": false,
"grade_id": "cell-16214ea73a9b689e",
"locked": true,
"schema_version": 1,
"solution": false
}
},
"source": [
"## Example"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"checksum": "ed2466715f57356f22ddeabfb101eb11",
"grade": false,
"grade_id": "cell-da88c2f8170436fe",
"locked": true,
"schema_version": 1,
"solution": false
}
},
"source": [
"\n",
"To make sure everything works, let's try first with an example exercise.\n",
"\n",
"Execute the code below, without modification.\n",
"\n",
"The definition **is empty but valid**, so the output will be `The Turtle syntax is correct.`."
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"metadata": { "metadata": {
"deletable": false, "deletable": false,
"nbgrader": { "nbgrader": {
@ -365,21 +397,7 @@
"solution": true "solution": true
} }
}, },
"outputs": [ "outputs": [],
{
"data": {
"text/markdown": [
"File loaded!"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"%%ttl example\n", "%%ttl example\n",
"\n", "\n",
@ -388,92 +406,61 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [
"Now run the tests executing the following cell. You will see they fail (as expected)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": { "metadata": {
"deletable": false, "deletable": false,
"editable": false, "editable": false,
"nbgrader": { "nbgrader": {
"checksum": "7a21480d0282aed4f943d0c9d6ecd6e6", "checksum": "60a9934c544eee9fc2c3745c36beb049",
"grade": true,
"grade_id": "cell-23e61b9f48d597fc",
"locked": true,
"points": 1,
"schema_version": 1,
"solution": false
}
},
"outputs": [],
"source": [
"# This code tests that the definition above is correct.\n",
"g = solution('example')\n",
"test('Some triples have been loaded',\n",
" len(g))\n",
"test('A person has been defined',\n",
" g.subjects(RDF.type, term.URIRef('http://xmlns.com/foaf/0.1/Person')))\n",
"print('All tests passed. Well done!')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now replace #YOUR ANSWER HERE with your answer (the triples) and execute the test."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%ttl example\n",
"# YOUR ANSWER HERE"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# This code tests that the definition above is correct.\n",
"g = solution('example')\n",
"test('Some triples have been loaded',\n",
" len(g))\n",
"test('A person has been defined',\n",
" g.subjects(RDF.type, term.URIRef('http://xmlns.com/foaf/0.1/Person')))\n",
"print('All tests passed. Well done!')"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"checksum": "61933311e76dd200b63032f42f26b11d",
"grade": false, "grade": false,
"grade_id": "cell-da88c2f8170436fe", "grade_id": "cell-1c2ca86de107dec3",
"locked": true, "locked": true,
"schema_version": 1, "schema_version": 1,
"solution": false "solution": false
} }
}, },
"source": [ "source": [
"Turtle is usually written in standalone files (e.g. `mydefinition.ttl`).\n", "However, the definition is empty, so the tests for that definition **should fail**.\n",
"To write Turtle definitions inside our notebook, and to make it easy to test them, we will use a special magic command: `%%ttl`.\n",
"\n", "\n",
"To make sure everything works, let's try first with an example.\n", "Try it yourself by running the following line:"
"\n", ]
"\n", },
"Copy/paste this solution below:\n", {
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"checksum": "12b5c7170326932ff3c7e1688a5769b2",
"grade": false,
"grade_id": "cell-0154f8481bf393e8",
"locked": true,
"schema_version": 1,
"solution": false
}
},
"outputs": [],
"source": [
"# This will check your definition for the example.\n",
"check('example')"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"checksum": "b534d998c6d2e9f6bef8c2d88687a96b",
"grade": false,
"grade_id": "cell-adc7e6b7e96e8788",
"locked": true,
"schema_version": 1,
"solution": false
}
},
"source": [
"Now copy/paste the code below into the definition (below the `# YOUR ANSWER HERE` part), execute it, and run the test code again.\n",
"\n", "\n",
"```turtle\n", "```turtle\n",
"@prefix foaf: <http://xmlns.com/foaf/0.1/> .\n", "@prefix foaf: <http://xmlns.com/foaf/0.1/> .\n",
@ -487,7 +474,9 @@
" geo:lat \"34.0736111\" ;\n", " geo:lat \"34.0736111\" ;\n",
" geo:lon \"-118.3994444\"\n", " geo:lon \"-118.3994444\"\n",
" ] .\n", " ] .\n",
"```" "```\n",
"\n",
"If you copied the file right, the tests should pass."
] ]
}, },
{ {
@ -496,7 +485,7 @@
"deletable": false, "deletable": false,
"editable": false, "editable": false,
"nbgrader": { "nbgrader": {
"checksum": "a64acf02625b48b3c65b6e1bc1ba6c1a", "checksum": "67540252804835faea83d96aab87aa29",
"grade": false, "grade": false,
"grade_id": "cell-e73f1933742f7ab3", "grade_id": "cell-e73f1933742f7ab3",
"locked": true, "locked": true,
@ -505,14 +494,14 @@
} }
}, },
"source": [ "source": [
"## Exercise 1: Definition of a Hotel" "## Exercise 1: Definition of hotels and reviews"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"We will define some basic information about a hotel, and some reviews.\n", "We will define some basic information about hotels, and some reviews.\n",
"This should be the same type of information that some aggregators (e.g. TripAdvisor) offer in their websites.\n", "This should be the same type of information that some aggregators (e.g. TripAdvisor) offer in their websites.\n",
"\n", "\n",
"Namely, you need to define at least two hotels (you may add more than one), with the following information:\n", "Namely, you need to define at least two hotels (you may add more than one), with the following information:\n",
@ -597,68 +586,11 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"deletable": false,
"editable": false,
"nbgrader": {
"checksum": "4f54963163a64f46058c86be139e5543",
"grade": true,
"grade_id": "definition-tests",
"locked": true,
"points": 10,
"schema_version": 1,
"solution": false
}
},
"outputs": [], "outputs": [],
"source": [ "source": [
"g = solution('hotel')\n", "# This will check that your definition for the first exercise is correct.\n",
"test('Some triples are loaded',\n", "check('hotel')"
" len(g))\n",
"\n",
"hotels = set(g.subjects(RDF.type, schema['Hotel']))\n",
"test('At least 2 hotels are loaded',\n",
" hotels,\n",
" 2,\n",
" atLeast)\n",
"\n",
"for hotel in hotels:\n",
" if 'GSIHOTEL' in hotel: # Do not check the example hotel\n",
" continue\n",
" props = g.predicates(hotel)\n",
" test('Each hotel has all required properties',\n",
" props,\n",
" list(schema[i] for i in ['description', 'email', 'logo', 'priceRange']),\n",
" func=containsAll)\n",
"\n",
"reviews = set(g.subjects(RDF.type, schema['Review']))\n",
"test('At least 3 reviews are loaded',\n",
" reviews,\n",
" 3,\n",
" atLeast)\n",
"\n",
"for review in reviews:\n",
" props = g.predicates(review)\n",
" test('Each review has all required properties',\n",
" props,\n",
" list(schema[i] for i in ['itemReviewed', 'reviewBody', 'reviewRating']),\n",
" func=containsAll)\n",
" ratings = list(g.objects(review, schema['reviewRating']))\n",
" for rating in ratings:\n",
" value = g.value(rating, schema['ratingValue'])\n",
" test('The review should have ratings', value)\n",
"\n",
"authors = set(g.objects(None, schema['author']))\n",
"for author in authors:\n",
" for prop in g.predicates(author, None):\n",
" if 'name' in str(prop).lower():\n",
" break\n",
"else:\n",
" assert \"At least a reviewer has a name (surname, givenName...)\"\n",
"\n",
"print('All tests passed. Congratulations!')\n",
"print()\n",
"print('Now you can try to add the optional properties')"
] ]
}, },
{ {
@ -967,13 +899,20 @@
"print_data('https://mastodon.social/@Gargron')" "print_data('https://mastodon.social/@Gargron')"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now try some new sites:"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {
"deletable": false, "deletable": false,
"nbgrader": { "nbgrader": {
"checksum": "fb8bd92e931c7836bb3c22dadd3be583", "checksum": "bf8d215c42321236b783601e7d072a05",
"grade": true, "grade": true,
"grade_id": "cell-ff2413f45311f086", "grade_id": "cell-ff2413f45311f086",
"locked": false, "locked": false,
@ -984,7 +923,6 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"# Try some new sites here.\n",
"# YOUR ANSWER HERE" "# YOUR ANSWER HERE"
] ]
}, },
@ -1040,24 +978,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.5.5" "version": "3.7.2"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,
"autocomplete": true,
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 1,
"hotkeys": {
"equation": "Ctrl-E",
"itemize": "Ctrl-I"
},
"labels_anchors": false,
"latex_user_defs": false,
"report_style_numbering": false,
"user_envs_cfg": false
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -1,6 +1,4 @@
import sys import sys
import operator
import types
from future.standard_library import install_aliases from future.standard_library import install_aliases
install_aliases() install_aliases()
@ -18,13 +16,11 @@ from IPython.core.magic import (register_line_magic, register_cell_magic,
from IPython.display import HTML, display, Image, Markdown from IPython.display import HTML, display, Image, Markdown
schema = Namespace('http://schema.org/')
DEFINITIONS = {} DEFINITIONS = {}
def solution(exercise='default'): def solution(exercise='default'):
if exercise not in DEFINITIONS: if exercise not in DEFINITIONS:
raise Exception('Solution for {} not found'.format(exercise)) raise Exception('Solution for {} not found. Have you defined it?'.format(exercise))
return DEFINITIONS[exercise] return DEFINITIONS[exercise]
@ -51,16 +47,17 @@ If you don\'t know what this error means, try an online validator: http://ttl.su
''' '''
global DEFINITIONS global DEFINITIONS
key = line or 'default' key = line or 'default'
DEFINITIONS[key] = None
try: try:
DEFINITIONS[key] = g.parse(data=cell, DEFINITIONS[key] = g.parse(data=cell,
format="n3") format="n3")
except SyntaxError as ex: except SyntaxError as ex:
return Markdown(msg.format(line=ex.lines, reason=ex._why)) print(msg.format(line=ex.lines, reason=ex._why), file=sys.stderr)
raise Exception('Bad Turtle syntax') from None
except Exception as ex: except Exception as ex:
return Markdown(msg.format(line='?', reason=ex)) print(msg.format(line='?', reason=ex), file=sys.stderr)
return Markdown('File loaded!') raise Exception('Bad Turtle syntax') from None
return Markdown('The Turtle syntax is correct.')
return HTML('Loaded!') #HTML('<code>{}</code>'.format(cell))
def extract_data(url): def extract_data(url):
@ -101,7 +98,6 @@ def extract_data(url):
fixedgraph = Graph() fixedgraph = Graph()
fixedgraph += [sanitize_triple(s) for s in g] fixedgraph += [sanitize_triple(s) for s in g]
# print(g.serialize(format='turtle').decode('utf-8', errors='ignore'))
return fixedgraph return fixedgraph
def turtle(g): def turtle(g):
@ -119,30 +115,13 @@ def print_data(url):
def test(description, got, expected=None, func=None): def check(testname):
if isinstance(got, types.GeneratorType): import tests
got = set(got)
try: test = getattr(tests, 'test_{}'.format(testname), None)
if expected is None: if test is None:
func = func or operator.truth raise Exception('There are no tests for {}'.format(testname))
expected = True definition = solution(testname)
assert func(got) if definition is None:
else: raise Exception('The definition for {} is empty or invalid.'.format(testname))
func = func or operator.eq return test(definition)
assert func(got, expected)
except AssertionError:
print('Test failed: {}'.format(description), file=sys.stderr)
print('\tExpected: {}'.format(expected), file=sys.stderr)
print('\tGot: {}'.format(got), file=sys.stderr)
raise Exception('Test failed: {}'.format(description))
def atLeast(lst, number):
return len(set(lst))>=number
def containsAll(lst, other):
for i in other:
if i not in lst:
print('{} not found'.format(i), file=sys.stderr)
return False
return True

93
rdf/tests.py Normal file
View File

@ -0,0 +1,93 @@
import operator
import types
import sys
from rdflib import term, RDF, Namespace
schema = Namespace('http://schema.org/')
def test(description, got, expected=None, func=None):
if isinstance(got, types.GeneratorType):
got = set(got)
try:
if expected is None:
func = func or operator.truth
expected = True
assert func(got)
else:
func = func or operator.eq
assert func(got, expected)
except AssertionError:
print('Test failed: {}'.format(description), file=sys.stderr)
print('\tExpected: {}'.format(expected), file=sys.stderr)
print('\tGot: {}'.format(got), file=sys.stderr)
raise Exception('Test failed: {}'.format(description))
def atLeast(lst, number):
    """Return True when `lst` holds at least `number` distinct elements."""
    distinct = set(lst)
    return number <= len(distinct)
def containsAll(lst, other):
    """Return True when every element of `other` is found in `lst`.

    The first missing element is reported on stderr and the search stops
    there, returning False.
    """
    nothing_missing = object()  # unique sentinel, so None is a valid element
    first_absent = next(
        (item for item in other if item not in lst),
        nothing_missing,
    )
    if first_absent is nothing_missing:
        return True
    print('{} not found'.format(first_absent), file=sys.stderr)
    return False
def test_hotel(g):
    """Validate the graph submitted for the hotel exercise.

    The checks run in order and stop at the first failure:

    1. The graph contains at least one triple.
    2. At least 2 resources are typed as schema:Hotel.
    3. Every hotel (except those whose URI contains 'GSIHOTEL') has the
       description, email, logo and priceRange properties.
    4. At least 3 resources are typed as schema:Review.
    5. Every review has itemReviewed, reviewBody and reviewRating, and each
       of its ratings has a truthy schema:ratingValue.
    6. If any schema:author is declared, at least one author has a property
       whose URI contains 'name' (name, givenName, familyName...).

    Args:
        g: The graph to validate; it must support ``len``, ``subjects``,
            ``predicates``, ``objects`` and ``value``.

    Raises:
        Exception: raised by ``test`` when any of the checks fails.
    """
    test('Some triples are loaded',
         len(g))

    hotels = set(g.subjects(RDF.type, schema['Hotel']))
    test('At least 2 hotels are loaded',
         hotels,
         2,
         atLeast)

    required_hotel_props = [schema[i] for i in
                            ['description', 'email', 'logo', 'priceRange']]
    for hotel in hotels:
        if 'GSIHOTEL' in hotel:  # Do not check the example hotel
            continue
        props = g.predicates(hotel)
        test('Each hotel has all required properties',
             props,
             required_hotel_props,
             func=containsAll)

    reviews = set(g.subjects(RDF.type, schema['Review']))
    test('At least 3 reviews are loaded',
         reviews,
         3,
         atLeast)

    required_review_props = [schema[i] for i in
                             ['itemReviewed', 'reviewBody', 'reviewRating']]
    for review in reviews:
        props = g.predicates(review)
        test('Each review has all required properties',
             props,
             required_review_props,
             func=containsAll)
        ratings = list(g.objects(review, schema['reviewRating']))
        for rating in ratings:
            value = g.value(rating, schema['ratingValue'])
            test('The review should have ratings', value)

    authors = set(g.objects(None, schema['author']))
    # The previous for/else + `assert "<string>"` could never fail: the
    # else clause belonged to the outer loop (which never breaks) and a
    # non-empty string is always truthy. Run a real check instead.
    # `author` is not among the required review properties above, so the
    # check only applies when at least one author has been declared.
    if authors:
        named_author_found = any(
            'name' in str(prop).lower()
            for author in authors
            for prop in g.predicates(author, None)
        )
        test('At least a reviewer has a name (surname, givenName...)',
             named_author_found)

    print('All tests passed. Congratulations!')
    print()
    print('Now you can try to add the optional properties')
def test_example(g):
    """Validate the graph for the warm-up example.

    The graph must contain at least one triple and at least one resource
    typed as foaf:Person. Any failure is raised by ``test``.
    """
    triple_count = len(g)
    test('Some triples have been loaded', triple_count)

    person_class = term.URIRef('http://xmlns.com/foaf/0.1/Person')
    people = g.subjects(RDF.type, person_class)
    test('A person has been defined', people)

    print('All tests passed. Well done!')