From 332802176f08b1600fd2104dec74804f3237b26e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Fernando=20S=C3=A1nchez?= Date: Thu, 14 Feb 2019 11:38:48 +0100 Subject: [PATCH] Modify RDF exercises --- rdf/RDF.ipynb | 279 ++++++++++++++++++------------------------------- rdf/helpers.py | 55 +++------- rdf/tests.py | 93 +++++++++++++++++ 3 files changed, 210 insertions(+), 217 deletions(-) create mode 100644 rdf/tests.py diff --git a/rdf/RDF.ipynb b/rdf/RDF.ipynb index 707f537..2af2a86 100644 --- a/rdf/RDF.ipynb +++ b/rdf/RDF.ipynb @@ -31,7 +31,7 @@ "deletable": false, "editable": false, "nbgrader": { - "checksum": "59c5cb46c9d722f691206e766e5af557", + "checksum": "845cf125f1c5eb7aa3653ef461bffc67", "grade": false, "grade_id": "cell-51338a0933103db9", "locked": true, @@ -62,7 +62,7 @@ "# Tools\n", "\n", "This notebook is self-contained, but it requires some python libraries.\n", - "To install them, simply run the following line" + "To install them, simply run the following line:" ] }, { @@ -265,7 +265,7 @@ "deletable": false, "editable": false, "nbgrader": { - "checksum": "e0b6464bce9263fb35543acf4acb31da", + "checksum": "32f1f607adb584aaea9fb90ae4d805b5", "grade": false, "grade_id": "cell-bb418e9bae1fef1a", "locked": true, @@ -274,18 +274,17 @@ } }, "source": [ - "First of all, run the line below.\n", - "It will import everything you need for the exercises." + "First of all, run the line below to import everything you need for the exercises." 
] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "deletable": false, "editable": false, "nbgrader": { - "checksum": "bf98cea45f42e3d0f1ab158693b40da7", + "checksum": "892f8491591c25defdea5fdcdd289489", "grade": false, "grade_id": "cell-4a1b60bd9974bbb1", "locked": true, @@ -293,20 +292,9 @@ "solution": false } }, - "outputs": [ - { - "data": { - "application/javascript": [ - "IPython.CodeCell.options_default.highlight_modes['magic_turtle'] = {'reg':[/^%%ttl/]};" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "from helpers import *\n", - "from rdflib import term, RDF, Namespace" + "from helpers import *" ] }, { @@ -314,9 +302,9 @@ "metadata": { "deletable": false, "nbgrader": { - "checksum": "76c052d3f1117f07468068a90f969760", + "checksum": "a855d3d63be5ea7f73fd85d645b21bfe", "grade": true, - "grade_id": "cell-57f67d1e662b7f09", + "grade_id": "cell-9ac392294d5708a1", "locked": false, "points": 0, "schema_version": 1, @@ -330,7 +318,7 @@ "# YOUR ANSWER HERE\n", "```\n", "\n", - "Execute the following line without any modification. It simple fills 'example' with the code written below. It should produce the output 'File loaded!\"." + "Depending on the exercise, you might need to fill that part with a Turtle definition (first exercise), some python code (second exercise), or plain text." ] }, { @@ -339,117 +327,122 @@ "deletable": false, "editable": false, "nbgrader": { - "checksum": "f6eb0c6c19c1756d7705f52866b00f82", + "checksum": "9a73f79f8f282874fb60011e6019e387", "grade": false, - "grade_id": "cell-a4ed500079ba36ca", + "grade_id": "cell-57f67d1e662b7f09", "locked": true, "schema_version": 1, "solution": false } }, "source": [ - "Now run the tests:" + "Turtle is usually written in standalone files (e.g. 
`mydefinition.ttl`).\n", + "To write Turtle definitions inside notebook cells we will use a special magic command: `%%ttl`.\n", + "The command will check the Turtle syntax of your definition, and provide syntax highlighting.\n", + "\n" ] }, { - "cell_type": "code", - "execution_count": 3, + "cell_type": "markdown", "metadata": { "deletable": false, + "editable": false, "nbgrader": { - "checksum": "69182e8fadb9c9751f76786e0fcb8803", + "checksum": "18ad887c2f326ee59139b96860ce8893", "grade": false, - "grade_id": "cell-808cfcbf3891f39f", - "locked": false, + "grade_id": "cell-16214ea73a9b689e", + "locked": true, "schema_version": 1, - "solution": true + "solution": false } }, - "outputs": [ - { - "data": { - "text/markdown": [ - "File loaded!" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "%%ttl example\n", - "\n", - "# YOUR ANSWER HERE" + "## Example" ] }, { "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now run the tests executing the following cell. You will see they fail (as expected)." - ] - }, - { - "cell_type": "code", - "execution_count": null, "metadata": { "deletable": false, "editable": false, "nbgrader": { - "checksum": "7a21480d0282aed4f943d0c9d6ecd6e6", - "grade": true, - "grade_id": "cell-23e61b9f48d597fc", + "checksum": "ed2466715f57356f22ddeabfb101eb11", + "grade": false, + "grade_id": "cell-da88c2f8170436fe", "locked": true, - "points": 1, "schema_version": 1, "solution": false } }, - "outputs": [], "source": [ - "# This code tests that the definition above is correct.\n", - "g = solution('example')\n", - "test('Some triples have been loaded',\n", - " len(g))\n", - "test('A person has been defined',\n", - " g.subjects(RDF.type, term.URIRef('http://xmlns.com/foaf/0.1/Person')))\n", - "print('All tests passed. 
Well done!')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now replace #YOUR ANSWER HERE with your answer (the triples) and execute the test." + "\n", + "To make sure everything works, let's try first with an example exercise.\n", + "\n", + "Execute the code below, without modification.\n", + "\n", + "The definition **is empty but valid**, so the output will be `The Turtle syntax is correct.`." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "deletable": false, + "nbgrader": { + "checksum": "69182e8fadb9c9751f76786e0fcb8803", + "grade": false, + "grade_id": "cell-808cfcbf3891f39f", + "locked": false, + "schema_version": 1, + "solution": true + } + }, "outputs": [], "source": [ "%%ttl example\n", + "\n", "# YOUR ANSWER HERE" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "checksum": "60a9934c544eee9fc2c3745c36beb049", + "grade": false, + "grade_id": "cell-1c2ca86de107dec3", + "locked": true, + "schema_version": 1, + "solution": false + } + }, + "source": [ + "However, the definition is empty, so the tests for that definition **should fail**.\n", + "\n", + "Try it yourself by running the following line:" + ] + }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "checksum": "12b5c7170326932ff3c7e1688a5769b2", + "grade": false, + "grade_id": "cell-0154f8481bf393e8", + "locked": true, + "schema_version": 1, + "solution": false + } + }, "outputs": [], "source": [ - "# This code tests that the definition above is correct.\n", - "g = solution('example')\n", - "test('Some triples have been loaded',\n", - " len(g))\n", - "test('A person has been defined',\n", - " g.subjects(RDF.type, term.URIRef('http://xmlns.com/foaf/0.1/Person')))\n", - "print('All tests passed. 
Well done!')" + "# This will check your definition for the example.\n", + "check('example')" ] }, { @@ -458,22 +451,16 @@ "deletable": false, "editable": false, "nbgrader": { - "checksum": "61933311e76dd200b63032f42f26b11d", + "checksum": "b534d998c6d2e9f6bef8c2d88687a96b", "grade": false, - "grade_id": "cell-da88c2f8170436fe", + "grade_id": "cell-adc7e6b7e96e8788", "locked": true, "schema_version": 1, "solution": false } }, "source": [ - "Turtle is usually written in standalone files (e.g. `mydefinition.ttl`).\n", - "To write Turtle definitions inside our notebook, and to make it easy to test them, we will use a special magic command: `%%ttl`.\n", - "\n", - "To make sure everything works, let's try first with an example.\n", - "\n", - "\n", - "Copy/paste this solution below:\n", + "Now copy/paste the code below into the definition (below the `# YOUR ANSWER HERE` part), execute it, and run the test code again.\n", "\n", "```turtle\n", "@prefix foaf: .\n", @@ -487,7 +474,9 @@ " geo:lat \"34.0736111\" ;\n", " geo:lon \"-118.3994444\"\n", " ] .\n", - "```" + "```\n", + "\n", + "If you copied the file right, the tests should pass." ] }, { @@ -496,7 +485,7 @@ "deletable": false, "editable": false, "nbgrader": { - "checksum": "a64acf02625b48b3c65b6e1bc1ba6c1a", + "checksum": "67540252804835faea83d96aab87aa29", "grade": false, "grade_id": "cell-e73f1933742f7ab3", "locked": true, @@ -505,14 +494,14 @@ } }, "source": [ - "## Exercise 1: Definition of a Hotel" + "## Exercise 1: Definition of hotels and reviews" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We will define some basic information about a hotel, and some reviews.\n", + "We will define some basic information about hotels, and some reviews.\n", "This should be the same type of information that some aggregators (e.g. 
TripAdvisor) offer in their websites.\n", "\n", "Namely, you need to define at least two hotels (you may add more than one), with the following information:\n", @@ -597,68 +586,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "deletable": false, - "editable": false, - "nbgrader": { - "checksum": "4f54963163a64f46058c86be139e5543", - "grade": true, - "grade_id": "definition-tests", - "locked": true, - "points": 10, - "schema_version": 1, - "solution": false - } - }, + "metadata": {}, "outputs": [], "source": [ - "g = solution('hotel')\n", - "test('Some triples are loaded',\n", - " len(g))\n", - "\n", - "hotels = set(g.subjects(RDF.type, schema['Hotel']))\n", - "test('At least 2 hotels are loaded',\n", - " hotels,\n", - " 2,\n", - " atLeast)\n", - "\n", - "for hotel in hotels:\n", - " if 'GSIHOTEL' in hotel: # Do not check the example hotel\n", - " continue\n", - " props = g.predicates(hotel)\n", - " test('Each hotel has all required properties',\n", - " props,\n", - " list(schema[i] for i in ['description', 'email', 'logo', 'priceRange']),\n", - " func=containsAll)\n", - "\n", - "reviews = set(g.subjects(RDF.type, schema['Review']))\n", - "test('At least 3 reviews are loaded',\n", - " reviews,\n", - " 3,\n", - " atLeast)\n", - "\n", - "for review in reviews:\n", - " props = g.predicates(review)\n", - " test('Each review has all required properties',\n", - " props,\n", - " list(schema[i] for i in ['itemReviewed', 'reviewBody', 'reviewRating']),\n", - " func=containsAll)\n", - " ratings = list(g.objects(review, schema['reviewRating']))\n", - " for rating in ratings:\n", - " value = g.value(rating, schema['ratingValue'])\n", - " test('The review should have ratings', value)\n", - "\n", - "authors = set(g.objects(None, schema['author']))\n", - "for author in authors:\n", - " for prop in g.predicates(author, None):\n", - " if 'name' in str(prop).lower():\n", - " break\n", - "else:\n", - " assert \"At least a reviewer has a name (surname, 
givenName...)\"\n", - "\n", - "print('All tests passed. Congratulations!')\n", - "print()\n", - "print('Now you can try to add the optional properties')" + "# This will check that your definition for the first exercise is correct.\n", + "check('hotel')" ] }, { @@ -967,13 +899,20 @@ "print_data('https://mastodon.social/@Gargron')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now try some new sites:" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": { "deletable": false, "nbgrader": { - "checksum": "fb8bd92e931c7836bb3c22dadd3be583", + "checksum": "bf8d215c42321236b783601e7d072a05", "grade": true, "grade_id": "cell-ff2413f45311f086", "locked": false, @@ -984,7 +923,6 @@ }, "outputs": [], "source": [ - "# Try some new sites here.\n", "# YOUR ANSWER HERE" ] }, @@ -1040,24 +978,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.5" - }, - "latex_envs": { - "LaTeX_envs_menu_present": true, - "autocomplete": true, - "bibliofile": "biblio.bib", - "cite_by": "apalike", - "current_citInitial": 1, - "eqLabelWithNumbers": true, - "eqNumInitial": 1, - "hotkeys": { - "equation": "Ctrl-E", - "itemize": "Ctrl-I" - }, - "labels_anchors": false, - "latex_user_defs": false, - "report_style_numbering": false, - "user_envs_cfg": false + "version": "3.7.2" } }, "nbformat": 4, diff --git a/rdf/helpers.py b/rdf/helpers.py index c280e72..5e7440a 100644 --- a/rdf/helpers.py +++ b/rdf/helpers.py @@ -1,6 +1,4 @@ import sys -import operator -import types from future.standard_library import install_aliases install_aliases() @@ -18,13 +16,11 @@ from IPython.core.magic import (register_line_magic, register_cell_magic, from IPython.display import HTML, display, Image, Markdown -schema = Namespace('http://schema.org/') - DEFINITIONS = {} def solution(exercise='default'): if exercise not in DEFINITIONS: - raise Exception('Solution for {} not found'.format(exercise)) + raise Exception('Solution for {} not 
found. Have you defined it?'.format(exercise)) return DEFINITIONS[exercise] @@ -51,16 +47,17 @@ If you don\'t know what this error means, try an online validator: http://ttl.su ''' global DEFINITIONS key = line or 'default' + DEFINITIONS[key] = None try: DEFINITIONS[key] = g.parse(data=cell, format="n3") except SyntaxError as ex: - return Markdown(msg.format(line=ex.lines, reason=ex._why)) + print(msg.format(line=ex.lines, reason=ex._why), file=sys.stderr) + raise Exception('Bad Turtle syntax') from None except Exception as ex: - return Markdown(msg.format(line='?', reason=ex)) - return Markdown('File loaded!') - - return HTML('Loaded!') #HTML('{}'.format(cell)) + print(msg.format(line='?', reason=ex), file=sys.stderr) + raise Exception('Bad Turtle syntax') from None + return Markdown('The Turtle syntax is correct.') def extract_data(url): @@ -101,7 +98,6 @@ def extract_data(url): fixedgraph = Graph() fixedgraph += [sanitize_triple(s) for s in g] -# print(g.serialize(format='turtle').decode('utf-8', errors='ignore')) return fixedgraph def turtle(g): @@ -119,30 +115,13 @@ def print_data(url): -def test(description, got, expected=None, func=None): - if isinstance(got, types.GeneratorType): - got = set(got) - try: - if expected is None: - func = func or operator.truth - expected = True - assert func(got) - else: - func = func or operator.eq - assert func(got, expected) - except AssertionError: - print('Test failed: {}'.format(description), file=sys.stderr) - print('\tExpected: {}'.format(expected), file=sys.stderr) - print('\tGot: {}'.format(got), file=sys.stderr) - raise Exception('Test failed: {}'.format(description)) - - -def atLeast(lst, number): - return len(set(lst))>=number - -def containsAll(lst, other): - for i in other: - if i not in lst: - print('{} not found'.format(i), file=sys.stderr) - return False - return True \ No newline at end of file +def check(testname): + import tests + + test = getattr(tests, 'test_{}'.format(testname), None) + if test is None: 
+ raise Exception('There are no tests for {}'.format(testname)) + definition = solution(testname) + if definition is None: + raise Exception('The definition for {} is empty or invalid.'.format(testname)) + return test(definition) \ No newline at end of file diff --git a/rdf/tests.py b/rdf/tests.py new file mode 100644 index 0000000..118e852 --- /dev/null +++ b/rdf/tests.py @@ -0,0 +1,93 @@ +import operator +import types +import sys +from rdflib import term, RDF, Namespace + +schema = Namespace('http://schema.org/') + + +def test(description, got, expected=None, func=None): + if isinstance(got, types.GeneratorType): + got = set(got) + try: + if expected is None: + func = func or operator.truth + expected = True + assert func(got) + else: + func = func or operator.eq + assert func(got, expected) + except AssertionError: + print('Test failed: {}'.format(description), file=sys.stderr) + print('\tExpected: {}'.format(expected), file=sys.stderr) + print('\tGot: {}'.format(got), file=sys.stderr) + raise Exception('Test failed: {}'.format(description)) + + +def atLeast(lst, number): + return len(set(lst))>=number + + +def containsAll(lst, other): + for i in other: + if i not in lst: + print('{} not found'.format(i), file=sys.stderr) + return False + return True + + +def test_hotel(g): + test('Some triples are loaded', + len(g)) + + hotels = set(g.subjects(RDF.type, schema['Hotel'])) + test('At least 2 hotels are loaded', + hotels, + 2, + atLeast) + + for hotel in hotels: + if 'GSIHOTEL' in hotel: # Do not check the example hotel + continue + props = g.predicates(hotel) + test('Each hotel has all required properties', + props, + list(schema[i] for i in ['description', 'email', 'logo', 'priceRange']), + func=containsAll) + + reviews = set(g.subjects(RDF.type, schema['Review'])) + test('At least 3 reviews are loaded', + reviews, + 3, + atLeast) + + for review in reviews: + props = g.predicates(review) + test('Each review has all required properties', + props, + list(schema[i] 
for i in ['itemReviewed', 'reviewBody', 'reviewRating']), + func=containsAll) + ratings = list(g.objects(review, schema['reviewRating'])) + for rating in ratings: + value = g.value(rating, schema['ratingValue']) + test('The review should have ratings', value) + + authors = set(g.objects(None, schema['author'])) + for author in authors: + for prop in g.predicates(author, None): + if 'name' in str(prop).lower(): + break + else: + assert "At least a reviewer has a name (surname, givenName...)" + + print('All tests passed. Congratulations!') + print() + print('Now you can try to add the optional properties') + + +def test_example(g): + test('Some triples have been loaded', + len(g)) + test('A person has been defined', + g.subjects(RDF.type, term.URIRef('http://xmlns.com/foaf/0.1/Person'))) + print('All tests passed. Well done!') \ No newline at end of file