mirror of
				https://github.com/gsi-upm/sitc
				synced 2025-10-31 15:38:18 +00:00 
			
		
		
		
	Compare commits
	
		
			40 Commits
		
	
	
		
			332802176f
			...
			dveni-patc
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 344e054ba4 | ||
|  | 2c8238f1f2 | ||
|  | e42299ac7a | ||
|  | 9d1b88dfea | ||
|  | ae3c34f94c | ||
|  | 06e51db4e3 | ||
|  | b42ba2fe58 | ||
|  | 0aa095b40d | ||
|  | ecfa8924c8 | ||
|  | b3239cbbab | ||
|  | 842b6307f1 | ||
|  | 762157bfe1 | ||
|  | d4b59e702d | ||
|  | 267421e5b8 | ||
|  | a1abd4b766 | ||
|  | c1d3ca38ea | ||
|  | a1be167cc0 | ||
|  | 4d339a1a83 | ||
|  | 47fe85d527 | ||
|  | f9965fdbcd | ||
|  | e824fd8fed | ||
|  | 76d08a9e40 | ||
|  | ba5bb34eb2 | ||
|  | 4f12fac0de | ||
|  | fafce65bd3 | ||
|  | 9332fd6f80 | ||
|  | d551ee44c5 | ||
|  | e573852e70 | ||
|  | 1086b9818a | ||
|  | f039465f5e | ||
|  | a4f8f69b19 | ||
|  | fc07718ae8 | ||
|  | 1f5318a357 | ||
|  | 9937490213 | ||
|  | ea0b4748b6 | ||
|  | 139a6f5d69 | ||
|  | df6a148907 | ||
|  | 13b3b2606a | ||
|  | 1b893c1097 | ||
|  | d857869c06 | 
| @@ -14,10 +14,42 @@ Also note that we have a code of conduct, please follow it in all your interacti | ||||
| 2. If you are adding code, ensure the changed notebooks can be run in a fresh environment. Include instructions to download | ||||
| any additional dependencies. | ||||
| 3. Ensure any spurious changes are removed, such as compilation files (`pyc`) or metadata changes in a notebook. | ||||
| You can automatically do so using nbstripout: | ||||
| ``` | ||||
| pip install nbstripout | ||||
| nbstripout --install | ||||
| ``` | ||||
| This will install a git hook that strips all metadata from the notebooks before you commit changes to git. | ||||
|  | ||||
| You can also remove the output from all cells using this command: | ||||
|  | ||||
| ``` | ||||
| make clean | ||||
| ``` | ||||
|  | ||||
| To limit the command to a specific folder (e.g. ml1): | ||||
| ``` | ||||
| make FOLDER=ml1 clean | ||||
| ``` | ||||
| 4. Submit your pull request on GitHub. | ||||
| 5. A member of the GSI-UPM group will review your request. | ||||
| 6. The reviewer may ask for further changes before merging the contribution. Please, follow the reviewer's instructions before resubmitting. | ||||
|  | ||||
|  | ||||
| ## Testing the changes | ||||
|  | ||||
| You can execute all notebooks at once to make sure they work with this command: | ||||
|  | ||||
| ``` | ||||
| make exec | ||||
| ``` | ||||
|  | ||||
| To only check notebooks in a specific folder, run: | ||||
|  | ||||
| ``` | ||||
| make FOLDER=ml2 exec  # Run all notebooks in the ml2 folder | ||||
| ``` | ||||
|  | ||||
| ## Code of Conduct | ||||
|  | ||||
| ### Our Pledge | ||||
|   | ||||
							
								
								
									
										11
									
								
								Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								Makefile
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| FOLDER:=. | ||||
| ERROR:=255 | ||||
|  | ||||
| exec: | ||||
| 	find $(FOLDER) -iname '*.ipynb' -print0 | xargs -n 1 -0 sh -c 'jupyter nbconvert --execute --ClearOutputPreprocessor.enabled=True --inplace $$0 || exit $(ERROR)' | ||||
|  | ||||
| clean: | ||||
| 	find $(FOLDER) -iname '*.ipynb' -print0 | xargs -n 1 -0 sh -c 'nbstripout $$0 || exit $(ERROR)' | ||||
|  | ||||
|  | ||||
| .PHONY: exec clean | ||||
							
								
								
									
										1852
									
								
								lod/01_SPARQL_Introduction.ipynb
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										1852
									
								
								lod/01_SPARQL_Introduction.ipynb
									
									
									
									
									
										Executable file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										457
									
								
								lod/02_SPARQL_Custom_Endpoint.ipynb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										457
									
								
								lod/02_SPARQL_Custom_Endpoint.ipynb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,457 @@ | ||||
| { | ||||
|  "cells": [ | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "editable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "7276f055a8c504d3c80098c62ed41a4f", | ||||
|      "grade": false, | ||||
|      "grade_id": "cell-0bfe38f97f6ab2d2", | ||||
|      "locked": true, | ||||
|      "schema_version": 1, | ||||
|      "solution": false | ||||
|     } | ||||
|    }, | ||||
|    "source": [ | ||||
|     "<header style=\"width:100%;position:relative\">\n", | ||||
|     "  <div style=\"width:80%;float:right;\">\n", | ||||
|     "    <h1>Course Notes for Learning Intelligent Systems</h1>\n", | ||||
|     "    <h3>Department of Telematic Engineering Systems</h3>\n", | ||||
|     "    <h5>Universidad Politécnica de Madrid</h5>\n", | ||||
|     "  </div>\n", | ||||
|     "        <img style=\"width:15%;\" src=\"../logo.jpg\" alt=\"UPM\" />\n", | ||||
|     "</header>" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "editable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "42642609861283bc33914d16750b7efa", | ||||
|      "grade": false, | ||||
|      "grade_id": "cell-0cd673883ee592d1", | ||||
|      "locked": true, | ||||
|      "schema_version": 1, | ||||
|      "solution": false | ||||
|     } | ||||
|    }, | ||||
|    "source": [ | ||||
|     "## Introduction\n", | ||||
|     "\n", | ||||
|     "In the previous notebook, we learnt how to use SPARQL by querying DBpedia.\n", | ||||
|     "\n", | ||||
|     "In this notebook, we will use SPARQL on manually annotated data. The data was collected as part of a [previous exercise](../lod/).\n", | ||||
|     "\n", | ||||
|     "The goal is to try SPARQL with data annotated by users with limited knowledge of vocabularies and semantics, and to compare the experience with similar queries to a more structured dataset.\n", | ||||
|     "\n", | ||||
|     "Hence, there are two parts.\n", | ||||
|     "First, you will query a set of graphs annotated by students of this course.\n", | ||||
|     "Then, you will query a synthetic dataset that contains similar information." | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "editable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "a3ecb4b300a5ab82376a4a8cb01f7e6b", | ||||
|      "grade": false, | ||||
|      "grade_id": "cell-10264483046abcc4", | ||||
|      "locked": true, | ||||
|      "schema_version": 1, | ||||
|      "solution": false | ||||
|     } | ||||
|    }, | ||||
|    "source": [ | ||||
|     "## Objectives\n", | ||||
|     "\n", | ||||
|     "* Experiencing the usefulness of the Linked Open Data initiative by querying data from different RDF graphs and endpoints\n", | ||||
|     "* Understanding the challenges in querying multiple sources, with different annotators.\n" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "editable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "2fedf0d73fc90104d1ab72c3413dfc83", | ||||
|      "grade": false, | ||||
|      "grade_id": "cell-4f8492996e74bf20", | ||||
|      "locked": true, | ||||
|      "schema_version": 1, | ||||
|      "solution": false | ||||
|     } | ||||
|    }, | ||||
|    "source": [ | ||||
|     "## Tools\n", | ||||
|     "\n", | ||||
|     "See [the SPARQL notebook](./01_SPARQL_Introduction.ipynb#Tools)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "editable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "c5f8646518bd832a47d71f9d3218237a", | ||||
|      "grade": false, | ||||
|      "grade_id": "cell-eb13908482825e42", | ||||
|      "locked": true, | ||||
|      "schema_version": 1, | ||||
|      "solution": false | ||||
|     } | ||||
|    }, | ||||
|    "source": [ | ||||
|     "Run this line to enable the `%%sparql` magic command." | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from helpers import *" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "## Exercises\n" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Querying the manually annotated dataset will be slightly different from querying DBpedia.\n", | ||||
|     "The main difference is that this dataset uses different graphs to separate the annotations from different students.\n", | ||||
|     "\n", | ||||
|     "**Each graph is a separate set of triples**.\n", | ||||
|     "For this exercise, you could think of graphs as individual endpoints.\n", | ||||
|     "\n", | ||||
|     "\n", | ||||
|     "First, let us get a list of graphs available:" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "%%sparql http://fuseki.cluster.gsi.dit.upm.es/hotels\n", | ||||
|     "    \n", | ||||
|     "SELECT ?g (COUNT(?s) as ?count) WHERE {\n", | ||||
|     "    GRAPH ?g {\n", | ||||
|     "        ?s ?p ?o\n", | ||||
|     "    }\n", | ||||
|     "}\n", | ||||
|     "GROUP BY ?g\n", | ||||
|     "ORDER BY desc(?count)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "You should see many graphs, with different triple counts.\n", | ||||
|     "\n", | ||||
|     "The biggest one should be http://fuseki.cluster.gsi.dit.upm.es/synthetic" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Once you have this list, you can query specific graphs like so:" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "%%sparql http://fuseki.cluster.gsi.dit.upm.es/hotels\n", | ||||
|     "    \n", | ||||
|     "SELECT *\n", | ||||
|     "WHERE {\n", | ||||
|     "    GRAPH <http://fuseki.cluster.gsi.dit.upm.es/synthetic>{\n", | ||||
|     "    ?s ?p ?o .\n", | ||||
|     "    }\n", | ||||
|     "}\n", | ||||
|     "LIMIT 10" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "There are two exercises in this notebook.\n", | ||||
|     "\n", | ||||
|     "In each of them, you are asked to run five queries, to answer the following questions:\n", | ||||
|     "\n", | ||||
|     "* Number of hotels (or entities) with reviews\n", | ||||
|     "* Number of reviews\n", | ||||
|     "* The hotel with the lowest average score\n", | ||||
|     "* The hotel with the highest average score\n", | ||||
|     "* A list of hotels with their addresses and telephone numbers" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "### Manually annotated data" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Your task is to design five queries to answer the questions in the description, and run each of them in at least three graphs, other than the `synthetic` graph.\n", | ||||
|     "\n", | ||||
|     "To design the queries, use what you know about the schema.org vocabularies, or explore subjects, predicates and objects in each of the graphs.\n", | ||||
|     "\n", | ||||
|     "Here's an example to get the entities and their types in a graph:" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "%%sparql http://fuseki.cluster.gsi.dit.upm.es/hotels\n", | ||||
|     "\n", | ||||
|     "PREFIX schema: <http://schema.org/>\n", | ||||
|     "    \n", | ||||
|     "SELECT ?s ?o\n", | ||||
|     "WHERE {\n", | ||||
|     "    GRAPH <http://fuseki.cluster.gsi.dit.upm.es/35c20a49f8c6581be1cf7bd56d12d131>{\n", | ||||
|     "        ?s a ?o .\n", | ||||
|     "    }\n", | ||||
|     "\n", | ||||
|     "}\n", | ||||
|     "LIMIT 40" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "### Synthetic dataset\n", | ||||
|     "\n", | ||||
|     "Now, run the same queries in the synthetic dataset.\n", | ||||
|     "\n", | ||||
|     "The query below should get you started:" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "%%sparql http://fuseki.cluster.gsi.dit.upm.es/hotels\n", | ||||
|     "    \n", | ||||
|     "SELECT *\n", | ||||
|     "WHERE {\n", | ||||
|     "    GRAPH <http://fuseki.cluster.gsi.dit.upm.es/synthetic>{\n", | ||||
|     "    ?s ?p ?o .\n", | ||||
|     "    }\n", | ||||
|     "}\n", | ||||
|     "LIMIT 10" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "### Optional exercise\n", | ||||
|     "\n", | ||||
|     "\n", | ||||
|     "Explore the graphs and find the most typical mistakes (e.g. using `http://schema.org/Hotel/Hotel`).\n", | ||||
|     "\n", | ||||
|     "Tip: You can use normal SPARQL queries with `BOUND` and `REGEX` to check if the annotations are correct.\n", | ||||
|     "\n", | ||||
|     "You can also query all the graphs at the same time. e.g. to get all types used:" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "%%sparql http://fuseki.cluster.gsi.dit.upm.es/hotels\n", | ||||
|     "\n", | ||||
|     "PREFIX schema: <http://schema.org/>\n", | ||||
|     "    \n", | ||||
|     "SELECT DISTINCT ?o\n", | ||||
|     "WHERE {\n", | ||||
|     "    GRAPH ?g {\n", | ||||
|     "        ?s a ?o .\n", | ||||
|     "    }\n", | ||||
|     "    {\n", | ||||
|     "        SELECT ?g\n", | ||||
|     "        WHERE {\n", | ||||
|     "           GRAPH ?g {}\n", | ||||
|     "           FILTER (str(?g) != 'http://fuseki.cluster.gsi.dit.upm.es/synthetic')\n", | ||||
|     "        }\n", | ||||
|     "    }\n", | ||||
|     "\n", | ||||
|     "\n", | ||||
|     "}\n", | ||||
|     "LIMIT 50" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "### Discussion\n", | ||||
|     "\n", | ||||
|     "Compare the results of the synthetic and the manual dataset, and answer these questions:" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Both datasets should use the same schema. Are there any differences when it comes to using them?" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "860c3977cd06736f1342d535944dbb63", | ||||
|      "grade": true, | ||||
|      "grade_id": "cell-9bd08e4f5842cb89", | ||||
|      "locked": false, | ||||
|      "points": 0, | ||||
|      "schema_version": 1, | ||||
|      "solution": true | ||||
|     } | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# YOUR ANSWER HERE" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Are the annotations used correctly in every graph?" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "1946a7ed4aba8d168bb3fad898c05651", | ||||
|      "grade": true, | ||||
|      "grade_id": "cell-9dc1c9033198bb18", | ||||
|      "locked": false, | ||||
|      "points": 0, | ||||
|      "schema_version": 1, | ||||
|      "solution": true | ||||
|     } | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# YOUR ANSWER HERE" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Has any of the datasets been harder to query? If so, why?" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "6714abc5226618b76dc4c1aaed6d1a49", | ||||
|      "grade": true, | ||||
|      "grade_id": "cell-6c18003ced54be23", | ||||
|      "locked": false, | ||||
|      "points": 0, | ||||
|      "schema_version": 1, | ||||
|      "solution": true | ||||
|     } | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# YOUR ANSWER HERE" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "## References" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "* [RDFLib documentation](https://rdflib.readthedocs.io/en/stable/).\n", | ||||
|     "* [Wikidata Query Service query examples](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "## Licence\n", | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
|  "metadata": { | ||||
|   "kernelspec": { | ||||
|    "display_name": "Python 3", | ||||
|    "language": "python", | ||||
|    "name": "python3" | ||||
|   }, | ||||
|   "language_info": { | ||||
|    "codemirror_mode": { | ||||
|     "name": "ipython", | ||||
|     "version": 3 | ||||
|    }, | ||||
|    "file_extension": ".py", | ||||
|    "mimetype": "text/x-python", | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.7.2" | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 2 | ||||
| } | ||||
| @@ -1,11 +0,0 @@ | ||||
| # Files included # | ||||
|  | ||||
| * `validate.py` validates and serializes a turtle dataset | ||||
| * `sparql.py` runs a custom sparql query on a given dataset (by default, `reviews.ttl`) | ||||
| * `extract_data.py` extracts RDFa, micro-data and JSON-LD data from a given URL | ||||
|  | ||||
| # Installation # | ||||
|  | ||||
| ``` | ||||
| pip install --user -r requirements.txt | ||||
| ``` | ||||
							
								
								
									
										1880
									
								
								lod/SPARQL.ipynb
									
									
									
									
									
								
							
							
						
						
									
										1880
									
								
								lod/SPARQL.ipynb
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,49 +0,0 @@ | ||||
|  | ||||
| import sys | ||||
| from future.standard_library import install_aliases | ||||
| install_aliases() | ||||
|  | ||||
| from urllib import request, parse | ||||
| from rdflib import Graph, term | ||||
| from lxml import etree | ||||
|  | ||||
| if len(sys.argv) < 2: | ||||
|     print('Usage: python {} <URL>'.format(sys.argv[0])) | ||||
|     print('') | ||||
|     print('Extract rdfa, microdata and json-ld annotations from a website') | ||||
|     exit(1) | ||||
|  | ||||
| url = sys.argv[1] | ||||
|  | ||||
| g = Graph() | ||||
| g.parse(url, format='rdfa') | ||||
| g.parse(url, format='microdata') | ||||
|  | ||||
|  | ||||
| def sanitize_triple(t): | ||||
|     """Function to remove bad URIs from the graph that would otherwise | ||||
|     make the serialization fail.""" | ||||
|     def sanitize_triple_item(item): | ||||
|         if isinstance(item, term.URIRef) and '/' not in item: | ||||
|             return term.URIRef(parse.quote(str(item))) | ||||
|         return item | ||||
|  | ||||
|     return (sanitize_triple_item(t[0]), | ||||
|             sanitize_triple_item(t[1]), | ||||
|             sanitize_triple_item(t[2])) | ||||
|  | ||||
|  | ||||
| with request.urlopen(url) as response: | ||||
|     # Get all json-ld objects embedded in the html file | ||||
|     html = response.read().decode('utf-8', errors='ignore') | ||||
|     parser = etree.XMLParser(recover=True) | ||||
|     root = etree.fromstring(html, parser=parser) | ||||
|     if root: | ||||
|         for jsonld in root.findall(".//script[@type='application/ld+json']"): | ||||
|             g.parse(data=jsonld.text, publicID=url, format='json-ld') | ||||
|  | ||||
|  | ||||
| fixedgraph = Graph() | ||||
| fixedgraph += [sanitize_triple(s) for s in g] | ||||
|  | ||||
| print(g.serialize(format='turtle').decode('utf-8', errors='ignore')) | ||||
| @@ -1,12 +1,22 @@ | ||||
| ''' | ||||
| Helper functions and ipython magic for the SPARQL exercises. | ||||
|  | ||||
| The tests in the notebooks rely on the `LAST_QUERY` variable, which is updated by the `%%sparql` magic after every query. | ||||
| This variable contains the full query used (`LAST_QUERY["query"]`), the endpoint it was sent to (`LAST_QUERY["endpoint"]`), and a dictionary with the response of the endpoint (`LAST_QUERY["results"]`). | ||||
| For convenience, the results are also given as tuples (`LAST_QUERY["tuples"]`), and as a dictionary of `{column:[values]}` (`LAST_QUERY["columns"]`). | ||||
| ''' | ||||
| from IPython.core.magic import (register_line_magic, register_cell_magic, | ||||
|                                 register_line_cell_magic) | ||||
|  | ||||
| from IPython.display import HTML, display, Image | ||||
| from IPython.display import HTML, display, Image, display_javascript | ||||
| from urllib.request import Request, urlopen | ||||
| from urllib.parse import quote_plus, urlencode | ||||
| from urllib.error import HTTPError | ||||
|  | ||||
| import json | ||||
| import sys | ||||
|  | ||||
| js = "IPython.CodeCell.options_default.highlight_modes['magic_sparql'] = {'reg':[/^%%sparql/]};" | ||||
| display_javascript(js, raw=True) | ||||
|  | ||||
|  | ||||
| def send_query(query, endpoint): | ||||
| @@ -20,7 +30,11 @@ def send_query(query, endpoint): | ||||
|                 headers={'content-type': 'application/x-www-form-urlencoded', | ||||
|                          'accept': FORMATS}, | ||||
|                 method='POST') | ||||
|     return json.loads(urlopen(r).read().decode('utf-8')); | ||||
|     res = urlopen(r) | ||||
|     data = res.read().decode('utf-8') | ||||
|     if res.getcode() == 200: | ||||
|         return json.loads(data) | ||||
|     raise Exception('Error getting results: {}'.format(data)) | ||||
|  | ||||
|  | ||||
| def tabulate(tuples, header=None): | ||||
| @@ -39,11 +53,14 @@ def tabulate(tuples, header=None): | ||||
|  | ||||
| LAST_QUERY = {} | ||||
|  | ||||
| def solution(): | ||||
|     return LAST_QUERY | ||||
|  | ||||
|  | ||||
| def query(query, endpoint=None, print_table=False): | ||||
|     global LAST_QUERY | ||||
|  | ||||
|     endpoint = endpoint or "http://dbpedia.org/sparql" | ||||
|     endpoint = endpoint or "http://fuseki.cluster.gsi.dit.upm.es/sitc/" | ||||
|     results = send_query(query, endpoint) | ||||
|     tuples = to_table(results) | ||||
|  | ||||
| @@ -80,12 +97,30 @@ def to_table(results): | ||||
|  | ||||
| @register_cell_magic | ||||
| def sparql(line, cell): | ||||
|     ''' | ||||
|     Sparql magic command for ipython. It can be used in a cell like this: | ||||
|      | ||||
|     ``` | ||||
|     %%sparql | ||||
|      | ||||
|     ... Your SPARQL query ... | ||||
|      | ||||
|     ``` | ||||
|      | ||||
|     By default, it will use the `http://fuseki.cluster.gsi.dit.upm.es/sitc/` endpoint, but you can use a different endpoint like this: | ||||
|      | ||||
|     ``` | ||||
|     %%sparql http://my-sparql-endpoint... | ||||
|      | ||||
|     ... Your SPARQL query ... | ||||
|     ``` | ||||
|     ''' | ||||
|     try: | ||||
|         return query(cell, endpoint=line, print_table=True) | ||||
|     except HTTPError as ex: | ||||
|         error_message = ex.read().decode('utf-8') | ||||
|         print('Error {}. Reason: {}'.format(ex.status, ex.reason)) | ||||
|         print(error_message) | ||||
|         print(error_message, file=sys.stderr) | ||||
|  | ||||
|  | ||||
| def show_photos(values): | ||||
|   | ||||
| @@ -1,29 +0,0 @@ | ||||
| @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . | ||||
| @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . | ||||
| @prefix schema: <http://schema.org/> . | ||||
|  | ||||
|  | ||||
| _:Hotel1 a schema:Hotel ; | ||||
|          schema:description "A fictitious hotel" . | ||||
|  | ||||
|  | ||||
| _:Review1 a schema:Review ; | ||||
|           schema:reviewBody "This is a great review" ; | ||||
|           schema:reviewRating [ | ||||
|            a schema:Rating ; | ||||
|            schema:author <http://jfernando.es/me> ; | ||||
|            schema:ratingValue "0.7" | ||||
|             | ||||
|           ] ; | ||||
|           schema:itemReviewed _:Hotel1 . | ||||
|  | ||||
|  | ||||
| _:Review2 a schema:Review ; | ||||
|           schema:reviewBody "This is a not so great review" ; | ||||
|           schema:reviewRating [ | ||||
|            a schema:Rating ; | ||||
|            schema:author [ a schema:Person ; | ||||
|            schema:givenName "anonymous" ] ; | ||||
|            schema:ratingValue "0.3" | ||||
|           ] ; | ||||
|           schema:itemReviewed _:Hotel1 . | ||||
| @@ -1,23 +0,0 @@ | ||||
| # !/bin/env python # | ||||
| # Ejemplo de consultas SPARQL sobre turtle # | ||||
| # python consultas.py # | ||||
| import rdflib | ||||
| import sys | ||||
|  | ||||
| dataset = sys.argv[1] if len(sys.argv) > 1 else 'reviews.ttl' | ||||
| g = rdflib.Graph() | ||||
|  | ||||
| schema = rdflib.Namespace("http://schema.org/") | ||||
|  | ||||
| # Read Turtle file # | ||||
| g.parse(dataset, format='turtle') | ||||
|  | ||||
| results = g.query( | ||||
|     """SELECT DISTINCT ?review ?p ?o | ||||
|        WHERE { | ||||
|           ?review a schema:Review. | ||||
|           ?review ?p ?o. | ||||
|        }""", initNs={'schema': schema}) | ||||
|  | ||||
| for row in results: | ||||
|     print("%s %s %s" % row) | ||||
| @@ -1,6 +0,0 @@ | ||||
| import rdflib | ||||
| import sys | ||||
| g = rdflib.Graph() | ||||
| dataset = sys.argv[1] if len(sys.argv) > 1 else 'reviews.ttl' | ||||
| g.parse(dataset, format="n3") | ||||
| print(g.serialize(format="n3").decode('utf-8')) | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -82,7 +82,7 @@ | ||||
|     "## Licence\n", | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "©  Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -102,9 +102,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.2" | ||||
|    "version": "3.6.7" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, ©  Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -75,7 +75,7 @@ | ||||
|     "## Licence\n", | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "©  Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -95,9 +95,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.6.7" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, ©  Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -105,8 +105,8 @@ | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "In addition, scikit-learn helps in several tasks:\n", | ||||
|     "* ** Model selection**: Comparing, validating, choosing parameters and models, and persisting models. Some of the [available functionalities](http://scikit-learn.org/stable/model_selection.html#model-selection) are cross-validation or grid search for optimizing the parameters. \n", | ||||
|     "* ** Preprocessing**: Several common utility functions and transformer classes to change raw feature vectors into a representation that is more suitable for the downstream estimators. Some of the available [preprocessing functions](http://scikit-learn.org/stable/modules/preprocessing.html#preprocessing) are scaling and normalizing data, or imputing missing values." | ||||
|     "* **Model selection**: Comparing, validating, choosing parameters and models, and persisting models. Some of the [available functionalities](http://scikit-learn.org/stable/model_selection.html#model-selection) are cross-validation or grid search for optimizing the parameters. \n", | ||||
|     "* **Preprocessing**: Several common utility functions and transformer classes to change raw feature vectors into a representation that is more suitable for the downstream estimators. Some of the available [preprocessing functions](http://scikit-learn.org/stable/modules/preprocessing.html#preprocessing) are scaling and normalizing data, or imputing missing values." | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -122,6 +122,8 @@ | ||||
|    "source": [ | ||||
|     "If you installed the conda distribution, scikit-learn is already installed! This is the best option.\n", | ||||
|     "\n", | ||||
|     "Anyway, before starting, update all the packages: `conda update --all`. \n", | ||||
|     "\n", | ||||
|     "In case it is an old installation, you can update it using conda: `conda update scikit-learn`.\n", | ||||
|     "\n", | ||||
|     "If it is not installed, install it with conda: `conda install scikit-learn`.\n", | ||||
| @@ -156,7 +158,7 @@ | ||||
|     "## Licence\n", | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -176,9 +178,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1+" | ||||
|    "version": "3.6.7" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -8,7 +8,7 @@ | ||||
|     "\n", | ||||
|     "# Course Notes for Learning Intelligent Systems\n", | ||||
|     "\n", | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias\n", | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, ©  Carlos A. Iglesias\n", | ||||
|     "\n", | ||||
|     "## [Introduction to Machine Learning](2_0_0_Intro_ML.ipynb)" | ||||
|    ] | ||||
| @@ -68,10 +68,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 8, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# import datasets from scikit-learn\n", | ||||
| @@ -90,22 +88,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 9, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "sklearn.datasets.base.Bunch" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 9, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#type 'bunch' of a dataset\n", | ||||
|     "type(iris)" | ||||
| @@ -113,80 +98,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 10, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Iris Plants Database\n", | ||||
|       "\n", | ||||
|       "Notes\n", | ||||
|       "-----\n", | ||||
|       "Data Set Characteristics:\n", | ||||
|       "    :Number of Instances: 150 (50 in each of three classes)\n", | ||||
|       "    :Number of Attributes: 4 numeric, predictive attributes and the class\n", | ||||
|       "    :Attribute Information:\n", | ||||
|       "        - sepal length in cm\n", | ||||
|       "        - sepal width in cm\n", | ||||
|       "        - petal length in cm\n", | ||||
|       "        - petal width in cm\n", | ||||
|       "        - class:\n", | ||||
|       "                - Iris-Setosa\n", | ||||
|       "                - Iris-Versicolour\n", | ||||
|       "                - Iris-Virginica\n", | ||||
|       "    :Summary Statistics:\n", | ||||
|       "\n", | ||||
|       "    ============== ==== ==== ======= ===== ====================\n", | ||||
|       "                    Min  Max   Mean    SD   Class Correlation\n", | ||||
|       "    ============== ==== ==== ======= ===== ====================\n", | ||||
|       "    sepal length:   4.3  7.9   5.84   0.83    0.7826\n", | ||||
|       "    sepal width:    2.0  4.4   3.05   0.43   -0.4194\n", | ||||
|       "    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)\n", | ||||
|       "    petal width:    0.1  2.5   1.20  0.76     0.9565  (high!)\n", | ||||
|       "    ============== ==== ==== ======= ===== ====================\n", | ||||
|       "\n", | ||||
|       "    :Missing Attribute Values: None\n", | ||||
|       "    :Class Distribution: 33.3% for each of 3 classes.\n", | ||||
|       "    :Creator: R.A. Fisher\n", | ||||
|       "    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n", | ||||
|       "    :Date: July, 1988\n", | ||||
|       "\n", | ||||
|       "This is a copy of UCI ML iris datasets.\n", | ||||
|       "http://archive.ics.uci.edu/ml/datasets/Iris\n", | ||||
|       "\n", | ||||
|       "The famous Iris database, first used by Sir R.A Fisher\n", | ||||
|       "\n", | ||||
|       "This is perhaps the best known database to be found in the\n", | ||||
|       "pattern recognition literature.  Fisher's paper is a classic in the field and\n", | ||||
|       "is referenced frequently to this day.  (See Duda & Hart, for example.)  The\n", | ||||
|       "data set contains 3 classes of 50 instances each, where each class refers to a\n", | ||||
|       "type of iris plant.  One class is linearly separable from the other 2; the\n", | ||||
|       "latter are NOT linearly separable from each other.\n", | ||||
|       "\n", | ||||
|       "References\n", | ||||
|       "----------\n", | ||||
|       "   - Fisher,R.A. \"The use of multiple measurements in taxonomic problems\"\n", | ||||
|       "     Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\n", | ||||
|       "     Mathematical Statistics\" (John Wiley, NY, 1950).\n", | ||||
|       "   - Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.\n", | ||||
|       "     (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.\n", | ||||
|       "   - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\n", | ||||
|       "     Structure and Classification Rule for Recognition in Partially Exposed\n", | ||||
|       "     Environments\".  IEEE Transactions on Pattern Analysis and Machine\n", | ||||
|       "     Intelligence, Vol. PAMI-2, No. 1, 67-71.\n", | ||||
|       "   - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\".  IEEE Transactions\n", | ||||
|       "     on Information Theory, May 1972, 431-433.\n", | ||||
|       "   - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al\"s AUTOCLASS II\n", | ||||
|       "     conceptual clustering system finds 3 classes in the data.\n", | ||||
|       "   - Many, many more ...\n", | ||||
|       "\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# print description of the dataset\n", | ||||
|     "print(iris.DESCR)" | ||||
| @@ -194,19 +108,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 11, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# names of the features (attributes of the entities)\n", | ||||
|     "print(iris.feature_names)" | ||||
| @@ -214,19 +118,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 12, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "['setosa' 'versicolor' 'virginica']\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#names of the targets(classes of the classifier)\n", | ||||
|     "print(iris.target_names)" | ||||
| @@ -234,22 +128,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 13, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "numpy.ndarray" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 13, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#type numpy array\n", | ||||
|     "type(iris.data)" | ||||
| @@ -264,168 +145,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 14, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[[ 5.1  3.5  1.4  0.2]\n", | ||||
|       " [ 4.9  3.   1.4  0.2]\n", | ||||
|       " [ 4.7  3.2  1.3  0.2]\n", | ||||
|       " [ 4.6  3.1  1.5  0.2]\n", | ||||
|       " [ 5.   3.6  1.4  0.2]\n", | ||||
|       " [ 5.4  3.9  1.7  0.4]\n", | ||||
|       " [ 4.6  3.4  1.4  0.3]\n", | ||||
|       " [ 5.   3.4  1.5  0.2]\n", | ||||
|       " [ 4.4  2.9  1.4  0.2]\n", | ||||
|       " [ 4.9  3.1  1.5  0.1]\n", | ||||
|       " [ 5.4  3.7  1.5  0.2]\n", | ||||
|       " [ 4.8  3.4  1.6  0.2]\n", | ||||
|       " [ 4.8  3.   1.4  0.1]\n", | ||||
|       " [ 4.3  3.   1.1  0.1]\n", | ||||
|       " [ 5.8  4.   1.2  0.2]\n", | ||||
|       " [ 5.7  4.4  1.5  0.4]\n", | ||||
|       " [ 5.4  3.9  1.3  0.4]\n", | ||||
|       " [ 5.1  3.5  1.4  0.3]\n", | ||||
|       " [ 5.7  3.8  1.7  0.3]\n", | ||||
|       " [ 5.1  3.8  1.5  0.3]\n", | ||||
|       " [ 5.4  3.4  1.7  0.2]\n", | ||||
|       " [ 5.1  3.7  1.5  0.4]\n", | ||||
|       " [ 4.6  3.6  1.   0.2]\n", | ||||
|       " [ 5.1  3.3  1.7  0.5]\n", | ||||
|       " [ 4.8  3.4  1.9  0.2]\n", | ||||
|       " [ 5.   3.   1.6  0.2]\n", | ||||
|       " [ 5.   3.4  1.6  0.4]\n", | ||||
|       " [ 5.2  3.5  1.5  0.2]\n", | ||||
|       " [ 5.2  3.4  1.4  0.2]\n", | ||||
|       " [ 4.7  3.2  1.6  0.2]\n", | ||||
|       " [ 4.8  3.1  1.6  0.2]\n", | ||||
|       " [ 5.4  3.4  1.5  0.4]\n", | ||||
|       " [ 5.2  4.1  1.5  0.1]\n", | ||||
|       " [ 5.5  4.2  1.4  0.2]\n", | ||||
|       " [ 4.9  3.1  1.5  0.1]\n", | ||||
|       " [ 5.   3.2  1.2  0.2]\n", | ||||
|       " [ 5.5  3.5  1.3  0.2]\n", | ||||
|       " [ 4.9  3.1  1.5  0.1]\n", | ||||
|       " [ 4.4  3.   1.3  0.2]\n", | ||||
|       " [ 5.1  3.4  1.5  0.2]\n", | ||||
|       " [ 5.   3.5  1.3  0.3]\n", | ||||
|       " [ 4.5  2.3  1.3  0.3]\n", | ||||
|       " [ 4.4  3.2  1.3  0.2]\n", | ||||
|       " [ 5.   3.5  1.6  0.6]\n", | ||||
|       " [ 5.1  3.8  1.9  0.4]\n", | ||||
|       " [ 4.8  3.   1.4  0.3]\n", | ||||
|       " [ 5.1  3.8  1.6  0.2]\n", | ||||
|       " [ 4.6  3.2  1.4  0.2]\n", | ||||
|       " [ 5.3  3.7  1.5  0.2]\n", | ||||
|       " [ 5.   3.3  1.4  0.2]\n", | ||||
|       " [ 7.   3.2  4.7  1.4]\n", | ||||
|       " [ 6.4  3.2  4.5  1.5]\n", | ||||
|       " [ 6.9  3.1  4.9  1.5]\n", | ||||
|       " [ 5.5  2.3  4.   1.3]\n", | ||||
|       " [ 6.5  2.8  4.6  1.5]\n", | ||||
|       " [ 5.7  2.8  4.5  1.3]\n", | ||||
|       " [ 6.3  3.3  4.7  1.6]\n", | ||||
|       " [ 4.9  2.4  3.3  1. ]\n", | ||||
|       " [ 6.6  2.9  4.6  1.3]\n", | ||||
|       " [ 5.2  2.7  3.9  1.4]\n", | ||||
|       " [ 5.   2.   3.5  1. ]\n", | ||||
|       " [ 5.9  3.   4.2  1.5]\n", | ||||
|       " [ 6.   2.2  4.   1. ]\n", | ||||
|       " [ 6.1  2.9  4.7  1.4]\n", | ||||
|       " [ 5.6  2.9  3.6  1.3]\n", | ||||
|       " [ 6.7  3.1  4.4  1.4]\n", | ||||
|       " [ 5.6  3.   4.5  1.5]\n", | ||||
|       " [ 5.8  2.7  4.1  1. ]\n", | ||||
|       " [ 6.2  2.2  4.5  1.5]\n", | ||||
|       " [ 5.6  2.5  3.9  1.1]\n", | ||||
|       " [ 5.9  3.2  4.8  1.8]\n", | ||||
|       " [ 6.1  2.8  4.   1.3]\n", | ||||
|       " [ 6.3  2.5  4.9  1.5]\n", | ||||
|       " [ 6.1  2.8  4.7  1.2]\n", | ||||
|       " [ 6.4  2.9  4.3  1.3]\n", | ||||
|       " [ 6.6  3.   4.4  1.4]\n", | ||||
|       " [ 6.8  2.8  4.8  1.4]\n", | ||||
|       " [ 6.7  3.   5.   1.7]\n", | ||||
|       " [ 6.   2.9  4.5  1.5]\n", | ||||
|       " [ 5.7  2.6  3.5  1. ]\n", | ||||
|       " [ 5.5  2.4  3.8  1.1]\n", | ||||
|       " [ 5.5  2.4  3.7  1. ]\n", | ||||
|       " [ 5.8  2.7  3.9  1.2]\n", | ||||
|       " [ 6.   2.7  5.1  1.6]\n", | ||||
|       " [ 5.4  3.   4.5  1.5]\n", | ||||
|       " [ 6.   3.4  4.5  1.6]\n", | ||||
|       " [ 6.7  3.1  4.7  1.5]\n", | ||||
|       " [ 6.3  2.3  4.4  1.3]\n", | ||||
|       " [ 5.6  3.   4.1  1.3]\n", | ||||
|       " [ 5.5  2.5  4.   1.3]\n", | ||||
|       " [ 5.5  2.6  4.4  1.2]\n", | ||||
|       " [ 6.1  3.   4.6  1.4]\n", | ||||
|       " [ 5.8  2.6  4.   1.2]\n", | ||||
|       " [ 5.   2.3  3.3  1. ]\n", | ||||
|       " [ 5.6  2.7  4.2  1.3]\n", | ||||
|       " [ 5.7  3.   4.2  1.2]\n", | ||||
|       " [ 5.7  2.9  4.2  1.3]\n", | ||||
|       " [ 6.2  2.9  4.3  1.3]\n", | ||||
|       " [ 5.1  2.5  3.   1.1]\n", | ||||
|       " [ 5.7  2.8  4.1  1.3]\n", | ||||
|       " [ 6.3  3.3  6.   2.5]\n", | ||||
|       " [ 5.8  2.7  5.1  1.9]\n", | ||||
|       " [ 7.1  3.   5.9  2.1]\n", | ||||
|       " [ 6.3  2.9  5.6  1.8]\n", | ||||
|       " [ 6.5  3.   5.8  2.2]\n", | ||||
|       " [ 7.6  3.   6.6  2.1]\n", | ||||
|       " [ 4.9  2.5  4.5  1.7]\n", | ||||
|       " [ 7.3  2.9  6.3  1.8]\n", | ||||
|       " [ 6.7  2.5  5.8  1.8]\n", | ||||
|       " [ 7.2  3.6  6.1  2.5]\n", | ||||
|       " [ 6.5  3.2  5.1  2. ]\n", | ||||
|       " [ 6.4  2.7  5.3  1.9]\n", | ||||
|       " [ 6.8  3.   5.5  2.1]\n", | ||||
|       " [ 5.7  2.5  5.   2. ]\n", | ||||
|       " [ 5.8  2.8  5.1  2.4]\n", | ||||
|       " [ 6.4  3.2  5.3  2.3]\n", | ||||
|       " [ 6.5  3.   5.5  1.8]\n", | ||||
|       " [ 7.7  3.8  6.7  2.2]\n", | ||||
|       " [ 7.7  2.6  6.9  2.3]\n", | ||||
|       " [ 6.   2.2  5.   1.5]\n", | ||||
|       " [ 6.9  3.2  5.7  2.3]\n", | ||||
|       " [ 5.6  2.8  4.9  2. ]\n", | ||||
|       " [ 7.7  2.8  6.7  2. ]\n", | ||||
|       " [ 6.3  2.7  4.9  1.8]\n", | ||||
|       " [ 6.7  3.3  5.7  2.1]\n", | ||||
|       " [ 7.2  3.2  6.   1.8]\n", | ||||
|       " [ 6.2  2.8  4.8  1.8]\n", | ||||
|       " [ 6.1  3.   4.9  1.8]\n", | ||||
|       " [ 6.4  2.8  5.6  2.1]\n", | ||||
|       " [ 7.2  3.   5.8  1.6]\n", | ||||
|       " [ 7.4  2.8  6.1  1.9]\n", | ||||
|       " [ 7.9  3.8  6.4  2. ]\n", | ||||
|       " [ 6.4  2.8  5.6  2.2]\n", | ||||
|       " [ 6.3  2.8  5.1  1.5]\n", | ||||
|       " [ 6.1  2.6  5.6  1.4]\n", | ||||
|       " [ 7.7  3.   6.1  2.3]\n", | ||||
|       " [ 6.3  3.4  5.6  2.4]\n", | ||||
|       " [ 6.4  3.1  5.5  1.8]\n", | ||||
|       " [ 6.   3.   4.8  1.8]\n", | ||||
|       " [ 6.9  3.1  5.4  2.1]\n", | ||||
|       " [ 6.7  3.1  5.6  2.4]\n", | ||||
|       " [ 6.9  3.1  5.1  2.3]\n", | ||||
|       " [ 5.8  2.7  5.1  1.9]\n", | ||||
|       " [ 6.8  3.2  5.9  2.3]\n", | ||||
|       " [ 6.7  3.3  5.7  2.5]\n", | ||||
|       " [ 6.7  3.   5.2  2.3]\n", | ||||
|       " [ 6.3  2.5  5.   1.9]\n", | ||||
|       " [ 6.5  3.   5.2  2. ]\n", | ||||
|       " [ 6.2  3.4  5.4  2.3]\n", | ||||
|       " [ 5.9  3.   5.1  1.8]]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Data in the iris dataset. The value of the features of the samples.\n", | ||||
|     "print(iris.data)" | ||||
| @@ -433,23 +155,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 15, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", | ||||
|       " 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", | ||||
|       " 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2\n", | ||||
|       " 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n", | ||||
|       " 2 2]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Target.  Category of every sample\n", | ||||
|     "print(iris.target)" | ||||
| @@ -457,19 +165,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 16, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "(150, 4)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Iris data is a numpy array\n", | ||||
|     "# We can inspect its shape (rows, columns). In our case, (n_samples, n_features)\n", | ||||
| @@ -478,19 +176,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 17, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "2\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Using numpy, I can print the number of dimensions (here we are working with a 2D matrix)\n", | ||||
|     "print(iris.data.ndim)" | ||||
| @@ -498,19 +186,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 18, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "150\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# I can print n_samples\n", | ||||
|     "print(iris.data.shape[0])" | ||||
| @@ -518,19 +196,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 19, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "4\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# ... n_features\n", | ||||
|     "print(iris.data.shape[1])" | ||||
| @@ -538,19 +206,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 20, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# names of the features\n", | ||||
|     "print(iris.feature_names)" | ||||
| @@ -590,7 +248,7 @@ | ||||
|     "\n", | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "©  Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -610,9 +268,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1+" | ||||
|    "version": "3.5.5" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -55,7 +55,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 1, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
| @@ -81,7 +81,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
| @@ -93,17 +93,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 4, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "(112, 4) (38, 4)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Dimensions of train and testing\n", | ||||
|     "print(x_train.shape, x_test.shape)" | ||||
| @@ -111,54 +103,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 5, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[[ 5.7  2.9  4.2  1.3]\n", | ||||
|       " [ 6.7  3.1  4.4  1.4]\n", | ||||
|       " [ 4.7  3.2  1.6  0.2]\n", | ||||
|       " [ 6.5  2.8  4.6  1.5]\n", | ||||
|       " [ 6.1  2.6  5.6  1.4]\n", | ||||
|       " [ 6.3  3.3  6.   2.5]\n", | ||||
|       " [ 4.8  3.4  1.9  0.2]\n", | ||||
|       " [ 5.1  3.5  1.4  0.3]\n", | ||||
|       " [ 6.4  3.1  5.5  1.8]\n", | ||||
|       " [ 6.9  3.2  5.7  2.3]\n", | ||||
|       " [ 6.8  3.2  5.9  2.3]\n", | ||||
|       " [ 4.4  3.   1.3  0.2]\n", | ||||
|       " [ 6.3  3.4  5.6  2.4]\n", | ||||
|       " [ 6.1  2.9  4.7  1.4]\n", | ||||
|       " [ 6.9  3.1  5.1  2.3]\n", | ||||
|       " [ 6.4  2.9  4.3  1.3]\n", | ||||
|       " [ 6.   3.   4.8  1.8]\n", | ||||
|       " [ 5.2  3.5  1.5  0.2]\n", | ||||
|       " [ 6.3  3.3  4.7  1.6]\n", | ||||
|       " [ 7.2  3.2  6.   1.8]\n", | ||||
|       " [ 4.9  3.1  1.5  0.1]\n", | ||||
|       " [ 5.7  3.8  1.7  0.3]\n", | ||||
|       " [ 6.5  3.   5.8  2.2]\n", | ||||
|       " [ 4.8  3.   1.4  0.1]\n", | ||||
|       " [ 6.   2.2  5.   1.5]\n", | ||||
|       " [ 6.2  2.8  4.8  1.8]\n", | ||||
|       " [ 6.1  3.   4.6  1.4]\n", | ||||
|       " [ 6.1  2.8  4.   1.3]\n", | ||||
|       " [ 6.5  3.   5.2  2. ]\n", | ||||
|       " [ 5.9  3.   5.1  1.8]\n", | ||||
|       " [ 5.6  2.7  4.2  1.3]\n", | ||||
|       " [ 6.7  3.1  4.7  1.5]\n", | ||||
|       " [ 5.6  2.8  4.9  2. ]\n", | ||||
|       " [ 6.4  3.2  5.3  2.3]\n", | ||||
|       " [ 6.7  3.1  5.6  2.4]\n", | ||||
|       " [ 6.7  3.   5.2  2.3]\n", | ||||
|       " [ 5.8  2.7  5.1  1.9]\n", | ||||
|       " [ 5.7  3.   4.2  1.2]]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Test set\n", | ||||
|     "print (x_test)" | ||||
| @@ -182,7 +129,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 10, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
| @@ -195,54 +142,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 11, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[[-0.09752318 -0.32858743  0.34599443  0.25682671]\n", | ||||
|       " [ 1.06445511  0.09442168  0.45718919  0.39124069]\n", | ||||
|       " [-1.25950146  0.30592623 -1.09953753 -1.22172707]\n", | ||||
|       " [ 0.83205945 -0.54009199  0.56838396  0.52565467]\n", | ||||
|       " [ 0.36726814 -0.9631011   1.12435779  0.39124069]\n", | ||||
|       " [ 0.59966379  0.51743079  1.34674732  1.86979447]\n", | ||||
|       " [-1.14330363  0.72893534 -0.93274538 -1.22172707]\n", | ||||
|       " [-0.79471015  0.9404399  -1.2107323  -1.08731309]\n", | ||||
|       " [ 0.71586162  0.09442168  1.06876041  0.92889661]\n", | ||||
|       " [ 1.29685076  0.30592623  1.17995517  1.60096651]\n", | ||||
|       " [ 1.18065293  0.30592623  1.29114994  1.60096651]\n", | ||||
|       " [-1.60809495 -0.11708288 -1.26632968 -1.22172707]\n", | ||||
|       " [ 0.59966379  0.72893534  1.12435779  1.73538049]\n", | ||||
|       " [ 0.36726814 -0.32858743  0.62398134  0.39124069]\n", | ||||
|       " [ 1.29685076  0.09442168  0.84637087  1.60096651]\n", | ||||
|       " [ 0.71586162 -0.32858743  0.40159181  0.25682671]\n", | ||||
|       " [ 0.25107031 -0.11708288  0.67957873  0.92889661]\n", | ||||
|       " [-0.67851232  0.9404399  -1.15513491 -1.22172707]\n", | ||||
|       " [ 0.59966379  0.51743079  0.62398134  0.66006865]\n", | ||||
|       " [ 1.64544425  0.30592623  1.34674732  0.92889661]\n", | ||||
|       " [-1.0271058   0.09442168 -1.15513491 -1.35614105]\n", | ||||
|       " [-0.09752318  1.57495356 -1.04394015 -1.08731309]\n", | ||||
|       " [ 0.83205945 -0.11708288  1.23555256  1.46655253]\n", | ||||
|       " [-1.14330363 -0.11708288 -1.2107323  -1.35614105]\n", | ||||
|       " [ 0.25107031 -1.80911932  0.79077349  0.52565467]\n", | ||||
|       " [ 0.48346596 -0.54009199  0.67957873  0.92889661]\n", | ||||
|       " [ 0.36726814 -0.11708288  0.56838396  0.39124069]\n", | ||||
|       " [ 0.36726814 -0.54009199  0.23479966  0.25682671]\n", | ||||
|       " [ 0.83205945 -0.11708288  0.90196826  1.19772457]\n", | ||||
|       " [ 0.13487248 -0.11708288  0.84637087  0.92889661]\n", | ||||
|       " [-0.21372101 -0.75159654  0.34599443  0.25682671]\n", | ||||
|       " [ 1.06445511  0.09442168  0.62398134  0.52565467]\n", | ||||
|       " [-0.21372101 -0.54009199  0.73517611  1.19772457]\n", | ||||
|       " [ 0.71586162  0.30592623  0.95756564  1.60096651]\n", | ||||
|       " [ 1.06445511  0.09442168  1.12435779  1.73538049]\n", | ||||
|       " [ 1.06445511 -0.11708288  0.90196826  1.60096651]\n", | ||||
|       " [ 0.01867465 -0.75159654  0.84637087  1.06331059]\n", | ||||
|       " [-0.09752318 -0.11708288  0.34599443  0.12241273]]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# As we see, the iris dataset is now  normalized\n", | ||||
|     "print(x_test)" | ||||
| @@ -274,7 +176,7 @@ | ||||
|     "### Licences\n", | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -294,7 +196,24 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.6.3" | ||||
|    "version": "3.5.6" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -145,9 +145,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "## References" | ||||
|    ] | ||||
| @@ -173,7 +171,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -193,9 +191,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1+" | ||||
|    "version": "3.5.6" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, ©  Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -70,7 +70,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 1, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
| @@ -101,9 +101,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "## Train classifier" | ||||
|    ] | ||||
| @@ -117,17 +115,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Mean score: 0.940 (+/- 0.021)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.model_selection import cross_val_score, KFold\n", | ||||
|     "from sklearn.pipeline import Pipeline\n", | ||||
| @@ -179,51 +169,18 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 3, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "{'ds': DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", | ||||
|        "             max_features=None, max_leaf_nodes=None,\n", | ||||
|        "             min_impurity_split=1e-07, min_samples_leaf=1,\n", | ||||
|        "             min_samples_split=2, min_weight_fraction_leaf=0.0,\n", | ||||
|        "             presort=False, random_state=None, splitter='best'),\n", | ||||
|        " 'scaler': StandardScaler(copy=True, with_mean=True, with_std=True)}" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 3, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "model.named_steps" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 4, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)),\n", | ||||
|        " ('ds',\n", | ||||
|        "  DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", | ||||
|        "              max_features=None, max_leaf_nodes=None,\n", | ||||
|        "              min_impurity_split=1e-07, min_samples_leaf=1,\n", | ||||
|        "              min_samples_split=2, min_weight_fraction_leaf=0.0,\n", | ||||
|        "              presort=False, random_state=None, splitter='best'))]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 4, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "model.steps" | ||||
|    ] | ||||
| @@ -237,20 +194,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 5, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "dict_keys(['steps', 'scaler', 'ds', 'scaler__copy', 'scaler__with_mean', 'scaler__with_std', 'ds__class_weight', 'ds__criterion', 'ds__max_depth', 'ds__max_features', 'ds__max_leaf_nodes', 'ds__min_impurity_split', 'ds__min_samples_leaf', 'ds__min_samples_split', 'ds__min_weight_fraction_leaf', 'ds__presort', 'ds__random_state', 'ds__splitter'])" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 5, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "model.get_params().keys()" | ||||
|    ] | ||||
| @@ -264,24 +210,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 6, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('ds', DecisionTreeClassifier(class_weight='balanced', criterion='gini',\n", | ||||
|        "            max_depth=None, max_features=None, max_leaf_nodes=None,\n", | ||||
|        "            min_impurity_split=1e-07, min_samples_leaf=1,\n", | ||||
|        "            min_samples_split=2, min_weight_fraction_leaf=0.0,\n", | ||||
|        "            presort=False, random_state=None, splitter='best'))])" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 6, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "model.set_params(ds__class_weight='balanced')" | ||||
|    ] | ||||
| @@ -295,24 +226,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 7, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('ds', DecisionTreeClassifier(class_weight='balanced', criterion='gini',\n", | ||||
|        "            max_depth=None, max_features=None, max_leaf_nodes=None,\n", | ||||
|        "            min_impurity_split=1e-07, min_samples_leaf=1,\n", | ||||
|        "            min_samples_split=2, min_weight_fraction_leaf=0.0,\n", | ||||
|        "            presort=False, random_state=None, splitter='best'))])" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 7, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "model = Pipeline([\n", | ||||
|     "        ('scaler', StandardScaler()),\n", | ||||
| @@ -330,17 +246,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 8, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[ 0.01834862  0.01910853  0.05728223  0.90526062]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Fit the model\n", | ||||
|     "model.fit(x_train, y_train) \n", | ||||
| @@ -351,17 +259,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 9, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[ 0.01834862  0.01910853  0.05728223  0.90526062]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Using steps, we take the last step (-1) or the second step (1)\n", | ||||
|     "#name, my_desision_tree = model.steps[1]\n", | ||||
| @@ -389,47 +289,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 10, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "{'ds': DecisionTreeClassifier(class_weight='balanced', criterion='gini',\n", | ||||
|        "             max_depth=None, max_features=None, max_leaf_nodes=None,\n", | ||||
|        "             min_impurity_split=1e-07, min_samples_leaf=1,\n", | ||||
|        "             min_samples_split=2, min_weight_fraction_leaf=0.0,\n", | ||||
|        "             presort=False, random_state=None, splitter='best'),\n", | ||||
|        " 'ds__class_weight': 'balanced',\n", | ||||
|        " 'ds__criterion': 'gini',\n", | ||||
|        " 'ds__max_depth': None,\n", | ||||
|        " 'ds__max_features': None,\n", | ||||
|        " 'ds__max_leaf_nodes': None,\n", | ||||
|        " 'ds__min_impurity_split': 1e-07,\n", | ||||
|        " 'ds__min_samples_leaf': 1,\n", | ||||
|        " 'ds__min_samples_split': 2,\n", | ||||
|        " 'ds__min_weight_fraction_leaf': 0.0,\n", | ||||
|        " 'ds__presort': False,\n", | ||||
|        " 'ds__random_state': None,\n", | ||||
|        " 'ds__splitter': 'best',\n", | ||||
|        " 'scaler': StandardScaler(copy=True, with_mean=True, with_std=True),\n", | ||||
|        " 'scaler__copy': True,\n", | ||||
|        " 'scaler__with_mean': True,\n", | ||||
|        " 'scaler__with_std': True,\n", | ||||
|        " 'steps': [('scaler',\n", | ||||
|        "   StandardScaler(copy=True, with_mean=True, with_std=True)),\n", | ||||
|        "  ('ds', DecisionTreeClassifier(class_weight='balanced', criterion='gini',\n", | ||||
|        "               max_depth=None, max_features=None, max_leaf_nodes=None,\n", | ||||
|        "               min_impurity_split=1e-07, min_samples_leaf=1,\n", | ||||
|        "               min_samples_split=2, min_weight_fraction_leaf=0.0,\n", | ||||
|        "               presort=False, random_state=None, splitter='best'))]}" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 10, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "model.get_params()" | ||||
|    ] | ||||
| @@ -466,18 +328,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 11, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Best score:  0.946428571429\n", | ||||
|       "Best params:  {'max_depth': 3}\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.model_selection import GridSearchCV\n", | ||||
|     "from sklearn.tree import DecisionTreeClassifier\n", | ||||
| @@ -496,32 +349,16 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Now we are going to show the results of grid search" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 12, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "0.946 (+/-0.075) for {'max_depth': 3}\n", | ||||
|       "0.929 (+/-0.024) for {'max_depth': 4}\n", | ||||
|       "0.946 (+/-0.075) for {'max_depth': 5}\n", | ||||
|       "0.929 (+/-0.024) for {'max_depth': 6}\n", | ||||
|       "0.946 (+/-0.075) for {'max_depth': 7}\n", | ||||
|       "0.946 (+/-0.075) for {'max_depth': 8}\n", | ||||
|       "0.929 (+/-0.024) for {'max_depth': 9}\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We print the score for each value of max_depth\n", | ||||
|     "for i, max_depth in enumerate(gs.cv_results_['params']):\n", | ||||
| @@ -539,17 +376,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 13, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Mean score: 0.953 (+/- 0.020)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# create a composite estimator made by a pipeline of preprocessing and the KNN model\n", | ||||
|     "model = Pipeline([\n", | ||||
| @@ -581,550 +410,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 14, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "# Tuning hyper-parameters for precision\n", | ||||
|       "\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "/opt/conda/lib/python3.6/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples.\n", | ||||
|       "  'precision', 'predicted', average, warn_for)\n", | ||||
|       "/opt/conda/lib/python3.6/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples.\n", | ||||
|       "  'precision', 'predicted', average, warn_for)\n", | ||||
|       "/opt/conda/lib/python3.6/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples.\n", | ||||
|       "  'precision', 'predicted', average, warn_for)\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Best parameters set found on development set:\n", | ||||
|       "\n", | ||||
|       "{'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "\n", | ||||
|       "Grid scores on development set:\n", | ||||
|       "\n", | ||||
|       "0.964 (+/-0.092) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.943 (+/-0.084) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.123) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.936 (+/-0.122) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.953 (+/-0.126) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.973 (+/-0.068) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.968 (+/-0.132) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.123) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.943 (+/-0.081) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.123) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.919 (+/-0.251) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.975 (+/-0.079) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.953 (+/-0.126) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.123) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.951 (+/-0.118) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.953 (+/-0.126) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.943 (+/-0.113) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.953 (+/-0.126) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.948 (+/-0.108) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.953 (+/-0.126) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.961 (+/-0.081) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.953 (+/-0.126) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.928 (+/-0.165) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.953 (+/-0.126) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.949 (+/-0.118) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.953 (+/-0.134) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.123) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.123) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.942 (+/-0.067) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.980 (+/-0.062) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.945 (+/-0.141) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.949 (+/-0.095) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.953 (+/-0.126) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.123) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.953 (+/-0.126) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.961 (+/-0.114) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.972 (+/-0.069) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.953 (+/-0.126) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.123) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.125) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.142) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.956 (+/-0.121) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.968 (+/-0.082) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.956 (+/-0.097) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.906 (+/-0.296) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.932 (+/-0.110) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.955 (+/-0.121) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.921 (+/-0.132) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.942 (+/-0.132) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.948 (+/-0.108) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.945 (+/-0.123) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.897 (+/-0.187) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.944 (+/-0.148) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.948 (+/-0.107) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.961 (+/-0.081) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.939 (+/-0.117) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.949 (+/-0.090) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.972 (+/-0.068) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.906 (+/-0.162) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.947 (+/-0.146) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.123) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.953 (+/-0.134) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.975 (+/-0.079) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.932 (+/-0.136) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.940 (+/-0.146) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.953 (+/-0.082) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.979 (+/-0.064) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.952 (+/-0.108) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.968 (+/-0.082) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.919 (+/-0.106) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.941 (+/-0.129) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.943 (+/-0.113) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.956 (+/-0.094) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.954 (+/-0.154) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.949 (+/-0.158) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.943 (+/-0.113) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.893 (+/-0.163) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.916 (+/-0.186) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.943 (+/-0.113) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.961 (+/-0.081) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.947 (+/-0.108) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.912 (+/-0.120) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.960 (+/-0.082) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.943 (+/-0.113) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.962 (+/-0.113) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.966 (+/-0.070) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.962 (+/-0.113) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.943 (+/-0.113) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.949 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.954 (+/-0.112) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.943 (+/-0.113) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.955 (+/-0.097) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.974 (+/-0.081) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.947 (+/-0.175) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.950 (+/-0.117) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.943 (+/-0.113) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.935 (+/-0.075) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.954 (+/-0.129) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.940 (+/-0.142) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.934 (+/-0.155) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.927 (+/-0.112) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.943 (+/-0.113) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.934 (+/-0.184) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.932 (+/-0.136) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.968 (+/-0.082) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.903 (+/-0.240) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.939 (+/-0.179) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.975 (+/-0.079) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.923 (+/-0.094) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.967 (+/-0.083) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.944 (+/-0.115) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.177) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.943 (+/-0.113) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.964 (+/-0.092) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.950 (+/-0.117) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.895 (+/-0.229) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.944 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.930 (+/-0.199) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.953 (+/-0.126) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.949 (+/-0.116) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.943 (+/-0.113) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.922 (+/-0.177) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.959 (+/-0.067) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.933 (+/-0.136) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.933 (+/-0.125) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.943 (+/-0.113) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.939 (+/-0.117) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.123) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.918 (+/-0.155) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.945 (+/-0.123) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.931 (+/-0.153) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.944 (+/-0.113) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.957 (+/-0.120) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.972 (+/-0.069) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.968 (+/-0.082) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.950 (+/-0.118) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.955 (+/-0.111) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "\n", | ||||
|       "Detailed classification report:\n", | ||||
|       "\n", | ||||
|       "The model is trained on the full development set.\n", | ||||
|       "The scores are computed on the full evaluation set.\n", | ||||
|       "\n", | ||||
|       "             precision    recall  f1-score   support\n", | ||||
|       "\n", | ||||
|       "          0       1.00      1.00      1.00         8\n", | ||||
|       "          1       0.92      1.00      0.96        11\n", | ||||
|       "          2       1.00      0.95      0.97        19\n", | ||||
|       "\n", | ||||
|       "avg / total       0.98      0.97      0.97        38\n", | ||||
|       "\n", | ||||
|       "\n", | ||||
|       "# Tuning hyper-parameters for recall\n", | ||||
|       "\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "/opt/conda/lib/python3.6/site-packages/sklearn/model_selection/_search.py:667: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n", | ||||
|       "  DeprecationWarning)\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Best parameters set found on development set:\n", | ||||
|       "\n", | ||||
|       "{'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "\n", | ||||
|       "Grid scores on development set:\n", | ||||
|       "\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.893 (+/-0.215) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.955 (+/-0.092) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.155) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.155) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.241) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.866 (+/-0.268) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.884 (+/-0.218) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.911 (+/-0.179) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.973 (+/-0.081) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.155) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.893 (+/-0.177) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.137) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.162) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.187) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.104) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.911 (+/-0.191) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.141) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.114) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.155) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.911 (+/-0.137) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.902 (+/-0.148) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.158) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.113) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.137) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.137) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.147) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.893 (+/-0.255) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.117) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.141) for {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.955 (+/-0.115) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.893 (+/-0.139) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.168) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.137) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.911 (+/-0.179) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.137) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.146) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.121) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.911 (+/-0.179) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.955 (+/-0.115) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.141) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.955 (+/-0.121) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.119) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.109) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.137) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.183) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.120) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.168) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.183) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.955 (+/-0.147) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.955 (+/-0.121) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.158) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.168) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.866 (+/-0.202) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.137) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.155) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.141) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.154) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.151) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.964 (+/-0.140) for {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.955 (+/-0.115) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.140) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.120) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.131) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.181) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.204) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.154) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.137) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.955 (+/-0.146) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.955 (+/-0.121) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.136) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.964 (+/-0.121) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.086) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.175) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.114) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.173) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.110) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.175) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.168) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.131) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.964 (+/-0.119) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.110) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.110) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.120) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.147) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.127) for {'class_weight': None, 'criterion': 'gini', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.955 (+/-0.115) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.893 (+/-0.213) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.875 (+/-0.216) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.196) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.911 (+/-0.173) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 3, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.163) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.115) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 4, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.187) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.187) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.131) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.155) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 5, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.127) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.159) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.120) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.137) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.902 (+/-0.179) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.175) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.902 (+/-0.148) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.929 (+/-0.132) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.955 (+/-0.146) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.955 (+/-0.169) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.964 (+/-0.121) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.136) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 8, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'best'}\n", | ||||
|       "0.920 (+/-0.147) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': None, 'splitter': 'random'}\n", | ||||
|       "0.946 (+/-0.140) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'best'}\n", | ||||
|       "0.938 (+/-0.137) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 5, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'best'}\n", | ||||
|       "0.929 (+/-0.168) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 10, 'splitter': 'random'}\n", | ||||
|       "0.938 (+/-0.138) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'best'}\n", | ||||
|       "0.946 (+/-0.120) for {'class_weight': None, 'criterion': 'entropy', 'max_depth': 9, 'max_leaf_nodes': 20, 'splitter': 'random'}\n", | ||||
|       "\n", | ||||
|       "Detailed classification report:\n", | ||||
|       "\n", | ||||
|       "The model is trained on the full development set.\n", | ||||
|       "The scores are computed on the full evaluation set.\n", | ||||
|       "\n", | ||||
|       "             precision    recall  f1-score   support\n", | ||||
|       "\n", | ||||
|       "          0       1.00      1.00      1.00         8\n", | ||||
|       "          1       1.00      0.64      0.78        11\n", | ||||
|       "          2       0.83      1.00      0.90        19\n", | ||||
|       "\n", | ||||
|       "avg / total       0.91      0.89      0.89        38\n", | ||||
|       "\n", | ||||
|       "\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "/opt/conda/lib/python3.6/site-packages/sklearn/model_selection/_search.py:667: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n", | ||||
|       "  DeprecationWarning)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Set the parameters by cross-validation\n", | ||||
|     "\n", | ||||
| @@ -1156,8 +444,11 @@ | ||||
|     "    print()\n", | ||||
|     "    print(\"Grid scores on development set:\")\n", | ||||
|     "    print()\n", | ||||
|     "    for params, mean_score, scores in gs.grid_scores_:\n", | ||||
|     "        print(\"%0.3f (+/-%0.03f) for %r\" % (mean_score, scores.std() * 2, params))\n", | ||||
|     "    means = gs.cv_results_['mean_test_score']\n", | ||||
|     "    stds = gs.cv_results_['std_test_score']\n", | ||||
|     "\n", | ||||
|     "    for mean_score, std_score, params in zip(means, stds, gs.cv_results_['params']):\n", | ||||
|     "        print(\"%0.3f (+/-%0.03f) for %r\" % (mean_score, std_score * 2, params))\n", | ||||
|     "    print()\n", | ||||
|     "\n", | ||||
|     "    print(\"Detailed classification report:\")\n", | ||||
| @@ -1172,26 +463,16 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Let's evaluate the resulting tuning." | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 15, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Mean score: 0.907 (+/- 0.015)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# create a composite estimator made by a pipeline of preprocessing and the KNN model\n", | ||||
|     "model = Pipeline([\n", | ||||
| @@ -1251,7 +532,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "©  Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -1271,7 +552,24 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.6.3" | ||||
|    "version": "3.6.7" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, ©  Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -55,22 +55,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 1, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('KNN', KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n", | ||||
|        "           metric_params=None, n_jobs=1, n_neighbors=5, p=2,\n", | ||||
|        "           weights='uniform'))])" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 1, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# load iris\n", | ||||
|     "from sklearn import datasets\n", | ||||
| @@ -106,20 +93,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "array([0])" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 2, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "import pickle\n", | ||||
|     "s = pickle.dumps(model)\n", | ||||
| @@ -136,10 +112,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 3, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# save model\n", | ||||
| @@ -172,7 +146,7 @@ | ||||
|     "## Licence\n", | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "©  Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -192,7 +166,24 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.6.3" | ||||
|    "version": "3.6.7" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|   | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								ml1/filename.pkl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								ml1/filename.pkl
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							| Before Width: | Height: | Size: 21 KiB After Width: | Height: | Size: 106 KiB | 
| @@ -1,7 +1,7 @@ | ||||
| import numpy as np | ||||
| import matplotlib.pyplot as plt | ||||
| from matplotlib.colors import ListedColormap | ||||
| from sklearn.cross_validation import train_test_split | ||||
| from sklearn.model_selection import train_test_split | ||||
| from sklearn.preprocessing import StandardScaler | ||||
| from sklearn.datasets import make_moons, make_circles, make_classification | ||||
| from sklearn.neighbors import KNeighborsClassifier | ||||
|   | ||||
							
								
								
									
										4
									
								
								ml1/requirements.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								ml1/requirements.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | ||||
| scikit-learn | ||||
| seaborn | ||||
| pydotplus | ||||
| graphviz | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, ©  Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -86,7 +86,7 @@ | ||||
|     "## Licence\n", | ||||
|     "The notebook is freely licensed under under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "©  Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -106,9 +106,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, ©  Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -84,25 +84,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 1, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "0     5\n", | ||||
|        "1    10\n", | ||||
|        "2    15\n", | ||||
|        "dtype: int64" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 1, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "import numpy as np\n", | ||||
|     "import pandas as pd\n", | ||||
| @@ -124,25 +108,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "a     5\n", | ||||
|        "b    10\n", | ||||
|        "c    15\n", | ||||
|        "dtype: int64" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 2, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "d = {'a': 5, 'b': 10, 'c': 15}\n", | ||||
|     "s = Series(d)\n", | ||||
| @@ -151,22 +119,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 3, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Index(['a', 'b', 'c'], dtype='object')" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 3, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We can get the list of indexes\n", | ||||
|     "s.index" | ||||
| @@ -174,22 +129,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 4, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "array([ 5, 10, 15])" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 4, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# and the values\n", | ||||
|     "s.values" | ||||
| @@ -204,28 +146,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 5, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Madrid       3141991\n", | ||||
|        "Barcelona    1604555\n", | ||||
|        "Valencia      786189\n", | ||||
|        "Sevilla       693878\n", | ||||
|        "Zaragoza      664953\n", | ||||
|        "Malaga        569130\n", | ||||
|        "dtype: int64" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 5, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Series with population in 2015 of more populated cities in Spain\n", | ||||
|     "s = Series([3141991, 1604555, 786189, 693878, 664953, 569130], index=['Madrid', 'Barcelona', 'Valencia', 'Sevilla', \n", | ||||
| @@ -235,22 +158,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 6, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "3141991" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 6, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Population of Madrid\n", | ||||
|     "s['Madrid']" | ||||
| @@ -272,28 +182,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 7, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Madrid        True\n", | ||||
|        "Barcelona     True\n", | ||||
|        "Valencia     False\n", | ||||
|        "Sevilla      False\n", | ||||
|        "Zaragoza     False\n", | ||||
|        "Malaga       False\n", | ||||
|        "dtype: bool" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 7, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Boolean condition\n", | ||||
|     "s > 1000000" | ||||
| @@ -301,24 +192,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 8, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Madrid       3141991\n", | ||||
|        "Barcelona    1604555\n", | ||||
|        "dtype: int64" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 8, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Cities with population greater than 1.000.000\n", | ||||
|     "s[s > 1000000]" | ||||
| @@ -333,24 +209,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 9, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Madrid       3141991\n", | ||||
|        "Barcelona    1604555\n", | ||||
|        "dtype: int64" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 9, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Cities with population greater than the mean\n", | ||||
|     "s[s > s.mean()]" | ||||
| @@ -358,25 +219,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 10, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Madrid       3141991\n", | ||||
|        "Barcelona    1604555\n", | ||||
|        "Valencia      786189\n", | ||||
|        "dtype: int64" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 10, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Cities with population greater than the median\n", | ||||
|     "s[s > s.median()]" | ||||
| @@ -384,28 +229,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 11, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Madrid        True\n", | ||||
|        "Barcelona     True\n", | ||||
|        "Valencia      True\n", | ||||
|        "Sevilla      False\n", | ||||
|        "Zaragoza     False\n", | ||||
|        "Malaga       False\n", | ||||
|        "dtype: bool" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 11, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Check cities with a population greater than 700.000\n", | ||||
|     "s > 700000" | ||||
| @@ -413,25 +239,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 12, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Madrid       3141991\n", | ||||
|        "Barcelona    1604555\n", | ||||
|        "Valencia      786189\n", | ||||
|        "dtype: int64" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 12, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# List cities with a population greater than 700.000\n", | ||||
|     "s[s > 700000]" | ||||
| @@ -439,28 +249,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 13, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Madrid        True\n", | ||||
|        "Barcelona     True\n", | ||||
|        "Valencia      True\n", | ||||
|        "Sevilla      False\n", | ||||
|        "Zaragoza     False\n", | ||||
|        "Malaga       False\n", | ||||
|        "dtype: bool" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 13, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Another way to write the same boolean indexing selection\n", | ||||
|     "bigger_than_700000 = s > 700000\n", | ||||
| @@ -469,25 +260,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 14, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Madrid       3141991\n", | ||||
|        "Barcelona    1604555\n", | ||||
|        "Valencia      786189\n", | ||||
|        "dtype: int64" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 14, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Cities with population > 700000\n", | ||||
|     "s[bigger_than_700000]" | ||||
| @@ -509,28 +284,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 15, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Madrid       1570995.5\n", | ||||
|        "Barcelona     802277.5\n", | ||||
|        "Valencia      393094.5\n", | ||||
|        "Sevilla       346939.0\n", | ||||
|        "Zaragoza      332476.5\n", | ||||
|        "Malaga        284565.0\n", | ||||
|        "dtype: float64" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 15, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Divide population by 2\n", | ||||
|     "s / 2" | ||||
| @@ -538,22 +294,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 16, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "1243449.3333333333" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 16, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Get the average population\n", | ||||
|     "s.mean()" | ||||
| @@ -561,22 +304,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 17, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "3141991" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 17, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Get the highest population\n", | ||||
|     "s.max()" | ||||
| @@ -598,28 +328,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 18, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Madrid       3320000\n", | ||||
|        "Barcelona    1604555\n", | ||||
|        "Valencia      786189\n", | ||||
|        "Sevilla       693878\n", | ||||
|        "Zaragoza      664953\n", | ||||
|        "Malaga        569130\n", | ||||
|        "dtype: int64" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 18, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Change population of one city\n", | ||||
|     "s['Madrid'] = 3320000\n", | ||||
| @@ -628,28 +339,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 19, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Madrid       3652000.0\n", | ||||
|        "Barcelona    1765010.5\n", | ||||
|        "Valencia      864807.9\n", | ||||
|        "Sevilla       693878.0\n", | ||||
|        "Zaragoza      664953.0\n", | ||||
|        "Malaga        569130.0\n", | ||||
|        "dtype: float64" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 19, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Increase by 10% cities with population greater than 700000\n", | ||||
|     "s[s > 700000] = 1.1 * s[s > 700000]\n", | ||||
| @@ -672,61 +364,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 20, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|        "<div>\n", | ||||
|        "<table border=\"1\" class=\"dataframe\">\n", | ||||
|        "  <thead>\n", | ||||
|        "    <tr style=\"text-align: right;\">\n", | ||||
|        "      <th></th>\n", | ||||
|        "      <th>one</th>\n", | ||||
|        "      <th>two</th>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </thead>\n", | ||||
|        "  <tbody>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>a</th>\n", | ||||
|        "      <td>1.0</td>\n", | ||||
|        "      <td>1.0</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>b</th>\n", | ||||
|        "      <td>2.0</td>\n", | ||||
|        "      <td>2.0</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>c</th>\n", | ||||
|        "      <td>3.0</td>\n", | ||||
|        "      <td>3.0</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>d</th>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>4.0</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </tbody>\n", | ||||
|        "</table>\n", | ||||
|        "</div>" | ||||
|       ], | ||||
|       "text/plain": [ | ||||
|        "   one  two\n", | ||||
|        "a  1.0  1.0\n", | ||||
|        "b  2.0  2.0\n", | ||||
|        "c  3.0  3.0\n", | ||||
|        "d  NaN  4.0" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 20, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We are going to create a DataFrame from a dict of Series\n", | ||||
|     "d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),\n", | ||||
| @@ -748,55 +388,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 21, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|        "<div>\n", | ||||
|        "<table border=\"1\" class=\"dataframe\">\n", | ||||
|        "  <thead>\n", | ||||
|        "    <tr style=\"text-align: right;\">\n", | ||||
|        "      <th></th>\n", | ||||
|        "      <th>one</th>\n", | ||||
|        "      <th>two</th>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </thead>\n", | ||||
|        "  <tbody>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>d</th>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>4.0</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>b</th>\n", | ||||
|        "      <td>2.0</td>\n", | ||||
|        "      <td>2.0</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>a</th>\n", | ||||
|        "      <td>1.0</td>\n", | ||||
|        "      <td>1.0</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </tbody>\n", | ||||
|        "</table>\n", | ||||
|        "</div>" | ||||
|       ], | ||||
|       "text/plain": [ | ||||
|        "   one  two\n", | ||||
|        "d  NaN  4.0\n", | ||||
|        "b  2.0  2.0\n", | ||||
|        "a  1.0  1.0" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 21, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We can filter\n", | ||||
|     "df = DataFrame(d, index=['d', 'b', 'a'])\n", | ||||
| @@ -812,55 +406,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 22, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|        "<div>\n", | ||||
|        "<table border=\"1\" class=\"dataframe\">\n", | ||||
|        "  <thead>\n", | ||||
|        "    <tr style=\"text-align: right;\">\n", | ||||
|        "      <th></th>\n", | ||||
|        "      <th>two</th>\n", | ||||
|        "      <th>three</th>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </thead>\n", | ||||
|        "  <tbody>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>d</th>\n", | ||||
|        "      <td>4.0</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>b</th>\n", | ||||
|        "      <td>2.0</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>a</th>\n", | ||||
|        "      <td>1.0</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </tbody>\n", | ||||
|        "</table>\n", | ||||
|        "</div>" | ||||
|       ], | ||||
|       "text/plain": [ | ||||
|        "   two three\n", | ||||
|        "d  4.0   NaN\n", | ||||
|        "b  2.0   NaN\n", | ||||
|        "a  1.0   NaN" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 22, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "df = DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])\n", | ||||
|     "df" | ||||
| @@ -904,7 +452,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "©  Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -924,9 +472,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.2" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, ©  Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -46,10 +46,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "import pandas as pd\n", | ||||
| @@ -82,9 +80,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [] | ||||
|   }, | ||||
| @@ -105,9 +101,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [] | ||||
|   }, | ||||
| @@ -121,9 +115,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [] | ||||
|   }, | ||||
| @@ -137,9 +129,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [] | ||||
|   }, | ||||
| @@ -153,17 +143,13 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "How many passsengers have survived? List them grouped by Sex and Pclass.\n", | ||||
|     "\n", | ||||
| @@ -173,17 +159,13 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Visualise df_1 as an histogram." | ||||
|    ] | ||||
| @@ -191,17 +173,13 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "# Feature Engineering" | ||||
|    ] | ||||
| @@ -232,9 +210,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "df['FamilySize'] = df['SibSp'] + df['Parch']\n", | ||||
| @@ -258,9 +234,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "df['Alone'] = (df.FamilySize == 0)\n", | ||||
| @@ -284,9 +258,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Taken from http://www.analyticsvidhya.com/blog/2014/09/data-munging-python-using-pandas-baby-steps-python/\n", | ||||
| @@ -307,9 +279,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "df['Salutation'].unique()" | ||||
| @@ -318,9 +288,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "df.groupby(['Salutation']).size()" | ||||
| @@ -336,9 +304,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def group_salutation(old_salutation):\n", | ||||
| @@ -362,9 +328,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Distribution\n", | ||||
| @@ -375,9 +339,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "df.boxplot(column='Age', by = 'Salutation', sym='k.')" | ||||
| @@ -393,9 +355,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Specific features for Children and Female since there are more survivors\n", | ||||
| @@ -413,9 +373,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Group ages to simplify machine learning algorithms.  0: 0-5, 1: 6-10, 2: 11-15, 3: 16-59 and 4: 60-80\n", | ||||
| @@ -437,10 +395,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 8, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def substrings_in_string(big_string, substrings):\n", | ||||
| @@ -475,9 +431,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "df['FarePerPerson']= df['Fare'] / (df['FamilySize'] + 1)" | ||||
| @@ -500,9 +454,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "df['AgeClass']=df['Age']*df['Pclass']" | ||||
| @@ -521,7 +473,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "©  Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -541,9 +493,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.2" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, ©  Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -94,7 +94,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -114,9 +114,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.2" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -61,7 +61,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -81,9 +81,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1+" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -19,11 +19,10 @@ samples. | ||||
|  | ||||
| import numpy as np | ||||
| import matplotlib.pyplot as plt | ||||
| from sklearn import cross_validation | ||||
| from sklearn.naive_bayes import GaussianNB | ||||
| from sklearn.svm import SVC | ||||
| from sklearn.datasets import load_digits | ||||
| from sklearn.learning_curve import learning_curve | ||||
| from sklearn.model_selection import learning_curve | ||||
|  | ||||
|  | ||||
| def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, | ||||
| @@ -53,7 +52,7 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, | ||||
|     cv : integer, cross-validation generator, optional | ||||
|         If an integer is passed, it is the number of folds (defaults to 3). | ||||
|         Specific cross-validation objects can be passed, see | ||||
|         sklearn.cross_validation module for the list of possible objects | ||||
|         sklearn.model_selection module for the list of possible objects | ||||
|  | ||||
|     n_jobs : integer, optional | ||||
|         Number of jobs to run in parallel (default 1). | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2018 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, ©  Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -60,7 +60,7 @@ | ||||
|     "## Licence\n", | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2018 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "©  Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -80,7 +80,7 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.5" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2018 Óscar Araque" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, ©  Óscar Araque" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -275,7 +275,10 @@ | ||||
|     "print(classification_report(y_test, lr_preds))\n", | ||||
|     "\n", | ||||
|     "plt.figure(figsize=(10,7))\n", | ||||
|     "plot_decision_surface(X, y, lr)" | ||||
|     "# This method outputs a visualization\n", | ||||
|     "# the h parameter adjusts the precision of the visualization\n", | ||||
|     "# if you find memory errors, set h to a higher value (e.g., h=0.1)\n", | ||||
|     "plot_decision_surface(X, y, lr, h=0.02) " | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -620,7 +623,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2018 Óscar Araque, Universidad Politécnica de Madrid." | ||||
|     "© Óscar Araque, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -640,7 +643,7 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.5" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2018 Óscar Araque" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Óscar Araque" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -298,7 +298,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2018 Óscar Araque, Universidad Politécnica de Madrid." | ||||
|     "© Óscar Araque, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -318,7 +318,7 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.5" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2018 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -58,7 +58,7 @@ | ||||
|     "## Licence\n", | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2018 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "©  Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -78,7 +78,7 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.5" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2018 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -72,9 +72,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "scrolled": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "import random\n", | ||||
| @@ -258,7 +256,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2018 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -278,7 +276,7 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.5" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2018 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -58,7 +58,7 @@ | ||||
|     "## Licence\n", | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2018 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -78,7 +78,7 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.5" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -64,7 +64,7 @@ | ||||
|     "## Licence\n", | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -84,9 +84,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -68,9 +68,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "review = \"\"\"I purchased this monitor because of budgetary concerns. This item was the most inexpensive 17 inch monitor \n", | ||||
| @@ -111,9 +109,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "import nltk\n", | ||||
| @@ -171,9 +167,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk.tokenize import sent_tokenize, word_tokenize\n", | ||||
| @@ -199,10 +193,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false, | ||||
|     "scrolled": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "words = [word_tokenize(t) for t in sent_tokenize(review)]\n", | ||||
| @@ -219,9 +210,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "words = word_tokenize(review)\n", | ||||
| @@ -239,9 +228,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk.tokenize import TweetTokenizer\n", | ||||
| @@ -268,9 +255,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk.stem import PorterStemmer, LancasterStemmer, WordNetLemmatizer\n", | ||||
| @@ -304,9 +289,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "As we can see, we get the forms *are* and *is* instead of *be*. This is because we have not specified the Part-Of-Speech (POS), and the default POS is 'n' (noun).\n", | ||||
|     "\n", | ||||
| @@ -316,9 +299,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "verbs = \"are crying is have has\"\n", | ||||
| @@ -327,9 +308,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Depending on the application, we can select stemmers or lemmatizers. \n", | ||||
|     "\n", | ||||
| @@ -341,9 +320,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def preprocess(words, type='doc'):\n", | ||||
| @@ -376,9 +353,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk.corpus import stopwords\n", | ||||
| @@ -390,9 +365,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def preprocess(words, type='doc'):\n", | ||||
| @@ -428,9 +401,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "import string\n", | ||||
| @@ -474,9 +445,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "frec = nltk.FreqDist(nltk.word_tokenize(review))\n", | ||||
| @@ -515,7 +484,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -535,9 +504,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.2" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -62,9 +62,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "review = \"\"\"I purchased this Dell monitor because of budgetary concerns. This item was the most inexpensive 17 inch Apple monitor \n", | ||||
| @@ -110,9 +108,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk import pos_tag, word_tokenize\n", | ||||
| @@ -129,9 +125,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "print (pos_tag(word_tokenize(review)))" | ||||
| @@ -147,9 +141,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "import nltk\n", | ||||
| @@ -166,9 +158,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk.stem import WordNetLemmatizer\n", | ||||
| @@ -199,9 +189,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk import ne_chunk, pos_tag, word_tokenize\n", | ||||
| @@ -246,9 +234,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk.app import srparser_app\n", | ||||
| @@ -265,9 +251,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk.app import rdparser_app\n", | ||||
| @@ -288,9 +272,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk.chunk.regexp import *\n", | ||||
| @@ -316,9 +298,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def extractTrees(parsed_tree, category='NP'):\n", | ||||
| @@ -330,9 +310,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def extractStrings(parsed_tree, category='NP'):\n", | ||||
| @@ -370,7 +348,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -390,9 +368,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.2" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -60,9 +60,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "doc1 = 'Summer is coming but Summer is short'\n", | ||||
| @@ -73,9 +71,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "# Tools" | ||||
|    ] | ||||
| @@ -110,9 +106,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.feature_extraction.text import CountVectorizer\n", | ||||
| @@ -123,9 +117,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "As we can see, [CountVectorizer](http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html#sklearn.feature_extraction.text.CountVectorizer) comes with many options. We can define many configuration options, such as the maximum or minimum frequency of a term (*min_df*, *max_df*), maximum number of features (*max_features*), whether we analyze words or characters (*analyzer*), or whether the output is binary or not (*binary*). *CountVectorizer* also allows us to specify how to preprocess the input (*preprocessor*) before tokenizing it (*tokenizer*) and which stop words to exclude (*stop_words*).\n", | ||||
|     "\n", | ||||
| @@ -137,9 +129,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "vectors = vectorizer.fit_transform(documents)\n", | ||||
| @@ -148,9 +138,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "We see the vectors are stored as a sparse matrix of 3x6 dimensions.\n", | ||||
|     "We can print the matrix as well as the feature names." | ||||
| @@ -159,9 +147,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "print(vectors.toarray())\n", | ||||
| @@ -170,9 +156,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "As you can see, the pronoun 'I' has been removed because of the default token_pattern. \n", | ||||
|     "We can change this as follows." | ||||
| @@ -181,9 +165,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "vectorizer = CountVectorizer(analyzer=\"word\", stop_words=None, token_pattern='(?u)\\\\b\\\\w+\\\\b') \n", | ||||
| @@ -201,9 +183,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "vectorizer = CountVectorizer(analyzer=\"word\", stop_words='english', token_pattern='(?u)\\\\b\\\\w+\\\\b') \n", | ||||
| @@ -214,9 +194,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#stop words in scikit-learn for English\n", | ||||
| @@ -226,9 +204,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Vectors\n", | ||||
| @@ -246,9 +222,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from scipy.spatial.distance import cosine\n", | ||||
| @@ -275,9 +249,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "vectorizer = CountVectorizer(analyzer=\"word\", stop_words='english', binary=True) \n", | ||||
| @@ -288,9 +260,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "vectors.toarray()" | ||||
| @@ -313,9 +283,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "vectorizer = CountVectorizer(analyzer=\"word\", stop_words='english', ngram_range=[2,2]) \n", | ||||
| @@ -326,9 +294,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "vectors.toarray()" | ||||
| @@ -351,9 +317,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.feature_extraction.text import TfidfVectorizer\n", | ||||
| @@ -366,9 +330,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "vectors.toarray()" | ||||
| @@ -384,9 +346,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "train = [doc1, doc2, doc3]\n", | ||||
| @@ -400,10 +360,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false, | ||||
|     "scrolled": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "vectors.toarray()" | ||||
| @@ -419,9 +376,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.metrics.pairwise import cosine_similarity\n", | ||||
| @@ -445,9 +400,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.metrics.pairwise import linear_kernel\n", | ||||
| @@ -483,7 +436,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -503,9 +456,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.2" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -74,19 +74,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 1, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "['alt.atheism', 'comp.graphics', 'comp.os.ms-windows.misc', 'comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'comp.windows.x', 'misc.forsale', 'rec.autos', 'rec.motorcycles', 'rec.sport.baseball', 'rec.sport.hockey', 'sci.crypt', 'sci.electronics', 'sci.med', 'sci.space', 'soc.religion.christian', 'talk.politics.guns', 'talk.politics.mideast', 'talk.politics.misc', 'talk.religion.misc']\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.datasets import fetch_20newsgroups\n", | ||||
|     "\n", | ||||
| @@ -100,19 +90,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "20\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Number of categories\n", | ||||
|     "print(len(newsgroups_train.target_names))" | ||||
| @@ -120,28 +100,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 3, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Category id 4 comp.sys.mac.hardware\n", | ||||
|       "Doc A fair number of brave souls who upgraded their SI clock oscillator have\n", | ||||
|       "shared their experiences for this poll. Please send a brief message detailing\n", | ||||
|       "your experiences with the procedure. Top speed attained, CPU rated speed,\n", | ||||
|       "add on cards and adapters, heat sinks, hour of usage per day, floppy disk\n", | ||||
|       "functionality with 800 and 1.4 m floppies are especially requested.\n", | ||||
|       "\n", | ||||
|       "I will be summarizing in the next two days, so please add to the network\n", | ||||
|       "knowledge base if you have done the clock upgrade and haven't answered this\n", | ||||
|       "poll. Thanks.\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Show a document\n", | ||||
|     "docid = 1\n", | ||||
| @@ -154,22 +115,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 4, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "(11314,)" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 4, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Number of files\n", | ||||
|     "newsgroups_train.filenames.shape" | ||||
| @@ -177,30 +125,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 5, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "/home/cif/anaconda3/lib/python3.5/site-packages/numpy/core/fromnumeric.py:2652: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.\n", | ||||
|       "  VisibleDeprecationWarning)\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "(11314, 101323)" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 5, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Obtain a vector\n", | ||||
|     "\n", | ||||
| @@ -214,22 +141,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 6, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "66.80510871486653" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 6, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# The tf-idf vectors are very sparse, with an average of 66 non-zero components in 101,323 dimensions (.06%)\n", | ||||
|     "vectors_train.nnz / float(vectors_train.shape[0])" | ||||
| @@ -251,30 +165,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 7, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "/home/cif/anaconda3/lib/python3.5/site-packages/numpy/core/fromnumeric.py:2652: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.\n", | ||||
|       "  VisibleDeprecationWarning)\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "0.69545360719001303" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 7, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.naive_bayes import MultinomialNB\n", | ||||
|     "\n", | ||||
| @@ -302,20 +195,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 8, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "dimensionality: 101323\n", | ||||
|       "density: 1.000000\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.utils.extmath import density\n", | ||||
|     "\n", | ||||
| @@ -325,38 +207,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 9, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "alt.atheism: islam atheists say just religion atheism think don people god\n", | ||||
|       "comp.graphics: looking format 3d know program file files thanks image graphics\n", | ||||
|       "comp.os.ms-windows.misc: card problem thanks driver drivers use files dos file windows\n", | ||||
|       "comp.sys.ibm.pc.hardware: monitor disk thanks pc ide controller bus card scsi drive\n", | ||||
|       "comp.sys.mac.hardware: know monitor does quadra simms thanks problem drive apple mac\n", | ||||
|       "comp.windows.x: using windows x11r5 use application thanks widget server motif window\n", | ||||
|       "misc.forsale: asking email sell price condition new shipping offer 00 sale\n", | ||||
|       "rec.autos: don ford new good dealer just engine like cars car\n", | ||||
|       "rec.motorcycles: don just helmet riding like motorcycle ride bikes dod bike\n", | ||||
|       "rec.sport.baseball: braves players pitching hit runs games game baseball team year\n", | ||||
|       "rec.sport.hockey: league year nhl games season players play hockey team game\n", | ||||
|       "sci.crypt: people use escrow nsa keys government chip clipper encryption key\n", | ||||
|       "sci.electronics: don thanks voltage used know does like circuit power use\n", | ||||
|       "sci.med: skepticism cadre dsl banks chastity n3jxp pitt gordon geb msg\n", | ||||
|       "sci.space: just lunar earth shuttle like moon launch orbit nasa space\n", | ||||
|       "soc.religion.christian: believe faith christian christ bible people christians church jesus god\n", | ||||
|       "talk.politics.guns: just law firearms government fbi don weapons people guns gun\n", | ||||
|       "talk.politics.mideast: said arabs arab turkish people armenians armenian jews israeli israel\n", | ||||
|       "talk.politics.misc: know state clinton president just think tax don government people\n", | ||||
|       "talk.religion.misc: think don koresh objective christians bible people christian jesus god\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We can review the top features per topic in Bayes (attribute coef_)\n", | ||||
|     "import numpy as np\n", | ||||
| @@ -373,28 +226,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 10, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[ 2 15]\n", | ||||
|       "['comp.os.ms-windows.misc', 'soc.religion.christian']\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "/home/cif/anaconda3/lib/python3.5/site-packages/numpy/core/fromnumeric.py:2652: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.\n", | ||||
|       "  VisibleDeprecationWarning)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We try the classifier in two new docs\n", | ||||
|     "\n", | ||||
| @@ -435,7 +269,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -455,9 +289,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.2" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -76,11 +76,20 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 1, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "(2034, 2807)" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 1, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "from sklearn.datasets import fetch_20newsgroups\n", | ||||
|     "\n", | ||||
| @@ -122,10 +131,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": 2, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from gensim import matutils\n", | ||||
| @@ -153,9 +160,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 3, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from gensim.models.ldamodel import LdaModel\n", | ||||
| @@ -168,11 +173,27 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 4, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "[(0,\n", | ||||
|        "  '0.007*\"car\" + 0.006*\"increased\" + 0.006*\"closely\" + 0.006*\"groups\" + 0.006*\"center\" + 0.006*\"88\" + 0.006*\"offer\" + 0.005*\"archie\" + 0.005*\"beginning\" + 0.005*\"comets\"'),\n", | ||||
|        " (1,\n", | ||||
|        "  '0.005*\"allow\" + 0.005*\"discuss\" + 0.005*\"condition\" + 0.004*\"certain\" + 0.004*\"member\" + 0.004*\"manipulation\" + 0.004*\"little\" + 0.003*\"proposal\" + 0.003*\"heavily\" + 0.003*\"obvious\"'),\n", | ||||
|        " (2,\n", | ||||
|        "  '0.002*\"led\" + 0.002*\"mechanism\" + 0.002*\"frank\" + 0.002*\"platform\" + 0.002*\"mormons\" + 0.002*\"concepts\" + 0.002*\"proton\" + 0.002*\"aeronautics\" + 0.002*\"header\" + 0.002*\"foreign\"'),\n", | ||||
|        " (3,\n", | ||||
|        "  '0.004*\"objects\" + 0.003*\"activity\" + 0.003*\"manhattan\" + 0.003*\"obtained\" + 0.003*\"eyes\" + 0.003*\"education\" + 0.003*\"netters\" + 0.003*\"complex\" + 0.003*\"europe\" + 0.002*\"missions\"')]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 4, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# check the topics\n", | ||||
|     "lda.print_topics(4)" | ||||
| @@ -187,10 +208,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": 5, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# import the gensim.corpora module to generate dictionary\n", | ||||
| @@ -221,11 +240,17 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 6, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Dictionary(10913 unique tokens: ['cel', 'ds', 'hi', 'nothing', 'prj']...)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# You can save the dictionary\n", | ||||
|     "dictionary.save('newsgroup.dict')\n", | ||||
| @@ -235,10 +260,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": 7, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Generate a list of docs, where each doc is a list of words\n", | ||||
| @@ -248,10 +271,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": 8, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# import the gensim.corpora module to generate dictionary\n", | ||||
| @@ -262,25 +283,38 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 15, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "WARNING:root:random_state not set so using default value\n", | ||||
|       "WARNING:root:failed to load state from newsgroups.dict.state: [Errno 2] No such file or directory: 'newsgroups.dict.state'\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# You can optionally save the  dictionary \n", | ||||
|     "\n", | ||||
|     "dictionary.save('newsgroups.dict')\n", | ||||
|     "lda = LdaModel.load('newsgroups.lda')" | ||||
|     "lda = LdaModel.load('newsgroups.dict')" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 16, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Dictionary(10913 unique tokens: ['cel', 'ds', 'hi', 'nothing', 'prj']...)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# We can print the dictionary; it is a mapping of ids to tokens\n", | ||||
|     "\n", | ||||
| @@ -289,10 +323,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "execution_count": 17, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# construct the corpus representing each document as a bag-of-words (bow) vector\n", | ||||
| @@ -301,10 +333,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": 18, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from gensim.models import TfidfModel\n", | ||||
| @@ -316,11 +346,17 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 19, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[(0, 0.24093628445650234), (1, 0.5700978153855775), (2, 0.10438175896914427), (3, 0.1598114653031772), (4, 0.722808853369507), (5, 0.24093628445650234)]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "#print tf-idf of first document\n", | ||||
|     "print(corpus_tfidf[0])" | ||||
| @@ -328,10 +364,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": 20, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from gensim.models.ldamodel import LdaModel\n", | ||||
| @@ -343,11 +377,27 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 21, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "[(0,\n", | ||||
|        "  '0.011*\"thanks\" + 0.010*\"targa\" + 0.008*\"mary\" + 0.008*\"western\" + 0.007*\"craig\" + 0.007*\"jeff\" + 0.006*\"yayayay\" + 0.006*\"phobos\" + 0.005*\"unfortunately\" + 0.005*\"martian\"'),\n", | ||||
|        " (1,\n", | ||||
|        "  '0.007*\"islam\" + 0.006*\"koresh\" + 0.006*\"moon\" + 0.006*\"bible\" + 0.006*\"plane\" + 0.006*\"ns\" + 0.005*\"zoroastrians\" + 0.005*\"joy\" + 0.005*\"lucky\" + 0.005*\"ssrt\"'),\n", | ||||
|        " (2,\n", | ||||
|        "  '0.009*\"whatever\" + 0.009*\"baptist\" + 0.007*\"cheers\" + 0.007*\"kent\" + 0.006*\"khomeini\" + 0.006*\"davidian\" + 0.005*\"gerald\" + 0.005*\"bull\" + 0.005*\"sorry\" + 0.005*\"jesus\"'),\n", | ||||
|        " (3,\n", | ||||
|        "  '0.005*\"pd\" + 0.004*\"baltimore\" + 0.004*\"also\" + 0.003*\"ipx\" + 0.003*\"dam\" + 0.003*\"feiner\" + 0.003*\"foley\" + 0.003*\"ideally\" + 0.003*\"srgp\" + 0.003*\"thank\"')]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 21, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# check the topics\n", | ||||
|     "lda_model.print_topics(4)" | ||||
| @@ -355,11 +405,17 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 22, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[(0, 0.09401487), (1, 0.08991001), (2, 0.08514047), (3, 0.7309346)]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# check the lda vector for the first document\n", | ||||
|     "corpus_lda = lda_model[corpus_tfidf]\n", | ||||
| @@ -368,11 +424,17 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 24, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[('lord', 1), ('god', 2)]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "#predict topics of a new doc\n", | ||||
|     "new_doc = \"God is love and God is the Lord\"\n", | ||||
| @@ -383,11 +445,17 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 25, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[(0, 0.06678458), (1, 0.8006135), (2, 0.06974816), (3, 0.062853776)]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "#transform into LDA space\n", | ||||
|     "lda_vector = lda_model[bow_vector]\n", | ||||
| @@ -396,11 +464,17 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 26, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "0.007*\"islam\" + 0.006*\"koresh\" + 0.006*\"moon\" + 0.006*\"bible\" + 0.006*\"plane\" + 0.006*\"ns\" + 0.005*\"zoroastrians\" + 0.005*\"joy\" + 0.005*\"lucky\" + 0.005*\"ssrt\"\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# print the document's single most prominent LDA topic\n", | ||||
|     "print(lda_model.print_topic(max(lda_vector, key=lambda item: item[1])[0]))" | ||||
| @@ -408,11 +482,18 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 27, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[(0, 0.110989906), (1, 0.670005), (2, 0.11422917), (3, 0.10477593)]\n", | ||||
|       "0.007*\"islam\" + 0.006*\"koresh\" + 0.006*\"moon\" + 0.006*\"bible\" + 0.006*\"plane\" + 0.006*\"ns\" + 0.005*\"zoroastrians\" + 0.005*\"joy\" + 0.005*\"lucky\" + 0.005*\"ssrt\"\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "lda_vector_tfidf = lda_model[tfidf_model[bow_vector]]\n", | ||||
|     "print(lda_vector_tfidf)\n", | ||||
| @@ -429,10 +510,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": 28, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from gensim.models.lsimodel import LsiModel\n", | ||||
| @@ -447,11 +526,27 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 29, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "[(0,\n", | ||||
|        "  '0.769*\"god\" + 0.345*\"jesus\" + 0.235*\"bible\" + 0.203*\"christian\" + 0.149*\"christians\" + 0.108*\"christ\" + 0.089*\"well\" + 0.085*\"koresh\" + 0.081*\"kent\" + 0.080*\"christianity\"'),\n", | ||||
|        " (1,\n", | ||||
|        "  '-0.863*\"thanks\" + -0.255*\"please\" + -0.160*\"hello\" + -0.153*\"hi\" + 0.123*\"god\" + -0.112*\"sorry\" + -0.088*\"could\" + -0.075*\"windows\" + -0.068*\"jpeg\" + -0.062*\"gif\"'),\n", | ||||
|        " (2,\n", | ||||
|        "  '-0.779*\"well\" + 0.229*\"god\" + -0.164*\"yes\" + 0.153*\"thanks\" + -0.135*\"ico\" + -0.135*\"tek\" + -0.132*\"beauchaine\" + -0.132*\"queens\" + -0.132*\"bronx\" + -0.131*\"manhattan\"'),\n", | ||||
|        " (3,\n", | ||||
|        "  '0.343*\"well\" + -0.335*\"ico\" + -0.334*\"tek\" + -0.328*\"bronx\" + -0.328*\"beauchaine\" + -0.328*\"queens\" + -0.325*\"manhattan\" + -0.305*\"com\" + -0.303*\"bob\" + -0.073*\"god\"')]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 29, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# check the topics\n", | ||||
|     "lsi_model.print_topics(4)" | ||||
| @@ -459,11 +554,17 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "execution_count": 30, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[(0, 0.24093628445650234), (1, 0.5700978153855775), (2, 0.10438175896914427), (3, 0.1598114653031772), (4, 0.722808853369507), (5, 0.24093628445650234)]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# check the tf-idf vector for the first document\n", | ||||
|     "print(corpus_tfidf[0])" | ||||
| @@ -497,7 +598,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -517,9 +618,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.2" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -84,17 +84,17 @@ | ||||
|     "\n", | ||||
|     "Each of these files contains 28 columns:\n", | ||||
|     "\n", | ||||
|     "* essay_id: A unique identifier for each individual student essay\n", | ||||
|     "* essay_set: 1-8, an id for each set of essays\n", | ||||
|     "* essay: The ascii text of a student's response\n", | ||||
|     "* rater1_domain1: Rater 1's domain 1 score; all essays have this\n", | ||||
|     "* rater2_domain1: Rater 2's domain 1 score; all essays have this\n", | ||||
|     "* rater3_domain1: Rater 3's domain 1 score; only some essays in set 8 have this.\n", | ||||
|     "* domain1_score: Resolved score between the raters; all essays have this\n", | ||||
|     "* rater1_domain2: Rater 1's domain 2 score; only essays in set 2 have this\n", | ||||
|     "* rater2_domain2: Rater 2's domain 2 score; only essays in set 2 have this\n", | ||||
|     "* domain2_score: Resolved score between the raters; only essays in set 2 have this\n", | ||||
|     "* rater1_trait1 score - rater3_trait6 score: trait scores for sets 7-8\n", | ||||
|     "* **essay_id**: A unique identifier for each individual student essay\n", | ||||
|     "* **essay_set**: 1-8, an id for each set of essays\n", | ||||
|     "* **essay**: The ascii text of a student's response\n", | ||||
|     "* **rater1_domain1**: Rater 1's domain 1 score; all essays have this\n", | ||||
|     "* **rater2_domain1**: Rater 2's domain 1 score; all essays have this\n", | ||||
|     "* **rater3_domain1**: Rater 3's domain 1 score; only some essays in set 8 have this.\n", | ||||
|     "* **domain1_score**: Resolved score between the raters; all essays have this\n", | ||||
|     "* **rater1_domain2**: Rater 1's domain 2 score; only essays in set 2 have this\n", | ||||
|     "* **rater2_domain2**: Rater 2's domain 2 score; only essays in set 2 have this\n", | ||||
|     "* **domain2_score**: Resolved score between the raters; only essays in set 2 have this\n", | ||||
|     "* **rater1_trait1 score - rater3_trait6 score**: trait scores for sets 7-8\n", | ||||
|     "\n", | ||||
|     "The dataset is provided in the folder *data-kaggle/training_set_rel3.tsv*.\n", | ||||
|     "\n", | ||||
| @@ -102,7 +102,7 @@ | ||||
|     "\n", | ||||
|     "The dataset has been anonymized to remove personally identifying information from the essays using the Named Entity Recognizer (NER) from the Stanford Natural Language Processing group and a variety of other approaches. The relevant entities are identified in the text and then replaced with a string such as \"@PERSON1.\"\n", | ||||
|     "\n", | ||||
|     "The entitities identified by NER are: \"PERSON\", \"ORGANIZATION\", \"LOCATION\", \"DATE\", \"TIME\", \"MONEY\", \"PERCENT\"\n", | ||||
|     "The entities identified by NER are: \"PERSON\", \"ORGANIZATION\", \"LOCATION\", \"DATE\", \"TIME\", \"MONEY\", \"PERCENT\"\n", | ||||
|     "\n", | ||||
|     "Other replacements made: \"MONTH\" (any month name not tagged as a date by the NER), \"EMAIL\" (anything that looks like an e-mail address), \"NUM\" (word containing digits or non-alphanumeric symbols), and \"CAPS\" (any capitalized word that doesn't begin a sentence, except in essays where more than 20% of the characters are capitalized letters), \"DR\" (any word following \"Dr.\" with or without the period, with any capitalization, that doesn't fall into any of the above), \"CITY\" and \"STATE\" (various cities and states)." | ||||
|    ] | ||||
| @@ -123,183 +123,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 1, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|        "<div>\n", | ||||
|        "<table border=\"1\" class=\"dataframe\">\n", | ||||
|        "  <thead>\n", | ||||
|        "    <tr style=\"text-align: right;\">\n", | ||||
|        "      <th></th>\n", | ||||
|        "      <th>essay_id</th>\n", | ||||
|        "      <th>essay_set</th>\n", | ||||
|        "      <th>essay</th>\n", | ||||
|        "      <th>rater1_domain1</th>\n", | ||||
|        "      <th>rater2_domain1</th>\n", | ||||
|        "      <th>rater3_domain1</th>\n", | ||||
|        "      <th>domain1_score</th>\n", | ||||
|        "      <th>rater1_domain2</th>\n", | ||||
|        "      <th>rater2_domain2</th>\n", | ||||
|        "      <th>domain2_score</th>\n", | ||||
|        "      <th>...</th>\n", | ||||
|        "      <th>rater2_trait3</th>\n", | ||||
|        "      <th>rater2_trait4</th>\n", | ||||
|        "      <th>rater2_trait5</th>\n", | ||||
|        "      <th>rater2_trait6</th>\n", | ||||
|        "      <th>rater3_trait1</th>\n", | ||||
|        "      <th>rater3_trait2</th>\n", | ||||
|        "      <th>rater3_trait3</th>\n", | ||||
|        "      <th>rater3_trait4</th>\n", | ||||
|        "      <th>rater3_trait5</th>\n", | ||||
|        "      <th>rater3_trait6</th>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </thead>\n", | ||||
|        "  <tbody>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>0</th>\n", | ||||
|        "      <td>1</td>\n", | ||||
|        "      <td>1</td>\n", | ||||
|        "      <td>Dear local newspaper, I think effects computer...</td>\n", | ||||
|        "      <td>4</td>\n", | ||||
|        "      <td>4</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>8</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1</th>\n", | ||||
|        "      <td>2</td>\n", | ||||
|        "      <td>1</td>\n", | ||||
|        "      <td>Dear @CAPS1 @CAPS2, I believe that using compu...</td>\n", | ||||
|        "      <td>5</td>\n", | ||||
|        "      <td>4</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>9</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>2</th>\n", | ||||
|        "      <td>3</td>\n", | ||||
|        "      <td>1</td>\n", | ||||
|        "      <td>Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...</td>\n", | ||||
|        "      <td>4</td>\n", | ||||
|        "      <td>3</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>7</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>3</th>\n", | ||||
|        "      <td>4</td>\n", | ||||
|        "      <td>1</td>\n", | ||||
|        "      <td>Dear Local Newspaper, @CAPS1 I have found that...</td>\n", | ||||
|        "      <td>5</td>\n", | ||||
|        "      <td>5</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>10</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>...</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "      <td>NaN</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </tbody>\n", | ||||
|        "</table>\n", | ||||
|        "<p>4 rows × 28 columns</p>\n", | ||||
|        "</div>" | ||||
|       ], | ||||
|       "text/plain": [ | ||||
|        "   essay_id  essay_set                                              essay  \\\n", | ||||
|        "0         1          1  Dear local newspaper, I think effects computer...   \n", | ||||
|        "1         2          1  Dear @CAPS1 @CAPS2, I believe that using compu...   \n", | ||||
|        "2         3          1  Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...   \n", | ||||
|        "3         4          1  Dear Local Newspaper, @CAPS1 I have found that...   \n", | ||||
|        "\n", | ||||
|        "   rater1_domain1  rater2_domain1  rater3_domain1  domain1_score  \\\n", | ||||
|        "0               4               4             NaN              8   \n", | ||||
|        "1               5               4             NaN              9   \n", | ||||
|        "2               4               3             NaN              7   \n", | ||||
|        "3               5               5             NaN             10   \n", | ||||
|        "\n", | ||||
|        "   rater1_domain2  rater2_domain2  domain2_score      ...        \\\n", | ||||
|        "0             NaN             NaN            NaN      ...         \n", | ||||
|        "1             NaN             NaN            NaN      ...         \n", | ||||
|        "2             NaN             NaN            NaN      ...         \n", | ||||
|        "3             NaN             NaN            NaN      ...         \n", | ||||
|        "\n", | ||||
|        "   rater2_trait3  rater2_trait4  rater2_trait5  rater2_trait6  rater3_trait1  \\\n", | ||||
|        "0            NaN            NaN            NaN            NaN            NaN   \n", | ||||
|        "1            NaN            NaN            NaN            NaN            NaN   \n", | ||||
|        "2            NaN            NaN            NaN            NaN            NaN   \n", | ||||
|        "3            NaN            NaN            NaN            NaN            NaN   \n", | ||||
|        "\n", | ||||
|        "   rater3_trait2  rater3_trait3  rater3_trait4  rater3_trait5  rater3_trait6  \n", | ||||
|        "0            NaN            NaN            NaN            NaN            NaN  \n", | ||||
|        "1            NaN            NaN            NaN            NaN            NaN  \n", | ||||
|        "2            NaN            NaN            NaN            NaN            NaN  \n", | ||||
|        "3            NaN            NaN            NaN            NaN            NaN  \n", | ||||
|        "\n", | ||||
|        "[4 rows x 28 columns]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 1, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "import pandas as pd\n", | ||||
|     "\n", | ||||
| @@ -311,44 +137,18 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "(12976, 28)" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 2, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "df_orig.shape" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 3, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "(1783, 3)" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 3, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We filter the data of the essay_set number 1, and we keep only two columns for this \n", | ||||
|     "# example\n", | ||||
| @@ -359,83 +159,17 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 4, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/html": [ | ||||
|        "<div>\n", | ||||
|        "<table border=\"1\" class=\"dataframe\">\n", | ||||
|        "  <thead>\n", | ||||
|        "    <tr style=\"text-align: right;\">\n", | ||||
|        "      <th></th>\n", | ||||
|        "      <th>essay_id</th>\n", | ||||
|        "      <th>essay</th>\n", | ||||
|        "      <th>domain1_score</th>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </thead>\n", | ||||
|        "  <tbody>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>0</th>\n", | ||||
|        "      <td>1</td>\n", | ||||
|        "      <td>Dear local newspaper, I think effects computer...</td>\n", | ||||
|        "      <td>8</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>1</th>\n", | ||||
|        "      <td>2</td>\n", | ||||
|        "      <td>Dear @CAPS1 @CAPS2, I believe that using compu...</td>\n", | ||||
|        "      <td>9</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>2</th>\n", | ||||
|        "      <td>3</td>\n", | ||||
|        "      <td>Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...</td>\n", | ||||
|        "      <td>7</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>3</th>\n", | ||||
|        "      <td>4</td>\n", | ||||
|        "      <td>Dear Local Newspaper, @CAPS1 I have found that...</td>\n", | ||||
|        "      <td>10</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "    <tr>\n", | ||||
|        "      <th>4</th>\n", | ||||
|        "      <td>5</td>\n", | ||||
|        "      <td>Dear @LOCATION1, I know having computers has a...</td>\n", | ||||
|        "      <td>8</td>\n", | ||||
|        "    </tr>\n", | ||||
|        "  </tbody>\n", | ||||
|        "</table>\n", | ||||
|        "</div>" | ||||
|       ], | ||||
|       "text/plain": [ | ||||
|        "   essay_id                                              essay  domain1_score\n", | ||||
|        "0         1  Dear local newspaper, I think effects computer...              8\n", | ||||
|        "1         2  Dear @CAPS1 @CAPS2, I believe that using compu...              9\n", | ||||
|        "2         3  Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...              7\n", | ||||
|        "3         4  Dear Local Newspaper, @CAPS1 I have found that...             10\n", | ||||
|        "4         5  Dear @LOCATION1, I know having computers has a...              8" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 4, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "df[0:5]" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 5, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Define X and Y\n", | ||||
| @@ -468,10 +202,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 6, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Generic Transformer \n", | ||||
| @@ -509,10 +241,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 7, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Sample of statistics using nltk\n", | ||||
| @@ -541,10 +271,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 8, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.base import BaseEstimator, TransformerMixin\n", | ||||
| @@ -581,10 +309,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 11, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.base import BaseEstimator, TransformerMixin\n", | ||||
| @@ -635,10 +361,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 10, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.pipeline import Pipeline, FeatureUnion\n", | ||||
| @@ -669,28 +393,17 @@ | ||||
|     "\n", | ||||
|     "The basic idea is:\n", | ||||
|     "* **Pipelines** consist of sequential steps: one step works on the results of the previous step\n", | ||||
|     "* ** FeatureUnions** consist of parallel tasks whose result is grouped when all have finished." | ||||
|     "* **FeatureUnions** consist of parallel tasks whose result is grouped when all have finished." | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 37, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Scores in every iteration [ 0.39798206  0.27497194]\n", | ||||
|       "Accuracy: 0.34 (+/- 0.12)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from sklearn.naive_bayes import MultinomialNB\n", | ||||
|     "from sklearn.cross_validation import cross_val_score, KFold\n", | ||||
|     "from sklearn.model_selection import cross_val_score, KFold\n", | ||||
|     "from sklearn.metrics import classification_report\n", | ||||
|     "from sklearn.feature_extraction import DictVectorizer\n", | ||||
|     "from sklearn.preprocessing import FunctionTransformer\n", | ||||
| @@ -714,7 +427,7 @@ | ||||
|     "                            ])),\n", | ||||
|     "                    ('lda', Pipeline([ \n", | ||||
|     "                                ('count', CountVectorizer(tokenizer=custom_tokenizer)),\n", | ||||
|     "                                ('lda',  LatentDirichletAllocation(n_topics=4, max_iter=5,\n", | ||||
|     "                                ('lda',  LatentDirichletAllocation(n_components=4, max_iter=5,\n", | ||||
|     "                                                       learning_method='online', \n", | ||||
|     "                                                       learning_offset=50.,\n", | ||||
|     "                                                       random_state=0))\n", | ||||
| @@ -726,7 +439,7 @@ | ||||
|     "\n", | ||||
|     "# Using KFold validation\n", | ||||
|     "\n", | ||||
|     "cv = KFold(X.shape[0], 2, shuffle=True, random_state=33)\n", | ||||
|     "cv = KFold(2, shuffle=True, random_state=33)\n", | ||||
|     "scores = cross_val_score(pipeline, X, y, cv=cv)\n", | ||||
|     "print(\"Scores in every iteration\", scores)\n", | ||||
|     "print(\"Accuracy: %0.2f (+/- %0.2f)\" % (scores.mean(), scores.std() * 2))" | ||||
| @@ -734,9 +447,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "The result is not very good :(." | ||||
|    ] | ||||
| @@ -769,7 +480,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -789,9 +500,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -116,7 +116,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -136,9 +136,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -95,7 +95,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -115,7 +115,7 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.3" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid,  © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid,  © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -117,9 +117,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Example: we use Jupyter as a calculator, let's execute 2+2" | ||||
|    ] | ||||
| @@ -140,20 +138,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "4" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 2, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "2+2" | ||||
|    ] | ||||
| @@ -171,7 +158,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -191,7 +178,7 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.3" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -39,31 +39,16 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "## 1. Booleans" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 1, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "False" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 1, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "True and False # operations with booleans" | ||||
|    ] | ||||
| @@ -71,9 +56,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "not True" | ||||
| @@ -82,9 +65,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "True or False" | ||||
| @@ -111,9 +92,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "2 + 2 # 2 plus 2 (integers)" | ||||
| @@ -122,9 +101,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "2.0 * 3.0 # 2.0 times 3.0 (floats)" | ||||
| @@ -133,9 +110,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "2.0 ** 4.0 # 2.0 to the power of 4 (float)" | ||||
| @@ -144,9 +119,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "(3 + 4j) + (5 + 5j) #add two complex numbers" | ||||
| @@ -155,9 +128,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "10 / 3 # classic division" | ||||
| @@ -166,9 +137,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "10 // 3 # floor division" | ||||
| @@ -177,9 +146,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "10 % 3 # remainder" | ||||
| @@ -188,9 +155,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "10e158*17e158 #overflow shown as 'inf', infinity" | ||||
| @@ -199,9 +164,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(10)" | ||||
| @@ -210,9 +173,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(2 + 3j)" | ||||
| @@ -221,9 +182,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(2.1)" | ||||
| @@ -232,9 +191,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(2E3)" | ||||
| @@ -249,9 +206,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Strings are **immutable sequences** of Unicode code points.\n", | ||||
|     "\n", | ||||
| @@ -261,9 +216,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "\"This is a string\"" | ||||
| @@ -272,9 +225,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "'This is also a string'" | ||||
| @@ -283,9 +234,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "\"This is a string containing single quotes 'hi'\"" | ||||
| @@ -294,9 +243,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "'This is string containing double quotes \"hi\"'" | ||||
| @@ -305,9 +252,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "'''This is \n", | ||||
| @@ -328,9 +273,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "\"String with special characters: \\n newline, \\a beep and \\\\ slash\"" | ||||
| @@ -339,9 +282,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "\"concatenate \" + \"two strings\" #use of '+' for concatenating two strings" | ||||
| @@ -350,9 +291,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "len('hola') # length of a string" | ||||
| @@ -361,9 +300,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(\"hola\")" | ||||
| @@ -379,9 +316,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s = \"hola\" # assign the string value \"hola\" to the variable s" | ||||
| @@ -390,9 +325,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s # get the value of s" | ||||
| @@ -401,9 +334,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s[0]" | ||||
| @@ -412,9 +343,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s[1]" | ||||
| @@ -423,9 +352,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s[3]" | ||||
| @@ -434,9 +361,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s [-1] # we can start from the beginning (index 0, 1, 2, ...) or from the last position (-1, -2, ...)" | ||||
| @@ -452,9 +377,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s[0:2] #slice [0,2)" | ||||
| @@ -463,9 +386,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s[:2] #slice [0,2)" | ||||
| @@ -474,9 +395,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s[:] #slice [0, len(s))" | ||||
| @@ -485,9 +404,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s[:-2]" | ||||
| @@ -496,9 +413,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s[-4:-2]" | ||||
| @@ -518,9 +433,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "se = \"This is a string\"" | ||||
| @@ -529,9 +442,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "se[::1] # moves from 0 to len, and the index is incremented by 1" | ||||
| @@ -540,9 +451,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "se[0:14:2] #take the even indexed characters from 0 to 14" | ||||
| @@ -551,9 +460,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "se[::-1] #reverse the string" | ||||
| @@ -562,9 +469,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "se[:4:-1]" | ||||
| @@ -580,9 +485,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "a = 'b'" | ||||
| @@ -591,9 +494,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "se + \" plus \" + se + \" plus \"+ a*3" | ||||
| @@ -611,9 +512,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s.lower()" | ||||
| @@ -622,9 +521,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s.upper()" | ||||
| @@ -633,9 +530,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s.split('o') # splits String " | ||||
| @@ -660,9 +555,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "\"hohoho\".split('h')" | ||||
| @@ -671,9 +564,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(\"hohoho\".split('h'))" | ||||
| @@ -692,7 +583,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -712,9 +603,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -42,9 +42,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "## 1. Lists" | ||||
|    ] | ||||
| @@ -52,9 +50,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l = [1, 2, 3, 4, 5, 6]" | ||||
| @@ -63,9 +59,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l" | ||||
| @@ -74,9 +68,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l[0:3] # we can use slicing in sequence types" | ||||
| @@ -85,9 +77,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "other_list = [1, 0.0, \"hola\"] #lists can have elements of different types" | ||||
| @@ -96,9 +86,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "other_list" | ||||
| @@ -107,9 +95,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l + other_list # we can add lists (append)" | ||||
| @@ -118,9 +104,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l * 3 # we can repeat a list n times" | ||||
| @@ -129,9 +113,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "len(l) # length of a list (as Strings)" | ||||
| @@ -140,9 +122,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l.append(7) #append at the end of the list. Check help with Shift-tab, and methods with tab" | ||||
| @@ -151,9 +131,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l" | ||||
| @@ -162,9 +140,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l.pop() # remove last element" | ||||
| @@ -173,9 +149,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l" | ||||
| @@ -184,9 +158,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l.pop(2) # remove element at index 2" | ||||
| @@ -195,9 +167,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l" | ||||
| @@ -206,18 +176,14 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l.insert(2,3) # insert at index 2 the value 3" | ||||
| @@ -226,9 +192,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l" | ||||
| @@ -237,9 +201,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l.reverse()" | ||||
| @@ -248,9 +210,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l" | ||||
| @@ -259,9 +219,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l.sort()" | ||||
| @@ -270,9 +228,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l" | ||||
| @@ -281,9 +237,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l.remove(3) # remove first occurrence of 3 from l. Remember: remove (element) vs pop(index)" | ||||
| @@ -292,9 +246,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l" | ||||
| @@ -303,9 +255,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l[0] = 0 # lists are mutable" | ||||
| @@ -314,9 +264,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l" | ||||
| @@ -325,9 +273,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "2 in l # check if an element is in a list" | ||||
| @@ -336,9 +282,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "7 in l # check if an element is in a list " | ||||
| @@ -347,9 +291,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "4 not in l # check if an element is not in a list" | ||||
| @@ -358,9 +300,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l.index(4) # search for an item" | ||||
| @@ -369,9 +309,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l.index(-1) # search for an item, error since it is not in the list" | ||||
| @@ -380,9 +318,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "matrix = [[1,2], [3,4]] # matrix" | ||||
| @@ -391,9 +327,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "matrix" | ||||
| @@ -402,9 +336,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "matrix[0][0]" | ||||
| @@ -413,9 +345,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "matrix[0][1]" | ||||
| @@ -424,9 +354,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(matrix)" | ||||
| @@ -455,9 +383,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "tuple = ('a', 1)" | ||||
| @@ -466,9 +392,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "tuple" | ||||
| @@ -476,9 +400,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Tuples implement all the common [sequence operators](https://docs.python.org/3/library/stdtypes.html#typesseq-common), such as slicing, concatenation, len, etc." | ||||
|    ] | ||||
| @@ -486,9 +408,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "tuple[::-1]" | ||||
| @@ -497,9 +417,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "len(tuple)" | ||||
| @@ -508,9 +426,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "tuple * 2 + ('b', 'c', 2.1, True)" | ||||
| @@ -519,9 +435,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "tuple[1]" | ||||
| @@ -530,9 +444,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "tuple[1] = 2 # Error, tuples are immutable" | ||||
| @@ -541,9 +453,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(tuple)" | ||||
| @@ -558,9 +468,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "A [range](https://docs.python.org/3/library/stdtypes.html#range) represents an immutable sequence of numbers. Ranges are created with two constructors: *range(stop)* or *range(start, stop, [step])*. \n", | ||||
|     "\n", | ||||
| @@ -569,10 +477,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 1, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "r = range(10)" | ||||
| @@ -580,66 +486,27 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "range(0, 10)" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 2, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "r" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 3, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "True" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 3, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "5 in r # check if a number is in a range" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 4, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "2" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 4, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "r[2] # Get a value" | ||||
|    ] | ||||
| @@ -647,9 +514,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(r)" | ||||
| @@ -658,9 +523,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "list(range(10))" | ||||
| @@ -669,9 +532,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "list(range(1,10,2))" | ||||
| @@ -690,7 +551,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -710,9 +571,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -42,9 +42,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "## 1. Sets" | ||||
|    ] | ||||
| @@ -52,9 +50,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_set = set() #create a set\n", | ||||
| @@ -64,9 +60,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_set.add(1) # add an element\n", | ||||
| @@ -76,9 +70,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_set.add(2) # add another element" | ||||
| @@ -87,9 +79,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_set" | ||||
| @@ -98,9 +88,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_set.add(3) # add another one\n", | ||||
| @@ -110,9 +98,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_set.add(1) #try to add a repeated element\n", | ||||
| @@ -122,9 +108,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s2 = set(range(10)) # we can create a set from a range\n", | ||||
| @@ -134,9 +118,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "l = ['a', 'a', 'b', 'c', 'c', 'c']" | ||||
| @@ -145,9 +127,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s3 = set(l) # if we create a set from a list, elements are not repeated\n", | ||||
| @@ -157,9 +137,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "len(s3) " | ||||
| @@ -168,9 +146,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "s3.union(s2) # we can use set methods: union(), intersection(), difference(), ..." | ||||
| @@ -179,9 +155,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "3 in my_set #check membership" | ||||
| @@ -190,9 +164,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(s3)" | ||||
| @@ -208,9 +180,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_dictionary = {'key1': 1, 'key2': 2, 'key3': 3} # pairs of key-value mappings\n", | ||||
| @@ -220,9 +190,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_dictionary['key1'] #retrieve a value given a key" | ||||
| @@ -231,9 +199,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_dict = dict()\n", | ||||
| @@ -246,9 +212,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_dict == my_dictionary # check if both dictionaries are equal" | ||||
| @@ -257,9 +221,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_dict2 = {'one': {'two': {'three': 'Nested dict'}}} #nested dictionary\n", | ||||
| @@ -269,9 +231,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_dict2['one']['two']['three'] #access the value" | ||||
| @@ -279,9 +239,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Dictionaries have different methods, check them with Tab." | ||||
|    ] | ||||
| @@ -289,9 +247,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_dict.keys() # in Python3 we get a View object that changes when the dictionary changes" | ||||
| @@ -300,9 +256,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "list(my_dict.keys()) # we can convert it to a list, we see dictionaries are unordered" | ||||
| @@ -311,9 +265,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "my_dict.values()" | ||||
| @@ -322,9 +274,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "list(my_dict.values())" | ||||
| @@ -333,9 +283,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(my_dict)" | ||||
| @@ -354,7 +302,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -374,9 +322,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -59,31 +59,16 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "## 1. Conditional statements: if, elif, else" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 1, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "6" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 1, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "import random # import random before using it\n", | ||||
|     "x = random.randrange(1, 10) # generate a random integer in [1, 10) (1 included, 10 excluded)\n", | ||||
| @@ -93,9 +78,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Execute the previous cell and this one, in order, several times\n", | ||||
| @@ -110,9 +93,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Only one branch\n", | ||||
| @@ -125,9 +106,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Python has no switch statement for multiple branches\n", | ||||
| @@ -158,9 +137,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# for with ranges\n", | ||||
| @@ -171,9 +148,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# for with lists\n", | ||||
| @@ -185,9 +160,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# for with tuples\n", | ||||
| @@ -199,9 +172,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# for with dictionaries\n", | ||||
| @@ -213,9 +184,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We get only the keys. If we want the pairs we need to create a generator (we will see this later)\n", | ||||
| @@ -233,9 +202,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "x = 5\n", | ||||
| @@ -247,9 +214,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Else is optional\n", | ||||
| @@ -261,9 +226,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "### 2.3. Break, continue, pass\n", | ||||
|     "\n", | ||||
| @@ -277,9 +240,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Example find an element, else executed at the end\n", | ||||
| @@ -295,9 +256,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Example else\n", | ||||
| @@ -313,9 +272,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We improve above code with break\n", | ||||
| @@ -333,9 +290,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We improve above code with break\n", | ||||
| @@ -353,9 +308,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Print numbers from 0 to 15 which are not multiples of 3\n", | ||||
| @@ -368,9 +321,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Find the first occurrence of an element in a list\n", | ||||
| @@ -387,9 +338,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Example of pass, when we do not want to do anything\n", | ||||
| @@ -418,9 +367,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Syntax: first what we want to include in the list (x) and then how to obtain x\n", | ||||
| @@ -432,9 +379,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# list  = {x² : x in {0 ... 9}}\n", | ||||
| @@ -445,9 +390,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# list  = {x² : x in {0 ... 9}, x is even}\n", | ||||
| @@ -468,7 +411,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -488,9 +431,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -42,9 +42,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def sum(a, b):\n", | ||||
| @@ -56,9 +54,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#keyword parameters\n", | ||||
| @@ -69,9 +65,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def greetings():\n", | ||||
| @@ -85,9 +79,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We can assign a function to a variable. Fun\n", | ||||
| @@ -97,9 +89,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(d)" | ||||
| @@ -108,9 +98,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(greetings)" | ||||
| @@ -127,9 +115,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def reverse(l):\n", | ||||
| @@ -154,9 +140,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def sum(a, b=0):\n", | ||||
| @@ -175,9 +159,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#variable number of arguments: *\n", | ||||
| @@ -194,9 +176,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Packing \n", | ||||
| @@ -209,9 +189,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "## Lambda functions\n", | ||||
|     "\n", | ||||
| @@ -221,9 +199,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def sq(x):\n", | ||||
| @@ -264,9 +240,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "print(1, 2, 3, 4)\n", | ||||
| @@ -285,13 +259,11 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "import math\n", | ||||
|     "print('Number: {},{}'.format(1, 2)) #replaces [] inside the string by the arguments of format\n", | ||||
|     "print('Number: {},{}'.format(1, 2)) #replaces {} inside the string by the arguments of format\n", | ||||
|     "print('PI #{}#'.format(math.pi))\n", | ||||
|     "print('PI #{:5.2f}#'.format(math.pi)) # at least 5 characters with two decimals" | ||||
|    ] | ||||
| @@ -308,9 +280,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "num = input('Enter a number ')\n", | ||||
| @@ -330,7 +300,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -350,9 +320,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -51,9 +51,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "a = 2\n", | ||||
| @@ -74,9 +72,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "type(a)" | ||||
| @@ -103,9 +99,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "a = 'd'\n", | ||||
| @@ -115,9 +109,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "a = 'd' + 3\n", | ||||
| @@ -126,18 +118,14 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "## 2. Mutability" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Objects whose value can change are said to be **mutable**; objects whose value is unchangeable once they are created are called **immutable**.\n", | ||||
|     "\n", | ||||
| @@ -148,9 +136,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Exercise mutable type\n", | ||||
| @@ -166,9 +152,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Exercise mutable type\n", | ||||
| @@ -182,9 +166,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Exercise mutable type\n", | ||||
| @@ -200,9 +182,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Exercise mutable type\n", | ||||
| @@ -225,9 +205,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Example of a local variable\n", | ||||
| @@ -246,9 +224,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Access global variables\n", | ||||
| @@ -275,9 +251,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "NUMBER_OF_LIFES = 5\n", | ||||
| @@ -302,7 +276,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -322,9 +296,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -46,10 +46,8 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Example class declaration\n", | ||||
| @@ -67,29 +65,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 3, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "<__main__.TV_Set object at 0x7fec69171860> off\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "__main__.TV_Set" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 3, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Example object instantiation\n", | ||||
|     "\n", | ||||
| @@ -100,19 +78,9 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 4, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Samsung on\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Call on method\n", | ||||
|     "my_tv.on()\n", | ||||
| @@ -132,9 +100,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Example class declaration\n", | ||||
| @@ -174,9 +140,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "class Person:\n", | ||||
| @@ -192,9 +156,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Example __str(self)__\n", | ||||
| @@ -235,9 +197,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Now we could change the age of Pedro to a negative value\n", | ||||
| @@ -255,9 +215,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "class Person:\n", | ||||
| @@ -296,7 +254,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -316,9 +274,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -40,9 +40,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Example SyntaxError - missing semicolon in while\n", | ||||
| @@ -61,9 +59,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Example TypeError - wrong use of '+' with different types\n", | ||||
| @@ -73,10 +69,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false, | ||||
|     "scrolled": true | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Example NameError:  variable not defined\n", | ||||
| @@ -98,9 +91,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Example\n", | ||||
| @@ -116,9 +107,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Example with finally\n", | ||||
| @@ -135,9 +124,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Example with else and finally\n", | ||||
| @@ -164,9 +151,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def add(a, b):\n", | ||||
| @@ -191,7 +176,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -211,9 +196,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "source": [ | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" | ||||
|     "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © Carlos A. Iglesias" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -46,9 +46,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We can import the module plural with import, but we should use the full name\n", | ||||
| @@ -59,9 +57,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "import babel.messages.plurals\n", | ||||
| @@ -71,9 +67,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from babel.messages import plurals # with from-import, we can use the short name\n", | ||||
| @@ -83,9 +77,7 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from babel.messages.plurals import get_plural # now we can use directly get_plural()\n", | ||||
| @@ -116,7 +108,7 @@ | ||||
|    "source": [ | ||||
|     "The notebook is freely licensed under the [Creative Commons Attribution Share-Alike license](https://creativecommons.org/licenses/by/2.0/).  \n", | ||||
|     "\n", | ||||
|     "© 2016 Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|     "© Carlos A. Iglesias, Universidad Politécnica de Madrid." | ||||
|    ] | ||||
|   } | ||||
|  ], | ||||
| @@ -136,9 +128,26 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.5.1" | ||||
|    "version": "3.7.1" | ||||
|   }, | ||||
|   "latex_envs": { | ||||
|    "LaTeX_envs_menu_present": true, | ||||
|    "autocomplete": true, | ||||
|    "bibliofile": "biblio.bib", | ||||
|    "cite_by": "apalike", | ||||
|    "current_citInitial": 1, | ||||
|    "eqLabelWithNumbers": true, | ||||
|    "eqNumInitial": 1, | ||||
|    "hotkeys": { | ||||
|     "equation": "Ctrl-E", | ||||
|     "itemize": "Ctrl-I" | ||||
|    }, | ||||
|    "labels_anchors": false, | ||||
|    "latex_user_defs": false, | ||||
|    "report_style_numbering": false, | ||||
|    "user_envs_cfg": false | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 0 | ||||
|  "nbformat_minor": 1 | ||||
| } | ||||
|   | ||||
| @@ -68,23 +68,16 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "editable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "6455a9642f93288f6c74b88d0892c4c7", | ||||
|      "grade": false, | ||||
|      "grade_id": "cell-d7f1ea9c021693b8", | ||||
|      "locked": true, | ||||
|      "schema_version": 1, | ||||
|      "solution": false | ||||
|     } | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Install a pip package in the current Jupyter kernel\n", | ||||
|     "# Install a pip package in the current Jupyter kernel.\n", | ||||
|     "import sys\n", | ||||
|     "!{sys.executable} -m pip install -r requirements.txt\n" | ||||
|     "import site\n", | ||||
|     "usersite = site.getusersitepackages()\n", | ||||
|     "if usersite not in sys.path:\n", | ||||
|     "    sys.path.append(usersite)\n", | ||||
|     "!{sys.executable} -m pip install --user -r requirements.txt" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @@ -531,7 +524,18 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "editable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "d6f1bf2230282256e5fcb85dba0eef45", | ||||
|      "grade": false, | ||||
|      "grade_id": "cell-3241bf07ae153beb", | ||||
|      "locked": true, | ||||
|      "schema_version": 1, | ||||
|      "solution": false | ||||
|     } | ||||
|    }, | ||||
|    "source": [ | ||||
|     "To make sure we are following Principles 1 and 2, we should use URIs that can be queried.\n", | ||||
|     "For the sake of this exercise, you have to use the made-up `http://example/sitc/` as the base for our URIs.\n", | ||||
| @@ -586,7 +590,19 @@ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "editable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "e8ba71b32e6d4f15aef9dc7fe70387fe", | ||||
|      "grade": true, | ||||
|      "grade_id": "cell-2fb6e144a6691ede", | ||||
|      "locked": true, | ||||
|      "points": 10, | ||||
|      "schema_version": 1, | ||||
|      "solution": false | ||||
|     } | ||||
|    }, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# This will check that your definition for the first exercise is correct.\n", | ||||
| @@ -595,14 +611,36 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "editable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "75d90c9a83c694f61e51bd5c47a672d9", | ||||
|      "grade": false, | ||||
|      "grade_id": "cell-63a55e7b8b195d59", | ||||
|      "locked": true, | ||||
|      "schema_version": 1, | ||||
|      "solution": false | ||||
|     } | ||||
|    }, | ||||
|    "source": [ | ||||
|     "## Exercise 2: Explore existing data" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "editable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "23632182da48df109721378408e57f01", | ||||
|      "grade": false, | ||||
|      "grade_id": "cell-3843c3ce98a77c56", | ||||
|      "locked": true, | ||||
|      "schema_version": 1, | ||||
|      "solution": false | ||||
|     } | ||||
|    }, | ||||
|    "source": [ | ||||
|     "The goal of this exercise is to explore and compare annotations from existing websites.\n", | ||||
|     "\n", | ||||
| @@ -618,7 +656,18 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "markdown", | ||||
|    "metadata": {}, | ||||
|    "metadata": { | ||||
|     "deletable": false, | ||||
|     "editable": false, | ||||
|     "nbgrader": { | ||||
|      "checksum": "6c4b25718f493ad5964370f412519543", | ||||
|      "grade": false, | ||||
|      "grade_id": "cell-f42c087c9065bb23", | ||||
|      "locked": true, | ||||
|      "schema_version": 1, | ||||
|      "solution": false | ||||
|     } | ||||
|    }, | ||||
|    "source": [ | ||||
|     "Let us explore some semantic annotations from popular websites.\n", | ||||
|     "\n", | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| import sys | ||||
| from future.standard_library import install_aliases | ||||
| from future.standard_library import install_aliases, print_function | ||||
| install_aliases() | ||||
|  | ||||
| from urllib import request, parse | ||||
| @@ -124,4 +124,4 @@ def check(testname): | ||||
|     definition = solution(testname) | ||||
|     if definition is None: | ||||
|         raise Exception('The definition for {} is empty or invalid.'.format(testname)) | ||||
|     return test(definition) | ||||
|     return test(definition) | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| from future.standard_library import print_function | ||||
| import operator | ||||
| import types | ||||
| import sys | ||||
| @@ -90,4 +91,4 @@ def test_example(g): | ||||
|          len(g)) | ||||
|     test('A person has been defined', | ||||
|          g.subjects(RDF.type, term.URIRef('http://xmlns.com/foaf/0.1/Person'))) | ||||
|     print('All tests passed. Well done!') | ||||
|     print('All tests passed. Well done!') | ||||
|   | ||||
		Reference in New Issue
	
	Block a user