Remove outputs and metadata

dveni-patch-1
J. Fernando Sánchez 5 years ago
parent a1be167cc0
commit c1d3ca38ea

@ -14,6 +14,12 @@ Also note that we have a code of conduct, please follow it in all your interacti
2. If you are adding code, ensure the changed notebooks can be run in a fresh environment. Include instructions to download 2. If you are adding code, ensure the changed notebooks can be run in a fresh environment. Include instructions to download
any additional dependencies. any additional dependencies.
3. Ensure any spurious changes are removed, such as compilation files (`pyc`) or metadata changes in a notebook. 3. Ensure any spurious changes are removed, such as compilation files (`pyc`) or metadata changes in a notebook.
You can automatically do so using nbstripout:
```
pip install nbstripout
nbstripout --install
```
This will install a git hook that strips all metadata from the notebooks before you commit changes to git.
4. Submit your pull request on GitHub. 4. Submit your pull request on GitHub.
5. A member of the GSI-UPM group will review your request. 5. A member of the GSI-UPM group will review your request.
6. The reviewer may ask for further changes before merging the contribution. Please, follow the reviewer's instructions before resubmitting. 6. The reviewer may ask for further changes before merging the contribution. Please, follow the reviewer's instructions before resubmitting.

File diff suppressed because it is too large Load Diff

@ -84,25 +84,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"0 5\n",
"1 10\n",
"2 15\n",
"dtype: int64"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
@ -124,25 +108,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"a 5\n",
"b 10\n",
"c 15\n",
"dtype: int64"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"d = {'a': 5, 'b': 10, 'c': 15}\n", "d = {'a': 5, 'b': 10, 'c': 15}\n",
"s = Series(d)\n", "s = Series(d)\n",
@ -151,22 +119,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'b', 'c'], dtype='object')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# We can get the list of indexes\n", "# We can get the list of indexes\n",
"s.index" "s.index"
@ -174,22 +129,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"array([ 5, 10, 15])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# and the values\n", "# and the values\n",
"s.values" "s.values"
@ -204,28 +146,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Madrid 3141991\n",
"Barcelona 1604555\n",
"Valencia 786189\n",
"Sevilla 693878\n",
"Zaragoza 664953\n",
"Malaga 569130\n",
"dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Series with population in 2015 of more populated cities in Spain\n", "# Series with population in 2015 of more populated cities in Spain\n",
"s = Series([3141991, 1604555, 786189, 693878, 664953, 569130], index=['Madrid', 'Barcelona', 'Valencia', 'Sevilla', \n", "s = Series([3141991, 1604555, 786189, 693878, 664953, 569130], index=['Madrid', 'Barcelona', 'Valencia', 'Sevilla', \n",
@ -235,22 +158,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"3141991"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Population of Madrid\n", "# Population of Madrid\n",
"s['Madrid']" "s['Madrid']"
@ -272,28 +182,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Madrid True\n",
"Barcelona True\n",
"Valencia False\n",
"Sevilla False\n",
"Zaragoza False\n",
"Malaga False\n",
"dtype: bool"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"#Boolean condition\n", "#Boolean condition\n",
"s > 1000000" "s > 1000000"
@ -301,24 +192,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Madrid 3141991\n",
"Barcelona 1604555\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Cities with population greater than 1.000.000\n", "# Cities with population greater than 1.000.000\n",
"s[s > 1000000]" "s[s > 1000000]"
@ -333,24 +209,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Madrid 3141991\n",
"Barcelona 1604555\n",
"dtype: int64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Cities with population greater than the mean\n", "# Cities with population greater than the mean\n",
"s[s > s.mean()]" "s[s > s.mean()]"
@ -358,25 +219,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Madrid 3141991\n",
"Barcelona 1604555\n",
"Valencia 786189\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Cities with population greater than the median\n", "# Cities with population greater than the median\n",
"s[s > s.median()]" "s[s > s.median()]"
@ -384,28 +229,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Madrid True\n",
"Barcelona True\n",
"Valencia True\n",
"Sevilla False\n",
"Zaragoza False\n",
"Malaga False\n",
"dtype: bool"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Check cities with a population greater than 700.000\n", "# Check cities with a population greater than 700.000\n",
"s > 700000" "s > 700000"
@ -413,25 +239,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Madrid 3141991\n",
"Barcelona 1604555\n",
"Valencia 786189\n",
"dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# List cities with a population greater than 700.000\n", "# List cities with a population greater than 700.000\n",
"s[s > 700000]" "s[s > 700000]"
@ -439,28 +249,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Madrid True\n",
"Barcelona True\n",
"Valencia True\n",
"Sevilla False\n",
"Zaragoza False\n",
"Malaga False\n",
"dtype: bool"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"#Another way to write the same boolean indexing selection\n", "#Another way to write the same boolean indexing selection\n",
"bigger_than_700000 = s > 700000\n", "bigger_than_700000 = s > 700000\n",
@ -469,25 +260,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Madrid 3141991\n",
"Barcelona 1604555\n",
"Valencia 786189\n",
"dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"#Cities with population > 700000\n", "#Cities with population > 700000\n",
"s[bigger_than_700000]" "s[bigger_than_700000]"
@ -509,28 +284,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Madrid 1570995.5\n",
"Barcelona 802277.5\n",
"Valencia 393094.5\n",
"Sevilla 346939.0\n",
"Zaragoza 332476.5\n",
"Malaga 284565.0\n",
"dtype: float64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Divide population by 2\n", "# Divide population by 2\n",
"s / 2" "s / 2"
@ -538,22 +294,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"1243449.3333333333"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Get the average population\n", "# Get the average population\n",
"s.mean()" "s.mean()"
@ -561,22 +304,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"3141991"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Get the highest population\n", "# Get the highest population\n",
"s.max()" "s.max()"
@ -598,28 +328,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Madrid 3320000\n",
"Barcelona 1604555\n",
"Valencia 786189\n",
"Sevilla 693878\n",
"Zaragoza 664953\n",
"Malaga 569130\n",
"dtype: int64"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Change population of one city\n", "# Change population of one city\n",
"s['Madrid'] = 3320000\n", "s['Madrid'] = 3320000\n",
@ -628,28 +339,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"Madrid 3652000.0\n",
"Barcelona 1765010.5\n",
"Valencia 864807.9\n",
"Sevilla 693878.0\n",
"Zaragoza 664953.0\n",
"Malaga 569130.0\n",
"dtype: float64"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Increase by 10% cities with population greater than 700000\n", "# Increase by 10% cities with population greater than 700000\n",
"s[s > 700000] = 1.1 * s[s > 700000]\n", "s[s > 700000] = 1.1 * s[s > 700000]\n",
@ -672,61 +364,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" one two\n",
"a 1.0 1.0\n",
"b 2.0 2.0\n",
"c 3.0 3.0\n",
"d NaN 4.0"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# We are going to create a DataFrame from a dict of Series\n", "# We are going to create a DataFrame from a dict of Series\n",
"d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),\n", "d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),\n",
@ -748,55 +388,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" one two\n",
"d NaN 4.0\n",
"b 2.0 2.0\n",
"a 1.0 1.0"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# We can filter\n", "# We can filter\n",
"df = DataFrame(d, index=['d', 'b', 'a'])\n", "df = DataFrame(d, index=['d', 'b', 'a'])\n",
@ -812,55 +406,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>two</th>\n",
" <th>three</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" two three\n",
"d 4.0 NaN\n",
"b 2.0 NaN\n",
"a 1.0 NaN"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"df = DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])\n", "df = DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])\n",
"df" "df"

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

@ -46,10 +46,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"import pandas as pd\n", "import pandas as pd\n",
@ -82,9 +80,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [] "source": []
}, },
@ -105,9 +101,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [] "source": []
}, },
@ -121,9 +115,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [] "source": []
}, },
@ -137,9 +129,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [] "source": []
}, },
@ -153,17 +143,13 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [] "source": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"How many passengers have survived? List them grouped by Sex and Pclass.\n", "How many passengers have survived? List them grouped by Sex and Pclass.\n",
"\n", "\n",
@ -173,17 +159,13 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [] "source": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": false
},
"source": [ "source": [
"Visualise df_1 as a histogram." "Visualise df_1 as a histogram."
] ]
@ -191,17 +173,13 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [] "source": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"# Feature Engineering" "# Feature Engineering"
] ]
@ -232,9 +210,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"df['FamilySize'] = df['SibSp'] + df['Parch']\n", "df['FamilySize'] = df['SibSp'] + df['Parch']\n",
@ -258,9 +234,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"df['Alone'] = (df.FamilySize == 0)\n", "df['Alone'] = (df.FamilySize == 0)\n",
@ -284,9 +258,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#Taken from http://www.analyticsvidhya.com/blog/2014/09/data-munging-python-using-pandas-baby-steps-python/\n", "#Taken from http://www.analyticsvidhya.com/blog/2014/09/data-munging-python-using-pandas-baby-steps-python/\n",
@ -307,9 +279,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"df['Salutation'].unique()" "df['Salutation'].unique()"
@ -318,9 +288,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"df.groupby(['Salutation']).size()" "df.groupby(['Salutation']).size()"
@ -336,9 +304,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def group_salutation(old_salutation):\n", "def group_salutation(old_salutation):\n",
@ -362,9 +328,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Distribution\n", "# Distribution\n",
@ -375,9 +339,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"df.boxplot(column='Age', by = 'Salutation', sym='k.')" "df.boxplot(column='Age', by = 'Salutation', sym='k.')"
@ -393,9 +355,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Specific features for Children and Female since there are more survivors\n", "# Specific features for Children and Female since there are more survivors\n",
@ -413,9 +373,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Group ages to simplify machine learning algorithms. 0: 0-5, 1: 6-10, 2: 11-15, 3: 16-59 and 4: 60-80\n", "# Group ages to simplify machine learning algorithms. 0: 0-5, 1: 6-10, 2: 11-15, 3: 16-59 and 4: 60-80\n",
@ -437,10 +395,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def substrings_in_string(big_string, substrings):\n", "def substrings_in_string(big_string, substrings):\n",
@ -475,9 +431,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"df['FarePerPerson']= df['Fare'] / (df['FamilySize'] + 1)" "df['FarePerPerson']= df['Fare'] / (df['FamilySize'] + 1)"
@ -500,9 +454,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"df['AgeClass']=df['Age']*df['Pclass']" "df['AgeClass']=df['Age']*df['Pclass']"

File diff suppressed because one or more lines are too long

@ -19,11 +19,10 @@ samples.
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from sklearn import cross_validation
from sklearn.naive_bayes import GaussianNB from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC from sklearn.svm import SVC
from sklearn.datasets import load_digits from sklearn.datasets import load_digits
from sklearn.learning_curve import learning_curve from sklearn.model_selection import learning_curve
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
@ -53,7 +52,7 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
cv : integer, cross-validation generator, optional cv : integer, cross-validation generator, optional
If an integer is passed, it is the number of folds (defaults to 3). If an integer is passed, it is the number of folds (defaults to 3).
Specific cross-validation objects can be passed, see Specific cross-validation objects can be passed, see
sklearn.cross_validation module for the list of possible objects sklearn.model_selection module for the list of possible objects
n_jobs : integer, optional n_jobs : integer, optional
Number of jobs to run in parallel (default 1). Number of jobs to run in parallel (default 1).

@ -72,9 +72,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"scrolled": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"import random\n", "import random\n",

@ -68,9 +68,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"review = \"\"\"I purchased this monitor because of budgetary concerns. This item was the most inexpensive 17 inch monitor \n", "review = \"\"\"I purchased this monitor because of budgetary concerns. This item was the most inexpensive 17 inch monitor \n",
@ -111,9 +109,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"import nltk\n", "import nltk\n",
@ -171,9 +167,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from nltk.tokenize import sent_tokenize, word_tokenize\n", "from nltk.tokenize import sent_tokenize, word_tokenize\n",
@ -199,10 +193,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false,
"scrolled": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"words = [word_tokenize(t) for t in sent_tokenize(review)]\n", "words = [word_tokenize(t) for t in sent_tokenize(review)]\n",
@ -219,9 +210,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"words = word_tokenize(review)\n", "words = word_tokenize(review)\n",
@ -239,9 +228,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from nltk.tokenize import TweetTokenizer\n", "from nltk.tokenize import TweetTokenizer\n",
@ -268,9 +255,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from nltk.stem import PorterStemmer, LancasterStemmer, WordNetLemmatizer\n", "from nltk.stem import PorterStemmer, LancasterStemmer, WordNetLemmatizer\n",
@ -304,9 +289,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"As we can see, we get the forms *are* and *is* instead of *be*. This is because we have not introduced the Part-Of-Speech (POS), and the default POS is 'n' (noun).\n", "As we can see, we get the forms *are* and *is* instead of *be*. This is because we have not introduced the Part-Of-Speech (POS), and the default POS is 'n' (noun).\n",
"\n", "\n",
@ -316,9 +299,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"verbs = \"are crying is have has\"\n", "verbs = \"are crying is have has\"\n",
@ -327,9 +308,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"Depending on the application, we can select stemmers or lemmatizers. \n", "Depending on the application, we can select stemmers or lemmatizers. \n",
"\n", "\n",
@ -341,9 +320,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def preprocess(words, type='doc'):\n", "def preprocess(words, type='doc'):\n",
@ -376,9 +353,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from nltk.corpus import stopwords\n", "from nltk.corpus import stopwords\n",
@ -390,9 +365,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def preprocess(words, type='doc'):\n", "def preprocess(words, type='doc'):\n",
@ -428,9 +401,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"import string\n", "import string\n",
@ -474,9 +445,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"frec = nltk.FreqDist(nltk.word_tokenize(review))\n", "frec = nltk.FreqDist(nltk.word_tokenize(review))\n",

@ -62,9 +62,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"review = \"\"\"I purchased this Dell monitor because of budgetary concerns. This item was the most inexpensive 17 inch Apple monitor \n", "review = \"\"\"I purchased this Dell monitor because of budgetary concerns. This item was the most inexpensive 17 inch Apple monitor \n",
@ -110,9 +108,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from nltk import pos_tag, word_tokenize\n", "from nltk import pos_tag, word_tokenize\n",
@ -129,9 +125,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"print (pos_tag(word_tokenize(review)))" "print (pos_tag(word_tokenize(review)))"
@ -147,9 +141,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"import nltk\n", "import nltk\n",
@ -166,9 +158,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from nltk.stem import WordNetLemmatizer\n", "from nltk.stem import WordNetLemmatizer\n",
@ -199,9 +189,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from nltk import ne_chunk, pos_tag, word_tokenize\n", "from nltk import ne_chunk, pos_tag, word_tokenize\n",
@ -246,9 +234,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from nltk.app import srparser_app\n", "from nltk.app import srparser_app\n",
@ -265,9 +251,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from nltk.app import rdparser_app\n", "from nltk.app import rdparser_app\n",
@ -288,9 +272,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from nltk.chunk.regexp import *\n", "from nltk.chunk.regexp import *\n",
@ -316,9 +298,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def extractTrees(parsed_tree, category='NP'):\n", "def extractTrees(parsed_tree, category='NP'):\n",
@ -330,9 +310,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def extractStrings(parsed_tree, category='NP'):\n", "def extractStrings(parsed_tree, category='NP'):\n",

@ -60,9 +60,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"doc1 = 'Summer is coming but Summer is short'\n", "doc1 = 'Summer is coming but Summer is short'\n",
@ -73,9 +71,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"# Tools" "# Tools"
] ]
@ -110,9 +106,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.feature_extraction.text import CountVectorizer\n", "from sklearn.feature_extraction.text import CountVectorizer\n",
@ -123,9 +117,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"As we can see, [CountVectorizer](http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html#sklearn.feature_extraction.text.CountVectorizer) comes with many options. We can define many configuration options, such as the maximum or minimum frequency of a term (*min_df*, *max_df*), maximum number of features (*max_features*), if we analyze words or characters (*analyzer*), or if the output is binary or not (*binary*). *CountVectorizer* also allows us to include if we want to preprocess the input (*preprocessor*) before tokenizing it (*tokenizer*) and exclude stop words (*stop_words*).\n", "As we can see, [CountVectorizer](http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html#sklearn.feature_extraction.text.CountVectorizer) comes with many options. We can define many configuration options, such as the maximum or minimum frequency of a term (*min_df*, *max_df*), maximum number of features (*max_features*), if we analyze words or characters (*analyzer*), or if the output is binary or not (*binary*). *CountVectorizer* also allows us to include if we want to preprocess the input (*preprocessor*) before tokenizing it (*tokenizer*) and exclude stop words (*stop_words*).\n",
"\n", "\n",
@ -137,9 +129,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"vectors = vectorizer.fit_transform(documents)\n", "vectors = vectorizer.fit_transform(documents)\n",
@ -148,9 +138,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"We see the vectors are stored as a sparse matrix of 3x6 dimensions.\n", "We see the vectors are stored as a sparse matrix of 3x6 dimensions.\n",
"We can print the matrix as well as the feature names." "We can print the matrix as well as the feature names."
@ -159,9 +147,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"print(vectors.toarray())\n", "print(vectors.toarray())\n",
@ -170,9 +156,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"As you can see, the pronoun 'I' has been removed because of the default token_pattern. \n", "As you can see, the pronoun 'I' has been removed because of the default token_pattern. \n",
"We can change this as follows." "We can change this as follows."
@ -181,9 +165,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"vectorizer = CountVectorizer(analyzer=\"word\", stop_words=None, token_pattern='(?u)\\\\b\\\\w+\\\\b') \n", "vectorizer = CountVectorizer(analyzer=\"word\", stop_words=None, token_pattern='(?u)\\\\b\\\\w+\\\\b') \n",
@ -201,9 +183,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"vectorizer = CountVectorizer(analyzer=\"word\", stop_words='english', token_pattern='(?u)\\\\b\\\\w+\\\\b') \n", "vectorizer = CountVectorizer(analyzer=\"word\", stop_words='english', token_pattern='(?u)\\\\b\\\\w+\\\\b') \n",
@ -214,9 +194,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#stop words in scikit-learn for English\n", "#stop words in scikit-learn for English\n",
@ -226,9 +204,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Vectors\n", "# Vectors\n",
@ -246,9 +222,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from scipy.spatial.distance import cosine\n", "from scipy.spatial.distance import cosine\n",
@ -275,9 +249,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"vectorizer = CountVectorizer(analyzer=\"word\", stop_words='english', binary=True) \n", "vectorizer = CountVectorizer(analyzer=\"word\", stop_words='english', binary=True) \n",
@ -288,9 +260,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"vectors.toarray()" "vectors.toarray()"
@ -313,9 +283,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"vectorizer = CountVectorizer(analyzer=\"word\", stop_words='english', ngram_range=[2,2]) \n", "vectorizer = CountVectorizer(analyzer=\"word\", stop_words='english', ngram_range=[2,2]) \n",
@ -326,9 +294,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"vectors.toarray()" "vectors.toarray()"
@ -351,9 +317,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.feature_extraction.text import TfidfVectorizer\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n",
@ -366,9 +330,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"vectors.toarray()" "vectors.toarray()"
@ -384,9 +346,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"train = [doc1, doc2, doc3]\n", "train = [doc1, doc2, doc3]\n",
@ -400,10 +360,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false,
"scrolled": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"vectors.toarray()" "vectors.toarray()"
@ -419,9 +376,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.metrics.pairwise import cosine_similarity\n", "from sklearn.metrics.pairwise import cosine_similarity\n",
@ -445,9 +400,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.metrics.pairwise import linear_kernel\n", "from sklearn.metrics.pairwise import linear_kernel\n",

@ -74,19 +74,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['alt.atheism', 'comp.graphics', 'comp.os.ms-windows.misc', 'comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'comp.windows.x', 'misc.forsale', 'rec.autos', 'rec.motorcycles', 'rec.sport.baseball', 'rec.sport.hockey', 'sci.crypt', 'sci.electronics', 'sci.med', 'sci.space', 'soc.religion.christian', 'talk.politics.guns', 'talk.politics.mideast', 'talk.politics.misc', 'talk.religion.misc']\n"
]
}
],
"source": [ "source": [
"from sklearn.datasets import fetch_20newsgroups\n", "from sklearn.datasets import fetch_20newsgroups\n",
"\n", "\n",
@ -100,19 +90,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20\n"
]
}
],
"source": [ "source": [
"#Number of categories\n", "#Number of categories\n",
"print(len(newsgroups_train.target_names))" "print(len(newsgroups_train.target_names))"
@ -120,28 +100,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Category id 4 comp.sys.mac.hardware\n",
"Doc A fair number of brave souls who upgraded their SI clock oscillator have\n",
"shared their experiences for this poll. Please send a brief message detailing\n",
"your experiences with the procedure. Top speed attained, CPU rated speed,\n",
"add on cards and adapters, heat sinks, hour of usage per day, floppy disk\n",
"functionality with 800 and 1.4 m floppies are especially requested.\n",
"\n",
"I will be summarizing in the next two days, so please add to the network\n",
"knowledge base if you have done the clock upgrade and haven't answered this\n",
"poll. Thanks.\n"
]
}
],
"source": [ "source": [
"# Show a document\n", "# Show a document\n",
"docid = 1\n", "docid = 1\n",
@ -154,22 +115,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"(11314,)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"#Number of files\n", "#Number of files\n",
"newsgroups_train.filenames.shape" "newsgroups_train.filenames.shape"
@ -177,30 +125,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/cif/anaconda3/lib/python3.5/site-packages/numpy/core/fromnumeric.py:2652: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.\n",
" VisibleDeprecationWarning)\n"
]
},
{
"data": {
"text/plain": [
"(11314, 101323)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# Obtain a vector\n", "# Obtain a vector\n",
"\n", "\n",
@ -214,22 +141,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"66.80510871486653"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# The tf-idf vectors are very sparse with an average of 66 non-zero components in 101,323 dimensions (.06%)\n", "# The tf-idf vectors are very sparse with an average of 66 non-zero components in 101,323 dimensions (.06%)\n",
"vectors_train.nnz / float(vectors_train.shape[0])" "vectors_train.nnz / float(vectors_train.shape[0])"
@ -251,30 +165,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/cif/anaconda3/lib/python3.5/site-packages/numpy/core/fromnumeric.py:2652: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.\n",
" VisibleDeprecationWarning)\n"
]
},
{
"data": {
"text/plain": [
"0.69545360719001303"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"from sklearn.naive_bayes import MultinomialNB\n", "from sklearn.naive_bayes import MultinomialNB\n",
"\n", "\n",
@ -302,20 +195,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dimensionality: 101323\n",
"density: 1.000000\n"
]
}
],
"source": [ "source": [
"from sklearn.utils.extmath import density\n", "from sklearn.utils.extmath import density\n",
"\n", "\n",
@ -325,38 +207,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"alt.atheism: islam atheists say just religion atheism think don people god\n",
"comp.graphics: looking format 3d know program file files thanks image graphics\n",
"comp.os.ms-windows.misc: card problem thanks driver drivers use files dos file windows\n",
"comp.sys.ibm.pc.hardware: monitor disk thanks pc ide controller bus card scsi drive\n",
"comp.sys.mac.hardware: know monitor does quadra simms thanks problem drive apple mac\n",
"comp.windows.x: using windows x11r5 use application thanks widget server motif window\n",
"misc.forsale: asking email sell price condition new shipping offer 00 sale\n",
"rec.autos: don ford new good dealer just engine like cars car\n",
"rec.motorcycles: don just helmet riding like motorcycle ride bikes dod bike\n",
"rec.sport.baseball: braves players pitching hit runs games game baseball team year\n",
"rec.sport.hockey: league year nhl games season players play hockey team game\n",
"sci.crypt: people use escrow nsa keys government chip clipper encryption key\n",
"sci.electronics: don thanks voltage used know does like circuit power use\n",
"sci.med: skepticism cadre dsl banks chastity n3jxp pitt gordon geb msg\n",
"sci.space: just lunar earth shuttle like moon launch orbit nasa space\n",
"soc.religion.christian: believe faith christian christ bible people christians church jesus god\n",
"talk.politics.guns: just law firearms government fbi don weapons people guns gun\n",
"talk.politics.mideast: said arabs arab turkish people armenians armenian jews israeli israel\n",
"talk.politics.misc: know state clinton president just think tax don government people\n",
"talk.religion.misc: think don koresh objective christians bible people christian jesus god\n"
]
}
],
"source": [ "source": [
"# We can review the top features per topic in Bayes (attribute coef_)\n", "# We can review the top features per topic in Bayes (attribute coef_)\n",
"import numpy as np\n", "import numpy as np\n",
@ -373,28 +226,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 2 15]\n",
"['comp.os.ms-windows.misc', 'soc.religion.christian']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/cif/anaconda3/lib/python3.5/site-packages/numpy/core/fromnumeric.py:2652: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.\n",
" VisibleDeprecationWarning)\n"
]
}
],
"source": [ "source": [
"# We try the classifier in two new docs\n", "# We try the classifier in two new docs\n",
"\n", "\n",

@ -77,9 +77,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.datasets import fetch_20newsgroups\n", "from sklearn.datasets import fetch_20newsgroups\n",
@ -123,9 +121,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from gensim import matutils\n", "from gensim import matutils\n",
@ -152,10 +148,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from gensim.models.ldamodel import LdaModel\n", "from gensim.models.ldamodel import LdaModel\n",
@ -169,9 +163,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# check the topics\n", "# check the topics\n",
@ -188,9 +180,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# import the gensim.corpora module to generate dictionary\n", "# import the gensim.corpora module to generate dictionary\n",
@ -222,9 +212,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# You can save the dictionary\n", "# You can save the dictionary\n",
@ -236,9 +224,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Generate a list of docs, where each doc is a list of words\n", "# Generate a list of docs, where each doc is a list of words\n",
@ -249,9 +235,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# import the gensim.corpora module to generate dictionary\n", "# import the gensim.corpora module to generate dictionary\n",
@ -263,9 +247,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# You can optionally save the dictionary \n", "# You can optionally save the dictionary \n",
@ -277,9 +259,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# We can print the dictionary, it is a mapping of id and tokens\n", "# We can print the dictionary, it is a mapping of id and tokens\n",
@ -290,9 +270,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# construct the corpus representing each document as a bag-of-words (bow) vector\n", "# construct the corpus representing each document as a bag-of-words (bow) vector\n",
@ -302,9 +280,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from gensim.models import TfidfModel\n", "from gensim.models import TfidfModel\n",
@ -317,9 +293,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#print tf-idf of first document\n", "#print tf-idf of first document\n",
@ -329,9 +303,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from gensim.models.ldamodel import LdaModel\n", "from gensim.models.ldamodel import LdaModel\n",
@ -344,9 +316,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# check the topics\n", "# check the topics\n",
@ -356,9 +326,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# check the lsa vector for the first document\n", "# check the lsa vector for the first document\n",
@ -369,9 +337,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#predict topics of a new doc\n", "#predict topics of a new doc\n",
@ -384,9 +350,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#transform into LDA space\n", "#transform into LDA space\n",
@ -397,9 +361,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# print the document's single most prominent LDA topic\n", "# print the document's single most prominent LDA topic\n",
@ -409,9 +371,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"lda_vector_tfidf = lda_model[tfidf_model[bow_vector]]\n", "lda_vector_tfidf = lda_model[tfidf_model[bow_vector]]\n",
@ -430,9 +390,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from gensim.models.lsimodel import LsiModel\n", "from gensim.models.lsimodel import LsiModel\n",
@ -448,9 +406,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# check the topics\n", "# check the topics\n",
@ -460,9 +416,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# check the lsi vector for the first document\n", "# check the lsi vector for the first document\n",

@ -123,183 +123,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>essay_id</th>\n",
" <th>essay_set</th>\n",
" <th>essay</th>\n",
" <th>rater1_domain1</th>\n",
" <th>rater2_domain1</th>\n",
" <th>rater3_domain1</th>\n",
" <th>domain1_score</th>\n",
" <th>rater1_domain2</th>\n",
" <th>rater2_domain2</th>\n",
" <th>domain2_score</th>\n",
" <th>...</th>\n",
" <th>rater2_trait3</th>\n",
" <th>rater2_trait4</th>\n",
" <th>rater2_trait5</th>\n",
" <th>rater2_trait6</th>\n",
" <th>rater3_trait1</th>\n",
" <th>rater3_trait2</th>\n",
" <th>rater3_trait3</th>\n",
" <th>rater3_trait4</th>\n",
" <th>rater3_trait5</th>\n",
" <th>rater3_trait6</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Dear local newspaper, I think effects computer...</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>8</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>Dear @CAPS1 @CAPS2, I believe that using compu...</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>9</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>7</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>Dear Local Newspaper, @CAPS1 I have found that...</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" essay_id essay_set essay \\\n",
"0 1 1 Dear local newspaper, I think effects computer... \n",
"1 2 1 Dear @CAPS1 @CAPS2, I believe that using compu... \n",
"2 3 1 Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl... \n",
"3 4 1 Dear Local Newspaper, @CAPS1 I have found that... \n",
"\n",
" rater1_domain1 rater2_domain1 rater3_domain1 domain1_score \\\n",
"0 4 4 NaN 8 \n",
"1 5 4 NaN 9 \n",
"2 4 3 NaN 7 \n",
"3 5 5 NaN 10 \n",
"\n",
" rater1_domain2 rater2_domain2 domain2_score ... \\\n",
"0 NaN NaN NaN ... \n",
"1 NaN NaN NaN ... \n",
"2 NaN NaN NaN ... \n",
"3 NaN NaN NaN ... \n",
"\n",
" rater2_trait3 rater2_trait4 rater2_trait5 rater2_trait6 rater3_trait1 \\\n",
"0 NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN \n",
"\n",
" rater3_trait2 rater3_trait3 rater3_trait4 rater3_trait5 rater3_trait6 \n",
"0 NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN \n",
"\n",
"[4 rows x 28 columns]"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import pandas as pd\n", "import pandas as pd\n",
"\n", "\n",
@ -311,44 +137,18 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"(12976, 28)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"df_orig.shape" "df_orig.shape"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"(1783, 3)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# We filter the data of the essay_set number 1, and we keep only two columns for this \n", "# We filter the data of the essay_set number 1, and we keep only two columns for this \n",
"# example\n", "# example\n",
@ -359,83 +159,17 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>essay_id</th>\n",
" <th>essay</th>\n",
" <th>domain1_score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Dear local newspaper, I think effects computer...</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Dear @CAPS1 @CAPS2, I believe that using compu...</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Dear Local Newspaper, @CAPS1 I have found that...</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Dear @LOCATION1, I know having computers has a...</td>\n",
" <td>8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" essay_id essay domain1_score\n",
"0 1 Dear local newspaper, I think effects computer... 8\n",
"1 2 Dear @CAPS1 @CAPS2, I believe that using compu... 9\n",
"2 3 Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl... 7\n",
"3 4 Dear Local Newspaper, @CAPS1 I have found that... 10\n",
"4 5 Dear @LOCATION1, I know having computers has a... 8"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"df[0:5]" "df[0:5]"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Define X and Y\n", "# Define X and Y\n",
@ -468,10 +202,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Generic Transformer \n", "# Generic Transformer \n",
@ -509,10 +241,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Sample of statistics using nltk\n", "# Sample of statistics using nltk\n",
@ -541,10 +271,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.base import BaseEstimator, TransformerMixin\n", "from sklearn.base import BaseEstimator, TransformerMixin\n",
@ -581,10 +309,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.base import BaseEstimator, TransformerMixin\n", "from sklearn.base import BaseEstimator, TransformerMixin\n",
@ -635,10 +361,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.pipeline import Pipeline, FeatureUnion\n", "from sklearn.pipeline import Pipeline, FeatureUnion\n",
@ -674,23 +398,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 37, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Scores in every iteration [ 0.39798206 0.27497194]\n",
"Accuracy: 0.34 (+/- 0.12)\n"
]
}
],
"source": [ "source": [
"from sklearn.naive_bayes import MultinomialNB\n", "from sklearn.naive_bayes import MultinomialNB\n",
"from sklearn.cross_validation import cross_val_score, KFold\n", "from sklearn.model_selection import cross_val_score, KFold\n",
"from sklearn.metrics import classification_report\n", "from sklearn.metrics import classification_report\n",
"from sklearn.feature_extraction import DictVectorizer\n", "from sklearn.feature_extraction import DictVectorizer\n",
"from sklearn.preprocessing import FunctionTransformer\n", "from sklearn.preprocessing import FunctionTransformer\n",
@ -726,7 +439,7 @@
"\n", "\n",
"# Using KFold validation\n", "# Using KFold validation\n",
"\n", "\n",
"cv = KFold(X.shape[0], 2, shuffle=True, random_state=33)\n", "cv = KFold(2, shuffle=True, random_state=33)\n",
"scores = cross_val_score(pipeline, X, y, cv=cv)\n", "scores = cross_val_score(pipeline, X, y, cv=cv)\n",
"print(\"Scores in every iteration\", scores)\n", "print(\"Scores in every iteration\", scores)\n",
"print(\"Accuracy: %0.2f (+/- %0.2f)\" % (scores.mean(), scores.std() * 2))" "print(\"Accuracy: %0.2f (+/- %0.2f)\" % (scores.mean(), scores.std() * 2))"
@ -734,9 +447,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"The result is not very good :(." "The result is not very good :(."
] ]
@ -789,9 +500,9 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.5.1" "version": "3.6.7"
} }
}, },
"nbformat": 4, "nbformat": 4,
"nbformat_minor": 0 "nbformat_minor": 1
} }

@ -117,9 +117,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"Example: we use Jupyter as a calculator, let's execute 2+2" "Example: we use Jupyter as a calculator, let's execute 2+2"
] ]
@ -140,20 +138,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"2+2" "2+2"
] ]

@ -39,31 +39,16 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"## 1. Booleans" "## 1. Booleans"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"True and False # operations with booleans" "True and False # operations with booleans"
] ]
@ -71,9 +56,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"not True" "not True"
@ -82,9 +65,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"True or False" "True or False"
@ -111,9 +92,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"2 + 2 # 2 plus 2 (integers)" "2 + 2 # 2 plus 2 (integers)"
@ -122,9 +101,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"2.0 * 3.0 # 2.0 times 3.0 (floats)" "2.0 * 3.0 # 2.0 times 3.0 (floats)"
@ -133,9 +110,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"2.0 ** 4.0 # 2.0 to the power of 4 (float)" "2.0 ** 4.0 # 2.0 to the power of 4 (float)"
@ -144,9 +119,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"(3 + 4j) + (5 + 5j) #add two complex numbers" "(3 + 4j) + (5 + 5j) #add two complex numbers"
@ -155,9 +128,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"10 / 3 # classic division" "10 / 3 # classic division"
@ -166,9 +137,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"10 // 3 # floor division" "10 // 3 # floor division"
@ -177,9 +146,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"10 % 3 # remainder" "10 % 3 # remainder"
@ -188,9 +155,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"10e158*17e158 #overflow shown as 'inf', infinity" "10e158*17e158 #overflow shown as 'inf', infinity"
@ -199,9 +164,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(10)" "type(10)"
@ -210,9 +173,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(2 + 3j)" "type(2 + 3j)"
@ -221,9 +182,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(2.1)" "type(2.1)"
@ -232,9 +191,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(2E3)" "type(2E3)"
@ -249,9 +206,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"Strings are **immutable sequences** of Unicode code points.\n", "Strings are **immutable sequences** of Unicode code points.\n",
"\n", "\n",
@ -261,9 +216,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"\"This is a string\"" "\"This is a string\""
@ -272,9 +225,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"'This is also a string'" "'This is also a string'"
@ -283,9 +234,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"\"This is a string containing single quotes 'hi'\"" "\"This is a string containing single quotes 'hi'\""
@ -294,9 +243,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"'This is string containing double quotes \"hi\"'" "'This is string containing double quotes \"hi\"'"
@ -305,9 +252,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"'''This is \n", "'''This is \n",
@ -328,9 +273,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"\"String with special characters: \\n newline, \\a beep and \\\\ slash\"" "\"String with special characters: \\n newline, \\a beep and \\\\ slash\""
@ -339,9 +282,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"\"concatenate \" + \"two strings\" #use of '+' for concatenating two strings" "\"concatenate \" + \"two strings\" #use of '+' for concatenating two strings"
@ -350,9 +291,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"len('hola') # length of a string" "len('hola') # length of a string"
@ -361,9 +300,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(\"hola\")" "type(\"hola\")"
@ -379,9 +316,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s = \"hola\" # assign the string value \"hola\" to the variable s" "s = \"hola\" # assign the string value \"hola\" to the variable s"
@ -390,9 +325,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s # get the value of s" "s # get the value of s"
@ -401,9 +334,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s[0]" "s[0]"
@ -412,9 +343,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s[1]" "s[1]"
@ -423,9 +352,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s[3]" "s[3]"
@ -434,9 +361,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s [-1] # we can start from the beginning (index 0, 1, 2, ...) or from the last position (-1, -2, ...)" "s [-1] # we can start from the beginning (index 0, 1, 2, ...) or from the last position (-1, -2, ...)"
@ -452,9 +377,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s[0:2] #slice [0,2)" "s[0:2] #slice [0,2)"
@ -463,9 +386,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s[:2] #slice [0,2)" "s[:2] #slice [0,2)"
@ -474,9 +395,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s[:] #slice [0, len(s))" "s[:] #slice [0, len(s))"
@ -485,9 +404,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s[:-2]" "s[:-2]"
@ -496,9 +413,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s[-4:-2]" "s[-4:-2]"
@ -518,9 +433,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"se = \"This is a string\"" "se = \"This is a string\""
@ -529,9 +442,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"se[::1] # moves from 0 to len, and the index is incremented by 1" "se[::1] # moves from 0 to len, and the index is incremented by 1"
@ -540,9 +451,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"se[0:14:2] #take the even indexed characters from 0 to 14" "se[0:14:2] #take the even indexed characters from 0 to 14"
@ -551,9 +460,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"se[::-1] #reverse the string" "se[::-1] #reverse the string"
@ -562,9 +469,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"se[:4:-1]" "se[:4:-1]"
@ -580,9 +485,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"a = 'b'" "a = 'b'"
@ -591,9 +494,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"se + \" plus \" + se + \" plus \"+ a*3" "se + \" plus \" + se + \" plus \"+ a*3"
@ -611,9 +512,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s.lower()" "s.lower()"
@ -622,9 +521,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s.upper()" "s.upper()"
@ -633,9 +530,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s.split('o') # splits String " "s.split('o') # splits String "
@ -660,9 +555,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"\"hohoho\".split('h')" "\"hohoho\".split('h')"
@ -671,9 +564,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(\"hohoho\".split('h'))" "type(\"hohoho\".split('h'))"

@ -42,9 +42,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"## 1. Lists" "## 1. Lists"
] ]
@ -52,9 +50,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l = [1, 2, 3, 4, 5, 6]" "l = [1, 2, 3, 4, 5, 6]"
@ -63,9 +59,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l" "l"
@ -74,9 +68,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l[0:3] # we can use slicing in sequence types" "l[0:3] # we can use slicing in sequence types"
@ -85,9 +77,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"other_list = [1, 0.0, \"hola\"] #lists can have elements of different types" "other_list = [1, 0.0, \"hola\"] #lists can have elements of different types"
@ -96,9 +86,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"other_list" "other_list"
@ -107,9 +95,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l + other_list # we can add lists (append)" "l + other_list # we can add lists (append)"
@ -118,9 +104,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l * 3 # we can repeat a list n times" "l * 3 # we can repeat a list n times"
@ -129,9 +113,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"len(l) # length of a list (as Strings)" "len(l) # length of a list (as Strings)"
@ -140,9 +122,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l.append(7) #append at the end of the list. Check help with Shift-tab, and methods with tab" "l.append(7) #append at the end of the list. Check help with Shift-tab, and methods with tab"
@ -151,9 +131,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l" "l"
@ -162,9 +140,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l.pop() # remove last element" "l.pop() # remove last element"
@ -173,9 +149,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l" "l"
@ -184,9 +158,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l.pop(2) # remove element at index 2" "l.pop(2) # remove element at index 2"
@ -195,9 +167,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l" "l"
@ -206,18 +176,14 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [] "source": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l.insert(2,3) # insert at index 2 the value 3" "l.insert(2,3) # insert at index 2 the value 3"
@ -226,9 +192,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l" "l"
@ -237,9 +201,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l.reverse()" "l.reverse()"
@ -248,9 +210,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l" "l"
@ -259,9 +219,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l.sort()" "l.sort()"
@ -270,9 +228,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l" "l"
@ -281,9 +237,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l.remove(3) # remove first occurrence of 3 from l. Remember: remove (element) vs pop(index)" "l.remove(3) # remove first occurrence of 3 from l. Remember: remove (element) vs pop(index)"
@ -292,9 +246,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l" "l"
@ -303,9 +255,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l[0] = 0 # lists are mutable" "l[0] = 0 # lists are mutable"
@ -314,9 +264,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l" "l"
@ -325,9 +273,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"2 in l # check if an element is in a list" "2 in l # check if an element is in a list"
@ -336,9 +282,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"7 in l # check if an element is in a list " "7 in l # check if an element is in a list "
@ -347,9 +291,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"4 not in l # check if an element is not in a list" "4 not in l # check if an element is not in a list"
@ -358,9 +300,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l.index(4) # search for an item" "l.index(4) # search for an item"
@ -369,9 +309,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l.index(-1) # search for an item, error since it is not in the list" "l.index(-1) # search for an item, error since it is not in the list"
@ -380,9 +318,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"matrix = [[1,2], [3,4]] # matrix" "matrix = [[1,2], [3,4]] # matrix"
@ -391,9 +327,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"matrix" "matrix"
@ -402,9 +336,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"matrix[0][0]" "matrix[0][0]"
@ -413,9 +345,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"matrix[0][1]" "matrix[0][1]"
@ -424,9 +354,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(matrix)" "type(matrix)"
@ -455,9 +383,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"tuple = ('a', 1)" "tuple = ('a', 1)"
@ -466,9 +392,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"tuple" "tuple"
@ -476,9 +400,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": false
},
"source": [ "source": [
"Tuples implement all the common [sequence operators](https://docs.python.org/3/library/stdtypes.html#typesseq-common), such as slicing, concatenation, len, etc." "Tuples implement all the common [sequence operators](https://docs.python.org/3/library/stdtypes.html#typesseq-common), such as slicing, concatenation, len, etc."
] ]
@ -486,9 +408,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"tuple[::-1]" "tuple[::-1]"
@ -497,9 +417,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"len(tuple)" "len(tuple)"
@ -508,9 +426,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"tuple * 2 + ('b', 'c', 2.1, True)" "tuple * 2 + ('b', 'c', 2.1, True)"
@ -519,9 +435,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"tuple[1]" "tuple[1]"
@ -530,9 +444,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"tuple[1] = 2 # Error, tuples are immutable" "tuple[1] = 2 # Error, tuples are immutable"
@ -541,9 +453,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(tuple)" "type(tuple)"
@ -558,9 +468,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"A [range](https://docs.python.org/3/library/stdtypes.html#range) represents an immutable sequence of numbers. Ranges are created with two constructors: *range(stop)* or *range(start, stop, [step])*. \n", "A [range](https://docs.python.org/3/library/stdtypes.html#range) represents an immutable sequence of numbers. Ranges are created with two constructors: *range(stop)* or *range(start, stop, [step])*. \n",
"\n", "\n",
@ -569,10 +477,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"r = range(10)" "r = range(10)"
@ -580,66 +486,27 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"range(0, 10)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"r" "r"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"5 in r # check if a number is in a range" "5 in r # check if a number is in a range"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"r[2] # Get a value" "r[2] # Get a value"
] ]
@ -647,9 +514,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(r)" "type(r)"
@ -658,9 +523,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"list(range(10))" "list(range(10))"
@ -669,9 +532,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"list(range(1,10,2))" "list(range(1,10,2))"

@ -42,9 +42,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"## 1. Sets" "## 1. Sets"
] ]
@ -52,9 +50,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_set = set() #create a set\n", "my_set = set() #create a set\n",
@ -64,9 +60,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_set.add(1) # add an element\n", "my_set.add(1) # add an element\n",
@ -76,9 +70,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_set.add(2) # add another element" "my_set.add(2) # add another element"
@ -87,9 +79,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_set" "my_set"
@ -98,9 +88,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_set.add(3) # add another one\n", "my_set.add(3) # add another one\n",
@ -110,9 +98,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_set.add(1) #try to add a repeated element\n", "my_set.add(1) #try to add a repeated element\n",
@ -122,9 +108,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s2 = set(range(10)) # we can create a set from a range\n", "s2 = set(range(10)) # we can create a set from a range\n",
@ -134,9 +118,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"l = ['a', 'a', 'b', 'c', 'c', 'c']" "l = ['a', 'a', 'b', 'c', 'c', 'c']"
@ -145,9 +127,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s3 = set(l) # if we create a set from a list, elements are not repeated\n", "s3 = set(l) # if we create a set from a list, elements are not repeated\n",
@ -157,9 +137,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"len(s3) " "len(s3) "
@ -168,9 +146,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"s3.union(s2) # we can use set methods: union(), intersection(), difference(), ..." "s3.union(s2) # we can use set methods: union(), intersection(), difference(), ..."
@ -179,9 +155,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"3 in my_set #check membership" "3 in my_set #check membership"
@ -190,9 +164,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(s3)" "type(s3)"
@ -208,9 +180,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_dictionary = {'key1': 1, 'key2': 2, 'key3': 3} # pairs of key-value mappings\n", "my_dictionary = {'key1': 1, 'key2': 2, 'key3': 3} # pairs of key-value mappings\n",
@ -220,9 +190,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_dictionary['key1'] #retrieve a value given a key" "my_dictionary['key1'] #retrieve a value given a key"
@ -231,9 +199,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_dict = dict()\n", "my_dict = dict()\n",
@ -246,9 +212,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_dict == my_dictionary # check if both dictionaries are equal" "my_dict == my_dictionary # check if both dictionaries are equal"
@ -257,9 +221,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_dict2 = {'one': {'two': {'three': 'Nested dict'}}} #nested dictionary\n", "my_dict2 = {'one': {'two': {'three': 'Nested dict'}}} #nested dictionary\n",
@ -269,9 +231,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_dict2['one']['two']['three'] #access the value" "my_dict2['one']['two']['three'] #access the value"
@ -279,9 +239,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": false
},
"source": [ "source": [
"Dictionaries have different methods, check them with Tab." "Dictionaries have different methods, check them with Tab."
] ]
@ -289,9 +247,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_dict.keys() # in Python3 we get a View object that changes when the dictionary changes" "my_dict.keys() # in Python3 we get a View object that changes when the dictionary changes"
@ -300,9 +256,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"list(my_dict.keys()) # we can convert it to a list, we see dictionaries are unordered" "list(my_dict.keys()) # we can convert it to a list, we see dictionaries are unordered"
@ -311,9 +265,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"my_dict.values()" "my_dict.values()"
@ -322,9 +274,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"list(my_dict.values())" "list(my_dict.values())"
@ -333,9 +283,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(my_dict)" "type(my_dict)"

@ -59,31 +59,16 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"## 1. Conditional statements: if, elif, else" "## 1. Conditional statements: if, elif, else"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"data": {
"text/plain": [
"6"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import random # import random before using it\n", "import random # import random before using it\n",
"x = random.randrange(1, 10) # generate a random integer in [1, 9] (randrange excludes the stop value 10)\n", "x = random.randrange(1, 10) # generate a random integer in [1, 9] (randrange excludes the stop value 10)\n",
@ -93,9 +78,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Execute several times in order the previous cell and this one\n", "# Execute several times in order the previous cell and this one\n",
@ -110,9 +93,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Only one branch\n", "# Only one branch\n",
@ -125,9 +106,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Python has no switch statement for multiple branches\n", "# Python has no switch statement for multiple branches\n",
@ -158,9 +137,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# for with ranges\n", "# for with ranges\n",
@ -171,9 +148,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# for with lists\n", "# for with lists\n",
@ -185,9 +160,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# for with tuples\n", "# for with tuples\n",
@ -199,9 +172,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# for with dictionaries\n", "# for with dictionaries\n",
@ -213,9 +184,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# We get only the keys. If we want the pairs we need to create a generator (we will see this later)\n", "# We get only the keys. If we want the pairs we need to create a generator (we will see this later)\n",
@ -233,9 +202,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"x = 5\n", "x = 5\n",
@ -247,9 +214,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Else is optional\n", "# Else is optional\n",
@ -261,9 +226,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": false
},
"source": [ "source": [
"### 2.3. Break, continue, pass\n", "### 2.3. Break, continue, pass\n",
"\n", "\n",
@ -277,9 +240,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Example find an element, else executed at the end\n", "# Example find an element, else executed at the end\n",
@ -295,9 +256,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Example else\n", "# Example else\n",
@ -313,9 +272,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# We improve above code with break\n", "# We improve above code with break\n",
@ -333,9 +290,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# We improve above code with break\n", "# We improve above code with break\n",
@ -353,9 +308,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Print numbers from 0 to 15 which are not multiples of 3\n", "# Print numbers from 0 to 15 which are not multiples of 3\n",
@ -368,9 +321,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Find the first occurrence of an element in a list\n", "# Find the first occurrence of an element in a list\n",
@ -387,9 +338,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Example of pass, when we do not want to do anything\n", "# Example of pass, when we do not want to do anything\n",
@ -418,9 +367,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Syntax: first what we want to include in the list (x) and then how to obtain x\n", "# Syntax: first what we want to include in the list (x) and then how to obtain x\n",
@ -432,9 +379,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# list = {x² : x in {0 ... 9}}\n", "# list = {x² : x in {0 ... 9}}\n",
@ -445,9 +390,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# list = {x² : x in {0 ... 9}, x is even}\n", "# list = {x² : x in {0 ... 9}, x is even}\n",

@ -42,9 +42,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def sum(a, b):\n", "def sum(a, b):\n",
@ -56,9 +54,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#keyword parameters\n", "#keyword parameters\n",
@ -69,9 +65,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def greetings():\n", "def greetings():\n",
@ -85,9 +79,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# We can assign a function to a variable. Fun\n", "# We can assign a function to a variable. Fun\n",
@ -97,9 +89,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(d)" "type(d)"
@ -108,9 +98,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(greetings)" "type(greetings)"
@ -127,9 +115,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def reverse(l):\n", "def reverse(l):\n",
@ -154,9 +140,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def sum(a, b=0):\n", "def sum(a, b=0):\n",
@ -175,9 +159,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#variable number of arguments: *\n", "#variable number of arguments: *\n",
@ -194,9 +176,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#Packing \n", "#Packing \n",
@ -209,9 +189,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"## Lambda functions\n", "## Lambda functions\n",
"\n", "\n",
@ -221,9 +199,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def sq(x):\n", "def sq(x):\n",
@ -264,9 +240,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"print(1, 2, 3, 4)\n", "print(1, 2, 3, 4)\n",
@ -285,9 +259,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"import math\n", "import math\n",
@ -308,9 +280,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"num = input('Enter a number ')\n", "num = input('Enter a number ')\n",

@ -51,9 +51,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"a = 2\n", "a = 2\n",
@ -74,9 +72,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"type(a)" "type(a)"
@ -103,9 +99,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"a = 'd'\n", "a = 'd'\n",
@ -115,9 +109,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"a = 'd' + 3\n", "a = 'd' + 3\n",
@ -126,18 +118,14 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": true
},
"source": [ "source": [
"## 2. Mutability" "## 2. Mutability"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {},
"collapsed": false
},
"source": [ "source": [
"Objects whose value can change are said to be **mutable**; objects whose value is unchangeable once they are created are called **immutable**.\n", "Objects whose value can change are said to be **mutable**; objects whose value is unchangeable once they are created are called **immutable**.\n",
"\n", "\n",
@ -148,9 +136,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Exercise mutable type\n", "# Exercise mutable type\n",
@ -166,9 +152,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Exercise mutable type\n", "# Exercise mutable type\n",
@ -182,9 +166,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Exercise mutable type\n", "# Exercise mutable type\n",
@ -200,9 +182,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Exercise mutable type\n", "# Exercise mutable type\n",
@ -225,9 +205,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Example of a local variable\n", "# Example of a local variable\n",
@ -246,9 +224,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Access global variables\n", "# Access global variables\n",
@ -275,9 +251,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"NUMBER_OF_LIFES = 5\n", "NUMBER_OF_LIFES = 5\n",
@ -322,9 +296,9 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.5.1" "version": "3.6.7"
} }
}, },
"nbformat": 4, "nbformat": 4,
"nbformat_minor": 0 "nbformat_minor": 1
} }

@ -46,10 +46,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#Example class declaration\n", "#Example class declaration\n",
@ -67,29 +65,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<__main__.TV_Set object at 0x7fec69171860> off\n"
]
},
{
"data": {
"text/plain": [
"__main__.TV_Set"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"#Example object instantiation\n", "#Example object instantiation\n",
"\n", "\n",
@ -100,19 +78,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false "outputs": [],
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Samsung on\n"
]
}
],
"source": [ "source": [
"# Call on method\n", "# Call on method\n",
"my_tv.on()\n", "my_tv.on()\n",
@ -132,9 +100,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"#Example class declaration\n", "#Example class declaration\n",
@ -174,9 +140,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"class Person:\n", "class Person:\n",
@ -192,9 +156,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Example __str__(self)\n", "# Example __str__(self)\n",
@ -235,9 +197,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Now we could change the age of Pedro to a negative value\n", "# Now we could change the age of Pedro to a negative value\n",
@ -255,9 +215,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"class Person:\n", "class Person:\n",

@ -40,9 +40,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Example SyntaxError - missing colon in while\n", "# Example SyntaxError - missing colon in while\n",
@ -61,9 +59,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Example TypeError - wrong use of '+' with different types\n", "# Example TypeError - wrong use of '+' with different types\n",
@ -73,10 +69,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false,
"scrolled": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Example NameError: variable not defined\n", "# Example NameError: variable not defined\n",
@ -98,9 +91,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Example\n", "# Example\n",
@ -116,9 +107,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Example with finally\n", "# Example with finally\n",
@ -135,9 +124,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Example with else and finally\n", "# Example with else and finally\n",
@ -164,9 +151,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def add(a, b):\n", "def add(a, b):\n",

@ -46,9 +46,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# We can import the module plural with import, but we should use the full name\n", "# We can import the module plural with import, but we should use the full name\n",
@ -59,9 +57,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"import babel.messages.plurals\n", "import babel.messages.plurals\n",
@ -71,9 +67,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from babel.messages import plurals # with from-import, we can use the short name\n", "from babel.messages import plurals # with from-import, we can use the short name\n",
@ -83,9 +77,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from babel.messages.plurals import get_plural # now we can use directly get_plural()\n", "from babel.messages.plurals import get_plural # now we can use directly get_plural()\n",

Loading…
Cancel
Save