diff --git a/ml2/3_3_Data_Munging_with_Pandas.ipynb b/ml2/3_3_Data_Munging_with_Pandas.ipynb
index 16ee2b1..5467e85 100644
--- a/ml2/3_3_Data_Munging_with_Pandas.ipynb
+++ b/ml2/3_3_Data_Munging_with_Pandas.ipynb
@@ -404,7 +404,7 @@
    "outputs": [],
    "source": [
     "#Mean Age and SibSp of passengers grouped by passenger class and sex\n",
-    "df.groupby(['Pclass', 'Sex'])['Age','SibSp'].mean()"
+    "df.groupby(['Pclass', 'Sex'])[['Age','SibSp']].mean()"
    ]
   },
   {
@@ -414,7 +414,7 @@
    "outputs": [],
    "source": [
     "#Show mean Age and SibSp for passengers older than 25 grouped by Passenger Class and Sex\n",
-    "df[df.Age > 25].groupby(['Pclass', 'Sex'])['Age','SibSp'].mean()"
+    "df[df.Age > 25].groupby(['Pclass', 'Sex'])[['Age','SibSp']].mean()"
    ]
   },
   {
@@ -424,7 +424,7 @@
    "outputs": [],
    "source": [
     "# Mean age, SibSp , Survived of passengers older than 25 which survived, grouped by Passenger Class and Sex \n",
-    "df[(df.Age > 25 & (df.Survived == 1))].groupby(['Pclass', 'Sex'])['Age','SibSp','Survived'].mean()"
+    "df[(df.Age > 25) & (df.Survived == 1)].groupby(['Pclass', 'Sex'])[['Age','SibSp','Survived']].mean()"
    ]
   },
   {
@@ -436,7 +436,7 @@
     "# We can also decide which function apply in each column\n",
     "\n",
     "#Show mean Age, mean SibSp, and number of passengers older than 25 that survived, grouped by Passenger Class and Sex\n",
-    "df[(df.Age > 25 & (df.Survived == 1))].groupby(['Pclass', 'Sex'])['Age','SibSp','Survived'].agg({'Age': np.mean, \n",
+    "df[(df.Age > 25) & (df.Survived == 1)].groupby(['Pclass', 'Sex'])[['Age','SibSp','Survived']].agg({'Age': np.mean, \n",
     " 'SibSp': np.mean, 'Survived': np.sum})"
    ]
   },
@@ -600,8 +600,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Fill missing values with the median\n",
-    "df_filled = df.fillna(df.median())\n",
+    "# Fill missing values with each numeric column's median (numeric_only=True skips non-numeric columns)\n",
+    "df_filled = df.fillna(df.median(numeric_only=True))\n",
     "df_filled[-5:]"
    ]
   },
@@ -685,7 +685,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# But we are working on a copy \n",
+    "# But we are working on a copy, so we get a warning\n",
     "df.iloc[889]['Sex'] = np.nan"
    ]
   },
@@ -695,7 +695,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# If we want to change, we should not chain selections\n",
+    "# If we want to change it, we should not chain selections\n",
     "# The selection can be done with the column name\n",
     "df.loc[889, 'Sex']"
    ]
@@ -932,11 +932,11 @@
    "metadata": {},
    "source": [
     "* [Pandas](http://pandas.pydata.org/)\n",
-    "* [Learning Pandas, Michael Heydt, Packt Publishing, 2015](http://proquest.safaribooksonline.com/book/programming/python/9781783985128)\n",
-    "* [Useful Pandas Snippets](https://gist.github.com/bsweger/e5817488d161f37dcbd2)\n",
-    "* [Pandas. Introduction to Data Structures](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dsintro)\n",
+    "* [Learning Pandas, Michael Heydt, Packt Publishing, 2017](https://learning.oreilly.com/library/view/learning-pandas/9781787123137/)\n",
+    "* [Pandas. Introduction to Data Structures](https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html)\n",
     "* [Introducing Pandas Objects](https://www.oreilly.com/learning/introducing-pandas-objects)\n",
-    "* [Boolean Operators in Pandas](http://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-operators)"
+    "* [Boolean Operators in Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#boolean-operators)\n",
+    "* [Useful Pandas Snippets](https://gist.github.com/bsweger/e5817488d161f37dcbd2)"
    ]
   },
   {
@@ -958,7 +958,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -972,7 +972,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.1"
+   "version": "3.8.12"
   },
   "latex_envs": {
    "LaTeX_envs_menu_present": true,