Updated median with only numeric values

pull/6/merge
cif 2 years ago
parent a43fb4c78c
commit d99eeb733a

@ -404,7 +404,7 @@
"outputs": [],
"source": [
"#Mean Age and SibSp of passengers grouped by passenger class and sex\n",
"df.groupby(['Pclass', 'Sex'])['Age','SibSp'].mean()"
"df.groupby(['Pclass', 'Sex'])[['Age','SibSp']].mean()"
]
},
{
@ -414,7 +414,7 @@
"outputs": [],
"source": [
"#Show mean Age and SibSp for passengers older than 25 grouped by Passenger Class and Sex\n",
"df[df.Age > 25].groupby(['Pclass', 'Sex'])['Age','SibSp'].mean()"
"df[df.Age > 25].groupby(['Pclass', 'Sex'])[['Age','SibSp']].mean()"
]
},
{
@ -424,7 +424,7 @@
"outputs": [],
"source": [
"# Mean Age, SibSp and Survived of passengers older than 25 who survived, grouped by Passenger Class and Sex\n",
"df[(df.Age > 25 & (df.Survived == 1))].groupby(['Pclass', 'Sex'])['Age','SibSp','Survived'].mean()"
"df[(df.Age > 25) & (df.Survived == 1)].groupby(['Pclass', 'Sex'])[['Age','SibSp','Survived']].mean()"
]
},
{
@ -436,7 +436,7 @@
"# We can also decide which function to apply to each column\n",
"\n",
"#Show mean Age, mean SibSp, and number of passengers older than 25 that survived, grouped by Passenger Class and Sex\n",
"df[(df.Age > 25 & (df.Survived == 1))].groupby(['Pclass', 'Sex'])['Age','SibSp','Survived'].agg({'Age': np.mean, \n",
"df[(df.Age > 25) & (df.Survived == 1)].groupby(['Pclass', 'Sex'])[['Age','SibSp','Survived']].agg({'Age': np.mean, \n",
" 'SibSp': np.mean, 'Survived': np.sum})"
]
},
@ -600,8 +600,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Fill missing values with the median\n",
"df_filled = df.fillna(df.median())\n",
"# Fill missing values with the median; numeric_only=True restricts the median to the numeric columns\n",
"df_filled = df.fillna(df.median(numeric_only=True))\n",
"df_filled[-5:]"
]
},
@ -685,7 +685,7 @@
"metadata": {},
"outputs": [],
"source": [
"# But we are working on a copy \n",
"# But we are working on a copy, so we get a warning\n",
"df.iloc[889]['Sex'] = np.nan"
]
},
@ -695,7 +695,7 @@
"metadata": {},
"outputs": [],
"source": [
"# If we want to change, we should not chain selections\n",
"# If we want to change it, we should not chain selections\n",
"# The selection can be done with the column name\n",
"df.loc[889, 'Sex']"
]
@ -932,11 +932,11 @@
"metadata": {},
"source": [
"* [Pandas](http://pandas.pydata.org/)\n",
"* [Learning Pandas, Michael Heydt, Packt Publishing, 2015](http://proquest.safaribooksonline.com/book/programming/python/9781783985128)\n",
"* [Useful Pandas Snippets](https://gist.github.com/bsweger/e5817488d161f37dcbd2)\n",
"* [Pandas. Introduction to Data Structures](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dsintro)\n",
"* [Learning Pandas, Michael Heydt, Packt Publishing, 2017](https://learning.oreilly.com/library/view/learning-pandas/9781787123137/)\n",
"* [Pandas. Introduction to Data Structures](https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html)\n",
"* [Introducing Pandas Objects](https://www.oreilly.com/learning/introducing-pandas-objects)\n",
"* [Boolean Operators in Pandas](http://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-operators)"
"* [Boolean Operators in Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#boolean-operators)\n",
"* [Useful Pandas Snippets](https://gist.github.com/bsweger/e5817488d161f37dcbd2)"
]
},
{
@ -958,7 +958,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@ -972,7 +972,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
"version": "3.8.12"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,

Loading…
Cancel
Save