1
0
mirror of https://github.com/gsi-upm/sitc synced 2024-11-14 10:32:29 +00:00

Updated median with only numeric values

This commit is contained in:
cif 2022-03-07 12:44:14 +01:00
parent a43fb4c78c
commit d99eeb733a

View File

@ -404,7 +404,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"#Mean Age and SibSp of passengers grouped by passenger class and sex\n", "#Mean Age and SibSp of passengers grouped by passenger class and sex\n",
"df.groupby(['Pclass', 'Sex'])['Age','SibSp'].mean()" "df.groupby(['Pclass', 'Sex'])[['Age','SibSp']].mean()"
] ]
}, },
{ {
@ -414,7 +414,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"#Show mean Age and SibSp for passengers older than 25 grouped by Passenger Class and Sex\n", "#Show mean Age and SibSp for passengers older than 25 grouped by Passenger Class and Sex\n",
"df[df.Age > 25].groupby(['Pclass', 'Sex'])['Age','SibSp'].mean()" "df[df.Age > 25].groupby(['Pclass', 'Sex'])[['Age','SibSp']].mean()"
] ]
}, },
{ {
@ -424,7 +424,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Mean Age, SibSp, Survived of passengers older than 25 who survived, grouped by Passenger Class and Sex\n", "# Mean Age, SibSp, Survived of passengers older than 25 who survived, grouped by Passenger Class and Sex\n",
"df[(df.Age > 25 & (df.Survived == 1))].groupby(['Pclass', 'Sex'])['Age','SibSp','Survived'].mean()" "df[(df.Age > 25 & (df.Survived == 1))].groupby(['Pclass', 'Sex'])[['Age','SibSp','Survived']].mean()"
] ]
}, },
{ {
@ -436,7 +436,7 @@
"# We can also decide which function to apply to each column\n", "# We can also decide which function to apply to each column\n",
"\n", "\n",
"#Show mean Age, mean SibSp, and number of passengers older than 25 that survived, grouped by Passenger Class and Sex\n", "#Show mean Age, mean SibSp, and number of passengers older than 25 that survived, grouped by Passenger Class and Sex\n",
"df[(df.Age > 25 & (df.Survived == 1))].groupby(['Pclass', 'Sex'])['Age','SibSp','Survived'].agg({'Age': np.mean, \n", "df[(df.Age > 25 & (df.Survived == 1))].groupby(['Pclass', 'Sex'])[['Age','SibSp','Survived']].agg({'Age': np.mean, \n",
" 'SibSp': np.mean, 'Survived': np.sum})" " 'SibSp': np.mean, 'Survived': np.sum})"
] ]
}, },
@ -600,8 +600,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Fill missing values with the median\n", "# Fill missing values with the median; numeric_only=True computes it over numeric columns only\n",
"df_filled = df.fillna(df.median())\n", "df_filled = df.fillna(df.median(numeric_only=True))\n",
"df_filled[-5:]" "df_filled[-5:]"
] ]
}, },
@ -685,7 +685,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# But we are working on a copy \n", "# But we are working on a copy, so we get a warning\n",
"df.iloc[889]['Sex'] = np.nan" "df.iloc[889]['Sex'] = np.nan"
] ]
}, },
@ -695,7 +695,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# If we want to change, we should not chain selections\n", "# If we want to change it, we should not chain selections\n",
"# The selection can be done with the column name\n", "# The selection can be done with the column name\n",
"df.loc[889, 'Sex']" "df.loc[889, 'Sex']"
] ]
@ -932,11 +932,11 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"* [Pandas](http://pandas.pydata.org/)\n", "* [Pandas](http://pandas.pydata.org/)\n",
"* [Learning Pandas, Michael Heydt, Packt Publishing, 2015](http://proquest.safaribooksonline.com/book/programming/python/9781783985128)\n", "* [Learning Pandas, Michael Heydt, Packt Publishing, 2017](https://learning.oreilly.com/library/view/learning-pandas/9781787123137/)\n",
"* [Useful Pandas Snippets](https://gist.github.com/bsweger/e5817488d161f37dcbd2)\n", "* [Pandas. Introduction to Data Structures](https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html)\n",
"* [Pandas. Introduction to Data Structures](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dsintro)\n",
"* [Introducing Pandas Objects](https://www.oreilly.com/learning/introducing-pandas-objects)\n", "* [Introducing Pandas Objects](https://www.oreilly.com/learning/introducing-pandas-objects)\n",
"* [Boolean Operators in Pandas](http://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-operators)" "* [Boolean Operators in Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#boolean-operators)\n",
"* [Useful Pandas Snippets](https://gist.github.com/bsweger/e5817488d161f37dcbd2)"
] ]
}, },
{ {
@ -958,7 +958,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@ -972,7 +972,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.7.1" "version": "3.8.12"
}, },
"latex_envs": { "latex_envs": {
"LaTeX_envs_menu_present": true, "LaTeX_envs_menu_present": true,