mirror of
https://github.com/gsi-upm/sitc
synced 2025-08-24 02:22:21 +00:00
Remove outputs and metadata
This commit is contained in:
@@ -123,183 +123,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>essay_id</th>\n",
|
||||
" <th>essay_set</th>\n",
|
||||
" <th>essay</th>\n",
|
||||
" <th>rater1_domain1</th>\n",
|
||||
" <th>rater2_domain1</th>\n",
|
||||
" <th>rater3_domain1</th>\n",
|
||||
" <th>domain1_score</th>\n",
|
||||
" <th>rater1_domain2</th>\n",
|
||||
" <th>rater2_domain2</th>\n",
|
||||
" <th>domain2_score</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>rater2_trait3</th>\n",
|
||||
" <th>rater2_trait4</th>\n",
|
||||
" <th>rater2_trait5</th>\n",
|
||||
" <th>rater2_trait6</th>\n",
|
||||
" <th>rater3_trait1</th>\n",
|
||||
" <th>rater3_trait2</th>\n",
|
||||
" <th>rater3_trait3</th>\n",
|
||||
" <th>rater3_trait4</th>\n",
|
||||
" <th>rater3_trait5</th>\n",
|
||||
" <th>rater3_trait6</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Dear local newspaper, I think effects computer...</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Dear @CAPS1 @CAPS2, I believe that using compu...</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>7</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Dear Local Newspaper, @CAPS1 I have found that...</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>4 rows × 28 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" essay_id essay_set essay \\\n",
|
||||
"0 1 1 Dear local newspaper, I think effects computer... \n",
|
||||
"1 2 1 Dear @CAPS1 @CAPS2, I believe that using compu... \n",
|
||||
"2 3 1 Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl... \n",
|
||||
"3 4 1 Dear Local Newspaper, @CAPS1 I have found that... \n",
|
||||
"\n",
|
||||
" rater1_domain1 rater2_domain1 rater3_domain1 domain1_score \\\n",
|
||||
"0 4 4 NaN 8 \n",
|
||||
"1 5 4 NaN 9 \n",
|
||||
"2 4 3 NaN 7 \n",
|
||||
"3 5 5 NaN 10 \n",
|
||||
"\n",
|
||||
" rater1_domain2 rater2_domain2 domain2_score ... \\\n",
|
||||
"0 NaN NaN NaN ... \n",
|
||||
"1 NaN NaN NaN ... \n",
|
||||
"2 NaN NaN NaN ... \n",
|
||||
"3 NaN NaN NaN ... \n",
|
||||
"\n",
|
||||
" rater2_trait3 rater2_trait4 rater2_trait5 rater2_trait6 rater3_trait1 \\\n",
|
||||
"0 NaN NaN NaN NaN NaN \n",
|
||||
"1 NaN NaN NaN NaN NaN \n",
|
||||
"2 NaN NaN NaN NaN NaN \n",
|
||||
"3 NaN NaN NaN NaN NaN \n",
|
||||
"\n",
|
||||
" rater3_trait2 rater3_trait3 rater3_trait4 rater3_trait5 rater3_trait6 \n",
|
||||
"0 NaN NaN NaN NaN NaN \n",
|
||||
"1 NaN NaN NaN NaN NaN \n",
|
||||
"2 NaN NaN NaN NaN NaN \n",
|
||||
"3 NaN NaN NaN NaN NaN \n",
|
||||
"\n",
|
||||
"[4 rows x 28 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
@@ -311,44 +137,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(12976, 28)"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_orig.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(1783, 3)"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# We filter the data of the essay_set number 1, and we keep only two columns for this \n",
|
||||
"# example\n",
|
||||
@@ -359,83 +159,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>essay_id</th>\n",
|
||||
" <th>essay</th>\n",
|
||||
" <th>domain1_score</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Dear local newspaper, I think effects computer...</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>Dear @CAPS1 @CAPS2, I believe that using compu...</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...</td>\n",
|
||||
" <td>7</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>Dear Local Newspaper, @CAPS1 I have found that...</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>Dear @LOCATION1, I know having computers has a...</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" essay_id essay domain1_score\n",
|
||||
"0 1 Dear local newspaper, I think effects computer... 8\n",
|
||||
"1 2 Dear @CAPS1 @CAPS2, I believe that using compu... 9\n",
|
||||
"2 3 Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl... 7\n",
|
||||
"3 4 Dear Local Newspaper, @CAPS1 I have found that... 10\n",
|
||||
"4 5 Dear @LOCATION1, I know having computers has a... 8"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df[0:5]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define X and Y\n",
|
||||
@@ -468,10 +202,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Generic Transformer \n",
|
||||
@@ -509,10 +241,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sample of statistics using nltk\n",
|
||||
@@ -541,10 +271,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.base import BaseEstimator, TransformerMixin\n",
|
||||
@@ -581,10 +309,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.base import BaseEstimator, TransformerMixin\n",
|
||||
@@ -635,10 +361,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.pipeline import Pipeline, FeatureUnion\n",
|
||||
@@ -674,23 +398,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Scores in every iteration [ 0.39798206 0.27497194]\n",
|
||||
"Accuracy: 0.34 (+/- 0.12)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.naive_bayes import MultinomialNB\n",
|
||||
"from sklearn.cross_validation import cross_val_score, KFold\n",
|
||||
"from sklearn.model_selection import cross_val_score, KFold\n",
|
||||
"from sklearn.metrics import classification_report\n",
|
||||
"from sklearn.feature_extraction import DictVectorizer\n",
|
||||
"from sklearn.preprocessing import FunctionTransformer\n",
|
||||
@@ -726,7 +439,7 @@
|
||||
"\n",
|
||||
"# Using KFold validation\n",
|
||||
"\n",
|
||||
"cv = KFold(X.shape[0], 2, shuffle=True, random_state=33)\n",
|
||||
"cv = KFold(2, shuffle=True, random_state=33)\n",
|
||||
"scores = cross_val_score(pipeline, X, y, cv=cv)\n",
|
||||
"print(\"Scores in every iteration\", scores)\n",
|
||||
"print(\"Accuracy: %0.2f (+/- %0.2f)\" % (scores.mean(), scores.std() * 2))"
|
||||
@@ -734,9 +447,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The result is not very good :(."
|
||||
]
|
||||
@@ -789,9 +500,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.1"
|
||||
"version": "3.6.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
|
Reference in New Issue
Block a user