gists/repos/004008aea84ab19b153b4cecd40e1461/Demo.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook provides a tutorial on how to use the library."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import logging\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Datasets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Dataset management is made simple. You can view the available datasets:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "- sentiment140:\n",
      "    \t Downloaded: True\n",
      "    \t # instances: 1600000\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from gsitk.datasets.datasets import DatasetManager\n",
    "\n",
    "dm = DatasetManager()\n",
    "dm.view_datasets()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Preparing the data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:gsitk.datasets.datasets:Preparing data: sentiment140\n",
      "DEBUG:gsitk.datasets.utils:Checking data path: /data/sentiment140\n",
      "DEBUG:gsitk.datasets.utils:Verified: trainingandtestdata.zip\n",
      "DEBUG:gsitk.datasets.datasets:sentiment140 data is ready\n"
     ]
    }
   ],
   "source": [
    "data = dm.prepare_datasets()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['sentiment140'])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.keys()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The returned data is a dictionary keyed by dataset name; each value is a simple pandas DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>polarity</th>\n",
       "      <th>text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1</td>\n",
       "      <td>['user', 'url', 'aw', 'elong', ',', 'thats', '...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1</td>\n",
       "      <td>['is', 'upset', 'that', 'he', 'cant', 'update'...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1</td>\n",
       "      <td>['user', 'i', 'dived', 'many', 'times', 'for',...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1</td>\n",
       "      <td>['my', 'whole', 'body', 'feels', 'itchy', 'and...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1</td>\n",
       "      <td>['user', 'no', ',', 'its', 'not', 'behaving', ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   polarity                                               text\n",
       "0        -1  ['user', 'url', 'aw', 'elong', ',', 'thats', '...\n",
       "1        -1  ['is', 'upset', 'that', 'he', 'cant', 'update'...\n",
       "2        -1  ['user', 'i', 'dived', 'many', 'times', 'for',...\n",
       "3        -1  ['my', 'whole', 'body', 'feels', 'itchy', 'and...\n",
       "4        -1  ['user', 'no', ',', 'its', 'not', 'behaving', ..."
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['sentiment140'].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To use a word2vec model as a feature extractor, write:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:gensim.utils:loading Word2Vec object from /data/w2vmodel_500d_5mc\n",
      "INFO:gensim.utils:loading syn0 from /data/w2vmodel_500d_5mc.syn0.npy with mmap=None\n",
      "INFO:gensim.utils:loading syn1 from /data/w2vmodel_500d_5mc.syn1.npy with mmap=None\n",
      "INFO:gensim.utils:setting ignored attribute syn0norm to None\n",
      "INFO:gensim.utils:setting ignored attribute cum_table to None\n",
      "INFO:gensim.utils:loaded /data/w2vmodel_500d_5mc\n"
     ]
    }
   ],
   "source": [
    "from gsitk.features.word2vec import Word2VecFeatures\n",
    "\n",
    "w2v_feat = Word2VecFeatures(w2v_model_path='/data/w2vmodel_500d_5mc')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Features are extracted with the `transform` method, which every feature extractor implements."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1600000, 500)"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "transformed = w2v_feat.transform(data['sentiment140']['text'].values)\n",
    "transformed.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If extracting the features is time-consuming, you can save the features locally:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from gsitk.features import features\n",
    "\n",
    "features.save_features(transformed, 'w2v__sentiment40')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And you can load them later:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:gsitk.features.utils:Reading features from w2v__sentiment\n",
      "DEBUG:gsitk.features.utils:Features are in /data/features/w2v__sentiment40.npy\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[-0.03798573,  0.03630935,  0.08243822, ..., -0.0287797 ,\n",
       "         0.00937027,  0.21814214],\n",
       "       [-0.06142361, -0.03791333,  0.18094143, ...,  0.00306141,\n",
       "         0.08196757,  0.02467711],\n",
       "       [-0.03798573,  0.03630935,  0.08243822, ..., -0.0287797 ,\n",
       "         0.00937027,  0.21814214],\n",
       "       ..., \n",
       "       [-0.03798573,  0.03630935,  0.08243822, ..., -0.0287797 ,\n",
       "         0.00937027,  0.21814214],\n",
       "       [-0.03798573,  0.03630935,  0.08243822, ..., -0.0287797 ,\n",
       "         0.00937027,  0.21814214],\n",
       "       [-0.03798573,  0.03630935,  0.08243822, ..., -0.0287797 ,\n",
       "         0.00937027,  0.21814214]])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "utils.load_features('w2v__sentiment')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pipes and Evaluation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The evaluation process uses pipes. Pipes are a way of organizing the different elements of the evaluation. Pipes are represented by EvalTuples, which specify the datasets, features and classifiers we want to evaluate."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If we want to include a classifier in our evaluation:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,\n",
       "       eta0=0.0, fit_intercept=True, l1_ratio=0.15,\n",
       "       learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=-1,\n",
       "       penalty='l2', power_t=0.5, random_state=None, shuffle=True,\n",
       "       verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from gsitk.pipe import Model, Features, EvalTuple\n",
    "from sklearn.linear_model import SGDClassifier\n",
    "\n",
    "sgd = SGDClassifier(n_jobs=-1)\n",
    "sgd.fit(transformed, data['sentiment140']['polarity'].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "models = [Model(name='sgd', classifier=sgd)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Including features:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "feats = [Features(name='w2v__sentiment140', dataset='sentiment140', values=transformed)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Putting them together:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ets = [EvalTuple(classifier='sgd', features='w2v__sentiment140', labels='sentiment140')]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Running the evaluation:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from gsitk.evaluation.evaluation import Evaluation\n",
    "\n",
    "ev = Evaluation(datasets=data, features=feats, models=models, tuples=ets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "DEBUG:gsitk.evaluation.evaluation:Model sgd predicting from features w2v__sentiment140\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Dataset</th>\n",
       "      <th>Features</th>\n",
       "      <th>Model</th>\n",
       "      <th>Accuracy</th>\n",
       "      <th>Precision</th>\n",
       "      <th>Recall</th>\n",
       "      <th>F1-Score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>sentiment140</td>\n",
       "      <td>w2v__sentiment140</td>\n",
       "      <td>sgd</td>\n",
       "      <td>0.589035</td>\n",
       "      <td>0.577554</td>\n",
       "      <td>0.663056</td>\n",
       "      <td>0.617359</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Dataset           Features Model  Accuracy Precision    Recall  \\\n",
       "0  sentiment140  w2v__sentiment140   sgd  0.589035  0.577554  0.663056   \n",
       "\n",
       "   F1-Score  \n",
       "0  0.617359  "
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ev.evaluate()\n",
    "ev.results"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
2017-05-05 09:50:56 +00:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"This notebook provides a tutorial on how to use the library."`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 1,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"%load_ext autoreload"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 2,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"%autoreload 2"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 3,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"import logging\n",`
			`"\n",`
			`"logging.basicConfig(level=logging.DEBUG)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"# Datasets"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"Dataset management is made simple. You can view the available datasets:"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 14,`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"- sentiment140:\n",`
			`" \t Downloaded: True\n",`
			`" \t # instances: 1600000\n",`
			`"\n",`
			`"\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"from gsitk.datasets.datasets import DatasetManager\n",`
			`"\n",`
			`"dm = DatasetManager()\n",`
			`"dm.view_datasets()"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"Preparing the data:"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 15,`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"outputs": [`
			`{`
			`"name": "stderr",`
			`"output_type": "stream",`
			`"text": [`
			`"DEBUG:gsitk.datasets.datasets:Preparing data: sentiment140\n",`
			`"DEBUG:gsitk.datasets.utils:Checking data path: /data/sentiment140\n",`
			`"DEBUG:gsitk.datasets.utils:Verified: trainingandtestdata.zip\n",`
			`"DEBUG:gsitk.datasets.datasets:sentiment140 data is ready\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"data = dm.prepare_datasets()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 16,`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"dict_keys(['sentiment140'])"`
			`]`
			`},`
			`"execution_count": 16,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"data.keys()"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"The returned data is a dictionary keyed by dataset name; each value is a simple pandas DataFrame."`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 17,`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/html": [`
			`"<div>\n",`
			`"<table border=\"1\" class=\"dataframe\">\n",`
			`" <thead>\n",`
			`" <tr style=\"text-align: right;\">\n",`
			`" <th></th>\n",`
			`" <th>polarity</th>\n",`
			`" <th>text</th>\n",`
			`" </tr>\n",`
			`" </thead>\n",`
			`" <tbody>\n",`
			`" <tr>\n",`
			`" <th>0</th>\n",`
			`" <td>-1</td>\n",`
			`" <td>['user', 'url', 'aw', 'elong', ',', 'thats', '...</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>1</th>\n",`
			`" <td>-1</td>\n",`
			`" <td>['is', 'upset', 'that', 'he', 'cant', 'update'...</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>2</th>\n",`
			`" <td>-1</td>\n",`
			`" <td>['user', 'i', 'dived', 'many', 'times', 'for',...</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>3</th>\n",`
			`" <td>-1</td>\n",`
			`" <td>['my', 'whole', 'body', 'feels', 'itchy', 'and...</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>4</th>\n",`
			`" <td>-1</td>\n",`
			`" <td>['user', 'no', ',', 'its', 'not', 'behaving', ...</td>\n",`
			`" </tr>\n",`
			`" </tbody>\n",`
			`"</table>\n",`
			`"</div>"`
			`],`
			`"text/plain": [`
			`" polarity text\n",`
			`"0 -1 ['user', 'url', 'aw', 'elong', ',', 'thats', '...\n",`
			`"1 -1 ['is', 'upset', 'that', 'he', 'cant', 'update'...\n",`
			`"2 -1 ['user', 'i', 'dived', 'many', 'times', 'for',...\n",`
			`"3 -1 ['my', 'whole', 'body', 'feels', 'itchy', 'and...\n",`
			`"4 -1 ['user', 'no', ',', 'its', 'not', 'behaving', ..."`
			`]`
			`},`
			`"execution_count": 17,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"data['sentiment140'].head()"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"# Preprocessing"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": []`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"# Features"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"To use a word2vec model as a feature extractor, write:"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 20,`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"outputs": [`
			`{`
			`"name": "stderr",`
			`"output_type": "stream",`
			`"text": [`
			`"INFO:gensim.utils:loading Word2Vec object from /data/w2vmodel_500d_5mc\n",`
			`"INFO:gensim.utils:loading syn0 from /data/w2vmodel_500d_5mc.syn0.npy with mmap=None\n",`
			`"INFO:gensim.utils:loading syn1 from /data/w2vmodel_500d_5mc.syn1.npy with mmap=None\n",`
			`"INFO:gensim.utils:setting ignored attribute syn0norm to None\n",`
			`"INFO:gensim.utils:setting ignored attribute cum_table to None\n",`
			`"INFO:gensim.utils:loaded /data/w2vmodel_500d_5mc\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"from gsitk.features.word2vec import Word2VecFeatures\n",`
			`"\n",`
			`"w2v_feat = Word2VecFeatures(w2v_model_path='/data/w2vmodel_500d_5mc')"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			"Features are extracted with the `transform` method, which every feature extractor implements."
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 48,`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"(1600000, 500)"`
			`]`
			`},`
			`"execution_count": 48,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"transformed = w2v_feat.transform(data['sentiment140']['text'].values)\n",`
			`"transformed.shape"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"If extracting the features is time-consuming, you can save the features locally:"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 59,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"from gsitk.features import features\n",`
			`"\n",`
			`"features.save_features(transformed, 'w2v__sentiment40')"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"And you can load them later:"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 29,`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"outputs": [`
			`{`
			`"name": "stderr",`
			`"output_type": "stream",`
			`"text": [`
			`"DEBUG:gsitk.features.utils:Reading features from w2v__sentiment\n",`
			`"DEBUG:gsitk.features.utils:Features are in /data/features/w2v__sentiment40.npy\n"`
			`]`
			`},`
			`{`
			`"data": {`
			`"text/plain": [`
			`"array([[-0.03798573, 0.03630935, 0.08243822, ..., -0.0287797 ,\n",`
			`" 0.00937027, 0.21814214],\n",`
			`" [-0.06142361, -0.03791333, 0.18094143, ..., 0.00306141,\n",`
			`" 0.08196757, 0.02467711],\n",`
			`" [-0.03798573, 0.03630935, 0.08243822, ..., -0.0287797 ,\n",`
			`" 0.00937027, 0.21814214],\n",`
			`" ..., \n",`
			`" [-0.03798573, 0.03630935, 0.08243822, ..., -0.0287797 ,\n",`
			`" 0.00937027, 0.21814214],\n",`
			`" [-0.03798573, 0.03630935, 0.08243822, ..., -0.0287797 ,\n",`
			`" 0.00937027, 0.21814214],\n",`
			`" [-0.03798573, 0.03630935, 0.08243822, ..., -0.0287797 ,\n",`
			`" 0.00937027, 0.21814214]])"`
			`]`
			`},`
			`"execution_count": 29,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"utils.load_features('w2v__sentiment')"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"# Pipes and Evaluation"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"The evaluation process uses pipes. Pipes are a way of organizing the different elements of the evaluation. Pipes are represented by EvalTuples, which specify the datasets, features and classifiers we want to evaluate."`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"If we want to include a classifier in our evaluation:"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 49,`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,\n",`
			`" eta0=0.0, fit_intercept=True, l1_ratio=0.15,\n",`
			`" learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=-1,\n",`
			`" penalty='l2', power_t=0.5, random_state=None, shuffle=True,\n",`
			`" verbose=0, warm_start=False)"`
			`]`
			`},`
			`"execution_count": 49,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"from gsitk.pipe import Model, Features, EvalTuple\n",`
			`"from sklearn.linear_model import SGDClassifier\n",`
			`"\n",`
			`"sgd = SGDClassifier(n_jobs=-1)\n",`
			`"sgd.fit(transformed, data['sentiment140']['polarity'].values)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 50,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"models = [Model(name='sgd', classifier=sgd)]"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"Including features:"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 51,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"feats = [Features(name='w2v__sentiment140', dataset='sentiment140', values=transformed)]"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"Putting them together:"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 52,`
			`"metadata": {`
			`"collapsed": true`
			`},`
			`"outputs": [],`
			`"source": [`
			`"ets = [EvalTuple(classifier='sgd', features='w2v__sentiment140', labels='sentiment140')]"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"Running the evaluation:"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 57,`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"outputs": [],`
			`"source": [`
			`"from gsitk.evaluation.evaluation import Evaluation\n",`
			`"\n",`
			`"ev = Evaluation(datasets=data, features=feats, models=models, tuples=ets)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 58,`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"outputs": [`
			`{`
			`"name": "stderr",`
			`"output_type": "stream",`
			`"text": [`
			`"DEBUG:gsitk.evaluation.evaluation:Model sgd predicting from features w2v__sentiment140\n"`
			`]`
			`},`
			`{`
			`"data": {`
			`"text/html": [`
			`"<div>\n",`
			`"<table border=\"1\" class=\"dataframe\">\n",`
			`" <thead>\n",`
			`" <tr style=\"text-align: right;\">\n",`
			`" <th></th>\n",`
			`" <th>Dataset</th>\n",`
			`" <th>Features</th>\n",`
			`" <th>Model</th>\n",`
			`" <th>Accuracy</th>\n",`
			`" <th>Precision</th>\n",`
			`" <th>Recall</th>\n",`
			`" <th>F1-Score</th>\n",`
			`" </tr>\n",`
			`" </thead>\n",`
			`" <tbody>\n",`
			`" <tr>\n",`
			`" <th>0</th>\n",`
			`" <td>sentiment140</td>\n",`
			`" <td>w2v__sentiment140</td>\n",`
			`" <td>sgd</td>\n",`
			`" <td>0.589035</td>\n",`
			`" <td>0.577554</td>\n",`
			`" <td>0.663056</td>\n",`
			`" <td>0.617359</td>\n",`
			`" </tr>\n",`
			`" </tbody>\n",`
			`"</table>\n",`
			`"</div>"`
			`],`
			`"text/plain": [`
			`" Dataset Features Model Accuracy Precision Recall \\\n",`
			`"0 sentiment140 w2v__sentiment140 sgd 0.589035 0.577554 0.663056 \n",`
			`"\n",`
			`" F1-Score \n",`
			`"0 0.617359 "`
			`]`
			`},`
			`"execution_count": 58,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"ev.evaluate()\n",`
			`"ev.results"`
			`]`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
			`"version": "3.5.2"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 2`
			`}`