commit 46d6dd131ce46398107a2990abc96dd5cca73a65 Author: J. Fernando Sánchez Date: Thu Oct 11 16:12:07 2018 +0200 diff --git a/descarga.ipynb b/descarga.ipynb new file mode 100644 index 0000000..a883714 --- /dev/null +++ b/descarga.ipynb @@ -0,0 +1,308 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "!bitter tweet get_all lista-tweets.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('untitled.txt', names=['id', 'label'])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlabel
0631659865748860929ironic
1602202801092038656ironic
\n", + "
" + ], + "text/plain": [ + " id label\n", + "0 631659865748860929 ironic\n", + "1 602202801092038656 ironic" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "get_text(631659865748860929)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import os\n", + "\n", + "def get_text(tweetid):\n", + " tweetfile = 'tweets/{}.json'.format(tweetid)\n", + " if not os.path.exists(tweetfile):\n", + " return \"\"\n", + " return json.load(open(tweetfile))['text']" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Uy que emoción! #sarcasmo #noquiero #yatienesuno :( @Cris_baoc https://t.co/Roe4H1D4d6'" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_text(631659865748860929)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "df['text'] = df['id'].apply(get_text)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlabeltext
0631659865748860929ironicUy que emoción! #sarcasmo #noquiero #yatienesu...
1602202801092038656ironic
\n", + "
" + ], + "text/plain": [ + " id label \\\n", + "0 631659865748860929 ironic \n", + "1 602202801092038656 ironic \n", + "\n", + " text \n", + "0 Uy que emoción! #sarcasmo #noquiero #yatienesu... \n", + "1 " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlabeltext
0631659865748860929ironicUy que emoción! #sarcasmo #noquiero #yatienesu...
\n", + "
" + ], + "text/plain": [ + " id label \\\n", + "0 631659865748860929 ironic \n", + "\n", + " text \n", + "0 Uy que emoción! #sarcasmo #noquiero #yatienesu... " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df['text'] != \"\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}