{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "!bitter tweet get_all lista-tweets.csv" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import os" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('untitled.txt', names=['id', 'label'])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idlabel
0631659865748860929ironic
1602202801092038656ironic
\n", "
" ], "text/plain": [ " id label\n", "0 631659865748860929 ironic\n", "1 602202801092038656 ironic" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] },
{ "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [
"import json\n",
"import os\n",
"\n",
"def get_text(tweetid):\n",
"    \"\"\"Return the 'text' field of a locally stored tweet.\n",
"\n",
"    Looks for tweets/<tweetid>.json relative to the working directory\n",
"    and returns an empty string when that file does not exist.\n",
"    \"\"\"\n",
"    tweetfile = 'tweets/{}.json'.format(tweetid)\n",
"    if not os.path.exists(tweetfile):\n",
"        return \"\"\n",
"    # Use a context manager so the handle is closed, and decode as UTF-8\n",
"    # rather than the platform default (tweet text is not ASCII-only).\n",
"    with open(tweetfile, encoding='utf-8') as tweet_fh:\n",
"        return json.load(tweet_fh)['text']"
] },
{ "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Uy que emoción! #sarcasmo #noquiero #yatienesuno :( @Cris_baoc https://t.co/Roe4H1D4d6'" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_text(631659865748860929)" ] },
{ "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "df['text'] = df['id'].apply(get_text)" ] },
{ "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idlabeltext
0631659865748860929ironicUy que emoción! #sarcasmo #noquiero #yatienesu...
1602202801092038656ironic
\n", "
" ], "text/plain": [ " id label \\\n", "0 631659865748860929 ironic \n", "1 602202801092038656 ironic \n", "\n", " text \n", "0 Uy que emoción! #sarcasmo #noquiero #yatienesu... \n", "1 " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idlabeltext
0631659865748860929ironicUy que emoción! #sarcasmo #noquiero #yatienesu...
\n", "
" ], "text/plain": [ " id label \\\n", "0 631659865748860929 ironic \n", "\n", " text \n", "0 Uy que emoción! #sarcasmo #noquiero #yatienesu... " ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['text'] != \"\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }