mirror of
https://github.com/balkian/bitter.git
synced 2024-12-22 16:38:13 +00:00
24 lines
805 B
Python
24 lines
805 B
Python
|
import pandas as pd
|
||
|
|
||
|
def read_rts(rtsfile, tweetsfile):
    """Count retweet rows per (id, rt_user_id) pair and join them with tweets.

    Args:
        rtsfile: CSV source for the retweets table; its second column is
            used as the index.
        tweetsfile: CSV source for the tweets table; its first column is
            used as the index.

    Returns:
        A DataFrame holding ``rt_user_id``, ``count`` and the columns of the
        tweets table, joined on the index (``merge`` default, i.e. an inner
        join) and ordered by ``count`` from highest to lowest.
    """
    tweet_table = pd.read_csv(tweetsfile, index_col=0)
    retweet_table = pd.read_csv(rtsfile, index_col=1)

    # Number of rows for every (id, rt_user_id) combination.
    pair_sizes = retweet_table.groupby(by=['id', 'rt_user_id']).size()
    pair_counts = pair_sizes.rename('count')

    # Move 'rt_user_id' back to a regular column so that only the first
    # grouping key is left in the index for the index-on-index join below.
    per_tweet = pair_counts.reset_index(level=1)
    joined = per_tweet.merge(tweet_table, left_index=True, right_index=True)

    return joined.sort_values(by='count', ascending=False)
|
||
|
|
||
|
|
||
|
def read_tweets(tweetsfile):
    """Load a tweets CSV, keeping identifier columns as strings.

    When the dataset is small enough, we can load tweets as-is.

    Columns whose name ends in ``_str`` or ``.id`` are read with ``object``
    dtype, so their values are not converted to numbers (which would drop
    leading zeros or lose precision on very large values).

    Args:
        tweetsfile: Path of the CSV file. It is opened twice: once to read
            the header row and once by ``pd.read_csv``.

    Returns:
        A DataFrame indexed by the first column of the file.
    """
    import csv

    # Parse the header with the csv module rather than a plain split(','),
    # so quoted column names (e.g. "id_str") are unquoted exactly the way
    # pandas will name them; otherwise the suffix check below misses them.
    # UTF-8 matches the default encoding pd.read_csv uses for the same file.
    with open(tweetsfile, encoding='utf-8', newline='') as f:
        header = next(csv.reader(f), [])

    dtypes = {
        key: object
        for key in header
        if key.endswith(('_str', '.id'))
    }
    tweets = pd.read_csv(tweetsfile, dtype=dtypes, index_col=0)
    return tweets
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
|
||
|
import argparse
|