import pandas as pd


def read_rts(rtsfile, tweetsfile):
    """Count retweets per (tweet id, retweeting user) and attach tweet data.

    Args:
        rtsfile: CSV of retweets; its second column is used as the index.
            The grouping below refers to ``id`` and ``rt_user_id``, so both
            must be resolvable as an index name or a column of this file
            (presumably the index column is the one named ``id`` -- confirm
            against the data).
        tweetsfile: CSV of tweets; its first column is used as the index.

    Returns:
        A DataFrame indexed by tweet id holding ``rt_user_id``, ``count``
        and the tweet columns, sorted by ``count`` in descending order.
        Only ids present in both files are kept (default inner merge).
    """
    tweets = pd.read_csv(tweetsfile, index_col=0)
    rts = pd.read_csv(rtsfile, index_col=1)

    # Number of retweet rows for every (id, rt_user_id) pair.
    pair_counts = rts.groupby(by=['id', 'rt_user_id']).size().rename('count')
    # Move rt_user_id back to a regular column so only the id stays in the
    # index and can be matched against the tweets index.
    per_tweet = pair_counts.reset_index(level=1)

    merged = per_tweet.merge(tweets, left_index=True, right_index=True)
    return merged.sort_values(by='count', ascending=False)


def read_tweets(tweetsfile):
    """Load a tweets CSV as-is when the dataset is small enough.

    Columns whose name ends with ``_str`` or ``.id`` are read with dtype
    ``object`` so pandas keeps their text verbatim instead of inferring a
    numeric type. The first column becomes the index.

    Args:
        tweetsfile: Path of the CSV file to load.

    Returns:
        The tweets as a DataFrame indexed by the first column.
    """
    # Let pandas parse the header row (nrows=0 reads no data rows) so the
    # column names are exactly the ones the full read will see; splitting
    # the raw line on ',' breaks on quoted header fields.
    columns = pd.read_csv(tweetsfile, nrows=0).columns
    dtypes = {
        key: object
        for key in columns
        if key.endswith(('_str', '.id'))
    }
    return pd.read_csv(tweetsfile, dtype=dtypes, index_col=0)


if __name__ == '__main__':
    import argparse