1
0
mirror of https://github.com/balkian/TwitterDigger.git synced 2024-11-25 01:22:28 +00:00

Clumsy threading

This commit is contained in:
J.Fernando Sánchez 2013-03-15 16:11:46 +01:00
parent 36cd334b2e
commit 30d46c4edc

226
myego.py
View File

@@ -6,124 +6,146 @@ import math
import signal import signal
import sys import sys
import argparse import argparse
import threading
from twitter import Twitter, OAuth, TwitterHTTPError from twitter import Twitter, OAuth, TwitterHTTPError
from httplib import IncompleteRead from httplib import IncompleteRead
parser = argparse.ArgumentParser(description='Get an ego network for a given ID.') class Fetcher(threading.Thread):
def __init__(self, fname, credentials):
    """Set up a fetcher thread that owns its own Twitter client.

    fname -- label kept on the instance as ``self.fname``.
    credentials -- object exposing the ACCESS_TOKEN, ACCESS_TOKEN_SECRET,
                   CONSUMER_KEY and CONSUMER_SECRET attributes that are
                   handed to OAuth, in that order.
    """
    # Build the OAuth handle first so the client construction reads cleanly.
    oauth = OAuth(
        credentials.ACCESS_TOKEN,
        credentials.ACCESS_TOKEN_SECRET,
        credentials.CONSUMER_KEY,
        credentials.CONSUMER_SECRET,
    )
    self.t = Twitter(auth=oauth)
    self.fname = fname
    # Base-class initialisation is kept last, as in the original ordering.
    threading.Thread.__init__(self)
parser.add_argument('NAME', nargs='?', metavar='id', def run(self):
default='me', type=str, help='Name of the user to be used as a center') while True:
timetowait = 0
try:
lock.acquire()
print "Iteration! for Fetcher %s" % self.fname
nextuser=None
minimumscore=1000
unknowns = [person for person in pending if person not in userobject]
for i in xrange(0,int(math.ceil(len(unknowns)/100.0))):
piece = unknowns[i*100:(i+1)*100]
self.getinfo(piece)
for user in pending.copy():
userscore = userobject[user]['followers_count']**distance[user]
if(userscore < minimumscore):
minimumscore = userscore
nextuser=user
self.explore_user(nextuser)
pending.remove(nextuser)
sh.sync()
timetowait = 5
except TwitterHTTPError as ex:
print "Exception %s - %s" % (ex,type(ex))
if ex.e.code/10 == 42:
print "Sleeping for 1 minute"
timetowait = 60
except IncompleteRead:
print "IncompleteRead!"
timetowait = 5
except Exception as ex:
print "Exception"
raise ex
finally:
lock.release()
if timetowait:
time.sleep(timetowait)
parser.add_argument('--new','-n',action='store_true', help='Start a new search.') def getinfo(self,piece):
piecestr = ",".join(map(str,piece)) if type(piece) != str and len(piece)>1 else piece[0]
look=self.t.users.lookup(user_id=piecestr)
retrievedids=[]
for ob in look:
uid=ob['id']
userobject[uid]=ob
retrievedids.append(uid)
print "Got info for %s" % repr(piece)
if type(piece) != int:
print "Total: %s" % len(piece)
print "Difference: %s" % [person for person in piece if person not in retrievedids]
else:
print "Total: 1"
args = parser.parse_args() def explore_user(self,uid):
t = self.t
print "Exploring uid %s" % uid
newdist = distance[uid]+ 1
try:
follos = t.followers.ids(user_id=uid)['ids']
except TwitterHTTPError as ex:
print "Error code %s" % ex.e.code
if ex.e.code == 401: # Private Twitter
return
raise ex
followers[uid]=follos
for follo in follos:
if follo not in followers.keys():
pending.add(follo)
distance[follo] = newdist
print args.new if __name__ == "__main__":
print args.NAME parser = argparse.ArgumentParser(description='Get an ego network for a given ID.')
sh = shelve.open('twits_shelf-%s.db' % args.NAME,writeback=True) parser.add_argument('NAME', nargs='?', metavar='id',
keys=['followers','distance','userobject'] default='me', type=str, help='Name of the user to be used as a center')
for key in keys: parser.add_argument('--new','-n',action='store_true', help='Start a new search.')
if key not in sh or args.new:
sh[key]={}
followers = sh['followers'] args = parser.parse_args()
distance = sh['distance']
pending = set()
userobject = sh['userobject']
t = Twitter(auth=OAuth(credentials.ACCESS_TOKEN, print args.new
credentials.ACCESS_TOKEN_SECRET, print args.NAME
credentials.CONSUMER_KEY,
credentials.CONSUMER_SECRET))
if not args.new: sh = shelve.open('twits_shelf-%s.db' % args.NAME,writeback=True)
print 'Recovering state.' keys=['followers','distance','userobject']
for key in followers:
for follower in followers[key]:
if follower not in followers:
pending.add(follower)
if args.new or len(pending)<1: for key in keys:
if args.NAME != 'me': if key not in sh or args.new:
user = t.users.lookup(screen_name=args.NAME)[0] sh[key]={}
uid = user['id']
distance[uid] = 0
pending.add(uid)
else:
creds = t.account.verify_credentials()
myid = creds['id']
distance[myid]=0
pending.add(myid)
print 'Pending is now: %s' %pending followers = sh['followers']
distance = sh['distance']
pending = set()
userobject = sh['userobject']
def getinfo(piece): lock = threading.RLock()
piecestr = ",".join(map(str,piece)) if type(piece) != str and len(piece)>1 else piece[0]
look=t.users.lookup(user_id=piecestr)
retrievedids=[]
for ob in look:
uid=ob['id']
userobject[uid]=ob
retrievedids.append(uid)
print "Got info for %s" % repr(piece)
if type(piece) != int:
print "Total: %s" % len(piece)
print "Difference: %s" % [person for person in piece if person not in retrievedids]
else:
print "Total: 1"
def explore_user(t,uid): t = Twitter(auth=OAuth(credentials.ACCESS_TOKEN,
print "Exploring uid %s" % uid credentials.ACCESS_TOKEN_SECRET,
newdist = distance[uid]+ 1 credentials.CONSUMER_KEY,
try: credentials.CONSUMER_SECRET))
follos = t.followers.ids(user_id=uid)['ids']
except TwitterHTTPError as ex:
print "Error code %s" % ex.e.code
if ex.e.code == 401: # Private Twitter
return
raise ex
followers[uid]=follos
for follo in follos:
if follo not in followers.keys():
pending.add(follo)
distance[follo] = newdist
def signal_handler(signal, frame): if not args.new:
print 'You pressed Ctrl+C!' print 'Recovering state.'
sh.close() for key in followers:
sys.exit(0) for follower in followers[key]:
signal.signal(signal.SIGINT, signal_handler) if follower not in followers:
pending.add(follower)
while True: if args.new or len(pending)<1:
try: if args.NAME != 'me':
print "Iteration!" user = t.users.lookup(screen_name=args.NAME)[0]
nextuser=None uid = user['id']
minimumscore=1000 distance[uid] = 0
unknowns = [person for person in pending if person not in userobject] pending.add(uid)
for i in xrange(0,int(math.ceil(len(unknowns)/100.0))): else:
piece = unknowns[i*100:(i+1)*100] creds = t.account.verify_credentials()
getinfo(piece) myid = creds['id']
for user in pending.copy(): distance[myid]=0
userscore = userobject[user]['followers_count']**distance[user] pending.add(myid)
if(userscore < minimumscore):
minimumscore = userscore print 'Pending is now: %s' %pending
nextuser=user
explore_user(t,nextuser) def signal_handler(signal, frame):
pending.remove(nextuser) print 'You pressed Ctrl+C!'
sh.sync() sh.close()
time.sleep(5) sys.exit(0)
except TwitterHTTPError as ex: signal.signal(signal.SIGINT, signal_handler)
print "Exception %s - %s" % (ex,type(ex)) f1 = Fetcher('f1',credentials)
if ex.e.code/10 == 42: f1.run()
print "Sleeping for 1 minute"
time.sleep(60)
except IncompleteRead:
print "IncompleteRead!"
time.sleep(5)
except Exception as ex:
print "Exception"
sh.close()
raise ex