1
0
mirror of https://github.com/balkian/TwitterDigger.git synced 2024-11-22 00:02:28 +00:00

Clumsy threading

This commit is contained in:
J.Fernando Sánchez 2013-03-15 16:11:46 +01:00
parent 36cd334b2e
commit 30d46c4edc

146
myego.py
View File

@ -6,9 +6,89 @@ import math
import signal import signal
import sys import sys
import argparse import argparse
import threading
from twitter import Twitter, OAuth, TwitterHTTPError from twitter import Twitter, OAuth, TwitterHTTPError
from httplib import IncompleteRead from httplib import IncompleteRead
class Fetcher(threading.Thread):
    """Worker that crawls the follower graph around the pending users.

    Every iteration looks up the profile of each pending user that is not
    yet in ``userobject``, picks the pending user with the lowest score
    (``followers_count ** distance``) and records that user's followers.

    The worker reads and writes the module-level ``pending``,
    ``userobject``, ``distance``, ``followers`` and ``sh`` objects;
    ``run`` holds the module-level ``lock`` while it touches them.

    Use ``start()`` to run the loop in a background thread; calling
    ``run()`` directly executes it in the caller's thread.
    """

    # Number of ids sent to users.lookup per call.
    LOOKUP_BATCH = 100
    # Only users whose score is strictly below this value are explored.
    MAX_SCORE = 1000
    # Seconds to wait after a normal iteration or a transient error.
    SHORT_WAIT = 5
    # Seconds to wait after a 42x (rate limit) HTTP error.
    RATE_LIMIT_WAIT = 60

    def __init__(self, fname, credentials):
        """Create a fetcher labelled *fname* with its own Twitter client.

        *credentials* must expose the ACCESS_TOKEN, ACCESS_TOKEN_SECRET,
        CONSUMER_KEY and CONSUMER_SECRET attributes.
        """
        threading.Thread.__init__(self)
        self.t = Twitter(auth=OAuth(credentials.ACCESS_TOKEN,
                                    credentials.ACCESS_TOKEN_SECRET,
                                    credentials.CONSUMER_KEY,
                                    credentials.CONSUMER_SECRET))
        self.fname = fname

    @staticmethod
    def _join_ids(ids):
        """Return *ids* as a comma-separated string."""
        return ",".join(str(uid) for uid in ids)

    @staticmethod
    def _pick_next(candidates, profiles, distances, limit):
        """Return the candidate with the lowest score below *limit*.

        The score is ``followers_count ** distance``. Candidates missing
        from *profiles* or *distances* are skipped instead of raising
        KeyError. On ties the first candidate seen wins. Returns None when
        no candidate qualifies.
        """
        best, best_score = None, limit
        for user in candidates:
            if user not in profiles or user not in distances:
                continue
            score = profiles[user]['followers_count'] ** distances[user]
            if score < best_score:
                best, best_score = user, score
        return best

    def run(self):
        """Crawl until no pending user qualifies for exploration.

        Waits SHORT_WAIT seconds between iterations and after transient
        errors, and RATE_LIMIT_WAIT seconds after a 42x HTTP error. Any
        other exception is re-raised with its original traceback.
        """
        while True:
            timetowait = 0
            try:
                # The lock is taken with ``with`` so it is released on every
                # exit path and never released without having been acquired.
                with lock:
                    print("Iteration! for Fetcher %s" % self.fname)
                    unknowns = [person for person in pending
                                if person not in userobject]
                    for start in range(0, len(unknowns), self.LOOKUP_BATCH):
                        self.getinfo(unknowns[start:start + self.LOOKUP_BATCH])
                    # Ids the lookup did not return can never be scored;
                    # drop them so they are not requested again each round.
                    missing = [person for person in unknowns
                               if person not in userobject]
                    if missing:
                        print("No info for %s, dropping them" % missing)
                        pending.difference_update(missing)
                    nextuser = self._pick_next(list(pending), userobject,
                                               distance, self.MAX_SCORE)
                    if nextuser is None:
                        # Nothing left below the score limit: stop cleanly
                        # instead of failing on explore_user(None).
                        print("Nothing left to explore for Fetcher %s"
                              % self.fname)
                        return
                    self.explore_user(nextuser)
                    pending.remove(nextuser)
                    sh.sync()
                timetowait = self.SHORT_WAIT
            except TwitterHTTPError as ex:
                print("Exception %s - %s" % (ex, type(ex)))
                # Back off on every HTTP error so a persistent failure does
                # not turn into a busy loop against the API.
                timetowait = self.SHORT_WAIT
                if ex.e.code // 10 == 42:
                    print("Sleeping for 1 minute")
                    timetowait = self.RATE_LIMIT_WAIT
            except IncompleteRead:
                print("IncompleteRead!")
                timetowait = self.SHORT_WAIT
            except Exception:
                print("Exception")
                raise
            # Sleep only after the lock has been released.
            if timetowait:
                time.sleep(timetowait)

    def getinfo(self, piece):
        """Look up the profiles in *piece* and cache them in ``userobject``.

        *piece* is a collection of user ids or a single id. An empty
        collection is a no-op and no request is made. Ids missing from the
        response are reported on the "Difference" line.
        """
        single = not isinstance(piece, (list, tuple, set, frozenset))
        ids = [piece] if single else list(piece)
        if not ids:
            return
        look = self.t.users.lookup(user_id=self._join_ids(ids))
        retrieved = set()
        for ob in look:
            uid = ob['id']
            userobject[uid] = ob
            retrieved.add(uid)
        print("Got info for %s" % repr(piece))
        print("Total: %s" % len(ids))
        if not single:
            print("Difference: %s"
                  % [person for person in ids if person not in retrieved])

    def explore_user(self, uid):
        """Store the followers of *uid* and queue the unexplored ones.

        Followers that have no entry in ``followers`` yet are added to
        ``pending`` at distance ``distance[uid] + 1``. A 401 response
        (private account) is skipped silently; any other HTTP error is
        re-raised.
        """
        print("Exploring uid %s" % uid)
        newdist = distance[uid] + 1
        try:
            # NOTE(review): only the 'ids' of this single response are read;
            # confirm whether further result pages need to be fetched.
            follos = self.t.followers.ids(user_id=uid)['ids']
        except TwitterHTTPError as ex:
            print("Error code %s" % ex.e.code)
            if ex.e.code == 401:  # Private Twitter
                return
            raise
        followers[uid] = follos
        for follo in follos:
            if follo not in followers:
                pending.add(follo)
                distance[follo] = newdist
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Get an ego network for a given ID.') parser = argparse.ArgumentParser(description='Get an ego network for a given ID.')
parser.add_argument('NAME', nargs='?', metavar='id', parser.add_argument('NAME', nargs='?', metavar='id',
@ -33,6 +113,8 @@ distance = sh['distance']
pending = set() pending = set()
userobject = sh['userobject'] userobject = sh['userobject']
lock = threading.RLock()
t = Twitter(auth=OAuth(credentials.ACCESS_TOKEN, t = Twitter(auth=OAuth(credentials.ACCESS_TOKEN,
credentials.ACCESS_TOKEN_SECRET, credentials.ACCESS_TOKEN_SECRET,
credentials.CONSUMER_KEY, credentials.CONSUMER_KEY,
@ -59,71 +141,11 @@ if args.new or len(pending)<1:
print 'Pending is now: %s' %pending print 'Pending is now: %s' %pending
def getinfo(piece):
    """Look up the profiles in *piece* and cache them in ``userobject``.

    *piece* is a collection of user ids or a single id. The request goes
    through the module-level ``t`` client. An empty collection is a no-op
    and no request is made. Ids missing from the response are reported on
    the "Difference" line.
    """
    single = not isinstance(piece, (list, tuple, set, frozenset))
    ids = [piece] if single else list(piece)
    if not ids:
        # Nothing to ask for; the original failed on piece[0] here.
        return
    look = t.users.lookup(user_id=",".join(str(uid) for uid in ids))
    retrieved = set()
    for ob in look:
        uid = ob['id']
        userobject[uid] = ob
        retrieved.add(uid)
    print("Got info for %s" % repr(piece))
    print("Total: %s" % len(ids))
    if not single:
        print("Difference: %s"
              % [person for person in ids if person not in retrieved])
def explore_user(t, uid):
    """Store the followers of *uid* and queue the unexplored ones.

    *t* is the Twitter client used for the request. Followers that have no
    entry in ``followers`` yet are added to ``pending`` at distance
    ``distance[uid] + 1``. A 401 response (private account) is skipped
    silently; any other HTTP error is re-raised with its original
    traceback.
    """
    print("Exploring uid %s" % uid)
    newdist = distance[uid] + 1
    try:
        follos = t.followers.ids(user_id=uid)['ids']
    except TwitterHTTPError as ex:
        print("Error code %s" % ex.e.code)
        if ex.e.code == 401:  # Private Twitter
            return
        raise
    followers[uid] = follos
    for follo in follos:
        # Test the mapping itself; .keys() builds a list on every pass.
        if follo not in followers:
            pending.add(follo)
            distance[follo] = newdist
def signal_handler(signal, frame):
    """Handle SIGINT: report the interrupt, close ``sh`` and exit with 0."""
    print('You pressed Ctrl+C!')
    sh.close()
    raise SystemExit(0)


# Install the handler so that Ctrl+C closes ``sh`` before exiting.
signal.signal(signal.SIGINT, signal_handler)
f1 = Fetcher('f1',credentials)
while True: f1.run()
try:
print "Iteration!"
nextuser=None
minimumscore=1000
unknowns = [person for person in pending if person not in userobject]
for i in xrange(0,int(math.ceil(len(unknowns)/100.0))):
piece = unknowns[i*100:(i+1)*100]
getinfo(piece)
for user in pending.copy():
userscore = userobject[user]['followers_count']**distance[user]
if(userscore < minimumscore):
minimumscore = userscore
nextuser=user
explore_user(t,nextuser)
pending.remove(nextuser)
sh.sync()
time.sleep(5)
except TwitterHTTPError as ex:
print "Exception %s - %s" % (ex,type(ex))
if ex.e.code/10 == 42:
print "Sleeping for 1 minute"
time.sleep(60)
except IncompleteRead:
print "IncompleteRead!"
time.sleep(5)
except Exception as ex:
print "Exception"
sh.close()
raise ex