mirror of
https://github.com/balkian/TwitterDigger.git
synced 2024-11-22 00:02:28 +00:00
Clumsy threading
This commit is contained in:
parent
36cd334b2e
commit
30d46c4edc
226
myego.py
226
myego.py
@ -6,124 +6,146 @@ import math
|
|||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
|
import threading
|
||||||
from twitter import Twitter, OAuth, TwitterHTTPError
|
from twitter import Twitter, OAuth, TwitterHTTPError
|
||||||
from httplib import IncompleteRead
|
from httplib import IncompleteRead
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='Get an ego network for a given ID.')
|
class Fetcher(threading.Thread):
|
||||||
|
def __init__(self, fname, credentials):
|
||||||
|
self.t = Twitter(auth=OAuth(credentials.ACCESS_TOKEN,
|
||||||
|
credentials.ACCESS_TOKEN_SECRET,
|
||||||
|
credentials.CONSUMER_KEY,
|
||||||
|
credentials.CONSUMER_SECRET))
|
||||||
|
self.fname=fname
|
||||||
|
threading.Thread.__init__(self)
|
||||||
|
|
||||||
parser.add_argument('NAME', nargs='?', metavar='id',
|
def run(self):
|
||||||
default='me', type=str, help='Name of the user to be used as a center')
|
while True:
|
||||||
|
timetowait = 0
|
||||||
|
try:
|
||||||
|
lock.acquire()
|
||||||
|
print "Iteration! for Fetcher %s" % self.fname
|
||||||
|
nextuser=None
|
||||||
|
minimumscore=1000
|
||||||
|
unknowns = [person for person in pending if person not in userobject]
|
||||||
|
for i in xrange(0,int(math.ceil(len(unknowns)/100.0))):
|
||||||
|
piece = unknowns[i*100:(i+1)*100]
|
||||||
|
self.getinfo(piece)
|
||||||
|
for user in pending.copy():
|
||||||
|
userscore = userobject[user]['followers_count']**distance[user]
|
||||||
|
if(userscore < minimumscore):
|
||||||
|
minimumscore = userscore
|
||||||
|
nextuser=user
|
||||||
|
self.explore_user(nextuser)
|
||||||
|
pending.remove(nextuser)
|
||||||
|
sh.sync()
|
||||||
|
timetowait = 5
|
||||||
|
except TwitterHTTPError as ex:
|
||||||
|
print "Exception %s - %s" % (ex,type(ex))
|
||||||
|
if ex.e.code/10 == 42:
|
||||||
|
print "Sleeping for 1 minute"
|
||||||
|
timetowait = 60
|
||||||
|
except IncompleteRead:
|
||||||
|
print "IncompleteRead!"
|
||||||
|
timetowait = 5
|
||||||
|
except Exception as ex:
|
||||||
|
print "Exception"
|
||||||
|
raise ex
|
||||||
|
finally:
|
||||||
|
lock.release()
|
||||||
|
if timetowait:
|
||||||
|
time.sleep(timetowait)
|
||||||
|
|
||||||
parser.add_argument('--new','-n',action='store_true', help='Start a new search.')
|
def getinfo(self,piece):
|
||||||
|
piecestr = ",".join(map(str,piece)) if type(piece) != str and len(piece)>1 else piece[0]
|
||||||
|
look=self.t.users.lookup(user_id=piecestr)
|
||||||
|
retrievedids=[]
|
||||||
|
for ob in look:
|
||||||
|
uid=ob['id']
|
||||||
|
userobject[uid]=ob
|
||||||
|
retrievedids.append(uid)
|
||||||
|
print "Got info for %s" % repr(piece)
|
||||||
|
if type(piece) != int:
|
||||||
|
print "Total: %s" % len(piece)
|
||||||
|
print "Difference: %s" % [person for person in piece if person not in retrievedids]
|
||||||
|
else:
|
||||||
|
print "Total: 1"
|
||||||
|
|
||||||
args = parser.parse_args()
|
def explore_user(self,uid):
|
||||||
|
t = self.t
|
||||||
|
print "Exploring uid %s" % uid
|
||||||
|
newdist = distance[uid]+ 1
|
||||||
|
try:
|
||||||
|
follos = t.followers.ids(user_id=uid)['ids']
|
||||||
|
except TwitterHTTPError as ex:
|
||||||
|
print "Error code %s" % ex.e.code
|
||||||
|
if ex.e.code == 401: # Private Twitter
|
||||||
|
return
|
||||||
|
raise ex
|
||||||
|
followers[uid]=follos
|
||||||
|
for follo in follos:
|
||||||
|
if follo not in followers.keys():
|
||||||
|
pending.add(follo)
|
||||||
|
distance[follo] = newdist
|
||||||
|
|
||||||
print args.new
|
if __name__ == "__main__":
|
||||||
print args.NAME
|
parser = argparse.ArgumentParser(description='Get an ego network for a given ID.')
|
||||||
|
|
||||||
sh = shelve.open('twits_shelf-%s.db' % args.NAME,writeback=True)
|
parser.add_argument('NAME', nargs='?', metavar='id',
|
||||||
keys=['followers','distance','userobject']
|
default='me', type=str, help='Name of the user to be used as a center')
|
||||||
|
|
||||||
for key in keys:
|
parser.add_argument('--new','-n',action='store_true', help='Start a new search.')
|
||||||
if key not in sh or args.new:
|
|
||||||
sh[key]={}
|
|
||||||
|
|
||||||
followers = sh['followers']
|
args = parser.parse_args()
|
||||||
distance = sh['distance']
|
|
||||||
pending = set()
|
|
||||||
userobject = sh['userobject']
|
|
||||||
|
|
||||||
t = Twitter(auth=OAuth(credentials.ACCESS_TOKEN,
|
print args.new
|
||||||
credentials.ACCESS_TOKEN_SECRET,
|
print args.NAME
|
||||||
credentials.CONSUMER_KEY,
|
|
||||||
credentials.CONSUMER_SECRET))
|
|
||||||
|
|
||||||
if not args.new:
|
sh = shelve.open('twits_shelf-%s.db' % args.NAME,writeback=True)
|
||||||
print 'Recovering state.'
|
keys=['followers','distance','userobject']
|
||||||
for key in followers:
|
|
||||||
for follower in followers[key]:
|
|
||||||
if follower not in followers:
|
|
||||||
pending.add(follower)
|
|
||||||
|
|
||||||
if args.new or len(pending)<1:
|
for key in keys:
|
||||||
if args.NAME != 'me':
|
if key not in sh or args.new:
|
||||||
user = t.users.lookup(screen_name=args.NAME)[0]
|
sh[key]={}
|
||||||
uid = user['id']
|
|
||||||
distance[uid] = 0
|
|
||||||
pending.add(uid)
|
|
||||||
else:
|
|
||||||
creds = t.account.verify_credentials()
|
|
||||||
myid = creds['id']
|
|
||||||
distance[myid]=0
|
|
||||||
pending.add(myid)
|
|
||||||
|
|
||||||
print 'Pending is now: %s' %pending
|
followers = sh['followers']
|
||||||
|
distance = sh['distance']
|
||||||
|
pending = set()
|
||||||
|
userobject = sh['userobject']
|
||||||
|
|
||||||
def getinfo(piece):
|
lock = threading.RLock()
|
||||||
piecestr = ",".join(map(str,piece)) if type(piece) != str and len(piece)>1 else piece[0]
|
|
||||||
look=t.users.lookup(user_id=piecestr)
|
|
||||||
retrievedids=[]
|
|
||||||
for ob in look:
|
|
||||||
uid=ob['id']
|
|
||||||
userobject[uid]=ob
|
|
||||||
retrievedids.append(uid)
|
|
||||||
print "Got info for %s" % repr(piece)
|
|
||||||
if type(piece) != int:
|
|
||||||
print "Total: %s" % len(piece)
|
|
||||||
print "Difference: %s" % [person for person in piece if person not in retrievedids]
|
|
||||||
else:
|
|
||||||
print "Total: 1"
|
|
||||||
|
|
||||||
def explore_user(t,uid):
|
t = Twitter(auth=OAuth(credentials.ACCESS_TOKEN,
|
||||||
print "Exploring uid %s" % uid
|
credentials.ACCESS_TOKEN_SECRET,
|
||||||
newdist = distance[uid]+ 1
|
credentials.CONSUMER_KEY,
|
||||||
try:
|
credentials.CONSUMER_SECRET))
|
||||||
follos = t.followers.ids(user_id=uid)['ids']
|
|
||||||
except TwitterHTTPError as ex:
|
|
||||||
print "Error code %s" % ex.e.code
|
|
||||||
if ex.e.code == 401: # Private Twitter
|
|
||||||
return
|
|
||||||
raise ex
|
|
||||||
followers[uid]=follos
|
|
||||||
for follo in follos:
|
|
||||||
if follo not in followers.keys():
|
|
||||||
pending.add(follo)
|
|
||||||
distance[follo] = newdist
|
|
||||||
|
|
||||||
def signal_handler(signal, frame):
|
if not args.new:
|
||||||
print 'You pressed Ctrl+C!'
|
print 'Recovering state.'
|
||||||
sh.close()
|
for key in followers:
|
||||||
sys.exit(0)
|
for follower in followers[key]:
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
if follower not in followers:
|
||||||
|
pending.add(follower)
|
||||||
|
|
||||||
while True:
|
if args.new or len(pending)<1:
|
||||||
try:
|
if args.NAME != 'me':
|
||||||
print "Iteration!"
|
user = t.users.lookup(screen_name=args.NAME)[0]
|
||||||
nextuser=None
|
uid = user['id']
|
||||||
minimumscore=1000
|
distance[uid] = 0
|
||||||
unknowns = [person for person in pending if person not in userobject]
|
pending.add(uid)
|
||||||
for i in xrange(0,int(math.ceil(len(unknowns)/100.0))):
|
else:
|
||||||
piece = unknowns[i*100:(i+1)*100]
|
creds = t.account.verify_credentials()
|
||||||
getinfo(piece)
|
myid = creds['id']
|
||||||
for user in pending.copy():
|
distance[myid]=0
|
||||||
userscore = userobject[user]['followers_count']**distance[user]
|
pending.add(myid)
|
||||||
if(userscore < minimumscore):
|
|
||||||
minimumscore = userscore
|
print 'Pending is now: %s' %pending
|
||||||
nextuser=user
|
|
||||||
explore_user(t,nextuser)
|
def signal_handler(signal, frame):
|
||||||
pending.remove(nextuser)
|
print 'You pressed Ctrl+C!'
|
||||||
sh.sync()
|
sh.close()
|
||||||
time.sleep(5)
|
sys.exit(0)
|
||||||
except TwitterHTTPError as ex:
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
print "Exception %s - %s" % (ex,type(ex))
|
f1 = Fetcher('f1',credentials)
|
||||||
if ex.e.code/10 == 42:
|
f1.run()
|
||||||
print "Sleeping for 1 minute"
|
|
||||||
time.sleep(60)
|
|
||||||
except IncompleteRead:
|
|
||||||
print "IncompleteRead!"
|
|
||||||
time.sleep(5)
|
|
||||||
except Exception as ex:
|
|
||||||
print "Exception"
|
|
||||||
sh.close()
|
|
||||||
raise ex
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user