mirror of
				https://github.com/balkian/bitter.git
				synced 2025-11-04 01:28:17 +00:00 
			
		
		
		
	Fix user_ids bug
Add number of failed downloads to the output. Add flag to retry previously failed downloads.
This commit is contained in:
		@@ -1 +1 @@
 | 
			
		||||
0.9.2
 | 
			
		||||
0.9.3
 | 
			
		||||
 
 | 
			
		||||
@@ -85,14 +85,15 @@ def get_tweet(tweetid, write, folder, update):
 | 
			
		||||
The result is stored as individual json files in your folder of choice.''')
 | 
			
		||||
@click.argument('tweetsfile', 'File with a list of tweets to look up')
 | 
			
		||||
@click.option('-f', '--folder', default="tweets")
 | 
			
		||||
@click.option('-u', '--update', is_flag=True, default=False, help='Download user even if it is already present. WARNING: it will overwrite existing files!')
 | 
			
		||||
@click.option('-u', '--update', is_flag=True, default=False, help='Download tweet even if it is already present. WARNING: it will overwrite existing files!')
 | 
			
		||||
@click.option('-r', '--retry', is_flag=True, default=False, help='Retry failed downloads')
 | 
			
		||||
@click.option('-d', '--delimiter', default=",")
 | 
			
		||||
@click.option('-h', '--header', help='Discard the first line (use it as a header)',
 | 
			
		||||
              is_flag=True, default=False)
 | 
			
		||||
@click.option('-q', '--quotechar', default='"')
 | 
			
		||||
@click.option('-c', '--column', type=int, default=0)
 | 
			
		||||
@click.pass_context
 | 
			
		||||
def get_tweets(ctx, tweetsfile, folder, update, delimiter, header, quotechar, column):
 | 
			
		||||
def get_tweets(ctx, tweetsfile, folder, update, retry, delimiter, header, quotechar, column):
 | 
			
		||||
    if update and not click.confirm('This may overwrite existing tweets. Continue?'):
 | 
			
		||||
        click.echo('Cancelling')
 | 
			
		||||
        return
 | 
			
		||||
@@ -100,7 +101,7 @@ def get_tweets(ctx, tweetsfile, folder, update, delimiter, header, quotechar, co
 | 
			
		||||
    for i in utils.download_file(wq, tweetsfile, folder, delimiter=delimiter,
 | 
			
		||||
                                 batch_method=utils.tweet_download_batch,
 | 
			
		||||
                                 header=header, quotechar=quotechar,
 | 
			
		||||
                                 column=column, update=update):
 | 
			
		||||
                                 column=column, update=update, retry_failed=retry):
 | 
			
		||||
        pass
 | 
			
		||||
 | 
			
		||||
@tweet.command('search')
 | 
			
		||||
@@ -163,20 +164,21 @@ def get_user(user, write, folder, update):
 | 
			
		||||
@click.argument('usersfile', 'File with a list of users to look up')
 | 
			
		||||
@click.option('-f', '--folder', default="users")
 | 
			
		||||
@click.option('-u', '--update', is_flag=True, default=False, help='Download user even if it is already present. WARNING: it will overwrite existing files!')
 | 
			
		||||
@click.option('-r', '--retry', is_flag=True, default=False, help='Retry failed downloads')
 | 
			
		||||
@click.option('-d', '--delimiter', default=",")
 | 
			
		||||
@click.option('-h', '--header', help='Discard the first line (use it as a header)',
 | 
			
		||||
              is_flag=True, default=False)
 | 
			
		||||
@click.option('-q', '--quotechar', default='"')
 | 
			
		||||
@click.option('-c', '--column', type=int, default=0)
 | 
			
		||||
@click.pass_context
 | 
			
		||||
def get_users(ctx, usersfile, folder, update, delimiter, header, quotechar, column):
 | 
			
		||||
def get_users(ctx, usersfile, folder, update, retry, delimiter, header, quotechar, column):
 | 
			
		||||
    if update and not click.confirm('This may overwrite existing users. Continue?'):
 | 
			
		||||
        click.echo('Cancelling')
 | 
			
		||||
        return
 | 
			
		||||
    wq = crawlers.TwitterQueue.from_config(conffile=bconf.CONFIG_FILE)
 | 
			
		||||
    for i in utils.download_file(wq, usersfile, folder, delimiter=delimiter,
 | 
			
		||||
                                 batch_method=utils.user_download_batch,
 | 
			
		||||
                                 update=update,
 | 
			
		||||
                                 update=update, retry_failed=retry,
 | 
			
		||||
                                 header=header, quotechar=quotechar,
 | 
			
		||||
                                 column=column):
 | 
			
		||||
        pass
 | 
			
		||||
 
 | 
			
		||||
@@ -518,12 +518,22 @@ def user_download_batch(wq, batch):
 | 
			
		||||
            user_ids.append(str(elem))
 | 
			
		||||
        except ValueError:
 | 
			
		||||
            screen_names.append(elem.lower())
 | 
			
		||||
    print('Downloading: {} - {}'.format(user_ids, screen_names))
 | 
			
		||||
    users = wq.users.lookup(user_id=",".join(user_ids), screen_name=",".join(screen_names))
 | 
			
		||||
    args = {}
 | 
			
		||||
    if user_ids:
 | 
			
		||||
        args['user_id'] = ','.join(user_ids)
 | 
			
		||||
    if screen_names:
 | 
			
		||||
        args['screen_name'] = ','.join(screen_names)
 | 
			
		||||
    try:
 | 
			
		||||
        users = wq.users.lookup(**args)
 | 
			
		||||
    except TwitterHTTPError as ex:
 | 
			
		||||
        if ex.e.code in (404,):
 | 
			
		||||
            users = []
 | 
			
		||||
        else:
 | 
			
		||||
            raise
 | 
			
		||||
    found_ids = []
 | 
			
		||||
    found_names = []
 | 
			
		||||
    for user in users:
 | 
			
		||||
        uid = user['id']
 | 
			
		||||
        uid = user['id_str']
 | 
			
		||||
        if uid in user_ids:
 | 
			
		||||
            found_ids.append(uid)
 | 
			
		||||
            yield (uid, user)
 | 
			
		||||
@@ -552,6 +562,9 @@ def download_list(wq, lst, folder, update=False, retry_failed=False, ignore_fail
 | 
			
		||||
          if obj:
 | 
			
		||||
              try:
 | 
			
		||||
                  write_json(obj, folder=folder, oid=oid)
 | 
			
		||||
                  failed = fail_file(oid, folder)
 | 
			
		||||
                  if os.path.exists(failed):
 | 
			
		||||
                      os.remove(failed)
 | 
			
		||||
                  yield 1
 | 
			
		||||
              except Exception as ex:
 | 
			
		||||
                  logger.error('%s: %s' % (oid, ex))
 | 
			
		||||
@@ -565,8 +578,13 @@ def download_list(wq, lst, folder, update=False, retry_failed=False, ignore_fail
 | 
			
		||||
 | 
			
		||||
    objects_to_crawl = filter(lambda x: x is not None, tqdm(parallel(filter_lines, lst), desc='Total objects'))
 | 
			
		||||
    batch_method = partial(batch_method, wq)
 | 
			
		||||
    tweets = parallel(batch_method, objects_to_crawl, 100)
 | 
			
		||||
    for res in tqdm(parallel(print_result, tweets), desc='Queried'):
 | 
			
		||||
    objects = parallel(batch_method, objects_to_crawl, 100)
 | 
			
		||||
    failed = 0
 | 
			
		||||
    pbar = tqdm(parallel(print_result, objects), desc='Queried')
 | 
			
		||||
    for res in pbar:
 | 
			
		||||
        if res < 0:
 | 
			
		||||
            failed += 1
 | 
			
		||||
            pbar.set_description('Failed: %s. Queried' % failed, refresh=True)
 | 
			
		||||
        yield res
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user