You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
48 lines
1.6 KiB
Python
48 lines
1.6 KiB
Python
import sys
|
|
import os
|
|
import os.path
|
|
import sqlite3
|
|
|
|
|
|
def addusers(c, ufrom, uto):
    """Store one ``(ufrom, uto)`` pair as a row of the ``followers`` table.

    The statement is ``INSERT OR IGNORE``, so a row rejected by a
    constraint on the table is skipped instead of raising.

    Args:
        c: Object exposing a DB-API style ``execute(sql, params)`` method.
        ufrom: Value bound to the first column.
        uto: Value bound to the second column.
    """
    statement = "INSERT OR IGNORE INTO followers VALUES (?, ?)"
    bound_values = (ufrom, uto)
    c.execute(statement, bound_values)
|
|
|
|
|
|
def update_progress(i, current, total):
    """Replace the previous terminal line with a progress report.

    Args:
        i: Line counter shown in the report.
        current: Number of bytes processed so far.
        total: Total number of bytes to process.  May be 0 (empty input);
            the report then shows 100.0 % instead of dividing by zero.
    """
    sys.stdout.write("\033[F")  # ANSI escape: move the cursor back to the previous line
    sys.stdout.write("\033[K")  # ANSI escape: clear that line
    # With nothing to process there is nothing left to do, so report the
    # job as complete rather than raising ZeroDivisionError.
    percent = 100.0 * current / float(total) if total else 100.0
    print('Done: {} lines. {} / {} bytes ({} %)'.format(i, current, total, percent))
|
|
|
|
|
|
def main(infile):
    """Import a tab-separated file of pairs into a SQLite database.

    The database file is named ``<basename of infile>.py.db`` and is opened
    relative to the current working directory.  Each input line must contain
    exactly two tab-separated fields, which are inserted as one row of the
    ``followers`` table; any other line is reported on stdout and skipped.
    Changes are committed, and a progress line printed, whenever the
    0-based line index is a multiple of 10000, and once more at the end.

    Args:
        infile: Path of the text file to import.
    """
    conn = sqlite3.connect('%s.py.db' % os.path.basename(infile))
    # try/finally so the connection is released even if the import fails
    # part-way through (uncommitted rows are discarded in that case).
    try:
        # Create table
        conn.execute('''CREATE TABLE IF NOT EXISTS followers
                        (user int, follower int)''')
        conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS followersindex ON followers(user, follower) ")
        conn.execute("CREATE INDEX IF NOT EXISTS followersindex_follower ON followers(follower) ")
        conn.execute("CREATE INDEX IF NOT EXISTS followersindex_user ON followers(user) ")

        # Stays None when the file yields no lines, so the final report
        # below never touches an unbound loop variable.
        last_index = None
        with open(infile) as f:
            total = os.fstat(f.fileno()).st_size
            # Read with readline() instead of iterating the file object:
            # in Python 3, iterating a text file disables tell() (OSError:
            # "telling position disabled by next() call"), and tell() is
            # needed for the progress report.
            for i, line in enumerate(iter(f.readline, '')):
                last_index = i
                tokens = line.strip().split('\t')
                if len(tokens) != 2:
                    print('Wrong line: ', i, tokens)
                    continue
                addusers(conn, tokens[0], tokens[1])

                if i % 10000 == 0:
                    conn.commit()
                    # NOTE(review): text-mode tell() is documented as an
                    # opaque position; it is used here as an approximate
                    # byte offset for display only.
                    update_progress(i, f.tell(), total)

        # Save (commit) the changes
        conn.commit()
    finally:
        # Everything worth keeping has been committed above; anything
        # still pending at this point is lost on close.
        conn.close()

    if last_index is not None:
        update_progress(last_index, total, total)
|
|
|
|
if __name__ == '__main__':
    # Fail with a readable usage message instead of an IndexError traceback
    # when the input path is missing.  Extra arguments are still ignored.
    if len(sys.argv) < 2:
        sys.exit('Usage: {} INFILE'.format(sys.argv[0]))
    main(sys.argv[1])