1
0
mirror of https://github.com/balkian/gists.git synced 2024-11-22 01:32:29 +00:00
gists/group_pictures_UPM/group_pictures_UPM.py

57 lines
1.6 KiB
Python
Raw Normal View History

2013-07-30 15:13:28 +00:00
import mechanize, cookielib
from BeautifulSoup import BeautifulSoup
import re, os
2013-07-30 15:27:43 +00:00
from zipfile import ZipFile
2013-07-30 15:13:28 +00:00
def get_image(tag, folder):
    """Download one image with the shared browser and save it under *folder*.

    Parameters:
        tag: URL (href) of the image to fetch. Despite the name, callers
            pass the link's ``href`` string, not a parsed tag object.
        folder: Destination directory; created if it does not exist yet.

    Returns:
        The path of the file that was written (``folder`` joined with the
        last path component of the URL).

    Relies on the module-level mechanize browser ``br``.
    """
    if not os.path.isdir(folder):
        os.makedirs(folder)
    # Progress output: show which URL is being fetched.
    print(tag)
    # Name the local file after the last segment of the URL.
    filename = os.path.join(folder, tag.split('/')[-1])
    data = br.open(tag).read()
    # Step back in the browser history after fetching the image
    # (presumably so later relative links resolve against the gallery page).
    br.back()
    # The context manager closes the file even if the write fails.
    with open(filename, 'wb') as save:
        save.write(data)
    return filename
2013-07-30 15:13:28 +00:00
# Shared mechanize browser with a cookie jar attached.
br = mechanize.Browser()
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)
br.set_handle_equiv(True)
br.set_handle_gzip(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
# Do not consult robots.txt before fetching pages.
br.set_handle_robots(False)
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

# Paths of every downloaded picture, collected for the final zip archive.
tozip = []

# Open the landing page and submit its first form with the credentials.
# NOTE(review): the credentials are hard-coded in the source; consider
# moving them out of the repository.
br.open('http://www.etsit.upm.es/fotospromo85.html')
br.select_form(nr=0)
br.form['user']='entregadiplomas'
br.form['pass']='entrega2013'
r = br.submit()
r_tags = BeautifulSoup(r)

# Links to the pictures inside a gallery page; compiled once for both galleries.
image_link = re.compile(r'pics/.{10}.jpg')

# Each entry pairs the text that identifies a gallery link on the page with
# the local folder its pictures are stored in: first the individual
# pictures, then the group ones.
galleries = (
    ('individual', 'individuales'),
    ('grupo', 'grupo'),
)

# This loop stays at module level so that `image` and `url` remain global
# names, as any helper that reads them by name requires.
for keyword, folder in galleries:
    # The matched text node's parent element carries the gallery href.
    link = r_tags(text=re.compile(keyword))[0].parent['href']
    galery = BeautifulSoup(br.open(link))
    images_tags = galery(href=image_link)
    for image in images_tags:
        url = image['href']
        tozip.append(get_image(url, folder))

# Bundle every downloaded picture into a single archive.
with ZipFile('fotos-85.zip', 'w') as myzip:
    for i in tozip:
        myzip.write(i)