# -*- coding: utf-8 -*-
"""Download the pictures behind a login form and pack them into a zip file.

The script logs in at ``LOGIN_URL``, follows the links whose text contains
"individual" and "grupo", saves every picture linked from those two gallery
pages into the folders ``individuales/`` and ``grupo/``, and finally writes
all downloaded files into ``fotos-85.zip``.

Provenance: group_pictures_UPM.py as it stands after the two-commit patch
series by Fernando Sánchez dated 2013-07-30 (commits 43d0d4b and 543d7ca).

Written for Python 2 (``cookielib``, BeautifulSoup 3).
"""
import cookielib
import os
import re
from zipfile import ZipFile

import mechanize
from BeautifulSoup import BeautifulSoup

LOGIN_URL = 'http://www.etsit.upm.es/fotospromo85.html'

# NOTE(review): these credentials are stored in clear text in the source.
# Consider reading them from the environment or a prompt instead.
LOGIN_USER = 'entregadiplomas'
LOGIN_PASS = 'entrega2013'

ZIP_NAME = 'fotos-85.zip'

# Picture links have the form "pics/<10 characters>.jpg". The dot before
# "jpg" is escaped so that only a literal ".jpg" suffix matches.
IMAGE_HREF = re.compile(r'pics/.{10}\.jpg')

USER_AGENT = ('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) '
              'Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')


def _make_browser():
    """Return a mechanize browser with cookies enabled and robots.txt ignored."""
    browser = mechanize.Browser()
    browser.set_cookiejar(cookielib.LWPCookieJar())
    browser.set_handle_equiv(True)
    browser.set_handle_gzip(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)
    browser.addheaders = [('User-agent', USER_AGENT)]
    return browser


# Shared browser session; get_image() reads this module-level name.
br = _make_browser()


def get_image(tag, folder):
    """Download one picture into *folder* and return the path it was saved to.

    tag    -- URL (href value) of the picture. Despite its name, callers pass
              the href string, not the HTML tag object.
    folder -- destination directory; created if it does not exist yet.

    The file name is the last path component of the URL. The download goes
    through the module-level browser ``br``.
    """
    if not os.path.isdir(folder):
        os.makedirs(folder)
    print(tag)
    filename = os.path.join(folder, tag.split('/')[-1])
    data = br.open(tag).read()
    # Step back to the page we came from; presumably needed so that the next
    # (possibly relative) href is resolved against the gallery page again.
    br.back()
    # "with" guarantees the file is closed even if the write fails.
    with open(filename, 'wb') as save:
        save.write(data)
    return filename


def _download_gallery(index_page, label, folder):
    """Download every picture of one gallery; return the list of saved paths.

    index_page -- parsed page that contains the link to the gallery.
    label      -- regex matched against the link text to find the gallery.
    folder     -- directory the pictures are saved into.
    """
    matches = index_page(text=re.compile(label))
    if not matches:
        # Typically means the login did not succeed or the page changed.
        raise RuntimeError('no link with text matching %r found' % label)
    link = matches[0].parent['href']
    gallery = BeautifulSoup(br.open(link))
    return [get_image(anchor['href'], folder)
            for anchor in gallery(href=IMAGE_HREF)]


def main():
    """Log in, download both galleries and zip everything that was saved."""
    br.open(LOGIN_URL)
    br.select_form(nr=0)
    br.form['user'] = LOGIN_USER
    br.form['pass'] = LOGIN_PASS
    index_page = BeautifulSoup(br.submit())

    tozip = []
    # Individual pictures
    tozip.extend(_download_gallery(index_page, r'individual', 'individuales'))
    # Now the group pictures
    tozip.extend(_download_gallery(index_page, r'grupo', 'grupo'))

    with ZipFile(ZIP_NAME, 'w') as myzip:
        for path in tozip:
            myzip.write(path)


if __name__ == '__main__':
    main()