Mirror of https://github.com/balkian/gists.git (synced 2025-10-30 23:28:27 +00:00)

This commit is contained in:
gistfile1.txt | 108 lines | Normal file
						
									
@@ -0,0 +1,108 @@
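# Scrape the order history of an AliExpress account with Selenium.
#
# The script drives a local Chromium instance, walks through the "My Orders"
# pages, and writes one row per purchased product to list.csv. Product images
# are downloaded into the current working directory as a side effect.
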
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException, TimeoutException
# import selenium.webdriver.firefox.webdriver as fwb

import os
import csv
from itertools import islice
from urllib import request
from urllib.error import HTTPError

home = 'https://www.aliexpress.com/'

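# Columns written to list.csv; these are also the keys of the dicts yielded by
# scrape_orders_page() below, so csv.DictWriter can map rows to columns directly.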
ATTRS = ['time', 'id', 'image', 'store', 'status', 'title', 'price', 'quantity']


# ff_bin = fwb.FirefoxBinary(firefox_path='/usr/bin/firefox')
# ff_profile = fwb.FirefoxProfile()
options = Options()
options.binary_location = '/usr/bin/chromium'            # use the system Chromium binary
options.add_argument('--lang=en')                        # request English so the link texts used below match
options.add_argument('--user-data-dir=/tmp/aliexpress')  # persistent profile, so cookies survive reruns
driver = webdriver.Chrome(options=options)
# driver.set_window_size(1024, 768)
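
# Note: this script targets the Selenium 3 API (the find_element_by_* helpers used
# below). Under Selenium 4 the setup would look roughly like the untested sketch
# below, and lookups become driver.find_element(By.LINK_TEXT, ...) instead:
#
#   from selenium.webdriver.chrome.service import Service
#   driver = webdriver.Chrome(service=Service('/usr/bin/chromedriver'), options=options)
#
# (the chromedriver path above is only an example)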

def login():
    # Open the home page and switch to the global (English) site if needed.
    driver.get(home)
    try:
        lb = driver.find_element_by_link_text('Go to Global Site (English)')
        lb.click()
    except NoSuchElementException:
        print('Already in English')
    wait = WebDriverWait(driver, 10)
    signin = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, 'Sign in')))
    signin.click()
    # The 'account-name' element only appears for a logged-in session.
    try:
        WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CLASS_NAME, 'account-name')))
    except TimeoutException as ex:
        print('Login did not complete: account name not found')
        print(ex)
        driver.quit()
        raise SystemExit(1)

def gotoorders():
    # Open the account menu and follow the 'My Orders' link.
    wait = WebDriverWait(driver, 10)
    driver.find_element_by_class_name('account-name').click()
    orders_link = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, 'My Orders')))
    orders_link.click()
    # Wait until the orders table has been rendered on the new page.
    wait.until(EC.presence_of_element_located((By.ID, 'buyer-ordertable')))

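# Page layout assumed by scrape_orders_page(): every order is one <tbody> inside
# the #buyer-ordertable table; order-level fields (status, order time, order ID,
# store name) appear once per order, and each '.product-sets' block inside it
# corresponds to one purchased product.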
def scrape_orders_page():
    ordertable = driver.find_element_by_id('buyer-ordertable')
    for order in ordertable.find_elements_by_tag_name('tbody'):
        # Note: the title is read once per order, so in a multi-product order
        # every product row reuses the first product's title.
        title = order.find_element_by_class_name('product-title').text
        status = order.find_element_by_class_name('order-status').text
        order_time = order.find_element_by_xpath(".//span[text()[contains(.,'Order time')]]/following-sibling::span[1]").text
        orderid = order.find_element_by_xpath(".//span[text()[contains(.,'Order ID')]]/following-sibling::span[1]").text
        store = order.find_element_by_xpath(".//span[text()[contains(.,'Store name')]]/following-sibling::span[1]").text
        for product in order.find_elements_by_class_name('product-sets'):
            pamount = product.find_element_by_class_name('product-amount').find_elements_by_tag_name('span')
            price, quantity = (elem.text for elem in pamount[:2])
            # Drop everything after the last '_' in the image URL (presumably a
            # thumbnail-size suffix) to get the full-size image.
            image = product.find_element_by_xpath(".//div[@class='product-left']/a/img").get_attribute('src').rsplit('_', 1)[0]

            # Download the product image into the working directory, unless it is already there.
            imagefile = image.split('/')[-1]
            if not os.path.exists(imagefile):
                try:
                    request.urlretrieve(image, imagefile)
                except HTTPError:
                    print("Couldn't download image:", image)
            yield {'time': order_time,
                   'id': orderid,
                   'image': image,
                   'store': store,
                   'status': status,
                   'title': title,
                   'price': price,
                   'quantity': quantity}


def orders():
    # Yield the driver once per orders page: the first page right after
    # navigating there, then every page reachable through the 'Next' link.
    gotoorders()
    yield driver
    yield from next_orders_page()

def next_orders_page():
    # Keep following the 'Next' link; stop cleanly on the last page.
    while True:
        try:
            lb = driver.find_element_by_link_text('Next')
        except NoSuchElementException:
            return
        lb.click()
        WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.ID, 'buyer-ordertable')))
        yield driver

login()
with open('list.csv', 'w', newline='') as f:
    fw = csv.DictWriter(f, ATTRS)
    fw.writeheader()
    # islice makes it easy to limit the crawl while testing: replace None with
    # a number of pages to stop early.
    for _ in islice(orders(), None):
        for row in scrape_orders_page():
            fw.writerow(row)
# driver.save_screenshot('screen.png')
driver.quit()
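
# Note that the login step itself is not automated: either the persistent profile
# under /tmp/aliexpress is already signed in, or the user has to sign in by hand
# in the Chromium window within the 30-second wait in login().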