"""Scrape the logged-in user's AliExpress order history into list.csv.

Launches Chromium through Selenium, lets the user sign in manually,
then walks every page of 'My Orders', writing one CSV row per product
and downloading each product image into the working directory.
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import selenium.webdriver.chrome as cwd
from selenium.webdriver.support import expected_conditions as EC

import csv
import os
import sys
from urllib import request
from urllib.error import HTTPError

HOME = 'https://www.aliexpress.com/'

# Column order of list.csv; keys of the dicts yielded by scrape_orders_page().
ATTRS = ['time', 'id', 'image', 'store', 'status', 'title', 'price', 'quantity']

options = cwd.options.Options()
options.binary_location = '/usr/bin/chromium'
options.add_argument('--lang=en')
# Persistent profile so the login session survives between runs.
options.add_argument('--user-data-dir=/tmp/aliexpress')
# 'options=' replaces the deprecated 'chrome_options=' keyword (same behavior).
driver = webdriver.Chrome(options=options)


def login():
    """Open the home page and wait (up to 30s) for a manual sign-in.

    Exits the process if the signed-in marker never appears.
    """
    driver.get(HOME)
    try:
        driver.find_element_by_link_text('Go to Global Site (English)').click()
    except Exception:
        # The link is absent when the site is already shown in English.
        print('Already in English')
    wait = WebDriverWait(driver, 10)
    wait.until(EC.element_to_be_clickable((By.LINK_TEXT, 'Sign in'))).click()
    try:
        # The human completes the login; 'account-name' appears once signed in.
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'account-name')))
    except Exception as ex:
        print('My orders not found')
        print(ex)
        driver.quit()
        sys.exit(1)


def gotoorders():
    """Navigate via the account menu to the 'My Orders' page."""
    wait = WebDriverWait(driver, 10)
    driver.find_element_by_class_name('account-name').click()
    wait.until(EC.element_to_be_clickable((By.LINK_TEXT, 'My Orders'))).click()
    # Wait a second time so the orders page has finished (re)loading.
    wait.until(EC.element_to_be_clickable((By.LINK_TEXT, 'My Orders')))


def scrape_orders_page():
    """Yield one dict (keys = ATTRS) per product on the current orders page.

    Side effect: downloads each product image next to the script,
    skipping files that already exist.
    """
    ordertable = driver.find_element_by_id('buyer-ordertable')
    for order in ordertable.find_elements_by_tag_name('tbody'):
        title = order.find_element_by_class_name('product-title').text
        status = order.find_element_by_class_name('order-status').text
        # 'order_time' (not 'time') so the stdlib time module isn't shadowed.
        order_time = order.find_element_by_xpath(
            ".//span[text()[contains(.,'Order time')]]/following-sibling::span[1]").text
        orderid = order.find_element_by_xpath(
            ".//span[text()[contains(.,'Order ID')]]/following-sibling::span[1]").text
        store = order.find_element_by_xpath(
            ".//span[text()[contains(.,'Store name')]]/following-sibling::span[1]").text
        for product in order.find_elements_by_class_name('product-sets'):
            pamount = product.find_element_by_class_name(
                'product-amount').find_elements_by_tag_name('span')
            price, quantity = (elem.text for elem in pamount[:2])
            # Drop the thumbnail size suffix (e.g. '_50x50.jpg') to get the
            # full-resolution image URL.
            image = product.find_element_by_xpath(
                ".//div[@class='product-left']/a/img").get_attribute(
                    'src').rsplit('_', 1)[0]
            imagefile = image.split('/')[-1]
            if not os.path.exists(imagefile):
                try:
                    request.urlretrieve(image, imagefile)
                except HTTPError:
                    print("Couldn't download image:", image)
            yield {'time': order_time,
                   'id': orderid,
                   'image': image,
                   'store': store,
                   'status': status,
                   'title': title,
                   'price': price,
                   'quantity': quantity}


def orders():
    """Yield the driver once for each orders page, starting from page one."""
    gotoorders()
    yield driver
    yield from next_orders_page()


def next_orders_page():
    """Yield the driver for each subsequent page until 'Next' disappears.

    The original implementation recursed with no base case, so reaching
    the last page raised NoSuchElementException and aborted the run;
    this loop stops cleanly instead.
    """
    while True:
        try:
            nxt = driver.find_element_by_link_text('Next')
        except Exception:
            return  # no 'Next' link: last page reached
        nxt.click()
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.ID, 'buyer-ordertable')))
        yield driver


def main():
    """Log in, then stream every order row on every page into list.csv."""
    login()
    # newline='' per the csv module docs (avoids blank rows on Windows).
    with open('list.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, ATTRS)
        writer.writeheader()
        for _page in orders():
            for row in scrape_orders_page():
                writer.writerow(row)


if __name__ == '__main__':
    main()
# driver.save_screenshot('screen.png')