mirror of
https://github.com/balkian/gists.git
synced 2024-11-01 08:01:44 +00:00
109 lines
4.0 KiB
Python
109 lines
4.0 KiB
Python
|
from selenium import webdriver
|
||
|
from selenium.webdriver.support.ui import Select
|
||
|
from selenium.webdriver.common.by import By
|
||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||
|
#import selenium.webdriver.firefox.webdriver as fwb
|
||
|
import selenium.webdriver.chrome as cwd
|
||
|
from selenium.webdriver.support import expected_conditions as EC
|
||
|
|
||
|
import time
|
||
|
import os
|
||
|
import csv
|
||
|
from urllib import request
|
||
|
from urllib.error import HTTPError
|
||
|
|
||
|
# Landing page that login() loads first.
home = 'https://www.aliexpress.com/'

# CSV column order; these are the keys of the dicts yielded by
# scrape_orders_page().
ATTRS = ['time', 'id', 'image', 'store', 'status', 'title', 'price', 'quantity']

# ff_bin = fwb.FirefoxBinary(firefox_path='/usr/bin/firefox')
# ff_profile = fwb.FirefoxProfile()
options = cwd.options.Options()
options.binary_location = '/usr/bin/chromium'
options.add_argument('--lang=en')
# Fixed profile directory -- presumably so that the signed-in session is kept
# between runs; confirm before changing the path.
options.add_argument('--user-data-dir=/tmp/aliexpress')
# `options=` is the supported keyword; `chrome_options=` is a deprecated alias
# that later Selenium releases no longer accept.
driver = webdriver.Chrome(options=options)
|
||
|
# driver.set_window_size(1024, 768)
|
||
|
|
||
|
def login():
    """Open the home page and wait until an account is signed in.

    Loads ``home``, switches to the English global site when the link for it
    is present, clicks 'Sign in' and then waits up to 30 seconds for an
    element with class 'account-name' to appear.  No credentials are typed by
    this function; presumably the sign-in itself is completed by hand or by
    the saved browser profile.

    Raises:
        SystemExit: with status 1 when the 'account-name' element does not
            appear in time.  The browser is closed before exiting.
    """
    # Imported here so this function is self-contained.
    from selenium.common.exceptions import WebDriverException

    driver.get(home)
    try:
        driver.find_element(By.LINK_TEXT, 'Go to Global Site (English)').click()
    except WebDriverException:
        # Best effort: a missing or unclickable link is treated as "already on
        # the English site".  Unlike a bare `except:`, this no longer swallows
        # KeyboardInterrupt or SystemExit.
        print('Already in English')

    sign_in = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.LINK_TEXT, 'Sign in')))
    sign_in.click()

    try:
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'account-name')))
    except Exception as ex:
        print('My orders not found')
        print(ex)
        driver.quit()
        # The `exit()` helper is only injected by the `site` module and would
        # report success; raise SystemExit directly with a failure status.
        raise SystemExit(1)
|
||
|
|
||
|
def gotoorders():
    """Navigate from a signed-in page to the 'My Orders' page.

    Clicks the 'account-name' element, then the 'My Orders' link once it is
    clickable, and finally waits (up to 10 seconds per step) until a
    'My Orders' link is clickable again.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the
            'account-name' element is not on the current page.
        selenium.common.exceptions.TimeoutException: if a 'My Orders' link
            does not become clickable in time.
    """
    wait = WebDriverWait(driver, 10)
    my_orders = (By.LINK_TEXT, 'My Orders')

    # find_element(By...) is used because the find_element_by_* helpers were
    # removed in later Selenium 4 releases.
    driver.find_element(By.CLASS_NAME, 'account-name').click()
    wait.until(EC.element_to_be_clickable(my_orders)).click()
    # Second wait with the same locator: presumably serves as a "page has
    # loaded" check for the orders page.  The result is intentionally unused.
    wait.until(EC.element_to_be_clickable(my_orders))
|
||
|
|
||
|
def _order_field(order, label):
    """Return the text of the first span following the span that contains *label*."""
    xpath = ".//span[text()[contains(.,'{}')]]/following-sibling::span[1]".format(label)
    return order.find_element(By.XPATH, xpath).text


def scrape_orders_page():
    """Yield one dict per product listed on the currently loaded orders page.

    Every ``tbody`` inside the element with id 'buyer-ordertable' is treated
    as one order.  Title, status, order time, order id and store name are read
    once per order and repeated for each of its 'product-sets' entries.

    As a side effect, each product image is downloaded into the current
    working directory under the last path component of its URL, unless a file
    with that name already exists.  A failed download is reported on stdout
    and does not stop the scrape.

    Yields:
        dict: with the keys 'time', 'id', 'image', 'store', 'status', 'title',
        'price' and 'quantity'; all values are strings.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if an expected
            element is missing from the page.
        ValueError: if a product has fewer than two spans in 'product-amount'.
    """
    # Imported here so this function is self-contained.
    from urllib.error import URLError

    ordertable = driver.find_element(By.ID, 'buyer-ordertable')
    for order in ordertable.find_elements(By.TAG_NAME, 'tbody'):
        title = order.find_element(By.CLASS_NAME, 'product-title').text
        status = order.find_element(By.CLASS_NAME, 'order-status').text
        # Named order_time so the imported `time` module is not shadowed.
        order_time = _order_field(order, 'Order time')
        orderid = _order_field(order, 'Order ID')
        store = _order_field(order, 'Store name')

        for product in order.find_elements(By.CLASS_NAME, 'product-sets'):
            amount = product.find_element(By.CLASS_NAME, 'product-amount')
            spans = amount.find_elements(By.TAG_NAME, 'span')
            # The first two spans are taken as price and quantity, in that order.
            price, quantity = [span.text for span in spans[:2]]

            img = product.find_element(By.XPATH, ".//div[@class='product-left']/a/img")
            # Drop everything after the last underscore of the src URL --
            # presumably a thumbnail-size suffix; confirm against the page.
            image = img.get_attribute('src').rsplit('_', 1)[0]

            imagefile = image.split('/')[-1]
            if not os.path.exists(imagefile):
                try:
                    request.urlretrieve(image, imagefile)
                except URLError:
                    # URLError is the base class of HTTPError, so connection
                    # and DNS failures are now tolerated as well as HTTP
                    # error statuses.
                    print('Couldn\'t downloaded image:', image)

            yield {'time': order_time,
                   'id': orderid,
                   'image': image,
                   'store': store,
                   'status': status,
                   'title': title,
                   'price': price,
                   'quantity': quantity}
|
||
|
|
||
|
|
||
|
def orders():
    """Walk through every orders page, yielding the driver once per page.

    Navigates to the orders page with gotoorders(), yields ``driver`` for that
    first page, and then yields whatever next_orders_page() produces for the
    following pages.
    """
    gotoorders()
    yield driver
    for page in next_orders_page():
        yield page
|
||
|
|
||
|
def next_orders_page():
    """Click through the remaining orders pages, yielding the driver after each.

    As long as the current page has a link with the text 'Next', the link is
    clicked, the function waits up to 5 seconds for the element with id
    'buyer-ordertable' to be present, and ``driver`` is yielded.  The
    generator finishes normally when there is no 'Next' link.

    Previously the end of the list was only reached by letting
    NoSuchElementException escape into the caller, and each page added one
    more level of generator recursion.

    Yields:
        The module-level ``driver``, once per newly loaded page.

    Raises:
        selenium.common.exceptions.TimeoutException: if the orders table does
            not appear after clicking 'Next'.
    """
    while True:
        # find_elements returns an empty list instead of raising, which makes
        # "no more pages" an ordinary condition.
        next_links = driver.find_elements(By.LINK_TEXT, 'Next')
        if not next_links:
            return
        next_links[0].click()
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.ID, 'buyer-ordertable')))
        yield driver
|
||
|
|
||
|
# Script entry: sign in, then write every product of every order page to
# list.csv in the current working directory.
login()
from itertools import islice

try:
    # newline='' is required by the csv module so that it controls line
    # endings itself; an explicit encoding keeps non-ASCII text from failing
    # on a non-UTF-8 locale.
    with open('list.csv', 'w', newline='', encoding='utf-8') as f:
        fw = csv.DictWriter(f, ATTRS)
        fw.writeheader()
        # islice(..., None) applies no limit; replace None with a number to
        # scrape only the first few pages.
        for _ in islice(orders(), None):
            # orders() has already loaded the page; the yielded value itself
            # is not needed here.
            for row in scrape_orders_page():
                fw.writerow(row)
finally:
    # Always close the browser, including when scraping fails part-way.
    driver.quit()
|
||
|
# driver.save_screenshot('screen.png')
|