-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.py
142 lines (111 loc) · 4.1 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import json
from time import sleep

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By

from product import Product
# Base URL of the Amazon storefront being scraped.
amazon_URL = "https://www.amazon.co.uk/"
# Set by get_exchange_rate(); empty until a currency has been chosen.
exchange_rate_url = ""
# Currencies supported by the exchange-rate lookup.
valid_currency = ["USD", "EUR", "GBP", "SEK"]
def get_exchange_rate(user_choice_currency):
    """Return the GBP -> ``user_choice_currency`` exchange-rate multiplier.

    Fetches the latest rates (base GBP) from exchangerate-api.com.

    Args:
        user_choice_currency: One of the codes in ``valid_currency``.

    Raises:
        ValueError: If the currency is not supported (subclass of
            Exception, so existing broad handlers still catch it).
        requests.RequestException: On network failure or HTTP error.
    """
    global exchange_rate_url
    if user_choice_currency not in valid_currency:
        raise ValueError("Invalid currency")
    exchange_rate_url = 'https://api.exchangerate-api.com/v4/latest/GBP'
    # Timeout so a stalled API call cannot hang the scraper forever.
    response = requests.get(exchange_rate_url, timeout=10)
    response.raise_for_status()
    return response.json()['rates'][user_choice_currency]
# --- Browser and search setup --------------------------------------------
# Headless Firefox: no window is shown while scraping.
options = webdriver.FirefoxOptions()
options.add_argument('--headless')
driver = webdriver.Firefox(options=options)

search_term = str(input("Search: "))
currency = str(input("Which currency do you want?\n USD, EUR, GBP, SEK\n :"))
currency_choice = get_exchange_rate(str(currency).upper())

driver.get(amazon_URL)
# Selenium 4 removed find_element_by_css_selector; use the By locator API
# (also available in Selenium 3, so this stays backward compatible).
search_element = driver.find_element(By.CSS_SELECTOR, '#twotabsearchtextbox')
search_element.send_keys(search_term)
search_element.submit()

products = []
page = 1
def convert_string_price_to_float(price_text, rate=None):
    """Parse an Amazon price string and convert it at an exchange rate.

    Amazon renders a price such as "£1,299\n00" with the pence on a
    separate line; this turns it into 1299.00, multiplies by the rate
    and rounds to the nearest whole unit.

    Args:
        price_text: Raw element text containing a "£" amount.
        rate: Exchange-rate multiplier. Defaults to the module-level
            ``currency_choice`` chosen at startup (backward compatible).

    Returns:
        int: The converted, rounded price.

    Raises:
        IndexError: If ``price_text`` contains no "£".
        ValueError: If the amount cannot be parsed as a number.
    """
    if rate is None:
        rate = currency_choice
    amount = price_text.split("£")[1]
    # Pence appear after a newline; rejoin them as a decimal fraction.
    pounds, newline, pence = amount.partition("\n")
    if newline:
        amount = pounds + "." + pence
    # BUG FIX: the original kept only the first two comma-split chunks,
    # silently truncating amounts like "1,299,000". Strip all separators.
    amount = amount.replace(",", "")
    return round(float(amount) * rate)
# --- Scrape result pages 1..12 -------------------------------------------
base_results_url = None
while True:
    # Sleep 0.5 seconds to not put too much stress on the website.
    sleep(0.5)
    if page == 1:
        # Remember the page-1 results URL for pagination below.
        base_results_url = driver.current_url
    else:
        # BUG FIX: appending to driver.current_url stacked parameters
        # ("&page=2&page=3&..."); always paginate from the base URL.
        try:
            driver.get(base_results_url + '&page=' + str(page))
        except Exception:
            raise Exception("Could not change page")
    # Loop through every result list in page x.
    for result_list in driver.find_elements(By.CSS_SELECTOR, '.s-result-list'):
        counter = 0
        # For every result item in page x.
        for element in result_list.find_elements(By.CSS_SELECTOR, '.s-result-item'):
            should_add = True
            name = ""
            price = ""
            prev_price = ""
            link = ""
            try:
                name = result_list.find_elements(By.TAG_NAME, 'h2')[counter].text
                price = convert_string_price_to_float(
                    element.find_element(By.CLASS_NAME, 'a-price').text)
                # NOTE(review): '//h2/a' is an absolute XPath, so it
                # searches the whole document, not just result_list —
                # preserved from the original; confirm it is intended.
                link = result_list.find_elements(By.XPATH, '//h2/a')[counter].get_attribute("href")
                try:
                    prev_price = convert_string_price_to_float(
                        element.find_element(By.CLASS_NAME, 'a-text-price').text)
                except Exception:
                    # No "was" price shown: treat current price as previous.
                    prev_price = price
            except Exception:
                # Name/price/link missing: skip this result entirely.
                should_add = False
            if should_add:
                products.append(Product(name, price, prev_price, link))
            counter += 1
    print("Done scraping page " + str(page))
    page += 1
    if page >= 13:
        break
# --- Pick the cheapest match and the biggest discount --------------------
biggest_discount = 0.0
lowest_price = 0.0
discount = 0.0
cheapest_product = Product("", "", "", "")
best_deal_product = Product("", "", "", "")
search_terms = search_term.split(" ")
run = 0
for product in products:
    # Only consider products whose name contains every search word.
    if any(word.lower() not in product.name.lower() for word in search_terms):
        continue
    # Track the cheapest matching product.
    if run == 0 or product.price < lowest_price:
        lowest_price = product.price
        cheapest_product = product
        run = 1
    # BUG FIX: the original computed the discount only in the `else`
    # branch, so the first match and every cheapest-so-far product were
    # never considered for the best-deal slot. Check every match.
    discount = product.prev_price - product.price
    if discount > biggest_discount:
        biggest_discount = discount
        best_deal_product = product
# Persist every scraped product, then report the two highlights.
with open('products.json', 'w') as json_file:
    data = {"Products": [prod.serialize() for prod in products]}
    json.dump(data, json_file, sort_keys=True, indent=4)
for highlight in (cheapest_product, best_deal_product):
    print(json.dumps(highlight.serialize(), sort_keys=True, indent=4))
# Open the best deal in a visible browser window.
# BUG FIX: quit the headless scraping driver before rebinding `driver`,
# otherwise its Firefox process is leaked.
driver.quit()
driver = webdriver.Firefox()
driver.get(best_deal_product.link)