forked from yasharya2901/Python-Price-Scrapper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
amazon.py
73 lines (54 loc) · 2.58 KB
/
amazon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from bs4 import BeautifulSoup
import requests
import sys
# Browser-like HTTP headers passed as `headers=` to the requests.get calls
# in this module.
header = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0",
    "Accept-Language": "en-US,en;q=0.5",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
}
class amazon:
    """Product name and price scraped from an Amazon product page.

    Constructing an instance downloads ``url`` and extracts the product
    title and the whole-number part of the price.  Failures (non-200
    response, missing title, missing price) terminate the program through
    ``sys.exit`` with an explanatory message.

    Attributes set by ``__init__``:
        url:   the product page URL passed by the caller.
        name:  stripped text of the ``span#productTitle`` element.
        price: text of the first ``span.a-price-whole`` element.
    """

    # Seconds to wait for Amazon before giving up; without a timeout
    # requests.get can block indefinitely on a stalled connection.
    _REQUEST_TIMEOUT = 10

    def __init__(self, url):
        """Fetch ``url`` and populate ``name`` and ``price``.

        Raises:
            SystemExit: if the page cannot be fetched with status 200, or
                the product title or price element is not in the page.
            requests.exceptions.RequestException: on network errors or
                when the request times out.
        """
        self.url = url
        soup = self._fetch_soup(url)

        title_tag = soup.find('span', id='productTitle')
        if not self.__check_if_product_exists(title_tag):
            sys.exit("Unable to get the product. Please check the URL and try again.")
        self.name = title_tag.text.strip()

        # The price span may be absent from the page; fail with a readable
        # message instead of an AttributeError on None.
        price_tag = soup.find('span', class_='a-price-whole')
        if price_tag is None:
            sys.exit("Unable to get the product price. Please check the URL and try again.")
        self.price = price_tag.text

    @classmethod
    def _fetch_soup(cls, url, params=None):
        """Download ``url`` once and return the parsed page.

        A single request is used for both the status check and the body so
        the parsed HTML is guaranteed to be the response that was checked.

        Args:
            url: address to fetch.
            params: optional query parameters; requests URL-encodes them.

        Raises:
            SystemExit: if the response status code is not 200.
        """
        response = requests.get(
            url,
            headers=header,
            params=params,
            timeout=cls._REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            sys.exit(f"Unable to get the page. Error code: {response.status_code}")
        return BeautifulSoup(response.text, 'lxml')

    def __check_if_product_exists(self, soup):
        """Return True when the looked-up element ``soup`` is not None."""
        return soup is not None

    def print_product_info(self):
        """Print the store name, product name and price to stdout."""
        print("Amazon")
        print(f"Product Name: {self.name}")
        print(f"Product Price: Rs. {self.price}")
        print("-----------------------------------------------------------------------------------------")

    @staticmethod
    def search_item(prod_name):
        """Search amazon.in for ``prod_name`` and return a product link.

        Returns the absolute URL built from the first matching search
        result link.  When no result link is found, the user is prompted
        on stdin and whatever they type (a link, or "exit") is returned
        unchanged.

        Raises:
            SystemExit: if the search page does not return status 200.
        """
        # Passing the term via ``params`` lets requests URL-encode it, so
        # characters such as '&' or '#' no longer corrupt the query string
        # (spaces are still sent as '+').
        soup = amazon._fetch_soup("https://www.amazon.in/s", params={"k": prod_name})

        href_attr = soup.find('a', class_="a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal")
        if not href_attr:
            print('''We were unable to find the product on Amazon. Please paste the link of the product if you have any. Else type "exit"''')
            return input("> ")

        # The href is concatenated onto the site origin as-is.
        return "https://www.amazon.in" + href_attr['href']