-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape.py
59 lines (47 loc) · 1.58 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import time

import mysql.connector
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Setting up the database connection and the browser.

# MySQL: connect to the local "Economictimes" database and open a cursor
# that the storing step uses for its INSERT statements.
# NOTE(review): credentials are hard-coded; consider reading them from the
# environment or a config file.
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="root",
    database="Economictimes",
)
mycursor = mydb.cursor()

# WebDriver: start Chrome through the local chromedriver binary and open the
# stock recommendations page.
# NOTE(review): passing the driver path positionally is only accepted by older
# Selenium releases; newer ones expect a Service object - confirm against the
# installed version.
browser = webdriver.Chrome('C:/Users/HP/Downloads/chromedriver')
browser.get("https://economictimes.indiatimes.com/markets/stocks/recos")
time.sleep(1)  # fixed pause after navigation before touching the page

# The <body> element receives the PAGE_DOWN key presses used for scrolling.
# find_element(By..., ...) replaces find_element_by_tag_name, which was
# removed from Selenium 4.3+, and is also available on older releases.
elem = browser.find_element(By.TAG_NAME, "body")
# Scroll down to the required place: press PAGE_DOWN on the page body a fixed
# number of times, pausing briefly after every key press (presumably so the
# page can keep up and load more stories - confirm).
no_of_pagedowns = 50
for _ in range(no_of_pagedowns):
    elem.send_keys(Keys.PAGE_DOWN)
    time.sleep(0.2)
# Scraping data.
# Each list comes from its own XPath query over the "eachStory" blocks; the
# storing step pairs the lists up by position.
# find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which was
# removed from Selenium 4.3+, and is also available on older releases.
heading = browser.find_elements(By.XPATH, "//div[@class='eachStory']/h3")
subheading = browser.find_elements(By.XPATH, "//div[@class='eachStory']/p")
times = browser.find_elements(By.XPATH, "//div[@class='eachStory']/time")

# An earlier variant of this script took the row id from the "data-orefid"
# attribute of each story's <h3>/<a> link; ids are now derived from the
# story timestamp instead.

# Raw "data-time" attribute of every <time> element, read once per element
# (each get_attribute call is a round trip to the browser).
storeTime = [stamp_elem.get_attribute("data-time") for stamp_elem in times]

# Row id: the timestamp with commas, spaces, the text 'MIST' and colons
# removed, in that order. 'MIST' only appears once the spaces are gone
# (presumably from a trailing "AM IST"/"PM IST" - confirm against the page).
# NOTE(review): two stories sharing a timestamp produce the same id.
id_ = [
    stamp.replace(',', '').replace(' ', '').replace('MIST', '').replace(':', '')
    for stamp in storeTime
]
# Storing data.

# The script used to pass an undefined name `button` for the button_text
# column, which raised NameError on the first row. Nothing is scraped for
# that column, so it is stored as NULL.
# TODO(review): scrape the real button text, or use '' if the column is
# declared NOT NULL.
button = None

# The lists come from independent XPath queries, so they can differ in length
# (e.g. a story without a <p>). Report that instead of failing with
# IndexError part-way through; zip() below stops at the shortest list.
if not (len(heading) == len(subheading) == len(storeTime) == len(id_)):
    print("warning: scraped lists differ in length; extra items are skipped.")

# "ON DUPLICATE KEY UPDATE id=id" leaves an existing row with the same id
# untouched, so re-running the script does not fail on already stored rows.
sql = (
    "INSERT INTO articles (id, heading, subheading, button_text, storeTime ) "
    "VALUES (%s, %s, %s, %s, %s) "
    "ON DUPLICATE KEY UPDATE id=id"
)

try:
    for row_id, head, sub, stamp in zip(id_, heading, subheading, storeTime):
        # .text is read from the live page, so the browser must still be open.
        mycursor.execute(sql, (row_id, head.text, sub.text, button, stamp))
    mydb.commit()
    print("records inserted.")
finally:
    # Release the cursor, the connection and the browser even when an insert
    # fails; they were previously left open.
    mycursor.close()
    mydb.close()
    browser.quit()