Skip to content

Commit

Permalink
Merge pull request #630 from stucka/hi-20240316
Browse files Browse the repository at this point in the history
Patch HI
  • Loading branch information
stucka authored Mar 16, 2024
2 parents f22a891 + e3d71df commit ff030e4
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions warn/scrapers/hi.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,7 +40,7 @@ def scrape(

firstpage = utils.get_url(firstpageurl)
soup = BeautifulSoup(firstpage.text, features="html5lib")
- pagesection = soup.select("div.primary-content")[0]
+ pagesection = soup.select("div#container_main")[0]
subpageurls = []
for atag in pagesection.find_all("a"):
href = atag["href"]
Expand Down Expand Up @@ -121,7 +121,13 @@ def scrape(

# Before 2024, the a href contained the company name. In 2024, it's the date.
if line["Company"] == tempdate:
- line["Company"] = row.get_text().strip().replace(tempdate, '').replace('–', '').strip()
+ line["Company"] = (
+     row.get_text()
+     .strip()
+     .replace(tempdate, "")
+     .replace("–", "")
+     .strip()
+ )
masterlist.append(line)

if len(masterlist) == 0:
Expand Down

0 comments on commit ff030e4

Please sign in to comment.