Linting pre biglocalnews#625
stucka committed Mar 16, 2024
1 parent c57039d commit 2174e90
Showing 7 changed files with 6 additions and 17 deletions.
warn/scrapers/ca.py: 2 changes (1 addition & 1 deletion)
@@ -11,7 +11,7 @@
 from .. import utils
 from ..cache import Cache
 
-__authors__ = ["zstumgoren", "Dilcia19", "ydoc5212", "stucka"]
+__authors__ = ["zstumgoren", "Dilcia19", "ydoc5212"]
 __tags__ = ["html", "pdf", "excel"]
 __source__ = {
     "name": "California Employment Development Department",
warn/scrapers/mo.py: 1 change (0 additions & 1 deletion)
@@ -48,7 +48,6 @@ def scrape(
     # Download them all
     html_list = []
     for year in year_range:
-
         # Set the URL, with a hack for 2020 and 2022
         url = f"https://jobs.mo.gov/warn/{year}"

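A minimal standalone sketch of the per-year download loop shown in this hunk, using plain requests; the year bounds are hypothetical and the "hack for 2020 and 2022" mentioned in the comment is not reproduced here.

import requests

year_range = range(2015, 2025)  # hypothetical bounds
html_list = []
for year in year_range:
    # Fetch the WARN listing page for one year and keep the raw HTML
    url = f"https://jobs.mo.gov/warn/{year}"
    response = requests.get(url)
    response.raise_for_status()
    html_list.append(response.text)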
warn/scrapers/nj.py: 1 change (0 additions & 1 deletion)
@@ -42,7 +42,6 @@ def scrape(
     for ws in wb.worksheets:
         logger.debug(f"Parsing {ws}")
         for i, row in enumerate(ws.rows):
-
             # Skip header
             if i == 0:
                 continue
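A self-contained sketch of the openpyxl pattern in this hunk: walk every worksheet, skip each sheet's header row, and collect cell values (the workbook path is hypothetical).

from openpyxl import load_workbook

wb = load_workbook(filename="nj_warn.xlsx")  # hypothetical path
rows = []
for ws in wb.worksheets:
    for i, row in enumerate(ws.rows):
        # Skip the header row on every sheet
        if i == 0:
            continue
        rows.append([cell.value for cell in row])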
warn/scrapers/or.py: 6 changes (2 additions & 4 deletions)
@@ -73,17 +73,15 @@ def scrape(
     r = requests.post(starturl, cookies=cookies, data=payload, headers=requestheaders)
 
     dlsoup = BeautifulSoup(r.content, features="html5lib")
-    excelurl = (
-        baseurl + dlsoup.find("a", {"target": "_blank", "class": "btn-primary"})["href"]
-    )
+    excelurl = baseurl + dlsoup.find("a", {"class": "btn-primary"})["href"]
     logger.debug(f"Found latest data's URL at {excelurl}")
     if not excelurl:
         logger.error("No URL could be found for the newest spreadsheet.")
     latest_excel_path = "or/latest.xlsx"
     logger.debug(f"Trying to save to, we hope, {cache_dir/latest_excel_path}")
     cache.download(latest_excel_path, excelurl)
 
-    workbook = load_workbook(filename=cache_dir/latest_excel_path)
+    workbook = load_workbook(filename=cache_dir / latest_excel_path)
     worksheet = workbook.worksheets[0]
 
     masterlist: list = []
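Note that BeautifulSoup's find() returns None when nothing matches, so indexing ["href"] would raise before the `if not excelurl` check ever runs. A defensive sketch of the same lookup, assuming the same btn-primary anchor; the base URL and the plain GET are placeholders for the scraper's real request.

import requests
from bs4 import BeautifulSoup

baseurl = "https://example.oregon.gov"  # hypothetical base URL
r = requests.get(baseurl)  # the real scraper issues a POST with cookies and a payload

dlsoup = BeautifulSoup(r.content, features="html5lib")
link = dlsoup.find("a", {"class": "btn-primary"})
if link is None:
    raise RuntimeError("No URL could be found for the newest spreadsheet.")
excelurl = baseurl + link["href"]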
warn/scrapers/sc.py: 3 changes (0 additions & 3 deletions)
@@ -89,10 +89,8 @@ def scrape(

     # Open the PDF
     with pdfplumber.open(pdf_path) as pdf:
-
         # Loop through the pages
         for page in pdf.pages:
-
             # Pull out the table
             row_list = page.extract_table()

@@ -110,7 +108,6 @@

             # Loop through each row in the table
             for row in real_rows:
-
                 # Clean values
                 cell_list = [_clean_cell(c) for c in row if _clean_cell(c)]

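A minimal sketch of the pdfplumber flow in these hunks: open the PDF, pull a table from each page, and clean the cells. The _clean_cell stand-in and the file path are hypothetical.

import pdfplumber


def _clean_cell(cell):
    """Stand-in cleaner: collapse whitespace and drop empty values."""
    if cell is None:
        return None
    return " ".join(str(cell).split()) or None


row_list = []
with pdfplumber.open("sc_warn.pdf") as pdf:  # hypothetical path
    for page in pdf.pages:
        table = page.extract_table()
        if not table:
            continue
        for row in table:
            cell_list = [_clean_cell(c) for c in row if _clean_cell(c)]
            if cell_list:
                row_list.append(cell_list)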
warn/scrapers/tn.py: 4 changes (0 additions & 4 deletions)
@@ -64,7 +64,6 @@ def scrape(

     # Loop through them all, skipping the first item, which is a header
     for data in data_list[1:]:
-
         # splitting the data on its delimiter
         items = str(data).split("|")
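A tiny self-contained example of the pipe-delimited parsing in this hunk, with made-up records standing in for the scraped data_list.

data_list = [
    "Company|County|Notice Date|Employees",  # header entry, skipped below
    "Acme Corp|Davidson|2024-01-05|120",
    "Widget LLC|Shelby|2024-02-10|45",
]

rows = []
for data in data_list[1:]:
    items = str(data).split("|")
    rows.append(items)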

@@ -109,10 +108,8 @@

     # Open the PDF
     with pdfplumber.open(pdf_file) as pdf:
-
         # Loop through all the pages
         for i, my_page in enumerate(pdf.pages):
-
             # Sll even pages have data, odd pages don't have the data
             if i % 2 != 0:
                 continue
@@ -135,7 +132,6 @@

             # Loop through all the rows ...
             for row in row_list:
-
                 # Skip remove redundant headers
                 if row[0] in pdf_header_blacklist:
                     continue
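A sketch of the page and row filtering in the two hunks above: keep only even-indexed pages and drop rows whose first cell repeats a known header. The blacklist and file path are hypothetical.

import pdfplumber

pdf_header_blacklist = ["Company", "Notice Date"]  # hypothetical header cells

all_rows = []
with pdfplumber.open("tn_warn.pdf") as pdf:  # hypothetical path
    for i, my_page in enumerate(pdf.pages):
        # Only even-indexed pages carry data in this report
        if i % 2 != 0:
            continue
        row_list = my_page.extract_table() or []
        for row in row_list:
            # Skip redundant headers repeated on each page
            if row and row[0] in pdf_header_blacklist:
                continue
            all_rows.append(row)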
warn/scrapers/tx.py: 6 changes (3 additions & 3 deletions)
@@ -44,7 +44,9 @@ def scrape(

     # Get all the Excel links
     soup = BeautifulSoup(page.text, "html5lib")
-    link_list = soup.find_all("a", href=re.compile("^/sites/default/files/oei/docs/warn-act-listings-"))
+    link_list = soup.find_all(
+        "a", href=re.compile("^/sites/default/files/oei/docs/warn-act-listings-")
+    )
     logger.debug(f"{len(link_list):,} spreadsheet links found")
 
     # Clean up the links and filter 'em down
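A standalone sketch of the reformatted find_all call: filter anchors by a regex on href, here against a small made-up HTML snippet.

import re

from bs4 import BeautifulSoup

html = """
<a href="/sites/default/files/oei/docs/warn-act-listings-2024.xlsx">2024</a>
<a href="/about-us">About</a>
"""  # hypothetical markup
soup = BeautifulSoup(html, "html5lib")
link_list = soup.find_all(
    "a", href=re.compile("^/sites/default/files/oei/docs/warn-act-listings-")
)
href_list = [link["href"] for link in link_list]
# href_list -> ['/sites/default/files/oei/docs/warn-act-listings-2024.xlsx']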
@@ -60,7 +62,6 @@
     # Loop through the links we want to download
     row_list = []
     for ihref, href in enumerate(href_list):
-
         # get each url from the HTML links we found
         data_url = f"https://www.twc.texas.gov{href}"

@@ -77,7 +78,6 @@

         # Convert the sheet to a list of lists
         for irow, row in enumerate(worksheet.rows):
-
             # Skip headers after the first workbook
             if ihref > 0 and irow == 0:
                 continue
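A sketch of the merge pattern in the last two hunks: read each downloaded workbook and keep the header row only from the first one. The local file paths stand in for the cached downloads.

from openpyxl import load_workbook

excel_paths = ["tx_2023.xlsx", "tx_2024.xlsx"]  # hypothetical cached downloads

row_list = []
for ihref, path in enumerate(excel_paths):
    worksheet = load_workbook(filename=path).worksheets[0]
    for irow, row in enumerate(worksheet.rows):
        # Skip headers after the first workbook
        if ihref > 0 and irow == 0:
            continue
        row_list.append([cell.value for cell in row])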
