From 88b038d0f66eff8c111c0d1f97a593a0c263895e Mon Sep 17 00:00:00 2001
From: palewire
Date: Tue, 8 Mar 2022 15:18:14 -0800
Subject: [PATCH] Update NE to use the newer URL

---
 warn/scrapers/ne.py | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/warn/scrapers/ne.py b/warn/scrapers/ne.py
index 78424816..a3fd0523 100644
--- a/warn/scrapers/ne.py
+++ b/warn/scrapers/ne.py
@@ -1,5 +1,4 @@
 import logging
-from datetime import datetime
 from pathlib import Path
 
 from bs4 import BeautifulSoup
@@ -29,11 +28,31 @@ def scrape(
     # Open the cache
     cache = Cache(cache_dir)
 
-    current_year = datetime.now().year
-    year_range = range(2010, current_year + 1)
+    # Get data from active page
+    active_url = "https://dol.nebraska.gov/ReemploymentServices/LayoffServices/LayoffsAndDownsizingWARN"
+    active_r = utils.get_url(active_url)
+    active_html = active_r.text
+    cache.write("ne/active.html", active_html)
+
+    soup = BeautifulSoup(active_html, "html5lib")
+    table_list = soup.find_all("table")
+    assert len(table_list) == 1
 
-    # Scrape rows
     output_rows = []
+    for row in table_list[0].find_all("tr")[1:]:
+        cell_list = row.find_all("td")
+        d = {
+            "Date": cell_list[0].text.strip(),
+            "Company": cell_list[1].text.strip(),
+            "Jobs Affected": cell_list[2].text.strip(),
+            "Location": cell_list[3].text.strip(),
+        }
+        output_rows.append(d)
+
+    # Get archived data
+    year_range = range(2010, 2020)
+
+    # Scrape archived rows
     for year in year_range:
         # Get WARN page
         warn_url = (
@@ -42,7 +61,7 @@
         warn_key = f"ne/warn-{year}.html"
 
-        # Read from cache if available and not this year or the year before
-        if cache.exists(warn_key) and year < current_year - 1:
+        # Read from cache if available
+        if cache.exists(warn_key):
             warn_html = cache.read(warn_key)
         else:
             warn_r = utils.get_url(warn_url)
@@ -61,7 +80,7 @@
         layoff_key = f"ne/layoff-{year}.html"
 
-        # Read from cache if available and not this year or the year before
-        if cache.exists(layoff_key) and year < current_year - 1:
+        # Read from cache if available
+        if cache.exists(layoff_key):
             layoff_html = cache.read(layoff_key)
         else:
             page = utils.get_url(layoff_url)
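
Reviewer note: below is a minimal standalone sketch of the new active-page
parsing, for anyone who wants to poke at the table outside the scraper. It
assumes plain requests in place of the project's utils.get_url helper, skips
the Cache layer entirely, and needs the html5lib parser the scraper already
uses; the URL and the four-column layout are copied from the hunk above. It
is an illustration, not the scraper itself.

    import requests
    from bs4 import BeautifulSoup

    # URL of the current Nebraska WARN page, copied from the patch.
    ACTIVE_URL = (
        "https://dol.nebraska.gov/ReemploymentServices/LayoffServices"
        "/LayoffsAndDownsizingWARN"
    )


    def fetch_active_rows():
        """Scrape the lone data table on the active WARN page."""
        html = requests.get(ACTIVE_URL, timeout=30).text
        soup = BeautifulSoup(html, "html5lib")

        # The patch asserts the page contains exactly one table.
        table_list = soup.find_all("table")
        assert len(table_list) == 1

        rows = []
        # Skip the header row, then map the four cells to named fields.
        for row in table_list[0].find_all("tr")[1:]:
            cells = [td.text.strip() for td in row.find_all("td")]
            rows.append(
                {
                    "Date": cells[0],
                    "Company": cells[1],
                    "Jobs Affected": cells[2],
                    "Location": cells[3],
                }
            )
        return rows


    if __name__ == "__main__":
        for record in fetch_active_rows():
            print(record)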
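
Reviewer note: the archive loop now treats 2010-2019 as frozen history, so a
cached copy is always trusted and the old year < current_year - 1 freshness
check goes away. Below is a minimal sketch of that cache-or-fetch pattern,
assuming a plain filesystem directory in place of the project's Cache class;
the cache location and the per-year URL are illustrative placeholders, not
the scraper's real values.

    from pathlib import Path

    import requests

    # Illustrative cache root; the real scraper receives cache_dir as an argument.
    CACHE_DIR = Path("~/.warn-scraper/cache").expanduser()


    def get_cached_html(key, url):
        """Return cached HTML for key, fetching and storing it on a miss.

        The archived pages no longer change, so once a page is cached it
        is never downloaded again.
        """
        path = CACHE_DIR / key
        if path.exists():
            return path.read_text()
        html = requests.get(url, timeout=30).text
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(html)
        return html


    if __name__ == "__main__":
        for year in range(2010, 2020):
            # Hypothetical placeholder URL; the real per-year address is
            # built in the scraper and is truncated in the hunk above.
            warn_url = f"https://example.com/ne/warn/{year}"
            html = get_cached_html(f"ne/warn-{year}.html", warn_url)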