Skip to content

Commit

Permalink
fix: Swale Borough Council
Browse files Browse the repository at this point in the history
#665
This one contains a Cloudflare robot check so is very temperamental
  • Loading branch information
m26dvd committed Oct 14, 2024
1 parent a4752b5 commit 50f3aab
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 26 deletions.
3 changes: 2 additions & 1 deletion uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -1128,7 +1128,8 @@
"SwaleBoroughCouncil": {
"postcode": "ME12 2NQ",
"skip_get_url": true,
"uprn": "100061081168",
"house_number": "81",
"web_driver": "http://selenium:4444",
"url": "https://swale.gov.uk/bins-littering-and-the-environment/bins/collection-days",
"wiki_name": "Swale Borough Council"
},
Expand Down
88 changes: 63 additions & 25 deletions uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import requests
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber


Expand All @@ -17,36 +19,72 @@ class CouncilClass(AbstractGetBinDataClass):
def parse_data(self, page: str, **kwargs) -> dict:
    """Scrape the next bin collection dates for a Swale Borough address.

    Drives the council's "my collection day" form with Selenium (the page
    sits behind a Cloudflare robot check, so a plain HTTP GET will not work),
    selects the address by house name/number, then parses the rendered
    results table.

    Args:
        page: Unused; kept for the AbstractGetBinDataClass interface.
        **kwargs: Requires "postcode" and "paon" (house name/number);
            optional "web_driver" (remote Selenium URL) and "headless".

    Returns:
        dict: {"bins": [{"type": ..., "collectionDate": "DD/MM/YYYY"}, ...]}
    """
    # Get postcode and house name/number from kwargs and validate them
    user_postcode = kwargs.get("postcode")
    user_paon = kwargs.get("paon")
    web_driver = kwargs.get("web_driver")
    headless = kwargs.get("headless")
    check_postcode(user_postcode)
    check_paon(user_paon)

    council_url = "https://swale.gov.uk/bins-littering-and-the-environment/bins/my-collection-day"

    # Create Selenium webdriver; always quit it, even on failure, so we
    # don't leak browser sessions on the Selenium grid.
    driver = create_webdriver(web_driver, headless, None, __name__)
    try:
        driver.get(council_url)

        # Wait for the postcode field to appear then populate it.
        # If it never appears, the Cloudflare robot check most likely
        # intercepted the page; later waits will then raise and abort.
        try:
            postcode_input = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, "q462406_q1"))
            )
            postcode_input.send_keys(user_postcode)
        except Exception:
            print("Page failed to load. Probably due to Cloudflare robot check!")

        # Click the "find address" submit button
        find_address = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "form_email_462397_submit"))
        )
        # JS click avoids "element not interactable" issues with overlays
        driver.execute_script("arguments[0].click();", find_address)

        # Wait for the address dropdown and pick the option containing
        # the requested house name/number
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(
                (
                    By.XPATH,
                    "//select[@id='SBCYBDAddressList']//option[contains(., '"
                    + user_paon
                    + "')]",
                )
            )
        ).click()

        # Submit again to request the collection days
        get_bins = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "form_email_462397_submit"))
        )
        driver.execute_script("arguments[0].click();", get_bins)

        # Results render asynchronously; wait for the main results block
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.ID, "SBC-YBD-Main"))
        )

        soup = BeautifulSoup(driver.page_source, features="html.parser")
    finally:
        driver.quit()

    data = {"bins": []}

    # Each upcoming collection is rendered as <strong>TYPE on DAY DD Mon YYYY</strong>
    # somewhere under the #nextCollections div.
    next_collections = soup.find("div", {"id": "nextCollections"})
    for child in next_collections:
        # Direct children include NavigableString text nodes (whitespace
        # between tags) which have no find_all — skip them.
        if not hasattr(child, "find_all"):
            continue
        for strong in child.find_all("strong"):
            split = strong.text.split(" on ")
            bin_type = split[0]
            bin_date = datetime.strptime(split[1], "%A %d %b %Y").strftime(
                "%d/%m/%Y"
            )
            data["bins"].append({"type": bin_type, "collectionDate": bin_date})

    return data

0 comments on commit 50f3aab

Please sign in to comment.