Skip to content

Commit

Permalink
fix: Swale Borough Council
Browse files Browse the repository at this point in the history
#665
This one contains a Cloudflare robot check so is very temperamental
  • Loading branch information
m26dvd committed Oct 14, 2024
1 parent a4752b5 commit 50f3aab
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 26 deletions.
3 changes: 2 additions & 1 deletion uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -1128,7 +1128,8 @@
"SwaleBoroughCouncil": {
"postcode": "ME12 2NQ",
"skip_get_url": true,
"uprn": "100061081168",
"house_number": "81",
"web_driver": "http://selenium:4444",
"url": "https://swale.gov.uk/bins-littering-and-the-environment/bins/collection-days",
"wiki_name": "Swale Borough Council"
},
Expand Down
88 changes: 63 additions & 25 deletions uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import requests
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber


Expand All @@ -17,36 +19,72 @@ class CouncilClass(AbstractGetBinDataClass):
def parse_data(self, page: str, **kwargs) -> dict:
    """Scrape the next bin collection dates for a Swale Borough address.

    Drives the council's "my collection day" form with Selenium (the page
    sits behind a Cloudflare robot check, so a plain HTTP GET will not work),
    selects the address by house name/number, then parses the rendered
    results table.

    Args:
        page: Unused; kept for the AbstractGetBinDataClass interface.
        **kwargs: Requires "postcode" and "paon" (house name/number);
            optional "web_driver" (remote Selenium URL) and "headless".

    Returns:
        dict: {"bins": [{"type": ..., "collectionDate": "DD/MM/YYYY"}, ...]}
    """
    # Get postcode and house name/number from kwargs and validate them
    user_postcode = kwargs.get("postcode")
    user_paon = kwargs.get("paon")
    web_driver = kwargs.get("web_driver")
    headless = kwargs.get("headless")
    check_postcode(user_postcode)
    check_paon(user_paon)

    council_url = "https://swale.gov.uk/bins-littering-and-the-environment/bins/my-collection-day"

    # Create Selenium webdriver; always quit it, even on failure, so we
    # don't leak browser sessions on the Selenium grid.
    driver = create_webdriver(web_driver, headless, None, __name__)
    try:
        driver.get(council_url)

        # Wait for the postcode field to appear then populate it.
        # If it never appears, the Cloudflare robot check most likely
        # intercepted the page; later waits will then raise and abort.
        try:
            postcode_input = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, "q462406_q1"))
            )
            postcode_input.send_keys(user_postcode)
        except Exception:
            print("Page failed to load. Probably due to Cloudflare robot check!")

        # Click the "find address" submit button
        find_address = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "form_email_462397_submit"))
        )
        # JS click avoids "element not interactable" issues with overlays
        driver.execute_script("arguments[0].click();", find_address)

        # Wait for the address dropdown and pick the option containing
        # the requested house name/number
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(
                (
                    By.XPATH,
                    "//select[@id='SBCYBDAddressList']//option[contains(., '"
                    + user_paon
                    + "')]",
                )
            )
        ).click()

        # Submit again to request the collection days
        get_bins = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "form_email_462397_submit"))
        )
        driver.execute_script("arguments[0].click();", get_bins)

        # Results render asynchronously; wait for the main results block
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.ID, "SBC-YBD-Main"))
        )

        soup = BeautifulSoup(driver.page_source, features="html.parser")
    finally:
        driver.quit()

    data = {"bins": []}

    # Each upcoming collection is rendered as <strong>TYPE on DAY DD Mon YYYY</strong>
    # somewhere under the #nextCollections div.
    next_collections = soup.find("div", {"id": "nextCollections"})
    for child in next_collections:
        # Direct children include NavigableString text nodes (whitespace
        # between tags) which have no find_all — skip them.
        if not hasattr(child, "find_all"):
            continue
        for strong in child.find_all("strong"):
            split = strong.text.split(" on ")
            bin_type = split[0]
            bin_date = datetime.strptime(split[1], "%A %d %b %Y").strftime(
                "%d/%m/%Y"
            )
            data["bins"].append({"type": bin_type, "collectionDate": bin_date})

    return data

0 comments on commit 50f3aab

Please sign in to comment.