diff --git a/custom_components/uk_bin_collection/__init__.py b/custom_components/uk_bin_collection/__init__.py index b3b46d0bd2..210a2e8190 100644 --- a/custom_components/uk_bin_collection/__init__.py +++ b/custom_components/uk_bin_collection/__init__.py @@ -12,7 +12,9 @@ async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool: _LOGGER.info(LOG_PREFIX + "Data Supplied: %s", entry.data) council_name = entry.data.get("council", "Unknown Council") - _LOGGER.info(LOG_PREFIX + "Setting up UK Bin Collection Data for council: %s", council_name) + _LOGGER.info( + LOG_PREFIX + "Setting up UK Bin Collection Data for council: %s", council_name + ) hass.data.setdefault(DOMAIN, {}) @@ -25,11 +27,13 @@ async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool: # Forward the entry setup to the sensor platform await hass.config_entries.async_forward_entry_setups(entry, PLATFORMS) - _LOGGER.info(LOG_PREFIX + "Successfully set up UK Bin Collection Data for council: %s", council_name) + _LOGGER.info( + LOG_PREFIX + "Successfully set up UK Bin Collection Data for council: %s", + council_name, + ) return True - async def async_unload_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool: """Unload a config entry.""" return await hass.config_entries.async_unload_platforms(entry, PLATFORMS) diff --git a/custom_components/uk_bin_collection/config_flow.py b/custom_components/uk_bin_collection/config_flow.py index 98f0e2a79f..0691e0f8e9 100644 --- a/custom_components/uk_bin_collection/config_flow.py +++ b/custom_components/uk_bin_collection/config_flow.py @@ -118,7 +118,9 @@ async def async_step_user(self, user_input=None): { vol.Required("name", default=""): cv.string, vol.Required("council", default=""): vol.In(self.council_options), - vol.Optional("icon_color_mapping", default=""): cv.string, # Optional field + vol.Optional( + "icon_color_mapping", default="" + ): cv.string, # Optional field } ), errors=errors, @@ -279,7 +281,8 @@ async def async_step_reconfigure_confirm( schema = schema.extend( { vol.Optional( - "icon_color_mapping", default=existing_data.get("icon_color_mapping", "") + "icon_color_mapping", + default=existing_data.get("icon_color_mapping", ""), ): str } ) diff --git a/custom_components/uk_bin_collection/sensor.py b/custom_components/uk_bin_collection/sensor.py index 3d0141db27..cb194496be 100644 --- a/custom_components/uk_bin_collection/sensor.py +++ b/custom_components/uk_bin_collection/sensor.py @@ -44,7 +44,9 @@ async def async_setup_entry(hass, config, async_add_entities): name = config.data.get("name", "") timeout = config.data.get("timeout", 60) - icon_color_mapping = config.data.get("icon_color_mapping", "{}") # Use an empty JSON object as default + icon_color_mapping = config.data.get( + "icon_color_mapping", "{}" + ) # Use an empty JSON object as default args = [ config.data.get("council", ""), @@ -52,7 +54,8 @@ async def async_setup_entry(hass, config, async_add_entities): *( f"--{key}={value}" for key, value in config.data.items() - if key not in { + if key + not in { "name", "council", "url", @@ -87,7 +90,9 @@ async def async_setup_entry(hass, config, async_add_entities): for bin_type in coordinator.data.keys(): device_id = f"{name}_{bin_type}" entities.append( - UKBinCollectionDataSensor(coordinator, bin_type, device_id, icon_color_mapping) + UKBinCollectionDataSensor( + coordinator, bin_type, device_id, icon_color_mapping + ) ) entities.append( UKBinCollectionAttributeSensor( @@ -140,8 +145,12 @@ async def async_setup_entry(hass, config, async_add_entities): ) ) + # Add the new Raw JSON Sensor + entities.append(UKBinCollectionRawJSONSensor(coordinator, f"{name}_raw_json", name)) + async_add_entities(entities) + class HouseholdBinCoordinator(DataUpdateCoordinator): """Household Bin Coordinator""" @@ -168,15 +177,17 @@ def get_latest_collection_info(data) -> dict: """Process the bin collection data.""" current_date = datetime.now() next_collection_dates = {} - + for bin_data in data["bins"]: bin_type = bin_data["type"] collection_date_str = bin_data["collectionDate"] collection_date = datetime.strptime(collection_date_str, "%d/%m/%Y") - + if collection_date.date() >= current_date.date(): if bin_type in next_collection_dates: - if collection_date < datetime.strptime(next_collection_dates[bin_type], "%d/%m/%Y"): + if collection_date < datetime.strptime( + next_collection_dates[bin_type], "%d/%m/%Y" + ): next_collection_dates[bin_type] = collection_date_str else: next_collection_dates[bin_type] = collection_date_str @@ -190,12 +201,16 @@ class UKBinCollectionDataSensor(CoordinatorEntity, SensorEntity): device_class = DEVICE_CLASS - def __init__(self, coordinator, bin_type, device_id, icon_color_mapping=None) -> None: + def __init__( + self, coordinator, bin_type, device_id, icon_color_mapping=None + ) -> None: """Initialize the main bin sensor.""" super().__init__(coordinator) self._bin_type = bin_type self._device_id = device_id - self._icon_color_mapping = json.loads(icon_color_mapping) if icon_color_mapping else {} + self._icon_color_mapping = ( + json.loads(icon_color_mapping) if icon_color_mapping else {} + ) self.apply_values() @property @@ -270,6 +285,7 @@ def extra_state_attributes(self): STATE_ATTR_NEXT_COLLECTION: self._next_collection.strftime("%d/%m/%Y"), STATE_ATTR_DAYS: self._days, } + @property def color(self): """Return the entity icon.""" @@ -284,14 +300,24 @@ def unique_id(self): class UKBinCollectionAttributeSensor(CoordinatorEntity, SensorEntity): """Implementation of the attribute sensors (Colour, Next Collection, Days, Bin Type, Raw Next Collection).""" - def __init__(self, coordinator, bin_type, unique_id, attribute_type, device_id, icon_color_mapping=None) -> None: + def __init__( + self, + coordinator, + bin_type, + unique_id, + attribute_type, + device_id, + icon_color_mapping=None, + ) -> None: """Initialize the attribute sensor.""" super().__init__(coordinator) self._bin_type = bin_type self._unique_id = unique_id self._attribute_type = attribute_type self._device_id = device_id - self._icon_color_mapping = json.loads(icon_color_mapping) if icon_color_mapping else {} + self._icon_color_mapping = ( + json.loads(icon_color_mapping) if icon_color_mapping else {} + ) # Use user-supplied icon and color if available self._icon = self._icon_color_mapping.get(self._bin_type, {}).get("icon") @@ -320,14 +346,20 @@ def state(self): if self._attribute_type == "Colour": return self._color # Return the colour of the bin elif self._attribute_type == "Next Collection Human Readable": - return self.coordinator.data[self._bin_type] # Already formatted next collection + return self.coordinator.data[ + self._bin_type + ] # Already formatted next collection elif self._attribute_type == "Days Until Collection": - next_collection = parser.parse(self.coordinator.data[self._bin_type], dayfirst=True).date() + next_collection = parser.parse( + self.coordinator.data[self._bin_type], dayfirst=True + ).date() return (next_collection - datetime.now().date()).days elif self._attribute_type == "Bin Type": return self._bin_type # Return the bin type for the Bin Type sensor elif self._attribute_type == "Next Collection Date": - return self.coordinator.data[self._bin_type] # Return the raw next collection date + return self.coordinator.data[ + self._bin_type + ] # Return the raw next collection date @property def icon(self): @@ -344,14 +376,18 @@ def extra_state_attributes(self): """Return extra attributes of the sensor.""" return { STATE_ATTR_COLOUR: self._color, - STATE_ATTR_NEXT_COLLECTION: self.coordinator.data[self._bin_type], # Return the collection date + STATE_ATTR_NEXT_COLLECTION: self.coordinator.data[ + self._bin_type + ], # Return the collection date } @property def device_info(self): """Return device information for grouping sensors.""" return { - "identifiers": {(DOMAIN, self._device_id)}, # Use the same device_id for all sensors of the same bin type + "identifiers": { + (DOMAIN, self._device_id) + }, # Use the same device_id for all sensors of the same bin type "name": f"{self.coordinator.name} {self._bin_type}", "manufacturer": "UK Bin Collection", "model": "Bin Sensor", @@ -361,4 +397,36 @@ def device_info(self): @property def unique_id(self): """Return a unique ID for the sensor.""" - return self._unique_id \ No newline at end of file + return self._unique_id + + +class UKBinCollectionRawJSONSensor(CoordinatorEntity, SensorEntity): + """Sensor to hold the raw JSON data for bin collections.""" + + def __init__(self, coordinator, unique_id, name) -> None: + """Initialize the raw JSON sensor.""" + super().__init__(coordinator) + self._unique_id = unique_id + self._name = name + + @property + def name(self): + """Return the name of the sensor.""" + return f"{self._name} Raw JSON" + + @property + def state(self): + """Return the state, which is the raw JSON data.""" + return json.dumps(self.coordinator.data) # Convert the raw dict to JSON string + + @property + def unique_id(self): + """Return a unique ID for the sensor.""" + return self._unique_id + + @property + def extra_state_attributes(self): + """Return extra attributes for the sensor.""" + return { + "raw_data": self.coordinator.data # Provide the raw data as an attribute + } diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index e0441f4d8b..8426830e77 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -367,7 +367,7 @@ }, "EastDevonDC": { "url": "https://eastdevon.gov.uk/recycling-and-waste/recycling-waste-information/when-is-my-bin-collected/future-collections-calendar/?UPRN=010090909915", - "wiki_command_url_override": "https://eastdevon.gov.uk/recycling-waste/recycling-and-waste-information/when-is-my-bin-collected/future-collections-calendar/?UPRN=XXXXXXXX", + "wiki_command_url_override": "https://eastdevon.gov.uk/recycling-and-waste/recycling-waste-information/when-is-my-bin-collected/future-collections-calendar/?UPRN=XXXXXXXX", "wiki_name": "East Devon District Council", "wiki_note": "Replace XXXXXXXX with UPRN." }, diff --git a/uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py index dd9df779aa..7abb346c25 100644 --- a/uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/BasildonCouncil.py @@ -1,13 +1,13 @@ -from uk_bin_collection.uk_bin_collection.common import * +import requests +import json +from datetime import datetime +from uk_bin_collection.uk_bin_collection.common import check_uprn, date_format as DATE_FORMAT from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass -# import the wonderful Beautiful Soup and the URL grabber class CouncilClass(AbstractGetBinDataClass): """ - Concrete classes have to implement all abstract operations of the - base class. They can also override some operations with a default - implementation. + Concrete class that implements the abstract bin data fetching and parsing logic. """ def parse_data(self, page: str, **kwargs) -> dict: @@ -18,64 +18,67 @@ def parse_data(self, page: str, **kwargs) -> dict: check_uprn(uprn) payload = { - # Add your payload details here (replace this with the actual payload structure if required) "uprn": uprn } - # Headers for the request - headers = { - "Content-Type": "application/json" - } + headers = {"Content-Type": "application/json"} response = requests.post(url_base, data=json.dumps(payload), headers=headers) - # Ensure the request was successful if response.status_code == 200: data = response.json() # Initialize an empty list to store the bin collection details - bins = [] # Function to add collection details to bins list def add_collection(service_name, collection_data): - bins.append({ - "type": service_name, - "collectionDate": collection_data.get("current_collection_date") - }) + bins.append( + { + "type": service_name, + "collectionDate": collection_data.get("current_collection_date"), + } + ) - # Extract refuse information - available_services = data["refuse"]["available_services"] + available_services = data.get("refuse", {}).get("available_services", {}) + + date_format = "%d-%m-%Y" # Define the desired date format for service_name, service_data in available_services.items(): - # Append the service name and current collection date to the "bins" list + # Handle the different cases of service data match service_data["container"]: case "Green Wheelie Bin": - subscription_status = service_data["subscription"]["active"] if service_data["subscription"] else False - type_descr = f"Green Wheelie Bin ({"Active" if subscription_status else "Expired"})" + subscription_status = ( + service_data["subscription"]["active"] + if service_data.get("subscription") + else False + ) + type_descr = f"Green Wheelie Bin ({'Active' if subscription_status else 'Expired'})" case "N/A": - type_descr = service_data["name"] + type_descr = service_data.get("name", "Unknown Service") case _: - type_descr = service_data["container"] - + type_descr = service_data.get("container", "Unknown Container") date_str = service_data.get("current_collection_date") - # Parse the date string into a datetime object - date_obj = datetime.strptime(date_str, "%Y-%m-%d") - - # Convert the datetime object to the desired format - formatted_date = date_obj.strftime(date_format) - - bins.append({ - "type": type_descr, # Use service name from the data - "collectionDate": formatted_date - }) + if date_str: # Ensure the date string exists + try: + # Parse and format the date string + date_obj = datetime.strptime(date_str, "%Y-%m-%d") + formatted_date = date_obj.strftime(DATE_FORMAT) + except ValueError: + formatted_date = "Invalid Date" + else: + formatted_date = "No Collection Date" + + bins.append( + { + "type": type_descr, # Use service name from the data + "collectionDate": formatted_date, + } + ) else: print(f"Failed to fetch data. Status code: {response.status_code}") + return {} - data = { - "bins": bins - } - - return data + return {"bins": bins} diff --git a/uk_bin_collection/uk_bin_collection/councils/BelfastCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BelfastCityCouncil.py index e87bb937b5..ce81105359 100644 --- a/uk_bin_collection/uk_bin_collection/councils/BelfastCityCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/BelfastCityCouncil.py @@ -9,7 +9,6 @@ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass - # import the wonderful Beautiful Soup and the URL grabber class CouncilClass(AbstractGetBinDataClass): """ @@ -34,7 +33,7 @@ def parse_data(self, page: str, **kwargs) -> dict: session = requests.Session() session.headers.update(headers) - + user_uprn = kwargs.get("uprn") user_postcode = kwargs.get("postcode") URL = "https://online.belfastcity.gov.uk/find-bin-collection-day/Default.aspx" @@ -47,14 +46,16 @@ def parse_data(self, page: str, **kwargs) -> dict: "__EVENTTARGET": "", "__EVENTARGUMENT": "", "__VIEWSTATE": self.get_session_variable(soup, "__VIEWSTATE"), - "__VIEWSTATEGENERATOR": self.get_session_variable(soup, "__VIEWSTATEGENERATOR"), + "__VIEWSTATEGENERATOR": self.get_session_variable( + soup, "__VIEWSTATEGENERATOR" + ), "__SCROLLPOSITIONX": "0", "__SCROLLPOSITIONY": "0", "__EVENTVALIDATION": self.get_session_variable(soup, "__EVENTVALIDATION"), "ctl00$MainContent$searchBy_radio": "P", "ctl00$MainContent$Street_textbox": "", "ctl00$MainContent$Postcode_textbox": user_postcode, - "ctl00$MainContent$AddressLookup_button": "Find address" + "ctl00$MainContent$AddressLookup_button": "Find address", } # Build intermediate ASP.NET variables for uprn Select address @@ -65,7 +66,9 @@ def parse_data(self, page: str, **kwargs) -> dict: "__EVENTTARGET": "", "__EVENTARGUMENT": "", "__VIEWSTATE": self.get_session_variable(soup, "__VIEWSTATE"), - "__VIEWSTATEGENERATOR": self.get_session_variable(soup, "__VIEWSTATEGENERATOR"), + "__VIEWSTATEGENERATOR": self.get_session_variable( + soup, "__VIEWSTATEGENERATOR" + ), "__SCROLLPOSITIONX": "0", "__SCROLLPOSITIONY": "0", "__EVENTVALIDATION": self.get_session_variable(soup, "__EVENTVALIDATION"), @@ -73,14 +76,14 @@ def parse_data(self, page: str, **kwargs) -> dict: "ctl00$MainContent$Street_textbox": "", "ctl00$MainContent$Postcode_textbox": user_postcode, "ctl00$MainContent$lstAddresses": user_uprn, - "ctl00$MainContent$SelectAddress_button": "Select address" + "ctl00$MainContent$SelectAddress_button": "Select address", } # Actual http call to get Bins Data response = session.post(URL, data=form_data) response.raise_for_status() soup = BeautifulSoup(response.text, "html.parser") - + # Find Bins table and data table = soup.find("div", {"id": "binsGrid"}) if table: @@ -91,7 +94,9 @@ def parse_data(self, page: str, **kwargs) -> dict: collection_type = columns[0].get_text(strip=True) collection_date_raw = columns[3].get_text(strip=True) # if the month number is a single digit there are 2 spaces, stripping all spaces to make it consistent - collection_date = datetime.strptime(collection_date_raw.replace(" ", ""),'%a%b%d%Y') + collection_date = datetime.strptime( + collection_date_raw.replace(" ", ""), "%a%b%d%Y" + ) bin_entry = { "type": collection_type, "collectionDate": collection_date.strftime(date_format), diff --git a/uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py index 5709224ac7..d7ed4458ea 100644 --- a/uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py @@ -45,17 +45,13 @@ def parse_data(self, page: str, **kwargs) -> dict: ) inputElement_postcodesearch.send_keys(user_postcode) - - find_address_btn = wait.until( EC.element_to_be_clickable((By.XPATH, '//*[@id="sub"]')) ) find_address_btn.click() dropdown_options = wait.until( - EC.presence_of_element_located( - (By.XPATH, '//*[@id="address"]') - ) + EC.presence_of_element_located((By.XPATH, '//*[@id="address"]')) ) time.sleep(2) dropdown_options.click() @@ -71,11 +67,8 @@ def parse_data(self, page: str, **kwargs) -> dict: # Click the element address.click() - submit_address = wait.until( - EC.presence_of_element_located( - (By.XPATH, '//*[@id="go"]') - ) + EC.presence_of_element_located((By.XPATH, '//*[@id="go"]')) ) time.sleep(2) submit_address.click() @@ -83,13 +76,11 @@ def parse_data(self, page: str, **kwargs) -> dict: results_found = wait.until( EC.element_to_be_clickable( (By.XPATH, '//h1[contains(text(), "Your bin days")]') - ) ) + ) final_page = wait.until( - EC.presence_of_element_located( - (By.CLASS_NAME, "waste__collections") - ) + EC.presence_of_element_located((By.CLASS_NAME, "waste__collections")) ) soup = BeautifulSoup(driver.page_source, features="html.parser") @@ -103,29 +94,41 @@ def parse_data(self, page: str, **kwargs) -> dict: # Loop through each bin field for bin_section in bin_sections: # Extract the bin type (e.g., "Brown Caddy", "Green Wheelie Bin", etc.) - bin_type = bin_section.get_text(strip=True).split("\n")[0] # The first part is the bin type + bin_type = bin_section.get_text(strip=True).split("\n")[ + 0 + ] # The first part is the bin type # Find the next sibling
tag that contains the next collection information summary_list = bin_section.find_next("dl", class_="govuk-summary-list") if summary_list: # Now, instead of finding by class, we'll search by text within the dt element - next_collection_dt = summary_list.find("dt", string=lambda text: "Next collection" in text) + next_collection_dt = summary_list.find( + "dt", string=lambda text: "Next collection" in text + ) if next_collection_dt: # Find the sibling
tag for the collection date - next_collection = next_collection_dt.find_next_sibling("dd").get_text(strip=True) + next_collection = next_collection_dt.find_next_sibling( + "dd" + ).get_text(strip=True) if next_collection: try: # Parse the next collection date (assuming the format is like "Tuesday 15 October 2024") - parsed_date = datetime.strptime(next_collection, "%A %d %B %Y") + parsed_date = datetime.strptime( + next_collection, "%A %d %B %Y" + ) # Add the bin information to the data dictionary - data["bins"].append({ - "type": bin_type, - "collectionDate": parsed_date.strftime(date_format), - }) + data["bins"].append( + { + "type": bin_type, + "collectionDate": parsed_date.strftime( + date_format + ), + } + ) except ValueError as e: print(f"Error parsing date for {bin_type}: {e}") else: diff --git a/uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py index 1978612e27..e278152988 100644 --- a/uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py @@ -82,7 +82,7 @@ def parse_data(self, page: str, **kwargs) -> dict: bin_type = " ".join(words).capitalize() date_list = item.find_all("p") for d in date_list: - clean_date_str = re.sub(r'[^A-Za-z0-9 ]+', '', d.text.strip()) + clean_date_str = re.sub(r"[^A-Za-z0-9 ]+", "", d.text.strip()) next_collection = datetime.strptime(clean_date_str, "%A %d %B %Y") collections.append((bin_type, next_collection)) diff --git a/uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py b/uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py index 176324d100..a79f9df9b5 100644 --- a/uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/CheshireEastCouncil.py @@ -1,26 +1,41 @@ -from bs4 import BeautifulSoup +from typing import Dict, Any, Optional +from bs4 import BeautifulSoup, Tag, NavigableString from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass -#Cheshire East +""" +This module provides bin collection data for Cheshire East Council. +""" + + class CouncilClass(AbstractGetBinDataClass): - def parse_data(self, page: str, **kwargs) -> dict: + """ + A class to fetch and parse bin collection data for Cheshire East Council. + """ + + def parse_data(self, page: Any, **kwargs: Any) -> Dict[str, Any]: soup = BeautifulSoup(page.text, features="html.parser") - bin_data_dict = {"bins": []} + bin_data_dict: Dict[str, Any] = {"bins": []} - table = soup.find("table", {"class": "job-details"}) - if table: + table: Optional[Tag | NavigableString] = soup.find( + "table", {"class": "job-details"} + ) + if isinstance(table, Tag): # Ensure we only proceed if 'table' is a Tag rows = table.find_all("tr", {"class": "data-row"}) for row in rows: cells = row.find_all( - "td", {"class": lambda L: L and L.startswith("visible-cell")} + "td", + { + "class": lambda L: isinstance(L, str) + and L.startswith("visible-cell") + }, # Explicitly check if L is a string ) - labels = cells[0].find_all("label") if cells else [] + labels: list[Tag] = cells[0].find_all("label") if cells else [] if len(labels) >= 3: - bin_type = labels[2].get_text(strip=True) - collection_date = labels[1].get_text(strip=True) + bin_type: str = labels[2].get_text(strip=True) + collection_date: str = labels[1].get_text(strip=True) bin_data_dict["bins"].append( { diff --git a/uk_bin_collection/uk_bin_collection/councils/CornwallCouncil.py b/uk_bin_collection/uk_bin_collection/councils/CornwallCouncil.py index dc0b1342d3..2170c16f5f 100644 --- a/uk_bin_collection/uk_bin_collection/councils/CornwallCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/CornwallCouncil.py @@ -4,7 +4,6 @@ from dateutil.relativedelta import relativedelta - # import the wonderful Beautiful Soup and the URL grabber class CouncilClass(AbstractGetBinDataClass): """ @@ -23,37 +22,39 @@ def parse_data(self, page: str, **kwargs) -> dict: check_uprn(user_uprn) headers = { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', - 'Accept-Language': 'en-GB,en;q=0.9', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - 'Pragma': 'no-cache', - 'Sec-Fetch-Dest': 'document', - 'Sec-Fetch-Mode': 'navigate', - 'Sec-Fetch-Site': 'none', - 'Sec-Fetch-User': '?1', - 'Upgrade-Insecure-Requests': '1', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.6422.143 Safari/537.36', - 'sec-ch-ua': '"Opera GX";v="111", "Chromium";v="125", "Not.A/Brand";v="24"', - 'sec-ch-ua-mobile': '?0', - 'sec-ch-ua-platform': '"Windows"', + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "Accept-Language": "en-GB,en;q=0.9", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "Pragma": "no-cache", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.6422.143 Safari/537.36", + "sec-ch-ua": '"Opera GX";v="111", "Chromium";v="125", "Not.A/Brand";v="24"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"Windows"', } params = { - 'uprn': f'{user_uprn}', + "uprn": f"{user_uprn}", # 'uprn': f'100040128734', } response = requests.get( - 'https://www.cornwall.gov.uk/umbraco/surface/waste/MyCollectionDays', + "https://www.cornwall.gov.uk/umbraco/surface/waste/MyCollectionDays", params=params, - headers=headers + headers=headers, ) soup = BeautifulSoup(response.text, features="html.parser") soup.prettify() - for item in soup.find_all('div', class_='collection text-center service'): + for item in soup.find_all("div", class_="collection text-center service"): bin_type = item.contents[1].text + " bin" - collection_date = datetime.strptime(item.contents[5].text, "%d %b").replace(year=curr_date.year) + collection_date = datetime.strptime(item.contents[5].text, "%d %b").replace( + year=curr_date.year + ) if curr_date.month == 12 and collection_date.month == 1: collection_date = collection_date + relativedelta(years=1) collections.append((bin_type, collection_date)) diff --git a/uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py b/uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py index 30241b05dc..3f979145c4 100644 --- a/uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py @@ -56,9 +56,7 @@ def parse_data(self, page: str, **kwargs) -> dict: postcode_input.send_keys(user_postcode) find_address_button = WebDriverWait(driver, 10).until( - EC.presence_of_element_located( - (By.ID, 'submitButton0') - ) + EC.presence_of_element_located((By.ID, "submitButton0")) ) find_address_button.click() @@ -80,7 +78,7 @@ def parse_data(self, page: str, **kwargs) -> dict: template_parts = first_option.split(", ") template_parts[0] = user_paon # Replace the first part with user_paon - addr_label = ", ".join(template_parts) + addr_label = ", ".join(template_parts) for addr_option in select.options: option_name = addr_option.accessible_name[0 : len(addr_label)] if option_name == addr_label: @@ -100,32 +98,27 @@ def parse_data(self, page: str, **kwargs) -> dict: # Find the div with the specified id target_div = soup.find("div", {"id": target_div_id}) - # Check if the div is found if target_div: bin_data = {"bins": []} - for bin_div in target_div.find_all( - "div" - ): + for bin_div in target_div.find_all("div"): # Extract the collection date from the message try: bin_collection_message = bin_div.find("p").text.strip() date_pattern = r"\b\d{2}/\d{2}/\d{4}\b" collection_date_string = ( - re.search(date_pattern, bin_div.text) - .group(0) - .strip() - .replace(",", "") - ) + re.search(date_pattern, bin_div.text) + .group(0) + .strip() + .replace(",", "") + ) except AttributeError: continue current_date = datetime.now() - parsed_date = datetime.strptime( - collection_date_string, "%d/%m/%Y" - ) + parsed_date = datetime.strptime(collection_date_string, "%d/%m/%Y") # Check if the parsed date is in the past and not today if parsed_date.date() < current_date.date(): # If so, set the year to the next year @@ -137,9 +130,14 @@ def parse_data(self, page: str, **kwargs) -> dict: contains_date(formatted_date) # Extract the bin type from the message - bin_type_match = re.search(r"Your next (.*?) collection", bin_collection_message) + bin_type_match = re.search( + r"Your next (.*?) collection", bin_collection_message + ) if bin_type_match: - bin_info = {"type": bin_type_match.group(1), "collectionDate": formatted_date} + bin_info = { + "type": bin_type_match.group(1), + "collectionDate": formatted_date, + } bin_data["bins"].append(bin_info) else: raise ValueError("Collection data not found.") diff --git a/uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py index 07420cf2bc..b8d4ef3a93 100644 --- a/uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/GedlingBoroughCouncil.py @@ -16,7 +16,7 @@ class CouncilClass(AbstractGetBinDataClass): def parse_data(self, page: str, **kwargs) -> dict: data = {"bins": []} collections = [] - selected_collections = kwargs.get("paon").split(',') + selected_collections = kwargs.get("paon").split(",") calendar_urls = [] run_date = datetime.now().date() @@ -25,9 +25,13 @@ def parse_data(self, page: str, **kwargs) -> dict: for item in selected_collections: item = item.strip().lower().replace(" ", "_") if has_numbers(item): - calendar_urls.append(f"https://www.gbcbincalendars.co.uk/json/gedling_borough_council_{item}_bin_schedule.json") + calendar_urls.append( + f"https://www.gbcbincalendars.co.uk/json/gedling_borough_council_{item}_bin_schedule.json" + ) else: - calendar_urls.append(f"https://www.gbcbincalendars.co.uk/json/gedling_borough_council_{item}_garden_bin_schedule.json") + calendar_urls.append( + f"https://www.gbcbincalendars.co.uk/json/gedling_borough_council_{item}_garden_bin_schedule.json" + ) # Parse each URL and load future data for url in calendar_urls: @@ -36,7 +40,9 @@ def parse_data(self, page: str, **kwargs) -> dict: raise ConnectionError(f"Could not get response from: {url}") json_data = response.json()["collectionDates"] for col in json_data: - bin_date = datetime.strptime(col.get("collectionDate"), "%Y-%m-%d").date() + bin_date = datetime.strptime( + col.get("collectionDate"), "%Y-%m-%d" + ).date() if bin_date >= run_date: collections.append((col.get("alternativeName"), bin_date)) diff --git a/uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py b/uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py index f043dba453..971b1589d2 100644 --- a/uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/IslingtonCouncil.py @@ -29,9 +29,11 @@ def parse_data(self, page: str, **kwargs) -> dict: waste_type = row.find("th").text.strip() next_collection = parse(row.find("td").text.strip()).date() - data['bins'].append({ - "type": waste_type, - "collectionDate": next_collection.strftime(date_format), - }) + data["bins"].append( + { + "type": waste_type, + "collectionDate": next_collection.strftime(date_format), + } + ) return data diff --git a/uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py index 62710c99e4..ebb114f57f 100644 --- a/uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py @@ -7,6 +7,7 @@ from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + class CouncilClass(AbstractGetBinDataClass): def parse_data(self, page: str, **kwargs) -> dict: @@ -14,7 +15,9 @@ def parse_data(self, page: str, **kwargs) -> dict: check_postcode(user_postcode) # Fetch the page content - root_url = "https://myproperty.molevalley.gov.uk/molevalley/api/live_addresses/{}?format=json".format(user_postcode) + root_url = "https://myproperty.molevalley.gov.uk/molevalley/api/live_addresses/{}?format=json".format( + user_postcode + ) response = requests.get(root_url, verify=False) if not response.ok: @@ -63,23 +66,27 @@ def parse_data(self, page: str, **kwargs) -> dict: if bins_panel: panel = bins_panel.find_parent("div", class_="panel") print("Found 'Bins and Recycling' panel.") - + # Extract bin collection info from the un-commented HTML for strong_tag in panel.find_all("strong"): bin_type = strong_tag.text.strip() collection_string = strong_tag.find_next("p").text.strip() - + # Debugging output print(f"Processing bin type: {bin_type}") print(f"Collection string: {collection_string}") - + match = regex_date.search(collection_string) if match: - collection_date = datetime.strptime(match.group(1), "%d/%m/%Y").date() - data["bins"].append({ - "type": bin_type, - "collectionDate": collection_date.strftime("%d/%m/%Y"), - }) + collection_date = datetime.strptime( + match.group(1), "%d/%m/%Y" + ).date() + data["bins"].append( + { + "type": bin_type, + "collectionDate": collection_date.strftime("%d/%m/%Y"), + } + ) all_collection_dates.append(collection_date) else: # Add a debug line to show which collections are missing dates @@ -88,7 +95,7 @@ def parse_data(self, page: str, **kwargs) -> dict: # Search for additional collections like electrical and textiles for p in panel.find_all("p"): additional_match = regex_additional_collection.match(p.text.strip()) - + # Debugging output for additional collections if additional_match: bin_type = additional_match.group(1) @@ -96,23 +103,33 @@ def parse_data(self, page: str, **kwargs) -> dict: if "each collection day" in additional_match.group(2): if all_collection_dates: collection_date = min(all_collection_dates) - data["bins"].append({ - "type": bin_type, - "collectionDate": collection_date.strftime("%d/%m/%Y"), - }) + data["bins"].append( + { + "type": bin_type, + "collectionDate": collection_date.strftime( + "%d/%m/%Y" + ), + } + ) else: - print("No collection dates available for additional collection.") + print( + "No collection dates available for additional collection." + ) raise ValueError("No valid bin collection dates found.") else: - print(f"No additional collection found in paragraph: {p.text.strip()}") + print( + f"No additional collection found in paragraph: {p.text.strip()}" + ) else: - raise ValueError("Unable to find 'Bins and Recycling' panel in the HTML data.") + raise ValueError( + "Unable to find 'Bins and Recycling' panel in the HTML data." + ) # Debugging to check collected data print(f"Collected bin data: {data}") - + # Handle the case where no collection dates were found if not all_collection_dates: raise ValueError("No valid collection dates were found in the data.") - - return data \ No newline at end of file + + return data diff --git a/uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py b/uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py index 07ee012001..ef0b099972 100644 --- a/uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/NorthTynesideCouncil.py @@ -23,7 +23,7 @@ def parse_data(self, page: str, **kwargs) -> dict: # Get the first form response = s.get( "https://my.northtyneside.gov.uk/category/81/bin-collection-dates", - verify = False, + verify=False, ) # Find the form ID and submit with a postcode @@ -31,13 +31,13 @@ def parse_data(self, page: str, **kwargs) -> dict: form_build_id = soup.find("input", {"name": "form_build_id"})["value"] response = s.post( "https://my.northtyneside.gov.uk/category/81/bin-collection-dates", - data = { + data={ "postcode": user_postcode, "op": "Find", "form_build_id": form_build_id, "form_id": "ntc_address_wizard", }, - verify = False, + verify=False, ) # Find the form ID and submit with the UPRN @@ -45,18 +45,18 @@ def parse_data(self, page: str, **kwargs) -> dict: form_build_id = soup.find("input", {"name": "form_build_id"})["value"] response = s.post( "https://my.northtyneside.gov.uk/category/81/bin-collection-dates", - data = { + data={ "house_number": f"0000{user_uprn}", "op": "Use", "form_build_id": form_build_id, "form_id": "ntc_address_wizard", }, - verify = False, + verify=False, ) # Parse form page and get the day of week and week offsets soup = BeautifulSoup(response.text, features="html.parser") - info_section = soup.find("section", {"class": "block block-ntc-bins clearfix"}) + info_section = soup.find("section", {"class": "block block-ntc-bins clearfix"}) regular_day, garden_day, special_day = None, None, None # Get day of week and week label for refuse, garden and special collections. @@ -82,7 +82,9 @@ def parse_data(self, page: str, **kwargs) -> dict: weeks_total = math.floor((datetime(2026, 4, 1) - datetime.now()).days / 7) # The garden calendar only shows until end of November 2024, work out how many weeks that is - garden_weeks_total = math.floor((datetime(2024, 12, 1) - datetime.now()).days / 7) + garden_weeks_total = math.floor( + (datetime(2024, 12, 1) - datetime.now()).days / 7 + ) regular_collections, garden_collections, special_collections = [], [], [] # Convert day text to series of dates using previous calculation @@ -134,10 +136,10 @@ def parse_data(self, page: str, **kwargs) -> dict: return { "bins": [ - { + { "type": item[0], "collectionDate": item[1].strftime(date_format), } for item in sorted(collections, key=lambda x: x[1]) ] - } \ No newline at end of file + } diff --git a/uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py b/uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py index f05156f593..fc3d1192af 100644 --- a/uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/RochfordCouncil.py @@ -36,8 +36,7 @@ def parse_data(self, page: str, **kwargs) -> dict: collection_date = datetime.strptime( remove_ordinal_indicator_from_date_string( week_text[0].split(" - ")[0] - ) - .strip(), + ).strip(), "%A %d %B", ) next_collection = collection_date.replace(year=datetime.now().year) diff --git a/uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py b/uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py index 63c7e7ac1b..19eb0017e1 100644 --- a/uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/RotherhamCouncil.py @@ -17,7 +17,9 @@ def parse_data(self, page: str, **kwargs) -> dict: check_uprn(user_uprn) response = requests.post( - "https://www.rotherham.gov.uk/bin-collections?address={}&submit=Submit".format(user_uprn) + "https://www.rotherham.gov.uk/bin-collections?address={}&submit=Submit".format( + user_uprn + ) ) # Make a BS4 object soup = BeautifulSoup(response.text, features="html.parser") @@ -25,15 +27,15 @@ def parse_data(self, page: str, **kwargs) -> dict: data = {"bins": []} - table = soup.select('table')[0] + table = soup.select("table")[0] if table: - rows = table.select('tr') + rows = table.select("tr") for index, row in enumerate(rows): - bin_info_cell = row.select('td') - if bin_info_cell: - bin_type = bin_info_cell[0].get_text(separator=' ', strip=True) + bin_info_cell = row.select("td") + if bin_info_cell: + bin_type = bin_info_cell[0].get_text(separator=" ", strip=True) bin_collection = bin_info_cell[1] if bin_collection: diff --git a/uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py index 6a8671c5f0..9977ca6680 100644 --- a/uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py @@ -3,6 +3,7 @@ from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass from datetime import datetime + class CouncilClass(AbstractGetBinDataClass): """ Concrete class to scrape bin collection data. @@ -25,23 +26,23 @@ def parse_data(self, page: str, **kwargs) -> dict: data = {"bins": []} # Locate the section with bin collection data sections = soup.find_all("div", {"class": "wil_c-content-section_heading"}) - + for s in sections: if s.get_text(strip=True).lower() == "bin collections": rows = s.find_next_sibling( "div", {"class": "c-content-section_body"} ).find_all("div", class_="tablet:l-col-fb-4 u-mt-10") - + for row in rows: title_elem = row.find("div", class_="u-mb-4") if title_elem: title = title_elem.get_text(strip=True).capitalize() - + # Find all collection info in the same section collections = row.find_all("div", class_="u-mb-2") for c in collections: text = c.get_text(strip=True).lower() - + if "next collection" in text: date_text = text.replace("next collection - ", "") try: @@ -51,34 +52,43 @@ def parse_data(self, page: str, **kwargs) -> dict: dict_data = { "type": title, - "collectionDate": next_collection_date + "collectionDate": next_collection_date, } data["bins"].append(dict_data) except ValueError: # Skip if the date isn't a valid date print(f"Skipping invalid date: {date_text}") - + # Get future collections future_collections_section = row.find("ul", class_="u-mt-4") if future_collections_section: - future_collections = future_collections_section.find_all("li") + future_collections = ( + future_collections_section.find_all("li") + ) for future_collection in future_collections: - future_date_text = future_collection.get_text(strip=True) + future_date_text = future_collection.get_text( + strip=True + ) try: future_collection_date = datetime.strptime( future_date_text, "%A, %d %B %Y" ).strftime(date_format) # Avoid duplicates of next collection date - if future_collection_date != next_collection_date: + if ( + future_collection_date + != next_collection_date + ): dict_data = { "type": title, - "collectionDate": future_collection_date + "collectionDate": future_collection_date, } data["bins"].append(dict_data) except ValueError: # Skip if the future collection date isn't valid - print(f"Skipping invalid future date: {future_date_text}") + print( + f"Skipping invalid future date: {future_date_text}" + ) # Sort the collections by date data["bins"].sort( diff --git a/uk_bin_collection/uk_bin_collection/councils/WokinghamBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/WokinghamBoroughCouncil.py index 95782de873..7881e0ac48 100644 --- a/uk_bin_collection/uk_bin_collection/councils/WokinghamBoroughCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/WokinghamBoroughCouncil.py @@ -56,7 +56,7 @@ def parse_data(self, page: str, **kwargs) -> dict: ) ) ).click() - + # Wait for the Show collection dates button to appear, then click it to get the collection dates inputElement_show_dates_button = WebDriverWait(driver, timeout).until( EC.presence_of_element_located(