From c5409de6e82ebe8bafee901548e706c2c72f3d2d Mon Sep 17 00:00:00 2001 From: TheyWorkForYou Live CVS User Date: Fri, 3 May 2024 10:08:47 +0100 Subject: [PATCH] [Scotland] Spot dangling paragraph before timestamp. --- pyscraper/sp_2024/parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyscraper/sp_2024/parse.py b/pyscraper/sp_2024/parse.py index 52345cd1..b489137a 100644 --- a/pyscraper/sp_2024/parse.py +++ b/pyscraper/sp_2024/parse.py @@ -76,7 +76,7 @@ def process_raw_html(raw_html: Tag, agenda_item_url: str) -> BeautifulSoup: # Deal with timestamps that are not inside anything first raw_html = str(raw_html) raw_html = re.sub( - "(?m)^\s*(.*?)\s*
\s*
", r"\1", raw_html + '(?:

)?\s*(.*?)\s*
\s*
', r"\1", raw_html ) soup = BeautifulSoup(raw_html, "html.parser")