Skip to content

Commit

Permalink
Make sure date not included in committee slug.
Browse files Browse the repository at this point in the history
  • Loading branch information
ajparsons authored and dracos committed Apr 19, 2024
1 parent 09063bf commit 2c49130
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion pyscraper/sp_2024/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@ def get_next_minor_id(self) -> str:
return self._current_id()


def slugify_committee(name: str) -> str:
    """
    Convert a committee name to a slug, dropping a trailing date if present.

    The name is first passed through ``slugify``. If the final
    hyphen-separated element of the result is a four digit number it is
    assumed to be the year of a date, and the last three elements are
    removed (presumably day, month and year - confirm against the titles
    found in the source data).

    The date is only removed when at least one element would remain, so a
    short name that happens to end in a year (e.g. "Finance 2024") keeps
    its full slug rather than collapsing to an empty string.

    :param name: the committee name / title to convert.
    :return: the slug, without a trailing date where one was detected.
    """
    parts = slugify(name).split("-")
    last = parts[-1]

    # Exactly four digits: a longer trailing number is not treated as a year.
    ends_in_year = len(last) == 4 and last.isdigit()

    # Need more than three elements, otherwise stripping the assumed date
    # would leave nothing to use as the committee's directory name.
    if ends_in_year and len(parts) > 3:
        parts = parts[:-3]

    return "-".join(parts)


def convert_xml_to_twfy(file_path: Path, output_dir: Path, verbose: bool = False):
"""
Convert from the loose structured xml format to the
Expand All @@ -72,7 +84,7 @@ def convert_xml_to_twfy(file_path: Path, output_dir: Path, verbose: bool = False
# get the date in a format like Thursday 09 June 2005 (%d zero-pads the day)
date_str = datetime.datetime.fromisoformat(iso_date).strftime("%A %d %B %Y")

committee_slug = slugify(title)
committee_slug = slugify_committee(title)

dest_path = output_dir / committee_slug / f"{iso_date}-{source_id}.xml"
dest_path.parent.mkdir(parents=True, exist_ok=True)
Expand Down

0 comments on commit 2c49130

Please sign in to comment.