From 4e00f0866dcdde575b867e0d386f6d50d4b61a8d Mon Sep 17 00:00:00 2001 From: Frank Elsinga Date: Sat, 25 Mar 2023 20:05:05 +0100 Subject: [PATCH] [autoscraping-4] Aggressive guessing for roomfinder rooms (#463) * Fixed a runtime bug which sneaked in * scraped all new data * made the room guessing more aggressive * added the previously missing room --- data/external/results/rooms_roomfinder.json | 23 +++++++++++++++++++++ data/external/scrapers/roomfinder.py | 15 +++++++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/data/external/results/rooms_roomfinder.json b/data/external/results/rooms_roomfinder.json index de0782e80..36308f17e 100644 --- a/data/external/results/rooms_roomfinder.json +++ b/data/external/results/rooms_roomfinder.json @@ -6758,6 +6758,29 @@ "utm_northing": 5336203.6581, "utm_zone": "32" }, + { + "b_alias": "U-Trakt", + "b_area": "M\u00fcnchen - Stammgel\u00e4nde Nord", + "b_id": "0101", + "b_name": "N1", + "default_map": ["4000", 12, "Stammgel\u00e4nde Basiskarte", 501, 484], + "maps": [ + ["500", 65, "U-Trakt UG", 525, 443], + ["4000", 12, "Stammgel\u00e4nde Basiskarte", 501, 484], + ["5000", 10, "Lageplan TUM", 657, 951], + ["200000", 54, "M\u00fcnchen", 640, 603], + ["400000", 156, "M\u00fcnchen und Umgebung", 420, 515], + ["100000000", 9, "Weltkarte Norden", 1619, 396] + ], + "metas": [], + "r_alias": "FLUR", + "r_id": "NK111B@0101", + "r_level": "-1", + "r_number": "NK111B", + "utm_easting": 691057.0769, + "utm_northing": 5336198.9831, + "utm_zone": "32" + }, { "b_alias": "U-Trakt", "b_area": "M\u00fcnchen - Stammgel\u00e4nde Nord", diff --git a/data/external/scrapers/roomfinder.py b/data/external/scrapers/roomfinder.py index 0e3ac6fb9..31cf46c4a 100644 --- a/data/external/scrapers/roomfinder.py +++ b/data/external/scrapers/roomfinder.py @@ -98,7 +98,7 @@ def scrape_rooms(): def _guess_queries(rooms, n_rooms): - # First try: all single-digit numbers + # First try: any single-digit number? 
for i in range(10): if len(rooms) < n_rooms: maybe_sleep(0.05) @@ -106,7 +106,7 @@ def _guess_queries(rooms, n_rooms): else: return - # Second try: all double-digit numbers + # Second try: any double-digit number? for i in range(100): if len(rooms) < n_rooms: maybe_sleep(0.05) @@ -114,7 +114,7 @@ def _guess_queries(rooms, n_rooms): else: return - # Third try: all characters + # Third try: any character? for char in string.ascii_lowercase: if len(rooms) < n_rooms: maybe_sleep(0.05) @@ -122,6 +122,15 @@ def _guess_queries(rooms, n_rooms): else: return + # Fourth try: any character twice? + for c1 in string.ascii_lowercase: + for c2 in string.ascii_lowercase: + if len(rooms) < n_rooms: + maybe_sleep(0.05) + yield c1 + c2 + else: + return + @cached_json("maps_roomfinder.json") def scrape_maps():