Skip to content

Commit

Permalink
[autoscraping-4] Aggressive guessing for roomfinder rooms (#463)
Browse files Browse the repository at this point in the history
* Fixed a runtime bug which sneaked in

* scraped all new data

* made the room guessing more aggressive

* added the previously missing room
  • Loading branch information
CommanderStorm authored Mar 25, 2023
1 parent 0a91fd2 commit 4e00f08
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 3 deletions.
23 changes: 23 additions & 0 deletions data/external/results/rooms_roomfinder.json
Original file line number Diff line number Diff line change
Expand Up @@ -6758,6 +6758,29 @@
"utm_northing": 5336203.6581,
"utm_zone": "32"
},
{
"b_alias": "U-Trakt",
"b_area": "M\u00fcnchen - Stammgel\u00e4nde Nord",
"b_id": "0101",
"b_name": "N1",
"default_map": ["4000", 12, "Stammgel\u00e4nde Basiskarte", 501, 484],
"maps": [
["500", 65, "U-Trakt UG", 525, 443],
["4000", 12, "Stammgel\u00e4nde Basiskarte", 501, 484],
["5000", 10, "Lageplan TUM", 657, 951],
["200000", 54, "M\u00fcnchen", 640, 603],
["400000", 156, "M\u00fcnchen und Umgebung", 420, 515],
["100000000", 9, "Weltkarte Norden", 1619, 396]
],
"metas": [],
"r_alias": "FLUR",
"r_id": "NK111B@0101",
"r_level": "-1",
"r_number": "NK111B",
"utm_easting": 691057.0769,
"utm_northing": 5336198.9831,
"utm_zone": "32"
},
{
"b_alias": "U-Trakt",
"b_area": "M\u00fcnchen - Stammgel\u00e4nde Nord",
Expand Down
15 changes: 12 additions & 3 deletions data/external/scrapers/roomfinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,30 +98,39 @@ def scrape_rooms():


def _guess_queries(rooms, n_rooms):
# First try: all single-digit numbers
# First try: any single-digit number?
for i in range(10):
if len(rooms) < n_rooms:
maybe_sleep(0.05)
yield str(i)
else:
return

# Second try: all double-digit numbers
# Second try: any double-digit number?
for i in range(100):
if len(rooms) < n_rooms:
maybe_sleep(0.05)
yield str(i).zfill(2)
else:
return

# Third try: all characters
# Third try: any character?
for char in string.ascii_lowercase:
if len(rooms) < n_rooms:
maybe_sleep(0.05)
yield char
else:
return

# Fourth try: any character twice?
for c1 in string.ascii_lowercase:
for c2 in string.ascii_lowercase:
if len(rooms) < n_rooms:
maybe_sleep(0.05)
yield c1 + c2
else:
return


@cached_json("maps_roomfinder.json")
def scrape_maps():
Expand Down

0 comments on commit 4e00f08

Please sign in to comment.