Merge pull request #17 from MIERUNE/fix/route_ids
fix: wrong extracting routes on stop
Kanahiro authored May 8, 2024
2 parents 2cf53f2 + 957bbe2 commit 7f9aa70
Showing 2 changed files with 79 additions and 26 deletions.
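Editor's note: the behavioral fix is confined to read_stops() in the first hunk of gtfs_parser/parse.py below; the remaining hunks in that file appear to be formatting-only (line wrapping and quote style). Previously, route_id was attached to each stop_times row by joining against a Series reindexed with trip_id values, which relies on index-label alignment and can leave route_ids missing or mismatched; the new code merges explicitly on the trip_id key. A minimal sketch of the corrected flow, using hypothetical toy IDs rather than the feed used in the tests:

```python
import pandas as pd

# Toy stand-ins for gtfs.stop_times and gtfs.trips (hypothetical IDs).
stop_times = pd.DataFrame(
    {"trip_id": ["t1", "t1", "t2", "t3"], "stop_id": ["s1", "s2", "s1", "s2"]}
)
trips = pd.DataFrame({"trip_id": ["t1", "t2", "t3"], "route_id": ["r1", "r2", "r1"]})

# Key-based merge on trip_id (the fix), then collect unique route_ids per stop.
stop_trip_route = pd.merge(
    stop_times[["trip_id", "stop_id"]], trips[["trip_id", "route_id"]], on="trip_id"
)
stop_route = stop_trip_route[["stop_id", "route_id"]].drop_duplicates()
route_ids_on_stops = (
    stop_route.groupby("stop_id")["route_id"].apply(list).rename("route_ids")
)
print(route_ids_on_stops)
# s1 -> ['r1', 'r2'], s2 -> ['r1']
```

This is the same shape of result that the new test asserts for stop 1000_04: a route_ids list attached to each stop feature.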
71 changes: 45 additions & 26 deletions gtfs_parser/parse.py
@@ -14,13 +14,15 @@ def read_stops(gtfs: GTFS, ignore_no_route=False) -> list:
         list: [description]
     """
     # get unique list of route_id related to each stop
-    stop_trip_route_df = gtfs.stop_times[["trip_id", "stop_id"]].join(
-        gtfs.trips["route_id"].reindex(gtfs.trips["trip_id"]),
+    stop_trip_route_df = pd.merge(
+        gtfs.stop_times[["trip_id", "stop_id"]],
+        gtfs.trips[["trip_id", "route_id"]],
         on="trip_id",
     )
     stop_route_df = stop_trip_route_df[["stop_id", "route_id"]].drop_duplicates()
-    route_ids_on_stops = stop_route_df.groupby("stop_id")["route_id"].apply(list).rename("route_ids")
-
+    route_ids_on_stops = (
+        stop_route_df.groupby("stop_id")["route_id"].apply(list).rename("route_ids")
+    )
     # outer join route_ids to stop
     route_stop = gtfs.stops.join(route_ids_on_stops, on="stop_id", how="left")
 
@@ -74,17 +76,24 @@ def read_routes(gtfs: GTFS, ignore_shapes=False) -> list:
 
 def __read_route_shapes(gtfs):
     # get_shapeids_on route
-    shape_ids_on_routes = gtfs.trips[["route_id", "shape_id"]].drop_duplicates().dropna(
-        subset=["shape_id"]
-    ).sort_values(["route_id", "shape_id"])
+    shape_ids_on_routes = (
+        gtfs.trips[["route_id", "shape_id"]]
+        .drop_duplicates()
+        .dropna(subset=["shape_id"])
+        .sort_values(["route_id", "shape_id"])
+    )
 
     # get shape coordinate
     shapes_df = gtfs.shapes.copy()
-    shapes_df["shape_pt"] = list(zip(shapes_df["shape_pt_lon"], shapes_df["shape_pt_lat"]))
+    shapes_df["shape_pt"] = list(
+        zip(shapes_df["shape_pt_lon"], shapes_df["shape_pt_lat"])
+    )
     shapes_df = shapes_df.sort_values(["shape_id", "shape_pt_sequence"])
-    shape_lines = shapes_df.groupby("shape_id")["shape_pt"].apply(
-        lambda x: x.tolist()
-    ).rename("line")
+    shape_lines = (
+        shapes_df.groupby("shape_id")["shape_pt"]
+        .apply(lambda x: x.tolist())
+        .rename("line")
+    )
 
     # merge
     route_line_df = pd.merge(shape_ids_on_routes, shape_lines, on="shape_id")
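The hunk above only re-wraps the shape-handling code, but the idiom it formats is worth spelling out: lon/lat columns are zipped into point tuples, rows are ordered by shape_pt_sequence, and each shape_id is collapsed into an ordered coordinate list (the LineString for that shape). A small sketch with made-up coordinates, not taken from any real feed:

```python
import pandas as pd

# Hypothetical shapes.txt-style rows; shape "B" is deliberately out of order.
shapes = pd.DataFrame(
    {
        "shape_id": ["A", "A", "A", "B", "B"],
        "shape_pt_lon": [0.0, 1.0, 2.0, 10.0, 11.0],
        "shape_pt_lat": [0.0, 0.5, 1.0, 5.0, 5.5],
        "shape_pt_sequence": [1, 2, 3, 2, 1],
    }
)

# Pair lon/lat, sort by sequence, then collapse each shape into a coordinate list.
shapes["shape_pt"] = list(zip(shapes["shape_pt_lon"], shapes["shape_pt_lat"]))
shapes = shapes.sort_values(["shape_id", "shape_pt_sequence"])
shape_lines = (
    shapes.groupby("shape_id")["shape_pt"].apply(lambda x: x.tolist()).rename("line")
)
# A -> [(0.0, 0.0), (1.0, 0.5), (2.0, 1.0)]
# B -> [(11.0, 5.5), (10.0, 5.0)]
```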
@@ -98,11 +107,13 @@ def __read_route_shapes(gtfs):
     ]
     if len(unloaded_shape_lines) > 0:
         # fill id, name with shape_id, line to multiline
-        multiline_df = pd.DataFrame({
-            "route_id": None,
-            "route_name": unloaded_shape_lines.index,
-            "multiline": unloaded_shape_lines.apply(lambda x: [x])
-        })
+        multiline_df = pd.DataFrame(
+            {
+                "route_id": None,
+                "route_name": unloaded_shape_lines.index,
+                "multiline": unloaded_shape_lines.apply(lambda x: [x]),
+            }
+        )
 
         unloaded_features = __route_multiline_df_to_features(multiline_df)
         features.extend(unloaded_features)
@@ -112,15 +123,19 @@ def __read_routes_ignore_shapes(gtfs):
 def __read_routes_ignore_shapes(gtfs):
     # generate stop patterns
     sorted_stop_times = gtfs.stop_times.sort_values(["trip_id", "stop_sequence"])
-    trip_stop_pattern = sorted_stop_times.groupby("trip_id")["stop_id"].agg(tuple).rename("stop_pattern")
+    trip_stop_pattern = (
+        sorted_stop_times.groupby("trip_id")["stop_id"]
+        .agg(tuple)
+        .rename("stop_pattern")
+    )
 
     # unique stop pattens by route_id
     route_trip_stop_pattern = pd.merge(
-        trip_stop_pattern,
-        gtfs.trips[["trip_id", "route_id"]],
-        on='trip_id'
+        trip_stop_pattern, gtfs.trips[["trip_id", "route_id"]], on="trip_id"
     )
-    route_stop_patterns = route_trip_stop_pattern[["route_id", "stop_pattern"]].drop_duplicates()
+    route_stop_patterns = route_trip_stop_pattern[
+        ["route_id", "stop_pattern"]
+    ].drop_duplicates()
 
     # explode stop patterns to stop ids
     route_stop_patterns["stop_id"] = route_stop_patterns["stop_pattern"]
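Again a formatting-only hunk, but it covers the core of the shapes-free path: each trip's ordered stop_ids are reduced to a tuple (a hashable "stop pattern"), the pattern is attached to its route, and duplicate patterns are dropped so every distinct stop sequence on a route yields exactly one line. A small sketch with hypothetical IDs; it calls reset_index() before merging instead of merging the named Series directly as the original does, purely to keep the example self-explanatory:

```python
import pandas as pd

# Toy stand-ins: t1 and t2 share a stop pattern, t3 runs the opposite way.
stop_times = pd.DataFrame(
    {
        "trip_id": ["t1", "t1", "t2", "t2", "t3", "t3"],
        "stop_id": ["s1", "s2", "s1", "s2", "s2", "s1"],
        "stop_sequence": [1, 2, 1, 2, 1, 2],
    }
)
trips = pd.DataFrame({"trip_id": ["t1", "t2", "t3"], "route_id": ["r1", "r1", "r1"]})

sorted_stop_times = stop_times.sort_values(["trip_id", "stop_sequence"])
trip_stop_pattern = (
    sorted_stop_times.groupby("trip_id")["stop_id"].agg(tuple).rename("stop_pattern")
)
route_stop_patterns = pd.merge(
    trip_stop_pattern.reset_index(), trips[["trip_id", "route_id"]], on="trip_id"
)[["route_id", "stop_pattern"]].drop_duplicates()
# r1 keeps two patterns: (s1, s2) and (s2, s1)
```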
@@ -130,7 +145,7 @@ def __read_routes_ignore_shapes(gtfs):
     stop_geoms = pd.Series(
         data=zip(gtfs.stops["stop_lon"], gtfs.stops["stop_lat"]),
         name="stop_pt",
-        index=gtfs.stops["stop_id"]
+        index=gtfs.stops["stop_id"],
     )
 
     # join geomtry to route stops
@@ -142,15 +157,19 @@ def __read_routes_ignore_shapes(gtfs):
     ).sort_values("order")
 
     # Point -> LineString: group by route_id and stop_pattern
-    route_lines = route_stop_geoms.groupby(["route_id", "stop_pattern"])["stop_pt"].agg(list)
+    route_lines = route_stop_geoms.groupby(["route_id", "stop_pattern"])["stop_pt"].agg(
+        list
+    )
     return __route_lines_to_features(route_lines, gtfs.routes)
 
 
 def __route_lines_to_features(route_lines, routes):
     # group by route_id into MultiLineString
-    multilines = route_lines.groupby(['route_id']).apply(
-        lambda x: x.tolist()
-    ).rename("multiline")
+    multilines = (
+        route_lines.groupby(["route_id"])
+        .apply(lambda x: x.tolist())
+        .rename("multiline")
+    )
     # join route_id and route_name
     multiline_df = pd.merge(
         multilines,
34 changes: 34 additions & 0 deletions tests/test_parser.py
@@ -1,11 +1,45 @@
 from gtfs_parser.parse import read_routes, read_stops
 
 
+def __find_feature_by(properties, features):
+    for feature in features:
+        if all(
+            feature["properties"].get(key) == value for key, value in properties.items()
+        ):
+            return feature
+    return None
+
+
 def test_read_stops(gtfs):
     stops_features = read_stops(gtfs)
     # list of geojson-feature
     assert 899 == len(stops_features)
 
+    # this feature exists in stops.geojson
+    feature = {
+        "type": "Feature",
+        "geometry": {"type": "Point", "coordinates": [143.203227, 42.918326]},
+        "properties": {
+            "stop_id": "1000_04",
+            "stop_name": "帯広駅バスターミナル",
+            "route_ids": [
+                "24_C",
+                "53_F",
+                "32_B",
+                "33_C",
+                "51_C",
+                "34_A",
+                "42_B",
+                "41_A",
+                "31_A",
+                "52_D",
+            ],
+        },
+    }
+
+    found = __find_feature_by(feature["properties"], stops_features)
+    assert found is not None
+
 
 def test_read_stops_ignore_no_route(gtfs):
     # remove no-route stops
