-
Notifications
You must be signed in to change notification settings - Fork 0
/
Arrow_for_Spatial_Data.R
84 lines (69 loc) · 2.17 KB
/
Arrow_for_Spatial_Data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# THESE MATERIALS WERE SOURCED FROM THE 'BIG DATA IN R WITH ARROW'
# WORKSHOP HOSTED BY STEPH HAZLITT & NIC CRANE AT POSIT::CONF(2023)
# ---------------------------------------------------------------------------
# Installation of NYC Taxi data
# library(here)
# data_path <- here::here("data/nyc-taxi")
# open_dataset("s3://voltrondata-labs-datasets/nyc-taxi") |>
# filter(year %in% 2012:2021) |>
# write_dataset(data_path, partitioning = c("year", "month"))
# Use Airport Pickups and find Dropoff Zones ------------------------------
library(arrow)
library(dplyr)
library(janitor)
library(stringr)
# Taxi zone lookup, read as an Arrow Table rather than a data frame
# (as_data_frame = FALSE), with its column names passed through
# clean_names().
nyc_taxi_zones <- clean_names(
  read_csv_arrow(
    here::here("data/taxi_zone_lookup.csv"),
    as_data_frame = FALSE
  )
)

# Location ids of every zone whose name contains "Airport", pulled out as a
# plain vector so it can be used on the right-hand side of %in%.
airport_zones <- pull(
  filter(nyc_taxi_zones, str_detect(zone, "Airport")),
  location_id,
  as_vector = TRUE
)

# Id -> name lookup with both columns renamed to carry a dropoff_ prefix,
# so it lines up with the dropoff id column of the trip data.
dropoff_zones <- compute(
  select(
    nyc_taxi_zones,
    dropoff_location_id = location_id,
    dropoff_zone = zone
  )
)
# Count trips that were picked up in an airport zone, grouped by the name of
# the zone they were dropped off in, with the most frequent destination first.
airport_pickups <- open_dataset(here::here("data/nyc-taxi")) |>
  filter(pickup_location_id %in% airport_zones) |>
  select(
    matches("datetime"),
    matches("location_id")
  ) |>
  # Name the join key explicitly instead of relying on a natural join over
  # whichever column names the two tables happen to share. After the select()
  # above, dropoff_location_id is the only column dropoff_zones can share
  # with the trip data, so the result is unchanged; only the "Joining with"
  # message goes away and the intent is visible.
  left_join(dropoff_zones, by = "dropoff_location_id") |>
  count(dropoff_zone) |>
  arrange(desc(n)) |>
  # collect() runs the query and returns the result to the R session.
  collect()

airport_pickups
# Read and Wrangle Spatial Data -------------------------------------------
library(sf)
library(ggplot2)
library(ggrepel)
library(stringr)
library(scales)
# Read the taxi zone shapefile and clean its column names.
map <- read_sf(here::here("data/taxi_zones/taxi_zones.shp")) |>
  clean_names()

# Attach the per-zone trip counts by matching on the zone name, then sort so
# the zones with the highest counts come first.
map <- map |>
  left_join(airport_pickups, by = c("zone" = "dropoff_zone")) |>
  arrange(desc(n))
# Map of taxi zones filled by the number of airport-pickup trips that ended
# in each zone (`n`), with labels on selected zones.
arrow_r_together <- ggplot(data = map, aes(fill = n)) +
  # Since ggplot2 3.4.0 the outline width of sf lines/polygons is set with
  # `linewidth`; `size` only applies to point geometries, so the original
  # `size = .1` no longer thinned the zone borders.
  geom_sf(linewidth = .1) +
  scale_fill_distiller(
    name = "Number of trips",
    labels = label_comma(),
    palette = "Reds",
    direction = 1
  ) +
  geom_label_repel(
    stat = "sf_coordinates",
    # Only zones whose name contains "Airport" or "Times" get a visible
    # label; every other zone gets an empty string. ggrepel does not draw
    # empty labels but still keeps the drawn labels away from those points.
    data = map |>
      mutate(zone_label = case_when(
        str_detect(zone, "Airport") ~ zone,
        str_detect(zone, "Times") ~ zone,
        .default = ""
      )),
    mapping = aes(label = zone_label, geometry = geometry),
    max.overlaps = 60,
    label.padding = .3,
    fill = "white"
  ) +
  theme_void()

arrow_r_together