-
Notifications
You must be signed in to change notification settings - Fork 1
/
2.3_transform-gbfs2zones.R
378 lines (287 loc) · 15.1 KB
/
2.3_transform-gbfs2zones.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
###################################################################################################
### The purpose of this script is to identify whether each zone (census block / hexagon) is ###
### served by a shared micromobility provider or not. For each zone, we add a binary column ###
### for each provider to identify whether it works in the zone or not. ###
###################################################################################################
library(tidyverse)
library(sf)
# ------------------------------------ Load the data ------------------------------------ #
city = "San Francisco"
#city = "Mexico City"
# edit these variables based on the name of your provider. This isn't necessary if you got the gbfs data
# through the 1_extract_get_gbfs.R script
docked_provider_name <- "docked_provider_name"
dockless_provider_name <- "dockless_provider_name"
# --- Read in: docked bikes (change layer name if it is different)
docks <- st_read(paste0("../data_raw/", city, "/GBFS/gbfs_stations.geojson")) %>%
st_make_valid() %>%
# create a "provider column if it does not exist
rowwise() %>%
mutate(provider = ifelse("provider" %in% names(.), provider, docked_provider_name)) %>%
ungroup()
# --- Read in: dockless zones
dockless <- st_read(paste0("../data_raw/" , city, "/GBFS/gbfs_zones.geojson")) %>%
st_make_valid() %>%
# create a "provider column if it does not exist
rowwise() %>%
mutate(provider = ifelse("provider" %in% names(.), provider, dockless_provider_name)) %>%
ungroup()
# --- census data
census <- st_read(paste0("../data_raw/", city, "/level_i/census_hex.geojson"))
# ----------------------- Functions to match identify if a zone has docked / dockless service ----------------------- #
# ----- Function to determine which DOCKED providers operate in which zones
match_docked_to_zones = function(city_layer, # census / hexagons
docked_stations, # point layer with stations and associated provider
id_col){ # id column in the zone layer. Used for joining
# --- create a column with a unique ID for each zone
city_layer <- city_layer %>%
mutate(zone_id = row_number())
# --- select necessary columns from each layer
city_layer_temp <- city_layer %>%
select(zone_id)
docked_stations <- docked_stations %>%
select(provider)
# --- make sure both layers have the same crs
docked_stations <- docked_stations %>%
st_transform(st_crs(city_layer)) %>%
# add a column that will be used in the pivot wider operation
mutate(provider_binary = 1)
# --- spatial join to assign stations to city_layer zones
joined_layer <- st_join(city_layer_temp,
docked_stations,
left = FALSE, # we want an inner join not a left join
join = st_intersects)
# --- remove duplicate rows
# the same provider might have multiple stations in the same zone. This will create multiple rows, whereas
# we only want 1 row to identify the availability of a docked station in the zone
joined_layer <- joined_layer %>%
# remove the geometry so that we can run the distinct function
st_drop_geometry() %>%
distinct(zone_id, provider, .keep_all = TRUE)
# --- pivot wider to get one row per zone, and all providers as columns with binary 0/1 values to indicate availability
joined_layer <- joined_layer %>%
pivot_wider(names_from = provider,
values_from = provider_binary,
values_fill = 0,
names_prefix = "docked_")
# --- join results onto original layer
city_layer <- city_layer %>%
left_join(joined_layer, by = "zone_id")
# --- replace na values with 0 for provider columns
city_layer <- city_layer%>%
mutate_at(vars(matches("docked_")), ~replace_na(., 0))
# transform to desired crs
city_layer <- city_layer %>% st_transform(4326)
return(city_layer)
}
# census_matched_docked <- match_docked_to_zones(city_layer = census_hex,
# docked_stations = docks,
# id_col = GEOID)
# ----- Function to determine which DOCKLESS systems operate in each zone
match_dockless_to_zones = function(city_layer, # census / hexagons
dockless_zones, # polygon layer with dockless zones and associated providers
id_col){ # id column in the zone layer. Used for joining
# --- create a column with a unique ID for each zone
city_layer <- city_layer %>%
mutate(zone_id = row_number())
# --- select necessary columns from each layer
city_layer_temp <- city_layer %>%
select(zone_id)
dockless_zones <- dockless_zones %>%
st_make_valid() %>%
select(provider)
# --- make sure both layers have the same crs
dockless_zones <- dockless_zones %>%
st_transform(st_crs(city_layer)) %>%
# add a column that will be used in the pivot wider operation
mutate(provider_binary = 1)
# --- spatial join to assign stations to city_layer zones
joined_layer <- st_join(city_layer_temp,
dockless_zones,
left = FALSE, # we want an inner join not a left join
join = st_intersects)
# --- remove duplicate rows
# the same provider might have multiple stations in the same zone. This will create multiple rows, whereas
# we only want 1 row to identify the availability of a docked station in the zone
joined_layer <- joined_layer %>%
# remove the geometry so that we can run the distinct function
st_drop_geometry() %>%
distinct(zone_id, provider, .keep_all = TRUE)
# --- pivot wider to get one row per zone, and all providers as columns with binary 0/1 values to indicate availability
joined_layer <- joined_layer %>%
pivot_wider(names_from = provider,
values_from = provider_binary,
values_fill = 0,
names_prefix = "dockless_")
# --- join results onto original layer
city_layer <- city_layer %>%
left_join(joined_layer, by = "zone_id")
# --- replace na values with 0 for provider columns
city_layer <- city_layer%>%
mutate_at(vars(matches("dockless_")), ~replace_na(., 0))
# transform to desired crs
city_layer <- city_layer %>% st_transform(4326)
return(city_layer)
}
# ----- Combined Function: Return both DOCKED and DOCKLESS service providers in each zone
match_providers_to_zones = function(city_layer,
docked_stations,
dockless_zones,
id_col){
# --- create a column with a unique ID for each zone
city_layer <- city_layer %>%
mutate(zone_id = row_number())
# ---------- DOCKED
if(!is.null(docked_stations)){
# --- select necessary columns from each layer
city_layer_temp <- city_layer %>%
select(zone_id)
docked_stations <- docked_stations %>%
select(provider)
# --- make sure both layers have the same crs
docked_stations <- docked_stations %>%
st_transform(st_crs(city_layer)) %>%
# add a column that will be used in the pivot wider operation
mutate(provider_binary = 1)
# keep only geometry that is inside the sudy area
docked_stations <- docked_stations %>%
st_filter(city_layer, .predicate = st_intersects)
# --- spatial join to assign stations to city_layer zones
joined_layer <- st_join(city_layer_temp,
docked_stations,
left = FALSE, # we want an inner join not a left join
join = st_intersects)
# --- remove duplicate rows
# the same provider might have multiple stations in the same zone. This will create multiple rows, whereas
# we only want 1 row to identify the availability of a docked station in the zone
joined_layer <- joined_layer %>%
# remove the geometry so that we can run the distinct function
st_drop_geometry() %>%
distinct(zone_id, provider, .keep_all = TRUE)
# --- pivot wider to get one row per zone, and all providers as columns with binary 0/1 values to indicate availability
joined_layer <- joined_layer %>%
pivot_wider(names_from = provider,
values_from = provider_binary,
values_fill = 0,
names_prefix = "docked_")
# --- join results onto original layer
city_layer <- city_layer %>%
left_join(joined_layer, by = "zone_id")
# --- replace na values with 0 for provider columns
city_layer <- city_layer %>%
mutate_at(vars(matches("docked_")), ~replace_na(., 0))
}
# ---------- DOCKLESS
if(!is.null(dockless_zones)){
# --- select necessary columns from each layer
city_layer_temp <- city_layer %>%
select(zone_id)
dockless_zones <- dockless_zones %>%
st_make_valid() %>%
select(provider)
# --- make sure both layers have the same crs
dockless_zones <- dockless_zones %>%
st_transform(st_crs(city_layer)) %>%
# add a column that will be used in the pivot wider operation
mutate(provider_binary = 1)
# keep only geometry that is inside the sudy area
dockless_zones <- dockless_zones %>%
st_filter(city_layer, .predicate = st_intersects)
# --- spatial join to assign stations to city_layer zones
joined_layer <- st_join(city_layer_temp,
dockless_zones,
left = FALSE, # we want an inner join not a left join
join = st_intersects)
# --- remove duplicate rows
# the same provider might have multiple stations in the same zone. This will create multiple rows, whereas
# we only want 1 row to identify the availability of a docked station in the zone
joined_layer <- joined_layer %>%
# remove the geometry so that we can run the distinct function
st_drop_geometry() %>%
distinct(zone_id, provider, .keep_all = TRUE)
# --- pivot wider to get one row per zone, and all providers as columns with binary 0/1 values to indicate availability
joined_layer <- joined_layer %>%
pivot_wider(names_from = provider,
values_from = provider_binary,
values_fill = 0,
names_prefix = "dockless_")
# --- join results onto original layer
city_layer <- city_layer %>%
left_join(joined_layer, by = "zone_id")
# --- replace na values with 0 for provider columns
city_layer <- city_layer%>%
mutate_at(vars(matches("dockless_")), ~replace_na(., 0))
}
# transform to desired crs
city_layer <- city_layer %>% st_transform(4326)
return(city_layer)
}
# ----- Run function
census_matched <- match_providers_to_zones(city_layer = census,
docked_stations = docks, # replace with NULL if layer does not exist
dockless_zones = dockless, # replace with NULL if layer does not exist
id_col = GEOID)
# ----------------------- Availability of docked/dockless in neighbouring zones ----------------------- #
# Since users can walk to neighboring zones, it makes sense to factor in these zones when determining whether
# micromobility exists at a zone or not. We use
# Spatial Weights
# Create a sparse matrix of neighbors using st_intersects https://github.com/r-spatial/sf/issues/234
availability_neighbors = function(layer, docked_col, dockless_col){
# the id column must match the row_number because the output of st_intersects(.) is row.id and col.id
layer <- layer %>% mutate(cell_id = row_number())
# prepare column names for calling inside the function
#docked_col <- sym(docked_col)
docked_col <- ifelse(is.na(docked_col), docked_col, sym(docked_col))
dockless_col <- ifelse(is.na(dockless_col), dockless_col, sym(dockless_col))
# intersect geometry with itself
layer_matrix <- layer %>% st_intersects(.)
# convert result from list to dataframe
layer_matrix_df <- layer_matrix %>% as.data.frame()
# join onto original layer which has info on docked / dockless service availability at each zone
layer_matrix_meta <- layer_matrix_df %>%
left_join(layer, by = c("col.id" = "cell_id"))
# get number of stations (docked) / zones (dockless) that are in the zone + all its neighboring zones
layer_matrix_meta <- layer_matrix_meta %>%
group_by(row.id) %>%
summarise(docked_service = sum(!! docked_col, na.rm = TRUE),
dockless_service = sum(!! dockless_col, na.rm = TRUE)) %>%
ungroup()
layer_matrix_meta <- layer %>% left_join(layer_matrix_meta, by = c("cell_id" = "row.id"))
return(layer_matrix_meta)
}
# --- apply the function
census_matched_neighbors <- availability_neighbors(layer = census_matched,
docked_col = "docked_Bay Wheels", # edit: replace with the name of the provider in your data. If more than one, pick one
dockless_col = "dockless_Spin San Francisco") # edit: replace with the name of the provider in your data
# if (city == "San Francisco"){
# # San Francisco
# census_matched_neighbors <- availability_neighbors(layer = census_matched,
# docked_col = "docked_Bay Wheels",
# dockless_col = "dockless_Spin San Francisco")
#
# } else if(city == "Minneapolis"){
# # Minneapolis
# census_matched_neighbors <- availability_neighbors(layer = census_matched,
# docked_col = "docked_Nice Ride Minnesota",
# dockless_col = "dockless_Spin Minneapolis")
#
# } else if(city == "Mexico City"){
# # Mexico city
# census_matched_neighbors <- availability_neighbors(layer = census_matched,
# docked_col = "docked_ECOBICI",
# dockless_col = NA)
#
# } else if(city == "Cairo"){
# # Cairo
# census_matched_neighbors <- availability_neighbors(layer = census_matched,
# docked_col = "docked_Donkey_Republic",
# dockless_col = NA)
# }
plot(census_matched_neighbors["docked_service"])
plot(census_matched_neighbors["dockless_service"])
mapview::mapviewOptions(fgb = FALSE)
mapview::mapview(census_matched_neighbors, zcol = "docked_service") + mapview::mapview(docks)
mapview::mapview(census_matched_neighbors, zcol = "dockless_service") + mapview::mapview(dockless)
# --- save
st_write(census_matched_neighbors, paste0("../data/", city, "/level_i_ii/zones_w_micromobility_providers.geojson"), delete_dsn = TRUE)
#census_matched_neighbors <- st_read(paste0("../data/", city, "/level_i_ii/zones_w_micromobility_providers.geojson"))