-
Notifications
You must be signed in to change notification settings - Fork 1
/
get_buoy_data.R
95 lines (78 loc) · 2.58 KB
/
get_buoy_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Author: Jeremy Boyd ([email protected])
# Description: Function that takes a buoy ID as input and returns all available
# mean daily wave heights from the buoy.
# Resources
library(tidyverse)
library(rvest)
library(R.utils)
library(rnoaa)
library(lubridate)
library(padr)
library(purrr)
# Get station info
# x <- buoy_stations(refresh = TRUE)
# buoy_id <- "51101"
# Table with URLs for buoys with standard meteorological data
stdmet_buoys <- buoys(dataset = "stdmet") %>%
as_tibble()
# Function definition
get_buoy_data <- function(buoy_id) {
# URL for selected buoy
buoy_url <- stdmet_buoys %>%
filter(id == buoy_id) %>%
pull(url)
# Years that data is available
years <- read_html(buoy_url) %>%
html_nodes("a tt") %>%
xml_text() %>%
as_tibble() %>%
filter(str_detect(value, "h[0-9]+")) %>%
pull(value) %>%
str_extract("h[0-9]+") %>%
str_remove("h")
# User message
message(paste("Collecting data from",
length(years),
"files..."))
# Get data for all years
df <- map_dfr(years, function(year) {
# Get data
df1 <- buoy(dataset = "stdmet",
buoyid = buoy_id,
year = year)
# Missing value for wave height
wave_height_na <- df1$meta$wave_height$missval
# Clean up
df1$data %>%
mutate(time = ymd_hms(time),
wave_height = if_else(wave_height == wave_height_na,
NA_real_,
wave_height),
buoy_id = buoy_id,
year_file = year) %>%
select(buoy_id, year_file, time:lon, wvht = wave_height)
})
# Drop duplicate times
df2 <- df %>%
group_by(buoy_id, time) %>%
filter(year_file == max(as.integer(year_file))) %>%
ungroup() %>%
arrange(time) %>%
# Thicken to day and compute day means
thicken(interval = "day") %>%
group_by(buoy_id, time_day) %>%
summarize(wvht = mean(wvht, na.rm = TRUE),
.groups = "drop") %>%
rename(date = time_day) %>%
filter(!is.na(wvht))
# Add padding
df3 <- df2 %>%
pad(interval = "day")
# Tell user how much padding has been added. This helps to evaluate data
# quality.
message(paste("Added",
nrow(filter(df3, is.na(wvht))),
"padding rows."))
# Return this
return(df3)
}