The following APIs limit the period that can be retrieved in a single request: they do not allow querying more than a predetermined period per call.
The AEMET API limits daily data downloads to 15 days, and monthly and yearly data downloads to 36 months:
# AEMET caps daily requests at 15 days per call. The period requested here
# covers the whole of 1990, so it goes beyond that cap and illustrates the limit.
get_meteo_from(
  'aemet',
  aemet_options(
    resolution = 'daily',
    start_date = as.Date('1990-01-01'),
    end_date = as.Date('1990-12-31'),
    api_key = keyring::key_get('aemet')
  )
)
# Monthly and yearly requests are capped at 36 months per call. The period
# requested here (2005 to 2020) spans 16 years for a single station, so it
# goes beyond that cap as well.
get_meteo_from(
  'aemet',
  aemet_options(
    resolution = 'yearly',
    start_date = as.Date('2005-01-01'),
    end_date = as.Date('2020-12-31'),
    stations = "0149X",
    api_key = keyring::key_get('aemet')
  )
)
This means that with one call to `get_meteo_from` to the AEMET service, one can only download 15 days of data (or 36 months of monthly and yearly data).
If the period needed is longer than that, one option is to perform all the necessary calls and join the results:
# January 1990 is requested in two consecutive calls, because a single daily
# request to AEMET cannot cover the whole month.

# First half of the month: 1st to 15th
res_1990_jan_1 <- get_meteo_from(
  'aemet',
  aemet_options(
    resolution = 'daily',
    start_date = as.Date('1990-01-01'),
    end_date = as.Date('1990-01-15'),
    api_key = keyring::key_get('aemet')
  )
)

# Second half of the month: 16th to 31st
# NOTE(review): this window covers 16 calendar days when both ends are
# counted; confirm the service accepts it under the 15-day limit.
res_1990_jan_2 <- get_meteo_from(
  'aemet',
  aemet_options(
    resolution = 'daily',
    start_date = as.Date('1990-01-16'),
    end_date = as.Date('1990-01-31'),
    api_key = keyring::key_get('aemet')
  )
)

# Stack both partial results, first half on top, and show the joined data
res_1990_jan <- rbind(res_1990_jan_1, res_1990_jan_2)
res_1990_jan
While this can be easily done for short periods, for long periods (years, decades) it becomes tedious and error-prone (or at least involves a lot of copying and pasting and produces longer scripts).
To avoid this, we can use loops, either in a tidyverse way (`purrr::map`) or with a more classic approach (`for`). Either way, the first thing to do is to create the vectors of dates to retrieve:
# Build the request windows for the first half of 1990.
# Window openings advance in 15-day steps from January 1st...
start_dates <- seq(
  as.Date('1990-01-01'),
  as.Date('1990-07-01'),
  by = '15 days'
)
# ...and every window closes 14 days after it opens, so each one covers
# 15 days when both ends are counted.
end_dates <- start_dates + 14
# Sanity check: there must be exactly one end date per start date
length(start_dates) == length(end_dates)
#> [1] TRUE
# Show the windows side by side
data.frame(start_dates, end_dates)
#>    start_dates  end_dates
#> 1   1990-01-01 1990-01-15
#> 2   1990-01-16 1990-01-30
#> 3   1990-01-31 1990-02-14
#> 4   1990-02-15 1990-03-01
#> 5   1990-03-02 1990-03-16
#> 6   1990-03-17 1990-03-31
#> 7   1990-04-01 1990-04-15
#> 8   1990-04-16 1990-04-30
#> 9   1990-05-01 1990-05-15
#> 10  1990-05-16 1990-05-30
#> 11  1990-05-31 1990-06-14
#> 12  1990-06-15 1990-06-29
#> 13  1990-06-30 1990-07-14
We are going to use `purrr::map2` to iterate over both date vectors at the same time, and then bind all the results into a single data frame:
# tidyverse approach: walk both date vectors in parallel, issuing one daily
# request per window, then row-bind the per-window results
res_tidyverse <-
  purrr::map2(
    .x = start_dates,
    .y = end_dates,
    .f = \(window_start, window_end) {
      get_meteo_from(
        'aemet',
        aemet_options(
          resolution = 'daily',
          start_date = window_start,
          end_date = window_end,
          api_key = keyring::key_get('aemet')
        )
      )
    }
  ) |>
  purrr::list_rbind()

# Peek at the first rows of the combined result
head(res_tidyverse)
Alternatively, we can use a base `for` loop, iterating over the indices of the date vectors:
# base R approach: start from an empty data frame and append the result of
# each request, addressing both date vectors by position
res_for <- data.frame()

for (index in seq_along(start_dates)) {
  # One daily request for the window at this position
  temp_res <- get_meteo_from(
    'aemet',
    aemet_options(
      resolution = 'daily',
      start_date = start_dates[index],
      end_date = end_dates[index],
      api_key = keyring::key_get('aemet')
    )
  )

  # Append the new rows below those gathered so far
  res_for <- rbind(res_for, temp_res)
}

# Peek at the first rows of the combined result
head(res_for)
Both methods return identical results:
In a loop, whether it is a `purrr::map` or a `for` loop, each iteration will connect to the API, consuming connections from the user quota. Take this into consideration when creating loops for longer periods, as you can reach your API request limits for the day/month… (it depends on the service API).
When using MeteoCat at the `daily`, `monthly` and `yearly` resolutions, there are restrictions on the period that can be accessed.
daily
`daily` always returns the whole month the selected date is in, e.g. for `start_date = as.Date('2020-04-10')` it will return all days in April 2020:
# Daily resolution with a date in the middle of April 2020
api_options <- meteocat_options(
  resolution = 'daily',
  start_date = as.Date('2020-04-10'),
  api_key = keyring::key_get('meteocat')
)

# Run the request and list the distinct timestamps that came back
april_2020 <- get_meteo_from('meteocat', api_options)
unique(april_2020$timestamp)
This means that if we want more than one month, we need to use loops in a similar way as described previously for AEMET:
# One start date per month, from January to April 2020. A daily request
# returns the whole month the date falls in, so one date per month is enough.
start_dates <- seq(
  as.Date('2020-01-01'),
  as.Date('2020-04-01'),
  by = 'months'
)

# tidyverse approach: one daily request per month, results row-bound
meteocat_2020q1_tidyverse <-
  purrr::map(
    .x = start_dates,
    .f = \(month_start) {
      get_meteo_from(
        'meteocat',
        meteocat_options(
          resolution = 'daily',
          start_date = month_start,
          api_key = keyring::key_get('meteocat')
        )
      )
    }
  ) |>
  purrr::list_rbind()

# Peek at the first rows of the combined result
head(meteocat_2020q1_tidyverse)
# base R approach: start from an empty data frame and append the result of
# each monthly request, addressing the start dates by position
meteocat_2020q1_for <- data.frame()

for (index in seq_along(start_dates)) {
  # One daily request for the month at this position
  temp_res <- get_meteo_from(
    'meteocat',
    meteocat_options(
      resolution = 'daily',
      start_date = start_dates[index],
      api_key = keyring::key_get('meteocat')
    )
  )

  # Append the new rows below those gathered so far
  meteocat_2020q1_for <- rbind(meteocat_2020q1_for, temp_res)
}

# Peek at the first rows of the combined result
head(meteocat_2020q1_for)

# Check that the loop and the purrr version produced the same object
identical(meteocat_2020q1_tidyverse, meteocat_2020q1_for)
monthly
`monthly` always returns the whole year the selected date is in, e.g. for `start_date = as.Date('2020-04-10')` it will return all months in 2020:
# Monthly resolution with a date in the middle of 2020
api_options <- meteocat_options(
  resolution = 'monthly',
  start_date = as.Date('2020-04-10'),
  api_key = keyring::key_get('meteocat')
)

# Run the request and list the distinct timestamps that came back
year_2020 <- get_meteo_from('meteocat', api_options)
unique(year_2020$timestamp)
Which means that if we need more than one year of monthly data, we need to use loops again:
# One start date per year, for 2019 and 2020. A monthly request returns the
# whole year the date falls in, so one date per year is enough.
start_dates <- seq(
  as.Date('2019-01-01'),
  as.Date('2020-01-01'),
  by = 'years'
)

# tidyverse approach: one monthly request per year, results row-bound
meteocat_2019_20_tidyverse <-
  purrr::map(
    .x = start_dates,
    .f = \(year_start) {
      get_meteo_from(
        'meteocat',
        meteocat_options(
          resolution = 'monthly',
          start_date = year_start,
          api_key = keyring::key_get('meteocat')
        )
      )
    }
  ) |>
  purrr::list_rbind()

# Peek at the first rows of the combined result
head(meteocat_2019_20_tidyverse)
# base R approach: start from an empty data frame and append the result of
# each yearly request, addressing the start dates by position
meteocat_2019_20_for <- data.frame()

for (index in seq_along(start_dates)) {
  # One monthly request for the year at this position
  temp_res <- get_meteo_from(
    'meteocat',
    meteocat_options(
      resolution = 'monthly',
      start_date = start_dates[index],
      api_key = keyring::key_get('meteocat')
    )
  )

  # Append the new rows below those gathered so far
  meteocat_2019_20_for <- rbind(meteocat_2019_20_for, temp_res)
}

# Peek at the first rows of the combined result
head(meteocat_2019_20_for)

# Check that the loop and the purrr version produced the same object
identical(meteocat_2019_20_tidyverse, meteocat_2019_20_for)
yearly
`yearly` always returns all available years and the `start_date` argument is ignored, e.g. using `start_date = as.Date('2020-04-10')` will return all years, independently of the date supplied:
# Yearly resolution; a start date is still supplied in the options
api_options <- meteocat_options(
  resolution = 'yearly',
  start_date = as.Date('2020-04-10'),
  api_key = keyring::key_get('meteocat')
)

# Run the request and list the distinct timestamps that came back
all_years <- get_meteo_from('meteocat', api_options)
unique(all_years$timestamp)
This means that with `yearly` we always get all the available data, so there is no need for loops.