#' ---
#' title: "recalibrate: statistical recalibration of windstorm footprints"
#' output: html_document
#' ---
#' 
#' # example: windstorm Kyrill
#' 
#' This code demonstrates using the `recalibrate` package to recalibrate the footprint of windstorm Kyrill. The method follows the preprint of [Youngman \& Stephenson (2019)](http://empslocal.ex.ac.uk/people/staff/by223/youngman-stephenson_recalibration.pdf).
#' 
#' We'll need a few libraries, in addition to `recalibrate`.
#' 
## ----message=FALSE-------------------------------------------------------
library(mgcv)
library(fields)
library(MASS)
library(recalibrate)

#' 
#' ## load and prepare data
#' 
#' We'll also need some wind gust observation data. The following will load `gusts`, which are synthentic gust observations designed to mimic some of the properties of the actual gust observations, based on those available in the National Climatic Data Centre's Global Summary of the Day database. **Do not use these data for any meaningful analyses.**
#' 
## ------------------------------------------------------------------------
load(url("http://empslocal.ex.ac.uk/people/staff/by223/software/gusts.rda"))

#' 
#' Now use the `RNetCDF` library to read in the footprint, which we can obtain as a NetCDF file from [europeanwindstorms.org](http://www.europeanwindstorms.org/repository/Kyrill/Kyrill_rawFoot.nc). Note that we specify the storm's `time`, which is when it reaches its peak, and its `pole`, as its data are given for a rotated grid.
#' 
#' 
## ------------------------------------------------------------------------
library(RNetCDF)
time <- "2007011815" # reset if storm isn't Kyrill
nc <- open.nc("Kyrill_rawFoot.nc")
rfp <- list(x=var.get.nc(nc, "lon") - 360, y=var.get.nc(nc, "lat"), z=var.get.nc(nc, "wind_speed_of_gust"))
pole <- c(177.5, 37.5) 

#' 
#' The footprint will now be projected onto a conventional latitude:longitude grid. This is specified as a 300 $\times$ 300 regular grid with ranges in each direction determined by the footprint's range.
#' 
## ------------------------------------------------------------------------
crds <- read.csv("http://www.europeanwindstorms.org/database/dataDesc/grid_locations.csv")
lon.rg <- range(crds[,2])
lat.rg <- range(crds[,3])
nlon <- nlat <- 300
fp <- list(x=seq(lon.rg[1], lon.rg[2], l=nlon), y=seq(lat.rg[1], lat.rg[2], l=nlat))
rgrid <- rotateCoords(as.matrix(expand.grid(fp)), pole)
fp$z <- matrix(interp.surface(rfp, rgrid), nlon, nlat)

#' 
#' The footprint's time range is now given (here one day either side of its peak) and those observations falling within that range are identified and stored as `is.storm`.
#' 
## ------------------------------------------------------------------------
time.storm <- as.Date(paste(substr(time, 1, 4), substr(time, 5, 6), substr(time, 7, 8), sep="-"))
time.gusts <- as.Date(paste(gusts$year, gusts$month, gusts$day, gusts$year, sep="-"))
is.storm <- sapply(time.storm + c(-1, 0, 1), function(x) time.gusts %in% x)
is.storm <- apply(is.storm, 1, any)

#' 
#' The logical vector `is.storm` is then used to remove any observations lying outside the storm's time range. The data are then separated by station with only the maximum value for each retained, and then joined back together.
#' 
## ------------------------------------------------------------------------
gusts <- subset(gusts, is.storm)
gusts <- split(gusts, gusts$WMO_id)
gusts <- do.call(rbind, lapply(gusts, function(x) x[which.max(x$fakegust),]))

#' 
#' The footprint's values are then interpolated to give one for each station location.
#' 
## ------------------------------------------------------------------------
gusts$original <- interp.surface(fp, gusts[,c("lon", "lat")])
gusts <- subset(gusts, !is.na(original))

#' 
#' Some of the discrepancy between the observations and the simulated footprint can be attributed to elevation. This is therefore included as a covariate. Its values are transformed to improve its predictive power.
#' 
## ------------------------------------------------------------------------
elevtrans <- function(x) sign(x) * sqrt(abs(x))
data(EUelevation)
# load("elevation.RData")
gusts$elev <- elevtrans(interp.surface(EUelevation, gusts[,c("lon", "lat")]))

#' 
#' ## set aside some data for model validation
#' 
#' There are various assumptions made within the model. Here `n.valid = 30` data are withheld so that the model's assumptions can be checked. These are stored in the data frame `gusts.valid` and removed from the data frame `gusts` to which the recalibration model will be fitted.
#' 
## ------------------------------------------------------------------------
n.valid <- 30
id.valid <- replace(logical(nrow(gusts)), sample(nrow(gusts), n.valid), TRUE)
gusts.valid <- subset(gusts, id.valid)
gusts <- subset(gusts, !id.valid)

#' 
#' ## fit the recalibration model
#' 
#' The `recalibrate` function is used to fit the recalibration model. Here a mean function is specified so that the observed wind gust is some function of the modelled wind gust and elevation. An additive form of these relationships is used, where each takes the form of a cubic regression spline; see `help(mgcv::smooth.terms)`. The measurement error in the observations is specified as 3m/s. The names of the $x$ and $y$ coordinates, `lon` and `lat` respectively, are also provided to facilitate subsequent calculations with the fitted object, `fit.recal`.
#' 
## ------------------------------------------------------------------------
fit.recal <- recalibrate(fakegust ~ s(original, bs="cr", k=5) + s(elev, bs="cr", k=5), gusts, meas.err=3, xnm="lon", ynm="lat") 

#' 
#' A data frame to house the recalibrated values, `recal.data`, is then created
#' 
## ------------------------------------------------------------------------
x.recal <- seq(-25, 40, l=200) # alternatively fp$x for original longitudes
y.recal <- seq(35, 71, l=200) # alternatively fp$y for original latitudes
recal.data <- expand.grid(lon=x.recal, lat=y.recal)
recal.data$elev <- elevtrans(interp.surface(EUelevation, recal.data[,c("lon", "lat")]))
recal.data$original <- interp.surface(fp, recal.data[,c("lon", "lat")])
recal.data <- subset(recal.data, !is.na(maps::map.where("world", recal.data$lon, recal.data$lat)))

#' 
#' and these are then added to `recal.data`.
#' 
## ------------------------------------------------------------------------
recal.data[,c("recalibrated", "uncertainty")] <- predict(fit.recal, recal.data)

#' 
#' ## plots for recalibration model
#' 
#' The first example plot is off the additive components in the mean of the observations. Perhaps most striking is that the observations have a strong and increasing linear relationship with the simutaed values.
#' 
## ------------------------------------------------------------------------
plot(fit.recal)

#' 
#' We can also plot a map of the recalibrated footprint's mean estimate. 
#' 
## ------------------------------------------------------------------------
plot(fit.recal, data=recal.data, id="recalibrated")

#' 
#' Or we can plot it alongside the original footprint and an esimtate of its (pointwise) standard error.
#' 
## ---- fig.show = "hold", out.width = "50%"-------------------------------
plot(fit.recal, data=recal.data, id=c("recalibrated", "original", "uncertainty"))

#' 
#' Or we could go further and calculate the difference between the mean estimate of the recalibrated footprint and the original footprint, plot that together with the other three plots, and give some informative titles.
#' 
## ---- fig.show = "hold", out.width = "50%", fig.align = "default"--------
recal.data$change <- recal.data$recalibrated - recal.data$original
titles <- c(paste(c("Recalibrated", "Original"), "footprint"), "Footprint uncertainty \n (standard error)", "Footprint change \n (recalibrated - original)")
# as above plus change and titles
# this is probably the plot that you'll want most of the time
plot(fit.recal, data=recal.data, id=c("recalibrated", "original", "uncertainty", "change"), titles=titles)

#' 
#' The mean estimates of the recalibrated values can also be plotted against the observations, using data each station location.
#' 
## ------------------------------------------------------------------------
plot(fit.recal, type="observation") 

#' 
#' A map of the correlation in the spatial discrepancy can also be plotted, conditional of the domain's center.
#' 
## ------------------------------------------------------------------------
plot(fit.recal, type="correlation") 

#' 
#' ## check the recalibration model's assumptions
#' 
#' Finally, statistical diagnostics can be used to assess the model's assumptions, using the validation data withheld earlier.
#' 
## ---- fig.show = "hold", out.width = "50%", fig.align = "default"--------
plot(fit.recal, gusts.valid, type="validation")

#' 
#' ## references
#' Youngman, B. D. and D. B. Stephenson (2019). Spatial inference for hazard event intensities using imperfect observation and simulation data. Preprint available from [http://empslocal.ex.ac.uk/people/staff/by223/youngman-stephenson_recalibration.pdf](http://empslocal.ex.ac.uk/people/staff/by223/youngman-stephenson_recalibration.pdf).