# Extreme-value Model for a Contingency Table of Rare-event Forecasts
#
# Description:
#
#      Returns probabilities with which events are observed, forecasted,
#      and both observed and forecasted, by fitting extreme-value models
#      to the marginal and/or joint upper tails of decision and event
#      variables. Parameter estimates and standard errors are also
#      returned.
#
# Usage:
#
#      ctable(x, y, exceed = list(x = 0, y = 0, z = 0), p, ...)
#
# Arguments:
#
#      x: Vector of decision variables.
#
#      y: Vector of event variables, equal in length to `x'.
#
#      exceed: Named list giving the exceedance probabilities for the
#          thresholds above which `x', `y' and `z' are modelled with
#          extreme-value distributions. If a probability lies outside
#          the interval (0, 1) then only the empirical distribution
#          function is used.
#
#      p: Vector of base rates for which the contingency table is
#          estimated.
#
#      ...: Additional arguments to `fpot' for marginal estimation, e.g.
#          standard errors can be suppressed with `std.err = FALSE'.
#
# Details:
#
#      See Ferro (2007).
#
# Output:
#
#      A list containing the following components:
#
#      table: List containing the following components:
#
#          observe: Vector of base rates `p' giving the probability
#              with which the event is observed: Pr(Y > v).
#
#          forecast: Vector of base rates `p' giving the probability
#              with which the event is forecast: Pr(X > u).
#
#          joint: Vector of probabilities with which the event is
#              both forecast and observed: Pr(X > u, Y > v).
#
#      x: List containing the following components:
#
#          threshold: Threshold above which the extreme-value model is
#              fitted to `x' (if used).
#
#          estimate: Estimates of the location, scale and shape
#              parameters in the extreme-value model (if used).
#
#          std.err: Standard errors of the parameter estimates (if
#              requested).
#
#          transform: Values of `x' after transformation to standard
#              Exponential scale.
#
#      y: List with the same form as for `x' but for the event
#          variables `y'.
# z: List containing the following components (if an extreme-value
#     model is fitted to `z'):
#
#     threshold: Threshold above which the model is fitted.
#
#     estimate: Estimates of the parameters `alpha', `eta' and
#         `kappa'.
#
# Author:
#
#      Chris Ferro 30 January 2007
#
# References:
#
#      Ferro CAT (2007) A probability model for verifying deterministic
#      forecasts of extreme events. Submitted. Available from
#
#      http://www.met.rdg.ac.uk/~sws02caf/Publications/verif.pdf
#
# See Also:
#
#      `fpot' in the `evd' package, and `vscores'.
#
# Examples:
#
#      # bivariate Normal data
#      x <- rnorm(1000)
#      y <- rnorm(1000, 0.5 * x, sqrt(1 - 0.5^2))
#      exceed <- list(x = 0.05, y = 0.05, z = 0.1)
#      out <- ctable(x, y, exceed, p = seq(0, 0.1, 0.01))
#      vscores(out)

ctable <- function(x, y, exceed = list(x = 0, y = 0, z = 0), p, ...) {

  require(evd)  # provides fpot(); only needed when a tail model is fitted

  # Starting values for the point-process ("pp") fit, derived from a
  # generalised Pareto fit above the same threshold `u'.
  start.pp <- function(v, u) {
    gpd <- fpot(v, u, "gpd", std.err = FALSE)
    shape <- gpd$est[["shape"]]
    scale <- gpd$est[["scale"]] * (gpd$pat * length(v))^shape
    loc <- u + (scale - gpd$est[["scale"]]) / shape
    list(loc = loc, scale = scale, shape = shape)
  }

  # Transform one margin to the standard Exponential scale. When `prob'
  # lies in (0, 1), a point-process model is fitted above the empirical
  # (1 - prob)-quantile and used for the upper tail; otherwise only the
  # empirical distribution function is used. Returns the components
  # documented for `x'/`y' in the header (threshold, estimate, std.err
  # when available, transform).
  margin <- function(v, prob, ...) {
    if ((prob > 0) && (prob < 1)) {
      vt <- -log(1 - rank(v) / n)
      th <- quantile(v, 1 - prob, names = FALSE)
      fit <- fpot(v, th, "pp", start = start.pp(v, th), ...)
      est <- fit$est[c("loc", "scale", "shape")]
      # Model-based exceedance probabilities replace the empirical ones
      # above the threshold.
      tail <- (1 + est[3] * (v[v > th] - est[1]) / est[2])^(-1 / est[3]) / n
      vt[v > th] <- -log(tail)
      res <- list(threshold = th, estimate = est)
      se <- fit$std[c("loc", "scale", "shape")]
      if (!is.null(se)) res$std.err <- se
      res$transform <- vt
      res
    } else {
      list(transform = -log(1 - rank(v) / (n + 1)))
    }
  }

  # Argument checks.
  n <- length(x)
  if (length(y) != n)
    stop("`x' and `y' should be vectors with the same lengths")
  if (any(is.na(match(c("x", "y", "z"), names(exceed)))))
    stop("`exceed' should be a list with names `x', `y' and `z'")
  if (any(p < 0) || any(p > 1))
    stop("`p' should be a vector of probabilities")

  # Marginal probabilities and marginal transformations.
  out <- list(table = list(observe = p, forecast = p))
  out$x <- margin(x, exceed$x, ...)
  out$y <- margin(y, exceed$y, ...)

  # Joint probability Pr(X > u, Y > v) via the structure variable
  # z = min(xt, yt), empirically and, if requested, with an
  # extreme-value model for small base rates.
  z <- pmin(out$x$transform, out$y$transform)
  pxy <- apply(matrix(-log(p)), 1, function(w) mean(z > w))
  if ((exceed$z > 0) && (exceed$z < 1)) {
    w0 <- quantile(z, 1 - exceed$z, names = FALSE)
    m <- sum(z > w0)
    eta <- min(1, mean(z[z > w0]) - w0)
    kappa <- m * exp(w0 / eta) / n
    out$z$threshold <- w0
    out$z$estimate <- c(alpha = w0 + eta * log(m), eta = eta, kappa = kappa)
    small <- p <= exp(-w0)
    pxy[small] <- kappa * p[small]^(1 / eta)
  } else {
    pxy <- pxy * n / (n + 1)
  }
  out$table$joint <- pxy
  out
}

# Verification Measures from Forecast-Observation Contingency Tables
#
# Description:
#
#      Returns several verification measures estimated from a set of
#      contingency tables.
#
# Usage:
#
#      vscores(ctab)
#
# Arguments:
#
#      ctab: Output from `ctable'.
#
# Details:
#
#      See Ferro (2007).
#
# Output:
#
#      A list containing several verification measures estimated for
#      each of the different contingency tables contained in `ctab'.
# The measures are:
#
#      base.rate: Base rate
#      hit.rate: Hit rate
#      false.rate: False alarm rate
#      far: False alarm ratio
#      pc: Proportion correct
#      heidke: Heidke skill score
#      peirce: Peirce skill score
#      csi: Critical success index
#      gilbert: Gilbert skill score
#      yule: Yule's Q
#      log.odds: Log odds ratio
#      ddist: Discrimination distance
#      roc.area: Area under the binormal ROC curve
#
# Author:
#
#      Chris Ferro 30 January 2007
#
# References:
#
#      Ferro CAT (2007) A probability model for verifying deterministic
#      forecasts of extreme events. Submitted. Available from
#
#      http://www.met.rdg.ac.uk/~sws02caf/Publications/verif.pdf
#
# See Also:
#
#      `fpot' in the `evd' package, and `ctable'.
#
# Examples:
#
#      # bivariate Normal data
#      x <- rnorm(1000)
#      y <- rnorm(1000, 0.5 * x, sqrt(1 - 0.5^2))
#      exceed <- list(x = 0.05, y = 0.05, z = 0.1)
#      out <- ctable(x, y, exceed, p = seq(0, 0.1, 0.01))
#      vscores(out)

vscores <- function(ctab) {
  # Extract the 2x2 table entries for each base rate:
  # a = hits, b = false alarms, c = misses, d = correct rejections.
  tab <- ctab$table
  a <- tab$joint
  b <- tab$forecast - a
  c <- tab$observe - a
  d <- 1 - a - b - c

  # Evaluate the verification measures (vectorized over base rates).
  s <- a + c                # base rate Pr(Y > v)
  h <- a / (a + c)          # hit rate
  f <- b / (b + d)          # false alarm rate
  far <- b / (a + b)        # false alarm ratio
  pc <- a + d               # proportion correct
  heidke <- 2 * s * (1 - s) * (h - f) /
    (s + s * (1 - 2 * s) * h + (1 - s) * (1 - 2 * s) * f)
  peirce <- h - f
  csi <- a / (a + b + c)
  gilbert <- s * (1 - s) * (h - f) / (s * (1 - s * h) + (1 - s)^2 * f)
  yule <- (a * d - b * c) / (a * d + b * c)
  odds <- a * d / (b * c)
  dd <- qnorm(h) - qnorm(f)

  # Replace indeterminate entries (no false alarms, b == 0, e.g. at
  # base rate zero) with their limits under the extreme-value model
  # for `z' (Ferro, 2007), when such a model was fitted.
  # BUG FIX: the original assigned `kap` but then referenced `kappa`,
  # which resolved to base R's kappa() function and made every
  # assignment below error whenever this branch ran.
  if (any(b == 0) && !is.null(ctab$z)) {
    i <- which(b == 0)
    eta <- ctab$z$estimate[["eta"]]
    kap <- ctab$z$estimate[["kappa"]]
    if (eta == 1) {
      # Dependent tails: limits are governed by kappa.
      h[i] <- kap
      f[i] <- 0
      far[i] <- 1 - kap
      pc[i] <- 1
      heidke[i] <- kap
      peirce[i] <- kap
      csi[i] <- kap / (2 - kap)
      gilbert[i] <- kap / (2 - kap)
      yule[i] <- 1
      odds[i] <- Inf
    } else {
      # eta < 1: most measures degenerate; the association measures
      # depend on whether eta is above, below, or equal to 1/2.
      h[i] <- f[i] <- heidke[i] <- peirce[i] <- csi[i] <- gilbert[i] <- 0
      far[i] <- pc[i] <- 1
      if (eta > 0.5) {
        yule[i] <- 1
        odds[i] <- Inf
      } else if (eta < 0.5) {
        yule[i] <- -1
        odds[i] <- 0
      } else {
        yule[i] <- (kap - 1) / (kap + 1)
        odds[i] <- kap
      }
    }
  }

  list(base.rate = s, hit.rate = h, false.rate = f, far = far, pc = pc,
       heidke = heidke, peirce = peirce, csi = csi, gilbert = gilbert,
       yule = yule, log.odds = log(odds), ddist = dd,
       roc.area = pnorm(dd / sqrt(2)))
}