# Point and Interval Estimates for the Brier Score # # Description: # # Returns point estimates, standard errors and bootstrapped # pointwise and simultaneous confidence intervals for both # conditional and unconditional Brier scores of ensemble-based # probabilistic forecasts of binary events. # # Usage: # # brier.score(x, y = NULL, th, size, above = TRUE, con = TRUE, # unc = TRUE, cover = NULL, boot = 1000, simul = FALSE) # # Arguments: # # x: Matrix with observations in the first row and members of # corresponding ensemble forecasts in subsequent rows. # # y: Optional matrix with second set of observations and forecasts # in the same form as `x'. # # th: Vector of thresholds defining different events. The same # thresholds are used for the observations and forecasts. # # size: Vector of ensemble sizes for which to estimate the expected # Brier scores. # # above: Logical: if `TRUE' (the default), events are defined by # threshold exceedance; if `FALSE', events are defined by # threshold deficit. # # con: Logical: if `TRUE' (the default), computes standard errors for # the true conditional Brier scores. # # unc: Logical: if `TRUE' (the default), computes standard errors for # the true unconditional Brier scores. # # cover: Coverage probability for confidence intervals; if `NULL', no # confidence intervals are computed. # # boot: Number of bootstrap samples from which to compute confidence # intervals. # # simul: Logical: if `FALSE' (the default), only pointwise confidence # intervals are computed; if `TRUE', simultaneous confidence # intervals for all thresholds are also computed. # # Details: # # See Ferro (2007). # # Output: # # A list with three components (`x', `y' and `difference' # corresponding to the scores for the x- and y-forecasts and the # differences between the scores for the x- and y-forecasts) is # returned invisibly. Each component is itself a list containing # the following elements: # # members: Vector of ensemble sizes `size'. # # thresholds: Vector of thresholds `th'. # # scores: Matrix of estimates of the Brier score for each # threshold (rows) and ensemble size (columns). # # reliability: Matrix of the reliability components for the estimates # in `scores'. # # resolution: Matrix of the resolution components for the estimates # in `scores'. # # certainty: Matrix of the uncertainty components for the estimates # in `scores'. # # adjustment: Matrix of the adjustment components for the estimates # in `scores'. # # con: List containing the following components: # # std.err: Matrix of conditional standard errors for the # estimates in `scores'. # # point: List containing lower and upper limits of # conditional pointwise confidence intervals for # the estimates in `scores'. # # simul: List containing lower and upper limits of # conditional simultaneous confidence intervals # for the estimates in `scores', plus estimates of # the global and local coverages of the intervals. # # unc: List with the same form as `con' but for unconditional # standard errors and intervals. # # Author: # # Chris Ferro 30 January 2007 # # References: # # Ferro CAT (2007) Comparing probabilistic forecasting systems with # the Brier score. Weather and Forecasting. In press. Available at # # http://www.met.rdg.ac.uk/~sws02caf/Publications/brier.pdf # # Examples: # # obs <- rnorm(30) # mod <- matrix(NA, 5, 30) # for(i in 1:5) mod[i, ] <- rnorm(30, 0.5 * obs, sqrt(1 - 0.5^2)) # data <- rbind(obs, mod) # out <- brier.score(data,, c(0, 2), c(5, Inf)) brier.score <- function(x, y = NULL, th, size, above = TRUE, con = TRUE, unc = TRUE, cover = NULL, boot = 1000, simul = FALSE) { # check arguments if(!is.null(cover) && !con && !unc) stop("One of `con' and `unc' must equal `TRUE'") # negate data if below threshold if(!above) { x <- -x if(!is.null(y)) y <- -y th <- -th } # data xobs <- x[1, ] xens <- x[-1,, drop = FALSE] if(!is.null(y)) { yobs <- y[1, ] yens <- y[-1,, drop = FALSE] } if(is.null(cover)) alpha <- NULL else alpha <- (1 - cover) / 2 # dimensions # m <- nrow(xens) # not needed? need mx and my? n <- length(xobs) nt <- length(th) ns <- length(size) # point estimates and standard errors temp <- brier.point(xobs, xens, th, size, con, unc) xb <- temp$b xb0 <- temp$b0 xs.con <- temp$s.con xs.unc <- temp$s.unc xout <- list(members = size, thresholds = th, scores = xb, reliability = temp$relia, resolution = temp$resol, certainty = temp$uncer, adjustment = temp$adjus) if(con) xout$con$std.err <- xs.con if(unc) xout$unc$std.err <- xs.unc if(!is.null(y)) { temp <- brier.point(yobs, yens, th, size, con, unc) yb <- temp$b yb0 <- temp$b0 ys.con <- temp$s.con ys.unc <- temp$s.unc yout <- list(members = size, thresholds = th, scores = yb, reliability = temp$relia, resolution = temp$resol, certainty = temp$uncer, adjustment = temp$adjus) if(con) yout$con$std.err <- ys.con if(unc) yout$unc$std.err <- ys.unc d <- xb - yb covar <- brier.cov(xobs, xens, yobs, yens, th, size) ds.con <- sqrt(xs.con^2 + ys.con^2) ds.unc <- sqrt(xs.unc^2 + ys.unc^2 - 2 * covar / n) dout <- list(members = size, thresholds = th, scores = d) if(con) dout$con$std.err <- ds.con if(unc) dout$unc$std.err <- ds.unc } # return point estimates if(is.null(alpha)) { if(!above) th <- -th xout$thresholds <- th if(is.null(y)) { out <- xout } else { yout$thresholds <- th dout$thresholds <- th out <- list(x = xout, y = yout, difference = dout) } return(invisible(out)) } # pointwise confidence intervals if(con) { # conditional print("Conditional bootstrap (x)...") temp <- brier.boot(xobs, xens,,, th, size, boot, con = TRUE) xbb.con <- temp$x$score xsb.con <- temp$x$std.err temp <- brier.int(xbb.con, xsb.con, xb0, xs.con, xb, alpha, con = TRUE) xrb.con <- temp$r xtb.con <- temp$t xlo.con <- temp$lo xup.con <- temp$up xout$con$point$lower <- xlo.con xout$con$point$upper <- xup.con if(!is.null(y)) { print("Conditional bootstrap (y)...") temp <- brier.boot(yobs, yens,,, th, size, boot, con = TRUE) ybb.con <- temp$x$score ysb.con <- temp$x$std.err temp <- brier.int(ybb.con, ysb.con, yb0, ys.con, yb, alpha, con = TRUE) yrb.con <- temp$r ytb.con <- temp$t ylo.con <- temp$lo yup.con <- temp$up yout$con$point$lower <- ylo.con yout$con$point$upper <- yup.con dsb.con <- sqrt(xsb.con^2 + ysb.con^2) dtb.con <- (xrb.con - yrb.con) / dsb.con zb <- aperm(apply(dtb.con, c(1, 2), quantile, probs = c(1 - alpha, alpha), na.rm = TRUE), c(2, 3, 1)) dlo.con <- d - ds.con * zb[, , 1] dup.con <- d - ds.con * zb[, , 2] dout$con$point$lower <- dlo.con dout$con$point$upper <- dup.con } } if(unc) { # unconditional if(is.null(y)) { print("Unconditional bootstrap (x)...") temp <- brier.boot(xobs, xens,,, th, size, boot, con = FALSE) xbb.unc <- temp$x$score xsb.unc <- temp$x$std.err temp <- brier.int(xbb.unc, xsb.unc, xb0, xs.unc, xb, alpha, con = FALSE) xrb.unc <- temp$r xtb.unc <- temp$t xlo.unc <- temp$lo xup.unc <- temp$up xout$unc$point$lower <- xlo.unc xout$unc$point$upper <- xup.unc } else { print("Unconditional bootstrap (x and y)...") temp <- brier.boot(xobs, xens, yobs, yens, th, size, boot, con = FALSE) xbb.unc <- temp$x$score xsb.unc <- temp$x$std.err ybb.unc <- temp$y$score ysb.unc <- temp$y$std.err covarb <- temp$cov temp <- brier.int(xbb.unc, xsb.unc, xb0, xs.unc, xb, alpha, con = FALSE) xrb.unc <- temp$r xtb.unc <- temp$t xlo.unc <- temp$lo xup.unc <- temp$up xout$unc$point$lower <- xlo.unc xout$unc$point$upper <- xup.unc temp <- brier.int(ybb.unc, ysb.unc, yb0, ys.unc, yb, alpha, con = FALSE) yrb.unc <- temp$r ytb.unc <- temp$t ylo.unc <- temp$lo yup.unc <- temp$up yout$unc$point$lower <- ylo.unc yout$unc$point$upper <- yup.unc dsb.unc <- sqrt(xsb.unc^2 + ysb.unc^2 - 2 * covarb / n) dtb.unc <- (xrb.unc - yrb.unc) / dsb.unc zb <- aperm(apply(dtb.unc, c(1, 2), quantile, probs = c(1 - alpha, alpha), na.rm = TRUE), c(2, 3, 1)) dlo.unc <- d - ds.unc * zb[, , 1] dup.unc <- d - ds.unc * zb[, , 2] dout$unc$point$lower <- dlo.unc dout$unc$point$upper <- dup.unc } } # return pointwise intervals if(!simul) { if(!above) th <- -th xout$thresholds <- th if(is.null(y)) { out <- xout } else { yout$thresholds <- th dout$thresholds <- th out <- list(x = xout, y = yout, difference = dout) } return(invisible(out)) } # simultaneous confidence intervals if(con) { # conditional temp <- brier.sim(xtb.con, xb, xs.con, alpha) xlo.con.sim <- pmax(temp$lo, 0) xup.con.sim <- pmin(temp$up, 1) xcov.con.sim <- temp$sim xcov.con.loc <- temp$loc xout$con$simul$lower <- xlo.con.sim xout$con$simul$upper <- xup.con.sim xout$con$simul$cover <- rbind(global = xcov.con.sim, local = xcov.con.loc) if(!is.null(y)) { temp <- brier.sim(ytb.con, yb, ys.con, alpha) ylo.con.sim <- pmax(temp$lo, 0) yup.con.sim <- pmin(temp$up, 1) ycov.con.sim <- temp$sim ycov.con.loc <- temp$loc yout$con$simul$lower <- ylo.con.sim yout$con$simul$upper <- yup.con.sim yout$con$simul$cover <- rbind(global = ycov.con.sim, local = ycov.con.loc) temp <- brier.sim(dtb.con, d, ds.con, alpha) dlo.con.sim <- temp$lo dup.con.sim <- temp$up dcov.con.sim <- temp$sim dcov.con.loc <- temp$loc dout$con$simul$lower <- dlo.con.sim dout$con$simul$upper <- dup.con.sim dout$con$simul$cover <- rbind(global = dcov.con.sim, local = dcov.con.loc) } } if(unc) { # unconditional temp <- brier.sim(xtb.unc, xb, xs.unc, alpha) xlo.unc.sim <- pmax(temp$lo, 0) xup.unc.sim <- pmin(temp$up, 1) xcov.unc.sim <- temp$sim xcov.unc.loc <- temp$loc xout$unc$simul$lower <- xlo.unc.sim xout$unc$simul$upper <- xup.unc.sim xout$unc$simul$cover <- rbind(global = xcov.unc.sim, local = xcov.unc.loc) if(!is.null(y)) { temp <- brier.sim(ytb.unc, yb, ys.unc, alpha) ylo.unc.sim <- pmax(temp$lo, 0) yup.unc.sim <- pmin(temp$up, 1) ycov.unc.sim <- temp$sim ycov.unc.loc <- temp$loc yout$unc$simul$lower <- ylo.unc.sim yout$unc$simul$upper <- yup.unc.sim yout$unc$simul$cover <- rbind(global = ycov.unc.sim, local = ycov.unc.loc) temp <- brier.sim(dtb.unc, d, ds.unc, alpha) dlo.unc.sim <- temp$lo dup.unc.sim <- temp$up dcov.unc.sim <- temp$sim dcov.unc.loc <- temp$loc dout$unc$simul$lower <- dlo.unc.sim dout$unc$simul$upper <- dup.unc.sim dout$unc$simul$cover <- rbind(global = dcov.unc.sim, local = dcov.unc.loc) } } # output if(!above) th <- -th xout$thresholds <- th if(is.null(y)) { out <- xout } else { yout$thresholds <- th dout$thresholds <- th out <- list(x = xout, y = yout, difference = dout) } invisible(out) } brier.point <- function(obs, sim, th, size, con, unc) { m <- nrow(sim) n <- length(obs) ns <- length(size) nt <- length(th) b <- b0 <- matrix(NA, nt, ns) s.con <- s.unc <- matrix(NA, nt, ns) relia <- resol <- adjus <- uncer <- matrix(NA, nt, ns) for(i in 1:nt) { I <- (obs > th[i]) Q <- apply(sim > th[i], 2, mean) Ibar <- mean(I) Qbar <- table(I, Q)[, as.character(Q)] Qbar <- Qbar["TRUE", ] / apply(Qbar, 2, sum) relia[i, ] <- mean((Q - Qbar)^2) resol[i, ] <- mean((Qbar - Ibar)^2) uncer[i, ] <- Ibar * (1 - Ibar) sharp <- mean((Q - 0.5)^2) adjus[i, ] <- (1 - m / size) * (0.25 - sharp) / (m - 1) for(j in 1:ns) { b[i, j] <- brier(I, Q, m, size[j]) b0[i, j] <- brier.true(I, Q, size[j]) if(con) s.con[i, j] <- brier.sd(I, Q, m, n, size[j]) if(unc) s.unc[i, j] <- brier.sd(I, Q, m, n, size[j], TRUE) } } list(b = b, b0 = b0, s.con = s.con, s.unc = s.unc, relia = relia, resol = resol, uncer = uncer, adjus = adjus) } brier.cov <- function(xobs, xsim, yobs, ysim, th, size) { mx <- nrow(xsim) my <- nrow(ysim) nt <- length(th) ns <- length(size) covar <- matrix(NA, nt, ns) for(i in 1:nt) { Ix <- (xobs > th[i]) Qx <- apply(xsim > th[i], 2, mean) Iy <- (yobs > th[i]) Qy <- apply(ysim > th[i], 2, mean) for(j in 1:ns) { M <- size[j] Wx <- (Qx - Ix)^2 - (1 - mx / M) * Qx * (1 - Qx) / (mx - 1) Wy <- (Qy - Iy)^2 - (1 - my / M) * Qy * (1 - Qy) / (my - 1) covar[i, j] <- mean(Wx * Wy) - mean(Wx) * mean(Wy) } } covar } brier <- function(I, Q, m, M) { if(m == 1 && M == 1) return(mean((Q - I)^2)) mean((Q - I)^2 - (1 - m / M) * Q * (1 - Q) / (m - 1)) } brier.sd <- function(I, Q, m, n, M, sample = FALSE) { if(sample) { return(sd((Q - I)^2 - (1 - m / M) * Q * (1 - Q) / (m - 1)) / sqrt(n)) } if(m < 4) { Q2 <- Q * Q Q3 <- Q * Q2 Q4 <- Q * Q3 } else { Q2 <- Q * (m * Q - 1) / (m - 1) Q3 <- Q2 * (m * Q - 2) / (m - 2) Q4 <- Q3 * (m * Q - 3) / (m - 3) } sqrt((1 - 1 / M) * max(0, 2 * (3 - 2 * m) * (1 - 1 / M) * mean(Q4) / (m * (m - 1)) + 4 * (m - 2 + (3 - 2 * m) / M) * mean(Q3) / (m * (m - 1)) + 2 * (1 + (2 * m - 3) / (M - 1) + (7 - 5 * m) / (2 * M * (M - 1))) * mean(Q2) / (m * (m - 1)) + mean(Q) / (m * (m - 1) * M) + 8 * mean(I * Q3) / m - 12 * mean(I * Q2) / m + 4 * mean(I * Q) / m)) / sqrt(n) } brier.true <- function(I, Q, M) { (1 - 1 / M) * mean(Q^2) + mean(Q) / M - 2 * mean(I * Q) + mean(I) } brier.boot <- function(xobs, xsim, yobs = NULL, ysim = NULL, th, size, boot, con) { mx <- nrow(xsim) my <- nrow(ysim) n <- length(xobs) ns <- length(size) nt <- length(th) covb <- array(NA, c(nt, ns, boot)) xbb <- xsb <- array(NA, c(nt, ns, boot)) ybb <- ysb <- array(NA, c(nt, ns, boot)) if(con) { # conditional x for(k in 1:boot) { xsimb <- apply(xsim, 2, sample, replace = TRUE) for(i in 1:nt) { I <- (xobs > th[i]) Q <- apply(xsimb > th[i], 2, mean) for(j in 1:ns) { xbb[i, j, k] <- brier(I, Q, mx, size[j]) xsb[i, j, k] <- brier.sd(I, Q, mx, n, size[j]) } } } out <- list(x = list(score = xbb, std.err = xsb)) # conditional y if(!is.null(yobs)) { for(k in 1:boot) { ysimb <- apply(ysim, 2, sample, replace = TRUE) for(i in 1:nt) { I <- (yobs > th[i]) Q <- apply(ysimb > th[i], 2, mean) for(j in 1:ns) { ybb[i, j, k] <- brier(I, Q, my, size[j]) ysb[i, j, k] <- brier.sd(I, Q, my, n, size[j]) } } } out$y <- list(score = ybb, std.err = ysb) } } else { # unconditional x if(is.null(yobs)) { for(k in 1:boot) { index <- sample(n, replace = TRUE) obsb <- xobs[index] simb <- xsim[, index] for(i in 1:nt) { I <- (obsb > th[i]) Q <- apply(simb > th[i], 2, mean) for(j in 1:ns) { xbb[i, j, k] <- brier(I, Q, mx, size[j]) xsb[i, j, k] <- brier.sd(I, Q, mx, n, size[j], TRUE) } } } out <- list(x = list(score = xbb, std.err = xsb)) } else { # unconditional x and y for(k in 1:boot) { index <- sample(n, replace = TRUE) xobsb <- xobs[index] yobsb <- yobs[index] xsimb <- xsim[, index] ysimb <- ysim[, index] for(i in 1:nt) { Ix <- (xobsb > th[i]) Qx <- apply(xsimb > th[i], 2, mean) Iy <- (yobsb > th[i]) Qy <- apply(ysimb > th[i], 2, mean) for(j in 1:ns) { xbb[i, j, k] <- brier(Ix, Qx, mx, size[j]) xsb[i, j, k] <- brier.sd(Ix, Qx, mx, n, size[j], TRUE) ybb[i, j, k] <- brier(Iy, Qy, my, size[j]) ysb[i, j, k] <- brier.sd(Iy, Qy, my, n, size[j], TRUE) } } covb[,, k] <- brier.cov(xobsb, xsimb, yobsb, ysimb, th, size) } out <- list(x = list(score = xbb, std.err = xsb)) out$y <- list(score = ybb, std.err = ysb) out$cov <- covb } } out } brier.int <- function(bb, sb, b0, s, b, alpha, con) { nt <- dim(bb)[1] ns <- dim(bb)[2] boot <- dim(bb)[3] lo <- up <- matrix(NA, nt, ns) rb <- tb <- array(NA, c(nt, ns, boot)) if(con) { for(i in 1:nt) { for(j in 1:ns) { rb[i, j, ] <- bb[i, j, ] - b0[i, j] tb[i, j, ] <- rb[i, j, ] / sb[i, j, ] zb <- quantile(tb[i, j, ], c(1 - alpha, alpha), na.rm = TRUE) if(s[i, j] == 0) { lo[i, j] <- b[i, j] up[i, j] <- b[i, j] } else { lo[i, j] <- b[i, j] - s[i, j] * zb[1] up[i, j] <- b[i, j] - s[i, j] * zb[2] } } } } else { for(i in 1:nt) { for(j in 1:ns) { rb[i, j, ] <- bb[i, j, ] - b[i, j] tb[i, j, ] <- rb[i, j, ] / sb[i, j, ] zb <- quantile(tb[i, j, ], c(1 - alpha, alpha), na.rm = TRUE) if(s[i, j] == 0) { lo[i, j] <- b[i, j] up[i, j] <- b[i, j] } else { lo[i, j] <- b[i, j] - s[i, j] * zb[1] up[i, j] <- b[i, j] - s[i, j] * zb[2] } } } } lo[lo < 0] <- 0 up[up > 1] <- 1 list(r = rb, t = tb, lo = lo, up = up) } brier.sim <- function(tb, b, s, alpha) { nt <- dim(tb)[1] ns <- dim(tb)[2] boot <- dim(tb)[3] sim <- loc <- numeric(ns) lo <- up <- matrix(NA, nt, ns) for(j in 1:ns) { k <- 1 tr <- t(apply(tb[, j, ], 1, rank)) tbs <- t(apply(tb[, j, ], 1, sort, na.last = TRUE)) out <- apply((tr <= k) | (tr >= boot + 1 - k), 2, any) p.global <- sum(out) / boot while((p.global < 2 * alpha) && (k < boot)) { k <- k + 1 out <- apply((tr <= k) | (tr >= boot + 1 - k), 2, any) p.global <- sum(out) / boot } ok <- apply(!is.na(tbs), 1, sum) for(i in 1:nt) { if(s[i, j] == 0) { lo[i, j] <- b[i, j] up[i, j] <- b[i, j] } else { lo[i, j] <- b[i, j] - s[i, j] * tbs[i, ok[i] + 1 - k] up[i, j] <- b[i, j] - s[i, j] * tbs[i, k] } } sim[j] <- 1 - p.global loc[j] <- 1 - 2 * k / boot } list(sim = sim, loc = loc, lo = lo, up = up) }