xparetotvt <- function(lon,lat,array3d,span,p=0.9,index,nonmissing=0.5) { # Fit Generalized Pareto distribution with time-varying threshold at each grid # point for a given three-dimensional array of montly data with first two space # dimensions (e.g. longitude and latitude) and third time dimension. # # Description: # # Returns maps of KS test p-values, fitted parameters (constant scale and # constant shape), map of fraction of points above the time-varying # threshold, and the time varying threshold. # # Usage: # # xparetotvt(lon,lat,array3d,span,p,index,nonmissing) # # Input: # # lon: vector with p longitude values # # lat: vector with q latitude values # # array3d: a three-dimensional array of monthly data with p longitude points # and q latitude points as the first two dimensions and n as the # third time dimension # # span: fraction of the total number of points 'n' to be used to compute # the long term mean # # p: a value between 0 and 1 that informs the percentage of point # that will be left below the time-varying threshold # # index: index vector of length n composed of logicals (TRUE and FALSE) # defining the months to be considered when computing the # time-varying threshold # #nonmissing: Only grid points with fraction given by 'nonmissing' # (between 0 and 1) of non-missing values are used to estimate the # Generalized Pareto distribution parameters. Default is 0.5. # # Output: # # $pvaloutput: p x q map of KS test p-values # # $output: 2 x p x q array with scale (first level of the array) and # shape (second level of the array) parameters # # $frac: p x q map with fraction of points above the time-varying threshold # # $th: p x q x n' array containing the time varying threshold, where # n' is the number of months considered when computing the # time-varying threshold # # Authors: # # Caio Coelho 28 Feb 2006 # Chris Ferro # reshape three-dimensional array into a data matrix with # time as first dimension and space as sencond dimension data <- reshapefield(lon,lat,array3d)$out # compute percentage of non-missing values at each grid point aux <- apply(data[index,],2,function(x)sum(!is.na(x))/(length(x))) # identify grid points with less than 50% missing values indexgrid <- (1:length(aux))[aux >= nonmissing] out1 <- rep(NA,dim(data)[2]) out2 <- rep(NA,dim(data)[2]) pval1 <- rep(NA,dim(data)[2]) frac1 <- rep(NA,dim(data)[2]) mthresh <- matrix(nrow=(dim(data)[1])/(12/sum(index[1:12])),ncol=dim(data)[2]) # function to perform KS test ff <- function(x,loc,scale,shape){ if(all(is.na(c(scale,shape)))) {return(NA)} ks.test(x,"pgpd",loc,scale,shape)$p } # Estimate Generalized Pareto distribution parameters (scale and shape) for # exceedances above the threshold p estim.par <- function(y,p,index) { threshold<-tvt(y,span,index,1-p)$threshold yy<-y[index] fraction<-sum(!is.na(yy[yy>threshold]))/sum(!is.na(yy)) params<-gpd.fit(yy[!is.na(yy)], threshold[!is.na(yy)], show=FALSE)$mle pval<-ff((yy-threshold)[yy>threshold],0,params[1],params[2]) list(params=params,pval=pval,fraction=fraction,threshold=threshold) } out<-apply(data[,indexgrid],2,estim.par,p,index) for (i in 1:length(indexgrid)){ #scale parameter out1[indexgrid[i]]<-out[[i]]$params[1] mthresh[,indexgrid[i]]<-out[[i]]$threshold #shape parameter out2[indexgrid[i]]<-out[[i]]$params[2] #p-value pval1[indexgrid[i]]<-out[[i]]$pval frac1[indexgrid[i]]<-out[[i]]$fraction } aux<-reshapefield(lon,lat,t(as.matrix(pval1)))$out pvaloutput<-aux[,,1,drop=T] aux1<-reshapefield(lon,lat,t(as.matrix(frac1)))$out frac<-aux1[,,1,drop=T] th<-reshapefield(lon,lat,mthresh)$out output<-array(NA,c(2,dim(array3d)[1],dim(array3d)[2])) output[1,,]<-reshapefield(lon,lat,t(as.matrix(out1)))$out output[2,,]<-reshapefield(lon,lat,t(as.matrix(out2)))$out invisible(list(pvaloutput=pvaloutput,output=output,frac=frac,th=th)) }