C++ Revenue Management Open Library Code

Status: Beta
Brought to you by: carobelin, clacombe, denis_arnaud, dkchoe, and 8 others
[2722f9]: / R / msm / R / boot.R Maximize Restore History
248 lines (224 with data), 12.8 kB

### Take a bootstrap sample from the data contained in a fitted msm
### model. Sample pairs of consecutive observations, i.e. independent
### transitions.  Not applicable if model is hidden or some states are
### censored.

bootdata.trans.msm <- function(x) {
    dat <- if (!is.null(x$data.orig)) x$data.orig else x$data
    subj.num <- match(dat$subject, unique(dat$subject))
    nextsubj <- c(subj.num[2:length(subj.num)], Inf)
    lastsubj <- subj.num != nextsubj
    inds <- sample(which(!lastsubj), replace=TRUE)
    data.boot <- as.data.frame(matrix(nrow=length(inds)*2, ncol=length(dat$covlabels.orig) + 4))
    state.name <- deparse(as.list(x$call$formula)[[2]])
    time.name <- deparse(as.list(x$call$formula)[[3]])
    colnames(data.boot) <- c("subject.name", time.name, state.name, "obstype.name", dat$covlabels.orig)
    data.boot[,state.name] <- as.vector(rbind(dat$state[inds], dat$state[inds+1]))
                                        # in the bootstrap data, label each transition as being from a different subject
    data.boot[,"subject.name"] <- rep(seq(along=inds), each=2)
    data.boot[,time.name] <- as.vector(rbind(dat$time[inds], dat$time[inds+1]))
    data.boot[,"obstype.name"] <- as.vector(rbind(dat$obstype.obs[inds], dat$obstype.obs[inds+1]))
    for (j in dat$covlabels.orig) {
        frominds <- seq(1, 2*length(inds)-1, 2)
        data.boot[frominds, j] <- data.boot[frominds+1,j] <- dat$cov.orig[inds, j]
        if (dat$covdata$covfactor[j])
            data.boot[,j] <- factor(data.boot[,j], labels=levels(dat$cov.orig[,j]))
    }
    colnames(data.boot) <- gsub("factor\\((.+)\\)", "\\1", colnames(data.boot))
    data.boot
}

### Take a bootstrap sample from the data contained in a fitted msm
### model. Sample subjects. Used for hidden models or models with
### censoring, in which the transitions within a subject are not
### independent.

bootdata.subject.msm <- function(x) {
    dat <- if (!is.null(x$data.orig)) x$data.orig else x$data
    subj.num <- match(dat$subject, unique(dat$subject))
    subjs <- sample(unique(subj.num), replace=TRUE)
    inds <- new.subj <- NULL
    for (i in seq(along=subjs)) {
        subj.inds <- which(subj.num == subjs[i])
        inds <- c(inds, subj.inds)
        new.subj <- c(new.subj, rep(i, length(subj.inds)))
    }
    data.boot <- as.data.frame(matrix(nrow=length(inds), ncol=length(dat$covlabels.orig) + 5))
    state.name <- deparse(as.list(x$call$formula)[[2]])
    time.name <- deparse(as.list(x$call$formula)[[3]])
    colnames(data.boot) <- c("subject.name", time.name, state.name, "obstype.name", "obstrue.name", dat$covlabels.orig)
    data.boot[,"subject.name"] <- new.subj
    data.boot[,time.name] <- dat$time[inds]
    data.boot[,state.name] <- dat$state[inds]
    data.boot[,"obstype.name"] <- dat$obstype[inds]
    data.boot[,"obstrue.name"] <- dat$obstrue[inds]
    for (j in dat$covlabels.orig) {
        data.boot[, j] <- dat$cov.orig[inds, j]
        if (dat$covdata$covfactor[j]){
          data.boot[,j] <- factor(data.boot[,j], labels=levels(dat$cov.orig[,j]))
        }
    }
    colnames(data.boot) <- gsub("factor\\((.+)\\)", "\\1", colnames(data.boot))
    data.boot
}

### Given a fitted msm model, draw a bootstrap dataset, refit the
### model, and optionally compute a statistic on the refitted model.
### Repeat B times, store the results in a list.
### msm objects tend to be large, so it is advised to compute a statistic on them by specifying "stat", instead
### of using this function to return a list of refitted msm objects.
### To compute more than one statistic, specify, e.g. stat=function(x)list(stat1(x),stat2(x))

### Some of the arguments to the msm call might be user-defined objects.
### e.g. qmatrix, ematrix, hmodel, ...
### Put in help file that these must be in the working environment.

### a) if call supplied as factor(), strip factor() from name. 

boot.msm <- function(x, stat=pmatrix.msm, B=1000, file=NULL){
    boot.list <- vector(B, mode="list")
    if (!is.null(x$call$subject)) x$call$subject <- substitute(subject.name)
    if (!is.null(x$call$obstype)) x$call$obstype <- substitute(obstype.name)
    if (!is.null(x$call$obstrue)) x$call$obstrue <- substitute(obstrue.name)
    for (i in 1:B) {
        boot.data <- if (x$hmodel$hidden || x$cmodel$ncens) bootdata.subject.msm(x) else bootdata.trans.msm(x)
        x$call$data <- substitute(boot.data)
        boot.list[[i]] <- try(eval(x$call))
        if (!is.null(stat))
            boot.list[[i]] <- stat(boot.list[[i]])
        if (!is.null(file)) save(boot.list, file=file)
    }
    boot.list
}

### Utilities for calculating bootstrap CIs for particular statistics

qematrix.ci.msm <- function(x, covariates="mean", intmisc="intens", sojourn=FALSE, cl=0.95, B=1000) {
    q.list <- boot.msm(x, function(x)qematrix.msm(x=x, covariates=covariates, intmisc=intmisc)$estimates, B)
    q.array <- array(unlist(q.list), dim=c(dim(q.list[[1]]), length(q.list)))
    q.ci <- apply(q.array, c(1,2), function(x)(c(quantile(x, c(0.5 - cl/2, 0.5 + cl/2)), sd(x))))
    q.ci <- aperm(q.ci, c(2,3,1))
    if (sojourn) {
        soj.array <- apply(q.array, 3, function(x) -1/diag(x))
        soj.ci <- apply(soj.array, 1, function(x)(c(quantile(x, c(0.5 - cl/2, 0.5 + cl/2)), sd(x))))
        list(q=q.ci, soj=soj.ci)
    }
    else q.ci
}

qratio.ci.msm <- function(x, ind1, ind2, covariates="mean", cl=0.95, B=1000) {
    q.list <- boot.msm(x, function(x)qratio.msm(x=x, ind1=ind1, ind2=ind2, covariates=covariates)["estimate"], B)
    q.vec <- unlist(q.list)
    c(quantile(q.vec, c(0.5 - cl/2, 0.5 + cl/2)), sd(q.vec))
}

pmatrix.ci.msm <- function(x, t, covariates="mean", cl=0.95, B=1000) {
    p.list <- boot.msm(x, function(x)pmatrix.msm(x=x, t=t, covariates=covariates,ci="none"), B)
    p.array <- array(unlist(p.list), dim=c(dim(p.list[[1]]), length(p.list)))
    p.ci <- apply(p.array, c(1,2), function(x)(quantile(x, c(0.5 - cl/2, 0.5 + cl/2))))
    aperm(p.ci, c(2,3,1))
}

pmatrix.piecewise.ci.msm <- function(x, t1, t2, times, covariates="mean", cl=0.95, B=1000) {
    p.list <- boot.msm(x, function(x)pmatrix.piecewise.msm(x=x, t1=t1, t2=t2, times=times, covariates=covariates,ci="none"), B)
    p.array <- array(unlist(p.list), dim=c(dim(p.list[[1]]), length(p.list)))
    p.ci <- apply(p.array, c(1,2), function(x)(quantile(x, c(0.5 - cl/2, 0.5 + cl/2))))
    aperm(p.ci, c(2,3,1))
}

totlos.ci.msm <- function(x, start=1, fromt=0, tot=Inf, covariates="mean", cl=0.95, B=1000, ...) {
    t.list <- boot.msm(x, function(x)totlos.msm(x, start, fromt, tot, covariates), B)
    t.array <- do.call("rbind", t.list)
    apply(t.array, 2, function(x)(quantile(x, c(0.5 - cl/2, 0.5 + cl/2))))
}

expected.ci.msm <- function(x,
                            times=NULL,
                            timezero=NULL,
                            initstates=NULL,
                            covariates="mean",
                            misccovariates="mean",
                            piecewise.times=NULL,
                            piecewise.covariates=NULL,
                            risk=NULL,
                            cl=0.95, B=1000) {
    if(is.null(risk)) risk <- observed.msm(x)$risk
    e.list <- boot.msm(x, function(x){
        expected.msm(x, times, timezero, initstates, covariates, misccovariates, piecewise.times, piecewise.covariates, risk)
    }, B)
    e.tab.array <- array(unlist(lapply(e.list, function(x)x[[1]])), dim=c(dim(e.list[[1]][[1]]), length(e.list)))
    e.perc.array <- array(unlist(lapply(e.list, function(x)x[[2]])), dim=c(dim(e.list[[1]][[2]]), length(e.list)))
    e.tab.ci <- apply(e.tab.array, c(1,2), function(x)(quantile(x, c(0.5 - cl/2, 0.5 + cl/2))))
    e.perc.ci <- apply(e.perc.array, c(1,2), function(x)(quantile(x, c(0.5 - cl/2, 0.5 + cl/2))))
    res <- list(aperm(e.tab.ci, c(2,3,1)),  aperm(e.perc.ci, c(2,3,1)))
    names(res) <- c("Expected", "Expected percentages")
    res
}

### Compute a CI for a statistic using a sample from the assumed MVN
### distribution of MLEs of log Q, logit E and covariate effects on these
### Not user visible: only support a limited set of statistics based on Q matrix and E matrix.

normboot.msm <- function(x, stat, B=100) {
    ## simulate from vector of unreplicated parameters, to avoid numerical problems with rmvnorm when lots of correlations are 1 
    if (!x$foundse) stop("Asymptotic standard errors not available in fitted model")
    sim <- rmvnorm(B, x$opt$par, solve(0.5 * x$opt$hessian))
    params <- matrix(nrow=B, ncol=x$paramdata$npars)  # replicate constrained parameters.
    params[,x$paramdata$optpars] <- sim 
    params[,x$paramdata$hmmpars] <- msm.mninvlogit.transform(x$paramdata$params[x$paramdata$hmmpars], x$hmodel$plabs, x$hmodel$parstate)
    params <- params[, !duplicated(abs(x$paramdata$constr))][, abs(x$paramdata$constr)]*rep(sign(x$paramdata$constr), each=B)

    sim.stat <- vector(B, mode="list")
    for (i in 1:B) {
        x.rep <- x
        x.rep$paramdata$params <- params[i,]
        output <- msm.form.output("intens", x.rep$qmodel, x.rep$qcmodel, x.rep$paramdata)
        x.rep$Qmatrices <- output$Matrices
        if (x$emodel$misc) {
            output <- msm.form.output("misc", x.rep$emodel, x.rep$ecmodel, x.rep$paramdata)
            x.rep$Ematrices <- output$Matrices
            names(x.rep$Ematrices)[1] <- "logitbaseline"
        }
        sim.stat[[i]] <- stat(x.rep)
    }
    sim.stat
}

qematrix.normci.msm <- function(x, covariates="mean", intmisc="intens", sojourn=FALSE, cl=0.95, B=1000) {
    q.list <- normboot.msm(x, function(x)qematrix.msm(x=x, covariates=covariates, intmisc=intmisc, ci="none"), B)
    q.array <- array(unlist(q.list), dim=c(dim(q.list[[1]]), length(q.list)))
    q.ci <- apply(q.array, c(1,2), function(x)(c(quantile(x, c(0.5 - cl/2, 0.5 + cl/2)), sd(x))))
    q.ci <- aperm(q.ci, c(2,3,1))
    if (sojourn) {
        soj.array <- apply(q.array, 3, function(x) -1/diag(x))
        soj.ci <- apply(soj.array, 1, function(x)(c(quantile(x, c(0.5 - cl/2, 0.5 + cl/2)), sd(x))))
        list(q=q.ci, soj=soj.ci)
    }
    else q.ci
}

qratio.normci.msm <- function(x, ind1, ind2, covariates="mean", cl=0.95, B=1000) {
    q.list <- normboot.msm(x, function(x)qratio.msm(x=x, ind1=ind1, ind2=ind2, covariates=covariates)["estimate"], B)
    q.vec <- unlist(q.list)
    c(quantile(q.vec, c(0.5 - cl/2, 0.5 + cl/2)), sd(q.vec))
}

pmatrix.normci.msm <- function(x, t, covariates="mean", cl=0.95, B=1000) {
    p.list <- normboot.msm(x, function(x)pmatrix.msm(x=x, t=t, covariates=covariates, ci="none"), B)
    p.array <- array(unlist(p.list), dim=c(dim(p.list[[1]]), length(p.list)))
    p.ci <- apply(p.array, c(1,2), function(x)(quantile(x, c(0.5 - cl/2, 0.5 + cl/2))))
    aperm(p.ci, c(2,3,1))
}

pmatrix.piecewise.normci.msm <- function(x, t1, t2, times, covariates="mean", cl=0.95, B=1000) {
    p.list <- normboot.msm(x, function(x)pmatrix.piecewise.msm(x=x, t1=t1, t2=t2, times=times, covariates=covariates, ci="none"), B)
    p.array <- array(unlist(p.list), dim=c(dim(p.list[[1]]), length(p.list)))
    p.ci <- apply(p.array, c(1,2), function(x)(quantile(x, c(0.5 - cl/2, 0.5 + cl/2))))
    aperm(p.ci, c(2,3,1))
}

totlos.normci.msm <- function(x, start=1, fromt=0, tot=Inf, covariates="mean", cl=0.95, B=1000, ...) {
    t.list <- normboot.msm(x, function(x)totlos.msm(x, start, fromt, tot, covariates, ci="none"), B)
    t.array <- do.call("rbind", t.list)
    apply(t.array, 2, function(x)(quantile(x, c(0.5 - cl/2, 0.5 + cl/2))))
}

expected.normci.msm <- function(x,
                                times=NULL,
                                timezero=NULL,
                                initstates=NULL,
                                covariates="mean",
                                misccovariates="mean",
                                piecewise.times=NULL,
                                piecewise.covariates=NULL,
                                risk=NULL,
                                cl=0.95, B=1000) {
    if(is.null(risk)) risk <- observed.msm(x)$risk
    e.list <- normboot.msm(x, function(x){
        expected.msm(x, times, timezero, initstates, covariates, misccovariates, piecewise.times, piecewise.covariates, risk)
    }, B)
    e.tab.array <- array(unlist(lapply(e.list, function(x)x[[1]])), dim=c(dim(e.list[[1]][[1]]), length(e.list)))
    e.perc.array <- array(unlist(lapply(e.list, function(x)x[[2]])), dim=c(dim(e.list[[1]][[2]]), length(e.list)))
    e.tab.ci <- apply(e.tab.array, c(1,2), function(x)(quantile(x, c(0.5 - cl/2, 0.5 + cl/2))))
    e.perc.ci <- apply(e.perc.array, c(1,2), function(x)(quantile(x, c(0.5 - cl/2, 0.5 + cl/2))))
    res <- list(aperm(e.tab.ci, c(2,3,1)),  aperm(e.perc.ci, c(2,3,1)))
    names(res) <- c("Expected", "Expected percentages")
    res
}
C++ Revenue Management Open Library Code

Branches

Tags

[2722f9]: / R / msm / R / boot.R Maximize Restore History

248 lines (224 with data), 12.8 kB