% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PB-modcomp.R
\name{pb-modcomp}
\alias{pb-modcomp}
\alias{PBmodcomp}
\alias{PBmodcomp.lm}
\alias{PBmodcomp.merMod}
\alias{getLRT}
\alias{getLRT.lm}
\alias{getLRT.merMod}
\alias{plot.XXmodcomp}
\alias{PBmodcomp.mer}
\alias{getLRT.mer}
\alias{seqPBmodcomp}
\title{Model comparison using parametric bootstrap methods.}
\usage{
PBmodcomp(
  largeModel,
  smallModel,
  nsim = 1000,
  ref = NULL,
  seed = NULL,
  cl = NULL,
  details = 0
)

\method{PBmodcomp}{merMod}(
  largeModel,
  smallModel,
  nsim = 1000,
  ref = NULL,
  seed = NULL,
  cl = NULL,
  details = 0
)

\method{PBmodcomp}{lm}(
  largeModel,
  smallModel,
  nsim = 1000,
  ref = NULL,
  seed = NULL,
  cl = NULL,
  details = 0
)

seqPBmodcomp(largeModel, smallModel, h = 20, nsim = 1000, cl = 1)
}
\arguments{
\item{largeModel}{A model object. Can be a linear mixed effects
model or generalized linear mixed effects model (as fitted with the
\code{lmer()} and \code{glmer()} functions in the \pkg{lme4}
package) or a linear normal model or a generalized linear
model. The \code{largeModel} must be larger than
\code{smallModel} (see below).}

\item{smallModel}{A model of the same type as \code{largeModel} or
a restriction matrix.}

\item{nsim}{The number of simulations to form the reference
distribution.}

\item{ref}{Vector containing samples from the reference
distribution. If \code{NULL}, this vector will be generated using
\code{PBrefdist()}.}

\item{seed}{A seed that will be passed to the simulation of new
datasets.}

\item{cl}{A vector identifying a cluster; used for calculating the
reference distribution using several cores. See examples below.}

\item{details}{The amount of output produced. Mainly relevant for
debugging purposes.}

\item{h}{For sequential computing of bootstrap p-values: The
number of extreme cases that must be generated before the sampling
process stops.}
}
\description{
Model comparison of nested models using parametric bootstrap
    methods.  Implemented for some commonly applied model types.
}
\details{
The comparison requires models fitted with maximum likelihood
    (i.e. with \code{REML=FALSE}). If a model is fitted with
    restricted maximum likelihood (i.e. with \code{REML=TRUE}) then
    the model is refitted with \code{REML=FALSE} before the
    p-values are calculated. Put differently, the user need not
    worry about this issue.

Under the fitted hypothesis (i.e. under the fitted small model) \code{nsim}
samples of the likelihood ratio test statistic (LRT) are generated.

Then p-values are calculated as follows:

LRT: Assuming that LRT has a chi-square distribution.

PBtest: The fraction of simulated LRT-values that are larger than or equal to
the observed LRT value.

Bartlett: A Bartlett correction of LRT is calculated from the mean of the
simulated LRT-values.

Gamma: The reference distribution of LRT is assumed to be a gamma
distribution with mean and variance determined as the sample mean and sample
variance of the simulated LRT-values.

F: The LRT divided by the number of degrees of freedom is assumed to be
F-distributed, where the denominator degrees of freedom are determined by
matching the first moment of the reference distribution.
}
\note{
It can happen that some values of the LRT statistic in the
    reference distribution are negative. When this happens one will
    see that the number of used samples (those where the LRT is
    positive) is reported (this number is smaller than the
    requested number of samples).

In theory one cannot have a negative value of the LRT statistic but in
practice one can. We speculate that the reason is as follows: We simulate data
under the small model and fit both the small and the large model to the
simulated data. Therefore the large model represents - by definition - an
overfit; the model has superfluous parameters in it. Therefore the fit of the
two models will for some simulated datasets be very similar resulting in
similar values of the log-likelihood. There is no guarantee that the
log-likelihood for the large model in practice always will be larger than for
the small (convergence problems and other numerical issues can play a role
here).

To look further into the problem, one can use the \code{PBrefdist()} function
for simulating the reference distribution (this reference distribution can be
provided as input to \code{PBmodcomp()}). Inspection sometimes reveals that
while many values are negative, they are numerically very small. In this case
one may try to replace the negative values by a small positive value and then
invoke \code{PBmodcomp()} to get some idea about how strong an influence this
has on the resulting p-values. (The p-values get smaller this way compared to
the case when only the originally positive values are used).
}
\examples{

data(beets, package="pbkrtest")
head(beets)

NSIM <- 50 ## Simulations in parametric bootstrap

## Linear mixed effects model:
sug   <- lmer(sugpct ~ block + sow + harvest + (1|block:harvest),
              data=beets, REML=FALSE)
sug.h <- update(sug, .~. -harvest)
sug.s <- update(sug, .~. -sow)

anova(sug, sug.h)
PBmodcomp(sug, sug.h, nsim=NSIM, cl=1)
anova(sug, sug.s)
PBmodcomp(sug, sug.s, nsim=NSIM, cl=1)

## Linear normal model:
sug <- lm(sugpct ~ block + sow + harvest, data=beets)
sug.h <- update(sug, .~. -harvest)
sug.s <- update(sug, .~. -sow)

anova(sug, sug.h)
PBmodcomp(sug, sug.h, nsim=NSIM, cl=1)
anova(sug, sug.s)
PBmodcomp(sug, sug.s, nsim=NSIM, cl=1)

## Generalized linear model
counts    <- c(18, 17, 15, 20, 10, 20, 25, 13, 12)
outcome   <- gl(3, 1, 9)
treatment <- gl(3, 3)
d.AD      <- data.frame(treatment, outcome, counts)
head(d.AD)
glm.D93   <- glm(counts ~ outcome + treatment, family = poisson())
glm.D93.o <- update(glm.D93, .~. -outcome)
glm.D93.t <- update(glm.D93, .~. -treatment)

anova(glm.D93, glm.D93.o, test="Chisq")
PBmodcomp(glm.D93, glm.D93.o, nsim=NSIM, cl=1)
anova(glm.D93, glm.D93.t, test="Chisq")
PBmodcomp(glm.D93, glm.D93.t, nsim=NSIM, cl=1)

## Generalized linear mixed model (it takes a while to fit these)
\dontrun{
(gm1 <- glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
              data = cbpp, family = binomial))
(gm2 <- update(gm1, .~.-period))
anova(gm1, gm2)
PBmodcomp(gm1, gm2, cl=2)
}


\dontrun{
(fmLarge <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy))
## removing Days
(fmSmall <- lmer(Reaction ~ 1 + (Days|Subject), sleepstudy))
anova(fmLarge, fmSmall)
PBmodcomp(fmLarge, fmSmall, cl=1)

## The same test using a restriction matrix
L <- cbind(0,1)
PBmodcomp(fmLarge, L, cl=1)

## Vanilla
PBmodcomp(beet0, beet_no.harv, nsim=NSIM, cl=1)

## Simulate reference distribution separately:
refdist <- PBrefdist(beet0, beet_no.harv, nsim=1000)
PBmodcomp(beet0, beet_no.harv, ref=refdist, cl=1)

## Do computations with multiple processors:
## Number of cores:
(nc <- detectCores())
## Create clusters
cl <- makeCluster(rep("localhost", nc))

## Then do:
PBmodcomp(beet0, beet_no.harv, cl=cl)

## Or in two steps:
refdist <- PBrefdist(beet0, beet_no.harv, nsim=NSIM, cl=cl)
PBmodcomp(beet0, beet_no.harv, ref=refdist)

## It is recommended to stop the clusters before quitting R:
stopCluster(cl)
}

## Linear and generalized linear models:

m11 <- lm(dist ~ speed + I(speed^2), data=cars)
m10 <- update(m11, ~.-I(speed^2))
anova(m11, m10)

PBmodcomp(m11, m10, cl=1, nsim=NSIM)
PBmodcomp(m11, ~.-I(speed^2), cl=1, nsim=NSIM)
PBmodcomp(m11, c(0, 0, 1), cl=1, nsim=NSIM)

m21 <- glm(dist ~ speed + I(speed^2), family=Gamma("identity"), data=cars)
m20 <- update(m21, ~.-I(speed^2))
anova(m21, m20, test="Chisq")

PBmodcomp(m21, m20, cl=1, nsim=NSIM)
PBmodcomp(m21, ~.-I(speed^2), cl=1, nsim=NSIM)
PBmodcomp(m21, c(0, 0, 1), cl=1, nsim=NSIM)

}
\references{
Ulrich Halekoh, Søren Højsgaard (2014). A Kenward-Roger
    Approximation and Parametric Bootstrap Methods for Tests in Linear Mixed
    Models - The R Package pbkrtest. Journal of Statistical Software,
    59(9), 1-30. \url{https://www.jstatsoft.org/v59/i09/}
}
\seealso{
\code{\link{KRmodcomp}}, \code{\link{PBrefdist}}
}
\author{
Søren Højsgaard \email{sorenh@math.aau.dk}
}
\keyword{inference}
\keyword{models}
