brain size and body size on primates ("primate_ind.txt", a tab separated text file), with more than one observation for the majority of species, individual-specific data

brain size and body size on primates ("primate_spec.txt", a tab separated text file), species-specific data obtained from the exercises above, includes information on within-species sampling (sample sizes, variances and standard errors

phylogeny ("primate_tree.phy", in phylip format) is taken from 10kTrees (Arnold et al 2010) and tailored to the data at hand

Codes

ape & nlme (PGLS)

library(ape)
library(nlme)
data.spec = read.table("primate_spec.txt", sep = "\t", header = TRUE)
tree = read.tree("primate_tree.phy")
row.names(data.spec) = data.spec$species
pgls.nlme = gls(lg.brain ~ lg.body, data.spec, correlation = corBrownian(phy = tree))
summary(pgls.nlme)

## Generalized least squares fit by REML
##   Model: lg.brain ~ lg.body 
##   Data: data.spec 
##     AIC   BIC logLik
##   60.48 67.77 -27.24
## 
## Correlation Structure: corBrownian
##  Formula: ~1 
##  Parameter estimate(s):
## numeric(0)
## 
## Coefficients:
##             Value Std.Error t-value p-value
## (Intercept) 1.218    0.5333   2.284  0.0249
## lg.body     0.287    0.0389   7.375  0.0000
## 
##  Correlation: 
##         (Intr)
## lg.body -0.544
## 
## Standardized residuals:
##     Min      Q1     Med      Q3     Max 
## -2.1662  0.1224  0.7819  1.1608  3.3928 
## 
## Residual standard error: 0.8308 
## Degrees of freedom: 86 total; 84 residual

pglsME.nlme = gls(lg.brain ~ lg.body, data.spec, correlation = corBrownian(phy = tree),
    weights = varFixed(~lg.body_var.adj))
summary(pglsME.nlme)

## Generalized least squares fit by REML
##   Model: lg.brain ~ lg.body 
##   Data: data.spec 
##     AIC   BIC logLik
##   181.9 189.2 -87.97
## 
## Correlation Structure: corBrownian
##  Formula: ~1 
##  Parameter estimate(s):
## numeric(0)
## Variance function:
##  Structure: fixed weights
##  Formula: ~lg.body_var.adj 
## 
## Coefficients:
##               Value Std.Error t-value p-value
## (Intercept) -1.3018    0.3866  -3.367  0.0011
## lg.body      0.7049    0.0485  14.549  0.0000
## 
##  Correlation: 
##         (Intr)
## lg.body -0.983
## 
## Standardized residuals:
##      Min       Q1      Med       Q3      Max 
## -0.95067 -0.35625 -0.13744  0.05117  1.55863 
## 
## Residual standard error: 8.399 
## Degrees of freedom: 86 total; 84 residual

MERegPHYSIG (outputs from the program)

When considering measurement error (on both the response and predictor, but without within-species covariance)

phytools

library(phytools)
data.spec = read.table("primate_spec.txt", sep = "\t", header = TRUE)
tree = read.tree("primate_tree.phy")
pgls.Ives(tree = tree, X = setNames(data.spec$lg.body, data.spec$species), y = setNames(data.spec$lg.brain,
    data.spec$species), Vx = setNames(rep(0, length(tree$tip)), data.spec$species),
    Vy = setNames(rep(0, length(tree$tip)), data.spec$species), Cxy = setNames(rep(0,
        length(tree$tip)), data.spec$species))

## $beta
## [1] 0.6108 0.3123
## 
## $sig2x
## [1] 0.108
## 
## $sig2y
## [1] 0.009439
## 
## $a
## [1] -0.5942  0.4252
## 
## $logL
## [1] -156.5
## 
## $convergence
## [1] 0
## 
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

The convergence is often sensitive to starting conditions. It may be more straightforward to repeat the above procedure several times (10-100) and select the results with the highest likelihood:

res = c()
for (k in 1:100) {
    mod = try(pgls.Ives(tree = tree, X = setNames(data.spec$lg.body, data.spec$species),
        y = setNames(data.spec$lg.brain, data.spec$species), Vx = setNames(rep(0,
            length(tree$tip)), data.spec$species), Vy = setNames(rep(0, length(tree$tip)),
            data.spec$species), Cxy = setNames(rep(0, length(tree$tip)), data.spec$species)))
    if (class(mod) != "try-error") {
        int = mod$beta[1]
        slope = mod$beta[2]
        lik = mod$logL
        anc_a = mod$a[1]
        anc_b = mod$a[2]
        conv = mod$convergence
    }
    res = rbind(res, data.frame(k, int, slope, anc_a, anc_b, lik, conv))
}
res[which(res[, 6] == max(subset(res, conv == 0)[, 6])), ]

##     k   int  slope anc_a anc_b    lik conv
## 56 56 1.215 0.2872 7.434  3.35 -138.7    0

When considering measurement error (on both the response and predictor, but without within-species covariance)

res = c()
for (k in 1:100) {
    mod = try(pgls.Ives(tree = tree, X = setNames(data.spec$lg.body, data.spec$species),
        y = setNames(data.spec$lg.brain, data.spec$species), Vx = setNames(data.spec$lg.body_var.adj,
            data.spec$species), Vy = setNames(data.spec$lg.brain_var.adj, data.spec$species),
        Cxy = setNames(rep(0, length(tree$tip)), data.spec$species)))
    if (class(mod) != "try-error") {
        int = mod$beta[1]
        slope = mod$beta[2]
        lik = mod$logL
        anc_a = mod$a[1]
        anc_b = mod$a[2]
        conv = mod$convergence
    }
    res = rbind(res, data.frame(k, int, slope, anc_a, anc_b, lik, conv))
}
res[which(res[, 6] == max(subset(res, conv == 0)[, 6])), ]

##     k     int  slope anc_a anc_b    lik conv
## 59 59 -0.2379 0.4826  7.46 3.362 -125.7    0

GLSME

# functions are stored in a source file (available from K. Bartoszek's
# website)
source("~/Dropbox/comparative book/website/OPMs/chap_7_Garamszegi/GLSME.R")
data.spec = read.table("primate_spec.txt", sep = "\t", header = TRUE)
row.names(data.spec) = data.spec$species
tree = read.tree("primate_tree.phy")
gls.glsme = GLSME(y = data.spec$lg.brain, D = cbind(rep(1, length(data.spec$lg.body)),
    data.spec$lg.body), Vt = 1, Ve = 0, Vd = list("F", "F"), Vu = 0)
gls.glsme

## $GLSestimate
##         [,1]
## [1,] -1.6429
## [2,]  0.7128
## 
## $errorGLSestim
##         [,1]
## [1,] 0.32506
## [2,] 0.04057
## 
## $BiasCorrectedGLSestimate
##         [,1]
## [1,] -1.6429
## [2,]  0.7128
## 
## $errorBiasCorrectedGLSestim
##         [,1]
## [1,] 0.32506
## [2,] 0.04057
## 
## $K
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
## 
## $R2
##        [,1]
## [1,] 0.7821
## 
## $R2BiasCorrectedModel
##        [,1]
## [1,] 0.7821
## 
## $LogLikelihood
## [1] -63.45
## 
## $LogLikelihoodBiasCorrectedModel
## [1] -63.45
## 
## $PredictorVarianceConstantEstimate
## [1] 0 0
## 
## $ResponseVarianceConstantEstimate
## [1] 0.2561

# the order of species in the data should be the same as the order of
# species in the VCV matrix, thus the dataset should be resorted as
# data.spec[tree$tip.label,]!
pgls.glsme = GLSME(y = data.spec[tree$tip.label, ]$lg.brain, D = cbind(rep(1,
    length(data.spec$lg.body)), data.spec[tree$tip.label, ]$lg.body), Vt = vcv(tree),
    Ve = 0, Vd = list("F", "F"), Vu = 0)
pgls.glsme

## $GLSestimate
##       [,1]
## [1,] 1.218
## [2,] 0.287
## 
## $errorGLSestim
##         [,1]
## [1,] 0.52707
## [2,] 0.03846
## 
## $BiasCorrectedGLSestimate
##       [,1]
## [1,] 1.218
## [2,] 0.287
## 
## $errorBiasCorrectedGLSestim
##         [,1]
## [1,] 0.52707
## [2,] 0.03846
## 
## $K
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
## 
## $R2
##       [,1]
## [1,] 0.393
## 
## $R2BiasCorrectedModel
##       [,1]
## [1,] 0.393
## 
## $LogLikelihood
## [1] -25.02
## 
## $LogLikelihoodBiasCorrectedModel
## [1] -25.02
## 
## $PredictorVarianceConstantEstimate
## [1] 0 0
## 
## $ResponseVarianceConstantEstimate
## [1] 0.009235

When both measurement error and phylogeny are accounted for (considering measurement error (on both the response and predictor, but without within-species covariance)

# again, the order of species in the data should be the same as the order of
# species in the VCV matrix, thus the dataset should be resorted as
# data.spec[tree$tip.label,]! in addition, note that when there is a
# measurement error in the predictors, these are treated as 'random effects'
# (as specified by the non zero entry in the Vd matrix), and they will be
# centered on their means by default (it can be switched off by setting
# CenterPredictor=FALSE)
pglsME.glsme = GLSME(y = data.spec[tree$tip.label, ]$lg.brain, D = cbind(rep(1,
    length(data.spec$lg.body)), data.spec[tree$tip.label, ]$lg.body), Vt = vcv(tree),
    Ve = diag(data.spec[tree$tip.label, ]$lg.brain_var.adj), Vd = list("F",
        vcv(tree)), Vu = list("F", diag(data.spec[tree$tip.label, ]$lg.body_var.adj)))
pglsME.glsme

## $GLSestimate
##        [,1]
## [1,] 3.3610
## [2,] 0.3209
## 
## $errorGLSestim
##         [,1]
## [1,] 0.48730
## [2,] 0.04689
## 
## $BiasCorrectedGLSestimate
##        [,1]
## [1,] 3.3582
## [2,] 0.4173
## 
## $errorBiasCorrectedGLSestim
##         [,1]
## [1,] 0.48730
## [2,] 0.06111
## 
## $K
##      [,1]     [,2]
## [1,]    1 0.006861
## [2,]    0 0.769192
## 
## $R2
##        [,1]
## [1,] 0.4894
## 
## $R2BiasCorrectedModel
##        [,1]
## [1,] 0.5328
## 
## $LogLikelihood
## [1] -23.66
## 
## $LogLikelihoodBiasCorrectedModel
## [1] -21.25
## 
## $PredictorVarianceConstantEstimate
## [1] 0.00000 0.04932
## 
## $ResponseVarianceConstantEstimate
## [1] 0.0112

# as a consequence of centering, the estimated intercept corresponds to
# b0+b1*mean(data.spec$lg.body) from model data.spec$lg.brain =
# b0+b1*mean(data.spec$lg.body) + b1*(data.spec$lg.body-
# mean(data.spec$lg.body)). Therefore, the intercept of interests can be
# obtained as:
pglsME.glsme$BiasCorrectedGLSestimate[1] - pglsME.glsme$BiasCorrectedGLSestimate[2] *
    mean(data.spec$lg.body)

## [1] 0.06225

# from which, the slope of interest is:
pglsME.glsme$BiasCorrectedGLSestimate[2]

## [1] 0.4173

# taking into account estimation error after the bias correction, the slope
# falls within the error range of:
slope_range = rbind(pglsME.glsme$BiasCorrectedGLSestimate[2] - pglsME.glsme$errorBiasCorrectedGLSestim[2],
    pglsME.glsme$BiasCorrectedGLSestimate[2] + pglsME.glsme$errorBiasCorrectedGLSestim[2])
slope_range

##        [,1]
## [1,] 0.3561
## [2,] 0.4784

# the standard error of the intercept can be computed based on the variance
# of the noncentered intercept estimate (T. Hansen and K. Bartoszek pers.
# comm.):
int_var = pglsME.glsme$errorBiasCorrectedGLSestim[1]^2 + pglsME.glsme$errorBiasCorrectedGLSestim[2]^2 *
    mean(data.spec$lg.body)^2
sqrt(int_var)

## [1] 0.6859

# from this the error range around the non-centred intercept can be given:
intercept_range = rbind(pglsME.glsme$BiasCorrectedGLSestimate[1] - pglsME.glsme$BiasCorrectedGLSestimate[2] *
    mean(data.spec$lg.body) - sqrt(int_var), pglsME.glsme$BiasCorrectedGLSestimate[1] -
    pglsME.glsme$BiasCorrectedGLSestimate[2] * mean(data.spec$lg.body) + sqrt(int_var))
intercept_range

##         [,1]
## [1,] -0.6237
## [2,]  0.7481

MCMCglmm

library(MCMCglmm)
data.spec = read.table("primate_spec.txt", sep = "\t", header = TRUE)
data.ind = read.table("primate_ind.txt", sep = "\t", header = TRUE)
tree = read.tree("primate_tree.phy")
data.spec$animal = data.spec$species
data.ind$animal = data.ind$species

When measurement error is ignored, and species-specific data are used (equivalent with PGLS)

prior <- list(R = list(V = 1, nu = 0.002), G = list(G1 = list(V = 1, nu = 0.002)))
mcmc = MCMCglmm(lg.brain ~ lg.body, random = ~animal, data = data.spec, pedigree = tree,
    prior = prior, verbose = F, family = "gaussian", nodes = "TIPS")
summary(mcmc)

## 
##  Iterations = 3001:12991
##  Thinning interval  = 10
##  Sample size  = 1000 
## 
##  DIC: -238.3 
## 
##  G-structure:  ~animal
## 
##        post.mean l-95% CI u-95% CI eff.samp
## animal      0.69     0.47    0.904     1000
## 
##  R-structure:  ~units
## 
##       post.mean l-95% CI u-95% CI eff.samp
## units   0.00327 0.000199  0.00967      226
## 
##  Location effects: lg.brain ~ lg.body 
## 
##             post.mean l-95% CI u-95% CI eff.samp  pMCMC    
## (Intercept)    1.0990   0.0235   2.0715     1000  0.024 *  
## lg.body        0.2996   0.2282   0.3870     1000 <0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

When measurement error is considered on the predictor variable, and individual-specific data are used (phylogenetic mixed model)

# we need within-subject centering to do this (van de Pol and Wright 2009),
# the approprate data transformation:
data.ind$lg.body = log(data.ind$body)
data.ind$lg.brain = log(data.ind$brain)
means = tapply(X = data.ind$lg.body, INDEX = data.ind$species, FUN = mean)
N = tapply(X = data.ind$lg.body, INDEX = data.ind$species, FUN = length)
mean.vector = vector()
for (k in 1:length(means)) {
    mean.set = rep(means[k], N[k])
    mean.vector = rbind(mean.vector, as.data.frame(mean.set))
}
data.ind$body_av = mean.vector[, 1]
data.ind$body_centr <- data.ind$lg.body - data.ind$body_av

# the model based on individual-specific data
prior2 = list(G = list(G1 = list(V = 1, nu = 0.02), G2 = list(V = 1, nu = 0.02)),
    R = list(V = 1, nu = 0.02))
mcmc.ind.centr = MCMCglmm(lg.brain ~ body_centr + body_av, random = ~animal +
    species, data = data.ind, pedigree = tree, prior = prior2, verbose = F,
    family = "gaussian", nodes = "TIPS")
summary(mcmc.ind.centr)

## 
##  Iterations = 3001:12991
##  Thinning interval  = 10
##  Sample size  = 1000 
## 
##  DIC: -1236 
## 
##  G-structure:  ~animal
## 
##        post.mean l-95% CI u-95% CI eff.samp
## animal     0.649     0.44    0.899     1000
## 
##                ~species
## 
##         post.mean l-95% CI u-95% CI eff.samp
## species   0.00642   0.0017   0.0134      469
## 
##  R-structure:  ~units
## 
##       post.mean l-95% CI u-95% CI eff.samp
## units    0.0134    0.012   0.0147      911
## 
##  Location effects: lg.brain ~ body_centr + body_av 
## 
##             post.mean l-95% CI u-95% CI eff.samp  pMCMC    
## (Intercept)     1.445    0.542    2.492     1000  0.002 ** 
## body_centr      0.129    0.114    0.144     1000 <0.001 ***
## body_av         0.258    0.184    0.318     1000 <0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The slope for 'body_av' that gives the interspecific slope that is controlled for phylogeny and within-species variance in body mass (predictor).

Sources

R packages

Matlab packages

Data

Codes

ape & nlme (PGLS)

MERegPHYSIG (outputs from the program)

phytools

GLSME

MCMCglmm

References