Sources

R packages

"ape" (Paradis et al 2004)

"nlme" (Pinheiro et al 2012)

"GLSME" (Hansen and Bartoszek 2012)

"MCMCglmm" (Hadfield 2010)

"phytools" (Revell 2012)

Matlab packages

"MERegPHYSIG" (Ives et al 2007)

Data

brain size and body size on primates ("primate_ind.txt", a tab separated text file), with more than one observation for the majority of species, individual-specific data

brain size and body size on primates ("primate_spec.txt", a tab separated text file), species-specific data obtained from the exercises above, includes information on within-species sampling (sample sizes, variances and standard errors

phylogeny ("primate_tree.phy", in phylip format) is taken from 10kTrees (Arnold et al 2010) and tailored to the data at hand

Codes

ape & nlme (PGLS)

When measurement error is ignored

library(ape)
library(nlme)
data.spec = read.table("primate_spec.txt", sep = "\t", header = TRUE)
tree = read.tree("primate_tree.phy")
row.names(data.spec) = data.spec$species
pgls.nlme = gls(lg.brain ~ lg.body, data.spec, correlation = corBrownian(phy = tree))
summary(pgls.nlme)
## Generalized least squares fit by REML
##   Model: lg.brain ~ lg.body 
##   Data: data.spec 
##     AIC   BIC logLik
##   60.48 67.77 -27.24
## 
## Correlation Structure: corBrownian
##  Formula: ~1 
##  Parameter estimate(s):
## numeric(0)
## 
## Coefficients:
##             Value Std.Error t-value p-value
## (Intercept) 1.218    0.5333   2.284  0.0249
## lg.body     0.287    0.0389   7.375  0.0000
## 
##  Correlation: 
##         (Intr)
## lg.body -0.544
## 
## Standardized residuals:
##     Min      Q1     Med      Q3     Max 
## -2.1662  0.1224  0.7819  1.1608  3.3928 
## 
## Residual standard error: 0.8308 
## Degrees of freedom: 86 total; 84 residual

When considering measurement error (on the predictor)

pglsME.nlme = gls(lg.brain ~ lg.body, data.spec, correlation = corBrownian(phy = tree),
    weights = varFixed(~lg.body_var.adj))
summary(pglsME.nlme)
## Generalized least squares fit by REML
##   Model: lg.brain ~ lg.body 
##   Data: data.spec 
##     AIC   BIC logLik
##   181.9 189.2 -87.97
## 
## Correlation Structure: corBrownian
##  Formula: ~1 
##  Parameter estimate(s):
## numeric(0)
## Variance function:
##  Structure: fixed weights
##  Formula: ~lg.body_var.adj 
## 
## Coefficients:
##               Value Std.Error t-value p-value
## (Intercept) -1.3018    0.3866  -3.367  0.0011
## lg.body      0.7049    0.0485  14.549  0.0000
## 
##  Correlation: 
##         (Intr)
## lg.body -0.983
## 
## Standardized residuals:
##      Min       Q1      Med       Q3      Max 
## -0.95067 -0.35625 -0.13744  0.05117  1.55863 
## 
## Residual standard error: 8.399 
## Degrees of freedom: 86 total; 84 residual

MERegPHYSIG (outputs from the program)

When measurement error is ignored

%METHOD = GLS
%
%Data file and ME file saved as 'workingDataFile.txt' and 'workingMEfile.txt'
%
%Coefficients
%b0 (intercept) = 1.2179
%b1 = 0.28701
%
%sigma2 = 0.10725
%
%Independent variable means, variances, and correlations
%aX1 = 7.4524
%s2X1 = 0.83315
%
%LnLikelihood = -25.028
%-2LL = 50.056
%AIC(par=3) = 56.056
		

When considering measurement error (on both the response and predictor, but without within-species covariance)

%METHOD = EGLS
%
%Data file and ME file saved as 'workingDataFile.txt' and 'workingMEfile.txt'
%
%Coefficients with approximate SE
%b0 (intercept) = 1.1929 +- 0.53329
%b1 = 0.29033 +- 0.038919
%
%sigma2 = 0.12494
%
%Independent variable means, variances, and correlations
%aX1 = 7.4533
%s2X1 = 0.82363

%METHOD = ML
%
%Data file and ME file saved as 'workingDataFile.txt' and 'workingMEfile.txt'
%
%Coefficients with approximate SE
%b0 (intercept) = 1.1659 +- 0.52755
%b1 = 0.29394 +- 0.039164
%
%correlation matrix of estimates of b = 
%    1    -0.55338
%    -0.55338           1
%
%sigma2 = 0.1046
%
%Independent variable means, variances, and correlations
%aX1 = 7.4534
%s2X1 = 0.81061
%
%LnLikelihood = -138.0037
%-2LL = 276.0074
%AIC(par=5) = 286.0074

%METHOD = REML
%
%Data file and ME file saved as 'workingDataFile.txt' and 'workingMEfile.txt'
%
%Coefficients
%b0 (intercept) = 1.1648
%b1 = 0.29384
%
%sigma2 = 0.10462
%
%Independent variable means, variances, and correlations
%aX1 = 7.4597
%s2X1 = 0.020603
%
%LnLikelihood = -132.3263
%-2LL = 264.6526
%AIC(par=2) = 268.6526
		

phytools

When measurement error is ignored

library(phytools)
data.spec = read.table("primate_spec.txt", sep = "\t", header = TRUE)
tree = read.tree("primate_tree.phy")
pgls.Ives(tree = tree, X = setNames(data.spec$lg.body, data.spec$species), y = setNames(data.spec$lg.brain,
    data.spec$species), Vx = setNames(rep(0, length(tree$tip)), data.spec$species),
    Vy = setNames(rep(0, length(tree$tip)), data.spec$species), Cxy = setNames(rep(0,
        length(tree$tip)), data.spec$species))
## $beta
## [1] 0.6108 0.3123
## 
## $sig2x
## [1] 0.108
## 
## $sig2y
## [1] 0.009439
## 
## $a
## [1] -0.5942  0.4252
## 
## $logL
## [1] -156.5
## 
## $convergence
## [1] 0
## 
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

The convergence is often sensitive to starting conditions. It may be more straightforward to repeat the above procedure several times (10-100) and select the results with the highest likelihood:

res = c()
for (k in 1:100) {
    mod = try(pgls.Ives(tree = tree, X = setNames(data.spec$lg.body, data.spec$species),
        y = setNames(data.spec$lg.brain, data.spec$species), Vx = setNames(rep(0,
            length(tree$tip)), data.spec$species), Vy = setNames(rep(0, length(tree$tip)),
            data.spec$species), Cxy = setNames(rep(0, length(tree$tip)), data.spec$species)))
    if (class(mod) != "try-error") {
        int = mod$beta[1]
        slope = mod$beta[2]
        lik = mod$logL
        anc_a = mod$a[1]
        anc_b = mod$a[2]
        conv = mod$convergence
    }
    res = rbind(res, data.frame(k, int, slope, anc_a, anc_b, lik, conv))
}
res[which(res[, 6] == max(subset(res, conv == 0)[, 6])), ]
##     k   int  slope anc_a anc_b    lik conv
## 56 56 1.215 0.2872 7.434  3.35 -138.7    0

When considering measurement error (on both the response and predictor, but without within-species covariance)

res = c()
for (k in 1:100) {
    mod = try(pgls.Ives(tree = tree, X = setNames(data.spec$lg.body, data.spec$species),
        y = setNames(data.spec$lg.brain, data.spec$species), Vx = setNames(data.spec$lg.body_var.adj,
            data.spec$species), Vy = setNames(data.spec$lg.brain_var.adj, data.spec$species),
        Cxy = setNames(rep(0, length(tree$tip)), data.spec$species)))
    if (class(mod) != "try-error") {
        int = mod$beta[1]
        slope = mod$beta[2]
        lik = mod$logL
        anc_a = mod$a[1]
        anc_b = mod$a[2]
        conv = mod$convergence
    }
    res = rbind(res, data.frame(k, int, slope, anc_a, anc_b, lik, conv))
}
res[which(res[, 6] == max(subset(res, conv == 0)[, 6])), ]
##     k     int  slope anc_a anc_b    lik conv
## 59 59 -0.2379 0.4826  7.46 3.362 -125.7    0

GLSME

When both measurement error and phylogeny are ignored (GLS)

# functions are stored in a source file (available from K. Bartoszek's
# website)
source("~/Dropbox/comparative book/website/OPMs/chap_7_Garamszegi/GLSME.R")
data.spec = read.table("primate_spec.txt", sep = "\t", header = TRUE)
row.names(data.spec) = data.spec$species
tree = read.tree("primate_tree.phy")
gls.glsme = GLSME(y = data.spec$lg.brain, D = cbind(rep(1, length(data.spec$lg.body)),
    data.spec$lg.body), Vt = 1, Ve = 0, Vd = list("F", "F"), Vu = 0)
gls.glsme
## $GLSestimate
##         [,1]
## [1,] -1.6429
## [2,]  0.7128
## 
## $errorGLSestim
##         [,1]
## [1,] 0.32506
## [2,] 0.04057
## 
## $BiasCorrectedGLSestimate
##         [,1]
## [1,] -1.6429
## [2,]  0.7128
## 
## $errorBiasCorrectedGLSestim
##         [,1]
## [1,] 0.32506
## [2,] 0.04057
## 
## $K
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
## 
## $R2
##        [,1]
## [1,] 0.7821
## 
## $R2BiasCorrectedModel
##        [,1]
## [1,] 0.7821
## 
## $LogLikelihood
## [1] -63.45
## 
## $LogLikelihoodBiasCorrectedModel
## [1] -63.45
## 
## $PredictorVarianceConstantEstimate
## [1] 0 0
## 
## $ResponseVarianceConstantEstimate
## [1] 0.2561

When measurement error is ignored but phylogeny is accounted for (PGLS)

# the order of species in the data should be the same as the order of
# species in the VCV matrix, thus the dataset should be resorted as
# data.spec[tree$tip.label,]!
pgls.glsme = GLSME(y = data.spec[tree$tip.label, ]$lg.brain, D = cbind(rep(1,
    length(data.spec$lg.body)), data.spec[tree$tip.label, ]$lg.body), Vt = vcv(tree),
    Ve = 0, Vd = list("F", "F"), Vu = 0)
pgls.glsme
## $GLSestimate
##       [,1]
## [1,] 1.218
## [2,] 0.287
## 
## $errorGLSestim
##         [,1]
## [1,] 0.52707
## [2,] 0.03846
## 
## $BiasCorrectedGLSestimate
##       [,1]
## [1,] 1.218
## [2,] 0.287
## 
## $errorBiasCorrectedGLSestim
##         [,1]
## [1,] 0.52707
## [2,] 0.03846
## 
## $K
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
## 
## $R2
##       [,1]
## [1,] 0.393
## 
## $R2BiasCorrectedModel
##       [,1]
## [1,] 0.393
## 
## $LogLikelihood
## [1] -25.02
## 
## $LogLikelihoodBiasCorrectedModel
## [1] -25.02
## 
## $PredictorVarianceConstantEstimate
## [1] 0 0
## 
## $ResponseVarianceConstantEstimate
## [1] 0.009235

When both measurement error and phylogeny are accounted for (considering measurement error (on both the response and predictor, but without within-species covariance)

# again, the order of species in the data should be the same as the order of
# species in the VCV matrix, thus the dataset should be resorted as
# data.spec[tree$tip.label,]! in addition, note that when there is a
# measurement error in the predictors, these are treated as 'random effects'
# (as specified by the non zero entry in the Vd matrix), and they will be
# centered on their means by default (it can be switched off by setting
# CenterPredictor=FALSE)
pglsME.glsme = GLSME(y = data.spec[tree$tip.label, ]$lg.brain, D = cbind(rep(1,
    length(data.spec$lg.body)), data.spec[tree$tip.label, ]$lg.body), Vt = vcv(tree),
    Ve = diag(data.spec[tree$tip.label, ]$lg.brain_var.adj), Vd = list("F",
        vcv(tree)), Vu = list("F", diag(data.spec[tree$tip.label, ]$lg.body_var.adj)))
pglsME.glsme
## $GLSestimate
##        [,1]
## [1,] 3.3610
## [2,] 0.3209
## 
## $errorGLSestim
##         [,1]
## [1,] 0.48730
## [2,] 0.04689
## 
## $BiasCorrectedGLSestimate
##        [,1]
## [1,] 3.3582
## [2,] 0.4173
## 
## $errorBiasCorrectedGLSestim
##         [,1]
## [1,] 0.48730
## [2,] 0.06111
## 
## $K
##      [,1]     [,2]
## [1,]    1 0.006861
## [2,]    0 0.769192
## 
## $R2
##        [,1]
## [1,] 0.4894
## 
## $R2BiasCorrectedModel
##        [,1]
## [1,] 0.5328
## 
## $LogLikelihood
## [1] -23.66
## 
## $LogLikelihoodBiasCorrectedModel
## [1] -21.25
## 
## $PredictorVarianceConstantEstimate
## [1] 0.00000 0.04932
## 
## $ResponseVarianceConstantEstimate
## [1] 0.0112
# as a consequence of centering, the estimated intercept corresponds to
# b0+b1*mean(data.spec$lg.body) from model data.spec$lg.brain =
# b0+b1*mean(data.spec$lg.body) + b1*(data.spec$lg.body-
# mean(data.spec$lg.body)). Therefore, the intercept of interests can be
# obtained as:
pglsME.glsme$BiasCorrectedGLSestimate[1] - pglsME.glsme$BiasCorrectedGLSestimate[2] *
    mean(data.spec$lg.body)
## [1] 0.06225
# from which, the slope of interest is:
pglsME.glsme$BiasCorrectedGLSestimate[2]
## [1] 0.4173
# taking into account estimation error after the bias correction, the slope
# falls within the error range of:
slope_range = rbind(pglsME.glsme$BiasCorrectedGLSestimate[2] - pglsME.glsme$errorBiasCorrectedGLSestim[2],
    pglsME.glsme$BiasCorrectedGLSestimate[2] + pglsME.glsme$errorBiasCorrectedGLSestim[2])
slope_range
##        [,1]
## [1,] 0.3561
## [2,] 0.4784
# the standard error of the intercept can be computed based on the variance
# of the noncentered intercept estimate (T. Hansen and K. Bartoszek pers.
# comm.):
int_var = pglsME.glsme$errorBiasCorrectedGLSestim[1]^2 + pglsME.glsme$errorBiasCorrectedGLSestim[2]^2 *
    mean(data.spec$lg.body)^2
sqrt(int_var)
## [1] 0.6859
# from this the error range around the non-centred intercept can be given:
intercept_range = rbind(pglsME.glsme$BiasCorrectedGLSestimate[1] - pglsME.glsme$BiasCorrectedGLSestimate[2] *
    mean(data.spec$lg.body) - sqrt(int_var), pglsME.glsme$BiasCorrectedGLSestimate[1] -
    pglsME.glsme$BiasCorrectedGLSestimate[2] * mean(data.spec$lg.body) + sqrt(int_var))
intercept_range
##         [,1]
## [1,] -0.6237
## [2,]  0.7481

MCMCglmm

To prepare the dataset, we need to generate a column named 'animal'

library(MCMCglmm)
data.spec = read.table("primate_spec.txt", sep = "\t", header = TRUE)
data.ind = read.table("primate_ind.txt", sep = "\t", header = TRUE)
tree = read.tree("primate_tree.phy")
data.spec$animal = data.spec$species
data.ind$animal = data.ind$species

When measurement error is ignored, and species-specific data are used (equivalent with PGLS)

prior <- list(R = list(V = 1, nu = 0.002), G = list(G1 = list(V = 1, nu = 0.002)))
mcmc = MCMCglmm(lg.brain ~ lg.body, random = ~animal, data = data.spec, pedigree = tree,
    prior = prior, verbose = F, family = "gaussian", nodes = "TIPS")
summary(mcmc)
## 
##  Iterations = 3001:12991
##  Thinning interval  = 10
##  Sample size  = 1000 
## 
##  DIC: -238.3 
## 
##  G-structure:  ~animal
## 
##        post.mean l-95% CI u-95% CI eff.samp
## animal      0.69     0.47    0.904     1000
## 
##  R-structure:  ~units
## 
##       post.mean l-95% CI u-95% CI eff.samp
## units   0.00327 0.000199  0.00967      226
## 
##  Location effects: lg.brain ~ lg.body 
## 
##             post.mean l-95% CI u-95% CI eff.samp  pMCMC    
## (Intercept)    1.0990   0.0235   2.0715     1000  0.024 *  
## lg.body        0.2996   0.2282   0.3870     1000 <0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

When measurement error is considered on the predictor variable, and individual-specific data are used (phylogenetic mixed model)

# we need within-subject centering to do this (van de Pol and Wright 2009),
# the approprate data transformation:
data.ind$lg.body = log(data.ind$body)
data.ind$lg.brain = log(data.ind$brain)
means = tapply(X = data.ind$lg.body, INDEX = data.ind$species, FUN = mean)
N = tapply(X = data.ind$lg.body, INDEX = data.ind$species, FUN = length)
mean.vector = vector()
for (k in 1:length(means)) {
    mean.set = rep(means[k], N[k])
    mean.vector = rbind(mean.vector, as.data.frame(mean.set))
}
data.ind$body_av = mean.vector[, 1]
data.ind$body_centr <- data.ind$lg.body - data.ind$body_av

# the model based on individual-specific data
prior2 = list(G = list(G1 = list(V = 1, nu = 0.02), G2 = list(V = 1, nu = 0.02)),
    R = list(V = 1, nu = 0.02))
mcmc.ind.centr = MCMCglmm(lg.brain ~ body_centr + body_av, random = ~animal +
    species, data = data.ind, pedigree = tree, prior = prior2, verbose = F,
    family = "gaussian", nodes = "TIPS")
summary(mcmc.ind.centr)
## 
##  Iterations = 3001:12991
##  Thinning interval  = 10
##  Sample size  = 1000 
## 
##  DIC: -1236 
## 
##  G-structure:  ~animal
## 
##        post.mean l-95% CI u-95% CI eff.samp
## animal     0.649     0.44    0.899     1000
## 
##                ~species
## 
##         post.mean l-95% CI u-95% CI eff.samp
## species   0.00642   0.0017   0.0134      469
## 
##  R-structure:  ~units
## 
##       post.mean l-95% CI u-95% CI eff.samp
## units    0.0134    0.012   0.0147      911
## 
##  Location effects: lg.brain ~ body_centr + body_av 
## 
##             post.mean l-95% CI u-95% CI eff.samp  pMCMC    
## (Intercept)     1.445    0.542    2.492     1000  0.002 ** 
## body_centr      0.129    0.114    0.144     1000 <0.001 ***
## body_av         0.258    0.184    0.318     1000 <0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The slope for 'body_av' that gives the interspecific slope that is controlled for phylogeny and within-species variance in body mass (predictor).

See more exercises with "MCMCglmm" in the OPM for chapter 11

References