Species-specific trait data ("primate_spec.txt"), a tab separated text file, species-specific data on brain size and body size in primates

Sample of phylogenetic trees ("10kTrees_Primates.nex"), 1000 phylogenetic hypotheses in nexus format, phylogeny of primates from Arnold et al. (2010)

A consensus tree ("consensusTre_Primates.nex"), the consenus phylogenetic tree for the above sample, nexus format, data from Arnold et al. (2010)

Codes

To get started

# activate libraries
library(ape)
library(nlme)
library(caper)
library(AICcmodavg)

# import data and trees
xdata <- read.table(file = "primate_spec.txt", header = T, sep = "\t", dec = ".")
row.names(xdata) = xdata$species
trees <- read.nexus("10kTrees_Primates.nex")
cons <- read.nexus("consensusTre_Primates.nex")

ape and nlme

We will run models on all trees then perform model averaging across them to get parameter estimates.

# for simplicity, we do not estimate lambda here
Cand.models = list()
niter = 1000
for (i in 1:niter) {
    Cand.models[[i]] = gls(lg.brain ~ lg.body, data = xdata, method = "ML",
        correlation = corPagel(1, trees[[i]], fixed = T))
}

# if you want to see the particular models
head(aictab(cand.set = Cand.models, modnames = paste("Tree Nr.", 1:niter, sep = " "),
    sort = F), 20)

## 
## Model selection based on AICc :
## 
##             K  AICc Delta_AICc AICcWt     LL
## Tree Nr. 1  3 57.33       6.47      0 -25.52
## Tree Nr. 2  3 55.53       4.67      0 -24.62
## Tree Nr. 3  3 56.37       5.51      0 -25.04
## Tree Nr. 4  3 57.56       6.70      0 -25.63
## Tree Nr. 5  3 56.26       5.40      0 -24.98
## Tree Nr. 6  3 55.09       4.23      0 -24.40
## Tree Nr. 7  3 55.49       4.63      0 -24.60
## Tree Nr. 8  3 57.04       6.18      0 -25.37
## Tree Nr. 9  3 56.38       5.52      0 -25.04
## Tree Nr. 10 3 57.52       6.66      0 -25.61
## Tree Nr. 11 3 56.73       5.87      0 -25.22
## Tree Nr. 12 3 53.81       2.95      0 -23.76
## Tree Nr. 13 3 56.19       5.34      0 -24.95
## Tree Nr. 14 3 57.02       6.16      0 -25.37
## Tree Nr. 15 3 57.36       6.50      0 -25.53
## Tree Nr. 16 3 55.63       4.77      0 -24.67
## Tree Nr. 17 3 57.23       6.37      0 -25.47
## Tree Nr. 18 3 56.49       5.63      0 -25.10
## Tree Nr. 19 3 58.61       7.75      0 -26.16
## Tree Nr. 20 3 55.40       4.54      0 -24.55

# model averaged intercept
Int_av = modavg(parm = "(Intercept)", cand.set = Cand.models, modnames = paste("Tree Nr.",
    1:niter, sep = " "))
Int_av$Mod.avg.beta

## [1] 1.189

Int_av$Lower.CL

## [1] 0.1526

Int_av$Upper.CL

## [1] 2.225

Int_av$Uncond.SE

## [1] 0.5287

# model averaged slope
beta_av = modavg(parm = "lg.body", cand.set = Cand.models, modnames = paste("Tree Nr.",
    1:niter, sep = " "))
beta_av$Mod.avg.beta

## [1] 0.2916

beta_av$Lower.CL

## [1] 0.2131

beta_av$Upper.CL

## [1] 0.3701

beta_av$Uncond.SE

## [1] 0.04005

caper

"AICcmodavg" does not work in this case, we need to do the multomodel inference "by hand". See also OPM for "1) Selecting among evolutionary models with different combinations of predictors"

# run all models, for simplicity we do not estimate lambda here
params = c()
niter = 1000
for (i in 1:niter) {
    tree.data = comparative.data(phy = trees[[i]], data = xdata, names.col = species,
        vcv = T, na.omit = F, warn.dropped = T)
    mod = pgls(lg.brain ~ lg.body, data = tree.data, lambda = 1)
    params = rbind(params, data.frame(row.names = NULL, tree = i, logLik = mod$model$log.lik,
        AIC = mod$aicc, lambda = mod$param[2], Int = mod$model$coef[1], Int_err = mod$sterr[1],
        beta = mod$model$coef[2], beta_err = mod$sterr[2]))
}

# get AIC delta and w
params$delta = params$AIC - min(params$AIC)
params$w = exp(-0.5 * params$delta)/sum(exp(-0.5 * params$delta))

# model averaging of parameters of interest
weighted.mean(params$Int, params$w)  #averaged Intercept

## [1] 1.189

w_err = c()
for (i in 1:niter) {
    w_err[i] = sqrt(params[i, ]$Int_err^2 + (params[i, ]$Int - weighted.mean(params$Int,
        params$w))^2)
}
weighted.mean(w_err, params$w)  #SE

## [1] 0.5268

weighted.mean(params$Int, params$w) - 1.96 * weighted.mean(w_err, params$w)  #lower CI (from SE)

## [1] 0.1562

weighted.mean(params$Int, params$w) + 1.96 * weighted.mean(w_err, params$w)  #Upper CI (from SE)

## [1] 2.221

quantile(params$Int, c(0.025, 0.975))  #CI

##  2.5% 97.5% 
## 1.075 1.288

weighted.mean(params$beta, params$w)  #averaged slope

## [1] 0.2916

for (i in 1:niter) {
    w_err[i] = sqrt(params[i, ]$beta_err^2 + (params[i, ]$beta - weighted.mean(params$beta,
        params$w))^2)
}
weighted.mean(w_err, params$w)  #SE

## [1] 0.03964

weighted.mean(params$beta, params$w) - 1.96 * weighted.mean(w_err, params$w)  #Lower CI

## [1] 0.2139

weighted.mean(params$beta, params$w) + 1.96 * weighted.mean(w_err, params$w)  #Upper CI

## [1] 0.3693

quantile(params$beta, c(0.025, 0.975))  #CI

##   2.5%  97.5% 
## 0.2778 0.3128

Plotting the results

plot(xdata$lg.body, xdata$lg.brain, xlab = "log(Body size)", ylab = "log(Brain size)",
    pch = NA_integer_)
# this draws the regression lines corresponding to different trees in the sample:
for (i in 1:niter) {
    abline(Cand.models[[i]], col = "lightblue")
}
# this is what you would get if you used the consensus phylogenetic tree:
abline(gls(lg.brain ~ lg.body, data = xdata, method = "ML", correlation = corPagel(1,
    cons, fixed = T)))
# this is what the model averaged parameters deliniate:
abline(a = Int_av$Mod.avg.beta, b = beta_av$Mod.avg.beta, lty = 2)
points(xdata$lg.body, xdata$lg.brain, pch = 20)

We can see that different trees define slightly different regression lines (light blue). However, with the exception of few exteremes, most of them vary along the same mean (that can be characterized by the model averaged -dashed- line or the -solid- line that corresponds to the consensus tree). This figure suggests only a small confounding role for phylogenetic uncertainty.

Sources

R packages

Data

Codes

To get started

ape and nlme

caper

Plotting the results

References