"ape"
(Paradis et al. 2004)
"nlme"
(Pinheiro et al. 2014)
"caper"
(Orme et al. 2012)
"AICcmodavg"
(Mazerolle 2013)
Species-specific trait data ("primate_spec.txt"
), a tab separated text file, species-specific data on brain size and body size in primates
Sample of phylogenetic trees ("10kTrees_Primates.nex"
), 1000 phylogenetic hypotheses in nexus format, phylogeny of primates from Arnold et al. (2010)
A consensus tree ("consensusTre_Primates.nex"
), the consenus phylogenetic tree for the above sample, nexus format, data from Arnold et al. (2010)
# activate libraries library(ape) library(nlme) library(caper) library(AICcmodavg) # import data and trees xdata <- read.table(file = "primate_spec.txt", header = T, sep = "\t", dec = ".") row.names(xdata) = xdata$species trees <- read.nexus("10kTrees_Primates.nex") cons <- read.nexus("consensusTre_Primates.nex")
We will run models on all trees then perform model averaging across them to get parameter estimates.
# for simplicity, we do not estimate lambda here Cand.models = list() niter = 1000 for (i in 1:niter) { Cand.models[[i]] = gls(lg.brain ~ lg.body, data = xdata, method = "ML", correlation = corPagel(1, trees[[i]], fixed = T)) } # if you want to see the particular models head(aictab(cand.set = Cand.models, modnames = paste("Tree Nr.", 1:niter, sep = " "), sort = F), 20)
## ## Model selection based on AICc : ## ## K AICc Delta_AICc AICcWt LL ## Tree Nr. 1 3 57.33 6.47 0 -25.52 ## Tree Nr. 2 3 55.53 4.67 0 -24.62 ## Tree Nr. 3 3 56.37 5.51 0 -25.04 ## Tree Nr. 4 3 57.56 6.70 0 -25.63 ## Tree Nr. 5 3 56.26 5.40 0 -24.98 ## Tree Nr. 6 3 55.09 4.23 0 -24.40 ## Tree Nr. 7 3 55.49 4.63 0 -24.60 ## Tree Nr. 8 3 57.04 6.18 0 -25.37 ## Tree Nr. 9 3 56.38 5.52 0 -25.04 ## Tree Nr. 10 3 57.52 6.66 0 -25.61 ## Tree Nr. 11 3 56.73 5.87 0 -25.22 ## Tree Nr. 12 3 53.81 2.95 0 -23.76 ## Tree Nr. 13 3 56.19 5.34 0 -24.95 ## Tree Nr. 14 3 57.02 6.16 0 -25.37 ## Tree Nr. 15 3 57.36 6.50 0 -25.53 ## Tree Nr. 16 3 55.63 4.77 0 -24.67 ## Tree Nr. 17 3 57.23 6.37 0 -25.47 ## Tree Nr. 18 3 56.49 5.63 0 -25.10 ## Tree Nr. 19 3 58.61 7.75 0 -26.16 ## Tree Nr. 20 3 55.40 4.54 0 -24.55
# model averaged intercept Int_av = modavg(parm = "(Intercept)", cand.set = Cand.models, modnames = paste("Tree Nr.", 1:niter, sep = " ")) Int_av$Mod.avg.beta
## [1] 1.189
Int_av$Lower.CL
## [1] 0.1526
Int_av$Upper.CL
## [1] 2.225
Int_av$Uncond.SE
## [1] 0.5287
# model averaged slope beta_av = modavg(parm = "lg.body", cand.set = Cand.models, modnames = paste("Tree Nr.", 1:niter, sep = " ")) beta_av$Mod.avg.beta
## [1] 0.2916
beta_av$Lower.CL
## [1] 0.2131
beta_av$Upper.CL
## [1] 0.3701
beta_av$Uncond.SE
## [1] 0.04005
"AICcmodavg"
does not work in this case, we need to do the multomodel inference "by hand". See also OPM for "1) Selecting among evolutionary models with different combinations of predictors"
# run all models, for simplicity we do not estimate lambda here params = c() niter = 1000 for (i in 1:niter) { tree.data = comparative.data(phy = trees[[i]], data = xdata, names.col = species, vcv = T, na.omit = F, warn.dropped = T) mod = pgls(lg.brain ~ lg.body, data = tree.data, lambda = 1) params = rbind(params, data.frame(row.names = NULL, tree = i, logLik = mod$model$log.lik, AIC = mod$aicc, lambda = mod$param[2], Int = mod$model$coef[1], Int_err = mod$sterr[1], beta = mod$model$coef[2], beta_err = mod$sterr[2])) } # get AIC delta and w params$delta = params$AIC - min(params$AIC) params$w = exp(-0.5 * params$delta)/sum(exp(-0.5 * params$delta)) # model averaging of parameters of interest weighted.mean(params$Int, params$w) #averaged Intercept
## [1] 1.189
w_err = c() for (i in 1:niter) { w_err[i] = sqrt(params[i, ]$Int_err^2 + (params[i, ]$Int - weighted.mean(params$Int, params$w))^2) } weighted.mean(w_err, params$w) #SE
## [1] 0.5268
weighted.mean(params$Int, params$w) - 1.96 * weighted.mean(w_err, params$w) #lower CI (from SE)
## [1] 0.1562
weighted.mean(params$Int, params$w) + 1.96 * weighted.mean(w_err, params$w) #Upper CI (from SE)
## [1] 2.221
quantile(params$Int, c(0.025, 0.975)) #CI
## 2.5% 97.5% ## 1.075 1.288
weighted.mean(params$beta, params$w) #averaged slope
## [1] 0.2916
for (i in 1:niter) { w_err[i] = sqrt(params[i, ]$beta_err^2 + (params[i, ]$beta - weighted.mean(params$beta, params$w))^2) } weighted.mean(w_err, params$w) #SE
## [1] 0.03964
weighted.mean(params$beta, params$w) - 1.96 * weighted.mean(w_err, params$w) #Lower CI
## [1] 0.2139
weighted.mean(params$beta, params$w) + 1.96 * weighted.mean(w_err, params$w) #Upper CI
## [1] 0.3693
quantile(params$beta, c(0.025, 0.975)) #CI
## 2.5% 97.5% ## 0.2778 0.3128
We will plot all models corresponding to different trees
plot(xdata$lg.body, xdata$lg.brain, xlab = "log(Body size)", ylab = "log(Brain size)", pch = NA_integer_) # this draws the regression lines corresponding to different trees in the sample: for (i in 1:niter) { abline(Cand.models[[i]], col = "lightblue") } # this is what you would get if you used the consensus phylogenetic tree: abline(gls(lg.brain ~ lg.body, data = xdata, method = "ML", correlation = corPagel(1, cons, fixed = T))) # this is what the model averaged parameters deliniate: abline(a = Int_av$Mod.avg.beta, b = beta_av$Mod.avg.beta, lty = 2) points(xdata$lg.body, xdata$lg.brain, pch = 20)
We can see that different trees define slightly different regression lines (light blue). However, with the exception of few exteremes, most of them vary along the same mean (that can be characterized by the model averaged -dashed- line or the -solid- line that corresponds to the consensus tree). This figure suggests only a small confounding role for phylogenetic uncertainty.