"ape"
(Paradis et al 2004)
"nlme"
(Pinheiro et al 2012)
"GLSME"
(Hansen and Bartoszek 2012)
"MCMCglmm"
(Hadfield 2010)
"phytools"
(Revell 2012)
"MERegPHYSIG"
(Ives et al 2007)
brain size and body size on primates ("primate_ind.txt"
, a tab separated text file), with more than one observation for the majority of species, individual-specific data
brain size and body size on primates ("primate_spec.txt"
, a tab separated text file), species-specific data obtained from the exercises above, includes information on within-species sampling (sample sizes, variances and standard errors
phylogeny ("primate_tree.phy"
, in phylip format) is taken from 10kTrees (Arnold et al 2010) and tailored to the data at hand
When measurement error is ignored
library(ape) library(nlme) data.spec = read.table("primate_spec.txt", sep = "\t", header = TRUE) tree = read.tree("primate_tree.phy") row.names(data.spec) = data.spec$species pgls.nlme = gls(lg.brain ~ lg.body, data.spec, correlation = corBrownian(phy = tree)) summary(pgls.nlme)
## Generalized least squares fit by REML ## Model: lg.brain ~ lg.body ## Data: data.spec ## AIC BIC logLik ## 60.48 67.77 -27.24 ## ## Correlation Structure: corBrownian ## Formula: ~1 ## Parameter estimate(s): ## numeric(0) ## ## Coefficients: ## Value Std.Error t-value p-value ## (Intercept) 1.218 0.5333 2.284 0.0249 ## lg.body 0.287 0.0389 7.375 0.0000 ## ## Correlation: ## (Intr) ## lg.body -0.544 ## ## Standardized residuals: ## Min Q1 Med Q3 Max ## -2.1662 0.1224 0.7819 1.1608 3.3928 ## ## Residual standard error: 0.8308 ## Degrees of freedom: 86 total; 84 residual
When considering measurement error (on the predictor)
pglsME.nlme = gls(lg.brain ~ lg.body, data.spec, correlation = corBrownian(phy = tree), weights = varFixed(~lg.body_var.adj)) summary(pglsME.nlme)
## Generalized least squares fit by REML ## Model: lg.brain ~ lg.body ## Data: data.spec ## AIC BIC logLik ## 181.9 189.2 -87.97 ## ## Correlation Structure: corBrownian ## Formula: ~1 ## Parameter estimate(s): ## numeric(0) ## Variance function: ## Structure: fixed weights ## Formula: ~lg.body_var.adj ## ## Coefficients: ## Value Std.Error t-value p-value ## (Intercept) -1.3018 0.3866 -3.367 0.0011 ## lg.body 0.7049 0.0485 14.549 0.0000 ## ## Correlation: ## (Intr) ## lg.body -0.983 ## ## Standardized residuals: ## Min Q1 Med Q3 Max ## -0.95067 -0.35625 -0.13744 0.05117 1.55863 ## ## Residual standard error: 8.399 ## Degrees of freedom: 86 total; 84 residual
When measurement error is ignored
%METHOD = GLS % %Data file and ME file saved as 'workingDataFile.txt' and 'workingMEfile.txt' % %Coefficients %b0 (intercept) = 1.2179 %b1 = 0.28701 % %sigma2 = 0.10725 % %Independent variable means, variances, and correlations %aX1 = 7.4524 %s2X1 = 0.83315 % %LnLikelihood = -25.028 %-2LL = 50.056 %AIC(par=3) = 56.056
When considering measurement error (on both the response and predictor, but without within-species covariance)
%METHOD = EGLS % %Data file and ME file saved as 'workingDataFile.txt' and 'workingMEfile.txt' % %Coefficients with approximate SE %b0 (intercept) = 1.1929 +- 0.53329 %b1 = 0.29033 +- 0.038919 % %sigma2 = 0.12494 % %Independent variable means, variances, and correlations %aX1 = 7.4533 %s2X1 = 0.82363
%METHOD = ML % %Data file and ME file saved as 'workingDataFile.txt' and 'workingMEfile.txt' % %Coefficients with approximate SE %b0 (intercept) = 1.1659 +- 0.52755 %b1 = 0.29394 +- 0.039164 % %correlation matrix of estimates of b = % 1 -0.55338 % -0.55338 1 % %sigma2 = 0.1046 % %Independent variable means, variances, and correlations %aX1 = 7.4534 %s2X1 = 0.81061 % %LnLikelihood = -138.0037 %-2LL = 276.0074 %AIC(par=5) = 286.0074
%METHOD = REML % %Data file and ME file saved as 'workingDataFile.txt' and 'workingMEfile.txt' % %Coefficients %b0 (intercept) = 1.1648 %b1 = 0.29384 % %sigma2 = 0.10462 % %Independent variable means, variances, and correlations %aX1 = 7.4597 %s2X1 = 0.020603 % %LnLikelihood = -132.3263 %-2LL = 264.6526 %AIC(par=2) = 268.6526
When measurement error is ignored
library(phytools) data.spec = read.table("primate_spec.txt", sep = "\t", header = TRUE) tree = read.tree("primate_tree.phy") pgls.Ives(tree = tree, X = setNames(data.spec$lg.body, data.spec$species), y = setNames(data.spec$lg.brain, data.spec$species), Vx = setNames(rep(0, length(tree$tip)), data.spec$species), Vy = setNames(rep(0, length(tree$tip)), data.spec$species), Cxy = setNames(rep(0, length(tree$tip)), data.spec$species))
## $beta ## [1] 0.6108 0.3123 ## ## $sig2x ## [1] 0.108 ## ## $sig2y ## [1] 0.009439 ## ## $a ## [1] -0.5942 0.4252 ## ## $logL ## [1] -156.5 ## ## $convergence ## [1] 0 ## ## $message ## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
The convergence is often sensitive to starting conditions. It may be more straightforward to repeat the above procedure several times (10-100) and select the results with the highest likelihood:
res = c() for (k in 1:100) { mod = try(pgls.Ives(tree = tree, X = setNames(data.spec$lg.body, data.spec$species), y = setNames(data.spec$lg.brain, data.spec$species), Vx = setNames(rep(0, length(tree$tip)), data.spec$species), Vy = setNames(rep(0, length(tree$tip)), data.spec$species), Cxy = setNames(rep(0, length(tree$tip)), data.spec$species))) if (class(mod) != "try-error") { int = mod$beta[1] slope = mod$beta[2] lik = mod$logL anc_a = mod$a[1] anc_b = mod$a[2] conv = mod$convergence } res = rbind(res, data.frame(k, int, slope, anc_a, anc_b, lik, conv)) } res[which(res[, 6] == max(subset(res, conv == 0)[, 6])), ]
## k int slope anc_a anc_b lik conv ## 56 56 1.215 0.2872 7.434 3.35 -138.7 0
When considering measurement error (on both the response and predictor, but without within-species covariance)
res = c() for (k in 1:100) { mod = try(pgls.Ives(tree = tree, X = setNames(data.spec$lg.body, data.spec$species), y = setNames(data.spec$lg.brain, data.spec$species), Vx = setNames(data.spec$lg.body_var.adj, data.spec$species), Vy = setNames(data.spec$lg.brain_var.adj, data.spec$species), Cxy = setNames(rep(0, length(tree$tip)), data.spec$species))) if (class(mod) != "try-error") { int = mod$beta[1] slope = mod$beta[2] lik = mod$logL anc_a = mod$a[1] anc_b = mod$a[2] conv = mod$convergence } res = rbind(res, data.frame(k, int, slope, anc_a, anc_b, lik, conv)) } res[which(res[, 6] == max(subset(res, conv == 0)[, 6])), ]
## k int slope anc_a anc_b lik conv ## 59 59 -0.2379 0.4826 7.46 3.362 -125.7 0
When both measurement error and phylogeny are ignored (GLS)
# functions are stored in a source file (available from K. Bartoszek's # website) source("~/Dropbox/comparative book/website/OPMs/chap_7_Garamszegi/GLSME.R") data.spec = read.table("primate_spec.txt", sep = "\t", header = TRUE) row.names(data.spec) = data.spec$species tree = read.tree("primate_tree.phy") gls.glsme = GLSME(y = data.spec$lg.brain, D = cbind(rep(1, length(data.spec$lg.body)), data.spec$lg.body), Vt = 1, Ve = 0, Vd = list("F", "F"), Vu = 0) gls.glsme
## $GLSestimate ## [,1] ## [1,] -1.6429 ## [2,] 0.7128 ## ## $errorGLSestim ## [,1] ## [1,] 0.32506 ## [2,] 0.04057 ## ## $BiasCorrectedGLSestimate ## [,1] ## [1,] -1.6429 ## [2,] 0.7128 ## ## $errorBiasCorrectedGLSestim ## [,1] ## [1,] 0.32506 ## [2,] 0.04057 ## ## $K ## [,1] [,2] ## [1,] 1 0 ## [2,] 0 1 ## ## $R2 ## [,1] ## [1,] 0.7821 ## ## $R2BiasCorrectedModel ## [,1] ## [1,] 0.7821 ## ## $LogLikelihood ## [1] -63.45 ## ## $LogLikelihoodBiasCorrectedModel ## [1] -63.45 ## ## $PredictorVarianceConstantEstimate ## [1] 0 0 ## ## $ResponseVarianceConstantEstimate ## [1] 0.2561
When measurement error is ignored but phylogeny is accounted for (PGLS)
# the order of species in the data should be the same as the order of # species in the VCV matrix, thus the dataset should be resorted as # data.spec[tree$tip.label,]! pgls.glsme = GLSME(y = data.spec[tree$tip.label, ]$lg.brain, D = cbind(rep(1, length(data.spec$lg.body)), data.spec[tree$tip.label, ]$lg.body), Vt = vcv(tree), Ve = 0, Vd = list("F", "F"), Vu = 0) pgls.glsme
## $GLSestimate ## [,1] ## [1,] 1.218 ## [2,] 0.287 ## ## $errorGLSestim ## [,1] ## [1,] 0.52707 ## [2,] 0.03846 ## ## $BiasCorrectedGLSestimate ## [,1] ## [1,] 1.218 ## [2,] 0.287 ## ## $errorBiasCorrectedGLSestim ## [,1] ## [1,] 0.52707 ## [2,] 0.03846 ## ## $K ## [,1] [,2] ## [1,] 1 0 ## [2,] 0 1 ## ## $R2 ## [,1] ## [1,] 0.393 ## ## $R2BiasCorrectedModel ## [,1] ## [1,] 0.393 ## ## $LogLikelihood ## [1] -25.02 ## ## $LogLikelihoodBiasCorrectedModel ## [1] -25.02 ## ## $PredictorVarianceConstantEstimate ## [1] 0 0 ## ## $ResponseVarianceConstantEstimate ## [1] 0.009235
When both measurement error and phylogeny are accounted for (considering measurement error (on both the response and predictor, but without within-species covariance)
# again, the order of species in the data should be the same as the order of # species in the VCV matrix, thus the dataset should be resorted as # data.spec[tree$tip.label,]! in addition, note that when there is a # measurement error in the predictors, these are treated as 'random effects' # (as specified by the non zero entry in the Vd matrix), and they will be # centered on their means by default (it can be switched off by setting # CenterPredictor=FALSE) pglsME.glsme = GLSME(y = data.spec[tree$tip.label, ]$lg.brain, D = cbind(rep(1, length(data.spec$lg.body)), data.spec[tree$tip.label, ]$lg.body), Vt = vcv(tree), Ve = diag(data.spec[tree$tip.label, ]$lg.brain_var.adj), Vd = list("F", vcv(tree)), Vu = list("F", diag(data.spec[tree$tip.label, ]$lg.body_var.adj))) pglsME.glsme
## $GLSestimate ## [,1] ## [1,] 3.3610 ## [2,] 0.3209 ## ## $errorGLSestim ## [,1] ## [1,] 0.48730 ## [2,] 0.04689 ## ## $BiasCorrectedGLSestimate ## [,1] ## [1,] 3.3582 ## [2,] 0.4173 ## ## $errorBiasCorrectedGLSestim ## [,1] ## [1,] 0.48730 ## [2,] 0.06111 ## ## $K ## [,1] [,2] ## [1,] 1 0.006861 ## [2,] 0 0.769192 ## ## $R2 ## [,1] ## [1,] 0.4894 ## ## $R2BiasCorrectedModel ## [,1] ## [1,] 0.5328 ## ## $LogLikelihood ## [1] -23.66 ## ## $LogLikelihoodBiasCorrectedModel ## [1] -21.25 ## ## $PredictorVarianceConstantEstimate ## [1] 0.00000 0.04932 ## ## $ResponseVarianceConstantEstimate ## [1] 0.0112
# as a consequence of centering, the estimated intercept corresponds to # b0+b1*mean(data.spec$lg.body) from model data.spec$lg.brain = # b0+b1*mean(data.spec$lg.body) + b1*(data.spec$lg.body- # mean(data.spec$lg.body)). Therefore, the intercept of interests can be # obtained as: pglsME.glsme$BiasCorrectedGLSestimate[1] - pglsME.glsme$BiasCorrectedGLSestimate[2] * mean(data.spec$lg.body)
## [1] 0.06225
# from which, the slope of interest is: pglsME.glsme$BiasCorrectedGLSestimate[2]
## [1] 0.4173
# taking into account estimation error after the bias correction, the slope # falls within the error range of: slope_range = rbind(pglsME.glsme$BiasCorrectedGLSestimate[2] - pglsME.glsme$errorBiasCorrectedGLSestim[2], pglsME.glsme$BiasCorrectedGLSestimate[2] + pglsME.glsme$errorBiasCorrectedGLSestim[2]) slope_range
## [,1] ## [1,] 0.3561 ## [2,] 0.4784
# the standard error of the intercept can be computed based on the variance # of the noncentered intercept estimate (T. Hansen and K. Bartoszek pers. # comm.): int_var = pglsME.glsme$errorBiasCorrectedGLSestim[1]^2 + pglsME.glsme$errorBiasCorrectedGLSestim[2]^2 * mean(data.spec$lg.body)^2 sqrt(int_var)
## [1] 0.6859
# from this the error range around the non-centred intercept can be given: intercept_range = rbind(pglsME.glsme$BiasCorrectedGLSestimate[1] - pglsME.glsme$BiasCorrectedGLSestimate[2] * mean(data.spec$lg.body) - sqrt(int_var), pglsME.glsme$BiasCorrectedGLSestimate[1] - pglsME.glsme$BiasCorrectedGLSestimate[2] * mean(data.spec$lg.body) + sqrt(int_var)) intercept_range
## [,1] ## [1,] -0.6237 ## [2,] 0.7481
To prepare the dataset, we need to generate a column named 'animal'
library(MCMCglmm) data.spec = read.table("primate_spec.txt", sep = "\t", header = TRUE) data.ind = read.table("primate_ind.txt", sep = "\t", header = TRUE) tree = read.tree("primate_tree.phy") data.spec$animal = data.spec$species data.ind$animal = data.ind$species
When measurement error is ignored, and species-specific data are used (equivalent with PGLS)
prior <- list(R = list(V = 1, nu = 0.002), G = list(G1 = list(V = 1, nu = 0.002))) mcmc = MCMCglmm(lg.brain ~ lg.body, random = ~animal, data = data.spec, pedigree = tree, prior = prior, verbose = F, family = "gaussian", nodes = "TIPS") summary(mcmc)
## ## Iterations = 3001:12991 ## Thinning interval = 10 ## Sample size = 1000 ## ## DIC: -238.3 ## ## G-structure: ~animal ## ## post.mean l-95% CI u-95% CI eff.samp ## animal 0.69 0.47 0.904 1000 ## ## R-structure: ~units ## ## post.mean l-95% CI u-95% CI eff.samp ## units 0.00327 0.000199 0.00967 226 ## ## Location effects: lg.brain ~ lg.body ## ## post.mean l-95% CI u-95% CI eff.samp pMCMC ## (Intercept) 1.0990 0.0235 2.0715 1000 0.024 * ## lg.body 0.2996 0.2282 0.3870 1000 <0.001 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
When measurement error is considered on the predictor variable, and individual-specific data are used (phylogenetic mixed model)
# we need within-subject centering to do this (van de Pol and Wright 2009), # the approprate data transformation: data.ind$lg.body = log(data.ind$body) data.ind$lg.brain = log(data.ind$brain) means = tapply(X = data.ind$lg.body, INDEX = data.ind$species, FUN = mean) N = tapply(X = data.ind$lg.body, INDEX = data.ind$species, FUN = length) mean.vector = vector() for (k in 1:length(means)) { mean.set = rep(means[k], N[k]) mean.vector = rbind(mean.vector, as.data.frame(mean.set)) } data.ind$body_av = mean.vector[, 1] data.ind$body_centr <- data.ind$lg.body - data.ind$body_av # the model based on individual-specific data prior2 = list(G = list(G1 = list(V = 1, nu = 0.02), G2 = list(V = 1, nu = 0.02)), R = list(V = 1, nu = 0.02)) mcmc.ind.centr = MCMCglmm(lg.brain ~ body_centr + body_av, random = ~animal + species, data = data.ind, pedigree = tree, prior = prior2, verbose = F, family = "gaussian", nodes = "TIPS") summary(mcmc.ind.centr)
## ## Iterations = 3001:12991 ## Thinning interval = 10 ## Sample size = 1000 ## ## DIC: -1236 ## ## G-structure: ~animal ## ## post.mean l-95% CI u-95% CI eff.samp ## animal 0.649 0.44 0.899 1000 ## ## ~species ## ## post.mean l-95% CI u-95% CI eff.samp ## species 0.00642 0.0017 0.0134 469 ## ## R-structure: ~units ## ## post.mean l-95% CI u-95% CI eff.samp ## units 0.0134 0.012 0.0147 911 ## ## Location effects: lg.brain ~ body_centr + body_av ## ## post.mean l-95% CI u-95% CI eff.samp pMCMC ## (Intercept) 1.445 0.542 2.492 1000 0.002 ** ## body_centr 0.129 0.114 0.144 1000 <0.001 *** ## body_av 0.258 0.184 0.318 1000 <0.001 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The slope for 'body_av' that gives the interspecific slope that is controlled for phylogeny and within-species variance in body mass (predictor).
See more exercises with "MCMCglmm"
in the OPM for chapter 11