20 April, 2021
- Description: This script reproduces all sequence analysis steps and plots included in the paper plus some additional exploratory analyses.
- Setting general parameters:
set.seed(15102020)
bootstraps <- 1000
data_path <- "./Data/"
Proj_name <- "BRC_growth_rate"
Browns <- RColorBrewer::brewer.pal(n = 9, "YlOrBr")[9:6]
Greens <- RColorBrewer::brewer.pal(n = 9, "YlGn")[9:6]
Blues <- RColorBrewer::brewer.pal(n = 9, "YlGnBu")[9:6]
Gradient.colours <- c(Browns[1], Greens[1], Browns[2], Greens[2], Browns[3], Greens[3], Browns[4], Greens[4], Blues)
OTUmat <- t(read.csv(paste0(data_path, "Shivta_site_otuTab2.txt"), header = TRUE, row.names = 1))
sort.order <- as.numeric(gsub("OTU([0-9]+)", "\\1", colnames( OTUmat )))
OTUmat <- OTUmat[, order(sort.order )]
Metadata <- read.csv(paste0(data_path, "Shivta_metadata.csv"), row.names = 1, header = TRUE)
read_csv(paste0(data_path, "Shivta_metadata.csv"),
trim_ws = TRUE) %>%
mutate_at(
c(
"Rock.type",
"Location"
),
~(factor(.))
) %>%
column_to_rownames("Sample.code") ->
Metadata
row.names(OTUmat) <- gsub("(.*)Nimrod[0-9]+|Osnat[0-9]+", "\\1", row.names( OTUmat))
Metadata <- Metadata[order(row.names(Metadata)), ]
OTUmat <- OTUmat[order(row.names(OTUmat)), ]
# calculate sample size
Metadata$Library.size = rowSums(OTUmat)
Metadata$Location.rock <- with(Metadata, Location:Rock.type)
# Load taxonomy data
tax.file <- "Shivta_site_silva.nrv119.taxonomy"
Taxonomy <- read.table(paste0(data_path, tax.file), stringsAsFactors = FALSE) # read taxonomy file
# count the ';' in each taxonomy string and pad to six levels with "Unclassified"
for (i in 1:nrow(Taxonomy)){
  semicolons <- length(gregexpr(";", Taxonomy$V2[i])[[1]])
  if (semicolons < 6){
    x <- paste0(rep("Unclassified;", 6 - semicolons), collapse = "")
    Taxonomy$V2[i] <- paste0(Taxonomy$V2[i], x)
  }
}
# split taxonomy to columns
do.call( "rbind", strsplit( Taxonomy$V1, ";", fixed = TRUE)) %>%
gsub( "size=([0-9]+)", "\\1", .) %>%
data.frame( ., do.call( "rbind", strsplit( Taxonomy$V2, ";", fixed = TRUE)), stringsAsFactors = F) %>%
apply(., 2, function(x) gsub( "\\(.*\\)", "", x)) %>%
replace(., . == "unclassified", "Unclassified") ->
Taxonomy
colnames( Taxonomy ) <- c( "OTU", "Frequency", "Domain", "Phylum", "Class", "Order", "Family", "Genus" )
# rownames(Taxonomy) <- colnames(Rock_weathering_OTUmat)
rownames(Taxonomy) <- Taxonomy[, 1]
Tree_IQ <- read_tree(paste0(data_path, "Shivta_site_otuReps.filtered.align.treefile"))
# generate phyloseq object
Ps_obj <- phyloseq(otu_table(OTUmat, taxa_are_rows = FALSE),
tax_table(Taxonomy[, -c(1, 2)]),
sample_data(Metadata),
phy_tree(Tree_IQ)
)
# Reorder factors for plotting
sample_data(Ps_obj)$Location %<>% fct_relevel("Slope", "City")
Remove un- and mis-classified sequences, chloroplasts and mitochondria
domains2remove <- c("", "Archaea", "Eukaryota", "Unclassified")
classes2remove <- c("Chloroplast")
families2remove <- c("Mitochondria")
Ps_obj_filt <- subset_taxa(Ps_obj, !is.na(Phylum) &
!Domain %in% domains2remove &
!Class %in% classes2remove &
!Family %in% families2remove)
Ps_obj_df <-
as.data.frame(sample_data(Ps_obj_filt)) # Put sample_data into a ggplot-friendly data.frame
Ps_obj_df <- Ps_obj_df[order(Ps_obj_df$Library.size), ]
Ps_obj_df$Index <- seq(nrow(Ps_obj_df))
ggplot(data = Ps_obj_df,
aes(x = Index, y = Library.size, color = Location.rock)) +
geom_point2(size = 4) +
scale_colour_manual(values = ggpomological:::pomological_palette[c(2, 1, 9, 3)], name = "Location.rock")
summary(sample_sums(Ps_obj_filt))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 23966 44989 53000 52663 61348 77459
summary(taxa_sums(Ps_obj_filt))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 70 198 1852 771 76180
prevdf <- apply(X = otu_table(Ps_obj_filt),
MARGIN = ifelse(taxa_are_rows(Ps_obj_filt), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Add taxonomy and total read counts to this data.frame
prevdf <- data.frame(Prevalence = prevdf,
TotalAbundance = taxa_sums(Ps_obj_filt),
tax_table(Ps_obj_filt))
prevdf %>%
group_by(Phylum) %>%
summarise(`Mean prevalence` = mean(Prevalence),
`Sum prevalence` = sum(Prevalence)) ->
Prevalence_phylum_summary
Prevalence_phylum_summary %>%
kable(., digits = c(0, 1, 0)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Phylum | Mean prevalence | Sum prevalence |
---|---|---|
Acidobacteria | 11.7 | 152 |
Actinobacteria | 16.3 | 3414 |
Aquificae | 4.0 | 12 |
Armatimonadetes | 7.6 | 76 |
Bacteroidetes | 11.8 | 1027 |
Candidate\_division\_KB1 | 8.0 | 8 |
Candidate\_division\_OD1 | 4.0 | 4 |
Candidate\_division\_OP11 | 4.5 | 9 |
Candidate\_division\_TM7 | 7.9 | 158 |
Chloroflexi | 11.0 | 1124 |
Cyanobacteria | 14.9 | 506 |
Deinococcus-Thermus | 15.8 | 174 |
Firmicutes | 13.4 | 214 |
Gemmatimonadetes | 11.2 | 617 |
Nitrospirae | 6.0 | 12 |
Planctomycetes | 10.8 | 140 |
Proteobacteria | 14.4 | 1817 |
Tenericutes | 3.0 | 3 |
Verrucomicrobia | 23.0 | 23 |
WCHB1-60 | 7.0 | 28 |
prevdf %>%
group_by(Order) %>%
summarise(`Mean prevalence` = mean(Prevalence),
`Sum prevalence` = sum(Prevalence)) ->
Prevalence_Order_summary
Prevalence_Order_summary %>%
kable(., digits = c(0, 1, 0)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Order | Mean prevalence | Sum prevalence |
---|---|---|
AKIW781 | 9.4 | 198 |
AKYG1722 | 10.7 | 75 |
AT425-EubC11\_terrestrial\_group | 10.2 | 378 |
Acidimicrobiales | 14.7 | 368 |
Alteromonadales | 12.0 | 12 |
Aquificales | 4.0 | 12 |
Ardenticatenales | 8.0 | 16 |
B103G10 | 7.0 | 7 |
BD2-11\_terrestrial\_group | 5.0 | 5 |
Bacillales | 13.7 | 164 |
Bacteroidales | 17.0 | 17 |
Bdellovibrionales | 8.0 | 40 |
Burkholderiales | 18.5 | 204 |
C0119 | 5.0 | 5 |
Caldilineales | 18.0 | 18 |
Campylobacterales | 6.0 | 6 |
Caulobacterales | 23.7 | 71 |
Chromatiales | 20.0 | 20 |
Chthoniobacterales | 23.0 | 23 |
Clostridiales | 6.5 | 13 |
Corynebacteriales | 8.0 | 40 |
Cytophagales | 11.8 | 695 |
Deinococcales | 15.1 | 151 |
Desulfovibrionales | 5.0 | 5 |
E6aD10 | 11.0 | 11 |
EMP-G18 | 3.0 | 3 |
Enterobacteriales | 22.0 | 44 |
Euzebyales | 13.5 | 202 |
Flavobacteriales | 12.7 | 38 |
Frankiales | 20.9 | 356 |
Gaiellales | 10.4 | 94 |
Gemmatimonadales | 14.3 | 214 |
JG30-KF-CM45 | 13.0 | 468 |
Kineosporiales | 20.2 | 121 |
Lactobacillales | 18.5 | 37 |
Micrococcales | 18.0 | 198 |
Micromonosporales | 19.5 | 39 |
Myxococcales | 10.2 | 51 |
Nitriliruptorales | 9.7 | 58 |
Nitrosomonadales | 11.5 | 23 |
Nitrospirales | 6.0 | 12 |
Oceanospirillales | 16.0 | 16 |
Orbales | 7.0 | 7 |
Order\_II | 4.8 | 24 |
Order\_III | 8.7 | 26 |
PAUC43f\_marine\_benthic\_group | 2.0 | 2 |
Planctomycetales | 9.2 | 74 |
Propionibacteriales | 14.5 | 318 |
Pseudomonadales | 14.7 | 103 |
Pseudonocardiales | 18.0 | 306 |
Rhizobiales | 16.1 | 386 |
Rhodobacterales | 14.8 | 163 |
Rhodospirillales | 11.8 | 271 |
Rickettsiales | 11.2 | 146 |
Rubrobacterales | 22.3 | 491 |
S0134\_terrestrial\_group | 18.0 | 18 |
SBYG-4553 | 10.0 | 10 |
Solirubrobacterales | 17.2 | 638 |
Sphaerobacterales | 3.3 | 10 |
Sphingobacteriales | 14.2 | 227 |
Sphingomonadales | 19.6 | 196 |
Streptomycetales | 24.5 | 49 |
Streptosporangiales | 3.0 | 3 |
Subgroup\_3 | 15.0 | 15 |
Subgroup\_4 | 10.6 | 85 |
Subgroup\_6 | 13.0 | 52 |
SubsectionI | 13.0 | 13 |
SubsectionII | 15.8 | 315 |
SubsectionIII | 14.8 | 118 |
SubsectionIV | 13.5 | 54 |
TRA3-20 | 14.0 | 14 |
Thermales | 23.0 | 23 |
Thermophilales | 15.0 | 30 |
Unclassified | 9.2 | 642 |
Unknown\_Order | 9.8 | 98 |
Vampirovibrionales | 6.0 | 6 |
WD2101\_soil\_group | 13.0 | 39 |
Xanthomonadales | 18.0 | 18 |
Based on that, we will remove all phyla with a summed prevalence below 8.
Prevalence_phylum_summary %>%
filter(`Sum prevalence` < 8) %>%
dplyr::select(Phylum) %>%
map(as.character) %>%
unlist() ->
filterPhyla
Ps_obj_filt %<>% subset_taxa(!Phylum %in% filterPhyla)
sample_data(Ps_obj_filt)$Library.size <- rowSums(otu_table(Ps_obj_filt))
print(Ps_obj)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 767 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 767 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 767 tips and 765 internal nodes ]
## taxa are columns
print(Ps_obj_filt)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 709 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 709 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 709 tips and 707 internal nodes ]
## taxa are columns
# Subset to the remaining phyla
prevdf_phylum_filt <- subset(prevdf, Phylum %in% get_taxa_unique(Ps_obj_filt, "Phylum"))
ggplot(prevdf_phylum_filt,
aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt), color = Phylum)) +
# Include a guess for the prevalence threshold (dashed line at 5% of samples)
geom_hline(yintercept = 0.05,
alpha = 0.5,
linetype = 2) + geom_point2(size = 2, alpha = 0.7) +
scale_x_log10() + xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
facet_wrap( ~ Phylum) + theme(legend.position = "none")
# Subset to the remaining phyla
prevdf_order_filt <- subset(prevdf, Order %in% get_taxa_unique(Ps_obj_filt, "Order"))
# grab the top 30 most abundant orders
prevdf_order_filt %>%
group_by(Order) %>%
summarise(Combined.abundance = sum(TotalAbundance)) %>%
arrange(desc(Combined.abundance)) %>%
.[1:30, "Order"] ->
Orders2plot
prevdf_order_filt2 <- subset(prevdf, Order %in% Orders2plot$Order)
ggplot(prevdf_order_filt2,
aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt), color = Order)) +
# Include a guess for the prevalence threshold (dashed line at 5% of samples)
geom_hline(yintercept = 0.05,
alpha = 0.5,
linetype = 2) + geom_point2(size = 2, alpha = 0.7) +
scale_x_log10() + xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
facet_wrap( ~ Order) + theme(legend.position = "none")
We will remove all sequences that appear in fewer than 10% of the samples.
# Define prevalence threshold as 10% of total samples
prevalenceThreshold <- 0.1 * nsamples(Ps_obj_filt)
prevalenceThreshold
## [1] 2.5
# Execute prevalence filter, using `prune_taxa()` function
keepTaxa <-
row.names(prevdf_phylum_filt)[(prevdf_phylum_filt$Prevalence >= prevalenceThreshold)]
Ps_obj_filt %<>% prune_taxa(keepTaxa, .)
sample_data(Ps_obj_filt)$Library.size <- rowSums(otu_table(Ps_obj_filt))
print(Ps_obj)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 767 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 767 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 767 tips and 765 internal nodes ]
## taxa are columns
print(Ps_obj_filt)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 694 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 694 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 694 tips and 692 internal nodes ]
## taxa are columns
This removed 73 OTUs, or about 10% of the sequences.
First, let's look at the count data distribution after filtering:
PlotLibDist(Ps_obj_filt)
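`PlotLibDist()` is a helper sourced elsewhere in the repository and is not shown here. As a rough, hypothetical stand-in (names are illustrative), a ranked bar chart of the per-sample library sizes can be drawn directly from the phyloseq object:

```r
# Hypothetical stand-in for the PlotLibDist() helper (sketch only):
# rank the samples by library size and draw a horizontal bar chart.
library(phyloseq)
library(ggplot2)

lib_df <- data.frame(Sample = sample_names(Ps_obj_filt),
                     Library.size = sample_sums(Ps_obj_filt))
lib_df$Sample <- reorder(lib_df$Sample, lib_df$Library.size)
ggplot(lib_df, aes(x = Sample, y = Library.size)) +
  geom_col() +
  coord_flip() +
  labs(x = NULL, y = "Library size (reads)")
```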
sample_data(Ps_obj_filt) %>%
as_tibble() %>%
dplyr::select(Sample.name, Library.size) %>%
as(., "data.frame") %>%
kable(.) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Sample.name | Library.size |
---|---|
City Chalk 2 | 49453 |
City Chalk 3 | 54155 |
City Chalk 4 | 44943 |
City Chalk 5 | 58835 |
City Chalk 6 | 77459 |
City Limestone 1 | 36122 |
City Limestone 2 | 52816 |
City Limestone 3 | 48378 |
City Limestone 4 | 47108 |
City Limestone 5 | 61346 |
City Limestone 6 | 54505 |
Slope Limestone 1 | 75578 |
Slope Chalk 1 | 51651 |
Slope Chalk 2 | 45197 |
Slope Chalk 3 | 33625 |
Slope Chalk 4 | 70886 |
Slope Chalk 5 | 71458 |
Slope Chalk 6 | 58959 |
Slope Chalk 7 | 36756 |
City Chalk 1 | 23962 |
Slope Limestone 2 | 62456 |
Slope Limestone 3 | 36628 |
Slope Limestone 4 | 57283 |
Slope Limestone 5 | 64509 |
Slope Limestone 6 | 41564 |
The figure and table indicate only a small deviation in the number of reads per sample.
(mod1 <- adonis(
otu_table(Ps_obj_filt) ~ Library.size,
data = as(sample_data(Ps_obj_filt), "data.frame"),
method = "horn",
permutations = 9999
))
##
## Call:
## adonis(formula = otu_table(Ps_obj_filt) ~ Library.size, data = as(sample_data(Ps_obj_filt), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Library.size 1 0.4246 0.42456 1.3337 0.05481 0.2192
## Residuals 23 7.3218 0.31834 0.94519
## Total 24 7.7464 1.00000
PlotReadHist(as(otu_table(Ps_obj_filt), "matrix"))
notAllZero <- (rowSums(t(otu_table(Ps_obj_filt))) > 0)
vsn::meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_filt))[notAllZero, ] + 1)))
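`PlotReadHist()` above is another helper defined outside this document. A hypothetical stand-in (a sketch, not the project's implementation) showing the distribution of the count matrix on a log scale could be:

```r
# Hypothetical stand-in for PlotReadHist(): histogram of all OTU counts.
library(ggplot2)

count_df <- data.frame(Count = as.vector(as(otu_table(Ps_obj_filt), "matrix")))
ggplot(count_df, aes(x = Count + 1)) +   # +1 so that zero counts survive the log scale
  geom_histogram(bins = 50) +
  scale_x_log10() +
  labs(x = "Count + 1", y = "Frequency")
```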
The difference in library sizes is small and its effect on the community composition is minimal. We'll use the GMPR method for library-size normalisation (Chen and Chen 2017).
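As orientation only (the analysis below uses the `GMPR()` implementation), the idea behind GMPR can be sketched in a few lines: for every pair of samples take the median ratio of counts over the OTUs they share, and define a sample's size factor as the geometric mean of these medians across all other samples. The function name below is illustrative.

```r
# Sketch of the GMPR size-factor idea (illustrative, not the package code);
# counts is a samples x OTUs matrix.
gmpr_sketch <- function(counts) {
  n <- nrow(counts)
  sapply(seq_len(n), function(i) {
    med_ratios <- sapply(setdiff(seq_len(n), i), function(j) {
      shared <- counts[i, ] > 0 & counts[j, ] > 0    # OTUs present in both samples
      if (!any(shared)) return(NA_real_)             # no overlap -> skip this pair
      median(counts[i, shared] / counts[j, shared])  # median pairwise count ratio
    })
    exp(mean(log(med_ratios), na.rm = TRUE))         # geometric mean over all pairs
  })
}
# e.g. gmpr_sketch(as(otu_table(Ps_obj_filt), "matrix")) yields per-sample factors
# comparable in spirit to the GMPR_factors$gmpr values computed below.
```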
Ps_obj_filt_GMPR <- Ps_obj_filt
Ps_obj_filt %>%
otu_table(.) %>%
t() %>%
as(., "matrix") %>%
GMPR() ->
GMPR_factors
## Begin GMPR size factor calculation ...
## Completed!
## Please watch for the samples with limited sharing with other samples based on NSS! They may be outliers!
Ps_obj_filt %>%
otu_table(.) %>%
t() %*% diag(1 / GMPR_factors$gmpr) %>%
t() %>%
as.data.frame(., row.names = sample_names(Ps_obj_filt)) %>%
otu_table(., taxa_are_rows = FALSE) ->
otu_table(Ps_obj_filt_GMPR)
sample_data(Ps_obj_filt_GMPR)$Library.size <- sample_sums(Ps_obj_filt)
adonis(
otu_table(Ps_obj_filt_GMPR) ~ Library.size,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
)
##
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Library.size, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Library.size 1 0.4246 0.42456 1.3337 0.05481 0.228
## Residuals 23 7.3218 0.31834 0.94519
## Total 24 7.7464 1.00000
PlotLibDist(Ps_obj_filt_GMPR)
Did it improve anything?
PlotReadHist(as(otu_table(Ps_obj_filt_GMPR), "matrix"))
notAllZero <- (rowSums(t(otu_table(Ps_obj_filt_GMPR))) > 0)
vsn::meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_filt_GMPR))[notAllZero, ] + 1)))
We estimate the richness and diversity metrics by simulating 1000 rarefaction events and recalculating the metrics each time; the results are then averaged.
rarefaction.mat <- matrix(0, nrow = nsamples(Ps_obj_filt), ncol = bootstraps)
rownames(rarefaction.mat) <- sample_names(Ps_obj_filt)
rich.ests <- list(S.obs = rarefaction.mat, S.chao1 = rarefaction.mat, se.chao1 = rarefaction.mat,
S.ACE = rarefaction.mat, se.ACE = rarefaction.mat)
for (i in seq(bootstraps)) {
sub.OTUmat <- rrarefy(otu_table(Ps_obj_filt), min(rowSums(otu_table(Ps_obj_filt))))
for (j in seq(length(rich.ests))) {
rich.ests[[j]][, i] <- t(estimateR(sub.OTUmat))[, j]
}
}
Richness <- data.frame(row.names = row.names(rich.ests[[1]]))
for (i in c(1, seq(2, length(rich.ests), 2))) {
S <- apply(rich.ests[[i]], 1, mean)
if (i == 1) {
se <- apply(rich.ests[[i]], 1, function(x) (mean(x)/sqrt(length(x))))
} else se <- apply(rich.ests[[i + 1]], 1, mean)
Richness <- cbind(Richness, S, se)
}
colnames(Richness) <- c("S.obs", "S.obs.se", "S.chao1", "S.chao1.se", "S.ACE", "S.ACE.se")
saveRDS(Richness, file = paste0("./Results/", Proj_name, "_richness.RDS"))
write.csv(Richness, file = paste0("./Results/", Proj_name, "_richness.csv"))
ses <- grep("\\.se", colnames(Richness))
Richness[, ses] %>%
gather(key = "est.se") -> se.dat
Richness[, -unique(ses)] %>%
gather(key = "est") -> mean.dat
n <- length(unique(mean.dat$est))
# diversity indices
diversity.inds <- list(Shannon = rarefaction.mat, inv.simpson = rarefaction.mat, BP = rarefaction.mat)
for (i in seq(bootstraps)) {
sub.OTUmat <- rrarefy(otu_table(Ps_obj_filt), min(rowSums(otu_table(Ps_obj_filt))))
diversity.inds$Shannon[, i] <- diversityresult(sub.OTUmat, index = 'Shannon', method = 'each site', digits = 3)[, 1]
diversity.inds$inv.simpson[, i] <- diversityresult(sub.OTUmat, index = 'inverseSimpson', method = 'each site', digits = 3)[, 1]
diversity.inds$BP[, i] <- diversityresult(sub.OTUmat, index = 'Berger', method = 'each site', digits = 3)[, 1]
}
Diversity <- data.frame(row.names = row.names(diversity.inds[[1]]))
for (i in seq(length(diversity.inds))) {
S <- apply(diversity.inds[[i]], 1, mean)
se <- apply(diversity.inds[[i]], 1, function(x) (mean(x)/sqrt(length(x))))
Diversity <- cbind(Diversity, S, se)
}
colnames(Diversity) <- c("Shannon", "Shannon.se", "Inv.simpson", "Inv.simpson.se", "BP", "BP.se")
ses <- grep("\\.se", colnames(Diversity))
Diversity[, ses] %>% gather(key = "est.se") -> se.dat
Diversity[, -unique(ses)] %>% gather(key = "est") -> mean.dat
saveRDS(Diversity, file = paste0("./Results/", Proj_name, "_diversity.RDS"))
write.csv(Diversity, file = paste0("./Results/", Proj_name, "_diversity.csv"))
# make combined richness diversity
Richness_Diversity <- cbind(Richness, Diversity)
ses <- grep("\\.se", colnames(Richness_Diversity))
Richness_Diversity[, ses] %>%
gather(key = "est.se") ->
se.dat
Richness_Diversity[, -unique(ses)] %>%
gather(key = "Metric",
value = "Estimate") ->
mean.dat
Richness_Diversity_long <-
cbind(
Sample = rep(rownames(Richness_Diversity), times = length(unique(mean.dat$Metric))),
mean.dat,
lerr = mean.dat$Estimate - se.dat$value,
herr = mean.dat$Estimate + se.dat$value
)
Richness_Diversity_long$Metric <-
factor(
Richness_Diversity_long$Metric,
levels = c("S.obs", "S.chao1", "S.ACE", "Shannon", "Inv.simpson", "BP"),
labels = c("S obs.", "Chao1", "ACE", "Shannon", "Inv. Simpson" , "Berger Parker")
)
Richness_Diversity_long %<>%
cbind(.,
sample_data(Ps_obj_filt))
# S Obs
(mod_obsS <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "S obs.")))
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 67.75 647.17 10123.07 62200.46
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 54.42358
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 588166 1 198.5755 3.566e-12 ***
## Location 3993 1 1.3480 0.25866
## Rock.type 7767 1 2.6224 0.12028
## Location:Rock.type 10123 1 3.4177 0.07863 .
## Residuals 62200 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 310.9173
##
## Location
## Slope City
## 312 309
## rep 13 12
##
## Rock.type
## Chalk Limestone
## 306 316
## rep 13 12
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 290 339
## rep 7 6
## City 325 293
## rep 6 6
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 67.75 647.17 10123.07 62200.46
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 54.42358
## Estimated effects may be unbalanced
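`TestAlphaV3()` is a project helper; judging from the printed call it fits `aov(Estimate ~ Location * Rock.type)` and, because the group sizes are unequal, reports Type III sums of squares. A minimal equivalent for a single metric, assuming the car package (object names below are illustrative), would be:

```r
# Sketch of the model TestAlphaV3() appears to fit for one metric (observed S):
library(car)   # Anova() with Type III sums of squares

obsS_dat <- dplyr::filter(Richness_Diversity_long, Metric == "S obs.")
obsS_aov <- aov(Estimate ~ Location * Rock.type, data = obsS_dat)
Anova(obsS_aov, type = 3)   # Type III tests for the unbalanced design
```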
# Post-hoc test
marginal <- emmeans(mod_obsS,
~ Location : Rock.type)
summary(marginal)
Location | Rock.type | emmean | SE | df | lower.CL | upper.CL |
---|---|---|---|---|---|---|
Slope | Chalk | 289.8684 | 20.57018 | 21 | 247.0904 | 332.6465 |
City | Chalk | 325.0227 | 22.21834 | 21 | 278.8171 | 371.2282 |
Slope | Limestone | 338.9012 | 22.21834 | 21 | 292.6956 | 385.1067 |
City | Limestone | 293.3852 | 22.21834 | 21 | 247.1796 | 339.5907 |
contrast(marginal,
method = "pairwise",
adjust = "tukey")
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk -35.15 30.3 21 -1.161 0.6571
## Slope Chalk - Slope Limestone -49.03 30.3 21 -1.619 0.3897
## Slope Chalk - City Limestone -3.52 30.3 21 -0.116 0.9994
## City Chalk - Slope Limestone -13.88 31.4 21 -0.442 0.9705
## City Chalk - City Limestone 31.64 31.4 21 1.007 0.7472
## Slope Limestone - City Limestone 45.52 31.4 21 1.449 0.4847
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(obsS_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART
 | Location | Rock.type | emmean | SE | df | lower.CL | upper.CL | .group |
---|---|---|---|---|---|---|---|---|
1 | Slope | Chalk | 289.8684 | 20.57018 | 21 | 233.8552 | 345.8817 | a |
4 | City | Limestone | 293.3852 | 22.21834 | 21 | 232.8840 | 353.8864 | a |
2 | City | Chalk | 325.0227 | 22.21834 | 21 | 264.5215 | 385.5239 | a |
3 | Slope | Limestone | 338.9012 | 22.21834 | 21 | 278.4000 | 399.4024 | a |
(mod_obsS %>%
anova() %>%
mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
mod_obsS_ANOVA)
 | Df | Sum Sq | Mean Sq | F value | Pr(>F) | Part Eta Sq |
---|---|---|---|---|---|---|
Location | 1 | 67.7481 | 67.7481 | 0.0228730 | 0.8812313 | 0.0009276 |
Rock.type | 1 | 647.1708 | 647.1708 | 0.2184966 | 0.6450019 | 0.0088607 |
Location:Rock.type | 1 | 10123.0692 | 10123.0692 | 3.4177313 | 0.0786297 | 0.1385992 |
Residuals | 21 | 62200.4587 | 2961.9266 | NA | NA | 0.8516126 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_obsS, Location ~ Rock.type)
# summary(as.glht(pairs(marginal))) # fails because of unbalanced design
# Shannon
(mod_Shannon <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Shannon")))
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 1.325063 0.093952 0.262865 4.794581
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 0.4778215
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 308.927 1 1353.0847 < 2e-16 ***
## Location 1.393 1 6.1011 0.02216 *
## Rock.type 0.083 1 0.3616 0.55407
## Location:Rock.type 0.263 1 1.1513 0.29545
## Residuals 4.795 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 3.526152
##
## Location
## Slope City
## 3.75 3.29
## rep 13.00 12.00
##
## Rock.type
## Chalk Limestone
## 3.47 3.59
## rep 13.00 12.00
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 3.60 3.92
## rep 7.00 6.00
## City 3.33 3.24
## rep 6.00 6.00
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 1.325063 0.093952 0.262865 4.794581
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 0.4778215
## Estimated effects may be unbalanced
# Post-hoc test
marginal <- emmeans(mod_Shannon,
~ Location : Rock.type)
summary(marginal)
Location | Rock.type | emmean | SE | df | lower.CL | upper.CL |
---|---|---|---|---|---|---|
Slope | Chalk | 3.599318 | 0.1805996 | 21 | 3.223740 | 3.974895 |
City | Chalk | 3.331707 | 0.1950698 | 21 | 2.926037 | 3.737377 |
Slope | Limestone | 3.920039 | 0.1950698 | 21 | 3.514369 | 4.325709 |
City | Limestone | 3.241351 | 0.1950698 | 21 | 2.835681 | 3.647020 |
contrast(marginal,
method = "pairwise",
adjust = "tukey")
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk 0.2676 0.266 21 1.007 0.7473
## Slope Chalk - Slope Limestone -0.3207 0.266 21 -1.206 0.6297
## Slope Chalk - City Limestone 0.3580 0.266 21 1.347 0.5450
## City Chalk - Slope Limestone -0.5883 0.276 21 -2.133 0.1754
## City Chalk - City Limestone 0.0904 0.276 21 0.328 0.9875
## Slope Limestone - City Limestone 0.6787 0.276 21 2.460 0.0962
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(Shannon_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART
 | Location | Rock.type | emmean | SE | df | lower.CL | upper.CL | .group |
---|---|---|---|---|---|---|---|---|
4 | City | Limestone | 3.241351 | 0.1950698 | 21 | 2.710170 | 3.772532 | a |
2 | City | Chalk | 3.331707 | 0.1950698 | 21 | 2.800526 | 3.862888 | a |
1 | Slope | Chalk | 3.599318 | 0.1805996 | 21 | 3.107540 | 4.091096 | a |
3 | Slope | Limestone | 3.920039 | 0.1950698 | 21 | 3.388858 | 4.451220 | a |
(mod_Shannon %>%
anova() %>%
mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
mod_Shannon_ANOVA)
 | Df | Sum Sq | Mean Sq | F value | Pr(>F) | Part Eta Sq |
---|---|---|---|---|---|---|
Location | 1 | 1.3250630 | 1.3250630 | 5.8037028 | 0.0252549 | 0.2045968 |
Rock.type | 1 | 0.0939515 | 0.0939515 | 0.4115025 | 0.5281480 | 0.0145066 |
Location:Rock.type | 1 | 0.2628651 | 0.2628651 | 1.1513345 | 0.2954460 | 0.0405878 |
Residuals | 21 | 4.7945811 | 0.2283134 | NA | NA | 0.7403088 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_Shannon, Location ~ Rock.type)
# ACE
(mod_ACE <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "ACE")))
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 10.12 1969.32 8755.53 68809.75
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 57.24207
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 4281315 1 1306.6116 <2e-16 ***
## Location 4 1 0.0012 0.9725
## Rock.type 1671 1 0.5100 0.4830
## Location:Rock.type 8756 1 2.6721 0.1170
## Residuals 68810 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 413.6874
##
## Location
## Slope City
## 413 414
## rep 13 12
##
## Rock.type
## Chalk Limestone
## 405 423
## rep 13 12
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 388 442
## rep 7 6
## City 425 404
## rep 6 6
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 10.12 1969.32 8755.53 68809.75
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 57.24207
## Estimated effects may be unbalanced
# Post-hoc test
marginal <- emmeans(mod_ACE,
~ Location : Rock.type)
summary(marginal)
Location | Rock.type | emmean | SE | df | lower.CL | upper.CL |
---|---|---|---|---|---|---|
Slope | Chalk | 388.1990 | 21.63547 | 21 | 343.2056 | 433.1924 |
City | Chalk | 424.9111 | 23.36898 | 21 | 376.3126 | 473.5095 |
Slope | Limestone | 442.0997 | 23.36898 | 21 | 393.5012 | 490.6981 |
City | Limestone | 403.7881 | 23.36898 | 21 | 355.1896 | 452.3865 |
contrast(marginal,
method = "pairwise",
adjust = "tukey")
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk -36.7 31.8 21 -1.153 0.6620
## Slope Chalk - Slope Limestone -53.9 31.8 21 -1.693 0.3522
## Slope Chalk - City Limestone -15.6 31.8 21 -0.490 0.9606
## City Chalk - Slope Limestone -17.2 33.0 21 -0.520 0.9533
## City Chalk - City Limestone 21.1 33.0 21 0.639 0.9181
## Slope Limestone - City Limestone 38.3 33.0 21 1.159 0.6581
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(ACE_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART
 | Location | Rock.type | emmean | SE | df | lower.CL | upper.CL | .group |
---|---|---|---|---|---|---|---|---|
1 | Slope | Chalk | 388.1990 | 21.63547 | 21 | 329.2849 | 447.1130 | a |
4 | City | Limestone | 403.7881 | 23.36898 | 21 | 340.1536 | 467.4225 | a |
2 | City | Chalk | 424.9111 | 23.36898 | 21 | 361.2766 | 488.5455 | a |
3 | Slope | Limestone | 442.0997 | 23.36898 | 21 | 378.4653 | 505.7341 | a |
(mod_ACE %>%
anova() %>%
mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
mod_ACE_ANOVA)
 | Df | Sum Sq | Mean Sq | F value | Pr(>F) | Part Eta Sq |
---|---|---|---|---|---|---|
Location | 1 | 10.11758 | 10.11758 | 0.0030878 | 0.9562114 | 0.0001272 |
Rock.type | 1 | 1969.32310 | 1969.32310 | 0.6010164 | 0.4468331 | 0.0247574 |
Location:Rock.type | 1 | 8755.52902 | 8755.52902 | 2.6720940 | 0.1170242 | 0.1100705 |
Residuals | 21 | 68809.74549 | 3276.65455 | NA | NA | 0.8650448 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_ACE, Location ~ Rock.type)
# summary(as.glht(pairs(marginal))) # fails because of unbalanced design
#Inv. Simpson
(mod_InvSim <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Inv. Simpson")))
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 507.6609 107.4251 117.0279 1059.8590
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 7.104187
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 7346.2 1 145.5569 6.594e-11 ***
## Location 542.8 1 10.7552 0.003577 **
## Rock.type 99.1 1 1.9643 0.175658
## Location:Rock.type 117.0 1 2.3188 0.142737
## Residuals 1059.9 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 17.20043
##
## Location
## Slope City
## 21.5 12.5
## rep 13.0 12.0
##
## Rock.type
## Chalk Limestone
## 15.2 19.4
## rep 13.0 12.0
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 17.69 26.01
## rep 7.00 6.00
## City 12.68 12.34
## rep 6.00 6.00
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 507.6609 107.4251 117.0279 1059.8590
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 7.104187
## Estimated effects may be unbalanced
# Post-hoc test
marginal <- emmeans(mod_InvSim,
~ Location : Rock.type)
summary(marginal)
Location | Rock.type | emmean | SE | df | lower.CL | upper.CL |
---|---|---|---|---|---|---|
Slope | Chalk | 17.68601 | 2.685130 | 21 | 12.101979 | 23.27005 |
City | Chalk | 12.68276 | 2.900272 | 21 | 6.651317 | 18.71421 |
Slope | Limestone | 26.01445 | 2.900272 | 21 | 19.983005 | 32.04590 |
City | Limestone | 12.33755 | 2.900272 | 21 | 6.306105 | 18.36900 |
contrast(marginal,
method = "pairwise",
adjust = "tukey")
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk 5.003 3.95 21 1.266 0.5937
## Slope Chalk - Slope Limestone -8.328 3.95 21 -2.107 0.1834
## Slope Chalk - City Limestone 5.348 3.95 21 1.353 0.5410
## City Chalk - Slope Limestone -13.332 4.10 21 -3.250 0.0185
## City Chalk - City Limestone 0.345 4.10 21 0.084 0.9998
## Slope Limestone - City Limestone 13.677 4.10 21 3.335 0.0154
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(InvSim_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART
 | Location | Rock.type | emmean | SE | df | lower.CL | upper.CL | .group |
---|---|---|---|---|---|---|---|---|
4 | City | Limestone | 12.33755 | 2.900272 | 21 | 4.440021 | 20.23508 | a |
2 | City | Chalk | 12.68276 | 2.900272 | 21 | 4.785233 | 20.58029 | a |
1 | Slope | Chalk | 17.68601 | 2.685130 | 21 | 10.374321 | 24.99771 | ab |
3 | Slope | Limestone | 26.01445 | 2.900272 | 21 | 18.116921 | 33.91198 | b |
(mod_InvSim %>%
anova() %>%
mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
mod_InvSim_ANOVA)
 | Df | Sum Sq | Mean Sq | F value | Pr(>F) | Part Eta Sq |
---|---|---|---|---|---|---|
Location | 1 | 507.6609 | 507.66088 | 10.058770 | 0.0045968 | 0.2832972 |
Rock.type | 1 | 107.4251 | 107.42510 | 2.128516 | 0.1593772 | 0.0599479 |
Location:Rock.type | 1 | 117.0279 | 117.02787 | 2.318785 | 0.1427372 | 0.0653067 |
Residuals | 21 | 1059.8590 | 50.46948 | NA | NA | 0.5914481 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_InvSim, Location ~ Rock.type)
# summary(as.glht(pairs(marginal))) # fails because of unbalanced design
#Berger Parker
(mod_BP <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Berger Parker")))
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 0.07180240 0.01029748 0.00019359 0.11203611
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 0.07304145
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.95185 1 178.4141 9.851e-12 ***
## Location 0.07398 1 13.8668 0.001255 **
## Rock.type 0.01018 1 1.9080 0.181711
## Location:Rock.type 0.00019 1 0.0363 0.850753
## Residuals 0.11204 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 0.1943002
##
## Location
## Slope City
## 0.143 0.25
## rep 13.000 12.00
##
## Rock.type
## Chalk Limestone
## 0.214 0.173
## rep 13.000 12.000
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 0.16 0.12
## rep 7.00 6.00
## City 0.27 0.23
## rep 6.00 6.00
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 0.07180240 0.01029748 0.00019359 0.11203611
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 0.07304145
## Estimated effects may be unbalanced
# Post-hoc test
marginal <- emmeans(mod_BP,
~ Location : Rock.type)
summary(marginal)
Location | Rock.type | emmean | SE | df | lower.CL | upper.CL |
---|---|---|---|---|---|---|
Slope | Chalk | 0.1640529 | 0.0276071 | 21 | 0.1066408 | 0.2214649 |
City | Chalk | 0.2675148 | 0.0298190 | 21 | 0.2055027 | 0.3295269 |
Slope | Limestone | 0.1180283 | 0.0298190 | 21 | 0.0560162 | 0.1800404 |
City | Limestone | 0.2326462 | 0.0298190 | 21 | 0.1706341 | 0.2946583 |
contrast(marginal,
method = "pairwise",
adjust = "tukey")
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk -0.1035 0.0406 21 -2.546 0.0814
## Slope Chalk - Slope Limestone 0.0460 0.0406 21 1.133 0.6741
## Slope Chalk - City Limestone -0.0686 0.0406 21 -1.688 0.3545
## City Chalk - Slope Limestone 0.1495 0.0422 21 3.545 0.0096
## City Chalk - City Limestone 0.0349 0.0422 21 0.827 0.8411
## Slope Limestone - City Limestone -0.1146 0.0422 21 -2.718 0.0577
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(BP_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART
 | Location | Rock.type | emmean | SE | df | lower.CL | upper.CL | .group |
---|---|---|---|---|---|---|---|---|
3 | Slope | Limestone | 0.1180283 | 0.0298190 | 21 | 0.0368302 | 0.1992265 | a |
1 | Slope | Chalk | 0.1640529 | 0.0276071 | 21 | 0.0888780 | 0.2392278 | ab |
4 | City | Limestone | 0.2326462 | 0.0298190 | 21 | 0.1514480 | 0.3138443 | ab |
2 | City | Chalk | 0.2675148 | 0.0298190 | 21 | 0.1863167 | 0.3487130 | b |
(mod_BP %>%
anova() %>%
mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
mod_BP_ANOVA)
 | Df | Sum Sq | Mean Sq | F value | Pr(>F) | Part Eta Sq |
---|---|---|---|---|---|---|
Location | 1 | 0.0718024 | 0.0718024 | 13.4586112 | 0.0014309 | 0.3694878 |
Rock.type | 1 | 0.0102975 | 0.0102975 | 1.9301546 | 0.1792983 | 0.0529898 |
Location:Rock.type | 1 | 0.0001936 | 0.0001936 | 0.0362871 | 0.8507532 | 0.0009962 |
Residuals | 21 | 0.1120361 | 0.0053351 | NA | NA | 0.5765263 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_BP, Location ~ Rock.type)
# summary(as.glht(pairs(marginal))) # fails because of unbalanced design
Richness_Diversity_long %>%
dplyr::filter(!Metric %in% c("Chao1", "ACE")) %>%
mutate_at(., "Metric", ~fct_recode(., "Observed S" = "S obs.", "Inv. Simpson" = "Inv. Simpson", "Berger Parker" = "Berger Parker")) %>%
mutate_at(., "Metric", ~fct_relevel(., "Observed S", "Inv. Simpson", "Shannon", "Berger Parker")) %>%
droplevels() ->
Richness_Diversity_long2plot
p_alpha <- ggplot() +
geom_violin(data = Richness_Diversity_long2plot,
aes(
x = Location,
y = Estimate,
ymin = lerr,
ymax = herr
), colour = "grey",
fill = "grey",
alpha = 1 / 3) +
geom_jitter2(data = Richness_Diversity_long2plot,
aes(x = Location,
y = Estimate,
ymin = lerr,
ymax = herr,
colour = Location), size = 3, width = 0.2, alpha = 2/3) +
scale_colour_manual(values = Gradient.colours[c(5, 6, 11)], name = "") +
# geom_errorbar(alpha = 1 / 2, width = 0.3) +
xlab("") +
ylab("") +
theme(axis.text.x = element_text(
angle = 45,
vjust = 0.9,
hjust = 1
),
legend.position="none") +
facet_grid(Metric ~ Rock.type, scale = "free") +
theme(strip.text = element_text(size = f_size - 4)) +
background_grid(major = "y",
minor = "none",
size.major = 0.8)
dat_text <- data.frame(
label = str_remove_all(c(obsS_pairwise$.group[1:4],
Shannon_pairwise$.group[1:4],
InvSim_pairwise$.group[1:4],
BP_pairwise$.group[1:4]),
pattern = " "),
Metric = fct_inorder(rep(levels(Richness_Diversity_long2plot$Metric), each = 4)),
Rock.type = fct_c(obsS_pairwise$Rock.type[1:4],
Shannon_pairwise$Rock.type[1:4],
InvSim_pairwise$Rock.type[1:4],
BP_pairwise$Rock.type[1:4]),
x = fct_c(obsS_pairwise$Location[1:4],
Shannon_pairwise$Location[1:4],
InvSim_pairwise$Location[1:4],
BP_pairwise$Location[1:4]),
# x = as.factor(levels(Richness_Diversity_long2plot$Climate.Source)),
y = rep(c(520, 45, 5.2, 0.52), each = 4)
# y = rep(c(40, 140, 0.5), each = 6)
)
p_alpha <- p_alpha + geom_text(
data = dat_text,
mapping = aes(x = x, y = y, label = label),
nudge_x = 0,
nudge_y = 0
)
print(p_alpha)
Calculate and plot beta diversity metrics.
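The `horn` method passed to `adonis()` below is the Morisita-Horn dissimilarity; for reference, the distance matrix that the PERMANOVA is built on can also be computed explicitly with vegan (a sketch, not used downstream; `MH_dist` is an illustrative name):

```r
# Morisita-Horn dissimilarity between samples of the GMPR-normalised OTU table
# (samples as rows); shown for orientation only.
MH_dist <- vegan::vegdist(as(otu_table(Ps_obj_filt_GMPR), "matrix"),
                          method = "horn")
```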
(mod1 <- adonis(
otu_table(Ps_obj_filt_GMPR) ~ Location * Rock.type,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
))
##
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Location * Rock.type, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.63 0.629 2.12 0.081 0.024 *
## Rock.type 1 0.39 0.386 1.30 0.050 0.231
## Location:Rock.type 1 0.49 0.486 1.63 0.063 0.101
## Residuals 21 6.25 0.297 0.806
## Total 24 7.75 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(mod2 <- adonis(
otu_table(Ps_obj_filt_GMPR) ~ Location,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
))
##
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Location, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.63 0.629 2.03 0.081 0.035 *
## Residuals 23 7.12 0.309 0.919
## Total 24 7.75 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1_pairwise <- PairwiseAdonis(
otu_table(Ps_obj_filt_GMPR),
sample_data(Ps_obj_filt_GMPR)$Location,
sim.function = "vegdist",
sim.method = "horn",
p.adjust.m = "BH"
)
print(mod1_pairwise)
## pairs total.DF F.Model R2 p.value p.adjusted sig
## 1 City - Slope 24 2.03 0.0812 0.038 0.038 .
(sig_pairs1 <- list(as.character(mod1_pairwise$pairs[mod1_pairwise$p.adjusted < 0.05])))
## [[1]]
## [1] "City - Slope"
mod2_pairwise <- PairwiseAdonis(
otu_table(Ps_obj_filt_GMPR),
sample_data(Ps_obj_filt_GMPR)$Rock.type,
sim.function = "vegdist",
sim.method = "horn",
p.adjust.m = "BH"
)
print(mod2_pairwise)
## pairs total.DF F.Model R2 p.value p.adjusted sig
## 1 Chalk - Limestone 24 1.21 0.0501 0.281 0.281
(sig_pairs2 <- list(as.character(mod2_pairwise$pairs[mod2_pairwise$p.adjusted < 0.05])))
## [[1]]
## character(0)
mod3_pairwise <- PairwiseAdonis(
otu_table(Ps_obj_filt_GMPR),
sample_data(Ps_obj_filt_GMPR)$Location.rock,
sim.function = "vegdist",
sim.method = "horn",
p.adjust.m = "BH"
)
print(mod3_pairwise)
## pairs total.DF F.Model R2 p.value p.adjusted sig
## 1 City:Chalk - City:Limestone 11 1.20 0.1069 0.329 0.395
## 2 City:Chalk - Slope:Limestone 11 1.43 0.1251 0.182 0.273
## 3 City:Chalk - Slope:Chalk 12 2.92 0.2099 0.012 0.072
## 4 City:Limestone - Slope:Limestone 11 1.07 0.0967 0.424 0.424
## 5 City:Limestone - Slope:Chalk 12 1.99 0.1531 0.089 0.178
## 6 Slope:Limestone - Slope:Chalk 12 1.90 0.1471 0.074 0.178
(sig_pairs3 <- list(as.character(mod3_pairwise$pairs[mod3_pairwise$p.adjusted < 0.1])))
## [[1]]
## [1] "City:Chalk - Slope:Chalk"
Unifrac_mat <- UniFrac(Ps_obj_filt,
weighted = TRUE,
normalized = TRUE,
parallel = TRUE,
fast = TRUE)
(mod1 <- adonis(
Unifrac_mat ~ Location * Rock.type,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
))
##
## Call:
## adonis(formula = Unifrac_mat ~ Location * Rock.type, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.044 0.0443 1.24 0.049 0.268
## Rock.type 1 0.050 0.0496 1.39 0.055 0.193
## Location:Rock.type 1 0.060 0.0601 1.68 0.066 0.097 .
## Residuals 21 0.751 0.0357 0.830
## Total 24 0.905 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(mod2 <- adonis(
Unifrac_mat ~ Location,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
))
##
## Call:
## adonis(formula = Unifrac_mat ~ Location, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.044 0.0443 1.19 0.049 0.28
## Residuals 23 0.860 0.0374 0.951
## Total 24 0.905 1.000
The difference between city and slope is significant based on the Morisita-Horn dissimilarities of the OTU profiles, but not based on the weighted UniFrac distances.
Ps_obj_ord1 <- ordinate(Ps_obj_filt_GMPR, "CAP", "horn", formula = Ps_obj_filt_GMPR ~ Location * Rock.type)
Ps_obj_ord2 <- ordinate(Ps_obj_filt_GMPR, "CAP", "horn", formula = Ps_obj_filt_GMPR ~ Location)
explained <- eigenvals(Ps_obj_ord2)/sum( eigenvals(Ps_obj_ord2)) * 100
explained <- as.numeric(format(round(explained, 1), nsmall = 1))
Ps_obj_filt_GMPR %>%
plot_ordination(., Ps_obj_ord2, type = "samples", shape = "Rock.type", color = "Location", justDF = TRUE) ->
ord_df
p_ord <- ggplot(ord_df,
aes(
x = CAP1,
y = MDS1,
shape = Rock.type,
color = Location
)) +
stat_ellipse(
aes(x = CAP1,
y = MDS1,
fill = Location
),
geom = "polygon",
alpha = 1/4,
type = "t",
level = 0.9,
# linetype = 2,
inherit.aes = FALSE
) +
geom_point2(size = 4, alpha = 2 / 3) +
guides(colour = guide_legend(title = "Location"), shape = guide_legend(title = "Rock.type")) +
scale_colour_manual(values = Gradient.colours) +
scale_fill_manual(values = Gradient.colours, guide = "none") +
labs(x = sprintf("CAP1 (%s%%)", explained[1]),
     y = sprintf("MDS1 (%s%%)", explained[2])) +
coord_fixed(ratio = sqrt(explained[2] / explained[1])) +
theme(legend.justification = "top")
# facet_wrap(. ~ Rock.type)
print(p_ord)
Ps_obj_ord1 <- ordinate(Ps_obj_filt, "PCoA", "Unifrac", formula = Ps_obj_filt ~ Location * Rock.type)
Ps_obj_ord2 <- ordinate(Ps_obj_filt, "PCoA", "Unifrac", formula = Ps_obj_filt ~ Location)
explained <- Ps_obj_ord2$values$Relative_eig/sum(Ps_obj_ord2$values$Relative_eig) * 100
explained <- as.numeric(format(round(explained, 1), nsmall = 1))
Ps_obj_filt %>%
plot_ordination(., Ps_obj_ord2, type = "samples", shape = "Rock.type", color = "Location", justDF = TRUE) ->
ord_df
p_ord_phylo <- ggplot(ord_df,
aes(
x = Axis.1,
y = Axis.2,
shape = Rock.type,
color = Location
)) +
stat_ellipse(
aes(x = Axis.1,
y = Axis.2,
fill = Location
),
geom = "polygon",
alpha = 1/4,
type = "t",
level = 0.9,
# linetype = 2,
inherit.aes = FALSE
) +
geom_point2(size = 4, alpha = 2 / 3) +
# theme_bw(base_size = f_size) +
guides(colour = guide_legend(title = "Location"), shape = guide_legend(title = "Rock.type")) +
scale_colour_manual(values = Gradient.colours) +
scale_fill_manual(values = Gradient.colours, guide = "none") +
labs(x = sprintf("PCo1 (%s%%)", explained[1]),
     y = sprintf("PCo2 (%s%%)", explained[2])) +
coord_fixed(ratio = sqrt(explained[2] / explained[1])) #+
# facet_wrap(. ~ Rock.type)
print(p_ord_phylo)
STAMPR analysis of the differences in each phylum between locations, using an Aligned Rank Transformed ANOVA test and post-hoc estimated marginal means.
Taxa_tests_phylum1 <- STAMPR2(Ps_obj_filt, vars2test = "Location", threshold = 0.05, outputfile = paste0(Proj_name, "_Location"))
pSTAMPR1 <- plotSTAMPR(Taxa_tests_phylum1, pair = "Slope - City", f_size = f_size)
print(pSTAMPR1)
Taxa_tests_phylum2 <- STAMPR2(Ps_obj_filt, vars2test = c("Location", "Rock.type"), threshold = 0.05, outputfile = paste0(Proj_name, "_Location_Rock"))
pSTAMPR2 <- plotSTAMPR(Taxa_tests_phylum2, pair = "Slope:Chalk - City:Chalk", f_size = f_size)
print(pSTAMPR2)
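`STAMPR2()` and `plotSTAMPR()` are project helpers that are not defined in this document. For orientation, the per-phylum test described above (an Aligned Rank Transformed ANOVA followed by estimated marginal means) might look like the following sketch, assuming the ARTool and emmeans packages and a hypothetical long-format data frame `one_phylum` with a numeric `Abundance` column and `Location` and `Rock.type` factors:

```r
# Hypothetical per-phylum ART ANOVA sketch (one_phylum is illustrative data):
library(ARTool)
library(emmeans)

m_art <- art(Abundance ~ Location * Rock.type, data = one_phylum)
anova(m_art)                          # aligned-rank-transform ANOVA table
emmeans(artlm(m_art, "Location"),     # post-hoc on the model aligned for Location
        pairwise ~ Location)
```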
Agglomerate data and tag rare taxa
Ps_obj_filt_GMPR %>%
transform_sample_counts(., function(x){x / sum(x)} * 100) %>%
tax_glom(.,
"Phylum",
NArm = TRUE) ->
Ps_obj_filt_GMPR_glom
Ps_obj_filt_GMPR_glom_DF <- speedyseq::psmelt(Ps_obj_filt_GMPR_glom)
Ps_obj_filt_GMPR_glom_DF$Phylum %<>% as.character()
# Ps_obj_filt3_glom_DF %<>% mutate(Species = fct_relevel(Species, "NA", after = Inf))
# group dataframe by Phylum, calculate sum rel. abundance
Ps_obj_filt_GMPR_glom_DF %>%
group_by(Phylum) %>%
summarise(Sum = sum(Abundance) / nsamples(Ps_obj_filt_GMPR_glom) ) ->
Sums
# find Phyla whose mean rel. abundance is less than 0.05%
Rare_phyla0.05 <- Sums[Sums$Sum <= 0.05, ]$Phylum
# change their name to "Rare"
Ps_obj_filt_GMPR_glom_DF[Ps_obj_filt_GMPR_glom_DF$Phylum %in% Rare_phyla0.05, ]$Phylum <- 'Rare'
# re-group
Ps_obj_filt_GMPR_glom_DF %>%
group_by(Sample, Phylum, Location, Rock.type, Location.rock) %>%
summarise(Abundance = sum(Abundance)) ->
Ps_obj_filt_GMPR_glom_DF_2plot
# ab.taxonomy$Freq <- sqrt(ab.taxonomy$Freq)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("unclassified", "Unclassified", .)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("uncultured", "Unclassified", .)
Ps_obj_filt_GMPR_glom_DF_2plot %>%
group_by(Sample) %>%
filter(Phylum == "Rare") %>%
summarise(`Rares (%)` = sum(Abundance)) ->
Rares
Summarise taxonomy
# Percentage of reads classified as rare
Rares %>%
kable(., digits = c(2), caption = "Percentage of reads per sample classified as rare:") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Sample | Rares (%) |
---|---|
ShivSClk1S12 | 0.01 |
ShivSClk2S15 | 0.59 |
ShivSClk3S16 | 0.02 |
ShivSClk4S17 | 0.01 |
ShivSClk5S18 | 0.00 |
ShivSLimek1S13 | 0.02 |
ShivSLimek2S14 | 0.04 |
ShivSLimek3S19 | 0.11 |
ShivSLimek4S20 | 0.24 |
ShivSLimek5S21 | 0.01 |
ShivSLimek6S25 | 0.01 |
SlpClk1S3 | 0.24 |
SlpClk2S4 | 0.03 |
SlpClk3S5 | 0.03 |
SlpClk4S6 | 0.01 |
SlpClk5S7 | 0.01 |
SlpClk6S8 | 0.03 |
SlpClk7S9 | 0.01 |
SlpClk8S10 | 0.02 |
SlpClk9S11 | 0.03 |
SlpLime1S1 | 0.01 |
SlpLime2S2 | 0.02 |
SlpLime3S22 | 0.07 |
SlpLime4S23 | 0.01 |
SlpLime5S24 | 0.02 |
sample_order <- match(Rares$Sample, row.names(sample_data(Ps_obj_filt_GMPR_glom)))
Rares %<>% arrange(., sample_order)
Rares %>%
cbind(., sample_data(Ps_obj_filt_GMPR_glom)) %>%
group_by(Location.rock) %>%
setNames(make.names(names(.), unique = TRUE)) %>% # fails for some reason without it
summarise(`Rares (%)` = mean(`Rares....`)) ->
Rares_merged
# Percentage of reads classified as rare
Rares_merged %>%
kable(., digits = c(2), caption = "Mean percentage of reads per group classified as rare:") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Location.rock | Rares (%) |
---|---|
City:Chalk | 0.11 |
City:Limestone | 0.07 |
Slope:Chalk | 0.02 |
Slope:Limestone | 0.06 |
Plot taxonomy box-plot
Ps_obj_filt_GMPR_glom_DF_2plot %>%
group_by(Phylum) %>%
summarise(sum.Taxa = sum(Abundance)) %>%
arrange(desc(sum.Taxa)) -> Taxa_rank
Ps_obj_filt_GMPR_glom_DF_2plot$Phylum %<>%
factor(., levels = Taxa_rank$Phylum) %>%
fct_relevel(., "Rare", after = Inf)
p_taxa_box <-
ggplot(Ps_obj_filt_GMPR_glom_DF_2plot, aes(x = Phylum, y = (Abundance))) +
geom_boxplot(aes(group = interaction(Phylum, Location)), position = position_dodge(width = 0.9), fatten = 1) +
geom_point2(
aes(colour = Rock.type),
position = position_jitterdodge(dodge.width = 1),
alpha = 1 / 2,
stroke = 0,
size = 2
) +
scale_colour_manual(values = Gradient.colours, name = "") +
# theme_bw()+
labs(x = NULL, y = "Relative abundance (%)") +
guides(colour = guide_legend(override.aes = list(size = 5))) +
facet_grid(Location ~ .) +
background_grid(major = "xy",
minor = "none") +
theme(axis.text.x = element_text(
angle = 45,
vjust = 0.9,
hjust = 0.9
),
legend.position = c(.99, .99),
legend.justification = c("right", "top"),
legend.box.just = "top",
legend.margin = margin(0, 3, 3, 3))
print(p_taxa_box)
Tag rare phyla (for plotting purposes only)
Ps_obj_filt_GMPR_glom <- tax_glom(Ps_obj_filt_GMPR,
"Phylum",
NArm = TRUE) # glomerate to the phylum level
Ps_obj_filt_GMPR_glom_rel <- transform_sample_counts(Ps_obj_filt_GMPR_glom, function(x) x / sum(x)) # transform to rel. ab.
Ps_obj_filt_GMPR_glom_rel_DF <- speedyseq::psmelt(Ps_obj_filt_GMPR_glom_rel) # generate a df
Ps_obj_filt_GMPR_glom_rel_DF$Phylum %<>% as.character() # factor to char
# group dataframe by Phylum, calculate sum rel. abundance
Ps_obj_filt_GMPR_glom_rel_DF %>%
group_by(Phylum) %>%
summarise(Sum = sum(Abundance) / nsamples(Ps_obj_filt_GMPR_glom) ) ->
Sums
# find Phyla whose mean rel. abundance is less than 5%
Rare_phyla0.05 <- Sums[Sums$Sum <= 0.05, ]$Phylum
# change their name to "Rare"
Ps_obj_filt_GMPR_glom_rel_DF[Ps_obj_filt_GMPR_glom_rel_DF$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare'
# re-group
Ps_obj_filt_GMPR_glom_rel_DF %>%
group_by(Phylum) %>%
summarise(Abundance = sum(Abundance)) %>%
arrange(desc(Abundance)) -> Taxa_rank
Detect differentially abundant OTUs using corncob (Martin, Witten, and Willis 2020)
comparison_string <- c("City", "Slope")
Ps_obj_filt %>%
subset_samples(Location %in% c(comparison_string[1], comparison_string[2])) %>%
tax_glom("Order") ->
Ps_obj_filt_pairwise_glom
# Test differential abundance for location
da_Loc <- differentialTest(formula = ~ Location,
phi.formula = ~ Location,
formula_null = ~ 1,
phi.formula_null = ~ Location,
test = "Wald", boot = FALSE,
data = Ps_obj_filt,
fdr_cutoff = 0.05,
full_output = TRUE)
da_Loc_intervals <- plot(da_Loc, level = "Class", data_only = T)
which(is.na(da_Loc$p)) %>% names
## [1] "OTU398" "OTU164" "OTU476" "OTU641" "OTU239" "OTU481" "OTU727" "OTU245"
Ps_obj_filt %>%
transform_sample_counts(., function(x) x / sum(x) * 100) %>%
taxa_sums(.) %>%
map_dbl(~(.x / nsamples(Ps_obj_filt))) %>%
enframe(name = "OTU", value = "Mean abundance (%)") ->
baseMean
map(da_Loc$all_models,15) %>%
map(.,2) %>%
unlist %>% # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
bind_cols(OTU = taxa_names(Ps_obj_filt),
tax_table(Ps_obj_filt),
`Differential abundance` = .,
Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Loc$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
ymin = as.numeric(NA),
ymax = as.numeric(NA)
) %>%
left_join(., baseMean, by = "OTU") ->
da_Loc_df
da_Loc_df %<>% rows_update(., tibble(ymin = da_Loc_intervals$xmin, OTU = da_Loc$significant_taxa), by = "OTU")
da_Loc_df %<>% rows_update(., tibble(ymax = da_Loc_intervals$xmax, OTU = da_Loc$significant_taxa), by = "OTU")
da_Loc_df[da_Loc_df$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' # tag the rare phyla defined above
p_corncob_loc <- GGPlotCorncob(da_Loc_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)
corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Loc_df$`Differential abundance` > 0 & da_Loc_df$Significance == "Pass"), " ⬇", sum(da_Loc_df$`Differential abundance` < 0 & da_Loc_df$Significance == "Pass"), " (", nrow(da_Loc_df), ")")))
p_corncob_loc <- p_corncob_loc +
labs(title = paste(comparison_string, collapse = " - ")) +
coord_cartesian(ylim = c(-10, 10))
print(p_corncob_loc)
write.csv(da_Loc_df, file = paste0("./Results/corncob_", comparison_string[1], "_vs_", comparison_string[2], ".csv"))
Modelling differential abundance and variance between locations discovered `length(da_Loc$significant_taxa)` differentially abundant OTUs.
comparison_string <- c("Limestone", "Chalk")
# Test differential abundance and variance for rock type
da_Rock <- differentialTest(formula = ~ Rock.type,
phi.formula = ~ Rock.type,
formula_null = ~ 1,
phi.formula_null = ~ Rock.type,
test = "Wald", boot = FALSE,
data = Ps_obj_filt,
fdr_cutoff = 0.05,
full_output = TRUE)
da_Rock_intervals <- plot(da_Rock, level = "Class", data_only = TRUE)
which(is.na(da_Rock$p)) %>% names
## [1] "OTU780" "OTU164" "OTU580" "OTU679" "OTU696" "OTU712" "OTU519" "OTU503" "OTU542"
## [10] "OTU245" "OTU229"
map(da_Rock$all_models,15) %>%
map(.,2) %>%
unlist %>% # grab all mu.Rock.type estimates (differences in estimated population relative abundance)
bind_cols(OTU = taxa_names(Ps_obj_filt),
tax_table(Ps_obj_filt),
`Differential abundance` = .,
Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Rock$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
ymin = as.numeric(NA),
ymax = as.numeric(NA)
) %>%
left_join(., baseMean, by = "OTU") ->
da_Rock_df
da_Rock_df %<>% rows_update(., tibble(ymin = da_Rock_intervals$xmin, OTU = da_Rock$significant_taxa), by = "OTU")
da_Rock_df %<>% rows_update(., tibble(ymax = da_Rock_intervals$xmax, OTU = da_Rock$significant_taxa), by = "OTU")
da_Rock_df[da_Rock_df$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' # tag the rare phyla defined above
p_corncob_rock <- GGPlotCorncob(da_Rock_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)
corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Rock_df$`Differential abundance` > 0 & da_Rock_df$Significance == "Pass"), " ⬇", sum(da_Rock_df$`Differential abundance` < 0 & da_Rock_df$Significance == "Pass"), " (", nrow(da_Rock_df), ")")))
p_corncob_rock <- p_corncob_rock +
labs(title = paste(comparison_string, collapse = " - ")) +
coord_cartesian(ylim = c(-10, 10))
print(p_corncob_rock)
write.csv(da_Rock_df, file = paste0("./Results/corncob_", comparison_string[1], "_vs_", comparison_string[2], ".csv"))
Modelling differential abundance and variance between rock types discovered `length(da_Rock$significant_taxa)` differentially abundant OTUs.
# Test differential abundance for location, control for Rock.type for both cases
comparison_string <- c("City", "Slope")
da_Loc_exRock <- differentialTest(formula = ~ Location + Rock.type,
phi.formula = ~ Location + Rock.type,
formula_null = ~ Rock.type,
phi.formula_null = ~ Location + Rock.type,
test = "Wald", boot = FALSE,
data = Ps_obj_filt,
fdr_cutoff = 0.05,
full_output = TRUE)
da_Loc_exRock_intervals <- plot(da_Loc_exRock, level = "Class", data_only = TRUE)
which(is.na(da_Loc_exRock$p)) %>% names
## [1] "OTU780" "OTU398" "OTU164" "OTU476" "OTU580" "OTU679" "OTU696" "OTU712" "OTU641"
## [10] "OTU519" "OTU239" "OTU481" "OTU503" "OTU542" "OTU727" "OTU245" "OTU229"
map(da_Loc_exRock$all_models, 15) %>%
map(., 2) %>%
unlist %>% # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
bind_cols(OTU = taxa_names(Ps_obj_filt),
tax_table(Ps_obj_filt),
`Differential abundance` = .,
Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Loc_exRock$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
ymin = as.numeric(NA),
ymax = as.numeric(NA)
) %>%
left_join(., baseMean, by = "OTU") ->
da_Loc_exRock_df
da_Loc_exRock_df %<>% rows_update(., tibble(ymin = da_Loc_exRock_intervals$xmin, OTU = da_Loc_exRock$significant_taxa), by = "OTU")
da_Loc_exRock_df %<>% rows_update(., tibble(ymax = da_Loc_exRock_intervals$xmax, OTU = da_Loc_exRock$significant_taxa), by = "OTU")
da_Loc_exRock_df[da_Loc_exRock_df$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' # tag the rare phyla defined above
p_corncob_locExroc <- GGPlotCorncob(da_Loc_exRock_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)
corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Loc_exRock_df$`Differential abundance` > 0 & da_Loc_exRock_df$Significance == "Pass"), " ⬇", sum(da_Loc_exRock_df$`Differential abundance` < 0 & da_Loc_exRock_df$Significance == "Pass"), " (", nrow(da_Loc_exRock_df), ")")))
p_corncob_locExroc <- p_corncob_locExroc +
labs(title = paste(comparison_string, collapse = " - ")) +
coord_cartesian(ylim = c(-10, 10))
print(p_corncob_locExroc)
write.csv(da_Loc_exRock_df, file = paste0("./Results/corncob_", comparison_string[1], "_vs_", comparison_string[2], "_ExRockType.csv"))
Modelling differential abundance between locations, while controlling for rock type, discovered `length(da_Loc_exRock$significant_taxa)` differentially abundant OTUs.
mod260 <- bbdml(formula = OTU260 ~ 1,
phi.formula = ~ 1,
data = Ps_obj_filt)
mod260_Loc <- bbdml(formula = OTU260 ~ Location,
phi.formula = ~ Location,
data = Ps_obj_filt)
mod260_Loc_rock <- bbdml(formula = OTU260 ~ Location*Rock.type,
phi.formula = ~ Location*Rock.type,
data = Ps_obj_filt)
lrtest(mod_null = mod260, mod = mod260_Loc)
## [1] 0.00401
# lrtest(mod_null = mod260_Loc, mod = mod260_Loc_rock)
summary(mod260_Loc)
##
## Call:
## bbdml(formula = OTU260 ~ Location, phi.formula = ~Location, data = Ps_obj_filt)
##
##
## Coefficients associated with abundance:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.544 0.241 -35.51 <2e-16 ***
## Location1 0.658 0.241 2.73 0.012 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Coefficients associated with dispersion:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.486 0.406 -20.9 1.5e-15 ***
## Location1 1.379 0.406 3.4 0.0027 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Log-likelihood: -80.86
plot(mod260_Loc, color = "Location", shape = "Rock.type") # add total = TRUE for total counts (i.e. not relative abundance)
# composite_plot <- ((p_alpha + p_taxa_box + plot_layout(widths = c(1, 2))) /(p_ord + pSTAMPR1) + plot_annotation(tag_levels = 'A') & theme(plot.tag = element_text(size = f_size)))
composite_plot <- (p_alpha + p_ord) /(p_taxa_box) / (pSTAMPR1) +
plot_layout(heights = c(1.5, 1, 1)) +
plot_annotation(tag_levels = 'A') & theme(plot.tag = element_text(size = f_size))
plot_file <- "./Results/Microbiome_1"
svglite(paste0(plot_file, ".svg"),
width = 10,
height = 11)
print(composite_plot)
invisible(dev.off())
agg_png(paste0(plot_file, ".png"),
width = 10,
height = 11,
units = 'cm',
res = 900,
scaling = 0.38)
print(composite_plot)
invisible(invisible(dev.off()))
gz(paste0(plot_file, ".svg"), paste0(plot_file, ".svgz"))
knitr::include_graphics(paste0(plot_file, ".png"))
plot_file <- "./Results/Microbiome_2"
svglite(paste0(plot_file, ".svg"),
width = 12,
height = 10)
print(p_corncob_locExroc)
invisible(dev.off())
agg_png(paste0(plot_file, ".png"),
width = 12,
height = 10.5,
units = 'cm',
res = 900,
scaling = 0.38)
print(p_corncob_locExroc)
invisible(invisible(dev.off()))
gz(paste0(plot_file, ".svg"), paste0(plot_file, ".svgz"))
Chen, Jun, and Li Chen. 2017. “GMPR: A novel normalization method for microbiome sequencing data.” bioRxiv, February, 112565. https://doi.org/10.1101/112565.
Martin, Bryan D., Daniela Witten, and Amy D. Willis. 2020. “Modeling Microbial Abundances and Dysbiosis with Beta-Binomial Regression.” Annals of Applied Statistics 14 (1): 94–115. https://doi.org/10.1214/19-AOAS1283.