Download the fastq files, stored here in three zip archives: https://drive.google.com/drive/folders/0B5wF0CrINC0YNmZ5NEhWSFJWcHc
Additionally check the annotation files from Yaohui Bai on an email on July 19 and August 8 2017, and Francis Burdon on Aug 10 2017.
cp -R MJ12214056483-$'\342'$'\210'$'\236'$'\303'$'\277'$'\342'$'\200'$'\234'$'\302'$'\264'$'\302'$'\252'$'\342'$'\200'$'\230'-$'\342'$'\210'$'\202'$'\342'$'\200'$'\241'$'\342'$'\200'$'\224'$'\313'$'\230'$'\342'$'\200'$'\223'$'\342'$'\200'$'\230'-$'\302'$'\240'$'\313'$'\235'$'\303'$'\246'$'\342'$'\200'$'\272'-20140625/515F_907R/valid .
awk '{print $1}' valid/valid.MJ12214056483.515F_907R.fq | \
awk -F'_' '{if ((NR - 1) % 4 == 0) {print $0 ";sample=S" substr($1, 2)} else {print $0}}' > \
valid_copy.fq
usearch -fastq_filter valid_copy.fq -fastq_maxee 1.0 -relabel Filt -fastaout filtered.fa
usearch -fastx_uniques filtered.fa -relabel Uniq -sizeout -fastaout uniques.fa
usearch -cluster_otus uniques.fa -minsize 2 -otus otus.fa -relabel Otu
usearch -usearch_global valid_copy.fq -db otus.fa -strand plus -id 0.97 -otutabout otutab.txt -biomout otutab.json
wget https://www.drive5.com/sintax/rdp_16s_v16.fa.gz
usearch -makeudb_usearch rdp_16s_v16.fa -output rdp_16s.udb
usearch -sintax otus.fa -db rdp_16s.udb -tabbedout reads.sintax -strand both -sintax_cutoff 0.8
wget http://greengenes.lbl.gov/Download/Sequence_Data/Fasta_data_files/core_set_aligned.fasta
pynast -i otus.fa -t core_set_aligned.fasta
Sequence counts:
valid_copy.fq:412446 filtered.fa:334028 uniques.fa:92993 otus.fa:1012
Use the following outgroup:
>JQ837894.1.1415 Archaea;Euryarchaeota;Methanomicrobia;Methanomicrobiales;Methanocorpusculaceae;Methanocalculus;Methanocalculus sp. AMF-B2M
CTCCGGAGGCTATTGCTATCAGGGTTTGACTAAGCCATGCGAGTCGAGAGGTGTAAGACCTCGGCATACTGCTCAGTAAC
ACGTGGATAATCTGCCCTCAGGTGAGGAATAATCCCGGGAAACTGGGGCTAATGCCTCATAGGAGACGGGTGCTGGAATG
CTCTGTCTCCCAAAGGTCCGCCGCCTGAGGATGAGTCTGCGTCCGATTAGGTTGTTGTTGGGGTAACGGCCCAACAAGCC
ATTGATCGGTACGGGTTGTGGGAGCAAGAGCCCGGAGATGGATTCTGAGACATGAATCCAGGCCCTACGGGGCGCAGCAG
GCGCGAAAACTTTACAATGCGAGCAATCGTGATAAGGAAACCCTGAGTGCCTGTCAATGCAGGCTGTTCTGGTGTCTAAC
ACGCACCAGGAGAAAGGGCGGGGCAAGACCGGTGCCAGCCGCCGCGGTAATACCGGCTGCTCGAGTGATAGCCGCTTTTA
CTGGGCTTAAAGCGTTCGTAGCTTGGTTGTCAAGTCTCTGGGGAAATCTTCTGGCTTAACCAGAAGGCGTCTCAGGGAAA
CTGGCGACCTAGGAACCGGGAGAGGTGAGACGTACTTCGGGGGTAGGAGTGAAATCTTGTAATCCCCGAGGGACGACCGA
TGGCGAAGGCATCTCACCAGAACGGCTTCGACAGTGAGGGACGAAAGCTGGGGGAGCAAACCGGATTAGATACCCGGGTA
GTCCCAGCCGTAAACGATGTGCGTTAGGTGTGTCGGTGACCACGAGTCGCCGAGGTGCCGAAGGGAAACCGTGAAACGCA
CCGCCTGGGAAGTACGGTCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAACGGGTGGAGCCTGCGGTTT
AATTGGATTCAACGCCGGACAACTCACCGGATACGACAGCGGAATGATAGCCGGGCTGAAGACTCTGCTTGACCAGCTGA
GAGGAGGTGCATGGCCGTCGTCAGTTCGTACTGTGAAGCATCCTGTTAAGTCAGGCAACGAGCGAGACCCACGCCAACAG
TTGCCAGCATGGTCTCCGGACTGATGGGGACACTGTTGGGACCGCCTCTGCTAAAGGGGAGGAAGGAATGGGCAACGGTA
GGTCAGCATGCCCCGAATTATCCGGGCTACACGCGGGCTACAATGGATGGGACAATGGGTTTCGACACCGAAAGGTGAAG
GTAATCTCCTAACCCCACCCGTAGTTCGGATTGCGGGCTGCAACTCGCCCGCATGAAGCTGGAATCCGTAGTAATCGCGT
CTCACGATGGCGCGGTGAATATGTCCCTGCTCCTTGCACACACCGCCCGTCAAACCACCCGAGTGGGGTCTGGATGAGGC
GGCAGTTTATGCTGCTGTCGAATCTAGGTTCCGCAAGGGGGGTTAAGTCGTAACA
EOF
cat <<'EOF' > outgroup.fasta
<<outgroup>>
cat outgroup.fasta otus.fa > otus_outgroup.fa
conda create -n sina sina
conda activate sina
sina -i otus_outgroup.fa --intype fasta -o otus_align.fa --outtype fasta --db SILVA_132_SSURef_NR99_13_12_17_opt.arb
conda deactivate
fasttree -nt otus_align.fa > otus.tre
Process Yahoui’s raw ITS data files
ls raw/*.fq | while read file; do trunc=$(echo $file | awk -F'[./]' '{print $2}'); awk -F'#' '{print $1}' $file > "trunc/"$trunc".fq"; done
ls trunc/*.fq | grep 'R1' | while read file; do out=$(echo $file | awk -F'[/_]' '{print $2}'); usearch -fastq_mergepairs $file -fastqout "merged/"$out"_merged.fq" -relabel $out"." ; done
cat merged/*.fq | awk '(NR - 1) % 4 == 0 {gsub("^@", "@S"); print} (NR - 1) % 4 != 0 {print}' > seqs.fq
usearch -fastq_filter seqs.fq -fastq_maxee 1.0 -relabel Filt -fastaout filtered.fa
usearch -fastx_uniques filtered.fa -relabel Uniq -sizeout -fastaout uniques.fa
usearch -cluster_otus uniques.fa -minsize 2 -otus otus.fa -relabel Otu
usearch -usearch_global seqs.fq -db otus.fa -strand plus -id 0.97 -otutabout otutab.txt -biomout otutab.json
wget https://files.plutof.ut.ee/public/orig/EB/0C/EB0CCB3A871B77EA75E472D13926271076904A588D2E1C1EA5AFCF7397D48378.zip
unzip EB0CCB3A871B77EA75E472D13926271076904A588D2E1C1EA5AFCF7397D48378.zip
cat uchime_reference_dataset_28.06.2017/untrimmed_sequences/uchime_reference_dataset_untrimmed_28.06.2017.fasta | \
awk -F'|' '/>/ {print ">"$2"|"$5} !/>/ {print $0}' | \
awk '{gsub(";", ","); gsub("\\|", ";tax="); gsub("__", ":"); print}' > \
uchime_its.fa
usearch -makeudb_usearch uchime_its.fa -output its.udb
usearch -sintax otus.fa -db its.udb -tabbedout reads.sintax -strand both -sintax_cutoff 0.8
Sequence counts:
seqs.fq:451000 filtered.fa:448447 uniques.fa:73328 otus.fa:2201
library(tidyverse)
library(vegan)
library(readxl)
library(broom)
library(phyloseq)
library(glue)
library(DESeq2)
library(viridis)
library(ggrepel)
site_labels <-
read_excel("Correct_site_labelling.xlsx") %>%
unite(ID, Site_code, Location)
csa <-
read_csv("ECOIMPACT_csa_data_2013_ngs_means_190308.csv") %>%
select(-X1) %>%
unite(ID, Code_01, Location, sep="_") %>%
left_join(site_labels, by="ID") %>%
filter(complete.cases(.)) %>%
filter(Rep == 1) %>%
select(-Rep) %>%
separate(ID, c("Smp", "USDS"), sep="_", remove = FALSE) %>%
mutate(
USDS = case_when(
USDS == "D" ~ "Downstream",
TRUE ~ "Upstream")) %>%
data.frame(row.names = "ID") %>%
sample_data
ribo_tax_tbl <-
read_tsv("bact_reads.sintax", col_names=FALSE) %>%
select(X1, X4) %>%
mutate(X4 = str_replace_all(X4, "[a-z]:", ""),
X4 = str_replace_all(X4, '"', ""),
X4 = str_replace_all(X4, '_Gp6', "")) %>%
separate(X4, c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus"), sep = ",") %>%
dplyr::rename("OTU" = "X1") %>%
pivot_longer(
cols = -OTU,
names_to = "Level",
values_to = "Value") %>%
mutate(
Value = case_when(
is.na(Value) ~ "Unidentified",
Value == "unidentified" ~ "Unidentified",
TRUE ~ Value)) %>%
pivot_wider(
id_cols = "OTU",
names_from = "Level",
values_from = "Value") %>%
filter(Phylum != "Cyanobacteria/Chloroplast") %>%
data.frame(row.names = "OTU") %>%
as.matrix %>%
tax_table
chloroplast_otus <-
ribo_tax_tbl <-
read_tsv("bact_reads.sintax", col_names=FALSE) %>%
select(X1, X4) %>%
mutate(X4 = str_replace_all(X4, "[a-z]:", ""),
X4 = str_replace_all(X4, '"', ""),
X4 = str_replace_all(X4, '_Gp6', "")) %>%
separate(X4, c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus"), sep = ",") %>%
dplyr::rename("OTU" = "X1") %>%
pivot_longer(
cols = -OTU,
names_to = "Level",
values_to = "Value") %>%
mutate(
Value = case_when(
is.na(Value) ~ "Unidentified",
Value == "unidentified" ~ "Unidentified",
TRUE ~ Value)) %>%
pivot_wider(
id_cols = "OTU",
names_from = "Level",
values_from = "Value") %>%
filter(Phylum == "Cyanobacteria/Chloroplast") %>%
.$OTU
ribo_otu_tbl <-
read_tsv("bact_otutab.txt") %>%
dplyr::rename("OTU" = "#OTU ID") %>%
pivot_longer(
cols = starts_with("S"),
names_to = "ID_sample",
names_prefix = "S",
values_to = "Count") %>%
mutate(ID_sample = as.numeric(ID_sample)) %>%
left_join(
site_labels,
by = "ID_sample") %>%
filter(Rep == 1) %>%
pivot_wider(
id_cols = "OTU",
names_from = "ID",
values_from = "Count") %>%
filter(!(OTU %in% chloroplast_otus)) %>%
data.frame(row.names = "OTU") %>%
otu_table(taxa_are_rows = TRUE)
read_tsv("bact_otutab.txt") %>%
dplyr::rename("OTU" = "#OTU ID") %>%
filter(!(OTU %in% chloroplast_otus)) %>%
write_tsv("bact_otutab_wo_chloroplasts.txt")
read_tsv("bact_otutab.txt") %>%
dplyr::rename("OTU" = "#OTU ID") %>%
pivot_longer(
cols = starts_with("S"),
names_to = "ID_sample",
names_prefix = "S",
values_to = "Count") %>%
mutate(ID_sample = as.numeric(ID_sample)) %>%
left_join(
site_labels,
by = "ID_sample") %>%
filter(Rep == 1,
!(OTU %in% chloroplast_otus)) %>%
separate(Yaohui_labelling, into = c("YA", "USDS"), sep = "_") %>%
unite(Name_USDS, Site_name, USDS, sep = "_") %>%
group_by(Name_USDS) %>%
summarise(Tot = sum(Count)) %>%
write_csv("seq_depth_bacteria.csv")
TRE <- function(tree_file, outgroup)
{
read_tree(tree_file) %>%
ape::root(outgroup, resolve.root=TRUE) %>%
phy_tree
}
trim_otu_table <- function(GP)
{
wh0 <- genefilter_sample(GP, filterfun_sample(function(x) x > 5), A=0.5*nsamples(GP))
GP1 <- prune_taxa(wh0, GP)
transform_sample_counts(GP1, function(x) 1E6 * x/sum(x))
}
GP_16 <-
phyloseq(
ribo_otu_tbl,
ribo_tax_tbl,
TRE("otus.tre", "JQ837894.1.1415"),
csa) %>%
trim_otu_table %>%
transform_sample_counts(function(OTU) OTU/sum(OTU))
pdf("bacteria.pdf", useDingbats = FALSE)
psmelt(GP_16) %>%
ggplot(aes(x=Site_name, y=Abundance, fill=Class)) +
geom_bar(stat="identity", position="stack") +
facet_grid(USDS~.) +
scale_fill_viridis(discrete = TRUE, option = "D") +
theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank()) + ##strip out ggplot2 defaults
theme(axis.text.x = element_text(size=8,colour='grey20'),
axis.text.y = element_text(size=12,colour='grey20')) +
theme(panel.background = element_rect(fill="white", colour='black')) +
ylab("Relative abundance") +
xlab("Site") +
labs(fill = "Genera") +
theme(axis.title.x = element_text(size=16),
axis.title.y = element_text(size=16)) +
theme(legend.key.size = unit(0.7, "lines"),## Makes legend keys smaller
strip.background = element_rect(color="black", size=.5, linetype="solid"),
strip.text.x = element_text(
size = 16, color = "black"),
strip.text.y = element_text(
size = 16, color = "black"))
dev.off()
its_tax_tbl <-
read_tsv("its_reads.sintax", col_names=FALSE) %>%
select(X1, X4) %>%
mutate(X4 = str_replace_all(X4, "[a-z]:", ""),
X4 = str_replace_all(X4, '"', ""),
X4 = str_replace_all(X4, '_Gp6', "")) %>%
separate(X4, c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus"), sep = ",") %>%
dplyr::rename("OTU" = "X1") %>%
pivot_longer(
cols = -OTU,
names_to = "Level",
values_to = "Value") %>%
mutate(
Value = case_when(
is.na(Value) ~ "Unidentified",
Value == "unidentified" ~ "Unidentified",
TRUE ~ Value)) %>%
pivot_wider(
id_cols = "OTU",
names_from = "Level",
values_from = "Value") %>%
data.frame(row.names = "OTU") %>%
as.matrix %>%
tax_table
its_otu_tbl <-
read_tsv("its_otutab.txt") %>%
dplyr::rename("OTU" = "#OTU ID") %>%
pivot_longer(
cols = starts_with("S"),
names_to = "ID_sample",
names_prefix = "S",
values_to = "Count") %>%
mutate(ID_sample = as.numeric(ID_sample)) %>%
left_join(
site_labels,
by = "ID_sample") %>%
filter(Rep == 1) %>%
pivot_wider(
id_cols = "OTU",
names_from = "ID",
values_from = "Count") %>%
data.frame(row.names = "OTU") %>%
otu_table(taxa_are_rows = TRUE)
read_tsv("its_otutab.txt") %>%
dplyr::rename("OTU" = "#OTU ID") %>%
pivot_longer(
cols = starts_with("S"),
names_to = "ID_sample",
names_prefix = "S",
values_to = "Count") %>%
mutate(ID_sample = as.numeric(ID_sample)) %>%
left_join(
site_labels,
by = "ID_sample") %>%
filter(Rep == 1) %>%
separate(Yaohui_labelling, into = c("YA", "USDS"), sep = "_") %>%
unite(Name_USDS, Site_name, USDS, sep = "_") %>%
group_by(Name_USDS) %>%
summarise(Tot = sum(Count)) %>%
write_csv("seq_depth_fungi.csv")
GP_ITS <-
phyloseq(
its_otu_tbl,
its_tax_tbl,
csa) %>%
trim_otu_table %>%
transform_sample_counts(function(OTU) OTU/sum(OTU))
pdf("fungi.pdf", useDingbats = FALSE)
psmelt(GP_ITS) %>%
ggplot(aes(x=Site_name, y=Abundance, fill=Class)) +
geom_bar(stat="identity", position="stack") +
facet_grid(USDS~.) +
scale_fill_viridis(discrete = TRUE, option = "D") +
theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank()) + ##strip out ggplot2 defaults
theme(axis.text.x = element_text(size=8,colour='grey20'),
axis.text.y = element_text(size=12,colour='grey20')) +
theme(panel.background = element_rect(fill="white", colour='black')) +
ylab("Relative abundance") +
xlab("Site") +
labs(fill = "Genera") +
theme(axis.title.x = element_text(size=16),
axis.title.y = element_text(size=16)) +
theme(legend.key.size = unit(0.7, "lines"),## Makes legend keys smaller
strip.background = element_rect(color="black", size=.5, linetype="solid"),
strip.text.x = element_text(
size = 16, color = "black"),
strip.text.y = element_text(
size = 16, color = "black"))
dev.off()
smp_matrix <-
phyloseq(
its_otu_tbl,
its_tax_tbl,
csa) %>%
psmelt %>%
select(c(2, 5:18)) %>%
unique
its_tax_table <-
its_tax_tbl %>%
as.data.frame %>%
as_tibble(rownames = "OTU")
its_otu_data <-
phyloseq(
its_otu_tbl,
its_tax_tbl,
csa) %>%
psmelt %>%
as_tibble
its_otu_matrix <-
its_otu_data %>%
pivot_wider(
id_cols = OTU,
names_from = Sample,
values_from = Abundance) %>%
data.frame(row.names = "OTU") %>%
as.matrix
its_tax_table <-
its_otu_data %>%
select(OTU, Kingdom, Phylum, Class, Order, Family, Genus)
ribo_otu_data <-
phyloseq(
ribo_otu_tbl,
ribo_tax_tbl,
TRE("otus.tre", "JQ837894.1.1415"),
csa) %>%
psmelt %>%
as_tibble
ribo_otu_matrix <-
ribo_otu_data %>%
pivot_wider(
id_cols = OTU,
names_from = Sample,
values_from = Abundance) %>%
data.frame(row.names = "OTU") %>%
as.matrix
ribo_tax_table <-
ribo_otu_data %>%
select(OTU, Kingdom, Phylum, Class, Order, Family, Genus)
fit_deseq <- function(otu_matrix, tax_table, test, pval, ...)
{
DESeqDataSetFromMatrix(
countData = otu_matrix,
colData = smp_matrix,
design = ...) %>%
DESeq(test = test) %>%
results %>%
as_tibble(rownames = "OTU") %>%
arrange(pvalue) %>%
filter(pvalue < pval) %>%
left_join(tax_table) %>%
mutate(Model = paste0(as.formula(...), collapse=""))
}
its_sig_differences <-
bind_rows(
fit_deseq(its_otu_matrix, its_tax_table, "Wald", 0.05, ~USDS),
fit_deseq(its_otu_matrix, its_tax_table, "Wald", 0.05, ~USDS+Site_name)) %>%
unique
write_csv(its_sig_differences, "fungal_differences.csv")
ribo_sig_differences <-
bind_rows(
fit_deseq(ribo_otu_matrix, ribo_tax_table, "Wald", 0.05, ~USDS),
fit_deseq(ribo_otu_matrix, ribo_tax_table, "Wald", 0.05, ~USDS+Site_name)) %>%
unique
write_csv(ribo_sig_differences, "bacterial_differences.csv")
GP_16 <-
phyloseq(
ribo_otu_tbl,
ribo_tax_tbl,
TRE("otus.tre", "JQ837894.1.1415"),
csa) %>%
trim_otu_table %>%
transform_sample_counts(function(OTU) OTU/sum(OTU))
GP_ITS <-
phyloseq(
its_otu_tbl,
its_tax_tbl,
csa) %>%
trim_otu_table %>%
transform_sample_counts(function(OTU) OTU/sum(OTU))
ribo_data <-
GP_16 %>%
otu_table %>%
as.data.frame %>%
as_tibble(rownames="OTU") %>%
pivot_longer(
cols = -OTU,
names_to = "Smp",
values_to = "Value") %>%
pivot_wider(
id_cols = Smp,
names_from = "OTU",
values_from = "Value") %>%
left_join(smp_matrix, ., by = c("Sample" = "Smp")) %>%
as_tibble
its_data <-
GP_ITS %>%
otu_table %>%
as.data.frame %>%
as_tibble(rownames="OTU") %>%
pivot_longer(
cols = -OTU,
names_to = "Smp",
values_to = "Value") %>%
pivot_wider(
id_cols = Smp,
names_from = "OTU",
values_from = "Value") %>%
left_join(smp_matrix, ., by = c("Sample" = "Smp")) %>%
as_tibble
adonis(
its_data %>% select(-(1:15)) %>% as.matrix %>% decostand("pa")
~ its_data$USDS,
strata = its_data$Site_name,
method = "bray")
adonis(
its_data %>% select(-(1:15)) %>% as.matrix
~ its_data$USDS,
strata = its_data$Site_name,
method = "bray")
adonis(
ribo_data %>% select(-(1:15)) %>% as.matrix %>% decostand("pa")
~ ribo_data$USDS,
strata = ribo_data$Site_name,
method = "bray")
adonis(
ribo_data %>% select(-(1:15)) %>% as.matrix
~ ribo_data$USDS,
strata = ribo_data$Site_name,
method = "bray")
csa_measurements <-
read_csv("CSA_2013_ngs_ecofunc_mu_191108.csv") %>%
dplyr::rename(
ID=Code_02,
Tensile_strength_loss=`Tensile strength loss`,
Mass_loss=`Mass loss`)
ribo_csa_measurements_matrix <-
GP_16 %>%
otu_table %>%
data.frame %>%
as_tibble(rownames="OTU") %>%
pivot_longer(
cols= -OTU,
names_to="ID",
values_to="Abund") %>%
pivot_wider(
id_cols=ID,
names_from="OTU",
values_from="Abund") %>%
left_join(csa_measurements, .) %>%
filter(complete.cases(.))
FULL.cap <-
capscale(
ribo_csa_measurements_matrix[-c(1:8)] ~ Tensile_strength_loss + Mass_loss + Respiration,
data=ribo_csa_measurements_matrix[1:8])
basplot <-
plot(FULL.cap)
taxonomy <-
GP_16 %>%
tax_table %>%
data.frame %>%
as_tibble(rownames="OTU")
species <-
basplot$species %>%
as_tibble(rownames="OTU") %>%
mutate(dist=sqrt(CAP1^2 + CAP2^2)) %>%
arrange(desc(dist)) %>%
left_join(taxonomy) %>%
head(10)
sites <-
basplot$sites %>%
data.frame %>%
mutate(ID = csa_measurements$ID) %>%
left_join(site_labels) %>%
filter(Rep == 1) %>%
separate(Yaohui_labelling, c("Name", "USDS"), sep="_") %>%
select(-Rep, -Name, -ID_sample)
arrows <-
basplot$biplot %>%
data.frame %>%
as_tibble(rownames="Variable")
mult <-
basplot$biplot %>%
attributes %>%
.$arrow.mul
eigenvals(FULL.cap) / sum(eigenvals(FULL.cap))*100
anova(FULL.cap)
pdf("bacteria_rda.pdf", useDingbats = FALSE)
ggplot() +
geom_point(data=sites, aes(x=CAP1, y=CAP2, shape=USDS, color=Site_name)) +
geom_point(data=species, aes(x=CAP1, y=CAP2)) +
geom_segment(data = arrows,
aes(x = 0, xend = mult * CAP1,
y = 0, yend = mult * CAP2),
arrow = arrow(length = unit(0.25, "cm")), colour = "grey") +
geom_text_repel(data = arrows,
aes(x= (mult + mult/10) * CAP1, y = (mult + mult/10) * CAP2,
label = Variable),
size = 2,
hjust = 0.5) +
geom_text_repel(data = species,
aes(x= CAP1, y = CAP2,
label = Genus),
size = 2,
hjust = 1) +
xlab("CAP1 (17.4%)") +
ylab("CAP2 (10.2%)") +
ggtitle("Y ~ tsl + ml + resp (p = 0.015)")
dev.off()
its_csa_measurements_matrix <-
GP_ITS %>%
otu_table %>%
data.frame %>%
as_tibble(rownames="OTU") %>%
pivot_longer(
cols= -OTU,
names_to="ID",
values_to="Abund") %>%
pivot_wider(
id_cols=ID,
names_from="OTU",
values_from="Abund") %>%
left_join(csa_measurements, .)
filter(complete.cases(.))
FULL.cap <-
capscale(
its_csa_measurements_matrix[-c(1:8)] ~ Tensile_strength_loss + Mass_loss + Respiration,
data=its_csa_measurements_matrix[1:8])
basplot <-
plot(FULL.cap)
taxonomy <-
GP_ITS %>%
tax_table %>%
data.frame
as_tibble(rownames="OTU")
species <-
basplot$species
as_tibble(rownames="OTU")
mutate(dist=sqrt(CAP1^2 + CAP2^2)) %>%
arrange(desc(dist)) %>%
left_join(taxonomy)
head(10)
sites <-
basplot$sites %>%
data.frame %>%
mutate(ID = csa_measurements$ID) %>%
left_join(site_labels) %>%
filter(Rep == 1) %>%
separate(Yaohui_labelling, c("Name", "USDS"), sep="_") %>%
select(-Rep, -Name, -ID_sample)
arrows <-
basplot$biplot %>%
data.frame %>%
as_tibble(rownames="Variable")
mult <-
basplot$biplot %>%
attributes %>%
.$arrow.mul
eigenvals(FULL.cap) / sum(eigenvals(FULL.cap))*100
anova(FULL.cap)
pdf("fungi_rda.pdf", useDingbats = FALSE)
ggplot() +
geom_point(data=sites, aes(x=CAP1, y=CAP2, shape=USDS, color=Site_name)) +
geom_point(data=species, aes(x=CAP1, y=CAP2)) +
geom_segment(data = arrows,
aes(x = 0, xend = mult * CAP1,
y = 0, yend = mult * CAP2),
arrow = arrow(length = unit(0.25, "cm")), colour = "grey") +
geom_text_repel(data = arrows,
aes(x= (mult + mult/10) * CAP1, y = (mult + mult/10) * CAP2,
label = Variable),
size = 2,
hjust = 0.5) +
geom_text_repel(data = species,
aes(x= CAP1, y = CAP2,
label = Class),
size = 2,
hjust = 1) +
xlab("CAP1 (13.8%)") +
ylab("CAP2 (9.8%)") +
ggtitle("Y ~ tsl + ml + resp (p = 0.104)")
dev.off()