references.bib

% Encoding: UTF-8

% Angel (2012): protocol for simultaneous DNA and RNA extraction from soil (Protocol Exchange).
% The record has no page numbers; see the non-printing comment field for the text that used to sit in pages.
@Article{angel_total_2012,
  author       = {Angel, Roey},
  title        = {Total Nucleic Acid Extraction from Soil},
  issn         = {2043-0116},
  abstract     = {The following protocol is intended for the simultaneous extraction of {DNA} and {RNA} from various soil samples along with suggestions on how to tweak the protocol for soil with higher humic content. The protocol has been used by many and results in very high yields of nucleic acids, typically much more than commercial kits. For buffers and solutions used in this protocol, please see accompanying document Buffers and Solutions for {TNA} Extractions.pdf.},
  comment      = {The pages field formerly contained the text DOI: 10.1038/protex.2011.204. rather than page numbers. That identifier differs from the doi field; verify against the publisher record which one is correct.},
  date         = {2012-10-23},
  doi          = {10.1038/protex.2012.046},
  journaltitle = {Protocol Exchange},
  keywords     = {Humic acids,mRNA,RNA,Soil},
}

% Callahan et al. (2016): DADA2, open-source package for correcting Illumina amplicon errors (Nature Methods).
% Fields are grouped as: identity, venue, identifiers, descriptive text, local management.
@Article{callahan_dada2:_2016,
  author       = {Callahan, Benjamin J. and McMurdie, Paul J. and Rosen, Michael J. and Han, Andrew W. and Johnson, Amy Jo A. and Holmes, Susan P.},
  title        = {{{DADA2}}: {{High}}-Resolution Sample Inference from {{Illumina}} Amplicon Data},
  shorttitle   = {{{DADA2}}},
  journaltitle = {Nature Methods},
  shortjournal = {Nat Meth},
  date         = {2016-07},
  volume       = {13},
  number       = {7},
  pages        = {581--583},
  issn         = {1548-7091},
  doi          = {10.1038/nmeth.3869},
  url          = {http://www.nature.com/nmeth/journal/v13/n7/full/nmeth.3869.html},
  urldate      = {2016-07-27},
  langid       = {english},
  abstract     = {We present the open-source software package DADA2 for modeling and correcting Illumina-sequenced amplicon errors (https://github.com/benjjneb/dada2). DADA2 infers sample sequences exactly and resolves differences of as little as 1 nucleotide. In several mock communities, DADA2 identified more real variants and output fewer spurious sequences than other methods. We applied DADA2 to vaginal samples from a cohort of pregnant women, revealing a diversity of previously undetected Lactobacillus crispatus variants.},
  keywords     = {HT seq,Methods,Bioinfo,Diversity,Stat/Model},
  rights       = {© 2016 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.},
  file         = {:/home/angel/Work/Literature/PDFs/Callahan_et_al_2016_Nat_Meth_supp.pdf:;:/home/angel/Work/Literature/PDFs/Callahan_et_al_2016_Nat_Meth.pdf:;:/home/angel/Work/Literature/Zotero/storage/UWFM6J35/dada2.html:},
  owner        = {angel},
  timestamp    = {2018.01.24},
}

% Callahan et al. (2016): Bioconductor workflow for microbiome data analysis (F1000Research, version 2 per doi and url).
% Only the proper noun Bioconductor is brace-protected, identically in title and shorttitle, so both are recased alike.
@Article{callahan_bioconductor_2016,
  author       = {Callahan, Ben J. and Sankaran, Kris and Fukuyama, Julia A. and McMurdie, Paul J. and Holmes, Susan P.},
  title        = {{Bioconductor} Workflow for Microbiome Data Analysis: From Raw Reads to Community Analyses},
  volume       = {5},
  pages        = {1492},
  issn         = {2046-1402},
  date         = {2016-11-02},
  doi          = {10.12688/f1000research.8986.2},
  file         = {:/home/angel/Work/Literature/PDFs/Callahan_et_al_2016_F1000Research.pdf:},
  journaltitle = {F1000Research},
  keywords     = {HT seq,Methods,Bioinfo,Review,Diversity,Stat/Model},
  langid       = {english},
  owner        = {angel},
  shortjournal = {F1000Research},
  shorttitle   = {{Bioconductor} Workflow for Microbiome Data Analysis},
  timestamp    = {2018.01.24},
  url          = {https://f1000research.com/articles/5-1492/v2},
  urldate      = {2017-10-11},
}

% Chen and Chen (2017): GMPR normalization method for zero-inflated microbiome sequencing data (bioRxiv preprint).
% Only the acronym GMPR is brace-protected in the title; keywords are comma-separated so each one is a separate keyword.
@Article{chen_gmpr:_2017,
  author       = {Chen, Jun and Chen, Li},
  title        = {{GMPR}: A novel normalization method for microbiome sequencing data},
  pages        = {112565},
  abstract     = {Normalization is the first and a critical step in microbiome sequencing (microbiome-Seq) data analysis to account for variable library sizes. Though {RNA}-Seq based normalization methods have been adapted for microbiome-Seq data, they fail to consider the unique characteristics of microbiome-Seq data, which contain a vast number of zeros due to the physical absence or undersampling of the microbes. Normalization methods that specifically address the zero-inflation remain largely undeveloped. Here we propose {GMPR} - a simple but effective normalization method - for zero-inflated sequencing data such as microbiome-Seq data. Simulation studies and analyses of 38 real gut microbiome datasets from 16S {rRNA} gene amplicon sequencing demonstrated the superior performance of the proposed method.},
  date         = {2017-02-28},
  doi          = {10.1101/112565},
  file         = {Chen_Chen_2017_.pdf:/home/angel/Work/Literature/PDFs/Chen_Chen_2017_.pdf:application/pdf;Chen_Chen_2017_supp.pdf:/home/angel/Work/Literature/PDFs/Chen_Chen_2017_supp.pdf:application/pdf},
  journaltitle = {{bioRxiv}},
  keywords     = {HT seq,Methods,Bioinfo,Diversity,Stat/Model},
  langid       = {english},
  rights       = {© 2017, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-{NonCommercial} 4.0 International), {CC} {BY}-{NC} 4.0, as described at http://creativecommons.org/licenses/by-nc/4.0/},
  shorttitle   = {{GMPR}},
  url          = {http://biorxiv.org/content/early/2017/02/28/112565},
  urldate      = {2017-03-06},
}

% Davis et al. (2017): decontam, statistical identification of contaminant sequences (bioRxiv preprint).
% The title carries no whole-title brace, so the bibliography style controls its casing.
@Article{davis_simple_2017,
  author       = {Davis, Nicole M. and Proctor, Diana and Holmes, Susan P. and Relman, David A. and Callahan, Benjamin J.},
  title        = {Simple statistical identification and removal of contaminant sequences in marker-gene and metagenomics data},
  pages        = {221499},
  abstract     = {The accuracy of microbial community surveys based on marker-gene and metagenomic sequencing ({MGS}) suffers from the presence of contaminants - {DNA} sequences not truly present in the sample. Contaminants come from a variety of sources, including reagents. Appropriate laboratory practices can reduce contamination in {MGS} data, but do not eliminate it. Here we introduce decontam (https://github.com/benjjneb/decontam), an open-source R package which implements a statistical classification procedure for identifying contaminants in {MGS} data. Contaminants are identified on the basis of two widely reproduced signatures: contaminants are more frequent in low-concentration samples, and are often found in negative controls. In a dataset from the human oral microbiome, the classification of amplicon sequence variants by decontam was strongly consistent with prior microscopic observations of microbial taxa in that environment. In both metagenomics and marker-gene measurements of a mock community dilution series, the removal of contaminants identified by decontam substantially reduced technical variation due to differences in reagents and sequencing centers. The application of decontam to two recently published datasets corroborated and extended their conclusions that little evidence existed for an indigenous placenta microbiome, and that some low-frequency taxa seemingly associated with preterm birth were run-specific contaminants. decontam integrates easily with existing {MGS} workflows, and allows researchers to generate more accurate profiles of microbial community composition at little to no additional cost.},
  date         = {2017-11-17},
  doi          = {10.1101/221499},
  file         = {Davis_et_al_2017_.pdf:/home/angel/Documents/Work/Literature/PDFs/Davis_et_al_2017_.pdf:application/pdf},
  journaltitle = {{bioRxiv}},
  keywords     = {HT seq,PCR,Methods,Bioinfo,Diversity,Stat/Model},
  langid       = {english},
  rights       = {© 2017, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), {CC} {BY} 4.0, as described at http://creativecommons.org/licenses/by/4.0/},
  url          = {https://www.biorxiv.org/content/early/2017/11/17/221499},
  urldate      = {2017-12-06},
}

% Herbold et al. (2015): two-step PCR barcoding approach for multiplexed amplicon sequencing (Frontiers in Microbiology).
% NOTE(review): urldate is present without a url field; the doi is the only locator stored in this record.
@Article{herbold_flexible_2015,
  author       = {Herbold, Craig W. and Pelikan, Claus and Kuzyk, Orest and Hausmann, Bela and Angel, Roey and Berry, David and Loy, Alexander},
  title        = {A flexible and economical barcoding approach for highly multiplexed amplicon sequencing of diverse target genes},
  volume       = {6},
  pages        = {731},
  abstract     = {High throughput sequencing of phylogenetic and functional gene amplicons provides tremendous insight into the structure and functional potential of complex microbial communities. Here, we introduce a highly adaptable and economical {PCR} approach to barcoding and pooling libraries of numerous target genes. In this approach, we replace gene- and sequencing platform-specific fusion primers with general, interchangeable barcoding primers, enabling nearly limitless customized barcode-primer combinations. Compared to barcoding with long fusion primers, our multiple-target gene approach is more economical because it overall requires lower number of primers and is based on short primers with generally lower synthesis and purification costs. To highlight our approach, we pooled over 900 different small-subunit {rRNA} and functional gene amplicon libraries obtained from various environmental or host-associated microbial community samples into a single, paired-end Illumina {MiSeq} run. Although the amplicon regions ranged in size from approximately 290 to 720 bp, we found no significant systematic sequencing bias related to amplicon length or gene target. Our results indicate that this flexible multiplexing approach produces large, diverse, and high quality sets of amplicon sequence data for modern studies in microbial ecology.},
  date         = {2015},
  doi          = {10.3389/fmicb.2015.00731},
  file         = {Herbold_et_al_2015_Front_Microbiol.pdf:/home/angel/Dropbox/Shared/Work/Literature/PDFs/Herbold_et_al_2015_Front_Microbiol.pdf:application/pdf},
  journaltitle = {Frontiers in Microbiology},
  shortjournal = {Front Microbiol},
  urldate      = {2015-07-23},
}

% Paulson et al. (2013): metagenomeSeq, differential abundance for marker-gene surveys (Nature Methods).
% The third author is entered in comma form with the compound surname Corrada Bravo so that Corrada is not parsed as a given name.
% NOTE(review): confirm the surname split against the author's preferred citation form.
@Article{paulson_differential_2013,
  author       = {Paulson, Joseph N. and Stine, O. Colin and Corrada Bravo, H{\'e}ctor and Pop, Mihai},
  title        = {Differential abundance analysis for microbial marker-gene surveys},
  volume       = {10},
  number       = {12},
  pages        = {1200--1202},
  issn         = {1548-7091},
  abstract     = {We introduce a methodology to assess differential abundance in sparse high-throughput microbial marker-gene survey data. Our approach, implemented in the {metagenomeSeq} Bioconductor package, relies on a novel normalization technique and a statistical model that accounts for undersampling---a common feature of large-scale marker-gene studies. Using simulated data and several published microbiota data sets, we show that {metagenomeSeq} outperforms the tools currently used in this field.},
  date         = {2013-12},
  doi          = {10.1038/nmeth.2658},
  file         = {Paulson_et_al_2013_Nat_Meth.pdf:/home/angel/Work/Literature/PDFs/Paulson_et_al_2013_Nat_Meth.pdf:application/pdf},
  journaltitle = {Nature Methods},
  langid       = {english},
  rights       = {© 2013 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.},
  shortjournal = {Nat Meth},
  url          = {http://www.nature.com/nmeth/journal/v10/n12/full/nmeth.2658.html},
  urldate      = {2016-07-26},
}

% Walters et al. (2016): modified 16S rRNA gene and ITS primers for microbial community surveys (mSystems).
% e00009-15 is an article identifier (it also ends the doi), not a page range, so it is stored in eid instead of pages.
@Article{walters_improved_2016,
  author       = {Walters, William and Hyde, Embriette R. and Berg-Lyons, Donna and Ackermann, Gail and Humphrey, Greg and Parada, Alma and Gilbert, Jack A. and Jansson, Janet K. and Caporaso, J. Gregory and Fuhrman, Jed A. and Apprill, Amy and Knight, Rob},
  title        = {Improved bacterial {16S} {rRNA} gene (v4 and v4-5) and fungal internal transcribed spacer marker gene primers for microbial community surveys},
  volume       = {1},
  number       = {1},
  eid          = {e00009-15},
  issn         = {2379-5077},
  abstract     = {Designing primers for {PCR}-based taxonomic surveys that amplify a broad range of phylotypes in varied community samples is a difficult challenge, and the comparability of data sets amplified with varied primers requires attention. Here, we examined the performance of modified 16S {rRNA} gene and internal transcribed spacer ({ITS}) primers for archaea/bacteria and fungi, respectively, with nonaquatic samples. We moved primer bar codes to the 5′ end, allowing for a range of different 3′ primer pairings, such as the 515f/926r primer pair, which amplifies variable regions 4 and 5 of the 16S {rRNA} gene. We additionally demonstrated that modifications to the 515f/806r (variable region 4) 16S primer pair, which improves detection of Thaumarchaeota and clade {SAR}11 in marine samples, do not degrade performance on taxa already amplified effectively by the original primer set. Alterations to the fungal {ITS} primers did result in differential but overall improved performance compared to the original primers. In both cases, the improved primers should be widely adopted for amplicon studies. {IMPORTANCE} We continue to uncover a wealth of information connecting microbes in important ways to human and environmental ecology. As our scientific knowledge and technical abilities improve, the tools used for microbiome surveys can be modified to improve the accuracy of our techniques, ensuring that we can continue to identify groundbreaking connections between microbes and the ecosystems they populate, from ice caps to the human body. It is important to confirm that modifications to these tools do not cause new, detrimental biases that would inhibit the field rather than continue to move it forward. We therefore demonstrated that two recently modified primer pairs that target taxonomically discriminatory regions of bacterial and fungal genomic {DNA} do not introduce new biases when used on a variety of sample types, from soil to human skin. This confirms the utility of these primers for maintaining currently recommended microbiome research techniques as the state of the art.},
  date         = {2016-02-23},
  doi          = {10.1128/mSystems.00009-15},
  file         = {Walters_et_al_2016_mSystems.pdf:/home/angel/Dropbox/Shared/Work/Literature/PDFs/Walters_et_al_2016_mSystems.pdf:application/pdf},
  journaltitle = {{mSystems}},
  keywords     = {Diversity,HT seq,Methods},
  langid       = {english},
  rights       = {Copyright © 2015 Walters et al. This is an open-access article distributed under the terms of the Creative Commons Attribution 4.0 International license.},
  shortjournal = {{mSystems}},
  url          = {http://msystems.asm.org/content/1/1/e00009-15},
  urldate      = {2016-02-25},
}

% Angel and Conrad (2013): bacterial resuscitation in biological soil crusts after simulated rain (Environmental Microbiology).
% The labelled-water formula in the abstract is written in LaTeX math so its subscript and superscript are kept.
@Article{angel_elucidating_2013,
  author       = {Angel, Roey and Conrad, Ralf},
  title        = {Elucidating the Microbial Resuscitation Cascade in Biological Soil Crusts Following a Simulated Rain Event},
  volume       = {15},
  number       = {10},
  pages        = {2799--2815},
  issn         = {1462-2920},
  abstract     = {Biological soil crusts (biocrusts) are photosynthetic mats formed through an association of prokaryotic and eukaryotic microorganisms with soil particles. Biocrusts are found in virtually any terrestrial ecosystem where vascular plant coverage is abiotically limited, with drylands comprising the primary habitat for them. We studied the dynamics of the active bacterial community in two biocrusts from an arid and a hyperarid region in the Negev Desert, Israel, under light-oxic and dark-anoxic incubation conditions after simulated rainfall. We used H$_{2}$$^{18}$O for hydrating the crusts and analysed the bacterial community in the upper and lower parts of the biocrust using an RNA-stable isotope probing approach coupled with 454-pyrosequencing. In both biocrusts, two distinct bacterial communities developed under each incubation condition. The active anaerobic communities were initially dominated by members of the order Bacillales which were later replaced by Clostridiales. The aerobic communities on the other hand were dominated by Sphingobacteriales and several Alphaproteobacteria (Rhizobiales, Rhodobacterales, Rhodospirillales and Rubrobacteriales). Actinomycetales were the dominant bacterial order in the dry crusts but quickly collapsed and accounted for $<$ 1\% of the community by the end of the incubation. Our study shows that biocrusts host a diverse community whose members display complex interactions as they resuscitate from dormancy.},
  date         = {2013},
  doi          = {10.1111/1462-2920.12140},
  file         = {Angel_Conrad_2013_Environ_Microbiol.pdf:/home/angel/Work/Literature/PDFs/Angel_Conrad_2013_Environ_Microbiol.pdf:application/pdf;Angel_Conrad_2013_Environ_Microbiol2.pdf:/home/angel/Work/Literature/PDFs/Angel_Conrad_2013_Environ_Microbiol2.pdf:application/pdf},
  journaltitle = {Environmental Microbiology},
  keywords     = {HT seq,RNA,Actinobact,Pop dynamics,Mats,Drylands,Soil,Cyanobact,Moisture,SIP/Radiolabel,Diversity},
  langid       = {english},
  rights       = {© 2013 John Wiley \& Sons Ltd and Society for Applied Microbiology},
  shortjournal = {Environ Microbiol},
  timestamp    = {2016-04-12T09:48:39Z},
  url          = {http://onlinelibrary.wiley.com/doi/10.1111/1462-2920.12140/abstract},
  urldate      = {2013-10-08},
}

% Fernandes et al. (2013): ALDEx, ANOVA-like differential expression for mixed-population RNA-Seq (PLOS ONE).
% The ZSCC bookkeeping value is kept in annotation, not note, because note is printed in the bibliography.
@Article{fernandes_anova-like_2013,
  author       = {Fernandes, Andrew D. and Macklaim, Jean M. and Linn, Thomas G. and Reid, Gregor and Gloor, Gregory B.},
  title        = {{ANOVA}-Like Differential Expression ({ALDEx}) Analysis for Mixed Population {RNA}-Seq},
  issn         = {1932-6203},
  number       = {7},
  pages        = {e67019},
  volume       = {8},
  abstract     = {Experimental variance is a major challenge when dealing with high-throughput sequencing data. This variance has several sources: sampling replication, technical replication, variability within biological conditions, and variability between biological conditions. The high per-sample cost of {RNA}-Seq often precludes the large number of experiments needed to partition observed variance into these categories as per standard {ANOVA} models. We show that the partitioning of within-condition to between-condition variation cannot reasonably be ignored, whether in single-organism {RNA}-Seq or in Meta-{RNA}-Seq experiments, and further find that commonly-used {RNA}-Seq analysis tools, as described in the literature, do not enforce the constraint that the sum of relative expression levels must be one, and thus report expression levels that are systematically distorted. These two factors lead to misleading inferences if not properly accommodated. As it is usually only the biological between-condition and within-condition differences that are of interest, we developed {ALDEx}, an {ANOVA}-like differential expression procedure, to identify genes with greater between- to within-condition differences. We show that the presence of differential expression and the magnitude of these comparative differences can be reasonably estimated with even very small sample sizes.},
  annotation   = {ZSCC: 0000091},
  date         = {2013-07-02},
  doi          = {10.1371/journal.pone.0067019},
  file         = {Fernandes_et_al_2013_PLOS_ONE.pdf:/home/angel/Dropbox/Shared/Work/Literature/PDFs/Fernandes_et_al_2013_PLOS_ONE.pdf:application/pdf},
  journaltitle = {{PLOS} {ONE}},
  keywords     = {RNA,Methods,Diversity,Stat/Model,Meta-omics},
  langid       = {english},
  shortjournal = {{PLOS} {ONE}},
  url          = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0067019},
  urldate      = {2018-06-07},
}

% Martin, Witten and Willis (2020): beta-binomial regression for taxon relative abundance and variability (Annals of Applied Statistics).
% Fields are grouped as: identity, venue, identifiers, descriptive text, local management.
@Article{martin_modeling_2020,
  author       = {Martin, Bryan D. and Witten, Daniela and Willis, Amy D.},
  title        = {Modeling Microbial Abundances and Dysbiosis with Beta-Binomial Regression},
  journaltitle = {Annals of Applied Statistics},
  shortjournal = {Ann Appl Stat},
  publisher    = {{Institute of Mathematical Statistics}},
  date         = {2020-03},
  volume       = {14},
  number       = {1},
  pages        = {94--115},
  issn         = {1932-6157, 1941-7330},
  doi          = {10.1214/19-AOAS1283},
  mrnumber     = {MR4085085},
  zmnumber     = {07200163},
  url          = {https://projecteuclid.org/euclid.aoas/1587002666},
  urldate      = {2020-09-26},
  langid       = {english},
  abstract     = {Using a sample from a population to estimate the proportion of the population with a certain category label is a broadly important problem. In the context of microbiome studies, this problem arises when researchers wish to use a sample from a population of microbes to estimate the population proportion of a particular taxon, known as the taxon’s relative abundance. In this paper, we propose a beta-binomial model for this task. Like existing models, our model allows for a taxon’s relative abundance to be associated with covariates of interest. However, unlike existing models, our proposal also allows for the overdispersion in the taxon’s counts to be associated with covariates of interest. We exploit this model in order to propose tests not only for differential relative abundance, but also for differential variability. The latter is particularly valuable in light of speculation that dysbiosis, the perturbation from a normal microbiome that can occur in certain disease conditions, may manifest as a loss of stability, or increase in variability, of the counts associated with each taxon. We demonstrate the performance of our proposed model using a simulation study and an application to soil microbial data.},
  annotation   = {ZSCC: 0000013},
  file         = {Martin_et_al_2020_Ann_Appl_Stat.pdf:/home/angel/Dropbox/Shared/Work/Literature/PDFs/Martin_et_al_2020_Ann_Appl_Stat.pdf:application/pdf},
  timestamp    = {2020-09-26T13:04:15Z},
}

% McMurdie and Holmes (2013): phyloseq, R package for analysis and graphics of microbiome census data (PLOS ONE).
% Fields are grouped as: identity, venue, identifiers, descriptive text, local management.
@Article{mcmurdie_phyloseq_2013,
  author       = {McMurdie, Paul J. and Holmes, Susan},
  title        = {Phyloseq: {{An R Package}} for {{Reproducible Interactive Analysis}} and {{Graphics}} of {{Microbiome Census Data}}},
  shorttitle   = {Phyloseq},
  journaltitle = {PLOS ONE},
  shortjournal = {PLOS ONE},
  date         = {2013-04-22},
  volume       = {8},
  number       = {4},
  pages        = {e61217},
  issn         = {1932-6203},
  doi          = {10.1371/journal.pone.0061217},
  url          = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0061217},
  urldate      = {2018-07-10},
  langid       = {english},
  abstract     = {Background The analysis of microbial communities through DNA sequencing brings many challenges: the integration of different types of data with methods from ecology, genetics, phylogenetics, multivariate statistics, visualization and testing. With the increased breadth of experimental designs now being pursued, project-specific statistical analyses are often needed, and these analyses are often difficult (or impossible) for peer researchers to independently reproduce. The vast majority of the requisite tools for performing these analyses reproducibly are already implemented in R and its extensions (packages), but with limited support for high throughput microbiome census data. Results Here we describe a software project, phyloseq, dedicated to the object-oriented representation and analysis of microbiome census data in R. It supports importing data from a variety of common formats, as well as many analysis techniques. These include calibration, filtering, subsetting, agglomeration, multi-table comparisons, diversity analysis, parallelized Fast UniFrac, ordination methods, and production of publication-quality graphics; all in a manner that is easy to document, share, and modify. We show how to apply functions from other R packages to phyloseq-represented data, illustrating the availability of a large number of open source analysis techniques. We discuss the use of phyloseq with tools for reproducible research, a practice common in other fields but still rare in the analysis of highly parallel microbiome census data. We have made available all of the materials necessary to completely reproduce the analysis and figures included in this article, an example of best practices for reproducible research. Conclusions The phyloseq project for R is a new open-source software package, freely available on the web from both GitHub and Bioconductor.},
  keywords     = {Bioinfo,Diversity,Meta-omics,Methods,Stat/Model},
  file         = {McMurdie_Holmes_2013_PLOS_ONE.pdf:/home/angel/Dropbox/Shared/Work/Literature/PDFs/McMurdie_Holmes_2013_PLOS_ONE.pdf:application/pdf},
  timestamp    = {2018-07-10T11:29:05Z},
}

% JabRef library mode. The entries in this file use biblatex field names (date, journaltitle, urldate, langid), so the library is declared as biblatex.
@Comment{jabref-meta: databaseType:biblatex;}