forked from YuLab-SMU/treedata-book
-
Notifications
You must be signed in to change notification settings - Fork 0
/
references.bib
3235 lines (2993 loc) · 291 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@article{yu_gosemsim_2020,
  author = {Yu, Guangchuang},
  title = {Gene Ontology Semantic Similarity Analysis Using {GOSemSim}},
  journaltitle = {Methods in Molecular Biology (Clifton, N.J.)},
  shortjournal = {Methods Mol. Biol.},
  volume = {2117},
  pages = {207--215},
  date = {2020},
  issn = {1940-6029},
  doi = {10.1007/978-1-0716-0301-7_11},
  pmid = {31960380},
  keywords = {Functional prediction, Gene ontology, {GOSemSim}, Reproducible research, Semantic similarity},
  abstract = {The {GOSemSim} package, an R-based tool within the Bioconductor project, offers several methods based on information content and graph structure for measuring semantic similarity among {GO} terms, gene products and gene clusters. In this chapter, I illustrate the use of {GOSemSim} on a list of regulators in preimplantation embryos. A step-by-step analysis was provided as well as instructions on interpretation and visualization of the results. {GOSemSim} is open-source and is available from https://www.bioconductor.org/packages/{GOSemSim} .}
}
@article{yu_dose_2015,
  author = {Yu, Guangchuang and Wang, Li-Gen and Yan, Guang-Rong and He, Qing-Yu},
  title = {{DOSE}: an {R/Bioconductor} package for disease ontology semantic and enrichment analysis},
  shorttitle = {{DOSE}},
  journaltitle = {Bioinformatics},
  shortjournal = {Bioinformatics},
  volume = {31},
  number = {4},
  pages = {608--609},
  date = {2015-02-15},
  issn = {1367-4803, 1460-2059},
  doi = {10.1093/bioinformatics/btu684},
  url = {https://doi.org/10.1093/bioinformatics/btu684},
  urldate = {2015-02-13},
  langid = {english},
  abstract = {Summary: Disease ontology ({DO}) annotates human genes in the context of disease. {DO} is important annotation in translating molecular findings from high-throughput data to clinical relevance. {DOSE} is an R package providing semantic similarity computations among {DO} terms and genes which allows biologists to explore the similarities of diseases and of gene functions in disease perspective. Enrichment analyses including hypergeometric model and gene set enrichment analysis are also implemented to support discovering disease associations of high-throughput biological data. This allows biologists to verify disease relevance in a biological experiment and identify unexpected disease associations. Comparison among gene clusters is also supported.
Availability and implementation: {DOSE} is released under Artistic-2.0 License. The source code and documents are freely available through Bioconductor (http://www.bioconductor.org/packages/release/bioc/html/{DOSE}.html).
Supplementary information: Supplementary data are available at Bioinformatics online.
Contact: gcyu@connect.hku.hk or tqyhe@jnu.edu.cn}
}
@article{yu2012,
  author = {Yu, Guangchuang and Wang, Li-Gen and Han, Yanyan and He, Qing-Yu},
  title = {{clusterProfiler}: an {R} Package for Comparing Biological Themes Among Gene Clusters},
  shorttitle = {{clusterProfiler}},
  journal = {{OMICS}: A Journal of Integrative Biology},
  volume = {16},
  number = {5},
  pages = {284--287},
  year = {2012},
  month = may,
  issn = {1536-2310, 1557-8100},
  doi = {10.1089/omi.2011.0118},
  url = {http://online.liebertpub.com/doi/abs/10.1089/omi.2011.0118},
  urldate = {2012-05-05},
  file = {2012-OMICS-clusterProfiler.pdf:/Volumes/YGC/MyZotero/storage/3ZEKB39T/2012-OMICS-clusterProfiler.pdf:application/pdf;clusterProfiler: an R Package for Comparing Biological Themes Among Gene Clusters | Abstract:/Volumes/YGC/MyZotero/storage/WPTJGAT7/omi.2011.html:text/html}
}
@article{yu_reactomepa_2016,
  author = {Yu, Guangchuang and He, Qing-Yu},
  title = {{ReactomePA}: an {R/Bioconductor} package for reactome pathway analysis and visualization},
  shorttitle = {{ReactomePA}},
  journaltitle = {Molecular {BioSystems}},
  shortjournal = {Mol. {BioSyst}.},
  volume = {12},
  number = {2},
  pages = {477--479},
  date = {2016-01-26},
  issn = {1742-2051},
  doi = {10.1039/C5MB00663E},
  url = {https://doi.org/10.1039/C5MB00663E},
  urldate = {2016-02-17},
  langid = {english},
  abstract = {Reactome is a manually curated pathway annotation database for unveiling high-order biological pathways from high-throughput data. {ReactomePA} is an R/Bioconductor package providing enrichment analyses, including hypergeometric test and gene set enrichment analyses. A functional analysis can be applied to the genomic coordination obtained from a sequencing experiment to analyze the functional significance of genomic loci including cis-regulatory elements and non-coding regions. Comparison among different experiments is also supported. Moreover, {ReactomePA} provides several visualization functions to produce highly customizable, publication-quality figures. The source code and documents of {ReactomePA} are freely available through Bioconductor (http://www.bioconductor.org/packages/{ReactomePA}).}
}
@article{ggtreeExtra_2021,
  author = {Xu, Shuangbin and Dai, Zehan and Guo, Pingfan and Fu, Xiaocong and Liu, Shanshan and Zhou, Lang and Tang, Wenli and Feng, Tingze and Chen, Meijun and Zhan, Li and Wu, Tianzhi and Hu, Erqiang and Jiang, Yong and Bo, Xiaochen and Yu, Guangchuang},
  title = {{ggtreeExtra}: Compact Visualization of Richly Annotated Phylogenetic Data},
  journal = {Molecular Biology and Evolution},
  volume = {38},
  number = {9},
  pages = {4039--4042},
  year = {2021},
  month = jun,
  issn = {0737-4038},
  doi = {10.1093/molbev/msab166},
  url = {https://doi.org/10.1093/molbev/msab166},
  eprint = {https://academic.oup.com/mbe/article-pdf/38/9/4039/39882875/msab166.pdf},
  abstract = {We present the ggtreeExtra package for visualizing heterogeneous data with a phylogenetic tree in a circular or rectangular layout (https://www.bioconductor.org/packages/ggtreeExtra). The package supports more data types and visualization methods than other tools. It supports using the grammar of graphics syntax to present data on a tree with richly annotated layers and allows evolutionary statistics inferred by commonly used software to be integrated and visualized with external data. GgtreeExtra is a universal tool for tree data visualization. It extends the applications of the phylogenetic tree in different disciplines by making more domain-specific data to be available to visualize and interpret in the evolutionary context.}
}
@article{ggbreak,
  author = {Xu, Shuangbin and Chen, Meijun and Feng, Tingze and Zhan, Li and Zhou, Lang and Yu, Guangchuang},
  title = {Use {ggbreak} to effectively utilize plotting space to deal with large datasets and outliers},
  journal = {Frontiers in Genetics},
  volume = {12},
  pages = {774846},
  year = {2021},
  doi = {10.3389/fgene.2021.774846}
}
@article{yu_cp_2020,
  author = {Yu, Guangchuang},
  title = {Using ggtree to Visualize Data on Tree-Like Structures},
  journaltitle = {Current Protocols in Bioinformatics},
  volume = {69},
  number = {1},
  pages = {e96},
  date = {2020},
  issn = {1934-340X},
  doi = {10.1002/cpbi.96},
  url = {https://currentprotocols.onlinelibrary.wiley.com/doi/abs/10.1002/cpbi.96},
  urldate = {2020-03-06},
  langid = {english},
  rights = {© 2020 John Wiley \& Sons, Inc.},
  keywords = {grammar of graphics, phylogeny, tree associated data, tree structure, visualization},
  abstract = {Ggtree is an R/Bioconductor package for visualizing tree-like structures and associated data. After 5 years of continual development, ggtree has been evolved as a package suite that contains treeio for tree data input and output, tidytree for tree data manipulation, and ggtree for tree data visualization. Ggtree was originally designed to work with phylogenetic trees, and has been expanded to support other tree-like structures, which extends the application of ggtree to present tree data in other disciplines. This article contains five basic protocols describing how to visualize trees using the grammar of graphics syntax, how to visualize hierarchical clustering results with associated data, how to estimate bootstrap values and visualize the values on the tree, how to estimate continuous and discrete ancestral traits and visualize ancestral states on the tree, and how to visualize a multiple sequence alignment with a phylogenetic tree. The ggtree package is freely available at https://www.bioconductor.org/packages/ggtree. © 2020 by John Wiley \& Sons, Inc. Basic Protocol 1: Using grammar of graphics for visualizing trees Basic Protocol 2: Visualizing hierarchical clustering using ggtree Basic Protocol 3: Visualizing bootstrap values as symbolic points Basic Protocol 4: Visualizing ancestral status Basic Protocol 5: Visualizing a multiple sequence alignment with a phylogenetic tree}
}
@article{wang_treeio_2020,
  author = {Wang, Li-Gen and Lam, Tommy Tsan-Yuk and Xu, Shuangbin and Dai, Zehan and Zhou, Lang and Feng, Tingze and Guo, Pingfan and Dunn, Casey W. and Jones, Bradley R. and Bradley, Tyler and Zhu, Huachen and Guan, Yi and Jiang, Yong and Yu, Guangchuang},
  title = {Treeio: An {R} Package for Phylogenetic Tree Input and Output with Richly Annotated and Associated Data},
  shorttitle = {Treeio},
  journaltitle = {Molecular Biology and Evolution},
  shortjournal = {Mol Biol Evol},
  volume = {37},
  number = {2},
  pages = {599--603},
  date = {2020-02-01},
  issn = {0737-4038},
  doi = {10.1093/molbev/msz240},
  url = {https://academic.oup.com/mbe/article/37/2/599/5601621},
  urldate = {2020-02-18},
  langid = {english},
  abstract = {Abstract. Phylogenetic trees and data are often stored in incompatible and inconsistent formats. The outputs of software tools that contain trees with analysis}
}
@article{yu_two_2018,
  author = {Yu, Guangchuang and Lam, Tommy Tsan-Yuk and Zhu, Huachen and Guan, Yi},
  title = {Two Methods for Mapping and Visualizing Associated Data on Phylogeny Using Ggtree},
  journaltitle = {Molecular Biology and Evolution},
  shortjournal = {Mol Biol Evol},
  volume = {35},
  number = {12},
  pages = {3041--3043},
  date = {2018-12-01},
  issn = {0737-4038},
  doi = {10.1093/molbev/msy194},
  url = {https://academic.oup.com/mbe/article/35/12/3041/5142656},
  urldate = {2019-01-03},
  langid = {english},
  abstract = {Abstract. Ggtree is a comprehensive R package for visualizing and annotating phylogenetic trees with associated data. It can also map and visualize associated}
}
@article{yu_ggtree:_2017,
  author = {Yu, Guangchuang and Smith, David K. and Zhu, Huachen and Guan, Yi and Lam, Tommy Tsan-Yuk},
  title = {{ggtree}: an {R} package for visualization and annotation of phylogenetic trees with their covariates and other associated data},
  shorttitle = {ggtree},
  journal = {Methods in Ecology and Evolution},
  volume = {8},
  number = {1},
  pages = {28--36},
  year = {2017},
  month = jan,
  issn = {2041-210X},
  doi = {10.1111/2041-210X.12628},
  url = {https://doi.org/10.1111/2041-210X.12628},
  urldate = {2017-03-07},
  language = {en},
  keywords = {annotation, bioconductor, Evolution, Phylogeny, r package, visualization}
}
@article{segata_metagenomic_2011,
  author = {Segata, Nicola and Izard, Jacques and Waldron, Levi and Gevers, Dirk and Miropolsky, Larisa and Garrett, Wendy S and Huttenhower, Curtis},
  title = {Metagenomic biomarker discovery and explanation},
  journaltitle = {Genome Biology},
  shortjournal = {Genome Biol},
  volume = {12},
  number = {6},
  pages = {R60},
  date = {2011},
  issn = {1465-6906},
  doi = {10.1186/gb-2011-12-6-r60},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3218848/},
  urldate = {2020-11-25},
  pmid = {21702898},
  pmcid = {PMC3218848},
  abstract = {This study describes and validates a new method for metagenomic biomarker discovery by way of class comparison, tests of biological consistency and effect size estimation. This addresses the challenge of finding organisms, genes, or pathways that consistently explain the differences between two or more microbial communities, which is a central problem to the study of metagenomics. We extensively validate our method on several microbiomes and a convenient online interface for the method is provided at http://huttenhower.sph.harvard.edu/lefse/.}
}
@article{escudero_grand_2020,
  author = {Escudero, Marcial and Wendel, Jonathan F.},
  title = {The grand sweep of chromosomal evolution in angiosperms},
  journaltitle = {New Phytologist},
  volume = {228},
  number = {3},
  pages = {805--808},
  date = {2020},
  issn = {1469-8137},
  doi = {10.1111/nph.16802},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/nph.16802},
  urldate = {2021-11-01},
  keywords = {angiosperms, chromosome number, dysploidy, genome down-sizing, genome size, karyotype evolution, phylogenetics, polyploidy},
  abstract = {This article is a Commentary on Carta et al. (2020), 228: 1097–1106.},
  file = {Full Text PDF:/data/Zotero/storage/86HVD5PJ/Escudero and Wendel - 2020 - The grand sweep of chromosomal evolution in angios.pdf:application/pdf}
}
@article{michonneau_rotl:_2016,
  author = {Michonneau, François and Brown, Joseph W. and Winter, David J.},
  title = {{rotl}: an {R} package to interact with the {Open Tree of Life} data},
  shorttitle = {rotl},
  journaltitle = {Methods in Ecology and Evolution},
  shortjournal = {Methods in Ecology and Evolution},
  volume = {7},
  number = {12},
  pages = {1476--1481},
  date = {2016-12-01},
  issn = {2041-210X},
  doi = {10.1111/2041-210X.12593},
  url = {https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.12593},
  urldate = {2019-06-05},
  keywords = {phylogenetics, comparative methods, macroevolution, Open Tree of Life},
  abstract = {Summary While phylogenies have been getting easier to build, it has been difficult to reuse, combine and synthesize the information they provide because published trees are often only available as image files, and taxonomic information is not standardized across studies. The Open Tree of Life ({OTL}) project addresses these issues by providing a digital tree that encompasses all organisms, built by combining taxonomic information and published phylogenies. The project also provides tools and services to query and download parts of this synthetic tree, as well as the source data used to build it. Here, we present rotl, an R package to search and download data from the Open Tree of Life directly in R. rotl uses common data structures allowing researchers to take advantage of the rich set of tools and methods that are available in R to manipulate, analyse and visualize phylogenies. Here, and in the vignettes accompanying the package, we demonstrate how rotl can be used with other R packages to analyse biodiversity data. As phylogenies are being used in a growing number of applications, rotl facilitates access to phylogenetic data and allows their integration with statistical methods and data sources available in R.}
}
@article{chen_ancient_2017,
  author = {Chen, Zigui and Ho, Wendy C. S. and Boon, Siaw Shi and Law, Priscilla T. Y. and Chan, Martin C. W. and {DeSalle}, Rob and Burk, Robert D. and Chan, Paul K. S.},
  title = {Ancient Evolution and Dispersion of Human Papillomavirus 58 Variants},
  journaltitle = {Journal of Virology},
  shortjournal = {J. Virol.},
  volume = {91},
  number = {21},
  pages = {e01285--17},
  date = {2017-11-01},
  issn = {0022-538X, 1098-5514},
  doi = {10.1128/JVI.01285-17},
  url = {http://jvi.asm.org/content/91/21/e01285-17},
  urldate = {2018-07-10},
  pmid = {28794033},
  langid = {english},
  keywords = {cervical cancer, evolution, {HPV}58, oncogenicity, papillomavirus, virus-host codivergence},
  abstract = {Human papillomavirus 58 ({HPV}58) is found in 10 to 18\% of cervical cancers in East Asia but is rather uncommon elsewhere. The distribution and oncogenic potential of {HPV}58 variants appear to be heterogeneous, since the E7 T20I/G63S variant is more prevalent in East Asia and confers a 7- to 9-fold-higher risk of cervical precancer and cancer. However, the underlying genomic mechanisms that explain the geographic and carcinogenic diversity of {HPV}58 variants are still poorly understood. In this study, we used a combination of phylogenetic analyses and bioinformatics to investigate the deep evolutionary history of {HPV}58 complete genome variants. The initial splitting of {HPV}58 variants was estimated to occur 478,600 years ago (95\% highest posterior density [{HPD}], 391,000 to 569,600 years ago). This divergence time is well within the era of speciation between Homo sapiens and Neanderthals/Denisovans and around three times longer than the modern Homo sapiens divergence times. The expansion of present-day variants in Eurasia could be the consequence of viral transmission from Neanderthals/Denisovans to non-African modern human populations through gene flow. A whole-genome sequence signature analysis identified 3 amino acid changes, 16 synonymous nucleotide changes, and a 12-bp insertion strongly associated with the E7 T20I/G63S variant that represents the A3 sublineage and carries higher carcinogenetic potential. Compared with the capsid proteins, the oncogenes E7 and E6 had increased substitution rates indicative of higher selection pressure. These data provide a comprehensive evolutionary history and genomic basis of {HPV}58 variants to assist further investigation of carcinogenic association and the development of diagnostic and therapeutic strategies.
{IMPORTANCE} Papillomaviruses ({PVs}) are an ancient and heterogeneous group of double-stranded {DNA} viruses that preferentially infect the cutaneous and mucocutaneous epithelia of vertebrates. Persistent infection by specific oncogenic human papillomaviruses ({HPVs}), including {HPV}58, has been established as the primary cause of cervical cancer. In this work, we reveal the complex evolutionary history of {HPV}58 variants that explains the heterogeneity of oncogenic potential and geographic distribution. Our data suggest that {HPV}58 variants may have coevolved with archaic hominins and dispersed across the planet through host interbreeding and gene flow. Certain genes and codons of {HPV}58 variants representing higher carcinogenic potential and/or that are under positive selection may have important implications for viral host specificity, pathogenesis, and disease prevention.},
  file = {Full Text PDF:/Volumes/HOME/Zotero/storage/XXBU2AQL/Chen et al. - 2017 - Ancient Evolution and Dispersion of Human Papillom.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/XGGC2RCD/e01285-17.html:text/html}
}
@article{lott_covenntree:_2015,
  author = {Lott, Steffen C. and Voß, Björn and Hess, Wolfgang R. and Steglich, Claudia},
  title = {{CoVennTree}: a new method for the comparative analysis of large datasets},
  shorttitle = {{CoVennTree}},
  journaltitle = {Frontiers in Genetics},
  shortjournal = {Front Genet},
  volume = {6},
  pages = {43},
  date = {2015},
  issn = {1664-8021},
  doi = {10.3389/fgene.2015.00043},
  pmid = {25750651},
  pmcid = {PMC4335276},
  keywords = {{CoVennTree}, massive comparative analysis, rooted tree, {VDS} value, weighted Venn diagram},
  abstract = {The visualization of massive datasets, such as those resulting from comparative metatranscriptome analyses or the analysis of microbial population structures using ribosomal {RNA} sequences, is a challenging task. We developed a new method called {CoVennTree} (Comparative weighted Venn Tree) that simultaneously compares up to three multifarious datasets by aggregating and propagating information from the bottom to the top level and produces a graphical output in Cytoscape. With the introduction of weighted Venn structures, the contents and relationships of various datasets can be correlated and simultaneously aggregated without losing information. We demonstrate the suitability of this approach using a dataset of 16S {rDNA} sequences obtained from microbial populations at three different depths of the Gulf of Aqaba in the Red Sea. {CoVennTree} has been integrated into the Galaxy {ToolShed} and can be directly downloaded and integrated into the user instance.}
}
@article{grubaugh_genomic_2017,
  author = {Grubaugh, Nathan D. and Ladner, Jason T. and Kraemer, Moritz U. G. and Dudas, Gytis and Tan, Amanda L. and Gangavarapu, Karthik and Wiley, Michael R. and White, Stephen and Thézé, Julien and Magnani, Diogo M. and Prieto, Karla and Reyes, Daniel and Bingham, Andrea M. and Paul, Lauren M. and Robles-Sikisaka, Refugio and Oliveira, Glenn and Pronty, Darryl and Barcellona, Carolyn M. and Metsky, Hayden C. and Baniecki, Mary Lynn and Barnes, Kayla G. and Chak, Bridget and Freije, Catherine A. and Gladden-Young, Adrianne and Gnirke, Andreas and Luo, Cynthia and {MacInnis}, Bronwyn and Matranga, Christian B. and Park, Daniel J. and Qu, James and Schaffner, Stephen F. and Tomkins-Tinch, Christopher and West, Kendra L. and Winnicki, Sarah M. and Wohl, Shirlee and Yozwiak, Nathan L. and Quick, Joshua and Fauver, Joseph R. and Khan, Kamran and Brent, Shannon E. and Reiner, Jr., Robert C. and Lichtenberger, Paola N. and Ricciardi, Michael J. and Bailey, Varian K. and Watkins, David I. and Cone, Marshall R. and Kopp, IV, Edgar W. and Hogan, Kelly N. and Cannons, Andrew C. and Jean, Reynald and Monaghan, Andrew J. and Garry, Robert F. and Loman, Nicholas J. and Faria, Nuno R. and Porcelli, Mario C. and Vasquez, Chalmers and Nagle, Elyse R. and Cummings, Derek A. T. and Stanek, Danielle and Rambaut, Andrew and Sanchez-Lockhart, Mariano and Sabeti, Pardis C. and Gillis, Leah D. and Michael, Scott F. and Bedford, Trevor and Pybus, Oliver G. and Isern, Sharon and Palacios, Gustavo and Andersen, Kristian G.},
  title = {Genomic epidemiology reveals multiple introductions of {Zika} virus into the {United States}},
  journaltitle = {Nature},
  volume = {546},
  number = {7658},
  pages = {401--405},
  date = {2017-06},
  issn = {1476-4687},
  doi = {10.1038/nature22400},
  url = {https://www.nature.com/articles/nature22400},
  urldate = {2018-07-18},
  langid = {english},
  rights = {2017 Nature Publishing Group},
  abstract = {Zika virus ({ZIKV}) is causing an unprecedented epidemic linked to severe congenital abnormalities. In July 2016, mosquito-borne {ZIKV} transmission was reported in the continental United States; since then, hundreds of locally acquired infections have been reported in Florida. To gain insights into the timing, source, and likely route(s) of {ZIKV} introduction, we tracked the virus from its first detection in Florida by sequencing {ZIKV} genomes from infected patients and Aedes aegypti mosquitoes. We show that at least 4 introductions, but potentially as many as 40, contributed to the outbreak in Florida and that local transmission is likely to have started in the spring of 2016—several months before its initial detection. By analysing surveillance and genetic data, we show that {ZIKV} moved among transmission zones in Miami. Our analyses show that most introductions were linked to the Caribbean, a finding corroborated by the high incidence rates and traffic volumes from the region into the Miami area. Our study provides an understanding of how {ZIKV} initiates transmission in new regions.}
}
@article{kumar_mega7_2016,
  author = {Kumar, Sudhir and Stecher, Glen and Tamura, Koichiro},
  title = {{MEGA7}: {Molecular Evolutionary Genetics Analysis} Version 7.0 for Bigger Datasets},
  shorttitle = {{MEGA7}},
  journaltitle = {Molecular Biology and Evolution},
  shortjournal = {Mol. Biol. Evol.},
  volume = {33},
  number = {7},
  pages = {1870--1874},
  date = {2016},
  issn = {1537-1719},
  doi = {10.1093/molbev/msw054},
  pmid = {27004904},
  keywords = {Algorithms, Biological Evolution, Databases, Genetic, Datasets as Topic, Evolution, Molecular, evolution., gene families, Internet, Phylogeny, Sequence Alignment, Sequence Analysis, software, Software, timetree, User-Computer Interface},
  abstract = {We present the latest version of the Molecular Evolutionary Genetics Analysis (Mega) software, which contains many sophisticated methods and tools for phylogenomics and phylomedicine. In this major upgrade, Mega has been optimized for use on 64-bit computing systems for analyzing larger datasets. Researchers can now explore and analyze tens of thousands of sequences in Mega. The new version also provides an advanced wizard for building timetrees and includes a new functionality to automatically predict gene duplication events in gene family trees. The 64-bit Mega is made available in two interfaces: graphical and command line. The graphical user interface ({GUI}) is a native Microsoft Windows application that can also be used on Mac {OS} X. The command line Mega is available as native applications for Windows, Linux, and Mac {OS} X. They are intended for use in high-throughput and scripted analysis. Both versions are available from www.megasoftware.net free of charge.}
}
@article{schliep_phangorn_2011,
  author = {Schliep, Klaus Peter},
  title = {phangorn: phylogenetic analysis in {R}},
  shorttitle = {phangorn},
  journaltitle = {Bioinformatics},
  volume = {27},
  number = {4},
  pages = {592--593},
  date = {2011-02-15},
  doi = {10.1093/bioinformatics/btq706},
  url = {http://bioinformatics.oxfordjournals.org/content/27/4/592.abstract},
  urldate = {2011-03-05},
  abstract = {Summary: phangorn is a package for phylogenetic reconstruction and analysis in the R language. Previously it was only possible to estimate phylogenetic trees with distance methods in R. phangorn, now offers the possibility of reconstructing phylogenies with distance based methods, maximum parsimony or maximum likelihood ({ML}) and performing Hadamard conjugation. Extending the general {ML} framework, this package provides the possibility of estimating mixture and partition models. Furthermore, phangorn offers several functions for comparing trees, phylogenetic models or splits, simulating character data and performing congruence analyses. Availability: phangorn can be obtained through the {CRAN} homepage http://cran.r-project.org/web/packages/phangorn/index.html. phangorn is licensed under {GPL} 2. Contact: klaus.kschliep@snv.jussieu.fr Supplementary information: Supplementary data are available at Bioinformatics online.}
}
@article{sanderson_r8s:_2003,
  author = {Sanderson, Michael J.},
  title = {{r8s}: inferring absolute rates of molecular evolution and divergence times in the absence of a molecular clock},
  shorttitle = {r8s},
  journal = {Bioinformatics},
  volume = {19},
  number = {2},
  pages = {301--302},
  year = {2003},
  month = jan,
  issn = {1367-4803, 1460-2059},
  doi = {10.1093/bioinformatics/19.2.301},
  url = {https://doi.org/10.1093/bioinformatics/19.2.301},
  urldate = {2015-09-07},
  pmid = {12538260},
  language = {en}
}
% NeXML: XML standard for exchanging annotated comparative data (Vos et al., Systematic Biology 61(4), 2012).
@article{vos_nexml:_2012,
  author       = {Vos, Rutger A. and Balhoff, James P. and Caravas, Jason A. and Holder, Mark T. and Lapp, Hilmar and Maddison, Wayne P. and Midford, Peter E. and Priyam, Anurag and Sukumaran, Jeet and Xia, Xuhua and Stoltzfus, Arlin},
  title        = {{NeXML}: rich, extensible, and verifiable representation of comparative data and metadata},
  shorttitle   = {{NeXML}},
  journaltitle = {Systematic Biology},
  shortjournal = {Syst. Biol.},
  volume       = {61},
  number       = {4},
  pages        = {675--689},
  date         = {2012-07},
  issn         = {1076-836X},
  doi          = {10.1093/sysbio/sys025},
  pmid         = {22357728},
  pmcid        = {PMC3376374},
  keywords     = {Computational Biology, Software, Models, Biological, Programming Languages, Classification, Informatics, Phylogeny, Biological Evolution, Biodiversity},
  abstract     = {In scientific research, integration and synthesis require a common understanding of where data come from, how much they can be trusted, and what they may be used for. To make such an understanding computer-accessible requires standards for exchanging richly annotated data. The challenges of conveying reusable data are particularly acute in regard to evolutionary comparative analysis, which comprises an ever-expanding list of data types, methods, research aims, and subdisciplines. To facilitate interoperability in evolutionary comparative analysis, we present {NeXML}, an {XML} standard (inspired by the current standard, {NEXUS}) that supports exchange of richly annotated comparative data. {NeXML} defines syntax for operational taxonomic units, character-state matrices, and phylogenetic trees and networks. Documents can be validated unambiguously. Importantly, any data element can be annotated, to an arbitrary degree of richness, using a system that is both flexible and rigorous. We describe how the use of {NeXML} by the {TreeBASE} and Phenoscape projects satisfies user needs that cannot be satisfied with other available file formats. By relying on {XML} Schema Definition, the design of {NeXML} facilitates the development and deployment of software for processing, transforming, and querying documents. The adoption of {NeXML} for practical use is facilitated by the availability of (1) an online manual with code samples and a reference to all defined elements and attributes, (2) programming toolkits in most of the languages used commonly in evolutionary informatics, and (3) input-output support in several widely used software applications. An active, open, community-based development process enables future revision and expansion of {NeXML}.},
}
% Swine influenza surveillance in China: pdm/09-origin internal genes in pigs (Liang et al., Journal of Virology, 2014).
% Volume, issue and pages are those of the final published article; the URL is the publisher's early-view link.
@article{liang_expansion_2014,
  author   = {Liang, Huyi and Lam, Tommy Tsan-Yuk and Fan, Xiaohui and Chen, Xinchun and Zeng, Yu and Zhou, Ji and Duan, Lian and Tse, Maying and Chan, Chung-Hei and Li, Lifeng and Leung, Tak-Ying and Yip, Chun-Hung and Cheung, Chung-Lam and Zhou, Boping and Smith, David K. and Poon, Leo Lit-Man and Peiris, Malik and Guan, Yi and Zhu, Huachen},
  title    = {Expansion of genotypic diversity and establishment of 2009 {H1N1} pandemic-origin internal genes in pigs in {China}},
  journal  = {Journal of Virology},
  volume   = {88},
  number   = {18},
  pages    = {10864--10874},
  month    = sep,
  year     = {2014},
  issn     = {0022-538X, 1098-5514},
  doi      = {10.1128/JVI.01327-14},
  pmid     = {25008935},
  url      = {http://jvi.asm.org/content/early/2014/07/03/JVI.01327-14},
  urldate  = {2017-02-15},
  language = {en},
  abstract = {‘Two-way’ transmission of influenza viruses between humans and swine has been frequently observed and the occurrence of the 2009 H1N1 pandemic influenza (pdm/09) demonstrated that swine-origin viruses could facilitate the genesis of a pandemic strain. Although multiple introductions to and reassortment in swine of the pdm/09 virus have been repeatedly reported in both Eurasia and the Americas, its long-term impact on the development of swine influenza viruses (SIVs) has not been systematically explored. Our comprehensive evolutionary studies on the complete genomes of 387 SIVs obtained from 2009 to 2012 in influenza surveillance in China revealed 17 reassortant genotypes with pdm/09-origin genes. Even though the entire 2009 pandemic virus and its surface genes cannot persist, its internal genes have becoming established and are now the predominant lineages in pigs in the region. The main persistent pdm/09-origin reassortant forms had at least 5 pdm/09-origin internal genes and their surface genes primarily of European avian-like (EA) or human H3N2-like SIV origin. These findings represent a marked change to the evolutionary patterns and ecosystem of SIVs in China. It is possible that the pdm/09-origin internal genes may be in the process of replacing EA- or triple reassortant-like internal genes. These alterations to the SIV gene pool need to be continually monitored to assess changes in the potential for SIVs to transmit to humans.
Importance Shortly after the emergence of the 2009 pandemic H1N1 (pdm/09) influenza virus, it was transmitted from humans to pigs and this continues to occur around the world. Many reassortants between pdm/09-origin viruses and enzootic swine influenza viruses (SIVs) have been detected. However, the long-term impact of pdm/09-origin viruses on the SIV gene pool, which could lead to the generation of influenza viruses with the potential to infect humans, has not been systematically examined. From extensive surveillance of SIVs over a 38-month period in southern China, it was found that, although neither complete pdm/09 viruses nor their surface genes could persist in pigs, their internal genes did persist. Over the survey period, these internal genes became predominant, potentially replacing those of the enzootic SIV lineages. The altered diversity of the SIV gene pool needs to be closely monitored for changes in the potential of SIVs to transmit to humans.},
}
% Graphical-model representation of phylogenetic models, introducing tree plates (Höhna et al., Systematic Biology 63(5), 2014).
@article{hohna_probabilistic_2014,
  author       = {Höhna, Sebastian and Heath, Tracy A. and Boussau, Bastien and Landis, Michael J. and Ronquist, Fredrik and Huelsenbeck, John P.},
  title        = {Probabilistic Graphical Model Representation in Phylogenetics},
  journaltitle = {Systematic Biology},
  shortjournal = {Syst Biol},
  volume       = {63},
  number       = {5},
  pages        = {753--771},
  date         = {2014-09-01},
  issn         = {1063-5157, 1076-836X},
  doi          = {10.1093/sysbio/syu039},
  pmid         = {24951559},
  url          = {http://sysbio.oxfordjournals.org/content/63/5/753},
  urldate      = {2015-11-17},
  langid       = {english},
  abstract     = {Recent years have seen a rapid expansion of the model space explored in statistical phylogenetics, emphasizing the need for new approaches to statistical model representation and software development. Clear communication and representation of the chosen model is crucial for: (i) reproducibility of an analysis, (ii) model development, and (iii) software design. Moreover, a unified, clear and understandable framework for model representation lowers the barrier for beginners and nonspecialists to grasp complex phylogenetic models, including their assumptions and parameter/variable dependencies. Graphical modeling is a unifying framework that has gained in popularity in the statistical literature in recent years. The core idea is to break complex models into conditionally independent distributions. The strength lies in the comprehensibility, flexibility, and adaptability of this formalism, and the large body of computational work based on it. Graphical models are well-suited to teach statistical models, to facilitate communication among phylogeneticists and in the development of generic software for simulation and statistical inference. Here, we provide an introduction to graphical models for phylogeneticists and extend the standard graphical model representation to the realm of phylogenetics. We introduce a new graphical model component, tree plates, to capture the changing structure of the subgraph corresponding to a phylogenetic tree. We describe a range of phylogenetic models using the graphical model framework and introduce modules to simplify the representation of standard components in large and complex models. Phylogenetic model graphs can be readily used in simulation, maximum likelihood inference, and Bayesian inference using, for example, Metropolis–Hastings or Gibbs sampling of the posterior distribution. [Computation; graphical models; inference; modularization; statistical phylogenetics; tree plate.]},
}
% Joint probabilistic inference of rooted species and gene trees (Boussau et al., Genome Research 23(2), 2013).
@article{boussau_genome-scale_2013,
  author       = {Boussau, Bastien and Szöllősi, Gergely J. and Duret, Laurent and Gouy, Manolo and Tannier, Eric and Daubin, Vincent},
  title        = {Genome-scale coestimation of species and gene trees},
  journaltitle = {Genome Research},
  shortjournal = {Genome Res.},
  volume       = {23},
  number       = {2},
  pages        = {323--330},
  date         = {2013-02-01},
  issn         = {1088-9051, 1549-5469},
  doi          = {10.1101/gr.141978.112},
  pmid         = {23132911},
  url          = {http://genome.cshlp.org/content/23/2/323},
  urldate      = {2015-11-17},
  langid       = {english},
  abstract     = {Comparisons of gene trees and species trees are key to understanding major processes of genome evolution such as gene duplication and loss. Because current methods to reconstruct phylogenies fail to model the two-way dependency between gene trees and the species tree, they often misrepresent gene and species histories. We present a new probabilistic model to jointly infer rooted species and gene trees for dozens of genomes and thousands of gene families. We use simulations to show that this method accurately infers the species tree and gene trees, is robust to misspecification of the models of sequence and gene family evolution, and provides a precise historic record of gene duplications and losses throughout genome evolution. We simultaneously reconstruct the history of mammalian species and their genes based on 36 completely sequenced genomes, and use the reconstructed gene trees to infer the gene content and organization of ancestral mammalian genomes. We show that our method yields a more accurate picture of ancestral genomes than the trees available in the authoritative database Ensembl.},
}
% Textbook on phylogenetic inference (Felsenstein, Sinauer Associates, 2003).
% The edition is stored as a bare number; the bibliography style supplies the word "edition".
@book{felsenstein_inferring_2003,
  author    = {Felsenstein, Joseph},
  title     = {Inferring Phylogenies},
  edition   = {2},
  publisher = {Sinauer Associates},
  address   = {Sunderland, Mass},
  month     = sep,
  year      = {2003},
  isbn      = {9780878931774},
  language  = {English},
  abstract  = {Phylogenies (evolutionary trees) are basic to thinking about and analyzing differences between species. Statistical, computational, and algorithmic work on them has been ongoing for four decades, with great advances in understanding. Yet no book has summarized this work until now. Inferring Phylogenies explains clearly the assumptions and logic of making inferences about phylogenies, and using them to make inferences about evolutionary processes. It is an essential text and reference for anyone who wants to understand how phylogenies are reconstructed and how they are used. As phylogenies are inferred with various kinds of data, this book concentrates on some of the central ones: discretely coded characters, molecular sequences, gene frequencies, and quantitative traits. Also covered are restriction sites, {RAPDs}, and microsatellites. Inferring Phylogenies is intended for graduate-level courses, assuming some knowledge of statistics, mathematics (calculus and fundamental matrix algebra), molecular sequences, and quantitative genetics.},
}
% ggplot2 book, first edition (Wickham, Springer, 2009).
@book{wickham_ggplot2_2009,
  author     = {Wickham, Hadley},
  title      = {ggplot2: Elegant Graphics for Data Analysis},
  shorttitle = {ggplot2},
  edition    = {1},
  publisher  = {Springer},
  month      = aug,
  year       = {2009},
  isbn       = {0387981403},
}
% APE: R package for phylogenetic and evolutionary analyses (Paradis et al., Bioinformatics 20(2), 2004).
% The language name R is brace-protected so sentence-casing styles do not lowercase it.
@article{paradis_ape_2004,
  author     = {Paradis, Emmanuel and Claude, Julien and Strimmer, Korbinian},
  title      = {{APE}: Analyses of Phylogenetics and Evolution in {R} language},
  shorttitle = {{APE}},
  journal    = {Bioinformatics},
  volume     = {20},
  number     = {2},
  pages      = {289--290},
  month      = jan,
  year       = {2004},
  doi        = {10.1093/bioinformatics/btg412},
  url        = {http://bioinformatics.oxfordjournals.org/content/20/2/289.abstract},
  urldate    = {2011-03-04},
  abstract   = {Summary: Analysis of Phylogenetics and Evolution ({APE}) is a package written in the R language for use in molecular evolution and phylogenetics. {APE} provides both utility functions for reading and writing data and manipulating phylogenetic trees, as well as several advanced methods for phylogenetic and evolutionary analysis (e.g. comparative and population genetic methods). {APE} takes advantage of the many R functions for statistics and graphics, and also provides a flexible framework for developing and implementing further statistical methods for the analysis of evolutionary processes. Availability: The program is free and available from the official R package archive at http://cran.r-project.org/src/contrib/{PACKAGES}.html\#ape. {APE} is licensed under the {GNU} General Public License.},
}
% pplacer: phylogenetic placement of sequences onto a fixed reference tree (Matsen et al., BMC Bioinformatics 11, 2010).
% The URL is the publisher's own host, not an institutional library proxy.
@article{matsen_pplacer_2010,
  author     = {Matsen, Frederick A and Kodner, Robin B and Armbrust, E Virginia},
  title      = {pplacer: linear time maximum-likelihood and {Bayesian} phylogenetic placement of sequences onto a fixed reference tree},
  shorttitle = {pplacer},
  journal    = {{BMC} Bioinformatics},
  volume     = {11},
  number     = {1},
  pages      = {538},
  year       = {2010},
  issn       = {1471-2105},
  doi        = {10.1186/1471-2105-11-538},
  url        = {http://www.biomedcentral.com/1471-2105/11/538},
  urldate    = {2015-01-05},
  language   = {en},
}
% JSON-based file format for phylogenetic placements (Matsen et al., PLoS ONE 7(2), 2012).
@article{matsen_format_2012,
  author   = {Matsen, Frederick A. and Hoffman, Noah G. and Gallagher, Aaron and Stamatakis, Alexandros},
  title    = {A Format for Phylogenetic Placements},
  journal  = {{PLoS} {ONE}},
  volume   = {7},
  number   = {2},
  pages    = {e31009},
  month    = feb,
  year     = {2012},
  doi      = {10.1371/journal.pone.0031009},
  url      = {http://dx.doi.org/10.1371/journal.pone.0031009},
  urldate  = {2015-01-05},
  abstract = {We have developed a unified format for phylogenetic placements, that is, mappings of environmental sequence data (e.g., short reads) into a phylogenetic tree. We are motivated to do so by the growing number of tools for computing and post-processing phylogenetic placements, and the lack of an established standard for storing them. The format is lightweight, versatile, extensible, and is based on the {JSON} format, which can be parsed by most modern programming languages. Our format is already implemented in several tools for computing and post-processing parsimony- and likelihood-based phylogenetic placements and has worked well in practice. We believe that establishing a standard format for analyzing read placements at this early stage will lead to a more efficient development of powerful and portable post-analysis tools for the growing applications of phylogenetic placement.},
}
% Evolutionary placement algorithm (EPA) for short reads, integrated into RAxML (Berger et al., Systematic Biology, 2011).
% Volume and issue are those of the printed article; the URL is the publisher's early-view link.
@article{berger_performance_2011,
  author   = {Berger, Simon A. and Krompass, Denis and Stamatakis, Alexandros},
  title    = {Performance, Accuracy, and Web Server for Evolutionary Placement of Short Sequence Reads under Maximum Likelihood},
  journal  = {Systematic Biology},
  volume   = {60},
  number   = {3},
  pages    = {291--302},
  month    = may,
  year     = {2011},
  issn     = {1063-5157, 1076-836X},
  doi      = {10.1093/sysbio/syr010},
  pmid     = {21436105},
  url      = {http://sysbio.oxfordjournals.org/content/early/2011/03/23/sysbio.syr010},
  urldate  = {2016-11-10},
  language = {en},
  abstract = {We present an evolutionary placement algorithm (EPA) and a Web server for the rapid assignment of sequence fragments (short reads) to edges of a given phylogenetic tree under the maximum-likelihood model. The accuracy of the algorithm is evaluated on several real-world data sets and compared with placement by pair-wise sequence comparison, using edit distances and BLAST. We introduce a slow and accurate as well as a fast and less accurate placement algorithm. For the slow algorithm, we develop additional heuristic techniques that yield almost the same run times as the fast version with only a small loss of accuracy. When those additional heuristics are employed, the run time of the more accurate algorithm is comparable with that of a simple BLAST search for data sets with a high number of short query sequences. Moreover, the accuracy of the EPA is significantly higher, in particular when the sample of taxa in the reference topology is sparse or inadequate. Our algorithm, which has been integrated into RAxML, therefore provides an equally fast but more accurate alternative to BLAST for tree-based inference of the evolutionary origin and composition of short sequence reads. We are also actively developing a Web server that offers a freely available service for computing read placements on trees using the EPA. [Maximum likelihood; metagenomics; phylogenetic placement; RAxML; short sequence reads.]},
}
% RAxML version 8: maximum-likelihood phylogenetic analysis of large datasets (Stamatakis, Bioinformatics, 2014).
% Volume, issue, pages and date are those of the final published article; the URL is the publisher's early-view link.
@article{stamatakis_raxml_2014,
  author       = {Stamatakis, Alexandros},
  title        = {{RAxML} Version 8: A tool for Phylogenetic Analysis and Post-Analysis of Large Phylogenies},
  shorttitle   = {{RAxML} Version 8},
  journaltitle = {Bioinformatics},
  shortjournal = {Bioinformatics},
  volume       = {30},
  number       = {9},
  pages        = {1312--1313},
  date         = {2014-05-01},
  issn         = {1367-4803, 1460-2059},
  doi          = {10.1093/bioinformatics/btu033},
  pmid         = {24451623},
  url          = {http://bioinformatics.oxfordjournals.org/content/early/2014/01/21/bioinformatics.btu033},
  urldate      = {2015-12-28},
  langid       = {english},
  abstract     = {Motivation: Phylogenies are increasingly used in all fields of medical and biological research. Moreover, because of the next generation sequencing revolution, datasets used for conducting phylogenetic analyses grow at an unprecedented pace. {RAxML} (Randomized Axelerated Maximum Likelihood) is a popular program for phylogenetic analyses of large datasets under maximum likelihood. Since the last {RAxML} paper in 2006, it has been continuously maintained and extended to accommodate the increasingly growing input datasets and to serve the needs of the user community.
Results: I present some of the most notable new features and extensions of {RAxML}, such as, a substantial extension of substitution models and supported data types, the introduction of {SSE}3, {AVX}, and {AVX}2 vector intrinsics, techniques for reducing the memory requirements of the code and a plethora of operations for conducting post-analyses on sets of trees. In addition, an up-to-date, 50 page user manual covering all new {RAxML} options is available.
Availability: The code is available under {GNU} {GPL} at https://github.com/stamatak/standard-{RAxML}.
Contact: Alexandros.Stamatakis@h-its.org},
}
% phyloseq: R package for analysis and graphics of microbiome census data (McMurdie and Holmes, PLoS ONE 8(4), 2013).
% The language name R is brace-protected so sentence-casing styles do not lowercase it.
@article{mcmurdie_phyloseq_2013,
  author     = {McMurdie, Paul J. and Holmes, Susan},
  title      = {phyloseq: An {R} Package for Reproducible Interactive Analysis and Graphics of Microbiome Census Data},
  shorttitle = {phyloseq},
  journal    = {{PLoS} {ONE}},
  volume     = {8},
  number     = {4},
  pages      = {e61217},
  month      = apr,
  year       = {2013},
  doi        = {10.1371/journal.pone.0061217},
  url        = {http://dx.doi.org/10.1371/journal.pone.0061217},
  urldate    = {2015-01-05},
  abstract   = {Background: The analysis of microbial communities through {DNA} sequencing brings many challenges: the integration of different types of data with methods from ecology, genetics, phylogenetics, multivariate statistics, visualization and testing. With the increased breadth of experimental designs now being pursued, project-specific statistical analyses are often needed, and these analyses are often difficult (or impossible) for peer researchers to independently reproduce. The vast majority of the requisite tools for performing these analyses reproducibly are already implemented in R and its extensions (packages), but with limited support for high throughput microbiome census data. Results: Here we describe a software project, phyloseq, dedicated to the object-oriented representation and analysis of microbiome census data in R. It supports importing data from a variety of common formats, as well as many analysis techniques. These include calibration, filtering, subsetting, agglomeration, multi-table comparisons, diversity analysis, parallelized Fast {UniFrac}, ordination methods, and production of publication-quality graphics; all in a manner that is easy to document, share, and modify. We show how to apply functions from other R packages to phyloseq-represented data, illustrating the availability of a large number of open source analysis techniques. We discuss the use of phyloseq with tools for reproducible research, a practice common in other fields but still rare in the analysis of highly parallel microbiome census data. We have made available all of the materials necessary to completely reproduce the analysis and figures included in this article, an example of best practices for reproducible research. Conclusions: The phyloseq project for R is a new open-source software package, freely available on the web from both {GitHub} and Bioconductor.},
}
% Stochastic model for locating evolutionary precursors on a tree, applied to extrafloral nectaries (Marazzi et al., Evolution 66(12), 2012).
% The URL is the publisher's own host, not an institutional library proxy.
@article{marazzi_locating_2012,
  author       = {Marazzi, Brigitte and Ané, Cécile and Simon, Marcelo F. and Delgado-Salinas, Alfonso and Luckow, Melissa and Sanderson, Michael J.},
  title        = {Locating Evolutionary Precursors on a Phylogenetic Tree},
  journaltitle = {Evolution},
  volume       = {66},
  number       = {12},
  pages        = {3918--3930},
  date         = {2012-12-01},
  issn         = {1558-5646},
  doi          = {10.1111/j.1558-5646.2012.01720.x},
  url          = {http://onlinelibrary.wiley.com/doi/10.1111/j.1558-5646.2012.01720.x/abstract},
  urldate      = {2015-12-28},
  langid       = {english},
  rights       = {© 2012 The Author(s). Evolution© 2012 The Society for the Study of Evolution.},
  keywords     = {Deep homology, extra-floral nectary, homoplasy, trait evolution},
  abstract     = {Conspicuous innovations in the history of life are often preceded by more cryptic genetic and developmental precursors. In many cases, these appear to be associated with recurring origins of very similar traits in close relatives (parallelisms) or striking convergences separated by deep time (deep homologies). Although the phylogenetic distribution of gain and loss of traits hints strongly at the existence of such precursors, no models of trait evolution currently permit inference about their location on a tree. Here we develop a new stochastic model, which explicitly captures the dependency implied by a precursor and permits estimation of precursor locations. We apply it to the evolution of extrafloral nectaries ({EFNs}), an ecologically significant trait mediating a widespread mutualism between plants and ants. In legumes, a species-rich clade with morphologically diverse {EFNs}, the precursor model fits the data on {EFN} occurrences significantly better than conventional models. The model generates explicit hypotheses about the phylogenetic location of hypothetical precursors, which may help guide future studies of molecular genetic pathways underlying nectary position, development, and function.},
}
% PAML 4: programs for phylogenetic analysis by maximum likelihood (Yang, Molecular Biology and Evolution 24(8), 2007).
@article{yang_paml_2007,
  author     = {Yang, Ziheng},
  title      = {{PAML} 4: Phylogenetic Analysis by Maximum Likelihood},
  shorttitle = {{PAML} 4},
  journal    = {Molecular Biology and Evolution},
  volume     = {24},
  number     = {8},
  pages      = {1586--1591},
  month      = aug,
  year       = {2007},
  issn       = {0737-4038, 1537-1719},
  doi        = {10.1093/molbev/msm088},
  pmid       = {17483113},
  url        = {http://mbe.oxfordjournals.org/content/24/8/1586},
  urldate    = {2015-01-05},
  language   = {en},
  keywords   = {codon models, likelihood, {PAML}, phylogenetic analysis, Software},
  abstract   = {{PAML}, currently in version 4, is a package of programs for phylogenetic analyses of {DNA} and protein sequences using maximum likelihood ({ML}). The programs may be used to compare and test phylogenetic trees, but their main strengths lie in the rich repertoire of evolutionary models implemented, which can be used to estimate parameters in models of sequence evolution and to test interesting biological hypotheses. Uses of the programs include estimation of synonymous and nonsynonymous rates ({dN} and {dS}) between two protein-coding {DNA} sequences, inference of positive Darwinian selection through phylogenetic comparison of protein-coding genes, reconstruction of ancestral genes and proteins for molecular restoration studies of extinct life forms, combined analysis of heterogeneous data sets from multiple gene loci, and estimation of species divergence times incorporating uncertainties in fossil calibrations. This note discusses some of the major applications of the package, which includes example data sets to demonstrate their use. The package is written in {ANSI} C, and runs under Windows, Mac {OSX}, and {UNIX} systems. It is available at http://abacus.gene.ucl.ac.uk/software/paml.html.},
}
% HyPhy: likelihood-based hypothesis testing using phylogenies (Kosakovsky Pond et al., Bioinformatics 21(5), 2005).
% The first author's family name is the compound "Kosakovsky Pond"; the citation key is kept unchanged for existing citations.
@article{pond_hyphy_2005,
  author     = {Kosakovsky Pond, Sergei L. and Frost, Simon D. W. and Muse, Spencer V.},
  title      = {{HyPhy}: hypothesis testing using phylogenies},
  shorttitle = {{HyPhy}},
  journal    = {Bioinformatics},
  volume     = {21},
  number     = {5},
  pages      = {676--679},
  month      = mar,
  year       = {2005},
  issn       = {1367-4803, 1460-2059},
  doi        = {10.1093/bioinformatics/bti079},
  pmid       = {15509596},
  url        = {http://bioinformatics.oxfordjournals.org/content/21/5/676},
  urldate    = {2015-01-05},
  language   = {en},
  abstract   = {Summary: The {HyPhy} package is designed to provide a flexible and unified platform for carrying out likelihood-based analyses on multiple alignments of molecular sequence data, with the emphasis on studies of rates and patterns of sequence evolution.
Availability: http://www.hyphy.org
Contact: muse@stat.ncsu.edu
Supplementary information: {HyPhy} documentation and tutorials are available at http://www.hyphy.org},
}
% BEAST 2: extensible software platform for Bayesian evolutionary analysis (Bouckaert et al., PLoS Computational Biology 10(4), 2014).
% "Bayesian" is brace-protected against sentence-casing styles; the journal name is stored unabbreviated.
@article{bouckaert_beast_2014,
  author     = {Bouckaert, Remco and Heled, Joseph and Kühnert, Denise and Vaughan, Tim and Wu, Chieh-Hsi and Xie, Dong and Suchard, Marc A. and Rambaut, Andrew and Drummond, Alexei J.},
  title      = {{BEAST} 2: A Software Platform for {Bayesian} Evolutionary Analysis},
  shorttitle = {{BEAST} 2},
  journal    = {{PLoS} Computational Biology},
  volume     = {10},
  number     = {4},
  pages      = {e1003537},
  month      = apr,
  year       = {2014},
  doi        = {10.1371/journal.pcbi.1003537},
  url        = {http://dx.doi.org/10.1371/journal.pcbi.1003537},
  urldate    = {2015-01-05},
  abstract   = {We present a new open source, extensible and flexible software platform for Bayesian evolutionary analysis called {BEAST} 2. This software platform is a re-design of the popular {BEAST} 1 platform to correct structural deficiencies that became evident as the {BEAST} 1 software evolved. Key among those deficiencies was the lack of post-deployment extensibility. {BEAST} 2 now has a fully developed package management system that allows third party developers to write additional functionality that can be directly installed to the {BEAST} 2 analysis platform via a package manager without requiring a new software release of the platform. This package architecture is showcased with a number of recently published new models encompassing birth-death-sampling tree priors, phylodynamics and model averaging for substitution models and site partitioning. A second major improvement is the ability to read/write the entire state of the {MCMC} chain to/from disk allowing it to be easily shared between multiple instances of the {BEAST} software. This facilitates checkpointing and better support for multi-processor and high-end computing extensions. Finally, the functionality in new packages can be easily added to the user interface ({BEAUti} 2) by a simple {XML} template-based mechanism because {BEAST} 2 has been re-designed to provide greater integration between the analysis engine and the user interface so that, for example {BEAST} and {BEAUti} use exactly the same {XML} file format.},
}
% The R language itself (R Core Team, 2016); the corporate author is double-braced so it is treated as a single name.
@manual{rstats,
  author       = {{R Core Team}},
  title        = {R: A Language and Environment for Statistical Computing},
  organization = {R Foundation for Statistical Computing},
  address      = {Vienna, Austria},
  year         = {2016},
  url          = {https://www.R-project.org/},
}
% Closest BLAST hit versus nearest phylogenetic neighbor (Koski and Golding, Journal of Molecular Evolution 52(6), 2001).
@article{koski_closest_2001,
  author   = {Koski, L. B. and Golding, G. B.},
  title    = {The closest {BLAST} hit is often not the nearest neighbor},
  journal  = {Journal of Molecular Evolution},
  volume   = {52},
  number   = {6},
  pages    = {540--542},
  month    = jun,
  year     = {2001},
  issn     = {0022-2844},
  doi      = {10.1007/s002390010184},
  pmid     = {11443357},
  language = {eng},
  keywords = {Algorithms, Crenarchaeota, Databases, Factual, Escherichia coli, Open Reading Frames, Phylogeny, Salmonella typhimurium, Software},
  abstract = {It is well known that basing phylogenetic reconstructions on uncorrected genetic distances can lead to errors in their reconstruction. Nevertheless, it is often common practice to report simply the most similar BLAST (Altschul et al. 1997) hit in genomic reports that discuss many genes (Ruepp et al. 2000; Freiberg et al. 1997). This is because BLAST hits can provide a rapid, efficient, and concise analysis of many genes at once. These hits are often interpreted to imply that the gene is most closely related to the gene or protein in the databases that returned the closest BLAST hit. Though these two may coincide, for many genes, particularly genes with few homologs, they may not be the same. There are a number of circumstances that can account for such limitations in accuracy (Eisen 2000). We stress here that genes appearing to be the most similar based on BLAST hits are often not each others closest relative phylogenetically. The extent to which this occurs depends on the availability of close relatives present in the databases. As an example we have chosen the analysis of the genomes of a crenarcheaota species Aeropyrum pernix, an organism with few close relatives fully sequenced, and Escherichia coli, an organism whose closest relative, Salmonella typhimurium, is completely sequenced.},
}
% Phylogeographic reconstruction of the early spread of H1N1pdm (Lemey et al., PLoS Currents 1, 2009).
@article{lemey_reconstructing_2009,
  author   = {Lemey, Philippe and Suchard, Marc and Rambaut, Andrew},
  title    = {Reconstructing the initial global spread of a human influenza pandemic},
  journal  = {PLoS Currents},
  volume   = {1},
  month    = sep,
  year     = {2009},
  issn     = {2157-3999},
  doi      = {10.1371/currents.RRN1031},
  pmid     = {20029613},
  pmcid    = {PMC2762761},
  url      = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2762761/},
  urldate  = {2017-06-07},
  abstract = {Here, we present an analysis of the H1N1pdm genetic data sampled over the initial stages in the epidemic. To infer phylodynamic spread in time and space we employ a recently developed Bayesian statistical inference framework (Lemey et al., in press). We model spatial diffusion as a continuous-time Markov chain process along time-measured genealogies. In this analysis, we consider 40 locations for which sequence data were available on 06-Aug-2009. The sampling time interval of the 242 sequences spans from 30-Mar-2009 to 12-Jul-2009. The Bayesian inference typically results in a posterior distribution of phylogenetic trees, each having an estimate of the epidemic locations at the ancestral nodes in the tree. We summarize these trees using the most representative clustering pattern and annotate these clusters with the most probable location states. We can visualize this information as tree that grows over time, seeding locations each time an ancestral node is inferred to exist at a different location. A Bayes factor test provides statistical support for epidemiological linkage throughout the evolutionary history. We demonstrate how our full probabilistic approach efficiently tracks an epidemic based on viral genetic data as it unfolds across the globe.},
}
@article{delviks-frankenberry_mechanisms_2011,
  author     = {Delviks-Frankenberry, Krista and Galli, Andrea and Nikolaitchik, Olga and Mens, Helene and Pathak, Vinay K. and Hu, Wei-Shau},
  title      = {Mechanisms and {Factors} that {Influence} {High} {Frequency} {Retroviral} {Recombination}},
  journal    = {Viruses},
  year       = {2011},
  month      = sep,
  volume     = {3},
  number     = {9},
  pages      = {1650--1680},
  issn       = {1999-4915},
  doi        = {10.3390/v3091650},
  url        = {http://www.mdpi.com/1999-4915/3/9/1650},
  urldate    = {2017-06-07},
  language   = {en},
  copyright  = {http://creativecommons.org/licenses/by/3.0/},
  keywords   = {HIV-1, recombination, retrovirus},
  abstract   = {With constantly changing environmental selection pressures, retroviruses rely upon recombination to reassort polymorphisms in their genomes and increase genetic diversity, which improves the chances for the survival of their population. Recombination occurs during DNA synthesis, whereby reverse transcriptase undergoes template switching events between the two copackaged RNAs, resulting in a viral recombinant with portions of the genetic information from each parental RNA. This review summarizes our current understanding of the factors and mechanisms influencing retroviral recombination, fidelity of the recombination process, and evaluates the subsequent viral diversity and fitness of the progeny recombinant. Specifically, the high mutation rates and high recombination frequencies of HIV-1 will be analyzed for their roles in influencing HIV-1 global diversity, as well as HIV-1 diagnosis, drug treatment, and vaccine development.},
  file       = {Full Text PDF:/Volumes/HOME/Zotero/storage/IASRKP3U/Delviks-Frankenberry et al. - 2011 - Mechanisms and Factors that Influence High Frequen.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/IT5G96NI/1650.html:text/html}
}
@article{huang_bat-derived_2016,
  author     = {Huang, Canping and Liu, William J. and Xu, Wen and Jin, Tao and Zhao, Yingze and Song, Jingdong and Shi, Yi and Ji, Wei and Jia, Hao and Zhou, Yongming and Wen, Honghua and Zhao, Honglan and Liu, Huaxing and Li, Hong and Wang, Qihui and Wu, Ying and Wang, Liang and Liu, Di and Liu, Guang and Yu, Hongjie and Holmes, Edward C. and Lu, Lin and Gao, George F.},
  title      = {A {Bat}-{Derived} {Putative} {Cross}-{Family} {Recombinant} {Coronavirus} with a {Reovirus} {Gene}},
  journal    = {PLOS Pathogens},
  year       = {2016},
  month      = sep,
  volume     = {12},
  number     = {9},
  pages      = {e1005883},
  issn       = {1553-7374},
  doi        = {10.1371/journal.ppat.1005883},
  pmid       = {27676249},
  pmcid      = {PMC5038965},
  language   = {eng},
  abstract   = {The emergence of severe acute respiratory syndrome coronavirus (SARS-CoV) in 2002 and Middle East respiratory syndrome coronavirus (MERS-CoV) in 2012 has generated enormous interest in the biodiversity, genomics and cross-species transmission potential of coronaviruses, especially those from bats, the second most speciose order of mammals. Herein, we identified a novel coronavirus, provisionally designated Rousettus bat coronavirus GCCDC1 (Ro-BatCoV GCCDC1), in the rectal swab samples of Rousettus leschenaulti bats by using pan-coronavirus RT-PCR and next-generation sequencing. Although the virus is similar to Rousettus bat coronavirus HKU9 (Ro-BatCoV HKU9) in genome characteristics, it is sufficiently distinct to be classified as a new species according to the criteria defined by the International Committee of Taxonomy of Viruses (ICTV). More striking was that Ro-BatCoV GCCDC1 contained a unique gene integrated into the 3'-end of the genome that has no homologs in any known coronavirus, but which sequence and phylogeny analyses indicated most likely originated from the p10 gene of a bat orthoreovirus. Subgenomic mRNA and cellular-level observations demonstrated that the p10 gene is functional and induces the formation of cell syncytia. Therefore, here we report a putative heterologous inter-family recombination event between a single-stranded, positive-sense RNA virus and a double-stranded segmented RNA virus, providing insights into the fundamental mechanisms of viral evolution.}
}
@article{he_intragenic_2010,
  author     = {He, Cheng-Qiang and Ding, Nai-Zheng and He, Mei and Li, Shan-Ni and Wang, Xing-Ming and He, Hong-Bin and Liu, Xin-Fa and Guo, Hong-Shan},
  title      = {Intragenic {Recombination} as a {Mechanism} of {Genetic} {Diversity} in {Bluetongue} {Virus}},
  journal    = {Journal of Virology},
  year       = {2010},
  month      = nov,
  volume     = {84},
  number     = {21},
  pages      = {11487--11495},
  issn       = {0022-538X, 1098-5514},
  doi        = {10.1128/JVI.00889-10},
  pmid       = {20702614},
  url        = {http://jvi.asm.org/content/84/21/11487},
  urldate    = {2017-06-07},
  language   = {en},
  abstract   = {Bluetongue (BT), caused by Bluetongue virus (BTV), is an economically important disease affecting sheep, deer, cattle, and goats. Since 1998, a series of BT outbreaks have spread across much of southern and central Europe. To study why the epidemiology of the virus happens to change, it is important to fully know the mechanisms resulting in its genetic diversity. Gene mutation and segment reassortment have been considered as the key forces driving the evolution of BTV. However, it is still unknown whether intragenic recombination can occur and contribute to the process in the virus. We present here several BTV groups containing mosaic genes to reveal that intragenic recombination can take place between the virus strains and play a potential role in bringing novel BTV lineages.},
  file       = {Full Text PDF:/Volumes/HOME/Zotero/storage/5H4JQA5P/He et al. - 2010 - Intragenic Recombination as a Mechanism of Genetic.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/NWMDKHNS/11487.html:text/html}
}
@article{steinhauer_lack_1992,
  author     = {Steinhauer, D. A. and Domingo, E. and Holland, J. J.},
  title      = {Lack of evidence for proofreading mechanisms associated with an {RNA} virus polymerase},
  journal    = {Gene},
  year       = {1992},
  month      = dec,
  volume     = {122},
  number     = {2},
  pages      = {281--288},
  issn       = {0378-1119},
  pmid       = {1336756},
  language   = {eng},
  keywords   = {Base Sequence, DNA-Directed RNA Polymerases, Electrophoresis, Polyacrylamide Gel, Molecular Sequence Data, RNA, Messenger, RNA, Viral, Vesicular stomatitis Indiana virus},
  abstract   = {The in vitro fidelity of the virion-associated RNA polymerase of vesicular stomatitis virus was quantitated for a single conserved viral RNA site and the usual high in vitro base misincorporation error frequencies (approx. 10(-3)) were observed at this (guanine) site. We sought evidence for RNA 3'--{\textgreater}5' exonuclease proofreading mechanisms by varying the concentrations of the next nucleoside triphosphate, by incorporation of nucleoside[1-thio]triphosphate analogues of the four natural RNA nucleosides, and by varying the concentrations of pyrophosphate in the in vitro polymerase reaction. None of these perturbations greatly affected viral RNA polymerase fidelity at the site studied. These results fail to show evidence for proofreading exonuclease activity associated with the virion replicase of an RNA virus. They suggest that RNA virus replication might generally be error-prone, because RNA replicase base misincorporations are proofread very inefficiently or not at all.}
}
@article{dobzhansky_nothing_1973,
  author     = {Dobzhansky, Theodosius},
  title      = {Nothing in {Biology} {Makes} {Sense} except in the {Light} of {Evolution}},
  journal    = {The American Biology Teacher},
  year       = {1973},
  month      = mar,
  volume     = {35},
  number     = {3},
  pages      = {125--129},
  issn       = {0002-7685, 1938-4211},
  doi        = {10.2307/4444260},
  url        = {http://abt.ucpress.edu/content/35/3/125},
  urldate    = {2017-06-07},
  language   = {en},
  copyright  = {Copyright 1973 National Association of Biology Teachers},
  file       = {Snapshot:/Volumes/HOME/Zotero/storage/KE3THCIS/125.html:text/html}
}
@article{salamin_building_2002,
  author     = {Salamin, Nicolas and Hodkinson, Trevor R. and Savolainen, Vincent},
  title      = {Building {Supertrees}: {An} {Empirical} {Assessment} {Using} the {Grass} {Family} ({Poaceae})},
  shorttitle = {Building {Supertrees}},
  journal    = {Systematic Biology},
  year       = {2002},
  month      = jan,
  volume     = {51},
  number     = {1},
  pages      = {136--150},
  issn       = {1063-5157},
  doi        = {10.1080/106351502753475916},
  url        = {https://academic.oup.com/sysbio/article/51/1/136/1631315/Building-Supertrees-An-Empirical-Assessment-Using},
  urldate    = {2017-05-22},
  file       = {Full Text PDF:/Volumes/HOME/Zotero/storage/96WRQSHP/Salamin et al. - 2002 - Building Supertrees An Empirical Assessment Using.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/V5MWGXB3/Building-Supertrees-An-Empirical-Assessment-Using.html:text/html}
}
@article{semple_supertree_2000,
  author     = {Semple, Charles and Steel, Mike},
  title      = {A supertree method for rooted trees},
  journal    = {Discrete Applied Mathematics},
  year       = {2000},
  month      = oct,
  volume     = {105},
  number     = {1--3},
  pages      = {147--158},
  issn       = {0166-218X},
  doi        = {10.1016/S0166-218X(00)00202-X},
  url        = {http://www.sciencedirect.com/science/article/pii/S0166218X0000202X},
  urldate    = {2017-05-22},
  keywords   = {Consensus, Rooted phylogenetic tree, Supertree},
  abstract   = {The amalgamation of leaf-labelled (phylogenetic) trees on overlapping leaf sets into one (super)tree is a central problem in several areas of classification, particularly evolutionary biology. In this paper, we describe a new technique for amalgamating rooted phylogenetic trees. This appears to be the first such method to provably exhibit particular desirable properties which we list and establish.},
  file       = {ScienceDirect Full Text PDF:/Volumes/HOME/Zotero/storage/U5BNWITA/Semple and Steel - 2000 - A supertree method for rooted trees.pdf:application/pdf;ScienceDirect Snapshot:/Volumes/HOME/Zotero/storage/WCZIDADG/S0166218X0000202X.html:text/html}
}
@article{eulenstein_performance_2004,
  author     = {Eulenstein, Oliver and Chen, Duhong and Burleigh, J. Gordon and Fernández-Baca, David and Sanderson, Michael J.},
  title      = {Performance of flip supertree construction with a heuristic algorithm},
  journal    = {Systematic Biology},
  year       = {2004},
  month      = apr,
  volume     = {53},
  number     = {2},
  pages      = {299--308},
  issn       = {1063-5157},
  doi        = {10.1080/10635150490423719},
  pmid       = {15205054},
  language   = {eng},
  keywords   = {Algorithms, Classification, Computer Simulation, Phylogeny},
  abstract   = {Supertree methods are used to assemble separate phylogenetic trees with shared taxa into larger trees (supertrees) in an effort to construct more comprehensive phylogenetic hypotheses. In spite of much recent interest in supertrees, there are still few methods for supertree construction. The flip supertree problem is an error correction approach that seeks to find a minimum number of changes (flips) to the matrix representation of the set of input trees to resolve their incompatibilities. A previous flip supertree algorithm was limited to finding exact solutions and was only feasible for small input trees. We developed a heuristic algorithm for the flip supertree problem suitable for much larger input trees. We used a series of 48- and 96-taxon simulations to compare supertrees constructed with the flip supertree heuristic algorithm with supertrees constructed using other approaches, including MinCut (MC), modified MC (MMC), and matrix representation with parsimony (MRP). Flip supertrees are generally far more accurate than supertrees constructed using MC or MMC algorithms and are at least as accurate as supertrees built with MRP. The flip supertree method is therefore a viable alternative to other supertree methods when the number of taxa is large.}
}
@article{friedrich_profdist:_2005,
  author     = {Friedrich, Joachim and Dandekar, Thomas and Wolf, Matthias and Müller, Tobias},
  title      = {{ProfDist}: a tool for the construction of large phylogenetic trees based on profile distances},
  shorttitle = {{ProfDist}},
  journal    = {Bioinformatics},
  year       = {2005},
  month      = may,
  volume     = {21},
  number     = {9},
  pages      = {2108--2109},
  issn       = {1367-4803},
  doi        = {10.1093/bioinformatics/bti289},
  pmid       = {15677706},
  language   = {eng},
  keywords   = {Algorithms, Chromosome Mapping, DNA Mutational Analysis, Gene Expression Profiling, Linkage Disequilibrium, Phylogeny, Sequence Alignment, Sequence Analysis, DNA, Software, User-Computer Interface},
  abstract   = {SUMMARY: ProfDist is a user-friendly software package using the profile-neighbor-joining method (PNJ) in inferring phylogenies based on profile distances on DNA or RNA sequences. It is a tool for reconstructing and visualizing large phylogenetic trees providing new and standard features with a special focus on time efficency, robustness and accuracy.
AVAILABILITY: A Windows version of ProfDist comes with a graphical user interface and is freely available at http://profdist.bioapps.biozentrum.uni-wuerzburg.de}
}
@article{nelson_spatial_2011,
  author     = {Nelson, Martha I. and Lemey, Philippe and Tan, Yi and Vincent, Amy and Lam, Tommy Tsan-Yuk and Detmer, Susan and Viboud, Cécile and Suchard, Marc A. and Rambaut, Andrew and Holmes, Edward C. and Gramer, Marie},
  title      = {Spatial {Dynamics} of {Human}-{Origin} {H1} {Influenza} {A} {Virus} in {North} {American} {Swine}},
  journal    = {PLOS Pathogens},
  year       = {2011},
  month      = jun,
  volume     = {7},
  number     = {6},
  pages      = {e1002077},
  issn       = {1553-7374},
  doi        = {10.1371/journal.ppat.1002077},
  url        = {http://journals.plos.org/plospathogens/article?id=10.1371/journal.ppat.1002077},
  urldate    = {2017-05-22},
  keywords   = {H1N1, Influenza A virus, Influenza viruses, phylogenetic analysis, phylogenetics, Phylogeography, Swine, Swine influenza},
  abstract   = {Author Summary Since 1998, genetically and antigenically diverse influenza A viruses have circulated in North American swine due to continuous cross-species transmission and reassortment with avian and human influenza viruses, presenting a pandemic threat to humans. Millions of swine are transported year-round from the southern United States into the corn-rich Midwest, but the importance of these movements in the spatial dissemination and evolution of the influenza virus in swine is unknown. Using a large data set of influenza virus sequences collected in North American swine during 2003–2010, we investigated the spatial dynamics of two influenza viruses of the H1 subtype that were introduced into swine from humans around 2003. Employing recently developed Bayesian phylogeography methods, we find that the spread of this influenza virus follows the large-scale transport of swine from the South to the Midwest. Based on this pattern of viral migration, we suggest that the genetic diversity of swine influenza viruses in the Midwest is continually augmented by the importation of viruses from source populations located in the South. Understanding the importance of long-distance pig movements in the evolution and spatial dissemination of influenza virus in swine may inform future strategies for the surveillance and control of influenza, and perhaps other swine pathogens.},
  file       = {Full Text PDF:/Volumes/HOME/Zotero/storage/JDR2JP5E/Nelson et al. - 2011 - Spatial Dynamics of Human-Origin H1 Influenza A Vi.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/3SDAAC5F/article.html:text/html}
}
@article{colijn_phylogenetic_2014,
  author     = {Colijn, Caroline and Gardy, Jennifer},
  title      = {Phylogenetic tree shapes resolve disease transmission patterns},
  journal    = {Evolution, Medicine, and Public Health},
  year       = {2014},
  month      = jun,
  volume     = {2014},
  number     = {1},
  pages      = {96--108},
  issn       = {2050-6201},
  doi        = {10.1093/emph/eou018},
  pmid       = {24916411},
  pmcid      = {PMC4097963},
  url        = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4097963/},
  urldate    = {2017-05-22},
  abstract   = {The shapes of phylogenies of pathogens can reveal patterns in how an outbreak spreads. We used simple features to summarise the shapes of pathogen phylogenies. This provided enough information to distinguish outbreaks with super-spreaders, outbreaks spreading homogeneously, and those with chains of transmission. Background and Objectives: Whole-genome sequencing is becoming popular as a tool for understanding outbreaks of communicable diseases, with phylogenetic trees being used to identify individual transmission events or to characterize outbreak-level overall transmission dynamics. Existing methods to infer transmission dynamics from sequence data rely on well-characterized infectious periods, epidemiological and clinical metadata which may not always be available, and typically require computationally intensive analysis focusing on the branch lengths in phylogenetic trees. We sought to determine whether the topological structures of phylogenetic trees contain signatures of the transmission patterns underlying an outbreak. Methodology: We use simulated outbreaks to train and then test computational classifiers. We test the method on data from two real-world outbreaks. Results: We show that different transmission patterns result in quantitatively different phylogenetic tree shapes. We describe topological features that summarize a phylogeny’s structure and find that computational classifiers based on these are capable of predicting an outbreak’s transmission dynamics. The method is robust to variations in the transmission parameters and network types, and recapitulates known epidemiology of previously characterized real-world outbreaks. Conclusions and implications: There are simple structural properties of phylogenetic trees which, when combined, can distinguish communicable disease outbreaks with a super-spreader, homogeneous transmission and chains of transmission. This is possible using genome data alone, and can be done during an outbreak. We discuss the implications for management of outbreaks.},
  file       = {PubMed Central Full Text PDF:/Volumes/HOME/Zotero/storage/PPPERVV6/Colijn and Gardy - 2014 - Phylogenetic tree shapes resolve disease transmiss.pdf:application/pdf}
}
@article{volz_inferring_2013,
  author     = {Volz, Erik M. and Frost, Simon D. W.},
  title      = {Inferring the {Source} of {Transmission} with {Phylogenetic} {Data}},
  journal    = {PLOS Computational Biology},
  year       = {2013},
  month      = dec,
  volume     = {9},
  number     = {12},
  pages      = {e1003397},
  issn       = {1553-7358},
  doi        = {10.1371/journal.pcbi.1003397},
  url        = {http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003397},
  urldate    = {2017-05-22},
  keywords   = {Cherries, HIV, HIV epidemiology, Pathogens, phylogenetic analysis, phylogenetics, Sequence Alignment, Simulation and modeling},
  abstract   = {Author Summary Molecular data from pathogens may be useful for identifying the source of infection and identifying pairs of individuals such that one host transmitted to the other. Inference of who acquired infection from whom is confounded by incomplete sampling, and given genetic data only, it is not possible to infer the direction of transmission in a transmission pair. Given additional information about an infectious disease epidemic, such as incidence of infection over time, and the proportion of hosts sampled, it is possible to correct for biases stemming from incomplete sampling of the infected host population. It may even be possible to infer the direction of transmission within a transmission pair if additional clinical, behavioral, and demographic covariates of the infected hosts are available. We consider the problem of identifying the source of infection using HIV sequence data collected for clinical purposes. We find that it is rarely possible to infer transmission pairs with high credibility, but such data may nevertheless be useful for epidemiological investigations and identifying risk factors for transmission.},
  file       = {Full Text PDF:/Volumes/HOME/Zotero/storage/96J943UD/Volz and Frost - 2013 - Inferring the Source of Transmission with Phylogen.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/WJUXVRPA/article.html:text/html}
}
@article{lam_use_2010,
  author     = {Lam, Tommy Tsan-Yuk and Hon, Chung-Chau and Tang, Julian W.},
  title      = {Use of phylogenetics in the molecular epidemiology and evolutionary studies of viral infections},
  journal    = {Critical Reviews in Clinical Laboratory Sciences},
  year       = {2010},
  month      = feb,
  volume     = {47},
  number     = {1},
  pages      = {5--49},
  issn       = {1549-781X},
  doi        = {10.3109/10408361003633318},
  pmid       = {20367503},
  language   = {eng},
  keywords   = {Animals, Evolution, Molecular, Host-Pathogen Interactions, Humans, Molecular Epidemiology, Phylogeny, Virus Diseases, Viruses},
  abstract   = {Since DNA sequencing techniques first became available almost 30 years ago, the amount of nucleic acid sequence data has increased enormously. Phylogenetics, which is widely applied to compare and analyze such data, is particularly useful for the analysis of genes from rapidly evolving viruses. It has been used extensively to describe the molecular epidemiology and transmission of the human immunodeficiency virus (HIV), the origins and subsequent evolution of the severe acute respiratory syndrome (SARS)-associated coronavirus (SCoV), and, more recently, the evolving epidemiology of avian influenza as well as seasonal and pandemic human influenza viruses. Recent advances in phylogenetic methods can infer more in-depth information about the patterns of virus emergence, adding to the conventional approaches in viral epidemiology. Examples of this information include estimations (with confidence limits) of the actual time of the origin of a new viral strain or its emergence in a new species, viral recombination and reassortment events, the rate of population size change in a viral epidemic, and how the virus spreads and evolves within a specific population and geographical region. Such sequence-derived information obtained from the phylogenetic tree can assist in the design and implementation of public health and therapeutic interventions. However, application of many of these advanced phylogenetic methods are currently limited to specialized phylogeneticists and statisticians, mainly because of their mathematical basis and their dependence on the use of a large number of computer programs. This review attempts to bridge this gap by presenting conceptual, technical, and practical aspects of applying phylogenetic methods in studies of influenza, HIV, and SCoV. It aims to provide, with minimal mathematics and statistics, a practical overview of how phylogenetic methods can be incorporated into virological studies by clinical and laboratory specialists.}
}
@article{parrish_influenza_2015,
  author     = {Parrish, Colin R. and Murcia, Pablo R. and Holmes, Edward C.},
  title      = {Influenza {Virus} {Reservoirs} and {Intermediate} {Hosts}: {Dogs}, {Horses}, and {New} {Possibilities} for {Influenza} {Virus} {Exposure} of {Humans}},
  shorttitle = {Influenza {Virus} {Reservoirs} and {Intermediate} {Hosts}},
  journal    = {Journal of Virology},
  year       = {2015},
  month      = mar,
  volume     = {89},
  number     = {6},
  pages      = {2990--2994},
  issn       = {0022-538X, 1098-5514},
  doi        = {10.1128/JVI.03146-14},
  pmid       = {25540375},
  url        = {http://jvi.asm.org/content/89/6/2990},
  urldate    = {2017-05-19},
  language   = {en},
  abstract   = {Influenza A virus (IAV) infections in hosts outside the main aquatic bird reservoirs occur periodically. Although most such cross-species transmission events result in limited onward transmission in the new host, sustained influenza outbreaks have occurred in poultry and in a number of mammalian species, including humans, pigs, horses, seals, and mink. Recently, two distinct strains of IAV have emerged in domestic dogs, with each circulating widely for several years. Here, we briefly outline what is known about the role of intermediate hosts in influenza emergence, summarize our knowledge of the new canine influenza viruses (CIVs) and how they provide key new information on the process of host adaptation, and assess the risk these viruses pose to human populations.},
  file       = {Snapshot:/Volumes/HOME/Zotero/storage/GVPIB386/2990.html:text/html}
}
@article{price_fasttree_2010,
  author     = {Price, Morgan N. and Dehal, Paramvir S. and Arkin, Adam P.},
  title      = {{FastTree} 2 – {Approximately} {Maximum}-{Likelihood} {Trees} for {Large} {Alignments}},
  journal    = {PLOS ONE},
  year       = {2010},
  month      = mar,
  volume     = {5},
  number     = {3},
  pages      = {e9490},
  issn       = {1932-6203},
  doi        = {10.1371/journal.pone.0009490},
  url        = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0009490},
  urldate    = {2017-04-30},
  keywords   = {Biochemical simulations, Biophysical simulations, Multiple alignment calculation, Optimization, phylogenetic analysis, Protein structure comparison, Ribosomal RNA, Sequence Alignment},
  abstract   = {Background We recently described FastTree, a tool for inferring phylogenies for alignments with up to hundreds of thousands of sequences. Here, we describe improvements to FastTree that improve its accuracy without sacrificing scalability. Methodology/Principal Findings Where FastTree 1 used nearest-neighbor interchanges (NNIs) and the minimum-evolution criterion to improve the tree, FastTree 2 adds minimum-evolution subtree-pruning-regrafting (SPRs) and maximum-likelihood NNIs. FastTree 2 uses heuristics to restrict the search for better trees and estimates a rate of evolution for each site (the “CAT” approximation). Nevertheless, for both simulated and genuine alignments, FastTree 2 is slightly more accurate than a standard implementation of maximum-likelihood NNIs (PhyML 3 with default settings). Although FastTree 2 is not quite as accurate as methods that use maximum-likelihood SPRs, most of the splits that disagree are poorly supported, and for large alignments, FastTree 2 is 100–1,000 times faster. FastTree 2 inferred a topology and likelihood-based local support values for 237,882 distinct 16S ribosomal RNAs on a desktop computer in 22 hours and 5.8 gigabytes of memory. Conclusions/Significance FastTree 2 allows the inference of maximum-likelihood phylogenies for huge alignments. FastTree 2 is freely available at http://www.microbesonline.org/fasttree.},
  file       = {Full Text PDF:/Volumes/HOME/Zotero/storage/SEMG6Z4W/Price et al. - 2010 - FastTree 2 – Approximately Maximum-Likelihood Tree.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/FV7U2S33/article.html:text/html}
}
@article{yin_ggbio:_2012,
  author     = {Yin, Tengfei and Cook, Dianne and Lawrence, Michael},
  title      = {ggbio: an {R} package for extending the grammar of graphics for genomic data},
  shorttitle = {ggbio},
  journal    = {Genome Biology},
  year       = {2012},
  month      = aug,
  volume     = {13},
  number     = {8},
  pages      = {R77},
  issn       = {1474-760X},
  doi        = {10.1186/gb-2012-13-8-r77},
  url        = {https://genomebiology.biomedcentral.com/articles/10.1186/gb-2012-13-8-r77},
  urldate    = {2017-05-16},
  language   = {en},
  copyright  = {2012 Yin et al.; licensee BioMed Central Ltd.},
  abstract   = {We introduce ggbio, a new methodology to visualize and explore genomics annotations and high-throughput data. The plots provide detailed views of genomic regions, summary views of sequence alignments and splicing patterns, and genome-wide overviews with karyogram, circular and grand linear layouts. The methods leverage the statistical functionality available in R, the grammar of graphics and the data handling capabilities of the Bioconductor project. The plots are specified within a modular framework that enables users to construct plots in a systematic way, and are generated directly from Bioconductor data structures. The ggbio R package is available at http://www.bioconductor.org/packages/2.11/bioc/html/ggbio.html.},
  file       = {Full Text PDF:/Volumes/HOME/Zotero/storage/BAUVSJM5/Yin et al. - 2012 - ggbio an R package for extending the grammar of g.pdf:application/pdf;Snapshot:/Volumes/HOME/Zotero/storage/6B54QURX/gb-2012-13-8-r77.html:text/html}
}
@article{chevenet_treedyn:_2006,
  author     = {Chevenet, François and Brun, Christine and Bañuls, Anne-Laure and Jacq, Bernard and Christen, Richard},
  title      = {{TreeDyn}: towards dynamic graphics and annotations for analyses of trees},
  shorttitle = {{TreeDyn}},
  journal    = {BMC Bioinformatics},
  year       = {2006},
  month      = oct,
  volume     = {7},
  pages      = {439},
  issn       = {1471-2105},
  doi        = {10.1186/1471-2105-7-439},
  pmid       = {17032440},
  pmcid      = {PMC1615880},
  language   = {eng},
  keywords   = {Computer Graphics, Databases, Genetic, Decision Trees},
  abstract   = {BACKGROUND: Analyses of biomolecules for biodiversity, phylogeny or structure/function studies often use graphical tree representations. Many powerful tree editors are now available, but existing tree visualization tools make little use of meta-information related to the entities under study such as taxonomic descriptions or gene functions that can hardly be encoded within the tree itself (if using popular tree formats). Consequently, a tedious manual analysis and post-processing of the tree graphics are required if one needs to use external information for displaying or investigating trees.
RESULTS: We have developed TreeDyn, a tool using annotations and dynamic graphical methods for editing and analyzing multiple trees. The main features of TreeDyn are 1) the management of multiple windows and multiple trees per window, 2) the export of graphics to several standard file formats with or without HTML encapsulation and a new format called TGF, which enables saving and restoring graphical analysis, 3) the projection of texts or symbols facing leaf labels or linked to nodes, through manual pasting or by using annotation files, 4) the highlight of graphical elements after querying leaf labels (or annotations) or by selection of graphical elements and information extraction, 5) the highlight of targeted trees according to a source tree browsed by the user, 6) powerful scripts for automating repetitive graphical tasks, 7) a command line interpreter enabling the use of TreeDyn through CGI scripts for online building of trees, 8) the inclusion of a library of packages dedicated to specific research fields involving trees.
CONCLUSION: TreeDyn is a tree visualization and annotation tool which includes tools for tree manipulation and annotation and uses meta-information through dynamic graphical operators or scripting to help analyses and annotations of single trees or tree collections.}
}
@article{he_evolview_2016,
  author     = {He, Zilong and Zhang, Huangkai and Gao, Shenghan and Lercher, Martin J. and Chen, Wei-Hua and Hu, Songnian},
  title      = {Evolview v2: an online visualization and management tool for customized and annotated phylogenetic trees},
  shorttitle = {Evolview v2},
  journal    = {Nucleic Acids Research},
  year       = {2016},
  month      = jul,
  volume     = {44},
  number     = {W1},
  pages      = {W236--W241},
  issn       = {1362-4962},
  doi        = {10.1093/nar/gkw370},
  pmid       = {27131786},
  pmcid      = {PMC4987921},
  language   = {eng},
  abstract   = {Evolview is an online visualization and management tool for customized and annotated phylogenetic trees. It allows users to visualize phylogenetic trees in various formats, customize the trees through built-in functions and user-supplied datasets and export the customization results to publication-ready figures. Its 'dataset system' contains not only the data to be visualized on the tree, but also 'modifiers' that control various aspects of the graphical annotation. Evolview is a single-page application (like Gmail); its carefully designed interface allows users to upload, visualize, manipulate and manage trees and datasets all in a single webpage. Developments since the last public release include a modern dataset editor with keyword highlighting functionality, seven newly added types of annotation datasets, collaboration support that allows users to share their trees and datasets and various improvements of the web interface and performance. In addition, we included eleven new 'Demo' trees to demonstrate the basic functionalities of Evolview, and five new 'Showcase' trees inspired by publications to showcase the power of Evolview in producing publication-ready figures. Evolview is freely available at: http://www.evolgenius.info/evolview/.}
}
@article{huson_dendroscope_2012,
title = {Dendroscope 3: an interactive tool for rooted phylogenetic trees and networks},