diff --git a/README.md b/README.md index 21ed8de..5421e87 100644 --- a/README.md +++ b/README.md @@ -55,20 +55,6 @@ nextflow run BCCDC-PHL/plasmid-screen \ --outdir ``` -If the `--versioned_outdir` flag is used, then a sub-directory will be created below each sample, named with the pipeline name and minor version: - -``` -sample-01 - └── plasmid-screen-v0.2-output - ├── sample-01_abricate.tsv - ├── sample-01_chromosome.fasta - ├── sample-01_fastp.csv - ├── sample-01_mash_screen.tsv - ├── ... - ├── sample-01_quast.csv - └── NC_019152.1.fa -``` - ## Outputs The main output of the pipeline is the 'Resistance gene report', which summarizes where the resistance gene was located (contig and position), the quality of the resitance gene match (% identity and @@ -104,7 +90,8 @@ For each sample, the following output files are created: ``` sample-01/ ├── sample-01_20211207163723_provenance.yml -├── sample-01_abricate.tsv +├── sample-01_abricate_ncbi.tsv +├── sample-01_abricate_plasmidfinder.tsv ├── sample-01_chromosome.fasta ├── sample-01_fastp.csv ├── sample-01_mash_screen.tsv @@ -122,7 +109,8 @@ sample-01/ | filename suffix | Generated by | Description | |:---------------------------------|:--------------|:-----------------------------------------------------------------------------------------------------| -| `_abricate.tsv` | `abricate` | All resistance genes found in the entire assembly | +| `_abricate_ncbi.tsv` | `abricate` | All resistance genes found in the entire assembly | +| `_abricate_plasmidfinder.tsv` | `abricate` | All replicon genes found in the entire assembly | | `_chromosome.fasta` | `mob_recon` | The set of contigs determined by mob_recon to belong to the chromosome (non-plasmid) | | `_plasmid__.fasta` | `mob_recon` | Plasmid reconstructions. 
Groups of contigs that were determined to be part of the same plasmid | | `_fastp.csv` | `fastp` | Read QC info | @@ -134,6 +122,7 @@ sample-01/ | `.snps.vcf` | `freebayes` | SNPs found in alignment of reads against a reference plasmid | | `.fa` | `seqkit` | Reference plasmid used for alignments | + ### Provenance Each analysis will create a provenance.yml file for each sample. The filename of the `provenance.yml` file includes a timestamp with format `YYYYMMDDHHMMSS` to ensure @@ -142,90 +131,108 @@ that a unique file will be produced if a sample is re-analyzed and outputs are s Example provenance output: ```yml -- process_name: mob_recon - tool_name: mob_recon - tool_version: 3.0.3 - parameters: - - parameter: database_directory - value: /path/to/mob_db - - parameter: filter_db - value: /path/to/mob_filter_db - - parameter: min_con_cov - value: 95 -- process_name: abricate - tool_name: abricate - tool_version: 1.0.1 - parameters: - - parameter: db - value: ncbi -- process_name: trim_reads - tool_name: fastp - tool_version: 0.22.0 - parameters: - - parameter: cut_tail - value: true +- pipeline_name: BCCDC-PHL/plasmid-screen + pipeline_version: 0.2.3 + nextflow_session_id: c0cc6250-c767-4bfe-9254-0b49ff6dab91 + nextflow_run_name: mighty_panini + timestamp_analysis_start: 2024-06-18T16:09:15.659426-07:00 - input_filename: sample-01_R1.fastq.gz - input_path: /path/to/sample-01_R1_001.fastq.gz - sha256: b0534592d61321243897e842a9ea655d396d4496cbf6d926b6c6fea8e06aa98d + input_path: /path/to/sample-01_R1.fastq.gz + sha256: 497c99c5665bd0b89666c5fa625ae966f2ffaf218186db0e1ae95a15dac3ac76 - input_filename: sample-01_R2.fastq.gz - input_path: /path/to/sample-01_R2_001.fastq.gz - sha256: cc66309103da91e337143eb649196d84ed3ebe2ff08a45b197cd4151d137a167 -- input_filename: sample-01.fa - input_path: /path/to/sample-01.fa - sha256: 6fffb542711ee301ef1185a403a74fed36c066872e3fbfb7aa5c81464243bd00 -- process_name: align_reads_to_reference_plasmid - process_tags: - ref_plasmid_id: 
NC_019152.1 - resistance_gene: blaKPC-3 - tool_name: bwa - subcommand: mem - tool_version: 0.7.17-r1188 - parameters: - - parameter: alignment_algorithm - value: mem + input_path: /path/to/sample-01_R2.fastq.gz + sha256: 46ec4c473b613d36c7ce109808c4510a10b205aaebcfe837eb542999fdbdf11f +- input_filename: sample-01_unicycler_short.fa + input_path: /path/to/sample-01_unicycler_short.fa + sha256: b0d012b23057095b305cf57a687d90406e7383051d2c845717f6e99fdb4d4ad7 +- process_name: trim_reads + tools: + - tool_name: fastp + tool_version: 0.22.0 + parameters: + - parameter: cut_tail + value: true +- process_name: quast + tools: + - tool_name: quast + tool_version: 5.0.2 +- process_name: mash_screen + tools: + - tool_name: mash + tool_version: 2.3 + parameters: + - name: threshold + value: 0.996 +- process_name: mob_recon + tools: + - tool_name: mob_recon + tool_version: 3.0.3 + parameters: + - parameter: database_directory + value: /path/to/mob-suite/db + - parameter: filter_db + value: /path/to/mob-suite/chromosomes/2019-11-NCBI-Enterobacteriacea-Chromosomes.fasta + - parameter: min_con_cov + value: 95 +- process_name: abricate + tools: + - tool_name: abricate + tool_version: 1.0.1 + parameters: + - parameter: db + value: ncbi +- process_name: abricate + tools: + - tool_name: abricate + tool_version: 1.0.1 + parameters: + - parameter: db + value: plasmidfinder - process_name: align_reads_to_reference_plasmid process_tags: - ref_plasmid_id: NC_019152.1 - resistance_gene: blaKPC-3 - tool_name: samtools - subcommand: view - tool_version: 1.13 - parameters: - - parameter: exclude_flags - value: 1540 -- process_name: call_snps - process_tags: - ref_plasmid_id: NC_019152.1 - resistance_gene: blaKPC-3 - tool_name: freebayes - tool_version: 1.3.5 - parameters: - - parameter: ploidy - value: 1 - - parameter: min_base_quality - value: 20 - - parameter: min_mapping_quality - value: 60 - - parameter: min_coverage - value: 10 - - parameter: min_alternate_fraction - value: 0.8 - - 
parameter: min_repeat_entropy - value: 1.0 + ref_plasmid_id: NZ_CP023897.1 + resistance_gene: blaOXA-181 + tools: + - tool_name: bwa + tool_version: 0.7.17-r1188 + subcommand: mem + parameters: + - parameter: output_all_alignments + value: true + - parameter: use_soft_clipping_for_supplementary_alignments + value: true + - parameter: mark_shorter_split_hits_as_secondary + value: true + - tool_name: samtools + tool_version: 1.13 + subcommand: view + parameters: + - parameter: exclude_flags + value: 1540 - process_name: call_snps process_tags: - ref_plasmid_id: NC_019152.1 - resistance_gene: blaKPC-3 - tool_name: bcftools - subcommand: view - tool_version: 1.12 - parameters: - - parameter: include - value: INFO/TYPE=snp -- process_name: quast - tool_name: quast - tool_version: 5.0.2 -- pipeline_name: BCCDC-PHL/plasmid-screen - pipeline_version: 0.1.0 -- timestamp_analysis_start: 2021-12-06T16:12:31.252055 + ref_plasmid_id: NZ_CP023897.1 + resistance_gene: blaOXA-181 + tools: + - tool_name: freebayes + tool_version: 1.3.5 + parameters: + - parameter: ploidy + value: 1 + - parameter: min_base_quality + value: 20 + - parameter: min_mapping_quality + value: 60 + - parameter: min_coverage + value: 10 + - parameter: min_alternate_fraction + value: 0.8 + - parameter: min_repeat_entropy + value: 1.0 + - tool_name: bcftools + tool_version: 1.20 + subcommand: view + parameters: + - parameter: include + value: INFO/TYPE=snp ``` diff --git a/main.nf b/main.nf index 41c97bf..f8cf91d 100644 --- a/main.nf +++ b/main.nf @@ -11,7 +11,8 @@ include { mash_screen } from './modules/mash_screen. 
include { quast } from './modules/quast.nf' include { parse_quast_report } from './modules/quast.nf' include { mob_recon } from './modules/mob_recon.nf' -include { abricate } from './modules/abricate.nf' +include { abricate as abricate_ncbi } from './modules/abricate.nf' +include { abricate as abricate_plasmidfinder } from './modules/abricate.nf' include { join_mob_typer_and_abricate_reports } from './modules/join_reports.nf' include { select_resistance_chromosomes } from './modules/join_reports.nf' include { select_resistance_contigs } from './modules/select_resistance_contigs.nf' @@ -79,9 +80,11 @@ workflow { // pass reconstructed plasmids as [sample_id, [seq1, seq2, seq3...]] ch_mob_recon_sequences = mob_recon.out.sequences.map{ it -> [it[0], it[1..-1][0]] } - abricate(ch_mob_recon_sequences) + abricate_ncbi(ch_mob_recon_sequences.combine(Channel.of("ncbi"))) - ch_join_reports_input = mob_recon.out.mobtyper_reports.cross(abricate.out.report).map{ it -> [it[0][0], it[0][1], it[0][2], it[1][1]] } + abricate_plasmidfinder(ch_mob_recon_sequences.combine(Channel.of("plasmidfinder"))) + + ch_join_reports_input = mob_recon.out.mobtyper_reports.cross(abricate_ncbi.out.report).map{ it -> [it[0][0], it[0][1], it[0][2], it[1][1]] } ch_combined_abricate_mobtyper_report = join_mob_typer_and_abricate_reports(ch_join_reports_input) @@ -115,7 +118,8 @@ workflow { ch_provenance = ch_provenance.join(quast.out.provenance).map{ it -> [it[0], it[1] << it[2]] } ch_provenance = ch_provenance.join(mash_screen.out.provenance).map{ it -> [it[0], it[1] << it[2]] } ch_provenance = ch_provenance.join(mob_recon.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - ch_provenance = ch_provenance.join(abricate.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + ch_provenance = ch_provenance.join(abricate_ncbi.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + ch_provenance = ch_provenance.join(abricate_plasmidfinder.out.provenance).map{ it -> [it[0], it[1] << it[2]] } ch_provenance = 
ch_provenance.join(align_reads_to_reference_plasmid.out.provenance, remainder: true).map{ it -> it.collect{ x -> x ? x : [] }}.map{ it -> [it[0], it[1] << it[2]] }.groupTuple().map{ it -> [it[0], it[1].flatten()] } ch_provenance = ch_provenance.join(call_snps.out.provenance, remainder: true).map{ it -> it.collect{ x -> x ? x : [] }}.map{ it -> [it[0], it[1] << it[2]] }.groupTuple().map{ it -> [it[0], it[1].flatten()] } ch_provenance = ch_provenance.map{ [it[0]] + [it[1].unique{ path -> path.getFileName() }] } diff --git a/modules/abricate.nf b/modules/abricate.nf index 9eac975..426478e 100644 --- a/modules/abricate.nf +++ b/modules/abricate.nf @@ -1,31 +1,30 @@ process abricate { - tag { sample_id } + tag { sample_id + ' / ' + db } - publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_abricate.tsv", mode: 'copy' + publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_abricate_${db}.tsv", mode: 'copy' input: - tuple val(sample_id), path(assemblies) + tuple val(sample_id), path(assemblies), val(db) output: - tuple val(sample_id), path("${sample_id}_abricate.tsv"), emit: report - tuple val(sample_id), path("${sample_id}*_provenance.yml"), emit: provenance + tuple val(sample_id), path("${sample_id}_abricate_${db}.tsv"), emit: report + tuple val(sample_id), path("${sample_id}_abricate_${db}_provenance.yml"), emit: provenance script: - db = "ncbi" """ - printf -- "- process_name: abricate\\n" >> ${sample_id}_abricate_provenance.yml - printf -- " tools:\\n" >> ${sample_id}_abricate_provenance.yml - printf -- " - tool_name: abricate\\n" >> ${sample_id}_abricate_provenance.yml - printf -- " tool_version: \$(abricate --version | cut -d ' ' -f 2)\\n" >> ${sample_id}_abricate_provenance.yml - printf -- " parameters:\\n" >> ${sample_id}_abricate_provenance.yml - printf -- " - parameter: db\\n" >> ${sample_id}_abricate_provenance.yml - printf -- " value: ${db}\n" >> ${sample_id}_abricate_provenance.yml + printf -- "- process_name: abricate\\n" >> 
${sample_id}_abricate_${db}_provenance.yml + printf -- " tools:\\n" >> ${sample_id}_abricate_${db}_provenance.yml + printf -- " - tool_name: abricate\\n" >> ${sample_id}_abricate_${db}_provenance.yml + printf -- " tool_version: \$(abricate --version | cut -d ' ' -f 2)\\n" >> ${sample_id}_abricate_${db}_provenance.yml + printf -- " parameters:\\n" >> ${sample_id}_abricate_${db}_provenance.yml + printf -- " - parameter: db\\n" >> ${sample_id}_abricate_${db}_provenance.yml + printf -- " value: ${db}\n" >> ${sample_id}_abricate_${db}_provenance.yml abricate \ --threads ${task.cpus} \ --db ${db} \ --nopath \ - ${assemblies} > ${sample_id}_abricate.tsv + ${assemblies} > ${sample_id}_abricate_${db}.tsv """ }