Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

port parabricks/deepvariant to nf-test #6995

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/conda_skip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ exclude:
path: modules/nf-core/xeniumranger/resegment
- profile: conda
path: modules/nf-core/xeniumranger/import-segmentation
- profile: conda
path: modules/nf-core/parabricks/deepvariant
- profile: conda
path: modules/nf-core/parabricks/mutectcaller
- profile: conda
Expand Down
1 change: 1 addition & 0 deletions modules/nf-core/parabricks/applybqsr/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,4 @@ authors:
- "@bsiranosian"
maintainers:
- "@bsiranosian"
- "@famosab"
2 changes: 2 additions & 0 deletions modules/nf-core/parabricks/dbsnp/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,5 @@ output:
pattern: "versions.yml"
authors:
- "@Furentsu"
maintainers:
- "@famosab"
22 changes: 11 additions & 11 deletions modules/nf-core/parabricks/deepvariant/main.nf
Original file line number Diff line number Diff line change
@@ -1,40 +1,41 @@
process PARABRICKS_DEEPVARIANT {
tag "$meta.id"
label 'process_high'
label 'process_gpu'
stageInMode 'copy' // needed by the module to work properly can be removed when fixed upstream - Issue #7226

container "nvcr.io/nvidia/clara/clara-parabricks:4.3.0-1"
container "nvcr.io/nvidia/clara/clara-parabricks:4.4.0-1"

input:
tuple val(meta), path(input), path(input_index), path(interval_file)
tuple val(ref_meta), path(fasta)

output:
tuple val(meta), path("*.vcf"), emit: vcf
path "versions.yml", emit: versions
tuple val(meta), path("*.vcf"), optional: true, emit: vcf
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this one not zipped?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll update it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think deepvariant gives either a vcf or a g.vcf.gz file. Otherwise the container needs to be changed and I am unsure how to do that with parabricks.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, but we can just gzip it afterwards (with whatever the magic way of doing that is for VCF files); presumably these VCF files will be large.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to install bgzip in the container then.

tuple val(meta), path("*.g.vcf.gz"), optional: true, emit: gvcf
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script:

// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
exit 1, "Parabricks module does not support Conda. Please use Docker / Singularity / Podman instead."
}

def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def output_file = args =~ "gvcf" ? "${prefix}.g.vcf" : "${prefix}.vcf"
def output_file = ("--gvcf" =~ task.ext.args)? "${prefix}.g.vcf.gz" : "${prefix}.vcf"
def interval_file_command = interval_file ? interval_file.collect{"--interval-file $it"}.join(' ') : ""
def num_gpus = task.accelerator ? "--num-gpus $task.accelerator.request" : ''
"""

pbrun \\
deepvariant \\
--ref $fasta \\
--in-bam $input \\
--out-variants $output_file \\
$interval_file_command \\
--num-gpus $task.accelerator.request \\
$num_gpus \\
$args

cat <<-END_VERSIONS > versions.yml
Expand All @@ -44,11 +45,10 @@ process PARABRICKS_DEEPVARIANT {
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def output_file = args =~ "gvcf" ? "${prefix}.g.vcf" : "${prefix}.vcf"
def output_cmd = ("--gvcf" =~ task.ext.args)? "echo '' | gzip > ${prefix}.g.vcf.gz" : "touch ${prefix}.vcf"
"""
touch $output_file
$output_cmd

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
14 changes: 13 additions & 1 deletion modules/nf-core/parabricks/deepvariant/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,24 @@ output:
e.g. [ id:'test' ]
- "*.vcf":
type: file
description: Variant file.
description: vcf file created with deepvariant (does not support .gz for normal vcf), optional
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But it could be gzipped afterwards; other modules do that.

pattern: "*.vcf"
- gvcf:
- meta:
type: map
description: |
Groovy Map containing sample information.
e.g. [ id:'test' ]
- "*.g.vcf.gz":
type: file
description: bgzipped gvcf created with deepvariant, optional
pattern: "*.g.vcf.gz"
- versions:
- versions.yml:
type: file
description: File containing software versions.
pattern: "versions.yml"
authors:
- "@bsiranosian"
maintainers:
- "@famosab"
223 changes: 223 additions & 0 deletions modules/nf-core/parabricks/deepvariant/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
// nf-test suite for the PARABRICKS_DEEPVARIANT process defined in ../main.nf.
//
// Every test supplies two inputs:
//   input[0] = [ sample meta, BAM, BAM index (or []), interval BED (or []) ]
//   input[1] = [ reference meta, reference FASTA ]
//
// The reference meta is [ id:'ref' ] in every test so that it is never
// confused with the sample meta [ id:'test' ].
//
// The gVCF tests load ./nextflow.config and set params.module_args = '--gvcf'.
// NOTE(review): presumably that config forwards params.module_args to the
// process ext.args - confirm against tests/nextflow.config.
nextflow_process {

    name "Test Process PARABRICKS_DEEPVARIANT"
    script "../main.nf"
    process "PARABRICKS_DEEPVARIANT"

    tag "modules"
    tag "modules_nfcore"
    tag "parabricks"
    tag "parabricks/deepvariant"
    tag "gpu"

    // Plain VCF output, no index and no interval file.
    test("human - bam") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true),
                    [], // bam.bai not needed unless using intervals
                    []
                ]
                input[1] = [
                    [ id:'ref'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // Snapshot the variants MD5 of the VCF (not the raw file) plus the versions output.
                { assert snapshot(
                    path(process.out.vcf[0][1]).vcf.variantsMD5,
                    process.out.versions,
                    path(process.out.versions[0]).yaml
                ).match() }
            )
        }

    }

    // Plain VCF output, with BAM index and an interval BED file.
    test("human - bam - intervals") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true)
                ]
                input[1] = [
                    [ id:'ref'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    path(process.out.vcf[0][1]).vcf.variantsMD5,
                    process.out.versions,
                    path(process.out.versions[0]).yaml
                ).match() }
            )
        }

    }

    // gVCF output (module_args = '--gvcf'), no index and no interval file.
    test("human - bam - gvcf") {

        config './nextflow.config'

        when {
            params {
                module_args = '--gvcf'
            }
            process {
                """
                input[0] = [
                    [ id:'test'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true),
                    [], // bam.bai not needed unless using intervals
                    []
                ]
                input[1] = [
                    [ id:'ref'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // The result is read from the gvcf channel here, not the vcf channel.
                { assert snapshot(
                    path(process.out.gvcf[0][1]).vcf.variantsMD5,
                    process.out.versions,
                    path(process.out.versions[0]).yaml
                ).match() }
            )
        }

    }

    // gVCF output (module_args = '--gvcf'), with BAM index and an interval BED file.
    test("human - bam - intervals - gvcf") {

        config './nextflow.config'

        when {
            params {
                module_args = '--gvcf'
            }
            process {
                """
                input[0] = [
                    [ id:'test'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true)
                ]
                input[1] = [
                    [ id:'ref'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    path(process.out.gvcf[0][1]).vcf.variantsMD5,
                    process.out.versions,
                    path(process.out.versions[0]).yaml
                ).match() }
            )
        }

    }

    // Stub run of the plain VCF case; the whole process.out is snapshotted.
    test("human - bam - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true),
                    [], // bam.bai not needed unless using intervals
                    []
                ]
                input[1] = [
                    [ id:'ref'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    process.out,
                    path(process.out.versions[0]).yaml
                ).match() }
            )
        }

    }

    // Stub run of the gVCF + intervals case; the whole process.out is snapshotted.
    test("human - bam - intervals - gvcf - stub") {

        config './nextflow.config'
        options "-stub"

        when {
            params {
                module_args = '--gvcf'
            }
            process {
                """
                input[0] = [
                    [ id:'test'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true)
                ]
                input[1] = [
                    [ id:'ref'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    process.out,
                    path(process.out.versions[0]).yaml
                ).match() }
            )
        }

    }

}
Loading
Loading