From b4c0f37461d4579321a56040f1f1be5e985f1c8f Mon Sep 17 00:00:00 2001 From: Emilio Palumbo Date: Fri, 3 May 2024 14:52:43 +0200 Subject: [PATCH] Update RSEM version and tests - address #83 --- modules/quantification/rsem/main.nf | 42 +++++++++++--- ....test => main.calculateExpression.nf.test} | 37 ++++++++++--- ... => main.calculateExpression.nf.test.snap} | 55 +++++++++++++++++++ .../rsem/main.validateBam.nf.test | 47 ++++++++++++++++ .../rsem/main.validateBam.nf.test.snap | 36 ++++++++++++ tests/workflows/quantification/rsem.nf.test | 51 +++++++++-------- workflows/quantification/rsem.nf | 14 +++-- 7 files changed, 238 insertions(+), 44 deletions(-) rename tests/modules/quantification/rsem/{main.quantify.nf.test => main.calculateExpression.nf.test} (57%) rename tests/modules/quantification/rsem/{main.quantify.nf.test.snap => main.calculateExpression.nf.test.snap} (66%) create mode 100644 tests/modules/quantification/rsem/main.validateBam.nf.test create mode 100644 tests/modules/quantification/rsem/main.validateBam.nf.test.snap diff --git a/modules/quantification/rsem/main.nf b/modules/quantification/rsem/main.nf index 29d62ab..d0df9e9 100644 --- a/modules/quantification/rsem/main.nf +++ b/modules/quantification/rsem/main.nf @@ -1,5 +1,5 @@ -params.rsemVersion = '1.2.21' -params.container = "grapenf/quantification:rsem-${params.rsemVersion}" +params.rsemVersion = '1.3.3--pl5321h0033a41_7' +params.container = "quay.io/biocontainers/rsem:${params.rsemVersion}" params.rsemCalcCI = false params.rsemPlotModel = false @@ -42,14 +42,29 @@ process index { cmd.join('\n') } -process quantify { +process validateBam { + tag "${sample}" + container params.container + + input: + tuple val(sample), val(id), path(bam), val(type), val(view), val(pairedEnd), val(readStrand) + + output: + tuple val(sample), stdout + + """ + rsem-sam-validator ${bam} + """ +} + +process calculateExpression { tag "${sample}" container params.container input: path(quantRef) - tuple val(sample), val(id), path(bam), val(type), val(view), val(pairedEnd), val(readStrand) + tuple val(sample), val(id), path(bam), val(type), val(view), val(pairedEnd), val(readStrand), val(convertBam) output: tuple val(sample), val(id), path("*isoforms*"), val(type), val(viewTx), val(pairedEnd), val(readStrand), emit: isoforms @@ -57,12 +72,11 @@ process quantify { script: prefix = "${sample}" + rsemPrefix = bam.simpleName type = 'tsv' viewTx = "TranscriptQuantifications" viewGn = "GeneQuantifications" def memory = (task.memory ?: 1.GB).toMega() - def sortMemFrac = params.rsemCalcCI ? 0.5 : 0.75 - def sortMemory = Math.max(1024, (memory * sortMemFrac) as long) def forwardProb = null switch (readStrand) { @@ -75,9 +89,19 @@ process quantify { } def cmd = [] - cmd << "sambamba sort -t ${task.cpus} -m ${sortMemory}MB -N -M -l 0 -o - ${bam} \\" + if ( convertBam ) { + rsemPrefix = "${rsemPrefix}.rsem" + def convertMemory = Math.max(1024, ( memory / task.cpus / 2 ) as long) + cmd << """\ + mkfifo ${rsemPrefix}.bam + convert-sam-for-rsem -p ${task.cpus} \\ + --memory-per-thread ${convertMemory}M \\ + ${bam} \\ + ${rsemPrefix} & + """.stripIndent() + } cmd << """\ - | rsem-calculate-expression -p ${task.cpus} \\ + rsem-calculate-expression -p ${task.cpus} \\ --bam \\ --seed 12345 \\ --estimate-rspd \\ @@ -97,7 +121,7 @@ process quantify { --ci-memory ${ciMemory} \\""" } cmd << """\ - - \\ + ${rsemPrefix}.bam \\ ${quantRef}/RSEMref \\ ${prefix}""" if ( params.rsemPlotModel ) { diff --git a/tests/modules/quantification/rsem/main.quantify.nf.test b/tests/modules/quantification/rsem/main.calculateExpression.nf.test similarity index 57% rename from tests/modules/quantification/rsem/main.quantify.nf.test rename to tests/modules/quantification/rsem/main.calculateExpression.nf.test index 722ba17..7300838 100644 --- a/tests/modules/quantification/rsem/main.quantify.nf.test +++ b/tests/modules/quantification/rsem/main.calculateExpression.nf.test @@ -2,7 +2,7 @@ nextflow_process { name "Test Process quantify" script "modules/quantification/rsem/main.nf" - process "quantify" + process "calculateExpression" tag "module" @@ -25,7 +25,7 @@ nextflow_process { """ input[0] = index.out input[1] = Channel.from([ - [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE" ] + [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE", false ] ]) """ } @@ -38,17 +38,14 @@ nextflow_process { } - test("Should quantify with RSEM (with credibility intervals)") { + test("Should quantify with RSEM converting the input BAM file") { when { - params { - rsemCalcCI = true - } process { """ input[0] = index.out input[1] = Channel.from([ - [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE" ] + [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE", true ] ]) """ } @@ -61,4 +58,30 @@ nextflow_process { } + // >>> TEST COMMENTED BECAUSE RSEM 1.3.3 FAILS WITH --calc-ci + // >>> see https://github.com/deweylab/RSEM/issues/134 + // >>> DO NOT CLEAN SNAPSHOT! + // test("Should quantify with RSEM (with credibility intervals)") { + + // when { + // params { + // rsemCalcCI = true + // } + // process { + // """ + // input[0] = index.out + // input[1] = Channel.from([ + // [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE", false ] + // ]) + // """ + // } + // } + + // then { + // assert process.success + // assert snapshot(process.out).match() + // } + + // } + } diff --git a/tests/modules/quantification/rsem/main.quantify.nf.test.snap b/tests/modules/quantification/rsem/main.calculateExpression.nf.test.snap similarity index 66% rename from tests/modules/quantification/rsem/main.quantify.nf.test.snap rename to tests/modules/quantification/rsem/main.calculateExpression.nf.test.snap index 7ef5398..e2948fb 100644 --- a/tests/modules/quantification/rsem/main.quantify.nf.test.snap +++ b/tests/modules/quantification/rsem/main.calculateExpression.nf.test.snap @@ -54,6 +54,61 @@ }, "timestamp": "2024-04-30T17:37:25.520162" }, + "Should quantify with RSEM converting the input BAM file": { + "content": [ + { + "0": [ + [ + "sample3", + "test3", + "sample3.isoforms.results:md5,2f825c2b612114534df653ec4c172c18", + "tsv", + "TranscriptQuantifications", + true, + "MATE2_SENSE" + ] + ], + "1": [ + [ + "sample3", + "test3", + "sample3.genes.results:md5,d76a7103c9d815fa46dc7b3ab38ae4b1", + "tsv", + "GeneQuantifications", + true, + "MATE2_SENSE" + ] + ], + "genes": [ + [ + "sample3", + "test3", + "sample3.genes.results:md5,d76a7103c9d815fa46dc7b3ab38ae4b1", + "tsv", + "GeneQuantifications", + true, + "MATE2_SENSE" + ] + ], + "isoforms": [ + [ + "sample3", + "test3", + "sample3.isoforms.results:md5,2f825c2b612114534df653ec4c172c18", + "tsv", + "TranscriptQuantifications", + true, + "MATE2_SENSE" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:07:20.24352" + }, "Should quantify with RSEM": { "content": [ { diff --git a/tests/modules/quantification/rsem/main.validateBam.nf.test b/tests/modules/quantification/rsem/main.validateBam.nf.test new file mode 100644 index 0000000..9a3e2ff --- /dev/null +++ b/tests/modules/quantification/rsem/main.validateBam.nf.test @@ -0,0 +1,47 @@ +nextflow_process { + + name "Test Process validate" + script "modules/quantification/rsem/main.nf" + process "validateBam" + + tag "module" + + test("Should validate BAM file for RSEM") { + + when { + process { + """ + input[0] = Channel.from([ + [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE" ] + ]) + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + + test("Should not validate BAM file for RSEM") { + + when { + process { + """ + input[0] = Channel.from([ + [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toGenome.bam"), "bam", "GenomeAlignments", true, "MATE2_SENSE" ] + ]) + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/tests/modules/quantification/rsem/main.validateBam.nf.test.snap b/tests/modules/quantification/rsem/main.validateBam.nf.test.snap new file mode 100644 index 0000000..fb28055 --- /dev/null +++ b/tests/modules/quantification/rsem/main.validateBam.nf.test.snap @@ -0,0 +1,36 @@ +{ + "Should not validate BAM file for RSEM": { + "content": [ + { + "0": [ + [ + "sample3", + ".\nSkipped region is detected (cigar N) for read HWI-ST985:81:C0C5PACXX:8:1104:20582:12084!\nTo use RSEM, please align your reads to a set of transcript sequences instead of a genome.\nThe input file is not valid!\n" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T12:30:00.196523" + }, + "Should validate BAM file for RSEM": { + "content": [ + { + "0": [ + [ + "sample3", + ".\nThe input file is valid!\n" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T12:29:51.499688" + } +} \ No newline at end of file diff --git a/tests/workflows/quantification/rsem.nf.test b/tests/workflows/quantification/rsem.nf.test index d4503c8..a34b947 100644 --- a/tests/workflows/quantification/rsem.nf.test +++ b/tests/workflows/quantification/rsem.nf.test @@ -33,32 +33,35 @@ nextflow_workflow { } - test("Should run quantification with RSEM (with credibility intervals)") { + // >>> TEST COMMENTED BECAUSE RSEM 1.3.3 FAILS WITH --calc-ci + // >>> see https://github.com/deweylab/RSEM/issues/134 + // >>> DO NOT CLEAN SNAPSHOT! + // test("Should run quantification with RSEM (with credibility intervals)") { - when { - params { - stepList = [ "quantification" ] - rsemCalcCI = true - } - workflow { - """ - input[0] = file("${baseDir}/data/genome.fa") - input[1] = file("${baseDir}/data/annotation.gtf") - input[2] = Channel.from([ - [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toGenome.bam"), "bam", "GenomeAlignments", true, "MATE2_SENSE" ] - ]) - input[3] = Channel.from([ - [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE" ] - ]) - """ - } - } + // when { + // params { + // stepList = [ "quantification" ] + // rsemCalcCI = true + // } + // workflow { + // """ + // input[0] = file("${baseDir}/data/genome.fa") + // input[1] = file("${baseDir}/data/annotation.gtf") + // input[2] = Channel.from([ + // [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toGenome.bam"), "bam", "GenomeAlignments", true, "MATE2_SENSE" ] + // ]) + // input[3] = Channel.from([ + // [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE" ] + // ]) + // """ + // } + // } - then { - assert workflow.success - assert snapshot(workflow.out).match() - } + // then { + // assert workflow.success + // assert snapshot(workflow.out).match() + // } - } + // } } diff --git a/workflows/quantification/rsem.nf b/workflows/quantification/rsem.nf index 016d283..090eb02 100644 --- a/workflows/quantification/rsem.nf +++ b/workflows/quantification/rsem.nf @@ -1,4 +1,4 @@ -include { index; quantify } from "../../modules/quantification/rsem" +include { index; validateBam; calculateExpression } from "../../modules/quantification/rsem" def doQuantify = ( 'quantification' in params.stepList ) @@ -11,9 +11,15 @@ workflow quantification { main: if ( doQuantify ) { quantificationIndex = index( genome, annotation ) - quantify( quantificationIndex, transcriptomeAlignments ) + validateBam ( transcriptomeAlignments ) + calculateExpression( + quantificationIndex, + transcriptomeAlignments.join(validateBam.out).map { + it[0..-2] + [ it[-1].contains("not valid") ] + } + ) } emit: - genes = ( doQuantify ) ? quantify.out.genes : Channel.empty() - isoforms = ( doQuantify ) ? quantify.out.isoforms : Channel.empty() + genes = ( doQuantify ) ? calculateExpression.out.genes : Channel.empty() + isoforms = ( doQuantify ) ? calculateExpression.out.isoforms : Channel.empty() }