Skip to content

Commit

Permalink
Update RSEM version and tests - address #83
Browse files Browse the repository at this point in the history
  • Loading branch information
emi80 committed May 3, 2024
1 parent 2b11791 commit b4c0f37
Show file tree
Hide file tree
Showing 7 changed files with 238 additions and 44 deletions.
42 changes: 33 additions & 9 deletions modules/quantification/rsem/main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
params.rsemVersion = '1.2.21'
params.container = "grapenf/quantification:rsem-${params.rsemVersion}"
params.rsemVersion = '1.3.3--pl5321h0033a41_7'
params.container = "quay.io/biocontainers/rsem:${params.rsemVersion}"
params.rsemCalcCI = false
params.rsemPlotModel = false

Expand Down Expand Up @@ -42,27 +42,41 @@ process index {
cmd.join('\n')
}

process quantify {
process validateBam {
tag "${sample}"
container params.container

input:
tuple val(sample), val(id), path(bam), val(type), val(view), val(pairedEnd), val(readStrand)

output:
tuple val(sample), stdout

"""
rsem-sam-validator ${bam}
"""
}

process calculateExpression {

tag "${sample}"
container params.container

input:
path(quantRef)
tuple val(sample), val(id), path(bam), val(type), val(view), val(pairedEnd), val(readStrand)
tuple val(sample), val(id), path(bam), val(type), val(view), val(pairedEnd), val(readStrand), val(convertBam)

output:
tuple val(sample), val(id), path("*isoforms*"), val(type), val(viewTx), val(pairedEnd), val(readStrand), emit: isoforms
tuple val(sample), val(id), path("*genes*"), val(type), val(viewGn), val(pairedEnd), val(readStrand), emit: genes

script:
prefix = "${sample}"
rsemPrefix = bam.simpleName
type = 'tsv'
viewTx = "TranscriptQuantifications"
viewGn = "GeneQuantifications"
def memory = (task.memory ?: 1.GB).toMega()
def sortMemFrac = params.rsemCalcCI ? 0.5 : 0.75
def sortMemory = Math.max(1024, (memory * sortMemFrac) as long)
def forwardProb = null

switch (readStrand) {
Expand All @@ -75,9 +89,19 @@ process quantify {
}

def cmd = []
cmd << "sambamba sort -t ${task.cpus} -m ${sortMemory}MB -N -M -l 0 -o - ${bam} \\"
if ( convertBam ) {
rsemPrefix = "${rsemPrefix}.rsem"
def convertMemory = Math.max(1024, ( memory / task.cpus / 2 ) as long)
cmd << """\
mkfifo ${rsemPrefix}.bam
convert-sam-for-rsem -p ${task.cpus} \\
--memory-per-thread ${convertMemory}M \\
${bam} \\
${rsemPrefix} &
""".stripIndent()
}
cmd << """\
| rsem-calculate-expression -p ${task.cpus} \\
rsem-calculate-expression -p ${task.cpus} \\
--bam \\
--seed 12345 \\
--estimate-rspd \\
Expand All @@ -97,7 +121,7 @@ process quantify {
--ci-memory ${ciMemory} \\"""
}
cmd << """\
- \\
${rsemPrefix}.bam \\
${quantRef}/RSEMref \\
${prefix}"""
if ( params.rsemPlotModel ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ nextflow_process {

name "Test Process quantify"
script "modules/quantification/rsem/main.nf"
process "quantify"
process "calculateExpression"

tag "module"

Expand All @@ -25,7 +25,7 @@ nextflow_process {
"""
input[0] = index.out
input[1] = Channel.from([
[ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE" ]
[ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE", false ]
])
"""
}
Expand All @@ -38,17 +38,14 @@ nextflow_process {

}

test("Should quantify with RSEM (with credibility intervals)") {
test("Should quantify with RSEM converting the input BAM file") {

when {
params {
rsemCalcCI = true
}
process {
"""
input[0] = index.out
input[1] = Channel.from([
[ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE" ]
[ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE", true ]
])
"""
}
Expand All @@ -61,4 +58,30 @@ nextflow_process {

}

// >>> TEST COMMENTED BECAUSE RSEM 1.3.3 FAILS WITH --calc-ci
// >>> see https://github.com/deweylab/RSEM/issues/134
// >>> DO NOT CLEAN SNAPSHOT!
// test("Should quantify with RSEM (with credibility intervals)") {

// when {
// params {
// rsemCalcCI = true
// }
// process {
// """
// input[0] = index.out
// input[1] = Channel.from([
// [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE", false ]
// ])
// """
// }
// }

// then {
// assert process.success
// assert snapshot(process.out).match()
// }

// }

}
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,61 @@
},
"timestamp": "2024-04-30T17:37:25.520162"
},
"Should quantify with RSEM converting the input BAM file": {
"content": [
{
"0": [
[
"sample3",
"test3",
"sample3.isoforms.results:md5,2f825c2b612114534df653ec4c172c18",
"tsv",
"TranscriptQuantifications",
true,
"MATE2_SENSE"
]
],
"1": [
[
"sample3",
"test3",
"sample3.genes.results:md5,d76a7103c9d815fa46dc7b3ab38ae4b1",
"tsv",
"GeneQuantifications",
true,
"MATE2_SENSE"
]
],
"genes": [
[
"sample3",
"test3",
"sample3.genes.results:md5,d76a7103c9d815fa46dc7b3ab38ae4b1",
"tsv",
"GeneQuantifications",
true,
"MATE2_SENSE"
]
],
"isoforms": [
[
"sample3",
"test3",
"sample3.isoforms.results:md5,2f825c2b612114534df653ec4c172c18",
"tsv",
"TranscriptQuantifications",
true,
"MATE2_SENSE"
]
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-03T13:07:20.24352"
},
"Should quantify with RSEM": {
"content": [
{
Expand Down
47 changes: 47 additions & 0 deletions tests/modules/quantification/rsem/main.validateBam.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
nextflow_process {

name "Test Process validate"
script "modules/quantification/rsem/main.nf"
process "validateBam"

tag "module"

test("Should validate BAM file for RSEM") {

when {
process {
"""
input[0] = Channel.from([
[ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE" ]
])
"""
}
}

then {
assert process.success
assert snapshot(process.out).match()
}

}

test("Should not validate BAM file for RSEM") {

when {
process {
"""
input[0] = Channel.from([
[ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toGenome.bam"), "bam", "GenomeAlignments", true, "MATE2_SENSE" ]
])
"""
}
}

then {
assert process.success
assert snapshot(process.out).match()
}

}

}
36 changes: 36 additions & 0 deletions tests/modules/quantification/rsem/main.validateBam.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"Should not validate BAM file for RSEM": {
"content": [
{
"0": [
[
"sample3",
".\nSkipped region is detected (cigar N) for read HWI-ST985:81:C0C5PACXX:8:1104:20582:12084!\nTo use RSEM, please align your reads to a set of transcript sequences instead of a genome.\nThe input file is not valid!\n"
]
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-03T12:30:00.196523"
},
"Should validate BAM file for RSEM": {
"content": [
{
"0": [
[
"sample3",
".\nThe input file is valid!\n"
]
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-03T12:29:51.499688"
}
}
51 changes: 27 additions & 24 deletions tests/workflows/quantification/rsem.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -33,32 +33,35 @@ nextflow_workflow {

}

test("Should run quantification with RSEM (with credibility intervals)") {
// >>> TEST COMMENTED BECAUSE RSEM 1.3.3 FAILS WITH --calc-ci
// >>> see https://github.com/deweylab/RSEM/issues/134
// >>> DO NOT CLEAN SNAPSHOT!
// test("Should run quantification with RSEM (with credibility intervals)") {

when {
params {
stepList = [ "quantification" ]
rsemCalcCI = true
}
workflow {
"""
input[0] = file("${baseDir}/data/genome.fa")
input[1] = file("${baseDir}/data/annotation.gtf")
input[2] = Channel.from([
[ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toGenome.bam"), "bam", "GenomeAlignments", true, "MATE2_SENSE" ]
])
input[3] = Channel.from([
[ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE" ]
])
"""
}
}
// when {
// params {
// stepList = [ "quantification" ]
// rsemCalcCI = true
// }
// workflow {
// """
// input[0] = file("${baseDir}/data/genome.fa")
// input[1] = file("${baseDir}/data/annotation.gtf")
// input[2] = Channel.from([
// [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toGenome.bam"), "bam", "GenomeAlignments", true, "MATE2_SENSE" ]
// ])
// input[3] = Channel.from([
// [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toTranscriptome.bam"), "bam", "TranscriptomeAlignments", true, "MATE2_SENSE" ]
// ])
// """
// }
// }

then {
assert workflow.success
assert snapshot(workflow.out).match()
}
// then {
// assert workflow.success
// assert snapshot(workflow.out).match()
// }

}
// }

}
14 changes: 10 additions & 4 deletions workflows/quantification/rsem.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include { index; quantify } from "../../modules/quantification/rsem"
include { index; validateBam; calculateExpression } from "../../modules/quantification/rsem"

def doQuantify = ( 'quantification' in params.stepList )

Expand All @@ -11,9 +11,15 @@ workflow quantification {
main:
if ( doQuantify ) {
quantificationIndex = index( genome, annotation )
quantify( quantificationIndex, transcriptomeAlignments )
validateBam ( transcriptomeAlignments )
calculateExpression(
quantificationIndex,
transcriptomeAlignments.join(validateBam.out).map {
it[0..-2] + [ it[-1].contains("not valid") ]
}
)
}
emit:
genes = ( doQuantify ) ? quantify.out.genes : Channel.empty()
isoforms = ( doQuantify ) ? quantify.out.isoforms : Channel.empty()
genes = ( doQuantify ) ? calculateExpression.out.genes : Channel.empty()
isoforms = ( doQuantify ) ? calculateExpression.out.isoforms : Channel.empty()
}

0 comments on commit b4c0f37

Please sign in to comment.