From 7fd374ec22647b9aac628095498313661701b6a1 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Sat, 12 Oct 2024 13:25:41 +0200
Subject: [PATCH] Add TAR module (#6772)

* start filling in mneta

* Fix linting

* Replace switch with ifelse

* Fix ype in in if else
---
 modules/nf-core/tar/environment.yml         |  12 ++
 modules/nf-core/tar/main.nf                 |  74 +++++++
 modules/nf-core/tar/meta.yml                |  67 +++++++
 modules/nf-core/tar/tests/main.nf.test      | 210 ++++++++++++++++++++
 modules/nf-core/tar/tests/main.nf.test.snap | 126 ++++++++++++
 5 files changed, 489 insertions(+)
 create mode 100644 modules/nf-core/tar/environment.yml
 create mode 100644 modules/nf-core/tar/main.nf
 create mode 100644 modules/nf-core/tar/meta.yml
 create mode 100644 modules/nf-core/tar/tests/main.nf.test
 create mode 100644 modules/nf-core/tar/tests/main.nf.test.snap

diff --git a/modules/nf-core/tar/environment.yml b/modules/nf-core/tar/environment.yml
new file mode 100644
index 00000000000..1decc62432d
--- /dev/null
+++ b/modules/nf-core/tar/environment.yml
@@ -0,0 +1,12 @@
+channels:
+  - conda-forge
+  - bioconda
+
+dependencies:
+  - conda-forge::bzip2=1.0.8
+  - conda-forge::gzip=1.13
+  - conda-forge::lzip=1.21
+  - conda-forge::lzop=1.04
+  - conda-forge::tar=1.34
+  - conda-forge::xz=5.2.6
+  - conda-forge::zstd=1.5.6
diff --git a/modules/nf-core/tar/main.nf b/modules/nf-core/tar/main.nf
new file mode 100644
index 00000000000..60f828101b9
--- /dev/null
+++ b/modules/nf-core/tar/main.nf
@@ -0,0 +1,74 @@
+// Archive a file or directory with tar, optionally compressing the result.
+//
+// `compress_type` is the suffix appended after `.tar` and selects the compressor:
+// '.bz2', '.xz', '.lz', '.lzma', '.lzo', '.zst', '.gz', or '' for no compression.
+// Emits the archive as `<prefix>.tar<compress_type>` plus a versions.yml.
+process TAR {
+    tag "${meta.id}"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/98/98946ea8217c35441352a94f3e0cd1dfa24137c323e8b0f5dfcb3123b465d0b1/data':
+        'community.wave.seqera.io/library/bzip2_gzip_lzip_lzop_pruned:5a822ddcf829e7af' }"
+
+    input:
+    tuple val(meta), path(input)
+    val compress_type
+
+    output:
+    tuple val(meta), path("*.tar${compress_type}"), emit: archive
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Archive suffix -> tar option selecting the matching compressor.
+    def compress_flags = [
+        '.bz2' : '--bzip2',
+        '.xz'  : '--xz',
+        '.lz'  : '--lzip',
+        '.lzma': '--lzma',
+        '.lzo' : '--lzop',
+        '.zst' : '--zstd',
+        '.gz'  : '--gzip',
+        ''     : ''
+    ]
+    // Look up by plain String so a GString value still matches the map keys.
+    def compress_key = "${compress_type}".toString()
+    // Fail before running tar if the suffix is not one of the supported keys.
+    if (!compress_flags.containsKey(compress_key)) {
+        error("ERROR: Invalid compress_type: ${compress_type} for TAR. Set as empty string for no compression. Compression options: ${compress_flags.keySet().join(", ")}")
+    }
+    def compress_flag = compress_flags[compress_key]
+
+    """
+    tar \\
+        -c \\
+        ${compress_flag} \\
+        ${args} \\
+        -f ${prefix}.tar${compress_type} \\
+        ${input}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        tar: \$(tar --version | grep tar | sed 's/.*) //g')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Name the placeholder after the requested suffix so it matches the
+    // `*.tar${compress_type}` output glob for every compress_type.
+    """
+    echo "" | gzip -c > ${prefix}.tar${compress_type}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        tar: \$(tar --version | grep tar | sed 's/.*) //g')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/tar/meta.yml b/modules/nf-core/tar/meta.yml
new file mode 100644
index 00000000000..9a3cbde631b
--- /dev/null
+++ b/modules/nf-core/tar/meta.yml
@@ -0,0 +1,67 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "tar"
+description: Compress directories into tarballs with various compression options
+keywords:
+  - untar
+  - tar
+  - tarball
+  - compression
+  - archive
+  - gzip
+  - targz
+tools:
+  - "tar":
+      description: "GNU Tar provides the ability to create tar archives, as well as
+        various other kinds of manipulation."
+      homepage: "https://www.gnu.org/software/tar/"
+      documentation: "https://www.gnu.org/software/tar/manual/"
+      licence: ["GPLv3"]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - input:
+        type: directory
+        description: A file or directory to be archived
+        pattern: "*/"
+        ontologies:
+          - edam: "http://edamontology.org/data_1049"
+  - - compress_type:
+        type: string
+        description: |
+          A string defining which type of (optional) compression to apply to the archive.
+          Provide an empty string in quotes for no compression
+        pattern: ".bz2|.xz|.lz|.lzma|.lzo|.zst|.gz"
+output:
+  - archive:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. 
`[ id:'sample1', single_end:false ]`
+          pattern: "*.tar{.bz2,.xz,.lz,.lzma,.lzo,.zst,.gz,}"
+          ontologies: &id001
+            - edam: "http://edamontology.org/format_25722"
+            - edam: "http://edamontology.org/format_2573"
+            - edam: "http://edamontology.org/format_3462"
+
+      - "*.tar${compress_type}":
+          type: file
+          description: |
+            Tar archive of the input, named `<prefix>.tar<compress_type>`
+            e.g. `test.tar.gz` when compress_type is `.gz`
+          pattern: "*.tar{.bz2,.xz,.lz,.lzma,.lzo,.zst,.gz,}"
+          ontologies: *id001
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@jfy133"
+maintainers:
+  - "@jfy133"
diff --git a/modules/nf-core/tar/tests/main.nf.test b/modules/nf-core/tar/tests/main.nf.test
new file mode 100644
index 00000000000..0c8dc4793b3
--- /dev/null
+++ b/modules/nf-core/tar/tests/main.nf.test
@@ -0,0 +1,210 @@
+// nf-core modules test tar
+nextflow_process {
+
+    name "Test Process TAR"
+    script "../main.nf"
+    process "TAR"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "tar"
+    tag "untar"
+
+    setup {
+        run("UNTAR") {
+            script "../../untar/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true)
+                ]
+                """
+            }
+        }
+    }
+
+    test("sarscov2 - genome - db - kraken2 - none") {
+
+        when {
+            process {
+                """
+                input[0] = UNTAR.out.untar
+                input[1] = ''
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.archive[0][1]).name,
+                    process.out.versions
+                    ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - genome - db - kraken2 - .gz") {
+
+        when {
+            process {
+                """
+                input[0] = UNTAR.out.untar
+                input[1] = '.gz'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.archive[0][1]).name,
+                    process.out.versions
+                    ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - genome - db - kraken2 - .bz2") {
+
+        when {
+            process {
+                """
+                input[0] = UNTAR.out.untar
+                input[1] = '.bz2'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.archive[0][1]).name,
+                    process.out.versions
+                    ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - genome - db - kraken2 - .lzip") {
+
+        when {
+            process {
+                """
+                input[0] = UNTAR.out.untar
+                input[1] = '.lz'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.archive[0][1]).name,
+                    process.out.versions
+                    ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - genome - db - kraken2 - .lzma") {
+
+        when {
+            process {
+                """
+                input[0] = UNTAR.out.untar
+                input[1] = '.lzma'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.archive[0][1]).name,
+                    process.out.versions
+                    ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - genome - db - kraken2 - .lzo") {
+
+        when {
+            process {
+                """
+                input[0] = UNTAR.out.untar
+                input[1] = '.lzo'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.archive[0][1]).name,
+                    process.out.versions
+                    ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - genome - db - kraken2 - .zst") {
+
+        when {
+            process {
+                """
+                input[0] = UNTAR.out.untar
+                input[1] = '.zst'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.archive[0][1]).name,
+                    process.out.versions
+                    ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - genome - db - kraken2 - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = UNTAR.out.untar
+                input[1] = '.gz'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/tar/tests/main.nf.test.snap b/modules/nf-core/tar/tests/main.nf.test.snap
new file mode 100644
index 00000000000..9881e7ee714
--- /dev/null
+++ b/modules/nf-core/tar/tests/main.nf.test.snap
@@ 
-0,0 +1,126 @@ +{ + "sarscov2 - genome - db - kraken2 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tar.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ], + "archive": [ + [ + { + "id": "test" + }, + "test.tar.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T17:39:29.878210779" + }, + "sarscov2 - genome - db - kraken2 - .bz2": { + "content": [ + "test.tar.bz2", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T19:04:03.312488057" + }, + "sarscov2 - genome - db - kraken2 - .zst": { + "content": [ + "test.tar.zst", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T19:04:42.327014114" + }, + "sarscov2 - genome - db - kraken2 - .lzip": { + "content": [ + "test.tar.lz", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T19:04:12.94431061" + }, + "sarscov2 - genome - db - kraken2 - .lzo": { + "content": [ + "test.tar.lzo", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T19:04:32.489438637" + }, + "sarscov2 - genome - db - kraken2 - .lzma": { + "content": [ + "test.tar.lzma", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T19:04:22.523407809" + }, + "sarscov2 - genome - db - kraken2 - .gz": { + "content": [ + "test.tar.gz", + [ + 
"versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T19:03:54.163215135" + }, + "sarscov2 - genome - db - kraken2 - none": { + "content": [ + "test.tar", + [ + "versions.yml:md5,5244284efaeb7329ac2e3a8c72432461" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-11T18:56:50.875583591" + } +} \ No newline at end of file