nf-core · LeonHafner · Nov 4, 2024 · Oct 29, 2024 · Oct 30, 2024 · Oct 31, 2024
@@ -550,6 +550,8 @@ jobs:
             path: modules/nf-core/deepvariant/rundeepvariant
           - profile: conda
             path: modules/nf-core/deepvariant/vcfstatsreport
+          - profile: conda
+            path: modules/nf-core/doubletdetection
           - profile: conda
             path: modules/nf-core/ensemblvep/vep
           - profile: conda

diff --git a/modules/nf-core/doubletdetection/main.nf b/modules/nf-core/doubletdetection/main.nf
@@ -0,0 +1,48 @@
+process DOUBLETDETECTION {
+    tag "$meta.id"
+    label 'process_medium'
+
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/anndata_louvain_pip_doubletdetection:42d2326cc250350b':
+        'community.wave.seqera.io/library/anndata_louvain_pip_doubletdetection:cbe92394c10372fa' }"
+
+    input:
+    tuple val(meta), path(h5ad)
+
+    output:
+    tuple val(meta), path("*.h5ad"), emit: h5ad
+    tuple val(meta), path("*.pkl") , emit: predictions
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Abort if any active profile is 'conda' or 'mamba'; this process only declares container images
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "DOUBLETDETECTION module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    prefix = task.ext.prefix ?: "${meta.id}" // output file prefix, falls back to the sample id; read by the template
+    template 'doubletdetection.py'
+
+    stub:
+    // Same profile guard as in the script section, so stub runs fail the same way under conda / mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "DOUBLETDETECTION module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    prefix = task.ext.prefix ?: "${meta.id}" // output file prefix, falls back to the sample id
+    """
+    export MPLCONFIGDIR=./tmp
+    export NUMBA_CACHE_DIR=./tmp
+
+    touch ${prefix}.h5ad
+    touch ${prefix}.pkl
+
+    cat <<-END_VERSIONS > versions.yml
+    ${task.process}:
+        python: \$(python3 -c 'import platform as pf; print(pf.python_version())')
+        anndata: \$(python3 -c 'import anndata as ad; print(ad.__version__)')
+        doubletdetection: \$(python3 -c 'import doubletdetection as dt; print(dt.__version__)')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/doubletdetection/meta.yml b/modules/nf-core/doubletdetection/meta.yml
@@ -0,0 +1,58 @@
+name: "doubletdetection"
+description: Doublet detection in single-cell RNA-seq data
+keywords:
+  - single-cell
+  - doublets
+  - doublet_detection
+tools:
+  - "doubletdetection":
+      description: "Doublet detection in single-cell RNA-seq data"
+      tool_dev_url: "https://github.com/JonathanShor/DoubletDetection"
+      doi: "10.5281/zenodo.6349517"
+      licence: ["MIT"]
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+
+    - h5ad:
+        type: file
+        description: H5AD anndata object
+        pattern: "*.h5ad"
+
+output:
+  - h5ad:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.h5ad":
+          type: file
+          description: H5AD anndata object
+          pattern: "*.h5ad"
+
+  - predictions:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.pkl":
+          type: file
+          description: pandas dataframe containing the doublet classification
+          pattern: "*.pkl"
+
+  - versions:
+      - "versions.yml":
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+
+authors:
+  - "@LeonHafner"
+maintainers:
+  - "@LeonHafner"
diff --git a/modules/nf-core/doubletdetection/templates/doubletdetection.py b/modules/nf-core/doubletdetection/templates/doubletdetection.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+
+import os
+import platform
+
+os.environ["MPLCONFIGDIR"] = "./tmp"
+os.environ["NUMBA_CACHE_DIR"] = "./tmp"
+
+import anndata as ad
+import doubletdetection
+
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Formats a (possibly nested) dictionary as a YAML-like string.
+
+    Args:
+        data (dict): The dictionary to format; dict values are formatted recursively.
+        indent (int): The current nesting depth; each level is indented by four spaces.
+
+    Returns:
+        str: One 'key: value' line per non-dict entry, with nested keys indented below their parent key.
+    """
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "    " * indent
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"
+    return yaml_str
+
+
+adata = ad.read_h5ad("${h5ad}")
+
+clf = doubletdetection.BoostClassifier()
+doublets = clf.fit(adata.X).predict()
+scores = clf.doublet_score()
+
+adata.obs["doublet"] = [label == 1 for label in doublets]  # boolean flag: True only where the predicted label equals 1
+adata.obs["doublet_score"] = scores
+
+adata.write_h5ad("${prefix}.h5ad")
+
+df = adata.obs[["doublet"]]
+df.columns = ["${prefix}"]  # name the single column after the output prefix
+df.to_pickle("${prefix}.pkl")
+
+# Record the versions of Python and the imported packages under the process name
+versions = {
+    "${task.process}": {
+        "python": platform.python_version(),
+        "anndata": ad.__version__,
+        "doubletdetection": doubletdetection.__version__,
+    }
+}
+
+with open("versions.yml", "w") as f:
+    f.write(format_yaml_like(versions))
diff --git a/modules/nf-core/doubletdetection/tests/main.nf.test b/modules/nf-core/doubletdetection/tests/main.nf.test
@@ -0,0 +1,67 @@
+nextflow_process {
+
+    name "Test Process DOUBLETDETECTION"
+    script "../main.nf"
+    process "DOUBLETDETECTION"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "doubletdetection"
+
+    test("scdownstream - h5ad") {
+        when {
+            process {
+                """
+                input[0] = [
+                     [id: 'test'],
+                     file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_custom_emptydrops_filter_matrix.h5ad", checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            def mb = 1024 * 1024
+            def kb = 1024
+            assertAll(
+                { assert process.success },
+
+                // Output content is not reproducible (PhenoGraph supports no random seeding, see
+                // https://github.com/jacoblevine/PhenoGraph/issues/16), so only check existence and minimum size
+                { assert path(process.out.h5ad.get(0).get(1)).exists() },
+                { assert path(process.out.predictions.get(0).get(1)).exists() },
+
+                { assert path(process.out.h5ad.get(0).get(1)).size() > 30 * mb },
+                { assert path(process.out.predictions.get(0).get(1)).size() > 50 * kb },
+
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("scdownstream - h5ad - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                     [id: 'test'],
+                     file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_custom_emptydrops_filter_matrix.h5ad", checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/doubletdetection/tests/main.nf.test.snap b/modules/nf-core/doubletdetection/tests/main.nf.test.snap
@@ -0,0 +1,63 @@
+{
+    "scdownstream - h5ad - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,b339d31cdc0422b203a26440591e1f12"
+                ],
+                "h5ad": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "predictions": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,b339d31cdc0422b203a26440591e1f12"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-11-01T09:38:38.099329542"
+    },
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,b339d31cdc0422b203a26440591e1f12"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-11-01T09:38:20.985491914"
+    }
+}