Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New module: doubletdetection #6897

Merged
merged 17 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,8 @@ jobs:
path: modules/nf-core/deepvariant/rundeepvariant
- profile: conda
path: modules/nf-core/deepvariant/vcfstatsreport
- profile: conda
path: modules/nf-core/doubletdetection
- profile: conda
path: modules/nf-core/ensemblvep/vep
- profile: conda
Expand Down
48 changes: 48 additions & 0 deletions modules/nf-core/doubletdetection/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Runs the `doubletdetection.py` template on an H5AD file and emits an H5AD file,
// a pickle file and a versions.yml file. Conda / Mamba profiles are rejected.
process DOUBLETDETECTION {
tag "$meta.id"
label 'process_medium'

// Image selection: the ORAS image is used when the container engine is Singularity
// and `task.ext.singularity_pull_docker_container` is not set; otherwise the
// second (non-ORAS) image reference is used.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'oras://community.wave.seqera.io/library/anndata_louvain_pip_doubletdetection:42d2326cc250350b':
'community.wave.seqera.io/library/anndata_louvain_pip_doubletdetection:cbe92394c10372fa' }"

input:
// meta: sample information map (its `id` is used for the tag and default prefix)
// h5ad: input file handed to the template
tuple val(meta), path(h5ad)

output:
tuple val(meta), path("*.h5ad"), emit: h5ad
tuple val(meta), path("*.pkl") , emit: predictions
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error "DOUBLETDETECTION module does not support Conda. Please use Docker / Singularity / Podman instead."
}
// Output file prefix: `task.ext.prefix` if set, otherwise the sample id.
// NOTE(review): `prefix` is assigned without `def`, presumably so that the
// template below can reference it - confirm before changing.
prefix = task.ext.prefix ?: "${meta.id}"
template 'doubletdetection.py'

stub:
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error "DOUBLETDETECTION module does not support Conda. Please use Docker / Singularity / Podman instead."
}
prefix = task.ext.prefix ?: "${meta.id}"
// Stub run: create empty placeholder outputs and record the tool versions
// reported by the python3 interpreter available to the task.
"""
export MPLCONFIGDIR=./tmp
export NUMBA_CACHE_DIR=./tmp

touch ${prefix}.h5ad
touch ${prefix}.pkl

cat <<-END_VERSIONS > versions.yml
${task.process}:
python: \$(python3 -c 'import platform as pf; print(pf.python_version())')
anndata: \$(python3 -c 'import anndata as ad; print(ad.__version__)')
doubletdetection: \$(python3 -c 'import doubletdetection as dt; print(dt.__version__)')
END_VERSIONS
"""
}
58 changes: 58 additions & 0 deletions modules/nf-core/doubletdetection/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Module metadata describing the doubletdetection module: the wrapped tool,
# its input and its outputs.
name: "doubletdetection"
description: Doublet detection in single-cell RNA-seq data
keywords:
- single-cell
- doublets
- doublet_detection
# Tool wrapped by this module.
tools:
- "doubletdetection":
description: "Doublet detection in single-cell RNA-seq data"
tool_dev_url: "https://github.com/JonathanShor/DoubletDetection"
doi: "10.5281/zenodo.6349517"
licence: ["MIT"]

# Single input: a sample information map together with an H5AD file.
input:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`

- h5ad:
type: file
description: H5AD anndata object
pattern: "*.h5ad"

# Three outputs are described below: h5ad, predictions and versions.
output:
- h5ad:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- "*.h5ad":
type: file
description: H5AD anndata object
pattern: "*.h5ad"

- predictions:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- "*.pkl":
type: file
description: pandas dataframe containing the doublet classification
pattern: "*.pkl"

- versions:
- "versions.yml":
type: file
description: File containing software versions
pattern: "versions.yml"

authors:
- "@LeonHafner"
maintainers:
- "@LeonHafner"
58 changes: 58 additions & 0 deletions modules/nf-core/doubletdetection/templates/doubletdetection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env python3

import os
import platform

# Point MPLCONFIGDIR and NUMBA_CACHE_DIR at ./tmp. These are set before
# anndata and doubletdetection are imported below.
# NOTE(review): presumably because the default config/cache locations are not
# writable in the task environment - confirm.
os.environ["MPLCONFIGDIR"] = "./tmp"
os.environ["NUMBA_CACHE_DIR"] = "./tmp"

import anndata as ad
import doubletdetection


def format_yaml_like(data: dict, indent: int = 0) -> str:
    """Render a (possibly nested) dictionary as YAML-like text.

    Each key produces one line. A value that is itself a dictionary is
    rendered recursively beneath its key, one indentation level deeper.

    Args:
        data (dict): Mapping to render.
        indent (int): Indentation level applied to the keys of this mapping.

    Returns:
        str: The rendered text, one entry per key in iteration order.
    """
    pad = " " * indent
    pieces = []
    for name, entry in data.items():
        # The doubled backslash in the line terminators is deliberate and
        # must be kept exactly as written.
        if not isinstance(entry, dict):
            pieces.append(f"{pad}{name}: {entry}\\n")
            continue
        nested = format_yaml_like(entry, indent + 1)
        pieces.append(f"{pad}{name}:\\n{nested}")
    return "".join(pieces)


# Read the input object.
dataset = ad.read_h5ad("${h5ad}")

# Fit the classifier on the data matrix, then fetch labels and scores.
classifier = doubletdetection.BoostClassifier()
labels = classifier.fit(dataset.X).predict()
doublet_scores = classifier.doublet_score()

# Store a boolean doublet flag (label equal to 1) and the score per observation.
dataset.obs["doublet"] = [label == 1 for label in labels]
dataset.obs["doublet_score"] = doublet_scores

dataset.write_h5ad("${prefix}.h5ad")

# Export only the doublet flag, with the column named after the output prefix.
predictions = dataset.obs[["doublet"]]
predictions.columns = ["${prefix}"]
predictions.to_pickle("${prefix}.pkl")

# Versions
tool_versions = {
    "python": platform.python_version(),
    "anndata": ad.__version__,
    "doubletdetection": doubletdetection.__version__,
}
versions = {"${task.process}": tool_versions}

with open("versions.yml", "w") as handle:
    handle.write(format_yaml_like(versions))
67 changes: 67 additions & 0 deletions modules/nf-core/doubletdetection/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// nf-test suite for the DOUBLETDETECTION process: one real run and one stub run,
// both using the same H5AD file from the nf-core test-datasets repository.
nextflow_process {

    name "Test Process DOUBLETDETECTION"
    script "../main.nf"
    process "DOUBLETDETECTION"

    tag "modules"
    tag "modules_nfcore"
    tag "doubletdetection"

    // Test name corrected from "scdownsteam" so that it matches the stub test below.
    // The snapshot for this test is stored under the explicit key "versions",
    // so the rename does not invalidate the existing snapshot file.
    test("scdownstream - h5ad") {
        when {
            process {
                """
                input[0] = [
                [id: 'test'],
                file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_custom_emptydrops_filter_matrix.h5ad", checkIfExists: true)
                ]
                """
            }
        }

        then {
            def mb = 1024 * 1024
            def kb = 1024
            assertAll(
                { assert process.success },

                // Only check that the outputs exist, as PhenoGraph supports no random seeding:
                // https://github.com/jacoblevine/PhenoGraph/issues/16
                { assert path(process.out.h5ad.get(0).get(1)).exists() },
                { assert path(process.out.predictions.get(0).get(1)).exists() },

                // Size lower bounds stand in for content checksums on these outputs.
                { assert path(process.out.h5ad.get(0).get(1)).size() > 30 * mb },
                { assert path(process.out.predictions.get(0).get(1)).size() > 50 * kb },

                { assert snapshot(process.out.versions).match("versions") }
            )
        }

    }

    test("scdownstream - h5ad - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                [id: 'test'],
                file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_custom_emptydrops_filter_matrix.h5ad", checkIfExists: true)
                ]
                """
            }
        }

        then {
            // All stub outputs are snapshotted in full.
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

}
63 changes: 63 additions & 0 deletions modules/nf-core/doubletdetection/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{
"scdownstream - h5ad - stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
[
{
"id": "test"
},
"test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"2": [
"versions.yml:md5,b339d31cdc0422b203a26440591e1f12"
],
"h5ad": [
[
{
"id": "test"
},
"test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"predictions": [
[
{
"id": "test"
},
"test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,b339d31cdc0422b203a26440591e1f12"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2024-11-01T09:38:38.099329542"
},
"versions": {
"content": [
[
"versions.yml:md5,b339d31cdc0422b203a26440591e1f12"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2024-11-01T09:38:20.985491914"
}
}
Loading