Skip to content

Commit

Permalink
added reprocess functions for bed and bedsets and cli options
Browse files Browse the repository at this point in the history
  • Loading branch information
khoroshevskyi committed Dec 18, 2024
1 parent 60563bd commit 5ce2d6d
Show file tree
Hide file tree
Showing 7 changed files with 223 additions and 18 deletions.
2 changes: 1 addition & 1 deletion bedboss/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.1"
__version__ = "0.6.0"
5 changes: 3 additions & 2 deletions bedboss/bbuploader/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from pephubclient import PEPHubClient
from pephubclient.helpers import MessageHandler
from pephubclient.models import SearchReturnModel
from setuptools.command.egg_info import overwrite_arg
from sqlalchemy import and_, select
from sqlalchemy.orm import Session

Expand All @@ -28,13 +27,14 @@
from bedboss.bedbuncher.bedbuncher import run_bedbuncher
from bedboss.exceptions import BedBossException
from bedboss.skipper import Skipper
from bedboss.utils import download_file, standardize_genome_name
from bedboss.utils import calculate_time, download_file, standardize_genome_name
from bedboss.utils import standardize_pep as pep_standardizer

_LOGGER = logging.getLogger(PKG_NAME)
_LOGGER.setLevel(logging.DEBUG)


@calculate_time
def upload_all(
bedbase_config: str,
outfolder: str = os.getcwd(),
Expand Down Expand Up @@ -271,6 +271,7 @@ def find_peps(
)


@calculate_time
def upload_gse(
gse: str,
bedbase_config: Union[str, BedBaseAgent],
Expand Down
129 changes: 122 additions & 7 deletions bedboss/bedboss.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
import datetime
import logging
import os
import subprocess
from typing import Union

import bbconf
import yaml
import pephubclient
import peppy
import pypiper
import yaml
from bbconf.bbagent import BedBaseAgent
from bbconf.const import DEFAULT_LICENSE
from bbconf.models.base_models import FileModel
from eido import validate_project
import datetime
from geniml.bbclient import BBClient
from pephubclient.helpers import MessageHandler as m
from pephubclient.helpers import is_registry_path
from geniml.bbclient import BBClient

from bedboss._version import __version__
from bedboss.bedbuncher import run_bedbuncher
Expand All @@ -32,7 +32,7 @@
)
from bedboss.refgenome_validator.main import ReferenceValidator
from bedboss.skipper import Skipper
from bedboss.utils import get_genome_digest, standardize_genome_name, calculate_time
from bedboss.utils import calculate_time, get_genome_digest, standardize_genome_name
from bedboss.utils import standardize_pep as pep_standardizer

_LOGGER = logging.getLogger(PKG_NAME)
Expand All @@ -50,6 +50,7 @@ def requirements_check() -> None:
)


@calculate_time
def run_all(
input_file: str,
input_type: str,
Expand Down Expand Up @@ -264,6 +265,7 @@ def run_all(
return bed_metadata.bed_digest


@calculate_time
def insert_pep(
bedbase_config: str,
output_folder: str,
Expand All @@ -278,6 +280,7 @@ def insert_pep(
ensdb: str = None,
just_db_commit: bool = False,
force_overwrite: bool = False,
update: bool = False,
upload_s3: bool = False,
upload_pephub: bool = False,
upload_qdrant: bool = False,
Expand Down Expand Up @@ -306,6 +309,7 @@ def insert_pep(
:param str ensdb: a full path to the ensdb gtf file required for genomes not in GDdata
:param bool just_db_commit: whether save only to the database (Without saving locally )
:param bool force_overwrite: whether to overwrite the existing record
:param bool update: whether to update the record in the database. This option will overwrite the force_overwrite option. [Default: False]
:param bool upload_s3: whether to upload to s3
:param bool upload_pephub: whether to push bedfiles and metadata to pephub (default: False)
:param bool upload_qdrant: whether to execute qdrant indexing
Expand Down Expand Up @@ -378,6 +382,7 @@ def insert_pep(
ensdb=ensdb,
just_db_commit=just_db_commit,
force_overwrite=force_overwrite,
update=update,
upload_qdrant=upload_qdrant,
upload_s3=upload_s3,
upload_pephub=upload_pephub,
Expand Down Expand Up @@ -427,12 +432,12 @@ def insert_pep(


@calculate_time
def run_unprocessed_beds(
def reprocess_all(
bedbase_config: Union[str, BedBaseAgent],
output_folder: str,
limit: int = 10,
nofail: bool = False,
):
) -> None:
"""
Run bedboss pipeline for all unprocessed beds in the bedbase
Expand Down Expand Up @@ -504,7 +509,7 @@ def run_unprocessed_beds(
) as file:
yaml.dump(failed_samples, file)

from rich import print
m.print_warning(f"Logs with failed samples are saved in {output_folder}")

m.print_success(f"Processing completed successfully")

Expand All @@ -515,3 +520,113 @@ def run_unprocessed_beds(
success_files=unprocessed_beds.limit - len(failed_samples),
)
print(print_values)


@calculate_time
def reprocess_one(
    bedbase_config: Union[str, BedBaseAgent],
    output_folder: str,
    identifier: str,
) -> None:
    """
    Reprocess a single bed record that is already registered in the bedbase.

    The record annotation is fetched with ``bbagent.bed.get``, the bed file is
    loaded through ``BBClient.load_bed`` and ``run_all`` is executed again with
    ``update=True`` and every upload target (qdrant, s3, pephub) switched on.

    :param bedbase_config: path to the bedbase configuration file, or an
        already constructed BedBaseAgent
    :param output_folder: output folder of the pipeline
    :param identifier: identifier of the bed record to reprocess
    :raises BedBossException: if bedbase_config is neither a str nor a BedBaseAgent
    :return: None
    """

    # Reject unsupported config types up front, then resolve the agent.
    if not isinstance(bedbase_config, (str, bbconf.BedBaseAgent)):
        raise BedBossException("Incorrect bedbase_config type. Exiting...")

    if isinstance(bedbase_config, str):
        agent = BedBaseAgent(config=bedbase_config)
    else:
        agent = bedbase_config

    client = BBClient()

    annotation = agent.bed.get(identifier)
    loaded_bed = client.load_bed(annotation.id)

    run_all(
        # What to process and where to write it
        input_file=loaded_bed.path,
        input_type="bed",
        outfolder=output_folder,
        bedbase_config=agent,
        # Metadata carried over from the existing record
        genome=annotation.genome_alias,
        name=annotation.name,
        license_id=annotation.license_id,
        other_metadata=None,
        # Optional reference inputs are not supplied when reprocessing
        rfg_config=None,
        chrom_sizes=None,
        open_signal_matrix=None,
        ensdb=None,
        # Processing switches
        check_qc=False,
        validate_reference=True,
        light=False,
        # Persist results: update the existing record and push everywhere
        just_db_commit=False,
        update=True,
        upload_qdrant=True,
        upload_s3=True,
        upload_pephub=True,
        # No universe handling and no external pipeline manager
        universe=False,
        universe_method=None,
        universe_bedset=None,
        pm=None,
    )

    _LOGGER.info(f"Successfully processed {identifier}")


@calculate_time
def reprocess_bedset(
    bedbase_config: Union[str, BedBaseAgent],
    output_folder: str,
    identifier: str,
    no_fail: bool = True,
    heavy: bool = False,
):
    """
    Recalculate an existing bedset from the bedbase.

    The bedset annotation is fetched with ``bbagent.bedset.get`` and handed to
    ``run_bedbuncher`` with ``force_overwrite=True``. Everything in the stored
    annotation except ``bed_ids`` is passed along as the ``annotation`` argument.

    :param bedbase_config: path to the bedbase configuration file, or an
        already constructed BedBaseAgent
    :param output_folder: output folder of the pipeline
    :param identifier: bedset identifier
    :param no_fail: forwarded unchanged to run_bedbuncher
        (NOTE(review): presumably suppresses the error when the bedset could
        not be added to the database - confirm against run_bedbuncher)
    :param heavy: whether to use heavy processing (calculate plots for the
        bedset); the same flag also decides whether results are uploaded to s3
    :raises BedBossException: if bedbase_config is neither a str nor a BedBaseAgent
    :return: None
    """

    # Reject unsupported config types up front, then resolve the agent.
    if not isinstance(bedbase_config, (str, bbconf.BedBaseAgent)):
        raise BedBossException("Incorrect bedbase_config type. Exiting...")

    if isinstance(bedbase_config, str):
        agent = BedBaseAgent(config=bedbase_config)
    else:
        agent = bedbase_config

    stored_bedset = agent.bedset.get(identifier)

    # bed_ids travel separately through the bed_set argument below.
    extra_annotation = dict(stored_bedset.model_dump(exclude={"bed_ids"}))

    run_bedbuncher(
        bedbase_config=agent,
        record_id=stored_bedset.id,
        bed_set=stored_bedset.bed_ids,
        name=stored_bedset.name,
        output_folder=output_folder,
        description=stored_bedset.description,
        annotation=extra_annotation,
        heavy=heavy,
        light=False,
        upload_pephub=False,
        upload_s3=heavy,
        no_fail=no_fail,
        force_overwrite=True,
    )
79 changes: 79 additions & 0 deletions bedboss/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ def run_all(
force_overwrite: bool = typer.Option(
False, help="Force overwrite the output files"
),
update: bool = typer.Option(
False,
help="Update the bedbase database with the new record if it exists. This overwrites 'force_overwrite' option",
),
light: bool = typer.Option(
False, help="Run the pipeline in light mode. [Default: False]"
),
Expand Down Expand Up @@ -135,6 +139,7 @@ def run_all(
light=light,
just_db_commit=just_db_commit,
force_overwrite=force_overwrite,
update=update,
upload_qdrant=upload_qdrant,
upload_s3=upload_s3,
upload_pephub=upload_pephub,
Expand Down Expand Up @@ -168,6 +173,10 @@ def run_pep(
force_overwrite: bool = typer.Option(
False, help="Force overwrite the output files"
),
update: bool = typer.Option(
False,
help="Update the bedbase database with the new record if it exists. This overwrites 'force_overwrite' option",
),
upload_qdrant: bool = typer.Option(True, help="Upload to Qdrant"),
upload_s3: bool = typer.Option(True, help="Upload to S3"),
upload_pephub: bool = typer.Option(True, help="Upload to PEPHub"),
Expand Down Expand Up @@ -200,6 +209,7 @@ def run_pep(
ensdb=ensdb,
just_db_commit=just_db_commit,
force_overwrite=force_overwrite,
update=update,
license_id=license_id,
upload_s3=upload_s3,
upload_pephub=upload_pephub,
Expand All @@ -218,6 +228,75 @@ def run_pep(
)


@app.command(help="Run unprocessed files, or reprocess them")
def reprocess_all(
    bedbase_config: str = typer.Option(
        ...,
        help="Path to the bedbase config file",
        exists=True,
        file_okay=True,
        readable=True,
    ),
    outfolder: str = typer.Option(..., help="Path to the output folder"),
    limit: int = typer.Option(100, help="Limit the number of files to reprocess"),
    no_fail: bool = typer.Option(True, help="Do not fail on error"),
):
    """
    CLI command: run the bedboss pipeline for unprocessed beds in the bedbase.

    Thin wrapper that forwards the command-line options to
    ``bedboss.bedboss.reprocess_all``.

    :param bedbase_config: path to the bedbase config file
    :param outfolder: path to the output folder
    :param limit: maximum number of files to reprocess
    :param no_fail: do not fail on error
    :return: None
    """
    # Aliased on import because this command shares its name with the
    # library function it delegates to.
    from bedboss.bedboss import reprocess_all as reprocess_all_function

    # Call the imported library function, not this command itself: the
    # command has no ``output_folder`` parameter, so calling it by its own
    # name raised a TypeError. The library function spells its flag
    # ``nofail`` (no underscore).
    reprocess_all_function(
        bedbase_config=bedbase_config,
        output_folder=outfolder,
        limit=limit,
        nofail=no_fail,
    )


@app.command(help="Run unprocessed file, or reprocess it [Only 1 file]")
def reprocess_one(
    bedbase_config: str = typer.Option(
        ...,
        help="Path to the bedbase config file",
        exists=True,
        file_okay=True,
        readable=True,
    ),
    outfolder: str = typer.Option(..., help="Path to the output folder"),
    identifier: str = typer.Option(..., help="Identifier of the bed file"),
):
    """
    CLI command: reprocess a single bed file from the bedbase.

    Thin wrapper that forwards the command-line options to
    ``bedboss.bedboss.reprocess_one``.

    :param bedbase_config: path to the bedbase config file
    :param outfolder: path to the output folder
    :param identifier: identifier of the bed file
    :return: None
    """
    # Aliased on import because this command shares its name with the
    # library function it delegates to.
    from bedboss.bedboss import reprocess_one as reprocess_one_function

    # Call the imported library function, not this command itself: the
    # command has no ``output_folder`` parameter, so calling it by its own
    # name raised a TypeError instead of running the pipeline.
    reprocess_one_function(
        bedbase_config=bedbase_config,
        output_folder=outfolder,
        identifier=identifier,
    )


@app.command(help="Reprocess a bedset")
def reprocess_bedset(
    bedbase_config: str = typer.Option(
        ...,
        help="Path to the bedbase config file",
        exists=True,
        file_okay=True,
        readable=True,
    ),
    outfolder: str = typer.Option(..., help="Path to the output folder"),
    identifier: str = typer.Option(..., help="Bedset ID"),
    no_fail: bool = typer.Option(True, help="Do not fail on error"),
    heavy: bool = typer.Option(False, help="Run the heavy version of the pipeline"),
):
    """
    CLI command: recalculate a bedset stored in the bedbase.

    Thin wrapper that forwards the command-line options to
    ``bedboss.bedboss.reprocess_bedset``.

    :param bedbase_config: path to the bedbase config file
    :param outfolder: path to the output folder
    :param identifier: bedset ID
    :param no_fail: do not fail on error
    :param heavy: run the heavy version of the pipeline
    :return: None
    """
    # Aliased on import because this command shares its name with the
    # library function it delegates to.
    from bedboss.bedboss import reprocess_bedset as reprocess_bedset_function

    # Map CLI option names onto the library function's parameter names.
    forwarded_options = {
        "bedbase_config": bedbase_config,
        "output_folder": outfolder,
        "identifier": identifier,
        "no_fail": no_fail,
        "heavy": heavy,
    }
    reprocess_bedset_function(**forwarded_options)


@app.command(help=f"Create a bed files form a [{', '.join(options_list)}] file")
def make_bed(
input_file: str = typer.Option(
Expand Down
5 changes: 2 additions & 3 deletions bedboss/utils.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import glob
import logging
import os
import urllib.request
import time
import urllib.request
from functools import wraps

import peppy
import requests
from bedms import AttrStandardizer
from pephubclient.files_manager import FilesManager
from peppy.const import SAMPLE_RAW_DICT_KEY
from pypiper import PipelineManager
from functools import wraps

from bedboss.refgenome_validator.main import ReferenceValidator

Expand Down Expand Up @@ -51,7 +51,6 @@ def standardize_genome_name(input_genome: str, bedfile: str = None) -> str:
return input_genome


# %%
def download_file(url: str, path: str, no_fail: bool = False) -> None:
"""
Download file from the url to specific location
Expand Down
Loading

0 comments on commit 5ce2d6d

Please sign in to comment.