Skip to content

Commit

Permalink
Merge pull request #87 from databio/partial_processing
Browse files Browse the repository at this point in the history
Added partial processing
  • Loading branch information
khoroshevskyi authored Dec 19, 2024
2 parents 6f0d977 + a4f4cea commit a75b987
Show file tree
Hide file tree
Showing 17 changed files with 471 additions and 62 deletions.
2 changes: 1 addition & 1 deletion bedboss/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.1"
__version__ = "0.6.0"
8 changes: 8 additions & 0 deletions bedboss/bbuploader/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ def upload_all(
reinit_skipper: bool = typer.Option(
False, help="Reinitialize skipper. [Default: False]"
),
lite: bool = typer.Option(
False, help="Run the pipeline in lite mode. [Default: False]"
),
):
from .main import upload_all as upload_all_function

Expand All @@ -83,6 +86,7 @@ def upload_all(
reinit_skipper=reinit_skipper,
overwrite=overwrite,
overwrite_bedset=overwrite_bedset,
lite=lite,
)


Expand Down Expand Up @@ -124,6 +128,9 @@ def upload_gse(
reinit_skipper: bool = typer.Option(
False, help="Reinitialize skipper. [Default: False]"
),
lite: bool = typer.Option(
False, help="Run the pipeline in lite mode. [Default: False]"
),
):
from .main import upload_gse as upload_gse_function

Expand All @@ -142,6 +149,7 @@ def upload_gse(
reinit_skipper=reinit_skipper,
overwrite=overwrite,
overwrite_bedset=overwrite_bedset,
lite=lite,
)


Expand Down
21 changes: 16 additions & 5 deletions bedboss/bbuploader/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from pephubclient import PEPHubClient
from pephubclient.helpers import MessageHandler
from pephubclient.models import SearchReturnModel
from setuptools.command.egg_info import overwrite_arg
from sqlalchemy import and_, select
from sqlalchemy.orm import Session

Expand All @@ -28,13 +27,14 @@
from bedboss.bedbuncher.bedbuncher import run_bedbuncher
from bedboss.exceptions import BedBossException
from bedboss.skipper import Skipper
from bedboss.utils import download_file, standardize_genome_name
from bedboss.utils import calculate_time, download_file, standardize_genome_name
from bedboss.utils import standardize_pep as pep_standardizer

_LOGGER = logging.getLogger(PKG_NAME)
_LOGGER.setLevel(logging.DEBUG)


@calculate_time
def upload_all(
bedbase_config: str,
outfolder: str = os.getcwd(),
Expand All @@ -54,6 +54,7 @@ def upload_all(
reinit_skipper=False,
overwrite=False,
overwrite_bedset=False,
lite=False,
):
"""
This is the main function responsible for processing bed files from PEPHub.
Expand All @@ -75,12 +76,13 @@ def upload_all(
:param use_skipper: use skipper to skip files that were already processed and logged locally. Skipper creates a local log of processed
and failed files.
:param reinit_skipper: reinitialize skipper, if set to True, skipper will be reinitialized and all log files will be cleaned
:param lite: lite mode, which skips statistics processing to save memory and time
"""

phc = PEPHubClient()
os.makedirs(outfolder, exist_ok=True)

bbagent = BedBaseAgent(config=bedbase_config)
bbagent = BedBaseAgent(config=bedbase_config, init_ml=not lite)
genome = standardize_genome_name(genome)

pep_annotation_list = find_peps(
Expand Down Expand Up @@ -154,6 +156,7 @@ def upload_all(
preload=preload,
overwrite=overwrite,
overwrite_bedset=overwrite_bedset,
lite=lite,
)
except Exception as err:
_LOGGER.error(
Expand Down Expand Up @@ -268,6 +271,7 @@ def find_peps(
)


@calculate_time
def upload_gse(
gse: str,
bedbase_config: Union[str, BedBaseAgent],
Expand All @@ -282,7 +286,8 @@ def upload_gse(
use_skipper=True,
reinit_skipper=False,
overwrite=False,
overwrite_bedset=False,
overwrite_bedset=True,
lite=False,
):
"""
Upload bed files from GEO series to BedBase
Expand All @@ -302,10 +307,11 @@ def upload_gse(
:param reinit_skipper: reinitialize skipper, if set to True, skipper will be reinitialized and all log files will be cleaned
:param overwrite: overwrite existing bedfiles
:param overwrite_bedset: overwrite existing bedset
:param lite: lite mode, which skips statistics processing to save memory and time
:return: None
"""
bbagent = BedBaseAgent(config=bedbase_config)
bbagent = BedBaseAgent(config=bedbase_config, init_ml=not lite)

with Session(bbagent.config.db_engine.engine) as session:
_LOGGER.info(f"Processing: '{gse}'")
Expand Down Expand Up @@ -352,6 +358,7 @@ def upload_gse(
overwrite_bedset=overwrite_bedset,
use_skipper=use_skipper,
reinit_skipper=reinit_skipper,
lite=lite,
)
except Exception as e:
_LOGGER.error(f"Processing of '{gse}' failed with error: {e}")
Expand Down Expand Up @@ -403,6 +410,7 @@ def _upload_gse(
use_skipper: bool = True,
reinit_skipper: bool = False,
preload: bool = True,
lite=False,
) -> ProjectProcessingStatus:
"""
Upload bed files from GEO series to BedBase
Expand All @@ -421,6 +429,7 @@ def _upload_gse(
and failed files.
:param reinit_skipper: reinitialize skipper, if set to True, skipper will be reinitialized and all logs will be cleaned
:param preload: pre - download files to the local folder (used for faster reproducibility)
:param lite: lite mode, which skips statistics processing to save memory and time
:return: ProjectProcessingStatus
"""
if isinstance(bedbase_config, str):
Expand Down Expand Up @@ -540,6 +549,7 @@ def _upload_gse(
upload_s3=True,
upload_qdrant=True,
force_overwrite=overwrite,
lite=lite,
)
uploaded_files.append(file_digest)
if skipper_obj:
Expand Down Expand Up @@ -571,6 +581,7 @@ def _upload_gse(
upload_s3=True,
no_fail=True,
force_overwrite=overwrite_bedset,
lite=lite,
)

else:
Expand Down
Loading

0 comments on commit a75b987

Please sign in to comment.