Skip to content

Commit

Permalink
🎨 Merge conflict
Browse files Browse the repository at this point in the history
Signed-off-by: zethson <lukas.heumos@posteo.net>
  • Loading branch information
Zethson committed Jan 7, 2025
2 parents 2ce631e + 684bd38 commit 4a03222
Show file tree
Hide file tree
Showing 52 changed files with 398 additions and 153 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ repos:
docs/notes/
)
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.5
rev: v0.8.6
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
Expand Down
8 changes: 6 additions & 2 deletions docs/bio-registries.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,9 @@
"metadata": {},
"outputs": [],
"source": [
"bt.Gene.validate([\"ENSG00000000419\", \"ENSMUSG00002076988\"], field=bt.Gene.ensembl_gene_id)"
"bt.Gene.validate(\n",
" [\"ENSG00000000419\", \"ENSMUSG00002076988\"], field=bt.Gene.ensembl_gene_id\n",
")"
]
},
{
Expand Down Expand Up @@ -721,7 +723,9 @@
"outputs": [],
"source": [
"# validate against the NCBI Taxonomy\n",
"bt.Organism.validate([\"iris setosa\", \"iris versicolor\", \"iris virginica\"], source=source)"
"bt.Organism.validate(\n",
" [\"iris setosa\", \"iris versicolor\", \"iris virginica\"], source=source\n",
")"
]
},
{
Expand Down
31 changes: 23 additions & 8 deletions docs/curate-any.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,24 @@
"import zarr\n",
"import numpy as np\n",
"\n",
"data = zarr.create((10,), dtype=[('value', 'f8'), (\"gene\", \"U15\"), ('disease', 'U16')], store='data.zarr')\n",
"data[\"gene\"] = [\"ENSG00000139618\", \"ENSG00000141510\", \"ENSG00000133703\", \"ENSG00000157764\", \"ENSG00000171862\", \"ENSG00000091831\", \"ENSG00000141736\", \"ENSG00000133056\", \"ENSG00000146648\", \"ENSG00000118523\"]\n",
"data[\"disease\"] = np.random.choice(['MONDO:0004975', 'MONDO:0004980'], 10)"
"data = zarr.create(\n",
" (10,),\n",
" dtype=[(\"value\", \"f8\"), (\"gene\", \"U15\"), (\"disease\", \"U16\")],\n",
" store=\"data.zarr\",\n",
")\n",
"data[\"gene\"] = [\n",
" \"ENSG00000139618\",\n",
" \"ENSG00000141510\",\n",
" \"ENSG00000133703\",\n",
" \"ENSG00000157764\",\n",
" \"ENSG00000171862\",\n",
" \"ENSG00000091831\",\n",
" \"ENSG00000141736\",\n",
" \"ENSG00000133056\",\n",
" \"ENSG00000146648\",\n",
" \"ENSG00000118523\",\n",
"]\n",
"data[\"disease\"] = np.random.default_rng().choice([\"MONDO:0004975\", \"MONDO:0004980\"], 10)"
]
},
{
Expand Down Expand Up @@ -164,9 +179,9 @@
"outputs": [],
"source": [
"projects = ln.ULabel.from_values(\n",
" [\"Project A\", \"Project B\"], \n",
" field=ln.ULabel.name, \n",
" create=True, # create non-existing labels rather than attempting to load them from the database\n",
" [\"Project A\", \"Project B\"],\n",
" field=ln.ULabel.name,\n",
" create=True, # create non-existing labels rather than attempting to load them from the database\n",
")\n",
"ln.save(projects)"
]
Expand Down Expand Up @@ -256,8 +271,8 @@
},
"outputs": [],
"source": [
"ln.Feature(name='project', dtype='cat[ULabel]').save()\n",
"ln.Feature(name='disease', dtype='cat[bionty.Disease]').save()\n",
"ln.Feature(name=\"project\", dtype=\"cat[ULabel]\").save()\n",
"ln.Feature(name=\"disease\", dtype=\"cat[bionty.Disease]\").save()\n",
"artifact.features.add_values({\"project\": projects, \"disease\": diseases})\n",
"artifact.features"
]
Expand Down
40 changes: 24 additions & 16 deletions docs/curate-df.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,15 @@
"df = pd.DataFrame(\n",
" {\n",
" \"temperature\": [37.2, 36.3, 38.2],\n",
" \"cell_type\": [\"cerebral pyramidal neuron\", \"astrocytic glia\", \"oligodendrocyte\"],\n",
" \"cell_type\": [\n",
" \"cerebral pyramidal neuron\",\n",
" \"astrocytic glia\",\n",
" \"oligodendrocyte\",\n",
" ],\n",
" \"assay_ontology_id\": [\"EFO:0008913\", \"EFO:0008913\", \"EFO:0008913\"],\n",
" \"donor\": [\"D0001\", \"D0002\", \"D0003\"]\n",
" \"donor\": [\"D0001\", \"D0002\", \"D0003\"],\n",
" },\n",
" index = [\"obs1\", \"obs2\", \"obs3\"]\n",
" index=[\"obs1\", \"obs2\", \"obs3\"],\n",
")\n",
"df"
]
Expand Down Expand Up @@ -286,7 +290,9 @@
"outputs": [],
"source": [
"# fix the cell type\n",
"df.cell_type = df.cell_type.replace({\"cerebral pyramidal neuron\": cell_types.cerebral_cortex_pyramidal_neuron.name})"
"df.cell_type = df.cell_type.replace(\n",
" {\"cerebral pyramidal neuron\": cell_types.cerebral_cortex_pyramidal_neuron.name}\n",
")"
]
},
{
Expand Down Expand Up @@ -380,14 +386,14 @@
"\n",
"X = pd.DataFrame(\n",
" {\n",
" \"ENSG00000081059\": [1, 2, 3], \n",
" \"ENSG00000276977\": [4, 5, 6], \n",
" \"ENSG00000198851\": [7, 8, 9], \n",
" \"ENSG00000010610\": [10, 11, 12], \n",
" \"ENSG00000081059\": [1, 2, 3],\n",
" \"ENSG00000276977\": [4, 5, 6],\n",
" \"ENSG00000198851\": [7, 8, 9],\n",
" \"ENSG00000010610\": [10, 11, 12],\n",
" \"ENSG00000153563\": [13, 14, 15],\n",
" \"ENSGcorrupted\": [16, 17, 18]\n",
" }, \n",
" index=df.index # because we already curated the dataframe above, it will validate \n",
" \"ENSGcorrupted\": [16, 17, 18],\n",
" },\n",
" index=df.index, # because we already curated the dataframe above, it will validate\n",
")\n",
"adata = ad.AnnData(X=X, obs=df)\n",
"adata"
Expand All @@ -405,9 +411,9 @@
"outputs": [],
"source": [
"curate = ln.Curator.from_anndata(\n",
" adata, \n",
" adata,\n",
" var_index=bt.Gene.ensembl_gene_id, # validate var.index against Gene.ensembl_gene_id\n",
" categoricals=categoricals, \n",
" categoricals=categoricals,\n",
" organism=\"human\",\n",
")\n",
"curate.validate()"
Expand Down Expand Up @@ -450,7 +456,9 @@
"metadata": {},
"outputs": [],
"source": [
"adata_validated = adata[:, ~adata.var.index.isin(curate.non_validated[\"var_index\"])].copy()"
"adata_validated = adata[\n",
" :, ~adata.var.index.isin(curate.non_validated[\"var_index\"])\n",
"].copy()"
]
},
{
Expand All @@ -473,9 +481,9 @@
"outputs": [],
"source": [
"curate = ln.Curator.from_anndata(\n",
" adata_validated, \n",
" adata_validated,\n",
" var_index=bt.Gene.ensembl_gene_id, # validate var.index against Gene.ensembl_gene_id\n",
" categoricals=categoricals, \n",
" categoricals=categoricals,\n",
" organism=\"human\",\n",
")\n",
"curate.validate()"
Expand Down
42 changes: 30 additions & 12 deletions docs/curate-subclass.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,22 @@
"source": [
"# create example DataFrame that has all mandatory columns but one ('patient_age') is wrongly named\n",
"data = {\n",
" 'disease': ['Alzheimer disease', 'diabetes mellitus', 'breast cancer', 'Hypertension', 'asthma'],\n",
" 'phenotype': ['Mental deterioration', 'Hyperglycemia', 'Tumor growth', 'Increased blood pressure', 'Airway inflammation'],\n",
" 'developmental_stage': ['Adult', 'Adult', 'Adult', 'Adult', 'Child'],\n",
" 'patient_age': [70, 55, 60, 65, 12],\n",
" \"disease\": [\n",
" \"Alzheimer disease\",\n",
" \"diabetes mellitus\",\n",
" \"breast cancer\",\n",
" \"Hypertension\",\n",
" \"asthma\",\n",
" ],\n",
" \"phenotype\": [\n",
" \"Mental deterioration\",\n",
" \"Hyperglycemia\",\n",
" \"Tumor growth\",\n",
" \"Increased blood pressure\",\n",
" \"Airway inflammation\",\n",
" ],\n",
" \"developmental_stage\": [\"Adult\", \"Adult\", \"Adult\", \"Adult\", \"Child\"],\n",
" \"patient_age\": [70, 55, 60, 65, 12],\n",
"}\n",
"df = pd.DataFrame(data)\n",
"df"
Expand Down Expand Up @@ -148,15 +160,21 @@
"phenotype_lo = bt.Phenotype.public().lookup()\n",
"developmental_stage_lo = bt.DevelopmentalStage.public().lookup()\n",
"\n",
"df[\"disease\"] = df[\"disease\"].replace({\"Hypertension\": disease_lo.hypertensive_disorder.name})\n",
"df[\"phenotype\"] = df[\"phenotype\"].replace({\n",
" \"Tumor growth\": phenotype_lo.neoplasm.name,\n",
" \"Airway inflammation\": phenotype_lo.bronchitis.name}\n",
"df[\"disease\"] = df[\"disease\"].replace(\n",
" {\"Hypertension\": disease_lo.hypertensive_disorder.name}\n",
")\n",
"df[\"phenotype\"] = df[\"phenotype\"].replace(\n",
" {\n",
" \"Tumor growth\": phenotype_lo.neoplasm.name,\n",
" \"Airway inflammation\": phenotype_lo.bronchitis.name,\n",
" }\n",
")\n",
"df[\"developmental_stage\"] = df[\"developmental_stage\"].replace(\n",
" {\n",
" \"Adult\": developmental_stage_lo.adolescent_stage.name,\n",
" \"Child\": developmental_stage_lo.child_stage.name,\n",
" }\n",
")\n",
"df[\"developmental_stage\"] = df[\"developmental_stage\"].replace({\n",
" \"Adult\": developmental_stage_lo.adolescent_stage.name,\n",
" \"Child\": developmental_stage_lo.child_stage.name\n",
"})\n",
"\n",
"ehrcurator.validate()"
]
Expand Down
4 changes: 2 additions & 2 deletions docs/ehrcurator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import bionty as bt
import pandas as pd
from lamindb.core import DataFrameCurator, Record, logger
from lamindb.core.types import UPathStr, FieldAttr
from lamindb.core import DataFrameCurator, logger
from lamindb.core.types import UPathStr

__version__ = "0.1.0"

Expand Down
16 changes: 8 additions & 8 deletions docs/faq/key.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@
"try:\n",
" artifact_key_3.save()\n",
" artifact_key_4.save()\n",
"except Exception as e:\n",
"except Exception:\n",
" print(\n",
" \"It is not possible to save artifacts to the same key. This results in an\"\n",
" \" Integrity Error!\"\n",
Expand Down Expand Up @@ -615,8 +615,8 @@
"\n",
"for root, _, artifacts in os.walk(\"complex_biological_project/raw\"):\n",
" for artifactname in artifacts:\n",
" file_path = os.path.join(root, artifactname)\n",
" key_path = file_path.removeprefix(\"complex_biological_project\")\n",
" file_path = Path(root) / artifactname\n",
" key_path = str(file_path).removeprefix(\"complex_biological_project\")\n",
" ln_artifact = ln.Artifact(file_path, key=key_path)\n",
" ln_artifact.save()"
]
Expand Down Expand Up @@ -653,7 +653,7 @@
"all_data_paths = []\n",
"for root, _, artifacts in os.walk(\"complex_biological_project/raw\"):\n",
" for artifactname in artifacts:\n",
" file_path = os.path.join(root, artifactname)\n",
" file_path = Path(root) / artifactname\n",
" all_data_paths.append(file_path)\n",
"\n",
"all_data_artifacts = []\n",
Expand Down Expand Up @@ -695,8 +695,8 @@
"source": [
"for root, _, artifacts in os.walk(\"complex_biological_project/raw\"):\n",
" for artifactname in artifacts:\n",
" file_path = os.path.join(root, artifactname)\n",
" key_path = file_path.removeprefix(\"complex_biological_project\")\n",
" file_path = Path(root) / artifactname\n",
" key_path = str(file_path).removeprefix(\"complex_biological_project\")\n",
" ln_artifact = ln.Artifact(file_path, key=key_path)\n",
" ln_artifact.save()\n",
"\n",
Expand Down Expand Up @@ -786,8 +786,8 @@
"source": [
"for root, _, artifacts in os.walk(\"complex_biological_project/preprocessed\"):\n",
" for artifactname in artifacts:\n",
" file_path = os.path.join(root, artifactname)\n",
" key_path = file_path.removeprefix(\"complex_biological_project\")\n",
" file_path = Path(root) / artifactname\n",
" key_path = str(file_path).removeprefix(\"complex_biological_project\")\n",
"\n",
" print(file_path)\n",
" print()\n",
Expand Down
24 changes: 20 additions & 4 deletions docs/registries.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,15 @@
"\n",
"# Ingest dataset2\n",
"adata2 = datasets.small_dataset2(format=\"anndata\")\n",
"curator = ln.Curator.from_anndata(adata2, var_index=bt.Gene.symbol, categoricals={\"cell_medium\": ln.ULabel.name, \"cell_type_by_model\": bt.CellType.name}, organism=\"human\")\n",
"curator = ln.Curator.from_anndata(\n",
" adata2,\n",
" var_index=bt.Gene.symbol,\n",
" categoricals={\n",
" \"cell_medium\": ln.ULabel.name,\n",
" \"cell_type_by_model\": bt.CellType.name,\n",
" },\n",
" organism=\"human\",\n",
")\n",
"artifact2 = curator.save_artifact(key=\"example_datasets/dataset2.h5ad\")\n",
"artifact2.features.add_values(adata2.uns)"
]
Expand Down Expand Up @@ -151,7 +159,15 @@
},
"outputs": [],
"source": [
"ln.Artifact.df(include=[\"created_by__name\", \"ulabels__name\", \"cell_types__name\", \"feature_sets__registry\", \"suffix\"])"
"ln.Artifact.df(\n",
" include=[\n",
" \"created_by__name\",\n",
" \"ulabels__name\",\n",
" \"cell_types__name\",\n",
" \"feature_sets__registry\",\n",
" \"suffix\",\n",
" ]\n",
")"
]
},
{
Expand Down Expand Up @@ -449,7 +465,7 @@
},
"outputs": [],
"source": [
"ln.Artifact.filter(created_by__handle__startswith=\"testuse\").df() "
"ln.Artifact.filter(created_by__handle__startswith=\"testuse\").df()"
]
},
{
Expand Down Expand Up @@ -477,7 +493,7 @@
"cd8a = bt.Gene.get(symbol=\"CD8A\")\n",
"# query for all feature sets that contain CD8A\n",
"feature_sets_with_cd8a = ln.FeatureSet.filter(genes=cd8a).all()\n",
"# get all artifacts \n",
"# get all artifacts\n",
"ln.Artifact.filter(feature_sets__in=feature_sets_with_cd8a).df()"
]
},
Expand Down
8 changes: 6 additions & 2 deletions docs/storage/prepare-transfer-local-to-cloud.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,14 @@
"metadata": {},
"outputs": [],
"source": [
"artifact = ln.Artifact.from_df(pd.DataFrame({'a': [1, 2, 3]}), description='test-transfer-to-cloud')\n",
"artifact = ln.Artifact.from_df(\n",
" pd.DataFrame({\"a\": [1, 2, 3]}), description=\"test-transfer-to-cloud\"\n",
")\n",
"artifact.save()\n",
"\n",
"features = bt.CellMarker.from_values([\"PD1\", \"CD21\"], field=bt.CellMarker.name, organism=\"human\")\n",
"features = bt.CellMarker.from_values(\n",
" [\"PD1\", \"CD21\"], field=bt.CellMarker.name, organism=\"human\"\n",
")\n",
"ln.save(features)\n",
"artifact.features.add_feature_set(ln.FeatureSet(features), slot=\"var\")\n",
"\n",
Expand Down
7 changes: 5 additions & 2 deletions docs/storage/transfer-local-to-cloud.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
" features_sets.delete()\n",
" experiments.delete()\n",
"\n",
"artifacts = ln.Artifact.filter(description='test-transfer-to-cloud').all()\n",
"\n",
"artifacts = ln.Artifact.filter(description=\"test-transfer-to-cloud\").all()\n",
"for artifact in artifacts:\n",
" cleanup(artifact)"
]
Expand All @@ -48,7 +49,9 @@
"metadata": {},
"outputs": [],
"source": [
"artifact = ln.Artifact.using(\"testuser1/test-transfer-to-cloud\").get(description='test-transfer-to-cloud')\n",
"artifact = ln.Artifact.using(\"testuser1/test-transfer-to-cloud\").get(\n",
" description=\"test-transfer-to-cloud\"\n",
")\n",
"artifact.describe()"
]
},
Expand Down
Loading

0 comments on commit 4a03222

Please sign in to comment.