Skip to content

Commit

Permalink
feat: build subgraph from term ID (#156)
Browse files Browse the repository at this point in the history
  • Loading branch information
nayib-jose-gloria authored Mar 29, 2024
1 parent cdb8faf commit 3f4cd4b
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 0 deletions.
43 changes: 43 additions & 0 deletions api/python/src/cellxgene_ontology_guide/entities.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from collections import Counter
from enum import Enum
from typing import Any, Dict, List


class Ontology(Enum):
Expand Down Expand Up @@ -28,3 +30,44 @@ class CuratedOntologyTermList(Enum):
SYSTEM = "system"
TISSUE_GENERAL = "tissue_general"
UBERON_DEVELOPMENT_STAGE = "uberon_development_stage"


class OntologyNode:
    """
    A single ontology term together with the nodes attached beneath it.

    Each node tracks its direct children and a running tally of how often every term ID occurs in the graph rooted
    at the node (the node's own term ID included).
    """

    def __init__(self, term_id: str):
        """
        :param term_id: str ontology term ID this node represents
        """
        self._term_id = term_id
        self._children: List["OntologyNode"] = []
        # A freshly created node's graph holds exactly one term: the node itself.
        self._term_counter: Counter[str] = Counter([term_id])

    @property
    def term_id(self) -> str:
        """
        The ontology term ID this node stands for.
        """
        return self._term_id

    @property
    def children(self) -> List["OntologyNode"]:
        """
        Direct child OntologyNodes, in the order they were added.
        """
        return self._children

    @property
    def term_counter(self) -> Counter[str]:
        """
        Counter keyed by term ID, giving the number of times each term appears in the graph rooted at this node.
        The node's own term ID is counted once.
        :return: Counter of term ID occurrences
        """
        return self._term_counter

    def add_child(self, child: "OntologyNode") -> None:
        """
        Attach a child node and fold its term counts into this node's counter.

        The child's counts are merged as they stand at the time of the call; children attached to ``child``
        afterwards are not reflected in this node's counter.
        :param child: OntologyNode to append to this node's children
        """
        self.children.append(child)
        self.term_counter.update(child.term_counter)

    def to_dict(self) -> Dict[str, Any]:
        """
        Render the graph rooted at this node as nested dictionaries.
        :return: single-key dict mapping this node's term ID to the list of its children's dict representations
        """
        nested: List[Dict[str, Any]] = []
        for node in self.children:
            nested.append(node.to_dict())
        return {self.term_id: nested}
45 changes: 45 additions & 0 deletions api/python/src/cellxgene_ontology_guide/ontology_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Any, Dict, Iterable, List, Optional, Union

from cellxgene_ontology_guide._constants import VALID_NON_ONTOLOGY_TERMS
from cellxgene_ontology_guide.entities import OntologyNode
from cellxgene_ontology_guide.supported_versions import CXGSchema


Expand Down Expand Up @@ -333,6 +334,50 @@ def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = Fal

return descendants_dict

def get_term_graph(self, term_id: str) -> OntologyNode:
    """
    Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
    same ontology as the root term ID.

    A term reachable through several parents appears once under each of them, as a separate OntologyNode, so the
    result is the DAG expanded into a tree.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> root_node = ontology_parser.get_term_graph("CL:0000000")
    >>> root_node.term_id
    "CL:0000000"
    >>> root_node.to_dict()
    {
      "CL:0000000": [
        {"CL:0000001": [
          {"CL:0000004": [...]},
          {"CL:0000005": [...]},
          {"CL:0000006": [...]},
          {"CL:0000007": [...]},
          ...
        ]},
        {"CL:0000002": [
          {"CL:0000004": [...]},
          {"CL:0000005": [...]},
          ...
        ]},
        {"CL:0000003": []},
        ...
      ]
    }
    >>> root_node.term_counter
    {"CL:0000000": 1, "CL:0000001": 1, "CL:0000004": 2, ...}

    :param term_id: str ontology term to build subtree for
    :return: OntologyNode representation of graph with term_id as root.
    """
    ontology_name = self._parse_ontology_name(term_id)

    # Index every term's direct children (ancestor distance of exactly 1) in a single pass over the ontology, so
    # the recursion below does not have to re-scan the whole ontology for every node it creates. Iterating in the
    # ontology's own order keeps each parent's children in the same order as a per-parent scan would yield.
    children_by_parent: Dict[str, List[str]] = {}
    for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
        for ancestor, distance in candidate_metadata["ancestors"].items():
            if distance == 1:
                children_by_parent.setdefault(ancestor, []).append(candidate_descendant)

    def build_subgraph(current_term_id: str) -> OntologyNode:
        # Children are fully built before being attached: add_child merges the child's term counts at call
        # time, so the child's subtree must be complete by then.
        node = OntologyNode(current_term_id)
        for child_term_id in children_by_parent.get(current_term_id, ()):
            node.add_child(build_subgraph(child_term_id))
        return node

    return build_subgraph(term_id)

def is_term_deprecated(self, term_id: str) -> bool:
"""
Check if an ontology term is deprecated.
Expand Down
34 changes: 34 additions & 0 deletions api/python/tests/test_ontology_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,37 @@ def test_get_distance_between_terms(ontology_parser):

# disjoint distance
assert ontology_parser.get_distance_between_terms(term_id_1="CL:0000001", term_id_2="CL:0000008") == -1


def test_get_term_graph(ontology_parser):
    """
    Building the graph from the root CL term yields the expected nested structure and per-term occurrence counts.
    """
    # Terms with two parents (CL:0000004, CL:0000005) are expected once under each parent.
    expected_graph = {
        "CL:0000000": [
            {
                "CL:0000001": [
                    {"CL:0000004": []},
                    {"CL:0000005": []},
                    {"CL:0000006": []},
                    {"CL:0000007": []},
                ]
            },
            {
                "CL:0000002": [
                    {"CL:0000004": []},
                    {"CL:0000005": []},
                ]
            },
            {"CL:0000003": []},
        ]
    }
    expected_term_counts = {
        "CL:0000000": 1,
        "CL:0000001": 1,
        "CL:0000002": 1,
        "CL:0000003": 1,
        "CL:0000004": 2,
        "CL:0000005": 2,
        "CL:0000006": 1,
        "CL:0000007": 1,
    }

    root_node = ontology_parser.get_term_graph("CL:0000000")

    assert root_node.to_dict() == expected_graph
    assert root_node.term_counter == expected_term_counts

0 comments on commit 3f4cd4b

Please sign in to comment.