Skip to content

Commit

Permalink
Unit testing
Browse files Browse the repository at this point in the history
- Add: test/ dir, and initialized with the necessary files to test slurp / migrate
  • Loading branch information
joeflack4 committed May 28, 2023
1 parent 53d3aea commit 9d57219
Show file tree
Hide file tree
Showing 7 changed files with 84,323 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,6 @@ src/scripts/.ipynb_checkpoints/*
src/ontology/.template.db
src/mappings/mondo-sources-all-lexical.sssom.tsv
src/scripts/mondo_unmapped.tsv

# Test
test/output/
10 changes: 10 additions & 0 deletions src/ontology/mondo-ingest.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,13 @@ update-jinja-sparql-queries:
python3 $(SCRIPTSDIR)/ordo_mapping_annotations/create_sparql__ordo_replace_annotation_based_mappings.py
python3 $(SCRIPTSDIR)/ordo_mapping_annotations/create_sparql__ordo_mapping_annotations_violation.py

#################
##### Tests #####
#################
# Run the project's own Python unit tests (as opposed to the ODK `test` goal).
# Discovery is run from the repository root, two levels above this Makefile's directory.
.PHONY: tests
tests:
	# `&&` (not `;`): if the `cd` fails, do not run test discovery from the wrong directory.
	cd ../../ && python3 -m unittest discover

#############################
########### Help ############
#############################
Expand All @@ -506,6 +513,9 @@ help:
@echo "----------------------------------------"
@echo " Command reference: mondo-ingest"
@echo "----------------------------------------"
# Tests
@echo "tests"
@echo "Run all project related (as oppposed to ODK) automated tests. To run ODK tests, use the 'test' goal.\n"
# Slurp / migrate
@echo "slurp/%.tsv and slurp-%"
@echo "For a given ontology, determine all slurpable / migratable terms. That is, terms that are candidates for integration into Mondo.\n"
Expand Down
82 changes: 78 additions & 4 deletions src/scripts/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
- https://incatools.github.io/ontology-access-kit/intro/tutorial02.html
"""
import os
import sys
from argparse import ArgumentParser
from glob import glob
from pathlib import Path
from typing import Dict, List, Set

import pandas as pd
Expand All @@ -20,8 +22,11 @@
from oaklib.implementations import ProntoImplementation
from oaklib.types import CURIE, URI

from utils import CACHE_DIR, DOCS_DIR, PREFIX, PROJECT_DIR, Term, _get_all_owned_terms, _get_next_available_mondo_id, \
get_mondo_term_ids, _load_ontology, SLURP_DIR
SCRIPTS_DIR = Path(os.path.abspath(os.path.dirname(__file__)))
PROJECT_ROOT = SCRIPTS_DIR.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from src.scripts.utils import CACHE_DIR, DOCS_DIR, PREFIX, PROJECT_DIR, Term, _get_all_owned_terms, \
_get_next_available_mondo_id, get_mondo_term_ids, _load_ontology, SLURP_DIR


FILENAME_GLOB_PATTERN = '*.tsv'
Expand Down Expand Up @@ -50,13 +55,47 @@
'mondo_id': 'ID', 'mondo_label': 'LABEL', 'xref': 'A oboInOwl:hasDbXref',
'xref_source': '>A oboInOwl:source SPLIT=|', 'original_label': '', 'definition': 'A IAO:0000115', 'parents': 'SC %'}

def _valid_parent_conditions(
# TODO: temp term param
def _valid_parent_conditions(t: Term,
parents: List[CURIE], mapped: Set[CURIE], excluded: Set[CURIE], obsolete: Set[CURIE]
) -> bool:
"""This is an optional, stricter check on slurp candidacy / order.
For a term to be immediately migratable, it must either (a) have no parents, or (b) all of its parents must be
mapped, obsolete, or excluded"""
# TODO: save curie and why it was a candidate
# - save which cases did not occur at the end
# if all([x in mapped for x in parents]):
# print(t.curie)
# elif all([x in excluded for x in parents]):
# print(t.curie)
# elif all([x in obsolete for x in parents]):
# print(t.curie)
# elif all([x not in mapped for x in parents]):
# print(t.curie)
# elif all([x not in excluded for x in parents]):
# print(t.curie)
# elif all([x not in obsolete for x in parents]):
# print(t.curie)
# # 1 but not all parent is mapped
# elif any([x in mapped for x in parents]) and not all([x in mapped for x in parents]):
# print(t.curie)
# # 1 but not all parent is excluded
# elif any([x in excluded for x in parents]) and not all([x in excluded for x in parents]):
# print(t.curie)
# # 1 but not all parent is obsolete
# elif any([x in obsolete for x in parents]) and not all([x in obsolete for x in parents]):
# print(t.curie)
# elif all([x in y for y in [mapped, excluded, obsolete] for x in parents]): # does this make sense
# at least 1 parent that is all of mapped, excluded, and obsolete
if any([(x in mapped) and (x in excluded) and (x in obsolete) for x in parents]):
print(t.curie)
# all parents are all of mapped, excluded, and obsolete
elif all([(x in mapped) and (x in excluded) and (x in obsolete) for x in parents]):
print(t.curie)
# no parents mapped, exlcuded, or obsolete
elif all([(x not in mapped) and (x not in excluded) and (x not in obsolete) for x in parents]):
print(t.curie)
return not parents or all([(x in mapped) or (x in excluded) or (x in obsolete) for x in parents])


Expand Down Expand Up @@ -115,14 +154,49 @@ def slurp(
# `reports/%_term_exclusions.txt`), and (iii) must not be deprecated / obsolete. Then, unless
# `parent_conditions_off`, will also (iv) check `_valid_parent_conditions()`.
terms_to_slurp: List[Dict[str, str]] = []
slurp_candidates = [t for t in slurp_candidates if _valid_parent_conditions(
# TODO: temp term param
slurp_candidates = [t for t in slurp_candidates if _valid_parent_conditions(t,
t.direct_owned_parent_curies, mapped, excluded, obsolete)] if not parent_conditions_off else slurp_candidates
for t in slurp_candidates:
if t.curie in slurp_id_map:
mondo_id = slurp_id_map[t.curie]
else:
next_mondo_id, mondo_term_ids = _get_next_available_mondo_id(next_mondo_id, max_id, mondo_term_ids)
mondo_id = 'MONDO:' + str(next_mondo_id).zfill(7) # leading 0-padding
qualified_parents = [p for p in t.direct_owned_parent_curies
if p in match_types and match_types[p] in ['skos:exactMatch', 'skos:narrowMatch']]
qualified_mondo_paents = [mondo_id_map[p] for p in qualified_parents if p in mondo_id_map]

# TODO: temp: obsolete mondo parents
mondo_parents = {}
for parent_curie in t.direct_owned_parent_curies:
parent: Dict = sssom_df[sssom_df['object_id'] == parent_curie].to_dict('records')[0]
mondo_parents[parent['subject_id']] = parent['subject_label']
osbsolete_status_mondo_parents = [x.startswith('obsolete') for x in list(mondo_parents.values())]
# some parents obsolete
if any(osbsolete_status_mondo_parents) and not all(osbsolete_status_mondo_parents):
print(t.curie)
# all parents obsolete
elif all(osbsolete_status_mondo_parents):
print(t.curie)
# no parents obsolete
elif not any(osbsolete_status_mondo_parents):
print(t.curie)

# TODO: temp: qualified parents
# all parents qualified
if len(qualified_parents) > 0 and len(qualified_mondo_paents) == len(t.direct_owned_parent_curies):
print(t.curie)
# some, but not all, parents are qualified
elif len(qualified_parents) > 0 and len(qualified_mondo_paents) != len(t.direct_owned_parent_curies):
print(t.curie)
# has parents but none of them are qualified
elif len(qualified_parents) == 0 and len(t.direct_owned_parent_curies) > 0:
print(t.curie)
# no parents
elif not t.direct_owned_parent_curies:
print(t.curie)

mondo_label = t.label.lower() if t.label else ''
qualified_parents = [p for p in t.direct_owned_parent_curies
if p in match_types and match_types[p] in ['skos:exactMatch', 'skos:narrowMatch']]
Expand Down
1 change: 1 addition & 0 deletions test/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Unit tests"""
Loading

0 comments on commit 9d57219

Please sign in to comment.