Unit testing

- Add: test/ dir, and initialized with the necessary files to test slurp / migrate - Update: Temp: Updated slurp migrate script with some temporary logic to create a modified mapping status file that will be used as a static test input.
monarch-initiative · May 29, 2023 · def3532 · def3532
1 parent 3199059
commit def3532
Show file tree

Hide file tree

Showing 7 changed files with 84,404 additions and 2 deletions.
diff --git a/.gitignore b/.gitignore
@@ -78,3 +78,6 @@ src/scripts/.ipynb_checkpoints/*
 src/ontology/.template.db
 src/mappings/mondo-sources-all-lexical.sssom.tsv
 src/scripts/mondo_unmapped.tsv
+
+# Test
+test/output/
diff --git a/src/ontology/mondo-ingest.Makefile b/src/ontology/mondo-ingest.Makefile
@@ -496,6 +496,13 @@ update-jinja-sparql-queries:
 	python3 $(SCRIPTSDIR)/ordo_mapping_annotations/create_sparql__ordo_replace_annotation_based_mappings.py
 	python3 $(SCRIPTSDIR)/ordo_mapping_annotations/create_sparql__ordo_mapping_annotations_violation.py
 
+#################
+##### Tests #####
+#################
+.PHONY: tests
+tests:
+	cd ../../; python -m unittest discover
+
 #############################
 ########### Help ############
 #############################
@@ -506,6 +513,9 @@ help:
 	@echo "----------------------------------------"
 	@echo "	Command reference: mondo-ingest"
 	@echo "----------------------------------------"
+	# Tests
+	@echo "tests"
+	@echo "Run all project related (as oppposed to ODK) automated tests. To run ODK tests, use the 'test' goal.\n"
 	# Slurp / migrate
 	@echo "slurp/%.tsv and slurp-%"
 	@echo "For a given ontology, determine all slurpable / migratable terms. That is, terms that are candidates for integration into Mondo.\n"

diff --git a/src/scripts/migrate.py b/src/scripts/migrate.py
@@ -10,8 +10,10 @@
 - https://incatools.github.io/ontology-access-kit/intro/tutorial02.html
 """
 import os
+import sys
 from argparse import ArgumentParser
 from glob import glob
+from pathlib import Path
 from typing import Dict, List, Set
 
 import pandas as pd
@@ -20,8 +22,11 @@
 from oaklib.implementations import ProntoImplementation
 from oaklib.types import CURIE, URI
 
-from utils import CACHE_DIR, DOCS_DIR, PREFIX, PROJECT_DIR, Term, _get_all_owned_terms, _get_next_available_mondo_id, \
-    get_mondo_term_ids, _load_ontology, SLURP_DIR
+SCRIPTS_DIR = Path(os.path.abspath(os.path.dirname(__file__)))
+PROJECT_ROOT = SCRIPTS_DIR.parent.parent
+sys.path.insert(0, str(PROJECT_ROOT))
+from src.scripts.utils import CACHE_DIR, DOCS_DIR, PREFIX, PROJECT_DIR, Term, _get_all_owned_terms, \
+    _get_next_available_mondo_id, get_mondo_term_ids, _load_ontology, SLURP_DIR
 
 
 FILENAME_GLOB_PATTERN = '*.tsv'
@@ -60,6 +65,50 @@ def _valid_parent_conditions(
     return not parents or all([(x in mapped) or (x in excluded) or (x in obsolete) for x in parents])
 
 
+# TODO: temp
+def _parent_comment(t: Term,
+    parents: List[CURIE], mapped: Set[CURIE], excluded: Set[CURIE], obsolete: Set[CURIE]
+) -> (bool, str):
+    """This is an optional, stricter check on slurp candidacy / order.
+
+    For a term to be immediately migratable, it must either (a) have no parents, or (b) all of its parents must be
+    mapped, obsolete, or excluded"""
+    comment = ''
+    # - save which cases did not occur at the end
+    # if all([x in mapped for x in parents]):
+    #     print(t.curie)
+    # elif all([x in excluded for x in parents]):
+    #     print(t.curie)
+    # elif all([x in obsolete for x in parents]):
+    #     print(t.curie)
+    # elif all([x not in mapped for x in parents]):
+    #     print(t.curie)
+    # elif all([x not in excluded for x in parents]):
+    #     print(t.curie)
+    # elif all([x not in obsolete for x in parents]):
+    #     print(t.curie)
+    # # 1 but not all parent is mapped
+    # elif any([x in mapped for x in parents]) and not all([x in mapped for x in parents]):
+    #     print(t.curie)
+    # # 1 but not all parent is excluded
+    # elif any([x in excluded for x in parents]) and not all([x in excluded for x in parents]):
+    #     print(t.curie)
+    # # 1 but not all parent is obsolete
+    # elif any([x in obsolete for x in parents]) and not all([x in obsolete for x in parents]):
+    #     print(t.curie)
+    # elif all([x in y for y in [mapped, excluded, obsolete] for x in parents]):  # does this make sense
+    # all parents are all of mapped, excluded, and obsolete
+    if all([(x in mapped) and (x in excluded) and (x in obsolete) for x in parents]) and bool(parents):
+        comment = 'all parents are all of mapped, excluded, and obsolete'
+    # at least 1 parent that is all of mapped, excluded, and obsolete
+    elif any([(x in mapped) and (x in excluded) and (x in obsolete) for x in parents]) and bool(parents):
+        comment = 'at least 1 parent that is all of mapped, excluded, and obsolete'
+    # no parents mapped, exlcuded, or obsolete
+    elif all([(x not in mapped) and (x not in excluded) and (x not in obsolete) for x in parents]) and bool(parents):
+        comment = 'no parents mapped, exlcuded, or obsolete'
+    return not parents or all([(x in mapped) or (x in excluded) or (x in obsolete) for x in parents]), comment
+
+
 def slurp(
     ontology_path: str, onto_config_path: str, mondo_mappings_path: str, mapping_status_path: str,
     mondo_terms_path: str, slurp_dir_path: str, outpath: str, max_id: int, min_id: int = 0,
@@ -111,6 +160,20 @@ def slurp(
         # noinspection PyUnresolvedReferences
         mondo_id_map[row.object_id] = row.subject_id
 
+    # TODO: temp map for output DF
+    mapping_status_map = {x['subject_id']: x for x in mapping_status_df.to_dict(orient='records')}
+    slurp_candidate_curies = [x.curie for x in slurp_candidates]
+    for t in owned_terms:
+        d = mapping_status_map[t.curie]
+        ok_term_conditions = t.curie in slurp_candidate_curies
+        ok_parent_conditions, parent_comment = _parent_comment(
+            t, t.direct_owned_parent_curies, mapped, excluded, obsolete)
+        d['ok_term_conditions'] = ok_term_conditions
+        d['ok_parent_conditions'] = ok_parent_conditions
+        d['expected_in_output'] = bool(bool(ok_parent_conditions) * ok_term_conditions)
+        d['comment_condition_parents'] = parent_comment
+        mapping_status_map[t.curie] = d
+
     # Determine slurpable / migratable terms
     # To be migratable, the term (i) must not already be mapped, (ii) must not be excluded (e.g. not in
     # `reports/%_term_exclusions.txt`), and (iii) must not be deprecated / obsolete. Then, unless
@@ -119,6 +182,9 @@ def slurp(
     slurp_candidates = [t for t in slurp_candidates if _valid_parent_conditions(
         t.direct_owned_parent_curies, mapped, excluded, obsolete)] if not parent_conditions_off else slurp_candidates
     for t in slurp_candidates:
+        # TODO: temp
+        d = mapping_status_map[t.curie]
+
         if t.curie in slurp_id_map:
             mondo_id = slurp_id_map[t.curie]
         else:
@@ -128,12 +194,74 @@ def slurp(
         qualified_parents = [p for p in t.direct_owned_parent_curies
                              if p in match_types and match_types[p] in ['skos:exactMatch', 'skos:narrowMatch']]
         qualified_mondo_parents = [mondo_id_map[p] for p in qualified_parents if p in mondo_id_map]
+
+        # TODO: temp: obsolete mondo parents
+        mondo_parents = {}
+        for parent_curie in t.direct_owned_parent_curies:
+            try:
+                parent: Dict = sssom_df[sssom_df['object_id'] == parent_curie].to_dict('records')[0]
+                mondo_parents[parent['subject_id']] = parent['subject_label']
+            except IndexError:  # no mondo parent; parent is not mapped
+                continue
+        osbsolete_status_mondo_parents = [x.startswith('obsolete') for x in list(mondo_parents.values())]
+        # some parents obsolete
+        if any(osbsolete_status_mondo_parents) and not all(osbsolete_status_mondo_parents):
+            d['comment_noncondition_obsolete_mondo_parents'] = 'some parents obsolete'
+        # all parents obsolete
+        elif all(osbsolete_status_mondo_parents):
+            d['comment_noncondition_obsolete_mondo_parents'] = 'all parents obsolete'
+        # no parents obsolete
+        elif not any(osbsolete_status_mondo_parents):
+            d['comment_noncondition_obsolete_mondo_parents'] = 'no parents obsolete'
+
+        # TODO: temp: qualified parents
+        # all parents qualified
+        if len(qualified_parents) > 0 and len(qualified_mondo_parents) == len(t.direct_owned_parent_curies):
+            d['comment_noncondition_qualified_parents'] = 'all parents qualified'
+        # has parents but not all are qualified
+        elif len(qualified_parents) > 0 and len(qualified_mondo_parents) != len(t.direct_owned_parent_curies):
+            d['comment_noncondition_qualified_parents'] = 'has parents but not all are qualified'
+        # has parents but none of them are qualified
+        elif len(qualified_parents) == 0 and len(t.direct_owned_parent_curies) > 0:
+            d['comment_noncondition_qualified_parents'] = 'has parents but none of them are qualified'
+        # no parents
+        elif not t.direct_owned_parent_curies:
+            d['comment_noncondition_qualified_parents'] = 'no parents'
+
         terms_to_slurp.append({
             'mondo_id': mondo_id, 'mondo_label': mondo_label, 'xref': t.curie, 'xref_source': 'MONDO:equivalentTo',
             'original_label': t.label if t.label else '', 'definition': t.definition if t.definition else '',
             # if not in match_types, this should mean term is excluded or obsolete
             'parents': '|'.join(qualified_mondo_parents)})
 
+        # TODO: temp
+        mapping_status_map[t.curie] = d
+
+    # TODO: temp: for forcing a case in which no parents, but qualified otherwise
+    #  - found C001 to be best candidate. really it's only excluded, but being thorough below
+    # no_parents = [t for t in owned_terms if not t.direct_owned_parent_curies]
+    d = mapping_status_map['Orphanet:C001']
+    d['is_excluded'], d['is_deprecated'], d['is_mapped'], d['ok_term_conditions'], d['expected_in_output'] = \
+        False, False, False, True, True
+    mapping_status_map['Orphanet:C001'] = d
+    mapping_df_new_ungrouped = pd.DataFrame(mapping_status_map.values()).fillna('')
+
+    # TODO: ideally make sure at least 1 thing in each group, then make a new TSV with 1 in each group
+    #  - there woudl be around 27~ cases by combining all (qualified parents conditions) * (obsolete parents conditions)
+    #  * (parent conditions).
+    non_grouping_cols = ['subject_id', 'subject_label',	'is_mapped', 'is_excluded', 'is_deprecated']
+    grouping_cols = [c for c in mapping_df_new_ungrouped.columns if c not in non_grouping_cols]
+    grouped = mapping_df_new_ungrouped.groupby(grouping_cols)
+    i = 0
+    dfs = []
+    for name, df in grouped:
+        i += 1
+        df['unique_combo_group'] = i
+        dfs.append(df)
+    mapping_df_new_grouped = pd.concat(dfs)
+    mapping_df_new_grouped.to_csv('~/Desktop/ordo_mapping_status_custom.tsv', sep='\t', index=False)
+    mapping_df_new_grouped.to_csv('~/Desktop/ordo_mapping_status_custom.csv', index=False)
+
     # Sort, add robot row, save and return
     result = pd.DataFrame(terms_to_slurp)
     if len(result) > 0:

diff --git a/test/__init__.py b/test/__init__.py
@@ -0,0 +1 @@
+"""Unit tests"""