Skip to content

Commit

Permalink
Unit testing
Browse files Browse the repository at this point in the history
- Add: test/ dir, and initialized with the necessary files to test slurp / migrate
- Update: Temp: Updated slurp migrate script with some temporary logic to create a modified mapping status file that will be used as a static test input.
  • Loading branch information
joeflack4 committed May 29, 2023
1 parent 3199059 commit def3532
Show file tree
Hide file tree
Showing 7 changed files with 84,404 additions and 2 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,6 @@ src/scripts/.ipynb_checkpoints/*
src/ontology/.template.db
src/mappings/mondo-sources-all-lexical.sssom.tsv
src/scripts/mondo_unmapped.tsv

# Test
test/output/
10 changes: 10 additions & 0 deletions src/ontology/mondo-ingest.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,13 @@ update-jinja-sparql-queries:
python3 $(SCRIPTSDIR)/ordo_mapping_annotations/create_sparql__ordo_replace_annotation_based_mappings.py
python3 $(SCRIPTSDIR)/ordo_mapping_annotations/create_sparql__ordo_mapping_annotations_violation.py

#################
##### Tests #####
#################
.PHONY: tests
tests:
cd ../../; python -m unittest discover

#############################
########### Help ############
#############################
Expand All @@ -506,6 +513,9 @@ help:
@echo "----------------------------------------"
@echo " Command reference: mondo-ingest"
@echo "----------------------------------------"
# Tests
@echo "tests"
@echo "Run all project related (as oppposed to ODK) automated tests. To run ODK tests, use the 'test' goal.\n"
# Slurp / migrate
@echo "slurp/%.tsv and slurp-%"
@echo "For a given ontology, determine all slurpable / migratable terms. That is, terms that are candidates for integration into Mondo.\n"
Expand Down
132 changes: 130 additions & 2 deletions src/scripts/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
- https://incatools.github.io/ontology-access-kit/intro/tutorial02.html
"""
import os
import sys
from argparse import ArgumentParser
from glob import glob
from pathlib import Path
from typing import Dict, List, Set

import pandas as pd
Expand All @@ -20,8 +22,11 @@
from oaklib.implementations import ProntoImplementation
from oaklib.types import CURIE, URI

from utils import CACHE_DIR, DOCS_DIR, PREFIX, PROJECT_DIR, Term, _get_all_owned_terms, _get_next_available_mondo_id, \
get_mondo_term_ids, _load_ontology, SLURP_DIR
SCRIPTS_DIR = Path(os.path.abspath(os.path.dirname(__file__)))
PROJECT_ROOT = SCRIPTS_DIR.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from src.scripts.utils import CACHE_DIR, DOCS_DIR, PREFIX, PROJECT_DIR, Term, _get_all_owned_terms, \
_get_next_available_mondo_id, get_mondo_term_ids, _load_ontology, SLURP_DIR


FILENAME_GLOB_PATTERN = '*.tsv'
Expand Down Expand Up @@ -60,6 +65,50 @@ def _valid_parent_conditions(
return not parents or all([(x in mapped) or (x in excluded) or (x in obsolete) for x in parents])


# TODO: temp
def _parent_comment(t: Term,
parents: List[CURIE], mapped: Set[CURIE], excluded: Set[CURIE], obsolete: Set[CURIE]
) -> (bool, str):
"""This is an optional, stricter check on slurp candidacy / order.
For a term to be immediately migratable, it must either (a) have no parents, or (b) all of its parents must be
mapped, obsolete, or excluded"""
comment = ''
# - save which cases did not occur at the end
# if all([x in mapped for x in parents]):
# print(t.curie)
# elif all([x in excluded for x in parents]):
# print(t.curie)
# elif all([x in obsolete for x in parents]):
# print(t.curie)
# elif all([x not in mapped for x in parents]):
# print(t.curie)
# elif all([x not in excluded for x in parents]):
# print(t.curie)
# elif all([x not in obsolete for x in parents]):
# print(t.curie)
# # 1 but not all parent is mapped
# elif any([x in mapped for x in parents]) and not all([x in mapped for x in parents]):
# print(t.curie)
# # 1 but not all parent is excluded
# elif any([x in excluded for x in parents]) and not all([x in excluded for x in parents]):
# print(t.curie)
# # 1 but not all parent is obsolete
# elif any([x in obsolete for x in parents]) and not all([x in obsolete for x in parents]):
# print(t.curie)
# elif all([x in y for y in [mapped, excluded, obsolete] for x in parents]): # does this make sense
# all parents are all of mapped, excluded, and obsolete
if all([(x in mapped) and (x in excluded) and (x in obsolete) for x in parents]) and bool(parents):
comment = 'all parents are all of mapped, excluded, and obsolete'
# at least 1 parent that is all of mapped, excluded, and obsolete
elif any([(x in mapped) and (x in excluded) and (x in obsolete) for x in parents]) and bool(parents):
comment = 'at least 1 parent that is all of mapped, excluded, and obsolete'
# no parents mapped, exlcuded, or obsolete
elif all([(x not in mapped) and (x not in excluded) and (x not in obsolete) for x in parents]) and bool(parents):
comment = 'no parents mapped, exlcuded, or obsolete'
return not parents or all([(x in mapped) or (x in excluded) or (x in obsolete) for x in parents]), comment


def slurp(
ontology_path: str, onto_config_path: str, mondo_mappings_path: str, mapping_status_path: str,
mondo_terms_path: str, slurp_dir_path: str, outpath: str, max_id: int, min_id: int = 0,
Expand Down Expand Up @@ -111,6 +160,20 @@ def slurp(
# noinspection PyUnresolvedReferences
mondo_id_map[row.object_id] = row.subject_id

# TODO: temp map for output DF
mapping_status_map = {x['subject_id']: x for x in mapping_status_df.to_dict(orient='records')}
slurp_candidate_curies = [x.curie for x in slurp_candidates]
for t in owned_terms:
d = mapping_status_map[t.curie]
ok_term_conditions = t.curie in slurp_candidate_curies
ok_parent_conditions, parent_comment = _parent_comment(
t, t.direct_owned_parent_curies, mapped, excluded, obsolete)
d['ok_term_conditions'] = ok_term_conditions
d['ok_parent_conditions'] = ok_parent_conditions
d['expected_in_output'] = bool(bool(ok_parent_conditions) * ok_term_conditions)
d['comment_condition_parents'] = parent_comment
mapping_status_map[t.curie] = d

# Determine slurpable / migratable terms
# To be migratable, the term (i) must not already be mapped, (ii) must not be excluded (e.g. not in
# `reports/%_term_exclusions.txt`), and (iii) must not be deprecated / obsolete. Then, unless
Expand All @@ -119,6 +182,9 @@ def slurp(
slurp_candidates = [t for t in slurp_candidates if _valid_parent_conditions(
t.direct_owned_parent_curies, mapped, excluded, obsolete)] if not parent_conditions_off else slurp_candidates
for t in slurp_candidates:
# TODO: temp
d = mapping_status_map[t.curie]

if t.curie in slurp_id_map:
mondo_id = slurp_id_map[t.curie]
else:
Expand All @@ -128,12 +194,74 @@ def slurp(
qualified_parents = [p for p in t.direct_owned_parent_curies
if p in match_types and match_types[p] in ['skos:exactMatch', 'skos:narrowMatch']]
qualified_mondo_parents = [mondo_id_map[p] for p in qualified_parents if p in mondo_id_map]

# TODO: temp: obsolete mondo parents
mondo_parents = {}
for parent_curie in t.direct_owned_parent_curies:
try:
parent: Dict = sssom_df[sssom_df['object_id'] == parent_curie].to_dict('records')[0]
mondo_parents[parent['subject_id']] = parent['subject_label']
except IndexError: # no mondo parent; parent is not mapped
continue
osbsolete_status_mondo_parents = [x.startswith('obsolete') for x in list(mondo_parents.values())]
# some parents obsolete
if any(osbsolete_status_mondo_parents) and not all(osbsolete_status_mondo_parents):
d['comment_noncondition_obsolete_mondo_parents'] = 'some parents obsolete'
# all parents obsolete
elif all(osbsolete_status_mondo_parents):
d['comment_noncondition_obsolete_mondo_parents'] = 'all parents obsolete'
# no parents obsolete
elif not any(osbsolete_status_mondo_parents):
d['comment_noncondition_obsolete_mondo_parents'] = 'no parents obsolete'

# TODO: temp: qualified parents
# all parents qualified
if len(qualified_parents) > 0 and len(qualified_mondo_parents) == len(t.direct_owned_parent_curies):
d['comment_noncondition_qualified_parents'] = 'all parents qualified'
# has parents but not all are qualified
elif len(qualified_parents) > 0 and len(qualified_mondo_parents) != len(t.direct_owned_parent_curies):
d['comment_noncondition_qualified_parents'] = 'has parents but not all are qualified'
# has parents but none of them are qualified
elif len(qualified_parents) == 0 and len(t.direct_owned_parent_curies) > 0:
d['comment_noncondition_qualified_parents'] = 'has parents but none of them are qualified'
# no parents
elif not t.direct_owned_parent_curies:
d['comment_noncondition_qualified_parents'] = 'no parents'

terms_to_slurp.append({
'mondo_id': mondo_id, 'mondo_label': mondo_label, 'xref': t.curie, 'xref_source': 'MONDO:equivalentTo',
'original_label': t.label if t.label else '', 'definition': t.definition if t.definition else '',
# if not in match_types, this should mean term is excluded or obsolete
'parents': '|'.join(qualified_mondo_parents)})

# TODO: temp
mapping_status_map[t.curie] = d

# TODO: temp: for forcing a case in which no parents, but qualified otherwise
# - found C001 to be best candidate. really it's only excluded, but being thorough below
# no_parents = [t for t in owned_terms if not t.direct_owned_parent_curies]
d = mapping_status_map['Orphanet:C001']
d['is_excluded'], d['is_deprecated'], d['is_mapped'], d['ok_term_conditions'], d['expected_in_output'] = \
False, False, False, True, True
mapping_status_map['Orphanet:C001'] = d
mapping_df_new_ungrouped = pd.DataFrame(mapping_status_map.values()).fillna('')

# TODO: ideally make sure at least 1 thing in each group, then make a new TSV with 1 in each group
# - there woudl be around 27~ cases by combining all (qualified parents conditions) * (obsolete parents conditions)
# * (parent conditions).
non_grouping_cols = ['subject_id', 'subject_label', 'is_mapped', 'is_excluded', 'is_deprecated']
grouping_cols = [c for c in mapping_df_new_ungrouped.columns if c not in non_grouping_cols]
grouped = mapping_df_new_ungrouped.groupby(grouping_cols)
i = 0
dfs = []
for name, df in grouped:
i += 1
df['unique_combo_group'] = i
dfs.append(df)
mapping_df_new_grouped = pd.concat(dfs)
mapping_df_new_grouped.to_csv('~/Desktop/ordo_mapping_status_custom.tsv', sep='\t', index=False)
mapping_df_new_grouped.to_csv('~/Desktop/ordo_mapping_status_custom.csv', index=False)

# Sort, add robot row, save and return
result = pd.DataFrame(terms_to_slurp)
if len(result) > 0:
Expand Down
1 change: 1 addition & 0 deletions test/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Unit tests"""
Loading

0 comments on commit def3532

Please sign in to comment.