Skip to content

Commit

Permalink
Slurp / migrate
Browse files Browse the repository at this point in the history
- Bugfix: Robot template 'parents' column IDs were not Mondo IDs
- Add: tests

Unit testing
- Add: test/ dir, initialized with the files necessary to test slurp / migrate
  • Loading branch information
joeflack4 committed May 27, 2023
1 parent af6a0ec commit 73b1d6d
Show file tree
Hide file tree
Showing 5 changed files with 84,246 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,6 @@ src/scripts/.ipynb_checkpoints/*
src/ontology/.template.db
src/mappings/mondo-sources-all-lexical.sssom.tsv
src/scripts/mondo_unmapped.tsv

# Test
test/output/
16 changes: 13 additions & 3 deletions src/scripts/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
- https://incatools.github.io/ontology-access-kit/intro/tutorial02.html
"""
import os
import sys
from argparse import ArgumentParser
from glob import glob
from pathlib import Path
from typing import Dict, List, Set

import pandas as pd
Expand All @@ -20,7 +22,10 @@
from oaklib.implementations import ProntoImplementation
from oaklib.types import CURIE, URI

from utils import CACHE_DIR, DOCS_DIR, PREFIX, PROJECT_DIR, Term, _get_all_owned_terms, _get_next_available_mondo_id, \
SCRIPTS_DIR = Path(os.path.abspath(os.path.dirname(__file__)))
PROJECT_ROOT = SCRIPTS_DIR.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from src.scripts.utils import CACHE_DIR, DOCS_DIR, PREFIX, PROJECT_DIR, Term, _get_all_owned_terms, _get_next_available_mondo_id, \
get_mondo_term_ids, _load_ontology, SLURP_DIR


Expand Down Expand Up @@ -103,9 +108,12 @@ def slurp(
onto_config_path=onto_config_path, use_cache=use_cache)
slurp_candidates: List[Term] = [x for x in owned_terms if all([x.curie not in y for y in [excluded, mapped]])]
match_types: Dict = {}
# Lookup tables built from the SSSOM mappings, both keyed by the mapping's object_id
# (the source-ontology term CURIE):
#   match_types  -> the mapping predicate (e.g. 'skos:exactMatch')
#   mondo_id_map -> the mapping's subject_id (presumably the Mondo ID; verify against the SSSOM file)
mondo_id_map: Dict = {}
match_types: Dict = {}
for row in sssom_df.itertuples():
    # noinspection PyUnresolvedReferences
    match_types[row.object_id] = row.predicate_id
    # The subject ID must go into its own map. Writing it into match_types would overwrite the
    # predicate stored just above (so no parent could ever pass the exactMatch / narrowMatch check)
    # and would leave mondo_id_map empty (so no parent could be translated for the 'parents' column).
    # noinspection PyUnresolvedReferences
    mondo_id_map[row.object_id] = row.subject_id

# Determine slurpable / migratable terms
# To be migratable, the term (i) must not already be mapped, (ii) must not be excluded (e.g. not in
Expand All @@ -120,13 +128,15 @@ def slurp(
else:
next_mondo_id, mondo_term_ids = _get_next_available_mondo_id(next_mondo_id, max_id, mondo_term_ids)
mondo_id = 'MONDO:' + str(next_mondo_id).zfill(7) # leading 0-padding
qualified_parents = [p for p in t.direct_owned_parent_curies
if p in match_types and match_types[p] in ['skos:exactMatch', 'skos:narrowMatch']]
qualified_mondo_paents = [mondo_id_map[p] for p in qualified_parents if p in mondo_id_map]
mondo_label = t.label.lower() if t.label else ''
terms_to_slurp.append({
'mondo_id': mondo_id, 'mondo_label': mondo_label, 'xref': t.curie, 'xref_source': 'MONDO:equivalentTo',
'original_label': t.label if t.label else '', 'definition': t.definition if t.definition else '',
# if not in match_types, this should mean term is excluded or obsolete
'parents': '|'.join([p for p in t.direct_owned_parent_curies if p in match_types
and match_types[p] in ['skos:exactMatch', 'skos:narrowMatch']])})
'parents': '|'.join(qualified_mondo_paents)})

# Sort, add robot row, save and return
result = pd.DataFrame(terms_to_slurp)
Expand Down
Loading

0 comments on commit 73b1d6d

Please sign in to comment.