Skip to content

Commit

Permalink
Bugfixes: match-mondo-sources-all-lexical.py
Browse files Browse the repository at this point in the history
- Bugfix: AttributeError: 'tuple' object has no attribute 'pop': wrong datatype for metadata was being passed to lexical_index_to_sssom()
- Bugfix: Fixed several other bugs in mondo-ingest, and upgraded OAK/sssom-py/curies to fix other bugs related to prefix maps.
- Update: mondo.sssom.config.yml: Commented out duplicate prefix 'oio'
- Update: prefixes.csv: Removed duplicate prefix oio
- Update: Python requirements: Upgraded curies for bugfix involving get_prefixes(include_synonyms)
- Update: Not bugfixes, but did some codestyle fixes: (i) converted regular comments to docstring, (ii) added missing docstring, (iii) renamed a parameter that shadowed the built-in 'input'
  • Loading branch information
joeflack4 committed Jan 20, 2024
1 parent 466b83c commit d2328dc
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 23 deletions.
2 changes: 1 addition & 1 deletion python-requirements-apple-silicon.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ charset-normalizer==3.3.2
class-resolver==0.4.2
click==8.1.7
colorama==0.4.6
curies==0.7.4
curies==0.7.6
Deprecated==1.2.14
deprecation==2.1.0
distlib==0.3.7
Expand Down
2 changes: 1 addition & 1 deletion python-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class-resolver==0.4.2
click==8.1.7
colorama==0.4.6
commonmark==0.9.1
curies==0.6.4
curies==0.7.6
decorator==5.1.1
Deprecated==1.2.13
deprecation==2.1.0
Expand Down
2 changes: 1 addition & 1 deletion src/ontology/config/prefixes.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ rdf,http://www.w3.org/1999/02/22-rdf-syntax-ns#
rdfs,http://www.w3.org/2000/01/rdf-schema#
xsd,http://www.w3.org/2001/XMLSchema#
owl,http://www.w3.org/2002/07/owl#
oio,http://www.geneontology.org/formats/oboInOwl#
oboInOwl,http://www.geneontology.org/formats/oboInOwl#
dce,http://purl.org/dc/elements/1.1/
dct,http://purl.org/dc/terms/
foaf,http://xmlns.com/foaf/0.1/
Expand Down
2 changes: 1 addition & 1 deletion src/ontology/metadata/mondo.sssom.config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ curie_map:
semapv: https://w3id.org/semapv/vocab/
rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
sssom: https://w3id.org/sssom/
oio: http://www.geneontology.org/formats/oboInOwl#
# oio: http://www.geneontology.org/formats/oboInOwl#
GTR: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/GTR/"
NCI: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/NCI/"
NIFSTD: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/NIFSTD/"
Expand Down
40 changes: 21 additions & 19 deletions src/scripts/match-mondo-sources-all-lexical.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
# Basic matching pipeline that takes in
"""Match mondo sources, all lexical
Basic matching pipeline that takes in
# Input:
# 1. MERGED_ONTOLOGY = tmp/merged.owl
# 2. SSSOM_CONFIG = metadata/mondo.sssom.config.yml
# 3. OUTPUT_SSSOM = mapping/mondo-sources-all-lexical.sssom.tsv
Input:
1. MERGED_ONTOLOGY = tmp/merged.owl
2. SSSOM_CONFIG = metadata/mondo.sssom.config.yml
3. OUTPUT_SSSOM = mapping/mondo-sources-all-lexical.sssom.tsv
# I would try some basic things first:

# Use synonymiser
# Use oak.mapping() pipeline
I would try some basic things first:
Use synonymiser
Use oak.mapping() pipeline
"""
import logging
from pathlib import Path
from curies import Converter
from oaklib.resource import OntologyResource
from oaklib.implementations.sqldb.sql_implementation import SqlImplementation
from oaklib.utilities.lexical.lexical_indexer import (
Expand All @@ -25,11 +27,11 @@
import yaml
import pandas as pd

from sssom.constants import SUBJECT_ID, OBJECT_ID, PREDICATE_MODIFIER
from sssom.constants import SUBJECT_ID, OBJECT_ID
from sssom.util import filter_prefixes, is_curie, is_iri
from sssom.parsers import parse_sssom_table
from sssom.writers import write_table
from sssom.io import get_metadata_and_prefix_map, filter_file
from sssom.io import filter_file
from bioregistry import curie_from_iri

SRC = Path(__file__).resolve().parents[1]
Expand All @@ -49,6 +51,7 @@
)


# todo: duplicated code fragment w/ lexmatch-sssom-compare: solution, move to a lexmatch_utils.py and import to both
@click.group()
@click.option("-v", "--verbose", count=True)
@click.option("-q", "--quiet")
Expand Down Expand Up @@ -83,11 +86,11 @@ def main(verbose: int, quiet: bool):
)
@output_option
def run(input: str, config: str, rules: str, rejects: str, output: str):
# Implemented `meta` param in `lexical_index_to_sssom`

meta = get_metadata_and_prefix_map(config)
"""Run the script"""
# Get metadata config
with open(config, "r") as f:
yml = yaml.safe_load(f)
converter = Converter.from_extended_prefix_map(yml.pop('extended_prefix_map', {}))

# Get mondo.sssom.tsv
mapping_msdf = parse_sssom_table(SSSOM_MAP_FILE)
Expand All @@ -108,8 +111,6 @@ def run(input: str, config: str, rules: str, rejects: str, output: str):
# .reset_index(drop=True)
# )

prefix_of_interest = yml["subject_prefixes"]

resource = OntologyResource(slug=f"sqlite:///{Path(input).absolute()}")
oi = SqlImplementation(resource=resource)
ruleset = load_mapping_rules(rules)
Expand All @@ -118,9 +119,9 @@ def run(input: str, config: str, rules: str, rejects: str, output: str):
save_lexical_index(lexical_index, OUT_INDEX_DB)

if rules:
msdf = lexical_index_to_sssom(oi, lexical_index, ruleset=ruleset, meta=meta)
msdf = lexical_index_to_sssom(oi, lexical_index, ruleset=ruleset, prefix_map=converter)
else:
msdf = lexical_index_to_sssom(oi, lexical_index, meta=meta)
msdf = lexical_index_to_sssom(oi, lexical_index, prefix_map=converter)

# msdf.prefix_map = sssom_yaml['curie_map']
# msdf.metadata = sssom_yaml['global_metadata']
Expand All @@ -131,8 +132,9 @@ def run(input: str, config: str, rules: str, rejects: str, output: str):
# msdf.df[OBJECT_ID] = msdf.df[OBJECT_ID].apply(
# lambda x: iri_to_curie(x) if x.startswith("<http") else x
# )
prefixes_of_interest = yml["subject_prefixes"]
msdf.df = filter_prefixes(
df=msdf.df, filter_prefixes=prefix_of_interest, features=[SUBJECT_ID, OBJECT_ID]
df=msdf.df, filter_prefixes=prefixes_of_interest, features=[SUBJECT_ID, OBJECT_ID]
)
msdf.remove_mappings(mapping_msdf)

Expand Down

0 comments on commit d2328dc

Please sign in to comment.