From d2328dc2ae5223c9dec1f9892071b001decb6de5 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Thu, 11 Jan 2024 20:53:56 -0500 Subject: [PATCH] Bugfixes: match-mondo-sources-all-lexical.py - Bugfix: AttributeError: 'tuple' object has no attribute 'pop': wrong datatype for metadata was being passed to lexical_index_to_sssom() - Bugfix: Several other bugs in mondo-ingest, and upgrading OAK/sssom-py/curies to fix other bugs related to prefix maps. - Update: mondo.sssom.config.yml: Commented out duplicate prefix 'oio' - Update: prefixes.csv: Replaced duplicate prefix 'oio' with 'oboInOwl' - Update: Python requirements: Upgraded curies for bugfix involving get_prefixes(include_synonyms) - Update: Not bugfixes, but did some codestyle fixes: (i) converted regular comments to docstring, (ii) added missing docstring, (iii) renamed built-in 'input' param --- python-requirements-apple-silicon.txt | 2 +- python-requirements.txt | 2 +- src/ontology/config/prefixes.csv | 2 +- src/ontology/metadata/mondo.sssom.config.yml | 2 +- .../match-mondo-sources-all-lexical.py | 40 ++++++++++--------- 5 files changed, 25 insertions(+), 23 deletions(-) diff --git a/python-requirements-apple-silicon.txt b/python-requirements-apple-silicon.txt index 7af503b9..53f694ff 100644 --- a/python-requirements-apple-silicon.txt +++ b/python-requirements-apple-silicon.txt @@ -15,7 +15,7 @@ charset-normalizer==3.3.2 class-resolver==0.4.2 click==8.1.7 colorama==0.4.6 -curies==0.7.4 +curies==0.7.6 Deprecated==1.2.14 deprecation==2.1.0 distlib==0.3.7 diff --git a/python-requirements.txt b/python-requirements.txt index 4731bf75..ca771da2 100644 --- a/python-requirements.txt +++ b/python-requirements.txt @@ -22,7 +22,7 @@ class-resolver==0.4.2 click==8.1.7 colorama==0.4.6 commonmark==0.9.1 -curies==0.6.4 +curies==0.7.6 decorator==5.1.1 Deprecated==1.2.13 deprecation==2.1.0 diff --git a/src/ontology/config/prefixes.csv b/src/ontology/config/prefixes.csv index f29f5f41..0efab057 100644 --- a/src/ontology/config/prefixes.csv +++ 
b/src/ontology/config/prefixes.csv @@ -3,7 +3,7 @@ rdf,http://www.w3.org/1999/02/22-rdf-syntax-ns# rdfs,http://www.w3.org/2000/01/rdf-schema# xsd,http://www.w3.org/2001/XMLSchema# owl,http://www.w3.org/2002/07/owl# -oio,http://www.geneontology.org/formats/oboInOwl# +oboInOwl,http://www.geneontology.org/formats/oboInOwl# dce,http://purl.org/dc/elements/1.1/ dct,http://purl.org/dc/terms/ foaf,http://xmlns.com/foaf/0.1/ diff --git a/src/ontology/metadata/mondo.sssom.config.yml b/src/ontology/metadata/mondo.sssom.config.yml index 6875e9cf..6fb52f35 100644 --- a/src/ontology/metadata/mondo.sssom.config.yml +++ b/src/ontology/metadata/mondo.sssom.config.yml @@ -34,7 +34,7 @@ curie_map: semapv: https://w3id.org/semapv/vocab/ rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# sssom: https://w3id.org/sssom/ - oio: http://www.geneontology.org/formats/oboInOwl# +# oio: http://www.geneontology.org/formats/oboInOwl# GTR: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/GTR/" NCI: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/NCI/" NIFSTD: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/NIFSTD/" diff --git a/src/scripts/match-mondo-sources-all-lexical.py b/src/scripts/match-mondo-sources-all-lexical.py index 1da04646..137c5d36 100644 --- a/src/scripts/match-mondo-sources-all-lexical.py +++ b/src/scripts/match-mondo-sources-all-lexical.py @@ -1,17 +1,19 @@ -# Basic matching pipeline that takes in +"""Match mondo sources, all lexical +Basic matching pipeline that takes in -# Input: -# 1. MERGED_ONTOLOGY = tmp/merged.owl -# 2. SSSOM_CONFIG = metadata/mondo.sssom.config.yml -# 3. OUTPUT_SSSOM = mapping/mondo-sources-all-lexical.sssom.tsv +Input: +1. MERGED_ONTOLOGY = tmp/merged.owl +2. SSSOM_CONFIG = metadata/mondo.sssom.config.yml +3. 
OUTPUT_SSSOM = mapping/mondo-sources-all-lexical.sssom.tsv -# I would try some basic things first: - -# Use synonymiser -# Use oak.mapping() pipeline +I would try some basic things first: +Use synonymiser +Use oak.mapping() pipeline +""" import logging from pathlib import Path +from curies import Converter from oaklib.resource import OntologyResource from oaklib.implementations.sqldb.sql_implementation import SqlImplementation from oaklib.utilities.lexical.lexical_indexer import ( @@ -25,11 +27,11 @@ import yaml import pandas as pd -from sssom.constants import SUBJECT_ID, OBJECT_ID, PREDICATE_MODIFIER +from sssom.constants import SUBJECT_ID, OBJECT_ID from sssom.util import filter_prefixes, is_curie, is_iri from sssom.parsers import parse_sssom_table from sssom.writers import write_table -from sssom.io import get_metadata_and_prefix_map, filter_file +from sssom.io import filter_file from bioregistry import curie_from_iri SRC = Path(__file__).resolve().parents[1] @@ -49,6 +51,7 @@ ) +# todo: duplicated code fragment w/ lexmatch-sssom-compare: solution, move to a lexmatch_utils.py and import to both @click.group() @click.option("-v", "--verbose", count=True) @click.option("-q", "--quiet") @@ -83,11 +86,11 @@ def main(verbose: int, quiet: bool): ) @output_option def run(input: str, config: str, rules: str, rejects: str, output: str): - # Implemented `meta` param in `lexical_index_to_sssom` - - meta = get_metadata_and_prefix_map(config) + """Run the script""" + # Get metadata config with open(config, "r") as f: yml = yaml.safe_load(f) + converter = Converter.from_extended_prefix_map(yml.pop('extended_prefix_map', {})) # Get mondo.sssom.tsv mapping_msdf = parse_sssom_table(SSSOM_MAP_FILE) @@ -108,8 +111,6 @@ def run(input: str, config: str, rules: str, rejects: str, output: str): # .reset_index(drop=True) # ) - prefix_of_interest = yml["subject_prefixes"] - resource = OntologyResource(slug=f"sqlite:///{Path(input).absolute()}") oi = SqlImplementation(resource=resource) 
ruleset = load_mapping_rules(rules) @@ -118,9 +119,9 @@ def run(input: str, config: str, rules: str, rejects: str, output: str): save_lexical_index(lexical_index, OUT_INDEX_DB) if rules: - msdf = lexical_index_to_sssom(oi, lexical_index, ruleset=ruleset, meta=meta) + msdf = lexical_index_to_sssom(oi, lexical_index, ruleset=ruleset, prefix_map=converter) else: - msdf = lexical_index_to_sssom(oi, lexical_index, meta=meta) + msdf = lexical_index_to_sssom(oi, lexical_index, prefix_map=converter) # msdf.prefix_map = sssom_yaml['curie_map'] # msdf.metadata = sssom_yaml['global_metadata'] @@ -131,8 +132,9 @@ def run(input: str, config: str, rules: str, rejects: str, output: str): # msdf.df[OBJECT_ID] = msdf.df[OBJECT_ID].apply( # lambda x: iri_to_curie(x) if x.startswith("