From 57a8cf843b864b21e83737c4f1273af1ff07bef0 Mon Sep 17 00:00:00 2001 From: Richard Bruskiewich Date: Thu, 11 Aug 2022 20:43:40 -0700 Subject: [PATCH] Diverse Validation enhancements (#31) * check for missing TRAPI Attribute 'attribute_type_id' and 'value' fields. * validate that the attribute_type_id is a CURIE, and is an association_slot or at least has a prefix known to Biolink * check for deprecated, abstract and mixin classes * DRY refactoring of category validation to reduce code duplication in input edge validation --- app/main.py | 6 +- reasoner_validator/biolink/__init__.py | 228 ++++++++++++------- requirements-service.txt | 3 +- requirements.txt | 3 +- tests/README.md | 5 +- tests/test_biolink_compliance_validation.py | 239 +++++++++++++++++--- 6 files changed, 362 insertions(+), 122 deletions(-) diff --git a/app/main.py b/app/main.py index b5fbdfb..23bfebc 100644 --- a/app/main.py +++ b/app/main.py @@ -47,7 +47,7 @@ async def validate(query: Query): results.append(error) # Verify that the response has a Query Graph - if not len(query.message['query_graph']): + if not query.message['query_graph']: # An empty Query Graph is Not considered an absolute error, but we issue a warning results.append(f"TRAPI Message Warning: empty TRAPI Message Query Graph?") else: @@ -61,7 +61,7 @@ async def validate(query: Query): results.extend(errors) # Verify that the response had a non-empty Knowledge Graph - if not len(query.message['knowledge_graph']) > 0: + if not query.message['knowledge_graph']: # An empty Knowledge Graph is Not considered an absolute error, but we issue a warning results.append(f"TRAPI Message Warning: empty TRAPI Message Knowledge Graph?") else: @@ -75,7 +75,7 @@ async def validate(query: Query): results.extend(errors) # Verify that the response had some Result - if not len(query.message['results']): + if not query.message['results']: # An empty Result is Not considered an absolute error, but we issue a warning results.append(f"TRAPI Message Warning: empty 
TRAPI Message Result?") diff --git a/reasoner_validator/biolink/__init__.py b/reasoner_validator/biolink/__init__.py index bb788c4..0e9dfb1 100644 --- a/reasoner_validator/biolink/__init__.py +++ b/reasoner_validator/biolink/__init__.py @@ -10,6 +10,7 @@ import logging from bmt import Toolkit +from linkml_runtime.linkml_model import ClassDefinition from reasoner_validator.util import SemVer, SemVerError @@ -125,6 +126,21 @@ def minimum_required_biolink_version(self, version: str) -> bool: logger.error(f"minimum_required_biolink_version() error: {str(sve)}") return False + @staticmethod + def is_curie(s: str) -> bool: + """ + Check if a given string is a CURIE. + + :param s: str, string to be validated as a CURIE + :return: bool, whether or not the given string is a CURIE + """ + # Method copied from kgx.prefix_manager.PrefixManager... + if isinstance(s, str): + m = re.match(r"^[^ <()>:]*:[^/ :]+$", s) + return bool(m) + else: + return False + def get_result(self) -> Tuple[str, Optional[List[str]]]: """ Get result of validation. @@ -133,42 +149,7 @@ def get_result(self) -> Tuple[str, Optional[List[str]]]: """ return self.bmtk.get_model_version(), list(self.errors) - def validate_category(self, node_id: str, category: str) -> Optional[str]: - """ - Validate the category of node. - - :param node_id: identifier of a concept node - :type node_id: str - :param category: of the node - :type category: str - :return: category name associated wth the category of the node - :rtype: Optional[str] - """ - if self.bmtk.is_category(category): - return self.bmtk.get_element(category).name - elif self.bmtk.is_mixin(category): - # finding mixins in the categories is OK, but we otherwise ignore them in validation - logger.info(f"\nReported Biolink Model category '{category}' resolves to a Biolink Model 'mixin'?") - else: - element = self.bmtk.get_element(category) - if element: - # got something here... 
hopefully just an abstract class - # but not a regular category, so we also ignore it! - # TODO: how do we better detect abstract classes from the model? - # How strict should our validation be here? - logger.info( - f"\nReported Biolink Model category '{category}' " + - "resolves to the (possibly abstract) " + - f"Biolink Model element '{element.name}'?") - else: - # Error: a truly unrecognized category? - self.report_error( - f"'{category}' for node '{node_id}' " + - "is not a recognized Biolink Model category?" - ) - return None - - def validate_node(self, node_id, slots: Dict[str, Any]): + def validate_graph_node(self, node_id, slots: Dict[str, Any]): """ Validate slot properties (mainly 'categories') of a node. @@ -190,10 +171,11 @@ def validate_node(self, node_id, slots: Dict[str, Any]): categories = slots["categories"] node_prefix_mapped: bool = False for category in categories: - category_name: str = self.validate_category(node_id, category) - if category_name: + category: Optional[ClassDefinition] = \ + self.validate_category(context="Node", category=category, strict_validation=False) + if category: possible_subject_categories = self.bmtk.get_element_by_prefix(node_id) - if category_name in possible_subject_categories: + if category.name in possible_subject_categories: node_prefix_mapped = True if not node_prefix_mapped: self.report_error( @@ -225,11 +207,12 @@ def validate_node(self, node_id, slots: Dict[str, Any]): id_prefix_mapped: Dict = {identifier: False for identifier in ids} for category in categories: # category validation may report an error internally - category_name = self.validate_category(node_id, category) - if category_name: + category: Optional[ClassDefinition] = \ + self.validate_category(context="Node", category=category, strict_validation=False) + if category: for identifier in ids: # may be empty list if not provided... 
possible_subject_categories = self.bmtk.get_element_by_prefix(identifier) - if category_name in possible_subject_categories: + if category.name in possible_subject_categories: id_prefix_mapped[identifier] = True unmapped_ids = [ identifier for identifier in id_prefix_mapped.keys() if not id_prefix_mapped[identifier] @@ -253,14 +236,14 @@ def validate_node(self, node_id, slots: Dict[str, Any]): def set_nodes(self, nodes: Set): self.nodes.update(nodes) - def validate_edge(self, edge: Dict): + def validate_graph_edge(self, edge: Dict): """ Validate slot properties of a relationship ('biolink:Association') edge. :param edge: dictionary of slot properties of the edge. :type edge: dict[str, str] """ - logger.debug(edge) + # logger.debug(edge) # edge data fields to be validated... subject_id = edge['subject'] if 'subject' in edge else None @@ -275,7 +258,7 @@ def validate_edge(self, edge: Dict): edge_label = str(predicates) object_id = edge['object'] if 'object' in edge else None - attributes = edge['attributes'] if 'attributes' in edge else None + attributes: List = edge['attributes'] if 'attributes' in edge else None edge_id = f"{str(subject_id)}--{edge_label}->{str(object_id)}" @@ -317,14 +300,110 @@ def validate_edge(self, edge: Dict): if self.graph_type is TrapiGraphType.Knowledge_Graph: if not attributes: - # TODO: not quite sure whether and how to fully validate the 'attributes' of an edge # For now, we simply assume that *all* edges must have *some* attributes # (at least, provenance related, but we don't explicitly test for them) self.report_error(f"Edge '{edge_id}' has missing or empty attributes?") + else: + # TODO: attempt some deeper attribute validation here + for attribute in attributes: + attribute_type_id: Optional[str] = attribute.get('attribute_type_id', None) + if not attribute_type_id: + self.report_error( + f"Edge '{edge_id}' attribute '{str(attribute)}' missing its 'attribute_type_id'?" 
+ ) + continue + value: Optional[str] = attribute.get('value', None) + if not value: + self.report_error( + f"Edge '{edge_id}' attribute '{str(attribute)}' missing its 'value'?" + ) + continue + # + # TODO: not sure if this should only be a Pytest 'warning' rather than an Pytest 'error' + # + if not self.is_curie(attribute_type_id): + self.report_error( + f"Edge '{edge_id}' attribute_type_id '{str(attribute_type_id)}' is not a CURIE?" + ) + elif not self.bmtk.is_association_slot(attribute_type_id): + self.report_error( + f"Edge '{edge_id}' attribute_type_id '{str(attribute_type_id)}' " + + "not a biolink:association_slot?" + ) + # if not a Biolink association_slot, at least, check if it is known to Biolink + prefix = attribute_type_id.split(":", 1)[0] + if not self.bmtk.get_element_by_prefix(prefix): + self.report_error( + f"Edge '{edge_id}' attribute_type_id '{str(attribute_type_id)}' " + + f"has a CURIE prefix namespace unknown to Biolink?" + ) else: # TODO: do we need to validate Query Graph 'constraints' slot contents here? pass + def validate_category( + self, + context: str, + category: Optional[str], + strict_validation: bool = True + ) -> ClassDefinition: + """ + + :param context: str, label for context of concept whose category is being validated, i.e. 'Subject' or 'Object' + :param category: str, CURIE of putative concept 'category' + :param strict_validation: bool, True report mixin or abstract categories as errors; Ignore otherwise if False + :return: + """ + biolink_class: Optional[ClassDefinition] = None + if category: + biolink_class = self.bmtk.get_element(category) + if biolink_class: + if biolink_class.deprecated: + self.report_error( + f"{context} Biolink class '{category}' is deprecated: {biolink_class.deprecated}?" + ) + biolink_class = None + elif biolink_class.abstract: + if strict_validation: + self.report_error( + f"{context} Biolink class '{category}' is abstract, not a concrete category?" 
+ ) + else: + logger.info(f"{context} Biolink class '{category}' is abstract. Ignored in this context.") + biolink_class = None + elif self.bmtk.is_mixin(category): + # A mixin cannot be instantiated so it should not be given as an input concept category + if strict_validation: + self.report_error( + f"{context} identifier '{category}' designates a mixin, not a concrete category?" + ) + else: + logger.info(f"{context} Biolink class '{category}' is a 'mixin'. Ignored in this context.") + biolink_class = None + elif not self.bmtk.is_category(category): + self.report_error(f"{context} identifier '{category}' is not a valid Biolink category?") + biolink_class = None + else: + self.report_error(f"{context} Biolink class '{category}' is unknown?") + else: + self.report_error(f"{context} category identifier is missing?") + + return biolink_class + + def validate_input_node(self, context: str, category: Optional[str], identifier: Optional[str]): + + biolink_class: Optional[ClassDefinition] = self.validate_category(f"Input {context}", category) + + if identifier: + if biolink_class: + possible_subject_categories = self.bmtk.get_element_by_prefix(identifier) + if biolink_class.name not in possible_subject_categories: + err_msg = f"Namespace prefix of input {context} identifier '{identifier}' is unmapped to '{category}'?" + self.report_error(err_msg) + # else, we will have already reported an error in validate_category() + else: + self.report_error(f"Input {context} identifier is missing?") + def check_biolink_model_compliance_of_input_edge(self, edge: Dict[str, str]) -> Tuple[str, Optional[List[str]]]: """ Validate a templated test input edge contents against the current BMT Biolink Model release. 
@@ -351,51 +430,25 @@ def check_biolink_model_compliance_of_input_edge(self, edge: Dict[str, str]) -> subject_curie = edge['subject'] if 'subject' in edge else None object_curie = edge['object'] if 'object' in edge else None - if subject_category_curie and self.bmtk.is_category(subject_category_curie): - subject_category_name = self.bmtk.get_element(subject_category_curie).name - else: - err_msg = f"'subject' category " - err_msg += f"'{subject_category_curie}' is unknown?" if subject_category_curie else "is missing?" - self.report_error(err_msg) - subject_category_name = None - - if object_category_curie and self.bmtk.is_category(object_category_curie): - object_category_name = self.bmtk.get_element(object_category_curie).name - else: - err_msg = f"'object' category " - err_msg += f"'{object_category_curie}' is unknown?" if object_category_curie else "is missing?" - self.report_error(err_msg) - object_category_name = None + self.validate_input_node( + context='subject', + category=subject_category_curie, + identifier=subject_curie + ) if not (predicate_curie and self.bmtk.is_predicate(predicate_curie)): - err_msg = f"predicate " + err_msg = f"Input predicate " err_msg += f"'{predicate_curie}' is unknown?" if predicate_curie else "is missing?" self.report_error(err_msg) elif self.minimum_required_biolink_version("2.2.0") and \ not self.bmtk.is_translator_canonical_predicate(predicate_curie): - self.report_error(f"predicate '{predicate_curie}' is non-canonical?") - - if subject_curie: - if subject_category_name: - possible_subject_categories = self.bmtk.get_element_by_prefix(subject_curie) - if subject_category_name not in possible_subject_categories: - err_msg = f"namespace prefix of 'subject' identifier '{subject_curie}' " +\ - f"is unmapped to '{subject_category_curie}'?" - self.report_error(err_msg) - else: - err_msg = "'subject' is missing?" 
- self.report_error(err_msg) + self.report_error(f"Input predicate '{predicate_curie}' is non-canonical?") - if object_curie: - if object_category_name: - possible_object_categories = self.bmtk.get_element_by_prefix(object_curie) - if object_category_name not in possible_object_categories: - err_msg = f"namespace prefix of 'object' identifier '{object_curie}' " +\ - f"is unmapped to '{object_category_curie}'?" - self.report_error(err_msg) - else: - err_msg = "'object' is missing?" - self.report_error(err_msg) + self.validate_input_node( + context='object', + category=object_category_curie, + identifier=object_curie + ) return self.get_result() @@ -409,6 +462,9 @@ def check_biolink_model_compliance(self, graph: Dict) -> Tuple[str, Optional[Lis :returns: 2-tuple of Biolink Model version (str) and List[str] (possibly empty) of error messages :rtype: Tuple[str, Optional[List[str]]] """ + if not graph: + self.report_error(f"Empty graph?") + # Access graph data fields to be validated nodes: Optional[Dict] if 'nodes' in graph and graph['nodes']: @@ -435,7 +491,7 @@ def check_biolink_model_compliance(self, graph: Dict) -> Tuple[str, Optional[Lis if nodes: for node_id, details in nodes.items(): - self.validate_node(node_id, details) + self.validate_graph_node(node_id, details) nodes_seen += 1 if nodes_seen >= _MAX_TEST_NODES: @@ -449,7 +505,7 @@ def check_biolink_model_compliance(self, graph: Dict) -> Tuple[str, Optional[Lis for edge in edges.values(): # print(f"{str(edge)}", flush=True) - self.validate_edge(edge) + self.validate_graph_edge(edge) edges_seen += 1 if edges_seen >= _MAX_TEST_EDGES: diff --git a/requirements-service.txt b/requirements-service.txt index a93bb25..9dee7b9 100644 --- a/requirements-service.txt +++ b/requirements-service.txt @@ -8,4 +8,5 @@ linkml-runtime>=1.3.1 linkml>=1.3.2 prefixcommons==0.1.11 tomli<2.0.0,>=0.2.6 -bmt==0.8.4 +# bmt>=0.8.10 +git+https://github.com/biolink/biolink-model-toolkit.git@v0.8.4-patch-release#egg=bmt diff --git 
a/requirements.txt b/requirements.txt index 3cc92af..389cde1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ linkml-runtime>=1.3.1 linkml>=1.3.2 prefixcommons==0.1.11 tomli<2.0.0,>=0.2.6 -bmt==0.8.4 +# bmt>=0.8.10 +git+https://github.com/biolink/biolink-model-toolkit.git@v0.8.4-patch-release#egg=bmt diff --git a/tests/README.md b/tests/README.md index 07ecb96..491e503 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1,8 +1,11 @@ # Tests -- [test_biolink_compliance_validation.py](translator/biolink/test_biolink_compliance_validation.py) Version-specific Biolink Model semantic compliance test harness unit tests: +- [test_semver](translator/biolink/test_semver.py) testing SemVer code (mostly used by TRAPI) +- [test_biolink_compliance_validation](translator/biolink/test_biolink_compliance_validation.py) Version-specific Biolink Model semantic compliance test harness unit tests: - **test_set_default_biolink_versioned_global_environment:** testing default Biolink Model release - **test_set_specific_biolink_versioned_global_environment:** testing specific Biolink Model release + - **test_minimum_required_biolink_version:** testing minimal threshold Biolink Model release - **test_check_biolink_model_compliance_of_input_edge:** test of KP data template test input edges validation - **test_check_biolink_model_compliance_of_query_graph:** test of TRAPI output query graph validation - **test_check_biolink_model_compliance_of_knowledge_graph:** test of TRAPI output knowledge graphs validation +- [test_validator](translator/biolink/test_validator.py) tests for TRAPI validator diff --git a/tests/test_biolink_compliance_validation.py b/tests/test_biolink_compliance_validation.py index 9f8a87b..23e8675 100644 --- a/tests/test_biolink_compliance_validation.py +++ b/tests/test_biolink_compliance_validation.py @@ -83,7 +83,7 @@ def test_minimum_required_biolink_version(): 'subject': 'UBERON:0005453', 'object': 'UBERON:0035769' }, - f"{INPUT_EDGE_PREFIX}: 
'subject' category is missing?" + f"{INPUT_EDGE_PREFIX}: Input subject category identifier is missing?" ), ( # Query 2 - Invalid subject category LATEST_BIOLINK_MODEL, @@ -94,7 +94,7 @@ def test_minimum_required_biolink_version(): 'subject': 'UBERON:0005453', 'object': 'UBERON:0035769' }, - f"{INPUT_EDGE_PREFIX}: 'subject' category 'biolink:NotACategory' is unknown?" + f"{INPUT_EDGE_PREFIX}: Input subject Biolink class 'biolink:NotACategory' is unknown?" ), ( # Query 3 - Missing object category LATEST_BIOLINK_MODEL, @@ -104,7 +104,7 @@ def test_minimum_required_biolink_version(): 'subject': 'UBERON:0005453', 'object': 'UBERON:0035769' }, - f"{INPUT_EDGE_PREFIX}: 'object' category is missing?" + f"{INPUT_EDGE_PREFIX}: Input object category identifier is missing?" ), ( # Query 4 - Invalid object category LATEST_BIOLINK_MODEL, @@ -115,7 +115,7 @@ def test_minimum_required_biolink_version(): 'subject': 'UBERON:0005453', 'object': 'UBERON:0035769' }, - f"{INPUT_EDGE_PREFIX}: 'object' category 'biolink:NotACategory' is unknown?" + f"{INPUT_EDGE_PREFIX}: Input object Biolink class 'biolink:NotACategory' is unknown?" ), ( # Query 5 - Missing predicate LATEST_BIOLINK_MODEL, @@ -125,7 +125,7 @@ def test_minimum_required_biolink_version(): 'subject': 'UBERON:0005453', 'object': 'UBERON:0035769' }, - f"{INPUT_EDGE_PREFIX}: predicate is missing?" + f"{INPUT_EDGE_PREFIX}: Input predicate is missing?" ), ( # Query 6 - Invalid predicate LATEST_BIOLINK_MODEL, @@ -136,7 +136,7 @@ def test_minimum_required_biolink_version(): 'subject': 'UBERON:0005453', 'object': 'UBERON:0035769' }, - f"{INPUT_EDGE_PREFIX}: predicate 'biolink:not_a_predicate' is unknown?" + f"{INPUT_EDGE_PREFIX}: Input predicate 'biolink:not_a_predicate' is unknown?" 
), ( # Query 7 - Non-canonical directed predicate LATEST_BIOLINK_MODEL, @@ -147,7 +147,7 @@ def test_minimum_required_biolink_version(): 'subject': 'DRUGBANK:DB00331', 'object': 'MONDO:0005148' }, - f"{INPUT_EDGE_PREFIX}: predicate 'biolink:affected_by' is non-canonical?" + f"{INPUT_EDGE_PREFIX}: Input predicate 'biolink:affected_by' is non-canonical?" ), ( # Query 8 - Missing subject LATEST_BIOLINK_MODEL, # Biolink Model Version @@ -157,7 +157,7 @@ def test_minimum_required_biolink_version(): 'predicate': 'biolink:subclass_of', 'object': 'UBERON:0035769' }, - f"{INPUT_EDGE_PREFIX}: 'subject' is missing?" + f"{INPUT_EDGE_PREFIX}: Input subject identifier is missing?" ), ( # Query 9 - Unmappable subject namespace LATEST_BIOLINK_MODEL, @@ -168,7 +168,7 @@ def test_minimum_required_biolink_version(): 'subject': 'FOO:0005453', 'object': 'UBERON:0035769' }, - f"{INPUT_EDGE_PREFIX}: namespace prefix of 'subject' identifier 'FOO:0005453' " + + f"{INPUT_EDGE_PREFIX}: Namespace prefix of input subject identifier 'FOO:0005453' " + "is unmapped to 'biolink:AnatomicalEntity'?" ), ( # Query 10 - missing object @@ -179,7 +179,7 @@ def test_minimum_required_biolink_version(): 'predicate': 'biolink:subclass_of', 'subject': "UBERON:0005453" }, - f"{INPUT_EDGE_PREFIX}: 'object' is missing?" + f"{INPUT_EDGE_PREFIX}: Input object identifier is missing?" ), ( # Query 11 - Unmappable object namespace LATEST_BIOLINK_MODEL, @@ -190,7 +190,7 @@ def test_minimum_required_biolink_version(): 'subject': 'UBERON:0005453', 'object': 'BAR:0035769' }, - f"{INPUT_EDGE_PREFIX}: namespace prefix of 'object' identifier 'BAR:0035769' " + + f"{INPUT_EDGE_PREFIX}: Namespace prefix of input object identifier 'BAR:0035769' " + "is unmapped to 'biolink:AnatomicalEntity'?" 
), ( # Query 12 - Valid other model @@ -203,6 +203,42 @@ def test_minimum_required_biolink_version(): 'object': 'UniProtKB:P23219' }, "" + ), + ( # Query 13 - Deprecated + LATEST_BIOLINK_MODEL, + { + 'subject_category': 'biolink:ChemicalSubstance', + 'object_category': 'biolink:Protein', + 'predicate': 'biolink:entity_negatively_regulates_entity', + 'subject': 'DRUGBANK:DB00945', + 'object': 'UniProtKB:P23219' + }, + f"{INPUT_EDGE_PREFIX}: Input subject Biolink class 'biolink:ChemicalSubstance' is deprecated: " + "This class is deprecated in favor of 'small molecule.'?" + ), + ( # Query 14 - input category cannot be a mixin + LATEST_BIOLINK_MODEL, + { + 'subject_category': 'biolink:GeneOrGeneProduct', + 'object_category': 'biolink:Protein', + 'predicate': 'biolink:related_to', + 'subject': 'HGNC:9604', + 'object': 'UniProtKB:P23219' + }, + f"{INPUT_EDGE_PREFIX}: Input subject identifier 'biolink:GeneOrGeneProduct' " + + f"designates a mixin, not a concrete category?" + ), + ( # Query 15 - input category should not be abstract? + LATEST_BIOLINK_MODEL, + { + 'subject_category': 'biolink:AdministrativeEntity', + 'object_category': 'biolink:Agent', + 'predicate': 'biolink:related_to', + 'subject': 'isbn:1234', + 'object': 'ORCID:1234' + }, + f"{INPUT_EDGE_PREFIX}: Input subject Biolink class " + \ + "'biolink:AdministrativeEntity' is abstract, not a concrete category?" ) ] ) @@ -239,7 +275,8 @@ def test_check_biolink_model_compliance_of_input_edge(query: Tuple): LATEST_BIOLINK_MODEL, # Query 1: Empty query graph {}, - "" # Query Graphs can have empty 'nodes' + # Query Graphs can have empty 'nodes', so we should just issue a warning + f"{QUERY_GRAPH_PREFIX}: Empty graph?" ), ( LATEST_BIOLINK_MODEL, @@ -337,8 +374,7 @@ def test_check_biolink_model_compliance_of_input_edge(query: Tuple): }, "edges": {} }, - f"{QUERY_GRAPH_PREFIX}: 'biolink:InvalidCategory' for node 'NCBIGene:29974' " + - "is not a recognized Biolink Model category?" 
+ f"{QUERY_GRAPH_PREFIX}: Node Biolink class 'biolink:InvalidCategory' is unknown?" ), ( LATEST_BIOLINK_MODEL, @@ -686,7 +722,7 @@ def test_check_biolink_model_compliance_of_query_graph(query: Tuple): "subject": "NCBIGene:29974", "predicate": "biolink:interacts_with", "object": "NCBIGene:29974", - "attributes": [{"attribute_type_id": "fake-attribute-id"}] + "attributes": [{"attribute_type_id": "biolink:knowledge_source"}] } } }, @@ -706,7 +742,7 @@ def test_check_biolink_model_compliance_of_query_graph(query: Tuple): "subject": "NCBIGene:29974", "predicate": "biolink:interacts_with", "object": "NCBIGene:29974", - "attributes": [{"attribute_type_id": "fake-attribute-id"}] + "attributes": [{"attribute_type_id": "biolink:knowledge_source"}] } } }, @@ -728,12 +764,11 @@ def test_check_biolink_model_compliance_of_query_graph(query: Tuple): "subject": "NCBIGene:29974", "predicate": "biolink:interacts_with", "object": "NCBIGene:29974", - "attributes": [{"attribute_type_id": "fake-attribute-id"}] + "attributes": [{"attribute_type_id": "biolink:knowledge_source"}] } } }, - f"{KNOWLEDGE_GRAPH_PREFIX}: 'biolink:Nonsense_Category' for node " + - "'NCBIGene:29974' is not a recognized Biolink Model category?" + f"{KNOWLEDGE_GRAPH_PREFIX}: Node Biolink class 'biolink:Nonsense_Category' is unknown?" 
), ( LATEST_BIOLINK_MODEL, @@ -751,7 +786,7 @@ def test_check_biolink_model_compliance_of_query_graph(query: Tuple): "subject": "FOO:1234", "predicate": "biolink:interacts_with", "object": "FOO:1234", - "attributes": [{"attribute_type_id": "fake-attribute-id"}] + "attributes": [{"attribute_type_id": "biolink:knowledge_source"}] } } }, @@ -774,7 +809,7 @@ def test_check_biolink_model_compliance_of_query_graph(query: Tuple): # "subject": "", "predicate": "biolink:interacts_with", "object": "NCBIGene:29974", - "attributes": [{"attribute_type_id": "fake-attribute-id"}] + "attributes": [{"attribute_type_id": "biolink:knowledge_source"}] } } }, @@ -804,7 +839,7 @@ def test_check_biolink_model_compliance_of_query_graph(query: Tuple): "subject": "NCBIGene:12345", "predicate": "biolink:interacts_with", "object": "PUBCHEM.COMPOUND:597", - "attributes": [{"attribute_type_id": "fake-attribute-id"}] + "attributes": [{"attribute_type_id": "biolink:knowledge_source"}] } } }, @@ -832,7 +867,7 @@ def test_check_biolink_model_compliance_of_query_graph(query: Tuple): "subject": "NCBIGene:29974", "predicate": "biolink:unknown_predicate", "object": "PUBCHEM.COMPOUND:597", - "attributes": [{"attribute_type_id": "fake-attribute-id"}] + "attributes": [{"attribute_type_id": "biolink:knowledge_source"}] } } }, @@ -860,7 +895,7 @@ def test_check_biolink_model_compliance_of_query_graph(query: Tuple): "subject": "NCBIGene:29974", "predicate": "biolink:affected_by", "object": "PUBCHEM.COMPOUND:597", - "attributes": [{"attribute_type_id": "fake-attribute-id"}] + "attributes": [{"attribute_type_id": "biolink:knowledge_source"}] } } }, @@ -888,7 +923,7 @@ def test_check_biolink_model_compliance_of_query_graph(query: Tuple): "subject": "NCBIGene:29974", "predicate": "biolink:interacts_with", "object": "PUBCHEM.COMPOUND:678", - "attributes": [{"attribute_type_id": "fake-attribute-id"}] + "attributes": [{"attribute_type_id": "biolink:knowledge_source"}] } } }, @@ -896,7 +931,152 @@ def 
test_check_biolink_model_compliance_of_query_graph(query: Tuple): ), ( LATEST_BIOLINK_MODEL, - # Query 14: edge has missing or empty attributes + # Query 14: attribute 'attribute_type_id' is missing + { + "nodes": { + "NCBIGene:29974": { + "categories": [ + "biolink:Gene" + ] + }, + "PUBCHEM.COMPOUND:597": { + "name": "cytosine", + "categories": [ + "biolink:SmallMolecule" + ], + } + }, + "edges": { + "edge_1": { + "subject": "NCBIGene:29974", + "predicate": "biolink:interacts_with", + "object": "PUBCHEM.COMPOUND:597", + "attributes": [{"value": "some value"}] + } + } + }, + f"{KNOWLEDGE_GRAPH_PREFIX}: Edge 'NCBIGene:29974--biolink:interacts_with->PUBCHEM.COMPOUND:597' " + + "attribute '{'value': 'some value'}' missing its 'attribute_type_id'?" + ), + ( + LATEST_BIOLINK_MODEL, + # Query 15: attribute 'value' is missing? + { + "nodes": { + "NCBIGene:29974": { + "categories": [ + "biolink:Gene" + ] + }, + "PUBCHEM.COMPOUND:597": { + "name": "cytosine", + "categories": [ + "biolink:SmallMolecule" + ], + } + }, + "edges": { + "edge_1": { + "subject": "NCBIGene:29974", + "predicate": "biolink:interacts_with", + "object": "PUBCHEM.COMPOUND:597", + "attributes": [{"attribute_type_id": "biolink:knowledge_source"}] + } + } + }, + f"{KNOWLEDGE_GRAPH_PREFIX}: Edge 'NCBIGene:29974--biolink:interacts_with->PUBCHEM.COMPOUND:597' " + + "attribute '{'attribute_type_id': 'biolink:knowledge_source'}' missing its 'value'?" 
+ ), + ( + LATEST_BIOLINK_MODEL, + # Query 16: 'attribute_type_id' is not a CURIE + { + "nodes": { + "NCBIGene:29974": { + "categories": [ + "biolink:Gene" + ] + }, + "PUBCHEM.COMPOUND:597": { + "name": "cytosine", + "categories": [ + "biolink:SmallMolecule" + ], + } + }, + "edges": { + "edge_1": { + "subject": "NCBIGene:29974", + "predicate": "biolink:interacts_with", + "object": "PUBCHEM.COMPOUND:597", + "attributes": [{"attribute_type_id": "not_a_curie", "value": "some value"}] + } + } + }, + f"{KNOWLEDGE_GRAPH_PREFIX}: Edge 'NCBIGene:29974--biolink:interacts_with->PUBCHEM.COMPOUND:597' " + + "attribute_type_id 'not_a_curie' is not a CURIE?" + ), + ( + LATEST_BIOLINK_MODEL, + # Query 17: 'attribute_type_id' is not a 'biolink:association_slot' (biolink:synonym is a node property) + { + "nodes": { + "NCBIGene:29974": { + "categories": [ + "biolink:Gene" + ] + }, + "PUBCHEM.COMPOUND:597": { + "name": "cytosine", + "categories": [ + "biolink:SmallMolecule" + ], + } + }, + "edges": { + "edge_1": { + "subject": "NCBIGene:29974", + "predicate": "biolink:interacts_with", + "object": "PUBCHEM.COMPOUND:597", + "attributes": [{"attribute_type_id": "biolink:synonym", "value": "some synonym"}] + } + } + }, + f"{KNOWLEDGE_GRAPH_PREFIX}: Edge 'NCBIGene:29974--biolink:interacts_with->PUBCHEM.COMPOUND:597' " + + "attribute_type_id 'biolink:synonym' not a biolink:association_slot?" + ), + ( + LATEST_BIOLINK_MODEL, + # Query 18: 'attribute_type_id' has a CURIE prefix namespace unknown to Biolink? 
+ { + "nodes": { + "NCBIGene:29974": { + "categories": [ + "biolink:Gene" + ] + }, + "PUBCHEM.COMPOUND:597": { + "name": "cytosine", + "categories": [ + "biolink:SmallMolecule" + ], + } + }, + "edges": { + "edge_1": { + "subject": "NCBIGene:29974", + "predicate": "biolink:interacts_with", + "object": "PUBCHEM.COMPOUND:597", + "attributes": [{"attribute_type_id": "foo:bar", "value": "some value"}] + } + } + }, + f"{KNOWLEDGE_GRAPH_PREFIX}: Edge 'NCBIGene:29974--biolink:interacts_with->PUBCHEM.COMPOUND:597' " + + "attribute_type_id 'foo:bar' has a CURIE prefix namespace unknown to Biolink?" + ), + ( + LATEST_BIOLINK_MODEL, + # Query 19: has missing or empty attributes? { "nodes": { "NCBIGene:29974": { @@ -916,7 +1096,7 @@ def test_check_biolink_model_compliance_of_query_graph(query: Tuple): "subject": "NCBIGene:29974", "predicate": "biolink:interacts_with", "object": "PUBCHEM.COMPOUND:597", - # "attributes": [{"attribute_type_id": "fake-attribute-id"}] + # "attributes": [{"attribute_type_id": "biolink:knowledge_source"}] } } }, @@ -925,7 +1105,7 @@ def test_check_biolink_model_compliance_of_query_graph(query: Tuple): ), ( "1.8.2", - # Query 15: # An earlier Biolink Model Version won't recognize a category not found in its version + # Query 20: # An earlier Biolink Model Version won't recognize a category not found in its version { # Sample nodes 'nodes': { @@ -980,8 +1160,7 @@ def test_check_biolink_model_compliance_of_query_graph(query: Tuple): } } }, - "BLM Version 1.8.2 Error in Knowledge Graph: 'biolink:SmallMolecule' for node " + - "'PUBCHEM.COMPOUND:597' is not a recognized Biolink Model category?" + "BLM Version 1.8.2 Error in Knowledge Graph: Node Biolink class 'biolink:SmallMolecule' is unknown?" ) ] )