Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix source naming and escaping #185

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,23 @@ jobs:
key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Set up Python 3.6
- name: Set up Python 3.8
uses: actions/setup-python@v1
with:
python-version: 3.6
python-version: 3.8
- name: Install dependencies
run: |
# Install Graphviz
sudo apt-get update
sudo apt-get install libstdc++6 python3-dev graphviz libgraphviz-dev pkg-config
sudo apt-get install graphviz
# Tests use the web ontology but this is here as backup
mkdir -p $HOME/.indra/bio_ontology/1.11
wget -nv https://bigmech.s3.amazonaws.com/travis/bio_ontology/1.11/mock_ontology.pkl -O $HOME/.indra/bio_ontology/1.11/bio_ontology.pkl
bio_ontology_version=1.30
mkdir -p $HOME/.indra/bio_ontology/$bio_ontology_version
wget -nv https://bigmech.s3.amazonaws.com/travis/bio_ontology/$bio_ontology_version/mock_ontology.pkl -O $HOME/.indra/bio_ontology/$bio_ontology_version/bio_ontology.pkl
# Now do some regular pip installs
python -m pip install --upgrade pip
pip install numpy scipy sympy cython==0.23.5 nose lxml matplotlib pandas kappy==4.0.0 boto3 nose-timer coverage
pip install numpy scipy sympy cython==0.23.5 nose lxml matplotlib pandas<2.0.0 kappy==4.0.0 boto3 nose-timer coverage
# Install Bionetgen
wget --no-check-certificate "http://www.csb.pitt.edu/Faculty/Faeder/wp-content/uploads/2017/04/BioNetGen-2.2.6-stable_Linux.tar.gz" -O bionetgen.tar.gz -nv
tar xzf bionetgen.tar.gz
Expand Down
23 changes: 17 additions & 6 deletions bioagents/msa/local_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from indra.sources.indra_db_rest.query import And, HasType, HasAgent
from indra.assemblers.html.assembler import get_available_source_counts
from indra.util.statement_presentation import _get_available_ev_source_counts, \
_get_initial_source_counts
_get_initial_source_counts, internal_source_mappings

from bioagents.msa.exceptions import EntityError

Expand Down Expand Up @@ -93,7 +93,8 @@ def get_statements_from_query(self, query, **ignored_kwargs):
else:
raise EntityError("Could not form a usable query from given "
"constraints.")
logger.info('Found %d statements from query' % len(all_stmts))
logger.info('Found %d statements from query'
% 0 if all_stmts is None else len(all_stmts))
self.statements = all_stmts
return self

Expand All @@ -110,7 +111,7 @@ def _filter_stmts_by_query(self, query, all_stmts):
elif isinstance(query, HasType):
return self._filter_for_type(all_stmts, query.stmt_types[0])
else:
logger.warning("Query {query} not handled.")
logger.warning(f"Query {query} not handled.")
return all_stmts

def sort_statements(self, stmts):
Expand Down Expand Up @@ -271,14 +272,18 @@ def _process_relations(self, relations):
stmt_jsons = []
for rel in relations:
mkh = rel.data.get('stmt_hash')
stmt_json = json.loads(
rel.data.get('stmt_json').replace('\\\\', '\\').replace('\\\\', '\\'))
stmt_json = json.loads(_str_escaping(rel.data.get('stmt_json')))
source_counts = json.loads(rel.data.get('source_counts'))
stmt_jsons.append(stmt_json)
if mkh not in self.source_counts:
self.source_counts[mkh] = _get_initial_source_counts()
for source, num in source_counts.items():
self.source_counts[mkh][source] += num
# The dict from _get_initial_source_counts() contains the
# source names as they appear in indra_db while the
# source names in CoGEx are as they appear in INDRA,
# e.g. 'bel' (in CoGEx) instead of 'bel_lc' (indra_db).
mapped_source = internal_source_mappings.get(source, source)
self.source_counts[mkh][mapped_source] += num
return stmts_from_json(stmt_jsons)


Expand Down Expand Up @@ -307,3 +312,9 @@ def get_resoure(self, key):

resource_manager = ResourceManager(preloads=preloads)


def _str_escaping(s: str) -> str:
"""Remove double escaped characters and other escaping artifacts."""
return s.replace(
'\\\\', '\\').replace('\\\\', '\\').replace('\\{', '{').replace(
'\\}', '}')
Loading