Prepare release v.0.0.5 (#10)

* prepare release * fix conflict version and prepare release * clean up and prepare release * test branch before merging * fix lint format
TieuLongPhan · Nov 11, 2024 · af0dc37 · af0dc37
1 parent 479a8a1
commit af0dc37
Show file tree

Hide file tree

Showing 21 changed files with 115 additions and 36 deletions.
diff --git a/.github/workflows/test-and-lint.yml b/.github/workflows/test-and-lint.yml
@@ -7,7 +7,7 @@ name: Test & Lint
 
 on:
   push:
-    branches: [ "main", "dev" ]
+    branches: [ "main", "dev", "dev-clean"]
   pull_request:
     branches: [ "main" ]
 

diff --git a/.gitignore b/.gitignore
@@ -14,3 +14,4 @@ Data/Report/*
 Data/Temp/Benchmark/Complete/*
 Data/Temp/Benchmark/Hier/*
 Data/Temp/Benchmark/Raw/*
+*.ipynb
diff --git a/Docs/Analysis/_3_cgrtool_aam.py b/Docs/Analysis/_3_cgrtool_aam.py
@@ -31,9 +31,12 @@ def parse_reactions(
                 except StopIteration:
                     break
     elif input_file.endswith(".smi") or input_file.endswith(".smiles"):
-        with SMILESRead(
-            input_file, ignore=True, store_log=True, remap=False, header=True
-        ) as ifile, open(input_file) as meta_searcher:
+        with (
+            SMILESRead(
+                input_file, ignore=True, store_log=True, remap=False, header=True
+            ) as ifile,
+            open(input_file) as meta_searcher,
+        ):
             id_tag_position = meta_searcher.readline().strip().split().index(id_tag)
             if id_tag_position is None or id_tag_position == 0:
                 logging.critical(f"No reaction ID tag was found in the header!")

diff --git a/Test/SynComp/test_rule_compose.py b/Test/SynComp/test_rule_compose.py
@@ -3,7 +3,7 @@
 import unittest
 from syntemp.SynComp.rule_compose import RuleCompose
 from pathlib import Path
-from syntemp.SynUtils.graph_utils import load_gml_as_text
+from synutility.SynIO.data_type import load_gml_as_text
 from mod import ruleGMLString
 
 root_dir = Path(__file__).parents[2]

diff --git a/Test/SynITS/test_its_extraction.py b/Test/SynITS/test_its_extraction.py
@@ -84,6 +84,21 @@ def test_parallel_process_smiles(self):
         # Inequivalent AAM
         self.assertEqual(results_wrong[0]["equivariant"], 0)
 
+    def test_unsanitize_smiles(self):
+        test_2 = {
+            "R_ID": "R_1",
+            "Map": "[CH2:1]=[CH2:2].[H:3][H:4]>>[CH2:1]([H:3])[CH2:2]([H:4])",
+        }
+        its_true, _ = ITSExtraction.process_mapped_smiles(test_2, ["Map"])
+        its_false, _ = ITSExtraction.process_mapped_smiles(
+            test_2, ["Map"], sanitize=False
+        )
+        self.assertNotEqual(
+            len(its_true["ITSGraph"][2].nodes()), len(its_false["ITSGraph"][2].nodes())
+        )
+        self.assertEqual(len(its_true["ITSGraph"][2].nodes()), 2)
+        self.assertEqual(len(its_false["ITSGraph"][2].nodes()), 4)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Test/SynRule/test_hierarchical_clustering.py b/Test/SynRule/test_hierarchical_clustering.py
@@ -1,6 +1,6 @@
 import unittest
 from syntemp.SynRule.hierarchical_clustering import HierarchicalClustering
-from syntemp.SynUtils.utils import load_from_pickle
+from synutility.SynIO.data_type import load_from_pickle
 
 
 class TestRuleCluster(unittest.TestCase):

diff --git a/Test/SynRule/test_rule_cluster.py b/Test/SynRule/test_rule_cluster.py
@@ -1,6 +1,6 @@
 import unittest
 from syntemp.SynRule.rule_cluster import RuleCluster
-from syntemp.SynUtils.utils import load_from_pickle
+from synutility.SynIO.data_type import load_from_pickle
 
 
 class TestRuleCluster(unittest.TestCase):

diff --git a/Test/SynRule/test_rule_writing.py b/Test/SynRule/test_rule_writing.py
@@ -1,6 +1,6 @@
 import unittest
 import networkx as nx
-from syntemp.SynUtils.utils import load_from_pickle
+from synutility.SynIO.data_type import load_from_pickle
 from syntemp.SynRule.rule_writing import RuleWriting
 
 

diff --git a/Test/test_auto_template.py b/Test/test_auto_template.py
@@ -1,7 +1,8 @@
 import unittest
-from syntemp.auto_template import AutoTemp
-from syntemp.SynUtils.utils import load_database
 from pathlib import Path
+from syntemp.auto_template import AutoTemp
+from synutility.SynIO.data_type import load_database
+
 
 root_dir = Path(__file__).parents[1]
 

diff --git a/Test/test_main.py b/Test/test_main.py
@@ -1,8 +1,8 @@
-import unittest
 import os
+import shutil
+import unittest
 import subprocess
 import tempfile
-import shutil
 
 
 class TestCMD(unittest.TestCase):

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "syntemp"
-version = "0.0.4"
+version = "0.0.5"
 authors = [
     {name="Tieu Long Phan", email="tieu@bioinf.uni-leipzig.de"}
 ]

diff --git a/requirements.txt b/requirements.txt
@@ -12,4 +12,5 @@ rdkit>=2024.3.3
 networkx>=3.3
 seaborn>=0.13.2
 joblib>=1.3.2
-synrbl>=0.0.25
+synrbl>=0.0.25
+synutility>=0.0.10
diff --git a/syntemp/SynAAM/aam_validator.py b/syntemp/SynAAM/aam_validator.py
@@ -5,9 +5,10 @@
 from operator import eq
 from joblib import Parallel, delayed
 from networkx.algorithms.isomorphism import generic_node_match, generic_edge_match
+from synutility.SynIO.Format.mol_to_graph import MolToGraph
 from syntemp.SynITS.its_construction import ITSConstruction
+
 from syntemp.SynITS.its_extraction import ITSExtraction
-from syntemp.SynChemistry.mol_to_graph import MolToGraph
 from syntemp.SynRule.rules_extraction import RuleExtraction
 from syntemp.SynUtils.chemutils import enumerate_tautomers, mapping_success_rate
 from itertools import combinations

diff --git a/syntemp/SynChemistry/__init__.py b/syntemp/SynChemistry/__init__.py
@@ -0,0 +1,9 @@
+import warnings
+
+warnings.warn(
+    "The 'SynChemistry' subpackage is deprecated and will be removed in future releases. "
+    "Please migrate to the 'synutility' package as soon as possible,"
+    + " which offers enhanced functionality. "
+    "You can install it directly using pip: `pip install synutility`.",
+    FutureWarning,
+)
diff --git a/syntemp/SynITS/its_extraction.py b/syntemp/SynITS/its_extraction.py
@@ -5,11 +5,12 @@
 from joblib import Parallel, delayed
 from operator import eq
 from networkx.algorithms.isomorphism import generic_node_match, generic_edge_match
+from synutility.SynIO.debug import setup_logging
+from synutility.SynIO.Format.mol_to_graph import MolToGraph
+from synutility.SynChem.Reaction.standardize import Standardize
 from syntemp.SynITS.its_construction import ITSConstruction
-from syntemp.SynChemistry.mol_to_graph import MolToGraph
 from syntemp.SynRule.rules_extraction import RuleExtraction
-from syntemp.SynUtils.chemutils import remove_atom_mapping
-from syntemp.SynUtils.utils import setup_logging
+
 
 logger = setup_logging()
 
@@ -19,18 +20,19 @@ def __init__(self):
         pass
 
     @staticmethod
-    def graph_from_smiles(smiles: str) -> nx.Graph:
+    def graph_from_smiles(smiles: str, sanitize: bool = True) -> nx.Graph:
         """
         Constructs a graph representation from a SMILES string.
 
         Parameters:
         - smiles (str): A SMILES string representing a molecule or a set of molecules.
+        - sanitize (bool): Whether to sanitize the molecule(s). Defaults to True.
 
         Returns:
         - nx.Graph: A graph representation of the molecule(s).
         """
 
-        mol = Chem.MolFromSmiles(smiles)
+        mol = Chem.MolFromSmiles(smiles, sanitize=sanitize)
         graph = MolToGraph().mol_to_graph(mol, drop_non_aam=True)
         return graph
 
@@ -75,6 +77,7 @@ def process_mapped_smiles(
         ignore_aromaticity: bool = False,
         confident_mapper: str = "graphormer",
         symbol: str = ">>",
+        sanitize: bool = True,
     ) -> Dict[str, any]:
         """
         Processes mapped SMILES strings representing chemical reactions by constructing
@@ -89,6 +92,15 @@ def process_mapped_smiles(
         - mapper_names (List[str]): A list of mapper names to be processed.
         - check_method (str): A method to check for isomorphism among the ITS graphs.
         Either 'RC' or 'ITS'. Defaults to 'RC'.
+        - id_column (str): The name of the column in the dataframe that contains the
+        reaction ID. Defaults to 'R-id'.
+        - ignore_aromaticity (bool): Whether to ignore aromaticity in the reaction
+        graphs. Defaults to False.
+        - confident_mapper (str): The name of the mapper that was used to generate the
+        reaction graphs. Defaults to 'graphormer'.
+        - symbol (str): The symbol used to separate reactants and products in the
+        reaction SMILES string. Defaults to '>>'.
+        - sanitize (bool): Whether to sanitize the molecule(s). Defaults to True.
 
         Returns:
         - Dict[str, any]: A dictionary containing graph representations for each reaction
@@ -109,10 +121,10 @@ def process_mapped_smiles(
                 reactants_side, products_side = mapped_smiles[mapper].split(symbol)
 
                 # Get reactants graph G
-                G = ITSExtraction.graph_from_smiles(reactants_side)
+                G = ITSExtraction.graph_from_smiles(reactants_side, sanitize)
 
                 # Get products graph H
-                H = ITSExtraction.graph_from_smiles(products_side)
+                H = ITSExtraction.graph_from_smiles(products_side, sanitize)
 
                 # Construct the ITS graph
                 ITS = ITSConstruction.ITSGraph(G, H, ignore_aromaticity)
@@ -165,7 +177,9 @@ def process_mapped_smiles(
 
         # Check if mapper_names is not empty to avoid IndexError
         if mapper_names:
-            if "[O]" in remove_atom_mapping(mapped_smiles[mapper_names[0]]):
+            if "[O]" in Standardize().remove_atom_mapping(
+                mapped_smiles[mapper_names[0]]
+            ):
                 target_dict["ITSGraph"] = graphs_by_map.get(mapper_names[0], None)
                 target_dict["GraphRules"] = rules_by_map.get(mapper_names[0], None)
             else:
@@ -190,6 +204,7 @@ def parallel_process_smiles(
         ignore_aromaticity: bool = False,
         confident_mapper: str = "graphormer",
         symbol: str = ">>",
+        sanitize: bool = True,
     ) -> List[Dict[str, any],]:
         """
         Processes a list of mapped SMILES strings in parallel.
@@ -203,6 +218,13 @@ def parallel_process_smiles(
         - verbose (int): The verbosity level of the parallel processing.
         - check_method (str): A method to check for isomorphism among the ITS graphs.
         Either 'RC' or 'ITS'. Defaults to 'RC'.
+        - export_full (bool): Whether to export the full results. Defaults to False.
+        - ignore_aromaticity (bool): Whether to ignore aromaticity in the graph.
+        Defaults to False.
+        - confident_mapper (str): The mapper name to use if the check_method is 'RC'.
+        Defaults to 'graphormer'.
+        - symbol (str): The reactant/product separator in the SMILES. Defaults to '>>'.
+        - sanitize (bool): Whether to sanitize the molecule(s). Defaults to True.
 
         Returns:
         - List[Dict[str, any]]: A list of dictionaries containing graph representations
@@ -218,6 +240,7 @@ def parallel_process_smiles(
                 ignore_aromaticity,
                 confident_mapper,
                 symbol,
+                sanitize,
             )
             for mapped_smiles in mapped_smiles_list
         )

diff --git a/syntemp/SynITS/its_hadjuster.py b/syntemp/SynITS/its_hadjuster.py
@@ -53,7 +53,6 @@ def process_single_graph_data(
         hydrogen counts and aromaticity considerations.
         """
         graphs = deepcopy(graph_data)
-        logger.info(f"{graphs}")
         react_graph, prod_graph, its = graphs[column]
         is_empty_graph_present = any(
             (not isinstance(graph, nx.Graph) or graph.number_of_nodes() == 0)

diff --git a/syntemp/SynRule/hierarchical_clustering.py b/syntemp/SynRule/hierarchical_clustering.py
@@ -7,11 +7,16 @@
     add_child_ids,
     get_descriptors,
 )
-import logging
 
-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
-)
+from synutility.SynIO.debug import setup_logging
+
+logger = setup_logging()
+
+# import logging
+
+# logging.basicConfig(
+#     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+# )
 
 
 class HierarchicalClustering(RuleCluster):
@@ -222,8 +227,8 @@ def fit(
             reaction_dicts = copy.deepcopy(original_reaction_dicts)
             its_graphs = [value[its_column] for value in reaction_dicts]
 
-            logging.info("Processing with templates")
-            logging.info("Parent level")
+            logger.info("Processing with templates")
+            logger.info("Parent level")
             cluster_indices_0, templates_0 = self.process_level(
                 its_graphs,
                 0,
@@ -239,7 +244,7 @@ def fit(
             parent_cluster_indices = cluster_indices_0
             for k in self.radius:
                 if k > 0:
-                    logging.info(f"Child level with radius {k}")
+                    logger.info(f"Child level with radius {k}")
                     cluster_indices_k, templates_k = self.process_child_level(
                         its_graphs,
                         parent_cluster_indices,

diff --git a/syntemp/SynUtils/__init__.py b/syntemp/SynUtils/__init__.py
@@ -0,0 +1,9 @@
+import warnings
+
+warnings.warn(
+    "The 'SynUtils' subpackage is deprecated and will be removed in future releases. "
+    "Please migrate to the 'synutility' package as soon as possible,"
+    + " which offers enhanced functionality. "
+    "You can install it directly using pip: `pip install synutility`.",
+    FutureWarning,
+)
diff --git a/syntemp/SynVis/__init__.py b/syntemp/SynVis/__init__.py
@@ -0,0 +1,9 @@
+import warnings
+
+warnings.warn(
+    "The 'SynVis' subpackage is deprecated and will be removed in future releases. "
+    "Please migrate to the 'synutility' package as soon as possible,"
+    + " which offers enhanced functionality. "
+    "You can install it directly using pip: `pip install synutility`.",
+    FutureWarning,
+)
diff --git a/syntemp/auto_template.py b/syntemp/auto_template.py
@@ -13,9 +13,9 @@
 from syntemp.SynUtils.utils import (
     prune_branches,
     reindex_data,
-    save_database,
-    setup_logging,
 )
+from synutility.SynIO.data_type import save_database
+from synutility.SynIO.debug import setup_logging
 
 
 class AutoTemp:

diff --git a/syntemp/pipeline.py b/syntemp/pipeline.py
@@ -2,15 +2,18 @@
 import shutil
 import pandas as pd
 from typing import List, Any, Dict, Optional, Union, Tuple
-from syntemp.SynChemistry.neutralize import Neutralize
-from syntemp.SynChemistry.deionize import Deionize
+from synutility.SynChem.Reaction.neutralize import Neutralize
+from synutility.SynChem.Reaction.deionize import Deionize
+from synutility.SynIO.data_type import save_to_pickle, collect_data
+from synutility.SynIO.debug import setup_logging
+
 from syntemp.SynAAM.atom_map_consensus import AAMConsensus
 from syntemp.SynITS.its_extraction import ITSExtraction
 from syntemp.SynITS.its_hadjuster import ITSHAdjuster
 from syntemp.SynITS.its_refinement import ITSRefinement
 from syntemp.SynRule.hierarchical_clustering import HierarchicalClustering
 from syntemp.SynRule.rule_writing import RuleWriting
-from syntemp.SynUtils.utils import save_to_pickle, collect_data, setup_logging
+
 from synrbl import Balancer