diff --git a/CHANGELOG.md b/CHANGELOG.md index e29283a..3683aa4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,17 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org). +## [2.0.1] - 2021-11-15 + +### Added + +- Cell module test. + +### Fixed + +- Removing atoms not in the monomer library after Nautilus +- Updated PIP requirements + ## [2.0.0] - 2021-11-04 ### Added diff --git a/modelcraft/__init__.py b/modelcraft/__init__.py index 8c0d5d5..159d48b 100644 --- a/modelcraft/__init__.py +++ b/modelcraft/__init__.py @@ -1 +1 @@ -__version__ = "2.0.0" +__version__ = "2.0.1" diff --git a/modelcraft/jobs/nautilus.py b/modelcraft/jobs/nautilus.py index a65ba70..adcb957 100644 --- a/modelcraft/jobs/nautilus.py +++ b/modelcraft/jobs/nautilus.py @@ -3,7 +3,7 @@ from ..contents import AsuContents, PolymerType from ..job import Job from ..reflections import DataItem, write_mtz -from ..structure import read_structure, write_mmcif +from ..structure import read_structure, remove_non_library_atoms, write_mmcif @dataclasses.dataclass @@ -58,7 +58,9 @@ def _setup(self) -> None: def _result(self) -> NautilusResult: self._check_files_exist("xyzout.cif") + structure = read_structure(self._path("xyzout.cif")) + remove_non_library_atoms(structure) return NautilusResult( - structure=read_structure(self._path("xyzout.cif")), + structure=structure, seconds=self._seconds, ) diff --git a/modelcraft/monlib.py b/modelcraft/monlib.py new file mode 100644 index 0000000..23ee9fe --- /dev/null +++ b/modelcraft/monlib.py @@ -0,0 +1,23 @@ +import functools +import os +import gemmi + + +def _path(code: str) -> str: + return os.path.join(os.environ["CLIBD_MON"], code[0].lower(), f"{code.upper()}.cif") + + +@functools.lru_cache(maxsize=None) +def atom_ids(code: str) -> set: + return {atom.id for atom in chemcomp(code).atoms} + + +@functools.lru_cache(maxsize=None) +def chemcomp(code: str) -> gemmi.ChemComp: + doc = gemmi.cif.read(_path(code)) + return gemmi.make_chemcomp_from_block(doc[-1]) + + +@functools.lru_cache(maxsize=None) +def in_library(code: str) -> bool: + return os.path.exists(_path(code)) diff --git a/modelcraft/solvent.py b/modelcraft/solvent.py index b2f2a8f..463e085 100644 --- a/modelcraft/solvent.py +++ b/modelcraft/solvent.py @@ -2,10 +2,10 @@ import dataclasses import functools import math -import os import re import gemmi from .contents import AsuContents, Polymer, PolymerType +from .monlib import chemcomp def solvent_fraction(contents: AsuContents, mtz: gemmi.Mtz) -> float: @@ -15,24 +15,14 @@ def solvent_fraction(contents: AsuContents, mtz: gemmi.Mtz) -> float: return 1 - copies * volume / asu_volume -@functools.lru_cache(maxsize=None) -def _library_chemcomp(code: str) -> gemmi.ChemComp: - code = code.upper() - path = os.path.join(os.environ["CLIBD_MON"], code[0].lower(), code + ".cif") - if not os.path.exists(path): - raise RuntimeError(f"Monomer {code} not found at {path}") - block = gemmi.cif.read(path)[-1] - return gemmi.make_chemcomp_from_block(block) - - @functools.lru_cache(maxsize=None) def _library_weight(code: str) -> float: - return sum(atom.el.weight for atom in _library_chemcomp(code).atoms) + return sum(atom.el.weight for atom in chemcomp(code).atoms) @functools.lru_cache(maxsize=None) def _library_volume(code: str) -> float: - return sum(18 for atom in _library_chemcomp(code).atoms if not atom.is_hydrogen()) + return sum(18 for atom in chemcomp(code).atoms if not atom.is_hydrogen()) def _polymer_weight(polymer: Polymer) -> float: diff --git a/modelcraft/structure.py b/modelcraft/structure.py index 1ee5b08..28826de 100755 --- a/modelcraft/structure.py +++ b/modelcraft/structure.py @@ -1,4 +1,5 @@ import gemmi +from .monlib import atom_ids, in_library def read_structure(path: str) -> gemmi.Structure: @@ -45,6 +46,17 @@ def remove_residues(structure: gemmi.Structure, names) -> None: structure.remove_empty_chains() +def remove_non_library_atoms(structure: gemmi.Structure) -> None: + for model in structure: + for chain in model: + for residue in chain: + if in_library(residue.name): + for i, atom in reversed(list(enumerate(residue))): + if atom.name not in atom_ids(residue.name): + del residue[i] + structure.remove_empty_chains() + + def write_mmcif(path: str, structure: gemmi.Structure) -> None: structure.make_mmcif_document().write_file(path) diff --git a/modelcraft/tests/integration/test_cell.py b/modelcraft/tests/integration/test_cell.py new file mode 100644 index 0000000..3ac7424 --- /dev/null +++ b/modelcraft/tests/integration/test_cell.py @@ -0,0 +1,25 @@ +import urllib.request +import gemmi +from modelcraft.cell import max_distortion, remove_scale, update_cell +from modelcraft.structure import read_structure +from . import in_temp_directory + + +@in_temp_directory +def test_1ana(): + url = "https://ftp.ebi.ac.uk/pub/databases/pdb_versioned/data/entries/" + url += "an/pdb_00001ana/pdb_00001ana_xyz_v1-2.cif.gz" + urllib.request.urlretrieve(url, "1ana.cif.gz") + structure = read_structure("1ana.cif.gz") + assert structure.cell.parameters == (41.1, 41.1, 26.7, 90, 90, 90) + assert structure.cell.fractionalization_matrix.tolist()[0][0] == 0 + remove_scale(structure) + assert structure.cell.fractionalization_matrix.tolist()[0][0] != 0 + new_parameters = (41, 41, 27, 90, 90, 90) + new_cell = gemmi.UnitCell(*new_parameters) + assert max_distortion(structure.cell, new_cell) < 0.05 + old_pos = list(structure[0][0][0][0].pos) + update_cell(structure, new_cell) + assert structure.cell.parameters == new_parameters + new_pos = list(structure[0][0][0][0].pos) + assert old_pos != new_pos diff --git a/requirements.txt b/requirements.txt index 44b92c7..c5c8d23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ gemmi >=0.4.0 +numpy pandas requests +scipy \ No newline at end of file diff --git a/setup.py b/setup.py index 58a6b3a..2a0e05a 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="modelcraft", - version="2.0.0", + version="2.0.1", author="Paul Bond", author_email="paul.bond@york.ac.uk", description="Automated model building pipeline for X-ray crystallography", @@ -27,8 +27,10 @@ python_requires="~=3.7", install_requires=[ "gemmi >=0.4.0", + "numpy", "pandas", "requests", + "scipy", ], entry_points={ "console_scripts": [