From 0ccc57046aa2cbff36ee863a3d9031968e10e4df Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 10 May 2023 01:30:53 -0400 Subject: [PATCH 01/40] fix RTD builds (#468) per https://github.com/readthedocs/readthedocs.org/issues/10290#issuecomment-1535120995 --- .readthedocs.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..87f8c733 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,12 @@ +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" +sphinx: + configuration: docs/conf.py +formats: all +python: + install: + - requirements: docs/requirements.txt From 63904ed6331bb7e43ffc2738481b447cfb8f1427 Mon Sep 17 00:00:00 2001 From: Jia-Xin Zhu <53895049+ChiahsinChu@users.noreply.github.com> Date: Thu, 11 May 2023 09:38:37 +0800 Subject: [PATCH 02/40] fix bug in reading cp2k/output of cp2k v9.1 (compatible with old ver) (#466) Co-authored-by: Jia-Xin Zhu Co-authored-by: Jinzhe Zeng --- dpdata/cp2k/output.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dpdata/cp2k/output.py b/dpdata/cp2k/output.py index 13d5be7c..230c3def 100644 --- a/dpdata/cp2k/output.py +++ b/dpdata/cp2k/output.py @@ -417,6 +417,12 @@ def get_frames(fname): # get the coord block info if coord_flag: + if idx == coord_idx + 1: + if ii == "\n": + pass + else: + coord.append(ii.split()[4:7]) + atom_symbol_idx_list.append(ii.split()[1]) if idx > coord_idx + 1: if ii == "\n": coord_flag = False From c5b96d84aee66c92913c0a7a7cac3c8da120d95c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 11 May 2023 09:38:48 +0800 Subject: [PATCH 03/40] [pre-commit.ci] pre-commit autoupdate (#467) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/charliermarsh/ruff-pre-commit: v0.0.263 → v0.0.265](https://github.com/charliermarsh/ruff-pre-commit/compare/v0.0.263...v0.0.265) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jinzhe Zeng --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 14b08cef..02d01621 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: black-jupyter - repo: https://github.com/charliermarsh/ruff-pre-commit # Ruff version. 
- rev: v0.0.263 + rev: v0.0.265 hooks: - id: ruff args: ["--fix"] From 0d2b823d89924a4dc5f12f56271e6eaadb7ddc63 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 14 May 2023 23:44:11 -0400 Subject: [PATCH 04/40] deepmd/hdf5: handle empty systems and files (#469) --- dpdata/deepmd/hdf5.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py index e7e1abe6..1afb6489 100644 --- a/dpdata/deepmd/hdf5.py +++ b/dpdata/deepmd/hdf5.py @@ -30,6 +30,9 @@ def to_system_data( g = f[folder] if folder else f data = {} + # ignore empty files or groups + if "type.raw" not in g.keys(): + return data data["atom_types"] = g["type.raw"][:] ntypes = np.max(data["atom_types"]) + 1 natoms = data["atom_types"].size @@ -140,6 +143,9 @@ def dump( g = f.create_group(folder) else: g = f + # ignore empty systems + if not len(data["coords"]): + return # dump raw (array in fact) g.create_dataset("type.raw", data=data["atom_types"]) g.create_dataset("type_map.raw", data=np.array(data["atom_names"], dtype="S")) From 939fe6ff510257fd01e30d20a92d50f461cf83a3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 May 2023 12:47:42 +0800 Subject: [PATCH 05/40] [pre-commit.ci] pre-commit autoupdate (#470) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/charliermarsh/ruff-pre-commit: v0.0.265 → v0.0.267](https://github.com/charliermarsh/ruff-pre-commit/compare/v0.0.265...v0.0.267) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 02d01621..380c3433 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: black-jupyter - repo: https://github.com/charliermarsh/ruff-pre-commit # Ruff version. - rev: v0.0.265 + rev: v0.0.267 hooks: - id: ruff args: ["--fix"] From bc72db372a5ebe7dd41d0c7bf462cbfe732c3955 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 18 May 2023 22:30:57 -0400 Subject: [PATCH 06/40] keep atom types when using DP models (#471) Before this PR, when using DP models to predict energies and forces, the atom types were changed in the returned system according to the type map in the model. This is not an expected behavior. The changed atom types should only be used for internal inference. --- dpdata/plugins/deepmd.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpdata/plugins/deepmd.py b/dpdata/plugins/deepmd.py index 38eab85f..51fd4667 100644 --- a/dpdata/plugins/deepmd.py +++ b/dpdata/plugins/deepmd.py @@ -394,8 +394,10 @@ def label(self, data: dict) -> dict: type_map = self.dp.get_type_map() ori_sys = dpdata.System.from_dict({"data": data}) + ori_sys_copy = ori_sys.copy() ori_sys.sort_atom_names(type_map=type_map) atype = ori_sys["atom_types"] + ori_sys = ori_sys_copy if not self.enable_auto_batch_size: labeled_sys = dpdata.LabeledSystem() From e31b42e07e52b91f4faff4f30d6aed3646cd4299 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 23 May 2023 22:56:16 -0400 Subject: [PATCH 07/40] allow custom data types for deepmd format (#476) In this commit, the deepmd format reads data types from `dpdata.system.LabeledSystem.DTYPES` and load or dump the corresponding data. 
One can modify `dpdata.system.LabeledSystem.DTYPES` before using it or in a plugin (see tests), so deepmd format can be extended in this way. --------- Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/deepmd/comp.py | 68 ++++++++++++++++++++++++++++++++++ dpdata/deepmd/hdf5.py | 64 ++++++++++++++++++++++++++++++++ dpdata/deepmd/raw.py | 65 ++++++++++++++++++++++++++++++++ tests/test_custom_data_type.py | 53 ++++++++++++++++++++++++++ 4 files changed, 250 insertions(+) create mode 100644 tests/test_custom_data_type.py diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index 66bf4ee6..9d63e1dd 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -1,9 +1,12 @@ import glob import os import shutil +import warnings import numpy as np +import dpdata + from .raw import load_type @@ -60,6 +63,40 @@ def to_system_data(folder, type_map=None, labels=True): data["forces"] = np.concatenate(all_forces, axis=0) if len(all_virs) > 0: data["virials"] = np.concatenate(all_virs, axis=0) + # allow custom dtypes + if labels: + for dtype in dpdata.system.LabeledSystem.DTYPES: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/npy format." + ) + continue + shape = [ + -1 if xx == dpdata.system.Axis.NATOMS else xx for xx in dtype.shape[1:] + ] + all_data = [] + for ii in sets: + tmp = _cond_load_data(os.path.join(ii, dtype.name + ".npy")) + if tmp is not None: + all_data.append(np.reshape(tmp, [tmp.shape[0], *shape])) + if len(all_data) > 0: + data[dtype.name] = np.concatenate(all_data, axis=0) return data @@ -131,3 +168,34 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): if data.get("nopbc", False): with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass + # allow custom dtypes + for dtype in dpdata.system.LabeledSystem.DTYPES: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if dtype.name not in data: + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/npy format." 
+ ) + continue + ddata = np.reshape(data[dtype.name], [nframes, -1]).astype(comp_prec) + for ii in range(nsets): + set_stt = ii * set_size + set_end = (ii + 1) * set_size + set_folder = os.path.join(folder, "set.%03d" % ii) + np.save(os.path.join(set_folder, dtype.name), ddata[set_stt:set_end]) diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py index 1afb6489..b4ae1a3c 100644 --- a/dpdata/deepmd/hdf5.py +++ b/dpdata/deepmd/hdf5.py @@ -1,10 +1,13 @@ """Utils for deepmd/hdf5 format.""" +import warnings from typing import Optional, Union import h5py import numpy as np from wcmatch.glob import globfilter +import dpdata + __all__ = ["to_system_data", "dump"] @@ -92,6 +95,36 @@ def to_system_data( "required": False, }, } + # allow custom dtypes + for dtype in dpdata.system.LabeledSystem.DTYPES: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/hdf5 format." + ) + continue + + data_types[dtype.name] = { + "fn": dtype.name, + "labeled": True, + "shape": dtype.shape[1:], + "required": False, + } for dt, prop in data_types.items(): all_data = [] @@ -167,6 +200,37 @@ def dump( "forces": {"fn": "force", "shape": (nframes, -1), "dump": True}, "virials": {"fn": "virial", "shape": (nframes, 9), "dump": True}, } + + # allow custom dtypes + for dtype in dpdata.system.LabeledSystem.DTYPES: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/hdf5 format." + ) + continue + + data_types[dtype.name] = { + "fn": dtype.name, + "shape": (nframes, -1), + "dump": True, + } + for dt, prop in data_types.items(): if dt in data: if prop["dump"]: diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index 2f2021d4..fdb2fc64 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -1,7 +1,10 @@ import os +import warnings import numpy as np +import dpdata + def load_type(folder, type_map=None): data = {} @@ -57,6 +60,41 @@ def to_system_data(folder, type_map=None, labels=True): data["virials"] = np.reshape(data["virials"], [nframes, 3, 3]) if os.path.isfile(os.path.join(folder, "nopbc")): data["nopbc"] = True + # allow custom dtypes + if labels: + for dtype in dpdata.system.LabeledSystem.DTYPES: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not ( + len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES + ): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/raw format." 
+ ) + continue + shape = [ + -1 if xx == dpdata.system.Axis.NATOMS else xx + for xx in dtype.shape[1:] + ] + if os.path.exists(os.path.join(folder, f"{dtype.name}.raw")): + data[dtype.name] = np.reshape( + np.loadtxt(os.path.join(folder, f"{dtype.name}.raw")), + [nframes, *shape], + ) return data else: raise RuntimeError("not dir " + folder) @@ -102,3 +140,30 @@ def dump(folder, data): if data.get("nopbc", False): with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass + # allow custom dtypes + for dtype in dpdata.system.LabeledSystem.DTYPES: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if dtype.name not in data: + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/raw format." + ) + continue + ddata = np.reshape(data[dtype.name], [nframes, -1]) + np.savetxt(os.path.join(folder, f"{dtype.name}.raw"), ddata) diff --git a/tests/test_custom_data_type.py b/tests/test_custom_data_type.py new file mode 100644 index 00000000..58b9f5e4 --- /dev/null +++ b/tests/test_custom_data_type.py @@ -0,0 +1,53 @@ +import unittest + +import h5py +import numpy as np + +import dpdata +from dpdata.system import Axis, DataType + + +class TestDeepmdLoadDumpComp(unittest.TestCase): + def setUp(self): + self.backup = dpdata.system.LabeledSystem.DTYPES + dpdata.system.LabeledSystem.DTYPES = dpdata.system.LabeledSystem.DTYPES + ( + DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), required=False), + ) + self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.foo = np.ones((len(self.system), 2, 4)) + self.system.data["foo"] = self.foo + self.system.check_data() + + def tearDown(self) -> None: + dpdata.system.LabeledSystem.DTYPES = self.backup + + def test_to_deepmd_raw(self): + self.system.to_deepmd_raw("data_foo") + foo = np.loadtxt("data_foo/foo.raw") + np.testing.assert_allclose(foo.reshape(self.foo.shape), self.foo) + + def test_from_deepmd_raw(self): + self.system.to_deepmd_raw("data_foo") + x = dpdata.LabeledSystem("data_foo", fmt="deepmd/raw") + np.testing.assert_allclose(x.data["foo"], self.foo) + + def test_to_deepmd_npy(self): + self.system.to_deepmd_npy("data_foo") + foo = np.load("data_foo/set.000/foo.npy") + np.testing.assert_allclose(foo.reshape(self.foo.shape), self.foo) + + def test_from_deepmd_npy(self): + self.system.to_deepmd_npy("data_foo") + x = dpdata.LabeledSystem("data_foo", fmt="deepmd/npy") + np.testing.assert_allclose(x.data["foo"], self.foo) + + def test_to_deepmd_hdf5(self): + self.system.to_deepmd_hdf5("data_foo.h5") + with h5py.File("data_foo.h5") as f: + foo = f["set.000/foo.npy"][:] + np.testing.assert_allclose(foo.reshape(self.foo.shape), self.foo) + + def test_from_deepmd_hdf5(self): + self.system.to_deepmd_hdf5("data_foo.h5") + x = dpdata.LabeledSystem("data_foo.h5", fmt="deepmd/hdf5") + np.testing.assert_allclose(x.data["foo"], self.foo) From 7b67af009b37e9496384ca751b20707128a1b2ba Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 May 2023 11:04:35 +0800 Subject: [PATCH 08/40] [pre-commit.ci] pre-commit autoupdate (#474) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit updates: - [github.com/charliermarsh/ruff-pre-commit: v0.0.267 → v0.0.269](https://github.com/charliermarsh/ruff-pre-commit/compare/v0.0.267...v0.0.269) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 380c3433..aeb69a23 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: black-jupyter - repo: https://github.com/charliermarsh/ruff-pre-commit # Ruff version. - rev: v0.0.267 + rev: v0.0.269 hooks: - id: ruff args: ["--fix"] From 1b9daf4cd242c457258f3845181a514ea99ad9e5 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 25 May 2023 11:59:55 +0800 Subject: [PATCH 09/40] fix `set_size` in `to_deepmd_npy_mixed` func (#477) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/plugins/deepmd.py | 8 ++- tests/test_deepmd_mixed.py | 101 +++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 2 deletions(-) diff --git a/dpdata/plugins/deepmd.py b/dpdata/plugins/deepmd.py index 51fd4667..26cdc382 100644 --- a/dpdata/plugins/deepmd.py +++ b/dpdata/plugins/deepmd.py @@ -95,7 +95,9 @@ def from_system_mix(self, file_name, type_map=None, **kwargs): file_name, type_map=type_map, labels=False ) - def to_system(self, data, file_name, prec=np.float64, **kwargs): + def to_system( + self, data, file_name, set_size: int = 2000, prec=np.float64, **kwargs + ): """Dump the system in deepmd mixed type format (numpy binary) to `folder`. The frames were already split to different systems, so these frames can be dumped to one single subfolders @@ -107,12 +109,14 @@ def to_system(self, data, file_name, prec=np.float64, **kwargs): System data file_name : str The output folder + set_size : int, default=2000 + set size prec : {numpy.float32, numpy.float64} The floating point precision of the compressed data **kwargs : dict other parameters """ - dpdata.deepmd.mixed.dump(file_name, data, comp_prec=prec) + dpdata.deepmd.mixed.dump(file_name, data, set_size=set_size, comp_prec=prec) def from_labeled_system_mix(self, file_name, type_map=None, **kwargs): return dpdata.deepmd.mixed.to_system_data( diff --git a/tests/test_deepmd_mixed.py b/tests/test_deepmd_mixed.py index 6311410e..7e522e06 100644 --- a/tests/test_deepmd_mixed.py +++ b/tests/test_deepmd_mixed.py @@ -109,6 +109,107 @@ def test_str(self): ) +class TestMixedMultiSystemsDumpLoadSetSize( + unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC +): + def setUp(self): + self.places = 6 + self.e_places = 6 + self.f_places = 6 + self.v_places = 6 + + # C1H4 + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + + # C1H3 + system_2 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1B2 + system_1_modified_type_1 = dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 2, 1] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 2, 3]) + # C1H1A2B1 + system_1_modified_type_2 = dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + 
tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "D"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1C2 + system_1_modified_type_3 = dpdata.LabeledSystem(data=tmp_data) + + self.ms = dpdata.MultiSystems( + system_1, + system_2, + system_1_modified_type_1, + system_1_modified_type_2, + system_1_modified_type_3, + ) + self.ms.to_deepmd_npy_mixed("tmp.deepmd.mixed", set_size=1) + self.place_holder_ms = dpdata.MultiSystems() + self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") + self.systems = dpdata.MultiSystems() + self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") + self.system_1 = self.ms["C1H4A0B0D0"] + self.system_2 = self.systems["C1H4A0B0D0"] + mixed_sets = glob("tmp.deepmd.mixed/*/set.*") + self.assertEqual(len(mixed_sets), 5) + for i in mixed_sets: + self.assertEqual( + os.path.exists(os.path.join(i, "real_atom_types.npy")), True + ) + + self.system_names = [ + "C1H4A0B0D0", + "C1H3A0B0D0", + "C1H1A1B2D0", + "C1H1A2B1D0", + "C1H1A1B0D2", + ] + self.system_sizes = { + "C1H4A0B0D0": 1, + "C1H3A0B0D0": 1, + "C1H1A1B2D0": 1, + "C1H1A2B1D0": 1, + "C1H1A1B0D2": 1, + } + self.atom_names = ["C", "H", "A", "B", "D"] + + def tearDown(self): + if os.path.exists("tmp.deepmd.mixed"): + shutil.rmtree("tmp.deepmd.mixed") + + def test_len(self): + self.assertEqual(len(self.ms), 5) + self.assertEqual(len(self.place_holder_ms), 2) + self.assertEqual(len(self.systems), 5) + + def test_get_nframes(self): + self.assertEqual(self.ms.get_nframes(), 5) + self.assertEqual(self.place_holder_ms.get_nframes(), 5) + self.assertEqual(self.systems.get_nframes(), 5) + + def test_str(self): + self.assertEqual(str(self.ms), "MultiSystems (5 systems containing 5 frames)") + self.assertEqual( + str(self.place_holder_ms), "MultiSystems (2 systems containing 5 frames)" + ) + self.assertEqual( + str(self.systems), "MultiSystems (5 systems containing 5 frames)" + ) + + class TestMixedMultiSystemsTypeChange( unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC ): From ded4354b66e27e19de9a2783d2c546c7231af7a0 Mon Sep 17 00:00:00 2001 From: Wenxin Zhang <115126706+starinthesky72@users.noreply.github.com> Date: Mon, 29 May 2023 11:59:25 +0800 Subject: [PATCH 10/40] fix: writing abacus stru. (#478) When using system.to() and convert data to write ABACUS `STRU` file, func `make_unlabeled_stru` won't use `atom_types` in data and miswriting the atom order. For example when using data from gaussian/log and other file using ungrouped atom coordinations,as the `atom_types=[0,1,1,1,2,1]` the func will write wrong coordinations and regard the atom as `atom_types=[0,1,1,1,1,2]` which cause serious problem. 
--- dpdata/abacus/scf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index 1816913e..d6f83e72 100644 --- a/dpdata/abacus/scf.py +++ b/dpdata/abacus/scf.py @@ -327,10 +327,11 @@ def make_unlabeled_stru( out += "0.0\n" out += str(data["atom_numbs"][iele]) + "\n" for iatom in range(data["atom_numbs"][iele]): + iatomtype = np.nonzero(data["atom_types"] == iele)[0][iatom] out += "%.12f %.12f %.12f %d %d %d\n" % ( - data["coords"][frame_idx][natom_tot, 0], - data["coords"][frame_idx][natom_tot, 1], - data["coords"][frame_idx][natom_tot, 2], + data["coords"][frame_idx][iatomtype, 0], + data["coords"][frame_idx][iatomtype, 1], + data["coords"][frame_idx][iatomtype, 2], 1, 1, 1, From 5bb84c9123b618d179ef4c3406bc44ace57863ea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 May 2023 20:17:58 +0800 Subject: [PATCH 11/40] [pre-commit.ci] pre-commit autoupdate (#479) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/charliermarsh/ruff-pre-commit: v0.0.269 → v0.0.270](https://github.com/charliermarsh/ruff-pre-commit/compare/v0.0.269...v0.0.270) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aeb69a23..7104e3a1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: black-jupyter - repo: https://github.com/charliermarsh/ruff-pre-commit # Ruff version. - rev: v0.0.269 + rev: v0.0.270 hooks: - id: ruff args: ["--fix"] From f83c5b0106d98bc872ca2af7584b576673f08cf0 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 8 Jun 2023 01:18:11 -0400 Subject: [PATCH 12/40] fix customized data shape in deepmd/hdf5 (#481) Follow up #476. `deepmd/npy` and `deepmd/raw` are ok, and only `deepmd/hdf5` has this bug. --- dpdata/deepmd/hdf5.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py index b4ae1a3c..f2ee9c8a 100644 --- a/dpdata/deepmd/hdf5.py +++ b/dpdata/deepmd/hdf5.py @@ -118,11 +118,14 @@ def to_system_data( f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/hdf5 format." 
) continue + shape = [ + natoms if xx == dpdata.system.Axis.NATOMS else xx for xx in dtype.shape[1:] + ] data_types[dtype.name] = { "fn": dtype.name, "labeled": True, - "shape": dtype.shape[1:], + "shape": shape, "required": False, } From ec232f8a0c2a897f2a9b776d46eef8a517fe1336 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 Jun 2023 22:02:01 +0800 Subject: [PATCH 13/40] [pre-commit.ci] pre-commit autoupdate (#482) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/charliermarsh/ruff-pre-commit: v0.0.270 → v0.0.272](https://github.com/charliermarsh/ruff-pre-commit/compare/v0.0.270...v0.0.272) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7104e3a1..80fe8636 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: black-jupyter - repo: https://github.com/charliermarsh/ruff-pre-commit # Ruff version. - rev: v0.0.270 + rev: v0.0.272 hooks: - id: ruff args: ["--fix"] From c333c192dae729cabb845d842aeb347e1481da76 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 22 Jun 2023 09:06:05 -0400 Subject: [PATCH 14/40] Use stable sorting algorithms (#487) Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/cp2k/output.py | 2 +- dpdata/lammps/dump.py | 4 +- dpdata/md/rdf.py | 2 +- dpdata/qe/scf.py | 2 +- dpdata/system.py | 4 +- dpdata/utils.py | 8 +- tests/poscars/POSCAR.P42nmc.replace | 192 ++++++++++++++-------------- 7 files changed, 109 insertions(+), 105 deletions(-) diff --git a/dpdata/cp2k/output.py b/dpdata/cp2k/output.py index 230c3def..ed3ab5af 100644 --- a/dpdata/cp2k/output.py +++ b/dpdata/cp2k/output.py @@ -494,7 +494,7 @@ def get_frames(fname): atom_types = [] atom_numbs = [] # preserve the atom_name order - atom_names = atom_symbol_list[np.sort(symbol_idx)] + atom_names = atom_symbol_list[np.sort(symbol_idx, kind="stable")] for jj in atom_symbol_list: for idx, ii in enumerate(atom_names): if jj == ii: diff --git a/dpdata/lammps/dump.py b/dpdata/lammps/dump.py index cdc28bdc..017f75a3 100644 --- a/dpdata/lammps/dump.py +++ b/dpdata/lammps/dump.py @@ -215,7 +215,9 @@ def system_data(lines, type_map=None, type_idx_zero=True, unwrap=False): system["cells"].append(cell) atype = get_atype(array_lines[ii], type_idx_zero=type_idx_zero) # map atom type; a[as[a][as[as[b]]]] = b[as[b][as^{-1}[b]]] = b[id] - idx = np.argsort(atype)[np.argsort(np.argsort(system["atom_types"]))] + idx = np.argsort(atype, kind="stable")[ + np.argsort(np.argsort(system["atom_types"], kind="stable"), kind="stable") + ] system["coords"].append( safe_get_posi(array_lines[ii], cell, np.array(orig), unwrap)[idx] ) diff --git a/dpdata/md/rdf.py b/dpdata/md/rdf.py index 219543eb..d5409a0c 100644 --- a/dpdata/md/rdf.py +++ b/dpdata/md/rdf.py @@ -57,7 +57,7 @@ def compute_rdf(box, posis, atype, sel_type=[None, None], max_r=5, nbins=100): def _compute_rdf_1frame(box, posis, atype, sel_type=[None, None], max_r=5, nbins=100): - all_types = list(set(list(np.sort(atype)))) + all_types = list(set(list(np.sort(atype, kind="stable")))) if sel_type[0] is None: sel_type[0] = all_types if sel_type[1] is None: diff --git a/dpdata/qe/scf.py b/dpdata/qe/scf.py index 659b1845..4457572d 100755 --- 
a/dpdata/qe/scf.py +++ b/dpdata/qe/scf.py @@ -87,7 +87,7 @@ def get_coords(lines, cell): atom_types = [] atom_numbs = [] # preserve the atom_name order - atom_names = atom_symbol_list[np.sort(symbol_idx)] + atom_names = atom_symbol_list[np.sort(symbol_idx, kind="stable")] for jj in atom_symbol_list: for idx, ii in enumerate(atom_names): if jj == ii: diff --git a/dpdata/system.py b/dpdata/system.py index 743f3d05..e33ce47e 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -624,7 +624,7 @@ def sort_atom_types(self) -> np.ndarray: idx : np.ndarray new atom index in the Axis.NATOMS """ - idx = np.argsort(self.data["atom_types"]) + idx = np.argsort(self.data["atom_types"], kind="stable") for tt in self.DTYPES: if tt.name not in self.data: # skip optional data @@ -767,7 +767,7 @@ def replicate(self, ncopy): np.array(np.copy(data["atom_numbs"])) * np.prod(ncopy) ) tmp.data["atom_types"] = np.sort( - np.tile(np.copy(data["atom_types"]), np.prod(ncopy)) + np.tile(np.copy(data["atom_types"]), np.prod(ncopy)), kind="stable" ) tmp.data["cells"] = np.copy(data["cells"]) for ii in range(3): diff --git a/dpdata/utils.py b/dpdata/utils.py index 55cf41ef..da726179 100644 --- a/dpdata/utils.py +++ b/dpdata/utils.py @@ -63,14 +63,16 @@ def sort_atom_names(data, type_map=None): # a[as[a]] == b[as[b]] as == argsort # as[as[b]] == as^{-1}[b] # a[as[a][as[as[b]]]] = b[as[b][as^{-1}[b]]] = b[id] - idx = np.argsort(data["atom_names"])[np.argsort(np.argsort(type_map))] + idx = np.argsort(data["atom_names"], kind="stable")[ + np.argsort(np.argsort(type_map, kind="stable"), kind="stable") + ] else: # index that will sort an array by alphabetical order - idx = np.argsort(data["atom_names"]) + idx = np.argsort(data["atom_names"], kind="stable") # sort atom_names, atom_numbs, atom_types by idx data["atom_names"] = list(np.array(data["atom_names"])[idx]) data["atom_numbs"] = list(np.array(data["atom_numbs"])[idx]) - data["atom_types"] = np.argsort(idx)[data["atom_types"]] + data["atom_types"] = np.argsort(idx, kind="stable")[data["atom_types"]] return data diff --git a/tests/poscars/POSCAR.P42nmc.replace b/tests/poscars/POSCAR.P42nmc.replace index e785d128..5dd05d84 100644 --- a/tests/poscars/POSCAR.P42nmc.replace +++ b/tests/poscars/POSCAR.P42nmc.replace @@ -6,99 +6,99 @@ Hf24 O64 Zr8 Hf O Zr 24 64 8 Cartesian - 0.0000000000 0.0000000000 0.0000000000 - 7.5009600000 7.5009600000 5.0819200000 - 7.5009600000 7.5009600000 0.0000000000 - 7.5009600000 2.5003200000 10.1638400000 - 2.5003200000 7.5009600000 5.0819200000 - 2.5003200000 7.5009600000 10.1638400000 - 2.5003200000 2.5003200000 5.0819200000 - 2.5003200000 2.5003200000 10.1638400000 - 5.0006400000 2.5003200000 7.6228800000 - 10.0012800000 7.5009600000 2.5409600000 - 0.0000000000 2.5003200000 7.6228800000 - 0.0000000000 2.5003200000 2.5409600000 - 7.5009600000 5.0006400000 7.6228800000 - 7.5009600000 5.0006400000 2.5409600000 - 10.0012800000 7.5009600000 7.6228800000 - 7.5009600000 0.0000000000 2.5409600000 - 0.0000000000 0.0000000000 5.0819200000 - 0.0000000000 5.0006400000 5.0819200000 - 7.5009600000 0.0000000000 7.6228800000 - 5.0006400000 0.0000000000 5.0819200000 - 2.5003200000 0.0000000000 2.5409600000 - 5.0006400000 0.0000000000 10.1638400000 - 2.5003200000 0.0000000000 7.6228800000 - 2.5003200000 5.0006400000 7.6228800000 - 1.2501600000 8.7511200000 1.0558196992 - 1.2501600000 8.7511200000 3.5967796992 - 1.2501600000 3.7504800000 8.6786996992 - 1.2501600000 3.7504800000 3.5967796992 - 6.2508000000 8.7511200000 6.1377396992 - 6.2508000000 8.7511200000 
1.0558196992 - 6.2508000000 3.7504800000 6.1377396992 - 8.7511200000 1.2501600000 8.6786996992 - 6.2508000000 3.7504800000 1.0558196992 - 1.2501600000 3.7504800000 1.0558196992 - 1.2501600000 8.7511200000 8.6786996992 - 8.7511200000 6.2508000000 3.5967796992 - 8.7511200000 6.2508000000 8.6786996992 - 1.2501600000 8.7511200000 6.1377396992 - 1.2501600000 3.7504800000 6.1377396992 - 6.2508000000 3.7504800000 3.5967796992 - 6.2508000000 6.2508000000 1.4851403008 - 6.2508000000 8.7511200000 3.5967796992 - 6.2508000000 1.2501600000 9.1080203008 - 6.2508000000 1.2501600000 4.0261003008 - 1.2501600000 6.2508000000 9.1080203008 - 1.2501600000 6.2508000000 4.0261003008 - 1.2501600000 1.2501600000 9.1080203008 - 1.2501600000 1.2501600000 4.0261003008 - 6.2508000000 6.2508000000 6.5670603008 - 8.7511200000 1.2501600000 3.5967796992 - 6.2508000000 1.2501600000 6.5670603008 - 6.2508000000 1.2501600000 1.4851403008 - 1.2501600000 6.2508000000 6.5670603008 - 1.2501600000 6.2508000000 1.4851403008 - 1.2501600000 1.2501600000 6.5670603008 - 1.2501600000 1.2501600000 1.4851403008 - 6.2508000000 8.7511200000 8.6786996992 - 6.2508000000 3.7504800000 8.6786996992 - 3.7504800000 6.2508000000 8.6786996992 - 8.7511200000 8.7511200000 9.1080203008 - 3.7504800000 1.2501600000 8.6786996992 - 3.7504800000 3.7504800000 1.4851403008 - 3.7504800000 3.7504800000 6.5670603008 - 3.7504800000 8.7511200000 1.4851403008 - 3.7504800000 8.7511200000 6.5670603008 - 8.7511200000 3.7504800000 1.4851403008 - 8.7511200000 3.7504800000 6.5670603008 - 8.7511200000 8.7511200000 1.4851403008 - 8.7511200000 8.7511200000 6.5670603008 - 3.7504800000 6.2508000000 3.5967796992 - 3.7504800000 3.7504800000 9.1080203008 - 3.7504800000 8.7511200000 4.0261003008 - 3.7504800000 8.7511200000 9.1080203008 - 3.7504800000 3.7504800000 4.0261003008 - 8.7511200000 3.7504800000 9.1080203008 - 3.7504800000 1.2501600000 3.5967796992 - 8.7511200000 3.7504800000 4.0261003008 - 8.7511200000 6.2508000000 1.0558196992 - 8.7511200000 1.2501600000 6.1377396992 - 8.7511200000 1.2501600000 1.0558196992 - 8.7511200000 6.2508000000 6.1377396992 - 3.7504800000 6.2508000000 1.0558196992 - 3.7504800000 1.2501600000 6.1377396992 - 3.7504800000 1.2501600000 1.0558196992 - 6.2508000000 6.2508000000 4.0261003008 - 8.7511200000 8.7511200000 4.0261003008 - 3.7504800000 6.2508000000 6.1377396992 - 6.2508000000 6.2508000000 9.1080203008 - 7.5009600000 2.5003200000 5.0819200000 - 5.0006400000 7.5009600000 7.6228800000 - 5.0006400000 7.5009600000 2.5409600000 - 5.0006400000 2.5003200000 2.5409600000 - 2.5003200000 5.0006400000 2.5409600000 - 5.0006400000 5.0006400000 5.0819200000 - 5.0006400000 5.0006400000 10.1638400000 - 0.0000000000 5.0006400000 10.1638400000 +0.000000 0.000000 0.000000 +0.000000 0.000000 5.081920 +0.000000 5.000640 5.081920 +5.000640 0.000000 10.163840 +5.000640 0.000000 5.081920 +2.500320 0.000000 2.540960 +2.500320 0.000000 7.622880 +2.500320 5.000640 7.622880 +7.500960 0.000000 2.540960 +7.500960 0.000000 7.622880 +7.500960 5.000640 2.540960 +7.500960 5.000640 7.622880 +0.000000 2.500320 2.540960 +0.000000 2.500320 7.622880 +10.001280 7.500960 2.540960 +10.001280 7.500960 7.622880 +5.000640 2.500320 7.622880 +2.500320 2.500320 10.163840 +2.500320 2.500320 5.081920 +2.500320 7.500960 10.163840 +2.500320 7.500960 5.081920 +7.500960 2.500320 10.163840 +7.500960 7.500960 0.000000 +7.500960 7.500960 5.081920 +3.750480 3.750480 1.485140 +3.750480 3.750480 6.567060 +3.750480 8.751120 1.485140 +3.750480 8.751120 6.567060 +8.751120 3.750480 1.485140 +8.751120 
3.750480 6.567060 +8.751120 8.751120 1.485140 +8.751120 8.751120 6.567060 +3.750480 3.750480 4.026100 +3.750480 3.750480 9.108020 +3.750480 8.751120 4.026100 +3.750480 8.751120 9.108020 +8.751120 3.750480 4.026100 +8.751120 3.750480 9.108020 +8.751120 8.751120 4.026100 +8.751120 8.751120 9.108020 +3.750480 1.250160 1.055820 +3.750480 1.250160 6.137740 +3.750480 6.250800 1.055820 +3.750480 6.250800 6.137740 +8.751120 1.250160 1.055820 +8.751120 1.250160 6.137740 +8.751120 6.250800 1.055820 +8.751120 6.250800 6.137740 +3.750480 1.250160 3.596780 +3.750480 1.250160 8.678700 +3.750480 6.250800 3.596780 +3.750480 6.250800 8.678700 +8.751120 1.250160 3.596780 +8.751120 1.250160 8.678700 +8.751120 6.250800 3.596780 +8.751120 6.250800 8.678700 +1.250160 3.750480 1.055820 +1.250160 3.750480 6.137740 +1.250160 8.751120 1.055820 +1.250160 8.751120 6.137740 +6.250800 3.750480 1.055820 +6.250800 3.750480 6.137740 +6.250800 8.751120 1.055820 +6.250800 8.751120 6.137740 +1.250160 3.750480 3.596780 +1.250160 3.750480 8.678700 +1.250160 8.751120 3.596780 +1.250160 8.751120 8.678700 +6.250800 3.750480 3.596780 +6.250800 3.750480 8.678700 +6.250800 8.751120 3.596780 +6.250800 8.751120 8.678700 +1.250160 1.250160 1.485140 +1.250160 1.250160 6.567060 +1.250160 6.250800 1.485140 +1.250160 6.250800 6.567060 +6.250800 1.250160 1.485140 +6.250800 1.250160 6.567060 +6.250800 6.250800 1.485140 +6.250800 6.250800 6.567060 +1.250160 1.250160 4.026100 +1.250160 1.250160 9.108020 +1.250160 6.250800 4.026100 +1.250160 6.250800 9.108020 +6.250800 1.250160 4.026100 +6.250800 1.250160 9.108020 +6.250800 6.250800 4.026100 +6.250800 6.250800 9.108020 +0.000000 5.000640 10.163840 +5.000640 5.000640 10.163840 +5.000640 5.000640 5.081920 +2.500320 5.000640 2.540960 +5.000640 2.500320 2.540960 +5.000640 7.500960 2.540960 +5.000640 7.500960 7.622880 +7.500960 2.500320 5.081920 From 3931dfcc5715ff51dca4415fc240ff90e45bbde4 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 22 Jun 2023 09:06:59 -0400 Subject: [PATCH 15/40] avoid modifying appended system (#483) When `sys1.append(sys2)`, we do not hope that `sys2` to be modified in any way. --------- Signed-off-by: Jinzhe Zeng --- dpdata/system.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dpdata/system.py b/dpdata/system.py index e33ce47e..57308eb1 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -521,17 +521,21 @@ def append(self, system): return False elif not len(self.data["atom_numbs"]): # this system is non-converged but the system to append is converged - self.data = system.data + self.data = system.data.copy() return False if system.uniq_formula != self.uniq_formula: raise RuntimeError( f"systems with inconsistent formula could not be append: {self.uniq_formula} v.s. 
{system.uniq_formula}" ) if system.data["atom_names"] != self.data["atom_names"]: + # prevent original system to be modified + system = system.copy() # allow to append a system with different atom_names order system.sort_atom_names() self.sort_atom_names() if (system.data["atom_types"] != self.data["atom_types"]).any(): + # prevent original system to be modified + system = system.copy() # allow to append a system with different atom_types order system.sort_atom_types() self.sort_atom_types() @@ -1440,6 +1444,8 @@ def append(self, *systems): def __append(self, system): if not system.formula: return + # prevent changing the original system + system = system.copy() self.check_atom_names(system) formula = system.formula if formula in self.systems: From c6d9611edba03fafe5f44268054f0453be297600 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 Jun 2023 13:09:59 +0000 Subject: [PATCH 16/40] [pre-commit.ci] pre-commit autoupdate (#485) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/blacken-docs: 1.13.0 → 1.14.0](https://github.com/asottile/blacken-docs/compare/1.13.0...1.14.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 80fe8636..a24f7288 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,7 @@ repos: args: ["--write"] # Python inside docs - repo: https://github.com/asottile/blacken-docs - rev: 1.13.0 + rev: 1.14.0 hooks: - id: blacken-docs ci: From 5242c1b79f78ac446224314d130824971cdb4b0a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 28 Jun 2023 09:16:11 +0800 Subject: [PATCH 17/40] [pre-commit.ci] pre-commit autoupdate (#488) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/charliermarsh/ruff-pre-commit: v0.0.272 → v0.0.275](https://github.com/charliermarsh/ruff-pre-commit/compare/v0.0.272...v0.0.275) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a24f7288..e58280cb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: black-jupyter - repo: https://github.com/charliermarsh/ruff-pre-commit # Ruff version. 
- rev: v0.0.272 + rev: v0.0.275 hooks: - id: ruff args: ["--fix"] From ee6f38b1c44d9b45ed31fc92378928cc6e879467 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 27 Jun 2023 21:18:50 -0400 Subject: [PATCH 18/40] docs: add badges to README (#490) --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index cfa611d9..828223e5 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ +# dpdata + +[![conda-forge](https://img.shields.io/conda/dn/conda-forge/dpdata?color=red&label=conda-forge&logo=conda-forge)](https://anaconda.org/conda-forge/dpdata) +[![pip install](https://img.shields.io/pypi/dm/dpdata?label=pip%20install&logo=pypi)](https://pypi.org/project/dpdata) +[![Documentation Status](https://readthedocs.org/projects/dpdata/badge/)](https://dpdata.readthedocs.io/) + **dpdata** is a python package for manipulating data formats of software in computational science, including DeePMD-kit, VASP, LAMMPS, GROMACS, Gaussian. dpdata only works with python 3.7 or above. From f1b269a1d6a83cb6577dcecdd336b97abea7bef2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Jul 2023 09:12:17 +0800 Subject: [PATCH 19/40] [pre-commit.ci] pre-commit autoupdate (#493) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - https://github.com/charliermarsh/ruff-pre-commit → https://github.com/astral-sh/ruff-pre-commit - [github.com/astral-sh/ruff-pre-commit: v0.0.275 → v0.0.276](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.275...v0.0.276) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e58280cb..54f6231d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,9 +21,9 @@ repos: rev: 23.3.0 hooks: - id: black-jupyter -- repo: https://github.com/charliermarsh/ruff-pre-commit +- repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.0.275 + rev: v0.0.276 hooks: - id: ruff args: ["--fix"] From 119fb45cd3a9d4c94d7d43f978b437a04ff89c69 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 11 Jul 2023 21:59:15 +0800 Subject: [PATCH 20/40] [pre-commit.ci] pre-commit autoupdate (#494) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/psf/black: 23.3.0 → 23.7.0](https://github.com/psf/black/compare/23.3.0...23.7.0) - [github.com/astral-sh/ruff-pre-commit: v0.0.276 → v0.0.277](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.276...v0.0.277) - [github.com/asottile/blacken-docs: 1.14.0 → 1.15.0](https://github.com/asottile/blacken-docs/compare/1.14.0...1.15.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 54f6231d..30d14c22 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,12 +18,12 @@ repos: - id: check-toml # Python - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 23.7.0 hooks: - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.0.276 + rev: v0.0.277 hooks: - id: ruff args: ["--fix"] @@ -35,7 +35,7 @@ repos: args: ["--write"] # Python inside docs - repo: https://github.com/asottile/blacken-docs - rev: 1.14.0 + rev: 1.15.0 hooks: - id: blacken-docs ci: From 7bb639255e82b9ed7002d4529cec4b8e6ce2ee0b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 20 Jul 2023 02:17:26 -0400 Subject: [PATCH 21/40] pin pymatgen to 2023.7.11 (#501) Revert after https://github.com/materialsproject/pymatgen/issues/3166 is fixed. --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9e2cbf60..0ab9509f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,7 +22,7 @@ jobs: - name: Install rdkit run: python -m pip install rdkit openbabel-wheel - name: Install dependencies - run: python -m pip install .[amber,ase,pymatgen] coverage + run: python -m pip install .[amber,ase,pymatgen] coverage "pymatgen==2023.7.11;python_version>='3.8'" - name: Test run: cd tests && coverage run --source=../dpdata -m unittest && cd .. && coverage combine tests/.coverage && coverage report - name: Run codecov From a091a596637b359002994495bfc60fe7b76eaf2c Mon Sep 17 00:00:00 2001 From: Yu Liu <77716030+YuLiu98@users.noreply.github.com> Date: Thu, 20 Jul 2023 15:14:45 +0800 Subject: [PATCH 22/40] Fix: bug in reading abacus md files (#496) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> --- dpdata/abacus/md.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py index 7726cafe..8cda5960 100644 --- a/dpdata/abacus/md.py +++ b/dpdata/abacus/md.py @@ -43,13 +43,20 @@ def get_coord_dump_freq(inlines): def get_coords_from_dump(dumplines, natoms): nlines = len(dumplines) total_natoms = sum(natoms) + # The output of VIRIAL, FORCE, and VELOCITY are controlled by INPUT parameters dump_virial, dump_force, and dump_vel, respectively. + # So the search of keywords can determine whether these datas are printed into MD_dump. calc_stress = False + calc_force = False + check_line = 6 if "VIRIAL" in dumplines[6]: calc_stress = True - else: - assert ( - "POSITIONS" in dumplines[6] and "FORCE" in dumplines[6] - ), "keywords 'POSITIONS' and 'FORCE' cannot be found in the 6th line. Please check." + check_line = 10 + assert ( + "POSITION" in dumplines[check_line] + ), "keywords 'POSITION' cannot be found in the 6th line. Please check." + if "FORCE" in dumplines[check_line]: + calc_force = True + nframes_dump = -1 if calc_stress: nframes_dump = int(nlines / (total_natoms + 13)) @@ -104,9 +111,13 @@ def get_coords_from_dump(dumplines, natoms): if not newversion: coords[iframe, iat] *= celldm - forces[iframe, iat] = np.array( - [float(i) for i in dumplines[iline + skipline + iat].split()[5:8]] - ) + if calc_force: + forces[iframe, iat] = np.array( + [ + float(i) + for i in dumplines[iline + skipline + iat].split()[5:8] + ] + ) iframe += 1 assert iframe == nframes_dump, ( "iframe=%d, nframe_dump=%d. Number of frames does not match number of lines in MD_dump." 
From be669c87c54cfd159f1194b0043a34ac1fd767d7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 20 Jul 2023 15:14:57 +0800 Subject: [PATCH 23/40] [pre-commit.ci] pre-commit autoupdate (#497) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.0.277 → v0.0.278](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.277...v0.0.278) --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- dpdata/cp2k/output.py | 4 +--- dpdata/system.py | 12 ++---------- dpdata/xyz/quip_gap_xyz.py | 4 +--- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 30d14c22..cd2658ab 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.0.277 + rev: v0.0.278 hooks: - id: ruff args: ["--fix"] diff --git a/dpdata/cp2k/output.py b/dpdata/cp2k/output.py index ed3ab5af..7b3aeb64 100644 --- a/dpdata/cp2k/output.py +++ b/dpdata/cp2k/output.py @@ -327,9 +327,7 @@ def handle_single_xyz_frame(self, lines): atom_num = int(lines[0].strip("\n").strip()) if len(lines) != atom_num + 2: raise RuntimeError( - "format error, atom_num=={}, {}!=atom_num+2".format( - atom_num, len(lines) - ) + f"format error, atom_num=={atom_num}, {len(lines)}!=atom_num+2" ) data_format_line = lines[1].strip("\n").strip() + " " prop_pattern = re.compile(r"(?P\w+)\s*=\s*(?P.*?)[, ]") diff --git a/dpdata/system.py b/dpdata/system.py index 57308eb1..bfec97c6 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -797,17 +797,9 @@ def replace(self, initial_atom_type, end_atom_type, replace_num): "Must use method replace() of the instance of class dpdata.System" ) if type(replace_num) is not int: - raise ValueError( - "replace_num must be a integer. Now is {replace_num}".format( - replace_num=replace_num - ) - ) + raise ValueError(f"replace_num must be a integer. Now is {replace_num}") if replace_num <= 0: - raise ValueError( - "replace_num must be larger than 0.Now is {replace_num}".format( - replace_num=replace_num - ) - ) + raise ValueError(f"replace_num must be larger than 0.Now is {replace_num}") try: initial_atom_index = self.data["atom_names"].index(initial_atom_type) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index c2d4118c..8b3f164b 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -47,9 +47,7 @@ def handle_single_xyz_frame(lines): atom_num = int(lines[0].strip("\n").strip()) if len(lines) != atom_num + 2: raise RuntimeError( - "format error, atom_num=={}, {}!=atom_num+2".format( - atom_num, len(lines) - ) + f"format error, atom_num=={atom_num}, {len(lines)}!=atom_num+2" ) data_format_line = lines[1].strip("\n").strip() + " " field_value_pattern = re.compile( From 7531150717dcb54a66d73ac2d12bea8e8773064d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 23 Jul 2023 20:59:15 -0400 Subject: [PATCH 24/40] Revert "pin pymatgen to 2023.7.11" (#504) Reverts deepmodeling/dpdata#501 Let's see if the error got fixed. 
--- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0ab9509f..9e2cbf60 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,7 +22,7 @@ jobs: - name: Install rdkit run: python -m pip install rdkit openbabel-wheel - name: Install dependencies - run: python -m pip install .[amber,ase,pymatgen] coverage "pymatgen==2023.7.11;python_version>='3.8'" + run: python -m pip install .[amber,ase,pymatgen] coverage - name: Test run: cd tests && coverage run --source=../dpdata -m unittest && cd .. && coverage combine tests/.coverage && coverage report - name: Run codecov From b61144f4cd1951194493a78a39514243675e9672 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 24 Jul 2023 23:35:16 -0400 Subject: [PATCH 25/40] add a public method to register new DataType in a plugin (#505) Move DataType to an independent module, so register it will be eaiser. No breaking changes are made. --------- Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/workflows/test.yml | 2 +- dpdata/data_type.py | 145 ++++++++++++++++++++ dpdata/system.py | 116 ++-------------- tests/plugin/dpdata_plugin_test/__init__.py | 11 ++ tests/plugin/pyproject.toml | 17 +++ tests/test_custom_data_type.py | 8 -- 6 files changed, 182 insertions(+), 117 deletions(-) create mode 100644 dpdata/data_type.py create mode 100644 tests/plugin/dpdata_plugin_test/__init__.py create mode 100644 tests/plugin/pyproject.toml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9e2cbf60..765db654 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,7 +22,7 @@ jobs: - name: Install rdkit run: python -m pip install rdkit openbabel-wheel - name: Install dependencies - run: python -m pip install .[amber,ase,pymatgen] coverage + run: python -m pip install .[amber,ase,pymatgen] coverage ./tests/plugin - name: Test run: cd tests && coverage run --source=../dpdata -m unittest && cd .. && coverage combine tests/.coverage && coverage report - name: Run codecov diff --git a/dpdata/data_type.py b/dpdata/data_type.py new file mode 100644 index 00000000..c0bd944f --- /dev/null +++ b/dpdata/data_type.py @@ -0,0 +1,145 @@ +from enum import Enum, unique +from typing import TYPE_CHECKING, Tuple + +import numpy as np + +from dpdata.plugin import Plugin + +if TYPE_CHECKING: + from dpdata.system import System + + +@unique +class Axis(Enum): + """Data axis.""" + + NFRAMES = "nframes" + NATOMS = "natoms" + NTYPES = "ntypes" + NBONDS = "nbonds" + + +class DataError(Exception): + """Data is not correct.""" + + +class DataType: + """DataType represents a type of data, like coordinates, energies, etc. + + Parameters + ---------- + name : str + name of data + dtype : type or tuple[type] + data type, e.g. np.ndarray + shape : tuple[int], optional + shape of data. Used when data is list or np.ndarray. 
Use Axis to + represents numbers + required : bool, default=True + whether this data is required + """ + + def __init__( + self, + name: str, + dtype: type, + shape: Tuple[int, Axis] = None, + required: bool = True, + ) -> None: + self.name = name + self.dtype = dtype + self.shape = shape + self.required = required + + def real_shape(self, system: "System") -> Tuple[int]: + """Returns expected real shape of a system.""" + shape = [] + for ii in self.shape: + if ii is Axis.NFRAMES: + shape.append(system.get_nframes()) + elif ii is Axis.NTYPES: + shape.append(system.get_ntypes()) + elif ii is Axis.NATOMS: + shape.append(system.get_natoms()) + elif ii is Axis.NBONDS: + # BondOrderSystem + shape.append(system.get_nbonds()) + elif isinstance(ii, int): + shape.append(ii) + else: + raise RuntimeError("Shape is not an int!") + return tuple(shape) + + def check(self, system: "System"): + """Check if a system has correct data of this type. + + Parameters + ---------- + system : System + checked system + + Raises + ------ + DataError + type or shape of data is not correct + """ + # check if exists + if self.name in system.data: + data = system.data[self.name] + # check dtype + # allow list for empty np.ndarray + if isinstance(data, list) and not len(data): + pass + elif not isinstance(data, self.dtype): + raise DataError( + f"Type of {self.name} is {type(data).__name__}, but expected {self.dtype.__name__}" + ) + # check shape + if self.shape is not None: + shape = self.real_shape(system) + # skip checking empty list of np.ndarray + if isinstance(data, np.ndarray): + if data.size and shape != data.shape: + raise DataError( + f"Shape of {self.name} is {data.shape}, but expected {shape}" + ) + elif isinstance(data, list): + if len(shape) and shape[0] != len(data): + raise DataError( + "Length of %s is %d, but expected %d" + % (self.name, len(data), shape[0]) + ) + else: + raise RuntimeError("Unsupported type to check shape") + elif self.required: + raise DataError("%s not found in data" % self.name) + + +__system_data_type_plugin = Plugin() +__labeled_system_data_type_plugin = Plugin() + + +def register_data_type(data_type: DataType, labeled: bool): + """Register a data type. + + Parameters + ---------- + data_type : DataType + data type to be registered + labeled : bool + whether this data type is for LabeledSystem + """ + plugin = __labeled_system_data_type_plugin if labeled else __system_data_type_plugin + plugin.register(data_type.name)(data_type) + + +def get_data_types(labeled: bool): + """Get all registered data types. + + Parameters + ---------- + labeled : bool + whether this data type is for LabeledSystem + """ + plugin = __labeled_system_data_type_plugin if labeled else __system_data_type_plugin + return tuple(plugin.plugins.values()) diff --git a/dpdata/system.py b/dpdata/system.py index bfec97c6..7b68a9e1 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -2,7 +2,6 @@ import glob import os from copy import deepcopy -from enum import Enum, unique from typing import Any, Dict, Optional, Tuple, Union import numpy as np @@ -15,6 +14,7 @@ # ensure all plugins are loaded! 
import dpdata.plugins from dpdata.amber.mask import load_param_file, pick_by_amber_mask +from dpdata.data_type import Axis, DataError, DataType, get_data_types from dpdata.driver import Driver, Minimizer from dpdata.format import Format from dpdata.plugin import Plugin @@ -33,112 +33,6 @@ def load_format(fmt): ) -@unique -class Axis(Enum): - """Data axis.""" - - NFRAMES = "nframes" - NATOMS = "natoms" - NTYPES = "ntypes" - NBONDS = "nbonds" - - -class DataError(Exception): - """Data is not correct.""" - - -class DataType: - """DataType represents a type of data, like coordinates, energies, etc. - - Parameters - ---------- - name : str - name of data - dtype : type or tuple[type] - data type, e.g. np.ndarray - shape : tuple[int], optional - shape of data. Used when data is list or np.ndarray. Use Axis to - represents numbers - required : bool, default=True - whether this data is required - """ - - def __init__( - self, - name: str, - dtype: type, - shape: Tuple[int, Axis] = None, - required: bool = True, - ) -> None: - self.name = name - self.dtype = dtype - self.shape = shape - self.required = required - - def real_shape(self, system: "System") -> Tuple[int]: - """Returns expected real shape of a system.""" - shape = [] - for ii in self.shape: - if ii is Axis.NFRAMES: - shape.append(system.get_nframes()) - elif ii is Axis.NTYPES: - shape.append(system.get_ntypes()) - elif ii is Axis.NATOMS: - shape.append(system.get_natoms()) - elif ii is Axis.NBONDS: - # BondOrderSystem - shape.append(system.get_nbonds()) - elif isinstance(ii, int): - shape.append(ii) - else: - raise RuntimeError("Shape is not an int!") - return tuple(shape) - - def check(self, system: "System"): - """Check if a system has correct data of this type. - - Parameters - ---------- - system : System - checked system - - Raises - ------ - DataError - type or shape of data is not correct - """ - # check if exists - if self.name in system.data: - data = system.data[self.name] - # check dtype - # allow list for empty np.ndarray - if isinstance(data, list) and not len(data): - pass - elif not isinstance(data, self.dtype): - raise DataError( - f"Type of {self.name} is {type(data).__name__}, but expected {self.dtype.__name__}" - ) - # check shape - if self.shape is not None: - shape = self.real_shape(system) - # skip checking empty list of np.ndarray - if isinstance(data, np.ndarray): - if data.size and shape != data.shape: - raise DataError( - f"Shape of {self.name} is {data.shape}, but expected {shape}" - ) - elif isinstance(data, list): - if len(shape) and shape[0] != len(data): - raise DataError( - "Length of %s is %d, but expected %d" - % (self.name, len(data), shape[0]) - ) - else: - raise RuntimeError("Unsupported type to check shape") - elif self.required: - raise DataError("%s not found in data" % self.name) - - class System(MSONable): """The data System. @@ -1657,7 +1551,8 @@ def get_cls_name(cls: object) -> str: def add_format_methods(): - """Add format methods to System, LabeledSystem, and MultiSystems. + """Add format methods to System, LabeledSystem, and MultiSystems; add data types + to System and LabeledSystem. 
Notes ----- @@ -1701,5 +1596,10 @@ def to_format(self, *args, **kwargs): setattr(LabeledSystem, method, get_func(formatcls)) setattr(MultiSystems, method, get_func(formatcls)) + # at this point, System.DTYPES and LabeledSystem.DTYPES has been initialized + System.DTYPES = System.DTYPES + get_data_types(labeled=False) + LabeledSystem.DTYPES = LabeledSystem.DTYPES + get_data_types(labeled=False) + LabeledSystem.DTYPES = LabeledSystem.DTYPES + get_data_types(labeled=True) + add_format_methods() diff --git a/tests/plugin/dpdata_plugin_test/__init__.py b/tests/plugin/dpdata_plugin_test/__init__.py new file mode 100644 index 00000000..29911b5f --- /dev/null +++ b/tests/plugin/dpdata_plugin_test/__init__.py @@ -0,0 +1,11 @@ +import numpy as np + +from dpdata.data_type import Axis, DataType, register_data_type + +# test data type + +register_data_type( + DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), required=False), labeled=True +) + +ep = None diff --git a/tests/plugin/pyproject.toml b/tests/plugin/pyproject.toml new file mode 100644 index 00000000..7ce1f854 --- /dev/null +++ b/tests/plugin/pyproject.toml @@ -0,0 +1,17 @@ +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[project] +name = "dpdata_plugin_test" +version = "0.0.0" +description = "A test for dpdata plugin" +dependencies = [ + 'numpy', + 'dpdata', +] +readme = "README.md" +requires-python = ">=3.7" + +[project.entry-points.'dpdata.plugins'] +random = "dpdata_plugin_test:ep" diff --git a/tests/test_custom_data_type.py b/tests/test_custom_data_type.py index 58b9f5e4..49c99586 100644 --- a/tests/test_custom_data_type.py +++ b/tests/test_custom_data_type.py @@ -4,23 +4,15 @@ import numpy as np import dpdata -from dpdata.system import Axis, DataType class TestDeepmdLoadDumpComp(unittest.TestCase): def setUp(self): - self.backup = dpdata.system.LabeledSystem.DTYPES - dpdata.system.LabeledSystem.DTYPES = dpdata.system.LabeledSystem.DTYPES + ( - DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), required=False), - ) self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") self.foo = np.ones((len(self.system), 2, 4)) self.system.data["foo"] = self.foo self.system.check_data() - def tearDown(self) -> None: - dpdata.system.LabeledSystem.DTYPES = self.backup - def test_to_deepmd_raw(self): self.system.to_deepmd_raw("data_foo") foo = np.loadtxt("data_foo/foo.raw") From 9ef8daf40912265c5a3453c8ce5e71440afc54f6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Jul 2023 14:13:35 +0800 Subject: [PATCH 26/40] [pre-commit.ci] pre-commit autoupdate (#506) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.0.278 → v0.0.280](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.278...v0.0.280) --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- dpdata/cp2k/cell.py | 18 ++++++------------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cd2658ab..9ed4b592 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.0.278 + rev: v0.0.280 hooks: - id: ruff args: ["--fix"] diff --git a/dpdata/cp2k/cell.py b/dpdata/cp2k/cell.py index 7f5f3ef0..7af73353 100644 --- a/dpdata/cp2k/cell.py +++ b/dpdata/cp2k/cell.py @@ -28,24 +28,18 @@ def cell_to_low_triangle(A, B, C, alpha, beta, gamma): """ if not np.pi * 5 / 180 < alpha < np.pi * 175 / 180: raise RuntimeError( - "alpha=={}: must be a radian, and \ - must be in np.pi*5/180 < alpha < np.pi*175/180".format( - alpha - ) + f"alpha=={alpha}: must be a radian, and \ + must be in np.pi*5/180 < alpha < np.pi*175/180" ) if not np.pi * 5 / 180 < beta < np.pi * 175 / 180: raise RuntimeError( - "beta=={}: must be a radian, and \ - must be in np.pi*5/180 < beta < np.pi*175/180".format( - beta - ) + f"beta=={beta}: must be a radian, and \ + must be in np.pi*5/180 < beta < np.pi*175/180" ) if not np.pi * 5 / 180 < gamma < np.pi * 175 / 180: raise RuntimeError( - "gamma=={}: must be a radian, and \ - must be in np.pi*5/180 < gamma < np.pi*175/180".format( - gamma - ) + f"gamma=={gamma}: must be a radian, and \ + must be in np.pi*5/180 < gamma < np.pi*175/180" ) if not A > 0.2: raise RuntimeError(f"A=={A}, must be greater than 0.2") From 47619b75a14f4aad31bf321ea176aaaebb8ad55f Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 25 Jul 2023 21:18:42 -0400 Subject: [PATCH 27/40] support `-1` shape in DataType (#510) Sometimes, the shape is unknown and can be any integer. This PR supports this situation. The shape of a dimension can be `-1`. Note: Only one `-1` can be used. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/data_type.py | 9 +++++ dpdata/deepmd/comp.py | 4 ++- dpdata/deepmd/raw.py | 3 +- tests/plugin/dpdata_plugin_test/__init__.py | 5 +++ tests/test_custom_data_type.py | 39 +++++++++++++++++++++ 5 files changed, 58 insertions(+), 2 deletions(-) diff --git a/dpdata/data_type.py b/dpdata/data_type.py index c0bd944f..b5141a4e 100644 --- a/dpdata/data_type.py +++ b/dpdata/data_type.py @@ -19,6 +19,13 @@ class Axis(Enum): NBONDS = "nbonds" +class AnyInt(int): + """AnyInt equals to any other integer.""" + + def __eq__(self, other): + return True + + class DataError(Exception): """Data is not correct.""" @@ -64,6 +71,8 @@ def real_shape(self, system: "System") -> Tuple[int]: elif ii is Axis.NBONDS: # BondOrderSystem shape.append(system.get_nbonds()) + elif ii == -1: + shape.append(AnyInt(-1)) elif isinstance(ii, int): shape.append(ii) else: diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index 9d63e1dd..7b909b16 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -87,8 +87,10 @@ def to_system_data(folder, type_map=None, labels=True): f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/npy format." ) continue + natoms = data["coords"].shape[1] shape = [ - -1 if xx == dpdata.system.Axis.NATOMS else xx for xx in dtype.shape[1:] + natoms if xx == dpdata.system.Axis.NATOMS else xx + for xx in dtype.shape[1:] ] all_data = [] for ii in sets: diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index fdb2fc64..c7a64ec4 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -86,8 +86,9 @@ def to_system_data(folder, type_map=None, labels=True): f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/raw format." 
) continue + natoms = data["coords"].shape[1] shape = [ - -1 if xx == dpdata.system.Axis.NATOMS else xx + natoms if xx == dpdata.system.Axis.NATOMS else xx for xx in dtype.shape[1:] ] if os.path.exists(os.path.join(folder, f"{dtype.name}.raw")): diff --git a/tests/plugin/dpdata_plugin_test/__init__.py b/tests/plugin/dpdata_plugin_test/__init__.py index 29911b5f..b3821cb3 100644 --- a/tests/plugin/dpdata_plugin_test/__init__.py +++ b/tests/plugin/dpdata_plugin_test/__init__.py @@ -8,4 +8,9 @@ DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), required=False), labeled=True ) +register_data_type( + DataType("bar", np.ndarray, (Axis.NFRAMES, Axis.NATOMS, -1), required=False), + labeled=True, +) + ep = None diff --git a/tests/test_custom_data_type.py b/tests/test_custom_data_type.py index 49c99586..5a0e2bab 100644 --- a/tests/test_custom_data_type.py +++ b/tests/test_custom_data_type.py @@ -43,3 +43,42 @@ def test_from_deepmd_hdf5(self): self.system.to_deepmd_hdf5("data_foo.h5") x = dpdata.LabeledSystem("data_foo.h5", fmt="deepmd/hdf5") np.testing.assert_allclose(x.data["foo"], self.foo) + + +class TestDeepmdLoadDumpCompAny(unittest.TestCase): + def setUp(self): + self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.bar = np.ones((len(self.system), self.system.get_natoms(), 2)) + self.system.data["bar"] = self.bar + self.system.check_data() + + def test_to_deepmd_raw(self): + self.system.to_deepmd_raw("data_bar") + bar = np.loadtxt("data_bar/bar.raw") + np.testing.assert_allclose(bar.reshape(self.bar.shape), self.bar) + + def test_from_deepmd_raw(self): + self.system.to_deepmd_raw("data_bar") + x = dpdata.LabeledSystem("data_bar", fmt="deepmd/raw") + np.testing.assert_allclose(x.data["bar"], self.bar) + + def test_to_deepmd_npy(self): + self.system.to_deepmd_npy("data_bar") + bar = np.load("data_bar/set.000/bar.npy") + np.testing.assert_allclose(bar.reshape(self.bar.shape), self.bar) + + def test_from_deepmd_npy(self): + self.system.to_deepmd_npy("data_bar") + x = dpdata.LabeledSystem("data_bar", fmt="deepmd/npy") + np.testing.assert_allclose(x.data["bar"], self.bar) + + def test_to_deepmd_hdf5(self): + self.system.to_deepmd_hdf5("data_bar.h5") + with h5py.File("data_bar.h5") as f: + bar = f["set.000/bar.npy"][:] + np.testing.assert_allclose(bar.reshape(self.bar.shape), self.bar) + + def test_from_deepmd_hdf5(self): + self.system.to_deepmd_hdf5("data_bar.h5") + x = dpdata.LabeledSystem("data_bar.h5", fmt="deepmd/hdf5") + np.testing.assert_allclose(x.data["bar"], self.bar) From 5b57ac41074fd42912ede1eef04096e8043190dc Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 1 Aug 2023 04:21:59 -0400 Subject: [PATCH 28/40] docs: add format docs (#516) Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .gitignore | 3 + docs/formats.rst | 7 ++ docs/make_format.py | 271 ++++++++++++++++++++++++++++++++++++++++---- pyproject.toml | 1 + 4 files changed, 263 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 41ee080c..cd48a3e2 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,7 @@ _version.py __pycache__ docs/_build docs/formats.csv +docs/drivers.csv +docs/minimizers.csv docs/api/ +docs/formats/ diff --git a/docs/formats.rst b/docs/formats.rst index c0ff5b8f..41ca9091 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -6,3 +6,10 @@ dpdata supports the following formats: .. csv-table:: Supported Formats :file: formats.csv :header-rows: 1 + + +.. 
toctree:: + :maxdepth: 1 + :glob: + + formats/* diff --git a/docs/make_format.py b/docs/make_format.py index a208b21e..62dd0879 100644 --- a/docs/make_format.py +++ b/docs/make_format.py @@ -1,9 +1,18 @@ import csv +import os from collections import defaultdict +from inspect import Parameter, Signature, cleandoc, signature +from typing import Literal + +from numpydoc.docscrape import Parameter as numpydoc_Parameter +from numpydoc.docscrape_sphinx import SphinxDocString + +from dpdata.bond_order_system import BondOrderSystem # ensure all plugins are loaded! from dpdata.driver import Driver, Minimizer from dpdata.format import Format +from dpdata.system import LabeledSystem, MultiSystems, System def get_formats() -> dict: @@ -64,7 +73,7 @@ def get_cls_link(cls: object) -> str: def check_supported(fmt: Format): - methods = set() + methods = {} for mtd in [ "from_system", "to_system", @@ -76,33 +85,255 @@ def check_supported(fmt: Format): "to_multi_systems", ]: if detect_overridden(fmt, mtd): - methods.add(mtd) + methods[mtd] = None if mtd == "to_system": - methods.add("to_labeled_system") + methods["to_labeled_system"] = None if fmt.MultiMode != fmt.MultiModes.NotImplemented: - methods.add("from_multi_systems") - methods.add("to_multi_systems") - return methods + methods["from_multi_systems"] = None + methods["to_multi_systems"] = None + return list(methods.keys()) method_links = { - "from_system": ":func:`System() `", - "to_system": ":func:`System.to() `", - "from_labeled_system": ":func:`LabeledSystem() `", - "to_labeled_system": ":func:`LabeledSystem.to() `", - "from_bond_order_system": ":func:`BondOrderSystem() `", - "to_bond_order_system": ":func:`BondOrderSystem.to() `", - "from_multi_systems": ":func:`MultiSystems.load_systems_from_file() `", - "to_multi_systems": ":func:`MultiSystems.to() `", + "from_system": ":ref:`System() <{}_{}>`", + "to_system": ":ref:`System.to() <{}_{}>`", + "from_labeled_system": ":ref:`LabeledSystem() <{}_{}>`", + "to_labeled_system": ":ref:`LabeledSystem.to() <{}_{}>`", + "from_bond_order_system": ":ref:`BondOrderSystem() <{}_{}>`", + "to_bond_order_system": ":ref:`BondOrderSystem.to() <{}_{}>`", + "from_multi_systems": ":ref:`MultiSystems.load_systems_from_file() <{}_{}>`", + "to_multi_systems": ":ref:`MultiSystems.to() <{}_{}>`", +} + +method_classes = { + "from_system": "System", + "to_system": "System", + "from_labeled_system": "LabeledSystem", + "to_labeled_system": "LabeledSystem", + "from_bond_order_system": "BondOrderSystem", + "to_bond_order_system": "BondOrderSystem", + "from_multi_systems": "MultiSystems", + "to_multi_systems": "MultiSystems", } +method_cls_obj = { + "from_system": System, + "to_system": System, + "from_labeled_system": LabeledSystem, + "to_labeled_system": LabeledSystem, + "from_bond_order_system": BondOrderSystem, + "to_bond_order_system": BondOrderSystem, + "from_multi_systems": MultiSystems, + "to_multi_systems": MultiSystems, +} + + +def generate_sub_format_pages(formats: dict): + """Generate sub format pages.""" + os.makedirs("formats", exist_ok=True) + for format, alias in formats.items(): + # format: Format, alias: list[str] + buff = [] + buff.append(".. 
_%s:" % format.__name__) + buff.append("") + for aa in alias: + buff.append("%s format" % aa) + buff.append("=" * len(buff[-1])) + buff.append("") + buff.append("Class: %s" % get_cls_link(format)) + buff.append("") + + docstring = format.__doc__ + if docstring is not None: + docstring = cleandoc(docstring) + rst = str(SphinxDocString(docstring)) + buff.append(rst) + buff.append("") + + buff.append("Conversions") + buff.append("-----------") + methods = check_supported(format) + for method in methods: + buff.append("") + buff.append(f".. _{format.__name__}_{method}:") + buff.append("") + if method.startswith("from_"): + buff.append("Convert from this format to %s" % method_classes[method]) + buff.append("`" * len(buff[-1])) + elif method.startswith("to_"): + buff.append("Convert from %s to this format" % method_classes[method]) + buff.append("`" * len(buff[-1])) + buff.append("") + method_obj = getattr(format, method) + if ( + method == "to_labeled_system" + and method not in format.__dict__ + and "to_system" in format.__dict__ + ): + method_obj = getattr(format, "to_system") + docstring = method_obj.__doc__ + if docstring is not None: + docstring = cleandoc(docstring) + sig = signature(method_obj) + parameters = dict(sig.parameters) + return_annotation = sig.return_annotation + # del self + del parameters[list(parameters)[0]] + # del data + if method.startswith("to_"): + del parameters[list(parameters)[0]] + if "args" in parameters: + del parameters["args"] + if "kwargs" in parameters: + del parameters["kwargs"] + if method == "to_multi_systems" or method.startswith("from_"): + sig = Signature( + list(parameters.values()), return_annotation=method_cls_obj[method] + ) + else: + sig = Signature( + list(parameters.values()), return_annotation=return_annotation + ) + sig = str(sig) + if method.startswith("from_"): + if method != "from_multi_systems": + for aa in alias: + parameters["fmt"] = Parameter( + "fmt", + Parameter.POSITIONAL_OR_KEYWORD, + default=None, + annotation=Literal[aa], + ) + sig_fmt = Signature( + list(parameters.values()), + return_annotation=method_cls_obj[method], + ) + sig_fmt = str(sig_fmt) + buff.append( + f""".. py:function:: dpdata.{method_classes[method]}{sig_fmt}""" + ) + buff.append(""" :noindex:""") + for aa in alias: + buff.append( + """.. 
py:function:: dpdata.{}.from_{}{}""".format( + method_classes[method], + aa.replace("/", "_").replace(".", ""), + sig, + ) + ) + buff.append(""" :noindex:""") + buff.append("") + if docstring is None or method not in format.__dict__: + docstring = """ Convert this format to :class:`%s`.""" % ( + method_classes[method] + ) + doc_obj = SphinxDocString(docstring) + if len(doc_obj["Parameters"]) > 0: + doc_obj["Parameters"] = [ + xx + for xx in doc_obj["Parameters"] + if xx.name not in ("*args", "**kwargs") + ] + else: + if method == "from_multi_systems": + doc_obj["Parameters"] = [ + numpydoc_Parameter( + "directory", + "str", + ["directory of systems"], + ) + ] + doc_obj["Yields"] = [] + doc_obj["Returns"] = [ + numpydoc_Parameter("", method_classes[method], ["converted system"]) + ] + rst = " " + str(doc_obj) + buff.append(rst) + buff.append("") + elif method.startswith("to_"): + for aa in alias: + parameters = { + "fmt": Parameter( + "fmt", + Parameter.POSITIONAL_OR_KEYWORD, + annotation=Literal[aa], + ), + **parameters, + } + if method == "to_multi_systems": + sig_fmt = Signature( + list(parameters.values()), + return_annotation=method_cls_obj[method], + ) + else: + sig_fmt = Signature( + list(parameters.values()), + return_annotation=return_annotation, + ) + sig_fmt = str(sig_fmt) + buff.append( + f""".. py:function:: dpdata.{method_classes[method]}.to{sig_fmt}""" + ) + buff.append(""" :noindex:""") + for aa in alias: + buff.append( + """.. py:function:: dpdata.{}.to_{}{}""".format( + method_classes[method], + aa.replace("/", "_").replace(".", ""), + sig, + ) + ) + buff.append(""" :noindex:""") + buff.append("") + if docstring is None or ( + method not in format.__dict__ + and not ( + method == "to_labeled_system" + and method not in format.__dict__ + and "to_system" in format.__dict__ + ) + ): + docstring = "Convert :class:`%s` to this format." 
% ( + method_classes[method] + ) + doc_obj = SphinxDocString(docstring) + if len(doc_obj["Parameters"]) > 0: + doc_obj["Parameters"] = [ + xx + for xx in doc_obj["Parameters"][1:] + if xx.name not in ("*args", "**kwargs") + ] + else: + if method == "to_multi_systems": + doc_obj["Parameters"] = [ + numpydoc_Parameter( + "directory", + "str", + ["directory to save systems"], + ) + ] + if method == "to_multi_systems": + doc_obj["Yields"] = [] + doc_obj["Returns"] = [ + numpydoc_Parameter("", method_classes[method], ["this system"]) + ] + rst = " " + str(doc_obj) + buff.append(rst) + buff.append("") + buff.append("") + buff.append("") + + with open("formats/%s.rst" % format.__name__, "w") as rstfile: + rstfile.write("\n".join(buff)) + + if __name__ == "__main__": formats = get_formats() with open("formats.csv", "w", newline="") as csvfile: fieldnames = [ - "Class", + "Format", "Alias", - "Supported Functions", + "Supported Conversions", ] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) @@ -110,10 +341,11 @@ def check_supported(fmt: Format): for kk, vv in formats.items(): writer.writerow( { - "Class": get_cls_link(kk), + "Format": ":ref:`%s`" % kk.__name__, "Alias": "\n".join("``%s``" % vvv for vvv in vv), - "Supported Functions": "\n".join( - method_links[mtd] for mtd in check_supported(kk) + "Supported Conversions": "\n".join( + method_links[mtd].format(kk.__name__, mtd) + for mtd in check_supported(kk) ), } ) @@ -151,3 +383,4 @@ def check_supported(fmt: Format): "Alias": "\n".join("``%s``" % vvv for vvv in vv), } ) + generate_sub_format_pages(formats) diff --git a/pyproject.toml b/pyproject.toml index 5b555ad5..087d6ff9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ docs = [ 'm2r2', 'deepmodeling-sphinx>=0.1.1', 'sphinx-argparse', + 'rdkit', ] [tool.setuptools.packages.find] From 452a0e52822e81573325798e432fec2fdb2a9cd1 Mon Sep 17 00:00:00 2001 From: L-RuiHao <93460643+ruio248@users.noreply.github.com> Date: Sat, 5 Aug 2023 15:29:30 +0800 Subject: [PATCH 29/40] new feature DFTB+ (#511) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: liaoruihao --- README.md | 3 +- dpdata/dftbplus/__init__.py | 0 dpdata/dftbplus/output.py | 74 +++++++++++++++++++++++++++++ dpdata/plugins/dftbplus.py | 61 ++++++++++++++++++++++++ tests/dftbplus/detailed.out | 88 +++++++++++++++++++++++++++++++++++ tests/dftbplus/dftb_pin.hsd | 93 +++++++++++++++++++++++++++++++++++++ tests/test_dftbplus.py | 58 +++++++++++++++++++++++ 7 files changed, 376 insertions(+), 1 deletion(-) create mode 100644 dpdata/dftbplus/__init__.py create mode 100644 dpdata/dftbplus/output.py create mode 100644 tests/dftbplus/detailed.out create mode 100644 tests/dftbplus/dftb_pin.hsd create mode 100644 tests/test_dftbplus.py diff --git a/README.md b/README.md index 828223e5..1e86a719 100644 --- a/README.md +++ b/README.md @@ -88,11 +88,12 @@ The `System` or `LabeledSystem` can be constructed from the following file forma | Amber | multi | True | True | LabeledSystem | 'amber/md' | | Amber/sqm | sqm.out | False | False | System | 'sqm/out' | | Gromacs | gro | True | False | System | 'gromacs/gro' | -| ABACUS | STRU | False | False | System | 'abacus/stru' | +| ABACUS | STRU | False | False | System | 'abacus/stru' | | ABACUS | STRU | False | True | LabeledSystem | 'abacus/scf' | | ABACUS | cif | True | True | LabeledSystem | 'abacus/md' | | ABACUS | STRU | True | True | LabeledSystem | 'abacus/relax' | | ase | structure | True | True | MultiSystems | 
'ase/structure' | +| DFTB+ | dftbplus | False | True | LabeledSystem | 'dftbplus' | The Class `dpdata.MultiSystems` can read data from a dir which may contains many files of different systems, or from single xyz file which contains different systems. diff --git a/dpdata/dftbplus/__init__.py b/dpdata/dftbplus/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dpdata/dftbplus/output.py b/dpdata/dftbplus/output.py new file mode 100644 index 00000000..ba8f6c84 --- /dev/null +++ b/dpdata/dftbplus/output.py @@ -0,0 +1,74 @@ +from typing import Tuple + +import numpy as np + + +def read_dftb_plus(fn_1: str, fn_2: str) -> Tuple[str, np.ndarray, float, np.ndarray]: + """Read from DFTB+ input and output. + + Parameters + ---------- + fn_1 : str + DFTB+ input file name + fn_2 : str + DFTB+ output file name + + Returns + ------- + str + atomic symbols + np.ndarray + atomic coordinates + float + total potential energy + np.ndarray + atomic forces + + """ + coord = None + symbols = None + forces = None + energy = None + with open(fn_1) as f: + flag = 0 + for line in f: + if flag == 1: + flag += 1 + elif flag == 2: + components = line.split() + flag += 1 + elif line.startswith("Geometry"): + flag = 1 + coord = [] + symbols = [] + elif flag in (3, 4, 5, 6): + s = line.split() + components_num = int(s[1]) + symbols.append(components[components_num - 1]) + coord.append([float(s[2]), float(s[3]), float(s[4])]) + flag += 1 + if flag == 7: + flag = 0 + with open(fn_2) as f: + flag = 0 + for line in f: + if line.startswith("Total Forces"): + flag = 8 + forces = [] + elif flag in (8, 9, 10, 11): + s = line.split() + forces.append([float(s[1]), float(s[2]), float(s[3])]) + flag += 1 + if flag == 12: + flag = 0 + elif line.startswith("Total energy:"): + s = line.split() + energy = float(s[2]) + flag = 0 + + symbols = np.array(symbols) + forces = np.array(forces) + coord = np.array(coord) + assert coord.shape == forces.shape + + return symbols, coord, energy, forces diff --git a/dpdata/plugins/dftbplus.py b/dpdata/plugins/dftbplus.py index e69de29b..5c8b4682 100644 --- a/dpdata/plugins/dftbplus.py +++ b/dpdata/plugins/dftbplus.py @@ -0,0 +1,61 @@ +import numpy as np + +from dpdata.dftbplus.output import read_dftb_plus +from dpdata.format import Format +from dpdata.unit import EnergyConversion, ForceConversion + +energy_convert = EnergyConversion("hartree", "eV").value() +force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value() + + +@Format.register("dftbplus") +class DFTBplusFormat(Format): + """The DFTBplusFormat class handles files in the DFTB+ format. + + This class provides a method to read DFTB+ files from a labeled system and + returns a dictionary containing various properties of the system.For more + information, please refer to the official documentation at the following URL: + https://dftbplus.org/documentation + + Attributes + ---------- + None + + Methods + ------- + from_labeled_system(file_paths, **kwargs): Reads system information from files. + + """ + + def from_labeled_system(self, file_paths, **kwargs): + """Reads system information from the given DFTB+ file paths. + + Parameters + ---------- + file_paths : tuple + A tuple containing the input and output file paths. + - Input file (file_in): Contains information about symbols and coord. + - Output file (file_out): Contains information about energy and force. 
+ **kwargs : dict + other parameters + + """ + file_in, file_out = file_paths + symbols, coord, energy, forces = read_dftb_plus(file_in, file_out) + last_occurrence = {v: i for i, v in enumerate(symbols)} + atom_names = np.array(sorted(np.unique(symbols), key=last_occurrence.get)) + atom_types = np.array([np.where(atom_names == s)[0][0] for s in symbols]) + atom_numbs = np.array([np.sum(atom_types == i) for i in range(len(atom_names))]) + natoms = coord.shape[0] + + return { + "atom_types": atom_types, + "atom_names": list(atom_names), + "atom_numbs": list(atom_numbs), + "coords": coord.reshape((1, natoms, 3)), + "energies": np.array([energy * energy_convert]), + "forces": (forces * force_convert).reshape((1, natoms, 3)), + "cells": np.zeros((1, 3, 3)), + "orig": np.zeros(3), + "nopbc": True, + } diff --git a/tests/dftbplus/detailed.out b/tests/dftbplus/detailed.out new file mode 100644 index 00000000..21fe93fc --- /dev/null +++ b/tests/dftbplus/detailed.out @@ -0,0 +1,88 @@ + Fermi distribution function + +Calculation with static geometry + + +******************************************************************************** + iSCC Total electronic Diff electronic SCC error + 88 -0.33554958E+01 -0.31884693E-07 0.11352275E-06 +******************************************************************************** + + Total charge: -0.00000000 + + Atomic gross charges (e) + Atom Charge + 1 -0.48336965 + 2 0.03575828 + 3 0.22380553 + 4 0.22380583 + +Nr. of electrons (up): 8.00000000 +Atom populations (up) + Atom Population + 1 5.48336965 + 2 0.96424172 + 3 0.77619447 + 4 0.77619417 + +l-shell populations (up) + Atom Sh. l Population + 1 1 0 1.65733979 + 1 2 1 3.82602986 + 2 1 0 0.96424172 + 2 2 1 0.00000000 + 3 1 0 0.77619447 + 3 2 1 0.00000000 + 4 1 0 0.77619417 + 4 2 1 0.00000000 + +Orbital populations (up) + Atom Sh. 
l m Population Label + 1 1 0 0 1.65733979 s + 1 2 1 -1 1.21678996 p_y + 1 2 1 0 1.46556336 p_z + 1 2 1 1 1.14367654 p_x + 2 1 0 0 0.96424172 s + 2 2 1 -1 0.00000000 p_y + 2 2 1 0 0.00000000 p_z + 2 2 1 1 0.00000000 p_x + 3 1 0 0 0.77619447 s + 3 2 1 -1 0.00000000 p_y + 3 2 1 0 0.00000000 p_z + 3 2 1 1 0.00000000 p_x + 4 1 0 0 0.77619417 s + 4 2 1 -1 0.00000000 p_y + 4 2 1 0 0.00000000 p_z + 4 2 1 1 0.00000000 p_x + +Fermi level: -0.2370222488 H -6.4497 eV +Band energy: -3.2187242078 H -87.5859 eV +TS: 0.0000000000 H 0.0000 eV +Band free energy (E-TS): -3.2187242078 H -87.5859 eV +Extrapolated E(0K): -3.2187242078 H -87.5859 eV +Input / Output electrons (q): 8.0000000000 8.0000000000 + +Energy H0: -3.3599884076 H -91.4299 eV +Energy SCC: 0.0056352830 H 0.1533 eV +Energy 3rd: -0.0011426808 H -0.0311 eV +Total Electronic energy: -3.3554958054 H -91.3077 eV +Repulsive energy: 0.0590974170 H 1.6081 eV +Total energy: -3.2963983884 H -89.6996 eV +Extrapolated to 0: -3.2963983884 H -89.6996 eV +Total Mermin free energy: -3.2963983884 H -89.6996 eV +Force related energy: -3.2963983884 H -89.6996 eV + +SCC converged + +Total Forces + 1 0.016567056203 0.002817951422 0.005634574270 + 2 -0.018803818530 -0.000002880649 -0.000006015442 + 3 0.001118562874 -0.005291070259 -0.000870711110 + 4 0.001118199454 0.002475999486 -0.004757847718 + +Maximal derivative component: 0.188038E-01 au + +Dipole moment: -0.06792979 -0.20495079 -0.40960550 au +Dipole moment: -0.17266032 -0.52093298 -1.04111341 Debye + + diff --git a/tests/dftbplus/dftb_pin.hsd b/tests/dftbplus/dftb_pin.hsd new file mode 100644 index 00000000..02e0ed22 --- /dev/null +++ b/tests/dftbplus/dftb_pin.hsd @@ -0,0 +1,93 @@ +Geometry = GenFormat { +4 C +N H +1 1 1.014150 0.112320 0.047370 +2 2 3.909390 0.037985 -0.101159 +3 2 0.702550 -0.851820 -0.060860 +4 2 0.702550 0.603740 -0.789160 +} +Driver = {} +Hamiltonian = DFTB { + SCC = Yes + MaxAngularMomentum = { + N = "p" + H = "p" + } + SlaterKosterFiles = { + N-N = "/home/jz748/devel/git/Programs/Amber18/dat/slko/3ob-3-1/N-N.skf" + H-H = "/home/jz748/devel/git/Programs/Amber18/dat/slko/3ob-3-1/H-H.skf" + H-N = "/home/jz748/devel/git/Programs/Amber18/dat/slko/3ob-3-1/H-N.skf" + N-H = "/home/jz748/devel/git/Programs/Amber18/dat/slko/3ob-3-1/N-H.skf" + } + ThirdOrderFull = Yes + HubbardDerivs = { + H = -0.1857 + N = -0.1535 + } + HCorrection = Damping { + Exponent = 4.0 + } + PolynomialRepulsive = {} + ShellResolvedSCC = No + OldSKInterpolation = No + RangeSeparated = None {} + ReadInitialCharges = No + InitialCharges = {} + SCCTolerance = 1.0000000000000001E-005 + ConvergentSCCOnly = Yes + SpinPolarisation = {} + ElectricField = {} + Solver = RelativelyRobust {} + Charge = 0.0000000000000000 + MaxSCCIterations = 100 + OnSiteCorrection = {} + Dispersion = {} + Solvation = {} + Electrostatics = GammaFunctional {} + ThirdOrder = No + Differentiation = FiniteDiff { + Delta = 1.2207031250000000E-004 + } + ForceEvaluation = "Traditional" + Mixer = Broyden { + MixingParameter = 0.20000000000000001 + InverseJacobiWeight = 1.0000000000000000E-002 + MinimalWeight = 1.0000000000000000 + MaximalWeight = 100000.00000000000 + WeightFactor = 1.0000000000000000E-002 + } + Filling = Fermi { + Temperature = 0.0000000000000000 + } +} +Options = { + WriteDetailedOut = Yes + WriteAutotestTag = No + WriteDetailedXML = No + WriteResultsTag = No + RestartFrequency = 20 + RandomSeed = 0 + WriteHS = No + WriteRealHS = No + MinimiseMemoryUsage = No + ShowFoldedCoords = No + TimingVerbosity = 1 + WriteChargesAsText = No +} 
+Analysis = { + CalculateForces = Yes + ProjectStates = {} + WriteEigenvectors = No + WriteBandOut = Yes + MullikenAnalysis = Yes + WriteNetCharges = No + AtomResolvedEnergies = No +} +ParserOptions = { + ParserVersion = 11 + WriteHSDInput = Yes + StopAfterParsing = No + IgnoreUnprocessedNodes = No +} +Reks = None {} +ExcitedState = {} diff --git a/tests/test_dftbplus.py b/tests/test_dftbplus.py new file mode 100644 index 00000000..2a2913a5 --- /dev/null +++ b/tests/test_dftbplus.py @@ -0,0 +1,58 @@ +import unittest + +import numpy as np +from comp_sys import CompLabeledSys, IsNoPBC +from context import dpdata + + +class TestDeepmdLoadAmmonia(unittest.TestCase, CompLabeledSys, IsNoPBC): + def setUp(self): + energy_convert = dpdata.unit.EnergyConversion("hartree", "eV").value() + force_convert = dpdata.unit.ForceConversion( + "hartree/bohr", "eV/angstrom" + ).value() + + self.system_1 = dpdata.LabeledSystem( + ("dftbplus/dftb_pin.hsd", "dftbplus/detailed.out"), fmt="dftbplus" + ) + + self.system_2 = dpdata.LabeledSystem( + data={ + "atom_types": np.array([0, 1, 1, 1]), + "atom_names": ["N", "H"], + "atom_numbs": [1, 3], + "coords": np.array( + [ + [ + [1.014150, 0.112320, 0.047370], + [3.909390, 0.037985, -0.101159], + [0.702550, -0.851820, -0.060860], + [0.702550, 0.603740, -0.789160], + ] + ] + ), + "energies": np.array([-3.2963983884]) * energy_convert, + "forces": np.array( + [ + [ + [0.016567056203, 0.002817951422, 0.005634574270], + [-0.018803818530, -0.000002880649, -0.000006015442], + [0.001118562874, -0.005291070259, -0.000870711110], + [0.001118199454, 0.002475999486, -0.004757847718], + ] + ] + ) + * force_convert, + "cells": np.zeros((1, 3, 3)), + "orig": np.zeros(3), + "nopbc": True, + } + ) + self.places = 6 + self.e_places = 6 + self.f_places = 6 + self.v_places = 6 + + +if __name__ == "__main__": + unittest.main() From 2d8e7f2007d08dccbaa89c562b5fa00a319e7d60 Mon Sep 17 00:00:00 2001 From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> Date: Mon, 7 Aug 2023 21:46:31 +0800 Subject: [PATCH 30/40] Fix issue #517 (#520) Co-authored-by: Han Wang --- dpdata/qe/traj.py | 1 + tests/qe.traj/traj6.cel | 36 ++++++++++++++++++------------------ 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/dpdata/qe/traj.py b/dpdata/qe/traj.py index ab1a790b..e27990cb 100644 --- a/dpdata/qe/traj.py +++ b/dpdata/qe/traj.py @@ -205,6 +205,7 @@ def to_system_data(input_name, prefix, begin=0, step=1): data["cells"], tmp_steps = load_data( prefix + ".cel", 3, begin=begin, step=step, convert=length_convert ) + data["cells"] = np.transpose(data["cells"], (0, 2, 1)) if csteps != tmp_steps: csteps.append(None) tmp_steps.append(None) diff --git a/tests/qe.traj/traj6.cel b/tests/qe.traj/traj6.cel index bcba5c37..c2a73531 100644 --- a/tests/qe.traj/traj6.cel +++ b/tests/qe.traj/traj6.cel @@ -1,24 +1,24 @@ 195 -5.157620138324029213e+00 0 0 -8.882263018337732685e-02 5.782217839848955876e+00 0 -4.531494312205264219e-02 1.857154025688012577e-01 5.855553673164934025e+00 +5.157620138324029213e+00 8.882263018337732685e-02 4.531494312205264219e-02 +0 5.782217839848955876e+00 1.857154025688012577e-01 +0 0 5.855553673164934025e+00 200 -5.359985500701728967e+00 0 0 -3.585941820098031974e-01 5.317218997480877896e+00 0 -7.606780476053129902e-01 7.811107228901693622e-01 5.715864930517207121e+00 +5.359985500701728967e+00 3.585941820098031974e-01 7.606780476053129902e-01 +0 5.317218997480877896e+00 7.811107228901693622e-01 +0 0 5.715864930517207121e+00 201 -5.235484852416917079e+00 0 0 
-5.612267823951659906e-01 5.640178212424978632e+00 0 -1.561235654197479228e-02 2.819721738692628765e-01 5.868536463888631260e+00 +5.235484852416917079e+00 5.612267823951659906e-01 1.561235654197479228e-02 +0 5.640178212424978632e+00 2.819721738692628765e-01 +0 0 5.868536463888631260e+00 202 -5.735200354152383717e+00 0 0 -8.818112752306928037e-02 5.003946763988647461e+00 0 -5.346029912365874992e-01 4.453576669882056693e-01 5.311832768820492490e+00 +5.735200354152383717e+00 8.818112752306928037e-02 5.346029912365874992e-01 +0 5.003946763988647461e+00 4.453576669882056693e-01 +0 0 5.311832768820492490e+00 203 -5.969007839117177916e+00 0 0 -7.974158979687617776e-02 5.086270090760550922e+00 0 -2.286579398249665163e-01 1.449088120223311904e-01 5.588188387987865546e+00 +5.969007839117177916e+00 7.974158979687617776e-02 2.286579398249665163e-01 +0 5.086270090760550922e+00 1.449088120223311904e-01 +0 0 5.588188387987865546e+00 204 -5.308510801020571712e+00 0 0 -3.076052782312116429e-01 5.279388982187173340e+00 0 -4.321921336152507731e-01 8.121110815096156399e-01 5.301664983741235737e+00 +5.308510801020571712e+00 3.076052782312116429e-01 4.321921336152507731e-01 +0 5.279388982187173340e+00 8.121110815096156399e-01 +0 0 5.301664983741235737e+00 From 3815b4e78bacdc4ee7ef720dbbbb31c11b3e941c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 10 Aug 2023 09:27:54 +0800 Subject: [PATCH 31/40] [pre-commit.ci] pre-commit autoupdate (#521) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.0.280 → v0.0.282](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.280...v0.0.282) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9ed4b592..7b7fb84d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.0.280 + rev: v0.0.282 hooks: - id: ruff args: ["--fix"] From 3da4651dd80e2502daf65f2efb9717744057349c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 13 Aug 2023 19:55:01 -0400 Subject: [PATCH 32/40] add extra lines for code blocks in docstring (#523) See #519 --- dpdata/driver.py | 1 + dpdata/plugins/deepmd.py | 3 +++ dpdata/system.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/dpdata/driver.py b/dpdata/driver.py index b446a5e8..de72d3fe 100644 --- a/dpdata/driver.py +++ b/dpdata/driver.py @@ -125,6 +125,7 @@ class HybridDriver(Driver): ... {"type": "sqm", "qm_theory": "DFTB3"}, ... {"type": "dp", "dp": "frozen_model.pb"}, ... ]) + This driver is the hybrid of SQM and DP. 
""" diff --git a/dpdata/plugins/deepmd.py b/dpdata/plugins/deepmd.py index 26cdc382..6953aba5 100644 --- a/dpdata/plugins/deepmd.py +++ b/dpdata/plugins/deepmd.py @@ -82,10 +82,12 @@ class DeePMDMixedFormat(Format): Examples -------- Dump a MultiSystems into a mixed type numpy directory: + >>> import dpdata >>> dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir") Load a mixed type data into a MultiSystems: + >>> import dpdata >>> dpdata.MultiSystems().load_systems_from_file("mixed_dir", fmt="deepmd/npy/mixed") """ @@ -161,6 +163,7 @@ class DeePMDHDF5Format(Format): Examples -------- Dump a MultiSystems to a HDF5 file: + >>> import dpdata >>> dpdata.MultiSystems().from_deepmd_npy("data").to_deepmd_hdf5("data.hdf5") """ diff --git a/dpdata/system.py b/dpdata/system.py index 7b68a9e1..171cd81a 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -831,6 +831,7 @@ def predict(self, *args: Any, driver: str = "dp", **kwargs: Any) -> "LabeledSyst Examples -------- The default driver is DP: + >>> labeled_sys = ori_sys.predict("frozen_model_compressed.pb") """ if not isinstance(driver, Driver): @@ -1406,6 +1407,7 @@ def minimize( Examples -------- Minimize a system using ASE BFGS along with a DP driver: + >>> from dpdata.driver import Driver >>> from ase.optimize import BFGS >>> driver = driver.get_driver("dp")("some_model.pb") From 40d593bcdcee2261c0e7f99fff1f07227dde5a12 Mon Sep 17 00:00:00 2001 From: Levi Zhou <31941107+ZhouXY-PKU@users.noreply.github.com> Date: Wed, 16 Aug 2023 09:06:15 +0800 Subject: [PATCH 33/40] Update system.py (#524) The example is wrong? "driver" should be "Driver"? Signed-off-by: Levi Zhou <31941107+ZhouXY-PKU@users.noreply.github.com> --- dpdata/system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpdata/system.py b/dpdata/system.py index 171cd81a..79044c72 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -1410,7 +1410,7 @@ def minimize( >>> from dpdata.driver import Driver >>> from ase.optimize import BFGS - >>> driver = driver.get_driver("dp")("some_model.pb") + >>> driver = Driver.get_driver("dp")("some_model.pb") >>> some_system.minimize(minimizer="ase", driver=driver, optimizer=BFGS, fmax=1e-5) """ if not isinstance(minimizer, Minimizer): From 666d199133ee59bd6acf349437d0f848c734ec0d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 15 Aug 2023 21:06:34 -0400 Subject: [PATCH 34/40] [pre-commit.ci] pre-commit autoupdate (#525) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.0.282 → v0.0.284](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.282...v0.0.284) --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jinzhe Zeng --- .pre-commit-config.yaml | 2 +- dpdata/abacus/md.py | 2 +- dpdata/abacus/relax.py | 2 +- dpdata/abacus/scf.py | 6 +++--- dpdata/qe/scf.py | 4 ++-- dpdata/system.py | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7b7fb84d..fe362cb0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.0.282 + rev: v0.0.284 hooks: - id: ruff args: ["--fix"] diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py index 8cda5960..55538d0b 100644 --- a/dpdata/abacus/md.py +++ b/dpdata/abacus/md.py @@ -148,7 +148,7 @@ def get_energy(outlines, ndump, dump_freq): def get_frame(fname): - if type(fname) == str: + if isinstance(fname, str): # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") diff --git a/dpdata/abacus/relax.py b/dpdata/abacus/relax.py index 5dd77030..db910fc7 100644 --- a/dpdata/abacus/relax.py +++ b/dpdata/abacus/relax.py @@ -167,7 +167,7 @@ def get_coords_from_log(loglines, natoms): def get_frame(fname): - if type(fname) == str: + if isinstance(fname, str): # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index d6f83e72..fbd658b4 100644 --- a/dpdata/abacus/scf.py +++ b/dpdata/abacus/scf.py @@ -161,7 +161,7 @@ def get_frame(fname): "forces": [], } - if type(fname) == str: + if isinstance(fname, str): # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") @@ -255,7 +255,7 @@ def get_nele_from_stru(geometry_inlines): def get_frame_from_stru(fname): - assert type(fname) == str + assert isinstance(fname, str) with open(fname) as fp: geometry_inlines = fp.read().split("\n") nele = get_nele_from_stru(geometry_inlines) @@ -304,7 +304,7 @@ def make_unlabeled_stru( out += "\n" if numerical_descriptor is not None: - assert type(numerical_descriptor) == str + assert isinstance(numerical_descriptor, str) out += "NUMERICAL_DESCRIPTOR\n%s\n" % numerical_descriptor out += "\n" diff --git a/dpdata/qe/scf.py b/dpdata/qe/scf.py index 4457572d..d2f5dead 100755 --- a/dpdata/qe/scf.py +++ b/dpdata/qe/scf.py @@ -129,13 +129,13 @@ def get_stress(lines): def get_frame(fname): - if type(fname) == str: + if isinstance(fname, str): path_out = fname outname = os.path.basename(path_out) # the name of the input file is assumed to be different from the output by 'in' and 'out' inname = outname.replace("out", "in") path_in = os.path.join(os.path.dirname(path_out), inname) - elif type(fname) == list and len(fname) == 2: + elif isinstance(fname, list) and len(fname) == 2: path_in = fname[0] path_out = fname[1] else: diff --git a/dpdata/system.py b/dpdata/system.py index 79044c72..cf03aa61 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -654,7 +654,7 @@ def replicate(self, ncopy): if len(ncopy) != 3: raise RuntimeError("ncopy must be a list or tuple with 3 int") for ii in ncopy: - if type(ii) is not int: + if not isinstance(ii, int): raise RuntimeError("ncopy must be a list or tuple must with 3 int") tmp = System() @@ -690,7 +690,7 @@ def replace(self, initial_atom_type, end_atom_type, replace_num): raise RuntimeError( "Must use method replace() of the instance of class dpdata.System" ) - if type(replace_num) is not int: + if not isinstance(replace_num, int): raise ValueError(f"replace_num must be a integer. 
Now is {replace_num}") if replace_num <= 0: raise ValueError(f"replace_num must be larger than 0.Now is {replace_num}") From 5db6410c2f1e0018ca502ada373a98488d528cb0 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 21 Aug 2023 01:50:16 -0400 Subject: [PATCH 35/40] Delete requirements.txt (#527) unclear what this file is used for --- requirements.txt | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1c7c2293..00000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -monty==2.0.4 -pymatgen==2019.7.2 From 72d562689c65e9d632019fcc6fddc620bf200c57 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Aug 2023 10:14:19 +0800 Subject: [PATCH 36/40] [pre-commit.ci] pre-commit autoupdate (#528) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.0.284 → v0.0.285](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.284...v0.0.285) - [github.com/asottile/blacken-docs: 1.15.0 → 1.16.0](https://github.com/asottile/blacken-docs/compare/1.15.0...1.16.0) --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jinzhe Zeng --- .pre-commit-config.yaml | 4 ++-- dpdata/abacus/md.py | 2 +- dpdata/md/rdf.py | 4 ++-- dpdata/system.py | 2 +- plugin_example/README.md | 1 + 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fe362cb0..ab4fca9c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.0.284 + rev: v0.0.285 hooks: - id: ruff args: ["--fix"] @@ -35,7 +35,7 @@ repos: args: ["--write"] # Python inside docs - repo: https://github.com/asottile/blacken-docs - rev: 1.15.0 + rev: 1.16.0 hooks: - id: blacken-docs ci: diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py index 55538d0b..4b8ec038 100644 --- a/dpdata/abacus/md.py +++ b/dpdata/abacus/md.py @@ -206,7 +206,7 @@ def get_frame(fname): data["energies"] = energy data["forces"] = force data["virials"] = stress - if type(data["virials"]) != np.ndarray: + if not isinstance(data["virials"], np.ndarray): del data["virials"] data["orig"] = np.zeros(3) diff --git a/dpdata/md/rdf.py b/dpdata/md/rdf.py index d5409a0c..de8f1c74 100644 --- a/dpdata/md/rdf.py +++ b/dpdata/md/rdf.py @@ -62,9 +62,9 @@ def _compute_rdf_1frame(box, posis, atype, sel_type=[None, None], max_r=5, nbins sel_type[0] = all_types if sel_type[1] is None: sel_type[1] = all_types - if type(sel_type[0]) is not list: + if not isinstance(sel_type[0], list): sel_type[0] = [sel_type[0]] - if type(sel_type[1]) is not list: + if not isinstance(sel_type[1], list): sel_type[1] = [sel_type[1]] natoms = len(posis) import ase.neighborlist diff --git a/dpdata/system.py b/dpdata/system.py index cf03aa61..daf0c885 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -509,7 +509,7 @@ def check_type_map(self, type_map): self.sort_atom_names(type_map=type_map) def apply_type_map(self, type_map): - if type_map is not None and type(type_map) is list: + if type_map is not None and isinstance(type_map, list): self.check_type_map(type_map) else: raise RuntimeError("invalid type map, cannot be applied") diff --git a/plugin_example/README.md b/plugin_example/README.md index 322756f9..a506a4e8 100644 --- a/plugin_example/README.md +++ b/plugin_example/README.md @@ -6,6 +6,7 @@ pip install . ```py import dpdata + print(dpdata.System(12, fmt="random")) ``` From e239d0b72854b363fabeeacc2815fe5bc8768a99 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Aug 2023 13:41:11 +0800 Subject: [PATCH 37/40] [pre-commit.ci] pre-commit autoupdate (#531) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.0.285 → v0.0.286](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.285...v0.0.286) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ab4fca9c..fb17c270 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.0.285
+ rev: v0.0.286
 hooks:
 - id: ruff
 args: ["--fix"]

From 6ed3c44c7b8ba787a8a93b86df78315d6137129b Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Sat, 2 Sep 2023 10:43:16 -0400
Subject: [PATCH 38/40] add a public API to register data types dynamically
 (#532)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 dpdata/system.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/dpdata/system.py b/dpdata/system.py
index daf0c885..8af8e4a5 100644
--- a/dpdata/system.py
+++ b/dpdata/system.py
@@ -954,6 +954,17 @@ def pick_by_amber_mask(self, param, maskstr, pass_coords=False, nopbc=None):
         idx = pick_by_amber_mask(parm, maskstr)
         return self.pick_atom_idx(idx, nopbc=nopbc)

+    @classmethod
+    def register_data_type(cls, *data_type: Tuple[DataType]):
+        """Register data type.
+
+        Parameters
+        ----------
+        *data_type : tuple[DataType]
+            data type to be registered
+        """
+        cls.DTYPES = cls.DTYPES + tuple(data_type)
+

 def get_cell_perturb_matrix(cell_pert_fraction):
     if cell_pert_fraction < 0:
@@ -1599,9 +1610,9 @@ def to_format(self, *args, **kwargs):
         setattr(MultiSystems, method, get_func(formatcls))

     # at this point, System.DTYPES and LabeledSystem.DTYPES has been initialized
-    System.DTYPES = System.DTYPES + get_data_types(labeled=False)
-    LabeledSystem.DTYPES = LabeledSystem.DTYPES + get_data_types(labeled=False)
-    LabeledSystem.DTYPES = LabeledSystem.DTYPES + get_data_types(labeled=True)
+    System.register_data_type(*get_data_types(labeled=False))
+    LabeledSystem.register_data_type(*get_data_types(labeled=False))
+    LabeledSystem.register_data_type(*get_data_types(labeled=True))


 add_format_methods()

From 002e0734580c3524b944e539fd38ce9f2f5ee99f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 6 Sep 2023 09:09:05 +0800
Subject: [PATCH 39/40] [pre-commit.ci] pre-commit autoupdate (#534)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/astral-sh/ruff-pre-commit: v0.0.286 → v0.0.287](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.286...v0.0.287)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fb17c270..abaa911e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,7 +23,7 @@ repos:
   - id: black-jupyter
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
- rev: v0.0.286 + rev: v0.0.287 hooks: - id: ruff args: ["--fix"] From ec7985b2854c81b3b25573fd2ad4c21d25945634 Mon Sep 17 00:00:00 2001 From: link89 Date: Wed, 6 Sep 2023 09:13:33 +0800 Subject: [PATCH 40/40] fix cp2k 2023 output (#533) fix https://github.com/deepmodeling/dpdata/issues/508, https://github.com/deepmodeling/dpdata/issues/507 --------- Signed-off-by: link89 Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/cp2k/output.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/dpdata/cp2k/output.py b/dpdata/cp2k/output.py index 7b3aeb64..0b08c51e 100644 --- a/dpdata/cp2k/output.py +++ b/dpdata/cp2k/output.py @@ -409,24 +409,18 @@ def get_frames(fname): cell.append(ii.split()[4:7]) if "Atomic kind:" in ii: atom_symbol_list.append(ii.split()[3]) - if "Atom Kind Element" in ii: + + # beginning of coords block + if "Atom Kind Element" in ii or "Atom Kind Element" in ii: coord_flag = True - coord_idx = idx + # parse coords lines + elif coord_flag: + if ii == "\n": + coord_flag = len(coord) == 0 # skip empty line at the beginning + else: + coord.append(ii.split()[4:7]) + atom_symbol_idx_list.append(ii.split()[1]) - # get the coord block info - if coord_flag: - if idx == coord_idx + 1: - if ii == "\n": - pass - else: - coord.append(ii.split()[4:7]) - atom_symbol_idx_list.append(ii.split()[1]) - if idx > coord_idx + 1: - if ii == "\n": - coord_flag = False - else: - coord.append(ii.split()[4:7]) - atom_symbol_idx_list.append(ii.split()[1]) if "ENERGY|" in ii: energy = ii.split()[8] if " Atom Kind " in ii: