diff --git a/src/kohlrahbi/unfoldedahb/unfoldedahbline.py b/src/kohlrahbi/unfoldedahb/unfoldedahbline.py index e1d645fd..303598ea 100644 --- a/src/kohlrahbi/unfoldedahb/unfoldedahbline.py +++ b/src/kohlrahbi/unfoldedahb/unfoldedahbline.py @@ -81,6 +81,7 @@ class UnfoldedAhbLine(BaseModel): segment_name: str # Ansprechpartner segment_gruppe: str | None # SG3 segment: str | None # CTA + segment_id: str | None = None # 00009 datenelement: str | None # 3055 code: str | None # IC qualifier: str | None # Name vom Ansprechpartner diff --git a/src/kohlrahbi/unfoldedahb/unfoldedahbtable.py b/src/kohlrahbi/unfoldedahb/unfoldedahbtable.py index d5881146..6c2be21d 100644 --- a/src/kohlrahbi/unfoldedahb/unfoldedahbtable.py +++ b/src/kohlrahbi/unfoldedahb/unfoldedahbtable.py @@ -5,6 +5,7 @@ import copy import json import re +from functools import lru_cache from pathlib import Path from uuid import uuid4 @@ -26,6 +27,7 @@ from kohlrahbi.unfoldedahb.unfoldedahbtablemetadata import UnfoldedAhbTableMetaData _segment_group_pattern = re.compile(r"^SG\d+$") +_segment_id_pattern = re.compile(r"^\d{5}$") def _lines_are_equal_when_ignoring_guid(line1: AhbLine, line2: AhbLine) -> bool: @@ -39,6 +41,24 @@ def _lines_are_equal_when_ignoring_guid(line1: AhbLine, line2: AhbLine) -> bool: return line1_copy == line2_copy +@lru_cache +def _split_data_element_and_segment_id(value: str | None) -> tuple[str | None, str | None]: + """ + returns the data element id and segment id + """ + if value is None: + return None, None + datenelement_id: str | None + segment_id: str | None + if _segment_id_pattern.match(value): + datenelement_id = None + segment_id = value + else: + datenelement_id = value + segment_id = None + return datenelement_id, segment_id + + def _keep_guids_of_unchanged_lines_stable( updated_ahb: FlatAnwendungshandbuch, existing_ahb: FlatAnwendungshandbuch ) -> None: @@ -159,7 +179,8 @@ def from_ahb_table(cls, ahb_table: AhbTable, pruefi: str) -> "UnfoldedAhb": 
segment_name=current_section_name, segment_gruppe=row["Segment Gruppe"] or None, segment=row["Segment"] or None, - datenelement=row["Datenelement"] or None, + datenelement=_split_data_element_and_segment_id(row["Datenelement"])[0], + segment_id=_split_data_element_and_segment_id(row["Datenelement"])[1], code=value_pool_entry, qualifier="", beschreibung=description, @@ -284,6 +305,8 @@ def _is_just_segment(ahb_row: pd.Series) -> bool: # type:ignore[type-arg] and not ahb_row["Datenelement"] ): return True + if ahb_row["Datenelement"] is not None and _segment_id_pattern.match(ahb_row["Datenelement"]): + return True return False @staticmethod @@ -325,6 +348,7 @@ def convert_to_flat_ahb(self) -> FlatAnwendungshandbuch: segment_group_key=unfolded_ahb_line.segment_gruppe, segment_code=unfolded_ahb_line.segment, data_element=unfolded_ahb_line.datenelement, + segment_id=unfolded_ahb_line.segment_id, value_pool_entry=unfolded_ahb_line.code, name=unfolded_ahb_line.beschreibung or unfolded_ahb_line.qualifier, ahb_expression=unfolded_ahb_line.bedingung_ausdruck, diff --git a/unittests/test-files/docx_files/UTILMDAHBStrom-informatorischeLesefassung1.2aKonsolidierteLesefassungmitFehlerkorrekturenStand05.04.2024_99991231_20240405.docx b/unittests/test-files/docx_files/UTILMDAHBStrom-informatorischeLesefassung1.2aKonsolidierteLesefassungmitFehlerkorrekturenStand05.04.2024_99991231_20240405.docx new file mode 100644 index 00000000..c995a286 Binary files /dev/null and b/unittests/test-files/docx_files/UTILMDAHBStrom-informatorischeLesefassung1.2aKonsolidierteLesefassungmitFehlerkorrekturenStand05.04.2024_99991231_20240405.docx differ diff --git a/unittests/test_ahb_sub_table.py b/unittests/test_ahb_sub_table.py index 2e703f43..76966732 100644 --- a/unittests/test_ahb_sub_table.py +++ b/unittests/test_ahb_sub_table.py @@ -5,10 +5,12 @@ from pathlib import Path import docx +import pytest from docx.table import Table from kohlrahbi.ahbtable.ahbsubtable import AhbSubTable -from 
@pytest.mark.parametrize(
    "docx_path, segment_id, segment_code",
    [
        pytest.param(
            Path(__file__).parent
            / Path(
                # pylint: disable=line-too-long
                "test-files/docx_files/UTILMDAHBStrom-informatorischeLesefassung1.2aKonsolidierteLesefassungmitFehlerkorrekturenStand05.04.2024_99991231_20240405.docx"
            ),
            "00003",
            "UNH",
        ),
        pytest.param(
            Path(__file__).parent
            / Path(
                # pylint: disable=line-too-long
                "test-files/docx_files/UTILMDAHBStrom-informatorischeLesefassung1.2aKonsolidierteLesefassungmitFehlerkorrekturenStand05.04.2024_99991231_20240405.docx"
            ),
            "00004",
            "BGM",
        ),
        pytest.param(
            Path(__file__).parent
            / Path(
                # pylint: disable=line-too-long
                "test-files/docx_files/UTILMDAHBStrom-informatorischeLesefassung1.2aKonsolidierteLesefassungmitFehlerkorrekturenStand05.04.2024_99991231_20240405.docx"
            ),
            "00540",
            "UNT",
        ),
    ],
)
def test_segment_id_parsing(self, docx_path: Path, segment_id: str, segment_code: str) -> None:
    """
    The flat AHB built for pruefi 55109 must contain a line that carries the expected
    segment id together with the expected segment code.

    https://github.com/Hochfrequenz/kohlrahbi/issues/304
    """
    assert docx_path.exists()
    doc = docx.Document(str(docx_path))  # Creating word reader object.
    ahb_table = get_ahb_table(document=doc, pruefi="55109")
    assert ahb_table is not None
    unfolded_ahb = UnfoldedAhb.from_ahb_table(ahb_table=ahb_table, pruefi="55109")
    assert unfolded_ahb is not None
    flat_ahb = unfolded_ahb.convert_to_flat_ahb()
    assert flat_ahb is not None
    # Evaluate the condition itself instead of the truthiness of the filtered line objects.
    assert any(line.segment_id is not None for line in flat_ahb.lines)
    assert any(line.segment_id == segment_id and line.segment_code == segment_code for line in flat_ahb.lines)