From 7c5fe87ef63b85823fc44c9f21cbb055d47be181 Mon Sep 17 00:00:00 2001
From: Stefan <96178532+stefan6419846@users.noreply.github.com>
Date: Thu, 19 Dec 2024 19:29:18 +0100
Subject: [PATCH 1/3] ROB: Ignore odd-length strings when processing cmap lines
 (#3009)

Closes #2216.
---
 pypdf/_cmap.py     |  6 +++++-
 tests/test_cmap.py | 11 +++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py
index e6c4bce88..de21b3429 100644
--- a/pypdf/_cmap.py
+++ b/pypdf/_cmap.py
@@ -1,3 +1,4 @@
+import binascii
 from binascii import unhexlify
 from math import ceil
 from typing import Any, Dict, List, Tuple, Union, cast
@@ -304,7 +305,10 @@ def process_cm_line(
     elif b"endbfchar" in line:
         process_char = False
     elif process_rg:
-        multiline_rg = parse_bfrange(line, map_dict, int_entry, multiline_rg)
+        try:
+            multiline_rg = parse_bfrange(line, map_dict, int_entry, multiline_rg)
+        except binascii.Error as error:
+            logger_warning(f"Skipping broken line {line!r}: {error}", __name__)
     elif process_char:
         parse_bfchar(line, map_dict, int_entry)
     return process_rg, process_char, multiline_rg
diff --git a/tests/test_cmap.py b/tests/test_cmap.py
index 55df3f1f2..e80842460 100644
--- a/tests/test_cmap.py
+++ b/tests/test_cmap.py
@@ -281,3 +281,14 @@ def test_iss2966():
     reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
     assert "Lorem ipsum dolor sit amet" in reader.pages[0].extract_text()
 
+
+@pytest.mark.enable_socket
+def test_binascii_odd_length_string(caplog):
+    """Tests for #2216: a cmap bfrange line with an odd-length hex string is skipped with a warning."""
+    url = "https://github.com/user-attachments/files/18199642/iss2216.pdf"
+    name = "iss2216.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+
+    page = reader.pages[0]
+    assert "\n(Many other theorems may\n" in page.extract_text()
+    assert "Skipping broken line b'143f   143f   10300': Odd-length string\n" in caplog.text

From 83083bb15596af05fe9564aaf5e937a8f293e0db Mon Sep 17 00:00:00 2001
From: Stefan <96178532+stefan6419846@users.noreply.github.com>
Date: Thu, 19 Dec 2024 20:11:14 +0100
Subject: [PATCH 2/3] BUG: Handle chained colorspace for inline images when no
 filter is set (#3008)

Closes #2998.
---
 pypdf/generic/_data_structures.py |  2 ++
 tests/test_images.py              | 11 +++++++++++
 2 files changed, 13 insertions(+)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 3313ab8da..f02fe4988 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1343,6 +1343,8 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
             data = extract_inline_DCT(stream)
         elif filtr == "not set":
             cs = settings.get("/CS", "")
+            if isinstance(cs, list):
+                cs = cs[0]
             if "RGB" in cs:
                 lcs = 3
             elif "CMYK" in cs:
diff --git a/tests/test_images.py b/tests/test_images.py
index c0308eb3e..7d415f6f3 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -473,3 +473,14 @@ def test_4bits_images(caplog):
     name = "iss2411.png"
     img = Image.open(BytesIO(get_data_from_url(url, name=name)))
     assert image_similarity(reader.pages[0].images[1].image, img) == 1.0
+
+
+@pytest.mark.enable_socket
+def test_no_filter_with_colorspace_as_list():
+    """Tests for #2998: unfiltered inline image whose /CS colorspace is given as a list."""
+    url = "https://github.com/user-attachments/files/18058571/9bf7a2e2-72c8-4ac1-b8ae-164df16c8cef.pdf"
+    name = "iss2998.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+
+    page = reader.pages[0]
+    page.images.items()

From 4f2cd3439c6f074c515fe347ef92ba0bb44a9e37 Mon Sep 17 00:00:00 2001
From: Stefan <96178532+stefan6419846@users.noreply.github.com>
Date: Thu, 19 Dec 2024 20:15:27 +0100
Subject: [PATCH 3/3] ROB: Fall back to non-Adobe Ascii85 format for missing
 end markers (#3007)

Closes #2996.
---
 pypdf/filters.py      |  8 +++++++-
 tests/test_filters.py | 26 ++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/pypdf/filters.py b/pypdf/filters.py
index 517d6aac3..a95b96a54 100644
--- a/pypdf/filters.py
+++ b/pypdf/filters.py
@@ -446,7 +446,13 @@ def decode(
         if isinstance(data, str):
             data = data.encode()
         data = data.strip(WHITESPACES_AS_BYTES)
-        return a85decode(data, adobe=True, ignorechars=WHITESPACES_AS_BYTES)
+        try:
+            return a85decode(data, adobe=True, ignorechars=WHITESPACES_AS_BYTES)
+        except ValueError as error:
+            if error.args[0] == "Ascii85 encoded byte sequences must end with b'~>'":
+                logger_warning("Ignoring missing Ascii85 end marker.", __name__)
+                return a85decode(data, adobe=False, ignorechars=WHITESPACES_AS_BYTES)
+            raise
 
 
 class DCTDecode:
diff --git a/tests/test_filters.py b/tests/test_filters.py
index 23b90cca8..90a119844 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -594,3 +594,29 @@ def test_flate_decode_with_image_mode_1__whitespace_at_end_of_lookup():
     name = "issue2331.pdf"
     reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
     reader.pages[0].images[0]
+
+
+@pytest.mark.enable_socket
+def test_ascii85decode__invalid_end__recoverable(caplog):
+    """From #2996: a missing Ascii85 end marker (`~>`) is tolerated with a warning."""
+    url = "https://github.com/user-attachments/files/18050808/1af7d56a-5c8c-4914-85b3-b2536a5525cd.pdf"
+    name = "issue2996.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+
+    page = reader.pages[1]
+    assert page.extract_text() == ""
+    assert "Ignoring missing Ascii85 end marker." in caplog.text
+
+
+def test_ascii85decode__non_recoverable(caplog):
+    # Without our custom handling, this would complain about the final `~>` being missing.
+    data = "äöüß"
+    with pytest.raises(ValueError, match="Non-Ascii85 digit found: Ã"):
+        ASCII85Decode.decode(data)
+    assert "Ignoring missing Ascii85 end marker." in caplog.text
+    caplog.clear()
+
+    data += "~>"
+    with pytest.raises(ValueError, match="Non-Ascii85 digit found: Ã"):
+        ASCII85Decode.decode(data)
+    assert caplog.text == ""