py-pdf · pubpub-zz · Dec 23, 2024 · Dec 19, 2024 · Dec 19, 2024 · Dec 19, 2024
diff --git a/pypdf/_page.py b/pypdf/_page.py
@@ -1822,7 +1822,7 @@ def _handle_tj(
 
         return text, rtl_dir, actual_str_size
 
-    def _extract_text(
+    def _extract_text(  # noqa: C901, PLR0915  # TODO: Refactor for less complexity.
         self,
         obj: Any,
         pdf: Any,
@@ -1980,12 +1980,12 @@ def process_operation(operator: bytes, operands: List[Any]) -> None:
                 memo_tm = tm_matrix.copy()
             # Table 5.2 page 398
             elif operator == b"Tz":
-                char_scale = float(operands[0]) / 100.0
+                char_scale = float(operands[0]) / 100.0 if operands else 1.0
             elif operator == b"Tw":
-                space_scale = 1.0 + float(operands[0])
+                space_scale = 1.0 + float(operands[0] if operands else 0.0)
             elif operator == b"TL":
                 scale_x = math.sqrt(tm_matrix[0]**2 + tm_matrix[2]**2)
-                TL = float(operands[0]) * font_size * scale_x
+                TL = float(operands[0] if operands else 0.0) * font_size * scale_x
             elif operator == b"Tf":
                 if text != "":
                     output += text  # .translate(cmap)

diff --git a/tests/test_text_extraction.py b/tests/test_text_extraction.py
@@ -272,3 +272,15 @@ def test_infinite_loop_arrays():
     page = reader.pages[0]
     extracted = page.extract_text()
     assert "RNA structure comparison" in extracted
+
+
+@pytest.mark.enable_socket
+def test_tz_with_no_operands():
+    """Tests for #2975: text extraction of a page whose Tz operator has no operands."""
+    url = "https://github.com/user-attachments/files/17974120/9E5E080E-C8DB-4A6B-822B-9A67DC04E526-120438.pdf"
+    name = "iss2975.pdf"
+    data = get_data_from_url(url, name=name)
+
+    reader = PdfReader(BytesIO(data))
+    page = reader.pages[1]
+    assert "\nThankyouforyourattentiontothismatter.\n" in page.extract_text()