Skip to content

Commit

Permalink
BUG: Fix UnboundLocalError on malformed pdf (#2619)
Browse files Browse the repository at this point in the history
Closes #2617

Co-authored-by: jules <jules@harfanglab.fr>
  • Loading branch information
farjasju and jules authored May 2, 2024
1 parent d9bf67f commit e92b20e
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
3 changes: 2 additions & 1 deletion pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,8 @@ def get_object(
self.stream.seek(m.start(0) + 1)
idnum, generation = self.read_object_header(self.stream)
else:
idnum = -1 # exception will be raised below
idnum = -1
generation = -1 # exception will be raised below
if idnum != indirect_reference.idnum and self.xref_index:
# Xref table probably had bad indexes due to not being zero-indexed
if self.strict:
Expand Down
14 changes: 14 additions & 0 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1516,3 +1516,17 @@ def test_truncated_xref(caplog):
name = "iss2575.pdf"
PdfReader(BytesIO(get_data_from_url(url, name=name)))
assert "Invalid/Truncated xref table. Rebuilding it." in caplog.text


@pytest.mark.enable_socket()
def test_damaged_pdf():
    """Regression test for the malformed PDF from issue #2617.

    In non-strict mode, counting the pages must complete without raising.
    In strict mode, the same operation must raise ``PdfReadError`` with a
    message reporting the object ID mismatch.
    """
    url = "https://github.com/py-pdf/pypdf/files/15186107/malformed_pdf.pdf"
    name = "malformed_pdf.pdf"

    # Lenient mode: accessing the page count must not raise.
    lenient_stream = BytesIO(get_data_from_url(url, name=name))
    lenient_pdf = PdfReader(lenient_stream, strict=False)
    len(lenient_pdf.pages)

    # Strict mode: the same access must fail with the exact message below.
    strict_stream = BytesIO(get_data_from_url(url, name=name))
    strict_pdf = PdfReader(strict_stream, strict=True)
    expected_message = "Expected object ID (21 0) does not match actual (-1 -1)."
    with pytest.raises(PdfReadError) as exc_info:
        len(strict_pdf.pages)
    assert exc_info.value.args[0] == expected_message

0 comments on commit e92b20e

Please sign in to comment.