From 2a63b4fc19a464d2111683edc9aa1fa242edd20b Mon Sep 17 00:00:00 2001 From: Petr Pucil Date: Sun, 15 Sep 2024 10:53:49 +0200 Subject: [PATCH] Adopt a system of exceptions derived from KaitaiStructError * Resolves https://github.com/kaitai-io/kaitai_struct_python_runtime/issues/40 * Resolves https://github.com/kaitai-io/kaitai_struct_python_runtime/issues/41 As explained in https://github.com/kaitai-io/kaitai_struct_python_runtime/issues/40, this makes it easy to handle all errors caused by invalid input data by using `kaitaistruct.KaitaiStructError` in a `try..except` statement. Three new exception types were added: `InvalidArgumentError`, `EndOfStreamError` and `NoTerminatorFoundError`. All changes to raised exceptions in this commit should be backward compatible, as we are only moving to subclasses of previously raised exceptions. `NoTerminatorFoundError` is a subclass of `EndOfStreamError` to address the suggestion in https://github.com/kaitai-io/kaitai_struct_python_runtime/issues/41. Note that the `process_rotate_left` method could only raise `NotImplementedError` if someone called it manually (because KSC-generated parsers hardcode `group_size` to `1`, see https://github.com/kaitai-io/kaitai_struct_compiler/blob/c23ec2ca88d84042edba76f70c1f003d062b7585/shared/src/main/scala/io/kaitai/struct/languages/PythonCompiler.scala#L211), so it makes no sense to raise an exception derived from `KaitaiStructError` (it's a programmer error, not a user input error). Most of our runtime libraries in other languages don't even have this `group_size` parameter, and if they do (C#, Java, Ruby), they also throw the equivalent of `NotImplementedError` (except the JavaScript runtime, which throws a plain string, which is _possible_ in JS but considered bad practice, so we should fix this). 
--- kaitaistruct.py | 75 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 18 deletions(-) diff --git a/kaitaistruct.py b/kaitaistruct.py index 2497502..e774dba 100644 --- a/kaitaistruct.py +++ b/kaitaistruct.py @@ -128,6 +128,9 @@ def is_eof(self): return self._io.tell() >= self.size() def seek(self, n): + if n < 0: + raise InvalidArgumentError("cannot seek to invalid position %d" % (n,)) + if self.bits_write_mode: self.write_align_to_byte() else: @@ -376,7 +379,7 @@ def read_bytes(self, n): def _read_bytes_not_aligned(self, n): if n < 0: - raise ValueError( + raise InvalidArgumentError( "requested invalid %d amount of bytes" % (n,) ) @@ -404,9 +407,10 @@ def _read_bytes_not_aligned(self, n): if not is_satisfiable: # noinspection PyUnboundLocalVariable - raise EOFError( + raise EndOfStreamError( "requested %d bytes, but only %d bytes available" % - (n, num_bytes_available) + (n, num_bytes_available), + n, num_bytes_available ) # noinspection PyUnboundLocalVariable @@ -424,10 +428,7 @@ def read_bytes_term(self, term, include_term, consume_term, eos_error): c = self._io.read(1) if not c: if eos_error: - raise Exception( - "end of stream reached, but no terminator %d found" % - (term,) - ) + raise NoTerminatorFoundError(term_byte, 0) return bytes(r) @@ -448,10 +449,7 @@ def read_bytes_term_multi(self, term, include_term, consume_term, eos_error): c = self._io.read(unit_size) if len(c) < unit_size: if eos_error: - raise Exception( - "end of stream reached, but no terminator %s found" % - (repr(term),) - ) + raise NoTerminatorFoundError(term, len(c)) r += c return bytes(r) @@ -523,9 +521,10 @@ def _ensure_bytes_left_to_write(self, n, pos): num_bytes_left = full_size - pos if n > num_bytes_left: - raise EOFError( + raise EndOfStreamError( "requested to write %d bytes, but only %d bytes left in the stream" % - (n, num_bytes_left) + (n, num_bytes_left), + n, num_bytes_left ) # region Integer numbers @@ -782,7 +781,7 @@ def 
process_xor_many(data, key): @staticmethod def process_rotate_left(data, amount, group_size): if group_size != 1: - raise Exception( + raise NotImplementedError( "unable to rotate group of %d bytes yet" % (group_size,) ) @@ -872,15 +871,55 @@ def _write_back(self, parent): class KaitaiStructError(Exception): - """Common ancestor for all error originating from Kaitai Struct usage. - Stores KSY source path, pointing to an element supposedly guilty of - an error. + """Common ancestor for all errors originating from correct Kaitai Struct + usage (i.e. errors that indicate a problem with user input, not errors + indicating incorrect usage that are not meant to be caught but fixed in the + application code). Use this exception type in the `except` clause if you + want to handle all parse errors and serialization errors. + + If available, the `src_path` attribute will contain the KSY source path + pointing to the element where the error occurred. If it is not available, + `src_path` will be `None`. """ def __init__(self, msg, src_path): - super(KaitaiStructError, self).__init__("%s: %s" % (src_path, msg)) + super(KaitaiStructError, self).__init__(("" if src_path is None else src_path + ": ") + msg) self.src_path = src_path +class InvalidArgumentError(KaitaiStructError, ValueError): + """Indicates that an invalid argument value was received (like `ValueError`), + but used in places where this might indicate invalid user input and + therefore represents a parse error or serialization error. + """ + def __init__(self, msg): + super(InvalidArgumentError, self).__init__(msg, None) + + +class EndOfStreamError(KaitaiStructError, EOFError): + """Read or write beyond end of stream. Provides the `bytes_needed` (number + of bytes requested to read or write) and `bytes_available` (number of bytes + remaining in the stream) attributes. 
+ """ + def __init__(self, msg, bytes_needed, bytes_available): + super(EndOfStreamError, self).__init__(msg, None) + self.bytes_needed = bytes_needed + self.bytes_available = bytes_available + + +class NoTerminatorFoundError(EndOfStreamError): + """Special type of `EndOfStreamError` that occurs when end of stream is + reached before the required terminator is found. If you want to tolerate a + missing terminator, you can specify `eos-error: false` in the KSY + specification, in which case the end of stream will be considered a valid + end of field and this error will no longer be raised. + + The `term` attribute contains a `bytes` object with the searched terminator. + """ + def __init__(self, term, bytes_available): + super(NoTerminatorFoundError, self).__init__("end of stream reached, but no terminator %r found" % (term,), len(term), bytes_available) + self.term = term + + class UndecidedEndiannessError(KaitaiStructError): """Error that occurs when default endianness should be decided with switch, but nothing matches (although using endianness expression