From df8d69cd7614df8e391e5514eb309d8f982a76d6 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 31 Jul 2024 15:52:30 +0200 Subject: [PATCH 1/4] Fix string literal escaping --- decompiler/backend/cexpressiongenerator.py | 24 +++++++++++++++------- decompiler/backend/codevisitor.py | 6 +++++- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/decompiler/backend/cexpressiongenerator.py b/decompiler/backend/cexpressiongenerator.py index 892f52212..6bc9c73d5 100644 --- a/decompiler/backend/cexpressiongenerator.py +++ b/decompiler/backend/cexpressiongenerator.py @@ -163,6 +163,19 @@ class CExpressionGenerator(DataflowObjectVisitorInterface): # OperationType.adc: "adc", } + ESCAPE_TABLE = str.maketrans({ + '\\': r'\\', + '"': r'\"', + "'": r"\'", + '\n': r'\n', + '\r': r'\r', + '\t': r'\t', + '\v': r'\v', + '\b': r'\b', + '\f': r'\f', + '\0': r'\0' + }) + def visit_unknown_expression(self, expr: expressions.UnknownExpression) -> str: """Return the error message for this UnknownExpression.""" return expr.msg @@ -197,16 +210,16 @@ def visit_constant_composition(self, expr: expressions.ConstantComposition): """Visit a Constant Array.""" match expr.type.type: case CustomType(text="wchar16") | CustomType(text="wchar32"): - val = "".join([x.value for x in expr.value]) + val = "".join([x.value for x in expr.value]).translate(self.ESCAPE_TABLE) return f'L"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'L"{val[:MAX_GLOBAL_INIT_LENGTH]}..."' case Integer(size=8, signed=False): val = "".join([f"\\x{x.value:02X}" for x in expr.value][:MAX_GLOBAL_INIT_LENGTH]) return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."' case Integer(8): - val = "".join([x.value for x in expr.value][:MAX_GLOBAL_INIT_LENGTH]) + val = "".join([x.value for x in expr.value][:MAX_GLOBAL_INIT_LENGTH]).translate(self.ESCAPE_TABLE) return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."' case _: - return f'{", ".join([self.visit(x) for x in expr.value])}' # Todo: Should we print every member? Could get pretty big + return f'{", ".join([self.visit(x) for x in expr.value]).translate(self.ESCAPE_TABLE)}' # Todo: Should we print every member? Could get pretty big def visit_variable(self, expr: expressions.Variable) -> str: """Return a string representation of the variable.""" @@ -404,10 +417,7 @@ def _format_string_literal(constant: expressions.Constant) -> str: string_representation = str(constant) if string_representation.startswith('"') and string_representation.endswith('"'): string_representation = str(constant)[1:-1] - if '"' in string_representation: - escaped = string_representation.replace('"', '\\"') - return f'"{escaped}"' - return f"{constant}" + return f"{string_representation.translate(CExpressionGenerator.ESCAPE_TABLE)}" @staticmethod def format_variables_declaration(var_type: Type, var_names: list[str]) -> str: diff --git a/decompiler/backend/codevisitor.py b/decompiler/backend/codevisitor.py index ad064012b..11fa56f9b 100644 --- a/decompiler/backend/codevisitor.py +++ b/decompiler/backend/codevisitor.py @@ -251,7 +251,11 @@ def _condition_string(self, condition: ConditionVar) -> str: def _format_integer_literal(self, type_info: Integer, value: int) -> str: """Format the integer based on the codegenerators settings.""" - byte_format_handler = {"char": lambda x: f"'{chr(x)}'", "hex": lambda x: f"{hex(x)}", "dec": lambda x: f"{x}"} + byte_format_handler = { + "char": lambda x: f"'{chr(x).translate(self.ESCAPE_TABLE)}'", + "hex": lambda x: f"{hex(x)}", + "dec": lambda x: f"{x}", + } if self._possibly_char_in_ascii_range(type_info, value): if value_handler := byte_format_handler.get(self._byte_format, None): if hint_handler := byte_format_handler.get(self._byte_format_hint, None): From f9b4ae5c422b480dda6280bad4c7912c007fc852 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 31 Jul 2024 15:52:52 +0200 Subject: [PATCH 2/4] Run Black obfuscation --- decompiler/backend/cexpressiongenerator.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/decompiler/backend/cexpressiongenerator.py b/decompiler/backend/cexpressiongenerator.py index 6bc9c73d5..aa1795c19 100644 --- a/decompiler/backend/cexpressiongenerator.py +++ b/decompiler/backend/cexpressiongenerator.py @@ -163,18 +163,9 @@ class CExpressionGenerator(DataflowObjectVisitorInterface): # OperationType.adc: "adc", } - ESCAPE_TABLE = str.maketrans({ - '\\': r'\\', - '"': r'\"', - "'": r"\'", - '\n': r'\n', - '\r': r'\r', - '\t': r'\t', - '\v': r'\v', - '\b': r'\b', - '\f': r'\f', - '\0': r'\0' - }) + ESCAPE_TABLE = str.maketrans( + {"\\": r"\\", '"': r"\"", "'": r"\'", "\n": r"\n", "\r": r"\r", "\t": r"\t", "\v": r"\v", "\b": r"\b", "\f": r"\f", "\0": r"\0"} + ) def visit_unknown_expression(self, expr: expressions.UnknownExpression) -> str: """Return the error message for this UnknownExpression.""" From 641f24347b4debcf95c9d6dd33719547d9ffa49c Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 31 Jul 2024 16:12:27 +0200 Subject: [PATCH 3/4] Revert _format_string_literal back --- decompiler/backend/cexpressiongenerator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/decompiler/backend/cexpressiongenerator.py b/decompiler/backend/cexpressiongenerator.py index aa1795c19..78afa82f7 100644 --- a/decompiler/backend/cexpressiongenerator.py +++ b/decompiler/backend/cexpressiongenerator.py @@ -408,7 +408,10 @@ def _format_string_literal(constant: expressions.Constant) -> str: string_representation = str(constant) if string_representation.startswith('"') and string_representation.endswith('"'): string_representation = str(constant)[1:-1] - return f"{string_representation.translate(CExpressionGenerator.ESCAPE_TABLE)}" + if '"' in string_representation: + escaped = string_representation.replace('"', '\\"').translate(CExpressionGenerator.ESCAPE_TABLE) + return f'"{escaped}"' + return f"{constant}" @staticmethod def format_variables_declaration(var_type: Type, var_names: list[str]) -> str: From a096008096f08e6b00fcb17a8c0e944f79cbcb20 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 31 Jul 2024 16:23:20 +0200 Subject: [PATCH 4/4] Revert _format_string_literal back 2 --- decompiler/backend/cexpressiongenerator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decompiler/backend/cexpressiongenerator.py b/decompiler/backend/cexpressiongenerator.py index 78afa82f7..d6eda3615 100644 --- a/decompiler/backend/cexpressiongenerator.py +++ b/decompiler/backend/cexpressiongenerator.py @@ -409,7 +409,7 @@ def _format_string_literal(constant: expressions.Constant) -> str: if string_representation.startswith('"') and string_representation.endswith('"'): string_representation = str(constant)[1:-1] if '"' in string_representation: - escaped = string_representation.replace('"', '\\"').translate(CExpressionGenerator.ESCAPE_TABLE) + escaped = string_representation.replace('"', '\\"') return f'"{escaped}"' return f"{constant}"