diff --git a/decompiler/pipeline/controlflowanalysis/readability_based_refinement.py b/decompiler/pipeline/controlflowanalysis/readability_based_refinement.py index 7a334fd44..c48621eb3 100644 --- a/decompiler/pipeline/controlflowanalysis/readability_based_refinement.py +++ b/decompiler/pipeline/controlflowanalysis/readability_based_refinement.py @@ -26,11 +26,11 @@ def _get_potential_guarded_do_while_loops(ast: AbstractSyntaxTree) -> tuple(Unio def remove_guarded_do_while(ast: AbstractSyntaxTree): - """ Removes a if statement which guards a do-while loop/while loop when: - -> there is nothing in between the if-node and the do-while-node/while-node - -> the if-node has only one branch (true branch) - -> the condition of the branch is the same as the condition of the do-while-node - Replacement is a WhileLoop, otherwise the control flow would not be correct + """Removes a if statement which guards a do-while loop/while loop when: + -> there is nothing in between the if-node and the do-while-node/while-node + -> the if-node has only one branch (true branch) + -> the condition of the branch is the same as the condition of the do-while-node + Replacement is a WhileLoop, otherwise the control flow would not be correct """ for do_while_node, condition_node in _get_potential_guarded_do_while_loops(ast): if condition_node.false_branch: @@ -43,40 +43,51 @@ def remove_guarded_do_while(ast: AbstractSyntaxTree): class WhileLoopReplacer: """Convert WhileLoopNodes to ForLoopNodes depending on the configuration. - -> keep_empty_for_loops will keep empty for-loops in the code - -> force_for_loops will transform every while-loop into a for-loop, worst case with empty declaration/modification statement - -> forbidden_condition_types_in_simple_for_loops will not transform trivial for-loop candidates (with only one condition) into for-loops - if the operator matches one of the forbidden operator list - -> max_condition_complexity_for_loop_recovery will transform for-loop candidates only into for-loops if the condition complexity is - less/equal then the threshold - -> max_modification_complexity_for_loop_recovery will transform for-loop candidates only into for-loops if the modification complexity is - less/equal then the threshold + -> keep_empty_for_loops will keep empty for-loops in the code + -> force_for_loops will transform every while-loop into a for-loop, worst case with empty declaration/modification statement + -> forbidden_condition_types_in_simple_for_loops will not transform trivial for-loop candidates (with only one condition) into for-loops + if the operator matches one of the forbidden operator list + -> max_condition_complexity_for_loop_recovery will transform for-loop candidates only into for-loops if the condition complexity is + less/equal then the threshold + -> max_modification_complexity_for_loop_recovery will transform for-loop candidates only into for-loops if the modification complexity is + less/equal then the threshold """ def __init__(self, ast: AbstractSyntaxTree, options: Options): self._ast = ast + self._restructure_for_loops = options.getboolean("readability-based-refinement.restructure_for_loops", fallback=True) self._keep_empty_for_loops = options.getboolean("readability-based-refinement.keep_empty_for_loops", fallback=False) self._hide_non_init_decl = options.getboolean("readability-based-refinement.hide_non_initializing_declaration", fallback=False) self._force_for_loops = options.getboolean("readability-based-refinement.force_for_loops", fallback=False) - self._forbidden_condition_types = options.getlist("readability-based-refinement.forbidden_condition_types_in_simple_for_loops", fallback=[]) - self._condition_max_complexity = options.getint("readability-based-refinement.max_condition_complexity_for_loop_recovery", fallback=100) - self._modification_max_complexity = options.getint("readability-based-refinement.max_modification_complexity_for_loop_recovery", fallback=100) + self._forbidden_condition_types = options.getlist( + "readability-based-refinement.forbidden_condition_types_in_simple_for_loops", fallback=[] + ) + self._condition_max_complexity = options.getint( + "readability-based-refinement.max_condition_complexity_for_loop_recovery", fallback=100 + ) + self._modification_max_complexity = options.getint( + "readability-based-refinement.max_modification_complexity_for_loop_recovery", fallback=100 + ) def run(self): """For each WhileLoop in AST check the following conditions: -> any variable in loop condition has a valid continuation instruction in loop body -> variable is initialized - -> loop condition complexity < condition complexity + -> loop condition complexity < condition complexity -> possible modification complexity < modification complexity -> if condition is only a symbol: check condition type for allowed one - - If 'force_for_loops' is enabled, the complexity options are ignored and every while loop after the - initial transformation will be forced into a for loop with an empty declaration/modification - """ + If 'force_for_loops' is enabled, the complexity options are ignored and every while loop after the + initial transformation will be forced into a for loop with an empty declaration/modification + """ + if not self._restructure_for_loops: + return for loop_node in list(self._ast.get_while_loop_nodes_topological_order()): - if loop_node.is_endless_loop or (not self._keep_empty_for_loops and _is_single_instruction_loop_node(loop_node)) \ - or self._invalid_simple_for_loop_condition_type(loop_node.condition): + if ( + loop_node.is_endless_loop + or (not self._keep_empty_for_loops and _is_single_instruction_loop_node(loop_node)) + or self._invalid_simple_for_loop_condition_type(loop_node.condition) + ): continue if any(node.does_end_with_continue for node in loop_node.body.get_descendant_code_nodes_interrupting_ancestor_loop()): @@ -100,11 +111,11 @@ def run(self): self._ast.substitute_loop_node( loop_node, ForLoopNode( - declaration=None, - condition=loop_node.condition, - modification=None, - reaching_condition=loop_node.reaching_condition, - ) + declaration=None, + condition=loop_node.condition, + modification=None, + reaching_condition=loop_node.reaching_condition, + ), ) def _replace_with_for_loop(self, loop_node: WhileLoopNode, continuation: AstInstruction, init: AstInstruction): @@ -139,9 +150,9 @@ def _replace_with_for_loop(self, loop_node: WhileLoopNode, continuation: AstInst ) continuation.node.instructions.remove(continuation.instruction) self._ast.clean_up() - + def _invalid_simple_for_loop_condition_type(self, logic_condition) -> bool: - """ Checks if a logic condition is only a symbol, if true checks condition type of symbol for forbidden ones""" + """Checks if a logic condition is only a symbol, if true checks condition type of symbol for forbidden ones""" if not logic_condition.is_symbol or not self._forbidden_condition_types: return False diff --git a/decompiler/util/default.json b/decompiler/util/default.json index c4bca6cff..9170478a0 100644 --- a/decompiler/util/default.json +++ b/decompiler/util/default.json @@ -275,6 +275,16 @@ "is_hidden_from_cli": false, "argument_name": "--return-complexity-threshold" }, + { + "dest": "readability-based-refinement.restructure_for_loops", + "default": true, + "type": "boolean", + "title": "Restructure for-loops at all", + "description": "Transform while-loops to for-loops. If set to false, no for-loop is restructured.", + "is_hidden_from_gui": false, + "is_hidden_from_cli": false, + "argument_name": "--restructure-for-loops" + }, { "dest": "readability-based-refinement.keep_empty_for_loops", "default": false, diff --git a/tests/pipeline/controlflowanalysis/test_readability_based_refinement.py b/tests/pipeline/controlflowanalysis/test_readability_based_refinement.py index 15a602a1b..7cd0c82f4 100644 --- a/tests/pipeline/controlflowanalysis/test_readability_based_refinement.py +++ b/tests/pipeline/controlflowanalysis/test_readability_based_refinement.py @@ -32,9 +32,17 @@ def logic_cond(name: str, context) -> LogicCondition: return LogicCondition.initialize_symbol(name, context) -def _generate_options(empty_loops: bool = False, hide_decl: bool = False, max_condition: int = 100, max_modification: int = 100, \ - force_for_loops: bool = False, blacklist : List[str] = []) -> Options: +def _generate_options( + restructure: bool = True, + empty_loops: bool = False, + hide_decl: bool = False, + max_condition: int = 100, + max_modification: int = 100, + force_for_loops: bool = False, + blacklist: List[str] = [], +) -> Options: options = Options() + options.set("readability-based-refinement.restructure_for_loops", restructure) options.set("readability-based-refinement.keep_empty_for_loops", empty_loops) options.set("readability-based-refinement.hide_non_initializing_declaration", hide_decl) options.set("readability-based-refinement.max_condition_complexity_for_loop_recovery", max_condition) @@ -75,14 +83,19 @@ def ast_innerWhile_simple_condition_complexity() -> AbstractSyntaxTree: outer_while = ast.factory.create_while_loop_node(logic_cond("x1", context)) outer_while_body = ast.factory.create_seq_node() - outer_while_init = ast._add_code_node([Assignment(Variable("b"), Constant(0)), Assignment(Variable("c"), Constant(0)) - , Assignment(Variable("d"), Constant(0))]) + outer_while_init = ast._add_code_node( + [Assignment(Variable("b"), Constant(0)), Assignment(Variable("c"), Constant(0)), Assignment(Variable("d"), Constant(0))] + ) outer_while_exit = ast._add_code_node([Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Constant(1)]))]) inner_while = ast.factory.create_while_loop_node(logic_cond("x2", context) & logic_cond("x3", context) & logic_cond("x4", context)) - inner_while_body = ast._add_code_node([Assignment(Variable("b"), BinaryOperation(OperationType.plus, [Variable("b"), Constant(1)])), - Assignment(Variable("c"), BinaryOperation(OperationType.plus, [Variable("c"), Constant(1)])), - Assignment(Variable("d"), BinaryOperation(OperationType.plus, [Variable("d"), Constant(1)]))]) + inner_while_body = ast._add_code_node( + [ + Assignment(Variable("b"), BinaryOperation(OperationType.plus, [Variable("b"), Constant(1)])), + Assignment(Variable("c"), BinaryOperation(OperationType.plus, [Variable("c"), Constant(1)])), + Assignment(Variable("d"), BinaryOperation(OperationType.plus, [Variable("d"), Constant(1)])), + ] + ) ast._add_nodes_from((outer_while, outer_while_body, inner_while)) ast._add_edges_from( @@ -99,7 +112,7 @@ def ast_innerWhile_simple_condition_complexity() -> AbstractSyntaxTree: return ast -def generate_ast_with_modification_complexity(complexity : int) -> AbstractSyntaxTree: +def generate_ast_with_modification_complexity(complexity: int) -> AbstractSyntaxTree: """ a = 0; while (a < 10) { @@ -121,7 +134,7 @@ def generate_ast_with_modification_complexity(complexity : int) -> AbstractSynta return ast -def generate_ast_with_condition_type(op : OperationType) -> AbstractSyntaxTree: +def generate_ast_with_condition_type(op: OperationType) -> AbstractSyntaxTree: """ a = 0; while (a 10) { @@ -161,9 +174,17 @@ def ast_guarded_do_while_if() -> AbstractSyntaxTree: ast._add_node(cond_node) ast._add_node(true_branch) ast._add_node(do_while_loop) - ast._add_edges_from([(root, init_code_node), (root, cond_node), (cond_node, true_branch), (true_branch, do_while_loop), (do_while_loop, do_while_loop_body)]) + ast._add_edges_from( + [ + (root, init_code_node), + (root, cond_node), + (cond_node, true_branch), + (true_branch, do_while_loop), + (do_while_loop, do_while_loop_body), + ] + ) return ast - + @pytest.fixture def ast_guarded_do_while_else() -> AbstractSyntaxTree: @@ -188,7 +209,15 @@ def ast_guarded_do_while_else() -> AbstractSyntaxTree: ast._add_node(cond_node) ast._add_node(false_branch) ast._add_node(do_while_loop) - ast._add_edges_from([(root, init_code_node), (root, cond_node), (cond_node, false_branch), (false_branch, do_while_loop), (do_while_loop, do_while_loop_body)]) + ast._add_edges_from( + [ + (root, init_code_node), + (root, cond_node), + (cond_node, false_branch), + (false_branch, do_while_loop), + (do_while_loop, do_while_loop_body), + ] + ) return ast @@ -248,7 +277,8 @@ def ast_while_in_else() -> AbstractSyntaxTree: class TestForLoopRecovery: - """ Test options for for-loop recovery """ + """Test options for for-loop recovery""" + @staticmethod def run_rbr(ast: AbstractSyntaxTree, options: Options = _generate_options()): ReadabilityBasedRefinement().run(DecompilerTask("func", cfg=None, ast=ast, options=options)) @@ -262,7 +292,6 @@ def test_max_condition_complexity(self, ast_innerWhile_simple_condition_complexi else: assert isinstance(loop_node, WhileLoopNode) - @pytest.mark.parametrize("modification_nesting", [1, 2]) def test_max_modification_complexity(self, modification_nesting): ast = generate_ast_with_modification_complexity(modification_nesting) @@ -276,11 +305,10 @@ def test_max_modification_complexity(self, modification_nesting): assert isinstance(loop_node, WhileLoopNode) for condition_variable in loop_node.get_required_variables(ast.condition_map): instruction = _find_continuation_instruction(ast, loop_node, condition_variable) - assert instruction is not None + assert instruction is not None assert instruction.instruction.complexity > max_modi_complexity - - @pytest.mark.parametrize("operation", [OperationType.equal, OperationType.not_equal ,OperationType.less_or_equal, OperationType.less]) + @pytest.mark.parametrize("operation", [OperationType.equal, OperationType.not_equal, OperationType.less_or_equal, OperationType.less]) def test_for_loop_recovery_blacklist(self, operation): ast = generate_ast_with_condition_type(operation) forbidden_conditon_types = ["not_equal", "equal"] @@ -292,6 +320,15 @@ def test_for_loop_recovery_blacklist(self, operation): else: assert isinstance(loop_node, ForLoopNode) + @pytest.mark.parametrize("restructure", [True, False]) + def test_restructure_for_loop_option(self, restructure, ast_while_in_else): + self.run_rbr(ast_while_in_else, _generate_options(restructure=restructure)) + for_loop = list(ast_while_in_else.get_for_loop_nodes_topological_order()) + if restructure: + assert len(for_loop) == 1 + else: + assert len(for_loop) == 0 + class TestGuardedDoWhile: @staticmethod @@ -811,7 +848,6 @@ def test_separated_by_loop_node_4(self, ast_while_in_else): assert _initialization_reaches_loop_node(init_code_node, inner_while) is False - def test_skip_for_loop_recovery_if_continue_in_while(self): """ a = 0 @@ -828,15 +864,12 @@ def test_skip_for_loop_recovery_if_continue_in_while(self): root := SeqNode(true_value), condition_map={ logic_cond("x1", context): Condition(OperationType.less, [Variable("a"), Constant(10)]), - logic_cond("x2", context): Condition(OperationType.equal, [Variable("a"), Constant(2)]) - } + logic_cond("x2", context): Condition(OperationType.equal, [Variable("a"), Constant(2)]), + }, ) true_branch = ast._add_code_node( - [ - Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Constant(2)])), - Continue() - ] + [Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Constant(2)])), Continue()] ) if_condition = ast._add_condition_node_with(logic_cond("x2", context), true_branch) @@ -844,7 +877,9 @@ def test_skip_for_loop_recovery_if_continue_in_while(self): while_loop = ast.factory.create_while_loop_node(logic_cond("x1", context)) while_loop_body = ast.factory.create_seq_node() - while_loop_iteration = ast._add_code_node([Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Constant(1)]))]) + while_loop_iteration = ast._add_code_node( + [Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Constant(1)]))] + ) ast._add_node(while_loop) ast._add_node(while_loop_body) @@ -854,7 +889,7 @@ def test_skip_for_loop_recovery_if_continue_in_while(self): (root, while_loop), (while_loop, while_loop_body), (while_loop_body, if_condition), - (while_loop_body, while_loop_iteration) + (while_loop_body, while_loop_iteration), ] ) @@ -881,28 +916,31 @@ def test_skip_for_loop_recovery_if_continue_in_nested_while(self): condition_map={ logic_cond("x1", context): Condition(OperationType.less, [Variable("a"), Constant(5)]), logic_cond("x2", context): Condition(OperationType.less, [Variable("b"), Constant(10)]), - logic_cond("x3", context): Condition(OperationType.less, [Variable("b"), Constant(0)]) - } + logic_cond("x3", context): Condition(OperationType.less, [Variable("b"), Constant(0)]), + }, ) true_branch = ast._add_code_node( - [ - Assignment(Variable("b"), BinaryOperation(OperationType.plus, [Variable("b"), Constant(2)])), - Continue() - ] + [Assignment(Variable("b"), BinaryOperation(OperationType.plus, [Variable("b"), Constant(2)])), Continue()] ) if_condition = ast._add_condition_node_with(logic_cond("x3", context), true_branch) while_loop_outer = ast.factory.create_while_loop_node(logic_cond("x1", context)) while_loop_body_outer = ast.factory.create_seq_node() - while_loop_iteration_outer_1 = ast._add_code_node([Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Variable("b")]))]) - while_loop_iteration_outer_2 = ast._add_code_node([Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Constant(1)]))]) + while_loop_iteration_outer_1 = ast._add_code_node( + [Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Variable("b")]))] + ) + while_loop_iteration_outer_2 = ast._add_code_node( + [Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Constant(1)]))] + ) ast._add_node(while_loop_outer) ast._add_node(while_loop_body_outer) while_loop_inner = ast.factory.create_while_loop_node(logic_cond("x2", context)) while_loop_body_inner = ast.factory.create_seq_node() - while_loop_iteration_inner = ast._add_code_node([Assignment(Variable("b"), BinaryOperation(OperationType.plus, [Variable("b"), Constant(1)]))]) + while_loop_iteration_inner = ast._add_code_node( + [Assignment(Variable("b"), BinaryOperation(OperationType.plus, [Variable("b"), Constant(1)]))] + ) ast._add_node(while_loop_inner) ast._add_node(while_loop_body_inner) @@ -915,10 +953,10 @@ def test_skip_for_loop_recovery_if_continue_in_nested_while(self): (while_loop_body_outer, while_loop_iteration_outer_2), (while_loop_inner, while_loop_body_inner), (while_loop_body_inner, if_condition), - (while_loop_body_inner, while_loop_iteration_inner) + (while_loop_body_inner, while_loop_iteration_inner), ] ) WhileLoopReplacer(ast, _generate_options()).run() loop_nodes = list(ast.get_loop_nodes_post_order()) - assert not isinstance(loop_nodes[0], ForLoopNode) and isinstance(loop_nodes[1], ForLoopNode) \ No newline at end of file + assert not isinstance(loop_nodes[0], ForLoopNode) and isinstance(loop_nodes[1], ForLoopNode)