Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement conditional out of SSA #406

Merged
merged 26 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
f850af0
Implement conditional out of ssa
rihi Apr 18, 2024
dd3a543
black
rihi Apr 18, 2024
14c0397
Change dot stuff
rihi Apr 24, 2024
31b8dfe
Add some test
rihi May 2, 2024
5aaa2fa
black
rihi May 9, 2024
f02b7a7
Adjust _expression_dependencies()
rihi May 22, 2024
91e5db2
Fix docstring of ConditionalVariableRenamer
rihi May 22, 2024
a46bae4
Add a type hint
rihi May 22, 2024
aeeaa9c
Remove some debug stuff
rihi May 22, 2024
1641a5f
Use _variables_can_have_same_name
rihi May 22, 2024
780bf8b
Simply syntax and remove an unused statement
rihi May 22, 2024
055404a
Restructure ConditionalVariableRenamer
rihi May 22, 2024
bb16b0b
Move dependency graph decoration function
rihi May 22, 2024
39f36bf
Add docstring to _decorate_dependency_graph
rihi May 29, 2024
5a04d23
Add docstring to _collect_variables
rihi May 29, 2024
2e22e65
Add docstring to _expression_dependencies
rihi May 29, 2024
1b03a61
Add docstring to _generate_renaming_map
rihi May 29, 2024
7d9ec34
Fix debug decorating of dependency graph
rihi Jun 5, 2024
f5050c0
Merge parallel edges in conditional renaming
rihi Jun 5, 2024
8e7a65c
Extract operation penalty in dependency_graph and change to 0.9
rihi Jun 5, 2024
37eb715
Update replace variable code in variable_renaming.py
rihi Jun 6, 2024
c77738d
Fix constant naming in dependency_graph.py
rihi Jun 7, 2024
5d34076
Fix renaming change
rihi Jun 7, 2024
b9785e1
Add test for parallel edges
rihi Jun 19, 2024
658ad35
Run black to obfuscate code
rihi Jun 19, 2024
67e1cb4
Merge branch 'main' into conditional-out-of-ssa
ebehner Jun 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 51 additions & 61 deletions decompiler/pipeline/ssa/dependency_graph.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,64 @@
from typing import Iterable, List, Optional, Set
from functools import reduce
from typing import Iterator

from decompiler.structures.graphs.cfg import ControlFlowGraph
from decompiler.structures.interferencegraph import InterferenceGraph
from decompiler.structures.pseudo import Expression, Operation, OperationType
from decompiler.structures.pseudo.expressions import Variable
from decompiler.structures.pseudo.instructions import Assignment
from decompiler.structures.pseudo.operations import Call
from networkx import DiGraph, weakly_connected_components
from networkx import MultiDiGraph


def _non_call_assignments(cfg: ControlFlowGraph) -> Iterable[Assignment]:
"""Yield all interesting assignments for the dependency graph."""
for instr in cfg.instructions:
if isinstance(instr, Assignment) and isinstance(instr.destination, Variable) and not isinstance(instr.value, Call):
yield instr
def dependency_graph_from_cfg(cfg: ControlFlowGraph) -> MultiDiGraph:
"""
Construct the dependency graph of the given CFG, i.e. adds an edge between two variables if they depend on each other.
- Add an edge from the definition to at most one requirement for each instruction.
- All variables that were not defined via Phi-functions before have out-degree of at most 1, because they are defined at most once.
- Variables that are defined via Phi-functions can have one successor for each required variable of the Phi-function.
"""
dependency_graph = MultiDiGraph()

for variable in _collect_variables(cfg):
dependency_graph.add_node((variable,))
for instruction in _assignments_in_cfg(cfg):
ebehner marked this conversation as resolved.
Show resolved Hide resolved
defined_variables = instruction.definitions
for used_variable, score in _expression_dependencies(instruction.value).items():
if score > 0:
dependency_graph.add_edges_from((((dvar,), (used_variable,)) for dvar in defined_variables), score=score)

return dependency_graph

class DependencyGraph(DiGraph):
def __init__(self, interference_graph: Optional[InterferenceGraph] = None):
super().__init__()
self.add_nodes_from(interference_graph.nodes)
self.interference_graph = interference_graph

@classmethod
def from_cfg(cls, cfg: ControlFlowGraph, interference_graph: InterferenceGraph):
"""
Construct the dependency graph of the given CFG, i.e. adds an edge between two variables if they depend on each other.
- Add an edge from the definition to at most one requirement for each instruction.
- All variables that were not defined via Phi-functions before have out-degree at most 1, because they are defined at most once
- Variables that are defined via Phi-functions can have one successor for each required variable of the Phi-function.
"""
dependency_graph = cls(interference_graph)
for instruction in _non_call_assignments(cfg):
defined_variable = instruction.destination
if isinstance(instruction.value, Variable):
if dependency_graph._variables_can_have_same_name(defined_variable, instruction.value):
dependency_graph.add_edge(defined_variable, instruction.requirements[0], strength="high")
elif len(instruction.requirements) == 1:
if dependency_graph._variables_can_have_same_name(defined_variable, instruction.requirements[0]):
dependency_graph.add_edge(defined_variable, instruction.requirements[0], strength="medium")
else:
if non_interfering_variable := dependency_graph._non_interfering_requirements(instruction.requirements, defined_variable):
dependency_graph.add_edge(defined_variable, non_interfering_variable, strength="low")
return dependency_graph
def _collect_variables(cfg: ControlFlowGraph) -> Iterator[Variable]:
for instruction in cfg.instructions:
rihi marked this conversation as resolved.
Show resolved Hide resolved
for subexpression in instruction.subexpressions():
if isinstance(subexpression, Variable):
yield subexpression

def _non_interfering_requirements(self, requirements: List[Variable], defined_variable: Variable) -> Optional[Variable]:
"""Get the unique non-interfering requirement if it exists, otherwise we return None."""
non_interfering_requirement = None
for required_variable in requirements:
if self._variables_can_have_same_name(defined_variable, required_variable):
if non_interfering_requirement:
return None
non_interfering_requirement = required_variable
return non_interfering_requirement

def _variables_can_have_same_name(self, source: Variable, sink: Variable) -> bool:
"""
Two variables can have the same name if they have the same type, are both aliased or both non-aliased variables, and if they
do not interfere.
def _assignments_in_cfg(cfg: ControlFlowGraph) -> Iterator[Assignment]:
"""Yield all interesting assignments for the dependency graph."""
for instr in cfg.instructions:
if isinstance(instr, Assignment):
yield instr


:param source: The potential source vertex.
:param sink: The potential sink vertex
:return: True, if the given variables can have the same name, and false otherwise.
"""
if self.interference_graph.are_interfering(source, sink) or source.type != sink.type or source.is_aliased != sink.is_aliased:
return False
if source.is_aliased and sink.is_aliased and source.name != sink.name:
return False
return True
def _expression_dependencies(expression: Expression) -> dict[Variable, float]:
match expression:
rihi marked this conversation as resolved.
Show resolved Hide resolved
case Variable():
return {expression: 1.0}
case Operation():
operation_type_penalty = {
OperationType.call: 0,
OperationType.address: 0,
OperationType.dereference: 0,
OperationType.member_access: 0,
}.get(expression.operation, 0.5)

def get_components(self) -> Iterable[Set[Variable]]:
"""Returns the weakly connected components of the dependency graph."""
for component in weakly_connected_components(self):
yield set(component)
operands_dependencies = (_expression_dependencies(operand) for operand in expression.operands)
dependencies: dict[Variable, float] = reduce(dict.__or__, operands_dependencies, {})
for var in dependencies:
dependencies[var] /= len(dependencies)
dependencies[var] *= operation_type_penalty
return dependencies
rihi marked this conversation as resolved.
Show resolved Hide resolved
case _:
return {}
26 changes: 15 additions & 11 deletions decompiler/pipeline/ssa/outofssatranslation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
from collections import defaultdict
from configparser import NoOptionError
from enum import Enum
from typing import DefaultDict, List
from typing import Callable, DefaultDict, List

from decompiler.pipeline.ssa.phi_cleaner import PhiFunctionCleaner
from decompiler.pipeline.ssa.phi_dependency_resolver import PhiDependencyResolver
from decompiler.pipeline.ssa.phi_lifting import PhiFunctionLifter
from decompiler.pipeline.ssa.variable_renaming import MinimalVariableRenamer, SimpleVariableRenamer
from decompiler.pipeline.ssa.variable_renaming import ConditionalVariableRenamer, MinimalVariableRenamer, SimpleVariableRenamer
from decompiler.pipeline.stage import PipelineStage
from decompiler.structures.graphs.cfg import BasicBlock
from decompiler.structures.interferencegraph import InterferenceGraph
Expand Down Expand Up @@ -98,12 +98,13 @@ def _out_of_ssa(self) -> None:

-> There are different optimization levels
"""
try:
self.out_of_ssa_strategy[self._optimization](self)
except KeyError:
error_message = f"The Out of SSA according to the optimization level {self._optimization.value} is not implemented so far."
logging.error(error_message)
raise NotImplementedError(error_message)
strategy = self.out_of_ssa_strategy.get(self._optimization, None)
if strategy is None:
raise NotImplementedError(
f"The Out of SSA according to the optimization level {self._optimization.value} is not implemented so far."
)

strategy(self)

def _simple_out_of_ssa(self) -> None:
"""
Expand Down Expand Up @@ -158,12 +159,15 @@ def _conditional_out_of_ssa(self) -> None:
This is a more advanced algorithm for out of SSA:
- We first remove the circular dependency of the Phi-functions
- Then, we remove the Phi-functions by lifting them to their predecessor basic blocks.
- Afterwards, we rename the variables, by considering their dependency on each other.
- Afterwards, we rename the variables by considering their dependency on each other.
"""
pass
PhiDependencyResolver(self._phi_functions_of).resolve()
self.interference_graph = InterferenceGraph(self.task.graph)
PhiFunctionLifter(self.task.graph, self.interference_graph, self._phi_functions_of).lift()
ConditionalVariableRenamer(self.task, self.interference_graph).rename()

# This translator maps the optimization levels to the functions.
out_of_ssa_strategy = {
out_of_ssa_strategy: dict[SSAOptions, Callable[["OutOfSsaTranslation"], None]] = {
SSAOptions.simple: _simple_out_of_ssa,
SSAOptions.minimization: _minimization_out_of_ssa,
SSAOptions.lift_minimal: _lift_minimal_out_of_ssa,
Expand Down
96 changes: 95 additions & 1 deletion decompiler/pipeline/ssa/variable_renaming.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
"""Module for renaming variables in Out of SSA."""

import itertools
import logging
from collections import defaultdict
from dataclasses import dataclass, field
from itertools import combinations
from operator import attrgetter
from typing import DefaultDict, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union

import networkx
from decompiler.pipeline.ssa.dependency_graph import dependency_graph_from_cfg
from decompiler.structures.interferencegraph import InterferenceGraph
from decompiler.structures.pseudo.expressions import GlobalVariable, Variable
from decompiler.structures.pseudo.instructions import BaseAssignment, Instruction, Relation
from decompiler.structures.pseudo.typing import Type
from decompiler.task import DecompilerTask
from decompiler.util.decoration import DecoratedGraph
from decompiler.util.insertion_ordered_set import InsertionOrderedSet
from decompiler.util.lexicographical_bfs import LexicographicalBFS
from networkx import Graph, connected_components
from networkx import Graph, MultiDiGraph, connected_components


@dataclass
Expand Down Expand Up @@ -334,3 +338,93 @@ def _classes_of(self, neighborhood: Iterable[Variable]) -> Iterable[Variable]:
for neighbor in neighborhood:
if neighbor in self._variable_classes_handler.color_class_of:
yield self._variable_classes_handler.color_class_of[neighbor]


class ConditionalVariableRenamer(VariableRenamer):
"""
A minimal renaming strategy, that renames the SSA-variables such that the total number of non SSA-variables is (almost) minimal.
Therefore, we construct color-classes by using lexicographical BFS on the interference graph. When the interference graph is chordal
this leads to a minimum number of possible variables.
rihi marked this conversation as resolved.
Show resolved Hide resolved
"""

def __init__(self, task, interference_graph: InterferenceGraph):
"""
rihi marked this conversation as resolved.
Show resolved Hide resolved
self._color_classes is a dictionary where the set of keys is the set of colors
and to each color we assign the set of variables of this color.
"""
super().__init__(task, interference_graph.copy())

dependency_graph = dependency_graph_from_cfg(task.graph)

mapping = {}
rihi marked this conversation as resolved.
Show resolved Hide resolved
for variable in self.interference_graph.nodes():
contracted = tuple(self._variables_contracted_to[variable])
for var in contracted:
mapping[(var,)] = contracted

# Merge nodes which need to be contracted from self._variables_contracted_to
dependency_graph = networkx.relabel_nodes(dependency_graph, mapping)

# counter = 0
# self._decorate_graph(dependency_graph, f"dep{counter}.svg")
rihi marked this conversation as resolved.
Show resolved Hide resolved

dependency_graph.edge = dependency_graph.edges(data=True)
while True:
for u, v, _ in sorted(dependency_graph.edges(data=True), key=lambda edge: edge[2]["score"], reverse=True):
ebehner marked this conversation as resolved.
Show resolved Hide resolved
if u == v: # self loop
continue

variables = u + v
if interference_graph.are_interfering(*variables):
continue
if u[0].type != v[0].type:
continue
if u[0].is_aliased != v[0].is_aliased:
continue
rihi marked this conversation as resolved.
Show resolved Hide resolved

break
else:
# We didn't find any remaining nodes to contract, break outer loop
break

networkx.relabel_nodes(dependency_graph, {u: (*u, *v), v: (*u, *v)}, copy=False)
# counter += 1
# self._decorate_graph(dependency_graph, f"dep{counter}.svg")

# counter += 1
# self._decorate_graph(dependency_graph, f"dep{counter}.svg")
rihi marked this conversation as resolved.
Show resolved Hide resolved

self._variable_classes_handler = VariableClassesHandler(defaultdict(set))
for i, vars in enumerate(dependency_graph.nodes):
for var in vars:
self._variable_classes_handler.add_variable_to_class(var, i)

self.compute_new_name_for_each_variable()

def _decorate_graph(self, dependency_graph: MultiDiGraph, path: str):
decorated_graph = MultiDiGraph()
for node in dependency_graph.nodes:
decorated_graph.add_node(hash(node), label="\n".join(map(lambda n: f"{n}: {n.type}, aliased: {n.is_aliased}", node)))
for u, v, data in dependency_graph.edges.data():
decorated_graph.add_edge(u, v, label=f"{data['score']}")
for nodes in networkx.weakly_connected_components(dependency_graph):
for node_1, node_2 in combinations(nodes, 2):
if any(self.interference_graph.has_edge(pair[0], pair[1]) for pair in itertools.product(node_1, node_2)):
decorated_graph.add_edge(hash(node_1), hash(node_2), color="red", dir="none")

DecoratedGraph(decorated_graph).export_plot(path, type="svg")
ebehner marked this conversation as resolved.
Show resolved Hide resolved

def _variables_can_have_same_name(self, source: Variable, sink: Variable) -> bool:
"""
Two variables can have the same name if they have the same type, are both aliased or both non-aliased variables, and if they
do not interfere.

:param source: The potential source vertex.
:param sink: The potential sink vertex
:return: True, if the given variables can have the same name, and false otherwise.
"""
if self.interference_graph.are_interfering(source, sink) or source.type != sink.type or source.is_aliased != sink.is_aliased:
return False
if source.is_aliased and sink.is_aliased and source.name != sink.name:
return False
return True
4 changes: 2 additions & 2 deletions decompiler/util/to_dot_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

from networkx import DiGraph

HEADER = "strict digraph {"
HEADER = "digraph {"
FOOTER = "}"


class ToDotConverter:
"""Class in charge of writing a networkx DiGraph into dot-format"""

ATTRIBUTES = {"color", "fillcolor", "label", "shape", "style"}
ATTRIBUTES = {"color", "fillcolor", "label", "shape", "style", "dir"}

def __init__(self, graph: DiGraph):
self._graph = graph
Expand Down
Loading
Loading