Skip to content

Commit

Permalink
[TypeError@expressions.py:189] TypeError: unhashable type: 'dict' (#427)
Browse files Browse the repository at this point in the history
* Create draft PR for #426

* Merge ConstantComposition with Constant to fix stuff

* Black

* Merge StructConstant with Constant

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: rihi <19492038+rihi@users.noreply.github.com>
Co-authored-by: ebehner <eva-maria.behner@fkie.fraunhofer.de>
Co-authored-by: Steffen Enders <steffen.enders@fkie.fraunhofer.de>
  • Loading branch information
4 people authored Jan 6, 2025
1 parent 87a29c9 commit b42aa68
Show file tree
Hide file tree
Showing 8 changed files with 35 additions and 112 deletions.
28 changes: 13 additions & 15 deletions decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,24 +241,22 @@ def visit_constant(self, expr: expressions.Constant) -> str:
case bytes():
val = "".join("\\x{:02x}".format(x) for x in expr.value)
return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
if isinstance(expr.type, ArrayType):
match expr.type.type:
case CustomType(text="wchar16") | CustomType(text="wchar32"):
val = "".join(expr.value).translate(self.ESCAPE_TABLE)
return f'L"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'L"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case Integer(size=8, signed=False):
val = "".join([f"\\x{x:02X}" for x in expr.value][:MAX_GLOBAL_INIT_LENGTH])
return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case Integer(8):
val = "".join(expr.value[:MAX_GLOBAL_INIT_LENGTH]).translate(self.ESCAPE_TABLE)
return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case _:
return f'{", ".join([self.visit_constant(expressions.Constant(x, expr.type.type)) for x in expr.value]).translate(self.ESCAPE_TABLE)}' # Todo: Should we print every member? Could get pretty big

return self._format_string_literal(expr)

def visit_constant_composition(self, expr: expressions.ConstantComposition):
"""Visit a Constant Array."""
match expr.type.type:
case CustomType(text="wchar16") | CustomType(text="wchar32"):
val = "".join([x.value for x in expr.value]).translate(self.ESCAPE_TABLE)
return f'L"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'L"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case Integer(size=8, signed=False):
val = "".join([f"\\x{x.value:02X}" for x in expr.value][:MAX_GLOBAL_INIT_LENGTH])
return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case Integer(8):
val = "".join([x.value for x in expr.value][:MAX_GLOBAL_INIT_LENGTH]).translate(self.ESCAPE_TABLE)
return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case _:
return f'{", ".join([self.visit(x) for x in expr.value]).translate(self.ESCAPE_TABLE)}' # Todo: Should we print every member? Could get pretty big

def visit_variable(self, expr: expressions.Variable) -> str:
    """Render a variable as its name, followed by ``_<ssa_label>`` when an SSA label is set."""
    ssa_label = expr.ssa_label
    if ssa_label is None:
        return f"{expr.name}"
    return f"{expr.name}_{ssa_label}"
Expand Down
4 changes: 2 additions & 2 deletions decompiler/backend/variabledeclarations.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree
from decompiler.structures.pseudo import GlobalVariable, Integer, Variable
from decompiler.structures.pseudo.complextypes import Struct
from decompiler.structures.pseudo.expressions import StructConstant
from decompiler.structures.pseudo.expressions import Constant
from decompiler.structures.pseudo.typing import ArrayType, CustomType, Pointer
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.task import DecompilerTask
Expand Down Expand Up @@ -105,6 +105,6 @@ def visit_global_variable(self, expr: GlobalVariable):
self._global_vars.add(expr.copy(ssa_label=0, ssa_name=None))
if not expr.is_constant or expr.type == Pointer(CustomType.void()):
self._global_vars.add(expr.copy(ssa_label=0, ssa_name=None))
if isinstance(expr.initial_value, StructConstant):
if isinstance(expr.initial_value, Constant) and isinstance(expr.initial_value.value, dict):
for member_value in expr.initial_value.value.values():
self.visit(member_value)
18 changes: 8 additions & 10 deletions decompiler/frontend/binaryninja/handlers/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Callable, Optional, Tuple, Union

from binaryninja import BinaryView, DataVariable, Endianness, MediumLevelILInstruction, SectionSemantics, Type
from binaryninja import BinaryView, DataVariable, Endianness, MediumLevelILInstruction, SectionSemantics
from binaryninja.enums import NamedTypeReferenceClass
from binaryninja.types import (
ArrayType,
Expand All @@ -23,15 +23,13 @@
from decompiler.structures.pseudo import ArrayType as PseudoArrayType
from decompiler.structures.pseudo import (
Constant,
ConstantComposition,
CustomType,
Expression,
GlobalVariable,
ImportedFunctionSymbol,
Integer,
OperationType,
Pointer,
StructConstant,
Symbol,
UnaryOperation,
)
Expand Down Expand Up @@ -186,15 +184,15 @@ def _lift_array_type(self, variable: DataVariable, parent: Optional[MediumLevelI
type = self._lifter.lift(variable.type)
match variable.value:
case bytes(): # BNinja corner case: C-Strings (8Bit) are represented as python Bytes
value = [Constant(x, type.type) for x in str(variable.value.rstrip(b"\x00"))[2:-1]]
value = [x for x in str(variable.value.rstrip(b"\x00"))[2:-1]]
case _:
value = [Constant(x, type.type) for x in variable.value]
value = list(variable.value)

return self._build_global_variable(
name=variable.name,
type=type,
addr=variable.address,
init_value=ConstantComposition(value, type),
init_value=Constant(value, type),
ssa_label=parent.ssa_memory_version if parent else 0,
)

Expand Down Expand Up @@ -295,7 +293,7 @@ def _lift_struct_helper(self, variable, parent, struct_type):
lift = self._lifter.lift(dv, view=self._view)
values[member_type.offset] = lift.initial_value
return self._build_global_variable(
variable.name, s_type, variable.address, StructConstant(values, s_type), parent.ssa_memory_version if parent else 0
variable.name, s_type, variable.address, Constant(values, s_type), parent.ssa_memory_version if parent else 0
)

def _lift_enum_type(self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None, **_):
Expand All @@ -316,11 +314,11 @@ def _get_unknown_value(self, variable: DataVariable):
"""Return string or bytes at dv.address(!) (dv.type must be void)"""
if (data := get_different_string_types_at(variable.address, self._view)) and data[0] is not None:
type = PseudoArrayType(self._lifter.lift(data[1]), len(data[0]))
data = ConstantComposition([Constant(x, type.type) for x in data[0]], type)
data = Constant(list(data[0]), type)
else:
rbytes = get_raw_bytes(variable.address, self._view)
type = PseudoArrayType(Integer.uint8_t(), len(rbytes))
data = ConstantComposition([Constant(b, type.type) for b in rbytes], type)
data = Constant(list(rbytes), type)
return data, type

def _get_unknown_pointer_value(self, variable: DataVariable, callers: list[int] = None):
Expand Down Expand Up @@ -349,7 +347,7 @@ def _get_unknown_pointer_value(self, variable: DataVariable, callers: list[int]
0
] is not None: # Implicit pointer removal if called from a pointer value, does NOT need to be a UnaryOperation
vtype = PseudoArrayType(self._lifter.lift(data[1]), len(data[0]))
vdata = ConstantComposition([Constant(x, vtype.type) for x in data[0]], vtype)
vdata = Constant(list(data[0]), vtype)
data = self._build_global_variable(None, vtype, variable.value, vdata, None)
type = Pointer(vtype, self._view.address_size * BYTE_SIZE)
return (
Expand Down
2 changes: 0 additions & 2 deletions decompiler/structures/pseudo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from .delogic_logic import DelogicConverter
from .expressions import (
Constant,
ConstantComposition,
DataflowObject,
Expression,
FunctionSymbol,
Expand All @@ -11,7 +10,6 @@
IntrinsicSymbol,
NotUseableConstant,
RegisterPair,
StructConstant,
Symbol,
Tag,
UnknownExpression,
Expand Down
75 changes: 11 additions & 64 deletions decompiler/structures/pseudo/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@

from __future__ import annotations

import json
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import TYPE_CHECKING, Generic, Iterator, List, Optional, Tuple, TypeVar, Union, final

from ...util.insertion_ordered_set import InsertionOrderedSet
from .complextypes import Enum, Struct
from .complextypes import Enum
from .typing import CustomType, Type, UnknownType

T = TypeVar("T")
Expand Down Expand Up @@ -164,9 +165,11 @@ def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T:
class Constant(Expression[DecompiledType]):
"""Represents a constant expression type."""

ValueType = int | float | str | bytes | Expression | list["ValueType"] | dict[int, "ValueType"]

def __init__(
self,
value: Union[int, float, str, bytes],
value: ValueType,
vartype: DecompiledType = UnknownType(),
pointee: Optional[Constant] = None,
tags: Optional[Tuple[Tag, ...]] = None,
Expand All @@ -186,7 +189,12 @@ def __eq__(self, __value):
)

def __hash__(self):
return hash((tuple(self.value) if isinstance(self.value, list) else self.value, self._type, self._pointee))
match self.value:
case dict() | list():
value_hash_obj = json.dumps(self.value, sort_keys=True)
case _:
value_hash_obj = self.value
return hash((value_hash_obj, self._type, self._pointee))

def __repr__(self) -> str:
value = str(self) if isinstance(self.value, str) else self.value
Expand Down Expand Up @@ -561,64 +569,3 @@ def copy(self) -> RegisterPair:
def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T:
    """Hand this expression to the visitor's register-pair handler and return its result."""
    handler = visitor.visit_register_pair
    return handler(self)


class ConstantComposition(Constant):
    """A constant whose value is a list of constants sharing one type.

    It is used to represent arrays and string constants."""

    def __init__(self, value: list[Constant], vartype: DecompiledType = UnknownType(), tags: Optional[Tuple[Tag, ...]] = None):
        super().__init__(value, vartype=vartype, tags=tags)

    def __eq__(self, __value):
        # Only another ConstantComposition can be equal; the rest is decided by Constant.
        if not isinstance(__value, ConstantComposition):
            return False
        return super().__eq__(__value)

    def __hash__(self):
        # Defined explicitly because overriding __eq__ would otherwise disable hashing.
        return super().__hash__()

    def __str__(self) -> str:
        """Return the members' string forms, comma-separated and wrapped in braces."""
        members = ",".join(str(member) for member in self.value)
        return "{" + members + "}"

    def copy(self) -> ConstantComposition:
        """Return a new ConstantComposition with the same value list and type (tags are not passed on)."""
        return ConstantComposition(self.value, self._type)

    def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T:
        """Hand this expression to the visitor's constant-composition handler and return its result."""
        return visitor.visit_constant_composition(self)


class StructConstant(Constant):
    """A constant struct.

    ``value`` is a dictionary from field offsets to the fields' values.
    ``vartype`` is a 'Struct' (a special ComplexType), which maps offsets to field names."""

    def __init__(self, value: dict[int, Expression], vartype: Struct, tags: Optional[Tuple[Tag, ...]] = None):
        super().__init__(value, vartype=vartype, tags=tags)

    def __eq__(self, __value):
        # Only another StructConstant can be equal; the rest is decided by Constant.
        if not isinstance(__value, StructConstant):
            return False
        return super().__eq__(__value)

    def __hash__(self):
        # Hash the (offset, value) pairs in sorted order so insertion order does not matter.
        fields_by_offset = sorted(self.value.items())
        return hash(tuple(fields_by_offset))

    def __str__(self) -> str:
        """Return the string form of the offset-to-value dictionary."""
        return str(self.value)

    def __iter__(self) -> Iterator[Expression]:
        """Iterate over the field values (not the offsets)."""
        for field_value in self.value.values():
            yield field_value

    def copy(self) -> StructConstant:
        """Return a new StructConstant with the same value dictionary and type (tags are not passed on)."""
        return StructConstant(self.value, self._type)
12 changes: 1 addition & 11 deletions decompiler/structures/visitors/ast_dataflowobjectvisitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,7 @@
VirtualRootNode,
)
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree
from decompiler.structures.pseudo.expressions import (
Constant,
ConstantComposition,
DataflowObject,
RegisterPair,
UnknownExpression,
Variable,
)
from decompiler.structures.pseudo.expressions import Constant, DataflowObject, RegisterPair, UnknownExpression, Variable
from decompiler.structures.pseudo.instructions import Assignment, Break, Comment, Continue, GenericBranch, MemPhi, Phi, Return
from decompiler.structures.pseudo.operations import BinaryOperation, Call, Condition, ListOperation, TernaryExpression, UnaryOperation
from decompiler.structures.visitors.interfaces import ASTVisitorInterface, DataflowObjectVisitorInterface
Expand Down Expand Up @@ -93,9 +86,6 @@ def visit_unknown_expression(self, expression: UnknownExpression):
def visit_constant(self, expression: Constant):
pass

def visit_constant_composition(self, expression: ConstantComposition):
pass

def visit_variable(self, expression: Variable):
pass

Expand Down
4 changes: 0 additions & 4 deletions decompiler/structures/visitors/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,6 @@ def visit_unknown_expression(self, expr: expressions.UnknownExpression):
def visit_constant(self, expr: expressions.Constant):
"""Visit a Constant."""

@abstractmethod
def visit_constant_composition(self, expr: expressions.ConstantComposition):
"""Visit a Constant."""

@abstractmethod
def visit_variable(self, expr: expressions.Variable):
"""Visit a Variable."""
Expand Down
4 changes: 0 additions & 4 deletions decompiler/structures/visitors/substitute_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
Comment,
Condition,
Constant,
ConstantComposition,
Continue,
DataflowObject,
Expression,
Expand Down Expand Up @@ -108,9 +107,6 @@ def visit_unknown_expression(self, expr: UnknownExpression) -> Optional[Dataflow
def visit_constant(self, expr: Constant) -> Optional[DataflowObject]:
return self._mapper(expr)

def visit_constant_composition(self, expr: ConstantComposition):
pass

def visit_variable(self, expr: Variable) -> Optional[DataflowObject]:
return self._mapper(expr)

Expand Down

0 comments on commit b42aa68

Please sign in to comment.