From 510ff95655e76e498a1a2aa112b4efbef5280eee Mon Sep 17 00:00:00 2001 From: Nikhil Parasaram Date: Tue, 30 Nov 2021 03:49:02 +0000 Subject: [PATCH] Support symbolic bytecode (#1560) * Support symbolic bytecode * Use hex notation * Handle swarm hash --- mythril/analysis/solver.py | 8 +++-- mythril/disassembler/asm.py | 29 +++++++++++++--- mythril/disassembler/disassembly.py | 17 ++++++++-- mythril/laser/ethereum/cfg.py | 2 +- mythril/laser/ethereum/instructions.py | 34 +++++++++++++++---- mythril/laser/ethereum/state/account.py | 13 ++++++- .../transaction/transaction_models.py | 11 ++---- mythril/support/support_utils.py | 6 +++- tests/integration_tests/analysis_tests.py | 11 ++++++ .../symbolic_exec_bytecode.sol | 23 +++++++++++++ .../inputs/symbolic_exec_bytecode.sol.o | 1 + 11 files changed, 129 insertions(+), 26 deletions(-) create mode 100644 tests/testdata/input_contracts/symbolic_exec_bytecode.sol create mode 100644 tests/testdata/inputs/symbolic_exec_bytecode.sol.o diff --git a/mythril/analysis/solver.py b/mythril/analysis/solver.py index 65947485..760c8c2a 100644 --- a/mythril/analysis/solver.py +++ b/mythril/analysis/solver.py @@ -111,7 +111,11 @@ def _add_calldata_placeholder( tx["calldata"] = tx["input"] if not isinstance(transaction_sequence[0], ContractCreationTransaction): return - code_len = len(transaction_sequence[0].code.bytecode) + + if type(transaction_sequence[0].code.bytecode) == tuple: + code_len = len(transaction_sequence[0].code.bytecode) * 2 + else: + code_len = len(transaction_sequence[0].code.bytecode) concrete_transactions[0]["calldata"] = concrete_transactions[0]["input"][ code_len + 2 : ] @@ -164,7 +168,7 @@ def _get_concrete_state(initial_accounts: Dict, min_price_dict: Dict[str, int]): data = dict() # type: Dict[str, Union[int, str]] data["nonce"] = account.nonce - data["code"] = account.code.bytecode + data["code"] = account.serialised_code() data["storage"] = str(account.storage) data["balance"] = hex(min_price_dict.get(address, 0)) accounts[hex(address)] = data diff --git a/mythril/disassembler/asm.py b/mythril/disassembler/asm.py index 0c2bac35..0340b80e 100644 --- a/mythril/disassembler/asm.py +++ b/mythril/disassembler/asm.py @@ -3,7 +3,9 @@ code disassembly.""" import re from collections.abc import Generator +from functools import lru_cache +from mythril.ethereum import util from mythril.support.opcodes import OPCODES, ADDRESS, ADDRESS_OPCODE_MAPPING regex_PUSH = re.compile(r"^PUSH(\d*)$") @@ -86,7 +88,10 @@ def is_sequence_match(pattern: list, instruction_list: list, index: int) -> bool return True -def disassemble(bytecode: bytes) -> list: +lru_cache(maxsize=2 ** 10) + + +def disassemble(bytecode) -> list: """Disassembles evm bytecode and returns a list of instructions. :param bytecode: @@ -95,9 +100,20 @@ def disassemble(bytecode: bytes) -> list: instruction_list = [] address = 0 length = len(bytecode) - if "bzzr" in str(bytecode[-43:]): - # ignore swarm hash - length -= 43 + + if type(bytecode) == str: + bytecode = util.safe_decode(bytecode) + length = len(bytecode) + part_code = bytecode[-43:] + else: + part_code = bytes(bytecode[-43:]) + + try: + if "bzzr" in str(part_code): + # ignore swarm hash + length -= 43 + except ValueError: + pass while address < length: try: @@ -112,7 +128,10 @@ def disassemble(bytecode: bytes) -> list: match = re.search(regex_PUSH, op_code) if match: argument_bytes = bytecode[address + 1 : address + 1 + int(match.group(1))] - current_instruction.argument = "0x" + argument_bytes.hex() + if type(argument_bytes) == bytes: + current_instruction.argument = "0x" + argument_bytes.hex() + else: + current_instruction.argument = argument_bytes address += int(match.group(1)) instruction_list.append(current_instruction) diff --git a/mythril/disassembler/disassembly.py b/mythril/disassembler/disassembly.py index d51927a1..00a76aba 100644 --- a/mythril/disassembler/disassembly.py +++ b/mythril/disassembler/disassembly.py @@ -36,7 +36,7 @@ class Disassembly(object): # open from default locations # control if you want to have online signature hash lookups signatures = SignatureDB(enable_online_lookup=self.enable_online_lookup) - self.instruction_list = asm.disassemble(util.safe_decode(bytecode)) + self.instruction_list = asm.disassemble(bytecode) # Need to take from PUSH1 to PUSH4 because solc seems to remove excess 0s at the beginning for optimizing jump_table_indices = asm.find_op_code_sequence( [("PUSH1", "PUSH2", "PUSH3", "PUSH4"), ("EQ",)], self.instruction_list @@ -82,7 +82,18 @@ def get_function_info( """ # Append with missing 0s at the beginning - function_hash = "0x" + instruction_list[index]["argument"][2:].rjust(8, "0") + if type(instruction_list[index]["argument"]) == tuple: + try: + function_hash = "0x" + bytes( + instruction_list[index]["argument"] + ).hex().rjust(8, "0") + except AttributeError: + raise ValueError( + "Mythril currently does not support symbolic function signatures" + ) + else: + function_hash = "0x" + instruction_list[index]["argument"][2:].rjust(8, "0") + function_names = signature_database.get(function_hash) if len(function_names) > 0: @@ -92,6 +103,8 @@ def get_function_info( try: offset = instruction_list[index + 2]["argument"] + if type(offset) == tuple: + offset = bytes(offset).hex() entry_point = int(offset, 16) except (KeyError, IndexError): return function_hash, None, None diff --git a/mythril/laser/ethereum/cfg.py b/mythril/laser/ethereum/cfg.py index 4fd3524b..cae9c525 100644 --- a/mythril/laser/ethereum/cfg.py +++ b/mythril/laser/ethereum/cfg.py @@ -66,7 +66,7 @@ class Node: code += str(instruction["address"]) + " " + instruction["opcode"] if instruction["opcode"].startswith("PUSH"): - code += " " + instruction["argument"] + code += " " + "".join(instruction["argument"]) code += "\\n" diff --git a/mythril/laser/ethereum/instructions.py b/mythril/laser/ethereum/instructions.py index 4c68eb33..a92036c3 100644 --- a/mythril/laser/ethereum/instructions.py +++ b/mythril/laser/ethereum/instructions.py @@ -282,17 +282,39 @@ class Instruction: :return: """ push_instruction = global_state.get_current_instruction() - push_value = push_instruction["argument"][2:] - + push_value = push_instruction["argument"] try: length_of_value = 2 * int(push_instruction["opcode"][4:]) except ValueError: raise VmException("Invalid Push instruction") - push_value += "0" * max(length_of_value - len(push_value), 0) - global_state.mstate.stack.append( - symbol_factory.BitVecVal(int(push_value, 16), 256) - ) + if type(push_value) == tuple: + if type(push_value[0]) == int: + new_value = symbol_factory.BitVecVal(push_value[0], 8) + else: + new_value = push_value[0] + if len(push_value) > 1: + for val in push_value[1:]: + if type(val) == int: + new_value = Concat(new_value, symbol_factory.BitVecVal(val, 8)) + else: + new_value = Concat(new_value, val) + + pad_length = length_of_value // 2 - len(push_value) + + if pad_length > 0: + new_value = Concat(new_value, symbol_factory.BitVecVal(0, pad_length)) + if new_value.size() < 256: + new_value = Concat( + symbol_factory.BitVecVal(0, 256 - new_value.size()), new_value + ) + global_state.mstate.stack.append(new_value) + + else: + push_value += "0" * max(length_of_value - (len(push_value) - 2), 0) + global_state.mstate.stack.append( + symbol_factory.BitVecVal(int(push_value, 16), 256) + ) return [global_state] @StateTransition() diff --git a/mythril/laser/ethereum/state/account.py b/mythril/laser/ethereum/state/account.py index 05518b4c..53aff3f3 100644 --- a/mythril/laser/ethereum/state/account.py +++ b/mythril/laser/ethereum/state/account.py @@ -182,11 +182,22 @@ class Account: """ return { "nonce": self.nonce, - "code": self.code, + "code": self.serialised_code(), "balance": self.balance(), "storage": self.storage, } + def serialised_code(self): + if type(self.code.bytecode) == str: + return self.code.bytecode + new_code = "0x" + for byte in self.code.bytecode: + if type(byte) == int: + new_code += hex(byte) + else: + new_code += "" + return new_code + def __copy__(self, memodict={}): new_account = Account( address=self.address, diff --git a/mythril/laser/ethereum/transaction/transaction_models.py b/mythril/laser/ethereum/transaction/transaction_models.py index 3b1d9d53..21f2f7d4 100644 --- a/mythril/laser/ethereum/transaction/transaction_models.py +++ b/mythril/laser/ethereum/transaction/transaction_models.py @@ -255,17 +255,12 @@ class ContractCreationTransaction(BaseTransaction): :param return_data: :param revert: """ - if ( - return_data is None - or not all([isinstance(element, int) for element in return_data]) - or len(return_data) == 0 - ): + + if return_data is None or len(return_data) == 0: self.return_data = None raise TransactionEndSignal(global_state, revert=revert) - contract_code = bytes.hex(array.array("B", return_data).tobytes()) - - global_state.environment.active_account.code.assign_bytecode(contract_code) + global_state.environment.active_account.code.assign_bytecode(tuple(return_data)) self.return_data = str( hex(global_state.environment.active_account.address.value) ) diff --git a/mythril/support/support_utils.py b/mythril/support/support_utils.py index 84dfe05b..9956de6d 100644 --- a/mythril/support/support_utils.py +++ b/mythril/support/support_utils.py @@ -26,11 +26,15 @@ class Singleton(type): return cls._instances[cls] -def get_code_hash(code: str) -> str: +def get_code_hash(code) -> str: """ :param code: bytecode :return: Returns hash of the given bytecode """ + if type(code) == tuple: + # Temporary hack, since we cannot find symbols of sha3 + return str(hash(code)) + code = code[2:] if code[:2] == "0x" else code try: keccak = _pysha3.keccak_256() diff --git a/tests/integration_tests/analysis_tests.py b/tests/integration_tests/analysis_tests.py index 988a6bf3..e93fae6c 100644 --- a/tests/integration_tests/analysis_tests.py +++ b/tests/integration_tests/analysis_tests.py @@ -29,6 +29,17 @@ test_data = ( }, None, ), + ( + "symbolic_exec_bytecode.sol.o", + { + "TX_COUNT": 1, + "TX_OUTPUT": 0, + "MODULE": "AccidentallyKillable", + "ISSUE_COUNT": 1, + "ISSUE_NUMBER": 0, + }, + None, + ), ) diff --git a/tests/testdata/input_contracts/symbolic_exec_bytecode.sol b/tests/testdata/input_contracts/symbolic_exec_bytecode.sol new file mode 100644 index 00000000..8479ad9a --- /dev/null +++ b/tests/testdata/input_contracts/symbolic_exec_bytecode.sol @@ -0,0 +1,23 @@ +pragma solidity ^0.8.0; + +contract Test { + uint256 immutable inputSize; + + constructor(uint256 _log2Size) { + inputSize = (1 << _log2Size); + } + + function getBytes(bytes calldata _input) public view returns (bytes32) { + require( + _input.length > 0 && _input.length <= inputSize, + "input len: (0,inputSize]" + ); + + return "123"; + } + + function commencekilling() public { + address payable receiver = payable(msg.sender); + selfdestruct(receiver); + } +} diff --git a/tests/testdata/inputs/symbolic_exec_bytecode.sol.o b/tests/testdata/inputs/symbolic_exec_bytecode.sol.o new file mode 100644 index 00000000..7de4b200 --- /dev/null +++ b/tests/testdata/inputs/symbolic_exec_bytecode.sol.o @@ -0,0 +1 @@ +60a060405234801561001057600080fd5b5060405161039b38038061039b83398181016040528101906100329190610059565b806001901b60808181525050506100ac565b60008151905061005381610095565b92915050565b60006020828403121561006f5761006e610090565b5b600061007d84828501610044565b91505092915050565b6000819050919050565b600080fd5b61009e81610086565b81146100a957600080fd5b50565b6080516102d56100c66000396000608601526102d56000f3fe608060405234801561001057600080fd5b50600436106100365760003560e01c8063781c6dbe1461003b5780637c11da201461006b575b600080fd5b61005560048036038101906100509190610188565b610075565b6040516100629190610207565b60405180910390f35b610073610114565b005b600080838390501180156100ac57507f00000000000000000000000000000000000000000000000000000000000000008383905011155b6100eb576040517f08c379a00000000000000000000000000000000000000000000000000000000081526004016100e290610222565b60405180910390fd5b7f3132330000000000000000000000000000000000000000000000000000000000905092915050565b60003390508073ffffffffffffffffffffffffffffffffffffffff16ff5b60008083601f84011261014857610147610262565b5b8235905067ffffffffffffffff8111156101655761016461025d565b5b60208301915083600182028301111561018157610180610267565b5b9250929050565b6000806020838503121561019f5761019e610271565b5b600083013567ffffffffffffffff8111156101bd576101bc61026c565b5b6101c985828601610132565b92509250509250929050565b6101de81610253565b82525050565b60006101f1601883610242565b91506101fc82610276565b602082019050919050565b600060208201905061021c60008301846101d5565b92915050565b6000602082019050818103600083015261023b816101e4565b9050919050565b600082825260208201905092915050565b6000819050919050565b600080fd5b600080fd5b600080fd5b600080fd5b600080fd5b7f696e707574206c656e3a2028302c696e70757453697a655d000000000000000060008201525056fea26469706673582212203f37af85f2d345d5d16dd25ae1404605d9e5e7240b970530ef963385fc73a82e64736f6c63430008060033 \ No newline at end of file