From 395cea71c422b2927dfb18bd3fc88f4fe3b8372d Mon Sep 17 00:00:00 2001 From: Joran Honig Date: Tue, 23 Oct 2018 14:06:00 +0200 Subject: [PATCH] Cleanup + documentation --- mythril/disassembler/disassembly.py | 114 ++++++++++++++++++---------- mythril/laser/ethereum/svm.py | 8 +- 2 files changed, 78 insertions(+), 44 deletions(-) diff --git a/mythril/disassembler/disassembly.py b/mythril/disassembler/disassembly.py index e28a4d73..48682f5a 100644 --- a/mythril/disassembler/disassembly.py +++ b/mythril/disassembler/disassembly.py @@ -4,57 +4,91 @@ import logging class Disassembly(object): + """ + Disassembly class - def __init__(self, code, enable_online_lookup=False): + Stores bytecode, and its disassembly. + Additionally it will gather the following information on the existing functions in the disassembled code: + - function hashes + - function name to entry point mapping + - function entry point to function name mapping + """ + def __init__(self, code: str, enable_online_lookup: bool=False): + self.bytecode = code self.instruction_list = asm.disassemble(util.safe_decode(code)) + self.func_hashes = [] - self.func_to_addr = {} - self.addr_to_func = {} - self.bytecode = code + self.function_name_to_address = {} + self.address_to_function_name = {} - signatures = SignatureDb(enable_online_lookup=enable_online_lookup) # control if you want to have online sighash lookups + signatures = SignatureDb( + enable_online_lookup=enable_online_lookup + ) # control if you want to have online signature hash lookups try: signatures.open() # open from default locations except FileNotFoundError: - logging.info("Missing function signature file. Resolving of function names from signature file disabled.") - - # Parse jump table & resolve function names + logging.info( + "Missing function signature file. Resolving of function names from signature file disabled." + ) # Need to take from PUSH1 to PUSH4 because solc seems to remove excess 0s at the beginning for optimizing - jmptable_indices = asm.find_opcode_sequence([("PUSH1", "PUSH2", "PUSH3", "PUSH4"), ("EQ",)], - self.instruction_list) - - for i in jmptable_indices: - func_hash = self.instruction_list[i]['argument'] - - # Append with missing 0s at the beginning - func_hash = "0x" + func_hash[2:].rjust(8, "0") - - self.func_hashes.append(func_hash) - try: - # tries local cache, file and optional online lookup - # may return more than one function signature. since we cannot probe for the correct one we'll use the first - func_names = signatures.get(func_hash) - if len(func_names) > 1: - # ambigious result - func_name = "**ambiguous** %s" % func_names[0] # return first hit but note that result was ambiguous - else: - # only one item - func_name = func_names[0] - except KeyError: - func_name = "_function_" + func_hash - - try: - offset = self.instruction_list[i + 2]['argument'] - jump_target = int(offset, 16) - - self.func_to_addr[func_name] = jump_target - self.addr_to_func[jump_target] = func_name - except: - continue + jump_table_indices = asm.find_opcode_sequence( + [("PUSH1", "PUSH2", "PUSH3", "PUSH4"), ("EQ",)], self.instruction_list + ) + + for index in jump_table_indices: + function_hash, jump_target, function_name = _get_function_info( + index, self.instruction_list, signatures + ) + self.func_hashes.append(function_hash) + + if jump_target is not None and function_name is not None: + self.function_name_to_address[function_name] = jump_target + self.address_to_function_name[jump_target] = function_name signatures.write() # store resolved signatures (potentially resolved online) def get_easm(self): - # todo: tintinweb - print funcsig resolved data from self.addr_to_func? return asm.instruction_list_to_easm(self.instruction_list) + + +def _get_function_info(index: int, instruction_list: list, signature_database: SignatureDb) -> (str, int, str): + """ + Finds the function information for a call table entry + Solidity uses the first 4 bytes of the calldata to indicate which function the message call should execute + The generated code that directs execution to the correct function looks like this: + - PUSH function_hash + - EQ + - PUSH entry_point + - JUMPI + + This function takes an index that points to the first instruction, and from that finds out the function hash, + function entry and the function name. + + :param index: Start of the entry pattern + :param instruction_list: Instruction list for the contract that is being analyzed + :param signature_database: Database used to map function hashes to their respective function names + :return: function hash, function entry point, function name + """ + + # Append with missing 0s at the beginning + function_hash = "0x" + instruction_list[index]["argument"][2:].rjust(8, "0") + + function_names = signature_database.get(function_hash) + if len(function_names) > 1: + # In this case there was an ambiguous result + function_name = ( + "**ambiguous** {}".format(function_names[0]) + ) + elif len(function_names) == 1: + function_name = function_names[0] + else: + function_name = "_function_" + function_hash + + try: + offset = instruction_list[index + 2]["argument"] + entry_point = int(offset, 16) + except IndexError: + return function_hash, None, None + + return function_hash, entry_point, function_name diff --git a/mythril/laser/ethereum/svm.py b/mythril/laser/ethereum/svm.py index ed5f5724..f6693ead 100644 --- a/mythril/laser/ethereum/svm.py +++ b/mythril/laser/ethereum/svm.py @@ -1,4 +1,5 @@ import logging +from mythril.disassembler.disassembly import Disassembly from mythril.laser.ethereum.state import WorldState from mythril.laser.ethereum.transaction import TransactionStartSignal, TransactionEndSignal, \ ContractCreationTransaction @@ -258,11 +259,10 @@ class LaserEVM: address = state.environment.code.instruction_list[state.mstate.pc]['address'] environment = state.environment - disassembly = environment.code - if address in state.environment.code.addr_to_func: + disassembly: Disassembly = environment.code + if address in disassembly.address_to_function_name: # Enter a new function - - environment.active_function_name = disassembly.addr_to_func[address] + environment.active_function_name = disassembly.address_to_function_name[address] new_node.flags |= NodeFlags.FUNC_ENTRY logging.debug(