From 6bbab6855a12cabec3f22cf7fcbcde8408c3b9c3 Mon Sep 17 00:00:00 2001 From: Joran Honig Date: Thu, 5 Jul 2018 11:30:28 +0200 Subject: [PATCH 01/18] Add return statement --- mythril/laser/ethereum/call.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mythril/laser/ethereum/call.py b/mythril/laser/ethereum/call.py index 23145b9e..483894ce 100644 --- a/mythril/laser/ethereum/call.py +++ b/mythril/laser/ethereum/call.py @@ -111,10 +111,13 @@ def get_callee_account(global_state, callee_address, dynamic_loader): if code is None: logging.info("No code returned, not a contract account?") raise ValueError() + logging.info("Dependency loaded: " + callee_address) - accounts[callee_address] = Account(callee_address, code, callee_address) + callee_account = Account(callee_address, code, callee_address) + accounts[callee_address] = callee_account + + return callee_account - logging.info("Dependency loaded: " + callee_address) def get_call_data(global_state, memory_start, memory_size, pad=True): From 5c0e65b2d80981ecfd8507acc598a80932c6ef9a Mon Sep 17 00:00:00 2001 From: Joran Honig Date: Thu, 5 Jul 2018 11:55:29 +0200 Subject: [PATCH 02/18] Implementation of a depth first search search strategy --- mythril/laser/ethereum/strategy/__init__.py | 0 mythril/laser/ethereum/strategy/basic.py | 17 +++++++++++++++++ mythril/laser/ethereum/svm.py | 13 ++++--------- 3 files changed, 21 insertions(+), 9 deletions(-) create mode 100644 mythril/laser/ethereum/strategy/__init__.py create mode 100644 mythril/laser/ethereum/strategy/basic.py diff --git a/mythril/laser/ethereum/strategy/__init__.py b/mythril/laser/ethereum/strategy/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mythril/laser/ethereum/strategy/basic.py b/mythril/laser/ethereum/strategy/basic.py new file mode 100644 index 00000000..4dc2d968 --- /dev/null +++ b/mythril/laser/ethereum/strategy/basic.py @@ -0,0 +1,17 @@ +class DepthFirstSearchStrategy: + + def __init__(self, content, max_depth): + self.content = content + self.max_depth = max_depth + + def __iter__(self): + return self + + def __next__(self): + try: + global_state = self.content.pop(0) + if global_state.mstate.depth >= self.max_depth: + return self.__next__() + return global_state + except IndexError: + raise StopIteration() diff --git a/mythril/laser/ethereum/svm.py b/mythril/laser/ethereum/svm.py index 0001f8e0..fc835662 100644 --- a/mythril/laser/ethereum/svm.py +++ b/mythril/laser/ethereum/svm.py @@ -3,6 +3,7 @@ import logging from mythril.laser.ethereum.state import GlobalState, Environment, CalldataType, Account from mythril.laser.ethereum.instructions import Instruction from mythril.laser.ethereum.cfg import NodeFlags, Node, Edge, JumpType +from mythril.laser.ethereum.strategy.basic import DepthFirstSearchStrategy TT256 = 2 ** 256 TT256M1 = 2 ** 256 - 1 @@ -31,7 +32,7 @@ class LaserEVM: self.dynamic_loader = dynamic_loader self.work_list = [] - self.max_depth = max_depth + self.strategy = DepthFirstSearchStrategy(self.work_list, max_depth) logging.info("LASER EVM initialized with dynamic loader: " + str(dynamic_loader)) @@ -57,20 +58,14 @@ class LaserEVM: initial_node.states.append(global_state) # Empty the work_list before starting an execution - self.work_list = [global_state] + self.work_list.append(global_state) self._sym_exec() logging.info("Execution complete") logging.info("%d nodes, %d edges, %d total states", len(self.nodes), len(self.edges), self.total_states) def _sym_exec(self): - while True: - try: - global_state = self.work_list.pop(0) - if global_state.mstate.depth >= self.max_depth: continue - except IndexError: - return - + for global_state in self.strategy: try: new_states, op_code = self.execute_state(global_state) except NotImplementedError: From b973a1686f269044e670704b56c07ca79336c29c Mon Sep 17 00:00:00 2001 From: Joran Honig Date: Thu, 5 Jul 2018 12:01:26 +0200 Subject: [PATCH 03/18] Add documentation and fix pop --- mythril/laser/ethereum/strategy/basic.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/mythril/laser/ethereum/strategy/basic.py b/mythril/laser/ethereum/strategy/basic.py index 4dc2d968..754cd3ef 100644 --- a/mythril/laser/ethereum/strategy/basic.py +++ b/mythril/laser/ethereum/strategy/basic.py @@ -1,17 +1,29 @@ -class DepthFirstSearchStrategy: +""" +This module implements basic symbolic execution search strategies +""" + - def __init__(self, content, max_depth): - self.content = content +class DepthFirstSearchStrategy: + """ + Implements a depth first search strategy + I.E. Follow one path to a leaf, and then continue to the next one + """ + def __init__(self, work_list, max_depth): + self.work_list = work_list self.max_depth = max_depth def __iter__(self): return self def __next__(self): + """ Picks the next state to execute """ try: - global_state = self.content.pop(0) + # This strategies assumes that new states are appended at the end of the work_list + # By taking the last element we effectively pick the "newest" states, which amounts to dfs + global_state = self.work_list.pop() if global_state.mstate.depth >= self.max_depth: return self.__next__() return global_state except IndexError: raise StopIteration() + From b82717afedda02b94fed56e5abbed7a66480f879 Mon Sep 17 00:00:00 2001 From: Joran Honig Date: Thu, 5 Jul 2018 12:05:08 +0200 Subject: [PATCH 04/18] re add max depth for now --- mythril/laser/ethereum/svm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mythril/laser/ethereum/svm.py b/mythril/laser/ethereum/svm.py index fc835662..1ceb3740 100644 --- a/mythril/laser/ethereum/svm.py +++ b/mythril/laser/ethereum/svm.py @@ -33,6 +33,7 @@ class LaserEVM: self.work_list = [] self.strategy = DepthFirstSearchStrategy(self.work_list, max_depth) + self.max_depth = max_depth logging.info("LASER EVM initialized with dynamic loader: " + str(dynamic_loader)) From eb963d38fde46d39e91b4be14a68263b869d35d7 Mon Sep 17 00:00:00 2001 From: Joran Honig Date: Thu, 5 Jul 2018 12:16:18 +0200 Subject: [PATCH 05/18] Implement depth first search --- mythril/laser/ethereum/strategy/basic.py | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/mythril/laser/ethereum/strategy/basic.py b/mythril/laser/ethereum/strategy/basic.py index 754cd3ef..33dca443 100644 --- a/mythril/laser/ethereum/strategy/basic.py +++ b/mythril/laser/ethereum/strategy/basic.py @@ -27,3 +27,28 @@ class DepthFirstSearchStrategy: except IndexError: raise StopIteration() + +class BreadthFirstSearchStrategy: + """ + Implements a breadth first search strategy + I.E. Execute all states of a "level" before continuing + """ + def __init__(self, work_list, max_depth): + self.work_list = work_list + self.max_depth = max_depth + + def __iter__(self): + return self + + def __next__(self): + """ Picks the next state to execute """ + try: + # This strategies assumes that new states are appended at the end of the work_list + # By taking the first element we effectively pick the "oldest" states, which amounts to bfs + global_state = self.work_list.pop(0) + if global_state.mstate.depth >= self.max_depth: + return self.__next__() + return global_state + except IndexError: + raise StopIteration() + From 25f76368bca4e7865811fcc578a13ac10a09f8ce Mon Sep 17 00:00:00 2001 From: Nikhil Parasaram Date: Fri, 6 Jul 2018 21:45:18 +0530 Subject: [PATCH 06/18] Support 0x0 for extcodesize --- mythril/laser/ethereum/instructions.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mythril/laser/ethereum/instructions.py b/mythril/laser/ethereum/instructions.py index 4d90da95..64e87937 100644 --- a/mythril/laser/ethereum/instructions.py +++ b/mythril/laser/ethereum/instructions.py @@ -529,8 +529,12 @@ class Instruction: logging.info("error accessing contract storage due to: " + str(e)) state.stack.append(BitVec("extcodesize_" + str(addr), 256)) return [global_state] + + if code is None: + state.stack.append(0) + else: + state.stack.append(len(code.bytecode) // 2) - state.stack.append(len(code.bytecode) // 2) return [global_state] @instruction From 24fad14cbd498e49834cf22b3f043bfe66a7bd39 Mon Sep 17 00:00:00 2001 From: Nikhil Parasaram Date: Fri, 6 Jul 2018 22:25:39 +0530 Subject: [PATCH 07/18] fix ethereum to 2.3.1 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 13e88c78..aad67f69 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ configparser>=3.5.0 coverage eth_abi>=1.0.0 eth-account>=0.1.0a2 -ethereum>=2.3.0 +ethereum==2.3.1 eth-hash>=0.1.0 eth-keyfile>=0.5.1 eth-keys>=0.2.0b3 diff --git a/setup.py b/setup.py index 3cbf5f97..0e7c624d 100755 --- a/setup.py +++ b/setup.py @@ -305,7 +305,7 @@ setup( packages=find_packages(exclude=['contrib', 'docs', 'tests']), install_requires=[ - 'ethereum>=2.3.0', + 'ethereum==2.3.1', 'z3-solver>=4.5', 'requests', 'py-solc', From d0efd35ce25a817415c17e82ee4d734c3f4e17d4 Mon Sep 17 00:00:00 2001 From: Nikhil Parasaram Date: Sat, 7 Jul 2018 19:19:26 +0530 Subject: [PATCH 08/18] connect to infura with -l --- mythril/mythril.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mythril/mythril.py b/mythril/mythril.py index b63cc137..a72a82d7 100644 --- a/mythril/mythril.py +++ b/mythril/mythril.py @@ -335,9 +335,9 @@ class Mythril(object): verbose_report=False, max_depth=12): all_issues = [] + if self.dynld and self.eth is None: + self.set_api_rpc_infura() for contract in (contracts or self.contracts): - if self.eth is None: - self.set_api_rpc_infura() sym = SymExecWrapper(contract, address, dynloader=DynLoader(self.eth) if self.dynld else None, max_depth=max_depth) From 290aaf81988e55366100c900dae4bb5b767c499e Mon Sep 17 00:00:00 2001 From: Joran Honig Date: Sat, 7 Jul 2018 22:16:01 +0200 Subject: [PATCH 09/18] Add condition not 0 --- mythril/laser/ethereum/call.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mythril/laser/ethereum/call.py b/mythril/laser/ethereum/call.py index 483894ce..58eafed3 100644 --- a/mythril/laser/ethereum/call.py +++ b/mythril/laser/ethereum/call.py @@ -36,7 +36,7 @@ def get_call_parameters(global_state, dynamic_loader, with_value=False): callee_account = None call_data, call_data_type = get_call_data(global_state, meminstart, meminsz, False) - if int(callee_address, 16) >= 5: + if int(callee_address, 16) >= 5 or int(callee_address) != 0: call_data, call_data_type = get_call_data(global_state, meminstart, meminsz) callee_account = get_callee_account(global_state, callee_address, dynamic_loader) From c3d1eb85c36ccb0f46f638f01d05f90d82b6dbb5 Mon Sep 17 00:00:00 2001 From: Joran Honig Date: Sat, 7 Jul 2018 22:16:47 +0200 Subject: [PATCH 10/18] Pass None as node --- mythril/laser/ethereum/instructions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mythril/laser/ethereum/instructions.py b/mythril/laser/ethereum/instructions.py index 4d90da95..72451f48 100644 --- a/mythril/laser/ethereum/instructions.py +++ b/mythril/laser/ethereum/instructions.py @@ -913,7 +913,7 @@ class Instruction: value, environment.origin, calldata_type=call_data_type) - new_global_state = GlobalState(global_state.accounts, callee_environment, MachineState(gas)) + new_global_state = GlobalState(global_state.accounts, callee_environment, None, MachineState(gas)) new_global_state.mstate.depth = global_state.mstate.depth + 1 new_global_state.mstate.constraints = copy(global_state.mstate.constraints) return [global_state] @@ -940,7 +940,7 @@ class Instruction: environment.caller = environment.address environment.calldata = call_data - new_global_state = GlobalState(global_state.accounts, environment, MachineState(gas)) + new_global_state = GlobalState(global_state.accounts, environment, None, MachineState(gas)) new_global_state.mstate.depth = global_state.mstate.depth + 1 new_global_state.mstate.constraints = copy(global_state.mstate.constraints) @@ -968,7 +968,7 @@ class Instruction: environment.code = callee_account.code environment.calldata = call_data - new_global_state = GlobalState(global_state.accounts, environment, MachineState(gas)) + new_global_state = GlobalState(global_state.accounts, environment, None, MachineState(gas)) new_global_state.mstate.depth = global_state.mstate.depth + 1 new_global_state.mstate.constraints = copy(global_state.mstate.constraints) From 09c41deb6370ffdb548e9df04d7722e55b38c39b Mon Sep 17 00:00:00 2001 From: Joran Honig Date: Sat, 7 Jul 2018 22:43:01 +0200 Subject: [PATCH 11/18] Fix node issue --- mythril/laser/ethereum/instructions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mythril/laser/ethereum/instructions.py b/mythril/laser/ethereum/instructions.py index 72451f48..56456fdd 100644 --- a/mythril/laser/ethereum/instructions.py +++ b/mythril/laser/ethereum/instructions.py @@ -913,7 +913,7 @@ class Instruction: value, environment.origin, calldata_type=call_data_type) - new_global_state = GlobalState(global_state.accounts, callee_environment, None, MachineState(gas)) + new_global_state = GlobalState(global_state.accounts, callee_environment, global_state.node, MachineState(gas)) new_global_state.mstate.depth = global_state.mstate.depth + 1 new_global_state.mstate.constraints = copy(global_state.mstate.constraints) return [global_state] @@ -940,7 +940,7 @@ class Instruction: environment.caller = environment.address environment.calldata = call_data - new_global_state = GlobalState(global_state.accounts, environment, None, MachineState(gas)) + new_global_state = GlobalState(global_state.accounts, environment, global_state.node, MachineState(gas)) new_global_state.mstate.depth = global_state.mstate.depth + 1 new_global_state.mstate.constraints = copy(global_state.mstate.constraints) @@ -968,7 +968,7 @@ class Instruction: environment.code = callee_account.code environment.calldata = call_data - new_global_state = GlobalState(global_state.accounts, environment, None, MachineState(gas)) + new_global_state = GlobalState(global_state.accounts, environment, global_state.node, MachineState(gas)) new_global_state.mstate.depth = global_state.mstate.depth + 1 new_global_state.mstate.constraints = copy(global_state.mstate.constraints) From 7c638272cbe936ba40cea9d0b5552e0edebeb6aa Mon Sep 17 00:00:00 2001 From: Joran Honig Date: Sat, 7 Jul 2018 23:12:49 +0200 Subject: [PATCH 12/18] Reverse comparison --- mythril/laser/ethereum/call.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mythril/laser/ethereum/call.py b/mythril/laser/ethereum/call.py index 58eafed3..63d09837 100644 --- a/mythril/laser/ethereum/call.py +++ b/mythril/laser/ethereum/call.py @@ -36,7 +36,7 @@ def get_call_parameters(global_state, dynamic_loader, with_value=False): callee_account = None call_data, call_data_type = get_call_data(global_state, meminstart, meminsz, False) - if int(callee_address, 16) >= 5 or int(callee_address) != 0: + if int(callee_address, 16) >= 5 or int(callee_address) == 0: call_data, call_data_type = get_call_data(global_state, meminstart, meminsz) callee_account = get_callee_account(global_state, callee_address, dynamic_loader) From 83c5eca66bc82ca500e6fb75e7de66c34559f734 Mon Sep 17 00:00:00 2001 From: tintinweb Date: Tue, 3 Jul 2018 23:05:24 +0200 Subject: [PATCH 13/18] added online lookup for signature hashes via 4bytes.directory --- mythril/disassembler/disassembly.py | 26 +++-------- mythril/support/signatures.py | 67 +++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 20 deletions(-) diff --git a/mythril/disassembler/disassembly.py b/mythril/disassembler/disassembly.py index 787f297f..b7414529 100644 --- a/mythril/disassembler/disassembly.py +++ b/mythril/disassembler/disassembly.py @@ -1,6 +1,5 @@ from mythril.ether import asm,util -import os -import json +from mythril.support.signatures import Signatures import logging @@ -13,21 +12,11 @@ class Disassembly: self.addr_to_func = {} self.bytecode = code + signatures = Signatures(enable_online_lookkup=True) # control if you want to have online sighash lookups try: - mythril_dir = os.environ['MYTHRIL_DIR'] - except KeyError: - mythril_dir = os.path.join(os.path.expanduser('~'), ".mythril") - - # Load function signatures - - signatures_file = os.path.join(mythril_dir, 'signatures.json') - - if not os.path.exists(signatures_file): - logging.info("Missing function signature file. Resolving of function names disabled.") - signatures = {} - else: - with open(signatures_file) as f: - signatures = json.load(f) + signatures.open() # open from default locations + except FileNotFoundError: + logging.info("Missing function signature file. Resolving of function names from disabled.") # Parse jump table & resolve function names @@ -36,7 +25,7 @@ class Disassembly: for i in jmptable_indices: func_hash = self.instruction_list[i]['argument'] try: - func_name = signatures[func_hash] + func_name = signatures.get(func_hash) # tries local cache, file and optional online lookup except KeyError: func_name = "_function_" + func_hash @@ -49,8 +38,5 @@ class Disassembly: except: continue - - def get_easm(self): - return asm.instruction_list_to_easm(self.instruction_list) diff --git a/mythril/support/signatures.py b/mythril/support/signatures.py index 0431dd21..5fb21737 100644 --- a/mythril/support/signatures.py +++ b/mythril/support/signatures.py @@ -1,7 +1,17 @@ import re +import os +import logging +import json from ethereum import utils +try: + # load if available but do not fail + import ethereum_input_decoder +except ImportError: + ethereum_input_decoder = None + +# TODO: tintinweb: move this and signature functionality from mythril.py to class Signatures to have one single interface. def add_signatures_from_file(file, sigs={}): funcs = [] @@ -42,3 +52,60 @@ def add_signatures_from_file(file, sigs={}): signature = re.sub(r'\s', '', signature) sigs["0x" + utils.sha3(signature)[:4].hex()] = signature + + +class Signatures(object): + + def __init__(self, enable_online_lookkup=True): + self.signatures = {} # signatures in-mem cache + self.enable_online_lookup =enable_online_lookkup # enable online funcsig resolving + + def open(self, path=None): + if not path: + # try default locations + try: + mythril_dir = os.environ['MYTHRIL_DIR'] + except KeyError: + mythril_dir = os.path.join(os.path.expanduser('~'), ".mythril") + path = os.path.join(mythril_dir, 'signatures.json') + + if not os.path.exists(path): + raise FileNotFoundError("Missing function signature file. Resolving of function names disabled.") + + with open(path) as f: + sigs = json.load(f) + + # normalize it to {sighash:list(signatures,...)} + for sighash,funcsig in sigs.items(): + self.signatures.setdefault(sighash, []) + self.signatures[sighash].append(funcsig) + + return self + + def get(self, sighash): + """ + get a function signature for a sighash + 1) try local cache + 2) try online lookup + :param sighash: + :return: list of function signatures + """ + if not self.signatures.get(sighash) and self.enable_online_lookup: + self.signatures[sighash] = Signatures.lookup_online(sighash) # might return multiple sigs + return self.signatures.get(sighash) + + + @staticmethod + def lookup_online(sighash): + """ + Lookup function signatures from 4bytes.directory. + //tintinweb: the smart-contract-sanctuary project dumps contracts from etherscan.io and feeds them into + 4bytes.directory. + https://github.com/tintinweb/smart-contract-sanctuary + + :param s: function signature as hexstr + :return: a list of possible function signatures for this hash + """ + if not ethereum_input_decoder: + return None + return list(ethereum_input_decoder.decoder.FourByteDirectory.lookup_signatures(sighash)) From cdd27383970244d943a7de89d8c58d0910e95702 Mon Sep 17 00:00:00 2001 From: tintinweb Date: Tue, 3 Jul 2018 23:12:48 +0200 Subject: [PATCH 14/18] result to online lookup can be ambiguous. use first item and note this in disassembly --- mythril/disassembler/disassembly.py | 14 +++++++++++--- mythril/support/signatures.py | 13 ++++++++----- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/mythril/disassembler/disassembly.py b/mythril/disassembler/disassembly.py index b7414529..de1c9bad 100644 --- a/mythril/disassembler/disassembly.py +++ b/mythril/disassembler/disassembly.py @@ -12,11 +12,11 @@ class Disassembly: self.addr_to_func = {} self.bytecode = code - signatures = Signatures(enable_online_lookkup=True) # control if you want to have online sighash lookups + signatures = Signatures(enable_online_lookup=True) # control if you want to have online sighash lookups try: signatures.open() # open from default locations except FileNotFoundError: - logging.info("Missing function signature file. Resolving of function names from disabled.") + logging.info("Missing function signature file. Resolving of function names from signature file disabled.") # Parse jump table & resolve function names @@ -25,7 +25,15 @@ class Disassembly: for i in jmptable_indices: func_hash = self.instruction_list[i]['argument'] try: - func_name = signatures.get(func_hash) # tries local cache, file and optional online lookup + # tries local cache, file and optional online lookup + # may return more than one function signature. since we cannot probe for the correct one we'll use the first + func_names = signatures.get(func_hash) + if len(func_names) > 1: + # ambigious result + func_name = "**ambiguous** %s"%func_names[0] # return first hit but note that result was ambiguous + else: + # only one item + func_name = func_names[0] except KeyError: func_name = "_function_" + func_hash diff --git a/mythril/support/signatures.py b/mythril/support/signatures.py index 5fb21737..04fde734 100644 --- a/mythril/support/signatures.py +++ b/mythril/support/signatures.py @@ -56,9 +56,9 @@ def add_signatures_from_file(file, sigs={}): class Signatures(object): - def __init__(self, enable_online_lookkup=True): + def __init__(self, enable_online_lookup=True): self.signatures = {} # signatures in-mem cache - self.enable_online_lookup =enable_online_lookkup # enable online funcsig resolving + self.enable_online_lookup =enable_online_lookup # enable online funcsig resolving def open(self, path=None): if not path: @@ -91,14 +91,17 @@ class Signatures(object): :return: list of function signatures """ if not self.signatures.get(sighash) and self.enable_online_lookup: - self.signatures[sighash] = Signatures.lookup_online(sighash) # might return multiple sigs - return self.signatures.get(sighash) + funcsigs = Signatures.lookup_online(sighash) # might return multiple sigs + if funcsigs: + # only store if we get at least one result + self.signatures[sighash] = funcsigs + return self.signatures[sighash] # raise keyerror @staticmethod def lookup_online(sighash): """ - Lookup function signatures from 4bytes.directory. + Lookup function signatures from 4byte.directory. //tintinweb: the smart-contract-sanctuary project dumps contracts from etherscan.io and feeds them into 4bytes.directory. https://github.com/tintinweb/smart-contract-sanctuary From 73efcffde213e1f17aec220611b1d6ac58a38d7d Mon Sep 17 00:00:00 2001 From: tintinweb Date: Wed, 4 Jul 2018 19:55:11 +0200 Subject: [PATCH 15/18] rebase off master refactor function signature handling -> one single class to handle signatures -> Signatures().get(sighash) and Signatures()[sighash] interfaces -> added simple file locking mechanism ref #294 -> fix signature caching (avoid looking up same hash multiple times) --- mythril/disassembler/disassembly.py | 11 +- mythril/mythril.py | 58 +++---- mythril/support/signatures.py | 232 ++++++++++++++++++++++------ 3 files changed, 210 insertions(+), 91 deletions(-) diff --git a/mythril/disassembler/disassembly.py b/mythril/disassembler/disassembly.py index de1c9bad..a20d4ffe 100644 --- a/mythril/disassembler/disassembly.py +++ b/mythril/disassembler/disassembly.py @@ -1,9 +1,9 @@ from mythril.ether import asm,util -from mythril.support.signatures import Signatures +from mythril.support.signatures import SignatureDb import logging -class Disassembly: +class Disassembly(object): def __init__(self, code): self.instruction_list = asm.disassemble(util.safe_decode(code)) @@ -12,7 +12,7 @@ class Disassembly: self.addr_to_func = {} self.bytecode = code - signatures = Signatures(enable_online_lookup=True) # control if you want to have online sighash lookups + signatures = SignatureDb(enable_online_lookup=True) # control if you want to have online sighash lookups try: signatures.open() # open from default locations except FileNotFoundError: @@ -30,7 +30,7 @@ class Disassembly: func_names = signatures.get(func_hash) if len(func_names) > 1: # ambigious result - func_name = "**ambiguous** %s"%func_names[0] # return first hit but note that result was ambiguous + func_name = "**ambiguous** %s" % func_names[0] # return first hit but note that result was ambiguous else: # only one item func_name = func_names[0] @@ -46,5 +46,8 @@ class Disassembly: except: continue + signatures.write() # store resolved signatures (potentially resolved online) + def get_easm(self): + # todo: tintinweb - print funcsig resolved data from self.addr_to_func? return asm.instruction_list_to_easm(self.instruction_list) diff --git a/mythril/mythril.py b/mythril/mythril.py index a72a82d7..0a85366b 100644 --- a/mythril/mythril.py +++ b/mythril/mythril.py @@ -78,8 +78,6 @@ class Mythril(object): mythril.get_state_variable_from_storage(args) """ - - def __init__(self, solv=None, solc_args=None, dynld=False): @@ -88,7 +86,17 @@ class Mythril(object): self.dynld = dynld self.mythril_dir = self._init_mythril_dir() - self.signatures_file, self.sigs = self._init_signatures() + + self.sigs = signatures.SignatureDb() + try: + self.sigs.open() # tries mythril_dir/signatures.json by default (provide path= arg to make this configurable) + except FileNotFoundError as fnfe: + logging.info( + "No signature database found. Creating database if sigs are loaded in: " + self.sigs.signatures_file + "\n" + + "Consider replacing it with the pre-initialized database at https://raw.githubusercontent.com/ConsenSys/mythril/master/signatures.json") + except json.JSONDecodeError as jde: + raise CriticalError("Invalid JSON in signatures file " + self.sigs.signatures_file + "\n" + str(jde)) + self.solc_binary = self._init_solc_binary(solv) self.leveldb_dir = self._init_config() @@ -110,33 +118,6 @@ class Mythril(object): os.mkdir(mythril_dir) return mythril_dir - def _init_signatures(self): - - # If no function signature file exists, create it. Function signatures from Solidity source code are added automatically. - - signatures_file = os.path.join(self.mythril_dir, 'signatures.json') - - sigs = {} - if not os.path.exists(signatures_file): - logging.info("No signature database found. Creating empty database: " + signatures_file + "\n" + - "Consider replacing it with the pre-initialized database at https://raw.githubusercontent.com/ConsenSys/mythril/master/signatures.json") - with open(signatures_file, 'a') as f: - json.dump({}, f) - - with open(signatures_file) as f: - try: - sigs = json.load(f) - except json.JSONDecodeError as e: - raise CriticalError("Invalid JSON in signatures file " + signatures_file + "\n" + str(e)) - return signatures_file, sigs - - def _update_signatures(self, jsonsigs): - # Save updated function signatures - with open(self.signatures_file, 'w') as f: - json.dump(jsonsigs, f) - - self.sigs = jsonsigs - def _init_config(self): # If no config file exists, create it. Default LevelDB path is specified based on OS @@ -300,27 +281,32 @@ class Mythril(object): file = os.path.expanduser(file) try: - signatures.add_signatures_from_file(file, self.sigs) - self._update_signatures(self.sigs) + # import signatures from solidity source + with open(file, encoding="utf-8") as f: + self.sigs.import_from_solidity_source(f.read()) + contract = SolidityContract(file, contract_name, solc_args=self.solc_args) logging.info("Analyzing contract %s:%s" % (file, contract.name)) except FileNotFoundError: - raise CriticalError("Input file not found: " + file) + raise CriticalError("Input file not found: " + file) except CompilerError as e: - raise CriticalError(e) + raise CriticalError(e) except NoContractFoundError: logging.info("The file " + file + " does not contain a compilable contract.") else: self.contracts.append(contract) contracts.append(contract) + # Save updated function signatures + self.sigs.write() # dump signatures to disk (previously opened file or default location) + return address, contracts def dump_statespace(self, contract, address=None, max_depth=12): sym = SymExecWrapper(contract, address, - dynloader=DynLoader(self.eth) if self.dynld else None, - max_depth=max_depth) + dynloader=DynLoader(self.eth) if self.dynld else None, + max_depth=max_depth) return get_serializable_statespace(sym) diff --git a/mythril/support/signatures.py b/mythril/support/signatures.py index 04fde734..af86abc4 100644 --- a/mythril/support/signatures.py +++ b/mythril/support/signatures.py @@ -1,66 +1,90 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +"""mythril.py: Function Signature Database +""" import re import os -import logging import json +import time +import pathlib +import logging from ethereum import utils +# todo: tintinweb - make this a normal requirement? (deps: eth-abi and requests, both already required by mythril) try: # load if available but do not fail import ethereum_input_decoder + from ethereum_input_decoder.decoder import FourByteDirectoryOnlineLookupError except ImportError: + # fake it :) ethereum_input_decoder = None + FourByteDirectoryOnlineLookupError = Exception -# TODO: tintinweb: move this and signature functionality from mythril.py to class Signatures to have one single interface. -def add_signatures_from_file(file, sigs={}): - - funcs = [] - - with open(file, encoding="utf-8") as f: - - code = f.read() - - funcs = re.findall(r'function[\s]+(\w+\([^\)]*\))', code, re.DOTALL) - - for f in funcs: +class SimpleFileLock(object): + # todo: replace with something more reliable. this is a quick shot on concurrency and might not work in all cases - f = re.sub(r'[\n]', '', f) + def __init__(self, path): + self.path = path + self.lockfile = pathlib.Path("%s.lck" % path) + self.locked = False - m = re.search(r'^([A-Za-z0-9_]+)', f) + def aquire(self, timeout=5): + if self.locked: + raise Exception("SimpleFileLock: lock already aquired") - if (m): - - signature = m.group(1) - - m = re.search(r'\((.*)\)', f) - - _args = m.group(1).split(",") - - types = [] + t_end = time.time()+timeout + while time.time() < t_end: + # try to aquire lock + try: + self.lockfile.touch(mode=0o0000, exist_ok=False) # touch the lockfile + # lockfile does not exist. we have a lock now + self.locked = True + return + except FileExistsError as fee: + # check if lockfile date exceeds age and cleanup lock + if time.time() > self.lockfile.stat().st_mtime + 60 * 5: + self.release(force=True) # cleanup old lockfile > 5mins - for arg in _args: + time.sleep(0.5) # busywait is evil + continue - _type = arg.lstrip().split(" ")[0] - if _type == "uint": - _type = "uint256" + raise Exception("SimpleFileLock: timeout hit. failed to aquire lock: %s"% (time.time()-self.lockfile.stat().st_mtime)) - types.append(_type) + def release(self, force=False): + if not force and not self.locked: + raise Exception("SimpleFileLock: aquire lock first") - typelist = ",".join(types) - signature += "(" + typelist + ")" + try: + self.lockfile.unlink() # might throw if we force unlock and the file gets removed in the meantime. TOCTOU + except FileNotFoundError as fnfe: + logging.warning("SimpleFileLock: release(force=%s) on unavailable file. race? %r" % (force, fnfe)) - signature = re.sub(r'\s', '', signature) + self.locked = False - sigs["0x" + utils.sha3(signature)[:4].hex()] = signature -class Signatures(object): +class SignatureDb(object): def __init__(self, enable_online_lookup=True): + """ + Constr + :param enable_online_lookup: enable onlien signature hash lookup + """ self.signatures = {} # signatures in-mem cache - self.enable_online_lookup =enable_online_lookup # enable online funcsig resolving + self.signatures_file = None + self.signatures_file_lock = None + self.enable_online_lookup = enable_online_lookup # enable online funcsig resolving + self.online_lookup_miss = set() # temporarily track misses from onlinedb to avoid requesting the same non-existent sighash multiple times + self.online_directory_unavailable_until = 0 # flag the online directory as unavailable for some time def open(self, path=None): + """ + Open a function signature db from json file + + :param path: specific path to signatures.json; default mythril location if not specified + :return: self + """ if not path: # try default locations try: @@ -69,46 +93,152 @@ class Signatures(object): mythril_dir = os.path.join(os.path.expanduser('~'), ".mythril") path = os.path.join(mythril_dir, 'signatures.json') + self.signatures_file = path # store early to allow error handling to access the place we tried to load the file + if not os.path.exists(path): + logging.debug("Signatures: file not found: %s" % path) raise FileNotFoundError("Missing function signature file. Resolving of function names disabled.") - with open(path) as f: + self.signatures_file_lock = SimpleFileLock(self.signatures_file) # lock file to prevent concurrency issues + self.signatures_file_lock.aquire() # try to aquire it within the next 10s + + with open(path, 'r') as f: sigs = json.load(f) + self.signatures_file_lock.release() # release lock + # normalize it to {sighash:list(signatures,...)} - for sighash,funcsig in sigs.items(): + for sighash, funcsig in sigs.items(): + if isinstance(funcsig, list): + self.signatures = sigs # keep original todo: tintinweb - super hacky. make sure signatures.json is initially in correct format fixme + break # already normalized self.signatures.setdefault(sighash, []) self.signatures[sighash].append(funcsig) return self - def get(self, sighash): + def write(self, path=None, sync=True): + """ + Write signatures database as json to file + + :param path: specify path otherwise update the file that was loaded with open() + :param sync: lock signature file, load contents and merge it into memcached sighash db, then save it + :return: self + """ + path = path or self.signatures_file + self.signatures_file_lock = SimpleFileLock(path) # lock file to prevent concurrency issues + self.signatures_file_lock.aquire() # try to aquire it within the next 10s + + if sync and os.path.exists(path): + # reload and save if file exists + with open(path, 'r') as f: + sigs = json.load(f) + + sigs.update(self.signatures) # reload file and merge cached sigs into what we load from file + self.signatures = sigs + + with open(path, 'w') as f: + json.dump(self.signatures, f) + + self.signatures_file_lock.release() + return self + + def get(self, sighash, timeout=2): """ get a function signature for a sighash 1) try local cache - 2) try online lookup - :param sighash: - :return: list of function signatures - """ - if not self.signatures.get(sighash) and self.enable_online_lookup: - funcsigs = Signatures.lookup_online(sighash) # might return multiple sigs - if funcsigs: - # only store if we get at least one result - self.signatures[sighash] = funcsigs + 2) try online lookup (if enabled; if not flagged as unavailable) + :param sighash: function signature hash as hexstr + :param timeout: online lookup timeout + :return: list of matching function signatures + """ + if not sighash.startswith("0x"): + sighash = "0x%s" % sighash # normalize sighash format + + if self.enable_online_lookup and not self.signatures.get(sighash) and sighash not in self.online_lookup_miss and time.time() > self.online_directory_unavailable_until: + # online lookup enabled, and signature not in cache, sighash was not a miss earlier, and online directory not down + logging.debug("Signatures: performing online lookup for sighash %r" % sighash) + try: + funcsigs = SignatureDb.lookup_online(sighash, timeout=timeout) # might return multiple sigs + if funcsigs: + # only store if we get at least one result + self.signatures[sighash] = funcsigs + else: + # miss + self.online_lookup_miss.add(sighash) + except FourByteDirectoryOnlineLookupError as fbdole: + self.online_directory_unavailable_until = time.time() + 2 * 60 # wait at least 2 mins to try again + logging.warning("online function signature lookup not available. will not try to lookup hash for the next 2 minutes. exception: %r" % fbdole) return self.signatures[sighash] # raise keyerror + def __getitem__(self, item): + """ + Provide dict interface Signatures()[sighash] + :param item: sighash + :return: list of matching signatures + """ + return self.get(sighash=item) + + def import_from_solidity_source(self, code): + """ + Import Function Signatures from solidity source files + :param code: solidity source code + :return: self + """ + self.signatures.update(SignatureDb.parse_function_signatures_from_solidity_source(code)) + return self @staticmethod - def lookup_online(sighash): + def lookup_online(sighash, timeout=None, proxies=None): """ Lookup function signatures from 4byte.directory. //tintinweb: the smart-contract-sanctuary project dumps contracts from etherscan.io and feeds them into 4bytes.directory. https://github.com/tintinweb/smart-contract-sanctuary - :param s: function signature as hexstr - :return: a list of possible function signatures for this hash + :param sighash: function signature hash as hexstr + :param timeout: optional timeout for online lookup + :param proxies: optional proxy servers for online lookup + :return: a list of matching function signatures for this hash """ if not ethereum_input_decoder: return None - return list(ethereum_input_decoder.decoder.FourByteDirectory.lookup_signatures(sighash)) + return list(ethereum_input_decoder.decoder.FourByteDirectory.lookup_signatures(sighash, + timeout=timeout, + proxies=proxies)) + + @staticmethod + def parse_function_signatures_from_solidity_source(code): + """ + Parse solidity sourcecode for function signatures and return the signature hash and function signature + :param code: solidity source code + :return: dictionary {sighash: function_signature} + """ + sigs = {} + + funcs = re.findall(r'function[\s]+(.*?\))', code, re.DOTALL) + for f in funcs: + f = re.sub(r'[\n]', '', f) + m = re.search(r'^([A-Za-z0-9_]+)', f) + + if m: + signature = m.group(1) + m = re.search(r'\((.*)\)', f) + _args = m.group(1).split(",") + types = [] + + for arg in _args: + _type = arg.lstrip().split(" ")[0] + + if _type == "uint": + _type = "uint256" + + types.append(_type) + + typelist = ",".join(types) + signature += "(" + typelist + ")" + signature = re.sub(r'\s', '', signature) + sigs["0x" + utils.sha3(signature)[:4].hex()] = signature + + logging.debug("Signatures: parse soldiity found %d signatures" % len(sigs)) + return sigs From 6851754059888d46bd8484a87fcfd82436df1ed3 Mon Sep 17 00:00:00 2001 From: tintinweb Date: Fri, 6 Jul 2018 18:00:47 +0200 Subject: [PATCH 16/18] remove unnecessary comment (todo) reuse SimpleFileLock instance if already set Note: requests.[get|post|request](.., proxies=None) should be using the default proxy settings (env) Note: signature db path selection logic is currently not matching mythril._init_config (but myhtril._init_mythril_dir) --- mythril/support/signatures.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mythril/support/signatures.py b/mythril/support/signatures.py index af86abc4..e1582b68 100644 --- a/mythril/support/signatures.py +++ b/mythril/support/signatures.py @@ -99,7 +99,7 @@ class SignatureDb(object): logging.debug("Signatures: file not found: %s" % path) raise FileNotFoundError("Missing function signature file. Resolving of function names disabled.") - self.signatures_file_lock = SimpleFileLock(self.signatures_file) # lock file to prevent concurrency issues + self.signatures_file_lock = self.signatures_file_lock or SimpleFileLock(self.signatures_file) # lock file to prevent concurrency issues self.signatures_file_lock.aquire() # try to aquire it within the next 10s with open(path, 'r') as f: @@ -110,7 +110,7 @@ class SignatureDb(object): # normalize it to {sighash:list(signatures,...)} for sighash, funcsig in sigs.items(): if isinstance(funcsig, list): - self.signatures = sigs # keep original todo: tintinweb - super hacky. make sure signatures.json is initially in correct format fixme + self.signatures = sigs break # already normalized self.signatures.setdefault(sighash, []) self.signatures[sighash].append(funcsig) @@ -126,7 +126,7 @@ class SignatureDb(object): :return: self """ path = path or self.signatures_file - self.signatures_file_lock = SimpleFileLock(path) # lock file to prevent concurrency issues + self.signatures_file_lock = self.signatures_file_lock or SimpleFileLock(path) # lock file to prevent concurrency issues self.signatures_file_lock.aquire() # try to aquire it within the next 10s if sync and os.path.exists(path): From 9b5bd6de777e57d48af26f5a26d04f86e8690c93 Mon Sep 17 00:00:00 2001 From: Joran Honig Date: Sun, 8 Jul 2018 14:56:24 +0200 Subject: [PATCH 17/18] Use base 16 --- mythril/laser/ethereum/call.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mythril/laser/ethereum/call.py b/mythril/laser/ethereum/call.py index 63d09837..aee2f30c 100644 --- a/mythril/laser/ethereum/call.py +++ b/mythril/laser/ethereum/call.py @@ -36,7 +36,7 @@ def get_call_parameters(global_state, dynamic_loader, with_value=False): callee_account = None call_data, call_data_type = get_call_data(global_state, meminstart, meminsz, False) - if int(callee_address, 16) >= 5 or int(callee_address) == 0: + if int(callee_address, 16) >= 5 or int(callee_address, 16) == 0: call_data, call_data_type = get_call_data(global_state, meminstart, meminsz) callee_account = get_callee_account(global_state, callee_address, dynamic_loader) From deb98df7c5d4bd6467f06073ddb0f545f15154c6 Mon Sep 17 00:00:00 2001 From: Nikhil Parasaram Date: Sun, 8 Jul 2018 23:17:26 +0530 Subject: [PATCH 18/18] return a list for blockhash function --- mythril/laser/ethereum/instructions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mythril/laser/ethereum/instructions.py b/mythril/laser/ethereum/instructions.py index a7d9521f..949de064 100644 --- a/mythril/laser/ethereum/instructions.py +++ b/mythril/laser/ethereum/instructions.py @@ -529,7 +529,7 @@ class Instruction: logging.info("error accessing contract storage due to: " + str(e)) state.stack.append(BitVec("extcodesize_" + str(addr), 256)) return [global_state] - + if code is None: state.stack.append(0) else: @@ -555,7 +555,7 @@ class Instruction: state = global_state.mstate blocknumber = state.stack.pop() state.stack.append(BitVec("blockhash_block_" + str(blocknumber), 256)) - return global_state + return [global_state] @instruction def coinbase_(self, global_state):