diff --git a/mythril/disassembler/disassembly.py b/mythril/disassembler/disassembly.py index 787f297f..a20d4ffe 100644 --- a/mythril/disassembler/disassembly.py +++ b/mythril/disassembler/disassembly.py @@ -1,10 +1,9 @@ from mythril.ether import asm,util -import os -import json +from mythril.support.signatures import SignatureDb import logging -class Disassembly: +class Disassembly(object): def __init__(self, code): self.instruction_list = asm.disassemble(util.safe_decode(code)) @@ -13,21 +12,11 @@ class Disassembly: self.addr_to_func = {} self.bytecode = code + signatures = SignatureDb(enable_online_lookup=True) # control if you want to have online sighash lookups try: - mythril_dir = os.environ['MYTHRIL_DIR'] - except KeyError: - mythril_dir = os.path.join(os.path.expanduser('~'), ".mythril") - - # Load function signatures - - signatures_file = os.path.join(mythril_dir, 'signatures.json') - - if not os.path.exists(signatures_file): - logging.info("Missing function signature file. Resolving of function names disabled.") - signatures = {} - else: - with open(signatures_file) as f: - signatures = json.load(f) + signatures.open() # open from default locations + except FileNotFoundError: + logging.info("Missing function signature file. Resolving of function names from signature file disabled.") # Parse jump table & resolve function names @@ -36,7 +25,15 @@ class Disassembly: for i in jmptable_indices: func_hash = self.instruction_list[i]['argument'] try: - func_name = signatures[func_hash] + # tries local cache, file and optional online lookup + # may return more than one function signature. 
since we cannot probe for the correct one we'll use the first + func_names = signatures.get(func_hash) + if len(func_names) > 1: + # ambigious result + func_name = "**ambiguous** %s" % func_names[0] # return first hit but note that result was ambiguous + else: + # only one item + func_name = func_names[0] except KeyError: func_name = "_function_" + func_hash @@ -49,8 +46,8 @@ class Disassembly: except: continue - + signatures.write() # store resolved signatures (potentially resolved online) def get_easm(self): - + # todo: tintinweb - print funcsig resolved data from self.addr_to_func? return asm.instruction_list_to_easm(self.instruction_list) diff --git a/mythril/laser/ethereum/call.py b/mythril/laser/ethereum/call.py index 483894ce..aee2f30c 100644 --- a/mythril/laser/ethereum/call.py +++ b/mythril/laser/ethereum/call.py @@ -36,7 +36,7 @@ def get_call_parameters(global_state, dynamic_loader, with_value=False): callee_account = None call_data, call_data_type = get_call_data(global_state, meminstart, meminsz, False) - if int(callee_address, 16) >= 5: + if int(callee_address, 16) >= 5 or int(callee_address, 16) == 0: call_data, call_data_type = get_call_data(global_state, meminstart, meminsz) callee_account = get_callee_account(global_state, callee_address, dynamic_loader) diff --git a/mythril/laser/ethereum/instructions.py b/mythril/laser/ethereum/instructions.py index 470bc122..bc972044 100644 --- a/mythril/laser/ethereum/instructions.py +++ b/mythril/laser/ethereum/instructions.py @@ -921,7 +921,7 @@ class Instruction: value, environment.origin, calldata_type=call_data_type) - new_global_state = GlobalState(global_state.accounts, callee_environment, MachineState(gas)) + new_global_state = GlobalState(global_state.accounts, callee_environment, global_state.node, MachineState(gas)) new_global_state.mstate.depth = global_state.mstate.depth + 1 new_global_state.mstate.constraints = copy(global_state.mstate.constraints) return [global_state] @@ -948,7 +948,7 @@ class 
Instruction: environment.caller = environment.address environment.calldata = call_data - new_global_state = GlobalState(global_state.accounts, environment, MachineState(gas)) + new_global_state = GlobalState(global_state.accounts, environment, global_state.node, MachineState(gas)) new_global_state.mstate.depth = global_state.mstate.depth + 1 new_global_state.mstate.constraints = copy(global_state.mstate.constraints) @@ -976,7 +976,7 @@ class Instruction: environment.code = callee_account.code environment.calldata = call_data - new_global_state = GlobalState(global_state.accounts, environment, MachineState(gas)) + new_global_state = GlobalState(global_state.accounts, environment, global_state.node, MachineState(gas)) new_global_state.mstate.depth = global_state.mstate.depth + 1 new_global_state.mstate.constraints = copy(global_state.mstate.constraints) diff --git a/mythril/mythril.py b/mythril/mythril.py index a72a82d7..0a85366b 100644 --- a/mythril/mythril.py +++ b/mythril/mythril.py @@ -78,8 +78,6 @@ class Mythril(object): mythril.get_state_variable_from_storage(args) """ - - def __init__(self, solv=None, solc_args=None, dynld=False): @@ -88,7 +86,17 @@ class Mythril(object): self.dynld = dynld self.mythril_dir = self._init_mythril_dir() - self.signatures_file, self.sigs = self._init_signatures() + + self.sigs = signatures.SignatureDb() + try: + self.sigs.open() # tries mythril_dir/signatures.json by default (provide path= arg to make this configurable) + except FileNotFoundError as fnfe: + logging.info( + "No signature database found. 
Creating database if sigs are loaded in: " + self.sigs.signatures_file + "\n" + + "Consider replacing it with the pre-initialized database at https://raw.githubusercontent.com/ConsenSys/mythril/master/signatures.json") + except json.JSONDecodeError as jde: + raise CriticalError("Invalid JSON in signatures file " + self.sigs.signatures_file + "\n" + str(jde)) + self.solc_binary = self._init_solc_binary(solv) self.leveldb_dir = self._init_config() @@ -110,33 +118,6 @@ class Mythril(object): os.mkdir(mythril_dir) return mythril_dir - def _init_signatures(self): - - # If no function signature file exists, create it. Function signatures from Solidity source code are added automatically. - - signatures_file = os.path.join(self.mythril_dir, 'signatures.json') - - sigs = {} - if not os.path.exists(signatures_file): - logging.info("No signature database found. Creating empty database: " + signatures_file + "\n" + - "Consider replacing it with the pre-initialized database at https://raw.githubusercontent.com/ConsenSys/mythril/master/signatures.json") - with open(signatures_file, 'a') as f: - json.dump({}, f) - - with open(signatures_file) as f: - try: - sigs = json.load(f) - except json.JSONDecodeError as e: - raise CriticalError("Invalid JSON in signatures file " + signatures_file + "\n" + str(e)) - return signatures_file, sigs - - def _update_signatures(self, jsonsigs): - # Save updated function signatures - with open(self.signatures_file, 'w') as f: - json.dump(jsonsigs, f) - - self.sigs = jsonsigs - def _init_config(self): # If no config file exists, create it. 
Default LevelDB path is specified based on OS @@ -300,27 +281,32 @@ class Mythril(object): file = os.path.expanduser(file) try: - signatures.add_signatures_from_file(file, self.sigs) - self._update_signatures(self.sigs) + # import signatures from solidity source + with open(file, encoding="utf-8") as f: + self.sigs.import_from_solidity_source(f.read()) + contract = SolidityContract(file, contract_name, solc_args=self.solc_args) logging.info("Analyzing contract %s:%s" % (file, contract.name)) except FileNotFoundError: - raise CriticalError("Input file not found: " + file) + raise CriticalError("Input file not found: " + file) except CompilerError as e: - raise CriticalError(e) + raise CriticalError(e) except NoContractFoundError: logging.info("The file " + file + " does not contain a compilable contract.") else: self.contracts.append(contract) contracts.append(contract) + # Save updated function signatures + self.sigs.write() # dump signatures to disk (previously opened file or default location) + return address, contracts def dump_statespace(self, contract, address=None, max_depth=12): sym = SymExecWrapper(contract, address, - dynloader=DynLoader(self.eth) if self.dynld else None, - max_depth=max_depth) + dynloader=DynLoader(self.eth) if self.dynld else None, + max_depth=max_depth) return get_serializable_statespace(sym) diff --git a/mythril/support/signatures.py b/mythril/support/signatures.py index 0431dd21..e1582b68 100644 --- a/mythril/support/signatures.py +++ b/mythril/support/signatures.py @@ -1,44 +1,244 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +"""mythril.py: Function Signature Database +""" import re +import os +import json +import time +import pathlib +import logging from ethereum import utils - -def add_signatures_from_file(file, sigs={}): - - funcs = [] - - with open(file, encoding="utf-8") as f: - - code = f.read() - - funcs = re.findall(r'function[\s]+(\w+\([^\)]*\))', code, re.DOTALL) - - for f in funcs: - - f = re.sub(r'[\n]', '', f) - - 
# todo: tintinweb - make this a normal requirement? (deps: eth-abi and requests, both already required by mythril)
try:
    # load if available but do not fail
    import ethereum_input_decoder
    from ethereum_input_decoder.decoder import FourByteDirectoryOnlineLookupError
except ImportError:
    # Optional dependency missing: online lookups are silently disabled
    # (lookup_online() returns None) and the error type degrades to Exception.
    ethereum_input_decoder = None
    FourByteDirectoryOnlineLookupError = Exception


class SimpleFileLock(object):
    """
    Best-effort inter-process lock built on an exclusively created "<path>.lck" marker file.

    Can be used through aquire()/release() or as a context manager; the context
    manager form guarantees the marker file is removed when the body raises.
    """
    # todo: replace with something more reliable. this is a quick shot on concurrency and might not work in all cases

    STALE_AFTER = 60 * 5  # seconds after which a leftover lockfile is considered abandoned
    POLL_INTERVAL = 0.5   # seconds to sleep between two attempts to create the lockfile

    def __init__(self, path):
        """
        :param path: path of the file to protect; the lock marker is "<path>.lck"
        """
        self.path = path
        self.lockfile = pathlib.Path("%s.lck" % path)
        self.locked = False

    def __enter__(self):
        self.aquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
        return False  # never swallow the exception raised inside the with-body

    def _lock_age(self):
        """
        :return: age of the lockfile in seconds; 0.0 if it vanished in the meantime
        """
        try:
            return time.time() - self.lockfile.stat().st_mtime
        except FileNotFoundError:
            # the holder released the lock between our failed touch() and stat()
            return 0.0

    def aquire(self, timeout=5):
        """
        Try to take the lock, polling until `timeout` seconds have passed.

        :param timeout: maximum number of seconds to keep trying
        :raises Exception: if this instance already holds the lock or the timeout is hit
        """
        if self.locked:
            raise Exception("SimpleFileLock: lock already aquired")

        t_end = time.time() + timeout
        while time.time() < t_end:
            try:
                # exclusive create: fails with FileExistsError while somebody else holds the lock
                self.lockfile.touch(mode=0o0000, exist_ok=False)
            except FileExistsError:
                if self._lock_age() > self.STALE_AFTER:
                    self.release(force=True)  # cleanup old lockfile > 5mins
                time.sleep(self.POLL_INTERVAL)  # busywait is evil
                continue
            # lockfile did not exist. we have a lock now
            self.locked = True
            return

        raise Exception("SimpleFileLock: timeout hit. failed to aquire lock: %s" % self._lock_age())

    def release(self, force=False):
        """
        Remove the lockfile.

        :param force: remove the lockfile even if this instance does not hold the lock
        :raises Exception: if the lock is not held and `force` is not set
        """
        if not force and not self.locked:
            raise Exception("SimpleFileLock: aquire lock first")

        try:
            self.lockfile.unlink()  # might throw if we force unlock and the file gets removed in the meantime. TOCTOU
        except FileNotFoundError as fnfe:
            logging.warning("SimpleFileLock: release(force=%s) on unavailable file. race? %r", force, fnfe)

        self.locked = False


class SignatureDb(object):
    """
    Function signature database mapping a 4-byte sighash ("0x" + 8 hex chars)
    to the list of known function signatures for that hash.

    Entries come from a json file (open()/write()), from solidity source code
    (import_from_solidity_source()) and, optionally, from an online lookup (get()).
    """

    # Type aliases that must be expanded before hashing a function signature.
    _TYPE_ALIASES = {
        "uint": "uint256",
        "int": "int256",
        "byte": "bytes1",
        "fixed": "fixed128x18",
        "ufixed": "ufixed128x18",
    }

    def __init__(self, enable_online_lookup=True):
        """
        Constr
        :param enable_online_lookup: enable online signature hash lookup
        """
        self.signatures = {}  # signatures in-mem cache: {sighash: [signature, ...]}
        self.signatures_file = None
        self.signatures_file_lock = None
        self.enable_online_lookup = enable_online_lookup  # enable online funcsig resolving
        self.online_lookup_miss = set()  # temporarily track misses from onlinedb to avoid requesting the same non-existent sighash multiple times
        self.online_directory_unavailable_until = 0  # flag the online directory as unavailable for some time

    @staticmethod
    def _default_path():
        """
        :return: $MYTHRIL_DIR/signatures.json, falling back to ~/.mythril/signatures.json
        """
        try:
            mythril_dir = os.environ['MYTHRIL_DIR']
        except KeyError:
            mythril_dir = os.path.join(os.path.expanduser('~'), ".mythril")
        return os.path.join(mythril_dir, 'signatures.json')

    @staticmethod
    def _merge(target, incoming):
        """
        Merge `incoming` into `target`, keeping every value a duplicate-free list.

        Accepts both the legacy {sighash: signature} and the current
        {sighash: [signature, ...]} layout on either side.

        :param target: dictionary that is updated in place
        :param incoming: dictionary whose entries are added to `target`
        :return: target
        """
        for sighash, funcsigs in incoming.items():
            if not isinstance(funcsigs, list):
                funcsigs = [funcsigs]
            known = target.get(sighash)
            if not isinstance(known, list):
                known = [] if known is None else [known]
                target[sighash] = known
            for funcsig in funcsigs:
                if funcsig not in known:
                    known.append(funcsig)
        return target

    def _lock_for(self, path):
        """
        :param path: file that is about to be read or written
        :return: the cached SimpleFileLock for `path`; a new one if the path changed
        """
        lock = self.signatures_file_lock
        if lock is None or lock.path != path:
            lock = self.signatures_file_lock = SimpleFileLock(path)
        return lock

    def open(self, path=None):
        """
        Open a function signature db from json file

        :param path: specific path to signatures.json; default mythril location if not specified
        :return: self
        :raises FileNotFoundError: if the signature file does not exist
        """
        path = path or self._default_path()
        self.signatures_file = path  # store early to allow error handling to access the place we tried to load the file

        if not os.path.exists(path):
            logging.debug("Signatures: file not found: %s", path)
            raise FileNotFoundError("Missing function signature file. Resolving of function names disabled.")

        # the lock is released even if the file is unreadable or contains invalid json
        with self._lock_for(path):
            with open(path, 'r') as f:
                sigs = json.load(f)

        # normalize it to {sighash:list(signatures,...)}
        self._merge(self.signatures, sigs)
        return self

    def write(self, path=None, sync=True):
        """
        Write signatures database as json to file

        :param path: specify path otherwise update the file that was loaded with open() (or the default location)
        :param sync: lock signature file, load contents and merge it into memcached sighash db, then save it
        :return: self
        """
        path = path or self.signatures_file or self._default_path()

        with self._lock_for(path):
            merged = self._merge({}, self.signatures)  # cached signatures first, always as lists
            if sync and os.path.exists(path):
                # reload and keep whatever somebody else stored in the meantime
                with open(path, 'r') as f:
                    self._merge(merged, json.load(f))
            self.signatures = merged

            with open(path, 'w') as f:
                json.dump(self.signatures, f)

        return self

    def get(self, sighash, timeout=2):
        """
        get a function signature for a sighash
        1) try local cache
        2) try online lookup (if enabled; if not flagged as unavailable)
        :param sighash: function signature hash as hexstr
        :param timeout: online lookup timeout
        :return: list of matching function signatures
        :raises KeyError: if no signature is known for the sighash
        """
        if not sighash.startswith("0x"):
            sighash = "0x%s" % sighash  # normalize sighash format

        if self.enable_online_lookup and not self.signatures.get(sighash) \
                and sighash not in self.online_lookup_miss \
                and time.time() > self.online_directory_unavailable_until:
            # online lookup enabled, and signature not in cache, sighash was not a miss earlier, and online directory not down
            logging.debug("Signatures: performing online lookup for sighash %r", sighash)
            try:
                funcsigs = SignatureDb.lookup_online(sighash, timeout=timeout)  # might return multiple sigs
                if funcsigs:
                    # only store if we get at least one result
                    self.signatures[sighash] = funcsigs
                else:
                    # miss
                    self.online_lookup_miss.add(sighash)
            except FourByteDirectoryOnlineLookupError as fbdole:
                self.online_directory_unavailable_until = time.time() + 2 * 60  # wait at least 2 mins to try again
                logging.warning("online function signature lookup not available. will not try to lookup hash for the next 2 minutes. exception: %r", fbdole)
        return self.signatures[sighash]  # raise keyerror

    def __getitem__(self, item):
        """
        Provide dict interface Signatures()[sighash]
        :param item: sighash
        :return: list of matching signatures
        """
        return self.get(sighash=item)

    def import_from_solidity_source(self, code):
        """
        Import Function Signatures from solidity source files
        :param code: solidity source code
        :return: self
        """
        # The parser yields {sighash: signature}; merge it so that the cache keeps
        # its {sighash: [signature, ...]} layout that get() callers rely on.
        self._merge(self.signatures, SignatureDb.parse_function_signatures_from_solidity_source(code))
        return self

    @staticmethod
    def lookup_online(sighash, timeout=None, proxies=None):
        """
        Lookup function signatures from 4byte.directory.
        //tintinweb: the smart-contract-sanctuary project dumps contracts from etherscan.io and feeds them into
                     4bytes.directory.
                     https://github.com/tintinweb/smart-contract-sanctuary

        :param sighash: function signature hash as hexstr
        :param timeout: optional timeout for online lookup
        :param proxies: optional proxy servers for online lookup
        :return: a list of matching function signatures for this hash; None if ethereum_input_decoder is not installed
        """
        if not ethereum_input_decoder:
            return None
        return list(ethereum_input_decoder.decoder.FourByteDirectory.lookup_signatures(sighash,
                                                                                     timeout=timeout,
                                                                                     proxies=proxies))

    @staticmethod
    def _canonical_type(type_name):
        """
        Expand a solidity type alias to the canonical name used for hashing,
        keeping any array suffix (e.g. "uint[]" -> "uint256[]").

        :param type_name: type as written in the source
        :return: canonical type name
        """
        base, bracket, suffix = type_name.partition("[")
        return SignatureDb._TYPE_ALIASES.get(base, base) + bracket + suffix

    @staticmethod
    def parse_function_signatures_from_solidity_source(code):
        """
        Parse solidity sourcecode for function signatures and return the signature hash and function signature

        NOTE: user defined types (contracts, enums, structs) are not resolved to their ABI types.

        :param code: solidity source code
        :return: dictionary {sighash: function_signature}
        """
        sigs = {}

        funcs = re.findall(r'\bfunction[\s]+(.*?\))', code, re.DOTALL)
        for f in funcs:
            f = re.sub(r'[\n]', '', f)
            m = re.search(r'^([A-Za-z0-9_]+)', f)
            if not m:
                continue  # unnamed (fallback) function: nothing to hash

            signature = m.group(1)
            m = re.search(r'\((.*)\)', f)

            types = []
            for arg in m.group(1).split(","):
                parts = arg.split()  # any whitespace separates type, data location and name
                if parts:
                    types.append(SignatureDb._canonical_type(parts[0]))

            signature += "(" + ",".join(types) + ")"
            signature = re.sub(r'\s', '', signature)
            sigs["0x" + utils.sha3(signature)[:4].hex()] = signature

        logging.debug("Signatures: parse soldiity found %d signatures", len(sigs))
        return sigs