diff --git a/.circleci/config.yml b/.circleci/config.yml index 351fc89b..fc14bfc8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -33,6 +33,12 @@ jobs: - .tox/py* - /root/.cache/pip/wheels/ + - run: + name: Black style check + command: | + pip3 install --user black + python3 -m black --check /home/mythril/ + - run: background: true name: Launch of background geth instance diff --git a/README.md b/README.md index c5fbd20f..4b975def 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,21 @@ -# Mythril OSS [![Tweet](https://img.shields.io/twitter/url/http/shields.io.svg?style=social)](https://twitter.com/intent/tweet?text=Mythril%20-%20Security%20Analyzer%20for%20Ethereum%20Smart%20Contracts&url=https://www.github.com/ConsenSys/mythril) +# Mythril Classic + +
+ +
+ [![Discord](https://img.shields.io/discord/481002907366588416.svg)](https://discord.gg/E3YrVtG) [![PyPI](https://badge.fury.io/py/mythril.svg)](https://pypi.python.org/pypi/mythril) -![Master Build Status](https://img.shields.io/circleci/project/github/ConsenSys/mythril/master.svg) -[![Waffle.io - Columns and their card count](https://badge.waffle.io/ConsenSys/mythril.svg?columns=In%20Progress)](https://waffle.io/ConsenSys/mythril) +![Master Build Status](https://img.shields.io/circleci/project/github/ConsenSys/mythril-classic/master.svg) +[![Waffle.io - Columns and their card count](https://badge.waffle.io/ConsenSys/mythril-classic.svg?columns=In%20Progress)](https://waffle.io/ConsenSys/mythril-classic/) [![Sonarcloud - Maintainability](https://sonarcloud.io/api/project_badges/measure?project=mythril&metric=sqale_rating)](https://sonarcloud.io/dashboard?id=mythril) [![PyPI Statistics](https://pypistats.com/badge/mythril.svg)](https://pypistats.com/package/mythril) - -Mythril OSS is the classic security analysis tool for Ethereum smart contracts. It uses concolic analysis, taint analysis and control flow checking to detect a variety of security vulnerabilities. +Mythril Classic is an open-source security analysis tool for Ethereum smart contracts. It uses concolic analysis, taint analysis and control flow checking to detect a variety of security vulnerabilities. -Whether you want to contribute, need support, or want to learn what we have cooking for the future, our [Discord server](https://discord.gg/E3YrVtG) will serve your needs! +Whether you want to contribute, need support, or want to learn what we have cooking for the future, our [Discord server](https://discord.gg/E3YrVtG) will serve your needs. -Oh and by the way, we're building an easy-to-use SaaS solution and tools ecosystem for Ethereum developers called [Mythril Platform](https://mythril.ai). You should definitely check that out as well. +Oh and by the way, we're also building an easy-to-use security analysis platform (a.k.a. "the INFURA for smart contract security") that anybody can use to create purpose-built security tools. It's called [Mythril Platform](https://mythril.ai) and you should definitely [check it out](https://media.consensys.net/mythril-platform-api-is-upping-the-smart-contract-security-game-eee1d2642488). ## Installation and setup @@ -31,21 +35,10 @@ See the [Wiki](https://github.com/ConsenSys/mythril/wiki/Installation-and-Setup) ## Usage -Instructions for using the 'myth' tool are found on the [Wiki](https://github.com/ConsenSys/mythril/wiki). +Instructions for using Mythril Classic are found on the [Wiki](https://github.com/ConsenSys/mythril-classic/wiki). For support or general discussions please join the Mythril community on [Discord](https://discord.gg/E3YrVtG). ## Vulnerability Remediation Visit the [Smart Contract Vulnerability Classification Registry](https://smartcontractsecurity.github.io/SWC-registry/) to find detailed information and remediation guidance for the vulnerabilities reported. - -## Presentations, papers and articles - -- [Analyzing Ethereum Smart Contracts for Vulnerabilities](https://hackernoon.com/scanning-ethereum-smart-contracts-for-vulnerabilities-b5caefd995df) -- [What Caused the Parity SUICIDE Vulnerability & How to Detect Similar Bugs](https://hackernoon.com/what-caused-the-latest-100-million-ethereum-bug-and-a-detection-tool-for-similar-bugs-7b80f8ab7279) -- [Detecting Integer Overflows in Ethereum Smart Contracts](https://media.consensys.net/detecting-batchoverflow-and-similar-flaws-in-ethereum-smart-contracts-93cf5a5aaac8) -- [How Formal Verification Can Ensure Flawless Smart Contracts](https://media.consensys.net/how-formal-verification-can-ensure-flawless-smart-contracts-cbda8ad99bd1) -- [Smashing Smart Contracts for Fun and Real Profit](https://hackernoon.com/hitb2018ams-smashing-smart-contracts-for-fun-and-real-profit-720f5e3ac777) -- [HITBSecConf 2018 - Presentation video](https://www.youtube.com/watch?v=iqf6epACgds) -- [EDCon Toronto 2018 - Mythril: Find bugs and verify security properties in your contracts](https://www.youtube.com/watch?v=NJ9StJThxZY&feature=youtu.be&t=3h3m18s) - diff --git a/mythril/analysis/modules/dependence_on_predictable_vars.py b/mythril/analysis/modules/dependence_on_predictable_vars.py index f3fcf41a..12172982 100644 --- a/mythril/analysis/modules/dependence_on_predictable_vars.py +++ b/mythril/analysis/modules/dependence_on_predictable_vars.py @@ -156,12 +156,11 @@ def solve(call): try: model = solver.get_model(call.node.constraints) logging.debug("[DEPENDENCE_ON_PREDICTABLE_VARS] MODEL: " + str(model)) + pretty_model = solver.pretty_print_model(model) - for d in model.decls(): - logging.debug( - "[DEPENDENCE_ON_PREDICTABLE_VARS] main model: %s = 0x%x" - % (d.name(), model[d].as_long()) - ) + logging.debug( + "[DEPENDENCE_ON_PREDICTABLE_VARS] main model: \n%s" % pretty_model + ) return True except UnsatError: diff --git a/mythril/analysis/modules/ether_send.py b/mythril/analysis/modules/ether_send.py index fa967f2f..ae723333 100644 --- a/mythril/analysis/modules/ether_send.py +++ b/mythril/analysis/modules/ether_send.py @@ -1,10 +1,8 @@ -from z3 import * from mythril.analysis.ops import * from mythril.analysis import solver from mythril.analysis.report import Issue from mythril.analysis.swc_data import UNPROTECTED_ETHER_WITHDRAWAL from mythril.exceptions import UnsatError -import re import logging @@ -17,147 +15,63 @@ to that index). """ -def execute(statespace): +def execute(state_space): logging.debug("Executing module: ETHER_SEND") issues = [] - for call in statespace.calls: + for k in state_space.nodes: + node = state_space.nodes[k] - state = call.state - address = state.get_current_instruction()["address"] + for state in node.states: + issues += _analyze_state(state, node) - if "callvalue" in str(call.value): - logging.debug("[ETHER_SEND] Skipping refund function") - continue - - # We're only interested in calls that send Ether - - if call.value.type == VarType.CONCRETE and call.value.val == 0: - continue - - interesting = False - - description = "In the function `" + call.node.function_name + "` " - - if re.search(r"caller", str(call.to)): - description += "a non-zero amount of Ether is sent to msg.sender.\n" - interesting = True - - elif re.search(r"calldata", str(call.to)): - description += "a non-zero amount of Ether is sent to an address taken from function arguments.\n" - interesting = True - - else: - m = re.search(r"storage_([a-z0-9_&^]+)", str(call.to)) - - if m: - idx = m.group(1) - - description += ( - "a non-zero amount of Ether is sent to an address taken from storage slot " - + str(idx) - + ".\n" - ) - - func = statespace.find_storage_write( - state.environment.active_account.address, idx - ) - - if func: - description += ( - "There is a check on storage index " - + str(idx) - + ". This storage slot can be written to by calling the function `" - + func - + "`.\n" - ) - interesting = True - else: - logging.debug("[ETHER_SEND] No storage writes to index " + str(idx)) - - if interesting: - - node = call.node - - can_solve = True - constrained = False - - index = 0 - - while can_solve and index < len(node.constraints): - - constraint = node.constraints[index] - index += 1 - logging.debug("[ETHER_SEND] Constraint: " + str(constraint)) - - m = re.search(r"storage_([a-z0-9_&^]+)", str(constraint)) - - if m: - - constrained = True - idx = m.group(1) - - func = statespace.find_storage_write( - state.environment.active_account.address, idx - ) - - if func: - description += ( - "\nThere is a check on storage index " - + str(idx) - + ". This storage slot can be written to by calling the function `" - + func - + "`." - ) - else: - logging.debug( - "[ETHER_SEND] No storage writes to index " + str(idx) - ) - can_solve = False - break + return issues - # CALLER may also be constrained to hardcoded address. I.e. 'caller' and some integer - elif re.search(r"caller", str(constraint)) and re.search( - r"[0-9]{20}", str(constraint) - ): - constrained = True - can_solve = False - break - - if not constrained: - description += ( - "It seems that this function can be called without restrictions." - ) - - if can_solve: - - try: - model = solver.get_model(node.constraints) - - for d in model.decls(): - logging.debug( - "[ETHER_SEND] main model: %s = 0x%x" - % (d.name(), model[d].as_long()) - ) - - debug = "SOLVER OUTPUT:\n" + solver.pretty_print_model(model) - - issue = Issue( - contract=call.node.contract_name, - function=call.node.function_name, - address=address, - title="Ether send", - _type="Warning", - swc_id=UNPROTECTED_ETHER_WITHDRAWAL, - description=description, - debug=debug, - ) - issues.append(issue) - - except UnsatError: - logging.debug("[ETHER_SEND] no model found") +def _analyze_state(state, node): + issues = [] + instruction = state.get_current_instruction() + + if instruction["opcode"] != "CALL": + return [] + + call_value = state.mstate.stack[-3] + target = state.mstate.stack[-2] + + not_creator_constraints = [] + if len(state.world_state.transaction_sequence) > 1: + creator = state.world_state.transaction_sequence[0].caller + for transaction in state.world_state.transaction_sequence[1:]: + not_creator_constraints.append( + Not(Extract(159, 0, transaction.caller) == Extract(159, 0, creator)) + ) + not_creator_constraints.append( + Not(Extract(159, 0, transaction.caller) == 0) + ) + + try: + model = solver.get_model( + node.constraints + not_creator_constraints + [call_value > 0] + ) + + debug = "SOLVER OUTPUT:\n" + solver.pretty_print_model(model) + + issue = Issue( + contract=node.contract_name, + function=node.function_name, + address=instruction["address"], + swc_id=UNPROTECTED_ETHER_WITHDRAWAL, + title="Ether send", + _type="Warning", + description="It seems that an attacker is able to execute an call instruction," + " this can mean that the attacker is able to extract funds " + "out of the contract.".format(target), + debug=debug, + ) + issues.append(issue) + except UnsatError: + logging.debug("[UNCHECKED_SUICIDE] no model found") return issues diff --git a/mythril/analysis/modules/exceptions.py b/mythril/analysis/modules/exceptions.py index a9e63205..0768fac4 100644 --- a/mythril/analysis/modules/exceptions.py +++ b/mythril/analysis/modules/exceptions.py @@ -36,8 +36,7 @@ def execute(statespace): "out-of-bounds array access, or assert violations. " ) description += ( - "This is acceptable in most situations. " - "Note however that `assert()` should only be used to check invariants. " + "Note that explicit `assert()` should only be used to check invariants. " "Use `require()` for regular input checking. " ) diff --git a/mythril/analysis/modules/integer.py b/mythril/analysis/modules/integer.py index 091b8cfb..b5c952f8 100644 --- a/mythril/analysis/modules/integer.py +++ b/mythril/analysis/modules/integer.py @@ -98,12 +98,7 @@ def _check_integer_overflow(statespace, state, node): _type="Warning", ) - issue.description = ( - "A possible integer overflow exists in the function `{}`.\n" - "The addition or multiplication may result in a value higher than the maximum representable integer.".format( - node.function_name - ) - ) + issue.description = "The arithmetic operation can result in integer overflow.\n" issue.debug = solver.pretty_print_model(model) issues.append(issue) @@ -211,10 +206,7 @@ def _check_integer_underflow(statespace, state, node): ) issue.description = ( - "A possible integer underflow exists in the function `" - + node.function_name - + "`.\n" - "The subtraction may result in a value < 0." + "The subtraction can result in an integer underflow.\n" ) issue.debug = solver.pretty_print_model(model) diff --git a/mythril/analysis/modules/multiple_sends.py b/mythril/analysis/modules/multiple_sends.py index 459405dd..db7cca39 100644 --- a/mythril/analysis/modules/multiple_sends.py +++ b/mythril/analysis/modules/multiple_sends.py @@ -32,7 +32,8 @@ def execute(statespace): ) issue.description = ( - "Multiple sends exist in one transaction. Try to isolate each external call into its own transaction," + "Multiple sends are executed in a single transaction. " + "Try to isolate each external call into its own transaction," " as external calls can fail accidentally or deliberately.\nConsecutive calls: \n" ) @@ -47,9 +48,7 @@ def execute(statespace): def _explore_nodes(call, statespace): children = _child_nodes(statespace, call.node) - sending_children = list( - filter(lambda call: call.node in children, statespace.calls) - ) + sending_children = list(filter(lambda c: c.node in children, statespace.calls)) return sending_children diff --git a/mythril/analysis/modules/suicide.py b/mythril/analysis/modules/suicide.py index e2955486..2a191de2 100644 --- a/mythril/analysis/modules/suicide.py +++ b/mythril/analysis/modules/suicide.py @@ -39,9 +39,8 @@ def _analyze_state(state, node): to = state.mstate.stack[-1] logging.debug("[UNCHECKED_SUICIDE] suicide in function " + node.function_name) - description = ( - "The function `" + node.function_name + "` executes the SUICIDE instruction. " - ) + + description = "A reachable SUICIDE instruction was detected. " if "caller" in str(to): description += "The remaining Ether is sent to the caller's address.\n" diff --git a/mythril/analysis/modules/transaction_order_dependence.py b/mythril/analysis/modules/transaction_order_dependence.py index b8d34cd2..4528459c 100644 --- a/mythril/analysis/modules/transaction_order_dependence.py +++ b/mythril/analysis/modules/transaction_order_dependence.py @@ -42,10 +42,8 @@ def execute(statespace): ) issue.description = ( - "A possible transaction order dependence vulnerability exists in function {}. The value or " - "direction of the call statement is determined from a tainted storage location".format( - node.function_name - ) + "Possible transaction order dependence vulnerability: The value or " + "direction of the call statement is determined from a tainted storage location" ) issues.append(issue) diff --git a/mythril/analysis/solver.py b/mythril/analysis/solver.py index afa4ef5b..8e436b71 100644 --- a/mythril/analysis/solver.py +++ b/mythril/analysis/solver.py @@ -1,4 +1,4 @@ -from z3 import Solver, simplify, sat, unknown +from z3 import Solver, simplify, sat, unknown, FuncInterp from mythril.exceptions import UnsatError import logging @@ -22,6 +22,10 @@ def pretty_print_model(model): ret = "" for d in model.decls(): + if type(model[d]) == FuncInterp: + condition = model[d].as_list() + ret += "%s: %s\n" % (d.name(), condition) + continue try: condition = "0x%x" % model[d].as_long() diff --git a/mythril/analysis/symbolic.py b/mythril/analysis/symbolic.py index 871f0d61..539fbb24 100644 --- a/mythril/analysis/symbolic.py +++ b/mythril/analysis/symbolic.py @@ -7,6 +7,8 @@ from .ops import get_variable, SStore, Call, VarType from mythril.laser.ethereum.strategy.basic import ( DepthFirstSearchStrategy, BreadthFirstSearchStrategy, + ReturnRandomNaivelyStrategy, + ReturnWeightedRandomStrategy, ) @@ -28,11 +30,14 @@ class SymExecWrapper: max_transaction_count=3, ): - s_strategy = None if strategy == "dfs": s_strategy = DepthFirstSearchStrategy elif strategy == "bfs": s_strategy = BreadthFirstSearchStrategy + elif strategy == "naive-random": + s_strategy = ReturnRandomNaivelyStrategy + elif strategy == "weighted-random": + s_strategy = ReturnWeightedRandomStrategy else: raise ValueError("Invalid strategy argument supplied") diff --git a/mythril/analysis/traceexplore.py b/mythril/analysis/traceexplore.py index d3e87cab..6fd476aa 100644 --- a/mythril/analysis/traceexplore.py +++ b/mythril/analysis/traceexplore.py @@ -47,7 +47,6 @@ colors = [ def get_serializable_statespace(statespace): - nodes = [] edges = [] @@ -77,10 +76,10 @@ def get_serializable_statespace(statespace): color = color_map[node.get_cfg_dict()["contract_name"]] - def get_state_accounts(state): + def get_state_accounts(node_state): state_accounts = [] - for key in state.accounts: - account = state.accounts[key].as_dict + for key in node_state.accounts: + account = node_state.accounts[key].as_dict account.pop("code", None) account["balance"] = str(account["balance"]) diff --git a/mythril/disassembler/disassembly.py b/mythril/disassembler/disassembly.py index 6d3ef817..d86d32e5 100644 --- a/mythril/disassembler/disassembly.py +++ b/mythril/disassembler/disassembly.py @@ -4,16 +4,27 @@ import logging class Disassembly(object): - def __init__(self, code, enable_online_lookup=True): + """ + Disassembly class + + Stores bytecode, and its disassembly. + Additionally it will gather the following information on the existing functions in the disassembled code: + - function hashes + - function name to entry point mapping + - function entry point to function name mapping + """ + + def __init__(self, code: str, enable_online_lookup: bool = False): + self.bytecode = code self.instruction_list = asm.disassemble(util.safe_decode(code)) + self.func_hashes = [] - self.func_to_addr = {} - self.addr_to_func = {} - self.bytecode = code + self.function_name_to_address = {} + self.address_to_function_name = {} signatures = SignatureDb( enable_online_lookup=enable_online_lookup - ) # control if you want to have online sighash lookups + ) # control if you want to have online signature hash lookups try: signatures.open() # open from default locations except FileNotFoundError: @@ -21,46 +32,64 @@ class Disassembly(object): "Missing function signature file. Resolving of function names from signature file disabled." ) - # Parse jump table & resolve function names - # Need to take from PUSH1 to PUSH4 because solc seems to remove excess 0s at the beginning for optimizing - jmptable_indices = asm.find_opcode_sequence( + jump_table_indices = asm.find_opcode_sequence( [("PUSH1", "PUSH2", "PUSH3", "PUSH4"), ("EQ",)], self.instruction_list ) - for i in jmptable_indices: - func_hash = self.instruction_list[i]["argument"] - - # Append with missing 0s at the beginning - func_hash = "0x" + func_hash[2:].rjust(8, "0") - - self.func_hashes.append(func_hash) - try: - # tries local cache, file and optional online lookup - # may return more than one function signature. since we cannot probe for the correct one we'll use the first - func_names = signatures.get(func_hash) - if len(func_names) > 1: - # ambigious result - func_name = ( - "**ambiguous** %s" % func_names[0] - ) # return first hit but note that result was ambiguous - else: - # only one item - func_name = func_names[0] - except KeyError: - func_name = "_function_" + func_hash - - try: - offset = self.instruction_list[i + 2]["argument"] - jump_target = int(offset, 16) - - self.func_to_addr[func_name] = jump_target - self.addr_to_func[jump_target] = func_name - except: - continue + for index in jump_table_indices: + function_hash, jump_target, function_name = get_function_info( + index, self.instruction_list, signatures + ) + self.func_hashes.append(function_hash) + + if jump_target is not None and function_name is not None: + self.function_name_to_address[function_name] = jump_target + self.address_to_function_name[jump_target] = function_name signatures.write() # store resolved signatures (potentially resolved online) def get_easm(self): - # todo: tintinweb - print funcsig resolved data from self.addr_to_func? return asm.instruction_list_to_easm(self.instruction_list) + + +def get_function_info( + index: int, instruction_list: list, signature_database: SignatureDb +) -> (str, int, str): + """ + Finds the function information for a call table entry + Solidity uses the first 4 bytes of the calldata to indicate which function the message call should execute + The generated code that directs execution to the correct function looks like this: + - PUSH function_hash + - EQ + - PUSH entry_point + - JUMPI + + This function takes an index that points to the first instruction, and from that finds out the function hash, + function entry and the function name. + + :param index: Start of the entry pattern + :param instruction_list: Instruction list for the contract that is being analyzed + :param signature_database: Database used to map function hashes to their respective function names + :return: function hash, function entry point, function name + """ + + # Append with missing 0s at the beginning + function_hash = "0x" + instruction_list[index]["argument"][2:].rjust(8, "0") + + function_names = signature_database.get(function_hash) + if len(function_names) > 1: + # In this case there was an ambiguous result + function_name = "**ambiguous** {}".format(function_names[0]) + elif len(function_names) == 1: + function_name = function_names[0] + else: + function_name = "_function_" + function_hash + + try: + offset = instruction_list[index + 2]["argument"] + entry_point = int(offset, 16) + except (KeyError, IndexError): + return function_hash, None, None + + return function_hash, entry_point, function_name diff --git a/mythril/ether/ethcontract.py b/mythril/ether/ethcontract.py index b2a55106..8914c882 100644 --- a/mythril/ether/ethcontract.py +++ b/mythril/ether/ethcontract.py @@ -6,7 +6,7 @@ import re class ETHContract(persistent.Persistent): def __init__( - self, code, creation_code="", name="Unknown", enable_online_lookup=True + self, code, creation_code="", name="Unknown", enable_online_lookup=False ): # Workaround: We currently do not support compile-time linking. diff --git a/mythril/ether/evm.py b/mythril/ether/evm.py index 020dcd94..2dd84895 100644 --- a/mythril/ether/evm.py +++ b/mythril/ether/evm.py @@ -8,14 +8,12 @@ import re def trace(code, calldata=""): - log_handlers = [ "eth.vm.op", "eth.vm.op.stack", "eth.vm.op.memory", "eth.vm.op.storage", ] - output = StringIO() stream_handler = StreamHandler(output) @@ -25,58 +23,43 @@ def trace(code, calldata=""): log_vm_op.addHandler(stream_handler) addr = bytes.fromhex("0123456789ABCDEF0123456789ABCDEF01234567") - state = State() ext = messages.VMExt(state, transactions.Transaction(0, 0, 21000, addr, 0, addr)) - message = vm.Message(addr, addr, 0, 21000, calldata) - - res, gas, dat = vm.vm_execute(ext, message, util.safe_decode(code)) - + vm.vm_execute(ext, message, util.safe_decode(code)) stream_handler.flush() - ret = output.getvalue() - lines = ret.split("\n") - trace = [] - + state_trace = [] for line in lines: - m = re.search(r"pc=b\'(\d+)\'.*op=([A-Z0-9]+)", line) - if m: pc = m.group(1) op = m.group(2) - m = re.match(r".*stack=(\[.*?\])", line) if m: - stackitems = re.findall(r"b\'(\d+)\'", m.group(1)) - stack = "[" if len(stackitems): - for i in range(0, len(stackitems) - 1): stack += hex(int(stackitems[i])) + ", " - stack += hex(int(stackitems[-1])) stack += "]" - else: stack = "[]" if re.match(r"^PUSH.*", op): val = re.search(r"pushvalue=(\d+)", line).group(1) pushvalue = hex(int(val)) - trace.append( + state_trace.append( {"pc": pc, "op": op, "stack": stack, "pushvalue": pushvalue} ) else: - trace.append({"pc": pc, "op": op, "stack": stack}) + state_trace.append({"pc": pc, "op": op, "stack": stack}) - return trace + return state_trace diff --git a/mythril/ethereum/interface/leveldb/accountindexing.py b/mythril/ethereum/interface/leveldb/accountindexing.py index 21203740..6452a6b9 100644 --- a/mythril/ethereum/interface/leveldb/accountindexing.py +++ b/mythril/ethereum/interface/leveldb/accountindexing.py @@ -63,16 +63,15 @@ class AccountIndexer(object): def get_contract_by_hash(self, contract_hash): """ - get mapped address by its hash, if not found try indexing + get mapped contract_address by its hash, if not found try indexing """ - address = self.db.reader._get_address_by_hash(contract_hash) - if address is not None: - return address + contract_address = self.db.reader._get_address_by_hash(contract_hash) + if contract_address is not None: + return contract_address + else: raise AddressNotFoundError - return self.db.reader._get_address_by_hash(contract_hash) - def _process(self, startblock): """ Processesing method @@ -84,9 +83,9 @@ class AccountIndexer(object): addresses = [] for blockNum in range(startblock, startblock + BATCH_SIZE): - hash = self.db.reader._get_block_hash(blockNum) - if hash is not None: - receipts = self.db.reader._get_block_receipts(hash, blockNum) + block_hash = self.db.reader._get_block_hash(blockNum) + if block_hash is not None: + receipts = self.db.reader._get_block_receipts(block_hash, blockNum) for receipt in receipts: if receipt.contractAddress is not None and not all( diff --git a/mythril/ethereum/interface/leveldb/client.py b/mythril/ethereum/interface/leveldb/client.py index b3500baf..46e6bc77 100644 --- a/mythril/ethereum/interface/leveldb/client.py +++ b/mythril/ethereum/interface/leveldb/client.py @@ -84,55 +84,46 @@ class LevelDBReader(object): gets head block header """ if not self.head_block_header: - hash = self.db.get(head_header_key) - num = self._get_block_number(hash) - self.head_block_header = self._get_block_header(hash, num) + block_hash = self.db.get(head_header_key) + num = self._get_block_number(block_hash) + self.head_block_header = self._get_block_header(block_hash, num) # find header with valid state while ( not self.db.get(self.head_block_header.state_root) and self.head_block_header.prevhash is not None ): - hash = self.head_block_header.prevhash - num = self._get_block_number(hash) - self.head_block_header = self._get_block_header(hash, num) + block_hash = self.head_block_header.prevhash + num = self._get_block_number(block_hash) + self.head_block_header = self._get_block_header(block_hash, num) return self.head_block_header - def _get_block_number(self, hash): - """ - gets block number by hash - """ - number_key = block_hash_prefix + hash + def _get_block_number(self, block_hash): + """Get block number by its hash""" + number_key = block_hash_prefix + block_hash return self.db.get(number_key) - def _get_block_header(self, hash, num): - """ - get block header by block header hash & number - """ - header_key = header_prefix + num + hash + def _get_block_header(self, block_hash, num): + """Get block header by block header hash & number""" + header_key = header_prefix + num + block_hash + block_header_data = self.db.get(header_key) header = rlp.decode(block_header_data, sedes=BlockHeader) return header - def _get_address_by_hash(self, hash): - """ - get mapped address by its hash - """ - address_key = address_prefix + hash + def _get_address_by_hash(self, block_hash): + """Get mapped address by its hash""" + address_key = address_prefix + block_hash return self.db.get(address_key) def _get_last_indexed_number(self): - """ - latest indexed block number - """ + """Get latest indexed block number""" return self.db.get(address_mapping_head_key) - def _get_block_receipts(self, hash, num): - """ - get block transaction receipts by block header hash & number - """ + def _get_block_receipts(self, block_hash, num): + """Get block transaction receipts by block header hash & number""" number = _format_block_number(num) - receipts_key = block_receipts_prefix + number + hash + receipts_key = block_receipts_prefix + number + block_hash receipts_data = self.db.get(receipts_key) receipts = rlp.decode(receipts_data, sedes=CountableList(ReceiptForStorage)) return receipts @@ -224,12 +215,10 @@ class EthLevelDB(object): if not cnt % 1000: logging.info("Searched %d contracts" % cnt) - def contract_hash_to_address(self, hash): - """ - tries to find corresponding account address - """ + def contract_hash_to_address(self, contract_hash): + """Tries to find corresponding account address""" - address_hash = binascii.a2b_hex(utils.remove_0x_head(hash)) + address_hash = binascii.a2b_hex(utils.remove_0x_head(contract_hash)) indexer = AccountIndexer(self) return _encode_hex(indexer.get_contract_by_hash(address_hash)) @@ -238,9 +227,9 @@ class EthLevelDB(object): """ gets block header by block number """ - hash = self.reader._get_block_hash(number) + block_hash = self.reader._get_block_hash(number) block_number = _format_block_number(number) - return self.reader._get_block_header(hash, block_number) + return self.reader._get_block_header(block_hash, block_number) def eth_getBlockByNumber(self, number): """ diff --git a/mythril/ethereum/interface/leveldb/state.py b/mythril/ethereum/interface/leveldb/state.py index 678c36af..f756de24 100644 --- a/mythril/ethereum/interface/leveldb/state.py +++ b/mythril/ethereum/interface/leveldb/state.py @@ -58,9 +58,9 @@ class Account(rlp.Serializable): ("code_hash", hash32), ] - def __init__(self, nonce, balance, storage, code_hash, db, address): + def __init__(self, nonce, balance, storage, code_hash, db, addr): self.db = db - self.address = address + self.address = addr super(Account, self).__init__(nonce, balance, storage, code_hash) self.storage_cache = {} self.storage_trie = SecureTrie(Trie(self.db)) @@ -89,12 +89,12 @@ class Account(rlp.Serializable): return self.storage_cache[key] @classmethod - def blank_account(cls, db, address, initial_nonce=0): + def blank_account(cls, db, addr, initial_nonce=0): """ creates a blank account """ db.put(BLANK_HASH, b"") - o = cls(initial_nonce, 0, trie.BLANK_ROOT, BLANK_HASH, db, address) + o = cls(initial_nonce, 0, trie.BLANK_ROOT, BLANK_HASH, db, addr) o.existent_at_start = False return o @@ -117,22 +117,22 @@ class State: self.journal = [] self.cache = {} - def get_and_cache_account(self, address): - """ - gets and caches an account for an addres, creates blank if not found - """ - if address in self.cache: - return self.cache[address] - rlpdata = self.secure_trie.get(address) + def get_and_cache_account(self, addr): + """Gets and caches an account for an addres, creates blank if not found""" + + if addr in self.cache: + return self.cache[addr] + rlpdata = self.secure_trie.get(addr) if ( - rlpdata == trie.BLANK_NODE and len(address) == 32 + rlpdata == trie.BLANK_NODE and len(addr) == 32 ): # support for hashed addresses - rlpdata = self.trie.get(address) + rlpdata = self.trie.get(addr) + if rlpdata != trie.BLANK_NODE: - o = rlp.decode(rlpdata, Account, db=self.db, address=address) + o = rlp.decode(rlpdata, Account, db=self.db, address=addr) else: - o = Account.blank_account(self.db, address, 0) - self.cache[address] = o + o = Account.blank_account(self.db, addr, 0) + self.cache[addr] = o o._mutable = True o._cached_rlp = None return o diff --git a/mythril/interfaces/cli.py b/mythril/interfaces/cli.py index 08a8d4e7..443812bb 100644 --- a/mythril/interfaces/cli.py +++ b/mythril/interfaces/cli.py @@ -15,10 +15,11 @@ import argparse from mythril.exceptions import CriticalError, AddressNotFoundError from mythril.mythril import Mythril from mythril.version import VERSION +import mythril.support.signatures as sigs -def exit_with_error(format, message): - if format == "text" or format == "markdown": +def exit_with_error(format_, message): + if format_ == "text" or format_ == "markdown": print(message) else: result = {"success": False, "error": str(message), "issues": []} @@ -147,18 +148,19 @@ def main(): default=22, help="Maximum recursion depth for symbolic execution", ) + + options.add_argument( + "--strategy", + choices=["dfs", "bfs", "naive-random", "weighted-random"], + default="dfs", + help="Symbolic execution strategy", + ) options.add_argument( "--max-transaction-count", type=int, default=3, help="Maximum number of transactions issued by laser", ) - options.add_argument( - "--strategy", - choices=["dfs", "bfs"], - default="dfs", - help="Symbolic execution strategy", - ) options.add_argument( "--execution-timeout", type=int, @@ -179,6 +181,12 @@ def main(): "--enable-physics", action="store_true", help="enable graph physics simulation" ) options.add_argument("-v", type=int, help="log level (0-2)", metavar="LOG_LEVEL") + options.add_argument( + "-q", + "--query-signature", + action="store_true", + help="Lookup function signatures through www.4byte.directory", + ) rpc = parser.add_argument_group("RPC options") rpc.add_argument( @@ -231,6 +239,13 @@ def main(): args.outform, "Invalid -v value, you can find valid values in usage" ) + if args.query_signature: + if sigs.ethereum_input_decoder == None: + exit_with_error( + args.outform, + "The --query-signature function requires the python package ethereum-input-decoder", + ) + # -- commands -- if args.hash: print(Mythril.hash_for_function_signature(args.hash)) @@ -241,7 +256,12 @@ def main(): # infura = None, rpc = None, rpctls = None # solc_args = None, dynld = None, max_recursion_depth = 12): - mythril = Mythril(solv=args.solv, dynld=args.dynld, solc_args=args.solc_args) + mythril = Mythril( + solv=args.solv, + dynld=args.dynld, + solc_args=args.solc_args, + enable_online_lookup=args.query_signature, + ) if args.dynld and not (args.rpc or args.i): mythril.set_api_from_config_path() diff --git a/mythril/laser/ethereum/call.py b/mythril/laser/ethereum/call.py index 589d44a9..b96e5ac3 100644 --- a/mythril/laser/ethereum/call.py +++ b/mythril/laser/ethereum/call.py @@ -1,8 +1,8 @@ import logging from typing import Union -from z3 import simplify, BitVecRef, BitVecNumRef, BoolRef +from z3 import simplify, BitVecRef, BitVecNumRef, BoolRef, Extract import mythril.laser.ethereum.util as util -from mythril.laser.ethereum.state import Account, CalldataType, GlobalState +from mythril.laser.ethereum.state import Account, CalldataType, GlobalState, Calldata from mythril.support.loader import DynLoader import re @@ -125,7 +125,7 @@ def get_callee_account( try: code = dynamic_loader.dynld(environment.active_account.address, callee_address) - except Exception as e: + except Exception: logging.debug("Unable to execute dynamic loader.") raise ValueError() if code is None: @@ -156,21 +156,33 @@ def get_call_data( :return: Tuple containing: call_data array from memory or empty array if symbolic, type found """ state = global_state.mstate + transaction_id = "{}_internalcall".format(global_state.current_transaction.id) try: # TODO: This only allows for either fully concrete or fully symbolic calldata. # Improve management of memory and callata to support a mix between both types. - call_data = state.memory[ + calldata_from_mem = state.memory[ util.get_concrete_int(memory_start) : util.get_concrete_int( memory_start + memory_size ) ] - if len(call_data) < 32 and pad: - call_data += [0] * (32 - len(call_data)) + i = 0 + starting_calldata = [] + while i < len(calldata_from_mem): + elem = calldata_from_mem[i] + if isinstance(elem, int): + starting_calldata.append(elem) + i += 1 + else: # BitVec + for j in range(0, elem.size(), 8): + starting_calldata.append(Extract(j + 7, j, elem)) + i += 1 + + call_data = Calldata(transaction_id, starting_calldata) call_data_type = CalldataType.CONCRETE logging.debug("Calldata: " + str(call_data)) except TypeError: logging.info("Unsupported symbolic calldata offset") call_data_type = CalldataType.SYMBOLIC - call_data = [] + call_data = Calldata("{}_internalcall".format(transaction_id)) return call_data, call_data_type diff --git a/mythril/laser/ethereum/cfg.py b/mythril/laser/ethereum/cfg.py index 5ccf5287..93950104 100644 --- a/mythril/laser/ethereum/cfg.py +++ b/mythril/laser/ethereum/cfg.py @@ -38,9 +38,11 @@ class Node: code = "" for state in self.states: instruction = state.get_current_instruction() + code += str(instruction["address"]) + " " + instruction["opcode"] if instruction["opcode"].startswith("PUSH"): code += " " + instruction["argument"] + code += "\\n" return dict( diff --git a/mythril/laser/ethereum/instructions.py b/mythril/laser/ethereum/instructions.py index f0494ec1..41925c11 100644 --- a/mythril/laser/ethereum/instructions.py +++ b/mythril/laser/ethereum/instructions.py @@ -38,13 +38,14 @@ from mythril.laser.ethereum.evm_exceptions import ( InvalidInstruction, ) from mythril.laser.ethereum.keccak import KeccakFunctionManager -from mythril.laser.ethereum.state import GlobalState, CalldataType +from mythril.laser.ethereum.state import GlobalState, CalldataType, Calldata from mythril.laser.ethereum.transaction import ( MessageCallTransaction, TransactionStartSignal, ContractCreationTransaction, ) from mythril.support.loader import DynLoader +from mythril.analysis.solver import get_model TT256 = 2 ** 256 TT256M1 = 2 ** 256 - 1 @@ -162,7 +163,6 @@ class Instruction: op1 = If(op1, BitVecVal(1, 256), BitVecVal(0, 256)) if type(op2) == BoolRef: op2 = If(op2, BitVecVal(1, 256), BitVecVal(0, 256)) - stack.append(op1 & op2) return [global_state] @@ -314,8 +314,8 @@ class Instruction: @StateTransition() def exp_(self, global_state: GlobalState) -> List[GlobalState]: state = global_state.mstate - base, exponent = util.pop_bitvec(state), util.pop_bitvec(state) + if (type(base) != BitVecNumRef) or (type(exponent) != BitVecNumRef): state.stack.append( global_state.new_bitvec( @@ -423,89 +423,14 @@ class Instruction: environment = global_state.environment op0 = state.stack.pop() - try: - offset = util.get_concrete_int(simplify(op0)) - b = environment.calldata[offset] - except TypeError: - logging.debug("CALLDATALOAD: Unsupported symbolic index") - state.stack.append( - global_state.new_bitvec( - "calldata_" - + str(environment.active_account.contract_name) - + "[" - + str(simplify(op0)) - + "]", - 256, - ) - ) - return [global_state] - except IndexError: - logging.debug("Calldata not set, using symbolic variable instead") - state.stack.append( - global_state.new_bitvec( - "calldata_" - + str(environment.active_account.contract_name) - + "[" - + str(simplify(op0)) - + "]", - 256, - ) - ) - return [global_state] - - if type(b) == int: - - try: - val = b"".join( - [ - calldata.to_bytes(1, byteorder="big") - for calldata in environment.calldata[offset : offset + 32] - ] - ) - - logging.debug( - "Final value: " + str(int.from_bytes(val, byteorder="big")) - ) - state.stack.append(BitVecVal(int.from_bytes(val, byteorder="big"), 256)) - - except (TypeError, AttributeError): - state.stack.append( - global_state.new_bitvec( - "calldata_" - + str(environment.active_account.contract_name) - + "[" - + str(simplify(op0)) - + "]", - 256, - ) - ) - else: - # symbolic variable - state.stack.append( - global_state.new_bitvec( - "calldata_" - + str(environment.active_account.contract_name) - + "[" - + str(simplify(op0)) - + "]", - 256, - ) - ) - + state.stack.append(environment.calldata.get_word_at(op0)) return [global_state] @StateTransition() def calldatasize_(self, global_state: GlobalState) -> List[GlobalState]: state = global_state.mstate environment = global_state.environment - if environment.calldata_type == CalldataType.SYMBOLIC: - state.stack.append( - global_state.new_bitvec( - "calldatasize_" + environment.active_account.contract_name, 256 - ) - ) - else: - state.stack.append(BitVecVal(len(environment.calldata), 256)) + state.stack.append(environment.calldata.calldatasize) return [global_state] @StateTransition() @@ -536,7 +461,7 @@ class Instruction: size = simplify(op2) size_sym = True - if dstart_sym or size_sym: + if size_sym: state.mem_extend(mstart, 1) state.memory[mstart] = global_state.new_bitvec( "calldata_" @@ -574,11 +499,17 @@ class Instruction: return [global_state] try: - i_data = environment.calldata[dstart] + i_data = dstart + + new_memory = [] + for i in range(size): + new_memory.append(environment.calldata[i_data]) + i_data = ( + i_data + 1 if isinstance(i_data, int) else simplify(i_data + 1) + ) - for i in range(mstart, mstart + size): - state.memory[i] = environment.calldata[i_data] - i_data += 1 + for i in range(0, len(new_memory), 32): + state.memory[i + mstart] = simplify(Concat(new_memory[i : i + 32])) except IndexError: logging.debug("Exception copying calldata to memory") @@ -636,7 +567,6 @@ class Instruction: global keccak_function_manager state = global_state.mstate - environment = global_state.environment op0, op1 = state.stack.pop(), state.stack.pop() try: @@ -886,12 +816,9 @@ class Instruction: try: # Attempt to concretize value - _bytes = util.concrete_int_to_bytes(value) - state.memory[mstart : mstart + len(_bytes)] = _bytes - - except (AttributeError, TypeError): + except: try: state.memory[mstart] = value except TypeError: @@ -1161,7 +1088,7 @@ class Instruction: state = global_state.mstate dpth = int(self.op_code[3:]) state.stack.pop(), state.stack.pop() - [state.stack.pop() for x in range(dpth)] + [state.stack.pop() for _ in range(dpth)] # Not supported return [global_state] diff --git a/mythril/laser/ethereum/keccak.py b/mythril/laser/ethereum/keccak.py index d99c49aa..a69f782d 100644 --- a/mythril/laser/ethereum/keccak.py +++ b/mythril/laser/ethereum/keccak.py @@ -1,6 +1,7 @@ from z3 import ExprRef, BitVecRef + class KeccakFunctionManager: def __init__(self): self.keccak_expression_mapping = {} diff --git a/mythril/laser/ethereum/natives.py b/mythril/laser/ethereum/natives.py index 1e379fbe..b764aba1 100644 --- a/mythril/laser/ethereum/natives.py +++ b/mythril/laser/ethereum/natives.py @@ -87,4 +87,4 @@ def native_contracts(address: int, data: List): takes integer address 1, 2, 3, 4 """ functions = (ecrecover, sha256, ripemd160, identity) - return functions[address - 1](data) + return functions[address - 1](data.starting_calldata) diff --git a/mythril/laser/ethereum/state.py b/mythril/laser/ethereum/state.py index 6046b82d..43942c09 100644 --- a/mythril/laser/ethereum/state.py +++ b/mythril/laser/ethereum/state.py @@ -1,10 +1,29 @@ -from z3 import BitVec, BitVecVal, BitVecRef, BitVecNumRef, Solver, ExprRef, sat +from z3 import ( + BitVec, + BitVecVal, + BitVecRef, + BitVecNumRef, + BitVecSort, + Solver, + ExprRef, + Concat, + sat, + simplify, + Array, + ForAll, + Solver, + UGT, + Implies, +) +from z3.z3types import Z3Exception from mythril.disassembler.disassembly import Disassembly from mythril.laser.ethereum.cfg import Node from copy import copy, deepcopy from enum import Enum from random import randint from typing import KeysView, Dict, List, Union, Any +from mythril.laser.ethereum.util import get_concrete_int + from mythril.laser.ethereum.evm_exceptions import ( StackOverflowException, StackUnderflowException, @@ -16,6 +35,88 @@ class CalldataType(Enum): SYMBOLIC = 2 +class Calldata: + """ + Calldata class representing the calldata of a transaction + """ + + def __init__(self, tx_id, starting_calldata=None): + """ + Constructor for Calldata + :param tx_id: unique value representing the transaction the calldata is for + :param starting_calldata: byte array representing the concrete calldata of a transaction + """ + self.tx_id = tx_id + if starting_calldata: + self._calldata = [] + self.calldatasize = BitVecVal(len(starting_calldata), 256) + self.concrete = True + else: + self._calldata = Array( + "{}_calldata".format(self.tx_id), BitVecSort(256), BitVecSort(8) + ) + self.calldatasize = BitVec("{}_calldatasize".format(self.tx_id), 256) + self.concrete = False + + self.starting_calldata = starting_calldata or [] + + @property + def constraints(self): + constraints = [] + + if self.concrete: + for calldata_byte in self.starting_calldata: + if type(calldata_byte) == int: + self._calldata.append(BitVecVal(calldata_byte, 8)) + else: + self._calldata.append(calldata_byte) + constraints.append(self.calldatasize == len(self.starting_calldata)) + else: + x = BitVec("x", 256) + constraints.append( + ForAll(x, Implies(self[x] != 0, UGT(self.calldatasize, x))) + ) + + return constraints + + def concretized(self, model): + result = [] + for i in range( + get_concrete_int(model.eval(self.calldatasize, model_completion=True)) + ): + result.append(get_concrete_int(model.eval(self[i], model_completion=True))) + + return result + + def get_word_at(self, index: int): + return self[index : index + 32] + + def __getitem__(self, item): + if isinstance(item, slice): + try: + current_index = ( + item.start + if isinstance(item.start, BitVecRef) + else BitVecVal(item.start, 256) + ) + dataparts = [] + while simplify(current_index != item.stop): + dataparts.append(self[current_index]) + current_index = simplify(current_index + 1) + except Z3Exception: + raise IndexError("Invalid Calldata Slice") + + return simplify(Concat(dataparts)) + + if self.concrete: + try: + return self._calldata[get_concrete_int(item)] + except IndexError: + return BitVecVal(0, 8) + else: + return self._calldata[item] + + class Storage: """ Storage class represents the storage of an Account @@ -339,7 +440,6 @@ class GlobalState: def new_bitvec(self, name: str, size=256) -> BitVec: transaction_id = self.current_transaction.id - node_id = self.node.uid return BitVec("{}_{}".format(transaction_id, name), size) diff --git a/mythril/laser/ethereum/strategy/__init__.py b/mythril/laser/ethereum/strategy/__init__.py index e69de29b..099aa4fb 100644 --- a/mythril/laser/ethereum/strategy/__init__.py +++ b/mythril/laser/ethereum/strategy/__init__.py @@ -0,0 +1,25 @@ +from abc import ABC, abstractmethod + + +class BasicSearchStrategy(ABC): + __slots__ = "work_list", "max_depth" + + def __init__(self, work_list, max_depth): + self.work_list = work_list + self.max_depth = max_depth + + def __iter__(self): + return self + + @abstractmethod + def get_strategic_global_state(self): + raise NotImplementedError("Must be implemented by a subclass") + + def __next__(self): + try: + global_state = self.get_strategic_global_state() + if global_state.mstate.depth >= self.max_depth: + return self.__next__() + return global_state + except IndexError: + raise StopIteration diff --git a/mythril/laser/ethereum/strategy/basic.py b/mythril/laser/ethereum/strategy/basic.py index b67f602f..232ac19b 100644 --- a/mythril/laser/ethereum/strategy/basic.py +++ b/mythril/laser/ethereum/strategy/basic.py @@ -3,55 +3,75 @@ This module implements basic symbolic execution search strategies """ from ..state import GlobalState from typing import List +from random import randrange +from . import BasicSearchStrategy +try: + from random import choices +except ImportError: -class DepthFirstSearchStrategy: + # This is for supporting python versions < 3.6 + from itertools import accumulate + from random import random + from bisect import bisect + + def choices(population, weights=None): + """ + Returns a random element out of the population based on weight. + If the relative weights or cumulative weights are not specified, + the selections are made with equal probability. + """ + if weights is None: + return [population[int(random() * len(population))]] + cum_weights = accumulate(weights) + return [ + population[ + bisect(cum_weights, random() * cum_weights[-1], 0, len(population) - 1) + ] + ] + + +class DepthFirstSearchStrategy(BasicSearchStrategy): """ Implements a depth first search strategy I.E. Follow one path to a leaf, and then continue to the next one """ - def __init__(self, work_list: List[GlobalState], max_depth: float): - self.work_list = work_list - self.max_depth = max_depth - - def __iter__(self): - return self + def get_strategic_global_state(self) -> GlobalState: + return self.work_list.pop() - def __next__(self) -> GlobalState: - """ Picks the next state to execute """ - try: - # This strategies assumes that new states are appended at the end of the work_list - # By taking the last element we effectively pick the "newest" states, which amounts to dfs - global_state = self.work_list.pop() - if global_state.mstate.depth >= self.max_depth: - return self.__next__() - return global_state - except IndexError: - raise StopIteration() - -class BreadthFirstSearchStrategy: +class BreadthFirstSearchStrategy(BasicSearchStrategy): """ Implements a breadth first search strategy I.E. Execute all states of a "level" before continuing """ - def __init__(self, work_list: List[GlobalState], max_depth: float): - self.work_list = work_list - self.max_depth = max_depth - - def __iter__(self) -> "BreadthFirstSearchStrategy": - return self - - def __next__(self) -> GlobalState: - """ Picks the next state to execute """ - try: - # This strategies assumes that new states are appended at the end of the work_list - # By taking the first element we effectively pick the "oldest" states, which amounts to bfs - global_state = self.work_list.pop(0) - if global_state.mstate.depth >= self.max_depth: - return self.__next__() - return global_state - except IndexError: - raise StopIteration() + def get_strategic_global_state(self) -> GlobalState: + return self.work_list.pop(0) + + +class ReturnRandomNaivelyStrategy(BasicSearchStrategy): + """ + chooses a random state from the worklist with equal likelihood + """ + + def get_strategic_global_state(self) -> GlobalState: + if len(self.work_list) > 0: + return self.work_list.pop(randrange(len(self.work_list))) + else: + raise IndexError + + +class ReturnWeightedRandomStrategy(BasicSearchStrategy): + """ + chooses a random state from the worklist with likelihood based on inverse proportion to depth + """ + + def get_strategic_global_state(self) -> GlobalState: + probability_distribution = [ + 1 / (global_state.mstate.depth + 1) for global_state in self.work_list + ] + return self.work_list.pop( + choices(range(len(self.work_list)), probability_distribution)[0] + ) diff --git a/mythril/laser/ethereum/svm.py b/mythril/laser/ethereum/svm.py index 818b8d3f..b5b6ccca 100644 --- a/mythril/laser/ethereum/svm.py +++ b/mythril/laser/ethereum/svm.py @@ -1,5 +1,6 @@ import logging from typing import List, Tuple, Union, Callable +from mythril.disassembler.disassembly import Disassembly from mythril.laser.ethereum.state import WorldState, GlobalState from mythril.laser.ethereum.transaction import ( TransactionStartSignal, @@ -105,7 +106,7 @@ class LaserEVM: if len(self.open_states) == 0: logging.warning( "No contract was created during the execution of contract creation " - "Increase the resources for creation execution (--max-depth or --create_timeout)" + "Increase the resources for creation execution (--max-depth or --create-timeout)" ) # Reset code coverage @@ -353,10 +354,11 @@ class LaserEVM: environment = state.environment disassembly = environment.code - if address in state.environment.code.addr_to_func: + if address in disassembly.address_to_function_name: # Enter a new function - - environment.active_function_name = disassembly.addr_to_func[address] + environment.active_function_name = disassembly.address_to_function_name[ + address + ] new_node.flags |= NodeFlags.FUNC_ENTRY logging.debug( diff --git a/mythril/laser/ethereum/taint_analysis.py b/mythril/laser/ethereum/taint_analysis.py index d014e78f..f0ff9ede 100644 --- a/mythril/laser/ethereum/taint_analysis.py +++ b/mythril/laser/ethereum/taint_analysis.py @@ -8,6 +8,7 @@ from mythril.laser.ethereum.state import GlobalState, Environment from mythril.analysis.symbolic import SymExecWrapper + class TaintRecord: """ TaintRecord contains tainting information for a specific (state, node) diff --git a/mythril/laser/ethereum/transaction/concolic.py b/mythril/laser/ethereum/transaction/concolic.py index 597de21d..db3f0116 100644 --- a/mythril/laser/ethereum/transaction/concolic.py +++ b/mythril/laser/ethereum/transaction/concolic.py @@ -10,6 +10,7 @@ from mythril.laser.ethereum.state import ( CalldataType, Account, WorldState, + Calldata, ) from mythril.disassembler.disassembly import Disassembly from mythril.laser.ethereum.cfg import Node, Edge, JumpType @@ -32,12 +33,13 @@ def execute_message_call( del laser_evm.open_states[:] for open_world_state in open_states: + next_transaction_id = get_next_transaction_id() transaction = MessageCallTransaction( - identifier=get_next_transaction_id(), + identifier=next_transaction_id, world_state=open_world_state, callee_account=open_world_state[callee_address], caller=caller_address, - call_data=data, + call_data=Calldata(next_transaction_id, data), gas_price=gas_price, call_value=value, origin=origin_address, diff --git a/mythril/laser/ethereum/transaction/symbolic.py b/mythril/laser/ethereum/transaction/symbolic.py index 540b2b6c..50337e73 100644 --- a/mythril/laser/ethereum/transaction/symbolic.py +++ b/mythril/laser/ethereum/transaction/symbolic.py @@ -3,7 +3,7 @@ from logging import debug from mythril.disassembler.disassembly import Disassembly from mythril.laser.ethereum.cfg import Node, Edge, JumpType -from mythril.laser.ethereum.state import CalldataType, Account +from mythril.laser.ethereum.state import CalldataType, Account, Calldata from mythril.laser.ethereum.transaction.transaction_models import ( MessageCallTransaction, ContractCreationTransaction, @@ -28,7 +28,7 @@ def execute_message_call(laser_evm, callee_address: str) -> None: callee_account=open_world_state[callee_address], caller=BitVec("caller{}".format(next_transaction_id), 256), identifier=next_transaction_id, - call_data=[], + call_data=Calldata(next_transaction_id), gas_price=BitVec("gas_price{}".format(next_transaction_id), 256), call_value=BitVec("call_value{}".format(next_transaction_id), 256), origin=BitVec("origin{}".format(next_transaction_id), 256), diff --git a/mythril/laser/ethereum/transaction/transaction_models.py b/mythril/laser/ethereum/transaction/transaction_models.py index 50a2e540..a4ff2e2a 100644 --- a/mythril/laser/ethereum/transaction/transaction_models.py +++ b/mythril/laser/ethereum/transaction/transaction_models.py @@ -1,7 +1,7 @@ import logging from typing import Union from mythril.disassembler.disassembly import Disassembly -from mythril.laser.ethereum.state import GlobalState, Environment, WorldState, Account +from mythril.laser.ethereum.state import GlobalState, Environment, WorldState, Account, Calldata from z3 import BitVec, BitVecNumRef import array @@ -42,7 +42,7 @@ class MessageCallTransaction: world_state: WorldState, callee_account: Account, caller: BitVecNumRef, - call_data=(), + call_data=None, identifier=None, gas_price=None, call_value=None, @@ -55,7 +55,11 @@ class MessageCallTransaction: self.world_state = world_state self.callee_account = callee_account self.caller = caller - self.call_data = call_data + self.call_data = ( + Calldata(self.id, call_data) + if not isinstance(call_data, Calldata) + else call_data + ) self.gas_price = ( BitVec("gasprice{}".format(identifier), 256) if gas_price is None @@ -92,6 +96,9 @@ class MessageCallTransaction: global_state = GlobalState(self.world_state, environment, None) global_state.environment.active_function_name = "fallback" + global_state.mstate.constraints.extend( + global_state.environment.calldata.constraints + ) return global_state @@ -110,7 +117,7 @@ class ContractCreationTransaction: identifier=None, callee_account=None, code=None, - call_data=(), + call_data=None, gas_price=None, call_value=None, origin=None, @@ -147,7 +154,11 @@ class ContractCreationTransaction: else call_data_type ) - self.call_data = call_data + self.call_data = ( + Calldata(self.id, call_data) + if not isinstance(call_data, Calldata) + else call_data + ) self.origin = origin self.code = code self.return_data = None diff --git a/mythril/laser/ethereum/util.py b/mythril/laser/ethereum/util.py index d27fa89f..966c9c0d 100644 --- a/mythril/laser/ethereum/util.py +++ b/mythril/laser/ethereum/util.py @@ -2,7 +2,6 @@ import re from z3 import * import logging from typing import Union, List, Dict -from mythril.laser.ethereum.state import MachineState import sha3 as _sha3 @@ -38,14 +37,14 @@ def get_instruction_index( return None -def get_trace_line(instr: Dict, state: MachineState) -> str: +def get_trace_line(instr: Dict, state: "MachineState") -> str: stack = str(state.stack[::-1]) # stack = re.sub("(\d+)", lambda m: hex(int(m.group(1))), stack) stack = re.sub("\n", "", stack) return str(instr["address"]) + " " + instr["opcode"] + "\tSTACK: " + stack -def pop_bitvec(state: MachineState) -> BitVecVal: +def pop_bitvec(state: "MachineState") -> BitVecVal: # pop one element from stack, converting boolean expressions and # concrete Python variables to BitVecVal @@ -85,18 +84,20 @@ def get_concrete_int(item: Union[int, BitVecNumRef, BoolRef]) -> int: def concrete_int_from_bytes(_bytes: bytes, start_index: int) -> int: + # logging.debug("-- concrete_int_from_bytes: " + str(_bytes[start_index:start_index+32])) b = _bytes[start_index : start_index + 32] val = int.from_bytes(b, byteorder="big") return val -def concrete_int_to_bytes(val: int) -> bytes: - if isinstance(val, int): +def concrete_int_to_bytes(val): + # logging.debug("concrete_int_to_bytes " + str(val)) + if type(val) == int: return val.to_bytes(32, byteorder="big") return (simplify(val).as_long()).to_bytes(32, byteorder="big") -def bytearray_to_int(arr: bytearray) -> int: +def bytearray_to_int(arr): o = 0 for a in arr: o = (o << 8) + a diff --git a/mythril/mythril.py b/mythril/mythril.py index d917d7f8..d5dbd76f 100644 --- a/mythril/mythril.py +++ b/mythril/mythril.py @@ -77,18 +77,23 @@ class Mythril(object): """ - def __init__(self, solv=None, solc_args=None, dynld=False): + def __init__( + self, solv=None, solc_args=None, dynld=False, enable_online_lookup=False + ): self.solv = solv self.solc_args = solc_args self.dynld = dynld + self.enable_online_lookup = enable_online_lookup self.mythril_dir = self._init_mythril_dir() - self.sigs = signatures.SignatureDb() + self.sigs = signatures.SignatureDb( + enable_online_lookup=self.enable_online_lookup + ) try: self.sigs.open() # tries mythril_dir/signatures.json by default (provide path= arg to make this configurable) - except FileNotFoundError as fnfe: + except FileNotFoundError: logging.info( "No signature database found. Creating database if sigs are loaded in: " + self.sigs.signatures_file @@ -291,8 +296,7 @@ class Mythril(object): self.set_api_rpc(dynamic_loading) def search_db(self, search): - def search_callback(contract, address, balance): - + def search_callback(_, address, balance): print("Address: " + address + ", balance: " + str(balance)) try: @@ -309,7 +313,11 @@ class Mythril(object): def load_from_bytecode(self, code): address = util.get_indexed_address(0) - self.contracts.append(ETHContract(code, name="MAIN")) + self.contracts.append( + ETHContract( + code, name="MAIN", enable_online_lookup=self.enable_online_lookup + ) + ) return address, self.contracts[-1] # return address and contract object def load_from_address(self, address): @@ -320,7 +328,7 @@ class Mythril(object): code = self.eth.eth_getCode(address) except FileNotFoundError as e: raise CriticalError("IPC error: " + str(e)) - except ConnectionError as e: + except ConnectionError: raise CriticalError( "Could not connect to RPC server. Make sure that your node is running and that RPC parameters are set correctly." ) @@ -332,7 +340,13 @@ class Mythril(object): "Received an empty response from eth_getCode. Check the contract address and verify that you are on the correct chain." ) else: - self.contracts.append(ETHContract(code, name=address)) + self.contracts.append( + ETHContract( + code, + name=address, + enable_online_lookup=self.enable_online_lookup, + ) + ) return address, self.contracts[-1] # return address and contract object def load_from_solidity(self, solidity_files): @@ -541,7 +555,7 @@ class Mythril(object): ) except FileNotFoundError as e: raise CriticalError("IPC error: " + str(e)) - except ConnectionError as e: + except ConnectionError: raise CriticalError( "Could not connect to RPC server. Make sure that your node is running and that RPC parameters are set correctly." ) diff --git a/mythril/support/signatures.py b/mythril/support/signatures.py index 4bc1880c..ba214db6 100644 --- a/mythril/support/signatures.py +++ b/mythril/support/signatures.py @@ -11,7 +11,6 @@ from subprocess import Popen, PIPE from mythril.exceptions import CompilerError -# todo: tintinweb - make this a normal requirement? (deps: eth-abi and requests, both already required by mythril) try: # load if available but do not fail import ethereum_input_decoder @@ -54,7 +53,7 @@ except ImportError: class SignatureDb(object): - def __init__(self, enable_online_lookup=True): + def __init__(self, enable_online_lookup=False): """ Constr :param enable_online_lookup: enable onlien signature hash lookup @@ -191,9 +190,12 @@ class SignatureDb(object): "online function signature lookup not available. will not try to lookup hash for the next 2 minutes. exception: %r" % fbdole ) + + if sighash not in self.signatures: + return [] if type(self.signatures[sighash]) != list: return [self.signatures[sighash]] - return self.signatures[sighash] # raise keyerror + return self.signatures[sighash] def __getitem__(self, item): """ diff --git a/requirements.txt b/requirements.txt index 30162f8d..b41daf56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ coverage eth_abi>=1.0.0 eth-account>=0.1.0a2 ethereum>=2.3.2 +ethereum-input-decoder>=0.2.2 eth-hash>=0.1.0 eth-keyfile>=0.5.1 eth-keys>=0.2.0b3 diff --git a/setup.py b/setup.py index fe9ad5f5..6aa0dfb7 100755 --- a/setup.py +++ b/setup.py @@ -96,6 +96,7 @@ setup( "mock", "configparser>=3.5.0", "persistent>=4.2.0", + "ethereum-input-decoder>=0.2.2", ], tests_require=["pytest>=3.6.0", "pytest_mock", "pytest-cov"], python_requires=">=3.5", diff --git a/static/mythril.png b/static/mythril.png deleted file mode 100644 index f59c807e..00000000 Binary files a/static/mythril.png and /dev/null differ diff --git a/static/mythril_new.png b/static/mythril_new.png new file mode 100644 index 00000000..ecdd7ae9 Binary files /dev/null and b/static/mythril_new.png differ diff --git a/tests/analysis/test_delegatecall.py b/tests/analysis/test_delegatecall.py index a363f0da..72519d57 100644 --- a/tests/analysis/test_delegatecall.py +++ b/tests/analysis/test_delegatecall.py @@ -199,7 +199,7 @@ def test_delegate_call(sym_mock, concrete_mock, curr_instruction): statespace.calls = [call] # act - issues = execute(statespace) + execute(statespace) # assert assert concrete_mock.call_count == 1 diff --git a/tests/cmd_line_test.py b/tests/cmd_line_test.py index 91c8714d..394c5d42 100644 --- a/tests/cmd_line_test.py +++ b/tests/cmd_line_test.py @@ -29,10 +29,7 @@ class TruffleTestCase(BaseTestCase): command = "cd {}; truffle compile; python3 {} --truffle".format( truffle_project_root, MYTH ) - self.assertIn( - "In the function `withdrawfunds()` a non-zero amount of Ether is sent to msg.sender.", - output_of(command), - ) + self.assertIn("=== Ether send ====", output_of(command)) class InfuraTestCase(BaseTestCase): diff --git a/tests/disassembler/__init__.py b/tests/disassembler/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/disassembler/disassembly.py b/tests/disassembler/disassembly.py new file mode 100644 index 00000000..49bca31d --- /dev/null +++ b/tests/disassembler/disassembly.py @@ -0,0 +1,61 @@ +from mythril.disassembler.disassembly import * + +instruction_list = [ + {"opcode": "PUSH4", "argument": "0x10203040"}, + {"opcode": "EQ"}, + {"opcode": "PUSH4", "argument": "0x40302010"}, + {"opcode": "JUMPI"}, +] + + +def test_get_function_info(mocker): + # Arrange + global instruction_list + + signature_database_mock = SignatureDb() + mocker.patch.object(signature_database_mock, "get") + signature_database_mock.get.return_value = ["function_name"] + + # Act + function_hash, entry_point, function_name = get_function_info( + 0, instruction_list, signature_database_mock + ) + + # Assert + assert function_hash == "0x10203040" + assert entry_point == 0x40302010 + assert function_name == "function_name" + + +def test_get_function_info_multiple_names(mocker): + # Arrange + global instruction_list + + signature_database_mock = SignatureDb() + mocker.patch.object(signature_database_mock, "get") + signature_database_mock.get.return_value = ["function_name", "another_name"] + + # Act + function_hash, entry_point, function_name = get_function_info( + 0, instruction_list, signature_database_mock + ) + + # Assert + assert function_name == "**ambiguous** function_name" + + +def test_get_function_info_no_names(mocker): + # Arrange + global instruction_list + + signature_database_mock = SignatureDb() + mocker.patch.object(signature_database_mock, "get") + signature_database_mock.get.return_value = [] + + # Act + function_hash, entry_point, function_name = get_function_info( + 0, instruction_list, signature_database_mock + ) + + # Assert + assert function_name == "_function_0x10203040" diff --git a/tests/laser/evm_testsuite/evm_test.py b/tests/laser/evm_testsuite/evm_test.py index 7e96a8c5..5bab5e45 100644 --- a/tests/laser/evm_testsuite/evm_test.py +++ b/tests/laser/evm_testsuite/evm_test.py @@ -3,8 +3,8 @@ from mythril.laser.ethereum.svm import LaserEVM from mythril.laser.ethereum.state import Account from mythril.disassembler.disassembly import Disassembly from mythril.laser.ethereum.transaction.concolic import execute_message_call +from mythril.analysis.solver import get_model from datetime import datetime -from mythril.laser.ethereum.util import get_concrete_int import binascii import json from pathlib import Path @@ -83,6 +83,7 @@ def test_vmtest( return world_state = laser_evm.open_states[0] + model = get_model(next(iter(laser_evm.nodes.values())).states[0].mstate.constraints) for address, details in post_condition.items(): account = world_state[address] @@ -92,5 +93,9 @@ def test_vmtest( for index, value in details["storage"].items(): expected = int(value, 16) - actual = get_concrete_int(account.storage[int(index, 16)]) + if type(account.storage[int(index, 16)]) != int: + actual = model.eval(account.storage[int(index, 16)]) + actual = 1 if actual == True else 0 if actual == False else actual + else: + actual = account.storage[int(index, 16)] assert actual == expected diff --git a/tests/laser/state/calldata_test.py b/tests/laser/state/calldata_test.py new file mode 100644 index 00000000..c2d76c81 --- /dev/null +++ b/tests/laser/state/calldata_test.py @@ -0,0 +1,102 @@ +import pytest +from mythril.laser.ethereum.state import Calldata +from z3 import Solver, simplify +from z3.z3types import Z3Exception + + +uninitialized_test_data = [ + ([]), # Empty concrete calldata + ([1, 4, 5, 3, 4, 72, 230, 53]), # Concrete calldata +] + + +@pytest.mark.parametrize("starting_calldata", uninitialized_test_data) +def test_concrete_calldata_uninitialized_index(starting_calldata): + # Arrange + calldata = Calldata(0, starting_calldata) + solver = Solver() + + # Act + value = calldata[100] + value2 = calldata.get_word_at(200) + + solver.add(calldata.constraints) + solver.check() + model = solver.model() + + value = model.eval(value) + value2 = model.eval(value2) + + # Assert + assert value == 0 + assert value2 == 0 + + +def test_concrete_calldata_calldatasize(): + # Arrange + calldata = Calldata(0, [1, 4, 7, 3, 7, 2, 9]) + solver = Solver() + + # Act + solver.add(calldata.constraints) + solver.check() + model = solver.model() + + result = model.eval(calldata.calldatasize) + + # Assert + assert result == 7 + + +def test_symbolic_calldata_constrain_index(): + # Arrange + calldata = Calldata(0) + solver = Solver() + + # Act + constraint = calldata[100] == 50 + + value = calldata[100] + + solver.add(calldata.constraints + [constraint]) + solver.check() + model = solver.model() + + value = model.eval(value) + calldatasize = model.eval(calldata.calldatasize) + + # Assert + assert value == 50 + assert simplify(calldatasize >= 100) + + +def test_concrete_calldata_constrain_index(): + # Arrange + calldata = Calldata(0, [1, 4, 7, 3, 7, 2, 9]) + solver = Solver() + + # Act + constraint = calldata[2] == 3 + + solver.add(calldata.constraints + [constraint]) + result = solver.check() + + # Assert + assert str(result) == "unsat" + + +def test_concrete_calldata_constrain_index(): + # Arrange + calldata = Calldata(0) + solver = Solver() + + # Act + constraints = [] + constraints.append(calldata[51] == 1) + constraints.append(calldata.calldatasize == 50) + + solver.add(calldata.constraints + constraints) + result = solver.check() + + # Assert + assert str(result) == "unsat" diff --git a/tests/laser/state/mstack_test.py b/tests/laser/state/mstack_test.py index 9724fd38..03787075 100644 --- a/tests/laser/state/mstack_test.py +++ b/tests/laser/state/mstack_test.py @@ -44,7 +44,7 @@ class MachineStackTest(BaseTestCase): mstack = MachineStack([0, 1]) with pytest.raises(NotImplementedError): - mstack = mstack + [2] + mstack + [2] @staticmethod def test_mstack_no_support_iadd(): diff --git a/tests/laser/transaction/symbolic_test.py b/tests/laser/transaction/symbolic_test.py index e14e5618..2ca12f63 100644 --- a/tests/laser/transaction/symbolic_test.py +++ b/tests/laser/transaction/symbolic_test.py @@ -59,7 +59,7 @@ def test_execute_contract_creation(mocked_setup: MagicMock): mocked_setup.side_effect = _is_contract_creation # Act - new_account = execute_contract_creation(laser_evm, "606000") + execute_contract_creation(laser_evm, "606000") # Assert # mocked_setup.assert_called() diff --git a/tests/report_test.py b/tests/report_test.py index 26aa5be6..8cee1d65 100644 --- a/tests/report_test.py +++ b/tests/report_test.py @@ -23,7 +23,7 @@ def _fix_debug_data(json_str): def _generate_report(input_file): - contract = ETHContract(input_file.read_text()) + contract = ETHContract(input_file.read_text(), enable_online_lookup=False) sym = SymExecWrapper( contract, address=(util.get_indexed_address(0)), @@ -43,7 +43,9 @@ def _generate_report(input_file): def reports(): """ Fixture that analyses all reports""" pool = Pool(cpu_count()) - input_files = sorted([f for f in TESTDATA_INPUTS.iterdir()]) + input_files = sorted( + [f for f in TESTDATA_INPUTS.iterdir() if f.name != "environments.sol.o"] + ) results = pool.map(_generate_report, input_files) return results diff --git a/tests/svm_test.py b/tests/svm_test.py index ba580440..acf3329e 100644 --- a/tests/svm_test.py +++ b/tests/svm_test.py @@ -69,7 +69,7 @@ class SVMTestCase(BaseTestCase): def test_laser_result(self): for input_file in TESTDATA_INPUTS_CONTRACTS.iterdir(): - if input_file.name == "weak_random.sol": + if input_file.name in ["weak_random.sol", "environments.sol"]: continue output_expected = TESTDATA_OUTPUTS_EXPECTED_LASER_RESULT / ( input_file.name + ".json" diff --git a/tests/testdata/outputs_expected/calls.sol.o.graph.html b/tests/testdata/outputs_expected/calls.sol.o.graph.html index 6af2b5fa..1e131531 100644 --- a/tests/testdata/outputs_expected/calls.sol.o.graph.html +++ b/tests/testdata/outputs_expected/calls.sol.o.graph.html @@ -8,6 +8,7 @@Mythril / Ethereum LASER Symbolic VM
-Mythril / Ethereum LASER Symbolic VM
-Mythril / Ethereum LASER Symbolic VM
-Mythril / Ethereum LASER Symbolic VM
-Mythril / Ethereum LASER Symbolic VM
-Mythril / Ethereum LASER Symbolic VM
-Mythril / Ethereum LASER Symbolic VM
-Mythril / Ethereum LASER Symbolic VM
-Mythril / Ethereum LASER Symbolic VM
-Mythril / Ethereum LASER Symbolic VM
+ + + +