From c72d212c7e993f4340d7bcfc8973c7741662c0e2 Mon Sep 17 00:00:00 2001 From: Nikhil Parasaram Date: Fri, 16 Jul 2021 08:20:58 +0530 Subject: [PATCH] [WIP] Handle exponents (#1497) * Handle exponents * Bruteforce enumerate constraints * Fix evm tests * Fix imports * Fix z3 version to avoid exception * Fix z3 version to avoid exception * Fix z3 version * Fix z3 version * Hunt the issue * Hunt the issue * Hunt the issue * Hunt the issue * Hunt the issue * Fix issue * Fix issue * Add tests * Black * Fix issue * Fix issue with disassembler test * Fix issue with disassembler test --- .../analysis/module/modules/ether_thief.py | 1 - mythril/analysis/solver.py | 28 ++++--- .../ethereum/function_managers/__init__.py | 2 + .../exponent_function_manager.py | 74 +++++++++++++++++++ .../keccak_function_manager.py | 7 +- mythril/laser/ethereum/instructions.py | 36 +++------ mythril/laser/smt/function.py | 16 ++-- tests/disassembler_test.py | 5 +- tests/integration_tests/analysis_tests.py | 33 +++++++++ tests/laser/evm_testsuite/evm_test.py | 2 +- tests/laser/keccak_tests.py | 2 +- tests/testdata/input_contracts/flag_array.sol | 17 +++++ tests/testdata/inputs/flag_array.sol.o | 1 + 13 files changed, 174 insertions(+), 50 deletions(-) create mode 100644 mythril/laser/ethereum/function_managers/__init__.py create mode 100644 mythril/laser/ethereum/function_managers/exponent_function_manager.py rename mythril/laser/ethereum/{ => function_managers}/keccak_function_manager.py (96%) create mode 100644 tests/integration_tests/analysis_tests.py create mode 100644 tests/testdata/input_contracts/flag_array.sol create mode 100644 tests/testdata/inputs/flag_array.sol.o diff --git a/mythril/analysis/module/modules/ether_thief.py b/mythril/analysis/module/modules/ether_thief.py index 7eb08872..a0a3cf97 100644 --- a/mythril/analysis/module/modules/ether_thief.py +++ b/mythril/analysis/module/modules/ether_thief.py @@ -76,7 +76,6 @@ class EtherThief(DetectionModule): # Pre-solve so we only add potential issues if the attacker's balance is increased. solver.get_model(constraints) - potential_issue = PotentialIssue( contract=state.environment.active_account.contract_name, function_name=state.environment.active_function_name, diff --git a/mythril/analysis/solver.py b/mythril/analysis/solver.py index e2abe169..65947485 100644 --- a/mythril/analysis/solver.py +++ b/mythril/analysis/solver.py @@ -1,22 +1,24 @@ """This module contains analysis module helpers to solve path constraints.""" +import logging from typing import Dict, List, Tuple, Union -from z3 import FuncInterp + import z3 +from z3 import FuncInterp -from mythril.support.model import get_model -from mythril.laser.ethereum.state.global_state import GlobalState -from mythril.laser.ethereum.state.constraints import Constraints -from mythril.laser.ethereum.keccak_function_manager import ( +from mythril.exceptions import UnsatError +from mythril.laser.ethereum.function_managers import ( + exponent_function_manager, keccak_function_manager, - hash_matcher, ) + +from mythril.laser.ethereum.state.constraints import Constraints +from mythril.laser.ethereum.state.global_state import GlobalState from mythril.laser.ethereum.transaction import BaseTransaction -from mythril.laser.smt import UGE, symbol_factory -from mythril.exceptions import UnsatError from mythril.laser.ethereum.transaction.transaction_models import ( ContractCreationTransaction, ) -import logging +from mythril.laser.smt import UGE, symbol_factory +from mythril.support.model import get_model log = logging.getLogger(__name__) @@ -65,7 +67,6 @@ def get_transaction_sequence( model = get_model(tx_constraints, minimize=minimize) except UnsatError: raise UnsatError - # Include creation account in initial state # Note: This contains the code, which should not exist until after the first tx initial_world_state = transaction_sequence[0].world_state @@ -121,7 +122,7 @@ def _replace_with_actual_sha( ): concrete_hashes = keccak_function_manager.get_concrete_hash_data(model) for tx in concrete_transactions: - if hash_matcher not in tx["input"]: + if keccak_function_manager.hash_matcher not in tx["input"]: continue if code is not None and code.bytecode in tx["input"]: s_index = len(code.bytecode) + 2 @@ -129,7 +130,10 @@ def _replace_with_actual_sha( s_index = 10 for i in range(s_index, len(tx["input"])): data_slice = tx["input"][i : i + 64] - if hash_matcher not in data_slice or len(data_slice) != 64: + if ( + keccak_function_manager.hash_matcher not in data_slice + or len(data_slice) != 64 + ): continue find_input = symbol_factory.BitVecVal(int(data_slice, 16), 256) input_ = None diff --git a/mythril/laser/ethereum/function_managers/__init__.py b/mythril/laser/ethereum/function_managers/__init__.py new file mode 100644 index 00000000..3fa564a3 --- /dev/null +++ b/mythril/laser/ethereum/function_managers/__init__.py @@ -0,0 +1,2 @@ +from .exponent_function_manager import exponent_function_manager +from .keccak_function_manager import keccak_function_manager diff --git a/mythril/laser/ethereum/function_managers/exponent_function_manager.py b/mythril/laser/ethereum/function_managers/exponent_function_manager.py new file mode 100644 index 00000000..95ee064b --- /dev/null +++ b/mythril/laser/ethereum/function_managers/exponent_function_manager.py @@ -0,0 +1,74 @@ +import logging +from typing import Dict, List, Optional, Tuple + +from ethereum import utils + +from mythril.laser.smt import ( + And, + BitVec, + Bool, + Function, + URem, + symbol_factory, +) + +log = logging.getLogger(__name__) + + +class ExponentFunctionManager: + """ + Uses an uninterpreted function for exponentiation with the following properties: + 1) power(a, b) > 0 + 2) if a = 256 => forall i if b = i then power(a, b) = (256 ^ i) % (2^256) + + Only these two properties are added as to handle indexing of boolean arrays. + Caution should be exercised when increasing the conditions since it severely affects + the solving time. + """ + + def __init__(self): + power = Function("Power", [256, 256], 256) + NUMBER_256 = symbol_factory.BitVecVal(256, 256) + self.concrete_constraints = And( + *[ + power(NUMBER_256, symbol_factory.BitVecVal(i, 256)) + == symbol_factory.BitVecVal(256 ** i, 256) + for i in range(0, 32) + ] + ) + self.concrete_constraints_sent = False + + def create_condition(self, base: BitVec, exponent: BitVec) -> Tuple[BitVec, Bool]: + """ + Creates a condition for exponentiation + :param base: The base of exponentiation + :param exponent: The exponent of the exponentiation + :return: Tuple of condition and the exponentiation result + """ + power = Function("Power", [256, 256], 256) + exponentiation = power(base, exponent) + + if exponent.symbolic is False and base.symbolic is False: + const_exponentiation = symbol_factory.BitVecVal( + pow(base.value, exponent.value, 2 ** 256), + 256, + annotations=base.annotations.union(exponent.annotations), + ) + constraint = const_exponentiation == exponentiation + return const_exponentiation, constraint + + constraint = exponentiation > 0 + if self.concrete_constraints_sent is False: + constraint = And(constraint, self.concrete_constraints) + self.concrete_constraints_sent = True + if base.value == 256: + constraint = And( + constraint, + power(base, URem(exponent, symbol_factory.BitVecVal(32, 256))) + == power(base, exponent), + ) + + return exponentiation, constraint + + +exponent_function_manager = ExponentFunctionManager() diff --git a/mythril/laser/ethereum/keccak_function_manager.py b/mythril/laser/ethereum/function_managers/keccak_function_manager.py similarity index 96% rename from mythril/laser/ethereum/keccak_function_manager.py rename to mythril/laser/ethereum/function_managers/keccak_function_manager.py index e427420a..9bac0b81 100644 --- a/mythril/laser/ethereum/keccak_function_manager.py +++ b/mythril/laser/ethereum/function_managers/keccak_function_manager.py @@ -17,7 +17,6 @@ import logging TOTAL_PARTS = 10 ** 40 PART = (2 ** 256 - 1) // TOTAL_PARTS INTERVAL_DIFFERENCE = 10 ** 30 -hash_matcher = "fffffff" # This is usually the prefix for the hash in the output log = logging.getLogger(__name__) @@ -32,6 +31,8 @@ class KeccakFunctionManager: For more info https://files.sri.inf.ethz.ch/website/papers/sp20-verx.pdf """ + hash_matcher = "fffffff" # This is usually the prefix for the hash in the output + def __init__(self): self.store_function = {} # type: Dict[int, Tuple[Function, Function]] self.interval_hook_for_size = {} # type: Dict[int, int] @@ -65,8 +66,8 @@ class KeccakFunctionManager: try: func, inverse = self.store_function[length] except KeyError: - func = Function("keccak256_{}".format(length), length, 256) - inverse = Function("keccak256_{}-1".format(length), 256, length) + func = Function("keccak256_{}".format(length), [length], 256) + inverse = Function("keccak256_{}-1".format(length), [256], length) self.store_function[length] = (func, inverse) self.hash_result_store[length] = [] return func, inverse diff --git a/mythril/laser/ethereum/instructions.py b/mythril/laser/ethereum/instructions.py index 440606b7..676fa9e0 100644 --- a/mythril/laser/ethereum/instructions.py +++ b/mythril/laser/ethereum/instructions.py @@ -8,6 +8,7 @@ from typing import cast, Callable, List, Union from mythril.laser.smt import ( Extract, Expression, + Function, UDiv, simplify, Concat, @@ -32,7 +33,11 @@ from mythril.laser.ethereum.state.calldata import ConcreteCalldata, SymbolicCall import mythril.laser.ethereum.util as helper from mythril.laser.ethereum import util -from mythril.laser.ethereum.keccak_function_manager import keccak_function_manager +from mythril.laser.ethereum.function_managers import ( + keccak_function_manager, + exponent_function_manager, +) + from mythril.laser.ethereum.call import ( get_call_parameters, native_call, @@ -601,30 +606,11 @@ class Instruction: """ state = global_state.mstate base, exponent = util.pop_bitvec(state), util.pop_bitvec(state) - - if base.symbolic or exponent.symbolic: - - state.stack.append( - global_state.new_bitvec( - "invhash(" - + str(hash(simplify(base))) - + ")**invhash(" - + str(hash(simplify(exponent))) - + ")", - 256, - base.annotations.union(exponent.annotations), - ) - ) # Hash is used because str(symbol) takes a long time to be converted to a string - else: - - state.stack.append( - symbol_factory.BitVecVal( - pow(base.value, exponent.value, 2 ** 256), - 256, - annotations=base.annotations.union(exponent.annotations), - ) - ) - + exponentiation, constraint = exponent_function_manager.create_condition( + base, exponent + ) + state.stack.append(exponentiation) + global_state.world_state.constraints.append(constraint) return [global_state] @StateTransition() diff --git a/mythril/laser/smt/function.py b/mythril/laser/smt/function.py index 77451871..a6a6bcda 100644 --- a/mythril/laser/smt/function.py +++ b/mythril/laser/smt/function.py @@ -1,4 +1,4 @@ -from typing import cast +from typing import cast, List, Any, Set import z3 from mythril.laser.smt.bitvec import BitVec @@ -7,19 +7,23 @@ from mythril.laser.smt.bitvec import BitVec class Function: """An uninterpreted function.""" - def __init__(self, name: str, domain: int, value_range: int): + def __init__(self, name: str, domain: List[int], value_range: int): """Initializes an uninterpreted function. :param name: Name of the Function :param domain: The domain for the Function (10 -> all the values that a bv of size 10 could take) :param value_range: The range for the values of the function (10 -> all the values that a bv of size 10 could take) """ - self.domain = z3.BitVecSort(domain) + self.domain = [] + for element in domain: + self.domain.append(z3.BitVecSort(element)) self.range = z3.BitVecSort(value_range) - self.raw = z3.Function(name, self.domain, self.range) + self.raw = z3.Function(name, *self.domain, self.range) - def __call__(self, item: BitVec) -> BitVec: + def __call__(self, *items) -> BitVec: """Function accessor, item can be symbolic.""" + annotations: Set[Any] = set().union(*[item.annotations for item in items]) return BitVec( - cast(z3.BitVecRef, self.raw(item.raw)), annotations=item.annotations + cast(z3.BitVecRef, self.raw(*[item.raw for item in items])), + annotations=annotations, ) diff --git a/tests/disassembler_test.py b/tests/disassembler_test.py index ffdc5f08..4654f3ac 100644 --- a/tests/disassembler_test.py +++ b/tests/disassembler_test.py @@ -1,6 +1,7 @@ from mythril.disassembler.disassembly import Disassembly from mythril.ethereum import util from tests import * +import os class DisassemblerTestCase(BaseTestCase): @@ -13,7 +14,9 @@ class DisassemblerTestCase(BaseTestCase): for input_file in TESTDATA_INPUTS.iterdir(): output_expected = TESTDATA_OUTPUTS_EXPECTED / (input_file.name + ".easm") output_current = TESTDATA_OUTPUTS_CURRENT / (input_file.name + ".easm") - + if os.path.isfile(output_current) is False: + # Ignore files which do not have output data + continue code = input_file.read_text() disassembly = Disassembly(code) output_current.write_text(disassembly.get_easm()) diff --git a/tests/integration_tests/analysis_tests.py b/tests/integration_tests/analysis_tests.py new file mode 100644 index 00000000..68f6ba93 --- /dev/null +++ b/tests/integration_tests/analysis_tests.py @@ -0,0 +1,33 @@ +import pytest +import json +import sys + +from subprocess import check_output +from tests import PROJECT_DIR, TESTDATA + +MYTH = str(PROJECT_DIR / "myth") +test_data = ( + ( + "flag_array.sol.o", + { + "TX_COUNT": 1, + "TX_OUTPUT": 1, + "MODULE": "EtherThief", + "ISSUE_COUNT": 1, + "ISSUE_NUMBER": 0, + }, + "0xab12585800000000000000000000000000000000000000000000000000000000000004d2", + ), +) + + +@pytest.mark.parametrize("file_name, tx_data, calldata", test_data) +def test_analysis(file_name, tx_data, calldata): + bytecode_file = str(TESTDATA / "inputs" / file_name) + command = f"""python3 {MYTH} analyze -f {bytecode_file} -t {tx_data["TX_COUNT"]} -o jsonv2 -m {tx_data["MODULE"]} --solver-timeout 60000""" + output = json.loads(check_output(command, shell=True).decode("UTF-8")) + + assert len(output[0]["issues"]) == tx_data["ISSUE_COUNT"] + test_case = output[0]["issues"][tx_data["ISSUE_NUMBER"]]["extra"]["testCases"][0] + print(test_case["steps"]) + assert test_case["steps"][tx_data["TX_OUTPUT"]]["input"] == calldata diff --git a/tests/laser/evm_testsuite/evm_test.py b/tests/laser/evm_testsuite/evm_test.py index c12ddd68..740be788 100644 --- a/tests/laser/evm_testsuite/evm_test.py +++ b/tests/laser/evm_testsuite/evm_test.py @@ -2,7 +2,7 @@ from mythril.laser.ethereum.svm import LaserEVM from mythril.laser.ethereum.state.account import Account from mythril.laser.ethereum.state.world_state import WorldState from mythril.laser.ethereum.time_handler import time_handler -from mythril.laser.ethereum.keccak_function_manager import keccak_function_manager +from mythril.laser.ethereum.function_managers import keccak_function_manager from mythril.disassembler.disassembly import Disassembly from mythril.laser.ethereum.transaction.concolic import execute_message_call from mythril.laser.smt import Expression, BitVec, symbol_factory diff --git a/tests/laser/keccak_tests.py b/tests/laser/keccak_tests.py index 8a3ae00e..0f83346d 100644 --- a/tests/laser/keccak_tests.py +++ b/tests/laser/keccak_tests.py @@ -1,5 +1,5 @@ from mythril.laser.smt import Solver, symbol_factory, And -from mythril.laser.ethereum.keccak_function_manager import keccak_function_manager +from mythril.laser.ethereum.function_managers import keccak_function_manager import z3 import pytest diff --git a/tests/testdata/input_contracts/flag_array.sol b/tests/testdata/input_contracts/flag_array.sol new file mode 100644 index 00000000..22c2aabc --- /dev/null +++ b/tests/testdata/input_contracts/flag_array.sol @@ -0,0 +1,17 @@ +pragma solidity ^0.8.0; + +contract BasicLiquidation { + bool[4096] _flags; + constructor() payable + { + require(msg.value == 0.1 ether); + _flags[1234] = true; + } + function extractMoney(uint256 idx) public payable + { + require(idx >= 0); + require(idx < 4096); + require(_flags[idx]); + payable(msg.sender).transfer(address(this).balance); + } +} \ No newline at end of file diff --git a/tests/testdata/inputs/flag_array.sol.o b/tests/testdata/inputs/flag_array.sol.o new file mode 100644 index 00000000..fc044655 --- /dev/null +++ b/tests/testdata/inputs/flag_array.sol.o @@ -0,0 +1 @@ +608060405267016345785d8a0000341461001857600080fd5b600160006104d2611000811061003157610030610055565b5b602091828204019190066101000a81548160ff021916908315150217905550610084565b7f4e487b7100000000000000000000000000000000000000000000000000000000600052603260045260246000fd5b6101a6806100936000396000f3fe60806040526004361061001e5760003560e01c8063ab12585814610023575b600080fd5b61003d600480360381019061003891906100ee565b61003f565b005b600081101561004d57600080fd5b611000811061005b57600080fd5b60008161100081106100705761006f610125565b5b602091828204019190069054906101000a900460ff1661008f57600080fd5b3373ffffffffffffffffffffffffffffffffffffffff166108fc479081150290604051600060405180830381858888f193505050501580156100d5573d6000803e3d6000fd5b5050565b6000813590506100e881610159565b92915050565b60006020828403121561010457610103610154565b5b6000610112848285016100d9565b91505092915050565b6000819050919050565b7f4e487b7100000000000000000000000000000000000000000000000000000000600052603260045260246000fd5b600080fd5b6101628161011b565b811461016d57600080fd5b5056fea264697066735822122038d1a63a64c5408c7008a0f3746ab94e43a04b5bc74f52e4869d3f15cf5b2b9e64736f6c63430008060033 \ No newline at end of file