Cleanup + documentation

pull/608/head
Joran Honig 6 years ago
parent d10666a068
commit 395cea71c4
  1. 114
      mythril/disassembler/disassembly.py
  2. 8
      mythril/laser/ethereum/svm.py

@ -4,57 +4,91 @@ import logging
class Disassembly(object): class Disassembly(object):
"""
Disassembly class
def __init__(self, code, enable_online_lookup=False): Stores bytecode, and its disassembly.
Additionally it will gather the following information on the existing functions in the disassembled code:
- function hashes
- function name to entry point mapping
- function entry point to function name mapping
"""
def __init__(self, code: str, enable_online_lookup: bool=False):
self.bytecode = code
self.instruction_list = asm.disassemble(util.safe_decode(code)) self.instruction_list = asm.disassemble(util.safe_decode(code))
self.func_hashes = [] self.func_hashes = []
self.func_to_addr = {} self.function_name_to_address = {}
self.addr_to_func = {} self.address_to_function_name = {}
self.bytecode = code
signatures = SignatureDb(enable_online_lookup=enable_online_lookup) # control if you want to have online sighash lookups signatures = SignatureDb(
enable_online_lookup=enable_online_lookup
) # control if you want to have online signature hash lookups
try: try:
signatures.open() # open from default locations signatures.open() # open from default locations
except FileNotFoundError: except FileNotFoundError:
logging.info("Missing function signature file. Resolving of function names from signature file disabled.") logging.info(
"Missing function signature file. Resolving of function names from signature file disabled."
# Parse jump table & resolve function names )
# Need to take from PUSH1 to PUSH4 because solc seems to remove excess 0s at the beginning for optimizing # Need to take from PUSH1 to PUSH4 because solc seems to remove excess 0s at the beginning for optimizing
jmptable_indices = asm.find_opcode_sequence([("PUSH1", "PUSH2", "PUSH3", "PUSH4"), ("EQ",)], jump_table_indices = asm.find_opcode_sequence(
self.instruction_list) [("PUSH1", "PUSH2", "PUSH3", "PUSH4"), ("EQ",)], self.instruction_list
)
for i in jmptable_indices:
func_hash = self.instruction_list[i]['argument'] for index in jump_table_indices:
function_hash, jump_target, function_name = _get_function_info(
# Append with missing 0s at the beginning index, self.instruction_list, signatures
func_hash = "0x" + func_hash[2:].rjust(8, "0") )
self.func_hashes.append(function_hash)
self.func_hashes.append(func_hash)
try: if jump_target is not None and function_name is not None:
# tries local cache, file and optional online lookup self.function_name_to_address[function_name] = jump_target
# may return more than one function signature. since we cannot probe for the correct one we'll use the first self.address_to_function_name[jump_target] = function_name
func_names = signatures.get(func_hash)
if len(func_names) > 1:
# ambiguous result
func_name = "**ambiguous** %s" % func_names[0] # return first hit but note that result was ambiguous
else:
# only one item
func_name = func_names[0]
except KeyError:
func_name = "_function_" + func_hash
try:
offset = self.instruction_list[i + 2]['argument']
jump_target = int(offset, 16)
self.func_to_addr[func_name] = jump_target
self.addr_to_func[jump_target] = func_name
except:
continue
signatures.write() # store resolved signatures (potentially resolved online) signatures.write() # store resolved signatures (potentially resolved online)
def get_easm(self): def get_easm(self):
# todo: tintinweb - print funcsig resolved data from self.addr_to_func?
return asm.instruction_list_to_easm(self.instruction_list) return asm.instruction_list_to_easm(self.instruction_list)
def _get_function_info(index: int, instruction_list: list, signature_database: "SignatureDb") -> (str, int, str):
    """
    Finds the function information for a call table entry

    Solidity uses the first 4 bytes of the calldata to indicate which function the message call should execute
    The generated code that directs execution to the correct function looks like this:
    - PUSH function_hash
    - EQ
    - PUSH entry_point
    - JUMPI

    This function takes an index that points to the first instruction, and from that finds out the function hash,
    function entry and the function name.

    If the instruction two positions after ``index`` does not exist, or carries no ``"argument"``
    (i.e. the matched sequence is not a real call table entry), the entry point and the function name
    are both returned as ``None``.

    :param index: Start of the entry pattern
    :param instruction_list: Instruction list for the contract that is being analyzed
    :param signature_database: Database used to map function hashes to their respective function names
    :return: function hash, function entry point (or None), function name (or None)
    """
    # Append with missing 0s at the beginning
    function_hash = "0x" + instruction_list[index]["argument"][2:].rjust(8, "0")

    function_names = signature_database.get(function_hash)
    if len(function_names) > 1:
        # In this case there was an ambiguous result; use the first hit but flag it
        function_name = "**ambiguous** {}".format(function_names[0])
    elif len(function_names) == 1:
        function_name = function_names[0]
    else:
        # No known signature: fall back to a synthetic name derived from the hash
        function_name = "_function_" + function_hash

    try:
        offset = instruction_list[index + 2]["argument"]
        entry_point = int(offset, 16)
    except (KeyError, IndexError):
        # IndexError: the pattern sits at the very end of the instruction list.
        # KeyError: the instruction after EQ has no "argument", so it cannot be the
        # PUSH of a jump target.
        return function_hash, None, None

    return function_hash, entry_point, function_name

@ -1,4 +1,5 @@
import logging import logging
from mythril.disassembler.disassembly import Disassembly
from mythril.laser.ethereum.state import WorldState from mythril.laser.ethereum.state import WorldState
from mythril.laser.ethereum.transaction import TransactionStartSignal, TransactionEndSignal, \ from mythril.laser.ethereum.transaction import TransactionStartSignal, TransactionEndSignal, \
ContractCreationTransaction ContractCreationTransaction
@ -258,11 +259,10 @@ class LaserEVM:
address = state.environment.code.instruction_list[state.mstate.pc]['address'] address = state.environment.code.instruction_list[state.mstate.pc]['address']
environment = state.environment environment = state.environment
disassembly = environment.code disassembly: Disassembly = environment.code
if address in state.environment.code.addr_to_func: if address in disassembly.address_to_function_name:
# Enter a new function # Enter a new function
environment.active_function_name = disassembly.address_to_function_name[address]
environment.active_function_name = disassembly.addr_to_func[address]
new_node.flags |= NodeFlags.FUNC_ENTRY new_node.flags |= NodeFlags.FUNC_ENTRY
logging.debug( logging.debug(

Loading…
Cancel
Save