diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..f852f0fd --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.DS_Store +.python-version +*.pyc diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 00000000..7125d68d --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015-present Dan Abramov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
# Matches PUSH mnemonics; group 1 captures the digits that follow "PUSH".
regex_PUSH = re.compile(r'^PUSH(\d*)$')

# One easm line: a mnemonic made of upper-case letters/digits, optionally
# followed by whitespace and a hex argument.
_REGEX_CODELINE = re.compile(r'^([A-Z0-9]+)(?:\s+([0-9a-fA-Fx]+))?$')


def safe_decode(hex_encoded_string):
    """Decode a hex string, with or without a leading "0x", into raw bytes.

    ``binascii.unhexlify`` is used instead of ``str.decode("hex")`` because
    the latter only exists on Python 2. The result is a ``str`` on Python 2
    and ``bytes`` on Python 3.
    """
    if hex_encoded_string.startswith("0x"):
        hex_encoded_string = hex_encoded_string[2:]

    return binascii.unhexlify(hex_encoded_string)


def disassembly_to_easm(disassembly):
    """Render a disassembly as easm text, one instruction per line.

    Each instruction is a dict with an 'opcode' key and an optional
    'argument' key holding hex digits; the argument is written with a "0x"
    prefix. Every line, including the last, ends with a newline.
    """
    lines = []

    for instruction in disassembly:
        line = instruction['opcode']

        if 'argument' in instruction:
            line += " 0x" + instruction['argument']

        lines.append(line + "\n")

    # Join once instead of growing a string with += inside the loop.
    return "".join(lines)


def easm_to_disassembly(easm):
    """Parse easm text into a list of instruction dicts.

    Each returned dict has an 'opcode' key and, when the line carries an
    argument, an 'argument' key holding the hex digits without the "0x"
    prefix. Lines that do not look like an instruction (blank lines
    included) are skipped.
    """
    disassembly = []

    for raw_line in easm.splitlines():
        # Strip so that trailing spaces or a stray "\r" do not make an
        # otherwise valid instruction disappear silently.
        match = _REGEX_CODELINE.match(raw_line.strip())

        if not match:
            # Invalid code line
            continue

        instruction = {'opcode': match.group(1)}

        argument = match.group(2)
        if argument:
            # Only drop the prefix when it is really there; an argument
            # written without "0x" must keep its first two digits.
            if argument.startswith("0x"):
                argument = argument[2:]
            instruction['argument'] = argument

        disassembly.append(instruction)

    return disassembly


def get_opcode_from_name(name):
    """Return the key of ``opcodes.opcodes`` whose mnemonic equals *name*.

    The table's values are sequences with the mnemonic at index 0.

    Raises:
        RuntimeError: if no entry has that mnemonic.
    """
    for opcode, value in opcodes.opcodes.items():
        if name == value[0]:
            return opcode

    raise RuntimeError("Unknown opcode")


def find_opcode_sequence(pattern, disassembly):
    """Return every index at which *pattern* occurs in *disassembly*.

    *pattern* is a sequence of mnemonics; *disassembly* is a list of
    instruction dicts compared by their 'opcode' value. Matches may overlap.
    An empty pattern yields no matches.
    """
    pattern = list(pattern)
    pattern_length = len(pattern)

    if pattern_length == 0:
        return []

    names = [instruction['opcode'] for instruction in disassembly]

    # The last valid start is len(names) - pattern_length, hence the "+ 1";
    # without it a match ending on the final instruction is never found.
    return [
        start
        for start in range(len(names) - pattern_length + 1)
        if names[start:start + pattern_length] == pattern
    ]
# ---- ether/asm.py (continued) ----

def resolve_functions(disassembly):
    """Collect the PUSH4 / EQ / PUSH2 / JUMPI stubs found in *disassembly*.

    For every occurrence of that four-instruction sequence, a dict is
    returned with:
      'hash'    - the argument of the PUSH4 instruction
      'address' - the argument of the PUSH2 instruction
    NOTE(review): presumably these are function selectors and their jump
    targets in a contract's dispatcher - confirm against the compiler output.
    """
    stub_indexes = find_opcode_sequence(['PUSH4', 'EQ', 'PUSH2', 'JUMPI'], disassembly)

    return [
        {
            'hash': disassembly[index]['argument'],
            'address': disassembly[index + 2]['argument'],
        }
        for index in stub_indexes
    ]


def disassemble(encoded_bytecode):
    """Turn hex-encoded bytecode into a list of instruction dicts.

    Each dict has an 'opcode' key (the mnemonic) and, for PUSH instructions,
    an 'argument' key with the pushed bytes as lower-case hex digits. Bytes
    that are not in the opcode table are emitted as {'opcode': "INVALID"}.
    A PUSH whose data runs past the end of the bytecode gets whatever bytes
    are left as its argument.
    """
    # bytearray yields ints when indexed on both Python 2 and Python 3,
    # whatever string/bytes type safe_decode hands back.
    bytecode = bytearray(safe_decode(encoded_bytecode))

    disassembly = []
    i = 0

    while i < len(bytecode):

        try:
            opcode = opcodes.opcodes[bytecode[i]]
        except KeyError:
            # invalid opcode
            disassembly.append({'opcode': "INVALID"})
            i += 1
            continue

        instruction = {'opcode': opcode[0]}

        m = regex_PUSH.match(opcode[0])

        if m:
            # The digits after "PUSH" give the number of data bytes.
            width = int(m.group(1))
            argument = bytecode[i + 1:i + 1 + width]
            instruction['argument'] = ''.join('%02x' % byte for byte in argument)
            i += width

        disassembly.append(instruction)

        i += 1

    return disassembly


def assemble(disassembly):
    """Encode a list of instruction dicts as a hex string (no "0x" prefix).

    Each instruction contributes its one-byte opcode followed by its
    'argument' hex digits, if any. Mnemonics that get_opcode_from_name does
    not recognise are encoded as byte 0xbb.
    """
    chunks = []

    for instruction in disassembly:

        try:
            opcode = get_opcode_from_name(instruction['opcode'])
        except RuntimeError:
            opcode = 0xbb

        # '%02x' gives the same two hex digits as hexlify(chr(opcode)) did on
        # Python 2, and also works on Python 3.
        chunks.append('%02x' % opcode)

        if 'argument' in instruction:
            chunks.append(instruction['argument'])

    return ''.join(chunks)


# ---- ether/jsonrpc.py ----

class EthJsonRpcWithDebug(EthJsonRpc):
    """EthJsonRpc client with two additional ``debug_*`` RPC methods."""

    def getBlockRlp(self, number=0):
        """Call ``debug_getBlockRlp`` for *number* and return the RPC result."""
        return self._call('debug_getBlockRlp', [number])

    def traceTransaction(self, txHash):
        """Call ``debug_traceTransaction`` for *txHash* and return the RPC result."""
        return self._call('debug_traceTransaction', [txHash])
def exitWithError(message):
    """Report *message* on stderr and terminate with exit status 1.

    Errors go to stderr so they cannot be mistaken for disassembly output on
    stdout, and the status is non-zero so callers can detect the failure.
    """
    sys.stderr.write(message + "\n")
    sys.exit(1)


def build_parser():
    """Create the command-line parser for the assembler/disassembler."""
    parser = argparse.ArgumentParser(description='Ethereum VM bytecode assembler/ disassembler')

    parser.add_argument('-d', '--disassemble', action='store_true', help='disassemble, use with -c or -t')
    parser.add_argument('-a', '--assemble', nargs=1, help='produce bytecode from easm input file', metavar='INPUT FILE')
    parser.add_argument('-c', '--code', nargs=1, help='bytecode string ("6060604052...")', metavar='BYTECODE')
    parser.add_argument('-t', '--transaction_hash', help='id of contract creation transaction')
    parser.add_argument('-o', '--outfile', help='file to write disassembly output to (e.g. "test.easm")')
    parser.add_argument('--rpchost', nargs=1, help='RPC host')
    parser.add_argument('--rpcport', nargs=1, type=int, help='RPC port')

    return parser


def main(argv=None):
    """Run the command-line tool.

    *argv* is the argument list to parse; ``None`` means ``sys.argv[1:]``.
    With -d the bytecode given by -c, or fetched for the transaction given
    by -t, is disassembled and written to -o or stdout. With -a the easm
    file is assembled and the bytecode printed with a "0x" prefix. With
    neither, the help text is printed.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    if args.disassemble:

        if args.code:
            disassembly = asm.disassemble(args.code[0])
        elif args.transaction_hash:

            # Only override the RPC endpoint when the user asked for it, so
            # util's own defaults stay in force otherwise.
            rpc_options = {}
            if args.rpchost:
                rpc_options['rpc_host'] = args.rpchost[0]
            if args.rpcport:
                rpc_options['rpc_port'] = args.rpcport[0]

            try:
                bytecode = util.bytecode_from_blockchain(args.transaction_hash, **rpc_options)
            except Exception as e:
                # Top-level boundary: any RPC failure becomes a clean error
                # message. str(e) is used because e.message is not available
                # on Python 3.
                exitWithError("Exception loading bytecode via RPC: " + str(e))

            disassembly = asm.disassemble(bytecode)

        else:
            exitWithError("Disassembler: Pass either the -c or -t flag to specify the input bytecode")

        easm_text = asm.disassembly_to_easm(disassembly)

        if args.outfile:
            util.string_to_file(args.outfile, easm_text)
        else:
            sys.stdout.write(easm_text)

    elif args.assemble:

        easm = util.file_to_string(args.assemble[0])

        disassembly = asm.easm_to_disassembly(easm)

        print("0x" + asm.assemble(disassembly))

    else:

        parser.print_help()


if __name__ == "__main__":
    main()
def safe_decode(hex_encoded_string):
    """Decode a hex string, with or without a leading "0x", into raw bytes.

    ``binascii.unhexlify`` replaces ``str.decode("hex")``, which only exists
    on Python 2. The result is a ``str`` on Python 2 and ``bytes`` on
    Python 3.
    """
    # Function-scope import: this module has no top-level binascii import.
    import binascii

    if hex_encoded_string.startswith("0x"):
        hex_encoded_string = hex_encoded_string[2:]

    return binascii.unhexlify(hex_encoded_string)


def bytecode_from_blockchain(creation_tx_hash, rpc_host='127.0.0.1', rpc_port=8545):
    """Load bytecode from a node via the debug_traceTransaction RPC call.

    Args:
        creation_tx_hash: ID of the transaction that created the contract.
        rpc_host: host of the node's JSON-RPC endpoint.
        rpc_port: port of the node's JSON-RPC endpoint.

    Returns:
        The 'returnValue' entry of the transaction trace.

    Raises:
        RuntimeError: if the trace's 'returnValue' is empty.
    """
    # Honour the caller's endpoint instead of a hard-coded 127.0.0.1:8545;
    # the defaults keep the previous behaviour.
    eth = EthJsonRpcWithDebug(rpc_host, rpc_port)

    trace = eth.traceTransaction(creation_tx_hash)

    bytecode = trace['returnValue']

    if bytecode:
        return bytecode

    raise RuntimeError("Transaction trace didn't return any bytecode")


def string_to_file(filename, string):
    """Write *string* to *filename*, replacing any existing content."""
    # The with-block closes the file even if the write raises.
    with open(filename, "w") as outfile:
        outfile.write(string)


def file_to_string(filename):
    """Return the entire content of *filename* as a string."""
    # The with-block closes the handle, which was previously left open.
    with open(filename, "r") as infile:
        return infile.read()