Move asm to disassembler + cleanup

pull/603/head
Joran Honig 6 years ago
parent d10666a068
commit b30430252e
  1. 66
      mythril/disassembler/asm.py
  2. 3
      mythril/disassembler/disassembly.py

@@ -1,51 +1,46 @@
import sys import sys
import re import re
from typing import Pattern, Match
from ethereum.opcodes import opcodes from ethereum.opcodes import opcodes
from mythril.ether import util from mythril.ether import util
regex_PUSH = re.compile('^PUSH(\d*)$') regex_PUSH = re.compile("^PUSH(\d*)$")
# Additional mnemonic to catch failed assertions # Additional mnemonic to catch failed assertions
opcodes[254] = ["ASSERT_FAIL", 0, 0, 0]
opcodes[254] = ['ASSERT_FAIL', 0, 0, 0]
def instruction_list_to_easm(instruction_list): def instruction_list_to_easm(instruction_list):
easm = "" result = ""
for instruction in instruction_list: for instruction in instruction_list:
result += "{} {}".format(instruction["address"], instruction["opcode"])
if "argument" in instruction:
result += " " + instruction["argument"]
result += "\n"
easm += str(instruction['address']) + " " + instruction['opcode'] return result
if 'argument' in instruction:
easm += " " + instruction['argument']
easm += "\n"
return easm
def easm_to_instruction_list(evm_assembly):
def easm_to_instruction_list(easm): regex_CODELINE: Pattern[str] = re.compile("^([A-Z0-9]+)(?:\s+([0-9a-fA-Fx]+))?$")
regex_CODELINE = re.compile('^([A-Z0-9]+)(?:\s+([0-9a-fA-Fx]+))?$')
instruction_list = [] instruction_list = []
codelines = easm.split('\n') for line in evm_assembly.split("\n"):
for line in codelines: match: Match[str] = re.search(regex_CODELINE, line)
m = re.search(regex_CODELINE, line) if not match:
if not m:
# Invalid code line # Invalid code line
continue continue
instruction = {'opcode': m.group(1)} instruction = {"opcode": match.group(1)}
if m.group(2): if match.group(2):
instruction['argument'] = m.group(2)[2:] instruction["argument"] = match.group(2)[2:]
instruction_list.append(instruction) instruction_list.append(instruction)
@@ -70,13 +65,13 @@ def find_opcode_sequence(pattern, instruction_list):
for i in range(0, len(instruction_list) - pattern_length + 1): for i in range(0, len(instruction_list) - pattern_length + 1):
if instruction_list[i]['opcode'] in pattern[0]: if instruction_list[i]["opcode"] in pattern[0]:
matched = True matched = True
for j in range(1, len(pattern)): for j in range(1, len(pattern)):
if not (instruction_list[i + j]['opcode'] in pattern[j]): if not (instruction_list[i + j]["opcode"] in pattern[j]):
matched = False matched = False
break break
@@ -99,7 +94,7 @@ def disassemble(bytecode):
while addr < length: while addr < length:
instruction = {'address': addr} instruction = {"address": addr}
try: try:
if sys.version_info > (3, 0): if sys.version_info > (3, 0):
@@ -110,21 +105,20 @@ def disassemble(bytecode):
except KeyError: except KeyError:
# invalid opcode # invalid opcode
instruction_list.append({'address': addr, 'opcode': "INVALID"}) instruction_list.append({"address": addr, "opcode": "INVALID"})
addr += 1 addr += 1
continue continue
instruction['opcode'] = opcode[0] instruction["opcode"] = opcode[0]
m = re.search(regex_PUSH, opcode[0]) m = re.search(regex_PUSH, opcode[0])
if m: if m:
argument = bytecode[addr+1:addr+1+int(m.group(1))] argument = bytecode[addr + 1 : addr + 1 + int(m.group(1))]
instruction['argument'] = "0x" + argument.hex() instruction["argument"] = "0x" + argument.hex()
addr += int(m.group(1)) addr += int(m.group(1))
instruction_list.append(instruction) instruction_list.append(instruction)
addr += 1 addr += 1
@@ -139,14 +133,14 @@ def assemble(instruction_list):
for instruction in instruction_list: for instruction in instruction_list:
try: try:
opcode = get_opcode_from_name(instruction['opcode']) opcode = get_opcode_from_name(instruction["opcode"])
except RuntimeError: except RuntimeError:
opcode = 0xbb opcode = 0xBB
bytecode += opcode.to_bytes(1, byteorder='big') bytecode += opcode.to_bytes(1, byteorder="big")
if 'argument' in instruction: if "argument" in instruction:
bytecode += util.safe_decode(instruction['argument']) bytecode += util.safe_decode(instruction["argument"])
return bytecode return bytecode

@@ -1,4 +1,5 @@
from mythril.ether import asm, util from mythril.ether import util
from mythril.disassembler import asm
from mythril.support.signatures import SignatureDb from mythril.support.signatures import SignatureDb
import logging import logging

Loading…
Cancel
Save