From 4f234e7ef36f8a9c53c19716f6a0bef7f6c7e892 Mon Sep 17 00:00:00 2001 From: "Dr. Sergey Pogodin" Date: Wed, 4 Apr 2018 21:52:19 +0200 Subject: [PATCH] CODE: LevelDB support Submission #252303 by N1k1tung to the challenge https://www.topcoder.com/challenges/30063861 --- README.md | 17 ++++ myth | 15 ++- mythril/leveldb/__init__.py | 0 mythril/leveldb/client.py | 181 ++++++++++++++++++++++++++++++++++++ mythril/leveldb/eth_db.py | 23 +++++ mythril/leveldb/state.py | 130 ++++++++++++++++++++++++++ setup.py | 5 +- 7 files changed, 367 insertions(+), 4 deletions(-) create mode 100644 mythril/leveldb/__init__.py create mode 100644 mythril/leveldb/client.py create mode 100644 mythril/leveldb/eth_db.py create mode 100644 mythril/leveldb/state.py diff --git a/README.md b/README.md index d7b22a74..8a479e74 100644 --- a/README.md +++ b/README.md @@ -233,6 +233,23 @@ $ wget https://raw.githubusercontent.com/b-mueller/mythril/master/signatures.jso When you analyze Solidity code, new function signatures are added to the database automatically. +### Use LevelDB directly + +If you want to directly use the LevelDB database of your local geth instance you can do so by specifying its path with the *--leveldb* option: + +```bash +$ myth --leveldb ./geth/chaindata -s "code#PUSH#" +$ myth --leveldb ./geth/chaindata -a 0xA692B965434F804BF7C39217E881F2c229befc2e --storage 0,10 +``` + +Default geth data directories are: + +* Mac: `~/Library/Ethereum` +* Linux: `~/.ethereum` +* Windows: `%APPDATA%\Ethereum` + +The chaindata LevelDB is located at `geth/chaindata` inside the data directory + ## Credit - JSON RPC library is adapted from [ethjsonrpc](https://github.com/ConsenSys/ethjsonrpc) (it doesn't seem to be maintained anymore, and I needed to make some changes to it). 
diff --git a/myth b/myth index b3289bc8..742ec48e 100755 --- a/myth +++ b/myth @@ -31,6 +31,7 @@ from mythril.analysis.symbolic import SymExecWrapper from mythril.analysis.callgraph import generate_graph from mythril.analysis.security import fire_lasers from mythril.analysis.report import Report +from mythril.leveldb.client import EthLevelDB def searchCallback(code_hash, code, addresses, balances): @@ -83,6 +84,7 @@ options.add_argument('--solc-args', help='Extra arguments for solc') options.add_argument('--phrack', action='store_true', help='Phrack-style call graph') options.add_argument('--enable-physics', action='store_true', help='enable graph physics simulation') options.add_argument('-v', type=int, help='log level (0-2)', metavar='LOG_LEVEL') +options.add_argument('--leveldb', help='enable direct leveldb access operations', metavar='LEVELDB_PATH') rpc = parser.add_argument_group('RPC options') rpc.add_argument('-i', action='store_true', help='Preset: Infura Node service (Mainnet)') @@ -179,9 +181,15 @@ else: except KeyError: solc_binary = 'solc' +# Open LevelDB if specified + +if args.leveldb: + ethDB = EthLevelDB(args.leveldb) + eth = ethDB + # Establish RPC/IPC connection if necessary -if args.address or args.init_db: +if (args.address or args.init_db) and not args.leveldb: if args.i: eth = EthJsonRpc('mainnet.infura.io', 443, True) @@ -232,7 +240,10 @@ if args.search or args.init_db: contract_storage = get_persistent_storage(mythril_dir) if args.search: try: - contract_storage.search(args.search, searchCallback) + if not args.leveldb: + contract_storage.search(args.search, searchCallback) + else: + ethDB.search(args.search, searchCallback) except SyntaxError: exitWithError(args.outform, "Syntax error in search expression.") elif args.init_db: diff --git a/mythril/leveldb/__init__.py b/mythril/leveldb/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mythril/leveldb/client.py b/mythril/leveldb/client.py new file mode 100644 index 
00000000..e5458b40 --- /dev/null +++ b/mythril/leveldb/client.py @@ -0,0 +1,181 @@ +import plyvel +import binascii +import rlp +import hashlib +from ethereum import utils +from ethereum.block import BlockHeader, Block +from mythril.leveldb.state import State, Account +from mythril.leveldb.eth_db import ETH_DB +from mythril.ether.ethcontract import ETHContract, InstanceList + +# Per https://github.com/ethereum/go-ethereum/blob/master/core/database_util.go +# prefixes and suffixes for keys in geth +headerPrefix = b'h' # headerPrefix + num (uint64 big endian) + hash -> header +bodyPrefix = b'b' # bodyPrefix + num (uint64 big endian) + hash -> block body +numSuffix = b'n' # headerPrefix + num (uint64 big endian) + numSuffix -> hash +blockHashPrefix = b'H' # blockHashPrefix + hash -> num (uint64 big endian) +# known geth keys +headHeaderKey = b'LastHeader' # head (latest) header hash + +def _formatBlockNumber(number): + ''' + formats block number to uint64 big endian + ''' + return utils.zpad(utils.int_to_big_endian(number), 8) + +def _encode_hex(v): + ''' + encodes hash as hex + ''' + return '0x' + utils.encode_hex(v) + +class EthLevelDB(object): + ''' + Go-Ethereum LevelDB client class + ''' + + def __init__(self, path): + self.path = path + self.db = ETH_DB(path) + self.headBlockHeader = None + self.headState = None + self.all_contracts = None + self.active_contracts = None + self.instance_lists = None + + def get_all_contracts(self): + ''' + get all contracts + ''' + if not self.all_contracts: + self.all_contracts = [] + self.active_contracts = [] + self.instance_lists = [] + state = self._get_head_state() + accounts = state.get_all_accounts() + + for a in accounts: + if a.code is not None: + code = _encode_hex(a.code) + md5 = hashlib.md5() + md5.update(code.encode('UTF-8')) + contract_hash = md5.digest() + contract = ETHContract(code, name=contract_hash.hex()) + self.all_contracts.append(contract) + + if a.balance != 0: + md5 = InstanceList() + 
md5.add(_encode_hex(a.address), a.balance) + self.instance_lists.append(md5) + self.active_contracts.append(contract) + + return self.all_contracts + + def get_active_contracts(self): + ''' + get all contracts with non-zero balance + ''' + if not self.active_contracts: + self.get_all_contracts() # optimized + return self.active_contracts + + def search(self, expression, callback_func): + ''' + searches through non-zero balance contracts + ''' + contracts = self.get_active_contracts() + for i in range(0, len(contracts)): + if contracts[i].matches_expression(expression): + m = self.instance_lists[i] + callback_func(contracts[i].name, contracts[i], m.addresses, m.balances) + + def eth_getBlockHeaderByNumber(self, number): + ''' + gets block header by block number + ''' + hash = self._get_block_hash(number) + blockNumber = _formatBlockNumber(number) + return self._get_block_header(hash, blockNumber) + + def eth_getBlockByNumber(self, number): + ''' + gets block body by block number + ''' + blockHash = self._get_block_hash(number) + blockNumber = _formatBlockNumber(number) + bodyKey = bodyPrefix + blockNumber + blockHash + blockData = self.db.get(bodyKey) + body = rlp.decode(blockData, sedes=Block) + return body + + def eth_getCode(self, address): + ''' + gets account code + ''' + account = self._get_account(address) + return _encode_hex(account.code) + + def eth_getBalance(self, address): + ''' + gets account balance + ''' + account = self._get_account(address) + return account.balance + + def eth_getStorageAt(self, address, position): + ''' + gets account storage data at position + ''' + account = self._get_account(address) + return _encode_hex(utils.zpad(utils.encode_int(account.get_storage_data(position)), 32)) + + def _get_head_state(self): + ''' + gets head state + ''' + if not self.headState: + root = self._get_head_block().state_root + self.headState = State(self.db, root) + return self.headState + + def _get_account(self, address): + ''' + gets account by 
address + ''' + state = self._get_head_state() + accountAddress = binascii.a2b_hex(utils.remove_0x_head(address)) + return state.get_and_cache_account(accountAddress) + + def _get_block_hash(self, number): + ''' + gets block hash by block number + ''' + num = _formatBlockNumber(number) + hashKey = headerPrefix + num + numSuffix + return self.db.get(hashKey) + + def _get_head_block(self): + ''' + gets head block header + ''' + if not self.headBlockHeader: + hash = self.db.get(headHeaderKey) + num = self._get_block_number(hash) + self.headBlockHeader = self._get_block_header(hash, num) + return self.headBlockHeader + + def _get_block_number(self, hash): + ''' + gets block number by hash + ''' + numberKey = blockHashPrefix + hash + return self.db.get(numberKey) + + def _get_block_header(self, hash, num): + ''' + get block header by block header hash & number + ''' + headerKey = headerPrefix + num + hash + blockHeaderData = self.db.get(headerKey) + header = rlp.decode(blockHeaderData, sedes=BlockHeader) + return header \ No newline at end of file diff --git a/mythril/leveldb/eth_db.py b/mythril/leveldb/eth_db.py new file mode 100644 index 00000000..2e4dcf6a --- /dev/null +++ b/mythril/leveldb/eth_db.py @@ -0,0 +1,23 @@ +import plyvel +from ethereum.db import BaseDB +from ethereum import utils + +class ETH_DB(BaseDB): + ''' + adapts pyethereum BaseDB using plyvel + ''' + + def __init__(self, path): + self.db = plyvel.DB(path) + + def get(self, key): + ''' + gets value for key + ''' + return self.db.get(key) + + def put(self, key, value): + ''' + puts value for key + ''' + self.db.put(key, value) \ No newline at end of file diff --git a/mythril/leveldb/state.py b/mythril/leveldb/state.py new file mode 100644 index 00000000..71b4856c --- /dev/null +++ b/mythril/leveldb/state.py @@ -0,0 +1,130 @@ +import rlp +import binascii +from ethereum.utils import normalize_address, hash32, trie_root, \ + big_endian_int, address, int256, encode_hex, encode_int, \ + big_endian_to_int, 
int_to_addr, zpad, parse_as_bin, parse_as_int, \ + decode_hex, sha3, is_string, is_numeric +from rlp.sedes import big_endian_int, Binary, binary, CountableList +from ethereum import utils +from ethereum import trie +from ethereum.trie import Trie +from ethereum.securetrie import SecureTrie + +BLANK_HASH = utils.sha3(b'') +BLANK_ROOT = utils.sha3rlp(b'') + +STATE_DEFAULTS = { + "txindex": 0, + "gas_used": 0, + "gas_limit": 3141592, + "block_number": 0, + "block_coinbase": '\x00' * 20, + "block_difficulty": 1, + "timestamp": 0, + "logs": [], + "receipts": [], + "bloom": 0, + "suicides": [], + "recent_uncles": {}, + "prev_headers": [], + "refunds": 0, +} + + +class Account(rlp.Serializable): + ''' + adjusted account from ethereum.state + ''' + + fields = [ + ('nonce', big_endian_int), + ('balance', big_endian_int), + ('storage', trie_root), + ('code_hash', hash32) + ] + + def __init__(self, nonce, balance, storage, code_hash, db, address): + self.db = db + self.address = address + super(Account, self).__init__(nonce, balance, storage, code_hash) + self.storage_cache = {} + self.storage_trie = SecureTrie(Trie(self.db)) + self.storage_trie.root_hash = self.storage + self.touched = False + self.existent_at_start = True + self._mutable = True + self.deleted = False + + @property + def code(self): + ''' + code rlp data + ''' + return self.db.get(self.code_hash) + + def get_storage_data(self, key): + ''' + get storage data + ''' + if key not in self.storage_cache: + v = self.storage_trie.get(utils.encode_int32(key)) + self.storage_cache[key] = utils.big_endian_to_int( + rlp.decode(v) if v else b'') + return self.storage_cache[key] + + @classmethod + def blank_account(cls, db, address, initial_nonce=0): + ''' + creates a blank account + ''' + db.put(BLANK_HASH, b'') + o = cls(initial_nonce, 0, trie.BLANK_ROOT, BLANK_HASH, db, address) + o.existent_at_start = False + return o + + def is_blank(self): + ''' + checks if is a blank account + ''' + return self.nonce == 0 and 
self.balance == 0 and self.code_hash == BLANK_HASH + +class State(): + ''' + adjusted state from ethereum.state + ''' + + def __init__(self, db, root): + self.db = db + self.trie = Trie(self.db, root) + self.secureTrie = SecureTrie(self.trie) + self.journal = [] + self.cache = {} + + def get_and_cache_account(self, address): + ''' + gets and caches an account for an address, creates blank if not found + ''' + if address in self.cache: + return self.cache[address] + rlpdata = self.secureTrie.get(address) + if rlpdata == trie.BLANK_NODE and len(address) == 32: # support for hashed addresses + rlpdata = self.trie.get(address) + if rlpdata != trie.BLANK_NODE: + o = rlp.decode(rlpdata, Account, db=self.db, address=address) + else: + o = Account.blank_account( + self.db, address, 0) + self.cache[address] = o + o._mutable = True + o._cached_rlp = None + return o + + def get_all_accounts(self): + ''' + iterates through trie to get all items + ''' + accounts = [] + for addressHash, rlpdata in self.secureTrie.trie.to_dict().items(): + if rlpdata != trie.BLANK_NODE: + accounts.append(rlp.decode(rlpdata, Account, db=self.db, address=addressHash)) + return accounts \ No newline at end of file diff --git a/setup.py b/setup.py index 74050113..b72d5d87 100755 --- a/setup.py +++ b/setup.py @@ -290,10 +290,11 @@ setup( 'ethereum>=2.0.4', 'ZODB>=5.3.0', 'z3-solver>=4.5', - 'laser-ethereum==0.5.19', + 'laser-ethereum>=0.5.19', 'requests', 'BTrees', - 'py-solc' + 'py-solc', + 'plyvel' ], python_requires='>=3.5',