From 43fa9c73df038cda9dd3d4ccbf9e700fec2f36ba Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Thu, 18 Apr 2019 09:46:47 -0300 Subject: [PATCH 01/28] first version of slither-simil --- setup.py | 3 +- utils/similarity/__init__.py | 0 utils/similarity/__main__.py | 107 +++++++++++++++++++++ utils/similarity/cache.py | 22 +++++ utils/similarity/encode.py | 168 +++++++++++++++++++++++++++++++++ utils/similarity/info.py | 47 +++++++++ utils/similarity/similarity.py | 6 ++ utils/similarity/test.py | 49 ++++++++++ utils/similarity/train.py | 37 ++++++++ 9 files changed, 438 insertions(+), 1 deletion(-) create mode 100644 utils/similarity/__init__.py create mode 100755 utils/similarity/__main__.py create mode 100644 utils/similarity/cache.py create mode 100644 utils/similarity/encode.py create mode 100644 utils/similarity/info.py create mode 100644 utils/similarity/similarity.py create mode 100755 utils/similarity/test.py create mode 100755 utils/similarity/train.py diff --git a/setup.py b/setup.py index 525839e69..7b8bead4c 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,8 @@ setup( 'console_scripts': [ 'slither = slither.__main__:main', 'slither-check-upgradeability = utils.upgradeability.__main__:main', - 'slither-find-paths = utils.possible_paths.__main__:main' + 'slither-find-paths = utils.possible_paths.__main__:main', + 'slither-simil = utils.similarity.__main__:main' ] } ) diff --git a/utils/similarity/__init__.py b/utils/similarity/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/utils/similarity/__main__.py b/utils/similarity/__main__.py new file mode 100755 index 000000000..c456ae0a9 --- /dev/null +++ b/utils/similarity/__main__.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import sys +import traceback +import operator +import numpy as np + +from .info import info +from .test import test +from .train import train + + +logging.basicConfig() +logger = logging.getLogger("Slither") + +slither_simil_usage = "USAGE" # TODO +modes = ["info", "test", "train"] + +def parse_args(): + parser = argparse.ArgumentParser(description='', + usage=slither_simil_usage) + + parser.add_argument('mode', + help="|".join(modes)) + + parser.add_argument('model', + help='model.bin') + + parser.add_argument('--solc', + help='solc path', + action='store', + default='solc') + + parser.add_argument('--filename', + action='store', + dest='filename', + help='contract.sol') + + parser.add_argument('--contract', + action='store', + dest='contract', + help='Contract') + + parser.add_argument('--filter', + action='store', + dest='filter', + help='Extension to filter contracts') + + parser.add_argument('--fname', + action='store', + dest='fname', + help='Function name') + + parser.add_argument('--input', + action='store', + dest='input', + help='File or directory used as input') + + parser.add_argument('--version', + help='displays the current version', + version="0.0", + action='version') + + if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + + args = parser.parse_args() + return args + +# endregion +################################################################################### +################################################################################### +# region Main +################################################################################### +################################################################################### + +def main(): + args = parse_args() + + default_log = logging.INFO + logger.setLevel(default_log) + + try: + mode = args.mode + + if mode == "info": + info(args) + elif mode == "train": + train(args) + elif mode == "test": + test(args) + else: + logger.error('Invalid mode!. It should be one of these: %s' % ", ".join(modes)) + sys.exit(-1) + + except Exception: + logger.error('Error in %s' % args.filename) + logger.error(traceback.format_exc()) + sys.exit(-1) + +if __name__ == '__main__': + main() + +# endregion diff --git a/utils/similarity/cache.py b/utils/similarity/cache.py new file mode 100644 index 000000000..8093134cf --- /dev/null +++ b/utils/similarity/cache.py @@ -0,0 +1,22 @@ +import numpy as np + +from .encode import encode_contract, load_contracts + +def load_cache(infile, model, ext=None, solc='solc'): + cache = dict() + if infile.endswith(".npz"): + with np.load(infile) as data: + array = data['arr_0'][0] + for x,y in array: + cache[x] = y + else: + contracts = load_contracts(infile, ext=ext) + for contract in contracts: + for x,ir in encode_contract(contract, solc=solc).items(): + if ir != []: + y = " ".join(ir) + cache[x] = model.get_sentence_vector(y) + return cache + +def save_cache(cache, outfile): + np.savez(outfile,[np.array(list(cache.items()))]) diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py new file mode 100644 index 000000000..75e022f7a --- /dev/null +++ b/utils/similarity/encode.py @@ -0,0 +1,168 @@ +import os +import sys + +from slither import Slither +from slither.slithir.operations import * +from slither.slithir.variables import * +from slither.core.declarations import * +from slither.solc_parsing.declarations.function import * + +from slither.solc_parsing.variables.state_variable import * +from slither.solc_parsing.variables.local_variable import * +from slither.solc_parsing.variables.local_variable_init_from_tuple import * + +def load_contracts(dirname, ext=None): + r = [] + walk = list(os.walk(dirname)) + for x, y, files in walk: + for f in files: + if ext is None or f.endswith(ext): + r.append(x + "/".join(y) + "/" + f) + return r + +def ntype(_type): + if type(_type) is not str: + _type = str(_type) + + if "struct" in _type: + return "struct" + elif "enum" in _type: + return "enum" + elif "tuple" in _type: + return "tuple" + elif "contract" in _type: + return "contract" + elif "mapping" in _type: + return "mapping" + elif "." in _type or _type[0].isupper(): + return "" + else: + return _type.replace(" ","_") + +def encode_ir(ir): + # operations + if isinstance(ir, Assignment): + return '({}):=({})'.format(encode_ir(ir.lvalue), encode_ir(ir.rvalue)) + if isinstance(ir, Index): + return 'index({})'.format(ntype(ir._type)) + if isinstance(ir, Member): + return 'member' #.format(ntype(ir._type)) + if isinstance(ir, Length): + return 'length' + if isinstance(ir, Balance): + return 'balance' + if isinstance(ir, Binary): + return 'binary({})'.format(ir.type_str) + if isinstance(ir, Unary): + return 'unary({})'.format(ir.type_str) + if isinstance(ir, Condition): + return 'condition({})'.format(encode_ir(ir.value)) + if isinstance(ir, NewStructure): + return 'new_structure' + if isinstance(ir, NewContract): + return 'new_contract' + if isinstance(ir, NewArray): + return 'new_array({})'.format(ntype(ir._array_type)) + if isinstance(ir, NewElementaryType): + return 'new_elementary({})'.format(ntype(ir._type)) + if isinstance(ir, Push): + return 'push({},{})'.format(encode_ir(ir.value), encode_ir(ir.lvalue)) + if isinstance(ir, Delete): + return 'delete({},{})'.format(encode_ir(ir.lvalue), encode_ir(ir.variable)) + if isinstance(ir, SolidityCall): + return 'solidity_call({})'.format(ir.function.full_name) + if isinstance(ir, InternalCall): + return 'internal_call({})'.format(ntype(ir._type_call)) + if isinstance(ir, EventCall): # is this useful? + return 'event' + if isinstance(ir, LibraryCall): + return 'library_call' + if isinstance(ir, InternalDynamicCall): + return 'internal_dynamic_call' + if isinstance(ir, HighLevelCall): # TODO: improve + return 'high_level_call' + if isinstance(ir, LowLevelCall): # TODO: improve + return 'low_level_call' + if isinstance(ir, TypeConversion): + return 'type_conversion({})'.format(ntype(ir.type)) + if isinstance(ir, Return): # this can be improved using values + return 'return' #.format(ntype(ir.type)) + if isinstance(ir, Transfer): + return 'transfer({})'.format(encode_ir(ir.call_value)) + if isinstance(ir, Send): + return 'send({})'.format(encode_ir(ir.call_value)) + if isinstance(ir, Unpack): # TODO: improve + return 'unpack' + if isinstance(ir, InitArray): # TODO: improve + return 'init_array' + if isinstance(ir, FunctionSolc): # TODO: investigate this + return 'function_solc' + + # variables + if isinstance(ir, Constant): + return 'constant({})'.format(ntype(ir._type)) + if isinstance(ir, SolidityVariableComposed): + return 'solidity_variable_composed({})'.format(ir.name) + if isinstance(ir, SolidityVariable): + return 'solidity_variable{}'.format(ir.name) + if isinstance(ir, TemporaryVariable): + return 'temporary_variable' + if isinstance(ir, ReferenceVariable): + return 'reference({})'.format(ntype(ir._type)) + if isinstance(ir, LocalVariableSolc): + return 'local_solc_variable({})'.format(ir._location) + if isinstance(ir, StateVariableSolc): + return 'state_solc_variable({})'.format(ntype(ir._type)) + if isinstance(ir, LocalVariableInitFromTupleSolc): + return 'local_variable_init_tuple' + if isinstance(ir, TupleVariable): + return 'tuple_variable' + + # default + else: + print(type(ir),"is missing encoding!") + #sys.exit(1) + return '' + +def encode_contract(filename, solc): + r = dict() + + # Init slither + try: + slither = Slither(filename, solc=solc) + except: + print("Compilation failed") + return r + + # Iterate over all the contracts + for contract in slither.contracts: + + # Iterate over all the functions + for function in contract.functions: + + # Dont explore inherited functions + if function.contract == contract: + + if function.nodes == []: + continue + + x = "-".join([filename,contract.name,function.name]) + + r[x] = [] + + # Iterate over the nodes of the function + for node in function.nodes: + + # Print the Solidity expression of the nodes + # And the SlithIR operations + if node.expression: + + #print('\tSolidity expression: {}'.format(node.expression)) + #print('\tSlithIR:') + for ir in node.irs: + #print(ir) + r[x].append(encode_ir(ir)) + #print('\t\t\t{}'.format(ir)) + return r + + diff --git a/utils/similarity/info.py b/utils/similarity/info.py new file mode 100644 index 000000000..46625eabd --- /dev/null +++ b/utils/similarity/info.py @@ -0,0 +1,47 @@ +import logging +import sys +import traceback + +from fastText import load_model +from .encode import encode_contract + +logging.basicConfig() +logger = logging.getLogger("Slither") + +def info(args): + + try: + model = args.model + model = load_model(model) + filename = args.filename + contract = args.contract + solc = args.solc + fname = args.fname + if filename is None and contract is None and fname is None: + print(args.model,"uses the following words:") + for word in model.get_words(): + print(word) + sys.exit(0) + + if filename is None or contract is None or fname is None: + logger.error('The encode mode requires filename, contract and fname parameters.') + sys.exit(-1) + + irs = encode_contract(filename, solc=solc) + if len(irs) == 0: + sys.exit(-1) + + x = "-".join([filename,contract,fname]) + y = " ".join(irs[x]) + + fvector = model.get_sentence_vector(y) + print("Function {} in contract {} is encoded as:".format(fname, contract)) + print(y) + print(fvector) + + except Exception: + logger.error('Error in %s' % args.filename) + logger.error(traceback.format_exc()) + sys.exit(-1) + + diff --git a/utils/similarity/similarity.py b/utils/similarity/similarity.py new file mode 100644 index 000000000..4cc3f2b35 --- /dev/null +++ b/utils/similarity/similarity.py @@ -0,0 +1,6 @@ +import numpy as np + +def similarity(v1, v2): + n1 = np.linalg.norm(v1) + n2 = np.linalg.norm(v2) + return np.dot(v1, v2) / n1 / n2 diff --git a/utils/similarity/test.py b/utils/similarity/test.py new file mode 100755 index 000000000..96a6e648e --- /dev/null +++ b/utils/similarity/test.py @@ -0,0 +1,49 @@ +import argparse +import logging +import sys +import traceback +import operator +import numpy as np + +from fastText import load_model +from .encode import encode_contract, load_contracts +from .cache import load_cache, save_cache +from .similarity import similarity + +logger = logging.getLogger("crytic-pred") + +def test(args): + + try: + model = args.model + model = load_model(model) + filename = args.filename + contract = args.contract + fname = args.fname + solc = args.solc + infile = args.input + ext = args.filter + if filename is None or contract is None or fname is None or infile is None: + logger.error('The test mode requires filename, contract, fname and input parameters.') + sys.exit(-1) + + irs = encode_contract(filename,solc=solc) + x = "-".join([filename,contract,fname]) + y = " ".join(irs[x]) + + fvector = model.get_sentence_vector(y) + cache = load_cache(infile, model, ext=ext, solc=solc) + #save_cache("cache.npz", cache) + + r = dict() + for x,y in cache.items(): + r[x] = similarity(fvector, y) + + r = sorted(r.items(), key=operator.itemgetter(1), reverse=True) + for x,score in r[:10]: + print(x,score) + + except Exception: + logger.error('Error in %s' % args.filename) + logger.error(traceback.format_exc()) + sys.exit(-1) diff --git a/utils/similarity/train.py b/utils/similarity/train.py new file mode 100755 index 000000000..83c7ea8bf --- /dev/null +++ b/utils/similarity/train.py @@ -0,0 +1,37 @@ +import argparse +import logging +import sys +import traceback +import operator + +from fastText import train_unsupervised +from .encode import encode_contract, load_contracts + +logger = logging.getLogger("crytic-pred") + +def train(args): + + try: + model_filename = args.model + solc = args.solc + dirname = args.input + + if dirname is None: + logger.error('The train mode requires the directory parameter.') + sys.exit(-1) + + contracts = load_contracts(dirname) + with open("data.txt", 'w') as f: + for contract in contracts: + for function,ir in encode_contract(contract,solc).items(): + if ir != []: + f.write(" ".join(ir)+"\n") + + model = train_unsupervised(input='data.txt', model='skipgram') + model.save_model(model_filename) + print(model.get_words()) + + except Exception: + logger.error('Error in %s' % args.filename) + logger.error(traceback.format_exc()) + sys.exit(-1) From 32f2f6f0d103baab01a1bbbc030f93aaeb0f3d99 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Thu, 18 Apr 2019 10:41:49 -0300 Subject: [PATCH 02/28] fixes + logger --- utils/similarity/__main__.py | 4 +--- utils/similarity/encode.py | 11 +++-------- utils/similarity/info.py | 2 +- utils/similarity/test.py | 5 ++++- utils/similarity/train.py | 2 +- 5 files changed, 10 insertions(+), 14 deletions(-) diff --git a/utils/similarity/__main__.py b/utils/similarity/__main__.py index c456ae0a9..2e2abd26c 100755 --- a/utils/similarity/__main__.py +++ b/utils/similarity/__main__.py @@ -5,13 +5,11 @@ import logging import sys import traceback import operator -import numpy as np from .info import info from .test import test from .train import train - logging.basicConfig() logger = logging.getLogger("Slither") @@ -19,7 +17,7 @@ slither_simil_usage = "USAGE" # TODO modes = ["info", "test", "train"] def parse_args(): - parser = argparse.ArgumentParser(description='', + parser = argparse.ArgumentParser(description='Code similarity detection tool', usage=slither_simil_usage) parser.add_argument('mode', diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py index 75e022f7a..f6ad95517 100644 --- a/utils/similarity/encode.py +++ b/utils/similarity/encode.py @@ -11,6 +11,8 @@ from slither.solc_parsing.variables.state_variable import * from slither.solc_parsing.variables.local_variable import * from slither.solc_parsing.variables.local_variable_init_from_tuple import * +logger = logging.getLogger("Slither-simil") + def load_contracts(dirname, ext=None): r = [] walk = list(os.walk(dirname)) @@ -121,7 +123,6 @@ def encode_ir(ir): # default else: print(type(ir),"is missing encoding!") - #sys.exit(1) return '' def encode_contract(filename, solc): @@ -131,7 +132,7 @@ def encode_contract(filename, solc): try: slither = Slither(filename, solc=solc) except: - print("Compilation failed") + logger.error("Compilation failed") return r # Iterate over all the contracts @@ -152,17 +153,11 @@ def encode_contract(filename, solc): # Iterate over the nodes of the function for node in function.nodes: - # Print the Solidity expression of the nodes # And the SlithIR operations if node.expression: - - #print('\tSolidity expression: {}'.format(node.expression)) - #print('\tSlithIR:') for ir in node.irs: - #print(ir) r[x].append(encode_ir(ir)) - #print('\t\t\t{}'.format(ir)) return r diff --git a/utils/similarity/info.py b/utils/similarity/info.py index 46625eabd..947d9b40b 100644 --- a/utils/similarity/info.py +++ b/utils/similarity/info.py @@ -6,7 +6,7 @@ from fastText import load_model from .encode import encode_contract logging.basicConfig() -logger = logging.getLogger("Slither") +logger = logging.getLogger("Slither-simil") def info(args): diff --git a/utils/similarity/test.py b/utils/similarity/test.py index 96a6e648e..1ea803546 100755 --- a/utils/similarity/test.py +++ b/utils/similarity/test.py @@ -10,7 +10,7 @@ from .encode import encode_contract, load_contracts from .cache import load_cache, save_cache from .similarity import similarity -logger = logging.getLogger("crytic-pred") +logger = logging.getLogger("Slither-simil") def test(args): @@ -28,6 +28,9 @@ def test(args): sys.exit(-1) irs = encode_contract(filename,solc=solc) + if len(irs) == 0: + sys.exit(-1) + x = "-".join([filename,contract,fname]) y = " ".join(irs[x]) diff --git a/utils/similarity/train.py b/utils/similarity/train.py index 83c7ea8bf..0d99963de 100755 --- a/utils/similarity/train.py +++ b/utils/similarity/train.py @@ -7,7 +7,7 @@ import operator from fastText import train_unsupervised from .encode import encode_contract, load_contracts -logger = logging.getLogger("crytic-pred") +logger = logging.getLogger("Slither-simil") def train(args): From 7affd05610252185b38334d99df9caf2e9ff390c Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Thu, 18 Apr 2019 11:06:24 -0300 Subject: [PATCH 03/28] handling fastText dependency --- utils/similarity/__init__.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/utils/similarity/__init__.py b/utils/similarity/__init__.py index e69de29bb..ffbc13f30 100644 --- a/utils/similarity/__init__.py +++ b/utils/similarity/__init__.py @@ -0,0 +1,12 @@ +# from https://stackoverflow.com/questions/563022/whats-python-good-practice-for-importing-and-offering-optional-features +import sys + +try: + import fastText +except ImportError: + fastText = None + +if fastText is None: + print("In order to use slither-simil, you need to install fastText 0.2.0:") + print("$ pip3 install https://github.com/facebookresearch/fastText/archive/0.2.0.zip --user") + sys.exit(-1) From 3fd50a40a6162cddeab4b11b3e42488b238f9d95 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Thu, 18 Apr 2019 15:55:01 -0300 Subject: [PATCH 04/28] fixes --- utils/similarity/encode.py | 35 ++++++++++++++++++++++++++++++----- utils/similarity/train.py | 5 +++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py index f6ad95517..6c6c48906 100644 --- a/utils/similarity/encode.py +++ b/utils/similarity/encode.py @@ -6,6 +6,7 @@ from slither.slithir.operations import * from slither.slithir.variables import * from slither.core.declarations import * from slither.solc_parsing.declarations.function import * +from slither.core.solidity_types import * from slither.solc_parsing.variables.state_variable import * from slither.solc_parsing.variables.local_variable import * @@ -13,18 +14,43 @@ from slither.solc_parsing.variables.local_variable_init_from_tuple import * logger = logging.getLogger("Slither-simil") -def load_contracts(dirname, ext=None): +def load_contracts(dirname, ext=None, nsamples=None): r = [] walk = list(os.walk(dirname)) for x, y, files in walk: for f in files: if ext is None or f.endswith(ext): r.append(x + "/".join(y) + "/" + f) - return r + + if nsamples is None: + return r + else: + # TODO: shuffle + return r[:nsamples] def ntype(_type): - if type(_type) is not str: + if isinstance(_type, ElementaryType): + _type = str(_type) + elif isinstance(_type, ArrayType): + if isinstance(_type.type, ElementaryType): + _type = str(_type) + else: + _type = "user_defined_array" + elif isinstance(_type, Structure): + print(_type) _type = str(_type) + elif isinstance(_type, Enum): + print(_type) + _type = str(_type) + elif isinstance(_type, MappingType): + _type = str(_type) + elif isinstance(_type, UserDefinedType): + _type = "user_defined_type" # TODO: this could be Contract, Enum or Struct + else: + _type = str(_type) + + _type = _type.replace("_memory","") + _type = _type.replace("_storage_ref","") if "struct" in _type: return "struct" @@ -36,8 +62,6 @@ def ntype(_type): return "contract" elif "mapping" in _type: return "mapping" - elif "." in _type or _type[0].isupper(): - return "" else: return _type.replace(" ","_") @@ -46,6 +70,7 @@ def encode_ir(ir): if isinstance(ir, Assignment): return '({}):=({})'.format(encode_ir(ir.lvalue), encode_ir(ir.rvalue)) if isinstance(ir, Index): + #print(type(ir._type)) return 'index({})'.format(ntype(ir._type)) if isinstance(ir, Member): return 'member' #.format(ntype(ir._type)) diff --git a/utils/similarity/train.py b/utils/similarity/train.py index 0d99963de..6892c8c80 100755 --- a/utils/similarity/train.py +++ b/utils/similarity/train.py @@ -15,12 +15,13 @@ def train(args): model_filename = args.model solc = args.solc dirname = args.input + ext = args.filter if dirname is None: - logger.error('The train mode requires the directory parameter.') + logger.error('The train mode requires the input parameter.') sys.exit(-1) - contracts = load_contracts(dirname) + contracts = load_contracts(dirname, ext=ext, nsamples=None) with open("data.txt", 'w') as f: for contract in contracts: for function,ir in encode_contract(contract,solc).items(): From 03b318b55e5ff5b0db5c86b0082c69e5dce0ed40 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Fri, 19 Apr 2019 12:30:14 -0300 Subject: [PATCH 05/28] fixes + enable contract sampling during training --- utils/similarity/__main__.py | 6 ++++++ utils/similarity/encode.py | 4 ++-- utils/similarity/train.py | 9 ++++++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/utils/similarity/__main__.py b/utils/similarity/__main__.py index 2e2abd26c..9e9860fd8 100755 --- a/utils/similarity/__main__.py +++ b/utils/similarity/__main__.py @@ -51,6 +51,12 @@ def parse_args(): dest='fname', help='Function name') + parser.add_argument('--nsamples', + action='store', + type=int, + dest='nsamples', + help='Number of contract samples used for training') + parser.add_argument('--input', action='store', dest='input', diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py index 6c6c48906..6630b1bfc 100644 --- a/utils/similarity/encode.py +++ b/utils/similarity/encode.py @@ -49,8 +49,8 @@ def ntype(_type): else: _type = str(_type) - _type = _type.replace("_memory","") - _type = _type.replace("_storage_ref","") + _type = _type.replace(" memory","") + _type = _type.replace(" storage ref","") if "struct" in _type: return "struct" diff --git a/utils/similarity/train.py b/utils/similarity/train.py index 6892c8c80..8f8e7a888 100755 --- a/utils/similarity/train.py +++ b/utils/similarity/train.py @@ -12,23 +12,26 @@ logger = logging.getLogger("Slither-simil") def train(args): try: + last_data_train_filename = "last_data_train.txt" model_filename = args.model solc = args.solc dirname = args.input ext = args.filter + nsamples = args.nsamples if dirname is None: logger.error('The train mode requires the input parameter.') sys.exit(-1) - contracts = load_contracts(dirname, ext=ext, nsamples=None) - with open("data.txt", 'w') as f: + contracts = load_contracts(dirname, ext=ext, nsamples=nsamples) + logger.info('Saving extracted data into', last_data_train_filename) + with open(last_data_train_filename, 'w') as f: for contract in contracts: for function,ir in encode_contract(contract,solc).items(): if ir != []: f.write(" ".join(ir)+"\n") - model = train_unsupervised(input='data.txt', model='skipgram') + model = train_unsupervised(input=last_data_train_filename, model='skipgram') model.save_model(model_filename) print(model.get_words()) From 8f2bad8905f9ba27bd0188fe6fb2d0f051d7a47f Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Fri, 19 Apr 2019 17:45:25 -0300 Subject: [PATCH 06/28] fixes --- utils/similarity/__main__.py | 2 +- utils/similarity/cache.py | 2 +- utils/similarity/encode.py | 2 +- utils/similarity/test.py | 4 ++-- utils/similarity/train.py | 25 +++++++++++++++++++------ 5 files changed, 24 insertions(+), 11 deletions(-) diff --git a/utils/similarity/__main__.py b/utils/similarity/__main__.py index 9e9860fd8..b241f15cd 100755 --- a/utils/similarity/__main__.py +++ b/utils/similarity/__main__.py @@ -11,7 +11,7 @@ from .test import test from .train import train logging.basicConfig() -logger = logging.getLogger("Slither") +logger = logging.getLogger("Slither-simil") slither_simil_usage = "USAGE" # TODO modes = ["info", "test", "train"] diff --git a/utils/similarity/cache.py b/utils/similarity/cache.py index 8093134cf..b11bbf499 100644 --- a/utils/similarity/cache.py +++ b/utils/similarity/cache.py @@ -19,4 +19,4 @@ def load_cache(infile, model, ext=None, solc='solc'): return cache def save_cache(cache, outfile): - np.savez(outfile,[np.array(list(cache.items()))]) + np.savez(outfile,[np.array(cache)]) diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py index 6630b1bfc..081b66671 100644 --- a/utils/similarity/encode.py +++ b/utils/similarity/encode.py @@ -172,7 +172,7 @@ def encode_contract(filename, solc): if function.nodes == []: continue - x = "-".join([filename,contract.name,function.name]) + x = (filename,contract.name,function.name) r[x] = [] diff --git a/utils/similarity/test.py b/utils/similarity/test.py index 1ea803546..4f28801e0 100755 --- a/utils/similarity/test.py +++ b/utils/similarity/test.py @@ -23,6 +23,7 @@ def test(args): solc = args.solc infile = args.input ext = args.filter + if filename is None or contract is None or fname is None or infile is None: logger.error('The test mode requires filename, contract, fname and input parameters.') sys.exit(-1) @@ -31,8 +32,7 @@ def test(args): if len(irs) == 0: sys.exit(-1) - x = "-".join([filename,contract,fname]) - y = " ".join(irs[x]) + y = " ".join(irs[(filename,contract,fname)]) fvector = model.get_sentence_vector(y) cache = load_cache(infile, model, ext=ext, solc=solc) diff --git a/utils/similarity/train.py b/utils/similarity/train.py index 8f8e7a888..cbe037cf0 100755 --- a/utils/similarity/train.py +++ b/utils/similarity/train.py @@ -3,9 +3,11 @@ import logging import sys import traceback import operator +import os from fastText import train_unsupervised -from .encode import encode_contract, load_contracts +from .encode import encode_contract, load_contracts +from .cache import save_cache logger = logging.getLogger("Slither-simil") @@ -25,15 +27,26 @@ def train(args): contracts = load_contracts(dirname, ext=ext, nsamples=nsamples) logger.info('Saving extracted data into', last_data_train_filename) + cache = [] with open(last_data_train_filename, 'w') as f: - for contract in contracts: - for function,ir in encode_contract(contract,solc).items(): + for filename in contracts: + #cache[filename] = dict() + for (filename, contract, function), ir in encode_contract(filename,solc).items(): if ir != []: - f.write(" ".join(ir)+"\n") - + x = " ".join(ir) + f.write(x+"\n") + cache.append((os.path.split(filename)[-1], contract, function, x)) + + logger.info('Starting training') model = train_unsupervised(input=last_data_train_filename, model='skipgram') + logger.info('Training complete') model.save_model(model_filename) - print(model.get_words()) + + for i,(filename, contract, function, irs) in enumerate(cache): + cache[i] = ((filename, contract, function), model.get_sentence_vector(irs)) + + logger.info('Saved cache in cache.npz') + save_cache(cache, "cache.npz") except Exception: logger.error('Error in %s' % args.filename) From d8d793881301c1a61831574387578612ba2aa240 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Fri, 19 Apr 2019 18:17:21 -0300 Subject: [PATCH 07/28] improved logging --- utils/similarity/train.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/utils/similarity/train.py b/utils/similarity/train.py index cbe037cf0..679f8b7df 100755 --- a/utils/similarity/train.py +++ b/utils/similarity/train.py @@ -26,7 +26,7 @@ def train(args): sys.exit(-1) contracts = load_contracts(dirname, ext=ext, nsamples=nsamples) - logger.info('Saving extracted data into', last_data_train_filename) + logger.info('Saving extracted data into %s', last_data_train_filename) cache = [] with open(last_data_train_filename, 'w') as f: for filename in contracts: @@ -40,13 +40,15 @@ def train(args): logger.info('Starting training') model = train_unsupervised(input=last_data_train_filename, model='skipgram') logger.info('Training complete') + logger.info('Saving model') model.save_model(model_filename) for i,(filename, contract, function, irs) in enumerate(cache): cache[i] = ((filename, contract, function), model.get_sentence_vector(irs)) - logger.info('Saved cache in cache.npz') + logger.info('Saving cache in cache.npz') save_cache(cache, "cache.npz") + logger.info('Done!') except Exception: logger.error('Error in %s' % args.filename) From 0978700a50802a5ec7d35fb1361e6a335c1b6f66 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Sat, 20 Apr 2019 18:16:19 -0300 Subject: [PATCH 08/28] added plot mode --- utils/similarity/__main__.py | 12 ++++++- utils/similarity/plot.py | 62 ++++++++++++++++++++++++++++++++++++ utils/similarity/test.py | 3 +- 3 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 utils/similarity/plot.py diff --git a/utils/similarity/__main__.py b/utils/similarity/__main__.py index b241f15cd..50cc09a43 100755 --- a/utils/similarity/__main__.py +++ b/utils/similarity/__main__.py @@ -9,12 +9,13 @@ import operator from .info import info from .test import test from .train import train +from .plot import plot logging.basicConfig() logger = logging.getLogger("Slither-simil") slither_simil_usage = "USAGE" # TODO -modes = ["info", "test", "train"] +modes = ["info", "test", "train", "plot"] def parse_args(): parser = argparse.ArgumentParser(description='Code similarity detection tool', @@ -57,6 +58,13 @@ def parse_args(): dest='nsamples', help='Number of contract samples used for training') + parser.add_argument('--ntop', + action='store', + type=int, + dest='ntop', + default=10, + help='Number of more similar contracts to show for testing') + parser.add_argument('--input', action='store', dest='input', @@ -96,6 +104,8 @@ def main(): train(args) elif mode == "test": test(args) + elif mode == "plot": + plot(args) else: logger.error('Invalid mode!. It should be one of these: %s' % ", ".join(modes)) sys.exit(-1) diff --git a/utils/similarity/plot.py b/utils/similarity/plot.py new file mode 100644 index 000000000..84fcd69b9 --- /dev/null +++ b/utils/similarity/plot.py @@ -0,0 +1,62 @@ +import logging +import sys +import traceback +import operator +import numpy as np +import random + +from sklearn import decomposition +import matplotlib.pyplot as plt + +from fastText import load_model +from .cache import load_cache + +logger = logging.getLogger("crytic-pred") + +def plot(args): + + try: + model = args.model + model = load_model(model) + filename = args.filename + contract = args.contract + fname = args.fname + solc = args.solc + infile = args.input + ext = args.filter + + if contract is None or fname is None or infile is None: + logger.error('The plot mode requieres contract, fname and input parameters.') + sys.exit(-1) + + cache = load_cache(infile, model, ext=ext, solc=solc) + #save_cache("cache.npz", cache) + + data = list() + fs = list() + for (f,c,n),y in cache.items(): + if c == contract and n == fname: + fs.append(f) + data.append(y) + #r[x] = similarity(fvector, y) + + + data = np.array(data) + pca = decomposition.PCA(n_components=2) + tdata = pca.fit_transform(data) + plt.figure() + assert(len(tdata) == len(fs)) + for ([x,y],l) in zip(tdata, fs): + x = random.gauss(0, 0.01) + x + y = random.gauss(0, 0.01) + y + plt.scatter(x, y, c='blue') + plt.text(x-0.001,y+0.001, l.split("_")[1].replace(".sol.ast.compact.json","")) + + plt.show() + #r = sorted(r.items(), key=operator.itemgetter(1), reverse=True) + #for x,score in r[:10]: + + except Exception: + logger.error('Error in %s' % args.filename) + logger.error(traceback.format_exc()) + sys.exit(-1) diff --git a/utils/similarity/test.py b/utils/similarity/test.py index 4f28801e0..342551499 100755 --- a/utils/similarity/test.py +++ b/utils/similarity/test.py @@ -23,6 +23,7 @@ def test(args): solc = args.solc infile = args.input ext = args.filter + ntop = args.ntop if filename is None or contract is None or fname is None or infile is None: logger.error('The test mode requires filename, contract, fname and input parameters.') @@ -43,7 +44,7 @@ def test(args): r[x] = similarity(fvector, y) r = sorted(r.items(), key=operator.itemgetter(1), reverse=True) - for x,score in r[:10]: + for x,score in r[:ntop]: print(x,score) except Exception: From 1eb84785954d437b7f0b05d41b1a9d5fd6e83fdc Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Sat, 20 Apr 2019 18:18:46 -0300 Subject: [PATCH 09/28] verification of sklearn (optional) dependency --- utils/similarity/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/utils/similarity/__init__.py b/utils/similarity/__init__.py index ffbc13f30..dc48cf579 100644 --- a/utils/similarity/__init__.py +++ b/utils/similarity/__init__.py @@ -10,3 +10,12 @@ if fastText is None: print("In order to use slither-simil, you need to install fastText 0.2.0:") print("$ pip3 install https://github.com/facebookresearch/fastText/archive/0.2.0.zip --user") sys.exit(-1) + +try: + import sklearn +except ImportError: + sklearn = None + +if sklearn is None: + print("In order to use plot mode in slither-simil, you need to install sklearn:") + print("$ pip3 install sklearn --user") From 2b4edbda8cf29454aec6008088e50684101a37c1 Mon Sep 17 00:00:00 2001 From: g Date: Sat, 20 Apr 2019 17:56:26 -0400 Subject: [PATCH 10/28] fixes --- utils/similarity/__init__.py | 13 ++----------- utils/similarity/plot.py | 33 ++++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/utils/similarity/__init__.py b/utils/similarity/__init__.py index dc48cf579..5a1473b46 100644 --- a/utils/similarity/__init__.py +++ b/utils/similarity/__init__.py @@ -7,15 +7,6 @@ except ImportError: fastText = None if fastText is None: - print("In order to use slither-simil, you need to install fastText 0.2.0:") - print("$ pip3 install https://github.com/facebookresearch/fastText/archive/0.2.0.zip --user") + print("ERROR: in order to use slither-simil, you need to install fastText 0.2.0:") + print("$ pip3 install https://github.com/facebookresearch/fastText/archive/0.2.0.zip --user\n") sys.exit(-1) - -try: - import sklearn -except ImportError: - sklearn = None - -if sklearn is None: - print("In order to use plot mode in slither-simil, you need to install sklearn:") - print("$ pip3 install sklearn --user") diff --git a/utils/similarity/plot.py b/utils/similarity/plot.py index 84fcd69b9..d6bd75e33 100644 --- a/utils/similarity/plot.py +++ b/utils/similarity/plot.py @@ -3,19 +3,30 @@ import sys import traceback import operator import numpy as np +import tqdm import random -from sklearn import decomposition -import matplotlib.pyplot as plt - +try: + from sklearn import decomposition + import matplotlib.pyplot as plt +except ImportError: + decomposition = None + plt = None + from fastText import load_model from .cache import load_cache -logger = logging.getLogger("crytic-pred") +logger = logging.getLogger("Slither-simil") def plot(args): + if decomposition is None or plt is None: + print("ERROR: In order to use plot mode in slither-simil, you need to install sklearn and matplotlib:") + print("$ pip3 install sklearn matplotlib --user") + sys.exit(-1) + try: + model = args.model model = load_model(model) filename = args.filename @@ -29,32 +40,32 @@ def plot(args): logger.error('The plot mode requieres contract, fname and input parameters.') sys.exit(-1) + logger.info('Loading data..') cache = load_cache(infile, model, ext=ext, solc=solc) - #save_cache("cache.npz", cache) data = list() fs = list() + + logger.info('Procesing data..') for (f,c,n),y in cache.items(): if c == contract and n == fname: fs.append(f) data.append(y) - #r[x] = similarity(fvector, y) - data = np.array(data) pca = decomposition.PCA(n_components=2) tdata = pca.fit_transform(data) + + logger.info('Plotting data..') plt.figure() assert(len(tdata) == len(fs)) for ([x,y],l) in zip(tdata, fs): x = random.gauss(0, 0.01) + x y = random.gauss(0, 0.01) + y plt.scatter(x, y, c='blue') - plt.text(x-0.001,y+0.001, l.split("_")[1].replace(".sol.ast.compact.json","")) + #plt.text(x-0.001,y+0.001, l.split("_")[1].replace(".sol.ast.compact.json","")) - plt.show() - #r = sorted(r.items(), key=operator.itemgetter(1), reverse=True) - #for x,score in r[:10]: + plt.savefig('plot.png', bbox_inches='tight') except Exception: logger.error('Error in %s' % args.filename) From 92a0ca0e22cba978ff6738c1eee4797ebde7d0d0 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Tue, 7 May 2019 18:13:32 -0300 Subject: [PATCH 11/28] fixed bug in simil info --- utils/similarity/info.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/utils/similarity/info.py b/utils/similarity/info.py index 947d9b40b..76da37d99 100644 --- a/utils/similarity/info.py +++ b/utils/similarity/info.py @@ -1,5 +1,6 @@ import logging import sys +import os.path import traceback from fastText import load_model @@ -11,8 +12,13 @@ logger = logging.getLogger("Slither-simil") def info(args): try: + model = args.model - model = load_model(model) + if os.path.isfile(model): + model = load_model(model) + else: + model = None + filename = args.filename contract = args.contract solc = args.solc @@ -30,14 +36,15 @@ def info(args): irs = encode_contract(filename, solc=solc) if len(irs) == 0: sys.exit(-1) - - x = "-".join([filename,contract,fname]) - y = " ".join(irs[x]) - fvector = model.get_sentence_vector(y) + x = (filename,contract,fname) + y = " ".join(irs[x]) + print("Function {} in contract {} is encoded as:".format(fname, contract)) print(y) - print(fvector) + if model is not None: + fvector = model.get_sentence_vector(y) + print(fvector) except Exception: logger.error('Error in %s' % args.filename) From c41bddd3930773d697a666034438e02cddf56c2b Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Tue, 7 May 2019 18:16:29 -0300 Subject: [PATCH 12/28] added basic tests for slither-simil --- .travis.yml | 1 + scripts/travis_test_simil.sh | 16 ++++++++++++++++ tests/simil/test_1.txt | 1 + 3 files changed, 18 insertions(+) create mode 100755 scripts/travis_test_simil.sh create mode 100644 tests/simil/test_1.txt diff --git a/.travis.yml b/.travis.yml index d3ccde3c2..b33582446 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,6 +18,7 @@ env: - TEST_SUITE=scripts/travis_test_cli.sh - TEST_SUITE=scripts/travis_test_printers.sh - TEST_SUITE=scripts/travis_test_slither_config.sh + - TEST_SUITE=scripts/travis_test_simil.sh branches: only: - master diff --git a/scripts/travis_test_simil.sh b/scripts/travis_test_simil.sh new file mode 100755 index 000000000..c4123191a --- /dev/null +++ b/scripts/travis_test_simil.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +### Test slither-simil + +DIR_TESTS="tests/simil" +slither-simil info "" --filename $DIR_TESTS/../complex_func.sol --contract Complex --fname complexExternalWrites --solc solc-0.4.25 > test_1.txt 2>&1 +DIFF=$(diff test_1.txt "$DIR_TESTS/test_1.txt") +if [ "$DIFF" != "" ] +then + echo "slither-simil failed" + cat test_1.txt + cat "$DIR_TESTS/test_1.txt" + exit -1 +fi + +rm test_1.txt diff --git a/tests/simil/test_1.txt b/tests/simil/test_1.txt new file mode 100644 index 000000000..8baef1b4a --- /dev/null +++ b/tests/simil/test_1.txt @@ -0,0 +1 @@ +abc From b00b37552089f8049e895e2ceff4ee227e0aefe9 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Tue, 7 May 2019 18:29:56 -0300 Subject: [PATCH 13/28] added installation of requisites --- scripts/travis_test_simil.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/travis_test_simil.sh b/scripts/travis_test_simil.sh index c4123191a..0d571aa82 100755 --- a/scripts/travis_test_simil.sh +++ b/scripts/travis_test_simil.sh @@ -1,5 +1,10 @@ #!/usr/bin/env bash +### Install requisites + +pip3.6 install pybind11 --user +pip3.6 install https://github.com/facebookresearch/fastText/archive/0.2.0.zip --user + ### Test slither-simil DIR_TESTS="tests/simil" From 2664045e6f21674976015f1a76732633a3d91e8c Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Tue, 7 May 2019 18:39:02 -0300 Subject: [PATCH 14/28] added installation of requisites --- scripts/travis_test_simil.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/travis_test_simil.sh b/scripts/travis_test_simil.sh index 0d571aa82..d520ea748 100755 --- a/scripts/travis_test_simil.sh +++ b/scripts/travis_test_simil.sh @@ -2,8 +2,8 @@ ### Install requisites -pip3.6 install pybind11 --user -pip3.6 install https://github.com/facebookresearch/fastText/archive/0.2.0.zip --user +pip3.6 install pybind11 +pip3.6 install https://github.com/facebookresearch/fastText/archive/0.2.0.zip ### Test slither-simil From 96c9417705b1bdc134f2738fd4e856658e23ea5a Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Tue, 7 May 2019 18:57:00 -0300 Subject: [PATCH 15/28] removed useless module --- utils/similarity/plot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/similarity/plot.py b/utils/similarity/plot.py index d6bd75e33..bfa4059f4 100644 --- a/utils/similarity/plot.py +++ b/utils/similarity/plot.py @@ -3,7 +3,6 @@ import sys import traceback import operator import numpy as np -import tqdm import random try: From ee37919ca76fd4b0fc44f6cec86163bee2a2e157 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Tue, 7 May 2019 19:07:25 -0300 Subject: [PATCH 16/28] fixed test --- tests/simil/test_1.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/simil/test_1.txt b/tests/simil/test_1.txt index 8baef1b4a..f722b9880 100644 --- a/tests/simil/test_1.txt +++ b/tests/simil/test_1.txt @@ -1 +1,2 @@ -abc +Function complexExternalWrites in contract Complex is encoded as: +new_contract (local_solc_variable(default)):=(temporary_variable) high_level_call high_level_call high_level_call high_level_call high_level_call new_contract (local_solc_variable(default)):=(temporary_variable) high_level_call new_contract (local_solc_variable(default)):=(temporary_variable) solidity_call(keccak256()) type_conversion(bytes4) low_level_call new_contract (local_solc_variable(default)):=(temporary_variable) solidity_call(keccak256()) type_conversion(bytes4) low_level_call From 5a1d30e1ab78379af792d990b31597fef4b12b08 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Thu, 9 May 2019 08:59:09 -0300 Subject: [PATCH 17/28] fixes --- utils/similarity/cache.py | 23 ++++++++--------------- utils/similarity/encode.py | 19 +++++++++++++++++++ utils/similarity/plot.py | 21 ++++++++++++++------- utils/similarity/test.py | 5 +++-- 4 files changed, 44 insertions(+), 24 deletions(-) diff --git a/utils/similarity/cache.py b/utils/similarity/cache.py index b11bbf499..f5973b4b3 100644 --- a/utils/similarity/cache.py +++ b/utils/similarity/cache.py @@ -1,21 +1,14 @@ import numpy as np -from .encode import encode_contract, load_contracts - -def load_cache(infile, model, ext=None, solc='solc'): +def load_cache(infile, nsamples=None): cache = dict() - if infile.endswith(".npz"): - with np.load(infile) as data: - array = data['arr_0'][0] - for x,y in array: - cache[x] = y - else: - contracts = load_contracts(infile, ext=ext) - for contract in contracts: - for x,ir in encode_contract(contract, solc=solc).items(): - if ir != []: - y = " ".join(ir) - cache[x] = model.get_sentence_vector(y) + with np.load(infile) as data: + array = data['arr_0'][0] + for i,(x,y) in enumerate(array): + cache[x] = y + if i == nsamples: + break + return cache def save_cache(cache, outfile): diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py index 081b66671..8dbac9f3d 100644 --- a/utils/similarity/encode.py +++ b/utils/similarity/encode.py @@ -12,8 +12,27 @@ from slither.solc_parsing.variables.state_variable import * from slither.solc_parsing.variables.local_variable import * from slither.solc_parsing.variables.local_variable_init_from_tuple import * +from .cache import load_cache + logger = logging.getLogger("Slither-simil") +def load_and_encode(infile, model, ext=None, solc='solc', nsamples=None): + r = dict() + if infile.endswith(".npz"): + r = load_cache(infile, nsamples=nsamples) + else: + contracts = load_contracts(infile, ext=ext, nsamples=nsamples) + for contract in contracts: + for x,ir in encode_contract(contract, solc=solc).items(): + if ir != []: + y = " ".join(ir) + r[x] = model.get_sentence_vector(y) + + return r + + + + def load_contracts(dirname, ext=None, nsamples=None): r = [] walk = list(os.walk(dirname)) diff --git a/utils/similarity/plot.py b/utils/similarity/plot.py index bfa4059f4..83809faa3 100644 --- a/utils/similarity/plot.py +++ b/utils/similarity/plot.py @@ -13,7 +13,7 @@ except ImportError: plt = None from fastText import load_model -from .cache import load_cache +from .encode import load_and_encode logger = logging.getLogger("Slither-simil") @@ -34,23 +34,29 @@ def plot(args): solc = args.solc infile = args.input ext = args.filter + nsamples = args.nsamples - if contract is None or fname is None or infile is None: - logger.error('The plot mode requieres contract, fname and input parameters.') + if fname is None or infile is None: + logger.error('The plot mode requieres fname and input parameters.') sys.exit(-1) logger.info('Loading data..') - cache = load_cache(infile, model, ext=ext, solc=solc) + cache = load_and_encode(infile, model, ext=ext, solc=solc, nsamples=nsamples) + #cache = load_cache(infile, model, ext=ext, solc=solc) data = list() fs = list() logger.info('Procesing data..') for (f,c,n),y in cache.items(): - if c == contract and n == fname: + if (c == contract or contract is None) and n == fname: fs.append(f) data.append(y) - + + if len(data) == 0: + logger.error('No contract was found with function %s', fname) + sys.exit(-1) + data = np.array(data) pca = decomposition.PCA(n_components=2) tdata = pca.fit_transform(data) @@ -62,8 +68,9 @@ def plot(args): x = random.gauss(0, 0.01) + x y = random.gauss(0, 0.01) + y plt.scatter(x, y, c='blue') - #plt.text(x-0.001,y+0.001, l.split("_")[1].replace(".sol.ast.compact.json","")) + plt.text(x-0.001,y+0.001, l) + logger.info('Saving figure to plot.png..') plt.savefig('plot.png', bbox_inches='tight') except Exception: diff --git a/utils/similarity/test.py b/utils/similarity/test.py index 342551499..7fd7c7f56 100755 --- a/utils/similarity/test.py +++ b/utils/similarity/test.py @@ -6,7 +6,7 @@ import operator import numpy as np from fastText import load_model -from .encode import encode_contract, load_contracts +from .encode import encode_contract, load_and_encode from .cache import load_cache, save_cache from .similarity import similarity @@ -36,7 +36,7 @@ def test(args): y = " ".join(irs[(filename,contract,fname)]) fvector = model.get_sentence_vector(y) - cache = load_cache(infile, model, ext=ext, solc=solc) + cache = load_and_encode(infile, model, ext=ext, solc=solc) #save_cache("cache.npz", cache) r = dict() @@ -44,6 +44,7 @@ def test(args): r[x] = similarity(fvector, y) r = sorted(r.items(), key=operator.itemgetter(1), reverse=True) + logger.info("Reviewed %d functions, listing the %d most similar ones:", len(r), ntop) for x,score in r[:ntop]: print(x,score) From 1752a1abfba5d15ab90c8aaa628e94aec80f085b Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Thu, 9 May 2019 14:26:20 -0300 Subject: [PATCH 18/28] added crytic-compile options into the slither-simil command line --- utils/similarity/__main__.py | 9 ++++----- utils/similarity/encode.py | 11 ++++------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/utils/similarity/__main__.py b/utils/similarity/__main__.py index 50cc09a43..f7eca662e 100755 --- a/utils/similarity/__main__.py +++ b/utils/similarity/__main__.py @@ -6,6 +6,8 @@ import sys import traceback import operator +from crytic_compile import cryticparser + from .info import info from .test import test from .train import train @@ -27,11 +29,6 @@ def parse_args(): parser.add_argument('model', help='model.bin') - parser.add_argument('--solc', - help='solc path', - action='store', - default='solc') - parser.add_argument('--filename', action='store', dest='filename', @@ -75,6 +72,8 @@ def parse_args(): version="0.0", action='version') + cryticparser.init(parser) + if len(sys.argv) == 1: parser.print_help(sys.stderr) sys.exit(1) diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py index 8dbac9f3d..2e3cbd4a9 100644 --- a/utils/similarity/encode.py +++ b/utils/similarity/encode.py @@ -16,23 +16,20 @@ from .cache import load_cache logger = logging.getLogger("Slither-simil") -def load_and_encode(infile, model, ext=None, solc='solc', nsamples=None): +def load_and_encode(infile, model, ext=None, nsamples=None, **kwargs): r = dict() if infile.endswith(".npz"): r = load_cache(infile, nsamples=nsamples) else: contracts = load_contracts(infile, ext=ext, nsamples=nsamples) for contract in contracts: - for x,ir in encode_contract(contract, solc=solc).items(): + for x,ir in encode_contract(contract, **kwargs).items(): if ir != []: y = " ".join(ir) r[x] = model.get_sentence_vector(y) return r - - - def load_contracts(dirname, ext=None, nsamples=None): r = [] walk = list(os.walk(dirname)) @@ -169,12 +166,12 @@ def encode_ir(ir): print(type(ir),"is missing encoding!") return '' -def encode_contract(filename, solc): +def encode_contract(filename, **kwargs): r = dict() # Init slither try: - slither = Slither(filename, solc=solc) + slither = Slither(filename, **kwargs) except: logger.error("Compilation failed") return r From 6c613dde8cd8132aff83630209737533d97b9371 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Thu, 9 May 2019 15:21:47 -0300 Subject: [PATCH 19/28] fixed formating --- utils/similarity/encode.py | 5 +---- utils/similarity/plot.py | 5 ++--- utils/similarity/test.py | 7 +++++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py index 2e3cbd4a9..604390802 100644 --- a/utils/similarity/encode.py +++ b/utils/similarity/encode.py @@ -53,10 +53,8 @@ def ntype(_type): else: _type = "user_defined_array" elif isinstance(_type, Structure): - print(_type) _type = str(_type) elif isinstance(_type, Enum): - print(_type) _type = str(_type) elif isinstance(_type, MappingType): _type = str(_type) @@ -86,7 +84,6 @@ def encode_ir(ir): if isinstance(ir, Assignment): return '({}):=({})'.format(encode_ir(ir.lvalue), encode_ir(ir.rvalue)) if isinstance(ir, Index): - #print(type(ir._type)) return 'index({})'.format(ntype(ir._type)) if isinstance(ir, Member): return 'member' #.format(ntype(ir._type)) @@ -163,7 +160,7 @@ def encode_ir(ir): # default else: - print(type(ir),"is missing encoding!") + logger.error(type(ir),"is missing encoding!") return '' def encode_contract(filename, **kwargs): diff --git a/utils/similarity/plot.py b/utils/similarity/plot.py index 83809faa3..12ae322e4 100644 --- a/utils/similarity/plot.py +++ b/utils/similarity/plot.py @@ -20,8 +20,8 @@ logger = logging.getLogger("Slither-simil") def plot(args): if decomposition is None or plt is None: - print("ERROR: In order to use plot mode in slither-simil, you need to install sklearn and matplotlib:") - print("$ pip3 install sklearn matplotlib --user") + logger.error("ERROR: In order to use plot mode in slither-simil, you need to install sklearn and matplotlib:") + logger.error("$ pip3 install sklearn matplotlib --user") sys.exit(-1) try: @@ -42,7 +42,6 @@ def plot(args): logger.info('Loading data..') cache = load_and_encode(infile, model, ext=ext, solc=solc, nsamples=nsamples) - #cache = load_cache(infile, model, ext=ext, solc=solc) data = list() fs = list() diff --git a/utils/similarity/test.py b/utils/similarity/test.py index 7fd7c7f56..67685a3e8 100755 --- a/utils/similarity/test.py +++ b/utils/similarity/test.py @@ -44,9 +44,12 @@ def test(args): r[x] = similarity(fvector, y) r = sorted(r.items(), key=operator.itemgetter(1), reverse=True) - logger.info("Reviewed %d functions, listing the %d most similar ones:", len(r), ntop) + logger.info("Reviewed %d functions, listing the %d most similar ones:", len(r), ntop) + format_table = "{: <65} {: <20} {: <20} {: <10}" + logger.info(format_table.format(*["filename", "contract", "function", "score"])) for x,score in r[:ntop]: - print(x,score) + score = str(round(score, 3)) + logger.info(format_table.format(*(list(x)+[score]))) except Exception: logger.error('Error in %s' % args.filename) From ee706bf5a9b3806eebe97a1a6d75ed9f562229a7 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Thu, 9 May 2019 17:51:09 -0300 Subject: [PATCH 20/28] fixes and improvements in slither-simil --- utils/similarity/__main__.py | 15 +++++---------- utils/similarity/encode.py | 34 ++++++++++++++++++++++++---------- utils/similarity/info.py | 12 ++++++------ utils/similarity/plot.py | 14 +++++++------- utils/similarity/test.py | 13 +++++-------- utils/similarity/train.py | 6 ++---- 6 files changed, 49 insertions(+), 45 deletions(-) diff --git a/utils/similarity/__main__.py b/utils/similarity/__main__.py index f7eca662e..8c5d5a142 100755 --- a/utils/similarity/__main__.py +++ b/utils/similarity/__main__.py @@ -34,21 +34,16 @@ def parse_args(): dest='filename', help='contract.sol') - parser.add_argument('--contract', + parser.add_argument('--fname', action='store', - dest='contract', - help='Contract') + dest='fname', + help='Target function') - parser.add_argument('--filter', + parser.add_argument('--ext', action='store', - dest='filter', + dest='ext', help='Extension to filter contracts') - parser.add_argument('--fname', - action='store', - dest='fname', - help='Function name') - parser.add_argument('--nsamples', action='store', type=int, diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py index 604390802..13438a137 100644 --- a/utils/similarity/encode.py +++ b/utils/similarity/encode.py @@ -13,10 +13,24 @@ from slither.solc_parsing.variables.local_variable import * from slither.solc_parsing.variables.local_variable_init_from_tuple import * from .cache import load_cache +from crytic_compile.platform.solc import InvalidCompilation + +simil_logger = logging.getLogger("Slither-simil") +compiler_logger = logging.getLogger("CryticCompile") +compiler_logger.setLevel(logging.CRITICAL) +slither_logger = logging.getLogger("Slither") +slither_logger.setLevel(logging.CRITICAL) + +def parse_target(target): + parts = target.split('.') + if len(parts) == 1: + return None, parts[0] + elif len(parts) == 2: + return parts + else: + simil_logger.error("Invalid target. It should be 'function' or 'Contract.function'") -logger = logging.getLogger("Slither-simil") - -def load_and_encode(infile, model, ext=None, nsamples=None, **kwargs): +def load_and_encode(infile, model, filter=None, nsamples=None, **kwargs): r = dict() if infile.endswith(".npz"): r = load_cache(infile, nsamples=nsamples) @@ -30,7 +44,7 @@ def load_and_encode(infile, model, ext=None, nsamples=None, **kwargs): return r -def load_contracts(dirname, ext=None, nsamples=None): +def load_contracts(dirname, ext=None, nsamples=None, **kwargs): r = [] walk = list(os.walk(dirname)) for x, y, files in walk: @@ -160,17 +174,17 @@ def encode_ir(ir): # default else: - logger.error(type(ir),"is missing encoding!") + simil_logger.error(type(ir),"is missing encoding!") return '' -def encode_contract(filename, **kwargs): +def encode_contract(cfilename, **kwargs): r = dict() # Init slither - try: - slither = Slither(filename, **kwargs) + try: + slither = Slither(cfilename, **kwargs) except: - logger.error("Compilation failed") + simil_logger.error("Compilation failed for %s using %s", cfilename, kwargs['solc']) return r # Iterate over all the contracts @@ -185,7 +199,7 @@ def encode_contract(filename, **kwargs): if function.nodes == []: continue - x = (filename,contract.name,function.name) + x = (cfilename,contract.name,function.name) r[x] = [] diff --git a/utils/similarity/info.py b/utils/similarity/info.py index 76da37d99..7cfa0fb67 100644 --- a/utils/similarity/info.py +++ b/utils/similarity/info.py @@ -4,7 +4,7 @@ import os.path import traceback from fastText import load_model -from .encode import encode_contract +from .encode import parse_target, encode_contract logging.basicConfig() logger = logging.getLogger("Slither-simil") @@ -20,20 +20,20 @@ def info(args): model = None filename = args.filename - contract = args.contract + contract, fname = parse_target(args.fname) solc = args.solc - fname = args.fname + if filename is None and contract is None and fname is None: - print(args.model,"uses the following words:") + logger.info("%s uses the following words:",args.model) for word in model.get_words(): - print(word) + logger.info(word) sys.exit(0) if filename is None or contract is None or fname is None: logger.error('The encode mode requires filename, contract and fname parameters.') sys.exit(-1) - irs = encode_contract(filename, solc=solc) + irs = encode_contract(filename, **vars(args)) if len(irs) == 0: sys.exit(-1) diff --git a/utils/similarity/plot.py b/utils/similarity/plot.py index 12ae322e4..69d359f35 100644 --- a/utils/similarity/plot.py +++ b/utils/similarity/plot.py @@ -13,7 +13,7 @@ except ImportError: plt = None from fastText import load_model -from .encode import load_and_encode +from .encode import load_and_encode, parse_target logger = logging.getLogger("Slither-simil") @@ -29,19 +29,19 @@ def plot(args): model = args.model model = load_model(model) filename = args.filename - contract = args.contract - fname = args.fname - solc = args.solc + #contract = args.contract + contract, fname = parse_target(args.fname) + #solc = args.solc infile = args.input - ext = args.filter - nsamples = args.nsamples + #ext = args.filter + #nsamples = args.nsamples if fname is None or infile is None: logger.error('The plot mode requieres fname and input parameters.') sys.exit(-1) logger.info('Loading data..') - cache = load_and_encode(infile, model, ext=ext, solc=solc, nsamples=nsamples) + cache = load_and_encode(infile, **vars(args)) data = list() fs = list() diff --git a/utils/similarity/test.py b/utils/similarity/test.py index 67685a3e8..f821b6448 100755 --- a/utils/similarity/test.py +++ b/utils/similarity/test.py @@ -6,8 +6,8 @@ import operator import numpy as np from fastText import load_model -from .encode import encode_contract, load_and_encode -from .cache import load_cache, save_cache +from .encode import encode_contract, load_and_encode, parse_target +from .cache import save_cache from .similarity import similarity logger = logging.getLogger("Slither-simil") @@ -18,25 +18,22 @@ def test(args): model = args.model model = load_model(model) filename = args.filename - contract = args.contract - fname = args.fname - solc = args.solc + contract, fname = parse_target(args.fname) infile = args.input - ext = args.filter ntop = args.ntop if filename is None or contract is None or fname is None or infile is None: logger.error('The test mode requires filename, contract, fname and input parameters.') sys.exit(-1) - irs = encode_contract(filename,solc=solc) + irs = encode_contract(filename, **vars(args)) if len(irs) == 0: sys.exit(-1) y = " ".join(irs[(filename,contract,fname)]) fvector = model.get_sentence_vector(y) - cache = load_and_encode(infile, model, ext=ext, solc=solc) + cache = load_and_encode(infile, **vars(args)) #save_cache("cache.npz", cache) r = dict() diff --git a/utils/similarity/train.py b/utils/similarity/train.py index 679f8b7df..d3039be38 100755 --- a/utils/similarity/train.py +++ b/utils/similarity/train.py @@ -16,22 +16,20 @@ def train(args): try: last_data_train_filename = "last_data_train.txt" model_filename = args.model - solc = args.solc dirname = args.input - ext = args.filter nsamples = args.nsamples if dirname is None: logger.error('The train mode requires the input parameter.') sys.exit(-1) - contracts = load_contracts(dirname, ext=ext, nsamples=nsamples) + contracts = load_contracts(dirname, **vars(args)) logger.info('Saving extracted data into %s', last_data_train_filename) cache = [] with open(last_data_train_filename, 'w') as f: for filename in contracts: #cache[filename] = dict() - for (filename, contract, function), ir in encode_contract(filename,solc).items(): + for (filename, contract, function), ir in encode_contract(filename, **vars(args)).items(): if ir != []: x = " ".join(ir) f.write(x+"\n") From 802ab4edb258998d4ec2242c4ce2ab8496b9aa72 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Fri, 10 May 2019 09:24:03 -0300 Subject: [PATCH 21/28] fixes --- utils/similarity/encode.py | 3 +++ utils/similarity/info.py | 6 +++--- utils/similarity/plot.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py index 13438a137..6c9f4700a 100644 --- a/utils/similarity/encode.py +++ b/utils/similarity/encode.py @@ -22,6 +22,9 @@ slither_logger = logging.getLogger("Slither") slither_logger.setLevel(logging.CRITICAL) def parse_target(target): + if target is None: + return None, None + parts = target.split('.') if len(parts) == 1: return None, parts[0] diff --git a/utils/similarity/info.py b/utils/similarity/info.py index 7cfa0fb67..431852571 100644 --- a/utils/similarity/info.py +++ b/utils/similarity/info.py @@ -40,11 +40,11 @@ def info(args): x = (filename,contract,fname) y = " ".join(irs[x]) - print("Function {} in contract {} is encoded as:".format(fname, contract)) - print(y) + logger.info("Function {} in contract {} is encoded as:".format(fname, contract)) + logger.info(y) if model is not None: fvector = model.get_sentence_vector(y) - print(fvector) + logger.info(fvector) except Exception: logger.error('Error in %s' % args.filename) diff --git a/utils/similarity/plot.py b/utils/similarity/plot.py index 69d359f35..1b4d07e02 100644 --- a/utils/similarity/plot.py +++ b/utils/similarity/plot.py @@ -61,7 +61,7 @@ def plot(args): tdata = pca.fit_transform(data) logger.info('Plotting data..') - plt.figure() + plt.figure(figsize=(20,10)) assert(len(tdata) == len(fs)) for ([x,y],l) in zip(tdata, fs): x = random.gauss(0, 0.01) + x From 089cb4a9e7bd51ddf2fd247858ed363c8c2ca9a5 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Fri, 10 May 2019 11:57:23 -0300 Subject: [PATCH 22/28] fixed test --- scripts/travis_test_simil.sh | 2 +- tests/simil/test_1.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/travis_test_simil.sh b/scripts/travis_test_simil.sh index d520ea748..ccf332800 100755 --- a/scripts/travis_test_simil.sh +++ b/scripts/travis_test_simil.sh @@ -8,7 +8,7 @@ pip3.6 install https://github.com/facebookresearch/fastText/archive/0.2.0.zip ### Test slither-simil DIR_TESTS="tests/simil" -slither-simil info "" --filename $DIR_TESTS/../complex_func.sol --contract Complex --fname complexExternalWrites --solc solc-0.4.25 > test_1.txt 2>&1 +slither-simil info "" --filename $DIR_TESTS/../complex_func.sol --fname Complex.complexExternalWrites --solc solc-0.4.25 > test_1.txt 2>&1 DIFF=$(diff test_1.txt "$DIR_TESTS/test_1.txt") if [ "$DIFF" != "" ] then diff --git a/tests/simil/test_1.txt b/tests/simil/test_1.txt index f722b9880..1c6a7bb7e 100644 --- a/tests/simil/test_1.txt +++ b/tests/simil/test_1.txt @@ -1,2 +1,2 @@ -Function complexExternalWrites in contract Complex is encoded as: -new_contract (local_solc_variable(default)):=(temporary_variable) high_level_call high_level_call high_level_call high_level_call high_level_call new_contract (local_solc_variable(default)):=(temporary_variable) high_level_call new_contract (local_solc_variable(default)):=(temporary_variable) solidity_call(keccak256()) type_conversion(bytes4) low_level_call new_contract (local_solc_variable(default)):=(temporary_variable) solidity_call(keccak256()) type_conversion(bytes4) low_level_call +INFO:Slither-simil:Function complexExternalWrites in contract Complex is encoded as: +INFO:Slither-simil:new_contract (local_solc_variable(default)):=(temporary_variable) high_level_call high_level_call high_level_call high_level_call high_level_call new_contract (local_solc_variable(default)):=(temporary_variable) high_level_call new_contract (local_solc_variable(default)):=(temporary_variable) solidity_call(keccak256()) type_conversion(bytes4) low_level_call new_contract (local_solc_variable(default)):=(temporary_variable) solidity_call(keccak256()) type_conversion(bytes4) low_level_call From 06d70856bd60a539e29ae2e5a2e409a9b18d456f Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Fri, 10 May 2019 12:00:22 -0300 Subject: [PATCH 23/28] fix --- utils/similarity/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/utils/similarity/__init__.py b/utils/similarity/__init__.py index 5a1473b46..e8ce679e9 100644 --- a/utils/similarity/__init__.py +++ b/utils/similarity/__init__.py @@ -4,9 +4,6 @@ import sys try: import fastText except ImportError: - fastText = None - -if fastText is None: print("ERROR: in order to use slither-simil, you need to install fastText 0.2.0:") print("$ pip3 install https://github.com/facebookresearch/fastText/archive/0.2.0.zip --user\n") sys.exit(-1) From 7f39c7721995c66811b7ebb90a86fbba740140ef Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Fri, 10 May 2019 12:23:51 -0300 Subject: [PATCH 24/28] improved module loading --- utils/similarity/__init__.py | 10 +--------- utils/similarity/info.py | 2 +- utils/similarity/model.py | 11 +++++++++++ utils/similarity/plot.py | 6 +++--- utils/similarity/test.py | 6 +++--- utils/similarity/train.py | 2 +- 6 files changed, 20 insertions(+), 17 deletions(-) create mode 100644 utils/similarity/model.py diff --git a/utils/similarity/__init__.py b/utils/similarity/__init__.py index e8ce679e9..b31b92c60 100644 --- a/utils/similarity/__init__.py +++ b/utils/similarity/__init__.py @@ -1,9 +1 @@ -# from https://stackoverflow.com/questions/563022/whats-python-good-practice-for-importing-and-offering-optional-features -import sys - -try: - import fastText -except ImportError: - print("ERROR: in order to use slither-simil, you need to install fastText 0.2.0:") - print("$ pip3 install https://github.com/facebookresearch/fastText/archive/0.2.0.zip --user\n") - sys.exit(-1) +from .model import load_model diff --git a/utils/similarity/info.py b/utils/similarity/info.py index 431852571..e250aa991 100644 --- a/utils/similarity/info.py +++ b/utils/similarity/info.py @@ -3,7 +3,7 @@ import sys import os.path import traceback -from fastText import load_model +from .model import load_model from .encode import parse_target, encode_contract logging.basicConfig() diff --git a/utils/similarity/model.py b/utils/similarity/model.py new file mode 100644 index 000000000..5e1549058 --- /dev/null +++ b/utils/similarity/model.py @@ -0,0 +1,11 @@ +import sys + +try: + from fastText import load_model + from fastText import train_unsupervised +except ImportError: + print("ERROR: in order to use slither-simil, you need to install fastText 0.2.0:") + print("$ pip3 install https://github.com/facebookresearch/fastText/archive/0.2.0.zip --user\n") + sys.exit(-1) + + diff --git a/utils/similarity/plot.py b/utils/similarity/plot.py index 1b4d07e02..05d8bf921 100644 --- a/utils/similarity/plot.py +++ b/utils/similarity/plot.py @@ -5,15 +5,15 @@ import operator import numpy as np import random +from .model import load_model +from .encode import load_and_encode, parse_target + try: from sklearn import decomposition import matplotlib.pyplot as plt except ImportError: decomposition = None plt = None - -from fastText import load_model -from .encode import load_and_encode, parse_target logger = logging.getLogger("Slither-simil") diff --git a/utils/similarity/test.py b/utils/similarity/test.py index f821b6448..08542dd0d 100755 --- a/utils/similarity/test.py +++ b/utils/similarity/test.py @@ -5,9 +5,9 @@ import traceback import operator import numpy as np -from fastText import load_model -from .encode import encode_contract, load_and_encode, parse_target -from .cache import save_cache +from .model import load_model +from .encode import encode_contract, load_and_encode, parse_target +from .cache import save_cache from .similarity import similarity logger = logging.getLogger("Slither-simil") diff --git a/utils/similarity/train.py b/utils/similarity/train.py index d3039be38..e810450a6 100755 --- a/utils/similarity/train.py +++ b/utils/similarity/train.py @@ -5,7 +5,7 @@ import traceback import operator import os -from fastText import train_unsupervised +from .model import train_unsupervised from .encode import encode_contract, load_contracts from .cache import save_cache From d3f981699960a90f92b197b78004ce6a6c4105c6 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Fri, 10 May 2019 12:55:43 -0300 Subject: [PATCH 25/28] more fixes --- utils/similarity/__main__.py | 34 +++++++++++++--------------------- utils/similarity/cache.py | 9 ++++++++- utils/similarity/model.py | 2 -- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/utils/similarity/__main__.py b/utils/similarity/__main__.py index 8c5d5a142..dc4d9da6a 100755 --- a/utils/similarity/__main__.py +++ b/utils/similarity/__main__.py @@ -16,12 +16,10 @@ from .plot import plot logging.basicConfig() logger = logging.getLogger("Slither-simil") -slither_simil_usage = "USAGE" # TODO modes = ["info", "test", "train", "plot"] def parse_args(): - parser = argparse.ArgumentParser(description='Code similarity detection tool', - usage=slither_simil_usage) + parser = argparse.ArgumentParser(description='Code similarity detection tool') parser.add_argument('mode', help="|".join(modes)) @@ -89,24 +87,18 @@ def main(): default_log = logging.INFO logger.setLevel(default_log) - try: - mode = args.mode - - if mode == "info": - info(args) - elif mode == "train": - train(args) - elif mode == "test": - test(args) - elif mode == "plot": - plot(args) - else: - logger.error('Invalid mode!. It should be one of these: %s' % ", ".join(modes)) - sys.exit(-1) - - except Exception: - logger.error('Error in %s' % args.filename) - logger.error(traceback.format_exc()) + mode = args.mode + + if mode == "info": + info(args) + elif mode == "train": + train(args) + elif mode == "test": + test(args) + elif mode == "plot": + plot(args) + else: + logger.error('Invalid mode!. It should be one of these: %s' % ", ".join(modes)) sys.exit(-1) if __name__ == '__main__': diff --git a/utils/similarity/cache.py b/utils/similarity/cache.py index f5973b4b3..efb748c99 100644 --- a/utils/similarity/cache.py +++ b/utils/similarity/cache.py @@ -1,4 +1,11 @@ -import numpy as np +import sys + +try: + import numpy as np +except ImportError: + print("ERROR: in order to use slither-simil, you need to install numpy") + print("$ pip3 install numpy --user\n") + sys.exit(-1) def load_cache(infile, nsamples=None): cache = dict() diff --git a/utils/similarity/model.py b/utils/similarity/model.py index 5e1549058..4f3412113 100644 --- a/utils/similarity/model.py +++ b/utils/similarity/model.py @@ -7,5 +7,3 @@ except ImportError: print("ERROR: in order to use slither-simil, you need to install fastText 0.2.0:") print("$ pip3 install https://github.com/facebookresearch/fastText/archive/0.2.0.zip --user\n") sys.exit(-1) - - From 2047ca6e886a54f5ceb5f8782483101c17de7761 Mon Sep 17 00:00:00 2001 From: Josselin Date: Mon, 13 May 2019 12:24:25 +0100 Subject: [PATCH 26/28] Minor improvements in slither-similar: - Fix import in utils.similarity.encore - Improve slither-similari help - Use contract.functions_not_inherited rather than contracts.functions + if function.contract == contract --- utils/similarity/__main__.py | 2 +- utils/similarity/encode.py | 51 +++++++++++++++++------------------- 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/utils/similarity/__main__.py b/utils/similarity/__main__.py index dc4d9da6a..239b68b62 100755 --- a/utils/similarity/__main__.py +++ b/utils/similarity/__main__.py @@ -19,7 +19,7 @@ logger = logging.getLogger("Slither-simil") modes = ["info", "test", "train", "plot"] def parse_args(): - parser = argparse.ArgumentParser(description='Code similarity detection tool') + parser = argparse.ArgumentParser(description='Code similarity detection tool. For usage, see https://github.com/crytic/slither/wiki/Code-Similarity-detector') parser.add_argument('mode', help="|".join(modes)) diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py index 6c9f4700a..a9334a253 100644 --- a/utils/similarity/encode.py +++ b/utils/similarity/encode.py @@ -1,19 +1,19 @@ +import logging import os -import sys from slither import Slither -from slither.slithir.operations import * -from slither.slithir.variables import * -from slither.core.declarations import * -from slither.solc_parsing.declarations.function import * -from slither.core.solidity_types import * - -from slither.solc_parsing.variables.state_variable import * -from slither.solc_parsing.variables.local_variable import * -from slither.solc_parsing.variables.local_variable_init_from_tuple import * - +from slither.core.declarations import Structure, Enum, SolidityVariableComposed, SolidityVariable +from slither.core.solidity_types import ElementaryType, ArrayType, MappingType, UserDefinedType +from slither.slithir.operations import Assignment, Index, Member, Length, Balance, Binary, \ + Unary, Condition, NewArray, NewStructure, NewContract, NewElementaryType, \ + SolidityCall, Push, Delete, EventCall, LibraryCall, InternalDynamicCall, \ + HighLevelCall, LowLevelCall, TypeConversion, Return, Transfer, Send, Unpack, InitArray, InternalCall +from slither.slithir.variables import TemporaryVariable, TupleVariable, Constant, ReferenceVariable +from slither.solc_parsing.declarations.function import FunctionSolc +from slither.solc_parsing.variables.local_variable import LocalVariableSolc +from slither.solc_parsing.variables.local_variable_init_from_tuple import LocalVariableInitFromTupleSolc +from slither.solc_parsing.variables.state_variable import StateVariableSolc from .cache import load_cache -from crytic_compile.platform.solc import InvalidCompilation simil_logger = logging.getLogger("Slither-simil") compiler_logger = logging.getLogger("CryticCompile") @@ -194,25 +194,22 @@ def encode_contract(cfilename, **kwargs): for contract in slither.contracts: # Iterate over all the functions - for function in contract.functions: - - # Dont explore inherited functions - if function.contract == contract: + for function in contract.functions_not_inherited: - if function.nodes == []: - continue + if function.nodes == []: + continue - x = (cfilename,contract.name,function.name) + x = (cfilename,contract.name,function.name) - r[x] = [] + r[x] = [] - # Iterate over the nodes of the function - for node in function.nodes: - # Print the Solidity expression of the nodes - # And the SlithIR operations - if node.expression: - for ir in node.irs: - r[x].append(encode_ir(ir)) + # Iterate over the nodes of the function + for node in function.nodes: + # Print the Solidity expression of the nodes + # And the SlithIR operations + if node.expression: + for ir in node.irs: + r[x].append(encode_ir(ir)) return r From 140ea6fd0b39dd4bd41f8983b0e405c02cd6a655 Mon Sep 17 00:00:00 2001 From: Josselin Date: Mon, 13 May 2019 12:26:51 +0100 Subject: [PATCH 27/28] Remove Solidity dependencies --- utils/similarity/encode.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py index a9334a253..0418a82e7 100644 --- a/utils/similarity/encode.py +++ b/utils/similarity/encode.py @@ -2,17 +2,16 @@ import logging import os from slither import Slither -from slither.core.declarations import Structure, Enum, SolidityVariableComposed, SolidityVariable +from slither.core.declarations import Structure, Enum, SolidityVariableComposed, SolidityVariable, Function from slither.core.solidity_types import ElementaryType, ArrayType, MappingType, UserDefinedType +from slither.core.variables.local_variable import LocalVariable +from slither.core.variables.local_variable_init_from_tuple import LocalVariableInitFromTuple +from slither.core.variables.state_variable import StateVariable from slither.slithir.operations import Assignment, Index, Member, Length, Balance, Binary, \ Unary, Condition, NewArray, NewStructure, NewContract, NewElementaryType, \ SolidityCall, Push, Delete, EventCall, LibraryCall, InternalDynamicCall, \ HighLevelCall, LowLevelCall, TypeConversion, Return, Transfer, Send, Unpack, InitArray, InternalCall from slither.slithir.variables import TemporaryVariable, TupleVariable, Constant, ReferenceVariable -from slither.solc_parsing.declarations.function import FunctionSolc -from slither.solc_parsing.variables.local_variable import LocalVariableSolc -from slither.solc_parsing.variables.local_variable_init_from_tuple import LocalVariableInitFromTupleSolc -from slither.solc_parsing.variables.state_variable import StateVariableSolc from .cache import load_cache simil_logger = logging.getLogger("Slither-simil") @@ -152,7 +151,7 @@ def encode_ir(ir): return 'unpack' if isinstance(ir, InitArray): # TODO: improve return 'init_array' - if isinstance(ir, FunctionSolc): # TODO: investigate this + if isinstance(ir, Function): # TODO: investigate this return 'function_solc' # variables @@ -166,11 +165,11 @@ def encode_ir(ir): return 'temporary_variable' if isinstance(ir, ReferenceVariable): return 'reference({})'.format(ntype(ir._type)) - if isinstance(ir, LocalVariableSolc): + if isinstance(ir, LocalVariable): return 'local_solc_variable({})'.format(ir._location) - if isinstance(ir, StateVariableSolc): + if isinstance(ir, StateVariable): return 'state_solc_variable({})'.format(ntype(ir._type)) - if isinstance(ir, LocalVariableInitFromTupleSolc): + if isinstance(ir, LocalVariableInitFromTuple): return 'local_variable_init_tuple' if isinstance(ir, TupleVariable): return 'tuple_variable' From a9466230a801ed83b2ca2f22ee1fdc0400de8641 Mon Sep 17 00:00:00 2001 From: ggrieco-tob Date: Mon, 13 May 2019 11:20:44 -0300 Subject: [PATCH 28/28] final fixes --- utils/similarity/encode.py | 4 ++-- utils/similarity/test.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/similarity/encode.py b/utils/similarity/encode.py index 0418a82e7..3ea47ca7c 100644 --- a/utils/similarity/encode.py +++ b/utils/similarity/encode.py @@ -32,7 +32,7 @@ def parse_target(target): else: simil_logger.error("Invalid target. It should be 'function' or 'Contract.function'") -def load_and_encode(infile, model, filter=None, nsamples=None, **kwargs): +def load_and_encode(infile, vmodel, ext=None, nsamples=None, **kwargs): r = dict() if infile.endswith(".npz"): r = load_cache(infile, nsamples=nsamples) @@ -42,7 +42,7 @@ def load_and_encode(infile, model, filter=None, nsamples=None, **kwargs): for x,ir in encode_contract(contract, **kwargs).items(): if ir != []: y = " ".join(ir) - r[x] = model.get_sentence_vector(y) + r[x] = vmodel.get_sentence_vector(y) return r diff --git a/utils/similarity/test.py b/utils/similarity/test.py index 08542dd0d..15a39cc13 100755 --- a/utils/similarity/test.py +++ b/utils/similarity/test.py @@ -33,7 +33,7 @@ def test(args): y = " ".join(irs[(filename,contract,fname)]) fvector = model.get_sentence_vector(y) - cache = load_and_encode(infile, **vars(args)) + cache = load_and_encode(infile, model, **vars(args)) #save_cache("cache.npz", cache) r = dict()