diff --git a/utils/similarity/__main__.py b/utils/similarity/__main__.py
index b241f15cd..50cc09a43 100755
--- a/utils/similarity/__main__.py
+++ b/utils/similarity/__main__.py
@@ -9,12 +9,13 @@ import operator
 from .info import info
 from .test import test
 from .train import train
+from .plot import plot
 
 logging.basicConfig()
 logger = logging.getLogger("Slither-simil")
 
 slither_simil_usage = "USAGE" # TODO
-modes = ["info", "test", "train"]
+modes = ["info", "test", "train", "plot"]
 
 def parse_args():
     parser = argparse.ArgumentParser(description='Code similarity detection tool',
@@ -57,6 +58,13 @@ def parse_args():
                         dest='nsamples',
                         help='Number of contract samples used for training')
 
+    parser.add_argument('--ntop',
+                        action='store',
+                        type=int,
+                        dest='ntop',
+                        default=10,
+                        help='Number of most similar contracts to show in test mode')
+
     parser.add_argument('--input',
                         action='store',
                         dest='input',
@@ -96,6 +104,8 @@ def main():
         train(args)
     elif mode == "test":
         test(args)
+    elif mode == "plot":
+        plot(args)
     else:
         logger.error('Invalid mode!. It should be one of these: %s' % ", ".join(modes))
         sys.exit(-1)
diff --git a/utils/similarity/plot.py b/utils/similarity/plot.py
new file mode 100644
index 000000000..84fcd69b9
--- /dev/null
+++ b/utils/similarity/plot.py
@@ -0,0 +1,69 @@
+import logging
+import sys
+import traceback
+import operator
+import numpy as np
+import random
+
+from sklearn import decomposition
+import matplotlib.pyplot as plt
+
+from fastText import load_model
+from .cache import load_cache
+
+logger = logging.getLogger("crytic-pred")
+
+def plot(args):
+    """Scatter-plot a 2D PCA projection of the cached vectors of one function.
+
+    Reads model, contract, fname, solc, input and filter from args. Every
+    cache entry whose contract and function names match is projected with
+    PCA and drawn as one labelled point; the process exits with -1 on error.
+    """
+
+    try:
+        model = load_model(args.model)
+        contract = args.contract
+        fname = args.fname
+        solc = args.solc
+        infile = args.input
+        ext = args.filter
+
+        if contract is None or fname is None or infile is None:
+            logger.error('The plot mode requires contract, fname and input parameters.')
+            sys.exit(-1)
+
+        cache = load_cache(infile, model, ext=ext, solc=solc)
+
+        data = list()
+        fs = list()
+        for (f,c,n),y in cache.items():
+            if c == contract and n == fname:
+                fs.append(f)
+                data.append(y)
+
+        # PCA with n_components=2 cannot be fitted on fewer than two samples
+        if len(data) < 2:
+            logger.error('Found %d functions named %s.%s in %s, at least 2 are needed to plot.' % (len(data), contract, fname, infile))
+            sys.exit(-1)
+
+        data = np.array(data)
+        pca = decomposition.PCA(n_components=2)
+        tdata = pca.fit_transform(data)
+        plt.figure()
+        assert(len(tdata) == len(fs))
+        for ([x,y],l) in zip(tdata, fs):
+            # random jitter, presumably so that overlapping points stay distinguishable
+            x = random.gauss(0, 0.01) + x
+            y = random.gauss(0, 0.01) + y
+            plt.scatter(x, y, c='blue')
+            # label with the part after the first "_", or the whole name if there is none
+            name = l.split("_")[1] if "_" in l else l
+            plt.text(x-0.001,y+0.001, name.replace(".sol.ast.compact.json",""))
+
+        plt.show()
+
+    except Exception:
+        logger.error('Error in %s' % args.input)
+        logger.error(traceback.format_exc())
+        sys.exit(-1)
diff --git a/utils/similarity/test.py b/utils/similarity/test.py
index 4f28801e0..342551499 100755
--- a/utils/similarity/test.py
+++ b/utils/similarity/test.py
@@ -23,6 +23,7 @@ def test(args):
         solc = args.solc
         infile = args.input
         ext = args.filter
+        ntop = args.ntop
 
         if filename is None or contract is None or fname is None or infile is None:
             logger.error('The test mode requires filename, contract, fname and input parameters.')
@@ -43,7 +44,7 @@ def test(args):
             r[x] = similarity(fvector, y)
 
         r = sorted(r.items(), key=operator.itemgetter(1), reverse=True)
-        for x,score in r[:10]:
+        for x,score in r[:ntop]:
             print(x,score)
 
     except Exception: