Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python3
import argparse
import csv
import json
import numpy
import os
import pandas
import sys
from scipy.stats.mstats import gmean
def _read_duplication_factor(metrics_path, binary_name, function_name):
    """Return the duplication factor stored in one revng metrics file.

    The file is parsed as CSV and must contain exactly two rows: a first row
    that is skipped (presumably a header -- confirm against the producer of
    these files) and a single data row. The factor is the third column of
    the data row.

    Args:
        metrics_path: Path of the per-function metrics file.
        binary_name: Name of the binary, used only in error messages.
        function_name: Name of the function, used only in error messages.

    Returns:
        The duplication factor as a float.

    Raises:
        SystemExit: If the file does not hold exactly two rows, or if the
            data row has no third column that parses as a float.
    """
    # newline='' is what the csv module asks for when opening files to read.
    with open(metrics_path, 'r', newline='') as revng_metrics:
        reader = csv.reader(revng_metrics, delimiter=',', quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)
        next(reader, None)  # First row is skipped, its content is not used.
        data_row = next(reader, None)
        extra_row = next(reader, None)

    # Too few rows used to surface as a bare IndexError; report them the same
    # way as too many rows.
    if data_row is None or extra_row is not None:
        sys.exit('bad number of lines in revng metrics file: '
                 + binary_name + ' ' + function_name)

    try:
        return float(data_row[2])
    except (IndexError, ValueError):
        sys.exit('bad duplication factor in revng metrics file: '
                 + binary_name + ' ' + function_name)


def main():
    """Print duplication statistics for the functions of a set of binaries.

    Command line arguments (all positional): the file listing the binary
    names (one per line), the architecture, the folder holding one
    ``<binary>.csv`` function index per binary, and the folder holding the
    metrics files at ``<binary>/duplication/<function>``.

    For every function listed in an index whose metrics file exists, the
    duplication factor is collected. The report printed on stdout contains:
    the functions whose factor is greater than 5, the geometric mean of all
    factors, and the ratio (a value between 0 and 1, despite the label) of
    functions whose factor is below 1.01.

    Raises:
        SystemExit: If a metrics file is malformed, or if no metrics file
            was found at all (which previously ended in ZeroDivisionError).
    """
    parser = argparse.ArgumentParser(description='My nice tool.')
    parser.add_argument('binaries', metavar='BINARIESFILE',
                        help='File containing the names of the binaries')
    # ARCH is parsed but not used below; it stays a required positional so
    # that existing invocations keep working.
    parser.add_argument('arch', metavar='ARCH', help='Architecture')
    parser.add_argument('function_idx_dir', metavar='FUNCTIONIDXDIR',
                        help='Folder containing the function index')
    parser.add_argument('revng_metrics_dir', metavar='REVNGMETRICSDIR',
                        help='Folder containing the metrics files')
    args = parser.parse_args()

    high_duplication_threshold = 5
    # A factor below 1.01 means less than 1% size increase.
    small_increase_threshold = 1.01

    with open(args.binaries, 'r') as binaries:
        stripped_names = (line.rstrip('\n') for line in binaries)
        # Blank lines would otherwise turn into a bogus "<dir>/.csv" lookup.
        binaries_list = [name for name in stripped_names if name]

    values = []
    high_duplication = []
    total = 0
    under_percentage = 0

    for binary_name in binaries_list:
        functions_csv_file = args.function_idx_dir + '/' + binary_name + '.csv'
        with open(functions_csv_file, 'r', newline='') as function_csv:
            csvreader = csv.reader(function_csv, delimiter=',', quotechar='|',
                                   quoting=csv.QUOTE_MINIMAL)
            for function in csvreader:
                if not function:
                    # csv.reader yields [] for blank lines.
                    continue
                function_name = function[0]
                revng_metrics_file = (args.revng_metrics_dir + '/' + binary_name
                                      + '/duplication/' + function_name)
                # Functions without a metrics file are left out of every
                # statistic, including the total.
                if not os.path.isfile(revng_metrics_file):
                    continue

                duplication_factor = _read_duplication_factor(
                    revng_metrics_file, binary_name, function_name)
                values.append(duplication_factor)
                if duplication_factor > high_duplication_threshold:
                    high_duplication.append((function_name, duplication_factor))
                if duplication_factor < small_increase_threshold:
                    under_percentage += 1
                total += 1

    if total == 0:
        # Nothing to average and nothing to divide by.
        sys.exit('no duplication metrics found for the listed binaries')

    print("Functions that present a very high duplication duplication:")
    for entry in high_duplication:
        print(entry)
    print("Geometric mean of the size increase:")
    print(gmean(values))
    print("Percentage of the functions that have <1% size increase:")
    print(under_percentage/total)
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()