Commit 774f0ec8 authored by Andrea Gussoni's avatar Andrea Gussoni
Browse files

Add scripts for computing duplication

parent f8c498dc
......@@ -2,7 +2,10 @@
# Root of all paths below: the working directory at the time these lines run.
basepath=$(pwd)
# File listing the binaries to process.
binaries_file=$basepath/binaries.txt
# Locations of the helper scripts, relative to the root.
matching_script_dir=$basepath/scripting/boundaries-comparison-scripts
computation_script_dir=$basepath/scripting/computation
# Per-architecture working directory. $arch is not assigned in these lines,
# so it must already be set when they are executed.
workdir=$basepath/workdir/$arch
# Sub-directories of the per-architecture working directory.
matching_dir=$workdir/result-matching
function_idx_dir=$workdir/deduplicated-functions-idx-no-goto
revng_json_dir=$workdir/revng-json
ida_json_dir=$workdir/ida-json-normalized
revng_metrics_dir=$workdir/revng-metrics
#!/usr/bin/env python3
import argparse
import csv
import json
import numpy
import os
import pandas
import sys
from scipy.stats.mstats import gmean
def _read_duplication_factor(metrics_path, binary_name, function_name):
    """Return the duplication factor stored in a single revng metrics file.

    The file must contain exactly two CSV rows: a header row, which is
    ignored, and one data row. The factor is read from the third field of
    the data row and converted to ``float``.

    Exits the program (``sys.exit``) with a diagnostic naming the binary and
    the function when the file does not have exactly two rows or the data
    row is too short. A non-numeric factor raises ``ValueError``.
    """
    with open(metrics_path, 'r', newline='') as metrics_file:
        rows = list(csv.reader(metrics_file, delimiter=',', quotechar='|',
                               quoting=csv.QUOTE_MINIMAL))

    # A header-only or empty file is as malformed as one with extra rows.
    if len(rows) != 2:
        sys.exit('bad number of lines in revng metrics file: '
                 + binary_name + ' ' + function_name)

    data_row = rows[1]
    if len(data_row) < 3:
        sys.exit('bad number of fields in revng metrics file: '
                 + binary_name + ' ' + function_name)
    return float(data_row[2])


def main():
    """Print duplication statistics for the functions of the listed binaries.

    Command-line arguments (all positional): the file listing the binary
    names, the architecture, the function index folder and the revng metrics
    folder. For every binary, ``<function_idx_dir>/<binary>.csv`` is read and
    the first column of each row is taken as a function name. Functions
    without a file at
    ``<revng_metrics_dir>/<binary>/duplication/<function>`` are skipped.

    Prints the functions whose duplication factor exceeds 5, the geometric
    mean of all factors, and the fraction (between 0 and 1) of functions whose
    factor is below 1.01. Exits with an error message when no metrics file is
    found at all, since neither statistic is defined in that case.
    """
    parser = argparse.ArgumentParser(description='My nice tool.')
    parser.add_argument('binaries', metavar='BINARIESFILE',
                        help='File containing the names of the binaries')
    # Parsed for command-line compatibility; the value itself is not used.
    parser.add_argument('arch', metavar='ARCH', help='Architecture')
    parser.add_argument('function_idx_dir', metavar='FUNCTIONIDXDIR',
                        help='Folder containing the function index')
    parser.add_argument('revng_metrics_dir', metavar='REVNGMETRICSDIR',
                        help='Folder containing the metrics files')
    args = parser.parse_args()

    # Blank lines would otherwise turn into an empty binary name.
    with open(args.binaries, 'r') as binaries:
        binaries_list = [line.rstrip('\n') for line in binaries if line.strip()]

    values = []
    high_duplication = []
    total = 0
    under_percentage = 0

    for binary_name in binaries_list:
        functions_csv_file = os.path.join(args.function_idx_dir,
                                          binary_name + '.csv')
        with open(functions_csv_file, 'r', newline='') as function_csv:
            csvreader = csv.reader(function_csv, delimiter=',', quotechar='|',
                                   quoting=csv.QUOTE_MINIMAL)
            for function in csvreader:
                # csv.reader yields an empty list for a blank line.
                if not function:
                    continue
                function_name = function[0]
                revng_metrics_file = os.path.join(args.revng_metrics_dir,
                                                  binary_name, 'duplication',
                                                  function_name)
                # Only functions that actually have a metrics file are counted.
                if not os.path.isfile(revng_metrics_file):
                    continue

                duplication_factor = _read_duplication_factor(
                    revng_metrics_file, binary_name, function_name)
                values.append(duplication_factor)
                if duplication_factor > 5:
                    high_duplication.append((function_name, duplication_factor))
                # Count the functions with less than 1% size increase.
                if duplication_factor < 1.01:
                    under_percentage += 1
                total += 1

    # Without any sample the mean is undefined and the ratio divides by zero.
    if total == 0:
        sys.exit('no duplication metrics found for the listed binaries')

    print("Functions that present a very high duplication duplication:")
    for entry in high_duplication:
        print(entry)
    print("Geometric mean of the size increase:")
    print(gmean(values))
    print("Percentage of the functions that have <1% size increase:")
    print(under_percentage / total)


if __name__ == "__main__":
    main()
#!/bin/bash
# Run compute-duplication-total.py for one architecture.
#
# Usage: pass the architecture as the first argument.
# config.sh is sourced by relative path, so the script has to be started from
# the directory that contains it.

if [ $# -eq 0 ]; then
    # Diagnostics belong on stderr so they do not mix with the results.
    echo "No arguments supplied" >&2
    exit 1
fi

# Must be assigned before the config is sourced: the config reads $arch.
arch=$1

# Import the config
if [ ! -f ./config.sh ]; then
    echo "config.sh not found in $(pwd)" >&2
    exit 1
fi
. ./config.sh

# Quote every expansion: the paths derive from the working directory, which
# may contain whitespace.
"$computation_script_dir/compute-duplication-total.py" \
    "$binaries_file" "$arch" "$function_idx_dir" "$revng_metrics_dir"
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment