Skip to content
Snippets Groups Projects
Commit 774f0ec8 authored by Andrea Gussoni
Browse files

Add scripts for computing duplication

parent f8c498dc
No related branches found
No related tags found
No related merge requests found
......@@ -2,7 +2,10 @@
# Shared path configuration, meant to be sourced by the driver scripts.
# NOTE: `arch` must already be set by the sourcing script, because `workdir`
# (and everything derived from it) is built from it.

# Root of the tree: the directory the sourcing script is run from.
basepath=$(pwd)
# File listing the names of the binaries to process.
binaries_file=$basepath/binaries.txt
# Locations of the helper scripts.
matching_script_dir=$basepath/scripting/boundaries-comparison-scripts
computation_script_dir=$basepath/scripting/computation
# Per-architecture working directory; all the directories below live inside it.
workdir=$basepath/workdir/$arch
matching_dir=$workdir/result-matching
# Folder with one `<binary>.csv` function index per binary.
function_idx_dir=$workdir/deduplicated-functions-idx-no-goto
revng_json_dir=$workdir/revng-json
ida_json_dir=$workdir/ida-json-normalized
# Folder with the per-binary revng metrics files.
revng_metrics_dir=$workdir/revng-metrics
#!/usr/bin/env python3
import argparse
import csv
import json
import numpy
import os
import pandas
import sys
from scipy.stats.mstats import gmean
# Functions whose duplication factor exceeds this value are listed one by one.
_HIGH_DUPLICATION_THRESHOLD = 5
# Functions whose duplication factor is below this value count as having a
# size increase of less than 1%.
_NEGLIGIBLE_INCREASE_THRESHOLD = 1.01


def _read_csv_rows(path):
    """Return every row of the CSV file at *path* as a list of lists."""
    # newline='' lets the csv module do its own line-ending handling.
    with open(path, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        return list(reader)


def _read_duplication_factor(metrics_file, binary_name, function_name):
    """Return the duplication factor stored in a revng metrics file.

    The file must contain exactly two rows: a header, which is ignored, and
    one data row whose third column is the duplication factor. Anything else
    terminates the program with an explanatory message.
    """
    rows = _read_csv_rows(metrics_file)
    if len(rows) != 2:
        sys.exit('bad number of lines in revng metrics file: ' + binary_name + ' ' + function_name)
    try:
        return float(rows[1][2])
    except (IndexError, ValueError):
        sys.exit('malformed duplication factor in revng metrics file: ' + binary_name + ' ' + function_name)


def main(argv=None):
    """Print duplication statistics for the functions of a set of binaries.

    For every binary named in BINARIESFILE the function index
    ``FUNCTIONIDXDIR/<binary>.csv`` is read; for every function listed there
    the metrics file ``REVNGMETRICSDIR/<binary>/duplication/<function>`` is
    consulted if it exists. Functions without a metrics file are ignored.

    The report contains the functions whose duplication factor exceeds 5, the
    geometric mean of all duplication factors, and the fraction (a value
    between 0 and 1) of functions whose factor is below 1.01.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv``.

    Raises:
        SystemExit: On argument errors, on a malformed metrics file, or when
            no metrics were found at all.
    """
    parser = argparse.ArgumentParser(description='My nice tool.')
    parser.add_argument('binaries', metavar='BINARIESFILE', help='File containing the names of the binaries')
    # ARCH is accepted for command-line compatibility; it is not used here.
    parser.add_argument('arch', metavar='ARCH', help='Architecture')
    parser.add_argument('function_idx_dir', metavar='FUNCTIONIDXDIR', help='Folder containing the function index')
    parser.add_argument('revng_metrics_dir', metavar='REVNGMETRICSDIR', help='Folder containing the metrics files')
    args = parser.parse_args(argv)

    with open(args.binaries, 'r') as binaries:
        stripped_lines = (line.rstrip('\n') for line in binaries)
        # Blank lines (e.g. a trailing one) do not name a binary.
        binaries_list = [name for name in stripped_lines if name]

    values = []
    high_duplication = []
    under_percentage = 0

    for binary_name in binaries_list:
        functions_csv_file = os.path.join(args.function_idx_dir, binary_name + '.csv')
        for function in _read_csv_rows(functions_csv_file):
            if not function:
                # csv.reader yields an empty list for a blank line.
                continue
            function_name = function[0]
            revng_metrics_file = os.path.join(
                args.revng_metrics_dir, binary_name, 'duplication', function_name)
            if not os.path.isfile(revng_metrics_file):
                continue

            duplication_factor = _read_duplication_factor(
                revng_metrics_file, binary_name, function_name)
            values.append(duplication_factor)
            if duplication_factor > _HIGH_DUPLICATION_THRESHOLD:
                high_duplication.append((function_name, duplication_factor))
            if duplication_factor < _NEGLIGIBLE_INCREASE_THRESHOLD:
                under_percentage += 1

    total = len(values)
    if total == 0:
        # Without data the mean is undefined and the ratio would divide by zero.
        sys.exit('no duplication metrics found for the binaries listed in ' + args.binaries)

    print("Functions that present a very high duplication duplication:")
    for touple in high_duplication:
        print(touple)
    print("Geometric mean of the size increase:")
    print(gmean(values))
    print("Percentage of the functions that have <1% size increase:")
    # Printed as a fraction in [0, 1], not multiplied by 100.
    print(under_percentage / total)


if __name__ == "__main__":
    main()
#!/bin/bash
# Driver for the duplication statistics of one architecture.
#
# Usage: run from the directory that contains config.sh, passing the
# architecture name as the first argument. The architecture is forwarded to
# config.sh (through the `arch` variable) and to compute-duplication-total.py.

if [ $# -eq 0 ]; then
    # Diagnostics belong on stderr so they do not mix with the report.
    echo "No arguments supplied" >&2
    exit 1
fi

# Must be set before sourcing the config, which reads it.
arch=$1

# Import the config; stop here if it is missing instead of running the
# computation with empty paths.
if [ ! -r ./config.sh ]; then
    echo "config.sh not found in the current directory" >&2
    exit 1
fi
. ./config.sh

# Quote every expansion so paths containing spaces or glob characters are
# passed as single arguments.
"$computation_script_dir/compute-duplication-total.py" "$binaries_file" "$arch" "$function_idx_dir" "$revng_metrics_dir"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment