Commit 9c4af8fd authored by Andrea Gussoni's avatar Andrea Gussoni
Browse files

Add utils to compare function boundaries matching

parent a7cba5d1
#!/bin/bash
if [ $# -eq 0 ]; then
echo "No arguments supplied"
exit 1
fi
basepath=$(pwd)
result_dir=$basepath/result-$1
if [ ! -d $result_dir ]; then
mkdir $result_dir;
fi
revng_dir=$basepath/workdir-$1/revng-json
ida_dir=$basepath/workdir-$1/ida-json-normalized
cd $1
for filename in *
do
echo -e "\nAnalyzing binary: $filename"
$basepath/scripting/boundaries-comparison-scripts/compare-functions.py $ida_dir/$filename.stripped.json.ida $revng_dir/$filename.ll.isolated.json.revng $filename $result_dir/$filename $result_dir/$filename.matching
done;
#!/usr/bin/env python3
import argparse
import json
from binascii import unhexlify
from elftools.elf.elffile import ELFFile
from rangeset import RangeSet
empty = (RangeSet(0,0)).difference(RangeSet(-1,1))
strrange = lambda cov: " ".join([hex(x[0]) + "-" + hex(x[1]) for x in cov if x[0] != x[1]])
def get_change(current, previous):
if current == previous:
return 0.0
try:
return (abs(current - previous) / previous) * 100.0
except ZeroDivisionError:
return 100.0
nops_encoding = {
"EM_ARM": map(unhexlify, ["e320f000"]),
"EM_X86_64": map(unhexlify, ["90"]),
"EM_MIPS": map(unhexlify, [])
}
def section_by_name(elf, name):
matches = [x for x in elf.iter_sections() if x.name == name]
assert len(matches) == 1
return matches[0]
def collect_symbols_coverage(symtab, ignore):
symbols = []
for symbol in symtab.iter_symbols():
if symbol.entry.st_value != 0:
range = RangeSet(symbol.entry.st_value, symbol.entry.st_value + symbol.entry.st_size)
range = range.difference(ignore)
symbols.append((str(symbol.name), range))
return symbols
def strings_to_ranges(results, ignore):
# For each function create the python coverage rage starting from strings
for function in results:
coverage = empty
for x in function["coverage"]:
coverage = coverage | RangeSet(int(x["start"], 16), int(x["end"], 16))
function["coverage"] = coverage.difference(ignore)
def collect_nops(elf):
segments = [x for x in elf.iter_segments() if x.header.p_type == "PT_LOAD"]
nops = empty
for segment in segments:
base_addr = segment.header.p_vaddr
content = segment.data()
for nop in nops_encoding[elf.header.e_machine]:
last_match = 0
match = content.find(nop, last_match)
while match != -1:
last_match = match + 1
nops |= RangeSet(base_addr + match, base_addr + match + len(nop))
match = content.find(nop, last_match)
return nops
def collect_constant_pools(elf, symtab):
MAPPING_DATA = 0
MAPPING_CODE = 1
constant_pools = empty
blacklist = set()
for symbol in symtab.iter_symbols():
if symbol.entry.st_size != 0 and symbol.entry.st_value != 0 and symbol.entry.st_info["type"] == "STT_FUNC":
blacklist.add(symbol.entry.st_value)
ranges = []
for symbol in symtab.iter_symbols():
if symbol.entry.st_size != 0 and symbol.entry.st_value != 0 and symbol.entry.st_info["type"] == "STT_FUNC":
ranges.append((MAPPING_CODE, symbol.entry.st_value, symbol.entry.st_shndx))
if symbol.entry.st_size == 0 and symbol.entry.st_info.bind == "STB_LOCAL" and symbol.entry.st_info.type == "STT_NOTYPE" and symbol.name.startswith("$"):
if symbol.name[1] == "a":
ranges.append((MAPPING_CODE, symbol.entry.st_value, symbol.entry.st_shndx))
elif symbol.name[1] == "d":
if not (symbol.entry.st_value in blacklist):
ranges.append((MAPPING_DATA, symbol.entry.st_value, symbol.entry.st_shndx))
else:
raise "Unexpected symbol"
ranges = sorted(ranges, key=lambda x: x[1])
for i in range(elf.num_sections()):
the_section = elf.get_section(i)
section_ranges = map(lambda x: (x[0], x[1]), filter(lambda x: x[2] == i, ranges))
section_ranges = list(section_ranges)
to_skip = 0
while to_skip < len(section_ranges) and section_ranges[to_skip][0] == MAPPING_CODE:
to_skip += 1
section_ranges = section_ranges[to_skip:]
last_mapping_type = MAPPING_CODE
last_start = 0
for mapping_type, start in section_ranges:
if last_mapping_type == MAPPING_DATA and mapping_type == MAPPING_CODE:
constant_pools |= RangeSet(last_start, start)
if mapping_type != last_mapping_type:
last_mapping_type, last_start = mapping_type, start
if last_mapping_type == MAPPING_DATA:
constant_pools |= RangeSet(last_start, the_section.header.sh_addr + the_section.header.sh_size)
return constant_pools
def main():
parser = argparse.ArgumentParser(description='My nice tool.')
parser.add_argument('--only-start', action='store_true', help="Match only functions starting at the same exact address.")
parser.add_argument('ida', metavar='IDAFILE', help='IDA created file')
parser.add_argument('revng', metavar='REVNGFILE', help='rev.ng created file.')
parser.add_argument('elf', metavar='ELF', help='rev.ng created file.')
parser.add_argument('outputjson', metavar='OUTPUTFILEJSON', help='File where to write output info in JSON format.')
parser.add_argument('outputmatching', metavar='OUTPUTFILEMATCH', help='File where to write the functions that match.')
args = parser.parse_args()
with open(args.revng, "r") as revng_file, open(args.ida, "r") as ida_file, open(args.elf, "rb") as elf_file, open(args.outputjson, "w") as output_json, open(args.outputmatching, "w") as output_matching:
# JSON output file
results = []
# Load the elf file
elf = ELFFile(elf_file)
symtab = section_by_name(elf, ".symtab")
nops = collect_nops(elf)
constant_pools = collect_constant_pools(elf, symtab)
to_ignore = nops | constant_pools
symbols = collect_symbols_coverage(symtab, to_ignore)
# Load the files produced by IDA and rev.ng
revng = json.load(revng_file)
ida = json.load(ida_file)
# Convert the coverage description to python ranges
strings_to_ranges(revng, to_ignore)
strings_to_ranges(ida, to_ignore)
# Collect percentage of matching
total = 0
matching = 0
# Write in output for each function the coverage measurements
for function_revng in revng:
for function_ida in ida:
for symbol in symbols:
symbol_name, symbol_coverage = symbol
# Use function entry address
if int(function_revng['entry_point_address'], 16) == int(function_ida['entry_point_address'], 16) and function_revng['entry_point'].replace('bb.', '') == symbol_name:
coverage_revng = function_revng["coverage"]
coverage_ida = function_ida["coverage"]
coverage_symbol = symbol_coverage
total += 1
# Set that there is a match as soon as revng and ida matches, do not worry about symbol coverage
percentage_change = get_change(coverage_revng.measure(), coverage_ida.measure())
match = percentage_change <= 10.0
if not match:
print()
print("We found a non-matching function")
print(function_ida['entry_point'] + "/" + function_revng['entry_point'])
print("Percentage discepancy was: " + str(percentage_change))
item = {'function_name': function_revng['entry_point'],
'symbol_coverage': str(coverage_symbol),
'symbol_coverage_measure': coverage_symbol.measure(),
'revng_coverage': str(coverage_revng),
'revng_coverage_measure': coverage_revng.measure(),
'ida_coverage': str(coverage_ida),
'ida_coverage_measure': coverage_ida.measure(),
'match': str(match),
'revng-ida' : strrange(function_revng["coverage"] - function_ida["coverage"]),
'ida-revng' : strrange(function_ida["coverage"] - function_revng["coverage"])
}
results.append(item)
# Add to the .matching file only functions that match
if match:
output_matching.write(function_revng['entry_point'] + '\n')
matching += 1
# Write the json on the output file
json.dump(results, output_json, indent=2)
if total == 0:
print("No matching function")
else:
print(matching/total)
if __name__ == "__main__":
main()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment