Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
artifacts-asiaccs20
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Andrea Gussoni
artifacts-asiaccs20
Commits
9c4af8fd
Commit
9c4af8fd
authored
4 years ago
by
Andrea Gussoni
Browse files
Options
Downloads
Patches
Plain Diff
Add utils to compare function boundaries matching
parent
a7cba5d1
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
scripting/boundaries-comparison-scripts/compare-all.sh
+24
-0
24 additions, 0 deletions
scripting/boundaries-comparison-scripts/compare-all.sh
scripting/boundaries-comparison-scripts/compare-functions.py
+200
-0
200 additions, 0 deletions
scripting/boundaries-comparison-scripts/compare-functions.py
with
224 additions
and
0 deletions
scripting/boundaries-comparison-scripts/compare-all.sh
0 → 100755
+
24
−
0
View file @
9c4af8fd
#!/bin/bash
if
[
$#
-eq
0
]
;
then
echo
"No arguments supplied"
exit
1
fi
basepath
=
$(
pwd
)
result_dir
=
$basepath
/result-
$1
if
[
!
-d
$result_dir
]
;
then
mkdir
$result_dir
;
fi
revng_dir
=
$basepath
/workdir-
$1
/revng-json
ida_dir
=
$basepath
/workdir-
$1
/ida-json-normalized
cd
$1
for
filename
in
*
do
echo
-e
"
\n
Analyzing binary:
$filename
"
$basepath
/scripting/boundaries-comparison-scripts/compare-functions.py
$ida_dir
/
$filename
.stripped.json.ida
$revng_dir
/
$filename
.ll.isolated.json.revng
$filename
$result_dir
/
$filename
$result_dir
/
$filename
.matching
done
;
This diff is collapsed.
Click to expand it.
scripting/boundaries-comparison-scripts/compare-functions.py
0 → 100755
+
200
−
0
View file @
9c4af8fd
#!/usr/bin/env python3
import
argparse
import
json
from
binascii
import
unhexlify
from
elftools.elf.elffile
import
ELFFile
from
rangeset
import
RangeSet
empty
=
(
RangeSet
(
0
,
0
)).
difference
(
RangeSet
(
-
1
,
1
))
strrange
=
lambda
cov
:
"
"
.
join
([
hex
(
x
[
0
])
+
"
-
"
+
hex
(
x
[
1
])
for
x
in
cov
if
x
[
0
]
!=
x
[
1
]])
def
get_change
(
current
,
previous
):
if
current
==
previous
:
return
0.0
try
:
return
(
abs
(
current
-
previous
)
/
previous
)
*
100.0
except
ZeroDivisionError
:
return
100.0
nops_encoding
=
{
"
EM_ARM
"
:
map
(
unhexlify
,
[
"
e320f000
"
]),
"
EM_X86_64
"
:
map
(
unhexlify
,
[
"
90
"
]),
"
EM_MIPS
"
:
map
(
unhexlify
,
[])
}
def
section_by_name
(
elf
,
name
):
matches
=
[
x
for
x
in
elf
.
iter_sections
()
if
x
.
name
==
name
]
assert
len
(
matches
)
==
1
return
matches
[
0
]
def
collect_symbols_coverage
(
symtab
,
ignore
):
symbols
=
[]
for
symbol
in
symtab
.
iter_symbols
():
if
symbol
.
entry
.
st_value
!=
0
:
range
=
RangeSet
(
symbol
.
entry
.
st_value
,
symbol
.
entry
.
st_value
+
symbol
.
entry
.
st_size
)
range
=
range
.
difference
(
ignore
)
symbols
.
append
((
str
(
symbol
.
name
),
range
))
return
symbols
def
strings_to_ranges
(
results
,
ignore
):
# For each function create the python coverage rage starting from strings
for
function
in
results
:
coverage
=
empty
for
x
in
function
[
"
coverage
"
]:
coverage
=
coverage
|
RangeSet
(
int
(
x
[
"
start
"
],
16
),
int
(
x
[
"
end
"
],
16
))
function
[
"
coverage
"
]
=
coverage
.
difference
(
ignore
)
def
collect_nops
(
elf
):
segments
=
[
x
for
x
in
elf
.
iter_segments
()
if
x
.
header
.
p_type
==
"
PT_LOAD
"
]
nops
=
empty
for
segment
in
segments
:
base_addr
=
segment
.
header
.
p_vaddr
content
=
segment
.
data
()
for
nop
in
nops_encoding
[
elf
.
header
.
e_machine
]:
last_match
=
0
match
=
content
.
find
(
nop
,
last_match
)
while
match
!=
-
1
:
last_match
=
match
+
1
nops
|=
RangeSet
(
base_addr
+
match
,
base_addr
+
match
+
len
(
nop
))
match
=
content
.
find
(
nop
,
last_match
)
return
nops
def
collect_constant_pools
(
elf
,
symtab
):
MAPPING_DATA
=
0
MAPPING_CODE
=
1
constant_pools
=
empty
blacklist
=
set
()
for
symbol
in
symtab
.
iter_symbols
():
if
symbol
.
entry
.
st_size
!=
0
and
symbol
.
entry
.
st_value
!=
0
and
symbol
.
entry
.
st_info
[
"
type
"
]
==
"
STT_FUNC
"
:
blacklist
.
add
(
symbol
.
entry
.
st_value
)
ranges
=
[]
for
symbol
in
symtab
.
iter_symbols
():
if
symbol
.
entry
.
st_size
!=
0
and
symbol
.
entry
.
st_value
!=
0
and
symbol
.
entry
.
st_info
[
"
type
"
]
==
"
STT_FUNC
"
:
ranges
.
append
((
MAPPING_CODE
,
symbol
.
entry
.
st_value
,
symbol
.
entry
.
st_shndx
))
if
symbol
.
entry
.
st_size
==
0
and
symbol
.
entry
.
st_info
.
bind
==
"
STB_LOCAL
"
and
symbol
.
entry
.
st_info
.
type
==
"
STT_NOTYPE
"
and
symbol
.
name
.
startswith
(
"
$
"
):
if
symbol
.
name
[
1
]
==
"
a
"
:
ranges
.
append
((
MAPPING_CODE
,
symbol
.
entry
.
st_value
,
symbol
.
entry
.
st_shndx
))
elif
symbol
.
name
[
1
]
==
"
d
"
:
if
not
(
symbol
.
entry
.
st_value
in
blacklist
):
ranges
.
append
((
MAPPING_DATA
,
symbol
.
entry
.
st_value
,
symbol
.
entry
.
st_shndx
))
else
:
raise
"
Unexpected symbol
"
ranges
=
sorted
(
ranges
,
key
=
lambda
x
:
x
[
1
])
for
i
in
range
(
elf
.
num_sections
()):
the_section
=
elf
.
get_section
(
i
)
section_ranges
=
map
(
lambda
x
:
(
x
[
0
],
x
[
1
]),
filter
(
lambda
x
:
x
[
2
]
==
i
,
ranges
))
section_ranges
=
list
(
section_ranges
)
to_skip
=
0
while
to_skip
<
len
(
section_ranges
)
and
section_ranges
[
to_skip
][
0
]
==
MAPPING_CODE
:
to_skip
+=
1
section_ranges
=
section_ranges
[
to_skip
:]
last_mapping_type
=
MAPPING_CODE
last_start
=
0
for
mapping_type
,
start
in
section_ranges
:
if
last_mapping_type
==
MAPPING_DATA
and
mapping_type
==
MAPPING_CODE
:
constant_pools
|=
RangeSet
(
last_start
,
start
)
if
mapping_type
!=
last_mapping_type
:
last_mapping_type
,
last_start
=
mapping_type
,
start
if
last_mapping_type
==
MAPPING_DATA
:
constant_pools
|=
RangeSet
(
last_start
,
the_section
.
header
.
sh_addr
+
the_section
.
header
.
sh_size
)
return
constant_pools
def
main
():
parser
=
argparse
.
ArgumentParser
(
description
=
'
My nice tool.
'
)
parser
.
add_argument
(
'
--only-start
'
,
action
=
'
store_true
'
,
help
=
"
Match only functions starting at the same exact address.
"
)
parser
.
add_argument
(
'
ida
'
,
metavar
=
'
IDAFILE
'
,
help
=
'
IDA created file
'
)
parser
.
add_argument
(
'
revng
'
,
metavar
=
'
REVNGFILE
'
,
help
=
'
rev.ng created file.
'
)
parser
.
add_argument
(
'
elf
'
,
metavar
=
'
ELF
'
,
help
=
'
rev.ng created file.
'
)
parser
.
add_argument
(
'
outputjson
'
,
metavar
=
'
OUTPUTFILEJSON
'
,
help
=
'
File where to write output info in JSON format.
'
)
parser
.
add_argument
(
'
outputmatching
'
,
metavar
=
'
OUTPUTFILEMATCH
'
,
help
=
'
File where to write the functions that match.
'
)
args
=
parser
.
parse_args
()
with
open
(
args
.
revng
,
"
r
"
)
as
revng_file
,
open
(
args
.
ida
,
"
r
"
)
as
ida_file
,
open
(
args
.
elf
,
"
rb
"
)
as
elf_file
,
open
(
args
.
outputjson
,
"
w
"
)
as
output_json
,
open
(
args
.
outputmatching
,
"
w
"
)
as
output_matching
:
# JSON output file
results
=
[]
# Load the elf file
elf
=
ELFFile
(
elf_file
)
symtab
=
section_by_name
(
elf
,
"
.symtab
"
)
nops
=
collect_nops
(
elf
)
constant_pools
=
collect_constant_pools
(
elf
,
symtab
)
to_ignore
=
nops
|
constant_pools
symbols
=
collect_symbols_coverage
(
symtab
,
to_ignore
)
# Load the files produced by IDA and rev.ng
revng
=
json
.
load
(
revng_file
)
ida
=
json
.
load
(
ida_file
)
# Convert the coverage description to python ranges
strings_to_ranges
(
revng
,
to_ignore
)
strings_to_ranges
(
ida
,
to_ignore
)
# Collect percentage of matching
total
=
0
matching
=
0
# Write in output for each function the coverage measurements
for
function_revng
in
revng
:
for
function_ida
in
ida
:
for
symbol
in
symbols
:
symbol_name
,
symbol_coverage
=
symbol
# Use function entry address
if
int
(
function_revng
[
'
entry_point_address
'
],
16
)
==
int
(
function_ida
[
'
entry_point_address
'
],
16
)
and
function_revng
[
'
entry_point
'
].
replace
(
'
bb.
'
,
''
)
==
symbol_name
:
coverage_revng
=
function_revng
[
"
coverage
"
]
coverage_ida
=
function_ida
[
"
coverage
"
]
coverage_symbol
=
symbol_coverage
total
+=
1
# Set that there is a match as soon as revng and ida matches, do not worry about symbol coverage
percentage_change
=
get_change
(
coverage_revng
.
measure
(),
coverage_ida
.
measure
())
match
=
percentage_change
<=
10.0
if
not
match
:
print
()
print
(
"
We found a non-matching function
"
)
print
(
function_ida
[
'
entry_point
'
]
+
"
/
"
+
function_revng
[
'
entry_point
'
])
print
(
"
Percentage discepancy was:
"
+
str
(
percentage_change
))
item
=
{
'
function_name
'
:
function_revng
[
'
entry_point
'
],
'
symbol_coverage
'
:
str
(
coverage_symbol
),
'
symbol_coverage_measure
'
:
coverage_symbol
.
measure
(),
'
revng_coverage
'
:
str
(
coverage_revng
),
'
revng_coverage_measure
'
:
coverage_revng
.
measure
(),
'
ida_coverage
'
:
str
(
coverage_ida
),
'
ida_coverage_measure
'
:
coverage_ida
.
measure
(),
'
match
'
:
str
(
match
),
'
revng-ida
'
:
strrange
(
function_revng
[
"
coverage
"
]
-
function_ida
[
"
coverage
"
]),
'
ida-revng
'
:
strrange
(
function_ida
[
"
coverage
"
]
-
function_revng
[
"
coverage
"
])
}
results
.
append
(
item
)
# Add to the .matching file only functions that match
if
match
:
output_matching
.
write
(
function_revng
[
'
entry_point
'
]
+
'
\n
'
)
matching
+=
1
# Write the json on the output file
json
.
dump
(
results
,
output_json
,
indent
=
2
)
if
total
==
0
:
print
(
"
No matching function
"
)
else
:
print
(
matching
/
total
)
if
__name__
==
"
__main__
"
:
main
()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment