Newer
Older
/// \file functionboundariesdetection.cpp
/// \brief
//
// This file is distributed under the MIT License. See LICENSE.md for details.
//
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
// Standard includes
#include <cstdint>
#include <fstream>
#include <map>
#include <set>
#include <sstream>
#include <vector>
// Boost includes
#include <boost/icl/interval_set.hpp>
#include <boost/type_traits/is_same.hpp>
#include <boost/icl/right_open_interval.hpp>
// LLVM includes
#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
// Local includes
#include "debug.h"
#include "datastructures.h"
#include "functionboundariesdetection.h"
#include "ir-helpers.h"
#include "jumptargetmanager.h"
using namespace llvm;
using std::map;
using std::vector;
class FunctionBoundariesDetectionImpl;
using FBDP = FunctionBoundariesDetectionPass;
using FBD = FunctionBoundariesDetectionImpl;
using interval_set = boost::icl::interval_set<uint64_t>;
using interval = boost::icl::interval<uint64_t>;
char FBDP::ID = 0;
static RegisterPass<FBDP> X("fbdp",
"Function Boundaries Detection Pass",
true,
true);
class FunctionBoundariesDetectionImpl {
public:
FunctionBoundariesDetectionImpl(Function &F,
JumpTargetManager *JTM) : F(F), JTM(JTM) { }
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
map<BasicBlock *, vector<BasicBlock *>> run();
private:
enum RelationType {
UnknownRelation = 0,
Head = 1,
Fallthrough = 2,
Jump = 4,
Return = 8
};
enum CFEPReason {
UnknownReason = 0,
Callee = 1,
GlobalData = 2,
InCode = 4,
SkippingJump = 8
};
class CFEPRelation {
public:
CFEPRelation(BasicBlock *CFEP) : CFEP(CFEP), Distance(0), Type(0) { }
void setType(RelationType T) { Type |= T; }
bool hasType(RelationType T) const { return Type & T; }
void setDistance(uint32_t New) { Distance = std::max(Distance, New); }
bool isSkippingJump() const { return Distance > 0 && hasType(Jump); }
bool isNonSkippingJump() const { return Distance == 0 && hasType(Jump); }
BasicBlock *cfep() const { return CFEP; }
std::string describe() const;
private:
BasicBlock *CFEP;
uint32_t Distance;
uint32_t Type;
};
class CFEP {
public:
CFEP() : Reasons(0) { }
void setReason(CFEPReason Reason) { Reasons |= Reason; }
bool hasReason(CFEPReason Reason) { return Reasons & Reason; }
private:
uint32_t Reasons;
};
private:
void initPostDispatcherIt();
void collectFunctionCalls();
void collectReturnInstructions();
void registerBasicBlockAddressRanges();
interval_set findCoverage(BasicBlock *BB);
// CFEP related methods
void collectInitialCFEPSet();
void cfepProcessPhase1();
void cfepProcessPhase2();
/// Associate to each basic block a metadata with the list of functions it
/// belongs to
void createMetadata();
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
void serialize();
void setRelation(BasicBlock *CFEP, BasicBlock *Affected, RelationType T) {
assert(CFEP != nullptr);
CFEPRelation &Relation = getRelation(CFEP, Affected);
Relation.setType(T);
}
void setDistance(BasicBlock *CFEP, BasicBlock *Affected, uint64_t Distance) {
assert(CFEP != nullptr);
const uint64_t Max = std::numeric_limits<uint32_t>::max();
Distance = std::min(Distance, Max);
CFEPRelation &Relation = getRelation(CFEP, Affected);
Relation.setDistance(Distance);
}
bool isCFEP(BasicBlock *BB) const { return CFEPs.count(BB); }
void registerCFEP(BasicBlock *BB, CFEPReason Reason) {
assert(BB != nullptr);
CFEPs[BB].setReason(Reason);
setRelation(BB, BB, Head);
}
void filterCFEPs();
CFEPRelation &getRelation(BasicBlock *CFEP, BasicBlock *Affected) {
SmallVector<CFEPRelation, 2> &BBRelations = Relations[Affected];
auto It = std::find_if(BBRelations.begin(),
BBRelations.end(),
[CFEP] (CFEPRelation &R) {
return R.cfep() == CFEP;
});
if (It != BBRelations.end()) {
return *It;
} else {
BBRelations.emplace_back(CFEP);
return BBRelations.back();
}
}
std::vector<BasicBlock *> cfeps() const {
std::vector<BasicBlock *> Result;
Result.reserve(CFEPs.size());
for (auto &P : CFEPs)
Result.push_back(P.first);
return Result;
}
private:
Function &F;
JumpTargetManager *JTM;
std::map<TerminatorInst *, BasicBlock *> FunctionCalls;
std::map<BasicBlock *, std::vector<BasicBlock *>> CallPredecessors;
std::set<uint64_t> ReturnPCs;
std::set<TerminatorInst *> Returns;
ilist_iterator<BasicBlock> PostDispatcherIt;
std::map<BasicBlock *, interval_set> Coverage;
// CFEP related data
std::map<BasicBlock *, CFEP> CFEPs;
std::map<BasicBlock *, SmallVector<CFEPRelation, 2>> Relations;
OnceQueue<BasicBlock *> CFEPWorkList;
interval_set Callees;
std::map<BasicBlock *, std::vector<BasicBlock *>> Functions;
};
void FBD::initPostDispatcherIt() {
// Skip dispatcher and friends
auto It = F.begin();
for (; It != F.end(); It++) {
if (!It->empty()) {
if (auto *Call = dyn_cast<CallInst>(&*It->begin())) {
Function *Callee = Call->getCalledFunction();
if (Callee != nullptr && Callee->getName() == "newpc")
break;
}
}
}
PostDispatcherIt = It;
}
static inline BasicBlock *getBlock(Value *V) {
return cast<BlockAddress>(V)->getBasicBlock();
}
void FBD::collectFunctionCalls() {
Module *M = F.getParent();
Function *FC = M->getFunction("function_call");
for (User *U : FC->users()) {
if (auto *Call = dyn_cast<CallInst>(U)) {
BasicBlock *ReturnBB = getBlock(Call->getOperand(1));
uint32_t ReturnPC = getLimitedValue(Call->getOperand(2));
auto *Terminator = cast<TerminatorInst>(nextNonMarker(Call));
assert(Terminator != nullptr);
FunctionCalls[Terminator] = ReturnBB;
CallPredecessors[ReturnBB].push_back(Call->getParent());
ReturnPCs.insert(ReturnPC);
}
}
// Mark all the callee basic blocks as such
for (auto P : FunctionCalls)
for (BasicBlock *S : P.first->successors())
if (JTM->isTranslatedBB(S))
JTM->registerJT(S, JumpTargetManager::Callee);
}
void FBD::collectReturnInstructions() {
// Detect return instructions
// TODO: there is a remote possibility that we're mishandling some case here,
// in the future we should perform a stack analysis to prove that a
// register has not been touched since the function entry.
for (BasicBlock &BB : make_range(PostDispatcherIt, F.end())) {
auto *Terminator = BB.getTerminator();
if (FunctionCalls.count(Terminator) != 0)
continue;
bool JumpsToDispatcher = false;
bool IsReturn = true;
for (BasicBlock *Successor : Terminator->successors()) {
assert(!Successor->empty());
// A return instruction must jump to JTM->anyPC, while all the other
// successors (if any) must be registered returns addresses
if (Successor == JTM->anyPC()) {
JumpsToDispatcher = true;
} else if (ReturnPCs.count(JTM->getPC(&*Successor->begin()).first) == 0) {
IsReturn = false;
break;
}
}
IsReturn &= JumpsToDispatcher;
if (IsReturn) {
// TODO: assert that the destnation is the content of a register, or a
// load from a memory location at a constant offset from the content
// of a register
Returns.insert(Terminator);
}
}
}
/// \brief Register for each basic block the range of addresses it covers
void FBD::registerBasicBlockAddressRanges() {
// Register the range of addresses covered by each basic block
for (User *U : F.getParent()->getFunction("newpc")->users()) {
auto *Call = dyn_cast<CallInst>(U);
if (Call == nullptr)
continue;
BasicBlock *BB = Call->getParent();
uint64_t Address = getLimitedValue(Call->getOperand(0));
uint64_t Size = getLimitedValue(Call->getOperand(1));
assert(Address > 0 && Size > 0);
Coverage[BB] += interval::right_open(Address, Address + Size);
}
}
interval_set FBD::findCoverage(BasicBlock *BB) {
auto It = Coverage.find(BB);
if (It != Coverage.end()) {
OnceQueue<BasicBlock *> WorkList;
WorkList.insert(BB);
while (!WorkList.empty()) {
BB = WorkList.pop();
It = Coverage.find(BB);
if (It != Coverage.end()) {
for (BasicBlock *Predecessor : predecessors(BB))
if (JTM->isTranslatedBB(Predecessor))
WorkList.insert(Predecessor);
}
llvm_unreachable("Couldn't find basic block");
}
void FBD::collectInitialCFEPSet() {
// TODO: handle entry points
// registerCFEP(JTM->getBlockAt(EntryPoint), Callee);
// Collect initial set of CFEPs
for (auto &P : *JTM) {
if (contains(JTM->readRange(), P.first))
continue;
const JumpTargetManager::JumpTarget &JT = P.second;
BasicBlock *CFEPHead = JT.head();
bool Insert = false;
DBG("functions", dbg << JT.describe() << "\n");
if (JT.hasReason(JumpTargetManager::Callee)) {
registerCFEP(CFEPHead, Callee);
assert(Coverage.find(CFEPHead) != Coverage.end());
Callees += Coverage[CFEPHead];
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
Insert = true;
}
if (JT.hasReason(JumpTargetManager::UnusedGlobalData)) {
registerCFEP(CFEPHead, GlobalData);
Insert = true;
}
if (JT.hasReason(JumpTargetManager::SETNotToPC)
&& !JT.hasReason(JumpTargetManager::SETToPC)) {
registerCFEP(CFEPHead, InCode);
Insert = true;
}
if (Insert)
CFEPWorkList.insert(CFEPHead);
}
}
void FBD::cfepProcessPhase1() {
// For each CFEP record which basic block it can reach and how. Then also
// detect skipping jumps.
while (!CFEPWorkList.empty()) {
BasicBlock *CFEP = CFEPWorkList.pop();
// Find all the basic block it can reach
OnceQueue<BasicBlock *> WorkList;
WorkList.insert(CFEP);
while (!WorkList.empty()) {
BasicBlock *RelatedBB = WorkList.pop();
auto FCIt = FunctionCalls.find(RelatedBB->getTerminator());
if (FCIt != FunctionCalls.end()) {
// This basic block ends with a function call, proceed with the return
// address, unless it's a call to a noreturn function.
if (JTM->noReturn().isNoreturnBasicBlock(RelatedBB)) {
DBG("nra", dbg << "Stopping at " << getName(RelatedBB) << " since it's a noreturn call\n");
} else {
BasicBlock *ReturnBB = FCIt->second;
setRelation(CFEP, ReturnBB, Return);
WorkList.insert(ReturnBB);
}
} else if (Returns.count(RelatedBB->getTerminator()) == 0) {
// It's not a return, it's not a function call, it must be a branch part
// of the ordinary control flow of the function.
for (BasicBlock *S : successors(RelatedBB)) {
if (!JTM->isTranslatedBB(S))
continue;
// TODO: track fallthrough
setRelation(CFEP, S, Jump);
WorkList.insert(S);
}
}
}
// Compute distance of jumps
// For each basic block look at his Jump successors
for (BasicBlock *BB : WorkList.visited()) {
TerminatorInst *T = BB->getTerminator();
if (FunctionCalls.count(T) != 0 || Returns.count(T) != 0)
continue;
interval_set StartAddressRange = findCoverage(BB);
uint64_t StartAddress = StartAddressRange.begin()->lower();
assert(StartAddress != 0);
for (BasicBlock *S : successors(BB)) {
if (!JTM->isTranslatedBB(S) || Coverage.count(S) == 0)
assert(Coverage.find(S) != Coverage.end());
interval_set &DestinationAddressRange = Coverage[S];
if (DestinationAddressRange.size() == 0)
uint64_t DestinationAddress = DestinationAddressRange.begin()->lower();
interval_set JumpInterval;
if (StartAddress <= DestinationAddress)
JumpInterval += interval::closed(StartAddress, DestinationAddress);
else
JumpInterval += interval::closed(DestinationAddress, StartAddress);
JumpInterval -= StartAddressRange;
JumpInterval -= DestinationAddressRange;
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
JumpInterval &= Callees;
uint64_t Distance = JumpInterval.size();
if (Distance > 0) {
setDistance(CFEP, S, Distance);
registerCFEP(S, SkippingJump);
CFEPWorkList.insert(S);
}
}
}
}
}
void FBD::filterCFEPs() {
std::map<BasicBlock *, CFEP>::iterator It = CFEPs.begin();
while (It != CFEPs.end()) {
BasicBlock *CFEPHead = It->first;
assert(CFEPHead != nullptr);
CFEP &C = It->second;
assert(!C.hasReason(UnknownReason));
// Keep a CFEP only if its address is taken, it's a callee or all the
// paths leading there are skipping jumps
bool Keep = C.hasReason(Callee);
bool AddressTaken = C.hasReason(GlobalData) || C.hasReason(InCode);
if (!Keep && AddressTaken) {
Keep = true;
// Check no relation of Jump type and 0-distance exist
for (CFEPRelation &Relation : Relations[CFEPHead])
Keep = Keep
&& !Relation.isNonSkippingJump()
&& !Relation.hasType(Return);
}
if (!Keep && !AddressTaken) {
auto &CFEPRelations = Relations[CFEPHead];
Keep = Relations.size() > 1;
if (Keep)
for (CFEPRelation &Relation : CFEPRelations)
Keep = Keep && (Relation.hasType(Head)
|| Relation.isSkippingJump());
}
if (Keep) {
DBG("functions", {
dbg << std::hex << "0x" << getBasicBlockPC(CFEPHead)
<< " is a FEP: "
<< " Callee? " << C.hasReason(Callee)
<< " GlobalData? " << C.hasReason(GlobalData)
<< " InCode? " << C.hasReason(InCode)
<< " SkippingJump? " << C.hasReason(SkippingJump)
<< "\n";
});
It++;
} else {
DBG("functions", {
dbg << std::hex << "0x" << getBasicBlockPC(CFEPHead)
<< " is a not a FEP:";
for (CFEPRelation &Relation : Relations[CFEPHead])
dbg << " {" << Relation.describe() << "}";
dbg << "\n";
});
It = CFEPs.erase(It);
}
}
Relations.clear();
}
void FBD::cfepProcessPhase2() {
// Find all the basic block it can reach
for (BasicBlock *CFEP : cfeps()) {
OnceQueue<BasicBlock *> WorkList;
WorkList.insert(CFEP);
while (!WorkList.empty()) {
BasicBlock *RelatedBB = WorkList.pop();
assert(JTM->isTranslatedBB(RelatedBB));
auto FCIt = FunctionCalls.find(RelatedBB->getTerminator());
if (FCIt != FunctionCalls.end()) {
BasicBlock *ReturnBB = FCIt->second;
if (!isCFEP(ReturnBB))
WorkList.insert(ReturnBB);
} else if (Returns.count(RelatedBB->getTerminator()) == 0) {
for (BasicBlock *S : successors(RelatedBB)) {
if (!JTM->isTranslatedBB(S))
continue;
// TODO: doesn't handle the div in div case
if (!isCFEP(S))
WorkList.insert(S);
}
}
}
for (BasicBlock *Member : WorkList.visited())
Functions[CFEP].push_back(Member);
}
}
void FBD::createMetadata() {
LLVMContext &Context = getContext(&F);
// We first compute the list of all the functions each basic block belongs to,
// so we don't have to create a huge number of metadata which are never
// deleted (?)
std::map<BasicBlock *, std::vector<Metadata *>> ReversedFunctions;
for (auto &P : Functions) {
BasicBlock *Header = P.first;
auto *Name = MDString::get(Context, getName(Header));
MDTuple *FunctionMD = MDNode::get(Context, { Name });
Instruction *Terminator = Header->getTerminator();
assert(Terminator != nullptr);
Terminator->setMetadata("func.entry", FunctionMD);
for (BasicBlock *Member : P.second)
ReversedFunctions[Member].push_back(FunctionMD);
}
// Associate the terminator of each basic block with the previously created
// metadata node
for (auto &P : ReversedFunctions) {
BasicBlock *BB = P.first;
if (!BB->empty() ) {
Instruction *Terminator = BB->getTerminator();
assert(Terminator != nullptr);
auto *FuncMDs = MDTuple::get(Context, ArrayRef<Metadata *>(P.second));
Terminator->setMetadata("func.member.of", FuncMDs);
// Mark each return instruction
for (TerminatorInst *T : Returns)
T->setMetadata("func.return", MDNode::get(Context, { }));
map<BasicBlock *, vector<BasicBlock *>> FBD::run() {
assert(JTM != nullptr);
initPostDispatcherIt();
collectFunctionCalls();
collectReturnInstructions();
// TODO: move this code in JTM
JTM->setCFGForm(JumpTargetManager::NoFunctionCallsCFG);
JTM->noReturn().computeKillerSet(CallPredecessors, Returns);
JTM->setCFGForm(JumpTargetManager::SemanticPreservingCFG);
registerBasicBlockAddressRanges();
collectInitialCFEPSet();
cfepProcessPhase1();
filterCFEPs();
cfepProcessPhase2();
createMetadata();
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
return std::move(Functions);
}
std::string FBD::CFEPRelation::describe() const {
std::stringstream SS;
SS << getName(CFEP)
<< " Distance: " << Distance;
if (hasType(UnknownRelation))
SS << " UnknownRelation";
if (hasType(Head))
SS << " Head";
if (hasType(Fallthrough))
SS << " Fallthrough";
if (hasType(Jump))
SS << " Jump";
if (hasType(Return))
SS << " Return";
return SS.str();
}
bool FBDP::runOnFunction(Function &F) {
FBD Impl(F, JTM);
Functions = Impl.run();
serialize();
return false;
}
void FBDP::serialize() const {
if (SerializePath.size() == 0)
return;
// Emit results
std::ofstream Output(SerializePath);
Output << "index,start,end\n";
for (auto &P : Functions) {
for (BasicBlock *BB : P.second) {
for (Instruction &I : *BB) {
if (auto *Call = dyn_cast<CallInst>(&I)) {
Function *Callee = Call->getCalledFunction();
if (Callee != nullptr && Callee->getName() == "newpc") {
uint64_t StartPC = getLimitedValue(Call->getArgOperandUse(0));
uint64_t Size = getLimitedValue(Call->getArgOperandUse(1));
uint64_t EndPC = StartPC + Size;
Output << std::dec << getName(P.first) << ","
<< "0x" << std::hex << StartPC << ","
<< "0x" << std::hex << EndPC << "\n";
}
}
}
}
}
}