[sanitizers] Disable target-specific lowering of string functions.

CodeGen has hooks that allow targets to emit specialized code instead
of calls to memcmp, memchr, strcpy, stpcpy, strcmp, strlen, strnlen.
When ASan/MSan/TSan/ESan is in use, this sidesteps its interceptors, resulting
in uninstrumented memory accesses.  To avoid that, make these sanitizers
mark the calls as nobuiltin.

Differential Revision: http://reviews.llvm.org/D19781

llvm-svn: 273083
This commit is contained in:
Marcin Koscielnicki 2016-06-18 10:10:37 +00:00
parent 8457adc4c1
commit 8b5d8f33b1
10 changed files with 224 additions and 8 deletions

View File

@ -30,6 +30,7 @@ class BasicBlock;
class Function;
class BranchInst;
class Instruction;
class CallInst;
class DbgDeclareInst;
class StoreInst;
class LoadInst;
@ -354,6 +355,17 @@ bool recognizeBSwapOrBitReverseIdiom(
Instruction *I, bool MatchBSwaps, bool MatchBitReversals,
SmallVectorImpl<Instruction *> &InsertedInsts);
//===----------------------------------------------------------------------===//
// Sanitizer utilities
//
/// Given a CallInst, check if it calls a string function known to CodeGen,
/// and mark it with NoBuiltin if so. To be used by sanitizers that intend
/// to intercept string functions and want to avoid converting them to target
/// specific instructions.
///
/// The recognized functions are memcmp, memchr, strcpy, stpcpy, strcmp,
/// strlen and strnlen. Calls without a known callee, and calls to callees
/// that have local linkage or no name, are left untouched.
///
/// \param CI  The call to inspect. It is dereferenced unconditionally and is
///            modified in place when the callee matches.
/// \param TLI Library info used to map the callee's name to a library
///            function. It is dereferenced without a null check.
void maybeMarkSanitizerLibraryCallNoBuiltin(CallInst *CI,
const TargetLibraryInfo *TLI);
} // End llvm namespace
#endif

View File

@ -1765,6 +1765,8 @@ bool AddressSanitizer::runOnFunction(Function &F) {
bool IsWrite;
unsigned Alignment;
uint64_t TypeSize;
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// Fill the set of memory operations to instrument.
for (auto &BB : F) {
@ -1793,6 +1795,8 @@ bool AddressSanitizer::runOnFunction(Function &F) {
TempsToInstrument.clear();
if (CS.doesNotReturn()) NoReturnCalls.push_back(CS.getInstruction());
}
if (CallInst *CI = dyn_cast<CallInst>(&Inst))
maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
continue;
}
ToInstrument.push_back(&Inst);
@ -1805,8 +1809,6 @@ bool AddressSanitizer::runOnFunction(Function &F) {
CompileKernel ||
(ClInstrumentationWithCallsThreshold >= 0 &&
ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold);
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
const DataLayout &DL = F.getParent()->getDataLayout();
ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(),
/*RoundToAlign=*/true);

View File

@ -23,6 +23,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@ -32,6 +33,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@ -149,6 +151,7 @@ public:
const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
: ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {}
const char *getPassName() const override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnModule(Module &M) override;
static char ID;
@ -199,13 +202,22 @@ private:
} // namespace
char EfficiencySanitizer::ID = 0;
INITIALIZE_PASS(EfficiencySanitizer, "esan",
"EfficiencySanitizer: finds performance issues.", false, false)
INITIALIZE_PASS_BEGIN(
EfficiencySanitizer, "esan",
"EfficiencySanitizer: finds performance issues.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
EfficiencySanitizer, "esan",
"EfficiencySanitizer: finds performance issues.", false, false)
// Returns the constant string "EfficiencySanitizer" as the name of this pass.
const char *EfficiencySanitizer::getPassName() const {
return "EfficiencySanitizer";
}
// Registers TargetLibraryInfoWrapperPass as a required analysis of this pass.
// runOnFunction obtains its TargetLibraryInfo from that wrapper through
// getAnalysis<TargetLibraryInfoWrapperPass>().
void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
ModulePass *
llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) {
return new EfficiencySanitizer(Options);
@ -544,6 +556,8 @@ bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) {
SmallVector<Instruction *, 8> GetElementPtrs;
bool Res = false;
const DataLayout &DL = M.getDataLayout();
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
for (auto &BB : F) {
for (auto &Inst : BB) {
@ -555,6 +569,8 @@ bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) {
MemIntrinCalls.push_back(&Inst);
else if (isa<GetElementPtrInst>(Inst))
GetElementPtrs.push_back(&Inst);
else if (CallInst *CI = dyn_cast<CallInst>(&Inst))
maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
}
}

View File

@ -317,6 +317,9 @@ class MemorySanitizer : public FunctionPass {
TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
WarningFn(nullptr) {}
const char *getPassName() const override { return "MemorySanitizer"; }
// Registers TargetLibraryInfoWrapperPass as a required analysis of this pass.
// MemorySanitizerVisitor reads the TargetLibraryInfo from it through
// MS.getAnalysis<TargetLibraryInfoWrapperPass>().
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
bool runOnFunction(Function &F) override;
bool doInitialization(Module &M) override;
static char ID; // Pass identification, replacement for typeid.
@ -384,9 +387,13 @@ class MemorySanitizer : public FunctionPass {
} // anonymous namespace
char MemorySanitizer::ID = 0;
INITIALIZE_PASS(MemorySanitizer, "msan",
"MemorySanitizer: detects uninitialized reads.",
false, false)
INITIALIZE_PASS_BEGIN(
MemorySanitizer, "msan",
"MemorySanitizer: detects uninitialized reads.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
MemorySanitizer, "msan",
"MemorySanitizer: detects uninitialized reads.", false, false)
FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins) {
return new MemorySanitizer(TrackOrigins);
@ -618,6 +625,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
ValueMap<Value*, Value*> ShadowMap, OriginMap;
std::unique_ptr<VarArgHelper> VAHelper;
const TargetLibraryInfo *TLI;
// The following flags disable parts of MSan instrumentation based on
// blacklist contents and command-line options.
@ -647,6 +655,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// FIXME: Consider using SpecialCaseList to specify a list of functions that
// must always return fully initialized values. For now, we hardcode "main".
CheckReturnValue = SanitizeFunction && (F.getName() == "main");
TLI = &MS.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
DEBUG(if (!InsertChecks)
dbgs() << "MemorySanitizer is not inserting checks into '"
@ -2529,6 +2538,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
AttributeSet::FunctionIndex,
B));
}
maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
}
IRBuilder<> IRB(&I);

View File

@ -26,6 +26,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@ -42,6 +43,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@ -82,6 +84,7 @@ namespace {
struct ThreadSanitizer : public FunctionPass {
ThreadSanitizer() : FunctionPass(ID) {}
const char *getPassName() const override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
bool doInitialization(Module &M) override;
static char ID; // Pass identification, replacement for typeid.
@ -122,7 +125,13 @@ struct ThreadSanitizer : public FunctionPass {
} // namespace
char ThreadSanitizer::ID = 0;
INITIALIZE_PASS(ThreadSanitizer, "tsan",
INITIALIZE_PASS_BEGIN(
ThreadSanitizer, "tsan",
"ThreadSanitizer: detects data races.",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
ThreadSanitizer, "tsan",
"ThreadSanitizer: detects data races.",
false, false)
@ -130,6 +139,10 @@ const char *ThreadSanitizer::getPassName() const {
return "ThreadSanitizer";
}
// Registers TargetLibraryInfoWrapperPass as a required analysis of this pass.
// runOnFunction obtains its TargetLibraryInfo from that wrapper through
// getAnalysis<TargetLibraryInfoWrapperPass>().
void ThreadSanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
// Factory for the ThreadSanitizer pass: allocates a default-constructed
// ThreadSanitizer with `new` and returns it as a FunctionPass pointer.
// NOTE(review): presumably the caller (e.g. a pass manager) takes ownership of
// the returned object - confirm against the callers.
FunctionPass *llvm::createThreadSanitizerPass() {
return new ThreadSanitizer();
}
@ -368,6 +381,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
bool HasCalls = false;
bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread);
const DataLayout &DL = F.getParent()->getDataLayout();
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// Traverse all instructions, collect loads/stores/returns, check for calls.
for (auto &BB : F) {
@ -379,6 +394,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
else if (isa<ReturnInst>(Inst))
RetVec.push_back(&Inst);
else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
if (CallInst *CI = dyn_cast<CallInst>(&Inst))
maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
if (isa<MemIntrinsic>(Inst))
MemIntrinCalls.push_back(&Inst);
HasCalls = true;

View File

@ -1942,3 +1942,29 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I));
return true;
}
// Sanitizers rely on runtime interceptors for certain string functions in
// order to observe the memory those functions touch. CodeGen, however, has
// special handling that may replace calls to some of them with
// target-specific code, which would skip the interceptors and leave the
// accesses uninstrumented. Tagging the call site as NoBuiltin disables that
// handling in CodeGen. Called from the ASan, MSan, TSan and ESan passes.
void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(CallInst *CI,
                                                  const TargetLibraryInfo *TLI) {
  Function *Callee = CI->getCalledFunction();

  // Without a known callee there is no name to look up.
  if (!Callee)
    return;

  // Callees with local linkage or without a name are never treated as
  // library functions here.
  if (Callee->hasLocalLinkage() || !Callee->hasName())
    return;

  // Ask TargetLibraryInfo whether the name denotes a known library function.
  LibFunc::Func Kind;
  if (!TLI->getLibFunc(Callee->getName(), Kind))
    return;

  // Only this fixed set of string/memory functions is tagged; any other
  // recognized library function is left as it was.
  const bool NeedsNoBuiltin =
      Kind == LibFunc::memcmp || Kind == LibFunc::memchr ||
      Kind == LibFunc::strcpy || Kind == LibFunc::stpcpy ||
      Kind == LibFunc::strcmp || Kind == LibFunc::strlen ||
      Kind == LibFunc::strnlen;
  if (NeedsNoBuiltin)
    CI->addAttribute(AttributeSet::FunctionIndex, Attribute::NoBuiltin);
}

View File

@ -0,0 +1,33 @@
; Test marking string functions as nobuiltin in address sanitizer.
;
; RUN: opt < %s -asan -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
declare i8* @memchr(i8* %a, i32 %b, i64 %c)
declare i32 @memcmp(i8* %a, i8* %b, i64 %c)
declare i32 @strcmp(i8* %a, i8* %b)
declare i8* @strcpy(i8* %a, i8* %b)
declare i8* @stpcpy(i8* %a, i8* %b)
declare i64 @strlen(i8* %a)
declare i64 @strnlen(i8* %a, i64 %b)
; Each call in @f1 must carry a call-site attribute group. The group number is
; captured on the first match and every later call must reuse the same one.
; CHECK: call{{.*}}@memchr{{.*}} #[[ATTR:[0-9]+]]
; CHECK: call{{.*}}@memcmp{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strcmp{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strcpy{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@stpcpy{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strlen{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strnlen{{.*}} #[[ATTR]]
; The captured group must be the nobuiltin group. This line previously had no
; check prefix, so FileCheck silently ignored it.
; CHECK: attributes #[[ATTR]] = { nobuiltin }

; One direct call to each string function that the address sanitizer pass is
; expected to mark as nobuiltin.
define void @f1(i8* %a, i8* %b) nounwind uwtable sanitize_address {
  tail call i8* @memchr(i8* %a, i32 1, i64 12)
  tail call i32 @memcmp(i8* %a, i8* %b, i64 12)
  tail call i32 @strcmp(i8* %a, i8* %b)
  tail call i8* @strcpy(i8* %a, i8* %b)
  tail call i8* @stpcpy(i8* %a, i8* %b)
  tail call i64 @strlen(i8* %a)
  tail call i64 @strnlen(i8* %a, i64 12)
  ret void
}

View File

@ -0,0 +1,33 @@
; Test marking string functions as nobuiltin in efficiency sanitizer.
;
; RUN: opt < %s -esan -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
declare i8* @memchr(i8* %a, i32 %b, i64 %c)
declare i32 @memcmp(i8* %a, i8* %b, i64 %c)
declare i32 @strcmp(i8* %a, i8* %b)
declare i8* @strcpy(i8* %a, i8* %b)
declare i8* @stpcpy(i8* %a, i8* %b)
declare i64 @strlen(i8* %a)
declare i64 @strnlen(i8* %a, i64 %b)
; Each call in @f1 must carry a call-site attribute group. The group number is
; captured on the first match and every later call must reuse the same one.
; CHECK: call{{.*}}@memchr{{.*}} #[[ATTR:[0-9]+]]
; CHECK: call{{.*}}@memcmp{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strcmp{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strcpy{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@stpcpy{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strlen{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strnlen{{.*}} #[[ATTR]]
; The captured group must be the nobuiltin group. This line previously had no
; check prefix, so FileCheck silently ignored it.
; CHECK: attributes #[[ATTR]] = { nobuiltin }

; One direct call to each string function that the efficiency sanitizer pass
; is expected to mark as nobuiltin.
define void @f1(i8* %a, i8* %b) nounwind uwtable {
  tail call i8* @memchr(i8* %a, i32 1, i64 12)
  tail call i32 @memcmp(i8* %a, i8* %b, i64 12)
  tail call i32 @strcmp(i8* %a, i8* %b)
  tail call i8* @strcpy(i8* %a, i8* %b)
  tail call i8* @stpcpy(i8* %a, i8* %b)
  tail call i64 @strlen(i8* %a)
  tail call i64 @strnlen(i8* %a, i64 12)
  ret void
}

View File

@ -0,0 +1,33 @@
; Test marking string functions as nobuiltin in memory sanitizer.
;
; RUN: opt < %s -msan -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
declare i8* @memchr(i8* %a, i32 %b, i64 %c)
declare i32 @memcmp(i8* %a, i8* %b, i64 %c)
declare i32 @strcmp(i8* %a, i8* %b)
declare i8* @strcpy(i8* %a, i8* %b)
declare i8* @stpcpy(i8* %a, i8* %b)
declare i64 @strlen(i8* %a)
declare i64 @strnlen(i8* %a, i64 %b)
; Each call in @f1 must carry a call-site attribute group. The group number is
; captured on the first match and every later call must reuse the same one.
; CHECK: call{{.*}}@memchr{{.*}} #[[ATTR:[0-9]+]]
; CHECK: call{{.*}}@memcmp{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strcmp{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strcpy{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@stpcpy{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strlen{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strnlen{{.*}} #[[ATTR]]
; The captured group must be the nobuiltin group. This line previously had no
; check prefix, so FileCheck silently ignored it.
; CHECK: attributes #[[ATTR]] = { nobuiltin }

; One direct call to each string function that the memory sanitizer pass is
; expected to mark as nobuiltin.
define void @f1(i8* %a, i8* %b) nounwind uwtable sanitize_memory {
  tail call i8* @memchr(i8* %a, i32 1, i64 12)
  tail call i32 @memcmp(i8* %a, i8* %b, i64 12)
  tail call i32 @strcmp(i8* %a, i8* %b)
  tail call i8* @strcpy(i8* %a, i8* %b)
  tail call i8* @stpcpy(i8* %a, i8* %b)
  tail call i64 @strlen(i8* %a)
  tail call i64 @strnlen(i8* %a, i64 12)
  ret void
}

View File

@ -0,0 +1,33 @@
; Test marking string functions as nobuiltin in thread sanitizer.
;
; RUN: opt < %s -tsan -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
declare i8* @memchr(i8* %a, i32 %b, i64 %c)
declare i32 @memcmp(i8* %a, i8* %b, i64 %c)
declare i32 @strcmp(i8* %a, i8* %b)
declare i8* @strcpy(i8* %a, i8* %b)
declare i8* @stpcpy(i8* %a, i8* %b)
declare i64 @strlen(i8* %a)
declare i64 @strnlen(i8* %a, i64 %b)
; Each call in @f1 must carry a call-site attribute group. The group number is
; captured on the first match and every later call must reuse the same one.
; CHECK: call{{.*}}@memchr{{.*}} #[[ATTR:[0-9]+]]
; CHECK: call{{.*}}@memcmp{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strcmp{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strcpy{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@stpcpy{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strlen{{.*}} #[[ATTR]]
; CHECK: call{{.*}}@strnlen{{.*}} #[[ATTR]]
; The captured group must be the nobuiltin group. This line previously had no
; check prefix, so FileCheck silently ignored it.
; CHECK: attributes #[[ATTR]] = { nobuiltin }

; One direct call to each string function that the thread sanitizer pass is
; expected to mark as nobuiltin.
define void @f1(i8* %a, i8* %b) nounwind uwtable sanitize_thread {
  tail call i8* @memchr(i8* %a, i32 1, i64 12)
  tail call i32 @memcmp(i8* %a, i8* %b, i64 12)
  tail call i32 @strcmp(i8* %a, i8* %b)
  tail call i8* @strcpy(i8* %a, i8* %b)
  tail call i8* @stpcpy(i8* %a, i8* %b)
  tail call i64 @strlen(i8* %a)
  tail call i64 @strnlen(i8* %a, i64 12)
  ret void
}