mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-29 22:50:47 +00:00
Move an optimization for memcmp out of simplifylibcalls and into
SDISel. This optimization was causing simplifylibcalls to introduce type-unsafe nastiness. This is the first step; I'll be expanding the memcmp optimizations shortly, covering things that we really, really wouldn't want simplifylibcalls to do. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@92098 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
465e2b950d
commit
8047d9a6be
@ -17,6 +17,7 @@
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/ConstantFolding.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/CallingConv.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
@ -5075,6 +5076,105 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
|
||||
}
|
||||
}
|
||||
|
||||
/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
|
||||
/// value is equal or not-equal to zero.
|
||||
static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
|
||||
for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
|
||||
UI != E; ++UI) {
|
||||
if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
|
||||
if (IC->isEquality())
|
||||
if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
|
||||
if (C->isNullValue())
|
||||
continue;
|
||||
// Unknown instruction.
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// getMemCmpLoad - Produce the value obtained by loading an integer of Size
/// bytes from PtrVal, for use by the inline memcmp expansion. A Size of 2
/// selects an i16 load; any other Size selects an i32 load.
///
/// If PtrVal is a constant and the load can be constant folded (e.g. it reads
/// from a string literal), the folded constant is returned and no load node is
/// created. Otherwise a non-volatile load with alignment 1 is emitted; unless
/// the pointer refers to constant memory, the load's output chain is appended
/// to Builder.PendingLoads.
static SDValue getMemCmpLoad(Value *PtrVal, unsigned Size,
                             SelectionDAGBuilder &Builder) {
  const bool IsTwoBytes = Size == 2;
  MVT LoadVT = IsTwoBytes ? MVT::i16 : MVT::i32;
  const Type *LoadTy = IsTwoBytes ? Type::getInt16Ty(PtrVal->getContext())
                                  : Type::getInt32Ty(PtrVal->getContext());

  // Try to fold the load away entirely when the address is a constant.
  if (Constant *ConstPtr = dyn_cast<Constant>(PtrVal)) {
    // Reinterpret the pointer as a pointer to the integer type being loaded.
    Constant *TypedPtr =
      ConstantExpr::getBitCast(ConstPtr, PointerType::getUnqual(LoadTy));

    Constant *Folded = ConstantFoldLoadFromConstPtr(TypedPtr, Builder.TD);
    if (Folded)
      return Builder.getValue(Folded);
  }

  // A real load is required. Loads from constant memory are chained to the
  // entry node so they are not serialized with anything; all other loads use
  // the current DAG root so that non-volatile loads are not serialized
  // against each other.
  const bool ConstantMemory = Builder.AA->pointsToConstantMemory(PtrVal);
  SDValue Chain = ConstantMemory ? Builder.DAG.getEntryNode()
                                 : Builder.DAG.getRoot();

  SDValue Addr = Builder.getValue(PtrVal);
  SDValue Loaded = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Chain,
                                       Addr, PtrVal /*SrcValue*/,
                                       0 /*SVOffset*/, false /*volatile*/,
                                       1 /* align=1 */);

  // Record the output chain of loads that may observe mutable memory.
  if (!ConstantMemory)
    Builder.PendingLoads.push_back(Loaded.getValue(1));
  return Loaded;
}
|
||||
|
||||
|
||||
/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
|
||||
/// If so, return true and lower it, otherwise return false and it will be
|
||||
/// lowered like a normal call.
|
||||
bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
|
||||
// Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
|
||||
if (I.getNumOperands() != 4)
|
||||
return false;
|
||||
|
||||
Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
|
||||
if (!isa<PointerType>(LHS->getType()) || !isa<PointerType>(RHS->getType()) ||
|
||||
!isa<IntegerType>(I.getOperand(3)->getType()) ||
|
||||
!isa<IntegerType>(I.getType()))
|
||||
return false;
|
||||
|
||||
ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
|
||||
|
||||
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
|
||||
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
|
||||
if (Size && (Size->getValue() == 2 || Size->getValue() == 4) &&
|
||||
IsOnlyUsedInZeroEqualityComparison(&I)) {
|
||||
SDValue LHSVal = getMemCmpLoad(LHS, Size->getZExtValue(), *this);
|
||||
SDValue RHSVal = getMemCmpLoad(RHS, Size->getZExtValue(), *this);
|
||||
|
||||
SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
|
||||
ISD::SETNE);
|
||||
EVT CallVT = TLI.getValueType(I.getType(), true);
|
||||
setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void SelectionDAGBuilder::visitCall(CallInst &I) {
|
||||
const char *RenameFn = 0;
|
||||
if (Function *F = I.getCalledFunction()) {
|
||||
@ -5148,6 +5248,9 @@ void SelectionDAGBuilder::visitCall(CallInst &I) {
|
||||
Tmp.getValueType(), Tmp));
|
||||
return;
|
||||
}
|
||||
} else if (Name == "memcmp") {
|
||||
if (visitMemCmpCall(I))
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else if (isa<InlineAsm>(I.getOperand(0))) {
|
||||
|
@ -91,11 +91,13 @@ class SelectionDAGBuilder {
|
||||
|
||||
DenseMap<const Value*, SDValue> NodeMap;
|
||||
|
||||
public:
|
||||
/// PendingLoads - Loads are not emitted to the program immediately. We bunch
|
||||
/// them up and then emit token factor nodes when possible. This allows us to
|
||||
/// get simple disambiguation between loads without worrying about alias
|
||||
/// analysis.
|
||||
SmallVector<SDValue, 8> PendingLoads;
|
||||
private:
|
||||
|
||||
/// PendingExports - CopyToReg nodes that copy values to virtual registers
|
||||
/// for export to other blocks need to be emitted before any terminator
|
||||
@ -461,6 +463,8 @@ private:
|
||||
void visitStore(StoreInst &I);
|
||||
void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
|
||||
void visitCall(CallInst &I);
|
||||
bool visitMemCmpCall(CallInst &I);
|
||||
|
||||
void visitInlineAsm(CallSite CS);
|
||||
const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
|
||||
void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);
|
||||
|
@ -1011,19 +1011,6 @@ struct MemCmpOpt : public LibCallOptimization {
|
||||
return B.CreateSExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType());
|
||||
}
|
||||
|
||||
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS) != 0
|
||||
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS) != 0
|
||||
if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) {
|
||||
const Type *PTy = PointerType::getUnqual(Len == 2 ?
|
||||
Type::getInt16Ty(*Context) : Type::getInt32Ty(*Context));
|
||||
LHS = B.CreateBitCast(LHS, PTy, "tmp");
|
||||
RHS = B.CreateBitCast(RHS, PTy, "tmp");
|
||||
LoadInst *LHSV = B.CreateLoad(LHS, "lhsv");
|
||||
LoadInst *RHSV = B.CreateLoad(RHS, "rhsv");
|
||||
LHSV->setAlignment(1); RHSV->setAlignment(1); // Unaligned loads.
|
||||
return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType());
|
||||
}
|
||||
|
||||
// Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
|
||||
std::string LHSStr, RHSStr;
|
||||
if (GetConstantStringInfo(LHS, LHSStr) &&
|
||||
|
76
test/CodeGen/X86/memcmp.ll
Normal file
76
test/CodeGen/X86/memcmp.ll
Normal file
@ -0,0 +1,76 @@
|
||||
; RUN: llc %s -o - -march=x86-64 | FileCheck %s
|
||||
|
||||
; This tests codegen time inlining/optimization of memcmp
|
||||
; rdar://6480398
|
||||
|
||||
@.str = private constant [6 x i8] c"fooxx\00", align 1 ; <[6 x i8]*> [#uses=1]
|
||||
|
||||
declare i32 @memcmp(...)
|
||||
|
||||
; memcmp2 - Calls memcmp on two pointer arguments with a constant length of 2
; and uses the result only in an `icmp eq ... 0` that feeds a branch. The
; CHECK lines expect a 16-bit load from (%rsi) followed by a 16-bit compare
; against (%rdi).
define void @memcmp2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
|
||||
entry:
|
||||
%0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 2) nounwind ; <i32> [#uses=1]
|
||||
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
|
||||
br i1 %1, label %return, label %bb
|
||||
|
||||
bb: ; preds = %entry
|
||||
store i32 4, i32* %P, align 4
|
||||
ret void
|
||||
|
||||
return: ; preds = %entry
|
||||
ret void
|
||||
; CHECK: memcmp2:
|
||||
; CHECK: movw (%rsi), %ax
|
||||
; CHECK: cmpw %ax, (%rdi)
|
||||
}
|
||||
|
||||
; memcmp2a - Same shape as a 2-byte memcmp equality test, but the second
; operand is a constant pointer to element 1 of @.str ("fooxx\00"), so the two
; bytes compared are "oo". The CHECK line expects a single 16-bit compare of
; (%rdi) against the immediate 28527 (0x6F6F).
define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
|
||||
entry:
|
||||
%0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
|
||||
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
|
||||
br i1 %1, label %return, label %bb
|
||||
|
||||
bb: ; preds = %entry
|
||||
store i32 4, i32* %P, align 4
|
||||
ret void
|
||||
|
||||
return: ; preds = %entry
|
||||
ret void
|
||||
; CHECK: memcmp2a:
|
||||
; CHECK: cmpw $28527, (%rdi)
|
||||
}
|
||||
|
||||
|
||||
; memcmp4 - Calls memcmp on two pointer arguments with a constant length of 4
; and uses the result only in an `icmp eq ... 0` that feeds a branch. The
; CHECK lines expect a 32-bit load from (%rsi) followed by a 32-bit compare
; against (%rdi).
define void @memcmp4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
|
||||
entry:
|
||||
%0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 4) nounwind ; <i32> [#uses=1]
|
||||
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
|
||||
br i1 %1, label %return, label %bb
|
||||
|
||||
bb: ; preds = %entry
|
||||
store i32 4, i32* %P, align 4
|
||||
ret void
|
||||
|
||||
return: ; preds = %entry
|
||||
ret void
|
||||
; CHECK: memcmp4:
|
||||
; CHECK: movl (%rsi), %eax
|
||||
; CHECK: cmpl %eax, (%rdi)
|
||||
}
|
||||
|
||||
; memcmp4a - Same shape as a 4-byte memcmp equality test, but the second
; operand is a constant pointer to element 1 of @.str ("fooxx\00"), so the
; four bytes compared are "ooxx". The CHECK line expects a single 32-bit
; compare of (%rdi) against the immediate 2021158767 (0x78786F6F).
define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
|
||||
entry:
|
||||
%0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
|
||||
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
|
||||
br i1 %1, label %return, label %bb
|
||||
|
||||
bb: ; preds = %entry
|
||||
store i32 4, i32* %P, align 4
|
||||
ret void
|
||||
|
||||
return: ; preds = %entry
|
||||
ret void
|
||||
; CHECK: memcmp4a:
|
||||
; CHECK: cmpl $2021158767, (%rdi)
|
||||
}
|
||||
|
@ -14,9 +14,6 @@ define void @test(i8* %P, i8* %Q, i32 %N, i32* %IP, i1* %BP) {
|
||||
volatile store i32 %B, i32* %IP
|
||||
%C = call i32 @memcmp( i8* %P, i8* %Q, i32 1 ) ; <i32> [#uses=1]
|
||||
volatile store i32 %C, i32* %IP
|
||||
%D = call i32 @memcmp( i8* %P, i8* %Q, i32 2 ) ; <i32> [#uses=1]
|
||||
%E = icmp eq i32 %D, 0 ; <i1> [#uses=1]
|
||||
volatile store i1 %E, i1* %BP
|
||||
%F = call i32 @memcmp(i8* getelementptr ([4 x i8]* @hel, i32 0, i32 0),
|
||||
i8* getelementptr ([8 x i8]* @hello_u, i32 0, i32 0),
|
||||
i32 3)
|
||||
|
Loading…
Reference in New Issue
Block a user