Convert calls to __sinpi and __cospi into __sincospi_stret

This adds an SimplifyLibCalls case which converts the special __sinpi and
__cospi (float & double variants) into a __sincospi_stret where appropriate to
remove duplicated work.

Patch by Tim Northover

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193943 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bob Wilson 2013-11-03 06:48:38 +00:00
parent cb01efb798
commit 208130f113
4 changed files with 292 additions and 0 deletions

View File

@ -46,6 +46,10 @@ namespace llvm {
Znwm,
/// void *new(unsigned long, nothrow);
ZnwmRKSt9nothrow_t,
/// double __cospi(double x);
cospi,
/// float __cospif(float x);
cospif,
/// int __cxa_atexit(void (*f)(void *), void *p, void *d);
cxa_atexit,
/// void __cxa_guard_abort(guard_t *guard);
@ -61,6 +65,14 @@ namespace llvm {
dunder_isoc99_sscanf,
/// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size);
memcpy_chk,
/// double __sincospi_stret(double x);
sincospi_stret,
/// float __sincospi_stretf(float x);
sincospi_stretf,
/// double __sinpi(double x);
sinpi,
/// float __sinpif(float x);
sinpif,
/// double __sqrt_finite(double x);
sqrt_finite,
/// float __sqrt_finite(float x);

View File

@ -38,6 +38,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"_ZnwjRKSt9nothrow_t",
"_Znwm",
"_ZnwmRKSt9nothrow_t",
"__cospi",
"__cospif",
"__cxa_atexit",
"__cxa_guard_abort",
"__cxa_guard_acquire",
@ -45,6 +47,10 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"__isoc99_scanf",
"__isoc99_sscanf",
"__memcpy_chk",
"__sincospi_stret",
"__sincospi_stretf",
"__sinpi",
"__sinpif",
"__sqrt_finite",
"__sqrtf_finite",
"__sqrtl_finite",
@ -331,6 +337,24 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"write"
};
static bool hasSinCosPiStret(const Triple &T) {
// Only Darwin variants have _stret versions of combined trig functions.
if (!T.isMacOSX() && T.getOS() != Triple::IOS)
return false;
// The ABI is rather complicated on x86, so don't do anything special there.
if (T.getArch() == Triple::x86)
return false;
if (T.isMacOSX() && T.isMacOSXVersionLT(10, 9))
return false;
if (T.getOS() == Triple::IOS && T.isOSVersionLT(7, 0))
return false;
return true;
}
/// initialize - Initialize the set of available library functions based on the
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
@ -357,6 +381,15 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::memset_pattern16);
}
if (!hasSinCosPiStret(T)) {
TLI.setUnavailable(LibFunc::sinpi);
TLI.setUnavailable(LibFunc::sinpif);
TLI.setUnavailable(LibFunc::cospi);
TLI.setUnavailable(LibFunc::cospif);
TLI.setUnavailable(LibFunc::sincospi_stret);
TLI.setUnavailable(LibFunc::sincospi_stretf);
}
if (T.isMacOSX() && T.getArch() == Triple::x86 &&
!T.isMacOSXVersionLT(10, 7)) {
// x86-32 OSX has a scheme where fwrite and fputs (and some other functions

View File

@ -17,6 +17,7 @@
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@ -1252,6 +1253,155 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
}
};
struct SinCosPiOpt : public LibCallOptimization {
SinCosPiOpt() {}
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Make sure the prototype is as expected, otherwise the rest of the
// function is probably invalid and likely to abort.
if (!isTrigLibCall(CI))
return 0;
Value *Arg = CI->getArgOperand(0);
SmallVector<CallInst *, 1> SinCalls;
SmallVector<CallInst *, 1> CosCalls;
SmallVector<CallInst *, 1> SinCosCalls;
bool IsFloat = Arg->getType()->isFloatTy();
// Look for all compatible sinpi, cospi and sincospi calls with the same
// argument. If there are enough (in some sense) we can make the
// substitution.
for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
UI != UE; ++UI)
classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls,
SinCosCalls);
// It's only worthwhile if both sinpi and cospi are actually used.
if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
return 0;
Value *Sin, *Cos, *SinCos;
insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
SinCos);
replaceTrigInsts(SinCalls, Sin);
replaceTrigInsts(CosCalls, Cos);
replaceTrigInsts(SinCosCalls, SinCos);
return 0;
}
bool isTrigLibCall(CallInst *CI) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
// We can only hope to do anything useful if we can ignore things like errno
// and floating-point exceptions.
bool AttributesSafe = CI->hasFnAttr(Attribute::NoUnwind) &&
CI->hasFnAttr(Attribute::ReadNone);
// Other than that we need float(float) or double(double)
return AttributesSafe && FT->getNumParams() == 1 &&
FT->getReturnType() == FT->getParamType(0) &&
(FT->getParamType(0)->isFloatTy() ||
FT->getParamType(0)->isDoubleTy());
}
void classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
SmallVectorImpl<CallInst *> &SinCalls,
SmallVectorImpl<CallInst *> &CosCalls,
SmallVectorImpl<CallInst *> &SinCosCalls) {
CallInst *CI = dyn_cast<CallInst>(Val);
if (!CI)
return;
Function *Callee = CI->getCalledFunction();
StringRef FuncName = Callee->getName();
LibFunc::Func Func;
if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) ||
!isTrigLibCall(CI))
return;
if (IsFloat) {
if (Func == LibFunc::sinpif)
SinCalls.push_back(CI);
else if (Func == LibFunc::cospif)
CosCalls.push_back(CI);
else if (Func == LibFunc::sincospi_stretf)
SinCosCalls.push_back(CI);
} else {
if (Func == LibFunc::sinpi)
SinCalls.push_back(CI);
else if (Func == LibFunc::cospi)
CosCalls.push_back(CI);
else if (Func == LibFunc::sincospi_stret)
SinCosCalls.push_back(CI);
}
}
void replaceTrigInsts(SmallVectorImpl<CallInst*> &Calls, Value *Res) {
for (SmallVectorImpl<CallInst*>::iterator I = Calls.begin(),
E = Calls.end();
I != E; ++I) {
LCS->replaceAllUsesWith(*I, Res);
}
}
void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
bool UseFloat, Value *&Sin, Value *&Cos,
Value *&SinCos) {
Type *ArgTy = Arg->getType();
Type *ResTy;
StringRef Name;
Triple T(OrigCallee->getParent()->getTargetTriple());
if (UseFloat) {
Name = "__sincospi_stretf";
assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
// x86_64 can't use {float, float} since that would be returned in both
// xmm0 and xmm1, which isn't what a real struct would do.
ResTy = T.getArch() == Triple::x86_64
? static_cast<Type *>(VectorType::get(ArgTy, 2))
: static_cast<Type *>(StructType::get(ArgTy, ArgTy, NULL));
} else {
Name = "__sincospi_stret";
ResTy = StructType::get(ArgTy, ArgTy, NULL);
}
Module *M = OrigCallee->getParent();
Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
ResTy, ArgTy, NULL);
if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
// If the argument is an instruction, it must dominate all uses so put our
// sincos call there.
BasicBlock::iterator Loc = ArgInst;
B.SetInsertPoint(ArgInst->getParent(), ++Loc);
} else {
// Otherwise (e.g. for a constant) the beginning of the function is as
// good a place as any.
BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
B.SetInsertPoint(&EntryBB, EntryBB.begin());
}
SinCos = B.CreateCall(Callee, Arg, "sincospi");
if (SinCos->getType()->isStructTy()) {
Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
Cos = B.CreateExtractValue(SinCos, 1, "cospi");
} else {
Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
"sinpi");
Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
"cospi");
}
}
};
//===----------------------------------------------------------------------===//
// Integer Library Call Optimizations
//===----------------------------------------------------------------------===//
@ -1764,6 +1914,7 @@ static MemSetOpt MemSet;
// Math library call optimizations.
static UnaryDoubleFPOpt UnaryDoubleFP(false);
static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
static SinCosPiOpt SinCosPi;
// Integer library call optimizations.
static FFSOpt FFS;
@ -1848,6 +1999,11 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
case LibFunc::cos:
case LibFunc::cosl:
return &Cos;
case LibFunc::sinpif:
case LibFunc::sinpi:
case LibFunc::cospif:
case LibFunc::cospi:
return &SinCosPi;
case LibFunc::powf:
case LibFunc::pow:
case LibFunc::powl:

View File

@ -0,0 +1,91 @@
; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefix=CHECK-FLOAT-IN-VEC
; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s
; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
; RUN: opt -instcombine -S < %s -mtriple=x86_64-none-linux-gnu | FileCheck %s --check-prefix=CHECK-NO-SINCOS
attributes #0 = { readnone nounwind }
declare float @__sinpif(float %x) #0
declare float @__cospif(float %x) #0
declare double @__sinpi(double %x) #0
declare double @__cospi(double %x) #0
@var32 = global float 0.0
@var64 = global double 0.0
define float @test_instbased_f32() {
%val = load float* @var32
%sin = call float @__sinpif(float %val) #0
%cos = call float @__cospif(float %val) #0
%res = fadd float %sin, %cos
ret float %res
; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load float* @var32
; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float [[VAL]])
; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
; CHECK: [[VAL:%[a-z0-9]+]] = load float* @var32
; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float [[VAL]])
; CHECK: extractvalue { float, float } [[SINCOS]], 0
; CHECK: extractvalue { float, float } [[SINCOS]], 1
; CHECK-NO-SINCOS: call float @__sinpif
; CHECK-NO-SINCOS: call float @__cospif
}
define float @test_constant_f32() {
%sin = call float @__sinpif(float 1.0) #0
%cos = call float @__cospif(float 1.0) #0
%res = fadd float %sin, %cos
ret float %res
; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float 1.000000e+00)
; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float 1.000000e+00)
; CHECK: extractvalue { float, float } [[SINCOS]], 0
; CHECK: extractvalue { float, float } [[SINCOS]], 1
; CHECK-NO-SINCOS: call float @__sinpif
; CHECK-NO-SINCOS: call float @__cospif
}
define double @test_instbased_f64() {
%val = load double* @var64
%sin = call double @__sinpi(double %val) #0
%cos = call double @__cospi(double %val) #0
%res = fadd double %sin, %cos
ret double %res
; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load double* @var64
; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
; CHECK: [[VAL:%[a-z0-9]+]] = load double* @var64
; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
; CHECK: extractvalue { double, double } [[SINCOS]], 0
; CHECK: extractvalue { double, double } [[SINCOS]], 1
; CHECK-NO-SINCOS: call double @__sinpi
; CHECK-NO-SINCOS: call double @__cospi
}
define double @test_constant_f64() {
%sin = call double @__sinpi(double 1.0) #0
%cos = call double @__cospi(double 1.0) #0
%res = fadd double %sin, %cos
ret double %res
; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
; CHECK: extractvalue { double, double } [[SINCOS]], 0
; CHECK: extractvalue { double, double } [[SINCOS]], 1
; CHECK-NO-SINCOS: call double @__sinpi
; CHECK-NO-SINCOS: call double @__cospi
}