Convert calls to __sinpi and __cospi into __sincospi_stret

This adds a SimplifyLibCalls case which converts calls to the special __sinpi and __cospi functions (float & double variants) into a single __sincospi_stret call where appropriate, to remove duplicated work. Patch by Tim Northover. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193943 91177308-0d34-0410-b5e6-96231b3b80d8
2025-02-19 20:43:52 +00:00 · 2013-11-03 06:48:38 +00:00 · 2013-11-03 06:48:38 +00:00 · 208130f113
commit 208130f113
parent cb01efb798
4 changed files with 292 additions and 0 deletions
--- a/include/llvm/Target/TargetLibraryInfo.h
+++ b/include/llvm/Target/TargetLibraryInfo.h
@ -46,6 +46,10 @@ namespace llvm {
      Znwm,
      /// void *new(unsigned long, nothrow);
      ZnwmRKSt9nothrow_t,
      /// double __cospi(double x);
      cospi,
      /// float __cospif(float x);
      cospif,
      /// int __cxa_atexit(void (*f)(void *), void *p, void *d);
      cxa_atexit,
      /// void __cxa_guard_abort(guard_t *guard);
@ -61,6 +65,14 @@ namespace llvm {
      dunder_isoc99_sscanf,
      /// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size);
      memcpy_chk,
      /// struct { double sin, cos; } __sincospi_stret(double x);
      sincospi_stret,
      /// struct { float sin, cos; } __sincospi_stretf(float x);
      sincospi_stretf,
      /// double __sinpi(double x);
      sinpi,
      /// float __sinpif(float x);
      sinpif,
      /// double __sqrt_finite(double x);
      sqrt_finite,
      /// float __sqrtf_finite(float x);
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@ -38,6 +38,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
    "_ZnwjRKSt9nothrow_t",
    "_Znwm",
    "_ZnwmRKSt9nothrow_t",
    "__cospi",
    "__cospif",
    "__cxa_atexit",
    "__cxa_guard_abort",
    "__cxa_guard_acquire",
@ -45,6 +47,10 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
    "__isoc99_scanf",
    "__isoc99_sscanf",
    "__memcpy_chk",
    "__sincospi_stret",
    "__sincospi_stretf",
    "__sinpi",
    "__sinpif",
    "__sqrt_finite",
    "__sqrtf_finite",
    "__sqrtl_finite",
@ -331,6 +337,24 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
    "write"
  };
 /// Decide from the target triple \p T whether the __sinpi/__cospi/
 /// __sincospi_stret family may be treated as available library functions.
 /// Returns false for anything other than OS X and iOS, for 32-bit x86, for
 /// OS X older than 10.9 and for iOS older than 7.0; true otherwise.
 static bool hasSinCosPiStret(const Triple &T) {
  const bool TargetsOSX = T.isMacOSX();
  const bool TargetsIOS = T.getOS() == Triple::IOS;
  // The _stret flavours of the combined trig functions are Darwin-only.
  if (!(TargetsOSX || TargetsIOS))
    return false;
  // 32-bit x86 is deliberately excluded: its ABI is rather complicated, so
  // nothing special is attempted there.
  if (T.getArch() == Triple::x86)
    return false;
  // Reject OS releases that predate the accepted minimum versions.
  if (TargetsOSX && T.isMacOSXVersionLT(10, 9))
    return false;
  return !(TargetsIOS && T.isOSVersionLT(7, 0));
 }
 /// initialize - Initialize the set of available library functions based on the
 /// specified target triple.  This should be carefully written so that a missing
 /// target triple gets a sane set of defaults.
@ -357,6 +381,15 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
    TLI.setUnavailable(LibFunc::memset_pattern16);
  }
  if (!hasSinCosPiStret(T)) {
    TLI.setUnavailable(LibFunc::sinpi);
    TLI.setUnavailable(LibFunc::sinpif);
    TLI.setUnavailable(LibFunc::cospi);
    TLI.setUnavailable(LibFunc::cospif);
    TLI.setUnavailable(LibFunc::sincospi_stret);
    TLI.setUnavailable(LibFunc::sincospi_stretf);
  }
  if (T.isMacOSX() && T.getArch() == Triple::x86 &&
      !T.isMacOSXVersionLT(10, 7)) {
    // x86-32 OSX has a scheme where fwrite and fputs (and some other functions
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@ -17,6 +17,7 @@
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
@ -1252,6 +1253,155 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
  }
 };
 struct SinCosPiOpt : public LibCallOptimization {
  SinCosPiOpt() {}
  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
    // Make sure the prototype is as expected, otherwise the rest of the
    // function is probably invalid and likely to abort.
    if (!isTrigLibCall(CI))
      return 0;
    Value *Arg = CI->getArgOperand(0);
    SmallVector<CallInst *, 1> SinCalls;
    SmallVector<CallInst *, 1> CosCalls;
    SmallVector<CallInst *, 1> SinCosCalls;
    bool IsFloat = Arg->getType()->isFloatTy();
    // Look for all compatible sinpi, cospi and sincospi calls with the same
    // argument. If there are enough (in some sense) we can make the
    // substitution.
    for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
         UI != UE; ++UI)
      classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls,
                     SinCosCalls);
    // It's only worthwhile if both sinpi and cospi are actually used.
    if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
      return 0;
    Value *Sin, *Cos, *SinCos;
    insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
                     SinCos);
    replaceTrigInsts(SinCalls, Sin);
    replaceTrigInsts(CosCalls, Cos);
    replaceTrigInsts(SinCosCalls, SinCos);
    return 0;
  }
  bool isTrigLibCall(CallInst *CI) {
    Function *Callee = CI->getCalledFunction();
    FunctionType *FT = Callee->getFunctionType();
    // We can only hope to do anything useful if we can ignore things like errno
    // and floating-point exceptions.
    bool AttributesSafe = CI->hasFnAttr(Attribute::NoUnwind) &&
                          CI->hasFnAttr(Attribute::ReadNone);
    // Other than that we need float(float) or double(double)
    return AttributesSafe && FT->getNumParams() == 1 &&
           FT->getReturnType() == FT->getParamType(0) &&
           (FT->getParamType(0)->isFloatTy() ||
            FT->getParamType(0)->isDoubleTy());
  }
  void classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
                      SmallVectorImpl<CallInst *> &SinCalls,
                      SmallVectorImpl<CallInst *> &CosCalls,
                      SmallVectorImpl<CallInst *> &SinCosCalls) {
    CallInst *CI = dyn_cast<CallInst>(Val);
    if (!CI)
      return;
    Function *Callee = CI->getCalledFunction();
    StringRef FuncName = Callee->getName();
    LibFunc::Func Func;
    if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) ||
        !isTrigLibCall(CI))
      return;
    if (IsFloat) {
      if (Func == LibFunc::sinpif)
        SinCalls.push_back(CI);
      else if (Func == LibFunc::cospif)
        CosCalls.push_back(CI);
      else if (Func == LibFunc::sincospi_stretf)
        SinCosCalls.push_back(CI);
    } else {
      if (Func == LibFunc::sinpi)
        SinCalls.push_back(CI);
      else if (Func == LibFunc::cospi)
        CosCalls.push_back(CI);
      else if (Func == LibFunc::sincospi_stret)
        SinCosCalls.push_back(CI);
    }
  }
  void replaceTrigInsts(SmallVectorImpl<CallInst*> &Calls, Value *Res) {
    for (SmallVectorImpl<CallInst*>::iterator I = Calls.begin(),
           E = Calls.end();
         I != E; ++I) {
      LCS->replaceAllUsesWith(*I, Res);
    }
  }
  void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
                        bool UseFloat, Value *&Sin, Value *&Cos,
                        Value *&SinCos) {
    Type *ArgTy = Arg->getType();
    Type *ResTy;
    StringRef Name;
    Triple T(OrigCallee->getParent()->getTargetTriple());
    if (UseFloat) {
      Name = "__sincospi_stretf";
      assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
      // x86_64 can't use {float, float} since that would be returned in both
      // xmm0 and xmm1, which isn't what a real struct would do.
      ResTy = T.getArch() == Triple::x86_64
                  ? static_cast<Type *>(VectorType::get(ArgTy, 2))
                  : static_cast<Type *>(StructType::get(ArgTy, ArgTy, NULL));
    } else {
      Name = "__sincospi_stret";
      ResTy = StructType::get(ArgTy, ArgTy, NULL);
    }
    Module *M = OrigCallee->getParent();
    Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
                                           ResTy, ArgTy, NULL);
    if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
      // If the argument is an instruction, it must dominate all uses so put our
      // sincos call there.
      BasicBlock::iterator Loc = ArgInst;
      B.SetInsertPoint(ArgInst->getParent(), ++Loc);
    } else {
      // Otherwise (e.g. for a constant) the beginning of the function is as
      // good a place as any.
      BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
      B.SetInsertPoint(&EntryBB, EntryBB.begin());
    }
    SinCos = B.CreateCall(Callee, Arg, "sincospi");
    if (SinCos->getType()->isStructTy()) {
      Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
      Cos = B.CreateExtractValue(SinCos, 1, "cospi");
    } else {
      Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
                                   "sinpi");
      Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
                                   "cospi");
    }
  }
 };
 //===----------------------------------------------------------------------===//
 // Integer Library Call Optimizations
 //===----------------------------------------------------------------------===//
@ -1764,6 +1914,7 @@ static MemSetOpt MemSet;
 // Math library call optimizations.
 static UnaryDoubleFPOpt UnaryDoubleFP(false);
 static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
 static SinCosPiOpt SinCosPi;
  // Integer library call optimizations.
 static FFSOpt FFS;
@ -1848,6 +1999,11 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
      case LibFunc::cos:
      case LibFunc::cosl:
        return &Cos;
      case LibFunc::sinpif:
      case LibFunc::sinpi:
      case LibFunc::cospif:
      case LibFunc::cospi:
        return &SinCosPi;
      case LibFunc::powf:
      case LibFunc::pow:
      case LibFunc::powl:
--- a/test/Transforms/InstCombine/sincospi.ll
+++ b/test/Transforms/InstCombine/sincospi.ll
@ -0,0 +1,91 @@
 ; Exercises the combining of sinpi/cospi call pairs into one sincospi_stret
 ; call across several target triples. The first two RUN lines expect the
 ; combined call: x86_64 OS X 10.9 is matched under its own prefix because the
 ; float variant returns <2 x float> there, ARM iOS 7.0 under the default
 ; prefix. The last three (OS X 10.8, iOS 6.0, Linux) expect the original
 ; calls to remain.
 ; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefix=CHECK-FLOAT-IN-VEC
 ; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s
 ; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
 ; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
 ; RUN: opt -instcombine -S < %s -mtriple=x86_64-none-linux-gnu | FileCheck %s --check-prefix=CHECK-NO-SINCOS
 ; Attribute group applied to every declaration and call site below.
 attributes #0 = { readnone nounwind }
 ; The four library entry points whose calls the tests try to combine.
 declare float @__sinpif(float %x) #0
 declare float @__cospif(float %x) #0 
 declare double @__sinpi(double %x) #0
 declare double @__cospi(double %x) #0 
; Globals that provide non-constant inputs for the instruction-based tests.
@var32 = global float 0.0
@var64 = global double 0.0
 ; Single-precision case where the shared argument is an instruction (a load):
 ; the sine and cosine calls on %val should become one combined call taking
 ; the loaded value, returning <2 x float> on x86_64 and { float, float } on
 ; ARM. Targets without the combined function keep both original calls.
 define float @test_instbased_f32() {
       %val = load float* @var32
       %sin = call float @__sinpif(float %val) #0
       %cos = call float @__cospif(float %val) #0
       %res = fadd float %sin, %cos
       ret float %res
 ; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load float* @var32
 ; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float [[VAL]])
 ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
 ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
 ; CHECK: [[VAL:%[a-z0-9]+]] = load float* @var32
 ; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float [[VAL]])
 ; CHECK: extractvalue { float, float } [[SINCOS]], 0
 ; CHECK: extractvalue { float, float } [[SINCOS]], 1
 ; CHECK-NO-SINCOS: call float @__sinpif
 ; CHECK-NO-SINCOS: call float @__cospif
 }
 ; Single-precision case where the shared argument is a constant rather than
 ; an instruction: both calls on 1.0 should still be merged into one combined
 ; call (vector result on x86_64, struct result on ARM), and left untouched on
 ; targets without the combined function.
 define float @test_constant_f32() {
       %sin = call float @__sinpif(float 1.0) #0
       %cos = call float @__cospif(float 1.0) #0
       %res = fadd float %sin, %cos
       ret float %res
 ; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float 1.000000e+00)
 ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
 ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
 ; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float 1.000000e+00)
 ; CHECK: extractvalue { float, float } [[SINCOS]], 0
 ; CHECK: extractvalue { float, float } [[SINCOS]], 1
 ; CHECK-NO-SINCOS: call float @__sinpif
 ; CHECK-NO-SINCOS: call float @__cospif
 }
 ; Double-precision case with a loaded argument: the combined call returns
 ; { double, double } on both supported targets, so the x86_64 and ARM
 ; expectations are the same. Other targets keep both original calls.
 define double @test_instbased_f64() {
       %val = load double* @var64
       %sin = call double @__sinpi(double %val) #0
       %cos = call double @__cospi(double %val) #0
       %res = fadd double %sin, %cos
       ret double %res
 ; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load double* @var64
 ; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
 ; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
 ; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
 ; CHECK: [[VAL:%[a-z0-9]+]] = load double* @var64
 ; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
 ; CHECK: extractvalue { double, double } [[SINCOS]], 0
 ; CHECK: extractvalue { double, double } [[SINCOS]], 1
 ; CHECK-NO-SINCOS: call double @__sinpi
 ; CHECK-NO-SINCOS: call double @__cospi
 }
 ; Double-precision case with a constant argument: both calls on 1.0 should be
 ; merged into one combined call returning { double, double } on the supported
 ; targets, and left untouched elsewhere.
 define double @test_constant_f64() {
       %sin = call double @__sinpi(double 1.0) #0
       %cos = call double @__cospi(double 1.0) #0
       %res = fadd double %sin, %cos
       ret double %res
 ; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
 ; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
 ; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
 ; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
 ; CHECK: extractvalue { double, double } [[SINCOS]], 0
 ; CHECK: extractvalue { double, double } [[SINCOS]], 1
 ; CHECK-NO-SINCOS: call double @__sinpi
 ; CHECK-NO-SINCOS: call double @__cospi
 }