Reapply r233175 and r233183: float2int.

This re-adds float2int to the tree, after fixing PR23038. It turns
out the argument to APSInt() is true-if-unsigned, rather than
true-if-signed :(. Added testcase and explanatory comment.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@233370 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
James Molloy 2015-03-27 10:36:57 +00:00
parent 9217916725
commit fb45b9fafc
10 changed files with 846 additions and 0 deletions

View File

@ -294,6 +294,7 @@ void initializeWinEHPreparePass(PassRegistry&);
void initializePlaceBackedgeSafepointsImplPass(PassRegistry&);
void initializePlaceSafepointsPass(PassRegistry&);
void initializeDwarfEHPreparePass(PassRegistry&);
void initializeFloat2IntPass(PassRegistry&);
}
#endif

View File

@ -169,6 +169,7 @@ namespace {
(void) llvm::createRewriteSymbolsPass();
(void) llvm::createStraightLineStrengthReducePass();
(void) llvm::createMemDerefPrinter();
(void) llvm::createFloat2IntPass();
(void)new llvm::IntervalPartition();
(void)new llvm::ScalarEvolution();

View File

@ -446,6 +446,12 @@ ModulePass *createPlaceSafepointsPass();
//
FunctionPass *createRewriteStatepointsForGCPass();
//===----------------------------------------------------------------------===//
//
// Float2Int - Demote floats to ints where possible.
//
FunctionPass *createFloat2IntPass();
} // End llvm namespace
#endif

View File

@ -59,6 +59,10 @@ static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
static cl::opt<bool>
RunFloat2Int("float-to-int", cl::Hidden, cl::init(true),
cl::desc("Run the float2int (float demotion) pass"));
static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
cl::Hidden,
cl::desc("Run the load combining pass"));
@ -307,6 +311,9 @@ void PassManagerBuilder::populateModulePassManager(
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
if (RunFloat2Int)
MPM.add(createFloat2IntPass());
// Re-rotate loops in all our loop nests. These may have fallout out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form.

View File

@ -9,6 +9,7 @@ add_llvm_library(LLVMScalarOpts
DeadStoreElimination.cpp
EarlyCSE.cpp
FlattenCFGPass.cpp
Float2Int.cpp
GVN.cpp
InductiveRangeCheckElimination.cpp
IndVarSimplify.cpp

View File

@ -0,0 +1,540 @@
//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Float2Int pass, which aims to demote floating
// point operations to work on integers, where that is losslessly possible.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "float2int"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include <deque>
#include <functional> // For std::function
using namespace llvm;
// The algorithm is simple. Start at instructions that convert from the
// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use
// graph, using an equivalence datastructure to unify graphs that interfere.
//
// Mappable instructions are those with an integer corollary that, given
// integer domain inputs, produce an integer output; fadd, for example.
//
// If a non-mappable instruction is seen, this entire def-use graph is marked
// as non-transformable. If we see an instruction that converts from the
// integer domain to FP domain (uitofp,sitofp), we terminate our walk.
/// The largest integer type worth dealing with.
///
/// Ranges are tracked in integers that are one bit wider than this value
/// (see badRange(), unknownRange() and validateRange()), and any conversion
/// source wider than that is rejected.
static cl::opt<unsigned>
MaxIntegerBW("float2int-max-integer-bw", cl::init(64), cl::Hidden,
             // Adjacent literals are concatenated, so the separating space
             // must be part of one of them.
             cl::desc("Max integer bitwidth to consider in float2int "
                      "(default=64)"));
namespace {
// Function pass that demotes chains of floating point operations to integer
// operations when every value involved is known to fit losslessly.
//
// All state below is per-function: runOnFunction() resets it before each run.
struct Float2Int : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
Float2Int() : FunctionPass(ID) {
initializeFloat2IntPass(*PassRegistry::getPassRegistry());
}
// Entry point; returns true if the function was modified.
bool runOnFunction(Function &F) override;
// The pass only rewrites instructions in place, so the CFG is preserved.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
// Collect the fptoui/fptosi/fcmp instructions of F into Roots.
void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots);
// Record (or overwrite) the range R for I in SeenInsts and return R.
ConstantRange seen(Instruction *I, ConstantRange R);
// Range marking a value that can not be transformed.
ConstantRange badRange();
// Range marking a value whose real range has not been computed yet.
ConstantRange unknownRange();
// Return R, or badRange() if R is wider than the pass can track.
ConstantRange validateRange(ConstantRange R);
// Phase 1: walk the use-def graph from the roots, filling SeenInsts and ECs.
void walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots);
// Phase 2: compute the ranges left as unknownRange() by walkBackwards().
void walkForwards();
// Phase 3: check each equivalence class and convert the ones that are safe.
bool validateAndTransform();
// Create (or look up) the integer replacement of I, with operands of ToTy.
Value *convert(Instruction *I, Type *ToTy);
// Erase the original instructions that convert() replaced.
void cleanup();
// Every instruction visited so far, in discovery order, with its range.
MapVector<Instruction*, ConstantRange > SeenInsts;
// Instructions that leave the floating point domain (see findRoots()).
SmallPtrSet<Instruction*,8> Roots;
// Partitions the visited instructions into connected def-use graphs; each
// partition is transformed, or rejected, as a whole.
EquivalenceClasses<Instruction*> ECs;
// Original instruction -> replacement value, in creation order.
MapVector<Instruction*, Value*> ConvertedInsts;
// Context of the module owning the current function; set in runOnFunction().
LLVMContext *Ctx;
};
}
// Storage for the pass identifier passed to the FunctionPass constructor.
char Float2Int::ID = 0;
// Register the pass under the name "float2int" with the description
// "Float to int"; this also provides initializeFloat2IntPass(), which the
// constructor calls.
INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false)
// Translate a floating point comparison predicate into the signed integer
// predicate with the same meaning. The ordered and unordered flavours of a
// comparison both map to the same integer predicate. Predicates without an
// integer counterpart yield BAD_ICMP_PREDICATE.
static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) {
  CmpInst::Predicate Mapped = CmpInst::BAD_ICMP_PREDICATE;
  switch (P) {
  case CmpInst::FCMP_OEQ: case CmpInst::FCMP_UEQ:
    Mapped = CmpInst::ICMP_EQ;
    break;
  case CmpInst::FCMP_ONE: case CmpInst::FCMP_UNE:
    Mapped = CmpInst::ICMP_NE;
    break;
  case CmpInst::FCMP_OGT: case CmpInst::FCMP_UGT:
    Mapped = CmpInst::ICMP_SGT;
    break;
  case CmpInst::FCMP_OGE: case CmpInst::FCMP_UGE:
    Mapped = CmpInst::ICMP_SGE;
    break;
  case CmpInst::FCMP_OLT: case CmpInst::FCMP_ULT:
    Mapped = CmpInst::ICMP_SLT;
    break;
  case CmpInst::FCMP_OLE: case CmpInst::FCMP_ULE:
    Mapped = CmpInst::ICMP_SLE;
    break;
  default:
    // Leave Mapped as BAD_ICMP_PREDICATE.
    break;
  }
  return Mapped;
}
// Return the integer binary opcode that corresponds to the floating point
// binary opcode Opcode. Only FAdd, FSub and FMul are supported; anything else
// is a programming error.
static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
  if (Opcode == Instruction::FAdd)
    return Instruction::Add;
  if (Opcode == Instruction::FSub)
    return Instruction::Sub;
  if (Opcode == Instruction::FMul)
    return Instruction::Mul;
  llvm_unreachable("Unhandled opcode!");
}
// Collect the roots of F: the instructions that take a value out of the
// floating point domain. These are every fptoui and fptosi, plus each fcmp
// whose predicate has an integer equivalent.
void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
  for (auto &I : inst_range(F)) {
    const unsigned Opc = I.getOpcode();

    // Conversions to integer are always roots.
    if (Opc == Instruction::FPToUI || Opc == Instruction::FPToSI) {
      Roots.insert(&I);
      continue;
    }

    if (Opc != Instruction::FCmp)
      continue;

    // A comparison is only a root if we know how to express it as an icmp.
    CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
    if (mapFCmpPred(Pred) != CmpInst::BAD_ICMP_PREDICATE)
      Roots.insert(&I);
  }
}
// Helper - record that I has been traversed and currently has range R.
// An existing entry for I is overwritten in place (keeping its position in
// SeenInsts); otherwise a new entry is appended. Returns R.
ConstantRange Float2Int::seen(Instruction *I, ConstantRange R) {
  DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n");
  auto Existing = SeenInsts.find(I);
  if (Existing == SeenInsts.end())
    SeenInsts.insert(std::make_pair(I, R));
  else
    Existing->second = R;
  return R;
}
// Helper - the range used to poison a value: the full set at the tracking
// width (one bit wider than MaxIntegerBW). validateAndTransform() refuses to
// convert any partition whose combined range is the full set.
ConstantRange Float2Int::badRange() {
  const unsigned Width = MaxIntegerBW + 1;
  return ConstantRange(Width, /*isFullSet=*/true);
}
// Helper - the placeholder range for a value whose real range has not been
// computed yet: the non-full (empty) set at the tracking width.
ConstantRange Float2Int::unknownRange() {
  const unsigned Width = MaxIntegerBW + 1;
  return ConstantRange(Width, /*isFullSet=*/false);
}
// Helper - pass R through unchanged unless it is wider than the tracking
// width (MaxIntegerBW + 1 bits), in which case the value is poisoned.
ConstantRange Float2Int::validateRange(ConstantRange R) {
  const bool TooWide = R.getBitWidth() > MaxIntegerBW + 1;
  return TooWide ? badRange() : R;
}
// The most obvious way to structure the search is a depth-first, eager
// search from each root. However, that requires direct recursion and so
// can only handle small instruction sequences. Instead, we split the search
// up into two phases:
// - walkBackwards: A worklist-driven walk of the use-def graph starting from
//                  the roots. Populate "SeenInsts" with interesting
//                  instructions and poison values if they're obvious and
//                  cheap to compute. Calculate the equivalence set structure
//                  while we're here too.
// - walkForwards: Iterate over SeenInsts in reverse order, so we visit
//                 defs before their uses. Calculate the real range info.
//
// Walk the use-def graph from the roots; determine the set of nodes
// we care about and eagerly determine if some of them are poisonous.
//
// NOTE: this was originally described as a breadth-first walk, but the
// worklist is popped from the back and pushed at the back, i.e. it is
// processed in LIFO order. The discovery order recorded in SeenInsts is
// what walkForwards() later iterates over.
void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
while (!Worklist.empty()) {
Instruction *I = Worklist.back();
Worklist.pop_back();
if (SeenInsts.find(I) != SeenInsts.end())
// Seen already.
continue;
switch (I->getOpcode()) {
// FIXME: Handle select and phi nodes.
default:
// Path terminated uncleanly.
// Falls through to the operand loop below, which still unions I with its
// instruction operands (poisoning their partition) but does not visit them.
seen(I, badRange());
break;
case Instruction::UIToFP: {
// Path terminated cleanly.
// Seed the range from the unsigned bounds of the integer source type,
// zero-extended to the tracking width. "continue" skips the operand loop,
// so the integer operand is neither visited nor unioned.
// NOTE(review): if ConstantRange(Lower, Upper) treats Upper as exclusive,
// the maximum value itself is left out of this range - confirm.
unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
APInt Min = APInt::getMinValue(BW).zextOrSelf(MaxIntegerBW+1);
APInt Max = APInt::getMaxValue(BW).zextOrSelf(MaxIntegerBW+1);
seen(I, validateRange(ConstantRange(Min, Max)));
continue;
}
case Instruction::SIToFP: {
// Path terminated cleanly.
// Same as above, using the signed bounds and sign extension.
unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(MaxIntegerBW+1);
APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(MaxIntegerBW+1);
seen(I, validateRange(ConstantRange(SMin, SMax)));
continue;
}
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FCmp:
// Mappable instruction: its range is computed later by walkForwards().
seen(I, unknownRange());
break;
}
for (Value *O : I->operands()) {
if (Instruction *OI = dyn_cast<Instruction>(O)) {
// Unify def-use chains if they interfere.
ECs.unionSets(I, OI);
// Only keep walking upwards while I has not been poisoned.
if (SeenInsts.find(I)->second != badRange())
Worklist.push_back(OI);
} else if (!isa<ConstantFP>(O)) {
// Not an instruction or ConstantFP? we can't do anything.
seen(I, badRange());
}
}
}
}
// Walk forwards down the list of seen instructions, so we visit defs before
// uses.
void Float2Int::walkForwards() {
for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) {
if (It->second != unknownRange())
continue;
Instruction *I = It->first;
std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
switch (I->getOpcode()) {
// FIXME: Handle select and phi nodes.
default:
case Instruction::UIToFP:
case Instruction::SIToFP:
llvm_unreachable("Should have been handled in walkForwards!");
case Instruction::FAdd:
Op = [](ArrayRef<ConstantRange> Ops) {
assert(Ops.size() == 2 && "FAdd is a binary operator!");
return Ops[0].add(Ops[1]);
};
break;
case Instruction::FSub:
Op = [](ArrayRef<ConstantRange> Ops) {
assert(Ops.size() == 2 && "FSub is a binary operator!");
return Ops[0].sub(Ops[1]);
};
break;
case Instruction::FMul:
Op = [](ArrayRef<ConstantRange> Ops) {
assert(Ops.size() == 2 && "FMul is a binary operator!");
return Ops[0].multiply(Ops[1]);
};
break;
//
// Root-only instructions - we'll only see these if they're the
// first node in a walk.
//
case Instruction::FPToUI:
case Instruction::FPToSI:
Op = [](ArrayRef<ConstantRange> Ops) {
assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!");
return Ops[0];
};
break;
case Instruction::FCmp:
Op = [](ArrayRef<ConstantRange> Ops) {
assert(Ops.size() == 2 && "FCmp is a binary operator!");
return Ops[0].unionWith(Ops[1]);
};
break;
}
bool Abort = false;
SmallVector<ConstantRange,4> OpRanges;
for (Value *O : I->operands()) {
if (Instruction *OI = dyn_cast<Instruction>(O)) {
assert(SeenInsts.find(OI) != SeenInsts.end() &&
"def not seen before use!");
OpRanges.push_back(SeenInsts.find(OI)->second);
} else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
// Work out if the floating point number can be losslessly represented
// as an integer.
// APFloat::convertToInteger(&Exact) purports to do what we want, but
// the exactness can be too precise. For example, negative zero can
// never be exactly converted to an integer.
//
// Instead, we ask APFloat to round itself to an integral value - this
// preserves sign-of-zero - then compare the result with the original.
//
APFloat F = CF->getValueAPF();
// First, weed out obviously incorrect values. Non-finite numbers
// can't be represented and neither can negative zero, unless
// we're in fast math mode.
if (!F.isFinite() ||
(F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
!I->hasNoSignedZeros())) {
seen(I, badRange());
Abort = true;
break;
}
APFloat NewF = F;
auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven);
if (Res != APFloat::opOK || NewF.compare(F) != APFloat::cmpEqual) {
seen(I, badRange());
Abort = true;
break;
}
// OK, it's representable. Now get it.
APSInt Int(MaxIntegerBW+1, false);
bool Exact;
CF->getValueAPF().convertToInteger(Int,
APFloat::rmNearestTiesToEven,
&Exact);
OpRanges.push_back(ConstantRange(Int));
} else {
llvm_unreachable("Should have already marked this as badRange!");
}
}
// Reduce the operands' ranges to a single range and return.
if (!Abort)
seen(I, Op(OpRanges));
}
}
// If there is a valid transform to be done, do it.
//
// Each equivalence class in ECs is considered independently. A class is
// converted only if none of its non-root members has a user outside
// SeenInsts, its combined range is neither full nor sign-wrapped, and that
// range fits both in the mantissa of the floating point type in use and in
// 64 bits. Returns true if at least one class was converted.
bool Float2Int::validateAndTransform() {
bool MadeChange = false;
// Iterate over every disjoint partition of the def-use graph.
for (auto It = ECs.begin(), E = ECs.end(); It != E; ++It) {
// Start from the same value unknownRange() returns and widen from there.
ConstantRange R(MaxIntegerBW + 1, false);
bool Fail = false;
// Floating point type of the first non-root member encountered.
Type *ConvertedToTy = nullptr;
// For every member of the partition, union all the ranges together.
for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
MI != ME; ++MI) {
Instruction *I = *MI;
auto SeenI = SeenInsts.find(I);
// Members that were unioned but never visited carry no range.
if (SeenI == SeenInsts.end())
continue;
R = R.unionWith(SeenI->second);
// We need to ensure I has no users that have not been seen.
// If it does, transformation would be illegal.
//
// Don't count the roots, as they terminate the graphs.
if (Roots.count(I) == 0) {
// Set the type of the conversion while we're here.
if (!ConvertedToTy)
ConvertedToTy = I->getType();
for (User *U : I->users()) {
Instruction *UI = dyn_cast<Instruction>(U);
if (!UI || SeenInsts.find(UI) == SeenInsts.end()) {
DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n");
Fail = true;
break;
}
}
}
// The break above only leaves the user loop; leave the member loop too.
if (Fail)
break;
}
// If the set was empty, or we failed, or the range is poisonous,
// bail out.
if (ECs.member_begin(It) == ECs.member_end() || Fail ||
R.isFullSet() || R.isSignWrappedSet())
continue;
assert(ConvertedToTy && "Must have set the convertedtoty by this point!");
// The number of bits required is the maximum of the upper and
// lower limits, plus one so it can be signed.
unsigned MinBW = std::max(R.getLower().getMinSignedBits(),
R.getUpper().getMinSignedBits()) + 1;
DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n");
// If we've run off the realms of the exactly representable integers,
// the floating point result will differ from an integer approximation.
// Do we need more bits than are in the mantissa of the type we converted
// to? semanticsPrecision returns the number of mantissa bits plus one
// for the sign bit.
unsigned MaxRepresentableBits
= APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - 1;
if (MinBW > MaxRepresentableBits) {
DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n");
continue;
}
// Only i32 and i64 are ever chosen below, regardless of MaxIntegerBW.
if (MinBW > 64) {
DEBUG(dbgs() << "F2I: Value requires more than 64 bits to represent!\n");
continue;
}
// OK, R is known to be representable. Now pick a type for it.
// FIXME: Pick the smallest legal type that will fit.
Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx);
// convert() memoizes in ConvertedInsts, so members already converted as an
// operand of an earlier member are not converted twice.
for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
MI != ME; ++MI)
convert(*MI, Ty);
MadeChange = true;
}
return MadeChange;
}
// Build the integer replacement for I, using ToTy as the integer type of the
// demoted chain, and return it. Results are memoized in ConvertedInsts, so
// each instruction is converted at most once. Operands that are instructions
// are converted recursively; floating point constants are turned into integer
// constants of type ToTy. If I is a root, all of its uses are redirected to
// the new value.
Value *Float2Int::convert(Instruction *I, Type *ToTy) {
  // Already converted this instruction?
  auto Cached = ConvertedInsts.find(I);
  if (Cached != ConvertedInsts.end())
    return Cached->second;

  const unsigned Opc = I->getOpcode();
  // uitofp/sitofp terminate the path: their operand is already an integer.
  const bool EndsPath =
      Opc == Instruction::UIToFP || Opc == Instruction::SIToFP;

  SmallVector<Value*,4> NewOperands;
  for (Value *V : I->operands()) {
    if (EndsPath) {
      // Don't recurse; use the integer operand as it is.
      NewOperands.push_back(V);
      continue;
    }
    if (Instruction *VI = dyn_cast<Instruction>(V)) {
      NewOperands.push_back(convert(VI, ToTy));
      continue;
    }
    ConstantFP *CF = dyn_cast<ConstantFP>(V);
    if (!CF)
      llvm_unreachable("Unhandled operand type?");

    // Materialize the constant as a signed integer of the target width.
    APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false);
    bool Exact;
    CF->getValueAPF().convertToInteger(Val,
                                       APFloat::rmNearestTiesToEven,
                                       &Exact);
    NewOperands.push_back(ConstantInt::get(ToTy, Val));
  }

  // Now create the replacement, inserted right before I.
  IRBuilder<> IRB(I);
  Value *NewV = nullptr;
  switch (Opc) {
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
    NewV = IRB.CreateBinOp(mapBinOpcode(Opc),
                           NewOperands[0], NewOperands[1],
                           I->getName());
    break;

  case Instruction::UIToFP:
    // Entering the chain: bring the source integer to the chain's type.
    NewV = IRB.CreateZExtOrTrunc(NewOperands[0], ToTy);
    break;

  case Instruction::SIToFP:
    NewV = IRB.CreateSExtOrTrunc(NewOperands[0], ToTy);
    break;

  case Instruction::FPToUI:
    // Leaving the chain: bring the value to the type the root produced.
    NewV = IRB.CreateZExtOrTrunc(NewOperands[0], I->getType());
    break;

  case Instruction::FPToSI:
    NewV = IRB.CreateSExtOrTrunc(NewOperands[0], I->getType());
    break;

  case Instruction::FCmp: {
    CmpInst::Predicate P = mapFCmpPred(cast<CmpInst>(I)->getPredicate());
    assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!");
    NewV = IRB.CreateICmp(P, NewOperands[0], NewOperands[1], I->getName());
    break;
  }

  default:
    llvm_unreachable("Unhandled instruction!");
  }

  // If we're a root instruction, RAUW.
  if (Roots.count(I))
    I->replaceAllUsesWith(NewV);

  ConvertedInsts[I] = NewV;
  return NewV;
}
// Perform dead code elimination on the instructions we just modified.
void Float2Int::cleanup() {
for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend();
I != E; ++I)
I->first->eraseFromParent();
}
// Run the pass on F: find the roots, walk the graph to compute ranges, then
// transform whatever is safe. Returns true if F was modified. Functions
// rejected by skipOptnoneFunction() are left untouched.
bool Float2Int::runOnFunction(Function &F) {
  if (skipOptnoneFunction(F))
    return false;

  DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");

  // Drop everything left over from the previous function.
  Roots.clear();
  ConvertedInsts.clear();
  SeenInsts.clear();
  ECs = EquivalenceClasses<Instruction*>();
  Ctx = &F.getParent()->getContext();

  findRoots(F, Roots);
  walkBackwards(Roots);
  walkForwards();

  // Only clean up if something was actually converted.
  if (!validateAndTransform())
    return false;
  cleanup();
  return true;
}
// Factory for the Float2Int pass; the caller receives a newly allocated pass.
FunctionPass *llvm::createFloat2IntPass() {
  FunctionPass *NewPass = new Float2Int();
  return NewPass;
}

View File

@ -77,6 +77,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoadCombinePass(Registry);
initializePlaceBackedgeSafepointsImplPass(Registry);
initializePlaceSafepointsPass(Registry);
initializeFloat2IntPass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {

View File

@ -0,0 +1,256 @@
; RUN: opt < %s -float2int -S | FileCheck %s
;
; Positive tests
;
; i8 -> float, add 1.0, convert to i16: expected to become a zext to i32,
; an integer add and a trunc to the result type.
; CHECK-LABEL: @simple1
; CHECK: %1 = zext i8 %a to i32
; CHECK: %2 = add i32 %1, 1
; CHECK: %3 = trunc i32 %2 to i16
; CHECK: ret i16 %3
define i16 @simple1(i8 %a) {
%1 = uitofp i8 %a to float
%2 = fadd float %1, 1.0
%3 = fptoui float %2 to i16
ret i16 %3
}
; Same shape as simple1 but with a subtraction, so the tracked range includes
; a negative value; the result is truncated back to i8.
; CHECK-LABEL: @simple2
; CHECK: %1 = zext i8 %a to i32
; CHECK: %2 = sub i32 %1, 1
; CHECK: %3 = trunc i32 %2 to i8
; CHECK: ret i8 %3
define i8 @simple2(i8 %a) {
%1 = uitofp i8 %a to float
%2 = fsub float %1, 1.0
%3 = fptoui float %2 to i8
ret i8 %3
}
; The result type already is i32, so no trailing conversion is expected:
; the integer sub is returned directly.
; CHECK-LABEL: @simple3
; CHECK: %1 = zext i8 %a to i32
; CHECK: %2 = sub i32 %1, 1
; CHECK: ret i32 %2
define i32 @simple3(i8 %a) {
%1 = uitofp i8 %a to float
%2 = fsub float %1, 1.0
%3 = fptoui float %2 to i32
ret i32 %3
}
; A floating point comparison acts as a root: "fcmp ult" on two converted i8
; values is expected to become a signed integer "icmp slt" on the zero-extended
; inputs.
; CHECK-LABEL: @cmp
; CHECK: %1 = zext i8 %a to i32
; CHECK: %2 = zext i8 %b to i32
; CHECK: %3 = icmp slt i32 %1, %2
; CHECK: ret i1 %3
define i1 @cmp(i8 %a, i8 %b) {
%1 = uitofp i8 %a to float
%2 = uitofp i8 %b to float
%3 = fcmp ult float %1, %2
ret i1 %3
}
; An i32 input plus one no longer fits in 32 signed bits, so the chain is
; expected to be carried out in i64 (the computation is done in double).
; CHECK-LABEL: @simple4
; CHECK: %1 = zext i32 %a to i64
; CHECK: %2 = add i64 %1, 1
; CHECK: %3 = trunc i64 %2 to i32
; CHECK: ret i32 %3
define i32 @simple4(i32 %a) {
%1 = uitofp i32 %a to double
%2 = fadd double %1, 1.0
%3 = fptoui double %2 to i32
ret i32 %3
}
; A longer chain mixing fadd and fmul over two converted inputs; every
; floating point operation is expected to be replaced by its integer form.
; CHECK-LABEL: @simple5
; CHECK: %1 = zext i8 %a to i32
; CHECK: %2 = zext i8 %b to i32
; CHECK: %3 = add i32 %1, 1
; CHECK: %4 = mul i32 %3, %2
; CHECK: ret i32 %4
define i32 @simple5(i8 %a, i8 %b) {
%1 = uitofp i8 %a to float
%2 = uitofp i8 %b to float
%3 = fadd float %1, 1.0
%4 = fmul float %3, %2
%5 = fptoui float %4 to i32
ret i32 %5
}
; The two chains don't interact - failure of one shouldn't
; cause failure of the other.
; The %fa/%fb chain is expected to be demoted, while the chain through %fc
; stays in floating point because it adds the plain float argument %d.
; CHECK-LABEL: @multi1
; CHECK: %1 = zext i8 %a to i32
; CHECK: %2 = zext i8 %b to i32
; CHECK: %fc = uitofp i8 %c to float
; CHECK: %x1 = add i32 %1, %2
; CHECK: %z = fadd float %fc, %d
; CHECK: %w = fptoui float %z to i32
; CHECK: %r = add i32 %x1, %w
; CHECK: ret i32 %r
define i32 @multi1(i8 %a, i8 %b, i8 %c, float %d) {
%fa = uitofp i8 %a to float
%fb = uitofp i8 %b to float
%fc = uitofp i8 %c to float
%x = fadd float %fa, %fb
%y = fptoui float %x to i32
%z = fadd float %fc, %d
%w = fptoui float %z to i32
%r = add i32 %y, %w
ret i32 %r
}
; Negative zero normally blocks the transform, but the fadd here carries the
; "fast" flag, so -0.0 is expected to be accepted and emitted as integer 0.
; CHECK-LABEL: @simple_negzero
; CHECK: %1 = zext i8 %a to i32
; CHECK: %2 = add i32 %1, 0
; CHECK: %3 = trunc i32 %2 to i16
; CHECK: ret i16 %3
define i16 @simple_negzero(i8 %a) {
%1 = uitofp i8 %a to float
%2 = fadd fast float %1, -0.0
%3 = fptoui float %2 to i16
ret i16 %3
}
; A signed input multiplied by a negative constant: the constant must be
; materialized as the signed value -3.
; NOTE(review): presumably the regression test for PR23038 mentioned in the
; commit message - confirm.
; CHECK-LABEL: @simple_negative
; CHECK: %1 = sext i8 %call to i32
; CHECK: %mul1 = mul i32 %1, -3
; CHECK: %2 = trunc i32 %mul1 to i8
; CHECK: %conv3 = sext i8 %2 to i32
; CHECK: ret i32 %conv3
define i32 @simple_negative(i8 %call) {
%conv1 = sitofp i8 %call to float
%mul = fmul float %conv1, -3.000000e+00
%conv2 = fptosi float %mul to i8
%conv3 = sext i8 %conv2 to i32
ret i32 %conv3
}
;
; Negative tests
;
; Counterpart of multi1 in which both chains share %fc; the function is
; expected to come out unchanged.
; CHECK-LABEL: @neg_multi1
; CHECK: %fa = uitofp i8 %a to float
; CHECK: %fc = uitofp i8 %c to float
; CHECK: %x = fadd float %fa, %fc
; CHECK: %y = fptoui float %x to i32
; CHECK: %z = fadd float %fc, %d
; CHECK: %w = fptoui float %z to i32
; CHECK: %r = add i32 %y, %w
; CHECK: ret i32 %r
; The two chains intersect, which means because one fails, no
; transform can occur.
define i32 @neg_multi1(i8 %a, i8 %b, i8 %c, float %d) {
%fa = uitofp i8 %a to float
%fc = uitofp i8 %c to float
%x = fadd float %fa, %fc
%y = fptoui float %x to i32
%z = fadd float %fc, %d
%w = fptoui float %z to i32
%r = add i32 %y, %w
ret i32 %r
}
; The product of two i32 values is too wide for a double to hold exactly, so
; the function is expected to come out unchanged.
; CHECK-LABEL: @neg_muld
; CHECK: %fa = uitofp i32 %a to double
; CHECK: %fb = uitofp i32 %b to double
; CHECK: %mul = fmul double %fa, %fb
; CHECK: %r = fptoui double %mul to i64
; CHECK: ret i64 %r
; The i32 * i32 = i64, which has 64 bits, which is greater than the 52 bits
; that can be exactly represented in a double.
define i64 @neg_muld(i32 %a, i32 %b) {
%fa = uitofp i32 %a to double
%fb = uitofp i32 %b to double
%mul = fmul double %fa, %fb
%r = fptoui double %mul to i64
ret i64 %r
}
; Same idea as neg_muld, one size down: the product of two i16 values does not
; fit exactly in a float, so nothing is expected to change.
; CHECK-LABEL: @neg_mulf
; CHECK: %fa = uitofp i16 %a to float
; CHECK: %fb = uitofp i16 %b to float
; CHECK: %mul = fmul float %fa, %fb
; CHECK: %r = fptoui float %mul to i32
; CHECK: ret i32 %r
; The i16 * i16 = i32, which can't be represented in a float, but can in a
; double. This should fail, as the written code uses floats, not doubles so
; the original result may be inaccurate.
define i32 @neg_mulf(i16 %a, i16 %b) {
%fa = uitofp i16 %a to float
%fb = uitofp i16 %b to float
%mul = fmul float %fa, %fb
%r = fptoui float %mul to i32
ret i32 %r
}
; A comparison whose predicate cannot be mapped is not a root, so the function
; is expected to come out unchanged.
; CHECK-LABEL: @neg_cmp
; CHECK: %1 = uitofp i8 %a to float
; CHECK: %2 = uitofp i8 %b to float
; CHECK: %3 = fcmp false float %1, %2
; CHECK: ret i1 %3
; "false" doesn't have an icmp equivalent.
define i1 @neg_cmp(i8 %a, i8 %b) {
%1 = uitofp i8 %a to float
%2 = uitofp i8 %b to float
%3 = fcmp false float %1, %2
ret i1 %3
}
; An unsupported instruction (fdiv) in the chain poisons it, so the function is
; expected to come out unchanged.
; CHECK-LABEL: @neg_div
; CHECK: %1 = uitofp i8 %a to float
; CHECK: %2 = fdiv float %1, 1.0
; CHECK: %3 = fptoui float %2 to i16
; CHECK: ret i16 %3
; Division isn't a supported operator.
define i16 @neg_div(i8 %a) {
%1 = uitofp i8 %a to float
%2 = fdiv float %1, 1.0
%3 = fptoui float %2 to i16
ret i16 %3
}
; A constant with a fractional part cannot be demoted, so the function is
; expected to come out unchanged.
; CHECK-LABEL: @neg_remainder
; CHECK: %1 = uitofp i8 %a to float
; CHECK: %2 = fadd float %1, 1.2
; CHECK: %3 = fptoui float %2 to i16
; CHECK: ret i16 %3
; 1.25 is not an integer.
; NOTE(review): the pattern above spells the constant as "1.2" while the input
; uses 1.25; presumably it matches as a prefix of the printed value - confirm.
define i16 @neg_remainder(i8 %a) {
%1 = uitofp i8 %a to float
%2 = fadd float %1, 1.25
%3 = fptoui float %2 to i16
ret i16 %3
}
; The i80 source is wider than the default maximum integer bitwidth, so the
; function is expected to come out unchanged.
; CHECK-LABEL: @neg_toolarge
; CHECK: %1 = uitofp i80 %a to fp128
; CHECK: %2 = fadd fp128 %1, %1
; CHECK: %3 = fptoui fp128 %2 to i80
; CHECK: ret i80 %3
; i80 > i64, which is the largest bitwidth handleable by default.
define i80 @neg_toolarge(i80 %a) {
%1 = uitofp i80 %a to fp128
%2 = fadd fp128 %1, %1
%3 = fptoui fp128 %2 to i80
ret i80 %3
}
; A value inside the chain has a user the pass never visited (a call), so the
; floating point instructions must be kept.
; CHECK-LABEL: @neg_calluser
; CHECK: sitofp
; CHECK: fcmp
; The sequence %1..%3 cannot be converted because %4 uses %2.
define i32 @neg_calluser(i32 %value) {
%1 = sitofp i32 %value to double
%2 = fadd double %1, 1.0
%3 = fcmp olt double %2, 0.000000e+00
%4 = tail call double @g(double %2)
%5 = fptosi double %4 to i32
%6 = zext i1 %3 to i32
%7 = add i32 %6, %5
ret i32 %7
}
; External function that consumes the floating point value %2 above.
declare double @g(double)

View File

@ -0,0 +1,17 @@
; RUN: opt < %s -float2int -S | FileCheck %s
;
; Verify that pass float2int is not run on optnone functions.
; Same body as the basic positive test, but tagged with attribute group #0
; (noinline optnone); every instruction is expected to survive untouched.
define i16 @simple1(i8 %a) #0 {
; CHECK-LABEL: @simple1
; CHECK: %1 = uitofp i8 %a to float
; CHECK-NEXT: %2 = fadd float %1, 1.0
; CHECK-NEXT: %3 = fptoui float %2 to i16
; CHECK-NEXT: ret i16 %3
%1 = uitofp i8 %a to float
%2 = fadd float %1, 1.0
%3 = fptoui float %2 to i16
ret i16 %3
}
; Attribute group that makes the pass skip the function above.
attributes #0 = { noinline optnone }

View File

@ -0,0 +1,16 @@
; RUN: opt < %s -float2int -float2int-max-integer-bw=256 -S | FileCheck %s
; Run with -float2int-max-integer-bw=256: even with the analysis limit raised,
; the function is expected to come out unchanged.
; CHECK-LABEL: @neg_toolarge
; CHECK: %1 = uitofp i80 %a to fp128
; CHECK: %2 = fadd fp128 %1, %1
; CHECK: %3 = fptoui fp128 %2 to i80
; CHECK: ret i80 %3
; fp128 has a 112-bit mantissa, which can hold an i80. But we only support
; up to i64, so it should fail (even though the max integer bitwidth is 256).
define i80 @neg_toolarge(i80 %a) {
%1 = uitofp i80 %a to fp128
%2 = fadd fp128 %1, %1
%3 = fptoui fp128 %2 to i80
ret i80 %3
}