diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 65249153bd0..495cdc63214 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
@@ -126,6 +127,7 @@ namespace {
     bool OptimizeSelectInst(SelectInst *SI);
     bool DupRetToEnableTailCallOpts(ReturnInst *RI);
     bool PlaceDbgValues(Function &F);
+    bool ConvertLoadToSwitch(LoadInst *LI);
   };
 }
 
@@ -169,7 +171,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   bool MadeChange = true;
   while (MadeChange) {
     MadeChange = false;
-    for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+    for (Function::iterator I = F.begin(); I != F.end(); ) {
       BasicBlock *BB = I++;
       MadeChange |= OptimizeBlock(*BB);
     }
@@ -1283,9 +1285,11 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
     return OptimizeCmpExpression(CI);
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    bool Changed = false;
     if (TLI)
-      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
-    return false;
+      Changed |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+    Changed |= ConvertLoadToSwitch(LI);
+    return Changed;
   }
 
   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
@@ -1329,7 +1333,7 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
   bool MadeChange = false;
 
   CurInstIterator = BB.begin();
-  for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
+  while (CurInstIterator != BB.end())
     MadeChange |= OptimizeInst(CurInstIterator++);
 
   return MadeChange;
@@ -1365,3 +1369,126 @@ bool CodeGenPrepare::PlaceDbgValues(Function &F) {
   }
   return MadeChange;
 }
+
+/// TargetSupportsJumpTables - Return true if the target reports jump-table
+/// support and has a legal or custom lowering for BR_JT or BRIND.
+static bool TargetSupportsJumpTables(const TargetLowering &TLI) {
+  return TLI.supportJumpTables() &&
+          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+/// ConvertLoadToSwitch - Convert loads from constant lookup tables into
+/// switches. This undoes the switch-to-lookup table transformation in
+/// SimplifyCFG for targets where that is unprofitable.
+///
+/// The load must be a simple load through an inbounds, address-space-0 GEP
+/// of the form (GV, 0, Index), where GV is a constant global whose
+/// definitive initializer is an array of at least two elements. The most
+/// frequent table value becomes the switch default; every other element
+/// gets its own block feeding a PHI that replaces the load. Returns true if
+/// the IR was changed.
+bool CodeGenPrepare::ConvertLoadToSwitch(LoadInst *LI) {
+  // This only applies to targets that don't support jump tables.
+  if (!TLI || TargetSupportsJumpTables(*TLI))
+    return false;
+
+  // FIXME: In the future, it would be desirable to have enough target
+  // information in SimplifyCFG, so we could decide at that stage whether to
+  // transform the switch to a lookup table or not, and this
+  // reverse-transformation could be removed.
+
+  // The load is deleted below, so it must not be volatile or atomic.
+  if (!LI->isSimple())
+    return false;
+
+  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
+  if (!GEP || !GEP->isInBounds() || GEP->getPointerAddressSpace())
+    return false;
+  if (GEP->getNumIndices() != 2)
+    return false;
+  Value *FirstIndex = GEP->idx_begin()[0];
+  ConstantInt *FirstIndexInt = dyn_cast<ConstantInt>(FirstIndex);
+  if (!FirstIndexInt || !FirstIndexInt->isZero())
+    return false;
+
+  Value *TableIndex = GEP->idx_begin()[1];
+  IntegerType *TableIndexTy = cast<IntegerType>(TableIndex->getType());
+
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
+  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+    return false;
+
+  Constant *Arr = GV->getInitializer();
+  uint64_t NumElements;
+  if (ConstantArray *CA = dyn_cast<ConstantArray>(Arr))
+    NumElements = CA->getType()->getNumElements();
+  else if (ConstantDataArray *CDA = dyn_cast<ConstantDataArray>(Arr))
+    NumElements = CDA->getNumElements();
+  else
+    return false;
+  if (NumElements < 2)
+    return false;
+
+  // Split the block.
+  BasicBlock *OriginalBB = LI->getParent();
+  BasicBlock *PostSwitchBB = OriginalBB->splitBasicBlock(LI);
+
+  // Replace OriginalBB's terminator with a switch.
+  IRBuilder<> Builder(OriginalBB->getTerminator());
+  SwitchInst *Switch = Builder.CreateSwitch(TableIndex, PostSwitchBB,
+                                            NumElements - 1);
+  OriginalBB->getTerminator()->eraseFromParent();
+
+  // Count the frequency of each value to decide which to use as default.
+  SmallDenseMap<Constant*, uint64_t> ValueFreq;
+  for (uint64_t I = 0; I < NumElements; ++I)
+    ++ValueFreq[Arr->getAggregateElement(I)];
+  uint64_t MaxCount = 0;
+  Constant *DefaultValue = NULL;
+  // Pick the winner by scanning the table in index order instead of walking
+  // the map: the map is keyed on pointers, so its iteration order (and hence
+  // the winner of a tie) is not stable from run to run.
+  for (uint64_t I = 0; I < NumElements; ++I) {
+    Constant *C = Arr->getAggregateElement(I);
+    uint64_t Count = ValueFreq[C];
+    if (Count > MaxCount) {
+      MaxCount = Count;
+      DefaultValue = C;
+    }
+  }
+  assert(DefaultValue && "No values in the array?");
+
+  // Create the phi node in PostSwitchBB, which will replace the load.
+  Builder.SetInsertPoint(PostSwitchBB->begin());
+  PHINode *PHI = Builder.CreatePHI(LI->getType(), NumElements);
+  PHI->addIncoming(DefaultValue, OriginalBB);
+
+  // Build basic blocks to target with the switch.
+  for (uint64_t I = 0; I < NumElements; ++I) {
+    Constant *C = Arr->getAggregateElement(I);
+    if (C == DefaultValue) continue; // Already covered by the default case.
+
+    BasicBlock *BB = BasicBlock::Create(PostSwitchBB->getContext(),
+                                        "lookup.bb",
+                                        PostSwitchBB->getParent(),
+                                        PostSwitchBB);
+    Switch->addCase(ConstantInt::get(TableIndexTy, I), BB);
+    Builder.SetInsertPoint(BB);
+    Builder.CreateBr(PostSwitchBB);
+    PHI->addIncoming(C, BB);
+  }
+
+  // Remove the load.
+  LI->replaceAllUsesWith(PHI);
+  LI->eraseFromParent();
+
+  // Clean up.
+  if (GEP->use_empty())
+    GEP->eraseFromParent();
+  if (GV->hasUnnamedAddr() && GV->hasPrivateLinkage() && GV->use_empty())
+    GV->eraseFromParent();
+
+  // Resume the scan at the new switch, which now terminates OriginalBB.
+  CurInstIterator = Switch;
+  return true;
+}
diff --git a/test/Transforms/CodeGenPrepare/load_to_switch.ll b/test/Transforms/CodeGenPrepare/load_to_switch.ll
new file mode 100644
index 00000000000..76a57c6b50e
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/load_to_switch.ll
@@ -0,0 +1,84 @@
+; RUN: llc -march=sparc < %s | FileCheck %s
+
+; Check that all the switches turned into lookup tables by SimplifyCFG are
+; turned back into switches for targets that don't like lookup tables.
+
+@.str = private unnamed_addr constant [4 x i8] c"foo\00", align 1
+@.str1 = private unnamed_addr constant [4 x i8] c"bar\00", align 1
+@.str2 = private unnamed_addr constant [4 x i8] c"baz\00", align 1
+@.str3 = private unnamed_addr constant [4 x i8] c"qux\00", align 1
+@.str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1
+@switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
+@switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05"
+@switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
+@switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
+
+define i32 @f(i32 %c) {
+entry:
+  %switch.tableidx = sub i32 %c, 42
+  %0 = icmp ult i32 %switch.tableidx, 7
+  br i1 %0, label %switch.lookup, label %return
+
+switch.lookup:
+  %switch.gep = getelementptr inbounds [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx
+  %switch.load = load i32* %switch.gep
+  ret i32 %switch.load
+
+return:
+  ret i32 15
+
+; CHECK: f:
+; CHECK: %switch.lookup
+; CHECK-NOT: sethi %hi(.Lswitch.table)
+}
+
+declare void @dummy(i8 signext, float)
+
+define void @h(i32 %x) {
+entry:
+  %switch.tableidx = sub i32 %x, 0
+  %0 = icmp ult i32 %switch.tableidx, 4
+  br i1 %0, label %switch.lookup, label %sw.epilog
+
+switch.lookup:
+  %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx
+  %switch.load = load i8* %switch.gep
+  %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx
+  %switch.load2 = load float* %switch.gep1
+  br label %sw.epilog
+
+sw.epilog:
+  %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ]
+  %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
+  call void @dummy(i8 signext %a.0, float %b.0)
+  ret void
+
+; CHECK: h:
+; CHECK: %switch.lookup
+; CHECK-NOT: sethi %hi(.Lswitch.table{{[0-9]}})
+; CHECK-NOT: sethi %hi(.Lswitch.table{{[0-9]}})
+}
+
+define i8* @foostring(i32 %x) {
+entry:
+  %switch.tableidx = sub i32 %x, 0
+  %0 = icmp ult i32 %switch.tableidx, 4
+  br i1 %0, label %switch.lookup, label %return
+
+switch.lookup:
+  %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx
+  %switch.load = load i8** %switch.gep
+  ret i8* %switch.load
+
+return:
+  ret i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)
+
+; CHECK: foostring:
+; CHECK: %switch.lookup
+; CHECK-NOT: sethi %hi(.Lswitch.table3)
+}
+
+; CHECK-NOT: .Lswitch.table
+; CHECK-NOT: .Lswitch.table1
+; CHECK-NOT: .Lswitch.table2
+; CHECK-NOT: .Lswitch.table3