diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 5c572a640ed..e6f7283ef4e 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
 #include "llvm/Target/TargetAsmInfo.h"
@@ -28,6 +29,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
@@ -58,6 +60,8 @@ namespace {
     bool OptimizeLoadStoreInst(Instruction *I, Value *Addr,
                                const Type *AccessTy,
                                DenseMap<Value*,Value*> &SunkAddrs);
+    bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
+                               DenseMap<Value*,Value*> &SunkAddrs);
     bool OptimizeExtUses(Instruction *I);
   };
 }
@@ -928,6 +932,54 @@ bool CodeGenPrepare::OptimizeLoadStoreInst(Instruction *LdStInst, Value *Addr,
   return true;
 }
 
+/// OptimizeInlineAsmInst - If there are any memory operands, use
+/// OptimizeLoadStoreInst to sink their address computing into the block when
+/// possible / profitable.
+bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
+                                           DenseMap<Value*,Value*> &SunkAddrs) {
+  bool MadeChange = false;
+  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+  // Do a prepass over the constraints, canonicalizing them, and building up
+  // the ConstraintOperands list.
+  std::vector<InlineAsm::ConstraintInfo>
+    ConstraintInfos = IA->ParseConstraints();
+
+  /// ConstraintOperands - Information about all of the constraints.
+  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
+  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    ConstraintOperands.
+      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
+    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+    // Compute the value type for each operand.
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      if (OpInfo.isIndirect)
+        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    // Compute the constraint code and ConstraintType to use.
+    OpInfo.ComputeConstraintToUse(*TLI);
+
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+      Value *OpVal = OpInfo.CallOperandVal;
+      MadeChange |= OptimizeLoadStoreInst(I, OpVal, OpVal->getType(),
+                                          SunkAddrs);
+    }
+  }
+
+  return MadeChange;
+}
+
 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
   BasicBlock *DefBB = I->getParent();
@@ -1076,6 +1128,9 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
               TLI->getTargetMachine().getTargetAsmInfo()) {
           if (TAI->ExpandInlineAsm(CI))
             BBI = BB.begin();
+          else
+            // Sink address computing for memory operands into the block.
+            MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
         }
       }
     }
diff --git a/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
new file mode 100644
index 00000000000..ff7cf5e94e2
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
@@ -0,0 +1,33 @@
+; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -mattr=+sse2
+; PR2076
+
+define void @h264_h_loop_filter_luma_mmx2(i8* %pix, i32 %stride, i32 %alpha, i32 %beta, i8* %tc0) nounwind {
+entry:
+	%tmp164 = getelementptr [16 x i32]* null, i32 0, i32 11		; <i32*> [#uses=1]
+	%tmp169 = getelementptr [16 x i32]* null, i32 0, i32 13		; <i32*> [#uses=1]
+	%tmp174 = getelementptr [16 x i32]* null, i32 0, i32 15		; <i32*> [#uses=1]
+	%tmp154.sum317 = add i32 0, %stride		; <i32> [#uses=1]
+	%tmp154.sum315 = mul i32 %stride, 6		; <i32> [#uses=1]
+	%tmp154.sum = mul i32 %stride, 7		; <i32> [#uses=1]
+	%pix_addr.0327.rec = mul i32 0, 0		; <i32> [#uses=4]
+	br i1 false, label %bb292, label %bb32
+
+bb32:		; preds = %entry
+	%pix_addr.0327.sum340 = add i32 %pix_addr.0327.rec, 0		; <i32> [#uses=1]
+	%tmp154 = getelementptr i8* %pix, i32 %pix_addr.0327.sum340		; <i8*> [#uses=1]
+	%tmp177178 = bitcast i8* %tmp154 to i32*		; <i32*> [#uses=1]
+	%pix_addr.0327.sum339 = add i32 %pix_addr.0327.rec, %tmp154.sum317		; <i32> [#uses=1]
+	%tmp181 = getelementptr i8* %pix, i32 %pix_addr.0327.sum339		; <i8*> [#uses=1]
+	%tmp181182 = bitcast i8* %tmp181 to i32*		; <i32*> [#uses=1]
+	%pix_addr.0327.sum338 = add i32 %pix_addr.0327.rec, %tmp154.sum315		; <i32> [#uses=1]
+	%tmp186 = getelementptr i8* %pix, i32 %pix_addr.0327.sum338		; <i8*> [#uses=1]
+	%tmp186187 = bitcast i8* %tmp186 to i32*		; <i32*> [#uses=1]
+	%pix_addr.0327.sum337 = add i32 %pix_addr.0327.rec, %tmp154.sum		; <i32> [#uses=1]
+	%tmp191 = getelementptr i8* %pix, i32 %pix_addr.0327.sum337		; <i8*> [#uses=1]
+	%tmp191192 = bitcast i8* %tmp191 to i32*		; <i32*> [#uses=1]
+	call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* null, i32* %tmp164, i32* %tmp169, i32* %tmp174, i32* %tmp177178, i32* %tmp181182, i32* %tmp186187, i32* %tmp191192 ) nounwind
+	unreachable
+
+bb292:		; preds = %entry
+	ret void
+}
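
For reference, the prepass added above pairs the inline asm's constraint string with the call's argument list: an indirect output ("=*m") consumes one call argument, every input consumes one, and clobbers consume none; only operands that classify as TargetLowering::C_Memory are handed to OptimizeLoadStoreInst. A minimal, hypothetical IR example (not part of this patch, names invented) showing that mapping:

; Hypothetical input, same typed-pointer syntax as the test above.
; Constraint string "=*m,r,*m,~{flags}" pairs with the call arguments:
;   "=*m"      - indirect output, consumes argument 0 (%out), C_Memory
;   "r"        - register input, consumes argument 1 (%x), not memory
;   "*m"       - memory input, consumes argument 2 (%in), C_Memory
;   "~{flags}" - clobber, consumes no argument
; Only %out and %in would be considered for address sinking.
define void @constraint_mapping(i32* %out, i32 %x, i32* %in) nounwind {
entry:
	call void asm sideeffect "# $0 $1 $2", "=*m,r,*m,~{flags}"( i32* %out, i32 %x, i32* %in )
	ret void
}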
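
The transformation itself is the one OptimizeLoadStoreInst already performs for loads and stores: an address computed in one block but only used in another is rematerialized next to the memory user, so instruction selection can fold it into the operand. A rough before/after sketch under that assumption; the exact shape of the sunk computation (a rebuilt getelementptr versus a ptrtoint/add/inttoptr sequence) depends on the target's addressing-mode legality checks:

; Before: %addr is computed in %entry but only used by the asm in %bb.
define void @sink_sketch(i8* %base, i32 %idx, i1 %cond) nounwind {
entry:
	%addr = getelementptr i8* %base, i32 %idx
	br i1 %cond, label %bb, label %done

bb:
	call void asm sideeffect "movb $$0, $0", "=*m,~{dirflag},~{fpsr},~{flags}"( i8* %addr )
	ret void

done:
	ret void
}

; After (sketch): an equivalent address computation is recreated in %bb,
; so %addr need not be kept live across the branch:
;   bb:
;     %sunkaddr = getelementptr i8* %base, i32 %idx
;     call void asm sideeffect "movb $$0, $0", "=*m,..."( i8* %sunkaddr )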