Fix PR2076. CodeGenPrepare now sinks address computation for inline asm memory

operands into inline asm block.

llvm-svn: 47589
This commit is contained in:
Evan Cheng 2008-02-26 02:42:37 +00:00
parent 40c26c71c0
commit 6366bbf577
2 changed files with 88 additions and 0 deletions

View File

@ -18,6 +18,7 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetAsmInfo.h"
@ -28,6 +29,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@ -58,6 +60,8 @@ namespace {
bool OptimizeLoadStoreInst(Instruction *I, Value *Addr,
const Type *AccessTy,
DenseMap<Value*,Value*> &SunkAddrs);
bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
DenseMap<Value*,Value*> &SunkAddrs);
bool OptimizeExtUses(Instruction *I);
};
}
@ -928,6 +932,54 @@ bool CodeGenPrepare::OptimizeLoadStoreInst(Instruction *LdStInst, Value *Addr,
return true;
}
/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeLoadStoreInt to sink their address computing into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
DenseMap<Value*,Value*> &SunkAddrs) {
bool MadeChange = false;
InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
// Do a prepass over the constraints, canonicalizing them, and building up the
// ConstraintOperands list.
std::vector<InlineAsm::ConstraintInfo>
ConstraintInfos = IA->ParseConstraints();
/// ConstraintOperands - Information about all of the constraints.
std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
ConstraintOperands.
push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
// Compute the value type for each operand.
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.isIndirect)
OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
break;
case InlineAsm::isInput:
OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
break;
case InlineAsm::isClobber:
// Nothing to do.
break;
}
// Compute the constraint code and ConstraintType to use.
OpInfo.ComputeConstraintToUse(*TLI);
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
Value *OpVal = OpInfo.CallOperandVal;
MadeChange |= OptimizeLoadStoreInst(I, OpVal, OpVal->getType(),
SunkAddrs);
}
}
return MadeChange;
}
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
BasicBlock *DefBB = I->getParent();
@ -1076,6 +1128,9 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
TLI->getTargetMachine().getTargetAsmInfo()) {
if (TAI->ExpandInlineAsm(CI))
BBI = BB.begin();
else
// Sink address computing for memory operands into the block.
MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
}
}
}

View File

@ -0,0 +1,33 @@
; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -mattr=+sse2
; PR2076
define void @h264_h_loop_filter_luma_mmx2(i8* %pix, i32 %stride, i32 %alpha, i32 %beta, i8* %tc0) nounwind {
entry:
%tmp164 = getelementptr [16 x i32]* null, i32 0, i32 11 ; <i32*> [#uses=1]
%tmp169 = getelementptr [16 x i32]* null, i32 0, i32 13 ; <i32*> [#uses=1]
%tmp174 = getelementptr [16 x i32]* null, i32 0, i32 15 ; <i32*> [#uses=1]
%tmp154.sum317 = add i32 0, %stride ; <i32> [#uses=1]
%tmp154.sum315 = mul i32 %stride, 6 ; <i32> [#uses=1]
%tmp154.sum = mul i32 %stride, 7 ; <i32> [#uses=1]
%pix_addr.0327.rec = mul i32 0, 0 ; <i32> [#uses=4]
br i1 false, label %bb292, label %bb32
bb32: ; preds = %entry
%pix_addr.0327.sum340 = add i32 %pix_addr.0327.rec, 0 ; <i32> [#uses=1]
%tmp154 = getelementptr i8* %pix, i32 %pix_addr.0327.sum340 ; <i8*> [#uses=1]
%tmp177178 = bitcast i8* %tmp154 to i32* ; <i32*> [#uses=1]
%pix_addr.0327.sum339 = add i32 %pix_addr.0327.rec, %tmp154.sum317 ; <i32> [#uses=1]
%tmp181 = getelementptr i8* %pix, i32 %pix_addr.0327.sum339 ; <i8*> [#uses=1]
%tmp181182 = bitcast i8* %tmp181 to i32* ; <i32*> [#uses=1]
%pix_addr.0327.sum338 = add i32 %pix_addr.0327.rec, %tmp154.sum315 ; <i32> [#uses=1]
%tmp186 = getelementptr i8* %pix, i32 %pix_addr.0327.sum338 ; <i8*> [#uses=1]
%tmp186187 = bitcast i8* %tmp186 to i32* ; <i32*> [#uses=1]
%pix_addr.0327.sum337 = add i32 %pix_addr.0327.rec, %tmp154.sum ; <i32> [#uses=1]
%tmp191 = getelementptr i8* %pix, i32 %pix_addr.0327.sum337 ; <i8*> [#uses=1]
%tmp191192 = bitcast i8* %tmp191 to i32* ; <i32*> [#uses=1]
call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* null, i32* %tmp164, i32* %tmp169, i32* %tmp174, i32* %tmp177178, i32* %tmp181182, i32* %tmp186187, i32* %tmp191192 ) nounwind
unreachable
bb292: ; preds = %entry
ret void
}