mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-23 14:30:50 +00:00
Add support for byval function whose argument is not 32 bit aligned.
To do this it is necessary to add an "always inline" argument to the memcpy node. For completeness I have also added this argument to memmove and memset. I have also added getMem* functions, because the extra argument makes it cumbersome to use getNode and because I get confused by it :-) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43172 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1f39dc42cd
commit
5c0d6ed325
@ -307,7 +307,19 @@ public:
|
||||
SDOperand N5);
|
||||
SDOperand getNode(unsigned Opcode, SDVTList VTs,
|
||||
const SDOperand *Ops, unsigned NumOps);
|
||||
|
||||
|
||||
SDOperand getMemcpy(SDOperand Chain, SDOperand Dest, SDOperand Src,
|
||||
SDOperand Size, SDOperand Align,
|
||||
SDOperand AlwaysInline);
|
||||
|
||||
SDOperand getMemmove(SDOperand Chain, SDOperand Dest, SDOperand Src,
|
||||
SDOperand Size, SDOperand Align,
|
||||
SDOperand AlwaysInline);
|
||||
|
||||
SDOperand getMemset(SDOperand Chain, SDOperand Dest, SDOperand Src,
|
||||
SDOperand Size, SDOperand Align,
|
||||
SDOperand AlwaysInline);
|
||||
|
||||
/// getSetCC - Helper function to make it easier to build SetCC's if you just
|
||||
/// have an ISD::CondCode instead of an SDOperand.
|
||||
///
|
||||
|
@ -485,10 +485,10 @@ namespace ISD {
|
||||
// it returns an output chain.
|
||||
STACKRESTORE,
|
||||
|
||||
// MEMSET/MEMCPY/MEMMOVE - The first operand is the chain, and the rest
|
||||
// correspond to the operands of the LLVM intrinsic functions. The only
|
||||
// result is a token chain. The alignment argument is guaranteed to be a
|
||||
// Constant node.
|
||||
// MEMSET/MEMCPY/MEMMOVE - The first operand is the chain. The following
|
||||
// correspond to the operands of the LLVM intrinsic functions and the last
|
||||
// one is AlwaysInline. The only result is a token chain. The alignment
|
||||
// argument is guaranteed to be a Constant node.
|
||||
MEMSET,
|
||||
MEMMOVE,
|
||||
MEMCPY,
|
||||
|
@ -2506,18 +2506,31 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
|
||||
break;
|
||||
}
|
||||
|
||||
SDOperand Tmp6;
|
||||
switch (getTypeAction(Node->getOperand(5).getValueType())) { // bool
|
||||
case Expand: assert(0 && "Cannot expand this yet!");
|
||||
case Legal:
|
||||
Tmp6 = LegalizeOp(Node->getOperand(5));
|
||||
break;
|
||||
case Promote:
|
||||
Tmp6 = PromoteOp(Node->getOperand(5));
|
||||
break;
|
||||
}
|
||||
|
||||
switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) {
|
||||
default: assert(0 && "This action not implemented for this operation!");
|
||||
case TargetLowering::Custom:
|
||||
isCustom = true;
|
||||
// FALLTHROUGH
|
||||
case TargetLowering::Legal:
|
||||
Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4, Tmp5);
|
||||
case TargetLowering::Legal: {
|
||||
SDOperand Ops[] = { Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6 };
|
||||
Result = DAG.UpdateNodeOperands(Result, Ops, 6);
|
||||
if (isCustom) {
|
||||
Tmp1 = TLI.LowerOperation(Result, DAG);
|
||||
if (Tmp1.Val) Result = Tmp1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TargetLowering::Expand: {
|
||||
// Otherwise, the target does not support this operation. Lower the
|
||||
// operation to an explicit libcall as appropriate.
|
||||
|
@ -2269,6 +2269,30 @@ SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
|
||||
return getNode(Opcode, VT, Ops, 5);
|
||||
}
|
||||
|
||||
/// getMemcpy - Convenience wrapper that builds an ISD::MEMCPY node whose
/// result type is MVT::Other.  The six operands are handed to getNode in the
/// order Chain, Dest, Src, Size, Align, AlwaysInline, so AlwaysInline ends up
/// as operand 5 of the resulting node.
SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dest,
|
||||
SDOperand Src, SDOperand Size,
|
||||
SDOperand Align,
|
||||
SDOperand AlwaysInline) {
|
||||
SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
|
||||
return getNode(ISD::MEMCPY, MVT::Other, Ops, 6);
|
||||
}
|
||||
|
||||
/// getMemmove - Convenience wrapper that builds an ISD::MEMMOVE node whose
/// result type is MVT::Other.  The six operands are handed to getNode in the
/// order Chain, Dest, Src, Size, Align, AlwaysInline, so AlwaysInline ends up
/// as operand 5 of the resulting node.
SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dest,
|
||||
SDOperand Src, SDOperand Size,
|
||||
SDOperand Align,
|
||||
SDOperand AlwaysInline) {
|
||||
SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
|
||||
return getNode(ISD::MEMMOVE, MVT::Other, Ops, 6);
|
||||
}
|
||||
|
||||
/// getMemset - Convenience wrapper that builds an ISD::MEMSET node whose
/// result type is MVT::Other.  The six operands are handed to getNode in the
/// order Chain, Dest, Src, Size, Align, AlwaysInline, so AlwaysInline ends up
/// as operand 5 of the resulting node.
/// NOTE(review): the third parameter is named Src to mirror getMemcpy and
/// getMemmove; for memset it presumably carries the fill value rather than a
/// source pointer -- confirm against callers.
SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dest,
|
||||
SDOperand Src, SDOperand Size,
|
||||
SDOperand Align,
|
||||
SDOperand AlwaysInline) {
|
||||
SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
|
||||
return getNode(ISD::MEMSET, MVT::Other, Ops, 6);
|
||||
}
|
||||
|
||||
SDOperand SelectionDAG::getLoad(MVT::ValueType VT,
|
||||
SDOperand Chain, SDOperand Ptr,
|
||||
const Value *SV, int SVOffset,
|
||||
|
@ -4367,7 +4367,22 @@ void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) {
|
||||
}
|
||||
}
|
||||
|
||||
DAG.setRoot(DAG.getNode(Op, MVT::Other, getRoot(), Op1, Op2, Op3, Op4));
|
||||
SDOperand AlwaysInline = DAG.getConstant(0, MVT::i1);
|
||||
SDOperand Node;
|
||||
switch(Op) {
|
||||
default:
|
||||
assert(0 && "Unknown Op");
|
||||
case ISD::MEMCPY:
|
||||
Node = DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline);
|
||||
break;
|
||||
case ISD::MEMMOVE:
|
||||
Node = DAG.getMemmove(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline);
|
||||
break;
|
||||
case ISD::MEMSET:
|
||||
Node = DAG.getMemset(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline);
|
||||
break;
|
||||
}
|
||||
DAG.setRoot(Node);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1246,9 +1246,10 @@ X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
|
||||
|
||||
SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
|
||||
SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
|
||||
SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1);
|
||||
|
||||
return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode,
|
||||
AlignNode);
|
||||
return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
|
||||
AlwaysInline);
|
||||
} else {
|
||||
return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
|
||||
}
|
||||
@ -4472,9 +4473,23 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
|
||||
SDOperand SourceOp = Op.getOperand(2);
|
||||
SDOperand CountOp = Op.getOperand(3);
|
||||
SDOperand AlignOp = Op.getOperand(4);
|
||||
SDOperand AlwaysInlineOp = Op.getOperand(5);
|
||||
|
||||
bool AlwaysInline = (bool)cast<ConstantSDNode>(AlwaysInlineOp)->getValue();
|
||||
unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue();
|
||||
if (Align == 0) Align = 1;
|
||||
|
||||
// If size is unknown, call memcpy.
|
||||
ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
|
||||
if (!I) {
|
||||
assert(!AlwaysInline && "Cannot inline copy of unknown size");
|
||||
return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
|
||||
}
|
||||
unsigned Size = I->getValue();
|
||||
|
||||
if (AlwaysInline)
|
||||
return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
|
||||
|
||||
// The libc version is likely to be faster for the following cases. It can
|
||||
// use the address value and run time information about the CPU.
|
||||
// With glibc 2.6.1 on a core 2, copying an array of 100M longs was 30% faster
|
||||
@ -4483,13 +4498,7 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
|
||||
if ((Align & 3) != 0)
|
||||
return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
|
||||
|
||||
// If size is unknown, call memcpy.
|
||||
ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
|
||||
if (!I)
|
||||
return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
|
||||
|
||||
// If size is more than the threshold, call memcpy.
|
||||
unsigned Size = I->getValue();
|
||||
if (Size > Subtarget->getMinRepStrSizeThreshold())
|
||||
return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2
|
||||
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
|
||||
|
||||
%struct.s = type { i32, i32, i32, i32, i32, i32 }
|
||||
|
||||
|
28
test/CodeGen/X86/byval4.ll
Normal file
28
test/CodeGen/X86/byval4.ll
Normal file
@ -0,0 +1,28 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2
|
||||
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsw | count 2
|
||||
|
||||
%struct.s = type { i16, i16, i16, i16, i16, i16 }
|
||||
|
||||
|
||||
; g - Stores its six i16 arguments into the six fields of a stack-allocated
; %struct.s and then passes that struct byval to @f twice.
; NOTE(review): every store below claims align 16, but with six i16 fields only
; field 0 of the 16-byte-aligned alloca can actually be 16-byte aligned --
; confirm whether the overstated alignment on fields 1-5 is intentional.
define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3,
|
||||
i16 signext %a4, i16 signext %a5, i16 signext %a6) {
|
||||
entry:
|
||||
%a = alloca %struct.s, align 16
|
||||
%tmp = getelementptr %struct.s* %a, i32 0, i32 0
|
||||
store i16 %a1, i16* %tmp, align 16
|
||||
%tmp2 = getelementptr %struct.s* %a, i32 0, i32 1
|
||||
store i16 %a2, i16* %tmp2, align 16
|
||||
%tmp4 = getelementptr %struct.s* %a, i32 0, i32 2
|
||||
store i16 %a3, i16* %tmp4, align 16
|
||||
%tmp6 = getelementptr %struct.s* %a, i32 0, i32 3
|
||||
store i16 %a4, i16* %tmp6, align 16
|
||||
%tmp8 = getelementptr %struct.s* %a, i32 0, i32 4
|
||||
store i16 %a5, i16* %tmp8, align 16
|
||||
%tmp10 = getelementptr %struct.s* %a, i32 0, i32 5
|
||||
store i16 %a6, i16* %tmp10, align 16
|
||||
; The same aggregate is passed byval in two separate calls.
call void @f( %struct.s* %a byval )
|
||||
call void @f( %struct.s* %a byval )
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @f(%struct.s* byval)
|
Loading…
x
Reference in New Issue
Block a user