mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-04 01:26:41 +00:00
Use subword loads instead of a 4-byte load when the size of a structure (or a
piece of it) that is being passed by value is smaller than a word.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138007 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
28bea08e53
commit
5ac8547a41
@ -1805,43 +1805,90 @@ WriteByValArg(SDValue& Chain, DebugLoc dl,
|
||||
SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
|
||||
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
|
||||
const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
|
||||
MVT PtrType) {
|
||||
unsigned FirstWord = VA.getLocMemOffset() / 4;
|
||||
unsigned NumWords = (Flags.getByValSize() + 3) / 4;
|
||||
unsigned LastWord = FirstWord + NumWords;
|
||||
unsigned CurWord;
|
||||
MVT PtrType, bool isLittle) {
|
||||
unsigned LocMemOffset = VA.getLocMemOffset();
|
||||
unsigned Offset = 0;
|
||||
uint32_t RemainingSize = Flags.getByValSize();
|
||||
unsigned ByValAlign = Flags.getByValAlign();
|
||||
|
||||
// copy the first 4 words of byval arg to registers A0 - A3
|
||||
for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize);
|
||||
++CurWord) {
|
||||
// Copy the first 4 words of byval arg to registers A0 - A3.
|
||||
// FIXME: Use a stricter alignment if it enables better optimization in passes
|
||||
// run later.
|
||||
for (; RemainingSize >= 4 && LocMemOffset < 4 * 4;
|
||||
Offset += 4, RemainingSize -= 4, LocMemOffset += 4) {
|
||||
SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
|
||||
DAG.getConstant((CurWord - FirstWord) * 4,
|
||||
MVT::i32));
|
||||
DAG.getConstant(Offset, MVT::i32));
|
||||
SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr,
|
||||
MachinePointerInfo(),
|
||||
false, false, std::min(ByValAlign,
|
||||
(unsigned )4));
|
||||
MemOpChains.push_back(LoadVal.getValue(1));
|
||||
unsigned DstReg = O32IntRegs[CurWord];
|
||||
unsigned DstReg = O32IntRegs[LocMemOffset / 4];
|
||||
RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
|
||||
}
|
||||
|
||||
// copy remaining part of byval arg to stack.
|
||||
if (CurWord < LastWord) {
|
||||
unsigned SizeInBytes = (LastWord - CurWord) * 4;
|
||||
SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
|
||||
DAG.getConstant((CurWord - FirstWord) * 4,
|
||||
MVT::i32));
|
||||
LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true);
|
||||
SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
|
||||
Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
|
||||
DAG.getConstant(SizeInBytes, MVT::i32),
|
||||
/*Align*/ByValAlign,
|
||||
/*isVolatile=*/false, /*AlwaysInline=*/false,
|
||||
MachinePointerInfo(0), MachinePointerInfo(0));
|
||||
MemOpChains.push_back(Chain);
|
||||
if (RemainingSize == 0)
|
||||
return;
|
||||
|
||||
// If there still is a register available for argument passing, write the
|
||||
// remaining part of the structure to it using subword loads and shifts.
|
||||
if (LocMemOffset < 4 * 4) {
|
||||
assert(RemainingSize <= 3 && RemainingSize >= 1 &&
|
||||
"There must be one to three bytes remaining.");
|
||||
unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize);
|
||||
SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
|
||||
DAG.getConstant(Offset, MVT::i32));
|
||||
unsigned Alignment = std::min(ByValAlign, (unsigned )4);
|
||||
SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
|
||||
LoadPtr, MachinePointerInfo(),
|
||||
MVT::getIntegerVT(LoadSize * 8), false,
|
||||
false, Alignment);
|
||||
MemOpChains.push_back(LoadVal.getValue(1));
|
||||
|
||||
// If target is big endian, shift it to the most significant half-word or
|
||||
// byte.
|
||||
if (!isLittle)
|
||||
LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal,
|
||||
DAG.getConstant(32 - LoadSize * 8, MVT::i32));
|
||||
|
||||
Offset += LoadSize;
|
||||
RemainingSize -= LoadSize;
|
||||
|
||||
// Read second subword if necessary.
|
||||
if (RemainingSize != 0) {
|
||||
assert(RemainingSize == 1 && "There must be one byte remaining.");
|
||||
LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
|
||||
DAG.getConstant(Offset, MVT::i32));
|
||||
unsigned Alignment = std::min(ByValAlign, (unsigned )2);
|
||||
SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
|
||||
LoadPtr, MachinePointerInfo(),
|
||||
MVT::i8, false, false, Alignment);
|
||||
MemOpChains.push_back(Subword.getValue(1));
|
||||
// Insert the loaded byte into LoadVal.
|
||||
// FIXME: Use INS if supported by target.
|
||||
unsigned ShiftAmt = isLittle ? 16 : 8;
|
||||
SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword,
|
||||
DAG.getConstant(ShiftAmt, MVT::i32));
|
||||
LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift);
|
||||
}
|
||||
|
||||
unsigned DstReg = O32IntRegs[LocMemOffset / 4];
|
||||
RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
|
||||
return;
|
||||
}
|
||||
|
||||
// Create a fixed object on stack at offset LocMemOffset and copy
|
||||
// remaining part of byval arg to it using memcpy.
|
||||
SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
|
||||
DAG.getConstant(Offset, MVT::i32));
|
||||
LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true);
|
||||
SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
|
||||
Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
|
||||
DAG.getConstant(RemainingSize, MVT::i32),
|
||||
std::min(ByValAlign, (unsigned)4),
|
||||
/*isVolatile=*/false, /*AlwaysInline=*/false,
|
||||
MachinePointerInfo(0), MachinePointerInfo(0));
|
||||
MemOpChains.push_back(Chain);
|
||||
}
|
||||
|
||||
/// LowerCall - function arguments are copied from virtual regs to
|
||||
@ -1974,7 +2021,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||
assert(Flags.getByValSize() &&
|
||||
"ByVal args of size 0 should have been ignored by front-end.");
|
||||
WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg,
|
||||
VA, Flags, getPointerTy());
|
||||
VA, Flags, getPointerTy(), Subtarget->isLittle());
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1,16 +1,41 @@
|
||||
; RUN: llc -march=mips < %s | FileCheck %s
|
||||
|
||||
; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL
|
||||
; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB
|
||||
%struct.S2 = type { %struct.S1, %struct.S1 }
|
||||
%struct.S1 = type { i8, i8 }
|
||||
%struct.S4 = type { [7 x i8] }
|
||||
|
||||
@s2 = common global %struct.S2 zeroinitializer, align 1
|
||||
@s4 = common global %struct.S4 zeroinitializer, align 1
|
||||
|
||||
define void @foo1() nounwind {
|
||||
entry:
|
||||
; CHECK: ulw ${{[0-9]+}}, 2
|
||||
; CHECK-EL: lw $25, %call16(foo2)
|
||||
; CHECK-EL: ulhu $4, 2
|
||||
; CHECK-EL: lw $[[R0:[0-9]+]], %got(s4)
|
||||
; CHECK-EL: lbu $[[R1:[0-9]+]], 6($[[R0]])
|
||||
; CHECK-EL: ulhu $[[R2:[0-9]+]], 4($[[R0]])
|
||||
; CHECK-EL: sll $[[R3:[0-9]+]], $[[R1]], 16
|
||||
; CHECK-EL: ulw $4, 0($[[R0]])
|
||||
; CHECK-EL: lw $25, %call16(foo4)
|
||||
; CHECK-EL: or $5, $[[R2]], $[[R3]]
|
||||
|
||||
; CHECK-EB: ulhu $[[R0:[0-9]+]], 2
|
||||
; CHECK-EB: lw $25, %call16(foo2)
|
||||
; CHECK-EB: sll $4, $[[R0]], 16
|
||||
; CHECK-EB: lw $[[R1:[0-9]+]], %got(s4)
|
||||
; CHECK-EB: ulhu $[[R2:[0-9]+]], 4($[[R1]])
|
||||
; CHECK-EB: lbu $[[R3:[0-9]+]], 6($[[R1]])
|
||||
; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16
|
||||
; CHECK-EB: sll $[[R5:[0-9]+]], $[[R3]], 8
|
||||
; CHECK-EB: ulw $4, 0($[[R1]])
|
||||
; CHECK-EB: lw $25, %call16(foo4)
|
||||
; CHECK-EB: or $5, $[[R4]], $[[R5]]
|
||||
|
||||
tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
|
||||
tail call void @foo4(%struct.S4* byval @s4) nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @foo2(%struct.S1* byval)
|
||||
|
||||
declare void @foo4(%struct.S4* byval)
|
||||
|
Loading…
x
Reference in New Issue
Block a user