Scalar to vector conversions using direct moves

This patch corresponds to review:
http://reviews.llvm.org/D11471

It improves the code generated for converting a scalar to a vector value. With
direct moves from GPRs to VSRs, we no longer require expensive stack operations
for this. Subsequent patches will handle the reverse case and more general
operations between vectors and their scalar elements.

llvm-svn: 244921
This commit is contained in:
Nemanja Ivanovic 2015-08-13 17:40:44 +00:00
parent 15b7b9c050
commit 285f278c18
9 changed files with 205 additions and 25 deletions

View File

@ -542,6 +542,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasVSX()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
if (Subtarget.hasP8Vector())
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
if (Subtarget.hasDirectMove()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
}
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
@ -11490,7 +11498,7 @@ bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT , unsigned DefinedValues) const {
if (VT == MVT::v2i64)
return false;
return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
if (Subtarget.hasQPX()) {
if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)

View File

@ -1181,6 +1181,23 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
}
// Single Precision Conversions (FP <-> INT)
def XSCVSXDSP : XX2Form<60, 312,
(outs vssrc:$XT), (ins vsfrc:$XB),
"xscvsxdsp $XT, $XB", IIC_VecFP,
[(set f32:$XT, (PPCfcfids f64:$XB))]>;
def XSCVUXDSP : XX2Form<60, 296,
(outs vssrc:$XT), (ins vsfrc:$XB),
"xscvuxdsp $XT, $XB", IIC_VecFP,
[(set f32:$XT, (PPCfcfidus f64:$XB))]>;
// Conversions between vector and scalar single precision
def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB),
"xscvdpspn $XT, $XB", IIC_VecFP, []>;
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
"xscvspdpn $XT, $XB", IIC_VecFP, []>;
} // AddedComplexity = 400
} // HasP8Vector
@ -1204,3 +1221,60 @@ let Predicates = [HasDirectMove, HasVSX] in {
"mtvsrwz $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
} // HasDirectMove, HasVSX
/* Direct moves of various size entities from GPRs into VSRs. Each lines up
the value in element 0 (for both BE and LE). Namely, entities smaller than
a doubleword are shifted left and moved for BE. For LE, they are moved and
then swapped so the value lands in the least significant element of the VSR.
*/
// Named dag fragments used by the scalar_to_vector patterns below to move a
// GPR value into element 0 of a VSR without a store/reload through memory.
def Moves {
// BE: widen the 32-bit GPR to i64, rotate the byte into the most significant
// byte of the doubleword, then move it directly to the VSR.
dag BE_BYTE_0 = (MTVSRD
(RLDICR
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
// BE: same, with the halfword rotated into the top 16 bits.
dag BE_HALF_0 = (MTVSRD
(RLDICR
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
// BE: same, with the word rotated into the top 32 bits.
dag BE_WORD_0 = (MTVSRD
(RLDICR
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
// A full doubleword needs no shifting before the move.
dag BE_DWORD_0 = (MTVSRD $A);
// LE: widen the word to i64 and move it; the XXPERMDI (doubleword swap)
// below then places it in the least significant element.
dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32));
dag LE_WORD_1 = (v2i64 (COPY_TO_REGCLASS LE_MTVSRW, VSRC));
dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2);
// LE doubleword: reuse the BE move, then swap doublewords.
dag LE_DWORD_1 = (v2i64 (COPY_TO_REGCLASS BE_DWORD_0, VSRC));
dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2);
}
// BE: a single xscvdpspn leaves the single-precision value in element 0, so
// no shuffle is required.
let Predicates = [IsBigEndian, HasP8Vector] in {
def : Pat<(v4f32 (scalar_to_vector f32:$A)),
(v4f32 (XSCVDPSPN $A))>;
} // IsBigEndian, HasP8Vector
// BE integer scalar_to_vector: pick the Moves fragment that shifts the value
// into the high-order bits of the doubleword, then retag the result as the
// requested vector type via COPY_TO_REGCLASS.
let Predicates = [IsBigEndian, HasDirectMove] in {
def : Pat<(v16i8 (scalar_to_vector i32:$A)),
(v16i8 (COPY_TO_REGCLASS Moves.BE_BYTE_0, VSRC))>;
def : Pat<(v8i16 (scalar_to_vector i32:$A)),
(v8i16 (COPY_TO_REGCLASS Moves.BE_HALF_0, VSRC))>;
def : Pat<(v4i32 (scalar_to_vector i32:$A)),
(v4i32 (COPY_TO_REGCLASS Moves.BE_WORD_0, VSRC))>;
def : Pat<(v2i64 (scalar_to_vector i64:$A)),
(v2i64 (COPY_TO_REGCLASS Moves.BE_DWORD_0, VSRC))>;
} // IsBigEndian, HasDirectMove
// LE: after the conversion, rotate the word into the least significant
// element with xxsldwi.
let Predicates = [IsLittleEndian, HasP8Vector] in {
def : Pat<(v4f32 (scalar_to_vector f32:$A)),
(v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>;
} // IsLittleEndian, HasP8Vector
// LE integer scalar_to_vector: byte, halfword and word all arrive in the low
// 32 bits of the GPR, so they share the LE_WORD_0 fragment (move + swap); a
// doubleword uses LE_DWORD_0, which is already typed v2i64.
let Predicates = [IsLittleEndian, HasDirectMove] in {
def : Pat<(v16i8 (scalar_to_vector i32:$A)),
(v16i8 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>;
def : Pat<(v8i16 (scalar_to_vector i32:$A)),
(v8i16 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>;
def : Pat<(v4i32 (scalar_to_vector i32:$A)),
(v4i32 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>;
def : Pat<(v2i64 (scalar_to_vector i64:$A)),
(v2i64 Moves.LE_DWORD_0)>;
} // IsLittleEndian, HasDirectMove

View File

@ -77,6 +77,10 @@ namespace {
return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
}
bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) {
return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI);
}
protected:
bool processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
@ -100,7 +104,8 @@ protected:
IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
&PPC::VSLRCRegClass;
assert((IsF8Reg(SrcMO.getReg(), MRI) ||
IsVRReg(SrcMO.getReg(), MRI)) &&
IsVRReg(SrcMO.getReg(), MRI) ||
IsVSFReg(SrcMO.getReg(), MRI)) &&
"Unknown source for a VSX copy");
unsigned NewVReg = MRI.createVirtualRegister(SrcRC);

View File

@ -24,8 +24,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z6testfcc
; CHECK: mtvsrwz [[MOVEREG01:[0-9]+]], 3
; FIXME: Once we have XSCVUXDSP implemented, this will change
; CHECK: fcfidus 1, [[MOVEREG01]]
; CHECK: xscvuxdsp 1, [[MOVEREG01]]
}
; Function Attrs: nounwind
@ -77,8 +76,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z7testfuch
; CHECK: mtvsrwz [[MOVEREG03:[0-9]+]], 3
; FIXME: Once we have XSCVUXDSP implemented, this will change
; CHECK: fcfidus 1, [[MOVEREG03]]
; CHECK: xscvuxdsp 1, [[MOVEREG03]]
}
; Function Attrs: nounwind
@ -130,8 +128,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z6testfss
; CHECK: mtvsrwa [[MOVEREG05:[0-9]+]], 3
; FIXME: Once we have XSCVSXDSP implemented, this will change
; CHECK: fcfids 1, [[MOVEREG05]]
; CHECK: xscvsxdsp 1, [[MOVEREG05]]
}
; Function Attrs: nounwind
@ -183,8 +180,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z7testfust
; CHECK: mtvsrwz [[MOVEREG07:[0-9]+]], 3
; FIXME: Once we have XSCVUXDSP implemented, this will change
; CHECK: fcfidus 1, [[MOVEREG07]]
; CHECK: xscvuxdsp 1, [[MOVEREG07]]
}
; Function Attrs: nounwind
@ -236,8 +232,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z6testfii
; CHECK: mtvsrwa [[MOVEREG09:[0-9]+]], 3
; FIXME: Once we have XSCVSXDSP implemented, this will change
; CHECK: fcfids 1, [[MOVEREG09]]
; CHECK: xscvsxdsp 1, [[MOVEREG09]]
}
; Function Attrs: nounwind
@ -289,8 +284,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z7testfuij
; CHECK: mtvsrwz [[MOVEREG11:[0-9]+]], 3
; FIXME: Once we have XSCVUXDSP implemented, this will change
; CHECK: fcfidus 1, [[MOVEREG11]]
; CHECK: xscvuxdsp 1, [[MOVEREG11]]
}
; Function Attrs: nounwind
@ -342,8 +336,7 @@ entry:
ret float %conv
; CHECK-LABEL:@_Z7testfllx
; CHECK: mtvsrd [[MOVEREG13:[0-9]+]], 3
; FIXME: Once we have XSCVSXDSP implemented, this will change
; CHECK: fcfids 1, [[MOVEREG13]]
; CHECK: xscvsxdsp 1, [[MOVEREG13]]
}
; Function Attrs: nounwind
@ -395,8 +388,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z8testfully
; CHECK: mtvsrd [[MOVEREG15:[0-9]+]], 3
; FIXME: Once we have XSCVUXDSP implemented, this will change
; CHECK: fcfidus 1, [[MOVEREG15]]
; CHECK: xscvuxdsp 1, [[MOVEREG15]]
}
; Function Attrs: nounwind

View File

@ -0,0 +1,79 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-LE
; The build[csilf] functions simply test the scalar_to_vector handling with
; direct moves. This corresponds to the "insertelement" instruction. Subsequent
; to this, there will be a splat corresponding to the shufflevector.
; Function Attrs: nounwind
; i8 splat: expect a direct move with no stack traffic — BE shifts the byte
; into the top of a doubleword before mtvsrd; LE moves then swaps doublewords.
define <16 x i8> @buildc(i8 zeroext %a) {
entry:
%a.addr = alloca i8, align 1
store i8 %a, i8* %a.addr, align 1
%0 = load i8, i8* %a.addr, align 1
%splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0
%splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %splat.splat
; CHECK: sldi [[REG1:[0-9]+]], 3, 56
; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
}
; Function Attrs: nounwind
; i16 splat: same shape as buildc, with a 48-bit shift on BE.
define <8 x i16> @builds(i16 zeroext %a) {
entry:
%a.addr = alloca i16, align 2
store i16 %a, i16* %a.addr, align 2
%0 = load i16, i16* %a.addr, align 2
%splat.splatinsert = insertelement <8 x i16> undef, i16 %0, i32 0
%splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %splat.splat
; CHECK: sldi [[REG1:[0-9]+]], 3, 48
; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
}
; Function Attrs: nounwind
; i32 splat: same shape as buildc, with a 32-bit shift on BE.
define <4 x i32> @buildi(i32 zeroext %a) {
entry:
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
%0 = load i32, i32* %a.addr, align 4
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %splat.splat
; CHECK: sldi [[REG1:[0-9]+]], 3, 32
; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
}
; Function Attrs: nounwind
; i64 splat: the full doubleword needs no shift on BE — just the mtvsrd;
; LE still needs the doubleword swap.
define <2 x i64> @buildl(i64 %a) {
entry:
%a.addr = alloca i64, align 8
store i64 %a, i64* %a.addr, align 8
%0 = load i64, i64* %a.addr, align 8
%splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0
%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %splat.splat
; CHECK: mtvsrd {{[0-9]+}}, 3
; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
}
; Function Attrs: nounwind
; float splat: xscvdpspn alone suffices on BE; LE additionally rotates the
; word into the low element with xxsldwi.
define <4 x float> @buildf(float %a) {
entry:
%a.addr = alloca float, align 4
store float %a, float* %a.addr, align 4
%0 = load float, float* %a.addr, align 4
%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %splat.splat
; CHECK: xscvdpspn {{[0-9]+}}, 1
; CHECK-LE: xscvdpspn [[REG1:[0-9]+]], 1
; CHECK-LE: xxsldwi {{[0-9]+}}, [[REG1]], [[REG1]], 1
}

View File

@ -1226,11 +1226,11 @@ define <2 x i32> @test80(i32 %v) {
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test80
; CHECK-LE-DAG: addi [[R1:[0-9]+]], 1, -16
; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3
; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
; CHECK-LE-DAG: lxvd2x [[V1:[0-9]+]], 0, [[R1]]
; CHECK-LE-DAG: xxswapd [[V1:[0-9]+]], [[R1]]
; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
; CHECK-LE-DAG: xxswapd 34, [[V1]]
; CHECK-LE-DAG: xxspltd 34, [[V1]]
; CHECK-LE-DAG: xxswapd 35, [[V2]]
; CHECK-LE: vaddudm 2, 2, 3
; CHECK-LE: blr

View File

@ -55,8 +55,7 @@ entry:
ret void
; CHECK-LABEL: @intToFlt
; CHECK: lxsiwax [[REGLD2:[0-9]+]],
; FIXME: the below will change when the VSX form is implemented
; CHECK: fcfids {{[0-9]}}, [[REGLD2]]
; CHECK: xscvsxdsp {{[0-9]}}, [[REGLD2]]
}
; Function Attrs: nounwind
@ -108,8 +107,7 @@ entry:
ret void
; CHECK-LABEL: @uIntToFlt
; CHECK: lxsiwzx [[REGLD4:[0-9]+]],
; FIXME: the below will change when the VSX form is implemented
; CHECK: fcfidus {{[0-9]+}}, [[REGLD4]]
; CHECK: xscvuxdsp {{[0-9]+}}, [[REGLD4]]
}
; Function Attrs: nounwind

View File

@ -57,6 +57,9 @@
# CHECK: xscvdpsp 7, 27
0xf0 0xe0 0xdc 0x24
# CHECK: xscvdpspn 7, 27
0xf0 0xe0 0xdc 0x2c
# CHECK: xscvdpsxds 7, 27
0xf0 0xe0 0xdd 0x60
@ -72,9 +75,18 @@
# CHECK: xscvspdp 7, 27
0xf0 0xe0 0xdd 0x24
# CHECK: xscvspdpn 7, 27
0xf0 0xe0 0xdd 0x2c
# CHECK: xscvsxdsp 7, 27
0xf0 0xe0 0xdc 0xe0
# CHECK: xscvsxddp 7, 27
0xf0 0xe0 0xdd 0xe0
# CHECK: xscvuxdsp 7, 27
0xf0 0xe0 0xdc 0xa0
# CHECK: xscvuxddp 7, 27
0xf0 0xe0 0xdd 0xa0

View File

@ -62,6 +62,9 @@
# CHECK-BE: xscvdpsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0x24]
# CHECK-LE: xscvdpsp 7, 27 # encoding: [0x24,0xdc,0xe0,0xf0]
xscvdpsp 7, 27
# CHECK-BE: xscvdpspn 7, 27 # encoding: [0xf0,0xe0,0xdc,0x2c]
# CHECK-LE: xscvdpspn 7, 27 # encoding: [0x2c,0xdc,0xe0,0xf0]
xscvdpspn 7, 27
# CHECK-BE: xscvdpsxds 7, 27 # encoding: [0xf0,0xe0,0xdd,0x60]
# CHECK-LE: xscvdpsxds 7, 27 # encoding: [0x60,0xdd,0xe0,0xf0]
xscvdpsxds 7, 27
@ -77,9 +80,18 @@
# CHECK-BE: xscvspdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x24]
# CHECK-LE: xscvspdp 7, 27 # encoding: [0x24,0xdd,0xe0,0xf0]
xscvspdp 7, 27
# CHECK-BE: xscvspdpn 7, 27 # encoding: [0xf0,0xe0,0xdd,0x2c]
# CHECK-LE: xscvspdpn 7, 27 # encoding: [0x2c,0xdd,0xe0,0xf0]
xscvspdpn 7, 27
# CHECK-BE: xscvsxdsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0xe0]
# CHECK-LE: xscvsxdsp 7, 27 # encoding: [0xe0,0xdc,0xe0,0xf0]
xscvsxdsp 7, 27
# CHECK-BE: xscvsxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xe0]
# CHECK-LE: xscvsxddp 7, 27 # encoding: [0xe0,0xdd,0xe0,0xf0]
xscvsxddp 7, 27
# CHECK-BE: xscvuxdsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0xa0]
# CHECK-LE: xscvuxdsp 7, 27 # encoding: [0xa0,0xdc,0xe0,0xf0]
xscvuxdsp 7, 27
# CHECK-BE: xscvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa0]
# CHECK-LE: xscvuxddp 7, 27 # encoding: [0xa0,0xdd,0xe0,0xf0]
xscvuxddp 7, 27