Implement XMM subregs.
Extracting the low element of a vector is now done with EXTRACT_SUBREG, and the
zero-extension performed by load movss is now modeled with SUBREG_TO_REG, and
so on. Register-to-register movss and movsd are no longer considered copies;
they are two-address instructions which insert a scalar into a vector.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@97354 91177308-0d34-0410-b5e6-96231b3b80d8
parent a363a9b71a
commit 874cadaf21
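To make the commit message concrete before diving into the diff: below is a
plain C++ model of the three movss behaviors being distinguished. This is an
illustrative sketch only -- the struct and function names are invented for the
example and are not LLVM or SSE API.

// xmm_model.cpp -- semantic model of movss, not real SSE code.
#include <cstdio>

struct XMM { float lane[4]; };   // one 128-bit register, four f32 lanes

// Load form (movss xmm, m32): lane 0 gets the value, lanes 1-3 are zeroed.
// After this commit that zeroing is modeled with SUBREG_TO_REG.
XMM movss_load(float m) { return XMM{{m, 0.0f, 0.0f, 0.0f}}; }

// Register form (movss xmm1, xmm2): only lane 0 of the destination is
// replaced; lanes 1-3 survive. That is why MOVSSrr becomes a two-address
// instruction ("$src1 = $dst") that inserts a scalar, rather than a copy.
XMM movss_rr(XMM dst, XMM src) { dst.lane[0] = src.lane[0]; return dst; }

// Reading the low element needs no move at all -- it is a sub-register
// read, which is what EXTRACT_SUBREG expresses.
float extract_low(const XMM &v) { return v.lane[0]; }

int main() {
  XMM a = {{1, 2, 3, 4}}, b = {{9, 8, 7, 6}};
  XMM merged = movss_rr(a, b);   // {9, 2, 3, 4} -- upper lanes kept
  XMM zeroed = movss_load(5.0f); // {5, 0, 0, 0} -- upper lanes zeroed
  std::printf("merged: %g %g %g %g\n", merged.lane[0], merged.lane[1],
              merged.lane[2], merged.lane[3]);
  std::printf("zeroed: %g %g, low of b: %g\n", zeroed.lane[0], zeroed.lane[1],
              extract_low(b));
}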
lib/Target/X86/X86InstrInfo.cpp

@@ -276,11 +276,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOVDQArr,    X86::MOVDQAmr, 0, 16 },
     { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
     { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
-    { X86::MOVPS2SSrr,  X86::MOVPS2SSmr, 0, 0 },
-    { X86::MOVSDrr,     X86::MOVSDmr, 0, 0 },
     { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
     { X86::MOVSS2DIrr,  X86::MOVSS2DImr, 0, 0 },
-    { X86::MOVSSrr,     X86::MOVSSmr, 0, 0 },
     { X86::MOVUPDrr,    X86::MOVUPDmr, 0, 0 },
     { X86::MOVUPSrr,    X86::MOVUPSmr, 0, 0 },
     { X86::MUL16r,      X86::MUL16m, 1, 0 },
@@ -389,12 +386,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOVDI2PDIrr,     X86::MOVDI2PDIrm, 0 },
     { X86::MOVDI2SSrr,      X86::MOVDI2SSrm, 0 },
     { X86::MOVDQArr,        X86::MOVDQArm, 16 },
-    { X86::MOVSD2PDrr,      X86::MOVSD2PDrm, 0 },
-    { X86::MOVSDrr,         X86::MOVSDrm, 0 },
     { X86::MOVSHDUPrr,      X86::MOVSHDUPrm, 16 },
     { X86::MOVSLDUPrr,      X86::MOVSLDUPrm, 16 },
-    { X86::MOVSS2PSrr,      X86::MOVSS2PSrm, 0 },
-    { X86::MOVSSrr,         X86::MOVSSrm, 0 },
     { X86::MOVSX16rr8,      X86::MOVSX16rm8, 0 },
     { X86::MOVSX32rr16,     X86::MOVSX32rm16, 0 },
     { X86::MOVSX32rr8,      X86::MOVSX32rm8, 0 },
@@ -682,23 +675,20 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
   case X86::MOV16rr:
   case X86::MOV32rr:
   case X86::MOV64rr:
-  case X86::MOVSSrr:
-  case X86::MOVSDrr:

   // FP Stack register class copies
   case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
   case X86::MOV_Fp3264: case X86::MOV_Fp3280:
   case X86::MOV_Fp6432: case X86::MOV_Fp8032:

+  // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64
+  // copies are done with FsMOVAPSrr and FsMOVAPDrr.
+
   case X86::FsMOVAPSrr:
   case X86::FsMOVAPDrr:
   case X86::MOVAPSrr:
   case X86::MOVAPDrr:
   case X86::MOVDQArr:
-  case X86::MOVSS2PSrr:
-  case X86::MOVSD2PDrr:
-  case X86::MOVPS2SSrr:
-  case X86::MOVPD2SDrr:
   case X86::MMX_MOVQ64rr:
     assert(MI.getNumOperands() >= 2 &&
            MI.getOperand(0).isReg() &&
lib/Target/X86/X86InstrSSE.td

@@ -370,18 +370,56 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
 // SSE1 Instructions
 //===----------------------------------------------------------------------===//

-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
-                  "movss\t{$src, $dst|$dst, $src}", []>;
+// Move Instructions. Register-to-register movss is not used for FR32
+// register copies because it's a partial register update; FsMOVAPSrr is
+// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG
+// because INSERT_SUBREG requires that the insert be implementable in terms of
+// a copy, and, as just mentioned, we don't use movss for copies.
+let Constraints = "$src1 = $dst" in
+def MOVSSrr : SSI<0x10, MRMSrcReg,
+                  (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
+                  "movss\t{$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst,
+                        (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>;
+
+// Extract the low 32-bit value from one vector and insert it into another.
+let AddedComplexity = 15 in
+def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
+          (MOVSSrr VR128:$src1,
+                   (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>;
+
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>;
+
+// Loading from memory automatically zeroing upper bits.
 let canFoldAsLoad = 1, isReMaterializable = 1 in
 def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                   "movss\t{$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (loadf32 addr:$src))]>;
+
+// MOVSSrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+let AddedComplexity = 20 in {
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+          (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+          (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+          (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+}
+
+// Store scalar value to memory.
 def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
                   "movss\t{$src, $dst|$dst, $src}",
                   [(store FR32:$src, addr:$dst)]>;
+
+// Extract and store.
+def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+                 addr:$dst),
+          (MOVSSmr addr:$dst,
+                   (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;

 // Conversion instructions
 def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
                       "cvttss2si\t{$src, $dst|$dst, $src}",
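The SUBREG_TO_REG patterns above encode a hardware fact rather than creating
any new instruction: the load form of movss already clears lanes 1-3, so a
v4f32 built by zero-extending a loaded scalar is just the MOVSSrm result viewed
through the ss subregister index. A small demonstration of that guarantee with
SSE intrinsics -- assumes an x86 host and something like g++ -msse2 to build;
the file name is invented:

// movss_zero_demo.cpp
#include <xmmintrin.h>
#include <cstdio>

int main() {
  float f = 3.0f;
  // _mm_load_ss compiles to the movss load form: lane 0 = f, lanes 1-3 = 0.
  // No separate zeroing instruction is needed -- exactly the claim that
  // (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss) makes.
  __m128 v = _mm_load_ss(&f);
  float out[4];
  _mm_storeu_ps(out, v);
  std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // 3 0 0 0
}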
@@ -1090,76 +1128,67 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
 def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
 def : Pat<(v4f32 immAllZerosV), (V_SET0)>;

-// FR32 to 128-bit vector conversion.
-let isAsCheapAsAMove = 1 in
-def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src),
-                     "movss\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                           (v4f32 (scalar_to_vector FR32:$src)))]>;
-def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
-                     "movss\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                           (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
-
-// FIXME: may not be able to eliminate this movss with coalescing the src and
-// dest register classes are different. We really want to write this pattern
-// like this:
-// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
-//           (f32 FR32:$src)>;
-let isAsCheapAsAMove = 1 in
-def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src),
-                     "movss\t{$src, $dst|$dst, $src}",
-                     [(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
-                                                      (iPTR 0)))]>;
-def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
-                     "movss\t{$src, $dst|$dst, $src}",
-                     [(store (f32 (vector_extract (v4f32 VR128:$src),
-                                                  (iPTR 0))), addr:$dst)]>;
-
-
-// Move to lower bits of a VR128, leaving upper bits alone.
-// Three operand (but two address) aliases.
-let Constraints = "$src1 = $dst" in {
-let neverHasSideEffects = 1 in
-def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
-                      (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
-                      "movss\t{$src2, $dst|$dst, $src2}", []>;
-
-let AddedComplexity = 15 in
-def MOVLPSrr : SSI<0x10, MRMSrcReg,
-                   (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                   "movss\t{$src2, $dst|$dst, $src2}",
-                   [(set VR128:$dst,
-                         (v4f32 (movl VR128:$src1, VR128:$src2)))]>;
-}
-
-// Move to lower bits of a VR128 and zeroing upper bits.
-// Loading from memory automatically zeroing upper bits.
-let AddedComplexity = 20 in
-def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
-                      "movss\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
-                                                           (loadf32 addr:$src))))))]>;
-
-def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
-          (MOVZSS2PSrm addr:$src)>;
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;

 //===---------------------------------------------------------------------===//
 // SSE2 Instructions
 //===---------------------------------------------------------------------===//

-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
-                  "movsd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+// Move Instructions. Register-to-register movsd is not used for FR64
+// register copies because it's a partial register update; FsMOVAPDrr is
+// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG
+// because INSERT_SUBREG requires that the insert be implementable in terms of
+// a copy, and, as just mentioned, we don't use movsd for copies.
+let Constraints = "$src1 = $dst" in
+def MOVSDrr : SDI<0x10, MRMSrcReg,
+                  (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
+                  "movsd\t{$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst,
+                        (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
+
+// Extract the low 64-bit value from one vector and insert it into another.
+let AddedComplexity = 15 in
+def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+          (MOVSDrr VR128:$src1,
+                   (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>;
+
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>;
+
+// Loading from memory automatically zeroing upper bits.
+let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
 def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
                   "movsd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (loadf64 addr:$src))]>;
+
+// MOVSDrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+let AddedComplexity = 20 in {
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (X86vzload addr:$src)),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+}
+
+// Store scalar value to memory.
 def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                   "movsd\t{$src, $dst|$dst, $src}",
                   [(store FR64:$src, addr:$dst)]>;
+
+// Extract and store.
+def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+                 addr:$dst),
+          (MOVSDmr addr:$dst,
+                   (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;

 // Conversion instructions
 def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
                       "cvttsd2si\t{$src, $dst|$dst, $src}",
@@ -1213,7 +1242,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                 Requires<[HasSSE2, OptForSize]>;

 def : Pat<(extloadf32 addr:$src),
-          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
+          (CVTSS2SDrr (MOVSSrm addr:$src))>,
+      Requires<[HasSSE2, OptForSpeed]>;

 // Match intrinsics which expect XMM operand(s).
 def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
@@ -2397,17 +2427,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                        [(set VR128:$dst, (v4i32 immAllOnesV))]>;

-// FR64 to 128-bit vector conversion.
-let isAsCheapAsAMove = 1 in
-def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
-                     "movsd\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                           (v2f64 (scalar_to_vector FR64:$src)))]>;
-def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                     "movsd\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                           (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
-
 def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -2436,20 +2455,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                       [(store (i64 (vector_extract (v2i64 VR128:$src),
                                                    (iPTR 0))), addr:$dst)]>;

-// FIXME: may not be able to eliminate this movss with coalescing the src and
-// dest register classes are different. We really want to write this pattern
-// like this:
-// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
-//           (f32 FR32:$src)>;
-let isAsCheapAsAMove = 1 in
-def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src),
-                     "movsd\t{$src, $dst|$dst, $src}",
-                     [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
-                                                      (iPTR 0)))]>;
-def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                     "movsd\t{$src, $dst|$dst, $src}",
-                     [(store (f64 (vector_extract (v2f64 VR128:$src),
-                                                  (iPTR 0))), addr:$dst)]>;
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+          (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;

 def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2466,44 +2474,11 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;

-
-// Move to lower bits of a VR128, leaving upper bits alone.
-// Three operand (but two address) aliases.
-let Constraints = "$src1 = $dst" in {
-let neverHasSideEffects = 1 in
-def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
-                      (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
-                      "movsd\t{$src2, $dst|$dst, $src2}", []>;
-
-let AddedComplexity = 15 in
-def MOVLPDrr : SDI<0x10, MRMSrcReg,
-                   (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                   "movsd\t{$src2, $dst|$dst, $src2}",
-                   [(set VR128:$dst,
-                         (v2f64 (movl VR128:$src1, VR128:$src2)))]>;
-}
-
 // Store / copy lower 64-bits of a XMM register.
 def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;

-// Move to lower bits of a VR128 and zeroing upper bits.
-// Loading from memory automatically zeroing upper bits.
-let AddedComplexity = 20 in {
-def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                      "movsd\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst,
-                            (v2f64 (X86vzmovl (v2f64 (scalar_to_vector
-                                                      (loadf64 addr:$src))))))]>;
-
-def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
-          (MOVZSD2PDrm addr:$src)>;
-def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
-          (MOVZSD2PDrm addr:$src)>;
-def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>;
-}
-
 // movd / movq to XMM register zero-extends
 let AddedComplexity = 15 in {
 def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
@ -3049,13 +3024,15 @@ let Predicates = [HasSSE2] in {
|
||||
let AddedComplexity = 15 in {
|
||||
// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
|
||||
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
|
||||
(MOVLSD2PDrr (V_SET0), FR64:$src)>;
|
||||
(MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
|
||||
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
|
||||
(MOVLSS2PSrr (V_SET0), FR32:$src)>;
|
||||
(MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
|
||||
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
|
||||
(MOVLPSrr (V_SET0), VR128:$src)>;
|
||||
(MOVSSrr (v4f32 (V_SET0)),
|
||||
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
|
||||
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
|
||||
(MOVLPSrr (V_SET0), VR128:$src)>;
|
||||
(MOVSSrr (v4i32 (V_SET0)),
|
||||
(EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
|
||||
}
|
||||
|
||||
// Splat v2f64 / v2i64
|
||||
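The rewritten X86vzmovl patterns above select the classic "zero the register,
then movs{s|d} the scalar into the low lane" idiom, now spelled with the real
two-address MOVSSrr/MOVSDrr instead of the deleted MOVL* aliases. The same
idiom is reachable from C via intrinsics -- a sketch, assuming an x86 host
with SSE2 and an invented file name:

// vzmovl_demo.cpp
#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128d x = _mm_set_pd(7.0, 3.0);  // lanes: {3.0, 7.0}
  // _mm_move_sd(a, b) keeps lane 0 of b and lane 1 of a -- the movsd
  // register form. With a = zero vector this matches the effect of
  // (MOVSDrr (v2f64 (V_SET0)), FR64:$src) in the pattern above.
  __m128d z = _mm_move_sd(_mm_setzero_pd(), x);
  double out[2];
  _mm_storeu_pd(out, z);
  std::printf("%g %g\n", out[0], out[1]);  // 3 0
}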
@@ -3190,15 +3167,19 @@ def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
 let AddedComplexity = 15 in {
 // Setting the lowest element in the vector.
 def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
-          (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+          (MOVSSrr (v4i32 VR128:$src1),
+                   (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>;
 def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
-          (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+          (MOVSDrr (v2i64 VR128:$src1),
+                   (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>;

-// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
+// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
 def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
-          (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+      Requires<[HasSSE2]>;
 def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
-          (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+      Requires<[HasSSE2]>;
 }

 // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
lib/Target/X86/X86RegisterInfo.cpp

@@ -191,6 +191,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
         return &X86::GR16_NOREXRegClass;
       else if (A == &X86::GR16_ABCDRegClass)
         return &X86::GR16_ABCDRegClass;
+    } else if (B == &X86::FR32RegClass) {
+      return A;
     }
     break;
   case 2:
@@ -207,6 +209,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
       else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
               A == &X86::GR16_NOREXRegClass)
        return &X86::GR16_ABCDRegClass;
+    } else if (B == &X86::FR64RegClass) {
+      return A;
     }
     break;
   case 3:
@@ -234,6 +238,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
         return &X86::GR32_NOREXRegClass;
       else if (A == &X86::GR32_ABCDRegClass)
         return &X86::GR64_ABCDRegClass;
+    } else if (B == &X86::VR128RegClass) {
+      return A;
     }
     break;
   case 4:
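What the three added cases buy: getMatchingSuperRegClass(A, B, Idx) answers
"which registers of class A have a sub-register of index Idx lying in class
B?" Returning A unrestricted says every VR128 register carries an FR32 (ss,
index 1) and an FR64 (sd, index 2) sub-register, and every VR256 register an
xmm (index 3) sub-register, so the coalescer may fold an EXTRACT_SUBREG into
a plain sub-register read. Below is a condensed sketch of that dispatch with
placeholder types -- the real function operates on TargetRegisterClass
pointers and also handles the GPR cases elided here:

// super_reg_sketch.cpp -- illustration only, not the LLVM signature.
#include <cstdio>

enum RC { GR16_ABCD, FR32, FR64, VR128, NoClass };

RC getMatchingSuperRegClass(RC A, RC B, unsigned SubIdx) {
  switch (SubIdx) {
  case 1: if (B == FR32)  return A; break; // x86_subreg_ss
  case 2: if (B == FR64)  return A; break; // x86_subreg_sd
  case 3: if (B == VR128) return A; break; // x86_subreg_xmm
  }
  return NoClass;                          // no matching super-register class
}

int main() {
  // An FR32 value can live in the ss subreg of any VR128 register:
  std::printf("%d\n", getMatchingSuperRegClass(VR128, FR32, 1) == VR128); // 1
}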
lib/Target/X86/X86RegisterInfo.h

@@ -35,7 +35,8 @@ namespace X86 {
   /// these indices must be kept in sync with the class indices in the
   /// X86RegisterInfo.td file.
   enum SubregIndex {
-    SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4
+    SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4,
+    SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3
   };
 }
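One subtlety in the enum above: C++ permits distinct enumerators with equal
values, and the new XMM indices deliberately reuse 1-3. SUBREG_SS collides
numerically with SUBREG_8BIT, but the two belong to disjoint register
families (XMM/YMM vs. general-purpose), so the register class of the operand
always disambiguates. A compilable restatement, with the demo itself invented
for illustration:

// subreg_index_check.cpp
#include <cstdio>

enum SubregIndex {
  SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4,
  SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3   // legal: values may repeat
};

int main() {
  // Same number, different index namespace by convention:
  std::printf("SUBREG_8BIT=%d SUBREG_SS=%d\n", SUBREG_8BIT, SUBREG_SS); // 1 1
  return 0;
}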
lib/Target/X86/X86RegisterInfo.td

@@ -158,22 +158,22 @@ let Namespace = "X86" in {
   def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;

   // YMM Registers, used by AVX instructions
-  def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>;
-  def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>;
-  def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>;
-  def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>;
-  def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>;
-  def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>;
-  def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>;
-  def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>;
-  def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>;
-  def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>;
-  def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>;
-  def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>;
-  def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>;
-  def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>;
-  def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>;
-  def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>;
+  def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>;
+  def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>;
+  def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>;
+  def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegNum<[20, 24, 24]>;
+  def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegNum<[21, 25, 25]>;
+  def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegNum<[22, 26, 26]>;
+  def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegNum<[23, 27, 27]>;
+  def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegNum<[24, 28, 28]>;
+  def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegNum<[25, -2, -2]>;
+  def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegNum<[26, -2, -2]>;
+  def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegNum<[27, -2, -2]>;
+  def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegNum<[28, -2, -2]>;
+  def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegNum<[29, -2, -2]>;
+  def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>;
+  def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>;
+  def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>;

   // Floating point stack registers
   def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
@@ -238,6 +238,10 @@ def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
 def x86_subreg_16bit : PatLeaf<(i32 3)>;
 def x86_subreg_32bit : PatLeaf<(i32 4)>;

+def x86_subreg_ss : PatLeaf<(i32 1)>;
+def x86_subreg_sd : PatLeaf<(i32 2)>;
+def x86_subreg_xmm : PatLeaf<(i32 3)>;
+
 def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI,
                     R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
                    [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
@@ -282,6 +286,26 @@ def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
                    [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
                     XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;

+def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+                    YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
+                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
+def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+                    YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
+                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
+def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
+                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
+def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
+                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
 //===----------------------------------------------------------------------===//
 // Register Class Definitions... now that we have all of the pieces, define the
 // top-level register classes. The order specified in the register list is
@@ -793,6 +817,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
                          [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
                           XMM8, XMM9, XMM10, XMM11,
                           XMM12, XMM13, XMM14, XMM15]> {
+  let SubRegClassList = [FR32, FR64];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -811,7 +836,9 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
 def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
                          [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
                           YMM8, YMM9, YMM10, YMM11,
-                          YMM12, YMM13, YMM14, YMM15]>;
+                          YMM12, YMM13, YMM14, YMM15]> {
+  let SubRegClassList = [FR32, FR64, VR128];
+}

 // Status flags registers.
 def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {