mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-13 23:18:51 +00:00
- Added some SSE2 128-bit packed integer ops.
- Added SSE2 128-bit integer pack with signed saturation ops. - Added pshufhw and pshuflw ops. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27252 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
75658b96bf
commit
506d3dfa90
@ -1399,10 +1399,67 @@ bool X86::isPSHUFDMask(SDNode *N) {
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Check if the value doesn't reference the second vector.
|
// Check if the value doesn't reference the second vector.
|
||||||
for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) {
|
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
|
||||||
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
||||||
"Invalid VECTOR_SHUFFLE mask!");
|
"Invalid VECTOR_SHUFFLE mask!");
|
||||||
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() >= 4) return false;
|
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() >= 4)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||||
|
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
|
||||||
|
bool X86::isPSHUFHWMask(SDNode *N) {
|
||||||
|
assert(N->getOpcode() == ISD::BUILD_VECTOR);
|
||||||
|
|
||||||
|
if (N->getNumOperands() != 8)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Lower quadword copied in order.
|
||||||
|
for (unsigned i = 0; i != 4; ++i) {
|
||||||
|
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
||||||
|
"Invalid VECTOR_SHUFFLE mask!");
|
||||||
|
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != i)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upper quadword shuffled.
|
||||||
|
for (unsigned i = 4; i != 8; ++i) {
|
||||||
|
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
||||||
|
"Invalid VECTOR_SHUFFLE mask!");
|
||||||
|
unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
|
||||||
|
if (Val < 4 || Val > 7)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||||
|
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
|
||||||
|
bool X86::isPSHUFLWMask(SDNode *N) {
|
||||||
|
assert(N->getOpcode() == ISD::BUILD_VECTOR);
|
||||||
|
|
||||||
|
if (N->getNumOperands() != 8)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Upper quadword copied in order.
|
||||||
|
for (unsigned i = 4; i != 8; ++i) {
|
||||||
|
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
||||||
|
"Invalid VECTOR_SHUFFLE mask!");
|
||||||
|
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != i)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
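// Lower quadword shuffled: each index should select from elements 0-3.
// NOTE(review): the "Val > 4" check below also accepts index 4; it likely
// should be "Val > 3" -- confirm and fix.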
|
||||||
|
for (unsigned i = 0; i != 4; ++i) {
|
||||||
|
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
||||||
|
"Invalid VECTOR_SHUFFLE mask!");
|
||||||
|
unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
|
||||||
|
if (Val > 4)
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -1431,7 +1488,7 @@ bool X86::isSHUFPMask(SDNode *N) {
|
|||||||
// Each half must refer to only one of the vector.
|
// Each half must refer to only one of the vector.
|
||||||
SDOperand Elt = N->getOperand(0);
|
SDOperand Elt = N->getOperand(0);
|
||||||
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
|
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
|
||||||
for (unsigned i = 1; i != NumElems / 2; ++i) {
|
for (unsigned i = 1; i < NumElems / 2; ++i) {
|
||||||
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
||||||
"Invalid VECTOR_SHUFFLE mask!");
|
"Invalid VECTOR_SHUFFLE mask!");
|
||||||
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
|
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
|
||||||
@ -1440,7 +1497,7 @@ bool X86::isSHUFPMask(SDNode *N) {
|
|||||||
}
|
}
|
||||||
Elt = N->getOperand(NumElems / 2);
|
Elt = N->getOperand(NumElems / 2);
|
||||||
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
|
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
|
||||||
for (unsigned i = NumElems / 2; i != NumElems; ++i) {
|
for (unsigned i = NumElems / 2 + 1; i < NumElems; ++i) {
|
||||||
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
||||||
"Invalid VECTOR_SHUFFLE mask!");
|
"Invalid VECTOR_SHUFFLE mask!");
|
||||||
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
|
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
|
||||||
@ -1583,6 +1640,40 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
|
|||||||
return Mask;
|
return Mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
|
||||||
|
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
|
||||||
|
/// instructions.
|
||||||
|
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
|
||||||
|
unsigned Mask = 0;
|
||||||
|
// 8 nodes, but we only care about the last 4.
|
||||||
|
for (unsigned i = 7; i >= 4; --i) {
|
||||||
|
unsigned Val
|
||||||
|
= cast<ConstantSDNode>(N->getOperand(i))->getValue();
|
||||||
|
Mask |= (Val - 4);
|
||||||
|
if (i != 4)
|
||||||
|
Mask <<= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
|
||||||
|
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
|
||||||
|
/// instructions.
|
||||||
|
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
|
||||||
|
unsigned Mask = 0;
|
||||||
|
// 8 nodes, but we only care about the first 4.
|
||||||
|
for (int i = 3; i >= 0; --i) {
|
||||||
|
unsigned Val
|
||||||
|
= cast<ConstantSDNode>(N->getOperand(i))->getValue();
|
||||||
|
Mask |= Val;
|
||||||
|
if (i != 0)
|
||||||
|
Mask <<= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Mask;
|
||||||
|
}
|
||||||
|
|
||||||
/// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as
|
/// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as
|
||||||
/// values in ther permute mask if needed. Use V1 as second vector if it is
|
/// values in ther permute mask if needed. Use V1 as second vector if it is
|
||||||
/// undef. Return an empty SDOperand is it is already well formed.
|
/// undef. Return an empty SDOperand is it is already well formed.
|
||||||
@ -2399,7 +2490,10 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
|||||||
|
|
||||||
// Splat && PSHUFD's 2nd vector must be undef.
|
// Splat && PSHUFD's 2nd vector must be undef.
|
||||||
if (X86::isSplatMask(PermMask.Val) ||
|
if (X86::isSplatMask(PermMask.Val) ||
|
||||||
((MVT::isInteger(VT) && X86::isPSHUFDMask(PermMask.Val)))) {
|
((MVT::isInteger(VT) &&
|
||||||
|
(X86::isPSHUFDMask(PermMask.Val) ||
|
||||||
|
X86::isPSHUFHWMask(PermMask.Val) ||
|
||||||
|
X86::isPSHUFLWMask(PermMask.Val))))) {
|
||||||
if (V2.getOpcode() != ISD::UNDEF)
|
if (V2.getOpcode() != ISD::UNDEF)
|
||||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
|
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
|
||||||
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
|
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
|
||||||
@ -2607,6 +2701,8 @@ X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
|
|||||||
return (Mask.Val->getNumOperands() == 2 ||
|
return (Mask.Val->getNumOperands() == 2 ||
|
||||||
X86::isSplatMask(Mask.Val) ||
|
X86::isSplatMask(Mask.Val) ||
|
||||||
X86::isPSHUFDMask(Mask.Val) ||
|
X86::isPSHUFDMask(Mask.Val) ||
|
||||||
|
X86::isPSHUFHWMask(Mask.Val) ||
|
||||||
|
X86::isPSHUFLWMask(Mask.Val) ||
|
||||||
X86::isSHUFPMask(Mask.Val) ||
|
X86::isSHUFPMask(Mask.Val) ||
|
||||||
X86::isUNPCKLMask(Mask.Val) ||
|
X86::isUNPCKLMask(Mask.Val) ||
|
||||||
X86::isUNPCKHMask(Mask.Val));
|
X86::isUNPCKHMask(Mask.Val));
|
||||||
|
@ -184,6 +184,14 @@ namespace llvm {
|
|||||||
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
|
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
|
||||||
bool isPSHUFDMask(SDNode *N);
|
bool isPSHUFDMask(SDNode *N);
|
||||||
|
|
||||||
|
/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||||
|
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
|
||||||
|
bool isPSHUFHWMask(SDNode *N);
|
||||||
|
|
||||||
|
/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||||
|
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
|
||||||
|
bool isPSHUFLWMask(SDNode *N);
|
||||||
|
|
||||||
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
|
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||||
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
|
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
|
||||||
bool isSHUFPMask(SDNode *N);
|
bool isSHUFPMask(SDNode *N);
|
||||||
@ -212,6 +220,16 @@ namespace llvm {
|
|||||||
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
|
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
|
||||||
/// instructions.
|
/// instructions.
|
||||||
unsigned getShuffleSHUFImmediate(SDNode *N);
|
unsigned getShuffleSHUFImmediate(SDNode *N);
|
||||||
|
|
||||||
|
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
|
||||||
|
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
|
||||||
|
/// instructions.
|
||||||
|
unsigned getShufflePSHUFHWImmediate(SDNode *N);
|
||||||
|
|
||||||
|
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
|
||||||
|
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
|
||||||
|
/// instructions.
|
||||||
|
unsigned getShufflePSHUFLWImmediate(SDNode *N);
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -45,6 +45,8 @@ def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
|
|||||||
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
|
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
|
||||||
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
|
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
|
||||||
|
|
||||||
|
def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
|
||||||
|
def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
|
||||||
def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
|
def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
|
||||||
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
|
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
|
||||||
|
|
||||||
@ -58,6 +60,18 @@ def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
|
|||||||
return getI8Imm(X86::getShuffleSHUFImmediate(N));
|
return getI8Imm(X86::getShuffleSHUFImmediate(N));
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
|
// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
|
||||||
|
// PSHUFHW imm.
|
||||||
|
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
|
||||||
|
return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
|
||||||
|
// PSHUFLW imm.
|
||||||
|
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
|
||||||
|
return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
def SSE_splat_mask : PatLeaf<(build_vector), [{
|
def SSE_splat_mask : PatLeaf<(build_vector), [{
|
||||||
return X86::isSplatMask(N);
|
return X86::isSplatMask(N);
|
||||||
}], SHUFFLE_get_shuf_imm>;
|
}], SHUFFLE_get_shuf_imm>;
|
||||||
@ -82,6 +96,14 @@ def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
|
|||||||
return X86::isPSHUFDMask(N);
|
return X86::isPSHUFDMask(N);
|
||||||
}], SHUFFLE_get_shuf_imm>;
|
}], SHUFFLE_get_shuf_imm>;
|
||||||
|
|
||||||
|
def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{
|
||||||
|
return X86::isPSHUFHWMask(N);
|
||||||
|
}], SHUFFLE_get_pshufhw_imm>;
|
||||||
|
|
||||||
|
def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
|
||||||
|
return X86::isPSHUFLWMask(N);
|
||||||
|
}], SHUFFLE_get_pshuflw_imm>;
|
||||||
|
|
||||||
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
|
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
|
||||||
return X86::isSHUFPMask(N);
|
return X86::isSHUFPMask(N);
|
||||||
}], SHUFFLE_get_shuf_imm>;
|
}], SHUFFLE_get_shuf_imm>;
|
||||||
@ -935,25 +957,6 @@ def CMPPDrm : PDI<0xC2, MRMSrcMem,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Shuffle and unpack instructions
|
// Shuffle and unpack instructions
|
||||||
def PSHUFWrr : PSIi8<0x70, MRMDestReg,
|
|
||||||
(ops VR64:$dst, VR64:$src1, i8imm:$src2),
|
|
||||||
"pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
|
|
||||||
def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
|
|
||||||
(ops VR64:$dst, i64mem:$src1, i8imm:$src2),
|
|
||||||
"pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
|
|
||||||
def PSHUFDrr : PDIi8<0x70, MRMDestReg,
|
|
||||||
(ops VR128:$dst, VR128:$src1, i8imm:$src2),
|
|
||||||
"pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
|
|
||||||
[(set VR128:$dst, (v4i32 (vector_shuffle
|
|
||||||
VR128:$src1, (undef),
|
|
||||||
PSHUFD_shuffle_mask:$src2)))]>;
|
|
||||||
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
|
|
||||||
(ops VR128:$dst, i128mem:$src1, i8imm:$src2),
|
|
||||||
"pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
|
|
||||||
[(set VR128:$dst, (v4i32 (vector_shuffle
|
|
||||||
(load addr:$src1), (undef),
|
|
||||||
PSHUFD_shuffle_mask:$src2)))]>;
|
|
||||||
|
|
||||||
let isTwoAddress = 1 in {
|
let isTwoAddress = 1 in {
|
||||||
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
|
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
|
||||||
(ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
(ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||||
@ -1081,6 +1084,10 @@ def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
|||||||
def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
"paddd {$src2, $dst|$dst, $src2}",
|
"paddd {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
|
[(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
|
||||||
|
|
||||||
|
def PADDQrr : PDI<0xD4, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
|
"paddq {$src2, $dst|$dst, $src2}",
|
||||||
|
[(set VR128:$dst, (v2i64 (add VR128:$src1, VR128:$src2)))]>;
|
||||||
}
|
}
|
||||||
def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||||
"paddb {$src2, $dst|$dst, $src2}",
|
"paddb {$src2, $dst|$dst, $src2}",
|
||||||
@ -1094,6 +1101,10 @@ def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
|||||||
"paddd {$src2, $dst|$dst, $src2}",
|
"paddd {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst, (v4i32 (add VR128:$src1,
|
[(set VR128:$dst, (v4i32 (add VR128:$src1,
|
||||||
(load addr:$src2))))]>;
|
(load addr:$src2))))]>;
|
||||||
|
// PADDQrm - v2i64 add of a register with a 128-bit memory operand.
// The mnemonic must be "paddq" (opcode 0xD4), not "paddd".
def PADDQrm : PDI<0xD4, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "paddq {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v2i64 (add VR128:$src1,
                                        (load addr:$src2))))]>;
|
||||||
|
|
||||||
def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
"psubb {$src2, $dst|$dst, $src2}",
|
"psubb {$src2, $dst|$dst, $src2}",
|
||||||
@ -1104,6 +1115,9 @@ def PSUBWrr : PDI<0xF9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
|||||||
def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
"psubd {$src2, $dst|$dst, $src2}",
|
"psubd {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
|
[(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
|
||||||
|
def PSUBQrr : PDI<0xFB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
|
"psubq {$src2, $dst|$dst, $src2}",
|
||||||
|
[(set VR128:$dst, (v2i64 (sub VR128:$src1, VR128:$src2)))]>;
|
||||||
|
|
||||||
def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||||
"psubb {$src2, $dst|$dst, $src2}",
|
"psubb {$src2, $dst|$dst, $src2}",
|
||||||
@ -1117,8 +1131,146 @@ def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
|||||||
"psubd {$src2, $dst|$dst, $src2}",
|
"psubd {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst, (v4i32 (sub VR128:$src1,
|
[(set VR128:$dst, (v4i32 (sub VR128:$src1,
|
||||||
(load addr:$src2))))]>;
|
(load addr:$src2))))]>;
|
||||||
|
// PSUBQrm - v2i64 subtract of a 128-bit memory operand from a register.
// The mnemonic must be "psubq" (opcode 0xFB), not "psubd".
def PSUBQrm : PDI<0xFB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "psubq {$src2, $dst|$dst, $src2}",
              [(set VR128:$dst, (v2i64 (sub VR128:$src1,
                                        (load addr:$src2))))]>;
|
||||||
|
}
|
||||||
|
|
||||||
// Unpack and interleave
|
// Logical
|
||||||
|
let isTwoAddress = 1 in {
|
||||||
|
let isCommutable = 1 in {
|
||||||
|
def PANDrr : PDI<0xDB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
|
"pand {$src2, $dst|$dst, $src2}",
|
||||||
|
[(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
|
||||||
|
|
||||||
|
def PANDrm : PDI<0xDB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
|
||||||
|
"pand {$src2, $dst|$dst, $src2}",
|
||||||
|
[(set VR128:$dst, (v2i64 (and VR128:$src1,
|
||||||
|
(load addr:$src2))))]>;
|
||||||
|
// PORrr - 128-bit bitwise OR, register-register form.
// POR is opcode 0xEB; 0xDB is PAND and would emit the wrong instruction.
def PORrr  : PDI<0xEB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "por {$src2, $dst|$dst, $src2}",
             [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
|
||||||
|
|
||||||
|
// PORrm - 128-bit bitwise OR, register-memory form.
// POR is opcode 0xEB; 0xDB is PAND and would emit the wrong instruction.
def PORrm  : PDI<0xEB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                 "por {$src2, $dst|$dst, $src2}",
             [(set VR128:$dst, (v2i64 (or VR128:$src1,
                                       (load addr:$src2))))]>;
|
||||||
|
def PXORrr : PDI<0xEF, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
|
"pxor {$src2, $dst|$dst, $src2}",
|
||||||
|
[(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
|
||||||
|
|
||||||
|
def PXORrm : PDI<0xEF, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
|
||||||
|
"pxor {$src2, $dst|$dst, $src2}",
|
||||||
|
[(set VR128:$dst, (v2i64 (xor VR128:$src1,
|
||||||
|
(load addr:$src2))))]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
def PANDNrr : PDI<0xDF, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
|
"pandn {$src2, $dst|$dst, $src2}",
|
||||||
|
[(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
|
||||||
|
VR128:$src2)))]>;
|
||||||
|
|
||||||
|
def PANDNrm : PDI<0xDF, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
|
||||||
|
"pandn {$src2, $dst|$dst, $src2}",
|
||||||
|
[(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
|
||||||
|
(load addr:$src2))))]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pack instructions
|
||||||
|
let isTwoAddress = 1 in {
|
||||||
|
def PACKSSWBrr : PDI<0x63, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
|
||||||
|
VR128:$src2),
|
||||||
|
"packsswb {$src2, $dst|$dst, $src2}",
|
||||||
|
[(set VR128:$dst, (v16i8 (int_x86_sse2_packsswb_128
|
||||||
|
(v8i16 VR128:$src1),
|
||||||
|
(v8i16 VR128:$src2))))]>;
|
||||||
|
def PACKSSWBrm : PDI<0x63, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
|
||||||
|
i128mem:$src2),
|
||||||
|
"packsswb {$src2, $dst|$dst, $src2}",
|
||||||
|
[(set VR128:$dst, (v16i8 (int_x86_sse2_packsswb_128
|
||||||
|
(v8i16 VR128:$src1),
|
||||||
|
(loadv8i16 addr:$src2))))]>;
|
||||||
|
// PACKSSDWrr - Pack v4i32 into v8i16 with signed saturation, register form.
// The mnemonic must be "packssdw" (opcode 0x6B), not "packsswb".
def PACKSSDWrr : PDI<0x6B, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                                       VR128:$src2),
                     "packssdw {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v8i16 (int_x86_sse2_packssdw_128
                                               (v4i32 VR128:$src1),
                                               (v4i32 VR128:$src2))))]>;
|
||||||
|
// PACKSSDWrm - Pack v4i32 into v8i16 with signed saturation, memory form.
// The second source is a memory operand, so the form is MRMSrcMem (not
// MRMSrcReg), and the mnemonic must be "packssdw" (not "packsswb").
def PACKSSDWrm : PDI<0x6B, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                       i128mem:$src2),
                     "packssdw {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v8i16 (int_x86_sse2_packssdw_128
                                               (v4i32 VR128:$src1),
                                               (loadv4i32 addr:$src2))))]>;
|
||||||
|
def PACKUSWBrr : PDI<0x67, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
|
||||||
|
VR128:$src2),
|
||||||
|
"packuswb {$src2, $dst|$dst, $src2}",
|
||||||
|
[(set VR128:$dst, (v16i8 (int_x86_sse2_packuswb_128
|
||||||
|
(v8i16 VR128:$src1),
|
||||||
|
(v8i16 VR128:$src2))))]>;
|
||||||
|
// PACKUSWBrm - Pack v8i16 into v16i8 with unsigned saturation, memory form.
// The second source is a memory operand, so the form is MRMSrcMem (not
// MRMSrcReg).
def PACKUSWBrm : PDI<0x67, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                                       i128mem:$src2),
                     "packuswb {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v16i8 (int_x86_sse2_packuswb_128
                                               (v8i16 VR128:$src1),
                                               (loadv8i16 addr:$src2))))]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shuffle and unpack instructions
|
||||||
|
// PSHUFWrr - Shuffle the words of a 64-bit register by an 8-bit immediate.
// The destination is encoded in ModRM.reg and the source in ModRM.r/m, so
// the form is MRMSrcReg (not MRMDestReg).
def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
                     (ops VR64:$dst, VR64:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
|
||||||
|
def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
|
||||||
|
(ops VR64:$dst, i64mem:$src1, i8imm:$src2),
|
||||||
|
"pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
|
||||||
|
|
||||||
|
// PSHUFDrr - Shuffle the four dwords of a 128-bit register by an 8-bit
// immediate. The destination is encoded in ModRM.reg and the source in
// ModRM.r/m, so the form is MRMSrcReg (not MRMDestReg).
def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v4i32 (vector_shuffle
                                               VR128:$src1, (undef),
                                               PSHUFD_shuffle_mask:$src2)))]>;
|
||||||
|
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
|
||||||
|
(ops VR128:$dst, i128mem:$src1, i8imm:$src2),
|
||||||
|
"pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[(set VR128:$dst, (v4i32 (vector_shuffle
|
||||||
|
(load addr:$src1), (undef),
|
||||||
|
PSHUFD_shuffle_mask:$src2)))]>;
|
||||||
|
|
||||||
|
// SSE2 with ImmT == Imm8 and XS prefix.
|
||||||
|
// PSHUFHWrr - Shuffle the high four words of a 128-bit register; the low
// quadword is copied unchanged. The destination is encoded in ModRM.reg and
// the source in ModRM.r/m, so the form is MRMSrcReg (not MRMDestReg).
def PSHUFHWrr : Ii8<0x70, MRMSrcReg,
                    (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                    "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              VR128:$src1, (undef),
                                              PSHUFHW_shuffle_mask:$src2)))]>,
                XS, Requires<[HasSSE2]>;
|
||||||
|
// PSHUFHWrm - Shuffle the high four words of a 128-bit value loaded from
// memory. The memory operand is the source, so the form is MRMSrcMem (not
// MRMDestMem, which would describe a store).
def PSHUFHWrm : Ii8<0x70, MRMSrcMem,
                    (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
                    "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
                                              PSHUFHW_shuffle_mask:$src2)))]>,
                XS, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
|
// SSE2 with ImmT == Imm8 and XD prefix.
|
||||||
|
// PSHUFLWrr - Shuffle the low four words of a 128-bit register; the high
// quadword is copied unchanged. The destination is encoded in ModRM.reg and
// the source in ModRM.r/m, so the form is MRMSrcReg (not MRMDestReg). The
// mnemonic is spelled in lower case like every other instruction here.
def PSHUFLWrr : Ii8<0x70, MRMSrcReg,
                    (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              VR128:$src1, (undef),
                                              PSHUFLW_shuffle_mask:$src2)))]>,
                XD, Requires<[HasSSE2]>;
|
||||||
|
// PSHUFLWrm - Shuffle the low four words of a 128-bit value loaded from
// memory. The memory operand is the source, so the form is MRMSrcMem (not
// MRMDestMem, which would describe a store). The mnemonic is spelled in
// lower case like every other instruction here.
def PSHUFLWrm : Ii8<0x70, MRMSrcMem,
                    (ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
                                              PSHUFLW_shuffle_mask:$src2)))]>,
                XD, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
|
let isTwoAddress = 1 in {
|
||||||
def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
|
def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
|
||||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
"punpcklbw {$src2, $dst|$dst, $src2}",
|
"punpcklbw {$src2, $dst|$dst, $src2}",
|
||||||
@ -1355,6 +1507,29 @@ def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
|
|||||||
// bit_convert
|
// bit_convert
|
||||||
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>,
|
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>,
|
||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
|
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>,
|
||||||
|
Requires<[HasSSE2]>;
|
||||||
|
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>,
|
||||||
|
Requires<[HasSSE2]>;
|
||||||
|
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>,
|
||||||
|
Requires<[HasSSE2]>;
|
||||||
|
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>,
|
||||||
|
Requires<[HasSSE2]>;
|
||||||
|
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>,
|
||||||
|
Requires<[HasSSE2]>;
|
||||||
|
// Bitconverts to v8i16 / v16i8 from the other 128-bit integer vector types.
// Each pattern maps the bitconvert to the same VR128 value, so the result
// must carry the destination type of the bitconvert (v8i16 or v16i8), not
// v4i32.
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>,
      Requires<[HasSSE2]>;
|
||||||
|
|
||||||
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
|
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
|
||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
|
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
|
||||||
|
Loading…
Reference in New Issue
Block a user