- Added some SSE2 128-bit packed integer ops.

- Added SSE2 128-bit integer pack with signed saturation ops.
- Added pshufhw and pshuflw ops.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27252 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2006-03-29 23:07:14 +00:00
parent 75658b96bf
commit 506d3dfa90
3 changed files with 314 additions and 25 deletions

View File

@ -1399,10 +1399,67 @@ bool X86::isPSHUFDMask(SDNode *N) {
return false; return false;
// Check if the value doesn't reference the second vector. // Check if the value doesn't reference the second vector.
for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) && assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!"); "Invalid VECTOR_SHUFFLE mask!");
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() >= 4) return false; if (cast<ConstantSDNode>(N->getOperand(i))->getValue() >= 4)
return false;
}
return true;
}
/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW:
/// the lower quadword (elements 0-3) must be an identity copy, and each
/// upper-quadword element must select from elements 4-7.
bool X86::isPSHUFHWMask(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR);
// PSHUFHW operates on v8i16 only.
if (N->getNumOperands() != 8)
return false;
// Lower quadword copied in order.
for (unsigned i = 0; i != 4; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != i)
return false;
}
// Upper quadword shuffled: every index must stay within elements 4-7.
for (unsigned i = 4; i != 8; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
if (Val < 4 || Val > 7)
return false;
}
return true;
}
/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW:
/// the upper quadword (elements 4-7) must be an identity copy, and each
/// lower-quadword element must select from elements 0-3.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  // PSHUFLW operates on v8i16 only.
  if (N->getNumOperands() != 8)
    return false;
  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i) {
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != i)
      return false;
  }
  // Lower quadword shuffled: every index must stay within elements 0-3.
  // (Was 'Val > 4', which incorrectly accepted element index 4.)
  for (unsigned i = 0; i != 4; ++i) {
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
    if (Val >= 4)
      return false;
  }
  return true;
}
@ -1431,7 +1488,7 @@ bool X86::isSHUFPMask(SDNode *N) {
// Each half must refer to only one of the vector. // Each half must refer to only one of the vector.
SDOperand Elt = N->getOperand(0); SDOperand Elt = N->getOperand(0);
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
for (unsigned i = 1; i != NumElems / 2; ++i) { for (unsigned i = 1; i < NumElems / 2; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) && assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!"); "Invalid VECTOR_SHUFFLE mask!");
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
@ -1440,7 +1497,7 @@ bool X86::isSHUFPMask(SDNode *N) {
} }
Elt = N->getOperand(NumElems / 2); Elt = N->getOperand(NumElems / 2);
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
for (unsigned i = NumElems / 2; i != NumElems; ++i) { for (unsigned i = NumElems / 2 + 1; i < NumElems; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) && assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!"); "Invalid VECTOR_SHUFFLE mask!");
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
@ -1583,6 +1640,40 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
return Mask; return Mask;
} }
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFHW
/// instruction.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 operands, but only elements 4-7 contribute.  Element i selects a
  // source within the upper quadword, so its index is encoded relative to
  // element 4 and lands in bit pair 2*(i-4) of the immediate.
  for (unsigned i = 4; i != 8; ++i) {
    unsigned Idx = cast<ConstantSDNode>(N->getOperand(i))->getValue();
    Mask |= (Idx - 4) << ((i - 4) * 2);
  }
  return Mask;
}
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFLW
/// instruction.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 operands, but only elements 0-3 contribute.  Element i's source
  // index lands in bit pair 2*i of the immediate.
  for (unsigned i = 0; i != 4; ++i) {
    unsigned Idx = cast<ConstantSDNode>(N->getOperand(i))->getValue();
    Mask |= Idx << (i * 2);
  }
  return Mask;
}
/// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as /// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as
/// values in their permute mask if needed. Use V1 as second vector if it is /// values in their permute mask if needed. Use V1 as second vector if it is
/// undef. Return an empty SDOperand if it is already well formed. /// undef. Return an empty SDOperand if it is already well formed.
@ -2399,7 +2490,10 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
// Splat && PSHUFD's 2nd vector must be undef. // Splat && PSHUFD's 2nd vector must be undef.
if (X86::isSplatMask(PermMask.Val) || if (X86::isSplatMask(PermMask.Val) ||
((MVT::isInteger(VT) && X86::isPSHUFDMask(PermMask.Val)))) { ((MVT::isInteger(VT) &&
(X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val))))) {
if (V2.getOpcode() != ISD::UNDEF) if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
@ -2607,6 +2701,8 @@ X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
return (Mask.Val->getNumOperands() == 2 || return (Mask.Val->getNumOperands() == 2 ||
X86::isSplatMask(Mask.Val) || X86::isSplatMask(Mask.Val) ||
X86::isPSHUFDMask(Mask.Val) || X86::isPSHUFDMask(Mask.Val) ||
X86::isPSHUFHWMask(Mask.Val) ||
X86::isPSHUFLWMask(Mask.Val) ||
X86::isSHUFPMask(Mask.Val) || X86::isSHUFPMask(Mask.Val) ||
X86::isUNPCKLMask(Mask.Val) || X86::isUNPCKLMask(Mask.Val) ||
X86::isUNPCKHMask(Mask.Val)); X86::isUNPCKHMask(Mask.Val));

View File

@ -184,6 +184,14 @@ namespace llvm {
/// specifies a shuffle of elements that is suitable for input to PSHUFD. /// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool isPSHUFDMask(SDNode *N); bool isPSHUFDMask(SDNode *N);
/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool isPSHUFHWMask(SDNode *N);
/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool isPSHUFLWMask(SDNode *N);
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*. /// specifies a shuffle of elements that is suitable for input to SHUFP*.
bool isSHUFPMask(SDNode *N); bool isSHUFPMask(SDNode *N);
@ -212,6 +220,16 @@ namespace llvm {
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions. /// instructions.
unsigned getShuffleSHUFImmediate(SDNode *N); unsigned getShuffleSHUFImmediate(SDNode *N);
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
/// instructions.
unsigned getShufflePSHUFHWImmediate(SDNode *N);
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
/// instructions.
unsigned getShufflePSHUFLWImmediate(SDNode *N);
} }
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//

View File

@ -45,6 +45,8 @@ def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>; def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>; def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
@ -58,6 +60,18 @@ def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
return getI8Imm(X86::getShuffleSHUFImmediate(N)); return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>; }]>;
// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
// PSHUFHW imm.
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
}]>;
// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
// PSHUFLW imm.
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
}]>;
def SSE_splat_mask : PatLeaf<(build_vector), [{ def SSE_splat_mask : PatLeaf<(build_vector), [{
return X86::isSplatMask(N); return X86::isSplatMask(N);
}], SHUFFLE_get_shuf_imm>; }], SHUFFLE_get_shuf_imm>;
@ -82,6 +96,14 @@ def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isPSHUFDMask(N); return X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>; }], SHUFFLE_get_shuf_imm>;
def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isPSHUFHWMask(N);
}], SHUFFLE_get_pshufhw_imm>;
def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isPSHUFLWMask(N);
}], SHUFFLE_get_pshuflw_imm>;
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{ def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isSHUFPMask(N); return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>; }], SHUFFLE_get_shuf_imm>;
@ -935,25 +957,6 @@ def CMPPDrm : PDI<0xC2, MRMSrcMem,
} }
// Shuffle and unpack instructions // Shuffle and unpack instructions
def PSHUFWrr : PSIi8<0x70, MRMDestReg,
(ops VR64:$dst, VR64:$src1, i8imm:$src2),
"pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
(ops VR64:$dst, i64mem:$src1, i8imm:$src2),
"pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFDrr : PDIi8<0x70, MRMDestReg,
(ops VR128:$dst, VR128:$src1, i8imm:$src2),
"pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (vector_shuffle
VR128:$src1, (undef),
PSHUFD_shuffle_mask:$src2)))]>;
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
(ops VR128:$dst, i128mem:$src1, i8imm:$src2),
"pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (vector_shuffle
(load addr:$src1), (undef),
PSHUFD_shuffle_mask:$src2)))]>;
let isTwoAddress = 1 in { let isTwoAddress = 1 in {
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg, def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3), (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
@ -1081,6 +1084,10 @@ def PADDWrr : PDI<0xFD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"paddd {$src2, $dst|$dst, $src2}", "paddd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>; [(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
def PADDQrr : PDI<0xD4, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"paddq {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (v2i64 (add VR128:$src1, VR128:$src2)))]>;
} }
def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"paddb {$src2, $dst|$dst, $src2}", "paddb {$src2, $dst|$dst, $src2}",
@ -1094,6 +1101,10 @@ def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"paddd {$src2, $dst|$dst, $src2}", "paddd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (v4i32 (add VR128:$src1, [(set VR128:$dst, (v4i32 (add VR128:$src1,
(load addr:$src2))))]>; (load addr:$src2))))]>;
// v2i64 packed add, memory operand.  Asm mnemonic fixed: was "paddd".
def PADDQrm : PDI<0xD4, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "paddq {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (add VR128:$src1,
                                            (load addr:$src2))))]>;
def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"psubb {$src2, $dst|$dst, $src2}", "psubb {$src2, $dst|$dst, $src2}",
@ -1104,6 +1115,9 @@ def PSUBWrr : PDI<0xF9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"psubd {$src2, $dst|$dst, $src2}", "psubd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>; [(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
def PSUBQrr : PDI<0xFB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"psubq {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (v2i64 (sub VR128:$src1, VR128:$src2)))]>;
def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"psubb {$src2, $dst|$dst, $src2}", "psubb {$src2, $dst|$dst, $src2}",
@ -1117,8 +1131,146 @@ def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"psubd {$src2, $dst|$dst, $src2}", "psubd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (v4i32 (sub VR128:$src1, [(set VR128:$dst, (v4i32 (sub VR128:$src1,
(load addr:$src2))))]>; (load addr:$src2))))]>;
// v2i64 packed subtract, memory operand.  Asm mnemonic fixed: was "psubd".
def PSUBQrm : PDI<0xFB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "psubq {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (sub VR128:$src1,
                                            (load addr:$src2))))]>;
}
// Unpack and interleave // Logical
// 128-bit bitwise logical ops.  All operate on the full register, so they
// are modeled on v2i64 regardless of the element type callers use.
let isTwoAddress = 1 in {
let isCommutable = 1 in {
def PANDrr : PDI<0xDB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "pand {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
def PANDrm : PDI<0xDB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                 "pand {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst, (v2i64 (and VR128:$src1,
                                           (load addr:$src2))))]>;
// POR encodes as 0x66 0x0F 0xEB; 0xDB (previously used here) is PAND.
def PORrr : PDI<0xEB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                "por {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
def PORrm : PDI<0xEB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                "por {$src2, $dst|$dst, $src2}",
                [(set VR128:$dst, (v2i64 (or VR128:$src1,
                                          (load addr:$src2))))]>;
def PXORrr : PDI<0xEF, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "pxor {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
def PXORrm : PDI<0xEF, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                 "pxor {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst, (v2i64 (xor VR128:$src1,
                                           (load addr:$src2))))]>;
}
// PANDN is not commutable: it complements only $src1.
def PANDNrr : PDI<0xDF, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "pandn {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
                                            VR128:$src2)))]>;
def PANDNrm : PDI<0xDF, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                  "pandn {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
                                            (load addr:$src2))))]>;
}
// Pack-with-saturation instructions.  Fixes from review: the PACKSSDW defs
// printed "packsswb", and the rm (memory) forms of PACKSSDW/PACKUSWB were
// declared MRMSrcReg despite taking an i128mem operand.
let isTwoAddress = 1 in {
def PACKSSWBrr : PDI<0x63, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                     VR128:$src2),
                     "packsswb {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v16i8 (int_x86_sse2_packsswb_128
                                               (v8i16 VR128:$src1),
                                               (v8i16 VR128:$src2))))]>;
def PACKSSWBrm : PDI<0x63, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                     i128mem:$src2),
                     "packsswb {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v16i8 (int_x86_sse2_packsswb_128
                                               (v8i16 VR128:$src1),
                                               (loadv8i16 addr:$src2))))]>;
def PACKSSDWrr : PDI<0x6B, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                     VR128:$src2),
                     "packssdw {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v8i16 (int_x86_sse2_packssdw_128
                                               (v4i32 VR128:$src1),
                                               (v4i32 VR128:$src2))))]>;
def PACKSSDWrm : PDI<0x6B, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                     i128mem:$src2),
                     "packssdw {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v8i16 (int_x86_sse2_packssdw_128
                                               (v4i32 VR128:$src1),
                                               (loadv4i32 addr:$src2))))]>;
def PACKUSWBrr : PDI<0x67, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
                     VR128:$src2),
                     "packuswb {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v16i8 (int_x86_sse2_packuswb_128
                                               (v8i16 VR128:$src1),
                                               (v8i16 VR128:$src2))))]>;
def PACKUSWBrm : PDI<0x67, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
                     i128mem:$src2),
                     "packuswb {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v16i8 (int_x86_sse2_packuswb_128
                                               (v8i16 VR128:$src1),
                                               (loadv8i16 addr:$src2))))]>;
}
// Shuffle and unpack instructions.  Fixes from review: the register forms
// read their source from the ModRM reg/mem field, so they are MRMSrcReg /
// MRMSrcMem (they were declared MRMDestReg / MRMDestMem); the PSHUFLW asm
// string had a stray capital L ("pshufLw").
def PSHUFWrr : PSIi8<0x70, MRMSrcReg,
                     (ops VR64:$dst, VR64:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
                     (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
                     "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFDrr : PDIi8<0x70, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v4i32 (vector_shuffle
                                               VR128:$src1, (undef),
                                               PSHUFD_shuffle_mask:$src2)))]>;
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
                     (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v4i32 (vector_shuffle
                                               (load addr:$src1), (undef),
                                               PSHUFD_shuffle_mask:$src2)))]>;
// SSE2 with ImmT == Imm8 and XS prefix.
def PSHUFHWrr : Ii8<0x70, MRMSrcReg,
                    (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                    "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              VR128:$src1, (undef),
                                              PSHUFHW_shuffle_mask:$src2)))]>,
                XS, Requires<[HasSSE2]>;
def PSHUFHWrm : Ii8<0x70, MRMSrcMem,
                    (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
                    "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
                                              PSHUFHW_shuffle_mask:$src2)))]>,
                XS, Requires<[HasSSE2]>;
// SSE2 with ImmT == Imm8 and XD prefix.
def PSHUFLWrr : Ii8<0x70, MRMSrcReg,
                    (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              VR128:$src1, (undef),
                                              PSHUFLW_shuffle_mask:$src2)))]>,
                XD, Requires<[HasSSE2]>;
def PSHUFLWrm : Ii8<0x70, MRMSrcMem,
                    (ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
                                              PSHUFLW_shuffle_mask:$src2)))]>,
                XD, Requires<[HasSSE2]>;
let isTwoAddress = 1 in {
def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2), (ops VR128:$dst, VR128:$src1, VR128:$src2),
"punpcklbw {$src2, $dst|$dst, $src2}", "punpcklbw {$src2, $dst|$dst, $src2}",
@ -1355,6 +1507,29 @@ def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
// bit_convert // bit_convert
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>, def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>,
Requires<[HasSSE2]>; Requires<[HasSSE2]>;
// bitconvert between 128-bit integer vector types is a no-op on VR128.
// Fixes from review: six result patterns said (v4i32 VR128:$src) where the
// destination type is v8i16 or v16i8 (copy-paste); the cast on the result
// must match the type being produced.
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>, def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
Requires<[HasSSE2]>; Requires<[HasSSE2]>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>, def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,