mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-01 08:28:19 +00:00
Fix issues in shuffle decoding around VPERM* instructions. Fix shuffle decoding for VSHUFPS/D for 256-bit types. Add pattern matching for memory forms of VPERMILPS/VPERMILPD.
llvm-svn: 145390
This commit is contained in:
parent
b92661d00a
commit
4550fc2649
@ -163,14 +163,22 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::SHUFPDrmi:
|
||||
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
|
||||
DecodeSHUFPMask(MVT::v2f64, MI->getOperand(3).getImm(), ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VSHUFPDrri:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VSHUFPDrmi:
|
||||
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
|
||||
DecodeSHUFPMask(MVT::v2f64, MI->getOperand(3).getImm(), ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VSHUFPDYrri:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VSHUFPDYrmi:
|
||||
DecodeSHUFPMask(MVT::v4f64, MI->getOperand(3).getImm(), ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
@ -179,14 +187,22 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::SHUFPSrmi:
|
||||
DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
|
||||
DecodeSHUFPMask(MVT::v4f32, MI->getOperand(3).getImm(), ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VSHUFPSrri:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VSHUFPSrmi:
|
||||
DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
|
||||
DecodeSHUFPMask(MVT::v4f32, MI->getOperand(3).getImm(), ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VSHUFPSYrri:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VSHUFPSYrmi:
|
||||
DecodeSHUFPMask(MVT::v8f32, MI->getOperand(3).getImm(), ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
@ -284,29 +300,47 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VPERMILPSri:
|
||||
DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VPERMILPSmi:
|
||||
DecodeVPERMILPSMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VPERMILPSYri:
|
||||
DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(),
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VPERMILPSYmi:
|
||||
DecodeVPERMILPSMask(8, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VPERMILPDri:
|
||||
DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(),
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VPERMILPDmi:
|
||||
DecodeVPERMILPDMask(2, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VPERMILPDYri:
|
||||
DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(),
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VPERMILPDYmi:
|
||||
DecodeVPERMILPDMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VPERM2F128rr:
|
||||
DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
case X86::VPERM2I128rr:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VPERM2F128rm:
|
||||
case X86::VPERM2I128rm:
|
||||
DecodeVPERM2F128Mask(MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -128,17 +128,27 @@ void DecodePUNPCKHMask(unsigned NElts,
|
||||
}
|
||||
}
|
||||
|
||||
void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
// Part that reads from dest.
|
||||
for (unsigned i = 0; i != NElts/2; ++i) {
|
||||
ShuffleMask.push_back(Imm % NElts);
|
||||
Imm /= NElts;
|
||||
}
|
||||
// Part that reads from src.
|
||||
for (unsigned i = 0; i != NElts/2; ++i) {
|
||||
ShuffleMask.push_back(Imm % NElts + NElts);
|
||||
Imm /= NElts;
|
||||
// DecodeSHUFPMask - Decode the immediate of a SHUFPS/SHUFPD-style shuffle
// into element indices and append them to ShuffleMask.
//
// VT          - vector type being shuffled; supplies the element count and
//               the total size in bits.
// Imm         - the shuffle immediate to decode.
// ShuffleMask - output; indices are appended (existing contents are kept).
//
// The vector is processed one 128-bit lane at a time. Within a lane the first
// half of the indices is taken from the "dest" operand and the second half
// from the "src" operand; src indices are biased by NumElts.
void DecodeSHUFPMask(EVT VT, unsigned Imm,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
||||
// Number of 128-bit lanes in VT.
// NOTE(review): for a VT narrower than 128 bits this would be 0 and the
// division below would divide by zero -- presumably callers only pass
// 128-bit or 256-bit types; confirm.
unsigned NumLanes = VT.getSizeInBits() / 128;
|
||||
unsigned NumLaneElts = NumElts / NumLanes;
|
||||
|
||||
// Working copy of the immediate; it is consumed one base-NumLaneElts digit
// per emitted index.
// NOTE(review): this is a signed copy of an unsigned value that is then mixed
// with unsigned operands in % and /= -- looks harmless for small immediates,
// confirm.
int NewImm = Imm;
|
||||
for (unsigned l = 0; l < NumLanes; ++l) {
|
||||
// Index of the first element belonging to lane l.
unsigned LaneStart = l * NumLaneElts;
|
||||
// Part that reads from dest: the low half of the lane selects elements of
// the same lane in the first operand.
|
||||
for (unsigned i = 0; i != NumLaneElts/2; ++i) {
|
||||
ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart);
|
||||
NewImm /= NumLaneElts;
|
||||
}
|
||||
// Part that reads from src: the high half of the lane selects elements of
// the same lane in the second operand, hence the extra NumElts offset.
|
||||
for (unsigned i = 0; i != NumLaneElts/2; ++i) {
|
||||
ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart);
|
||||
NewImm /= NumLaneElts;
|
||||
}
|
||||
// With 4 elements per lane every lane decodes the same immediate, so start
// over from Imm; otherwise the next lane keeps consuming the remaining
// digits of the immediate.
if (NumLaneElts == 4) NewImm = Imm; // reload imm
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -64,8 +64,8 @@ void DecodePUNPCKLMask(EVT VT,
|
||||
void DecodePUNPCKHMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
/// DecodeSHUFPMask - This decodes the shuffle masks for shufps/shufpd style
/// immediates. VT indicates the type of the vector being shuffled, Imm is the
/// shuffle immediate, and the decoded element indices are appended to
/// ShuffleMask.
void DecodeSHUFPMask(EVT VT, unsigned Imm,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd
|
||||
/// etc. VT indicates the type of the vector allowing it to handle different
|
||||
|
@ -4567,9 +4567,8 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
|
||||
case X86ISD::SHUFPS:
|
||||
case X86ISD::SHUFPD:
|
||||
ImmN = N->getOperand(N->getNumOperands()-1);
|
||||
DecodeSHUFPSMask(NumElems,
|
||||
cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
||||
ShuffleMask);
|
||||
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
||||
ShuffleMask);
|
||||
break;
|
||||
case X86ISD::PUNPCKH:
|
||||
DecodePUNPCKHMask(NumElems, ShuffleMask);
|
||||
|
@ -7332,6 +7332,15 @@ def : Pat<(v8i32 (X86VPermilps VR256:$src1, (i8 imm:$imm))),
|
||||
(VPERMILPSYri VR256:$src1, imm:$imm)>;
|
||||
def : Pat<(v4i64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))),
|
||||
(VPERMILPDYri VR256:$src1, imm:$imm)>;
|
||||
// Memory forms: when the permuted 256-bit source is a load, select the
// corresponding "mi" instruction with the address operand instead of the
// register ("ri") form.

// v8f32 source loaded from memory -> VPERMILPSYmi.
def : Pat<(v8f32 (X86VPermilps (memopv8f32 addr:$src1), (i8 imm:$imm))),
|
||||
(VPERMILPSYmi addr:$src1, imm:$imm)>;
|
||||
// v4f64 source loaded from memory -> VPERMILPDYmi.
def : Pat<(v4f64 (X86VPermilpd (memopv4f64 addr:$src1), (i8 imm:$imm))),
|
||||
(VPERMILPDYmi addr:$src1, imm:$imm)>;
|
||||
// v8i32 result: the load is matched as a v4i64 load bitcast to v8i32
// (bc_v8i32) and still maps onto VPERMILPSYmi.
def : Pat<(v8i32 (X86VPermilps (bc_v8i32 (memopv4i64 addr:$src1)),
|
||||
(i8 imm:$imm))),
|
||||
(VPERMILPSYmi addr:$src1, imm:$imm)>;
|
||||
// v4i64 source loaded from memory -> VPERMILPDYmi.
def : Pat<(v4i64 (X86VPermilpd (memopv4i64 addr:$src1), (i8 imm:$imm))),
|
||||
(VPERMILPDYmi addr:$src1, imm:$imm)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
|
||||
|
@ -28,6 +28,14 @@ entry:
|
||||
ret <4 x i64> %shuffle
|
||||
}
|
||||
|
||||
; CHECK: vpermilpd
|
||||
; funcQ: shuffle a <4 x i64> that is loaded from memory, using the
; single-input mask <1, 0, 3, 3>. Presumably paired with the vpermilpd
; expectation written just above this function -- confirm.
define <4 x i64> @funcQ(<4 x i64>* %a) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
; The shuffle input comes straight from a load, not from a register argument.
%a2 = load <4 x i64>* %a
|
||||
; Second shuffle operand is undef, so only elements of %a2 are selected.
%shuffle = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
|
||||
ret <4 x i64> %shuffle
|
||||
}
|
||||
|
||||
; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
|
||||
; target specific mask was correctly generated.
|
||||
; CHECK: vpermilps $-100
|
||||
|
Loading…
Reference in New Issue
Block a user