Fix issues in shuffle decoding around VPERM* instructions. Fix shuffle decoding for VSHUFPS/D for 256-bit types. Add pattern matching for memory forms of VPERMILPS/VPERMILPD.

llvm-svn: 145390
This commit is contained in:
Craig Topper 2011-11-29 07:49:05 +00:00
parent b92661d00a
commit 4550fc2649
6 changed files with 90 additions and 30 deletions

View File

@ -163,14 +163,22 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::SHUFPDrmi:
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
DecodeSHUFPMask(MVT::v2f64, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPDrmi:
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
DecodeSHUFPMask(MVT::v2f64, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPDYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPDYrmi:
DecodeSHUFPMask(MVT::v4f64, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@ -179,14 +187,22 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::SHUFPSrmi:
DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
DecodeSHUFPMask(MVT::v4f32, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPSrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPSrmi:
DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
DecodeSHUFPMask(MVT::v4f32, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPSYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPSYrmi:
DecodeSHUFPMask(MVT::v8f32, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@ -284,29 +300,47 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPSri:
DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPSmi:
DecodeVPERMILPSMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPSYri:
DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(),
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPSYmi:
DecodeVPERMILPSMask(8, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPDri:
DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(),
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPDmi:
DecodeVPERMILPDMask(2, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPDYri:
DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(),
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPDYmi:
DecodeVPERMILPDMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERM2F128rr:
DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
case X86::VPERM2I128rr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VPERM2F128rm:
case X86::VPERM2I128rm:
DecodeVPERM2F128Mask(MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
}

View File

@ -128,17 +128,27 @@ void DecodePUNPCKHMask(unsigned NElts,
}
}
void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask) {
// Part that reads from dest.
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(Imm % NElts);
Imm /= NElts;
}
// Part that reads from src.
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(Imm % NElts + NElts);
Imm /= NElts;
void DecodeSHUFPMask(EVT VT, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits() / 128;
unsigned NumLaneElts = NumElts / NumLanes;
int NewImm = Imm;
for (unsigned l = 0; l < NumLanes; ++l) {
unsigned LaneStart = l * NumLaneElts;
// Part that reads from dest.
for (unsigned i = 0; i != NumLaneElts/2; ++i) {
ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart);
NewImm /= NumLaneElts;
}
// Part that reads from src.
for (unsigned i = 0; i != NumLaneElts/2; ++i) {
ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart);
NewImm /= NumLaneElts;
}
if (NumLaneElts == 4) NewImm = Imm; // reload imm
}
}

View File

@ -64,8 +64,8 @@ void DecodePUNPCKLMask(EVT VT,
void DecodePUNPCKHMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodeSHUFPMask(EVT VT, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd
/// etc. VT indicates the type of the vector allowing it to handle different

View File

@ -4567,9 +4567,8 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::SHUFPS:
case X86ISD::SHUFPD:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeSHUFPSMask(NumElems,
cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
break;
case X86ISD::PUNPCKH:
DecodePUNPCKHMask(NumElems, ShuffleMask);

View File

@ -7332,6 +7332,15 @@ def : Pat<(v8i32 (X86VPermilps VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
def : Pat<(v8f32 (X86VPermilps (memopv8f32 addr:$src1), (i8 imm:$imm))),
(VPERMILPSYmi addr:$src1, imm:$imm)>;
def : Pat<(v4f64 (X86VPermilpd (memopv4f64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDYmi addr:$src1, imm:$imm)>;
def : Pat<(v8i32 (X86VPermilps (bc_v8i32 (memopv4i64 addr:$src1)),
(i8 imm:$imm))),
(VPERMILPSYmi addr:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilpd (memopv4i64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDYmi addr:$src1, imm:$imm)>;
//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks

View File

@ -28,6 +28,14 @@ entry:
ret <4 x i64> %shuffle
}
; CHECK: vpermilpd
define <4 x i64> @funcQ(<4 x i64>* %a) nounwind uwtable readnone ssp {
entry:
%a2 = load <4 x i64>* %a
%shuffle = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
ret <4 x i64> %shuffle
}
; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
; target specific mask was correctly generated.
; CHECK: vpermilps $-100