diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 0efeef20f2b..99f557417b7 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2679,85 +2679,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return Result; } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPESTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - SDValue N3 = Node->getOperand(3); - SDValue N4 = Node->getOperand(4); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast(N4); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, - X86::EAX, N1, SDValue()).getValue(1); - InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, - N3, InFlag).getValue(1); - - SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag }; - unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : - X86::PCMPESTRIrr; - InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPISTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast(N2); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue Ops[] = { N0, N1, getI8Imm(Imm) }; - unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : - X86::PCMPISTRIrr; - SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } } SDNode *ResNode = SelectCode(Node); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1a440e50f27..065d67c5ced 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12045,6 +12045,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; + case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI"; + case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI"; } } @@ -12839,8 +12841,8 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, // or XMM0_V32I8 in AVX all of this code can be replaced with that // in the .td file. MachineBasicBlock * -X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, - unsigned numArgs, bool memArg) const { +X86TargetLowering::EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB, + bool Implicit, bool MemArg) const { assert(Subtarget->hasSSE42() && "Target must have SSE4.2 or AVX features enabled"); @@ -12848,23 +12850,30 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); unsigned Opc; if (!Subtarget->hasAVX()) { - if (memArg) - Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; + if (MemArg) + Opc = Implicit ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; else - Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr; + Opc = Implicit ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr; } else { - if (memArg) - Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm; + if (MemArg) + Opc = Implicit ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm; else - Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr; + Opc = Implicit ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr; } + unsigned NumArgs = Implicit ? 3 : 5; + if (MemArg) + NumArgs += X86::AddrNumOperands; + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); - for (unsigned i = 0; i < numArgs; ++i) { + for (unsigned i = 0; i < NumArgs; ++i) { MachineOperand &Op = MI->getOperand(i+1); if (!(Op.isReg() && Op.isImplicit())) MIB.addOperand(Op); } + if (MemArg) + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(X86::XMM0); @@ -12873,6 +12882,50 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, return BB; } +// FIXME: Custom handling because TableGen doesn't support multiple implicit +// defs in an instruction pattern +MachineBasicBlock * +X86TargetLowering::EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB, + bool Implicit, bool MemArg) const { + assert(Subtarget->hasSSE42() && + "Target must have SSE4.2 or AVX features enabled"); + + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned Opc; + if (!Subtarget->hasAVX()) { + if (MemArg) + Opc = Implicit ? X86::PCMPISTRIrm : X86::PCMPESTRIrm; + else + Opc = Implicit ? X86::PCMPISTRIrr : X86::PCMPESTRIrr; + } else { + if (MemArg) + Opc = Implicit ? X86::VPCMPISTRIrm : X86::VPCMPESTRIrm; + else + Opc = Implicit ? X86::VPCMPISTRIrr : X86::VPCMPESTRIrr; + } + + unsigned NumArgs = Implicit ? 3 : 5; + if (MemArg) + NumArgs += X86::AddrNumOperands; + + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); + for (unsigned i = 0; i < NumArgs; ++i) { + MachineOperand &Op = MI->getOperand(i+1); + if (!(Op.isReg() && Op.isImplicit())) + MIB.addOperand(Op); + } + if (MemArg) + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + BuildMI(*BB, MI, dl, + TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) + .addReg(X86::ECX); + + MI->eraseFromParent(); + return BB; +} + MachineBasicBlock * X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const { DebugLoc dl = MI->getDebugLoc(); @@ -13891,24 +13944,51 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::VPCMPESTRM128REG: case X86::PCMPESTRM128MEM: case X86::VPCMPESTRM128MEM: { - unsigned NumArgs; - bool MemArg; + bool Implicit, MemArg; switch (MI->getOpcode()) { default: llvm_unreachable("illegal opcode!"); case X86::PCMPISTRM128REG: case X86::VPCMPISTRM128REG: - NumArgs = 3; MemArg = false; break; + Implicit = true; MemArg = false; break; case X86::PCMPISTRM128MEM: case X86::VPCMPISTRM128MEM: - NumArgs = 3; MemArg = true; break; + Implicit = true; MemArg = true; break; case X86::PCMPESTRM128REG: case X86::VPCMPESTRM128REG: - NumArgs = 5; MemArg = false; break; + Implicit = false; MemArg = false; break; case X86::PCMPESTRM128MEM: case X86::VPCMPESTRM128MEM: - NumArgs = 5; MemArg = true; break; + Implicit = false; MemArg = true; break; } - return EmitPCMP(MI, BB, NumArgs, MemArg); + return EmitPCMPSTRM(MI, BB, Implicit, MemArg); + } + + // String/text processing lowering. + case X86::PCMPISTRIREG: + case X86::VPCMPISTRIREG: + case X86::PCMPISTRIMEM: + case X86::VPCMPISTRIMEM: + case X86::PCMPESTRIREG: + case X86::VPCMPESTRIREG: + case X86::PCMPESTRIMEM: + case X86::VPCMPESTRIMEM: { + bool Implicit, MemArg; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::PCMPISTRIREG: + case X86::VPCMPISTRIREG: + Implicit = true; MemArg = false; break; + case X86::PCMPISTRIMEM: + case X86::VPCMPISTRIMEM: + Implicit = true; MemArg = true; break; + case X86::PCMPESTRIREG: + case X86::VPCMPESTRIREG: + Implicit = false; MemArg = false; break; + case X86::PCMPESTRIMEM: + case X86::VPCMPESTRIMEM: + Implicit = false; MemArg = true; break; + } + return EmitPCMPSTRI(MI, BB, Implicit, MemArg); } // Thread synchronization. diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 7764330cc82..a2a9db08ca2 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -871,13 +871,15 @@ namespace llvm { const SmallVectorImpl &Outs, LLVMContext &Context) const; - /// Utility function to emit string processing sse4.2 instructions + /// Utility functions to emit string processing sse4.2 instructions /// that return in xmm0. /// This takes the instruction to expand, the associated machine basic /// block, the number of args, and whether or not the second arg is /// in memory or not. - MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB, - unsigned argNum, bool inMem) const; + MachineBasicBlock *EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB, + bool Implicit, bool MemArg) const; + MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB, + bool Implicit, bool MemArg) const; /// Utility functions to emit monitor and mwait instructions. These /// need to make sure that the arguments to the intrinsic are in the diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 28dfbe7a1fa..6f48d7ed7fe 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7002,8 +7002,8 @@ multiclass pseudo_pcmpistrm { imm:$src3))]>; def MEM : PseudoI<(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 - VR128:$src1, (load addr:$src2), imm:$src3))]>; + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>; } let Defs = [EFLAGS], usesCustomInserter = 1 in { @@ -7011,24 +7011,22 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in { defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>; } -let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in { - def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; +multiclass pcmpistrm_SS42AI { + def rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + []>, OpSize; let mayLoad = 1 in - def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; + def rm :SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + []>, OpSize; } let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in { - def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; - let mayLoad = 1 in - def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; + let Predicates = [HasAVX] in + defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX; + defm PCMPISTRM128 : pcmpistrm_SS42AI<"pcmpistrm"> ; } // Packed Compare Explicit Length Strings, Return Mask @@ -7039,8 +7037,8 @@ multiclass pseudo_pcmpestrm { VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>; def MEM : PseudoI<(outs VR128:$dst), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>; + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, + (bc_v16i8 (memopv2i64 addr:$src3)), EDX, imm:$src5))]>; } let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { @@ -7048,64 +7046,94 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>; } -let Predicates = [HasAVX], - Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { - def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; +multiclass SS42AI_pcmpestrm { + def rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + []>, OpSize; let mayLoad = 1 in - def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; + def rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + []>, OpSize; } let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { - def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; - let mayLoad = 1 in - def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; + let Predicates = [HasAVX] in + defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX; + defm PCMPESTRM128 : SS42AI_pcmpestrm<"pcmpestrm">; } // Packed Compare Implicit Length Strings, Return Index -let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { - multiclass SS42AI_pcmpistri { - def rr : SS42AI<0x63, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; - let mayLoad = 1 in - def rm : SS42AI<0x63, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; - } +multiclass pseudo_pcmpistri { + def REG : PseudoI<(outs GR32:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + [(set GR32:$dst, EFLAGS, + (X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>; + def MEM : PseudoI<(outs GR32:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + [(set GR32:$dst, EFLAGS, (X86pcmpistri VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>; } -let Predicates = [HasAVX] in -defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; -defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; +let Defs = [EFLAGS], usesCustomInserter = 1 in { + defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI">, Requires<[HasAVX]>; + defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI">, Requires<[UseSSE42]>; +} + +multiclass SS42AI_pcmpistri { + def rr : SS42AI<0x63, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + []>, OpSize; + let mayLoad = 1 in + def rm : SS42AI<0x63, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + []>, OpSize; +} + +let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { + let Predicates = [HasAVX] in + defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; + defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; +} // Packed Compare Explicit Length Strings, Return Index -let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { - multiclass SS42AI_pcmpestri { - def rr : SS42AI<0x61, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; - let mayLoad = 1 in - def rm : SS42AI<0x61, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; - } +multiclass pseudo_pcmpestri { + def REG : PseudoI<(outs GR32:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + [(set GR32:$dst, EFLAGS, + (X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>; + def MEM : PseudoI<(outs GR32:$dst), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + [(set GR32:$dst, EFLAGS, + (X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (memopv2i64 addr:$src3)), EDX, + imm:$src5))]>; } -let Predicates = [HasAVX] in -defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; -defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; +let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { + defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI">, Requires<[HasAVX]>; + defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI">, Requires<[UseSSE42]>; +} + +multiclass SS42AI_pcmpestri { + def rr : SS42AI<0x61, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + []>, OpSize; + let mayLoad = 1 in + def rm : SS42AI<0x61, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + []>, OpSize; +} + +let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { + let Predicates = [HasAVX] in + defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; + defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; +} //===----------------------------------------------------------------------===// // SSE4.2 - CRC Instructions diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index c44beb4bc2b..88ecd5a5d34 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -1140,9 +1140,9 @@ declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) noun define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { - ; CHECK: movl - ; CHECK: movl - ; CHECK: vpcmpestri + ; CHECK: movl $7 + ; CHECK: movl $7 + ; CHECK: vpcmpestri $7 ; CHECK: movl %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; [#uses=1] ret i32 %res @@ -1150,6 +1150,18 @@ define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone +define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) { + ; CHECK: movl $7 + ; CHECK: movl $7 + ; CHECK: vpcmpestri $7, ( + ; CHECK: movl + %1 = load <16 x i8>* %a0 + %2 = load <16 x i8>* %a2 + %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; [#uses=1] + ret i32 %res +} + + define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl @@ -1216,8 +1228,19 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone +define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) { + ; CHECK: movl $7 + ; CHECK: movl $7 + ; CHECK: vpcmpestrm $7, + ; CHECK-NOT: vmov + %1 = load <16 x i8>* %a2 + %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} + + define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { - ; CHECK: vpcmpistri + ; CHECK: vpcmpistri $7 ; CHECK: movl %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; [#uses=1] ret i32 %res @@ -1225,6 +1248,16 @@ define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone +define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) { + ; CHECK: vpcmpistri $7, ( + ; CHECK: movl + %1 = load <16 x i8>* %a0 + %2 = load <16 x i8>* %a1 + %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; [#uses=1] + ret i32 %res +} + + define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistri ; CHECK: seta @@ -1271,7 +1304,7 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { - ; CHECK: vpcmpistrm + ; CHECK: vpcmpistrm $7 ; CHECK-NOT: vmov %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -1279,6 +1312,15 @@ define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone +define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) { + ; CHECK: vpcmpistrm $7, ( + ; CHECK-NOT: vmov + %1 = load <16 x i8>* %a1 + %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} + + define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK: vaddss %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]