mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-24 04:32:09 +00:00
Implement sse4.2 string/text processing instructions:
Add patterns and instruction encoding information. Add custom lowering to deal with hardwired return register of uncertain type (xmm0). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79377 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4b9e1d291c
commit
b120ab4057
@ -7595,6 +7595,43 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
|
|||||||
return nextMBB;
|
return nextMBB;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Expand a PCMPISTRM/PCMPESTRM pseudo instruction into the real instruction
/// plus a copy of its result out of XMM0.
///
/// The real instructions have no explicit output and define XMM0 implicitly,
/// whereas the pseudos carry an ordinary VR128 destination. This builds the
/// real instruction from the pseudo's source operands and then emits a
/// MOVAPSrr from XMM0 into the pseudo's destination register.
///
/// \param MI      The pseudo instruction to expand. It is deleted before
///                returning and must not be used afterwards.
/// \param BB      The basic block the new instructions are built into.
/// \param numArgs 3 selects the PCMPISTRM opcode; any other value selects
///                the PCMPESTRM opcode.
/// \param memArg  True selects the register/memory (rm) opcode, false the
///                register/register (rr) opcode.
/// \returns BB.
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
                            unsigned numArgs, bool memArg) const {
  MachineFunction *F = BB->getParent();
  DebugLoc dl = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  unsigned Opc;
  if (memArg)
    Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
  else
    Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;

  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));

  // Forward every source operand of the pseudo (operand 0 is its result).
  // The walk covers all operands rather than a fixed count: a memory source
  // expands to several machine operands, so counting source arguments would
  // cut the address short and lose the trailing immediate. Implicit register
  // operands are skipped because the real instruction's description supplies
  // its own.
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &Op = MI->getOperand(i);
    if (!(Op.isReg() && Op.isImplicit()))
      MIB.addOperand(Op);
  }

  // The result lives in the hardwired XMM0; copy it to where the pseudo's
  // users expect it.
  BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
    .addReg(X86::XMM0);

  F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.

  return BB;
}
|
||||||
|
|
||||||
MachineBasicBlock *
|
MachineBasicBlock *
|
||||||
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
|
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
|
||||||
MachineInstr *MI,
|
MachineInstr *MI,
|
||||||
@ -7804,6 +7841,17 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
|||||||
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
|
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
|
||||||
return BB;
|
return BB;
|
||||||
}
|
}
|
||||||
|
// String/text processing lowering.
|
||||||
|
case X86::PCMPISTRM128REG:
|
||||||
|
return EmitPCMP(MI, BB, 3, false /* in-mem */);
|
||||||
|
case X86::PCMPISTRM128MEM:
|
||||||
|
return EmitPCMP(MI, BB, 3, true /* in-mem */);
|
||||||
|
case X86::PCMPESTRM128REG:
|
||||||
|
return EmitPCMP(MI, BB, 5, false /* in mem */);
|
||||||
|
case X86::PCMPESTRM128MEM:
|
||||||
|
return EmitPCMP(MI, BB, 5, true /* in mem */);
|
||||||
|
|
||||||
|
// Atomic Lowering.
|
||||||
case X86::ATOMAND32:
|
case X86::ATOMAND32:
|
||||||
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
|
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
|
||||||
X86::AND32ri, X86::MOV32rm,
|
X86::AND32ri, X86::MOV32rm,
|
||||||
|
@ -693,6 +693,14 @@ namespace llvm {
|
|||||||
const Value *DstSV, uint64_t DstSVOff,
|
const Value *DstSV, uint64_t DstSVOff,
|
||||||
const Value *SrcSV, uint64_t SrcSVOff);
|
const Value *SrcSV, uint64_t SrcSVOff);
|
||||||
|
|
||||||
|
/// Utility function to emit the SSE4.2 string processing instructions
|
||||||
|
/// that return their result in XMM0.
|
||||||
|
// It takes the pseudo instruction to expand, the associated machine basic
|
||||||
|
// block, the number of arguments (3 selects PCMPISTRM, otherwise
|
||||||
|
// PCMPESTRM), and whether the second argument is in memory.
|
||||||
|
MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
|
||||||
|
unsigned argNum, bool inMem) const;
|
||||||
|
|
||||||
/// Utility function to emit atomic bitwise operations (and, or, xor).
|
/// Utility function to emit atomic bitwise operations (and, or, xor).
|
||||||
// It takes the bitwise instruction to expand, the associated machine basic
|
// It takes the bitwise instruction to expand, the associated machine basic
|
||||||
// block, and the associated X86 opcodes for reg/reg and reg/imm.
|
// block, and the associated X86 opcodes for reg/reg and reg/imm.
|
||||||
|
@ -235,6 +235,11 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
|||||||
list<dag> pattern>
|
list<dag> pattern>
|
||||||
: I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
|
: I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
|
||||||
|
|
||||||
|
// SS42AI = SSE 4.2 instructions with TA prefix
// Same shape as SS42FI, but tagged with TA instead of TF; instructions built
// from it are only available when the HasSSE42 predicate holds.
|
||||||
|
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||||
|
list<dag> pattern>
|
||||||
|
: I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>;
|
||||||
|
|
||||||
// X86-64 Instruction templates...
|
// X86-64 Instruction templates...
|
||||||
//
|
//
|
||||||
|
|
||||||
@ -288,4 +293,3 @@ class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> patter
|
|||||||
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
|
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
|
||||||
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
|
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
|
||||||
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
|
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
|
||||||
|
|
||||||
|
@ -3657,6 +3657,11 @@ def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
|||||||
"movntdqa\t{$src, $dst|$dst, $src}",
|
"movntdqa\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
|
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
|
||||||
|
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// SSE4.2 Instructions
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
|
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
|
multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
|
||||||
@ -3739,3 +3744,115 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
(int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>,
|
(int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>,
|
||||||
OpSize, REX_W;
|
OpSize, REX_W;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// String/text processing instructions.
// PCMPISTRM pseudo instructions. These carry the selection patterns for
// int_x86_sse42_pcmpistrm128 and expose the result as an ordinary VR128
// $dst. They are flagged usesCustomDAGSchedInserter so that the custom
// inserter (EmitPCMP) can replace them with PCMPISTRM128rr/rm, which have no
// explicit output. The REG form takes $src2 in a register, the MEM form
// folds a load of $src2.
|
||||||
|
let Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
|
||||||
|
def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||||
|
"#PCMPISTRM128rr PSEUDO!",
|
||||||
|
[(set VR128:$dst,
|
||||||
|
(int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
|
||||||
|
imm:$src3))]>, OpSize;
|
||||||
|
def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
||||||
|
"#PCMPISTRM128rm PSEUDO!",
|
||||||
|
[(set VR128:$dst,
|
||||||
|
(int_x86_sse42_pcmpistrm128 VR128:$src1,
|
||||||
|
(load addr:$src2),
|
||||||
|
imm:$src3))]>, OpSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Real PCMPISTRM instructions (opcode 0x62), register and memory forms.
// They declare no explicit outputs: the result is modelled only through the
// implicit definition of XMM0 (EFLAGS is defined as well). Their pattern
// lists are empty, so they are not selected from the DAG directly; EmitPCMP
// creates them when expanding the PCMPISTRM128REG/MEM pseudos.
let Defs = [XMM0, EFLAGS] in {
|
||||||
|
def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
|
||||||
|
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||||
|
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}",
|
||||||
|
[]>, OpSize;
|
||||||
|
def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
|
||||||
|
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
||||||
|
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}",
|
||||||
|
[]>, OpSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
// PCMPESTRM pseudo instructions. These carry the selection patterns for
// int_x86_sse42_pcmpestrm128 and expose the result as an ordinary VR128
// $dst. The intrinsic's second and fourth arguments are matched as the
// physical registers EAX and EDX, which is why both appear in Uses and why
// the explicit inputs are numbered $src1, $src3 and $src5. The custom
// inserter (EmitPCMP) replaces these with PCMPESTRM128rr/rm.
let Defs = [EFLAGS], Uses = [EAX, EDX],
|
||||||
|
usesCustomDAGSchedInserter = 1 in {
|
||||||
|
def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
|
||||||
|
"#PCMPESTRM128rr PSEUDO!",
|
||||||
|
[(set VR128:$dst,
|
||||||
|
(int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
|
||||||
|
VR128:$src3,
|
||||||
|
EDX, imm:$src5))]>, OpSize;
|
||||||
|
def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
||||||
|
"#PCMPESTRM128rm PSEUDO!",
|
||||||
|
[(set VR128:$dst,
|
||||||
|
(int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
|
||||||
|
(load addr:$src3),
|
||||||
|
EDX, imm:$src5))]>, OpSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Real PCMPESTRM instructions, register and memory forms. They declare no
// explicit outputs: the result is modelled through the implicit definition
// of XMM0 (EFLAGS is defined as well), and EAX/EDX are implicit inputs.
// Their pattern lists are empty; EmitPCMP creates them when expanding the
// PCMPESTRM128REG/MEM pseudos.
//
// The opcode byte is 0x60 (PCMPESTRM is encoded as 66 0F 3A 60 /r ib).
// 0x62 is PCMPISTRM and must not be reused here, otherwise the encoder
// would emit the wrong instruction.
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
  def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
    "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}",
    []>, OpSize;
  def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
    "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}",
    []>, OpSize;
}
|
||||||
|
|
||||||
|
// SS42AI_pcmpistri - PCMPISTRI (opcode 0x63) in register (rr) and memory
// (rm) forms, parameterised by the intrinsic to match. The instruction has
// no explicit outputs: the pattern sets the physical register ECX from the
// intrinsic's value and marks EFLAGS as implicitly defined.
let Defs = [ECX, EFLAGS] in {
|
||||||
|
multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
|
||||||
|
def rr : SS42AI<0x63, MRMSrcReg, (outs),
|
||||||
|
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||||
|
"pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
|
||||||
|
[(set ECX,
|
||||||
|
(IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
|
||||||
|
(implicit EFLAGS)]>,
|
||||||
|
OpSize;
|
||||||
|
def rm : SS42AI<0x63, MRMSrcMem, (outs),
|
||||||
|
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
||||||
|
"pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
|
||||||
|
[(set ECX,
|
||||||
|
(IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
|
||||||
|
(implicit EFLAGS)]>,
|
||||||
|
OpSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Instantiations of SS42AI_pcmpistri, one per intrinsic. Each yields an rr
// and an rm record; the multiclass fixes the opcode and assembly string, so
// the records differ only in the intrinsic their pattern matches.
defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
|
||||||
|
defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
|
||||||
|
defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
|
||||||
|
defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
|
||||||
|
defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
|
||||||
|
defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
|
||||||
|
|
||||||
|
// SS42AI_pcmpestri - PCMPESTRI (opcode 0x61) in register (rr) and memory
// (rm) forms, parameterised by the intrinsic to match. The intrinsic's
// second and fourth arguments are matched as the physical registers EAX and
// EDX (listed in Uses), so the explicit inputs are $src1, $src3 and $src5.
// The pattern sets ECX from the intrinsic's value and marks EFLAGS as
// implicitly defined.
let Defs = [ECX, EFLAGS] in {
|
||||||
|
let Uses = [EAX, EDX] in {
|
||||||
|
multiclass SS42AI_pcmpestri<Intrinsic IntId128> {
|
||||||
|
def rr : SS42AI<0x61, MRMSrcReg, (outs),
|
||||||
|
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
|
||||||
|
"pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
|
||||||
|
[(set ECX,
|
||||||
|
(IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
|
||||||
|
(implicit EFLAGS)]>,
|
||||||
|
OpSize;
|
||||||
|
def rm : SS42AI<0x61, MRMSrcMem, (outs),
|
||||||
|
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
||||||
|
"pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
|
||||||
|
[(set ECX,
|
||||||
|
(IntId128 VR128:$src1, EAX, (load addr:$src3),
|
||||||
|
EDX, imm:$src5)),
|
||||||
|
(implicit EFLAGS)]>,
|
||||||
|
OpSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Instantiations of SS42AI_pcmpestri, one per intrinsic. Each yields an rr
// and an rm record; the multiclass fixes the opcode and assembly string, so
// the records differ only in the intrinsic their pattern matches.
defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
|
||||||
|
defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
|
||||||
|
defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
|
||||||
|
defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
|
||||||
|
defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
|
||||||
|
defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user