diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 85966af9c82..4802eb50c17 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -6495,3 +6495,10 @@ let TargetPrefix = "x86" in {
       : GCCBuiltin<"__builtin_ia32_mwaitx">,
         Intrinsic<[], [ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], []>;
 }
+
+//===----------------------------------------------------------------------===//
+// Cache-line zero
+let TargetPrefix = "x86" in {
+  def int_x86_clzero : GCCBuiltin<"__builtin_ia32_clzero">,
+      Intrinsic<[], [llvm_ptr_ty], []>;
+}
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 881bb01ac93..7d37780c376 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -1353,6 +1353,12 @@ bool sys::getHostCPUFeatures(StringMap &Features) {
   Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
   Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
 
+  // CLZERO is reported in EBX bit 0 of extended CPUID leaf 0x80000008.
+  bool HasExtLeaf8 =
+      MaxExtLevel >= 0x80000008 &&
+      !getX86CpuIDAndInfoEx(0x80000008, 0x0, &EAX, &EBX, &ECX, &EDX);
+  Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);
+
   bool HasLeaf7 = MaxLevel >= 7 &&
                   !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
 
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index fddf04f276b..fa363749fcc 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -202,6 +202,8 @@ def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
                                        "Support LAHF and SAHF instructions">;
 def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                                       "Enable MONITORX/MWAITX timer functionality">;
+def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
+                                      "Enable Cache Line Zero">;
 def FeatureMPX     : SubtargetFeature<"mpx", "HasMPX", "true",
                                       "Support MPX instructions">;
 def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
@@ -765,6 +767,7 @@ def: ProcessorModel<"znver1", BtVer2Model, [
   FeatureBMI,
   FeatureBMI2,
   FeatureCLFLUSHOPT,
+  FeatureCLZERO,
   FeatureCMPXCHG16B,
   FeatureF16C,
   FeatureFMA,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index eb0d0de45ed..807acd5b5f8 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -24418,6 +24418,27 @@ static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB,
   return BB;
 }
 
+/// Custom-inserter expansion of the CLZERO pseudo: compute the pseudo's memory
+/// operand into RAX (64-bit) or EAX (32-bit) with an LEA, emit the operand-less
+/// CLZEROr (declared with an implicit use of EAX), then erase the pseudo.
+static MachineBasicBlock *emitClzero(MachineInstr &MI, MachineBasicBlock *BB,
+                                     const X86Subtarget &Subtarget) {
+  DebugLoc dl = MI.getDebugLoc();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  // Address into RAX/EAX
+  unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
+  unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
+  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
+  for (int i = 0; i < X86::AddrNumOperands; ++i)
+    MIB.add(MI.getOperand(i));
+
+  // The instruction doesn't actually take any operands though.
+  BuildMI(*BB, MI, dl, TII->get(X86::CLZEROr));
+
+  MI.eraseFromParent(); // The pseudo is gone now.
+  return BB;
+}
+
 MachineBasicBlock *
 X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *MBB) const {
@@ -26038,6 +26059,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr);
   case X86::MONITORX:
     return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr);
+
+  // Cache line zero
+  case X86::CLZERO:
+    return emitClzero(MI, BB, Subtarget);
+
   // PKU feature
   case X86::WRPKRU:
     return emitWRPKRU(MI, BB, Subtarget);
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 7bdf2bd41b1..2ccaf6aaed2 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -859,6 +859,7 @@ def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
 def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
 def HasLAHFSAHF  : Predicate<"Subtarget->hasLAHFSAHF()">;
 def HasMWAITX    : Predicate<"Subtarget->hasMWAITX()">;
+def HasCLZERO    : Predicate<"Subtarget->hasCLZERO()">;
 def FPStackf32   : Predicate<"!Subtarget->hasSSE1()">;
 def FPStackf64   : Predicate<"!Subtarget->hasSSE2()">;
 def HasMPX       : Predicate<"Subtarget->hasMPX()">;
@@ -2456,8 +2457,19 @@ def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>,
 //===----------------------------------------------------------------------===//
 // CLZERO Instruction
 //
-let Uses = [EAX] in
-def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, TB;
+let SchedRW = [WriteSystem] in {
+  let Uses = [EAX] in
+  def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", [], IIC_SSE_CLZERO>,
+                TB, Requires<[HasCLZERO]>;
+
+  let usesCustomInserter = 1 in {
+  def CLZERO : PseudoI<(outs), (ins i32mem:$src1),
+                       [(int_x86_clzero addr:$src1)]>, Requires<[HasCLZERO]>;
+  }
+} // SchedRW
+
+def : InstAlias<"clzero\t{%eax|eax}", (CLZEROr)>, Requires<[Not64BitMode]>;
+def : InstAlias<"clzero\t{%rax|rax}", (CLZEROr)>, Requires<[In64BitMode]>;
 
 //===----------------------------------------------------------------------===//
 // Pattern fragments to auto generate TBM instructions.
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 35257f89100..7f7efd7cad3 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -366,6 +366,7 @@ def IIC_SSE_MWAIT : InstrItinClass;
 def IIC_SSE_MONITOR : InstrItinClass;
 def IIC_SSE_MWAITX : InstrItinClass;
 def IIC_SSE_MONITORX : InstrItinClass;
+def IIC_SSE_CLZERO : InstrItinClass;
 
 def IIC_SSE_PREFETCH : InstrItinClass;
 def IIC_SSE_PAUSE : InstrItinClass;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 1646ad37599..e09e378b3f6 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -289,6 +289,7 @@ void X86Subtarget::initializeEnvironment() {
   HasRDSEED = false;
   HasLAHFSAHF = false;
   HasMWAITX = false;
+  HasCLZERO = false;
   HasMPX = false;
   IsBTMemSlow = false;
   IsPMULLDSlow = false;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 633c7adda2f..dffdc5a82ef 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -175,6 +175,9 @@ protected:
   /// Processor has MONITORX/MWAITX instructions.
   bool HasMWAITX;
 
+  /// Processor has Cache Line Zero instruction
+  bool HasCLZERO;
+
   /// Processor has Prefetch with intent to Write instruction
   bool HasPFPREFETCHWT1;
 
@@ -460,6 +463,7 @@ public:
   bool hasRDSEED() const { return HasRDSEED; }
   bool hasLAHFSAHF() const { return HasLAHFSAHF; }
   bool hasMWAITX() const { return HasMWAITX; }
+  bool hasCLZERO() const { return HasCLZERO; }
   bool isBTMemSlow() const { return IsBTMemSlow; }
   bool isSHLDSlow() const { return IsSHLDSlow; }
   bool isPMULLDSlow() const { return IsPMULLDSlow; }
diff --git a/test/CodeGen/X86/clzero.ll b/test/CodeGen/X86/clzero.ll
new file mode 100644
index 00000000000..f15d4deedef
--- /dev/null
+++ b/test/CodeGen/X86/clzero.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+clzero | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i386-pc-linux -mattr=+clzero | FileCheck %s --check-prefix=X32
+
+define void @foo(i8* %p) #0 {
+; X64-LABEL: foo:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    leaq (%rdi), %rax
+; X64-NEXT:    clzero
+; X64-NEXT:    retq
+;
+; X32-LABEL: foo:
+; X32:       # BB#0: # %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    leal (%eax), %eax
+; X32-NEXT:    clzero
+; X32-NEXT:    retl
+entry:
+  tail call void @llvm.x86.clzero(i8* %p) #1
+  ret void
+}
+
+declare void @llvm.x86.clzero(i8*) #1
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index 01d21c5d0fc..9dd49e51d91 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -129,6 +129,9 @@
 # CHECK: invlpga
 0x0f 0x01 0xdf
 
+# CHECK: clzero
+0x0f 0x01 0xfc
+
 # CHECK: movl $0, -4(%ebp)
 0xc7 0x45 0xfc 0x00 0x00 0x00 0x00
 
diff --git a/test/MC/X86/x86-32.s b/test/MC/X86/x86-32.s
index c05cf41d91e..f3633dcffef 100644
--- a/test/MC/X86/x86-32.s
+++ b/test/MC/X86/x86-32.s
@@ -444,6 +444,14 @@ cmovnae %bx,%bx
 // CHECK: encoding: [0x0f,0x21,0xf8]
         movl %dr7,%eax
 
+// CHECK: clzero
+// CHECK: encoding: [0x0f,0x01,0xfc]
+                clzero
+
+// CHECK: clzero
+// CHECK: encoding: [0x0f,0x01,0xfc]
+                clzero %eax
+
 // radr://8017522
 // CHECK: wait
 // CHECK: encoding: [0x9b]
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index 89dc599e04f..a605dbbbd74 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -1502,6 +1502,14 @@ vmovq %xmm0, %rax
 // CHECK: encoding: [0x0f,0x01,0xfb]
         mwaitx %rax, %rcx, %rbx
 
+// CHECK: clzero
+// CHECK: encoding: [0x0f,0x01,0xfc]
+                clzero
+
+// CHECK: clzero
+// CHECK: encoding: [0x0f,0x01,0xfc]
+                clzero %rax
+
 // CHECK: movl %r15d, (%r15,%r15)
 // CHECK: encoding: [0x47,0x89,0x3c,0x3f]
         movl %r15d, (%r15,%r15)