mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-28 14:10:55 +00:00
Add patterns for the x86 popcnt instruction.
- Also adds a new POPCNT subtarget feature that is currently enabled if the target supports SSE4.2 (nehalem) or SSE4A (barcelona). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120917 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a6091be84c
commit
1292c22645
@ -23,6 +23,9 @@ include "llvm/Target/Target.td"
|
|||||||
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
|
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
|
||||||
"Enable conditional move instructions">;
|
"Enable conditional move instructions">;
|
||||||
|
|
||||||
|
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
|
||||||
|
"Support POPCNT instruction">;
|
||||||
|
|
||||||
|
|
||||||
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
|
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
|
||||||
"Enable MMX instructions">;
|
"Enable MMX instructions">;
|
||||||
@ -45,7 +48,7 @@ def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
|
|||||||
[FeatureSSSE3]>;
|
[FeatureSSSE3]>;
|
||||||
def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
|
def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
|
||||||
"Enable SSE 4.2 instructions",
|
"Enable SSE 4.2 instructions",
|
||||||
[FeatureSSE41]>;
|
[FeatureSSE41, FeaturePOPCNT]>;
|
||||||
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
|
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
|
||||||
"Enable 3DNow! instructions">;
|
"Enable 3DNow! instructions">;
|
||||||
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
|
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
|
||||||
@ -63,7 +66,8 @@ def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
|
|||||||
"IsUAMemFast", "true",
|
"IsUAMemFast", "true",
|
||||||
"Fast unaligned memory access">;
|
"Fast unaligned memory access">;
|
||||||
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
|
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
|
||||||
"Support SSE 4a instructions">;
|
"Support SSE 4a instructions",
|
||||||
|
[FeaturePOPCNT]>;
|
||||||
|
|
||||||
def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
|
def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
|
||||||
"Enable AVX instructions">;
|
"Enable AVX instructions">;
|
||||||
|
@ -285,21 +285,27 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||||||
setOperationAction(ISD::FREM , MVT::f80 , Expand);
|
setOperationAction(ISD::FREM , MVT::f80 , Expand);
|
||||||
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
|
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
|
||||||
|
|
||||||
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
|
|
||||||
setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
|
setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
|
||||||
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
|
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
|
||||||
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
|
|
||||||
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
|
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
|
||||||
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
|
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
|
||||||
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
|
|
||||||
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
|
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
|
||||||
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
|
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
|
||||||
if (Subtarget->is64Bit()) {
|
if (Subtarget->is64Bit()) {
|
||||||
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
|
|
||||||
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
|
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
|
||||||
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
|
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Subtarget->hasPOPCNT()) {
|
||||||
|
setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
|
||||||
|
} else {
|
||||||
|
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
|
||||||
|
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
|
||||||
|
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
|
||||||
|
if (Subtarget->is64Bit())
|
||||||
|
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
|
||||||
|
}
|
||||||
|
|
||||||
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
|
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
|
||||||
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
|
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
|
||||||
|
|
||||||
|
@ -4603,22 +4603,25 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
|
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
|
||||||
"popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
|
"popcnt{w}\t{$src, $dst|$dst, $src}",
|
||||||
let mayLoad = 1 in
|
[(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS;
|
||||||
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
|
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
|
||||||
"popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
|
"popcnt{w}\t{$src, $dst|$dst, $src}",
|
||||||
|
[(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS;
|
||||||
|
|
||||||
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
|
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
|
||||||
"popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
|
"popcnt{l}\t{$src, $dst|$dst, $src}",
|
||||||
let mayLoad = 1 in
|
[(set GR32:$dst, (ctpop GR32:$src))]>, XS;
|
||||||
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
|
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
|
||||||
"popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
|
"popcnt{l}\t{$src, $dst|$dst, $src}",
|
||||||
|
[(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS;
|
||||||
|
|
||||||
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
|
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
|
||||||
"popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
|
"popcnt{q}\t{$src, $dst|$dst, $src}",
|
||||||
let mayLoad = 1 in
|
[(set GR64:$dst, (ctpop GR64:$src))]>, XS;
|
||||||
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
|
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
|
||||||
"popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
|
"popcnt{q}\t{$src, $dst|$dst, $src}",
|
||||||
|
[(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -65,6 +65,9 @@ protected:
|
|||||||
///
|
///
|
||||||
bool HasX86_64;
|
bool HasX86_64;
|
||||||
|
|
||||||
|
/// HasPOPCNT - True if the processor supports POPCNT.
|
||||||
|
bool HasPOPCNT;
|
||||||
|
|
||||||
/// HasSSE4A - True if the processor supports SSE4A instructions.
|
/// HasSSE4A - True if the processor supports SSE4A instructions.
|
||||||
bool HasSSE4A;
|
bool HasSSE4A;
|
||||||
|
|
||||||
@ -150,6 +153,7 @@ public:
|
|||||||
bool hasSSE4A() const { return HasSSE4A; }
|
bool hasSSE4A() const { return HasSSE4A; }
|
||||||
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
|
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
|
||||||
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
||||||
|
bool hasPOPCNT() const { return HasPOPCNT; }
|
||||||
bool hasAVX() const { return HasAVX; }
|
bool hasAVX() const { return HasAVX; }
|
||||||
bool hasAES() const { return HasAES; }
|
bool hasAES() const { return HasAES; }
|
||||||
bool hasCLMUL() const { return HasCLMUL; }
|
bool hasCLMUL() const { return HasCLMUL; }
|
||||||
|
38
test/CodeGen/X86/popcnt.ll
Normal file
38
test/CodeGen/X86/popcnt.ll
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
; RUN: llc -march=x86-64 -mattr=+popcnt < %s | FileCheck %s
|
||||||
|
|
||||||
|
define i8 @cnt8(i8 %x) nounwind readnone {
|
||||||
|
%cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
|
||||||
|
ret i8 %cnt
|
||||||
|
; CHECK: cnt8:
|
||||||
|
; CHECK: popcntw
|
||||||
|
; CHECK: ret
|
||||||
|
}
|
||||||
|
|
||||||
|
define i16 @cnt16(i16 %x) nounwind readnone {
|
||||||
|
%cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
|
||||||
|
ret i16 %cnt
|
||||||
|
; CHECK: cnt16:
|
||||||
|
; CHECK: popcntw
|
||||||
|
; CHECK: ret
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @cnt32(i32 %x) nounwind readnone {
|
||||||
|
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
|
||||||
|
ret i32 %cnt
|
||||||
|
; CHECK: cnt32:
|
||||||
|
; CHECK: popcntl
|
||||||
|
; CHECK: ret
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @cnt64(i64 %x) nounwind readnone {
|
||||||
|
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
|
||||||
|
ret i64 %cnt
|
||||||
|
; CHECK: cnt64:
|
||||||
|
; CHECK: popcntq
|
||||||
|
; CHECK: ret
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i8 @llvm.ctpop.i8(i8) nounwind readnone
|
||||||
|
declare i16 @llvm.ctpop.i16(i16) nounwind readnone
|
||||||
|
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||||
|
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
|
Loading…
Reference in New Issue
Block a user