diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 923f8949701..fa76619c281 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -23,6 +23,9 @@ include "llvm/Target/Target.td" def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", "Enable conditional move instructions">; +def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true", + "Support POPCNT instruction">; + def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", "Enable MMX instructions">; @@ -45,7 +48,7 @@ def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41", [FeatureSSSE3]>; def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42", "Enable SSE 4.2 instructions", - [FeatureSSE41]>; + [FeatureSSE41, FeaturePOPCNT]>; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", "Enable 3DNow! instructions">; def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", @@ -63,7 +66,8 @@ def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem", "IsUAMemFast", "true", "Fast unaligned memory access">; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", - "Support SSE 4a instructions">; + "Support SSE 4a instructions", + [FeaturePOPCNT]>; def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true", "Enable AVX instructions">; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0bbf990ab77..478bf71c686 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -285,21 +285,27 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); - setOperationAction(ISD::CTPOP , MVT::i8 , Expand); setOperationAction(ISD::CTTZ , MVT::i8 , Custom); setOperationAction(ISD::CTLZ , MVT::i8 , Custom); - setOperationAction(ISD::CTPOP , MVT::i16 , Expand); setOperationAction(ISD::CTTZ , MVT::i16 , Custom); setOperationAction(ISD::CTLZ , MVT::i16 , Custom); - setOperationAction(ISD::CTPOP , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Custom); setOperationAction(ISD::CTLZ , MVT::i32 , Custom); if (Subtarget->is64Bit()) { - setOperationAction(ISD::CTPOP , MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Custom); setOperationAction(ISD::CTLZ , MVT::i64 , Custom); } + if (Subtarget->hasPOPCNT()) { + setOperationAction(ISD::CTPOP , MVT::i8 , Promote); + } else { + setOperationAction(ISD::CTPOP , MVT::i8 , Expand); + setOperationAction(ISD::CTPOP , MVT::i16 , Expand); + setOperationAction(ISD::CTPOP , MVT::i32 , Expand); + if (Subtarget->is64Bit()) + setOperationAction(ISD::CTPOP , MVT::i64 , Expand); + } + setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); setOperationAction(ISD::BSWAP , MVT::i16 , Expand); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 67cc72f3f0a..73a620244e4 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4603,22 +4603,25 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>; //===----------------------------------------------------------------------===// def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; -let mayLoad = 1 in + "popcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS; def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), - "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; + "popcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS; def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; -let mayLoad = 1 in + "popcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctpop GR32:$src))]>, XS; def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; + "popcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS; def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; -let mayLoad = 1 in + "popcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctpop GR64:$src))]>, XS; def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; + "popcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 2e7312470b6..58cf3e0c6d2 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -65,6 +65,9 @@ protected: /// bool HasX86_64; + /// HasPOPCNT - True if the processor supports POPCNT. + bool HasPOPCNT; + /// HasSSE4A - True if the processor supports SSE4A instructions. bool HasSSE4A; @@ -150,6 +153,7 @@ public: bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } + bool hasPOPCNT() const { return HasPOPCNT; } bool hasAVX() const { return HasAVX; } bool hasAES() const { return HasAES; } bool hasCLMUL() const { return HasCLMUL; } diff --git a/test/CodeGen/X86/popcnt.ll b/test/CodeGen/X86/popcnt.ll new file mode 100644 index 00000000000..430214c73b1 --- /dev/null +++ b/test/CodeGen/X86/popcnt.ll @@ -0,0 +1,38 @@ +; RUN: llc -march=x86-64 -mattr=+popcnt < %s | FileCheck %s + +define i8 @cnt8(i8 %x) nounwind readnone { + %cnt = tail call i8 @llvm.ctpop.i8(i8 %x) + ret i8 %cnt +; CHECK: cnt8: +; CHECK: popcntw +; CHECK: ret +} + +define i16 @cnt16(i16 %x) nounwind readnone { + %cnt = tail call i16 @llvm.ctpop.i16(i16 %x) + ret i16 %cnt +; CHECK: cnt16: +; CHECK: popcntw +; CHECK: ret +} + +define i32 @cnt32(i32 %x) nounwind readnone { + %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) + ret i32 %cnt +; CHECK: cnt32: +; CHECK: popcntl +; CHECK: ret +} + +define i64 @cnt64(i64 %x) nounwind readnone { + %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) + ret i64 %cnt +; CHECK: cnt64: +; CHECK: popcntq +; CHECK: ret +} + +declare i8 @llvm.ctpop.i8(i8) nounwind readnone +declare i16 @llvm.ctpop.i16(i16) nounwind readnone +declare i32 @llvm.ctpop.i32(i32) nounwind readnone +declare i64 @llvm.ctpop.i64(i64) nounwind readnone