From a32c319741faf11f90819ec3a5b2431d8af2ff3a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 18 Jul 2014 06:07:13 +0000 Subject: [PATCH] R600: Implement TTI:getPopcntSupport The test is just copied from X86, and I don't know of a better way to test it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213351 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUSubtarget.h | 6 +- lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 8 ++ test/Transforms/LoopIdiom/R600/lit.local.cfg | 3 + test/Transforms/LoopIdiom/R600/popcnt.ll | 104 ++++++++++++++++++ 4 files changed, 119 insertions(+), 2 deletions(-) create mode 100644 test/Transforms/LoopIdiom/R600/lit.local.cfg create mode 100644 test/Transforms/LoopIdiom/R600/popcnt.ll diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 0edaca38219..a844b37b6be 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -123,8 +123,10 @@ public: if (Size == 32) return (getGeneration() >= EVERGREEN); - assert(Size == 64); - return (getGeneration() >= SOUTHERN_ISLANDS); + if (Size == 64) + return (getGeneration() >= SOUTHERN_ISLANDS); + + return false; } bool hasMulU24() const { diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp index 2d934a4cb09..e3a546e1d72 100644 --- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp @@ -77,6 +77,8 @@ public: void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const override; + PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override; + /// @} }; @@ -119,3 +121,9 @@ void AMDGPUTTI::getUnrollingPreferences(Loop *L, } } } + +AMDGPUTTI::PopcntSupportKind +AMDGPUTTI::getPopcntSupport(unsigned TyWidth) const { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + return ST->hasBCNT(TyWidth) ? PSK_FastHardware : PSK_Software; +} diff --git a/test/Transforms/LoopIdiom/R600/lit.local.cfg b/test/Transforms/LoopIdiom/R600/lit.local.cfg new file mode 100644 index 00000000000..4086e8d681c --- /dev/null +++ b/test/Transforms/LoopIdiom/R600/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'R600' in config.root.targets: + config.unsupported = True + diff --git a/test/Transforms/LoopIdiom/R600/popcnt.ll b/test/Transforms/LoopIdiom/R600/popcnt.ll new file mode 100644 index 00000000000..e4301bbb06d --- /dev/null +++ b/test/Transforms/LoopIdiom/R600/popcnt.ll @@ -0,0 +1,104 @@ +; RUN: opt -loop-idiom -mtriple=r600-- -mcpu=SI -S < %s | FileCheck %s + +; Mostly copied from x86 version. + +;To recognize this pattern: +;int popcount(unsigned long long a) { +; int c = 0; +; while (a) { +; c++; +; a &= a - 1; +; } +; return c; +;} +; + +; CHECK-LABEL: @popcount_i64 +; CHECK: entry +; CHECK: llvm.ctpop.i64 +; CHECK: ret +define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp { +entry: + %tobool3 = icmp eq i64 %a, 0 + br i1 %tobool3, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ] + %inc = add nsw i32 %c.05, 1 + %sub = add i64 %a.addr.04, -1 + %and = and i64 %sub, %a.addr.04 + %tobool = icmp eq i64 %and, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ] + ret i32 %c.0.lcssa +} + +; CHECK-LABEL: @popcount_i32 +; CHECK: entry +; CHECK: llvm.ctpop.i32 +; CHECK: ret +define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp { +entry: + %tobool3 = icmp eq i32 %a, 0 + br i1 %tobool3, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ] + %inc = add nsw i32 %c.05, 1 + %sub = add i32 %a.addr.04, -1 + %and = and i32 %sub, %a.addr.04 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ] + ret i32 %c.0.lcssa +} + +; To recognize this pattern: +;int popcount(unsigned long long a, int mydata1, int mydata2) { +; int c = 0; +; while (a) { +; c++; +; a &= a - 1; +; mydata1 *= c; +; mydata2 *= (int)a; +; } +; return c + mydata1 + mydata2; +;} + +; CHECK-LABEL: @popcount2 +; CHECK: entry +; CHECK: llvm.ctpop.i64 +; CHECK: ret +define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp { +entry: + %tobool9 = icmp eq i64 %a, 0 + br i1 %tobool9, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ] + %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ] + %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ] + %inc = add nsw i32 %c.013, 1 + %sub = add i64 %a.addr.010, -1 + %and = and i64 %sub, %a.addr.010 + %mul = mul nsw i32 %inc, %mydata1.addr.011 + %conv = trunc i64 %and to i32 + %mul1 = mul nsw i32 %conv, %mydata2.addr.012 + %tobool = icmp eq i64 %and, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ] + %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ] + %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ] + %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa + %add2 = add i32 %add, %c.0.lcssa + ret i32 %add2 +}