integer: Add popcount implementation using ctpop intrinsic

Also copy/modify the unary_intrin.inc from math/ to make the
intrinsic declaration somewhat reusable.

Passes CL CTS integer_ops/test_integer_ops popcount tests for CL 1.2

Tested on GCN 1.0 (Pitcairn)

Signed-off-by: Aaron Watry <awatry@gmail.com>
Reviewed-by: Jan Vesely <jan.vesely@rutgers.edu>
llvm-svn: 312854
This commit is contained in:
Aaron Watry 2017-09-09 02:23:54 +00:00
parent 628fbcae4c
commit 415a60f303
3 changed files with 27 additions and 0 deletions

View File

@ -126,6 +126,7 @@
#include <clc/integer/mad_sat.h>
#include <clc/integer/mul24.h>
#include <clc/integer/mul_hi.h>
#include <clc/integer/popcount.h>
#include <clc/integer/rhadd.h>
#include <clc/integer/rotate.h>
#include <clc/integer/sub_sat.h>

View File

@ -0,0 +1,6 @@
/*
 * popcount: route the public name to __clc_popcount, then declare
 * __clc_popcount by including the shared unary integer intrinsic template
 * with "llvm.ctpop" as the intrinsic base name.
 */
#undef popcount
#define popcount __clc_popcount

/* Parameters consumed by the included template. */
#define __CLC_INTRINSIC "llvm.ctpop"
#define __CLC_FUNCTION __clc_popcount
#include <clc/integer/unary_intrin.inc>

View File

@ -0,0 +1,20 @@
#define __CLC_INTRINSIC_DEF(SCALAR_TYPE, BIT_SIZE) \
_CLC_OVERLOAD SCALAR_TYPE __CLC_FUNCTION(SCALAR_TYPE x) __asm(__CLC_INTRINSIC ".i" BIT_SIZE); \
_CLC_OVERLOAD SCALAR_TYPE##2 __CLC_FUNCTION(SCALAR_TYPE##2 x) __asm(__CLC_INTRINSIC ".v2i" BIT_SIZE); \
_CLC_OVERLOAD SCALAR_TYPE##3 __CLC_FUNCTION(SCALAR_TYPE##3 x) __asm(__CLC_INTRINSIC ".v3i" BIT_SIZE); \
_CLC_OVERLOAD SCALAR_TYPE##4 __CLC_FUNCTION(SCALAR_TYPE##4 x) __asm(__CLC_INTRINSIC ".v4i" BIT_SIZE); \
_CLC_OVERLOAD SCALAR_TYPE##8 __CLC_FUNCTION(SCALAR_TYPE##8 x) __asm(__CLC_INTRINSIC ".v8i" BIT_SIZE); \
_CLC_OVERLOAD SCALAR_TYPE##16 __CLC_FUNCTION(SCALAR_TYPE##16 x) __asm(__CLC_INTRINSIC ".v16i" BIT_SIZE);
__CLC_INTRINSIC_DEF(char, "8")
__CLC_INTRINSIC_DEF(uchar, "8")
__CLC_INTRINSIC_DEF(short, "16")
__CLC_INTRINSIC_DEF(ushort, "16")
__CLC_INTRINSIC_DEF(int, "32")
__CLC_INTRINSIC_DEF(uint, "32")
__CLC_INTRINSIC_DEF(long, "64")
__CLC_INTRINSIC_DEF(ulong, "64")
#undef __CLC_FUNCTION
#undef __CLC_INTRINSIC
#undef __CLC_INTRINSIC_DEF