Implement exp2 using OpenCL C rather than using an intrinsic

Not all targets support the intrinsic, so it's better to have a
generic implementation which does not use it.

This exp2 implementation was ported from the AMD builtin library
and has been tested with piglit, OpenCV, and the ocl conformance tests.

llvm-svn: 237228
This commit is contained in:
Tom Stellard 2015-05-13 03:55:07 +00:00
parent 0cabcf211a
commit d538fdc217
8 changed files with 303 additions and 6 deletions

View File

@ -1,6 +1,24 @@
#undef exp2
#define exp2 __clc_exp2
/*
* Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#define __CLC_FUNCTION __clc_exp2
#define __CLC_INTRINSIC "llvm.exp2"
#include <clc/math/unary_intrin.inc>
#define __CLC_BODY <clc/math/exp2.inc>
#include <clc/math/gentype.inc>

View File

@ -0,0 +1,23 @@
/*
* Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/* Declares the exp2 overload for whichever type __CLC_GENTYPE currently
 * expands to; the argument and the result share that type. */
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE exp2(__CLC_GENTYPE x);

View File

@ -79,6 +79,8 @@ math/cospi.cl
math/ep_log.cl
math/erfc.cl
math/exp.cl
math/exp_helper.cl
math/exp2.cl
math/exp10.cl
math/fmax.cl
math/fmin.cl

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <clc/clc.h>
#include "math.h"
#include "../clcmacro.h"
/*
 * Single-precision exp2(x).
 *
 * x is split as k + f with k = rint(x); f is multiplied by ln(2) (carried as
 * a head/tail pair), a polynomial/rational expression is evaluated on that
 * reduced argument, and the result is scaled by 2^k by adding k directly to
 * the exponent field of the float.
 *
 * Returns 0.0f for x < -126, +infinity for x >= 128, and x itself when x is
 * NaN.
 */
_CLC_OVERLOAD _CLC_DEF float exp2(float x) {
    // ln(2) carried as head + tail.
    const float ln2_head = 0x1.62e300p-1f;
    const float ln2_tail = 0x1.2fefa2p-17f;

    // Range reduction: k is the nearest integer, frac the remainder.
    const float nearest = rint(x);
    const int k = (int)nearest;
    const float frac = x - nearest;
    const float hi = frac * ln2_head;
    const float lo = frac * ln2_tail;

    // Horner evaluation in sq = arg^2, innermost coefficient first.
    const float arg = hi + lo;
    const float sq = arg * arg;
    float poly = mad(sq, 0x1.637698p-25f, -0x1.bbd41cp-20f);
    poly = mad(sq, poly, 0x1.1566aap-14f);
    poly = mad(sq, poly, -0x1.6c16c2p-9f);
    poly = mad(sq, poly, 0x1.555556p-3f);
    const float v = mad(sq, -poly, arg);

    const float y = 1.0f - (((-lo) - MATH_DIVIDE(arg * v, 2.0f - v)) - hi);

    // Multiply by 2^k by bumping the exponent bits (bit 23 upward).
    float result = as_float(as_int(y) + (k << 23));

    // Clamp out-of-range inputs. The upper test is written as !(x < limit)
    // so that it is also taken for NaN, exactly as a "x < limit ? r : inf"
    // select would be; NaN is then restored by the final select.
    const float lower_limit = -126.0f;
    const float upper_limit = 128.0f;
    if (x < lower_limit) {
        result = 0.0f;
    }
    if (!(x < upper_limit)) {
        result = as_float(0x7f800000); // +infinity bit pattern
    }

    return isnan(x) ? x : result;
}
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp2, float)
#ifdef cl_khr_fp64
#include "exp_helper.h"
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
/*
 * Double-precision exp2(x).
 *
 * n = convert_int(64 * x) is the argument expressed in 64ths; the remainder
 * x - n/64 is multiplied by ln(2) and handed, together with n, to
 * __clc_exp_helper, which also receives -1074 and 1024 as the lower and
 * upper input limits.
 */
_CLC_OVERLOAD _CLC_DEF double exp2(double x) {
    const double ln2 = 0x1.62e42fefa39efp-1; // ln(2)
    const double one_by_64 = 1.0 / 64.0;

    const int n = convert_int(x * 64.0);

    // remainder = x - n/64, computed with a single fma
    const double remainder = fma(-one_by_64, (double)n, x);
    const double r = ln2 * remainder;

    return __clc_exp_helper(x, -1074.0, 1024.0, r, n);
}
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp2, double)
#endif

View File

@ -0,0 +1,69 @@
/*
* Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <clc/clc.h>
#include "math.h"
#include "tables.h"
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
/*
 * Common back end for double-precision exponential evaluation.
 *
 *   x      original argument; consulted only for the NaN and range checks
 *   x_min  inputs below this return 0.0
 *   x_max  inputs above this return +infinity
 *   r      reduced argument fed to the polynomial
 *   n      low 6 bits select a two_to_jby64_ep_tbl entry, the remaining
 *          bits (n >> 6) are the power-of-two scale
 *
 * A NaN x is returned unchanged.
 */
_CLC_DEF double __clc_exp_helper(double x, double x_min, double x_max, double r, int n) {
    const int tbl_index = n & 0x3f;
    const int scale_exp = n >> 6;

    // 6 term tail of Taylor expansion of e^r, Horner form,
    // innermost coefficient first
    double poly = fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7);
    poly = fma(r, poly, 0x1.5555555555555p-5);
    poly = fma(r, poly, 0x1.5555555555555p-3);
    poly = fma(r, poly, 0x1.0000000000000p-1);
    poly = fma(r, poly, 1.0);
    const double tail = r * poly;

    // Combine with the (s0, s1) table pair for this index.
    const double2 entry = USE_TABLE(two_to_jby64_ep_tbl, tbl_index);
    const double mant = fma(entry.s0 + entry.s1, tail, entry.s1) + entry.s0;

    const int small_value = (scale_exp < -1022) || ((scale_exp == -1022) && (mant < 1.0));

    // Small results: apply the scale as two separate power-of-two factors,
    // each built directly from its exponent bits.
    const int exp_first = scale_exp >> 2;
    const int exp_second = scale_exp - exp_first;
    double scaled_small = mant * as_double(((long)exp_first + 1023) << 52);
    scaled_small *= as_double(((long)exp_second + 1023) << 52);

    // Otherwise a single ldexp is used.
    const double scaled_normal = ldexp(mant, scale_exp);

    double result = small_value ? scaled_small : scaled_normal;

    // Special cases, applied in this order so later ones take precedence.
    if (isnan(x)) {
        result = x;
    }
    if (x > x_max) {
        result = as_double(PINFBITPATT_DP64);
    }
    if (x < x_min) {
        result = 0.0;
    }

    return result;
}
#endif // cl_khr_fp64

View File

@ -0,0 +1,29 @@
/*
* Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
/*
 * Back end used by the double-precision exp2 implementation.
 *
 *   x      original argument; used for the NaN and range checks
 *   x_min  inputs below this yield 0.0
 *   x_max  inputs above this yield +infinity
 *   r      reduced argument evaluated by the polynomial
 *   n      low 6 bits index two_to_jby64_ep_tbl; n >> 6 is the
 *          power-of-two scale applied to the result
 *
 * Returns x unchanged when x is NaN.
 */
_CLC_DECL double __clc_exp_helper(double x, double x_min, double x_max, double r, int n);
#endif

View File

@ -634,6 +634,76 @@ DECLARE_TABLE(double2, ATAN_JBY256_TBL, 241) = {
(double2)(0x1.921fb00000000p-1, 0x1.5110b4611a626p-23),
};
// 64-entry table of double2 (head, tail) pairs, read through the
// two_to_jby64_ep_tbl accessor with an index j in 0..63. Each head has its
// low 28 mantissa bits zero and each tail is a small correction term.
// Presumably head + tail approximates 2^(j/64) to extra precision, as the
// table name suggests -- TODO confirm against the source of these constants.
DECLARE_TABLE(double2, TWO_TO_JBY64_EP, 64) = {
(double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0),
(double2)(0x1.02c9a30000000p+0, 0x1.cef00c1dcdef9p-25),
(double2)(0x1.059b0d0000000p+0, 0x1.8ac2ba1d73e2ap-27),
(double2)(0x1.0874510000000p+0, 0x1.0eb37901186bep-25),
(double2)(0x1.0b55860000000p+0, 0x1.9f3121ec53172p-25),
(double2)(0x1.0e3ec30000000p+0, 0x1.69e8d10103a17p-27),
(double2)(0x1.11301d0000000p+0, 0x1.25b50a4ebbf1ap-32),
(double2)(0x1.1429aa0000000p+0, 0x1.d525bbf668203p-25),
(double2)(0x1.172b830000000p+0, 0x1.8faa2f5b9bef9p-25),
(double2)(0x1.1a35be0000000p+0, 0x1.6df96ea796d31p-25),
(double2)(0x1.1d48730000000p+0, 0x1.68b9aa7805b80p-28),
(double2)(0x1.2063b80000000p+0, 0x1.0c519ac771dd6p-25),
(double2)(0x1.2387a60000000p+0, 0x1.ceac470cd83f5p-25),
(double2)(0x1.26b4560000000p+0, 0x1.789f37495e99cp-26),
(double2)(0x1.29e9df0000000p+0, 0x1.47f7b84b09745p-26),
(double2)(0x1.2d285a0000000p+0, 0x1.b900c2d002475p-26),
(double2)(0x1.306fe00000000p+0, 0x1.4636e2a5bd1abp-25),
(double2)(0x1.33c08b0000000p+0, 0x1.320b7fa64e430p-27),
(double2)(0x1.371a730000000p+0, 0x1.ceaa72a9c5154p-26),
(double2)(0x1.3a7db30000000p+0, 0x1.3967fdba86f24p-26),
(double2)(0x1.3dea640000000p+0, 0x1.82468446b6824p-25),
(double2)(0x1.4160a20000000p+0, 0x1.f72e29f84325bp-28),
(double2)(0x1.44e0860000000p+0, 0x1.8624b40c4dbd0p-30),
(double2)(0x1.486a2b0000000p+0, 0x1.704f3404f068ep-26),
(double2)(0x1.4bfdad0000000p+0, 0x1.4d8a89c750e5ep-26),
(double2)(0x1.4f9b270000000p+0, 0x1.a74b29ab4cf62p-26),
(double2)(0x1.5342b50000000p+0, 0x1.a753e077c2a0fp-26),
(double2)(0x1.56f4730000000p+0, 0x1.ad49f699bb2c0p-26),
(double2)(0x1.5ab07d0000000p+0, 0x1.a90a852b19260p-25),
(double2)(0x1.5e76f10000000p+0, 0x1.6b48521ba6f93p-26),
(double2)(0x1.6247eb0000000p+0, 0x1.d2ac258f87d03p-31),
(double2)(0x1.6623880000000p+0, 0x1.2a91124893ecfp-27),
(double2)(0x1.6a09e60000000p+0, 0x1.9fcef32422cbep-26),
(double2)(0x1.6dfb230000000p+0, 0x1.8ca345de441c5p-25),
(double2)(0x1.71f75e0000000p+0, 0x1.1d8bee7ba46e1p-25),
(double2)(0x1.75feb50000000p+0, 0x1.9099f22fdba6ap-26),
(double2)(0x1.7a11470000000p+0, 0x1.f580c36bea881p-27),
(double2)(0x1.7e2f330000000p+0, 0x1.b3d398841740ap-26),
(double2)(0x1.8258990000000p+0, 0x1.2999c25159f11p-25),
(double2)(0x1.868d990000000p+0, 0x1.68925d901c83bp-25),
(double2)(0x1.8ace540000000p+0, 0x1.15506dadd3e2ap-27),
(double2)(0x1.8f1ae90000000p+0, 0x1.22aee6c57304ep-25),
(double2)(0x1.93737b0000000p+0, 0x1.9b8bc9e8a0387p-29),
(double2)(0x1.97d8290000000p+0, 0x1.fbc9c9f173d24p-25),
(double2)(0x1.9c49180000000p+0, 0x1.51f8480e3e235p-27),
(double2)(0x1.a0c6670000000p+0, 0x1.6bbcac96535b5p-25),
(double2)(0x1.a5503b0000000p+0, 0x1.1f12ae45a1224p-27),
(double2)(0x1.a9e6b50000000p+0, 0x1.5e7f6fd0fac90p-26),
(double2)(0x1.ae89f90000000p+0, 0x1.2b5a75abd0e69p-25),
(double2)(0x1.b33a2b0000000p+0, 0x1.09e2bf5ed7fa1p-25),
(double2)(0x1.b7f76f0000000p+0, 0x1.7daf237553d84p-27),
(double2)(0x1.bcc1e90000000p+0, 0x1.2f074891ee83dp-30),
(double2)(0x1.c199bd0000000p+0, 0x1.b0aa538444196p-25),
(double2)(0x1.c67f120000000p+0, 0x1.cafa29694426fp-25),
(double2)(0x1.cb720d0000000p+0, 0x1.9df20d22a0797p-25),
(double2)(0x1.d072d40000000p+0, 0x1.40f12f71a1e45p-25),
(double2)(0x1.d5818d0000000p+0, 0x1.9f7490e4bb40bp-25),
(double2)(0x1.da9e600000000p+0, 0x1.ed9942b84600dp-27),
(double2)(0x1.dfc9730000000p+0, 0x1.bdcdaf5cb4656p-27),
(double2)(0x1.e502ee0000000p+0, 0x1.e2cffd89cf44cp-26),
(double2)(0x1.ea4afa0000000p+0, 0x1.52486cc2c7b9dp-27),
(double2)(0x1.efa1be0000000p+0, 0x1.cc2b44eee3fa4p-25),
(double2)(0x1.f507650000000p+0, 0x1.6dc8a80ce9f09p-25),
(double2)(0x1.fa7c180000000p+0, 0x1.9e90d82e90a7ep-28)
};
TABLE_FUNCTION(double2, ATAN_JBY256_TBL, atan_jby256_tbl);
TABLE_FUNCTION(double2, TWO_TO_JBY64_EP, two_to_jby64_ep_tbl);
#endif // cl_khr_fp64

View File

@ -48,5 +48,5 @@ TABLE_FUNCTION_DECL(uint4, pibits_tbl);
TABLE_FUNCTION_DECL(double2, ln_tbl);
TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl);
#endif // cl_khr_fp64