mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-15 04:00:56 +00:00
Implement exp2 using OpenCL C rather than using an intrinsic
Not all targets support the intrinsic, so it's better to have a generic implementation which does not use it. This exp2 implementation was ported from the AMD builtin library and has been tested with piglit, OpenCV, and the ocl conformance tests. llvm-svn: 237228
This commit is contained in:
parent
0cabcf211a
commit
d538fdc217
@ -1,6 +1,24 @@
|
||||
#undef exp2
|
||||
#define exp2 __clc_exp2
|
||||
/*
|
||||
* Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#define __CLC_FUNCTION __clc_exp2
|
||||
#define __CLC_INTRINSIC "llvm.exp2"
|
||||
#include <clc/math/unary_intrin.inc>
|
||||
#define __CLC_BODY <clc/math/exp2.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
23
libclc/generic/include/clc/math/exp2.inc
Normal file
23
libclc/generic/include/clc/math/exp2.inc
Normal file
@ -0,0 +1,23 @@
|
||||
/*
|
||||
* Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
 * Prototype of exp2 for whatever type __CLC_GENTYPE is bound to when this
 * file is included. The exp2 header names this file as __CLC_BODY and then
 * includes clc/math/gentype.inc, which presumably re-includes it once per
 * supported floating-point type (gentype.inc is not shown here - confirm).
 */
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE exp2(__CLC_GENTYPE x);
|
@ -79,6 +79,8 @@ math/cospi.cl
|
||||
math/ep_log.cl
|
||||
math/erfc.cl
|
||||
math/exp.cl
|
||||
math/exp_helper.cl
|
||||
math/exp2.cl
|
||||
math/exp10.cl
|
||||
math/fmax.cl
|
||||
math/fmin.cl
|
||||
|
86
libclc/generic/lib/math/exp2.cl
Normal file
86
libclc/generic/lib/math/exp2.cl
Normal file
@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <clc/clc.h>
|
||||
|
||||
#include "math.h"
|
||||
#include "../clcmacro.h"
|
||||
|
||||
// Single-precision 2^x written in plain OpenCL C (no intrinsic).
//
// Outline: split x into a nearest integer p and a remainder tt = x - p,
// evaluate e^(tt * ln2) (which equals 2^tt), then multiply by 2^p by adding
// p to the exponent bits of the result.
//
// Special cases, as coded at the end of the function:
//   x <  -126  -> 0.0f
//   x >=  128  -> +infinity (bit pattern 0x7f800000)
//   x is NaN   -> x is returned unchanged
_CLC_OVERLOAD _CLC_DEF float exp2(float x) {
|
||||
|
||||
// Reduce x
|
||||
// ln2HI + ln2LO together represent ln(2); keeping the two parts separate
// lets the products below retain more precision than a single multiply.
const float ln2HI = 0x1.62e300p-1f;
|
||||
const float ln2LO = 0x1.2fefa2p-17f;
|
||||
|
||||
// t = x rounded to an integral value, p = the same value as an int,
// tt = what is left of x after removing that integer part.
float t = rint(x);
|
||||
int p = (int)t;
|
||||
float tt = x - t;
|
||||
// hi + lo == tt * ln(2), held as a two-part sum.
float hi = tt * ln2HI;
|
||||
float lo = tt * ln2LO;
|
||||
|
||||
// Evaluate poly
|
||||
// From here on t is the reduced argument (tt * ln2) and tt is its square.
t = hi + lo;
|
||||
tt = t*t;
|
||||
// Nested mad() calls (Horner form) computing
//   v = t - tt * (c1 + tt * (c2 + tt * (c3 + tt * (c4 + tt * c5))))
// with c1..c5 being the constants listed innermost-last below.
float v = mad(tt,
|
||||
-mad(tt,
|
||||
mad(tt,
|
||||
mad(tt,
|
||||
mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f),
|
||||
0x1.1566aap-14f),
|
||||
-0x1.6c16c2p-9f),
|
||||
0x1.555556p-3f),
|
||||
t);
|
||||
|
||||
// Algebraically y = 1 + hi + lo + (t * v) / (2 - v); the grouping keeps the
// small terms together before they are combined with 1.0f.
// NOTE(review): MATH_DIVIDE is defined outside this view (presumably in
// math.h); it is used here as a division of its two arguments - confirm.
float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
|
||||
|
||||
// Scale by 2^p
|
||||
// Integer add on the raw bits: p << 23 lands on the float exponent field,
// so this multiplies y by 2^p without a floating-point multiply.
float r = as_float(as_int(y) + (p << 23));
|
||||
|
||||
// Input thresholds outside which the bit-level scaling above is not used.
const float ulim = 128.0f;
|
||||
const float llim = -126.0f;
|
||||
|
||||
// Below llim the result is forced to zero.
r = x < llim ? 0.0f : r;
|
||||
// At or above ulim (and for NaN, where the comparison is false) the result
// becomes +infinity; the NaN case is corrected on the next line.
r = x < ulim ? r : as_float(0x7f800000);
|
||||
return isnan(x) ? x : r;
|
||||
}
|
||||
|
||||
// NOTE(review): _CLC_UNARY_VECTORIZE comes from ../clcmacro.h (not shown);
// presumably it generates the float vector overloads of exp2 from the scalar
// function above - confirm against the macro definition.
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp2, float)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#include "exp_helper.h"
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
// Double-precision 2^x.
//
// Writes x as n/64 plus a small remainder, converts the remainder to
// natural-log units (r), and lets __clc_exp_helper combine 2^(n/64) with e^r.
// The helper is given -1074.0 and 1024.0 as the input limits below/above
// which it returns 0.0 / +infinity respectively.
_CLC_OVERLOAD _CLC_DEF double exp2(double x) {
|
||||
const double R_LN2 = 0x1.62e42fefa39efp-1; // ln(2)
|
||||
const double R_1_BY_64 = 1.0 / 64.0;
|
||||
|
||||
// n = x expressed in units of 1/64, as an integer.
// NOTE(review): convert_int without a rounding suffix presumably truncates
// toward zero per the OpenCL C conversion rules - confirm.
int n = convert_int(x * 64.0);
|
||||
// r = ln(2) * (x - n/64); the subtraction is done with a single fma.
double r = R_LN2 * fma(-R_1_BY_64, (double)n, x);
|
||||
return __clc_exp_helper(x, -1074.0, 1024.0, r, n);
|
||||
}
|
||||
|
||||
|
||||
// NOTE(review): _CLC_UNARY_VECTORIZE comes from ../clcmacro.h (not shown);
// presumably it generates the double vector overloads of exp2 from the
// scalar function above - confirm against the macro definition.
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp2, double)
|
||||
|
||||
#endif
|
69
libclc/generic/lib/math/exp_helper.cl
Normal file
69
libclc/generic/lib/math/exp_helper.cl
Normal file
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <clc/clc.h>
|
||||
|
||||
#include "math.h"
|
||||
#include "tables.h"
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
// Final reconstruction step for a double-precision exponential.
//
// Computes 2^(n/64) * e^r and then applies the special-case handling.
//
//   x      original argument; used only for the NaN and range checks
//   x_min  inputs below this return 0.0
//   x_max  inputs above this return +infinity
//   r      reduced argument (natural-log units) supplied by the caller
//   n      integer scale: the low 6 bits select a table entry, the remaining
//          bits are the power-of-two exponent
//
// Returns the computed value, or x itself when x is NaN.
_CLC_DEF double __clc_exp_helper(double x, double x_min, double x_max, double r, int n) {
|
||||
|
||||
// Split n = 64 * m + j with j in [0, 63].
int j = n & 0x3f;
|
||||
int m = n >> 6;
|
||||
|
||||
// 6 term tail of Taylor expansion of e^r
|
||||
// The fma chain evaluates r * (1 + r*(1/2 + r*(1/6 + r*(1/24 + r*(1/120 +
// r/720))))); the leading 1 of the series is not included.
double z2 = r * fma(r,
|
||||
fma(r,
|
||||
fma(r,
|
||||
fma(r,
|
||||
fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7),
|
||||
0x1.5555555555555p-5),
|
||||
0x1.5555555555555p-3),
|
||||
0x1.0000000000000p-1),
|
||||
1.0);
|
||||
|
||||
// tv holds table entry j as a (head, tail) pair in .s0 / .s1.
// NOTE(review): USE_TABLE is defined outside this view (presumably in
// tables.h); it is taken here to return entry j of the 2^(j/64) table.
double2 tv = USE_TABLE(two_to_jby64_ep_tbl, j);
|
||||
// (head + tail) * z2 + tail + head: scales the series tail by the table
// value and adds the table value itself, adding the larger head last.
z2 = fma(tv.s0 + tv.s1, z2, tv.s1) + tv.s0;
|
||||
|
||||
// True when scaling by 2^m would give a magnitude below 2^-1022.
int small_value = (m < -1022) || ((m == -1022) && (z2 < 1.0));
|
||||
|
||||
// Alternative scaling for the small case: split m into n1 + n2 and multiply
// by 2^n1 and 2^n2 one after the other. Each power of two is built directly
// from its bit pattern by placing (exponent + 1023) at bit 52.
int n1 = m >> 2;
|
||||
int n2 = m-n1;
|
||||
double z3= z2 * as_double(((long)n1 + 1023) << 52);
|
||||
z3 *= as_double(((long)n2 + 1023) << 52);
|
||||
|
||||
// Regular scaling; both variants are computed and one is then selected.
z2 = ldexp(z2, m);
|
||||
z2 = small_value ? z3: z2;
|
||||
|
||||
// NaN input is passed through; the range checks below compare false for
// NaN and therefore leave it in place.
z2 = isnan(x) ? x : z2;
|
||||
|
||||
// NOTE(review): PINFBITPATT_DP64 is defined outside this view; by its name
// and use it is presumably the bit pattern of +infinity - confirm.
z2 = x > x_max ? as_double(PINFBITPATT_DP64) : z2;
|
||||
z2 = x < x_min ? 0.0 : z2;
|
||||
|
||||
return z2;
|
||||
}
|
||||
|
||||
#endif // cl_khr_fp64
|
29
libclc/generic/lib/math/exp_helper.h
Normal file
29
libclc/generic/lib/math/exp_helper.h
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
// Helper used by the double-precision exp2: returns 2^(n/64) * e^r, with
// 0.0 for x < x_min, +infinity for x > x_max, and x itself when x is NaN.
_CLC_DECL double __clc_exp_helper(double x, double x_min, double x_max, double r, int n);
|
||||
|
||||
#endif
|
@ -634,6 +634,76 @@ DECLARE_TABLE(double2, ATAN_JBY256_TBL, 241) = {
|
||||
(double2)(0x1.921fb00000000p-1, 0x1.5110b4611a626p-23),
|
||||
};
|
||||
|
||||
// 64-entry table of 2^(j/64) for j = 0..63 (entry 0 is 1.0, entry 32 is
// sqrt(2)). Each value is stored as a (head, tail) pair: the first component
// keeps only the leading hex digits of the mantissa, the second holds the
// remaining low-order part, so head + tail carries more precision than one
// double.
// NOTE(review): DECLARE_TABLE is defined outside this view; confirm the
// address space / storage it gives the array.
DECLARE_TABLE(double2, TWO_TO_JBY64_EP, 64) = {
|
||||
(double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0),
|
||||
(double2)(0x1.02c9a30000000p+0, 0x1.cef00c1dcdef9p-25),
|
||||
(double2)(0x1.059b0d0000000p+0, 0x1.8ac2ba1d73e2ap-27),
|
||||
(double2)(0x1.0874510000000p+0, 0x1.0eb37901186bep-25),
|
||||
(double2)(0x1.0b55860000000p+0, 0x1.9f3121ec53172p-25),
|
||||
(double2)(0x1.0e3ec30000000p+0, 0x1.69e8d10103a17p-27),
|
||||
(double2)(0x1.11301d0000000p+0, 0x1.25b50a4ebbf1ap-32),
|
||||
(double2)(0x1.1429aa0000000p+0, 0x1.d525bbf668203p-25),
|
||||
(double2)(0x1.172b830000000p+0, 0x1.8faa2f5b9bef9p-25),
|
||||
(double2)(0x1.1a35be0000000p+0, 0x1.6df96ea796d31p-25),
|
||||
(double2)(0x1.1d48730000000p+0, 0x1.68b9aa7805b80p-28),
|
||||
(double2)(0x1.2063b80000000p+0, 0x1.0c519ac771dd6p-25),
|
||||
(double2)(0x1.2387a60000000p+0, 0x1.ceac470cd83f5p-25),
|
||||
(double2)(0x1.26b4560000000p+0, 0x1.789f37495e99cp-26),
|
||||
(double2)(0x1.29e9df0000000p+0, 0x1.47f7b84b09745p-26),
|
||||
(double2)(0x1.2d285a0000000p+0, 0x1.b900c2d002475p-26),
|
||||
(double2)(0x1.306fe00000000p+0, 0x1.4636e2a5bd1abp-25),
|
||||
(double2)(0x1.33c08b0000000p+0, 0x1.320b7fa64e430p-27),
|
||||
(double2)(0x1.371a730000000p+0, 0x1.ceaa72a9c5154p-26),
|
||||
(double2)(0x1.3a7db30000000p+0, 0x1.3967fdba86f24p-26),
|
||||
(double2)(0x1.3dea640000000p+0, 0x1.82468446b6824p-25),
|
||||
(double2)(0x1.4160a20000000p+0, 0x1.f72e29f84325bp-28),
|
||||
(double2)(0x1.44e0860000000p+0, 0x1.8624b40c4dbd0p-30),
|
||||
(double2)(0x1.486a2b0000000p+0, 0x1.704f3404f068ep-26),
|
||||
(double2)(0x1.4bfdad0000000p+0, 0x1.4d8a89c750e5ep-26),
|
||||
(double2)(0x1.4f9b270000000p+0, 0x1.a74b29ab4cf62p-26),
|
||||
(double2)(0x1.5342b50000000p+0, 0x1.a753e077c2a0fp-26),
|
||||
(double2)(0x1.56f4730000000p+0, 0x1.ad49f699bb2c0p-26),
|
||||
(double2)(0x1.5ab07d0000000p+0, 0x1.a90a852b19260p-25),
|
||||
(double2)(0x1.5e76f10000000p+0, 0x1.6b48521ba6f93p-26),
|
||||
(double2)(0x1.6247eb0000000p+0, 0x1.d2ac258f87d03p-31),
|
||||
(double2)(0x1.6623880000000p+0, 0x1.2a91124893ecfp-27),
|
||||
(double2)(0x1.6a09e60000000p+0, 0x1.9fcef32422cbep-26),
|
||||
(double2)(0x1.6dfb230000000p+0, 0x1.8ca345de441c5p-25),
|
||||
(double2)(0x1.71f75e0000000p+0, 0x1.1d8bee7ba46e1p-25),
|
||||
(double2)(0x1.75feb50000000p+0, 0x1.9099f22fdba6ap-26),
|
||||
(double2)(0x1.7a11470000000p+0, 0x1.f580c36bea881p-27),
|
||||
(double2)(0x1.7e2f330000000p+0, 0x1.b3d398841740ap-26),
|
||||
(double2)(0x1.8258990000000p+0, 0x1.2999c25159f11p-25),
|
||||
(double2)(0x1.868d990000000p+0, 0x1.68925d901c83bp-25),
|
||||
(double2)(0x1.8ace540000000p+0, 0x1.15506dadd3e2ap-27),
|
||||
(double2)(0x1.8f1ae90000000p+0, 0x1.22aee6c57304ep-25),
|
||||
(double2)(0x1.93737b0000000p+0, 0x1.9b8bc9e8a0387p-29),
|
||||
(double2)(0x1.97d8290000000p+0, 0x1.fbc9c9f173d24p-25),
|
||||
(double2)(0x1.9c49180000000p+0, 0x1.51f8480e3e235p-27),
|
||||
(double2)(0x1.a0c6670000000p+0, 0x1.6bbcac96535b5p-25),
|
||||
(double2)(0x1.a5503b0000000p+0, 0x1.1f12ae45a1224p-27),
|
||||
(double2)(0x1.a9e6b50000000p+0, 0x1.5e7f6fd0fac90p-26),
|
||||
(double2)(0x1.ae89f90000000p+0, 0x1.2b5a75abd0e69p-25),
|
||||
(double2)(0x1.b33a2b0000000p+0, 0x1.09e2bf5ed7fa1p-25),
|
||||
(double2)(0x1.b7f76f0000000p+0, 0x1.7daf237553d84p-27),
|
||||
(double2)(0x1.bcc1e90000000p+0, 0x1.2f074891ee83dp-30),
|
||||
(double2)(0x1.c199bd0000000p+0, 0x1.b0aa538444196p-25),
|
||||
(double2)(0x1.c67f120000000p+0, 0x1.cafa29694426fp-25),
|
||||
(double2)(0x1.cb720d0000000p+0, 0x1.9df20d22a0797p-25),
|
||||
(double2)(0x1.d072d40000000p+0, 0x1.40f12f71a1e45p-25),
|
||||
(double2)(0x1.d5818d0000000p+0, 0x1.9f7490e4bb40bp-25),
|
||||
(double2)(0x1.da9e600000000p+0, 0x1.ed9942b84600dp-27),
|
||||
(double2)(0x1.dfc9730000000p+0, 0x1.bdcdaf5cb4656p-27),
|
||||
(double2)(0x1.e502ee0000000p+0, 0x1.e2cffd89cf44cp-26),
|
||||
(double2)(0x1.ea4afa0000000p+0, 0x1.52486cc2c7b9dp-27),
|
||||
(double2)(0x1.efa1be0000000p+0, 0x1.cc2b44eee3fa4p-25),
|
||||
(double2)(0x1.f507650000000p+0, 0x1.6dc8a80ce9f09p-25),
|
||||
(double2)(0x1.fa7c180000000p+0, 0x1.9e90d82e90a7ep-28)
|
||||
|
||||
};
|
||||
|
||||
|
||||
TABLE_FUNCTION(double2, ATAN_JBY256_TBL, atan_jby256_tbl);
|
||||
TABLE_FUNCTION(double2, TWO_TO_JBY64_EP, two_to_jby64_ep_tbl);
|
||||
|
||||
#endif // cl_khr_fp64
|
||||
|
@ -48,5 +48,5 @@ TABLE_FUNCTION_DECL(uint4, pibits_tbl);
|
||||
|
||||
TABLE_FUNCTION_DECL(double2, ln_tbl);
|
||||
TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
|
||||
|
||||
TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl);
|
||||
#endif // cl_khr_fp64
|
||||
|
Loading…
Reference in New Issue
Block a user