mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-16 07:49:43 +00:00
rootn: Use denormal path only
It's OK to either flush to 0 or return a denormal result if the device does not support denormals. See sec 7.2 and 7.5.3 of the OCL specs. Use 0.0f explicitly instead of relying on the GPU to flush it. Fixes CTS on carrizo and turks. Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu> Acked-by: Aaron Watry <awatry@gmail.com> Tested-by: Aaron Watry <awatry@gmail.com> llvm-svn: 332324
This commit is contained in:
parent
93bce5108b
commit
58fdb3b09a
@ -170,16 +170,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny)
|
||||
tv = USE_TABLE(exp_tbl_ep, j);
|
||||
|
||||
float expylogx = mad(tv.s0, poly, mad(tv.s1, poly, tv.s1)) + tv.s0;
|
||||
float sexpylogx;
|
||||
if (!__clc_fp32_subnormals_supported()) {
|
||||
int explg = ((as_uint(expylogx) & EXPBITS_SP32 >> 23) - 127);
|
||||
m = (23-(m + 149)) == 0 ? 1: m;
|
||||
uint mantissa = ((as_uint(expylogx) & MANTBITS_SP32)|IMPBIT_SP32) >> (23-(m + 149));
|
||||
sexpylogx = as_float(mantissa);
|
||||
} else {
|
||||
sexpylogx = expylogx * as_float(0x1 << (m + 149));
|
||||
}
|
||||
|
||||
float sexpylogx = __clc_fp32_subnormals_supported() ? expylogx * as_float(0x1 << (m + 149)) : 0.0f;
|
||||
|
||||
float texpylogx = as_float(as_int(expylogx) + m2);
|
||||
expylogx = m < -125 ? sexpylogx : texpylogx;
|
||||
|
Loading…
x
Reference in New Issue
Block a user