mirror of
https://github.com/openharmony/third_party_optimized_routines.git
synced 2026-07-01 06:42:05 -04:00
update openharmony 1.0.1
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
build/
|
||||
.DS_Store
|
||||
config.mk
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 1999-2019, Arm Limited.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -0,0 +1,89 @@
|
||||
# Makefile - requires GNU make
|
||||
#
|
||||
# Copyright (c) 2018-2019, Arm Limited.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
srcdir = .
|
||||
prefix = /usr
|
||||
bindir = $(prefix)/bin
|
||||
libdir = $(prefix)/lib
|
||||
includedir = $(prefix)/include
|
||||
|
||||
# Configure these in config.mk, do not make changes in this file.
|
||||
SUBS = math string networking
|
||||
HOST_CC = cc
|
||||
HOST_CFLAGS = -std=c99 -O2
|
||||
HOST_LDFLAGS =
|
||||
HOST_LDLIBS =
|
||||
EMULATOR =
|
||||
CPPFLAGS =
|
||||
CFLAGS = -std=c99 -O2
|
||||
CFLAGS_SHARED = -fPIC
|
||||
CFLAGS_ALL = -Ibuild/include $(CPPFLAGS) $(CFLAGS)
|
||||
LDFLAGS =
|
||||
LDLIBS =
|
||||
AR = $(CROSS_COMPILE)ar
|
||||
RANLIB = $(CROSS_COMPILE)ranlib
|
||||
INSTALL = install
|
||||
|
||||
all:
|
||||
|
||||
-include config.mk
|
||||
|
||||
$(foreach sub,$(SUBS),$(eval include $(srcdir)/$(sub)/Dir.mk))
|
||||
|
||||
# Required targets of subproject foo:
|
||||
# all-foo
|
||||
# check-foo
|
||||
# clean-foo
|
||||
# install-foo
|
||||
# Required make variables of subproject foo:
|
||||
# foo-files: Built files (all in build/).
|
||||
# Make variables used by subproject foo:
|
||||
# foo-...: Variables defined in foo/Dir.mk or by config.mk.
|
||||
|
||||
all: $(SUBS:%=all-%)
|
||||
|
||||
ALL_FILES = $(foreach sub,$(SUBS),$($(sub)-files))
|
||||
DIRS = $(sort $(patsubst %/,%,$(dir $(ALL_FILES))))
|
||||
$(ALL_FILES): | $(DIRS)
|
||||
$(DIRS):
|
||||
mkdir -p $@
|
||||
|
||||
$(filter %.os,$(ALL_FILES)): CFLAGS_ALL += $(CFLAGS_SHARED)
|
||||
|
||||
build/%.o: $(srcdir)/%.S
|
||||
$(CC) $(CFLAGS_ALL) -c -o $@ $<
|
||||
|
||||
build/%.o: $(srcdir)/%.c
|
||||
$(CC) $(CFLAGS_ALL) -c -o $@ $<
|
||||
|
||||
build/%.os: $(srcdir)/%.S
|
||||
$(CC) $(CFLAGS_ALL) -c -o $@ $<
|
||||
|
||||
build/%.os: $(srcdir)/%.c
|
||||
$(CC) $(CFLAGS_ALL) -c -o $@ $<
|
||||
|
||||
clean: $(SUBS:%=clean-%)
|
||||
rm -rf build
|
||||
|
||||
distclean: clean
|
||||
rm -f config.mk
|
||||
|
||||
$(DESTDIR)$(bindir)/%: build/bin/%
|
||||
$(INSTALL) -D $< $@
|
||||
|
||||
$(DESTDIR)$(libdir)/%.so: build/lib/%.so
|
||||
$(INSTALL) -D $< $@
|
||||
|
||||
$(DESTDIR)$(libdir)/%: build/lib/%
|
||||
$(INSTALL) -m 644 -D $< $@
|
||||
|
||||
$(DESTDIR)$(includedir)/%: build/include/%
|
||||
$(INSTALL) -m 644 -D $< $@
|
||||
|
||||
install: $(SUBS:%=install-%)
|
||||
|
||||
check: $(SUBS:%=check-%)
|
||||
|
||||
.PHONY: all clean distclean install check
|
||||
@@ -0,0 +1,56 @@
|
||||
Arm Optimized Routines
|
||||
----------------------
|
||||
|
||||
This repository contains implementations of library functions
|
||||
provided by Arm under MIT License (See LICENSE). Contributions
|
||||
to this project are accepted, but Contributors have to sign an
|
||||
Assignment Agreement; please follow the instructions in
|
||||
contributor-agreement.pdf. This is needed so upstreaming code
|
||||
to projects that require copyright assignment is possible.
|
||||
|
||||
Regular quarterly releases are tagged as vYY.MM; the latest
|
||||
release is v20.05.
|
||||
|
||||
Source code layout:
|
||||
|
||||
build/ - build directory (created by make).
|
||||
math/ - math subproject sources.
|
||||
math/include/ - math library public headers.
|
||||
math/test/ - math test and benchmark related sources.
|
||||
math/tools/ - tools used for designing the algorithms.
|
||||
networking/ - networking subproject sources.
|
||||
networking/include/ - networking library public headers.
|
||||
networking/test/ - networking test and benchmark related sources.
|
||||
string/ - string routines subproject sources.
|
||||
string/include/ - string library public headers.
|
||||
string/test/ - string test and benchmark related sources.
|
||||
|
||||
The steps to build the target libraries and run the tests:
|
||||
|
||||
cp config.mk.dist config.mk
|
||||
# edit config.mk if necessary ...
|
||||
make
|
||||
make check
|
||||
|
||||
Or building outside of the source directory:
|
||||
|
||||
ln -s path/to/src/Makefile Makefile
|
||||
cp path/to/src/config.mk.dist config.mk
|
||||
echo 'srcdir = path/to/src' >> config.mk
|
||||
# further edits to config.mk
|
||||
make
|
||||
make check
|
||||
|
||||
Or building and testing the math subproject only:
|
||||
|
||||
make all-math
|
||||
make check-math
|
||||
|
||||
The test system requires libmpfr and libmpc.
|
||||
For example, on Debian Linux they can be installed as:
|
||||
|
||||
sudo apt-get install libmpfr-dev libmpc-dev
|
||||
|
||||
For cross build, CROSS_COMPILE should be set in config.mk and EMULATOR
|
||||
should be set for cross testing (e.g. using qemu-user or remote access
|
||||
to a target machine), see the examples in config.mk.dist.
|
||||
@@ -0,0 +1,11 @@
|
||||
[
|
||||
{
|
||||
"Name" : "optimized-routines",
|
||||
"License" : "MIT License",
|
||||
"License File" : "LICENSE",
|
||||
"Version Number" : "v20.05",
|
||||
"Owner" : "zhaotianyu9@huawei.com",
|
||||
"Upstream URL" : "http://www.arm.com/;https://github.com/ARM-software/optimized-routines",
|
||||
"Description" : "Optimized implementations of various library functions for ARM architecture processors"
|
||||
}
|
||||
]
|
||||
@@ -1,36 +0,0 @@
|
||||
# third_party_optimized_routines
|
||||
|
||||
#### Description
|
||||
{**When you're done, you can delete the content in this README and update the file with details for others getting started with your repository**}
|
||||
|
||||
#### Software Architecture
|
||||
Software architecture description
|
||||
|
||||
#### Installation
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
|
||||
#### Instructions
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
|
||||
#### Contribution
|
||||
|
||||
1. Fork the repository
|
||||
2. Create Feat_xxx branch
|
||||
3. Commit your code
|
||||
4. Create Pull Request
|
||||
|
||||
|
||||
#### Gitee Feature
|
||||
|
||||
1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md
|
||||
2. Gitee blog [blog.gitee.com](https://blog.gitee.com)
|
||||
3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore)
|
||||
4. The most valuable open source project [GVP](https://gitee.com/gvp)
|
||||
5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help)
|
||||
6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
|
||||
@@ -1,39 +0,0 @@
|
||||
# third_party_optimized_routines
|
||||
|
||||
#### 介绍
|
||||
{**以下是 Gitee 平台说明,您可以替换此简介**
|
||||
Gitee 是 OSCHINA 推出的基于 Git 的代码托管平台(同时支持 SVN)。专为开发者提供稳定、高效、安全的云端软件开发协作平台
|
||||
无论是个人、团队、或是企业,都能够用 Gitee 实现代码托管、项目管理、协作开发。企业项目请看 [https://gitee.com/enterprises](https://gitee.com/enterprises)}
|
||||
|
||||
#### 软件架构
|
||||
软件架构说明
|
||||
|
||||
|
||||
#### 安装教程
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
|
||||
#### 使用说明
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
|
||||
#### 参与贡献
|
||||
|
||||
1. Fork 本仓库
|
||||
2. 新建 Feat_xxx 分支
|
||||
3. 提交代码
|
||||
4. 新建 Pull Request
|
||||
|
||||
|
||||
#### 特技
|
||||
|
||||
1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md
|
||||
2. Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com)
|
||||
3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目
|
||||
4. [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目,是综合评定出的优秀开源项目
|
||||
5. Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help)
|
||||
6. Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
|
||||
@@ -0,0 +1,73 @@
|
||||
# Example config.mk
|
||||
#
|
||||
# Copyright (c) 2018-2019, Arm Limited.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
# Subprojects to build
|
||||
SUBS = math string networking
|
||||
|
||||
# Target architecture: aarch64, arm or x86_64
|
||||
ARCH = aarch64
|
||||
|
||||
# Use for cross compilation with gcc.
|
||||
#CROSS_COMPILE = aarch64-none-linux-gnu-
|
||||
|
||||
# Compiler for the target
|
||||
CC = $(CROSS_COMPILE)gcc
|
||||
CFLAGS = -std=c99 -pipe -O3
|
||||
CFLAGS += -Wall -Wno-missing-braces
|
||||
CFLAGS += -Werror=implicit-function-declaration
|
||||
|
||||
# Used for test case generator that is executed on the host
|
||||
HOST_CC = gcc
|
||||
HOST_CFLAGS = -std=c99 -O2
|
||||
HOST_CFLAGS += -Wall -Wno-unused-function
|
||||
|
||||
# Enable debug info.
|
||||
HOST_CFLAGS += -g
|
||||
CFLAGS += -g
|
||||
|
||||
# Optimize the shared libraries on aarch64 assuming they fit in 1M.
|
||||
#CFLAGS_SHARED = -fPIC -mcmodel=tiny
|
||||
|
||||
# Enable MTE support.
|
||||
#CFLAGS += -march=armv8.5-a+memtag -DWANT_MTE_TEST=1
|
||||
|
||||
# Use with cross testing.
|
||||
#EMULATOR = qemu-aarch64-static
|
||||
#EMULATOR = sh -c 'scp $$1 user@host:/dir && ssh user@host /dir/"$$@"' --
|
||||
|
||||
# Additional flags for subprojects.
|
||||
math-cflags =
|
||||
math-ldlibs =
|
||||
math-ulpflags =
|
||||
math-testflags =
|
||||
string-cflags =
|
||||
networking-cflags =
|
||||
|
||||
# Use if mpfr is available on the target for ulp error checking.
|
||||
#math-ldlibs += -lmpfr -lgmp
|
||||
#math-cflags += -DUSE_MPFR
|
||||
|
||||
# Use with gcc.
|
||||
math-cflags += -frounding-math -fexcess-precision=standard -fno-stack-protector
|
||||
math-cflags += -ffp-contract=fast -fno-math-errno
|
||||
|
||||
# Use with clang.
|
||||
#math-cflags += -ffp-contract=fast
|
||||
|
||||
# Disable vector math code
|
||||
#math-cflags += -DWANT_VMATH=0
|
||||
|
||||
# Disable fenv checks
|
||||
#math-ulpflags = -q -f
|
||||
#math-testflags = -nostatus
|
||||
|
||||
# Remove GNU Property Notes from asm files.
|
||||
#string-cflags += -DWANT_GNU_PROPERTY=0
|
||||
|
||||
# Enable assertion checks.
|
||||
#networking-cflags += -DWANT_ASSERT
|
||||
|
||||
# Avoid auto-vectorization of scalar code and unroll loops
|
||||
networking-cflags += -O2 -fno-tree-vectorize -funroll-loops
|
||||
Binary file not shown.
+110
@@ -0,0 +1,110 @@
|
||||
# Makefile fragment - requires GNU make
|
||||
#
|
||||
# Copyright (c) 2019, Arm Limited.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
S := $(srcdir)/math
|
||||
B := build/math
|
||||
|
||||
math-lib-srcs := $(wildcard $(S)/*.[cS])
|
||||
math-test-srcs := \
|
||||
$(S)/test/mathtest.c \
|
||||
$(S)/test/mathbench.c \
|
||||
$(S)/test/ulp.c \
|
||||
|
||||
math-test-host-srcs := $(wildcard $(S)/test/rtest/*.[cS])
|
||||
|
||||
math-includes := $(patsubst $(S)/%,build/%,$(wildcard $(S)/include/*.h))
|
||||
|
||||
math-libs := \
|
||||
build/lib/libmathlib.so \
|
||||
build/lib/libmathlib.a \
|
||||
|
||||
math-tools := \
|
||||
build/bin/mathtest \
|
||||
build/bin/mathbench \
|
||||
build/bin/mathbench_libc \
|
||||
build/bin/runulp.sh \
|
||||
build/bin/ulp \
|
||||
|
||||
math-host-tools := \
|
||||
build/bin/rtest \
|
||||
|
||||
math-lib-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-lib-srcs)))
|
||||
math-test-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-test-srcs)))
|
||||
math-host-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-test-host-srcs)))
|
||||
math-target-objs := $(math-lib-objs) $(math-test-objs)
|
||||
math-objs := $(math-target-objs) $(math-target-objs:%.o=%.os) $(math-host-objs)
|
||||
|
||||
math-files := \
|
||||
$(math-objs) \
|
||||
$(math-libs) \
|
||||
$(math-tools) \
|
||||
$(math-host-tools) \
|
||||
$(math-includes) \
|
||||
|
||||
all-math: $(math-libs) $(math-tools) $(math-includes)
|
||||
|
||||
$(math-objs): $(math-includes)
|
||||
$(math-objs): CFLAGS_ALL += $(math-cflags)
|
||||
$(B)/test/mathtest.o: CFLAGS_ALL += -fmath-errno
|
||||
$(math-host-objs): CC = $(HOST_CC)
|
||||
$(math-host-objs): CFLAGS_ALL = $(HOST_CFLAGS)
|
||||
|
||||
$(B)/test/ulp.o: $(S)/test/ulp.h
|
||||
|
||||
build/lib/libmathlib.so: $(math-lib-objs:%.o=%.os)
|
||||
$(CC) $(CFLAGS_ALL) $(LDFLAGS) -shared -o $@ $^
|
||||
|
||||
build/lib/libmathlib.a: $(math-lib-objs)
|
||||
rm -f $@
|
||||
$(AR) rc $@ $^
|
||||
$(RANLIB) $@
|
||||
|
||||
$(math-host-tools): HOST_LDLIBS += -lm -lmpfr -lmpc
|
||||
$(math-tools): LDLIBS += $(math-ldlibs) -lm
|
||||
|
||||
build/bin/rtest: $(math-host-objs)
|
||||
$(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ $^ $(HOST_LDLIBS)
|
||||
|
||||
build/bin/mathtest: $(B)/test/mathtest.o build/lib/libmathlib.a
|
||||
$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
|
||||
|
||||
build/bin/mathbench: $(B)/test/mathbench.o build/lib/libmathlib.a
|
||||
$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
|
||||
|
||||
# This is not ideal, but allows custom symbols in mathbench to get resolved.
|
||||
build/bin/mathbench_libc: $(B)/test/mathbench.o build/lib/libmathlib.a
|
||||
$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $< $(LDLIBS) -lc build/lib/libmathlib.a -lm
|
||||
|
||||
build/bin/ulp: $(B)/test/ulp.o build/lib/libmathlib.a
|
||||
$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
|
||||
|
||||
build/include/%.h: $(S)/include/%.h
|
||||
cp $< $@
|
||||
|
||||
build/bin/%.sh: $(S)/test/%.sh
|
||||
cp $< $@
|
||||
|
||||
math-tests := $(wildcard $(S)/test/testcases/directed/*.tst)
|
||||
math-rtests := $(wildcard $(S)/test/testcases/random/*.tst)
|
||||
|
||||
check-math-test: $(math-tools)
|
||||
cat $(math-tests) | $(EMULATOR) build/bin/mathtest $(math-testflags)
|
||||
|
||||
check-math-rtest: $(math-host-tools) $(math-tools)
|
||||
cat $(math-rtests) | build/bin/rtest | $(EMULATOR) build/bin/mathtest $(math-testflags)
|
||||
|
||||
check-math-ulp: $(math-tools)
|
||||
ULPFLAGS="$(math-ulpflags)" build/bin/runulp.sh $(EMULATOR)
|
||||
|
||||
check-math: check-math-test check-math-rtest check-math-ulp
|
||||
|
||||
install-math: \
|
||||
$(math-libs:build/lib/%=$(DESTDIR)$(libdir)/%) \
|
||||
$(math-includes:build/include/%=$(DESTDIR)$(includedir)/%)
|
||||
|
||||
clean-math:
|
||||
rm -f $(math-files)
|
||||
|
||||
.PHONY: all-math check-math-test check-math-rtest check-math-ulp check-math install-math clean-math
|
||||
+63
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Single-precision cos function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include "math_config.h"
|
||||
#include "sincosf.h"
|
||||
|
||||
/* Fast cosf implementation. Worst-case ULP is 0.5607, maximum relative
|
||||
error is 0.5303 * 2^-23. A single-step range reduction is used for
|
||||
small values. Large inputs have their range reduced using fast integer
|
||||
arithmetic. */
|
||||
float
|
||||
cosf (float y)
|
||||
{
|
||||
double x = y;
|
||||
double s;
|
||||
int n;
|
||||
const sincos_t *p = &__sincosf_table[0];
|
||||
|
||||
if (abstop12 (y) < abstop12 (pio4))
|
||||
{
|
||||
double x2 = x * x;
|
||||
|
||||
if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
|
||||
return 1.0f;
|
||||
|
||||
return sinf_poly (x, x2, p, 1);
|
||||
}
|
||||
else if (likely (abstop12 (y) < abstop12 (120.0f)))
|
||||
{
|
||||
x = reduce_fast (x, p, &n);
|
||||
|
||||
/* Setup the signs for sin and cos. */
|
||||
s = p->sign[n & 3];
|
||||
|
||||
if (n & 2)
|
||||
p = &__sincosf_table[1];
|
||||
|
||||
return sinf_poly (x * s, x * x, p, n ^ 1);
|
||||
}
|
||||
else if (abstop12 (y) < abstop12 (INFINITY))
|
||||
{
|
||||
uint32_t xi = asuint (y);
|
||||
int sign = xi >> 31;
|
||||
|
||||
x = reduce_large (xi, &n);
|
||||
|
||||
/* Setup signs for sin and cos - include original sign. */
|
||||
s = p->sign[(n + sign) & 3];
|
||||
|
||||
if ((n + sign) & 2)
|
||||
p = &__sincosf_table[1];
|
||||
|
||||
return sinf_poly (x * s, x * x, p, n ^ 1);
|
||||
}
|
||||
else
|
||||
return __math_invalidf (y);
|
||||
}
|
||||
+176
@@ -0,0 +1,176 @@
|
||||
/*
|
||||
* Double-precision e^x function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "math_config.h"
|
||||
|
||||
#define N (1 << EXP_TABLE_BITS)
|
||||
#define InvLn2N __exp_data.invln2N
|
||||
#define NegLn2hiN __exp_data.negln2hiN
|
||||
#define NegLn2loN __exp_data.negln2loN
|
||||
#define Shift __exp_data.shift
|
||||
#define T __exp_data.tab
|
||||
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
|
||||
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
|
||||
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
|
||||
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
|
||||
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
|
||||
|
||||
/* Handle cases that may overflow or underflow when computing the result that
|
||||
is scale*(1+TMP) without intermediate rounding. The bit representation of
|
||||
scale is in SBITS, however it has a computed exponent that may have
|
||||
overflown into the sign bit so that needs to be adjusted before using it as
|
||||
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
||||
adjustment of scale, positive k here means the result may overflow and
|
||||
negative k means the result may underflow. */
|
||||
static inline double
|
||||
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
|
||||
{
|
||||
double_t scale, y;
|
||||
|
||||
if ((ki & 0x80000000) == 0)
|
||||
{
|
||||
/* k > 0, the exponent of scale might have overflowed by <= 460. */
|
||||
sbits -= 1009ull << 52;
|
||||
scale = asdouble (sbits);
|
||||
y = 0x1p1009 * (scale + scale * tmp);
|
||||
return check_oflow (eval_as_double (y));
|
||||
}
|
||||
/* k < 0, need special care in the subnormal range. */
|
||||
sbits += 1022ull << 52;
|
||||
scale = asdouble (sbits);
|
||||
y = scale + scale * tmp;
|
||||
if (y < 1.0)
|
||||
{
|
||||
/* Round y to the right precision before scaling it into the subnormal
|
||||
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
||||
E is the worst-case ulp error outside the subnormal range. So this
|
||||
is only useful if the goal is better than 1 ulp worst-case error. */
|
||||
double_t hi, lo;
|
||||
lo = scale - y + scale * tmp;
|
||||
hi = 1.0 + y;
|
||||
lo = 1.0 - hi + y + lo;
|
||||
y = eval_as_double (hi + lo) - 1.0;
|
||||
/* Avoid -0.0 with downward rounding. */
|
||||
if (WANT_ROUNDING && y == 0.0)
|
||||
y = 0.0;
|
||||
/* The underflow exception needs to be signaled explicitly. */
|
||||
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
|
||||
}
|
||||
y = 0x1p-1022 * y;
|
||||
return check_uflow (eval_as_double (y));
|
||||
}
|
||||
|
||||
/* Return the top 12 bits of the representation of X, i.e. the sign bit and
   the exponent bits of a double.  */
static inline uint32_t
top12 (double x)
{
  return (uint32_t) (asuint64 (x) >> 52);
}
|
||||
|
||||
/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
|
||||
If hastail is 0 then xtail is assumed to be 0 too. */
|
||||
static inline double
|
||||
exp_inline (double x, double xtail, int hastail)
|
||||
{
|
||||
uint32_t abstop;
|
||||
uint64_t ki, idx, top, sbits;
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t kd, z, r, r2, scale, tail, tmp;
|
||||
|
||||
abstop = top12 (x) & 0x7ff;
|
||||
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
|
||||
{
|
||||
if (abstop - top12 (0x1p-54) >= 0x80000000)
|
||||
/* Avoid spurious underflow for tiny x. */
|
||||
/* Note: 0 is common input. */
|
||||
return WANT_ROUNDING ? 1.0 + x : 1.0;
|
||||
if (abstop >= top12 (1024.0))
|
||||
{
|
||||
if (asuint64 (x) == asuint64 (-INFINITY))
|
||||
return 0.0;
|
||||
if (abstop >= top12 (INFINITY))
|
||||
return 1.0 + x;
|
||||
if (asuint64 (x) >> 63)
|
||||
return __math_uflow (0);
|
||||
else
|
||||
return __math_oflow (0);
|
||||
}
|
||||
/* Large x is special cased below. */
|
||||
abstop = 0;
|
||||
}
|
||||
|
||||
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
|
||||
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
|
||||
z = InvLn2N * x;
|
||||
#if TOINT_INTRINSICS
|
||||
kd = roundtoint (z);
|
||||
ki = converttoint (z);
|
||||
#elif EXP_USE_TOINT_NARROW
|
||||
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
|
||||
kd = eval_as_double (z + Shift);
|
||||
ki = asuint64 (kd) >> 16;
|
||||
kd = (double_t) (int32_t) ki;
|
||||
#else
|
||||
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
|
||||
kd = eval_as_double (z + Shift);
|
||||
ki = asuint64 (kd);
|
||||
kd -= Shift;
|
||||
#endif
|
||||
r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
||||
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
|
||||
if (hastail)
|
||||
r += xtail;
|
||||
/* 2^(k/N) ~= scale * (1 + tail). */
|
||||
idx = 2 * (ki % N);
|
||||
top = ki << (52 - EXP_TABLE_BITS);
|
||||
tail = asdouble (T[idx]);
|
||||
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
||||
sbits = T[idx + 1] + top;
|
||||
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
|
||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||
r2 = r * r;
|
||||
/* Without fma the worst case error is 0.25/N ulp larger. */
|
||||
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
|
||||
#if EXP_POLY_ORDER == 4
|
||||
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
|
||||
#elif EXP_POLY_ORDER == 5
|
||||
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
||||
#elif EXP_POLY_ORDER == 6
|
||||
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
|
||||
#endif
|
||||
if (unlikely (abstop == 0))
|
||||
return specialcase (tmp, sbits, ki);
|
||||
scale = asdouble (sbits);
|
||||
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
||||
is no spurious underflow here even without fma. */
|
||||
return eval_as_double (scale + scale * tmp);
|
||||
}
|
||||
|
||||
/* Double-precision e^x: exp_inline with no tail term (hastail == 0).  */
double
exp (double x)
{
  return exp_inline (x, 0.0, 0);
}
|
||||
|
||||
/* exp of the sum x + xtail: exp_inline with the tail term enabled.  May be
   useful for implementing pow, where more than double precision input is
   needed.  */
double
__exp_dd (double x, double xtail)
{
  return exp_inline (x, xtail, 1);
}
|
||||
#if USE_GLIBC_ABI
/* Extra names for exp and __exp_dd, created with the strong_alias and
   hidden_alias macros.  */
strong_alias (exp, __exp_finite)
hidden_alias (exp, __ieee754_exp)
hidden_alias (__exp_dd, __exp1)
# if LDBL_MANT_DIG == 53
/* long double has a 53-digit significand here, so expl forwards to exp.  */
long double
expl (long double x)
{
  return exp (x);
}
# endif
#endif
|
||||
+143
@@ -0,0 +1,143 @@
|
||||
/*
|
||||
* Double-precision 2^x function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "math_config.h"
|
||||
|
||||
#define N (1 << EXP_TABLE_BITS)
|
||||
#define Shift __exp_data.exp2_shift
|
||||
#define T __exp_data.tab
|
||||
#define C1 __exp_data.exp2_poly[0]
|
||||
#define C2 __exp_data.exp2_poly[1]
|
||||
#define C3 __exp_data.exp2_poly[2]
|
||||
#define C4 __exp_data.exp2_poly[3]
|
||||
#define C5 __exp_data.exp2_poly[4]
|
||||
#define C6 __exp_data.exp2_poly[5]
|
||||
|
||||
/* Handle cases that may overflow or underflow when computing the result that
|
||||
is scale*(1+TMP) without intermediate rounding. The bit representation of
|
||||
scale is in SBITS, however it has a computed exponent that may have
|
||||
overflown into the sign bit so that needs to be adjusted before using it as
|
||||
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
||||
adjustment of scale, positive k here means the result may overflow and
|
||||
negative k means the result may underflow. */
|
||||
static inline double
|
||||
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
|
||||
{
|
||||
double_t scale, y;
|
||||
|
||||
if ((ki & 0x80000000) == 0)
|
||||
{
|
||||
/* k > 0, the exponent of scale might have overflowed by 1. */
|
||||
sbits -= 1ull << 52;
|
||||
scale = asdouble (sbits);
|
||||
y = 2 * (scale + scale * tmp);
|
||||
return check_oflow (eval_as_double (y));
|
||||
}
|
||||
/* k < 0, need special care in the subnormal range. */
|
||||
sbits += 1022ull << 52;
|
||||
scale = asdouble (sbits);
|
||||
y = scale + scale * tmp;
|
||||
if (y < 1.0)
|
||||
{
|
||||
/* Round y to the right precision before scaling it into the subnormal
|
||||
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
||||
E is the worst-case ulp error outside the subnormal range. So this
|
||||
is only useful if the goal is better than 1 ulp worst-case error. */
|
||||
double_t hi, lo;
|
||||
lo = scale - y + scale * tmp;
|
||||
hi = 1.0 + y;
|
||||
lo = 1.0 - hi + y + lo;
|
||||
y = eval_as_double (hi + lo) - 1.0;
|
||||
/* Avoid -0.0 with downward rounding. */
|
||||
if (WANT_ROUNDING && y == 0.0)
|
||||
y = 0.0;
|
||||
/* The underflow exception needs to be signaled explicitly. */
|
||||
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
|
||||
}
|
||||
y = 0x1p-1022 * y;
|
||||
return check_uflow (eval_as_double (y));
|
||||
}
|
||||
|
||||
/* Return the top 12 bits of the representation of X, i.e. the sign bit and
   the exponent bits of a double.  */
static inline uint32_t
top12 (double x)
{
  return (uint32_t) (asuint64 (x) >> 52);
}
|
||||
|
||||
double
|
||||
exp2 (double x)
|
||||
{
|
||||
uint32_t abstop;
|
||||
uint64_t ki, idx, top, sbits;
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t kd, r, r2, scale, tail, tmp;
|
||||
|
||||
abstop = top12 (x) & 0x7ff;
|
||||
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
|
||||
{
|
||||
if (abstop - top12 (0x1p-54) >= 0x80000000)
|
||||
/* Avoid spurious underflow for tiny x. */
|
||||
/* Note: 0 is common input. */
|
||||
return WANT_ROUNDING ? 1.0 + x : 1.0;
|
||||
if (abstop >= top12 (1024.0))
|
||||
{
|
||||
if (asuint64 (x) == asuint64 (-INFINITY))
|
||||
return 0.0;
|
||||
if (abstop >= top12 (INFINITY))
|
||||
return 1.0 + x;
|
||||
if (!(asuint64 (x) >> 63))
|
||||
return __math_oflow (0);
|
||||
else if (asuint64 (x) >= asuint64 (-1075.0))
|
||||
return __math_uflow (0);
|
||||
}
|
||||
if (2 * asuint64 (x) > 2 * asuint64 (928.0))
|
||||
/* Large x is special cased below. */
|
||||
abstop = 0;
|
||||
}
|
||||
|
||||
/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
|
||||
/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
|
||||
kd = eval_as_double (x + Shift);
|
||||
ki = asuint64 (kd); /* k. */
|
||||
kd -= Shift; /* k/N for int k. */
|
||||
r = x - kd;
|
||||
/* 2^(k/N) ~= scale * (1 + tail). */
|
||||
idx = 2 * (ki % N);
|
||||
top = ki << (52 - EXP_TABLE_BITS);
|
||||
tail = asdouble (T[idx]);
|
||||
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
||||
sbits = T[idx + 1] + top;
|
||||
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
|
||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||
r2 = r * r;
|
||||
/* Without fma the worst case error is 0.5/N ulp larger. */
|
||||
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
|
||||
#if EXP2_POLY_ORDER == 4
|
||||
tmp = tail + r * C1 + r2 * C2 + r * r2 * (C3 + r * C4);
|
||||
#elif EXP2_POLY_ORDER == 5
|
||||
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
||||
#elif EXP2_POLY_ORDER == 6
|
||||
tmp = tail + r * C1 + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
|
||||
#endif
|
||||
if (unlikely (abstop == 0))
|
||||
return specialcase (tmp, sbits, ki);
|
||||
scale = asdouble (sbits);
|
||||
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
|
||||
is no spurious underflow here even without fma. */
|
||||
return eval_as_double (scale + scale * tmp);
|
||||
}
|
||||
#if USE_GLIBC_ABI
/* Extra names for exp2, created with the strong_alias and hidden_alias
   macros.  */
strong_alias (exp2, __exp2_finite)
hidden_alias (exp2, __ieee754_exp2)
# if LDBL_MANT_DIG == 53
/* long double has a 53-digit significand here, so exp2l forwards to exp2.  */
long double
exp2l (long double x)
{
  return exp2 (x);
}
# endif
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Single-precision 2^x function.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "math_config.h"
|
||||
|
||||
/*
|
||||
EXP2F_TABLE_BITS = 5
|
||||
EXP2F_POLY_ORDER = 3
|
||||
|
||||
ULP error: 0.502 (nearest rounding.)
|
||||
Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
|
||||
Wrong count: 168353 (all nearest rounding wrong results with fma.)
|
||||
Non-nearest ULP error: 1 (rounded ULP error)
|
||||
*/
|
||||
|
||||
#define N (1 << EXP2F_TABLE_BITS)
|
||||
#define T __exp2f_data.tab
|
||||
#define C __exp2f_data.poly
|
||||
#define SHIFT __exp2f_data.shift_scaled
|
||||
|
||||
/* Return the top 12 bits of the representation of the float X (the bits
   left after shifting its 32-bit pattern right by 20).  */
static inline uint32_t
top12 (float x)
{
  return (uint32_t) (asuint (x) >> 20);
}
|
||||
|
||||
float
|
||||
exp2f (float x)
|
||||
{
|
||||
uint32_t abstop;
|
||||
uint64_t ki, t;
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t kd, xd, z, r, r2, y, s;
|
||||
|
||||
xd = (double_t) x;
|
||||
abstop = top12 (x) & 0x7ff;
|
||||
if (unlikely (abstop >= top12 (128.0f)))
|
||||
{
|
||||
/* |x| >= 128 or x is nan. */
|
||||
if (asuint (x) == asuint (-INFINITY))
|
||||
return 0.0f;
|
||||
if (abstop >= top12 (INFINITY))
|
||||
return x + x;
|
||||
if (x > 0.0f)
|
||||
return __math_oflowf (0);
|
||||
if (x <= -150.0f)
|
||||
return __math_uflowf (0);
|
||||
#if WANT_ERRNO_UFLOW
|
||||
if (x < -149.0f)
|
||||
return __math_may_uflowf (0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */
|
||||
kd = eval_as_double (xd + SHIFT);
|
||||
ki = asuint64 (kd);
|
||||
kd -= SHIFT; /* k/N for int k. */
|
||||
r = xd - kd;
|
||||
|
||||
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
||||
t = T[ki % N];
|
||||
t += ki << (52 - EXP2F_TABLE_BITS);
|
||||
s = asdouble (t);
|
||||
z = C[0] * r + C[1];
|
||||
r2 = r * r;
|
||||
y = C[2] * r + 1;
|
||||
y = z * r2 + y;
|
||||
y = y * s;
|
||||
return eval_as_float (y);
|
||||
}
|
||||
#if USE_GLIBC_ABI
|
||||
strong_alias (exp2f, __exp2f_finite)
|
||||
hidden_alias (exp2f, __ieee754_exp2f)
|
||||
#endif
|
||||
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Shared data between expf, exp2f and powf.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "math_config.h"
|
||||
|
||||
#define N (1 << EXP2F_TABLE_BITS)
|
||||
|
||||
const struct exp2f_data __exp2f_data = {
|
||||
/* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
|
||||
used for computing 2^(k/N) for an int |k| < 150 N as
|
||||
double(tab[k%N] + (k << 52-BITS)) */
|
||||
.tab = {
|
||||
#if N == 8
|
||||
0x3ff0000000000000, 0x3fef72b83c7d517b, 0x3fef06fe0a31b715, 0x3feebfdad5362a27,
|
||||
0x3feea09e667f3bcd, 0x3feeace5422aa0db, 0x3feee89f995ad3ad, 0x3fef5818dcfba487,
|
||||
#elif N == 16
|
||||
0x3ff0000000000000, 0x3fefb5586cf9890f, 0x3fef72b83c7d517b, 0x3fef387a6e756238,
|
||||
0x3fef06fe0a31b715, 0x3feedea64c123422, 0x3feebfdad5362a27, 0x3feeab07dd485429,
|
||||
0x3feea09e667f3bcd, 0x3feea11473eb0187, 0x3feeace5422aa0db, 0x3feec49182a3f090,
|
||||
0x3feee89f995ad3ad, 0x3fef199bdd85529c, 0x3fef5818dcfba487, 0x3fefa4afa2a490da,
|
||||
#elif N == 32
|
||||
0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
|
||||
0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
|
||||
0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
|
||||
0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
|
||||
0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
|
||||
0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
|
||||
0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
|
||||
0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
|
||||
#elif N == 64
|
||||
0x3ff0000000000000, 0x3fefec9a3e778061, 0x3fefd9b0d3158574, 0x3fefc74518759bc8,
|
||||
0x3fefb5586cf9890f, 0x3fefa3ec32d3d1a2, 0x3fef9301d0125b51, 0x3fef829aaea92de0,
|
||||
0x3fef72b83c7d517b, 0x3fef635beb6fcb75, 0x3fef54873168b9aa, 0x3fef463b88628cd6,
|
||||
0x3fef387a6e756238, 0x3fef2b4565e27cdd, 0x3fef1e9df51fdee1, 0x3fef1285a6e4030b,
|
||||
0x3fef06fe0a31b715, 0x3feefc08b26416ff, 0x3feef1a7373aa9cb, 0x3feee7db34e59ff7,
|
||||
0x3feedea64c123422, 0x3feed60a21f72e2a, 0x3feece086061892d, 0x3feec6a2b5c13cd0,
|
||||
0x3feebfdad5362a27, 0x3feeb9b2769d2ca7, 0x3feeb42b569d4f82, 0x3feeaf4736b527da,
|
||||
0x3feeab07dd485429, 0x3feea76f15ad2148, 0x3feea47eb03a5585, 0x3feea23882552225,
|
||||
0x3feea09e667f3bcd, 0x3fee9fb23c651a2f, 0x3fee9f75e8ec5f74, 0x3fee9feb564267c9,
|
||||
0x3feea11473eb0187, 0x3feea2f336cf4e62, 0x3feea589994cce13, 0x3feea8d99b4492ed,
|
||||
0x3feeace5422aa0db, 0x3feeb1ae99157736, 0x3feeb737b0cdc5e5, 0x3feebd829fde4e50,
|
||||
0x3feec49182a3f090, 0x3feecc667b5de565, 0x3feed503b23e255d, 0x3feede6b5579fdbf,
|
||||
0x3feee89f995ad3ad, 0x3feef3a2b84f15fb, 0x3feeff76f2fb5e47, 0x3fef0c1e904bc1d2,
|
||||
0x3fef199bdd85529c, 0x3fef27f12e57d14b, 0x3fef3720dcef9069, 0x3fef472d4a07897c,
|
||||
0x3fef5818dcfba487, 0x3fef69e603db3285, 0x3fef7c97337b9b5f, 0x3fef902ee78b3ff6,
|
||||
0x3fefa4afa2a490da, 0x3fefba1bee615a27, 0x3fefd0765b6e4540, 0x3fefe7c1819e90d8,
|
||||
#endif
|
||||
},
|
||||
.shift_scaled = 0x1.8p+52 / N,
|
||||
.poly = {
|
||||
#if N == 8
|
||||
0x1.c6a00335106e2p-5, 0x1.ec0c313449f55p-3, 0x1.62e431111f69fp-1,
|
||||
#elif N == 16
|
||||
0x1.c6ac6aa313963p-5, 0x1.ebfff4532d9bap-3, 0x1.62e43001bc49fp-1,
|
||||
#elif N == 32
|
||||
0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
|
||||
#elif N == 64
|
||||
0x1.c6b04b4221b2ap-5, 0x1.ebfc213e184d7p-3, 0x1.62e42fefb5b7fp-1,
|
||||
#endif
|
||||
},
|
||||
.shift = 0x1.8p+52,
|
||||
.invln2_scaled = 0x1.71547652b82fep+0 * N,
|
||||
.poly_scaled = {
|
||||
#if N == 8
|
||||
0x1.c6a00335106e2p-5/N/N/N, 0x1.ec0c313449f55p-3/N/N, 0x1.62e431111f69fp-1/N,
|
||||
#elif N == 16
|
||||
0x1.c6ac6aa313963p-5/N/N/N, 0x1.ebfff4532d9bap-3/N/N, 0x1.62e43001bc49fp-1/N,
|
||||
#elif N == 32
|
||||
0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
|
||||
#elif N == 64
|
||||
0x1.c6b04b4221b2ap-5/N/N/N, 0x1.ebfc213e184d7p-3/N/N, 0x1.62e42fefb5b7fp-1/N,
|
||||
#endif
|
||||
},
|
||||
};
|
||||
+1120
File diff suppressed because it is too large
Load Diff
+91
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Single-precision e^x function.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "math_config.h"
|
||||
|
||||
/*
|
||||
EXP2F_TABLE_BITS = 5
|
||||
EXP2F_POLY_ORDER = 3
|
||||
|
||||
ULP error: 0.502 (nearest rounding.)
|
||||
Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
|
||||
Wrong count: 170635 (all nearest rounding wrong results with fma.)
|
||||
Non-nearest ULP error: 1 (rounded ULP error)
|
||||
*/
|
||||
|
||||
#define N (1 << EXP2F_TABLE_BITS)
|
||||
#define InvLn2N __exp2f_data.invln2_scaled
|
||||
#define T __exp2f_data.tab
|
||||
#define C __exp2f_data.poly_scaled
|
||||
|
||||
/* Return asuint (x) shifted right by 20 bits.  For a 32-bit pattern this
   leaves the 12 most significant bits; callers mask the result with 0x7ff
   when the sign bit must be ignored.  */
static inline uint32_t
top12 (float x)
{
  const uint32_t upper = asuint (x) >> 20;

  return upper;
}
|
||||
|
||||
float
|
||||
expf (float x)
|
||||
{
|
||||
uint32_t abstop;
|
||||
uint64_t ki, t;
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t kd, xd, z, r, r2, y, s;
|
||||
|
||||
xd = (double_t) x;
|
||||
abstop = top12 (x) & 0x7ff;
|
||||
if (unlikely (abstop >= top12 (88.0f)))
|
||||
{
|
||||
/* |x| >= 88 or x is nan. */
|
||||
if (asuint (x) == asuint (-INFINITY))
|
||||
return 0.0f;
|
||||
if (abstop >= top12 (INFINITY))
|
||||
return x + x;
|
||||
if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
|
||||
return __math_oflowf (0);
|
||||
if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
|
||||
return __math_uflowf (0);
|
||||
#if WANT_ERRNO_UFLOW
|
||||
if (x < -0x1.9d1d9ep6f) /* x < log(0x1p-149) ~= -103.28 */
|
||||
return __math_may_uflowf (0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
|
||||
z = InvLn2N * xd;
|
||||
|
||||
/* Round and convert z to int, the result is in [-150*N, 128*N] and
|
||||
ideally nearest int is used, otherwise the magnitude of r can be
|
||||
bigger which gives larger approximation error. */
|
||||
#if TOINT_INTRINSICS
|
||||
kd = roundtoint (z);
|
||||
ki = converttoint (z);
|
||||
#else
|
||||
# define SHIFT __exp2f_data.shift
|
||||
kd = eval_as_double (z + SHIFT);
|
||||
ki = asuint64 (kd);
|
||||
kd -= SHIFT;
|
||||
#endif
|
||||
r = z - kd;
|
||||
|
||||
/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
||||
t = T[ki % N];
|
||||
t += ki << (52 - EXP2F_TABLE_BITS);
|
||||
s = asdouble (t);
|
||||
z = C[0] * r + C[1];
|
||||
r2 = r * r;
|
||||
y = C[2] * r + 1;
|
||||
y = z * r2 + y;
|
||||
y = y * s;
|
||||
return eval_as_float (y);
|
||||
}
|
||||
#if USE_GLIBC_ABI
|
||||
strong_alias (expf, __expf_finite)
|
||||
hidden_alias (expf, __ieee754_expf)
|
||||
#endif
|
||||
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Public API.
|
||||
*
|
||||
* Copyright (c) 2015-2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef _MATHLIB_H
#define _MATHLIB_H

/* Single-precision scalar functions.  */
float expf (float);
float exp2f (float);
float logf (float);
float log2f (float);
float powf (float, float);
float sinf (float);
float cosf (float);
void sincosf (float, float *, float *);

/* Double-precision scalar functions.  */
double exp (double);
double exp2 (double);
double log (double);
double log2 (double);
double pow (double, double);

/* Scalar functions using the vector algorithm with identical result.  */
float __s_sinf (float);
float __s_cosf (float);
float __s_expf (float);
float __s_expf_1u (float);
float __s_exp2f (float);
float __s_exp2f_1u (float);
float __s_logf (float);
float __s_powf (float, float);
double __s_sin (double);
double __s_cos (double);
double __s_exp (double);
double __s_log (double);
double __s_pow (double, double);

#if __aarch64__
/* Vector types: 4 x float and 2 x double, spelled per compiler.  */
# if __GNUC__ >= 5
typedef __Float32x4_t __f32x4_t;
typedef __Float64x2_t __f64x2_t;
# elif __clang_major__*100+__clang_minor__ >= 305
typedef __attribute__((__neon_vector_type__(4))) float __f32x4_t;
typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t;
# else
#  error Unsupported compiler
# endif

/* Vector functions following the base PCS.  */
__f32x4_t __v_sinf (__f32x4_t);
__f32x4_t __v_cosf (__f32x4_t);
__f32x4_t __v_expf (__f32x4_t);
__f32x4_t __v_expf_1u (__f32x4_t);
__f32x4_t __v_exp2f (__f32x4_t);
__f32x4_t __v_exp2f_1u (__f32x4_t);
__f32x4_t __v_logf (__f32x4_t);
__f32x4_t __v_powf (__f32x4_t, __f32x4_t);
__f64x2_t __v_sin (__f64x2_t);
__f64x2_t __v_cos (__f64x2_t);
__f64x2_t __v_exp (__f64x2_t);
__f64x2_t __v_log (__f64x2_t);
__f64x2_t __v_pow (__f64x2_t, __f64x2_t);

# if __GNUC__ >= 9 || __clang_major__ >= 8
/* Marks a declaration as using the AArch64 vector calling convention.  */
#  define __vpcs __attribute__((__aarch64_vector_pcs__))

/* Vector functions following the vector PCS.  */
__vpcs __f32x4_t __vn_sinf (__f32x4_t);
__vpcs __f32x4_t __vn_cosf (__f32x4_t);
__vpcs __f32x4_t __vn_expf (__f32x4_t);
__vpcs __f32x4_t __vn_expf_1u (__f32x4_t);
__vpcs __f32x4_t __vn_exp2f (__f32x4_t);
__vpcs __f32x4_t __vn_exp2f_1u (__f32x4_t);
__vpcs __f32x4_t __vn_logf (__f32x4_t);
__vpcs __f32x4_t __vn_powf (__f32x4_t, __f32x4_t);
__vpcs __f64x2_t __vn_sin (__f64x2_t);
__vpcs __f64x2_t __vn_cos (__f64x2_t);
__vpcs __f64x2_t __vn_exp (__f64x2_t);
__vpcs __f64x2_t __vn_log (__f64x2_t);
__vpcs __f64x2_t __vn_pow (__f64x2_t, __f64x2_t);

/* Vector functions following the vector PCS using ABI names.  */
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
# endif /* vector PCS support */
#endif /* __aarch64__ */

#endif /* _MATHLIB_H */
|
||||
+162
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
* Double-precision log(x) function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "math_config.h"
|
||||
|
||||
#define T __log_data.tab
|
||||
#define T2 __log_data.tab2
|
||||
#define B __log_data.poly1
|
||||
#define A __log_data.poly
|
||||
#define Ln2hi __log_data.ln2hi
|
||||
#define Ln2lo __log_data.ln2lo
|
||||
#define N (1 << LOG_TABLE_BITS)
|
||||
#define OFF 0x3fe6000000000000
|
||||
|
||||
/* Return asuint64 (x) shifted right by 48 bits, i.e. the top 16 bits of a
   64-bit pattern.  */
static inline uint32_t
top16 (double x)
{
  const uint32_t upper = asuint64 (x) >> 48;

  return upper;
}
|
||||
|
||||
double
|
||||
log (double x)
|
||||
{
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
|
||||
uint64_t ix, iz, tmp;
|
||||
uint32_t top;
|
||||
int k, i;
|
||||
|
||||
ix = asuint64 (x);
|
||||
top = top16 (x);
|
||||
|
||||
#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11
|
||||
# define LO asuint64 (1.0 - 0x1p-5)
|
||||
# define HI asuint64 (1.0 + 0x1.1p-5)
|
||||
#elif LOG_POLY1_ORDER == 12
|
||||
# define LO asuint64 (1.0 - 0x1p-4)
|
||||
# define HI asuint64 (1.0 + 0x1.09p-4)
|
||||
#endif
|
||||
if (unlikely (ix - LO < HI - LO))
|
||||
{
|
||||
/* Handle close to 1.0 inputs separately. */
|
||||
/* Fix sign of zero with downward rounding when x==1. */
|
||||
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
|
||||
return 0;
|
||||
r = x - 1.0;
|
||||
r2 = r * r;
|
||||
r3 = r * r2;
|
||||
#if LOG_POLY1_ORDER == 10
|
||||
/* Worst-case error is around 0.516 ULP. */
|
||||
y = r3 * (B[1] + r * B[2] + r2 * B[3]
|
||||
+ r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
|
||||
w = B[0] * r2; /* B[0] == -0.5. */
|
||||
hi = r + w;
|
||||
y += r - hi + w;
|
||||
y += hi;
|
||||
#elif LOG_POLY1_ORDER == 11
|
||||
/* Worst-case error is around 0.516 ULP. */
|
||||
y = r3 * (B[1] + r * B[2]
|
||||
+ r2 * (B[3] + r * B[4] + r2 * B[5]
|
||||
+ r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
|
||||
w = B[0] * r2; /* B[0] == -0.5. */
|
||||
hi = r + w;
|
||||
y += r - hi + w;
|
||||
y += hi;
|
||||
#elif LOG_POLY1_ORDER == 12
|
||||
y = r3 * (B[1] + r * B[2] + r2 * B[3]
|
||||
+ r3 * (B[4] + r * B[5] + r2 * B[6]
|
||||
+ r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
|
||||
# if N <= 64
|
||||
/* Worst-case error is around 0.532 ULP. */
|
||||
w = B[0] * r2; /* B[0] == -0.5. */
|
||||
hi = r + w;
|
||||
y += r - hi + w;
|
||||
y += hi;
|
||||
# else
|
||||
/* Worst-case error is around 0.507 ULP. */
|
||||
w = r * 0x1p27;
|
||||
double_t rhi = r + w - w;
|
||||
double_t rlo = r - rhi;
|
||||
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
|
||||
hi = r + w;
|
||||
lo = r - hi + w;
|
||||
lo += B[0] * rlo * (rhi + r);
|
||||
y += lo;
|
||||
y += hi;
|
||||
# endif
|
||||
#endif
|
||||
return eval_as_double (y);
|
||||
}
|
||||
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
|
||||
{
|
||||
/* x < 0x1p-1022 or inf or nan. */
|
||||
if (ix * 2 == 0)
|
||||
return __math_divzero (1);
|
||||
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
|
||||
return x;
|
||||
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
|
||||
return __math_invalid (x);
|
||||
/* x is subnormal, normalize it. */
|
||||
ix = asuint64 (x * 0x1p52);
|
||||
ix -= 52ULL << 52;
|
||||
}
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
|
||||
k = (int64_t) tmp >> 52; /* arithmetic shift */
|
||||
iz = ix - (tmp & 0xfffULL << 52);
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = asdouble (iz);
|
||||
|
||||
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
|
||||
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
||||
#if HAVE_FAST_FMA
|
||||
/* rounding error: 0x1p-55/N. */
|
||||
r = fma (z, invc, -1.0);
|
||||
#else
|
||||
/* rounding error: 0x1p-55/N + 0x1p-66. */
|
||||
r = (z - T2[i].chi - T2[i].clo) * invc;
|
||||
#endif
|
||||
kd = (double_t) k;
|
||||
|
||||
/* hi + lo = r + log(c) + k*Ln2. */
|
||||
w = kd * Ln2hi + logc;
|
||||
hi = w + r;
|
||||
lo = w - hi + r + kd * Ln2lo;
|
||||
|
||||
/* log(x) = lo + (log1p(r) - r) + hi. */
|
||||
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
|
||||
/* Worst case error if |y| > 0x1p-5:
|
||||
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
|
||||
Worst case error if |y| > 0x1p-4:
|
||||
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
|
||||
#if LOG_POLY_ORDER == 6
|
||||
y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
|
||||
#elif LOG_POLY_ORDER == 7
|
||||
y = lo
|
||||
+ r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
|
||||
+ r2 * r2 * (A[4] + r * A[5]))
|
||||
+ hi;
|
||||
#endif
|
||||
return eval_as_double (y);
|
||||
}
|
||||
/* Extra symbol names, emitted only when the glibc ABI is requested.  */
#if USE_GLIBC_ABI
strong_alias (log, __log_finite)
hidden_alias (log, __ieee754_log)
# if LDBL_MANT_DIG == 53
/* long double has a 53-bit mantissa here, so logl simply forwards to
   log.  */
long double
logl (long double x)
{
  return log (x);
}
# endif
#endif
|
||||
+141
@@ -0,0 +1,141 @@
|
||||
/*
|
||||
* Double-precision log2(x) function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "math_config.h"
|
||||
|
||||
#define T __log2_data.tab
|
||||
#define T2 __log2_data.tab2
|
||||
#define B __log2_data.poly1
|
||||
#define A __log2_data.poly
|
||||
#define InvLn2hi __log2_data.invln2hi
|
||||
#define InvLn2lo __log2_data.invln2lo
|
||||
#define N (1 << LOG2_TABLE_BITS)
|
||||
#define OFF 0x3fe6000000000000
|
||||
|
||||
/* Return asuint64 (x) shifted right by 48 bits, i.e. the top 16 bits of a
   64-bit pattern.  */
static inline uint32_t
top16 (double x)
{
  const uint32_t upper = asuint64 (x) >> 48;

  return upper;
}
|
||||
|
||||
double
|
||||
log2 (double x)
|
||||
{
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
|
||||
uint64_t ix, iz, tmp;
|
||||
uint32_t top;
|
||||
int k, i;
|
||||
|
||||
ix = asuint64 (x);
|
||||
top = top16 (x);
|
||||
|
||||
#if LOG2_POLY1_ORDER == 11
|
||||
# define LO asuint64 (1.0 - 0x1.5b51p-5)
|
||||
# define HI asuint64 (1.0 + 0x1.6ab2p-5)
|
||||
#endif
|
||||
if (unlikely (ix - LO < HI - LO))
|
||||
{
|
||||
/* Handle close to 1.0 inputs separately. */
|
||||
/* Fix sign of zero with downward rounding when x==1. */
|
||||
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
|
||||
return 0;
|
||||
r = x - 1.0;
|
||||
#if HAVE_FAST_FMA
|
||||
hi = r * InvLn2hi;
|
||||
lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
|
||||
#else
|
||||
double_t rhi, rlo;
|
||||
rhi = asdouble (asuint64 (r) & -1ULL << 32);
|
||||
rlo = r - rhi;
|
||||
hi = rhi * InvLn2hi;
|
||||
lo = rlo * InvLn2hi + r * InvLn2lo;
|
||||
#endif
|
||||
r2 = r * r; /* rounding error: 0x1p-62. */
|
||||
r4 = r2 * r2;
|
||||
#if LOG2_POLY1_ORDER == 11
|
||||
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
|
||||
p = r2 * (B[0] + r * B[1]);
|
||||
y = hi + p;
|
||||
lo += hi - y + p;
|
||||
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5])
|
||||
+ r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
|
||||
y += lo;
|
||||
#endif
|
||||
return eval_as_double (y);
|
||||
}
|
||||
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
|
||||
{
|
||||
/* x < 0x1p-1022 or inf or nan. */
|
||||
if (ix * 2 == 0)
|
||||
return __math_divzero (1);
|
||||
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
|
||||
return x;
|
||||
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
|
||||
return __math_invalid (x);
|
||||
/* x is subnormal, normalize it. */
|
||||
ix = asuint64 (x * 0x1p52);
|
||||
ix -= 52ULL << 52;
|
||||
}
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
|
||||
k = (int64_t) tmp >> 52; /* arithmetic shift */
|
||||
iz = ix - (tmp & 0xfffULL << 52);
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = asdouble (iz);
|
||||
kd = (double_t) k;
|
||||
|
||||
/* log2(x) = log2(z/c) + log2(c) + k. */
|
||||
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
||||
#if HAVE_FAST_FMA
|
||||
/* rounding error: 0x1p-55/N. */
|
||||
r = fma (z, invc, -1.0);
|
||||
t1 = r * InvLn2hi;
|
||||
t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1);
|
||||
#else
|
||||
double_t rhi, rlo;
|
||||
/* rounding error: 0x1p-55/N + 0x1p-65. */
|
||||
r = (z - T2[i].chi - T2[i].clo) * invc;
|
||||
rhi = asdouble (asuint64 (r) & -1ULL << 32);
|
||||
rlo = r - rhi;
|
||||
t1 = rhi * InvLn2hi;
|
||||
t2 = rlo * InvLn2hi + r * InvLn2lo;
|
||||
#endif
|
||||
|
||||
/* hi + lo = r/ln2 + log2(c) + k. */
|
||||
t3 = kd + logc;
|
||||
hi = t3 + t1;
|
||||
lo = t3 - hi + t1 + t2;
|
||||
|
||||
/* log2(r+1) = r/ln2 + r^2*poly(r). */
|
||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
|
||||
r4 = r2 * r2;
|
||||
#if LOG2_POLY_ORDER == 7
|
||||
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
|
||||
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
|
||||
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
|
||||
y = lo + r2 * p + hi;
|
||||
#endif
|
||||
return eval_as_double (y);
|
||||
}
|
||||
/* Extra symbol names, emitted only when the glibc ABI is requested.  */
#if USE_GLIBC_ABI
strong_alias (log2, __log2_finite)
hidden_alias (log2, __ieee754_log2)
# if LDBL_MANT_DIG == 53
/* long double has a 53-bit mantissa here, so log2l simply forwards to
   log2.  */
long double
log2l (long double x)
{
  return log2 (x);
}
# endif
#endif
|
||||
@@ -0,0 +1,209 @@
|
||||
/*
|
||||
* Data for log2.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "math_config.h"
|
||||
|
||||
#define N (1 << LOG2_TABLE_BITS)
|
||||
|
||||
const struct log2_data __log2_data = {
|
||||
// First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0
|
||||
.invln2hi = 0x1.7154765200000p+0,
|
||||
.invln2lo = 0x1.705fc2eefa200p-33,
|
||||
.poly1 = {
|
||||
#if LOG2_POLY1_ORDER == 11
|
||||
// relative error: 0x1.2fad8188p-63
|
||||
// in -0x1.5b51p-5 0x1.6ab2p-5
|
||||
-0x1.71547652b82fep-1,
|
||||
0x1.ec709dc3a03f7p-2,
|
||||
-0x1.71547652b7c3fp-2,
|
||||
0x1.2776c50f05be4p-2,
|
||||
-0x1.ec709dd768fe5p-3,
|
||||
0x1.a61761ec4e736p-3,
|
||||
-0x1.7153fbc64a79bp-3,
|
||||
0x1.484d154f01b4ap-3,
|
||||
-0x1.289e4a72c383cp-3,
|
||||
0x1.0b32f285aee66p-3,
|
||||
#endif
|
||||
},
|
||||
.poly = {
|
||||
#if N == 64 && LOG2_POLY_ORDER == 7
|
||||
// relative error: 0x1.a72c2bf8p-58
|
||||
// abs error: 0x1.67a552c8p-66
|
||||
// in -0x1.f45p-8 0x1.f45p-8
|
||||
-0x1.71547652b8339p-1,
|
||||
0x1.ec709dc3a04bep-2,
|
||||
-0x1.7154764702ffbp-2,
|
||||
0x1.2776c50034c48p-2,
|
||||
-0x1.ec7b328ea92bcp-3,
|
||||
0x1.a6225e117f92ep-3,
|
||||
#endif
|
||||
},
|
||||
/* Algorithm:
|
||||
|
||||
x = 2^k z
|
||||
log2(x) = k + log2(c) + log2(z/c)
|
||||
log2(z/c) = poly(z/c - 1)
|
||||
|
||||
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
|
||||
into the ith one, then table entries are computed as
|
||||
|
||||
tab[i].invc = 1/c
|
||||
tab[i].logc = (double)log2(c)
|
||||
tab2[i].chi = (double)c
|
||||
tab2[i].clo = (double)(c - (double)c)
|
||||
|
||||
where c is near the center of the subinterval and is chosen by trying +-2^29
|
||||
floating point invc candidates around 1/center and selecting one for which
|
||||
|
||||
1) the rounding error in 0x1.8p10 + logc is 0,
|
||||
2) the rounding error in z - chi - clo is < 0x1p-64 and
|
||||
3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).
|
||||
|
||||
Note: 1) ensures that k + logc can be computed without rounding error, 2)
|
||||
ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
|
||||
single rounding error when there is no fast fma for z*invc - 1, 3) ensures
|
||||
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
|
||||
.tab = {
|
||||
#if N == 64
|
||||
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
|
||||
{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
|
||||
{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
|
||||
{0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
|
||||
{0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
|
||||
{0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
|
||||
{0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
|
||||
{0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
|
||||
{0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
|
||||
{0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
|
||||
{0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
|
||||
{0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
|
||||
{0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
|
||||
{0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
|
||||
{0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
|
||||
{0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
|
||||
{0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
|
||||
{0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
|
||||
{0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
|
||||
{0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
|
||||
{0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
|
||||
{0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
|
||||
{0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
|
||||
{0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
|
||||
{0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
|
||||
{0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
|
||||
{0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
|
||||
{0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
|
||||
{0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
|
||||
{0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
|
||||
{0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
|
||||
{0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
|
||||
{0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
|
||||
{0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
|
||||
{0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
|
||||
{0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
|
||||
{0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
|
||||
{0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
|
||||
{0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
|
||||
{0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
|
||||
{0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
|
||||
{0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
|
||||
{0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
|
||||
{0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
|
||||
{0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
|
||||
{0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
|
||||
{0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
|
||||
{0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
|
||||
{0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
|
||||
{0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
|
||||
{0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
|
||||
{0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
|
||||
{0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
|
||||
{0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
|
||||
{0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
|
||||
{0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
|
||||
{0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
|
||||
{0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
|
||||
{0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
|
||||
{0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
|
||||
{0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
|
||||
{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
|
||||
{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
|
||||
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
|
||||
#endif
|
||||
},
|
||||
#if !HAVE_FAST_FMA
|
||||
.tab2 = {
|
||||
# if N == 64
|
||||
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
|
||||
{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
|
||||
{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
|
||||
{0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
|
||||
{0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
|
||||
{0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
|
||||
{0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
|
||||
{0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
|
||||
{0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
|
||||
{0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
|
||||
{0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
|
||||
{0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
|
||||
{0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
|
||||
{0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
|
||||
{0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
|
||||
{0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
|
||||
{0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
|
||||
{0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
|
||||
{0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
|
||||
{0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
|
||||
{0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
|
||||
{0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
|
||||
{0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
|
||||
{0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
|
||||
{0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
|
||||
{0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
|
||||
{0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
|
||||
{0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
|
||||
{0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
|
||||
{0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
|
||||
{0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
|
||||
{0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
|
||||
{0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
|
||||
{0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
|
||||
{0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
|
||||
{0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
|
||||
{0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
|
||||
{0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
|
||||
{0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
|
||||
{0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
|
||||
{0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
|
||||
{0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
|
||||
{0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
|
||||
{0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
|
||||
{0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
|
||||
{0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
|
||||
{0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
|
||||
{0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
|
||||
{0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
|
||||
{0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
|
||||
{0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
|
||||
{0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
|
||||
{0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
|
||||
{0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
|
||||
{0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
|
||||
{0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
|
||||
{0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
|
||||
{0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
|
||||
{0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
|
||||
{0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
|
||||
{0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
|
||||
{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
|
||||
{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
|
||||
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
|
||||
# endif
|
||||
},
|
||||
#endif /* !HAVE_FAST_FMA */
|
||||
};
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Single-precision log2 function.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "math_config.h"
|
||||
|
||||
/*
|
||||
LOG2F_TABLE_BITS = 4
|
||||
LOG2F_POLY_ORDER = 4
|
||||
|
||||
ULP error: 0.752 (nearest rounding.)
|
||||
Relative error: 1.9 * 2^-26 (before rounding.)
|
||||
*/
|
||||
|
||||
#define N (1 << LOG2F_TABLE_BITS)
|
||||
#define T __log2f_data.tab
|
||||
#define A __log2f_data.poly
|
||||
#define OFF 0x3f330000
|
||||
|
||||
float
|
||||
log2f (float x)
|
||||
{
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t z, r, r2, p, y, y0, invc, logc;
|
||||
uint32_t ix, iz, top, tmp;
|
||||
int k, i;
|
||||
|
||||
ix = asuint (x);
|
||||
#if WANT_ROUNDING
|
||||
/* Fix sign of zero with downward rounding when x==1. */
|
||||
if (unlikely (ix == 0x3f800000))
|
||||
return 0;
|
||||
#endif
|
||||
if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
|
||||
{
|
||||
/* x < 0x1p-126 or inf or nan. */
|
||||
if (ix * 2 == 0)
|
||||
return __math_divzerof (1);
|
||||
if (ix == 0x7f800000) /* log2(inf) == inf. */
|
||||
return x;
|
||||
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
|
||||
return __math_invalidf (x);
|
||||
/* x is subnormal, normalize it. */
|
||||
ix = asuint (x * 0x1p23f);
|
||||
ix -= 23 << 23;
|
||||
}
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
|
||||
top = tmp & 0xff800000;
|
||||
iz = ix - top;
|
||||
k = (int32_t) tmp >> 23; /* arithmetic shift */
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = (double_t) asfloat (iz);
|
||||
|
||||
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
|
||||
r = z * invc - 1;
|
||||
y0 = logc + (double_t) k;
|
||||
|
||||
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
|
||||
r2 = r * r;
|
||||
y = A[1] * r + A[2];
|
||||
y = A[0] * r2 + y;
|
||||
p = A[3] * r + y0;
|
||||
y = y * r2 + p;
|
||||
return eval_as_float (y);
|
||||
}
|
||||
#if USE_GLIBC_ABI
|
||||
strong_alias (log2f, __log2f_finite)
|
||||
hidden_alias (log2f, __ieee754_log2f)
|
||||
#endif
|
||||
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Data definition for log2f.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "math_config.h"
|
||||
|
||||
/* Constants read by log2f through __log2f_data.tab and __log2f_data.poly.  */
const struct log2f_data __log2f_data = {
  /* One entry per subinterval, selected by the index i computed in log2f.
     log2f reads the entry as tab[i].invc (multiplied with z) and
     tab[i].logc (added to the exponent k).
     NOTE(review): the positional order { invc, logc } is assumed here; the
     values fit 1/c and log2(c) -- confirm against the declaration of
     struct log2f_data.  */
  .tab = {
  { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
  { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
  { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
  { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
  { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
  { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
  { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
  { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
  { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
  { 0x1p+0, 0x0p+0 },
  { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
  { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
  { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
  { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
  { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
  { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
  },
  /* Polynomial coefficients A[0]..A[3] as used by log2f: the evaluated
     polynomial is A[3]*r + A[2]*r^2 + A[1]*r^3 + A[0]*r^4, i.e. the
     highest-degree coefficient is stored first.  */
  .poly = {
  -0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
  0x1.715475f35c8b8p0,
  }
};
|
||||
+511
@@ -0,0 +1,511 @@
|
||||
/*
|
||||
* Data for log.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "math_config.h"
|
||||
|
||||
/* Table size derived from LOG_TABLE_BITS; the initializer below selects its
   64-entry or 128-entry data with "#if N == ...".  */
#define N (1 << LOG_TABLE_BITS)
|
||||
|
||||
const struct log_data __log_data = {
|
||||
.ln2hi = 0x1.62e42fefa3800p-1,
|
||||
.ln2lo = 0x1.ef35793c76730p-45,
|
||||
.poly1 = {
|
||||
#if LOG_POLY1_ORDER == 10
|
||||
// relative error: 0x1.32eccc6p-62
|
||||
// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
|
||||
-0x1p-1,
|
||||
0x1.55555555554e5p-2,
|
||||
-0x1.0000000000af2p-2,
|
||||
0x1.9999999bbe436p-3,
|
||||
-0x1.55555537f9cdep-3,
|
||||
0x1.24922fc8127cfp-3,
|
||||
-0x1.0000b7d6bb612p-3,
|
||||
0x1.c806ee1ddbcafp-4,
|
||||
-0x1.972335a9c2d6ep-4,
|
||||
#elif LOG_POLY1_ORDER == 11
|
||||
// relative error: 0x1.52c8b708p-68
|
||||
// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
|
||||
-0x1p-1,
|
||||
0x1.5555555555555p-2,
|
||||
-0x1.ffffffffffea9p-3,
|
||||
0x1.999999999c4d4p-3,
|
||||
-0x1.55555557f5541p-3,
|
||||
0x1.249248fbe33e4p-3,
|
||||
-0x1.ffffc9a3c825bp-4,
|
||||
0x1.c71e1f204435dp-4,
|
||||
-0x1.9a7f26377d06ep-4,
|
||||
0x1.71c30cf8f7364p-4,
|
||||
#elif LOG_POLY1_ORDER == 12
|
||||
// relative error: 0x1.c04d76cp-63
|
||||
// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
|
||||
-0x1p-1,
|
||||
0x1.5555555555577p-2,
|
||||
-0x1.ffffffffffdcbp-3,
|
||||
0x1.999999995dd0cp-3,
|
||||
-0x1.55555556745a7p-3,
|
||||
0x1.24924a344de3p-3,
|
||||
-0x1.fffffa4423d65p-4,
|
||||
0x1.c7184282ad6cap-4,
|
||||
-0x1.999eb43b068ffp-4,
|
||||
0x1.78182f7afd085p-4,
|
||||
-0x1.5521375d145cdp-4,
|
||||
#endif
|
||||
},
|
||||
.poly = {
|
||||
#if N == 64 && LOG_POLY_ORDER == 7
|
||||
// relative error: 0x1.906eb8ap-58
|
||||
// abs error: 0x1.d2cad5a8p-67
|
||||
// in -0x1.fp-8 0x1.fp-8
|
||||
-0x1.0000000000027p-1,
|
||||
0x1.555555555556ap-2,
|
||||
-0x1.fffffff0440bap-3,
|
||||
0x1.99999991906c3p-3,
|
||||
-0x1.555c8d7e8201ep-3,
|
||||
0x1.24978c59151fap-3,
|
||||
#elif N == 128 && LOG_POLY_ORDER == 6
|
||||
// relative error: 0x1.926199e8p-56
|
||||
// abs error: 0x1.882ff33p-65
|
||||
// in -0x1.fp-9 0x1.fp-9
|
||||
-0x1.0000000000001p-1,
|
||||
0x1.555555551305bp-2,
|
||||
-0x1.fffffffeb459p-3,
|
||||
0x1.999b324f10111p-3,
|
||||
-0x1.55575e506c89fp-3,
|
||||
#elif N == 128 && LOG_POLY_ORDER == 7
|
||||
// relative error: 0x1.649fc4bp-64
|
||||
// abs error: 0x1.c3b5769p-74
|
||||
// in -0x1.fp-9 0x1.fp-9
|
||||
-0x1.0000000000001p-1,
|
||||
0x1.5555555555556p-2,
|
||||
-0x1.fffffffea1a8p-3,
|
||||
0x1.99999998e9139p-3,
|
||||
-0x1.555776801b968p-3,
|
||||
0x1.2493c29331a5cp-3,
|
||||
#endif
|
||||
},
|
||||
  /* Algorithm:

	x = 2^k z
	log(x) = k ln2 + log(c) + log(z/c)
	log(z/c) = poly(z/c - 1)

     where z is in [0x1.6p-1; 0x1.6p0], which is split into N subintervals, and
     z falls into the ith one.  The table entries are then computed as

	tab[i].invc = 1/c
	tab[i].logc = (double)log(c)
	tab2[i].chi = (double)c
	tab2[i].clo = (double)(c - (double)c)

     where c is near the center of the subinterval and is chosen by trying
     +-2^29 floating point invc candidates around 1/center and selecting one
     for which

	1) the rounding error in 0x1.8p9 + logc is 0,
	2) the rounding error in z - chi - clo is < 0x1p-66 and
	3) the rounding error in (double)log(c) is minimized (< 0x1p-66).

     Note: 1) ensures that k*ln2hi + logc can be computed without rounding
     error; 2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc
     with close to a single rounding error when there is no fast fma for
     z*invc - 1; 3) ensures that logc + poly(z/c - 1) has small error.
     However, near x == 1, when |log(x)| < 0x1p-4, this is not enough, so
     that case is handled separately.  */
|
||||
.tab = {
|
||||
#if N == 64
|
||||
{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
|
||||
{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2},
|
||||
{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2},
|
||||
{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2},
|
||||
{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2},
|
||||
{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2},
|
||||
{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2},
|
||||
{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2},
|
||||
{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2},
|
||||
{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2},
|
||||
{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2},
|
||||
{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2},
|
||||
{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3},
|
||||
{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3},
|
||||
{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3},
|
||||
{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3},
|
||||
{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3},
|
||||
{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3},
|
||||
{0x1.33ae463091760p+0, -0x1.7898db878d000p-3},
|
||||
{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3},
|
||||
{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3},
|
||||
{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3},
|
||||
{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3},
|
||||
{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3},
|
||||
{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3},
|
||||
{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4},
|
||||
{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4},
|
||||
{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4},
|
||||
{0x1.194538e960658p+0, -0x1.8197efba9a000p-4},
|
||||
{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4},
|
||||
{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4},
|
||||
{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4},
|
||||
{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5},
|
||||
{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5},
|
||||
{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5},
|
||||
{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5},
|
||||
{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6},
|
||||
{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6},
|
||||
{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7},
|
||||
{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8},
|
||||
{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8},
|
||||
{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6},
|
||||
{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5},
|
||||
{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5},
|
||||
{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4},
|
||||
{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4},
|
||||
{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4},
|
||||
{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4},
|
||||
{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4},
|
||||
{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3},
|
||||
{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3},
|
||||
{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3},
|
||||
{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3},
|
||||
{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3},
|
||||
{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3},
|
||||
{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3},
|
||||
{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3},
|
||||
{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3},
|
||||
{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2},
|
||||
{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2},
|
||||
{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2},
|
||||
{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2},
|
||||
{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2},
|
||||
{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2},
|
||||
#elif N == 128
|
||||
{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
|
||||
{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
|
||||
{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
|
||||
{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
|
||||
{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
|
||||
{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
|
||||
{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
|
||||
{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
|
||||
{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
|
||||
{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
|
||||
{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
|
||||
{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
|
||||
{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
|
||||
{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
|
||||
{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
|
||||
{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
|
||||
{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
|
||||
{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
|
||||
{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
|
||||
{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
|
||||
{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
|
||||
{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
|
||||
{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
|
||||
{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
|
||||
{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
|
||||
{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
|
||||
{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
|
||||
{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
|
||||
{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
|
||||
{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
|
||||
{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
|
||||
{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
|
||||
{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
|
||||
{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
|
||||
{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
|
||||
{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
|
||||
{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
|
||||
{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
|
||||
{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
|
||||
{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
|
||||
{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
|
||||
{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
|
||||
{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
|
||||
{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
|
||||
{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
|
||||
{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
|
||||
{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
|
||||
{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
|
||||
{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
|
||||
{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
|
||||
{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
|
||||
{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
|
||||
{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
|
||||
{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
|
||||
{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
|
||||
{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
|
||||
{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
|
||||
{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
|
||||
{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
|
||||
{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
|
||||
{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
|
||||
{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
|
||||
{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
|
||||
{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
|
||||
{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
|
||||
{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
|
||||
{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
|
||||
{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
|
||||
{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
|
||||
{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
|
||||
{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
|
||||
{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
|
||||
{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
|
||||
{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
|
||||
{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
|
||||
{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
|
||||
{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
|
||||
{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
|
||||
{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
|
||||
{0x1.008040614b195p+0, -0x1.0040979240000p-9},
|
||||
{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
|
||||
{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
|
||||
{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
|
||||
{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
|
||||
{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
|
||||
{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
|
||||
{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
|
||||
{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
|
||||
{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
|
||||
{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
|
||||
{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
|
||||
{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
|
||||
{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
|
||||
{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
|
||||
{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
|
||||
{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
|
||||
{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
|
||||
{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
|
||||
{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
|
||||
{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
|
||||
{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
|
||||
{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
|
||||
{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
|
||||
{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
|
||||
{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
|
||||
{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
|
||||
{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
|
||||
{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
|
||||
{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
|
||||
{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
|
||||
{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
|
||||
{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
|
||||
{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
|
||||
{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
|
||||
{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
|
||||
{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
|
||||
{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
|
||||
{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
|
||||
{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
|
||||
{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
|
||||
{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
|
||||
{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
|
||||
{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
|
||||
{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
|
||||
{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
|
||||
{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
|
||||
{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
|
||||
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
|
||||
#endif
|
||||
},
|
||||
#if !HAVE_FAST_FMA
|
||||
.tab2 = {
|
||||
# if N == 64
|
||||
{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
|
||||
{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55},
|
||||
{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55},
|
||||
{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56},
|
||||
{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55},
|
||||
{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57},
|
||||
{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56},
|
||||
{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56},
|
||||
{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57},
|
||||
{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56},
|
||||
{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57},
|
||||
{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55},
|
||||
{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55},
|
||||
{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55},
|
||||
{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56},
|
||||
{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58},
|
||||
{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55},
|
||||
{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55},
|
||||
{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57},
|
||||
{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56},
|
||||
{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57},
|
||||
{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57},
|
||||
{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56},
|
||||
{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55},
|
||||
{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58},
|
||||
{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55},
|
||||
{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58},
|
||||
{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59},
|
||||
{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59},
|
||||
{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58},
|
||||
{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55},
|
||||
{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55},
|
||||
{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55},
|
||||
{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60},
|
||||
{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55},
|
||||
{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55},
|
||||
{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56},
|
||||
{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57},
|
||||
{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58},
|
||||
{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58},
|
||||
{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54},
|
||||
{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55},
|
||||
{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54},
|
||||
{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54},
|
||||
{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55},
|
||||
{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57},
|
||||
{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54},
|
||||
{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55},
|
||||
{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54},
|
||||
{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55},
|
||||
{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56},
|
||||
{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54},
|
||||
{0x1.320000324c55bp+0, 0x1.f81983997354fp-54},
|
||||
{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54},
|
||||
{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54},
|
||||
{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56},
|
||||
{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54},
|
||||
{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55},
|
||||
{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55},
|
||||
{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56},
|
||||
{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54},
|
||||
{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55},
|
||||
{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55},
|
||||
{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54},
|
||||
# elif N == 128
|
||||
{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
|
||||
{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
|
||||
{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
|
||||
{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
|
||||
{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
|
||||
{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
|
||||
{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
|
||||
{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
|
||||
{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
|
||||
{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
|
||||
{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
|
||||
{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
|
||||
{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
|
||||
{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
|
||||
{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
|
||||
{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
|
||||
{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
|
||||
{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
|
||||
{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
|
||||
{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
|
||||
{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
|
||||
{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
|
||||
{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
|
||||
{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
|
||||
{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
|
||||
{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
|
||||
{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
|
||||
{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
|
||||
{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
|
||||
{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
|
||||
{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
|
||||
{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
|
||||
{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
|
||||
{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
|
||||
{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
|
||||
{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
|
||||
{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
|
||||
{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
|
||||
{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
|
||||
{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
|
||||
{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
|
||||
{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
|
||||
{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
|
||||
{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
|
||||
{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
|
||||
{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
|
||||
{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
|
||||
{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
|
||||
{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
|
||||
{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
|
||||
{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
|
||||
{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
|
||||
{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
|
||||
{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
|
||||
{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
|
||||
{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
|
||||
{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
|
||||
{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
|
||||
{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
|
||||
{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
|
||||
{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
|
||||
{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
|
||||
{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
|
||||
{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
|
||||
{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
|
||||
{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
|
||||
{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
|
||||
{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
|
||||
{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
|
||||
{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
|
||||
{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
|
||||
{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
|
||||
{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
|
||||
{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
|
||||
{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
|
||||
{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
|
||||
{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
|
||||
{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
|
||||
{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
|
||||
{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
|
||||
{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
|
||||
{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
|
||||
{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
|
||||
{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
|
||||
{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
|
||||
{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
|
||||
{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
|
||||
{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
|
||||
{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
|
||||
{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
|
||||
{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
|
||||
{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
|
||||
{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
|
||||
{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
|
||||
{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
|
||||
{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
|
||||
{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
|
||||
{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
|
||||
{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
|
||||
{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
|
||||
{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
|
||||
{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
|
||||
{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
|
||||
{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
|
||||
{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
|
||||
{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
|
||||
{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
|
||||
{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
|
||||
{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
|
||||
{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
|
||||
{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
|
||||
{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
|
||||
{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
|
||||
{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
|
||||
{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
|
||||
{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
|
||||
{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
|
||||
{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
|
||||
{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
|
||||
{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
|
||||
{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
|
||||
{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
|
||||
{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
|
||||
{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
|
||||
{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
|
||||
{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
|
||||
{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
|
||||
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
|
||||
#endif
|
||||
},
|
||||
#endif /* !HAVE_FAST_FMA */
|
||||
};
|
||||
+79
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Single-precision log function.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "math_config.h"
|
||||
|
||||
/*
|
||||
LOGF_TABLE_BITS = 4
|
||||
LOGF_POLY_ORDER = 4
|
||||
|
||||
ULP error: 0.818 (nearest rounding.)
|
||||
Relative error: 1.957 * 2^-26 (before rounding.)
|
||||
*/
|
||||
|
||||
/* Shorthands for the lookup table, the polynomial coefficients and the ln2
   constant stored in __logf_data.  */
#define T __logf_data.tab
#define A __logf_data.poly
#define Ln2 __logf_data.ln2
/* Number of subintervals the reduced argument range is split into; logf
   reduces its table index modulo this value.  */
#define N (1 << LOGF_TABLE_BITS)
/* Bit pattern subtracted from asuint (x) in logf before the exponent k and
   the table index are extracted; the reduced value z is in [OFF, 2*OFF].  */
#define OFF 0x3f330000
|
||||
|
||||
float
|
||||
logf (float x)
|
||||
{
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t z, r, r2, y, y0, invc, logc;
|
||||
uint32_t ix, iz, tmp;
|
||||
int k, i;
|
||||
|
||||
ix = asuint (x);
|
||||
#if WANT_ROUNDING
|
||||
/* Fix sign of zero with downward rounding when x==1. */
|
||||
if (unlikely (ix == 0x3f800000))
|
||||
return 0;
|
||||
#endif
|
||||
if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
|
||||
{
|
||||
/* x < 0x1p-126 or inf or nan. */
|
||||
if (ix * 2 == 0)
|
||||
return __math_divzerof (1);
|
||||
if (ix == 0x7f800000) /* log(inf) == inf. */
|
||||
return x;
|
||||
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
|
||||
return __math_invalidf (x);
|
||||
/* x is subnormal, normalize it. */
|
||||
ix = asuint (x * 0x1p23f);
|
||||
ix -= 23 << 23;
|
||||
}
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
|
||||
k = (int32_t) tmp >> 23; /* arithmetic shift */
|
||||
iz = ix - (tmp & 0x1ff << 23);
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = (double_t) asfloat (iz);
|
||||
|
||||
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
|
||||
r = z * invc - 1;
|
||||
y0 = logc + (double_t) k * Ln2;
|
||||
|
||||
/* Pipelined polynomial evaluation to approximate log1p(r). */
|
||||
r2 = r * r;
|
||||
y = A[1] * r + A[2];
|
||||
y = A[0] * r2 + y;
|
||||
y = y * r2 + (y0 + r);
|
||||
return eval_as_float (y);
|
||||
}
|
||||
#if USE_GLIBC_ABI
|
||||
strong_alias (logf, __logf_finite)
|
||||
hidden_alias (logf, __ieee754_logf)
|
||||
#endif
|
||||
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Data definition for logf.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "math_config.h"
|
||||
|
||||
/* Constants read by logf through __logf_data.tab, .ln2 and .poly.  */
const struct logf_data __logf_data = {
  /* One entry per subinterval, selected by the index i computed in logf.
     logf reads the entry as tab[i].invc (multiplied with z) and tab[i].logc
     (added to k*Ln2).
     NOTE(review): the positional order { invc, logc } is assumed here; the
     values fit 1/c and log(c) -- confirm against the declaration of
     struct logf_data.  */
  .tab = {
  { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
  { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
  { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
  { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
  { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
  { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
  { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
  { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
  { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
  { 0x1p+0, 0x0p+0 },
  { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
  { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
  { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
  { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
  { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
  { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
  },
  /* Multiplied with the exponent k in logf (the Ln2 macro).  */
  .ln2 = 0x1.62e42fefa39efp-1,
  /* Polynomial coefficients A[0]..A[2] as used by logf: the evaluated
     polynomial is r + A[2]*r^2 + A[1]*r^3 + A[0]*r^4, i.e. the
     highest-degree coefficient is stored first and the linear term has no
     stored coefficient.  */
  .poly = {
  -0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
  }
};
|
||||
@@ -0,0 +1,419 @@
|
||||
/*
|
||||
* Configuration for math routines.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef _MATH_CONFIG_H
|
||||
#define _MATH_CONFIG_H
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Behaviour switches.  Each one is only a default: a value defined before
   this point takes precedence because of the #ifndef guards.  */
#ifndef WANT_ROUNDING
/* If defined to 1, return correct results for special cases in non-nearest
   rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
   This may be set to 0 if there is no fenv support or if math functions only
   get called in round to nearest mode.  */
# define WANT_ROUNDING 1
#endif
#ifndef WANT_ERRNO
/* If defined to 1, set errno in math functions according to ISO C.  Many math
   libraries do not set errno, so this is 0 by default.  It may need to be
   set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.  */
# define WANT_ERRNO 0
#endif
#ifndef WANT_ERRNO_UFLOW
/* Set errno to ERANGE if result underflows to 0 (in all rounding modes).
   Enabled by default only when both WANT_ROUNDING and WANT_ERRNO are.  */
# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
#endif
|
||||
|
||||
/* Target capability switches.  Each one is only a default: a value defined
   before this point takes precedence because of the #ifndef guards.  */

/* Compiler can inline round as a single instruction.
   Defaults to 1 on AArch64 only.  */
#ifndef HAVE_FAST_ROUND
# if __aarch64__
#   define HAVE_FAST_ROUND 1
# else
#   define HAVE_FAST_ROUND 0
# endif
#endif

/* Compiler can inline lround, but not (long)round(x).
   Defaults to 1 only on AArch64 with a GNU C version of at least 4.8
   (100*major + minor >= 408) and __NO_MATH_ERRNO__ set.  */
#ifndef HAVE_FAST_LROUND
# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
#   define HAVE_FAST_LROUND 1
# else
#   define HAVE_FAST_LROUND 0
# endif
#endif

/* Compiler can inline fma as a single instruction.
   Defaults to 1 when FP_FAST_FMA is defined or on AArch64.  */
#ifndef HAVE_FAST_FMA
# if defined FP_FAST_FMA || __aarch64__
#   define HAVE_FAST_FMA 1
# else
#   define HAVE_FAST_FMA 0
# endif
#endif
|
||||
|
||||
/* Provide *_finite symbols and some of the glibc hidden symbols
|
||||
so libmathlib can be used with binaries compiled against glibc
|
||||
to interpose math functions with both static and dynamic linking. */
|
||||
#ifndef USE_GLIBC_ABI
|
||||
# if __GNUC__
|
||||
# define USE_GLIBC_ABI 1
|
||||
# else
|
||||
# define USE_GLIBC_ABI 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Optionally used compiler extensions.

   HIDDEN, NOINLINE and UNUSED expand to the matching GNU attributes and to
   nothing on other compilers.  likely/unlikely are branch prediction hints;
   both normalize their argument with !! so that a wide or pointer argument
   is never truncated by the long parameter of __builtin_expect.
   strong_alias/hidden_alias declare A as an alias of the function F (GNU
   compilers only).  */
#ifdef __GNUC__
# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
# define NOINLINE __attribute__ ((noinline))
# define UNUSED __attribute__ ((unused))
# define likely(x) __builtin_expect (!!(x), 1)
# define unlikely(x) __builtin_expect (!!(x), 0)
/* The copy attribute is only requested from GCC 9 on.  */
# if __GNUC__ >= 9
#  define attribute_copy(f) __attribute__ ((copy (f)))
# else
#  define attribute_copy(f)
# endif
# define strong_alias(f, a) \
  extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
# define hidden_alias(f, a) \
  extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
  attribute_copy (f);
#else
# define HIDDEN
# define NOINLINE
# define UNUSED
# define likely(x) (x)
# define unlikely(x) (x)
#endif
|
||||
|
||||
#if HAVE_FAST_ROUND
/* When set, the roundtoint and converttoint functions are provided with
   the semantics documented below.  */
# define TOINT_INTRINSICS 1

/* Round x to the nearest integer in every rounding mode.  Ties must be
   rounded the same way as in converttoint so the two results match.  The
   semantics is unspecified if the result would be outside of
   [-2^31, 2^31-1].  */
static inline double_t
roundtoint (double_t x)
{
  double_t nearest = round (x);
  return nearest;
}

/* Convert x to the nearest integer in every rounding mode.  Ties must be
   rounded the same way as in roundtoint.  The semantics is unspecified if
   the result is not representable in an int32_t.  */
static inline int32_t
converttoint (double_t x)
{
  long nearest;
# if HAVE_FAST_LROUND
  nearest = lround (x);
# else
  nearest = (long) round (x);
# endif
  return nearest;
}
#endif
|
||||
|
||||
/* Return the bit representation of the float F as a uint32_t
   (type punning through a union).  */
static inline uint32_t
asuint (float f)
{
  union
  {
    float as_float;
    uint32_t as_bits;
  } pun;

  pun.as_float = f;
  return pun.as_bits;
}
|
||||
|
||||
/* Return the float whose bit representation is I
   (type punning through a union).  */
static inline float
asfloat (uint32_t i)
{
  union
  {
    uint32_t as_bits;
    float as_float;
  } pun;

  pun.as_bits = i;
  return pun.as_float;
}
|
||||
|
||||
/* Return the bit representation of the double F as a uint64_t
   (type punning through a union).  */
static inline uint64_t
asuint64 (double f)
{
  union
  {
    double as_double;
    uint64_t as_bits;
  } pun;

  pun.as_double = f;
  return pun.as_bits;
}
|
||||
|
||||
/* Return the double whose bit representation is I
   (type punning through a union).  */
static inline double
asdouble (uint64_t i)
{
  union
  {
    uint64_t as_bits;
    double as_double;
  } pun;

  pun.as_bits = i;
  return pun.as_double;
}
|
||||
|
||||
/* Selects which of the two bit tests below the issignaling helpers use;
   defaults to 1.  */
#if !defined IEEE_754_2008_SNAN
# define IEEE_754_2008_SNAN 1
#endif

/* Return non-zero if the float X is a signaling NaN, decided from its bit
   representation.  */
static inline int
issignalingf_inline (float x)
{
  uint32_t bits = asuint (x);

  if (IEEE_754_2008_SNAN)
    /* Flipping bit 22 and doubling (which drops the sign bit) makes the
       tested values compare above the threshold.  */
    return 2 * (bits ^ 0x00400000) > 2u * 0x7fc00000;
  return (bits & 0x7fc00000) == 0x7fc00000;
}
|
||||
|
||||
static inline int
|
||||
issignaling_inline (double x)
|
||||
{
|
||||
uint64_t ix = asuint64 (x);
|
||||
if (!IEEE_754_2008_SNAN)
|
||||
return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
|
||||
return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
|
||||
}
|
||||
|
||||
#if __aarch64__ && __GNUC__
|
||||
/* Prevent the optimization of a floating-point expression. */
|
||||
static inline float
|
||||
opt_barrier_float (float x)
|
||||
{
|
||||
__asm__ __volatile__ ("" : "+w" (x));
|
||||
return x;
|
||||
}
|
||||
static inline double
|
||||
opt_barrier_double (double x)
|
||||
{
|
||||
__asm__ __volatile__ ("" : "+w" (x));
|
||||
return x;
|
||||
}
|
||||
/* Force the evaluation of a floating-point expression for its side-effect. */
|
||||
static inline void
|
||||
force_eval_float (float x)
|
||||
{
|
||||
__asm__ __volatile__ ("" : "+w" (x));
|
||||
}
|
||||
static inline void
|
||||
force_eval_double (double x)
|
||||
{
|
||||
__asm__ __volatile__ ("" : "+w" (x));
|
||||
}
|
||||
#else
|
||||
static inline float
|
||||
opt_barrier_float (float x)
|
||||
{
|
||||
volatile float y = x;
|
||||
return y;
|
||||
}
|
||||
static inline double
|
||||
opt_barrier_double (double x)
|
||||
{
|
||||
volatile double y = x;
|
||||
return y;
|
||||
}
|
||||
static inline void
|
||||
force_eval_float (float x)
|
||||
{
|
||||
volatile float y UNUSED = x;
|
||||
}
|
||||
static inline void
|
||||
force_eval_double (double x)
|
||||
{
|
||||
volatile double y UNUSED = x;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Evaluate an expression as the specified type.  Normally a type cast
   should be enough, but compilers implement non-standard excess-precision
   handling, so when FLT_EVAL_METHOD != 0 these functions may need to be
   customized.  */
static inline float
eval_as_float (float x)
{
  float narrowed = x;
  return narrowed;
}

static inline double
eval_as_double (double x)
{
  double narrowed = x;
  return narrowed;
}
|
||||
|
||||
/* Error handling tail calls for special cases, with a sign argument.
|
||||
The sign of the return value is set if the argument is non-zero. */
|
||||
|
||||
/* The result overflows. */
|
||||
HIDDEN float __math_oflowf (uint32_t);
|
||||
/* The result underflows to 0 in nearest rounding mode. */
|
||||
HIDDEN float __math_uflowf (uint32_t);
|
||||
/* The result underflows to 0 in some directed rounding mode only. */
|
||||
HIDDEN float __math_may_uflowf (uint32_t);
|
||||
/* Division by zero. */
|
||||
HIDDEN float __math_divzerof (uint32_t);
|
||||
/* The result overflows. */
|
||||
HIDDEN double __math_oflow (uint32_t);
|
||||
/* The result underflows to 0 in nearest rounding mode. */
|
||||
HIDDEN double __math_uflow (uint32_t);
|
||||
/* The result underflows to 0 in some directed rounding mode only. */
|
||||
HIDDEN double __math_may_uflow (uint32_t);
|
||||
/* Division by zero. */
|
||||
HIDDEN double __math_divzero (uint32_t);
|
||||
|
||||
/* Error handling using input checking. */
|
||||
|
||||
/* Invalid input unless it is a quiet NaN. */
|
||||
HIDDEN float __math_invalidf (float);
|
||||
/* Invalid input unless it is a quiet NaN. */
|
||||
HIDDEN double __math_invalid (double);
|
||||
|
||||
/* Error handling using output checking, only for errno setting. */
|
||||
|
||||
/* Check if the result overflowed to infinity. */
|
||||
HIDDEN double __math_check_oflow (double);
|
||||
/* Check if the result underflowed to 0. */
|
||||
HIDDEN double __math_check_uflow (double);
|
||||
|
||||
/* Check if the result overflowed to infinity. */
|
||||
static inline double
|
||||
check_oflow (double x)
|
||||
{
|
||||
return WANT_ERRNO ? __math_check_oflow (x) : x;
|
||||
}
|
||||
|
||||
/* Check if the result underflowed to 0. */
|
||||
static inline double
|
||||
check_uflow (double x)
|
||||
{
|
||||
return WANT_ERRNO ? __math_check_uflow (x) : x;
|
||||
}
|
||||
|
||||
|
||||
/* Shared between expf, exp2f and powf. */
|
||||
#define EXP2F_TABLE_BITS 5
|
||||
#define EXP2F_POLY_ORDER 3
|
||||
extern const struct exp2f_data
|
||||
{
|
||||
uint64_t tab[1 << EXP2F_TABLE_BITS];
|
||||
double shift_scaled;
|
||||
double poly[EXP2F_POLY_ORDER];
|
||||
double shift;
|
||||
double invln2_scaled;
|
||||
double poly_scaled[EXP2F_POLY_ORDER];
|
||||
} __exp2f_data HIDDEN;
|
||||
|
||||
#define LOGF_TABLE_BITS 4
|
||||
#define LOGF_POLY_ORDER 4
|
||||
extern const struct logf_data
|
||||
{
|
||||
struct
|
||||
{
|
||||
double invc, logc;
|
||||
} tab[1 << LOGF_TABLE_BITS];
|
||||
double ln2;
|
||||
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
|
||||
} __logf_data HIDDEN;
|
||||
|
||||
#define LOG2F_TABLE_BITS 4
|
||||
#define LOG2F_POLY_ORDER 4
|
||||
extern const struct log2f_data
|
||||
{
|
||||
struct
|
||||
{
|
||||
double invc, logc;
|
||||
} tab[1 << LOG2F_TABLE_BITS];
|
||||
double poly[LOG2F_POLY_ORDER];
|
||||
} __log2f_data HIDDEN;
|
||||
|
||||
#define POWF_LOG2_TABLE_BITS 4
|
||||
#define POWF_LOG2_POLY_ORDER 5
|
||||
#if TOINT_INTRINSICS
|
||||
# define POWF_SCALE_BITS EXP2F_TABLE_BITS
|
||||
#else
|
||||
# define POWF_SCALE_BITS 0
|
||||
#endif
|
||||
#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
|
||||
extern const struct powf_log2_data
|
||||
{
|
||||
struct
|
||||
{
|
||||
double invc, logc;
|
||||
} tab[1 << POWF_LOG2_TABLE_BITS];
|
||||
double poly[POWF_LOG2_POLY_ORDER];
|
||||
} __powf_log2_data HIDDEN;
|
||||
|
||||
|
||||
#define EXP_TABLE_BITS 7
|
||||
#define EXP_POLY_ORDER 5
|
||||
/* Use polynomial that is optimized for a wider input range. This may be
|
||||
needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */
|
||||
#define EXP_POLY_WIDE 0
|
||||
/* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be
|
||||
needed for good precision in non-nearest rouning and !EXP_POLY_WIDE. */
|
||||
#define EXP_USE_TOINT_NARROW 0
|
||||
#define EXP2_POLY_ORDER 5
|
||||
#define EXP2_POLY_WIDE 0
|
||||
extern const struct exp_data
|
||||
{
|
||||
double invln2N;
|
||||
double shift;
|
||||
double negln2hiN;
|
||||
double negln2loN;
|
||||
double poly[4]; /* Last four coefficients. */
|
||||
double exp2_shift;
|
||||
double exp2_poly[EXP2_POLY_ORDER];
|
||||
uint64_t tab[2*(1 << EXP_TABLE_BITS)];
|
||||
} __exp_data HIDDEN;
|
||||
|
||||
#define LOG_TABLE_BITS 7
|
||||
#define LOG_POLY_ORDER 6
|
||||
#define LOG_POLY1_ORDER 12
|
||||
extern const struct log_data
|
||||
{
|
||||
double ln2hi;
|
||||
double ln2lo;
|
||||
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
||||
double poly1[LOG_POLY1_ORDER - 1];
|
||||
struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
|
||||
#if !HAVE_FAST_FMA
|
||||
struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
|
||||
#endif
|
||||
} __log_data HIDDEN;
|
||||
|
||||
#define LOG2_TABLE_BITS 6
|
||||
#define LOG2_POLY_ORDER 7
|
||||
#define LOG2_POLY1_ORDER 11
|
||||
extern const struct log2_data
|
||||
{
|
||||
double invln2hi;
|
||||
double invln2lo;
|
||||
double poly[LOG2_POLY_ORDER - 1];
|
||||
double poly1[LOG2_POLY1_ORDER - 1];
|
||||
struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
|
||||
#if !HAVE_FAST_FMA
|
||||
struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
|
||||
#endif
|
||||
} __log2_data HIDDEN;
|
||||
|
||||
#define POW_LOG_TABLE_BITS 7
|
||||
#define POW_LOG_POLY_ORDER 8
|
||||
extern const struct pow_log_data
|
||||
{
|
||||
double ln2hi;
|
||||
double ln2lo;
|
||||
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
||||
/* Note: the pad field is unused, but allows slightly faster indexing. */
|
||||
struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
|
||||
} __pow_log_data HIDDEN;
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Double-precision math error handling.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "math_config.h"
|
||||
|
||||
#if !WANT_ERRNO
/* Without errno support the value is passed through and the error code is
   dropped.  */
#define with_errno(x, e) (x)
#else
#include <errno.h>
/* Set errno to E and return Y.
   NOINLINE reduces code size and avoids making math functions non-leaf
   when the error handling is inlined.  */
NOINLINE static double
with_errno (double y, int e)
{
  errno = e;
  return y;
}
#endif
|
||||
|
||||
/* NOINLINE reduces code size. */
|
||||
NOINLINE static double
|
||||
xflow (uint32_t sign, double y)
|
||||
{
|
||||
y = eval_as_double (opt_barrier_double (sign ? -y : y) * y);
|
||||
return with_errno (y, ERANGE);
|
||||
}
|
||||
|
||||
/* The result underflows: returns xflow (sign, 0x1p-767), i.e. the square of
   0x1p-767 with the sign selected by SIGN.  */
HIDDEN double
__math_uflow (uint32_t sign)
{
  const double tiny = 0x1p-767;
  return xflow (sign, tiny);
}
|
||||
|
||||
#if WANT_ERRNO_UFLOW
/* Underflows to zero in some non-nearest rounding mode, setting errno
   is valid even if the result is non-zero, but in the subnormal range.  */
HIDDEN double
__math_may_uflow (uint32_t sign)
{
  const double small = 0x1.8p-538;
  return xflow (sign, small);
}
#endif
|
||||
|
||||
/* The result overflows: returns xflow (sign, 0x1p769), i.e. the square of
   0x1p769 with the sign selected by SIGN.  */
HIDDEN double
__math_oflow (uint32_t sign)
{
  const double huge = 0x1p769;
  return xflow (sign, huge);
}
|
||||
|
||||
/* Division by zero: divides +-1.0 (negative when SIGN is non-zero) by 0.0,
   with the numerator hidden from the optimizer, and reports ERANGE through
   with_errno.  */
HIDDEN double
__math_divzero (uint32_t sign)
{
  double numerator = sign ? -1.0 : 1.0;
  double y = opt_barrier_double (numerator) / 0.0;
  return with_errno (y, ERANGE);
}
|
||||
|
||||
/* Invalid input: returns (x - x) / (x - x).  EDOM is reported through
   with_errno unless X is itself a NaN.  */
HIDDEN double
__math_invalid (double x)
{
  double y = (x - x) / (x - x);
  if (isnan (x))
    return y;
  return with_errno (y, EDOM);
}
|
||||
|
||||
/* Check result and set errno if necessary. */
|
||||
|
||||
/* Return Y, reporting ERANGE through with_errno when Y is zero.  */
HIDDEN double
__math_check_uflow (double y)
{
  if (y == 0.0)
    return with_errno (y, ERANGE);
  return y;
}
|
||||
|
||||
/* Return Y, reporting ERANGE through with_errno when Y is infinite.  */
HIDDEN double
__math_check_oflow (double y)
{
  if (isinf (y))
    return with_errno (y, ERANGE);
  return y;
}
|
||||
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Single-precision math error handling.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "math_config.h"
|
||||
|
||||
#if !WANT_ERRNO
/* Without errno support the value is passed through and the error code is
   dropped.  */
#define with_errnof(x, e) (x)
#else
#include <errno.h>
/* Set errno to E and return Y.
   NOINLINE reduces code size and avoids making math functions non-leaf
   when the error handling is inlined.  */
NOINLINE static float
with_errnof (float y, int e)
{
  errno = e;
  return y;
}
#endif
|
||||
|
||||
/* NOINLINE reduces code size. */
|
||||
NOINLINE static float
|
||||
xflowf (uint32_t sign, float y)
|
||||
{
|
||||
y = eval_as_float (opt_barrier_float (sign ? -y : y) * y);
|
||||
return with_errnof (y, ERANGE);
|
||||
}
|
||||
|
||||
/* The result underflows: returns xflowf (sign, 0x1p-95f), i.e. the square
   of 0x1p-95f with the sign selected by SIGN.  */
HIDDEN float
__math_uflowf (uint32_t sign)
{
  const float tiny = 0x1p-95f;
  return xflowf (sign, tiny);
}
|
||||
|
||||
#if WANT_ERRNO_UFLOW
/* Underflows to zero in some non-nearest rounding mode, setting errno
   is valid even if the result is non-zero, but in the subnormal range.  */
HIDDEN float
__math_may_uflowf (uint32_t sign)
{
  const float small = 0x1.4p-75f;
  return xflowf (sign, small);
}
#endif
|
||||
|
||||
/* The result overflows: returns xflowf (sign, 0x1p97f), i.e. the square of
   0x1p97f with the sign selected by SIGN.  */
HIDDEN float
__math_oflowf (uint32_t sign)
{
  const float huge = 0x1p97f;
  return xflowf (sign, huge);
}
|
||||
|
||||
/* Division by zero: divides +-1.0f (negative when SIGN is non-zero) by
   0.0f, with the numerator hidden from the optimizer, and reports ERANGE
   through with_errnof.  */
HIDDEN float
__math_divzerof (uint32_t sign)
{
  float numerator = sign ? -1.0f : 1.0f;
  float y = opt_barrier_float (numerator) / 0.0f;
  return with_errnof (y, ERANGE);
}
|
||||
|
||||
/* Invalid input: returns (x - x) / (x - x).  EDOM is reported through
   with_errnof unless X is itself a NaN.  */
HIDDEN float
__math_invalidf (float x)
{
  float y = (x - x) / (x - x);
  if (isnan (x))
    return y;
  return with_errnof (y, EDOM);
}
|
||||
+380
@@ -0,0 +1,380 @@
|
||||
/*
|
||||
* Double-precision x^y function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "math_config.h"
|
||||
|
||||
/*
|
||||
Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
|
||||
relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
|
||||
ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
|
||||
*/
|
||||
|
||||
/* Shorthands used by log_inline: table size, the fields of __pow_log_data
   and the bit pattern subtracted from the input bits during the
   reduction.  */
#define N (1 << POW_LOG_TABLE_BITS)
#define T __pow_log_data.tab
#define A __pow_log_data.poly
#define Ln2hi __pow_log_data.ln2hi
#define Ln2lo __pow_log_data.ln2lo
#define OFF 0x3fe6955500000000
|
||||
|
||||
/* Return the top 12 bits of the representation of X: the sign bit followed
   by the exponent bits.  */
static inline uint32_t
top12 (double x)
{
  uint64_t bits = asuint64 (x);
  return bits >> 52;
}
|
||||
|
||||
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
|
||||
additional 15 bits precision. IX is the bit representation of x, but
|
||||
normalized in the subnormal range using the sign bit for the exponent. */
|
||||
static inline double_t
|
||||
log_inline (uint64_t ix, double_t *tail)
|
||||
{
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
|
||||
uint64_t iz, tmp;
|
||||
int k, i;
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
|
||||
k = (int64_t) tmp >> 52; /* arithmetic shift */
|
||||
iz = ix - (tmp & 0xfffULL << 52);
|
||||
z = asdouble (iz);
|
||||
kd = (double_t) k;
|
||||
|
||||
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
logctail = T[i].logctail;
|
||||
|
||||
/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
|
||||
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */
|
||||
#if HAVE_FAST_FMA
|
||||
r = fma (z, invc, -1.0);
|
||||
#else
|
||||
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
|
||||
double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32));
|
||||
double_t zlo = z - zhi;
|
||||
double_t rhi = zhi * invc - 1.0;
|
||||
double_t rlo = zlo * invc;
|
||||
r = rhi + rlo;
|
||||
#endif
|
||||
|
||||
/* k*Ln2 + log(c) + r. */
|
||||
t1 = kd * Ln2hi + logc;
|
||||
t2 = t1 + r;
|
||||
lo1 = kd * Ln2lo + logctail;
|
||||
lo2 = t1 - t2 + r;
|
||||
|
||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||
double_t ar, ar2, ar3, lo3, lo4;
|
||||
ar = A[0] * r; /* A[0] = -0.5. */
|
||||
ar2 = r * ar;
|
||||
ar3 = r * ar2;
|
||||
/* k*Ln2 + log(c) + r + A[0]*r*r. */
|
||||
#if HAVE_FAST_FMA
|
||||
hi = t2 + ar2;
|
||||
lo3 = fma (ar, r, -ar2);
|
||||
lo4 = t2 - hi + ar2;
|
||||
#else
|
||||
double_t arhi = A[0] * rhi;
|
||||
double_t arhi2 = rhi * arhi;
|
||||
hi = t2 + arhi2;
|
||||
lo3 = rlo * (ar + arhi);
|
||||
lo4 = t2 - hi + arhi2;
|
||||
#endif
|
||||
/* p = log1p(r) - r - A[0]*r*r. */
|
||||
#if POW_LOG_POLY_ORDER == 8
|
||||
p = (ar3
|
||||
* (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
|
||||
#endif
|
||||
lo = lo1 + lo2 + lo3 + lo4 + p;
|
||||
y = hi + lo;
|
||||
*tail = hi - y + lo;
|
||||
return y;
|
||||
}
|
||||
|
||||
/* Switch the N and T shorthands from the log tables to the exp tables and
   add the shorthands used by exp_inline.  */
#undef N
#undef T
#define N (1 << EXP_TABLE_BITS)
#define T __exp_data.tab
#define InvLn2N __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN
#define NegLn2loN __exp_data.negln2loN
#define Shift __exp_data.shift
/* Polynomial coefficients, indexed relative to EXP_POLY_ORDER.  */
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
|
||||
|
||||
/* Handle cases that may overflow or underflow when computing the result that
   is scale*(1+TMP) without intermediate rounding.  The bit representation of
   scale is in SBITS, however it has a computed exponent that may have
   overflowed into the sign bit so that needs to be adjusted before using it
   as a double.  (int32_t)KI is the k used in the argument reduction and
   exponent adjustment of scale, positive k here means the result may
   overflow and negative k means the result may underflow.  */
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
  const int k_is_positive = (ki & 0x80000000) == 0;
  double_t scale, y;

  if (k_is_positive)
    {
      /* k > 0, the exponent of scale might have overflowed by <= 460.  */
      sbits -= 1009ull << 52;
      scale = asdouble (sbits);
      y = 0x1p1009 * (scale + scale * tmp);
      return check_oflow (eval_as_double (y));
    }

  /* k < 0, need special care in the subnormal range.  */
  sbits += 1022ull << 52;
  /* Note: sbits is signed scale.  */
  scale = asdouble (sbits);
  y = scale + scale * tmp;
  if (fabs (y) < 1.0)
    {
      /* Round y to the right precision before scaling it into the subnormal
         range to avoid double rounding that can cause 0.5+E/2 ulp error where
         E is the worst-case ulp error outside the subnormal range.  So this
         is only useful if the goal is better than 1 ulp worst-case error.  */
      double_t one = y < 0.0 ? -1.0 : 1.0;
      double_t lo = scale - y + scale * tmp;
      double_t hi = one + y;
      lo = one - hi + y + lo;
      y = eval_as_double (hi + lo) - one;
      /* Fix the sign of 0.  */
      if (y == 0.0)
        y = asdouble (sbits & 0x8000000000000000);
      /* The underflow exception needs to be signaled explicitly.  */
      force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
    }
  y = 0x1p-1022 * y;
  return check_uflow (eval_as_double (y));
}
|
||||
|
||||
#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
|
||||
|
||||
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
|
||||
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
|
||||
static inline double
|
||||
exp_inline (double_t x, double_t xtail, uint32_t sign_bias)
|
||||
{
|
||||
uint32_t abstop;
|
||||
uint64_t ki, idx, top, sbits;
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t kd, z, r, r2, scale, tail, tmp;
|
||||
|
||||
abstop = top12 (x) & 0x7ff;
|
||||
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
|
||||
{
|
||||
if (abstop - top12 (0x1p-54) >= 0x80000000)
|
||||
{
|
||||
/* Avoid spurious underflow for tiny x. */
|
||||
/* Note: 0 is common input. */
|
||||
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
|
||||
return sign_bias ? -one : one;
|
||||
}
|
||||
if (abstop >= top12 (1024.0))
|
||||
{
|
||||
/* Note: inf and nan are already handled. */
|
||||
if (asuint64 (x) >> 63)
|
||||
return __math_uflow (sign_bias);
|
||||
else
|
||||
return __math_oflow (sign_bias);
|
||||
}
|
||||
/* Large x is special cased below. */
|
||||
abstop = 0;
|
||||
}
|
||||
|
||||
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
|
||||
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
|
||||
z = InvLn2N * x;
|
||||
#if TOINT_INTRINSICS
|
||||
kd = roundtoint (z);
|
||||
ki = converttoint (z);
|
||||
#elif EXP_USE_TOINT_NARROW
|
||||
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
|
||||
kd = eval_as_double (z + Shift);
|
||||
ki = asuint64 (kd) >> 16;
|
||||
kd = (double_t) (int32_t) ki;
|
||||
#else
|
||||
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
|
||||
kd = eval_as_double (z + Shift);
|
||||
ki = asuint64 (kd);
|
||||
kd -= Shift;
|
||||
#endif
|
||||
r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
||||
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
|
||||
r += xtail;
|
||||
/* 2^(k/N) ~= scale * (1 + tail). */
|
||||
idx = 2 * (ki % N);
|
||||
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
|
||||
tail = asdouble (T[idx]);
|
||||
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
||||
sbits = T[idx + 1] + top;
|
||||
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
|
||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||
r2 = r * r;
|
||||
/* Without fma the worst case error is 0.25/N ulp larger. */
|
||||
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
|
||||
#if EXP_POLY_ORDER == 4
|
||||
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
|
||||
#elif EXP_POLY_ORDER == 5
|
||||
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
||||
#elif EXP_POLY_ORDER == 6
|
||||
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
|
||||
#endif
|
||||
if (unlikely (abstop == 0))
|
||||
return specialcase (tmp, sbits, ki);
|
||||
scale = asdouble (sbits);
|
||||
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
||||
is no spurious underflow here even without fma. */
|
||||
return eval_as_double (scale + scale * tmp);
|
||||
}
|
||||
|
||||
/* Classify the value whose bit representation is IY, which must be a
   non-zero finite floating-point value: returns 0 if it is not an integer,
   1 if it is an odd integer and 2 if it is an even integer.  */
static inline int
checkint (uint64_t iy)
{
  int e = iy >> 52 & 0x7ff;

  /* Biased exponent below 0x3ff: |value| < 1, so not an integer.  */
  if (e < 0x3ff)
    return 0;
  /* Exponent above 0x3ff + 52: every such value is an even integer.  */
  if (e > 0x3ff + 52)
    return 2;

  /* Bit of the representation that has weight 1 in the value.  */
  uint64_t unit = 1ULL << (0x3ff + 52 - e);

  /* Any bit set below the unit bit means a fractional part.  */
  if (iy & (unit - 1))
    return 0;
  return (iy & unit) ? 1 : 2;
}
|
||||
|
||||
/* Returns 1 if I is the bit representation of 0, infinity or nan.
   Doubling drops the sign bit and subtracting 1 wraps zero around to the
   largest value, so a single unsigned comparison covers all three.  */
static inline int
zeroinfnan (uint64_t i)
{
  uint64_t inf_bits = asuint64 (INFINITY);
  return 2 * i - 1 >= 2 * inf_bits - 1;
}
|
||||
|
||||
double
|
||||
pow (double x, double y)
|
||||
{
|
||||
uint32_t sign_bias = 0;
|
||||
uint64_t ix, iy;
|
||||
uint32_t topx, topy;
|
||||
|
||||
ix = asuint64 (x);
|
||||
iy = asuint64 (y);
|
||||
topx = top12 (x);
|
||||
topy = top12 (y);
|
||||
if (unlikely (topx - 0x001 >= 0x7ff - 0x001
|
||||
|| (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be))
|
||||
{
|
||||
/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
|
||||
and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
|
||||
/* Special cases: (x < 0x1p-126 or inf or nan) or
|
||||
(|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
|
||||
if (unlikely (zeroinfnan (iy)))
|
||||
{
|
||||
if (2 * iy == 0)
|
||||
return issignaling_inline (x) ? x + y : 1.0;
|
||||
if (ix == asuint64 (1.0))
|
||||
return issignaling_inline (y) ? x + y : 1.0;
|
||||
if (2 * ix > 2 * asuint64 (INFINITY)
|
||||
|| 2 * iy > 2 * asuint64 (INFINITY))
|
||||
return x + y;
|
||||
if (2 * ix == 2 * asuint64 (1.0))
|
||||
return 1.0;
|
||||
if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
|
||||
return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
|
||||
return y * y;
|
||||
}
|
||||
if (unlikely (zeroinfnan (ix)))
|
||||
{
|
||||
double_t x2 = x * x;
|
||||
if (ix >> 63 && checkint (iy) == 1)
|
||||
{
|
||||
x2 = -x2;
|
||||
sign_bias = 1;
|
||||
}
|
||||
if (WANT_ERRNO && 2 * ix == 0 && iy >> 63)
|
||||
return __math_divzero (sign_bias);
|
||||
/* Without the barrier some versions of clang hoist the 1/x2 and
|
||||
thus division by zero exception can be signaled spuriously. */
|
||||
return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
|
||||
}
|
||||
/* Here x and y are non-zero finite. */
|
||||
if (ix >> 63)
|
||||
{
|
||||
/* Finite x < 0. */
|
||||
int yint = checkint (iy);
|
||||
if (yint == 0)
|
||||
return __math_invalid (x);
|
||||
if (yint == 1)
|
||||
sign_bias = SIGN_BIAS;
|
||||
ix &= 0x7fffffffffffffff;
|
||||
topx &= 0x7ff;
|
||||
}
|
||||
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)
|
||||
{
|
||||
/* Note: sign_bias == 0 here because y is not odd. */
|
||||
if (ix == asuint64 (1.0))
|
||||
return 1.0;
|
||||
if ((topy & 0x7ff) < 0x3be)
|
||||
{
|
||||
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
|
||||
if (WANT_ROUNDING)
|
||||
return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y;
|
||||
else
|
||||
return 1.0;
|
||||
}
|
||||
return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0)
|
||||
: __math_uflow (0);
|
||||
}
|
||||
if (topx == 0)
|
||||
{
|
||||
/* Normalize subnormal x so exponent becomes negative. */
|
||||
/* Without the barrier some versions of clang evalutate the mul
|
||||
unconditionally causing spurious overflow exceptions. */
|
||||
ix = asuint64 (opt_barrier_double (x) * 0x1p52);
|
||||
ix &= 0x7fffffffffffffff;
|
||||
ix -= 52ULL << 52;
|
||||
}
|
||||
}
|
||||
|
||||
double_t lo;
|
||||
double_t hi = log_inline (ix, &lo);
|
||||
double_t ehi, elo;
|
||||
#if HAVE_FAST_FMA
|
||||
ehi = y * hi;
|
||||
elo = y * lo + fma (y, hi, -ehi);
|
||||
#else
|
||||
double_t yhi = asdouble (iy & -1ULL << 27);
|
||||
double_t ylo = y - yhi;
|
||||
double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27);
|
||||
double_t llo = hi - lhi + lo;
|
||||
ehi = yhi * lhi;
|
||||
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
|
||||
#endif
|
||||
return exp_inline (ehi, elo, sign_bias);
|
||||
}
|
||||
#if USE_GLIBC_ABI
/* Extra symbols that alias pow: __pow_finite and the hidden
   __ieee754_pow.  */
strong_alias (pow, __pow_finite)
hidden_alias (pow, __ieee754_pow)
# if LDBL_MANT_DIG == 53
/* long double has the same precision as double here, so powl simply
   forwards to pow.  */
long double
powl (long double x, long double y)
{
  return pow (x, y);
}
# endif
#endif
|
||||
@@ -0,0 +1,184 @@
|
||||
/*
|
||||
* Data for the log part of pow.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "math_config.h"
|
||||
|
||||
#define N (1 << POW_LOG_TABLE_BITS)
|
||||
|
||||
const struct pow_log_data __pow_log_data = {
|
||||
.ln2hi = 0x1.62e42fefa3800p-1,
|
||||
.ln2lo = 0x1.ef35793c76730p-45,
|
||||
.poly = {
|
||||
#if N == 128 && POW_LOG_POLY_ORDER == 8
|
||||
// relative error: 0x1.11922ap-70
|
||||
// in -0x1.6bp-8 0x1.6bp-8
|
||||
// Coefficients are scaled to match the scaling during evaluation.
|
||||
-0x1p-1,
|
||||
0x1.555555555556p-2 * -2,
|
||||
-0x1.0000000000006p-2 * -2,
|
||||
0x1.999999959554ep-3 * 4,
|
||||
-0x1.555555529a47ap-3 * 4,
|
||||
0x1.2495b9b4845e9p-3 * -8,
|
||||
-0x1.0002b8b263fc3p-3 * -8,
|
||||
#endif
|
||||
},
|
||||
/* Algorithm:
|
||||
|
||||
x = 2^k z
|
||||
log(x) = k ln2 + log(c) + log(z/c)
|
||||
log(z/c) = poly(z/c - 1)
|
||||
|
||||
where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
|
||||
and z falls into the ith one, then table entries are computed as
|
||||
|
||||
tab[i].invc = 1/c
|
||||
tab[i].logc = round(0x1p43*log(c))/0x1p43
|
||||
tab[i].logctail = (double)(log(c) - logc)
|
||||
|
||||
where c is chosen near the center of the subinterval such that 1/c has only a
|
||||
few precision bits so z/c - 1 is exactly representable as double:
|
||||
|
||||
1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
|
||||
|
||||
Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97,
|
||||
the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
|
||||
error and the interval for z is selected such that near x == 1, where log(x)
|
||||
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
|
||||
.tab = {
|
||||
#if N == 128
|
||||
#define A(a, b, c) {a, 0, b, c},
|
||||
A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
|
||||
A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
|
||||
A(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45)
|
||||
A(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49)
|
||||
A(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47)
|
||||
A(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46)
|
||||
A(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50)
|
||||
A(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45)
|
||||
A(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45)
|
||||
A(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45)
|
||||
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
|
||||
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
|
||||
A(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46)
|
||||
A(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46)
|
||||
A(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46)
|
||||
A(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45)
|
||||
A(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47)
|
||||
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
|
||||
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
|
||||
A(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47)
|
||||
A(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45)
|
||||
A(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46)
|
||||
A(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45)
|
||||
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
|
||||
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
|
||||
A(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46)
|
||||
A(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52)
|
||||
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
|
||||
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
|
||||
A(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45)
|
||||
A(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45)
|
||||
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
|
||||
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
|
||||
A(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46)
|
||||
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
|
||||
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
|
||||
A(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45)
|
||||
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
|
||||
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
|
||||
A(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48)
|
||||
A(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45)
|
||||
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
|
||||
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
|
||||
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
|
||||
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
|
||||
A(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45)
|
||||
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
|
||||
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
|
||||
A(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46)
|
||||
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
|
||||
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
|
||||
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
|
||||
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
|
||||
A(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45)
|
||||
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
|
||||
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
|
||||
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
|
||||
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
|
||||
A(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46)
|
||||
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
|
||||
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
|
||||
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
|
||||
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
|
||||
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
|
||||
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
|
||||
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
|
||||
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
|
||||
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
|
||||
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
|
||||
A(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45)
|
||||
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
|
||||
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
|
||||
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
|
||||
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
|
||||
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
|
||||
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
|
||||
A(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46)
|
||||
A(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45)
|
||||
A(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45)
|
||||
A(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47)
|
||||
A(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45)
|
||||
A(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46)
|
||||
A(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46)
|
||||
A(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47)
|
||||
A(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45)
|
||||
A(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45)
|
||||
A(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45)
|
||||
A(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49)
|
||||
A(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45)
|
||||
A(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46)
|
||||
A(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45)
|
||||
A(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45)
|
||||
A(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45)
|
||||
A(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45)
|
||||
A(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45)
|
||||
A(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47)
|
||||
A(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51)
|
||||
A(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45)
|
||||
A(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45)
|
||||
A(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46)
|
||||
A(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45)
|
||||
A(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46)
|
||||
A(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47)
|
||||
A(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47)
|
||||
A(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45)
|
||||
A(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47)
|
||||
A(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45)
|
||||
A(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48)
|
||||
A(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45)
|
||||
A(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51)
|
||||
A(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51)
|
||||
A(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46)
|
||||
A(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48)
|
||||
A(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45)
|
||||
A(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45)
|
||||
A(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45)
|
||||
A(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45)
|
||||
A(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47)
|
||||
A(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45)
|
||||
A(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45)
|
||||
A(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46)
|
||||
A(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46)
|
||||
A(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47)
|
||||
A(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45)
|
||||
A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
|
||||
A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
|
||||
A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
|
||||
A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
|
||||
#endif
|
||||
},
|
||||
};
|
||||
+221
@@ -0,0 +1,221 @@
|
||||
/*
|
||||
* Single-precision pow function.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "math_config.h"
|
||||
|
||||
/*
|
||||
POWF_LOG2_POLY_ORDER = 5
|
||||
EXP2F_TABLE_BITS = 5
|
||||
|
||||
ULP error: 0.82 (~ 0.5 + relerr*2^24)
|
||||
relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
|
||||
relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
|
||||
relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
|
||||
*/
|
||||
|
||||
#define N (1 << POWF_LOG2_TABLE_BITS)
|
||||
#define T __powf_log2_data.tab
|
||||
#define A __powf_log2_data.poly
|
||||
#define OFF 0x3f330000
|
||||
|
||||
/* Subnormal input is normalized so ix has negative biased exponent.
|
||||
Output is multiplied by N (POWF_SCALE) if TOINT_INTRINICS is set. */
|
||||
static inline double_t
|
||||
log2_inline (uint32_t ix)
|
||||
{
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t z, r, r2, r4, p, q, y, y0, invc, logc;
|
||||
uint32_t iz, top, tmp;
|
||||
int k, i;
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
|
||||
top = tmp & 0xff800000;
|
||||
iz = ix - top;
|
||||
k = (int32_t) top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = (double_t) asfloat (iz);
|
||||
|
||||
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
|
||||
r = z * invc - 1;
|
||||
y0 = logc + (double_t) k;
|
||||
|
||||
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
|
||||
r2 = r * r;
|
||||
y = A[0] * r + A[1];
|
||||
p = A[2] * r + A[3];
|
||||
r4 = r2 * r2;
|
||||
q = A[4] * r + y0;
|
||||
q = p * r2 + q;
|
||||
y = y * r4 + q;
|
||||
return y;
|
||||
}
|
||||
|
||||
#undef N
|
||||
#undef T
|
||||
#define N (1 << EXP2F_TABLE_BITS)
|
||||
#define T __exp2f_data.tab
|
||||
#define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11))
|
||||
|
||||
/* The output of log2 and thus the input of exp2 is either scaled by N
|
||||
(in case of fast toint intrinsics) or not. The unscaled xd must be
|
||||
in [-1021,1023], sign_bias sets the sign of the result. */
|
||||
static inline float
|
||||
exp2_inline (double_t xd, uint32_t sign_bias)
|
||||
{
|
||||
uint64_t ki, ski, t;
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t kd, z, r, r2, y, s;
|
||||
|
||||
#if TOINT_INTRINSICS
|
||||
# define C __exp2f_data.poly_scaled
|
||||
/* N*x = k + r with r in [-1/2, 1/2] */
|
||||
kd = roundtoint (xd); /* k */
|
||||
ki = converttoint (xd);
|
||||
#else
|
||||
# define C __exp2f_data.poly
|
||||
# define SHIFT __exp2f_data.shift_scaled
|
||||
/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
|
||||
kd = eval_as_double (xd + SHIFT);
|
||||
ki = asuint64 (kd);
|
||||
kd -= SHIFT; /* k/N */
|
||||
#endif
|
||||
r = xd - kd;
|
||||
|
||||
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
||||
t = T[ki % N];
|
||||
ski = ki + sign_bias;
|
||||
t += ski << (52 - EXP2F_TABLE_BITS);
|
||||
s = asdouble (t);
|
||||
z = C[0] * r + C[1];
|
||||
r2 = r * r;
|
||||
y = C[2] * r + 1;
|
||||
y = z * r2 + y;
|
||||
y = y * s;
|
||||
return eval_as_float (y);
|
||||
}
|
||||
|
||||
/* Classify the float whose bit representation is IY, which must be a
   non-zero finite value: returns 0 if it is not an integer, 1 if it is an
   odd integer and 2 if it is an even integer.  */
static inline int
checkint (uint32_t iy)
{
  int e = iy >> 23 & 0xff;

  /* Exponent below the bias: |value| < 1, so not an integer.  */
  if (e < 0x7f)
    return 0;
  /* Exponent above bias + 23: every representable value is even.  */
  if (e > 0x7f + 23)
    return 2;

  /* Mantissa bit that carries the weight 2^0 for this exponent.  */
  uint32_t unit = (uint32_t) 1 << (0x7f + 23 - e);

  /* Any bit below the unit bit is a fractional part.  */
  if (iy & (unit - 1))
    return 0;
  return (iy & unit) ? 1 : 2;
}
|
||||
|
||||
/* Return non-zero if the float bit pattern IX is +-0, +-inf or a nan.
   Doubling drops the sign bit; subtracting 1 then wraps zero around to the
   largest unsigned value, so one unsigned comparison covers all cases.  */
static inline int
zeroinfnan (uint32_t ix)
{
  uint32_t doubled = 2 * ix;
  uint32_t inf_doubled = 2u * 0x7f800000;
  return doubled - 1 >= inf_doubled - 1;
}
|
||||
|
||||
float
|
||||
powf (float x, float y)
|
||||
{
|
||||
uint32_t sign_bias = 0;
|
||||
uint32_t ix, iy;
|
||||
|
||||
ix = asuint (x);
|
||||
iy = asuint (y);
|
||||
if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000 || zeroinfnan (iy)))
|
||||
{
|
||||
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
|
||||
if (unlikely (zeroinfnan (iy)))
|
||||
{
|
||||
if (2 * iy == 0)
|
||||
return issignalingf_inline (x) ? x + y : 1.0f;
|
||||
if (ix == 0x3f800000)
|
||||
return issignalingf_inline (y) ? x + y : 1.0f;
|
||||
if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
|
||||
return x + y;
|
||||
if (2 * ix == 2 * 0x3f800000)
|
||||
return 1.0f;
|
||||
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
|
||||
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
|
||||
return y * y;
|
||||
}
|
||||
if (unlikely (zeroinfnan (ix)))
|
||||
{
|
||||
float_t x2 = x * x;
|
||||
if (ix & 0x80000000 && checkint (iy) == 1)
|
||||
{
|
||||
x2 = -x2;
|
||||
sign_bias = 1;
|
||||
}
|
||||
#if WANT_ERRNO
|
||||
if (2 * ix == 0 && iy & 0x80000000)
|
||||
return __math_divzerof (sign_bias);
|
||||
#endif
|
||||
/* Without the barrier some versions of clang hoist the 1/x2 and
|
||||
thus division by zero exception can be signaled spuriously. */
|
||||
return iy & 0x80000000 ? opt_barrier_float (1 / x2) : x2;
|
||||
}
|
||||
/* x and y are non-zero finite. */
|
||||
if (ix & 0x80000000)
|
||||
{
|
||||
/* Finite x < 0. */
|
||||
int yint = checkint (iy);
|
||||
if (yint == 0)
|
||||
return __math_invalidf (x);
|
||||
if (yint == 1)
|
||||
sign_bias = SIGN_BIAS;
|
||||
ix &= 0x7fffffff;
|
||||
}
|
||||
if (ix < 0x00800000)
|
||||
{
|
||||
/* Normalize subnormal x so exponent becomes negative. */
|
||||
ix = asuint (x * 0x1p23f);
|
||||
ix &= 0x7fffffff;
|
||||
ix -= 23 << 23;
|
||||
}
|
||||
}
|
||||
double_t logx = log2_inline (ix);
|
||||
double_t ylogx = y * logx; /* Note: cannot overflow, y is single prec. */
|
||||
if (unlikely ((asuint64 (ylogx) >> 47 & 0xffff)
|
||||
>= asuint64 (126.0 * POWF_SCALE) >> 47))
|
||||
{
|
||||
/* |y*log(x)| >= 126. */
|
||||
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
|
||||
/* |x^y| > 0x1.ffffffp127. */
|
||||
return __math_oflowf (sign_bias);
|
||||
if (WANT_ROUNDING && WANT_ERRNO
|
||||
&& ylogx > 0x1.fffffffa3aae2p+6 * POWF_SCALE)
|
||||
/* |x^y| > 0x1.fffffep127, check if we round away from 0. */
|
||||
if ((!sign_bias
|
||||
&& eval_as_float (1.0f + opt_barrier_float (0x1p-25f)) != 1.0f)
|
||||
|| (sign_bias
|
||||
&& eval_as_float (-1.0f - opt_barrier_float (0x1p-25f))
|
||||
!= -1.0f))
|
||||
return __math_oflowf (sign_bias);
|
||||
if (ylogx <= -150.0 * POWF_SCALE)
|
||||
return __math_uflowf (sign_bias);
|
||||
#if WANT_ERRNO_UFLOW
|
||||
if (ylogx < -149.0 * POWF_SCALE)
|
||||
return __math_may_uflowf (sign_bias);
|
||||
#endif
|
||||
}
|
||||
return exp2_inline (ylogx, sign_bias);
|
||||
}
|
||||
#if USE_GLIBC_ABI
|
||||
strong_alias (powf, __powf_finite)
|
||||
hidden_alias (powf, __ieee754_powf)
|
||||
#endif
|
||||
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Data definition for powf.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "math_config.h"
|
||||
|
||||
const struct powf_log2_data __powf_log2_data = {
|
||||
.tab = {
|
||||
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
|
||||
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE },
|
||||
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE },
|
||||
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE },
|
||||
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE },
|
||||
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE },
|
||||
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE },
|
||||
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE },
|
||||
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE },
|
||||
{ 0x1p+0, 0x0p+0 * POWF_SCALE },
|
||||
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE },
|
||||
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE },
|
||||
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE },
|
||||
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE },
|
||||
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE },
|
||||
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE },
|
||||
},
|
||||
.poly = {
|
||||
0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE,
|
||||
0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE,
|
||||
0x1.71547652ab82bp0 * POWF_SCALE,
|
||||
}
|
||||
};
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_cos.c"
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_cosf.c"
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_exp.c"
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_exp2f.c"
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_exp2f_1u.c"
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_expf.c"
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_expf_1u.c"
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_log.c"
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_logf.c"
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_pow.c"
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_powf.c"
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_sin.c"
|
||||
@@ -0,0 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#define SCALAR 1
|
||||
#include "v_sinf.c"
|
||||
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Single-precision sin/cos function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include "math_config.h"
|
||||
#include "sincosf.h"
|
||||
|
||||
/* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative
|
||||
error is 0.5303 * 2^-23. A single-step range reduction is used for
|
||||
small values. Large inputs have their range reduced using fast integer
|
||||
arithmetic. */
|
||||
void
|
||||
sincosf (float y, float *sinp, float *cosp)
|
||||
{
|
||||
double x = y;
|
||||
double s;
|
||||
int n;
|
||||
const sincos_t *p = &__sincosf_table[0];
|
||||
|
||||
if (abstop12 (y) < abstop12 (pio4))
|
||||
{
|
||||
double x2 = x * x;
|
||||
|
||||
if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
|
||||
{
|
||||
if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
|
||||
/* Force underflow for tiny y. */
|
||||
force_eval_float (x2);
|
||||
*sinp = y;
|
||||
*cosp = 1.0f;
|
||||
return;
|
||||
}
|
||||
|
||||
sincosf_poly (x, x2, p, 0, sinp, cosp);
|
||||
}
|
||||
else if (abstop12 (y) < abstop12 (120.0f))
|
||||
{
|
||||
x = reduce_fast (x, p, &n);
|
||||
|
||||
/* Setup the signs for sin and cos. */
|
||||
s = p->sign[n & 3];
|
||||
|
||||
if (n & 2)
|
||||
p = &__sincosf_table[1];
|
||||
|
||||
sincosf_poly (x * s, x * x, p, n, sinp, cosp);
|
||||
}
|
||||
else if (likely (abstop12 (y) < abstop12 (INFINITY)))
|
||||
{
|
||||
uint32_t xi = asuint (y);
|
||||
int sign = xi >> 31;
|
||||
|
||||
x = reduce_large (xi, &n);
|
||||
|
||||
/* Setup signs for sin and cos - include original sign. */
|
||||
s = p->sign[(n + sign) & 3];
|
||||
|
||||
if ((n + sign) & 2)
|
||||
p = &__sincosf_table[1];
|
||||
|
||||
sincosf_poly (x * s, x * x, p, n, sinp, cosp);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Return NaN if Inf or NaN for both sin and cos. */
|
||||
*sinp = *cosp = y - y;
|
||||
#if WANT_ERRNO
|
||||
/* Needed to set errno for +-Inf, the add is a hack to work
|
||||
around a gcc register allocation issue: just passing y
|
||||
affects code generation in the fast path. */
|
||||
__math_invalidf (y + y);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
+153
@@ -0,0 +1,153 @@
|
||||
/*
|
||||
* Header for sinf, cosf and sincosf.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include "math_config.h"
|
||||
|
||||
/* 2PI * 2^-64. */
|
||||
static const double pi63 = 0x1.921FB54442D18p-62;
|
||||
/* PI / 4. */
|
||||
static const double pio4 = 0x1.921FB54442D18p-1;
|
||||
|
||||
/* The constants and polynomials for sine and cosine.  The member order is
   part of the layout: tables of this type are filled in positionally.  */
typedef struct
{
  double sign[4]; /* Sign of sine in quadrants 0..3.  */
  double hpi_inv; /* 2 / PI ( * 2^24 if !TOINT_INTRINSICS).  */
  double hpi;     /* PI / 2.  */
  /* Cosine polynomial.  */
  double c0;
  double c1;
  double c2;
  double c3;
  double c4;
  /* Sine polynomial.  */
  double s1;
  double s2;
  double s3;
} sincos_t;
|
||||
|
||||
/* Polynomial data (the cosine polynomial is negated in the 2nd entry). */
|
||||
extern const sincos_t __sincosf_table[2] HIDDEN;
|
||||
|
||||
/* Table with 4/PI to 192 bit precision. */
|
||||
extern const uint32_t __inv_pio4[] HIDDEN;
|
||||
|
||||
/* Return the top 12 bits of the float representation of X with the sign
   bit cleared.  Callers compare two such values to order magnitudes.  */
static inline uint32_t
abstop12 (float x)
{
  uint32_t bits = asuint (x);
  return (bits >> 20) & 0x7ff;
}
|
||||
|
||||
/* Compute the sine and cosine of inputs X and X2 (X squared), using the
|
||||
polynomial P and store the results in SINP and COSP. N is the quadrant,
|
||||
if odd the cosine and sine polynomials are swapped. */
|
||||
static inline void
|
||||
sincosf_poly (double x, double x2, const sincos_t *p, int n, float *sinp,
|
||||
float *cosp)
|
||||
{
|
||||
double x3, x4, x5, x6, s, c, c1, c2, s1;
|
||||
|
||||
x4 = x2 * x2;
|
||||
x3 = x2 * x;
|
||||
c2 = p->c3 + x2 * p->c4;
|
||||
s1 = p->s2 + x2 * p->s3;
|
||||
|
||||
/* Swap sin/cos result based on quadrant. */
|
||||
float *tmp = (n & 1 ? cosp : sinp);
|
||||
cosp = (n & 1 ? sinp : cosp);
|
||||
sinp = tmp;
|
||||
|
||||
c1 = p->c0 + x2 * p->c1;
|
||||
x5 = x3 * x2;
|
||||
x6 = x4 * x2;
|
||||
|
||||
s = x + x3 * p->s1;
|
||||
c = c1 + x4 * p->c2;
|
||||
|
||||
*sinp = s + x5 * s1;
|
||||
*cosp = c + x6 * c2;
|
||||
}
|
||||
|
||||
/* Return the sine of inputs X and X2 (X squared) using the polynomial P.
|
||||
N is the quadrant, and if odd the cosine polynomial is used. */
|
||||
static inline float
|
||||
sinf_poly (double x, double x2, const sincos_t *p, int n)
|
||||
{
|
||||
double x3, x4, x6, x7, s, c, c1, c2, s1;
|
||||
|
||||
if ((n & 1) == 0)
|
||||
{
|
||||
x3 = x * x2;
|
||||
s1 = p->s2 + x2 * p->s3;
|
||||
|
||||
x7 = x3 * x2;
|
||||
s = x + x3 * p->s1;
|
||||
|
||||
return s + x7 * s1;
|
||||
}
|
||||
else
|
||||
{
|
||||
x4 = x2 * x2;
|
||||
c2 = p->c3 + x2 * p->c4;
|
||||
c1 = p->c0 + x2 * p->c1;
|
||||
|
||||
x6 = x4 * x2;
|
||||
c = c1 + x4 * p->c2;
|
||||
|
||||
return c + x6 * c2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Fast range reduction using single multiply-subtract. Return the modulo of
|
||||
X as a value between -PI/4 and PI/4 and store the quadrant in NP.
|
||||
The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double
|
||||
is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4,
|
||||
the result is accurate for |X| <= 120.0. */
|
||||
static inline double
|
||||
reduce_fast (double x, const sincos_t *p, int *np)
|
||||
{
|
||||
double r;
|
||||
#if TOINT_INTRINSICS
|
||||
/* Use fast round and lround instructions when available. */
|
||||
r = x * p->hpi_inv;
|
||||
*np = converttoint (r);
|
||||
return x - roundtoint (r) * p->hpi;
|
||||
#else
|
||||
/* Use scaled float to int conversion with explicit rounding.
|
||||
hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31.
|
||||
This avoids inaccuracies introduced by truncating negative values. */
|
||||
r = x * p->hpi_inv;
|
||||
int n = ((int32_t)r + 0x800000) >> 24;
|
||||
*np = n;
|
||||
return x - n * p->hpi;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.
|
||||
XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
|
||||
Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
|
||||
Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit
|
||||
multiply computes the exact 2.62-bit fixed-point modulo. Since the result
|
||||
can have at most 29 leading zeros after the binary point, the double
|
||||
precision result is accurate to 33 bits. */
|
||||
static inline double
|
||||
reduce_large (uint32_t xi, int *np)
|
||||
{
|
||||
const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15];
|
||||
int shift = (xi >> 23) & 7;
|
||||
uint64_t n, res0, res1, res2;
|
||||
|
||||
xi = (xi & 0xffffff) | 0x800000;
|
||||
xi <<= shift;
|
||||
|
||||
res0 = xi * arr[0];
|
||||
res1 = (uint64_t)xi * arr[4];
|
||||
res2 = (uint64_t)xi * arr[8];
|
||||
res0 = (res2 >> 32) | (res0 << 32);
|
||||
res0 += res1;
|
||||
|
||||
n = (res0 + (1ULL << 61)) >> 62;
|
||||
res0 -= n << 62;
|
||||
double x = (int64_t)res0;
|
||||
*np = n;
|
||||
return x * pi63;
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Data definition for sinf, cosf and sincosf.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include "math_config.h"
|
||||
#include "sincosf.h"
|
||||
|
||||
/* The constants and polynomials for sine and cosine. The 2nd entry
|
||||
computes -cos (x) rather than cos (x) to get negation for free. */
|
||||
const sincos_t __sincosf_table[2] =
|
||||
{
|
||||
{
|
||||
{ 1.0, -1.0, -1.0, 1.0 },
|
||||
#if TOINT_INTRINSICS
|
||||
0x1.45F306DC9C883p-1,
|
||||
#else
|
||||
0x1.45F306DC9C883p+23,
|
||||
#endif
|
||||
0x1.921FB54442D18p0,
|
||||
0x1p0,
|
||||
-0x1.ffffffd0c621cp-2,
|
||||
0x1.55553e1068f19p-5,
|
||||
-0x1.6c087e89a359dp-10,
|
||||
0x1.99343027bf8c3p-16,
|
||||
-0x1.555545995a603p-3,
|
||||
0x1.1107605230bc4p-7,
|
||||
-0x1.994eb3774cf24p-13
|
||||
},
|
||||
{
|
||||
{ 1.0, -1.0, -1.0, 1.0 },
|
||||
#if TOINT_INTRINSICS
|
||||
0x1.45F306DC9C883p-1,
|
||||
#else
|
||||
0x1.45F306DC9C883p+23,
|
||||
#endif
|
||||
0x1.921FB54442D18p0,
|
||||
-0x1p0,
|
||||
0x1.ffffffd0c621cp-2,
|
||||
-0x1.55553e1068f19p-5,
|
||||
0x1.6c087e89a359dp-10,
|
||||
-0x1.99343027bf8c3p-16,
|
||||
-0x1.555545995a603p-3,
|
||||
0x1.1107605230bc4p-7,
|
||||
-0x1.994eb3774cf24p-13
|
||||
}
|
||||
};
|
||||
|
||||
/* Table with 4/PI to 192 bit precision.  To avoid unaligned accesses
   only 8 new bits are added per entry, making the table 4 times larger:
   each entry is the previous one shifted left by 8 bits with one new byte
   appended.  */
const uint32_t __inv_pio4[24] =
{
  0xa2,       0xa2f9,     0xa2f983,
  0xa2f9836e, 0xf9836e4e, 0x836e4e44,
  0x6e4e4415, 0x4e441529, 0x441529fc,
  0x1529fc27, 0x29fc2757, 0xfc2757d1,
  0x2757d1f5, 0x57d1f534, 0xd1f534dd,
  0xf534ddc0, 0x34ddc0db, 0xddc0db62,
  0xc0db6295, 0xdb629599, 0x6295993c,
  0x95993c43, 0x993c4390, 0x3c439041
};
|
||||
+67
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Single-precision sin function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "math_config.h"
|
||||
#include "sincosf.h"
|
||||
|
||||
/* Fast sinf implementation. Worst-case ULP is 0.5607, maximum relative
|
||||
error is 0.5303 * 2^-23. A single-step range reduction is used for
|
||||
small values. Large inputs have their range reduced using fast integer
|
||||
arithmetic. */
|
||||
float
|
||||
sinf (float y)
|
||||
{
|
||||
double x = y;
|
||||
double s;
|
||||
int n;
|
||||
const sincos_t *p = &__sincosf_table[0];
|
||||
|
||||
if (abstop12 (y) < abstop12 (pio4))
|
||||
{
|
||||
s = x * x;
|
||||
|
||||
if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
|
||||
{
|
||||
if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
|
||||
/* Force underflow for tiny y. */
|
||||
force_eval_float (s);
|
||||
return y;
|
||||
}
|
||||
|
||||
return sinf_poly (x, s, p, 0);
|
||||
}
|
||||
else if (likely (abstop12 (y) < abstop12 (120.0f)))
|
||||
{
|
||||
x = reduce_fast (x, p, &n);
|
||||
|
||||
/* Setup the signs for sin and cos. */
|
||||
s = p->sign[n & 3];
|
||||
|
||||
if (n & 2)
|
||||
p = &__sincosf_table[1];
|
||||
|
||||
return sinf_poly (x * s, x * x, p, n);
|
||||
}
|
||||
else if (abstop12 (y) < abstop12 (INFINITY))
|
||||
{
|
||||
uint32_t xi = asuint (y);
|
||||
int sign = xi >> 31;
|
||||
|
||||
x = reduce_large (xi, &n);
|
||||
|
||||
/* Setup signs for sin and cos - include original sign. */
|
||||
s = p->sign[(n + sign) & 3];
|
||||
|
||||
if ((n + sign) & 2)
|
||||
p = &__sincosf_table[1];
|
||||
|
||||
return sinf_poly (x * s, x * x, p, n);
|
||||
}
|
||||
else
|
||||
return __math_invalidf (y);
|
||||
}
|
||||
@@ -0,0 +1,771 @@
|
||||
/*
|
||||
* Microbenchmark for math functions.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#undef _GNU_SOURCE
|
||||
#define _GNU_SOURCE 1
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <math.h>
|
||||
#include "mathlib.h"
|
||||
|
||||
#ifndef WANT_VMATH
|
||||
/* Enable the build of vector math code. */
|
||||
# define WANT_VMATH 1
|
||||
#endif
|
||||
|
||||
/* Number of measurements, best result is reported. */
|
||||
#define MEASURE 60
|
||||
/* Array size. */
|
||||
#define N 8000
|
||||
/* Iterations over the array. */
|
||||
#define ITER 125
|
||||
|
||||
static double *Trace;
|
||||
static size_t trace_size;
|
||||
static double A[N];
|
||||
static float Af[N];
|
||||
static long measurecount = MEASURE;
|
||||
static long itercount = ITER;
|
||||
|
||||
#if __aarch64__ && WANT_VMATH
|
||||
typedef __f64x2_t v_double;
|
||||
|
||||
#define v_double_len() 2
|
||||
|
||||
static inline v_double
|
||||
v_double_load (const double *p)
|
||||
{
|
||||
return (v_double){p[0], p[1]};
|
||||
}
|
||||
|
||||
static inline v_double
|
||||
v_double_dup (double x)
|
||||
{
|
||||
return (v_double){x, x};
|
||||
}
|
||||
|
||||
typedef __f32x4_t v_float;
|
||||
|
||||
#define v_float_len() 4
|
||||
|
||||
static inline v_float
|
||||
v_float_load (const float *p)
|
||||
{
|
||||
return (v_float){p[0], p[1], p[2], p[3]};
|
||||
}
|
||||
|
||||
static inline v_float
|
||||
v_float_dup (float x)
|
||||
{
|
||||
return (v_float){x, x, x, x};
|
||||
}
|
||||
#else
|
||||
/* dummy definitions to make things compile. */
|
||||
typedef double v_double;
|
||||
typedef float v_float;
|
||||
#define v_double_len(x) 1
|
||||
#define v_double_load(x) (x)[0]
|
||||
#define v_double_dup(x) (x)
|
||||
#define v_float_len(x) 1
|
||||
#define v_float_load(x) (x)[0]
|
||||
#define v_float_dup(x) (x)
|
||||
#endif
|
||||
|
||||
static double
|
||||
dummy (double x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
static float
|
||||
dummyf (float x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
#if WANT_VMATH
|
||||
#if __aarch64__
|
||||
static v_double
|
||||
__v_dummy (v_double x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
static v_float
|
||||
__v_dummyf (v_float x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
#ifdef __vpcs
|
||||
__vpcs static v_double
|
||||
__vn_dummy (v_double x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
__vpcs static v_float
|
||||
__vn_dummyf (v_float x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
__vpcs static v_float
|
||||
xy__vn_powf (v_float x)
|
||||
{
|
||||
return __vn_powf (x, x);
|
||||
}
|
||||
|
||||
__vpcs static v_float
|
||||
xy_Z_powf (v_float x)
|
||||
{
|
||||
return _ZGVnN4vv_powf (x, x);
|
||||
}
|
||||
|
||||
__vpcs static v_double
|
||||
xy__vn_pow (v_double x)
|
||||
{
|
||||
return __vn_pow (x, x);
|
||||
}
|
||||
|
||||
__vpcs static v_double
|
||||
xy_Z_pow (v_double x)
|
||||
{
|
||||
return _ZGVnN2vv_pow (x, x);
|
||||
}
|
||||
#endif
|
||||
|
||||
static v_float
|
||||
xy__v_powf (v_float x)
|
||||
{
|
||||
return __v_powf (x, x);
|
||||
}
|
||||
|
||||
static v_double
|
||||
xy__v_pow (v_double x)
|
||||
{
|
||||
return __v_pow (x, x);
|
||||
}
|
||||
#endif
|
||||
|
||||
static float
|
||||
xy__s_powf (float x)
|
||||
{
|
||||
return __s_powf (x, x);
|
||||
}
|
||||
|
||||
static double
|
||||
xy__s_pow (double x)
|
||||
{
|
||||
return __s_pow (x, x);
|
||||
}
|
||||
#endif
|
||||
|
||||
static double
|
||||
xypow (double x)
|
||||
{
|
||||
return pow (x, x);
|
||||
}
|
||||
|
||||
static float
|
||||
xypowf (float x)
|
||||
{
|
||||
return powf (x, x);
|
||||
}
|
||||
|
||||
static double
|
||||
xpow (double x)
|
||||
{
|
||||
return pow (x, 23.4);
|
||||
}
|
||||
|
||||
static float
|
||||
xpowf (float x)
|
||||
{
|
||||
return powf (x, 23.4f);
|
||||
}
|
||||
|
||||
static double
|
||||
ypow (double x)
|
||||
{
|
||||
return pow (2.34, x);
|
||||
}
|
||||
|
||||
static float
|
||||
ypowf (float x)
|
||||
{
|
||||
return powf (2.34f, x);
|
||||
}
|
||||
|
||||
static float
|
||||
sincosf_wrap (float x)
|
||||
{
|
||||
float s, c;
|
||||
sincosf (x, &s, &c);
|
||||
return s + c;
|
||||
}
|
||||
|
||||
static const struct fun
|
||||
{
|
||||
const char *name;
|
||||
int prec;
|
||||
int vec;
|
||||
double lo;
|
||||
double hi;
|
||||
union
|
||||
{
|
||||
double (*d) (double);
|
||||
float (*f) (float);
|
||||
v_double (*vd) (v_double);
|
||||
v_float (*vf) (v_float);
|
||||
#ifdef __vpcs
|
||||
__vpcs v_double (*vnd) (v_double);
|
||||
__vpcs v_float (*vnf) (v_float);
|
||||
#endif
|
||||
} fun;
|
||||
} funtab[] = {
|
||||
#define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
|
||||
#define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
|
||||
#define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
|
||||
#define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
|
||||
#define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
|
||||
#define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
|
||||
D (dummy, 1.0, 2.0)
|
||||
D (exp, -9.9, 9.9)
|
||||
D (exp, 0.5, 1.0)
|
||||
D (exp2, -9.9, 9.9)
|
||||
D (log, 0.01, 11.1)
|
||||
D (log, 0.999, 1.001)
|
||||
D (log2, 0.01, 11.1)
|
||||
D (log2, 0.999, 1.001)
|
||||
{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
|
||||
D (xpow, 0.01, 11.1)
|
||||
D (ypow, -9.9, 9.9)
|
||||
|
||||
F (dummyf, 1.0, 2.0)
|
||||
F (expf, -9.9, 9.9)
|
||||
F (exp2f, -9.9, 9.9)
|
||||
F (logf, 0.01, 11.1)
|
||||
F (log2f, 0.01, 11.1)
|
||||
{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
|
||||
F (xpowf, 0.01, 11.1)
|
||||
F (ypowf, -9.9, 9.9)
|
||||
{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}},
|
||||
{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}},
|
||||
{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}},
|
||||
{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
|
||||
{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
|
||||
{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
|
||||
F (sinf, 0.1, 0.7)
|
||||
F (sinf, 0.8, 3.1)
|
||||
F (sinf, -3.1, 3.1)
|
||||
F (sinf, 3.3, 33.3)
|
||||
F (sinf, 100, 1000)
|
||||
F (sinf, 1e6, 1e32)
|
||||
F (cosf, 0.1, 0.7)
|
||||
F (cosf, 0.8, 3.1)
|
||||
F (cosf, -3.1, 3.1)
|
||||
F (cosf, 3.3, 33.3)
|
||||
F (cosf, 100, 1000)
|
||||
F (cosf, 1e6, 1e32)
|
||||
#if WANT_VMATH
|
||||
D (__s_sin, -3.1, 3.1)
|
||||
D (__s_cos, -3.1, 3.1)
|
||||
D (__s_exp, -9.9, 9.9)
|
||||
D (__s_log, 0.01, 11.1)
|
||||
{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
|
||||
F (__s_expf, -9.9, 9.9)
|
||||
F (__s_expf_1u, -9.9, 9.9)
|
||||
F (__s_exp2f, -9.9, 9.9)
|
||||
F (__s_exp2f_1u, -9.9, 9.9)
|
||||
F (__s_logf, 0.01, 11.1)
|
||||
{"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}},
|
||||
F (__s_sinf, -3.1, 3.1)
|
||||
F (__s_cosf, -3.1, 3.1)
|
||||
#if __aarch64__
|
||||
VD (__v_dummy, 1.0, 2.0)
|
||||
VD (__v_sin, -3.1, 3.1)
|
||||
VD (__v_cos, -3.1, 3.1)
|
||||
VD (__v_exp, -9.9, 9.9)
|
||||
VD (__v_log, 0.01, 11.1)
|
||||
{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
|
||||
VF (__v_dummyf, 1.0, 2.0)
|
||||
VF (__v_expf, -9.9, 9.9)
|
||||
VF (__v_expf_1u, -9.9, 9.9)
|
||||
VF (__v_exp2f, -9.9, 9.9)
|
||||
VF (__v_exp2f_1u, -9.9, 9.9)
|
||||
VF (__v_logf, 0.01, 11.1)
|
||||
{"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}},
|
||||
VF (__v_sinf, -3.1, 3.1)
|
||||
VF (__v_cosf, -3.1, 3.1)
|
||||
#ifdef __vpcs
|
||||
VND (__vn_dummy, 1.0, 2.0)
|
||||
VND (__vn_exp, -9.9, 9.9)
|
||||
VND (_ZGVnN2v_exp, -9.9, 9.9)
|
||||
VND (__vn_log, 0.01, 11.1)
|
||||
VND (_ZGVnN2v_log, 0.01, 11.1)
|
||||
{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
|
||||
{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
|
||||
VND (__vn_sin, -3.1, 3.1)
|
||||
VND (_ZGVnN2v_sin, -3.1, 3.1)
|
||||
VND (__vn_cos, -3.1, 3.1)
|
||||
VND (_ZGVnN2v_cos, -3.1, 3.1)
|
||||
VNF (__vn_dummyf, 1.0, 2.0)
|
||||
VNF (__vn_expf, -9.9, 9.9)
|
||||
VNF (_ZGVnN4v_expf, -9.9, 9.9)
|
||||
VNF (__vn_expf_1u, -9.9, 9.9)
|
||||
VNF (__vn_exp2f, -9.9, 9.9)
|
||||
VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
|
||||
VNF (__vn_exp2f_1u, -9.9, 9.9)
|
||||
VNF (__vn_logf, 0.01, 11.1)
|
||||
VNF (_ZGVnN4v_logf, 0.01, 11.1)
|
||||
{"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}},
|
||||
{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
|
||||
VNF (__vn_sinf, -3.1, 3.1)
|
||||
VNF (_ZGVnN4v_sinf, -3.1, 3.1)
|
||||
VNF (__vn_cosf, -3.1, 3.1)
|
||||
VNF (_ZGVnN4v_cosf, -3.1, 3.1)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
{0},
|
||||
#undef F
|
||||
#undef D
|
||||
#undef VF
|
||||
#undef VD
|
||||
#undef VNF
|
||||
#undef VND
|
||||
};
|
||||
|
||||
static void
|
||||
gen_linear (double lo, double hi)
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
A[i] = (lo * (N - i) + hi * i) / N;
|
||||
}
|
||||
|
||||
static void
|
||||
genf_linear (double lo, double hi)
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
Af[i] = (float)(lo * (N - i) + hi * i) / N;
|
||||
}
|
||||
|
||||
/* Reinterpret the 64 bits of I as a double: a bit copy, not a numeric
   conversion.  */
static inline double
asdouble (uint64_t i)
{
  double f;

  memcpy (&f, &i, sizeof f);
  return f;
}
|
||||
|
||||
/* State of the 64-bit linear congruential generator advanced by frand.  */
static uint64_t seed = 0x0123456789abcdef;

/* Advance the generator and return lo + (hi - lo) * u, where u is built from
   the upper bits of the new state.  */
static double
frand (double lo, double hi)
{
  uint64_t bits;
  double span = hi - lo;

  seed = 6364136223846793005ULL * seed + 1;
  /* The state shifted right by 12 supplies the low 52 bits; 0x3ff placed at
     bit 52 is the exponent field of 1.0, so subtracting 1.0 below leaves the
     fraction.  */
  bits = seed >> 12 | 0x3ffULL << 52;
  return lo + span * (asdouble (bits) - 1.0);
}
|
||||
|
||||
static void
|
||||
gen_rand (double lo, double hi)
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
A[i] = frand (lo, hi);
|
||||
}
|
||||
|
||||
static void
|
||||
genf_rand (double lo, double hi)
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
Af[i] = (float)frand (lo, hi);
|
||||
}
|
||||
|
||||
static void
|
||||
gen_trace (int index)
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
A[i] = Trace[index + i];
|
||||
}
|
||||
|
||||
static void
|
||||
genf_trace (int index)
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
Af[i] = (float)Trace[index + i];
|
||||
}
|
||||
|
||||
static void
|
||||
run_thruput (double f (double))
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
f (A[i]);
|
||||
}
|
||||
|
||||
static void
|
||||
runf_thruput (float f (float))
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
f (Af[i]);
|
||||
}
|
||||
|
||||
volatile double zero = 0;
|
||||
|
||||
static void
|
||||
run_latency (double f (double))
|
||||
{
|
||||
double z = zero;
|
||||
double prev = z;
|
||||
for (int i = 0; i < N; i++)
|
||||
prev = f (A[i] + prev * z);
|
||||
}
|
||||
|
||||
static void
|
||||
runf_latency (float f (float))
|
||||
{
|
||||
float z = (float)zero;
|
||||
float prev = z;
|
||||
for (int i = 0; i < N; i++)
|
||||
prev = f (Af[i] + prev * z);
|
||||
}
|
||||
|
||||
static void
|
||||
run_v_thruput (v_double f (v_double))
|
||||
{
|
||||
for (int i = 0; i < N; i += v_double_len ())
|
||||
f (v_double_load (A+i));
|
||||
}
|
||||
|
||||
static void
|
||||
runf_v_thruput (v_float f (v_float))
|
||||
{
|
||||
for (int i = 0; i < N; i += v_float_len ())
|
||||
f (v_float_load (Af+i));
|
||||
}
|
||||
|
||||
static void
|
||||
run_v_latency (v_double f (v_double))
|
||||
{
|
||||
v_double z = v_double_dup (zero);
|
||||
v_double prev = z;
|
||||
for (int i = 0; i < N; i += v_double_len ())
|
||||
prev = f (v_double_load (A+i) + prev * z);
|
||||
}
|
||||
|
||||
static void
|
||||
runf_v_latency (v_float f (v_float))
|
||||
{
|
||||
v_float z = v_float_dup (zero);
|
||||
v_float prev = z;
|
||||
for (int i = 0; i < N; i += v_float_len ())
|
||||
prev = f (v_float_load (Af+i) + prev * z);
|
||||
}
|
||||
|
||||
#ifdef __vpcs
|
||||
static void
|
||||
run_vn_thruput (__vpcs v_double f (v_double))
|
||||
{
|
||||
for (int i = 0; i < N; i += v_double_len ())
|
||||
f (v_double_load (A+i));
|
||||
}
|
||||
|
||||
static void
|
||||
runf_vn_thruput (__vpcs v_float f (v_float))
|
||||
{
|
||||
for (int i = 0; i < N; i += v_float_len ())
|
||||
f (v_float_load (Af+i));
|
||||
}
|
||||
|
||||
static void
|
||||
run_vn_latency (__vpcs v_double f (v_double))
|
||||
{
|
||||
v_double z = v_double_dup (zero);
|
||||
v_double prev = z;
|
||||
for (int i = 0; i < N; i += v_double_len ())
|
||||
prev = f (v_double_load (A+i) + prev * z);
|
||||
}
|
||||
|
||||
static void
|
||||
runf_vn_latency (__vpcs v_float f (v_float))
|
||||
{
|
||||
v_float z = v_float_dup (zero);
|
||||
v_float prev = z;
|
||||
for (int i = 0; i < N; i += v_float_len ())
|
||||
prev = f (v_float_load (Af+i) + prev * z);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Return a timestamp in nanoseconds; aborts if the clock cannot be read.
   Callers only use the difference between two timestamps.  */
static uint64_t
tic (void)
{
  struct timespec ts;
  /* CLOCK_MONOTONIC is not affected by wall-clock adjustments (NTP steps,
     manual changes), which would otherwise corrupt an interval that happens
     to span one.  */
  if (clock_gettime (CLOCK_MONOTONIC, &ts))
    abort ();
  return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
|
||||
|
||||
#define TIMEIT(run, f) do { \
|
||||
dt = -1; \
|
||||
run (f); /* Warm up. */ \
|
||||
for (int j = 0; j < measurecount; j++) \
|
||||
{ \
|
||||
uint64_t t0 = tic (); \
|
||||
for (int i = 0; i < itercount; i++) \
|
||||
run (f); \
|
||||
uint64_t t1 = tic (); \
|
||||
if (t1 - t0 < dt) \
|
||||
dt = t1 - t0; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
bench1 (const struct fun *f, int type, double lo, double hi)
|
||||
{
|
||||
uint64_t dt = 0;
|
||||
uint64_t ns100;
|
||||
const char *s = type == 't' ? "rthruput" : "latency";
|
||||
int vlen = 1;
|
||||
|
||||
if (f->vec && f->prec == 'd')
|
||||
vlen = v_double_len();
|
||||
else if (f->vec && f->prec == 'f')
|
||||
vlen = v_float_len();
|
||||
|
||||
if (f->prec == 'd' && type == 't' && f->vec == 0)
|
||||
TIMEIT (run_thruput, f->fun.d);
|
||||
else if (f->prec == 'd' && type == 'l' && f->vec == 0)
|
||||
TIMEIT (run_latency, f->fun.d);
|
||||
else if (f->prec == 'f' && type == 't' && f->vec == 0)
|
||||
TIMEIT (runf_thruput, f->fun.f);
|
||||
else if (f->prec == 'f' && type == 'l' && f->vec == 0)
|
||||
TIMEIT (runf_latency, f->fun.f);
|
||||
else if (f->prec == 'd' && type == 't' && f->vec == 'v')
|
||||
TIMEIT (run_v_thruput, f->fun.vd);
|
||||
else if (f->prec == 'd' && type == 'l' && f->vec == 'v')
|
||||
TIMEIT (run_v_latency, f->fun.vd);
|
||||
else if (f->prec == 'f' && type == 't' && f->vec == 'v')
|
||||
TIMEIT (runf_v_thruput, f->fun.vf);
|
||||
else if (f->prec == 'f' && type == 'l' && f->vec == 'v')
|
||||
TIMEIT (runf_v_latency, f->fun.vf);
|
||||
#ifdef __vpcs
|
||||
else if (f->prec == 'd' && type == 't' && f->vec == 'n')
|
||||
TIMEIT (run_vn_thruput, f->fun.vnd);
|
||||
else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
|
||||
TIMEIT (run_vn_latency, f->fun.vnd);
|
||||
else if (f->prec == 'f' && type == 't' && f->vec == 'n')
|
||||
TIMEIT (runf_vn_thruput, f->fun.vnf);
|
||||
else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
|
||||
TIMEIT (runf_vn_latency, f->fun.vnf);
|
||||
#endif
|
||||
|
||||
if (type == 't')
|
||||
{
|
||||
ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
|
||||
printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
|
||||
(unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
|
||||
(unsigned long long) dt, lo, hi);
|
||||
}
|
||||
else if (type == 'l')
|
||||
{
|
||||
ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
|
||||
printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s,
|
||||
(unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
|
||||
(unsigned long long) dt, lo, hi);
|
||||
}
|
||||
fflush (stdout);
|
||||
}
|
||||
|
||||
static void
|
||||
bench (const struct fun *f, double lo, double hi, int type, int gen)
|
||||
{
|
||||
if (f->prec == 'd' && gen == 'r')
|
||||
gen_rand (lo, hi);
|
||||
else if (f->prec == 'd' && gen == 'l')
|
||||
gen_linear (lo, hi);
|
||||
else if (f->prec == 'd' && gen == 't')
|
||||
gen_trace (0);
|
||||
else if (f->prec == 'f' && gen == 'r')
|
||||
genf_rand (lo, hi);
|
||||
else if (f->prec == 'f' && gen == 'l')
|
||||
genf_linear (lo, hi);
|
||||
else if (f->prec == 'f' && gen == 't')
|
||||
genf_trace (0);
|
||||
|
||||
if (gen == 't')
|
||||
hi = trace_size / N;
|
||||
|
||||
if (type == 'b' || type == 't')
|
||||
bench1 (f, 't', lo, hi);
|
||||
|
||||
if (type == 'b' || type == 'l')
|
||||
bench1 (f, 'l', lo, hi);
|
||||
|
||||
for (int i = N; i < trace_size; i += N)
|
||||
{
|
||||
if (f->prec == 'd')
|
||||
gen_trace (i);
|
||||
else
|
||||
genf_trace (i);
|
||||
|
||||
lo = i / N;
|
||||
if (type == 'b' || type == 't')
|
||||
bench1 (f, 't', lo, hi);
|
||||
|
||||
if (type == 'b' || type == 'l')
|
||||
bench1 (f, 'l', lo, hi);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
readtrace (const char *name)
|
||||
{
|
||||
int n = 0;
|
||||
FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
|
||||
if (!f)
|
||||
{
|
||||
printf ("openning \"%s\" failed: %m\n", name);
|
||||
exit (1);
|
||||
}
|
||||
for (;;)
|
||||
{
|
||||
if (n >= trace_size)
|
||||
{
|
||||
trace_size += N;
|
||||
Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
|
||||
if (Trace == NULL)
|
||||
{
|
||||
printf ("out of memory\n");
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
if (fscanf (f, "%lf", Trace + n) != 1)
|
||||
break;
|
||||
n++;
|
||||
}
|
||||
if (ferror (f) || n == 0)
|
||||
{
|
||||
printf ("reading \"%s\" failed: %m\n", name);
|
||||
exit (1);
|
||||
}
|
||||
fclose (f);
|
||||
if (n % N == 0)
|
||||
trace_size = n;
|
||||
for (int i = 0; n < trace_size; n++, i++)
|
||||
Trace[n] = Trace[i];
|
||||
}
|
||||
|
||||
static void
|
||||
usage (void)
|
||||
{
|
||||
printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
|
||||
"[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
|
||||
"[func2 ..]\n");
|
||||
printf ("func:\n");
|
||||
printf ("%7s [run all benchmarks]\n", "all");
|
||||
for (const struct fun *f = funtab; f->name; f++)
|
||||
printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int usergen = 0, gen = 'r', type = 'b', all = 0;
|
||||
double lo = 0, hi = 0;
|
||||
const char *tracefile = "-";
|
||||
|
||||
argv++;
|
||||
argc--;
|
||||
for (;;)
|
||||
{
|
||||
if (argc <= 0)
|
||||
usage ();
|
||||
if (argv[0][0] != '-')
|
||||
break;
|
||||
else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
|
||||
{
|
||||
usergen = 1;
|
||||
lo = strtod (argv[1], 0);
|
||||
hi = strtod (argv[2], 0);
|
||||
argv += 3;
|
||||
argc -= 3;
|
||||
}
|
||||
else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
|
||||
{
|
||||
measurecount = strtol (argv[1], 0, 0);
|
||||
argv += 2;
|
||||
argc -= 2;
|
||||
}
|
||||
else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
|
||||
{
|
||||
itercount = strtol (argv[1], 0, 0);
|
||||
argv += 2;
|
||||
argc -= 2;
|
||||
}
|
||||
else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
|
||||
{
|
||||
gen = argv[1][0];
|
||||
if (strchr ("rlt", gen) == 0)
|
||||
usage ();
|
||||
argv += 2;
|
||||
argc -= 2;
|
||||
}
|
||||
else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
|
||||
{
|
||||
gen = 't'; /* -f implies -g trace. */
|
||||
tracefile = argv[1];
|
||||
argv += 2;
|
||||
argc -= 2;
|
||||
}
|
||||
else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
|
||||
{
|
||||
type = argv[1][0];
|
||||
if (strchr ("ltb", type) == 0)
|
||||
usage ();
|
||||
argv += 2;
|
||||
argc -= 2;
|
||||
}
|
||||
else
|
||||
usage ();
|
||||
}
|
||||
if (gen == 't')
|
||||
{
|
||||
readtrace (tracefile);
|
||||
lo = hi = 0;
|
||||
usergen = 1;
|
||||
}
|
||||
while (argc > 0)
|
||||
{
|
||||
int found = 0;
|
||||
all = strcmp (argv[0], "all") == 0;
|
||||
for (const struct fun *f = funtab; f->name; f++)
|
||||
if (all || strcmp (argv[0], f->name) == 0)
|
||||
{
|
||||
found = 1;
|
||||
if (!usergen)
|
||||
{
|
||||
lo = f->lo;
|
||||
hi = f->hi;
|
||||
}
|
||||
bench (f, lo, hi, type, gen);
|
||||
if (usergen && !all)
|
||||
break;
|
||||
}
|
||||
if (!found)
|
||||
printf ("unknown function: %s\n", argv[0]);
|
||||
argv++;
|
||||
argc--;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* intern.h
|
||||
*
|
||||
* Copyright (c) 1999-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef mathtest_intern_h
|
||||
#define mathtest_intern_h
|
||||
|
||||
#include <mpfr.h>
|
||||
#include <mpc.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "wrappers.h"
|
||||
|
||||
/* Generic function pointer. */
|
||||
typedef void (*funcptr)(void);
|
||||
|
||||
/* Pointers to test function types. */
|
||||
typedef int (*testfunc1)(mpfr_t, mpfr_t, mpfr_rnd_t);
|
||||
typedef int (*testfunc2)(mpfr_t, mpfr_t, mpfr_t, mpfr_rnd_t);
|
||||
typedef int (*testrred)(mpfr_t, mpfr_t, int *);
|
||||
typedef char * (*testsemi1)(uint32 *, uint32 *);
|
||||
typedef char * (*testsemi2)(uint32 *, uint32 *, uint32 *);
|
||||
typedef char * (*testsemi2f)(uint32 *, uint32 *, uint32 *);
|
||||
typedef char * (*testldexp)(uint32 *, uint32 *, uint32 *);
|
||||
typedef char * (*testfrexp)(uint32 *, uint32 *, uint32 *);
|
||||
typedef char * (*testmodf)(uint32 *, uint32 *, uint32 *);
|
||||
typedef char * (*testclassify)(uint32 *, uint32 *);
|
||||
typedef char * (*testclassifyf)(uint32 *, uint32 *);
|
||||
|
||||
typedef int (*testfunc1c)(mpc_t, mpc_t, mpc_rnd_t);
|
||||
typedef int (*testfunc2c)(mpc_t, mpc_t, mpc_t, mpc_rnd_t);
|
||||
|
||||
typedef int (*testfunc1cr)(mpfr_t, mpc_t, mpfr_rnd_t);
|
||||
|
||||
/* Pointer to a function that generates random test cases. */
|
||||
typedef void (*casegen)(uint32 *, uint32, uint32);
|
||||
|
||||
/*
|
||||
* List of testable functions, their types, and their testable range.
|
||||
*/
|
||||
enum {
|
||||
args1, /* afloat-based, one argument */
|
||||
args1f, /* same as args1 but in single prec */
|
||||
args2, /* afloat-based, two arguments */
|
||||
args2f, /* same as args2 but in single prec */
|
||||
rred, /* afloat-based, one arg, aux return */
|
||||
rredf, /* same as rred but in single prec */
|
||||
semi1, /* seminumerical, one argument */
|
||||
semi1f, /* seminumerical, 1 arg, float */
|
||||
semi2, /* seminumerical, two arguments */
|
||||
semi2f, /* seminumerical, 2 args, floats */
|
||||
t_ldexp, /* dbl * int -> dbl */
|
||||
t_ldexpf, /* sgl * int -> sgl */
|
||||
t_frexp, /* dbl -> dbl * int */
|
||||
t_frexpf, /* sgl -> sgl * int */
|
||||
t_modf, /* dbl -> dbl * dbl */
|
||||
t_modff, /* sgl -> sgl * sgl */
|
||||
classify, /* classify double: dbl -> int */
|
||||
classifyf, /* classify float: flt -> int */
|
||||
compare, /* compare doubles, returns int */
|
||||
comparef, /* compare floats, returns int */
|
||||
|
||||
args1c, /* acomplex-base, one argument */
|
||||
args2c,
|
||||
args1fc,
|
||||
args2fc,
|
||||
args1cr, /* dbl-complex -> complex */
|
||||
args1fcr /* sgl-complex -> complex */
|
||||
};
|
||||
|
||||
typedef struct __testable Testable;
|
||||
struct __testable {
|
||||
char *name;
|
||||
funcptr func;
|
||||
int type;
|
||||
wrapperfunc wrappers[MAXWRAPPERS];
|
||||
casegen cases; /* complex functions use the same casegen for both real and complex args */
|
||||
uint32 caseparam1, caseparam2;
|
||||
};
|
||||
|
||||
extern Testable functions[];
|
||||
extern const int nfunctions;
|
||||
|
||||
extern void init_pi(void);
|
||||
|
||||
int nargs_(Testable* f);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,334 @@
|
||||
/*
|
||||
* main.c
|
||||
*
|
||||
* Copyright (c) 1999-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "intern.h"
|
||||
|
||||
void gencases(Testable *fn, int number);
|
||||
void docase(Testable *fn, uint32 *args);
|
||||
void vet_for_decline(Testable *fn, uint32 *args, uint32 *result, int got_errno_in);
|
||||
void seed_random(uint32 seed);
|
||||
|
||||
int check_declines = 0;
|
||||
int lib_fo = 0;
|
||||
int lib_no_arith = 0;
|
||||
int ntests = 0;
|
||||
|
||||
/*
 * Number of operand fields (op1, op2, op2r, op2i ...) that a test line
 * for function f must supply: 4 for the two-argument complex types, 2
 * for the types listed below, 1 for every other type.
 */
int nargs_(Testable* f) {
    int count;

    switch (f->type) {
      case args2c:
      case args2fc:
        count = 4;
        break;
      case args2:
      case args2f:
      case semi2:
      case semi2f:
      case t_ldexp:
      case t_ldexpf:
      case args1c:
      case args1fc:
      case args1cr:
      case args1fcr:
      case compare:
      case comparef:
        count = 2;
        break;
      default:
        count = 1;
        break;
    }
    return count;
}
|
||||
|
||||
/*
 * Return 1 if the function type of f takes double-precision operands
 * and 0 if it takes single-precision ones. An unknown type trips an
 * assertion.
 */
static int isdouble(Testable *f)
{
    switch (f->type) {
      case args1:
      case rred:
      case semi1:
      case t_frexp:
      case t_modf:
      case classify:
      case t_ldexp:
      case args2:
      case semi2:
      case args1c:
      case args1cr:
      case compare:
      case args2c:
        return 1;
      case args1f:
      case rredf:
      case semi1f:
      case t_frexpf:
      case t_modff:
      case classifyf:
      case args2f:
      case semi2f:
      case t_ldexpf:
      case comparef:
      case args1fc:
      case args1fcr:
      case args2fc:
        return 0;
      default:
        assert(0 && "Bad function type");
        /*
         * Only reached when assertions are compiled out (NDEBUG);
         * without this the function would fall off its end and the
         * caller would use an indeterminate value.
         */
        return 0;
    }
}
|
||||
|
||||
Testable *find_function(const char *func)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < nfunctions; i++) {
|
||||
if (func && !strcmp(func, functions[i].name)) {
|
||||
return &functions[i];
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Decode one operand or result field of a test line.
 *
 * str is either one of the named constants below, optionally preceded
 * by '+' or '-', or hexadecimal words written as "hhhhhhhh.llllllll".
 * For a named constant, f selects (via isdouble) between the two-word
 * double encoding and the one-word single encoding, so f must be
 * non-NULL in that case. The decoded words are stored in *word0 and
 * *word1.
 */
void get_operand(const char *str, Testable *f, uint32 *word0, uint32 *word1)
{
    /* Constant table: built once rather than on every call. */
    static const struct special {
        unsigned dblword0, dblword1, sglword;
        const char *name;
    } specials[] = {
        {0x00000000,0x00000000,0x00000000,"0"},
        {0x3FF00000,0x00000000,0x3f800000,"1"},
        {0x7FF00000,0x00000000,0x7f800000,"inf"},
        {0x7FF80000,0x00000001,0x7fc00000,"qnan"},
        {0x7FF00000,0x00000001,0x7f800001,"snan"},
        {0x3ff921fb,0x54442d18,0x3fc90fdb,"pi2"},
        {0x400921fb,0x54442d18,0x40490fdb,"pi"},
        {0x3fe921fb,0x54442d18,0x3f490fdb,"pi4"},
        {0x4002d97c,0x7f3321d2,0x4016cbe4,"3pi4"},
    };
    int i;

    for (i = 0; i < (int)(sizeof(specials)/sizeof(*specials)); i++) {
        if (!strcmp(str, specials[i].name) ||
            ((str[0] == '-' || str[0] == '+') &&
             !strcmp(str+1, specials[i].name))) {
            assert(f);
            if (isdouble(f)) {
                *word0 = specials[i].dblword0;
                *word1 = specials[i].dblword1;
            } else {
                *word0 = specials[i].sglword;
                *word1 = 0;
            }
            /* A leading '-' sets the sign bit of the top word. */
            if (str[0] == '-')
                *word0 |= 0x80000000U;
            return;
        }
    }

    /*
     * sscanf leaves untouched every word it cannot parse (for example
     * the second word of a one-word operand). Start from zero so the
     * caller never reads an indeterminate value.
     */
    *word0 = 0;
    *word1 = 0;
    sscanf(str, "%"I32"x.%"I32"x", word0, word1);
}
|
||||
|
||||
void dofile(FILE *fp, int translating) {
|
||||
char buf[1024], sparebuf[1024], *p;
|
||||
|
||||
/*
|
||||
* Command syntax is:
|
||||
*
|
||||
* - "seed <integer>" sets a random seed
|
||||
*
|
||||
* - "test <function> <ntests>" generates random test lines
|
||||
*
|
||||
* - "<function> op1=foo [op2=bar]" generates a specific test
|
||||
* - "func=<function> op1=foo [op2=bar]" does the same
|
||||
* - "func=<function> op1=foo result=bar" will just output the line as-is
|
||||
*
|
||||
* - a semicolon or a blank line is ignored
|
||||
*/
|
||||
while (fgets(buf, sizeof(buf), fp)) {
|
||||
buf[strcspn(buf, "\r\n")] = '\0';
|
||||
strcpy(sparebuf, buf);
|
||||
p = buf;
|
||||
while (*p && isspace(*p)) p++;
|
||||
if (!*p || *p == ';') {
|
||||
/* Comment or blank line. Only print if `translating' is set. */
|
||||
if (translating)
|
||||
printf("%s\n", buf);
|
||||
continue;
|
||||
}
|
||||
if (!strncmp(buf, "seed ", 5)) {
|
||||
seed_random(atoi(buf+5));
|
||||
} else if (!strncmp(buf, "random=", 7)) {
|
||||
/*
|
||||
* Copy 'random=on' / 'random=off' lines unconditionally
|
||||
* to the output, so that random test failures can be
|
||||
* accumulated into a recent-failures-list file and
|
||||
* still identified as random-in-origin when re-run the
|
||||
* next day.
|
||||
*/
|
||||
printf("%s\n", buf);
|
||||
} else if (!strncmp(buf, "test ", 5)) {
|
||||
char *p = buf+5;
|
||||
char *q;
|
||||
int ntests, i;
|
||||
q = p;
|
||||
while (*p && !isspace(*p)) p++;
|
||||
if (*p) *p++ = '\0';
|
||||
while (*p && isspace(*p)) p++;
|
||||
if (*p)
|
||||
ntests = atoi(p);
|
||||
else
|
||||
ntests = 100; /* *shrug* */
|
||||
for (i = 0; i < nfunctions; i++) {
|
||||
if (!strcmp(q, functions[i].name)) {
|
||||
gencases(&functions[i], ntests);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == nfunctions) {
|
||||
fprintf(stderr, "unknown test `%s'\n", q);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Parse a specific test line.
|
||||
*/
|
||||
uint32 ops[8], result[8];
|
||||
int got_op = 0; /* &1 for got_op1, &4 for got_op3 etc. */
|
||||
Testable *f = 0;
|
||||
char *q, *r;
|
||||
int got_result = 0, got_errno_in = 0;
|
||||
|
||||
for (q = strtok(p, " \t"); q; q = strtok(NULL, " \t")) {
|
||||
r = strchr(q, '=');
|
||||
if (!r) {
|
||||
f = find_function(q);
|
||||
} else {
|
||||
*r++ = '\0';
|
||||
|
||||
if (!strcmp(q, "func"))
|
||||
f = find_function(r);
|
||||
else if (!strcmp(q, "op1") || !strcmp(q, "op1r")) {
|
||||
get_operand(r, f, &ops[0], &ops[1]);
|
||||
got_op |= 1;
|
||||
} else if (!strcmp(q, "op2") || !strcmp(q, "op1i")) {
|
||||
get_operand(r, f, &ops[2], &ops[3]);
|
||||
got_op |= 2;
|
||||
} else if (!strcmp(q, "op2r")) {
|
||||
get_operand(r, f, &ops[4], &ops[5]);
|
||||
got_op |= 4;
|
||||
} else if (!strcmp(q, "op2i")) {
|
||||
get_operand(r, f, &ops[6], &ops[7]);
|
||||
got_op |= 8;
|
||||
} else if (!strcmp(q, "result") || !strcmp(q, "resultr")) {
|
||||
get_operand(r, f, &result[0], &result[1]);
|
||||
got_result |= 1;
|
||||
} else if (!strcmp(q, "resulti")) {
|
||||
get_operand(r, f, &result[4], &result[5]);
|
||||
got_result |= 2;
|
||||
} else if (!strcmp(q, "res2")) {
|
||||
get_operand(r, f, &result[2], &result[3]);
|
||||
got_result |= 4;
|
||||
} else if (!strcmp(q, "errno_in")) {
|
||||
got_errno_in = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Test cases already set up by the input are not
|
||||
* reprocessed by default, unlike the fplib tests. (This
|
||||
* is mostly for historical reasons, because we used to
|
||||
* use a very slow and incomplete internal reference
|
||||
* implementation; now our ref impl is MPFR/MPC it
|
||||
* probably wouldn't be such a bad idea, though we'd still
|
||||
* have to make sure all the special cases came out
|
||||
* right.) If translating==2 (corresponding to the -T
|
||||
* command-line option) then we regenerate everything
|
||||
* regardless.
|
||||
*/
|
||||
if (got_result && translating < 2) {
|
||||
if (f)
|
||||
vet_for_decline(f, ops, result, got_errno_in);
|
||||
puts(sparebuf);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (f && got_op==(1<<nargs_(f))-1) {
|
||||
/*
|
||||
* And do it!
|
||||
*/
|
||||
docase(f, ops);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int errs = 0, opts = 1, files = 0, translating = 0;
|
||||
unsigned int seed = 1; /* in case no explicit seed provided */
|
||||
|
||||
seed_random(seed);
|
||||
|
||||
setvbuf(stdout, NULL, _IOLBF, BUFSIZ); /* stops incomplete lines being printed when out of time */
|
||||
|
||||
while (--argc) {
|
||||
FILE *fp;
|
||||
char *p = *++argv;
|
||||
|
||||
if (opts && *p == '-') {
|
||||
if(*(p+1) == 0) { /* single -, read from stdin */
|
||||
break;
|
||||
} else if (!strcmp(p, "-t")) {
|
||||
translating = 1;
|
||||
} else if (!strcmp(p, "-T")) {
|
||||
translating = 2;
|
||||
} else if (!strcmp(p, "-c")) {
|
||||
check_declines = 1;
|
||||
} else if (!strcmp(p, "--")) {
|
||||
opts = 0;
|
||||
} else if (!strcmp(p,"--seed") && argc > 1 && 1==sscanf(*(argv+1),"%u",&seed)) {
|
||||
seed_random(seed);
|
||||
argv++; /* next in argv is seed value, so skip */
|
||||
--argc;
|
||||
} else if (!strcmp(p, "-fo")) {
|
||||
lib_fo = 1;
|
||||
} else if (!strcmp(p, "-noarith")) {
|
||||
lib_no_arith = 1;
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"rtest: ignoring unrecognised option '%s'\n", p);
|
||||
errs = 1;
|
||||
}
|
||||
} else {
|
||||
files = 1;
|
||||
if (!errs) {
|
||||
fp = fopen(p, "r");
|
||||
if (fp) {
|
||||
dofile(fp, translating);
|
||||
fclose(fp);
|
||||
} else {
|
||||
perror(p);
|
||||
errs = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If no filename arguments, use stdin.
|
||||
*/
|
||||
if (!files && !errs) {
|
||||
dofile(stdin, translating);
|
||||
}
|
||||
|
||||
if (check_declines) {
|
||||
fprintf(stderr, "Tests expected to run: %d\n", ntests);
|
||||
fflush(stderr);
|
||||
}
|
||||
|
||||
return errs;
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* random.c - random number generator for producing mathlib test cases
|
||||
*
|
||||
* Copyright (c) 1998-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "types.h"
|
||||
#include "random.h"
|
||||
|
||||
static uint32 seedbuf[55];
|
||||
static int seedptr;
|
||||
|
||||
void seed_random(uint32 seed) {
|
||||
int i;
|
||||
|
||||
seedptr = 0;
|
||||
for (i = 0; i < 55; i++) {
|
||||
seed = seed % 44488 * 48271 - seed / 44488 * 3399;
|
||||
seedbuf[i] = seed - 1;
|
||||
}
|
||||
}
|
||||
|
||||
uint32 base_random(void) {
|
||||
seedptr %= 55;
|
||||
seedbuf[seedptr] += seedbuf[(seedptr+31)%55];
|
||||
return seedbuf[seedptr++];
|
||||
}
|
||||
|
||||
/*
 * Return a 32-bit random word built from two base_random() outputs:
 * the first as-is, XORed with the second after its bits have been
 * mirrored (bit 31 <-> bit 0, bit 30 <-> bit 1, ...).
 */
uint32 random32(void) {
    uint32 plain = base_random();
    uint32 mirrored = base_random();
    uint32 hi = 0x80000000, lo = 1;

    while (hi > lo) {
        uint32 pair = hi | lo;
        uint32 picked = mirrored & pair;

        /* exactly one bit of the pair is set: swap them by flipping both */
        if (picked != 0 && picked != pair)
            mirrored ^= pair;

        hi >>= 1;
        lo <<= 1;
    }
    return plain ^ mirrored;
}
|
||||
|
||||
/*
 * random_upto: generate a uniformly randomised number in the range
 * 0,...,limit inclusive (the rejection loop accepts any value that is
 * not greater than limit). (Documented precondition: limit > 0.)
 *
 * random_upto_biased: generate a number in the same range, but with
 * the probability skewed towards the low end by means of taking the
 * minimum of 8*bias+1 samples from the uniform distribution on the
 * same range. (I don't know why bias is given in that curious way -
 * historical reasons, I expect.)
 *
 * For speed, I separate the implementation of random_upto into the
 * two stages of (a) generate a bitmask which reduces a 32-bit random
 * number to within a factor of two of the right range, (b) repeatedly
 * generate numbers in that range until one is small enough. Splitting
 * it up like that means that random_upto_biased can do (a) only once
 * even when it does (b) lots of times.
 */
|
||||
|
||||
/*
 * Return the smallest mask of the form 2^k-1 (k >= 1) that still covers
 * every set bit of 'limit', found by trying right shifts of 16, 8, 4,
 * 2 and 1 in turn.
 */
static uint32 random_upto_makemask(uint32 limit) {
    uint32 mask = 0xFFFFFFFF;
    int shift;

    for (shift = 16; shift != 0; shift /= 2) {
        uint32 narrower = mask >> shift;

        /* keep the narrower mask only if it loses none of limit's bits */
        if ((limit & ~narrower) == 0)
            mask = narrower;
    }
    return mask;
}
|
||||
|
||||
/*
 * Draw masked random words until one is no greater than 'limit', and
 * return it.  'mask' is expected to come from random_upto_makemask().
 */
static uint32 random_upto_internal(uint32 limit, uint32 mask) {
    for (;;) {
        uint32 candidate = random32() & mask;

        if (candidate <= limit)
            return candidate;
    }
}
|
||||
|
||||
/*
 * Return a random number that is no greater than 'limit', using
 * rejection sampling under the tightest power-of-two mask.
 */
uint32 random_upto(uint32 limit) {
    return random_upto_internal(limit, random_upto_makemask(limit));
}
|
||||
|
||||
/*
 * Draw 8*bias+1 samples, each no greater than 'limit', and return the
 * smallest of them.  The mask is computed once and shared by all draws.
 */
uint32 random_upto_biased(uint32 limit, int bias) {
    const uint32 mask = random_upto_makemask(limit);
    uint32 lowest = random_upto_internal(limit, mask);

    for (; bias != 0; bias--) {
        int draw;

        /* eight further samples per unit of bias */
        for (draw = 0; draw < 8; draw++) {
            uint32 sample = random_upto_internal(limit, mask);

            if (sample < lowest)
                lowest = sample;
        }
    }

    return lowest;
}
|
||||
@@ -0,0 +1,12 @@
|
||||
/*
|
||||
* random.h - header for random.c
|
||||
*
|
||||
* Copyright (c) 2009-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef test_random_h
#define test_random_h

#include "types.h"

/* Return a 32-bit random word. */
uint32 random32(void);

/* Return a random number that is no greater than 'limit'. */
uint32 random_upto(uint32 limit);

/*
 * As random_upto(), but take 8*bias+1 samples and return the smallest
 * of them.
 */
uint32 random_upto_biased(uint32 limit, int bias);

#endif
|
||||
@@ -0,0 +1,905 @@
|
||||
/*
|
||||
* semi.c: test implementations of mathlib seminumerical functions
|
||||
*
|
||||
* Copyright (c) 1999-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "semi.h"
|
||||
|
||||
/*
 * Round a double (in[0] = high word, in[1] = low word) to an integer
 * value, writing the result to out[0..1].
 *
 * isfloor: round negative inputs away from zero.
 * isceil:  round positive inputs away from zero.
 * With both flags clear the input is truncated towards zero.
 *
 * NaN, infinity, values with no fraction bits and zeros are copied
 * through unchanged.
 */
static void test_rint(uint32 *in, uint32 *out,
                      int isfloor, int isceil) {
    const uint32 hi = in[0];
    const uint32 lo = in[1];
    const uint32 signbit = hi & 0x80000000;
    const int biased = (int)((hi >> 20) & 0x7FF);  /* exponent field */
    const int nfrac = 0x3ff + 52 - biased;         /* fraction bits present */
    const int away = signbit ? (isfloor != 0) : (isceil != 0);
    uint32 whole_hi, whole_lo, dropped;
    int step;

    /* too big to have a fraction (includes Inf/NaN), or zero */
    if (nfrac <= 0 || ((hi & 0x7FFFFFFF) | lo) == 0) {
        out[0] = hi;
        out[1] = lo;
        return;
    }

    /* magnitude below 1: the answer is a signed 0 or a signed 1 */
    if (biased < 0x3ff) {
        out[0] = signbit | (away ? 0x3FF00000 : 0);
        out[1] = 0;
        return;
    }

    /*
     * Shift the whole bit pattern right, one bit at a time, until the
     * units bit is at the bottom; 'dropped' collects the bits shifted
     * out, with its lowest bit acting as a sticky bit.
     */
    whole_hi = hi;
    whole_lo = lo;
    dropped = 0;
    for (step = 0; step < nfrac; step++) {
        const uint32 sticky = dropped & 1;

        dropped = (dropped >> 1) | sticky | ((whole_lo & 1) << 31);
        whole_lo = (whole_lo >> 1) | ((whole_hi & 1) << 31);
        whole_hi >>= 1;
    }

    /* bump the magnitude if anything was discarded and we round away */
    if (away && dropped != 0) {
        whole_lo++;
        if (whole_lo == 0)
            whole_hi++;
    }

    /* shift back into place; a mantissa carry lands in the exponent */
    for (step = 0; step < nfrac; step++) {
        whole_hi = (whole_hi << 1) | ((whole_lo >> 31) & 1);
        whole_lo = (whole_lo & 0x7FFFFFFF) << 1;
    }

    out[0] = whole_hi;
    out[1] = whole_lo;
}
|
||||
|
||||
/* Reference ceil() for doubles: test_rint() rounding positive inputs up. */
char *test_ceil(uint32 *in, uint32 *out) {
    const int as_floor = 0, as_ceil = 1;

    test_rint(in, out, as_floor, as_ceil);
    return NULL;    /* never reports an error */
}
|
||||
|
||||
/* Reference floor() for doubles: test_rint() rounding negative inputs down. */
char *test_floor(uint32 *in, uint32 *out) {
    const int as_floor = 1, as_ceil = 0;

    test_rint(in, out, as_floor, as_ceil);
    return NULL;    /* never reports an error */
}
|
||||
|
||||
/*
 * Single-precision counterpart of test_rint(): round the float whose
 * bit pattern is *in to an integer value and store it in *out.
 *
 * isfloor: round negative inputs away from zero.
 * isceil:  round positive inputs away from zero.
 * With both flags clear the input is truncated towards zero.
 */
static void test_rintf(uint32 *in, uint32 *out,
                       int isfloor, int isceil) {
    const uint32 bits = *in;
    const uint32 signbit = bits & 0x80000000;
    const int biased = (int)((bits >> 23) & 0xFF);  /* exponent field */
    const int nfrac = 0x7F + 23 - biased;           /* fraction bits present */
    const int away = signbit ? (isfloor != 0) : (isceil != 0);
    uint32 whole;

    /* too big to have a fraction (includes Inf/NaN), or zero */
    if (nfrac <= 0 || (bits & 0x7FFFFFFF) == 0) {
        *out = bits;
        return;
    }

    /* magnitude below 1: the answer is a signed 0 or a signed 1 */
    if (biased < 0x7f) {
        *out = signbit | (away ? 0x3F800000 : 0);
        return;
    }

    /*
     * Here 1 <= nfrac <= 23.  Drop the fraction bits, bump the rest if
     * any dropped bit was set and we round away from zero, then shift
     * back.  A mantissa carry lands in the exponent field.
     */
    whole = bits >> nfrac;
    if (away && (bits & (((uint32)1 << nfrac) - 1)) != 0)
        whole++;
    *out = whole << nfrac;
}
|
||||
|
||||
/* Reference ceilf(): test_rintf() rounding positive inputs up. */
char *test_ceilf(uint32 *in, uint32 *out) {
    const int as_floor = 0, as_ceil = 1;

    test_rintf(in, out, as_floor, as_ceil);
    return NULL;    /* never reports an error */
}
|
||||
|
||||
/* Reference floorf(): test_rintf() rounding negative inputs down. */
char *test_floorf(uint32 *in, uint32 *out) {
    const int as_floor = 1, as_ceil = 0;

    test_rintf(in, out, as_floor, as_ceil);
    return NULL;    /* never reports an error */
}
|
||||
|
||||
/*
 * Reference fmod() on doubles held as word pairs (index 0 = high word,
 * index 1 = low word).  The result is written to out[0..1].
 *
 * Returns NULL normally, "i" when an operand is a signalling NaN, and
 * "EDOM status=i" when b is zero or a is infinite.
 */
char *test_fmod(uint32 *a, uint32 *b, uint32 *out) {
    uint32 signbit;
    int32 aex, bex;
    uint32 am[2], bm[2];

    /*
     * NaN test: doubling the magnitude of the high word and adding one
     * if the low word is nonzero gives a value above 0xFFE00000 exactly
     * for NaNs.
     */
    if (((a[0] & 0x7FFFFFFF) << 1) + (a[1] != 0) > 0xFFE00000 ||
        ((b[0] & 0x7FFFFFFF) << 1) + (b[1] != 0) > 0xFFE00000) {
        uint32 akey, bkey;
        int a_signalling, b_signalling;

        /* the result is always a quiet NaN */
        out[0] = 0x7ff80000;
        out[1] = 1;

        akey = ((a[0] & 0x7FFFFFFF) << 1) + (a[1] != 0);
        bkey = ((b[0] & 0x7FFFFFFF) << 1) + (b[1] != 0);
        a_signalling = akey > 0xFFE00000 && akey < 0xFFF00000;
        b_signalling = bkey > 0xFFE00000 && bkey < 0xFFF00000;

        /* a signalling NaN raises invalid; quiet NaNs alone do not */
        return (a_signalling || b_signalling) ? "i" : NULL;
    }

    /* b == 0 or a == Inf: domain error */
    if (((b[0] & 0x7FFFFFFF) == 0 && b[1] == 0) ||
        (a[0] & 0x7FF00000) == 0x7FF00000) {
        out[0] = 0x7ff80000;
        out[1] = 1;
        return "EDOM status=i";
    }

    /* b == Inf or a == 0: the answer is a itself */
    if ((b[0] & 0x7FF00000) == 0x7FF00000 ||
        ((a[0] & 0x7FFFFFFF) == 0 && a[1] == 0)) {
        out[0] = a[0];
        out[1] = a[1];
        return NULL;
    }

    /*
     * Both operands are now finite and nonzero (possibly denormal).
     * Split each into exponent and 53-bit mantissa with the leading
     * bit made explicit; the sign of b is ignored.
     */
    signbit = a[0] & 0x80000000;
    test_frexp(a, am, (uint32 *)&aex);
    test_frexp(b, bm, (uint32 *)&bex);
    am[0] = (am[0] & 0xFFFFF) | 0x100000;
    bm[0] = (bm[0] & 0xFFFFF) | 0x100000;

    /* Shift-and-subtract long division, keeping only the remainder. */
    for (; aex >= bex; aex--) {
        if (am[0] > bm[0] || (am[0] == bm[0] && am[1] >= bm[1])) {
            const uint32 borrow = am[1] < bm[1];

            am[1] -= bm[1];
            am[0] -= bm[0] + borrow;
        }
        if (aex == bex)
            break;
        am[0] = (am[0] << 1) | ((am[1] & 0x80000000) >> 31);
        am[1] <<= 1;
    }

    /*
     * Renormalise by handing the remainder to test_ldexp() as though
     * it were a denormal.
     */
    aex += 0x3fd;
    am[0] |= signbit;
    test_ldexp(am, (uint32 *)&aex, out);

    return NULL; /* FIXME */
}
|
||||
|
||||
/*
 * Reference fmodf() on float bit patterns.  The result is written to
 * *out.
 *
 * Returns NULL normally, "i" when an operand is a signalling NaN, and
 * "EDOM status=i" when b is zero or a is infinite.
 */
char *test_fmodf(uint32 *a, uint32 *b, uint32 *out) {
    uint32 signbit;
    int32 aex, bex;
    uint32 am, bm;

    if ((*a & 0x7FFFFFFF) > 0x7F800000 ||
        (*b & 0x7FFFFFFF) > 0x7F800000) {
        uint32 amag, bmag;
        int a_signalling, b_signalling;

        /* the result is always a quiet NaN */
        *out = 0x7fc00001;

        amag = *a & 0x7FFFFFFF;
        bmag = *b & 0x7FFFFFFF;
        a_signalling = amag > 0x7f800000 && amag < 0x7fc00000;
        b_signalling = bmag > 0x7f800000 && bmag < 0x7fc00000;

        /* a signalling NaN raises invalid; quiet NaNs alone do not */
        return (a_signalling || b_signalling) ? "i" : NULL;
    }

    /* b == 0 or a == Inf: domain error */
    if ((*b & 0x7FFFFFFF) == 0 ||
        (*a & 0x7F800000) == 0x7F800000) {
        *out = 0x7fc00001;
        return "EDOM status=i";
    }

    /* b == Inf or a == 0: the answer is a itself */
    if ((*b & 0x7F800000) == 0x7F800000 ||
        (*a & 0x7FFFFFFF) == 0) {
        *out = *a;
        return NULL;
    }

    /*
     * Both operands are now finite and nonzero (possibly denormal).
     * Split each into exponent and 24-bit mantissa with the leading
     * bit made explicit; the sign of b is ignored.
     */
    signbit = a[0] & 0x80000000;
    test_frexpf(a, &am, (uint32 *)&aex);
    test_frexpf(b, &bm, (uint32 *)&bex);
    am = (am & 0x7FFFFF) | 0x800000;
    bm = (bm & 0x7FFFFF) | 0x800000;

    /* Shift-and-subtract long division, keeping only the remainder. */
    for (; aex >= bex; aex--) {
        if (am >= bm)
            am -= bm;
        if (aex == bex)
            break;
        am <<= 1;
    }

    /*
     * Renormalise by handing the remainder to test_ldexpf() as though
     * it were a denormal.
     */
    aex += 0x7d;
    am |= signbit;
    test_ldexpf(&am, (uint32 *)&aex, out);

    return NULL; /* FIXME */
}
|
||||
|
||||
/*
 * Reference ldexp() for a double held as two words (x[0] = high word,
 * x[1] = low word): scale x by 2^n, where n is *np read as a signed
 * int, and write the result to out[0..1].
 *
 * Returns NULL for an exact or ordinary result, "overflow" (result is
 * a signed infinity), "underflow" (result is a signed zero), or "u"
 * when a denormal result had to be rounded.
 */
char *test_ldexp(uint32 *x, uint32 *np, uint32 *out) {
    int n = *np;
    int32 n2;
    uint32 y[2];
    int ex = (x[0] >> 20) & 0x7FF;     /* exponent */
    uint32 sign = x[0] & 0x80000000;

    if (ex == 0x7FF) {                 /* inf/NaN; just return x */
        out[0] = x[0];
        out[1] = x[1];
        return NULL;
    }
    if ((x[0] & 0x7FFFFFFF) == 0 && x[1] == 0) { /* zero: return x */
        out[0] = x[0];
        out[1] = x[1];
        return NULL;
    }

    test_frexp(x, y, (uint32 *)&n2);

    /*
     * n2 lies in [-1073, 1024], so any |n| beyond 4096 overflows or
     * underflows regardless of its exact value.  Clamping first keeps
     * n + n2 from overflowing int for extreme n (previously undefined
     * behaviour, which could turn an overflow into a reported
     * underflow).
     */
    if (n > 4096)
        n = 4096;
    else if (n < -4096)
        n = -4096;
    ex = n + n2;

    if (ex > 0x400) {                  /* overflow */
        out[0] = sign | 0x7FF00000;
        out[1] = 0;
        return "overflow";
    }
    /*
     * Underflow. 2^-1074 is 00000000.00000001; so if ex == -1074
     * then we have something [2^-1075,2^-1074). Under round-to-
     * nearest-even, this whole interval rounds up to 2^-1074,
     * except for the bottom endpoint which rounds to even and is
     * an underflow condition.
     *
     * So, ex < -1074 is definite underflow, and ex == -1074 is
     * underflow iff all mantissa bits are zero.
     */
    if (ex < -1074 || (ex == -1074 && (y[0] & 0xFFFFF) == 0 && y[1] == 0)) {
        out[0] = sign;                 /* underflow: correctly signed zero */
        out[1] = 0;
        return "underflow";
    }

    /*
     * No overflow or underflow; should be nice and simple, unless
     * we have to denormalise and round the result.
     */
    if (ex < -1021) {                  /* denormalise and round */
        uint32 roundword;
        y[0] &= 0x000FFFFF;
        y[0] |= 0x00100000;            /* set leading bit */
        roundword = 0;
        while (ex < -1021) {
            if (roundword & 1)
                roundword |= 2;        /* preserve sticky bit */
            roundword = (roundword >> 1) | ((y[1] & 1) << 31);
            y[1] = (y[1] >> 1) | ((y[0] & 1) << 31);
            y[0] = y[0] >> 1;
            ex++;
        }
        if (roundword > 0x80000000 ||  /* round up */
            (roundword == 0x80000000 && (y[1] & 1))) { /* round up to even */
            y[1]++;
            y[0] += (y[1] == 0);
        }
        out[0] = sign | y[0];
        out[1] = y[1];
        /* Proper ERANGE underflow was handled earlier, but we still
         * expect an IEEE Underflow exception if this partially
         * underflowed result is not exact. */
        if (roundword)
            return "u";
        return NULL;                   /* underflow was handled earlier */
    } else {
        /*
         * y carries exponent field 0x3FE; adding ex (which may be
         * negative) moves it into place.  The shift is done on an
         * unsigned value because left-shifting a negative int is
         * undefined.
         */
        out[0] = y[0] + ((uint32)ex << 20);
        out[1] = y[1];
        return NULL;
    }
}
|
||||
|
||||
/*
 * Reference ldexpf() on a float bit pattern: scale *x by 2^n, where n
 * is *np read as a signed int, and write the result to *out.
 *
 * Returns NULL for an exact or ordinary result, "overflow" (result is
 * a signed infinity), "underflow" (result is a signed zero), or "u"
 * when a denormal result had to be rounded.
 */
char *test_ldexpf(uint32 *x, uint32 *np, uint32 *out) {
    int n = *np;
    int32 n2;
    uint32 y;
    int ex = (*x >> 23) & 0xFF;        /* exponent */
    uint32 sign = *x & 0x80000000;

    if (ex == 0xFF) {                  /* inf/NaN; just return x */
        *out = *x;
        return NULL;
    }
    if ((*x & 0x7FFFFFFF) == 0) {      /* zero: return x */
        *out = *x;
        return NULL;
    }

    test_frexpf(x, &y, (uint32 *)&n2);

    /*
     * n2 lies in [-148, 128], so any |n| beyond 512 overflows or
     * underflows regardless of its exact value.  Clamping first keeps
     * n + n2 from overflowing int for extreme n (previously undefined
     * behaviour, which could turn an overflow into a reported
     * underflow).
     */
    if (n > 512)
        n = 512;
    else if (n < -512)
        n = -512;
    ex = n + n2;

    if (ex > 0x80) {                   /* overflow */
        *out = sign | 0x7F800000;
        return "overflow";
    }
    /*
     * Underflow. 2^-149 is 00000001; so if ex == -149 then we have
     * something [2^-150,2^-149). Under round-to-nearest-even,
     * this whole interval rounds up to 2^-149, except for the
     * bottom endpoint which rounds to even and is an underflow
     * condition.
     *
     * So, ex < -149 is definite underflow, and ex == -149 is
     * underflow iff all mantissa bits are zero.
     */
    if (ex < -149 || (ex == -149 && (y & 0x7FFFFF) == 0)) {
        *out = sign;                   /* underflow: correctly signed zero */
        return "underflow";
    }

    /*
     * No overflow or underflow; should be nice and simple, unless
     * we have to denormalise and round the result.
     */
    if (ex < -125) {                   /* denormalise and round */
        uint32 roundword;
        y &= 0x007FFFFF;
        y |= 0x00800000;               /* set leading bit */
        roundword = 0;
        while (ex < -125) {
            if (roundword & 1)
                roundword |= 2;        /* preserve sticky bit */
            roundword = (roundword >> 1) | ((y & 1) << 31);
            y = y >> 1;
            ex++;
        }
        if (roundword > 0x80000000 ||  /* round up */
            (roundword == 0x80000000 && (y & 1))) { /* round up to even */
            y++;
        }
        *out = sign | y;
        /* Proper ERANGE underflow was handled earlier, but we still
         * expect an IEEE Underflow exception if this partially
         * underflowed result is not exact. */
        if (roundword)
            return "u";
        return NULL;                   /* underflow was handled earlier */
    } else {
        /*
         * y carries exponent field 0x7E; adding ex (which may be
         * negative) moves it into place.  The shift is done on an
         * unsigned value because left-shifting a negative int is
         * undefined.
         */
        *out = y + ((uint32)ex << 23);
        return NULL;
    }
}
|
||||
|
||||
/*
 * Reference frexp() for a double held as two words (x[0] = high word,
 * x[1] = low word).  Writes the mantissa, rescaled to exponent field
 * 0x3FE with the sign of x, to out[0..1] and the power of two to
 * nout[0].  Infinities, NaNs and zeros are copied through with an
 * exponent of 0.  Always returns NULL.
 */
char *test_frexp(uint32 *x, uint32 *out, uint32 *nout) {
    const uint32 signbit = x[0] & 0x80000000;
    uint32 mant_hi = x[0] & 0xFFFFF;
    uint32 mant_lo = x[1];
    int e = (x[0] >> 20) & 0x7FF;      /* exponent field */

    /* inf/NaN and zeros: return x itself with exponent 0 */
    if (e == 0x7FF || ((x[0] & 0x7FFFFFFF) == 0 && x[1] == 0)) {
        out[0] = x[0];
        out[1] = x[1];
        nout[0] = 0;
        return NULL;
    }

    if (e == 0) {
        /*
         * Denormal: shift the mantissa up until its leading bit sits
         * in the implicit-bit position, adjusting the exponent to match.
         */
        for (e = 1; (mant_hi & 0x100000) == 0; e--) {
            mant_hi = (mant_hi << 1) | ((mant_lo >> 31) & 1);
            mant_lo = (mant_lo << 1) & 0xFFFFFFFF;
        }
    }

    out[0] = signbit | 0x3FE00000 | (mant_hi & 0xFFFFF);
    out[1] = mant_lo;
    nout[0] = e - 0x3FE;
    return NULL;
}
|
||||
|
||||
/*
 * Reference frexpf() on a float bit pattern.  Writes the mantissa,
 * rescaled to exponent field 0x7E with the sign of x, to *out and the
 * power of two to nout[0].  Infinities, NaNs and zeros are copied
 * through with an exponent of 0.  Always returns NULL.
 */
char *test_frexpf(uint32 *x, uint32 *out, uint32 *nout) {
    const uint32 signbit = *x & 0x80000000;
    uint32 mant = *x & 0x7FFFFF;
    int e = (*x >> 23) & 0xFF;         /* exponent field */

    /* inf/NaN and zeros: return x itself with exponent 0 */
    if (e == 0xFF || (*x & 0x7FFFFFFF) == 0) {
        *out = *x;
        nout[0] = 0;
        return NULL;
    }

    if (e == 0) {
        /*
         * Denormal: shift the mantissa up until its leading bit sits
         * in the implicit-bit position, adjusting the exponent to match.
         */
        for (e = 1; (mant & 0x800000) == 0; e--)
            mant <<= 1;
    }

    *out = signbit | 0x3F000000 | (mant & 0x7FFFFF);
    nout[0] = e - 0x7E;
    return NULL;
}
|
||||
|
||||
/*
 * Reference modf() for a double held as two words (x[0] = high word,
 * x[1] = low word): the integer part goes to iout[0..1] and the
 * fraction part, carrying the sign of x, to fout[0..1].  A NaN input
 * is copied to both outputs.  Always returns NULL.
 */
char *test_modf(uint32 *x, uint32 *fout, uint32 *iout) {
    int fexp = (x[0] >> 20) & 0x7FF;   /* exponent, lowered while normalising */
    const uint32 signbit = x[0] & 0x80000000;
    uint32 frac_hi, frac_lo;

    if (((x[0] & 0x7FFFFFFF) | (x[1] != 0)) > 0x7FF00000) {
        /* NaN input: hand the same value back in both outputs. */
        const uint32 hi = x[0], lo = x[1];

        iout[0] = hi;
        fout[0] = hi;
        iout[1] = lo;
        fout[1] = lo;
        return NULL;
    }

    /* Truncate towards zero; what was cut off is the raw fraction. */
    test_rint(x, iout, 0, 0);
    frac_hi = x[0] - iout[0];
    frac_lo = x[1] - iout[1];

    if ((frac_hi | frac_lo) == 0) {    /* no fraction part */
        fout[0] = signbit;
        fout[1] = 0;
        return NULL;
    }
    if (((iout[0] & 0x7FFFFFFF) | iout[1]) == 0) { /* no integer part */
        fout[0] = x[0];
        fout[1] = x[1];
        return NULL;
    }

    /* Normalise the leftover bits up to the implicit-bit position. */
    for (; (frac_hi & 0x100000) == 0; fexp--) {
        frac_hi = (frac_hi << 1) | ((frac_lo >> 31) & 1);
        frac_lo = (frac_lo & 0x7FFFFFFF) << 1;
    }

    fout[0] = signbit | (fexp << 20) | (frac_hi & 0xFFFFF);
    fout[1] = frac_lo;
    return NULL;
}
|
||||
|
||||
/*
 * Reference modff() on a float bit pattern: the integer part goes to
 * *iout and the fraction part, carrying the sign of x, to *fout.  A
 * NaN input is copied to both outputs.  Always returns NULL.
 */
char *test_modff(uint32 *x, uint32 *fout, uint32 *iout) {
    int fexp = (*x >> 23) & 0xFF;      /* exponent, lowered while normalising */
    const uint32 signbit = *x & 0x80000000;
    uint32 frac;

    if ((*x & 0x7FFFFFFF) > 0x7F800000) {
        /* NaN input: hand the same value back in both outputs. */
        const uint32 bits = *x;

        *iout = bits;
        *fout = bits;
        return NULL;
    }

    /* Truncate towards zero; what was cut off is the raw fraction. */
    test_rintf(x, iout, 0, 0);
    frac = *x - *iout;

    if (frac == 0) {                   /* no fraction part */
        *fout = signbit;
        return NULL;
    }
    if ((*iout & 0x7FFFFFFF) == 0) {   /* no integer part */
        *fout = *x;
        return NULL;
    }

    /* Normalise the leftover bits up to the implicit-bit position. */
    for (; (frac & 0x800000) == 0; fexp--)
        frac <<= 1;

    *fout = signbit | (fexp << 23) | (frac & 0x7FFFFF);
    return NULL;
}
|
||||
|
||||
/*
 * Reference copysign() for doubles: out gets the magnitude of x with
 * the sign bit of y.  Cannot fail, so always returns NULL.
 */
char *test_copysign(uint32 *x, uint32 *y, uint32 *out)
{
    const uint32 sign_of_y = y[0] & 0x80000000;
    const uint32 magnitude_hi = x[0] & 0x7fffffff;

    out[0] = magnitude_hi | sign_of_y;
    out[1] = x[1];      /* low word carries no sign information */
    return NULL;
}
|
||||
|
||||
/*
 * Reference copysignf(): out gets the magnitude of x with the sign bit
 * of y.  Cannot fail, so always returns NULL.
 */
char *test_copysignf(uint32 *x, uint32 *y, uint32 *out)
{
    const uint32 sign_of_y = y[0] & 0x80000000;
    const uint32 magnitude = x[0] & 0x7fffffff;

    out[0] = magnitude | sign_of_y;
    return NULL;
}
|
||||
|
||||
/*
 * Reference isfinite() for doubles: out[0] is 1 unless the exponent
 * field of the high word x[0] is all ones (0x7ff).  Always returns NULL.
 */
char *test_isfinite(uint32 *x, uint32 *out)
{
    const uint32 exponent_bits = x[0] & 0x7ff00000;

    out[0] = (exponent_bits != 0x7ff00000) ? 1 : 0;
    return NULL;
}
|
||||
|
||||
/*
 * Reference isfinite() for floats: out[0] is 1 unless the exponent
 * field is all ones (0xff).  Always returns NULL.
 */
char *test_isfinitef(uint32 *x, uint32 *out)
{
    const uint32 exponent_bits = x[0] & 0x7f800000;

    out[0] = (exponent_bits != 0x7f800000) ? 1 : 0;
    return NULL;
}
|
||||
|
||||
/*
 * Reference isinf() for floats: out[0] is 1 when the low 31 bits (all
 * but the sign) are exactly 0x7f800000, otherwise 0.  Always returns
 * NULL.
 */
char *test_isinff(uint32 *x, uint32 *out)
{
    const uint32 magnitude = x[0] & 0x7fffffff;

    out[0] = (magnitude == 0x7f800000) ? 1 : 0;
    return NULL;
}
|
||||
|
||||
/*
 * Reference isinf() for doubles: out[0] is 1 when the high word minus
 * its sign bit is exactly 0x7ff00000 and the low word is zero,
 * otherwise 0.  Always returns NULL.
 */
char *test_isinf(uint32 *x, uint32 *out)
{
    const uint32 magnitude_hi = x[0] & 0x7fffffff;
    const uint32 low_word = x[1];

    out[0] = (magnitude_hi == 0x7ff00000 && low_word == 0) ? 1 : 0;
    return NULL;
}
|
||||
|
||||
/*
 * Reference isnan() for floats: out[0] is 1 when the exponent field is
 * all ones (0xff) and the fraction is nonzero, otherwise 0.  Always
 * returns NULL.
 */
char *test_isnanf(uint32 *x, uint32 *out)
{
    const uint32 bits = x[0];
    const int max_exponent = (bits & 0x7f800000) == 0x7f800000;
    const int has_fraction = (bits & 0x007fffff) != 0;

    out[0] = (max_exponent && has_fraction) ? 1 : 0;
    return NULL;
}
|
||||
|
||||
/*
 * Reference isnan() for doubles: out[0] is 1 when the exponent field
 * is all ones (0x7ff) and the fraction, spread over both words, is
 * nonzero; otherwise 0.  Always returns NULL.
 */
char *test_isnan(uint32 *x, uint32 *out)
{
    const int max_exponent = (x[0] & 0x7ff00000) == 0x7ff00000;
    const int has_fraction = (x[0] & 0x000fffff) != 0 || x[1] != 0;

    out[0] = (max_exponent && has_fraction) ? 1 : 0;
    return NULL;
}
|
||||
|
||||
/*
 * Reference isnormal() for floats: out[0] is 1 when the exponent field
 * is neither zero nor all ones (0xff), otherwise 0.  Always returns
 * NULL.
 */
char *test_isnormalf(uint32 *x, uint32 *out)
{
    const uint32 exponent_bits = x[0] & 0x7f800000;

    out[0] = (exponent_bits != 0 && exponent_bits != 0x7f800000) ? 1 : 0;
    return NULL;
}
|
||||
|
||||
/*
 * Reference isnormal() for doubles: out[0] is 1 when the exponent
 * field of the high word is neither zero nor all ones (0x7ff),
 * otherwise 0.  Always returns NULL.
 */
char *test_isnormal(uint32 *x, uint32 *out)
{
    const uint32 exponent_bits = x[0] & 0x7ff00000;

    out[0] = (exponent_bits != 0 && exponent_bits != 0x7ff00000) ? 1 : 0;
    return NULL;
}
|
||||
|
||||
/*
 * Reference signbit() for floats: out[0] is bit 31 of the input (1 for
 * a set sign bit, 0 otherwise).  Always returns NULL.
 */
char *test_signbitf(uint32 *x, uint32 *out)
{
    const uint32 top_bit = (x[0] >> 31) & 1;

    out[0] = top_bit;
    return NULL;
}
|
||||
|
||||
/*
 * Reference signbit() for doubles: out[0] is bit 31 of the high word
 * x[0] (1 for a set sign bit, 0 otherwise).  Always returns NULL.
 */
char *test_signbit(uint32 *x, uint32 *out)
{
    const uint32 top_bit = (x[0] >> 31) & 1;

    out[0] = top_bit;
    return NULL;
}
|
||||
|
||||
/*
 * Reference fpclassify() for doubles.  out[0] receives:
 *   0  zero                 (exponent 0, fraction 0)
 *   4  denormal             (exponent 0, fraction nonzero)
 *   3  infinity             (exponent 0x7ff, fraction 0)
 *   7  NaN                  (exponent 0x7ff, fraction nonzero)
 *   5  any other exponent
 * Always returns NULL.
 */
char *test_fpclassify(uint32 *x, uint32 *out)
{
    const uint32 exponent = (x[0] >> 20) & 0x7ff;
    const int has_fraction = ((x[0] & 0x000fffff) | x[1]) != 0;

    if (exponent == 0x00)
        out[0] = has_fraction ? 4 : 0;
    else if (exponent == 0x7ff)
        out[0] = has_fraction ? 7 : 3;
    else
        out[0] = 5;
    return NULL;
}
|
||||
|
||||
/*
 * Reference fpclassify() for floats.  out[0] receives:
 *   0  zero                 (exponent 0, fraction 0)
 *   4  denormal             (exponent 0, fraction nonzero)
 *   3  infinity             (exponent 0xff, fraction 0)
 *   7  NaN                  (exponent 0xff, fraction nonzero)
 *   5  any other exponent
 * Always returns NULL.
 */
char *test_fpclassifyf(uint32 *x, uint32 *out)
{
    const uint32 exponent = (x[0] >> 23) & 0xff;
    const int has_fraction = (x[0] & 0x007fffff) != 0;

    if (exponent == 0x000)
        out[0] = has_fraction ? 4 : 0;
    else if (exponent == 0xff)
        out[0] = has_fraction ? 7 : 3;
    else
        out[0] = 5;
    return NULL;
}
|
||||
|
||||
/*
 * Four-way comparison of two doubles held as word pairs (index 0 =
 * high word, index 1 = low word).  Returns:
 *   -3  at least one operand is a signalling NaN
 *   -2  at least one operand is a quiet NaN (and none is signalling)
 *   -1  x < y
 *    0  x == y (the two zeros compare equal)
 *    1  x > y
 */
static int fpcmp4(uint32 *x, uint32 *y)
{
    const uint32 xmag = x[0] & 0x7fffffff;
    const uint32 ymag = y[0] & 0x7fffffff;
    const uint32 x_negative = x[0] >> 31;
    const uint32 y_negative = y[0] >> 31;
    int x_quiet, y_quiet, x_signalling, y_signalling;
    int order;

    /*
     * NaNs have exponent 0x7ff and a nonzero fraction; the top
     * fraction bit distinguishes quiet from signalling.  Signalling
     * NaNs take priority over quiet ones.
     */
    x_quiet = xmag >= 0x7ff80000;
    y_quiet = ymag >= 0x7ff80000;
    x_signalling = !x_quiet &&
        (xmag > 0x7ff00000 || (xmag == 0x7ff00000 && x[1] != 0));
    y_signalling = !y_quiet &&
        (ymag > 0x7ff00000 || (ymag == 0x7ff00000 && y[1] != 0));
    if (x_signalling || y_signalling)
        return -3;
    if (x_quiet || y_quiet)
        return -2;

    /* +0 and -0 are equal */
    if ((xmag | x[1]) == 0 && (ymag | y[1]) == 0)
        return 0;

    /* opposite signs: the non-negative operand is the larger */
    if (x_negative != y_negative)
        return x_negative ? -1 : 1;

    /*
     * Same sign.  With the sign bit removed, IEEE 754 bit patterns
     * order exactly like unsigned integers, so compare the high words
     * and break ties on the low words.
     */
    if (xmag != ymag)
        order = (xmag < ymag) ? -1 : 1;
    else if (x[1] != y[1])
        order = (x[1] < y[1]) ? -1 : 1;
    else
        order = 0;

    /* for two negative operands the ordering is reversed */
    return x_negative ? -order : order;
}
|
||||
|
||||
/*
 * Four-way comparison of two floats given as bit patterns.  Returns:
 *   -3  at least one operand is a signalling NaN
 *   -2  at least one operand is a quiet NaN (and none is signalling)
 *   -1  x < y
 *    0  x == y (the two zeros compare equal)
 *    1  x > y
 */
static int fpcmp4f(uint32 *x, uint32 *y)
{
    const uint32 xmag = x[0] & 0x7fffffff;
    const uint32 ymag = y[0] & 0x7fffffff;
    const uint32 x_negative = x[0] >> 31;
    const uint32 y_negative = y[0] >> 31;
    int x_quiet, y_quiet, x_signalling, y_signalling;
    int order;

    /*
     * NaNs have exponent 0xff and a nonzero fraction; the top fraction
     * bit distinguishes quiet from signalling.  Signalling NaNs take
     * priority over quiet ones.
     */
    x_quiet = xmag >= 0x7fc00000;
    y_quiet = ymag >= 0x7fc00000;
    x_signalling = !x_quiet && xmag > 0x7f800000;
    y_signalling = !y_quiet && ymag > 0x7f800000;
    if (x_signalling || y_signalling)
        return -3;
    if (x_quiet || y_quiet)
        return -2;

    /* +0 and -0 are equal */
    if (xmag == 0 && ymag == 0)
        return 0;

    /* opposite signs: the non-negative operand is the larger */
    if (x_negative != y_negative)
        return x_negative ? -1 : 1;

    /*
     * Same sign.  With the sign bit removed, IEEE 754 bit patterns
     * order exactly like unsigned integers.
     */
    if (xmag == ymag)
        order = 0;
    else
        order = (xmag < ymag) ? -1 : 1;

    /* for two negative operands the ordering is reversed */
    return x_negative ? -order : order;
}
|
||||
|
||||
/*
 * Reference isgreater() for doubles: *out is 1 when x > y, else 0.
 * Returns "i" if either operand is a signalling NaN, otherwise NULL.
 */
char *test_isgreater(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4(x, y);

    out[0] = (cmp == 1) ? 1 : 0;
    if (cmp == -3)
        return "i";
    return NULL;
}
|
||||
|
||||
/*
 * Reference isgreaterequal() for doubles: *out is 1 when x >= y, else
 * 0.  Returns "i" if either operand is a signalling NaN, otherwise
 * NULL.
 */
char *test_isgreaterequal(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4(x, y);

    /* every NaN outcome is negative, so non-negative means ordered and >= */
    out[0] = (cmp >= 0) ? 1 : 0;
    if (cmp == -3)
        return "i";
    return NULL;
}
|
||||
|
||||
/*
 * Reference isless() for doubles: *out is 1 when x < y, else 0.
 * Returns "i" if either operand is a signalling NaN, otherwise NULL.
 */
char *test_isless(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4(x, y);

    out[0] = (cmp == -1) ? 1 : 0;
    if (cmp == -3)
        return "i";
    return NULL;
}
|
||||
|
||||
/*
 * Reference islessequal() for doubles: *out is 1 when x <= y, else 0.
 * Returns "i" if either operand is a signalling NaN, otherwise NULL.
 */
char *test_islessequal(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4(x, y);

    out[0] = (cmp == 0 || cmp == -1) ? 1 : 0;
    if (cmp == -3)
        return "i";
    return NULL;
}
|
||||
|
||||
/*
 * Reference islessgreater() for doubles: *out is 1 when x < y or
 * x > y, else 0.  Returns "i" if either operand is a signalling NaN,
 * otherwise NULL.
 */
char *test_islessgreater(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4(x, y);

    out[0] = (cmp == 1 || cmp == -1) ? 1 : 0;
    if (cmp == -3)
        return "i";
    return NULL;
}
|
||||
|
||||
/*
 * Reference isunordered() for doubles: *out is 1 when either operand
 * is a NaN (quiet or signalling), else 0.  Returns "i" if either
 * operand is a signalling NaN, otherwise NULL.
 *
 * The former 'normal' computation via test_isnormal() was dead code:
 * its value was never read and the scratch writes to *out were always
 * overwritten, so it has been removed.
 */
char *test_isunordered(uint32 *x, uint32 *y, uint32 *out)
{
    int result = fpcmp4(x, y);

    *out = (result == -2) || (result == -3);
    return result == -3 ? "i" : NULL;
}
|
||||
|
||||
/*
 * Reference isgreater() for floats: *out is 1 when x > y, else 0.
 * Returns "i" if either operand is a signalling NaN, otherwise NULL.
 */
char *test_isgreaterf(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4f(x, y);

    out[0] = (cmp == 1) ? 1 : 0;
    if (cmp == -3)
        return "i";
    return NULL;
}
|
||||
|
||||
/*
 * Reference isgreaterequal() for floats: *out is 1 when x >= y, else
 * 0.  Returns "i" if either operand is a signalling NaN, otherwise
 * NULL.
 */
char *test_isgreaterequalf(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4f(x, y);

    /* every NaN outcome is negative, so non-negative means ordered and >= */
    out[0] = (cmp >= 0) ? 1 : 0;
    if (cmp == -3)
        return "i";
    return NULL;
}
|
||||
|
||||
/*
 * Compare x and y with fpcmp4f and store 1 in *out when the comparison
 * code is -1, otherwise 0.
 *
 * Returns "i" when fpcmp4f yields -3, and NULL for every other code.
 */
char *test_islessf(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4f(x, y);

    *out = (cmp == -1);
    if (cmp == -3)
        return "i";
    return NULL;
}
|
||||
|
||||
/*
 * Compare x and y with fpcmp4f and store 1 in *out when the comparison
 * code is -1 or 0, otherwise 0.
 *
 * Returns "i" when fpcmp4f yields -3, and NULL for every other code.
 */
char *test_islessequalf(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4f(x, y);

    *out = (cmp == -1 || cmp == 0);
    if (cmp == -3)
        return "i";
    return NULL;
}
|
||||
|
||||
/*
 * Compare x and y with fpcmp4f and store 1 in *out when the comparison
 * code is -1 or 1, otherwise 0.
 *
 * Returns "i" when fpcmp4f yields -3, and NULL for every other code.
 */
char *test_islessgreaterf(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4f(x, y);

    *out = (cmp == -1 || cmp == 1);
    if (cmp == -3)
        return "i";
    return NULL;
}
|
||||
|
||||
/*
 * Compare x and y with fpcmp4f and store 1 in *out when the comparison
 * code is -2 or -3, otherwise 0.
 *
 * Returns "i" when fpcmp4f yields -3, and NULL for every other code.
 */
char *test_isunorderedf(uint32 *x, uint32 *y, uint32 *out)
{
    int result = fpcmp4f(x, y);

    /*
     * NOTE(review): the values these calls leave in *out are overwritten
     * below, and the accumulator that used to collect them was never read
     * (so it has been dropped). The calls themselves are kept in case
     * test_isnormalf has side effects not visible here - confirm and remove.
     */
    (void)test_isnormalf(x, out);
    (void)test_isnormalf(y, out);

    *out = (result == -2) || (result == -3);
    return result == -3 ? "i" : NULL;
}
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* semi.h: header for semi.c
|
||||
*
|
||||
* Copyright (c) 1999-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef test_semi_h
|
||||
#define test_semi_h
|
||||
|
||||
#include "types.h"
|
||||
|
||||
char *test_ceil(uint32 *in, uint32 *out);
|
||||
char *test_floor(uint32 *in, uint32 *out);
|
||||
char *test_fmod(uint32 *a, uint32 *b, uint32 *out);
|
||||
char *test_ldexp(uint32 *x, uint32 *n, uint32 *out);
|
||||
char *test_frexp(uint32 *x, uint32 *out, uint32 *nout);
|
||||
char *test_modf(uint32 *x, uint32 *iout, uint32 *fout);
|
||||
char *test_ceilf(uint32 *in, uint32 *out);
|
||||
char *test_floorf(uint32 *in, uint32 *out);
|
||||
char *test_fmodf(uint32 *a, uint32 *b, uint32 *out);
|
||||
char *test_ldexpf(uint32 *x, uint32 *n, uint32 *out);
|
||||
char *test_frexpf(uint32 *x, uint32 *out, uint32 *nout);
|
||||
char *test_modff(uint32 *x, uint32 *iout, uint32 *fout);
|
||||
|
||||
char *test_copysign(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_copysignf(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_isfinite(uint32 *x, uint32 *out);
|
||||
char *test_isfinitef(uint32 *x, uint32 *out);
|
||||
char *test_isinf(uint32 *x, uint32 *out);
|
||||
char *test_isinff(uint32 *x, uint32 *out);
|
||||
char *test_isnan(uint32 *x, uint32 *out);
|
||||
char *test_isnanf(uint32 *x, uint32 *out);
|
||||
char *test_isnormal(uint32 *x, uint32 *out);
|
||||
char *test_isnormalf(uint32 *x, uint32 *out);
|
||||
char *test_signbit(uint32 *x, uint32 *out);
|
||||
char *test_signbitf(uint32 *x, uint32 *out);
|
||||
char *test_fpclassify(uint32 *x, uint32 *out);
|
||||
char *test_fpclassifyf(uint32 *x, uint32 *out);
|
||||
|
||||
char *test_isgreater(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_isgreaterequal(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_isless(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_islessequal(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_islessgreater(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_isunordered(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_isgreaterf(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_isgreaterequalf(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_islessf(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_islessequalf(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_islessgreaterf(uint32 *x, uint32 *y, uint32 *out);
|
||||
char *test_isunorderedf(uint32 *x, uint32 *y, uint32 *out);
|
||||
#endif
|
||||
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* types.h
|
||||
*
|
||||
* Copyright (c) 2005-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef mathtest_types_h
#define mathtest_types_h

#include <limits.h>

/*
 * Select the native integer type whose unsigned maximum is exactly
 * 2^32-1 and expose it as uint32 / int32. 'unsigned int' is tried first,
 * then 'unsigned long'; the build fails if neither matches.
 *
 * I32 is "" when int is chosen and "l" when long is chosen
 * (presumably the printf length modifier for these types - confirm
 * against the users of I32).
 */
#if UINT_MAX == 0xffffffff
typedef unsigned int uint32;
typedef int int32;
#define I32 ""
#elif ULONG_MAX == 0xffffffff
typedef unsigned long uint32;
typedef long int32;
#define I32 "l"
#else
#error Could not find an unsigned 32-bit integer type
#endif

#endif
|
||||
@@ -0,0 +1,261 @@
|
||||
/*
|
||||
* wrappers.c - wrappers to modify output of MPFR/MPC test functions
|
||||
*
|
||||
* Copyright (c) 2014-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "intern.h"
|
||||
|
||||
/*
 * Reset a wrapper context to its empty state: operand and result counts
 * are zeroed, every operand/result pointer is set to NULL and the
 * need_regen flag is cleared. size_ops and size_result are not touched.
 */
void wrapper_init(wrapperctx *ctx)
{
    int slot;

    ctx->nresults = 0;
    ctx->nops = 0;

    for (slot = 0; slot < 2; slot++) {
        ctx->ieee_ops[slot] = NULL;
        ctx->mpc_ops[slot] = NULL;
        ctx->mpfr_ops[slot] = NULL;
    }

    ctx->ieee_result = NULL;
    ctx->mpc_result = NULL;
    ctx->mpfr_result = NULL;

    ctx->need_regen = 0;
}
|
||||
|
||||
/*
 * Register a real operand in the next free operand slot: records its
 * MPFR value, its IEEE word array and its size, then bumps the operand
 * count. Asserts that fewer than two operands are already registered.
 */
void wrapper_op_real(wrapperctx *ctx, const mpfr_t r,
                     int size, const uint32 *ieee)
{
    int slot;

    assert(ctx->nops < 2);
    slot = ctx->nops++;

    ctx->size_ops[slot] = size;
    ctx->ieee_ops[slot] = ieee;
    ctx->mpfr_ops[slot] = r;
}
|
||||
|
||||
/*
 * Register a complex operand in the next free operand slot: records its
 * MPC value, its IEEE word array and its size, then bumps the operand
 * count. Asserts that fewer than two operands are already registered.
 */
void wrapper_op_complex(wrapperctx *ctx, const mpc_t c,
                        int size, const uint32 *ieee)
{
    int slot;

    assert(ctx->nops < 2);
    slot = ctx->nops++;

    ctx->size_ops[slot] = size;
    ctx->ieee_ops[slot] = ieee;
    ctx->mpc_ops[slot] = c;
}
|
||||
|
||||
/*
 * Register the (single) real result: records its MPFR value, its IEEE
 * word array and its size, then bumps the result count. Asserts that no
 * result has been registered yet.
 */
void wrapper_result_real(wrapperctx *ctx, mpfr_t r,
                         int size, uint32 *ieee)
{
    assert(ctx->nresults < 1);

    ctx->size_result = size;
    ctx->ieee_result = ieee;
    ctx->mpfr_result = r;

    ctx->nresults += 1;
}
|
||||
|
||||
/*
 * Register the (single) complex result: records its MPC value, its IEEE
 * word array and its size, then bumps the result count. Asserts that no
 * result has been registered yet.
 */
void wrapper_result_complex(wrapperctx *ctx, mpc_t c,
                            int size, uint32 *ieee)
{
    assert(ctx->nresults < 1);

    ctx->size_result = size;
    ctx->ieee_result = ieee;
    ctx->mpc_result = c;

    ctx->nresults += 1;
}
|
||||
|
||||
/*
 * Invoke each entry of 'wrappers' on ctx, stopping at the first NULL
 * entry or after MAXWRAPPERS entries, then invoke universal_wrapper.
 *
 * Returns ctx->need_regen as left by the wrappers.
 */
int wrapper_run(wrapperctx *ctx, wrapperfunc wrappers[MAXWRAPPERS])
{
    int idx = 0;

    while (idx < MAXWRAPPERS && wrappers[idx]) {
        wrappers[idx](ctx);
        idx++;
    }

    universal_wrapper(ctx);

    return ctx->need_regen;
}
|
||||
|
||||
/*
 * Return the MPFR value of operand 'op', or of the result when op is
 * negative. Asserts that the requested value has been registered.
 */
mpfr_srcptr wrapper_get_mpfr(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpfr_ops[op]);
        return ctx->mpfr_ops[op];
    }

    assert(ctx->mpfr_result);
    return ctx->mpfr_result;
}
|
||||
|
||||
/*
 * Return the IEEE word array of real operand 'op', or of the real result
 * when op is negative. The assertion checks that the matching MPFR
 * value (not the IEEE pointer itself) has been registered.
 */
const uint32 *wrapper_get_ieee(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpfr_ops[op]);
        return ctx->ieee_ops[op];
    }

    assert(ctx->mpfr_result);
    return ctx->ieee_result;
}
|
||||
|
||||
/* Return the number of operands registered in ctx so far. */
int wrapper_get_nops(wrapperctx *ctx)
{
    const int count = ctx->nops;

    return count;
}
|
||||
|
||||
/*
 * Return the recorded size of operand 'op', or of the result when op is
 * negative. Asserts that the requested item has been registered as
 * either a real or a complex value.
 */
int wrapper_get_size(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpfr_ops[op] || ctx->mpc_ops[op]);
        return ctx->size_ops[op];
    }

    assert(ctx->mpfr_result || ctx->mpc_result);
    return ctx->size_result;
}
|
||||
|
||||
/*
 * Return nonzero when operand 'op' (or the result, when op is negative)
 * was registered as a complex value, zero otherwise. Asserts that the
 * requested item has been registered at all.
 */
int wrapper_is_complex(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpfr_ops[op] || ctx->mpc_ops[op]);
        return ctx->mpc_ops[op] != NULL;
    }

    assert(ctx->mpfr_result || ctx->mpc_result);
    return ctx->mpc_result != NULL;
}
|
||||
|
||||
/*
 * Return the MPC value of operand 'op', or of the result when op is
 * negative. Asserts that the requested complex value has been registered.
 */
mpc_srcptr wrapper_get_mpc(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpc_ops[op]);
        return ctx->mpc_ops[op];
    }

    assert(ctx->mpc_result);
    return ctx->mpc_result;
}
|
||||
|
||||
/*
 * Return the real part (as an MPFR value) of complex operand 'op', or of
 * the complex result when op is negative. Asserts that the requested
 * complex value has been registered.
 */
mpfr_srcptr wrapper_get_mpfr_r(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpc_ops[op]);
        return mpc_realref(ctx->mpc_ops[op]);
    }

    assert(ctx->mpc_result);
    return mpc_realref(ctx->mpc_result);
}
|
||||
|
||||
/*
 * Return the imaginary part (as an MPFR value) of complex operand 'op',
 * or of the complex result when op is negative. Asserts that the
 * requested complex value has been registered.
 */
mpfr_srcptr wrapper_get_mpfr_i(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpc_ops[op]);
        return mpc_imagref(ctx->mpc_ops[op]);
    }

    assert(ctx->mpc_result);
    return mpc_imagref(ctx->mpc_result);
}
|
||||
|
||||
/*
 * Return the IEEE words of the real part of complex operand 'op', or of
 * the complex result when op is negative. The real part sits at the
 * start of the registered IEEE array in both cases.
 */
const uint32 *wrapper_get_ieee_r(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpc_ops[op]);
        return ctx->ieee_ops[op];
    }

    assert(ctx->mpc_result);
    return ctx->ieee_result;
}
|
||||
|
||||
/*
 * Return the IEEE words of the imaginary part of complex operand 'op',
 * or of the complex result when op is negative. The imaginary part
 * starts at word 2 of an operand array and at word 4 of the result array.
 */
const uint32 *wrapper_get_ieee_i(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpc_ops[op]);
        return &ctx->ieee_ops[op][2];
    }

    assert(ctx->mpc_result);
    return &ctx->ieee_result[4];
}
|
||||
|
||||
/*
 * OR the top bit of 'sign' into word 0 of the real result's IEEE array.
 * Because the bit is ORed in, this can set the sign bit but never clears
 * one that is already set. Asserts that a real result is registered.
 */
void wrapper_set_sign(wrapperctx *ctx, uint32 sign)
{
    const uint32 topbit = sign & 0x80000000U;

    assert(ctx->mpfr_result);
    ctx->ieee_result[0] = ctx->ieee_result[0] | topbit;
}
|
||||
|
||||
/*
 * OR the top bit of 'sign' into word 0 of the complex result's IEEE
 * array (the start of its real part). The bit is ORed in, so an already
 * set sign bit is never cleared. Asserts that a complex result is
 * registered.
 */
void wrapper_set_sign_r(wrapperctx *ctx, uint32 sign)
{
    const uint32 topbit = sign & 0x80000000U;

    assert(ctx->mpc_result);
    ctx->ieee_result[0] = ctx->ieee_result[0] | topbit;
}
|
||||
|
||||
/*
 * OR the top bit of 'sign' into word 4 of the complex result's IEEE
 * array (the start of its imaginary part). The bit is ORed in, so an
 * already set sign bit is never cleared. Asserts that a complex result
 * is registered.
 */
void wrapper_set_sign_i(wrapperctx *ctx, uint32 sign)
{
    const uint32 topbit = sign & 0x80000000U;

    assert(ctx->mpc_result);
    ctx->ieee_result[4] = ctx->ieee_result[4] | topbit;
}
|
||||
|
||||
/*
 * Overwrite the real MPFR result with NaN and flag the context as
 * needing regeneration. Asserts that a real result is registered.
 */
void wrapper_set_nan(wrapperctx *ctx)
{
    mpfr_ptr dest;

    assert(ctx->mpfr_result);
    dest = ctx->mpfr_result;

    mpfr_set_nan(dest);
    ctx->need_regen = 1;
}
|
||||
|
||||
/*
 * Overwrite the real part of the complex MPC result with NaN and flag
 * the context as needing regeneration. Asserts that a complex result is
 * registered.
 */
void wrapper_set_nan_r(wrapperctx *ctx)
{
    mpfr_ptr re;

    assert(ctx->mpc_result);
    re = mpc_realref(ctx->mpc_result);

    mpfr_set_nan(re); /* FIXME: better way? */
    ctx->need_regen = 1;
}
|
||||
|
||||
/*
 * Overwrite the imaginary part of the complex MPC result with NaN and
 * flag the context as needing regeneration. Asserts that a complex
 * result is registered.
 */
void wrapper_set_nan_i(wrapperctx *ctx)
{
    mpfr_ptr im;

    assert(ctx->mpc_result);
    im = mpc_imagref(ctx->mpc_result);

    mpfr_set_nan(im); /* FIXME: better way? */
    ctx->need_regen = 1;
}
|
||||
|
||||
/*
 * Overwrite the real MPFR result with the integer 'val' (rounded with
 * GMP_RNDN) and flag the context as needing regeneration. Asserts that a
 * real result is registered.
 */
void wrapper_set_int(wrapperctx *ctx, int val)
{
    mpfr_ptr dest;

    assert(ctx->mpfr_result);
    dest = ctx->mpfr_result;

    mpfr_set_si(dest, val, GMP_RNDN);
    ctx->need_regen = 1;
}
|
||||
|
||||
/*
 * Overwrite the real part of the complex MPC result with the integer
 * 'val' (rounded with GMP_RNDN) and flag the context as needing
 * regeneration. Asserts that a complex result is registered.
 */
void wrapper_set_int_r(wrapperctx *ctx, int val)
{
    mpfr_ptr re;

    assert(ctx->mpc_result);
    re = mpc_realref(ctx->mpc_result);

    mpfr_set_si(re, val, GMP_RNDN);
    ctx->need_regen = 1;
}
|
||||
|
||||
/*
 * Overwrite the imaginary part of the complex MPC result with the
 * integer 'val' (rounded with GMP_RNDN) and flag the context as needing
 * regeneration. Asserts that a complex result is registered.
 */
void wrapper_set_int_i(wrapperctx *ctx, int val)
{
    assert(ctx->mpc_result);
    /* The _i variant targets the imaginary part; the real part is
     * handled by wrapper_set_int_r. */
    mpfr_set_si(mpc_imagref(ctx->mpc_result), val, GMP_RNDN);
    ctx->need_regen = 1;
}
|
||||
|
||||
/*
 * Overwrite the real MPFR result with 'val' (rounded with GMP_RNDN) and
 * flag the context as needing regeneration. Asserts that a real result
 * is registered.
 */
void wrapper_set_mpfr(wrapperctx *ctx, const mpfr_t val)
{
    mpfr_ptr dest;

    assert(ctx->mpfr_result);
    dest = ctx->mpfr_result;

    mpfr_set(dest, val, GMP_RNDN);
    ctx->need_regen = 1;
}
|
||||
|
||||
/*
 * Overwrite the real part of the complex MPC result with 'val' (rounded
 * with GMP_RNDN) and flag the context as needing regeneration. Asserts
 * that a complex result is registered.
 */
void wrapper_set_mpfr_r(wrapperctx *ctx, const mpfr_t val)
{
    mpfr_ptr re;

    assert(ctx->mpc_result);
    re = mpc_realref(ctx->mpc_result);

    mpfr_set(re, val, GMP_RNDN);
    ctx->need_regen = 1;
}
|
||||
|
||||
/*
 * Overwrite the imaginary part of the complex MPC result with 'val'
 * (rounded with GMP_RNDN) and flag the context as needing regeneration.
 * Asserts that a complex result is registered.
 */
void wrapper_set_mpfr_i(wrapperctx *ctx, const mpfr_t val)
{
    assert(ctx->mpc_result);
    /* The _i variant targets the imaginary part; the real part is
     * handled by wrapper_set_mpfr_r. */
    mpfr_set(mpc_imagref(ctx->mpc_result), val, GMP_RNDN);
    ctx->need_regen = 1;
}
|
||||
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* wrappers.h - wrappers to modify output of MPFR/MPC test functions
|
||||
*
|
||||
* Copyright (c) 2014-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
/* Structure type should be considered opaque outside wrappers.c,
|
||||
* though we have to define it here so its size is known. */
|
||||
int nops;
|
||||
int nresults;
|
||||
mpfr_srcptr mpfr_ops[2];
|
||||
mpfr_ptr mpfr_result;
|
||||
mpc_srcptr mpc_ops[2];
|
||||
mpc_ptr mpc_result;
|
||||
const uint32 *ieee_ops[2];
|
||||
uint32 *ieee_result;
|
||||
int size_ops[2];
|
||||
int size_result;
|
||||
int need_regen;
|
||||
} wrapperctx;
|
||||
|
||||
typedef void (*wrapperfunc)(wrapperctx *ctx);
|
||||
#define MAXWRAPPERS 3
|
||||
|
||||
/*
|
||||
* Functions for the test harness to call.
|
||||
*
|
||||
* When the test harness executes a test function, it should
|
||||
* initialise a wrapperctx with wrapper_init, then provide all the
|
||||
* operands and results in both mpfr/mpc and IEEE (+ extrabits)
|
||||
* formats via wrapper_op_* and wrapper_result_*. Then it should run
|
||||
* the function's wrappers using wrapper_run(), and if that returns
|
||||
* true then the primary result has been rewritten in mpfr/mpc format
|
||||
* and it should therefore retranslate into IEEE.
|
||||
*
|
||||
* 'size' in all prototypes below represents an FP type by giving the
|
||||
* number of 32-bit words it requires, so 1=float and 2=double. Input
|
||||
* operands will be that many words (or that many for both their real
|
||||
* and imag parts); outputs will have one extra word for 'extrabits'.
|
||||
*
|
||||
* This system only applies at all to reference functions using
|
||||
* mpfr/mpc. The seminumerical functions we implement in pure IEEE
|
||||
* form are expected to handle all their own special cases correctly.
|
||||
*/
|
||||
|
||||
void wrapper_init(wrapperctx *ctx);
|
||||
|
||||
/* Real operand. */
|
||||
void wrapper_op_real(wrapperctx *ctx, const mpfr_t r,
|
||||
int size, const uint32 *ieee);
|
||||
|
||||
/* Complex operand. Real part starts at ieee[0], the imag part at ieee[2]. */
|
||||
void wrapper_op_complex(wrapperctx *ctx, const mpc_t c,
|
||||
int size, const uint32 *ieee);
|
||||
|
||||
/* Real result. ieee contains size+1 words, as discussed above. */
|
||||
void wrapper_result_real(wrapperctx *ctx, mpfr_t r,
|
||||
int size, uint32 *ieee);
|
||||
|
||||
/* Complex result. ieee contains size+1 words of real part starting at
|
||||
* ieee[0], and another size+1 of imag part starting at ieee[4]. */
|
||||
void wrapper_result_complex(wrapperctx *ctx, mpc_t c,
|
||||
int size, uint32 *ieee);
|
||||
|
||||
int wrapper_run(wrapperctx *ctx, wrapperfunc wrappers[MAXWRAPPERS]);
|
||||
|
||||
/*
|
||||
* Functions for wrappers to call. 'op' indicates which operand is
|
||||
* being requested: 0,1 means first and second, and -1 means the
|
||||
* result.
|
||||
*/
|
||||
|
||||
mpfr_srcptr wrapper_get_mpfr(wrapperctx *ctx, int op);
|
||||
const uint32 *wrapper_get_ieee(wrapperctx *ctx, int op);
|
||||
|
||||
mpc_srcptr wrapper_get_mpc(wrapperctx *ctx, int op);
|
||||
mpfr_srcptr wrapper_get_mpfr_r(wrapperctx *ctx, int op);
|
||||
mpfr_srcptr wrapper_get_mpfr_i(wrapperctx *ctx, int op);
|
||||
const uint32 *wrapper_get_ieee_r(wrapperctx *ctx, int op);
|
||||
const uint32 *wrapper_get_ieee_i(wrapperctx *ctx, int op);
|
||||
|
||||
/* Query operand count + types */
|
||||
int wrapper_get_nops(wrapperctx *ctx);
|
||||
int wrapper_get_size(wrapperctx *ctx, int op);
|
||||
int wrapper_is_complex(wrapperctx *ctx, int op);
|
||||
|
||||
/* Change just the sign of the result. Only the top bit of 'sign' is used. */
|
||||
void wrapper_set_sign(wrapperctx *ctx, uint32 sign);
|
||||
void wrapper_set_sign_r(wrapperctx *ctx, uint32 sign);
|
||||
void wrapper_set_sign_i(wrapperctx *ctx, uint32 sign);
|
||||
|
||||
/* Set a result to NaN. */
|
||||
void wrapper_set_nan(wrapperctx *ctx);
|
||||
void wrapper_set_nan_r(wrapperctx *ctx);
|
||||
void wrapper_set_nan_i(wrapperctx *ctx);
|
||||
|
||||
/* Set a result to an integer value (converted to the appropriate
|
||||
* float format). */
|
||||
void wrapper_set_int(wrapperctx *ctx, int val);
|
||||
void wrapper_set_int_r(wrapperctx *ctx, int val);
|
||||
void wrapper_set_int_i(wrapperctx *ctx, int val);
|
||||
|
||||
/* Set a result to a new MPFR float. */
|
||||
void wrapper_set_mpfr(wrapperctx *ctx, const mpfr_t val);
|
||||
void wrapper_set_mpfr_r(wrapperctx *ctx, const mpfr_t val);
|
||||
void wrapper_set_mpfr_i(wrapperctx *ctx, const mpfr_t val);
|
||||
|
||||
/*
|
||||
* A universal wrapper called for _all_ functions, that doesn't have
|
||||
* to be specified individually everywhere.
|
||||
*/
|
||||
void universal_wrapper(wrapperctx *ctx);
|
||||
@@ -0,0 +1,294 @@
|
||||
#!/bin/bash
|
||||
|
||||
# ULP error check script.
|
||||
#
|
||||
# Copyright (c) 2019, Arm Limited.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
#set -x
|
||||
set -eu
|
||||
|
||||
# cd to bin directory.
|
||||
cd "${0%/*}"
|
||||
|
||||
rmodes='n u d z'
|
||||
#rmodes=n
|
||||
flags="${ULPFLAGS:--q}"
|
||||
emu="$@"
|
||||
|
||||
FAIL=0
|
||||
PASS=0
|
||||
|
||||
# Run one ./ulp test with the given arguments and count the outcome.
# The error limit passed via -e is $L when the rounding mode $r is "n"
# and $Ldir otherwise. A zero exit status increments PASS, anything else
# increments FAIL.
t() {
	if [ $r = "n" ]; then
		Lt=$L
	else
		Lt=$Ldir
	fi
	if $emu ./ulp -r $r -e $Lt $flags "$@"; then
		PASS=$((PASS+1))
	else
		FAIL=$((FAIL+1))
	fi
}
|
||||
|
||||
# Run ./ulp with -f -q and the given arguments, discarding its stdout;
# the exit status of ./ulp is the status of this function.
check() {
	$emu ./ulp -f -q "$@" > /dev/null
}
|
||||
|
||||
Ldir=0.5
|
||||
for r in $rmodes
|
||||
do
|
||||
L=0.01
|
||||
t exp 0 0xffff000000000000 10000
|
||||
t exp 0x1p-6 0x1p6 40000
|
||||
t exp -0x1p-6 -0x1p6 40000
|
||||
t exp 633.3 733.3 10000
|
||||
t exp -633.3 -777.3 10000
|
||||
|
||||
L=0.01
|
||||
t exp2 0 0xffff000000000000 10000
|
||||
t exp2 0x1p-6 0x1p6 40000
|
||||
t exp2 -0x1p-6 -0x1p6 40000
|
||||
t exp2 633.3 733.3 10000
|
||||
t exp2 -633.3 -777.3 10000
|
||||
|
||||
L=0.02
|
||||
t log 0 0xffff000000000000 10000
|
||||
t log 0x1p-4 0x1p4 40000
|
||||
t log 0 inf 40000
|
||||
|
||||
L=0.05
|
||||
t log2 0 0xffff000000000000 10000
|
||||
t log2 0x1p-4 0x1p4 40000
|
||||
t log2 0 inf 40000
|
||||
|
||||
L=0.05
|
||||
t pow 0.5 2.0 x 0 inf 20000
|
||||
t pow -0.5 -2.0 x 0 inf 20000
|
||||
t pow 0.5 2.0 x -0 -inf 20000
|
||||
t pow -0.5 -2.0 x -0 -inf 20000
|
||||
t pow 0.5 2.0 x 0x1p-10 0x1p10 40000
|
||||
t pow 0.5 2.0 x -0x1p-10 -0x1p10 40000
|
||||
t pow 0 inf x 0.5 2.0 80000
|
||||
t pow 0 inf x -0.5 -2.0 80000
|
||||
t pow 0x1.fp-1 0x1.08p0 x 0x1p8 0x1p17 80000
|
||||
t pow 0x1.fp-1 0x1.08p0 x -0x1p8 -0x1p17 80000
|
||||
t pow 0 0x1p-1000 x 0 1.0 50000
|
||||
t pow 0x1p1000 inf x 0 1.0 50000
|
||||
t pow 0x1.ffffffffffff0p-1 0x1.0000000000008p0 x 0x1p60 0x1p68 50000
|
||||
t pow 0x1.ffffffffff000p-1 0x1p0 x 0x1p50 0x1p52 50000
|
||||
t pow -0x1.ffffffffff000p-1 -0x1p0 x 0x1p50 0x1p52 50000
|
||||
|
||||
L=0.01
|
||||
t expf 0 0xffff0000 10000
|
||||
t expf 0x1p-14 0x1p8 50000
|
||||
t expf -0x1p-14 -0x1p8 50000
|
||||
|
||||
L=0.01
|
||||
t exp2f 0 0xffff0000 10000
|
||||
t exp2f 0x1p-14 0x1p8 50000
|
||||
t exp2f -0x1p-14 -0x1p8 50000
|
||||
|
||||
L=0.32
|
||||
t logf 0 0xffff0000 10000
|
||||
t logf 0x1p-4 0x1p4 50000
|
||||
t logf 0 inf 50000
|
||||
|
||||
L=0.26
|
||||
t log2f 0 0xffff0000 10000
|
||||
t log2f 0x1p-4 0x1p4 50000
|
||||
t log2f 0 inf 50000
|
||||
|
||||
L=0.06
|
||||
t sinf 0 0xffff0000 10000
|
||||
t sinf 0x1p-14 0x1p54 50000
|
||||
t sinf -0x1p-14 -0x1p54 50000
|
||||
|
||||
L=0.06
|
||||
t cosf 0 0xffff0000 10000
|
||||
t cosf 0x1p-14 0x1p54 50000
|
||||
t cosf -0x1p-14 -0x1p54 50000
|
||||
|
||||
L=0.06
|
||||
t sincosf_sinf 0 0xffff0000 10000
|
||||
t sincosf_sinf 0x1p-14 0x1p54 50000
|
||||
t sincosf_sinf -0x1p-14 -0x1p54 50000
|
||||
|
||||
L=0.06
|
||||
t sincosf_cosf 0 0xffff0000 10000
|
||||
t sincosf_cosf 0x1p-14 0x1p54 50000
|
||||
t sincosf_cosf -0x1p-14 -0x1p54 50000
|
||||
|
||||
L=0.4
|
||||
t powf 0x1p-1 0x1p1 x 0x1p-7 0x1p7 50000
|
||||
t powf 0x1p-1 0x1p1 x -0x1p-7 -0x1p7 50000
|
||||
t powf 0x1p-70 0x1p70 x 0x1p-1 0x1p1 50000
|
||||
t powf 0x1p-70 0x1p70 x -0x1p-1 -0x1p1 50000
|
||||
t powf 0x1.ep-1 0x1.1p0 x 0x1p8 0x1p14 50000
|
||||
t powf 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p14 50000
|
||||
done
|
||||
|
||||
# vector functions
|
||||
Ldir=0.5
|
||||
r='n'
|
||||
flags="${ULPFLAGS:--q} -f"
|
||||
runs=
|
||||
check __s_exp 1 && runs=1
|
||||
runv=
|
||||
check __v_exp 1 && runv=1
|
||||
runvn=
|
||||
check __vn_exp 1 && runvn=1
|
||||
|
||||
range_exp='
|
||||
0 0xffff000000000000 10000
|
||||
0x1p-6 0x1p6 400000
|
||||
-0x1p-6 -0x1p6 400000
|
||||
633.3 733.3 10000
|
||||
-633.3 -777.3 10000
|
||||
'
|
||||
|
||||
range_log='
|
||||
0 0xffff000000000000 10000
|
||||
0x1p-4 0x1p4 400000
|
||||
0 inf 400000
|
||||
'
|
||||
|
||||
range_pow='
|
||||
0x1p-1 0x1p1 x 0x1p-10 0x1p10 50000
|
||||
0x1p-1 0x1p1 x -0x1p-10 -0x1p10 50000
|
||||
0x1p-500 0x1p500 x 0x1p-1 0x1p1 50000
|
||||
0x1p-500 0x1p500 x -0x1p-1 -0x1p1 50000
|
||||
0x1.ep-1 0x1.1p0 x 0x1p8 0x1p16 50000
|
||||
0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p16 50000
|
||||
'
|
||||
|
||||
range_sin='
|
||||
0 0xffff000000000000 10000
|
||||
0x1p-4 0x1p4 400000
|
||||
-0x1p-23 0x1p23 400000
|
||||
'
|
||||
range_cos="$range_sin"
|
||||
|
||||
range_expf='
|
||||
0 0xffff0000 10000
|
||||
0x1p-14 0x1p8 500000
|
||||
-0x1p-14 -0x1p8 500000
|
||||
'
|
||||
|
||||
range_expf_1u="$range_expf"
|
||||
range_exp2f="$range_expf"
|
||||
range_exp2f_1u="$range_expf"
|
||||
|
||||
range_logf='
|
||||
0 0xffff0000 10000
|
||||
0x1p-4 0x1p4 500000
|
||||
'
|
||||
|
||||
range_sinf='
|
||||
0 0xffff0000 10000
|
||||
0x1p-4 0x1p4 300000
|
||||
-0x1p-9 -0x1p9 300000
|
||||
'
|
||||
range_cosf="$range_sinf"
|
||||
|
||||
range_powf='
|
||||
0x1p-1 0x1p1 x 0x1p-7 0x1p7 50000
|
||||
0x1p-1 0x1p1 x -0x1p-7 -0x1p7 50000
|
||||
0x1p-70 0x1p70 x 0x1p-1 0x1p1 50000
|
||||
0x1p-70 0x1p70 x -0x1p-1 -0x1p1 50000
|
||||
0x1.ep-1 0x1.1p0 x 0x1p8 0x1p14 50000
|
||||
0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p14 50000
|
||||
'
|
||||
|
||||
# error limits
|
||||
L_exp=1.9
|
||||
L_log=1.2
|
||||
L_pow=0.05
|
||||
L_sin=3.0
|
||||
L_cos=3.0
|
||||
L_expf=1.49
|
||||
L_expf_1u=0.4
|
||||
L_exp2f=1.49
|
||||
L_exp2f_1u=0.4
|
||||
L_logf=2.9
|
||||
L_sinf=1.4
|
||||
L_cosf=1.4
|
||||
L_powf=2.1
|
||||
|
||||
while read G F R
|
||||
do
|
||||
[ "$R" = 1 ] || continue
|
||||
case "$G" in \#*) continue ;; esac
|
||||
eval range="\${range_$G}"
|
||||
eval L="\${L_$G}"
|
||||
while read X
|
||||
do
|
||||
[ -n "$X" ] || continue
|
||||
case "$X" in \#*) continue ;; esac
|
||||
t $F $X
|
||||
done << EOF
|
||||
$range
|
||||
EOF
|
||||
done << EOF
|
||||
# group symbol run
|
||||
exp __s_exp $runs
|
||||
exp __v_exp $runv
|
||||
exp __vn_exp $runvn
|
||||
exp _ZGVnN2v_exp $runvn
|
||||
|
||||
log __s_log $runs
|
||||
log __v_log $runv
|
||||
log __vn_log $runvn
|
||||
log _ZGVnN2v_log $runvn
|
||||
|
||||
pow __s_pow $runs
|
||||
pow __v_pow $runv
|
||||
pow __vn_pow $runvn
|
||||
pow _ZGVnN2vv_pow $runvn
|
||||
|
||||
sin __s_sin $runs
|
||||
sin __v_sin $runv
|
||||
sin __vn_sin $runvn
|
||||
sin _ZGVnN2v_sin $runvn
|
||||
|
||||
cos __s_cos $runs
|
||||
cos __v_cos $runv
|
||||
cos __vn_cos $runvn
|
||||
cos _ZGVnN2v_cos $runvn
|
||||
|
||||
expf __s_expf $runs
|
||||
expf __v_expf $runv
|
||||
expf __vn_expf $runvn
|
||||
expf _ZGVnN4v_expf $runvn
|
||||
|
||||
expf_1u __s_expf_1u $runs
|
||||
expf_1u __v_expf_1u $runv
|
||||
expf_1u __vn_expf_1u $runvn
|
||||
|
||||
exp2f __s_exp2f $runs
|
||||
exp2f __v_exp2f $runv
|
||||
exp2f __vn_exp2f $runvn
|
||||
exp2f _ZGVnN4v_exp2f $runvn
|
||||
|
||||
exp2f_1u __s_exp2f_1u $runs
|
||||
exp2f_1u __v_exp2f_1u $runv
|
||||
exp2f_1u __vn_exp2f_1u $runvn
|
||||
|
||||
logf __s_logf $runs
|
||||
logf __v_logf $runv
|
||||
logf __vn_logf $runvn
|
||||
logf _ZGVnN4v_logf $runvn
|
||||
|
||||
sinf __s_sinf $runs
|
||||
sinf __v_sinf $runv
|
||||
sinf __vn_sinf $runvn
|
||||
sinf _ZGVnN4v_sinf $runvn
|
||||
|
||||
cosf __s_cosf $runs
|
||||
cosf __v_cosf $runv
|
||||
cosf __vn_cosf $runvn
|
||||
cosf _ZGVnN4v_cosf $runvn
|
||||
|
||||
powf __s_powf $runs
|
||||
powf __v_powf $runv
|
||||
powf __vn_powf $runvn
|
||||
powf _ZGVnN4vv_powf $runvn
|
||||
EOF
|
||||
|
||||
[ 0 -eq $FAIL ] || {
|
||||
echo "FAILED $FAIL PASSED $PASS"
|
||||
exit 1
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
; cosf.tst - Directed test cases for SP cosine
|
||||
;
|
||||
; Copyright (c) 2007-2018, Arm Limited.
|
||||
; SPDX-License-Identifier: MIT
|
||||
|
||||
func=cosf op1=7fc00001 result=7fc00001 errno=0
|
||||
func=cosf op1=ffc00001 result=7fc00001 errno=0
|
||||
func=cosf op1=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=cosf op1=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=cosf op1=7f800000 result=7fc00001 errno=EDOM status=i
|
||||
func=cosf op1=ff800000 result=7fc00001 errno=EDOM status=i
|
||||
func=cosf op1=00000000 result=3f800000 errno=0
|
||||
func=cosf op1=80000000 result=3f800000 errno=0
|
||||
; SDCOMP-26094: check cosf in the cases for which the range reducer
|
||||
; returns values furthest beyond its nominal upper bound of pi/4.
|
||||
func=cosf op1=46427f1b result=3f34dc5c.565 error=0
|
||||
func=cosf op1=4647e568 result=3f34dc33.c1f error=0
|
||||
func=cosf op1=46428bac result=bf34dbf2.8e3 error=0
|
||||
func=cosf op1=4647f1f9 result=bf34dbc9.f9b error=0
|
||||
func=cosf op1=4647fe8a result=3f34db60.313 error=0
|
||||
func=cosf op1=45d8d7f1 result=bf35006a.7fd error=0
|
||||
func=cosf op1=45d371a4 result=3f350056.39b error=0
|
||||
func=cosf op1=45ce0b57 result=bf350041.f38 error=0
|
||||
func=cosf op1=45d35882 result=bf34ffec.868 error=0
|
||||
func=cosf op1=45cdf235 result=3f34ffd8.404 error=0
|
||||
@@ -0,0 +1,31 @@
|
||||
; Directed test cases for exp
|
||||
;
|
||||
; Copyright (c) 2018, Arm Limited.
|
||||
; SPDX-License-Identifier: MIT
|
||||
|
||||
func=exp op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
|
||||
func=exp op1=fff80000.00000001 result=7ff80000.00000001 errno=0
|
||||
func=exp op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
|
||||
func=exp op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
|
||||
func=exp op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
|
||||
func=exp op1=fff00000.00000000 result=00000000.00000000 errno=0
|
||||
func=exp op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
|
||||
func=exp op1=ffefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
|
||||
func=exp op1=00000000.00000000 result=3ff00000.00000000 errno=0
|
||||
func=exp op1=80000000.00000000 result=3ff00000.00000000 errno=0
|
||||
func=exp op1=00000000.00000001 result=3ff00000.00000000 errno=0
|
||||
func=exp op1=80000000.00000001 result=3ff00000.00000000 errno=0
|
||||
func=exp op1=3c900000.00000000 result=3ff00000.00000000.400 errno=0
|
||||
func=exp op1=bc900000.00000000 result=3fefffff.ffffffff.800 errno=0
|
||||
func=exp op1=3fe00000.00000000 result=3ffa6129.8e1e069b.c97 errno=0
|
||||
func=exp op1=bfe00000.00000000 result=3fe368b2.fc6f9609.fe8 errno=0
|
||||
func=exp op1=3ff00000.00000000 result=4005bf0a.8b145769.535 errno=0
|
||||
func=exp op1=bff00000.00000000 result=3fd78b56.362cef37.c6b errno=0
|
||||
func=exp op1=40000000.00000000 result=401d8e64.b8d4ddad.cc3 errno=0
|
||||
func=exp op1=c0000000.00000000 result=3fc152aa.a3bf81cb.9fe errno=0
|
||||
func=exp op1=3ff12345.6789abcd result=40075955.c34718ed.6e3 errno=0
|
||||
func=exp op1=40862e42.fefa39ef result=7fefffff.ffffff2a.1b1 errno=0
|
||||
func=exp op1=40862e42.fefa39f0 result=7ff00000.00000000 errno=ERANGE status=ox
|
||||
func=exp op1=c0874910.d52d3051 result=00000000.00000001 status=ux
|
||||
func=exp op1=c0874910.d52d3052 result=00000000.00000000 errno=ERANGE status=ux
|
||||
func=exp op1=c085d589.f2fe5107 result=00f00000.000000f1.46b errno=0
|
||||
@@ -0,0 +1,30 @@
|
||||
; Directed test cases for exp2
|
||||
;
|
||||
; Copyright (c) 2018, Arm Limited.
|
||||
; SPDX-License-Identifier: MIT
|
||||
|
||||
func=exp2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
|
||||
func=exp2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
|
||||
func=exp2 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
|
||||
func=exp2 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
|
||||
func=exp2 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
|
||||
func=exp2 op1=fff00000.00000000 result=00000000.00000000 errno=0
|
||||
func=exp2 op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
|
||||
func=exp2 op1=ffefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
|
||||
func=exp2 op1=00000000.00000000 result=3ff00000.00000000 errno=0
|
||||
func=exp2 op1=80000000.00000000 result=3ff00000.00000000 errno=0
|
||||
func=exp2 op1=00000000.00000001 result=3ff00000.00000000 errno=0
|
||||
func=exp2 op1=80000000.00000001 result=3ff00000.00000000 errno=0
|
||||
func=exp2 op1=3ca00000.00000000 result=3ff00000.00000000.58c errno=0
|
||||
func=exp2 op1=bc900000.00000000 result=3fefffff.ffffffff.a74 errno=0
|
||||
func=exp2 op1=3fe00000.00000000 result=3ff6a09e.667f3bcc.909 errno=0
|
||||
func=exp2 op1=bfe00000.00000000 result=3fe6a09e.667f3bcc.909 errno=0
|
||||
func=exp2 op1=3ff00000.00000000 result=40000000.00000000 errno=0
|
||||
func=exp2 op1=bff00000.00000000 result=3fe00000.00000000 errno=0
|
||||
func=exp2 op1=40000000.00000000 result=40100000.00000000 errno=0
|
||||
func=exp2 op1=c0000000.00000000 result=3fd00000.00000000 errno=0
|
||||
func=exp2 op1=3ff12345.6789abcd result=4000cef3.c5d12321.663 errno=0
|
||||
func=exp2 op1=408fffff.ffffffff result=7fefffff.fffffd3a.37a errno=0
|
||||
func=exp2 op1=40900000.00000000 result=7ff00000.00000000 errno=ERANGE status=ox
|
||||
func=exp2 op1=c090ca00.00000000 result=00000000.00000000.b50 status=ux
|
||||
func=exp2 op1=c090cc00.00000000 result=00000000.00000000 errno=ERANGE status=ux
|
||||
@@ -0,0 +1,25 @@
|
||||
; exp2f.tst - Directed test cases for exp2f
|
||||
;
|
||||
; Copyright (c) 2017-2018, Arm Limited.
|
||||
; SPDX-License-Identifier: MIT
|
||||
|
||||
func=exp2f op1=7fc00001 result=7fc00001 errno=0
|
||||
func=exp2f op1=ffc00001 result=7fc00001 errno=0
|
||||
func=exp2f op1=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=exp2f op1=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=exp2f op1=7f800000 result=7f800000 errno=0
|
||||
func=exp2f op1=7f7fffff result=7f800000 errno=ERANGE status=ox
|
||||
func=exp2f op1=ff800000 result=00000000 errno=0
|
||||
func=exp2f op1=ff7fffff result=00000000 errno=ERANGE status=ux
|
||||
func=exp2f op1=00000000 result=3f800000 errno=0
|
||||
func=exp2f op1=80000000 result=3f800000 errno=0
|
||||
func=exp2f op1=42fa0001 result=7e00002c.5c8 errno=0
|
||||
func=exp2f op1=42ffffff result=7f7fffa7.470 errno=0
|
||||
func=exp2f op1=43000000 result=7f800000 errno=ERANGE status=ox
|
||||
func=exp2f op1=43000001 result=7f800000 errno=ERANGE status=ox
|
||||
func=exp2f op1=c2fa0001 result=00ffffa7.470 errno=0
|
||||
func=exp2f op1=c2fc0000 result=00800000 errno=0
|
||||
func=exp2f op1=c2fc0001 result=007fffd3.a38 errno=0 status=ux
|
||||
func=exp2f op1=c3150000 result=00000001 errno=0
|
||||
func=exp2f op1=c3158000 result=00000000.800 errno=ERANGE status=ux
|
||||
func=exp2f op1=c3165432 result=00000000.4bd errno=ERANGE status=ux
|
||||
@@ -0,0 +1,23 @@
|
||||
; expf.tst - Directed test cases for expf
|
||||
;
|
||||
; Copyright (c) 2007-2018, Arm Limited.
|
||||
; SPDX-License-Identifier: MIT
|
||||
|
||||
func=expf op1=7fc00001 result=7fc00001 errno=0
|
||||
func=expf op1=ffc00001 result=7fc00001 errno=0
|
||||
func=expf op1=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=expf op1=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=expf op1=7f800000 result=7f800000 errno=0
|
||||
func=expf op1=7f7fffff result=7f800000 errno=ERANGE status=ox
|
||||
func=expf op1=ff800000 result=00000000 errno=0
|
||||
func=expf op1=ff7fffff result=00000000 errno=ERANGE status=ux
|
||||
func=expf op1=00000000 result=3f800000 errno=0
|
||||
func=expf op1=80000000 result=3f800000 errno=0
|
||||
func=expf op1=42affff8 result=7ef87ed4.e0c errno=0
|
||||
func=expf op1=42b00008 result=7ef88698.f67 errno=0
|
||||
func=expf op1=42cffff8 result=7f800000 errno=ERANGE status=ox
|
||||
func=expf op1=42d00008 result=7f800000 errno=ERANGE status=ox
|
||||
func=expf op1=c2affff8 result=0041eecc.041 errno=0 status=ux
|
||||
func=expf op1=c2b00008 result=0041ecbc.95e errno=0 status=ux
|
||||
func=expf op1=c2cffff8 result=00000000 errno=ERANGE status=ux
|
||||
func=expf op1=c2d00008 result=00000000 errno=ERANGE status=ux
|
||||
@@ -0,0 +1,21 @@
|
||||
; Directed test cases for log
|
||||
;
|
||||
; Copyright (c) 2018, Arm Limited.
|
||||
; SPDX-License-Identifier: MIT
|
||||
|
||||
func=log op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
|
||||
func=log op1=fff80000.00000001 result=7ff80000.00000001 errno=0
|
||||
func=log op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
|
||||
func=log op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
|
||||
func=log op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
|
||||
func=log op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
|
||||
func=log op1=7fefffff.ffffffff result=40862e42.fefa39ef.354 errno=0
|
||||
func=log op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
|
||||
func=log op1=3ff00000.00000000 result=00000000.00000000 errno=0
|
||||
func=log op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
|
||||
func=log op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
|
||||
func=log op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
|
||||
func=log op1=00000000.00000001 result=c0874385.446d71c3.639 errno=0
|
||||
func=log op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
|
||||
func=log op1=40000000.00000000 result=3fe62e42.fefa39ef.358 errno=0
|
||||
func=log op1=3fe00000.00000000 result=bfe62e42.fefa39ef.358 errno=0
|
||||
@@ -0,0 +1,21 @@
|
||||
; Directed test cases for log2
|
||||
;
|
||||
; Copyright (c) 2018, Arm Limited.
|
||||
; SPDX-License-Identifier: MIT
|
||||
|
||||
func=log2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
|
||||
func=log2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
|
||||
func=log2 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
|
||||
func=log2 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
|
||||
func=log2 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
|
||||
func=log2 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
|
||||
func=log2 op1=7fefffff.ffffffff result=408fffff.ffffffff.ffa errno=0
|
||||
func=log2 op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
|
||||
func=log2 op1=3ff00000.00000000 result=00000000.00000000 errno=0
|
||||
func=log2 op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
|
||||
func=log2 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
|
||||
func=log2 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
|
||||
func=log2 op1=00000000.00000001 result=c090c800.00000000 errno=0
|
||||
func=log2 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
|
||||
func=log2 op1=40000000.00000000 result=3ff00000.00000000 errno=0
|
||||
func=log2 op1=3fe00000.00000000 result=bff00000.00000000 errno=0
|
||||
@@ -0,0 +1,27 @@
|
||||
; log2f.tst - Directed test cases for log2f
|
||||
;
|
||||
; Copyright (c) 2017-2018, Arm Limited.
|
||||
; SPDX-License-Identifier: MIT
|
||||
|
||||
func=log2f op1=7fc00001 result=7fc00001 errno=0
|
||||
func=log2f op1=ffc00001 result=7fc00001 errno=0
|
||||
func=log2f op1=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=log2f op1=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=log2f op1=ff810000 result=7fc00001 errno=0 status=i
|
||||
func=log2f op1=7f800000 result=7f800000 errno=0
|
||||
func=log2f op1=ff800000 result=7fc00001 errno=EDOM status=i
|
||||
func=log2f op1=3f800000 result=00000000 errno=0
|
||||
func=log2f op1=00000000 result=ff800000 errno=ERANGE status=z
|
||||
func=log2f op1=80000000 result=ff800000 errno=ERANGE status=z
|
||||
func=log2f op1=80000001 result=7fc00001 errno=EDOM status=i
|
||||
|
||||
func=log2f op1=3f7d70a4 result=bc6d8f8b.7d4 error=0
|
||||
func=log2f op1=3f604189 result=be4394c8.395 error=0
|
||||
func=log2f op1=3f278034 result=bf1caa73.88e error=0
|
||||
func=log2f op1=3edd3c36 result=bf9af3b9.619 error=0
|
||||
func=log2f op1=3e61259a result=c00bdb95.650 error=0
|
||||
func=log2f op1=3f8147ae result=3c6b3267.d6a error=0
|
||||
func=log2f op1=3f8fbe77 result=3e2b5fe2.a1c error=0
|
||||
func=log2f op1=3fac3eea result=3edb4d5e.1fc error=0
|
||||
func=log2f op1=3fd6e632 result=3f3f5d3a.827 error=0
|
||||
func=log2f op1=40070838 result=3f89e055.a0a error=0
|
||||
@@ -0,0 +1,69 @@
|
||||
; logf.tst - Directed test cases for logf
|
||||
;
|
||||
; Copyright (c) 2007-2018, Arm Limited.
|
||||
; SPDX-License-Identifier: MIT
|
||||
|
||||
func=logf op1=7fc00001 result=7fc00001 errno=0
|
||||
func=logf op1=ffc00001 result=7fc00001 errno=0
|
||||
func=logf op1=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=logf op1=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=logf op1=ff810000 result=7fc00001 errno=0 status=i
|
||||
func=logf op1=7f800000 result=7f800000 errno=0
|
||||
func=logf op1=ff800000 result=7fc00001 errno=EDOM status=i
|
||||
func=logf op1=3f800000 result=00000000 errno=0
|
||||
func=logf op1=00000000 result=ff800000 errno=ERANGE status=z
|
||||
func=logf op1=80000000 result=ff800000 errno=ERANGE status=z
|
||||
func=logf op1=80000001 result=7fc00001 errno=EDOM status=i
|
||||
|
||||
; Directed tests for the special-case handling of log of things
|
||||
; very near 1
|
||||
func=logf op1=3f781e49 result=bd0016d9.4ae error=0
|
||||
func=logf op1=3f78e602 result=bce675e5.f31 error=0
|
||||
func=logf op1=3f844a18 result=3d07030e.ae1 error=0
|
||||
func=logf op1=3f79b55b result=bccbd88a.6cb error=0
|
||||
func=logf op1=3f7e2f5f result=bbe92452.74a error=0
|
||||
func=logf op1=3f7f1c03 result=bb6462c1.c2c error=0
|
||||
func=logf op1=3f78b213 result=bced23e2.f56 error=0
|
||||
func=logf op1=3f87d5c0 result=3d735847.b7a error=0
|
||||
func=logf op1=3f7fa6ad result=bab2c532.12d error=0
|
||||
func=logf op1=3f87c06a result=3d70d4b6.b5e error=0
|
||||
func=logf op1=3f79cf30 result=bcc88942.6e9 error=0
|
||||
func=logf op1=3f794c77 result=bcd94c6f.b1e error=0
|
||||
func=logf op1=3f835655 result=3cd2d8a0.0bf error=0
|
||||
func=logf op1=3f81b5c0 result=3c596d08.520 error=0
|
||||
func=logf op1=3f805e2f result=3b3c18d4.d2b error=0
|
||||
func=logf op1=3f7aa609 result=bcad0f90.fdb error=0
|
||||
func=logf op1=3f7a9091 result=bcafcd59.f83 error=0
|
||||
func=logf op1=3f7a7475 result=bcb36490.a0f error=0
|
||||
func=logf op1=3f823417 result=3c8bd287.fa6 error=0
|
||||
func=logf op1=3f7fbcc3 result=ba868bac.14c error=0
|
||||
func=logf op1=3f805fc9 result=3b3f4a76.169 error=0
|
||||
func=logf op1=3f833d43 result=3cccbc4f.cb7 error=0
|
||||
func=logf op1=3f7cb1de result=bc54e91e.6b5 error=0
|
||||
func=logf op1=3f7f2793 result=bb58c8af.bfc error=0
|
||||
func=logf op1=3f7bb8c3 result=bc8a0fc9.93c error=0
|
||||
func=logf op1=3f81d349 result=3c67fe09.42e error=0
|
||||
func=logf op1=3f7c254d result=bc788cf4.610 error=0
|
||||
func=logf op1=3f7f789d result=bb0786d9.6c6 error=0
|
||||
func=logf op1=3f7ed1f2 result=bb97605f.963 error=0
|
||||
func=logf op1=3f826067 result=3c96b4af.5e1 error=0
|
||||
func=logf op1=3f821a68 result=3c8581f9.dac error=0
|
||||
func=logf op1=3f864e1a result=3d44f368.e66 error=0
|
||||
func=logf op1=3f7fea3d result=b9ae1f66.b58 error=0
|
||||
func=logf op1=3f7cf4f5 result=bc43ed76.1c5 error=0
|
||||
func=logf op1=3f84c223 result=3d15814e.36d error=0
|
||||
func=logf op1=3f7dae6d result=bc1511d5.0aa error=0
|
||||
func=logf op1=3f7c0a3c result=bc7f6c0d.758 error=0
|
||||
func=logf op1=3f858b22 result=3d2da861.f36 error=0
|
||||
func=logf op1=3f85d7c7 result=3d36d490.ee9 error=0
|
||||
func=logf op1=3f7f2109 result=bb5f5851.2ed error=0
|
||||
func=logf op1=3f83809c result=3cdd23f7.6b1 error=0
|
||||
func=logf op1=3f83d96e result=3cf2b9c8.0b1 error=0
|
||||
func=logf op1=3f86ca84 result=3d53bee8.53f error=0
|
||||
func=logf op1=3f83548e result=3cd269c3.39d error=0
|
||||
func=logf op1=3f7c199c result=bc7b84b6.0da error=0
|
||||
func=logf op1=3f83133f result=3cc27c0a.9dd error=0
|
||||
func=logf op1=3f7c97b4 result=bc5b89dd.399 error=0
|
||||
func=logf op1=3f810bc1 result=3c05553c.011 error=0
|
||||
func=logf op1=3f7dadb8 result=bc153f7e.fbb error=0
|
||||
func=logf op1=3f87be56 result=3d709602.538 error=0
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,246 @@
|
||||
; powf.tst - Directed test cases for powf
|
||||
;
|
||||
; Copyright (c) 2007-2018, Arm Limited.
|
||||
; SPDX-License-Identifier: MIT
|
||||
|
||||
func=powf op1=7f800001 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=7fc00001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=ffc00001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=7f800000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=40800000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=40400000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=3f000000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=00000000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=80000000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=bf000000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=c0400000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=c0800000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=ff800000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800001 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800001 op2=7fc00001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800001 op2=ffc00001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800001 op2=7f800000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800001 op2=40800000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800001 op2=40400000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800001 op2=3f000000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800001 op2=00000000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800001 op2=80000000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800001 op2=bf000000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800001 op2=c0400000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800001 op2=c0800000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800001 op2=ff800000 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7fc00001 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7fc00001 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7fc00001 op2=7fc00001 result=7fc00001 errno=0
|
||||
func=powf op1=7fc00001 op2=ffc00001 result=7fc00001 errno=0
|
||||
func=powf op1=7fc00001 op2=7f800000 result=7fc00001 errno=0
|
||||
func=powf op1=7fc00001 op2=40800000 result=7fc00001 errno=0
|
||||
func=powf op1=7fc00001 op2=40400000 result=7fc00001 errno=0
|
||||
func=powf op1=7fc00001 op2=3f000000 result=7fc00001 errno=0
|
||||
func=powf op1=7fc00001 op2=00000000 result=3f800000 errno=0
|
||||
func=powf op1=7fc00001 op2=80000000 result=3f800000 errno=0
|
||||
func=powf op1=7fc00001 op2=bf000000 result=7fc00001 errno=0
|
||||
func=powf op1=7fc00001 op2=c0400000 result=7fc00001 errno=0
|
||||
func=powf op1=7fc00001 op2=c0800000 result=7fc00001 errno=0
|
||||
func=powf op1=7fc00001 op2=ff800000 result=7fc00001 errno=0
|
||||
func=powf op1=ffc00001 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ffc00001 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ffc00001 op2=7fc00001 result=7fc00001 errno=0
|
||||
func=powf op1=ffc00001 op2=ffc00001 result=7fc00001 errno=0
|
||||
func=powf op1=ffc00001 op2=7f800000 result=7fc00001 errno=0
|
||||
func=powf op1=ffc00001 op2=40800000 result=7fc00001 errno=0
|
||||
func=powf op1=ffc00001 op2=40400000 result=7fc00001 errno=0
|
||||
func=powf op1=ffc00001 op2=3f000000 result=7fc00001 errno=0
|
||||
func=powf op1=ffc00001 op2=00000000 result=3f800000 errno=0
|
||||
func=powf op1=ffc00001 op2=80000000 result=3f800000 errno=0
|
||||
func=powf op1=ffc00001 op2=bf000000 result=7fc00001 errno=0
|
||||
func=powf op1=ffc00001 op2=c0400000 result=7fc00001 errno=0
|
||||
func=powf op1=ffc00001 op2=c0800000 result=7fc00001 errno=0
|
||||
func=powf op1=ffc00001 op2=ff800000 result=7fc00001 errno=0
|
||||
func=powf op1=7f800000 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800000 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=7f800000 op2=7fc00001 result=7fc00001 errno=0
|
||||
func=powf op1=7f800000 op2=ffc00001 result=7fc00001 errno=0
|
||||
func=powf op1=7f800000 op2=7f800000 result=7f800000 errno=0
|
||||
func=powf op1=7f800000 op2=40800000 result=7f800000 errno=0
|
||||
func=powf op1=7f800000 op2=40400000 result=7f800000 errno=0
|
||||
func=powf op1=7f800000 op2=3f000000 result=7f800000 errno=0
|
||||
func=powf op1=7f800000 op2=00000001 result=7f800000 errno=0
|
||||
func=powf op1=7f800000 op2=00000000 result=3f800000 errno=0
|
||||
func=powf op1=7f800000 op2=80000000 result=3f800000 errno=0
|
||||
func=powf op1=7f800000 op2=bf000000 result=00000000 errno=0
|
||||
func=powf op1=7f800000 op2=c0400000 result=00000000 errno=0
|
||||
func=powf op1=7f800000 op2=c0800000 result=00000000 errno=0
|
||||
func=powf op1=7f800000 op2=ff800000 result=00000000 errno=0
|
||||
func=powf op1=40800000 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=40800000 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=40800000 op2=7fc00001 result=7fc00001 errno=0
|
||||
func=powf op1=40800000 op2=ffc00001 result=7fc00001 errno=0
|
||||
func=powf op1=40800000 op2=7f800000 result=7f800000 errno=0
|
||||
func=powf op1=40800000 op2=40800000 result=43800000 errno=0
|
||||
func=powf op1=40800000 op2=40400000 result=42800000 errno=0
|
||||
func=powf op1=40800000 op2=3f000000 result=40000000 errno=0
|
||||
func=powf op1=40800000 op2=00000000 result=3f800000 errno=0
|
||||
func=powf op1=40800000 op2=80000000 result=3f800000 errno=0
|
||||
func=powf op1=40800000 op2=bf000000 result=3f000000 errno=0
|
||||
func=powf op1=40800000 op2=c0400000 result=3c800000 errno=0
|
||||
func=powf op1=40800000 op2=c0800000 result=3b800000 errno=0
|
||||
func=powf op1=40800000 op2=ff800000 result=00000000 errno=0
|
||||
func=powf op1=3f800000 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=3f800000 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=3f800000 op2=7fc00001 result=3f800000 errno=0
|
||||
func=powf op1=3f800000 op2=ffc00001 result=3f800000 errno=0
|
||||
func=powf op1=3f800000 op2=7f800000 result=3f800000 errno=0
|
||||
func=powf op1=3f800000 op2=40800000 result=3f800000 errno=0
|
||||
func=powf op1=3f800000 op2=40400000 result=3f800000 errno=0
|
||||
func=powf op1=3f800000 op2=3f000000 result=3f800000 errno=0
|
||||
func=powf op1=3f800000 op2=00000000 result=3f800000 errno=0
|
||||
func=powf op1=3f800000 op2=80000000 result=3f800000 errno=0
|
||||
func=powf op1=3f800000 op2=bf000000 result=3f800000 errno=0
|
||||
func=powf op1=3f800000 op2=c0400000 result=3f800000 errno=0
|
||||
func=powf op1=3f800000 op2=c0800000 result=3f800000 errno=0
|
||||
func=powf op1=3f800000 op2=ff800000 result=3f800000 errno=0
|
||||
func=powf op1=3e800000 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=3e800000 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=3e800000 op2=7fc00001 result=7fc00001 errno=0
|
||||
func=powf op1=3e800000 op2=ffc00001 result=7fc00001 errno=0
|
||||
func=powf op1=3e800000 op2=7f800000 result=00000000 errno=0
|
||||
func=powf op1=3e800000 op2=40800000 result=3b800000 errno=0
|
||||
func=powf op1=3e800000 op2=40400000 result=3c800000 errno=0
|
||||
func=powf op1=3e800000 op2=3f000000 result=3f000000 errno=0
|
||||
func=powf op1=3e800000 op2=00000000 result=3f800000 errno=0
|
||||
func=powf op1=3e800000 op2=80000000 result=3f800000 errno=0
|
||||
func=powf op1=3e800000 op2=bf000000 result=40000000 errno=0
|
||||
func=powf op1=3e800000 op2=c0400000 result=42800000 errno=0
|
||||
func=powf op1=3e800000 op2=c0800000 result=43800000 errno=0
|
||||
func=powf op1=3e800000 op2=ff800000 result=7f800000 errno=0
|
||||
func=powf op1=00000001 op2=bf800000 result=7f800000 errno=ERANGE status=ox
|
||||
func=powf op1=00000000 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=00000000 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=00000000 op2=7fc00001 result=7fc00001 errno=0
|
||||
func=powf op1=00000000 op2=ffc00001 result=7fc00001 errno=0
|
||||
func=powf op1=00000000 op2=7f800000 result=00000000 errno=0
|
||||
func=powf op1=00000000 op2=40800000 result=00000000 errno=0
|
||||
func=powf op1=00000000 op2=40400000 result=00000000 errno=0
|
||||
func=powf op1=00000000 op2=3f000000 result=00000000 errno=0
|
||||
func=powf op1=00000000 op2=00000000 result=3f800000 errno=0
|
||||
func=powf op1=00000000 op2=80000000 result=3f800000 errno=0
|
||||
func=powf op1=00000000 op2=bf000000 result=7f800000 errno=ERANGE status=z
|
||||
func=powf op1=00000000 op2=c0400000 result=7f800000 errno=ERANGE status=z
|
||||
func=powf op1=00000000 op2=c0800000 result=7f800000 errno=ERANGE status=z
|
||||
func=powf op1=00000000 op2=ff800000 result=7f800000 errno=ERANGE
|
||||
func=powf op1=80000000 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=80000000 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=80000000 op2=7fc00001 result=7fc00001 errno=0
|
||||
func=powf op1=80000000 op2=ffc00001 result=7fc00001 errno=0
|
||||
func=powf op1=80000000 op2=7f800000 result=00000000 errno=0
|
||||
func=powf op1=80000000 op2=40800000 result=00000000 errno=0
|
||||
func=powf op1=80000000 op2=40400000 result=80000000 errno=0
|
||||
func=powf op1=80000000 op2=3f000000 result=00000000 errno=0
|
||||
func=powf op1=80000000 op2=00000000 result=3f800000 errno=0
|
||||
func=powf op1=80000000 op2=80000000 result=3f800000 errno=0
|
||||
func=powf op1=80000000 op2=bf000000 result=7f800000 errno=ERANGE status=z
|
||||
func=powf op1=80000000 op2=c0400000 result=ff800000 errno=ERANGE status=z
|
||||
func=powf op1=80000000 op2=c0800000 result=7f800000 errno=ERANGE status=z
|
||||
func=powf op1=80000000 op2=ff800000 result=7f800000 errno=ERANGE
|
||||
func=powf op1=be800000 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=be800000 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=be800000 op2=7fc00001 result=7fc00001 errno=0
|
||||
func=powf op1=be800000 op2=ffc00001 result=7fc00001 errno=0
|
||||
func=powf op1=be800000 op2=7f800000 result=00000000 errno=0
|
||||
func=powf op1=be800000 op2=40800000 result=3b800000 errno=0
|
||||
func=powf op1=be800000 op2=40400000 result=bc800000 errno=0
|
||||
func=powf op1=be800000 op2=3f000000 result=7fc00001 errno=EDOM status=i
|
||||
func=powf op1=be800000 op2=00000000 result=3f800000 errno=0
|
||||
func=powf op1=be800000 op2=80000000 result=3f800000 errno=0
|
||||
func=powf op1=be800000 op2=bf000000 result=7fc00001 errno=EDOM status=i
|
||||
func=powf op1=be800000 op2=c0400000 result=c2800000 errno=0
|
||||
func=powf op1=be800000 op2=c0800000 result=43800000 errno=0
|
||||
func=powf op1=be800000 op2=ff800000 result=7f800000 errno=0
|
||||
func=powf op1=bf800000 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=bf800000 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=bf800000 op2=7fc00001 result=7fc00001 errno=0
|
||||
func=powf op1=bf800000 op2=ffc00001 result=7fc00001 errno=0
|
||||
func=powf op1=bf800000 op2=7f800000 result=3f800000 errno=0
|
||||
func=powf op1=bf800000 op2=40800000 result=3f800000 errno=0
|
||||
func=powf op1=bf800000 op2=40400000 result=bf800000 errno=0
|
||||
func=powf op1=bf800000 op2=3f000000 result=7fc00001 errno=EDOM status=i
|
||||
func=powf op1=bf800000 op2=00000000 result=3f800000 errno=0
|
||||
func=powf op1=bf800000 op2=80000000 result=3f800000 errno=0
|
||||
func=powf op1=bf800000 op2=bf000000 result=7fc00001 errno=EDOM status=i
|
||||
func=powf op1=bf800000 op2=c0400000 result=bf800000 errno=0
|
||||
func=powf op1=bf800000 op2=c0800000 result=3f800000 errno=0
|
||||
func=powf op1=bf800000 op2=ff800000 result=3f800000 errno=0
|
||||
func=powf op1=c0800000 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=c0800000 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=c0800000 op2=7fc00001 result=7fc00001 errno=0
|
||||
func=powf op1=c0800000 op2=ffc00001 result=7fc00001 errno=0
|
||||
func=powf op1=c0800000 op2=7f800000 result=7f800000 errno=0
|
||||
func=powf op1=c0800000 op2=40800000 result=43800000 errno=0
|
||||
func=powf op1=c0800000 op2=40400000 result=c2800000 errno=0
|
||||
func=powf op1=c0800000 op2=3f000000 result=7fc00001 errno=EDOM status=i
|
||||
func=powf op1=c0800000 op2=00000000 result=3f800000 errno=0
|
||||
func=powf op1=c0800000 op2=80000000 result=3f800000 errno=0
|
||||
func=powf op1=c0800000 op2=bf000000 result=7fc00001 errno=EDOM status=i
|
||||
func=powf op1=c0800000 op2=c0400000 result=bc800000 errno=0
|
||||
func=powf op1=c0800000 op2=c0800000 result=3b800000 errno=0
|
||||
func=powf op1=c0800000 op2=ff800000 result=00000000 errno=0
|
||||
func=powf op1=ff800000 op2=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800000 op2=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=powf op1=ff800000 op2=7fc00001 result=7fc00001 errno=0
|
||||
func=powf op1=ff800000 op2=ffc00001 result=7fc00001 errno=0
|
||||
func=powf op1=ff800000 op2=7f800000 result=7f800000 errno=0
|
||||
func=powf op1=ff800000 op2=40800000 result=7f800000 errno=0
|
||||
func=powf op1=ff800000 op2=40400000 result=ff800000 errno=0
|
||||
func=powf op1=ff800000 op2=3f000000 result=7f800000 errno=0
|
||||
func=powf op1=ff800000 op2=00000000 result=3f800000 errno=0
|
||||
func=powf op1=ff800000 op2=80000000 result=3f800000 errno=0
|
||||
func=powf op1=ff800000 op2=bf000000 result=00000000 errno=0
|
||||
func=powf op1=ff800000 op2=c0400000 result=80000000 errno=0
|
||||
func=powf op1=ff800000 op2=c0800000 result=00000000 errno=0
|
||||
func=powf op1=ff800000 op2=ff800000 result=00000000 errno=0
|
||||
|
||||
|
||||
func=powf op1=36c27f9d op2=4109fa51 result=00000000 errno=ERANGE status=ux
|
||||
func=powf op1=351738cd op2=c0c55691 result=7f800000 errno=ERANGE status=ox
|
||||
func=powf op1=42836035 op2=41a99f40 result=7f800000 errno=ERANGE status=ox
|
||||
func=powf op1=32bd53f3 op2=40bcba58 result=00000000 errno=ERANGE status=ux
|
||||
func=powf op1=32dc5bff op2=40be62ea result=00000000 errno=ERANGE status=ux
|
||||
func=powf op1=3a8a3f66 op2=4172bd43 result=00000000 errno=ERANGE status=ux
|
||||
func=powf op1=28f0e770 op2=c035b4ca result=7f800000 errno=ERANGE status=ox
|
||||
func=powf op1=40886699 op2=c28f703a result=00000000 errno=ERANGE status=ux
|
||||
func=powf op1=414bd593 op2=c22370cf result=00000000 errno=ERANGE status=ux
|
||||
func=powf op1=3a2f1163 op2=c1422d45 result=7f800000 errno=ERANGE status=ox
|
||||
func=powf op1=434f5cf3 op2=41851272 result=7f800000 errno=ERANGE status=ox
|
||||
func=powf op1=2e0e27a4 op2=c06b13f5 result=7f800000 errno=ERANGE status=ox
|
||||
func=powf op1=39aef7a6 op2=414fd60a result=00000000 errno=ERANGE status=ux
|
||||
func=powf op1=21c80729 op2=c00a04ab result=7f800000 errno=ERANGE status=ox
|
||||
func=powf op1=42455a4b op2=c1d55905 result=00000000 errno=ERANGE status=ux
|
||||
func=powf op1=2d173e0b op2=c05ee797 result=7f800000 errno=ERANGE status=ox
|
||||
func=powf op1=452edf9a op2=4132dd7f result=7f800000 errno=ERANGE status=ox
|
||||
func=powf op1=406bf67b op2=c29f5f12 result=00000000 errno=ERANGE status=ux
|
||||
func=powf op1=2d82a6fc op2=4085779e result=00000000 errno=ERANGE status=ux
|
||||
func=powf op1=4551f827 op2=41304516 result=7f800000 errno=ERANGE status=ox
|
||||
func=powf op1=3a917c51 op2=41726c0a result=00000001.37f errno=0 status=ux
|
||||
; ISO C allows both errno=ERANGE and errno=0
|
||||
;func=powf op1=3b19bbaa op2=4188e6fb result=00000000.b5f errno=0 status=ux
|
||||
;func=powf op1=4088bd18 op2=c28ef056 result=00000000.986 errno=0 status=ux
|
||||
func=powf op1=3f7ffd76 op2=4a09221e result=00aa9d24.3ad error=0
|
||||
|
||||
func=powf op1=007fffff op2=bf000001 result=5f00002c.2b2 error=0
|
||||
func=powf op1=000007ff op2=bf000001 result=62000830.96f error=0
|
||||
func=powf op1=007fffff op2=80800001 result=3f800000.000 error=0
|
||||
func=powf op1=00000000 op2=800007ff result=7f800000 errno=ERANGE status=z
|
||||
func=powf op1=00000000 op2=000007ff result=00000000 error=0
|
||||
func=powf op1=bf800000 op2=ff7fffff result=3f800000 error=0
|
||||
func=powf op1=2e4e4f30 op2=406b0dc2 result=007e9c59.eb4 errno=0 status=u
|
||||
|
||||
; SDCOMP-25549: ensure the biggest overflow case possible is not
|
||||
; mishandled. Also check the analogous underflow, and also ensure that
|
||||
; our massive-overflow checks do not affect numbers _just within_ the
|
||||
; range.
|
||||
func=powf op1=7f7fffff op2=7f7fffff result=7f800000 error=overflow
|
||||
func=powf op1=7f7fffff op2=ff7fffff result=00000000 error=underflow
|
||||
func=powf op1=54cb3000 op2=403fffff result=7f7fffb2.a95 error=0
|
||||
@@ -0,0 +1,51 @@
|
||||
; Directed test cases for SP sincos
|
||||
;
|
||||
; Copyright (c) 2007-2018, Arm Limited.
|
||||
; SPDX-License-Identifier: MIT
|
||||
|
||||
|
||||
func=sincosf_sinf op1=7fc00001 result=7fc00001 errno=0
|
||||
func=sincosf_sinf op1=ffc00001 result=7fc00001 errno=0
|
||||
func=sincosf_sinf op1=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=sincosf_sinf op1=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=sincosf_sinf op1=7f800000 result=7fc00001 errno=EDOM status=i
|
||||
func=sincosf_sinf op1=ff800000 result=7fc00001 errno=EDOM status=i
|
||||
func=sincosf_sinf op1=00000000 result=00000000 errno=0
|
||||
func=sincosf_sinf op1=80000000 result=80000000 errno=0
|
||||
func=sincosf_sinf op1=c70d39a1 result=be37fad5.7ed errno=0
|
||||
func=sincosf_sinf op1=46427f1b result=3f352d80.f9b error=0
|
||||
func=sincosf_sinf op1=4647e568 result=3f352da9.7be error=0
|
||||
func=sincosf_sinf op1=46428bac result=bf352dea.924 error=0
|
||||
func=sincosf_sinf op1=4647f1f9 result=bf352e13.146 error=0
|
||||
func=sincosf_sinf op1=4647fe8a result=3f352e7c.ac9 error=0
|
||||
func=sincosf_sinf op1=45d8d7f1 result=3f35097b.cb0 error=0
|
||||
func=sincosf_sinf op1=45d371a4 result=bf350990.102 error=0
|
||||
func=sincosf_sinf op1=45ce0b57 result=3f3509a4.554 error=0
|
||||
func=sincosf_sinf op1=45d35882 result=3f3509f9.bdb error=0
|
||||
func=sincosf_sinf op1=45cdf235 result=bf350a0e.02c error=0
|
||||
|
||||
func=sincosf_cosf op1=7fc00001 result=7fc00001 errno=0
|
||||
func=sincosf_cosf op1=ffc00001 result=7fc00001 errno=0
|
||||
func=sincosf_cosf op1=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=sincosf_cosf op1=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=sincosf_cosf op1=7f800000 result=7fc00001 errno=EDOM status=i
|
||||
func=sincosf_cosf op1=ff800000 result=7fc00001 errno=EDOM status=i
|
||||
func=sincosf_cosf op1=00000000 result=3f800000 errno=0
|
||||
func=sincosf_cosf op1=80000000 result=3f800000 errno=0
|
||||
func=sincosf_cosf op1=46427f1b result=3f34dc5c.565 error=0
|
||||
func=sincosf_cosf op1=4647e568 result=3f34dc33.c1f error=0
|
||||
func=sincosf_cosf op1=46428bac result=bf34dbf2.8e3 error=0
|
||||
func=sincosf_cosf op1=4647f1f9 result=bf34dbc9.f9b error=0
|
||||
func=sincosf_cosf op1=4647fe8a result=3f34db60.313 error=0
|
||||
func=sincosf_cosf op1=45d8d7f1 result=bf35006a.7fd error=0
|
||||
func=sincosf_cosf op1=45d371a4 result=3f350056.39b error=0
|
||||
func=sincosf_cosf op1=45ce0b57 result=bf350041.f38 error=0
|
||||
func=sincosf_cosf op1=45d35882 result=bf34ffec.868 error=0
|
||||
func=sincosf_cosf op1=45cdf235 result=3f34ffd8.404 error=0
|
||||
|
||||
; no underflow
|
||||
func=sincosf_sinf op1=17800000 result=17800000.000
|
||||
func=sincosf_cosf op1=17800000 result=3f800000.000
|
||||
; underflow
|
||||
func=sincosf_sinf op1=00400000 result=00400000.000 status=ux
|
||||
func=sincosf_cosf op1=00400000 result=3f800000.000 status=ux
|
||||
@@ -0,0 +1,28 @@
|
||||
; sinf.tst - Directed test cases for SP sine
|
||||
;
|
||||
; Copyright (c) 2007-2018, Arm Limited.
|
||||
; SPDX-License-Identifier: MIT
|
||||
|
||||
|
||||
func=sinf op1=7fc00001 result=7fc00001 errno=0
|
||||
func=sinf op1=ffc00001 result=7fc00001 errno=0
|
||||
func=sinf op1=7f800001 result=7fc00001 errno=0 status=i
|
||||
func=sinf op1=ff800001 result=7fc00001 errno=0 status=i
|
||||
func=sinf op1=7f800000 result=7fc00001 errno=EDOM status=i
|
||||
func=sinf op1=ff800000 result=7fc00001 errno=EDOM status=i
|
||||
func=sinf op1=00000000 result=00000000 errno=0
|
||||
func=sinf op1=80000000 result=80000000 errno=0
|
||||
; Directed test for a failure I found while developing mathbench
|
||||
func=sinf op1=c70d39a1 result=be37fad5.7ed errno=0
|
||||
; SDCOMP-26094: check sinf in the cases for which the range reducer
|
||||
; returns values furthest beyond its nominal upper bound of pi/4.
|
||||
func=sinf op1=46427f1b result=3f352d80.f9b error=0
|
||||
func=sinf op1=4647e568 result=3f352da9.7be error=0
|
||||
func=sinf op1=46428bac result=bf352dea.924 error=0
|
||||
func=sinf op1=4647f1f9 result=bf352e13.146 error=0
|
||||
func=sinf op1=4647fe8a result=3f352e7c.ac9 error=0
|
||||
func=sinf op1=45d8d7f1 result=3f35097b.cb0 error=0
|
||||
func=sinf op1=45d371a4 result=bf350990.102 error=0
|
||||
func=sinf op1=45ce0b57 result=3f3509a4.554 error=0
|
||||
func=sinf op1=45d35882 result=3f3509f9.bdb error=0
|
||||
func=sinf op1=45cdf235 result=bf350a0e.02c error=0
|
||||
@@ -0,0 +1,10 @@
|
||||
!! double.tst - Random test case specification for DP functions
|
||||
!!
|
||||
!! Copyright (c) 1999-2018, Arm Limited.
|
||||
!! SPDX-License-Identifier: MIT
|
||||
|
||||
test exp 10000
|
||||
test exp2 10000
|
||||
test log 10000
|
||||
test log2 10000
|
||||
test pow 40000
|
||||
@@ -0,0 +1,15 @@
|
||||
!! single.tst - Random test case specification for SP functions
|
||||
!!
|
||||
!! Copyright (c) 1999-2018, Arm Limited.
|
||||
!! SPDX-License-Identifier: MIT
|
||||
|
||||
test sinf 10000
|
||||
test cosf 10000
|
||||
test sincosf_sinf 5000
|
||||
test sincosf_cosf 5000
|
||||
test tanf 10000
|
||||
test expf 10000
|
||||
test exp2f 10000
|
||||
test logf 10000
|
||||
test log2f 10000
|
||||
test powf 10000
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+851
@@ -0,0 +1,851 @@
|
||||
/*
|
||||
* ULP error checking tool for math functions.
|
||||
*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <fenv.h>
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "mathlib.h"
|
||||
|
||||
/* Don't depend on mpfr by default. */
|
||||
#ifndef USE_MPFR
|
||||
# define USE_MPFR 0
|
||||
#endif
|
||||
#if USE_MPFR
|
||||
# include <mpfr.h>
|
||||
#endif
|
||||
|
||||
#ifndef WANT_VMATH
|
||||
/* Enable the build of vector math code. */
|
||||
# define WANT_VMATH 1
|
||||
#endif
|
||||
|
||||
/* Return the object representation of the double F as a 64-bit unsigned
   integer.  The bits are copied through a union, not converted, so no
   rounding or value conversion takes place.  */
static inline uint64_t
asuint64 (double f)
{
  union
  {
    double f;
    uint64_t i;
  } u = {f};
  return u.i;
}
|
||||
|
||||
/* Return the double whose object representation is the 64-bit pattern I.
   Inverse of asuint64: the bits are reinterpreted through a union, not
   converted.  */
static inline double
asdouble (uint64_t i)
{
  union
  {
    uint64_t i;
    double f;
  } u = {i};
  return u.f;
}
|
||||
|
||||
/* Return the object representation of the float F as a 32-bit unsigned
   integer.  The bits are copied through a union, not converted.  */
static inline uint32_t
asuint (float f)
{
  union
  {
    float f;
    uint32_t i;
  } u = {f};
  return u.i;
}
|
||||
|
||||
/* Return the float whose object representation is the 32-bit pattern I.
   Inverse of asuint: the bits are reinterpreted through a union, not
   converted.  */
static inline float
asfloat (uint32_t i)
{
  union
  {
    uint32_t i;
    float f;
  } u = {i};
  return u.f;
}
|
||||
|
||||
/* State of the pseudo-random generator; fixed initial value, so the
   generated sequence is the same on every run.  */
static uint64_t seed = 0x0123456789abcdef;

/* Advance the generator (multiply-and-add step on SEED) and return the
   new state with its high half xor-ed into its low half.  */
static uint64_t
rand64 (void)
{
  uint64_t next = 6364136223846793005ull * seed + 1;
  seed = next;
  return next ^ (next >> 32);
}
|
||||
|
||||
/* Uniform random in [0,n].

   Draws from rand64 and rejects samples from the incomplete last
   bucket so that the modulo reduction stays unbiased.  n == 0 always
   yields 0 and n == UINT64_MAX returns the raw rand64 value.  */
static uint64_t
randn (uint64_t n)
{
  if (n == 0)
    return 0;

  /* Number of distinct results; wraps to 0 when n is UINT64_MAX.  */
  uint64_t bound = n + 1;
  if (bound == 0)
    return rand64 ();

  uint64_t sample, rem;
  do
    {
      sample = rand64 ();
      rem = sample % bound;
    }
  while (sample - rem > -bound);
  return rem;
}
|
||||
|
||||
/* Input generator state.  All ranges are expressed on the bit
   representation of the floating-point arguments.  */
struct gen
{
  uint64_t start, len;	 /* First argument: values are start + [0, len].  */
  uint64_t start2, len2; /* Second argument range (two-argument case).  */
  uint64_t off;		 /* Current offset from start used by next1.  */
  uint64_t step;	 /* Base increment of off used by next1.  */
  uint64_t cnt;		 /* Sample count parsed from the command line.  */
};
|
||||
|
||||
/* Argument packs, one per supported function signature: the suffix is
   the precision (f: float, d: double) followed by the arity.  */

/* One float argument.  */
struct args_f1
{
  float x;
};

/* Two float arguments.  */
struct args_f2
{
  float x, x2;
};

/* One double argument.  */
struct args_d1
{
  double x;
};

/* Two double arguments.  */
struct args_d2
{
  double x, x2;
};
|
||||
|
||||
/* Reference result for a single precision function:
   result = y + tail*2^ulpexp.  */
struct ret_f
{
  float y;	/* Reference value rounded to the target type.  */
  double tail;	/* Remainder of the reference, in units of 2^ulpexp.  */
  int ulpexp;	/* Exponent of the unit used for tail.  */
  int ex;	/* Expected floating-point exception flags.  */
  int ex_may;	/* Flags that are allowed to differ from ex.  */
};
|
||||
|
||||
/* Reference result for a double precision function:
   result = y + tail*2^ulpexp.  */
struct ret_d
{
  double y;	/* Reference value rounded to the target type.  */
  double tail;	/* Remainder of the reference, in units of 2^ulpexp.  */
  int ulpexp;	/* Exponent of the unit used for tail.  */
  int ex;	/* Expected floating-point exception flags.  */
  int ex_may;	/* Flags that are allowed to differ from ex.  */
};
|
||||
|
||||
static inline uint64_t
|
||||
next1 (struct gen *g)
|
||||
{
|
||||
/* For single argument use randomized incremental steps,
|
||||
that produce dense sampling without collisions and allow
|
||||
testing all inputs in a range. */
|
||||
uint64_t r = g->start + g->off;
|
||||
g->off += g->step + randn (g->step / 2);
|
||||
if (g->off > g->len)
|
||||
g->off -= g->len; /* hack. */
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
next2 (uint64_t *x2, struct gen *g)
|
||||
{
|
||||
/* For two arguments use uniform random sampling. */
|
||||
uint64_t r = g->start + randn (g->len);
|
||||
*x2 = g->start2 + randn (g->len2);
|
||||
return r;
|
||||
}
|
||||
|
||||
static struct args_f1
|
||||
next_f1 (void *g)
|
||||
{
|
||||
return (struct args_f1){asfloat (next1 (g))};
|
||||
}
|
||||
|
||||
static struct args_f2
|
||||
next_f2 (void *g)
|
||||
{
|
||||
uint64_t x2;
|
||||
uint64_t x = next2 (&x2, g);
|
||||
return (struct args_f2){asfloat (x), asfloat (x2)};
|
||||
}
|
||||
|
||||
static struct args_d1
|
||||
next_d1 (void *g)
|
||||
{
|
||||
return (struct args_d1){asdouble (next1 (g))};
|
||||
}
|
||||
|
||||
static struct args_d2
|
||||
next_d2 (void *g)
|
||||
{
|
||||
uint64_t x2;
|
||||
uint64_t x = next2 (&x2, g);
|
||||
return (struct args_d2){asdouble (x), asdouble (x2)};
|
||||
}
|
||||
|
||||
/* Run configuration assembled from the command line in main.  */
struct conf
{
  int r;		/* Rounding mode as an FE_* constant.  */
  int rc;		/* Rounding mode letter: n, u, d or z (-r).  */
  int quiet;		/* Set by -q.  */
  int mpfr;		/* Use mpfr as the reference (-m, or forced).  */
  int fenv;		/* Cleared by -f to disable fenv testing.  */
  unsigned long long n; /* Sample count, copied from the generator.  */
  double softlim;	/* Soft ulp limit (-l).  */
  double errlim;	/* Hard ulp limit (-e).  */
};
|
||||
|
||||
/* Wrappers for sincos. */
|
||||
static float sincosf_sinf(float x) {(void)cosf(x); return sinf(x);}
|
||||
static float sincosf_cosf(float x) {(void)sinf(x); return cosf(x);}
|
||||
static double sincos_sin(double x) {(void)cos(x); return sin(x);}
|
||||
static double sincos_cos(double x) {(void)sin(x); return cos(x);}
|
||||
#if USE_MPFR
|
||||
static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_cos(y,x,r); return mpfr_sin(y,x,r); }
|
||||
static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_sin(y,x,r); return mpfr_cos(y,x,r); }
|
||||
#endif
|
||||
|
||||
/* A bit of a hack: call vector functions twice with the same
   input in lane 0 but a different value in other lanes: once
   with an in-range value and then with a special case value.
   secondcall selects which filler value the last lane receives.  */
static int secondcall;

/* Wrappers for vector functions.  Each one broadcasts the scalar
   argument into a vector, calls the vector routine and returns
   lane 0 of the result.  */
#if __aarch64__ && WANT_VMATH
typedef __f32x4_t v_float;
typedef __f64x2_t v_double;

/* Filler for the last lane, indexed by secondcall.  */
static const float fv[2] = {1.0f, -INFINITY};
static const double dv[2] = {1.0, -INFINITY};

static inline v_float
argf (float x)
{
  return (v_float){x, x, x, fv[secondcall]};
}

static inline v_double
argd (double x)
{
  return (v_double){x, dv[secondcall]};
}

static float v_sinf (float x) { return __v_sinf (argf (x))[0]; }
static float v_cosf (float x) { return __v_cosf (argf (x))[0]; }
static float v_expf_1u (float x) { return __v_expf_1u (argf (x))[0]; }
static float v_expf (float x) { return __v_expf (argf (x))[0]; }
static float v_exp2f_1u (float x) { return __v_exp2f_1u (argf (x))[0]; }
static float v_exp2f (float x) { return __v_exp2f (argf (x))[0]; }
static float v_logf (float x) { return __v_logf (argf (x))[0]; }
static float v_powf (float x, float y) { return __v_powf (argf (x), argf (y))[0]; }
static double v_sin (double x) { return __v_sin (argd (x))[0]; }
static double v_cos (double x) { return __v_cos (argd (x))[0]; }
static double v_exp (double x) { return __v_exp (argd (x))[0]; }
static double v_log (double x) { return __v_log (argd (x))[0]; }
static double v_pow (double x, double y) { return __v_pow (argd (x), argd (y))[0]; }
# ifdef __vpcs
static float vn_sinf (float x) { return __vn_sinf (argf (x))[0]; }
static float vn_cosf (float x) { return __vn_cosf (argf (x))[0]; }
static float vn_expf_1u (float x) { return __vn_expf_1u (argf (x))[0]; }
static float vn_expf (float x) { return __vn_expf (argf (x))[0]; }
static float vn_exp2f_1u (float x) { return __vn_exp2f_1u (argf (x))[0]; }
static float vn_exp2f (float x) { return __vn_exp2f (argf (x))[0]; }
static float vn_logf (float x) { return __vn_logf (argf (x))[0]; }
static float vn_powf (float x, float y) { return __vn_powf (argf (x), argf (y))[0]; }
static double vn_sin (double x) { return __vn_sin (argd (x))[0]; }
static double vn_cos (double x) { return __vn_cos (argd (x))[0]; }
static double vn_exp (double x) { return __vn_exp (argd (x))[0]; }
static double vn_log (double x) { return __vn_log (argd (x))[0]; }
static double vn_pow (double x, double y) { return __vn_pow (argd (x), argd (y))[0]; }
static float Z_sinf (float x) { return _ZGVnN4v_sinf (argf (x))[0]; }
static float Z_cosf (float x) { return _ZGVnN4v_cosf (argf (x))[0]; }
static float Z_expf (float x) { return _ZGVnN4v_expf (argf (x))[0]; }
static float Z_exp2f (float x) { return _ZGVnN4v_exp2f (argf (x))[0]; }
static float Z_logf (float x) { return _ZGVnN4v_logf (argf (x))[0]; }
static float Z_powf (float x, float y) { return _ZGVnN4vv_powf (argf (x), argf (y))[0]; }
static double Z_sin (double x) { return _ZGVnN2v_sin (argd (x))[0]; }
static double Z_cos (double x) { return _ZGVnN2v_cos (argd (x))[0]; }
static double Z_exp (double x) { return _ZGVnN2v_exp (argd (x))[0]; }
static double Z_log (double x) { return _ZGVnN2v_log (argd (x))[0]; }
static double Z_pow (double x, double y) { return _ZGVnN2vv_pow (argd (x), argd (y))[0]; }
# endif
#endif
|
||||
|
||||
/* Description of one testable function.  Which member of each union
   is valid follows from arity and singleprec.  */
struct fun
{
  const char *name; /* Name accepted on the command line.  */
  int arity;	    /* Number of arguments: 1 or 2.  */
  int singleprec;   /* Nonzero for float, zero for double.  */
  int twice;	    /* Nonzero to call the function a second time with
		       secondcall set and compare both results.  */

  /* Function under test.  */
  union
  {
    float (*f1) (float);
    float (*f2) (float, float);
    double (*d1) (double);
    double (*d2) (double, double);
  } fun;

  /* Reference implementation in the next wider type.  */
  union
  {
    double (*f1) (double);
    double (*f2) (double, double);
    long double (*d1) (long double);
    long double (*d2) (long double, long double);
  } fun_long;

#if USE_MPFR
  /* Reference implementation in mpfr.  */
  union
  {
    int (*f1) (mpfr_t, const mpfr_t, mpfr_rnd_t);
    int (*f2) (mpfr_t, const mpfr_t, const mpfr_t, mpfr_rnd_t);
    int (*d1) (mpfr_t, const mpfr_t, mpfr_rnd_t);
    int (*d2) (mpfr_t, const mpfr_t, const mpfr_t, mpfr_rnd_t);
  } fun_mpfr;
#endif
};
|
||||
|
||||
static const struct fun fun[] = {
|
||||
#if USE_MPFR
|
||||
# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
|
||||
{#x, a, s, twice, {.t = x_wrap}, {.t = x_long}, {.t = x_mpfr}},
|
||||
#else
|
||||
# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
|
||||
{#x, a, s, twice, {.t = x_wrap}, {.t = x_long}},
|
||||
#endif
|
||||
#define F1(x) F (x##f, x##f, x, mpfr_##x, 1, 1, f1, 0)
|
||||
#define F2(x) F (x##f, x##f, x, mpfr_##x, 2, 1, f2, 0)
|
||||
#define D1(x) F (x, x, x##l, mpfr_##x, 1, 0, d1, 0)
|
||||
#define D2(x) F (x, x, x##l, mpfr_##x, 2, 0, d2, 0)
|
||||
F1 (sin)
|
||||
F1 (cos)
|
||||
F (sincosf_sinf, sincosf_sinf, sincos_sin, sincos_mpfr_sin, 1, 1, f1, 0)
|
||||
F (sincosf_cosf, sincosf_cosf, sincos_cos, sincos_mpfr_cos, 1, 1, f1, 0)
|
||||
F1 (exp)
|
||||
F1 (exp2)
|
||||
F1 (log)
|
||||
F1 (log2)
|
||||
F2 (pow)
|
||||
D1 (exp)
|
||||
D1 (exp2)
|
||||
D1 (log)
|
||||
D1 (log2)
|
||||
D2 (pow)
|
||||
#if WANT_VMATH
|
||||
F (__s_sinf, __s_sinf, sin, mpfr_sin, 1, 1, f1, 0)
|
||||
F (__s_cosf, __s_cosf, cos, mpfr_cos, 1, 1, f1, 0)
|
||||
F (__s_expf_1u, __s_expf_1u, exp, mpfr_exp, 1, 1, f1, 0)
|
||||
F (__s_expf, __s_expf, exp, mpfr_exp, 1, 1, f1, 0)
|
||||
F (__s_exp2f_1u, __s_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 0)
|
||||
F (__s_exp2f, __s_exp2f, exp2, mpfr_exp2, 1, 1, f1, 0)
|
||||
F (__s_powf, __s_powf, pow, mpfr_pow, 2, 1, f2, 0)
|
||||
F (__s_logf, __s_logf, log, mpfr_log, 1, 1, f1, 0)
|
||||
F (__s_sin, __s_sin, sinl, mpfr_sin, 1, 0, d1, 0)
|
||||
F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0)
|
||||
F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0)
|
||||
F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0)
|
||||
F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0)
|
||||
#if __aarch64__
|
||||
F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1)
|
||||
F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1)
|
||||
F (__v_expf_1u, v_expf_1u, exp, mpfr_exp, 1, 1, f1, 1)
|
||||
F (__v_expf, v_expf, exp, mpfr_exp, 1, 1, f1, 1)
|
||||
F (__v_exp2f_1u, v_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1)
|
||||
F (__v_exp2f, v_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
|
||||
F (__v_logf, v_logf, log, mpfr_log, 1, 1, f1, 1)
|
||||
F (__v_powf, v_powf, pow, mpfr_pow, 2, 1, f2, 1)
|
||||
F (__v_sin, v_sin, sinl, mpfr_sin, 1, 0, d1, 1)
|
||||
F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1)
|
||||
F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1)
|
||||
F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1)
|
||||
F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1)
|
||||
#ifdef __vpcs
|
||||
F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1)
|
||||
F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1)
|
||||
F (__vn_expf_1u, vn_expf_1u, exp, mpfr_exp, 1, 1, f1, 1)
|
||||
F (__vn_expf, vn_expf, exp, mpfr_exp, 1, 1, f1, 1)
|
||||
F (__vn_exp2f_1u, vn_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1)
|
||||
F (__vn_exp2f, vn_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
|
||||
F (__vn_logf, vn_logf, log, mpfr_log, 1, 1, f1, 1)
|
||||
F (__vn_powf, vn_powf, pow, mpfr_pow, 2, 1, f2, 1)
|
||||
F (__vn_sin, vn_sin, sinl, mpfr_sin, 1, 0, d1, 1)
|
||||
F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1)
|
||||
F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1)
|
||||
F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1)
|
||||
F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1)
|
||||
F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1)
|
||||
F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1)
|
||||
F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1)
|
||||
F (_ZGVnN4v_exp2f, Z_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
|
||||
F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1)
|
||||
F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1)
|
||||
F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1)
|
||||
F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1)
|
||||
F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1)
|
||||
F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1)
|
||||
F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#undef F
|
||||
#undef F1
|
||||
#undef F2
|
||||
#undef D1
|
||||
#undef D2
|
||||
{0}};
|
||||
|
||||
/* Boilerplate for generic calls. */
|
||||
|
||||
/* Return the binary exponent of one ulp of the float X, computed from
   its biased exponent field; a zero field (zero or subnormal) is
   treated like the smallest normal exponent.  */
static inline int
ulpscale_f (float x)
{
  int biased = (asuint (x) >> 23) & 0xff;
  if (biased == 0)
    biased = 1;
  return biased - 0x7f - 23;
}
|
||||
/* Return the binary exponent of one ulp of the double X, computed from
   its biased exponent field; a zero field (zero or subnormal) is
   treated like the smallest normal exponent.  */
static inline int
ulpscale_d (double x)
{
  int biased = (asuint64 (x) >> 52) & 0x7ff;
  if (biased == 0)
    biased = 1;
  return biased - 0x3ff - 52;
}
|
||||
static inline float
|
||||
call_f1 (const struct fun *f, struct args_f1 a)
|
||||
{
|
||||
return f->fun.f1 (a.x);
|
||||
}
|
||||
static inline float
|
||||
call_f2 (const struct fun *f, struct args_f2 a)
|
||||
{
|
||||
return f->fun.f2 (a.x, a.x2);
|
||||
}
|
||||
|
||||
static inline double
|
||||
call_d1 (const struct fun *f, struct args_d1 a)
|
||||
{
|
||||
return f->fun.d1 (a.x);
|
||||
}
|
||||
static inline double
|
||||
call_d2 (const struct fun *f, struct args_d2 a)
|
||||
{
|
||||
return f->fun.d2 (a.x, a.x2);
|
||||
}
|
||||
static inline double
|
||||
call_long_f1 (const struct fun *f, struct args_f1 a)
|
||||
{
|
||||
return f->fun_long.f1 (a.x);
|
||||
}
|
||||
static inline double
|
||||
call_long_f2 (const struct fun *f, struct args_f2 a)
|
||||
{
|
||||
return f->fun_long.f2 (a.x, a.x2);
|
||||
}
|
||||
static inline long double
|
||||
call_long_d1 (const struct fun *f, struct args_d1 a)
|
||||
{
|
||||
return f->fun_long.d1 (a.x);
|
||||
}
|
||||
static inline long double
|
||||
call_long_d2 (const struct fun *f, struct args_d2 a)
|
||||
{
|
||||
return f->fun_long.d2 (a.x, a.x2);
|
||||
}
|
||||
static inline void
|
||||
printcall_f1 (const struct fun *f, struct args_f1 a)
|
||||
{
|
||||
printf ("%s(%a)", f->name, a.x);
|
||||
}
|
||||
static inline void
|
||||
printcall_f2 (const struct fun *f, struct args_f2 a)
|
||||
{
|
||||
printf ("%s(%a, %a)", f->name, a.x, a.x2);
|
||||
}
|
||||
static inline void
|
||||
printcall_d1 (const struct fun *f, struct args_d1 a)
|
||||
{
|
||||
printf ("%s(%a)", f->name, a.x);
|
||||
}
|
||||
static inline void
|
||||
printcall_d2 (const struct fun *f, struct args_d2 a)
|
||||
{
|
||||
printf ("%s(%a, %a)", f->name, a.x, a.x2);
|
||||
}
|
||||
static inline void
|
||||
printgen_f1 (const struct fun *f, struct gen *gen)
|
||||
{
|
||||
printf ("%s in [%a;%a]", f->name, asfloat (gen->start),
|
||||
asfloat (gen->start + gen->len));
|
||||
}
|
||||
static inline void
|
||||
printgen_f2 (const struct fun *f, struct gen *gen)
|
||||
{
|
||||
printf ("%s in [%a;%a] x [%a;%a]", f->name, asfloat (gen->start),
|
||||
asfloat (gen->start + gen->len), asfloat (gen->start2),
|
||||
asfloat (gen->start2 + gen->len2));
|
||||
}
|
||||
static inline void
|
||||
printgen_d1 (const struct fun *f, struct gen *gen)
|
||||
{
|
||||
printf ("%s in [%a;%a]", f->name, asdouble (gen->start),
|
||||
asdouble (gen->start + gen->len));
|
||||
}
|
||||
static inline void
|
||||
printgen_d2 (const struct fun *f, struct gen *gen)
|
||||
{
|
||||
printf ("%s in [%a;%a] x [%a;%a]", f->name, asdouble (gen->start),
|
||||
asdouble (gen->start + gen->len), asdouble (gen->start2),
|
||||
asdouble (gen->start2 + gen->len2));
|
||||
}
|
||||
|
||||
/* reduce_T (a, f, op): apply f to every member of the argument pack a
   and combine the results with the binary operator op.  f may be left
   empty to combine the raw members.  */
#define reduce_f1(a, f, op) (f ((a).x))
#define reduce_f2(a, f, op) (f ((a).x) op f ((a).x2))
#define reduce_d1(a, f, op) (f ((a).x))
#define reduce_d2(a, f, op) (f ((a).x) op f ((a).x2))
|
||||
|
||||
/* Nonzero when a clear top mantissa bit marks a signaling NaN.  With
   0, any value with all exponent bits and the top mantissa bit set is
   reported as signaling instead.  */
#ifndef IEEE_754_2008_SNAN
# define IEEE_754_2008_SNAN 1
#endif

/* Return nonzero if the float X is a signaling NaN.  */
static inline int
issignaling_f (float x)
{
  uint32_t ix = asuint (x);

  if (!IEEE_754_2008_SNAN)
    return (ix & 0x7fc00000) == 0x7fc00000;

  /* Flip the top mantissa bit and drop the sign by doubling: only a
     NaN whose top mantissa bit was clear ends up above the doubled
     pattern of exponent all ones plus top mantissa bit.  */
  uint32_t flipped = ix ^ 0x00400000;
  return 2 * flipped > 2u * 0x7fc00000;
}
|
||||
static inline int
|
||||
issignaling_d (double x)
|
||||
{
|
||||
uint64_t ix = asuint64 (x);
|
||||
if (!IEEE_754_2008_SNAN)
|
||||
return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
|
||||
return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
|
||||
}
|
||||
|
||||
#if USE_MPFR
|
||||
static mpfr_rnd_t
|
||||
rmap (int r)
|
||||
{
|
||||
switch (r)
|
||||
{
|
||||
case FE_TONEAREST:
|
||||
return MPFR_RNDN;
|
||||
case FE_TOWARDZERO:
|
||||
return MPFR_RNDZ;
|
||||
case FE_UPWARD:
|
||||
return MPFR_RNDU;
|
||||
case FE_DOWNWARD:
|
||||
return MPFR_RNDD;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Precisions (in bits) and exponent limits, selected through
   RT(prec), RT(emin), ... for the _f (float) and _d (double) cases.
   prec_mpfr_* is the working precision of the mpfr reference.  */
#define prec_mpfr_f 50
#define prec_mpfr_d 80
#define prec_f	    24
#define prec_d	    53
#define emin_f	    (-148)
#define emin_d	    (-1073)
#define emax_f	    128
#define emax_d	    1024
|
||||
static inline int
|
||||
call_mpfr_f1 (mpfr_t y, const struct fun *f, struct args_f1 a, mpfr_rnd_t r)
|
||||
{
|
||||
MPFR_DECL_INIT (x, prec_f);
|
||||
mpfr_set_flt (x, a.x, MPFR_RNDN);
|
||||
return f->fun_mpfr.f1 (y, x, r);
|
||||
}
|
||||
static inline int
|
||||
call_mpfr_f2 (mpfr_t y, const struct fun *f, struct args_f2 a, mpfr_rnd_t r)
|
||||
{
|
||||
MPFR_DECL_INIT (x, prec_f);
|
||||
MPFR_DECL_INIT (x2, prec_f);
|
||||
mpfr_set_flt (x, a.x, MPFR_RNDN);
|
||||
mpfr_set_flt (x2, a.x2, MPFR_RNDN);
|
||||
return f->fun_mpfr.f2 (y, x, x2, r);
|
||||
}
|
||||
static inline int
|
||||
call_mpfr_d1 (mpfr_t y, const struct fun *f, struct args_d1 a, mpfr_rnd_t r)
|
||||
{
|
||||
MPFR_DECL_INIT (x, prec_d);
|
||||
mpfr_set_d (x, a.x, MPFR_RNDN);
|
||||
return f->fun_mpfr.d1 (y, x, r);
|
||||
}
|
||||
static inline int
|
||||
call_mpfr_d2 (mpfr_t y, const struct fun *f, struct args_d2 a, mpfr_rnd_t r)
|
||||
{
|
||||
MPFR_DECL_INIT (x, prec_d);
|
||||
MPFR_DECL_INIT (x2, prec_d);
|
||||
mpfr_set_d (x, a.x, MPFR_RNDN);
|
||||
mpfr_set_d (x2, a.x2, MPFR_RNDN);
|
||||
return f->fun_mpfr.d2 (y, x, x2, r);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Type-specific names selected through RT(x) = x##_f (single
   precision) or x##_d (double precision).  float_* is the type under
   test and double_* the wider type used for the reference.  */

/* Single precision.  */
#define float_f	     float
#define double_f     double
#define copysign_f   copysignf
#define nextafter_f  nextafterf
#define fabs_f	     fabsf
#define asuint_f     asuint
#define asfloat_f    asfloat
#define scalbn_f     scalbnf
#define lscalbn_f    scalbn
#define halfinf_f    0x1p127f
#define min_normal_f 0x1p-126f

/* Double precision.  */
#define float_d	     double
#define double_d     long double
#define copysign_d   copysign
#define nextafter_d  nextafter
#define fabs_d	     fabs
#define asuint_d     asuint64
#define asfloat_d    asdouble
#define scalbn_d     scalbn
#define lscalbn_d    scalbnl
#define halfinf_d    0x1p1023
#define min_normal_d 0x1p-1022
|
||||
|
||||
/* Instantiate the generic code of ulp.h once per function type.
   NEW_RT makes the next inclusion also emit the helpers that depend
   only on the return type (ulp.h undefines it again).  */

/* Single precision: one and two arguments.  */
#define NEW_RT
#define RT(x) x##_f
#define T(x) x##_f1
#include "ulp.h"
#undef T
#define T(x) x##_f2
#include "ulp.h"
#undef T
#undef RT

/* Double precision: one and two arguments.  */
#define NEW_RT
#define RT(x) x##_d
#define T(x) x##_d1
#include "ulp.h"
#undef T
#define T(x) x##_d2
#include "ulp.h"
#undef T
#undef RT
|
||||
|
||||
static void
|
||||
usage (void)
|
||||
{
|
||||
puts ("./ulp [-q] [-m] [-f] [-r nudz] [-l soft-ulplimit] [-e ulplimit] func "
|
||||
"lo [hi [x lo2 hi2] [count]]");
|
||||
puts ("Compares func against a higher precision implementation in [lo; hi].");
|
||||
puts ("-q: quiet.");
|
||||
puts ("-m: use mpfr even if faster method is available.");
|
||||
puts ("-f: disable fenv testing (rounding modes and exceptions).");
|
||||
puts ("Supported func:");
|
||||
for (const struct fun *f = fun; f->name; f++)
|
||||
printf ("\t%s\n", f->name);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
static int
|
||||
cmp (const struct fun *f, struct gen *gen, const struct conf *conf)
|
||||
{
|
||||
int r = 1;
|
||||
if (f->arity == 1 && f->singleprec)
|
||||
r = cmp_f1 (f, gen, conf);
|
||||
else if (f->arity == 2 && f->singleprec)
|
||||
r = cmp_f2 (f, gen, conf);
|
||||
else if (f->arity == 1 && !f->singleprec)
|
||||
r = cmp_d1 (f, gen, conf);
|
||||
else if (f->arity == 2 && !f->singleprec)
|
||||
r = cmp_d2 (f, gen, conf);
|
||||
else
|
||||
usage ();
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Parse the command line number S and return the bit representation of
   the corresponding float (SINGLEPREC nonzero) or double.

   An optional leading '+' or '-' is consumed first.  Two spellings are
   accepted after it:
   - 0xXXXX without a binary exponent is taken as the raw bit pattern
     and '-' flips its sign bit;
   - anything else is parsed as a floating-point literal by strtof or
     strtod and '-' sets the sign bit of the result.
   There is no symbolic spelling for signaling NaNs; pass those as a
   raw bit pattern.  */
static uint64_t
getnum (const char *s, int singleprec)
{
  uint64_t sign = 0;

  if (s[0] == '+')
    s++;
  else if (s[0] == '-')
    {
      sign = singleprec ? 1ULL << 31 : 1ULL << 63;
      s++;
    }
  /* 0xXXXX is treated as bit representation, '-' flips the sign bit.
     The ctype argument must be representable as unsigned char, and a
     hex float may spell its exponent with either 'p' or 'P'.  */
  if (s[0] == '0' && tolower ((unsigned char) s[1]) == 'x'
      && strpbrk (s, "pP") == NULL)
    return sign ^ strtoull (s, 0, 0);
  /* Otherwise assume it's a floating-point literal.  */
  return sign
	 | (singleprec ? asuint (strtof (s, 0)) : asuint64 (strtod (s, 0)));
}
|
||||
|
||||
static void
|
||||
parsegen (struct gen *g, int argc, char *argv[], const struct fun *f)
|
||||
{
|
||||
int singleprec = f->singleprec;
|
||||
int arity = f->arity;
|
||||
uint64_t a, b, a2, b2, n;
|
||||
if (argc < 1)
|
||||
usage ();
|
||||
b = a = getnum (argv[0], singleprec);
|
||||
n = 0;
|
||||
if (argc > 1 && strcmp (argv[1], "x") == 0)
|
||||
{
|
||||
argc -= 2;
|
||||
argv += 2;
|
||||
}
|
||||
else if (argc > 1)
|
||||
{
|
||||
b = getnum (argv[1], singleprec);
|
||||
if (argc > 2 && strcmp (argv[2], "x") == 0)
|
||||
{
|
||||
argc -= 3;
|
||||
argv += 3;
|
||||
}
|
||||
}
|
||||
b2 = a2 = getnum (argv[0], singleprec);
|
||||
if (argc > 1)
|
||||
b2 = getnum (argv[1], singleprec);
|
||||
if (argc > 2)
|
||||
n = strtoull (argv[2], 0, 0);
|
||||
if (argc > 3)
|
||||
usage ();
|
||||
//printf("ab %lx %lx ab2 %lx %lx n %lu\n", a, b, a2, b2, n);
|
||||
if (arity == 1)
|
||||
{
|
||||
g->start = a;
|
||||
g->len = b - a;
|
||||
if (n - 1 > b - a)
|
||||
n = b - a + 1;
|
||||
g->off = 0;
|
||||
g->step = n ? (g->len + 1) / n : 1;
|
||||
g->start2 = g->len2 = 0;
|
||||
g->cnt = n;
|
||||
}
|
||||
else if (arity == 2)
|
||||
{
|
||||
g->start = a;
|
||||
g->len = b - a;
|
||||
g->off = g->step = 0;
|
||||
g->start2 = a2;
|
||||
g->len2 = b2 - a2;
|
||||
g->cnt = n;
|
||||
}
|
||||
else
|
||||
usage ();
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
const struct fun *f;
|
||||
struct gen gen;
|
||||
struct conf conf;
|
||||
conf.rc = 'n';
|
||||
conf.quiet = 0;
|
||||
conf.mpfr = 0;
|
||||
conf.fenv = 1;
|
||||
conf.softlim = 0;
|
||||
conf.errlim = INFINITY;
|
||||
for (;;)
|
||||
{
|
||||
argc--;
|
||||
argv++;
|
||||
if (argc < 1)
|
||||
usage ();
|
||||
if (argv[0][0] != '-')
|
||||
break;
|
||||
switch (argv[0][1])
|
||||
{
|
||||
case 'e':
|
||||
argc--;
|
||||
argv++;
|
||||
if (argc < 1)
|
||||
usage ();
|
||||
conf.errlim = strtod (argv[0], 0);
|
||||
break;
|
||||
case 'f':
|
||||
conf.fenv = 0;
|
||||
break;
|
||||
case 'l':
|
||||
argc--;
|
||||
argv++;
|
||||
if (argc < 1)
|
||||
usage ();
|
||||
conf.softlim = strtod (argv[0], 0);
|
||||
break;
|
||||
case 'm':
|
||||
conf.mpfr = 1;
|
||||
break;
|
||||
case 'q':
|
||||
conf.quiet = 1;
|
||||
break;
|
||||
case 'r':
|
||||
conf.rc = argv[0][2];
|
||||
if (!conf.rc)
|
||||
{
|
||||
argc--;
|
||||
argv++;
|
||||
if (argc < 1)
|
||||
usage ();
|
||||
conf.rc = argv[0][0];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
usage ();
|
||||
}
|
||||
}
|
||||
switch (conf.rc)
|
||||
{
|
||||
case 'n':
|
||||
conf.r = FE_TONEAREST;
|
||||
break;
|
||||
case 'u':
|
||||
conf.r = FE_UPWARD;
|
||||
break;
|
||||
case 'd':
|
||||
conf.r = FE_DOWNWARD;
|
||||
break;
|
||||
case 'z':
|
||||
conf.r = FE_TOWARDZERO;
|
||||
break;
|
||||
default:
|
||||
usage ();
|
||||
}
|
||||
for (f = fun; f->name; f++)
|
||||
if (strcmp (argv[0], f->name) == 0)
|
||||
break;
|
||||
if (!f->name)
|
||||
usage ();
|
||||
if (!f->singleprec && LDBL_MANT_DIG == DBL_MANT_DIG)
|
||||
conf.mpfr = 1; /* Use mpfr if long double has no extra precision. */
|
||||
if (!USE_MPFR && conf.mpfr)
|
||||
{
|
||||
puts ("mpfr is not available.");
|
||||
return 0;
|
||||
}
|
||||
argc--;
|
||||
argv++;
|
||||
parsegen (&gen, argc, argv, f);
|
||||
conf.n = gen.cnt;
|
||||
return cmp (f, &gen, &conf);
|
||||
}
|
||||
+362
@@ -0,0 +1,362 @@
|
||||
/*
|
||||
* Generic functions for ULP error estimation.
|
||||
*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/* For each different math function type,
|
||||
T(x) should add a different suffix to x.
|
||||
RT(x) should add a return type specific suffix to x. */
|
||||
|
||||
#ifdef NEW_RT
|
||||
#undef NEW_RT
|
||||
|
||||
# if USE_MPFR
|
||||
/* Return the ulp exponent to use for the mpfr value X in the target
   format; T is not used.  Zero maps to the exponent just below emin,
   infinity to the largest finite ulp exponent and NaN to 0.  */
static int RT(ulpscale_mpfr) (mpfr_t x, int t)
{
  if (mpfr_zero_p (x))
    return RT(emin) - 1;
  if (mpfr_inf_p (x))
    return RT(emax) - RT(prec);
  if (!mpfr_regular_p (x))
    /* NaN.  */
    return 0;

  /* TODO: pow of 2 cases.  */
  mpfr_exp_t ulp = mpfr_get_exp (x) - RT(prec);
  if (ulp < RT(emin))
    ulp = RT(emin) - 1;
  else if (ulp > RT(emax) - RT(prec))
    ulp = RT(emax) - RT(prec);
  return ulp;
}
|
||||
# endif
|
||||
|
||||
/* Difference between exact result and closest real number that
|
||||
gets rounded to got, i.e. error before rounding, for a correctly
|
||||
rounded result the difference is 0. */
|
||||
static double RT(ulperr) (RT(float) got, const struct RT(ret) * p, int r)
|
||||
{
|
||||
RT(float) want = p->y;
|
||||
RT(float) d;
|
||||
double e;
|
||||
|
||||
if (RT(asuint) (got) == RT(asuint) (want))
|
||||
return 0.0;
|
||||
if (signbit (got) != signbit (want))
|
||||
/* May have false positives with NaN. */
|
||||
//return isnan(got) && isnan(want) ? 0 : INFINITY;
|
||||
return INFINITY;
|
||||
if (!isfinite (want) || !isfinite (got))
|
||||
{
|
||||
if (isnan (got) != isnan (want))
|
||||
return INFINITY;
|
||||
if (isnan (want))
|
||||
return 0;
|
||||
if (isinf (got))
|
||||
{
|
||||
got = RT(copysign) (RT(halfinf), got);
|
||||
want *= 0.5f;
|
||||
}
|
||||
if (isinf (want))
|
||||
{
|
||||
want = RT(copysign) (RT(halfinf), want);
|
||||
got *= 0.5f;
|
||||
}
|
||||
}
|
||||
if (r == FE_TONEAREST)
|
||||
{
|
||||
// TODO: incorrect when got vs want cross a powof2 boundary
|
||||
/* error = got > want
|
||||
? got - want - tail ulp - 0.5 ulp
|
||||
: got - want - tail ulp + 0.5 ulp; */
|
||||
d = got - want;
|
||||
e = d > 0 ? -p->tail - 0.5 : -p->tail + 0.5;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((r == FE_DOWNWARD && got < want) || (r == FE_UPWARD && got > want)
|
||||
|| (r == FE_TOWARDZERO && fabs (got) < fabs (want)))
|
||||
got = RT(nextafter) (got, want);
|
||||
d = got - want;
|
||||
e = -p->tail;
|
||||
}
|
||||
return RT(scalbn) (d, -p->ulpexp) + e;
|
||||
}
|
||||
|
||||
/* Return nonzero if the result YGOT with exception flags EXGOT matches
   the reference YWANT/EXWANT: the values must be bitwise identical and
   the flags may differ only in the bits of EXMAY.  */
static int RT(isok) (RT(float) ygot, int exgot, RT(float) ywant, int exwant,
		     int exmay)
{
  int flagdiff = (exgot ^ exwant) & ~exmay;

  if (RT(asuint) (ygot) != RT(asuint) (ywant))
    return 0;
  return flagdiff == 0;
}
|
||||
|
||||
/* Return nonzero if YGOT and YWANT are bitwise identical; exception
   flags are not considered.  */
static int RT(isok_nofenv) (RT(float) ygot, RT(float) ywant)
{
  int same = RT(asuint) (ygot) == RT(asuint) (ywant);
  return same;
}
|
||||
#endif
|
||||
|
||||
/* Call the function under test with argument pack A under rounding
   mode R.  The result is stored in *Y and the exception flags raised
   by the call in *EX; round-to-nearest is restored afterwards.  */
static inline void T(call_fenv) (const struct fun *f, struct T(args) a, int r,
				 RT(float) * y, int *ex)
{
  const int directed = r != FE_TONEAREST;

  if (directed)
    fesetround (r);
  feclearexcept (FE_ALL_EXCEPT);
  *y = T(call) (f, a);
  *ex = fetestexcept (FE_ALL_EXCEPT);
  if (directed)
    fesetround (FE_TONEAREST);
}
|
||||
|
||||
/* Call the function under test with argument pack A without touching
   the floating-point environment.  The result is stored in *Y and *EX
   is set to 0; R is not used.  */
static inline void T(call_nofenv) (const struct fun *f, struct T(args) a,
				   int r, RT(float) * y, int *ex)
{
  *ex = 0;
  *y = T(call) (f, a);
}
|
||||
|
||||
/* Compute the reference result for argument pack A with the wider
   precision function under rounding mode R and fill *P.

   Returns 1 when YGOT/EXGOT already match the reference (only y, ex
   and ex_may of *P are set then).  Otherwise returns 0 with ulpexp and
   tail filled in for the ulp error computation.  */
static inline int T(call_long_fenv) (const struct fun *f, struct T(args) a,
				     int r, struct RT(ret) * p,
				     RT(float) ygot, int exgot)
{
  const int directed = r != FE_TONEAREST;

  if (directed)
    fesetround (r);
  feclearexcept (FE_ALL_EXCEPT);
  volatile struct T(args) pinned = a; // TODO: barrier
  a = pinned;
  RT(double) ref = T(call_long) (f, a);
  p->y = (RT(float)) ref;
  volatile RT(float) sink = p->y; // TODO: barrier
  (void) sink;
  p->ex = fetestexcept (FE_ALL_EXCEPT);
  if (directed)
    fesetround (FE_TONEAREST);
  p->ex_may = FE_INEXACT;
  if (RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may))
    return 1;

  p->ulpexp = RT(ulpscale) (p->y);
  /* An infinite rounded reference is measured against 2*halfinf.  */
  RT(double) base = isinf (p->y) ? (RT(double)) 2 * RT(halfinf) : p->y;
  p->tail = RT(lscalbn) (ref - base, -p->ulpexp);

  /* TODO: subnormal result is treated as undeflow even if it's
     exact since call_long may not raise inexact correctly.  */
  if (RT(fabs) (p->y) < RT(min_normal)
      && (p->y != 0 || (p->ex & FE_INEXACT)))
    p->ex |= FE_UNDERFLOW | FE_INEXACT;
  return 0;
}
|
||||
/* Compute the reference result for argument pack A with the wider
   precision function, ignoring rounding modes and exceptions (R and
   EXGOT are not used).

   Returns 1 when YGOT is bitwise equal to the rounded reference;
   otherwise returns 0 with ulpexp and tail of *P filled in.  */
static inline int T(call_long_nofenv) (const struct fun *f, struct T(args) a,
				       int r, struct RT(ret) * p,
				       RT(float) ygot, int exgot)
{
  RT(double) ref = T(call_long) (f, a);

  p->y = (RT(float)) ref;
  if (RT(isok_nofenv) (ygot, p->y))
    return 1;

  p->ulpexp = RT(ulpscale) (p->y);
  /* An infinite rounded reference is measured against 2*halfinf.  */
  RT(double) base = isinf (p->y) ? (RT(double)) 2 * RT(halfinf) : p->y;
  p->tail = RT(lscalbn) (ref - base, -p->ulpexp);
  return 0;
}
|
||||
|
||||
/* There are nan input args and all quiet. */
|
||||
static inline int T(qnanpropagation) (struct T(args) a)
|
||||
{
|
||||
return T(reduce) (a, isnan, ||) && !T(reduce) (a, RT(issignaling), ||);
|
||||
}
|
||||
/* Return the sum of all members of the argument pack A.  */
static inline RT(float) T(sum) (struct T(args) a)
{
  RT(float) total = T(reduce) (a, , +);
  return total;
}
|
||||
|
||||
/* returns 1 if the got result is ok. */
|
||||
static inline int T(call_mpfr_fix) (const struct fun *f, struct T(args) a,
|
||||
int r_fenv, struct RT(ret) * p,
|
||||
RT(float) ygot, int exgot)
|
||||
{
|
||||
#if USE_MPFR
|
||||
int t, t2;
|
||||
mpfr_rnd_t r = rmap (r_fenv);
|
||||
MPFR_DECL_INIT(my, RT(prec_mpfr));
|
||||
MPFR_DECL_INIT(mr, RT(prec));
|
||||
MPFR_DECL_INIT(me, RT(prec_mpfr));
|
||||
mpfr_clear_flags ();
|
||||
t = T(call_mpfr) (my, f, a, r);
|
||||
/* Double rounding. */
|
||||
t2 = mpfr_set (mr, my, r);
|
||||
if (t2)
|
||||
t = t2;
|
||||
mpfr_set_emin (RT(emin));
|
||||
mpfr_set_emax (RT(emax));
|
||||
t = mpfr_check_range (mr, t, r);
|
||||
t = mpfr_subnormalize (mr, t, r);
|
||||
mpfr_set_emax (MPFR_EMAX_DEFAULT);
|
||||
mpfr_set_emin (MPFR_EMIN_DEFAULT);
|
||||
p->y = mpfr_get_d (mr, r);
|
||||
p->ex = t ? FE_INEXACT : 0;
|
||||
p->ex_may = FE_INEXACT;
|
||||
if (mpfr_underflow_p () && (p->ex & FE_INEXACT))
|
||||
/* TODO: handle before and after rounding uflow cases. */
|
||||
p->ex |= FE_UNDERFLOW;
|
||||
if (mpfr_overflow_p ())
|
||||
p->ex |= FE_OVERFLOW | FE_INEXACT;
|
||||
if (mpfr_divby0_p ())
|
||||
p->ex |= FE_DIVBYZERO;
|
||||
//if (mpfr_erangeflag_p ())
|
||||
// p->ex |= FE_INVALID;
|
||||
if (!mpfr_nanflag_p () && RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may))
|
||||
return 1;
|
||||
if (mpfr_nanflag_p () && !T(qnanpropagation) (a))
|
||||
p->ex |= FE_INVALID;
|
||||
p->ulpexp = RT(ulpscale_mpfr) (my, t);
|
||||
if (!isfinite (p->y))
|
||||
{
|
||||
p->tail = 0;
|
||||
if (isnan (p->y))
|
||||
{
|
||||
/* If an input was nan keep its sign. */
|
||||
p->y = T(sum) (a);
|
||||
if (!isnan (p->y))
|
||||
p->y = (p->y - p->y) / (p->y - p->y);
|
||||
return RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may);
|
||||
}
|
||||
mpfr_set_si_2exp (mr, signbit (p->y) ? -1 : 1, 1024, MPFR_RNDN);
|
||||
if (mpfr_cmpabs (my, mr) >= 0)
|
||||
return RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may);
|
||||
}
|
||||
mpfr_sub (me, my, mr, MPFR_RNDN);
|
||||
mpfr_mul_2si (me, me, -p->ulpexp, MPFR_RNDN);
|
||||
p->tail = mpfr_get_d (me, MPFR_RNDN);
|
||||
return 0;
|
||||
#else
|
||||
abort ();
|
||||
#endif
|
||||
}
|
||||
|
||||
static int T(cmp) (const struct fun *f, struct gen *gen,
|
||||
const struct conf *conf)
|
||||
{
|
||||
double maxerr = 0;
|
||||
uint64_t cnt = 0;
|
||||
uint64_t cnt1 = 0;
|
||||
uint64_t cnt2 = 0;
|
||||
uint64_t cntfail = 0;
|
||||
int r = conf->r;
|
||||
int use_mpfr = conf->mpfr;
|
||||
int fenv = conf->fenv;
|
||||
for (;;)
|
||||
{
|
||||
struct RT(ret) want;
|
||||
struct T(args) a = T(next) (gen);
|
||||
int exgot;
|
||||
int exgot2;
|
||||
RT(float) ygot;
|
||||
RT(float) ygot2;
|
||||
int fail = 0;
|
||||
if (fenv)
|
||||
T(call_fenv) (f, a, r, &ygot, &exgot);
|
||||
else
|
||||
T(call_nofenv) (f, a, r, &ygot, &exgot);
|
||||
if (f->twice) {
|
||||
secondcall = 1;
|
||||
if (fenv)
|
||||
T(call_fenv) (f, a, r, &ygot2, &exgot2);
|
||||
else
|
||||
T(call_nofenv) (f, a, r, &ygot2, &exgot2);
|
||||
secondcall = 0;
|
||||
if (RT(asuint) (ygot) != RT(asuint) (ygot2))
|
||||
{
|
||||
fail = 1;
|
||||
cntfail++;
|
||||
T(printcall) (f, a);
|
||||
printf (" got %a then %a for same input\n", ygot, ygot2);
|
||||
}
|
||||
}
|
||||
cnt++;
|
||||
int ok = use_mpfr
|
||||
? T(call_mpfr_fix) (f, a, r, &want, ygot, exgot)
|
||||
: (fenv ? T(call_long_fenv) (f, a, r, &want, ygot, exgot)
|
||||
: T(call_long_nofenv) (f, a, r, &want, ygot, exgot));
|
||||
if (!ok)
|
||||
{
|
||||
int print = 0;
|
||||
double err = RT(ulperr) (ygot, &want, r);
|
||||
double abserr = fabs (err);
|
||||
// TODO: count errors below accuracy limit.
|
||||
if (abserr > 0)
|
||||
cnt1++;
|
||||
if (abserr > 1)
|
||||
cnt2++;
|
||||
if (abserr > conf->errlim)
|
||||
{
|
||||
print = 1;
|
||||
if (!fail)
|
||||
{
|
||||
fail = 1;
|
||||
cntfail++;
|
||||
}
|
||||
}
|
||||
if (abserr > maxerr)
|
||||
{
|
||||
maxerr = abserr;
|
||||
if (!conf->quiet && abserr > conf->softlim)
|
||||
print = 1;
|
||||
}
|
||||
if (print)
|
||||
{
|
||||
T(printcall) (f, a);
|
||||
// TODO: inf ulp handling
|
||||
printf (" got %a want %a %+g ulp err %g\n", ygot, want.y,
|
||||
want.tail, err);
|
||||
}
|
||||
int diff = fenv ? exgot ^ want.ex : 0;
|
||||
if (fenv && (diff & ~want.ex_may))
|
||||
{
|
||||
if (!fail)
|
||||
{
|
||||
fail = 1;
|
||||
cntfail++;
|
||||
}
|
||||
T(printcall) (f, a);
|
||||
printf (" is %a %+g ulp, got except 0x%0x", want.y, want.tail,
|
||||
exgot);
|
||||
if (diff & exgot)
|
||||
printf (" wrongly set: 0x%x", diff & exgot);
|
||||
if (diff & ~exgot)
|
||||
printf (" wrongly clear: 0x%x", diff & ~exgot);
|
||||
putchar ('\n');
|
||||
}
|
||||
}
|
||||
if (cnt >= conf->n)
|
||||
break;
|
||||
if (!conf->quiet && cnt % 0x100000 == 0)
|
||||
printf ("progress: %6.3f%% cnt %llu cnt1 %llu cnt2 %llu cntfail %llu "
|
||||
"maxerr %g\n",
|
||||
100.0 * cnt / conf->n, (unsigned long long) cnt,
|
||||
(unsigned long long) cnt1, (unsigned long long) cnt2,
|
||||
(unsigned long long) cntfail, maxerr);
|
||||
}
|
||||
double cc = cnt;
|
||||
if (cntfail)
|
||||
printf ("FAIL ");
|
||||
else
|
||||
printf ("PASS ");
|
||||
T(printgen) (f, gen);
|
||||
printf (" round %c errlim %g maxerr %g %s cnt %llu cnt1 %llu %g%% cnt2 %llu "
|
||||
"%g%% cntfail %llu %g%%\n",
|
||||
conf->rc, conf->errlim,
|
||||
maxerr, conf->r == FE_TONEAREST ? "+0.5" : "+1.0",
|
||||
(unsigned long long) cnt,
|
||||
(unsigned long long) cnt1, 100.0 * cnt1 / cc,
|
||||
(unsigned long long) cnt2, 100.0 * cnt2 / cc,
|
||||
(unsigned long long) cntfail, 100.0 * cntfail / cc);
|
||||
return !!cntfail;
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
// polynomial for approximating cos(x)
|
||||
//
|
||||
// Copyright (c) 2019, Arm Limited.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Tunables: polynomial degree and approximation interval [a;b].
deg = 8;
a = -pi/4;
b = pi/4;

// Target function.  Only even powers of x are fitted, and the fit
// minimizes the absolute error against cos(x).
f = cos(x);

// approx(q,d): Remez fit of the residual f - q by a polynomial p of degree
// deg-d with weight x^d, i.e. p minimizes |f(x) - q(x) - x^d*p(x)| on [a;b].
approx = proc(q,d) {
  return remez(f(x)-q(x), deg-d, [a;b], x^d, 1e-10);
};

// The constant term is fixed to 1.  Each pass fits the remaining terms,
// keeps only the lowest one rounded to double precision and adds it to poly.
poly = 1;
for k from 1 to deg/2 do {
  lead = coeff(roundcoefficients(approx(poly,2*k), [|D ...|]), 0);
  poly = poly + x^(2*k)*lead;
};

// Report the achieved errors and the coefficients as hex floats.
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for k from 0 to deg do coeff(poly,k);
|
||||
@@ -0,0 +1,35 @@
|
||||
// polynomial for approximating e^x
|
||||
//
|
||||
// Copyright (c) 2019, Arm Limited.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Tunables: polynomial degree and number of table entries.
deg = 5;
N = 128;

// Approximation interval [a;b] = [-b;b] with b slightly above ln2/(2N).
b = log(2)/(2*N);
b = b + b*0x1p-16; // increase interval for non-nearest rounding (TOINT_NARROW)
a = -b;

// The fit minimizes the absolute error.
// approx(q,d): Remez fit of the residual exp - q by a polynomial p of degree
// deg-d with weight x^d, i.e. p minimizes |exp(x) - q(x) - x^d*p(x)| on [a;b].
approx = proc(q,d) {
  return remez(exp(x)-q(x), deg-d, [a;b], x^d, 1e-10);
};

// The first two coefficients are fixed (1 + x).  Each pass fits the remaining
// terms, keeps only the lowest one rounded to double precision and adds it.
poly = 1 + x;
for k from 2 to deg do {
  lead = coeff(roundcoefficients(approx(poly,k), [|D ...|]), 0);
  poly = poly + x^k*lead;
};

// Report the achieved errors and the coefficients as hex floats.
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/exp(x), [a;b], 30));
print("abs error:", accurateinfnorm(exp(x)-poly(x), [a;b], 30));
print("in [",a,b,"]");
// same errors over the doubled interval, for non-nearest rounding
print("rel2 error:", accurateinfnorm(1-poly(x)/exp(x), [2*a;2*b], 30));
print("abs2 error:", accurateinfnorm(exp(x)-poly(x), [2*a;2*b], 30));
print("in [",2*a,2*b,"]");
print("coeffs:");
for k from 0 to deg do coeff(poly,k);
|
||||
@@ -0,0 +1,48 @@
|
||||
// polynomial for approximating 2^x
|
||||
//
|
||||
// Copyright (c) 2019, Arm Limited.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Active parameter set: exp2f.
deg = 3; // poly degree
N = 32; // table entries
b = 1/(2*N); // interval is [-b;b]
a = -b;

//// Alternative parameter set: exp2.
//deg = 5; // poly degree
//N = 128; // table entries
//b = 1/(2*N); // interval
//a = -b;

// Target function; the fit below minimizes the relative error.
f = 2^x;

// approx(q,d): relative-error Remez fit; the returned p of degree deg-d
// minimizes |f(x) - q(x) - x^d*p(x)|/|f(x)| on [a;b].
approx = proc(q,d) {
  return remez(1 - q(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
};
// approx_abs(q,d): absolute-error variant; the returned p minimizes
// |f(x) - q(x) - x^d*p(x)| on [a;b].
approx_abs = proc(q,d) {
  return remez(f(x) - q(x), deg-d, [a;b], x^d, 1e-10);
};

// The constant term is fixed to 1.  Each pass fits the remaining terms,
// keeps only the lowest one rounded to double precision and adds it to poly.
poly = 1;
for k from 1 to deg do {
  lead = coeff(roundcoefficients(approx(poly,k), [|D ...|]), 0);
// lead = coeff(roundcoefficients(approx_abs(poly,k), [|D ...|]), 0);
  poly = poly + x^k*lead;
};

// Report the achieved errors and the coefficients as hex floats.
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/2^x, [a;b], 30));
print("abs error:", accurateinfnorm(2^x-poly(x), [a;b], 30));
print("in [",a,b,"]");
// same errors over the doubled interval, for non-nearest rounding
print("rel2 error:", accurateinfnorm(1-poly(x)/2^x, [2*a;2*b], 30));
print("abs2 error:", accurateinfnorm(2^x-poly(x), [2*a;2*b], 30));
print("in [",2*a,2*b,"]");
print("coeffs:");
for k from 0 to deg do coeff(poly,k);
|
||||
@@ -0,0 +1,35 @@
|
||||
// polynomial for approximating log(1+x)
|
||||
//
|
||||
// Copyright (c) 2019, Arm Limited.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Degree of the log(1+x) polynomial.
deg = 12;
// Approximation interval: |log(1+x)| > 0x1p-4 outside of it.
a = -0x1p-4;
b = 0x1.09p-4;

// The fit is done on log(1+x)/x with minimal relative error (multiplying
// the result by x gives the minimal relative error polynomial for
// log(1+x)), so one degree is consumed by the division by x.
deg = deg-1;

// f = log(1+x)/x, built from its Taylor series.
f = 0;
for k from 0 to 60 do { f = f + (-x)^k/(k+1); };

// approx(q,d): relative-error Remez fit; the returned p of degree deg-d
// minimizes |f(x) - q(x) - x^d*p(x)|/|f(x)| on [a;b].
approx = proc(q,d) {
  return remez(1 - q(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
};

// The constant term is fixed to 1.  Each pass fits the remaining terms,
// keeps only the lowest one rounded to double precision and adds it to poly.
poly = 1;
for k from 1 to deg do {
  lead = coeff(roundcoefficients(approx(poly,k), [|D ...|]), 0);
  poly = poly + x^k*lead;
};

// Report the achieved error and the coefficients as hex floats.
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for k from 0 to deg do coeff(poly,k);
|
||||
@@ -0,0 +1,42 @@
|
||||
// polynomial for approximating log2(1+x)
|
||||
//
|
||||
// Copyright (c) 2019, Arm Limited.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Degree of the log2(1+x) polynomial.
deg = 11;
// Approximation interval: |log2(1+x)| > 0x1p-4 outside of it.
a = -0x1.5b51p-5;
b = 0x1.6ab2p-5;

// 1/ln2 split into a high part with its last 21 bits rounded away and a
// double precision correction term.
ln2 = evaluate(log(2),0);
invln2hi = double(1/ln2 + 0x1p21) - 0x1p21;
invln2lo = double(1/ln2 - invln2hi);

// The fit is done on log2(1+x)/x with minimal relative error (multiplying
// the result by x gives the minimal relative error polynomial for
// log2(1+x)), so one degree is consumed by the division by x.
deg = deg-1;

// f = log2(1+x)/x: Taylor series of log(1+x)/x divided by ln2.
f = 0;
for k from 0 to 60 do { f = f + (-x)^k/(k+1); };
f = f/ln2;

// approx(q,d): relative-error Remez fit; the returned p of degree deg-d
// minimizes |f(x) - q(x) - x^d*p(x)|/|f(x)| on [a;b].
approx = proc(q,d) {
  return remez(1 - q(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
};

// The constant term is fixed to invln2hi + invln2lo.  Each pass fits the
// remaining terms, keeps only the lowest one rounded to double precision
// and adds it to poly.
poly = invln2hi + invln2lo;
for k from 1 to deg do {
  lead = coeff(roundcoefficients(approx(poly,k), [|D ...|]), 0);
  poly = poly + x^k*lead;
};

// Report the constants, the achieved error and the coefficients as hex floats.
display = hexadecimal;
print("invln2hi:", invln2hi);
print("invln2lo:", invln2lo);
print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for k from 0 to deg do coeff(poly,k);
|
||||
@@ -0,0 +1,41 @@
|
||||
// polynomial for approximating log2(1+x)
|
||||
//
|
||||
// Copyright (c) 2019, Arm Limited.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Degree of the log2(1+x) polynomial.
deg = 7;
// Approximation interval, ~= 1/(2*N) where N is the number of table entries.
a= -0x1.f45p-8;
b= 0x1.f45p-8;

// 1/ln2 split into a high part with its last 21 bits rounded away and a
// double precision correction term.
ln2 = evaluate(log(2),0);
invln2hi = double(1/ln2 + 0x1p21) - 0x1p21;
invln2lo = double(1/ln2 - invln2hi);

// Target function; the fit below minimizes the absolute error.
f = log(1+x)/ln2;

// approx(q,d): Remez fit of the residual f - q by a polynomial p of degree
// deg-d with weight x^d, i.e. p minimizes |f(x) - q(x) - x^d*p(x)| on [a;b].
approx = proc(q,d) {
  return remez(f(x) - q(x), deg-d, [a;b], x^d, 1e-10);
};

// The linear term is fixed to x*(invln2lo + invln2hi).  Each pass fits the
// remaining terms, keeps only the lowest one rounded to double precision
// and adds it to poly.
poly = x*(invln2lo + invln2hi);
for k from 2 to deg do {
  lead = coeff(roundcoefficients(approx(poly,k), [|D ...|]), 0);
  poly = poly + x^k*lead;
};

// Report the constants, the achieved error and the coefficients as hex floats.
display = hexadecimal;
print("invln2hi:", invln2hi);
print("invln2lo:", invln2lo);
print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
//// relative error computation fails if f(0)==0
//// g = f(x)/x = log2(1+x)/x; using taylor series
//g = 0;
//for i from 0 to 60 do { g = g + (-x)^i/(i+1)/ln2; };
//print("rel error:", accurateinfnorm(1-(poly(x)/x)/g(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for k from 0 to deg do coeff(poly,k);
|
||||
@@ -0,0 +1,35 @@
|
||||
// polynomial for approximating log(1+x)
|
||||
//
|
||||
// Copyright (c) 2019, Arm Limited.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Degree of the log(1+x) polynomial.
deg = 6;
// Approximation interval, ~= 1/(2*N) where N is the number of table entries.
a = -0x1.fp-9;
b = 0x1.fp-9;

// Target function; the fit below minimizes the absolute error.
f = log(1+x);

// approx(q,d): Remez fit of the residual f - q by a polynomial p of degree
// deg-d with weight x^d, i.e. p minimizes |f(x) - q(x) - x^d*p(x)| on [a;b].
approx = proc(q,d) {
  return remez(f(x) - q(x), deg-d, [a;b], x^d, 1e-10);
};

// The linear term is fixed to x.  Each pass fits the remaining terms,
// keeps only the lowest one rounded to double precision and adds it to poly.
poly = x;
for k from 2 to deg do {
  lead = coeff(roundcoefficients(approx(poly,k), [|D ...|]), 0);
  poly = poly + x^k*lead;
};

// Report the achieved errors and the coefficients as hex floats.
display = hexadecimal;
print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
// The relative error cannot be computed against f directly because
// f(0)==0; use g = f(x)/x = log(1+x)/x, built from its Taylor series.
g = 0;
for k from 0 to 60 do { g = g + (-x)^k/(k+1); };
print("rel error:", accurateinfnorm(1-poly(x)/x/g(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for k from 0 to deg do coeff(poly,k);
|
||||
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# ULP error plot tool.
|
||||
#
|
||||
# Copyright (c) 2019, Arm Limited.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import sys
|
||||
import re
|
||||
|
||||
# example usage:
|
||||
# build/bin/ulp -e .0001 log 0.5 2.0 2345678 | math/tools/plot.py
|
||||
|
||||
def fhex(s):
    """Convert a hexadecimal floating-point string (printf %a style) to a float."""
    return float.fromhex(s)


def parse(f):
    """Extract the per-sample results from the output of the ulp tool.

    f is an iterable of text lines (for example sys.stdin).  Lines of the form
    "name(x) got G want Y T ulp err E" contribute one sample each; PASS/FAIL
    summary lines are echoed to stdout; every other line is ignored.

    Returns four parallel lists (xs, gs, ys, es): the input, the value got,
    the value wanted and the reported ulp error of each sample.
    """
    xs = []
    gs = []
    ys = []
    es = []
    # Has to match the format used in ulp.c
    r = re.compile(r'[^ (]+\(([^ )]*)\) got ([^ ]+) want ([^ ]+) [^ ]+ ulp err ([^ ]+)')
    for line in f:
        m = r.match(line)
        if m:
            # Convert all four fields before appending so that a malformed
            # number cannot leave the lists with different lengths.
            x = fhex(m.group(1))
            g = fhex(m.group(2))
            y = fhex(m.group(3))
            e = float(m.group(4))
            xs.append(x)
            gs.append(g)
            ys.append(y)
            es.append(e)
        elif line.startswith('PASS') or line.startswith('FAIL'):
            # Print the summary line.  The line still carries its own
            # newline; strip it so print does not emit an extra blank line.
            print(line.rstrip('\n'))
    return xs, gs, ys, es
|
||||
|
||||
def plot(xs, gs, ys, es):
    """Show the ulp error and the got/want values of the samples over x.

    xs, gs, ys and es are the parallel sequences returned by parse().  With
    fewer than two samples a message is printed and nothing is drawn.
    """
    if len(xs) < 2:
        print('not enough samples')
        return

    lo = min(xs)
    hi = max(xs)
    abs_err = np.abs(es) # ignore the sign
    emax = max(abs_err)

    fig, (err_ax, val_ax) = plt.subplots(nrows=2)

    # Top panel: |ulp error| per sample, annotated with the maximum both as
    # a hex float and in %g notation.
    label = '%s\n%g' % (emax.hex(), emax)
    err_ax.text(lo + (hi - lo)*0.7, emax*0.8, label)
    err_ax.plot(xs, abs_err, 'r.')
    err_ax.grid()

    # Bottom panel: wanted (red) and got (blue) values.
    val_ax.plot(xs, ys, 'r.', label='want')
    val_ax.plot(xs, gs, 'b.', label='got')
    val_ax.grid()
    val_ax.legend()

    plt.show()
|
||||
|
||||
# Read the ulp tool output from stdin and plot the collected samples.
plot(*parse(sys.stdin))
|
||||
+1334
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,37 @@
|
||||
// polynomial for approximating sin(x)
|
||||
//
|
||||
// Copyright (c) 2019, Arm Limited.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Tunables: degree of the sin(x) polynomial and approximation interval [a;b].
deg = 7;
a = -pi/4;
b = pi/4;

// The fit is an even polynomial with minimal absolute error compared to
// sin(x)/x, so one degree is consumed by the division by x.
deg = deg-1;

// f = sin(x)/x, built from its Taylor series; fact holds (2k+1)!.
f = 1;
fact = 1;
for k from 1 to 60 do { fact = 2*k*(2*k + 1)*fact; f = f + (-1)^k*x^(2*k)/fact; };

// approx(q,d): Remez fit of the residual f - q by a polynomial p of degree
// deg-d with weight x^d, i.e. p minimizes |f(x) - q(x) - x^d*p(x)| on [a;b].
approx = proc(q,d) {
  return remez(f(x)-q(x), deg-d, [a;b], x^d, 1e-10);
};

// The constant term is fixed to 1.  Each pass fits the remaining terms,
// keeps only the lowest one rounded to double precision and adds it to poly.
poly = 1;
for k from 1 to deg/2 do {
  lead = coeff(roundcoefficients(approx(poly,2*k), [|D ...|]), 0);
  poly = poly + x^(2*k)*lead;
};

// Report the achieved errors and the coefficients as hex floats.
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
print("abs error:", accurateinfnorm(sin(x)-x*poly(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for k from 0 to deg do coeff(poly,k);
|
||||
@@ -0,0 +1,30 @@
|
||||
// polynomial for approximating e^x
|
||||
//
|
||||
// Copyright (c) 2019, Arm Limited.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Tunables: polynomial degree and number of table entries.
deg = 4;
N = 128;

// Approximation interval [a;b] = [-ln2/(2N); ln2/(2N)].
b = log(2)/(2*N);
a = -b;

// The fit minimizes the absolute error.
// approx(q,d): Remez fit of the residual exp - q by a polynomial p of degree
// deg-d with weight x^d, i.e. p minimizes |exp(x) - q(x) - x^d*p(x)| on [a;b].
approx = proc(q,d) {
  return remez(exp(x)-q(x), deg-d, [a;b], x^d, 1e-10);
};

// The first two coefficients are fixed (1 + x).  Each pass fits the remaining
// terms, keeps only the lowest one rounded to double precision and adds it.
poly = 1 + x;
for k from 2 to deg do {
  lead = coeff(roundcoefficients(approx(poly,k), [|D ...|]), 0);
  poly = poly + x^k*lead;
};

// Report the achieved errors and the coefficients as hex floats.
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/exp(x), [a;b], 30));
print("abs error:", accurateinfnorm(exp(x)-poly(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for k from 0 to deg do coeff(poly,k);
|
||||
@@ -0,0 +1,34 @@
|
||||
// polynomial used for __v_log(x)
|
||||
//
|
||||
// Copyright (c) 2019, Arm Limited.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Tunables: degree of the log(1+x) polynomial and approximation interval.
deg = 6;
a = -0x1.fc1p-9;
b = 0x1.009p-8;

// The fit is done on log(1+x)/x with minimal relative error (multiplying
// the result by x gives the minimal relative error polynomial for
// log(1+x)), so one degree is consumed by the division by x.
deg = deg-1;

// f = log(1+x)/x, built from its Taylor series.
f = 0;
for k from 0 to 60 do { f = f + (-x)^k/(k+1); };

// approx(q,d): relative-error Remez fit; the returned p of degree deg-d
// minimizes |f(x) - q(x) - x^d*p(x)|/|f(x)| on [a;b].
approx = proc(q,d) {
  return remez(1 - q(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
};

// The constant term is fixed to 1.  Each pass fits the remaining terms,
// keeps only the lowest one rounded to double precision and adds it to poly.
poly = 1;
for k from 1 to deg do {
  lead = coeff(roundcoefficients(approx(poly,k), [|D ...|]), 0);
  poly = poly + x^k*lead;
};

// Report the achieved error and the coefficients as hex floats.
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for k from 0 to deg do coeff(poly,k);
|
||||
@@ -0,0 +1,36 @@
|
||||
// polynomial for approximating sin(x)
|
||||
//
|
||||
// Copyright (c) 2019, Arm Limited.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Tunables: degree of the sin(x) polynomial and approximation interval [a;b].
deg = 15;
a = -pi/2;
b = pi/2;

// The fit is an even polynomial with minimal absolute error compared to
// sin(x)/x, so one degree is consumed by the division by x.
deg = deg-1;

// f = sin(x)/x, built from its Taylor series; fact holds (2k+1)!.
f = 1;
fact = 1;
for k from 1 to 60 do { fact = 2*k*(2*k + 1)*fact; f = f + (-1)^k*x^(2*k)/fact; };

// approx(q,d): Remez fit of the residual f - q by a polynomial p of degree
// deg-d with weight x^d, i.e. p minimizes |f(x) - q(x) - x^d*p(x)| on [a;b].
approx = proc(q,d) {
  return remez(f(x)-q(x), deg-d, [a;b], x^d, 1e-10);
};

// The constant term is fixed to 1.  Each pass fits the remaining terms,
// keeps only the lowest one rounded to double precision and adds it to poly.
poly = 1;
for k from 1 to deg/2 do {
  lead = coeff(roundcoefficients(approx(poly,2*k), [|D ...|]), 0);
  poly = poly + x^(2*k)*lead;
};

// Report the achieved error and the coefficients as hex floats.
display = hexadecimal;
print("abs error:", accurateinfnorm(sin(x)-x*poly(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for k from 0 to deg do coeff(poly,k);
|
||||
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Double-precision vector cos function.
|
||||
*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "mathlib.h"
|
||||
#include "v_math.h"
|
||||
#if V_SUPPORTED
|
||||
|
||||
/* Coefficients of the odd polynomial used for sin(r), stored from the
   highest-degree term (r^15) down to the r^3 term.  */
static const double Poly[] = {
  /* worst-case error is 3.5 ulp.
     abs error: 0x1.be222a58p-53 in [-pi/2, pi/2].  */
  -0x1.9f4a9c8b21dc9p-41,
  0x1.60e88a10163f2p-33,
  -0x1.ae6361b7254e7p-26,
  0x1.71de382e8d62bp-19,
  -0x1.a01a019aeb4ffp-13,
  0x1.111111110b25ep-7,
  -0x1.55555555554c3p-3,
};

/* Ck multiplies r^(2k+1): sin(r) ~= r + C1 r^3 + ... + C7 r^15.  */
#define C1 v_f64 (Poly[6])
#define C2 v_f64 (Poly[5])
#define C3 v_f64 (Poly[4])
#define C4 v_f64 (Poly[3])
#define C5 v_f64 (Poly[2])
#define C6 v_f64 (Poly[1])
#define C7 v_f64 (Poly[0])

/* Range reduction constants: 1/pi, pi/2 and pi split into the three terms
   Pi1 + Pi2 + Pi3.  */
#define InvPi v_f64 (0x1.45f306dc9c883p-2)
#define HalfPi v_f64 (0x1.921fb54442d18p+0)
#define Pi1 v_f64 (0x1.921fb54442d18p+1)
#define Pi2 v_f64 (0x1.1a62633145c06p-53)
#define Pi3 v_f64 (0x1.c1cd129024e09p-106)

/* Shift is 1.5*2^52, added and subtracted to round to an integer; inputs
   with |x| >= RangeVal take the special case path; AbsMask clears the
   sign bit.  */
#define Shift v_f64 (0x1.8p52)
#define RangeVal v_f64 (0x1p23)
#define AbsMask v_u64 (0x7fffffffffffffff)
|
||||
|
||||
VPCS_ATTR
|
||||
__attribute__ ((noinline)) static v_f64_t
|
||||
specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp)
|
||||
{
|
||||
return v_call_f64 (cos, x, y, cmp);
|
||||
}
|
||||
|
||||
VPCS_ATTR
|
||||
v_f64_t
|
||||
V_NAME(cos) (v_f64_t x)
|
||||
{
|
||||
v_f64_t n, r, r2, y;
|
||||
v_u64_t odd, cmp;
|
||||
|
||||
r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask);
|
||||
cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal));
|
||||
|
||||
/* n = rint((|x|+pi/2)/pi) - 0.5. */
|
||||
n = v_fma_f64 (InvPi, r + HalfPi, Shift);
|
||||
odd = v_as_u64_f64 (n) << 63;
|
||||
n -= Shift;
|
||||
n -= v_f64 (0.5);
|
||||
|
||||
/* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */
|
||||
r = v_fma_f64 (-Pi1, n, r);
|
||||
r = v_fma_f64 (-Pi2, n, r);
|
||||
r = v_fma_f64 (-Pi3, n, r);
|
||||
|
||||
/* sin(r) poly approx. */
|
||||
r2 = r * r;
|
||||
y = v_fma_f64 (C7, r2, C6);
|
||||
y = v_fma_f64 (y, r2, C5);
|
||||
y = v_fma_f64 (y, r2, C4);
|
||||
y = v_fma_f64 (y, r2, C3);
|
||||
y = v_fma_f64 (y, r2, C2);
|
||||
y = v_fma_f64 (y, r2, C1);
|
||||
y = v_fma_f64 (y * r2, r, r);
|
||||
|
||||
/* sign. */
|
||||
y = v_as_f64_u64 (v_as_u64_f64 (y) ^ odd);
|
||||
|
||||
if (unlikely (v_any_u64 (cmp)))
|
||||
return specialcase (x, y, cmp);
|
||||
return y;
|
||||
}
|
||||
VPCS_ALIAS
|
||||
#endif
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Single-precision vector cos function.
|
||||
*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "mathlib.h"
|
||||
#include "v_math.h"
|
||||
#if V_SUPPORTED
|
||||
|
||||
/* Coefficients of the odd polynomial used for sin(r), stored from the
   r^9 term down to the r^3 term.  */
static const float Poly[] = {
  /* 1.886 ulp error */
  0x1.5b2e76p-19f,
  -0x1.9f42eap-13f,
  0x1.110df4p-7f,
  -0x1.555548p-3f,
};
/* Ak multiplies r^k: sin(r) ~= r + A3 r^3 + A5 r^5 + A7 r^7 + A9 r^9.  */
#define A3 v_f32 (Poly[3])
#define A5 v_f32 (Poly[2])
#define A7 v_f32 (Poly[1])
#define A9 v_f32 (Poly[0])

/* Range reduction constants: 1/pi, pi/2 and pi split into the three terms
   Pi1 + Pi2 + Pi3.  */
#define InvPi v_f32 (0x1.45f306p-2f)
#define HalfPi v_f32 (0x1.921fb6p0f)
#define Pi1 v_f32 (0x1.921fb6p+1f)
#define Pi2 v_f32 (-0x1.777a5cp-24f)
#define Pi3 v_f32 (-0x1.ee59dap-49f)

/* Shift is 1.5*2^23, added and subtracted to round to an integer; inputs
   with |x| >= RangeVal take the special case path; AbsMask clears the
   sign bit.  */
#define Shift v_f32 (0x1.8p+23f)
#define RangeVal v_f32 (0x1p20f)
#define AbsMask v_u32 (0x7fffffff)
|
||||
|
||||
VPCS_ATTR
|
||||
static v_f32_t
|
||||
specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
|
||||
{
|
||||
/* Fall back to scalar code. */
|
||||
return v_call_f32 (cosf, x, y, cmp);
|
||||
}
|
||||
|
||||
VPCS_ATTR
|
||||
v_f32_t
|
||||
V_NAME(cosf) (v_f32_t x)
|
||||
{
|
||||
v_f32_t n, r, r2, y;
|
||||
v_u32_t odd, cmp;
|
||||
|
||||
r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask);
|
||||
cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal));
|
||||
|
||||
/* n = rint((|x|+pi/2)/pi) - 0.5 */
|
||||
n = v_fma_f32 (InvPi, r + HalfPi, Shift);
|
||||
odd = v_as_u32_f32 (n) << 31;
|
||||
n -= Shift;
|
||||
n -= v_f32 (0.5f);
|
||||
|
||||
/* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */
|
||||
r = v_fma_f32 (-Pi1, n, r);
|
||||
r = v_fma_f32 (-Pi2, n, r);
|
||||
r = v_fma_f32 (-Pi3, n, r);
|
||||
|
||||
/* y = sin(r) */
|
||||
r2 = r * r;
|
||||
y = v_fma_f32 (A9, r2, A7);
|
||||
y = v_fma_f32 (y, r2, A5);
|
||||
y = v_fma_f32 (y, r2, A3);
|
||||
y = v_fma_f32 (y * r2, r, r);
|
||||
|
||||
/* sign fix */
|
||||
y = v_as_f32_u32 (v_as_u32_f32 (y) ^ odd);
|
||||
|
||||
if (unlikely (v_any_u32 (cmp)))
|
||||
return specialcase (x, y, cmp);
|
||||
return y;
|
||||
}
|
||||
VPCS_ALIAS
|
||||
#endif
|
||||
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Double-precision vector e^x function.
|
||||
*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "mathlib.h"
|
||||
#include "v_math.h"
|
||||
#if V_SUPPORTED
|
||||
#include "v_exp.h"
|
||||
|
||||
#if V_EXP_TABLE_BITS == 7
|
||||
/* maxerr: 1.88 +0.5 ulp
|
||||
rel error: 1.4337*2^-53
|
||||
abs error: 1.4299*2^-53 in [ -ln2/256, ln2/256 ]. */
|
||||
#define C1 v_f64 (0x1.ffffffffffd43p-2)
|
||||
#define C2 v_f64 (0x1.55555c75adbb2p-3)
|
||||
#define C3 v_f64 (0x1.55555da646206p-5)
|
||||
#define InvLn2 v_f64 (0x1.71547652b82fep7) /* N/ln2. */
|
||||
#define Ln2hi v_f64 (0x1.62e42fefa39efp-8) /* ln2/N. */
|
||||
#define Ln2lo v_f64 (0x1.abc9e3b39803f3p-63)
|
||||
#elif V_EXP_TABLE_BITS == 8
|
||||
/* maxerr: 0.54 +0.5 ulp
|
||||
rel error: 1.4318*2^-58
|
||||
abs error: 1.4299*2^-58 in [ -ln2/512, ln2/512 ]. */
|
||||
#define C1 v_f64 (0x1.fffffffffffd4p-2)
|
||||
#define C2 v_f64 (0x1.5555571d6b68cp-3)
|
||||
#define C3 v_f64 (0x1.5555576a59599p-5)
|
||||
#define InvLn2 v_f64 (0x1.71547652b82fep8)
|
||||
#define Ln2hi v_f64 (0x1.62e42fefa39efp-9)
|
||||
#define Ln2lo v_f64 (0x1.abc9e3b39803f3p-64)
|
||||
#endif
|
||||
|
||||
#define N (1 << V_EXP_TABLE_BITS)
|
||||
#define Tab __v_exp_data
|
||||
#define IndexMask v_u64 (N - 1)
|
||||
#define Shift v_f64 (0x1.8p+52)
|
||||
#define Thres v_f64 (704.0)
|
||||
|
||||
VPCS_ATTR
|
||||
static v_f64_t
|
||||
specialcase (v_f64_t s, v_f64_t y, v_f64_t n)
|
||||
{
|
||||
v_f64_t absn = v_abs_f64 (n);
|
||||
|
||||
/* 2^(n/N) may overflow, break it up into s1*s2. */
|
||||
v_u64_t b = v_cond_u64 (n <= v_f64 (0.0)) & v_u64 (0x6000000000000000);
|
||||
v_f64_t s1 = v_as_f64_u64 (v_u64 (0x7000000000000000) - b);
|
||||
v_f64_t s2 = v_as_f64_u64 (v_as_u64_f64 (s) - v_u64 (0x3010000000000000) + b);
|
||||
v_u64_t cmp = v_cond_u64 (absn > v_f64 (1280.0 * N));
|
||||
v_f64_t r1 = s1 * s1;
|
||||
v_f64_t r0 = v_fma_f64 (y, s2, s2) * s1;
|
||||
return v_as_f64_u64 ((cmp & v_as_u64_f64 (r1)) | (~cmp & v_as_u64_f64 (r0)));
|
||||
}
|
||||
|
||||
VPCS_ATTR
|
||||
v_f64_t
|
||||
V_NAME(exp) (v_f64_t x)
|
||||
{
|
||||
v_f64_t n, r, r2, s, y, z;
|
||||
v_u64_t cmp, u, e, i;
|
||||
|
||||
cmp = v_cond_u64 (v_abs_f64 (x) > Thres);
|
||||
|
||||
/* n = round(x/(ln2/N)). */
|
||||
z = v_fma_f64 (x, InvLn2, Shift);
|
||||
u = v_as_u64_f64 (z);
|
||||
n = z - Shift;
|
||||
|
||||
/* r = x - n*ln2/N. */
|
||||
r = x;
|
||||
r = v_fma_f64 (-Ln2hi, n, r);
|
||||
r = v_fma_f64 (-Ln2lo, n, r);
|
||||
|
||||
e = u << (52 - V_EXP_TABLE_BITS);
|
||||
i = u & IndexMask;
|
||||
|
||||
/* y = exp(r) - 1 ~= r + C1 r^2 + C2 r^3 + C3 r^4. */
|
||||
r2 = r * r;
|
||||
y = v_fma_f64 (C2, r, C1);
|
||||
y = v_fma_f64 (C3, r2, y);
|
||||
y = v_fma_f64 (y, r2, r);
|
||||
|
||||
/* s = 2^(n/N). */
|
||||
u = v_lookup_u64 (Tab, i);
|
||||
s = v_as_f64_u64 (u + e);
|
||||
|
||||
if (unlikely (v_any_u64 (cmp)))
|
||||
return specialcase (s, y, n);
|
||||
return v_fma_f64 (y, s, s);
|
||||
}
|
||||
VPCS_ALIAS
|
||||
#endif
|
||||
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Declarations for double-precision e^x vector function.
|
||||
*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/* Include guard so the header can be pulled in more than once.  */
#ifndef V_EXP_H
#define V_EXP_H

#include "v_math.h"
#if WANT_VMATH

/* log2 of the number of entries in __v_exp_data.  */
#define V_EXP_TABLE_BITS 7

/* Table used by the vector exp code to build 2^(i/N).  */
extern const u64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
#endif

#endif /* V_EXP_H */
|
||||
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Single-precision vector 2^x function.
|
||||
*
|
||||
* Copyright (c) 2019, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "mathlib.h"
|
||||
#include "v_math.h"
|
||||
#if V_SUPPORTED
|
||||
|
||||
/* Coefficients of the polynomial used for 2^r - 1, stored from the r^5
   term down to the r term.  */
static const float Poly[] = {
  /* maxerr: 1.962 ulp.  */
  0x1.59977ap-10f,
  0x1.3ce9e4p-7f,
  0x1.c6bd32p-5f,
  0x1.ebf9bcp-3f,
  0x1.62e422p-1f,
};
/* poly(r) = C4 r + C3 r^2 + C2 r^3 + C1 r^4 + C0 r^5.  */
#define C4 v_f32 (Poly[4])
#define C3 v_f32 (Poly[3])
#define C2 v_f32 (Poly[2])
#define C1 v_f32 (Poly[1])
#define C0 v_f32 (Poly[0])

/* 1.5*2^23, used by the shift-based rounding variant.  */
#define Shift v_f32 (0x1.8p23f)
|
||||
|
||||
VPCS_ATTR
|
||||
static v_f32_t
|
||||
specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f32_t scale)
|
||||
{
|
||||
/* 2^n may overflow, break it up into s1*s2. */
|
||||
v_u32_t b = v_cond_u32 (n <= v_f32 (0.0f)) & v_u32 (0x82000000);
|
||||
v_f32_t s1 = v_as_f32_u32 (v_u32 (0x7f000000) + b);
|
||||
v_f32_t s2 = v_as_f32_u32 (e - b);
|
||||
v_u32_t cmp2 = v_cond_u32 (absn > v_f32 (192.0f));
|
||||
v_u32_t r2 = v_as_u32_f32 (s1 * s1);
|
||||
v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1);
|
||||
/* Similar to r1 but avoids double rounding in the subnormal range. */
|
||||
v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale));
|
||||
return v_as_f32_u32 ((cmp2 & r2) | (~cmp2 & cmp1 & r1) | (~cmp1 & r0));
|
||||
}
|
||||
|
||||
VPCS_ATTR
|
||||
v_f32_t
|
||||
V_NAME(exp2f) (v_f32_t x)
|
||||
{
|
||||
v_f32_t n, r, r2, scale, p, q, poly, absn;
|
||||
v_u32_t cmp, e;
|
||||
|
||||
/* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
|
||||
x = n + r, with r in [-1/2, 1/2]. */
|
||||
#if 0
|
||||
v_f32_t z;
|
||||
z = x + Shift;
|
||||
n = z - Shift;
|
||||
r = x - n;
|
||||
e = v_as_u32_f32 (z) << 23;
|
||||
#else
|
||||
n = v_round_f32 (x);
|
||||
r = x - n;
|
||||
e = v_as_u32_s32 (v_round_s32 (x)) << 23;
|
||||
#endif
|
||||
scale = v_as_f32_u32 (e + v_u32 (0x3f800000));
|
||||
absn = v_abs_f32 (n);
|
||||
cmp = v_cond_u32 (absn > v_f32 (126.0f));
|
||||
r2 = r * r;
|
||||
p = v_fma_f32 (C0, r, C1);
|
||||
q = v_fma_f32 (C2, r, C3);
|
||||
q = v_fma_f32 (p, r2, q);
|
||||
p = C4 * r;
|
||||
poly = v_fma_f32 (q, r2, p);
|
||||
if (unlikely (v_any_u32 (cmp)))
|
||||
return specialcase (poly, n, e, absn, cmp, scale);
|
||||
return v_fma_f32 (poly, scale, scale);
|
||||
}
|
||||
VPCS_ALIAS
|
||||
#endif
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user