update openharmony 1.0.1

2026-07-01 06:42:05 -04:00 · 2021-03-11 18:37:56 +08:00
parent 8987b1fdf0
commit 155329e2ce
200 changed files with 77182 additions and 75 deletions
@@ -0,0 +1,3 @@
+build/
+.DS_Store
+config.mk
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 1999-2019, Arm Limited.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,89 @@
+# Makefile - requires GNU make
+#
+# Copyright (c) 2018-2019, Arm Limited.
+# SPDX-License-Identifier: MIT
+
+srcdir = .
+prefix = /usr
+bindir = $(prefix)/bin
+libdir = $(prefix)/lib
+includedir = $(prefix)/include
+
+# Configure these in config.mk, do not make changes in this file.
+SUBS = math string networking
+HOST_CC = cc
+HOST_CFLAGS = -std=c99 -O2
+HOST_LDFLAGS =
+HOST_LDLIBS =
+EMULATOR =
+CPPFLAGS =
+CFLAGS = -std=c99 -O2
+CFLAGS_SHARED = -fPIC
+CFLAGS_ALL = -Ibuild/include $(CPPFLAGS) $(CFLAGS)
+LDFLAGS =
+LDLIBS =
+AR = $(CROSS_COMPILE)ar
+RANLIB = $(CROSS_COMPILE)ranlib
+INSTALL = install
+
+all:
+
+-include config.mk
+
+$(foreach sub,$(SUBS),$(eval include $(srcdir)/$(sub)/Dir.mk))
+
+# Required targets of subproject foo:
+#   all-foo
+#   check-foo
+#   clean-foo
+#   install-foo
+# Required make variables of subproject foo:
+#   foo-files: Built files (all in build/).
+# Make variables used by subproject foo:
+#   foo-...: Variables defined in foo/Dir.mk or by config.mk.
+
+all: $(SUBS:%=all-%)
+
+ALL_FILES = $(foreach sub,$(SUBS),$($(sub)-files))
+DIRS = $(sort $(patsubst %/,%,$(dir $(ALL_FILES))))
+$(ALL_FILES): | $(DIRS)
+$(DIRS):
+	mkdir -p $@
+
+$(filter %.os,$(ALL_FILES)): CFLAGS_ALL += $(CFLAGS_SHARED)
+
+build/%.o: $(srcdir)/%.S
+	$(CC) $(CFLAGS_ALL) -c -o $@ $<
+
+build/%.o: $(srcdir)/%.c
+	$(CC) $(CFLAGS_ALL) -c -o $@ $<
+
+build/%.os: $(srcdir)/%.S
+	$(CC) $(CFLAGS_ALL) -c -o $@ $<
+
+build/%.os: $(srcdir)/%.c
+	$(CC) $(CFLAGS_ALL) -c -o $@ $<
+
+clean: $(SUBS:%=clean-%)
+	rm -rf build
+
+distclean: clean
+	rm -f config.mk
+
+$(DESTDIR)$(bindir)/%: build/bin/%
+	$(INSTALL) -D $< $@
+
+$(DESTDIR)$(libdir)/%.so: build/lib/%.so
+	$(INSTALL) -D $< $@
+
+$(DESTDIR)$(libdir)/%: build/lib/%
+	$(INSTALL) -m 644 -D $< $@
+
+$(DESTDIR)$(includedir)/%: build/include/%
+	$(INSTALL) -m 644 -D $< $@
+
+install: $(SUBS:%=install-%)
+
+check: $(SUBS:%=check-%)
+
+.PHONY: all clean distclean install check
@@ -0,0 +1,56 @@
+Arm Optimized Routines
+----------------------
+
+This repository contains implementations of library functions
+provided by Arm under MIT License (See LICENSE). Contributions
+to this project are accepted, but Contributors have to sign an
+Assignment Agreement, please follow the instructions in
+contributor-agreement.pdf. This is needed so upstreaming code
+to projects that require copyright assignment is possible.
+
+Regular quarterly releases are tagged as vYY.MM, the latest
+release is v20.05.
+
+Source code layout:
+
+build/          - build directory (created by make).
+math/           - math subproject sources.
+math/include/   - math library public headers.
+math/test/      - math test and benchmark related sources.
+math/tools/     - tools used for designing the algorithms.
+networking/     - networking subproject sources.
+networking/include/ - networking library public headers.
+networking/test/ - networking test and benchmark related sources.
+string/         - string routines subproject sources.
+string/include/ - string library public headers.
+string/test/    - string test and benchmark related sources.
+
+The steps to build the target libraries and run the tests:
+
+cp config.mk.dist config.mk
+# edit config.mk if necessary ...
+make
+make check
+
+Or building outside of the source directory:
+
+ln -s path/to/src/Makefile Makefile
+cp path/to/src/config.mk.dist config.mk
+echo 'srcdir = path/to/src' >> config.mk
+# further edits to config.mk
+make
+make check
+
+Or building and testing the math subproject only:
+
+make all-math
+make check-math
+
+The test system requires libmpfr and libmpc.
+For example on debian linux they can be installed as:
+
+sudo apt-get install libmpfr-dev libmpc-dev
+
+For cross build, CROSS_COMPILE should be set in config.mk and EMULATOR
+should be set for cross testing (e.g. using qemu-user or remote access
+to a target machine), see the examples in config.mk.dist.
@@ -0,0 +1,11 @@
+[
+    {
+        "Name"                  : "optimized-routines",
+        "License"               : "MIT License",
+        "License File"          : "LICENSE",
+        "Version Number"        : "v20.05",
+        "Owner"                 : "zhaotianyu9@huawei.com",
+        "Upstream URL"          : "https://www.arm.com/;https://github.com/ARM-software/optimized-routines",
+        "Description"           : "Optimized implementations of various library functions for ARM architecture processors"
+    }
+]
@@ -1,36 +0,0 @@
-# third_party_optimized_routines
-
-#### Description
-{**When you're done, you can delete the content in this README and update the file with details for others getting started with your repository**}
-
-#### Software Architecture
-Software architecture description
-
-#### Installation
-
-1.  xxxx
-2.  xxxx
-3.  xxxx
-
-#### Instructions
-
-1.  xxxx
-2.  xxxx
-3.  xxxx
-
-#### Contribution
-
-1.  Fork the repository
-2.  Create Feat_xxx branch
-3.  Commit your code
-4.  Create Pull Request
-
-
-#### Gitee Feature
-
-1.  You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md
-2.  Gitee blog [blog.gitee.com](https://blog.gitee.com)
-3.  Explore open source project [https://gitee.com/explore](https://gitee.com/explore)
-4.  The most valuable open source project [GVP](https://gitee.com/gvp)
-5.  The manual of Gitee [https://gitee.com/help](https://gitee.com/help)
-6.  The most popular members  [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
@@ -1,39 +0,0 @@
-# third_party_optimized_routines
-
-#### 介绍
-{**以下是 Gitee 平台说明，您可以替换此简介**
-Gitee 是 OSCHINA 推出的基于 Git 的代码托管平台（同时支持 SVN）。专为开发者提供稳定、高效、安全的云端软件开发协作平台
-无论是个人、团队、或是企业，都能够用 Gitee 实现代码托管、项目管理、协作开发。企业项目请看 [https://gitee.com/enterprises](https://gitee.com/enterprises)}
-
-#### 软件架构
-软件架构说明
-
-
-#### 安装教程
-
-1.  xxxx
-2.  xxxx
-3.  xxxx
-
-#### 使用说明
-
-1.  xxxx
-2.  xxxx
-3.  xxxx
-
-#### 参与贡献
-
-1.  Fork 本仓库
-2.  新建 Feat_xxx 分支
-3.  提交代码
-4.  新建 Pull Request
-
-
-#### 特技
-
-1.  使用 Readme\_XXX.md 来支持不同的语言，例如 Readme\_en.md, Readme\_zh.md
-2.  Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com)
-3.  你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目
-4.  [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目，是综合评定出的优秀开源项目
-5.  Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help)
-6.  Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
@@ -0,0 +1,73 @@
+# Example config.mk
+#
+# Copyright (c) 2018-2019, Arm Limited.
+# SPDX-License-Identifier: MIT
+
+# Subprojects to build
+SUBS = math string networking
+
+# Target architecture: aarch64, arm or x86_64
+ARCH = aarch64
+
+# Use for cross compilation with gcc.
+#CROSS_COMPILE = aarch64-none-linux-gnu-
+
+# Compiler for the target
+CC = $(CROSS_COMPILE)gcc
+CFLAGS = -std=c99 -pipe -O3
+CFLAGS += -Wall -Wno-missing-braces
+CFLAGS += -Werror=implicit-function-declaration
+
+# Used for test case generator that is executed on the host
+HOST_CC = gcc
+HOST_CFLAGS = -std=c99 -O2
+HOST_CFLAGS += -Wall -Wno-unused-function
+
+# Enable debug info.
+HOST_CFLAGS += -g
+CFLAGS += -g
+
+# Optimize the shared libraries on aarch64 assuming they fit in 1M.
+#CFLAGS_SHARED = -fPIC -mcmodel=tiny
+
+# Enable MTE support.
+#CFLAGS += -march=armv8.5-a+memtag -DWANT_MTE_TEST=1
+
+# Use with cross testing.
+#EMULATOR = qemu-aarch64-static
+#EMULATOR = sh -c 'scp $$1 user@host:/dir && ssh user@host /dir/"$$@"' --
+
+# Additional flags for subprojects.
+math-cflags =
+math-ldlibs =
+math-ulpflags =
+math-testflags =
+string-cflags =
+networking-cflags =
+
+# Use if mpfr is available on the target for ulp error checking.
+#math-ldlibs += -lmpfr -lgmp
+#math-cflags += -DUSE_MPFR
+
+# Use with gcc.
+math-cflags += -frounding-math -fexcess-precision=standard -fno-stack-protector
+math-cflags += -ffp-contract=fast -fno-math-errno
+
+# Use with clang.
+#math-cflags += -ffp-contract=fast
+
+# Disable vector math code
+#math-cflags += -DWANT_VMATH=0
+
+# Disable fenv checks
+#math-ulpflags = -q -f
+#math-testflags = -nostatus
+
+# Remove GNU Property Notes from asm files.
+#string-cflags += -DWANT_GNU_PROPERTY=0
+
+# Enable assertion checks.
+#networking-cflags += -DWANT_ASSERT
+
+# Avoid auto-vectorization of scalar code and unroll loops
+networking-cflags += -O2 -fno-tree-vectorize -funroll-loops
@@ -0,0 +1,110 @@
+# Makefile fragment - requires GNU make
+#
+# Copyright (c) 2019, Arm Limited.
+# SPDX-License-Identifier: MIT
+
+S := $(srcdir)/math
+B := build/math
+
+math-lib-srcs := $(wildcard $(S)/*.[cS])
+math-test-srcs := \
+	$(S)/test/mathtest.c \
+	$(S)/test/mathbench.c \
+	$(S)/test/ulp.c \
+
+math-test-host-srcs := $(wildcard $(S)/test/rtest/*.[cS])
+
+math-includes := $(patsubst $(S)/%,build/%,$(wildcard $(S)/include/*.h))
+
+math-libs := \
+	build/lib/libmathlib.so \
+	build/lib/libmathlib.a \
+
+math-tools := \
+	build/bin/mathtest \
+	build/bin/mathbench \
+	build/bin/mathbench_libc \
+	build/bin/runulp.sh \
+	build/bin/ulp \
+
+math-host-tools := \
+	build/bin/rtest \
+
+math-lib-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-lib-srcs)))
+math-test-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-test-srcs)))
+math-host-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-test-host-srcs)))
+math-target-objs := $(math-lib-objs) $(math-test-objs)
+math-objs := $(math-target-objs) $(math-target-objs:%.o=%.os) $(math-host-objs)
+
+math-files := \
+	$(math-objs) \
+	$(math-libs) \
+	$(math-tools) \
+	$(math-host-tools) \
+	$(math-includes) \
+
+all-math: $(math-libs) $(math-tools) $(math-includes)
+
+$(math-objs): $(math-includes)
+$(math-objs): CFLAGS_ALL += $(math-cflags)
+$(B)/test/mathtest.o: CFLAGS_ALL += -fmath-errno
+$(math-host-objs): CC = $(HOST_CC)
+$(math-host-objs): CFLAGS_ALL = $(HOST_CFLAGS)
+
+$(B)/test/ulp.o: $(S)/test/ulp.h
+
+build/lib/libmathlib.so: $(math-lib-objs:%.o=%.os)
+	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -shared -o $@ $^
+
+build/lib/libmathlib.a: $(math-lib-objs)
+	rm -f $@
+	$(AR) rc $@ $^
+	$(RANLIB) $@
+
+$(math-host-tools): HOST_LDLIBS += -lm -lmpfr -lmpc
+$(math-tools): LDLIBS += $(math-ldlibs) -lm
+
+build/bin/rtest: $(math-host-objs)
+	$(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ $^ $(HOST_LDLIBS)
+
+build/bin/mathtest: $(B)/test/mathtest.o build/lib/libmathlib.a
+	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+
+build/bin/mathbench: $(B)/test/mathbench.o build/lib/libmathlib.a
+	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+
+# This is not ideal, but allows custom symbols in mathbench to get resolved.
+build/bin/mathbench_libc: $(B)/test/mathbench.o build/lib/libmathlib.a
+	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $< $(LDLIBS) -lc build/lib/libmathlib.a -lm
+
+build/bin/ulp: $(B)/test/ulp.o build/lib/libmathlib.a
+	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+
+build/include/%.h: $(S)/include/%.h
+	cp $< $@
+
+build/bin/%.sh: $(S)/test/%.sh
+	cp $< $@
+
+math-tests := $(wildcard $(S)/test/testcases/directed/*.tst)
+math-rtests := $(wildcard $(S)/test/testcases/random/*.tst)
+
+check-math-test: $(math-tools)
+	cat $(math-tests) | $(EMULATOR) build/bin/mathtest $(math-testflags)
+
+check-math-rtest: $(math-host-tools) $(math-tools)
+	cat $(math-rtests) | build/bin/rtest | $(EMULATOR) build/bin/mathtest $(math-testflags)
+
+check-math-ulp: $(math-tools)
+	ULPFLAGS="$(math-ulpflags)" build/bin/runulp.sh $(EMULATOR)
+
+check-math: check-math-test check-math-rtest check-math-ulp
+
+install-math: \
+ $(math-libs:build/lib/%=$(DESTDIR)$(libdir)/%) \
+ $(math-includes:build/include/%=$(DESTDIR)$(includedir)/%)
+
+clean-math:
+	rm -f $(math-files)
+
+.PHONY: all-math check-math-test check-math-rtest check-math-ulp check-math install-math clean-math
@@ -0,0 +1,63 @@
+/*
+ * Single-precision cos function.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <stdint.h>
+#include <math.h>
+#include "math_config.h"
+#include "sincosf.h"
+
+/* Single-precision cosine.  Quoted accuracy: worst-case ULP 0.5607, maximum
+   relative error 0.5303 * 2^-23.
+
+   The argument is classified by comparing abstop12 (y) against thresholds:
+     below abstop12 (pio4)      - no range reduction; returns 1.0f outright
+				  when also below abstop12 (0x1p-12f).
+     below abstop12 (120.0f)    - single-step reduction with reduce_fast.
+     below abstop12 (INFINITY)  - reduce_large, using fast integer arithmetic
+				  on the bit pattern of y.
+     anything else              - forwarded to __math_invalidf.  */
+float
+cosf (float y)
+{
+  const sincos_t *tbl = &__sincosf_table[0];
+  double x = y;
+  double sgn;
+  int n;
+
+  /* Small arguments.  */
+  if (abstop12 (y) < abstop12 (pio4))
+    {
+      double xsq = x * x;
+
+      if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
+	return 1.0f;
+      return sinf_poly (x, xsq, tbl, 1);
+    }
+
+  /* Medium arguments (the expected common case after the one above).  */
+  if (likely (abstop12 (y) < abstop12 (120.0f)))
+    {
+      x = reduce_fast (x, tbl, &n);
+
+      /* The sign is read from table 0; the polynomial switches to table 1
+	 when bit 1 of n is set.  */
+      sgn = tbl->sign[n & 3];
+      if (n & 2)
+	tbl = &__sincosf_table[1];
+
+      return sinf_poly (x * sgn, x * x, tbl, n ^ 1);
+    }
+
+  /* Remaining arguments below abstop12 (INFINITY).  */
+  if (abstop12 (y) < abstop12 (INFINITY))
+    {
+      uint32_t bits = asuint (y);
+      int signbit = bits >> 31;
+
+      x = reduce_large (bits, &n);
+
+      /* As above, but the original sign bit is added to n before the
+	 sign and table are selected.  */
+      sgn = tbl->sign[(n + signbit) & 3];
+      if ((n + signbit) & 2)
+	tbl = &__sincosf_table[1];
+
+      return sinf_poly (x * sgn, x * x, tbl, n ^ 1);
+    }
+
+  return __math_invalidf (y);
+}
@@ -0,0 +1,176 @@
+/*
+ * Double-precision e^x function.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+#define N (1 << EXP_TABLE_BITS)
+#define InvLn2N __exp_data.invln2N
+#define NegLn2hiN __exp_data.negln2hiN
+#define NegLn2loN __exp_data.negln2loN
+#define Shift __exp_data.shift
+#define T __exp_data.tab
+#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
+#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
+#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
+#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
+#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
+
+/* Handle cases that may overflow or underflow when computing the result that
+   is scale*(1+TMP) without intermediate rounding.  The bit representation of
+   scale is in SBITS, however it has a computed exponent that may have
+   overflowed into the sign bit so that needs to be adjusted before using it as
+   a double.  (int32_t)KI is the k used in the argument reduction and exponent
+   adjustment of scale, positive k here means the result may overflow and
+   negative k means the result may underflow.
+
+   TMP   - the polynomial term, so that the result is scale + scale * TMP.
+   SBITS - bit pattern of scale before the exponent adjustment.
+   KI    - only bit 31 is inspected, to tell the two cases apart.
+
+   The value is returned through check_oflow on the k > 0 path and through
+   check_uflow on the k < 0 path.  */
+static inline double
+specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
+{
+  double_t scale, y;
+
+  if ((ki & 0x80000000) == 0) /* Bit 31 of KI clear: non-negative as int32_t.  */
+    {
+      /* k > 0, the exponent of scale might have overflowed by <= 460.
+	 The exponent field is lowered by 1009 here and the factor 2^1009 is
+	 multiplied back in when y is formed.  */
+      sbits -= 1009ull << 52;
+      scale = asdouble (sbits);
+      y = 0x1p1009 * (scale + scale * tmp);
+      return check_oflow (eval_as_double (y));
+    }
+  /* k < 0, need special care in the subnormal range.  The exponent field is
+     raised by 1022 here; the final multiplication by 2^-1022 undoes it.  */
+  sbits += 1022ull << 52;
+  scale = asdouble (sbits);
+  y = scale + scale * tmp;
+  if (y < 1.0)
+    {
+      /* Round y to the right precision before scaling it into the subnormal
+	 range to avoid double rounding that can cause 0.5+E/2 ulp error where
+	 E is the worst-case ulp error outside the subnormal range.  So this
+	 is only useful if the goal is better than 1 ulp worst-case error.
+	 lo first collects what was lost when y was rounded, then what is
+	 lost when 1.0 + y is rounded into hi.  */
+      double_t hi, lo;
+      lo = scale - y + scale * tmp;
+      hi = 1.0 + y;
+      lo = 1.0 - hi + y + lo;
+      y = eval_as_double (hi + lo) - 1.0;
+      /* Avoid -0.0 with downward rounding.  -0.0 == 0.0 compares true, so
+	 the assignment below replaces a negative zero with +0.0.  */
+      if (WANT_ROUNDING && y == 0.0)
+	y = 0.0;
+      /* The underflow exception needs to be signaled explicitly.  */
+      force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
+    }
+  y = 0x1p-1022 * y;
+  return check_uflow (eval_as_double (y));
+}
+
+/* Returns the upper 12 bits of the 64-bit representation of X, i.e. its
+   sign bit followed by its exponent bits.  */
+static inline uint32_t
+top12 (double x)
+{
+  uint64_t bits = asuint64 (x);
+
+  return bits >> 52;
+}
+
+/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
+   If hastail is 0 then xtail is assumed to be 0 too.
+
+   Inputs handled before the main computation:
+     |x| < 2^-54            - returns 1.0, or 1.0 + x when WANT_ROUNDING.
+     x == -Inf              - returns 0.0.
+     x is +Inf or NaN       - returns 1.0 + x.
+     other |x| >= 1024      - __math_uflow (0) for negative x, __math_oflow (0)
+			      for positive x.
+     512 <= |x| < 1024      - computed normally, but the final scaling is
+			      done by specialcase.  */
+static inline double
+exp_inline (double x, double xtail, int hastail)
+{
+  uint32_t abstop;
+  uint64_t ki, idx, top, sbits;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t kd, z, r, r2, scale, tail, tmp;
+
+  abstop = top12 (x) & 0x7ff; /* Exponent field only; the sign bit is masked off.  */
+  if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
+    {
+      if (abstop - top12 (0x1p-54) >= 0x80000000)
+	/* Avoid spurious underflow for tiny x.  */
+	/* Note: 0 is common input.  The unsigned subtraction wrapped, so
+	   abstop is below top12 (0x1p-54).  */
+	return WANT_ROUNDING ? 1.0 + x : 1.0;
+      if (abstop >= top12 (1024.0))
+	{
+	  if (asuint64 (x) == asuint64 (-INFINITY))
+	    return 0.0;
+	  if (abstop >= top12 (INFINITY))
+	    return 1.0 + x;
+	  if (asuint64 (x) >> 63)
+	    return __math_uflow (0);
+	  else
+	    return __math_oflow (0);
+	}
+      /* Large x is special cased below.  abstop == 0 is the marker that
+	 routes the result through specialcase at the end.  */
+      abstop = 0;
+    }
+
+  /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */
+  /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].  */
+  z = InvLn2N * x;
+#if TOINT_INTRINSICS
+  kd = roundtoint (z);
+  ki = converttoint (z);
+#elif EXP_USE_TOINT_NARROW
+  /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */
+  kd = eval_as_double (z + Shift);
+  ki = asuint64 (kd) >> 16;
+  kd = (double_t) (int32_t) ki;
+#else
+  /* z - kd is in [-1, 1] in non-nearest rounding modes.  */
+  kd = eval_as_double (z + Shift);
+  ki = asuint64 (kd);
+  kd -= Shift;
+#endif
+  r = x + kd * NegLn2hiN + kd * NegLn2loN;
+  /* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
+  if (hastail)
+    r += xtail;
+  /* 2^(k/N) ~= scale * (1 + tail).  The table T stores two words per
+     index: T[idx] is the tail, T[idx + 1] contributes to the scale bits.  */
+  idx = 2 * (ki % N);
+  top = ki << (52 - EXP_TABLE_BITS);
+  tail = asdouble (T[idx]);
+  /* This is only a valid scale when -1023*N < k < 1024*N.  */
+  sbits = T[idx + 1] + top;
+  /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).  */
+  /* Evaluation is optimized assuming superscalar pipelined execution.  */
+  r2 = r * r;
+  /* Without fma the worst case error is 0.25/N ulp larger.  */
+  /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.  */
+#if EXP_POLY_ORDER == 4
+  tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
+#elif EXP_POLY_ORDER == 5
+  tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+#elif EXP_POLY_ORDER == 6
+  tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
+#endif
+  if (unlikely (abstop == 0))
+    return specialcase (tmp, sbits, ki);
+  scale = asdouble (sbits);
+  /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+     is no spurious underflow here even without fma.  */
+  return eval_as_double (scale + scale * tmp);
+}
+
+/* Double-precision e^x: exp_inline with a zero tail and hastail == 0.  */
+double
+exp (double x)
+{
+  const double no_tail = 0;
+
+  return exp_inline (x, no_tail, 0);
+}
+
+/* Computes exp (x + xtail).  Intended for callers, such as a pow
+   implementation, whose argument carries more than double precision.
+   XTAIL is subject to the bounds stated on exp_inline.  */
+double
+__exp_dd (double x, double xtail)
+{
+  const int has_tail = 1;
+
+  return exp_inline (x, xtail, has_tail);
+}
+#if USE_GLIBC_ABI /* Extra symbols, emitted only when USE_GLIBC_ABI is nonzero.  */
+strong_alias (exp, __exp_finite) /* NOTE(review): strong_alias and hidden_alias
+   are macros defined outside this file; presumably each makes its second
+   argument another name for the first - confirm.  */
+hidden_alias (exp, __ieee754_exp)
+hidden_alias (__exp_dd, __exp1)
+# if LDBL_MANT_DIG == 53 /* long double has a 53-bit significand, so expl just calls exp.  */
+long double expl (long double x) { return exp (x); }
+# endif /* LDBL_MANT_DIG == 53 */
+#endif /* USE_GLIBC_ABI */
@@ -0,0 +1,143 @@
+/*
+ * Double-precision 2^x function.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+#define N (1 << EXP_TABLE_BITS)
+#define Shift __exp_data.exp2_shift
+#define T __exp_data.tab
+#define C1 __exp_data.exp2_poly[0]
+#define C2 __exp_data.exp2_poly[1]
+#define C3 __exp_data.exp2_poly[2]
+#define C4 __exp_data.exp2_poly[3]
+#define C5 __exp_data.exp2_poly[4]
+#define C6 __exp_data.exp2_poly[5]
+
+/* Handle cases that may overflow or underflow when computing the result that
+   is scale*(1+TMP) without intermediate rounding.  The bit representation of
+   scale is in SBITS, however it has a computed exponent that may have
+   overflowed into the sign bit so that needs to be adjusted before using it as
+   a double.  (int32_t)KI is the k used in the argument reduction and exponent
+   adjustment of scale, positive k here means the result may overflow and
+   negative k means the result may underflow.
+
+   TMP   - the polynomial term, so that the result is scale + scale * TMP.
+   SBITS - bit pattern of scale before the exponent adjustment.
+   KI    - only bit 31 is inspected, to tell the two cases apart.
+
+   The value is returned through check_oflow on the k > 0 path and through
+   check_uflow on the k < 0 path.  */
+static inline double
+specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
+{
+  double_t scale, y;
+
+  if ((ki & 0x80000000) == 0) /* Bit 31 of KI clear: non-negative as int32_t.  */
+    {
+      /* k > 0, the exponent of scale might have overflowed by 1.
+	 The exponent field is lowered by one here and the factor 2 is
+	 multiplied back in when y is formed.  */
+      sbits -= 1ull << 52;
+      scale = asdouble (sbits);
+      y = 2 * (scale + scale * tmp);
+      return check_oflow (eval_as_double (y));
+    }
+  /* k < 0, need special care in the subnormal range.  The exponent field is
+     raised by 1022 here; the final multiplication by 2^-1022 undoes it.  */
+  sbits += 1022ull << 52;
+  scale = asdouble (sbits);
+  y = scale + scale * tmp;
+  if (y < 1.0)
+    {
+      /* Round y to the right precision before scaling it into the subnormal
+	 range to avoid double rounding that can cause 0.5+E/2 ulp error where
+	 E is the worst-case ulp error outside the subnormal range.  So this
+	 is only useful if the goal is better than 1 ulp worst-case error.
+	 lo first collects what was lost when y was rounded, then what is
+	 lost when 1.0 + y is rounded into hi.  */
+      double_t hi, lo;
+      lo = scale - y + scale * tmp;
+      hi = 1.0 + y;
+      lo = 1.0 - hi + y + lo;
+      y = eval_as_double (hi + lo) - 1.0;
+      /* Avoid -0.0 with downward rounding.  -0.0 == 0.0 compares true, so
+	 the assignment below replaces a negative zero with +0.0.  */
+      if (WANT_ROUNDING && y == 0.0)
+	y = 0.0;
+      /* The underflow exception needs to be signaled explicitly.  */
+      force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
+    }
+  y = 0x1p-1022 * y;
+  return check_uflow (eval_as_double (y));
+}
+
+/* Sign bit and exponent bits of X: the 64-bit representation shifted down
+   so that only its top 12 bits remain.  */
+static inline uint32_t
+top12 (double x)
+{
+  uint64_t repr = asuint64 (x);
+
+  return repr >> 52;
+}
+
+/* Double-precision 2^x.
+
+   Inputs handled before the main computation:
+     |x| < 2^-54          - returns 1.0, or 1.0 + x when WANT_ROUNDING.
+     x == -Inf            - returns 0.0.
+     x is +Inf or NaN     - returns 1.0 + x.
+     other x >= 1024      - __math_oflow (0).
+     x <= -1075           - __math_uflow (0).
+     remaining |x| > 928  - computed normally, but the final scaling is done
+			    by specialcase.  */
+double
+exp2 (double x)
+{
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t kd, rem, rem2, scale, tail, sum;
+  uint64_t kbits, slot, expadj, scalebits;
+  uint32_t absexp = top12 (x) & 0x7ff;
+
+  if (unlikely (absexp - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
+    {
+      /* Tiny x (0 is a common input): return early to avoid a spurious
+	 underflow.  */
+      if (absexp - top12 (0x1p-54) >= 0x80000000)
+	return WANT_ROUNDING ? 1.0 + x : 1.0;
+      if (absexp >= top12 (1024.0))
+	{
+	  if (asuint64 (x) == asuint64 (-INFINITY))
+	    return 0.0;
+	  if (absexp >= top12 (INFINITY))
+	    return 1.0 + x;
+	  if ((asuint64 (x) >> 63) == 0)
+	    return __math_oflow (0);
+	  if (asuint64 (x) >= asuint64 (-1075.0))
+	    return __math_uflow (0);
+	}
+      /* Large x is special cased below; absexp == 0 is the marker.  */
+      if (2 * asuint64 (x) > 2 * asuint64 (928.0))
+	absexp = 0;
+    }
+
+  /* Write x = k/N + r with int k and r in [-1/2N, 1/2N], so that
+     exp2(x) = 2^(k/N) * 2^r with 2^r in [2^(-1/2N),2^(1/2N)].  */
+  kd = eval_as_double (x + Shift);
+  kbits = asuint64 (kd); /* k.  */
+  kd -= Shift; /* k/N for int k.  */
+  rem = x - kd;
+  /* 2^(k/N) ~= scale * (1 + tail).  */
+  slot = 2 * (kbits % N);
+  expadj = kbits << (52 - EXP_TABLE_BITS);
+  tail = asdouble (T[slot]);
+  /* This is only a valid scale when -1023*N < k < 1024*N.  */
+  scalebits = T[slot + 1] + expadj;
+  /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1).
+     Evaluation is optimized assuming superscalar pipelined execution.
+     Without fma the worst case error is 0.5/N ulp larger; overall it is
+     less than 0.5+0.86/N+(abs poly error * 2^53) ulp.  */
+  rem2 = rem * rem;
+#if EXP2_POLY_ORDER == 4
+  sum = tail + rem * C1 + rem2 * C2 + rem * rem2 * (C3 + rem * C4);
+#elif EXP2_POLY_ORDER == 5
+  sum = tail + rem * C1 + rem2 * (C2 + rem * C3) + rem2 * rem2 * (C4 + rem * C5);
+#elif EXP2_POLY_ORDER == 6
+  sum = tail + rem * C1 + rem2 * (0.5 + rem * C3) + rem2 * rem2 * (C4 + rem * C5 + rem2 * C6);
+#endif
+  if (unlikely (absexp == 0))
+    return specialcase (sum, scalebits, kbits);
+  scale = asdouble (scalebits);
+  /* Note: sum == 0 or |sum| > 2^-65 and scale > 2^-928, so there
+     is no spurious underflow here even without fma.  */
+  return eval_as_double (scale + scale * sum);
+}
+#if USE_GLIBC_ABI /* Extra symbols, emitted only when USE_GLIBC_ABI is nonzero.  */
+strong_alias (exp2, __exp2_finite) /* NOTE(review): strong_alias and
+   hidden_alias are macros defined outside this file; presumably each makes
+   its second argument another name for the first - confirm.  */
+hidden_alias (exp2, __ieee754_exp2)
+# if LDBL_MANT_DIG == 53 /* long double has a 53-bit significand, so exp2l just calls exp2.  */
+long double exp2l (long double x) { return exp2 (x); }
+# endif /* LDBL_MANT_DIG == 53 */
+#endif /* USE_GLIBC_ABI */
@@ -0,0 +1,80 @@
+/*
+ * Single-precision 2^x function.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+EXP2F_TABLE_BITS = 5
+EXP2F_POLY_ORDER = 3
+
+ULP error: 0.502 (nearest rounding.)
+Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
+Wrong count: 168353 (all nearest rounding wrong results with fma.)
+Non-nearest ULP error: 1 (rounded ULP error)
+*/
+
+#define N (1 << EXP2F_TABLE_BITS)
+#define T __exp2f_data.tab
+#define C __exp2f_data.poly
+#define SHIFT __exp2f_data.shift_scaled
+
+/* Upper 12 bits of the 32-bit representation of X.  */
+static inline uint32_t
+top12 (float x)
+{
+  uint32_t bits = asuint (x);
+
+  return bits >> 20;
+}
+
+/* Single-precision 2^x, evaluated in double_t arithmetic.
+
+   Inputs with |x| >= 128, and NaN, are screened first:
+     x == -Inf         - returns 0.0f.
+     x is +Inf or NaN  - returns x + x.
+     other x > 0       - __math_oflowf (0).
+     x <= -150         - __math_uflowf (0).
+     x < -149          - __math_may_uflowf (0), only with WANT_ERRNO_UFLOW.
+   Anything not returned above continues into the main computation.  */
+float
+exp2f (float x)
+{
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t xd = (double_t) x;
+  double_t kd, rem, rem2, hi, acc, scale;
+  uint64_t kbits, sbits;
+  uint32_t absexp = top12 (x) & 0x7ff;
+
+  if (unlikely (absexp >= top12 (128.0f)))
+    {
+      /* |x| >= 128 or x is nan.  */
+      if (asuint (x) == asuint (-INFINITY))
+	return 0.0f;
+      if (absexp >= top12 (INFINITY))
+	return x + x;
+      if (x > 0.0f)
+	return __math_oflowf (0);
+      if (x <= -150.0f)
+	return __math_uflowf (0);
+#if WANT_ERRNO_UFLOW
+      if (x < -149.0f)
+	return __math_may_uflowf (0);
+#endif
+    }
+
+  /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k.  */
+  kd = eval_as_double (xd + SHIFT);
+  kbits = asuint64 (kd);
+  kd -= SHIFT; /* k/N for int k.  */
+  rem = xd - kd;
+
+  /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1),
+     where s is rebuilt from the table entry plus k shifted into the
+     exponent field.  */
+  sbits = T[kbits % N];
+  sbits += kbits << (52 - EXP2F_TABLE_BITS);
+  scale = asdouble (sbits);
+  hi = C[0] * rem + C[1];
+  rem2 = rem * rem;
+  acc = C[2] * rem + 1;
+  acc = hi * rem2 + acc;
+  acc = acc * scale;
+  return eval_as_float (acc);
+}
+#if USE_GLIBC_ABI /* Extra symbols, emitted only when USE_GLIBC_ABI is nonzero.  */
+strong_alias (exp2f, __exp2f_finite) /* NOTE(review): strong_alias and
+   hidden_alias are macros defined outside this file; presumably each makes
+   its second argument another name for the first - confirm.  */
+hidden_alias (exp2f, __ieee754_exp2f)
+#endif /* USE_GLIBC_ABI */
@@ -0,0 +1,78 @@
+/*
+ * Shared data between expf, exp2f and powf.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "math_config.h"
+
+#define N (1 << EXP2F_TABLE_BITS)
+
+const struct exp2f_data __exp2f_data = {
+  /* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
+     used for computing 2^(k/N) for an int |k| < 150 N as
+     double(tab[k%N] + (k << 52-BITS))
+     Only the variant matching N (8, 16, 32 or 64 entries) is compiled in;
+     the same selection applies to the coefficient sets further down.  */
+  .tab = {
+#if N == 8
+0x3ff0000000000000, 0x3fef72b83c7d517b, 0x3fef06fe0a31b715, 0x3feebfdad5362a27,
+0x3feea09e667f3bcd, 0x3feeace5422aa0db, 0x3feee89f995ad3ad, 0x3fef5818dcfba487,
+#elif N == 16
+0x3ff0000000000000, 0x3fefb5586cf9890f, 0x3fef72b83c7d517b, 0x3fef387a6e756238,
+0x3fef06fe0a31b715, 0x3feedea64c123422, 0x3feebfdad5362a27, 0x3feeab07dd485429,
+0x3feea09e667f3bcd, 0x3feea11473eb0187, 0x3feeace5422aa0db, 0x3feec49182a3f090,
+0x3feee89f995ad3ad, 0x3fef199bdd85529c, 0x3fef5818dcfba487, 0x3fefa4afa2a490da,
+#elif N == 32
+0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
+0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
+0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
+0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
+0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
+0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
+0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
+0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
+#elif N == 64
+0x3ff0000000000000, 0x3fefec9a3e778061, 0x3fefd9b0d3158574, 0x3fefc74518759bc8,
+0x3fefb5586cf9890f, 0x3fefa3ec32d3d1a2, 0x3fef9301d0125b51, 0x3fef829aaea92de0,
+0x3fef72b83c7d517b, 0x3fef635beb6fcb75, 0x3fef54873168b9aa, 0x3fef463b88628cd6,
+0x3fef387a6e756238, 0x3fef2b4565e27cdd, 0x3fef1e9df51fdee1, 0x3fef1285a6e4030b,
+0x3fef06fe0a31b715, 0x3feefc08b26416ff, 0x3feef1a7373aa9cb, 0x3feee7db34e59ff7,
+0x3feedea64c123422, 0x3feed60a21f72e2a, 0x3feece086061892d, 0x3feec6a2b5c13cd0,
+0x3feebfdad5362a27, 0x3feeb9b2769d2ca7, 0x3feeb42b569d4f82, 0x3feeaf4736b527da,
+0x3feeab07dd485429, 0x3feea76f15ad2148, 0x3feea47eb03a5585, 0x3feea23882552225,
+0x3feea09e667f3bcd, 0x3fee9fb23c651a2f, 0x3fee9f75e8ec5f74, 0x3fee9feb564267c9,
+0x3feea11473eb0187, 0x3feea2f336cf4e62, 0x3feea589994cce13, 0x3feea8d99b4492ed,
+0x3feeace5422aa0db, 0x3feeb1ae99157736, 0x3feeb737b0cdc5e5, 0x3feebd829fde4e50,
+0x3feec49182a3f090, 0x3feecc667b5de565, 0x3feed503b23e255d, 0x3feede6b5579fdbf,
+0x3feee89f995ad3ad, 0x3feef3a2b84f15fb, 0x3feeff76f2fb5e47, 0x3fef0c1e904bc1d2,
+0x3fef199bdd85529c, 0x3fef27f12e57d14b, 0x3fef3720dcef9069, 0x3fef472d4a07897c,
+0x3fef5818dcfba487, 0x3fef69e603db3285, 0x3fef7c97337b9b5f, 0x3fef902ee78b3ff6,
+0x3fefa4afa2a490da, 0x3fefba1bee615a27, 0x3fefd0765b6e4540, 0x3fefe7c1819e90d8,
+#endif
+  },
+  .shift_scaled = 0x1.8p+52 / N, /* exp2f adds and subtracts this to round x to a multiple of 1/N.  */
+  .poly = { /* exp2f coefficients: 2^r ~= C0*r^3 + C1*r^2 + C2*r + 1.  */
+#if N == 8
+  0x1.c6a00335106e2p-5, 0x1.ec0c313449f55p-3, 0x1.62e431111f69fp-1,
+#elif N == 16
+  0x1.c6ac6aa313963p-5, 0x1.ebfff4532d9bap-3, 0x1.62e43001bc49fp-1,
+#elif N == 32
+  0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
+#elif N == 64
+  0x1.c6b04b4221b2ap-5, 0x1.ebfc213e184d7p-3, 0x1.62e42fefb5b7fp-1,
+#endif
+  },
+  .shift = 0x1.8p+52, /* Unscaled rounding constant; expf uses it when TOINT_INTRINSICS is off.  */
+  .invln2_scaled = 0x1.71547652b82fep+0 * N, /* N/ln2; expf multiplies x by this.  */
+  .poly_scaled = { /* The .poly coefficients divided by N^3, N^2 and N, for expf.  */
+#if N == 8
+  0x1.c6a00335106e2p-5/N/N/N, 0x1.ec0c313449f55p-3/N/N, 0x1.62e431111f69fp-1/N,
+#elif N == 16
+  0x1.c6ac6aa313963p-5/N/N/N, 0x1.ebfff4532d9bap-3/N/N, 0x1.62e43001bc49fp-1/N,
+#elif N == 32
+  0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
+#elif N == 64
+  0x1.c6b04b4221b2ap-5/N/N/N, 0x1.ebfc213e184d7p-3/N/N, 0x1.62e42fefb5b7fp-1/N,
+#endif
+  },
+};
@@ -0,0 +1,91 @@
+/*
+ * Single-precision e^x function.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+EXP2F_TABLE_BITS = 5
+EXP2F_POLY_ORDER = 3
+
+ULP error: 0.502 (nearest rounding.)
+Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
+Wrong count: 170635 (all nearest rounding wrong results with fma.)
+Non-nearest ULP error: 1 (rounded ULP error)
+*/
+
+/* Number of entries in the lookup table indexed by ki % N in expf.  */
+#define N (1 << EXP2F_TABLE_BITS)
+/* Shorthands for fields of the shared __exp2f_data object: the scaled
+   1/ln2 constant (invln2_scaled), the lookup table (tab) and the polynomial
+   coefficients that have been pre-divided by powers of N (poly_scaled).  */
+#define InvLn2N __exp2f_data.invln2_scaled
+#define T __exp2f_data.tab
+#define C __exp2f_data.poly_scaled
+
+/* Return the value asuint gives for X shifted right by 20, i.e. the upper
+   12 bits of a 32-bit pattern (sign, 8 exponent bits and the top 3 mantissa
+   bits when that pattern is an IEEE binary32 encoding).  */
+static inline uint32_t
+top12 (float x)
+{
+  uint32_t top = asuint (x) >> 20;
+  return top;
+}
+
+/* Single-precision e^x.
+
+   x is widened to double_t and multiplied by InvLn2N; the product is split
+   into an integer k and a remainder r.  k selects and scales an entry of the
+   table T to give s, r is fed to a cubic polynomial, and the product of the
+   two is rounded to float.  Inputs with |x| >= 88, infinities and NaNs are
+   screened first.  */
+float
+expf (float x)
+{
+  uint32_t abstop;
+  uint64_t ki, t;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t kd, xd, z, r, r2, y, s;
+
+  xd = (double_t) x;
+  /* Masking with 0x7ff drops the sign bit from the 12 bits top12 returns, so
+     abstop orders inputs by magnitude.  */
+  abstop = top12 (x) & 0x7ff;
+  if (unlikely (abstop >= top12 (88.0f)))
+    {
+      /* |x| >= 88 or x is nan.  */
+      /* exp(-inf) is exactly +0.  */
+      if (asuint (x) == asuint (-INFINITY))
+	return 0.0f;
+      /* -inf was handled above, so this is +inf or a NaN: x + x gives +inf
+         for +inf and propagates a NaN.  */
+      if (abstop >= top12 (INFINITY))
+	return x + x;
+      /* NOTE(review): the __math_* helpers are defined elsewhere; presumably
+         they produce the overflow/underflow result and set errno/flags --
+         confirm in their definitions.  */
+      if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
+	return __math_oflowf (0);
+      if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
+	return __math_uflowf (0);
+#if WANT_ERRNO_UFLOW
+      if (x < -0x1.9d1d9ep6f) /* x < log(0x1p-149) ~= -103.28 */
+	return __math_may_uflowf (0);
+#endif
+      /* Any other large finite x falls through to the general path.  */
+    }
+
+  /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
+  z = InvLn2N * xd;
+
+  /* Round and convert z to int, the result is in [-150*N, 128*N] and
+     ideally nearest int is used, otherwise the magnitude of r can be
+     bigger which gives larger approximation error.  */
+#if TOINT_INTRINSICS
+  kd = roundtoint (z);
+  ki = converttoint (z);
+#else
+  /* Without the intrinsics: add SHIFT, read the integer from the low bits of
+     the sum's bit pattern, then subtract SHIFT again to get the rounded value
+     back as a floating-point number.  */
+# define SHIFT __exp2f_data.shift
+  kd = eval_as_double (z + SHIFT);
+  ki = asuint64 (kd);
+  kd -= SHIFT;
+#endif
+  r = z - kd;
+
+  /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+  /* The low bits of ki pick the table entry; ki is then added into the
+     entry's bit pattern, shifted so that bit EXP2F_TABLE_BITS of ki lines up
+     with bit 52 of the double.  */
+  t = T[ki % N];
+  t += ki << (52 - EXP2F_TABLE_BITS);
+  s = asdouble (t);
+  /* The cubic is evaluated as two independent halves (z and y) that are
+     combined through r2.  */
+  z = C[0] * r + C[1];
+  r2 = r * r;
+  y = C[2] * r + 1;
+  y = z * r2 + y;
+  y = y * s;
+  return eval_as_float (y);
+}
+/* Extra symbol names for expf, emitted only when USE_GLIBC_ABI is set.  */
+#if USE_GLIBC_ABI
+strong_alias (expf, __expf_finite)
+hidden_alias (expf, __ieee754_expf)
+#endif
@@ -0,0 +1,100 @@
+/*
+ * Public API.
+ *
+ * Copyright (c) 2015-2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef _MATHLIB_H
+#define _MATHLIB_H
+
+/* Scalar single-precision functions.  */
+float expf (float);
+float exp2f (float);
+float logf (float);
+float log2f (float);
+float powf (float, float);
+float sinf (float);
+float cosf (float);
+void sincosf (float, float*, float*);
+
+/* Scalar double-precision functions.  */
+double exp (double);
+double exp2 (double);
+double log (double);
+double log2 (double);
+double pow (double, double);
+
+/* Scalar functions using the vector algorithm with identical result.  */
+float __s_sinf (float);
+float __s_cosf (float);
+float __s_expf (float);
+float __s_expf_1u (float);
+float __s_exp2f (float);
+float __s_exp2f_1u (float);
+float __s_logf (float);
+float __s_powf (float, float);
+double __s_sin (double);
+double __s_cos (double);
+double __s_exp (double);
+double __s_log (double);
+double __s_pow (double, double);
+
+/* Everything below is declared only when __aarch64__ is set.  */
+#if __aarch64__
+/* Name the 4 x float and 2 x double vector types: GCC >= 5 uses its built-in
+   type names, clang >= 3.5 uses the neon_vector_type attribute, anything
+   else is rejected.  */
+#if __GNUC__ >= 5
+typedef __Float32x4_t __f32x4_t;
+typedef __Float64x2_t __f64x2_t;
+#elif __clang_major__*100+__clang_minor__ >= 305
+typedef __attribute__((__neon_vector_type__(4))) float __f32x4_t;
+typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t;
+#else
+#error Unsupported compiler
+#endif
+
+/* Vector functions following the base PCS.  */
+__f32x4_t __v_sinf (__f32x4_t);
+__f32x4_t __v_cosf (__f32x4_t);
+__f32x4_t __v_expf (__f32x4_t);
+__f32x4_t __v_expf_1u (__f32x4_t);
+__f32x4_t __v_exp2f (__f32x4_t);
+__f32x4_t __v_exp2f_1u (__f32x4_t);
+__f32x4_t __v_logf (__f32x4_t);
+__f32x4_t __v_powf (__f32x4_t, __f32x4_t);
+__f64x2_t __v_sin (__f64x2_t);
+__f64x2_t __v_cos (__f64x2_t);
+__f64x2_t __v_exp (__f64x2_t);
+__f64x2_t __v_log (__f64x2_t);
+__f64x2_t __v_pow (__f64x2_t, __f64x2_t);
+
+/* The aarch64_vector_pcs attribute is only used with GCC >= 9 or
+   clang >= 8; older compilers get none of the declarations below.  */
+#if __GNUC__ >= 9 || __clang_major__ >= 8
+#define __vpcs __attribute__((__aarch64_vector_pcs__))
+
+/* Vector functions following the vector PCS.  */
+__vpcs __f32x4_t __vn_sinf (__f32x4_t);
+__vpcs __f32x4_t __vn_cosf (__f32x4_t);
+__vpcs __f32x4_t __vn_expf (__f32x4_t);
+__vpcs __f32x4_t __vn_expf_1u (__f32x4_t);
+__vpcs __f32x4_t __vn_exp2f (__f32x4_t);
+__vpcs __f32x4_t __vn_exp2f_1u (__f32x4_t);
+__vpcs __f32x4_t __vn_logf (__f32x4_t);
+__vpcs __f32x4_t __vn_powf (__f32x4_t, __f32x4_t);
+__vpcs __f64x2_t __vn_sin (__f64x2_t);
+__vpcs __f64x2_t __vn_cos (__f64x2_t);
+__vpcs __f64x2_t __vn_exp (__f64x2_t);
+__vpcs __f64x2_t __vn_log (__f64x2_t);
+__vpcs __f64x2_t __vn_pow (__f64x2_t, __f64x2_t);
+
+/* Vector functions following the vector PCS using ABI names.
+   Note that the _1u variants have no ABI-named declaration here.  */
+__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
+#endif /* __GNUC__ >= 9 || __clang_major__ >= 8 */
+#endif /* __aarch64__ */
+
+#endif /* _MATHLIB_H */
@@ -0,0 +1,162 @@
+/*
+ * Double-precision log(x) function.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/* Shorthands for fields of __log_data.  T supplies invc/logc per
+   subinterval, T2 supplies chi/clo for the path without fast fma, B is the
+   polynomial used for inputs close to 1 and A the one used otherwise.  */
+#define T __log_data.tab
+#define T2 __log_data.tab2
+#define B __log_data.poly1
+#define A __log_data.poly
+/* ln2 split into a high and a low part; both are multiplied by k in log.  */
+#define Ln2hi __log_data.ln2hi
+#define Ln2lo __log_data.ln2lo
+/* Number of subintervals, i.e. the modulus applied to the table index.  */
+#define N (1 << LOG_TABLE_BITS)
+/* Bit pattern of the double 0x1.6p-1; the reduced argument z is placed in
+   [OFF, 2*OFF).  */
+#define OFF 0x3fe6000000000000
+
+/* Return the value asuint64 gives for X shifted right by 48, i.e. the upper
+   16 bits of the 64-bit pattern of a double.  */
+static inline uint32_t
+top16 (double x)
+{
+  uint32_t top = asuint64 (x) >> 48;
+  return top;
+}
+
+/* Double-precision natural logarithm.
+
+   Three stages:
+   1. Inputs whose bit pattern lies in [LO, HI), i.e. close to 1.0, are
+      evaluated directly with the polynomial B in r = x - 1.
+   2. Zero, negative, infinite, NaN and subnormal inputs are dispatched or
+      normalized.
+   3. Otherwise x is split as 2^k * z, a table entry for z supplies invc and
+      logc, and the result is assembled from k*ln2, logc and the polynomial A
+      in r ~= z*invc - 1.  */
+double
+log (double x)
+{
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
+  uint64_t ix, iz, tmp;
+  uint32_t top;
+  int k, i;
+
+  ix = asuint64 (x);
+  top = top16 (x);
+
+  /* LO and HI are the bit patterns bounding the "close to 1.0" interval; the
+     interval width depends on the order of the polynomial B.  */
+#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11
+# define LO asuint64 (1.0 - 0x1p-5)
+# define HI asuint64 (1.0 + 0x1.1p-5)
+#elif LOG_POLY1_ORDER == 12
+# define LO asuint64 (1.0 - 0x1p-4)
+# define HI asuint64 (1.0 + 0x1.09p-4)
+#endif
+  /* ix is unsigned, so ix - LO wraps for ix < LO and this single comparison
+     tests LO <= ix < HI.  */
+  if (unlikely (ix - LO < HI - LO))
+    {
+      /* Handle close to 1.0 inputs separately.  */
+      /* Fix sign of zero with downward rounding when x==1.  */
+      if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
+	return 0;
+      r = x - 1.0;
+      r2 = r * r;
+      r3 = r * r2;
+      /* In every variant below y first collects the terms of order r^3 and
+         higher; r + B[0]*r2 is then added as hi plus the rounding error of
+         that sum.  */
+#if LOG_POLY1_ORDER == 10
+      /* Worst-case error is around 0.516 ULP.  */
+      y = r3 * (B[1] + r * B[2] + r2 * B[3]
+		+ r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
+      w = B[0] * r2; /* B[0] == -0.5.  */
+      hi = r + w;
+      y += r - hi + w;
+      y += hi;
+#elif LOG_POLY1_ORDER == 11
+      /* Worst-case error is around 0.516 ULP.  */
+      y = r3 * (B[1] + r * B[2]
+		+ r2 * (B[3] + r * B[4] + r2 * B[5]
+			+ r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
+      w = B[0] * r2; /* B[0] == -0.5.  */
+      hi = r + w;
+      y += r - hi + w;
+      y += hi;
+#elif LOG_POLY1_ORDER == 12
+      y = r3 * (B[1] + r * B[2] + r2 * B[3]
+		+ r3 * (B[4] + r * B[5] + r2 * B[6]
+			+ r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
+# if N <= 64
+      /* Worst-case error is around 0.532 ULP.  */
+      w = B[0] * r2; /* B[0] == -0.5.  */
+      hi = r + w;
+      y += r - hi + w;
+      y += hi;
+# else
+      /* Worst-case error is around 0.507 ULP.  */
+      /* Split r into rhi + rlo by adding and subtracting r * 2^27, then
+         account for the part of B[0]*r^2 not covered by B[0]*rhi^2 in lo.  */
+      w = r * 0x1p27;
+      double_t rhi = r + w - w;
+      double_t rlo = r - rhi;
+      w = rhi * rhi * B[0]; /* B[0] == -0.5.  */
+      hi = r + w;
+      lo = r - hi + w;
+      lo += B[0] * rlo * (rhi + r);
+      y += lo;
+      y += hi;
+# endif
+#endif
+      return eval_as_double (y);
+    }
+  /* Unsigned wraparound again: true when top < 0x0010 or top >= 0x7ff0,
+     which includes every input with the sign bit (0x8000) set.  */
+  if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
+    {
+      /* x < 0x1p-1022 or inf or nan.  */
+      /* ix * 2 shifts the sign bit out, so this matches +0 and -0.  */
+      if (ix * 2 == 0)
+	return __math_divzero (1);
+      if (ix == asuint64 (INFINITY)) /* log(inf) == inf.  */
+	return x;
+      /* Sign bit set, or exponent field all ones (NaN, since +inf returned
+         above).  */
+      if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
+	return __math_invalid (x);
+      /* x is subnormal, normalize it.  */
+      /* Scale by 2^52, then take 52 back out of the exponent field so the
+         value of k computed below still refers to the original x.  */
+      ix = asuint64 (x * 0x1p52);
+      ix -= 52ULL << 52;
+    }
+
+  /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
+  k = (int64_t) tmp >> 52; /* arithmetic shift */
+  /* Subtract the top 12 bits of tmp from ix; what remains of ix - OFF is its
+     low 52 bits, which puts iz in [OFF, 2*OFF).  */
+  iz = ix - (tmp & 0xfffULL << 52);
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = asdouble (iz);
+
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
+  /* r ~= z/c - 1, |r| < 1/(2*N).  */
+#if HAVE_FAST_FMA
+  /* rounding error: 0x1p-55/N.  */
+  r = fma (z, invc, -1.0);
+#else
+  /* rounding error: 0x1p-55/N + 0x1p-66.  */
+  r = (z - T2[i].chi - T2[i].clo) * invc;
+#endif
+  kd = (double_t) k;
+
+  /* hi + lo = r + log(c) + k*Ln2.  */
+  w = kd * Ln2hi + logc;
+  hi = w + r;
+  lo = w - hi + r + kd * Ln2lo;
+
+  /* log(x) = lo + (log1p(r) - r) + hi.  */
+  r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
+  /* Worst case error if |y| > 0x1p-5:
+     0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
+     Worst case error if |y| > 0x1p-4:
+     0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma).  */
+#if LOG_POLY_ORDER == 6
+  y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
+#elif LOG_POLY_ORDER == 7
+  y = lo
+      + r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
+	      + r2 * r2 * (A[4] + r * A[5]))
+      + hi;
+#endif
+  return eval_as_double (y);
+}
+/* Extra symbol names for log, emitted only when USE_GLIBC_ABI is set.  When
+   long double has a 53-bit significand, logl simply forwards to log.  */
+#if USE_GLIBC_ABI
+strong_alias (log, __log_finite)
+hidden_alias (log, __ieee754_log)
+# if LDBL_MANT_DIG == 53
+long double logl (long double x) { return log (x); }
+# endif
+#endif
@@ -0,0 +1,141 @@
+/*
+ * Double-precision log2(x) function.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/* Shorthands for fields of __log2_data.  T supplies invc/logc per
+   subinterval, T2 supplies chi/clo for the path without fast fma, B is the
+   polynomial used for inputs close to 1 and A the one used otherwise.  */
+#define T __log2_data.tab
+#define T2 __log2_data.tab2
+#define B __log2_data.poly1
+#define A __log2_data.poly
+/* 1/ln2 split into a high and a low part; both multiply r in log2.  */
+#define InvLn2hi __log2_data.invln2hi
+#define InvLn2lo __log2_data.invln2lo
+/* Number of subintervals, i.e. the modulus applied to the table index.  */
+#define N (1 << LOG2_TABLE_BITS)
+/* Bit pattern of the double 0x1.6p-1; the reduced argument z is placed in
+   [OFF, 2*OFF).  */
+#define OFF 0x3fe6000000000000
+
+/* Return the value asuint64 gives for X shifted right by 48, i.e. the upper
+   16 bits of the 64-bit pattern of a double.  */
+static inline uint32_t
+top16 (double x)
+{
+  uint32_t top = asuint64 (x) >> 48;
+  return top;
+}
+
+/* Double-precision base-2 logarithm.
+
+   Three stages:
+   1. Inputs whose bit pattern lies in [LO, HI), i.e. close to 1.0, are
+      evaluated directly with the polynomial B in r = x - 1.
+   2. Zero, negative, infinite, NaN and subnormal inputs are dispatched or
+      normalized.
+   3. Otherwise x is split as 2^k * z, a table entry for z supplies invc and
+      logc, and the result is assembled from k, logc, r/ln2 and the polynomial
+      A in r ~= z*invc - 1.  */
+double
+log2 (double x)
+{
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
+  uint64_t ix, iz, tmp;
+  uint32_t top;
+  int k, i;
+
+  ix = asuint64 (x);
+  top = top16 (x);
+
+  /* LO and HI are the bit patterns bounding the "close to 1.0" interval.  */
+#if LOG2_POLY1_ORDER == 11
+# define LO asuint64 (1.0 - 0x1.5b51p-5)
+# define HI asuint64 (1.0 + 0x1.6ab2p-5)
+#endif
+  /* ix is unsigned, so ix - LO wraps for ix < LO and this single comparison
+     tests LO <= ix < HI.  */
+  if (unlikely (ix - LO < HI - LO))
+    {
+      /* Handle close to 1.0 inputs separately.  */
+      /* Fix sign of zero with downward rounding when x==1.  */
+      if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
+	return 0;
+      r = x - 1.0;
+      /* hi + lo ~= r * (InvLn2hi + InvLn2lo).  With fma the rounding error
+         of r * InvLn2hi is recovered directly; without it r is split by
+         clearing the low 32 bits of its bit pattern.  */
+#if HAVE_FAST_FMA
+      hi = r * InvLn2hi;
+      lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
+#else
+      double_t rhi, rlo;
+      rhi = asdouble (asuint64 (r) & -1ULL << 32);
+      rlo = r - rhi;
+      hi = rhi * InvLn2hi;
+      lo = rlo * InvLn2hi + r * InvLn2lo;
+#endif
+      r2 = r * r; /* rounding error: 0x1p-62.  */
+      r4 = r2 * r2;
+#if LOG2_POLY1_ORDER == 11
+      /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
+      /* The r^2 and r^3 terms (p) are added to hi with their rounding error
+         folded into lo; the higher-order terms go into lo only.  */
+      p = r2 * (B[0] + r * B[1]);
+      y = hi + p;
+      lo += hi - y + p;
+      lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5])
+		  + r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
+      y += lo;
+#endif
+      return eval_as_double (y);
+    }
+  /* Unsigned wraparound again: true when top < 0x0010 or top >= 0x7ff0,
+     which includes every input with the sign bit (0x8000) set.  */
+  if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
+    {
+      /* x < 0x1p-1022 or inf or nan.  */
+      /* ix * 2 shifts the sign bit out, so this matches +0 and -0.  */
+      if (ix * 2 == 0)
+	return __math_divzero (1);
+      if (ix == asuint64 (INFINITY)) /* log(inf) == inf.  */
+	return x;
+      /* Sign bit set, or exponent field all ones (NaN, since +inf returned
+         above).  */
+      if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
+	return __math_invalid (x);
+      /* x is subnormal, normalize it.  */
+      /* Scale by 2^52, then take 52 back out of the exponent field so the
+         value of k computed below still refers to the original x.  */
+      ix = asuint64 (x * 0x1p52);
+      ix -= 52ULL << 52;
+    }
+
+  /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
+  k = (int64_t) tmp >> 52; /* arithmetic shift */
+  /* Subtract the top 12 bits of tmp from ix; what remains of ix - OFF is its
+     low 52 bits, which puts iz in [OFF, 2*OFF).  */
+  iz = ix - (tmp & 0xfffULL << 52);
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = asdouble (iz);
+  kd = (double_t) k;
+
+  /* log2(x) = log2(z/c) + log2(c) + k.  */
+  /* r ~= z/c - 1, |r| < 1/(2*N).  */
+  /* t1 + t2 ~= r * (InvLn2hi + InvLn2lo), computed the same two ways as in
+     the close-to-1 branch.  */
+#if HAVE_FAST_FMA
+  /* rounding error: 0x1p-55/N.  */
+  r = fma (z, invc, -1.0);
+  t1 = r * InvLn2hi;
+  t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1);
+#else
+  double_t rhi, rlo;
+  /* rounding error: 0x1p-55/N + 0x1p-65.  */
+  r = (z - T2[i].chi - T2[i].clo) * invc;
+  rhi = asdouble (asuint64 (r) & -1ULL << 32);
+  rlo = r - rhi;
+  t1 = rhi * InvLn2hi;
+  t2 = rlo * InvLn2hi + r * InvLn2lo;
+#endif
+
+  /* hi + lo = r/ln2 + log2(c) + k.  */
+  t3 = kd + logc;
+  hi = t3 + t1;
+  lo = t3 - hi + t1 + t2;
+
+  /* log2(r+1) = r/ln2 + r^2*poly(r).  */
+  /* Evaluation is optimized assuming superscalar pipelined execution.  */
+  r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
+  r4 = r2 * r2;
+#if LOG2_POLY_ORDER == 7
+  /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
+     ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
+  p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
+  y = lo + r2 * p + hi;
+#endif
+  return eval_as_double (y);
+}
+/* Extra symbol names for log2, emitted only when USE_GLIBC_ABI is set.  When
+   long double has a 53-bit significand, log2l simply forwards to log2.  */
+#if USE_GLIBC_ABI
+strong_alias (log2, __log2_finite)
+hidden_alias (log2, __ieee754_log2)
+# if LDBL_MANT_DIG == 53
+long double log2l (long double x) { return log2 (x); }
+# endif
+#endif
@@ -0,0 +1,209 @@
+/*
+ * Data for log2.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "math_config.h"
+
+#define N (1 << LOG2_TABLE_BITS)
+
+const struct log2_data __log2_data = {
+// First coefficient (1/ln2): 0x1.71547652b82fe1777d0ffda0d24p0, stored below
+// split into invln2hi + invln2lo.
+.invln2hi = 0x1.7154765200000p+0,
+.invln2lo = 0x1.705fc2eefa200p-33,
+.poly1 = {
+#if LOG2_POLY1_ORDER == 11
+// relative error: 0x1.2fad8188p-63
+// in -0x1.5b51p-5 0x1.6ab2p-5
+-0x1.71547652b82fep-1,
+0x1.ec709dc3a03f7p-2,
+-0x1.71547652b7c3fp-2,
+0x1.2776c50f05be4p-2,
+-0x1.ec709dd768fe5p-3,
+0x1.a61761ec4e736p-3,
+-0x1.7153fbc64a79bp-3,
+0x1.484d154f01b4ap-3,
+-0x1.289e4a72c383cp-3,
+0x1.0b32f285aee66p-3,
+#endif
+},
+.poly = {
+#if N == 64 && LOG2_POLY_ORDER == 7
+// relative error: 0x1.a72c2bf8p-58
+// abs error: 0x1.67a552c8p-66
+// in -0x1.f45p-8 0x1.f45p-8
+-0x1.71547652b8339p-1,
+0x1.ec709dc3a04bep-2,
+-0x1.7154764702ffbp-2,
+0x1.2776c50034c48p-2,
+-0x1.ec7b328ea92bcp-3,
+0x1.a6225e117f92ep-3,
+#endif
+},
+/* Algorithm:
+
+	x = 2^k z
+	log2(x) = k + log2(c) + log2(z/c)
+	log2(z/c) = poly(z/c - 1)
+
+where z is in [0x1.6p-1; 0x1.6p0) which is split into N subintervals and z falls
+into the ith one, then table entries are computed as
+
+	tab[i].invc = 1/c
+	tab[i].logc = (double)log2(c)
+	tab2[i].chi = (double)c
+	tab2[i].clo = (double)(c - (double)c)
+
+where c is near the center of the subinterval and is chosen by trying +-2^29
+floating point invc candidates around 1/center and selecting one for which
+
+	1) the rounding error in 0x1.8p10 + logc is 0,
+	2) the rounding error in z - chi - clo is < 0x1p-64 and
+	3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).
+
+Note: 1) ensures that k + logc can be computed without rounding error, 2)
+ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
+single rounding error when there is no fast fma for z*invc - 1, 3) ensures
+that logc + poly(z/c - 1) has small error, however near x == 1 when
+|log2(x)| < 0x1p-4, this is not enough so that is special cased.  */
+.tab = {
+#if N == 64
+{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
+{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
+{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
+{0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
+{0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
+{0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
+{0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
+{0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
+{0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
+{0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
+{0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
+{0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
+{0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
+{0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
+{0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
+{0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
+{0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
+{0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
+{0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
+{0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
+{0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
+{0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
+{0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
+{0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
+{0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
+{0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
+{0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
+{0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
+{0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
+{0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
+{0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
+{0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
+{0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
+{0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
+{0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
+{0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
+{0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
+{0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
+{0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
+{0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
+{0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
+{0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
+{0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
+{0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
+{0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
+{0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
+{0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
+{0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
+{0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
+{0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
+{0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
+{0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
+{0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
+{0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
+{0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
+{0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
+{0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
+{0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
+{0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
+{0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
+{0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
+{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
+{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
+{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
+#endif
+},
+#if !HAVE_FAST_FMA
+.tab2 = {
+# if N == 64
+{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
+{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
+{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
+{0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
+{0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
+{0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
+{0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
+{0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
+{0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
+{0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
+{0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
+{0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
+{0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
+{0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
+{0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
+{0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
+{0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
+{0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
+{0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
+{0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
+{0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
+{0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
+{0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
+{0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
+{0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
+{0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
+{0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
+{0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
+{0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
+{0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
+{0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
+{0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
+{0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
+{0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
+{0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
+{0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
+{0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
+{0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
+{0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
+{0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
+{0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
+{0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
+{0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
+{0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
+{0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
+{0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
+{0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
+{0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
+{0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
+{0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
+{0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
+{0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
+{0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
+{0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
+{0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
+{0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
+{0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
+{0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
+{0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
+{0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
+{0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
+{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
+{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
+{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
+# endif
+},
+#endif /* !HAVE_FAST_FMA */
+};
@@ -0,0 +1,80 @@
+/*
+ * Single-precision log2 function.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+LOG2F_TABLE_BITS = 4
+LOG2F_POLY_ORDER = 4
+
+ULP error: 0.752 (nearest rounding.)
+Relative error: 1.9 * 2^-26 (before rounding.)
+*/
+
+/* Number of subintervals, i.e. the modulus applied to the table index.  */
+#define N (1 << LOG2F_TABLE_BITS)
+/* Shorthands for the lookup table and the polynomial coefficients stored in
+   __log2f_data.  */
+#define T __log2f_data.tab
+#define A __log2f_data.poly
+/* Bit pattern of the float 0x1.66p-1; the reduced argument z is placed in
+   [OFF, 2*OFF).  */
+#define OFF 0x3f330000
+
+/* Single-precision base-2 logarithm.
+
+   Zero, negative, infinite, NaN and subnormal inputs are dispatched or
+   normalized first.  Then x is split as 2^k * z, a table entry for z supplies
+   invc and logc, and the result is logc + k plus a polynomial in
+   r = z*invc - 1, evaluated in double_t and rounded to float.  */
+float
+log2f (float x)
+{
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, r2, p, y, y0, invc, logc;
+  uint32_t ix, iz, top, tmp;
+  int k, i;
+
+  ix = asuint (x);
+#if WANT_ROUNDING
+  /* Fix sign of zero with downward rounding when x==1.  */
+  if (unlikely (ix == 0x3f800000))
+    return 0;
+#endif
+  /* ix is unsigned, so the subtraction wraps: true when ix < 0x00800000 or
+     ix >= 0x7f800000, which includes every input with the sign bit set.  */
+  if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
+    {
+      /* x < 0x1p-126 or inf or nan.  */
+      /* ix * 2 shifts the sign bit out, so this matches +0 and -0.  */
+      if (ix * 2 == 0)
+	return __math_divzerof (1);
+      if (ix == 0x7f800000) /* log2(inf) == inf.  */
+	return x;
+      /* Sign bit set, or exponent field all ones (NaN, since +inf returned
+         above).  */
+      if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+	return __math_invalidf (x);
+      /* x is subnormal, normalize it.  */
+      /* Scale by 2^23, then take 23 back out of the exponent field so the
+         value of k computed below still refers to the original x.  */
+      ix = asuint (x * 0x1p23f);
+      ix -= 23 << 23;
+    }
+
+  /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
+  /* top holds the upper 9 bits of tmp; removing it from ix leaves OFF plus
+     the low 23 bits of tmp.  */
+  top = tmp & 0xff800000;
+  iz = ix - top;
+  k = (int32_t) tmp >> 23; /* arithmetic shift */
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = (double_t) asfloat (iz);
+
+  /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
+  r = z * invc - 1;
+  y0 = logc + (double_t) k;
+
+  /* Pipelined polynomial evaluation to approximate log1p(r)/ln2.  */
+  /* Result: y0 + A[3]*r + A[2]*r^2 + A[1]*r^3 + A[0]*r^4.  */
+  r2 = r * r;
+  y = A[1] * r + A[2];
+  y = A[0] * r2 + y;
+  p = A[3] * r + y0;
+  y = y * r2 + p;
+  return eval_as_float (y);
+}
+/* Extra symbol names for log2f, emitted only when USE_GLIBC_ABI is set.  */
+#if USE_GLIBC_ABI
+strong_alias (log2f, __log2f_finite)
+hidden_alias (log2f, __ieee754_log2f)
+#endif
@@ -0,0 +1,33 @@
+/*
+ * Data definition for log2f.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "math_config.h"
+
+/* Constant data for log2f: a 16-row lookup table and four polynomial
+   coefficients.  */
+const struct log2f_data __log2f_data = {
+  /* One row per subinterval of the reduced argument.
+     NOTE(review): the rows look like {invc, logc} pairs (1/c and log2(c)),
+     but the field order is fixed by struct log2f_data, which is declared
+     elsewhere -- confirm there.  */
+  .tab = {
+  { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
+  { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
+  { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
+  { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
+  { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
+  { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
+  { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
+  { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
+  { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
+  { 0x1p+0, 0x0p+0 },
+  { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
+  { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
+  { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
+  { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
+  { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
+  { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
+  },
+  /* Polynomial coefficients, listed from the highest-order term down: the
+     last entry is the one log2f multiplies by r alone.  */
+  .poly = {
+  -0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
+  0x1.715475f35c8b8p0,
+  }
+};
@@ -0,0 +1,511 @@
+/*
+ * Data for log.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "math_config.h"
+
+#define N (1 << LOG_TABLE_BITS)
+
+const struct log_data __log_data = {
+.ln2hi = 0x1.62e42fefa3800p-1,
+.ln2lo = 0x1.ef35793c76730p-45,
+.poly1 = {
+#if LOG_POLY1_ORDER == 10
+// relative error: 0x1.32eccc6p-62
+// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
+-0x1p-1,
+0x1.55555555554e5p-2,
+-0x1.0000000000af2p-2,
+0x1.9999999bbe436p-3,
+-0x1.55555537f9cdep-3,
+0x1.24922fc8127cfp-3,
+-0x1.0000b7d6bb612p-3,
+0x1.c806ee1ddbcafp-4,
+-0x1.972335a9c2d6ep-4,
+#elif LOG_POLY1_ORDER == 11
+// relative error: 0x1.52c8b708p-68
+// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
+-0x1p-1,
+0x1.5555555555555p-2,
+-0x1.ffffffffffea9p-3,
+0x1.999999999c4d4p-3,
+-0x1.55555557f5541p-3,
+0x1.249248fbe33e4p-3,
+-0x1.ffffc9a3c825bp-4,
+0x1.c71e1f204435dp-4,
+-0x1.9a7f26377d06ep-4,
+0x1.71c30cf8f7364p-4,
+#elif LOG_POLY1_ORDER == 12
+// relative error: 0x1.c04d76cp-63
+// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
+-0x1p-1,
+0x1.5555555555577p-2,
+-0x1.ffffffffffdcbp-3,
+0x1.999999995dd0cp-3,
+-0x1.55555556745a7p-3,
+0x1.24924a344de3p-3,
+-0x1.fffffa4423d65p-4,
+0x1.c7184282ad6cap-4,
+-0x1.999eb43b068ffp-4,
+0x1.78182f7afd085p-4,
+-0x1.5521375d145cdp-4,
+#endif
+},
+.poly = {
+#if N == 64 && LOG_POLY_ORDER == 7
+// relative error: 0x1.906eb8ap-58
+// abs error: 0x1.d2cad5a8p-67
+// in -0x1.fp-8 0x1.fp-8
+-0x1.0000000000027p-1,
+0x1.555555555556ap-2,
+-0x1.fffffff0440bap-3,
+0x1.99999991906c3p-3,
+-0x1.555c8d7e8201ep-3,
+0x1.24978c59151fap-3,
+#elif N == 128 && LOG_POLY_ORDER == 6
+// relative error: 0x1.926199e8p-56
+// abs error: 0x1.882ff33p-65
+// in -0x1.fp-9 0x1.fp-9
+-0x1.0000000000001p-1,
+0x1.555555551305bp-2,
+-0x1.fffffffeb459p-3,
+0x1.999b324f10111p-3,
+-0x1.55575e506c89fp-3,
+#elif N == 128 && LOG_POLY_ORDER == 7
+// relative error: 0x1.649fc4bp-64
+// abs error: 0x1.c3b5769p-74
+// in -0x1.fp-9 0x1.fp-9
+-0x1.0000000000001p-1,
+0x1.5555555555556p-2,
+-0x1.fffffffea1a8p-3,
+0x1.99999998e9139p-3,
+-0x1.555776801b968p-3,
+0x1.2493c29331a5cp-3,
+#endif
+},
+/* Algorithm:
+
+	x = 2^k z
+	log(x) = k ln2 + log(c) + log(z/c)
+	log(z/c) = poly(z/c - 1)
+
+where z is in [0x1.6p-1; 0x1.6p0] which is split into N subintervals and z
+falls into the ith one, then table entries are computed as
+
+	tab[i].invc = 1/c
+	tab[i].logc = (double)log(c)
+	tab2[i].chi = (double)c
+	tab2[i].clo = (double)(c - (double)c)
+
+where c is near the center of the subinterval and is chosen by trying +-2^29
+floating point invc candidates around 1/center and selecting one for which
+
+	1) the rounding error in 0x1.8p9 + logc is 0,
+	2) the rounding error in z - chi - clo is < 0x1p-66 and
+	3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
+
+Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
+2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
+a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
+that logc + poly(z/c - 1) has small error, however near x == 1 when
+|log(x)| < 0x1p-4, this is not enough so that is special cased.  */
+.tab = {
+#if N == 64
+{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
+{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2},
+{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2},
+{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2},
+{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2},
+{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2},
+{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2},
+{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2},
+{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2},
+{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2},
+{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2},
+{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2},
+{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3},
+{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3},
+{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3},
+{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3},
+{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3},
+{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3},
+{0x1.33ae463091760p+0, -0x1.7898db878d000p-3},
+{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3},
+{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3},
+{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3},
+{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3},
+{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3},
+{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3},
+{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4},
+{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4},
+{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4},
+{0x1.194538e960658p+0, -0x1.8197efba9a000p-4},
+{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4},
+{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4},
+{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4},
+{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5},
+{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5},
+{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5},
+{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5},
+{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6},
+{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6},
+{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7},
+{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8},
+{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8},
+{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6},
+{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5},
+{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5},
+{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4},
+{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4},
+{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4},
+{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4},
+{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4},
+{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3},
+{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3},
+{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3},
+{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3},
+{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3},
+{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3},
+{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3},
+{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3},
+{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3},
+{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2},
+{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2},
+{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2},
+{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2},
+{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2},
+{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2},
+#elif N == 128
+{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
+{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
+{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
+{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
+{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
+{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
+{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
+{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
+{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
+{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
+{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
+{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
+{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
+{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
+{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
+{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
+{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
+{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
+{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
+{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
+{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
+{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
+{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
+{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
+{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
+{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
+{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
+{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
+{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
+{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
+{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
+{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
+{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
+{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
+{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
+{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
+{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
+{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
+{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
+{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
+{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
+{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
+{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
+{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
+{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
+{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
+{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
+{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
+{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
+{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
+{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
+{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
+{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
+{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
+{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
+{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
+{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
+{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
+{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
+{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
+{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
+{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
+{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
+{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
+{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
+{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
+{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
+{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
+{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
+{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
+{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
+{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
+{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
+{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
+{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
+{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
+{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
+{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
+{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
+{0x1.008040614b195p+0, -0x1.0040979240000p-9},
+{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
+{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
+{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
+{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
+{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
+{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
+{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
+{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
+{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
+{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
+{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
+{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
+{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
+{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
+{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
+{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
+{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
+{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
+{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
+{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
+{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
+{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
+{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
+{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
+{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
+{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
+{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
+{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
+{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
+{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
+{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
+{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
+{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
+{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
+{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
+{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
+{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
+{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
+{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
+{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
+{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
+{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
+{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
+{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
+{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
+{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
+{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
+{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
+#endif
+},
+#if !HAVE_FAST_FMA
+.tab2 = {
+# if N == 64
+{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
+{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55},
+{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55},
+{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56},
+{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55},
+{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57},
+{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56},
+{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56},
+{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57},
+{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56},
+{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57},
+{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55},
+{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55},
+{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55},
+{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56},
+{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58},
+{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55},
+{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55},
+{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57},
+{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56},
+{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57},
+{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57},
+{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56},
+{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55},
+{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58},
+{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55},
+{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58},
+{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59},
+{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59},
+{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58},
+{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55},
+{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55},
+{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55},
+{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60},
+{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55},
+{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55},
+{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56},
+{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57},
+{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58},
+{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58},
+{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54},
+{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55},
+{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54},
+{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54},
+{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55},
+{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57},
+{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54},
+{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55},
+{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54},
+{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55},
+{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56},
+{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54},
+{0x1.320000324c55bp+0, 0x1.f81983997354fp-54},
+{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54},
+{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54},
+{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56},
+{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54},
+{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55},
+{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55},
+{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56},
+{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54},
+{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55},
+{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55},
+{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54},
+# elif N == 128
+{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
+{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
+{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
+{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
+{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
+{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
+{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
+{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
+{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
+{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
+{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
+{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
+{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
+{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
+{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
+{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
+{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
+{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
+{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
+{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
+{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
+{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
+{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
+{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
+{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
+{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
+{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
+{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
+{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
+{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
+{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
+{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
+{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
+{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
+{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
+{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
+{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
+{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
+{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
+{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
+{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
+{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
+{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
+{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
+{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
+{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
+{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
+{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
+{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
+{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
+{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
+{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
+{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
+{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
+{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
+{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
+{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
+{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
+{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
+{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
+{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
+{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
+{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
+{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
+{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
+{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
+{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
+{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
+{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
+{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
+{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
+{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
+{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
+{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
+{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
+{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
+{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
+{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
+{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
+{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
+{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
+{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
+{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
+{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
+{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
+{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
+{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
+{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
+{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
+{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
+{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
+{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
+{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
+{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
+{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
+{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
+{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
+{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
+{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
+{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
+{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
+{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
+{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
+{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
+{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
+{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
+{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
+{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
+{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
+{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
+{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
+{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
+{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
+{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
+{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
+{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
+{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
+{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
+{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
+{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
+{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
+{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
+{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
+{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
+{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
+{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
+{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
+{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
+#endif
+},
+#endif /* !HAVE_FAST_FMA */
+};
@@ -0,0 +1,79 @@
+/*
+ * Single-precision log function.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+LOGF_TABLE_BITS = 4
+LOGF_POLY_ORDER = 4
+
+ULP error: 0.818 (nearest rounding.)
+Relative error: 1.957 * 2^-26 (before rounding.)
+*/
+
+#define T __logf_data.tab
+#define A __logf_data.poly
+#define Ln2 __logf_data.ln2
+#define N (1 << LOGF_TABLE_BITS)
+#define OFF 0x3f330000
+
+/* Single-precision natural logarithm, log(x).
+
+   The argument is decomposed as x = 2^k * z.  The table entry for the
+   subinterval containing z supplies invc (1/c) and logc (log(c)), and
+   log1p(z/c - 1) is approximated by a polynomial in r = z*invc - 1.
+
+   Special cases:
+     x == 1 (WANT_ROUNDING only)  returns 0, so the zero keeps a positive
+				  sign under downward rounding;
+     x == +0 or -0		  returns __math_divzerof (1);
+     x == +inf			  returns x;
+     x < 0 or x is NaN		  returns __math_invalidf (x);
+     subnormal x		  is scaled by 2^23 and its exponent field is
+				  corrected before taking the main path.  */
+float
+logf (float x)
+{
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, r2, y, y0, invc, logc;
+  uint32_t ix, iz, tmp;
+  int k, i;
+
+  ix = asuint (x);
+#if WANT_ROUNDING
+  /* Fix sign of zero with downward rounding when x==1.  */
+  if (unlikely (ix == 0x3f800000))
+    return 0;
+#endif
+  /* Single unsigned range check: true for everything that is not a positive
+     normal number (zero, subnormal, negative, inf, nan).  */
+  if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
+    {
+      /* x < 0x1p-126 or inf or nan.  */
+      if (ix * 2 == 0)
+	return __math_divzerof (1);
+      if (ix == 0x7f800000) /* log(inf) == inf.  */
+	return x;
+      if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+	return __math_invalidf (x);
+      /* x is subnormal, normalize it.  */
+      ix = asuint (x * 0x1p23f);
+      ix -= 23 << 23;
+    }
+
+  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
+  k = (int32_t) tmp >> 23; /* arithmetic shift */
+  /* Remove the sign and exponent bits of tmp from ix, leaving z.  The mask is
+     written as the unsigned constant 0xff800000 (as in log2f) instead of
+     0x1ff << 23, because that shift overflows a 32-bit signed int, which is
+     undefined behaviour.  */
+  iz = ix - (tmp & 0xff800000);
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = (double_t) asfloat (iz);
+
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
+  r = z * invc - 1;
+  y0 = logc + (double_t) k * Ln2;
+
+  /* Pipelined polynomial evaluation to approximate log1p(r).  */
+  r2 = r * r;
+  y = A[1] * r + A[2];
+  y = A[0] * r2 + y;
+  y = y * r2 + (y0 + r);
+  return eval_as_float (y);
+}
+/* glibc ABI compatibility: make logf also reachable under the name
+   __logf_finite, and provide __ieee754_logf as a hidden-visibility alias of
+   the same function.  */
+#if USE_GLIBC_ABI
+strong_alias (logf, __logf_finite)
+hidden_alias (logf, __ieee754_logf)
+#endif
@@ -0,0 +1,33 @@
+/*
+ * Data definition for logf.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "math_config.h"
+
+/* Lookup table and constants read by logf (through its T, Ln2 and A macros).
+
+   .tab  - one {invc, logc} pair per subinterval.  logf computes
+	   r = z * invc - 1 and adds logc, so invc stands for 1/c and logc for
+	   log(c) of the subinterval's reference point c; the {0x1p+0, 0x0p+0}
+	   entry is the c == 1 case.
+   .ln2  - multiplied by the exponent k in logf.
+   .poly - logf evaluates poly[0]*r^4 + poly[1]*r^3 + poly[2]*r^2 and adds
+	   r itself separately, so the first order coefficient is not stored.  */
+const struct logf_data __logf_data = {
+  .tab = {
+  { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
+  { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
+  { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
+  { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
+  { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
+  { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
+  { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
+  { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
+  { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
+  { 0x1p+0, 0x0p+0 },
+  { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
+  { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
+  { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
+  { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
+  { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
+  { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
+  },
+  .ln2 = 0x1.62e42fefa39efp-1,
+  .poly = {
+  -0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
+  }
+};
@@ -0,0 +1,419 @@
+/*
+ * Configuration for math routines.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef _MATH_CONFIG_H
+#define _MATH_CONFIG_H
+
+#include <math.h>
+#include <stdint.h>
+
+#ifndef WANT_ROUNDING
+/* If defined to 1, return correct results for special cases in non-nearest
+   rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
+   This may be set to 0 if there is no fenv support or if math functions only
+   get called in round to nearest mode.  */
+# define WANT_ROUNDING 1
+#endif
+#ifndef WANT_ERRNO
+/* If defined to 1, set errno in math functions according to ISO C.  Many math
+   libraries do not set errno, so this is 0 by default.  It may need to be
+   set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.  */
+# define WANT_ERRNO 0
+#endif
+#ifndef WANT_ERRNO_UFLOW
+/* Set errno to ERANGE if result underflows to 0 (in all rounding modes).  */
+# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
+#endif
+
+/* Compiler can inline round as a single instruction.  */
+#ifndef HAVE_FAST_ROUND
+# if __aarch64__
+#   define HAVE_FAST_ROUND 1
+# else
+#   define HAVE_FAST_ROUND 0
+# endif
+#endif
+
+/* Compiler can inline lround, but not (long)round(x).  */
+#ifndef HAVE_FAST_LROUND
+# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
+#   define HAVE_FAST_LROUND 1
+# else
+#   define HAVE_FAST_LROUND 0
+# endif
+#endif
+
+/* Compiler can inline fma as a single instruction.  */
+#ifndef HAVE_FAST_FMA
+# if defined FP_FAST_FMA || __aarch64__
+#   define HAVE_FAST_FMA 1
+# else
+#   define HAVE_FAST_FMA 0
+# endif
+#endif
+
+/* Provide *_finite symbols and some of the glibc hidden symbols
+   so libmathlib can be used with binaries compiled against glibc
+   to interpose math functions with both static and dynamic linking.  */
+#ifndef USE_GLIBC_ABI
+# if __GNUC__
+#   define USE_GLIBC_ABI 1
+# else
+#   define USE_GLIBC_ABI 0
+# endif
+#endif
+
+/* Optionally used extensions.
+
+   HIDDEN, NOINLINE, UNUSED expand to the matching GNU attributes, or to
+   nothing on other compilers.
+   likely(x) / unlikely(x) are branch prediction hints.  Both normalise the
+   condition with !!(x) before handing it to __builtin_expect, whose
+   parameter type is long: without the normalisation a non-zero value wider
+   than long could be truncated to 0 on targets where long is 32 bits.
+   strong_alias / hidden_alias declare `a' as an alias of function `f'
+   (hidden_alias additionally gives it hidden visibility); they are only
+   available with GNU C.  */
+#ifdef __GNUC__
+# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
+# define NOINLINE __attribute__ ((noinline))
+# define UNUSED __attribute__ ((unused))
+# define likely(x) __builtin_expect (!!(x), 1)
+# define unlikely(x) __builtin_expect (!!(x), 0)
+/* The copy attribute is only used from GCC 9 on; elsewhere it expands to
+   nothing.  */
+# if __GNUC__ >= 9
+#   define attribute_copy(f) __attribute__ ((copy (f)))
+# else
+#   define attribute_copy(f)
+# endif
+# define strong_alias(f, a) \
+  extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
+# define hidden_alias(f, a) \
+  extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
+  attribute_copy (f);
+#else
+# define HIDDEN
+# define NOINLINE
+# define UNUSED
+# define likely(x) (x)
+# define unlikely(x) (x)
+#endif
+
+#if HAVE_FAST_ROUND
+/* When set, the roundtoint and converttoint functions are provided with
+   the semantics documented below.  */
+# define TOINT_INTRINSICS 1
+
+/* Rounds x to the nearest integral value regardless of the current rounding
+   mode.  Ties must be resolved the same way converttoint resolves them so
+   that the two results agree.  The semantics is unspecified when the result
+   would fall outside [-2^31, 2^31-1].  */
+static inline double_t
+roundtoint (double_t x)
+{
+  double_t nearest = round (x);
+  return nearest;
+}
+
+/* Converts x to the nearest integer regardless of the current rounding mode.
+   Ties must be resolved the same way roundtoint resolves them.  The
+   semantics is unspecified when the result is not representable in an
+   int32_t.  */
+static inline int32_t
+converttoint (double_t x)
+{
+# if HAVE_FAST_LROUND
+  long nearest = lround (x);
+# else
+  long nearest = (long) round (x);
+# endif
+  return nearest;
+}
+#endif
+
+/* Returns the object representation of f reinterpreted as a uint32_t
+   (type punning through a union).  */
+static inline uint32_t
+asuint (float f)
+{
+  union
+  {
+    float value;
+    uint32_t bits;
+  } pun;
+
+  pun.value = f;
+  return pun.bits;
+}
+
+/* Returns the float whose object representation is the 32-bit pattern i
+   (type punning through a union).  */
+static inline float
+asfloat (uint32_t i)
+{
+  union
+  {
+    uint32_t bits;
+    float value;
+  } pun;
+
+  pun.bits = i;
+  return pun.value;
+}
+
+/* Returns the object representation of f reinterpreted as a uint64_t
+   (type punning through a union).  */
+static inline uint64_t
+asuint64 (double f)
+{
+  union
+  {
+    double value;
+    uint64_t bits;
+  } pun;
+
+  pun.value = f;
+  return pun.bits;
+}
+
+/* Returns the double whose object representation is the 64-bit pattern i
+   (type punning through a union).  */
+static inline double
+asdouble (uint64_t i)
+{
+  union
+  {
+    uint64_t bits;
+    double value;
+  } pun;
+
+  pun.bits = i;
+  return pun.value;
+}
+
+#ifndef IEEE_754_2008_SNAN
+# define IEEE_754_2008_SNAN 1
+#endif
+/* Tests the bit pattern of x for a signaling NaN.
+   With IEEE_754_2008_SNAN set, the result is non-zero when the exponent
+   field is all ones, bit 22 is clear and some lower mantissa bit is set.
+   Otherwise the result is non-zero when the exponent field is all ones and
+   bit 22 is set.  */
+static inline int
+issignalingf_inline (float x)
+{
+  uint32_t bits = asuint (x);
+
+  if (IEEE_754_2008_SNAN)
+    /* Flip bit 22, then double to shift the sign bit out before the
+       unsigned comparison.  */
+    return 2 * (bits ^ 0x00400000) > 2u * 0x7fc00000;
+  return (bits & 0x7fc00000) == 0x7fc00000;
+}
+
+/* Double-precision counterpart of issignalingf_inline.
+   With IEEE_754_2008_SNAN set, the result is non-zero when the exponent
+   field is all ones, bit 51 is clear and some lower mantissa bit is set.
+   Otherwise the result is non-zero when the exponent field is all ones and
+   bit 51 is set.  */
+static inline int
+issignaling_inline (double x)
+{
+  uint64_t bits = asuint64 (x);
+
+  if (IEEE_754_2008_SNAN)
+    /* Flip bit 51, then double to shift the sign bit out before the
+       unsigned comparison.  */
+    return 2 * (bits ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
+  return (bits & 0x7ff8000000000000) == 0x7ff8000000000000;
+}
+
+/* Optimization barriers for floating-point values.  Two implementations are
+   provided: an inline-asm one for GNU C on AArch64 and a volatile-based
+   fallback for everything else.  Both expose the same four functions.  */
+#if __aarch64__ && __GNUC__
+/* Prevent the optimization of a floating-point expression.  */
+static inline float
+opt_barrier_float (float x)
+{
+  /* Empty asm statement with x listed as an in/out ("+") operand: no
+     instruction is emitted, but the compiler has to treat x as modified.  */
+  __asm__ __volatile__ ("" : "+w" (x));
+  return x;
+}
+/* Double-precision variant of opt_barrier_float.  */
+static inline double
+opt_barrier_double (double x)
+{
+  __asm__ __volatile__ ("" : "+w" (x));
+  return x;
+}
+/* Force the evaluation of a floating-point expression for its side-effect.  */
+static inline void
+force_eval_float (float x)
+{
+  /* Same empty asm as above; the value itself is discarded.  */
+  __asm__ __volatile__ ("" : "+w" (x));
+}
+/* Double-precision variant of force_eval_float.  */
+static inline void
+force_eval_double (double x)
+{
+  __asm__ __volatile__ ("" : "+w" (x));
+}
+#else
+/* Fallback: pass the value through a volatile local and return what is read
+   back from it.  */
+static inline float
+opt_barrier_float (float x)
+{
+  volatile float y = x;
+  return y;
+}
+/* Double-precision variant of the fallback opt_barrier_float.  */
+static inline double
+opt_barrier_double (double x)
+{
+  volatile double y = x;
+  return y;
+}
+/* Fallback: store the value into a volatile local; UNUSED marks the
+   otherwise unread variable.  */
+static inline void
+force_eval_float (float x)
+{
+  volatile float y UNUSED = x;
+}
+/* Double-precision variant of the fallback force_eval_float.  */
+static inline void
+force_eval_double (double x)
+{
+  volatile double y UNUSED = x;
+}
+#endif
+
+/* Evaluate an expression as the specified type, normally a type
+   cast should be enough, but compilers implement non-standard
+   excess-precision handling, so when FLT_EVAL_METHOD != 0 then
+   these functions may need to be customized.  */
+/* Returns its argument as a float; the conversion to float happens when the
+   caller's (possibly wider) expression is passed as the float parameter.  */
+static inline float
+eval_as_float (float x)
+{
+  return x;
+}
+/* Returns its argument as a double; the conversion to double happens when
+   the caller's expression is passed as the double parameter.  */
+static inline double
+eval_as_double (double x)
+{
+  return x;
+}
+
+/* Error handling tail calls for special cases, with a sign argument.
+   The sign of the return value is set if the argument is non-zero.  */
+
+/* The result overflows.  */
+HIDDEN float __math_oflowf (uint32_t);
+/* The result underflows to 0 in nearest rounding mode.  */
+HIDDEN float __math_uflowf (uint32_t);
+/* The result underflows to 0 in some directed rounding mode only.  */
+HIDDEN float __math_may_uflowf (uint32_t);
+/* Division by zero.  */
+HIDDEN float __math_divzerof (uint32_t);
+/* The result overflows.  */
+HIDDEN double __math_oflow (uint32_t);
+/* The result underflows to 0 in nearest rounding mode.  */
+HIDDEN double __math_uflow (uint32_t);
+/* The result underflows to 0 in some directed rounding mode only.  */
+HIDDEN double __math_may_uflow (uint32_t);
+/* Division by zero.  */
+HIDDEN double __math_divzero (uint32_t);
+
+/* Error handling using input checking.  */
+
+/* Invalid input unless it is a quiet NaN.  */
+HIDDEN float __math_invalidf (float);
+/* Invalid input unless it is a quiet NaN.  */
+HIDDEN double __math_invalid (double);
+
+/* Error handling using output checking, only for errno setting.  */
+
+/* Check if the result overflowed to infinity.  */
+HIDDEN double __math_check_oflow (double);
+/* Check if the result underflowed to 0.  */
+HIDDEN double __math_check_uflow (double);
+
+/* Overflow check on a result: when WANT_ERRNO is enabled the value is passed
+   through __math_check_oflow, otherwise x is returned unchanged.  */
+static inline double
+check_oflow (double x)
+{
+  if (WANT_ERRNO)
+    return __math_check_oflow (x);
+  return x;
+}
+
+/* Underflow check on a result: when WANT_ERRNO is enabled the value is passed
+   through __math_check_uflow, otherwise x is returned unchanged.  */
+static inline double
+check_uflow (double x)
+{
+  if (WANT_ERRNO)
+    return __math_check_uflow (x);
+  return x;
+}
+
+
+/* Shared between expf, exp2f and powf.  */
+/* Layout of the constant data object __exp2f_data (declared here, defined in
+   another translation unit, hidden visibility under GNU C).
+   tab has 1 << EXP2F_TABLE_BITS (32) 64-bit entries; poly and poly_scaled
+   each hold EXP2F_POLY_ORDER (3) coefficients.
+   NOTE(review): the *_scaled members presumably pair with invln2_scaled for
+   the expf path -- confirm against the users of this struct.  */
+#define EXP2F_TABLE_BITS 5
+#define EXP2F_POLY_ORDER 3
+extern const struct exp2f_data
+{
+  uint64_t tab[1 << EXP2F_TABLE_BITS];
+  double shift_scaled;
+  double poly[EXP2F_POLY_ORDER];
+  double shift;
+  double invln2_scaled;
+  double poly_scaled[EXP2F_POLY_ORDER];
+} __exp2f_data HIDDEN;
+
+/* Layout of __logf_data, the constant data read by logf.
+   tab has 1 << LOGF_TABLE_BITS (16) entries; logf uses invc as 1/c in
+   r = z*invc - 1 and logc as log(c) for the subinterval's reference point c.
+   ln2 is multiplied by the exponent k.  poly holds the LOGF_POLY_ORDER - 1
+   stored coefficients.  */
+#define LOGF_TABLE_BITS 4
+#define LOGF_POLY_ORDER 4
+extern const struct logf_data
+{
+  struct
+  {
+    double invc, logc;
+  } tab[1 << LOGF_TABLE_BITS];
+  double ln2;
+  double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
+} __logf_data HIDDEN;
+
+/* Layout of __log2f_data, the constant data for log2f.
+   tab has 1 << LOG2F_TABLE_BITS (16) entries of {invc, logc}; log2f uses
+   invc as 1/c in r = z*invc - 1 and adds logc (log2(c)) to the exponent.
+   poly holds LOG2F_POLY_ORDER (4) coefficients.  */
+#define LOG2F_TABLE_BITS 4
+#define LOG2F_POLY_ORDER 4
+extern const struct log2f_data
+{
+  struct
+  {
+    double invc, logc;
+  } tab[1 << LOG2F_TABLE_BITS];
+  double poly[LOG2F_POLY_ORDER];
+} __log2f_data HIDDEN;
+
+/* Layout of __powf_log2_data and the scaling constants that go with it.
+   POWF_SCALE_BITS equals EXP2F_TABLE_BITS when TOINT_INTRINSICS is set (this
+   header sets it only under HAVE_FAST_ROUND) and 0 otherwise, so POWF_SCALE
+   is either 2^EXP2F_TABLE_BITS or 1.
+   tab has 1 << POWF_LOG2_TABLE_BITS (16) {invc, logc} entries and poly holds
+   POWF_LOG2_POLY_ORDER (5) coefficients.
+   NOTE(review): presumably consumed by powf's log2 step -- the user is not
+   visible in this part of the file.  */
+#define POWF_LOG2_TABLE_BITS 4
+#define POWF_LOG2_POLY_ORDER 5
+#if TOINT_INTRINSICS
+# define POWF_SCALE_BITS EXP2F_TABLE_BITS
+#else
+# define POWF_SCALE_BITS 0
+#endif
+#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
+extern const struct powf_log2_data
+{
+  struct
+  {
+    double invc, logc;
+  } tab[1 << POWF_LOG2_TABLE_BITS];
+  double poly[POWF_LOG2_POLY_ORDER];
+} __powf_log2_data HIDDEN;
+
+
+/* Configuration and layout of __exp_data.
+   tab has 2 * (1 << EXP_TABLE_BITS) (256) 64-bit entries; exp2_poly holds
+   EXP2_POLY_ORDER (5) coefficients.
+   NOTE(review): member names suggest the data is shared by exp and exp2 --
+   confirm against the users of this struct.  */
+#define EXP_TABLE_BITS 7
+#define EXP_POLY_ORDER 5
+/* Use polynomial that is optimized for a wider input range.  This may be
+   needed for good precision in non-nearest rounding and !TOINT_INTRINSICS.  */
+#define EXP_POLY_WIDE 0
+/* Use close to nearest rounding toint when !TOINT_INTRINSICS.  This may be
+   needed for good precision in non-nearest rounding and !EXP_POLY_WIDE.  */
+#define EXP_USE_TOINT_NARROW 0
+#define EXP2_POLY_ORDER 5
+#define EXP2_POLY_WIDE 0
+extern const struct exp_data
+{
+  double invln2N;
+  double shift;
+  double negln2hiN;
+  double negln2loN;
+  double poly[4]; /* Last four coefficients.  */
+  double exp2_shift;
+  double exp2_poly[EXP2_POLY_ORDER];
+  uint64_t tab[2*(1 << EXP_TABLE_BITS)];
+} __exp_data HIDDEN;
+
+/* Configuration and layout of __log_data.
+   With LOG_TABLE_BITS == 7 the tables have 128 entries.  Per the algorithm
+   notes next to the table definition: tab[i].invc = 1/c,
+   tab[i].logc = (double)log(c), tab2[i].chi = (double)c and
+   tab2[i].clo = (double)(c - (double)c).
+   tab2 only exists when there is no fast fma (!HAVE_FAST_FMA).
+   poly holds LOG_POLY_ORDER - 1 (5) and poly1 LOG_POLY1_ORDER - 1 (11)
+   coefficients.  */
+#define LOG_TABLE_BITS 7
+#define LOG_POLY_ORDER 6
+#define LOG_POLY1_ORDER 12
+extern const struct log_data
+{
+  double ln2hi;
+  double ln2lo;
+  double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
+  double poly1[LOG_POLY1_ORDER - 1];
+  struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
+#if !HAVE_FAST_FMA
+  struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
+#endif
+} __log_data HIDDEN;
+
+/* Configuration and layout of __log2_data.
+   With LOG2_TABLE_BITS == 6 the tables have 64 entries; poly holds
+   LOG2_POLY_ORDER - 1 (6) and poly1 LOG2_POLY1_ORDER - 1 (10) coefficients.
+   tab2 only exists when !HAVE_FAST_FMA.
+   NOTE(review): the layout mirrors struct log_data, so the members
+   presumably have the analogous meaning for base 2 -- confirm against the
+   data definition.  */
+#define LOG2_TABLE_BITS 6
+#define LOG2_POLY_ORDER 7
+#define LOG2_POLY1_ORDER 11
+extern const struct log2_data
+{
+  double invln2hi;
+  double invln2lo;
+  double poly[LOG2_POLY_ORDER - 1];
+  double poly1[LOG2_POLY1_ORDER - 1];
+  struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
+#if !HAVE_FAST_FMA
+  struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
+#endif
+} __log2_data HIDDEN;
+
+/* Configuration and layout of __pow_log_data.
+   With POW_LOG_TABLE_BITS == 7 the table has 128 entries of four doubles
+   each; poly holds POW_LOG_POLY_ORDER - 1 (7) coefficients.
+   NOTE(review): presumably the log step of pow -- the user is not visible in
+   this part of the file.  */
+#define POW_LOG_TABLE_BITS 7
+#define POW_LOG_POLY_ORDER 8
+extern const struct pow_log_data
+{
+  double ln2hi;
+  double ln2lo;
+  double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
+  /* Note: the pad field is unused, but allows slightly faster indexing.  */
+  struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
+} __pow_log_data HIDDEN;
+
+#endif
@@ -0,0 +1,80 @@
+/*
+ * Double-precision math error handling.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "math_config.h"
+
+#if WANT_ERRNO
+#include <errno.h>
+/* Store E in errno and hand Y back unchanged, so that callers can write
+   "return with_errno (y, E);".
+   NOINLINE reduces code size and avoids making math functions non-leaf
+   when the error handling is inlined.  */
+NOINLINE static double
+with_errno (double y, int e)
+{
+  errno = e;
+  return y;
+}
+#else
+/* errno support is compiled out: the error code argument is dropped by the
+   preprocessor and only the value is passed through.  */
+#define with_errno(x, e) (x)
+#endif
+
+/* Return Y * Y, negated when SIGN is non-zero, and report ERANGE through
+   with_errno.  The callers in this file pass a Y whose square lies outside
+   the normal double range, so the multiplication itself produces the
+   overflowed or underflowed value.
+   NOTE(review): opt_barrier_double and eval_as_double come from
+   math_config.h; presumably the barrier keeps the product from being folded
+   at compile time and eval_as_double rounds it to double -- confirm.
+   NOINLINE reduces code size.  */
+NOINLINE static double
+xflow (uint32_t sign, double y)
+{
+  y = eval_as_double (opt_barrier_double (sign ? -y : y) * y);
+  return with_errno (y, ERANGE);
+}
+
+/* Produce the result of a double-precision underflow; the result is
+   negative when SIGN is non-zero.  */
+HIDDEN double
+__math_uflow (uint32_t sign)
+{
+  /* tiny * tiny = 2^-1534 lies far below the smallest subnormal (2^-1074).  */
+  const double tiny = 0x1p-767;
+
+  return xflow (sign, tiny);
+}
+
+#if WANT_ERRNO_UFLOW
+/* Produce a result that underflows to zero only in some non-nearest
+   rounding modes.  Setting errno is valid even if the result is non-zero,
+   as long as it is in the subnormal range.  The result is negative when
+   SIGN is non-zero.  */
+HIDDEN double
+__math_may_uflow (uint32_t sign)
+{
+  /* borderline * borderline = 1.125 * 2^-1075, which lies between half of
+     the smallest subnormal and the smallest subnormal (2^-1074).  */
+  const double borderline = 0x1.8p-538;
+
+  return xflow (sign, borderline);
+}
+#endif
+
+/* Produce the result of a double-precision overflow; the result is
+   negative when SIGN is non-zero.  */
+HIDDEN double
+__math_oflow (uint32_t sign)
+{
+  /* huge * huge = 2^1538 is beyond the largest finite double (< 2^1024).  */
+  const double huge = 0x1p769;
+
+  return xflow (sign, huge);
+}
+
+/* Produce the result of a pole error by dividing +1.0 or -1.0 (when SIGN is
+   non-zero) by 0.0, and report ERANGE through with_errno.
+   NOTE(review): opt_barrier_double comes from math_config.h; presumably it
+   forces the division to happen at run time -- confirm.  */
+HIDDEN double
+__math_divzero (uint32_t sign)
+{
+  double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0;
+  return with_errno (y, ERANGE);
+}
+
+/* Produce the result of a domain error for input X.  (x - x) / (x - x) is
+   0/0 for finite x and already involves a NaN for infinite or NaN x, so the
+   quotient is a NaN in every case.  EDOM is only reported when X itself is
+   not a NaN.  */
+HIDDEN double
+__math_invalid (double x)
+{
+  double y = (x - x) / (x - x);
+  return isnan (x) ? y : with_errno (y, EDOM);
+}
+
+/* Check result and set errno if necessary.  */
+
+/* Return Y unchanged; a zero Y is additionally reported as ERANGE.  */
+HIDDEN double
+__math_check_uflow (double y)
+{
+  if (y == 0.0)
+    return with_errno (y, ERANGE);
+  return y;
+}
+
+/* Return Y unchanged; an infinite Y is additionally reported as ERANGE.  */
+HIDDEN double
+__math_check_oflow (double y)
+{
+  if (isinf (y))
+    return with_errno (y, ERANGE);
+  return y;
+}
@@ -0,0 +1,66 @@
+/*
+ * Single-precision math error handling.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "math_config.h"
+
+#if WANT_ERRNO
+#include <errno.h>
+/* Store E in errno and hand Y back unchanged, so that callers can write
+   "return with_errnof (y, E);".
+   NOINLINE reduces code size and avoids making math functions non-leaf
+   when the error handling is inlined.  */
+NOINLINE static float
+with_errnof (float y, int e)
+{
+  errno = e;
+  return y;
+}
+#else
+/* errno support is compiled out: the error code argument is dropped by the
+   preprocessor and only the value is passed through.  */
+#define with_errnof(x, e) (x)
+#endif
+
+/* Return Y * Y, negated when SIGN is non-zero, and report ERANGE through
+   with_errnof.  The callers in this file pass a Y whose square lies outside
+   the normal float range, so the multiplication itself produces the
+   overflowed or underflowed value.
+   NOTE(review): opt_barrier_float and eval_as_float come from
+   math_config.h; presumably the barrier keeps the product from being folded
+   at compile time and eval_as_float rounds it to float -- confirm.
+   NOINLINE reduces code size.  */
+NOINLINE static float
+xflowf (uint32_t sign, float y)
+{
+  y = eval_as_float (opt_barrier_float (sign ? -y : y) * y);
+  return with_errnof (y, ERANGE);
+}
+
+/* Produce the result of a single-precision underflow; the result is
+   negative when SIGN is non-zero.  */
+HIDDEN float
+__math_uflowf (uint32_t sign)
+{
+  /* tiny * tiny = 2^-190 lies far below the smallest subnormal (2^-149).  */
+  const float tiny = 0x1p-95f;
+
+  return xflowf (sign, tiny);
+}
+
+#if WANT_ERRNO_UFLOW
+/* Produce a result that underflows to zero only in some non-nearest
+   rounding modes.  Setting errno is valid even if the result is non-zero,
+   as long as it is in the subnormal range.  The result is negative when
+   SIGN is non-zero.  */
+HIDDEN float
+__math_may_uflowf (uint32_t sign)
+{
+  /* borderline * borderline = 1.5625 * 2^-150, which lies between half of
+     the smallest subnormal and the smallest subnormal (2^-149).  */
+  const float borderline = 0x1.4p-75f;
+
+  return xflowf (sign, borderline);
+}
+#endif
+
+/* Produce the result of a single-precision overflow; the result is
+   negative when SIGN is non-zero.  */
+HIDDEN float
+__math_oflowf (uint32_t sign)
+{
+  /* huge * huge = 2^194 is beyond the largest finite float (< 2^128).  */
+  const float huge = 0x1p97f;
+
+  return xflowf (sign, huge);
+}
+
+/* Produce the result of a pole error by dividing +1.0f or -1.0f (when SIGN
+   is non-zero) by 0.0f, and report ERANGE through with_errnof.
+   NOTE(review): opt_barrier_float comes from math_config.h; presumably it
+   forces the division to happen at run time -- confirm.  */
+HIDDEN float
+__math_divzerof (uint32_t sign)
+{
+  float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f;
+  return with_errnof (y, ERANGE);
+}
+
+/* Produce the result of a domain error for input X.  (x - x) / (x - x) is
+   0/0 for finite x and already involves a NaN for infinite or NaN x, so the
+   quotient is a NaN in every case.  EDOM is only reported when X itself is
+   not a NaN.  */
+HIDDEN float
+__math_invalidf (float x)
+{
+  float y = (x - x) / (x - x);
+  return isnan (x) ? y : with_errnof (y, EDOM);
+}
@@ -0,0 +1,380 @@
+/*
+ * Double-precision x^y function.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
+relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
+ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
+*/
+
+/* Shorthands for the pow log data used by log_inline.  N is the number of
+   table entries.  OFF is the bit pattern of 0x1.69555p-1; log_inline
+   subtracts it from the bit pattern of x to split x into the exponent k,
+   the table index i and the reduced argument z.  */
+#define T __pow_log_data.tab
+#define A __pow_log_data.poly
+#define Ln2hi __pow_log_data.ln2hi
+#define Ln2lo __pow_log_data.ln2lo
+#define N (1 << POW_LOG_TABLE_BITS)
+#define OFF 0x3fe6955500000000
+
+/* Return the sign bit and the 11 exponent bits of X, i.e. the top 12 bits
+   of its 64-bit representation.  */
+static inline uint32_t
+top12 (double x)
+{
+  uint64_t bits = asuint64 (x);
+
+  return bits >> 52;
+}
+
+/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
+   additional 15 bits precision.  IX is the bit representation of x, but
+   normalized in the subnormal range using the sign bit for the exponent.
+   y is the return value and TAIL is stored through *tail.
+   Only POW_LOG_POLY_ORDER == 8 is implemented; any other value is rejected
+   at compile time instead of leaving the polynomial term uninitialized.  */
+static inline double_t
+log_inline (uint64_t ix, double_t *tail)
+{
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
+  uint64_t iz, tmp;
+  int k, i;
+
+  /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
+  k = (int64_t) tmp >> 52; /* arithmetic shift */
+  /* Remove k from the exponent field so that iz is the representation of z.  */
+  iz = ix - (tmp & 0xfffULL << 52);
+  z = asdouble (iz);
+  kd = (double_t) k;
+
+  /* log(x) = k*Ln2 + log(c) + log1p(z/c-1).  */
+  invc = T[i].invc;
+  logc = T[i].logc;
+  logctail = T[i].logctail;
+
+  /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
+     |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable.  */
+#if HAVE_FAST_FMA
+  r = fma (z, invc, -1.0);
+#else
+  /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|.  */
+  double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32));
+  double_t zlo = z - zhi;
+  double_t rhi = zhi * invc - 1.0;
+  double_t rlo = zlo * invc;
+  r = rhi + rlo;
+#endif
+
+  /* k*Ln2 + log(c) + r.  t2 is the rounded sum, lo1 and lo2 collect the
+     parts that do not fit into it.  */
+  t1 = kd * Ln2hi + logc;
+  t2 = t1 + r;
+  lo1 = kd * Ln2lo + logctail;
+  lo2 = t1 - t2 + r;
+
+  /* Evaluation is optimized assuming superscalar pipelined execution.  */
+  double_t ar, ar2, ar3, lo3, lo4;
+  ar = A[0] * r; /* A[0] = -0.5.  */
+  ar2 = r * ar;
+  ar3 = r * ar2;
+  /* k*Ln2 + log(c) + r + A[0]*r*r.  */
+#if HAVE_FAST_FMA
+  hi = t2 + ar2;
+  lo3 = fma (ar, r, -ar2);
+  lo4 = t2 - hi + ar2;
+#else
+  double_t arhi = A[0] * rhi;
+  double_t arhi2 = rhi * arhi;
+  hi = t2 + arhi2;
+  lo3 = rlo * (ar + arhi);
+  lo4 = t2 - hi + arhi2;
+#endif
+  /* p = log1p(r) - r - A[0]*r*r.  */
+#if POW_LOG_POLY_ORDER == 8
+  p = (ar3
+       * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
+#else
+# error "log_inline only implements POW_LOG_POLY_ORDER == 8"
+#endif
+  /* Sum the low-order parts first, then split the total into y and tail.  */
+  lo = lo1 + lo2 + lo3 + lo4 + p;
+  y = hi + lo;
+  *tail = hi - y + lo;
+  return y;
+}
+
+/* From here on N and T refer to the exp table instead of the pow log table.
+   C2..C6 name the polynomial coefficients by the power of r they multiply;
+   the index expression maps them onto __exp_data.poly for the configured
+   EXP_POLY_ORDER (C2 is poly[0] when EXP_POLY_ORDER is 5).  Only the
+   coefficients that the selected polynomial in exp_inline actually uses
+   expand to valid indices.  */
+#undef N
+#undef T
+#define N (1 << EXP_TABLE_BITS)
+#define InvLn2N __exp_data.invln2N
+#define NegLn2hiN __exp_data.negln2hiN
+#define NegLn2loN __exp_data.negln2loN
+#define Shift __exp_data.shift
+#define T __exp_data.tab
+#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
+#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
+#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
+#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
+#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
+
+/* Handle cases that may overflow or underflow when computing the result that
+   is scale*(1+TMP) without intermediate rounding.  The bit representation of
+   scale is in SBITS, however it has a computed exponent that may have
+   overflowed into the sign bit so that needs to be adjusted before using it
+   as a double.  (int32_t)KI is the k used in the argument reduction and
+   exponent adjustment of scale, positive k here means the result may
+   overflow and negative k means the result may underflow.
+   The result goes through check_oflow or check_uflow (from math_config.h)
+   before it is returned.  */
+static inline double
+specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
+{
+  double_t scale, y;
+
+  if ((ki & 0x80000000) == 0)
+    {
+      /* k > 0, the exponent of scale might have overflowed by <= 460.  */
+      /* Lower the exponent by 1009 to get a valid double, then scale the
+	 product back up by 2^1009.  */
+      sbits -= 1009ull << 52;
+      scale = asdouble (sbits);
+      y = 0x1p1009 * (scale + scale * tmp);
+      return check_oflow (eval_as_double (y));
+    }
+  /* k < 0, need special care in the subnormal range.  */
+  /* Raise the exponent by 1022; the final multiplication by 2^-1022 undoes
+     this.  */
+  sbits += 1022ull << 52;
+  /* Note: sbits is signed scale.  */
+  scale = asdouble (sbits);
+  y = scale + scale * tmp;
+  if (fabs (y) < 1.0)
+    {
+      /* Round y to the right precision before scaling it into the subnormal
+	 range to avoid double rounding that can cause 0.5+E/2 ulp error where
+	 E is the worst-case ulp error outside the subnormal range.  So this
+	 is only useful if the goal is better than 1 ulp worst-case error.  */
+      double_t hi, lo, one = 1.0;
+      if (y < 0.0)
+	one = -1.0;
+      lo = scale - y + scale * tmp;
+      hi = one + y;
+      lo = one - hi + y + lo;
+      y = eval_as_double (hi + lo) - one;
+      /* Fix the sign of 0.  */
+      if (y == 0.0)
+	y = asdouble (sbits & 0x8000000000000000);
+      /* The underflow exception needs to be signaled explicitly.  */
+      force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
+    }
+  y = 0x1p-1022 * y;
+  return check_uflow (eval_as_double (y));
+}
+
+/* Added to k in exp_inline; after the shift by 52 - EXP_TABLE_BITS it lands
+   on bit 63, the sign bit of the scale.  */
+#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
+
+/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
+   The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.
+   Tiny |x| returns +-1 directly, |x| >= 1024 goes to __math_uflow or
+   __math_oflow, and the remaining |x| >= 512 is finished by specialcase.
+   Only EXP_POLY_ORDER 4, 5 and 6 are implemented; any other value is
+   rejected at compile time instead of leaving the polynomial term
+   uninitialized.  */
+static inline double
+exp_inline (double_t x, double_t xtail, uint32_t sign_bias)
+{
+  uint32_t abstop;
+  uint64_t ki, idx, top, sbits;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t kd, z, r, r2, scale, tail, tmp;
+
+  abstop = top12 (x) & 0x7ff;
+  /* One unsigned comparison catches both |x| < 2^-54 (the subtraction wraps
+     around) and |x| >= 512.  */
+  if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
+    {
+      if (abstop - top12 (0x1p-54) >= 0x80000000)
+	{
+	  /* Avoid spurious underflow for tiny x.  */
+	  /* Note: 0 is common input.  */
+	  double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
+	  return sign_bias ? -one : one;
+	}
+      if (abstop >= top12 (1024.0))
+	{
+	  /* Note: inf and nan are already handled.  */
+	  if (asuint64 (x) >> 63)
+	    return __math_uflow (sign_bias);
+	  else
+	    return __math_oflow (sign_bias);
+	}
+      /* Large x is special cased below.  */
+      abstop = 0;
+    }
+
+  /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */
+  /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].  */
+  z = InvLn2N * x;
+#if TOINT_INTRINSICS
+  kd = roundtoint (z);
+  ki = converttoint (z);
+#elif EXP_USE_TOINT_NARROW
+  /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */
+  kd = eval_as_double (z + Shift);
+  ki = asuint64 (kd) >> 16;
+  kd = (double_t) (int32_t) ki;
+#else
+  /* z - kd is in [-1, 1] in non-nearest rounding modes.  */
+  kd = eval_as_double (z + Shift);
+  ki = asuint64 (kd);
+  kd -= Shift;
+#endif
+  r = x + kd * NegLn2hiN + kd * NegLn2loN;
+  /* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
+  r += xtail;
+  /* 2^(k/N) ~= scale * (1 + tail).  */
+  idx = 2 * (ki % N);
+  top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
+  tail = asdouble (T[idx]);
+  /* This is only a valid scale when -1023*N < k < 1024*N.  */
+  sbits = T[idx + 1] + top;
+  /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).  */
+  /* Evaluation is optimized assuming superscalar pipelined execution.  */
+  r2 = r * r;
+  /* Without fma the worst case error is 0.25/N ulp larger.  */
+  /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.  */
+#if EXP_POLY_ORDER == 4
+  tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
+#elif EXP_POLY_ORDER == 5
+  tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+#elif EXP_POLY_ORDER == 6
+  tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
+#else
+# error "exp_inline only implements EXP_POLY_ORDER 4, 5 and 6"
+#endif
+  /* abstop was cleared above for 512 <= |x| < 1024.  */
+  if (unlikely (abstop == 0))
+    return specialcase (tmp, sbits, ki);
+  scale = asdouble (sbits);
+  /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+     is no spurious underflow here even without fma.  */
+  return eval_as_double (scale + scale * tmp);
+}
+
+/* Classify the double whose bit representation is IY, which must be
+   non-zero and finite: 0 means not an integer, 1 an odd integer and 2 an
+   even integer.  */
+static inline int
+checkint (uint64_t iy)
+{
+  int biased_exp = iy >> 52 & 0x7ff;
+  uint64_t unit;
+
+  /* Magnitude below 1: cannot be an integer.  */
+  if (biased_exp < 0x3ff)
+    return 0;
+  /* Magnitude of at least 2^53: every representable value is even.  */
+  if (biased_exp > 0x3ff + 52)
+    return 2;
+  /* Bit of iy that stands for the units digit; the bits below it hold the
+     fraction.  */
+  unit = 1ULL << (0x3ff + 52 - biased_exp);
+  if (iy & (unit - 1))
+    return 0;
+  return (iy & unit) ? 1 : 2;
+}
+
+/* Return 1 if I is the bit representation of 0, infinity or nan (of either
+   sign) and 0 otherwise.  */
+static inline int
+zeroinfnan (uint64_t i)
+{
+  /* Doubling drops the sign bit; subtracting 1 wraps zero around to the
+     largest value, so one unsigned comparison covers 0, inf and nan.  */
+  uint64_t key = 2 * i - 1;
+  uint64_t limit = 2 * asuint64 (INFINITY) - 1;
+
+  return key >= limit;
+}
+
+/* Return x raised to the power y.
+   Inputs that need special handling (x zero, subnormal, negative, infinite
+   or nan; y zero, infinite, nan, or with |y| < 2^-65 or |y| >= 2^63) are
+   dealt with in the unlikely branch below.  Everything else is computed as
+   exp (y * log (x)), with log (x) carried as a hi/lo pair.  sign_bias is set
+   when the result must be negative (x < 0 and y an odd integer).  */
+double
+pow (double x, double y)
+{
+  uint32_t sign_bias = 0;
+  uint64_t ix, iy;
+  uint32_t topx, topy;
+
+  ix = asuint64 (x);
+  iy = asuint64 (y);
+  topx = top12 (x);
+  topy = top12 (y);
+  if (unlikely (topx - 0x001 >= 0x7ff - 0x001
+		|| (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be))
+    {
+      /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
+	 and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1.  */
+      /* Special cases: (x < 0x1p-1022 or inf or nan) or
+	 (|y| < 0x1p-65 or |y| >= 0x1p63 or nan).  */
+      if (unlikely (zeroinfnan (iy)))
+	{
+	  if (2 * iy == 0)
+	    return issignaling_inline (x) ? x + y : 1.0;
+	  if (ix == asuint64 (1.0))
+	    return issignaling_inline (y) ? x + y : 1.0;
+	  if (2 * ix > 2 * asuint64 (INFINITY)
+	      || 2 * iy > 2 * asuint64 (INFINITY))
+	    return x + y;
+	  if (2 * ix == 2 * asuint64 (1.0))
+	    return 1.0;
+	  if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
+	    return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf.  */
+	  return y * y;
+	}
+      if (unlikely (zeroinfnan (ix)))
+	{
+	  /* x is 0 or inf here (nan x was returned above): x*x is +0 or
+	     +inf, negated when x is negative and y is an odd integer.  */
+	  double_t x2 = x * x;
+	  if (ix >> 63 && checkint (iy) == 1)
+	    {
+	      x2 = -x2;
+	      sign_bias = 1;
+	    }
+	  if (WANT_ERRNO && 2 * ix == 0 && iy >> 63)
+	    return __math_divzero (sign_bias);
+	  /* Without the barrier some versions of clang hoist the 1/x2 and
+	     thus division by zero exception can be signaled spuriously.  */
+	  return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
+	}
+      /* Here x and y are non-zero finite.  */
+      if (ix >> 63)
+	{
+	  /* Finite x < 0.  */
+	  int yint = checkint (iy);
+	  if (yint == 0)
+	    return __math_invalid (x);
+	  if (yint == 1)
+	    sign_bias = SIGN_BIAS;
+	  ix &= 0x7fffffffffffffff;
+	  topx &= 0x7ff;
+	}
+      if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)
+	{
+	  /* Note: sign_bias == 0 here because y is not odd.  */
+	  if (ix == asuint64 (1.0))
+	    return 1.0;
+	  if ((topy & 0x7ff) < 0x3be)
+	    {
+	      /* |y| < 2^-65, x^y ~= 1 + y*log(x).  */
+	      if (WANT_ROUNDING)
+		return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y;
+	      else
+		return 1.0;
+	    }
+	  /* |y| >= 2^63: overflow when |x| > 1 and y > 0 or |x| < 1 and
+	     y < 0, underflow otherwise.  */
+	  return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0)
+							 : __math_uflow (0);
+	}
+      if (topx == 0)
+	{
+	  /* Normalize subnormal x so exponent becomes negative.  */
+	  /* Without the barrier some versions of clang evaluate the mul
+	     unconditionally causing spurious overflow exceptions.  */
+	  ix = asuint64 (opt_barrier_double (x) * 0x1p52);
+	  ix &= 0x7fffffffffffffff;
+	  ix -= 52ULL << 52;
+	}
+    }
+
+  /* hi + lo ~= log(|x|); ehi + elo ~= y * log(|x|).  */
+  double_t lo;
+  double_t hi = log_inline (ix, &lo);
+  double_t ehi, elo;
+#if HAVE_FAST_FMA
+  ehi = y * hi;
+  elo = y * lo + fma (y, hi, -ehi);
+#else
+  /* Clear the low 27 bits of y and hi to split the product into parts.  */
+  double_t yhi = asdouble (iy & -1ULL << 27);
+  double_t ylo = y - yhi;
+  double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27);
+  double_t llo = hi - lhi + lo;
+  ehi = yhi * lhi;
+  elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25.  */
+#endif
+  return exp_inline (ehi, elo, sign_bias);
+}
+/* Extra entry points that are only built when USE_GLIBC_ABI is set.
+   NOTE(review): strong_alias and hidden_alias come from math_config.h (not
+   shown here); presumably they make __pow_finite and __ieee754_pow
+   additional names for pow -- confirm.
+   When long double has a 53-bit mantissa, powl simply forwards to pow.  */
+#if USE_GLIBC_ABI
+strong_alias (pow, __pow_finite)
+hidden_alias (pow, __ieee754_pow)
+# if LDBL_MANT_DIG == 53
+long double powl (long double x, long double y) { return pow (x, y); }
+# endif
+#endif
@@ -0,0 +1,184 @@
+/*
+ * Data for the log part of pow.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "math_config.h"
+
+#define N (1 << POW_LOG_TABLE_BITS)
+
+/* Constants, polynomial coefficients and lookup table for the log part of
+   pow.  The polynomial and the table are only provided for N == 128 (and
+   POW_LOG_POLY_ORDER == 8 for the polynomial); with any other configuration
+   the corresponding initializer is empty.  */
+const struct pow_log_data __pow_log_data = {
+.ln2hi = 0x1.62e42fefa3800p-1,
+.ln2lo = 0x1.ef35793c76730p-45,
+.poly = {
+#if N == 128 && POW_LOG_POLY_ORDER == 8
+// relative error: 0x1.11922ap-70
+// in -0x1.6bp-8 0x1.6bp-8
+// Coefficients are scaled to match the scaling during evaluation.
+-0x1p-1,
+0x1.555555555556p-2 * -2,
+-0x1.0000000000006p-2 * -2,
+0x1.999999959554ep-3 * 4,
+-0x1.555555529a47ap-3 * 4,
+0x1.2495b9b4845e9p-3 * -8,
+-0x1.0002b8b263fc3p-3 * -8,
+#endif
+},
+/* Algorithm:
+
+	x = 2^k z
+	log(x) = k ln2 + log(c) + log(z/c)
+	log(z/c) = poly(z/c - 1)
+
+where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
+and z falls into the ith one, then table entries are computed as
+
+	tab[i].invc = 1/c
+	tab[i].logc = round(0x1p43*log(c))/0x1p43
+	tab[i].logctail = (double)(log(c) - logc)
+
+where c is chosen near the center of the subinterval such that 1/c has only a
+few precision bits so z/c - 1 is exactly representable as double:
+
+	1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
+
+Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97,
+the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
+error and the interval for z is selected such that near x == 1, where log(x)
+is tiny, large cancellation error is avoided in logc + poly(z/c - 1).
+
+Identical consecutive rows below are presumably intentional: because 1/c is
+rounded as shown above, neighbouring subintervals can end up with the same
+c.  */
+.tab = {
+#if N == 128
+/* A(invc, logc, logctail) expands to one table entry with pad set to 0.  */
+#define A(a, b, c) {a, 0, b, c},
+A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
+A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
+A(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45)
+A(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49)
+A(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47)
+A(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46)
+A(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50)
+A(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45)
+A(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45)
+A(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45)
+A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
+A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
+A(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46)
+A(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46)
+A(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46)
+A(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45)
+A(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47)
+A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
+A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
+A(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47)
+A(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45)
+A(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46)
+A(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45)
+A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
+A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
+A(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46)
+A(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52)
+A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
+A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
+A(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45)
+A(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45)
+A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
+A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
+A(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46)
+A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
+A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
+A(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45)
+A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
+A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
+A(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48)
+A(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45)
+A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
+A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
+A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
+A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
+A(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45)
+A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
+A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
+A(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46)
+A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
+A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
+A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
+A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
+A(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45)
+A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
+A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
+A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
+A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
+A(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46)
+A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
+A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
+A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
+A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
+A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
+A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
+A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
+A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
+A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
+A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
+A(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45)
+A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
+A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
+A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
+A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
+A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
+A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
+A(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46)
+A(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45)
+A(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45)
+A(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47)
+A(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45)
+A(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46)
+A(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46)
+A(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47)
+A(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45)
+A(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45)
+A(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45)
+A(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49)
+A(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45)
+A(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46)
+A(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45)
+A(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45)
+A(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45)
+A(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45)
+A(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45)
+A(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47)
+A(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51)
+A(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45)
+A(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45)
+A(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46)
+A(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45)
+A(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46)
+A(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47)
+A(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47)
+A(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45)
+A(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47)
+A(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45)
+A(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48)
+A(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45)
+A(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51)
+A(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51)
+A(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46)
+A(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48)
+A(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45)
+A(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45)
+A(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45)
+A(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45)
+A(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47)
+A(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45)
+A(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45)
+A(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46)
+A(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46)
+A(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47)
+A(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45)
+A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
+A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
+A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
+A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
+#endif
+},
+};
@@ -0,0 +1,221 @@
+/*
+ * Single-precision pow function.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+POWF_LOG2_POLY_ORDER = 5
+EXP2F_TABLE_BITS = 5
+
+ULP error: 0.82 (~ 0.5 + relerr*2^24)
+relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
+relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
+relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
+*/
+
+/* Shorthands for the powf log2 data used by log2_inline.  N is the number
+   of table entries; OFF is subtracted from the bit pattern of x to split it
+   into the exponent k, the table index i and the reduced argument z.  */
+#define N (1 << POWF_LOG2_TABLE_BITS)
+#define T __powf_log2_data.tab
+#define A __powf_log2_data.poly
+#define OFF 0x3f330000
+
+/* Return log2 of the float whose bit representation is IX, computed in
+   double_t.
+   Subnormal input is normalized so ix has negative biased exponent.
+   Output is multiplied by N (POWF_SCALE) if TOINT_INTRINSICS is set.  */
+static inline double_t
+log2_inline (uint32_t ix)
+{
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, r2, r4, p, q, y, y0, invc, logc;
+  uint32_t iz, top, tmp;
+  int k, i;
+
+  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
+  /* top keeps the sign and exponent bits of tmp; removing it from ix leaves
+     the representation of z.  */
+  top = tmp & 0xff800000;
+  iz = ix - top;
+  k = (int32_t) top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = (double_t) asfloat (iz);
+
+  /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
+  r = z * invc - 1;
+  y0 = logc + (double_t) k;
+
+  /* Pipelined polynomial evaluation to approximate log1p(r)/ln2.  */
+  r2 = r * r;
+  y = A[0] * r + A[1];
+  p = A[2] * r + A[3];
+  r4 = r2 * r2;
+  q = A[4] * r + y0;
+  q = p * r2 + q;
+  y = y * r4 + q;
+  return y;
+}
+
+/* From here on N and T refer to the exp2f table instead of the log2 table.
+   SIGN_BIAS is added to k in exp2_inline; after the shift by
+   52 - EXP2F_TABLE_BITS it lands on bit 63, the sign bit of the scale.  */
+#undef N
+#undef T
+#define N (1 << EXP2F_TABLE_BITS)
+#define T __exp2f_data.tab
+#define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11))
+
+/* The output of log2 and thus the input of exp2 is either scaled by N
+   (in case of fast toint intrinsics) or not.  The unscaled xd must be
+   in [-1021,1023], sign_bias sets the sign of the result.
+   The result is computed in double_t and converted to float with
+   eval_as_float.  */
+static inline float
+exp2_inline (double_t xd, uint32_t sign_bias)
+{
+  uint64_t ki, ski, t;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t kd, z, r, r2, y, s;
+
+#if TOINT_INTRINSICS
+# define C __exp2f_data.poly_scaled
+  /* N*x = k + r with r in [-1/2, 1/2] */
+  kd = roundtoint (xd); /* k */
+  ki = converttoint (xd);
+#else
+# define C __exp2f_data.poly
+# define SHIFT __exp2f_data.shift_scaled
+  /* x = k/N + r with r in [-1/(2N), 1/(2N)] */
+  kd = eval_as_double (xd + SHIFT);
+  ki = asuint64 (kd);
+  kd -= SHIFT; /* k/N */
+#endif
+  r = xd - kd;
+
+  /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+  /* Build the bits of s: table entry for k % N plus k (and the sign bias)
+     shifted so that it is added to the exponent and sign fields.  */
+  t = T[ki % N];
+  ski = ki + sign_bias;
+  t += ski << (52 - EXP2F_TABLE_BITS);
+  s = asdouble (t);
+  z = C[0] * r + C[1];
+  r2 = r * r;
+  y = C[2] * r + 1;
+  y = z * r2 + y;
+  y = y * s;
+  return eval_as_float (y);
+}
+
+/* Classify the float whose bit representation is IY, which must be
+   non-zero and finite: 0 means not an integer, 1 an odd integer and 2 an
+   even integer.  */
+static inline int
+checkint (uint32_t iy)
+{
+  int biased_exp = iy >> 23 & 0xff;
+  int unit;
+
+  /* Magnitude below 1: cannot be an integer.  */
+  if (biased_exp < 0x7f)
+    return 0;
+  /* Magnitude of at least 2^24: every representable value is even.  */
+  if (biased_exp > 0x7f + 23)
+    return 2;
+  /* Bit of iy that stands for the units digit; the bits below it hold the
+     fraction.  */
+  unit = 1 << (0x7f + 23 - biased_exp);
+  if (iy & (unit - 1))
+    return 0;
+  return (iy & unit) ? 1 : 2;
+}
+
+/* Return 1 if IX is the bit representation of 0, infinity or nan (of either
+   sign) and 0 otherwise.  */
+static inline int
+zeroinfnan (uint32_t ix)
+{
+  /* Doubling drops the sign bit; subtracting 1 wraps zero around to the
+     largest value, so one unsigned comparison covers 0, inf and nan.  */
+  uint32_t key = 2 * ix - 1;
+
+  return key >= 2u * 0x7f800000 - 1;
+}
+
+/* Return x raised to the power y in single precision.
+   Inputs that need special handling (x zero, subnormal, negative, infinite
+   or nan; y zero, infinite or nan) are dealt with in the unlikely branch
+   below.  Everything else is computed as exp2 (y * log2 (x)) in double_t,
+   with overflow and underflow detected from the size of y * log2 (x).
+   sign_bias is set when the result must be negative (x < 0 and y an odd
+   integer).  */
+float
+powf (float x, float y)
+{
+  uint32_t sign_bias = 0;
+  uint32_t ix, iy;
+
+  ix = asuint (x);
+  iy = asuint (y);
+  if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000 || zeroinfnan (iy)))
+    {
+      /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan).  */
+      if (unlikely (zeroinfnan (iy)))
+	{
+	  if (2 * iy == 0)
+	    return issignalingf_inline (x) ? x + y : 1.0f;
+	  if (ix == 0x3f800000)
+	    return issignalingf_inline (y) ? x + y : 1.0f;
+	  if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
+	    return x + y;
+	  if (2 * ix == 2 * 0x3f800000)
+	    return 1.0f;
+	  if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
+	    return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf.  */
+	  return y * y;
+	}
+      if (unlikely (zeroinfnan (ix)))
+	{
+	  /* x is 0 or inf here (nan x was returned above): x*x is +0 or
+	     +inf, negated when x is negative and y is an odd integer.  */
+	  float_t x2 = x * x;
+	  if (ix & 0x80000000 && checkint (iy) == 1)
+	    {
+	      x2 = -x2;
+	      sign_bias = 1;
+	    }
+#if WANT_ERRNO
+	  if (2 * ix == 0 && iy & 0x80000000)
+	    return __math_divzerof (sign_bias);
+#endif
+	  /* Without the barrier some versions of clang hoist the 1/x2 and
+	     thus division by zero exception can be signaled spuriously.  */
+	  return iy & 0x80000000 ? opt_barrier_float (1 / x2) : x2;
+	}
+      /* x and y are non-zero finite.  */
+      if (ix & 0x80000000)
+	{
+	  /* Finite x < 0.  */
+	  int yint = checkint (iy);
+	  if (yint == 0)
+	    return __math_invalidf (x);
+	  if (yint == 1)
+	    sign_bias = SIGN_BIAS;
+	  ix &= 0x7fffffff;
+	}
+      if (ix < 0x00800000)
+	{
+	  /* Normalize subnormal x so exponent becomes negative.  */
+	  ix = asuint (x * 0x1p23f);
+	  ix &= 0x7fffffff;
+	  ix -= 23 << 23;
+	}
+    }
+  double_t logx = log2_inline (ix);
+  double_t ylogx = y * logx; /* Note: cannot overflow, y is single prec.  */
+  /* Compare the exponent and top mantissa bits (sign bit dropped) against
+     those of 126.0 * POWF_SCALE.  */
+  if (unlikely ((asuint64 (ylogx) >> 47 & 0xffff)
+		 >= asuint64 (126.0 * POWF_SCALE) >> 47))
+    {
+      /* |y*log(x)| >= 126.  */
+      if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
+	/* |x^y| > 0x1.ffffffp127.  */
+	return __math_oflowf (sign_bias);
+      if (WANT_ROUNDING && WANT_ERRNO
+	  && ylogx > 0x1.fffffffa3aae2p+6 * POWF_SCALE)
+	/* |x^y| > 0x1.fffffep127, check if we round away from 0.  */
+	if ((!sign_bias
+	     && eval_as_float (1.0f + opt_barrier_float (0x1p-25f)) != 1.0f)
+	    || (sign_bias
+		&& eval_as_float (-1.0f - opt_barrier_float (0x1p-25f))
+		     != -1.0f))
+	  return __math_oflowf (sign_bias);
+      if (ylogx <= -150.0 * POWF_SCALE)
+	return __math_uflowf (sign_bias);
+#if WANT_ERRNO_UFLOW
+      if (ylogx < -149.0 * POWF_SCALE)
+	return __math_may_uflowf (sign_bias);
+#endif
+    }
+  return exp2_inline (ylogx, sign_bias);
+}
+/* Extra entry points that are only built when USE_GLIBC_ABI is set.
+   NOTE(review): strong_alias and hidden_alias come from math_config.h (not
+   shown here); presumably they make __powf_finite and __ieee754_powf
+   additional names for powf -- confirm.  */
+#if USE_GLIBC_ABI
+strong_alias (powf, __powf_finite)
+hidden_alias (powf, __ieee754_powf)
+#endif
@@ -0,0 +1,34 @@
+/*
+ * Data definition for powf.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "math_config.h"
+
+/* Table and polynomial coefficients for the log2 step of powf.  The second
+   member of every table row and every polynomial coefficient is multiplied
+   by POWF_SCALE at compile time.
+   NOTE(review): rows look like {1/c, log2 (c)} pairs for 16 subinterval
+   centres c (the row {1, 0} fits that reading) - confirm against the
+   struct powf_log2_data declaration in math_config.h.  */
+const struct powf_log2_data __powf_log2_data = {
+  .tab = {
+  { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
+  { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE },
+  { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE },
+  { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE },
+  { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE },
+  { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE },
+  { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE },
+  { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE },
+  { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE },
+  { 0x1p+0, 0x0p+0 * POWF_SCALE },
+  { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE },
+  { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE },
+  { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE },
+  { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE },
+  { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE },
+  { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE },
+  },
+  /* Five coefficients with alternating signs.  NOTE(review): the last one
+     (about 1.4427) matches log2 (e), which suggests highest-degree-first
+     ordering - confirm against log2_inline.  */
+  .poly = {
+  0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE,
+  0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE,
+  0x1.71547652ab82bp0 * POWF_SCALE,
+  }
+};
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_cos.c"
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_cosf.c"
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_exp.c"
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_exp2f.c"
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_exp2f_1u.c"
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_expf.c"
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_expf_1u.c"
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_log.c"
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_logf.c"
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_pow.c"
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_powf.c"
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_sin.c"
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_sinf.c"
@@ -0,0 +1,79 @@
+/*
+ * Single-precision sin/cos function.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <stdint.h>
+#include <math.h>
+#include "math_config.h"
+#include "sincosf.h"
+
+/* Fast sincosf implementation.  Worst-case ULP is 0.5607, maximum relative
+   error is 0.5303 * 2^-23.  A single-step range reduction is used for
+   small values.  Large inputs have their range reduced using fast integer
+   arithmetic.
+
+   Y is the argument; the sine is stored through SINP and the cosine
+   through COSP.  The input is classified by comparing the top bits of |Y|
+   (abstop12) against those of PI/4, 120 and INFINITY, so the thresholds
+   are only as fine as those top bits.  */
+void
+sincosf (float y, float *sinp, float *cosp)
+{
+  double x = y;
+  double s;
+  int n;
+  const sincos_t *p = &__sincosf_table[0];
+
+  if (abstop12 (y) < abstop12 (pio4))
+    {
+      /* No range reduction needed: evaluate directly in quadrant 0.  */
+      double x2 = x * x;
+
+      if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
+	{
+	  if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
+	    /* Force underflow for tiny y.  */
+	    force_eval_float (x2);
+	  /* For such small inputs the results are simply y and 1.  */
+	  *sinp = y;
+	  *cosp = 1.0f;
+	  return;
+	}
+
+      sincosf_poly (x, x2, p, 0, sinp, cosp);
+    }
+  else if (abstop12 (y) < abstop12 (120.0f))
+    {
+      /* Medium range: single multiply-subtract reduction.  */
+      x = reduce_fast (x, p, &n);
+
+      /* Setup the signs for sin and cos.  */
+      s = p->sign[n & 3];
+
+      /* Quadrants 2 and 3 use the table entry with the negated cosine.  */
+      if (n & 2)
+	p = &__sincosf_table[1];
+
+      sincosf_poly (x * s, x * x, p, n, sinp, cosp);
+    }
+  else if (likely (abstop12 (y) < abstop12 (INFINITY)))
+    {
+      /* Large finite input: integer-based reduction on the raw bits.  */
+      uint32_t xi = asuint (y);
+      int sign = xi >> 31;
+
+      x = reduce_large (xi, &n);
+
+      /* Setup signs for sin and cos - include original sign.  */
+      s = p->sign[(n + sign) & 3];
+
+      if ((n + sign) & 2)
+	p = &__sincosf_table[1];
+
+      sincosf_poly (x * s, x * x, p, n, sinp, cosp);
+    }
+  else
+    {
+      /* Return NaN if Inf or NaN for both sin and cos.  */
+      *sinp = *cosp = y - y;
+#if WANT_ERRNO
+      /* Needed to set errno for +-Inf, the add is a hack to work
+	 around a gcc register allocation issue: just passing y
+	 affects code generation in the fast path.  */
+      __math_invalidf (y + y);
+#endif
+    }
+}
@@ -0,0 +1,153 @@
+/*
+ * Header for sinf, cosf and sincosf.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <stdint.h>
+#include <math.h>
+#include "math_config.h"
+
+/* 2PI * 2^-64.  Used by reduce_large to scale its fixed-point remainder
+   back to radians.  */
+static const double pi63 = 0x1.921FB54442D18p-62;
+/* PI / 4.  Threshold below which no range reduction is done.  */
+static const double pio4 = 0x1.921FB54442D18p-1;
+
+/* The constants and polynomials for sine and cosine.  */
+typedef struct
+{
+  /* Indexed by quadrant & 3; callers multiply the reduced argument by
+     this value before evaluating the sine polynomial.  */
+  double sign[4];		/* Sign of sine in quadrants 0..3.  */
+  double hpi_inv;		/* 2 / PI ( * 2^24 if !TOINT_INTRINSICS).  */
+  double hpi;			/* PI / 2.  */
+  double c0, c1, c2, c3, c4;	/* Cosine polynomial.  */
+  double s1, s2, s3;		/* Sine polynomial.  */
+} sincos_t;
+
+/* Polynomial data (the cosine polynomial is negated in the 2nd entry).  */
+extern const sincos_t __sincosf_table[2] HIDDEN;
+
+/* Table with 4/PI to 192 bit precision.  */
+extern const uint32_t __inv_pio4[] HIDDEN;
+
+/* Return the 12 most significant bits of X's float encoding with the sign
+   bit forced to zero, i.e. bits 20..30 of the representation.  Comparing
+   these values orders floats by magnitude at coarse granularity.  */
+static inline uint32_t
+abstop12 (float x)
+{
+  uint32_t bits = asuint (x);
+
+  /* Discard the 20 low mantissa bits, then mask away the sign.  */
+  bits >>= 20;
+  return bits & 0x7ff;
+}
+
+/* Compute the sine and cosine of inputs X and X2 (X squared), using the
+   polynomial P and store the results in SINP and COSP.  N is the quadrant,
+   if odd the cosine and sine polynomials are swapped.
+   The sine result is x + s1*x^3 + s2*x^5 + s3*x^7 and the cosine result
+   is c0 + c1*x^2 + c2*x^4 + c3*x^6 + c4*x^8, each evaluated as partial
+   sums that are combined at the end.  */
+static inline void
+sincosf_poly (double x, double x2, const sincos_t *p, int n, float *sinp,
+	      float *cosp)
+{
+  double x3, x4, x5, x6, s, c, c1, c2, s1;
+
+  x4 = x2 * x2;
+  x3 = x2 * x;
+  c2 = p->c3 + x2 * p->c4;
+  s1 = p->s2 + x2 * p->s3;
+
+  /* Swap sin/cos result based on quadrant.  */
+  float *tmp = (n & 1 ? cosp : sinp);
+  cosp = (n & 1 ? sinp : cosp);
+  sinp = tmp;
+
+  c1 = p->c0 + x2 * p->c1;
+  x5 = x3 * x2;
+  x6 = x4 * x2;
+
+  s = x + x3 * p->s1;
+  c = c1 + x4 * p->c2;
+
+  /* The double results are narrowed to float by the stores.  */
+  *sinp = s + x5 * s1;
+  *cosp = c + x6 * c2;
+}
+
+/* Return the sine of inputs X and X2 (X squared) using the polynomial P.
+   N is the quadrant, and if odd the cosine polynomial is used.
+   Only the polynomial that is actually needed is evaluated.  */
+static inline float
+sinf_poly (double x, double x2, const sincos_t *p, int n)
+{
+  double x3, x4, x6, x7, s, c, c1, c2, s1;
+
+  if ((n & 1) == 0)
+    {
+      /* Even quadrant: x + s1*x^3 + x^5 * (s2 + s3*x^2).  */
+      x3 = x * x2;
+      s1 = p->s2 + x2 * p->s3;
+
+      /* Note: despite its name, x7 holds x^5 (x3 * x2).  */
+      x7 = x3 * x2;
+      s = x + x3 * p->s1;
+
+      return s + x7 * s1;
+    }
+  else
+    {
+      /* Odd quadrant: c0 + c1*x^2 + c2*x^4 + x^6 * (c3 + c4*x^2).  */
+      x4 = x2 * x2;
+      c2 = p->c3 + x2 * p->c4;
+      c1 = p->c0 + x2 * p->c1;
+
+      x6 = x4 * x2;
+      c = c1 + x4 * p->c2;
+
+      return c + x6 * c2;
+    }
+}
+
+/* Fast range reduction using single multiply-subtract.  Return the modulo of
+   X as a value between -PI/4 and PI/4 and store the quadrant in NP.
+   The values for PI/2 and 2/PI are accessed via P.  Since PI/2 as a double
+   is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4,
+   the result is accurate for |X| <= 120.0.
+   Note that the value stored in NP is not reduced modulo 4; callers mask
+   it themselves.  */
+static inline double
+reduce_fast (double x, const sincos_t *p, int *np)
+{
+  double r;
+#if TOINT_INTRINSICS
+  /* Use fast round and lround instructions when available.  */
+  r = x * p->hpi_inv;
+  *np = converttoint (r);
+  return x - roundtoint (r) * p->hpi;
+#else
+  /* Use scaled float to int conversion with explicit rounding.
+     hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31.
+     This avoids inaccuracies introduced by truncating negative values.  */
+  r = x * p->hpi_inv;
+  /* Adding 2^23 before the shift rounds to nearest.  The shift of a
+     possibly negative int relies on arithmetic right shift.  */
+  int n = ((int32_t)r + 0x800000) >> 24;
+  *np = n;
+  return x - n * p->hpi;
+#endif
+}
+
+/* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.
+   XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
+   Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
+   Reduction uses a table of 4/PI with 192 bits of precision.  A 32x96->128 bit
+   multiply computes the exact 2.62-bit fixed-point modulo.  Since the result
+   can have at most 29 leading zeros after the binary point, the double
+   precision result is accurate to 33 bits.  */
+static inline double
+reduce_large (uint32_t xi, int *np)
+{
+  /* Bits 26..29 of XI (upper exponent bits) pick the starting table word;
+     bits 23..25 (low exponent bits) give the remaining bit shift.  */
+  const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15];
+  int shift = (xi >> 23) & 7;
+  uint64_t n, res0, res1, res2;
+
+  /* Keep the mantissa, set the implicit leading one, apply the shift.  */
+  xi = (xi & 0xffffff) | 0x800000;
+  xi <<= shift;
+
+  /* The first product is done in 32 bits: only its low half survives the
+     shift by 32 below.  The other two are widened to 64 bits.  */
+  res0 = xi * arr[0];
+  res1 = (uint64_t)xi * arr[4];
+  res2 = (uint64_t)xi * arr[8];
+  res0 = (res2 >> 32) | (res0 << 32);
+  res0 += res1;
+
+  /* Round to the nearest quadrant: the top two bits of res0 after adding
+     half a quadrant.  Removing n << 62 leaves a signed remainder.  */
+  n = (res0 + (1ULL << 61)) >> 62;
+  res0 -= n << 62;
+  double x = (int64_t)res0;
+  *np = n;
+  return x * pi63;
+}
@@ -0,0 +1,63 @@
+/*
+ * Data definition for sinf, cosf and sincosf.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <stdint.h>
+#include <math.h>
+#include "math_config.h"
+#include "sincosf.h"
+
+/* The constants and polynomials for sine and cosine.  The 2nd entry
+   computes -cos (x) rather than cos (x) to get negation for free.
+   Initializers follow the member order of sincos_t: sign[4], hpi_inv,
+   hpi, c0..c4, s1..s3.  The two entries differ only in the sign of the
+   cosine coefficients c0..c4.  */
+const sincos_t __sincosf_table[2] =
+{
+  {
+    { 1.0, -1.0, -1.0, 1.0 },
+#if TOINT_INTRINSICS
+    0x1.45F306DC9C883p-1,
+#else
+    0x1.45F306DC9C883p+23,
+#endif
+    0x1.921FB54442D18p0,
+    0x1p0,
+    -0x1.ffffffd0c621cp-2,
+    0x1.55553e1068f19p-5,
+    -0x1.6c087e89a359dp-10,
+    0x1.99343027bf8c3p-16,
+    -0x1.555545995a603p-3,
+    0x1.1107605230bc4p-7,
+    -0x1.994eb3774cf24p-13
+  },
+  {
+    { 1.0, -1.0, -1.0, 1.0 },
+#if TOINT_INTRINSICS
+    0x1.45F306DC9C883p-1,
+#else
+    0x1.45F306DC9C883p+23,
+#endif
+    0x1.921FB54442D18p0,
+    -0x1p0,
+    0x1.ffffffd0c621cp-2,
+    -0x1.55553e1068f19p-5,
+    0x1.6c087e89a359dp-10,
+    -0x1.99343027bf8c3p-16,
+    -0x1.555545995a603p-3,
+    0x1.1107605230bc4p-7,
+    -0x1.994eb3774cf24p-13
+  }
+};
+
+/* Table with 4/PI to 192 bit precision.  To avoid unaligned accesses
+   only 8 new bits are added per entry, making the table 4 times larger.
+   Each word is the previous one shifted left by 8 bits with the next
+   byte of the expansion appended, so words 4 apart hold consecutive
+   32-bit chunks (reduce_large reads arr[0], arr[4] and arr[8]).  */
+const uint32_t __inv_pio4[24] =
+{
+  0xa2,       0xa2f9,	  0xa2f983,   0xa2f9836e,
+  0xf9836e4e, 0x836e4e44, 0x6e4e4415, 0x4e441529,
+  0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1,
+  0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0,
+  0x34ddc0db, 0xddc0db62, 0xc0db6295, 0xdb629599,
+  0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041
+};
@@ -0,0 +1,67 @@
+/*
+ * Single-precision sin function.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <math.h>
+#include "math_config.h"
+#include "sincosf.h"
+
+/* Fast sinf implementation.  Worst-case ULP is 0.5607, maximum relative
+   error is 0.5303 * 2^-23.  A single-step range reduction is used for
+   small values.  Large inputs have their range reduced using fast integer
+   arithmetic.
+
+   Returns the sine of Y.  Inf and NaN inputs are handed to
+   __math_invalidf.  The input is classified by the top bits of |Y|
+   (abstop12) against PI/4, 120 and INFINITY.  */
+float
+sinf (float y)
+{
+  double x = y;
+  double s;
+  int n;
+  const sincos_t *p = &__sincosf_table[0];
+
+  if (abstop12 (y) < abstop12 (pio4))
+    {
+      /* Here s temporarily holds x squared.  */
+      s = x * x;
+
+      if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
+	{
+	  if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
+	    /* Force underflow for tiny y.  */
+	    force_eval_float (s);
+	  return y;
+	}
+
+      return sinf_poly (x, s, p, 0);
+    }
+  else if (likely (abstop12 (y) < abstop12 (120.0f)))
+    {
+      x = reduce_fast (x, p, &n);
+
+      /* Setup the signs for sin and cos.  */
+      s = p->sign[n & 3];
+
+      /* Quadrants 2 and 3 use the table entry with the negated cosine.  */
+      if (n & 2)
+	p = &__sincosf_table[1];
+
+      return sinf_poly (x * s, x * x, p, n);
+    }
+  else if (abstop12 (y) < abstop12 (INFINITY))
+    {
+      /* Large finite input: integer-based reduction on the raw bits.  */
+      uint32_t xi = asuint (y);
+      int sign = xi >> 31;
+
+      x = reduce_large (xi, &n);
+
+      /* Setup signs for sin and cos - include original sign.  */
+      s = p->sign[(n + sign) & 3];
+
+      if ((n + sign) & 2)
+	p = &__sincosf_table[1];
+
+      return sinf_poly (x * s, x * x, p, n);
+    }
+  else
+    /* Inf or NaN.  */
+    return __math_invalidf (y);
+}
@@ -0,0 +1,771 @@
+/*
+ * Microbenchmark for math functions.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include "mathlib.h"
+
+#ifndef WANT_VMATH
+/* Enable the build of vector math code.  */
+# define WANT_VMATH 1
+#endif
+
+/* Number of measurements, best result is reported.  */
+#define MEASURE 60
+/* Array size.  */
+#define N 8000
+/* Iterations over the array.  */
+#define ITER 125
+
+/* Trace input read by readtrace; trace_size is its length in elements.  */
+static double *Trace;
+static size_t trace_size;
+/* Input arrays for double and float benchmarks.  */
+static double A[N];
+static float Af[N];
+/* Run-time values of MEASURE and ITER (overridden by -m and -c).  */
+static long measurecount = MEASURE;
+static long itercount = ITER;
+
+/* Minimal vector abstraction used by the vector benchmarks: a vector type,
+   its lane count, a load from an array and a broadcast, for double and
+   float.  On AArch64 with WANT_VMATH these map to real vector types;
+   otherwise they degrade to scalars so the benchmark code still builds.  */
+#if __aarch64__ && WANT_VMATH
+typedef __f64x2_t v_double;
+
+#define v_double_len() 2
+
+/* Load two consecutive doubles starting at P.  */
+static inline v_double
+v_double_load (const double *p)
+{
+  return (v_double){p[0], p[1]};
+}
+
+/* Broadcast X to both lanes.  */
+static inline v_double
+v_double_dup (double x)
+{
+  return (v_double){x, x};
+}
+
+typedef __f32x4_t v_float;
+
+#define v_float_len() 4
+
+/* Load four consecutive floats starting at P.  */
+static inline v_float
+v_float_load (const float *p)
+{
+  return (v_float){p[0], p[1], p[2], p[3]};
+}
+
+/* Broadcast X to all four lanes.  */
+static inline v_float
+v_float_dup (float x)
+{
+  return (v_float){x, x, x, x};
+}
+#else
+/* dummy definitions to make things compile.  The *_len macros take no
+   argument, matching the vector variants above and every call site.  */
+typedef double v_double;
+typedef float v_float;
+#define v_double_len() 1
+#define v_double_load(x) (x)[0]
+#define v_double_dup(x) (x)
+#define v_float_len() 1
+#define v_float_load(x) (x)[0]
+#define v_float_dup(x) (x)
+#endif
+
+/* Identity function; benchmarking it measures the harness overhead for
+   double-precision calls.  */
+static double
+dummy (double x)
+{
+  return x;
+}
+
+/* Single-precision counterpart of dummy.  */
+static float
+dummyf (float x)
+{
+  return x;
+}
+
+/* Wrappers for the vector-math build.  The benchmark driver only calls
+   one-argument functions, so the two-argument pow variants are wrapped
+   as f (x) = pow (x, x).  */
+#if WANT_VMATH
+#if __aarch64__
+/* Identity functions for the plain vector calling convention.  */
+static v_double
+__v_dummy (v_double x)
+{
+  return x;
+}
+
+static v_float
+__v_dummyf (v_float x)
+{
+  return x;
+}
+
+#ifdef __vpcs
+/* Same wrappers again for functions declared with __vpcs.  */
+__vpcs static v_double
+__vn_dummy (v_double x)
+{
+  return x;
+}
+
+__vpcs static v_float
+__vn_dummyf (v_float x)
+{
+  return x;
+}
+
+__vpcs static v_float
+xy__vn_powf (v_float x)
+{
+  return __vn_powf (x, x);
+}
+
+__vpcs static v_float
+xy_Z_powf (v_float x)
+{
+  return _ZGVnN4vv_powf (x, x);
+}
+
+__vpcs static v_double
+xy__vn_pow (v_double x)
+{
+  return __vn_pow (x, x);
+}
+
+__vpcs static v_double
+xy_Z_pow (v_double x)
+{
+  return _ZGVnN2vv_pow (x, x);
+}
+#endif
+
+static v_float
+xy__v_powf (v_float x)
+{
+  return __v_powf (x, x);
+}
+
+static v_double
+xy__v_pow (v_double x)
+{
+  return __v_pow (x, x);
+}
+#endif
+
+/* Scalar builds of the vector routines; available on every target when
+   WANT_VMATH is set.  */
+static float
+xy__s_powf (float x)
+{
+  return __s_powf (x, x);
+}
+
+static double
+xy__s_pow (double x)
+{
+  return __s_pow (x, x);
+}
+#endif
+
+/* One-argument adapters around the libm functions that take more than one
+   argument, so they fit the double (*)(double) / float (*)(float) slots
+   of the benchmark table.  */
+
+/* pow with both operands varying: x^x.  */
+static double
+xypow (double x)
+{
+  return pow (x, x);
+}
+
+static float
+xypowf (float x)
+{
+  return powf (x, x);
+}
+
+/* pow with a varying base and the fixed exponent 23.4.  */
+static double
+xpow (double x)
+{
+  return pow (x, 23.4);
+}
+
+static float
+xpowf (float x)
+{
+  return powf (x, 23.4f);
+}
+
+/* pow with the fixed base 2.34 and a varying exponent.  */
+static double
+ypow (double x)
+{
+  return pow (2.34, x);
+}
+
+static float
+ypowf (float x)
+{
+  return powf (2.34f, x);
+}
+
+/* sincosf adapter: both outputs are summed into the return value.  */
+static float
+sincosf_wrap (float x)
+{
+  float s, c;
+  sincosf (x, &s, &c);
+  return s + c;
+}
+
+/* Table of benchmarkable functions, terminated by an entry whose name is
+   null.  Several entries may share a name with different default
+   intervals.  */
+static const struct fun
+{
+  const char *name;	/* Name matched against the command line.  */
+  int prec;		/* 'd' for double, 'f' for float.  */
+  int vec;		/* 0 scalar, 'v' vector, 'n' vector with __vpcs.  */
+  double lo;		/* Default input interval, used unless the user  */
+  double hi;		/* supplies one.  */
+  /* The member to use is selected by prec and vec.  */
+  union
+  {
+    double (*d) (double);
+    float (*f) (float);
+    v_double (*vd) (v_double);
+    v_float (*vf) (v_float);
+#ifdef __vpcs
+    __vpcs v_double (*vnd) (v_double);
+    __vpcs v_float (*vnf) (v_float);
+#endif
+  } fun;
+} funtab[] = {
+/* Entry constructors: the function's identifier doubles as its name.  */
+#define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
+#define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
+#define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
+#define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
+#define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
+#define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
+D (dummy, 1.0, 2.0)
+D (exp, -9.9, 9.9)
+D (exp, 0.5, 1.0)
+D (exp2, -9.9, 9.9)
+D (log, 0.01, 11.1)
+D (log, 0.999, 1.001)
+D (log2, 0.01, 11.1)
+D (log2, 0.999, 1.001)
+/* Spelled out because the displayed name differs from the wrapper.  */
+{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
+D (xpow, 0.01, 11.1)
+D (ypow, -9.9, 9.9)
+
+F (dummyf, 1.0, 2.0)
+F (expf, -9.9, 9.9)
+F (exp2f, -9.9, 9.9)
+F (logf, 0.01, 11.1)
+F (log2f, 0.01, 11.1)
+{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
+F (xpowf, 0.01, 11.1)
+F (ypowf, -9.9, 9.9)
+{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}},
+{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}},
+{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}},
+{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
+{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
+{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
+F (sinf, 0.1, 0.7)
+F (sinf, 0.8, 3.1)
+F (sinf, -3.1, 3.1)
+F (sinf, 3.3, 33.3)
+F (sinf, 100, 1000)
+F (sinf, 1e6, 1e32)
+F (cosf, 0.1, 0.7)
+F (cosf, 0.8, 3.1)
+F (cosf, -3.1, 3.1)
+F (cosf, 3.3, 33.3)
+F (cosf, 100, 1000)
+F (cosf, 1e6, 1e32)
+#if WANT_VMATH
+D (__s_sin, -3.1, 3.1)
+D (__s_cos, -3.1, 3.1)
+D (__s_exp, -9.9, 9.9)
+D (__s_log, 0.01, 11.1)
+{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
+F (__s_expf, -9.9, 9.9)
+F (__s_expf_1u, -9.9, 9.9)
+F (__s_exp2f, -9.9, 9.9)
+F (__s_exp2f_1u, -9.9, 9.9)
+F (__s_logf, 0.01, 11.1)
+{"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}},
+F (__s_sinf, -3.1, 3.1)
+F (__s_cosf, -3.1, 3.1)
+#if __aarch64__
+VD (__v_dummy, 1.0, 2.0)
+VD (__v_sin, -3.1, 3.1)
+VD (__v_cos, -3.1, 3.1)
+VD (__v_exp, -9.9, 9.9)
+VD (__v_log, 0.01, 11.1)
+{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
+VF (__v_dummyf, 1.0, 2.0)
+VF (__v_expf, -9.9, 9.9)
+VF (__v_expf_1u, -9.9, 9.9)
+VF (__v_exp2f, -9.9, 9.9)
+VF (__v_exp2f_1u, -9.9, 9.9)
+VF (__v_logf, 0.01, 11.1)
+{"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}},
+VF (__v_sinf, -3.1, 3.1)
+VF (__v_cosf, -3.1, 3.1)
+#ifdef __vpcs
+VND (__vn_dummy, 1.0, 2.0)
+VND (__vn_exp, -9.9, 9.9)
+VND (_ZGVnN2v_exp, -9.9, 9.9)
+VND (__vn_log, 0.01, 11.1)
+VND (_ZGVnN2v_log, 0.01, 11.1)
+{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
+{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
+VND (__vn_sin, -3.1, 3.1)
+VND (_ZGVnN2v_sin, -3.1, 3.1)
+VND (__vn_cos, -3.1, 3.1)
+VND (_ZGVnN2v_cos, -3.1, 3.1)
+VNF (__vn_dummyf, 1.0, 2.0)
+VNF (__vn_expf, -9.9, 9.9)
+VNF (_ZGVnN4v_expf, -9.9, 9.9)
+VNF (__vn_expf_1u, -9.9, 9.9)
+VNF (__vn_exp2f, -9.9, 9.9)
+VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
+VNF (__vn_exp2f_1u, -9.9, 9.9)
+VNF (__vn_logf, 0.01, 11.1)
+VNF (_ZGVnN4v_logf, 0.01, 11.1)
+{"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}},
+{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
+VNF (__vn_sinf, -3.1, 3.1)
+VNF (_ZGVnN4v_sinf, -3.1, 3.1)
+VNF (__vn_cosf, -3.1, 3.1)
+VNF (_ZGVnN4v_cosf, -3.1, 3.1)
+#endif
+#endif
+#endif
+/* Terminator: name == NULL ends iteration over the table.  */
+{0},
+#undef F
+#undef D
+#undef VF
+#undef VD
+#undef VNF
+#undef VND
+};
+
+/* Fill A with N evenly spaced values: element k is the weighted mean of
+   LO and HI with weights (N - k) and k, so A[0] == LO and HI itself is
+   never reached.  */
+static void
+gen_linear (double lo, double hi)
+{
+  int k = 0;
+
+  while (k < N)
+    {
+      A[k] = (lo * (N - k) + hi * k) / N;
+      k++;
+    }
+}
+
+/* Single-precision variant filling Af.  Note that the cast applies to the
+   weighted sum only: the sum is rounded to float first and the division
+   by N is then carried out in float.  */
+static void
+genf_linear (double lo, double hi)
+{
+  int k = 0;
+
+  while (k < N)
+    {
+      Af[k] = (float)(lo * (N - k) + hi * k) / N;
+      k++;
+    }
+}
+
+/* Reinterpret the 64 bits of I as a double (type punning via a union).  */
+static inline double
+asdouble (uint64_t i)
+{
+  union
+  {
+    uint64_t bits;
+    double value;
+  } pun;
+
+  pun.bits = i;
+  return pun.value;
+}
+
+/* State of the pseudo random generator used for input data.  */
+static uint64_t seed = 0x0123456789abcdef;
+
+/* Return the next pseudo random double in [LO, HI).  The state is advanced
+   with a 64-bit linear congruential step; its top 52 bits become the
+   mantissa of a double in [1, 2), which is then shifted to [0, 1) and
+   scaled to the requested interval.  */
+static double
+frand (double lo, double hi)
+{
+  uint64_t bits;
+  double unit;
+
+  seed = 6364136223846793005ULL * seed + 1;
+  bits = seed >> 12 | 0x3ffULL << 52;
+  unit = asdouble (bits) - 1.0;
+  return lo + (hi - lo) * unit;
+}
+
+/* Fill A with N pseudo random doubles drawn from frand (LO, HI).  */
+static void
+gen_rand (double lo, double hi)
+{
+  int k = 0;
+
+  while (k < N)
+    A[k++] = frand (lo, hi);
+}
+
+/* Fill Af with N pseudo random values from frand (LO, HI), narrowed to
+   float.  */
+static void
+genf_rand (double lo, double hi)
+{
+  int k = 0;
+
+  while (k < N)
+    Af[k++] = (float)frand (lo, hi);
+}
+
+/* Copy the N trace values starting at Trace[INDEX] into A.  */
+static void
+gen_trace (int index)
+{
+  int k = 0;
+
+  while (k < N)
+    {
+      A[k] = Trace[index + k];
+      k++;
+    }
+}
+
+/* Copy the N trace values starting at Trace[INDEX] into Af, narrowing
+   each one to float.  */
+static void
+genf_trace (int index)
+{
+  int k = 0;
+
+  while (k < N)
+    {
+      Af[k] = (float)Trace[index + k];
+      k++;
+    }
+}
+
+/* Throughput kernel: call F on every element of A.  The calls are
+   independent of each other and their results are discarded.  */
+static void
+run_thruput (double f (double))
+{
+  for (int i = 0; i < N; i++)
+    f (A[i]);
+}
+
+/* Single-precision throughput kernel over Af.  */
+static void
+runf_thruput (float f (float))
+{
+  for (int i = 0; i < N; i++)
+    f (Af[i]);
+}
+
+/* Always zero, but volatile so the compiler cannot assume its value.  */
+volatile double zero = 0;
+
+/* Latency kernel: each argument includes the previous result multiplied
+   by z (read from the volatile zero), which makes every call depend on
+   the one before it.  */
+static void
+run_latency (double f (double))
+{
+  double z = zero;
+  double prev = z;
+  for (int i = 0; i < N; i++)
+    prev = f (A[i] + prev * z);
+}
+
+/* Single-precision latency kernel over Af.  */
+static void
+runf_latency (float f (float))
+{
+  float z = (float)zero;
+  float prev = z;
+  for (int i = 0; i < N; i++)
+    prev = f (Af[i] + prev * z);
+}
+
+/* Vector throughput kernel: walk A in steps of the vector length and call
+   F on each loaded vector; results are discarded.  */
+static void
+run_v_thruput (v_double f (v_double))
+{
+  for (int i = 0; i < N; i += v_double_len ())
+    f (v_double_load (A+i));
+}
+
+/* Single-precision vector throughput kernel over Af.  */
+static void
+runf_v_thruput (v_float f (v_float))
+{
+  for (int i = 0; i < N; i += v_float_len ())
+    f (v_float_load (Af+i));
+}
+
+/* Vector latency kernel: as run_latency, the previous result times a
+   vector of the volatile zero is added to each argument to chain the
+   calls.  */
+static void
+run_v_latency (v_double f (v_double))
+{
+  v_double z = v_double_dup (zero);
+  v_double prev = z;
+  for (int i = 0; i < N; i += v_double_len ())
+    prev = f (v_double_load (A+i) + prev * z);
+}
+
+/* Single-precision vector latency kernel over Af.  */
+static void
+runf_v_latency (v_float f (v_float))
+{
+  v_float z = v_float_dup (zero);
+  v_float prev = z;
+  for (int i = 0; i < N; i += v_float_len ())
+    prev = f (v_float_load (Af+i) + prev * z);
+}
+
+#ifdef __vpcs
+/* The same four kernels as run_v_* and runf_v_*, for functions declared
+   with __vpcs.  They are separate because the function pointer type
+   differs.  */
+static void
+run_vn_thruput (__vpcs v_double f (v_double))
+{
+  for (int i = 0; i < N; i += v_double_len ())
+    f (v_double_load (A+i));
+}
+
+static void
+runf_vn_thruput (__vpcs v_float f (v_float))
+{
+  for (int i = 0; i < N; i += v_float_len ())
+    f (v_float_load (Af+i));
+}
+
+static void
+run_vn_latency (__vpcs v_double f (v_double))
+{
+  v_double z = v_double_dup (zero);
+  v_double prev = z;
+  for (int i = 0; i < N; i += v_double_len ())
+    prev = f (v_double_load (A+i) + prev * z);
+}
+
+static void
+runf_vn_latency (__vpcs v_float f (v_float))
+{
+  v_float z = v_float_dup (zero);
+  v_float prev = z;
+  for (int i = 0; i < N; i += v_float_len ())
+    prev = f (v_float_load (Af+i) + prev * z);
+}
+#endif
+
+static uint64_t
+tic (void)
+{
+  struct timespec ts;
+  if (clock_gettime (CLOCK_REALTIME, &ts))
+    abort ();
+  return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+}
+
+/* Time RUN (F) and leave the best result in the caller's variable `dt'
+   (a uint64_t that must be in scope).  dt starts at -1, i.e. the largest
+   unsigned value, and is lowered to the shortest time observed over
+   measurecount measurements; each measurement covers itercount calls of
+   RUN (F).  One untimed call is made first.  */
+#define TIMEIT(run, f) do { \
+  dt = -1; \
+  run (f); /* Warm up.  */ \
+  for (int j = 0; j < measurecount; j++) \
+    { \
+      uint64_t t0 = tic (); \
+      for (int i = 0; i < itercount; i++) \
+	run (f); \
+      uint64_t t1 = tic (); \
+      if (t1 - t0 < dt) \
+	dt = t1 - t0; \
+    } \
+} while (0)
+
+/* Measure function F on the current contents of A/Af and print one result
+   line.  TYPE is 't' for reciprocal throughput or 'l' for latency.  LO and
+   HI are only echoed in the output.  The kernel and the union member are
+   chosen from F's precision and vector kind; if no combination matches,
+   dt keeps its initial value 0.  */
+static void
+bench1 (const struct fun *f, int type, double lo, double hi)
+{
+  uint64_t dt = 0;
+  uint64_t ns100;
+  const char *s = type == 't' ? "rthruput" : "latency";
+  int vlen = 1;
+
+  /* Lanes processed per call; stays 1 for scalar functions.  */
+  if (f->vec && f->prec == 'd')
+    vlen = v_double_len();
+  else if (f->vec && f->prec == 'f')
+    vlen = v_float_len();
+
+  if (f->prec == 'd' && type == 't' && f->vec == 0)
+    TIMEIT (run_thruput, f->fun.d);
+  else if (f->prec == 'd' && type == 'l' && f->vec == 0)
+    TIMEIT (run_latency, f->fun.d);
+  else if (f->prec == 'f' && type == 't' && f->vec == 0)
+    TIMEIT (runf_thruput, f->fun.f);
+  else if (f->prec == 'f' && type == 'l' && f->vec == 0)
+    TIMEIT (runf_latency, f->fun.f);
+  else if (f->prec == 'd' && type == 't' && f->vec == 'v')
+    TIMEIT (run_v_thruput, f->fun.vd);
+  else if (f->prec == 'd' && type == 'l' && f->vec == 'v')
+    TIMEIT (run_v_latency, f->fun.vd);
+  else if (f->prec == 'f' && type == 't' && f->vec == 'v')
+    TIMEIT (runf_v_thruput, f->fun.vf);
+  else if (f->prec == 'f' && type == 'l' && f->vec == 'v')
+    TIMEIT (runf_v_latency, f->fun.vf);
+#ifdef __vpcs
+  else if (f->prec == 'd' && type == 't' && f->vec == 'n')
+    TIMEIT (run_vn_thruput, f->fun.vnd);
+  else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
+    TIMEIT (run_vn_latency, f->fun.vnd);
+  else if (f->prec == 'f' && type == 't' && f->vec == 'n')
+    TIMEIT (runf_vn_thruput, f->fun.vnf);
+  else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
+    TIMEIT (runf_vn_latency, f->fun.vnf);
+#endif
+
+  /* ns100 is the time in hundredths of a nanosecond, rounded to nearest:
+     per element for throughput, per call (N / vlen calls per pass) for
+     latency.  Both divisors are zero when itercount is zero.  */
+  if (type == 't')
+    {
+      ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
+      printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
+	      (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
+	      (unsigned long long) dt, lo, hi);
+    }
+  else if (type == 'l')
+    {
+      ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
+      printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s,
+	      (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
+	      (unsigned long long) dt, lo, hi);
+    }
+  fflush (stdout);
+}
+
+/* Run the measurements selected by TYPE on the current input arrays:
+   't' throughput only, 'l' latency only, 'b' both (throughput first).  */
+static void
+bench_selected (const struct fun *f, int type, double lo, double hi)
+{
+  int both = type == 'b';
+
+  if (both || type == 't')
+    bench1 (f, 't', lo, hi);
+
+  if (both || type == 'l')
+    bench1 (f, 'l', lo, hi);
+}
+
+/* Benchmark F.  GEN selects how the input array is filled: 'r' random in
+   [LO, HI), 'l' linear between LO and HI, 't' from the loaded trace.  For
+   trace input the trace is processed in chunks of N elements; LO and HI
+   are then reused to report the chunk index and the number of chunks.  */
+static void
+bench (const struct fun *f, double lo, double hi, int type, int gen)
+{
+  int is_double = f->prec == 'd';
+  int is_float = f->prec == 'f';
+
+  /* Fill the array matching the precision of F.  */
+  switch (gen)
+    {
+    case 'r':
+      if (is_double)
+	gen_rand (lo, hi);
+      else if (is_float)
+	genf_rand (lo, hi);
+      break;
+    case 'l':
+      if (is_double)
+	gen_linear (lo, hi);
+      else if (is_float)
+	genf_linear (lo, hi);
+      break;
+    case 't':
+      if (is_double)
+	gen_trace (0);
+      else if (is_float)
+	genf_trace (0);
+      break;
+    default:
+      break;
+    }
+
+  if (gen == 't')
+    hi = trace_size / N;
+
+  bench_selected (f, type, lo, hi);
+
+  /* Remaining trace chunks; the loop body never runs when no trace has
+     been loaded (trace_size is then 0).  */
+  for (int i = N; i < trace_size; i += N)
+    {
+      if (is_double)
+	gen_trace (i);
+      else
+	genf_trace (i);
+
+      lo = i / N;
+      bench_selected (f, type, lo, hi);
+    }
+}
+
+/* Read whitespace separated floating-point values from the file NAME
+   ("-" means standard input) into the global Trace array and set
+   trace_size.  The buffer grows in steps of N elements; reading stops at
+   the first token that does not parse as a number or at end of file.
+   If the number of values read is not a multiple of N, the tail is padded
+   by repeating values from the start of the trace so that trace_size is
+   always a multiple of N.  Exits with status 1 when the file cannot be
+   opened, memory runs out, a read error occurs or no value was read.  */
+static void
+readtrace (const char *name)
+{
+	int n = 0;
+	FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
+	if (!f)
+	  {
+	    printf ("opening \"%s\" failed: %m\n", name);
+	    exit (1);
+	  }
+	for (;;)
+	  {
+	    /* Make room for the next value before trying to read it.  */
+	    if (n >= trace_size)
+	      {
+		trace_size += N;
+		Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
+		if (Trace == NULL)
+		  {
+		    printf ("out of memory\n");
+		    exit (1);
+		  }
+	      }
+	    if (fscanf (f, "%lf", Trace + n) != 1)
+	      break;
+	    n++;
+	  }
+	if (ferror (f) || n == 0)
+	  {
+	    printf ("reading \"%s\" failed: %m\n", name);
+	    exit (1);
+	  }
+	fclose (f);
+	/* An exact multiple of N needs no padding: drop the spare chunk that
+	   was allocated just before the final, failed read.  */
+	if (n % N == 0)
+	  trace_size = n;
+	/* Pad the last chunk with values from the start of the trace.  */
+	for (int i = 0; n < trace_size; n++, i++)
+	  Trace[n] = Trace[i];
+}
+
+/* Print the command line synopsis followed by one line per benchmarkable
+   function with its default interval, then exit with status 1.  Does not
+   return.  */
+static void
+usage (void)
+{
+  const struct fun *entry = funtab;
+
+  printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
+	  "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
+	  "[func2 ..]\n");
+  printf ("func:\n");
+  printf ("%7s [run all benchmarks]\n", "all");
+
+  /* The table ends at the first entry without a name.  */
+  while (entry->name)
+    {
+      printf ("%7s [low: %g high: %g]\n", entry->name, entry->lo, entry->hi);
+      entry++;
+    }
+  exit (1);
+}
+
+/* Entry point.  Parses the leading options, loads the trace when trace
+   input is selected, then benchmarks every function named on the command
+   line ("all" selects each entry of funtab).
+
+   Options:
+     -i low high   input interval (overrides the per-function default)
+     -m count      number of measurements, must be positive
+     -c count      iterations per measurement, must be positive
+     -g r|l|t      input generator: rand, linear or trace
+     -f file       trace file, implies -g trace
+     -t l|t|b      measure latency, throughput or both
+
+   Any malformed option prints the usage text and exits with status 1.
+   Unknown function names are reported and skipped; the exit status is
+   0 in that case.  */
+int
+main (int argc, char *argv[])
+{
+  int usergen = 0, gen = 'r', type = 'b', all = 0;
+  double lo = 0, hi = 0;
+  const char *tracefile = "-";
+
+  argv++;
+  argc--;
+  for (;;)
+    {
+      if (argc <= 0)
+	usage ();
+      if (argv[0][0] != '-')
+	break;
+      else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
+	{
+	  usergen = 1;
+	  lo = strtod (argv[1], 0);
+	  hi = strtod (argv[2], 0);
+	  argv += 3;
+	  argc -= 3;
+	}
+      else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
+	{
+	  measurecount = strtol (argv[1], 0, 0);
+	  /* Without at least one measurement there is no time to report.  */
+	  if (measurecount <= 0)
+	    usage ();
+	  argv += 2;
+	  argc -= 2;
+	}
+      else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
+	{
+	  itercount = strtol (argv[1], 0, 0);
+	  /* bench1 divides by itercount * N, so zero would trap and a
+	     negative count would give meaningless results.  */
+	  if (itercount <= 0)
+	    usage ();
+	  argv += 2;
+	  argc -= 2;
+	}
+      else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
+	{
+	  gen = argv[1][0];
+	  /* strchr also matches the terminating NUL, so an empty argument
+	     has to be rejected explicitly.  */
+	  if (gen == 0 || strchr ("rlt", gen) == 0)
+	    usage ();
+	  argv += 2;
+	  argc -= 2;
+	}
+      else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
+	{
+	  gen = 't';  /* -f implies -g trace.  */
+	  tracefile = argv[1];
+	  argv += 2;
+	  argc -= 2;
+	}
+      else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
+	{
+	  type = argv[1][0];
+	  /* Same empty-argument caveat as for -g.  */
+	  if (type == 0 || strchr ("ltb", type) == 0)
+	    usage ();
+	  argv += 2;
+	  argc -= 2;
+	}
+      else
+	usage ();
+    }
+  if (gen == 't')
+    {
+      readtrace (tracefile);
+      /* With trace input the interval is not used for generation; it is
+	 marked as user supplied so the table defaults are not applied.  */
+      lo = hi = 0;
+      usergen = 1;
+    }
+  while (argc > 0)
+    {
+      int found = 0;
+      all = strcmp (argv[0], "all") == 0;
+      for (const struct fun *f = funtab; f->name; f++)
+	if (all || strcmp (argv[0], f->name) == 0)
+	  {
+	    found = 1;
+	    if (!usergen)
+	      {
+		lo = f->lo;
+		hi = f->hi;
+	      }
+	    bench (f, lo, hi, type, gen);
+	    /* A name can appear several times with different default
+	       intervals; with a user supplied interval one run is
+	       enough.  */
+	    if (usergen && !all)
+	      break;
+	  }
+      if (!found)
+	printf ("unknown function: %s\n", argv[0]);
+      argv++;
+      argc--;
+    }
+  return 0;
+}
@@ -0,0 +1,91 @@
+/*
+ * intern.h
+ *
+ * Copyright (c) 1999-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef mathtest_intern_h
+#define mathtest_intern_h
+
+#include <mpfr.h>
+#include <mpc.h>
+
+#include "types.h"
+#include "wrappers.h"
+
+/* Generic function pointer.  Testable.func stores every test function in
+   this type. */
+typedef void (*funcptr)(void);
+
+/* Pointers to test function types.
+ * NOTE(review): presumably Testable.type tells which of these signatures
+ * the stored funcptr really has - confirm against the dispatch code. */
+typedef int    (*testfunc1)(mpfr_t, mpfr_t, mpfr_rnd_t);
+typedef int    (*testfunc2)(mpfr_t, mpfr_t, mpfr_t, mpfr_rnd_t);
+typedef int    (*testrred)(mpfr_t, mpfr_t, int *);
+typedef char * (*testsemi1)(uint32 *, uint32 *);
+typedef char * (*testsemi2)(uint32 *, uint32 *, uint32 *);
+typedef char * (*testsemi2f)(uint32 *, uint32 *, uint32 *);
+typedef char * (*testldexp)(uint32 *, uint32 *, uint32 *);
+typedef char * (*testfrexp)(uint32 *, uint32 *, uint32 *);
+typedef char * (*testmodf)(uint32 *, uint32 *, uint32 *);
+typedef char * (*testclassify)(uint32 *, uint32 *);
+typedef char * (*testclassifyf)(uint32 *, uint32 *);
+
+/* Complex (MPC) counterparts of testfunc1 and testfunc2. */
+typedef int    (*testfunc1c)(mpc_t, mpc_t, mpc_rnd_t);
+typedef int    (*testfunc2c)(mpc_t, mpc_t, mpc_t, mpc_rnd_t);
+
+/* Takes an mpc_t together with an mpfr_t and an MPFR rounding mode. */
+typedef int    (*testfunc1cr)(mpfr_t, mpc_t, mpfr_rnd_t);
+
+/* Pointer to a function that generates random test cases. */
+typedef void (*casegen)(uint32 *, uint32, uint32);
+
+/*
+ * List of testable functions, their types, and their testable range.
+ * The enumerators below are the values stored in Testable.type.
+ */
+enum {
+    args1,                             /* afloat-based, one argument */
+    args1f,                            /* same as args1 but in single prec */
+    args2,                             /* afloat-based, two arguments */
+    args2f,                            /* same as args2 but in single prec */
+    rred,                              /* afloat-based, one arg, aux return */
+    rredf,                             /* same as rred but in single prec */
+    semi1,                             /* seminumerical, one argument */
+    semi1f,                            /* seminumerical, 1 arg, float */
+    semi2,                             /* seminumerical, two arguments */
+    semi2f,                            /* seminumerical, 2 args, floats */
+    t_ldexp,                           /* dbl * int -> dbl */
+    t_ldexpf,                          /* sgl * int -> sgl */
+    t_frexp,                           /* dbl -> dbl * int */
+    t_frexpf,                          /* sgl -> sgl * int */
+    t_modf,                            /* dbl -> dbl * dbl */
+    t_modff,                           /* sgl -> sgl * sgl */
+    classify,                          /* classify double: dbl -> int */
+    classifyf,                         /* classify float: flt -> int */
+    compare,                           /* compare doubles, returns int */
+    comparef,                          /* compare floats, returns int */
+
+    args1c,                            /* acomplex-base, one argument */
+    args2c,                            /* presumably acomplex, two arguments */
+    args1fc,                           /* presumably args1c in single prec */
+    args2fc,                           /* presumably args2c in single prec */
+    /* NOTE(review): the two comments below say "-> complex", but the
+     * testfunc1cr typedef pairs an mpc_t with an mpfr_t, which suggests
+     * complex -> real.  Confirm and correct. */
+    args1cr,                           /* dbl-complex -> complex */
+    args1fcr                           /* sgl-complex -> complex */
+};
+
+/* Description of one testable function. */
+typedef struct __testable Testable;
+struct __testable {
+    char *name;                        /* name looked up by find_function */
+    funcptr func;                      /* the test function, type-erased */
+    int type;                          /* one of the enumerators above */
+    wrapperfunc wrappers[MAXWRAPPERS];
+    casegen cases; /* complex functions use the same casegen for both real and complex args */
+    uint32 caseparam1, caseparam2;     /* presumably passed on to `cases' */
+};
+
+/* Table of all testable functions and its length. */
+extern Testable functions[];
+extern const int nfunctions;
+
+extern void init_pi(void);
+
+/* Number of operands a function of f's type takes (see main.c). */
+int nargs_(Testable* f);
+
+#endif
@@ -0,0 +1,334 @@
+/*
+ * main.c
+ *
+ * Copyright (c) 1999-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "intern.h"
+
+/*
+ * Routines called from this file but not defined in it
+ * (seed_random lives in random.c).
+ */
+void gencases(Testable *fn, int number);
+void docase(Testable *fn, uint32 *args);
+void vet_for_decline(Testable *fn, uint32 *args, uint32 *result, int got_errno_in);
+void seed_random(uint32 seed);
+
+/*
+ * Global settings. main() sets check_declines for "-c", lib_fo for
+ * "-fo" and lib_no_arith for "-noarith"; ntests is reported on stderr
+ * at exit when check_declines is set.
+ */
+int check_declines = 0;
+int lib_fo = 0;
+int lib_no_arith = 0;
+int ntests = 0;
+
+/*
+ * nargs_: how many operand fields a test line for f must carry.
+ *
+ * Two-argument real functions, ldexp-style functions, comparisons and
+ * one-argument complex functions need 2; two-argument complex
+ * functions need 4; every other type needs 1.
+ */
+int nargs_(Testable* f) {
+    int count = 1;
+
+    switch (f->type) {
+    case args2c:
+    case args2fc:
+        count = 4;
+        break;
+    case args2:
+    case args2f:
+    case semi2:
+    case semi2f:
+    case t_ldexp:
+    case t_ldexpf:
+    case compare:
+    case comparef:
+    case args1c:
+    case args1fc:
+    case args1cr:
+    case args1fcr:
+        count = 2;
+        break;
+    default:
+        break;
+    }
+    return count;
+}
+
+/*
+ * isdouble: report whether f operates on double-precision values.
+ *
+ * Returns 1 for the double-precision function types and 0 for the
+ * single-precision ones. An unknown type trips the assertion; when
+ * assertions are compiled out (NDEBUG) it is treated as single
+ * precision rather than falling off the end of the function.
+ */
+static int isdouble(Testable *f)
+{
+    switch (f->type) {
+      case args1:
+      case rred:
+      case semi1:
+      case t_frexp:
+      case t_modf:
+      case classify:
+      case t_ldexp:
+      case args2:
+      case semi2:
+      case args1c:
+      case args1cr:
+      case compare:
+      case args2c:
+        return 1;
+      case args1f:
+      case rredf:
+      case semi1f:
+      case t_frexpf:
+      case t_modff:
+      case classifyf:
+      case args2f:
+      case semi2f:
+      case t_ldexpf:
+      case comparef:
+      case args1fc:
+      case args1fcr:
+      case args2fc:
+        return 0;
+      default:
+        assert(0 && "Bad function type");
+        return 0;                      /* only reached with NDEBUG */
+    }
+}
+
+/*
+ * find_function: look up a table entry by name.
+ *
+ * Returns a pointer to the first entry of functions[] whose name
+ * equals func, or NULL if func is NULL or no entry matches.
+ */
+Testable *find_function(const char *func)
+{
+    Testable *entry;
+
+    if (!func)
+        return NULL;
+
+    for (entry = functions; entry < functions + nfunctions; entry++) {
+        if (strcmp(func, entry->name) == 0)
+            return entry;
+    }
+    return NULL;
+}
+
+/*
+ * get_operand: decode one operand value from a test line.
+ *
+ * str is either a symbolic constant ("0", "1", "inf", "qnan", "snan",
+ * "pi2", "pi", "pi4", "3pi4"), optionally preceded by '+' or '-', or
+ * hex text of the form "<word0>.<word1>". For a symbolic constant f
+ * must be non-NULL: it selects the double (two words) or single (one
+ * word, *word1 = 0) encoding, and a leading '-' sets the sign bit of
+ * *word0. Anything else is handed to sscanf, which stores only the
+ * words it manages to parse.
+ */
+void get_operand(const char *str, Testable *f, uint32 *word0, uint32 *word1)
+{
+    struct special {
+        unsigned dblword0, dblword1, sglword;
+        const char *name;
+    } specials[] = {
+        {0x00000000,0x00000000,0x00000000,"0"},
+        {0x3FF00000,0x00000000,0x3f800000,"1"},
+        {0x7FF00000,0x00000000,0x7f800000,"inf"},
+        {0x7FF80000,0x00000001,0x7fc00000,"qnan"},
+        {0x7FF00000,0x00000001,0x7f800001,"snan"},
+        {0x3ff921fb,0x54442d18,0x3fc90fdb,"pi2"},
+        {0x400921fb,0x54442d18,0x40490fdb,"pi"},
+        {0x3fe921fb,0x54442d18,0x3f490fdb,"pi4"},
+        {0x4002d97c,0x7f3321d2,0x4016cbe4,"3pi4"},
+    };
+    const int nspecials = (int)(sizeof(specials)/sizeof(*specials));
+    /* No table name starts with a sign, so matching on the unsigned
+     * remainder covers both the bare and the signed spelling. */
+    const int has_sign = (str[0] == '-' || str[0] == '+');
+    const char *bare = has_sign ? str + 1 : str;
+    const struct special *sp;
+
+    for (sp = specials; sp < specials + nspecials; sp++) {
+        if (strcmp(bare, sp->name) != 0)
+            continue;
+
+        assert(f);
+        if (isdouble(f)) {
+            *word0 = sp->dblword0;
+            *word1 = sp->dblword1;
+        } else {
+            *word0 = sp->sglword;
+            *word1 = 0;
+        }
+        if (str[0] == '-')
+            *word0 |= 0x80000000U;
+        return;
+    }
+
+    sscanf(str, "%"I32"x.%"I32"x", word0, word1);
+}
+
+/*
+ * dofile: read commands from fp, one per line, and act on each.
+ *
+ * translating: when nonzero, comment and blank lines are echoed to
+ * stdout; when 2, test lines that already carry a result are
+ * regenerated instead of being echoed unchanged.
+ */
+void dofile(FILE *fp, int translating) {
+    char buf[1024], sparebuf[1024], *p;
+
+    /*
+     * Command syntax is:
+     *
+     *  - "seed <integer>" sets a random seed
+     *
+     *  - "test <function> <ntests>" generates random test lines
+     *
+     *  - "<function> op1=foo [op2=bar]" generates a specific test
+     *  - "func=<function> op1=foo [op2=bar]" does the same
+     *  - "func=<function> op1=foo result=bar" will just output the line as-is
+     *
+     *  - a semicolon or a blank line is ignored
+     */
+    while (fgets(buf, sizeof(buf), fp)) {
+        buf[strcspn(buf, "\r\n")] = '\0';
+        strcpy(sparebuf, buf);         /* pristine copy: strtok below edits buf */
+        p = buf;
+        /* <ctype.h> functions need an unsigned char value; plain char
+         * may be negative for bytes above 0x7F. */
+        while (*p && isspace((unsigned char)*p)) p++;
+        if (!*p || *p == ';') {
+            /* Comment or blank line. Only print if `translating' is set. */
+            if (translating)
+                printf("%s\n", buf);
+            continue;
+        }
+        if (!strncmp(buf, "seed ", 5)) {
+            seed_random(atoi(buf+5));
+        } else if (!strncmp(buf, "random=", 7)) {
+            /*
+             * Copy 'random=on' / 'random=off' lines unconditionally
+             * to the output, so that random test failures can be
+             * accumulated into a recent-failures-list file and
+             * still identified as random-in-origin when re-run the
+             * next day.
+             */
+            printf("%s\n", buf);
+        } else if (!strncmp(buf, "test ", 5)) {
+            /* Locals named so as not to shadow the outer `p' or the
+             * global `ntests'. */
+            char *name = buf+5;
+            char *arg = name;
+            int count, i;
+            while (*arg && !isspace((unsigned char)*arg)) arg++;
+            if (*arg) *arg++ = '\0';   /* terminate the function name */
+            while (*arg && isspace((unsigned char)*arg)) arg++;
+            if (*arg)
+                count = atoi(arg);
+            else
+                count = 100;           /* *shrug* */
+            for (i = 0; i < nfunctions; i++) {
+                if (!strcmp(name, functions[i].name)) {
+                    gencases(&functions[i], count);
+                    break;
+                }
+            }
+            if (i == nfunctions) {
+                fprintf(stderr, "unknown test `%s'\n", name);
+            }
+        } else {
+            /*
+             * Parse a specific test line.
+             */
+            /* Zeroed so that words the line does not supply have a
+             * defined value when passed on. */
+            uint32 ops[8] = {0}, result[8] = {0};
+            int got_op = 0; /* &1 for got_op1, &4 for got_op3 etc. */
+            Testable *f = 0;
+            char *q, *r;
+            int got_result = 0, got_errno_in = 0;
+
+            for (q = strtok(p, " \t"); q; q = strtok(NULL, " \t")) {
+                r = strchr(q, '=');
+                if (!r) {
+                    f = find_function(q);
+                } else {
+                    *r++ = '\0';
+
+                    if (!strcmp(q, "func"))
+                        f = find_function(r);
+                    else if (!strcmp(q, "op1") || !strcmp(q, "op1r")) {
+                        get_operand(r, f, &ops[0], &ops[1]);
+                        got_op |= 1;
+                    } else if (!strcmp(q, "op2") || !strcmp(q, "op1i")) {
+                        get_operand(r, f, &ops[2], &ops[3]);
+                        got_op |= 2;
+                    } else if (!strcmp(q, "op2r")) {
+                        get_operand(r, f, &ops[4], &ops[5]);
+                        got_op |= 4;
+                    } else if (!strcmp(q, "op2i")) {
+                        get_operand(r, f, &ops[6], &ops[7]);
+                        got_op |= 8;
+                    } else if (!strcmp(q, "result") || !strcmp(q, "resultr")) {
+                        get_operand(r, f, &result[0], &result[1]);
+                        got_result |= 1;
+                    } else if (!strcmp(q, "resulti")) {
+                        get_operand(r, f, &result[4], &result[5]);
+                        got_result |= 2;
+                    } else if (!strcmp(q, "res2")) {
+                        get_operand(r, f, &result[2], &result[3]);
+                        got_result |= 4;
+                    } else if (!strcmp(q, "errno_in")) {
+                        got_errno_in = 1;
+                    }
+                }
+            }
+
+            /*
+             * Test cases already set up by the input are not
+             * reprocessed by default, unlike the fplib tests. (This
+             * is mostly for historical reasons, because we used to
+             * use a very slow and incomplete internal reference
+             * implementation; now our ref impl is MPFR/MPC it
+             * probably wouldn't be such a bad idea, though we'd still
+             * have to make sure all the special cases came out
+             * right.) If translating==2 (corresponding to the -T
+             * command-line option) then we regenerate everything
+             * regardless.
+             */
+            if (got_result && translating < 2) {
+                if (f)
+                    vet_for_decline(f, ops, result, got_errno_in);
+                puts(sparebuf);
+                continue;
+            }
+
+            /* Run the case only if exactly the operands f needs were given. */
+            if (f && got_op==(1<<nargs_(f))-1) {
+                /*
+                 * And do it!
+                 */
+                docase(f, ops);
+            }
+        }
+    }
+}
+
+/*
+ * main: process command-line options and input files.
+ *
+ * Options (recognised until "--" is seen):
+ *   -t         translating = 1
+ *   -T         translating = 2
+ *   -c         set check_declines
+ *   --seed N   reseed the random number generator with N
+ *   -fo        set lib_fo
+ *   -noarith   set lib_no_arith
+ *   -          stop processing arguments
+ * Every other argument is opened as a file and passed to dofile().
+ * If no file argument was seen and no error occurred, stdin is read.
+ * Returns 1 if an option was not recognised or a file could not be
+ * opened, otherwise 0.
+ */
+int main(int argc, char **argv) {
+    int errs = 0, opts = 1, files = 0, translating = 0;
+    unsigned int seed = 1; /* in case no explicit seed provided */
+
+    seed_random(seed);
+
+    setvbuf(stdout, NULL, _IOLBF, BUFSIZ); /* stops incomplete lines being printed when out of time */
+
+    /* "> 0" rather than "!= 0": a program started with an empty argv
+     * (argc == 0) must not walk past the end of argv. */
+    while (--argc > 0) {
+        FILE *fp;
+        char *p = *++argv;
+
+        if (opts && *p == '-') {
+            if(*(p+1) == 0) { /* single -, read from stdin */
+                break;
+            } else if (!strcmp(p, "-t")) {
+                translating = 1;
+            } else if (!strcmp(p, "-T")) {
+                translating = 2;
+            } else if (!strcmp(p, "-c")) {
+                check_declines = 1;
+            } else if (!strcmp(p, "--")) {
+                opts = 0;
+            } else if (!strcmp(p,"--seed") && argc > 1 && 1==sscanf(*(argv+1),"%u",&seed)) {
+                seed_random(seed);
+                argv++; /* next in argv is seed value, so skip */
+                --argc;
+            } else if (!strcmp(p, "-fo")) {
+                lib_fo = 1;
+            } else if (!strcmp(p, "-noarith")) {
+                lib_no_arith = 1;
+            } else {
+                fprintf(stderr,
+                        "rtest: ignoring unrecognised option '%s'\n", p);
+                errs = 1;
+            }
+        } else {
+            files = 1;
+            /* Once an error has been seen, later files are skipped. */
+            if (!errs) {
+                fp = fopen(p, "r");
+                if (fp) {
+                    dofile(fp, translating);
+                    fclose(fp);
+                } else {
+                    perror(p);
+                    errs = 1;
+                }
+            }
+        }
+    }
+
+    /*
+     * If no filename arguments, use stdin.
+     */
+    if (!files && !errs) {
+        dofile(stdin, translating);
+    }
+
+    if (check_declines) {
+        fprintf(stderr, "Tests expected to run: %d\n", ntests);
+        fflush(stderr);
+    }
+
+    return errs;
+}
@@ -0,0 +1,99 @@
+/*
+ * random.c - random number generator for producing mathlib test cases
+ *
+ * Copyright (c) 1998-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "types.h"
+#include "random.h"
+
+/*
+ * Generator state: a 55-word buffer filled by seed_random() and the
+ * index of the word base_random() will update next.
+ */
+static uint32 seedbuf[55];
+static int seedptr;
+
+/*
+ * seed_random: (re)initialise the generator from a single seed value.
+ *
+ * Steps the seed 55 times, storing (seed - 1) after each step, and
+ * rewinds the buffer index.
+ */
+void seed_random(uint32 seed) {
+    uint32 *slot;
+
+    for (slot = seedbuf; slot != seedbuf + 55; slot++) {
+        uint32 low_term = seed % 44488 * 48271;
+        uint32 high_term = seed / 44488 * 3399;
+
+        seed = low_term - high_term;
+        *slot = seed - 1;
+    }
+    seedptr = 0;
+}
+
+/*
+ * base_random: advance the generator by one step.
+ *
+ * Adds the word 31 places further on (cyclically) into the current
+ * word, returns the updated word and moves on to the next one.
+ */
+uint32 base_random(void) {
+    int current, partner;
+
+    seedptr %= 55;
+    current = seedptr++;
+    partner = (current + 31) % 55;
+
+    seedbuf[current] += seedbuf[partner];
+    return seedbuf[current];
+}
+
+/*
+ * random32: combine two base_random() outputs into one result.
+ *
+ * The second output has each mirrored pair of bits (31/0, 30/1, ...)
+ * swapped, i.e. its bit order is reversed, and is then XORed with the
+ * first.
+ */
+uint32 random32(void) {
+    uint32 plain = base_random();
+    uint32 mirrored = base_random();
+    uint32 high_bit = 0x80000000, low_bit = 1;
+
+    while (high_bit > low_bit) {
+        uint32 pair = high_bit | low_bit;
+        uint32 present = mirrored & pair;
+
+        /* Exactly one bit of the pair set: flipping both swaps them. */
+        if (present != 0 && present != pair)
+            mirrored ^= pair;
+
+        high_bit >>= 1;
+        low_bit <<= 1;
+    }
+    return plain ^ mirrored;
+}
+
+/*
+ * random_upto: generate a uniformly randomised number in the range
+ * 0,...,limit. The upper bound is inclusive: candidates are rejected
+ * only when they are strictly greater than limit. (Precondition:
+ * limit > 0.)
+ *
+ * random_upto_biased: generate a number in the same range, but with
+ * the probability skewed towards the low end by means of taking the
+ * minimum of 8*bias+1 samples from the uniform distribution on the
+ * same range. (I don't know why bias is given in that curious way -
+ * historical reasons, I expect.)
+ *
+ * For speed, I separate the implementation of random_upto into the
+ * two stages of (a) generate a bitmask which reduces a 32-bit random
+ * number to within a factor of two of the right range, (b) repeatedly
+ * generate numbers in that range until one is small enough. Splitting
+ * it up like that means that random_upto_biased can do (a) only once
+ * even when it does (b) lots of times.
+ */
+
+/*
+ * random_upto_makemask: build an all-ones-from-bit-0 mask that still
+ * covers every set bit of limit, narrowing it by 16, 8, 4, 2 and 1
+ * bits in turn whenever limit still fits.
+ */
+static uint32 random_upto_makemask(uint32 limit) {
+    uint32 mask = 0xFFFFFFFF;
+    int shift = 16;
+
+    while (shift > 0) {
+        uint32 narrower = mask >> shift;
+
+        if ((limit & narrower) == limit)
+            mask = narrower;
+        shift >>= 1;
+    }
+    return mask;
+}
+
+/*
+ * random_upto_internal: draw masked random numbers until one does not
+ * exceed limit, and return it.
+ */
+static uint32 random_upto_internal(uint32 limit, uint32 mask) {
+    for (;;) {
+        uint32 candidate = random32() & mask;
+
+        if (candidate <= limit)
+            return candidate;
+    }
+}
+
+/*
+ * random_upto: one draw from random_upto_internal() using the mask
+ * derived from limit.
+ */
+uint32 random_upto(uint32 limit) {
+    return random_upto_internal(limit, random_upto_makemask(limit));
+}
+
+/*
+ * random_upto_biased: return the smallest of 8*bias+1 draws from
+ * random_upto_internal(), computing the mask only once.
+ */
+uint32 random_upto_biased(uint32 limit, int bias) {
+    uint32 mask = random_upto_makemask(limit);
+    uint32 lowest = random_upto_internal(limit, mask);
+    int round, draw;
+
+    for (round = bias; round != 0; round--) {
+        for (draw = 0; draw < 8; draw++) {
+            uint32 sample = random_upto_internal(limit, mask);
+
+            if (sample < lowest)
+                lowest = sample;
+        }
+    }
+
+    return lowest;
+}
@@ -0,0 +1,12 @@
+/*
+ * random.h - header for random.c
+ *
+ * Copyright (c) 2009-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "types.h"
+
+/* One random value built from two steps of the underlying generator. */
+uint32 random32(void);
+/* Random value no greater than limit. */
+uint32 random_upto(uint32 limit);
+/* Smallest of 8*bias+1 values drawn as for random_upto(). */
+uint32 random_upto_biased(uint32 limit, int bias);
@@ -0,0 +1,905 @@
+/*
+ * semi.c: test implementations of mathlib seminumerical functions
+ *
+ * Copyright (c) 1999-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <stdio.h>
+#include "semi.h"
+
+/*
+ * test_rint: round a double to an integral value.
+ *
+ * in and out each point at two words, [0] holding the sign, exponent
+ * and top mantissa bits and [1] the low mantissa bits. With isfloor
+ * set, negative inputs have their magnitude rounded up; with isceil
+ * set, positive inputs do; otherwise (and with neither flag set, as
+ * test_modf uses it) the fraction is simply dropped.
+ */
+static void test_rint(uint32 *in, uint32 *out,
+                       int isfloor, int isceil) {
+    int sign = in[0] & 0x80000000;
+    int roundup = (isfloor && sign) || (isceil && !sign);
+    uint32 xh, xl, roundword;
+    int ex = (in[0] >> 20) & 0x7FF;    /* exponent */
+    int i;
+
+    if ((ex > 0x3ff + 52 - 1) ||     /* things this big can't be fractional */
+        ((in[0] & 0x7FFFFFFF) == 0 && in[1] == 0)) {   /* zero */
+        /* NaN, Inf, a large integer, or zero: just return the input */
+        out[0] = in[0];
+        out[1] = in[1];
+        return;
+    }
+
+    /*
+     * Special case: ex < 0x3ff, ie the magnitude of our number is in
+     * (0,1). Return 1 or 0 (with the input's sign) according to
+     * roundup.
+     */
+    if (ex < 0x3ff) {
+        out[0] = sign | (roundup ? 0x3FF00000 : 0);
+        out[1] = 0;
+        return;
+    }
+
+    /*
+     * We're not short of time here, so we'll do this the hideously
+     * inefficient way. Shift bit by bit so that the units place is
+     * somewhere predictable, round, and shift back again.
+     */
+    xh = in[0];
+    xl = in[1];
+    roundword = 0;
+    /* Shift the whole 64-bit pattern right; bits falling off the
+     * bottom are collected in roundword. */
+    for (i = ex; i < 0x3ff + 52; i++) {
+        if (roundword & 1)
+            roundword |= 2;            /* preserve sticky bit */
+        roundword = (roundword >> 1) | ((xl & 1) << 31);
+        xl = (xl >> 1) | ((xh & 1) << 31);
+        xh = xh >> 1;
+    }
+    /* Any discarded bit set means there was a fraction. */
+    if (roundword && roundup) {
+        xl++;
+        xh += (xl==0);                 /* carry into the high word */
+    }
+    /* Shift back by the same amount, leaving zeros in the fraction bits. */
+    for (i = ex; i < 0x3ff + 52; i++) {
+        xh = (xh << 1) | ((xl >> 31) & 1);
+        xl = (xl & 0x7FFFFFFF) << 1;
+    }
+    out[0] = xh;
+    out[1] = xl;
+}
+
+char *test_ceil(uint32 *in, uint32 *out) {
+    test_rint(in, out, 0, 1);
+    return NULL;
+}
+
+char *test_floor(uint32 *in, uint32 *out) {
+    test_rint(in, out, 1, 0);
+    return NULL;
+}
+
+/*
+ * test_rintf: single-precision counterpart of test_rint.
+ *
+ * *in and *out are one-word float bit patterns. With isfloor set,
+ * negative inputs have their magnitude rounded up; with isceil set,
+ * positive inputs do; otherwise the fraction is simply dropped.
+ */
+static void test_rintf(uint32 *in, uint32 *out,
+                       int isfloor, int isceil) {
+    int sign = *in & 0x80000000;
+    int roundup = (isfloor && sign) || (isceil && !sign);
+    uint32 x, roundword;
+    int ex = (*in >> 23) & 0xFF;       /* exponent */
+    int i;
+
+    if ((ex > 0x7f + 23 - 1) ||      /* things this big can't be fractional */
+        (*in & 0x7FFFFFFF) == 0) {     /* zero */
+        /* NaN, Inf, a large integer, or zero: just return the input */
+        *out = *in;
+        return;
+    }
+
+    /*
+     * Special case: ex < 0x7f, ie the magnitude of our number is in
+     * (0,1). Return 1 or 0 (with the input's sign) according to
+     * roundup.
+     */
+    if (ex < 0x7f) {
+        *out = sign | (roundup ? 0x3F800000 : 0);
+        return;
+    }
+
+    /*
+     * We're not short of time here, so we'll do this the hideously
+     * inefficient way. Shift bit by bit so that the units place is
+     * somewhere predictable, round, and shift back again.
+     */
+    x = *in;
+    roundword = 0;
+    /* Shift right; bits falling off the bottom are collected in roundword. */
+    for (i = ex; i < 0x7F + 23; i++) {
+        if (roundword & 1)
+            roundword |= 2;            /* preserve sticky bit */
+        roundword = (roundword >> 1) | ((x & 1) << 31);
+        x = x >> 1;
+    }
+    /* Any discarded bit set means there was a fraction. */
+    if (roundword && roundup) {
+        x++;
+    }
+    /* Shift back by the same amount, leaving zeros in the fraction bits. */
+    for (i = ex; i < 0x7F + 23; i++) {
+        x = x << 1;
+    }
+    *out = x;
+}
+
+char *test_ceilf(uint32 *in, uint32 *out) {
+    test_rintf(in, out, 0, 1);
+    return NULL;
+}
+
+char *test_floorf(uint32 *in, uint32 *out) {
+    test_rintf(in, out, 1, 0);
+    return NULL;
+}
+
+/*
+ * test_fmod: reference fmod for doubles.
+ *
+ * a, b and out each point at two words (high word first). The result
+ * takes the sign of a. Returns NULL, or a status string: "i" when an
+ * input is a signalling NaN, "EDOM status=i" when b is zero or a is
+ * infinite.
+ */
+char *test_fmod(uint32 *a, uint32 *b, uint32 *out) {
+    int sign;
+    int32 aex, bex;
+    uint32 am[2], bm[2];
+
+    /* (high word without sign) << 1, plus 1 if the low word is
+     * nonzero, exceeds 0xFFE00000 exactly for NaNs. */
+    if (((a[0] & 0x7FFFFFFF) << 1) + !!a[1] > 0xFFE00000 ||
+        ((b[0] & 0x7FFFFFFF) << 1) + !!b[1] > 0xFFE00000) {
+        /* a or b is NaN: return QNaN, optionally with IVO */
+        uint32 an, bn;
+        out[0] = 0x7ff80000;
+        out[1] = 1;
+        an = ((a[0] & 0x7FFFFFFF) << 1) + !!a[1];
+        bn = ((b[0] & 0x7FFFFFFF) << 1) + !!b[1];
+        if ((an > 0xFFE00000 && an < 0xFFF00000) ||
+            (bn > 0xFFE00000 && bn < 0xFFF00000))
+            return "i";                /* at least one SNaN: IVO */
+        else
+            return NULL;               /* no SNaNs, but at least 1 QNaN */
+    }
+    if ((b[0] & 0x7FFFFFFF) == 0 && b[1] == 0) {   /* b==0: EDOM */
+        out[0] = 0x7ff80000;
+        out[1] = 1;
+        return "EDOM status=i";
+    }
+    if ((a[0] & 0x7FF00000) == 0x7FF00000) {   /* a==Inf: EDOM */
+        out[0] = 0x7ff80000;
+        out[1] = 1;
+        return "EDOM status=i";
+    }
+    if ((b[0] & 0x7FF00000) == 0x7FF00000) {   /* b==Inf: return a */
+        out[0] = a[0];
+        out[1] = a[1];
+        return NULL;
+    }
+    if ((a[0] & 0x7FFFFFFF) == 0 && a[1] == 0) {   /* a==0: return a */
+        out[0] = a[0];
+        out[1] = a[1];
+        return NULL;
+    }
+
+    /*
+     * OK. That's the special cases cleared out of the way. Now we
+     * have finite (though not necessarily normal) a and b.
+     */
+    sign = a[0] & 0x80000000;          /* we discard sign of b */
+    test_frexp(a, am, (uint32 *)&aex);
+    test_frexp(b, bm, (uint32 *)&bex);
+    /* Keep only the mantissa bits of the high words and set the leading bit. */
+    am[0] &= 0xFFFFF, am[0] |= 0x100000;
+    bm[0] &= 0xFFFFF, bm[0] |= 0x100000;
+
+    /* Shift-and-subtract: subtract bm whenever am >= bm, doubling am
+     * until the two exponents are equal. */
+    while (aex >= bex) {
+        if (am[0] > bm[0] || (am[0] == bm[0] && am[1] >= bm[1])) {
+            am[1] -= bm[1];
+            /* the last term is the borrow out of the low word */
+            am[0] = am[0] - bm[0] - (am[1] > ~bm[1]);
+        }
+        if (aex > bex) {
+            am[0] = (am[0] << 1) | ((am[1] & 0x80000000) >> 31);
+            am[1] <<= 1;
+            aex--;
+        } else
+            break;
+    }
+
+    /*
+     * Renormalise final result; this can be cunningly done by
+     * passing a denormal to ldexp.
+     */
+    aex += 0x3fd;
+    am[0] |= sign;
+    test_ldexp(am, (uint32 *)&aex, out);
+
+    return NULL;                       /* FIXME */
+}
+
+/*
+ * test_fmodf: reference fmod for single-precision values.
+ *
+ * *a, *b and *out are one-word float bit patterns. The result takes
+ * the sign of a. Returns NULL, or a status string: "i" when an input
+ * is a signalling NaN, "EDOM status=i" when b is zero or a is
+ * infinite.
+ */
+char *test_fmodf(uint32 *a, uint32 *b, uint32 *out) {
+    int sign;
+    int32 aex, bex;
+    uint32 am, bm;
+
+    if ((*a & 0x7FFFFFFF) > 0x7F800000 ||
+        (*b & 0x7FFFFFFF) > 0x7F800000) {
+        /* a or b is NaN: return QNaN, optionally with IVO */
+        uint32 an, bn;
+        *out = 0x7fc00001;
+        an = *a & 0x7FFFFFFF;
+        bn = *b & 0x7FFFFFFF;
+        if ((an > 0x7f800000 && an < 0x7fc00000) ||
+            (bn > 0x7f800000 && bn < 0x7fc00000))
+            return "i";                /* at least one SNaN: IVO */
+        else
+            return NULL;               /* no SNaNs, but at least 1 QNaN */
+    }
+    if ((*b & 0x7FFFFFFF) == 0) {      /* b==0: EDOM */
+        *out = 0x7fc00001;
+        return "EDOM status=i";
+    }
+    if ((*a & 0x7F800000) == 0x7F800000) {   /* a==Inf: EDOM */
+        *out = 0x7fc00001;
+        return "EDOM status=i";
+    }
+    if ((*b & 0x7F800000) == 0x7F800000) {   /* b==Inf: return a */
+        *out = *a;
+        return NULL;
+    }
+    if ((*a & 0x7FFFFFFF) == 0) {      /* a==0: return a */
+        *out = *a;
+        return NULL;
+    }
+
+    /*
+     * OK. That's the special cases cleared out of the way. Now we
+     * have finite (though not necessarily normal) a and b.
+     */
+    sign = a[0] & 0x80000000;          /* we discard sign of b */
+    test_frexpf(a, &am, (uint32 *)&aex);
+    test_frexpf(b, &bm, (uint32 *)&bex);
+    /* Keep only the mantissa bits and set the leading bit. */
+    am &= 0x7FFFFF, am |= 0x800000;
+    bm &= 0x7FFFFF, bm |= 0x800000;
+
+    /* Shift-and-subtract: subtract bm whenever am >= bm, doubling am
+     * until the two exponents are equal. */
+    while (aex >= bex) {
+        if (am >= bm) {
+            am -= bm;
+        }
+        if (aex > bex) {
+            am <<= 1;
+            aex--;
+        } else
+            break;
+    }
+
+    /*
+     * Renormalise final result; this can be cunningly done by
+     * passing a denormal to ldexp.
+     */
+    aex += 0x7d;
+    am |= sign;
+    test_ldexpf(&am, (uint32 *)&aex, out);
+
+    return NULL;                       /* FIXME */
+}
+
+/*
+ * test_ldexp: reference ldexp for doubles.
+ *
+ * x and out each point at two words (high word first); *np is the
+ * power of two to scale by, read as a signed integer. Returns NULL,
+ * "overflow" (result is a signed infinity), "underflow" (result is a
+ * signed zero), or "u" when a denormal result had to be rounded.
+ */
+char *test_ldexp(uint32 *x, uint32 *np, uint32 *out) {
+    int n = *np;
+    int32 n2;
+    uint32 y[2];
+    int ex = (x[0] >> 20) & 0x7FF;     /* exponent */
+    int sign = x[0] & 0x80000000;
+
+    if (ex == 0x7FF) {                 /* inf/NaN; just return x */
+        out[0] = x[0];
+        out[1] = x[1];
+        return NULL;
+    }
+    if ((x[0] & 0x7FFFFFFF) == 0 && x[1] == 0) {   /* zero: return x */
+        out[0] = x[0];
+        out[1] = x[1];
+        return NULL;
+    }
+
+    /* Split x into a normalised mantissa y and exponent n2, then add
+     * the requested scaling to the exponent. */
+    test_frexp(x, y, (uint32 *)&n2);
+    ex = n + n2;
+    if (ex > 0x400) {                  /* overflow */
+        out[0] = sign | 0x7FF00000;
+        out[1] = 0;
+        return "overflow";
+    }
+    /*
+     * Underflow. 2^-1074 is 00000000.00000001; so if ex == -1074
+     * then we have something [2^-1075,2^-1074). Under round-to-
+     * nearest-even, this whole interval rounds up to 2^-1074,
+     * except for the bottom endpoint which rounds to even and is
+     * an underflow condition.
+     *
+     * So, ex < -1074 is definite underflow, and ex == -1074 is
+     * underflow iff all mantissa bits are zero.
+     */
+    if (ex < -1074 || (ex == -1074 && (y[0] & 0xFFFFF) == 0 && y[1] == 0)) {
+        out[0] = sign;                 /* underflow: correctly signed zero */
+        out[1] = 0;
+        return "underflow";
+    }
+
+    /*
+     * No overflow or underflow; should be nice and simple, unless
+     * we have to denormalise and round the result.
+     */
+    if (ex < -1021) {                  /* denormalise and round */
+        uint32 roundword;
+        y[0] &= 0x000FFFFF;
+        y[0] |= 0x00100000;            /* set leading bit */
+        roundword = 0;
+        /* Shift right one bit per missing exponent step, collecting
+         * the discarded bits in roundword. */
+        while (ex < -1021) {
+            if (roundword & 1)
+                roundword |= 2;        /* preserve sticky bit */
+            roundword = (roundword >> 1) | ((y[1] & 1) << 31);
+            y[1] = (y[1] >> 1) | ((y[0] & 1) << 31);
+            y[0] = y[0] >> 1;
+            ex++;
+        }
+        if (roundword > 0x80000000 ||  /* round up */
+            (roundword == 0x80000000 && (y[1] & 1))) {  /* round up to even */
+            y[1]++;
+            y[0] += (y[1] == 0);       /* carry into the high word */
+        }
+        out[0] = sign | y[0];
+        out[1] = y[1];
+        /* Proper ERANGE underflow was handled earlier, but we still
+         * expect an IEEE Underflow exception if this partially
+         * underflowed result is not exact. */
+        if (roundword)
+            return "u";
+        return NULL;                   /* underflow was handled earlier */
+    } else {
+        /* y already carries the sign and a biased exponent from
+         * test_frexp; adding ex << 20 adjusts that exponent field. */
+        out[0] = y[0] + (ex << 20);
+        out[1] = y[1];
+        return NULL;
+    }
+}
+
+/*
+ * test_ldexpf: reference ldexp for single-precision values.
+ *
+ * *x and *out are one-word float bit patterns; *np is the power of
+ * two to scale by, read as a signed integer. Returns NULL, "overflow"
+ * (result is a signed infinity), "underflow" (result is a signed
+ * zero), or "u" when a denormal result had to be rounded.
+ */
+char *test_ldexpf(uint32 *x, uint32 *np, uint32 *out) {
+    int n = *np;
+    int32 n2;
+    uint32 y;
+    int ex = (*x >> 23) & 0xFF;     /* exponent */
+    int sign = *x & 0x80000000;
+
+    if (ex == 0xFF) {                 /* inf/NaN; just return x */
+        *out = *x;
+        return NULL;
+    }
+    if ((*x & 0x7FFFFFFF) == 0) {      /* zero: return x */
+        *out = *x;
+        return NULL;
+    }
+
+    /* Split x into a normalised mantissa y and exponent n2, then add
+     * the requested scaling to the exponent. */
+    test_frexpf(x, &y, (uint32 *)&n2);
+    ex = n + n2;
+    if (ex > 0x80) {                  /* overflow */
+        *out = sign | 0x7F800000;
+        return "overflow";
+    }
+    /*
+     * Underflow. 2^-149 is 00000001; so if ex == -149 then we have
+     * something [2^-150,2^-149). Under round-to- nearest-even,
+     * this whole interval rounds up to 2^-149, except for the
+     * bottom endpoint which rounds to even and is an underflow
+     * condition.
+     *
+     * So, ex < -149 is definite underflow, and ex == -149 is
+     * underflow iff all mantissa bits are zero.
+     */
+    if (ex < -149 || (ex == -149 && (y & 0x7FFFFF) == 0)) {
+        *out = sign;                 /* underflow: correctly signed zero */
+        return "underflow";
+    }
+
+    /*
+     * No overflow or underflow; should be nice and simple, unless
+     * we have to denormalise and round the result.
+     */
+    if (ex < -125) {                  /* denormalise and round */
+        uint32 roundword;
+        y &= 0x007FFFFF;
+        y |= 0x00800000;               /* set leading bit */
+        roundword = 0;
+        /* Shift right one bit per missing exponent step, collecting
+         * the discarded bits in roundword. */
+        while (ex < -125) {
+            if (roundword & 1)
+                roundword |= 2;        /* preserve sticky bit */
+            roundword = (roundword >> 1) | ((y & 1) << 31);
+            y = y >> 1;
+            ex++;
+        }
+        if (roundword > 0x80000000 ||  /* round up */
+            (roundword == 0x80000000 && (y & 1))) {  /* round up to even */
+            y++;
+        }
+        *out = sign | y;
+        /* Proper ERANGE underflow was handled earlier, but we still
+         * expect an IEEE Underflow exception if this partially
+         * underflowed result is not exact. */
+        if (roundword)
+            return "u";
+        return NULL;                   /* underflow was handled earlier */
+    } else {
+        /* y already carries the sign and a biased exponent from
+         * test_frexpf; adding ex << 23 adjusts that exponent field. */
+        *out = y + (ex << 23);
+        return NULL;
+    }
+}
+
+/*
+ * test_frexp: reference frexp for doubles.
+ *
+ * x and out each point at two words (high word first). Infinities,
+ * NaNs and zeros are copied through with *nout = 0. Otherwise out
+ * receives the input's sign and mantissa under a fixed exponent field
+ * of 0x3FE, and *nout receives the matching power of two; denormal
+ * inputs are normalised first. Always returns NULL.
+ */
+char *test_frexp(uint32 *x, uint32 *out, uint32 *nout)
+{
+    uint32 hi = x[0];
+    uint32 lo = x[1];
+    uint32 signbit = hi & 0x80000000;
+    uint32 magnitude = hi & 0x7FFFFFFF;
+    int ex = (hi >> 20) & 0x7FF;       /* exponent */
+
+    if (ex == 0x7FF || (magnitude == 0 && lo == 0)) {
+        /* inf/NaN or zero: return x/0 */
+        out[0] = hi;
+        out[1] = lo;
+        nout[0] = 0;
+        return NULL;
+    }
+
+    if (ex == 0) {
+        /* denormal: shift left until the leading bit reaches bit 20 */
+        for (ex = 1; !(magnitude & 0x100000); ex--) {
+            magnitude = (magnitude << 1) | ((lo >> 31) & 1);
+            lo = (lo & 0x7FFFFFFF) << 1;
+        }
+    }
+
+    out[0] = signbit | 0x3FE00000 | (magnitude & 0xFFFFF);
+    out[1] = lo;
+    nout[0] = ex - 0x3FE;
+    return NULL;                       /* no error */
+}
+
+/*
+ * test_frexpf: reference frexp for single-precision values.
+ *
+ * *x and *out are one-word float bit patterns. Infinities, NaNs and
+ * zeros are copied through with *nout = 0. Otherwise *out receives
+ * the input's sign and mantissa under a fixed exponent field of 0x7E,
+ * and *nout receives the matching power of two; denormal inputs are
+ * normalised first. Always returns NULL.
+ */
+char *test_frexpf(uint32 *x, uint32 *out, uint32 *nout)
+{
+    uint32 bits = *x;
+    uint32 signbit = bits & 0x80000000;
+    uint32 magnitude = bits & 0x7FFFFFFF;
+    int ex = (bits >> 23) & 0xFF;      /* exponent */
+
+    if (ex == 0xFF || magnitude == 0) {
+        /* inf/NaN or zero: return x/0 */
+        *out = bits;
+        nout[0] = 0;
+        return NULL;
+    }
+
+    if (ex == 0) {
+        /* denormal: shift left until the leading bit reaches bit 23 */
+        for (ex = 1; !(magnitude & 0x800000); ex--)
+            magnitude = magnitude << 1;
+    }
+
+    *out = signbit | 0x3F000000 | (magnitude & 0x7FFFFF);
+    nout[0] = ex - 0x7E;
+    return NULL;                       /* no error */
+}
+
+/*
+ * test_modf: reference modf for doubles.
+ *
+ * x, fout and iout each point at two words (high word first). iout
+ * receives x with its fraction discarded and fout the fractional
+ * part, carrying the sign of x. A NaN input is copied to both
+ * outputs. Always returns NULL.
+ */
+char *test_modf(uint32 *x, uint32 *fout, uint32 *iout) {
+    int ex = (x[0] >> 20) & 0x7FF;     /* exponent */
+    int sign = x[0] & 0x80000000;
+    uint32 fh, fl;
+
+    if (((x[0] & 0x7FFFFFFF) | (!!x[1])) > 0x7FF00000) {
+        /*
+         * NaN input: return the same in _both_ outputs.
+         */
+        fout[0] = iout[0] = x[0];
+        fout[1] = iout[1] = x[1];
+        return NULL;
+    }
+
+    /* Neither floor nor ceil: test_rint just drops the fraction bits. */
+    test_rint(x, iout, 0, 0);
+    /* The bits that test_rint cleared are what is left of the fraction. */
+    fh = x[0] - iout[0];
+    fl = x[1] - iout[1];
+    if (!fh && !fl) {                  /* no fraction part */
+        fout[0] = sign;
+        fout[1] = 0;
+        return NULL;
+    }
+    if (!(iout[0] & 0x7FFFFFFF) && !iout[1]) {   /* no integer part */
+        fout[0] = x[0];
+        fout[1] = x[1];
+        return NULL;
+    }
+    /* Normalise: shift the fraction up until its leading bit reaches
+     * bit 20, lowering the exponent by one per shift. */
+    while (!(fh & 0x100000)) {
+        ex--;
+        fh = (fh << 1) | ((fl >> 31) & 1);
+        fl = (fl & 0x7FFFFFFF) << 1;
+    }
+    fout[0] = sign | (ex << 20) | (fh & 0xFFFFF);
+    fout[1] = fl;
+    return NULL;
+}
+
+/*
+ * test_modff: reference modf for single-precision values.
+ *
+ * *x, *fout and *iout are one-word float bit patterns. *iout receives
+ * x with its fraction discarded and *fout the fractional part,
+ * carrying the sign of x. A NaN input is copied to both outputs.
+ * Always returns NULL.
+ */
+char *test_modff(uint32 *x, uint32 *fout, uint32 *iout) {
+    int ex = (*x >> 23) & 0xFF;        /* exponent */
+    int sign = *x & 0x80000000;
+    uint32 f;
+
+    if ((*x & 0x7FFFFFFF) > 0x7F800000) {
+        /*
+         * NaN input: return the same in _both_ outputs.
+         */
+        *fout = *iout = *x;
+        return NULL;
+    }
+
+    /* Neither floor nor ceil: test_rintf just drops the fraction bits. */
+    test_rintf(x, iout, 0, 0);
+    /* The bits that test_rintf cleared are what is left of the fraction. */
+    f = *x - *iout;
+    if (!f) {                          /* no fraction part */
+        *fout = sign;
+        return NULL;
+    }
+    if (!(*iout & 0x7FFFFFFF)) {       /* no integer part */
+        *fout = *x;
+        return NULL;
+    }
+    /* Normalise: shift the fraction up until its leading bit reaches
+     * bit 23, lowering the exponent by one per shift. */
+    while (!(f & 0x800000)) {
+        ex--;
+        f = f << 1;
+    }
+    *fout = sign | (ex << 23) | (f & 0x7FFFFF);
+    return NULL;
+}
+
+/*
+ * test_copysign: double with the magnitude of x and the sign of y.
+ * Cannot fail, so always returns NULL.
+ */
+char *test_copysign(uint32 *x, uint32 *y, uint32 *out)
+{
+    /* High word: sign bit from y, all other bits from x. */
+    out[0] = (y[0] & 0x80000000) | (x[0] & 0x7fffffff);
+    out[1] = x[1];
+
+    return NULL;
+}
+
+/*
+ * test_copysignf: float with the magnitude of x and the sign of y.
+ * Cannot fail, so always returns NULL.
+ */
+char *test_copysignf(uint32 *x, uint32 *y, uint32 *out)
+{
+    /* Sign bit from y, all other bits from x. */
+    out[0] = (y[0] & 0x80000000) | (x[0] & 0x7fffffff);
+
+    return NULL;
+}
+
+/* test_isfinite: out[0] = 1 unless the double's exponent field is 0x7ff. */
+char *test_isfinite(uint32 *x, uint32 *out)
+{
+    uint32 exponent_bits = x[0] & 0x7ff00000;
+
+    out[0] = (exponent_bits != 0x7ff00000);
+    return NULL;
+}
+
+/* test_isfinitef: out[0] = 1 unless the float's exponent field is 0xff. */
+char *test_isfinitef(uint32 *x, uint32 *out)
+{
+    uint32 exponent_bits = x[0] & 0x7f800000;
+
+    out[0] = (exponent_bits != 0x7f800000);
+    return NULL;
+}
+
+/*
+ * test_isinff: out[0] = 1 when the float is infinite, i.e. its bottom
+ * 31 bits equal 0x7f800000.
+ */
+char *test_isinff(uint32 *x, uint32 *out)
+{
+    uint32 magnitude = x[0] & 0x7fffffff;
+
+    out[0] = (magnitude == 0x7f800000);
+    return NULL;
+}
+
+/*
+ * test_isinf: out[0] = 1 when the double is infinite, i.e. the
+ * exponent field is 0x7ff and every fraction bit is zero.
+ */
+char *test_isinf(uint32 *x, uint32 *out)
+{
+    uint32 magnitude = x[0] & 0x7fffffff;
+
+    out[0] = (magnitude == 0x7ff00000 && x[1] == 0);
+    return NULL;
+}
+
+/*
+ * Reference isnan for floats: out[0] is 1 when the exponent field is 0xff
+ * and the fraction is non-zero, and 0 otherwise.  Returns NULL always.
+ */
+char *test_isnanf(uint32 *x, uint32 *out)
+{
+    /*
+     * With the sign bit removed, "exponent all ones and fraction non-zero"
+     * is the same as being strictly above the infinity pattern.
+     */
+    const uint32 magnitude = x[0] & 0x7fffffff;
+
+    out[0] = (magnitude > 0x7f800000) ? 1 : 0;
+    return NULL;
+}
+
+/*
+ * Reference isnan for doubles: out[0] is 1 when the exponent field is
+ * 0x7ff and the fraction (low 20 bits of x[0] together with x[1]) is
+ * non-zero, and 0 otherwise.  Returns NULL always.
+ */
+char *test_isnan(uint32 *x, uint32 *out)
+{
+    const uint32 hi = x[0] & 0x7fffffff;
+
+    if (hi > 0x7ff00000) {
+        /* exponent all ones, non-zero bits in the high part of the fraction */
+        out[0] = 1;
+    } else if (hi == 0x7ff00000 && x[1] != 0) {
+        /* exponent all ones, fraction bits only in the low word */
+        out[0] = 1;
+    } else {
+        out[0] = 0;
+    }
+    return NULL;
+}
+
+/*
+ * Reference isnormal for floats: out[0] is 1 when the exponent field of
+ * x[0] is neither 0 nor 0xff, and 0 otherwise.  Returns NULL always.
+ */
+char *test_isnormalf(uint32 *x, uint32 *out)
+{
+    const uint32 exp_bits = x[0] & 0x7f800000;
+
+    out[0] = (exp_bits != 0 && exp_bits != 0x7f800000) ? 1 : 0;
+    return NULL;
+}
+
+/*
+ * Reference isnormal for doubles: out[0] is 1 when the exponent field of
+ * the high word x[0] is neither 0 nor 0x7ff, and 0 otherwise.  Only x[0]
+ * is examined.  Returns NULL always.
+ */
+char *test_isnormal(uint32 *x, uint32 *out)
+{
+    const uint32 exp_bits = x[0] & 0x7ff00000;
+
+    out[0] = (exp_bits != 0 && exp_bits != 0x7ff00000) ? 1 : 0;
+    return NULL;
+}
+
+/*
+ * Reference signbit for floats: out[0] is 1 when bit 31 of x[0] is set
+ * and 0 otherwise.  Returns NULL always.
+ */
+char *test_signbitf(uint32 *x, uint32 *out)
+{
+    out[0] = (x[0] & 0x80000000) ? 1 : 0;
+    return NULL;
+}
+
+/*
+ * Reference signbit for doubles: out[0] is 1 when bit 31 of the high
+ * word x[0] is set and 0 otherwise.  Returns NULL always.
+ */
+char *test_signbit(uint32 *x, uint32 *out)
+{
+    out[0] = (x[0] & 0x80000000) ? 1 : 0;
+    return NULL;
+}
+
+/*
+ * Reference fpclassify for doubles.  Writes a class code to out[0]:
+ *   0  exponent 0,     fraction 0      (zero)
+ *   4  exponent 0,     fraction != 0   (subnormal)
+ *   3  exponent 0x7ff, fraction 0      (infinity)
+ *   7  exponent 0x7ff, fraction != 0   (NaN)
+ *   5  any other exponent              (normal)
+ * Returns NULL always.
+ */
+char *test_fpclassify(uint32 *x, uint32 *out)
+{
+    /*
+     * The fraction is only tested against zero, so ORing the two parts is
+     * enough.  It is kept unsigned because x[1] may have bit 31 set.
+     */
+    uint32 exponent = (x[0] & 0x7ff00000) >> 20;
+    uint32 fraction = (x[0] & 0x000fffff) | x[1];
+
+    if (exponent == 0x000)
+        out[0] = (fraction == 0) ? 0 : 4;
+    else if (exponent == 0x7ff)
+        out[0] = (fraction == 0) ? 3 : 7;
+    else
+        out[0] = 5;
+    return NULL;
+}
+
+/*
+ * Reference fpclassify for floats.  Writes a class code to out[0]:
+ *   0  exponent 0,    fraction 0      (zero)
+ *   4  exponent 0,    fraction != 0   (subnormal)
+ *   3  exponent 0xff, fraction 0      (infinity)
+ *   7  exponent 0xff, fraction != 0   (NaN)
+ *   5  any other exponent             (normal)
+ * Returns NULL always.
+ */
+char *test_fpclassifyf(uint32 *x, uint32 *out)
+{
+    const uint32 exp_field = (x[0] >> 23) & 0xff;
+    const int has_fraction = (x[0] & 0x007fffff) != 0;
+
+    switch (exp_field) {
+    case 0x00:
+        out[0] = has_fraction ? 4 : 0;
+        break;
+    case 0xff:
+        out[0] = has_fraction ? 7 : 3;
+        break;
+    default:
+        out[0] = 5;
+        break;
+    }
+    return NULL;
+}
+
+/*
+ * Internal helper that compares the doubles in x and y (two 32-bit words
+ * each, sign/exponent word first).
+ *
+ * Returns -3 if either operand is a signaling NaN, -2 if either is a
+ * quiet NaN and neither is signaling, and otherwise -1, 0 or 1 for
+ * x < y, x == y or x > y.
+ */
+static int fpcmp4(uint32 *x, uint32 *y)
+{
+    const uint32 xhi = x[0] & 0x7fffffff;
+    const uint32 yhi = y[0] & 0x7fffffff;
+    const uint32 xneg = x[0] >> 31;
+    const uint32 yneg = y[0] >> 31;
+    int x_snan, y_snan, order;
+
+    /*
+     * NaNs have exponent 0x7ff and a non-zero fraction.  High words at or
+     * above 0x7ff80000 are treated as quiet; any other NaN is signaling.
+     * Signaling NaNs take priority over quiet ones.
+     */
+    x_snan = xhi < 0x7ff80000 &&
+        (xhi > 0x7ff00000 || (xhi == 0x7ff00000 && x[1] != 0));
+    y_snan = yhi < 0x7ff80000 &&
+        (yhi > 0x7ff00000 || (yhi == 0x7ff00000 && y[1] != 0));
+    if (x_snan || y_snan)
+        return -3;
+    if (xhi >= 0x7ff80000 || yhi >= 0x7ff80000)
+        return -2;
+
+    /*
+     * +0 and -0 compare equal.
+     */
+    if (xhi == 0 && x[1] == 0 && yhi == 0 && y[1] == 0)
+        return 0;
+
+    /*
+     * Operands of opposite sign: the non-negative one is the greater.
+     */
+    if (xneg != yneg)
+        return xneg ? -1 : 1;
+
+    /*
+     * Same sign.  With the sign bit removed, IEEE 754 bit patterns order
+     * the same way as the values they encode, so the magnitudes can be
+     * compared as plain unsigned integers: high word first, then low.
+     */
+    if (xhi != yhi)
+        order = (xhi < yhi) ? -1 : 1;
+    else if (x[1] != y[1])
+        order = (x[1] < y[1]) ? -1 : 1;
+    else
+        order = 0;
+
+    /*
+     * If x is positive (and therefore so is y) the magnitude ordering is
+     * the answer; for negative operands it is reversed.
+     */
+    return xneg ? -order : order;
+}
+
+/*
+ * Internal helper that compares the floats in x[0] and y[0].
+ *
+ * Returns -3 if either operand is a signaling NaN, -2 if either is a
+ * quiet NaN and neither is signaling, and otherwise -1, 0 or 1 for
+ * x < y, x == y or x > y.
+ */
+static int fpcmp4f(uint32 *x, uint32 *y)
+{
+    const uint32 xabs = x[0] & 0x7fffffff;
+    const uint32 yabs = y[0] & 0x7fffffff;
+    const uint32 xneg = x[0] >> 31;
+    const uint32 yneg = y[0] >> 31;
+    int order;
+
+    /*
+     * NaNs have exponent 0xff and a non-zero fraction.  Patterns at or
+     * above 0x7fc00000 are treated as quiet; NaNs below that are
+     * signaling, and a signaling operand outranks a quiet one.
+     */
+    if ((xabs > 0x7f800000 && xabs < 0x7fc00000) ||
+        (yabs > 0x7f800000 && yabs < 0x7fc00000))
+        return -3;
+    if (xabs >= 0x7fc00000 || yabs >= 0x7fc00000)
+        return -2;
+
+    /*
+     * +0 and -0 compare equal.
+     */
+    if (xabs == 0 && yabs == 0)
+        return 0;
+
+    /*
+     * Operands of opposite sign: the non-negative one is the greater.
+     */
+    if (xneg != yneg)
+        return xneg ? -1 : 1;
+
+    /*
+     * Same sign.  With the sign bit removed, IEEE 754 bit patterns order
+     * the same way as the values they encode, so the magnitudes can be
+     * compared as plain unsigned integers.
+     */
+    if (xabs == yabs)
+        order = 0;
+    else
+        order = (xabs < yabs) ? -1 : 1;
+
+    /*
+     * If x is positive (and therefore so is y) the magnitude ordering is
+     * the answer; for negative operands it is reversed.
+     */
+    return xneg ? -order : order;
+}
+
+/*
+ * Reference isgreater for doubles: *out is 1 when fpcmp4 reports x > y
+ * and 0 otherwise.  Returns "i" when fpcmp4 reports a signaling operand
+ * (-3), else NULL.
+ */
+char *test_isgreater(uint32 *x, uint32 *y, uint32 *out)
+{
+    const int cmp = fpcmp4(x, y);
+
+    *out = (cmp == 1) ? 1 : 0;
+    if (cmp == -3)
+        return "i";
+    return NULL;
+}
+
+/*
+ * Reference isgreaterequal for doubles: *out is 1 when fpcmp4 reports
+ * x == y or x > y and 0 otherwise (including the unordered results).
+ * Returns "i" when fpcmp4 reports a signaling operand (-3), else NULL.
+ */
+char *test_isgreaterequal(uint32 *x, uint32 *y, uint32 *out)
+{
+    const int cmp = fpcmp4(x, y);
+
+    *out = (cmp >= 0) ? 1 : 0;
+    if (cmp == -3)
+        return "i";
+    return NULL;
+}
+
+/*
+ * Reference isless for doubles: *out is 1 when fpcmp4 reports x < y and
+ * 0 otherwise.  Returns "i" when fpcmp4 reports a signaling operand (-3),
+ * else NULL.
+ */
+char *test_isless(uint32 *x, uint32 *y, uint32 *out)
+{
+    const int cmp = fpcmp4(x, y);
+
+    *out = (cmp == -1) ? 1 : 0;
+    if (cmp == -3)
+        return "i";
+    return NULL;
+}
+
+/*
+ * Reference islessequal for doubles: *out is 1 when fpcmp4 reports
+ * x < y or x == y and 0 otherwise.  Returns "i" when fpcmp4 reports a
+ * signaling operand (-3), else NULL.
+ */
+char *test_islessequal(uint32 *x, uint32 *y, uint32 *out)
+{
+    const int cmp = fpcmp4(x, y);
+
+    *out = (cmp == -1 || cmp == 0) ? 1 : 0;
+    if (cmp == -3)
+        return "i";
+    return NULL;
+}
+
+/*
+ * Reference islessgreater for doubles: *out is 1 when fpcmp4 reports
+ * x < y or x > y and 0 otherwise.  Returns "i" when fpcmp4 reports a
+ * signaling operand (-3), else NULL.
+ */
+char *test_islessgreater(uint32 *x, uint32 *y, uint32 *out)
+{
+    const int cmp = fpcmp4(x, y);
+
+    *out = (cmp == -1 || cmp == 1) ? 1 : 0;
+    if (cmp == -3)
+        return "i";
+    return NULL;
+}
+
+/*
+ * Reference isunordered for doubles: *out is 1 when fpcmp4 reports the
+ * operands as unordered (-2) or signaling (-3) and 0 otherwise.
+ * Returns "i" when fpcmp4 reports a signaling operand (-3), else NULL.
+ *
+ * The comparison result alone decides the answer; no separate
+ * normal-number check is needed.
+ */
+char *test_isunordered(uint32 *x, uint32 *y, uint32 *out)
+{
+    int result = fpcmp4(x, y);
+
+    *out = (result == -2) || (result == -3);
+    return result == -3 ? "i" : NULL;
+}
+
+/*
+ * Reference isgreater for floats: *out is 1 when fpcmp4f reports x > y
+ * and 0 otherwise.  Returns "i" when fpcmp4f reports a signaling operand
+ * (-3), else NULL.
+ */
+char *test_isgreaterf(uint32 *x, uint32 *y, uint32 *out)
+{
+    const int cmp = fpcmp4f(x, y);
+
+    *out = (cmp == 1) ? 1 : 0;
+    if (cmp == -3)
+        return "i";
+    return NULL;
+}
+
+/*
+ * Reference isgreaterequal for floats: *out is 1 when fpcmp4f reports
+ * x == y or x > y and 0 otherwise (including the unordered results).
+ * Returns "i" when fpcmp4f reports a signaling operand (-3), else NULL.
+ */
+char *test_isgreaterequalf(uint32 *x, uint32 *y, uint32 *out)
+{
+    const int cmp = fpcmp4f(x, y);
+
+    *out = (cmp >= 0) ? 1 : 0;
+    if (cmp == -3)
+        return "i";
+    return NULL;
+}
+
+/*
+ * Reference isless for floats: *out is 1 when fpcmp4f reports x < y and
+ * 0 otherwise.  Returns "i" when fpcmp4f reports a signaling operand
+ * (-3), else NULL.
+ */
+char *test_islessf(uint32 *x, uint32 *y, uint32 *out)
+{
+    const int cmp = fpcmp4f(x, y);
+
+    *out = (cmp == -1) ? 1 : 0;
+    if (cmp == -3)
+        return "i";
+    return NULL;
+}
+
+/*
+ * Reference islessequal for floats: *out is 1 when fpcmp4f reports
+ * x < y or x == y and 0 otherwise.  Returns "i" when fpcmp4f reports a
+ * signaling operand (-3), else NULL.
+ */
+char *test_islessequalf(uint32 *x, uint32 *y, uint32 *out)
+{
+    const int cmp = fpcmp4f(x, y);
+
+    *out = (cmp == -1 || cmp == 0) ? 1 : 0;
+    if (cmp == -3)
+        return "i";
+    return NULL;
+}
+
+/*
+ * Reference islessgreater for floats: *out is 1 when fpcmp4f reports
+ * x < y or x > y and 0 otherwise.  Returns "i" when fpcmp4f reports a
+ * signaling operand (-3), else NULL.
+ */
+char *test_islessgreaterf(uint32 *x, uint32 *y, uint32 *out)
+{
+    const int cmp = fpcmp4f(x, y);
+
+    *out = (cmp == -1 || cmp == 1) ? 1 : 0;
+    if (cmp == -3)
+        return "i";
+    return NULL;
+}
+
+/*
+ * Reference isunordered for floats: *out is 1 when fpcmp4f reports the
+ * operands as unordered (-2) or signaling (-3) and 0 otherwise.
+ * Returns "i" when fpcmp4f reports a signaling operand (-3), else NULL.
+ *
+ * The comparison result alone decides the answer; no separate
+ * normal-number check is needed.
+ */
+char *test_isunorderedf(uint32 *x, uint32 *y, uint32 *out)
+{
+    int result = fpcmp4f(x, y);
+
+    *out = (result == -2) || (result == -3);
+    return result == -3 ? "i" : NULL;
+}
@@ -0,0 +1,53 @@
+/*
+ * semi.h: header for semi.c
+ *
+ * Copyright (c) 1999-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef test_semi_h
+#define test_semi_h
+
+#include "types.h"
+
+char *test_ceil(uint32 *in, uint32 *out);
+char *test_floor(uint32 *in, uint32 *out);
+char *test_fmod(uint32 *a, uint32 *b, uint32 *out);
+char *test_ldexp(uint32 *x, uint32 *n, uint32 *out);
+char *test_frexp(uint32 *x, uint32 *out, uint32 *nout);
+char *test_modf(uint32 *x, uint32 *iout, uint32 *fout);
+char *test_ceilf(uint32 *in, uint32 *out);
+char *test_floorf(uint32 *in, uint32 *out);
+char *test_fmodf(uint32 *a, uint32 *b, uint32 *out);
+char *test_ldexpf(uint32 *x, uint32 *n, uint32 *out);
+char *test_frexpf(uint32 *x, uint32 *out, uint32 *nout);
+char *test_modff(uint32 *x, uint32 *iout, uint32 *fout);
+
+char *test_copysign(uint32 *x, uint32 *y, uint32 *out);
+char *test_copysignf(uint32 *x, uint32 *y, uint32 *out);
+char *test_isfinite(uint32 *x, uint32 *out);
+char *test_isfinitef(uint32 *x, uint32 *out);
+char *test_isinf(uint32 *x, uint32 *out);
+char *test_isinff(uint32 *x, uint32 *out);
+char *test_isnan(uint32 *x, uint32 *out);
+char *test_isnanf(uint32 *x, uint32 *out);
+char *test_isnormal(uint32 *x, uint32 *out);
+char *test_isnormalf(uint32 *x, uint32 *out);
+char *test_signbit(uint32 *x, uint32 *out);
+char *test_signbitf(uint32 *x, uint32 *out);
+char *test_fpclassify(uint32 *x, uint32 *out);
+char *test_fpclassifyf(uint32 *x, uint32 *out);
+
+char *test_isgreater(uint32 *x, uint32 *y, uint32 *out);
+char *test_isgreaterequal(uint32 *x, uint32 *y, uint32 *out);
+char *test_isless(uint32 *x, uint32 *y, uint32 *out);
+char *test_islessequal(uint32 *x, uint32 *y, uint32 *out);
+char *test_islessgreater(uint32 *x, uint32 *y, uint32 *out);
+char *test_isunordered(uint32 *x, uint32 *y, uint32 *out);
+char *test_isgreaterf(uint32 *x, uint32 *y, uint32 *out);
+char *test_isgreaterequalf(uint32 *x, uint32 *y, uint32 *out);
+char *test_islessf(uint32 *x, uint32 *y, uint32 *out);
+char *test_islessequalf(uint32 *x, uint32 *y, uint32 *out);
+char *test_islessgreaterf(uint32 *x, uint32 *y, uint32 *out);
+char *test_isunorderedf(uint32 *x, uint32 *y, uint32 *out);
+#endif
@@ -0,0 +1,25 @@
+/*
+ * types.h
+ *
+ * Copyright (c) 2005-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef mathtest_types_h
+#define mathtest_types_h
+
+#include <limits.h>
+
+#if UINT_MAX == 4294967295       /* unsigned int tops out at 2^32-1: use int */
+typedef unsigned int uint32;
+typedef int int32;
+#define I32 ""                   /* NOTE(review): presumably a printf length modifier for these types - confirm */
+#elif ULONG_MAX == 4294967295    /* otherwise fall back to long if that tops out at 2^32-1 */
+typedef unsigned long uint32;
+typedef long int32;
+#define I32 "l"
+#else
+#error Could not find an unsigned 32-bit integer type
+#endif
+
+#endif
@@ -0,0 +1,261 @@
+/*
+ * wrappers.c - wrappers to modify output of MPFR/MPC test functions
+ *
+ * Copyright (c) 2014-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "intern.h"
+
+/*
+ * Reset a wrapper context to the empty state: no operands, no result,
+ * every pointer NULL, every size 0 and need_regen clear.  Must be called
+ * before operands and results are registered.
+ */
+void wrapper_init(wrapperctx *ctx)
+{
+    int i;
+
+    ctx->nops = 0;
+    ctx->nresults = 0;
+    for (i = 0; i < 2; i++) {
+        ctx->mpfr_ops[i] = NULL;
+        ctx->mpc_ops[i] = NULL;
+        ctx->ieee_ops[i] = NULL;
+        /* Also clear the sizes so no field is left indeterminate. */
+        ctx->size_ops[i] = 0;
+    }
+    ctx->mpfr_result = NULL;
+    ctx->mpc_result = NULL;
+    ctx->ieee_result = NULL;
+    ctx->size_result = 0;
+    ctx->need_regen = 0;
+}
+
+/*
+ * Register the next real operand: its MPFR value, its size and its IEEE
+ * words are stored in the next free operand slot.  At most two operands
+ * may be registered (asserted).
+ */
+void wrapper_op_real(wrapperctx *ctx, const mpfr_t r,
+                     int size, const uint32 *ieee)
+{
+    int slot;
+
+    assert(ctx->nops < 2);
+    slot = ctx->nops++;
+    ctx->mpfr_ops[slot] = r;
+    ctx->ieee_ops[slot] = ieee;
+    ctx->size_ops[slot] = size;
+}
+
+/*
+ * Register the next complex operand: its MPC value, its size and its
+ * IEEE words are stored in the next free operand slot.  At most two
+ * operands may be registered (asserted).
+ */
+void wrapper_op_complex(wrapperctx *ctx, const mpc_t c,
+                        int size, const uint32 *ieee)
+{
+    int slot;
+
+    assert(ctx->nops < 2);
+    slot = ctx->nops++;
+    ctx->mpc_ops[slot] = c;
+    ctx->ieee_ops[slot] = ieee;
+    ctx->size_ops[slot] = size;
+}
+
+/*
+ * Register the (single) real result: its MPFR value, its size and the
+ * buffer holding its IEEE words.  Only one result may be registered
+ * (asserted).
+ */
+void wrapper_result_real(wrapperctx *ctx, mpfr_t r,
+                         int size, uint32 *ieee)
+{
+    assert(ctx->nresults < 1);
+    ctx->nresults++;
+    ctx->size_result = size;
+    ctx->ieee_result = ieee;
+    ctx->mpfr_result = r;
+}
+
+/*
+ * Register the (single) complex result: its MPC value, its size and the
+ * buffer holding its IEEE words.  Only one result may be registered
+ * (asserted).
+ */
+void wrapper_result_complex(wrapperctx *ctx, mpc_t c,
+                            int size, uint32 *ieee)
+{
+    assert(ctx->nresults < 1);
+    ctx->nresults++;
+    ctx->size_result = size;
+    ctx->ieee_result = ieee;
+    ctx->mpc_result = c;
+}
+
+/*
+ * Run the function-specific wrappers in order, stopping at the first
+ * NULL entry or after MAXWRAPPERS entries, then run universal_wrapper.
+ *
+ * Returns ctx->need_regen, which the wrapper_set_nan/int/mpfr setters
+ * set to 1 when they rewrite the mpfr/mpc result.
+ */
+int wrapper_run(wrapperctx *ctx, wrapperfunc wrappers[MAXWRAPPERS])
+{
+    int i = 0;
+
+    while (i < MAXWRAPPERS && wrappers[i] != NULL) {
+        wrappers[i](ctx);
+        i++;
+    }
+    universal_wrapper(ctx);
+    return ctx->need_regen;
+}
+
+/*
+ * Return the MPFR value of real operand 'op' (0 or 1), or of the real
+ * result when op is negative.  The requested value must have been
+ * registered (asserted).
+ */
+mpfr_srcptr wrapper_get_mpfr(wrapperctx *ctx, int op)
+{
+    if (op >= 0) {
+        assert(ctx->mpfr_ops[op]);
+        return ctx->mpfr_ops[op];
+    }
+
+    assert(ctx->mpfr_result);
+    return ctx->mpfr_result;
+}
+
+/*
+ * Return the IEEE words of real operand 'op' (0 or 1), or of the real
+ * result when op is negative.  The corresponding MPFR value must have
+ * been registered (asserted).
+ */
+const uint32 *wrapper_get_ieee(wrapperctx *ctx, int op)
+{
+    if (op >= 0) {
+        assert(ctx->mpfr_ops[op]);
+        return ctx->ieee_ops[op];
+    }
+
+    assert(ctx->mpfr_result);
+    return ctx->ieee_result;
+}
+
+/*
+ * Return how many operands have been registered in the context.
+ */
+int wrapper_get_nops(wrapperctx *ctx)
+{
+    const int count = ctx->nops;
+
+    return count;
+}
+
+/*
+ * Return the size recorded for operand 'op' (0 or 1), or for the result
+ * when op is negative.  The operand or result may be real or complex
+ * but must have been registered (asserted).
+ */
+int wrapper_get_size(wrapperctx *ctx, int op)
+{
+    if (op >= 0) {
+        assert(ctx->mpfr_ops[op] || ctx->mpc_ops[op]);
+        return ctx->size_ops[op];
+    }
+
+    assert(ctx->mpfr_result || ctx->mpc_result);
+    return ctx->size_result;
+}
+
+/*
+ * Return 1 if operand 'op' (0 or 1), or the result when op is negative,
+ * was registered as complex, and 0 if it was registered as real.  It
+ * must have been registered one way or the other (asserted).
+ */
+int wrapper_is_complex(wrapperctx *ctx, int op)
+{
+    if (op >= 0) {
+        assert(ctx->mpfr_ops[op] || ctx->mpc_ops[op]);
+        return ctx->mpc_ops[op] ? 1 : 0;
+    }
+
+    assert(ctx->mpfr_result || ctx->mpc_result);
+    return ctx->mpc_result ? 1 : 0;
+}
+
+/*
+ * Return the MPC value of complex operand 'op' (0 or 1), or of the
+ * complex result when op is negative.  The requested value must have
+ * been registered (asserted).
+ */
+mpc_srcptr wrapper_get_mpc(wrapperctx *ctx, int op)
+{
+    if (op >= 0) {
+        assert(ctx->mpc_ops[op]);
+        return ctx->mpc_ops[op];
+    }
+
+    assert(ctx->mpc_result);
+    return ctx->mpc_result;
+}
+
+/*
+ * Return the real part (as MPFR) of complex operand 'op' (0 or 1), or
+ * of the complex result when op is negative.  The requested value must
+ * have been registered (asserted).
+ */
+mpfr_srcptr wrapper_get_mpfr_r(wrapperctx *ctx, int op)
+{
+    if (op >= 0) {
+        assert(ctx->mpc_ops[op]);
+        return mpc_realref(ctx->mpc_ops[op]);
+    }
+
+    assert(ctx->mpc_result);
+    return mpc_realref(ctx->mpc_result);
+}
+
+/*
+ * Return the imaginary part (as MPFR) of complex operand 'op' (0 or 1),
+ * or of the complex result when op is negative.  The requested value
+ * must have been registered (asserted).
+ */
+mpfr_srcptr wrapper_get_mpfr_i(wrapperctx *ctx, int op)
+{
+    if (op >= 0) {
+        assert(ctx->mpc_ops[op]);
+        return mpc_imagref(ctx->mpc_ops[op]);
+    }
+
+    assert(ctx->mpc_result);
+    return mpc_imagref(ctx->mpc_result);
+}
+
+/*
+ * Return the IEEE words of the real part of complex operand 'op'
+ * (0 or 1), or of the complex result when op is negative.  The real
+ * part sits at the start of the IEEE buffer.
+ */
+const uint32 *wrapper_get_ieee_r(wrapperctx *ctx, int op)
+{
+    if (op >= 0) {
+        assert(ctx->mpc_ops[op]);
+        return ctx->ieee_ops[op];
+    }
+
+    assert(ctx->mpc_result);
+    return ctx->ieee_result;
+}
+
+/*
+ * Return the IEEE words of the imaginary part of complex operand 'op'
+ * (0 or 1), or of the complex result when op is negative.  The
+ * imaginary part starts 2 words into an operand buffer and 4 words into
+ * the result buffer.
+ */
+const uint32 *wrapper_get_ieee_i(wrapperctx *ctx, int op)
+{
+    if (op >= 0) {
+        assert(ctx->mpc_ops[op]);
+        return ctx->ieee_ops[op] + 2;
+    }
+
+    assert(ctx->mpc_result);
+    return ctx->ieee_result + 4;
+}
+
+/*
+ * OR the top bit of 'sign' into word 0 of the real result's IEEE form.
+ * Because the bit is ORed in, this can set the result's sign bit but
+ * never clears one that is already set.
+ */
+void wrapper_set_sign(wrapperctx *ctx, uint32 sign)
+{
+    const uint32 signbit = sign & 0x80000000U;
+
+    assert(ctx->mpfr_result);
+    ctx->ieee_result[0] |= signbit;
+}
+
+/*
+ * OR the top bit of 'sign' into word 0 of the complex result's IEEE
+ * form, i.e. the first word of its real part.  The bit can be set but is
+ * never cleared.
+ */
+void wrapper_set_sign_r(wrapperctx *ctx, uint32 sign)
+{
+    const uint32 signbit = sign & 0x80000000U;
+
+    assert(ctx->mpc_result);
+    ctx->ieee_result[0] |= signbit;
+}
+
+/*
+ * OR the top bit of 'sign' into word 4 of the complex result's IEEE
+ * form, i.e. the first word of its imaginary part.  The bit can be set
+ * but is never cleared.
+ */
+void wrapper_set_sign_i(wrapperctx *ctx, uint32 sign)
+{
+    const uint32 signbit = sign & 0x80000000U;
+
+    assert(ctx->mpc_result);
+    ctx->ieee_result[4] |= signbit;
+}
+
+/*
+ * Overwrite the real MPFR result with NaN and flag the context so that
+ * wrapper_run reports need_regen.
+ */
+void wrapper_set_nan(wrapperctx *ctx)
+{
+    mpfr_ptr target;
+
+    assert(ctx->mpfr_result);
+    target = ctx->mpfr_result;
+    mpfr_set_nan(target);
+    ctx->need_regen = 1;
+}
+
+/*
+ * Overwrite the real part of the complex result with NaN and flag the
+ * context so that wrapper_run reports need_regen.
+ */
+void wrapper_set_nan_r(wrapperctx *ctx)
+{
+    mpfr_ptr re;
+
+    assert(ctx->mpc_result);
+    re = mpc_realref(ctx->mpc_result); /* FIXME: better way? */
+    mpfr_set_nan(re);
+    ctx->need_regen = 1;
+}
+
+/*
+ * Overwrite the imaginary part of the complex result with NaN and flag
+ * the context so that wrapper_run reports need_regen.
+ */
+void wrapper_set_nan_i(wrapperctx *ctx)
+{
+    mpfr_ptr im;
+
+    assert(ctx->mpc_result);
+    im = mpc_imagref(ctx->mpc_result); /* FIXME: better way? */
+    mpfr_set_nan(im);
+    ctx->need_regen = 1;
+}
+
+/*
+ * Overwrite the real MPFR result with the integer 'val' (rounding mode
+ * GMP_RNDN) and flag the context so that wrapper_run reports need_regen.
+ */
+void wrapper_set_int(wrapperctx *ctx, int val)
+{
+    mpfr_ptr target;
+
+    assert(ctx->mpfr_result);
+    target = ctx->mpfr_result;
+    mpfr_set_si(target, val, GMP_RNDN);
+    ctx->need_regen = 1;
+}
+
+/*
+ * Overwrite the real part of the complex result with the integer 'val'
+ * (rounding mode GMP_RNDN) and flag the context so that wrapper_run
+ * reports need_regen.
+ */
+void wrapper_set_int_r(wrapperctx *ctx, int val)
+{
+    mpfr_ptr re;
+
+    assert(ctx->mpc_result);
+    re = mpc_realref(ctx->mpc_result);
+    mpfr_set_si(re, val, GMP_RNDN);
+    ctx->need_regen = 1;
+}
+
+/*
+ * Overwrite the imaginary part of the complex result with the integer
+ * 'val' (rounding mode GMP_RNDN) and flag the context so that
+ * wrapper_run reports need_regen.
+ */
+void wrapper_set_int_i(wrapperctx *ctx, int val)
+{
+    assert(ctx->mpc_result);
+    /* Target the imaginary part; the real part belongs to wrapper_set_int_r. */
+    mpfr_set_si(mpc_imagref(ctx->mpc_result), val, GMP_RNDN);
+    ctx->need_regen = 1;
+}
+
+/*
+ * Overwrite the real MPFR result with a copy of 'val' (rounding mode
+ * GMP_RNDN) and flag the context so that wrapper_run reports need_regen.
+ */
+void wrapper_set_mpfr(wrapperctx *ctx, const mpfr_t val)
+{
+    mpfr_ptr target;
+
+    assert(ctx->mpfr_result);
+    target = ctx->mpfr_result;
+    mpfr_set(target, val, GMP_RNDN);
+    ctx->need_regen = 1;
+}
+
+/*
+ * Overwrite the real part of the complex result with a copy of 'val'
+ * (rounding mode GMP_RNDN) and flag the context so that wrapper_run
+ * reports need_regen.
+ */
+void wrapper_set_mpfr_r(wrapperctx *ctx, const mpfr_t val)
+{
+    mpfr_ptr re;
+
+    assert(ctx->mpc_result);
+    re = mpc_realref(ctx->mpc_result);
+    mpfr_set(re, val, GMP_RNDN);
+    ctx->need_regen = 1;
+}
+
+/*
+ * Overwrite the imaginary part of the complex result with a copy of
+ * 'val' (rounding mode GMP_RNDN) and flag the context so that
+ * wrapper_run reports need_regen.
+ */
+void wrapper_set_mpfr_i(wrapperctx *ctx, const mpfr_t val)
+{
+    assert(ctx->mpc_result);
+    /* Target the imaginary part; the real part belongs to wrapper_set_mpfr_r. */
+    mpfr_set(mpc_imagref(ctx->mpc_result), val, GMP_RNDN);
+    ctx->need_regen = 1;
+}
@@ -0,0 +1,114 @@
+/*
+ * wrappers.h - wrappers to modify output of MPFR/MPC test functions
+ *
+ * Copyright (c) 2014-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+typedef struct {
+    /* Structure type should be considered opaque outside wrappers.c,
+     * though we have to define it here so its size is known. */
+    int nops;                   /* operands registered so far via wrapper_op_* */
+    int nresults;               /* results registered so far via wrapper_result_* */
+    mpfr_srcptr mpfr_ops[2];    /* real operands; NULL if the slot is unset or complex */
+    mpfr_ptr mpfr_result;       /* real result, or NULL */
+    mpc_srcptr mpc_ops[2];      /* complex operands; NULL if the slot is unset or real */
+    mpc_ptr mpc_result;         /* complex result, or NULL */
+    const uint32 *ieee_ops[2];  /* IEEE words of each operand */
+    uint32 *ieee_result;        /* IEEE words of the result */
+    int size_ops[2];            /* size of each operand, in 32-bit words */
+    int size_result;            /* size of the result, in 32-bit words */
+    int need_regen;             /* set to 1 once a wrapper rewrites the mpfr/mpc result */
+} wrapperctx;
+
+typedef void (*wrapperfunc)(wrapperctx *ctx);
+#define MAXWRAPPERS 3
+
+/*
+ * Functions for the test harness to call.
+ *
+ * When the test harness executes a test function, it should
+ * initialise a wrapperctx with wrapper_init, then provide all the
+ * operands and results in both mpfr/mpc and IEEE (+ extrabits)
+ * formats via wrapper_op_* and wrapper_result_*. Then it should run
+ * the function's wrappers using wrapper_run(), and if that returns
+ * true then the primary result has been rewritten in mpfr/mpc format
+ * and it should therefore retranslate into IEEE.
+ *
+ * 'size' in all prototypes below represents an FP type by giving the
+ * number of 32-bit words it requires, so 1=float and 2=double. Input
+ * operands will be that many words (or that many for both their real
+ * and imag parts); outputs will have one extra word for 'extrabits'.
+ *
+ * This system only applies at all to reference functions using
+ * mpfr/mpc. The seminumerical functions we implement in pure IEEE
+ * form are expected to handle all their own special cases correctly.
+ */
+
+void wrapper_init(wrapperctx *ctx);
+
+/* Real operand. */
+void wrapper_op_real(wrapperctx *ctx, const mpfr_t r,
+                     int size, const uint32 *ieee);
+
+/* Complex operand. Real part starts at ieee[0], the imag part at ieee[2]. */
+void wrapper_op_complex(wrapperctx *ctx, const mpc_t c,
+                        int size, const uint32 *ieee);
+
+/* Real result. ieee contains size+1 words, as discussed above. */
+void wrapper_result_real(wrapperctx *ctx, mpfr_t r,
+                         int size, uint32 *ieee);
+
+/* Complex result. ieee contains size+1 words of real part starting at
+ * ieee[0], and another size+1 of imag part starting at ieee[4]. */
+void wrapper_result_complex(wrapperctx *ctx, mpc_t c,
+                            int size, uint32 *ieee);
+
+int wrapper_run(wrapperctx *ctx, wrapperfunc wrappers[MAXWRAPPERS]);
+
+/*
+ * Functions for wrappers to call. 'op' indicates which operand is
+ * being requested: 0,1 means first and second, and -1 means the
+ * result.
+ */
+
+mpfr_srcptr wrapper_get_mpfr(wrapperctx *ctx, int op);
+const uint32 *wrapper_get_ieee(wrapperctx *ctx, int op);
+
+mpc_srcptr wrapper_get_mpc(wrapperctx *ctx, int op);
+mpfr_srcptr wrapper_get_mpfr_r(wrapperctx *ctx, int op);
+mpfr_srcptr wrapper_get_mpfr_i(wrapperctx *ctx, int op);
+const uint32 *wrapper_get_ieee_r(wrapperctx *ctx, int op);
+const uint32 *wrapper_get_ieee_i(wrapperctx *ctx, int op);
+
+/* Query operand count + types */
+int wrapper_get_nops(wrapperctx *ctx);
+int wrapper_get_size(wrapperctx *ctx, int op);
+int wrapper_is_complex(wrapperctx *ctx, int op);
+
+/* Change just the sign of the result. Only the top bit of 'sign' is used. */
+void wrapper_set_sign(wrapperctx *ctx, uint32 sign);
+void wrapper_set_sign_r(wrapperctx *ctx, uint32 sign);
+void wrapper_set_sign_i(wrapperctx *ctx, uint32 sign);
+
+/* Set a result to NaN. */
+void wrapper_set_nan(wrapperctx *ctx);
+void wrapper_set_nan_r(wrapperctx *ctx);
+void wrapper_set_nan_i(wrapperctx *ctx);
+
+/* Set a result to an integer value (converted to the appropriate
+ * float format). */
+void wrapper_set_int(wrapperctx *ctx, int val);
+void wrapper_set_int_r(wrapperctx *ctx, int val);
+void wrapper_set_int_i(wrapperctx *ctx, int val);
+
+/* Set a result to a new MPFR float. */
+void wrapper_set_mpfr(wrapperctx *ctx, const mpfr_t val);
+void wrapper_set_mpfr_r(wrapperctx *ctx, const mpfr_t val);
+void wrapper_set_mpfr_i(wrapperctx *ctx, const mpfr_t val);
+
+/*
+ * A universal wrapper called for _all_ functions, that doesn't have
+ * to be specified individually everywhere.
+ */
+void universal_wrapper(wrapperctx *ctx);
@@ -0,0 +1,294 @@
+#!/bin/bash
+
+# ULP error check script.
+#
+# Copyright (c) 2019, Arm Limited.
+# SPDX-License-Identifier: MIT
+
+#set -x
+set -eu
+
+# cd to bin directory.
+cd "${0%/*}"
+
+rmodes='n u d z'
+#rmodes=n
+flags="${ULPFLAGS:--q}"
+emu="$@"
+
+FAIL=0
+PASS=0
+
+t() {
+	[ $r = "n" ] && Lt=$L || Lt=$Ldir
+	$emu ./ulp -r $r -e $Lt $flags "$@" && PASS=$((PASS+1)) || FAIL=$((FAIL+1))
+}
+
+# Probe ./ulp with the given arguments, discarding its standard output;
+# the exit status of ulp is the status of check.
+check() {
+	$emu ./ulp -f -q "$@" 1>/dev/null
+}
+
+Ldir=0.5
+for r in $rmodes
+do
+L=0.01
+t exp  0 0xffff000000000000 10000
+t exp  0x1p-6     0x1p6     40000
+t exp -0x1p-6    -0x1p6     40000
+t exp  633.3      733.3     10000
+t exp -633.3     -777.3     10000
+
+L=0.01
+t exp2  0 0xffff000000000000 10000
+t exp2  0x1p-6     0x1p6     40000
+t exp2 -0x1p-6    -0x1p6     40000
+t exp2  633.3      733.3     10000
+t exp2 -633.3     -777.3     10000
+
+L=0.02
+t log  0 0xffff000000000000 10000
+t log  0x1p-4    0x1p4      40000
+t log  0         inf        40000
+
+L=0.05
+t log2  0 0xffff000000000000 10000
+t log2  0x1p-4    0x1p4      40000
+t log2  0         inf        40000
+
+L=0.05
+t pow  0.5  2.0  x  0  inf 20000
+t pow -0.5 -2.0  x  0  inf 20000
+t pow  0.5  2.0  x -0 -inf 20000
+t pow -0.5 -2.0  x -0 -inf 20000
+t pow  0.5  2.0  x  0x1p-10  0x1p10  40000
+t pow  0.5  2.0  x -0x1p-10 -0x1p10  40000
+t pow  0    inf  x    0.5      2.0   80000
+t pow  0    inf  x   -0.5     -2.0   80000
+t pow  0x1.fp-1   0x1.08p0  x  0x1p8 0x1p17  80000
+t pow  0x1.fp-1   0x1.08p0  x -0x1p8 -0x1p17 80000
+t pow  0         0x1p-1000  x  0 1.0 50000
+t pow  0x1p1000        inf  x  0 1.0 50000
+t pow  0x1.ffffffffffff0p-1  0x1.0000000000008p0 x 0x1p60 0x1p68 50000
+t pow  0x1.ffffffffff000p-1  0x1p0 x 0x1p50 0x1p52 50000
+t pow -0x1.ffffffffff000p-1 -0x1p0 x 0x1p50 0x1p52 50000
+
+L=0.01
+t expf  0    0xffff0000    10000
+t expf  0x1p-14   0x1p8    50000
+t expf -0x1p-14  -0x1p8    50000
+
+L=0.01
+t exp2f  0    0xffff0000   10000
+t exp2f  0x1p-14   0x1p8   50000
+t exp2f -0x1p-14  -0x1p8   50000
+
+L=0.32
+t logf  0    0xffff0000    10000
+t logf  0x1p-4    0x1p4    50000
+t logf  0         inf      50000
+
+L=0.26
+t log2f  0    0xffff0000   10000
+t log2f  0x1p-4    0x1p4   50000
+t log2f  0         inf     50000
+
+L=0.06
+t sinf  0    0xffff0000    10000
+t sinf  0x1p-14  0x1p54    50000
+t sinf -0x1p-14 -0x1p54    50000
+
+L=0.06
+t cosf  0    0xffff0000    10000
+t cosf  0x1p-14  0x1p54    50000
+t cosf -0x1p-14 -0x1p54    50000
+
+L=0.06
+t sincosf_sinf  0    0xffff0000    10000
+t sincosf_sinf  0x1p-14  0x1p54    50000
+t sincosf_sinf -0x1p-14 -0x1p54    50000
+
+L=0.06
+t sincosf_cosf  0    0xffff0000    10000
+t sincosf_cosf  0x1p-14  0x1p54    50000
+t sincosf_cosf -0x1p-14 -0x1p54    50000
+
+L=0.4
+t powf  0x1p-1   0x1p1  x  0x1p-7 0x1p7   50000
+t powf  0x1p-1   0x1p1  x -0x1p-7 -0x1p7  50000
+t powf  0x1p-70 0x1p70  x  0x1p-1 0x1p1   50000
+t powf  0x1p-70 0x1p70  x  -0x1p-1 -0x1p1 50000
+t powf  0x1.ep-1 0x1.1p0 x  0x1p8 0x1p14  50000
+t powf  0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p14 50000
+done
+
+# vector functions
+Ldir=0.5
+r='n'
+flags="${ULPFLAGS:--q} -f"
+runs=
+check __s_exp 1 && runs=1
+runv=
+check __v_exp 1 && runv=1
+runvn=
+check __vn_exp 1 && runvn=1
+
+range_exp='
+  0 0xffff000000000000 10000
+  0x1p-6     0x1p6     400000
+ -0x1p-6    -0x1p6     400000
+  633.3      733.3     10000
+ -633.3     -777.3     10000
+'
+
+range_log='
+  0 0xffff000000000000 10000
+  0x1p-4     0x1p4     400000
+  0          inf       400000
+'
+
+range_pow='
+ 0x1p-1   0x1p1  x  0x1p-10 0x1p10   50000
+ 0x1p-1   0x1p1  x -0x1p-10 -0x1p10  50000
+ 0x1p-500 0x1p500  x  0x1p-1 0x1p1   50000
+ 0x1p-500 0x1p500  x  -0x1p-1 -0x1p1 50000
+ 0x1.ep-1 0x1.1p0 x  0x1p8 0x1p16    50000
+ 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p16   50000
+'
+
+range_sin='
+  0 0xffff000000000000 10000
+  0x1p-4     0x1p4     400000
+ -0x1p-23    0x1p23    400000
+'
+range_cos="$range_sin"
+
+range_expf='
+  0    0xffff0000    10000
+  0x1p-14   0x1p8    500000
+ -0x1p-14  -0x1p8    500000
+'
+
+range_expf_1u="$range_expf"
+range_exp2f="$range_expf"
+range_exp2f_1u="$range_expf"
+
+range_logf='
+ 0    0xffff0000    10000
+ 0x1p-4    0x1p4    500000
+'
+
+range_sinf='
+ 0    0xffff0000    10000
+ 0x1p-4    0x1p4    300000
+-0x1p-9   -0x1p9    300000
+'
+range_cosf="$range_sinf"
+
+range_powf='
+ 0x1p-1   0x1p1  x  0x1p-7 0x1p7   50000
+ 0x1p-1   0x1p1  x -0x1p-7 -0x1p7  50000
+ 0x1p-70 0x1p70  x  0x1p-1 0x1p1   50000
+ 0x1p-70 0x1p70  x  -0x1p-1 -0x1p1 50000
+ 0x1.ep-1 0x1.1p0 x  0x1p8 0x1p14  50000
+ 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p14 50000
+'
+
+# error limits
+L_exp=1.9
+L_log=1.2
+L_pow=0.05
+L_sin=3.0
+L_cos=3.0
+L_expf=1.49
+L_expf_1u=0.4
+L_exp2f=1.49
+L_exp2f_1u=0.4
+L_logf=2.9
+L_sinf=1.4
+L_cosf=1.4
+L_powf=2.1
+
+while read G F R  # one table row: G = group, F = symbol handed to t, R = run flag
+do
+	[ "$R" = 1 ] || continue  # run a row only when its flag expanded to 1
+	case "$G" in \#*) continue ;; esac  # skip comment rows of the table
+	eval range="\${range_$G}"  # indirect lookup: contents of range_<group>
+	eval L="\${L_$G}"  # indirect lookup: error limit L_<group>, read by t
+	while read X  # one line of the range table per test
+	do
+		[ -n "$X" ] || continue  # ignore blank lines
+		case "$X" in \#*) continue ;; esac
+		t $F $X  # $X is unquoted, so it splits into separate arguments
+	done << EOF
+$range
+EOF
+done << EOF
+# group symbol run
+exp  __s_exp       $runs
+exp  __v_exp       $runv
+exp  __vn_exp      $runvn
+exp  _ZGVnN2v_exp  $runvn
+
+log  __s_log       $runs
+log  __v_log       $runv
+log  __vn_log      $runvn
+log  _ZGVnN2v_log  $runvn
+
+pow __s_pow       $runs
+pow __v_pow       $runv
+pow __vn_pow      $runvn
+pow _ZGVnN2vv_pow $runvn
+
+sin __s_sin       $runs
+sin __v_sin       $runv
+sin __vn_sin      $runvn
+sin _ZGVnN2v_sin  $runvn
+
+cos __s_cos       $runs
+cos __v_cos       $runv
+cos __vn_cos      $runvn
+cos _ZGVnN2v_cos  $runvn
+
+expf __s_expf      $runs
+expf __v_expf      $runv
+expf __vn_expf     $runvn
+expf _ZGVnN4v_expf $runvn
+
+expf_1u __s_expf_1u   $runs
+expf_1u __v_expf_1u   $runv
+expf_1u __vn_expf_1u  $runvn
+
+exp2f __s_exp2f      $runs
+exp2f __v_exp2f      $runv
+exp2f __vn_exp2f     $runvn
+exp2f _ZGVnN4v_exp2f $runvn
+
+exp2f_1u __s_exp2f_1u  $runs
+exp2f_1u __v_exp2f_1u  $runv
+exp2f_1u __vn_exp2f_1u $runvn
+
+logf __s_logf      $runs
+logf __v_logf      $runv
+logf __vn_logf     $runvn
+logf _ZGVnN4v_logf $runvn
+
+sinf __s_sinf      $runs
+sinf __v_sinf      $runv
+sinf __vn_sinf     $runvn
+sinf _ZGVnN4v_sinf $runvn
+
+cosf __s_cosf      $runs
+cosf __v_cosf      $runv
+cosf __vn_cosf     $runvn
+cosf _ZGVnN4v_cosf $runvn
+
+powf __s_powf       $runs
+powf __v_powf       $runv
+powf __vn_powf      $runvn
+powf _ZGVnN4vv_powf $runvn
+EOF
+
+[ 0 -eq $FAIL ] || {
+	echo "FAILED $FAIL PASSED $PASS"
+	exit 1
+}
@@ -0,0 +1,25 @@
+; cosf.tst - Directed test cases for SP cosine
+;
+; Copyright (c) 2007-2018, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+func=cosf op1=7fc00001 result=7fc00001 errno=0
+func=cosf op1=ffc00001 result=7fc00001 errno=0
+func=cosf op1=7f800001 result=7fc00001 errno=0 status=i
+func=cosf op1=ff800001 result=7fc00001 errno=0 status=i
+func=cosf op1=7f800000 result=7fc00001 errno=EDOM status=i
+func=cosf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=cosf op1=00000000 result=3f800000 errno=0
+func=cosf op1=80000000 result=3f800000 errno=0
+; SDCOMP-26094: check cosf in the cases for which the range reducer
+; returns values furthest beyond its nominal upper bound of pi/4.
+func=cosf op1=46427f1b result=3f34dc5c.565 error=0
+func=cosf op1=4647e568 result=3f34dc33.c1f error=0
+func=cosf op1=46428bac result=bf34dbf2.8e3 error=0
+func=cosf op1=4647f1f9 result=bf34dbc9.f9b error=0
+func=cosf op1=4647fe8a result=3f34db60.313 error=0
+func=cosf op1=45d8d7f1 result=bf35006a.7fd error=0
+func=cosf op1=45d371a4 result=3f350056.39b error=0
+func=cosf op1=45ce0b57 result=bf350041.f38 error=0
+func=cosf op1=45d35882 result=bf34ffec.868 error=0
+func=cosf op1=45cdf235 result=3f34ffd8.404 error=0
@@ -0,0 +1,31 @@
+; Directed test cases for exp
+;
+; Copyright (c) 2018, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+func=exp op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=exp op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=exp op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=exp op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=exp op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=exp op1=fff00000.00000000 result=00000000.00000000 errno=0
+func=exp op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
+func=exp op1=ffefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
+func=exp op1=00000000.00000000 result=3ff00000.00000000 errno=0
+func=exp op1=80000000.00000000 result=3ff00000.00000000 errno=0
+func=exp op1=00000000.00000001 result=3ff00000.00000000 errno=0
+func=exp op1=80000000.00000001 result=3ff00000.00000000 errno=0
+func=exp op1=3c900000.00000000 result=3ff00000.00000000.400 errno=0
+func=exp op1=bc900000.00000000 result=3fefffff.ffffffff.800 errno=0
+func=exp op1=3fe00000.00000000 result=3ffa6129.8e1e069b.c97 errno=0
+func=exp op1=bfe00000.00000000 result=3fe368b2.fc6f9609.fe8 errno=0
+func=exp op1=3ff00000.00000000 result=4005bf0a.8b145769.535 errno=0
+func=exp op1=bff00000.00000000 result=3fd78b56.362cef37.c6b errno=0
+func=exp op1=40000000.00000000 result=401d8e64.b8d4ddad.cc3 errno=0
+func=exp op1=c0000000.00000000 result=3fc152aa.a3bf81cb.9fe errno=0
+func=exp op1=3ff12345.6789abcd result=40075955.c34718ed.6e3 errno=0
+func=exp op1=40862e42.fefa39ef result=7fefffff.ffffff2a.1b1 errno=0
+func=exp op1=40862e42.fefa39f0 result=7ff00000.00000000 errno=ERANGE status=ox
+func=exp op1=c0874910.d52d3051 result=00000000.00000001 status=ux
+func=exp op1=c0874910.d52d3052 result=00000000.00000000 errno=ERANGE status=ux
+func=exp op1=c085d589.f2fe5107 result=00f00000.000000f1.46b errno=0
@@ -0,0 +1,30 @@
+; Directed test cases for exp2
+;
+; Copyright (c) 2018, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+func=exp2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=exp2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=exp2 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=exp2 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=exp2 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=exp2 op1=fff00000.00000000 result=00000000.00000000 errno=0
+func=exp2 op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
+func=exp2 op1=ffefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
+func=exp2 op1=00000000.00000000 result=3ff00000.00000000 errno=0
+func=exp2 op1=80000000.00000000 result=3ff00000.00000000 errno=0
+func=exp2 op1=00000000.00000001 result=3ff00000.00000000 errno=0
+func=exp2 op1=80000000.00000001 result=3ff00000.00000000 errno=0
+func=exp2 op1=3ca00000.00000000 result=3ff00000.00000000.58c errno=0
+func=exp2 op1=bc900000.00000000 result=3fefffff.ffffffff.a74 errno=0
+func=exp2 op1=3fe00000.00000000 result=3ff6a09e.667f3bcc.909 errno=0
+func=exp2 op1=bfe00000.00000000 result=3fe6a09e.667f3bcc.909 errno=0
+func=exp2 op1=3ff00000.00000000 result=40000000.00000000 errno=0
+func=exp2 op1=bff00000.00000000 result=3fe00000.00000000 errno=0
+func=exp2 op1=40000000.00000000 result=40100000.00000000 errno=0
+func=exp2 op1=c0000000.00000000 result=3fd00000.00000000 errno=0
+func=exp2 op1=3ff12345.6789abcd result=4000cef3.c5d12321.663 errno=0
+func=exp2 op1=408fffff.ffffffff result=7fefffff.fffffd3a.37a errno=0
+func=exp2 op1=40900000.00000000 result=7ff00000.00000000 errno=ERANGE status=ox
+func=exp2 op1=c090ca00.00000000 result=00000000.00000000.b50 status=ux
+func=exp2 op1=c090cc00.00000000 result=00000000.00000000 errno=ERANGE status=ux
@@ -0,0 +1,25 @@
+; exp2f.tst - Directed test cases for exp2f
+;
+; Copyright (c) 2017-2018, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+func=exp2f op1=7fc00001 result=7fc00001 errno=0
+func=exp2f op1=ffc00001 result=7fc00001 errno=0
+func=exp2f op1=7f800001 result=7fc00001 errno=0 status=i
+func=exp2f op1=ff800001 result=7fc00001 errno=0 status=i
+func=exp2f op1=7f800000 result=7f800000 errno=0
+func=exp2f op1=7f7fffff result=7f800000 errno=ERANGE status=ox
+func=exp2f op1=ff800000 result=00000000 errno=0
+func=exp2f op1=ff7fffff result=00000000 errno=ERANGE status=ux
+func=exp2f op1=00000000 result=3f800000 errno=0
+func=exp2f op1=80000000 result=3f800000 errno=0
+func=exp2f op1=42fa0001 result=7e00002c.5c8 errno=0
+func=exp2f op1=42ffffff result=7f7fffa7.470 errno=0
+func=exp2f op1=43000000 result=7f800000 errno=ERANGE status=ox
+func=exp2f op1=43000001 result=7f800000 errno=ERANGE status=ox
+func=exp2f op1=c2fa0001 result=00ffffa7.470 errno=0
+func=exp2f op1=c2fc0000 result=00800000 errno=0
+func=exp2f op1=c2fc0001 result=007fffd3.a38 errno=0 status=ux
+func=exp2f op1=c3150000 result=00000001 errno=0
+func=exp2f op1=c3158000 result=00000000.800 errno=ERANGE status=ux
+func=exp2f op1=c3165432 result=00000000.4bd errno=ERANGE status=ux
@@ -0,0 +1,23 @@
+; expf.tst - Directed test cases for expf
+;
+; Copyright (c) 2007-2018, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+func=expf op1=7fc00001 result=7fc00001 errno=0
+func=expf op1=ffc00001 result=7fc00001 errno=0
+func=expf op1=7f800001 result=7fc00001 errno=0 status=i
+func=expf op1=ff800001 result=7fc00001 errno=0 status=i
+func=expf op1=7f800000 result=7f800000 errno=0
+func=expf op1=7f7fffff result=7f800000 errno=ERANGE status=ox
+func=expf op1=ff800000 result=00000000 errno=0
+func=expf op1=ff7fffff result=00000000 errno=ERANGE status=ux
+func=expf op1=00000000 result=3f800000 errno=0
+func=expf op1=80000000 result=3f800000 errno=0
+func=expf op1=42affff8 result=7ef87ed4.e0c errno=0
+func=expf op1=42b00008 result=7ef88698.f67 errno=0
+func=expf op1=42cffff8 result=7f800000 errno=ERANGE status=ox
+func=expf op1=42d00008 result=7f800000 errno=ERANGE status=ox
+func=expf op1=c2affff8 result=0041eecc.041 errno=0 status=ux
+func=expf op1=c2b00008 result=0041ecbc.95e errno=0 status=ux
+func=expf op1=c2cffff8 result=00000000 errno=ERANGE status=ux
+func=expf op1=c2d00008 result=00000000 errno=ERANGE status=ux
@@ -0,0 +1,21 @@
+; Directed test cases for log
+;
+; Copyright (c) 2018, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+func=log op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=log op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=log op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=log op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=log op1=7fefffff.ffffffff result=40862e42.fefa39ef.354 errno=0
+func=log op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
+func=log op1=3ff00000.00000000 result=00000000.00000000 errno=0
+func=log op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=log op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=log op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=log op1=00000000.00000001 result=c0874385.446d71c3.639 errno=0
+func=log op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
+func=log op1=40000000.00000000 result=3fe62e42.fefa39ef.358 errno=0
+func=log op1=3fe00000.00000000 result=bfe62e42.fefa39ef.358 errno=0
@@ -0,0 +1,21 @@
+; Directed test cases for log2
+;
+; Copyright (c) 2018, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+func=log2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=log2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=log2 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log2 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log2 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=log2 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=log2 op1=7fefffff.ffffffff result=408fffff.ffffffff.ffa errno=0
+func=log2 op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
+func=log2 op1=3ff00000.00000000 result=00000000.00000000 errno=0
+func=log2 op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=log2 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=log2 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=log2 op1=00000000.00000001 result=c090c800.00000000 errno=0
+func=log2 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
+func=log2 op1=40000000.00000000 result=3ff00000.00000000 errno=0
+func=log2 op1=3fe00000.00000000 result=bff00000.00000000 errno=0
@@ -0,0 +1,27 @@
+; log2f.tst - Directed test cases for log2f
+;
+; Copyright (c) 2017-2018, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+func=log2f op1=7fc00001 result=7fc00001 errno=0
+func=log2f op1=ffc00001 result=7fc00001 errno=0
+func=log2f op1=7f800001 result=7fc00001 errno=0 status=i
+func=log2f op1=ff800001 result=7fc00001 errno=0 status=i
+func=log2f op1=ff810000 result=7fc00001 errno=0 status=i
+func=log2f op1=7f800000 result=7f800000 errno=0
+func=log2f op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=log2f op1=3f800000 result=00000000 errno=0
+func=log2f op1=00000000 result=ff800000 errno=ERANGE status=z
+func=log2f op1=80000000 result=ff800000 errno=ERANGE status=z
+func=log2f op1=80000001 result=7fc00001 errno=EDOM status=i
+
+func=log2f op1=3f7d70a4 result=bc6d8f8b.7d4 error=0
+func=log2f op1=3f604189 result=be4394c8.395 error=0
+func=log2f op1=3f278034 result=bf1caa73.88e error=0
+func=log2f op1=3edd3c36 result=bf9af3b9.619 error=0
+func=log2f op1=3e61259a result=c00bdb95.650 error=0
+func=log2f op1=3f8147ae result=3c6b3267.d6a error=0
+func=log2f op1=3f8fbe77 result=3e2b5fe2.a1c error=0
+func=log2f op1=3fac3eea result=3edb4d5e.1fc error=0
+func=log2f op1=3fd6e632 result=3f3f5d3a.827 error=0
+func=log2f op1=40070838 result=3f89e055.a0a error=0
@@ -0,0 +1,69 @@
+; logf.tst - Directed test cases for logf
+;
+; Copyright (c) 2007-2018, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+func=logf op1=7fc00001 result=7fc00001 errno=0
+func=logf op1=ffc00001 result=7fc00001 errno=0
+func=logf op1=7f800001 result=7fc00001 errno=0 status=i
+func=logf op1=ff800001 result=7fc00001 errno=0 status=i
+func=logf op1=ff810000 result=7fc00001 errno=0 status=i
+func=logf op1=7f800000 result=7f800000 errno=0
+func=logf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=logf op1=3f800000 result=00000000 errno=0
+func=logf op1=00000000 result=ff800000 errno=ERANGE status=z
+func=logf op1=80000000 result=ff800000 errno=ERANGE status=z
+func=logf op1=80000001 result=7fc00001 errno=EDOM status=i
+
+; Directed tests for the special-case handling of log of things
+; very near 1
+func=logf op1=3f781e49 result=bd0016d9.4ae error=0
+func=logf op1=3f78e602 result=bce675e5.f31 error=0
+func=logf op1=3f844a18 result=3d07030e.ae1 error=0
+func=logf op1=3f79b55b result=bccbd88a.6cb error=0
+func=logf op1=3f7e2f5f result=bbe92452.74a error=0
+func=logf op1=3f7f1c03 result=bb6462c1.c2c error=0
+func=logf op1=3f78b213 result=bced23e2.f56 error=0
+func=logf op1=3f87d5c0 result=3d735847.b7a error=0
+func=logf op1=3f7fa6ad result=bab2c532.12d error=0
+func=logf op1=3f87c06a result=3d70d4b6.b5e error=0
+func=logf op1=3f79cf30 result=bcc88942.6e9 error=0
+func=logf op1=3f794c77 result=bcd94c6f.b1e error=0
+func=logf op1=3f835655 result=3cd2d8a0.0bf error=0
+func=logf op1=3f81b5c0 result=3c596d08.520 error=0
+func=logf op1=3f805e2f result=3b3c18d4.d2b error=0
+func=logf op1=3f7aa609 result=bcad0f90.fdb error=0
+func=logf op1=3f7a9091 result=bcafcd59.f83 error=0
+func=logf op1=3f7a7475 result=bcb36490.a0f error=0
+func=logf op1=3f823417 result=3c8bd287.fa6 error=0
+func=logf op1=3f7fbcc3 result=ba868bac.14c error=0
+func=logf op1=3f805fc9 result=3b3f4a76.169 error=0
+func=logf op1=3f833d43 result=3cccbc4f.cb7 error=0
+func=logf op1=3f7cb1de result=bc54e91e.6b5 error=0
+func=logf op1=3f7f2793 result=bb58c8af.bfc error=0
+func=logf op1=3f7bb8c3 result=bc8a0fc9.93c error=0
+func=logf op1=3f81d349 result=3c67fe09.42e error=0
+func=logf op1=3f7c254d result=bc788cf4.610 error=0
+func=logf op1=3f7f789d result=bb0786d9.6c6 error=0
+func=logf op1=3f7ed1f2 result=bb97605f.963 error=0
+func=logf op1=3f826067 result=3c96b4af.5e1 error=0
+func=logf op1=3f821a68 result=3c8581f9.dac error=0
+func=logf op1=3f864e1a result=3d44f368.e66 error=0
+func=logf op1=3f7fea3d result=b9ae1f66.b58 error=0
+func=logf op1=3f7cf4f5 result=bc43ed76.1c5 error=0
+func=logf op1=3f84c223 result=3d15814e.36d error=0
+func=logf op1=3f7dae6d result=bc1511d5.0aa error=0
+func=logf op1=3f7c0a3c result=bc7f6c0d.758 error=0
+func=logf op1=3f858b22 result=3d2da861.f36 error=0
+func=logf op1=3f85d7c7 result=3d36d490.ee9 error=0
+func=logf op1=3f7f2109 result=bb5f5851.2ed error=0
+func=logf op1=3f83809c result=3cdd23f7.6b1 error=0
+func=logf op1=3f83d96e result=3cf2b9c8.0b1 error=0
+func=logf op1=3f86ca84 result=3d53bee8.53f error=0
+func=logf op1=3f83548e result=3cd269c3.39d error=0
+func=logf op1=3f7c199c result=bc7b84b6.0da error=0
+func=logf op1=3f83133f result=3cc27c0a.9dd error=0
+func=logf op1=3f7c97b4 result=bc5b89dd.399 error=0
+func=logf op1=3f810bc1 result=3c05553c.011 error=0
+func=logf op1=3f7dadb8 result=bc153f7e.fbb error=0
+func=logf op1=3f87be56 result=3d709602.538 error=0
@@ -0,0 +1,246 @@
+; powf.tst - Directed test cases for powf
+;
+; Copyright (c) 2007-2018, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+func=powf op1=7f800001 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=7fc00001 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=ffc00001 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=7f800000 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=40800000 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=40400000 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=3f000000 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=00000000 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=80000000 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=bf000000 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=c0400000 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=c0800000 result=7fc00001 errno=0 status=i
+func=powf op1=7f800001 op2=ff800000 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=7fc00001 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=ffc00001 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=7f800000 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=40800000 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=40400000 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=3f000000 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=00000000 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=80000000 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=bf000000 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=c0400000 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=c0800000 result=7fc00001 errno=0 status=i
+func=powf op1=ff800001 op2=ff800000 result=7fc00001 errno=0 status=i
+func=powf op1=7fc00001 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=7fc00001 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=7fc00001 op2=7fc00001 result=7fc00001 errno=0
+func=powf op1=7fc00001 op2=ffc00001 result=7fc00001 errno=0
+func=powf op1=7fc00001 op2=7f800000 result=7fc00001 errno=0
+func=powf op1=7fc00001 op2=40800000 result=7fc00001 errno=0
+func=powf op1=7fc00001 op2=40400000 result=7fc00001 errno=0
+func=powf op1=7fc00001 op2=3f000000 result=7fc00001 errno=0
+func=powf op1=7fc00001 op2=00000000 result=3f800000 errno=0
+func=powf op1=7fc00001 op2=80000000 result=3f800000 errno=0
+func=powf op1=7fc00001 op2=bf000000 result=7fc00001 errno=0
+func=powf op1=7fc00001 op2=c0400000 result=7fc00001 errno=0
+func=powf op1=7fc00001 op2=c0800000 result=7fc00001 errno=0
+func=powf op1=7fc00001 op2=ff800000 result=7fc00001 errno=0
+func=powf op1=ffc00001 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=ffc00001 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=ffc00001 op2=7fc00001 result=7fc00001 errno=0
+func=powf op1=ffc00001 op2=ffc00001 result=7fc00001 errno=0
+func=powf op1=ffc00001 op2=7f800000 result=7fc00001 errno=0
+func=powf op1=ffc00001 op2=40800000 result=7fc00001 errno=0
+func=powf op1=ffc00001 op2=40400000 result=7fc00001 errno=0
+func=powf op1=ffc00001 op2=3f000000 result=7fc00001 errno=0
+func=powf op1=ffc00001 op2=00000000 result=3f800000 errno=0
+func=powf op1=ffc00001 op2=80000000 result=3f800000 errno=0
+func=powf op1=ffc00001 op2=bf000000 result=7fc00001 errno=0
+func=powf op1=ffc00001 op2=c0400000 result=7fc00001 errno=0
+func=powf op1=ffc00001 op2=c0800000 result=7fc00001 errno=0
+func=powf op1=ffc00001 op2=ff800000 result=7fc00001 errno=0
+func=powf op1=7f800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=7f800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=7f800000 op2=7fc00001 result=7fc00001 errno=0
+func=powf op1=7f800000 op2=ffc00001 result=7fc00001 errno=0
+func=powf op1=7f800000 op2=7f800000 result=7f800000 errno=0
+func=powf op1=7f800000 op2=40800000 result=7f800000 errno=0
+func=powf op1=7f800000 op2=40400000 result=7f800000 errno=0
+func=powf op1=7f800000 op2=3f000000 result=7f800000 errno=0
+func=powf op1=7f800000 op2=00000001 result=7f800000 errno=0
+func=powf op1=7f800000 op2=00000000 result=3f800000 errno=0
+func=powf op1=7f800000 op2=80000000 result=3f800000 errno=0
+func=powf op1=7f800000 op2=bf000000 result=00000000 errno=0
+func=powf op1=7f800000 op2=c0400000 result=00000000 errno=0
+func=powf op1=7f800000 op2=c0800000 result=00000000 errno=0
+func=powf op1=7f800000 op2=ff800000 result=00000000 errno=0
+func=powf op1=40800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=40800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=40800000 op2=7fc00001 result=7fc00001 errno=0
+func=powf op1=40800000 op2=ffc00001 result=7fc00001 errno=0
+func=powf op1=40800000 op2=7f800000 result=7f800000 errno=0
+func=powf op1=40800000 op2=40800000 result=43800000 errno=0
+func=powf op1=40800000 op2=40400000 result=42800000 errno=0
+func=powf op1=40800000 op2=3f000000 result=40000000 errno=0
+func=powf op1=40800000 op2=00000000 result=3f800000 errno=0
+func=powf op1=40800000 op2=80000000 result=3f800000 errno=0
+func=powf op1=40800000 op2=bf000000 result=3f000000 errno=0
+func=powf op1=40800000 op2=c0400000 result=3c800000 errno=0
+func=powf op1=40800000 op2=c0800000 result=3b800000 errno=0
+func=powf op1=40800000 op2=ff800000 result=00000000 errno=0
+func=powf op1=3f800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=3f800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=3f800000 op2=7fc00001 result=3f800000 errno=0
+func=powf op1=3f800000 op2=ffc00001 result=3f800000 errno=0
+func=powf op1=3f800000 op2=7f800000 result=3f800000 errno=0
+func=powf op1=3f800000 op2=40800000 result=3f800000 errno=0
+func=powf op1=3f800000 op2=40400000 result=3f800000 errno=0
+func=powf op1=3f800000 op2=3f000000 result=3f800000 errno=0
+func=powf op1=3f800000 op2=00000000 result=3f800000 errno=0
+func=powf op1=3f800000 op2=80000000 result=3f800000 errno=0
+func=powf op1=3f800000 op2=bf000000 result=3f800000 errno=0
+func=powf op1=3f800000 op2=c0400000 result=3f800000 errno=0
+func=powf op1=3f800000 op2=c0800000 result=3f800000 errno=0
+func=powf op1=3f800000 op2=ff800000 result=3f800000 errno=0
+func=powf op1=3e800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=3e800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=3e800000 op2=7fc00001 result=7fc00001 errno=0
+func=powf op1=3e800000 op2=ffc00001 result=7fc00001 errno=0
+func=powf op1=3e800000 op2=7f800000 result=00000000 errno=0
+func=powf op1=3e800000 op2=40800000 result=3b800000 errno=0
+func=powf op1=3e800000 op2=40400000 result=3c800000 errno=0
+func=powf op1=3e800000 op2=3f000000 result=3f000000 errno=0
+func=powf op1=3e800000 op2=00000000 result=3f800000 errno=0
+func=powf op1=3e800000 op2=80000000 result=3f800000 errno=0
+func=powf op1=3e800000 op2=bf000000 result=40000000 errno=0
+func=powf op1=3e800000 op2=c0400000 result=42800000 errno=0
+func=powf op1=3e800000 op2=c0800000 result=43800000 errno=0
+func=powf op1=3e800000 op2=ff800000 result=7f800000 errno=0
+func=powf op1=00000001 op2=bf800000 result=7f800000 errno=ERANGE status=ox
+func=powf op1=00000000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=00000000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=00000000 op2=7fc00001 result=7fc00001 errno=0
+func=powf op1=00000000 op2=ffc00001 result=7fc00001 errno=0
+func=powf op1=00000000 op2=7f800000 result=00000000 errno=0
+func=powf op1=00000000 op2=40800000 result=00000000 errno=0
+func=powf op1=00000000 op2=40400000 result=00000000 errno=0
+func=powf op1=00000000 op2=3f000000 result=00000000 errno=0
+func=powf op1=00000000 op2=00000000 result=3f800000 errno=0
+func=powf op1=00000000 op2=80000000 result=3f800000 errno=0
+func=powf op1=00000000 op2=bf000000 result=7f800000 errno=ERANGE status=z
+func=powf op1=00000000 op2=c0400000 result=7f800000 errno=ERANGE status=z
+func=powf op1=00000000 op2=c0800000 result=7f800000 errno=ERANGE status=z
+func=powf op1=00000000 op2=ff800000 result=7f800000 errno=ERANGE
+func=powf op1=80000000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=80000000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=80000000 op2=7fc00001 result=7fc00001 errno=0
+func=powf op1=80000000 op2=ffc00001 result=7fc00001 errno=0
+func=powf op1=80000000 op2=7f800000 result=00000000 errno=0
+func=powf op1=80000000 op2=40800000 result=00000000 errno=0
+func=powf op1=80000000 op2=40400000 result=80000000 errno=0
+func=powf op1=80000000 op2=3f000000 result=00000000 errno=0
+func=powf op1=80000000 op2=00000000 result=3f800000 errno=0
+func=powf op1=80000000 op2=80000000 result=3f800000 errno=0
+func=powf op1=80000000 op2=bf000000 result=7f800000 errno=ERANGE status=z
+func=powf op1=80000000 op2=c0400000 result=ff800000 errno=ERANGE status=z
+func=powf op1=80000000 op2=c0800000 result=7f800000 errno=ERANGE status=z
+func=powf op1=80000000 op2=ff800000 result=7f800000 errno=ERANGE
+func=powf op1=be800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=be800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=be800000 op2=7fc00001 result=7fc00001 errno=0
+func=powf op1=be800000 op2=ffc00001 result=7fc00001 errno=0
+func=powf op1=be800000 op2=7f800000 result=00000000 errno=0
+func=powf op1=be800000 op2=40800000 result=3b800000 errno=0
+func=powf op1=be800000 op2=40400000 result=bc800000 errno=0
+func=powf op1=be800000 op2=3f000000 result=7fc00001 errno=EDOM status=i
+func=powf op1=be800000 op2=00000000 result=3f800000 errno=0
+func=powf op1=be800000 op2=80000000 result=3f800000 errno=0
+func=powf op1=be800000 op2=bf000000 result=7fc00001 errno=EDOM status=i
+func=powf op1=be800000 op2=c0400000 result=c2800000 errno=0
+func=powf op1=be800000 op2=c0800000 result=43800000 errno=0
+func=powf op1=be800000 op2=ff800000 result=7f800000 errno=0
+func=powf op1=bf800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=bf800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=bf800000 op2=7fc00001 result=7fc00001 errno=0
+func=powf op1=bf800000 op2=ffc00001 result=7fc00001 errno=0
+func=powf op1=bf800000 op2=7f800000 result=3f800000 errno=0
+func=powf op1=bf800000 op2=40800000 result=3f800000 errno=0
+func=powf op1=bf800000 op2=40400000 result=bf800000 errno=0
+func=powf op1=bf800000 op2=3f000000 result=7fc00001 errno=EDOM status=i
+func=powf op1=bf800000 op2=00000000 result=3f800000 errno=0
+func=powf op1=bf800000 op2=80000000 result=3f800000 errno=0
+func=powf op1=bf800000 op2=bf000000 result=7fc00001 errno=EDOM status=i
+func=powf op1=bf800000 op2=c0400000 result=bf800000 errno=0
+func=powf op1=bf800000 op2=c0800000 result=3f800000 errno=0
+func=powf op1=bf800000 op2=ff800000 result=3f800000 errno=0
+func=powf op1=c0800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=c0800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=c0800000 op2=7fc00001 result=7fc00001 errno=0
+func=powf op1=c0800000 op2=ffc00001 result=7fc00001 errno=0
+func=powf op1=c0800000 op2=7f800000 result=7f800000 errno=0
+func=powf op1=c0800000 op2=40800000 result=43800000 errno=0
+func=powf op1=c0800000 op2=40400000 result=c2800000 errno=0
+func=powf op1=c0800000 op2=3f000000 result=7fc00001 errno=EDOM status=i
+func=powf op1=c0800000 op2=00000000 result=3f800000 errno=0
+func=powf op1=c0800000 op2=80000000 result=3f800000 errno=0
+func=powf op1=c0800000 op2=bf000000 result=7fc00001 errno=EDOM status=i
+func=powf op1=c0800000 op2=c0400000 result=bc800000 errno=0
+func=powf op1=c0800000 op2=c0800000 result=3b800000 errno=0
+func=powf op1=c0800000 op2=ff800000 result=00000000 errno=0
+func=powf op1=ff800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=powf op1=ff800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=powf op1=ff800000 op2=7fc00001 result=7fc00001 errno=0
+func=powf op1=ff800000 op2=ffc00001 result=7fc00001 errno=0
+func=powf op1=ff800000 op2=7f800000 result=7f800000 errno=0
+func=powf op1=ff800000 op2=40800000 result=7f800000 errno=0
+func=powf op1=ff800000 op2=40400000 result=ff800000 errno=0
+func=powf op1=ff800000 op2=3f000000 result=7f800000 errno=0
+func=powf op1=ff800000 op2=00000000 result=3f800000 errno=0
+func=powf op1=ff800000 op2=80000000 result=3f800000 errno=0
+func=powf op1=ff800000 op2=bf000000 result=00000000 errno=0
+func=powf op1=ff800000 op2=c0400000 result=80000000 errno=0
+func=powf op1=ff800000 op2=c0800000 result=00000000 errno=0
+func=powf op1=ff800000 op2=ff800000 result=00000000 errno=0
+
+
+func=powf op1=36c27f9d op2=4109fa51 result=00000000 errno=ERANGE status=ux
+func=powf op1=351738cd op2=c0c55691 result=7f800000 errno=ERANGE status=ox
+func=powf op1=42836035 op2=41a99f40 result=7f800000 errno=ERANGE status=ox
+func=powf op1=32bd53f3 op2=40bcba58 result=00000000 errno=ERANGE status=ux
+func=powf op1=32dc5bff op2=40be62ea result=00000000 errno=ERANGE status=ux
+func=powf op1=3a8a3f66 op2=4172bd43 result=00000000 errno=ERANGE status=ux
+func=powf op1=28f0e770 op2=c035b4ca result=7f800000 errno=ERANGE status=ox
+func=powf op1=40886699 op2=c28f703a result=00000000 errno=ERANGE status=ux
+func=powf op1=414bd593 op2=c22370cf result=00000000 errno=ERANGE status=ux
+func=powf op1=3a2f1163 op2=c1422d45 result=7f800000 errno=ERANGE status=ox
+func=powf op1=434f5cf3 op2=41851272 result=7f800000 errno=ERANGE status=ox
+func=powf op1=2e0e27a4 op2=c06b13f5 result=7f800000 errno=ERANGE status=ox
+func=powf op1=39aef7a6 op2=414fd60a result=00000000 errno=ERANGE status=ux
+func=powf op1=21c80729 op2=c00a04ab result=7f800000 errno=ERANGE status=ox
+func=powf op1=42455a4b op2=c1d55905 result=00000000 errno=ERANGE status=ux
+func=powf op1=2d173e0b op2=c05ee797 result=7f800000 errno=ERANGE status=ox
+func=powf op1=452edf9a op2=4132dd7f result=7f800000 errno=ERANGE status=ox
+func=powf op1=406bf67b op2=c29f5f12 result=00000000 errno=ERANGE status=ux
+func=powf op1=2d82a6fc op2=4085779e result=00000000 errno=ERANGE status=ux
+func=powf op1=4551f827 op2=41304516 result=7f800000 errno=ERANGE status=ox
+func=powf op1=3a917c51 op2=41726c0a result=00000001.37f errno=0 status=ux
+; ISO C allows both errno=ERANGE and errno=0 for the two cases below (left disabled)
+;func=powf op1=3b19bbaa op2=4188e6fb result=00000000.b5f errno=0 status=ux
+;func=powf op1=4088bd18 op2=c28ef056 result=00000000.986 errno=0 status=ux
+func=powf op1=3f7ffd76 op2=4a09221e result=00aa9d24.3ad error=0
+
+func=powf op1=007fffff op2=bf000001 result=5f00002c.2b2 error=0
+func=powf op1=000007ff op2=bf000001 result=62000830.96f error=0
+func=powf op1=007fffff op2=80800001 result=3f800000.000 error=0
+func=powf op1=00000000 op2=800007ff result=7f800000 errno=ERANGE status=z
+func=powf op1=00000000 op2=000007ff result=00000000 error=0
+func=powf op1=bf800000 op2=ff7fffff result=3f800000 error=0
+func=powf op1=2e4e4f30 op2=406b0dc2 result=007e9c59.eb4 errno=0 status=u
+
+; SDCOMP-25549: ensure the biggest overflow case possible is not
+; mishandled. Also check the analogous underflow, and also ensure that
+; our massive-overflow checks do not affect numbers _just within_ the
+; range.
+func=powf op1=7f7fffff op2=7f7fffff result=7f800000 error=overflow
+func=powf op1=7f7fffff op2=ff7fffff result=00000000 error=underflow
+func=powf op1=54cb3000 op2=403fffff result=7f7fffb2.a95 error=0
@@ -0,0 +1,51 @@
+; Directed test cases for SP sincos
+;
+; Copyright (c) 2007-2018, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+
+func=sincosf_sinf op1=7fc00001 result=7fc00001 errno=0
+func=sincosf_sinf op1=ffc00001 result=7fc00001 errno=0
+func=sincosf_sinf op1=7f800001 result=7fc00001 errno=0 status=i
+func=sincosf_sinf op1=ff800001 result=7fc00001 errno=0 status=i
+func=sincosf_sinf op1=7f800000 result=7fc00001 errno=EDOM status=i
+func=sincosf_sinf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=sincosf_sinf op1=00000000 result=00000000 errno=0
+func=sincosf_sinf op1=80000000 result=80000000 errno=0
+func=sincosf_sinf op1=c70d39a1 result=be37fad5.7ed errno=0
+func=sincosf_sinf op1=46427f1b result=3f352d80.f9b error=0
+func=sincosf_sinf op1=4647e568 result=3f352da9.7be error=0
+func=sincosf_sinf op1=46428bac result=bf352dea.924 error=0
+func=sincosf_sinf op1=4647f1f9 result=bf352e13.146 error=0
+func=sincosf_sinf op1=4647fe8a result=3f352e7c.ac9 error=0
+func=sincosf_sinf op1=45d8d7f1 result=3f35097b.cb0 error=0
+func=sincosf_sinf op1=45d371a4 result=bf350990.102 error=0
+func=sincosf_sinf op1=45ce0b57 result=3f3509a4.554 error=0
+func=sincosf_sinf op1=45d35882 result=3f3509f9.bdb error=0
+func=sincosf_sinf op1=45cdf235 result=bf350a0e.02c error=0
+
+func=sincosf_cosf op1=7fc00001 result=7fc00001 errno=0
+func=sincosf_cosf op1=ffc00001 result=7fc00001 errno=0
+func=sincosf_cosf op1=7f800001 result=7fc00001 errno=0 status=i
+func=sincosf_cosf op1=ff800001 result=7fc00001 errno=0 status=i
+func=sincosf_cosf op1=7f800000 result=7fc00001 errno=EDOM status=i
+func=sincosf_cosf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=sincosf_cosf op1=00000000 result=3f800000 errno=0
+func=sincosf_cosf op1=80000000 result=3f800000 errno=0
+func=sincosf_cosf op1=46427f1b result=3f34dc5c.565 error=0
+func=sincosf_cosf op1=4647e568 result=3f34dc33.c1f error=0
+func=sincosf_cosf op1=46428bac result=bf34dbf2.8e3 error=0
+func=sincosf_cosf op1=4647f1f9 result=bf34dbc9.f9b error=0
+func=sincosf_cosf op1=4647fe8a result=3f34db60.313 error=0
+func=sincosf_cosf op1=45d8d7f1 result=bf35006a.7fd error=0
+func=sincosf_cosf op1=45d371a4 result=3f350056.39b error=0
+func=sincosf_cosf op1=45ce0b57 result=bf350041.f38 error=0
+func=sincosf_cosf op1=45d35882 result=bf34ffec.868 error=0
+func=sincosf_cosf op1=45cdf235 result=3f34ffd8.404 error=0
+
+; no underflow
+func=sincosf_sinf op1=17800000 result=17800000.000
+func=sincosf_cosf op1=17800000 result=3f800000.000
+; underflow
+func=sincosf_sinf op1=00400000 result=00400000.000 status=ux
+func=sincosf_cosf op1=00400000 result=3f800000.000 status=ux
@@ -0,0 +1,28 @@
+; sinf.tst - Directed test cases for SP sine
+;
+; Copyright (c) 2007-2018, Arm Limited.
+; SPDX-License-Identifier: MIT
+
+
+func=sinf op1=7fc00001 result=7fc00001 errno=0
+func=sinf op1=ffc00001 result=7fc00001 errno=0
+func=sinf op1=7f800001 result=7fc00001 errno=0 status=i
+func=sinf op1=ff800001 result=7fc00001 errno=0 status=i
+func=sinf op1=7f800000 result=7fc00001 errno=EDOM status=i
+func=sinf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=sinf op1=00000000 result=00000000 errno=0
+func=sinf op1=80000000 result=80000000 errno=0
+; Directed test for a failure I found while developing mathbench
+func=sinf op1=c70d39a1 result=be37fad5.7ed errno=0
+; SDCOMP-26094: check sinf in the cases for which the range reducer
+; returns values furthest beyond its nominal upper bound of pi/4.
+func=sinf op1=46427f1b result=3f352d80.f9b error=0
+func=sinf op1=4647e568 result=3f352da9.7be error=0
+func=sinf op1=46428bac result=bf352dea.924 error=0
+func=sinf op1=4647f1f9 result=bf352e13.146 error=0
+func=sinf op1=4647fe8a result=3f352e7c.ac9 error=0
+func=sinf op1=45d8d7f1 result=3f35097b.cb0 error=0
+func=sinf op1=45d371a4 result=bf350990.102 error=0
+func=sinf op1=45ce0b57 result=3f3509a4.554 error=0
+func=sinf op1=45d35882 result=3f3509f9.bdb error=0
+func=sinf op1=45cdf235 result=bf350a0e.02c error=0
@@ -0,0 +1,10 @@
+!! double.tst - Random test case specification for DP functions
+!!
+!! Copyright (c) 1999-2018, Arm Limited.
+!! SPDX-License-Identifier: MIT
+
+test exp 10000
+test exp2 10000
+test log 10000
+test log2 10000
+test pow 40000
@@ -0,0 +1,15 @@
+!! single.tst - Random test case specification for SP functions
+!!
+!! Copyright (c) 1999-2018, Arm Limited.
+!! SPDX-License-Identifier: MIT
+
+test sinf 10000
+test cosf 10000
+test sincosf_sinf 5000
+test sincosf_cosf 5000
+test tanf 10000
+test expf 10000
+test exp2f 10000
+test logf 10000
+test log2f 10000
+test powf 10000
@@ -0,0 +1,851 @@
+/*
+ * ULP error checking tool for math functions.
+ *
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <ctype.h>
+#include <fenv.h>
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mathlib.h"
+
+/* Don't depend on mpfr by default.  */
+#ifndef USE_MPFR
+# define USE_MPFR 0
+#endif
+#if USE_MPFR
+# include <mpfr.h>
+#endif
+
+#ifndef WANT_VMATH
+/* Enable the build of vector math code.  */
+# define WANT_VMATH 1
+#endif
+
+/* Reinterpret the object representation of the double F as a 64-bit
+   unsigned integer (type punning through a union).  */
+static inline uint64_t
+asuint64 (double f)
+{
+  union
+  {
+    double as_double;
+    uint64_t as_bits;
+  } pun;
+
+  pun.as_double = f;
+  return pun.as_bits;
+}
+
+/* Reinterpret the 64-bit pattern I as a double (type punning through a
+   union); the inverse of asuint64.  */
+static inline double
+asdouble (uint64_t i)
+{
+  union
+  {
+    uint64_t as_bits;
+    double as_double;
+  } pun;
+
+  pun.as_bits = i;
+  return pun.as_double;
+}
+
+/* Reinterpret the object representation of the float F as a 32-bit
+   unsigned integer (type punning through a union).  */
+static inline uint32_t
+asuint (float f)
+{
+  union
+  {
+    float as_float;
+    uint32_t as_bits;
+  } pun;
+
+  pun.as_float = f;
+  return pun.as_bits;
+}
+
+/* Reinterpret the 32-bit pattern I as a float (type punning through a
+   union); the inverse of asuint.  */
+static inline float
+asfloat (uint32_t i)
+{
+  union
+  {
+    uint32_t as_bits;
+    float as_float;
+  } pun;
+
+  pun.as_bits = i;
+  return pun.as_float;
+}
+
+/* Generator state, starting from a fixed constant.  */
+static uint64_t seed = 0x0123456789abcdef;
+
+/* Advance the generator state by one multiply-and-add step and return
+   the new state with its high half folded into the low half.  */
+static uint64_t
+rand64 (void)
+{
+  uint64_t next = 6364136223846793005ull * seed + 1;
+
+  seed = next;
+  return next ^ (next >> 32);
+}
+
+/* Uniform random in [0,n].  */
+static uint64_t
+randn (uint64_t n)
+{
+  /* Number of distinct values to choose from; wraps to 0 when n is the
+     largest uint64_t.  */
+  uint64_t range = n + 1;
+
+  if (n == 0)
+    return 0;
+  if (range == 0)
+    /* Every 64-bit value is acceptable.  */
+    return rand64 ();
+  /* Draw until the sample passes the acceptance test below, then return
+     its remainder modulo range.  */
+  for (;;)
+    {
+      uint64_t sample = rand64 ();
+      uint64_t rem = sample % range;
+
+      if (sample - rem <= -range)
+	return rem;
+    }
+}
+
+/* Input generator state.  Arguments are produced as integer bit
+   patterns that the next_* helpers convert with asfloat/asdouble.  */
+struct gen
+{
+  uint64_t start;  /* First bit pattern of the first argument range.  */
+  uint64_t len;    /* Length of the first range (hi - lo bit patterns).  */
+  uint64_t start2; /* First bit pattern of the second argument range.  */
+  uint64_t len2;   /* Length of the second argument range.  */
+  uint64_t off;    /* Current offset from start, advanced by next1.  */
+  uint64_t step;   /* Base increment of off used by next1.  */
+  uint64_t cnt;    /* Number of test cases requested.  */
+};
+
+/* Argument bundles, one per function type: f/d selects float or double
+   arguments, 1/2 is the number of arguments.  */
+struct args_f1
+{
+  float x;
+};
+
+struct args_f2
+{
+  float x;
+  float x2;
+};
+
+struct args_d1
+{
+  double x;
+};
+
+struct args_d2
+{
+  double x;
+  double x2;
+};
+
+/* Reference result of a call: result = y + tail*2^ulpexp.
+   y is the reference value in the return type, tail the remaining
+   difference scaled by 2^-ulpexp, ex the expected exception flags and
+   ex_may the flags whose mismatch is ignored when comparing.  */
+struct ret_f
+{
+  float y;
+  double tail;
+  int ulpexp;
+  int ex;
+  int ex_may;
+};
+
+/* Same as ret_f for functions returning double.  */
+struct ret_d
+{
+  double y;
+  double tail;
+  int ulpexp;
+  int ex;
+  int ex_may;
+};
+
+/* Return the next bit pattern for a single argument function and
+   advance the generator.  */
+static inline uint64_t
+next1 (struct gen *g)
+{
+  /* For single argument use randomized incremental steps,
+     that produce dense sampling without collisions and allow
+     testing all inputs in a range.  */
+  uint64_t current = g->start + g->off;
+  uint64_t advanced = g->off + (g->step + randn (g->step / 2));
+
+  /* hack: fold the offset back once it runs past the range.  */
+  if (advanced > g->len)
+    advanced -= g->len;
+  g->off = advanced;
+  return current;
+}
+
+/* Return the next bit pattern for the first argument of a two argument
+   function and store the pattern for the second argument in *X2.  */
+static inline uint64_t
+next2 (uint64_t *x2, struct gen *g)
+{
+  /* For two arguments use uniform random sampling.  The first argument
+     is drawn before the second one.  */
+  uint64_t first = randn (g->len);
+  uint64_t second = randn (g->len2);
+
+  *x2 = g->start2 + second;
+  return g->start + first;
+}
+
+/* Produce the next argument of a float function of one argument.
+   G points to a struct gen.  */
+static struct args_f1
+next_f1 (void *g)
+{
+  struct args_f1 args;
+
+  args.x = asfloat (next1 (g));
+  return args;
+}
+
+/* Produce the next argument pair of a float function of two arguments.
+   G points to a struct gen.  */
+static struct args_f2
+next_f2 (void *g)
+{
+  struct args_f2 args;
+  uint64_t bits2;
+  uint64_t bits = next2 (&bits2, g);
+
+  args.x = asfloat (bits);
+  args.x2 = asfloat (bits2);
+  return args;
+}
+
+/* Produce the next argument of a double function of one argument.
+   G points to a struct gen.  */
+static struct args_d1
+next_d1 (void *g)
+{
+  struct args_d1 args;
+
+  args.x = asdouble (next1 (g));
+  return args;
+}
+
+/* Produce the next argument pair of a double function of two
+   arguments.  G points to a struct gen.  */
+static struct args_d2
+next_d2 (void *g)
+{
+  struct args_d2 args;
+  uint64_t bits2;
+  uint64_t bits = next2 (&bits2, g);
+
+  args.x = asdouble (bits);
+  args.x2 = asdouble (bits2);
+  return args;
+}
+
+/* Run configuration filled in by main from the command line.  */
+struct conf
+{
+  int r;		/* Rounding mode as an FE_* constant.  */
+  int rc;		/* Rounding mode as its option letter (n, u, d, z).  */
+  int quiet;		/* Set by -q.  */
+  int mpfr;		/* Set by -m: use mpfr for the reference result.  */
+  int fenv;		/* Cleared by -f: disable fenv testing.  */
+  unsigned long long n; /* Number of test cases to run.  */
+  double softlim;	/* Value of -l (soft ulp limit).  */
+  double errlim;	/* Value of -e (ulp limit).  */
+};
+
+/* Wrappers for sincos.  Each wrapper calls both sin and cos on the same
+   input, discards one result and returns the other.  */
+static float sincosf_sinf(float x) {(void)cosf(x); return sinf(x);}
+static float sincosf_cosf(float x) {(void)sinf(x); return cosf(x);}
+static double sincos_sin(double x) {(void)cos(x); return sin(x);}
+static double sincos_cos(double x) {(void)sin(x); return cos(x);}
+#if USE_MPFR
+/* mpfr counterparts: the first call's value in y is overwritten by the
+   second call, whose ternary value is returned.  */
+static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_cos(y,x,r); return mpfr_sin(y,x,r); }
+static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_sin(y,x,r); return mpfr_cos(y,x,r); }
+#endif
+
+/* A bit of a hack: call vector functions twice with the same
+   input in lane 0 but a different value in other lanes: once
+   with an in-range value and then with a special case value.  */
+static int secondcall;
+
+/* Wrappers for vector functions.  */
+#if __aarch64__ && WANT_VMATH
+typedef __f32x4_t v_float;
+typedef __f64x2_t v_double;
+/* Filler values for the last lane, indexed by secondcall.  */
+static const float fv[2] = {1.0f, -INFINITY};
+static const double dv[2] = {1.0, -INFINITY};
+/* Build a vector argument: x in the leading lanes, the filler selected
+   by secondcall in the last lane.  */
+static inline v_float argf(float x) { return (v_float){x,x,x,fv[secondcall]}; }
+static inline v_double argd(double x) { return (v_double){x,dv[secondcall]}; }
+
+/* Scalar adaptors: call the vector routine and return lane 0.  */
+static float v_sinf(float x) { return __v_sinf(argf(x))[0]; }
+static float v_cosf(float x) { return __v_cosf(argf(x))[0]; }
+static float v_expf_1u(float x) { return __v_expf_1u(argf(x))[0]; }
+static float v_expf(float x) { return __v_expf(argf(x))[0]; }
+static float v_exp2f_1u(float x) { return __v_exp2f_1u(argf(x))[0]; }
+static float v_exp2f(float x) { return __v_exp2f(argf(x))[0]; }
+static float v_logf(float x) { return __v_logf(argf(x))[0]; }
+static float v_powf(float x, float y) { return __v_powf(argf(x),argf(y))[0]; }
+static double v_sin(double x) { return __v_sin(argd(x))[0]; }
+static double v_cos(double x) { return __v_cos(argd(x))[0]; }
+static double v_exp(double x) { return __v_exp(argd(x))[0]; }
+static double v_log(double x) { return __v_log(argd(x))[0]; }
+static double v_pow(double x, double y) { return __v_pow(argd(x),argd(y))[0]; }
+#ifdef __vpcs
+/* Same adaptors for the __vn_ and _ZGVnN entry points, only built when
+   __vpcs is defined.  */
+static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; }
+static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; }
+static float vn_expf_1u(float x) { return __vn_expf_1u(argf(x))[0]; }
+static float vn_expf(float x) { return __vn_expf(argf(x))[0]; }
+static float vn_exp2f_1u(float x) { return __vn_exp2f_1u(argf(x))[0]; }
+static float vn_exp2f(float x) { return __vn_exp2f(argf(x))[0]; }
+static float vn_logf(float x) { return __vn_logf(argf(x))[0]; }
+static float vn_powf(float x, float y) { return __vn_powf(argf(x),argf(y))[0]; }
+static double vn_sin(double x) { return __vn_sin(argd(x))[0]; }
+static double vn_cos(double x) { return __vn_cos(argd(x))[0]; }
+static double vn_exp(double x) { return __vn_exp(argd(x))[0]; }
+static double vn_log(double x) { return __vn_log(argd(x))[0]; }
+static double vn_pow(double x, double y) { return __vn_pow(argd(x),argd(y))[0]; }
+static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; }
+static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; }
+static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; }
+static float Z_exp2f(float x) { return _ZGVnN4v_exp2f(argf(x))[0]; }
+static float Z_logf(float x) { return _ZGVnN4v_logf(argf(x))[0]; }
+static float Z_powf(float x, float y) { return _ZGVnN4vv_powf(argf(x),argf(y))[0]; }
+static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; }
+static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; }
+static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; }
+static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; }
+static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; }
+#endif
+#endif
+
+/* Description of one testable function.  */
+struct fun
+{
+  const char *name; /* Name accepted on the command line.  */
+  int arity;	    /* Number of arguments (1 or 2).  */
+  int singleprec;   /* Nonzero for float functions, zero for double.  */
+  int twice;	    /* Nonzero: call a second time with secondcall set.  */
+  /* Function under test; the active member follows arity/singleprec.  */
+  union
+  {
+    float (*f1) (float);
+    float (*f2) (float, float);
+    double (*d1) (double);
+    double (*d2) (double, double);
+  } fun;
+  /* Reference implementation in the next wider type.  */
+  union
+  {
+    double (*f1) (double);
+    double (*f2) (double, double);
+    long double (*d1) (long double);
+    long double (*d2) (long double, long double);
+  } fun_long;
+#if USE_MPFR
+  /* Reference implementation using mpfr.  */
+  union
+  {
+    int (*f1) (mpfr_t, const mpfr_t, mpfr_rnd_t);
+    int (*f2) (mpfr_t, const mpfr_t, const mpfr_t, mpfr_rnd_t);
+    int (*d1) (mpfr_t, const mpfr_t, mpfr_rnd_t);
+    int (*d2) (mpfr_t, const mpfr_t, const mpfr_t, mpfr_rnd_t);
+  } fun_mpfr;
+#endif
+};
+
+/* Table of supported functions, terminated by an all-zero entry.
+   F (x, x_wrap, x_long, x_mpfr, a, s, t, twice) adds one entry:
+   x is the name, x_wrap the function under test, x_long and x_mpfr the
+   references, a the arity, s the singleprec flag and t the union member
+   to initialize.  x_mpfr is ignored when USE_MPFR is 0.
+   F1/F2/D1/D2 are shorthands for libm style float/double functions of
+   one or two arguments.  */
+static const struct fun fun[] = {
+#if USE_MPFR
+# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
+  {#x, a, s, twice, {.t = x_wrap}, {.t = x_long}, {.t = x_mpfr}},
+#else
+# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
+  {#x, a, s, twice, {.t = x_wrap}, {.t = x_long}},
+#endif
+#define F1(x) F (x##f, x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define F2(x) F (x##f, x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define D1(x) F (x, x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define D2(x) F (x, x, x##l, mpfr_##x, 2, 0, d2, 0)
+ F1 (sin)
+ F1 (cos)
+ F (sincosf_sinf, sincosf_sinf, sincos_sin, sincos_mpfr_sin, 1, 1, f1, 0)
+ F (sincosf_cosf, sincosf_cosf, sincos_cos, sincos_mpfr_cos, 1, 1, f1, 0)
+ F1 (exp)
+ F1 (exp2)
+ F1 (log)
+ F1 (log2)
+ F2 (pow)
+ D1 (exp)
+ D1 (exp2)
+ D1 (log)
+ D1 (log2)
+ D2 (pow)
+#if WANT_VMATH
+ /* Scalar variants of the vector routines.  */
+ F (__s_sinf, __s_sinf, sin, mpfr_sin, 1, 1, f1, 0)
+ F (__s_cosf, __s_cosf, cos, mpfr_cos, 1, 1, f1, 0)
+ F (__s_expf_1u, __s_expf_1u, exp, mpfr_exp, 1, 1, f1, 0)
+ F (__s_expf, __s_expf, exp, mpfr_exp, 1, 1, f1, 0)
+ F (__s_exp2f_1u, __s_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 0)
+ F (__s_exp2f, __s_exp2f, exp2, mpfr_exp2, 1, 1, f1, 0)
+ F (__s_powf, __s_powf, pow, mpfr_pow, 2, 1, f2, 0)
+ F (__s_logf, __s_logf, log, mpfr_log, 1, 1, f1, 0)
+ F (__s_sin, __s_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+ F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0)
+ F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0)
+ F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0)
+ F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0)
+#if __aarch64__
+ /* Vector routines, tested through the lane 0 wrappers and called
+    twice per input (twice = 1).  */
+ F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1)
+ F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1)
+ F (__v_expf_1u, v_expf_1u, exp, mpfr_exp, 1, 1, f1, 1)
+ F (__v_expf, v_expf, exp, mpfr_exp, 1, 1, f1, 1)
+ F (__v_exp2f_1u, v_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1)
+ F (__v_exp2f, v_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
+ F (__v_logf, v_logf, log, mpfr_log, 1, 1, f1, 1)
+ F (__v_powf, v_powf, pow, mpfr_pow, 2, 1, f2, 1)
+ F (__v_sin, v_sin, sinl, mpfr_sin, 1, 0, d1, 1)
+ F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1)
+ F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1)
+ F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1)
+#ifdef __vpcs
+ F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1)
+ F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1)
+ F (__vn_expf_1u, vn_expf_1u, exp, mpfr_exp, 1, 1, f1, 1)
+ F (__vn_expf, vn_expf, exp, mpfr_exp, 1, 1, f1, 1)
+ F (__vn_exp2f_1u, vn_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1)
+ F (__vn_exp2f, vn_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
+ F (__vn_logf, vn_logf, log, mpfr_log, 1, 1, f1, 1)
+ F (__vn_powf, vn_powf, pow, mpfr_pow, 2, 1, f2, 1)
+ F (__vn_sin, vn_sin, sinl, mpfr_sin, 1, 0, d1, 1)
+ F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1)
+ F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1)
+ F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1)
+ F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1)
+ F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1)
+ F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1)
+ F (_ZGVnN4v_exp2f, Z_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
+ F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1)
+ F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1)
+ F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1)
+ F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1)
+ F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1)
+ F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1)
+#endif
+#endif
+#endif
+#undef F
+#undef F1
+#undef F2
+#undef D1
+#undef D2
+ {0}};
+
+/* Boilerplate for generic calls.  */
+
+/* Return the power of two exponent used to scale ulp errors for the
+   float X: the biased exponent field (a zero field is treated as 1)
+   minus the bias and the 23 fraction bits.  */
+static inline int
+ulpscale_f (float x)
+{
+  int biased = (asuint (x) >> 23) & 0xff;
+
+  if (biased == 0)
+    biased = 1;
+  return biased - 0x7f - 23;
+}
+/* Return the power of two exponent used to scale ulp errors for the
+   double X: the biased exponent field (a zero field is treated as 1)
+   minus the bias and the 52 fraction bits.  */
+static inline int
+ulpscale_d (double x)
+{
+  int biased = (asuint64 (x) >> 52) & 0x7ff;
+
+  if (biased == 0)
+    biased = 1;
+  return biased - 0x3ff - 52;
+}
+/* call_<type>: invoke the function under test with the arguments A.  */
+static inline float
+call_f1 (const struct fun *f, struct args_f1 a)
+{
+  float (*fn) (float) = f->fun.f1;
+  return fn (a.x);
+}
+static inline float
+call_f2 (const struct fun *f, struct args_f2 a)
+{
+  float (*fn) (float, float) = f->fun.f2;
+  return fn (a.x, a.x2);
+}
+
+static inline double
+call_d1 (const struct fun *f, struct args_d1 a)
+{
+  double (*fn) (double) = f->fun.d1;
+  return fn (a.x);
+}
+static inline double
+call_d2 (const struct fun *f, struct args_d2 a)
+{
+  double (*fn) (double, double) = f->fun.d2;
+  return fn (a.x, a.x2);
+}
+/* call_long_<type>: invoke the wider reference implementation with the
+   arguments A; the arguments are converted to the wider type by the
+   call.  */
+static inline double
+call_long_f1 (const struct fun *f, struct args_f1 a)
+{
+  double (*ref) (double) = f->fun_long.f1;
+  return ref (a.x);
+}
+static inline double
+call_long_f2 (const struct fun *f, struct args_f2 a)
+{
+  double (*ref) (double, double) = f->fun_long.f2;
+  return ref (a.x, a.x2);
+}
+static inline long double
+call_long_d1 (const struct fun *f, struct args_d1 a)
+{
+  long double (*ref) (long double) = f->fun_long.d1;
+  return ref (a.x);
+}
+static inline long double
+call_long_d2 (const struct fun *f, struct args_d2 a)
+{
+  long double (*ref) (long double, long double) = f->fun_long.d2;
+  return ref (a.x, a.x2);
+}
+/* printcall_<type>: print the call "name(args)" with the arguments in
+   hexadecimal floating-point notation, without a trailing newline.  */
+static inline void
+printcall_f1 (const struct fun *f, struct args_f1 a)
+{
+  const char *name = f->name;
+  double x = a.x;
+
+  printf ("%s(%a)", name, x);
+}
+static inline void
+printcall_f2 (const struct fun *f, struct args_f2 a)
+{
+  const char *name = f->name;
+  double x = a.x;
+  double x2 = a.x2;
+
+  printf ("%s(%a, %a)", name, x, x2);
+}
+static inline void
+printcall_d1 (const struct fun *f, struct args_d1 a)
+{
+  const char *name = f->name;
+  double x = a.x;
+
+  printf ("%s(%a)", name, x);
+}
+static inline void
+printcall_d2 (const struct fun *f, struct args_d2 a)
+{
+  const char *name = f->name;
+  double x = a.x;
+  double x2 = a.x2;
+
+  printf ("%s(%a, %a)", name, x, x2);
+}
+/* printgen_<type>: print the function name and the tested input
+   range(s) as "[lo;hi]", converting the generator's bit patterns back
+   to floating-point values.  No trailing newline is printed.  */
+static inline void
+printgen_f1 (const struct fun *f, struct gen *gen)
+{
+  float lo = asfloat (gen->start);
+  float hi = asfloat (gen->start + gen->len);
+
+  printf ("%s in [%a;%a]", f->name, lo, hi);
+}
+static inline void
+printgen_f2 (const struct fun *f, struct gen *gen)
+{
+  float lo = asfloat (gen->start);
+  float hi = asfloat (gen->start + gen->len);
+  float lo2 = asfloat (gen->start2);
+  float hi2 = asfloat (gen->start2 + gen->len2);
+
+  printf ("%s in [%a;%a] x [%a;%a]", f->name, lo, hi, lo2, hi2);
+}
+static inline void
+printgen_d1 (const struct fun *f, struct gen *gen)
+{
+  double lo = asdouble (gen->start);
+  double hi = asdouble (gen->start + gen->len);
+
+  printf ("%s in [%a;%a]", f->name, lo, hi);
+}
+static inline void
+printgen_d2 (const struct fun *f, struct gen *gen)
+{
+  double lo = asdouble (gen->start);
+  double hi = asdouble (gen->start + gen->len);
+  double lo2 = asdouble (gen->start2);
+  double hi2 = asdouble (gen->start2 + gen->len2);
+
+  printf ("%s in [%a;%a] x [%a;%a]", f->name, lo, hi, lo2, hi2);
+}
+
+/* reduce_<type> (a, f, op): apply f to every argument in the bundle a
+   and combine the results with the binary operator op.  f may be left
+   empty to combine the arguments themselves; op is unused for one
+   argument bundles.  */
+#define reduce_f1(a, f, op) (f (a.x))
+#define reduce_f2(a, f, op) (f (a.x) op f (a.x2))
+#define reduce_d1(a, f, op) (f (a.x))
+#define reduce_d2(a, f, op) (f (a.x) op f (a.x2))
+
+#ifndef IEEE_754_2008_SNAN
+# define IEEE_754_2008_SNAN 1
+#endif
+/* Return nonzero if the float X is classified as a signaling NaN from
+   its bit pattern.  The test used depends on IEEE_754_2008_SNAN.  */
+static inline int
+issignaling_f (float x)
+{
+  uint32_t bits = asuint (x);
+
+  if (IEEE_754_2008_SNAN)
+    return 2 * (bits ^ 0x00400000) > 2u * 0x7fc00000;
+  return (bits & 0x7fc00000) == 0x7fc00000;
+}
+/* Return nonzero if the double X is classified as a signaling NaN from
+   its bit pattern.  The test used depends on IEEE_754_2008_SNAN.  */
+static inline int
+issignaling_d (double x)
+{
+  uint64_t bits = asuint64 (x);
+
+  if (IEEE_754_2008_SNAN)
+    return 2 * (bits ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
+  return (bits & 0x7ff8000000000000) == 0x7ff8000000000000;
+}
+
+#if USE_MPFR
+/* Translate the fenv rounding mode R (an FE_* constant) into the
+   corresponding mpfr rounding mode; unknown modes map to -1.  */
+static mpfr_rnd_t
+rmap (int r)
+{
+  if (r == FE_TONEAREST)
+    return MPFR_RNDN;
+  if (r == FE_TOWARDZERO)
+    return MPFR_RNDZ;
+  if (r == FE_UPWARD)
+    return MPFR_RNDU;
+  if (r == FE_DOWNWARD)
+    return MPFR_RNDD;
+  return -1;
+}
+
+#define prec_mpfr_f 50
+#define prec_mpfr_d 80
+#define prec_f 24
+#define prec_d 53
+#define emin_f -148
+#define emin_d -1073
+#define emax_f 128
+#define emax_d 1024
+/* call_mpfr_<type>: evaluate the mpfr reference of F on the arguments A
+   with rounding mode R, storing the value in Y and returning the
+   function's ternary value.  The arguments are first loaded into mpfr
+   variables that have the precision of the argument type (prec_f or
+   prec_d).  */
+static inline int
+call_mpfr_f1 (mpfr_t y, const struct fun *f, struct args_f1 a, mpfr_rnd_t r)
+{
+  MPFR_DECL_INIT (x, prec_f);
+  mpfr_set_flt (x, a.x, MPFR_RNDN);
+  return f->fun_mpfr.f1 (y, x, r);
+}
+static inline int
+call_mpfr_f2 (mpfr_t y, const struct fun *f, struct args_f2 a, mpfr_rnd_t r)
+{
+  MPFR_DECL_INIT (x, prec_f);
+  MPFR_DECL_INIT (x2, prec_f);
+  mpfr_set_flt (x, a.x, MPFR_RNDN);
+  mpfr_set_flt (x2, a.x2, MPFR_RNDN);
+  return f->fun_mpfr.f2 (y, x, x2, r);
+}
+static inline int
+call_mpfr_d1 (mpfr_t y, const struct fun *f, struct args_d1 a, mpfr_rnd_t r)
+{
+  MPFR_DECL_INIT (x, prec_d);
+  mpfr_set_d (x, a.x, MPFR_RNDN);
+  return f->fun_mpfr.d1 (y, x, r);
+}
+static inline int
+call_mpfr_d2 (mpfr_t y, const struct fun *f, struct args_d2 a, mpfr_rnd_t r)
+{
+  MPFR_DECL_INIT (x, prec_d);
+  MPFR_DECL_INIT (x2, prec_d);
+  mpfr_set_d (x, a.x, MPFR_RNDN);
+  mpfr_set_d (x2, a.x2, MPFR_RNDN);
+  return f->fun_mpfr.d2 (y, x, x2, r);
+}
+#endif
+
+/* Per return type aliases used by ulp.h through RT(x): the _f names are
+   for functions returning float, the _d names for functions returning
+   double.  float_* is the return type and double_* the wider type used
+   for the reference result.  */
+#define float_f float
+#define double_f double
+#define copysign_f copysignf
+#define nextafter_f nextafterf
+#define fabs_f fabsf
+#define asuint_f asuint
+#define asfloat_f asfloat
+#define scalbn_f scalbnf
+#define lscalbn_f scalbn
+#define halfinf_f 0x1p127f
+#define min_normal_f 0x1p-126f
+
+#define float_d double
+#define double_d long double
+#define copysign_d copysign
+#define nextafter_d nextafter
+#define fabs_d fabs
+#define asuint_d asuint64
+#define asfloat_d asdouble
+#define scalbn_d scalbn
+#define lscalbn_d scalbnl
+#define halfinf_d 0x1p1023
+#define min_normal_d 0x1p-1022
+
+/* Instantiate the generic code in ulp.h once per function type.  T(x)
+   appends the function type suffix and RT(x) the return type suffix.
+   NEW_RT is defined before the first include of each return type so
+   that the return type specific helpers are emitted only once.  */
+#define NEW_RT
+#define RT(x) x##_f
+#define T(x) x##_f1
+#include "ulp.h"
+#undef T
+#define T(x) x##_f2
+#include "ulp.h"
+#undef T
+#undef RT
+
+#define NEW_RT
+#define RT(x) x##_d
+#define T(x) x##_d1
+#include "ulp.h"
+#undef T
+#define T(x) x##_d2
+#include "ulp.h"
+#undef T
+#undef RT
+
+/* Print the command line help, including the list of supported
+   function names, and exit with status 1.  Does not return.  */
+static void
+usage (void)
+{
+  puts ("./ulp [-q] [-m] [-f] [-r nudz] [-l soft-ulplimit] [-e ulplimit] func "
+	"lo [hi [x lo2 hi2] [count]]");
+  puts ("Compares func against a higher precision implementation in [lo; hi].");
+  puts ("-q: quiet.");
+  puts ("-m: use mpfr even if faster method is available.");
+  puts ("-f: disable fenv testing (rounding modes and exceptions).");
+  /* The synopsis lists these options; describe them as well.  */
+  puts ("-r: rounding mode: n (nearest, default), u (upward), d (downward) "
+	"or z (toward zero).");
+  puts ("-l: only print new maximum errors that are above soft-ulplimit.");
+  puts ("-e: errors above ulplimit are printed and count as failures.");
+  puts ("Supported func:");
+  for (const struct fun *f = fun; f->name; f++)
+    printf ("\t%s\n", f->name);
+  exit (1);
+}
+
+/* Run the comparison loop that matches the type of F (float or double,
+   one or two arguments) and return its result.  Any other arity is a
+   usage error.  */
+static int
+cmp (const struct fun *f, struct gen *gen, const struct conf *conf)
+{
+  if (f->arity == 1)
+    return f->singleprec ? cmp_f1 (f, gen, conf) : cmp_d1 (f, gen, conf);
+  if (f->arity == 2)
+    return f->singleprec ? cmp_f2 (f, gen, conf) : cmp_d2 (f, gen, conf);
+  usage ();
+  return 1;
+}
+
+/* Parse the command line number S and return its bit representation:
+   32 bits if SINGLEPREC is nonzero, 64 bits otherwise.
+
+   An optional leading '+' or '-' is handled here.  A "0x" prefixed
+   string without a binary exponent is taken as the raw bit pattern and
+   '-' flips its sign bit.  Anything else is parsed as a floating-point
+   literal by strtof/strtod (which also accept hexadecimal floats and
+   the inf/nan spellings) and '-' sets the sign bit.  */
+static uint64_t
+getnum (const char *s, int singleprec)
+{
+  uint64_t sign = 0;
+
+  if (s[0] == '+')
+    s++;
+  else if (s[0] == '-')
+    {
+      sign = singleprec ? 1ULL << 31 : 1ULL << 63;
+      s++;
+    }
+  /* 0xXXXX is treated as bit representation, '-' flips the sign bit.
+     The exponent marker of a hexadecimal float may be 'p' or 'P', so
+     look for both.  tolower needs an unsigned char value.  */
+  if (s[0] == '0' && tolower ((unsigned char) s[1]) == 'x'
+      && strpbrk (s, "pP") == 0)
+    return sign ^ strtoull (s, 0, 0);
+  /* Otherwise assume it's a floating-point literal.  */
+  return sign
+	 | (singleprec ? asuint (strtof (s, 0)) : asuint64 (strtod (s, 0)));
+}
+
+/* Fill the generator G from the positional arguments that follow the
+   function name:
+
+     lo [hi [x lo2 hi2] [count]]
+
+   ARGC/ARGV start at lo.  hi defaults to lo, and hi2 to lo2.  Without
+   an "x" separator the second range is parsed from the same arguments
+   as the first one.  Numbers are converted with getnum using the
+   precision of F.  Malformed argument lists end in usage ().  */
+static void
+parsegen (struct gen *g, int argc, char *argv[], const struct fun *f)
+{
+  int singleprec = f->singleprec;
+  int arity = f->arity;
+  uint64_t a, b, a2, b2, n;
+  if (argc < 1)
+    usage ();
+  b = a = getnum (argv[0], singleprec);
+  n = 0;
+  if (argc > 1 && strcmp (argv[1], "x") == 0)
+    {
+      argc -= 2;
+      argv += 2;
+    }
+  else if (argc > 1)
+    {
+      b = getnum (argv[1], singleprec);
+      if (argc > 2 && strcmp (argv[2], "x") == 0)
+	{
+	  argc -= 3;
+	  argv += 3;
+	}
+    }
+  /* An "x" that is the last argument leaves nothing to parse: argv[0]
+     is then the terminating null pointer and must not reach getnum.  */
+  if (argc < 1)
+    usage ();
+  b2 = a2 = getnum (argv[0], singleprec);
+  if (argc > 1)
+    b2 = getnum (argv[1], singleprec);
+  if (argc > 2)
+    n = strtoull (argv[2], 0, 0);
+  if (argc > 3)
+    usage ();
+  if (arity == 1)
+    {
+      g->start = a;
+      g->len = b - a;
+      /* A missing (zero) or too large count is replaced by the number
+	 of bit patterns in the range.  */
+      if (n - 1 > b - a)
+	n = b - a + 1;
+      g->off = 0;
+      g->step = n ? (g->len + 1) / n : 1;
+      g->start2 = g->len2 = 0;
+      g->cnt = n;
+    }
+  else if (arity == 2)
+    {
+      g->start = a;
+      g->len = b - a;
+      g->off = g->step = 0;
+      g->start2 = a2;
+      g->len2 = b2 - a2;
+      g->cnt = n;
+    }
+  else
+    usage ();
+}
+
+/* Entry point: parse the options, look up the function to test, set up
+   the input generator from the remaining arguments and run the
+   comparison.  Returns the result of cmp, or 0 when mpfr would be
+   needed but is not compiled in.  */
+int
+main (int argc, char *argv[])
+{
+  const struct fun *f;
+  struct gen gen;
+  struct conf conf;
+  /* Defaults: round to nearest, verbose, no forced mpfr, fenv testing
+     on, no soft limit and no error limit.  */
+  conf.rc = 'n';
+  conf.quiet = 0;
+  conf.mpfr = 0;
+  conf.fenv = 1;
+  conf.softlim = 0;
+  conf.errlim = INFINITY;
+  /* Consume leading options; stop at the first argument that does not
+     start with '-', which is the function name.  */
+  for (;;)
+    {
+      argc--;
+      argv++;
+      if (argc < 1)
+	usage ();
+      if (argv[0][0] != '-')
+	break;
+      switch (argv[0][1])
+	{
+	case 'e':
+	  argc--;
+	  argv++;
+	  if (argc < 1)
+	    usage ();
+	  conf.errlim = strtod (argv[0], 0);
+	  break;
+	case 'f':
+	  conf.fenv = 0;
+	  break;
+	case 'l':
+	  argc--;
+	  argv++;
+	  if (argc < 1)
+	    usage ();
+	  conf.softlim = strtod (argv[0], 0);
+	  break;
+	case 'm':
+	  conf.mpfr = 1;
+	  break;
+	case 'q':
+	  conf.quiet = 1;
+	  break;
+	case 'r':
+	  /* The mode letter may be attached (-rz) or be the next
+	     argument (-r z).  */
+	  conf.rc = argv[0][2];
+	  if (!conf.rc)
+	    {
+	      argc--;
+	      argv++;
+	      if (argc < 1)
+		usage ();
+	      conf.rc = argv[0][0];
+	    }
+	  break;
+	default:
+	  usage ();
+	}
+    }
+  /* Map the rounding mode letter to the fenv constant.  */
+  switch (conf.rc)
+    {
+    case 'n':
+      conf.r = FE_TONEAREST;
+      break;
+    case 'u':
+      conf.r = FE_UPWARD;
+      break;
+    case 'd':
+      conf.r = FE_DOWNWARD;
+      break;
+    case 'z':
+      conf.r = FE_TOWARDZERO;
+      break;
+    default:
+      usage ();
+    }
+  /* Look up the function by name in the table.  */
+  for (f = fun; f->name; f++)
+    if (strcmp (argv[0], f->name) == 0)
+      break;
+  if (!f->name)
+    usage ();
+  if (!f->singleprec && LDBL_MANT_DIG == DBL_MANT_DIG)
+    conf.mpfr = 1; /* Use mpfr if long double has no extra precision.  */
+  if (!USE_MPFR && conf.mpfr)
+    {
+      /* Nothing can be checked; this is reported but not treated as a
+	 failure.  */
+      puts ("mpfr is not available.");
+      return 0;
+    }
+  argc--;
+  argv++;
+  parsegen (&gen, argc, argv, f);
+  conf.n = gen.cnt;
+  return cmp (f, &gen, &conf);
+}
@@ -0,0 +1,362 @@
+/*
+ * Generic functions for ULP error estimation.
+ *
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+/* For each different math function type,
+   T(x) should add a different suffix to x.
+   RT(x) should add a return type specific suffix to x. */
+
+#ifdef NEW_RT
+#undef NEW_RT
+
+# if USE_MPFR
+/* Return the ulp exponent to use for the mpfr value X in the return
+   type: the exponent of X minus the precision, replaced by emin - 1
+   when below emin and capped at emax - prec.  Zero gives emin - 1,
+   infinity emax - prec and NaN 0.  The argument T is not used.  */
+static int RT(ulpscale_mpfr) (mpfr_t x, int t)
+{
+  /* TODO: pow of 2 cases.  */
+  if (mpfr_regular_p (x))
+    {
+      mpfr_exp_t e = mpfr_get_exp (x) - RT(prec);
+      if (e < RT(emin))
+	e = RT(emin) - 1;
+      if (e > RT(emax) - RT(prec))
+	e = RT(emax) - RT(prec);
+      return e;
+    }
+  if (mpfr_zero_p (x))
+    return RT(emin) - 1;
+  if (mpfr_inf_p (x))
+    return RT(emax) - RT(prec);
+  /* NaN.  */
+  return 0;
+}
+# endif
+
+/* Difference between exact result and closest real number that
+   gets rounded to got, i.e. error before rounding, for a correctly
+   rounded result the difference is 0.
+
+   GOT is the value returned by the function under test, P the
+   reference result (y, tail and ulpexp are used) and R the rounding
+   mode as an FE_* constant.  The result is in units of 2^ulpexp.
+   Bit-identical values give 0; a sign mismatch, or a NaN on only one
+   side, gives INFINITY.  */
+static double RT(ulperr) (RT(float) got, const struct RT(ret) * p, int r)
+{
+  RT(float) want = p->y;
+  RT(float) d;
+  double e;
+
+  if (RT(asuint) (got) == RT(asuint) (want))
+    return 0.0;
+  if (signbit (got) != signbit (want))
+    /* May have false positives with NaN.  */
+    //return isnan(got) && isnan(want) ? 0 : INFINITY;
+    return INFINITY;
+  if (!isfinite (want) || !isfinite (got))
+    {
+      if (isnan (got) != isnan (want))
+	return INFINITY;
+      if (isnan (want))
+	return 0;
+      /* One side is infinite: replace it by halfinf and halve the other
+	 side so that the difference below stays finite.  */
+      if (isinf (got))
+	{
+	  got = RT(copysign) (RT(halfinf), got);
+	  want *= 0.5f;
+	}
+      if (isinf (want))
+	{
+	  want = RT(copysign) (RT(halfinf), want);
+	  got *= 0.5f;
+	}
+    }
+  if (r == FE_TONEAREST)
+    {
+      // TODO: incorrect when got vs want cross a powof2 boundary
+      /* error = got > want
+	      ? got - want - tail ulp - 0.5 ulp
+	      : got - want - tail ulp + 0.5 ulp;  */
+      d = got - want;
+      e = d > 0 ? -p->tail - 0.5 : -p->tail + 0.5;
+    }
+  else
+    {
+      /* In directed modes, when got lies on the side of want that the
+	 rounding mode rounds away from, step it once towards want
+	 before taking the difference.  */
+      if ((r == FE_DOWNWARD && got < want) || (r == FE_UPWARD && got > want)
+	  || (r == FE_TOWARDZERO && fabs (got) < fabs (want)))
+	got = RT(nextafter) (got, want);
+      d = got - want;
+      e = -p->tail;
+    }
+  return RT(scalbn) (d, -p->ulpexp) + e;
+}
+
+/* Return 1 if the result YGOT with exception flags EXGOT matches the
+   expected YWANT and EXWANT: the values must be bit-identical and the
+   flags may only differ in bits set in EXMAY.  */
+static int RT(isok) (RT(float) ygot, int exgot, RT(float) ywant, int exwant,
+		      int exmay)
+{
+  int exdiff = exgot ^ exwant;
+
+  if (RT(asuint) (ygot) != RT(asuint) (ywant))
+    return 0;
+  return (exdiff & ~exmay) == 0;
+}
+
+/* Return 1 if YGOT and YWANT are bit-identical; exception flags are not
+   considered.  */
+static int RT(isok_nofenv) (RT(float) ygot, RT(float) ywant)
+{
+  int same = RT(asuint) (ygot) == RT(asuint) (ywant);
+
+  return same;
+}
+#endif
+
+/* Call the function under test on A in rounding mode R, storing the
+   result in *Y and the exception flags raised by the call in *EX.  The
+   rounding mode is set back to FE_TONEAREST afterwards.  */
+static inline void T(call_fenv) (const struct fun *f, struct T(args) a, int r,
+				  RT(float) * y, int *ex)
+{
+  if (r != FE_TONEAREST)
+    fesetround (r);
+  /* Start from a clean state so that *ex only reflects this call.  */
+  feclearexcept (FE_ALL_EXCEPT);
+  *y = T(call) (f, a);
+  *ex = fetestexcept (FE_ALL_EXCEPT);
+  if (r != FE_TONEAREST)
+    fesetround (FE_TONEAREST);
+}
+
+/* Call the function under test on A without touching the floating-point
+   environment: *Y receives the result and *EX is always 0.  R is
+   ignored.  */
+static inline void T(call_nofenv) (const struct fun *f, struct T(args) a,
+				    int r, RT(float) * y, int *ex)
+{
+  RT(float) result = T(call) (f, a);
+
+  *y = result;
+  *ex = 0;
+}
+
+/* Compute the reference result for A with the wider precision function
+   in rounding mode R and store it in *P, including the expected
+   exception flags.  Returns 1 if YGOT/EXGOT already match the
+   reference (ulpexp and tail are then not set), otherwise fills in
+   ulpexp and tail and returns 0.  */
+static inline int T(call_long_fenv) (const struct fun *f, struct T(args) a,
+				      int r, struct RT(ret) * p,
+				      RT(float) ygot, int exgot)
+{
+  if (r != FE_TONEAREST)
+    fesetround (r);
+  feclearexcept (FE_ALL_EXCEPT);
+  volatile struct T(args) va = a; // TODO: barrier
+  a = va;
+  RT(double) yl = T(call_long) (f, a);
+  p->y = (RT(float)) yl;
+  volatile RT(float) vy = p->y; // TODO: barrier
+  (void) vy;
+  p->ex = fetestexcept (FE_ALL_EXCEPT);
+  if (r != FE_TONEAREST)
+    fesetround (FE_TONEAREST);
+  /* A mismatch in the inexact flag is tolerated.  */
+  p->ex_may = FE_INEXACT;
+  if (RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may))
+    return 1;
+  p->ulpexp = RT(ulpscale) (p->y);
+  /* tail is the part of the wide result lost by the conversion, in
+     units of 2^ulpexp; an infinite y stands for 2*halfinf here.  */
+  if (isinf (p->y))
+    p->tail = RT(lscalbn) (yl - (RT(double)) 2 * RT(halfinf), -p->ulpexp);
+  else
+    p->tail = RT(lscalbn) (yl - p->y, -p->ulpexp);
+  if (RT(fabs) (p->y) < RT(min_normal))
+    {
+      /* TODO: subnormal result is treated as underflow even if it's
+	 exact since call_long may not raise inexact correctly.  */
+      if (p->y != 0 || (p->ex & FE_INEXACT))
+	p->ex |= FE_UNDERFLOW | FE_INEXACT;
+    }
+  return 0;
+}
+/* Same as call_long_fenv but without rounding mode or exception
+   handling: only the values are compared.  R and EXGOT are unused and
+   the exception fields of *P are not written.  Returns 1 if YGOT is
+   bit-identical to the reference, otherwise fills in ulpexp and tail
+   and returns 0.  */
+static inline int T(call_long_nofenv) (const struct fun *f, struct T(args) a,
+					int r, struct RT(ret) * p,
+					RT(float) ygot, int exgot)
+{
+  RT(double) yl = T(call_long) (f, a);
+  p->y = (RT(float)) yl;
+  if (RT(isok_nofenv) (ygot, p->y))
+    return 1;
+  p->ulpexp = RT(ulpscale) (p->y);
+  /* An infinite y stands for 2*halfinf when computing the tail.  */
+  if (isinf (p->y))
+    p->tail = RT(lscalbn) (yl - (RT(double)) 2 * RT(halfinf), -p->ulpexp);
+  else
+    p->tail = RT(lscalbn) (yl - p->y, -p->ulpexp);
+  return 0;
+}
+
+/* There are nan input args and all quiet: returns nonzero if at least
+   one argument in A is a NaN and none of the arguments is reported as
+   signaling by issignaling.  */
+static inline int T(qnanpropagation) (struct T(args) a)
+{
+  return T(reduce) (a, isnan, ||) && !T(reduce) (a, RT(issignaling), ||);
+}
+/* Return the sum of the arguments in A (the argument itself for one
+   argument functions).  */
+static inline RT(float) T(sum) (struct T(args) a)
+{
+  return T(reduce) (a, , +);
+}
+
+/* Compute the reference result for A with mpfr in rounding mode R_FENV
+   and store it in *P together with the expected exception flags.
+   Returns 1 if the got result (YGOT, EXGOT) is ok.  Otherwise returns 0
+   with ulpexp and tail filled in for the error computation.  Aborts
+   when built without mpfr.  */
+static inline int T(call_mpfr_fix) (const struct fun *f, struct T(args) a,
+				     int r_fenv, struct RT(ret) * p,
+				     RT(float) ygot, int exgot)
+{
+#if USE_MPFR
+  int t, t2;
+  mpfr_rnd_t r = rmap (r_fenv);
+  MPFR_DECL_INIT(my, RT(prec_mpfr));
+  MPFR_DECL_INIT(mr, RT(prec));
+  MPFR_DECL_INIT(me, RT(prec_mpfr));
+  mpfr_clear_flags ();
+  /* my: result in extended precision; mr: my rounded to the precision
+     of the return type.  */
+  t = T(call_mpfr) (my, f, a, r);
+  /* Double rounding.  */
+  t2 = mpfr_set (mr, my, r);
+  if (t2)
+    t = t2;
+  /* Apply the exponent range of the return type to mr, then restore
+     the default range.  */
+  mpfr_set_emin (RT(emin));
+  mpfr_set_emax (RT(emax));
+  t = mpfr_check_range (mr, t, r);
+  t = mpfr_subnormalize (mr, t, r);
+  mpfr_set_emax (MPFR_EMAX_DEFAULT);
+  mpfr_set_emin (MPFR_EMIN_DEFAULT);
+  p->y = mpfr_get_d (mr, r);
+  p->ex = t ? FE_INEXACT : 0;
+  p->ex_may = FE_INEXACT;
+  if (mpfr_underflow_p () && (p->ex & FE_INEXACT))
+    /* TODO: handle before and after rounding uflow cases.  */
+    p->ex |= FE_UNDERFLOW;
+  if (mpfr_overflow_p ())
+    p->ex |= FE_OVERFLOW | FE_INEXACT;
+  if (mpfr_divby0_p ())
+    p->ex |= FE_DIVBYZERO;
+  //if (mpfr_erangeflag_p ())
+  //  p->ex |= FE_INVALID;
+  if (!mpfr_nanflag_p () && RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may))
+    return 1;
+  if (mpfr_nanflag_p () && !T(qnanpropagation) (a))
+    p->ex |= FE_INVALID;
+  p->ulpexp = RT(ulpscale_mpfr) (my, t);
+  if (!isfinite (p->y))
+    {
+      p->tail = 0;
+      if (isnan (p->y))
+	{
+	  /* If an input was nan keep its sign.  */
+	  p->y = T(sum) (a);
+	  if (!isnan (p->y))
+	    p->y = (p->y - p->y) / (p->y - p->y);
+	  return RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may);
+	}
+      /* The reference overflowed to infinity.  Stand in for it with
+	 +-2^emax of the return type (twice halfinf, as in the
+	 call_long variants) rather than a fixed 2^1024, which is only
+	 right for double.  */
+      mpfr_set_si_2exp (mr, signbit (p->y) ? -1 : 1, RT(emax), MPFR_RNDN);
+      if (mpfr_cmpabs (my, mr) >= 0)
+	return RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may);
+    }
+  /* tail = (my - mr) in units of 2^ulpexp.  */
+  mpfr_sub (me, my, mr, MPFR_RNDN);
+  mpfr_mul_2si (me, me, -p->ulpexp, MPFR_RNDN);
+  p->tail = mpfr_get_d (me, MPFR_RNDN);
+  return 0;
+#else
+  abort ();
+#endif
+}
+
+/* Main test loop for one function type.  Generates inputs with GEN,
+   calls F and its reference for each of them, tracks the largest ulp
+   error and the number of failures, and prints a PASS/FAIL summary
+   line.  Returns 1 if any test case failed, 0 otherwise.  */
+static int T(cmp) (const struct fun *f, struct gen *gen,
+		     const struct conf *conf)
+{
+  double maxerr = 0;
+  uint64_t cnt = 0;	/* Test cases run.  */
+  uint64_t cnt1 = 0;	/* Cases with nonzero error.  */
+  uint64_t cnt2 = 0;	/* Cases with error above 1.  */
+  uint64_t cntfail = 0; /* Failed cases.  */
+  int r = conf->r;
+  int use_mpfr = conf->mpfr;
+  int fenv = conf->fenv;
+  for (;;)
+    {
+      struct RT(ret) want;
+      struct T(args) a = T(next) (gen);
+      int exgot;
+      int exgot2;
+      RT(float) ygot;
+      RT(float) ygot2;
+      int fail = 0;
+      if (fenv)
+	T(call_fenv) (f, a, r, &ygot, &exgot);
+      else
+	T(call_nofenv) (f, a, r, &ygot, &exgot);
+      /* For functions marked twice, repeat the call with secondcall set
+	 and require a bit-identical result.  */
+      if (f->twice) {
+	secondcall = 1;
+	if (fenv)
+	  T(call_fenv) (f, a, r, &ygot2, &exgot2);
+	else
+	  T(call_nofenv) (f, a, r, &ygot2, &exgot2);
+	secondcall = 0;
+	if (RT(asuint) (ygot) != RT(asuint) (ygot2))
+	  {
+	    fail = 1;
+	    cntfail++;
+	    T(printcall) (f, a);
+	    printf (" got %a then %a for same input\n", ygot, ygot2);
+	  }
+      }
+      cnt++;
+      /* Compute the reference; ok is nonzero when the result already
+	 matches it.  */
+      int ok = use_mpfr
+		 ? T(call_mpfr_fix) (f, a, r, &want, ygot, exgot)
+		 : (fenv ? T(call_long_fenv) (f, a, r, &want, ygot, exgot)
+			 : T(call_long_nofenv) (f, a, r, &want, ygot, exgot));
+      if (!ok)
+	{
+	  int print = 0;
+	  double err = RT(ulperr) (ygot, &want, r);
+	  double abserr = fabs (err);
+	  // TODO: count errors below accuracy limit.
+	  if (abserr > 0)
+	    cnt1++;
+	  if (abserr > 1)
+	    cnt2++;
+	  /* Errors above the hard limit are failures and always
+	     printed.  */
+	  if (abserr > conf->errlim)
+	    {
+	      print = 1;
+	      if (!fail)
+		{
+		  fail = 1;
+		  cntfail++;
+		}
+	    }
+	  /* A new maximum is printed unless quiet or not above the soft
+	     limit.  */
+	  if (abserr > maxerr)
+	    {
+	      maxerr = abserr;
+	      if (!conf->quiet && abserr > conf->softlim)
+		print = 1;
+	    }
+	  if (print)
+	    {
+	      T(printcall) (f, a);
+	      // TODO: inf ulp handling
+	      printf (" got %a want %a %+g ulp err %g\n", ygot, want.y,
+		      want.tail, err);
+	    }
+	  /* Exception flag mismatches outside ex_may are failures.  */
+	  int diff = fenv ? exgot ^ want.ex : 0;
+	  if (fenv && (diff & ~want.ex_may))
+	    {
+	      if (!fail)
+		{
+		  fail = 1;
+		  cntfail++;
+		}
+	      T(printcall) (f, a);
+	      printf (" is %a %+g ulp, got except 0x%0x", want.y, want.tail,
+		      exgot);
+	      if (diff & exgot)
+		printf (" wrongly set: 0x%x", diff & exgot);
+	      if (diff & ~exgot)
+		printf (" wrongly clear: 0x%x", diff & ~exgot);
+	      putchar ('\n');
+	    }
+	}
+      /* The count is checked after the test, so at least one case is
+	 always run.  */
+      if (cnt >= conf->n)
+	break;
+      if (!conf->quiet && cnt % 0x100000 == 0)
+	printf ("progress: %6.3f%% cnt %llu cnt1 %llu cnt2 %llu cntfail %llu "
+		"maxerr %g\n",
+		100.0 * cnt / conf->n, (unsigned long long) cnt,
+		(unsigned long long) cnt1, (unsigned long long) cnt2,
+		(unsigned long long) cntfail, maxerr);
+    }
+  double cc = cnt;
+  if (cntfail)
+    printf ("FAIL ");
+  else
+    printf ("PASS ");
+  T(printgen) (f, gen);
+  printf (" round %c errlim %g maxerr %g %s cnt %llu cnt1 %llu %g%% cnt2 %llu "
+	  "%g%% cntfail %llu %g%%\n",
+	  conf->rc, conf->errlim,
+	  maxerr, conf->r == FE_TONEAREST ? "+0.5" : "+1.0",
+	  (unsigned long long) cnt,
+	  (unsigned long long) cnt1, 100.0 * cnt1 / cc,
+	  (unsigned long long) cnt2, 100.0 * cnt2 / cc,
+	  (unsigned long long) cntfail, 100.0 * cntfail / cc);
+  return !!cntfail;
+}
@@ -0,0 +1,31 @@
+// polynomial for approximating cos(x)
+//
+// Copyright (c) 2019, Arm Limited.
+// SPDX-License-Identifier: MIT
+
+deg = 8;   // polynomial degree (highest power of x in the result)
+a = -pi/4; // interval [a;b] on which the error is minimized
+b = pi/4;
+
+// find even polynomial with minimal abs error compared to cos(x)
+
+f = cos(x); // target function
+
+// return p (degree deg-d) that minimizes |f(x) - poly(x) - x^d*p(x)| on [a;b]
+approx = proc(poly,d) {
+  return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// constant term is fixed to 1; each pass refits the terms from x^(2*i) up with
+poly = 1; // double prec coeffs and keeps only the x^(2*i) one, so poly stays even
+for i from 1 to deg/2 do {
+  p = roundcoefficients(approx(poly,2*i), [|D ...|]);
+  poly = poly + x^(2*i)*coeff(p,0);
+};
+
+display = hexadecimal; // print the numbers below in hexadecimal
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i); // coefficients of x^0 .. x^deg, in that order
@@ -0,0 +1,35 @@
+// polynomial for approximating e^x
+//
+// Copyright (c) 2019, Arm Limited.
+// SPDX-License-Identifier: MIT
+
+deg = 5; // poly degree
+N = 128; // table entries
+b = log(2)/(2*N);  // interval half-width: ln2/(2*N)
+b = b + b*0x1p-16; // increase interval for non-nearest rounding (TOINT_NARROW)
+a = -b; // the interval [a;b] is symmetric around 0
+
+// find polynomial with minimal abs error
+
+// return p (degree deg-d) that minimizes |exp(x) - poly(x) - x^d*p(x)| on [a;b]
+approx = proc(poly,d) {
+  return remez(exp(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// first 2 coeffs are fixed (1 + x); each pass refits the terms from x^i up with
+poly = 1 + x; // double prec coeffs and keeps only the x^i one
+for i from 2 to deg do {
+  p = roundcoefficients(approx(poly,i), [|D ...|]);
+  poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal; // print the numbers below in hexadecimal
+print("rel error:", accurateinfnorm(1-poly(x)/exp(x), [a;b], 30));
+print("abs error:", accurateinfnorm(exp(x)-poly(x), [a;b], 30));
+print("in [",a,b,"]");
+// same two error measures on the doubled interval [2*a;2*b] for non-nearest rounding
+print("rel2 error:", accurateinfnorm(1-poly(x)/exp(x), [2*a;2*b], 30));
+print("abs2 error:", accurateinfnorm(exp(x)-poly(x), [2*a;2*b], 30));
+print("in [",2*a,2*b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i); // coefficients of x^0 .. x^deg, in that order
@@ -0,0 +1,48 @@
+// polynomial for approximating 2^x
+//
+// Copyright (c) 2019, Arm Limited.
+// SPDX-License-Identifier: MIT
+
+// exp2f parameters (the active configuration)
+deg = 3; // poly degree
+N = 32;  // table entries
+b = 1/(2*N); // interval half-width
+a = -b; // the interval [a;b] is symmetric around 0
+
+//// exp2 parameters (alternative configuration, disabled)
+//deg = 5; // poly degree
+//N = 128; // table entries
+//b = 1/(2*N); // interval
+//a = -b;
+
+// find polynomial with minimal relative error
+
+f = 2^x; // target function
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+  return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)| (only used by the disabled line below)
+approx_abs = proc(poly,d) {
+  return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// first coeff is fixed to 1; each pass refits the terms from x^i up with
+poly = 1; // double prec coeffs and keeps only the x^i one
+for i from 1 to deg do {
+  p = roundcoefficients(approx(poly,i), [|D ...|]);
+//  p = roundcoefficients(approx_abs(poly,i), [|D ...|]);
+  poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal; // print the numbers below in hexadecimal
+print("rel error:", accurateinfnorm(1-poly(x)/2^x, [a;b], 30));
+print("abs error:", accurateinfnorm(2^x-poly(x), [a;b], 30));
+print("in [",a,b,"]");
+// same two error measures on the doubled interval [2*a;2*b] for non-nearest rounding:
+print("rel2 error:", accurateinfnorm(1-poly(x)/2^x, [2*a;2*b], 30));
+print("abs2 error:", accurateinfnorm(2^x-poly(x), [2*a;2*b], 30));
+print("in [",2*a,2*b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i); // coefficients of x^0 .. x^deg, in that order
@@ -0,0 +1,35 @@
+// polynomial for approximating log(1+x)
+//
+// Copyright (c) 2019, Arm Limited.
+// SPDX-License-Identifier: MIT
+
+deg = 12; // poly degree of the log(1+x) approximation
+// |log(1+x)| > 0x1p-4 outside the interval
+a = -0x1p-4;
+b =  0x1.09p-4;
+
+// find log(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log(1+x) is the same * x)
+deg = deg-1; // because of /x: from here on deg is the degree of the log(1+x)/x polynomial
+
+// f = log(1+x)/x; using taylor series (sum of (-x)^i/(i+1), truncated after the x^60 term)
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+  return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed to 1; each pass refits the terms from x^i up with
+poly = 1; // double prec coeffs and keeps only the x^i one
+for i from 1 to deg do {
+  p = roundcoefficients(approx(poly,i), [|D ...|]);
+  poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal; // print the numbers below in hexadecimal
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i); // coefficients of the log(1+x)/x polynomial, x^0 first
@@ -0,0 +1,42 @@
+// polynomial for approximating log2(1+x)
+//
+// Copyright (c) 2019, Arm Limited.
+// SPDX-License-Identifier: MIT
+
+deg = 11; // poly degree of the log2(1+x) approximation
+// |log2(1+x)| > 0x1p-4 outside the interval
+a = -0x1.5b51p-5;
+b =  0x1.6ab2p-5;
+
+ln2 = evaluate(log(2),0);
+invln2hi = double(1/ln2 + 0x1p21) - 0x1p21; // round away last 21 bits
+invln2lo = double(1/ln2 - invln2hi); // remainder of 1/ln2 after invln2hi, rounded to double
+
+// find log2(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log2(1+x) is the same * x)
+deg = deg-1; // because of /x: from here on deg is the degree of the log2(1+x)/x polynomial
+
+// f = log(1+x)/x; using taylor series (truncated after the x^60 term)
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+f = f/ln2; // now f = log2(1+x)/x
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+  return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed to invln2hi + invln2lo; each pass refits the terms from x^i up
+poly = invln2hi + invln2lo; // with double prec coeffs and keeps only the x^i one
+for i from 1 to deg do {
+  p = roundcoefficients(approx(poly,i), [|D ...|]);
+  poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal; // print the numbers below in hexadecimal
+print("invln2hi:", invln2hi);
+print("invln2lo:", invln2lo);
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i); // coefficients of the log2(1+x)/x polynomial, x^0 first
@@ -0,0 +1,41 @@
+// polynomial for approximating log2(1+x)
+//
+// Copyright (c) 2019, Arm Limited.
+// SPDX-License-Identifier: MIT
+
+deg = 7; // poly degree
+// interval ~= 1/(2*N), where N is the table entries
+a= -0x1.f45p-8;
+b=  0x1.f45p-8;
+
+ln2 = evaluate(log(2),0);
+invln2hi = double(1/ln2 + 0x1p21) - 0x1p21; // round away last 21 bits
+invln2lo = double(1/ln2 - invln2hi); // remainder of 1/ln2 after invln2hi, rounded to double
+
+// find log2(1+x) polynomial with minimal absolute error
+f = log(1+x)/ln2;
+
+// return p (degree deg-d) that minimizes |f(x) - poly(x) - x^d*p(x)| on [a;b]
+approx = proc(poly,d) {
+  return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// the x coeff is fixed to invln2lo + invln2hi (no constant term); each pass refits the
+poly = x*(invln2lo + invln2hi); // terms from x^i up in double prec and keeps the x^i one
+for i from 2 to deg do {
+  p = roundcoefficients(approx(poly,i), [|D ...|]);
+  poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal; // print the numbers below in hexadecimal
+print("invln2hi:", invln2hi);
+print("invln2lo:", invln2lo);
+print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
+//// relative error computation fails if f(0)==0 (disabled alternative below)
+//// g = f(x)/x = log2(1+x)/x; using taylor series
+//g = 0;
+//for i from 0 to 60 do { g = g + (-x)^i/(i+1)/ln2; };
+//print("rel error:", accurateinfnorm(1-(poly(x)/x)/g(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i); // coefficients of x^0 .. x^deg, in that order
@@ -0,0 +1,35 @@
+// polynomial for approximating log(1+x)
+//
+// Copyright (c) 2019, Arm Limited.
+// SPDX-License-Identifier: MIT
+
+deg = 6; // poly degree
+// interval ~= 1/(2*N), where N is the table entries
+a = -0x1.fp-9;
+b =  0x1.fp-9;
+
+// find log(1+x) polynomial with minimal absolute error
+f = log(1+x);
+
+// return p (degree deg-d) that minimizes |f(x) - poly(x) - x^d*p(x)| on [a;b]
+approx = proc(poly,d) {
+  return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// the x coeff is fixed to 1 (no constant term); each pass refits the terms from x^i up
+poly = x; // with double prec coeffs and keeps only the x^i one
+for i from 2 to deg do {
+  p = roundcoefficients(approx(poly,i), [|D ...|]);
+  poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal; // print the numbers below in hexadecimal
+print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
+// relative error computation fails if f(0)==0, so it is measured against g instead
+// g = f(x)/x = log(1+x)/x; using taylor series (truncated after the x^60 term)
+g = 0;
+for i from 0 to 60 do { g = g + (-x)^i/(i+1); };
+print("rel error:", accurateinfnorm(1-poly(x)/x/g(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i); // coefficients of x^0 .. x^deg, in that order
@@ -0,0 +1,61 @@
+#!/usr/bin/python
+
+# ULP error plot tool.
+#
+# Copyright (c) 2019, Arm Limited.
+# SPDX-License-Identifier: MIT
+
+import numpy as np
+import matplotlib.pyplot as plt
+import sys
+import re
+
+# example usage:
+# build/bin/ulp -e .0001 log 0.5 2.0 2345678 | math/tools/plot.py
+
+def fhex(s):  # s: hexadecimal float string (parse() passes the %a fields of the ulp output)
+	return float.fromhex(s)  # returns the value as a Python float
+
+def parse(f):
+	xs = []
+	gs = []
+	ys = []
+	es = []
+	# Has to match the format used in ulp.c
+	r = re.compile(r'[^ (]+\(([^ )]*)\) got ([^ ]+) want ([^ ]+) [^ ]+ ulp err ([^ ]+)')
+	for line in f:
+		m = r.match(line)
+		if m:
+			x = fhex(m.group(1))
+			g = fhex(m.group(2))
+			y = fhex(m.group(3))
+			e = float(m.group(4))
+			xs.append(x)
+			gs.append(g)
+			ys.append(y)
+			es.append(e)
+		elif line.startswith('PASS') or line.startswith('FAIL'):
+			# Print the summary line
+			print(line)
+	return xs, gs, ys, es
+
+def plot(xs, gs, ys, es):
+	if len(xs) < 2:
+		print('not enough samples')
+		return
+	a = min(xs)
+	b = max(xs)
+	fig, (ax0,ax1) = plt.subplots(nrows=2)
+	es = np.abs(es) # ignore the sign
+	emax = max(es)
+	ax0.text(a+(b-a)*0.7, emax*0.8, '%s\n%g'%(emax.hex(),emax))
+	ax0.plot(xs,es,'r.')
+	ax0.grid()
+	ax1.plot(xs,ys,'r.',label='want')
+	ax1.plot(xs,gs,'b.',label='got')
+	ax1.grid()
+	ax1.legend()
+	plt.show()
+
+xs, gs, ys, es = parse(sys.stdin)  # read the ulp tool output from standard input
+plot(xs, gs, ys, es)  # prints 'not enough samples' instead when fewer than 2 lines matched
@@ -0,0 +1,37 @@
+// polynomial for approximating sin(x)
+//
+// Copyright (c) 2019, Arm Limited.
+// SPDX-License-Identifier: MIT
+
+deg = 7;   // polynomial degree of the sin(x) approximation
+a = -pi/4; // interval [a;b] on which the error is minimized
+b = pi/4;
+
+// find even polynomial with minimal abs error compared to sin(x)/x
+
+// account for /x: from here on deg is the degree of the sin(x)/x polynomial
+deg = deg-1;
+
+// f = sin(x)/x; built as a series: sum of (-1)^i*x^(2*i)/(2*i+1)! for i = 0..60
+f = 1;
+c = 1; // running factorial: after step i, c = (2*i+1)!
+for i from 1 to 60 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*x^(2*i)/c; };
+
+// return p (degree deg-d) that minimizes |f(x) - poly(x) - x^d*p(x)| on [a;b]
+approx = proc(poly,d) {
+  return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// constant term is fixed to 1; each pass refits the terms from x^(2*i) up with
+poly = 1; // double prec coeffs and keeps only the x^(2*i) one, so poly stays even
+for i from 1 to deg/2 do {
+  p = roundcoefficients(approx(poly,2*i), [|D ...|]);
+  poly = poly + x^(2*i)*coeff(p,0);
+};
+
+display = hexadecimal; // print the numbers below in hexadecimal
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("abs error:", accurateinfnorm(sin(x)-x*poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i); // coefficients of the sin(x)/x polynomial, x^0 first
@@ -0,0 +1,30 @@
+// polynomial for approximating e^x
+//
+// Copyright (c) 2019, Arm Limited.
+// SPDX-License-Identifier: MIT
+
+deg = 4; // poly degree
+N = 128; // table entries
+b = log(2)/(2*N);  // interval half-width: ln2/(2*N)
+a = -b; // the interval [a;b] is symmetric around 0
+
+// find polynomial with minimal abs error
+
+// return p (degree deg-d) that minimizes |exp(x) - poly(x) - x^d*p(x)| on [a;b]
+approx = proc(poly,d) {
+  return remez(exp(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// first 2 coeffs are fixed (1 + x); each pass refits the terms from x^i up with
+poly = 1 + x; // double prec coeffs and keeps only the x^i one
+for i from 2 to deg do {
+  p = roundcoefficients(approx(poly,i), [|D ...|]);
+  poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal; // print the numbers below in hexadecimal
+print("rel error:", accurateinfnorm(1-poly(x)/exp(x), [a;b], 30));
+print("abs error:", accurateinfnorm(exp(x)-poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i); // coefficients of x^0 .. x^deg, in that order
@@ -0,0 +1,34 @@
+// polynomial used for __v_log(x)
+//
+// Copyright (c) 2019, Arm Limited.
+// SPDX-License-Identifier: MIT
+
+deg = 6; // poly degree of the log(1+x) approximation
+a = -0x1.fc1p-9; // interval [a;b] on which the error is minimized
+b = 0x1.009p-8;
+
+// find log(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log(1+x) is the same * x)
+deg = deg-1; // because of /x: from here on deg is the degree of the log(1+x)/x polynomial
+
+// f = log(1+x)/x; using taylor series (sum of (-x)^i/(i+1), truncated after the x^60 term)
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+  return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed to 1; each pass refits the terms from x^i up with
+poly = 1; // double prec coeffs and keeps only the x^i one
+for i from 1 to deg do {
+  p = roundcoefficients(approx(poly,i), [|D ...|]);
+  poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal; // print the numbers below in hexadecimal
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i); // coefficients of the log(1+x)/x polynomial, x^0 first
@@ -0,0 +1,36 @@
+// polynomial for approximating sin(x)
+//
+// Copyright (c) 2019, Arm Limited.
+// SPDX-License-Identifier: MIT
+
+deg = 15;  // polynomial degree of the sin(x) approximation
+a = -pi/2; // interval [a;b] on which the error is minimized
+b = pi/2;
+
+// find even polynomial with minimal abs error compared to sin(x)/x
+
+// account for /x: from here on deg is the degree of the sin(x)/x polynomial
+deg = deg-1;
+
+// f = sin(x)/x; built as a series: sum of (-1)^i*x^(2*i)/(2*i+1)! for i = 0..60
+f = 1;
+c = 1; // running factorial: after step i, c = (2*i+1)!
+for i from 1 to 60 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*x^(2*i)/c; };
+
+// return p (degree deg-d) that minimizes |f(x) - poly(x) - x^d*p(x)| on [a;b]
+approx = proc(poly,d) {
+  return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// constant term is fixed to 1; each pass refits the terms from x^(2*i) up with
+poly = 1; // double prec coeffs and keeps only the x^(2*i) one, so poly stays even
+for i from 1 to deg/2 do {
+  p = roundcoefficients(approx(poly,2*i), [|D ...|]);
+  poly = poly + x^(2*i)*coeff(p,0);
+};
+
+display = hexadecimal; // print the numbers below in hexadecimal
+print("abs error:", accurateinfnorm(sin(x)-x*poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i); // coefficients of the sin(x)/x polynomial, x^0 first
@@ -0,0 +1,87 @@
+/*
+ * Double-precision vector cos function.
+ *
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#if V_SUPPORTED
+
+static const double Poly[] = {
+/* Coefficients of r^15 down to r^3 (odd powers only).  worst-case error is 3.5 ulp.
+   abs error: 0x1.be222a58p-53 in [-pi/2, pi/2].  */
+-0x1.9f4a9c8b21dc9p-41,
+ 0x1.60e88a10163f2p-33,
+-0x1.ae6361b7254e7p-26,
+ 0x1.71de382e8d62bp-19,
+-0x1.a01a019aeb4ffp-13,
+ 0x1.111111110b25ep-7,
+-0x1.55555555554c3p-3,
+};
+
+#define C7 v_f64 (Poly[0]) /* r^15 coefficient.  */
+#define C6 v_f64 (Poly[1]) /* r^13 coefficient.  */
+#define C5 v_f64 (Poly[2]) /* r^11 coefficient.  */
+#define C4 v_f64 (Poly[3]) /* r^9 coefficient.  */
+#define C3 v_f64 (Poly[4]) /* r^7 coefficient.  */
+#define C2 v_f64 (Poly[5]) /* r^5 coefficient.  */
+#define C1 v_f64 (Poly[6]) /* r^3 coefficient.  */
+
+#define InvPi v_f64 (0x1.45f306dc9c883p-2) /* 1/pi.  */
+#define HalfPi v_f64 (0x1.921fb54442d18p+0) /* pi/2.  */
+#define Pi1 v_f64 (0x1.921fb54442d18p+1) /* pi, leading part; Pi2 and Pi3 refine it.  */
+#define Pi2 v_f64 (0x1.1a62633145c06p-53) /* pi, second part.  */
+#define Pi3 v_f64 (0x1.c1cd129024e09p-106) /* pi, third part.  */
+#define Shift v_f64 (0x1.8p52) /* 1.5 * 2^52: added and subtracted to round to an integer.  */
+#define RangeVal v_f64 (0x1p23) /* Lanes with |x| >= 2^23 are handed to specialcase.  */
+#define AbsMask v_u64 (0x7fffffffffffffff) /* Every bit except the sign bit.  */
+
+VPCS_ATTR
+__attribute__ ((noinline)) static v_f64_t /* Kept out of line, away from the fast path.  */
+specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp) /* x: inputs, y: fast results, cmp: flagged lanes.  */
+{
+  return v_call_f64 (cos, x, y, cmp); /* Presumably scalar cos where cmp is set, y elsewhere -- confirm in v_math.h.  */
+}
+
+VPCS_ATTR
+v_f64_t
+V_NAME(cos) (v_f64_t x) /* Vector cos (x), evaluated as +-sin (|x| - n*pi).  */
+{
+  v_f64_t n, r, r2, y;
+  v_u64_t odd, cmp;
+
+  r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask); /* r = |x|.  */
+  cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal)); /* Bit-pattern compare: flags |x| >= RangeVal, inf and nan.  */
+
+  /* n = rint((|x|+pi/2)/pi) - 0.5.  */
+  n = v_fma_f64 (InvPi, r + HalfPi, Shift);
+  odd = v_as_u64_f64 (n) << 63; /* Lowest bit of the rounded integer, moved to the sign bit.  */
+  n -= Shift;
+  n -= v_f64 (0.5);
+
+  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2), pi taken as Pi1 + Pi2 + Pi3.  */
+  r = v_fma_f64 (-Pi1, n, r);
+  r = v_fma_f64 (-Pi2, n, r);
+  r = v_fma_f64 (-Pi3, n, r);
+
+  /* sin(r) poly approx: y = r + r^3 * (C1 + C2 r^2 + ... + C7 r^12), Horner in r2.  */
+  r2 = r * r;
+  y = v_fma_f64 (C7, r2, C6);
+  y = v_fma_f64 (y, r2, C5);
+  y = v_fma_f64 (y, r2, C4);
+  y = v_fma_f64 (y, r2, C3);
+  y = v_fma_f64 (y, r2, C2);
+  y = v_fma_f64 (y, r2, C1);
+  y = v_fma_f64 (y * r2, r, r);
+
+  /* sign: flip the sign bit of y in the lanes where the rounded integer was odd.  */
+  y = v_as_f64_u64 (v_as_u64_f64 (y) ^ odd);
+
+  if (unlikely (v_any_u64 (cmp))) /* Some lane was flagged above.  */
+    return specialcase (x, y, cmp);
+  return y;
+}
+VPCS_ALIAS
+#endif
@@ -0,0 +1,76 @@
+/*
+ * Single-precision vector cos function.
+ *
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#if V_SUPPORTED
+
+static const float Poly[] = {
+  /* 1.886 ulp error; coefficients of r^9 down to r^3 (odd powers only).  */
+  0x1.5b2e76p-19f,
+  -0x1.9f42eap-13f,
+  0x1.110df4p-7f,
+  -0x1.555548p-3f,
+};
+#define Pi1 v_f32 (0x1.921fb6p+1f) /* pi, leading part; Pi2 and Pi3 refine it.  */
+#define Pi2 v_f32 (-0x1.777a5cp-24f) /* pi, second part.  */
+#define Pi3 v_f32 (-0x1.ee59dap-49f) /* pi, third part.  */
+#define A3 v_f32 (Poly[3]) /* r^3 coefficient.  */
+#define A5 v_f32 (Poly[2]) /* r^5 coefficient.  */
+#define A7 v_f32 (Poly[1]) /* r^7 coefficient.  */
+#define A9 v_f32 (Poly[0]) /* r^9 coefficient.  */
+#define RangeVal v_f32 (0x1p20f) /* Lanes with |x| >= 2^20 are handed to specialcase.  */
+#define InvPi v_f32 (0x1.45f306p-2f) /* 1/pi.  */
+#define Shift v_f32 (0x1.8p+23f) /* 1.5 * 2^23: added and subtracted to round to an integer.  */
+#define AbsMask v_u32 (0x7fffffff) /* Every bit except the sign bit.  */
+#define HalfPi v_f32 (0x1.921fb6p0f) /* pi/2.  */
+
+VPCS_ATTR
+static v_f32_t
+specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp) /* x: inputs, y: fast results, cmp: flagged lanes.  */
+{
+  /* Fall back to scalar code (presumably only in the lanes set in cmp -- confirm in v_math.h).  */
+  return v_call_f32 (cosf, x, y, cmp);
+}
+
+VPCS_ATTR
+v_f32_t
+V_NAME(cosf) (v_f32_t x) /* Vector cosf (x), evaluated as +-sin (|x| - n*pi).  */
+{
+  v_f32_t n, r, r2, y;
+  v_u32_t odd, cmp;
+
+  r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask); /* r = |x|.  */
+  cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal)); /* Bit-pattern compare: flags |x| >= RangeVal, inf and nan.  */
+
+  /* n = rint((|x|+pi/2)/pi) - 0.5 */
+  n = v_fma_f32 (InvPi, r + HalfPi, Shift);
+  odd = v_as_u32_f32 (n) << 31; /* Lowest bit of the rounded integer, moved to the sign bit.  */
+  n -= Shift;
+  n -= v_f32 (0.5f);
+
+  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2), pi taken as Pi1 + Pi2 + Pi3 */
+  r = v_fma_f32 (-Pi1, n, r);
+  r = v_fma_f32 (-Pi2, n, r);
+  r = v_fma_f32 (-Pi3, n, r);
+
+  /* y = sin(r) = r + r^3 * (A3 + A5 r^2 + A7 r^4 + A9 r^6), Horner in r2 */
+  r2 = r * r;
+  y = v_fma_f32 (A9, r2, A7);
+  y = v_fma_f32 (y, r2, A5);
+  y = v_fma_f32 (y, r2, A3);
+  y = v_fma_f32 (y * r2, r, r);
+
+  /* sign fix: flip the sign bit of y in the lanes where the rounded integer was odd */
+  y = v_as_f32_u32 (v_as_u32_f32 (y) ^ odd);
+
+  if (unlikely (v_any_u32 (cmp))) /* Some lane was flagged above.  */
+    return specialcase (x, y, cmp);
+  return y;
+}
+VPCS_ALIAS
+#endif
@@ -0,0 +1,94 @@
+/*
+ * Double-precision vector e^x function.
+ *
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#if V_SUPPORTED
+#include "v_exp.h"
+
+#if V_EXP_TABLE_BITS == 7
+/* maxerr: 1.88 +0.5 ulp
+   rel error: 1.4337*2^-53
+   abs error: 1.4299*2^-53 in [ -ln2/256, ln2/256 ].  */
+#define C1 v_f64 (0x1.ffffffffffd43p-2) /* r^2 coefficient.  */
+#define C2 v_f64 (0x1.55555c75adbb2p-3) /* r^3 coefficient.  */
+#define C3 v_f64 (0x1.55555da646206p-5) /* r^4 coefficient.  */
+#define InvLn2 v_f64 (0x1.71547652b82fep7) /* N/ln2.  */
+#define Ln2hi v_f64 (0x1.62e42fefa39efp-8) /* ln2/N.  */
+#define Ln2lo v_f64 (0x1.abc9e3b39803f3p-63) /* Low part of ln2/N, applied after Ln2hi.  */
+#elif V_EXP_TABLE_BITS == 8
+/* maxerr: 0.54 +0.5 ulp
+   rel error: 1.4318*2^-58
+   abs error: 1.4299*2^-58 in [ -ln2/512, ln2/512 ].  */
+#define C1 v_f64 (0x1.fffffffffffd4p-2) /* r^2 coefficient.  */
+#define C2 v_f64 (0x1.5555571d6b68cp-3) /* r^3 coefficient.  */
+#define C3 v_f64 (0x1.5555576a59599p-5) /* r^4 coefficient.  */
+#define InvLn2 v_f64 (0x1.71547652b82fep8) /* N/ln2.  */
+#define Ln2hi v_f64 (0x1.62e42fefa39efp-9) /* ln2/N.  */
+#define Ln2lo v_f64 (0x1.abc9e3b39803f3p-64) /* Low part of ln2/N, applied after Ln2hi.  */
+#endif
+
+#define N (1 << V_EXP_TABLE_BITS) /* Number of table entries.  */
+#define Tab __v_exp_data /* Lookup table indexed by the low bits of the rounded integer.  */
+#define IndexMask v_u64 (N - 1) /* Selects the low V_EXP_TABLE_BITS bits.  */
+#define Shift v_f64 (0x1.8p+52) /* 1.5 * 2^52: added and subtracted to round to an integer.  */
+#define Thres v_f64 (704.0) /* |x| above this in any lane selects specialcase.  */
+
+VPCS_ATTR
+static v_f64_t /* Returns s * (1 + y) with the scaling done in two steps.  */
+specialcase (v_f64_t s, v_f64_t y, v_f64_t n) /* s: 2^(n/N) bits, y: exp(r) - 1, n: rounded x*N/ln2.  */
+{
+  v_f64_t absn = v_abs_f64 (n);
+
+  /* 2^(n/N) may overflow, break it up into s1*s2.  */
+  v_u64_t b = v_cond_u64 (n <= v_f64 (0.0)) & v_u64 (0x6000000000000000); /* Exponent offset, nonzero only for n <= 0.  */
+  v_f64_t s1 = v_as_f64_u64 (v_u64 (0x7000000000000000) - b); /* 2^769 for n > 0, 2^-767 for n <= 0.  */
+  v_f64_t s2 = v_as_f64_u64 (v_as_u64_f64 (s) - v_u64 (0x3010000000000000) + b); /* s with the opposite exponent shift: s1 * s2 = s.  */
+  v_u64_t cmp = v_cond_u64 (absn > v_f64 (1280.0 * N)); /* Lanes with |n| > 1280*N take r1.  */
+  v_f64_t r1 = s1 * s1; /* Overflows for n > 0, underflows for n <= 0.  */
+  v_f64_t r0 = v_fma_f64 (y, s2, s2) * s1; /* (s2 + y*s2) * s1.  */
+  return v_as_f64_u64 ((cmp & v_as_u64_f64 (r1)) | (~cmp & v_as_u64_f64 (r0))); /* Per lane: r1 where cmp is set, else r0.  */
+}
+
+VPCS_ATTR
+v_f64_t
+V_NAME(exp) (v_f64_t x) /* Vector exp (x) = 2^(n/N) * exp (r), with a table for 2^(n/N).  */
+{
+  v_f64_t n, r, r2, s, y, z;
+  v_u64_t cmp, u, e, i;
+
+  cmp = v_cond_u64 (v_abs_f64 (x) > Thres); /* Lanes with |x| > Thres need specialcase.  */
+
+  /* n = round(x/(ln2/N)); u keeps the bit pattern before Shift is subtracted again.  */
+  z = v_fma_f64 (x, InvLn2, Shift);
+  u = v_as_u64_f64 (z);
+  n = z - Shift;
+
+  /* r = x - n*ln2/N, with ln2/N taken as Ln2hi + Ln2lo.  */
+  r = x;
+  r = v_fma_f64 (-Ln2hi, n, r);
+  r = v_fma_f64 (-Ln2lo, n, r);
+
+  e = u << (52 - V_EXP_TABLE_BITS); /* Moves bit V_EXP_TABLE_BITS of u up to bit 52.  */
+  i = u & IndexMask; /* Table index: low V_EXP_TABLE_BITS bits of u.  */
+
+  /* y = exp(r) - 1 ~= r + C1 r^2 + C2 r^3 + C3 r^4.  */
+  r2 = r * r;
+  y = v_fma_f64 (C2, r, C1);
+  y = v_fma_f64 (C3, r2, y);
+  y = v_fma_f64 (y, r2, r);
+
+  /* s = 2^(n/N): table entry plus e, added as integers (table layout: see __v_exp_data).  */
+  u = v_lookup_u64 (Tab, i);
+  s = v_as_f64_u64 (u + e);
+
+  if (unlikely (v_any_u64 (cmp))) /* Some lane was flagged above: whole vector takes the slow path.  */
+    return specialcase (s, y, n);
+  return v_fma_f64 (y, s, s); /* s + s*y = s * (1 + y).  */
+}
+VPCS_ALIAS
+#endif
@@ -0,0 +1,14 @@
+/*
+ * Declarations for double-precision e^x vector function.
+ *
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "v_math.h"
+#if WANT_VMATH
+
+#define V_EXP_TABLE_BITS 7 /* log2 of the number of entries in __v_exp_data.  */
+
+extern const u64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN; /* Table of 1 << V_EXP_TABLE_BITS 64-bit entries, defined elsewhere.  */
+#endif
@@ -0,0 +1,78 @@
+/*
+ * Single-precision vector 2^x function.
+ *
+ * Copyright (c) 2019, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#if V_SUPPORTED
+
+static const float Poly[] = {
+  /* maxerr: 1.962 ulp.  Coefficients of r^5 down to r^1.  */
+  0x1.59977ap-10f,
+  0x1.3ce9e4p-7f,
+  0x1.c6bd32p-5f,
+  0x1.ebf9bcp-3f,
+  0x1.62e422p-1f,
+};
+#define C0 v_f32 (Poly[0]) /* r^5 coefficient.  */
+#define C1 v_f32 (Poly[1]) /* r^4 coefficient.  */
+#define C2 v_f32 (Poly[2]) /* r^3 coefficient.  */
+#define C3 v_f32 (Poly[3]) /* r^2 coefficient.  */
+#define C4 v_f32 (Poly[4]) /* r^1 coefficient.  */
+
+#define Shift v_f32 (0x1.8p23f) /* 1.5 * 2^23; only referenced by the disabled #if 0 path.  */
+
+VPCS_ATTR
+static v_f32_t /* Per lane: the fast result, a two-step scaled result, or s1*s1.  */
+specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f32_t scale)
+{
+  /* 2^n may overflow, break it up into s1*s2.  */
+  v_u32_t b = v_cond_u32 (n <= v_f32 (0.0f)) & v_u32 (0x82000000); /* Exponent offset, nonzero only for n <= 0.  */
+  v_f32_t s1 = v_as_f32_u32 (v_u32 (0x7f000000) + b); /* 2^127 for n > 0, 2^-125 for n <= 0 (the add wraps).  */
+  v_f32_t s2 = v_as_f32_u32 (e - b); /* 2^n with the opposite exponent shift: s1 * s2 = 2^n.  */
+  v_u32_t cmp2 = v_cond_u32 (absn > v_f32 (192.0f)); /* Lanes with |n| > 192 take r2.  */
+  v_u32_t r2 = v_as_u32_f32 (s1 * s1); /* Overflows for n > 0, underflows for n <= 0.  */
+  v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); /* (s2 + poly*s2) * s1.  */
+  /* Similar to r1 but avoids double rounding in the subnormal range.  */
+  v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale));
+  return v_as_f32_u32 ((cmp2 & r2) | (~cmp2 & cmp1 & r1) | (~cmp1 & r0)); /* r2 if cmp2, else r1 if cmp1, else r0.  */
+}
+
+VPCS_ATTR
+v_f32_t
+V_NAME(exp2f) (v_f32_t x) /* Vector exp2f (x) = 2^n * (1 + poly (r)).  */
+{
+  v_f32_t n, r, r2, scale, p, q, poly, absn;
+  v_u32_t cmp, e;
+
+  /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+     x = n + r, with r in [-1/2, 1/2].  */
+#if 0 /* Disabled alternative: round by adding and subtracting Shift.  */
+  v_f32_t z;
+  z = x + Shift;
+  n = z - Shift;
+  r = x - n;
+  e = v_as_u32_f32 (z) << 23;
+#else /* Active path: round with v_round_f32 / v_round_s32.  */
+  n = v_round_f32 (x);
+  r = x - n;
+  e = v_as_u32_s32 (v_round_s32 (x)) << 23; /* Rounded integer moved to the exponent field.  */
+#endif
+  scale = v_as_f32_u32 (e + v_u32 (0x3f800000)); /* Bits of 1.0f plus e: 2^n while it stays in range.  */
+  absn = v_abs_f32 (n);
+  cmp = v_cond_u32 (absn > v_f32 (126.0f)); /* Lanes with |n| > 126 need specialcase.  */
+  r2 = r * r;
+  p = v_fma_f32 (C0, r, C1); /* C0 r + C1.  */
+  q = v_fma_f32 (C2, r, C3); /* C2 r + C3.  */
+  q = v_fma_f32 (p, r2, q); /* C0 r^3 + C1 r^2 + C2 r + C3.  */
+  p = C4 * r;
+  poly = v_fma_f32 (q, r2, p); /* poly = C4 r + C3 r^2 + C2 r^3 + C1 r^4 + C0 r^5.  */
+  if (unlikely (v_any_u32 (cmp))) /* Some lane was flagged above.  */
+    return specialcase (poly, n, e, absn, cmp, scale);
+  return v_fma_f32 (poly, scale, scale); /* scale + scale*poly = scale * (1 + poly).  */
+}
+VPCS_ALIAS
+#endif
--- a/Show More
+++ b/Show More