Mirror of https://github.com/FEX-Emu/vixl.git (synced 2024-11-23 14:40:17 +00:00)

commit 6e2c8275d5 (parent 5289c5900f)

VIXL Release 1.9

Refer to the README.md and LICENCE files for details.
README.md (57 lines changed)
@@ -1,44 +1,24 @@
-VIXL: AArch64 Runtime Code Generation Library Version 1.8
+VIXL: AArch64 Runtime Code Generation Library Version 1.9
 =========================================================
 
 Contents:
 
-* Requirements
 * Overview
+* Requirements
 * Known limitations
 * Usage
 
-Requirements
-============
-
-To build VIXL the following software is required:
-
-1. Python 2.7
-2. SCons 2.0
-3. GCC 4.6+
-
-A 64-bit host machine is required, implementing an LP64 data model. VIXL has
-only been tested using GCC on AArch64 Debian and amd64 Ubuntu systems.
-
-To run the linter stage of the tests, the following software is also required:
-
-1. Git
-2. [Google's `cpplint.py`][cpplint]
-
-Refer to the 'Usage' section for details.
-
-
 Overview
 ========
 
-VIXL is made of three components.
+VIXL contains three components.
 
-1. A programmatic assembler to generate A64 code at runtime. The assembler
+1. A programmatic **assembler** to generate A64 code at runtime. The assembler
    abstracts some of the constraints of the A64 ISA; for example, most
    instructions support any immediate.
-2. A disassembler which can print any instruction emitted by the assembler.
-3. A simulator which can simulate any instruction emitted by the assembler.
+2. A **disassembler** that can print any instruction emitted by the assembler.
+3. A **simulator** that can simulate any instruction emitted by the assembler.
    The simulator allows generated code to be run on another architecture
    without the need for a full ISA model.
 
@@ -48,11 +28,32 @@ Changes from previous versions of VIXL can be found in the
 [Changelog](doc/changelog.md).
 
 
+Requirements
+============
+
+To build VIXL the following software is required:
+
+1. Python 2.7
+2. SCons 2.0
+3. GCC 4.8+ or Clang 3.4+
+
+A 64-bit host machine is required, implementing an LP64 data model. VIXL has
+been tested using GCC on AArch64 Debian, GCC and Clang on amd64 Ubuntu
+systems.
+
+To run the linter stage of the tests, the following software is also required:
+
+1. Git
+2. [Google's `cpplint.py`][cpplint]
+
+Refer to the 'Usage' section for details.
+
+
 Known Limitations
 =================
 
-VIXL was developed to target JavaScript engines so a number of features from A64
-were deemed unnecessary:
+VIXL was developed for JavaScript engines so a number of features from A64 were
+deemed unnecessary:
 
 * Limited rounding mode support for floating point.
 * Limited support for synchronisation instructions.
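As a quick illustration of how the three components described in the README's overview fit together (not part of this commit): the sketch below follows the style of the bundled examples and uses the post-rename include paths introduced by this commit; the constructor and accessor signatures are recalled from the VIXL 1.x API and may differ in detail.

// Minimal sketch: generate A64 code with the assembler, run it on the simulator.
#include <cinttypes>
#include <cstdio>

#include "vixl/a64/macro-assembler-a64.h"
#include "vixl/a64/simulator-a64.h"

using namespace vixl;

int main() {
  byte buffer[4096];
  MacroAssembler masm(buffer, sizeof(buffer));

  // Generate a tiny function that returns 42. The macro-assembler accepts
  // (almost) any immediate and synthesises the required instructions.
  masm.Mov(x0, 42);
  masm.Ret();
  masm.FinalizeCode();

  // Run the generated code on the simulator (requires a USE_SIMULATOR build).
  Decoder decoder;
  Simulator simulator(&decoder);
  simulator.RunFrom(reinterpret_cast<Instruction*>(buffer));
  printf("x0 = %" PRIi64 "\n", simulator.xreg(0));
  return 0;
}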
SConstruct (63 lines changed)

@@ -49,18 +49,19 @@ Some common build targets are:
 # Global configuration.
 PROJ_SRC_DIR = 'src'
 PROJ_SRC_FILES = '''
-src/a64/assembler-a64.cc
-src/a64/cpu-a64.cc
-src/a64/debugger-a64.cc
-src/a64/decoder-a64.cc
-src/a64/disasm-a64.cc
-src/a64/instructions-a64.cc
-src/a64/instrument-a64.cc
-src/a64/logic-a64.cc
-src/a64/macro-assembler-a64.cc
-src/a64/simulator-a64.cc
-src/code-buffer.cc
-src/utils.cc
+src/vixl/a64/assembler-a64.cc
+src/vixl/a64/cpu-a64.cc
+src/vixl/a64/debugger-a64.cc
+src/vixl/a64/decoder-a64.cc
+src/vixl/a64/disasm-a64.cc
+src/vixl/a64/instructions-a64.cc
+src/vixl/a64/instrument-a64.cc
+src/vixl/a64/logic-a64.cc
+src/vixl/a64/macro-assembler-a64.cc
+src/vixl/a64/simulator-a64.cc
+src/vixl/code-buffer.cc
+src/vixl/compiler-intrinsics.cc
+src/vixl/utils.cc
 '''.split()
 PROJ_EXAMPLES_DIR = 'examples'
 PROJ_EXAMPLES_SRC_FILES = '''

@@ -119,9 +120,7 @@ TARGET_SRC_FILES = {
     benchmarks/bench-branch-link-masm.cc
     '''.split()
 }
-RELEASE_OBJ_DIR = 'obj/release'
-DEBUG_OBJ_DIR = 'obj/debug'
+OBJ_DIR = 'obj'
 
 # Helper functions.
 def abort(message):

@@ -133,6 +132,10 @@ def list_target(obj_dir, src_files):
   return map(lambda x: os.path.join(obj_dir, x), src_files)
 
 
+def is_compiler(compiler):
+  return env['CXX'].find(compiler) == 0
+
+
 def create_variant(obj_dir, targets_dir):
   VariantDir(os.path.join(obj_dir, PROJ_SRC_DIR), PROJ_SRC_DIR)
   for directory in targets_dir.itervalues():

@@ -146,10 +149,9 @@ args.Add(EnumVariable('mode', 'Build mode', 'release',
 sim_default = 'off' if platform.machine() == 'aarch64' else 'on'
 args.Add(EnumVariable('simulator', 'build for the simulator', sim_default,
                       allowed_values = ['on', 'off']))
+args.Add('std', 'c++ standard')
 
 # Configure the environment.
-create_variant(RELEASE_OBJ_DIR, TARGET_SRC_DIR)
-create_variant(DEBUG_OBJ_DIR, TARGET_SRC_DIR)
 env = Environment(variables=args)
 
 # Commandline help.

@@ -175,18 +177,32 @@ if os.environ.get('LINKFLAGS'):
   env.Append(LINKFLAGS = os.environ.get('LINKFLAGS').split())
 
 # Always look in 'src' for include files.
+# TODO: Restore the '-Wunreachable-code' flag. This flag breaks builds for clang
+# 3.4 with std=c++98. So we need to re-enable this conditionally when clang is at
+# version 3.5 or later.
 env.Append(CPPPATH = [PROJ_SRC_DIR])
 env.Append(CPPFLAGS = ['-Wall',
                        '-Werror',
                        '-fdiagnostics-show-option',
                        '-Wextra',
                        '-Wredundant-decls',
                        '-pedantic',
                        # Explicitly enable the write-strings warning. VIXL uses
                        # const correctly when handling string constants.
                        '-Wwrite-strings'])
 
+build_suffix = ''
+std_path = 'default-std'
+
+if 'std' in env:
+  env.Append(CPPFLAGS = ['-std=' + env['std']])
+  std_path = env['std']
+
+if is_compiler('clang++'):
+  # This warning only works for Clang, when compiling the code base as C++11
+  # or newer. The compiler does not complain if the option is passed when
+  # compiling earlier C++ standards.
+  env.Append(CPPFLAGS = ['-Wimplicit-fallthrough'])
+
 if env['simulator'] == 'on':
   env.Append(CPPFLAGS = ['-DUSE_SIMULATOR'])

@@ -196,11 +212,9 @@ if env['mode'] == 'debug':
   env.Append(CPPFLAGS = ['-g', '-DVIXL_DEBUG'])
   # Append the debug mode suffix to the executable name.
   build_suffix += '_g'
-  build_dir = DEBUG_OBJ_DIR
 else:
   # Release mode.
   env.Append(CPPFLAGS = ['-O3'])
-  build_dir = RELEASE_OBJ_DIR
   process = subprocess.Popen(env['CXX'] + ' --version | grep "gnu.*4\.8"',
                              shell = True,
                              stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

@@ -214,6 +228,9 @@ else:
     # GCC 4.8.
     env.Append(CPPFLAGS = ['-Wno-maybe-uninitialized'])
 
+# Configure build directory
+build_dir = os.path.join(OBJ_DIR, env['mode'], env['CXX'], std_path, '')
+create_variant(build_dir, TARGET_SRC_DIR)
+
 # The lists of available targets and target names.
 targets = []

@@ -226,7 +243,7 @@ def create_alias(name, target):
 
 
 # The vixl library.
-libvixl = env.Library('vixl' + build_suffix,
+libvixl = env.Library(build_dir + 'vixl' + build_suffix,
                       list_target(build_dir, PROJ_SRC_FILES))
 create_alias('libvixl', libvixl)

@@ -238,7 +255,7 @@ test_ex_vdir = os.path.join(build_dir, 'test_examples')
 VariantDir(test_ex_vdir, '.')
 test_ex_obj = env.Object(list_target(test_ex_vdir, PROJ_EXAMPLES_SRC_FILES),
                          CPPFLAGS = env['CPPFLAGS'] + ['-DTEST_EXAMPLES'])
-test = env.Program('test-runner' + build_suffix,
+test = env.Program(build_dir + 'test-runner' + build_suffix,
                    list_target(build_dir, TARGET_SRC_FILES['test']) +
                    test_ex_obj + libvixl,
                    CPPPATH = env['CPPPATH'] + [PROJ_EXAMPLES_DIR])

@@ -248,7 +265,7 @@ create_alias('test', test)
 benchmarks = ['bench-dataop', 'bench-branch', 'bench-branch-link',
               'bench-branch-masm', 'bench-branch-link-masm']
 for bench in benchmarks:
-  prog = env.Program(bench + build_suffix,
+  prog = env.Program(build_dir + bench + build_suffix,
                      list_target(build_dir, TARGET_SRC_FILES[bench]) + libvixl)
   create_alias(bench, prog)
 # Alias to build all benchmarks.

@@ -258,7 +275,7 @@ create_alias('benchmarks', benchmarks)
 examples = []
 for example in PROJ_EXAMPLES_SRC_FILES:
   example_name = "example-" + os.path.splitext(os.path.basename(example))[0]
-  prog = env.Program(example_name,
+  prog = env.Program(build_dir + example_name,
                      [os.path.join(build_dir, example)] + libvixl,
                      CPPPATH = env['CPPPATH'] + [PROJ_EXAMPLES_DIR])
   create_alias(example_name, prog)
@@ -24,9 +24,9 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "a64/macro-assembler-a64.h"
-#include "a64/instructions-a64.h"
-#include "globals.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/globals.h"
 
 using namespace vixl;
 

@@ -24,9 +24,9 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "a64/macro-assembler-a64.h"
-#include "a64/instructions-a64.h"
-#include "globals.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/globals.h"
 
 using namespace vixl;
 

@@ -24,10 +24,10 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "globals.h"
+#include "vixl/globals.h"
 
-#include "a64/macro-assembler-a64.h"
-#include "a64/instructions-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/instructions-a64.h"
 
 using namespace vixl;
 

@@ -24,9 +24,9 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "a64/macro-assembler-a64.h"
-#include "a64/instructions-a64.h"
-#include "globals.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/globals.h"
 
 using namespace vixl;
 

@@ -24,9 +24,9 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "a64/macro-assembler-a64.h"
-#include "a64/instructions-a64.h"
-#include "globals.h"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/globals.h"
 
 using namespace vixl;
 
@@ -1,6 +1,13 @@
 VIXL Change Log
 ===============
 
+* 1.9
+    + Improved compatibility with Android build system.
+    + Improved compatibility with Clang toolchain.
+    + Added support for `umulh` instruction.
+    + Added support for `fcmpe` and `fccmpe` instructions.
+    + Other small bug fixes and improvements.
+
 * 1.8
     + Complete NEON instruction set support.
     + Support long branches using veneers.
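The `umulh`, `fcmpe`, and `fccmpe` entries above correspond to the new Assembler methods added later in this commit. A hedged usage sketch (the surrounding MacroAssembler setup is assumed, as in the bundled examples):

// Sketch only: emitting the instructions added in 1.9.
masm.umulh(x0, x1, x2);           // x0 = bits <127:64> of the unsigned x1 * x2.
masm.fcmpe(d0, d1);               // Signaling compare: raises Invalid Operation
                                  // even for quiet NaN operands.
masm.fcmpe(s0, 0.0);              // Signaling compare against +0.0.
masm.fccmpe(d0, d1, NoFlag, eq);  // Conditional signaling compare.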
@@ -27,7 +27,7 @@
 #ifndef VIXL_EXAMPLES_CUSTOM_DISASSEMBLER_H_
 #define VIXL_EXAMPLES_CUSTOM_DISASSEMBLER_H_
 
-#include "a64/disasm-a64.h"
+#include "vixl/a64/disasm-a64.h"
 
 using namespace vixl;
 
@@ -27,9 +27,9 @@
 #ifndef VIXL_EXAMPLE_EXAMPLES_H_
 #define VIXL_EXAMPLE_EXAMPLES_H_
 
-#include "a64/simulator-a64.h"
-#include "a64/debugger-a64.h"
-#include "a64/macro-assembler-a64.h"
+#include "vixl/a64/simulator-a64.h"
+#include "vixl/a64/debugger-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
 
 using namespace vixl;
 
@@ -24,8 +24,8 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "a64/simulator-a64.h"
-#include "a64/macro-assembler-a64.h"
+#include "vixl/a64/simulator-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
 
 #define BUF_SIZE (4096)
 #define __ masm->
 
@@ -117,7 +117,7 @@ int main(void) {
   float mat1[kLength], mat2[kLength], output[kLength];
 
   // Initialise the output matrix to the zero matrix.
-  memset(output, 0, sizeof(float)*kLength);
+  memset(output, 0, sizeof(output[0]) * kLength);
 
   // Fill the two input matrices with some 32 bit floating point values.
   // Array initialisation using curly brackets is also possible like so:
@@ -26,7 +26,7 @@
 
 
 #include <cmath>
-#include "a64/assembler-a64.h"
+#include "vixl/a64/assembler-a64.h"
 
 namespace vixl {
 

@@ -35,7 +35,7 @@ CPURegister CPURegList::PopLowestIndex() {
   if (IsEmpty()) {
     return NoCPUReg;
   }
-  int index = CountTrailingZeros(list_, kRegListSizeInBits);
+  int index = CountTrailingZeros(list_);
   VIXL_ASSERT((1 << index) & list_);
   Remove(index);
   return CPURegister(index, size_, type_);

@@ -47,7 +47,7 @@ CPURegister CPURegList::PopHighestIndex() {
   if (IsEmpty()) {
     return NoCPUReg;
   }
-  int index = CountLeadingZeros(list_, kRegListSizeInBits);
+  int index = CountLeadingZeros(list_);
   index = kRegListSizeInBits - 1 - index;
   VIXL_ASSERT((1 << index) & list_);
   Remove(index);

@@ -463,6 +463,12 @@ bool MemOperand::IsPostIndex() const {
 }
 
 
+void MemOperand::AddOffset(int64_t offset) {
+  VIXL_ASSERT(IsImmediateOffset());
+  offset_ += offset;
+}
+
+
 // Assembler
 Assembler::Assembler(byte* buffer, size_t capacity,
                      PositionIndependentCodeOption pic)

@@ -1349,6 +1355,14 @@ void Assembler::smulh(const Register& xd,
 }
 
 
+void Assembler::umulh(const Register& xd,
+                      const Register& xn,
+                      const Register& xm) {
+  VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits() && xm.Is64Bits());
+  DataProcessing3Source(xd, xn, xm, xzr, UMULH_x);
+}
+
+
 void Assembler::udiv(const Register& rd,
                      const Register& rn,
                      const Register& rm) {

@@ -2628,33 +2642,78 @@ void Assembler::fnmul(const VRegister& vd,
 }
 
 
-void Assembler::fcmp(const VRegister& vn,
-                     const VRegister& vm) {
+void Assembler::FPCompareMacro(const VRegister& vn,
+                               double value,
+                               FPTrapFlags trap) {
+  USE(value);
+  // Although the fcmp{e} instructions can strictly only take an immediate
+  // value of +0.0, we don't need to check for -0.0 because the sign of 0.0
+  // doesn't affect the result of the comparison.
+  VIXL_ASSERT(value == 0.0);
+  VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+  Instr op = (trap == EnableTrap) ? FCMPE_zero : FCMP_zero;
+  Emit(FPType(vn) | op | Rn(vn));
+}
+
+
+void Assembler::FPCompareMacro(const VRegister& vn,
+                               const VRegister& vm,
+                               FPTrapFlags trap) {
   VIXL_ASSERT(vn.Is1S() || vn.Is1D());
   VIXL_ASSERT(vn.IsSameSizeAndType(vm));
-  Emit(FPType(vn) | FCMP | Rm(vm) | Rn(vn));
+  Instr op = (trap == EnableTrap) ? FCMPE : FCMP;
+  Emit(FPType(vn) | op | Rm(vm) | Rn(vn));
+}
+
+
+void Assembler::fcmp(const VRegister& vn,
+                     const VRegister& vm) {
+  FPCompareMacro(vn, vm, DisableTrap);
+}
+
+
+void Assembler::fcmpe(const VRegister& vn,
+                      const VRegister& vm) {
+  FPCompareMacro(vn, vm, EnableTrap);
+}
 
 
 void Assembler::fcmp(const VRegister& vn,
                      double value) {
-  USE(value);
-  // Although the fcmp instruction can strictly only take an immediate value of
-  // +0.0, we don't need to check for -0.0 because the sign of 0.0 doesn't
-  // affect the result of the comparison.
-  VIXL_ASSERT(value == 0.0);
-  VIXL_ASSERT(vn.Is1S() || vn.Is1D());
-  Emit(FPType(vn) | FCMP_zero | Rn(vn));
+  FPCompareMacro(vn, value, DisableTrap);
+}
+
+
+void Assembler::fcmpe(const VRegister& vn,
+                      double value) {
+  FPCompareMacro(vn, value, EnableTrap);
+}
+
+
+void Assembler::FPCCompareMacro(const VRegister& vn,
+                                const VRegister& vm,
+                                StatusFlags nzcv,
+                                Condition cond,
+                                FPTrapFlags trap) {
+  VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+  VIXL_ASSERT(vn.IsSameSizeAndType(vm));
+  Instr op = (trap == EnableTrap) ? FCCMPE : FCCMP;
+  Emit(FPType(vn) | op | Rm(vm) | Cond(cond) | Rn(vn) | Nzcv(nzcv));
+}
 
 
 void Assembler::fccmp(const VRegister& vn,
                       const VRegister& vm,
                       StatusFlags nzcv,
                       Condition cond) {
-  VIXL_ASSERT(vn.Is1S() || vn.Is1D());
-  VIXL_ASSERT(vn.IsSameSizeAndType(vm));
-  Emit(FPType(vn) | FCCMP | Rm(vm) | Cond(cond) | Rn(vn) | Nzcv(nzcv));
+  FPCCompareMacro(vn, vm, nzcv, cond, DisableTrap);
+}
+
+
+void Assembler::fccmpe(const VRegister& vn,
+                       const VRegister& vm,
+                       StatusFlags nzcv,
+                       Condition cond) {
+  FPCCompareMacro(vn, vm, nzcv, cond, EnableTrap);
 }
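To make the refactoring above concrete: the four public comparison entry points now differ only in the FPTrapFlags value they pass down, which selects between the trapping and non-trapping opcodes. A short sketch (register choices arbitrary):

// Each call funnels into one of the two FPCompareMacro overloads:
masm.fcmp(s0, s1);    // DisableTrap -> FCMP
masm.fcmpe(s0, s1);   // EnableTrap  -> FCMPE
masm.fcmp(d2, 0.0);   // DisableTrap -> FCMP_zero (immediate must be +/-0.0)
masm.fcmpe(d2, 0.0);  // EnableTrap  -> FCMPE_zero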
@@ -4948,6 +5007,7 @@ bool Assembler::IsImmFP64(double imm) {
 
 
 bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size) {
+  VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
   bool offset_is_size_multiple =
       (((offset >> access_size) << access_size) == offset);
   return offset_is_size_multiple && is_int7(offset >> access_size);

@@ -4955,6 +5015,7 @@ bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size) {
 
 
 bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size) {
+  VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
   bool offset_is_size_multiple =
       (((offset >> access_size) << access_size) == offset);
   return offset_is_size_multiple && is_uint12(offset >> access_size);

@@ -5319,10 +5380,8 @@ bool AreAliased(const CPURegister& reg1, const CPURegister& reg2,
     }
   }
 
-  int number_of_unique_regs =
-      CountSetBits(unique_regs, sizeof(unique_regs) * 8);
-  int number_of_unique_fpregs =
-      CountSetBits(unique_fpregs, sizeof(unique_fpregs) * 8);
+  int number_of_unique_regs = CountSetBits(unique_regs);
+  int number_of_unique_fpregs = CountSetBits(unique_fpregs);
 
   VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs);
   VIXL_ASSERT(number_of_valid_fpregs >= number_of_unique_fpregs);
@@ -28,11 +28,11 @@
 #define VIXL_A64_ASSEMBLER_A64_H_
 
 
-#include "globals.h"
-#include "invalset.h"
-#include "utils.h"
-#include "code-buffer.h"
-#include "a64/instructions-a64.h"
+#include "vixl/globals.h"
+#include "vixl/invalset.h"
+#include "vixl/utils.h"
+#include "vixl/code-buffer.h"
+#include "vixl/a64/instructions-a64.h"
 
 namespace vixl {
 

@@ -55,6 +55,7 @@ class CPURegister {
     kInvalid = 0,
     kRegister,
     kVRegister,
+    kFPRegister = kVRegister,
     kNoRegister
   };
 

@@ -556,6 +557,10 @@ class CPURegList {
                             const CPURegList& list_3,
                             const CPURegList& list_4);
 
+  bool Overlaps(const CPURegList& other) const {
+    return (type_ == other.type_) && ((list_ & other.list_) != 0);
+  }
+
   RegList list() const {
     VIXL_ASSERT(IsValid());
     return list_;

@@ -600,7 +605,7 @@ class CPURegList {
 
   int Count() const {
     VIXL_ASSERT(IsValid());
-    return CountSetBits(list_, kRegListSizeInBits);
+    return CountSetBits(list_);
   }
 
   unsigned RegisterSizeInBits() const {

@@ -630,7 +635,7 @@ class CPURegList {
 
 // AAPCS64 callee-saved registers.
 extern const CPURegList kCalleeSaved;
-extern const CPURegList kCalleeSavedFP;
+extern const CPURegList kCalleeSavedV;
 
 
 // AAPCS64 caller-saved registers. Note that this includes lr.

@@ -710,17 +715,17 @@ class MemOperand {
   explicit MemOperand(Register base,
                      int64_t offset = 0,
                      AddrMode addrmode = Offset);
-  explicit MemOperand(Register base,
-                      Register regoffset,
-                      Shift shift = LSL,
-                      unsigned shift_amount = 0);
-  explicit MemOperand(Register base,
-                      Register regoffset,
-                      Extend extend,
-                      unsigned shift_amount = 0);
-  explicit MemOperand(Register base,
-                      const Operand& offset,
-                      AddrMode addrmode = Offset);
+  MemOperand(Register base,
+             Register regoffset,
+             Shift shift = LSL,
+             unsigned shift_amount = 0);
+  MemOperand(Register base,
+             Register regoffset,
+             Extend extend,
+             unsigned shift_amount = 0);
+  MemOperand(Register base,
+             const Operand& offset,
+             AddrMode addrmode = Offset);
 
   const Register& base() const { return base_; }
   const Register& regoffset() const { return regoffset_; }

@@ -734,6 +739,8 @@ class MemOperand {
   bool IsPreIndex() const;
   bool IsPostIndex() const;
 
+  void AddOffset(int64_t offset);
+
  private:
   Register base_;
   Register regoffset_;

@@ -1606,6 +1613,11 @@ class Assembler {
     umaddl(rd, rn, rm, xzr);
   }
 
+  // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
+  void umulh(const Register& xd,
+             const Register& xn,
+             const Register& xm);
+
   // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
   void smsubl(const Register& rd,
               const Register& rn,

@@ -2022,18 +2034,44 @@ class Assembler {
   // FP round to integer, towards zero.
   void frintz(const VRegister& vd, const VRegister& vn);
 
+  void FPCompareMacro(const VRegister& vn,
+                      double value,
+                      FPTrapFlags trap);
+
+  void FPCompareMacro(const VRegister& vn,
+                      const VRegister& vm,
+                      FPTrapFlags trap);
+
   // FP compare registers.
   void fcmp(const VRegister& vn, const VRegister& vm);
 
   // FP compare immediate.
   void fcmp(const VRegister& vn, double value);
 
+  void FPCCompareMacro(const VRegister& vn,
+                       const VRegister& vm,
+                       StatusFlags nzcv,
+                       Condition cond,
+                       FPTrapFlags trap);
+
   // FP conditional compare.
   void fccmp(const VRegister& vn,
              const VRegister& vm,
             StatusFlags nzcv,
             Condition cond);
 
+  // FP signaling compare registers.
+  void fcmpe(const VRegister& vn, const VRegister& vm);
+
+  // FP signaling compare immediate.
+  void fcmpe(const VRegister& vn, double value);
+
+  // FP conditional signaling compare.
+  void fccmpe(const VRegister& vn,
+              const VRegister& vm,
+              StatusFlags nzcv,
+              Condition cond);
+
   // FP conditional select.
   void fcsel(const VRegister& vd,
              const VRegister& vn,

@@ -3949,8 +3987,8 @@ class Assembler {
                            unsigned* n = NULL,
                            unsigned* imm_s = NULL,
                            unsigned* imm_r = NULL);
-  static bool IsImmLSPair(int64_t offset, unsigned size);
-  static bool IsImmLSScaled(int64_t offset, unsigned size);
+  static bool IsImmLSPair(int64_t offset, unsigned access_size);
+  static bool IsImmLSScaled(int64_t offset, unsigned access_size);
   static bool IsImmLSUnscaled(int64_t offset);
   static bool IsImmMovn(uint64_t imm, unsigned reg_size);
   static bool IsImmMovz(uint64_t imm, unsigned reg_size);
@@ -225,6 +225,11 @@ inline Condition InvertCondition(Condition cond) {
   return static_cast<Condition>(cond ^ 1);
 }
 
+enum FPTrapFlags {
+  EnableTrap = 1,
+  DisableTrap = 0
+};
+
 enum FlagsUpdate {
   SetFlags = 1,
   LeaveFlags = 0

@@ -1092,8 +1097,10 @@ enum FPCompareOp {
   FCMP_zero = FCMP_s_zero,
   FCMPE_s = FPCompareFixed | 0x00000010,
   FCMPE_d = FPCompareFixed | FP64 | 0x00000010,
+  FCMPE = FCMPE_s,
   FCMPE_s_zero = FPCompareFixed | 0x00000018,
-  FCMPE_d_zero = FPCompareFixed | FP64 | 0x00000018
+  FCMPE_d_zero = FPCompareFixed | FP64 | 0x00000018,
+  FCMPE_zero = FCMPE_s_zero
 };
 
 // Floating point conditional compare.
@@ -24,8 +24,8 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "utils.h"
-#include "a64/cpu-a64.h"
+#include "vixl/utils.h"
+#include "vixl/a64/cpu-a64.h"
 
 namespace vixl {
 
@@ -27,8 +27,8 @@
 #ifndef VIXL_CPU_A64_H
 #define VIXL_CPU_A64_H
 
-#include "globals.h"
-#include "instructions-a64.h"
+#include "vixl/globals.h"
+#include "vixl/a64/instructions-a64.h"
 
 namespace vixl {
 
@@ -26,7 +26,7 @@
 
 #ifdef USE_SIMULATOR
 
-#include "a64/debugger-a64.h"
+#include "vixl/a64/debugger-a64.h"
 
 namespace vixl {
 

@@ -645,7 +645,8 @@ void Debugger::VisitException(const Instruction* instr) {
     case BRK:
       DoBreakpoint(instr);
       return;
-    case HLT:  // Fall through.
+    case HLT:
+      VIXL_FALLTHROUGH();
     default: Simulator::VisitException(instr);
   }
 }

@@ -994,6 +995,7 @@ Token* FormatToken::Tokenize(const char* arg) {
       break;
     case 'i':
      if (length == 1) return new Format<uint32_t>("%08" PRIx32, 'i');
+      VIXL_FALLTHROUGH();
    default: return NULL;
  }
 
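The VIXL_FALLTHROUGH() calls introduced here (and throughout this commit) replace bare `// Fall through.` comments so that Clang's -Wimplicit-fallthrough, enabled in the SConstruct changes above, can verify intentional fall-throughs. Its definition lives in globals.h, which this diff does not show; a plausible reconstruction, assuming Clang with C++11 attributes:

// Hypothetical sketch -- the real definition is in src/vixl/globals.h,
// which this commit does not touch.
#if defined(__clang__) && (__cplusplus >= 201103L)
#define VIXL_FALLTHROUGH() [[clang::fallthrough]]  // Checked by the compiler.
#else
#define VIXL_FALLTHROUGH() do {} while (0)         // No-op elsewhere.
#endif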
@@ -32,10 +32,10 @@
 #include <errno.h>
 #include <vector>
 
-#include "globals.h"
-#include "utils.h"
-#include "a64/constants-a64.h"
-#include "a64/simulator-a64.h"
+#include "vixl/globals.h"
+#include "vixl/utils.h"
+#include "vixl/a64/constants-a64.h"
+#include "vixl/a64/simulator-a64.h"
 
 namespace vixl {
 
@@ -24,9 +24,9 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "globals.h"
-#include "utils.h"
-#include "a64/decoder-a64.h"
+#include "vixl/globals.h"
+#include "vixl/utils.h"
+#include "vixl/a64/decoder-a64.h"
 
 namespace vixl {
 

@@ -488,6 +488,7 @@ void Decoder::DecodeDataProcessing(const Instruction* instr) {
     case 6: {
       if (instr->Bit(29) == 0x1) {
         VisitUnallocated(instr);
+        VIXL_FALLTHROUGH();
       } else {
         if (instr->Bit(30) == 0) {
           if ((instr->Bit(15) == 0x1) ||
@@ -29,8 +29,8 @@
 
 #include <list>
 
-#include "globals.h"
-#include "a64/instructions-a64.h"
+#include "vixl/globals.h"
+#include "vixl/a64/instructions-a64.h"
 
 
 // List macro containing all visitors needed by the decoder class.
@@ -25,7 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <cstdlib>
-#include "a64/disasm-a64.h"
+#include "vixl/a64/disasm-a64.h"
 
 namespace vixl {
 

@@ -890,9 +890,9 @@ void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
     case LDUR_s: mnemonic = "ldur"; form = form_s; break;
     case LDUR_d: mnemonic = "ldur"; form = form_d; break;
     case LDUR_q: mnemonic = "ldur"; form = form_q; break;
-    case LDURSB_x: form = form_x;  // Fall through.
+    case LDURSB_x: form = form_x; VIXL_FALLTHROUGH();
     case LDURSB_w: mnemonic = "ldursb"; break;
-    case LDURSH_x: form = form_x;  // Fall through.
+    case LDURSH_x: form = form_x; VIXL_FALLTHROUGH();
     case LDURSH_w: mnemonic = "ldursh"; break;
     case LDURSW_x: mnemonic = "ldursw"; form = form_x; break;
     case PRFUM: mnemonic = "prfum"; form = form_prefetch; break;

@@ -1054,9 +1054,13 @@ void Disassembler::VisitFPCompare(const Instruction* instr) {
 
   switch (instr->Mask(FPCompareMask)) {
     case FCMP_s_zero:
-    case FCMP_d_zero: form = form_zero;  // Fall through.
+    case FCMP_d_zero: form = form_zero; VIXL_FALLTHROUGH();
     case FCMP_s:
     case FCMP_d: mnemonic = "fcmp"; break;
+    case FCMPE_s_zero:
+    case FCMPE_d_zero: form = form_zero; VIXL_FALLTHROUGH();
+    case FCMPE_s:
+    case FCMPE_d: mnemonic = "fcmpe"; break;
     default: form = "(FPCompare)";
   }
   Format(instr, mnemonic, form);

@@ -2884,8 +2888,8 @@ int Disassembler::SubstituteRegisterField(const Instruction* instr,
     field_len = 3;
   }
 
-  CPURegister::RegisterType reg_type;
-  unsigned reg_size;
+  CPURegister::RegisterType reg_type = CPURegister::kRegister;
+  unsigned reg_size = kXRegSize;
 
   if (reg_prefix == 'R') {
     reg_prefix = instr->SixtyFourBits() ? 'X' : 'W';

@@ -2913,8 +2917,6 @@ int Disassembler::SubstituteRegisterField(const Instruction* instr,
       return field_len;
     default:
       VIXL_UNREACHABLE();
-      reg_type = CPURegister::kRegister;
-      reg_size = kXRegSize;
   }
 
   if ((reg_type == CPURegister::kRegister) &&

@@ -3087,6 +3089,7 @@ int Disassembler::SubstituteImmediateField(const Instruction* instr,
           return 0;
         }
       }
+      VIXL_FALLTHROUGH();
     }
     case 'L': {  // IVLSLane[0123] - suffix indicates access size shift.
       AppendToOutput("%d", instr->NEONLSIndex(format[8] - '0'));

@@ -3236,7 +3239,8 @@ int Disassembler::SubstituteShiftField(const Instruction* instr,
   switch (format[1]) {
     case 'D': {  // HDP.
       VIXL_ASSERT(instr->ShiftDP() != ROR);
-    }  // Fall through.
+      VIXL_FALLTHROUGH();
+    }
     case 'L': {  // HLo.
       if (instr->ImmDPShift() != 0) {
         const char* shift_type[] = {"lsl", "lsr", "asr", "ror"};
@@ -27,11 +27,11 @@
 #ifndef VIXL_A64_DISASM_A64_H
 #define VIXL_A64_DISASM_A64_H
 
-#include "globals.h"
-#include "utils.h"
-#include "instructions-a64.h"
-#include "decoder-a64.h"
-#include "assembler-a64.h"
+#include "vixl/globals.h"
+#include "vixl/utils.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/a64/decoder-a64.h"
+#include "vixl/a64/assembler-a64.h"
 
 namespace vixl {
 
@@ -24,8 +24,8 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "a64/instructions-a64.h"
-#include "a64/assembler-a64.h"
+#include "vixl/a64/instructions-a64.h"
+#include "vixl/a64/assembler-a64.h"
 
 namespace vixl {
 
@@ -27,9 +27,9 @@
 #ifndef VIXL_A64_INSTRUCTIONS_A64_H_
 #define VIXL_A64_INSTRUCTIONS_A64_H_
 
-#include "globals.h"
-#include "utils.h"
-#include "a64/constants-a64.h"
+#include "vixl/globals.h"
+#include "vixl/utils.h"
+#include "vixl/a64/constants-a64.h"
 
 namespace vixl {
 // ISA constants. --------------------------------------------------------------
@@ -24,7 +24,7 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "a64/instrument-a64.h"
+#include "vixl/a64/instrument-a64.h"
 
 namespace vixl {
 

@@ -421,22 +421,26 @@ void Instrument::InstrumentLoadStore(const Instruction* instr) {
   static Counter* store_fp_counter = GetCounter("Store FP");
 
   switch (instr->Mask(LoadStoreMask)) {
-    case STRB_w:   // Fall through.
-    case STRH_w:   // Fall through.
-    case STR_w:    // Fall through.
+    case STRB_w:
+    case STRH_w:
+    case STR_w:
+      VIXL_FALLTHROUGH();
     case STR_x:    store_int_counter->Increment(); break;
-    case STR_s:    // Fall through.
+    case STR_s:
+      VIXL_FALLTHROUGH();
     case STR_d:    store_fp_counter->Increment(); break;
-    case LDRB_w:   // Fall through.
-    case LDRH_w:   // Fall through.
-    case LDR_w:    // Fall through.
-    case LDR_x:    // Fall through.
-    case LDRSB_x:  // Fall through.
-    case LDRSH_x:  // Fall through.
-    case LDRSW_x:  // Fall through.
-    case LDRSB_w:  // Fall through.
+    case LDRB_w:
+    case LDRH_w:
+    case LDR_w:
+    case LDR_x:
+    case LDRSB_x:
+    case LDRSH_x:
+    case LDRSW_x:
+    case LDRSB_w:
+      VIXL_FALLTHROUGH();
     case LDRSH_w:  load_int_counter->Increment(); break;
-    case LDR_s:    // Fall through.
+    case LDR_s:
+      VIXL_FALLTHROUGH();
     case LDR_d:    load_fp_counter->Increment(); break;
   }
 }
@@ -27,11 +27,11 @@
 #ifndef VIXL_A64_INSTRUMENT_A64_H_
 #define VIXL_A64_INSTRUMENT_A64_H_
 
-#include "globals.h"
-#include "utils.h"
-#include "a64/decoder-a64.h"
-#include "a64/constants-a64.h"
-#include "a64/instrument-a64.h"
+#include "vixl/globals.h"
+#include "vixl/utils.h"
+#include "vixl/a64/decoder-a64.h"
+#include "vixl/a64/constants-a64.h"
+#include "vixl/a64/instrument-a64.h"
 
 namespace vixl {
 
@ -24,9 +24,365 @@
|
||||
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include "a64/simulator-a64.h"
|
||||
#include <cmath>
|
||||
#include "vixl/a64/simulator-a64.h"
|
||||
|
||||
namespace vixl {
|
||||
|
||||
template<> double Simulator::FPDefaultNaN<double>() {
|
||||
return kFP64DefaultNaN;
|
||||
}
|
||||
|
||||
|
||||
template<> float Simulator::FPDefaultNaN<float>() {
|
||||
return kFP32DefaultNaN;
|
||||
}
|
||||
|
||||
// See FPRound for a description of this function.
|
||||
static inline double FPRoundToDouble(int64_t sign, int64_t exponent,
|
||||
uint64_t mantissa, FPRounding round_mode) {
|
||||
int64_t bits =
|
||||
FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
|
||||
exponent,
|
||||
mantissa,
|
||||
round_mode);
|
||||
return rawbits_to_double(bits);
|
||||
}
|
||||
|
||||
|
||||
// See FPRound for a description of this function.
|
||||
static inline float FPRoundToFloat(int64_t sign, int64_t exponent,
|
||||
uint64_t mantissa, FPRounding round_mode) {
|
||||
int32_t bits =
|
||||
FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
|
||||
exponent,
|
||||
mantissa,
|
||||
round_mode);
|
||||
return rawbits_to_float(bits);
|
||||
}
|
||||
|
||||
|
||||
// See FPRound for a description of this function.
|
||||
static inline float16 FPRoundToFloat16(int64_t sign,
|
||||
int64_t exponent,
|
||||
uint64_t mantissa,
|
||||
FPRounding round_mode) {
|
||||
return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
|
||||
sign, exponent, mantissa, round_mode);
|
||||
}
|
||||
|
||||
|
||||
double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
|
||||
if (src >= 0) {
|
||||
return UFixedToDouble(src, fbits, round);
|
||||
} else {
|
||||
// This works for all negative values, including INT64_MIN.
|
||||
return -UFixedToDouble(-src, fbits, round);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
|
||||
// An input of 0 is a special case because the result is effectively
|
||||
// subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
|
||||
if (src == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Calculate the exponent. The highest significant bit will have the value
|
||||
// 2^exponent.
|
||||
const int highest_significant_bit = 63 - CountLeadingZeros(src);
|
||||
const int64_t exponent = highest_significant_bit - fbits;
|
||||
|
||||
return FPRoundToDouble(0, exponent, src, round);
|
||||
}
|
||||
|
||||
|
||||
float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
|
||||
if (src >= 0) {
|
||||
return UFixedToFloat(src, fbits, round);
|
||||
} else {
|
||||
// This works for all negative values, including INT64_MIN.
|
||||
return -UFixedToFloat(-src, fbits, round);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
|
||||
// An input of 0 is a special case because the result is effectively
|
||||
// subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
|
||||
if (src == 0) {
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
// Calculate the exponent. The highest significant bit will have the value
|
||||
// 2^exponent.
|
||||
const int highest_significant_bit = 63 - CountLeadingZeros(src);
|
||||
const int32_t exponent = highest_significant_bit - fbits;
|
||||
|
||||
return FPRoundToFloat(0, exponent, src, round);
|
||||
}
|
||||
|
||||
|
||||
double Simulator::FPToDouble(float value) {
|
||||
switch (std::fpclassify(value)) {
|
||||
case FP_NAN: {
|
||||
if (IsSignallingNaN(value)) {
|
||||
FPProcessException();
|
||||
}
|
||||
if (DN()) return kFP64DefaultNaN;
|
||||
|
||||
// Convert NaNs as the processor would:
|
||||
// - The sign is propagated.
|
||||
// - The payload (mantissa) is transferred entirely, except that the top
|
||||
// bit is forced to '1', making the result a quiet NaN. The unused
|
||||
// (low-order) payload bits are set to 0.
|
||||
uint32_t raw = float_to_rawbits(value);
|
||||
|
||||
uint64_t sign = raw >> 31;
|
||||
uint64_t exponent = (1 << 11) - 1;
|
||||
uint64_t payload = unsigned_bitextract_64(21, 0, raw);
|
||||
payload <<= (52 - 23); // The unused low-order bits should be 0.
|
||||
payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
|
||||
|
||||
return rawbits_to_double((sign << 63) | (exponent << 52) | payload);
|
||||
}
|
||||
|
||||
case FP_ZERO:
|
||||
case FP_NORMAL:
|
||||
case FP_SUBNORMAL:
|
||||
case FP_INFINITE: {
|
||||
// All other inputs are preserved in a standard cast, because every value
|
||||
// representable using an IEEE-754 float is also representable using an
|
||||
// IEEE-754 double.
|
||||
return static_cast<double>(value);
|
||||
}
|
||||
}
|
||||
|
||||
VIXL_UNREACHABLE();
|
||||
return static_cast<double>(value);
|
||||
}
|
||||
|
||||
|
||||
float Simulator::FPToFloat(float16 value) {
|
||||
uint32_t sign = value >> 15;
|
||||
uint32_t exponent = unsigned_bitextract_32(
|
||||
kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits,
|
||||
value);
|
||||
uint32_t mantissa = unsigned_bitextract_32(
|
||||
kFloat16MantissaBits - 1, 0, value);
|
||||
|
||||
switch (float16classify(value)) {
|
||||
case FP_ZERO:
|
||||
return (sign == 0) ? 0.0f : -0.0f;
|
||||
|
||||
case FP_INFINITE:
|
||||
return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
|
||||
|
||||
case FP_SUBNORMAL: {
|
||||
// Calculate shift required to put mantissa into the most-significant bits
|
||||
// of the destination mantissa.
|
||||
int shift = CountLeadingZeros(mantissa << (32 - 10));
|
||||
|
||||
// Shift mantissa and discard implicit '1'.
|
||||
mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
|
||||
mantissa &= (1 << kFloatMantissaBits) - 1;
|
||||
|
||||
// Adjust the exponent for the shift applied, and rebias.
|
||||
exponent = exponent - shift + (-15 + 127);
|
||||
break;
|
||||
}
|
||||
|
||||
case FP_NAN:
|
||||
if (IsSignallingNaN(value)) {
|
||||
FPProcessException();
|
||||
}
|
||||
if (DN()) return kFP32DefaultNaN;
|
||||
|
||||
// Convert NaNs as the processor would:
|
||||
// - The sign is propagated.
|
||||
// - The payload (mantissa) is transferred entirely, except that the top
|
||||
// bit is forced to '1', making the result a quiet NaN. The unused
|
||||
// (low-order) payload bits are set to 0.
|
||||
exponent = (1 << kFloatExponentBits) - 1;
|
||||
|
||||
// Increase bits in mantissa, making low-order bits 0.
|
||||
mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
|
||||
mantissa |= 1 << 22; // Force a quiet NaN.
|
||||
break;
|
||||
|
||||
case FP_NORMAL:
|
||||
// Increase bits in mantissa, making low-order bits 0.
|
||||
mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
|
||||
|
||||
// Change exponent bias.
|
||||
exponent += (-15 + 127);
|
||||
break;
|
||||
|
||||
default: VIXL_UNREACHABLE();
|
||||
}
|
||||
return rawbits_to_float((sign << 31) |
|
||||
(exponent << kFloatMantissaBits) |
|
||||
mantissa);
|
||||
}
|
||||
|
||||
|
||||
float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
|
||||
// Only the FPTieEven rounding mode is implemented.
|
||||
VIXL_ASSERT(round_mode == FPTieEven);
|
||||
USE(round_mode);
|
||||
|
||||
uint32_t raw = float_to_rawbits(value);
|
||||
int32_t sign = raw >> 31;
|
||||
int32_t exponent = unsigned_bitextract_32(30, 23, raw) - 127;
|
||||
uint32_t mantissa = unsigned_bitextract_32(22, 0, raw);
|
||||
|
||||
switch (std::fpclassify(value)) {
|
||||
case FP_NAN: {
|
||||
if (IsSignallingNaN(value)) {
|
||||
FPProcessException();
|
||||
}
|
||||
if (DN()) return kFP16DefaultNaN;
|
||||
|
||||
// Convert NaNs as the processor would:
|
||||
// - The sign is propagated.
|
||||
// - The payload (mantissa) is transferred as much as possible, except
|
||||
// that the top bit is forced to '1', making the result a quiet NaN.
|
||||
float16 result = (sign == 0) ? kFP16PositiveInfinity
|
||||
: kFP16NegativeInfinity;
|
||||
result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
|
||||
result |= (1 << 9); // Force a quiet NaN;
|
||||
return result;
|
||||
}
|
||||
|
||||
case FP_ZERO:
|
||||
return (sign == 0) ? 0 : 0x8000;
|
||||
|
||||
case FP_INFINITE:
|
||||
return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
|
||||
|
||||
case FP_NORMAL:
|
||||
case FP_SUBNORMAL: {
|
||||
// Convert float-to-half as the processor would, assuming that FPCR.FZ
|
||||
// (flush-to-zero) is not set.
|
||||
|
||||
// Add the implicit '1' bit to the mantissa.
|
||||
mantissa += (1 << 23);
|
||||
return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
|
||||
}
|
||||
}
|
||||
|
||||
VIXL_UNREACHABLE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
|
||||
// Only the FPTieEven rounding mode is implemented.
|
||||
VIXL_ASSERT(round_mode == FPTieEven);
|
||||
USE(round_mode);
|
||||
|
||||
uint64_t raw = double_to_rawbits(value);
|
||||
int32_t sign = raw >> 63;
|
||||
int64_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
|
||||
uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
|
||||
|
||||
switch (std::fpclassify(value)) {
|
||||
case FP_NAN: {
|
||||
if (IsSignallingNaN(value)) {
|
||||
FPProcessException();
|
||||
}
|
||||
if (DN()) return kFP16DefaultNaN;
|
||||
|
||||
// Convert NaNs as the processor would:
|
||||
// - The sign is propagated.
|
||||
// - The payload (mantissa) is transferred as much as possible, except
|
||||
// that the top bit is forced to '1', making the result a quiet NaN.
|
||||
float16 result = (sign == 0) ? kFP16PositiveInfinity
|
||||
: kFP16NegativeInfinity;
|
||||
result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
|
||||
result |= (1 << 9); // Force a quiet NaN;
|
||||
return result;
|
||||
}
|
||||
|
||||
case FP_ZERO:
|
||||
return (sign == 0) ? 0 : 0x8000;
|
||||
|
||||
case FP_INFINITE:
|
||||
return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
|
||||
|
||||
case FP_NORMAL:
|
||||
case FP_SUBNORMAL: {
|
||||
// Convert double-to-half as the processor would, assuming that FPCR.FZ
|
||||
// (flush-to-zero) is not set.
|
||||
|
||||
// Add the implicit '1' bit to the mantissa.
|
||||
mantissa += (UINT64_C(1) << 52);
|
||||
return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
|
||||
}
|
||||
}
|
||||
|
||||
VIXL_UNREACHABLE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
float Simulator::FPToFloat(double value, FPRounding round_mode) {
|
||||
// Only the FPTieEven rounding mode is implemented.
|
||||
VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
|
||||
USE(round_mode);
|
||||
|
||||
switch (std::fpclassify(value)) {
|
||||
case FP_NAN: {
|
||||
if (IsSignallingNaN(value)) {
|
||||
FPProcessException();
|
||||
}
|
||||
if (DN()) return kFP32DefaultNaN;
|
||||
|
||||
// Convert NaNs as the processor would:
|
||||
// - The sign is propagated.
|
||||
// - The payload (mantissa) is transferred as much as possible, except
|
||||
// that the top bit is forced to '1', making the result a quiet NaN.
|
||||
uint64_t raw = double_to_rawbits(value);
|
||||
|
||||
uint32_t sign = raw >> 63;
|
||||
uint32_t exponent = (1 << 8) - 1;
|
||||
uint32_t payload = unsigned_bitextract_64(50, 52 - 23, raw);
|
||||
payload |= (1 << 22); // Force a quiet NaN.
|
||||
|
||||
return rawbits_to_float((sign << 31) | (exponent << 23) | payload);
|
||||
}
|
||||
|
||||
case FP_ZERO:
|
||||
case FP_INFINITE: {
|
||||
// In a C++ cast, any value representable in the target type will be
|
||||
// unchanged. This is always the case for +/-0.0 and infinities.
|
||||
return static_cast<float>(value);
|
||||
}
|
||||
|
||||
case FP_NORMAL:
|
||||
case FP_SUBNORMAL: {
|
||||
// Convert double-to-float as the processor would, assuming that FPCR.FZ
|
||||
// (flush-to-zero) is not set.
|
||||
uint64_t raw = double_to_rawbits(value);
|
||||
// Extract the IEEE-754 double components.
|
||||
uint32_t sign = raw >> 63;
|
||||
// Extract the exponent and remove the IEEE-754 encoding bias.
|
||||
int32_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
|
||||
// Extract the mantissa and add the implicit '1' bit.
|
||||
uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
|
||||
if (std::fpclassify(value) == FP_NORMAL) {
|
||||
mantissa |= (UINT64_C(1) << 52);
|
||||
}
|
||||
return FPRoundToFloat(sign, exponent, mantissa, round_mode);
|
||||
}
|
||||
}
|
||||
|
||||
VIXL_UNREACHABLE();
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
void Simulator::ld1(VectorFormat vform,
|
||||
LogicVRegister dst,
|
||||
uint64_t addr) {
|
||||
@ -1524,7 +1880,7 @@ LogicVRegister Simulator::sshl(VectorFormat vform,
|
||||
int64_t lj_src_val = src1.IntLeftJustified(vform, i);
|
||||
|
||||
// Set signed saturation state.
|
||||
if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
|
||||
if ((shift_val > CountLeadingSignBits(lj_src_val)) &&
|
||||
(lj_src_val != 0)) {
|
||||
dst.SetSignedSat(i, lj_src_val >= 0);
|
||||
}
|
||||
@ -1532,7 +1888,7 @@ LogicVRegister Simulator::sshl(VectorFormat vform,
|
||||
// Set unsigned saturation state.
|
||||
if (lj_src_val < 0) {
|
||||
dst.SetUnsignedSat(i, false);
|
||||
} else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
|
||||
} else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
|
||||
(lj_src_val != 0)) {
|
||||
dst.SetUnsignedSat(i, true);
|
||||
}
|
||||
@ -1570,7 +1926,7 @@ LogicVRegister Simulator::ushl(VectorFormat vform,
|
||||
uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
|
||||
|
||||
// Set saturation state.
|
||||
if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
|
||||
if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
|
||||
dst.SetUnsignedSat(i, true);
|
||||
}
|
||||
|
||||
@ -3153,9 +3509,9 @@ LogicVRegister Simulator::uzp2(VectorFormat vform,
|
||||
template <typename T>
|
||||
T Simulator::FPAdd(T op1, T op2) {
|
||||
T result = FPProcessNaNs(op1, op2);
|
||||
if (isnan(result)) return result;
|
||||
if (std::isnan(result)) return result;
|
||||
|
||||
if (isinf(op1) && isinf(op2) && (op1 != op2)) {
|
||||
if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
|
||||
// inf + -inf returns the default NaN.
|
||||
FPProcessException();
|
||||
return FPDefaultNaN<T>();
|
||||
@ -3169,9 +3525,9 @@ T Simulator::FPAdd(T op1, T op2) {
|
||||
template <typename T>
|
||||
T Simulator::FPSub(T op1, T op2) {
|
||||
// NaNs should be handled elsewhere.
|
||||
VIXL_ASSERT(!isnan(op1) && !isnan(op2));
|
||||
VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
|
||||
|
||||
if (isinf(op1) && isinf(op2) && (op1 == op2)) {
|
||||
if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
|
||||
// inf - inf returns the default NaN.
|
||||
FPProcessException();
|
||||
return FPDefaultNaN<T>();
|
||||
@ -3185,9 +3541,9 @@ T Simulator::FPSub(T op1, T op2) {
|
||||
template <typename T>
|
||||
T Simulator::FPMul(T op1, T op2) {
|
||||
// NaNs should be handled elsewhere.
|
||||
VIXL_ASSERT(!isnan(op1) && !isnan(op2));
|
||||
VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
|
||||
|
||||
if ((isinf(op1) && (op2 == 0.0)) || (isinf(op2) && (op1 == 0.0))) {
|
||||
if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
|
||||
// inf * 0.0 returns the default NaN.
|
||||
FPProcessException();
|
||||
return FPDefaultNaN<T>();
|
||||
@ -3200,7 +3556,7 @@ T Simulator::FPMul(T op1, T op2) {
|
||||
|
||||
template<typename T>
|
||||
T Simulator::FPMulx(T op1, T op2) {
|
||||
if ((isinf(op1) && (op2 == 0.0)) || (isinf(op2) && (op1 == 0.0))) {
|
||||
if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
|
||||
// inf * 0.0 returns +/-2.0.
|
||||
T two = 2.0;
|
||||
return copysign(1.0, op1) * copysign(1.0, op2) * two;
|
||||
@ -3215,13 +3571,13 @@ T Simulator::FPMulAdd(T a, T op1, T op2) {
|
||||
|
||||
T sign_a = copysign(1.0, a);
|
||||
T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
|
||||
bool isinf_prod = isinf(op1) || isinf(op2);
|
||||
bool isinf_prod = std::isinf(op1) || std::isinf(op2);
|
||||
bool operation_generates_nan =
|
||||
(isinf(op1) && (op2 == 0.0)) || // inf * 0.0
|
||||
(isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
|
||||
(isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
|
||||
(std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
|
||||
(std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
|
||||
(std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
|
||||
|
||||
if (isnan(result)) {
|
||||
if (std::isnan(result)) {
|
||||
// Generated NaNs override quiet NaNs propagated from a.
|
||||
if (operation_generates_nan && IsQuietNaN(a)) {
|
||||
FPProcessException();
|
||||
@ -3244,7 +3600,7 @@ T Simulator::FPMulAdd(T a, T op1, T op2) {
|
||||
}
|
||||
|
||||
result = FusedMultiplyAdd(op1, op2, a);
|
||||
VIXL_ASSERT(!isnan(result));
|
||||
VIXL_ASSERT(!std::isnan(result));
|
||||
|
||||
// Work around broken fma implementations for rounded zero results: If a is
|
||||
// 0.0, the sign of the result is the sign of op1 * op2 before rounding.
|
||||
@ -3259,9 +3615,9 @@ T Simulator::FPMulAdd(T a, T op1, T op2) {
|
||||
template <typename T>
|
||||
T Simulator::FPDiv(T op1, T op2) {
|
||||
// NaNs should be handled elsewhere.
|
||||
VIXL_ASSERT(!isnan(op1) && !isnan(op2));
|
||||
VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
|
||||
|
||||
if ((isinf(op1) && isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
|
||||
if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
|
||||
// inf / inf and 0.0 / 0.0 return the default NaN.
|
||||
FPProcessException();
|
||||
return FPDefaultNaN<T>();
|
||||
@ -3276,7 +3632,7 @@ T Simulator::FPDiv(T op1, T op2) {
|
||||
|
||||
template <typename T>
|
||||
T Simulator::FPSqrt(T op) {
|
||||
if (isnan(op)) {
|
||||
if (std::isnan(op)) {
|
||||
return FPProcessNaN(op);
|
||||
} else if (op < 0.0) {
|
||||
FPProcessException();
|
||||
@ -3290,7 +3646,7 @@ T Simulator::FPSqrt(T op) {
|
||||
template <typename T>
|
||||
T Simulator::FPMax(T a, T b) {
|
||||
T result = FPProcessNaNs(a, b);
|
||||
if (isnan(result)) return result;
|
||||
if (std::isnan(result)) return result;
|
||||
|
||||
if ((a == 0.0) && (b == 0.0) &&
|
||||
(copysign(1.0, a) != copysign(1.0, b))) {
|
||||
@ -3311,14 +3667,14 @@ T Simulator::FPMaxNM(T a, T b) {
|
||||
}
|
||||
|
||||
T result = FPProcessNaNs(a, b);
|
||||
return isnan(result) ? result : FPMax(a, b);
|
||||
return std::isnan(result) ? result : FPMax(a, b);
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
T Simulator::FPMin(T a, T b) {
|
||||
T result = FPProcessNaNs(a, b);
|
||||
if (isnan(result)) return result;
|
||||
if (std::isnan(result)) return result;
|
||||
|
||||
if ((a == 0.0) && (b == 0.0) &&
|
||||
(copysign(1.0, a) != copysign(1.0, b))) {
|
||||
@ -3339,16 +3695,17 @@ T Simulator::FPMinNM(T a, T b) {
|
||||
}
|
||||
|
||||
T result = FPProcessNaNs(a, b);
|
||||
return isnan(result) ? result : FPMin(a, b);
|
||||
return std::isnan(result) ? result : FPMin(a, b);
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
T Simulator::FPRecipStepFused(T op1, T op2) {
|
||||
const T two = 2.0;
|
||||
if ((isinf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (isinf(op2)))) {
|
||||
if ((std::isinf(op1) && (op2 == 0.0))
|
||||
|| ((op1 == 0.0) && (std::isinf(op2)))) {
|
||||
return two;
|
||||
} else if (isinf(op1) || isinf(op2)) {
|
||||
} else if (std::isinf(op1) || std::isinf(op2)) {
|
||||
// Return +inf if signs match, otherwise -inf.
|
||||
return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
|
||||
: kFP64NegativeInfinity;
|
||||
@ -3363,9 +3720,10 @@ T Simulator::FPRSqrtStepFused(T op1, T op2) {
|
||||
const T one_point_five = 1.5;
|
||||
const T two = 2.0;
|
||||
|
||||
if ((isinf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (isinf(op2)))) {
|
||||
if ((std::isinf(op1) && (op2 == 0.0))
|
||||
|| ((op1 == 0.0) && (std::isinf(op2)))) {
|
||||
return one_point_five;
|
||||
} else if (isinf(op1) || isinf(op2)) {
|
||||
} else if (std::isinf(op1) || std::isinf(op2)) {
|
||||
// Return +inf if signs match, otherwise -inf.
|
||||
return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
|
||||
: kFP64NegativeInfinity;
|
||||
@ -3373,9 +3731,9 @@ T Simulator::FPRSqrtStepFused(T op1, T op2) {
|
||||
// The multiply-add-halve operation must be fully fused, so avoid interim
|
||||
// rounding by checking which operand can be losslessly divided by two
|
||||
// before doing the multiply-add.
|
||||
if (isnormal(op1 / two)) {
|
||||
if (std::isnormal(op1 / two)) {
|
||||
return FusedMultiplyAdd(op1 / two, op2, one_point_five);
|
||||
} else if (isnormal(op2 / two)) {
|
||||
} else if (std::isnormal(op2 / two)) {
|
||||
return FusedMultiplyAdd(op1, op2 / two, one_point_five);
|
||||
} else {
|
||||
// Neither operand is normal after halving: the result is dominated by
|
||||
@ -3390,11 +3748,11 @@ double Simulator::FPRoundInt(double value, FPRounding round_mode) {
if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
(value == kFP64NegativeInfinity)) {
return value;
} else if (isnan(value)) {
} else if (std::isnan(value)) {
return FPProcessNaN(value);
}

double int_result = floor(value);
double int_result = std::floor(value);
double error = value - int_result;
switch (round_mode) {
case FPTieAway: {
@ -3419,7 +3777,7 @@ double Simulator::FPRoundInt(double value, FPRounding round_mode) {
// If the error is greater than 0.5, or is equal to 0.5 and the integer
// result is odd, round up.
} else if ((error > 0.5) ||
((error == 0.5) && (fmod(int_result, 2) != 0))) {
((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
int_result++;
}
break;
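The ties-to-even test above is easiest to see with concrete inputs. A small standalone sketch of that branch (the explicit form mirrors the simulator's logic; std::nearbyint under FE_TONEAREST would agree):

#include <cmath>

// Ties-to-even, expressed exactly as in the error test above.
static double RoundTieEvenSketch(double value) {
  double int_result = std::floor(value);
  double error = value - int_result;
  if ((error > 0.5) ||
      ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
    int_result++;
  }
  return int_result;
}
// RoundTieEvenSketch(2.5) == 2.0, RoundTieEvenSketch(3.5) == 4.0,
// RoundTieEvenSketch(2.6) == 3.0.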
@ -3461,7 +3819,7 @@ int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
} else if (value < kWMinInt) {
return kWMinInt;
}
return isnan(value) ? 0 : static_cast<int32_t>(value);
return std::isnan(value) ? 0 : static_cast<int32_t>(value);
}


@ -3472,7 +3830,7 @@ int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
} else if (value < kXMinInt) {
return kXMinInt;
}
return isnan(value) ? 0 : static_cast<int64_t>(value);
return std::isnan(value) ? 0 : static_cast<int64_t>(value);
}


@ -3483,7 +3841,7 @@ uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
} else if (value < 0.0) {
return 0;
}
return isnan(value) ? 0 : static_cast<uint32_t>(value);
return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
}


@ -3494,7 +3852,7 @@ uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
} else if (value < 0.0) {
return 0;
}
return isnan(value) ? 0 : static_cast<uint64_t>(value);
return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
}
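All four FPTo*Int* helpers share one shape: saturate out-of-range inputs, map NaN to zero, and otherwise truncate, the caller having already applied rmode via FPRoundInt. A self-contained sketch of the 32-bit signed case, assuming kWMaxInt/kWMinInt stand for INT32_MAX/INT32_MIN:

#include <cmath>
#include <cstdint>

// Sketch of the saturating double -> int32_t pattern above; `value` is
// assumed to have been rounded to an integral value already.
static int32_t FPToInt32Sketch(double value) {
  if (value >= 2147483647.0) {          // kWMaxInt.
    return INT32_MAX;
  } else if (value < -2147483648.0) {   // kWMinInt.
    return INT32_MIN;
  }
  return std::isnan(value) ? 0 : static_cast<int32_t>(value);
}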
@ -3511,7 +3869,7 @@ LogicVRegister Simulator::FN(VectorFormat vform, \
T result; \
if (PROCNAN) { \
result = FPProcessNaNs(op1, op2); \
if (!isnan(result)) { \
if (!std::isnan(result)) { \
result = OP(op1, op2); \
} \
} else { \
@ -3558,7 +3916,7 @@ LogicVRegister Simulator::frecps(VectorFormat vform,
T op1 = -src1.Float<T>(i);
T op2 = src2.Float<T>(i);
T result = FPProcessNaNs(op1, op2);
dst.SetFloat(i, isnan(result) ? result : FPRecipStepFused(op1, op2));
dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
}
return dst;
}
@ -3588,7 +3946,7 @@ LogicVRegister Simulator::frsqrts(VectorFormat vform,
T op1 = -src1.Float<T>(i);
T op2 = src2.Float<T>(i);
T result = FPProcessNaNs(op1, op2);
dst.SetFloat(i, isnan(result) ? result : FPRSqrtStepFused(op1, op2));
dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
}
return dst;
}
@ -3620,7 +3978,7 @@ LogicVRegister Simulator::fcmp(VectorFormat vform,
T op1 = src1.Float<T>(i);
T op2 = src2.Float<T>(i);
T nan_result = FPProcessNaNs(op1, op2);
if (!isnan(nan_result)) {
if (!std::isnan(nan_result)) {
switch (cond) {
case eq: result = (op1 == op2); break;
case ge: result = (op1 >= op2); break;
@ -4001,7 +4359,7 @@ LogicVRegister Simulator::frint(VectorFormat vform,
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
float input = src.Float<float>(i);
float rounded = FPRoundInt(input, rounding_mode);
if (inexact_exception && !isnan(input) && (input != rounded)) {
if (inexact_exception && !std::isnan(input) && (input != rounded)) {
FPProcessException();
}
dst.SetFloat<float>(i, rounded);
@ -4011,7 +4369,7 @@ LogicVRegister Simulator::frint(VectorFormat vform,
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
double input = src.Float<double>(i);
double rounded = FPRoundInt(input, rounding_mode);
if (inexact_exception && !isnan(input) && (input != rounded)) {
if (inexact_exception && !std::isnan(input) && (input != rounded)) {
FPProcessException();
}
dst.SetFloat<double>(i, rounded);
@ -4029,13 +4387,13 @@ LogicVRegister Simulator::fcvts(VectorFormat vform,
dst.ClearForWrite(vform);
if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
float op = src.Float<float>(i) * powf(2.0f, fbits);
float op = src.Float<float>(i) * std::pow(2.0f, fbits);
dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
}
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
double op = src.Float<double>(i) * pow(2.0, fbits);
double op = src.Float<double>(i) * std::pow(2.0, fbits);
dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
}
}
@ -4051,13 +4409,13 @@ LogicVRegister Simulator::fcvtu(VectorFormat vform,
dst.ClearForWrite(vform);
if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
float op = src.Float<float>(i) * powf(2.0f, fbits);
float op = src.Float<float>(i) * std::pow(2.0f, fbits);
dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
}
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
double op = src.Float<double>(i) * pow(2.0, fbits);
double op = src.Float<double>(i) * std::pow(2.0, fbits);
dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
}
}
@ -4182,7 +4540,7 @@ static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {

template <typename T>
T Simulator::FPRecipSqrtEstimate(T op) {
if (isnan(op)) {
if (std::isnan(op)) {
return FPProcessNaN(op);
} else if (op == 0.0) {
if (copysign(1.0, op) < 0.0) {
@ -4193,7 +4551,7 @@ T Simulator::FPRecipSqrtEstimate(T op) {
} else if (copysign(1.0, op) < 0.0) {
FPProcessException();
return FPDefaultNaN<T>();
} else if (isinf(op)) {
} else if (std::isinf(op)) {
return 0.0;
} else {
uint64_t fraction;
@ -4271,17 +4629,17 @@ T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
sign = double_sign(op);
}

if (isnan(op)) {
if (std::isnan(op)) {
return FPProcessNaN(op);
} else if (isinf(op)) {
} else if (std::isinf(op)) {
return (sign == 1) ? -0.0 : 0.0;
} else if (op == 0.0) {
FPProcessException(); // FPExc_DivideByZero exception.
return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
} else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof)
(fabsf(op) < pow(2.0, -128))) ||
(std::fabs(op) < std::pow(2.0, -128.0))) ||
((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof)
(fabs(op) < pow(2.0, -1024)))) {
(std::fabs(op) < std::pow(2.0, -1024.0)))) {
bool overflow_to_inf = false;
switch (rounding) {
case FPTieEven: overflow_to_inf = true; break;
@ -4338,9 +4696,9 @@ T Simulator::FPRecipEstimate(T op, FPRounding rounding) {

fraction = double_mantissa(estimate);
if (result_exp == 0) {
fraction = (1L << 51) | Bits(fraction, 51, 1);
fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
} else if (result_exp == -1) {
fraction = (1L << 50) | Bits(fraction, 51, 2);
fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
result_exp = 0;
}
if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
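The switch from 1L to UINT64_C(1) above is not cosmetic: where long is only 32 bits wide, 1L << 51 is undefined behaviour, whereas UINT64_C(1) is guaranteed to be a 64-bit constant. VIXL requires an LP64 host, so this is mostly belt-and-braces, but the macro makes the intended width explicit. A standalone illustration of the pattern:

#include <cstdint>

// With a 32-bit 'long', (1L << 51) is undefined behaviour;
// UINT64_C(1) << 51 always produces the intended 64-bit constant.
static uint64_t SetSubnormalTopBitSketch(uint64_t fraction) {
  return (UINT64_C(1) << 51) | (fraction & ((UINT64_C(1) << 51) - 1));
}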
@ -4384,8 +4742,8 @@ LogicVRegister Simulator::ursqrte(VectorFormat vform,
if (operand <= 0x3FFFFFFF) {
result = 0xFFFFFFFF;
} else {
dp_operand = operand * pow(2.0, -32);
dp_result = recip_sqrt_estimate(dp_operand) * pow(2.0, 31);
dp_operand = operand * std::pow(2.0, -32);
dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
result = static_cast<uint32_t>(dp_result);
}
dst.SetUint(vform, i, result);
@ -4416,8 +4774,8 @@ LogicVRegister Simulator::urecpe(VectorFormat vform,
if (operand <= 0x7FFFFFFF) {
result = 0xFFFFFFFF;
} else {
dp_operand = operand * pow(2.0, -32);
dp_result = recip_estimate(dp_operand) * pow(2.0, 31);
dp_operand = operand * std::pow(2.0, -32);
dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
result = static_cast<uint32_t>(dp_result);
}
dst.SetUint(vform, i, result);
@ -4433,7 +4791,7 @@ LogicVRegister Simulator::frecpx(VectorFormat vform,
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
T op = src.Float<T>(i);
T result;
if (isnan(op)) {
if (std::isnan(op)) {
result = FPProcessNaN(op);
} else {
int exp;
@ -24,7 +24,7 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "a64/macro-assembler-a64.h"
#include "vixl/a64/macro-assembler-a64.h"

namespace vixl {

@ -43,8 +43,8 @@ void Pool::SetNextCheckpoint(ptrdiff_t checkpoint) {
}


LiteralPool::LiteralPool(MacroAssembler* masm) :
Pool(masm), size_(0), first_use_(-1) {
LiteralPool::LiteralPool(MacroAssembler* masm)
: Pool(masm), size_(0), first_use_(-1) {
}


@ -718,11 +718,13 @@ void MacroAssembler::LogicalMacro(const Register& rd,
case AND:
Mov(rd, 0);
return;
case ORR: // Fall through.
case ORR:
VIXL_FALLTHROUGH();
case EOR:
Mov(rd, rn);
return;
case ANDS: // Fall through.
case ANDS:
VIXL_FALLTHROUGH();
case BICS:
break;
default:
@ -740,7 +742,8 @@ void MacroAssembler::LogicalMacro(const Register& rd,
case EOR:
Mvn(rd, rn);
return;
case ANDS: // Fall through.
case ANDS:
VIXL_FALLTHROUGH();
case BICS:
break;
default:
@ -1131,13 +1134,14 @@ void MacroAssembler::Csel(const Register& rd,

void MacroAssembler::Add(const Register& rd,
const Register& rn,
const Operand& operand) {
const Operand& operand,
FlagsUpdate S) {
VIXL_ASSERT(allow_macro_instructions_);
if (operand.IsImmediate() && (operand.immediate() < 0) &&
IsImmAddSub(-operand.immediate())) {
AddSubMacro(rd, rn, -operand.immediate(), LeaveFlags, SUB);
AddSubMacro(rd, rn, -operand.immediate(), S, SUB);
} else {
AddSubMacro(rd, rn, operand, LeaveFlags, ADD);
AddSubMacro(rd, rn, operand, S, ADD);
}
}

@ -1145,25 +1149,20 @@ void MacroAssembler::Add(const Register& rd,
void MacroAssembler::Adds(const Register& rd,
const Register& rn,
const Operand& operand) {
VIXL_ASSERT(allow_macro_instructions_);
if (operand.IsImmediate() && (operand.immediate() < 0) &&
IsImmAddSub(-operand.immediate())) {
AddSubMacro(rd, rn, -operand.immediate(), SetFlags, SUB);
} else {
AddSubMacro(rd, rn, operand, SetFlags, ADD);
}
Add(rd, rn, operand, SetFlags);
}


void MacroAssembler::Sub(const Register& rd,
const Register& rn,
const Operand& operand) {
const Operand& operand,
FlagsUpdate S) {
VIXL_ASSERT(allow_macro_instructions_);
if (operand.IsImmediate() && (operand.immediate() < 0) &&
IsImmAddSub(-operand.immediate())) {
AddSubMacro(rd, rn, -operand.immediate(), LeaveFlags, ADD);
AddSubMacro(rd, rn, -operand.immediate(), S, ADD);
} else {
AddSubMacro(rd, rn, operand, LeaveFlags, SUB);
AddSubMacro(rd, rn, operand, S, SUB);
}
}

@ -1171,13 +1170,7 @@ void MacroAssembler::Sub(const Register& rd,
void MacroAssembler::Subs(const Register& rd,
const Register& rn,
const Operand& operand) {
VIXL_ASSERT(allow_macro_instructions_);
if (operand.IsImmediate() && (operand.immediate() < 0) &&
IsImmAddSub(-operand.immediate())) {
AddSubMacro(rd, rn, -operand.immediate(), SetFlags, ADD);
} else {
AddSubMacro(rd, rn, operand, SetFlags, SUB);
}
Sub(rd, rn, operand, SetFlags);
}
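With the FlagsUpdate parameter, Adds/Subs collapse into one-line wrappers, and the negative-immediate rewrite (an add of a negative immediate becomes a subtract of its absolute value, and vice versa, with identical NZCV results) lives in a single place. A hypothetical usage sketch of the resulting behaviour:

//   masm.Add(x0, x1, Operand(-16));   // Emits: sub x0, x1, #16 (flags untouched).
//   masm.Adds(x0, x1, Operand(-16));  // Now Add(x0, x1, Operand(-16), SetFlags):
//                                     // emits subs x0, x1, #16, setting NZCV.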
@ -1193,23 +1186,29 @@ void MacroAssembler::Cmp(const Register& rn, const Operand& operand) {
}


void MacroAssembler::Fcmp(const FPRegister& fn, double value) {
void MacroAssembler::Fcmp(const FPRegister& fn, double value,
FPTrapFlags trap) {
VIXL_ASSERT(allow_macro_instructions_);
// The worst case for size is:
// * 1 to materialise the constant, using literal pool if necessary
// * 1 instruction for fcmp
// * 1 instruction for fcmp{e}
MacroEmissionCheckScope guard(this);
if (value != 0.0) {
UseScratchRegisterScope temps(this);
FPRegister tmp = temps.AcquireSameSizeAs(fn);
Fmov(tmp, value);
fcmp(fn, tmp);
FPCompareMacro(fn, tmp, trap);
} else {
fcmp(fn, value);
FPCompareMacro(fn, value, trap);
}
}
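The special case exists because A64's fcmp only has an immediate form for #0.0; any other constant must first be materialised into a scratch FP register, which is why the worst-case emission count above includes the extra instruction. A hypothetical usage sketch:

//   masm.Fcmp(d0, 0.0);   // One instruction: fcmp d0, #0.0.
//   masm.Fcmp(d0, 1.5);   // Fmov into a scratch register (or a literal
//                         // pool load), then fcmp against it.
//   masm.Fcmpe(d0, 1.5);  // Same, but the signalling fcmpe variant.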
void MacroAssembler::Fcmpe(const FPRegister& fn, double value) {
Fcmp(fn, value, EnableTrap);
}


void MacroAssembler::Fmov(VRegister vd, double imm) {
VIXL_ASSERT(allow_macro_instructions_);
// Floating point immediates are loaded through the literal pool.
@ -1637,41 +1636,67 @@ void MacroAssembler::Pop(const CPURegister& dst0, const CPURegister& dst1,


void MacroAssembler::PushCPURegList(CPURegList registers) {
int size = registers.RegisterSizeInBytes();

PrepareForPush(registers.Count(), size);
// Push up to four registers at a time because if the current stack pointer is
// sp and reg_size is 32, registers must be pushed in blocks of four in order
// to maintain the 16-byte alignment for sp.
VIXL_ASSERT(!registers.Overlaps(*TmpList()));
VIXL_ASSERT(!registers.Overlaps(*FPTmpList()));
VIXL_ASSERT(allow_macro_instructions_);

int reg_size = registers.RegisterSizeInBytes();
PrepareForPush(registers.Count(), reg_size);

// Bump the stack pointer and store two registers at the bottom.
int size = registers.TotalSizeInBytes();
const CPURegister& bottom_0 = registers.PopLowestIndex();
const CPURegister& bottom_1 = registers.PopLowestIndex();
if (bottom_0.IsValid() && bottom_1.IsValid()) {
Stp(bottom_0, bottom_1, MemOperand(StackPointer(), -size, PreIndex));
} else if (bottom_0.IsValid()) {
Str(bottom_0, MemOperand(StackPointer(), -size, PreIndex));
}

int offset = 2 * reg_size;
while (!registers.IsEmpty()) {
int count_before = registers.Count();
const CPURegister& src0 = registers.PopHighestIndex();
const CPURegister& src1 = registers.PopHighestIndex();
const CPURegister& src2 = registers.PopHighestIndex();
const CPURegister& src3 = registers.PopHighestIndex();
int count = count_before - registers.Count();
PushHelper(count, size, src0, src1, src2, src3);
const CPURegister& src0 = registers.PopLowestIndex();
const CPURegister& src1 = registers.PopLowestIndex();
if (src1.IsValid()) {
Stp(src0, src1, MemOperand(StackPointer(), offset));
} else {
Str(src0, MemOperand(StackPointer(), offset));
}
offset += 2 * reg_size;
}
}


void MacroAssembler::PopCPURegList(CPURegList registers) {
int size = registers.RegisterSizeInBytes();

PrepareForPop(registers.Count(), size);
// Pop up to four registers at a time because if the current stack pointer is
// sp and reg_size is 32, registers must be pushed in blocks of four in order
// to maintain the 16-byte alignment for sp.
VIXL_ASSERT(!registers.Overlaps(*TmpList()));
VIXL_ASSERT(!registers.Overlaps(*FPTmpList()));
VIXL_ASSERT(allow_macro_instructions_);

int reg_size = registers.RegisterSizeInBytes();
PrepareForPop(registers.Count(), reg_size);


int size = registers.TotalSizeInBytes();
const CPURegister& bottom_0 = registers.PopLowestIndex();
const CPURegister& bottom_1 = registers.PopLowestIndex();

int offset = 2 * reg_size;
while (!registers.IsEmpty()) {
int count_before = registers.Count();
const CPURegister& dst0 = registers.PopLowestIndex();
const CPURegister& dst1 = registers.PopLowestIndex();
const CPURegister& dst2 = registers.PopLowestIndex();
const CPURegister& dst3 = registers.PopLowestIndex();
int count = count_before - registers.Count();
PopHelper(count, size, dst0, dst1, dst2, dst3);
if (dst1.IsValid()) {
Ldp(dst0, dst1, MemOperand(StackPointer(), offset));
} else {
Ldr(dst0, MemOperand(StackPointer(), offset));
}
offset += 2 * reg_size;
}

// Load the two registers at the bottom and drop the stack pointer.
if (bottom_0.IsValid() && bottom_1.IsValid()) {
Ldp(bottom_0, bottom_1, MemOperand(StackPointer(), size, PostIndex));
} else if (bottom_0.IsValid()) {
Ldr(bottom_0, MemOperand(StackPointer(), size, PostIndex));
}
}
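The rewritten Push/PopCPURegList walks the list in fixed pairs: one pre-indexed Stp claims the whole block and stores the two lowest-indexed registers at the bottom, then plain Stp/Str at increasing offsets fill the rest; Pop mirrors this with the post-indexed Ldp last. Under these assumptions, pushing four X registers would emit roughly:

//   masm.PushCPURegList(CPURegList(x0, x1, x2, x3));
//   // stp x0, x1, [sp, #-32]!   Claim all 32 bytes; bottom pair at the base.
//   // stp x2, x3, [sp, #16]     Next pair at offset 2 * reg_size.
//   masm.PopCPURegList(CPURegList(x0, x1, x2, x3));
//   // ldp x2, x3, [sp, #16]
//   // ldp x0, x1, [sp], #32     Bottom pair last; post-index drops the space.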
@ -1831,42 +1856,6 @@ void MacroAssembler::Peek(const Register& dst, const Operand& offset) {
}


void MacroAssembler::PeekCPURegList(CPURegList registers, int offset) {
VIXL_ASSERT(!registers.IncludesAliasOf(StackPointer()));
VIXL_ASSERT(offset >= 0);
int size = registers.RegisterSizeInBytes();

while (registers.Count() >= 2) {
const CPURegister& dst0 = registers.PopLowestIndex();
const CPURegister& dst1 = registers.PopLowestIndex();
Ldp(dst0, dst1, MemOperand(StackPointer(), offset));
offset += 2 * size;
}
if (!registers.IsEmpty()) {
Ldr(registers.PopLowestIndex(),
MemOperand(StackPointer(), offset));
}
}


void MacroAssembler::PokeCPURegList(CPURegList registers, int offset) {
VIXL_ASSERT(!registers.IncludesAliasOf(StackPointer()));
VIXL_ASSERT(offset >= 0);
int size = registers.RegisterSizeInBytes();

while (registers.Count() >= 2) {
const CPURegister& dst0 = registers.PopLowestIndex();
const CPURegister& dst1 = registers.PopLowestIndex();
Stp(dst0, dst1, MemOperand(StackPointer(), offset));
offset += 2 * size;
}
if (!registers.IsEmpty()) {
Str(registers.PopLowestIndex(),
MemOperand(StackPointer(), offset));
}
}


void MacroAssembler::Claim(const Operand& size) {
VIXL_ASSERT(allow_macro_instructions_);

@ -1956,6 +1945,80 @@ void MacroAssembler::PopCalleeSavedRegisters() {
ldp(x29, x30, tos);
}

void MacroAssembler::LoadCPURegList(CPURegList registers,
const MemOperand& src) {
LoadStoreCPURegListHelper(kLoad, registers, src);
}

void MacroAssembler::StoreCPURegList(CPURegList registers,
const MemOperand& dst) {
LoadStoreCPURegListHelper(kStore, registers, dst);
}


void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op,
CPURegList registers,
const MemOperand& mem) {
// We do not handle pre-indexing or post-indexing.
VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex()));
VIXL_ASSERT(!registers.Overlaps(tmp_list_));
VIXL_ASSERT(!registers.Overlaps(fptmp_list_));
VIXL_ASSERT(!registers.IncludesAliasOf(sp));

UseScratchRegisterScope temps(this);

MemOperand loc = BaseMemOperandForLoadStoreCPURegList(registers,
mem,
&temps);

while (registers.Count() >= 2) {
const CPURegister& dst0 = registers.PopLowestIndex();
const CPURegister& dst1 = registers.PopLowestIndex();
if (op == kStore) {
Stp(dst0, dst1, loc);
} else {
VIXL_ASSERT(op == kLoad);
Ldp(dst0, dst1, loc);
}
loc.AddOffset(2 * registers.RegisterSizeInBytes());
}
if (!registers.IsEmpty()) {
if (op == kStore) {
Str(registers.PopLowestIndex(), loc);
} else {
VIXL_ASSERT(op == kLoad);
Ldr(registers.PopLowestIndex(), loc);
}
}
}

MemOperand MacroAssembler::BaseMemOperandForLoadStoreCPURegList(
const CPURegList& registers,
const MemOperand& mem,
UseScratchRegisterScope* scratch_scope) {
// If necessary, pre-compute the base address for the accesses.
if (mem.IsRegisterOffset()) {
Register reg_base = scratch_scope->AcquireX();
ComputeAddress(reg_base, mem);
return MemOperand(reg_base);

} else if (mem.IsImmediateOffset()) {
int reg_size = registers.RegisterSizeInBytes();
int total_size = registers.TotalSizeInBytes();
int64_t min_offset = mem.offset();
int64_t max_offset = mem.offset() + std::max(0, total_size - 2 * reg_size);
if ((registers.Count() >= 2) &&
(!Assembler::IsImmLSPair(min_offset, WhichPowerOf2(reg_size)) ||
!Assembler::IsImmLSPair(max_offset, WhichPowerOf2(reg_size)))) {
Register reg_base = scratch_scope->AcquireX();
ComputeAddress(reg_base, mem);
return MemOperand(reg_base);
}
}

return mem;
}
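LoadCPURegList/StoreCPURegList accept an arbitrary MemOperand, so a register set can be spilled to any base, not just the stack; when the offsets cannot be encoded as an LDP/STP immediate pair, the helper above computes the base into a scratch register first. A hypothetical usage sketch (register choices are illustrative):

//   CPURegList saved(x19, x20, x21, x22);
//   masm.StoreCPURegList(saved, MemOperand(x9));          // stp pairs to [x9].
//   masm.LoadCPURegList(saved, MemOperand(x9, 1 << 16));  // Offset too large
//       // for an ldp immediate: the helper materialises x9 + 65536 into a
//       // scratch register and loads relative to that.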
void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
VIXL_ASSERT(!sp.Is(StackPointer()));
// TODO: Several callers rely on this not using scratch registers, so we use
@ -30,9 +30,9 @@
#include <algorithm>
#include <limits>

#include "globals.h"
#include "a64/assembler-a64.h"
#include "a64/debugger-a64.h"
#include "vixl/globals.h"
#include "vixl/a64/assembler-a64.h"
#include "vixl/a64/debugger-a64.h"


#define LS_MACRO_LIST(V) \
@ -56,6 +56,7 @@ namespace vixl {

// Forward declaration
class MacroAssembler;
class UseScratchRegisterScope;

class Pool {
public:
@ -631,13 +632,15 @@ class MacroAssembler : public Assembler {
// Add and sub macros.
void Add(const Register& rd,
const Register& rn,
const Operand& operand);
const Operand& operand,
FlagsUpdate S = LeaveFlags);
void Adds(const Register& rd,
const Register& rn,
const Operand& operand);
void Sub(const Register& rd,
const Register& rn,
const Operand& operand);
const Operand& operand,
FlagsUpdate S = LeaveFlags);
void Subs(const Register& rd,
const Register& rn,
const Operand& operand);
@ -844,39 +847,43 @@ class MacroAssembler : public Assembler {
// supported.
//
// Otherwise, (Peek|Poke)(CPU|X|W|D|S)RegList is preferred.
void PeekCPURegList(CPURegList registers, int offset);
void PokeCPURegList(CPURegList registers, int offset);
void PeekCPURegList(CPURegList registers, int64_t offset) {
LoadCPURegList(registers, MemOperand(StackPointer(), offset));
}
void PokeCPURegList(CPURegList registers, int64_t offset) {
StoreCPURegList(registers, MemOperand(StackPointer(), offset));
}

void PeekSizeRegList(RegList registers, int offset, unsigned reg_size,
void PeekSizeRegList(RegList registers, int64_t offset, unsigned reg_size,
CPURegister::RegisterType type = CPURegister::kRegister) {
PeekCPURegList(CPURegList(type, reg_size, registers), offset);
}
void PokeSizeRegList(RegList registers, int offset, unsigned reg_size,
void PokeSizeRegList(RegList registers, int64_t offset, unsigned reg_size,
CPURegister::RegisterType type = CPURegister::kRegister) {
PokeCPURegList(CPURegList(type, reg_size, registers), offset);
}
void PeekXRegList(RegList regs, int offset) {
void PeekXRegList(RegList regs, int64_t offset) {
PeekSizeRegList(regs, offset, kXRegSize);
}
void PokeXRegList(RegList regs, int offset) {
void PokeXRegList(RegList regs, int64_t offset) {
PokeSizeRegList(regs, offset, kXRegSize);
}
void PeekWRegList(RegList regs, int offset) {
void PeekWRegList(RegList regs, int64_t offset) {
PeekSizeRegList(regs, offset, kWRegSize);
}
void PokeWRegList(RegList regs, int offset) {
void PokeWRegList(RegList regs, int64_t offset) {
PokeSizeRegList(regs, offset, kWRegSize);
}
void PeekDRegList(RegList regs, int offset) {
void PeekDRegList(RegList regs, int64_t offset) {
PeekSizeRegList(regs, offset, kDRegSize, CPURegister::kVRegister);
}
void PokeDRegList(RegList regs, int offset) {
void PokeDRegList(RegList regs, int64_t offset) {
PokeSizeRegList(regs, offset, kDRegSize, CPURegister::kVRegister);
}
void PeekSRegList(RegList regs, int offset) {
void PeekSRegList(RegList regs, int64_t offset) {
PeekSizeRegList(regs, offset, kSRegSize, CPURegister::kVRegister);
}
void PokeSRegList(RegList regs, int offset) {
void PokeSRegList(RegList regs, int64_t offset) {
PokeSizeRegList(regs, offset, kSRegSize, CPURegister::kVRegister);
}

@ -911,6 +918,9 @@ class MacroAssembler : public Assembler {
// aligned to 16 bytes.
void PopCalleeSavedRegisters();

void LoadCPURegList(CPURegList registers, const MemOperand& src);
void StoreCPURegList(CPURegList registers, const MemOperand& dst);

// Remaining instructions are simple pass-through calls to the assembler.
void Adr(const Register& rd, Label* label) {
VIXL_ASSERT(allow_macro_instructions_);
@ -1135,18 +1145,31 @@ class MacroAssembler : public Assembler {
void Fccmp(const VRegister& vn,
const VRegister& vm,
StatusFlags nzcv,
Condition cond) {
Condition cond,
FPTrapFlags trap = DisableTrap) {
VIXL_ASSERT(allow_macro_instructions_);
VIXL_ASSERT((cond != al) && (cond != nv));
SingleEmissionCheckScope guard(this);
fccmp(vn, vm, nzcv, cond);
FPCCompareMacro(vn, vm, nzcv, cond, trap);
}
void Fcmp(const VRegister& vn, const VRegister& vm) {
void Fccmpe(const VRegister& vn,
const VRegister& vm,
StatusFlags nzcv,
Condition cond) {
Fccmp(vn, vm, nzcv, cond, EnableTrap);
}
void Fcmp(const VRegister& vn, const VRegister& vm,
FPTrapFlags trap = DisableTrap) {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
fcmp(vn, vm);
FPCompareMacro(vn, vm, trap);
}
void Fcmp(const VRegister& vn, double value,
FPTrapFlags trap = DisableTrap);
void Fcmpe(const VRegister& vn, double value);
void Fcmpe(const VRegister& vn, const VRegister& vm) {
Fcmp(vn, vm, EnableTrap);
}
void Fcmp(const VRegister& vn, double value);
void Fcsel(const VRegister& vd,
const VRegister& vn,
const VRegister& vm,
@ -2000,6 +2023,14 @@ class MacroAssembler : public Assembler {
SingleEmissionCheckScope guard(this);
umull(rd, rn, rm);
}
void Umulh(const Register& xd, const Register& xn, const Register& xm) {
VIXL_ASSERT(allow_macro_instructions_);
VIXL_ASSERT(!xd.IsZero());
VIXL_ASSERT(!xn.IsZero());
VIXL_ASSERT(!xm.IsZero());
SingleEmissionCheckScope guard(this);
umulh(xd, xn, xm);
}
void Umsubl(const Register& rd,
const Register& rn,
const Register& rm,
@ -2989,6 +3020,23 @@ class MacroAssembler : public Assembler {
void PrepareForPush(int count, int size);
void PrepareForPop(int count, int size);

// The actual implementation of load and store operations for CPURegList.
enum LoadStoreCPURegListAction {
kLoad,
kStore
};
void LoadStoreCPURegListHelper(LoadStoreCPURegListAction operation,
CPURegList registers,
const MemOperand& mem);
// Returns a MemOperand suitable for loading or storing a CPURegList at `dst`.
// This helper may allocate registers from `scratch_scope` and generate code
// to compute an intermediate address. The resulting MemOperand is only valid
// as long as `scratch_scope` remains valid.
MemOperand BaseMemOperandForLoadStoreCPURegList(
const CPURegList& registers,
const MemOperand& mem,
UseScratchRegisterScope* scratch_scope);
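The validity caveat in the comment above is easy to trip over. A hypothetical illustration of the lifetime rule (BaseMemOperandForLoadStoreCPURegList is private, so this only shows the rule, not real client code):

//   {
//     UseScratchRegisterScope temps(&masm);
//     MemOperand loc = BaseMemOperandForLoadStoreCPURegList(regs, mem, &temps);
//     masm.Ldp(x0, x1, loc);  // OK: `temps` is still live.
//   }
//   // Using `loc` here would be wrong: its base may be a scratch register
//   // that `temps` has already released.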
bool LabelIsOutOfRange(Label* label, ImmBranchType branch_type) {
return !Instruction::IsValidImmPCOffset(branch_type,
label->location() - CursorOffset());
@ -27,8 +27,8 @@
#ifdef USE_SIMULATOR

#include <string.h>
#include <math.h>
#include "a64/simulator-a64.h"
#include <cmath>
#include "vixl/a64/simulator-a64.h"

namespace vixl {

@ -396,23 +396,18 @@ int64_t Simulator::ExtendValue(unsigned reg_size,
}


template<> double Simulator::FPDefaultNaN<double>() const {
return kFP64DefaultNaN;
}


template<> float Simulator::FPDefaultNaN<float>() const {
return kFP32DefaultNaN;
}


void Simulator::FPCompare(double val0, double val1) {
void Simulator::FPCompare(double val0, double val1, FPTrapFlags trap) {
AssertSupportedFPCR();

// TODO: This assumes that the C++ implementation handles comparisons in the
// way that we expect (as per AssertSupportedFPCR()).
if ((isnan(val0) != 0) || (isnan(val1) != 0)) {
bool process_exception = false;
if ((std::isnan(val0) != 0) || (std::isnan(val1) != 0)) {
nzcv().SetRawValue(FPUnorderedFlag);
if (IsSignallingNaN(val0) || IsSignallingNaN(val1) ||
(trap == EnableTrap)) {
process_exception = true;
}
} else if (val0 < val1) {
nzcv().SetRawValue(FPLessThanFlag);
} else if (val0 > val1) {
@ -423,6 +418,7 @@ void Simulator::FPCompare(double val0, double val1) {
VIXL_UNREACHABLE();
}
LogSystemRegister(NZCV);
if (process_exception) FPProcessException();
}
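FPCompare sets NZCV exactly as an A64 fcmp would: unordered when either input is NaN, and the usual less/greater/equal encodings otherwise; with the new trap argument, a signalling NaN, or any NaN under EnableTrap, additionally raises the exception. A condensed sketch of the mapping (flag encodings follow the A64 convention N=8, Z=4, C=2, V=1):

#include <cmath>
#include <cstdint>

// Sketch of the fcmp -> NZCV mapping implemented above.
static uint32_t FcmpNzcvSketch(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) return 0x3;  // 0011: unordered.
  if (a < b) return 0x8;                           // 1000: less than.
  if (a > b) return 0x2;                           // 0010: greater than.
  return 0x6;                                      // 0110: equal.
}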
@ -440,7 +436,7 @@ Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize(
}

switch (lane_size) {
default: VIXL_UNREACHABLE();
default: VIXL_UNREACHABLE(); break;
case kQRegSizeInBytes: format |= kPrintReg1Q; break;
case kDRegSizeInBytes: format |= kPrintReg1D; break;
case kSRegSizeInBytes: format |= kPrintReg1S; break;
@ -460,7 +456,7 @@ Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize(
Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
VectorFormat vform) {
switch (vform) {
default: VIXL_UNREACHABLE();
default: VIXL_UNREACHABLE(); return kPrintReg16B;
case kFormat16B: return kPrintReg16B;
case kFormat8B: return kPrintReg8B;
case kFormat8H: return kPrintReg8H;
@ -841,7 +837,7 @@ void Simulator::VisitUnconditionalBranch(const Instruction* instr) {
switch (instr->Mask(UnconditionalBranchMask)) {
case BL:
set_lr(instr->NextInstruction());
// Fall through.
VIXL_FALLTHROUGH();
case B:
set_pc(instr->ImmPCOffsetTarget());
break;
@ -864,7 +860,7 @@ void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
case BLR:
set_lr(instr->NextInstruction());
// Fall through.
VIXL_FALLTHROUGH();
case BR:
case RET: set_pc(target); break;
default: VIXL_UNREACHABLE();
@ -1007,7 +1003,7 @@ void Simulator::LogicalHelper(const Instruction* instr, int64_t op2) {
// Switch on the logical operation, stripping out the NOT bit, as it has a
// different meaning for logical immediate instructions.
switch (instr->Mask(LogicalOpMask & ~NOT)) {
case ANDS: update_flags = true; // Fall through.
case ANDS: update_flags = true; VIXL_FALLTHROUGH();
case AND: result = op1 & op2; break;
case ORR: result = op1 | op2; break;
case EOR: result = op1 ^ op2; break;
@ -1616,14 +1612,14 @@ void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
case REV_w: set_wreg(dst, ReverseBytes(wreg(src), Reverse32)); break;
case REV32_x: set_xreg(dst, ReverseBytes(xreg(src), Reverse32)); break;
case REV_x: set_xreg(dst, ReverseBytes(xreg(src), Reverse64)); break;
case CLZ_w: set_wreg(dst, CountLeadingZeros(wreg(src), kWRegSize)); break;
case CLZ_x: set_xreg(dst, CountLeadingZeros(xreg(src), kXRegSize)); break;
case CLZ_w: set_wreg(dst, CountLeadingZeros(wreg(src))); break;
case CLZ_x: set_xreg(dst, CountLeadingZeros(xreg(src))); break;
case CLS_w: {
set_wreg(dst, CountLeadingSignBits(wreg(src), kWRegSize));
set_wreg(dst, CountLeadingSignBits(wreg(src)));
break;
}
case CLS_x: {
set_xreg(dst, CountLeadingSignBits(xreg(src), kXRegSize));
set_xreg(dst, CountLeadingSignBits(xreg(src)));
break;
}
default: VIXL_UNIMPLEMENTED();
@ -1831,9 +1827,13 @@ void Simulator::VisitDataProcessing2Source(const Instruction* instr) {
// The algorithm used is adapted from the one described in section 8.2 of
// Hacker's Delight, by Henry S. Warren, Jr.
// It assumes that a right shift on a signed integer is an arithmetic shift.
static int64_t MultiplyHighSigned(int64_t u, int64_t v) {
// Type T must be either uint64_t or int64_t.
template <typename T>
static T MultiplyHigh(T u, T v) {
uint64_t u0, v0, w0;
int64_t u1, v1, w1, w2, t;
T u1, v1, w1, w2, t;

VIXL_ASSERT(sizeof(u) == sizeof(u0));

u0 = u & 0xffffffff;
u1 = u >> 32;
@ -1872,8 +1872,12 @@ void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
case SMSUBL_x: result = xreg(instr->Ra()) - (rn_s32 * rm_s32); break;
case UMADDL_x: result = xreg(instr->Ra()) + (rn_u32 * rm_u32); break;
case UMSUBL_x: result = xreg(instr->Ra()) - (rn_u32 * rm_u32); break;
case UMULH_x:
result = MultiplyHigh(reg<uint64_t>(instr->Rn()),
reg<uint64_t>(instr->Rm()));
break;
case SMULH_x:
result = MultiplyHighSigned(xreg(instr->Rn()), xreg(instr->Rm()));
result = MultiplyHigh(xreg(instr->Rn()), xreg(instr->Rm()));
break;
default: VIXL_UNIMPLEMENTED();
}
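Templating MultiplyHigh over uint64_t and int64_t lets UMULH and SMULH share the Hacker's Delight decomposition: split each operand into 32-bit halves, form the partial products, and carry into the top 64 bits. On compilers with the __int128 extension (an assumption for this sketch only; the simulator avoids __int128 for portability) the result can be cross-checked directly:

#include <cstdint>
#include <type_traits>

// Cross-check sketch for the Hacker's Delight high-multiply above.
template <typename T>
static T MultiplyHighReference(T u, T v) {
  // Promote through a 128-bit type of matching signedness.
  using Wide = typename std::conditional<std::is_signed<T>::value,
                                         __int128, unsigned __int128>::type;
  return static_cast<T>((static_cast<Wide>(u) * static_cast<Wide>(v)) >> 64);
}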
@ -2112,28 +2116,28 @@ void Simulator::VisitFPFixedPointConvert(const Instruction* instr) {
break;
}
case FCVTZS_xd_fixed:
set_xreg(dst, FPToInt64(dreg(src) * pow(2.0, fbits), FPZero));
set_xreg(dst, FPToInt64(dreg(src) * std::pow(2.0, fbits), FPZero));
break;
case FCVTZS_wd_fixed:
set_wreg(dst, FPToInt32(dreg(src) * pow(2.0, fbits), FPZero));
set_wreg(dst, FPToInt32(dreg(src) * std::pow(2.0, fbits), FPZero));
break;
case FCVTZU_xd_fixed:
set_xreg(dst, FPToUInt64(dreg(src) * pow(2.0, fbits), FPZero));
set_xreg(dst, FPToUInt64(dreg(src) * std::pow(2.0, fbits), FPZero));
break;
case FCVTZU_wd_fixed:
set_wreg(dst, FPToUInt32(dreg(src) * pow(2.0, fbits), FPZero));
set_wreg(dst, FPToUInt32(dreg(src) * std::pow(2.0, fbits), FPZero));
break;
case FCVTZS_xs_fixed:
set_xreg(dst, FPToInt64(sreg(src) * powf(2.0f, fbits), FPZero));
set_xreg(dst, FPToInt64(sreg(src) * std::pow(2.0f, fbits), FPZero));
break;
case FCVTZS_ws_fixed:
set_wreg(dst, FPToInt32(sreg(src) * powf(2.0f, fbits), FPZero));
set_wreg(dst, FPToInt32(sreg(src) * std::pow(2.0f, fbits), FPZero));
break;
case FCVTZU_xs_fixed:
set_xreg(dst, FPToUInt64(sreg(src) * powf(2.0f, fbits), FPZero));
set_xreg(dst, FPToUInt64(sreg(src) * std::pow(2.0f, fbits), FPZero));
break;
case FCVTZU_ws_fixed:
set_wreg(dst, FPToUInt32(sreg(src) * powf(2.0f, fbits), FPZero));
set_wreg(dst, FPToUInt32(sreg(src) * std::pow(2.0f, fbits), FPZero));
break;
default: VIXL_UNREACHABLE();
}
@ -2143,11 +2147,16 @@ void Simulator::VisitFPFixedPointConvert(const Instruction* instr) {
void Simulator::VisitFPCompare(const Instruction* instr) {
AssertSupportedFPCR();

FPTrapFlags trap = DisableTrap;
switch (instr->Mask(FPCompareMask)) {
case FCMP_s: FPCompare(sreg(instr->Rn()), sreg(instr->Rm())); break;
case FCMP_d: FPCompare(dreg(instr->Rn()), dreg(instr->Rm())); break;
case FCMP_s_zero: FPCompare(sreg(instr->Rn()), 0.0f); break;
case FCMP_d_zero: FPCompare(dreg(instr->Rn()), 0.0); break;
case FCMPE_s: trap = EnableTrap; VIXL_FALLTHROUGH();
case FCMP_s: FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap); break;
case FCMPE_d: trap = EnableTrap; VIXL_FALLTHROUGH();
case FCMP_d: FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap); break;
case FCMPE_s_zero: trap = EnableTrap; VIXL_FALLTHROUGH();
case FCMP_s_zero: FPCompare(sreg(instr->Rn()), 0.0f, trap); break;
case FCMPE_d_zero: trap = EnableTrap; VIXL_FALLTHROUGH();
case FCMP_d_zero: FPCompare(dreg(instr->Rn()), 0.0, trap); break;
default: VIXL_UNIMPLEMENTED();
}
}
@ -2156,18 +2165,23 @@ void Simulator::VisitFPCompare(const Instruction* instr) {
void Simulator::VisitFPConditionalCompare(const Instruction* instr) {
AssertSupportedFPCR();

FPTrapFlags trap = DisableTrap;
switch (instr->Mask(FPConditionalCompareMask)) {
case FCCMPE_s: trap = EnableTrap;
VIXL_FALLTHROUGH();
case FCCMP_s:
if (ConditionPassed(instr->Condition())) {
FPCompare(sreg(instr->Rn()), sreg(instr->Rm()));
FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap);
} else {
nzcv().SetFlags(instr->Nzcv());
LogSystemRegister(NZCV);
}
break;
case FCCMPE_d: trap = EnableTrap;
VIXL_FALLTHROUGH();
case FCCMP_d:
if (ConditionPassed(instr->Condition())) {
FPCompare(dreg(instr->Rn()), dreg(instr->Rm()));
FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap);
} else {
nzcv().SetFlags(instr->Nzcv());
LogSystemRegister(NZCV);
@ -2245,547 +2259,6 @@ void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) {
}
// Assemble the specified IEEE-754 components into the target type and apply
// appropriate rounding.
// sign: 0 = positive, 1 = negative
// exponent: Unbiased IEEE-754 exponent.
// mantissa: The mantissa of the input. The top bit (which is not encoded for
// normal IEEE-754 values) must not be omitted. This bit has the
// value 'pow(2, exponent)'.
//
// The input value is assumed to be a normalized value. That is, the input may
// not be infinity or NaN. If the source value is subnormal, it must be
// normalized before calling this function such that the highest set bit in the
// mantissa has the value 'pow(2, exponent)'.
//
// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
// calling a templated FPRound.
template <class T, int ebits, int mbits>
static T FPRound(int64_t sign, int64_t exponent, uint64_t mantissa,
FPRounding round_mode) {
VIXL_ASSERT((sign == 0) || (sign == 1));

// Only FPTieEven and FPRoundOdd rounding modes are implemented.
VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));

// Rounding can promote subnormals to normals, and normals to infinities. For
// example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
// encodable as a float, but rounding based on the low-order mantissa bits
// could make it overflow. With ties-to-even rounding, this value would become
// an infinity.

// ---- Rounding Method ----
//
// The exponent is irrelevant in the rounding operation, so we treat the
// lowest-order bit that will fit into the result ('onebit') as having
// the value '1'. Similarly, the highest-order bit that won't fit into
// the result ('halfbit') has the value '0.5'. The 'point' sits between
// 'onebit' and 'halfbit':
//
//            These bits fit into the result.
//               |---------------------|
//  mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//                                     ||
//                                    / |
//                                   /  halfbit
//                               onebit
//
// For subnormal outputs, the range of representable bits is smaller and
// the position of onebit and halfbit depends on the exponent of the
// input, but the method is otherwise similar.
//
//   onebit(frac)
//     |
//     | halfbit(frac)       halfbit(adjusted)
//     | /                   /
//     | |                   |
//  0b00.0 (exact)  -> 0b00.0 (exact)    -> 0b00
//  0b00.0...       -> 0b00.0...         -> 0b00
//  0b00.1 (exact)  -> 0b00.0111..111    -> 0b00
//  0b00.1...       -> 0b00.1...         -> 0b01
//  0b01.0 (exact)  -> 0b01.0 (exact)    -> 0b01
//  0b01.0...       -> 0b01.0...         -> 0b01
//  0b01.1 (exact)  -> 0b01.1 (exact)    -> 0b10
//  0b01.1...       -> 0b01.1...         -> 0b10
//  0b10.0 (exact)  -> 0b10.0 (exact)    -> 0b10
//  0b10.0...       -> 0b10.0...         -> 0b10
//  0b10.1 (exact)  -> 0b10.0111..111    -> 0b10
//  0b10.1...       -> 0b10.1...         -> 0b11
//  0b11.0 (exact)  -> 0b11.0 (exact)    -> 0b11
//  ...               /                     /
//                   /                     /
//                  /                     /
//  adjusted = frac - (halfbit(mantissa) & ~onebit(frac));
//
//  mantissa = (mantissa >> shift) + halfbit(adjusted);
static const int mantissa_offset = 0;
static const int exponent_offset = mantissa_offset + mbits;
static const int sign_offset = exponent_offset + ebits;
VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));

// Bail out early for zero inputs.
if (mantissa == 0) {
return sign << sign_offset;
}

// If all bits in the exponent are set, the value is infinite or NaN.
// This is true for all binary IEEE-754 formats.
static const int infinite_exponent = (1 << ebits) - 1;
static const int max_normal_exponent = infinite_exponent - 1;

// Apply the exponent bias to encode it for the result. Doing this early makes
// it easy to detect values that will be infinite or subnormal.
exponent += max_normal_exponent >> 1;

if (exponent > max_normal_exponent) {
// Overflow: the input is too large for the result type to represent.
if (round_mode == FPTieEven) {
// FPTieEven rounding mode handles overflows using infinities.
exponent = infinite_exponent;
mantissa = 0;
} else {
VIXL_ASSERT(round_mode == FPRoundOdd);
// FPRoundOdd rounding mode handles overflows using the largest magnitude
// normal number.
exponent = max_normal_exponent;
mantissa = (UINT64_C(1) << exponent_offset) - 1;
}
return (sign << sign_offset) |
(exponent << exponent_offset) |
(mantissa << mantissa_offset);
}

// Calculate the shift required to move the top mantissa bit to the proper
// place in the destination type.
const int highest_significant_bit = 63 - CountLeadingZeros(mantissa, 64);
int shift = highest_significant_bit - mbits;

if (exponent <= 0) {
// The output will be subnormal (before rounding).
// For subnormal outputs, the shift must be adjusted by the exponent. The +1
// is necessary because the exponent of a subnormal value (encoded as 0) is
// the same as the exponent of the smallest normal value (encoded as 1).
shift += -exponent + 1;

// Handle inputs that would produce a zero output.
//
// Shifts higher than highest_significant_bit+1 will always produce a zero
// result. A shift of exactly highest_significant_bit+1 might produce a
// non-zero result after rounding.
if (shift > (highest_significant_bit + 1)) {
if (round_mode == FPTieEven) {
// The result will always be +/-0.0.
return sign << sign_offset;
} else {
VIXL_ASSERT(round_mode == FPRoundOdd);
VIXL_ASSERT(mantissa != 0);
// For FPRoundOdd, if the mantissa is too small to represent and
// non-zero return the next "odd" value.
return (sign << sign_offset) | 1;
}
}

// Properly encode the exponent for a subnormal output.
exponent = 0;
} else {
// Clear the topmost mantissa bit, since this is not encoded in IEEE-754
// normal values.
mantissa &= ~(UINT64_C(1) << highest_significant_bit);
}

if (shift > 0) {
if (round_mode == FPTieEven) {
// We have to shift the mantissa to the right. Some precision is lost, so
// we need to apply rounding.
uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
uint64_t halfbit_mantissa = (mantissa >> (shift-1)) & 1;
uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
uint64_t adjusted = mantissa - adjustment;
T halfbit_adjusted = (adjusted >> (shift-1)) & 1;

T result = (sign << sign_offset) |
(exponent << exponent_offset) |
((mantissa >> shift) << mantissa_offset);

// A very large mantissa can overflow during rounding. If this happens,
// the exponent should be incremented and the mantissa set to 1.0
// (encoded as 0). Applying halfbit_adjusted after assembling the float
// has the nice side-effect that this case is handled for free.
//
// This also handles cases where a very large finite value overflows to
// infinity, or where a very large subnormal value overflows to become
// normal.
return result + halfbit_adjusted;
} else {
VIXL_ASSERT(round_mode == FPRoundOdd);
// If any bits at position halfbit or below are set, onebit (ie. the
// bottom bit of the resulting mantissa) must be set.
uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
if (fractional_bits != 0) {
mantissa |= UINT64_C(1) << shift;
}

return (sign << sign_offset) |
(exponent << exponent_offset) |
((mantissa >> shift) << mantissa_offset);
}
} else {
// We have to shift the mantissa to the left (or not at all). The input
// mantissa is exactly representable in the output mantissa, so apply no
// rounding correction.
return (sign << sign_offset) |
(exponent << exponent_offset) |
((mantissa << -shift) << mantissa_offset);
}
}
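A concrete trace helps check the halfbit arithmetic above. Take a mantissa being narrowed by shift = 2, i.e. the 0bxx.yy rows of the table: for frac = 0b1010 (2.5 in units of onebit) the adjustment clears the halfbit because onebit is 0, so the result rounds down to 0b10; for frac = 0b1110 (3.5) the adjustment is 0 and the result rounds up to 0b100. A sketch of just that branch:

#include <cstdint>

// Bit-level ties-to-even, as in FPRound's shift > 0 branch (sketch).
static uint64_t RoundMantissaTieEven(uint64_t mantissa, int shift) {
  uint64_t onebit = (mantissa >> shift) & 1;
  uint64_t halfbit = (mantissa >> (shift - 1)) & 1;
  uint64_t adjusted = mantissa - (halfbit & ~onebit);
  uint64_t halfbit_adjusted = (adjusted >> (shift - 1)) & 1;
  return (mantissa >> shift) + halfbit_adjusted;
}
// RoundMantissaTieEven(0xA /* 0b1010 */, 2) == 0x2  (2.5 -> 2)
// RoundMantissaTieEven(0xE /* 0b1110 */, 2) == 0x4  (3.5 -> 4)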
// See FPRound for a description of this function.
static inline double FPRoundToDouble(int64_t sign, int64_t exponent,
uint64_t mantissa, FPRounding round_mode) {
int64_t bits =
FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
exponent,
mantissa,
round_mode);
return rawbits_to_double(bits);
}


// See FPRound for a description of this function.
static inline float FPRoundToFloat(int64_t sign, int64_t exponent,
uint64_t mantissa, FPRounding round_mode) {
int32_t bits =
FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
exponent,
mantissa,
round_mode);
return rawbits_to_float(bits);
}


// See FPRound for a description of this function.
static inline float16 FPRoundToFloat16(int64_t sign,
int64_t exponent,
uint64_t mantissa,
FPRounding round_mode) {
return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
sign, exponent, mantissa, round_mode);
}


double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
if (src >= 0) {
return UFixedToDouble(src, fbits, round);
} else {
// This works for all negative values, including INT64_MIN.
return -UFixedToDouble(-src, fbits, round);
}
}


double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
// An input of 0 is a special case because the result is effectively
// subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
if (src == 0) {
return 0.0;
}

// Calculate the exponent. The highest significant bit will have the value
// 2^exponent.
const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
const int64_t exponent = highest_significant_bit - fbits;

return FPRoundToDouble(0, exponent, src, round);
}
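The exponent computation is easier to see with numbers: for src = 6 (0b110) and fbits = 0, the highest set bit is bit 2, so the value is 1.1b x 2^2; with fbits = 4 the same bits mean 6/16 and the exponent drops to -2. A sketch, using the GCC/Clang builtin as a stand-in for VIXL's CountLeadingZeros (an assumption for portability of the example):

#include <cstdint>

// Exponent selection for unsigned fixed-point -> FP (sketch). Requires
// src != 0.
static int64_t FixedPointExponent(uint64_t src, int fbits) {
  int highest_significant_bit = 63 - __builtin_clzll(src);
  return highest_significant_bit - fbits;
}
// FixedPointExponent(6, 0) == 2   (6 = 1.1b * 2^2)
// FixedPointExponent(6, 4) == -2  (6/16 = 1.1b * 2^-2)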
float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
if (src >= 0) {
return UFixedToFloat(src, fbits, round);
} else {
// This works for all negative values, including INT64_MIN.
return -UFixedToFloat(-src, fbits, round);
}
}


float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
// An input of 0 is a special case because the result is effectively
// subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
if (src == 0) {
return 0.0f;
}

// Calculate the exponent. The highest significant bit will have the value
// 2^exponent.
const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
const int32_t exponent = highest_significant_bit - fbits;

return FPRoundToFloat(0, exponent, src, round);
}


double Simulator::FPToDouble(float value) {
switch (fpclassify(value)) {
case FP_NAN: {
if (IsSignallingNaN(value)) {
FPProcessException();
}
if (DN()) return kFP64DefaultNaN;

// Convert NaNs as the processor would:
// - The sign is propagated.
// - The payload (mantissa) is transferred entirely, except that the top
// bit is forced to '1', making the result a quiet NaN. The unused
// (low-order) payload bits are set to 0.
uint32_t raw = float_to_rawbits(value);

uint64_t sign = raw >> 31;
uint64_t exponent = (1 << 11) - 1;
uint64_t payload = unsigned_bitextract_64(21, 0, raw);
payload <<= (52 - 23); // The unused low-order bits should be 0.
payload |= (UINT64_C(1) << 51); // Force a quiet NaN.

return rawbits_to_double((sign << 63) | (exponent << 52) | payload);
}

case FP_ZERO:
case FP_NORMAL:
case FP_SUBNORMAL:
case FP_INFINITE: {
// All other inputs are preserved in a standard cast, because every value
// representable using an IEEE-754 float is also representable using an
// IEEE-754 double.
return static_cast<double>(value);
}
}

VIXL_UNREACHABLE();
return static_cast<double>(value);
}
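The NaN widening above is worth a worked value. The float signalling NaN 0x7F800001 (payload 1, quiet bit clear) becomes the double 0x7FF8000020000000: sign 0, all-ones exponent, the payload shifted left by 52 - 23 = 29 bits, and bit 51 forced to 1 to quieten it. A sketch of the same bit manipulation:

#include <cstdint>

// Float -> double NaN payload transfer (sketch of the code above).
static uint64_t WidenNaNBits(uint32_t raw_float) {
  uint64_t sign = raw_float >> 31;
  uint64_t exponent = (1 << 11) - 1;
  uint64_t payload = raw_float & ((1u << 22) - 1);  // Bits 21:0.
  payload <<= (52 - 23);           // Left-align; low-order bits become 0.
  payload |= (UINT64_C(1) << 51);  // Force a quiet NaN.
  return (sign << 63) | (exponent << 52) | payload;
}
// WidenNaNBits(0x7F800001) == 0x7FF8000020000000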
|
||||
|
||||
float Simulator::FPToFloat(float16 value) {
|
||||
uint32_t sign = value >> 15;
|
||||
uint32_t exponent = unsigned_bitextract_32(
|
||||
kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits,
|
||||
value);
|
||||
uint32_t mantissa = unsigned_bitextract_32(
|
||||
kFloat16MantissaBits - 1, 0, value);
|
||||
|
||||
switch (float16classify(value)) {
|
||||
case FP_ZERO:
|
||||
return (sign == 0) ? 0.0f : -0.0f;
|
||||
|
||||
case FP_INFINITE:
|
||||
return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
|
||||
|
||||
case FP_SUBNORMAL: {
|
||||
// Calculate shift required to put mantissa into the most-significant bits
|
||||
// of the destination mantissa.
|
||||
int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
|
||||
|
||||
// Shift mantissa and discard implicit '1'.
|
||||
mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
|
||||
mantissa &= (1 << kFloatMantissaBits) - 1;
|
||||
|
||||
// Adjust the exponent for the shift applied, and rebias.
|
||||
exponent = exponent - shift + (-15 + 127);
|
||||
break;
|
||||
}
|
||||
|
||||
case FP_NAN:
|
||||
if (IsSignallingNaN(value)) {
|
||||
FPProcessException();
|
||||
}
|
||||
if (DN()) return kFP32DefaultNaN;
|
||||
|
||||
// Convert NaNs as the processor would:
|
||||
// - The sign is propagated.
|
||||
// - The payload (mantissa) is transferred entirely, except that the top
|
||||
// bit is forced to '1', making the result a quiet NaN. The unused
|
||||
// (low-order) payload bits are set to 0.
|
||||
exponent = (1 << kFloatExponentBits) - 1;
|
||||
|
||||
// Increase bits in mantissa, making low-order bits 0.
|
||||
mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
|
||||
mantissa |= 1 << 22; // Force a quiet NaN.
|
||||
break;
|
||||
|
||||
case FP_NORMAL:
|
||||
// Increase bits in mantissa, making low-order bits 0.
|
||||
mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
|
||||
|
||||
// Change exponent bias.
|
||||
exponent += (-15 + 127);
|
||||
break;
|
||||
|
||||
default: VIXL_UNREACHABLE();
|
||||
}
|
||||
return rawbits_to_float((sign << 31) |
|
||||
(exponent << kFloatMantissaBits) |
|
||||
mantissa);
|
||||
}
|
||||
|
||||
|
||||
float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
|
||||
// Only the FPTieEven rounding mode is implemented.
|
||||
VIXL_ASSERT(round_mode == FPTieEven);
|
||||
USE(round_mode);
|
||||
|
||||
uint32_t raw = float_to_rawbits(value);
|
||||
int32_t sign = raw >> 31;
|
||||
int32_t exponent = unsigned_bitextract_32(30, 23, raw) - 127;
|
||||
uint32_t mantissa = unsigned_bitextract_32(22, 0, raw);
|
||||
|
||||
switch (fpclassify(value)) {
|
||||
case FP_NAN: {
|
||||
if (IsSignallingNaN(value)) {
|
||||
FPProcessException();
|
||||
}
|
||||
if (DN()) return kFP16DefaultNaN;
|
||||
|
||||
// Convert NaNs as the processor would:
|
||||
// - The sign is propagated.
|
||||
// - The payload (mantissa) is transferred as much as possible, except
|
||||
// that the top bit is forced to '1', making the result a quiet NaN.
|
||||
float16 result = (sign == 0) ? kFP16PositiveInfinity
|
||||
: kFP16NegativeInfinity;
|
||||
result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
|
||||
result |= (1 << 9); // Force a quiet NaN;
|
||||
return result;
|
||||
}
|
||||
|
||||
case FP_ZERO:
|
||||
return (sign == 0) ? 0 : 0x8000;
|
||||
|
||||
case FP_INFINITE:
|
||||
return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
|
||||
|
||||
case FP_NORMAL:
|
||||
case FP_SUBNORMAL: {
|
||||
// Convert float-to-half as the processor would, assuming that FPCR.FZ
|
||||
// (flush-to-zero) is not set.
|
||||
|
||||
// Add the implicit '1' bit to the mantissa.
|
||||
mantissa += (1 << 23);
|
||||
return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
|
||||
}
|
||||
}
|
||||
|
||||
VIXL_UNREACHABLE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
  // Only the FPTieEven rounding mode is implemented.
  VIXL_ASSERT(round_mode == FPTieEven);
  USE(round_mode);

  uint64_t raw = double_to_rawbits(value);
  int32_t sign = raw >> 63;
  int64_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
  uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);

  switch (fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value)) {
        FPProcessException();
      }
      if (DN()) return kFP16DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The payload (mantissa) is transferred as much as possible, except
      //    that the top bit is forced to '1', making the result a quiet NaN.
      float16 result = (sign == 0) ? kFP16PositiveInfinity
                                   : kFP16NegativeInfinity;
      result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
      result |= (1 << 9);  // Force a quiet NaN.
      return result;
    }

    case FP_ZERO:
      return (sign == 0) ? 0 : 0x8000;

    case FP_INFINITE:
      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

    case FP_NORMAL:
    case FP_SUBNORMAL: {
      // Convert double-to-half as the processor would, assuming that FPCR.FZ
      // (flush-to-zero) is not set.

      // Add the implicit '1' bit to the mantissa.
      mantissa += (UINT64_C(1) << 52);
      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    }
  }

  VIXL_UNREACHABLE();
  return 0;
}

float Simulator::FPToFloat(double value, FPRounding round_mode) {
  // Only the FPTieEven rounding mode is implemented.
  VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
  USE(round_mode);

  switch (fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value)) {
        FPProcessException();
      }
      if (DN()) return kFP32DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The payload (mantissa) is transferred as much as possible, except
      //    that the top bit is forced to '1', making the result a quiet NaN.
      uint64_t raw = double_to_rawbits(value);

      uint32_t sign = raw >> 63;
      uint32_t exponent = (1 << 8) - 1;
      uint32_t payload = unsigned_bitextract_64(50, 52 - 23, raw);
      payload |= (1 << 22);  // Force a quiet NaN.

      return rawbits_to_float((sign << 31) | (exponent << 23) | payload);
    }

    case FP_ZERO:
    case FP_INFINITE: {
      // In a C++ cast, any value representable in the target type will be
      // unchanged. This is always the case for +/-0.0 and infinities.
      return static_cast<float>(value);
    }

    case FP_NORMAL:
    case FP_SUBNORMAL: {
      // Convert double-to-float as the processor would, assuming that FPCR.FZ
      // (flush-to-zero) is not set.
      uint64_t raw = double_to_rawbits(value);
      // Extract the IEEE-754 double components.
      uint32_t sign = raw >> 63;
      // Extract the exponent and remove the IEEE-754 encoding bias.
      int32_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
      // Extract the mantissa and add the implicit '1' bit.
      uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
      if (fpclassify(value) == FP_NORMAL) {
        mantissa |= (UINT64_C(1) << 52);
      }
      return FPRoundToFloat(sign, exponent, mantissa, round_mode);
    }
  }

  VIXL_UNREACHABLE();
  return value;
}


void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) {
  AssertSupportedFPCR();

@ -2851,63 +2324,6 @@ void Simulator::VisitFPDataProcessing3Source(const Instruction* instr) {
}

template <typename T>
T Simulator::FPProcessNaN(T op) {
  VIXL_ASSERT(isnan(op));
  if (IsSignallingNaN(op)) {
    FPProcessException();
  }
  return DN() ? FPDefaultNaN<T>() : ToQuietNaN(op);
}

template float Simulator::FPProcessNaN(float op);
template double Simulator::FPProcessNaN(double op);

template <typename T>
T Simulator::FPProcessNaNs(T op1, T op2) {
  if (IsSignallingNaN(op1)) {
    return FPProcessNaN(op1);
  } else if (IsSignallingNaN(op2)) {
    return FPProcessNaN(op2);
  } else if (isnan(op1)) {
    VIXL_ASSERT(IsQuietNaN(op1));
    return FPProcessNaN(op1);
  } else if (isnan(op2)) {
    VIXL_ASSERT(IsQuietNaN(op2));
    return FPProcessNaN(op2);
  } else {
    return 0.0;
  }
}

template float Simulator::FPProcessNaNs(float op1, float op2);
template double Simulator::FPProcessNaNs(double op1, double op2);

template <typename T>
T Simulator::FPProcessNaNs3(T op1, T op2, T op3) {
  if (IsSignallingNaN(op1)) {
    return FPProcessNaN(op1);
  } else if (IsSignallingNaN(op2)) {
    return FPProcessNaN(op2);
  } else if (IsSignallingNaN(op3)) {
    return FPProcessNaN(op3);
  } else if (isnan(op1)) {
    VIXL_ASSERT(IsQuietNaN(op1));
    return FPProcessNaN(op1);
  } else if (isnan(op2)) {
    VIXL_ASSERT(IsQuietNaN(op2));
    return FPProcessNaN(op2);
  } else if (isnan(op3)) {
    VIXL_ASSERT(IsQuietNaN(op3));
    return FPProcessNaN(op3);
  } else {
    return 0.0;
  }
}

template float Simulator::FPProcessNaNs3(float op1, float op2, float op3);
template double Simulator::FPProcessNaNs3(double op1, double op2, double op3);

bool Simulator::FPProcessNaNs(const Instruction* instr) {
  unsigned fd = instr->Rd();
  unsigned fn = instr->Rn();
@ -2916,13 +2332,13 @@ bool Simulator::FPProcessNaNs(const Instruction* instr) {

  if (instr->Mask(FP64) == FP64) {
    double result = FPProcessNaNs(dreg(fn), dreg(fm));
    if (isnan(result)) {
    if (std::isnan(result)) {
      set_dreg(fd, result);
      done = true;
    }
  } else {
    float result = FPProcessNaNs(sreg(fn), sreg(fm));
    if (isnan(result)) {
    if (std::isnan(result)) {
      set_sreg(fd, result);
      done = true;
    }

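The quieting step that FPProcessNaN delegates to relies on the AArch64 convention that a NaN is quiet when the top bit of its payload is set. A minimal sketch of that operation, assuming the standard binary64 layout; the memcpy calls stand in for the library's rawbits helpers:

#include <cstdint>
#include <cstring>

// Set the top mantissa bit of a binary64 NaN, turning a signalling NaN into
// the corresponding quiet NaN while preserving the sign and payload bits.
double QuietNaNSketch(double num) {
  const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
  uint64_t raw;
  memcpy(&raw, &num, sizeof(raw));
  raw |= kFP64QuietNaNMask;
  memcpy(&num, &raw, sizeof(num));
  return num;
}
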
@ -3618,13 +3034,13 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
  switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
    case NEON_LD1_4v:
    case NEON_LD1_4v_post: ld1(vf, vreg(reg[3]), addr[3]); count++;
      // Fall through.
      VIXL_FALLTHROUGH();
    case NEON_LD1_3v:
    case NEON_LD1_3v_post: ld1(vf, vreg(reg[2]), addr[2]); count++;
      // Fall through.
      VIXL_FALLTHROUGH();
    case NEON_LD1_2v:
    case NEON_LD1_2v_post: ld1(vf, vreg(reg[1]), addr[1]); count++;
      // Fall through.
      VIXL_FALLTHROUGH();
    case NEON_LD1_1v:
    case NEON_LD1_1v_post:
      ld1(vf, vreg(reg[0]), addr[0]);
@ -3632,13 +3048,13 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
      break;
    case NEON_ST1_4v:
    case NEON_ST1_4v_post: st1(vf, vreg(reg[3]), addr[3]); count++;
      // Fall through.
      VIXL_FALLTHROUGH();
    case NEON_ST1_3v:
    case NEON_ST1_3v_post: st1(vf, vreg(reg[2]), addr[2]); count++;
      // Fall through.
      VIXL_FALLTHROUGH();
    case NEON_ST1_2v:
    case NEON_ST1_2v_post: st1(vf, vreg(reg[1]), addr[1]); count++;
      // Fall through.
      VIXL_FALLTHROUGH();
    case NEON_ST1_1v:
    case NEON_ST1_1v_post:
      st1(vf, vreg(reg[0]), addr[0]);
@ -3745,6 +3161,7 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
    case NEON_LD3_b_post:
    case NEON_LD4_b:
    case NEON_LD4_b_post: do_load = true;
      VIXL_FALLTHROUGH();
    case NEON_ST1_b:
    case NEON_ST1_b_post:
    case NEON_ST2_b:
@ -3762,6 +3179,7 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
    case NEON_LD3_h_post:
    case NEON_LD4_h:
    case NEON_LD4_h_post: do_load = true;
      VIXL_FALLTHROUGH();
    case NEON_ST1_h:
    case NEON_ST1_h_post:
    case NEON_ST2_h:
@ -3778,6 +3196,7 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
    case NEON_LD3_s_post:
    case NEON_LD4_s:
    case NEON_LD4_s_post: do_load = true;
      VIXL_FALLTHROUGH();
    case NEON_ST1_s:
    case NEON_ST1_s_post:
    case NEON_ST2_s:
@ -27,12 +27,12 @@
#ifndef VIXL_A64_SIMULATOR_A64_H_
#define VIXL_A64_SIMULATOR_A64_H_

#include "globals.h"
#include "utils.h"
#include "a64/instructions-a64.h"
#include "a64/assembler-a64.h"
#include "a64/disasm-a64.h"
#include "a64/instrument-a64.h"
#include "vixl/globals.h"
#include "vixl/utils.h"
#include "vixl/a64/instructions-a64.h"
#include "vixl/a64/assembler-a64.h"
#include "vixl/a64/disasm-a64.h"
#include "vixl/a64/instrument-a64.h"

namespace vixl {

@ -150,6 +150,201 @@ const unsigned kLogParamsOffset = 1 * kInstructionSize;
const unsigned kLogLength = 2 * kInstructionSize;

// Assemble the specified IEEE-754 components into the target type and apply
// appropriate rounding.
//  sign:     0 = positive, 1 = negative
//  exponent: Unbiased IEEE-754 exponent.
//  mantissa: The mantissa of the input. The top bit (which is not encoded for
//            normal IEEE-754 values) must not be omitted. This bit has the
//            value 'pow(2, exponent)'.
//
// The input value is assumed to be a normalized value. That is, the input may
// not be infinity or NaN. If the source value is subnormal, it must be
// normalized before calling this function such that the highest set bit in the
// mantissa has the value 'pow(2, exponent)'.
//
// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
// calling a templated FPRound.
template <class T, int ebits, int mbits>
T FPRound(int64_t sign, int64_t exponent, uint64_t mantissa,
          FPRounding round_mode) {
  VIXL_ASSERT((sign == 0) || (sign == 1));

  // Only FPTieEven and FPRoundOdd rounding modes are implemented.
  VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));

  // Rounding can promote subnormals to normals, and normals to infinities. For
  // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
  // encodable as a float, but rounding based on the low-order mantissa bits
  // could make it overflow. With ties-to-even rounding, this value would
  // become an infinity.

  // ---- Rounding Method ----
  //
  // The exponent is irrelevant in the rounding operation, so we treat the
  // lowest-order bit that will fit into the result ('onebit') as having
  // the value '1'. Similarly, the highest-order bit that won't fit into
  // the result ('halfbit') has the value '0.5'. The 'point' sits between
  // 'onebit' and 'halfbit':
  //
  //            These bits fit into the result.
  //               |---------------------|
  //  mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
  //                                     ||
  //                                    / |
  //                                   /  halfbit
  //                              onebit
  //
  // For subnormal outputs, the range of representable bits is smaller and
  // the position of onebit and halfbit depends on the exponent of the
  // input, but the method is otherwise similar.
  //
  //   onebit(frac)
  //     |
  //     | halfbit(frac)          halfbit(adjusted)
  //     | /                      /
  //     | |                      |
  //  0b00.0 (exact)      -> 0b00.0 (exact)    -> 0b00
  //  0b00.0...           -> 0b00.0...         -> 0b00
  //  0b00.1 (exact)      -> 0b00.0111..111    -> 0b00
  //  0b00.1...           -> 0b00.1...         -> 0b01
  //  0b01.0 (exact)      -> 0b01.0 (exact)    -> 0b01
  //  0b01.0...           -> 0b01.0...         -> 0b01
  //  0b01.1 (exact)      -> 0b01.1 (exact)    -> 0b10
  //  0b01.1...           -> 0b01.1...         -> 0b10
  //  0b10.0 (exact)      -> 0b10.0 (exact)    -> 0b10
  //  0b10.0...           -> 0b10.0...         -> 0b10
  //  0b10.1 (exact)      -> 0b10.0111..111    -> 0b10
  //  0b10.1...           -> 0b10.1...         -> 0b11
  //  0b11.0 (exact)      -> 0b11.0 (exact)    -> 0b11
  //  ...
  //
  // where: adjusted = frac - (halfbit(mantissa) & ~onebit(frac));
  //        mantissa = (mantissa >> shift) + halfbit(adjusted);

  static const int mantissa_offset = 0;
  static const int exponent_offset = mantissa_offset + mbits;
  static const int sign_offset = exponent_offset + ebits;
  VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));

  // Bail out early for zero inputs.
  if (mantissa == 0) {
    return sign << sign_offset;
  }

  // If all bits in the exponent are set, the value is infinite or NaN.
  // This is true for all binary IEEE-754 formats.
  static const int infinite_exponent = (1 << ebits) - 1;
  static const int max_normal_exponent = infinite_exponent - 1;

  // Apply the exponent bias to encode it for the result. Doing this early
  // makes it easy to detect values that will be infinite or subnormal.
  exponent += max_normal_exponent >> 1;

  if (exponent > max_normal_exponent) {
    // Overflow: the input is too large for the result type to represent.
    if (round_mode == FPTieEven) {
      // FPTieEven rounding mode handles overflows using infinities.
      exponent = infinite_exponent;
      mantissa = 0;
    } else {
      VIXL_ASSERT(round_mode == FPRoundOdd);
      // FPRoundOdd rounding mode handles overflows using the largest magnitude
      // normal number.
      exponent = max_normal_exponent;
      mantissa = (UINT64_C(1) << exponent_offset) - 1;
    }
    return (sign << sign_offset) |
           (exponent << exponent_offset) |
           (mantissa << mantissa_offset);
  }

  // Calculate the shift required to move the top mantissa bit to the proper
  // place in the destination type.
  const int highest_significant_bit = 63 - CountLeadingZeros(mantissa);
  int shift = highest_significant_bit - mbits;

  if (exponent <= 0) {
    // The output will be subnormal (before rounding).
    // For subnormal outputs, the shift must be adjusted by the exponent. The
    // +1 is necessary because the exponent of a subnormal value (encoded as 0)
    // is the same as the exponent of the smallest normal value (encoded as 1).
    shift += -exponent + 1;

    // Handle inputs that would produce a zero output.
    //
    // Shifts higher than highest_significant_bit+1 will always produce a zero
    // result. A shift of exactly highest_significant_bit+1 might produce a
    // non-zero result after rounding.
    if (shift > (highest_significant_bit + 1)) {
      if (round_mode == FPTieEven) {
        // The result will always be +/-0.0.
        return sign << sign_offset;
      } else {
        VIXL_ASSERT(round_mode == FPRoundOdd);
        VIXL_ASSERT(mantissa != 0);
        // For FPRoundOdd, if the mantissa is too small to represent and
        // non-zero, return the next "odd" value.
        return (sign << sign_offset) | 1;
      }
    }

    // Properly encode the exponent for a subnormal output.
    exponent = 0;
  } else {
    // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
    // normal values.
    mantissa &= ~(UINT64_C(1) << highest_significant_bit);
  }

  if (shift > 0) {
    if (round_mode == FPTieEven) {
      // We have to shift the mantissa to the right. Some precision is lost, so
      // we need to apply rounding.
      uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
      uint64_t halfbit_mantissa = (mantissa >> (shift-1)) & 1;
      uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
      uint64_t adjusted = mantissa - adjustment;
      T halfbit_adjusted = (adjusted >> (shift-1)) & 1;

      T result = (sign << sign_offset) |
                 (exponent << exponent_offset) |
                 ((mantissa >> shift) << mantissa_offset);

      // A very large mantissa can overflow during rounding. If this happens,
      // the exponent should be incremented and the mantissa set to 1.0
      // (encoded as 0). Applying halfbit_adjusted after assembling the float
      // has the nice side-effect that this case is handled for free.
      //
      // This also handles cases where a very large finite value overflows to
      // infinity, or where a very large subnormal value overflows to become
      // normal.
      return result + halfbit_adjusted;
    } else {
      VIXL_ASSERT(round_mode == FPRoundOdd);
      // If any bits at position halfbit or below are set, onebit (i.e. the
      // bottom bit of the resulting mantissa) must be set.
      uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
      if (fractional_bits != 0) {
        mantissa |= UINT64_C(1) << shift;
      }

      return (sign << sign_offset) |
             (exponent << exponent_offset) |
             ((mantissa >> shift) << mantissa_offset);
    }
  } else {
    // We have to shift the mantissa to the left (or not at all). The input
    // mantissa is exactly representable in the output mantissa, so apply no
    // rounding correction.
    return (sign << sign_offset) |
           (exponent << exponent_offset) |
           ((mantissa << -shift) << mantissa_offset);
  }
}

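The onebit/halfbit adjustment in the table above can be exercised in isolation on a bare mantissa. A minimal sketch, assuming shift is at least 1 (the function name is illustrative, not part of the library):

#include <cstdint>

// Round 'mantissa >> shift' to nearest, ties to even: a halfbit with nothing
// set below it only rounds up when the result would otherwise be odd.
uint64_t RoundShiftTiesToEven(uint64_t mantissa, int shift) {
  uint64_t onebit = (mantissa >> shift) & 1;
  uint64_t halfbit = (mantissa >> (shift - 1)) & 1;
  uint64_t adjusted = mantissa - (halfbit & ~onebit);
  uint64_t halfbit_adjusted = (adjusted >> (shift - 1)) & 1;
  return (mantissa >> shift) + halfbit_adjusted;
}
// With shift = 1: 0b101 (2.5) -> 0b10 (2), 0b111 (3.5) -> 0b100 (4), and
// 0b110 (3.0) -> 0b11 (3); exact halves round to the even neighbour.
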
// Representation of memory, with typed getters and setters for access.
class Memory {
@ -988,7 +1183,7 @@ class Simulator : public DecoderVisitor {

  PrintRegisterFormat GetPrintRegisterFormatForSizeFP(unsigned size) {
    switch (size) {
      default: VIXL_UNREACHABLE();
      default: VIXL_UNREACHABLE(); return kPrintDReg;
      case kDRegSizeInBytes: return kPrintDReg;
      case kSRegSizeInBytes: return kPrintSReg;
    }
@ -1170,7 +1365,8 @@ class Simulator : public DecoderVisitor {
        return !Z() && (N() == V());
      case le:
        return !(!Z() && (N() == V()));
      case nv:  // Fall through.
      case nv:
        VIXL_FALLTHROUGH();
      case al:
        return true;
      default:
@ -2317,8 +2513,6 @@ class Simulator : public DecoderVisitor {

  void SysOp_W(int op, int64_t val);

  template <typename T>
  T FPDefaultNaN() const;
  template <typename T>
  T FPRecipSqrtEstimate(T op);
  template <typename T>
@ -2326,7 +2520,7 @@ class Simulator : public DecoderVisitor {
  template <typename T, typename R>
  R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding);

  void FPCompare(double val0, double val1);
  void FPCompare(double val0, double val1, FPTrapFlags trap);
  double FPRoundInt(double value, FPRounding round_mode);
  double FPToDouble(float value);
  float FPToFloat(double value, FPRounding round_mode);
@ -2389,18 +2583,8 @@ class Simulator : public DecoderVisitor {
  // for cumulative exception bits or floating-point exceptions.
  void FPProcessException() { }

  // Standard NaN processing.
  template <typename T>
  T FPProcessNaN(T op);

  bool FPProcessNaNs(const Instruction* instr);

  template <typename T>
  T FPProcessNaNs(T op1, T op2);

  template <typename T>
  T FPProcessNaNs3(T op1, T op2, T op3);

  // Pseudo Printf instruction
  void DoPrintf(const Instruction* instr);

@ -2478,6 +2662,58 @@ class Simulator : public DecoderVisitor {
  static const Instruction* kEndOfSimAddress;

 private:
  template <typename T>
  static T FPDefaultNaN();

  // Standard NaN processing.
  template <typename T>
  T FPProcessNaN(T op) {
    VIXL_ASSERT(std::isnan(op));
    if (IsSignallingNaN(op)) {
      FPProcessException();
    }
    return DN() ? FPDefaultNaN<T>() : ToQuietNaN(op);
  }

  template <typename T>
  T FPProcessNaNs(T op1, T op2) {
    if (IsSignallingNaN(op1)) {
      return FPProcessNaN(op1);
    } else if (IsSignallingNaN(op2)) {
      return FPProcessNaN(op2);
    } else if (std::isnan(op1)) {
      VIXL_ASSERT(IsQuietNaN(op1));
      return FPProcessNaN(op1);
    } else if (std::isnan(op2)) {
      VIXL_ASSERT(IsQuietNaN(op2));
      return FPProcessNaN(op2);
    } else {
      return 0.0;
    }
  }

  template <typename T>
  T FPProcessNaNs3(T op1, T op2, T op3) {
    if (IsSignallingNaN(op1)) {
      return FPProcessNaN(op1);
    } else if (IsSignallingNaN(op2)) {
      return FPProcessNaN(op2);
    } else if (IsSignallingNaN(op3)) {
      return FPProcessNaN(op3);
    } else if (std::isnan(op1)) {
      VIXL_ASSERT(IsQuietNaN(op1));
      return FPProcessNaN(op1);
    } else if (std::isnan(op2)) {
      VIXL_ASSERT(IsQuietNaN(op2));
      return FPProcessNaN(op2);
    } else if (std::isnan(op3)) {
      VIXL_ASSERT(IsQuietNaN(op3));
      return FPProcessNaN(op3);
    } else {
      return 0.0;
    }
  }

  bool coloured_trace_;

  // A set of TraceParameters flags.
@ -24,8 +24,8 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "code-buffer.h"
#include "utils.h"
#include "vixl/code-buffer.h"
#include "vixl/utils.h"

namespace vixl {

@ -28,7 +28,7 @@
#define VIXL_CODE_BUFFER_H

#include <string.h>
#include "globals.h"
#include "vixl/globals.h"

namespace vixl {

144
src/vixl/compiler-intrinsics.cc
Normal file
144
src/vixl/compiler-intrinsics.cc
Normal file
@ -0,0 +1,144 @@
// Copyright 2015, ARM Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include "compiler-intrinsics.h"

namespace vixl {


int CountLeadingSignBitsFallBack(int64_t value, int width) {
  VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
  if (value >= 0) {
    return CountLeadingZeros(value, width) - 1;
  } else {
    return CountLeadingZeros(~value, width) - 1;
  }
}


int CountLeadingZerosFallBack(uint64_t value, int width) {
  VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
  if (value == 0) {
    return width;
  }
  int count = 0;
  value = value << (64 - width);
  if ((value & UINT64_C(0xffffffff00000000)) == 0) {
    count += 32;
    value = value << 32;
  }
  if ((value & UINT64_C(0xffff000000000000)) == 0) {
    count += 16;
    value = value << 16;
  }
  if ((value & UINT64_C(0xff00000000000000)) == 0) {
    count += 8;
    value = value << 8;
  }
  if ((value & UINT64_C(0xf000000000000000)) == 0) {
    count += 4;
    value = value << 4;
  }
  if ((value & UINT64_C(0xc000000000000000)) == 0) {
    count += 2;
    value = value << 2;
  }
  if ((value & UINT64_C(0x8000000000000000)) == 0) {
    count += 1;
  }
  count += (value == 0);
  return count;
}

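The loop-free search above halves the remaining range at each step, so it always finishes in six mask tests regardless of the input. A few spot checks, assuming the function behaves as written:

#include <cassert>
#include <cstdint>

void CheckClzFallBack() {
  assert(CountLeadingZerosFallBack(0, 64) == 64);      // Early-out case.
  assert(CountLeadingZerosFallBack(1, 64) == 63);      // Lowest bit only.
  assert(CountLeadingZerosFallBack(UINT64_C(0x8000000000000000), 64) == 0);
  assert(CountLeadingZerosFallBack(0x00ff, 16) == 8);  // Narrow width.
}
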
int CountSetBitsFallBack(uint64_t value, int width) {
  VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));

  // Mask out unused bits to ensure that they are not counted.
  value &= (UINT64_C(0xffffffffffffffff) >> (64 - width));

  // Add up the set bits.
  // The algorithm works by adding pairs of bit fields together iteratively,
  // where the size of each bit field doubles each time.
  // An example for an 8-bit value:
  // Bits:   h  g  f  e  d  c  b  a
  //          \ |   \ |   \ |   \ |
  // value = h+g   f+e   d+c   b+a
  //             \  |       \  |
  // value =  h+g+f+e     d+c+b+a
  //                 \        |
  // value =   h+g+f+e+d+c+b+a
  const uint64_t kMasks[] = {
    UINT64_C(0x5555555555555555),
    UINT64_C(0x3333333333333333),
    UINT64_C(0x0f0f0f0f0f0f0f0f),
    UINT64_C(0x00ff00ff00ff00ff),
    UINT64_C(0x0000ffff0000ffff),
    UINT64_C(0x00000000ffffffff),
  };

  for (unsigned i = 0; i < (sizeof(kMasks) / sizeof(kMasks[0])); i++) {
    int shift = 1 << i;
    value = ((value >> shift) & kMasks[i]) + (value & kMasks[i]);
  }

  return value;
}

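A worked instance of the pairwise addition for the byte 0b10110101 (0xb5, five set bits), following the three mask rounds that matter for 8 bits:

// value = 0b10110101                                          (0xb5)
//   round 1 (mask 0x55): 0b01010000 + 0b00010101 = 0b01100101 (bit pairs)
//   round 2 (mask 0x33): 0b00010001 + 0b00100001 = 0b00110010 (nibbles)
//   round 3 (mask 0x0f): 0b00000011 + 0b00000010 = 0b00000101 (= 5)
// so, assuming the function above:
//   assert(CountSetBitsFallBack(0xb5, 8) == 5);
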
int CountTrailingZerosFallBack(uint64_t value, int width) {
  VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
  int count = 0;
  value = value << (64 - width);
  if ((value & UINT64_C(0xffffffff)) == 0) {
    count += 32;
    value = value >> 32;
  }
  if ((value & 0xffff) == 0) {
    count += 16;
    value = value >> 16;
  }
  if ((value & 0xff) == 0) {
    count += 8;
    value = value >> 8;
  }
  if ((value & 0xf) == 0) {
    count += 4;
    value = value >> 4;
  }
  if ((value & 0x3) == 0) {
    count += 2;
    value = value >> 2;
  }
  if ((value & 0x1) == 0) {
    count += 1;
  }
  count += (value == 0);
  return count - (64 - width);
}


}  // namespace vixl
155
src/vixl/compiler-intrinsics.h
Normal file
155
src/vixl/compiler-intrinsics.h
Normal file
@ -0,0 +1,155 @@
// Copyright 2015, ARM Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.


#ifndef VIXL_COMPILER_INTRINSICS_H
#define VIXL_COMPILER_INTRINSICS_H

#include "globals.h"

namespace vixl {

// Helper to check whether the version of GCC used is greater than the
// specified requirement.
#define MAJOR 1000000
#define MINOR 1000
#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \
    ((__GNUC__ * MAJOR + __GNUC_MINOR__ * MINOR + __GNUC_PATCHLEVEL__) >= \
     ((major) * MAJOR + (minor) * MINOR + (patchlevel)))
#elif defined(__GNUC__) && defined(__GNUC_MINOR__)
#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \
    ((__GNUC__ * MAJOR + __GNUC_MINOR__ * MINOR) >= \
     ((major) * MAJOR + (minor) * MINOR + (patchlevel)))
#else
#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) 0
#endif

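Both sides of the comparison are folded onto a single integer scale (major * 1000000 + minor * 1000 + patchlevel), so a hypothetical build with GCC 4.9.2 would evaluate:

// GCC_VERSION_OR_NEWER(4, 7, 0)
//   => (4 * 1000000 + 9 * 1000 + 2) >= (4 * 1000000 + 7 * 1000 + 0)
//   => 4009002 >= 4007000
//   => true
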
#if defined(__clang__) && !defined(VIXL_NO_COMPILER_BUILTINS)

#define COMPILER_HAS_BUILTIN_CLRSB (__has_builtin(__builtin_clrsb))
#define COMPILER_HAS_BUILTIN_CLZ (__has_builtin(__builtin_clz))
#define COMPILER_HAS_BUILTIN_CTZ (__has_builtin(__builtin_ctz))
#define COMPILER_HAS_BUILTIN_FFS (__has_builtin(__builtin_ffs))
#define COMPILER_HAS_BUILTIN_POPCOUNT (__has_builtin(__builtin_popcount))

#elif defined(__GNUC__) && !defined(VIXL_NO_COMPILER_BUILTINS)
// The documentation for these builtins is available at:
// https://gcc.gnu.org/onlinedocs/gcc-$MAJOR.$MINOR.$PATCHLEVEL/gcc//Other-Builtins.html

# define COMPILER_HAS_BUILTIN_CLRSB (GCC_VERSION_OR_NEWER(4, 7, 0))
# define COMPILER_HAS_BUILTIN_CLZ (GCC_VERSION_OR_NEWER(3, 4, 0))
# define COMPILER_HAS_BUILTIN_CTZ (GCC_VERSION_OR_NEWER(3, 4, 0))
# define COMPILER_HAS_BUILTIN_FFS (GCC_VERSION_OR_NEWER(3, 4, 0))
# define COMPILER_HAS_BUILTIN_POPCOUNT (GCC_VERSION_OR_NEWER(3, 4, 0))

#else
// One can define VIXL_NO_COMPILER_BUILTINS to force using the manually
// implemented C++ methods.

#define COMPILER_HAS_BUILTIN_BSWAP false
#define COMPILER_HAS_BUILTIN_CLRSB false
#define COMPILER_HAS_BUILTIN_CLZ false
#define COMPILER_HAS_BUILTIN_CTZ false
#define COMPILER_HAS_BUILTIN_FFS false
#define COMPILER_HAS_BUILTIN_POPCOUNT false

#endif

template<typename V>
inline bool IsPowerOf2(V value) {
  return (value != 0) && ((value & (value - 1)) == 0);
}


// Declaration of fallback functions.
int CountLeadingSignBitsFallBack(int64_t value, int width);
int CountLeadingZerosFallBack(uint64_t value, int width);
int CountSetBitsFallBack(uint64_t value, int width);
int CountTrailingZerosFallBack(uint64_t value, int width);


// Implementation of intrinsics functions.
// TODO: The implementations could be improved for sizes different from 32bit
// and 64bit: we could mask the values and call the appropriate builtin.

template<typename V>
inline int CountLeadingSignBits(V value, int width = (sizeof(V) * 8)) {
#if COMPILER_HAS_BUILTIN_CLRSB
  if (width == 32) {
    return __builtin_clrsb(value);
  } else if (width == 64) {
    return __builtin_clrsbll(value);
  }
#endif
  return CountLeadingSignBitsFallBack(value, width);
}


template<typename V>
inline int CountLeadingZeros(V value, int width = (sizeof(V) * 8)) {
#if COMPILER_HAS_BUILTIN_CLZ
  if (width == 32) {
    return (value == 0) ? 32 : __builtin_clz(value);
  } else if (width == 64) {
    return (value == 0) ? 64 : __builtin_clzll(value);
  }
#endif
  return CountLeadingZerosFallBack(value, width);
}


template<typename V>
inline int CountSetBits(V value, int width = (sizeof(V) * 8)) {
#if COMPILER_HAS_BUILTIN_POPCOUNT
  if (width == 32) {
    return __builtin_popcount(value);
  } else if (width == 64) {
    return __builtin_popcountll(value);
  }
#endif
  return CountSetBitsFallBack(value, width);
}


template<typename V>
inline int CountTrailingZeros(V value, int width = (sizeof(V) * 8)) {
#if COMPILER_HAS_BUILTIN_CTZ
  if (width == 32) {
    return (value == 0) ? 32 : __builtin_ctz(value);
  } else if (width == 64) {
    return (value == 0) ? 64 : __builtin_ctzll(value);
  }
#endif
  return CountTrailingZerosFallBack(value, width);
}

}  // namespace vixl

#endif  // VIXL_COMPILER_INTRINSICS_H

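Callers pick up the operand width from the argument type, so the usual call sites pass no explicit width. A few illustrative values, assuming the templates behave as written above:

#include <cstdint>
// CountLeadingZeros(UINT32_C(0x00ff0000))         == 8
// CountLeadingZeros(UINT64_C(0x00ff000000000000)) == 8
// CountSetBits(UINT32_C(0xb5))                    == 5
// CountTrailingZeros(UINT32_C(0x58))              == 3   // 0b1011000.
// CountLeadingSignBits(INT32_C(-1))               == 31  // All sign bits.
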
@ -49,7 +49,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <stddef.h>
#include "platform.h"
#include "vixl/platform.h"


typedef uint8_t byte;
@ -88,4 +88,20 @@ template <typename T> inline void USE(T, T, T, T) {}

#define VIXL_ALIGNMENT_EXCEPTION() printf("ALIGNMENT EXCEPTION\t"); VIXL_ABORT()

// The clang::fallthrough attribute is used along with the Wimplicit-fallthrough
// argument to annotate intentional fall-through between switch labels.
// For more information please refer to:
// http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
#ifndef __has_warning
#define __has_warning(x) 0
#endif

// Note: this option is only available for Clang, and will only be enabled for
// C++11 (201103L).
#if __has_warning("-Wimplicit-fallthrough") && __cplusplus >= 201103L
#define VIXL_FALLTHROUGH() [[clang::fallthrough]] //NOLINT
#else
#define VIXL_FALLTHROUGH() do {} while (0)
#endif

#endif  // VIXL_GLOBALS_H
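A minimal usage sketch (the function itself is hypothetical): the macro sits exactly where a break would otherwise go, so Clang's -Wimplicit-fallthrough treats the fall-through as deliberate while other toolchains see a harmless no-op.

// Count how many case labels at or below 'n' execute.
int CountHandledCases(int n) {
  int handled = 0;
  switch (n) {
    case 3:
      handled++;
      VIXL_FALLTHROUGH();
    case 2:
      handled++;
      VIXL_FALLTHROUGH();
    case 1:
      handled++;
      break;
    default:
      break;
  }
  return handled;
}
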
@ -32,7 +32,7 @@
#include <algorithm>
#include <vector>

#include "globals.h"
#include "vixl/globals.h"

namespace vixl {

@ -250,7 +250,7 @@ template<class S> class InvalSetIterator {

  // Indicates if the iterator is looking at the vector or at the preallocated
  // elements.
  bool using_vector_;
  const bool using_vector_;
  // Used when looking at the preallocated elements, or in debug mode when using
  // the vector to track how many times the iterator has advanced.
  size_t index_;
@ -657,13 +657,14 @@ void InvalSet<TEMPLATE_INVALSET_P_DEF>::ReclaimMemory() {

template<class S>
InvalSetIterator<S>::InvalSetIterator(S* inval_set)
    : using_vector_(false), index_(0), inval_set_(inval_set) {
    : using_vector_((inval_set != NULL) && inval_set->IsUsingVector()),
      index_(0),
      inval_set_(inval_set) {
  if (inval_set != NULL) {
    inval_set->Sort(S::kSoftSort);
#ifdef VIXL_DEBUG
    inval_set->Acquire();
#endif
    using_vector_ = inval_set->IsUsingVector();
    if (using_vector_) {
      iterator_ = typename std::vector<ElementType>::iterator(
          inval_set_->vector_->begin());
@ -24,7 +24,7 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "utils.h"
#include "vixl/utils.h"
#include <stdio.h>

namespace vixl {

@ -127,91 +127,6 @@ int float16classify(float16 value) {
}


int CountLeadingZeros(uint64_t value, int width) {
  VIXL_ASSERT((width == 8) || (width == 16) || (width == 32) || (width == 64));
  int count = 0;
  uint64_t bit_test = UINT64_C(1) << (width - 1);
  while ((count < width) && ((bit_test & value) == 0)) {
    count++;
    bit_test >>= 1;
  }
  return count;
}


int CountLeadingSignBits(int64_t value, int width) {
  VIXL_ASSERT((width == 8) || (width == 16) || (width == 32) || (width == 64));
  if (value >= 0) {
    return CountLeadingZeros(value, width) - 1;
  } else {
    return CountLeadingZeros(~value, width) - 1;
  }
}


int CountTrailingZeros(uint64_t value, int width) {
  VIXL_ASSERT((width == 32) || (width == 64));
  int count = 0;
  while ((count < width) && (((value >> count) & 1) == 0)) {
    count++;
  }
  return count;
}


int CountSetBits(uint64_t value, int width) {
  // TODO: Other widths could be added here, as the implementation already
  // supports them.
  VIXL_ASSERT((width == 32) || (width == 64));

  // Mask out unused bits to ensure that they are not counted.
  value &= (UINT64_C(0xffffffffffffffff) >> (64-width));

  // Add up the set bits.
  // The algorithm works by adding pairs of bit fields together iteratively,
  // where the size of each bit field doubles each time.
  // An example for an 8-bit value:
  // Bits:   h  g  f  e  d  c  b  a
  //          \ |   \ |   \ |   \ |
  // value = h+g   f+e   d+c   b+a
  //             \  |       \  |
  // value =  h+g+f+e     d+c+b+a
  //                 \        |
  // value =   h+g+f+e+d+c+b+a
  const uint64_t kMasks[] = {
    UINT64_C(0x5555555555555555),
    UINT64_C(0x3333333333333333),
    UINT64_C(0x0f0f0f0f0f0f0f0f),
    UINT64_C(0x00ff00ff00ff00ff),
    UINT64_C(0x0000ffff0000ffff),
    UINT64_C(0x00000000ffffffff),
  };

  for (unsigned i = 0; i < (sizeof(kMasks) / sizeof(kMasks[0])); i++) {
    int shift = 1 << i;
    value = ((value >> shift) & kMasks[i]) + (value & kMasks[i]);
  }

  return value;
}


uint64_t LowestSetBit(uint64_t value) {
  return value & -value;
}


int HighestSetBitPosition(uint64_t number) {
  VIXL_ASSERT(number != 0);
  return 63 - CountLeadingZeros(number, 64);
}


bool IsPowerOf2(int64_t value) {
  return (value != 0) && ((value & (value - 1)) == 0);
}


unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) {
  VIXL_ASSERT((reg_size % 8) == 0);
  int count = 0;
@ -27,9 +27,10 @@
#ifndef VIXL_UTILS_H
#define VIXL_UTILS_H

#include <math.h>
#include <string.h>
#include "globals.h"
#include <cmath>
#include "vixl/globals.h"
#include "vixl/compiler-intrinsics.h"

namespace vixl {

@ -121,7 +122,7 @@ int float16classify(float16 value);
inline bool IsSignallingNaN(double num) {
  const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
  uint64_t raw = double_to_rawbits(num);
  if (isnan(num) && ((raw & kFP64QuietNaNMask) == 0)) {
  if (std::isnan(num) && ((raw & kFP64QuietNaNMask) == 0)) {
    return true;
  }
  return false;
@ -131,7 +132,7 @@ inline bool IsSignallingNaN(double num) {
inline bool IsSignallingNaN(float num) {
  const uint32_t kFP32QuietNaNMask = 0x00400000;
  uint32_t raw = float_to_rawbits(num);
  if (isnan(num) && ((raw & kFP32QuietNaNMask) == 0)) {
  if (std::isnan(num) && ((raw & kFP32QuietNaNMask) == 0)) {
    return true;
  }
  return false;
@ -147,21 +148,21 @@ inline bool IsSignallingNaN(float16 num) {

template <typename T>
inline bool IsQuietNaN(T num) {
  return isnan(num) && !IsSignallingNaN(num);
  return std::isnan(num) && !IsSignallingNaN(num);
}


// Convert the NaN in 'num' to a quiet NaN.
inline double ToQuietNaN(double num) {
  const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
  VIXL_ASSERT(isnan(num));
  VIXL_ASSERT(std::isnan(num));
  return rawbits_to_double(double_to_rawbits(num) | kFP64QuietNaNMask);
}


inline float ToQuietNaN(float num) {
  const uint32_t kFP32QuietNaNMask = 0x00400000;
  VIXL_ASSERT(isnan(num));
  VIXL_ASSERT(std::isnan(num));
  return rawbits_to_float(float_to_rawbits(num) | kFP32QuietNaNMask);
}

@ -177,14 +178,23 @@ inline float FusedMultiplyAdd(float op1, float op2, float a) {
}


// Bit counting.
int CountLeadingZeros(uint64_t value, int width);
int CountLeadingSignBits(int64_t value, int width);
int CountTrailingZeros(uint64_t value, int width);
int CountSetBits(uint64_t value, int width);
uint64_t LowestSetBit(uint64_t value);
int HighestSetBitPosition(uint64_t value);
bool IsPowerOf2(int64_t value);
inline uint64_t LowestSetBit(uint64_t value) {
  return value & -value;
}


template<typename T>
inline int HighestSetBitPosition(T value) {
  VIXL_ASSERT(value != 0);
  return (sizeof(value) * 8 - 1) - CountLeadingZeros(value);
}


template<typename V>
inline int WhichPowerOf2(V value) {
  VIXL_ASSERT(IsPowerOf2(value));
  return CountTrailingZeros(value);
}


unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size);

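The value & -value idiom in LowestSetBit works because two's complement negation flips every bit above the lowest set bit and leaves that bit, and the zeros below it, unchanged; the AND therefore keeps only that bit. For example:

//  value          = 0b1011000      (0x58)
// -value          = 0b...10101000  (~value + 1)
//  value & -value = 0b0001000      (0x08)
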
@ -24,9 +24,9 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "a64/macro-assembler-a64.h"
#include "a64/debugger-a64.h"
#include "a64/simulator-a64.h"
#include "vixl/a64/macro-assembler-a64.h"
#include "vixl/a64/debugger-a64.h"
#include "vixl/a64/simulator-a64.h"
#include "examples.h"
#include "non-const-visitor.h"
#include "custom-disassembler.h"

@ -27,16 +27,16 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include <cmath>

#include "test-runner.h"
#include "test-utils-a64.h"
#include "a64/macro-assembler-a64.h"
#include "a64/simulator-a64.h"
#include "a64/debugger-a64.h"
#include "a64/disasm-a64.h"
#include "a64/cpu-a64.h"
#include "vixl/a64/macro-assembler-a64.h"
#include "vixl/a64/simulator-a64.h"
#include "vixl/a64/debugger-a64.h"
#include "vixl/a64/disasm-a64.h"
#include "vixl/a64/cpu-a64.h"

namespace vixl {

@ -1072,28 +1072,28 @@ TEST(mul) {
  SETUP();

  START();
  __ Mov(x16, 0);
  __ Mov(x17, 1);
  __ Mov(x25, 0);
  __ Mov(x26, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Mul(w0, w16, w16);
  __ Mul(w1, w16, w17);
  __ Mul(w2, w17, w18);
  __ Mul(w0, w25, w25);
  __ Mul(w1, w25, w26);
  __ Mul(w2, w26, w18);
  __ Mul(w3, w18, w19);
  __ Mul(x4, x16, x16);
  __ Mul(x5, x17, x18);
  __ Mul(x4, x25, x25);
  __ Mul(x5, x26, x18);
  __ Mul(x6, x18, x19);
  __ Mul(x7, x19, x19);
  __ Smull(x8, w17, w18);
  __ Smull(x8, w26, w18);
  __ Smull(x9, w18, w18);
  __ Smull(x10, w19, w19);
  __ Mneg(w11, w16, w16);
  __ Mneg(w12, w16, w17);
  __ Mneg(w13, w17, w18);
  __ Mneg(w11, w25, w25);
  __ Mneg(w12, w25, w26);
  __ Mneg(w13, w26, w18);
  __ Mneg(w14, w18, w19);
  __ Mneg(x20, x16, x16);
  __ Mneg(x21, x17, x18);
  __ Mneg(x20, x25, x25);
  __ Mneg(x21, x26, x18);
  __ Mneg(x22, x18, x19);
  __ Mneg(x23, x19, x19);
  END();
@ -1333,6 +1333,54 @@ TEST(smulh) {
}

TEST(umulh) {
  SETUP();

  START();
  __ Mov(x20, 0);
  __ Mov(x21, 1);
  __ Mov(x22, 0x0000000100000000);
  __ Mov(x23, 0x0000000012345678);
  __ Mov(x24, 0x0123456789abcdef);
  __ Mov(x25, 0x0000000200000000);
  __ Mov(x26, 0x8000000000000000);
  __ Mov(x27, 0xffffffffffffffff);
  __ Mov(x28, 0x5555555555555555);
  __ Mov(x29, 0xaaaaaaaaaaaaaaaa);

  __ Umulh(x0, x20, x24);
  __ Umulh(x1, x21, x24);
  __ Umulh(x2, x22, x23);
  __ Umulh(x3, x22, x24);
  __ Umulh(x4, x24, x25);
  __ Umulh(x5, x23, x27);
  __ Umulh(x6, x26, x26);
  __ Umulh(x7, x26, x27);
  __ Umulh(x8, x27, x27);
  __ Umulh(x9, x28, x28);
  __ Umulh(x10, x28, x29);
  __ Umulh(x11, x29, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0x0000000001234567, x3);
  ASSERT_EQUAL_64(0x0000000002468acf, x4);
  ASSERT_EQUAL_64(0x0000000012345677, x5);
  ASSERT_EQUAL_64(0x4000000000000000, x6);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x7);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x8);
  ASSERT_EQUAL_64(0x1c71c71c71c71c71, x9);
  ASSERT_EQUAL_64(0x38e38e38e38e38e3, x10);
  ASSERT_EQUAL_64(0x71c71c71c71c71c6, x11);

  TEARDOWN();
}

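The expected values can be cross-checked on the host with 128-bit arithmetic; this sketch assumes a compiler that provides the non-standard but widely supported __int128 extension:

#include <cstdint>

// Reference model for UMULH: the high 64 bits of an unsigned 64x64 multiply.
uint64_t UmulhReference(uint64_t a, uint64_t b) {
  unsigned __int128 product = static_cast<unsigned __int128>(a) * b;
  return static_cast<uint64_t>(product >> 64);
}
// e.g. UmulhReference(0x5555555555555555, 0x5555555555555555)
//        == 0x1c71c71c71c71c71, matching the x9 expectation above.
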
TEST(smaddl_umaddl_umull) {
  SETUP();

@ -9446,26 +9494,26 @@ static float MinMaxHelper(float n,
  uint32_t raw_n = float_to_rawbits(n);
  uint32_t raw_m = float_to_rawbits(m);

  if (isnan(n) && ((raw_n & kFP32QuietNaNMask) == 0)) {
  if (std::isnan(n) && ((raw_n & kFP32QuietNaNMask) == 0)) {
    // n is signalling NaN.
    return rawbits_to_float(raw_n | kFP32QuietNaNMask);
  } else if (isnan(m) && ((raw_m & kFP32QuietNaNMask) == 0)) {
  } else if (std::isnan(m) && ((raw_m & kFP32QuietNaNMask) == 0)) {
    // m is signalling NaN.
    return rawbits_to_float(raw_m | kFP32QuietNaNMask);
  } else if (quiet_nan_substitute == 0.0) {
    if (isnan(n)) {
    if (std::isnan(n)) {
      // n is quiet NaN.
      return n;
    } else if (isnan(m)) {
    } else if (std::isnan(m)) {
      // m is quiet NaN.
      return m;
    }
  } else {
    // Substitute n or m if one is quiet, but not both.
    if (isnan(n) && !isnan(m)) {
    if (std::isnan(n) && !std::isnan(m)) {
      // n is quiet NaN: replace with substitute.
      n = quiet_nan_substitute;
    } else if (!isnan(n) && isnan(m)) {
    } else if (!std::isnan(n) && std::isnan(m)) {
      // m is quiet NaN: replace with substitute.
      m = quiet_nan_substitute;
    }
@ -9488,26 +9536,26 @@ static double MinMaxHelper(double n,
  uint64_t raw_n = double_to_rawbits(n);
  uint64_t raw_m = double_to_rawbits(m);

  if (isnan(n) && ((raw_n & kFP64QuietNaNMask) == 0)) {
  if (std::isnan(n) && ((raw_n & kFP64QuietNaNMask) == 0)) {
    // n is signalling NaN.
    return rawbits_to_double(raw_n | kFP64QuietNaNMask);
  } else if (isnan(m) && ((raw_m & kFP64QuietNaNMask) == 0)) {
  } else if (std::isnan(m) && ((raw_m & kFP64QuietNaNMask) == 0)) {
    // m is signalling NaN.
    return rawbits_to_double(raw_m | kFP64QuietNaNMask);
  } else if (quiet_nan_substitute == 0.0) {
    if (isnan(n)) {
    if (std::isnan(n)) {
      // n is quiet NaN.
      return n;
    } else if (isnan(m)) {
    } else if (std::isnan(m)) {
      // m is quiet NaN.
      return m;
    }
  } else {
    // Substitute n or m if one is quiet, but not both.
    if (isnan(n) && !isnan(m)) {
    if (std::isnan(n) && !std::isnan(m)) {
      // n is quiet NaN: replace with substitute.
      n = quiet_nan_substitute;
    } else if (!isnan(n) && isnan(m)) {
    } else if (!std::isnan(n) && std::isnan(m)) {
      // m is quiet NaN: replace with substitute.
      m = quiet_nan_substitute;
    }
@ -9700,6 +9748,10 @@ TEST(fccmp) {
  __ Fmov(d18, -0.5);
  __ Fmov(d19, -1.0);
  __ Mov(x20, 0);
  __ Mov(x21, 0x7ff0000000000001);  // Double precision NaN.
  __ Fmov(d21, x21);
  __ Mov(w22, 0x7f800001);  // Single precision NaN.
  __ Fmov(s22, w22);

  __ Cmp(x20, 0);
  __ Fccmp(s16, s16, NoFlag, eq);
@ -9739,6 +9791,22 @@ TEST(fccmp) {

  __ fccmp(d18, d18, NFlag, nv);
  __ Mrs(x9, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(s16, s16, NoFlag, eq);
  __ Mrs(x10, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(d18, d19, ZCVFlag, ls);
  __ Mrs(x11, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(d21, d21, NoFlag, eq);
  __ Mrs(x12, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(s22, s22, NoFlag, eq);
  __ Mrs(x13, NZCV);
  END();

  RUN();
@ -9753,6 +9821,10 @@ TEST(fccmp) {
  ASSERT_EQUAL_32(NFlag, w7);
  ASSERT_EQUAL_32(ZCFlag, w8);
  ASSERT_EQUAL_32(ZCFlag, w9);
  ASSERT_EQUAL_32(ZCFlag, w10);
  ASSERT_EQUAL_32(CFlag, w11);
  ASSERT_EQUAL_32(CVFlag, w12);
  ASSERT_EQUAL_32(CVFlag, w13);

  TEARDOWN();
}
@ -9813,6 +9885,19 @@ TEST(fcmp) {
    __ Fcmp(d19, 12.3456);
    temps.Exclude(d0);
    __ Mrs(x16, NZCV);

    __ Fcmpe(s8, s8);
    __ Mrs(x22, NZCV);
    __ Fcmpe(s8, 0.0);
    __ Mrs(x23, NZCV);
    __ Fcmpe(d19, d19);
    __ Mrs(x24, NZCV);
    __ Fcmpe(d19, 0.0);
    __ Mrs(x25, NZCV);
    __ Fcmpe(s18, s18);
    __ Mrs(x26, NZCV);
    __ Fcmpe(d21, d21);
    __ Mrs(x27, NZCV);
  }

  END();
@ -9833,6 +9918,12 @@ TEST(fcmp) {
  ASSERT_EQUAL_32(CVFlag, w14);
  ASSERT_EQUAL_32(ZCFlag, w15);
  ASSERT_EQUAL_32(NFlag, w16);
  ASSERT_EQUAL_32(ZCFlag, w22);
  ASSERT_EQUAL_32(ZCFlag, w23);
  ASSERT_EQUAL_32(ZCFlag, w24);
  ASSERT_EQUAL_32(ZCFlag, w25);
  ASSERT_EQUAL_32(CVFlag, w26);
  ASSERT_EQUAL_32(CVFlag, w27);

  TEARDOWN();
}
@ -11869,16 +11960,16 @@ static void TestUScvtfHelper(uint64_t in,
  double expected_ucvtf_base = rawbits_to_double(expected_ucvtf_bits);

  for (int fbits = 0; fbits <= 32; fbits++) {
    double expected_scvtf = expected_scvtf_base / pow(2, fbits);
    double expected_ucvtf = expected_ucvtf_base / pow(2, fbits);
    double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
    double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
    ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
    ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
    if (cvtf_s32) ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_w[fbits]);
    if (cvtf_u32) ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_w[fbits]);
  }
  for (int fbits = 33; fbits <= 64; fbits++) {
    double expected_scvtf = expected_scvtf_base / pow(2, fbits);
    double expected_ucvtf = expected_ucvtf_base / pow(2, fbits);
    double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
    double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
    ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
    ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
  }
@ -12023,18 +12114,16 @@ static void TestUScvtf32Helper(uint64_t in,
  float expected_ucvtf_base = rawbits_to_float(expected_ucvtf_bits);

  for (int fbits = 0; fbits <= 32; fbits++) {
    float expected_scvtf = expected_scvtf_base / powf(2, fbits);
    float expected_ucvtf = expected_ucvtf_base / powf(2, fbits);
    float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
    float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
    ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
    ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
    if (cvtf_s32) ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_w[fbits]);
    if (cvtf_u32) ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_w[fbits]);
    break;
  }
  for (int fbits = 33; fbits <= 64; fbits++) {
    break;
    float expected_scvtf = expected_scvtf_base / powf(2, fbits);
    float expected_ucvtf = expected_ucvtf_base / powf(2, fbits);
    float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
    float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
    ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
    ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
  }
@ -12617,6 +12706,10 @@ TEST(peek_poke_mixed) {
  SETUP();
  START();

  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
  UseScratchRegisterScope temps(&masm);
  temps.ExcludeAll();

  // The literal base is chosen to have two useful properties:
  //  * When multiplied by small values (such as a register index), this value
  //    is clearly readable in the result.
@ -12687,6 +12780,10 @@ TEST(peek_poke_reglist) {
  SETUP();
  START();

  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
  UseScratchRegisterScope temps(&masm);
  temps.ExcludeAll();

  // The literal base is chosen to have two useful properties:
  //  * When multiplied by small values (such as a register index), this value
  //    is clearly readable in the result.
@ -12769,6 +12866,121 @@ TEST(peek_poke_reglist) {
}

TEST(load_store_reglist) {
  SETUP();
  START();

  // The literal base is chosen to have two useful properties:
  // * When multiplied by small values (such as a register index), this value
  //   is clearly readable in the result.
  // * The value is not formed from repeating fixed-size smaller values, so it
  //   can be used to detect endianness-related errors.
  uint64_t high_base = UINT32_C(0x01000010);
  uint64_t low_base = UINT32_C(0x00100101);
  uint64_t base = (high_base << 32) | low_base;
  uint64_t array[21];
  memset(array, 0, sizeof(array));

  // Initialize the registers.
  __ Mov(x1, base);
  __ Add(x2, x1, x1);
  __ Add(x3, x2, x1);
  __ Add(x4, x3, x1);
  __ Fmov(d1, x1);
  __ Fmov(d2, x2);
  __ Fmov(d3, x3);
  __ Fmov(d4, x4);
  __ Fmov(d5, x1);
  __ Fmov(d6, x2);
  __ Fmov(d7, x3);
  __ Fmov(d8, x4);

  Register reg_base = x20;
  Register reg_index = x21;
  int size_stored = 0;

  __ Mov(reg_base, reinterpret_cast<uintptr_t>(&array));

  // Test aligned accesses.
  CPURegList list_src(w1, w2, w3, w4);
  CPURegList list_dst(w11, w12, w13, w14);
  CPURegList list_fp_src_1(d1, d2, d3, d4);
  CPURegList list_fp_dst_1(d11, d12, d13, d14);

  __ StoreCPURegList(list_src, MemOperand(reg_base, 0 * sizeof(uint64_t)));
  __ LoadCPURegList(list_dst, MemOperand(reg_base, 0 * sizeof(uint64_t)));
  size_stored += 4 * kWRegSizeInBytes;

  __ Mov(reg_index, size_stored);
  __ StoreCPURegList(list_src, MemOperand(reg_base, reg_index));
  __ LoadCPURegList(list_dst, MemOperand(reg_base, reg_index));
  size_stored += 4 * kWRegSizeInBytes;

  __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, size_stored));
  __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, size_stored));
  size_stored += 4 * kDRegSizeInBytes;

  __ Mov(reg_index, size_stored);
  __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, reg_index));
  __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, reg_index));
  size_stored += 4 * kDRegSizeInBytes;

  // Test unaligned accesses.
  CPURegList list_fp_src_2(d5, d6, d7, d8);
  CPURegList list_fp_dst_2(d15, d16, d17, d18);

  __ Str(wzr, MemOperand(reg_base, size_stored));
  size_stored += 1 * kWRegSizeInBytes;
  __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, size_stored));
  __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, size_stored));
  size_stored += 4 * kDRegSizeInBytes;

  __ Mov(reg_index, size_stored);
  __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, reg_index));
  __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, reg_index));

  END();
  RUN();

  VIXL_CHECK(array[0] == (1 * low_base) + (2 * low_base << kWRegSize));
  VIXL_CHECK(array[1] == (3 * low_base) + (4 * low_base << kWRegSize));
  VIXL_CHECK(array[2] == (1 * low_base) + (2 * low_base << kWRegSize));
  VIXL_CHECK(array[3] == (3 * low_base) + (4 * low_base << kWRegSize));
  VIXL_CHECK(array[4] == 1 * base);
  VIXL_CHECK(array[5] == 2 * base);
  VIXL_CHECK(array[6] == 3 * base);
  VIXL_CHECK(array[7] == 4 * base);
  VIXL_CHECK(array[8] == 1 * base);
  VIXL_CHECK(array[9] == 2 * base);
  VIXL_CHECK(array[10] == 3 * base);
  VIXL_CHECK(array[11] == 4 * base);
  VIXL_CHECK(array[12] == ((1 * low_base) << kSRegSize));
  VIXL_CHECK(array[13] == (((2 * low_base) << kSRegSize) | (1 * high_base)));
  VIXL_CHECK(array[14] == (((3 * low_base) << kSRegSize) | (2 * high_base)));
  VIXL_CHECK(array[15] == (((4 * low_base) << kSRegSize) | (3 * high_base)));
  VIXL_CHECK(array[16] == (((1 * low_base) << kSRegSize) | (4 * high_base)));
  VIXL_CHECK(array[17] == (((2 * low_base) << kSRegSize) | (1 * high_base)));
  VIXL_CHECK(array[18] == (((3 * low_base) << kSRegSize) | (2 * high_base)));
  VIXL_CHECK(array[19] == (((4 * low_base) << kSRegSize) | (3 * high_base)));
  VIXL_CHECK(array[20] == (4 * high_base));

  ASSERT_EQUAL_64(1 * low_base, x11);
  ASSERT_EQUAL_64(2 * low_base, x12);
  ASSERT_EQUAL_64(3 * low_base, x13);
  ASSERT_EQUAL_64(4 * low_base, x14);
  ASSERT_EQUAL_FP64(rawbits_to_double(1 * base), d11);
  ASSERT_EQUAL_FP64(rawbits_to_double(2 * base), d12);
  ASSERT_EQUAL_FP64(rawbits_to_double(3 * base), d13);
  ASSERT_EQUAL_FP64(rawbits_to_double(4 * base), d14);
  ASSERT_EQUAL_FP64(rawbits_to_double(1 * base), d15);
  ASSERT_EQUAL_FP64(rawbits_to_double(2 * base), d16);
  ASSERT_EQUAL_FP64(rawbits_to_double(3 * base), d17);
  ASSERT_EQUAL_FP64(rawbits_to_double(4 * base), d18);

  TEARDOWN();
}
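
The literal-base constant deserves a closer look. A standalone sketch (not part of the test suite) of the readability property the comments describe: small multiples keep every field distinct, so a swapped register index or an endianness error is visible at a glance in the hex output.

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  // The same constant as TEST(load_store_reglist) above.
  uint64_t high_base = UINT32_C(0x01000010);
  uint64_t low_base = UINT32_C(0x00100101);
  uint64_t base = (high_base << 32) | low_base;
  for (uint64_t i = 1; i <= 4; i++) {
    // No field carries into its neighbour for small multiples:
    // 0x0100001000100101, 0x0200002000200202, 0x0300003000300303, ...
    printf("%" PRIu64 " * base = 0x%016" PRIx64 "\n", i, i * base);
  }
  return 0;
}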


// This enum is used only as an argument to the push-pop test helpers.
enum PushPopMethod {
  // Push or Pop using the Push and Pop methods, with blocks of up to four
@ -12814,6 +13026,10 @@ static void PushPopXRegSimpleHelper(int reg_count,
  RegList list = PopulateRegisterArray(NULL, x, r, reg_size, reg_count,
                                       allowed);

  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
  UseScratchRegisterScope temps(&masm);
  temps.ExcludeAll();

  // The literal base is chosen to have two useful properties:
  // * When multiplied by small values (such as a register index), this value
  //   is clearly readable in the result.
@ -12993,6 +13209,10 @@ static void PushPopFPXRegSimpleHelper(int reg_count,
  // Arbitrarily pick a register to use as a stack pointer.
  const Register& stack_pointer = x10;

  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
  UseScratchRegisterScope temps(&masm);
  temps.ExcludeAll();

  // The literal base is chosen to have two useful properties:
  // * When multiplied (using an integer) by small values (such as a register
  //   index), this value is clearly readable in the result.
@ -13167,6 +13387,10 @@ static void PushPopXRegMixedMethodsHelper(int claim, int reg_size) {
    r6_to_r9 |= x[i].Bit();
  }

  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
  UseScratchRegisterScope temps(&masm);
  temps.ExcludeAll();

  // The literal base is chosen to have two useful properties:
  // * When multiplied by small values (such as a register index), this value
  //   is clearly readable in the result.
@ -13267,6 +13491,10 @@ static void PushPopXRegWXOverlapHelper(int reg_count, int claim) {
    stack[i] = 0xdeadbeef;
  }

  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
  UseScratchRegisterScope temps(&masm);
  temps.ExcludeAll();

  // The literal base is chosen to have two useful properties:
  // * When multiplied by small values (such as a register index), this value
  //   is clearly readable in the result.
@ -13446,6 +13674,10 @@ TEST(push_pop_sp) {

  VIXL_ASSERT(sp.Is(__ StackPointer()));

  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
  UseScratchRegisterScope temps(&masm);
  temps.ExcludeAll();

  __ Mov(x3, 0x3333333333333333);
  __ Mov(x2, 0x2222222222222222);
  __ Mov(x1, 0x1111111111111111);
@ -14154,8 +14386,8 @@ TEST(process_nan_float) {


static void ProcessNaNsHelper(double n, double m, double expected) {
  VIXL_ASSERT(isnan(n) || isnan(m));
  VIXL_ASSERT(isnan(expected));
  VIXL_ASSERT(std::isnan(n) || std::isnan(m));
  VIXL_ASSERT(std::isnan(expected));

  SETUP();
  START();
@ -14225,8 +14457,8 @@ TEST(process_nans_double) {


static void ProcessNaNsHelper(float n, float m, float expected) {
  VIXL_ASSERT(isnan(n) || isnan(m));
  VIXL_ASSERT(isnan(expected));
  VIXL_ASSERT(std::isnan(n) || std::isnan(m));
  VIXL_ASSERT(std::isnan(expected));

  SETUP();
  START();
@ -14296,10 +14528,10 @@ TEST(process_nans_float) {


static void DefaultNaNHelper(float n, float m, float a) {
  VIXL_ASSERT(isnan(n) || isnan(m) || isnan(a));
  VIXL_ASSERT(std::isnan(n) || std::isnan(m) || std::isnan(a));

  bool test_1op = isnan(n);
  bool test_2op = isnan(n) || isnan(m);
  bool test_1op = std::isnan(n);
  bool test_2op = std::isnan(n) || std::isnan(m);

  SETUP();
  START();
@ -14423,10 +14655,10 @@ TEST(default_nan_float) {


static void DefaultNaNHelper(double n, double m, double a) {
  VIXL_ASSERT(isnan(n) || isnan(m) || isnan(a));
  VIXL_ASSERT(std::isnan(n) || std::isnan(m) || std::isnan(a));

  bool test_1op = isnan(n);
  bool test_2op = isnan(n) || isnan(m);
  bool test_1op = std::isnan(n);
  bool test_2op = std::isnan(n) || std::isnan(m);

  SETUP();
  START();
@ -28,8 +28,8 @@
#include <cstring>
#include "test-runner.h"

#include "a64/macro-assembler-a64.h"
#include "a64/disasm-a64.h"
#include "vixl/a64/macro-assembler-a64.h"
#include "vixl/a64/disasm-a64.h"

#define TEST(name) TEST_(DISASM_##name)

@ -457,6 +457,7 @@ TEST(mul_and_div) {
  COMPARE(smull(x0, w0, w1), "smull x0, w0, w1");
  COMPARE(smull(x30, w30, w0), "smull x30, w30, w0");
  COMPARE(smulh(x0, x1, x2), "smulh x0, x1, x2");
  COMPARE(umulh(x0, x2, x1), "umulh x0, x2, x1");

  COMPARE(sdiv(w0, w1, w2), "sdiv w0, w1, w2");
  COMPARE(sdiv(x3, x4, x5), "sdiv x3, x4, x5");
@ -2361,6 +2362,13 @@ TEST(fp_compare) {
  COMPARE(fcmp(s12, 0), "fcmp s12, #0.0");
  COMPARE(fcmp(d12, 0), "fcmp d12, #0.0");

  COMPARE(fcmpe(s0, s1), "fcmpe s0, s1");
  COMPARE(fcmpe(s31, s30), "fcmpe s31, s30");
  COMPARE(fcmpe(d0, d1), "fcmpe d0, d1");
  COMPARE(fcmpe(d31, d30), "fcmpe d31, d30");
  COMPARE(fcmpe(s12, 0), "fcmpe s12, #0.0");
  COMPARE(fcmpe(d12, 0), "fcmpe d12, #0.0");

  CLEANUP();
}

@ -2379,6 +2387,17 @@ TEST(fp_cond_compare) {
  COMPARE(fccmp(s14, s15, CVFlag, al), "fccmp s14, s15, #nzCV, al");
  COMPARE(fccmp(d16, d17, CFlag, nv), "fccmp d16, d17, #nzCv, nv");

  COMPARE(fccmpe(s0, s1, NoFlag, eq), "fccmpe s0, s1, #nzcv, eq");
  COMPARE(fccmpe(s2, s3, ZVFlag, ne), "fccmpe s2, s3, #nZcV, ne");
  COMPARE(fccmpe(s30, s16, NCFlag, pl), "fccmpe s30, s16, #NzCv, pl");
  COMPARE(fccmpe(s31, s31, NZCVFlag, le), "fccmpe s31, s31, #NZCV, le");
  COMPARE(fccmpe(d4, d5, VFlag, gt), "fccmpe d4, d5, #nzcV, gt");
  COMPARE(fccmpe(d6, d7, NFlag, vs), "fccmpe d6, d7, #Nzcv, vs");
  COMPARE(fccmpe(d30, d0, NZFlag, vc), "fccmpe d30, d0, #NZcv, vc");
  COMPARE(fccmpe(d31, d31, ZFlag, hs), "fccmpe d31, d31, #nZcv, hs");
  COMPARE(fccmpe(s14, s15, CVFlag, al), "fccmpe s14, s15, #nzCV, al");
  COMPARE(fccmpe(d16, d17, CFlag, nv), "fccmpe d16, d17, #nzCv, nv");

  CLEANUP();
}
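
In the expected strings above, the mixed-case #nzcv immediates encode the condition flags that fccmp/fccmpe write when the condition fails: a capital letter marks a set flag, so CFlag prints as #nzCv and NZCVFlag as #NZCV. A hypothetical helper (not VIXL's disassembler code) showing that formatting rule:

#include <cstdio>
#include <string>

// Illustrative only: uppercase each letter whose flag bit is set in the
// 4-bit immediate (bit 3 = N, bit 2 = Z, bit 1 = C, bit 0 = V).
std::string FormatNZCV(unsigned flags) {
  const char lower[] = "nzcv";
  const char upper[] = "NZCV";
  std::string out = "#";
  for (int i = 0; i < 4; i++) {
    bool set = (flags >> (3 - i)) & 1;
    out += set ? upper[i] : lower[i];
  }
  return out;
}

int main() {
  printf("%s\n", FormatNZCV(0x2).c_str());  // Prints "#nzCv" (only C set).
  printf("%s\n", FormatNZCV(0xf).c_str());  // Prints "#NZCV" (all set).
  return 0;
}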
@ -2655,6 +2674,12 @@ TEST(add_sub_negative) {
  COMPARE(Add(w19, w3, -0x344), "sub w19, w3, #0x344 (836)");
  COMPARE(Add(w20, w4, -2000), "sub w20, w4, #0x7d0 (2000)");

  COMPARE(Add(w0, w1, 5, LeaveFlags), "add w0, w1, #0x5 (5)");
  COMPARE(Add(w1, w2, 15, SetFlags), "adds w1, w2, #0xf (15)");

  COMPARE(Sub(w0, w1, 5, LeaveFlags), "sub w0, w1, #0x5 (5)");
  COMPARE(Sub(w1, w2, 15, SetFlags), "subs w1, w2, #0xf (15)");

  COMPARE(Sub(w21, w3, -0xbc), "add w21, w3, #0xbc (188)");
  COMPARE(Sub(w22, w4, -2000), "add w22, w4, #0x7d0 (2000)");
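
These expectations show the MacroAssembler canonicalising negative immediates: an Add with a negative immediate is emitted as a sub of the magnitude, and a Sub likewise becomes an add. A standalone sketch of that rewriting rule (illustrative only, not VIXL's implementation):

#include <cinttypes>
#include <cstdint>
#include <cstdio>

// Print the instruction the macro would emit for Add(rd, rn, imm).
void PrintAddImmediate(const char* rd, const char* rn, int64_t imm) {
  const char* mnemonic = (imm < 0) ? "sub" : "add";
  int64_t magnitude = (imm < 0) ? -imm : imm;  // Assumes imm != INT64_MIN.
  printf("%s %s, %s, #0x%" PRIx64 " (%" PRId64 ")\n",
         mnemonic, rd, rn, magnitude, magnitude);
}

int main() {
  PrintAddImmediate("w19", "w3", -0x344);  // sub w19, w3, #0x344 (836)
  PrintAddImmediate("w20", "w4", -2000);   // sub w20, w4, #0x7d0 (2000)
  return 0;
}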
@ -27,8 +27,8 @@
#include <stdlib.h>
#include "test-runner.h"

#include "a64/decoder-a64.h"
#include "a64/disasm-a64.h"
#include "vixl/a64/decoder-a64.h"
#include "vixl/a64/disasm-a64.h"

#define TEST(name) TEST_(FUZZ_##name)

@ -26,7 +26,7 @@

#include "test-runner.h"

#include "invalset.h"
#include "vixl/invalset.h"

namespace vixl {

@ -27,7 +27,7 @@
#ifndef TEST_TEST_H_
#define TEST_TEST_H_

#include "utils.h"
#include "vixl/utils.h"

namespace vixl {

@ -31,8 +31,8 @@
#include "test-utils-a64.h"
#include "test-simulator-inputs-a64.h"
#include "test-simulator-traces-a64.h"
#include "a64/macro-assembler-a64.h"
#include "a64/simulator-a64.h"
#include "vixl/a64/macro-assembler-a64.h"
#include "vixl/a64/simulator-a64.h"

namespace vixl {

@ -26,13 +26,13 @@

#include "test-utils-a64.h"

#include <math.h>  // Needed for isnan().
#include <cmath>

#include "test-runner.h"
#include "a64/macro-assembler-a64.h"
#include "a64/simulator-a64.h"
#include "a64/disasm-a64.h"
#include "a64/cpu-a64.h"
#include "vixl/a64/macro-assembler-a64.h"
#include "vixl/a64/simulator-a64.h"
#include "vixl/a64/disasm-a64.h"
#include "vixl/a64/cpu-a64.h"

#define __ masm->
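
The include change above is the heart of the std::isnan migration seen throughout this commit: <cmath> declares the classification functions in namespace std, the portable C++ spelling, whereas the unqualified isnan from <math.h> is a C macro that some toolchains do not provide in C++ mode. A minimal sketch of the qualified usage:

#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  double n = std::numeric_limits<double>::quiet_NaN();
  // std::isnan has overloads for float, double and long double.
  printf("isnan(NaN) = %d, isnan(1.0) = %d\n", std::isnan(n), std::isnan(1.0));
  return 0;
}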
@ -85,7 +85,7 @@ bool EqualFP32(float expected, const RegisterDump*, float result) {
  if (float_to_rawbits(expected) == float_to_rawbits(result)) {
    return true;
  } else {
    if (isnan(expected) || (expected == 0.0)) {
    if (std::isnan(expected) || (expected == 0.0)) {
      printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
             float_to_rawbits(expected), float_to_rawbits(result));
    } else {
@ -104,7 +104,7 @@ bool EqualFP64(double expected, const RegisterDump*, double result) {
    return true;
  }

  if (isnan(expected) || (expected == 0.0)) {
  if (std::isnan(expected) || (expected == 0.0)) {
    printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
           double_to_rawbits(expected), double_to_rawbits(result));
  } else {

@ -28,10 +28,10 @@
#define VIXL_A64_TEST_UTILS_A64_H_

#include "test-runner.h"
#include "a64/macro-assembler-a64.h"
#include "a64/simulator-a64.h"
#include "a64/disasm-a64.h"
#include "a64/cpu-a64.h"
#include "vixl/a64/macro-assembler-a64.h"
#include "vixl/a64/simulator-a64.h"
#include "vixl/a64/disasm-a64.h"
#include "vixl/a64/cpu-a64.h"

namespace vixl {
@ -40,6 +40,10 @@ import test
import util


SUPPORTED_COMPILERS = ['g++', 'clang++']
OBJ_DIR = './obj'


def BuildOptions():
  result = argparse.ArgumentParser(
      description='Run the linter and unit tests.',
@ -53,9 +57,11 @@ def BuildOptions():
                      help='Do not run the linter. Run the tests only.')
  result.add_argument('--noclean', action='store_true',
                      help='Do not clean before build.')
  result.add_argument('--fast', action='store_true',
                      help='Only test with one toolchain')
  result.add_argument('--jobs', '-j', metavar='N', type=int, nargs='?',
                      default=1, const=multiprocessing.cpu_count(),
                      help='''Runs the tests using N jobs. If the option is set
                      help='''Run the tests using N jobs. If the option is set
                      but no value is provided, the script will use as many jobs
                      as it thinks useful.''')
  sim_default = 'off' if platform.machine() == 'aarch64' else 'on'
@ -65,30 +71,72 @@ def BuildOptions():
  return result.parse_args()


def CleanBuildSystem():
  def clean(mode):
    if args.verbose: print('Cleaning ' + mode + ' mode test...')
    command = 'scons mode=%s simulator=%s all --clean' % \
              (mode, args.simulator)
def check_supported(compiler, mode, std):
  if compiler not in SUPPORTED_COMPILERS:
    print 'Invalid compiler.'
    sys.exit(1)
  if mode not in ['release', 'debug']:
    print 'Invalid mode.'
    sys.exit(1)
  if std not in ['c++98', 'c++11']:
    print 'Invalid c++ standard.'
    sys.exit(1)


def initalize_compiler_list():
  compiler_list = []
  for compiler in SUPPORTED_COMPILERS:
    if util.has_compiler(compiler) and (len(compiler_list) == 0 or not args.fast):
      compiler_list.append(compiler)
    else:
      # This warning suffices for args.fast too.
      print 'WARNING: Skipping ' + compiler + ' tests.'
  if len(compiler_list) == 0:
    util.abort('Found no supported compilers')
  return compiler_list


def CleanBuildSystem(compiler):
  def clean(compiler, mode, std):
    check_supported(compiler, mode, std)
    os.environ['CXX'] = compiler
    if args.verbose:
      print 'Cleaning ' + compiler + ' ' + std + ' ' \
            + mode + ' mode test...'
    command = 'scons mode=%s std=%s simulator=%s all --clean' % \
              (mode, std, args.simulator)
    status, output = util.getstatusoutput(command)
    if status != 0:
      print(output)
      util.abort('Failed cleaning test: ' + command)
  clean('debug')
  clean('release')

  clean(compiler, 'debug', 'c++98')
  clean(compiler, 'debug', 'c++11')
  clean(compiler, 'release', 'c++98')
  clean(compiler, 'release', 'c++11')


def BuildEverything():
  def build(mode):
    if args.verbose: print('Building ' + mode + ' mode test...')
    command = 'scons mode=%s simulator=%s all -j%u' % \
              (mode, args.simulator, args.jobs)
def BuildEverything(compiler):
  def build(compiler, mode, std):
    check_supported(compiler, mode, std)
    os.environ['CXX'] = compiler
    if args.verbose:
      print 'Building ' + compiler + ' ' + std + ' ' \
            + mode + ' mode test...'
    if args.jobs == 1:
      print '- This may take a while. Pass `-j` to use multiple threads.'
    command = 'scons mode=%s std=%s simulator=%s all -j%u' % \
              (mode, std, args.simulator, args.jobs)
    status, output = util.getstatusoutput(command)
    if status != 0:
      print(output)
      util.abort('Failed building test: ' + command)
  build('debug')
  build('release')

  print 'Building ' + compiler + ' tests...'
  build(compiler, 'debug', 'c++98')
  build(compiler, 'debug', 'c++11')
  build(compiler, 'release', 'c++98')
  build(compiler, 'release', 'c++11')


NOT_RUN = 'NOT RUN'
@ -101,7 +149,7 @@ class Test:
    self.status = NOT_RUN

  def name_prefix(self):
    return '%-26s : ' % self.name
    return '%-40s : ' % self.name


class Tester:
@ -121,33 +169,36 @@ class Tester:


class VIXLTest(Test):
  def __init__(self, mode, simulator, debugger = False, verbose = False):
    if not mode in ['release', 'debug']:
      print 'Invalid mode.'
      sys.exit(1)

    self.debugger = debugger
  def __init__(self, compiler, mode, std, simulator, debugger = False, verbose = False):
    check_supported(compiler, mode, std)
    self.verbose = verbose
    self.debugger = debugger
    self.compiler = compiler
    self.mode = mode
    self.std = std

    name = 'test ' + mode
    name = 'test ' + compiler + ' ' + std + ' ' + mode
    if simulator:
      name += ' (%s)' % ('debugger' if debugger else 'simulator')
    Test.__init__(self, name)

    self.exe = './test-runner'
    self.exe = 'test-runner'
    if simulator:
      self.exe += '_sim'
    if mode == 'debug':
      self.exe += '_g'

  def Run(self):
    manifest = test.ReadManifest(self.exe, [], self.debugger,
                                 False, self.verbose)
    self.status = PASSED
    command = os.path.join(OBJ_DIR, self.mode, self.compiler,
                           self.std, self.exe)
    manifest = test.ReadManifest(command, [], self.debugger, False, self.verbose)
    retcode = test.RunTests(manifest, jobs = args.jobs,
                            verbose = self.verbose, debugger = self.debugger,
                            progress_prefix = self.name_prefix())
    printer.EnsureNewLine()
    self.status = PASSED if retcode == 0 else FAILED
    if retcode != 0:
      self.status = FAILED


class LintTest(Test):
@ -167,13 +218,17 @@ details.'''
    n_errors = lint.LintFiles(lint.default_tracked_files,
                              jobs = args.jobs, verbose = args.verbose,
                              progress_prefix = self.name_prefix())

    self.status = PASSED if n_errors == 0 else FAILED


class BenchTest(Test):
  def __init__(self, mode, simulator):
    name = 'benchmarks ' + mode
  def __init__(self, compiler, mode, std, simulator):
    check_supported(compiler, mode, std)
    self.compiler = compiler
    self.mode = mode
    self.std = std

    name = 'benchmarks ' + compiler + ' ' + std + ' ' + mode
    Test.__init__(self, name)
    self.exe_suffix = ''
    if simulator:
@ -186,7 +241,8 @@ class BenchTest(Test):
                  'bench-branch-masm', 'bench-branch-link-masm']
    self.status = PASSED
    for bench in benchmarks:
      command = './' + bench + self.exe_suffix
      command = os.path.join(OBJ_DIR, self.mode, self.compiler, self.std,
                             bench + self.exe_suffix)
      (rc, out) = util.getstatusoutput(command)
      if rc != 0:
        self.status = FAILED
@ -206,31 +262,44 @@ if __name__ == '__main__':
    print 'WARNING: This is not a Git repository. The linter will not run.'
    args.nolint = True

  tester = Tester()
  if not args.nolint:
    import lint
    tester.AddTest(LintTest())
    LintTest().Run()

  if not args.notest:
    if not args.noclean:
      CleanBuildSystem()
    BuildEverything()
    tester = Tester()
    compiler_list = initalize_compiler_list()

    if args.simulator == 'on':
      # mode, sim, debugger, verbose
      tester.AddTest(VIXLTest('release', True, True, args.verbose))
      tester.AddTest(VIXLTest('debug', True, True, args.verbose))
      tester.AddTest(VIXLTest('release', True, False, args.verbose))
      tester.AddTest(VIXLTest('debug', True, False, args.verbose))
      tester.AddTest(BenchTest('release', True))
      tester.AddTest(BenchTest('debug', True))
    else:
      tester.AddTest(VIXLTest('release', False, False, args.verbose))
      tester.AddTest(VIXLTest('debug', False, False, args.verbose))
      tester.AddTest(BenchTest('release', False))
      tester.AddTest(BenchTest('debug', False))
    for compiler in compiler_list:
      if not args.noclean:
        CleanBuildSystem(compiler)
      BuildEverything(compiler)

  tester.RunAll()
      if args.simulator == 'on':
        # mode, std, sim, debugger, verbose
        tester.AddTest(VIXLTest(compiler, 'release', 'c++98', True, True, args.verbose))
        tester.AddTest(VIXLTest(compiler, 'debug', 'c++98', True, True, args.verbose))
        tester.AddTest(VIXLTest(compiler, 'release', 'c++98', True, False, args.verbose))
        tester.AddTest(VIXLTest(compiler, 'debug', 'c++98', True, False, args.verbose))
        tester.AddTest(VIXLTest(compiler, 'release', 'c++11', True, True, args.verbose))
        tester.AddTest(VIXLTest(compiler, 'debug', 'c++11', True, True, args.verbose))
        tester.AddTest(VIXLTest(compiler, 'release', 'c++11', True, False, args.verbose))
        tester.AddTest(VIXLTest(compiler, 'debug', 'c++11', True, False, args.verbose))
        tester.AddTest(BenchTest(compiler, 'release', 'c++98', True))
        tester.AddTest(BenchTest(compiler, 'debug', 'c++98', True))
        tester.AddTest(BenchTest(compiler, 'release', 'c++11', True))
        tester.AddTest(BenchTest(compiler, 'debug', 'c++11', True))
      else:
        tester.AddTest(VIXLTest(compiler, 'release', 'c++98', False, False, args.verbose))
        tester.AddTest(VIXLTest(compiler, 'debug', 'c++98', False, False, args.verbose))
        tester.AddTest(VIXLTest(compiler, 'release', 'c++11', False, False, args.verbose))
        tester.AddTest(VIXLTest(compiler, 'debug', 'c++11', False, False, args.verbose))
        tester.AddTest(BenchTest(compiler, 'release', 'c++98', False))
        tester.AddTest(BenchTest(compiler, 'debug', 'c++98', False))
        tester.AddTest(BenchTest(compiler, 'release', 'c++11', False))
        tester.AddTest(BenchTest(compiler, 'debug', 'c++11', False))

    tester.RunAll()

  if git.is_git_repository_root():
    untracked_files = git.get_untracked_files()
@ -24,6 +24,7 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys
import subprocess
import shlex
@ -49,3 +50,8 @@ def last_line(text):
  lines = text.split('\n')
  last = lines[-1].split('\r')
  return last[-1]


def has_compiler(compiler):
  status, output = getstatusoutput('which ' + compiler)
  return status == 0