mirror of
https://github.com/openharmony/third_party_vixl.git
synced 2026-07-01 20:54:04 -04:00
!48 merge update-gitcode-master-with-patches into master
Upgrade VIXL to 8.0.0 Created-by: chernykhsergey Commit-by: Sergey Chernykh;ivagin Merged-by: openharmony_ci Description: ### **Issue** https://gitcode.com/openharmony/third_party_vixl/issues/30 ### **Reason** Update vixl from 7.0.0 to 8.0.0 ### **Description** Update vixl to current 7.0.0 master remove local patches ### **Tests** ``` ninja all tests ``` **All required pre-merge tests passed. Results are available in the internal CI.** See merge request: openharmony/third_party_vixl!48
This commit is contained in:
+2
-2
@@ -3,8 +3,8 @@
|
||||
"Name": "vixl",
|
||||
"License": "BSD 3-clause",
|
||||
"License File": "LICENCE",
|
||||
"Version Number": "7.0.0",
|
||||
"Owner": "huanghuijin@huawei.com",
|
||||
"Version Number": "8.0.0",
|
||||
"Owner": "liyiming13@huawei.com",
|
||||
"Upstream URL": "https://github.com/Linaro/vixl",
|
||||
"Description": "vixl provides programmatic assemblers to generate A64, A32 or T32 code at runtime."
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
VIXL: ARMv8 Runtime Code Generation Library 7.0.0
|
||||
VIXL: ARMv8 Runtime Code Generation Library 8.0.0
|
||||
=================================================
|
||||
|
||||
Contents:
|
||||
|
||||
+10
-1
@@ -98,7 +98,9 @@ options = {
|
||||
'CCFLAGS' : ['-O3'],
|
||||
},
|
||||
'simulator:aarch64' : {
|
||||
'CCFLAGS' : ['-DVIXL_INCLUDE_SIMULATOR_AARCH64'],
|
||||
'CCFLAGS' : ['-DVIXL_INCLUDE_SIMULATOR_AARCH64',
|
||||
'-pthread'],
|
||||
'LINKFLAGS' : ['-pthread']
|
||||
},
|
||||
'symbols:on' : {
|
||||
'CCFLAGS' : ['-g'],
|
||||
@@ -120,6 +122,9 @@ options = {
|
||||
'coverage:on' : {
|
||||
'CCFLAGS': ['-fprofile-instr-generate', '-fcoverage-mapping'],
|
||||
'LINKFLAGS': ['-fprofile-instr-generate', '-fcoverage-mapping']
|
||||
},
|
||||
'implicit_checks:on' : {
|
||||
'CCFLAGS' : ['-DVIXL_ENABLE_IMPLICIT_CHECKS'],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -265,6 +270,10 @@ vars.AddVariables(
|
||||
EnumVariable('negative_testing',
|
||||
'Enable negative testing (needs exceptions)',
|
||||
'off', allowed_values=['on', 'off']),
|
||||
EnumVariable('implicit_checks',
|
||||
'Allow signals raised from simulated invalid (e.g: out of'
|
||||
+ ' bounds) memory reads to be handled by the host.',
|
||||
'off', allowed_values=['on', 'off']),
|
||||
DefaultVariable('symbols', 'Include debugging symbols in the binaries',
|
||||
['on', 'off']),
|
||||
DefaultVariable('simulator', 'Simulators to include', ['aarch64', 'none']),
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
Immediate Range Limits in VIXL
|
||||
==============================
|
||||
|
||||
VIXL's macro assembler tries to increase the range of branches and literal loads
|
||||
automatically for you, but applications must still be aware of these extended
|
||||
limits, and stay within them, in order to ensure valid code is generated.
|
||||
|
||||
In debug builds, assertions prevent exceeding these limits at run time. In
|
||||
release builds, for performance reasons, the application is responsible for
|
||||
staying within the limits.
|
||||
|
||||
You should decide what corrections should be applied in your application if it
|
||||
exceeds these limits.
|
||||
|
||||
Terms
|
||||
-----
|
||||
|
||||
**Bind** assigning an address to a label such that the instructions that refer
|
||||
to the label can be assigned PC-relative offsets.
|
||||
|
||||
**Forward** a forward branch or load literal will refer to a location that will
|
||||
be bound later in code generation, i.e. at a higher address.
|
||||
|
||||
**Backward** a backward branch or load literal refers to a location that has
|
||||
already been bound earlier in code generation, i.e. at a lower address.
|
||||
|
||||
**Instruction range** the range of values that can be encoded in the instruction
|
||||
to be generated. Outside the instruction range, additional instructions may be
|
||||
generated to increase the range, branching further than would be possible in
|
||||
one instruction, for example.
|
||||
|
||||
**Veneer** a sequence of additional instructions produced to increase the
|
||||
instruction range.
|
||||
|
||||
**Adjusted PC** the PC including its architecturally-defined offset. In AArch32
|
||||
T32, this is the current PC plus four bytes. In AArch64, there is no adjustment;
|
||||
Adjusted PC is equal to PC.
|
||||
|
||||
AArch64
|
||||
-------
|
||||
|
||||
### Branches
|
||||
|
||||
All instructions and targets must be aligned to the instruction size, four
|
||||
bytes.
|
||||
|
||||
#### Unconditional immediate branches (`B`)
|
||||
|
||||
* Unconditional immediate branches have an instruction range of -134,217,728 to
|
||||
+134,217,724 bytes from the current PC.
|
||||
* No veneers are applied to unconditional immediate branches to extend their
|
||||
instruction range.
|
||||
* Callers can use the function `IsValidImmPCOffset(UncondBranchType, offset)` to
|
||||
check `offset` (in units of instructions) is within the instruction range.
|
||||
|
||||
#### Conditional branches (`B.cond`) and compare-and-branch (`CBZ`, `CBNZ`)
|
||||
|
||||
* Conditional branch and compare-and-branch instructions have the same
|
||||
instruction range.
|
||||
* The instruction range is -1,048,576 to +1,048,572 bytes from the current PC.
|
||||
* Veneers are applied to extend the range to -134,217,724 to +135,266,298 bytes
|
||||
from the current PC.
|
||||
* Unconditional branch range minus one instruction backwards.
|
||||
* Unconditional branch range plus conditional branch range forwards.
|
||||
* Callers can use the functions `IsValidImmPCOffset(CondBranchType, offset)` and
|
||||
`IsValidImmPCOffset(CompareBranchType, offset)` to check `offset` (in units of
|
||||
instructions) is within the instruction range.
|
||||
|
||||
#### Test-and-branch (`TBZ`, `TBNZ`)
|
||||
|
||||
* Test-and-branch instructions have an instruction range of -32,768 to +32,764
|
||||
bytes from the current PC.
|
||||
* Veneers are applied to extend the range to -134,217,728 to +135,299,062 bytes
|
||||
from the current PC.
|
||||
* Unconditional branch range minus one instruction backwards.
|
||||
* Unconditional branch range plus test-and-branch range forwards.
|
||||
* Callers can use the function `IsValidImmPCOffset(TestBranchType, offset)` to
|
||||
check `offset` (in units of instructions) is within the instruction range.
|
||||
|
||||
### Literals
|
||||
|
||||
#### Compute PC-relative address (`ADR`)
|
||||
|
||||
* Compute PC-relative address instructions have an instruction range of
|
||||
-1,048,576 to +1,048,575 bytes from the current PC.
|
||||
* No veneers are applied to extend the instruction range.
|
||||
* Callers can use `IsInt21(offset)` to check `offset` (in bytes) is within the
|
||||
instruction range.
|
||||
|
||||
#### Load from PC-relative address (`LDR`)
|
||||
|
||||
* Load from PC-relative address instructions have an instruction range of
|
||||
-1,048,576 to +1,048,572 bytes from the current PC. The offset must be four-byte
|
||||
aligned.
|
||||
* Automatically-placed literals (e.g. those created by `Ldr(reg, literal_value)`)
|
||||
will be emitted into code such that they are in range of the instructions that
|
||||
refer to them.
|
||||
* Veneers are not applied to manually-placed literals, i.e. those created by
|
||||
`Literal<T> x(value)` and emitted by `place()`.
|
||||
* Callers can use `IsInt19(offset)` to check `offset` (in units of instructions)
|
||||
is within the instruction range.
|
||||
|
||||
AArch32
|
||||
-------
|
||||
|
||||
Limits stated in this section relate to the T32 instruction encodings only.
|
||||
|
||||
### Branches
|
||||
|
||||
#### Unconditional immediate branches (`B`)
|
||||
|
||||
* Unconditional immediate branches have an instruction range of -16,777,216 to
|
||||
+16,777,214 bytes from the current adjusted PC.
|
||||
* Veneers are applied to forward branches to extend them to an unlimited range.
|
||||
* No veneers are applied to backward branches.
|
||||
|
||||
#### Conditional immediate branches (`B`)
|
||||
|
||||
* Conditional immediate branches have an instruction range of -1,048,576 to
|
||||
+1,048,574 bytes from the current adjusted PC.
|
||||
* Veneers are applied to forward branches to extend them to an unlimited range.
|
||||
* Veneers are applied to backward branches to extend the range to that of
|
||||
unconditional immediate branches, -16,777,216 bytes from the current adjusted
|
||||
PC.
|
||||
|
||||
#### Compare and branch (`CBZ`, `CBNZ`)
|
||||
|
||||
* Compare and branch has an instruction range of 0 to +126 bytes from the
|
||||
current adjusted PC.
|
||||
* Veneers are applied to forward branches to extend them to an unlimited range.
|
||||
* Veneers are applied to backward branches to extend the range to that of
|
||||
unconditional immediate branches, -16,777,216 bytes from the current adjusted
|
||||
PC.
|
||||
|
||||
### Literals
|
||||
|
||||
#### Compute/load PC-relative address (`ADR`, `LDR`)
|
||||
|
||||
* Compute and load PC-relative address instructions have the same instruction
|
||||
range.
|
||||
* The instruction range is -4,095 to +4,095 bytes from the current adjusted PC.
|
||||
The PC is aligned down to a four-byte boundary before the offset is added.
|
||||
* Automatically-placed literals (i.e. those created by `Literal<T> x(value)`)
|
||||
will be emitted into code such that they are in range of the instructions that
|
||||
refer to them.
|
||||
* Veneers are not applied to manually-placed literals, i.e. those created by
|
||||
`Literal<T> x(value, RawLiteral::kManuallyPlaced)` and emitted by `Place()`.
|
||||
|
||||
@@ -636,20 +636,15 @@ ImmediateT32::ImmediateT32(uint32_t imm) {
|
||||
}
|
||||
|
||||
|
||||
static inline uint32_t ror(uint32_t x, int i) {
|
||||
VIXL_ASSERT((0 < i) && (i < 32));
|
||||
return (x >> i) | (x << (32 - i));
|
||||
}
|
||||
|
||||
|
||||
bool ImmediateT32::IsImmediateT32(uint32_t imm) {
|
||||
/* abcdefgh abcdefgh abcdefgh abcdefgh */
|
||||
if ((imm ^ ror(imm, 8)) == 0) return true;
|
||||
if (AllBytesMatch(imm)) return true;
|
||||
/* 00000000 abcdefgh 00000000 abcdefgh */
|
||||
/* abcdefgh 00000000 abcdefgh 00000000 */
|
||||
if ((imm ^ ror(imm, 16)) == 0 &&
|
||||
(((imm & 0xff00) == 0) || ((imm & 0xff) == 0)))
|
||||
if (AllHalfwordsMatch(imm) &&
|
||||
(((imm & 0xff00) == 0) || ((imm & 0xff) == 0))) {
|
||||
return true;
|
||||
}
|
||||
/* isolate least-significant set bit */
|
||||
uint32_t lsb = imm & UnsignedNegate(imm);
|
||||
/* if imm is less than lsb*256 then it fits, but instead we test imm/256 to
|
||||
@@ -697,7 +692,7 @@ bool ImmediateA32::IsImmediateA32(uint32_t imm) {
|
||||
if (imm < 256) return true;
|
||||
/* avoid getting confused by wrapped-around bytes (this transform has no
|
||||
* effect on pass/fail results) */
|
||||
if (imm & 0xff000000) imm = ror(imm, 16);
|
||||
if (imm & 0xff000000) imm = static_cast<uint32_t>(RotateRight(imm, 16, 32));
|
||||
/* copy odd-numbered set bits into even-numbered bits immediately below, so
|
||||
* that the least-significant set bit is always an even bit */
|
||||
imm = imm | ((imm >> 1) & 0x55555555);
|
||||
|
||||
@@ -80,6 +80,8 @@ class Location : public LocationBase<int32_t> {
|
||||
#endif
|
||||
}
|
||||
|
||||
Location(Location&&) = default; // movable
|
||||
|
||||
bool IsReferenced() const { return referenced_; }
|
||||
|
||||
private:
|
||||
|
||||
@@ -1268,6 +1268,57 @@ void MacroAssembler::Delegate(InstructionType type,
|
||||
}
|
||||
|
||||
|
||||
void MacroAssembler::Delegate(InstructionType type,
|
||||
InstructionCondSizeL instruction,
|
||||
Condition cond,
|
||||
EncodingSize size,
|
||||
Location* location) {
|
||||
VIXL_ASSERT(type == kB);
|
||||
|
||||
CONTEXT_SCOPE;
|
||||
|
||||
// Apply veneer to increase range of backwards conditional branches.
|
||||
// This replaces:
|
||||
// label:
|
||||
// <instructions>
|
||||
// bcond label ; T3
|
||||
// With:
|
||||
// label:
|
||||
// <instructions>
|
||||
// binvcond skip ; T1
|
||||
// b label ; T4
|
||||
// skip:
|
||||
Location::Offset offset = location->GetLocation() -
|
||||
(GetCursorOffset() + GetArchitectureStatePCOffset());
|
||||
if (IsUsingT32() && location->IsBound() && ((offset & 0x1) == 0) &&
|
||||
!cond.Is(al) && cond.IsNotNever()) {
|
||||
// Bound locations must be earlier in the code.
|
||||
VIXL_ASSERT(offset < 0);
|
||||
|
||||
// The offset must be within range of a T4 branch, accounting for the
|
||||
// conditional branch (T1) we emit first, in order to jump over it.
|
||||
offset -= k16BitT32InstructionSizeInBytes;
|
||||
if (offset >= -16777216) {
|
||||
CodeBufferCheckScope scope(this, k16BitT32InstructionSizeInBytes +
|
||||
k32BitT32InstructionSizeInBytes);
|
||||
#ifndef PANDA_BUILD
|
||||
Label skip;
|
||||
#else
|
||||
Label skip(allocator_);
|
||||
#endif
|
||||
b(cond.Negate(), Narrow, &skip);
|
||||
b(location);
|
||||
Bind(&skip);
|
||||
return;
|
||||
} else {
|
||||
VIXL_ABORT_WITH_MSG("Conditional branch too far for veneer.\n");
|
||||
}
|
||||
}
|
||||
|
||||
Assembler::Delegate(type, instruction, cond, size, location);
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static inline bool IsI64BitPattern(T imm) {
|
||||
for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
|
||||
|
||||
@@ -1041,6 +1041,12 @@ ITScope(AllocatorWrapper allocator, MacroAssembler* masm,
|
||||
InstructionRL instruction,
|
||||
Register rn,
|
||||
Location* location) VIXL_OVERRIDE;
|
||||
// B
|
||||
virtual void Delegate(InstructionType type,
|
||||
InstructionCondSizeL instruction,
|
||||
Condition cond,
|
||||
EncodingSize size,
|
||||
Location* location) VIXL_OVERRIDE;
|
||||
// VMOV
|
||||
virtual void Delegate(InstructionType type,
|
||||
InstructionCondDtSSop instruction,
|
||||
|
||||
@@ -1918,6 +1918,12 @@ void Assembler::sys(int op, const Register& xt) {
|
||||
}
|
||||
|
||||
|
||||
void Assembler::sysl(int op, const Register& xt) {
|
||||
VIXL_ASSERT(xt.Is64Bits());
|
||||
Emit(SYSL | SysOp(op) | Rt(xt));
|
||||
}
|
||||
|
||||
|
||||
void Assembler::dc(DataCacheOp op, const Register& rt) {
|
||||
if (op == CVAP) VIXL_ASSERT(CPUHas(CPUFeatures::kDCPoP));
|
||||
if (op == CVADP) VIXL_ASSERT(CPUHas(CPUFeatures::kDCCVADP));
|
||||
@@ -1930,6 +1936,35 @@ void Assembler::ic(InstructionCacheOp op, const Register& rt) {
|
||||
sys(op, rt);
|
||||
}
|
||||
|
||||
void Assembler::gcspushm(const Register& rt) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kGCS));
|
||||
sys(GCSPUSHM, rt);
|
||||
}
|
||||
|
||||
void Assembler::gcspopm(const Register& rt) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kGCS));
|
||||
sysl(GCSPOPM, rt);
|
||||
}
|
||||
|
||||
|
||||
void Assembler::gcsss1(const Register& rt) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kGCS));
|
||||
sys(GCSSS1, rt);
|
||||
}
|
||||
|
||||
|
||||
void Assembler::gcsss2(const Register& rt) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kGCS));
|
||||
sysl(GCSSS2, rt);
|
||||
}
|
||||
|
||||
|
||||
void Assembler::chkfeat(const Register& rd) {
|
||||
VIXL_ASSERT(rd.Is(x16));
|
||||
USE(rd);
|
||||
hint(CHKFEAT);
|
||||
}
|
||||
|
||||
|
||||
void Assembler::hint(SystemHint code) { hint(static_cast<int>(code)); }
|
||||
|
||||
@@ -2913,6 +2948,25 @@ void Assembler::st1(const VRegister& vt, int lane, const MemOperand& dst) {
|
||||
LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore1);
|
||||
}
|
||||
|
||||
void Assembler::pmull(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(AreSameFormat(vn, vm));
|
||||
VIXL_ASSERT((vn.Is8B() && vd.Is8H()) || (vn.Is1D() && vd.Is1Q()));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kPmull1Q) || vd.Is8H());
|
||||
Emit(VFormat(vn) | NEON_PMULL | Rm(vm) | Rn(vn) | Rd(vd));
|
||||
}
|
||||
|
||||
void Assembler::pmull2(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(AreSameFormat(vn, vm));
|
||||
VIXL_ASSERT((vn.Is16B() && vd.Is8H()) || (vn.Is2D() && vd.Is1Q()));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kPmull1Q) || vd.Is8H());
|
||||
Emit(VFormat(vn) | NEON_PMULL2 | Rm(vm) | Rn(vn) | Rd(vd));
|
||||
}
|
||||
|
||||
void Assembler::NEON3DifferentL(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
@@ -2960,8 +3014,6 @@ void Assembler::NEON3DifferentHN(const VRegister& vd,
|
||||
|
||||
// clang-format off
|
||||
#define NEON_3DIFF_LONG_LIST(V) \
|
||||
V(pmull, NEON_PMULL, vn.IsVector() && vn.Is8B()) \
|
||||
V(pmull2, NEON_PMULL2, vn.IsVector() && vn.Is16B()) \
|
||||
V(saddl, NEON_SADDL, vn.IsVector() && vn.IsD()) \
|
||||
V(saddl2, NEON_SADDL2, vn.IsVector() && vn.IsQ()) \
|
||||
V(sabal, NEON_SABAL, vn.IsVector() && vn.IsD()) \
|
||||
@@ -4336,7 +4388,7 @@ void Assembler::sqrdmlah(const VRegister& vd,
|
||||
const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kRDM));
|
||||
VIXL_ASSERT(AreSameFormat(vd, vn, vm));
|
||||
VIXL_ASSERT(vd.IsVector() || !vd.IsQ());
|
||||
VIXL_ASSERT(vd.IsLaneSizeH() || vd.IsLaneSizeS());
|
||||
|
||||
Instr format, op = NEON_SQRDMLAH;
|
||||
if (vd.IsScalar()) {
|
||||
@@ -4355,7 +4407,7 @@ void Assembler::sqrdmlsh(const VRegister& vd,
|
||||
const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kRDM));
|
||||
VIXL_ASSERT(AreSameFormat(vd, vn, vm));
|
||||
VIXL_ASSERT(vd.IsVector() || !vd.IsQ());
|
||||
VIXL_ASSERT(vd.IsLaneSizeH() || vd.IsLaneSizeS());
|
||||
|
||||
Instr format, op = NEON_SQRDMLSH;
|
||||
if (vd.IsScalar()) {
|
||||
@@ -5824,6 +5876,263 @@ void Assembler::ummla(const VRegister& vd, const VRegister& vn, const VRegister&
|
||||
Emit(0x6e80a400 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::bcax(const VRegister& vd, const VRegister& vn, const VRegister& vm, const VRegister& va) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA3));
|
||||
VIXL_ASSERT(vd.Is16B() && vn.Is16B() && vm.Is16B());
|
||||
|
||||
Emit(0xce200000 | Rd(vd) | Rn(vn) | Rm(vm) | Ra(va));
|
||||
}
|
||||
|
||||
void Assembler::eor3(const VRegister& vd, const VRegister& vn, const VRegister& vm, const VRegister& va) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA3));
|
||||
VIXL_ASSERT(vd.Is16B() && vn.Is16B() && vm.Is16B() && va.Is16B());
|
||||
|
||||
Emit(0xce000000 | Rd(vd) | Rn(vn) | Rm(vm) | Ra(va));
|
||||
}
|
||||
|
||||
void Assembler::xar(const VRegister& vd, const VRegister& vn, const VRegister& vm, int rotate) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA3));
|
||||
VIXL_ASSERT(vd.Is2D() && vn.Is2D() && vm.Is2D());
|
||||
VIXL_ASSERT(IsUint6(rotate));
|
||||
|
||||
Emit(0xce800000 | Rd(vd) | Rn(vn) | Rm(vm) | rotate << 10);
|
||||
}
|
||||
|
||||
void Assembler::rax1(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA3));
|
||||
VIXL_ASSERT(vd.Is2D() && vn.Is2D() && vm.Is2D());
|
||||
|
||||
Emit(0xce608c00 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::sha1c(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
|
||||
VIXL_ASSERT(vd.IsQ() && vn.IsS() && vm.Is4S());
|
||||
|
||||
Emit(0x5e000000 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::sha1h(const VRegister& sd, const VRegister& sn) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
|
||||
VIXL_ASSERT(sd.IsS() && sn.IsS());
|
||||
|
||||
Emit(0x5e280800 | Rd(sd) | Rn(sn));
|
||||
}
|
||||
|
||||
void Assembler::sha1m(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
|
||||
VIXL_ASSERT(vd.IsQ() && vn.IsS() && vm.Is4S());
|
||||
|
||||
Emit(0x5e002000 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::sha1p(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
|
||||
VIXL_ASSERT(vd.IsQ() && vn.IsS() && vm.Is4S());
|
||||
|
||||
Emit(0x5e001000 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::sha1su0(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
|
||||
|
||||
Emit(0x5e003000 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::sha1su1(const VRegister& vd, const VRegister& vn) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S());
|
||||
|
||||
Emit(0x5e281800 | Rd(vd) | Rn(vn));
|
||||
}
|
||||
|
||||
void Assembler::sha256h(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA2));
|
||||
VIXL_ASSERT(vd.IsQ() && vn.IsQ() && vm.Is4S());
|
||||
|
||||
Emit(0x5e004000 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::sha256h2(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA2));
|
||||
VIXL_ASSERT(vd.IsQ() && vn.IsQ() && vm.Is4S());
|
||||
|
||||
Emit(0x5e005000 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::sha256su0(const VRegister& vd, const VRegister& vn) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA2));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S());
|
||||
|
||||
Emit(0x5e282800 | Rd(vd) | Rn(vn));
|
||||
}
|
||||
|
||||
void Assembler::sha256su1(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA2));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
|
||||
|
||||
Emit(0x5e006000 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::sha512h(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA512));
|
||||
VIXL_ASSERT(vd.IsQ() && vn.IsQ() && vm.Is2D());
|
||||
|
||||
Emit(0xce608000 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::sha512h2(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA512));
|
||||
VIXL_ASSERT(vd.IsQ() && vn.IsQ() && vm.Is2D());
|
||||
|
||||
Emit(0xce608400 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::sha512su0(const VRegister& vd, const VRegister& vn) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA512));
|
||||
VIXL_ASSERT(vd.Is2D() && vn.Is2D());
|
||||
|
||||
Emit(0xcec08000 | Rd(vd) | Rn(vn));
|
||||
}
|
||||
|
||||
void Assembler::sha512su1(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSHA512));
|
||||
VIXL_ASSERT(vd.Is2D() && vn.Is2D() && vm.Is2D());
|
||||
|
||||
Emit(0xce608800 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::aesd(const VRegister& vd, const VRegister& vn) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kAES));
|
||||
VIXL_ASSERT(vd.Is16B() && vn.Is16B());
|
||||
|
||||
Emit(0x4e285800 | Rd(vd) | Rn(vn));
|
||||
}
|
||||
|
||||
void Assembler::aese(const VRegister& vd, const VRegister& vn) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kAES));
|
||||
VIXL_ASSERT(vd.Is16B() && vn.Is16B());
|
||||
|
||||
Emit(0x4e284800 | Rd(vd) | Rn(vn));
|
||||
}
|
||||
|
||||
void Assembler::aesimc(const VRegister& vd, const VRegister& vn) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kAES));
|
||||
VIXL_ASSERT(vd.Is16B() && vn.Is16B());
|
||||
|
||||
Emit(0x4e287800 | Rd(vd) | Rn(vn));
|
||||
}
|
||||
|
||||
void Assembler::aesmc(const VRegister& vd, const VRegister& vn) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kAES));
|
||||
VIXL_ASSERT(vd.Is16B() && vn.Is16B());
|
||||
|
||||
Emit(0x4e286800 | Rd(vd) | Rn(vn));
|
||||
}
|
||||
|
||||
void Assembler::sm3partw1(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSM3));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
|
||||
|
||||
Emit(0xce60c000 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::sm3partw2(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSM3));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
|
||||
|
||||
Emit(0xce60c400 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
void Assembler::sm3ss1(const VRegister& vd, const VRegister& vn, const VRegister& vm, const VRegister& va) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSM3));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S() && va.Is4S());
|
||||
|
||||
Emit(0xce400000 | Rd(vd) | Rn(vn) | Rm(vm) | Ra(va));
|
||||
}
|
||||
|
||||
void Assembler::sm3tt1a(const VRegister& vd, const VRegister& vn, const VRegister& vm, int index) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSM3));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
|
||||
VIXL_ASSERT(IsUint2(index));
|
||||
|
||||
Instr i = static_cast<uint32_t>(index) << 12;
|
||||
Emit(0xce408000 | Rd(vd) | Rn(vn) | Rm(vm) | i);
|
||||
}
|
||||
|
||||
void Assembler::sm3tt1b(const VRegister& vd, const VRegister& vn, const VRegister& vm, int index) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSM3));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
|
||||
VIXL_ASSERT(IsUint2(index));
|
||||
|
||||
Instr i = static_cast<uint32_t>(index) << 12;
|
||||
Emit(0xce408400 | Rd(vd) | Rn(vn) | Rm(vm) | i);
|
||||
}
|
||||
|
||||
void Assembler::sm3tt2a(const VRegister& vd, const VRegister& vn, const VRegister& vm, int index) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSM3));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
|
||||
VIXL_ASSERT(IsUint2(index));
|
||||
|
||||
Instr i = static_cast<uint32_t>(index) << 12;
|
||||
Emit(0xce408800 | Rd(vd) | Rn(vn) | Rm(vm) | i);
|
||||
}
|
||||
|
||||
void Assembler::sm3tt2b(const VRegister& vd, const VRegister& vn, const VRegister& vm, int index) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSM3));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
|
||||
VIXL_ASSERT(IsUint2(index));
|
||||
|
||||
Instr i = static_cast<uint32_t>(index) << 12;
|
||||
Emit(0xce408c00 | Rd(vd) | Rn(vn) | Rm(vm) | i);
|
||||
}
|
||||
|
||||
void Assembler::sm4e(const VRegister& vd, const VRegister& vn) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSM4));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S());
|
||||
|
||||
Emit(0xcec08400 | Rd(vd) | Rn(vn));
|
||||
}
|
||||
|
||||
void Assembler::sm4ekey(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSM4));
|
||||
VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
|
||||
|
||||
Emit(0xce60c800 | Rd(vd) | Rn(vn) | Rm(vm));
|
||||
}
|
||||
|
||||
// Note:
|
||||
// For all ToImm instructions below, a difference in case
|
||||
// for the same letter indicates a negated bit.
|
||||
@@ -6868,6 +7177,7 @@ bool Assembler::CPUHas(SystemRegister sysreg) const {
|
||||
return CPUHas(CPUFeatures::kRNG);
|
||||
case FPCR:
|
||||
case NZCV:
|
||||
case DCZID_EL0:
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
|
||||
@@ -2183,6 +2183,9 @@ class Assembler : public vixl::internal::AssemblerBase {
|
||||
// System instruction with pre-encoded op (op1:crn:crm:op2).
|
||||
void sys(int op, const Register& xt = xzr);
|
||||
|
||||
// System instruction with result.
|
||||
void sysl(int op, const Register& xt = xzr);
|
||||
|
||||
// System data cache operation.
|
||||
void dc(DataCacheOp op, const Register& rt);
|
||||
|
||||
@@ -3643,6 +3646,123 @@ class Assembler : public vixl::internal::AssemblerBase {
|
||||
// Unsigned 8-bit integer matrix multiply-accumulate (vector).
|
||||
void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// Bit Clear and exclusive-OR.
|
||||
void bcax(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
const VRegister& va);
|
||||
|
||||
// Three-way Exclusive-OR.
|
||||
void eor3(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
const VRegister& va);
|
||||
|
||||
// Exclusive-OR and Rotate.
|
||||
void xar(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
int rotate);
|
||||
|
||||
// Rotate and Exclusive-OR.
|
||||
void rax1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// SHA1 hash update (choose).
|
||||
void sha1c(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// SHA1 fixed rotate.
|
||||
void sha1h(const VRegister& sd, const VRegister& sn);
|
||||
|
||||
// SHA1 hash update (majority).
|
||||
void sha1m(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// SHA1 hash update (parity).
|
||||
void sha1p(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// SHA1 schedule update 0.
|
||||
void sha1su0(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// SHA1 schedule update 1.
|
||||
void sha1su1(const VRegister& vd, const VRegister& vn);
|
||||
|
||||
// SHA256 hash update (part 1).
|
||||
void sha256h(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// SHA256 hash update (part 2).
|
||||
void sha256h2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// SHA256 schedule update 0.
|
||||
void sha256su0(const VRegister& vd, const VRegister& vn);
|
||||
|
||||
// SHA256 schedule update 1.
|
||||
void sha256su1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// SHA512 hash update part 1.
|
||||
void sha512h(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// SHA512 hash update part 2.
|
||||
void sha512h2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// SHA512 schedule Update 0.
|
||||
void sha512su0(const VRegister& vd, const VRegister& vn);
|
||||
|
||||
// SHA512 schedule Update 1.
|
||||
void sha512su1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// AES single round decryption.
|
||||
void aesd(const VRegister& vd, const VRegister& vn);
|
||||
|
||||
// AES single round encryption.
|
||||
void aese(const VRegister& vd, const VRegister& vn);
|
||||
|
||||
// AES inverse mix columns.
|
||||
void aesimc(const VRegister& vd, const VRegister& vn);
|
||||
|
||||
// AES mix columns.
|
||||
void aesmc(const VRegister& vd, const VRegister& vn);
|
||||
|
||||
// SM3PARTW1.
|
||||
void sm3partw1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// SM3PARTW2.
|
||||
void sm3partw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// SM3SS1.
|
||||
void sm3ss1(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
const VRegister& va);
|
||||
|
||||
// SM3TT1A.
|
||||
void sm3tt1a(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
int index);
|
||||
|
||||
// SM3TT1B.
|
||||
void sm3tt1b(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
int index);
|
||||
|
||||
// SM3TT2A.
|
||||
void sm3tt2a(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
int index);
|
||||
|
||||
// SM3TT2B.
|
||||
void sm3tt2b(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
int index);
|
||||
|
||||
// SM4 Encode.
|
||||
void sm4e(const VRegister& vd, const VRegister& vn);
|
||||
|
||||
// SM4 Key.
|
||||
void sm4ekey(const VRegister& vd, const VRegister& vn, const VRegister& vm);
|
||||
|
||||
// Scalable Vector Extensions.
|
||||
|
||||
// Absolute value (predicated).
|
||||
@@ -7097,6 +7217,21 @@ class Assembler : public vixl::internal::AssemblerBase {
|
||||
// Unsigned Minimum.
|
||||
void umin(const Register& rd, const Register& rn, const Operand& op);
|
||||
|
||||
// Check feature status.
|
||||
void chkfeat(const Register& rd);
|
||||
|
||||
// Guarded Control Stack Push.
|
||||
void gcspushm(const Register& rt);
|
||||
|
||||
// Guarded Control Stack Pop.
|
||||
void gcspopm(const Register& rt);
|
||||
|
||||
// Guarded Control Stack Switch Stack 1.
|
||||
void gcsss1(const Register& rt);
|
||||
|
||||
// Guarded Control Stack Switch Stack 2.
|
||||
void gcsss2(const Register& rt);
|
||||
|
||||
// Emit generic instructions.
|
||||
|
||||
// Emit raw instructions into the instruction stream.
|
||||
@@ -7565,6 +7700,8 @@ class Assembler : public vixl::internal::AssemblerBase {
|
||||
static Instr VFormat(VRegister vd) {
|
||||
if (vd.Is64Bits()) {
|
||||
switch (vd.GetLanes()) {
|
||||
case 1:
|
||||
return NEON_1D;
|
||||
case 2:
|
||||
return NEON_2S;
|
||||
case 4:
|
||||
|
||||
@@ -7410,13 +7410,13 @@ void Assembler::pmullb(const ZRegister& zd,
|
||||
// size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
|
||||
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSVEPmull128) || !zd.IsLaneSizeQ());
|
||||
VIXL_ASSERT(AreSameLaneSize(zn, zm));
|
||||
VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeS());
|
||||
VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
|
||||
// SVEPmull128 is not supported
|
||||
VIXL_ASSERT(!zd.IsLaneSizeQ());
|
||||
Instr size = zd.IsLaneSizeQ() ? 0 : SVESize(zd);
|
||||
|
||||
Emit(0x45006800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
|
||||
Emit(0x45006800 | size | Rd(zd) | Rn(zn) | Rm(zm));
|
||||
}
|
||||
|
||||
void Assembler::pmullt(const ZRegister& zd,
|
||||
@@ -7427,13 +7427,13 @@ void Assembler::pmullt(const ZRegister& zd,
|
||||
// size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
|
||||
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
|
||||
VIXL_ASSERT(CPUHas(CPUFeatures::kSVEPmull128) || !zd.IsLaneSizeQ());
|
||||
VIXL_ASSERT(AreSameLaneSize(zn, zm));
|
||||
VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeS());
|
||||
VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
|
||||
// SVEPmull128 is not supported
|
||||
VIXL_ASSERT(!zd.IsLaneSizeQ());
|
||||
Instr size = zd.IsLaneSizeQ() ? 0 : SVESize(zd);
|
||||
|
||||
Emit(0x45006c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
|
||||
Emit(0x45006c00 | size | Rd(zd) | Rn(zn) | Rm(zm));
|
||||
}
|
||||
|
||||
void Assembler::raddhnb(const ZRegister& zd,
|
||||
|
||||
@@ -389,7 +389,8 @@ enum SystemHint {
|
||||
BTI = 32,
|
||||
BTI_c = 34,
|
||||
BTI_j = 36,
|
||||
BTI_jc = 38
|
||||
BTI_jc = 38,
|
||||
CHKFEAT = 40
|
||||
};
|
||||
|
||||
enum BranchTargetIdentifier {
|
||||
@@ -500,7 +501,8 @@ enum SystemRegister {
|
||||
NZCV = SystemRegisterEncoder<3, 3, 4, 2, 0>::value,
|
||||
FPCR = SystemRegisterEncoder<3, 3, 4, 4, 0>::value,
|
||||
RNDR = SystemRegisterEncoder<3, 3, 2, 4, 0>::value, // Random number.
|
||||
RNDRRS = SystemRegisterEncoder<3, 3, 2, 4, 1>::value // Reseeded random number.
|
||||
RNDRRS = SystemRegisterEncoder<3, 3, 2, 4, 1>::value, // Reseeded random number.
|
||||
DCZID_EL0 = SystemRegisterEncoder<3, 3, 0, 0, 7>::value
|
||||
};
|
||||
|
||||
template<int op1, int crn, int crm, int op2>
|
||||
@@ -534,6 +536,13 @@ enum DataCacheOp {
|
||||
CIGDVAC = CacheOpEncoder<3, 7, 14, 5>::value
|
||||
};
|
||||
|
||||
enum GCSOp {
|
||||
GCSPUSHM = CacheOpEncoder<3, 7, 7, 0>::value,
|
||||
GCSPOPM = CacheOpEncoder<3, 7, 7, 1>::value,
|
||||
GCSSS1 = CacheOpEncoder<3, 7, 7, 2>::value,
|
||||
GCSSS2 = CacheOpEncoder<3, 7, 7, 3>::value
|
||||
};
|
||||
|
||||
// Some SVE instructions support a predicate constraint pattern. This is
|
||||
// interpreted as a VL-dependent value, and is typically used to initialise
|
||||
// predicates, or to otherwise limit the number of processed elements.
|
||||
@@ -942,7 +951,8 @@ enum SystemSysOp {
|
||||
SystemSysFixed = 0xD5080000,
|
||||
SystemSysFMask = 0xFFF80000,
|
||||
SystemSysMask = 0xFFF80000,
|
||||
SYS = SystemSysFixed | 0x00000000
|
||||
SYS = SystemSysFixed | 0x00000000,
|
||||
SYSL = SystemSysFixed | 0x00200000
|
||||
};
|
||||
|
||||
// Exception.
|
||||
|
||||
@@ -244,16 +244,47 @@ void CPUFeaturesAuditor::VisitConditionalSelect(const Instruction* instr) {
|
||||
|
||||
void CPUFeaturesAuditor::VisitCrypto2RegSHA(const Instruction* instr) {
|
||||
RecordInstructionFeaturesScope scope(this);
|
||||
if (form_hash_ == "sha256su0_vv_cryptosha2"_h) {
|
||||
scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA2);
|
||||
} else {
|
||||
scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA1);
|
||||
}
|
||||
USE(instr);
|
||||
}
|
||||
|
||||
void CPUFeaturesAuditor::VisitCrypto3RegSHA(const Instruction* instr) {
|
||||
RecordInstructionFeaturesScope scope(this);
|
||||
switch (form_hash_) {
|
||||
case "sha1c_qsv_cryptosha3"_h:
|
||||
case "sha1m_qsv_cryptosha3"_h:
|
||||
case "sha1p_qsv_cryptosha3"_h:
|
||||
case "sha1su0_vvv_cryptosha3"_h:
|
||||
scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA1);
|
||||
break;
|
||||
case "sha256h_qqv_cryptosha3"_h:
|
||||
case "sha256h2_qqv_cryptosha3"_h:
|
||||
case "sha256su1_vvv_cryptosha3"_h:
|
||||
scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA2);
|
||||
break;
|
||||
}
|
||||
USE(instr);
|
||||
}
|
||||
|
||||
// Record the CPU features used by the AES forms: every instruction routed to
// this visitor is recorded as NEON + AES, regardless of its encoding.
void CPUFeaturesAuditor::VisitCryptoAES(const Instruction* instr) {
  USE(instr);  // The instruction bits are not needed to pick the features.
  RecordInstructionFeaturesScope scope(this);
  scope.Record(CPUFeatures::kNEON, CPUFeatures::kAES);
}
|
||||
|
||||
// Record the CPU features used by the SM3 forms: every instruction routed to
// this visitor is recorded as NEON + SM3, regardless of its encoding.
void CPUFeaturesAuditor::VisitCryptoSM3(const Instruction* instr) {
  USE(instr);  // The instruction bits are not needed to pick the features.
  RecordInstructionFeaturesScope scope(this);
  scope.Record(CPUFeatures::kNEON, CPUFeatures::kSM3);
}
|
||||
|
||||
// Record the CPU features used by the SM4 forms: every instruction routed to
// this visitor is recorded as NEON + SM4, regardless of its encoding.
void CPUFeaturesAuditor::VisitCryptoSM4(const Instruction* instr) {
  USE(instr);  // The instruction bits are not needed to pick the features.
  RecordInstructionFeaturesScope scope(this);
  scope.Record(CPUFeatures::kNEON, CPUFeatures::kSM4);
}
|
||||
|
||||
@@ -733,6 +764,12 @@ void CPUFeaturesAuditor::VisitNEON3Different(const Instruction* instr) {
|
||||
RecordInstructionFeaturesScope scope(this);
|
||||
// All of these instructions require NEON.
|
||||
scope.Record(CPUFeatures::kNEON);
|
||||
if (form_hash_ == "pmull_asimddiff_l"_h) {
|
||||
if (instr->GetNEONSize() == 3) {
|
||||
// Source is 1D or 2D, destination is 1Q.
|
||||
scope.Record(CPUFeatures::kPmull1Q);
|
||||
}
|
||||
}
|
||||
USE(instr);
|
||||
}
|
||||
|
||||
@@ -1267,91 +1304,93 @@ VIXL_SIMPLE_SVE_VISITOR_LIST(VIXL_DEFINE_SIMPLE_SVE_VISITOR)
|
||||
|
||||
void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
|
||||
RecordInstructionFeaturesScope scope(this);
|
||||
if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
|
||||
CPUFeatures required;
|
||||
switch (instr->GetInstructionBits()) {
|
||||
case PACIA1716:
|
||||
case PACIB1716:
|
||||
case AUTIA1716:
|
||||
case AUTIB1716:
|
||||
case PACIAZ:
|
||||
case PACIASP:
|
||||
case PACIBZ:
|
||||
case PACIBSP:
|
||||
case AUTIAZ:
|
||||
case AUTIASP:
|
||||
case AUTIBZ:
|
||||
case AUTIBSP:
|
||||
case XPACLRI:
|
||||
required.Combine(CPUFeatures::kPAuth);
|
||||
break;
|
||||
default:
|
||||
switch (instr->GetImmHint()) {
|
||||
case ESB:
|
||||
required.Combine(CPUFeatures::kRAS);
|
||||
break;
|
||||
case BTI:
|
||||
case BTI_j:
|
||||
case BTI_c:
|
||||
case BTI_jc:
|
||||
required.Combine(CPUFeatures::kBTI);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// These are all HINT instructions, and behave as NOPs if the corresponding
|
||||
// features are not implemented, so we record the corresponding features
|
||||
// only if they are available.
|
||||
if (available_.Has(required)) scope.Record(required);
|
||||
} else if (instr->Mask(SystemSysMask) == SYS) {
|
||||
switch (instr->GetSysOp()) {
|
||||
// DC instruction variants.
|
||||
case CGVAC:
|
||||
case CGDVAC:
|
||||
case CGVAP:
|
||||
case CGDVAP:
|
||||
case CIGVAC:
|
||||
case CIGDVAC:
|
||||
case GVA:
|
||||
case GZVA:
|
||||
scope.Record(CPUFeatures::kMTE);
|
||||
break;
|
||||
case CVAP:
|
||||
scope.Record(CPUFeatures::kDCPoP);
|
||||
break;
|
||||
case CVADP:
|
||||
scope.Record(CPUFeatures::kDCCVADP);
|
||||
break;
|
||||
case IVAU:
|
||||
case CVAC:
|
||||
case CVAU:
|
||||
case CIVAC:
|
||||
case ZVA:
|
||||
// No special CPU features.
|
||||
break;
|
||||
}
|
||||
} else if (instr->Mask(SystemPStateFMask) == SystemPStateFixed) {
|
||||
switch (instr->Mask(SystemPStateMask)) {
|
||||
case CFINV:
|
||||
scope.Record(CPUFeatures::kFlagM);
|
||||
break;
|
||||
case AXFLAG:
|
||||
case XAFLAG:
|
||||
scope.Record(CPUFeatures::kAXFlag);
|
||||
break;
|
||||
}
|
||||
} else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
|
||||
if (instr->Mask(SystemSysRegMask) == MRS) {
|
||||
CPUFeatures required;
|
||||
switch (form_hash_) {
|
||||
case "pacib1716_hi_hints"_h:
|
||||
case "pacia1716_hi_hints"_h:
|
||||
case "pacibsp_hi_hints"_h:
|
||||
case "paciasp_hi_hints"_h:
|
||||
case "pacibz_hi_hints"_h:
|
||||
case "paciaz_hi_hints"_h:
|
||||
case "autib1716_hi_hints"_h:
|
||||
case "autia1716_hi_hints"_h:
|
||||
case "autibsp_hi_hints"_h:
|
||||
case "autiasp_hi_hints"_h:
|
||||
case "autibz_hi_hints"_h:
|
||||
case "autiaz_hi_hints"_h:
|
||||
case "xpaclri_hi_hints"_h:
|
||||
required.Combine(CPUFeatures::kPAuth);
|
||||
break;
|
||||
case "esb_hi_hints"_h:
|
||||
required.Combine(CPUFeatures::kRAS);
|
||||
break;
|
||||
case "bti_hb_hints"_h:
|
||||
required.Combine(CPUFeatures::kBTI);
|
||||
break;
|
||||
}
|
||||
|
||||
// The instructions above are all HINTs and behave as NOPs if the
|
||||
// corresponding features are not implemented, so we record the corresponding
|
||||
// features only if they are available.
|
||||
if (available_.Has(required)) scope.Record(required);
|
||||
|
||||
switch (form_hash_) {
|
||||
case "cfinv_m_pstate"_h:
|
||||
scope.Record(CPUFeatures::kFlagM);
|
||||
break;
|
||||
case "axflag_m_pstate"_h:
|
||||
case "xaflag_m_pstate"_h:
|
||||
scope.Record(CPUFeatures::kAXFlag);
|
||||
break;
|
||||
case "mrs_rs_systemmove"_h:
|
||||
switch (instr->GetImmSystemRegister()) {
|
||||
case RNDR:
|
||||
case RNDRRS:
|
||||
scope.Record(CPUFeatures::kRNG);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case "sys_cr_systeminstrs"_h:
|
||||
switch (instr->GetSysOp()) {
|
||||
// DC instruction variants.
|
||||
case CGVAC:
|
||||
case CGDVAC:
|
||||
case CGVAP:
|
||||
case CGDVAP:
|
||||
case CIGVAC:
|
||||
case CIGDVAC:
|
||||
case GVA:
|
||||
case GZVA:
|
||||
scope.Record(CPUFeatures::kMTE);
|
||||
break;
|
||||
case CVAP:
|
||||
scope.Record(CPUFeatures::kDCPoP);
|
||||
break;
|
||||
case CVADP:
|
||||
scope.Record(CPUFeatures::kDCCVADP);
|
||||
break;
|
||||
case IVAU:
|
||||
case CVAC:
|
||||
case CVAU:
|
||||
case CIVAC:
|
||||
case ZVA:
|
||||
// No special CPU features.
|
||||
break;
|
||||
case GCSPUSHM:
|
||||
case GCSSS1:
|
||||
scope.Record(CPUFeatures::kGCS);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case "sysl_rc_systeminstrs"_h:
|
||||
switch (instr->GetSysOp()) {
|
||||
case GCSPOPM:
|
||||
case GCSSS2:
|
||||
scope.Record(CPUFeatures::kGCS);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1405,9 +1444,9 @@ void CPUFeaturesAuditor::VisitUnimplemented(const Instruction* instr) {
|
||||
void CPUFeaturesAuditor::Visit(Metadata* metadata, const Instruction* instr) {
|
||||
VIXL_ASSERT(metadata->count("form") > 0);
|
||||
const std::string& form = (*metadata)["form"];
|
||||
uint32_t form_hash = Hash(form.c_str());
|
||||
form_hash_ = Hash(form.c_str());
|
||||
const FormToVisitorFnMap* fv = CPUFeaturesAuditor::GetFormToVisitorFnMap();
|
||||
FormToVisitorFnMap::const_iterator it = fv->find(form_hash);
|
||||
FormToVisitorFnMap::const_iterator it = fv->find(form_hash_);
|
||||
if (it == fv->end()) {
|
||||
RecordInstructionFeaturesScope scope(this);
|
||||
std::map<uint32_t, const CPUFeatures> features = {
|
||||
@@ -1824,10 +1863,30 @@ void CPUFeaturesAuditor::Visit(Metadata* metadata, const Instruction* instr) {
|
||||
{"umax_64u_minmax_imm"_h, CPUFeatures::kCSSC},
|
||||
{"umin_32u_minmax_imm"_h, CPUFeatures::kCSSC},
|
||||
{"umin_64u_minmax_imm"_h, CPUFeatures::kCSSC},
|
||||
{"bcax_vvv16_crypto4"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
|
||||
{"eor3_vvv16_crypto4"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
|
||||
{"rax1_vvv2_cryptosha512_3"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
|
||||
{"xar_vvv2_crypto3_imm6"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
|
||||
{"sha512h_qqv_cryptosha512_3"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA512)},
|
||||
{"sha512h2_qqv_cryptosha512_3"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA512)},
|
||||
{"sha512su0_vv2_cryptosha512_2"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA512)},
|
||||
{"sha512su1_vvv2_cryptosha512_3"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA512)},
|
||||
{"pmullb_z_zz_q"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEPmull128)},
|
||||
{"pmullt_z_zz_q"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEPmull128)},
|
||||
};
|
||||
|
||||
if (features.count(form_hash) > 0) {
|
||||
scope.Record(features[form_hash]);
|
||||
if (features.count(form_hash_) > 0) {
|
||||
scope.Record(features[form_hash_]);
|
||||
}
|
||||
} else {
|
||||
(it->second)(this, instr);
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
#include <iostream>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "cpu-features.h"
|
||||
#include "../cpu-features.h"
|
||||
|
||||
#include "decoder-aarch64.h"
|
||||
#include "decoder-visitor-map-aarch64.h"
|
||||
@@ -113,6 +113,8 @@ class CPUFeaturesAuditor : public DecoderVisitor {
|
||||
#define DECLARE(A) virtual void Visit##A(const Instruction* instr);
|
||||
VISITOR_LIST(DECLARE)
|
||||
#undef DECLARE
|
||||
void VisitCryptoSM3(const Instruction* instr);
|
||||
void VisitCryptoSM4(const Instruction* instr);
|
||||
|
||||
void LoadStoreHelper(const Instruction* instr);
|
||||
void LoadStorePairHelper(const Instruction* instr);
|
||||
@@ -126,6 +128,7 @@ class CPUFeaturesAuditor : public DecoderVisitor {
|
||||
using FormToVisitorFnMap = FormToVisitorFnMapT<CPUFeaturesAuditor>;
|
||||
|
||||
static const FormToVisitorFnMap* GetFormToVisitorFnMap();
|
||||
uint32_t form_hash_;
|
||||
};
|
||||
|
||||
} // namespace aarch64
|
||||
|
||||
@@ -33,7 +33,6 @@
|
||||
#include <cstring>
|
||||
#include <errno.h>
|
||||
#include <limits>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace vixl {
|
||||
namespace aarch64 {
|
||||
@@ -205,7 +204,7 @@ std::optional<Debugger::RegisterParsedFormat> Debugger::ParseRegString(
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return {{reg_prefix, *reg_code}};
|
||||
return {{reg_prefix, static_cast<unsigned int>(*reg_code)}};
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -31,9 +31,9 @@
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include "../cpu-features.h"
|
||||
#include "../globals-vixl.h"
|
||||
#include "../utils-vixl.h"
|
||||
#include "cpu-features.h"
|
||||
|
||||
#include "abi-aarch64.h"
|
||||
#include "cpu-features-auditor-aarch64.h"
|
||||
|
||||
@@ -3764,7 +3764,7 @@ static const DecodeMapping kDecodeMapping[] = {
|
||||
{"001110"_b, "autiaz_hi_hints"},
|
||||
{"001111"_b, "autibz_hi_hints"},
|
||||
{"0100xx"_b, "bti_hb_hints"},
|
||||
{"010100"_b, "chkfeat_hi_hints"},
|
||||
{"010100"_b, "chkfeat_hf_hints"},
|
||||
{"0101x1"_b, "hint_hm_hints"},
|
||||
{"01x110"_b, "hint_hm_hints"},
|
||||
{"10xxxx"_b, "hint_hm_hints"},
|
||||
|
||||
@@ -2074,7 +2074,6 @@
|
||||
{"scvtf_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
|
||||
{"ucvtf_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
|
||||
{"addhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \
|
||||
{"pmull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
|
||||
{"raddhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \
|
||||
{"rsubhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \
|
||||
{"sabal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
|
||||
@@ -2592,6 +2591,7 @@
|
||||
{"dmb_bo_barriers"_h, &VISITORCLASS::VisitSystem}, \
|
||||
{"dsb_bo_barriers"_h, &VISITORCLASS::VisitSystem}, \
|
||||
{"hint_hm_hints"_h, &VISITORCLASS::VisitSystem}, \
|
||||
{"chkfeat_hf_hints"_h, &VISITORCLASS::VisitSystem}, \
|
||||
{"mrs_rs_systemmove"_h, &VISITORCLASS::VisitSystem}, \
|
||||
{"msr_sr_systemmove"_h, &VISITORCLASS::VisitSystem}, \
|
||||
{"psb_hc_hints"_h, &VISITORCLASS::VisitSystem}, \
|
||||
@@ -2638,7 +2638,6 @@
|
||||
&VISITORCLASS::VisitUnconditionalBranchToRegister}, \
|
||||
{"ret_64r_branch_reg"_h, \
|
||||
&VISITORCLASS::VisitUnconditionalBranchToRegister}, \
|
||||
{"bcax_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"bfcvtn_asimdmisc_4s"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"bfdot_asimdelem_e"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"bfdot_asimdsame2_d"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
@@ -2646,7 +2645,6 @@
|
||||
{"bfmlal_asimdsame2_f"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"bfmmla_asimdsame2_e"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"dsb_bon_barriers"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"eor3_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"ld64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"ldgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"ldtrb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
@@ -2658,20 +2656,15 @@
|
||||
{"ldtrsw_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"ldtr_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"ldtr_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"rax1_vvv2_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sha512h2_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sha512h_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sha512su0_vv2_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sha512su1_vvv2_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sm3partw1_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sm3partw2_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sm3ss1_vvv4_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sm3tt1a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sm3tt1b_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sm3tt2a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sm3tt2b_vvv_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sm4ekey_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sm4e_vv4_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"sm3partw1_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitCryptoSM3}, \
|
||||
{"sm3partw2_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitCryptoSM3}, \
|
||||
{"sm3ss1_vvv4_crypto4"_h, &VISITORCLASS::VisitCryptoSM3}, \
|
||||
{"sm3tt1a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitCryptoSM3}, \
|
||||
{"sm3tt1b_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitCryptoSM3}, \
|
||||
{"sm3tt2a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitCryptoSM3}, \
|
||||
{"sm3tt2b_vvv_crypto3_imm2"_h, &VISITORCLASS::VisitCryptoSM3}, \
|
||||
{"sm4ekey_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitCryptoSM4}, \
|
||||
{"sm4e_vv4_cryptosha512_2"_h, &VISITORCLASS::VisitCryptoSM4}, \
|
||||
{"st64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"st64bv_64_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"st64bv0_64_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
@@ -2686,7 +2679,6 @@
|
||||
{"ttest_br_systemresult"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"wfet_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"wfit_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"xar_vvv2_crypto3_imm6"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"bfcvt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"bfcvtnt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
{"bfdot_z_zzz"_h, &VISITORCLASS::VisitUnimplemented}, \
|
||||
@@ -2827,6 +2819,7 @@
|
||||
{"fmlal_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \
|
||||
{"fmlsl2_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \
|
||||
{"fmlsl_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \
|
||||
{"pmull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
|
||||
{"ushll_asimdshf_l"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
|
||||
{"sshll_asimdshf_l"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
|
||||
{"shrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
|
||||
@@ -2856,22 +2849,6 @@
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"sqdmull_asisdelem_l"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmla_asisdelem_rh_h"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmla_asisdelem_r_sd"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmls_asisdelem_rh_h"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmls_asisdelem_r_sd"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmulx_asisdelem_rh_h"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmulx_asisdelem_r_sd"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmul_asisdelem_rh_h"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmul_asisdelem_r_sd"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fabd_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
|
||||
{"facge_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
|
||||
{"facgt_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
|
||||
@@ -2944,6 +2921,22 @@
|
||||
{"frecpe_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
|
||||
{"frecpx_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
|
||||
{"frsqrte_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
|
||||
{"scvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, { \
|
||||
"ucvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc \
|
||||
{"scvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
|
||||
{"ucvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
|
||||
{"fmla_asisdelem_rh_h"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmla_asisdelem_r_sd"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmls_asisdelem_rh_h"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmls_asisdelem_r_sd"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmulx_asisdelem_rh_h"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmulx_asisdelem_r_sd"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{"fmul_asisdelem_rh_h"_h, \
|
||||
&VISITORCLASS::VisitNEONScalarByIndexedElement}, \
|
||||
{ \
|
||||
"fmul_asisdelem_r_sd"_h, &VISITORCLASS::VisitNEONScalarByIndexedElement \
|
||||
}
|
||||
|
||||
+160
-45
@@ -2024,7 +2024,7 @@ void Disassembler::DisassembleNoArgs(const Instruction *instr) {
|
||||
|
||||
void Disassembler::VisitSystem(const Instruction *instr) {
|
||||
const char *mnemonic = mnemonic_.c_str();
|
||||
const char *form = "(System)";
|
||||
const char *form = "";
|
||||
const char *suffix = NULL;
|
||||
|
||||
switch (form_hash_) {
|
||||
@@ -2053,6 +2053,10 @@ void Disassembler::VisitSystem(const Instruction *instr) {
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case "chkfeat_hf_hints"_h:
|
||||
mnemonic = "chkfeat";
|
||||
form = "x16";
|
||||
break;
|
||||
case "hint_hm_hints"_h:
|
||||
form = "'IH";
|
||||
break;
|
||||
@@ -2073,9 +2077,6 @@ void Disassembler::VisitSystem(const Instruction *instr) {
|
||||
break;
|
||||
}
|
||||
case Hash("sys_cr_systeminstrs"): {
|
||||
mnemonic = "dc";
|
||||
suffix = ", 'Xt";
|
||||
|
||||
const std::map<uint32_t, const char *> dcop = {
|
||||
{IVAU, "ivau"},
|
||||
{CVAC, "cvac"},
|
||||
@@ -2098,17 +2099,36 @@ void Disassembler::VisitSystem(const Instruction *instr) {
|
||||
if (dcop.count(sysop)) {
|
||||
if (sysop == IVAU) {
|
||||
mnemonic = "ic";
|
||||
} else {
|
||||
mnemonic = "dc";
|
||||
}
|
||||
form = dcop.at(sysop);
|
||||
suffix = ", 'Xt";
|
||||
} else if (sysop == GCSSS1) {
|
||||
mnemonic = "gcsss1";
|
||||
form = "'Xt";
|
||||
} else if (sysop == GCSPUSHM) {
|
||||
mnemonic = "gcspushm";
|
||||
form = "'Xt";
|
||||
} else {
|
||||
mnemonic = "sys";
|
||||
form = "'G1, 'Kn, 'Km, 'G2";
|
||||
if (instr->GetRt() == 31) {
|
||||
suffix = NULL;
|
||||
if (instr->GetRt() < 31) {
|
||||
suffix = ", 'Xt";
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case "sysl_rc_systeminstrs"_h:
|
||||
uint32_t sysop = instr->GetSysOp();
|
||||
if (sysop == GCSPOPM) {
|
||||
mnemonic = "gcspopm";
|
||||
form = (instr->GetRt() == 31) ? "" : "'Xt";
|
||||
} else if (sysop == GCSSS2) {
|
||||
mnemonic = "gcsss2";
|
||||
form = "'Xt";
|
||||
}
|
||||
break;
|
||||
}
|
||||
Format(instr, mnemonic, form, suffix);
|
||||
}
|
||||
@@ -2154,17 +2174,74 @@ void Disassembler::VisitException(const Instruction *instr) {
|
||||
|
||||
|
||||
void Disassembler::VisitCrypto2RegSHA(const Instruction *instr) {
|
||||
VisitUnimplemented(instr);
|
||||
const char *form = "'Vd.4s, 'Vn.4s";
|
||||
if (form_hash_ == "sha1h_ss_cryptosha2"_h) {
|
||||
form = "'Sd, 'Sn";
|
||||
}
|
||||
FormatWithDecodedMnemonic(instr, form);
|
||||
}
|
||||
|
||||
|
||||
void Disassembler::VisitCrypto3RegSHA(const Instruction *instr) {
|
||||
VisitUnimplemented(instr);
|
||||
const char *form = "'Qd, 'Sn, 'Vm.4s";
|
||||
switch (form_hash_) {
|
||||
case "sha1su0_vvv_cryptosha3"_h:
|
||||
case "sha256su1_vvv_cryptosha3"_h:
|
||||
form = "'Vd.4s, 'Vn.4s, 'Vm.4s";
|
||||
break;
|
||||
case "sha256h_qqv_cryptosha3"_h:
|
||||
case "sha256h2_qqv_cryptosha3"_h:
|
||||
form = "'Qd, 'Qn, 'Vm.4s";
|
||||
break;
|
||||
}
|
||||
FormatWithDecodedMnemonic(instr, form);
|
||||
}
|
||||
|
||||
|
||||
// Disassemble the AES forms: all of them are printed with two 16B vector
// operands, with the mnemonic supplied by FormatWithDecodedMnemonic.
void Disassembler::VisitCryptoAES(const Instruction *instr) {
  FormatWithDecodedMnemonic(instr, "'Vd.16b, 'Vn.16b");
}
|
||||
|
||||
void Disassembler::VisitCryptoSM3(const Instruction *instr) {
|
||||
const char *form = "'Vd.4s, 'Vn.4s, 'Vm.";
|
||||
const char *suffix = "4s";
|
||||
|
||||
switch (form_hash_) {
|
||||
case "sm3ss1_vvv4_crypto4"_h:
|
||||
suffix = "4s, 'Va.4s";
|
||||
break;
|
||||
case "sm3tt1a_vvv4_crypto3_imm2"_h:
|
||||
case "sm3tt1b_vvv4_crypto3_imm2"_h:
|
||||
case "sm3tt2a_vvv4_crypto3_imm2"_h:
|
||||
case "sm3tt2b_vvv_crypto3_imm2"_h:
|
||||
suffix = "s['u1312]";
|
||||
break;
|
||||
}
|
||||
|
||||
FormatWithDecodedMnemonic(instr, form, suffix);
|
||||
}
|
||||
|
||||
void Disassembler::VisitCryptoSM4(const Instruction *instr) {
|
||||
VIXL_ASSERT((form_hash_ == "sm4ekey_vvv4_cryptosha512_3"_h) ||
|
||||
(form_hash_ == "sm4e_vv4_cryptosha512_2"_h));
|
||||
const char *form = "'Vd.4s, 'Vn.4s";
|
||||
const char *suffix =
|
||||
(form_hash_ == "sm4e_vv4_cryptosha512_2"_h) ? NULL : ", 'Vm.4s";
|
||||
|
||||
FormatWithDecodedMnemonic(instr, form, suffix);
|
||||
}
|
||||
|
||||
void Disassembler::DisassembleSHA512(const Instruction *instr) {
|
||||
const char *form = "'Qd, 'Qn, 'Vm.2d";
|
||||
const char *suffix = NULL;
|
||||
switch (form_hash_) {
|
||||
case "sha512su1_vvv2_cryptosha512_3"_h:
|
||||
suffix = ", 'Vm.2d";
|
||||
VIXL_FALLTHROUGH();
|
||||
case "sha512su0_vv2_cryptosha512_2"_h:
|
||||
form = "'Vd.2d, 'Vn.2d";
|
||||
}
|
||||
FormatWithDecodedMnemonic(instr, form, suffix);
|
||||
}
|
||||
|
||||
void Disassembler::DisassembleNEON2RegAddlp(const Instruction *instr) {
|
||||
@@ -2380,13 +2457,19 @@ void Disassembler::VisitNEON3SameFP16(const Instruction *instr) {
|
||||
}
|
||||
|
||||
void Disassembler::VisitNEON3SameExtra(const Instruction *instr) {
|
||||
static const NEONFormatMap map_usdot = {{30}, {NF_8B, NF_16B}};
|
||||
static const NEONFormatMap map_dot =
|
||||
{{23, 22, 30}, {NF_UNDEF, NF_UNDEF, NF_UNDEF, NF_UNDEF, NF_2S, NF_4S}};
|
||||
static const NEONFormatMap map_fc =
|
||||
{{23, 22, 30},
|
||||
{NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_2D}};
|
||||
static const NEONFormatMap map_rdm =
|
||||
{{23, 22, 30}, {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_2S, NF_4S}};
|
||||
|
||||
const char *mnemonic = mnemonic_.c_str();
|
||||
const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
|
||||
const char *suffix = NULL;
|
||||
|
||||
NEONFormatDecoder nfd(instr);
|
||||
NEONFormatDecoder nfd(instr, &map_fc);
|
||||
|
||||
switch (form_hash_) {
|
||||
case "fcmla_asimdsame2_c"_h:
|
||||
@@ -2399,17 +2482,28 @@ void Disassembler::VisitNEON3SameExtra(const Instruction *instr) {
|
||||
case "sdot_asimdsame2_d"_h:
|
||||
case "udot_asimdsame2_d"_h:
|
||||
case "usdot_asimdsame2_d"_h:
|
||||
nfd.SetFormatMap(1, &map_usdot);
|
||||
nfd.SetFormatMap(2, &map_usdot);
|
||||
nfd.SetFormatMaps(nfd.LogicalFormatMap());
|
||||
nfd.SetFormatMap(0, &map_dot);
|
||||
break;
|
||||
default:
|
||||
// sqrdml[as]h - nothing to do.
|
||||
nfd.SetFormatMaps(&map_rdm);
|
||||
break;
|
||||
}
|
||||
|
||||
Format(instr, mnemonic, nfd.Substitute(form), suffix);
|
||||
}
|
||||
|
||||
// Disassemble a NEON instruction with four 16B vector operands (Vd, Vn, Vm
// and Va), using the already-decoded mnemonic.
void Disassembler::DisassembleNEON4Same(const Instruction *instr) {
  const char *operands = "'Vd.16b, 'Vn.16b, 'Vm.16b, 'Va.16b";
  FormatWithDecodedMnemonic(instr, operands);
}
|
||||
|
||||
// Disassemble XAR: three 2D vector operands followed by an immediate, using
// the already-decoded mnemonic. The immediate is printed via the 'u1510
// directive (presumably the unsigned field in bits 15:10).
void Disassembler::DisassembleNEONXar(const Instruction *instr) {
  const char *operands = "'Vd.2d, 'Vn.2d, 'Vm.2d, #'u1510";
  FormatWithDecodedMnemonic(instr, operands);
}
|
||||
|
||||
// Disassemble RAX1: three 2D vector operands, using the already-decoded
// mnemonic.
void Disassembler::DisassembleNEONRax1(const Instruction *instr) {
  const char *operands = "'Vd.2d, 'Vn.2d, 'Vm.2d";
  FormatWithDecodedMnemonic(instr, operands);
}
|
||||
|
||||
void Disassembler::VisitNEON3Different(const Instruction *instr) {
|
||||
const char *mnemonic = mnemonic_.c_str();
|
||||
@@ -2432,11 +2526,6 @@ void Disassembler::VisitNEON3Different(const Instruction *instr) {
|
||||
nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
|
||||
nfd.SetFormatMap(0, nfd.IntegerFormatMap());
|
||||
break;
|
||||
case "pmull_asimddiff_l"_h:
|
||||
if (nfd.GetVectorFormat(0) != kFormat8H) {
|
||||
mnemonic = NULL;
|
||||
}
|
||||
break;
|
||||
case "sqdmlal_asimddiff_l"_h:
|
||||
case "sqdmlsl_asimddiff_l"_h:
|
||||
case "sqdmull_asimddiff_l"_h:
|
||||
@@ -2448,6 +2537,22 @@ void Disassembler::VisitNEON3Different(const Instruction *instr) {
|
||||
Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
|
||||
}
|
||||
|
||||
// Disassemble NEON polynomial multiply long (pmull / pmull2).
//
// Bit 30 selects the mnemonic ("pmull2" when set, "pmull" otherwise) and the
// two-bit size field in bits 23:22 selects the operand arrangement:
//   size == 0: 8h destination, 8b/16b sources.
//   size == 3: 1q destination, 1d/2d sources.
// Any other size leaves `form` as NULL and clears `mnemonic`.
// NOTE(review): a NULL mnemonic presumably makes Format() report the encoding
// as unallocated - confirm in Format().
void Disassembler::DisassembleNEONPolynomialMul(const Instruction *instr) {
|
||||
const char *mnemonic = instr->ExtractBit(30) ? "pmull2" : "pmull";
|
||||
const char *form = NULL;
|
||||
int size = instr->ExtractBits(23, 22);
|
||||
if (size == 0) {
|
||||
// Bits 30:27 of the instruction are x001, where x is the Q bit. Map
|
||||
// this to "8" and "16" by adding 7.
|
||||
form = "'Vd.8h, 'Vn.'u3127+7b, 'Vm.'u3127+7b";
|
||||
} else if (size == 3) {
|
||||
// NOTE(review): '?30:21 presumably prints "2" or "1" depending on bit 30,
// giving 2d for pmull2 and 1d for pmull - confirm.
form = "'Vd.1q, 'Vn.'?30:21d, 'Vm.'?30:21d";
|
||||
} else {
|
||||
// Sizes 1 and 2 are not handled here.
mnemonic = NULL;
|
||||
}
|
||||
Format(instr, mnemonic, form);
|
||||
}
|
||||
|
||||
void Disassembler::DisassembleNEONFPAcrossLanes(const Instruction *instr) {
|
||||
const char *mnemonic = mnemonic_.c_str();
|
||||
const char *form = "'Sd, 'Vn.4s";
|
||||
@@ -2624,10 +2729,10 @@ void Disassembler::VisitNEONExtract(const Instruction *instr) {
|
||||
void Disassembler::VisitNEONLoadStoreMultiStruct(const Instruction *instr) {
|
||||
const char *mnemonic = NULL;
|
||||
const char *form = NULL;
|
||||
const char *form_1v = "{'Vt.%1$s}, ['Xns]";
|
||||
const char *form_2v = "{'Vt.%1$s, 'Vt2.%1$s}, ['Xns]";
|
||||
const char *form_3v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s}, ['Xns]";
|
||||
const char *form_4v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns]";
|
||||
const char *form_1v = "{'Vt.%s}, ['Xns]";
|
||||
const char *form_2v = "{'Vt.%s, 'Vt2.%s}, ['Xns]";
|
||||
const char *form_3v = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s}, ['Xns]";
|
||||
const char *form_4v = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s, 'Vt4.%s}, ['Xns]";
|
||||
NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
|
||||
|
||||
switch (instr->Mask(NEONLoadStoreMultiStructMask)) {
|
||||
@@ -2722,11 +2827,10 @@ void Disassembler::VisitNEONLoadStoreMultiStructPostIndex(
|
||||
const Instruction *instr) {
|
||||
const char *mnemonic = NULL;
|
||||
const char *form = NULL;
|
||||
const char *form_1v = "{'Vt.%1$s}, ['Xns], 'Xmr1";
|
||||
const char *form_2v = "{'Vt.%1$s, 'Vt2.%1$s}, ['Xns], 'Xmr2";
|
||||
const char *form_3v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s}, ['Xns], 'Xmr3";
|
||||
const char *form_4v =
|
||||
"{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns], 'Xmr4";
|
||||
const char *form_1v = "{'Vt.%s}, ['Xns], 'Xmr1";
|
||||
const char *form_2v = "{'Vt.%s, 'Vt2.%s}, ['Xns], 'Xmr2";
|
||||
const char *form_3v = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s}, ['Xns], 'Xmr3";
|
||||
const char *form_4v = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s, 'Vt4.%s}, ['Xns], 'Xmr4";
|
||||
NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
|
||||
|
||||
switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
|
||||
@@ -2929,7 +3033,7 @@ void Disassembler::VisitNEONLoadStoreSingleStruct(const Instruction *instr) {
|
||||
break;
|
||||
case NEON_LD4R:
|
||||
mnemonic = "ld4r";
|
||||
form = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns]";
|
||||
form = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s, 'Vt4.%s}, ['Xns]";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -3089,7 +3193,7 @@ void Disassembler::VisitNEONLoadStoreSingleStructPostIndex(
|
||||
break;
|
||||
case NEON_LD4R_post:
|
||||
mnemonic = "ld4r";
|
||||
form = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns], 'Xmz4";
|
||||
form = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s, 'Vt4.%s}, ['Xns], 'Xmz4";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -3305,6 +3409,8 @@ void Disassembler::VisitNEONScalar3Same(const Instruction *instr) {
|
||||
break;
|
||||
case "sqdmulh_asisdsame_only"_h:
|
||||
case "sqrdmulh_asisdsame_only"_h:
|
||||
case "sqrdmlah_asisdsame2_only"_h:
|
||||
case "sqrdmlsh_asisdsame2_only"_h:
|
||||
if ((vform == kFormatB) || (vform == kFormatD)) {
|
||||
mnemonic = NULL;
|
||||
}
|
||||
@@ -3923,8 +4029,7 @@ static bool SVEMoveMaskPreferred(uint64_t value, int lane_bytes_log2) {
|
||||
}
|
||||
|
||||
// Check 0x0000pq00_0000pq00 or 0xffffpq00_ffffpq00.
|
||||
uint64_t rotvalue = RotateRight(value, 32, 64);
|
||||
if (value == rotvalue) {
|
||||
if (AllWordsMatch(value)) {
|
||||
generic_value &= 0xffffffff;
|
||||
if ((generic_value == 0xffff) || (generic_value == UINT32_MAX)) {
|
||||
return false;
|
||||
@@ -3932,8 +4037,7 @@ static bool SVEMoveMaskPreferred(uint64_t value, int lane_bytes_log2) {
|
||||
}
|
||||
|
||||
// Check 0xpq00pq00_pq00pq00.
|
||||
rotvalue = RotateRight(value, 16, 64);
|
||||
if (value == rotvalue) {
|
||||
if (AllHalfwordsMatch(value)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
@@ -3947,8 +4051,7 @@ static bool SVEMoveMaskPreferred(uint64_t value, int lane_bytes_log2) {
|
||||
}
|
||||
|
||||
// Check 0x000000pq_000000pq or 0xffffffpq_ffffffpq.
|
||||
uint64_t rotvalue = RotateRight(value, 32, 64);
|
||||
if (value == rotvalue) {
|
||||
if (AllWordsMatch(value)) {
|
||||
generic_value &= 0xffffffff;
|
||||
if ((generic_value == 0xff) || (generic_value == UINT32_MAX)) {
|
||||
return false;
|
||||
@@ -3956,8 +4059,7 @@ static bool SVEMoveMaskPreferred(uint64_t value, int lane_bytes_log2) {
|
||||
}
|
||||
|
||||
// Check 0x00pq00pq_00pq00pq or 0xffpqffpq_ffpqffpq.
|
||||
rotvalue = RotateRight(value, 16, 64);
|
||||
if (value == rotvalue) {
|
||||
if (AllHalfwordsMatch(value)) {
|
||||
generic_value &= 0xffff;
|
||||
if ((generic_value == 0xff) || (generic_value == UINT16_MAX)) {
|
||||
return false;
|
||||
@@ -3965,8 +4067,7 @@ static bool SVEMoveMaskPreferred(uint64_t value, int lane_bytes_log2) {
|
||||
}
|
||||
|
||||
// Check 0xpqpqpqpq_pqpqpqpq.
|
||||
rotvalue = RotateRight(value, 8, 64);
|
||||
if (value == rotvalue) {
|
||||
if (AllBytesMatch(value)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -5748,15 +5849,26 @@ void Disassembler::Disassemble_ZdT_ZnTb(const Instruction *instr) {
|
||||
}
|
||||
}
|
||||
|
||||
// Disassemble SVE pmull[b|t]. Encodings whose SVE vector format decodes to
// S-sized lanes (kFormatVnS) are routed to VisitUnallocated; every other
// format is handed to the generic Zd.T, Zn.Tb, Zm.Tb disassembler.
void Disassembler::DisassembleSVEPmull(const Instruction *instr) {
|
||||
if (instr->GetSVEVectorFormat() == kFormatVnS) {
|
||||
VisitUnallocated(instr);
|
||||
} else {
|
||||
Disassemble_ZdT_ZnTb_ZmTb(instr);
|
||||
}
|
||||
}
|
||||
|
||||
// Disassemble the 128-bit form of SVE pmull: the decoded mnemonic with a
// Q-sized destination (Zd.q) and D-sized sources (Zn.d, Zm.d).
void Disassembler::DisassembleSVEPmull128(const Instruction *instr) {
|
||||
FormatWithDecodedMnemonic(instr, "'Zd.q, 'Zn.d, 'Zm.d");
|
||||
}
|
||||
|
||||
void Disassembler::Disassemble_ZdT_ZnTb_ZmTb(const Instruction *instr) {
|
||||
const char *form = "'Zd.'t, 'Zn.'th, 'Zm.'th";
|
||||
if (instr->GetSVEVectorFormat() == kFormatVnB) {
|
||||
// TODO: This is correct for saddlbt, ssublbt, subltb, which don't have
|
||||
// b-lane sized form, and for pmull[b|t] as feature `SVEPmull128` isn't
|
||||
// supported, but may need changes for other instructions reaching here.
|
||||
// b-lane sized form, but may need changes for other instructions reaching
|
||||
// here.
|
||||
Format(instr, "unimplemented", "(ZdT_ZnTb_ZmTb)");
|
||||
} else {
|
||||
Format(instr, mnemonic_.c_str(), form);
|
||||
FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'th, 'Zm.'th");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6908,6 +7020,9 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
|
||||
case RNDRRS:
|
||||
AppendToOutput("rndrrs");
|
||||
break;
|
||||
case DCZID_EL0:
|
||||
AppendToOutput("dczid_el0");
|
||||
break;
|
||||
default:
|
||||
AppendToOutput("S%d_%d_c%d_c%d_%d",
|
||||
instr->GetSysOp0(),
|
||||
|
||||
@@ -206,6 +206,8 @@ class Disassembler : public DecoderVisitor {
|
||||
void DisassembleSVEBitwiseTernary(const Instruction* instr);
|
||||
void DisassembleSVEFlogb(const Instruction* instr);
|
||||
void DisassembleSVEFPPair(const Instruction* instr);
|
||||
void DisassembleSVEPmull(const Instruction* instr);
|
||||
void DisassembleSVEPmull128(const Instruction* instr);
|
||||
|
||||
void DisassembleNoArgs(const Instruction* instr);
|
||||
|
||||
@@ -238,6 +240,11 @@ class Disassembler : public DecoderVisitor {
|
||||
void DisassembleNEONScalarShiftRightNarrowImm(const Instruction* instr);
|
||||
void DisassembleNEONScalar2RegMiscOnlyD(const Instruction* instr);
|
||||
void DisassembleNEONFPScalar2RegMisc(const Instruction* instr);
|
||||
void DisassembleNEONPolynomialMul(const Instruction* instr);
|
||||
void DisassembleNEON4Same(const Instruction* instr);
|
||||
void DisassembleNEONXar(const Instruction* instr);
|
||||
void DisassembleNEONRax1(const Instruction* instr);
|
||||
void DisassembleSHA512(const Instruction* instr);
|
||||
|
||||
void DisassembleMTELoadTag(const Instruction* instr);
|
||||
void DisassembleMTEStoreTag(const Instruction* instr);
|
||||
@@ -248,6 +255,9 @@ class Disassembler : public DecoderVisitor {
|
||||
void Disassemble_Xd_XnSP_Xm(const Instruction* instr);
|
||||
void Disassemble_Xd_XnSP_XmSP(const Instruction* instr);
|
||||
|
||||
void VisitCryptoSM3(const Instruction* instr);
|
||||
void VisitCryptoSM4(const Instruction* instr);
|
||||
|
||||
void Format(const Instruction* instr,
|
||||
const char* mnemonic,
|
||||
const char* format0,
|
||||
|
||||
@@ -603,6 +603,28 @@ std::pair<int, int> Instruction::GetSVEMulLongZmAndIndex() const {
|
||||
return std::make_pair(reg_code, index);
|
||||
}
|
||||
|
||||
// Get the register and index for NEON indexed multiplies.
|
||||
// Returns {register code, lane index} for the indexed (by-element) operand.
//
// The raw index is assembled from the H, L and M bits, with H as the most
// significant bit. The NEON size field then decides how many of those bits
// are index bits and how wide the register field is:
//   size 0/1 (H lanes): index is H:L:M, register code is masked to 4 bits.
//   size 2   (S lanes): index is H:L, register code is the full Rm.
//   size 3   (D lanes): index is H, register code is the full Rm.
std::pair<int, int> Instruction::GetNEONMulRmAndIndex() const {
|
||||
int reg_code = GetRm();
|
||||
int index = (GetNEONH() << 2) | (GetNEONL() << 1) | GetNEONM();
|
||||
switch (GetNEONSize()) {
|
||||
case 0: // FP H-sized elements.
|
||||
case 1: // Integer H-sized elements.
|
||||
// 4-bit Rm, 3-bit index.
|
||||
reg_code &= 0xf;
|
||||
break;
|
||||
case 2: // S-sized elements.
|
||||
// 5-bit Rm, 2-bit index.
|
||||
// Drop the M bit from the index.
index >>= 1;
|
||||
break;
|
||||
case 3: // FP D-sized elements.
|
||||
// 5-bit Rm, 1-bit index.
|
||||
// Drop both the L and M bits from the index.
index >>= 2;
|
||||
break;
|
||||
}
|
||||
return std::make_pair(reg_code, index);
|
||||
}
|
||||
|
||||
// Logical immediates can't encode zero, so a return value of zero is used to
|
||||
// indicate a failure case. Specifically, where the constraints on imm_s are
|
||||
// not met.
|
||||
@@ -1011,6 +1033,8 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
|
||||
return kFormat4H;
|
||||
case kFormat2D:
|
||||
return kFormat2S;
|
||||
case kFormat1Q:
|
||||
return kFormat1D;
|
||||
case kFormatH:
|
||||
return kFormatB;
|
||||
case kFormatS:
|
||||
@@ -1023,6 +1047,8 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
|
||||
return kFormatVnH;
|
||||
case kFormatVnD:
|
||||
return kFormatVnS;
|
||||
case kFormatVnQ:
|
||||
return kFormatVnD;
|
||||
default:
|
||||
VIXL_UNREACHABLE();
|
||||
return kFormatUndefined;
|
||||
@@ -1095,6 +1121,8 @@ VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform) {
|
||||
return kFormat2S;
|
||||
case kFormat2D:
|
||||
return kFormat4S;
|
||||
case kFormat1Q:
|
||||
return kFormat2D;
|
||||
case kFormatVnH:
|
||||
return kFormatVnB;
|
||||
case kFormatVnS:
|
||||
@@ -1246,6 +1274,7 @@ unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) {
|
||||
case kFormat8H:
|
||||
case kFormat4S:
|
||||
case kFormat2D:
|
||||
case kFormat1Q:
|
||||
return kQRegSize;
|
||||
default:
|
||||
VIXL_UNREACHABLE();
|
||||
@@ -1283,6 +1312,7 @@ unsigned LaneSizeInBitsFromFormat(VectorFormat vform) {
|
||||
case kFormat2D:
|
||||
case kFormatVnD:
|
||||
return 64;
|
||||
case kFormat1Q:
|
||||
case kFormatVnQ:
|
||||
return 128;
|
||||
case kFormatVnO:
|
||||
@@ -1348,6 +1378,7 @@ int LaneCountFromFormat(VectorFormat vform) {
|
||||
case kFormat2D:
|
||||
return 2;
|
||||
case kFormat1D:
|
||||
case kFormat1Q:
|
||||
case kFormatB:
|
||||
case kFormatH:
|
||||
case kFormatS:
|
||||
|
||||
@@ -217,9 +217,10 @@ enum VectorFormat {
|
||||
kFormatVnQ = kFormatSVEQ | kFormatSVE,
|
||||
kFormatVnO = kFormatSVEO | kFormatSVE,
|
||||
|
||||
// An artificial value, used by simulator trace tests and a few oddball
|
||||
// Artificial values, used by simulator trace tests and a few oddball
|
||||
// instructions (such as FMLAL).
|
||||
kFormat2H = 0xfffffffe
|
||||
kFormat2H = 0xfffffffe,
|
||||
kFormat1Q = 0xfffffffd
|
||||
};
|
||||
|
||||
// Instructions. ---------------------------------------------------------------
|
||||
@@ -372,6 +373,7 @@ class Instruction {
|
||||
|
||||
std::pair<int, int> GetSVEPermuteIndexAndLaneSizeLog2() const;
|
||||
|
||||
std::pair<int, int> GetNEONMulRmAndIndex() const;
|
||||
std::pair<int, int> GetSVEMulZmAndIndex() const;
|
||||
std::pair<int, int> GetSVEMulLongZmAndIndex() const;
|
||||
|
||||
@@ -854,11 +856,13 @@ class NEONFormatDecoder {
|
||||
// Set the format mapping for all or individual substitutions.
|
||||
void SetFormatMaps(const NEONFormatMap* format0,
|
||||
const NEONFormatMap* format1 = NULL,
|
||||
const NEONFormatMap* format2 = NULL) {
|
||||
const NEONFormatMap* format2 = NULL,
|
||||
const NEONFormatMap* format3 = NULL) {
|
||||
VIXL_ASSERT(format0 != NULL);
|
||||
formats_[0] = format0;
|
||||
formats_[1] = (format1 == NULL) ? formats_[0] : format1;
|
||||
formats_[2] = (format2 == NULL) ? formats_[1] : format2;
|
||||
formats_[3] = (format3 == NULL) ? formats_[2] : format3;
|
||||
}
|
||||
void SetFormatMap(unsigned index, const NEONFormatMap* format) {
|
||||
VIXL_ASSERT(index <= ArrayLength(formats_));
|
||||
@@ -877,12 +881,15 @@ class NEONFormatDecoder {
|
||||
const char* Substitute(const char* string,
|
||||
SubstitutionMode mode0 = kFormat,
|
||||
SubstitutionMode mode1 = kFormat,
|
||||
SubstitutionMode mode2 = kFormat) {
|
||||
SubstitutionMode mode2 = kFormat,
|
||||
SubstitutionMode mode3 = kFormat) {
|
||||
const char* subst0 = GetSubstitute(0, mode0);
|
||||
const char* subst1 = GetSubstitute(1, mode1);
|
||||
const char* subst2 = GetSubstitute(2, mode2);
|
||||
const char* subst3 = GetSubstitute(3, mode3);
|
||||
|
||||
if ((subst0 == NULL) || (subst1 == NULL) || (subst2 == NULL)) {
|
||||
if ((subst0 == NULL) || (subst1 == NULL) || (subst2 == NULL) ||
|
||||
(subst3 == NULL)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -891,7 +898,8 @@ class NEONFormatDecoder {
|
||||
string,
|
||||
subst0,
|
||||
subst1,
|
||||
subst2);
|
||||
subst2,
|
||||
subst3);
|
||||
return form_buffer_;
|
||||
}
|
||||
|
||||
@@ -1129,7 +1137,7 @@ class NEONFormatDecoder {
|
||||
}
|
||||
|
||||
Instr instrbits_;
|
||||
const NEONFormatMap* formats_[3];
|
||||
const NEONFormatMap* formats_[4];
|
||||
char form_buffer_[64];
|
||||
char mne_buffer_[16];
|
||||
};
|
||||
|
||||
+888
-188
File diff suppressed because it is too large
Load Diff
@@ -1240,11 +1240,14 @@ void MacroAssembler::Ccmp(const Register& rn,
|
||||
StatusFlags nzcv,
|
||||
Condition cond) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
|
||||
ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMN);
|
||||
} else {
|
||||
ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
|
||||
if (operand.IsImmediate()) {
|
||||
int64_t imm = operand.GetImmediate();
|
||||
if ((imm < 0) && CanBeNegated(imm)) {
|
||||
ConditionalCompareMacro(rn, -imm, nzcv, cond, CCMN);
|
||||
return;
|
||||
}
|
||||
}
|
||||
ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
|
||||
}
|
||||
|
||||
|
||||
@@ -1253,11 +1256,14 @@ void MacroAssembler::Ccmn(const Register& rn,
|
||||
StatusFlags nzcv,
|
||||
Condition cond) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
|
||||
ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMP);
|
||||
} else {
|
||||
ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
|
||||
if (operand.IsImmediate()) {
|
||||
int64_t imm = operand.GetImmediate();
|
||||
if ((imm < 0) && CanBeNegated(imm)) {
|
||||
ConditionalCompareMacro(rn, -imm, nzcv, cond, CCMP);
|
||||
return;
|
||||
}
|
||||
}
|
||||
ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
|
||||
}
|
||||
|
||||
|
||||
@@ -1491,8 +1497,7 @@ void MacroAssembler::Add(const Register& rd,
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
if (operand.IsImmediate()) {
|
||||
int64_t imm = operand.GetImmediate();
|
||||
if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
|
||||
IsImmAddSub(-imm)) {
|
||||
if ((imm < 0) && CanBeNegated(imm) && IsImmAddSub(-imm)) {
|
||||
AddSubMacro(rd, rn, -imm, S, SUB);
|
||||
return;
|
||||
}
|
||||
@@ -1579,8 +1584,7 @@ void MacroAssembler::Sub(const Register& rd,
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
if (operand.IsImmediate()) {
|
||||
int64_t imm = operand.GetImmediate();
|
||||
if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
|
||||
IsImmAddSub(-imm)) {
|
||||
if ((imm < 0) && CanBeNegated(imm) && IsImmAddSub(-imm)) {
|
||||
AddSubMacro(rd, rn, -imm, S, ADD);
|
||||
return;
|
||||
}
|
||||
@@ -1749,7 +1753,7 @@ void MacroAssembler::Fmov(VRegister vd, Float16 imm) {
|
||||
|
||||
void MacroAssembler::Neg(const Register& rd, const Operand& operand) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
if (operand.IsImmediate()) {
|
||||
if (operand.IsImmediate() && CanBeNegated(operand.GetImmediate())) {
|
||||
Mov(rd, -operand.GetImmediate());
|
||||
} else {
|
||||
Sub(rd, AppropriateZeroRegFor(rd), operand);
|
||||
@@ -2065,6 +2069,22 @@ void MacroAssembler::Setf16(const Register& wn) {
|
||||
setf16(wn);
|
||||
}
|
||||
|
||||
// Macro form of chkfeat that accepts any X register.
//
// The chkfeat instruction is only ever emitted on x16 here. If `xdn` is
// already x16 it is used directly; otherwise x16 is acquired from the scratch
// register list, `xdn` is copied into it, chkfeat is emitted, and the result
// is copied back into `xdn`. Aborts if x16 cannot be acquired as a scratch
// register.
// NOTE(review): presumably the underlying instruction only operates on x16 -
// confirm against the assembler's chkfeat().
void MacroAssembler::Chkfeat(const Register& xdn) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
MacroEmissionCheckScope guard(this);
|
||||
if (xdn.Is(x16)) {
|
||||
chkfeat(xdn);
|
||||
} else {
|
||||
UseScratchRegisterScope temps(this);
|
||||
if (temps.TryAcquire(x16)) {
|
||||
Mov(x16, xdn);
|
||||
chkfeat(x16);
|
||||
Mov(xdn, x16);
|
||||
} else {
|
||||
// x16 is not in the scratch list, so it cannot be safely clobbered.
VIXL_ABORT();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define DEFINE_FUNCTION(FN, REGTYPE, REG, OP) \
|
||||
void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) { \
|
||||
|
||||
@@ -2844,6 +2844,27 @@ MacroAssembler(PandaAllocator* allocator, byte* buffer,
|
||||
subps(xd, xn, xm);
|
||||
}
|
||||
void Cmpp(const Register& xn, const Register& xm) { Subps(xzr, xn, xm); }
|
||||
void Chkfeat(const Register& xdn);
|
||||
// Macro wrapper for gcspushm: asserts that macro instructions are allowed,
// opens a single-instruction emission scope, then emits gcspushm(rt).
void Gcspushm(const Register& rt) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
SingleEmissionCheckScope guard(this);
|
||||
gcspushm(rt);
|
||||
}
|
||||
// Macro wrapper for gcspopm: asserts that macro instructions are allowed,
// opens a single-instruction emission scope, then emits gcspopm(rt).
// `rt` defaults to xzr when the caller does not supply a register.
void Gcspopm(const Register& rt = xzr) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
SingleEmissionCheckScope guard(this);
|
||||
gcspopm(rt);
|
||||
}
|
||||
// Macro wrapper for gcsss1: asserts that macro instructions are allowed,
// opens a single-instruction emission scope, then emits gcsss1(rt).
void Gcsss1(const Register& rt) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
SingleEmissionCheckScope guard(this);
|
||||
gcsss1(rt);
|
||||
}
|
||||
// Macro wrapper for gcsss2: asserts that macro instructions are allowed,
// opens a single-instruction emission scope, then emits gcsss2(rt).
void Gcsss2(const Register& rt) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
SingleEmissionCheckScope guard(this);
|
||||
gcsss2(rt);
|
||||
}
|
||||
|
||||
// NEON 3 vector register instructions.
|
||||
#define NEON_3VREG_MACRO_LIST(V) \
|
||||
@@ -2893,6 +2914,7 @@ MacroAssembler(PandaAllocator* allocator, byte* buffer,
|
||||
V(pmull2, Pmull2) \
|
||||
V(raddhn, Raddhn) \
|
||||
V(raddhn2, Raddhn2) \
|
||||
V(rax1, Rax1) \
|
||||
V(rsubhn, Rsubhn) \
|
||||
V(rsubhn2, Rsubhn2) \
|
||||
V(saba, Saba) \
|
||||
@@ -2905,8 +2927,21 @@ MacroAssembler(PandaAllocator* allocator, byte* buffer,
|
||||
V(saddl2, Saddl2) \
|
||||
V(saddw, Saddw) \
|
||||
V(saddw2, Saddw2) \
|
||||
V(sha1c, Sha1c) \
|
||||
V(sha1m, Sha1m) \
|
||||
V(sha1p, Sha1p) \
|
||||
V(sha1su0, Sha1su0) \
|
||||
V(sha256h, Sha256h) \
|
||||
V(sha256h2, Sha256h2) \
|
||||
V(sha256su1, Sha256su1) \
|
||||
V(sha512h, Sha512h) \
|
||||
V(sha512h2, Sha512h2) \
|
||||
V(sha512su1, Sha512su1) \
|
||||
V(shadd, Shadd) \
|
||||
V(shsub, Shsub) \
|
||||
V(sm3partw1, Sm3partw1) \
|
||||
V(sm3partw2, Sm3partw2) \
|
||||
V(sm4ekey, Sm4ekey) \
|
||||
V(smax, Smax) \
|
||||
V(smaxp, Smaxp) \
|
||||
V(smin, Smin) \
|
||||
@@ -3001,6 +3036,10 @@ MacroAssembler(PandaAllocator* allocator, byte* buffer,
|
||||
V(abs, Abs) \
|
||||
V(addp, Addp) \
|
||||
V(addv, Addv) \
|
||||
V(aesd, Aesd) \
|
||||
V(aese, Aese) \
|
||||
V(aesimc, Aesimc) \
|
||||
V(aesmc, Aesmc) \
|
||||
V(cls, Cls) \
|
||||
V(clz, Clz) \
|
||||
V(cnt, Cnt) \
|
||||
@@ -3049,6 +3088,11 @@ MacroAssembler(PandaAllocator* allocator, byte* buffer,
|
||||
V(sadalp, Sadalp) \
|
||||
V(saddlp, Saddlp) \
|
||||
V(saddlv, Saddlv) \
|
||||
V(sha1h, Sha1h) \
|
||||
V(sha1su1, Sha1su1) \
|
||||
V(sha256su0, Sha256su0) \
|
||||
V(sha512su0, Sha512su0) \
|
||||
V(sm4e, Sm4e) \
|
||||
V(smaxv, Smaxv) \
|
||||
V(sminv, Sminv) \
|
||||
V(sqabs, Sqabs) \
|
||||
@@ -3139,7 +3183,11 @@ MacroAssembler(PandaAllocator* allocator, byte* buffer,
|
||||
V(umlsl, Umlsl) \
|
||||
V(umlsl2, Umlsl2) \
|
||||
V(sudot, Sudot) \
|
||||
V(usdot, Usdot)
|
||||
V(usdot, Usdot) \
|
||||
V(sm3tt1a, Sm3tt1a) \
|
||||
V(sm3tt1b, Sm3tt1b) \
|
||||
V(sm3tt2a, Sm3tt2a) \
|
||||
V(sm3tt2b, Sm3tt2b)
|
||||
|
||||
|
||||
#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
|
||||
@@ -3258,6 +3306,14 @@ MacroAssembler(PandaAllocator* allocator, byte* buffer,
|
||||
SVE_3VREG_COMMUTATIVE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
|
||||
#undef DEFINE_MACRO_ASM_FUNC
|
||||
|
||||
// Macro wrapper for the four-operand bcax instruction: asserts that macro
// instructions are allowed, opens a single-instruction emission scope, then
// forwards vd, vn, vm and va unchanged to bcax().
void Bcax(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
const VRegister& va) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
SingleEmissionCheckScope guard(this);
|
||||
bcax(vd, vn, vm, va);
|
||||
}
|
||||
void Bic(const VRegister& vd, const int imm8, const int left_shift = 0) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
SingleEmissionCheckScope guard(this);
|
||||
@@ -3298,6 +3354,14 @@ MacroAssembler(PandaAllocator* allocator, byte* buffer,
|
||||
SingleEmissionCheckScope guard(this);
|
||||
dup(vd, rn);
|
||||
}
|
||||
// Macro wrapper for the four-operand eor3 instruction: asserts that macro
// instructions are allowed, opens a single-instruction emission scope, then
// forwards vd, vn, vm and va unchanged to eor3().
void Eor3(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
const VRegister& va) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
SingleEmissionCheckScope guard(this);
|
||||
eor3(vd, vn, vm, va);
|
||||
}
|
||||
void Ext(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
@@ -3594,6 +3658,14 @@ MacroAssembler(PandaAllocator* allocator, byte* buffer,
|
||||
SingleEmissionCheckScope guard(this);
|
||||
st4(vt, vt2, vt3, vt4, lane, dst);
|
||||
}
|
||||
// Macro wrapper for the four-operand sm3ss1 instruction: asserts that macro
// instructions are allowed, opens a single-instruction emission scope, then
// forwards vd, vn, vm and va unchanged to sm3ss1().
void Sm3ss1(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
const VRegister& va) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
SingleEmissionCheckScope guard(this);
|
||||
sm3ss1(vd, vn, vm, va);
|
||||
}
|
||||
void Smov(const Register& rd, const VRegister& vn, int vn_index) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
SingleEmissionCheckScope guard(this);
|
||||
@@ -3604,6 +3676,14 @@ MacroAssembler(PandaAllocator* allocator, byte* buffer,
|
||||
SingleEmissionCheckScope guard(this);
|
||||
umov(rd, vn, vn_index);
|
||||
}
|
||||
// Macro wrapper for the xar instruction: asserts that macro instructions are
// allowed, opens a single-instruction emission scope, then forwards the three
// vector registers and the `rotate` amount unchanged to xar().
void Xar(const VRegister& vd,
|
||||
const VRegister& vn,
|
||||
const VRegister& vm,
|
||||
int rotate) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
SingleEmissionCheckScope guard(this);
|
||||
xar(vd, vn, vm, rotate);
|
||||
}
|
||||
void Crc32b(const Register& rd, const Register& rn, const Register& rm) {
|
||||
VIXL_ASSERT(allow_macro_instructions_);
|
||||
SingleEmissionCheckScope guard(this);
|
||||
@@ -8311,9 +8391,10 @@ MacroAssembler(PandaAllocator* allocator, byte* buffer,
|
||||
UseScratchRegisterScope* scratch_scope);
|
||||
|
||||
bool LabelIsOutOfRange(Label* label, ImmBranchType branch_type) {
|
||||
int64_t offset = label->GetLocation() - GetCursorOffset();
|
||||
VIXL_ASSERT(IsMultiple(offset, kInstructionSize));
|
||||
return !Instruction::IsValidImmPCOffset(branch_type,
|
||||
label->GetLocation() -
|
||||
GetCursorOffset());
|
||||
offset / kInstructionSize);
|
||||
}
|
||||
|
||||
void ConfigureSimulatorCPUFeaturesHelper(const CPUFeatures& features,
|
||||
@@ -8727,6 +8808,16 @@ class UseScratchRegisterScope {
|
||||
return AcquireFrom(available, kGoverningPRegisterMask).P();
|
||||
}
|
||||
|
||||
// TODO: extend to other scratch register lists.
|
||||
// Try to reserve one specific register as a scratch register.
//
// Looks `required_reg` up in the macro assembler's scratch register list. If
// the list includes an alias of it, the register is removed from the list and
// true is returned. Otherwise the list is left untouched and false is
// returned, so the caller must not clobber the register.
bool TryAcquire(const Register& required_reg) {
|
||||
CPURegList* list = masm_->GetScratchRegisterList();
|
||||
if (list->IncludesAliasOf(required_reg)) {
|
||||
list->Remove(required_reg);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
Register AcquireRegisterOfSize(int size_in_bits);
|
||||
Register AcquireSameSizeAs(const Register& reg) {
|
||||
return AcquireRegisterOfSize(reg.GetSizeInBits());
|
||||
|
||||
@@ -151,7 +151,7 @@ uint64_t Simulator::AuthPAC(uint64_t ptr,
|
||||
|
||||
uint64_t pac = ComputePAC(original_ptr, context, key);
|
||||
|
||||
uint64_t error_code = 1 << key.number;
|
||||
uint64_t error_code = uint64_t{1} << key.number;
|
||||
if ((pac & pac_mask) == (ptr & pac_mask)) {
|
||||
return original_ptr;
|
||||
} else {
|
||||
|
||||
+914
-358
File diff suppressed because it is too large
Load Diff
+435
-56
@@ -28,12 +28,14 @@
|
||||
#define VIXL_AARCH64_SIMULATOR_AARCH64_H_
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <random>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "../cpu-features.h"
|
||||
#include "../globals-vixl.h"
|
||||
#include "../utils-vixl.h"
|
||||
#include "cpu-features.h"
|
||||
|
||||
#include "abi-aarch64.h"
|
||||
#include "cpu-features-auditor-aarch64.h"
|
||||
@@ -68,6 +70,28 @@ namespace aarch64 {
|
||||
class Simulator;
|
||||
struct RuntimeCallStructHelper;
|
||||
|
||||
// Outcome of an attempted memory access: Success (0) or Failure (1).
enum class MemoryAccessResult { Success = 0, Failure = 1 };
|
||||
|
||||
// Try to access a piece of memory at the given address. Accessing that memory
|
||||
// might raise a signal which, if handled by a custom signal handler, should
|
||||
// setup the native and simulated context in order to continue. Return whether
|
||||
// the memory access failed (i.e: raised a signal) or succeeded.
|
||||
MemoryAccessResult TryMemoryAccess(uintptr_t address, uintptr_t access_size);
|
||||
|
||||
#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
|
||||
// Access a byte of memory from the address at the given offset. If the memory
|
||||
// could be accessed then return MemoryAccessResult::Success. If the memory
|
||||
// could not be accessed, and therefore raised a signal, setup the simulated
|
||||
// context and return MemoryAccessResult::Failure.
|
||||
//
|
||||
// If a signal is raised then it is expected that the signal handler will place
|
||||
// MemoryAccessResult::Failure in the native return register and the address of
|
||||
// _vixl_internal_AccessMemory_continue into the native instruction pointer.
|
||||
extern "C" MemoryAccessResult _vixl_internal_ReadMemory(uintptr_t address,
|
||||
uintptr_t offset);
|
||||
extern "C" uintptr_t _vixl_internal_AccessMemory_continue();
|
||||
#endif // VIXL_ENABLE_IMPLICIT_CHECKS
|
||||
|
||||
class SimStack {
|
||||
public:
|
||||
SimStack() {}
|
||||
@@ -136,7 +160,7 @@ class SimStack {
|
||||
|
||||
// Allocate the stack, locking the parameters.
|
||||
Allocated Allocate() {
|
||||
size_t align_to = 1 << align_log2_;
|
||||
size_t align_to = uint64_t{1} << align_log2_;
|
||||
size_t l = AlignUp(limit_guard_size_, align_to);
|
||||
size_t u = AlignUp(usable_size_, align_to);
|
||||
size_t b = AlignUp(base_guard_size_, align_to);
|
||||
@@ -366,7 +390,7 @@ class Memory {
|
||||
}
|
||||
|
||||
template <typename T, typename A>
|
||||
T Read(A address, Instruction const* pc = nullptr) const {
|
||||
std::optional<T> Read(A address, Instruction const* pc = nullptr) const {
|
||||
T value;
|
||||
VIXL_STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
|
||||
(sizeof(value) == 4) || (sizeof(value) == 8) ||
|
||||
@@ -378,12 +402,16 @@ class Memory {
|
||||
if (!IsMTETagsMatched(address, pc)) {
|
||||
VIXL_ABORT_WITH_MSG("Tag mismatch.");
|
||||
}
|
||||
if (TryMemoryAccess(reinterpret_cast<uintptr_t>(base), sizeof(value)) ==
|
||||
MemoryAccessResult::Failure) {
|
||||
return std::nullopt;
|
||||
}
|
||||
memcpy(&value, base, sizeof(value));
|
||||
return value;
|
||||
}
|
||||
|
||||
template <typename T, typename A>
|
||||
void Write(A address, T value, Instruction const* pc = nullptr) const {
|
||||
bool Write(A address, T value, Instruction const* pc = nullptr) const {
|
||||
VIXL_STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
|
||||
(sizeof(value) == 4) || (sizeof(value) == 8) ||
|
||||
(sizeof(value) == 16));
|
||||
@@ -394,11 +422,16 @@ class Memory {
|
||||
if (!IsMTETagsMatched(address, pc)) {
|
||||
VIXL_ABORT_WITH_MSG("Tag mismatch.");
|
||||
}
|
||||
if (TryMemoryAccess(reinterpret_cast<uintptr_t>(base), sizeof(value)) ==
|
||||
MemoryAccessResult::Failure) {
|
||||
return false;
|
||||
}
|
||||
memcpy(base, &value, sizeof(value));
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename A>
|
||||
uint64_t ReadUint(int size_in_bytes, A address) const {
|
||||
std::optional<uint64_t> ReadUint(int size_in_bytes, A address) const {
|
||||
switch (size_in_bytes) {
|
||||
case 1:
|
||||
return Read<uint8_t>(address);
|
||||
@@ -414,7 +447,7 @@ class Memory {
|
||||
}
|
||||
|
||||
template <typename A>
|
||||
int64_t ReadInt(int size_in_bytes, A address) const {
|
||||
std::optional<int64_t> ReadInt(int size_in_bytes, A address) const {
|
||||
switch (size_in_bytes) {
|
||||
case 1:
|
||||
return Read<int8_t>(address);
|
||||
@@ -430,7 +463,7 @@ class Memory {
|
||||
}
|
||||
|
||||
template <typename A>
|
||||
void Write(int size_in_bytes, A address, uint64_t value) const {
|
||||
bool Write(int size_in_bytes, A address, uint64_t value) const {
|
||||
switch (size_in_bytes) {
|
||||
case 1:
|
||||
return Write(address, static_cast<uint8_t>(value));
|
||||
@@ -442,6 +475,7 @@ class Memory {
|
||||
return Write(address, value);
|
||||
}
|
||||
VIXL_UNREACHABLE();
|
||||
return false;
|
||||
}
|
||||
|
||||
void AppendMetaData(MetaDataDepot* metadata_depot) {
|
||||
@@ -650,7 +684,7 @@ class LogicPRegister {
|
||||
|
||||
void SetAllBits() {
|
||||
int chunk_size = sizeof(ChunkType) * kBitsPerByte;
|
||||
ChunkType bits = GetUintMask(chunk_size);
|
||||
ChunkType bits = static_cast<ChunkType>(GetUintMask(chunk_size));
|
||||
for (int lane = 0;
|
||||
lane < (static_cast<int>(register_.GetSizeInBits() / chunk_size));
|
||||
lane++) {
|
||||
@@ -703,6 +737,8 @@ class LogicPRegister {
|
||||
SimPRegister& register_;
|
||||
};
|
||||
|
||||
using vixl_uint128_t = std::pair<uint64_t, uint64_t>;
|
||||
|
||||
// Representation of a vector register, with typed getters and setters for lanes
|
||||
// and additional information to represent lane state.
|
||||
class LogicVRegister {
|
||||
@@ -831,6 +867,16 @@ class LogicVRegister {
|
||||
}
|
||||
}
|
||||
|
||||
// Write a 128-bit value, passed as a pair of 64-bit halves, to lane `index`.
//
// For formats whose lanes are 64 bits or narrower only `value.second` is
// written, via the 64-bit SetUint overload. Otherwise the format must be
// kFormat1Q or kFormatVnQ, and the Q lane is written as two D lanes:
// `value.second` goes to D lane 2 * index and `value.first` to D lane
// 2 * index + 1, so `second` lands in the lower-numbered lane.
void SetUint(VectorFormat vform, int index, vixl_uint128_t value) const {
|
||||
if (LaneSizeInBitsFromFormat(vform) <= 64) {
|
||||
SetUint(vform, index, value.second);
|
||||
return;
|
||||
}
|
||||
VIXL_ASSERT((vform == kFormat1Q) || (vform == kFormatVnQ));
|
||||
SetUint(kFormatVnD, 2 * index, value.second);
|
||||
SetUint(kFormatVnD, 2 * index + 1, value.first);
|
||||
}
|
||||
|
||||
void SetUintArray(VectorFormat vform, const uint64_t* src) const {
|
||||
ClearForWrite(vform);
|
||||
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
|
||||
@@ -1234,9 +1280,10 @@ class SimExclusiveGlobalMonitor {
|
||||
uint32_t seed_;
|
||||
};
|
||||
|
||||
|
||||
class Debugger;
|
||||
|
||||
template <uint32_t mode>
|
||||
uint64_t CryptoOp(uint64_t x, uint64_t y, uint64_t z);
|
||||
|
||||
class Simulator : public DecoderVisitor {
|
||||
public:
|
||||
@@ -1269,7 +1316,7 @@ class Simulator : public DecoderVisitor {
|
||||
|
||||
|
||||
#if defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
|
||||
(defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
|
||||
(defined(_MSC_VER) || defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
|
||||
// Templated `RunFrom` version taking care of passing arguments and returning
|
||||
// the result value.
|
||||
// This allows code like:
|
||||
@@ -1472,6 +1519,7 @@ class Simulator : public DecoderVisitor {
|
||||
void SimulateSVESaturatingMulAddHigh(const Instruction* instr);
|
||||
void SimulateSVESaturatingMulHighIndex(const Instruction* instr);
|
||||
void SimulateSVEFPConvertLong(const Instruction* instr);
|
||||
void SimulateSVEPmull128(const Instruction* instr);
|
||||
void SimulateMatrixMul(const Instruction* instr);
|
||||
void SimulateSVEFPMatrixMul(const Instruction* instr);
|
||||
void SimulateNEONMulByElementLong(const Instruction* instr);
|
||||
@@ -1479,6 +1527,7 @@ class Simulator : public DecoderVisitor {
|
||||
void SimulateNEONFPMulByElementLong(const Instruction* instr);
|
||||
void SimulateNEONComplexMulByElement(const Instruction* instr);
|
||||
void SimulateNEONDotProdByElement(const Instruction* instr);
|
||||
void SimulateNEONSHA3(const Instruction* instr);
|
||||
void SimulateMTEAddSubTag(const Instruction* instr);
|
||||
void SimulateMTETagMaskInsert(const Instruction* instr);
|
||||
void SimulateMTESubPointer(const Instruction* instr);
|
||||
@@ -1498,7 +1547,10 @@ class Simulator : public DecoderVisitor {
|
||||
void SimulateSetGM(const Instruction* instr);
|
||||
void SimulateSignedMinMax(const Instruction* instr);
|
||||
void SimulateUnsignedMinMax(const Instruction* instr);
|
||||
void SimulateSHA512(const Instruction* instr);
|
||||
|
||||
void VisitCryptoSM3(const Instruction* instr);
|
||||
void VisitCryptoSM4(const Instruction* instr);
|
||||
|
||||
// Integer register accessors.
|
||||
|
||||
@@ -2029,62 +2081,66 @@ class Simulator : public DecoderVisitor {
|
||||
}
|
||||
|
||||
template <typename T, typename A>
|
||||
T MemRead(A address) const {
|
||||
std::optional<T> MemRead(A address) const {
|
||||
Instruction const* pc = ReadPc();
|
||||
return memory_.Read<T>(address, pc);
|
||||
}
|
||||
|
||||
template <typename T, typename A>
|
||||
void MemWrite(A address, T value) const {
|
||||
bool MemWrite(A address, T value) const {
|
||||
Instruction const* pc = ReadPc();
|
||||
return memory_.Write(address, value, pc);
|
||||
}
|
||||
|
||||
template <typename A>
|
||||
uint64_t MemReadUint(int size_in_bytes, A address) const {
|
||||
std::optional<uint64_t> MemReadUint(int size_in_bytes, A address) const {
|
||||
return memory_.ReadUint(size_in_bytes, address);
|
||||
}
|
||||
|
||||
template <typename A>
|
||||
int64_t MemReadInt(int size_in_bytes, A address) const {
|
||||
std::optional<int64_t> MemReadInt(int size_in_bytes, A address) const {
|
||||
return memory_.ReadInt(size_in_bytes, address);
|
||||
}
|
||||
|
||||
template <typename A>
|
||||
void MemWrite(int size_in_bytes, A address, uint64_t value) const {
|
||||
bool MemWrite(int size_in_bytes, A address, uint64_t value) const {
|
||||
return memory_.Write(size_in_bytes, address, value);
|
||||
}
|
||||
|
||||
void LoadLane(LogicVRegister dst,
|
||||
bool LoadLane(LogicVRegister dst,
|
||||
VectorFormat vform,
|
||||
int index,
|
||||
uint64_t addr) const {
|
||||
unsigned msize_in_bytes = LaneSizeInBytesFromFormat(vform);
|
||||
LoadUintToLane(dst, vform, msize_in_bytes, index, addr);
|
||||
return LoadUintToLane(dst, vform, msize_in_bytes, index, addr);
|
||||
}
|
||||
|
||||
void LoadUintToLane(LogicVRegister dst,
|
||||
bool LoadUintToLane(LogicVRegister dst,
|
||||
VectorFormat vform,
|
||||
unsigned msize_in_bytes,
|
||||
int index,
|
||||
uint64_t addr) const {
|
||||
dst.SetUint(vform, index, MemReadUint(msize_in_bytes, addr));
|
||||
VIXL_DEFINE_OR_RETURN_FALSE(value, MemReadUint(msize_in_bytes, addr));
|
||||
dst.SetUint(vform, index, value);
|
||||
return true;
|
||||
}
|
||||
|
||||
void LoadIntToLane(LogicVRegister dst,
|
||||
bool LoadIntToLane(LogicVRegister dst,
|
||||
VectorFormat vform,
|
||||
unsigned msize_in_bytes,
|
||||
int index,
|
||||
uint64_t addr) const {
|
||||
dst.SetInt(vform, index, MemReadInt(msize_in_bytes, addr));
|
||||
VIXL_DEFINE_OR_RETURN_FALSE(value, MemReadInt(msize_in_bytes, addr));
|
||||
dst.SetInt(vform, index, value);
|
||||
return true;
|
||||
}
|
||||
|
||||
void StoreLane(const LogicVRegister& src,
|
||||
bool StoreLane(const LogicVRegister& src,
|
||||
VectorFormat vform,
|
||||
int index,
|
||||
uint64_t addr) const {
|
||||
unsigned msize_in_bytes = LaneSizeInBytesFromFormat(vform);
|
||||
MemWrite(msize_in_bytes, addr, src.Uint(vform, index));
|
||||
return MemWrite(msize_in_bytes, addr, src.Uint(vform, index));
|
||||
}
|
||||
|
||||
uint64_t ComputeMemOperandAddress(const MemOperand& mem_op) const;
|
||||
@@ -2095,12 +2151,14 @@ class Simulator : public DecoderVisitor {
|
||||
return ReadCPURegister<T>(operand.GetCPURegister());
|
||||
} else {
|
||||
VIXL_ASSERT(operand.IsMemOperand());
|
||||
return MemRead<T>(ComputeMemOperandAddress(operand.GetMemOperand()));
|
||||
auto res = MemRead<T>(ComputeMemOperandAddress(operand.GetMemOperand()));
|
||||
VIXL_ASSERT(res);
|
||||
return *res;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void WriteGenericOperand(GenericOperand operand,
|
||||
bool WriteGenericOperand(GenericOperand operand,
|
||||
T value,
|
||||
RegLogMode log_mode = LogRegWrites) {
|
||||
if (operand.IsCPURegister()) {
|
||||
@@ -2116,8 +2174,9 @@ class Simulator : public DecoderVisitor {
|
||||
WriteCPURegister(operand.GetCPURegister(), raw, log_mode);
|
||||
} else {
|
||||
VIXL_ASSERT(operand.IsMemOperand());
|
||||
MemWrite(ComputeMemOperandAddress(operand.GetMemOperand()), value);
|
||||
return MemWrite(ComputeMemOperandAddress(operand.GetMemOperand()), value);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ReadN() const { return nzcv_.GetN() != 0; }
|
||||
@@ -2493,12 +2552,16 @@ class Simulator : public DecoderVisitor {
|
||||
// Other state updates, including system registers.
|
||||
void PrintSystemRegister(SystemRegister id);
|
||||
void PrintTakenBranch(const Instruction* target);
|
||||
void PrintGCS(bool is_push, uint64_t addr, size_t entry);
|
||||
void LogSystemRegister(SystemRegister id) {
|
||||
if (ShouldTraceSysRegs()) PrintSystemRegister(id);
|
||||
}
|
||||
void LogTakenBranch(const Instruction* target) {
|
||||
if (ShouldTraceBranches()) PrintTakenBranch(target);
|
||||
}
|
||||
void LogGCS(bool is_push, uint64_t addr, size_t entry) {
|
||||
if (ShouldTraceSysRegs()) PrintGCS(is_push, addr, entry);
|
||||
}
|
||||
|
||||
// Trace memory accesses.
|
||||
|
||||
@@ -2528,6 +2591,14 @@ class Simulator : public DecoderVisitor {
|
||||
void PrintPWrite(int rt_code, uintptr_t address) {
|
||||
PrintPAccess(rt_code, "->", address);
|
||||
}
|
||||
// Print a trace line of the form "# 0x<value> -> 0x<address>" to stream_,
// with the address wrapped in the memory-address colour codes.
void PrintWriteU64(uint64_t x, uintptr_t address) {
  // `x` is a uint64_t, so it must be formatted with PRIx64. A plain "lx"
  // conversion expects `unsigned long`, which is only 32 bits wide on some
  // hosts (e.g. LLP64 or 32-bit targets) and would then misread the varargs.
  fprintf(stream_,
          "# 0x%016" PRIx64 " -> %s0x%016" PRIxPTR "%s\n",
          x,
          clr_memory_address,
          address,
          clr_normal);
}
|
||||
|
||||
// Like Print* (above), but respect GetTraceParameters().
|
||||
void LogRead(int rt_code, PrintRegisterFormat format, uintptr_t address) {
|
||||
@@ -2562,6 +2633,9 @@ class Simulator : public DecoderVisitor {
|
||||
void LogPWrite(int rt_code, uintptr_t address) {
|
||||
if (ShouldTraceWrites()) PrintPWrite(rt_code, address);
|
||||
}
|
||||
void LogWriteU64(uint64_t x, uintptr_t address) {
|
||||
if (ShouldTraceWrites()) PrintWriteU64(x, address);
|
||||
}
|
||||
void LogMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value) {
|
||||
if (ShouldTraceWrites()) PrintMemTransfer(dst, src, value);
|
||||
}
|
||||
@@ -2860,7 +2934,7 @@ class Simulator : public DecoderVisitor {
|
||||
}
|
||||
|
||||
if (offset == 0) {
|
||||
while ((exclude & (1 << tag)) != 0) {
|
||||
while ((exclude & (uint64_t{1} << tag)) != 0) {
|
||||
tag = (tag + 1) % 16;
|
||||
}
|
||||
}
|
||||
@@ -2868,7 +2942,7 @@ class Simulator : public DecoderVisitor {
|
||||
while (offset > 0) {
|
||||
offset--;
|
||||
tag = (tag + 1) % 16;
|
||||
while ((exclude & (1 << tag)) != 0) {
|
||||
while ((exclude & (uint64_t{1} << tag)) != 0) {
|
||||
tag = (tag + 1) % 16;
|
||||
}
|
||||
}
|
||||
@@ -2880,12 +2954,15 @@ class Simulator : public DecoderVisitor {
|
||||
return (addr & ~(UINT64_C(0xf) << 56)) | (tag << 56);
|
||||
}
|
||||
|
||||
#if __linux__
|
||||
#define VIXL_HAS_SIMULATED_MMAP
|
||||
// Create or remove a mapping with memory protection. Memory attributes such
|
||||
// as MTE and BTI are represented by metadata in Simulator.
|
||||
void* Mmap(
|
||||
void* address, size_t length, int prot, int flags, int fd, off_t offset);
|
||||
|
||||
int Munmap(void* address, size_t length, int prot);
|
||||
#endif
|
||||
|
||||
// The common CPUFeatures interface with the set of available features.
|
||||
|
||||
@@ -2908,7 +2985,7 @@ class Simulator : public DecoderVisitor {
|
||||
// Also, the initialisation of the tuples in RuntimeCall(Non)Void is incorrect
|
||||
// in GCC before 4.9.1: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51253
|
||||
#if defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
|
||||
(defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
|
||||
(defined(_MSC_VER) || defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
|
||||
|
||||
#define VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
|
||||
|
||||
@@ -2966,7 +3043,10 @@ class Simulator : public DecoderVisitor {
|
||||
R return_value = DoRuntimeCall(function,
|
||||
argument_operands,
|
||||
__local_index_sequence_for<P...>{});
|
||||
WriteGenericOperand(abi.GetReturnGenericOperand<R>(), return_value);
|
||||
bool succeeded =
|
||||
WriteGenericOperand(abi.GetReturnGenericOperand<R>(), return_value);
|
||||
USE(succeeded);
|
||||
VIXL_ASSERT(succeeded);
|
||||
}
|
||||
|
||||
template <typename R, typename... P>
|
||||
@@ -3154,6 +3234,43 @@ class Simulator : public DecoderVisitor {
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
|
||||
// Returns true if the faulting instruction address (usually the program
|
||||
// counter or instruction pointer) comes from an internal VIXL memory access.
|
||||
// This can be used by signal handlers to check if a signal was raised from
|
||||
// the simulator (via TryMemoryAccess) before the actual
|
||||
// access occurs.
|
||||
bool IsSimulatedMemoryAccess(uintptr_t fault_pc) const {
|
||||
return (fault_pc ==
|
||||
reinterpret_cast<uintptr_t>(&_vixl_internal_ReadMemory));
|
||||
}
|
||||
|
||||
// Get the instruction address of the internal VIXL memory access continuation
|
||||
// label. Signal handlers can resume execution at this address to return to
|
||||
// TryMemoryAccess which will continue simulation.
|
||||
uintptr_t GetSignalReturnAddress() const {
|
||||
return reinterpret_cast<uintptr_t>(&_vixl_internal_AccessMemory_continue);
|
||||
}
|
||||
|
||||
// Replace the fault address reported by the kernel with the actual faulting
|
||||
// address.
|
||||
//
|
||||
// This is required because TryMemoryAccess reads a section of
|
||||
// memory 1 byte at a time meaning the fault address reported may not be the
|
||||
// base address of memory being accessed.
|
||||
void ReplaceFaultAddress(siginfo_t* siginfo, void* context) {
|
||||
#ifdef __x86_64__
|
||||
// The base address being accessed is passed in as the first argument to
|
||||
// _vixl_internal_ReadMemory.
|
||||
ucontext_t* uc = reinterpret_cast<ucontext_t*>(context);
|
||||
siginfo->si_addr = reinterpret_cast<void*>(uc->uc_mcontext.gregs[REG_RDI]);
|
||||
#else
|
||||
USE(siginfo);
|
||||
USE(context);
|
||||
#endif // __x86_64__
|
||||
}
|
||||
#endif // VIXL_ENABLE_IMPLICIT_CHECKS
|
||||
|
||||
protected:
|
||||
const char* clr_normal;
|
||||
const char* clr_flag_name;
|
||||
@@ -3234,8 +3351,9 @@ class Simulator : public DecoderVisitor {
|
||||
uint64_t left,
|
||||
uint64_t right,
|
||||
int carry_in);
|
||||
using vixl_uint128_t = std::pair<uint64_t, uint64_t>;
|
||||
vixl_uint128_t Add128(vixl_uint128_t x, vixl_uint128_t y);
|
||||
vixl_uint128_t Lsl128(vixl_uint128_t x, unsigned shift) const;
|
||||
vixl_uint128_t Eor128(vixl_uint128_t x, vixl_uint128_t y) const;
|
||||
vixl_uint128_t Mul64(uint64_t x, uint64_t y);
|
||||
vixl_uint128_t Neg128(vixl_uint128_t x);
|
||||
void LogicalHelper(const Instruction* instr, int64_t op2);
|
||||
@@ -3317,92 +3435,95 @@ class Simulator : public DecoderVisitor {
|
||||
uint64_t PolynomialMult(uint64_t op1,
|
||||
uint64_t op2,
|
||||
int lane_size_in_bits) const;
|
||||
vixl_uint128_t PolynomialMult128(uint64_t op1,
|
||||
uint64_t op2,
|
||||
int lane_size_in_bits) const;
|
||||
|
||||
void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr);
|
||||
void ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr);
|
||||
void ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr);
|
||||
void ld1r(VectorFormat vform,
|
||||
bool ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr);
|
||||
bool ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr);
|
||||
bool ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr);
|
||||
bool ld1r(VectorFormat vform,
|
||||
VectorFormat unpack_vform,
|
||||
LogicVRegister dst,
|
||||
uint64_t addr,
|
||||
bool is_signed = false);
|
||||
void ld2(VectorFormat vform,
|
||||
bool ld2(VectorFormat vform,
|
||||
LogicVRegister dst1,
|
||||
LogicVRegister dst2,
|
||||
uint64_t addr);
|
||||
void ld2(VectorFormat vform,
|
||||
bool ld2(VectorFormat vform,
|
||||
LogicVRegister dst1,
|
||||
LogicVRegister dst2,
|
||||
int index,
|
||||
uint64_t addr);
|
||||
void ld2r(VectorFormat vform,
|
||||
bool ld2r(VectorFormat vform,
|
||||
LogicVRegister dst1,
|
||||
LogicVRegister dst2,
|
||||
uint64_t addr);
|
||||
void ld3(VectorFormat vform,
|
||||
bool ld3(VectorFormat vform,
|
||||
LogicVRegister dst1,
|
||||
LogicVRegister dst2,
|
||||
LogicVRegister dst3,
|
||||
uint64_t addr);
|
||||
void ld3(VectorFormat vform,
|
||||
bool ld3(VectorFormat vform,
|
||||
LogicVRegister dst1,
|
||||
LogicVRegister dst2,
|
||||
LogicVRegister dst3,
|
||||
int index,
|
||||
uint64_t addr);
|
||||
void ld3r(VectorFormat vform,
|
||||
bool ld3r(VectorFormat vform,
|
||||
LogicVRegister dst1,
|
||||
LogicVRegister dst2,
|
||||
LogicVRegister dst3,
|
||||
uint64_t addr);
|
||||
void ld4(VectorFormat vform,
|
||||
bool ld4(VectorFormat vform,
|
||||
LogicVRegister dst1,
|
||||
LogicVRegister dst2,
|
||||
LogicVRegister dst3,
|
||||
LogicVRegister dst4,
|
||||
uint64_t addr);
|
||||
void ld4(VectorFormat vform,
|
||||
bool ld4(VectorFormat vform,
|
||||
LogicVRegister dst1,
|
||||
LogicVRegister dst2,
|
||||
LogicVRegister dst3,
|
||||
LogicVRegister dst4,
|
||||
int index,
|
||||
uint64_t addr);
|
||||
void ld4r(VectorFormat vform,
|
||||
bool ld4r(VectorFormat vform,
|
||||
LogicVRegister dst1,
|
||||
LogicVRegister dst2,
|
||||
LogicVRegister dst3,
|
||||
LogicVRegister dst4,
|
||||
uint64_t addr);
|
||||
void st1(VectorFormat vform, LogicVRegister src, uint64_t addr);
|
||||
void st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr);
|
||||
void st2(VectorFormat vform,
|
||||
bool st1(VectorFormat vform, LogicVRegister src, uint64_t addr);
|
||||
bool st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr);
|
||||
bool st2(VectorFormat vform,
|
||||
LogicVRegister src,
|
||||
LogicVRegister src2,
|
||||
uint64_t addr);
|
||||
void st2(VectorFormat vform,
|
||||
bool st2(VectorFormat vform,
|
||||
LogicVRegister src,
|
||||
LogicVRegister src2,
|
||||
int index,
|
||||
uint64_t addr);
|
||||
void st3(VectorFormat vform,
|
||||
bool st3(VectorFormat vform,
|
||||
LogicVRegister src,
|
||||
LogicVRegister src2,
|
||||
LogicVRegister src3,
|
||||
uint64_t addr);
|
||||
void st3(VectorFormat vform,
|
||||
bool st3(VectorFormat vform,
|
||||
LogicVRegister src,
|
||||
LogicVRegister src2,
|
||||
LogicVRegister src3,
|
||||
int index,
|
||||
uint64_t addr);
|
||||
void st4(VectorFormat vform,
|
||||
bool st4(VectorFormat vform,
|
||||
LogicVRegister src,
|
||||
LogicVRegister src2,
|
||||
LogicVRegister src3,
|
||||
LogicVRegister src4,
|
||||
uint64_t addr);
|
||||
void st4(VectorFormat vform,
|
||||
bool st4(VectorFormat vform,
|
||||
LogicVRegister src,
|
||||
LogicVRegister src2,
|
||||
LogicVRegister src3,
|
||||
@@ -3688,6 +3809,10 @@ class Simulator : public DecoderVisitor {
|
||||
LogicVRegister dst,
|
||||
const LogicVRegister& src,
|
||||
int rotation);
|
||||
LogicVRegister rol(VectorFormat vform,
|
||||
LogicVRegister dst,
|
||||
const LogicVRegister& src,
|
||||
int rotation);
|
||||
LogicVRegister ext(VectorFormat vform,
|
||||
LogicVRegister dst,
|
||||
const LogicVRegister& src1,
|
||||
@@ -4412,6 +4537,95 @@ class Simulator : public DecoderVisitor {
|
||||
LogicVRegister srcdst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2);
|
||||
|
||||
template <unsigned N>
|
||||
static void SHARotateEltsLeftOne(uint64_t (&x)[N]) {
|
||||
VIXL_STATIC_ASSERT(N == 4);
|
||||
uint64_t temp = x[3];
|
||||
x[3] = x[2];
|
||||
x[2] = x[1];
|
||||
x[1] = x[0];
|
||||
x[0] = temp;
|
||||
}
|
||||
|
||||
template <uint32_t mode>
|
||||
LogicVRegister sha1(LogicVRegister srcdst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2) {
|
||||
uint64_t y = src1.Uint(kFormat4S, 0);
|
||||
uint64_t sd[4] = {};
|
||||
srcdst.UintArray(kFormat4S, sd);
|
||||
|
||||
for (unsigned i = 0; i < ArrayLength(sd); i++) {
|
||||
uint64_t t = CryptoOp<mode>(sd[1], sd[2], sd[3]);
|
||||
|
||||
y += RotateLeft(sd[0], 5, kSRegSize) + t;
|
||||
y += src2.Uint(kFormat4S, i);
|
||||
|
||||
sd[1] = RotateLeft(sd[1], 30, kSRegSize);
|
||||
|
||||
// y:sd = ROL(y:sd, 32)
|
||||
SHARotateEltsLeftOne(sd);
|
||||
std::swap(sd[0], y);
|
||||
}
|
||||
|
||||
srcdst.SetUintArray(kFormat4S, sd);
|
||||
return srcdst;
|
||||
}
|
||||
|
||||
LogicVRegister sha2h(LogicVRegister srcdst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2,
|
||||
bool part1);
|
||||
LogicVRegister sha2su0(LogicVRegister srcdst, const LogicVRegister& src1);
|
||||
LogicVRegister sha2su1(LogicVRegister srcdst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2);
|
||||
LogicVRegister sha512h(LogicVRegister srcdst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2);
|
||||
LogicVRegister sha512h2(LogicVRegister srcdst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2);
|
||||
LogicVRegister sha512su0(LogicVRegister srcdst, const LogicVRegister& src1);
|
||||
LogicVRegister sha512su1(LogicVRegister srcdst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2);
|
||||
|
||||
|
||||
LogicVRegister aes(LogicVRegister srcdst,
|
||||
const LogicVRegister& src1,
|
||||
bool decrypt);
|
||||
LogicVRegister aesmix(LogicVRegister srcdst,
|
||||
const LogicVRegister& src1,
|
||||
bool inverse);
|
||||
|
||||
LogicVRegister sm3partw1(LogicVRegister dst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2);
|
||||
LogicVRegister sm3partw2(LogicVRegister dst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2);
|
||||
LogicVRegister sm3ss1(LogicVRegister dst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2,
|
||||
const LogicVRegister& src3);
|
||||
LogicVRegister sm3tt1(LogicVRegister srcdst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2,
|
||||
int index,
|
||||
bool is_a);
|
||||
LogicVRegister sm3tt2(LogicVRegister srcdst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2,
|
||||
int index,
|
||||
bool is_a);
|
||||
|
||||
LogicVRegister sm4(LogicVRegister dst,
|
||||
const LogicVRegister& src1,
|
||||
const LogicVRegister& src2,
|
||||
bool is_key);
|
||||
|
||||
#define NEON_3VREG_LOGIC_LIST(V) \
|
||||
V(addhn) \
|
||||
V(addhn2) \
|
||||
@@ -4825,7 +5039,7 @@ class Simulator : public DecoderVisitor {
|
||||
uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly);
|
||||
uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly);
|
||||
|
||||
void SysOp_W(int op, int64_t val);
|
||||
bool SysOp_W(int op, int64_t val);
|
||||
|
||||
template <typename T>
|
||||
T FPRecipSqrtEstimate(T op);
|
||||
@@ -4979,7 +5193,8 @@ class Simulator : public DecoderVisitor {
|
||||
unsigned zt_code,
|
||||
const LogicSVEAddressVector& addr);
|
||||
// Load each active zt<i>[lane] from `addr.GetElementAddress(lane, ...)`.
|
||||
void SVEStructuredLoadHelper(VectorFormat vform,
|
||||
// Returns false if a load failed.
|
||||
bool SVEStructuredLoadHelper(VectorFormat vform,
|
||||
const LogicPRegister& pg,
|
||||
unsigned zt_code,
|
||||
const LogicSVEAddressVector& addr,
|
||||
@@ -5178,10 +5393,12 @@ class Simulator : public DecoderVisitor {
|
||||
|
||||
bool CanReadMemory(uintptr_t address, size_t size);
|
||||
|
||||
#ifndef _WIN32
|
||||
// CanReadMemory needs placeholder file descriptors, so we use a pipe. We can
|
||||
// save some system call overhead by opening them on construction, rather than
|
||||
// on every call to CanReadMemory.
|
||||
int placeholder_pipe_fd_[2];
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
static T FPDefaultNaN();
|
||||
@@ -5265,15 +5482,22 @@ class Simulator : public DecoderVisitor {
|
||||
Vector<CPUFeatures>saved_cpu_features_;
|
||||
#endif
|
||||
|
||||
// State for *rand48 functions, used to simulate randomness with repeatable
|
||||
// linear_congruential_engine, used to simulate randomness with repeatable
|
||||
// behaviour (so that tests are deterministic). This is used to simulate RNDR
|
||||
// and RNDRRS, as well as to simulate a source of entropy for architecturally
|
||||
// undefined behaviour.
|
||||
uint16_t rand_state_[3];
|
||||
std::linear_congruential_engine<uint64_t,
|
||||
0x5DEECE66D,
|
||||
0xB,
|
||||
static_cast<uint64_t>(1) << 48>
|
||||
rand_gen_;
|
||||
|
||||
// A configurable size of SVE vector registers.
|
||||
unsigned vector_length_;
|
||||
|
||||
// DC ZVA enable (= 0) status and block size.
|
||||
unsigned dczid_ = (0 << 4) | 4; // 2^4 words => 64-byte block size.
|
||||
|
||||
// Representation of memory attributes such as MTE tagging and BTI page
|
||||
// protection in addition to branch interceptions.
|
||||
MetaDataDepot meta_data_;
|
||||
@@ -5287,6 +5511,161 @@ class Simulator : public DecoderVisitor {
|
||||
#else
|
||||
Debugger* debugger_{nullptr};
|
||||
#endif
|
||||
|
||||
// The Guarded Control Stack is represented using a vector, where the more
|
||||
// recently stored addresses are at higher-numbered indices.
|
||||
using GuardedControlStack = std::vector<uint64_t>;
|
||||
|
||||
// The GCSManager handles the synchronisation of GCS across multiple
|
||||
// Simulator instances. Each Simulator has its own stack, but all share
|
||||
// a GCSManager instance. This allows exchanging stacks between Simulators
|
||||
// in a threaded application.
|
||||
class GCSManager {
|
||||
public:
|
||||
// Allocate a new Guarded Control Stack and add it to the vector of stacks.
|
||||
uint64_t AllocateStack() {
|
||||
const std::lock_guard<std::mutex> lock(stacks_mtx_);
|
||||
|
||||
GuardedControlStack* new_stack = new GuardedControlStack;
|
||||
uint64_t result;
|
||||
|
||||
// Put the new stack into the first available slot.
|
||||
for (result = 0; result < stacks_.size(); result++) {
|
||||
if (stacks_[result] == nullptr) {
|
||||
stacks_[result] = new_stack;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If there were no slots, create a new one.
|
||||
if (result == stacks_.size()) {
|
||||
stacks_.push_back(new_stack);
|
||||
}
|
||||
|
||||
// Shift the index to look like a stack pointer aligned to a page.
|
||||
result <<= kPageSizeLog2;
|
||||
|
||||
// Push the tagged index onto the new stack as a seal.
|
||||
new_stack->push_back(result + 1);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Free a Guarded Control Stack and set the stacks_ slot to null.
|
||||
void FreeStack(uint64_t gcs) {
|
||||
const std::lock_guard<std::mutex> lock(stacks_mtx_);
|
||||
uint64_t gcs_index = GetGCSIndex(gcs);
|
||||
GuardedControlStack* gcsptr = stacks_[gcs_index];
|
||||
if (gcsptr == nullptr) {
|
||||
VIXL_ABORT_WITH_MSG("Tried to free unallocated GCS ");
|
||||
} else {
|
||||
delete gcsptr;
|
||||
stacks_[gcs_index] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Get a pointer to the GCS vector using a GCS id.
|
||||
GuardedControlStack* GetGCSPtr(uint64_t gcs) const {
|
||||
return stacks_[GetGCSIndex(gcs)];
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t GetGCSIndex(uint64_t gcs) const { return gcs >> 12; }
|
||||
|
||||
std::vector<GuardedControlStack*> stacks_;
|
||||
std::mutex stacks_mtx_;
|
||||
};
|
||||
|
||||
// A GCS id indicating no GCS has been allocated.
|
||||
static const uint64_t kGCSNoStack = kPageSize - 1;
|
||||
uint64_t gcs_;
|
||||
bool gcs_enabled_;
|
||||
|
||||
public:
|
||||
GCSManager& GetGCSManager() {
|
||||
static GCSManager manager;
|
||||
return manager;
|
||||
}
|
||||
|
||||
void EnableGCSCheck() { gcs_enabled_ = true; }
|
||||
void DisableGCSCheck() { gcs_enabled_ = false; }
|
||||
bool IsGCSCheckEnabled() const { return gcs_enabled_; }
|
||||
|
||||
private:
|
||||
bool IsAllocatedGCS(uint64_t gcs) const { return gcs != kGCSNoStack; }
|
||||
void ResetGCSState() {
|
||||
GCSManager& m = GetGCSManager();
|
||||
if (IsAllocatedGCS(gcs_)) {
|
||||
m.FreeStack(gcs_);
|
||||
}
|
||||
ActivateGCS(m.AllocateStack());
|
||||
GCSPop(); // Remove seal.
|
||||
}
|
||||
|
||||
// Look up the stack identified by `gcs` in the shared GCSManager.
GuardedControlStack* GetGCSPtr(uint64_t gcs) {
  return GetGCSManager().GetGCSPtr(gcs);
}
|
||||
GuardedControlStack* GetActiveGCSPtr() { return GetGCSPtr(gcs_); }
|
||||
|
||||
// Make `gcs` the active stack id and return the id that was active before.
uint64_t ActivateGCS(uint64_t gcs) {
  // After the swap, the by-value parameter holds the outgoing id.
  std::swap(gcs_, gcs);
  return gcs;
}
|
||||
|
||||
void GCSPush(uint64_t addr) {
|
||||
GetActiveGCSPtr()->push_back(addr);
|
||||
size_t entry = GetActiveGCSPtr()->size() - 1;
|
||||
LogGCS(/* is_push = */ true, addr, entry);
|
||||
}
|
||||
|
||||
uint64_t GCSPop() {
|
||||
GuardedControlStack* gcs = GetActiveGCSPtr();
|
||||
if (gcs->empty()) {
|
||||
return 0;
|
||||
}
|
||||
uint64_t return_addr = gcs->back();
|
||||
size_t entry = gcs->size() - 1;
|
||||
gcs->pop_back();
|
||||
LogGCS(/* is_push = */ false, return_addr, entry);
|
||||
return return_addr;
|
||||
}
|
||||
|
||||
uint64_t GCSPeek() {
|
||||
GuardedControlStack* gcs = GetActiveGCSPtr();
|
||||
if (gcs->empty()) {
|
||||
return 0;
|
||||
}
|
||||
uint64_t return_addr = gcs->back();
|
||||
return return_addr;
|
||||
}
|
||||
|
||||
void ReportGCSFailure(const char* msg) {
|
||||
if (IsGCSCheckEnabled()) {
|
||||
GuardedControlStack* gcs = GetActiveGCSPtr();
|
||||
printf("%s", msg);
|
||||
if (gcs == nullptr) {
|
||||
printf("GCS pointer is null\n");
|
||||
} else {
|
||||
printf("GCS records, most recent first:\n");
|
||||
int most_recent_index = static_cast<int>(gcs->size()) - 1;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (!gcs->empty()) {
|
||||
uint64_t entry = gcs->back();
|
||||
gcs->pop_back();
|
||||
int index = most_recent_index - i;
|
||||
printf(" gcs%" PRIu64 "[%d]: 0x%016" PRIx64 "\n",
|
||||
gcs_,
|
||||
index,
|
||||
entry);
|
||||
}
|
||||
}
|
||||
printf("End of GCS records.\n");
|
||||
}
|
||||
VIXL_ABORT_WITH_MSG("GCS failed ");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) && __cplusplus < 201402L
|
||||
|
||||
+2
-1
@@ -201,7 +201,8 @@ namespace vixl {
|
||||
/* Extended BFloat16 instructions */ \
|
||||
V(kEBF16, "EBF16", "ebf16") \
|
||||
V(kSVE_EBF16, "EBF16 (SVE)", "sveebf16") \
|
||||
V(kCSSC, "CSSC", "cssc")
|
||||
V(kCSSC, "CSSC", "cssc") \
|
||||
V(kGCS, "GCS", "gcs")
|
||||
// clang-format on
|
||||
|
||||
|
||||
|
||||
@@ -215,6 +215,18 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {}
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
// Evaluate 'init' to an std::optional and return if it's empty. If 'init' is
|
||||
// not empty then define a variable 'name' with the value inside the
|
||||
// std::optional.
|
||||
#define VIXL_DEFINE_OR_RETURN(name, init) \
|
||||
auto opt##name = init; \
|
||||
if (!opt##name) return; \
|
||||
auto name = *opt##name;
|
||||
#define VIXL_DEFINE_OR_RETURN_FALSE(name, init) \
|
||||
auto opt##name = init; \
|
||||
if (!opt##name) return false; \
|
||||
auto name = *opt##name;
|
||||
|
||||
#if __cplusplus >= 201103L
|
||||
#define VIXL_NO_RETURN [[noreturn]]
|
||||
#else
|
||||
|
||||
+1
-2
@@ -1,4 +1,3 @@
|
||||
// Copyright 2015, VIXL authors
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -95,7 +94,7 @@ class InvalSet {
|
||||
#else
|
||||
InvalSet() = delete;
|
||||
InvalSet(AllocatorWrapper alocator);
|
||||
InvalSet(InvalSet&&) = default;
|
||||
InvalSet(InvalSet&&) = default; // movable
|
||||
#endif
|
||||
~InvalSet() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION;
|
||||
|
||||
|
||||
@@ -491,7 +491,7 @@ void PoolManager<T>::Release(T pc) {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
PoolManager<T>::~PoolManager<T>() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
|
||||
PoolManager<T>::~PoolManager() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
|
||||
#ifdef VIXL_DEBUG
|
||||
// Check for unbound objects.
|
||||
for (objects_iter iter = objects_.begin(); iter != objects_.end(); ++iter) {
|
||||
|
||||
+50
-5
@@ -42,9 +42,9 @@
|
||||
#endif
|
||||
|
||||
#ifdef VIXL_USE_PANDA_ALLOC
|
||||
#include "mem/arena_allocator_stl_adapter.h"
|
||||
#include "mem/arena_allocator.h"
|
||||
#include "utils/arena_containers.h"
|
||||
#include "libarkbase/mem/arena_allocator_stl_adapter.h"
|
||||
#include "libarkbase/mem/arena_allocator.h"
|
||||
#include "libarkbase/utils/arena_containers.h"
|
||||
#else
|
||||
#include <list>
|
||||
#include <map>
|
||||
@@ -385,6 +385,11 @@ inline uint64_t RotateRight(uint64_t value,
|
||||
return value & width_mask;
|
||||
}
|
||||
|
||||
inline uint64_t RotateLeft(uint64_t value,
|
||||
unsigned int rotate,
|
||||
unsigned int width) {
|
||||
return RotateRight(value, width - rotate, width);
|
||||
}
|
||||
|
||||
// Wrapper class for passing FP16 values through the assembler.
|
||||
// This is purely to aid with type checking/casting.
|
||||
@@ -437,6 +442,12 @@ T UnsignedNegate(T value) {
|
||||
return ~value + 1;
|
||||
}
|
||||
|
||||
// Returns false only when `value` equals std::numeric_limits<T>::min(), the
// one value this helper treats as not negatable; true otherwise. T must be a
// signed type.
template <typename T>
bool CanBeNegated(T value) {
  VIXL_STATIC_ASSERT(std::is_signed<T>::value);
  return value != std::numeric_limits<T>::min();
}
|
||||
|
||||
// An absolute operation for signed integers that is defined for results outside
|
||||
// the representable range. Specifically, Abs(MIN_INT) is MIN_INT.
|
||||
template <typename T>
|
||||
@@ -694,13 +705,14 @@ inline T SignExtend(T val, int size_in_bits) {
|
||||
template <typename T>
|
||||
T ReverseBytes(T value, int block_bytes_log2) {
|
||||
VIXL_ASSERT((sizeof(value) == 4) || (sizeof(value) == 8));
|
||||
VIXL_ASSERT((1U << block_bytes_log2) <= sizeof(value));
|
||||
VIXL_ASSERT((uint64_t{1} << block_bytes_log2) <= sizeof(value));
|
||||
// Split the 64-bit value into an 8-bit array, where b[0] is the least
|
||||
// significant byte, and b[7] is the most significant.
|
||||
uint8_t bytes[8];
|
||||
uint64_t mask = UINT64_C(0xff00000000000000);
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
bytes[i] = (static_cast<uint64_t>(value) & mask) >> (i * 8);
|
||||
bytes[i] =
|
||||
static_cast<uint8_t>((static_cast<uint64_t>(value) & mask) >> (i * 8));
|
||||
mask >>= 8;
|
||||
}
|
||||
|
||||
@@ -757,6 +769,39 @@ bool IsWordAligned(T pointer) {
|
||||
return IsAligned<4>(pointer);
|
||||
}
|
||||
|
||||
template <unsigned BITS, typename T>
|
||||
bool IsRepeatingPattern(T value) {
|
||||
VIXL_STATIC_ASSERT(std::is_unsigned<T>::value);
|
||||
VIXL_ASSERT(IsMultiple(sizeof(value) * kBitsPerByte, BITS));
|
||||
VIXL_ASSERT(IsMultiple(BITS, 2));
|
||||
VIXL_STATIC_ASSERT(BITS >= 2);
|
||||
#if (defined(__x86_64__) || defined(__i386)) && __clang_major__ >= 17 && \
|
||||
__clang_major__ <= 19
|
||||
// Workaround for https://github.com/llvm/llvm-project/issues/108722
|
||||
unsigned hbits = BITS / 2;
|
||||
T midmask = (~static_cast<T>(0) >> BITS) << hbits;
|
||||
// E.g. for bytes in a word (0xb3b2b1b0): .b3b2b1. == .b2b1b0.
|
||||
return (((value >> hbits) & midmask) == ((value << hbits) & midmask));
|
||||
#else
|
||||
return value == RotateRight(value, BITS, sizeof(value) * kBitsPerByte);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool AllBytesMatch(T value) {
|
||||
return IsRepeatingPattern<kBitsPerByte>(value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool AllHalfwordsMatch(T value) {
|
||||
return IsRepeatingPattern<kBitsPerByte * 2>(value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool AllWordsMatch(T value) {
|
||||
return IsRepeatingPattern<kBitsPerByte * 4>(value);
|
||||
}
|
||||
|
||||
// Increment a pointer until it has the specified alignment. The alignment must
|
||||
// be a power of two.
|
||||
template <class T>
|
||||
|
||||
@@ -177,17 +177,23 @@ namespace aarch32 {
|
||||
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH32
|
||||
// No simulator yet. We can't test the results.
|
||||
|
||||
#define ASSERT_EQUAL_32(expected, result)
|
||||
#define ASSERT_EQUAL_32(expected, result) \
|
||||
USE(expected, result)
|
||||
|
||||
#define ASSERT_EQUAL_64(expected, result)
|
||||
#define ASSERT_EQUAL_64(expected, result) \
|
||||
USE(expected, result)
|
||||
|
||||
#define ASSERT_EQUAL_128(expected_h, expected_l, result)
|
||||
#define ASSERT_EQUAL_128(expected_h, expected_l, result) \
|
||||
USE(expected_h, expected_l, result)
|
||||
|
||||
#define ASSERT_EQUAL_FP32(expected, result)
|
||||
#define ASSERT_EQUAL_FP32(expected, result) \
|
||||
USE(expected, result)
|
||||
|
||||
#define ASSERT_EQUAL_FP64(expected, result)
|
||||
#define ASSERT_EQUAL_FP64(expected, result) \
|
||||
USE(expected, result)
|
||||
|
||||
#define ASSERT_EQUAL_NZCV(expected)
|
||||
#define ASSERT_EQUAL_NZCV(expected) \
|
||||
USE(expected)
|
||||
|
||||
#else
|
||||
|
||||
@@ -3634,8 +3640,6 @@ static void NearBranchAndLiteralFuzzHelper(InstructionSet isa,
|
||||
const int label_count = 15;
|
||||
const int literal_count = 31;
|
||||
Label* labels;
|
||||
uint64_t* literal_values;
|
||||
Literal<uint64_t>* literals[literal_count];
|
||||
|
||||
// Use multiple iterations, as each produces a different predictably random
|
||||
// sequence.
|
||||
@@ -3679,12 +3683,13 @@ static void NearBranchAndLiteralFuzzHelper(InstructionSet isa,
|
||||
labels = new Label[label_count];
|
||||
|
||||
// Create new literal values.
|
||||
literal_values = new uint64_t[literal_count];
|
||||
std::vector<uint64_t> literal_values;
|
||||
std::vector<Literal<uint64_t>> literals;
|
||||
for (int lit = 0; lit < literal_count; lit++) {
|
||||
// TODO: Generate pseudo-random data for literals. At the moment, the
|
||||
// disassembler breaks if we do this.
|
||||
literal_values[lit] = lit;
|
||||
literals[lit] = new Literal<uint64_t>(literal_values[lit]);
|
||||
literal_values.push_back(lit);
|
||||
literals.emplace_back(Literal<uint64_t>(literal_values[lit]));
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
@@ -3736,13 +3741,13 @@ static void NearBranchAndLiteralFuzzHelper(InstructionSet isa,
|
||||
__ Nop();
|
||||
break;
|
||||
case 4:
|
||||
__ Ldr(r2, literals[literal_index]);
|
||||
__ Ldr(r2, &literals[literal_index]);
|
||||
__ Cmp(r2, static_cast<uint32_t>(literal_values[literal_index]));
|
||||
__ B(ne, &fail);
|
||||
__ Mov(r2, 0);
|
||||
break;
|
||||
case 5:
|
||||
__ Ldrb(r2, literals[literal_index]);
|
||||
__ Ldrb(r2, &literals[literal_index]);
|
||||
__ Cmp(r2,
|
||||
static_cast<uint32_t>(literal_values[literal_index]) &
|
||||
0xff);
|
||||
@@ -3750,7 +3755,7 @@ static void NearBranchAndLiteralFuzzHelper(InstructionSet isa,
|
||||
__ Mov(r2, 0);
|
||||
break;
|
||||
case 6:
|
||||
__ Ldrd(r2, r3, literals[literal_index]);
|
||||
__ Ldrd(r2, r3, &literals[literal_index]);
|
||||
__ Cmp(r2, static_cast<uint32_t>(literal_values[literal_index]));
|
||||
__ B(ne, &fail);
|
||||
__ Mov(r2, 0);
|
||||
@@ -3761,7 +3766,7 @@ static void NearBranchAndLiteralFuzzHelper(InstructionSet isa,
|
||||
__ Mov(r3, 0);
|
||||
break;
|
||||
case 7:
|
||||
__ Vldr(s0, literals[literal_index]);
|
||||
__ Vldr(s0, &literals[literal_index]);
|
||||
__ Vmov(s1, static_cast<uint32_t>(literal_values[literal_index]));
|
||||
__ Vcmp(s0, s1);
|
||||
__ B(ne, &fail);
|
||||
@@ -3875,9 +3880,6 @@ static void NearBranchAndLiteralFuzzHelper(InstructionSet isa,
|
||||
// independent.
|
||||
masm.FinalizeCode(MacroAssembler::kFallThrough);
|
||||
delete[] labels;
|
||||
for (int lit = 0; lit < literal_count; lit++) {
|
||||
delete literals[lit];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1700,6 +1700,41 @@ TEST(macro_assembler_Cbz) {
|
||||
}
|
||||
|
||||
|
||||
TEST(macro_assembler_b_cond_t32) {
|
||||
SETUP();
|
||||
|
||||
#ifdef VIXL_INCLUDE_TARGET_T32
|
||||
// Ensure backward conditional branches are veneered correctly.
|
||||
__ UseT32();
|
||||
int pc_off = __ GetArchitectureStatePCOffset();
|
||||
|
||||
// Largest encodable backwards offset.
|
||||
int curs = __ GetCursorOffset() + pc_off;
|
||||
Label label_neg1m(curs - 1048576);
|
||||
COMPARE_T32(B(ne, &label_neg1m), "bne 0xfff00004\n");
|
||||
|
||||
// Next largest cannot be encoded.
|
||||
curs = __ GetCursorOffset() + pc_off;
|
||||
Label label_neg1m_plus_inst(curs - (1048576 + 2));
|
||||
COMPARE_T32(B(ne, &label_neg1m_plus_inst), "beq 0x00000006\n"
|
||||
"b 0xfff00002\n");
|
||||
|
||||
// Offset that requires largest unconditional branch in veneer.
|
||||
curs = __ GetCursorOffset() + pc_off;
|
||||
Label label_neg16m(curs - (16777216 - 2));
|
||||
COMPARE_T32(B(ne, &label_neg16m), "beq 0x00000006\n"
|
||||
"b 0xff000006\n");
|
||||
|
||||
// Next largest cannot be veneered.
|
||||
curs = __ GetCursorOffset() + pc_off;
|
||||
Label label_neg16m_plus_inst(curs - 16777216);
|
||||
MUST_FAIL_TEST_T32(B(ne, &label_neg16m_plus_inst),
|
||||
"Conditional branch too far for veneer.\n");
|
||||
#endif
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
#ifdef VIXL_NEGATIVE_TESTING
|
||||
TEST(assembler_crc_negative) {
|
||||
SETUP();
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include "test-runner.h"
|
||||
#include "test-utils.h"
|
||||
@@ -1763,6 +1764,24 @@ TEST(sim_stack) {
|
||||
VIXL_CHECK(s.IsAccessInGuardRegion(s.GetLimit() - 1280, 2048));
|
||||
VIXL_CHECK(s.IsAccessInGuardRegion(s.GetLimit() - 1280, 10000));
|
||||
}
|
||||
|
||||
void AllocateAndFreeGCS() {
|
||||
Decoder d;
|
||||
Simulator s(&d);
|
||||
|
||||
for (int i = 0; i < 100000; i++) {
|
||||
uint64_t gcs = s.GetGCSManager().AllocateStack();
|
||||
s.GetGCSManager().FreeStack(gcs);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(sim_gcs_manager) {
|
||||
std::thread t1(AllocateAndFreeGCS);
|
||||
std::thread t2(AllocateAndFreeGCS);
|
||||
|
||||
t1.join();
|
||||
t2.join();
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace aarch64
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -10975,8 +10975,26 @@ TEST(neon_usdot_element) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(neon_pmull_regression_test) {
|
||||
SETUP_WITH_FEATURES(CPUFeatures::kNEON);
|
||||
|
||||
START();
|
||||
__ Movi(v0.V2D(), 0xdecafc0ffee);
|
||||
__ Pmull(v0.V8H(), v0.V8B(), v0.V8B());
|
||||
|
||||
__ Movi(v1.V2D(), 0xaaaaaaaa55555555);
|
||||
__ Pmull2(v1.V8H(), v1.V16B(), v1.V16B());
|
||||
END();
|
||||
|
||||
if (CAN_RUN()) {
|
||||
RUN();
|
||||
ASSERT_EQUAL_128(0x0000000000515450, 0x4455500055555454, q0);
|
||||
ASSERT_EQUAL_128(0x4444444444444444, 0x1111111111111111, q1);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(zero_high_b) {
|
||||
SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON, CPUFeatures::kRDM);
|
||||
SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
|
||||
START();
|
||||
|
||||
__ Mov(x0, 0x55aa42ffaa42ff55);
|
||||
@@ -10996,7 +11014,7 @@ TEST(zero_high_b) {
|
||||
__ Ror(x0, x0, 8);
|
||||
|
||||
{
|
||||
ExactAssemblyScope scope(&masm, 81 * kInstructionSize);
|
||||
ExactAssemblyScope scope(&masm, 75 * kInstructionSize);
|
||||
__ movi(q9.V16B(), 0x55);
|
||||
__ dci(0x5e010409); // mov b9, v0.b[0]
|
||||
__ orr(q30.V16B(), q30.V16B(), q9.V16B());
|
||||
@@ -11013,14 +11031,6 @@ TEST(zero_high_b) {
|
||||
__ dci(0x7e207809); // sqneg b9, b0
|
||||
__ orr(q30.V16B(), q30.V16B(), q9.V16B());
|
||||
|
||||
__ movi(q9.V16B(), 0x55);
|
||||
__ dci(0x7e008429); // sqrdmlah b9, b1, b0
|
||||
__ orr(q30.V16B(), q30.V16B(), q9.V16B());
|
||||
|
||||
__ movi(q9.V16B(), 0x55);
|
||||
__ dci(0x7e008c29); // sqrdmlsh b9, b1, b0
|
||||
__ orr(q30.V16B(), q30.V16B(), q9.V16B());
|
||||
|
||||
__ movi(q9.V16B(), 0x55);
|
||||
__ dci(0x5e205c29); // sqrshl b9, b1, b0
|
||||
__ orr(q30.V16B(), q30.V16B(), q9.V16B());
|
||||
@@ -11821,10 +11831,7 @@ TEST(zero_high_s) {
|
||||
}
|
||||
|
||||
TEST(zero_high_d) {
|
||||
SETUP_WITH_FEATURES(CPUFeatures::kSVE,
|
||||
CPUFeatures::kNEON,
|
||||
CPUFeatures::kFP,
|
||||
CPUFeatures::kRDM);
|
||||
SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON, CPUFeatures::kFP);
|
||||
START();
|
||||
|
||||
__ Mov(x0, 0x55aa42ffaa42ff55);
|
||||
@@ -11844,7 +11851,7 @@ TEST(zero_high_d) {
|
||||
__ Ror(x0, x0, 8);
|
||||
|
||||
{
|
||||
ExactAssemblyScope scope(&masm, 291 * kInstructionSize);
|
||||
ExactAssemblyScope scope(&masm, 285 * kInstructionSize);
|
||||
__ movi(q9.V16B(), 0x55);
|
||||
__ dci(0x5ee0b809); // abs d9, d0
|
||||
__ orr(q30.V16B(), q30.V16B(), q9.V16B());
|
||||
@@ -12113,14 +12120,6 @@ TEST(zero_high_d) {
|
||||
__ dci(0x7ee07809); // sqneg d9, d0
|
||||
__ orr(q30.V16B(), q30.V16B(), q9.V16B());
|
||||
|
||||
__ movi(q9.V16B(), 0x55);
|
||||
__ dci(0x7ec08429); // sqrdmlah d9, d1, d0
|
||||
__ orr(q30.V16B(), q30.V16B(), q9.V16B());
|
||||
|
||||
__ movi(q9.V16B(), 0x55);
|
||||
__ dci(0x7ec08c29); // sqrdmlsh d9, d1, d0
|
||||
__ orr(q30.V16B(), q30.V16B(), q9.V16B());
|
||||
|
||||
__ movi(q9.V16B(), 0x55);
|
||||
__ dci(0x5ee05c29); // sqrshl d9, d1, d0
|
||||
__ orr(q30.V16B(), q30.V16B(), q9.V16B());
|
||||
|
||||
@@ -19729,6 +19729,709 @@ TEST_SVE(sudot_usdot) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_SVE(neon_ins_zero_high_regression_test) {
|
||||
SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kSVE);
|
||||
|
||||
START();
|
||||
__ Movi(v0.V2D(), 0x0f0e0d0c0b0a0908, 0x0706050403020100);
|
||||
|
||||
// Check that both forms of ins zero bits <VL-1:128>
|
||||
__ Index(z1.VnB(), 0, 1);
|
||||
__ Ins(v1.V16B(), 0, wzr);
|
||||
__ Index(z2.VnB(), 0, 1);
|
||||
__ Ins(v2.V16B(), 3, v2.V16B(), 3);
|
||||
END();
|
||||
|
||||
if (CAN_RUN()) {
|
||||
RUN();
|
||||
ASSERT_EQUAL_SVE(z0, z1);
|
||||
ASSERT_EQUAL_SVE(z0, z2);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_SVE(neon_fcvt_zero_high_regression_test) {
|
||||
SVE_SETUP_WITH_FEATURES(CPUFeatures::kFP,
|
||||
CPUFeatures::kNEON,
|
||||
CPUFeatures::kSVE);
|
||||
|
||||
START();
|
||||
__ Mov(z1.VnD(), 0);
|
||||
__ Mov(z2.VnD(), 0);
|
||||
__ Mov(z3.VnD(), 0);
|
||||
__ Mov(z4.VnD(), 0);
|
||||
__ Mov(z5.VnD(), 0);
|
||||
__ Mov(z6.VnD(), 0);
|
||||
__ Mov(z10.VnD(), 0);
|
||||
|
||||
Label done;
|
||||
// Skip calculations for VL128.
|
||||
__ Rdvl(x0, 1);
|
||||
__ Cmp(x0, 16);
|
||||
__ B(eq, &done);
|
||||
|
||||
__ Movi(v0.V2D(), 0x3ff000003f800000);
|
||||
__ Index(z1.VnB(), 0, 1);
|
||||
__ Index(z2.VnB(), 0, 1);
|
||||
__ Index(z3.VnB(), 0, 1);
|
||||
__ Index(z4.VnB(), 0, 1);
|
||||
__ Index(z5.VnB(), 0, 1);
|
||||
__ Index(z6.VnB(), 0, 1);
|
||||
|
||||
// Test zeroing bits <VL-1:128> for fcvtl, fcvtn and fcvtxn.
|
||||
__ Fcvtl(v1.V2D(), v0.V2S());
|
||||
__ Fcvtl2(v2.V2D(), v0.V4S());
|
||||
|
||||
__ Fcvtn(v3.V2S(), v0.V2D());
|
||||
__ Fcvtn2(v4.V4S(), v0.V2D());
|
||||
|
||||
__ Fcvtxn(v5.V2S(), v0.V2D());
|
||||
__ Fcvtxn2(v6.V4S(), v0.V2D());
|
||||
|
||||
// Set the expected non-zero bits to zero.
|
||||
__ Ext(z1.VnB(), z1.VnB(), z10.VnB(), kDRegSizeInBytes * 2);
|
||||
__ Ext(z2.VnB(), z2.VnB(), z10.VnB(), kDRegSizeInBytes * 2);
|
||||
__ Ext(z3.VnB(), z3.VnB(), z10.VnB(), kSRegSizeInBytes * 2);
|
||||
__ Ext(z4.VnB(), z4.VnB(), z10.VnB(), kSRegSizeInBytes * 4);
|
||||
__ Ext(z5.VnB(), z5.VnB(), z10.VnB(), kSRegSizeInBytes * 2);
|
||||
__ Ext(z6.VnB(), z6.VnB(), z10.VnB(), kSRegSizeInBytes * 4);
|
||||
|
||||
__ Bind(&done);
|
||||
END();
|
||||
|
||||
if (CAN_RUN()) {
|
||||
RUN();
|
||||
ASSERT_EQUAL_SVE(z10, z1);
|
||||
ASSERT_EQUAL_SVE(z10, z2);
|
||||
ASSERT_EQUAL_SVE(z10, z3);
|
||||
ASSERT_EQUAL_SVE(z10, z4);
|
||||
ASSERT_EQUAL_SVE(z10, z5);
|
||||
ASSERT_EQUAL_SVE(z10, z6);
|
||||
}
|
||||
}
|
||||
|
||||
#define TEST_ZEROING(INST) \
|
||||
__ Index(z0.VnB(), 0, 1); \
|
||||
__ INST; \
|
||||
__ Orr(z10.VnB(), z10.VnB(), z0.VnB());
|
||||
|
||||
TEST_SVE(neon_zero_high) {
|
||||
SVE_SETUP_WITH_FEATURES(CPUFeatures::kFP,
|
||||
CPUFeatures::kNEON,
|
||||
CPUFeatures::kNEONHalf,
|
||||
CPUFeatures::kSVE,
|
||||
CPUFeatures::kFcma,
|
||||
CPUFeatures::kFHM,
|
||||
CPUFeatures::kFrintToFixedSizedInt,
|
||||
CPUFeatures::kDotProduct,
|
||||
CPUFeatures::kRDM,
|
||||
CPUFeatures::kI8MM);
|
||||
|
||||
START();
|
||||
__ Mov(z10.VnD(), 0); // Initialise cumulative result register.
|
||||
|
||||
TEST_ZEROING(Abs(v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Abs(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Add(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Add(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Addhn2(v0.V16B(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Addhn(v0.V4H(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Addp(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Addp(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(And(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Bic(v0.V8H(), 0, 0));
|
||||
TEST_ZEROING(Bic(v0.V2S(), 255, 0));
|
||||
TEST_ZEROING(Bic(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Bif(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Bit(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Bsl(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Cls(v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Cls(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Clz(v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Clz(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Cmeq(v0.V16B(), v0.V16B(), 0));
|
||||
TEST_ZEROING(Cmeq(v0.V2S(), v0.V2S(), 0));
|
||||
TEST_ZEROING(Cmeq(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Cmeq(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Cmge(v0.V16B(), v0.V16B(), 0));
|
||||
TEST_ZEROING(Cmge(v0.V2S(), v0.V2S(), 0));
|
||||
TEST_ZEROING(Cmge(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Cmge(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Cmgt(v0.V16B(), v0.V16B(), 0));
|
||||
TEST_ZEROING(Cmgt(v0.V2S(), v0.V2S(), 0));
|
||||
TEST_ZEROING(Cmgt(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Cmgt(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Cmhi(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Cmhi(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Cmhs(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Cmhs(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Cmle(v0.V16B(), v0.V16B(), 0));
|
||||
TEST_ZEROING(Cmle(v0.V2S(), v0.V2S(), 0));
|
||||
TEST_ZEROING(Cmlt(v0.V16B(), v0.V16B(), 0));
|
||||
TEST_ZEROING(Cmlt(v0.V2S(), v0.V2S(), 0));
|
||||
TEST_ZEROING(Cmtst(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Cmtst(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Cnt(v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Dup(v0.V2S(), w0));
|
||||
TEST_ZEROING(Dup(v0.V8B(), w0));
|
||||
TEST_ZEROING(Dup(v0.V2S(), v0.S(), 0));
|
||||
TEST_ZEROING(Dup(v0.V8B(), v0.B(), 0));
|
||||
TEST_ZEROING(Eor(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Ext(v0.V16B(), v0.V16B(), v0.V16B(), 0));
|
||||
TEST_ZEROING(Ext(v0.V8B(), v0.V8B(), v0.V8B(), 4));
|
||||
TEST_ZEROING(Fabd(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Fabd(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fabs(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Fabs(v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Facge(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Facge(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Facgt(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Facgt(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fadd(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fadd(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Faddp(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Faddp(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcadd(v0.V2S(), v0.V2S(), v0.V2S(), 90));
|
||||
TEST_ZEROING(Fcadd(v0.V8H(), v0.V8H(), v0.V8H(), 90));
|
||||
TEST_ZEROING(Fcmeq(v0.V2S(), v0.V2S(), 0));
|
||||
TEST_ZEROING(Fcmeq(v0.V8H(), v0.V8H(), 0));
|
||||
TEST_ZEROING(Fcmeq(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcmeq(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcmge(v0.V2S(), v0.V2S(), 0));
|
||||
TEST_ZEROING(Fcmge(v0.V8H(), v0.V8H(), 0));
|
||||
TEST_ZEROING(Fcmge(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcmge(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcmgt(v0.V2S(), v0.V2S(), 0));
|
||||
TEST_ZEROING(Fcmgt(v0.V8H(), v0.V8H(), 0));
|
||||
TEST_ZEROING(Fcmgt(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcmgt(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcmla(v0.V4H(), v0.V4H(), v0.H(), 0, 0));
|
||||
TEST_ZEROING(Fcmla(v0.V4S(), v0.V4S(), v0.S(), 0, 0));
|
||||
TEST_ZEROING(Fcmla(v0.V4S(), v0.V4S(), v0.V4S(), 0));
|
||||
TEST_ZEROING(Fcmla(v0.V4H(), v0.V4H(), v0.V4H(), 0));
|
||||
TEST_ZEROING(Fcmle(v0.V2S(), v0.V2S(), 0));
|
||||
TEST_ZEROING(Fcmle(v0.V8H(), v0.V8H(), 0));
|
||||
TEST_ZEROING(Fcmlt(v0.V2S(), v0.V2S(), 0));
|
||||
TEST_ZEROING(Fcmlt(v0.V8H(), v0.V8H(), 0));
|
||||
TEST_ZEROING(Fcvtas(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcvtas(v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcvtau(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcvtau(v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcvtl2(v0.V4S(), v0.V8H()));
|
||||
TEST_ZEROING(Fcvtl(v0.V2D(), v0.V2S()));
|
||||
TEST_ZEROING(Fcvtms(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcvtms(v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcvtmu(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcvtmu(v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcvtn2(v0.V8H(), v0.V4S()));
|
||||
TEST_ZEROING(Fcvtn(v0.V2S(), v0.V2D()));
|
||||
TEST_ZEROING(Fcvtns(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcvtns(v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcvtnu(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcvtnu(v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcvtps(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcvtps(v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcvtpu(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcvtpu(v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcvtxn(v0.V2S(), v0.V2D()));
|
||||
TEST_ZEROING(Fcvtxn2(v0.V4S(), v0.V2D()));
|
||||
TEST_ZEROING(Fcvtzs(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcvtzs(v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fcvtzs(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Fcvtzu(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fcvtzu(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Fcvtzu(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Fdiv(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fdiv(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fmax(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fmax(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fmaxnm(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fmaxnm(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fmaxnmp(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fmaxnmp(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fmaxp(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fmaxp(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fmin(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fmin(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fminnm(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fminnm(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fminnmp(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fminnmp(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fminp(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Fminp(v0.V8H(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Fmla(v0.V4S(), v0.V4S(), v0.S(), 0));
|
||||
TEST_ZEROING(Fmla(v0.V4H(), v0.V4H(), v0.H(), 2));
|
||||
TEST_ZEROING(Fmla(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Fmla(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Fmlal2(v0.V4S(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Fmlal2(v0.V2S(), v0.V2H(), v0.H(), 2));
|
||||
TEST_ZEROING(Fmlal2(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Fmlal(v0.V4S(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Fmlal(v0.V2S(), v0.V2H(), v0.H(), 2));
|
||||
TEST_ZEROING(Fmlal(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Fmls(v0.V4S(), v0.V4S(), v0.S(), 0));
|
||||
TEST_ZEROING(Fmls(v0.V4H(), v0.V4H(), v0.H(), 2));
|
||||
TEST_ZEROING(Fmls(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Fmls(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Fmlsl2(v0.V4S(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Fmlsl2(v0.V2S(), v0.V2H(), v0.H(), 2));
|
||||
TEST_ZEROING(Fmlsl2(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Fmlsl(v0.V4S(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Fmlsl(v0.V2S(), v0.V2H(), v0.H(), 2));
|
||||
TEST_ZEROING(Fmlsl(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Fmov(v0.V2D(), 2.0000));
|
||||
TEST_ZEROING(Fmov(v0.V4H(), 2.0000));
|
||||
TEST_ZEROING(Fmov(v0.D(), 1, x1));
|
||||
TEST_ZEROING(Fmul(v0.V4S(), v0.V4S(), v0.S(), 0));
|
||||
TEST_ZEROING(Fmul(v0.V4H(), v0.V4H(), v0.H(), 2));
|
||||
TEST_ZEROING(Fmul(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Fmul(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Fmulx(v0.V4S(), v0.V4S(), v0.S(), 0));
|
||||
TEST_ZEROING(Fmulx(v0.V4H(), v0.V4H(), v0.H(), 2));
|
||||
TEST_ZEROING(Fmulx(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Fmulx(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Fneg(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Fneg(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Frecpe(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frecpe(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Frecps(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frecps(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Frint32x(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frint32z(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frint64x(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frint64z(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frinta(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frinta(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Frinti(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frinti(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Frintm(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frintm(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Frintn(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frintn(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Frintp(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frintp(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Frintx(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frintx(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Frintz(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frintz(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Frsqrte(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frsqrte(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Frsqrts(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Frsqrts(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Fsqrt(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Fsqrt(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Fsub(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Fsub(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Mov(v0.D(), 0, x0));
|
||||
TEST_ZEROING(Mov(v0.S(), 0, w0));
|
||||
TEST_ZEROING(Mov(v0.H(), 0, w0));
|
||||
TEST_ZEROING(Mov(v0.B(), 0, w0));
|
||||
TEST_ZEROING(Mov(v0.D(), 0, v0.D(), 0));
|
||||
TEST_ZEROING(Mov(v0.S(), 0, v0.S(), 0));
|
||||
TEST_ZEROING(Mov(v0.H(), 0, v0.H(), 0));
|
||||
TEST_ZEROING(Mov(v0.B(), 0, v0.B(), 0));
|
||||
TEST_ZEROING(Mla(v0.V4S(), v0.V4S(), v0.S(), 0));
|
||||
TEST_ZEROING(Mla(v0.V4H(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Mla(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Mla(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Mls(v0.V4S(), v0.V4S(), v0.S(), 0));
|
||||
TEST_ZEROING(Mls(v0.V4H(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Mls(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Mls(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Movi(v0.V2D(), 0xff));
|
||||
TEST_ZEROING(Movi(v0.V2S(), 0xff));
|
||||
TEST_ZEROING(Movi(v0.V4S(), 0x10, LSL, 8));
|
||||
TEST_ZEROING(Movi(v0.V2S(), 0x10, LSL, 8));
|
||||
TEST_ZEROING(Mul(v0.V4S(), v0.V4S(), v0.S(), 0));
|
||||
TEST_ZEROING(Mul(v0.V4H(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Mul(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Mul(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Mvni(v0.V4H(), 0x10, LSL, 8));
|
||||
TEST_ZEROING(Mvni(v0.V4H(), 0x10, LSL, 8));
|
||||
TEST_ZEROING(Neg(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Neg(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Mvn(v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Mvn(v0.V8B(), v0.V8B()));
|
||||
TEST_ZEROING(Orn(v0.V8B(), v0.V8B(), v0.V8B()));
|
||||
TEST_ZEROING(Orn(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Orr(v0.V8H(), 0x10, 8));
|
||||
TEST_ZEROING(Orr(v0.V4H(), 0x10, 8));
|
||||
TEST_ZEROING(Mov(v0.V8B(), v0.V8B()));
|
||||
TEST_ZEROING(Mov(v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Pmul(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Pmull(v0.V8H(), v0.V8B(), v0.V8B()));
|
||||
TEST_ZEROING(Pmull2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Raddhn2(v0.V16B(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Raddhn(v0.V4H(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Rbit(v0.V8B(), v0.V8B()));
|
||||
TEST_ZEROING(Rbit(v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Rsubhn2(v0.V16B(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Rsubhn(v0.V4H(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Saba(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Saba(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Saba(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sabal2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sabal(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sabd(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sabd(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Sabd(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sabdl2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sabdl(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sadalp(v0.V8H(), v0.V16B()));
|
||||
TEST_ZEROING(Saddl2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Saddl(v0.V2D(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Saddl(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Saddw2(v0.V8H(), v0.V8H(), v0.V16B()));
|
||||
TEST_ZEROING(Saddw(v0.V4S(), v0.V4S(), v0.V4H()));
|
||||
TEST_ZEROING(Scvtf(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Scvtf(v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Scvtf(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Sdot(v0.V4S(), v0.V16B(), v0.S4B(), 0));
|
||||
TEST_ZEROING(Sdot(v0.V2S(), v0.V8B(), v0.S4B(), 0));
|
||||
TEST_ZEROING(Sdot(v0.V4S(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sdot(v0.V2S(), v0.V8B(), v0.V8B()));
|
||||
TEST_ZEROING(Shadd(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Shadd(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Shl(v0.V2D(), v0.V2D(), 56));
|
||||
TEST_ZEROING(Shll2(v0.V8H(), v0.V16B(), 8));
|
||||
TEST_ZEROING(Shll(v0.V2D(), v0.V2S(), 32));
|
||||
TEST_ZEROING(Shsub(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Shsub(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sli(v0.V2D(), v0.V2D(), 56));
|
||||
TEST_ZEROING(Sli(v0.V2S(), v0.V2S(), 16));
|
||||
TEST_ZEROING(Smax(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Smax(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Smaxp(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Smaxp(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Smin(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Smin(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sminp(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sminp(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Smlal2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Smlal(v0.V2D(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Smlal(v0.V2D(), v0.V2S(), v0.S(), 0));
|
||||
TEST_ZEROING(Smlsl2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Smlsl(v0.V2D(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Smlsl(v0.V2D(), v0.V2S(), v0.S(), 0));
|
||||
TEST_ZEROING(Smull2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Smull(v0.V2D(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Smull(v0.V2D(), v0.V2S(), v0.S(), 0));
|
||||
TEST_ZEROING(Sqabs(v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sqabs(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sqadd(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sqadd(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sqdmlal2(v0.V4S(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Sqdmlal(v0.V2D(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Sqdmlal(v0.V2D(), v0.V2S(), v0.S(), 0));
|
||||
TEST_ZEROING(Sqdmlsl2(v0.V4S(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Sqdmlsl(v0.V2D(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Sqdmlsl(v0.V2D(), v0.V2S(), v0.S(), 0));
|
||||
TEST_ZEROING(Sqdmulh(v0.V4S(), v0.V4S(), v0.S(), 0));
|
||||
TEST_ZEROING(Sqdmulh(v0.V4H(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Sqdmulh(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Sqdmulh(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sqdmull2(v0.V2D(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Sqdmull(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sqdmull2(v0.V2D(), v0.V4S(), v0.S(), 0));
|
||||
TEST_ZEROING(Sqdmull(v0.V4S(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Sqneg(v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sqneg(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Sqrdmlah(v0.V4S(), v0.V4S(), v0.S(), 0));
|
||||
TEST_ZEROING(Sqrdmlah(v0.V4H(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Sqrdmlah(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Sqrdmlah(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sqrdmlsh(v0.V4S(), v0.V4S(), v0.S(), 0));
|
||||
TEST_ZEROING(Sqrdmlsh(v0.V4H(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Sqrdmlsh(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Sqrdmlsh(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sqrdmulh(v0.V4S(), v0.V4S(), v0.S(), 0));
|
||||
TEST_ZEROING(Sqrdmulh(v0.V4H(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Sqrdmulh(v0.V4S(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Sqrdmulh(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sqrshl(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sqrshl(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sqshl(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sqshl(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sqshl(v0.V2D(), v0.V2D(), 56));
|
||||
TEST_ZEROING(Sqshl(v0.V2S(), v0.V2S(), 16));
|
||||
TEST_ZEROING(Sqshlu(v0.V2D(), v0.V2D(), 56));
|
||||
TEST_ZEROING(Sqshlu(v0.V2S(), v0.V2S(), 16));
|
||||
TEST_ZEROING(Sqsub(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sqsub(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sqxtn2(v0.V16B(), v0.V8H()));
|
||||
TEST_ZEROING(Sqxtn(v0.V2S(), v0.V2D()));
|
||||
TEST_ZEROING(Sqxtun2(v0.V16B(), v0.V8H()));
|
||||
TEST_ZEROING(Sqxtun(v0.V2S(), v0.V2D()));
|
||||
TEST_ZEROING(Srhadd(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Srhadd(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sri(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Sri(v0.V2S(), v0.V2S(), 8));
|
||||
TEST_ZEROING(Srshl(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Srshl(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Srshr(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Srshr(v0.V2S(), v0.V2S(), 8));
|
||||
TEST_ZEROING(Srsra(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Srsra(v0.V2S(), v0.V2S(), 8));
|
||||
TEST_ZEROING(Sshl(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sshl(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Sshr(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Sshr(v0.V2S(), v0.V2S(), 8));
|
||||
TEST_ZEROING(Ssra(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Ssra(v0.V2S(), v0.V2S(), 8));
|
||||
TEST_ZEROING(Ssubl2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Ssubl(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Ssubw2(v0.V8H(), v0.V8H(), v0.V16B()));
|
||||
TEST_ZEROING(Ssubw(v0.V4S(), v0.V4S(), v0.V4H()));
|
||||
TEST_ZEROING(Sub(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Sub(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Subhn2(v0.V16B(), v0.V8H(), v0.V8H()));
|
||||
TEST_ZEROING(Subhn(v0.V4H(), v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Sudot(v0.V4S(), v0.V16B(), v0.S4B(), 0));
|
||||
TEST_ZEROING(Sudot(v0.V2S(), v0.V8B(), v0.S4B(), 2));
|
||||
TEST_ZEROING(Suqadd(v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Suqadd(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Tbl(v0.V8B(), {v0.V16B()}, v0.V8B()));
|
||||
TEST_ZEROING(Tbl(v0.V16B(), {v0.V16B()}, v0.V16B()));
|
||||
TEST_ZEROING(Tbx(v0.V8B(), {v0.V16B()}, v0.V8B()));
|
||||
TEST_ZEROING(Tbx(v0.V16B(), {v0.V16B()}, v0.V16B()));
|
||||
TEST_ZEROING(Trn1(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Trn1(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Trn2(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Trn2(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uaba(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uaba(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uabal2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uabal(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uabd(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uabd(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uabdl2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uabdl(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uadalp(v0.V8H(), v0.V16B()));
|
||||
TEST_ZEROING(Uadalp(v0.V2S(), v0.V4H()));
|
||||
TEST_ZEROING(Uaddl2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uaddl(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uaddlp(v0.V8H(), v0.V16B()));
|
||||
TEST_ZEROING(Uaddlp(v0.V2S(), v0.V4H()));
|
||||
TEST_ZEROING(Uaddw2(v0.V8H(), v0.V8H(), v0.V16B()));
|
||||
TEST_ZEROING(Uaddw(v0.V4S(), v0.V4S(), v0.V4H()));
|
||||
TEST_ZEROING(Ucvtf(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Ucvtf(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Ucvtf(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Ucvtf(v0.V2S(), v0.V2S(), 8));
|
||||
TEST_ZEROING(Udot(v0.V4S(), v0.V16B(), v0.S4B(), 0));
|
||||
TEST_ZEROING(Udot(v0.V2S(), v0.V8B(), v0.S4B(), 0));
|
||||
TEST_ZEROING(Udot(v0.V2S(), v0.V8B(), v0.V8B()));
|
||||
TEST_ZEROING(Udot(v0.V4S(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uhadd(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uhadd(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uhsub(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uhsub(v0.V2S(), v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Umax(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Umax(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Umaxp(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Umaxp(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Umin(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Umin(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uminp(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uminp(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Umlal2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Umlal(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Umlal(v0.V2D(), v0.V2S(), v0.S(), 0));
|
||||
TEST_ZEROING(Umlal(v0.V4S(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Umlsl2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Umlsl(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Umlsl(v0.V2D(), v0.V2S(), v0.S(), 0));
|
||||
TEST_ZEROING(Umlsl(v0.V4S(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Umull2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Umull(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Umull(v0.V2D(), v0.V2S(), v0.S(), 0));
|
||||
TEST_ZEROING(Umull(v0.V4S(), v0.V4H(), v0.H(), 0));
|
||||
TEST_ZEROING(Uqadd(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uqadd(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uqrshl(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uqrshl(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uqshl(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uqshl(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uqsub(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uqsub(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uqxtn2(v0.V16B(), v0.V8H()));
|
||||
TEST_ZEROING(Uqxtn(v0.V2S(), v0.V2D()));
|
||||
TEST_ZEROING(Urecpe(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Urecpe(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Urhadd(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Urhadd(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Urshl(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Urshl(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Urshr(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Urshr(v0.V2S(), v0.V2S(), 8));
|
||||
TEST_ZEROING(Ursqrte(v0.V4S(), v0.V4S()));
|
||||
TEST_ZEROING(Ursqrte(v0.V2S(), v0.V2S()));
|
||||
TEST_ZEROING(Ursra(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Ursra(v0.V2S(), v0.V2S(), 8));
|
||||
TEST_ZEROING(Usdot(v0.V4S(), v0.V16B(), v0.S4B(), 0));
|
||||
TEST_ZEROING(Usdot(v0.V2S(), v0.V8B(), v0.S4B(), 1));
|
||||
TEST_ZEROING(Usdot(v0.V4S(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Usdot(v0.V2S(), v0.V8B(), v0.V8B()));
|
||||
TEST_ZEROING(Ushl(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Ushl(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Ushr(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Ushr(v0.V2S(), v0.V2S(), 8));
|
||||
TEST_ZEROING(Usqadd(v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Usqadd(v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Usra(v0.V2D(), v0.V2D(), 8));
|
||||
TEST_ZEROING(Usra(v0.V2S(), v0.V2S(), 8));
|
||||
TEST_ZEROING(Usubl2(v0.V8H(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Usubl(v0.V4S(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Usubw2(v0.V8H(), v0.V8H(), v0.V16B()));
|
||||
TEST_ZEROING(Usubw(v0.V4S(), v0.V4S(), v0.V4H()));
|
||||
TEST_ZEROING(Uzp1(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uzp1(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Uzp2(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Uzp2(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Xtn2(v0.V16B(), v0.V8H()));
|
||||
TEST_ZEROING(Xtn(v0.V4H(), v0.V4S()));
|
||||
TEST_ZEROING(Zip1(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Zip1(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
TEST_ZEROING(Zip2(v0.V16B(), v0.V16B(), v0.V16B()));
|
||||
TEST_ZEROING(Zip2(v0.V4H(), v0.V4H(), v0.V4H()));
|
||||
|
||||
__ Mov(z11.VnD(), 0);
|
||||
|
||||
Label done, zero_127_to_0;
|
||||
__ Rdvl(x0, 1);
|
||||
__ Cmp(x0, 16);
|
||||
__ B(gt, &zero_127_to_0);
|
||||
|
||||
// For 128-bit VL, there's nothing to be tested, so zero the whole register.
|
||||
__ Mov(z10.VnD(), 0);
|
||||
__ B(&done);
|
||||
|
||||
// Set the expected non-zero bits to zero.
|
||||
__ Bind(&zero_127_to_0);
|
||||
__ Ext(z10.VnB(), z10.VnB(), z11.VnB(), kDRegSizeInBytes * 2);
|
||||
|
||||
__ Bind(&done);
|
||||
|
||||
END();
|
||||
|
||||
if (CAN_RUN()) {
|
||||
RUN();
|
||||
ASSERT_EQUAL_SVE(z11, z10);
|
||||
}
|
||||
}
|
||||
|
||||
#undef TEST_ZEROING
|
||||
|
||||
// Emit one instruction whose only checked destination is v0/z0:
//  1. seed z0 with Index(z0.VnB(), 0, 1),
//  2. emit INST,
//  3. OR z0 into the cumulative result register z10.
// Registers other than z0 that INST may write are not accumulated.
#define TEST_ZEROING_1(INST) \
|
||||
__ Index(z0.VnB(), 0, 1); \
|
||||
__ INST; \
|
||||
__ Orr(z10.VnB(), z10.VnB(), z0.VnB());
|
||||
// Two-register variant: seed z0 and z1 with Index(..., 0, 1), emit INST, then
// OR both z0 and z1 into the cumulative result register z10.
#define TEST_ZEROING_2(INST) \
|
||||
__ Index(z0.VnB(), 0, 1); \
|
||||
__ Index(z1.VnB(), 0, 1); \
|
||||
__ INST; \
|
||||
__ Orr(z10.VnB(), z10.VnB(), z0.VnB()); \
|
||||
__ Orr(z10.VnB(), z10.VnB(), z1.VnB());
|
||||
// Three-register variant: seed z0, z1 and z2 with Index(..., 0, 1), emit INST,
// then OR z0, z1 and z2 into the cumulative result register z10.
#define TEST_ZEROING_3(INST) \
|
||||
__ Index(z0.VnB(), 0, 1); \
|
||||
__ Index(z1.VnB(), 0, 1); \
|
||||
__ Index(z2.VnB(), 0, 1); \
|
||||
__ INST; \
|
||||
__ Orr(z10.VnB(), z10.VnB(), z0.VnB()); \
|
||||
__ Orr(z10.VnB(), z10.VnB(), z1.VnB()); \
|
||||
__ Orr(z10.VnB(), z10.VnB(), z2.VnB());
|
||||
// Four-register variant: seed z0, z1, z2 and z3 with Index(..., 0, 1), emit
// INST, then OR all four registers into the cumulative result register z10.
#define TEST_ZEROING_4(INST) \
|
||||
__ Index(z0.VnB(), 0, 1); \
|
||||
__ Index(z1.VnB(), 0, 1); \
|
||||
__ Index(z2.VnB(), 0, 1); \
|
||||
__ Index(z3.VnB(), 0, 1); \
|
||||
__ INST; \
|
||||
__ Orr(z10.VnB(), z10.VnB(), z0.VnB()); \
|
||||
__ Orr(z10.VnB(), z10.VnB(), z1.VnB()); \
|
||||
__ Orr(z10.VnB(), z10.VnB(), z2.VnB()); \
|
||||
__ Orr(z10.VnB(), z10.VnB(), z3.VnB());
|
||||
|
||||
// Check that NEON load instructions leave the bits above bit 127 of their
// destination registers clear, as observed through the overlapping Z
// registers.
//
// Each TEST_ZEROING_<n> use seeds the first <n> registers of z0-z3 with a
// non-zero pattern, emits the load, and ORs those registers into z10. Once the
// low 128 bits of z10 have been discarded, z10 must equal the all-zero z11.
TEST_SVE(neon_load_zero_high) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kSVE);

  START();
  __ Mov(z10.VnD(), 0);  // Initialise cumulative result register.

  // Initialise x0 to point to a buffer from which data is loaded. The contents
  // do not need to be defined.
  //
  // The widest loads below transfer four Q registers, i.e. exactly data_size
  // bytes starting at [x0], so x0 must hold the address of the first byte of
  // the buffer. Any later base address makes the loads read past the end of
  // the allocation.
  int data_size = 4 * kQRegSizeInBytes;
  uint8_t* data = new uint8_t[data_size];
  __ Mov(x0, reinterpret_cast<uintptr_t>(data));

  MemOperand mop = MemOperand(x0);

  // Use the TEST_ZEROING_<n> variant that matches the number of registers the
  // instruction writes, so that every destination register is checked.
  TEST_ZEROING_1(Ld1(v0.V16B(), mop));
  TEST_ZEROING_1(Ld1(v0.V4H(), mop));
  TEST_ZEROING_2(Ld1(v0.V16B(), v1.V16B(), mop));
  TEST_ZEROING_2(Ld1(v0.V4H(), v1.V4H(), mop));
  TEST_ZEROING_3(Ld1(v0.V16B(), v1.V16B(), v2.V16B(), mop));
  TEST_ZEROING_3(Ld1(v0.V4H(), v1.V4H(), v2.V4H(), mop));
  TEST_ZEROING_4(Ld1(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), mop));
  TEST_ZEROING_4(Ld1(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), mop));
  TEST_ZEROING_1(Ld1(v0.B(), 1, mop));
  TEST_ZEROING_1(Ld1(v0.D(), 1, mop));
  TEST_ZEROING_1(Ld1(v0.H(), 1, mop));
  TEST_ZEROING_1(Ld1(v0.S(), 1, mop));
  TEST_ZEROING_1(Ld1r(v0.V16B(), mop));
  TEST_ZEROING_1(Ld1r(v0.V4H(), mop));
  TEST_ZEROING_2(Ld2(v0.V16B(), v1.V16B(), mop));
  TEST_ZEROING_2(Ld2(v0.V4H(), v1.V4H(), mop));
  TEST_ZEROING_2(Ld2(v0.B(), v1.B(), 1, mop));
  TEST_ZEROING_2(Ld2(v0.D(), v1.D(), 1, mop));
  TEST_ZEROING_2(Ld2(v0.H(), v1.H(), 1, mop));
  TEST_ZEROING_2(Ld2(v0.S(), v1.S(), 1, mop));
  TEST_ZEROING_2(Ld2r(v0.V16B(), v1.V16B(), mop));
  TEST_ZEROING_2(Ld2r(v0.V4H(), v1.V4H(), mop));
  TEST_ZEROING_3(Ld3(v0.V16B(), v1.V16B(), v2.V16B(), mop));
  TEST_ZEROING_3(Ld3(v0.V4H(), v1.V4H(), v2.V4H(), mop));
  TEST_ZEROING_3(Ld3(v0.B(), v1.B(), v2.B(), 1, mop));
  TEST_ZEROING_3(Ld3(v0.D(), v1.D(), v2.D(), 1, mop));
  TEST_ZEROING_3(Ld3(v0.H(), v1.H(), v2.H(), 1, mop));
  TEST_ZEROING_3(Ld3(v0.S(), v1.S(), v2.S(), 1, mop));
  TEST_ZEROING_3(Ld3r(v0.V16B(), v1.V16B(), v2.V16B(), mop));
  TEST_ZEROING_3(Ld3r(v0.V4H(), v1.V4H(), v2.V4H(), mop));
  TEST_ZEROING_4(Ld4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), mop));
  TEST_ZEROING_4(Ld4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), mop));
  TEST_ZEROING_4(Ld4(v0.B(), v1.B(), v2.B(), v3.B(), 1, mop));
  TEST_ZEROING_4(Ld4(v0.D(), v1.D(), v2.D(), v3.D(), 1, mop));
  TEST_ZEROING_4(Ld4(v0.H(), v1.H(), v2.H(), v3.H(), 1, mop));
  TEST_ZEROING_4(Ld4(v0.S(), v1.S(), v2.S(), v3.S(), 1, mop));
  TEST_ZEROING_4(Ld4r(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), mop));
  TEST_ZEROING_4(Ld4r(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), mop));

  __ Mov(z11.VnD(), 0);  // Expected result: all zero.

  Label done, zero_127_to_0;
  __ Rdvl(x0, 1);
  __ Cmp(x0, 16);
  __ B(gt, &zero_127_to_0);

  // For 128-bit VL, there's nothing to be tested, so zero the whole register.
  __ Mov(z10.VnD(), 0);
  __ B(&done);

  // Set the expected non-zero bits to zero: the low 128 bits of z10 hold the
  // loaded data, so shift them out and fill from the all-zero z11.
  __ Bind(&zero_127_to_0);
  __ Ext(z10.VnB(), z10.VnB(), z11.VnB(), kDRegSizeInBytes * 2);

  __ Bind(&done);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_SVE(z11, z10);
  }

  // The buffer is only read by the generated code run above, so it can be
  // released now rather than leaked.
  delete[] data;
}
|
||||
|
||||
#undef TEST_ZEROING_1
|
||||
#undef TEST_ZEROING_2
|
||||
#undef TEST_ZEROING_3
|
||||
#undef TEST_ZEROING_4
|
||||
|
||||
TEST_SVE(sve_load_store_sp_base_regression_test) {
|
||||
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
|
||||
START();
|
||||
@@ -19945,6 +20648,8 @@ TEST_SVE(sve_load_store_sp_base_regression_test) {
|
||||
__ dci(0xe58043e0); // str z0, [sp]
|
||||
}
|
||||
|
||||
__ Drop(128 * 2 * kXRegSizeInBytes);
|
||||
|
||||
END();
|
||||
|
||||
if (CAN_RUN()) {
|
||||
|
||||
@@ -3778,5 +3778,91 @@ TEST_FP_FCMA_NEON_NEONHALF(fcmla_1, fcmla(v0.V8H(), v1.V8H(), v2.H(), 2, 180))
|
||||
TEST_FP_FCMA_NEON_NEONHALF(fcmla_2, fcmla(v0.V4H(), v1.V4H(), v2.V4H(), 180))
|
||||
TEST_FP_FCMA_NEON_NEONHALF(fcmla_3, fcmla(v0.V8H(), v1.V8H(), v2.V8H(), 0))
|
||||
|
||||
// Instantiate TEST_TEMPLATE for the 1Q form of pmull, tagged with the CPU
// features {kNEON, kPmull1Q}. The generated test is named NEON_Pmull1Q_<NAME>.
// TEST_FEAT is a local helper and is undefined again after use.
#define TEST_FEAT(NAME, ASM) \
|
||||
TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kPmull1Q), \
|
||||
NEON_Pmull1Q_##NAME, \
|
||||
ASM)
|
||||
TEST_FEAT(pmull1q_0, pmull(v5.V1Q(), v6.V1D(), v7.V1D()))
|
||||
#undef TEST_FEAT
|
||||
|
||||
#define TEST_NEON_SHA3(NAME, ASM) \
|
||||
TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3), \
|
||||
NEON_SHA3_##NAME, \
|
||||
ASM)
|
||||
TEST_NEON_SHA3(bcax_0, bcax(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()))
|
||||
TEST_NEON_SHA3(eor3_0, eor3(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()))
|
||||
TEST_NEON_SHA3(xar_0, xar(v0.V2D(), v1.V2D(), v2.V2D(), 42))
|
||||
TEST_NEON_SHA3(rax1_0, rax1(v0.V2D(), v1.V2D(), v2.V2D()))
|
||||
|
||||
#define TEST_NEON_SHA1(NAME, ASM) \
|
||||
TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA1), \
|
||||
NEON_SHA1_##NAME, \
|
||||
ASM)
|
||||
TEST_NEON_SHA1(sha1c_0, sha1c(q0, s12, v20.V4S()))
|
||||
TEST_NEON_SHA1(sha1m_0, sha1m(q22, s2, v13.V4S()))
|
||||
TEST_NEON_SHA1(sha1p_0, sha1p(q31, s5, v15.V4S()))
|
||||
TEST_NEON_SHA1(sha1su0_0, sha1su0(v19.V4S(), v9.V4S(), v27.V4S()))
|
||||
TEST_NEON_SHA1(sha1h_0, sha1h(s12, s0))
|
||||
TEST_NEON_SHA1(sha1su1_0, sha1su1(v2.V4S(), v4.V4S()))
|
||||
|
||||
// Instantiate TEST_TEMPLATE once per SHA-256 instruction, tagged with the CPU
// features {kNEON, kSHA2}. The generated tests are named NEON_SHA2_<NAME>.
// TEST_FEAT is a local helper and is undefined again after use.
#define TEST_FEAT(NAME, ASM) \
|
||||
TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA2), \
|
||||
NEON_SHA2_##NAME, \
|
||||
ASM)
|
||||
TEST_FEAT(sha256h_0, sha256h(q0, q12, v20.V4S()))
|
||||
TEST_FEAT(sha256h2_0, sha256h2(q22, q2, v13.V4S()))
|
||||
TEST_FEAT(sha256su0_0, sha256su0(v2.V4S(), v4.V4S()))
|
||||
TEST_FEAT(sha256su1_0, sha256su1(v19.V4S(), v9.V4S(), v27.V4S()))
|
||||
#undef TEST_FEAT
|
||||
|
||||
// Instantiate TEST_TEMPLATE once per SHA-512 instruction, tagged with the CPU
// features {kNEON, kSHA512}. The generated tests are named NEON_SHA512_<NAME>.
// TEST_FEAT is a local helper and is undefined again after use.
#define TEST_FEAT(NAME, ASM) \
|
||||
TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA512), \
|
||||
NEON_SHA512_##NAME, \
|
||||
ASM)
|
||||
TEST_FEAT(sha512h_0, sha512h(q0, q12, v20.V2D()))
|
||||
TEST_FEAT(sha512h2_0, sha512h2(q22, q2, v13.V2D()))
|
||||
TEST_FEAT(sha512su0_0, sha512su0(v2.V2D(), v4.V2D()))
|
||||
TEST_FEAT(sha512su1_0, sha512su1(v19.V2D(), v9.V2D(), v27.V2D()))
|
||||
#undef TEST_FEAT
|
||||
|
||||
// Instantiate TEST_TEMPLATE once per AES instruction (aesd, aese, aesimc,
// aesmc), tagged with the CPU features {kNEON, kAES}. The generated tests are
// named NEON_AES_<NAME>. TEST_FEAT is a local helper and is undefined again
// after use.
#define TEST_FEAT(NAME, ASM) \
|
||||
TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kAES), \
|
||||
NEON_AES_##NAME, \
|
||||
ASM)
|
||||
TEST_FEAT(aesd_0, aesd(v0.V16B(), v29.V16B()))
|
||||
TEST_FEAT(aese_0, aese(v0.V16B(), v29.V16B()))
|
||||
TEST_FEAT(aesimc_0, aesimc(v0.V16B(), v29.V16B()))
|
||||
TEST_FEAT(aesmc_0, aesmc(v0.V16B(), v29.V16B()))
|
||||
#undef TEST_FEAT
|
||||
|
||||
// Instantiate TEST_TEMPLATE once per SM3 instruction, tagged with the CPU
// features {kNEON, kSM3}. The generated tests are named NEON_SM3_<NAME>.
// The sm3tt* forms take a trailing lane index (0-3 are used here).
// TEST_FEAT is a local helper and is undefined again after use.
#define TEST_FEAT(NAME, ASM) \
|
||||
TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSM3), \
|
||||
NEON_SM3_##NAME, \
|
||||
ASM)
|
||||
TEST_FEAT(sm3partw1_0, sm3partw1(v12.V4S(), v13.V4S(), v14.V4S()))
|
||||
TEST_FEAT(sm3partw2_0, sm3partw2(v12.V4S(), v13.V4S(), v14.V4S()))
|
||||
TEST_FEAT(sm3ss1_0, sm3ss1(v13.V4S(), v15.V4S(), v17.V4S(), v21.V4S()))
|
||||
TEST_FEAT(sm3tt1a_0, sm3tt1a(v30.V4S(), v29.V4S(), v9.V4S(), 1))
|
||||
TEST_FEAT(sm3tt1b_0, sm3tt1b(v30.V4S(), v29.V4S(), v9.V4S(), 3))
|
||||
TEST_FEAT(sm3tt2a_0, sm3tt2a(v30.V4S(), v29.V4S(), v9.V4S(), 2))
|
||||
TEST_FEAT(sm3tt2b_0, sm3tt2b(v30.V4S(), v29.V4S(), v9.V4S(), 0))
|
||||
#undef TEST_FEAT
|
||||
|
||||
// Instantiate TEST_TEMPLATE for sm4e and sm4ekey, tagged with the CPU features
// {kNEON, kSM4}. The generated tests are named NEON_SM4_<NAME>.
// TEST_FEAT is a local helper and is undefined again after use.
#define TEST_FEAT(NAME, ASM) \
|
||||
TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSM4), \
|
||||
NEON_SM4_##NAME, \
|
||||
ASM)
|
||||
TEST_FEAT(sm4e, sm4e(v12.V4S(), v13.V4S()))
|
||||
TEST_FEAT(sm4ekey, sm4ekey(v12.V4S(), v13.V4S(), v14.V4S()))
|
||||
#undef TEST_FEAT
|
||||
|
||||
// Instantiate TEST_TEMPLATE for the Q-sized (VnQ destination) forms of pmullb
// and pmullt, tagged with the CPU features {kSVE2, kSVEPmull128}. The
// generated tests are named SVE_PMULL128_<NAME>.
// TEST_FEAT is a local helper and is undefined again after use.
#define TEST_FEAT(NAME, ASM) \
|
||||
TEST_TEMPLATE(CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEPmull128), \
|
||||
SVE_PMULL128_##NAME, \
|
||||
ASM)
|
||||
TEST_FEAT(pmullb, pmullb(z12.VnQ(), z21.VnD(), z12.VnD()))
|
||||
TEST_FEAT(pmullt, pmullt(z12.VnQ(), z21.VnD(), z12.VnD()))
|
||||
#undef TEST_FEAT
|
||||
|
||||
} // namespace aarch64
|
||||
} // namespace vixl
|
||||
|
||||
@@ -2611,6 +2611,7 @@ TEST(system_mrs) {
|
||||
COMPARE(mrs(x15, FPCR), "mrs x15, fpcr");
|
||||
COMPARE(mrs(x20, RNDR), "mrs x20, rndr");
|
||||
COMPARE(mrs(x5, RNDRRS), "mrs x5, rndrrs");
|
||||
COMPARE(mrs(x9, DCZID_EL0), "mrs x9, dczid_el0");
|
||||
|
||||
// Test mrs that use system registers we haven't named.
|
||||
COMPARE(dci(MRS | (0x5555 << 5)), "mrs x0, S3_2_c10_c10_5");
|
||||
@@ -3359,6 +3360,20 @@ TEST(cssc) {
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
// Disassembly test for chkfeat and the gcs* instructions exercised below.
// Each COMPARE_MACRO pairs a MacroAssembler call with the text it is expected
// to disassemble to. Gcspopm() with no argument and Gcspopm(xzr) are both
// expected to print as plain "gcspopm", without a register operand.
TEST(gcs) {
|
||||
SETUP();
|
||||
|
||||
COMPARE_MACRO(Chkfeat(x16), "chkfeat x16");
|
||||
COMPARE_MACRO(Gcspopm(x0), "gcspopm x0");
|
||||
COMPARE_MACRO(Gcspopm(), "gcspopm");
|
||||
COMPARE_MACRO(Gcspopm(xzr), "gcspopm");
|
||||
COMPARE_MACRO(Gcsss1(x4), "gcsss1 x4");
|
||||
COMPARE_MACRO(Gcsss2(x2), "gcsss2 x2");
|
||||
COMPARE_MACRO(Gcspushm(x1), "gcspushm x1");
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
TEST(architecture_features) {
|
||||
SETUP();
|
||||
|
||||
@@ -3543,19 +3558,19 @@ TEST(architecture_features) {
|
||||
COMPARE_PREFIX(dci(0xf8e08000), "swpal"); // SWPAL_64_memop
|
||||
|
||||
// ARMv8.1 - RDM
|
||||
COMPARE_PREFIX(dci(0x2e008400), "sqrdmlah"); // SQRDMLAH_asimdsame2_only
|
||||
COMPARE_PREFIX(dci(0x2e008c00), "sqrdmlsh"); // SQRDMLSH_asimdsame2_only
|
||||
COMPARE_PREFIX(dci(0x2e808400), "sqrdmlah"); // SQRDMLAH_asimdsame2_only
|
||||
COMPARE_PREFIX(dci(0x2e808c00), "sqrdmlsh"); // SQRDMLSH_asimdsame2_only
|
||||
COMPARE_PREFIX(dci(0x2f40d000), "sqrdmlah"); // SQRDMLAH_asimdelem_R
|
||||
COMPARE_PREFIX(dci(0x2f40f000), "sqrdmlsh"); // SQRDMLSH_asimdelem_R
|
||||
COMPARE_PREFIX(dci(0x7e008400), "sqrdmlah"); // SQRDMLAH_asisdsame2_only
|
||||
COMPARE_PREFIX(dci(0x7e008c00), "sqrdmlsh"); // SQRDMLSH_asisdsame2_only
|
||||
COMPARE_PREFIX(dci(0x7e408400), "sqrdmlah"); // SQRDMLAH_asisdsame2_only
|
||||
COMPARE_PREFIX(dci(0x7e408c00), "sqrdmlsh"); // SQRDMLSH_asisdsame2_only
|
||||
COMPARE_PREFIX(dci(0x7f40d000), "sqrdmlah"); // SQRDMLAH_asisdelem_R
|
||||
COMPARE_PREFIX(dci(0x7f40f000), "sqrdmlsh"); // SQRDMLSH_asisdelem_R
|
||||
|
||||
// ARMv8.2 - DotProd
|
||||
COMPARE_PREFIX(dci(0x0e009400), "sdot"); // SDOT_asimdsame2_D
|
||||
COMPARE_PREFIX(dci(0x0e809400), "sdot"); // SDOT_asimdsame2_D
|
||||
COMPARE_PREFIX(dci(0x0f00e000), "sdot"); // SDOT_asimdelem_D
|
||||
COMPARE_PREFIX(dci(0x2e009400), "udot"); // UDOT_asimdsame2_D
|
||||
COMPARE_PREFIX(dci(0x2e809400), "udot"); // UDOT_asimdsame2_D
|
||||
COMPARE_PREFIX(dci(0x2f00e000), "udot"); // UDOT_asimdelem_D
|
||||
|
||||
// ARMv8.2 - FHM
|
||||
@@ -3775,42 +3790,39 @@ TEST(architecture_features) {
|
||||
COMPARE_PREFIX(dci(0xd503221f), "esb"); // ESB_HI_hints
|
||||
|
||||
// ARMv8.2 - SHA3
|
||||
// COMPARE_PREFIX(dci(0xce000000), "eor3"); // EOR3_VVV16_crypto4
|
||||
// COMPARE_PREFIX(dci(0xce200000), "bcax"); // BCAX_VVV16_crypto4
|
||||
// COMPARE_PREFIX(dci(0xce608c00), "rax1"); // RAX1_VVV2_cryptosha512_3
|
||||
// COMPARE_PREFIX(dci(0xce800000), "xar"); // XAR_VVV2_crypto3_imm6
|
||||
COMPARE_PREFIX(dci(0xce000000), "eor3"); // EOR3_VVV16_crypto4
|
||||
COMPARE_PREFIX(dci(0xce200000), "bcax"); // BCAX_VVV16_crypto4
|
||||
COMPARE_PREFIX(dci(0xce608c00), "rax1"); // RAX1_VVV2_cryptosha512_3
|
||||
COMPARE_PREFIX(dci(0xce800000), "xar"); // XAR_VVV2_crypto3_imm6
|
||||
|
||||
// ARMv8.2 - SHA512
|
||||
// COMPARE_PREFIX(dci(0xce608000), "sha512h"); // SHA512H_QQV_cryptosha512_3
|
||||
// COMPARE_PREFIX(dci(0xce608400), "sha512h2"); //
|
||||
// SHA512H2_QQV_cryptosha512_3
|
||||
// COMPARE_PREFIX(dci(0xce608800), "sha512su1"); //
|
||||
// SHA512SU1_VVV2_cryptosha512_3
|
||||
// COMPARE_PREFIX(dci(0xcec08000), "sha512su0"); //
|
||||
// SHA512SU0_VV2_cryptosha512_2
|
||||
COMPARE_PREFIX(dci(0xce608000), "sha512h"); // SHA512H_QQV_cryptosha512_3
|
||||
COMPARE_PREFIX(dci(0xce608400), "sha512h2"); // SHA512H2_QQV_cryptosha512_3
|
||||
COMPARE_PREFIX(dci(0xce608800),
|
||||
"sha512su1"); // SHA512SU1_VVV2_cryptosha512_3
|
||||
COMPARE_PREFIX(dci(0xcec08000), "sha512su0"); // SHA512SU0_VV2_cryptosha512_2
|
||||
|
||||
// ARMv8.2 - SM3
|
||||
// COMPARE_PREFIX(dci(0xce400000), "sm3ss1"); // SM3SS1_VVV4_crypto4
|
||||
// COMPARE_PREFIX(dci(0xce408000), "sm3tt1a"); // SM3TT1A_VVV4_crypto3_imm2
|
||||
// COMPARE_PREFIX(dci(0xce408400), "sm3tt1b"); // SM3TT1B_VVV4_crypto3_imm2
|
||||
// COMPARE_PREFIX(dci(0xce408800), "sm3tt2a"); // SM3TT2A_VVV4_crypto3_imm2
|
||||
// COMPARE_PREFIX(dci(0xce408c00), "sm3tt2b"); // SM3TT2B_VVV_crypto3_imm2
|
||||
// COMPARE_PREFIX(dci(0xce60c000), "sm3partw1"); //
|
||||
// SM3PARTW1_VVV4_cryptosha512_3
|
||||
// COMPARE_PREFIX(dci(0xce60c400), "sm3partw2"); //
|
||||
// SM3PARTW2_VVV4_cryptosha512_3
|
||||
COMPARE_PREFIX(dci(0xce400000), "sm3ss1"); // SM3SS1_VVV4_crypto4
|
||||
COMPARE_PREFIX(dci(0xce408000), "sm3tt1a"); // SM3TT1A_VVV4_crypto3_imm2
|
||||
COMPARE_PREFIX(dci(0xce408400), "sm3tt1b"); // SM3TT1B_VVV4_crypto3_imm2
|
||||
COMPARE_PREFIX(dci(0xce408800), "sm3tt2a"); // SM3TT2A_VVV4_crypto3_imm2
|
||||
COMPARE_PREFIX(dci(0xce408c00), "sm3tt2b"); // SM3TT2B_VVV_crypto3_imm2
|
||||
COMPARE_PREFIX(dci(0xce60c000),
|
||||
"sm3partw1"); // SM3PARTW1_VVV4_cryptosha512_3
|
||||
COMPARE_PREFIX(dci(0xce60c400),
|
||||
"sm3partw2"); // SM3PARTW2_VVV4_cryptosha512_3
|
||||
|
||||
// ARMv8.2 - SM4
|
||||
// COMPARE_PREFIX(dci(0xce60c800), "sm4ekey"); //
|
||||
// SM4EKEY_VVV4_cryptosha512_3
|
||||
// COMPARE_PREFIX(dci(0xcec08400), "sm4e"); // SM4E_VV4_cryptosha512_2
|
||||
COMPARE_PREFIX(dci(0xce60c800), "sm4ekey"); // SM4EKEY_VVV4_cryptosha512_3
|
||||
COMPARE_PREFIX(dci(0xcec08400), "sm4e"); // SM4E_VV4_cryptosha512_2
|
||||
|
||||
// ARMv8.2 - SPE
|
||||
// COMPARE_PREFIX(dci(0xd503223f), "psb"); // PSB_HC_hints
|
||||
|
||||
// ARMv8.3 - FCMA
|
||||
COMPARE_PREFIX(dci(0x2e40c400), "fcmla"); // FCMLA_asimdsame2_C
|
||||
COMPARE_PREFIX(dci(0x2e00e400), "fcadd"); // FCADD_asimdsame2_C
|
||||
COMPARE_PREFIX(dci(0x2e40e400), "fcadd"); // FCADD_asimdsame2_C
|
||||
COMPARE_PREFIX(dci(0x2f401000), "fcmla"); // FCMLA_asimdelem_C_H
|
||||
COMPARE_PREFIX(dci(0x6f801000), "fcmla"); // FCMLA_asimdelem_C_S
|
||||
|
||||
|
||||
@@ -1792,6 +1792,34 @@ TEST(neon_3same) {
|
||||
COMPARE_MACRO(Pmul(v6.V16B(), v7.V16B(), v8.V16B()),
|
||||
"pmul v6.16b, v7.16b, v8.16b");
|
||||
|
||||
// Check unallocated vector types for SDOT.
|
||||
COMPARE(dci(0x0e009400), "unallocated (Unallocated)"); // 8B
|
||||
COMPARE(dci(0x4e009400), "unallocated (Unallocated)"); // 16B
|
||||
COMPARE(dci(0x0e409400), "unallocated (Unallocated)"); // 4H
|
||||
COMPARE(dci(0x4e409400), "unallocated (Unallocated)"); // 8H
|
||||
COMPARE(dci(0x0ec09400), "unallocated (Unallocated)"); // 1D
|
||||
COMPARE(dci(0x4ec09400), "unallocated (Unallocated)"); // 2D
|
||||
|
||||
// Check unallocated vector types for UDOT.
|
||||
COMPARE(dci(0x2e009400), "unallocated (Unallocated)"); // 8B
|
||||
COMPARE(dci(0x6e009400), "unallocated (Unallocated)"); // 16B
|
||||
COMPARE(dci(0x2e409400), "unallocated (Unallocated)"); // 4H
|
||||
COMPARE(dci(0x6e409400), "unallocated (Unallocated)"); // 8H
|
||||
COMPARE(dci(0x2ec09400), "unallocated (Unallocated)"); // 1D
|
||||
COMPARE(dci(0x6ec09400), "unallocated (Unallocated)"); // 2D
|
||||
|
||||
// Check unallocated vector types for SQRDMLAH.
|
||||
COMPARE(dci(0x2e008400), "unallocated (Unallocated)"); // 8B
|
||||
COMPARE(dci(0x6e008400), "unallocated (Unallocated)"); // 16B
|
||||
COMPARE(dci(0x2ec08400), "unallocated (Unallocated)"); // 1D
|
||||
COMPARE(dci(0x6ec08400), "unallocated (Unallocated)"); // 2D
|
||||
|
||||
// Check unallocated vector types for SQRDMLSH.
|
||||
COMPARE(dci(0x2e008c00), "unallocated (Unallocated)"); // 8B
|
||||
COMPARE(dci(0x6e008c00), "unallocated (Unallocated)"); // 16B
|
||||
COMPARE(dci(0x2ec08c00), "unallocated (Unallocated)"); // 1D
|
||||
COMPARE(dci(0x6ec08c00), "unallocated (Unallocated)"); // 2D
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
@@ -1924,6 +1952,16 @@ TEST(neon_3same_extra_fcadd) {
|
||||
COMPARE(dci(0x2e00ec00), "unallocated (Unallocated)"); // opcode = 0x1101
|
||||
COMPARE(dci(0x2e00fc00), "unallocated (Unallocated)"); // opcode = 0x1111
|
||||
|
||||
// Check unallocated vector types for FCADD.
|
||||
COMPARE(dci(0x2e00e400), "unallocated (Unallocated)"); // 8B
|
||||
COMPARE(dci(0x6e00e400), "unallocated (Unallocated)"); // 16B
|
||||
COMPARE(dci(0x2ec0e400), "unallocated (Unallocated)"); // 1D
|
||||
|
||||
// Check unallocated vector types for FCMLA.
|
||||
COMPARE(dci(0x2e00c400), "unallocated (Unallocated)"); // 8B
|
||||
COMPARE(dci(0x6e00c400), "unallocated (Unallocated)"); // 16B
|
||||
COMPARE(dci(0x2ec0c400), "unallocated (Unallocated)"); // 1D
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
@@ -2594,6 +2632,13 @@ TEST(neon_fp_byelement) {
|
||||
COMPARE_MACRO(Fcmla(v0.V8H(), v1.V8H(), v31.H(), 3, 0),
|
||||
"fcmla v0.8h, v1.8h, v31.h[3], #0");
|
||||
|
||||
// Check unallocated vector types for FCMLA.
|
||||
COMPARE(dci(0x2f001000), "unallocated (Unallocated)"); // 8B
|
||||
COMPARE(dci(0x6f001000), "unallocated (Unallocated)"); // 16B
|
||||
COMPARE(dci(0x2f801000), "unallocated (Unallocated)"); // 2S
|
||||
COMPARE(dci(0x2fc01000), "unallocated (Unallocated)"); // 1D
|
||||
COMPARE(dci(0x6fc01000), "unallocated (Unallocated)"); // 2D
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
@@ -2904,6 +2949,10 @@ TEST(neon_3different) {
|
||||
"pmull v0.8h, v1.8b, v2.8b");
|
||||
COMPARE_MACRO(Pmull2(v2.V8H(), v3.V16B(), v4.V16B()),
|
||||
"pmull2 v2.8h, v3.16b, v4.16b");
|
||||
COMPARE_MACRO(Pmull(v5.V1Q(), v6.V1D(), v7.V1D()),
|
||||
"pmull v5.1q, v6.1d, v7.1d");
|
||||
COMPARE_MACRO(Pmull2(v8.V1Q(), v9.V2D(), v10.V2D()),
|
||||
"pmull2 v8.1q, v9.2d, v10.2d");
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
@@ -4467,6 +4516,100 @@ TEST(neon_matmul) {
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
// Disassembly test for the NEON SHA3 instructions bcax, eor3, xar and rax1.
// Each COMPARE_MACRO pairs a MacroAssembler call with the text it is expected
// to disassemble to; xar's rotate amount is printed as an immediate ("#42").
TEST(neon_sha3) {
|
||||
SETUP();
|
||||
|
||||
COMPARE_MACRO(Bcax(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()),
|
||||
"bcax v0.16b, v1.16b, v2.16b, v3.16b");
|
||||
COMPARE_MACRO(Eor3(v10.V16B(), v11.V16B(), v12.V16B(), v13.V16B()),
|
||||
"eor3 v10.16b, v11.16b, v12.16b, v13.16b");
|
||||
COMPARE_MACRO(Xar(v20.V2D(), v21.V2D(), v22.V2D(), 42),
|
||||
"xar v20.2d, v21.2d, v22.2d, #42");
|
||||
COMPARE_MACRO(Rax1(v0.V2D(), v1.V2D(), v2.V2D()), "rax1 v0.2d, v1.2d, v2.2d");
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
// Disassembly test for the NEON SHA1 instructions. sha1c/sha1m/sha1p take a
// Q, an S and a 4S vector operand; sha1h is scalar (S, S); sha1su0/sha1su1
// operate on 4S vectors. Each COMPARE_MACRO pairs a MacroAssembler call with
// the text it is expected to disassemble to.
TEST(neon_sha1) {
|
||||
SETUP();
|
||||
|
||||
COMPARE_MACRO(Sha1c(q0, s12, v20.V4S()), "sha1c q0, s12, v20.4s");
|
||||
COMPARE_MACRO(Sha1m(q22, s2, v13.V4S()), "sha1m q22, s2, v13.4s");
|
||||
COMPARE_MACRO(Sha1p(q31, s5, v15.V4S()), "sha1p q31, s5, v15.4s");
|
||||
COMPARE_MACRO(Sha1su0(v19.V4S(), v9.V4S(), v27.V4S()),
|
||||
"sha1su0 v19.4s, v9.4s, v27.4s");
|
||||
COMPARE_MACRO(Sha1h(s12, s0), "sha1h s12, s0");
|
||||
COMPARE_MACRO(Sha1su1(v2.V4S(), v4.V4S()), "sha1su1 v2.4s, v4.4s");
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
// Disassembly test for the NEON SHA-256 instructions (sha256h, sha256h2,
// sha256su0, sha256su1). Each COMPARE_MACRO pairs a MacroAssembler call with
// the text it is expected to disassemble to.
TEST(neon_sha2) {
|
||||
SETUP();
|
||||
|
||||
COMPARE_MACRO(Sha256h(q0, q12, v20.V4S()), "sha256h q0, q12, v20.4s");
|
||||
COMPARE_MACRO(Sha256h2(q22, q2, v13.V4S()), "sha256h2 q22, q2, v13.4s");
|
||||
COMPARE_MACRO(Sha256su0(v2.V4S(), v4.V4S()), "sha256su0 v2.4s, v4.4s");
|
||||
COMPARE_MACRO(Sha256su1(v19.V4S(), v9.V4S(), v27.V4S()),
|
||||
"sha256su1 v19.4s, v9.4s, v27.4s");
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
// Disassembly test for the NEON SHA-512 instructions (sha512h, sha512h2,
// sha512su0, sha512su1), all using 2D vector operands. Each COMPARE_MACRO
// pairs a MacroAssembler call with the text it is expected to disassemble to.
TEST(neon_sha512) {
|
||||
SETUP();
|
||||
|
||||
COMPARE_MACRO(Sha512h(q0, q12, v20.V2D()), "sha512h q0, q12, v20.2d");
|
||||
COMPARE_MACRO(Sha512h2(q22, q2, v13.V2D()), "sha512h2 q22, q2, v13.2d");
|
||||
COMPARE_MACRO(Sha512su0(v2.V2D(), v4.V2D()), "sha512su0 v2.2d, v4.2d");
|
||||
COMPARE_MACRO(Sha512su1(v19.V2D(), v9.V2D(), v27.V2D()),
|
||||
"sha512su1 v19.2d, v9.2d, v27.2d");
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
// Disassembly test for the NEON AES instructions (aesd, aese, aesimc, aesmc),
// all using 16B vector operands. Each COMPARE_MACRO pairs a MacroAssembler
// call with the text it is expected to disassemble to.
TEST(neon_aes) {
|
||||
SETUP();
|
||||
|
||||
COMPARE_MACRO(Aesd(v0.V16B(), v29.V16B()), "aesd v0.16b, v29.16b");
|
||||
COMPARE_MACRO(Aese(v0.V16B(), v29.V16B()), "aese v0.16b, v29.16b");
|
||||
COMPARE_MACRO(Aesimc(v0.V16B(), v29.V16B()), "aesimc v0.16b, v29.16b");
|
||||
COMPARE_MACRO(Aesmc(v0.V16B(), v29.V16B()), "aesmc v0.16b, v29.16b");
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
// Disassembly test for the NEON SM3 instructions. Each COMPARE_MACRO pairs a
// MacroAssembler call with the text it is expected to disassemble to.
// For the sm3tt* forms the final integer argument is the lane index, which is
// expected to be printed on the last operand as "v9.s[<index>]".
TEST(neon_sm3) {
|
||||
SETUP();
|
||||
|
||||
COMPARE_MACRO(Sm3partw1(v12.V4S(), v13.V4S(), v14.V4S()),
|
||||
"sm3partw1 v12.4s, v13.4s, v14.4s");
|
||||
COMPARE_MACRO(Sm3partw2(v12.V4S(), v13.V4S(), v14.V4S()),
|
||||
"sm3partw2 v12.4s, v13.4s, v14.4s");
|
||||
COMPARE_MACRO(Sm3ss1(v13.V4S(), v15.V4S(), v17.V4S(), v21.V4S()),
|
||||
"sm3ss1 v13.4s, v15.4s, v17.4s, v21.4s");
|
||||
COMPARE_MACRO(Sm3tt1a(v30.V4S(), v29.V4S(), v9.V4S(), 1),
|
||||
"sm3tt1a v30.4s, v29.4s, v9.s[1]");
|
||||
COMPARE_MACRO(Sm3tt1b(v30.V4S(), v29.V4S(), v9.V4S(), 3),
|
||||
"sm3tt1b v30.4s, v29.4s, v9.s[3]");
|
||||
COMPARE_MACRO(Sm3tt2a(v30.V4S(), v29.V4S(), v9.V4S(), 2),
|
||||
"sm3tt2a v30.4s, v29.4s, v9.s[2]");
|
||||
COMPARE_MACRO(Sm3tt2b(v30.V4S(), v29.V4S(), v9.V4S(), 0),
|
||||
"sm3tt2b v30.4s, v29.4s, v9.s[0]");
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
// Disassembly test for the NEON SM4 instructions sm4e and sm4ekey, using 4S
// vector operands. Each COMPARE_MACRO pairs a MacroAssembler call with the
// text it is expected to disassemble to.
TEST(neon_sm4) {
|
||||
SETUP();
|
||||
|
||||
COMPARE_MACRO(Sm4e(v12.V4S(), v13.V4S()), "sm4e v12.4s, v13.4s");
|
||||
COMPARE_MACRO(Sm4ekey(v12.V4S(), v13.V4S(), v14.V4S()),
|
||||
"sm4ekey v12.4s, v13.4s, v14.4s");
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
TEST(neon_unallocated_regression_test) {
|
||||
SETUP();
|
||||
|
||||
@@ -4562,8 +4705,6 @@ TEST(neon_unallocated_regression_test) {
|
||||
COMPARE_PREFIX(dci(0x2efb9dbd), "unallocated"); // pmul v.und, v.und, v.und
|
||||
COMPARE_PREFIX(dci(0x4eace101), "unallocated"); // pmull v.d, v.s, v.s
|
||||
COMPARE_PREFIX(dci(0x0e6de3ad), "unallocated"); // pmull v.s, v.h, v.h
|
||||
COMPARE_PREFIX(dci(0x4ee3e2c0), "unallocated"); // pmull v.und, v.d, v.d
|
||||
COMPARE_PREFIX(dci(0x0eede060), "unallocated"); // pmull v.und, v.und, v.und
|
||||
COMPARE_PREFIX(dci(0x6ee00afd), "unallocated"); // rev v.d, v.d
|
||||
COMPARE_PREFIX(dci(0x4e601975), "unallocated"); // rev v.h, v.h
|
||||
COMPARE_PREFIX(dci(0x4ea019f3), "unallocated"); // rev v.s, v.s
|
||||
@@ -4633,10 +4774,14 @@ TEST(neon_unallocated_regression_test) {
|
||||
COMPARE_PREFIX(dci(0x6fd6d80f), "unallocated"); // sqrdmlah v.d, v.d, v.d[]
|
||||
COMPARE_PREFIX(dci(0x2fecdae5),
|
||||
"unallocated"); // sqrdmlah v.und, v.und, v.d[]
|
||||
COMPARE_PREFIX(dci(0x7e008429), "unallocated"); // sqrdmlah b9, b1, b0
|
||||
COMPARE_PREFIX(dci(0x7ec08429), "unallocated"); // sqrdmlah d9, d1, d0
|
||||
COMPARE_PREFIX(dci(0x7fe0f992), "unallocated"); // sqrdmlsh d, d, v.d[]
|
||||
COMPARE_PREFIX(dci(0x6ff1f9df), "unallocated"); // sqrdmlsh v.d, v.d, v.d[]
|
||||
COMPARE_PREFIX(dci(0x2fcdfad1),
|
||||
"unallocated"); // sqrdmlsh v.und, v.und, v.d[]
|
||||
COMPARE_PREFIX(dci(0x7e008c29), "unallocated"); // sqrdmlsh b9, b1, b0
|
||||
COMPARE_PREFIX(dci(0x7ec08c29), "unallocated"); // sqrdmlsh d9, d1, d0
|
||||
COMPARE_PREFIX(dci(0x7e23b7fa), "unallocated"); // sqrdmulh b, b, b
|
||||
COMPARE_PREFIX(dci(0x5f1ad272), "unallocated"); // sqrdmulh b, b, v.b[]
|
||||
COMPARE_PREFIX(dci(0x7ef8b6e0), "unallocated"); // sqrdmulh d, d, d
|
||||
|
||||
@@ -7673,13 +7673,14 @@ TEST(sve2_integer_multiply_long_vector) {
|
||||
COMPARE(sqdmullt(z7.VnD(), z4.VnS(), z0.VnS(), 0),
|
||||
"sqdmullt z7.d, z4.s, z0.s[0]");
|
||||
|
||||
// Feature `SVEPmull128` is not supported.
|
||||
// COMPARE(pmullb(z12.VnQ(), z21.VnD(), z12.VnD()),
|
||||
// "pmullb z12.q, z21.d, z12.d");
|
||||
COMPARE(pmullb(z12.VnH(), z21.VnB(), z12.VnB()),
|
||||
"pmullb z12.h, z21.b, z12.b");
|
||||
COMPARE(pmullt(z31.VnD(), z30.VnS(), z26.VnS()),
|
||||
"pmullt z31.d, z30.s, z26.s");
|
||||
COMPARE(pmullb(z12.VnQ(), z21.VnD(), z12.VnD()),
|
||||
"pmullb z12.q, z21.d, z12.d");
|
||||
COMPARE(pmullt(z12.VnQ(), z21.VnD(), z12.VnD()),
|
||||
"pmullt z12.q, z21.d, z12.d");
|
||||
|
||||
COMPARE(smullb(z10.VnD(), z4.VnS(), z4.VnS()), "smullb z10.d, z4.s, z4.s");
|
||||
COMPARE(smullb(z11.VnH(), z14.VnB(), z14.VnB()),
|
||||
@@ -7701,6 +7702,10 @@ TEST(sve2_integer_multiply_long_vector) {
|
||||
COMPARE(umullt(z24.VnH(), z7.VnB(), z16.VnB()), "umullt z24.h, z7.b, z16.b");
|
||||
COMPARE(umullt(z24.VnS(), z8.VnH(), z26.VnH()), "umullt z24.s, z8.h, z26.h");
|
||||
|
||||
// Check related but undefined encodings.
|
||||
COMPARE(dci(0x45806800), "unallocated (Unallocated)"); // pmullb s, h, h
|
||||
COMPARE(dci(0x45806c00), "unallocated (Unallocated)"); // pmullt s, h, h
|
||||
|
||||
CLEANUP();
|
||||
}
|
||||
|
||||
|
||||
@@ -102,6 +102,95 @@ namespace aarch64 {
|
||||
/* The simulator can run every test. */ \
|
||||
*skipped = false
|
||||
|
||||
#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
|
||||
// The signal handler needs access to the simulator. A signal handler cannot be
// given extra user arguments, so the simulator under test is published through
// this global; START_IMPLICIT_CHECK() assigns it before installing the handler.
|
||||
Simulator* gImplicitCheckSim;
|
||||
|
||||
#ifdef __x86_64__
|
||||
#include <signal.h>
|
||||
#include <ucontext.h>
|
||||
void HandleSegFault(int sig, siginfo_t* info, void* context) {
|
||||
USE(sig);
|
||||
USE(info);
|
||||
Simulator* sim = gImplicitCheckSim;
|
||||
|
||||
// Did the signal come from the simulator?
|
||||
ucontext_t* uc = reinterpret_cast<ucontext_t*>(context);
|
||||
uintptr_t fault_pc = uc->uc_mcontext.gregs[REG_RIP];
|
||||
VIXL_CHECK(sim->IsSimulatedMemoryAccess(fault_pc));
|
||||
|
||||
// Increment the counter (x1) each time we handle a signal.
|
||||
int64_t counter = reinterpret_cast<int64_t>(sim->ReadXRegister(1));
|
||||
sim->WriteXRegister(1, ++counter);
|
||||
|
||||
// Return to the VIXL memory access continuation point, which is also the
|
||||
// next instruction, after this handler.
|
||||
uc->uc_mcontext.gregs[REG_RIP] = sim->GetSignalReturnAddress();
|
||||
// Return that the memory access failed.
|
||||
uc->uc_mcontext.gregs[REG_RAX] =
|
||||
static_cast<greg_t>(MemoryAccessResult::Failure);
|
||||
}
|
||||
#endif // __x86_64__
|
||||
|
||||
// Start an implicit check test: publish the simulator to the signal handler,
// install the SIGSEGV handler, reset the fault counter (x1) and bind a start
// label so the number of faults can be counted. Note: each instruction emitted
// after the start is expected to fault.
//
// The sigaction structure is zero-initialised, its mask is emptied and
// SA_SIGINFO is set: the handler is installed through sa_sigaction and reads
// its `context` argument, which is only guaranteed when SA_SIGINFO is
// requested. Leaving sa_flags/sa_mask uninitialised would make the signal
// disposition depend on stack garbage. Installation failure is fatal.
//
// NOTE: HandleSegFault is only defined for x86-64 hosts (see the #ifdef around
// its definition), so this macro can only be expanded there.
#define START_IMPLICIT_CHECK()                                                \
  gImplicitCheckSim = &simulator;                                             \
  /* Set up a signal handler to count the number of faulting instructions. */ \
  struct sigaction sa = {};                                                   \
  sigemptyset(&sa.sa_mask);                                                   \
  sa.sa_flags = SA_SIGINFO;                                                   \
  sa.sa_sigaction = HandleSegFault;                                           \
  VIXL_CHECK(sigaction(SIGSEGV, &sa, NULL) == 0);                             \
  START();                                                                    \
  /* Reset the counter. */                                                    \
  __ Mov(x1, 0);                                                              \
  /* Use a consistent bad address. */                                         \
  __ Mov(x15, xzr);                                                           \
  __ Mov(ip0, xzr);                                                           \
  /* Set the amount of data to load. */                                       \
  __ Mov(ip1, 4096);                                                          \
  [[maybe_unused]] MemOperand bad_memory = MemOperand(ip0);                   \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kSVE)) {                        \
    /* Turn on all lanes to ensure all loads/stores are tested. */            \
    __ Ptrue(p0.VnB());                                                       \
    __ Ptrue(p1.VnB());                                                       \
    __ Ptrue(p2.VnB());                                                       \
    __ Ptrue(p3.VnB());                                                       \
    __ Ptrue(p4.VnB());                                                       \
    __ Ptrue(p5.VnB());                                                       \
    __ Ptrue(p6.VnB());                                                       \
    __ Ptrue(p7.VnB());                                                       \
    __ Ptrue(p8.VnB());                                                       \
    __ Ptrue(p9.VnB());                                                       \
    __ Ptrue(p10.VnB());                                                      \
    __ Ptrue(p11.VnB());                                                      \
    __ Ptrue(p12.VnB());                                                      \
    __ Ptrue(p13.VnB());                                                      \
    __ Ptrue(p14.VnB());                                                      \
    __ Ptrue(p15.VnB());                                                      \
  }                                                                           \
  Label l_start, l_end;                                                       \
  __ Bind(&l_start);
|
||||
|
||||
// Finish an implicit check test: bind the end label declared by
// START_IMPLICIT_CHECK(), copy the fault counter from x1 into x0 and close the
// generated code with END().
#define END_IMPLICIT_CHECK() \
|
||||
__ Bind(&l_end); \
|
||||
/* Return the fault counter (x1) in x0. */ \
|
||||
__ Mov(x0, x1); \
|
||||
END();
|
||||
|
||||
// Run an implicit check test and verify its outcome: the fault count read back
// from x0, multiplied by kInstructionSize, must equal the number of code bytes
// emitted between the l_start and l_end labels. The macro declares the locals
// `skipped`, `result` and `num_of_faulting_instr` in the caller's scope.
#define TRY_RUN_IMPLICIT_CHECK() \
|
||||
bool skipped; \
|
||||
TRY_RUN(&skipped); \
|
||||
/* Implicit checks should only be used with the simulator. */ \
|
||||
VIXL_ASSERT(!skipped); \
|
||||
/* Check that each load/store instruction generated a segfault that was */ \
|
||||
/* raised and dealt with. */ \
|
||||
size_t result = simulator.ReadXRegister(0); \
|
||||
/* Bytes emitted between l_start and l_end (size since start minus size */ \
/* since end). */ \
size_t num_of_faulting_instr = masm.GetSizeOfCodeGeneratedSince(&l_start) - \
|
||||
masm.GetSizeOfCodeGeneratedSince(&l_end); \
|
||||
VIXL_CHECK((result * kInstructionSize) == num_of_faulting_instr);
|
||||
|
||||
#endif // VIXL_ENABLE_IMPLICIT_CHECKS
|
||||
|
||||
#else // VIXL_INCLUDE_SIMULATOR_AARCH64
|
||||
|
||||
@@ -2850,7 +2939,7 @@ static void TestOpImmOpImmNEON(const char* name,
|
||||
}
|
||||
}
|
||||
}
|
||||
VIXL_ASSERT(counted_length == expected_length);
|
||||
VIXL_CHECK(counted_length == expected_length);
|
||||
if (error_count > kErrorReportLimit) {
|
||||
printf("%u other errors follow.\n", error_count - kErrorReportLimit);
|
||||
}
|
||||
@@ -5012,6 +5101,802 @@ DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl, Basic, Basic, Basic)
|
||||
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl2, Basic, Basic, Basic)
|
||||
|
||||
|
||||
#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
|
||||
// Implicit-check test for scalar loads and stores. Every instruction emitted
// between START_IMPLICIT_CHECK() and END_IMPLICIT_CHECK() accesses
// `bad_memory` (a MemOperand based on ip0, which the start macro sets to zero)
// and is expected to fault exactly once; TRY_RUN_IMPLICIT_CHECK() then compares
// the number of handled faults with the number of instructions emitted.
TEST(ImplicitCheck) {
|
||||
SETUP_WITH_FEATURES(CPUFeatures::kNEON);
|
||||
START_IMPLICIT_CHECK();
|
||||
|
||||
// NOTE(review): presumably this scope reserves the rest of the buffer so that
// the raw assembler calls below may be emitted directly -- confirm against
// EmissionCheckScope's documentation.
EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
|
||||
// Invalid memory reads.
|
||||
__ ldar(w3, bad_memory);
|
||||
__ ldar(x4, bad_memory);
|
||||
__ ldarb(w5, bad_memory);
|
||||
__ ldarb(x6, bad_memory);
|
||||
__ ldarh(w7, bad_memory);
|
||||
__ ldarh(x8, bad_memory);
|
||||
__ ldaxp(w9, w10, bad_memory);
|
||||
__ ldaxp(x11, x12, bad_memory);
|
||||
__ ldaxr(w13, bad_memory);
|
||||
__ ldaxr(x14, bad_memory);
|
||||
__ ldaxrb(w15, bad_memory);
|
||||
__ ldaxrb(x16, bad_memory);
|
||||
__ ldaxrh(w17, bad_memory);
|
||||
__ ldaxrh(x18, bad_memory);
|
||||
__ ldnp(w19, w20, bad_memory);
|
||||
__ ldnp(x21, x22, bad_memory);
|
||||
__ ldp(w23, w24, bad_memory);
|
||||
__ ldp(x25, x26, bad_memory);
|
||||
__ ldpsw(x27, x28, bad_memory);
|
||||
__ ldr(w29, bad_memory);
|
||||
__ ldr(x2, bad_memory);
|
||||
__ ldrb(w3, bad_memory);
|
||||
__ ldrb(x4, bad_memory);
|
||||
__ ldrh(w5, bad_memory);
|
||||
__ ldrh(x6, bad_memory);
|
||||
__ ldrsb(w7, bad_memory);
|
||||
__ ldrsb(x8, bad_memory);
|
||||
__ ldrsh(w9, bad_memory);
|
||||
__ ldrsh(x10, bad_memory);
|
||||
__ ldrsw(x11, bad_memory);
|
||||
__ ldur(w12, bad_memory);
|
||||
__ ldur(x13, bad_memory);
|
||||
__ ldurb(w14, bad_memory);
|
||||
__ ldurb(x15, bad_memory);
|
||||
__ ldurh(w16, bad_memory);
|
||||
__ ldurh(x17, bad_memory);
|
||||
__ ldursb(w18, bad_memory);
|
||||
__ ldursb(x19, bad_memory);
|
||||
__ ldursh(w20, bad_memory);
|
||||
__ ldursh(x21, bad_memory);
|
||||
__ ldursw(x22, bad_memory);
|
||||
__ ldxp(w23, w24, bad_memory);
|
||||
__ ldxp(x25, x26, bad_memory);
|
||||
__ ldxr(w27, bad_memory);
|
||||
__ ldxr(x28, bad_memory);
|
||||
__ ldxrb(w29, bad_memory);
|
||||
__ ldxrb(x2, bad_memory);
|
||||
__ ldxrh(w3, bad_memory);
|
||||
__ ldxrh(x4, bad_memory);
|
||||
|
||||
// Invalid memory writes. Note: exclusive store instructions are not tested
|
||||
// because they can fail due to the global monitor before trying to perform a
|
||||
// memory store.
|
||||
__ stlr(w18, bad_memory);
|
||||
__ stlr(x19, bad_memory);
|
||||
__ stlrb(w20, bad_memory);
|
||||
__ stlrb(x21, bad_memory);
|
||||
__ stlrh(w22, bad_memory);
|
||||
__ stlrh(x23, bad_memory);
|
||||
__ stnp(w14, w15, bad_memory);
|
||||
__ stnp(x16, x17, bad_memory);
|
||||
__ stp(w18, w19, bad_memory);
|
||||
__ stp(x20, x21, bad_memory);
|
||||
__ str(w22, bad_memory);
|
||||
__ str(x23, bad_memory);
|
||||
__ strb(w24, bad_memory);
|
||||
__ strb(x25, bad_memory);
|
||||
__ strh(w26, bad_memory);
|
||||
__ strh(x27, bad_memory);
|
||||
__ stur(w28, bad_memory);
|
||||
__ stur(x29, bad_memory);
|
||||
__ sturb(w2, bad_memory);
|
||||
__ sturb(x3, bad_memory);
|
||||
__ sturh(w4, bad_memory);
|
||||
__ sturh(x5, bad_memory);
|
||||
|
||||
// Bind the end label and return the fault counter, then run the code and
// check that every instruction above produced exactly one handled fault.
END_IMPLICIT_CHECK();
|
||||
TRY_RUN_IMPLICIT_CHECK();
|
||||
}
|
||||
|
||||
TEST(ImplicitCheckNeon) {
|
||||
SETUP_WITH_FEATURES(CPUFeatures::kNEON);
|
||||
START_IMPLICIT_CHECK();
|
||||
|
||||
EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
|
||||
__ ld1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
|
||||
__ ld1(v23.V16B(), v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
|
||||
__ ld1(v5.V16B(), v6.V16B(), v7.V16B(), v8.V16B(), bad_memory);
|
||||
__ ld1(v18.V16B(), v19.V16B(), v20.V16B(), bad_memory);
|
||||
__ ld1(v13.V16B(), v14.V16B(), v15.V16B(), bad_memory);
|
||||
__ ld1(v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
|
||||
__ ld1(v17.V16B(), v18.V16B(), bad_memory);
|
||||
__ ld1(v20.V16B(), v21.V16B(), bad_memory);
|
||||
__ ld1(v28.V16B(), v29.V16B(), bad_memory);
|
||||
__ ld1(v29.V16B(), bad_memory);
|
||||
__ ld1(v21.V16B(), bad_memory);
|
||||
__ ld1(v4.V16B(), bad_memory);
|
||||
__ ld1(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), bad_memory);
|
||||
__ ld1(v17.V1D(), v18.V1D(), v19.V1D(), v20.V1D(), bad_memory);
|
||||
__ ld1(v28.V1D(), v29.V1D(), v30.V1D(), v31.V1D(), bad_memory);
|
||||
__ ld1(v20.V1D(), v21.V1D(), v22.V1D(), bad_memory);
|
||||
__ ld1(v19.V1D(), v20.V1D(), v21.V1D(), bad_memory);
|
||||
__ ld1(v12.V1D(), v13.V1D(), v14.V1D(), bad_memory);
|
||||
__ ld1(v29.V1D(), v30.V1D(), bad_memory);
|
||||
__ ld1(v31.V1D(), v0.V1D(), bad_memory);
|
||||
__ ld1(v3.V1D(), v4.V1D(), bad_memory);
|
||||
__ ld1(v28.V1D(), bad_memory);
|
||||
__ ld1(v11.V1D(), bad_memory);
|
||||
__ ld1(v29.V1D(), bad_memory);
|
||||
__ ld1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
|
||||
__ ld1(v8.V2D(), v9.V2D(), v10.V2D(), v11.V2D(), bad_memory);
|
||||
__ ld1(v14.V2D(), v15.V2D(), v16.V2D(), v17.V2D(), bad_memory);
|
||||
__ ld1(v26.V2D(), v27.V2D(), v28.V2D(), bad_memory);
|
||||
__ ld1(v5.V2D(), v6.V2D(), v7.V2D(), bad_memory);
|
||||
__ ld1(v26.V2D(), v27.V2D(), v28.V2D(), bad_memory);
|
||||
__ ld1(v18.V2D(), v19.V2D(), bad_memory);
|
||||
__ ld1(v21.V2D(), v22.V2D(), bad_memory);
|
||||
__ ld1(v17.V2D(), v18.V2D(), bad_memory);
|
||||
__ ld1(v5.V2D(), bad_memory);
|
||||
__ ld1(v6.V2D(), bad_memory);
|
||||
__ ld1(v15.V2D(), bad_memory);
|
||||
__ ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), bad_memory);
|
||||
__ ld1(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
|
||||
__ ld1(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), bad_memory);
|
||||
__ ld1(v11.V2S(), v12.V2S(), v13.V2S(), bad_memory);
|
||||
__ ld1(v8.V2S(), v9.V2S(), v10.V2S(), bad_memory);
|
||||
__ ld1(v31.V2S(), v0.V2S(), v1.V2S(), bad_memory);
|
||||
__ ld1(v0.V2S(), v1.V2S(), bad_memory);
|
||||
__ ld1(v13.V2S(), v14.V2S(), bad_memory);
|
||||
__ ld1(v3.V2S(), v4.V2S(), bad_memory);
|
||||
__ ld1(v26.V2S(), bad_memory);
|
||||
__ ld1(v0.V2S(), bad_memory);
|
||||
__ ld1(v11.V2S(), bad_memory);
|
||||
__ ld1(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), bad_memory);
|
||||
__ ld1(v24.V4H(), v25.V4H(), v26.V4H(), v27.V4H(), bad_memory);
|
||||
__ ld1(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), bad_memory);
|
||||
__ ld1(v30.V4H(), v31.V4H(), v0.V4H(), bad_memory);
|
||||
__ ld1(v25.V4H(), v26.V4H(), v27.V4H(), bad_memory);
|
||||
__ ld1(v3.V4H(), v4.V4H(), v5.V4H(), bad_memory);
|
||||
__ ld1(v3.V4H(), v4.V4H(), bad_memory);
|
||||
__ ld1(v3.V4H(), v4.V4H(), bad_memory);
|
||||
__ ld1(v23.V4H(), v24.V4H(), bad_memory);
|
||||
__ ld1(v26.V4H(), bad_memory);
|
||||
__ ld1(v1.V4H(), bad_memory);
|
||||
__ ld1(v14.V4H(), bad_memory);
|
||||
__ ld1(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), bad_memory);
|
||||
__ ld1(v28.V4S(), v29.V4S(), v30.V4S(), v31.V4S(), bad_memory);
|
||||
__ ld1(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(), bad_memory);
|
||||
__ ld1(v2.V4S(), v3.V4S(), v4.V4S(), bad_memory);
|
||||
__ ld1(v22.V4S(), v23.V4S(), v24.V4S(), bad_memory);
|
||||
__ ld1(v15.V4S(), v16.V4S(), v17.V4S(), bad_memory);
|
||||
__ ld1(v20.V4S(), v21.V4S(), bad_memory);
|
||||
__ ld1(v30.V4S(), v31.V4S(), bad_memory);
|
||||
__ ld1(v11.V4S(), v12.V4S(), bad_memory);
|
||||
__ ld1(v15.V4S(), bad_memory);
|
||||
__ ld1(v12.V4S(), bad_memory);
|
||||
__ ld1(v0.V4S(), bad_memory);
|
||||
__ ld1(v17.V8B(), v18.V8B(), v19.V8B(), v20.V8B(), bad_memory);
|
||||
__ ld1(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), bad_memory);
|
||||
__ ld1(v9.V8B(), v10.V8B(), v11.V8B(), v12.V8B(), bad_memory);
|
||||
__ ld1(v4.V8B(), v5.V8B(), v6.V8B(), bad_memory);
|
||||
__ ld1(v2.V8B(), v3.V8B(), v4.V8B(), bad_memory);
|
||||
__ ld1(v12.V8B(), v13.V8B(), v14.V8B(), bad_memory);
|
||||
__ ld1(v10.V8B(), v11.V8B(), bad_memory);
|
||||
__ ld1(v11.V8B(), v12.V8B(), bad_memory);
|
||||
__ ld1(v27.V8B(), v28.V8B(), bad_memory);
|
||||
__ ld1(v31.V8B(), bad_memory);
|
||||
__ ld1(v10.V8B(), bad_memory);
|
||||
__ ld1(v28.V8B(), bad_memory);
|
||||
__ ld1(v5.V8H(), v6.V8H(), v7.V8H(), v8.V8H(), bad_memory);
|
||||
__ ld1(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
|
||||
__ ld1(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), bad_memory);
|
||||
__ ld1(v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
|
||||
__ ld1(v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
|
||||
__ ld1(v17.V8H(), v18.V8H(), v19.V8H(), bad_memory);
|
||||
__ ld1(v4.V8H(), v5.V8H(), bad_memory);
|
||||
__ ld1(v21.V8H(), v22.V8H(), bad_memory);
|
||||
__ ld1(v4.V8H(), v5.V8H(), bad_memory);
|
||||
__ ld1(v9.V8H(), bad_memory);
|
||||
__ ld1(v27.V8H(), bad_memory);
|
||||
__ ld1(v26.V8H(), bad_memory);
|
||||
__ ld1(v19.B(), 1, bad_memory);
|
||||
__ ld1(v12.B(), 3, bad_memory);
|
||||
__ ld1(v27.B(), 12, bad_memory);
|
||||
__ ld1(v10.D(), 1, bad_memory);
|
||||
__ ld1(v26.D(), 1, bad_memory);
|
||||
__ ld1(v7.D(), 1, bad_memory);
|
||||
__ ld1(v19.H(), 5, bad_memory);
|
||||
__ ld1(v10.H(), 1, bad_memory);
|
||||
__ ld1(v5.H(), 4, bad_memory);
|
||||
__ ld1(v21.S(), 2, bad_memory);
|
||||
__ ld1(v13.S(), 2, bad_memory);
|
||||
__ ld1(v1.S(), 2, bad_memory);
|
||||
__ ld1r(v2.V16B(), bad_memory);
|
||||
__ ld1r(v2.V16B(), bad_memory);
|
||||
__ ld1r(v22.V16B(), bad_memory);
|
||||
__ ld1r(v25.V1D(), bad_memory);
|
||||
__ ld1r(v9.V1D(), bad_memory);
|
||||
__ ld1r(v23.V1D(), bad_memory);
|
||||
__ ld1r(v19.V2D(), bad_memory);
|
||||
__ ld1r(v21.V2D(), bad_memory);
|
||||
__ ld1r(v30.V2D(), bad_memory);
|
||||
__ ld1r(v24.V2S(), bad_memory);
|
||||
__ ld1r(v26.V2S(), bad_memory);
|
||||
__ ld1r(v28.V2S(), bad_memory);
|
||||
__ ld1r(v19.V4H(), bad_memory);
|
||||
__ ld1r(v1.V4H(), bad_memory);
|
||||
__ ld1r(v21.V4H(), bad_memory);
|
||||
__ ld1r(v15.V4S(), bad_memory);
|
||||
__ ld1r(v21.V4S(), bad_memory);
|
||||
__ ld1r(v23.V4S(), bad_memory);
|
||||
__ ld1r(v26.V8B(), bad_memory);
|
||||
__ ld1r(v14.V8B(), bad_memory);
|
||||
__ ld1r(v19.V8B(), bad_memory);
|
||||
__ ld1r(v13.V8H(), bad_memory);
|
||||
__ ld1r(v30.V8H(), bad_memory);
|
||||
__ ld1r(v27.V8H(), bad_memory);
|
||||
__ ld2(v21.V16B(), v22.V16B(), bad_memory);
|
||||
__ ld2(v21.V16B(), v22.V16B(), bad_memory);
|
||||
__ ld2(v12.V16B(), v13.V16B(), bad_memory);
|
||||
__ ld2(v14.V2D(), v15.V2D(), bad_memory);
|
||||
__ ld2(v0.V2D(), v1.V2D(), bad_memory);
|
||||
__ ld2(v12.V2D(), v13.V2D(), bad_memory);
|
||||
__ ld2(v27.V2S(), v28.V2S(), bad_memory);
|
||||
__ ld2(v2.V2S(), v3.V2S(), bad_memory);
|
||||
__ ld2(v12.V2S(), v13.V2S(), bad_memory);
|
||||
__ ld2(v9.V4H(), v10.V4H(), bad_memory);
|
||||
__ ld2(v23.V4H(), v24.V4H(), bad_memory);
|
||||
__ ld2(v1.V4H(), v2.V4H(), bad_memory);
|
||||
__ ld2(v20.V4S(), v21.V4S(), bad_memory);
|
||||
__ ld2(v10.V4S(), v11.V4S(), bad_memory);
|
||||
__ ld2(v24.V4S(), v25.V4S(), bad_memory);
|
||||
__ ld2(v17.V8B(), v18.V8B(), bad_memory);
|
||||
__ ld2(v13.V8B(), v14.V8B(), bad_memory);
|
||||
__ ld2(v7.V8B(), v8.V8B(), bad_memory);
|
||||
__ ld2(v30.V8H(), v31.V8H(), bad_memory);
|
||||
__ ld2(v4.V8H(), v5.V8H(), bad_memory);
|
||||
__ ld2(v13.V8H(), v14.V8H(), bad_memory);
|
||||
__ ld2(v5.B(), v6.B(), 12, bad_memory);
|
||||
__ ld2(v16.B(), v17.B(), 7, bad_memory);
|
||||
__ ld2(v29.B(), v30.B(), 2, bad_memory);
|
||||
__ ld2(v11.D(), v12.D(), 1, bad_memory);
|
||||
__ ld2(v26.D(), v27.D(), 0, bad_memory);
|
||||
__ ld2(v25.D(), v26.D(), 0, bad_memory);
|
||||
__ ld2(v18.H(), v19.H(), 7, bad_memory);
|
||||
__ ld2(v17.H(), v18.H(), 5, bad_memory);
|
||||
__ ld2(v30.H(), v31.H(), 2, bad_memory);
|
||||
__ ld2(v29.S(), v30.S(), 3, bad_memory);
|
||||
__ ld2(v28.S(), v29.S(), 0, bad_memory);
|
||||
__ ld2(v6.S(), v7.S(), 1, bad_memory);
|
||||
__ ld2r(v26.V16B(), v27.V16B(), bad_memory);
|
||||
__ ld2r(v21.V16B(), v22.V16B(), bad_memory);
|
||||
__ ld2r(v5.V16B(), v6.V16B(), bad_memory);
|
||||
__ ld2r(v26.V1D(), v27.V1D(), bad_memory);
|
||||
__ ld2r(v14.V1D(), v15.V1D(), bad_memory);
|
||||
__ ld2r(v23.V1D(), v24.V1D(), bad_memory);
|
||||
__ ld2r(v11.V2D(), v12.V2D(), bad_memory);
|
||||
__ ld2r(v29.V2D(), v30.V2D(), bad_memory);
|
||||
__ ld2r(v15.V2D(), v16.V2D(), bad_memory);
|
||||
__ ld2r(v26.V2S(), v27.V2S(), bad_memory);
|
||||
__ ld2r(v22.V2S(), v23.V2S(), bad_memory);
|
||||
__ ld2r(v2.V2S(), v3.V2S(), bad_memory);
|
||||
__ ld2r(v2.V4H(), v3.V4H(), bad_memory);
|
||||
__ ld2r(v9.V4H(), v10.V4H(), bad_memory);
|
||||
__ ld2r(v6.V4H(), v7.V4H(), bad_memory);
|
||||
__ ld2r(v7.V4S(), v8.V4S(), bad_memory);
|
||||
__ ld2r(v19.V4S(), v20.V4S(), bad_memory);
|
||||
__ ld2r(v21.V4S(), v22.V4S(), bad_memory);
|
||||
__ ld2r(v26.V8B(), v27.V8B(), bad_memory);
|
||||
__ ld2r(v20.V8B(), v21.V8B(), bad_memory);
|
||||
__ ld2r(v11.V8B(), v12.V8B(), bad_memory);
|
||||
__ ld2r(v12.V8H(), v13.V8H(), bad_memory);
|
||||
__ ld2r(v6.V8H(), v7.V8H(), bad_memory);
|
||||
__ ld2r(v25.V8H(), v26.V8H(), bad_memory);
|
||||
__ ld3(v20.V16B(), v21.V16B(), v22.V16B(), bad_memory);
|
||||
__ ld3(v28.V16B(), v29.V16B(), v30.V16B(), bad_memory);
|
||||
__ ld3(v20.V16B(), v21.V16B(), v22.V16B(), bad_memory);
|
||||
__ ld3(v21.V2D(), v22.V2D(), v23.V2D(), bad_memory);
|
||||
__ ld3(v18.V2D(), v19.V2D(), v20.V2D(), bad_memory);
|
||||
__ ld3(v27.V2D(), v28.V2D(), v29.V2D(), bad_memory);
|
||||
__ ld3(v7.V2S(), v8.V2S(), v9.V2S(), bad_memory);
|
||||
__ ld3(v20.V2S(), v21.V2S(), v22.V2S(), bad_memory);
|
||||
__ ld3(v26.V2S(), v27.V2S(), v28.V2S(), bad_memory);
|
||||
__ ld3(v27.V4H(), v28.V4H(), v29.V4H(), bad_memory);
|
||||
__ ld3(v28.V4H(), v29.V4H(), v30.V4H(), bad_memory);
|
||||
__ ld3(v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
|
||||
__ ld3(v2.V4S(), v3.V4S(), v4.V4S(), bad_memory);
|
||||
__ ld3(v24.V4S(), v25.V4S(), v26.V4S(), bad_memory);
|
||||
__ ld3(v11.V4S(), v12.V4S(), v13.V4S(), bad_memory);
|
||||
__ ld3(v29.V8B(), v30.V8B(), v31.V8B(), bad_memory);
|
||||
__ ld3(v1.V8B(), v2.V8B(), v3.V8B(), bad_memory);
|
||||
__ ld3(v12.V8B(), v13.V8B(), v14.V8B(), bad_memory);
|
||||
__ ld3(v22.V8H(), v23.V8H(), v24.V8H(), bad_memory);
|
||||
__ ld3(v13.V8H(), v14.V8H(), v15.V8H(), bad_memory);
|
||||
__ ld3(v28.V8H(), v29.V8H(), v30.V8H(), bad_memory);
|
||||
__ ld3(v21.B(), v22.B(), v23.B(), 11, bad_memory);
|
||||
__ ld3(v5.B(), v6.B(), v7.B(), 9, bad_memory);
|
||||
__ ld3(v23.B(), v24.B(), v25.B(), 0, bad_memory);
|
||||
__ ld3(v16.D(), v17.D(), v18.D(), 0, bad_memory);
|
||||
__ ld3(v30.D(), v31.D(), v0.D(), 0, bad_memory);
|
||||
__ ld3(v28.D(), v29.D(), v30.D(), 1, bad_memory);
|
||||
__ ld3(v13.H(), v14.H(), v15.H(), 2, bad_memory);
|
||||
__ ld3(v22.H(), v23.H(), v24.H(), 7, bad_memory);
|
||||
__ ld3(v14.H(), v15.H(), v16.H(), 3, bad_memory);
|
||||
__ ld3(v22.S(), v23.S(), v24.S(), 3, bad_memory);
|
||||
__ ld3(v30.S(), v31.S(), v0.S(), 2, bad_memory);
|
||||
__ ld3(v12.S(), v13.S(), v14.S(), 1, bad_memory);
|
||||
__ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
|
||||
__ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
|
||||
__ ld3r(v3.V16B(), v4.V16B(), v5.V16B(), bad_memory);
|
||||
__ ld3r(v4.V1D(), v5.V1D(), v6.V1D(), bad_memory);
|
||||
__ ld3r(v7.V1D(), v8.V1D(), v9.V1D(), bad_memory);
|
||||
__ ld3r(v17.V1D(), v18.V1D(), v19.V1D(), bad_memory);
|
||||
__ ld3r(v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
|
||||
__ ld3r(v20.V2D(), v21.V2D(), v22.V2D(), bad_memory);
|
||||
__ ld3r(v14.V2D(), v15.V2D(), v16.V2D(), bad_memory);
|
||||
__ ld3r(v10.V2S(), v11.V2S(), v12.V2S(), bad_memory);
|
||||
__ ld3r(v0.V2S(), v1.V2S(), v2.V2S(), bad_memory);
|
||||
__ ld3r(v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
|
||||
__ ld3r(v22.V4H(), v23.V4H(), v24.V4H(), bad_memory);
|
||||
__ ld3r(v6.V4H(), v7.V4H(), v8.V4H(), bad_memory);
|
||||
__ ld3r(v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
|
||||
__ ld3r(v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
|
||||
__ ld3r(v0.V4S(), v1.V4S(), v2.V4S(), bad_memory);
|
||||
__ ld3r(v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
|
||||
__ ld3r(v2.V8B(), v3.V8B(), v4.V8B(), bad_memory);
|
||||
__ ld3r(v10.V8B(), v11.V8B(), v12.V8B(), bad_memory);
|
||||
__ ld3r(v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
|
||||
__ ld3r(v6.V8H(), v7.V8H(), v8.V8H(), bad_memory);
|
||||
__ ld3r(v29.V8H(), v30.V8H(), v31.V8H(), bad_memory);
|
||||
__ ld3r(v7.V8H(), v8.V8H(), v9.V8H(), bad_memory);
|
||||
__ ld4(v3.V16B(), v4.V16B(), v5.V16B(), v6.V16B(), bad_memory);
|
||||
__ ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), bad_memory);
|
||||
__ ld4(v5.V16B(), v6.V16B(), v7.V16B(), v8.V16B(), bad_memory);
|
||||
__ ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), bad_memory);
|
||||
__ ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), bad_memory);
|
||||
__ ld4(v29.V2D(), v30.V2D(), v31.V2D(), v0.V2D(), bad_memory);
|
||||
__ ld4(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), bad_memory);
|
||||
__ ld4(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
|
||||
__ ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), bad_memory);
|
||||
__ ld4(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), bad_memory);
|
||||
__ ld4(v23.V4H(), v24.V4H(), v25.V4H(), v26.V4H(), bad_memory);
|
||||
__ ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), bad_memory);
|
||||
__ ld4(v7.V4S(), v8.V4S(), v9.V4S(), v10.V4S(), bad_memory);
|
||||
__ ld4(v28.V4S(), v29.V4S(), v30.V4S(), v31.V4S(), bad_memory);
|
||||
__ ld4(v29.V4S(), v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
|
||||
__ ld4(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), bad_memory);
|
||||
__ ld4(v27.V8B(), v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
|
||||
__ ld4(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), bad_memory);
|
||||
__ ld4(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
|
||||
__ ld4(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
|
||||
__ ld4(v20.V8H(), v21.V8H(), v22.V8H(), v23.V8H(), bad_memory);
|
||||
__ ld4(v20.B(), v21.B(), v22.B(), v23.B(), 3, bad_memory);
|
||||
__ ld4(v12.B(), v13.B(), v14.B(), v15.B(), 3, bad_memory);
|
||||
__ ld4(v27.B(), v28.B(), v29.B(), v30.B(), 6, bad_memory);
|
||||
__ ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, bad_memory);
|
||||
__ ld4(v15.D(), v16.D(), v17.D(), v18.D(), 1, bad_memory);
|
||||
__ ld4(v16.D(), v17.D(), v18.D(), v19.D(), 1, bad_memory);
|
||||
__ ld4(v2.H(), v3.H(), v4.H(), v5.H(), 6, bad_memory);
|
||||
__ ld4(v5.H(), v6.H(), v7.H(), v8.H(), 3, bad_memory);
|
||||
__ ld4(v7.H(), v8.H(), v9.H(), v10.H(), 6, bad_memory);
|
||||
__ ld4(v6.S(), v7.S(), v8.S(), v9.S(), 1, bad_memory);
|
||||
__ ld4(v25.S(), v26.S(), v27.S(), v28.S(), 2, bad_memory);
|
||||
__ ld4(v8.S(), v9.S(), v10.S(), v11.S(), 3, bad_memory);
|
||||
__ ld4r(v14.V16B(), v15.V16B(), v16.V16B(), v17.V16B(), bad_memory);
|
||||
__ ld4r(v13.V16B(), v14.V16B(), v15.V16B(), v16.V16B(), bad_memory);
|
||||
__ ld4r(v9.V16B(), v10.V16B(), v11.V16B(), v12.V16B(), bad_memory);
|
||||
__ ld4r(v8.V1D(), v9.V1D(), v10.V1D(), v11.V1D(), bad_memory);
|
||||
__ ld4r(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), bad_memory);
|
||||
__ ld4r(v26.V1D(), v27.V1D(), v28.V1D(), v29.V1D(), bad_memory);
|
||||
__ ld4r(v19.V2D(), v20.V2D(), v21.V2D(), v22.V2D(), bad_memory);
|
||||
__ ld4r(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
|
||||
__ ld4r(v15.V2D(), v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
|
||||
__ ld4r(v31.V2S(), v0.V2S(), v1.V2S(), v2.V2S(), bad_memory);
|
||||
__ ld4r(v28.V2S(), v29.V2S(), v30.V2S(), v31.V2S(), bad_memory);
|
||||
__ ld4r(v11.V2S(), v12.V2S(), v13.V2S(), v14.V2S(), bad_memory);
|
||||
__ ld4r(v19.V4H(), v20.V4H(), v21.V4H(), v22.V4H(), bad_memory);
|
||||
__ ld4r(v22.V4H(), v23.V4H(), v24.V4H(), v25.V4H(), bad_memory);
|
||||
__ ld4r(v20.V4H(), v21.V4H(), v22.V4H(), v23.V4H(), bad_memory);
|
||||
__ ld4r(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), bad_memory);
|
||||
__ ld4r(v25.V4S(), v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
|
||||
__ ld4r(v23.V4S(), v24.V4S(), v25.V4S(), v26.V4S(), bad_memory);
|
||||
__ ld4r(v22.V8B(), v23.V8B(), v24.V8B(), v25.V8B(), bad_memory);
|
||||
__ ld4r(v27.V8B(), v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
|
||||
__ ld4r(v29.V8B(), v30.V8B(), v31.V8B(), v0.V8B(), bad_memory);
|
||||
__ ld4r(v28.V8H(), v29.V8H(), v30.V8H(), v31.V8H(), bad_memory);
|
||||
__ ld4r(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
|
||||
__ ld4r(v22.V8H(), v23.V8H(), v24.V8H(), v25.V8H(), bad_memory);
|
||||
|
||||
__ st1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
|
||||
__ st1(v10.V16B(), v11.V16B(), v12.V16B(), v13.V16B(), bad_memory);
|
||||
__ st1(v27.V16B(), v28.V16B(), v29.V16B(), v30.V16B(), bad_memory);
|
||||
__ st1(v16.V16B(), v17.V16B(), v18.V16B(), bad_memory);
|
||||
__ st1(v21.V16B(), v22.V16B(), v23.V16B(), bad_memory);
|
||||
__ st1(v9.V16B(), v10.V16B(), v11.V16B(), bad_memory);
|
||||
__ st1(v7.V16B(), v8.V16B(), bad_memory);
|
||||
__ st1(v26.V16B(), v27.V16B(), bad_memory);
|
||||
__ st1(v22.V16B(), v23.V16B(), bad_memory);
|
||||
__ st1(v23.V16B(), bad_memory);
|
||||
__ st1(v28.V16B(), bad_memory);
|
||||
__ st1(v2.V16B(), bad_memory);
|
||||
__ st1(v29.V1D(), v30.V1D(), v31.V1D(), v0.V1D(), bad_memory);
|
||||
__ st1(v12.V1D(), v13.V1D(), v14.V1D(), v15.V1D(), bad_memory);
|
||||
__ st1(v30.V1D(), v31.V1D(), v0.V1D(), v1.V1D(), bad_memory);
|
||||
__ st1(v16.V1D(), v17.V1D(), v18.V1D(), bad_memory);
|
||||
__ st1(v3.V1D(), v4.V1D(), v5.V1D(), bad_memory);
|
||||
__ st1(v14.V1D(), v15.V1D(), v16.V1D(), bad_memory);
|
||||
__ st1(v18.V1D(), v19.V1D(), bad_memory);
|
||||
__ st1(v5.V1D(), v6.V1D(), bad_memory);
|
||||
__ st1(v2.V1D(), v3.V1D(), bad_memory);
|
||||
__ st1(v4.V1D(), bad_memory);
|
||||
__ st1(v27.V1D(), bad_memory);
|
||||
__ st1(v23.V1D(), bad_memory);
|
||||
__ st1(v2.V2D(), v3.V2D(), v4.V2D(), v5.V2D(), bad_memory);
|
||||
__ st1(v22.V2D(), v23.V2D(), v24.V2D(), v25.V2D(), bad_memory);
|
||||
__ st1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
|
||||
__ st1(v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
|
||||
__ st1(v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
|
||||
__ st1(v22.V2D(), v23.V2D(), v24.V2D(), bad_memory);
|
||||
__ st1(v21.V2D(), v22.V2D(), bad_memory);
|
||||
__ st1(v6.V2D(), v7.V2D(), bad_memory);
|
||||
__ st1(v27.V2D(), v28.V2D(), bad_memory);
|
||||
__ st1(v21.V2D(), bad_memory);
|
||||
__ st1(v29.V2D(), bad_memory);
|
||||
__ st1(v20.V2D(), bad_memory);
|
||||
__ st1(v22.V2S(), v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
|
||||
__ st1(v8.V2S(), v9.V2S(), v10.V2S(), v11.V2S(), bad_memory);
|
||||
__ st1(v15.V2S(), v16.V2S(), v17.V2S(), v18.V2S(), bad_memory);
|
||||
__ st1(v2.V2S(), v3.V2S(), v4.V2S(), bad_memory);
|
||||
__ st1(v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
|
||||
__ st1(v7.V2S(), v8.V2S(), v9.V2S(), bad_memory);
|
||||
__ st1(v28.V2S(), v29.V2S(), bad_memory);
|
||||
__ st1(v29.V2S(), v30.V2S(), bad_memory);
|
||||
__ st1(v23.V2S(), v24.V2S(), bad_memory);
|
||||
__ st1(v6.V2S(), bad_memory);
|
||||
__ st1(v11.V2S(), bad_memory);
|
||||
__ st1(v17.V2S(), bad_memory);
|
||||
__ st1(v6.V4H(), v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
|
||||
__ st1(v9.V4H(), v10.V4H(), v11.V4H(), v12.V4H(), bad_memory);
|
||||
__ st1(v25.V4H(), v26.V4H(), v27.V4H(), v28.V4H(), bad_memory);
|
||||
__ st1(v11.V4H(), v12.V4H(), v13.V4H(), bad_memory);
|
||||
__ st1(v10.V4H(), v11.V4H(), v12.V4H(), bad_memory);
|
||||
__ st1(v12.V4H(), v13.V4H(), v14.V4H(), bad_memory);
|
||||
__ st1(v13.V4H(), v14.V4H(), bad_memory);
|
||||
__ st1(v15.V4H(), v16.V4H(), bad_memory);
|
||||
__ st1(v21.V4H(), v22.V4H(), bad_memory);
|
||||
__ st1(v16.V4H(), bad_memory);
|
||||
__ st1(v8.V4H(), bad_memory);
|
||||
__ st1(v30.V4H(), bad_memory);
|
||||
__ st1(v3.V4S(), v4.V4S(), v5.V4S(), v6.V4S(), bad_memory);
|
||||
__ st1(v25.V4S(), v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
|
||||
__ st1(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), bad_memory);
|
||||
__ st1(v31.V4S(), v0.V4S(), v1.V4S(), bad_memory);
|
||||
__ st1(v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
|
||||
__ st1(v6.V4S(), v7.V4S(), v8.V4S(), bad_memory);
|
||||
__ st1(v17.V4S(), v18.V4S(), bad_memory);
|
||||
__ st1(v31.V4S(), v0.V4S(), bad_memory);
|
||||
__ st1(v1.V4S(), v2.V4S(), bad_memory);
|
||||
__ st1(v26.V4S(), bad_memory);
|
||||
__ st1(v15.V4S(), bad_memory);
|
||||
__ st1(v13.V4S(), bad_memory);
|
||||
__ st1(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
|
||||
__ st1(v10.V8B(), v11.V8B(), v12.V8B(), v13.V8B(), bad_memory);
|
||||
__ st1(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), bad_memory);
|
||||
__ st1(v19.V8B(), v20.V8B(), v21.V8B(), bad_memory);
|
||||
__ st1(v31.V8B(), v0.V8B(), v1.V8B(), bad_memory);
|
||||
__ st1(v9.V8B(), v10.V8B(), v11.V8B(), bad_memory);
|
||||
__ st1(v12.V8B(), v13.V8B(), bad_memory);
|
||||
__ st1(v2.V8B(), v3.V8B(), bad_memory);
|
||||
__ st1(v0.V8B(), v1.V8B(), bad_memory);
|
||||
__ st1(v16.V8B(), bad_memory);
|
||||
__ st1(v25.V8B(), bad_memory);
|
||||
__ st1(v31.V8B(), bad_memory);
|
||||
__ st1(v4.V8H(), v5.V8H(), v6.V8H(), v7.V8H(), bad_memory);
|
||||
__ st1(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), bad_memory);
|
||||
__ st1(v26.V8H(), v27.V8H(), v28.V8H(), v29.V8H(), bad_memory);
|
||||
__ st1(v10.V8H(), v11.V8H(), v12.V8H(), bad_memory);
|
||||
__ st1(v21.V8H(), v22.V8H(), v23.V8H(), bad_memory);
|
||||
__ st1(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
|
||||
__ st1(v26.V8H(), v27.V8H(), bad_memory);
|
||||
__ st1(v24.V8H(), v25.V8H(), bad_memory);
|
||||
__ st1(v17.V8H(), v18.V8H(), bad_memory);
|
||||
__ st1(v29.V8H(), bad_memory);
|
||||
__ st1(v19.V8H(), bad_memory);
|
||||
__ st1(v23.V8H(), bad_memory);
|
||||
__ st1(v19.B(), 15, bad_memory);
|
||||
__ st1(v25.B(), 9, bad_memory);
|
||||
__ st1(v4.B(), 8, bad_memory);
|
||||
__ st1(v13.D(), 0, bad_memory);
|
||||
__ st1(v30.D(), 0, bad_memory);
|
||||
__ st1(v3.D(), 0, bad_memory);
|
||||
__ st1(v22.H(), 0, bad_memory);
|
||||
__ st1(v31.H(), 7, bad_memory);
|
||||
__ st1(v23.H(), 3, bad_memory);
|
||||
__ st1(v0.S(), 0, bad_memory);
|
||||
__ st1(v11.S(), 3, bad_memory);
|
||||
__ st1(v24.S(), 3, bad_memory);
|
||||
__ st2(v7.V16B(), v8.V16B(), bad_memory);
|
||||
__ st2(v5.V16B(), v6.V16B(), bad_memory);
|
||||
__ st2(v18.V16B(), v19.V16B(), bad_memory);
|
||||
__ st2(v14.V2D(), v15.V2D(), bad_memory);
|
||||
__ st2(v7.V2D(), v8.V2D(), bad_memory);
|
||||
__ st2(v24.V2D(), v25.V2D(), bad_memory);
|
||||
__ st2(v22.V2S(), v23.V2S(), bad_memory);
|
||||
__ st2(v4.V2S(), v5.V2S(), bad_memory);
|
||||
__ st2(v2.V2S(), v3.V2S(), bad_memory);
|
||||
__ st2(v23.V4H(), v24.V4H(), bad_memory);
|
||||
__ st2(v8.V4H(), v9.V4H(), bad_memory);
|
||||
__ st2(v7.V4H(), v8.V4H(), bad_memory);
|
||||
__ st2(v17.V4S(), v18.V4S(), bad_memory);
|
||||
__ st2(v6.V4S(), v7.V4S(), bad_memory);
|
||||
__ st2(v26.V4S(), v27.V4S(), bad_memory);
|
||||
__ st2(v31.V8B(), v0.V8B(), bad_memory);
|
||||
__ st2(v0.V8B(), v1.V8B(), bad_memory);
|
||||
__ st2(v21.V8B(), v22.V8B(), bad_memory);
|
||||
__ st2(v7.V8H(), v8.V8H(), bad_memory);
|
||||
__ st2(v22.V8H(), v23.V8H(), bad_memory);
|
||||
__ st2(v4.V8H(), v5.V8H(), bad_memory);
|
||||
__ st2(v8.B(), v9.B(), 15, bad_memory);
|
||||
__ st2(v8.B(), v9.B(), 15, bad_memory);
|
||||
__ st2(v7.B(), v8.B(), 4, bad_memory);
|
||||
__ st2(v25.D(), v26.D(), 0, bad_memory);
|
||||
__ st2(v17.D(), v18.D(), 1, bad_memory);
|
||||
__ st2(v3.D(), v4.D(), 1, bad_memory);
|
||||
__ st2(v4.H(), v5.H(), 3, bad_memory);
|
||||
__ st2(v0.H(), v1.H(), 5, bad_memory);
|
||||
__ st2(v22.H(), v23.H(), 2, bad_memory);
|
||||
__ st2(v14.S(), v15.S(), 3, bad_memory);
|
||||
__ st2(v23.S(), v24.S(), 3, bad_memory);
|
||||
__ st2(v0.S(), v1.S(), 2, bad_memory);
|
||||
__ st3(v26.V16B(), v27.V16B(), v28.V16B(), bad_memory);
|
||||
__ st3(v21.V16B(), v22.V16B(), v23.V16B(), bad_memory);
|
||||
__ st3(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
|
||||
__ st3(v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
|
||||
__ st3(v23.V2D(), v24.V2D(), v25.V2D(), bad_memory);
|
||||
__ st3(v10.V2D(), v11.V2D(), v12.V2D(), bad_memory);
|
||||
__ st3(v9.V2S(), v10.V2S(), v11.V2S(), bad_memory);
|
||||
__ st3(v13.V2S(), v14.V2S(), v15.V2S(), bad_memory);
|
||||
__ st3(v22.V2S(), v23.V2S(), v24.V2S(), bad_memory);
|
||||
__ st3(v31.V4H(), v0.V4H(), v1.V4H(), bad_memory);
|
||||
__ st3(v8.V4H(), v9.V4H(), v10.V4H(), bad_memory);
|
||||
__ st3(v19.V4H(), v20.V4H(), v21.V4H(), bad_memory);
|
||||
__ st3(v18.V4S(), v19.V4S(), v20.V4S(), bad_memory);
|
||||
__ st3(v25.V4S(), v26.V4S(), v27.V4S(), bad_memory);
|
||||
__ st3(v16.V4S(), v17.V4S(), v18.V4S(), bad_memory);
|
||||
__ st3(v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
|
||||
__ st3(v29.V8B(), v30.V8B(), v31.V8B(), bad_memory);
|
||||
__ st3(v30.V8B(), v31.V8B(), v0.V8B(), bad_memory);
|
||||
__ st3(v8.V8H(), v9.V8H(), v10.V8H(), bad_memory);
|
||||
__ st3(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
|
||||
__ st3(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
|
||||
__ st3(v31.B(), v0.B(), v1.B(), 10, bad_memory);
|
||||
__ st3(v4.B(), v5.B(), v6.B(), 5, bad_memory);
|
||||
__ st3(v5.B(), v6.B(), v7.B(), 1, bad_memory);
|
||||
__ st3(v5.D(), v6.D(), v7.D(), 0, bad_memory);
|
||||
__ st3(v6.D(), v7.D(), v8.D(), 0, bad_memory);
|
||||
__ st3(v0.D(), v1.D(), v2.D(), 0, bad_memory);
|
||||
__ st3(v31.H(), v0.H(), v1.H(), 2, bad_memory);
|
||||
__ st3(v14.H(), v15.H(), v16.H(), 5, bad_memory);
|
||||
__ st3(v21.H(), v22.H(), v23.H(), 6, bad_memory);
|
||||
__ st3(v21.S(), v22.S(), v23.S(), 0, bad_memory);
|
||||
__ st3(v11.S(), v12.S(), v13.S(), 1, bad_memory);
|
||||
__ st3(v15.S(), v16.S(), v17.S(), 0, bad_memory);
|
||||
__ st4(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), bad_memory);
|
||||
__ st4(v24.V16B(), v25.V16B(), v26.V16B(), v27.V16B(), bad_memory);
|
||||
__ st4(v15.V16B(), v16.V16B(), v17.V16B(), v18.V16B(), bad_memory);
|
||||
__ st4(v16.V2D(), v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
|
||||
__ st4(v17.V2D(), v18.V2D(), v19.V2D(), v20.V2D(), bad_memory);
|
||||
__ st4(v9.V2D(), v10.V2D(), v11.V2D(), v12.V2D(), bad_memory);
|
||||
__ st4(v23.V2S(), v24.V2S(), v25.V2S(), v26.V2S(), bad_memory);
|
||||
__ st4(v15.V2S(), v16.V2S(), v17.V2S(), v18.V2S(), bad_memory);
|
||||
__ st4(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
|
||||
__ st4(v14.V4H(), v15.V4H(), v16.V4H(), v17.V4H(), bad_memory);
|
||||
__ st4(v18.V4H(), v19.V4H(), v20.V4H(), v21.V4H(), bad_memory);
|
||||
__ st4(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), bad_memory);
|
||||
__ st4(v13.V4S(), v14.V4S(), v15.V4S(), v16.V4S(), bad_memory);
|
||||
__ st4(v6.V4S(), v7.V4S(), v8.V4S(), v9.V4S(), bad_memory);
|
||||
__ st4(v15.V4S(), v16.V4S(), v17.V4S(), v18.V4S(), bad_memory);
|
||||
__ st4(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
|
||||
__ st4(v25.V8B(), v26.V8B(), v27.V8B(), v28.V8B(), bad_memory);
|
||||
__ st4(v19.V8B(), v20.V8B(), v21.V8B(), v22.V8B(), bad_memory);
|
||||
__ st4(v19.V8H(), v20.V8H(), v21.V8H(), v22.V8H(), bad_memory);
|
||||
__ st4(v15.V8H(), v16.V8H(), v17.V8H(), v18.V8H(), bad_memory);
|
||||
__ st4(v31.V8H(), v0.V8H(), v1.V8H(), v2.V8H(), bad_memory);
|
||||
__ st4(v0.B(), v1.B(), v2.B(), v3.B(), 13, bad_memory);
|
||||
__ st4(v4.B(), v5.B(), v6.B(), v7.B(), 10, bad_memory);
|
||||
__ st4(v9.B(), v10.B(), v11.B(), v12.B(), 9, bad_memory);
|
||||
__ st4(v2.D(), v3.D(), v4.D(), v5.D(), 1, bad_memory);
|
||||
__ st4(v7.D(), v8.D(), v9.D(), v10.D(), 0, bad_memory);
|
||||
__ st4(v31.D(), v0.D(), v1.D(), v2.D(), 1, bad_memory);
|
||||
__ st4(v2.H(), v3.H(), v4.H(), v5.H(), 1, bad_memory);
|
||||
__ st4(v27.H(), v28.H(), v29.H(), v30.H(), 3, bad_memory);
|
||||
__ st4(v24.H(), v25.H(), v26.H(), v27.H(), 4, bad_memory);
|
||||
__ st4(v18.S(), v19.S(), v20.S(), v21.S(), 2, bad_memory);
|
||||
__ st4(v6.S(), v7.S(), v8.S(), v9.S(), 2, bad_memory);
|
||||
__ st4(v25.S(), v26.S(), v27.S(), v28.S(), 1, bad_memory);
|
||||
|
||||
END_IMPLICIT_CHECK();
|
||||
TRY_RUN_IMPLICIT_CHECK();
|
||||
}
|
||||
|
||||
// Implicit-check coverage for SVE loads and stores.
//
// Every instruction emitted below uses `bad_sve_memory`, an SVEMemOperand
// based on ip0, as its address operand. The groups cover, in order:
// unpredicated Str/Ldr of predicate and Z registers, predicated St1*/Ld1*
// accesses, packed (narrowing/widening, including sign-extending Ld1s*)
// accesses, and the interleaved St2-St4/Ld2-Ld4 forms.
//
// NOTE(review): START_IMPLICIT_CHECK, END_IMPLICIT_CHECK and
// TRY_RUN_IMPLICIT_CHECK are defined outside this view; presumably they point
// ip0 at inaccessible memory and verify that each access is caught - confirm
// against their definitions.
TEST(ImplicitCheckSve) {
|
||||
SETUP_WITH_FEATURES(CPUFeatures::kSVE,
|
||||
CPUFeatures::kSVE2,
|
||||
CPUFeatures::kNEON);
|
||||
START_IMPLICIT_CHECK();
|
||||
|
||||
SVEMemOperand bad_sve_memory = SVEMemOperand(ip0);
|
||||
|
||||
EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
|
||||
// Simple, unpredicated loads and stores.
|
||||
__ Str(p12.VnD(), bad_sve_memory);
|
||||
__ Str(p13.VnS(), bad_sve_memory);
|
||||
__ Str(p14.VnH(), bad_sve_memory);
|
||||
__ Str(p15.VnB(), bad_sve_memory);
|
||||
__ Ldr(p8.VnD(), bad_sve_memory);
|
||||
__ Ldr(p9.VnS(), bad_sve_memory);
|
||||
__ Ldr(p10.VnH(), bad_sve_memory);
|
||||
__ Ldr(p11.VnB(), bad_sve_memory);
|
||||
|
||||
__ Str(z0.VnD(), bad_sve_memory);
|
||||
__ Str(z1.VnS(), bad_sve_memory);
|
||||
__ Str(z2.VnH(), bad_sve_memory);
|
||||
__ Str(z3.VnB(), bad_sve_memory);
|
||||
__ Ldr(z20.VnD(), bad_sve_memory);
|
||||
__ Ldr(z21.VnS(), bad_sve_memory);
|
||||
__ Ldr(z22.VnH(), bad_sve_memory);
|
||||
__ Ldr(z23.VnB(), bad_sve_memory);
|
||||
|
||||
// Structured accesses.
|
||||
__ St1b(z0.VnB(), p2, bad_sve_memory);
|
||||
__ St1h(z1.VnH(), p1, bad_sve_memory);
|
||||
__ St1w(z2.VnS(), p1, bad_sve_memory);
|
||||
__ St1d(z3.VnD(), p2, bad_sve_memory);
|
||||
__ Ld1b(z20.VnB(), p1.Zeroing(), bad_sve_memory);
|
||||
__ Ld1h(z21.VnH(), p2.Zeroing(), bad_sve_memory);
|
||||
__ Ld1w(z22.VnS(), p1.Zeroing(), bad_sve_memory);
|
||||
__ Ld1d(z23.VnD(), p1.Zeroing(), bad_sve_memory);
|
||||
|
||||
// Structured, packed accesses.
|
||||
__ St1b(z2.VnH(), p1, bad_sve_memory);
|
||||
__ St1b(z3.VnS(), p2, bad_sve_memory);
|
||||
__ St1b(z4.VnD(), p2, bad_sve_memory);
|
||||
__ St1h(z0.VnS(), p1, bad_sve_memory);
|
||||
__ St1h(z1.VnD(), p1, bad_sve_memory);
|
||||
__ St1w(z2.VnD(), p1, bad_sve_memory);
|
||||
__ Ld1b(z20.VnH(), p1.Zeroing(), bad_sve_memory);
|
||||
__ Ld1b(z21.VnS(), p1.Zeroing(), bad_sve_memory);
|
||||
__ Ld1b(z22.VnD(), p1.Zeroing(), bad_sve_memory);
|
||||
__ Ld1h(z23.VnS(), p2.Zeroing(), bad_sve_memory);
|
||||
__ Ld1h(z24.VnD(), p2.Zeroing(), bad_sve_memory);
|
||||
__ Ld1w(z20.VnD(), p1.Zeroing(), bad_sve_memory);
|
||||
__ Ld1sb(z21.VnH(), p1.Zeroing(), bad_sve_memory);
|
||||
__ Ld1sb(z22.VnS(), p1.Zeroing(), bad_sve_memory);
|
||||
__ Ld1sb(z23.VnD(), p2.Zeroing(), bad_sve_memory);
|
||||
__ Ld1sh(z24.VnS(), p2.Zeroing(), bad_sve_memory);
|
||||
__ Ld1sh(z20.VnD(), p1.Zeroing(), bad_sve_memory);
|
||||
__ Ld1sw(z21.VnD(), p1.Zeroing(), bad_sve_memory);
|
||||
|
||||
// Structured, interleaved accesses.
|
||||
__ St2b(z0.VnB(), z1.VnB(), p4, bad_sve_memory);
|
||||
__ St2h(z1.VnH(), z2.VnH(), p4, bad_sve_memory);
|
||||
__ St2w(z2.VnS(), z3.VnS(), p3, bad_sve_memory);
|
||||
__ St2d(z3.VnD(), z4.VnD(), p4, bad_sve_memory);
|
||||
__ Ld2b(z20.VnB(), z21.VnB(), p5.Zeroing(), bad_sve_memory);
|
||||
__ Ld2h(z21.VnH(), z22.VnH(), p6.Zeroing(), bad_sve_memory);
|
||||
__ Ld2w(z22.VnS(), z23.VnS(), p6.Zeroing(), bad_sve_memory);
|
||||
__ Ld2d(z23.VnD(), z24.VnD(), p5.Zeroing(), bad_sve_memory);
|
||||
|
||||
__ St3b(z4.VnB(), z5.VnB(), z6.VnB(), p4, bad_sve_memory);
|
||||
__ St3h(z5.VnH(), z6.VnH(), z7.VnH(), p4, bad_sve_memory);
|
||||
__ St3w(z6.VnS(), z7.VnS(), z8.VnS(), p3, bad_sve_memory);
|
||||
__ St3d(z7.VnD(), z8.VnD(), z9.VnD(), p4, bad_sve_memory);
|
||||
__ Ld3b(z24.VnB(), z25.VnB(), z26.VnB(), p5.Zeroing(), bad_sve_memory);
|
||||
__ Ld3h(z25.VnH(), z26.VnH(), z27.VnH(), p6.Zeroing(), bad_sve_memory);
|
||||
__ Ld3w(z26.VnS(), z27.VnS(), z28.VnS(), p6.Zeroing(), bad_sve_memory);
|
||||
__ Ld3d(z27.VnD(), z28.VnD(), z29.VnD(), p5.Zeroing(), bad_sve_memory);
|
||||
|
||||
__ St4b(z31.VnB(), z0.VnB(), z1.VnB(), z2.VnB(), p4, bad_sve_memory);
|
||||
__ St4h(z0.VnH(), z1.VnH(), z2.VnH(), z3.VnH(), p4, bad_sve_memory);
|
||||
__ St4w(z1.VnS(), z2.VnS(), z3.VnS(), z4.VnS(), p3, bad_sve_memory);
|
||||
__ St4d(z2.VnD(), z3.VnD(), z4.VnD(), z5.VnD(), p4, bad_sve_memory);
|
||||
__ Ld4b(z25.VnB(),
|
||||
z26.VnB(),
|
||||
z27.VnB(),
|
||||
z28.VnB(),
|
||||
p5.Zeroing(),
|
||||
bad_sve_memory);
|
||||
__ Ld4h(z26.VnH(),
|
||||
z27.VnH(),
|
||||
z28.VnH(),
|
||||
z29.VnH(),
|
||||
p6.Zeroing(),
|
||||
bad_sve_memory);
|
||||
__ Ld4w(z27.VnS(),
|
||||
z28.VnS(),
|
||||
z29.VnS(),
|
||||
z30.VnS(),
|
||||
p6.Zeroing(),
|
||||
bad_sve_memory);
|
||||
__ Ld4d(z28.VnD(),
|
||||
z29.VnD(),
|
||||
z30.VnD(),
|
||||
z31.VnD(),
|
||||
p5.Zeroing(),
|
||||
bad_sve_memory);
|
||||
|
||||
END_IMPLICIT_CHECK();
|
||||
TRY_RUN_IMPLICIT_CHECK();
|
||||
}
|
||||
|
||||
// Implicit-check coverage for the atomic memory operations enabled by
// CPUFeatures::kAtomics.
//
// For each operation (add, set, eor, smin, smax, umin, umax, clr) the test
// emits every Ld<op>* form followed by every St<op>* form, all addressing
// `bad_memory`.
TEST(ImplicitCheckAtomics) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kAtomics);
  START_IMPLICIT_CHECK();

  EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());

// Load forms: byte and halfword variants first, then the W- and X-register
// variants, each with the plain, `a`, `l` and `al` suffixes.
#define IMPLICIT_CHECK_ATOMIC_LOADS(OP) \
  __ Ld##OP##b(w0, w0, bad_memory);     \
  __ Ld##OP##ab(w0, w1, bad_memory);    \
  __ Ld##OP##lb(w0, w2, bad_memory);    \
  __ Ld##OP##alb(w0, w3, bad_memory);   \
  __ Ld##OP##h(w0, w0, bad_memory);     \
  __ Ld##OP##ah(w0, w1, bad_memory);    \
  __ Ld##OP##lh(w0, w2, bad_memory);    \
  __ Ld##OP##alh(w0, w3, bad_memory);   \
  __ Ld##OP(w0, w0, bad_memory);        \
  __ Ld##OP##a(w0, w1, bad_memory);     \
  __ Ld##OP##l(w0, w2, bad_memory);     \
  __ Ld##OP##al(w0, w3, bad_memory);    \
  __ Ld##OP(x0, x0, bad_memory);        \
  __ Ld##OP##a(x0, x1, bad_memory);     \
  __ Ld##OP##l(x0, x2, bad_memory);     \
  __ Ld##OP##al(x0, x3, bad_memory);

// Store forms: byte, halfword, W- and X-register variants, each with the
// plain and `l` suffixes.
#define IMPLICIT_CHECK_ATOMIC_STORES(OP) \
  __ St##OP##b(w0, bad_memory);          \
  __ St##OP##lb(w0, bad_memory);         \
  __ St##OP##h(w0, bad_memory);          \
  __ St##OP##lh(w0, bad_memory);         \
  __ St##OP(w0, bad_memory);             \
  __ St##OP##l(w0, bad_memory);          \
  __ St##OP(x0, bad_memory);             \
  __ St##OP##l(x0, bad_memory);

// All loads for an operation are emitted before its stores.
#define IMPLICIT_CHECK_ATOMIC_OPS(OP) \
  IMPLICIT_CHECK_ATOMIC_LOADS(OP)     \
  IMPLICIT_CHECK_ATOMIC_STORES(OP)

  IMPLICIT_CHECK_ATOMIC_OPS(add);
  IMPLICIT_CHECK_ATOMIC_OPS(set);
  IMPLICIT_CHECK_ATOMIC_OPS(eor);
  IMPLICIT_CHECK_ATOMIC_OPS(smin);
  IMPLICIT_CHECK_ATOMIC_OPS(smax);
  IMPLICIT_CHECK_ATOMIC_OPS(umin);
  IMPLICIT_CHECK_ATOMIC_OPS(umax);
  IMPLICIT_CHECK_ATOMIC_OPS(clr);

#undef IMPLICIT_CHECK_ATOMIC_OPS
#undef IMPLICIT_CHECK_ATOMIC_STORES
#undef IMPLICIT_CHECK_ATOMIC_LOADS

  END_IMPLICIT_CHECK();
  TRY_RUN_IMPLICIT_CHECK();
}
|
||||
|
||||
// Implicit-check coverage for the memory set/copy macro instructions enabled
// by CPUFeatures::kMOPS.
//
// The Set* group is emitted with (x15, ip1, ip0) and the Cpy*/Cpyf* group with
// (x15, ip0, ip1); x1 is then rescaled by the trailing Mov/Mul/Add sequence.
//
// NOTE(review): the implicit-check macros and the initial contents of ip0,
// ip1 and x1 are set up outside this view; presumably ip0 holds the
// inaccessible address and x1 is the expected-fault counter - confirm.
TEST(ImplicitCheckMops) {
|
||||
SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kMOPS);
|
||||
START_IMPLICIT_CHECK();
|
||||
|
||||
EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
|
||||
__ Set(x15, ip1, ip0);
|
||||
__ Setn(x15, ip1, ip0);
|
||||
__ Setg(x15, ip1, ip0);
|
||||
__ Setgn(x15, ip1, ip0);
|
||||
|
||||
__ Cpy(x15, ip0, ip1);
|
||||
__ Cpyn(x15, ip0, ip1);
|
||||
__ Cpyrn(x15, ip0, ip1);
|
||||
__ Cpywn(x15, ip0, ip1);
|
||||
__ Cpyf(x15, ip0, ip1);
|
||||
__ Cpyfn(x15, ip0, ip1);
|
||||
__ Cpyfrn(x15, ip0, ip1);
|
||||
__ Cpyfwn(x15, ip0, ip1);
|
||||
|
||||
// The macro-assembler expands each instruction into prologue, main and
|
||||
// epilogue instructions where only the main instruction will fail. Increase
|
||||
// the counter to account for those additional instructions and the following
|
||||
// instructions.
|
||||
// Net effect of the three instructions below: x1 = (x1 * 3) + 3.
__ Mov(x0, 3);
|
||||
__ Mul(x1, x1, x0);
|
||||
__ Add(x1, x1, x0);
|
||||
|
||||
END_IMPLICIT_CHECK();
|
||||
TRY_RUN_IMPLICIT_CHECK();
|
||||
}
|
||||
#endif // VIXL_ENABLE_IMPLICIT_CHECKS
|
||||
|
||||
#undef __
|
||||
#define __ masm->
|
||||
|
||||
@@ -5140,6 +6025,7 @@ TEST(RunFrom) {
|
||||
3.0);
|
||||
VIXL_CHECK(res_double == 6.0);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -9117,5 +9117,130 @@ TEST_SVE(sve2_extract) {
|
||||
}
|
||||
}
|
||||
|
||||
// State-hash test for the 128-bit SVE2 polynomial multiplies (pmullb/pmullt
// with .q destination), which require CPUFeatures::kSVEPmull128.
//
// Forty raw encodings are emitted with dci() inside an ExactAssemblyScope
// sized for exactly 40 instructions. The machine state is then hashed by
// ComputeMachineStateHash() into `state`, loaded into w0, and compared with
// the expected hash for the vector length in use. The "vl128 state" comments
// record the running hash after each instruction for a 128-bit vector length.
TEST_SVE(sve2_pmull128) {
|
||||
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
|
||||
CPUFeatures::kSVE2,
|
||||
CPUFeatures::kNEON,
|
||||
CPUFeatures::kCRC32,
|
||||
CPUFeatures::kSVEPmull128);
|
||||
START();
|
||||
|
||||
SetInitialMachineState(&masm);
|
||||
// state = 0xe2bd2480
|
||||
|
||||
{
|
||||
ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
|
||||
__ dci(0x45006800); // pmullb z0.q, z0.d, z0.d
|
||||
// vl128 state = 0x4107ca0c
|
||||
__ dci(0x45006a28); // pmullb z8.q, z17.d, z0.d
|
||||
// vl128 state = 0xa87d231a
|
||||
__ dci(0x45016a6c); // pmullb z12.q, z19.d, z1.d
|
||||
// vl128 state = 0xc547fcf6
|
||||
__ dci(0x45116e68); // pmullt z8.q, z19.d, z17.d
|
||||
// vl128 state = 0x6a01d521
|
||||
__ dci(0x45106a69); // pmullb z9.q, z19.d, z16.d
|
||||
// vl128 state = 0x64a7ba8a
|
||||
__ dci(0x45006a4d); // pmullb z13.q, z18.d, z0.d
|
||||
// vl128 state = 0xe59e3f8e
|
||||
__ dci(0x45086e5d); // pmullt z29.q, z18.d, z8.d
|
||||
// vl128 state = 0xbfbb9316
|
||||
__ dci(0x450a6e75); // pmullt z21.q, z19.d, z10.d
|
||||
// vl128 state = 0x29f6a4c7
|
||||
__ dci(0x45126e74); // pmullt z20.q, z19.d, z18.d
|
||||
// vl128 state = 0x4ced9406
|
||||
__ dci(0x45176e75); // pmullt z21.q, z19.d, z23.d
|
||||
// vl128 state = 0xd09e5676
|
||||
__ dci(0x45176e77); // pmullt z23.q, z19.d, z23.d
|
||||
// vl128 state = 0x568c0e25
|
||||
__ dci(0x45176e75); // pmullt z21.q, z19.d, z23.d
|
||||
// vl128 state = 0xb2f13c36
|
||||
__ dci(0x45176b71); // pmullb z17.q, z27.d, z23.d
|
||||
// vl128 state = 0x160bec4f
|
||||
__ dci(0x451f6b30); // pmullb z16.q, z25.d, z31.d
|
||||
// vl128 state = 0x2d7e7f49
|
||||
__ dci(0x451f6b20); // pmullb z0.q, z25.d, z31.d
|
||||
// vl128 state = 0x113d828b
|
||||
__ dci(0x451f6b90); // pmullb z16.q, z28.d, z31.d
|
||||
// vl128 state = 0xb8b3b3d9
|
||||
__ dci(0x451f6f12); // pmullt z18.q, z24.d, z31.d
|
||||
// vl128 state = 0x277aacb8
|
||||
__ dci(0x451f6f16); // pmullt z22.q, z24.d, z31.d
|
||||
// vl128 state = 0xef79c8da
|
||||
__ dci(0x450b6f17); // pmullt z23.q, z24.d, z11.d
|
||||
// vl128 state = 0x1dc19104
|
||||
__ dci(0x450a6e1f); // pmullt z31.q, z16.d, z10.d
|
||||
// vl128 state = 0x3ccb4ea8
|
||||
__ dci(0x451a6e2f); // pmullt z15.q, z17.d, z26.d
|
||||
// vl128 state = 0x14e13481
|
||||
__ dci(0x45126a3f); // pmullb z31.q, z17.d, z18.d
|
||||
// vl128 state = 0x4e6502f9
|
||||
__ dci(0x451a6b3e); // pmullb z30.q, z25.d, z26.d
|
||||
// vl128 state = 0xf6f18478
|
||||
__ dci(0x45126a3a); // pmullb z26.q, z17.d, z18.d
|
||||
// vl128 state = 0xdd4f14fb
|
||||
__ dci(0x45126afb); // pmullb z27.q, z23.d, z18.d
|
||||
// vl128 state = 0xcbf3bee2
|
||||
__ dci(0x45126aff); // pmullb z31.q, z23.d, z18.d
|
||||
// vl128 state = 0x627bec09
|
||||
__ dci(0x45126aef); // pmullb z15.q, z23.d, z18.d
|
||||
// vl128 state = 0xf5de1fa9
|
||||
__ dci(0x45106abf); // pmullb z31.q, z21.d, z16.d
|
||||
// vl128 state = 0x44bb6385
|
||||
__ dci(0x451a6abb); // pmullb z27.q, z21.d, z26.d
|
||||
// vl128 state = 0x5c5fa224
|
||||
__ dci(0x450a68b3); // pmullb z19.q, z5.d, z10.d
|
||||
// vl128 state = 0x28b6085c
|
||||
__ dci(0x450e69b2); // pmullb z18.q, z13.d, z14.d
|
||||
// vl128 state = 0x450898d6
|
||||
__ dci(0x450e69b6); // pmullb z22.q, z13.d, z14.d
|
||||
// vl128 state = 0x79d7911b
|
||||
__ dci(0x450e69b4); // pmullb z20.q, z13.d, z14.d
|
||||
// vl128 state = 0x98bf6939
|
||||
__ dci(0x450f6924); // pmullb z4.q, z9.d, z15.d
|
||||
// vl128 state = 0xb8a1bbc7
|
||||
__ dci(0x45176925); // pmullb z5.q, z9.d, z23.d
|
||||
// vl128 state = 0x631b41c8
|
||||
__ dci(0x451f69a4); // pmullb z4.q, z13.d, z31.d
|
||||
// vl128 state = 0x617fc272
|
||||
__ dci(0x451b69e0); // pmullb z0.q, z15.d, z27.d
|
||||
// vl128 state = 0x77780ac1
|
||||
__ dci(0x451b69e8); // pmullb z8.q, z15.d, z27.d
|
||||
// vl128 state = 0xce5ae18f
|
||||
__ dci(0x450f69e0); // pmullb z0.q, z15.d, z15.d
|
||||
// vl128 state = 0xa037371a
|
||||
__ dci(0x450b6be8); // pmullb z8.q, z31.d, z11.d
|
||||
// vl128 state = 0xb59be233
|
||||
}
|
||||
|
||||
uint32_t state;
|
||||
ComputeMachineStateHash(&masm, &state);
|
||||
__ Mov(x0, reinterpret_cast<uint64_t>(&state));
|
||||
__ Ldr(w0, MemOperand(x0));
|
||||
|
||||
END();
|
||||
if (CAN_RUN()) {
|
||||
RUN();
|
||||
// Sixteen expected hashes, indexed by (number of Q-sized lanes - 1). The
// first entry matches the final "vl128 state" value recorded above.
uint32_t expected_hashes[] = {
|
||||
0xb59be233,
|
||||
0x32430624,
|
||||
0x5cc3ec66,
|
||||
0xecfdffe7,
|
||||
0x6d77a270,
|
||||
0xa0d604f2,
|
||||
0x2178aa11,
|
||||
0xabdcbeaa,
|
||||
0xab3b974f,
|
||||
0x11a874f5,
|
||||
0xf2eb6131,
|
||||
0x6d311c6c,
|
||||
0xd4e99b72,
|
||||
0x5177ce8e,
|
||||
0x32aa02f0,
|
||||
0x681ef977,
|
||||
};
|
||||
ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace aarch64
|
||||
} // namespace vixl
|
||||
|
||||
@@ -89,6 +89,34 @@ bool Equal64(uint64_t reference,
|
||||
}
|
||||
|
||||
|
||||
bool Equal64(std::vector<uint64_t> reference_list,
|
||||
const RegisterDump*,
|
||||
uint64_t result,
|
||||
ExpectedResult option) {
|
||||
switch (option) {
|
||||
case kExpectEqual:
|
||||
for (uint64_t reference : reference_list) {
|
||||
if (result == reference) return true;
|
||||
}
|
||||
printf("Expected a result in (\n");
|
||||
break;
|
||||
case kExpectNotEqual:
|
||||
for (uint64_t reference : reference_list) {
|
||||
if (result == reference) {
|
||||
printf("Expected a result not in (\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
for (uint64_t reference : reference_list) {
|
||||
printf(" 0x%016" PRIx64 ",\n", reference);
|
||||
}
|
||||
printf(")\t Found 0x%016" PRIx64 "\n", result);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool Equal128(QRegisterValue expected,
|
||||
const RegisterDump*,
|
||||
QRegisterValue result) {
|
||||
@@ -200,6 +228,16 @@ bool Equal64(uint64_t reference,
|
||||
}
|
||||
|
||||
|
||||
bool Equal64(std::vector<uint64_t> reference_list,
|
||||
const RegisterDump* core,
|
||||
const Register& reg,
|
||||
ExpectedResult option) {
|
||||
VIXL_ASSERT(reg.Is64Bits());
|
||||
uint64_t result = core->xreg(reg.GetCode());
|
||||
return Equal64(reference_list, core, result, option);
|
||||
}
|
||||
|
||||
|
||||
bool NotEqual64(uint64_t reference,
|
||||
const RegisterDump* core,
|
||||
const Register& reg) {
|
||||
|
||||
@@ -345,6 +345,10 @@ bool Equal64(uint64_t reference,
|
||||
const RegisterDump*,
|
||||
uint64_t result,
|
||||
ExpectedResult option = kExpectEqual);
|
||||
bool Equal64(std::vector<uint64_t> reference_list,
|
||||
const RegisterDump*,
|
||||
uint64_t result,
|
||||
ExpectedResult option = kExpectEqual);
|
||||
bool Equal128(QRegisterValue expected,
|
||||
const RegisterDump*,
|
||||
QRegisterValue result);
|
||||
@@ -358,6 +362,10 @@ bool Equal64(uint64_t reference,
|
||||
const RegisterDump* core,
|
||||
const Register& reg,
|
||||
ExpectedResult option = kExpectEqual);
|
||||
bool Equal64(std::vector<uint64_t> reference_list,
|
||||
const RegisterDump* core,
|
||||
const Register& reg,
|
||||
ExpectedResult option = kExpectEqual);
|
||||
bool Equal64(uint64_t expected,
|
||||
const RegisterDump* core,
|
||||
const VRegister& vreg);
|
||||
|
||||
@@ -397,5 +397,27 @@ TEST(stl_forward_iterator) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Check move construction of TestSet: the destination takes over the
// elements and the moved-from set reports itself as empty, both for a small
// set and after many more elements have been inserted.
TEST(move) {
  TestSet original;
  original.insert(Obj(-123, 456));
  original.insert(Obj(2718, 2871828));

  // Move a small set.
  TestSet moved(std::move(original));
  VIXL_CHECK(original.empty());
  VIXL_CHECK(moved.size() == 2);
  VIXL_CHECK(moved.GetMinElement() == Obj(-123, 456));

  // Test with more elements.
  const auto extra_elements = 4 * kNPreallocatedElements;
  for (unsigned i = 0; i < extra_elements; ++i) {
    moved.insert(Obj(i, -1));
  }

  // Move the enlarged set; the original minimum element must survive.
  TestSet moved_again(std::move(moved));
  VIXL_CHECK(moved.empty());
  VIXL_CHECK(moved_again.size() == 2 + extra_elements);
  VIXL_CHECK(moved_again.GetMinElement() == Obj(-123, 456));
}
|
||||
|
||||
|
||||
} // namespace vixl
|
||||
|
||||
@@ -376,7 +376,7 @@ TEST(FuzzObjectDeletedWhenPlaced) {
|
||||
}
|
||||
|
||||
int32_t pc = 0;
|
||||
for (int i = 0; !objects.empty(); ++i) {
|
||||
while (!objects.empty()) {
|
||||
IF_VERBOSE(printf("PC = 0x%x (%d)\n", pc, pc));
|
||||
int32_t pc_increment = RandomPCIncrement();
|
||||
IF_VERBOSE(printf("Attempting to increment PC by %d\n", pc_increment));
|
||||
@@ -451,7 +451,7 @@ TEST(FuzzObjectUpdatedWhenPlaced) {
|
||||
}
|
||||
|
||||
int32_t pc = 0;
|
||||
for (int i = 0; !objects.empty(); ++i) {
|
||||
while (!objects.empty()) {
|
||||
IF_VERBOSE(printf("PC = 0x%x (%d)\n", pc, pc));
|
||||
|
||||
int32_t pc_increment = RandomPCIncrement();
|
||||
|
||||
@@ -14,11 +14,22 @@
|
||||
1660224011 82.79% 97.51% 95.50%
|
||||
1663161852 82.79% 97.51% 95.50%
|
||||
1666104118 82.79% 97.51% 95.50%
|
||||
1668785529 82.75% 97.44% 95.40%
|
||||
1669202345 82.79% 97.51% 95.51%
|
||||
1673432155 82.79% 97.51% 95.51%
|
||||
1677171445 82.78% 97.56% 94.81%
|
||||
1681814646 82.90% 97.57% 94.87%
|
||||
1686666000 82.90% 97.57% 94.87%
|
||||
1693487542 82.91% 97.57% 94.87%
|
||||
1694008240 82.72% 97.50% 94.95%
|
||||
1697036303 82.87% 97.56% 94.76%
|
||||
1698228274 82.93% 97.68% 94.90%
|
||||
1698330215 82.92% 97.57% 94.88%
|
||||
1702052331 82.89% 97.59% 94.77%
|
||||
1706691191 82.87% 97.59% 94.74%
|
||||
1707395574 82.89% 97.59% 94.77%
|
||||
1715261843 82.84% 97.60% 94.69%
|
||||
1718190785 82.85% 97.60% 94.70%
|
||||
1722595938 82.94% 97.78% 94.72%
|
||||
1728570468 82.94% 97.78% 94.71%
|
||||
1736874659 82.94% 97.63% 94.78%
|
||||
|
||||
+2
-2
@@ -91,7 +91,7 @@ def Lint(filename, progress_prefix = ''):
|
||||
printer.Print(outerr)
|
||||
|
||||
# Find the number of errors in this file.
|
||||
res = re.search('Total errors found: (\d+)', outerr)
|
||||
res = re.search(r'Total errors found: (\d+)', outerr)
|
||||
if res:
|
||||
n_errors_str = res.string[res.start(1):res.end(1)]
|
||||
n_errors = int(n_errors_str)
|
||||
@@ -192,7 +192,7 @@ def IsCppLintAvailable():
|
||||
return retcode == 0
|
||||
|
||||
|
||||
CPP_EXT_REGEXP = re.compile('\.(cc|h)$')
|
||||
CPP_EXT_REGEXP = re.compile(r'\.(cc|h)$')
|
||||
def IsLinterInput(filename):
|
||||
# lint all C++ files.
|
||||
return CPP_EXT_REGEXP.search(filename) != None
|
||||
|
||||
+2
-2
@@ -89,7 +89,7 @@ def GetCompilerDirectives(env):
|
||||
match.group(1): match.group(2)
|
||||
for match in [
|
||||
# Capture macro name.
|
||||
re.search('^#define (\S+?) (.+)$', macro)
|
||||
re.search(r'^#define (\S+?) (.+)$', macro)
|
||||
for macro in out.split('\n')
|
||||
]
|
||||
# Filter out non-matches.
|
||||
@@ -183,7 +183,7 @@ class CompilerInformation(object):
|
||||
# "{compiler}-{major}.{minor}". The comparison is done using the provided
|
||||
# `operator` argument.
|
||||
def CompareVersion(self, operator, description):
|
||||
match = re.search('^(\S+)-(.*?)$', description)
|
||||
match = re.search(r'^(\S+)-(.*?)$', description)
|
||||
if not match:
|
||||
raise Exception("A version number is required when comparing compilers")
|
||||
compiler, version = match.group(1), match.group(2)
|
||||
|
||||
Reference in New Issue
Block a user