Backed out 3 changesets (bug 1779807) for causing SM build failures. CLOSED TREE

Backed out changeset 68e92976dc0f (bug 1779807)
Backed out changeset f11ef6602f59 (bug 1779807)
Backed out changeset faa38e8360b1 (bug 1779807)
smolnar 2022-07-26 00:44:58 +03:00
parent 64c6aae7ac
commit af5711925c
5 changed files with 80 additions and 547 deletions
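
For orientation, the interface this backout removes is the 64-bit search helper SIMD::memchr64 declared in SIMD.h further down. A minimal usage sketch, mirroring the deleted TestTinyString64 test below and assuming the pre-backout tree (the helper no longer exists after this commit):

#include "mozilla/SIMD.h"
#include <cstdint>

int main() {
  // memchr64 scans `length` uint64_t elements for the first one equal to
  // `value` and returns a pointer to it, or nullptr if it is not found.
  const uint64_t buf[4] = {0, 1, 2, 3};
  const uint64_t* hit = mozilla::SIMD::memchr64(buf, 2, 3);   // buf + 2
  const uint64_t* miss = mozilla::SIMD::memchr64(buf, 3, 3);  // nullptr: only buf[0..2] is searched
  return (hit == buf + 2 && miss == nullptr) ? 0 : 1;
}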

View File

@ -10,7 +10,6 @@
#include "mozilla/DebugOnly.h"
#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"
#include "mozilla/SIMD.h"
#include "mozilla/TextUtils.h"
#include <algorithm>
@ -70,7 +69,6 @@ using mozilla::CheckedInt;
using mozilla::DebugOnly;
using mozilla::IsAsciiDigit;
using mozilla::Maybe;
using mozilla::SIMD;
using JS::AutoCheckCannotGC;
using JS::IsArrayAnswer;
@ -4075,19 +4073,6 @@ bool js::array_indexOf(JSContext* cx, unsigned argc, Value* vp) {
std::min(nobj->getDenseInitializedLength(), uint32_t(len));
const Value* elements = nobj->getDenseElements();
if (CanUseBitwiseCompareForStrictlyEqual(searchElement) && length > start) {
const uint64_t* elementsAsBits =
reinterpret_cast<const uint64_t*>(elements);
const uint64_t* res = SIMD::memchr64(
elementsAsBits + start, searchElement.asRawBits(), length - start);
if (res) {
args.rval().setInt32(static_cast<int32_t>(res - elementsAsBits));
} else {
args.rval().setInt32(-1);
}
return true;
}
auto iterator = [elements, start, length](JSContext* cx, auto cmp,
MutableHandleValue rval) {
static_assert(NativeObject::MAX_DENSE_ELEMENTS_COUNT <= INT32_MAX,
@ -4322,19 +4307,6 @@ bool js::array_includes(JSContext* cx, unsigned argc, Value* vp) {
return true;
}
// For |includes| we need to treat hole values as |undefined| so we use a
// different path if searching for |undefined|.
if (CanUseBitwiseCompareForStrictlyEqual(searchElement) &&
!searchElement.isUndefined() && length > start) {
if (SIMD::memchr64(reinterpret_cast<const uint64_t*>(elements) + start,
searchElement.asRawBits(), length - start)) {
args.rval().setBoolean(true);
} else {
args.rval().setBoolean(false);
}
return true;
}
auto iterator = [elements, start, length](JSContext* cx, auto cmp,
MutableHandleValue rval) {
for (uint32_t i = start; i < length; i++) {
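
The hunks above delete the raw-bits fast path from js::array_indexOf and js::array_includes. A conceptual sketch of what that path did, under the assumption (guarded by CanUseBitwiseCompareForStrictlyEqual in the deleted code) that strict equality of the search element coincides with equality of the 64-bit Value encodings; ScanRawBits is a hypothetical scalar stand-in for the SIMD::memchr64 call:

#include <cstddef>
#include <cstdint>

// Hypothetical scalar equivalent of the deleted SIMD::memchr64 fast path:
// treat the dense element storage as a flat uint64_t buffer and search for
// the raw bit pattern of the needle Value.
static const uint64_t* ScanRawBits(const uint64_t* bits, uint64_t needle, size_t length) {
  for (size_t i = 0; i < length; i++) {
    if (bits[i] == needle) {
      return bits + i;
    }
  }
  return nullptr;
}

The removed callers then converted a hit back to an index with `res - elementsAsBits` (or returned -1 for indexOf, false for includes, on a miss).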

View File

@ -9,24 +9,10 @@
#include <stdint.h>
#include <type_traits>
#include "mozilla/EndianUtils.h"
#include "mozilla/SSE.h"
namespace mozilla {
template <typename TValue>
const TValue* FindInBufferNaive(const TValue* ptr, TValue value,
size_t length) {
const TValue* end = ptr + length;
while (ptr < end) {
if (*ptr == value) {
return ptr;
}
ptr++;
}
return nullptr;
}
#ifdef MOZILLA_PRESUME_SSE2
# include <immintrin.h>
@ -35,10 +21,6 @@ const __m128i* Cast128(uintptr_t ptr) {
return reinterpret_cast<const __m128i*>(ptr);
}
const __m256i* Cast256(uintptr_t ptr) {
return reinterpret_cast<const __m256i*>(ptr);
}
template <typename T>
T GetAs(uintptr_t ptr) {
return *reinterpret_cast<const T*>(ptr);
@ -50,33 +32,15 @@ uintptr_t AlignDown16(uintptr_t ptr) { return ptr & ~0xf; }
uintptr_t AlignUp16(uintptr_t ptr) { return AlignDown16(ptr + 0xf); }
uintptr_t AlignDown32(uintptr_t ptr) { return ptr & ~0x1f; }
uintptr_t AlignUp32(uintptr_t ptr) { return AlignDown32(ptr + 0x1f); }
template <typename TValue>
template <typename CharType>
__m128i CmpEq128(__m128i a, __m128i b) {
static_assert(sizeof(TValue) == 1 || sizeof(TValue) == 2);
if (sizeof(TValue) == 1) {
static_assert(sizeof(CharType) == 1 || sizeof(CharType) == 2);
if (sizeof(CharType) == 1) {
return _mm_cmpeq_epi8(a, b);
}
return _mm_cmpeq_epi16(a, b);
}
template <typename TValue>
__m256i CmpEq256(__m256i a, __m256i b) {
static_assert(sizeof(TValue) == 1 || sizeof(TValue) == 2 ||
sizeof(TValue) == 8);
if (sizeof(TValue) == 1) {
return _mm256_cmpeq_epi8(a, b);
}
if (sizeof(TValue) == 2) {
return _mm256_cmpeq_epi16(a, b);
}
return _mm256_cmpeq_epi64(a, b);
}
# ifdef __GNUC__
// Earlier versions of GCC are missing the _mm_loadu_si32 instruction. This
@ -134,55 +98,17 @@ const char* Check4x4Chars(__m128i needle, uintptr_t a, uintptr_t b, uintptr_t c,
return nullptr;
}
template <typename TValue>
const TValue* Check4x8Bytes(__m128i needle, uintptr_t a, uintptr_t b,
uintptr_t c, uintptr_t d) {
__m128i haystackA = _mm_loadu_si64(Cast128(a));
__m128i cmpA = CmpEq128<TValue>(needle, haystackA);
__m128i haystackB = _mm_loadu_si64(Cast128(b));
__m128i cmpB = CmpEq128<TValue>(needle, haystackB);
__m128i haystackC = _mm_loadu_si64(Cast128(c));
__m128i cmpC = CmpEq128<TValue>(needle, haystackC);
__m128i haystackD = _mm_loadu_si64(Cast128(d));
__m128i cmpD = CmpEq128<TValue>(needle, haystackD);
__m128i or_ab = _mm_or_si128(cmpA, cmpB);
__m128i or_cd = _mm_or_si128(cmpC, cmpD);
__m128i or_abcd = _mm_or_si128(or_ab, or_cd);
int orMask = _mm_movemask_epi8(or_abcd);
if (orMask & 0xff) {
int cmpMask;
cmpMask = _mm_movemask_epi8(cmpA);
if (cmpMask & 0xff) {
return reinterpret_cast<const TValue*>(a + __builtin_ctz(cmpMask));
}
cmpMask = _mm_movemask_epi8(cmpB);
if (cmpMask & 0xff) {
return reinterpret_cast<const TValue*>(b + __builtin_ctz(cmpMask));
}
cmpMask = _mm_movemask_epi8(cmpC);
if (cmpMask & 0xff) {
return reinterpret_cast<const TValue*>(c + __builtin_ctz(cmpMask));
}
cmpMask = _mm_movemask_epi8(cmpD);
if (cmpMask & 0xff) {
return reinterpret_cast<const TValue*>(d + __builtin_ctz(cmpMask));
}
}
return nullptr;
}
template <typename TValue>
const TValue* Check4x16Bytes(__m128i needle, uintptr_t a, uintptr_t b,
uintptr_t c, uintptr_t d) {
template <typename CharType>
const CharType* Check4x16Bytes(__m128i needle, uintptr_t a, uintptr_t b,
uintptr_t c, uintptr_t d) {
__m128i haystackA = _mm_loadu_si128(Cast128(a));
__m128i cmpA = CmpEq128<TValue>(needle, haystackA);
__m128i cmpA = CmpEq128<CharType>(needle, haystackA);
__m128i haystackB = _mm_loadu_si128(Cast128(b));
__m128i cmpB = CmpEq128<TValue>(needle, haystackB);
__m128i cmpB = CmpEq128<CharType>(needle, haystackB);
__m128i haystackC = _mm_loadu_si128(Cast128(c));
__m128i cmpC = CmpEq128<TValue>(needle, haystackC);
__m128i cmpC = CmpEq128<CharType>(needle, haystackC);
__m128i haystackD = _mm_loadu_si128(Cast128(d));
__m128i cmpD = CmpEq128<TValue>(needle, haystackD);
__m128i cmpD = CmpEq128<CharType>(needle, haystackD);
__m128i or_ab = _mm_or_si128(cmpA, cmpB);
__m128i or_cd = _mm_or_si128(cmpC, cmpD);
__m128i or_abcd = _mm_or_si128(or_ab, or_cd);
@ -191,57 +117,19 @@ const TValue* Check4x16Bytes(__m128i needle, uintptr_t a, uintptr_t b,
int cmpMask;
cmpMask = _mm_movemask_epi8(cmpA);
if (cmpMask) {
return reinterpret_cast<const TValue*>(a + __builtin_ctz(cmpMask));
return reinterpret_cast<const CharType*>(a + __builtin_ctz(cmpMask));
}
cmpMask = _mm_movemask_epi8(cmpB);
if (cmpMask) {
return reinterpret_cast<const TValue*>(b + __builtin_ctz(cmpMask));
return reinterpret_cast<const CharType*>(b + __builtin_ctz(cmpMask));
}
cmpMask = _mm_movemask_epi8(cmpC);
if (cmpMask) {
return reinterpret_cast<const TValue*>(c + __builtin_ctz(cmpMask));
return reinterpret_cast<const CharType*>(c + __builtin_ctz(cmpMask));
}
cmpMask = _mm_movemask_epi8(cmpD);
if (cmpMask) {
return reinterpret_cast<const TValue*>(d + __builtin_ctz(cmpMask));
}
}
return nullptr;
}
template <typename TValue>
const TValue* Check4x32Bytes(__m256i needle, uintptr_t a, uintptr_t b,
uintptr_t c, uintptr_t d) {
__m256i haystackA = _mm256_loadu_si256(Cast256(a));
__m256i cmpA = CmpEq256<TValue>(needle, haystackA);
__m256i haystackB = _mm256_loadu_si256(Cast256(b));
__m256i cmpB = CmpEq256<TValue>(needle, haystackB);
__m256i haystackC = _mm256_loadu_si256(Cast256(c));
__m256i cmpC = CmpEq256<TValue>(needle, haystackC);
__m256i haystackD = _mm256_loadu_si256(Cast256(d));
__m256i cmpD = CmpEq256<TValue>(needle, haystackD);
__m256i or_ab = _mm256_or_si256(cmpA, cmpB);
__m256i or_cd = _mm256_or_si256(cmpC, cmpD);
__m256i or_abcd = _mm256_or_si256(or_ab, or_cd);
int orMask = _mm256_movemask_epi8(or_abcd);
if (orMask) {
int cmpMask;
cmpMask = _mm256_movemask_epi8(cmpA);
if (cmpMask) {
return reinterpret_cast<const TValue*>(a + __builtin_ctz(cmpMask));
}
cmpMask = _mm256_movemask_epi8(cmpB);
if (cmpMask) {
return reinterpret_cast<const TValue*>(b + __builtin_ctz(cmpMask));
}
cmpMask = _mm256_movemask_epi8(cmpC);
if (cmpMask) {
return reinterpret_cast<const TValue*>(c + __builtin_ctz(cmpMask));
}
cmpMask = _mm256_movemask_epi8(cmpD);
if (cmpMask) {
return reinterpret_cast<const TValue*>(d + __builtin_ctz(cmpMask));
return reinterpret_cast<const CharType*>(d + __builtin_ctz(cmpMask));
}
}
@ -261,15 +149,15 @@ enum class HaystackOverlap {
// the next a's 16-byte chunk is needle2. `overlap` and whether
// `carryIn`/`carryOut` are NULL should be knowable at compile time to avoid
// branching.
template <typename TValue>
const TValue* Check2x2x16Bytes(__m128i needle1, __m128i needle2, uintptr_t a,
uintptr_t b, __m128i* carryIn, __m128i* carryOut,
HaystackOverlap overlap) {
const int shiftRightAmount = 16 - sizeof(TValue);
const int shiftLeftAmount = sizeof(TValue);
template <typename CharType>
const CharType* Check2x2x16Bytes(__m128i needle1, __m128i needle2, uintptr_t a,
uintptr_t b, __m128i* carryIn,
__m128i* carryOut, HaystackOverlap overlap) {
const int shiftRightAmount = 16 - sizeof(CharType);
const int shiftLeftAmount = sizeof(CharType);
__m128i haystackA = _mm_loadu_si128(Cast128(a));
__m128i cmpA1 = CmpEq128<TValue>(needle1, haystackA);
__m128i cmpA2 = CmpEq128<TValue>(needle2, haystackA);
__m128i cmpA1 = CmpEq128<CharType>(needle1, haystackA);
__m128i cmpA2 = CmpEq128<CharType>(needle2, haystackA);
__m128i cmpA;
if (carryIn) {
cmpA = _mm_and_si128(
@ -278,8 +166,8 @@ const TValue* Check2x2x16Bytes(__m128i needle1, __m128i needle2, uintptr_t a,
cmpA = _mm_and_si128(_mm_bslli_si128(cmpA1, shiftLeftAmount), cmpA2);
}
__m128i haystackB = _mm_loadu_si128(Cast128(b));
__m128i cmpB1 = CmpEq128<TValue>(needle1, haystackB);
__m128i cmpB2 = CmpEq128<TValue>(needle2, haystackB);
__m128i cmpB1 = CmpEq128<CharType>(needle1, haystackB);
__m128i cmpB2 = CmpEq128<CharType>(needle2, haystackB);
__m128i cmpB;
if (overlap == HaystackOverlap::Overlapping) {
cmpB = _mm_and_si128(_mm_bslli_si128(cmpB1, shiftLeftAmount), cmpB2);
@ -295,13 +183,13 @@ const TValue* Check2x2x16Bytes(__m128i needle1, __m128i needle2, uintptr_t a,
int cmpMask;
cmpMask = _mm_movemask_epi8(cmpA);
if (cmpMask) {
return reinterpret_cast<const TValue*>(a + __builtin_ctz(cmpMask) -
shiftLeftAmount);
return reinterpret_cast<const CharType*>(a + __builtin_ctz(cmpMask) -
shiftLeftAmount);
}
cmpMask = _mm_movemask_epi8(cmpB);
if (cmpMask) {
return reinterpret_cast<const TValue*>(b + __builtin_ctz(cmpMask) -
shiftLeftAmount);
return reinterpret_cast<const CharType*>(b + __builtin_ctz(cmpMask) -
shiftLeftAmount);
}
}
@ -312,12 +200,13 @@ const TValue* Check2x2x16Bytes(__m128i needle1, __m128i needle2, uintptr_t a,
return nullptr;
}
template <typename TValue>
const TValue* FindInBuffer(const TValue* ptr, TValue value, size_t length) {
static_assert(sizeof(TValue) == 1 || sizeof(TValue) == 2);
static_assert(std::is_unsigned<TValue>::value);
template <typename CharType>
const CharType* FindInBuffer(const CharType* ptr, CharType value,
size_t length) {
static_assert(sizeof(CharType) == 1 || sizeof(CharType) == 2);
static_assert(std::is_unsigned<CharType>::value);
uint64_t splat64;
if (sizeof(TValue) == 1) {
if (sizeof(CharType) == 1) {
splat64 = 0x0101010101010101llu;
} else {
splat64 = 0x0001000100010001llu;
@ -328,16 +217,16 @@ const TValue* FindInBuffer(const TValue* ptr, TValue value, size_t length) {
int64_t i64_value = *reinterpret_cast<int64_t*>(&u64_value);
__m128i needle = _mm_set_epi64x(i64_value, i64_value);
size_t numBytes = length * sizeof(TValue);
size_t numBytes = length * sizeof(CharType);
uintptr_t cur = reinterpret_cast<uintptr_t>(ptr);
uintptr_t end = cur + numBytes;
if ((sizeof(TValue) > 1 && numBytes < 16) || numBytes < 4) {
if ((sizeof(CharType) > 1 && numBytes < 16) || numBytes < 4) {
while (cur < end) {
if (GetAs<TValue>(cur) == value) {
return reinterpret_cast<const TValue*>(cur);
if (GetAs<CharType>(cur) == value) {
return reinterpret_cast<const CharType*>(cur);
}
cur += sizeof(TValue);
cur += sizeof(CharType);
}
return nullptr;
}
@ -357,9 +246,9 @@ const TValue* FindInBuffer(const TValue* ptr, TValue value, size_t length) {
uintptr_t c = end - 4 - ((numBytes & 8) >> 1);
uintptr_t d = end - 4;
const char* charResult = Check4x4Chars(needle, a, b, c, d);
// Note: we ensure above that sizeof(TValue) == 1 here, so this is
// Note: we ensure above that sizeof(CharType) == 1 here, so this is
// either char to char or char to something like a uint8_t.
return reinterpret_cast<const TValue*>(charResult);
return reinterpret_cast<const CharType*>(charResult);
}
if (numBytes < 64) {
@ -369,17 +258,17 @@ const TValue* FindInBuffer(const TValue* ptr, TValue value, size_t length) {
uintptr_t b = cur + ((numBytes & 32) >> 1);
uintptr_t c = end - 16 - ((numBytes & 32) >> 1);
uintptr_t d = end - 16;
return Check4x16Bytes<TValue>(needle, a, b, c, d);
return Check4x16Bytes<CharType>(needle, a, b, c, d);
}
// Get the initial unaligned load out of the way. This will overlap with the
// aligned stuff below, but the overlapped part should effectively be free
// (relative to a mispredict from doing a byte-by-byte loop).
__m128i haystack = _mm_loadu_si128(Cast128(cur));
__m128i cmp = CmpEq128<TValue>(needle, haystack);
__m128i cmp = CmpEq128<CharType>(needle, haystack);
int cmpMask = _mm_movemask_epi8(cmp);
if (cmpMask) {
return reinterpret_cast<const TValue*>(cur + __builtin_ctz(cmpMask));
return reinterpret_cast<const CharType*>(cur + __builtin_ctz(cmpMask));
}
// Now we're working with aligned memory. Hooray! \o/
@ -396,7 +285,7 @@ const TValue* FindInBuffer(const TValue* ptr, TValue value, size_t length) {
uintptr_t b = cur + 16;
uintptr_t c = cur + 32;
uintptr_t d = cur + 48;
const TValue* result = Check4x16Bytes<TValue>(needle, a, b, c, d);
const CharType* result = Check4x16Bytes<CharType>(needle, a, b, c, d);
if (result) {
return result;
}
@ -407,102 +296,12 @@ const TValue* FindInBuffer(const TValue* ptr, TValue value, size_t length) {
uintptr_t b = tailStartPtr + 16;
uintptr_t c = tailStartPtr + 32;
uintptr_t d = tailEndPtr;
return Check4x16Bytes<TValue>(needle, a, b, c, d);
return Check4x16Bytes<CharType>(needle, a, b, c, d);
}
template <typename TValue>
const TValue* FindInBufferAVX2(const TValue* ptr, TValue value, size_t length) {
static_assert(sizeof(TValue) == 1 || sizeof(TValue) == 2 ||
sizeof(TValue) == 8);
static_assert(std::is_unsigned<TValue>::value);
// Load our needle into a 32-byte register
__m256i needle;
if (sizeof(TValue) == 1) {
needle = _mm256_set1_epi8(value);
} else if (sizeof(TValue) == 2) {
needle = _mm256_set1_epi16(value);
} else {
needle = _mm256_set1_epi64x(value);
}
size_t numBytes = length * sizeof(TValue);
uintptr_t cur = reinterpret_cast<uintptr_t>(ptr);
uintptr_t end = cur + numBytes;
if (numBytes < 8 || (sizeof(TValue) == 8 && numBytes < 32)) {
while (cur < end) {
if (GetAs<TValue>(cur) == value) {
return reinterpret_cast<const TValue*>(cur);
}
cur += sizeof(TValue);
}
return nullptr;
}
if constexpr (sizeof(TValue) != 8) {
if (numBytes < 32) {
__m128i needle_narrow;
if (sizeof(TValue) == 1) {
needle_narrow = _mm_set1_epi8(value);
} else {
needle_narrow = _mm_set1_epi16(value);
}
uintptr_t a = cur;
uintptr_t b = cur + ((numBytes & 16) >> 1);
uintptr_t c = end - 8 - ((numBytes & 16) >> 1);
uintptr_t d = end - 8;
return Check4x8Bytes<TValue>(needle_narrow, a, b, c, d);
}
}
if (numBytes < 128) {
// NOTE: see the above explanation of the similar chunk of code, but in
// this case, replace 16 with 64 and 8 with 32.
uintptr_t a = cur;
uintptr_t b = cur + ((numBytes & 64) >> 1);
uintptr_t c = end - 32 - ((numBytes & 64) >> 1);
uintptr_t d = end - 32;
return Check4x32Bytes<TValue>(needle, a, b, c, d);
}
// Get the initial unaligned load out of the way. This will overlap with the
// aligned stuff below, but the overlapped part should effectively be free
// (relative to a mispredict from doing a byte-by-byte loop).
__m256i haystack = _mm256_loadu_si256(Cast256(cur));
__m256i cmp = CmpEq256<TValue>(needle, haystack);
int cmpMask = _mm256_movemask_epi8(cmp);
if (cmpMask) {
return reinterpret_cast<const TValue*>(cur + __builtin_ctz(cmpMask));
}
// Now we're working with aligned memory. Hooray! \o/
cur = AlignUp32(cur);
uintptr_t tailStartPtr = AlignDown32(end - 96);
uintptr_t tailEndPtr = end - 32;
while (cur < tailStartPtr) {
uintptr_t a = cur;
uintptr_t b = cur + 32;
uintptr_t c = cur + 64;
uintptr_t d = cur + 96;
const TValue* result = Check4x32Bytes<TValue>(needle, a, b, c, d);
if (result) {
return result;
}
cur += 128;
}
uintptr_t a = tailStartPtr;
uintptr_t b = tailStartPtr + 32;
uintptr_t c = tailStartPtr + 64;
uintptr_t d = tailEndPtr;
return Check4x32Bytes<TValue>(needle, a, b, c, d);
}
template <typename TValue>
const TValue* TwoByteLoop(uintptr_t start, uintptr_t end, TValue v1, TValue v2);
template <typename CharType>
const CharType* TwoByteLoop(uintptr_t start, uintptr_t end, CharType v1,
CharType v2);
template <>
const unsigned char* TwoByteLoop<unsigned char>(uintptr_t start, uintptr_t end,
@ -541,13 +340,13 @@ const char16_t* TwoByteLoop<char16_t>(uintptr_t start, uintptr_t end,
return nullptr;
}
template <typename TValue>
const TValue* FindTwoInBuffer(const TValue* ptr, TValue v1, TValue v2,
size_t length) {
static_assert(sizeof(TValue) == 1 || sizeof(TValue) == 2);
static_assert(std::is_unsigned<TValue>::value);
template <typename CharType>
const CharType* FindTwoInBuffer(const CharType* ptr, CharType v1, CharType v2,
size_t length) {
static_assert(sizeof(CharType) == 1 || sizeof(CharType) == 2);
static_assert(std::is_unsigned<CharType>::value);
uint64_t splat64;
if (sizeof(TValue) == 1) {
if (sizeof(CharType) == 1) {
splat64 = 0x0101010101010101llu;
} else {
splat64 = 0x0001000100010001llu;
@ -561,33 +360,33 @@ const TValue* FindTwoInBuffer(const TValue* ptr, TValue v1, TValue v2,
int64_t i64_v2 = *reinterpret_cast<int64_t*>(&u64_v2);
__m128i needle2 = _mm_set_epi64x(i64_v2, i64_v2);
size_t numBytes = length * sizeof(TValue);
size_t numBytes = length * sizeof(CharType);
uintptr_t cur = reinterpret_cast<uintptr_t>(ptr);
uintptr_t end = cur + numBytes;
if (numBytes < 16) {
return TwoByteLoop<TValue>(cur, end, v1, v2);
return TwoByteLoop<CharType>(cur, end, v1, v2);
}
if (numBytes < 32) {
uintptr_t a = cur;
uintptr_t b = end - 16;
return Check2x2x16Bytes<TValue>(needle1, needle2, a, b, nullptr, nullptr,
HaystackOverlap::Overlapping);
return Check2x2x16Bytes<CharType>(needle1, needle2, a, b, nullptr, nullptr,
HaystackOverlap::Overlapping);
}
// Get the initial unaligned load out of the way. This will likely overlap
// with the aligned stuff below, but the overlapped part should effectively
// be free.
__m128i haystack = _mm_loadu_si128(Cast128(cur));
__m128i cmp1 = CmpEq128<TValue>(needle1, haystack);
__m128i cmp2 = CmpEq128<TValue>(needle2, haystack);
__m128i cmp1 = CmpEq128<CharType>(needle1, haystack);
__m128i cmp2 = CmpEq128<CharType>(needle2, haystack);
int cmpMask1 = _mm_movemask_epi8(cmp1);
int cmpMask2 = _mm_movemask_epi8(cmp2);
int cmpMask = (cmpMask1 << sizeof(TValue)) & cmpMask2;
int cmpMask = (cmpMask1 << sizeof(CharType)) & cmpMask2;
if (cmpMask) {
return reinterpret_cast<const TValue*>(cur + __builtin_ctz(cmpMask) -
sizeof(TValue));
return reinterpret_cast<const CharType*>(cur + __builtin_ctz(cmpMask) -
sizeof(CharType));
}
// Now we're working with aligned memory. Hooray! \o/
@ -603,9 +402,9 @@ const TValue* FindTwoInBuffer(const TValue* ptr, TValue v1, TValue v2,
while (cur < tailStartPtr) {
uintptr_t a = cur;
uintptr_t b = cur + 16;
const TValue* result =
Check2x2x16Bytes<TValue>(needle1, needle2, a, b, &cmpMaskCarry,
&cmpMaskCarry, HaystackOverlap::Sequential);
const CharType* result =
Check2x2x16Bytes<CharType>(needle1, needle2, a, b, &cmpMaskCarry,
&cmpMaskCarry, HaystackOverlap::Sequential);
if (result) {
return result;
}
@ -617,11 +416,11 @@ const TValue* FindTwoInBuffer(const TValue* ptr, TValue v1, TValue v2,
cmpMaskCarry = _mm_and_si128(cmpMaskCarry, wideCarry);
uintptr_t a = tailStartPtr;
uintptr_t b = tailEndPtr;
return Check2x2x16Bytes<TValue>(needle1, needle2, a, b, &cmpMaskCarry,
nullptr, HaystackOverlap::Overlapping);
return Check2x2x16Bytes<CharType>(needle1, needle2, a, b, &cmpMaskCarry,
nullptr, HaystackOverlap::Overlapping);
}
const char* SIMD::memchr8SSE2(const char* ptr, char value, size_t length) {
const char* SIMD::memchr8(const char* ptr, char value, size_t length) {
// Signed chars are just really annoying to do bit logic with. Convert to
// unsigned at the outermost scope so we don't have to worry about it.
const unsigned char* uptr = reinterpret_cast<const unsigned char*>(ptr);
@ -631,36 +430,9 @@ const char* SIMD::memchr8SSE2(const char* ptr, char value, size_t length) {
return reinterpret_cast<const char*>(uresult);
}
const char* SIMD::memchr8(const char* ptr, char value, size_t length) {
if (supports_avx2()) {
const unsigned char* uptr = reinterpret_cast<const unsigned char*>(ptr);
unsigned char uvalue = static_cast<unsigned char>(value);
const unsigned char* uresult =
FindInBufferAVX2<unsigned char>(uptr, uvalue, length);
return reinterpret_cast<const char*>(uresult);
}
return memchr8SSE2(ptr, value, length);
}
const char16_t* SIMD::memchr16SSE2(const char16_t* ptr, char16_t value,
size_t length) {
return FindInBuffer<char16_t>(ptr, value, length);
}
const char16_t* SIMD::memchr16(const char16_t* ptr, char16_t value,
size_t length) {
if (supports_avx2()) {
return FindInBufferAVX2<char16_t>(ptr, value, length);
}
return memchr16SSE2(ptr, value, length);
}
const uint64_t* SIMD::memchr64(const uint64_t* ptr, uint64_t value,
size_t length) {
if (supports_avx2()) {
return FindInBufferAVX2<uint64_t>(ptr, value, length);
}
return FindInBufferNaive<uint64_t>(ptr, value, length);
return FindInBuffer<char16_t>(ptr, value, length);
}
const char* SIMD::memchr2x8(const char* ptr, char v1, char v2, size_t length) {
@ -689,23 +461,16 @@ const char* SIMD::memchr8(const char* ptr, char value, size_t length) {
return reinterpret_cast<const char*>(result);
}
const char* SIMD::memchr8SSE2(const char* ptr, char value, size_t length) {
return memchr8(ptr, value, length);
}
const char16_t* SIMD::memchr16(const char16_t* ptr, char16_t value,
size_t length) {
return FindInBufferNaive<char16_t>(ptr, value, length);
}
const char16_t* SIMD::memchr16SSE2(const char16_t* ptr, char16_t value,
size_t length) {
return memchr16(ptr, value, length);
}
const uint64_t* SIMD::memchr64(const uint64_t* ptr, uint64_t value,
size_t length) {
return FindInBufferNaive<uint64_t>(ptr, value, length);
const char16_t* end = ptr + length;
while (ptr < end) {
if (*ptr == value) {
return ptr;
}
ptr++;
}
return nullptr;
}
const char* SIMD::memchr2x8(const char* ptr, char v1, char v2, size_t length) {
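
The removed SIMD.cpp code chose between an AVX2 and an SSE2 implementation at runtime via supports_avx2() from mozilla/SSE.h (which is also why the moz.build change below had compiled SIMD.cpp separately with -mavx2). A rough sketch of that dispatch, with memchr16_dispatching as a hypothetical wrapper name and the two Find* templates declared only so the pattern stands alone:

#include "mozilla/SSE.h"  // assumed to provide mozilla::supports_avx2(), per the removed include
#include <cstddef>

// Forward declarations standing in for the removed SIMD.cpp internals.
template <typename CharT>
const CharT* FindInBuffer(const CharT* ptr, CharT value, size_t length);
template <typename CharT>
const CharT* FindInBufferAVX2(const CharT* ptr, CharT value, size_t length);

// Hypothetical wrapper showing the removed runtime dispatch: take the AVX2
// path only when the CPU supports it, otherwise use the baseline SSE2 search.
const char16_t* memchr16_dispatching(const char16_t* ptr, char16_t value,
                                     size_t length) {
  if (mozilla::supports_avx2()) {
    return FindInBufferAVX2<char16_t>(ptr, value, length);
  }
  return FindInBuffer<char16_t>(ptr, value, length);
}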

View File

@ -33,24 +33,11 @@ class SIMD {
static MFBT_API const char* memchr8(const char* ptr, char value,
size_t length);
// This function just restricts our execution to the SSE2 path
static MFBT_API const char* memchr8SSE2(const char* ptr, char value,
size_t length);
// Search through `ptr[0..length]` for the first occurrence of `value` and
// return the pointer to it, or nullptr if it cannot be found.
static MFBT_API const char16_t* memchr16(const char16_t* ptr, char16_t value,
size_t length);
// This function just restricts our execution to the SSE2 path
static MFBT_API const char16_t* memchr16SSE2(const char16_t* ptr,
char16_t value, size_t length);
// Search through `ptr[0..length]` for the first occurrence of `value` and
// return the pointer to it, or nullptr if it cannot be found.
static MFBT_API const uint64_t* memchr64(const uint64_t* ptr, uint64_t value,
size_t length);
// Search through `ptr[0..length]` for the first occurrence of `v1` which is
// immediately followed by `v2` and return the pointer to the occurrence of
// `v1`.
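
The surviving portion of this header still declares the two-character search described by the comment above. A small usage sketch, assuming the memchr2x8 signature shown in the SIMD.cpp hunks earlier (const char* ptr, char v1, char v2, size_t length):

#include "mozilla/SIMD.h"
#include <cstring>

int main() {
  // memchr2x8 returns a pointer to the first `v1` that is immediately
  // followed by `v2`, or nullptr if no such pair exists.
  const char* text = "abcabd";
  const char* hit = mozilla::SIMD::memchr2x8(text, 'b', 'd', std::strlen(text));
  return hit == text + 4 ? 0 : 1;  // the "bd" pair starts at index 4
}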

View File

@ -177,17 +177,13 @@ UNIFIED_SOURCES += [
"Poison.cpp",
"RandomNum.cpp",
"SHA1.cpp",
"SIMD.cpp",
"TaggedAnonymousMemory.cpp",
"UniquePtrExtensions.cpp",
"Unused.cpp",
"Utf8.cpp",
]
SOURCES += [
"SIMD.cpp",
]
SOURCES["SIMD.cpp"].flags += ["-mavx2"]
if CONFIG["CPU_ARCH"].startswith("x86"):
SOURCES += [
"SSE.cpp",

View File

@ -13,79 +13,47 @@ void TestTinyString() {
const char* test = "012\n";
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '0', 3) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '0', 3) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '1', 3) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '1', 3) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '2', 3) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '2', 3) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '\n', 3) == nullptr);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '\n', 3) == nullptr);
}
void TestShortString() {
const char* test = "0123456789\n";
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '0', 10) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '0', 10) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '1', 10) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '1', 10) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '2', 10) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '2', 10) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '3', 10) == test + 0x3);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '3', 10) == test + 0x3);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '4', 10) == test + 0x4);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '4', 10) == test + 0x4);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '5', 10) == test + 0x5);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '5', 10) == test + 0x5);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '6', 10) == test + 0x6);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '6', 10) == test + 0x6);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '7', 10) == test + 0x7);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '7', 10) == test + 0x7);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '8', 10) == test + 0x8);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '8', 10) == test + 0x8);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '9', 10) == test + 0x9);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '9', 10) == test + 0x9);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '\n', 10) == nullptr);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '\n', 10) == nullptr);
}
void TestMediumString() {
const char* test = "0123456789abcdef\n";
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '0', 16) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '0', 16) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '1', 16) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '1', 16) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '2', 16) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '2', 16) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '3', 16) == test + 0x3);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '3', 16) == test + 0x3);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '4', 16) == test + 0x4);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '4', 16) == test + 0x4);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '5', 16) == test + 0x5);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '5', 16) == test + 0x5);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '6', 16) == test + 0x6);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '6', 16) == test + 0x6);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '7', 16) == test + 0x7);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '7', 16) == test + 0x7);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '8', 16) == test + 0x8);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '8', 16) == test + 0x8);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '9', 16) == test + 0x9);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '9', 16) == test + 0x9);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, 'a', 16) == test + 0xa);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, 'a', 16) == test + 0xa);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, 'b', 16) == test + 0xb);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, 'b', 16) == test + 0xb);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, 'c', 16) == test + 0xc);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, 'c', 16) == test + 0xc);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, 'd', 16) == test + 0xd);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, 'd', 16) == test + 0xd);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, 'e', 16) == test + 0xe);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, 'e', 16) == test + 0xe);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, 'f', 16) == test + 0xf);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, 'f', 16) == test + 0xf);
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '\n', 16) == nullptr);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test, '\n', 16) == nullptr);
}
void TestLongString() {
@ -102,8 +70,6 @@ void TestLongString() {
for (size_t i = 0; i < count - 1; ++i) {
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, static_cast<char>(i), count - 1) ==
test + i);
MOZ_RELEASE_ASSERT(
SIMD::memchr8SSE2(test, static_cast<char>(i), count - 1) == test + i);
}
MOZ_RELEASE_ASSERT(
SIMD::memchr8(test, static_cast<char>(count - 1), count - 1) == nullptr);
@ -126,8 +92,6 @@ void TestGauntlet() {
}
MOZ_RELEASE_ASSERT(
SIMD::memchr8(test + k, static_cast<char>(j), i - k) == expected);
MOZ_RELEASE_ASSERT(SIMD::memchr8SSE2(test + k, static_cast<char>(j),
i - k) == expected);
}
}
}
@ -138,79 +102,47 @@ void TestTinyString16() {
const char16_t* test = u"012\n";
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'0', 3) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'0', 3) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'1', 3) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'1', 3) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'2', 3) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'2', 3) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'\n', 3) == nullptr);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'\n', 3) == nullptr);
}
void TestShortString16() {
const char16_t* test = u"0123456789\n";
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'0', 10) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'0', 10) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'1', 10) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'1', 10) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'2', 10) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'2', 10) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'3', 10) == test + 0x3);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'3', 10) == test + 0x3);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'4', 10) == test + 0x4);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'4', 10) == test + 0x4);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'5', 10) == test + 0x5);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'5', 10) == test + 0x5);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'6', 10) == test + 0x6);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'6', 10) == test + 0x6);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'7', 10) == test + 0x7);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'7', 10) == test + 0x7);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'8', 10) == test + 0x8);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'8', 10) == test + 0x8);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'9', 10) == test + 0x9);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'9', 10) == test + 0x9);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'\n', 10) == nullptr);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'\n', 10) == nullptr);
}
void TestMediumString16() {
const char16_t* test = u"0123456789abcdef\n";
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'0', 16) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'0', 16) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'1', 16) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'1', 16) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'2', 16) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'2', 16) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'3', 16) == test + 0x3);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'3', 16) == test + 0x3);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'4', 16) == test + 0x4);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'4', 16) == test + 0x4);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'5', 16) == test + 0x5);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'5', 16) == test + 0x5);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'6', 16) == test + 0x6);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'6', 16) == test + 0x6);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'7', 16) == test + 0x7);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'7', 16) == test + 0x7);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'8', 16) == test + 0x8);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'8', 16) == test + 0x8);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'9', 16) == test + 0x9);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'9', 16) == test + 0x9);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'a', 16) == test + 0xa);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'a', 16) == test + 0xa);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'b', 16) == test + 0xb);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'b', 16) == test + 0xb);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'c', 16) == test + 0xc);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'c', 16) == test + 0xc);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'd', 16) == test + 0xd);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'd', 16) == test + 0xd);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'e', 16) == test + 0xe);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'e', 16) == test + 0xe);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'f', 16) == test + 0xf);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'f', 16) == test + 0xf);
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'\n', 16) == nullptr);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, u'\n', 16) == nullptr);
}
void TestLongString16() {
@ -223,11 +155,8 @@ void TestLongString16() {
for (size_t i = 0; i < count - 1; ++i) {
MOZ_RELEASE_ASSERT(
SIMD::memchr16(test, static_cast<char16_t>(i), count - 1) == test + i);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, static_cast<char16_t>(i),
count - 1) == test + i);
}
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, count - 1, count - 1) == nullptr);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test, count - 1, count - 1) == nullptr);
}
void TestGauntlet16() {
@ -247,116 +176,6 @@ void TestGauntlet16() {
}
MOZ_RELEASE_ASSERT(SIMD::memchr16(test + k, static_cast<char16_t>(j),
i - k) == expected);
MOZ_RELEASE_ASSERT(SIMD::memchr16SSE2(test + k,
static_cast<char16_t>(j),
i - k) == expected);
}
}
}
}
}
void TestTinyString64() {
const uint64_t test[4] = {0, 1, 2, 3};
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 0, 3) == test + 0x0);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 1, 3) == test + 0x1);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 2, 3) == test + 0x2);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 3, 3) == nullptr);
}
void TestShortString64() {
const uint64_t test[16] = {0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15};
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 0, 15) == test + 0);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 1, 15) == test + 1);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 2, 15) == test + 2);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 3, 15) == test + 3);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 4, 15) == test + 4);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 5, 15) == test + 5);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 6, 15) == test + 6);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 7, 15) == test + 7);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 8, 15) == test + 8);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 9, 15) == test + 9);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 9, 15) == test + 9);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 10, 15) == test + 10);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 11, 15) == test + 11);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 12, 15) == test + 12);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 13, 15) == test + 13);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 14, 15) == test + 14);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 15, 15) == nullptr);
}
void TestMediumString64() {
const uint64_t test[32] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 0, 31) == test + 0);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 1, 31) == test + 1);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 2, 31) == test + 2);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 3, 31) == test + 3);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 4, 31) == test + 4);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 5, 31) == test + 5);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 6, 31) == test + 6);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 7, 31) == test + 7);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 8, 31) == test + 8);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 9, 31) == test + 9);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 9, 31) == test + 9);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 10, 31) == test + 10);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 11, 31) == test + 11);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 12, 31) == test + 12);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 13, 31) == test + 13);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 14, 31) == test + 14);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 15, 31) == test + 15);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 16, 31) == test + 16);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 17, 31) == test + 17);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 18, 31) == test + 18);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 19, 31) == test + 19);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 20, 31) == test + 20);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 21, 31) == test + 21);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 22, 31) == test + 22);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 23, 31) == test + 23);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 24, 31) == test + 24);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 25, 31) == test + 25);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 26, 31) == test + 26);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 27, 31) == test + 27);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 28, 31) == test + 28);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 29, 31) == test + 29);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 30, 31) == test + 30);
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, 31, 31) == nullptr);
}
void TestLongString64() {
const size_t count = 256;
uint64_t test[count];
for (size_t i = 0; i < count; ++i) {
test[i] = i;
}
for (uint64_t i = 0; i < count - 1; ++i) {
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, i, count - 1) == test + i);
}
MOZ_RELEASE_ASSERT(SIMD::memchr64(test, count - 1, count - 1) == nullptr);
}
void TestGauntlet64() {
const size_t count = 257;
uint64_t test[count];
for (size_t i = 0; i < count; ++i) {
test[i] = i;
}
for (uint64_t i = 0; i < count - 1; ++i) {
for (uint64_t j = 0; j < count - 1; ++j) {
for (uint64_t k = 0; k < count - 1; ++k) {
if (i >= k) {
const uint64_t* expected = nullptr;
if (j >= k && j < i) {
expected = test + j;
}
MOZ_RELEASE_ASSERT(SIMD::memchr64(test + k, j, i - k) == expected);
}
}
}
@ -604,12 +423,6 @@ int main(void) {
TestLongString16();
TestGauntlet16();
TestTinyString64();
TestShortString64();
TestMediumString64();
TestLongString64();
TestGauntlet64();
TestTinyString2x8();
TestShortString2x8();
TestMediumString2x8();