mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-23 21:01:08 +00:00
Backed out 2 changesets (bug 1776013) for causing spidermonkey bustages on SIMD.cpp CLOSED TREE
Backed out changeset bb467568da37 (bug 1776013) Backed out changeset 9669cd465518 (bug 1776013)
This commit is contained in:
parent
21a943f29d
commit
9112cf321e
@ -14,7 +14,6 @@
|
||||
#endif
|
||||
#include "mozilla/PodOperations.h"
|
||||
#include "mozilla/Range.h"
|
||||
#include "mozilla/SIMD.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
|
||||
#include <algorithm>
|
||||
@ -77,7 +76,6 @@ using mozilla::IsAsciiHexDigit;
|
||||
using mozilla::IsNaN;
|
||||
using mozilla::PodCopy;
|
||||
using mozilla::RangedPtr;
|
||||
using mozilla::SIMD;
|
||||
|
||||
using JS::AutoCheckCannotGC;
|
||||
using JS::AutoStableStringChars;
|
||||
@ -1720,7 +1718,7 @@ struct MemCmp {
|
||||
using Extent = uint32_t;
|
||||
static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*,
|
||||
uint32_t patLen) {
|
||||
return (patLen - 2) * sizeof(PatChar);
|
||||
return (patLen - 1) * sizeof(PatChar);
|
||||
}
|
||||
static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
|
||||
Extent extent) {
|
||||
@ -1747,35 +1745,78 @@ struct ManualCmp {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename TextChar, typename PatChar>
|
||||
static const TextChar* FirstCharMatcherUnrolled(const TextChar* text,
|
||||
uint32_t n, const PatChar pat) {
|
||||
const TextChar* textend = text + n;
|
||||
const TextChar* t = text;
|
||||
|
||||
switch ((textend - t) & 7) {
|
||||
case 0:
|
||||
if (*t++ == pat) return t - 1;
|
||||
[[fallthrough]];
|
||||
case 7:
|
||||
if (*t++ == pat) return t - 1;
|
||||
[[fallthrough]];
|
||||
case 6:
|
||||
if (*t++ == pat) return t - 1;
|
||||
[[fallthrough]];
|
||||
case 5:
|
||||
if (*t++ == pat) return t - 1;
|
||||
[[fallthrough]];
|
||||
case 4:
|
||||
if (*t++ == pat) return t - 1;
|
||||
[[fallthrough]];
|
||||
case 3:
|
||||
if (*t++ == pat) return t - 1;
|
||||
[[fallthrough]];
|
||||
case 2:
|
||||
if (*t++ == pat) return t - 1;
|
||||
[[fallthrough]];
|
||||
case 1:
|
||||
if (*t++ == pat) return t - 1;
|
||||
}
|
||||
while (textend != t) {
|
||||
if (t[0] == pat) return t;
|
||||
if (t[1] == pat) return t + 1;
|
||||
if (t[2] == pat) return t + 2;
|
||||
if (t[3] == pat) return t + 3;
|
||||
if (t[4] == pat) return t + 4;
|
||||
if (t[5] == pat) return t + 5;
|
||||
if (t[6] == pat) return t + 6;
|
||||
if (t[7] == pat) return t + 7;
|
||||
t += 8;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static const char* FirstCharMatcher8bit(const char* text, uint32_t n,
|
||||
const char pat) {
|
||||
return reinterpret_cast<const char*>(memchr(text, pat, n));
|
||||
}
|
||||
|
||||
template <class InnerMatch, typename TextChar, typename PatChar>
|
||||
static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat,
|
||||
uint32_t patlen) {
|
||||
MOZ_ASSERT(patlen > 1);
|
||||
MOZ_ASSERT(patlen > 0);
|
||||
|
||||
if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
const typename InnerMatch::Extent extent =
|
||||
InnerMatch::computeExtent(pat, patlen);
|
||||
|
||||
uint32_t i = 0;
|
||||
uint32_t n = textlen - patlen + 1;
|
||||
|
||||
while (i < n) {
|
||||
const TextChar* pos;
|
||||
|
||||
// This is a bit awkward. Consider the case where we're searching "abcdef"
|
||||
// for "def". n will be 4, because we know in advance that the last place we
|
||||
// can *start* a successful search will be at 'd'. However, if we just use n
|
||||
// - i, then our first search will be looking through "abcd" for "de",
|
||||
// because our memchr2xN functions search for two characters at a time. So
|
||||
// we just have to compensate by adding 1. This will never exceed textlen
|
||||
// because we know patlen is at least two.
|
||||
size_t searchLen = n - i + 1;
|
||||
if (sizeof(TextChar) == 1) {
|
||||
MOZ_ASSERT(pat[0] <= 0xff);
|
||||
pos = (TextChar*)SIMD::memchr2x8((char*)text + i, pat[0], pat[1],
|
||||
searchLen);
|
||||
pos = (TextChar*)FirstCharMatcher8bit((char*)text + i, n - i, pat[0]);
|
||||
} else {
|
||||
pos = (TextChar*)SIMD::memchr2x16((char16_t*)(text + i), char16_t(pat[0]),
|
||||
char16_t(pat[1]), searchLen);
|
||||
pos = FirstCharMatcherUnrolled(text + i, n - i, char16_t(pat[0]));
|
||||
}
|
||||
|
||||
if (pos == nullptr) {
|
||||
@ -1783,9 +1824,7 @@ static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat,
|
||||
}
|
||||
|
||||
i = static_cast<uint32_t>(pos - text);
|
||||
const uint32_t inlineLookaheadChars = 2;
|
||||
if (InnerMatch::match(pat + inlineLookaheadChars,
|
||||
text + i + inlineLookaheadChars, extent)) {
|
||||
if (InnerMatch::match(pat + 1, text + i + 1, extent)) {
|
||||
return i;
|
||||
}
|
||||
|
||||
@ -1804,26 +1843,22 @@ static MOZ_ALWAYS_INLINE int StringMatch(const TextChar* text, uint32_t textLen,
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff) {
|
||||
#if defined(__i386__) || defined(_M_IX86) || defined(__i386)
|
||||
/*
|
||||
* Given enough registers, the unrolled loop below is faster than the
|
||||
* following loop. 32-bit x86 does not have enough registers.
|
||||
*/
|
||||
if (patLen == 1) {
|
||||
const PatChar p0 = *pat;
|
||||
const TextChar* end = text + textLen;
|
||||
for (const TextChar* c = text; c != end; ++c) {
|
||||
if (*c == p0) {
|
||||
return c - text;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (patLen == 1) {
|
||||
const TextChar* pos;
|
||||
if (sizeof(TextChar) == 1) {
|
||||
MOZ_ASSERT(pat[0] <= 0xff);
|
||||
pos = (TextChar*)SIMD::memchr8((char*)text, pat[0], textLen);
|
||||
} else {
|
||||
pos =
|
||||
(TextChar*)SIMD::memchr16((char16_t*)text, char16_t(pat[0]), textLen);
|
||||
}
|
||||
|
||||
if (pos == nullptr) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return pos - text;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If the text or pattern string is short, BMH will be more expensive than
|
||||
|
489
mfbt/SIMD.cpp
489
mfbt/SIMD.cpp
@ -1,489 +0,0 @@
|
||||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "mozilla/SIMD.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <type_traits>
|
||||
|
||||
#include "mozilla/SSE.h"
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
#ifdef MOZILLA_PRESUME_SSE2
|
||||
|
||||
# include <immintrin.h>
|
||||
|
||||
const __m128i* Cast128(uintptr_t ptr) {
|
||||
return reinterpret_cast<const __m128i*>(ptr);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T GetAs(uintptr_t ptr) {
|
||||
return *reinterpret_cast<const T*>(ptr);
|
||||
}
|
||||
|
||||
// Akin to ceil/floor, AlignDown/AlignUp will return the original pointer if it
|
||||
// is already aligned.
|
||||
uintptr_t AlignDown16(uintptr_t ptr) { return ptr & ~0xf; }
|
||||
|
||||
uintptr_t AlignUp16(uintptr_t ptr) { return AlignDown16(ptr + 0xf); }
|
||||
|
||||
template <typename CharType>
|
||||
__m128i CmpEq128(__m128i a, __m128i b) {
|
||||
static_assert(sizeof(CharType) == 1 || sizeof(CharType) == 2);
|
||||
if (sizeof(CharType) == 1) {
|
||||
return _mm_cmpeq_epi8(a, b);
|
||||
}
|
||||
return _mm_cmpeq_epi16(a, b);
|
||||
}
|
||||
|
||||
const char* Check4x4Chars(__m128i needle, uintptr_t a, uintptr_t b, uintptr_t c,
|
||||
uintptr_t d) {
|
||||
__m128i haystackA = _mm_loadu_si32(Cast128(a));
|
||||
__m128i cmpA = CmpEq128<char>(needle, haystackA);
|
||||
__m128i haystackB = _mm_loadu_si32(Cast128(b));
|
||||
__m128i cmpB = CmpEq128<char>(needle, haystackB);
|
||||
__m128i haystackC = _mm_loadu_si32(Cast128(c));
|
||||
__m128i cmpC = CmpEq128<char>(needle, haystackC);
|
||||
__m128i haystackD = _mm_loadu_si32(Cast128(d));
|
||||
__m128i cmpD = CmpEq128<char>(needle, haystackD);
|
||||
__m128i or_ab = _mm_or_si128(cmpA, cmpB);
|
||||
__m128i or_cd = _mm_or_si128(cmpC, cmpD);
|
||||
__m128i or_abcd = _mm_or_si128(or_ab, or_cd);
|
||||
int orMask = _mm_movemask_epi8(or_abcd);
|
||||
if (orMask & 0xf) {
|
||||
int cmpMask;
|
||||
cmpMask = _mm_movemask_epi8(cmpA);
|
||||
if (cmpMask & 0xf) {
|
||||
return reinterpret_cast<const char*>(a + __builtin_ctz(cmpMask));
|
||||
}
|
||||
cmpMask = _mm_movemask_epi8(cmpB);
|
||||
if (cmpMask & 0xf) {
|
||||
return reinterpret_cast<const char*>(b + __builtin_ctz(cmpMask));
|
||||
}
|
||||
cmpMask = _mm_movemask_epi8(cmpC);
|
||||
if (cmpMask & 0xf) {
|
||||
return reinterpret_cast<const char*>(c + __builtin_ctz(cmpMask));
|
||||
}
|
||||
cmpMask = _mm_movemask_epi8(cmpD);
|
||||
if (cmpMask & 0xf) {
|
||||
return reinterpret_cast<const char*>(d + __builtin_ctz(cmpMask));
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename CharType>
|
||||
const CharType* Check4x16Bytes(__m128i needle, uintptr_t a, uintptr_t b,
|
||||
uintptr_t c, uintptr_t d) {
|
||||
__m128i haystackA = _mm_loadu_si128(Cast128(a));
|
||||
__m128i cmpA = CmpEq128<CharType>(needle, haystackA);
|
||||
__m128i haystackB = _mm_loadu_si128(Cast128(b));
|
||||
__m128i cmpB = CmpEq128<CharType>(needle, haystackB);
|
||||
__m128i haystackC = _mm_loadu_si128(Cast128(c));
|
||||
__m128i cmpC = CmpEq128<CharType>(needle, haystackC);
|
||||
__m128i haystackD = _mm_loadu_si128(Cast128(d));
|
||||
__m128i cmpD = CmpEq128<CharType>(needle, haystackD);
|
||||
__m128i or_ab = _mm_or_si128(cmpA, cmpB);
|
||||
__m128i or_cd = _mm_or_si128(cmpC, cmpD);
|
||||
__m128i or_abcd = _mm_or_si128(or_ab, or_cd);
|
||||
int orMask = _mm_movemask_epi8(or_abcd);
|
||||
if (orMask) {
|
||||
int cmpMask;
|
||||
cmpMask = _mm_movemask_epi8(cmpA);
|
||||
if (cmpMask) {
|
||||
return reinterpret_cast<const CharType*>(a + __builtin_ctz(cmpMask));
|
||||
}
|
||||
cmpMask = _mm_movemask_epi8(cmpB);
|
||||
if (cmpMask) {
|
||||
return reinterpret_cast<const CharType*>(b + __builtin_ctz(cmpMask));
|
||||
}
|
||||
cmpMask = _mm_movemask_epi8(cmpC);
|
||||
if (cmpMask) {
|
||||
return reinterpret_cast<const CharType*>(c + __builtin_ctz(cmpMask));
|
||||
}
|
||||
cmpMask = _mm_movemask_epi8(cmpD);
|
||||
if (cmpMask) {
|
||||
return reinterpret_cast<const CharType*>(d + __builtin_ctz(cmpMask));
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
enum class HaystackOverlap {
|
||||
Overlapping,
|
||||
Sequential,
|
||||
};
|
||||
|
||||
// Check two 16-byte chunks for the two-byte sequence loaded into needle1
|
||||
// followed by needle1. `carryOut` is an optional pointer which we will
|
||||
// populate based on whether the last character of b matches needle1. This
|
||||
// should be provided on subsequent calls via `carryIn` so we can detect cases
|
||||
// where the last byte of b's 16-byte chunk is needle1 and the first byte of
|
||||
// the next a's 16-byte chunk is needle2. `overlap` and whether
|
||||
// `carryIn`/`carryOut` are NULL should be knowable at compile time to avoid
|
||||
// branching.
|
||||
template <typename CharType>
|
||||
const CharType* Check2x2x16Bytes(__m128i needle1, __m128i needle2, uintptr_t a,
|
||||
uintptr_t b, __m128i* carryIn,
|
||||
__m128i* carryOut, HaystackOverlap overlap) {
|
||||
const int shiftRightAmount = 16 - sizeof(CharType);
|
||||
const int shiftLeftAmount = sizeof(CharType);
|
||||
__m128i haystackA = _mm_loadu_si128(Cast128(a));
|
||||
__m128i cmpA1 = CmpEq128<CharType>(needle1, haystackA);
|
||||
__m128i cmpA2 = CmpEq128<CharType>(needle2, haystackA);
|
||||
__m128i cmpA;
|
||||
if (carryIn) {
|
||||
cmpA = _mm_and_si128(
|
||||
_mm_or_si128(_mm_bslli_si128(cmpA1, shiftLeftAmount), *carryIn), cmpA2);
|
||||
} else {
|
||||
cmpA = _mm_and_si128(_mm_bslli_si128(cmpA1, shiftLeftAmount), cmpA2);
|
||||
}
|
||||
__m128i haystackB = _mm_loadu_si128(Cast128(b));
|
||||
__m128i cmpB1 = CmpEq128<CharType>(needle1, haystackB);
|
||||
__m128i cmpB2 = CmpEq128<CharType>(needle2, haystackB);
|
||||
__m128i cmpB;
|
||||
if (overlap == HaystackOverlap::Overlapping) {
|
||||
cmpB = _mm_and_si128(_mm_bslli_si128(cmpB1, shiftLeftAmount), cmpB2);
|
||||
} else {
|
||||
MOZ_ASSERT(overlap == HaystackOverlap::Sequential);
|
||||
__m128i carryAB = _mm_bsrli_si128(cmpA1, shiftRightAmount);
|
||||
cmpB = _mm_and_si128(
|
||||
_mm_or_si128(_mm_bslli_si128(cmpB1, shiftLeftAmount), carryAB), cmpB2);
|
||||
}
|
||||
__m128i or_ab = _mm_or_si128(cmpA, cmpB);
|
||||
int orMask = _mm_movemask_epi8(or_ab);
|
||||
if (orMask) {
|
||||
int cmpMask;
|
||||
cmpMask = _mm_movemask_epi8(cmpA);
|
||||
if (cmpMask) {
|
||||
return reinterpret_cast<const CharType*>(a + __builtin_ctz(cmpMask) -
|
||||
shiftLeftAmount);
|
||||
}
|
||||
cmpMask = _mm_movemask_epi8(cmpB);
|
||||
if (cmpMask) {
|
||||
return reinterpret_cast<const CharType*>(b + __builtin_ctz(cmpMask) -
|
||||
shiftLeftAmount);
|
||||
}
|
||||
}
|
||||
|
||||
if (carryOut) {
|
||||
_mm_store_si128(carryOut, _mm_bsrli_si128(cmpB1, shiftRightAmount));
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename CharType>
|
||||
const CharType* FindInBuffer(const CharType* ptr, CharType value,
|
||||
size_t length) {
|
||||
static_assert(sizeof(CharType) == 1 || sizeof(CharType) == 2);
|
||||
static_assert(std::is_unsigned<CharType>::value);
|
||||
uint64_t splat64;
|
||||
if (sizeof(CharType) == 1) {
|
||||
splat64 = 0x0101010101010101llu;
|
||||
} else {
|
||||
splat64 = 0x0001000100010001llu;
|
||||
}
|
||||
|
||||
// Load our needle into a 16-byte register
|
||||
uint64_t u64_value = static_cast<uint64_t>(value) * splat64;
|
||||
int64_t i64_value = *reinterpret_cast<int64_t*>(&u64_value);
|
||||
__m128i needle = _mm_set_epi64x(i64_value, i64_value);
|
||||
|
||||
size_t numBytes = length * sizeof(CharType);
|
||||
uintptr_t cur = reinterpret_cast<uintptr_t>(ptr);
|
||||
uintptr_t end = cur + numBytes;
|
||||
|
||||
if ((sizeof(CharType) > 1 && numBytes < 16) || numBytes < 4) {
|
||||
while (cur < end) {
|
||||
if (GetAs<CharType>(cur) == value) {
|
||||
return reinterpret_cast<const CharType*>(cur);
|
||||
}
|
||||
cur += sizeof(CharType);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (numBytes < 16) {
|
||||
// NOTE: here and below, we have some bit fiddling which could look a
|
||||
// little weird. The important thing to note though is it's just a trick
|
||||
// for getting the number 4 if numBytes is greater than or equal to 8,
|
||||
// and 0 otherwise. This lets us fully cover the range without any
|
||||
// branching for the case where numBytes is in [4,8), and [8,16). We get
|
||||
// four ranges from this - if numbytes > 8, we get:
|
||||
// [0,4), [4,8], [end - 8), [end - 4)
|
||||
// and if numbytes < 8, we get
|
||||
// [0,4), [0,4), [end - 4), [end - 4)
|
||||
uintptr_t a = cur;
|
||||
uintptr_t b = cur + ((numBytes & 8) >> 1);
|
||||
uintptr_t c = end - 4 - ((numBytes & 8) >> 1);
|
||||
uintptr_t d = end - 4;
|
||||
const char* charResult = Check4x4Chars(needle, a, b, c, d);
|
||||
// Note: we ensure above that sizeof(CharType) == 1 here, so this is
|
||||
// either char to char or char to something like a uint8_t.
|
||||
return reinterpret_cast<const CharType*>(charResult);
|
||||
}
|
||||
|
||||
if (numBytes < 64) {
|
||||
// NOTE: see the above explanation of the similar chunk of code, but in
|
||||
// this case, replace 8 with 32 and 4 with 16.
|
||||
uintptr_t a = cur;
|
||||
uintptr_t b = cur + ((numBytes & 32) >> 1);
|
||||
uintptr_t c = end - 16 - ((numBytes & 32) >> 1);
|
||||
uintptr_t d = end - 16;
|
||||
return Check4x16Bytes<CharType>(needle, a, b, c, d);
|
||||
}
|
||||
|
||||
// Get the initial unaligned load out of the way. This will overlap with the
|
||||
// aligned stuff below, but the overlapped part should effectively be free
|
||||
// (relative to a mispredict from doing a byte-by-byte loop).
|
||||
__m128i haystack = _mm_loadu_si128(Cast128(cur));
|
||||
__m128i cmp = CmpEq128<CharType>(needle, haystack);
|
||||
int cmpMask = _mm_movemask_epi8(cmp);
|
||||
if (cmpMask) {
|
||||
return reinterpret_cast<const CharType*>(cur + __builtin_ctz(cmpMask));
|
||||
}
|
||||
|
||||
// Now we're working with aligned memory. Hooray! \o/
|
||||
cur = AlignUp16(cur);
|
||||
|
||||
// The address of the final 48-63 bytes. We overlap this with what we check in
|
||||
// our hot loop below to avoid branching. Again, the overlap should be
|
||||
// negligible compared with a branch mispredict.
|
||||
uintptr_t tailStartPtr = AlignDown16(end - 48);
|
||||
uintptr_t tailEndPtr = end - 16;
|
||||
|
||||
while (cur < tailStartPtr) {
|
||||
uintptr_t a = cur;
|
||||
uintptr_t b = cur + 16;
|
||||
uintptr_t c = cur + 32;
|
||||
uintptr_t d = cur + 48;
|
||||
const CharType* result = Check4x16Bytes<CharType>(needle, a, b, c, d);
|
||||
if (result) {
|
||||
return result;
|
||||
}
|
||||
cur += 64;
|
||||
}
|
||||
|
||||
uintptr_t a = tailStartPtr;
|
||||
uintptr_t b = tailStartPtr + 16;
|
||||
uintptr_t c = tailStartPtr + 32;
|
||||
uintptr_t d = tailEndPtr;
|
||||
return Check4x16Bytes<CharType>(needle, a, b, c, d);
|
||||
}
|
||||
|
||||
template <typename CharType>
|
||||
const CharType* TwoByteLoop(uintptr_t start, uintptr_t end, CharType v1,
|
||||
CharType v2);
|
||||
|
||||
template <>
|
||||
const unsigned char* TwoByteLoop<unsigned char>(uintptr_t start, uintptr_t end,
|
||||
unsigned char v1,
|
||||
unsigned char v2) {
|
||||
uintptr_t cur = start;
|
||||
uintptr_t preEnd = end - sizeof(unsigned char);
|
||||
while (cur < preEnd) {
|
||||
// NOTE: this should only ever be called on little endian architectures.
|
||||
static_assert(MOZ_LITTLE_ENDIAN());
|
||||
uint16_t pattern =
|
||||
static_cast<uint16_t>(v1) | (static_cast<uint16_t>(v2) << 8);
|
||||
if (GetAs<uint16_t>(cur) == pattern) {
|
||||
return reinterpret_cast<const unsigned char*>(cur);
|
||||
}
|
||||
cur += sizeof(unsigned char);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <>
|
||||
const char16_t* TwoByteLoop<char16_t>(uintptr_t start, uintptr_t end,
|
||||
char16_t v1, char16_t v2) {
|
||||
uintptr_t cur = start;
|
||||
uintptr_t preEnd = end - sizeof(char16_t);
|
||||
while (cur < preEnd) {
|
||||
// NOTE: this should only ever be called on little endian architectures
|
||||
static_assert(MOZ_LITTLE_ENDIAN());
|
||||
uint32_t pattern =
|
||||
static_cast<uint32_t>(v1) | (static_cast<uint32_t>(v2) << 16);
|
||||
if (GetAs<uint32_t>(cur) == pattern) {
|
||||
return reinterpret_cast<const char16_t*>(cur);
|
||||
}
|
||||
cur += sizeof(char16_t);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename CharType>
|
||||
const CharType* FindTwoInBuffer(const CharType* ptr, CharType v1, CharType v2,
|
||||
size_t length) {
|
||||
static_assert(sizeof(CharType) == 1 || sizeof(CharType) == 2);
|
||||
static_assert(std::is_unsigned<CharType>::value);
|
||||
uint64_t splat64;
|
||||
if (sizeof(CharType) == 1) {
|
||||
splat64 = 0x0101010101010101llu;
|
||||
} else {
|
||||
splat64 = 0x0001000100010001llu;
|
||||
}
|
||||
|
||||
// Load our needle into a 16-byte register
|
||||
uint64_t u64_v1 = static_cast<uint64_t>(v1) * splat64;
|
||||
int64_t i64_v1 = *reinterpret_cast<int64_t*>(&u64_v1);
|
||||
__m128i needle1 = _mm_set_epi64x(i64_v1, i64_v1);
|
||||
uint64_t u64_v2 = static_cast<uint64_t>(v2) * splat64;
|
||||
int64_t i64_v2 = *reinterpret_cast<int64_t*>(&u64_v2);
|
||||
__m128i needle2 = _mm_set_epi64x(i64_v2, i64_v2);
|
||||
|
||||
size_t numBytes = length * sizeof(CharType);
|
||||
uintptr_t cur = reinterpret_cast<uintptr_t>(ptr);
|
||||
uintptr_t end = cur + numBytes;
|
||||
|
||||
if (numBytes < 16) {
|
||||
return TwoByteLoop<CharType>(cur, end, v1, v2);
|
||||
}
|
||||
|
||||
if (numBytes < 32) {
|
||||
uintptr_t a = cur;
|
||||
uintptr_t b = end - 16;
|
||||
return Check2x2x16Bytes<CharType>(needle1, needle2, a, b, nullptr, nullptr,
|
||||
HaystackOverlap::Overlapping);
|
||||
}
|
||||
|
||||
// Get the initial unaligned load out of the way. This will likely overlap
|
||||
// with the aligned stuff below, but the overlapped part should effectively
|
||||
// be free.
|
||||
__m128i haystack = _mm_loadu_si128(Cast128(cur));
|
||||
__m128i cmp1 = CmpEq128<CharType>(needle1, haystack);
|
||||
__m128i cmp2 = CmpEq128<CharType>(needle2, haystack);
|
||||
int cmpMask1 = _mm_movemask_epi8(cmp1);
|
||||
int cmpMask2 = _mm_movemask_epi8(cmp2);
|
||||
int cmpMask = (cmpMask1 << sizeof(CharType)) & cmpMask2;
|
||||
if (cmpMask) {
|
||||
return reinterpret_cast<const CharType*>(cur + __builtin_ctz(cmpMask) -
|
||||
sizeof(CharType));
|
||||
}
|
||||
|
||||
// Now we're working with aligned memory. Hooray! \o/
|
||||
cur = AlignUp16(cur);
|
||||
|
||||
// The address of the final 48-63 bytes. We overlap this with what we check in
|
||||
// our hot loop below to avoid branching. Again, the overlap should be
|
||||
// negligible compared with a branch mispredict.
|
||||
uintptr_t tailEndPtr = end - 16;
|
||||
uintptr_t tailStartPtr = AlignDown16(tailEndPtr);
|
||||
|
||||
__m128i cmpMaskCarry = _mm_set1_epi32(0);
|
||||
while (cur < tailStartPtr) {
|
||||
uintptr_t a = cur;
|
||||
uintptr_t b = cur + 16;
|
||||
const CharType* result =
|
||||
Check2x2x16Bytes<CharType>(needle1, needle2, a, b, &cmpMaskCarry,
|
||||
&cmpMaskCarry, HaystackOverlap::Sequential);
|
||||
if (result) {
|
||||
return result;
|
||||
}
|
||||
cur += 32;
|
||||
}
|
||||
|
||||
uint32_t carry = (cur == tailStartPtr) ? 0xffffffff : 0;
|
||||
__m128i wideCarry = _mm_loadu_si32(&carry);
|
||||
cmpMaskCarry = _mm_and_si128(cmpMaskCarry, wideCarry);
|
||||
uintptr_t a = tailStartPtr;
|
||||
uintptr_t b = tailEndPtr;
|
||||
return Check2x2x16Bytes<CharType>(needle1, needle2, a, b, &cmpMaskCarry,
|
||||
nullptr, HaystackOverlap::Overlapping);
|
||||
}
|
||||
|
||||
const char* SIMD::memchr8(const char* ptr, char value, size_t length) {
|
||||
// Signed chars are just really annoying to do bit logic with. Convert to
|
||||
// unsigned at the outermost scope so we don't have to worry about it.
|
||||
const unsigned char* uptr = reinterpret_cast<const unsigned char*>(ptr);
|
||||
unsigned char uvalue = static_cast<unsigned char>(value);
|
||||
const unsigned char* uresult =
|
||||
FindInBuffer<unsigned char>(uptr, uvalue, length);
|
||||
return reinterpret_cast<const char*>(uresult);
|
||||
}
|
||||
|
||||
const char16_t* SIMD::memchr16(const char16_t* ptr, char16_t value,
|
||||
size_t length) {
|
||||
return FindInBuffer<char16_t>(ptr, value, length);
|
||||
}
|
||||
|
||||
const char* SIMD::memchr2x8(const char* ptr, char v1, char v2, size_t length) {
|
||||
// Signed chars are just really annoying to do bit logic with. Convert to
|
||||
// unsigned at the outermost scope so we don't have to worry about it.
|
||||
const unsigned char* uptr = reinterpret_cast<const unsigned char*>(ptr);
|
||||
unsigned char uv1 = static_cast<unsigned char>(v1);
|
||||
unsigned char uv2 = static_cast<unsigned char>(v2);
|
||||
const unsigned char* uresult =
|
||||
FindTwoInBuffer<unsigned char>(uptr, uv1, uv2, length);
|
||||
return reinterpret_cast<const char*>(uresult);
|
||||
}
|
||||
|
||||
const char16_t* SIMD::memchr2x16(const char16_t* ptr, char16_t v1, char16_t v2,
|
||||
size_t length) {
|
||||
return FindTwoInBuffer<char16_t>(ptr, v1, v2, length);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
# include <cstring>
|
||||
|
||||
const char* SIMD::memchr8(const char* ptr, char value, size_t length) {
|
||||
const void* result = ::memchr(reinterpret_cast<const void*>(ptr),
|
||||
static_cast<int>(value), length);
|
||||
return reinterpret_cast<const char*>(result);
|
||||
}
|
||||
|
||||
const char16_t* SIMD::memchr16(const char16_t* ptr, char16_t value,
|
||||
size_t length) {
|
||||
const char16_t* end = ptr + length;
|
||||
while (ptr < end) {
|
||||
if (*ptr == value) {
|
||||
return ptr;
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const char* SIMD::memchr2x8(const char* ptr, char v1, char v2, size_t length) {
|
||||
const char* end = ptr + length - 1;
|
||||
while (ptr < end) {
|
||||
ptr = memchr8(ptr, v1, end - ptr);
|
||||
if (!ptr) {
|
||||
return nullptr;
|
||||
}
|
||||
if (ptr[1] == v2) {
|
||||
return ptr;
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const char16_t* SIMD::memchr2x16(const char16_t* ptr, char16_t v1, char16_t v2,
|
||||
size_t length) {
|
||||
const char16_t* end = ptr + length - 1;
|
||||
while (ptr < end) {
|
||||
ptr = memchr16(ptr, v1, end - ptr);
|
||||
if (!ptr) {
|
||||
return nullptr;
|
||||
}
|
||||
if (ptr[1] == v2) {
|
||||
return ptr;
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace mozilla
|
56
mfbt/SIMD.h
56
mfbt/SIMD.h
@ -1,56 +0,0 @@
|
||||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef mozilla_SIMD_h
|
||||
#define mozilla_SIMD_h
|
||||
|
||||
#include "mozilla/Types.h"
|
||||
|
||||
namespace mozilla {
|
||||
// A collection of SIMD-implemented algorithms. Some of these exist in the CRT.
|
||||
// However, the quality of the C runtime implementation varies wildly across
|
||||
// platforms, so these should at least ensure consistency.
|
||||
//
|
||||
// NOTE: these are currently only implemented with hand-written SIMD for x86
|
||||
// and AMD64 platforms, and fallback to the the C runtime or naive loops on
|
||||
// other architectures. Please consider this before switching an already
|
||||
// optimized loop to these helpers.
|
||||
class SIMD {
|
||||
public:
|
||||
// NOTE: for memchr we have a goofy void* signature just to be an easy drop
|
||||
// in replacement for the CRT version. We also give memchr8 which is just a
|
||||
// typed version of memchr.
|
||||
static const void* memchr(const void* ptr, int value, size_t num) {
|
||||
return memchr8(reinterpret_cast<const char*>(ptr), static_cast<char>(value),
|
||||
num);
|
||||
}
|
||||
|
||||
// Search through `ptr[0..length]` for the first occurrence of `value` and
|
||||
// return the pointer to it, or nullptr if it cannot be found.
|
||||
static MFBT_API const char* memchr8(const char* ptr, char value,
|
||||
size_t length);
|
||||
|
||||
// Search through `ptr[0..length]` for the first occurrence of `value` and
|
||||
// return the pointer to it, or nullptr if it cannot be found.
|
||||
static MFBT_API const char16_t* memchr16(const char16_t* ptr, char16_t value,
|
||||
size_t length);
|
||||
|
||||
// Search through `ptr[0..length]` for the first occurrence of `v1` which is
|
||||
// immediately followed by `v2` and return the pointer to the occurrence of
|
||||
// `v1`.
|
||||
static MFBT_API const char* memchr2x8(const char* ptr, char v1, char v2,
|
||||
size_t length);
|
||||
|
||||
// Search through `ptr[0..length]` for the first occurrence of `v1` which is
|
||||
// immediately followed by `v2` and return the pointer to the occurrence of
|
||||
// `v1`.
|
||||
static MFBT_API const char16_t* memchr2x16(const char16_t* ptr, char16_t v1,
|
||||
char16_t v2, size_t length);
|
||||
};
|
||||
|
||||
} // namespace mozilla
|
||||
|
||||
#endif // mozilla_SIMD_h
|
@ -99,12 +99,10 @@ EXPORTS.mozilla = [
|
||||
"SegmentedVector.h",
|
||||
"SHA1.h",
|
||||
"SharedLibrary.h",
|
||||
"SIMD.h",
|
||||
"SmallPointerArray.h",
|
||||
"Span.h",
|
||||
"SplayTree.h",
|
||||
"SPSCQueue.h",
|
||||
"SSE.h",
|
||||
"StaticAnalysisFunctions.h",
|
||||
"TaggedAnonymousMemory.h",
|
||||
"Tainting.h",
|
||||
@ -177,18 +175,12 @@ UNIFIED_SOURCES += [
|
||||
"Poison.cpp",
|
||||
"RandomNum.cpp",
|
||||
"SHA1.cpp",
|
||||
"SIMD.cpp",
|
||||
"TaggedAnonymousMemory.cpp",
|
||||
"UniquePtrExtensions.cpp",
|
||||
"Unused.cpp",
|
||||
"Utf8.cpp",
|
||||
]
|
||||
|
||||
if CONFIG["CPU_ARCH"].startswith("x86"):
|
||||
SOURCES += [
|
||||
"SSE.cpp",
|
||||
]
|
||||
|
||||
if CONFIG["MOZ_BUILD_APP"] not in (
|
||||
"memory",
|
||||
"tools/update-programs",
|
||||
|
@ -1,444 +0,0 @@
|
||||
/* -*- Mode: C++; tab-width: 9; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "mozilla/Assertions.h"
|
||||
#include "mozilla/SIMD.h"
|
||||
|
||||
using mozilla::SIMD;
|
||||
|
||||
void TestTinyString() {
|
||||
const char* test = "012\n";
|
||||
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '0', 3) == test + 0x0);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '1', 3) == test + 0x1);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '2', 3) == test + 0x2);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '\n', 3) == nullptr);
|
||||
}
|
||||
|
||||
void TestShortString() {
|
||||
const char* test = "0123456789\n";
|
||||
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '0', 10) == test + 0x0);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '1', 10) == test + 0x1);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '2', 10) == test + 0x2);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '3', 10) == test + 0x3);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '4', 10) == test + 0x4);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '5', 10) == test + 0x5);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '6', 10) == test + 0x6);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '7', 10) == test + 0x7);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '8', 10) == test + 0x8);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '9', 10) == test + 0x9);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '\n', 10) == nullptr);
|
||||
}
|
||||
|
||||
void TestMediumString() {
|
||||
const char* test = "0123456789abcdef\n";
|
||||
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '0', 16) == test + 0x0);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '1', 16) == test + 0x1);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '2', 16) == test + 0x2);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '3', 16) == test + 0x3);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '4', 16) == test + 0x4);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '5', 16) == test + 0x5);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '6', 16) == test + 0x6);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '7', 16) == test + 0x7);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '8', 16) == test + 0x8);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '9', 16) == test + 0x9);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, 'a', 16) == test + 0xa);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, 'b', 16) == test + 0xb);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, 'c', 16) == test + 0xc);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, 'd', 16) == test + 0xd);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, 'e', 16) == test + 0xe);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, 'f', 16) == test + 0xf);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, '\n', 16) == nullptr);
|
||||
}
|
||||
|
||||
void TestLongString() {
|
||||
// NOTE: here we make sure we go all the way up to 256 to ensure we're
|
||||
// handling negative-valued chars appropriately. We don't need to bother
|
||||
// testing this side of things with char16_t's because they are very
|
||||
// sensibly guaranteed to be unsigned.
|
||||
const size_t count = 256;
|
||||
char test[count];
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
test[i] = static_cast<char>(i);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count - 1; ++i) {
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr8(test, static_cast<char>(i), count - 1) ==
|
||||
test + i);
|
||||
}
|
||||
MOZ_RELEASE_ASSERT(
|
||||
SIMD::memchr8(test, static_cast<char>(count - 1), count - 1) == nullptr);
|
||||
}
|
||||
|
||||
void TestGauntlet() {
|
||||
const size_t count = 256;
|
||||
char test[count];
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
test[i] = static_cast<char>(i);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count - 1; ++i) {
|
||||
for (size_t j = 0; j < count - 1; ++j) {
|
||||
for (size_t k = 0; k < count - 1; ++k) {
|
||||
if (i >= k) {
|
||||
const char* expected = nullptr;
|
||||
if (j >= k && j < i) {
|
||||
expected = test + j;
|
||||
}
|
||||
MOZ_RELEASE_ASSERT(
|
||||
SIMD::memchr8(test + k, static_cast<char>(j), i - k) == expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestTinyString16() {
|
||||
const char16_t* test = u"012\n";
|
||||
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'0', 3) == test + 0x0);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'1', 3) == test + 0x1);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'2', 3) == test + 0x2);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'\n', 3) == nullptr);
|
||||
}
|
||||
|
||||
void TestShortString16() {
|
||||
const char16_t* test = u"0123456789\n";
|
||||
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'0', 10) == test + 0x0);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'1', 10) == test + 0x1);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'2', 10) == test + 0x2);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'3', 10) == test + 0x3);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'4', 10) == test + 0x4);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'5', 10) == test + 0x5);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'6', 10) == test + 0x6);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'7', 10) == test + 0x7);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'8', 10) == test + 0x8);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'9', 10) == test + 0x9);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'\n', 10) == nullptr);
|
||||
}
|
||||
|
||||
void TestMediumString16() {
|
||||
const char16_t* test = u"0123456789abcdef\n";
|
||||
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'0', 16) == test + 0x0);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'1', 16) == test + 0x1);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'2', 16) == test + 0x2);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'3', 16) == test + 0x3);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'4', 16) == test + 0x4);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'5', 16) == test + 0x5);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'6', 16) == test + 0x6);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'7', 16) == test + 0x7);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'8', 16) == test + 0x8);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'9', 16) == test + 0x9);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'a', 16) == test + 0xa);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'b', 16) == test + 0xb);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'c', 16) == test + 0xc);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'd', 16) == test + 0xd);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'e', 16) == test + 0xe);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'f', 16) == test + 0xf);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, u'\n', 16) == nullptr);
|
||||
}
|
||||
|
||||
void TestLongString16() {
|
||||
const size_t count = 256;
|
||||
char16_t test[count];
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
test[i] = i;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count - 1; ++i) {
|
||||
MOZ_RELEASE_ASSERT(
|
||||
SIMD::memchr16(test, static_cast<char16_t>(i), count - 1) == test + i);
|
||||
}
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test, count - 1, count - 1) == nullptr);
|
||||
}
|
||||
|
||||
void TestGauntlet16() {
|
||||
const size_t count = 257;
|
||||
char16_t test[count];
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
test[i] = i;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count - 1; ++i) {
|
||||
for (size_t j = 0; j < count - 1; ++j) {
|
||||
for (size_t k = 0; k < count - 1; ++k) {
|
||||
if (i >= k) {
|
||||
const char16_t* expected = nullptr;
|
||||
if (j >= k && j < i) {
|
||||
expected = test + j;
|
||||
}
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr16(test + k, static_cast<char16_t>(j),
|
||||
i - k) == expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestTinyString2x8() {
|
||||
const char* test = "012\n";
|
||||
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '0', '1', 3) == test + 0x0);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '1', '2', 3) == test + 0x1);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '2', '\n', 3) == nullptr);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '0', '2', 3) == nullptr);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '1', '\n', 3) == nullptr);
|
||||
}
|
||||
|
||||
void TestShortString2x8() {
|
||||
const char* test = "0123456789\n";
|
||||
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '0', '1', 10) == test + 0x0);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '1', '2', 10) == test + 0x1);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '2', '3', 10) == test + 0x2);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '3', '4', 10) == test + 0x3);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '4', '5', 10) == test + 0x4);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '5', '6', 10) == test + 0x5);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '6', '7', 10) == test + 0x6);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '7', '8', 10) == test + 0x7);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '8', '9', 10) == test + 0x8);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '9', '\n', 10) == nullptr);
|
||||
}
|
||||
|
||||
void TestMediumString2x8() {
|
||||
const char* test = "0123456789abcdef\n";
|
||||
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '0', '1', 16) == test + 0x0);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '1', '2', 16) == test + 0x1);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '2', '3', 16) == test + 0x2);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '3', '4', 16) == test + 0x3);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '4', '5', 16) == test + 0x4);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '5', '6', 16) == test + 0x5);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '6', '7', 16) == test + 0x6);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '7', '8', 16) == test + 0x7);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '8', '9', 16) == test + 0x8);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, '9', 'a', 16) == test + 0x9);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, 'a', 'b', 16) == test + 0xa);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, 'b', 'c', 16) == test + 0xb);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, 'c', 'd', 16) == test + 0xc);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, 'd', 'e', 16) == test + 0xd);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, 'e', 'f', 16) == test + 0xe);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, 'f', '\n', 16) == nullptr);
|
||||
}
|
||||
|
||||
void TestLongString2x8() {
|
||||
const size_t count = 256;
|
||||
char test[count];
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
test[i] = static_cast<char>(i);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count - 2; ++i) {
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, static_cast<char>(i),
|
||||
static_cast<char>(i + 1),
|
||||
count - 1) == test + i);
|
||||
}
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test, static_cast<char>(count - 2),
|
||||
static_cast<char>(count - 1),
|
||||
count - 1) == nullptr);
|
||||
}
|
||||
|
||||
void TestTinyString2x16() {
|
||||
const char16_t* test = u"012\n";
|
||||
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'0', u'1', 3) == test + 0x0);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'1', u'2', 3) == test + 0x1);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'2', u'\n', 3) == nullptr);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'0', u'2', 3) == nullptr);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'1', u'\n', 3) == nullptr);
|
||||
}
|
||||
|
||||
void TestShortString2x16() {
|
||||
const char16_t* test = u"0123456789\n";
|
||||
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'0', u'1', 10) == test + 0x0);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'1', u'2', 10) == test + 0x1);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'2', u'3', 10) == test + 0x2);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'3', u'4', 10) == test + 0x3);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'4', u'5', 10) == test + 0x4);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'5', u'6', 10) == test + 0x5);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'6', u'7', 10) == test + 0x6);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'7', u'8', 10) == test + 0x7);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'8', u'9', 10) == test + 0x8);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'9', u'\n', 10) == nullptr);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'0', u'2', 10) == nullptr);
|
||||
}
|
||||
|
||||
void TestMediumString2x16() {
|
||||
const char16_t* test = u"0123456789abcdef\n";
|
||||
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'0', u'1', 16) == test + 0x0);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'1', u'2', 16) == test + 0x1);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'2', u'3', 16) == test + 0x2);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'3', u'4', 16) == test + 0x3);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'4', u'5', 16) == test + 0x4);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'5', u'6', 16) == test + 0x5);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'6', u'7', 16) == test + 0x6);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'7', u'8', 16) == test + 0x7);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'8', u'9', 16) == test + 0x8);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'9', u'a', 16) == test + 0x9);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'a', u'b', 16) == test + 0xa);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'b', u'c', 16) == test + 0xb);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'c', u'd', 16) == test + 0xc);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'd', u'e', 16) == test + 0xd);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'e', u'f', 16) == test + 0xe);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'f', u'\n', 16) == nullptr);
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, u'0', u'2', 10) == nullptr);
|
||||
}
|
||||
|
||||
void TestLongString2x16() {
|
||||
const size_t count = 257;
|
||||
char16_t test[count];
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
test[i] = static_cast<char16_t>(i);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count - 2; ++i) {
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, static_cast<char16_t>(i),
|
||||
static_cast<char16_t>(i + 1),
|
||||
count - 1) == test + i);
|
||||
}
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test, static_cast<char16_t>(count - 2),
|
||||
static_cast<char16_t>(count - 1),
|
||||
count - 1) == nullptr);
|
||||
}
|
||||
|
||||
void TestGauntlet2x8() {
|
||||
const size_t count = 256;
|
||||
char test[count * 2];
|
||||
// load in the evens
|
||||
for (size_t i = 0; i < count / 2; ++i) {
|
||||
test[i] = static_cast<char>(2 * i);
|
||||
}
|
||||
// load in the odds
|
||||
for (size_t i = 0; i < count / 2; ++i) {
|
||||
test[count / 2 + i] = static_cast<char>(2 * i + 1);
|
||||
}
|
||||
// load in evens and odds sequentially
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
test[count + i] = static_cast<char>(i);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count - 1; ++i) {
|
||||
for (size_t j = 0; j < count - 2; ++j) {
|
||||
for (size_t k = 0; k < count - 1; ++k) {
|
||||
if (i > k + 1) {
|
||||
const char* expected1 = nullptr;
|
||||
const char* expected2 = nullptr;
|
||||
if (i > j + 1) {
|
||||
expected1 = test + j + count; // Add count to skip over odds/evens
|
||||
if (j >= k) {
|
||||
expected2 = test + j + count;
|
||||
}
|
||||
}
|
||||
char a = static_cast<char>(j);
|
||||
char b = static_cast<char>(j + 1);
|
||||
// Make sure it doesn't pick up any in the alternating odd/even
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test + k, a, b, i - k + count) ==
|
||||
expected1);
|
||||
// Make sure we cover smaller inputs
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test + k + count, a, b, i - k) ==
|
||||
expected2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestGauntlet2x16() {
|
||||
const size_t count = 1024;
|
||||
char16_t test[count * 2];
|
||||
// load in the evens
|
||||
for (size_t i = 0; i < count / 2; ++i) {
|
||||
test[i] = static_cast<char16_t>(2 * i);
|
||||
}
|
||||
// load in the odds
|
||||
for (size_t i = 0; i < count / 2; ++i) {
|
||||
test[count / 2 + i] = static_cast<char16_t>(2 * i + 1);
|
||||
}
|
||||
// load in evens and odds sequentially
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
test[count + i] = static_cast<char16_t>(i);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count - 1; ++i) {
|
||||
for (size_t j = 0; j < count - 2; ++j) {
|
||||
for (size_t k = 0; k < count - 1; ++k) {
|
||||
if (i > k + 1) {
|
||||
const char16_t* expected1 = nullptr;
|
||||
const char16_t* expected2 = nullptr;
|
||||
if (i > j + 1) {
|
||||
expected1 = test + j + count; // Add count to skip over odds/evens
|
||||
if (j >= k) {
|
||||
expected2 = test + j + count;
|
||||
}
|
||||
}
|
||||
char16_t a = static_cast<char16_t>(j);
|
||||
char16_t b = static_cast<char16_t>(j + 1);
|
||||
// Make sure it doesn't pick up any in the alternating odd/even
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test + k, a, b, i - k + count) ==
|
||||
expected1);
|
||||
// Make sure we cover smaller inputs
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test + k + count, a, b, i - k) ==
|
||||
expected2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestSpecialCases() {
|
||||
// The following 4 asserts test the case where we do two overlapping checks,
|
||||
// where the first one ends with our first search character, and the second
|
||||
// one begins with our search character. Since they are overlapping, we want
|
||||
// to ensure that the search function doesn't carry the match from the
|
||||
// first check over to the second check.
|
||||
const char* test1 = "x123456789abcdey";
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test1, 'y', 'x', 16) == nullptr);
|
||||
const char* test2 = "1000000000000000200000000000000030b000000000000a40";
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x8(test2, 'a', 'b', 52) == nullptr);
|
||||
const char16_t* test1wide = u"x123456y";
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test1wide, 'y', 'x', 8) == nullptr);
|
||||
const char16_t* test2wide = u"100000002000000030b0000a40";
|
||||
MOZ_RELEASE_ASSERT(SIMD::memchr2x16(test2wide, 'a', 'b', 26) == nullptr);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
TestTinyString();
|
||||
TestShortString();
|
||||
TestMediumString();
|
||||
TestLongString();
|
||||
TestGauntlet();
|
||||
|
||||
TestTinyString16();
|
||||
TestShortString16();
|
||||
TestMediumString16();
|
||||
TestLongString16();
|
||||
TestGauntlet16();
|
||||
|
||||
TestTinyString2x8();
|
||||
TestShortString2x8();
|
||||
TestMediumString2x8();
|
||||
TestLongString2x8();
|
||||
|
||||
TestTinyString2x16();
|
||||
TestShortString2x16();
|
||||
TestMediumString2x16();
|
||||
TestLongString2x16();
|
||||
|
||||
TestSpecialCases();
|
||||
|
||||
// These are too slow to run all the time, but they should be run when making
|
||||
// meaningful changes just to be sure.
|
||||
// TestGauntlet2x8();
|
||||
// TestGauntlet2x16();
|
||||
|
||||
return 0;
|
||||
}
|
@ -57,7 +57,6 @@ CppUnitTests(
|
||||
"TestScopeExit",
|
||||
"TestSegmentedVector",
|
||||
"TestSHA1",
|
||||
"TestSIMD",
|
||||
"TestSmallPointerArray",
|
||||
"TestSplayTree",
|
||||
"TestTemplateLib",
|
||||
|
@ -78,8 +78,14 @@ if CONFIG["MOZ_WIDGET_TOOLKIT"]:
|
||||
"arm.h",
|
||||
"mips.h",
|
||||
"ppc.h",
|
||||
"SSE.h",
|
||||
]
|
||||
|
||||
if CONFIG["CPU_ARCH"].startswith("x86"):
|
||||
SOURCES += [
|
||||
"SSE.cpp",
|
||||
]
|
||||
|
||||
if CONFIG["CPU_ARCH"] == "arm":
|
||||
SOURCES += [
|
||||
"arm.cpp",
|
||||
|
@ -75,7 +75,6 @@ skip-if =
|
||||
[TestScopeExit]
|
||||
[TestSegmentedVector]
|
||||
[TestSHA1]
|
||||
[TestSIMD]
|
||||
[TestSmallPointerArray]
|
||||
[TestSaturate]
|
||||
[TestSplayTree]
|
||||
|
Loading…
Reference in New Issue
Block a user