Bug 1358297: Part 2. Helper class to mask ASCII characters, letting us do a faster test for membership in a string/character set. r=froydnj

MozReview-Commit-ID: 1s3mYNVThoD

--HG--
extra : rebase_source : d9c845addaa4a7a326b787a99f5cc83abb567848
This commit is contained in:
Milan Sreckovic 2017-05-10 13:12:30 -04:00
parent 0bf3678bac
commit 40b6fdfb4b
4 changed files with 175 additions and 0 deletions

View File

@ -8,6 +8,7 @@ with Files('**'):
BUG_COMPONENT = ('Core', 'String')
EXPORTS += [
'nsASCIIMask.h',
'nsAString.h',
'nsCharTraits.h',
'nsDependentString.h',
@ -38,6 +39,7 @@ EXPORTS += [
]
UNIFIED_SOURCES += [
'nsASCIIMask.cpp',
'nsDependentString.cpp',
'nsDependentSubstring.cpp',
'nsPromiseFlatString.cpp',

View File

@ -0,0 +1,55 @@
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsASCIIMask.h"
namespace mozilla {
// Predicate behind the whitespace mask: accepts space, horizontal tab,
// line feed, form feed and carriage return. Vertical tab ('\v') is not
// part of the set.
constexpr bool TestWhitespace(char c)
{
  return c == ' ' ||
         c == '\t' ||
         c == '\n' ||
         c == '\f' ||
         c == '\r';
}
// Compile-time table indexed by ASCII code: an entry is true exactly when
// TestWhitespace() accepts that character. Handed out by
// ASCIIMask::MaskWhitespace().
constexpr ASCIIMaskArray sWhitespaceMask = CreateASCIIMask(TestWhitespace);
// Predicate behind the CR/LF mask: accepts only line feed and carriage
// return.
constexpr bool TestCRLF(char c)
{
  return c == '\n' ||
         c == '\r';
}
// Compile-time table indexed by ASCII code: an entry is true exactly when
// TestCRLF() accepts that character. Handed out by ASCIIMask::MaskCRLF().
constexpr ASCIIMaskArray sCRLFMask = CreateASCIIMask(TestCRLF);
// Predicate behind the CR/LF/tab mask: accepts horizontal tab, line feed
// and carriage return.
constexpr bool TestCRLFTab(char c)
{
  return c == '\t' ||
         c == '\n' ||
         c == '\r';
}
// Compile-time table indexed by ASCII code: an entry is true exactly when
// TestCRLFTab() accepts that character. Handed out by
// ASCIIMask::MaskCRLFTab().
constexpr ASCIIMaskArray sCRLFTabMask = CreateASCIIMask(TestCRLFTab);
// Predicate behind the digit mask: accepts the decimal digits '0' to '9'.
// C++ guarantees the digit characters are contiguous and ascending, so a
// range check selects exactly those ten characters.
constexpr bool TestZeroToNine(char c)
{
  return '0' <= c && c <= '9';
}
// Compile-time table indexed by ASCII code: an entry is true exactly when
// TestZeroToNine() accepts that character. Handed out by
// ASCIIMask::Mask0to9().
constexpr ASCIIMaskArray sZeroToNineMask = CreateASCIIMask(TestZeroToNine);
// Returns a reference to the preset table whose true entries are the
// characters accepted by TestWhitespace().
const ASCIIMaskArray& ASCIIMask::MaskWhitespace()
{
return sWhitespaceMask;
}
// Returns a reference to the preset table whose true entries are the
// characters accepted by TestCRLF().
const ASCIIMaskArray& ASCIIMask::MaskCRLF()
{
return sCRLFMask;
}
// Returns a reference to the preset table whose true entries are the
// characters accepted by TestCRLFTab().
const ASCIIMaskArray& ASCIIMask::MaskCRLFTab()
{
return sCRLFTabMask;
}
// Returns a reference to the preset table whose true entries are the
// characters accepted by TestZeroToNine().
const ASCIIMaskArray& ASCIIMask::Mask0to9()
{
return sZeroToNineMask;
}
} // namespace mozilla

View File

@ -0,0 +1,70 @@
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsASCIIMask_h_
#define nsASCIIMask_h_
#include <array>
#include "mozilla/IndexSequence.h"
// Fixed-size table holding one bool for each of the 128 standard ASCII
// codes.
using ASCIIMaskArray = std::array<bool, 128>;
namespace mozilla {
// ASCIIMask hands out fixed-size boolean tables, filled in at compile time,
// that record one fact about each of the 128 standard ASCII characters.
// Extended ASCII is not covered; no use case has come up so far.
//
// If a loop walks a string character by character and compares each one
// against a fixed set of characters before deciding what to do, looking the
// character up in one of these tables is likely to be faster, particularly
// when the set holds more than one character and is known at compile time.
class ASCIIMask
{
public:
  // Preset masks for some common character groups.
  //
  // The tables only have 128 entries, so callers must either make sure the
  // index is < 128 before subscripting, or go through IsMasked():
  //
  //   if (someChar < 128 && MaskCRLF()[someChar]) this is \r or \n
  static const ASCIIMaskArray& MaskCRLF();
  static const ASCIIMaskArray& Mask0to9();
  static const ASCIIMaskArray& MaskCRLFTab();
  static const ASCIIMaskArray& MaskWhitespace();

  // Range-checked lookup: yields false for any aChar that is not below 128,
  // otherwise the entry of aMask at index aChar.
  static MOZ_ALWAYS_INLINE bool IsMasked(const ASCIIMaskArray& aMask, uint32_t aChar)
  {
    if (aChar >= 128) {
      return false;
    }
    return aMask[aChar];
  }
};
// For character sets beyond the preset masks, use the CreateASCIIMask
// templates below to build additional masks.
//
// Creating and using a custom mask looks like this:
//
// constexpr bool TestABC(char c) { return c == 'A' || c == 'B' || c == 'C'; }
// constexpr std::array<bool, 128> sABCMask = CreateASCIIMask(TestABC);
// ...
// if (someChar < 128 && sABCMask[someChar]) this is A or B or C
namespace details
{
// Implementation helper: expands the Indices pack so that fun is called once
// per index, and brace-initializes the returned array with the results in
// pack order. The IndexSequence argument is unnamed; it is only there so the
// compiler can deduce Indices.
template<typename F, size_t... Indices>
constexpr std::array<bool, 128> CreateASCIIMask(F fun, mozilla::IndexSequence<Indices...>)
{
return {{ fun(Indices)... }};
}
} // namespace details
// Builds a 128-entry mask from the predicate fun by forwarding it, together
// with the index sequence produced by mozilla::MakeIndexSequence<128>, to
// details::CreateASCIIMask, which evaluates fun for each index in that
// sequence.
template<typename F>
constexpr std::array<bool, 128> CreateASCIIMask(F fun)
{
return details::CreateASCIIMask(fun, mozilla::MakeIndexSequence<128>::Type{});
}
} // namespace mozilla
#endif // nsASCIIMask_h_

View File

@ -6,6 +6,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "nsASCIIMask.h"
#include "nsString.h"
#include "nsStringBuffer.h"
#include "nsReadableUtils.h"
@ -1088,6 +1089,53 @@ TEST(Strings, Split)
EXPECT_EQ(counter, (size_t)2);
}
// Arbitrary predicate used to build a custom mask in the ASCIIMask test:
// accepts backspace, '7', '?', 'G', 'Z', 'a', 'c' and 'e'.
constexpr bool TestSomeChars(char c)
{
  return c == '\b' ||
         c == '7' ||
         c == '?' ||
         c == 'G' ||
         c == 'Z' ||
         c == 'a' ||
         c == 'c' ||
         c == 'e';
}
// Exercises the preset ASCIIMask tables and a custom mask built with
// CreateASCIIMask(). In-range characters are checked both by direct indexing
// and through IsMasked(); values of 128 and above are only checked through
// IsMasked(), since the tables have just 128 entries.
//
// Each section runs its out-of-range checks against its OWN mask. The
// original repeated the maskCRLF check in every section, so the other masks
// were never exercised with an index >= 128.
TEST(Strings, ASCIIMask)
{
  const ASCIIMaskArray& maskCRLF = mozilla::ASCIIMask::MaskCRLF();
  EXPECT_TRUE(maskCRLF['\n'] && mozilla::ASCIIMask::IsMasked(maskCRLF, '\n'));
  EXPECT_TRUE(maskCRLF['\r'] && mozilla::ASCIIMask::IsMasked(maskCRLF, '\r'));
  EXPECT_FALSE(maskCRLF['g'] || mozilla::ASCIIMask::IsMasked(maskCRLF, 'g'));
  EXPECT_FALSE(maskCRLF[' '] || mozilla::ASCIIMask::IsMasked(maskCRLF, ' '));
  EXPECT_FALSE(maskCRLF['\0'] || mozilla::ASCIIMask::IsMasked(maskCRLF, '\0'));
  EXPECT_FALSE(maskCRLF[127] || mozilla::ASCIIMask::IsMasked(maskCRLF, 127));
  EXPECT_FALSE(mozilla::ASCIIMask::IsMasked(maskCRLF, 128));
  EXPECT_FALSE(mozilla::ASCIIMask::IsMasked(maskCRLF, 14324));

  const ASCIIMaskArray& maskCRLFTab = mozilla::ASCIIMask::MaskCRLFTab();
  EXPECT_TRUE(maskCRLFTab['\n'] && mozilla::ASCIIMask::IsMasked(maskCRLFTab, '\n'));
  EXPECT_TRUE(maskCRLFTab['\r'] && mozilla::ASCIIMask::IsMasked(maskCRLFTab, '\r'));
  EXPECT_TRUE(maskCRLFTab['\t'] && mozilla::ASCIIMask::IsMasked(maskCRLFTab, '\t'));
  EXPECT_FALSE(maskCRLFTab[' '] || mozilla::ASCIIMask::IsMasked(maskCRLFTab, ' '));
  EXPECT_FALSE(maskCRLFTab['g'] || mozilla::ASCIIMask::IsMasked(maskCRLFTab, 'g'));
  EXPECT_FALSE(maskCRLFTab['\0'] || mozilla::ASCIIMask::IsMasked(maskCRLFTab, '\0'));
  EXPECT_FALSE(mozilla::ASCIIMask::IsMasked(maskCRLFTab, 128));
  EXPECT_FALSE(mozilla::ASCIIMask::IsMasked(maskCRLFTab, 14324));

  const ASCIIMaskArray& mask0to9 = mozilla::ASCIIMask::Mask0to9();
  EXPECT_TRUE(mask0to9['9'] && mozilla::ASCIIMask::IsMasked(mask0to9, '9'));
  EXPECT_TRUE(mask0to9['0'] && mozilla::ASCIIMask::IsMasked(mask0to9, '0'));
  EXPECT_TRUE(mask0to9['4'] && mozilla::ASCIIMask::IsMasked(mask0to9, '4'));
  EXPECT_FALSE(mask0to9['g'] || mozilla::ASCIIMask::IsMasked(mask0to9, 'g'));
  EXPECT_FALSE(mask0to9[' '] || mozilla::ASCIIMask::IsMasked(mask0to9, ' '));
  EXPECT_FALSE(mask0to9['\n'] || mozilla::ASCIIMask::IsMasked(mask0to9, '\n'));
  EXPECT_FALSE(mask0to9['\0'] || mozilla::ASCIIMask::IsMasked(mask0to9, '\0'));
  EXPECT_FALSE(mozilla::ASCIIMask::IsMasked(mask0to9, 128));
  EXPECT_FALSE(mozilla::ASCIIMask::IsMasked(mask0to9, 14324));

  const ASCIIMaskArray& maskWS = mozilla::ASCIIMask::MaskWhitespace();
  EXPECT_TRUE(maskWS[' '] && mozilla::ASCIIMask::IsMasked(maskWS, ' '));
  EXPECT_TRUE(maskWS['\t'] && mozilla::ASCIIMask::IsMasked(maskWS, '\t'));
  EXPECT_FALSE(maskWS['8'] || mozilla::ASCIIMask::IsMasked(maskWS, '8'));
  EXPECT_FALSE(maskWS['\0'] || mozilla::ASCIIMask::IsMasked(maskWS, '\0'));
  EXPECT_FALSE(mozilla::ASCIIMask::IsMasked(maskWS, 128));
  EXPECT_FALSE(mozilla::ASCIIMask::IsMasked(maskWS, 14324));

  constexpr ASCIIMaskArray maskSome = mozilla::CreateASCIIMask(TestSomeChars);
  EXPECT_TRUE(maskSome['a'] && mozilla::ASCIIMask::IsMasked(maskSome, 'a'));
  EXPECT_TRUE(maskSome['c'] && mozilla::ASCIIMask::IsMasked(maskSome, 'c'));
  EXPECT_TRUE(maskSome['e'] && mozilla::ASCIIMask::IsMasked(maskSome, 'e'));
  EXPECT_TRUE(maskSome['7'] && mozilla::ASCIIMask::IsMasked(maskSome, '7'));
  EXPECT_TRUE(maskSome['G'] && mozilla::ASCIIMask::IsMasked(maskSome, 'G'));
  EXPECT_TRUE(maskSome['Z'] && mozilla::ASCIIMask::IsMasked(maskSome, 'Z'));
  EXPECT_TRUE(maskSome['\b'] && mozilla::ASCIIMask::IsMasked(maskSome, '\b'));
  EXPECT_TRUE(maskSome['?'] && mozilla::ASCIIMask::IsMasked(maskSome, '?'));
  EXPECT_FALSE(maskSome['8'] || mozilla::ASCIIMask::IsMasked(maskSome, '8'));
  EXPECT_FALSE(maskSome['\0'] || mozilla::ASCIIMask::IsMasked(maskSome, '\0'));
  EXPECT_FALSE(mozilla::ASCIIMask::IsMasked(maskSome, 128));
  EXPECT_FALSE(mozilla::ASCIIMask::IsMasked(maskSome, 14324));
}
template <typename T> void
CompressWhitespaceHelper()
{