mirror of
https://github.com/darlinghq/darling-JavaScriptCore.git
synced 2024-11-23 04:09:40 +00:00
509 lines
16 KiB
C++
509 lines
16 KiB
C++
/*
 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
 * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
 * Copyright (C) 2009 Torch Mobile, Inc.
 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */
|
|
|
|
#include "config.h"
|
|
#include "RegExp.h"
|
|
|
|
#include "RegExpCache.h"
|
|
#include "RegExpInlines.h"
|
|
#include "YarrJIT.h"
|
|
#include <wtf/Assertions.h>
|
|
|
|
namespace JSC {
|
|
|
|
// ClassInfo record for RegExp cells: the class name "RegExp", three null entries,
// and the method table produced by CREATE_METHOD_TABLE(RegExp).
// NOTE(review): what each nullptr slot stands for is defined by ClassInfo, which is not visible here.
const ClassInfo RegExp::s_info = { "RegExp", nullptr, nullptr, nullptr, CREATE_METHOD_TABLE(RegExp) };
|
|
|
|
#if REGEXP_FUNC_TEST_DATA_GEN
|
|
const char* const RegExpFunctionalTestCollector::s_fileName = "/tmp/RegExpTestsData";
|
|
RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::s_instance = 0;
|
|
|
|
// Returns the shared collector, constructing it the first time it is requested.
RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::get()
{
    if (s_instance)
        return s_instance;

    s_instance = new RegExpFunctionalTestCollector();
    return s_instance;
}
|
|
|
|
// Appends one test record to the data file and flushes it.
//
// When regExp is not the one used for the previous record, a "/pattern/flags" header
// line is written first. The record itself holds the escaped subject string, the start
// offset, the match result and one (begin, end) pair per entry of ovector, covering
// indices 0 through regExp->numSubpatterns() inclusive.
void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, const String& s, int startOffset, int* ovector, int result)
{
    const bool sameRegExpAsPreviousRecord = m_lastRegExp && m_lastRegExp == regExp;
    if (!sameRegExpAsPreviousRecord) {
        m_lastRegExp = regExp;

        // Header line: the pattern between slashes followed by its flag letters.
        fputc('/', m_file);
        outputEscapedString(regExp->pattern(), true);
        fputc('/', m_file);

        if (regExp->global())
            fputc('g', m_file);
        if (regExp->ignoreCase())
            fputc('i', m_file);
        if (regExp->multiline())
            fputc('m', m_file);
        if (regExp->dotAll())
            fputc('s', m_file);
        if (regExp->unicode())
            fputc('u', m_file);
        if (regExp->sticky())
            fputc('y', m_file);

        fputs("\n", m_file);
    }

    fputs(" \"", m_file);
    outputEscapedString(s);
    fprintf(m_file, "\", %d, %d, (", startOffset, result);

    const unsigned lastPairIndex = regExp->numSubpatterns();
    for (unsigned pairIndex = 0; pairIndex <= lastPairIndex; ++pairIndex) {
        if (pairIndex)
            fputs(", ", m_file);

        const int begin = ovector[pairIndex * 2];
        // An unmatched subpattern (begin of -1) is always reported with an end of -1 too.
        const int end = (begin == -1) ? -1 : ovector[pairIndex * 2 + 1];
        fprintf(m_file, "%d, %d", begin, end);
    }

    fputs(")\n", m_file);
    fflush(m_file);
}
|
|
|
|
// Opens the data file for update, creating it when it cannot be opened, and moves to
// its end so that new records follow whatever is already stored there.
RegExpFunctionalTestCollector::RegExpFunctionalTestCollector()
{
    m_file = fopen(s_fileName, "r+");
    if (m_file == nullptr)
        m_file = fopen(s_fileName, "w+");

    fseek(m_file, 0L, SEEK_END);
}
|
|
|
|
// Closes the data file and clears the shared instance pointer.
RegExpFunctionalTestCollector::~RegExpFunctionalTestCollector()
{
    fclose(m_file);
    s_instance = nullptr;
}
|
|
|
|
void RegExpFunctionalTestCollector::outputEscapedString(const String& s, bool escapeSlash)
|
|
{
|
|
int len = s.length();
|
|
|
|
for (int i = 0; i < len; ++i) {
|
|
UChar c = s[i];
|
|
|
|
switch (c) {
|
|
case '\0':
|
|
fputs("\\0", m_file);
|
|
break;
|
|
case '\a':
|
|
fputs("\\a", m_file);
|
|
break;
|
|
case '\b':
|
|
fputs("\\b", m_file);
|
|
break;
|
|
case '\f':
|
|
fputs("\\f", m_file);
|
|
break;
|
|
case '\n':
|
|
fputs("\\n", m_file);
|
|
break;
|
|
case '\r':
|
|
fputs("\\r", m_file);
|
|
break;
|
|
case '\t':
|
|
fputs("\\t", m_file);
|
|
break;
|
|
case '\v':
|
|
fputs("\\v", m_file);
|
|
break;
|
|
case '/':
|
|
if (escapeSlash)
|
|
fputs("\\/", m_file);
|
|
else
|
|
fputs("/", m_file);
|
|
break;
|
|
case '\"':
|
|
fputs("\\\"", m_file);
|
|
break;
|
|
case '\\':
|
|
fputs("\\\\", m_file);
|
|
break;
|
|
case '\?':
|
|
fputs("\?", m_file);
|
|
break;
|
|
default:
|
|
if (c > 0x7f)
|
|
fprintf(m_file, "\\u%04x", c);
|
|
else
|
|
fputc(c, m_file);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// Stores the pattern text and flags; the pattern itself is first parsed in finishCreation().
// The flags must not be the DeletedValue sentinel.
RegExp::RegExp(VM& vm, const String& patternString, OptionSet<Yarr::Flags> flags)
    : JSCell(vm, vm.regExpStructure.get())
    , m_patternString(patternString)
    , m_flags(flags)
{
    ASSERT(m_flags != Yarr::Flags::DeletedValue);
}
|
|
|
|
// Parses the pattern once to record the construction error code, the number of
// subpatterns and, when the pattern has any, the named capture group tables.
void RegExp::finishCreation(VM& vm)
{
    Base::finishCreation(vm);

    Yarr::YarrPattern pattern(m_patternString, m_flags, m_constructionErrorCode);
    if (!isValid()) {
        m_state = ParseError;
        return;
    }

    m_numSubpatterns = pattern.m_numSubpatterns;

    // The rare data is only allocated for patterns that carry named-group information.
    const bool hasNamedGroupData = !pattern.m_captureGroupNames.isEmpty() || !pattern.m_namedGroupToParenIndex.isEmpty();
    if (!hasNamedGroupData)
        return;

    m_rareData = makeUnique<RareData>();
    m_rareData->m_captureGroupNames.swap(pattern.m_captureGroupNames);
    m_rareData->m_namedGroupToParenIndex.swap(pattern.m_namedGroupToParenIndex);
}
|
|
|
|
// Runs the RegExp destructor on the given cell.
//
// When functional test data generation is enabled, the collector is first told to drop
// its reference to the RegExp being destroyed.
void RegExp::destroy(JSCell* cell)
{
    RegExp* thisObject = static_cast<RegExp*>(cell);
#if REGEXP_FUNC_TEST_DATA_GEN
    // Pass the cell being destroyed. The object arrives through the `cell` parameter, so
    // `this` is not the right handle here (and does not exist if destroy() is static).
    RegExpFunctionalTestCollector::get()->clearRegExp(thisObject);
#endif
    thisObject->RegExp::~RegExp();
}
|
|
|
|
// Returns the base cell estimate plus the size of any compiled bytecode and,
// when the Yarr JIT is enabled, of any JIT code held by the RegExp.
size_t RegExp::estimatedSize(JSCell* cell, VM& vm)
{
    RegExp* thisObject = static_cast<RegExp*>(cell);

    size_t regexDataSize = 0;
    if (thisObject->m_regExpBytecode)
        regexDataSize = thisObject->m_regExpBytecode->estimatedSizeInBytes();
#if ENABLE(YARR_JIT)
    if (auto* jitCode = thisObject->m_regExpJITCode.get())
        regexDataSize += jitCode->size();
#endif
    return Base::estimatedSize(cell, vm) + regexDataSize;
}
|
|
|
|
// Allocates and fully initializes a new RegExp cell without consulting the RegExp cache.
RegExp* RegExp::createWithoutCaching(VM& vm, const String& patternString, OptionSet<Yarr::Flags> flags)
{
    auto* cellMemory = allocateCell<RegExp>(vm.heap);
    RegExp* regExp = new (NotNull, cellMemory) RegExp(vm, patternString, flags);
    regExp->finishCreation(vm);
    return regExp;
}
|
|
|
|
// Returns the RegExp for this pattern and flags from the VM's RegExp cache,
// which looks it up or creates it.
RegExp* RegExp::create(VM& vm, const String& patternString, OptionSet<Yarr::Flags> flags)
{
    auto* cache = vm.regExpCache();
    return cache->lookupOrCreate(patternString, flags);
}
|
|
|
|
|
|
// Compiles the parsed pattern to Yarr bytecode using the VM's RegExp allocator and its lock.
// Failures are reported through errorCode; callers treat a null result as a failure.
static std::unique_ptr<Yarr::BytecodePattern> byteCodeCompilePattern(VM* vm, Yarr::YarrPattern& pattern, Yarr::ErrorCode& errorCode)
{
    auto* allocator = &vm->m_regExpAllocator;
    auto* allocatorLock = &vm->m_regExpAllocatorLock;
    return Yarr::byteCompile(pattern, allocator, errorCode, allocatorLock);
}
|
|
|
|
// Ensures bytecode exists for this RegExp. Does nothing when it is already present;
// otherwise re-parses the pattern and compiles it, switching to ParseError on failure.
void RegExp::byteCodeCompileIfNecessary(VM* vm)
{
    if (m_regExpBytecode)
        return;

    Yarr::YarrPattern pattern(m_patternString, m_flags, m_constructionErrorCode);
    if (hasError(m_constructionErrorCode)) {
        m_state = ParseError;
        return;
    }
    ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);

    m_regExpBytecode = byteCodeCompilePattern(vm, pattern, m_constructionErrorCode);
    if (!m_regExpBytecode)
        m_state = ParseError;
}
|
|
|
|
// Compiles this RegExp for matching with the given character size, holding the cell lock.
//
// The pattern is re-parsed first; a parse error moves the RegExp to ParseError. When the
// Yarr JIT is enabled and the pattern is eligible, JIT compilation is attempted and, if it
// reports no failure reason, the state becomes JITCode. In every other case the pattern
// is compiled to bytecode, with ParseError as the state when that produces nothing.
void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize)
{
    auto locker = holdLock(cellLock());

    Yarr::YarrPattern pattern(m_patternString, m_flags, m_constructionErrorCode);
    if (hasError(m_constructionErrorCode)) {
        m_state = ParseError;
        return;
    }
    ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);

    // First compilation: register with the VM's RegExp cache before any code is produced.
    if (!hasCode()) {
        ASSERT(m_state == NotCompiled);
        vm->regExpCache()->addToStrongCache(this);
        m_state = ByteCode;
    }

#if ENABLE(YARR_JIT)
    bool shouldTryJIT = !pattern.containsUnsignedLengthPattern() && Options::useRegExpJIT();
#if !ENABLE(YARR_JIT_BACKREFERENCES)
    shouldTryJIT = shouldTryJIT && !pattern.m_containsBackreferences;
#endif
    if (shouldTryJIT) {
        auto& jitCode = ensureRegExpJITCode();
        Yarr::jitCompile(pattern, m_patternString, charSize, vm, jitCode);
        if (!jitCode.failureReason()) {
            m_state = JITCode;
            return;
        }
    }
#else
    UNUSED_PARAM(charSize);
#endif

    if (Options::dumpCompiledRegExpPatterns())
        dataLog("Can't JIT this regular expression: \"", m_patternString, "\"\n");

    m_state = ByteCode;
    m_regExpBytecode = byteCodeCompilePattern(vm, pattern, m_constructionErrorCode);
    if (!m_regExpBytecode)
        m_state = ParseError;
}
|
|
|
|
// Matches s starting at startOffset, filling ovector with the resulting offsets;
// forwards to matchInline() with the global object's VM.
int RegExp::match(JSGlobalObject* globalObject, const String& s, unsigned startOffset, Vector<int>& ovector)
{
    VM& vm = globalObject->vm();
    return matchInline(globalObject, vm, s, startOffset, ovector);
}
|
|
|
|
// Attempts a match from a compiler thread while holding the cell lock.
//
// Returns false without matching when no code exists for the subject's character width,
// and false when the RegExp is in the ParseError state after matching. Otherwise returns
// true with the match position stored in `position` and the offsets in `ovector`.
bool RegExp::matchConcurrently(
    VM& vm, const String& s, unsigned startOffset, int& position, Vector<int>& ovector)
{
    auto locker = holdLock(cellLock());

    const auto charSize = s.is8Bit() ? Yarr::Char8 : Yarr::Char16;
    if (!hasCodeFor(charSize))
        return false;

    position = matchInline<Vector<int>&, Yarr::MatchFrom::CompilerThread>(nullptr, vm, s, startOffset, ovector);
    return m_state != ParseError;
}
|
|
|
|
// Match-only counterpart of compile(): same steps, but JIT compilation is requested
// with Yarr::MatchOnly. Holds the cell lock for the duration.
//
// A parse error moves the RegExp to ParseError. A JIT compile that reports no failure
// reason moves it to JITCode. Otherwise the pattern is compiled to bytecode, with
// ParseError as the state when that produces nothing.
void RegExp::compileMatchOnly(VM* vm, Yarr::YarrCharSize charSize)
{
    auto locker = holdLock(cellLock());

    Yarr::YarrPattern pattern(m_patternString, m_flags, m_constructionErrorCode);
    if (hasError(m_constructionErrorCode)) {
        m_state = ParseError;
        return;
    }
    ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);

    // First compilation: register with the VM's RegExp cache before any code is produced.
    if (!hasCode()) {
        ASSERT(m_state == NotCompiled);
        vm->regExpCache()->addToStrongCache(this);
        m_state = ByteCode;
    }

#if ENABLE(YARR_JIT)
    bool shouldTryJIT = !pattern.containsUnsignedLengthPattern() && Options::useRegExpJIT();
#if !ENABLE(YARR_JIT_BACKREFERENCES)
    shouldTryJIT = shouldTryJIT && !pattern.m_containsBackreferences;
#endif
    if (shouldTryJIT) {
        auto& jitCode = ensureRegExpJITCode();
        Yarr::jitCompile(pattern, m_patternString, charSize, vm, jitCode, Yarr::MatchOnly);
        if (!jitCode.failureReason()) {
            m_state = JITCode;
            return;
        }
    }
#else
    UNUSED_PARAM(charSize);
#endif

    if (Options::dumpCompiledRegExpPatterns())
        dataLog("Can't JIT this regular expression: \"", m_patternString, "\"\n");

    m_state = ByteCode;
    m_regExpBytecode = byteCodeCompilePattern(vm, pattern, m_constructionErrorCode);
    if (!m_regExpBytecode)
        m_state = ParseError;
}
|
|
|
|
// Matches s starting at startOffset and returns the MatchResult;
// forwards to matchInline() with the global object's VM.
MatchResult RegExp::match(JSGlobalObject* globalObject, const String& s, unsigned startOffset)
{
    VM& vm = globalObject->vm();
    return matchInline(globalObject, vm, s, startOffset);
}
|
|
|
|
// Attempts a match-only match from a compiler thread while holding the cell lock.
// Returns false without matching when no match-only code exists for the subject's
// character width; otherwise stores the outcome in `result` and returns true.
bool RegExp::matchConcurrently(VM& vm, const String& s, unsigned startOffset, MatchResult& result)
{
    auto locker = holdLock(cellLock());

    const auto charSize = s.is8Bit() ? Yarr::Char8 : Yarr::Char16;
    if (!hasMatchOnlyCodeFor(charSize))
        return false;

    result = matchInline<Yarr::MatchFrom::CompilerThread>(nullptr, vm, s, startOffset);
    return true;
}
|
|
|
|
void RegExp::deleteCode()
|
|
{
|
|
auto locker = holdLock(cellLock());
|
|
|
|
if (!hasCode())
|
|
return;
|
|
m_state = NotCompiled;
|
|
#if ENABLE(YARR_JIT)
|
|
if (m_regExpJITCode)
|
|
m_regExpJITCode->clear();
|
|
#endif
|
|
m_regExpBytecode = nullptr;
|
|
}
|
|
|
|
#if ENABLE(YARR_JIT_DEBUG)
|
|
void RegExp::matchCompareWithInterpreter(const String& s, int startOffset, int* offsetVector, int jitResult)
|
|
{
|
|
int offsetVectorSize = (m_numSubpatterns + 1) * 2;
|
|
Vector<int> interpreterOvector;
|
|
interpreterOvector.resize(offsetVectorSize);
|
|
int* interpreterOffsetVector = interpreterOvector.data();
|
|
int interpreterResult = 0;
|
|
int differences = 0;
|
|
|
|
// Initialize interpreterOffsetVector with the return value (index 0) and the
|
|
// first subpattern start indicies (even index values) set to -1.
|
|
// No need to init the subpattern end indicies.
|
|
for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
|
|
interpreterOffsetVector[j] = -1;
|
|
|
|
interpreterResult = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(interpreterOffsetVector));
|
|
|
|
if (jitResult != interpreterResult)
|
|
differences++;
|
|
|
|
for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++)
|
|
if ((offsetVector[j] != interpreterOffsetVector[j])
|
|
|| ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])))
|
|
differences++;
|
|
|
|
if (differences) {
|
|
dataLogF("RegExp Discrepency for /%s/\n string input ", pattern().utf8().data());
|
|
unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset);
|
|
|
|
dataLogF((segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset);
|
|
|
|
if (jitResult != interpreterResult) {
|
|
dataLogF(" JIT result = %d, interpreted result = %d\n", jitResult, interpreterResult);
|
|
differences--;
|
|
} else {
|
|
dataLogF(" Correct result = %d\n", jitResult);
|
|
}
|
|
|
|
if (differences) {
|
|
for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) {
|
|
if (offsetVector[j] != interpreterOffsetVector[j])
|
|
dataLogF(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]);
|
|
if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))
|
|
dataLogF(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#if ENABLE(REGEXP_TRACING)
|
|
void RegExp::printTraceData()
|
|
{
|
|
char formattedPattern[41];
|
|
char rawPattern[41];
|
|
|
|
strncpy(rawPattern, pattern().utf8().data(), 40);
|
|
rawPattern[40]= '\0';
|
|
|
|
int pattLen = strlen(rawPattern);
|
|
|
|
snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);
|
|
|
|
#if ENABLE(YARR_JIT)
|
|
const size_t jitAddrSize = 20;
|
|
char jit8BitMatchOnlyAddr[jitAddrSize] { };
|
|
char jit16BitMatchOnlyAddr[jitAddrSize] { };
|
|
char jit8BitMatchAddr[jitAddrSize] { };
|
|
char jit16BitMatchAddr[jitAddrSize] { };
|
|
switch (m_state) {
|
|
case ParseError:
|
|
case NotCompiled:
|
|
break;
|
|
case ByteCode:
|
|
snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "fallback ");
|
|
snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "---- ");
|
|
snprintf(jit8BitMatchAddr, jitAddrSize, "fallback ");
|
|
snprintf(jit16BitMatchAddr, jitAddrSize, "---- ");
|
|
break;
|
|
case JITCode: {
|
|
Yarr::YarrCodeBlock& codeBlock = *m_regExpJITCode.get();
|
|
snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast<uintptr_t>(codeBlock.get8BitMatchOnlyAddr()));
|
|
snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast<uintptr_t>(codeBlock.get16BitMatchOnlyAddr()));
|
|
snprintf(jit8BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast<uintptr_t>(codeBlock.get8BitMatchAddr()));
|
|
snprintf(jit16BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast<uintptr_t>(codeBlock.get16BitMatchAddr()));
|
|
break;
|
|
}
|
|
}
|
|
#else
|
|
const char* jit8BitMatchOnlyAddr = "JIT Off";
|
|
const char* jit16BitMatchOnlyAddr = "";
|
|
const char* jit8BitMatchAddr = "JIT Off";
|
|
const char* jit16BitMatchAddr = "";
|
|
#endif
|
|
unsigned averageMatchOnlyStringLen = (unsigned)(m_rtMatchOnlyTotalSubjectStringLen / m_rtMatchOnlyCallCount);
|
|
unsigned averageMatchStringLen = (unsigned)(m_rtMatchTotalSubjectStringLen / m_rtMatchCallCount);
|
|
|
|
printf("%-40.40s %16.16s %16.16s %10d %10d %10u\n", formattedPattern, jit8BitMatchOnlyAddr, jit16BitMatchOnlyAddr, m_rtMatchOnlyCallCount, m_rtMatchOnlyFoundCount, averageMatchOnlyStringLen);
|
|
printf(" %16.16s %16.16s %10d %10d %10u\n", jit8BitMatchAddr, jit16BitMatchAddr, m_rtMatchCallCount, m_rtMatchFoundCount, averageMatchStringLen);
|
|
}
|
|
#endif
|
|
|
|
// Builds the "/pattern/flags" source form of regExp, with flags in the order g, i, m, s, u, y.
static CString regexpToSourceString(const RegExp* regExp)
{
    // Closing '/' plus up to six flag letters plus the terminating NUL: eight bytes.
    // Zero-initialization keeps the string terminated however many flags are set.
    char postfix[8] = { '/', 0, 0, 0, 0, 0, 0, 0 };
    int index = 1;
    if (regExp->global())
        postfix[index++] = 'g';
    if (regExp->ignoreCase())
        postfix[index++] = 'i';
    // Advance the index here as well; otherwise the next flag written overwrites 'm'.
    if (regExp->multiline())
        postfix[index++] = 'm';
    if (regExp->dotAll())
        postfix[index++] = 's';
    if (regExp->unicode())
        postfix[index++] = 'u';
    if (regExp->sticky())
        postfix[index++] = 'y';

    return toCString("/", regExp->pattern().impl(), postfix);
}
|
|
|
|
void RegExp::dumpToStream(const JSCell* cell, PrintStream& out)
|
|
{
|
|
out.print(regexpToSourceString(jsCast<const RegExp*>(cell)));
|
|
}
|
|
|
|
} // namespace JSC
|