darling-JavaScriptCore/testRegExp.cpp
2018-01-02 21:17:19 -08:00

536 lines
15 KiB
C++

/*
* Copyright (C) 2011, 2015 Apple Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include "config.h"
#include "RegExp.h"
#include <wtf/CurrentTime.h>
#include "InitializeThreading.h"
#include "JSCInlines.h"
#include "JSGlobalObject.h"
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wtf/text/StringBuilder.h>
#if !OS(WINDOWS)
#include <unistd.h>
#endif
#if HAVE(SYS_TIME_H)
#include <sys/time.h>
#endif
#if COMPILER(MSVC)
#include <crtdbg.h>
#include <mmsystem.h>
#include <windows.h>
#endif
// Number of bytes handed to fgets() per read in runFromFiles(); the line buffer
// is allocated with one extra byte. A longer input line is therefore returned
// in several pieces.
const int MaxLineLength = 100 * 1024;
// Bring the JavaScriptCore and WTF names used throughout this file into scope.
using namespace JSC;
using namespace WTF;
// Settings gathered from the process command line.
struct CommandLine {
    CommandLine() = default;

    // Not read anywhere in the visible source.
    bool interactive { false };
    // Forwarded to runFromFiles() to enable detailed mismatch reporting.
    bool verbose { false };
    // Handed to GlobalObject::create() by realMain().
    Vector<String> arguments;
    // Paths of the test data files to run.
    Vector<String> files;
};
// Simple interval timer built on monotonicallyIncreasingTime().
// Intended use: start(), do the work, stop(), then read getElapsedMS().
class StopWatch {
public:
    void start();
    void stop();
    long getElapsedMS() const; // call stop() first

private:
    // Zero-initialised so that getElapsedMS() on a watch that was never
    // started or stopped returns 0 instead of reading indeterminate values.
    double m_startTime { 0 };
    double m_stopTime { 0 };
};

// Records the current monotonic time as the start of the interval.
void StopWatch::start()
{
    m_startTime = monotonicallyIncreasingTime();
}

// Records the current monotonic time as the end of the interval.
void StopWatch::stop()
{
    m_stopTime = monotonicallyIncreasingTime();
}

// Returns (stop - start) scaled by 1000 and truncated to long. Only meaningful
// once both start() and stop() have been called.
long StopWatch::getElapsedMS() const
{
    return static_cast<long>((m_stopTime - m_startTime) * 1000);
}
// One expectation parsed from a test data line by parseTestLine() and checked
// by testOneRegExp().
struct RegExpTest {
    RegExpTest() = default;

    // Text the regular expression is matched against.
    String subject;
    // Start offset passed to RegExp::match().
    int offset { 0 };
    // Value RegExp::match() is expected to return.
    int result { 0 };
    // Expected output vector, appended as (start, end) pairs.
    Vector<int, 32> expectVector;
};
class GlobalObject : public JSGlobalObject {
private:
GlobalObject(VM&, Structure*, const Vector<String>& arguments);
public:
typedef JSGlobalObject Base;
static GlobalObject* create(VM& vm, Structure* structure, const Vector<String>& arguments)
{
GlobalObject* globalObject = new (NotNull, allocateCell<GlobalObject>(vm.heap)) GlobalObject(vm, structure, arguments);
return globalObject;
}
DECLARE_INFO;
static const bool needsDestructor = false;
static Structure* createStructure(VM& vm, JSValue prototype)
{
return Structure::create(vm, 0, prototype, TypeInfo(GlobalObjectType, StructureFlags), info());
}
protected:
void finishCreation(VM& vm, const Vector<String>& arguments)
{
Base::finishCreation(vm);
UNUSED_PARAM(arguments);
}
};
// Definition of GlobalObject's static ClassInfo (presumably declared by
// DECLARE_INFO in the class body). It is labelled "global" and refers to
// JSGlobalObject::s_info.
const ClassInfo GlobalObject::s_info = { "global", &JSGlobalObject::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(GlobalObject) };
// Constructs the base JSGlobalObject, then runs finishCreation() right away.
GlobalObject::GlobalObject(VM& vm, Structure* structure, const Vector<String>& arguments)
    : Base(vm, structure)
{
    finishCreation(vm, arguments);
}
// Use SEH for Release builds only to get rid of the crash report dialog
// (luckily the same tests fail in Release and Debug builds so far). Need to
// be in a separate main function because the realMain function requires object
// unwinding.
//
// TRY and EXCEPT(x) bracket the guarded statement. With MSVC and _DEBUG not
// defined they expand to a __try { ... } __except block whose handler runs x;
// in every other configuration both expand to nothing, so x is never evaluated.
#if COMPILER(MSVC) && !defined(_DEBUG)
#define TRY __try {
#define EXCEPT(x) } __except (EXCEPTION_EXECUTE_HANDLER) { x; }
#else
#define TRY
#define EXCEPT(x)
#endif
int realMain(int argc, char** argv);

// Process entry point: performs platform setup, initialises JSC threading and
// delegates to realMain(). Returns realMain()'s result, or 3 when the
// EXCEPT handler runs.
int main(int argc, char** argv)
{
#if OS(WINDOWS)
    // Cygwin calls ::SetErrorMode(SEM_FAILCRITICALERRORS), which we will inherit. This is bad for
    // testing/debugging, as it causes the post-mortem debugger not to be invoked. We reset the
    // error mode here to work around Cygwin's behavior. See <http://webkit.org/b/55222>.
    ::SetErrorMode(0);

#if defined(_DEBUG)
    // Route CRT warnings, errors and assertion reports to stderr.
    _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
    _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE);
    _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
    _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE);
    _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
    _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE);
#endif

    timeBeginPeriod(1);
#endif

    // Initialize JSC before getting VM.
    JSC::initializeThreading();

    // We can't use destructors in the following code because it uses Windows
    // Structured Exception Handling
    int exitCode = 0;
    TRY
        exitCode = realMain(argc, argv);
    EXCEPT(exitCode = 3)
    return exitCode;
}
// Runs one expectation against a compiled regexp.
//
// Matches regExpTest->subject starting at regExpTest->offset and compares both
// the return value of RegExp::match() and the resulting output vector with the
// expected values. When verbose is true every discrepancy is printed, tagged
// with lineNumber. Returns true when everything agrees.
static bool testOneRegExp(VM& vm, RegExp* regexp, RegExpTest* regExpTest, bool verbose, unsigned int lineNumber)
{
    const Vector<int, 32>& expected = regExpTest->expectVector;

    Vector<int> outVector;
    outVector.resize(expected.size());
    int matchResult = regexp->match(vm, regExpTest->subject, regExpTest->offset, outVector);

    // lineNumber is unsigned, so it is printed with %u throughout.
    if (matchResult != regExpTest->result) {
        if (verbose)
            printf("Line %u: results mismatch - expected %d got %d\n", lineNumber, regExpTest->result, matchResult);
        return false;
    }

    // A correctly predicted result of -1 leaves no output vector to compare.
    if (matchResult == -1)
        return true;

    if (outVector.size() != expected.size()) {
        if (verbose) {
#if OS(WINDOWS)
            printf("Line %u: output vector size mismatch - expected %Iu got %Iu\n", lineNumber, expected.size(), outVector.size());
#else
            printf("Line %u: output vector size mismatch - expected %zu got %zu\n", lineNumber, expected.size(), outVector.size());
#endif
        }
        return false;
    }

    if (outVector.size() % 2) {
        if (verbose) {
#if OS(WINDOWS)
            printf("Line %u: output vector size is odd (%Iu), should be even\n", lineNumber, outVector.size());
#else
            printf("Line %u: output vector size is odd (%zu), should be even\n", lineNumber, outVector.size());
#endif
        }
        return false;
    }

    // Check in pairs since the first value of the pair could be -1 in which case the second doesn't matter.
    // Every mismatch is reported, so the loop keeps going after a failure.
    bool result = true;
    size_t pairCount = outVector.size() / 2;
    for (size_t i = 0; i < pairCount; ++i) {
        size_t startIndex = i * 2;
        size_t endIndex = startIndex + 1;

        if (outVector[startIndex] != expected[startIndex]) {
            result = false;
            if (verbose) {
#if OS(WINDOWS)
                printf("Line %u: output vector mismatch at index %Iu - expected %d got %d\n", lineNumber, startIndex, expected[startIndex], outVector[startIndex]);
#else
                printf("Line %u: output vector mismatch at index %zu - expected %d got %d\n", lineNumber, startIndex, expected[startIndex], outVector[startIndex]);
#endif
            }
        }

        // NOTE(review): the end value of pair 0 is never compared because of
        // the (i > 0) guard; kept as-is - confirm whether that is intentional.
        if ((i > 0) && (expected[startIndex] != -1) && (outVector[endIndex] != expected[endIndex])) {
            result = false;
            if (verbose) {
#if OS(WINDOWS)
                printf("Line %u: output vector mismatch at index %Iu - expected %d got %d\n", lineNumber, endIndex, expected[endIndex], outVector[endIndex]);
#else
                printf("Line %u: output vector mismatch at index %zu - expected %d got %d\n", lineNumber, endIndex, expected[endIndex], outVector[endIndex]);
#endif
            }
        }
    }

    return result;
}
// Copies characters from buffer into builder until an unescaped termChar.
//
// A backslash escapes the next character: \0 \a \b \f \n \r \t \v \\ \? map to
// the usual control characters, \uXXXX is read as a hexadecimal code unit, and
// any other escaped character is appended as itself (so an escaped termChar
// does not terminate the scan).
//
// buffer must be NUL-terminated because the \u case parses it with sscanf().
// Returns the index of the terminating character within buffer, or -1 when the
// terminator is missing or a \u escape is malformed.
static int scanString(char* buffer, int bufferLength, StringBuilder& builder, char termChar)
{
    bool escape = false;

    for (int i = 0; i < bufferLength; ++i) {
        // Convert through unsigned char: where plain char is signed, bytes
        // >= 0x80 would otherwise be sign-extended to 0xFF80..0xFFFF.
        UChar c = static_cast<unsigned char>(buffer[i]);

        if (!escape) {
            // Compare the raw byte so the test does not depend on widening.
            if (buffer[i] == termChar)
                return i;
            if (c == '\\')
                escape = true;
            else
                builder.append(c);
            continue;
        }

        switch (c) {
        case '0':
            c = '\0';
            break;
        case 'a':
            c = '\a';
            break;
        case 'b':
            c = '\b';
            break;
        case 'f':
            c = '\f';
            break;
        case 'n':
            c = '\n';
            break;
        case 'r':
            c = '\r';
            break;
        case 't':
            c = '\t';
            break;
        case 'v':
            c = '\v';
            break;
        case '\\':
            c = '\\';
            break;
        case '?':
            c = '\?';
            break;
        case 'u': {
            // Four more characters, buffer[i + 1] .. buffer[i + 4], are required.
            if ((i + 4) >= bufferLength)
                return -1;
            unsigned int charValue;
            if (sscanf(buffer + i + 1, "%04x", &charValue) != 1)
                return -1;
            c = static_cast<UChar>(charValue);
            i += 4;
            break;
        }
        }

        builder.append(c);
        escape = false;
    }

    return -1;
}
// Parses a line of the form /pattern/flags and compiles it.
//
// Returns the RegExp when the line is well formed and the expression is valid;
// returns nullptr when the line does not start with '/', the closing '/' is
// missing, or the resulting RegExp reports itself invalid.
static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength)
{
    if (line[0] != '/')
        return nullptr;

    StringBuilder pattern;
    int patternEnd = scanString(line + 1, lineLength - 1, pattern, '/');

    // scanString() signals a missing terminator with -1. It must be checked
    // before rebasing: -1 + 1 is index 0, which holds the *opening* '/', so an
    // unterminated pattern would otherwise pass the terminator test below.
    if (patternEnd < 0)
        return nullptr;

    // Convert from an index relative to line + 1 to an index into line.
    int i = patternEnd + 1;
    if ((i >= lineLength) || (line[i] != '/'))
        return nullptr;
    ++i;

    // Everything after the closing '/' is interpreted as the flags.
    RegExp* regExp = RegExp::create(vm, pattern.toString(), regExpFlags(line + i));
    return regExp->isValid() ? regExp : nullptr;
}
// Parses an expectation line of the form
//     <space>"subject", offset, result, (start0, end0, start1, end1, ...)
// into a newly allocated RegExpTest.
//
// line must be NUL-terminated. Returns nullptr for a malformed line; on
// success the caller owns the returned object and must delete it.
static RegExpTest* parseTestLine(char* line, int lineLength)
{
    if ((line[0] != ' ') || (line[1] != '"'))
        return nullptr;

    StringBuilder subjectString;
    int subjectEnd = scanString(line + 2, lineLength - 2, subjectString, '"');

    // scanString() returns -1 for an unterminated string. Check before
    // rebasing: -1 + 2 is index 1, the *opening* quote, which would otherwise
    // be mistaken for the closing one.
    if (subjectEnd < 0)
        return nullptr;

    int i = subjectEnd + 2;
    if ((i >= (lineLength - 2)) || (line[i] != '"') || (line[i + 1] != ',') || (line[i + 2] != ' '))
        return nullptr;
    i += 3;

    // Moves index just past the next space. Fails rather than stepping over
    // the NUL terminator, which would make later parsing read stale buffer
    // contents.
    auto skipPastNextSpace = [line](int& index) {
        while (line[index] && line[index] != ' ')
            ++index;
        if (!line[index])
            return false;
        ++index;
        return true;
    };

    int offset;
    if (sscanf(line + i, "%d, ", &offset) != 1)
        return nullptr;
    if (!skipPastNextSpace(i))
        return nullptr;

    int matchResult;
    if (sscanf(line + i, "%d, ", &matchResult) != 1)
        return nullptr;
    if (!skipPastNextSpace(i))
        return nullptr;

    if (line[i++] != '(')
        return nullptr;

    RegExpTest* result = new RegExpTest();
    result->subject = subjectString.toString();
    result->offset = offset;
    result->result = matchResult;

    while (line[i] && line[i] != ')') {
        int start, end;
        if (sscanf(line + i, "%d, %d", &start, &end) != 2) {
            delete result;
            return nullptr;
        }
        result->expectVector.append(start);
        result->expectVector.append(end);

        // Step over "start,". The sscanf() above matched a comma after the
        // first number, so this loop always stops on that comma.
        while (line[i] && (line[i] != ',') && (line[i] != ')'))
            i++;
        i++;

        // Step over "end" up to the next separator.
        while (line[i] && (line[i] != ',') && (line[i] != ')'))
            i++;
        if (line[i] == ')')
            break;
        // Anything other than ',' here means the line ended before ')'.
        if (line[i] != ',') {
            delete result;
            return nullptr;
        }
        i++;
    }

    return result;
}
// Runs every test data file in files and prints a summary to stdout.
//
// Each file is read line by line; the first character selects the action:
//   '#'  comment, ignored
//   '/'  a /pattern/flags line that becomes the current regexp
//   ' '  an expectation checked against the current regexp
//   '-'  followed by a regexp that is expected NOT to compile
// Other lines are ignored.
//
// Returns false when any test failed or any file could not be opened, so the
// caller can turn the outcome into a process exit status.
static bool runFromFiles(GlobalObject* globalObject, const Vector<String>& files, bool verbose)
{
    unsigned tests = 0;
    unsigned failures = 0;
    bool success = true;
    VM& vm = globalObject->vm();
    char* lineBuffer = new char[MaxLineLength + 1];

    for (size_t i = 0; i < files.size(); i++) {
        FILE* testCasesFile = fopen(files[i].utf8().data(), "rb");
        if (!testCasesFile) {
            printf("Unable to open test data file \"%s\"\n", files[i].utf8().data());
            success = false;
            continue;
        }

        RegExp* regexp = nullptr;
        unsigned int lineNumber = 0;

        while (char* linePtr = fgets(lineBuffer, MaxLineLength, testCasesFile)) {
            size_t lineLength = strlen(linePtr);
            // The length guard matters: a line beginning with a NUL byte has
            // length 0, and indexing lineLength - 1 would read before the buffer.
            if (lineLength && linePtr[lineLength - 1] == '\n') {
                linePtr[lineLength - 1] = '\0';
                --lineLength;
            }
            ++lineNumber;

            if (linePtr[0] == '#')
                continue;

            if (linePtr[0] == '/') {
                regexp = parseRegExpLine(vm, linePtr, lineLength);
            } else if (linePtr[0] == ' ') {
                RegExpTest* regExpTest = parseTestLine(linePtr, lineLength);
                // An expectation only counts when both it and the preceding
                // regexp line parsed successfully.
                if (regexp && regExpTest) {
                    ++tests;
                    if (!testOneRegExp(vm, regexp, regExpTest, verbose, lineNumber)) {
                        failures++;
                        printf("Failure on line %u\n", lineNumber);
                    }
                }
                delete regExpTest;
            } else if (linePtr[0] == '-') {
                tests++;
                regexp = nullptr; // Reset the live regexp to avoid confusing other subsequent tests
                bool successfullyParsed = parseRegExpLine(vm, linePtr + 1, lineLength - 1);
                if (successfullyParsed) {
                    failures++;
                    fprintf(stderr, "Failure on line %u. '%s' is not a valid regexp\n", lineNumber, linePtr + 1);
                }
            }
        }

        fclose(testCasesFile);
    }

    if (failures) {
        printf("%u tests run, %u failures\n", tests, failures);
        success = false;
    } else
        printf("%u tests passed\n", tests);

    delete[] lineBuffer;

#if ENABLE(REGEXP_TRACING)
    vm.dumpRegExpTrace();
#endif
    return success;
}
// NOTE(review): not referenced anywhere else in the visible source; presumably
// a leftover debugging toggle - confirm before relying on or removing it.
#define RUNNING_FROM_XCODE 0
// Writes the usage text to stderr and terminates the process. The exit status
// is EXIT_SUCCESS when help was explicitly requested, EXIT_FAILURE otherwise.
static NO_RETURN void printUsageStatement(bool help = false)
{
    fputs("Usage: regexp_test [options] file\n", stderr);
    fputs(" -h|--help Prints this help message\n", stderr);
    fputs(" -v|--verbose Verbose output\n", stderr);

    if (help)
        exit(EXIT_SUCCESS);
    exit(EXIT_FAILURE);
}
// Fills options from the command line.
//
//   -h, --help     print usage and exit (does not return)
//   -v, --verbose  set options.verbose
//   anything else  appended to options.files
//
// options.arguments is left untouched. The previous trailing loop that
// appended to it could never execute, because the main loop always consumes
// every argument.
static void parseArguments(int argc, char** argv, CommandLine& options)
{
    for (int i = 1; i < argc; ++i) {
        const char* arg = argv[i];

        if (!strcmp(arg, "-h") || !strcmp(arg, "--help"))
            printUsageStatement(true);

        if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose"))
            options.verbose = true;
        else
            options.files.append(argv[i]);
    }
}
int realMain(int argc, char** argv)
{
VM* vm = &VM::create(LargeHeap).leakRef();
JSLockHolder locker(vm);
CommandLine options;
parseArguments(argc, argv, options);
GlobalObject* globalObject = GlobalObject::create(*vm, GlobalObject::createStructure(*vm, jsNull()), options.arguments);
bool success = runFromFiles(globalObject, options.files, options.verbose);
return success ? 0 : 3;
}
#if OS(WINDOWS)
// Exported C entry point for Windows builds; simply forwards to main().
extern "C" __declspec(dllexport) int WINAPI dllLauncherEntryPoint(int argc, const char* argv[])
{
    // main() takes a non-const argv, so the constness is cast away here.
    char** mutableArgv = const_cast<char**>(argv);
    return main(argc, mutableArgv);
}
#endif