diff --git a/js/src/Makefile.in b/js/src/Makefile.in index 8023b1750b05..71f35a53d881 100644 --- a/js/src/Makefile.in +++ b/js/src/Makefile.in @@ -362,7 +362,6 @@ CPPSRCS += MethodJIT.cpp \ Retcon.cpp \ TrampolineCompiler.cpp \ $(NULL) -# PICStubCompiler.cpp \ ifeq (86, $(findstring 86,$(TARGET_CPU))) ifeq (x86_64, $(TARGET_CPU)) @@ -420,14 +419,17 @@ ifeq (,$(filter arm% sparc %86 x86_64,$(TARGET_CPU))) VPATH += $(srcdir)/assembler \ $(srcdir)/assembler/wtf \ - $(srcdir)/yarr/pcre \ + $(srcdir)/yarr\ $(NULL) -CPPSRCS += pcre_compile.cpp \ - pcre_exec.cpp \ - pcre_tables.cpp \ - pcre_xclass.cpp \ - pcre_ucp_searchfuncs.cpp \ +CPPSRCS += \ + Assertions.cpp \ + OSAllocatorPosix.cpp \ + OSAllocatorWin.cpp \ + PageBlock.cpp \ + YarrInterpreter.cpp \ + YarrPattern.cpp \ + YarrSyntaxChecker.cpp \ $(NULL) else @@ -440,9 +442,6 @@ VPATH += $(srcdir)/assembler \ $(srcdir)/assembler/assembler \ $(srcdir)/methodjit \ $(srcdir)/yarr \ - $(srcdir)/yarr/yarr \ - $(srcdir)/yarr/pcre \ - $(srcdir)/yarr/wtf \ $(NONE) CPPSRCS += Assertions.cpp \ @@ -451,16 +450,16 @@ CPPSRCS += Assertions.cpp \ ExecutableAllocatorOS2.cpp \ ExecutableAllocator.cpp \ ARMAssembler.cpp \ - Logging.cpp \ + Logging.cpp \ MacroAssemblerARM.cpp \ MacroAssemblerX86Common.cpp \ - RegexCompiler.cpp \ - RegexJIT.cpp \ - pcre_compile.cpp \ - pcre_exec.cpp \ - pcre_tables.cpp \ - pcre_xclass.cpp \ - pcre_ucp_searchfuncs.cpp \ + OSAllocatorPosix.cpp \ + OSAllocatorWin.cpp \ + PageBlock.cpp \ + YarrInterpreter.cpp \ + YarrJIT.cpp \ + YarrPattern.cpp \ + YarrSyntaxChecker.cpp \ $(NONE) ifeq (86, $(findstring 86,$(TARGET_CPU))) @@ -653,7 +652,7 @@ check-malloc-function-usage: $(filter-out %jsalloc.h %jscntxt.h %jsutil.h, $(ALL # We desire these numbers to go down, not up. See "User guide to memory # management within SpiderMonkey" in jsutil.h. 
- $(srcdir)/config/check_source_count.py OffTheBooks:: 52 \ + $(srcdir)/config/check_source_count.py OffTheBooks:: 54 \ "in Makefile.in" "{cx,rt}->{new_,new_array,malloc_,calloc_,realloc_}" $^ # This should go to zero, if possible. $(srcdir)/config/check_source_count.py UnwantedForeground:: 34 \ diff --git a/js/src/assembler/assembler/ARMAssembler.h b/js/src/assembler/assembler/ARMAssembler.h index d45a1e41043f..8214c189d8af 100644 --- a/js/src/assembler/assembler/ARMAssembler.h +++ b/js/src/assembler/assembler/ARMAssembler.h @@ -847,7 +847,7 @@ namespace JSC { JmpSrc blx(int rm, Condition cc = AL) { -#if WTF_ARM_ARCH_AT_LEAST(5) +#if WTF_CPU_ARM && WTF_ARM_ARCH_VERSION >= 5 int s = m_buffer.uncheckedSize(); js::JaegerSpew( js::JSpew_Insns, @@ -980,7 +980,7 @@ namespace JSC { static ARMWord* getLdrImmAddress(ARMWord* insn) { -#if WTF_ARM_ARCH_AT_LEAST(5) +#if WTF_CPU_ARM && WTF_ARM_ARCH_VERSION >= 5 // Check for call if ((*insn & 0x0f7f0000) != 0x051f0000) { // Must be BLX diff --git a/js/src/assembler/assembler/AbstractMacroAssembler.h b/js/src/assembler/assembler/AbstractMacroAssembler.h index 260380acd307..3c14c80bd4b0 100644 --- a/js/src/assembler/assembler/AbstractMacroAssembler.h +++ b/js/src/assembler/assembler/AbstractMacroAssembler.h @@ -166,13 +166,13 @@ public: void* m_ptr; }; - // ImmPtr: + // TrustedImmPtr: // // A pointer sized immediate operand to an instruction - this is wrapped // in a class requiring explicit construction in order to differentiate // from pointers used as absolute addresses to memory operations - struct ImmPtr { - explicit ImmPtr(const void* value) + struct TrustedImmPtr { + explicit TrustedImmPtr(const void* value) : m_value(value) { } @@ -185,14 +185,21 @@ public: const void* m_value; }; - // Imm32: + struct ImmPtr : public TrustedImmPtr { + explicit ImmPtr(const void* value) + : TrustedImmPtr(value) + { + } + }; + + // TrustedImm32: // // A 32bit immediate operand to an instruction - this is wrapped in a // class requiring 
explicit construction in order to prevent RegisterIDs // (which are implemented as an enum) from accidentally being passed as // immediate values. - struct Imm32 { - explicit Imm32(int32_t value) + struct TrustedImm32 { + explicit TrustedImm32(int32_t value) : m_value(value) #if WTF_CPU_ARM || WTF_CPU_MIPS , m_isPointer(false) @@ -201,7 +208,7 @@ public: } #if !WTF_CPU_X86_64 - explicit Imm32(ImmPtr ptr) + explicit TrustedImm32(TrustedImmPtr ptr) : m_value(ptr.asIntptr()) #if WTF_CPU_ARM || WTF_CPU_MIPS , m_isPointer(true) @@ -223,6 +230,20 @@ public: #endif }; + + struct Imm32 : public TrustedImm32 { + explicit Imm32(int32_t value) + : TrustedImm32(value) + { + } +#if !WTF_CPU_X86_64 + explicit Imm32(TrustedImmPtr ptr) + : TrustedImm32(ptr) + { + } +#endif + }; + struct ImmDouble { union { struct { @@ -241,7 +262,6 @@ public: } }; - // Section 2: MacroAssembler code buffer handles // // The following types are used to reference items in the code buffer @@ -273,7 +293,7 @@ public: bool isUsed() const { return m_label.isUsed(); } void used() { m_label.used(); } - bool isValid() const { return m_label.isValid(); } + bool isSet() const { return m_label.isValid(); } private: JmpDst m_label; }; @@ -296,6 +316,8 @@ public: { } + bool isSet() const { return m_label.isValid(); } + private: JmpDst m_label; }; @@ -411,6 +433,20 @@ public: public: typedef js::Vector JumpVector; + JumpList() {} + + JumpList(const JumpList &other) + { + m_jumps.append(other.m_jumps); + } + + JumpList &operator=(const JumpList &other) + { + m_jumps.clear(); + m_jumps.append(other.m_jumps); + return *this; + } + void link(AbstractMacroAssembler* masm) { size_t size = m_jumps.length(); @@ -432,17 +468,22 @@ public: m_jumps.append(jump); } - void append(JumpList& other) + void append(const JumpList& other) { m_jumps.append(other.m_jumps.begin(), other.m_jumps.length()); } + void clear() + { + m_jumps.clear(); + } + bool empty() { return !m_jumps.length(); } - const JumpVector& jumps() { return 
m_jumps; } + const JumpVector& jumps() const { return m_jumps; } private: JumpVector m_jumps; diff --git a/js/src/assembler/assembler/MacroAssembler.h b/js/src/assembler/assembler/MacroAssembler.h index 73bda22a8ee2..8a73863515c8 100644 --- a/js/src/assembler/assembler/MacroAssembler.h +++ b/js/src/assembler/assembler/MacroAssembler.h @@ -95,12 +95,12 @@ public: storePtr(src, Address(stackPointerRegister, (index * sizeof(void*)))); } - void poke(Imm32 value, int index = 0) + void poke(TrustedImm32 value, int index = 0) { store32(value, Address(stackPointerRegister, (index * sizeof(void*)))); } - void poke(ImmPtr imm, int index = 0) + void poke(TrustedImmPtr imm, int index = 0) { storePtr(imm, Address(stackPointerRegister, (index * sizeof(void*)))); } @@ -117,7 +117,7 @@ public: branch32(cond, op1, op2).linkTo(target, this); } - void branch32(Condition cond, RegisterID op1, Imm32 imm, Label target) + void branch32(Condition cond, RegisterID op1, TrustedImm32 imm, Label target) { branch32(cond, op1, imm).linkTo(target, this); } @@ -177,21 +177,11 @@ public: and32(src, dest); } - void andPtr(Address address, RegisterID srcDest) - { - and32(address, srcDest); - } - void andPtr(Imm32 imm, RegisterID srcDest) { and32(imm, srcDest); } - void andPtr(ImmPtr ptr, RegisterID srcDest) - { - and32(Imm32(ptr), srcDest); - } - void notPtr(RegisterID srcDest) { not32(srcDest); @@ -212,11 +202,6 @@ public: or32(imm, dest); } - void orPtr(Address address, RegisterID srcDest) - { - or32(address, srcDest); - } - void subPtr(RegisterID src, RegisterID dest) { sub32(src, dest); @@ -278,27 +263,22 @@ public: store32(src, address); } - void storePtr(RegisterID src, BaseIndex address) - { - store32(src, address); - } - void storePtr(RegisterID src, void* address) { store32(src, address); } - void storePtr(ImmPtr imm, ImplicitAddress address) + void storePtr(TrustedImmPtr imm, ImplicitAddress address) { store32(Imm32(imm), address); } - void storePtr(ImmPtr imm, BaseIndex address) + void 
storePtr(TrustedImmPtr imm, BaseIndex address) { store32(Imm32(imm), address); } - void storePtr(ImmPtr imm, void* address) + void storePtr(TrustedImmPtr imm, void* address) { store32(Imm32(imm), address); } diff --git a/js/src/assembler/assembler/MacroAssemblerARM.cpp b/js/src/assembler/assembler/MacroAssemblerARM.cpp index 14b4166b7ea9..065c98197395 100644 --- a/js/src/assembler/assembler/MacroAssemblerARM.cpp +++ b/js/src/assembler/assembler/MacroAssemblerARM.cpp @@ -34,7 +34,7 @@ #include "MacroAssemblerARM.h" -#if WTF_PLATFORM_LINUX || WTF_PLATFORM_ANDROID +#if WTF_OS_LINUX || WTF_OS_ANDROID #include #include #include diff --git a/js/src/assembler/assembler/MacroAssemblerARM.h b/js/src/assembler/assembler/MacroAssemblerARM.h index 7413411f4500..2630bce7a909 100644 --- a/js/src/assembler/assembler/MacroAssemblerARM.h +++ b/js/src/assembler/assembler/MacroAssemblerARM.h @@ -91,14 +91,14 @@ public: m_assembler.adds_r(dest, dest, src); } - void add32(Imm32 imm, Address address) + void add32(TrustedImm32 imm, Address address) { load32(address, ARMRegisters::S1); add32(imm, ARMRegisters::S1); store32(ARMRegisters::S1, address); } - void add32(Imm32 imm, RegisterID dest) + void add32(TrustedImm32 imm, RegisterID dest) { m_assembler.adds_r(dest, dest, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); } @@ -173,7 +173,7 @@ public: m_assembler.orrs_r(dest, dest, src); } - void or32(Imm32 imm, RegisterID dest) + void or32(TrustedImm32 imm, RegisterID dest) { m_assembler.orrs_r(dest, dest, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); } @@ -211,12 +211,12 @@ public: m_assembler.subs_r(dest, dest, src); } - void sub32(Imm32 imm, RegisterID dest) + void sub32(TrustedImm32 imm, RegisterID dest) { m_assembler.subs_r(dest, dest, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); } - void sub32(Imm32 imm, Address address) + void sub32(TrustedImm32 imm, Address address) { load32(address, ARMRegisters::S1); sub32(imm, ARMRegisters::S1); @@ -240,7 +240,7 @@ public: 
m_assembler.eors_r(dest, dest, src); } - void xor32(Imm32 imm, RegisterID dest) + void xor32(TrustedImm32 imm, RegisterID dest) { m_assembler.eors_r(dest, dest, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); } @@ -380,7 +380,7 @@ public: m_assembler.baseIndexTransfer32(false, src, address.base, address.index, static_cast(address.scale), address.offset); } - void store32(Imm32 imm, BaseIndex address) + void store32(TrustedImm32 imm, BaseIndex address) { if (imm.m_isPointer) m_assembler.ldr_un_imm(ARMRegisters::S1, imm.m_value); @@ -389,7 +389,7 @@ public: store32(ARMRegisters::S1, address); } - void store32(Imm32 imm, ImplicitAddress address) + void store32(TrustedImm32 imm, ImplicitAddress address) { if (imm.m_isPointer) m_assembler.ldr_un_imm(ARMRegisters::S1, imm.m_value); @@ -404,7 +404,7 @@ public: m_assembler.dtr_u(false, src, ARMRegisters::S0, 0); } - void store32(Imm32 imm, void* address) + void store32(TrustedImm32 imm, void* address) { m_assembler.ldr_un_imm(ARMRegisters::S0, reinterpret_cast(address)); if (imm.m_isPointer) @@ -436,7 +436,7 @@ public: push(ARMRegisters::S0); } - void move(Imm32 imm, RegisterID dest) + void move(TrustedImm32 imm, RegisterID dest) { if (imm.m_isPointer) m_assembler.ldr_un_imm(dest, imm.m_value); @@ -449,7 +449,7 @@ public: m_assembler.mov_r(dest, src); } - void move(ImmPtr imm, RegisterID dest) + void move(TrustedImmPtr imm, RegisterID dest) { move(Imm32(imm), dest); } @@ -485,7 +485,7 @@ public: return Jump(m_assembler.jmp(ARMCondition(cond), useConstantPool)); } - Jump branch32(Condition cond, RegisterID left, Imm32 right, int useConstantPool = 0) + Jump branch32(Condition cond, RegisterID left, TrustedImm32 right, int useConstantPool = 0) { ASSERT(left != ARMRegisters::S0); if (right.m_isPointer) { @@ -500,21 +500,21 @@ public: // number of instructions emitted is constant, regardless of the argument // values. For ARM, this is identical to branch32WithPatch, except that it // does not generate a DataLabel32. 
- Jump branch32FixedLength(Condition cond, RegisterID left, Imm32 right) + Jump branch32FixedLength(Condition cond, RegisterID left, TrustedImm32 right) { m_assembler.ldr_un_imm(ARMRegisters::S1, right.m_value); return branch32(cond, left, ARMRegisters::S1, true); } // As branch32_force32, but allow the value ('right') to be patched. - Jump branch32WithPatch(Condition cond, RegisterID left, Imm32 right, DataLabel32 &dataLabel) + Jump branch32WithPatch(Condition cond, RegisterID left, TrustedImm32 right, DataLabel32 &dataLabel) { ASSERT(left != ARMRegisters::S1); dataLabel = moveWithPatch(right, ARMRegisters::S1); return branch32(cond, left, ARMRegisters::S1, true); } - Jump branch32WithPatch(Condition cond, Address left, Imm32 right, DataLabel32 &dataLabel) + Jump branch32WithPatch(Condition cond, Address left, TrustedImm32 right, DataLabel32 &dataLabel) { ASSERT(left.base != ARMRegisters::S1); load32(left, ARMRegisters::S1); @@ -534,19 +534,19 @@ public: return branch32(cond, ARMRegisters::S1, right); } - Jump branch32(Condition cond, Address left, Imm32 right) + Jump branch32(Condition cond, Address left, TrustedImm32 right) { load32(left, ARMRegisters::S1); return branch32(cond, ARMRegisters::S1, right); } - Jump branch32(Condition cond, BaseIndex left, Imm32 right) + Jump branch32(Condition cond, BaseIndex left, TrustedImm32 right) { load32(left, ARMRegisters::S1); return branch32(cond, ARMRegisters::S1, right); } - Jump branch32WithUnalignedHalfWords(Condition cond, BaseIndex left, Imm32 right) + Jump branch32WithUnalignedHalfWords(Condition cond, BaseIndex left, TrustedImm32 right) { load32WithUnalignedHalfWords(left, ARMRegisters::S1); return branch32(cond, ARMRegisters::S1, right); @@ -828,7 +828,7 @@ public: setTest32(cond, address, mask, dest); } - void add32(Imm32 imm, RegisterID src, RegisterID dest) + void add32(TrustedImm32 imm, RegisterID src, RegisterID dest) { m_assembler.add_r(dest, src, m_assembler.getImm(imm.m_value, ARMRegisters::S0)); } @@ 
-850,7 +850,7 @@ public: move(ARMRegisters::S1, dest); } - void add32(Imm32 imm, AbsoluteAddress address) + void add32(TrustedImm32 imm, AbsoluteAddress address) { m_assembler.ldr_un_imm(ARMRegisters::S1, reinterpret_cast(address.m_ptr)); m_assembler.dtr_u(true, ARMRegisters::S1, ARMRegisters::S1, 0); @@ -859,7 +859,7 @@ public: m_assembler.dtr_u(false, ARMRegisters::S1, ARMRegisters::S0, 0); } - void sub32(Imm32 imm, AbsoluteAddress address) + void sub32(TrustedImm32 imm, AbsoluteAddress address) { m_assembler.ldr_un_imm(ARMRegisters::S1, reinterpret_cast(address.m_ptr)); m_assembler.dtr_u(true, ARMRegisters::S1, ARMRegisters::S1, 0); @@ -880,7 +880,7 @@ public: return branch32(cond, ARMRegisters::S1, right); } - Jump branch32(Condition cond, AbsoluteAddress left, Imm32 right) + Jump branch32(Condition cond, AbsoluteAddress left, TrustedImm32 right) { load32(left.m_ptr, ARMRegisters::S1); return branch32(cond, ARMRegisters::S1, right); @@ -908,14 +908,14 @@ public: return Call::fromTailJump(oldJump); } - DataLabelPtr moveWithPatch(ImmPtr initialValue, RegisterID dest) + DataLabelPtr moveWithPatch(TrustedImmPtr initialValue, RegisterID dest) { DataLabelPtr dataLabel(this); m_assembler.ldr_un_imm(dest, reinterpret_cast(initialValue.m_value)); return dataLabel; } - DataLabel32 moveWithPatch(Imm32 initialValue, RegisterID dest) + DataLabel32 moveWithPatch(TrustedImm32 initialValue, RegisterID dest) { DataLabel32 dataLabel(this); m_assembler.ldr_un_imm(dest, initialValue.m_value); @@ -937,7 +937,7 @@ public: return jump; } - DataLabelPtr storePtrWithPatch(ImmPtr initialValue, ImplicitAddress address) + DataLabelPtr storePtrWithPatch(TrustedImmPtr initialValue, ImplicitAddress address) { DataLabelPtr dataLabel = moveWithPatch(initialValue, ARMRegisters::S1); store32(ARMRegisters::S1, address); diff --git a/js/src/assembler/assembler/MacroAssemblerARMv7.h b/js/src/assembler/assembler/MacroAssemblerARMv7.h index 5492f8246a06..2bdb6e8fdb5e 100644 --- 
a/js/src/assembler/assembler/MacroAssemblerARMv7.h +++ b/js/src/assembler/assembler/MacroAssemblerARMv7.h @@ -52,7 +52,7 @@ class MacroAssemblerARMv7 : public AbstractMacroAssembler { struct ArmAddress { enum AddressType { HasOffset, - HasIndex, + HasIndex } type; RegisterID base; union { @@ -113,7 +113,7 @@ public: DoubleGreaterThanOrUnordered = ARMv7Assembler::ConditionHI, DoubleGreaterThanOrEqualOrUnordered = ARMv7Assembler::ConditionHS, DoubleLessThanOrUnordered = ARMv7Assembler::ConditionLT, - DoubleLessThanOrEqualOrUnordered = ARMv7Assembler::ConditionLE, + DoubleLessThanOrEqualOrUnordered = ARMv7Assembler::ConditionLE }; static const RegisterID stackPointerRegister = ARMRegisters::sp; @@ -131,12 +131,12 @@ public: m_assembler.add(dest, dest, src); } - void add32(Imm32 imm, RegisterID dest) + void add32(TrustedImm32 imm, RegisterID dest) { add32(imm, dest, dest); } - void add32(Imm32 imm, RegisterID src, RegisterID dest) + void add32(TrustedImm32 imm, RegisterID src, RegisterID dest) { ARMThumbImmediate armImm = ARMThumbImmediate::makeUInt12OrEncodedImm(imm.m_value); if (armImm.isValid()) @@ -147,7 +147,7 @@ public: } } - void add32(Imm32 imm, Address address) + void add32(TrustedImm32 imm, Address address) { load32(address, dataTempRegister); @@ -170,7 +170,7 @@ public: add32(dataTempRegister, dest); } - void add32(Imm32 imm, AbsoluteAddress address) + void add32(TrustedImm32 imm, AbsoluteAddress address) { load32(address.m_ptr, dataTempRegister); @@ -239,7 +239,7 @@ public: m_assembler.orr(dest, dest, src); } - void or32(Imm32 imm, RegisterID dest) + void or32(TrustedImm32 imm, RegisterID dest) { ARMThumbImmediate armImm = ARMThumbImmediate::makeEncodedImm(imm.m_value); if (armImm.isValid()) @@ -285,7 +285,7 @@ public: m_assembler.sub(dest, dest, src); } - void sub32(Imm32 imm, RegisterID dest) + void sub32(TrustedImm32 imm, RegisterID dest) { ARMThumbImmediate armImm = ARMThumbImmediate::makeUInt12OrEncodedImm(imm.m_value); if (armImm.isValid()) @@ -296,7 
+296,7 @@ public: } } - void sub32(Imm32 imm, Address address) + void sub32(TrustedImm32 imm, Address address) { load32(address, dataTempRegister); @@ -319,7 +319,7 @@ public: sub32(dataTempRegister, dest); } - void sub32(Imm32 imm, AbsoluteAddress address) + void sub32(TrustedImm32 imm, AbsoluteAddress address) { load32(address.m_ptr, dataTempRegister); @@ -341,7 +341,7 @@ public: m_assembler.eor(dest, dest, src); } - void xor32(Imm32 imm, RegisterID dest) + void xor32(TrustedImm32 imm, RegisterID dest) { ARMThumbImmediate armImm = ARMThumbImmediate::makeEncodedImm(imm.m_value); if (armImm.isValid()) @@ -486,7 +486,7 @@ public: store32(src, setupArmAddress(address)); } - void store32(Imm32 imm, ImplicitAddress address) + void store32(TrustedImm32 imm, ImplicitAddress address) { move(imm, dataTempRegister); store32(dataTempRegister, setupArmAddress(address)); @@ -498,7 +498,7 @@ public: m_assembler.str(src, addressTempRegister, ARMThumbImmediate::makeUInt16(0)); } - void store32(Imm32 imm, void* address) + void store32(TrustedImm32 imm, void* address) { move(imm, dataTempRegister); store32(dataTempRegister, address); @@ -667,7 +667,7 @@ public: // // Move values in registers. 
- void move(Imm32 imm, RegisterID dest) + void move(TrustedImm32 imm, RegisterID dest) { uint32_t value = imm.m_value; @@ -693,7 +693,7 @@ public: m_assembler.mov(dest, src); } - void move(ImmPtr imm, RegisterID dest) + void move(TrustedImmPtr imm, RegisterID dest) { move(Imm32(imm), dest); } @@ -780,7 +780,7 @@ public: return Jump(makeBranch(cond)); } - Jump branch32(Condition cond, RegisterID left, Imm32 right) + Jump branch32(Condition cond, RegisterID left, TrustedImm32 right) { compare32(left, right); return Jump(makeBranch(cond)); @@ -798,21 +798,21 @@ public: return branch32(cond, dataTempRegister, right); } - Jump branch32(Condition cond, Address left, Imm32 right) + Jump branch32(Condition cond, Address left, TrustedImm32 right) { // use addressTempRegister incase the branch32 we call uses dataTempRegister. :-/ load32(left, addressTempRegister); return branch32(cond, addressTempRegister, right); } - Jump branch32(Condition cond, BaseIndex left, Imm32 right) + Jump branch32(Condition cond, BaseIndex left, TrustedImm32 right) { // use addressTempRegister incase the branch32 we call uses dataTempRegister. :-/ load32(left, addressTempRegister); return branch32(cond, addressTempRegister, right); } - Jump branch32WithUnalignedHalfWords(Condition cond, BaseIndex left, Imm32 right) + Jump branch32WithUnalignedHalfWords(Condition cond, BaseIndex left, TrustedImm32 right) { // use addressTempRegister incase the branch32 we call uses dataTempRegister. :-/ load32WithUnalignedHalfWords(left, addressTempRegister); @@ -825,7 +825,7 @@ public: return branch32(cond, dataTempRegister, right); } - Jump branch32(Condition cond, AbsoluteAddress left, Imm32 right) + Jump branch32(Condition cond, AbsoluteAddress left, TrustedImm32 right) { // use addressTempRegister incase the branch32 we call uses dataTempRegister. 
:-/ load32(left.m_ptr, addressTempRegister); @@ -1065,13 +1065,13 @@ public: m_assembler.mov(dest, ARMThumbImmediate::makeUInt16(0)); } - DataLabel32 moveWithPatch(Imm32 imm, RegisterID dst) + DataLabel32 moveWithPatch(TrustedImm32 imm, RegisterID dst) { moveFixedWidthEncoding(imm, dst); return DataLabel32(this); } - DataLabelPtr moveWithPatch(ImmPtr imm, RegisterID dst) + DataLabelPtr moveWithPatch(TrustedImmPtr imm, RegisterID dst) { moveFixedWidthEncoding(Imm32(imm), dst); return DataLabelPtr(this); @@ -1090,7 +1090,7 @@ public: return branch32(cond, addressTempRegister, dataTempRegister); } - DataLabelPtr storePtrWithPatch(ImmPtr initialValue, ImplicitAddress address) + DataLabelPtr storePtrWithPatch(TrustedImmPtr initialValue, ImplicitAddress address) { DataLabelPtr label = moveWithPatch(initialValue, dataTempRegister); store32(dataTempRegister, address); @@ -1179,7 +1179,7 @@ protected: return addressTempRegister; } - void moveFixedWidthEncoding(Imm32 imm, RegisterID dst) + void moveFixedWidthEncoding(TrustedImm32 imm, RegisterID dst) { uint32_t value = imm.m_value; m_assembler.movT3(dst, ARMThumbImmediate::makeUInt16(value & 0xffff)); diff --git a/js/src/assembler/assembler/MacroAssemblerCodeRef.h b/js/src/assembler/assembler/MacroAssemblerCodeRef.h index 841fa9647128..6acdfd67a74d 100644 --- a/js/src/assembler/assembler/MacroAssemblerCodeRef.h +++ b/js/src/assembler/assembler/MacroAssemblerCodeRef.h @@ -180,7 +180,8 @@ private: class MacroAssemblerCodeRef { public: MacroAssemblerCodeRef() - : m_size(0) + : m_executablePool(NULL), + m_size(0) { } @@ -191,6 +192,20 @@ public: { } + // Release the code memory in this code ref. 
+ void release() + { + if (!m_executablePool) + return; + +#if defined DEBUG && (defined WTF_CPU_X86 || defined WTF_CPU_X86_64) + void *addr = m_code.executableAddress(); + memset(addr, 0xcc, m_size); +#endif + m_executablePool->release(); + m_executablePool = NULL; + } + MacroAssemblerCodePtr m_code; ExecutablePool* m_executablePool; size_t m_size; diff --git a/js/src/assembler/assembler/MacroAssemblerSparc.h b/js/src/assembler/assembler/MacroAssemblerSparc.h index 3bdd2d871b1b..91a8f0e16163 100644 --- a/js/src/assembler/assembler/MacroAssemblerSparc.h +++ b/js/src/assembler/assembler/MacroAssemblerSparc.h @@ -97,14 +97,14 @@ namespace JSC { m_assembler.addcc_r(dest, src, dest); } - void add32(Imm32 imm, Address address) + void add32(TrustedImm32 imm, Address address) { load32(address, SparcRegisters::g2); add32(imm, SparcRegisters::g2); store32(SparcRegisters::g2, address); } - void add32(Imm32 imm, RegisterID dest) + void add32(TrustedImm32 imm, RegisterID dest) { if (m_assembler.isimm13(imm.m_value)) m_assembler.addcc_imm(dest, imm.m_value, dest); @@ -126,7 +126,7 @@ namespace JSC { m_assembler.andcc_r(dest, SparcRegisters::g2, dest); } - void add32(Imm32 imm, RegisterID src, RegisterID dest) + void add32(TrustedImm32 imm, RegisterID src, RegisterID dest) { if (m_assembler.isimm13(imm.m_value)) m_assembler.addcc_imm(src, imm.m_value, dest); @@ -194,7 +194,7 @@ namespace JSC { m_assembler.orcc_r(dest, src, dest); } - void or32(Imm32 imm, RegisterID dest) + void or32(TrustedImm32 imm, RegisterID dest) { if (m_assembler.isimm13(imm.m_value)) m_assembler.orcc_imm(dest, imm.m_value, dest); @@ -240,7 +240,7 @@ namespace JSC { m_assembler.subcc_r(dest, src, dest); } - void sub32(Imm32 imm, RegisterID dest) + void sub32(TrustedImm32 imm, RegisterID dest) { if (m_assembler.isimm13(imm.m_value)) m_assembler.subcc_imm(dest, imm.m_value, dest); @@ -250,7 +250,7 @@ namespace JSC { } } - void sub32(Imm32 imm, Address address) + void sub32(TrustedImm32 imm, Address address) { 
load32(address, SparcRegisters::g2); sub32(imm, SparcRegisters::g2); @@ -268,7 +268,7 @@ namespace JSC { m_assembler.xorcc_r(src, dest, dest); } - void xor32(Imm32 imm, RegisterID dest) + void xor32(TrustedImm32 imm, RegisterID dest) { if (m_assembler.isimm13(imm.m_value)) m_assembler.xorcc_imm(dest, imm.m_value, dest); @@ -548,7 +548,7 @@ namespace JSC { m_assembler.stw_r(src, address.base, SparcRegisters::g2); } - void store32(Imm32 imm, BaseIndex address) + void store32(TrustedImm32 imm, BaseIndex address) { m_assembler.sll_imm(address.index, address.scale, SparcRegisters::g2); add32(Imm32(address.offset), SparcRegisters::g2); @@ -556,7 +556,7 @@ namespace JSC { m_assembler.stw_r(SparcRegisters::g3, SparcRegisters::g2, address.base); } - void store32(Imm32 imm, ImplicitAddress address) + void store32(TrustedImm32 imm, ImplicitAddress address) { m_assembler.move_nocheck(imm.m_value, SparcRegisters::g2); store32(SparcRegisters::g2, address); @@ -568,7 +568,7 @@ namespace JSC { m_assembler.stw_r(src, SparcRegisters::g0, SparcRegisters::g3); } - void store32(Imm32 imm, void* address) + void store32(TrustedImm32 imm, void* address) { move(imm, SparcRegisters::g2); store32(SparcRegisters::g2, address); @@ -598,7 +598,7 @@ namespace JSC { push(SparcRegisters::g2); } - void move(Imm32 imm, RegisterID dest) + void move(TrustedImm32 imm, RegisterID dest) { if (m_assembler.isimm13(imm.m_value)) m_assembler.or_imm(SparcRegisters::g0, imm.m_value, dest); @@ -611,7 +611,7 @@ namespace JSC { m_assembler.or_r(src, SparcRegisters::g0, dest); } - void move(ImmPtr imm, RegisterID dest) + void move(TrustedImmPtr imm, RegisterID dest) { move(Imm32(imm), dest); } @@ -641,20 +641,20 @@ namespace JSC { return branch32(cond, SparcRegisters::g2, right); } - Jump branch32_force32(Condition cond, RegisterID left, Imm32 right) + Jump branch32_force32(Condition cond, RegisterID left, TrustedImm32 right) { m_assembler.move_nocheck(right.m_value, SparcRegisters::g3); m_assembler.subcc_r(left, 
SparcRegisters::g3, SparcRegisters::g0); return Jump(m_assembler.branch(SparcCondition(cond))); } - Jump branch32FixedLength(Condition cond, RegisterID left, Imm32 right) + Jump branch32FixedLength(Condition cond, RegisterID left, TrustedImm32 right) { m_assembler.move_nocheck(right.m_value, SparcRegisters::g2); return branch32(cond, left, SparcRegisters::g2); } - Jump branch32WithPatch(Condition cond, RegisterID left, Imm32 right, DataLabel32 &dataLabel) + Jump branch32WithPatch(Condition cond, RegisterID left, TrustedImm32 right, DataLabel32 &dataLabel) { // Always use move_nocheck, since the value is to be patched. dataLabel = DataLabel32(this); @@ -669,7 +669,7 @@ namespace JSC { return Jump(m_assembler.branch(SparcCondition(cond))); } - Jump branch32(Condition cond, RegisterID left, Imm32 right) + Jump branch32(Condition cond, RegisterID left, TrustedImm32 right) { if (m_assembler.isimm13(right.m_value)) m_assembler.subcc_imm(left, right.m_value, SparcRegisters::g0); @@ -692,20 +692,20 @@ namespace JSC { return branch32(cond, SparcRegisters::g2, right); } - Jump branch32(Condition cond, Address left, Imm32 right) + Jump branch32(Condition cond, Address left, TrustedImm32 right) { load32(left, SparcRegisters::g2); return branch32(cond, SparcRegisters::g2, right); } - Jump branch32(Condition cond, BaseIndex left, Imm32 right) + Jump branch32(Condition cond, BaseIndex left, TrustedImm32 right) { load32(left, SparcRegisters::g2); return branch32(cond, SparcRegisters::g2, right); } - Jump branch32WithUnalignedHalfWords(Condition cond, BaseIndex left, Imm32 right) + Jump branch32WithUnalignedHalfWords(Condition cond, BaseIndex left, TrustedImm32 right) { load32WithUnalignedHalfWords(left, SparcRegisters::g4); return branch32(cond, SparcRegisters::g4, right); @@ -1052,7 +1052,7 @@ namespace JSC { store32(SparcRegisters::g2, address.m_ptr); } - void sub32(Imm32 imm, AbsoluteAddress address) + void sub32(TrustedImm32 imm, AbsoluteAddress address) { 
load32(address.m_ptr, SparcRegisters::g2); sub32(imm, SparcRegisters::g2); @@ -1071,7 +1071,7 @@ namespace JSC { return branch32(cond, SparcRegisters::g2, right); } - Jump branch32(Condition cond, AbsoluteAddress left, Imm32 right) + Jump branch32(Condition cond, AbsoluteAddress left, TrustedImm32 right) { load32(left.m_ptr, SparcRegisters::g2); return branch32(cond, SparcRegisters::g2, right); @@ -1099,7 +1099,7 @@ namespace JSC { return Call::fromTailJump(oldJump); } - DataLabelPtr moveWithPatch(ImmPtr initialValue, RegisterID dest) + DataLabelPtr moveWithPatch(TrustedImmPtr initialValue, RegisterID dest) { DataLabelPtr dataLabel(this); Imm32 imm = Imm32(initialValue); @@ -1107,7 +1107,7 @@ namespace JSC { return dataLabel; } - DataLabel32 moveWithPatch(Imm32 initialValue, RegisterID dest) + DataLabel32 moveWithPatch(TrustedImm32 initialValue, RegisterID dest) { DataLabel32 dataLabel(this); m_assembler.move_nocheck(initialValue.m_value, dest); @@ -1129,7 +1129,7 @@ namespace JSC { return jump; } - DataLabelPtr storePtrWithPatch(ImmPtr initialValue, ImplicitAddress address) + DataLabelPtr storePtrWithPatch(TrustedImmPtr initialValue, ImplicitAddress address) { DataLabelPtr dataLabel = moveWithPatch(initialValue, SparcRegisters::g2); store32(SparcRegisters::g2, address); diff --git a/js/src/assembler/assembler/MacroAssemblerX86.h b/js/src/assembler/assembler/MacroAssemblerX86.h index ee61b895a8fe..c6ab40f587fa 100644 --- a/js/src/assembler/assembler/MacroAssemblerX86.h +++ b/js/src/assembler/assembler/MacroAssemblerX86.h @@ -60,7 +60,7 @@ public: using MacroAssemblerX86Common::storeDouble; using MacroAssemblerX86Common::convertInt32ToDouble; - void add32(Imm32 imm, RegisterID src, RegisterID dest) + void add32(TrustedImm32 imm, RegisterID src, RegisterID dest) { m_assembler.leal_mr(imm.m_value, src, dest); } @@ -90,12 +90,12 @@ public: m_assembler.andl_im(imm.m_value, address.m_ptr); } - void or32(Imm32 imm, AbsoluteAddress address) + void or32(TrustedImm32 imm, 
AbsoluteAddress address) { m_assembler.orl_im(imm.m_value, address.m_ptr); } - void sub32(Imm32 imm, AbsoluteAddress address) + void sub32(TrustedImm32 imm, AbsoluteAddress address) { m_assembler.subl_im(imm.m_value, address.m_ptr); } @@ -148,7 +148,7 @@ public: addDouble(Address(srcDest), dest); } - void store32(Imm32 imm, void* address) + void store32(TrustedImm32 imm, void* address) { m_assembler.movl_i32m(imm.m_value, address); } @@ -164,7 +164,7 @@ public: return Jump(m_assembler.jCC(x86Condition(cond))); } - Jump branch32(Condition cond, AbsoluteAddress left, Imm32 right) + Jump branch32(Condition cond, AbsoluteAddress left, TrustedImm32 right) { m_assembler.cmpl_im(right.m_value, left.m_ptr); return Jump(m_assembler.jCC(x86Condition(cond))); @@ -186,7 +186,7 @@ public: } - DataLabelPtr moveWithPatch(ImmPtr initialValue, RegisterID dest) + DataLabelPtr moveWithPatch(TrustedImmPtr initialValue, RegisterID dest) { m_assembler.movl_i32r(initialValue.asIntptr(), dest); return DataLabelPtr(this); @@ -206,7 +206,7 @@ public: return Jump(m_assembler.jCC(x86Condition(cond))); } - DataLabelPtr storePtrWithPatch(ImmPtr initialValue, ImplicitAddress address) + DataLabelPtr storePtrWithPatch(TrustedImmPtr initialValue, ImplicitAddress address) { m_assembler.movl_i32m(initialValue.asIntptr(), address.offset, address.base); return DataLabelPtr(this); diff --git a/js/src/assembler/assembler/MacroAssemblerX86Common.h b/js/src/assembler/assembler/MacroAssemblerX86Common.h index 1ead9665f4e2..fa1b7ba8cb10 100644 --- a/js/src/assembler/assembler/MacroAssemblerX86Common.h +++ b/js/src/assembler/assembler/MacroAssemblerX86Common.h @@ -116,12 +116,12 @@ public: m_assembler.addl_rr(src, dest); } - void add32(Imm32 imm, Address address) + void add32(TrustedImm32 imm, Address address) { m_assembler.addl_im(imm.m_value, address.offset, address.base); } - void add32(Imm32 imm, RegisterID dest) + void add32(TrustedImm32 imm, RegisterID dest) { m_assembler.addl_ir(imm.m_value, dest); } 
@@ -234,7 +234,7 @@ public: m_assembler.orl_rr(src, dest); } - void or32(Imm32 imm, RegisterID dest) + void or32(TrustedImm32 imm, RegisterID dest) { m_assembler.orl_ir(imm.m_value, dest); } @@ -249,7 +249,7 @@ public: m_assembler.orl_mr(src.offset, src.base, dest); } - void or32(Imm32 imm, Address address) + void or32(TrustedImm32 imm, Address address) { m_assembler.orl_im(imm.m_value, address.offset, address.base); } @@ -313,12 +313,12 @@ public: m_assembler.subl_rr(src, dest); } - void sub32(Imm32 imm, RegisterID dest) + void sub32(TrustedImm32 imm, RegisterID dest) { m_assembler.subl_ir(imm.m_value, dest); } - void sub32(Imm32 imm, Address address) + void sub32(TrustedImm32 imm, Address address) { m_assembler.subl_im(imm.m_value, address.offset, address.base); } @@ -339,12 +339,12 @@ public: m_assembler.xorl_rr(src, dest); } - void xor32(Imm32 imm, Address dest) + void xor32(TrustedImm32 imm, Address dest) { m_assembler.xorl_im(imm.m_value, dest.offset, dest.base); } - void xor32(Imm32 imm, RegisterID dest) + void xor32(TrustedImm32 imm, RegisterID dest) { m_assembler.xorl_ir(imm.m_value, dest); } @@ -468,7 +468,7 @@ public: m_assembler.movl_rm(src, address.offset, address.base, address.index, address.scale); } - void store32(Imm32 imm, BaseIndex address) + void store32(TrustedImm32 imm, BaseIndex address) { m_assembler.movl_i32m(imm.m_value, address.offset, address.base, address.index, address.scale); } @@ -483,7 +483,7 @@ public: m_assembler.movb_i8m(imm.m_value, address.offset, address.base, address.index, address.scale); } - void store32(Imm32 imm, ImplicitAddress address) + void store32(TrustedImm32 imm, ImplicitAddress address) { m_assembler.movl_i32m(imm.m_value, address.offset, address.base); } @@ -748,7 +748,7 @@ public: // // Move values in registers. 
- void move(Imm32 imm, RegisterID dest) + void move(TrustedImm32 imm, RegisterID dest) { // Note: on 64-bit the Imm32 value is zero extended into the register, it // may be useful to have a separate version that sign extends the value? @@ -767,7 +767,7 @@ public: m_assembler.movq_rr(src, dest); } - void move(ImmPtr imm, RegisterID dest) + void move(TrustedImmPtr imm, RegisterID dest) { m_assembler.movq_i64r(imm.asIntptr(), dest); } @@ -798,7 +798,7 @@ public: m_assembler.movl_rr(src, dest); } - void move(ImmPtr imm, RegisterID dest) + void move(TrustedImmPtr imm, RegisterID dest) { m_assembler.movl_i32r(imm.asIntptr(), dest); } @@ -852,7 +852,7 @@ public: return Jump(m_assembler.jCC(x86Condition(cond))); } - Jump branch32(Condition cond, RegisterID left, Imm32 right) + Jump branch32(Condition cond, RegisterID left, TrustedImm32 right) { if (((cond == Equal) || (cond == NotEqual)) && !right.m_value) m_assembler.testl_rr(left, left); @@ -864,14 +864,14 @@ public: // Branch based on a 32-bit comparison, forcing the size of the // immediate operand to 32 bits in the native code stream to ensure that // the length of code emitted by this instruction is consistent. - Jump branch32FixedLength(Condition cond, RegisterID left, Imm32 right) + Jump branch32FixedLength(Condition cond, RegisterID left, TrustedImm32 right) { m_assembler.cmpl_ir_force32(right.m_value, left); return Jump(m_assembler.jCC(x86Condition(cond))); } // Branch and record a label after the comparison. - Jump branch32WithPatch(Condition cond, RegisterID left, Imm32 right, DataLabel32 &dataLabel) + Jump branch32WithPatch(Condition cond, RegisterID left, TrustedImm32 right, DataLabel32 &dataLabel) { // Always use cmpl, since the value is to be patched. 
m_assembler.cmpl_ir_force32(right.m_value, left); @@ -879,7 +879,7 @@ public: return Jump(m_assembler.jCC(x86Condition(cond))); } - Jump branch32WithPatch(Condition cond, Address left, Imm32 right, DataLabel32 &dataLabel) + Jump branch32WithPatch(Condition cond, Address left, TrustedImm32 right, DataLabel32 &dataLabel) { m_assembler.cmpl_im_force32(right.m_value, left.offset, left.base); dataLabel = DataLabel32(this); @@ -898,19 +898,19 @@ public: return Jump(m_assembler.jCC(x86Condition(cond))); } - Jump branch32(Condition cond, Address left, Imm32 right) + Jump branch32(Condition cond, Address left, TrustedImm32 right) { m_assembler.cmpl_im(right.m_value, left.offset, left.base); return Jump(m_assembler.jCC(x86Condition(cond))); } - Jump branch32(Condition cond, BaseIndex left, Imm32 right) + Jump branch32(Condition cond, BaseIndex left, TrustedImm32 right) { m_assembler.cmpl_im(right.m_value, left.offset, left.base, left.index, left.scale); return Jump(m_assembler.jCC(x86Condition(cond))); } - Jump branch32WithUnalignedHalfWords(Condition cond, BaseIndex left, Imm32 right) + Jump branch32WithUnalignedHalfWords(Condition cond, BaseIndex left, TrustedImm32 right) { return branch32(cond, left, right); } @@ -1369,7 +1369,7 @@ private: } #if WTF_CPU_X86 -#if WTF_PLATFORM_MAC +#if WTF_OS_MAC_OS_X // All X86 Macs are guaranteed to support at least SSE2 static bool isSSEPresent() @@ -1382,7 +1382,7 @@ private: return true; } -#else // PLATFORM(MAC) +#else // OS(MAC_OS_X) static bool isSSEPresent() { diff --git a/js/src/assembler/assembler/MacroAssemblerX86_64.h b/js/src/assembler/assembler/MacroAssemblerX86_64.h index 7dadc6bcaf2e..a5038a930e56 100644 --- a/js/src/assembler/assembler/MacroAssemblerX86_64.h +++ b/js/src/assembler/assembler/MacroAssemblerX86_64.h @@ -60,7 +60,7 @@ public: using MacroAssemblerX86Common::storeDouble; using MacroAssemblerX86Common::convertInt32ToDouble; - void add32(Imm32 imm, AbsoluteAddress address) + void add32(TrustedImm32 imm, 
AbsoluteAddress address) { move(ImmPtr(address.m_ptr), scratchRegister); add32(imm, Address(scratchRegister)); @@ -72,13 +72,13 @@ public: and32(imm, Address(scratchRegister)); } - void or32(Imm32 imm, AbsoluteAddress address) + void or32(TrustedImm32 imm, AbsoluteAddress address) { move(ImmPtr(address.m_ptr), scratchRegister); or32(imm, Address(scratchRegister)); } - void sub32(Imm32 imm, AbsoluteAddress address) + void sub32(TrustedImm32 imm, AbsoluteAddress address) { move(ImmPtr(address.m_ptr), scratchRegister); sub32(imm, Address(scratchRegister)); @@ -114,7 +114,7 @@ public: m_assembler.cvtsq2sd_rr(srcDest, dest); } - void store32(Imm32 imm, void* address) + void store32(TrustedImm32 imm, void* address) { move(X86Registers::eax, scratchRegister); move(imm, X86Registers::eax); @@ -311,7 +311,7 @@ public: m_assembler.movq_rm(src, address.offset, address.base); } - void storePtr(ImmPtr imm, BaseIndex address) + void storePtr(TrustedImmPtr imm, BaseIndex address) { intptr_t value = intptr_t(imm.m_value); @@ -341,7 +341,7 @@ public: } } - void storePtr(ImmPtr imm, ImplicitAddress address) + void storePtr(TrustedImmPtr imm, ImplicitAddress address) { intptr_t value = intptr_t(imm.m_value); @@ -487,7 +487,7 @@ public: return Jump(m_assembler.jCC(x86Condition(cond))); } - DataLabelPtr moveWithPatch(ImmPtr initialValue, RegisterID dest) + DataLabelPtr moveWithPatch(TrustedImmPtr initialValue, RegisterID dest) { m_assembler.movq_i64r(initialValue.asIntptr(), dest); return DataLabelPtr(this); @@ -505,7 +505,7 @@ public: return branchPtr(cond, left, scratchRegister); } - DataLabelPtr storePtrWithPatch(ImmPtr initialValue, ImplicitAddress address) + DataLabelPtr storePtrWithPatch(TrustedImmPtr initialValue, ImplicitAddress address) { DataLabelPtr label = moveWithPatch(initialValue, scratchRegister); storePtr(scratchRegister, address); diff --git a/js/src/assembler/jit/ExecutableAllocator.h b/js/src/assembler/jit/ExecutableAllocator.h index a54a4dab143b..9cca26f48c99 
100644 --- a/js/src/assembler/jit/ExecutableAllocator.h +++ b/js/src/assembler/jit/ExecutableAllocator.h @@ -52,16 +52,16 @@ extern "C" void sync_instruction_memory(caddr_t v, u_int len); #endif #endif -#if WTF_PLATFORM_IPHONE +#if WTF_OS_IOS #include #include #endif -#if WTF_PLATFORM_SYMBIAN +#if WTF_OS_SYMBIAN #include #endif -#if WTF_CPU_MIPS && WTF_PLATFORM_LINUX +#if WTF_CPU_MIPS && WTF_OS_LINUX #include #endif @@ -90,7 +90,7 @@ private: struct Allocation { char* pages; size_t size; -#if WTF_PLATFORM_SYMBIAN +#if WTF_OS_SYMBIAN RChunk* chunk; #endif }; @@ -269,6 +269,7 @@ private: return pool; } +public: ExecutablePool* poolForSize(size_t n) { #ifndef DEBUG_STRESS_JSC_ALLOCATOR @@ -327,7 +328,6 @@ private: return pool; } -public: #if ENABLE_ASSEMBLER_WX_EXCLUSIVE static void makeWritable(void* start, size_t size) { @@ -374,13 +374,13 @@ public: _flush_cache(reinterpret_cast(code), size, BCACHE); #endif } -#elif WTF_CPU_ARM_THUMB2 && WTF_PLATFORM_IPHONE +#elif WTF_CPU_ARM_THUMB2 && WTF_OS_IOS static void cacheFlush(void* code, size_t size) { sys_dcache_flush(code, size); sys_icache_invalidate(code, size); } -#elif WTF_CPU_ARM_THUMB2 && WTF_PLATFORM_LINUX +#elif WTF_CPU_ARM_THUMB2 && WTF_OS_LINUX static void cacheFlush(void* code, size_t size) { asm volatile ( @@ -396,14 +396,14 @@ public: : "r" (code), "r" (reinterpret_cast(code) + size) : "r0", "r1", "r2"); } -#elif WTF_PLATFORM_SYMBIAN +#elif WTF_OS_SYMBIAN static void cacheFlush(void* code, size_t size) { User::IMB_Range(code, static_cast(code) + size); } -#elif WTF_CPU_ARM_TRADITIONAL && WTF_PLATFORM_LINUX && WTF_COMPILER_RVCT +#elif WTF_CPU_ARM_TRADITIONAL && WTF_OS_LINUX && WTF_COMPILER_RVCT static __asm void cacheFlush(void* code, size_t size); -#elif WTF_CPU_ARM_TRADITIONAL && (WTF_PLATFORM_LINUX || WTF_PLATFORM_ANDROID) && WTF_COMPILER_GCC +#elif WTF_CPU_ARM_TRADITIONAL && (WTF_OS_LINUX || WTF_OS_ANDROID) && WTF_COMPILER_GCC static void cacheFlush(void* code, size_t size) { asm volatile ( diff --git 
a/js/src/assembler/jit/ExecutableAllocatorOS2.cpp b/js/src/assembler/jit/ExecutableAllocatorOS2.cpp index ef9e27d92b47..675b604ae914 100644 --- a/js/src/assembler/jit/ExecutableAllocatorOS2.cpp +++ b/js/src/assembler/jit/ExecutableAllocatorOS2.cpp @@ -26,7 +26,7 @@ #include "ExecutableAllocator.h" -#if ENABLE_ASSEMBLER && WTF_PLATFORM_OS2 +#if ENABLE_ASSEMBLER && WTF_OS_OS2 #define INCL_DOS #include diff --git a/js/src/assembler/jit/ExecutableAllocatorPosix.cpp b/js/src/assembler/jit/ExecutableAllocatorPosix.cpp index 50efd932e02a..e334626ccc2f 100644 --- a/js/src/assembler/jit/ExecutableAllocatorPosix.cpp +++ b/js/src/assembler/jit/ExecutableAllocatorPosix.cpp @@ -25,7 +25,7 @@ #include "ExecutableAllocator.h" -#if ENABLE_ASSEMBLER && WTF_PLATFORM_UNIX && !WTF_PLATFORM_SYMBIAN +#if ENABLE_ASSEMBLER && WTF_OS_UNIX && !WTF_OS_SYMBIAN #include #include @@ -74,7 +74,7 @@ void ExecutableAllocator::reprotectRegion(void* start, size_t size, ProtectionSe } #endif -#if WTF_CPU_ARM_TRADITIONAL && WTF_PLATFORM_LINUX && WTF_COMPILER_RVCT +#if WTF_CPU_ARM_TRADITIONAL && WTF_OS_LINUX && WTF_COMPILER_RVCT __asm void ExecutableAllocator::cacheFlush(void* code, size_t size) { ARM diff --git a/js/src/assembler/jit/ExecutableAllocatorSymbian.cpp b/js/src/assembler/jit/ExecutableAllocatorSymbian.cpp index c66fa80fff12..f51c0d507877 100644 --- a/js/src/assembler/jit/ExecutableAllocatorSymbian.cpp +++ b/js/src/assembler/jit/ExecutableAllocatorSymbian.cpp @@ -22,7 +22,7 @@ #include "ExecutableAllocator.h" -#if ENABLE_ASSEMBLER && WTF_PLATFORM_SYMBIAN +#if ENABLE_ASSEMBLER && WTF_OS_SYMBIAN #include #include diff --git a/js/src/assembler/jit/ExecutableAllocatorWin.cpp b/js/src/assembler/jit/ExecutableAllocatorWin.cpp index f5775608f36f..da6e756cfa66 100644 --- a/js/src/assembler/jit/ExecutableAllocatorWin.cpp +++ b/js/src/assembler/jit/ExecutableAllocatorWin.cpp @@ -26,7 +26,7 @@ #include "ExecutableAllocator.h" -#if ENABLE_ASSEMBLER && WTF_PLATFORM_WIN_OS +#if ENABLE_ASSEMBLER && 
WTF_OS_WINDOWS #include "jswin.h" diff --git a/js/src/assembler/wtf/Platform.h b/js/src/assembler/wtf/Platform.h index 68713cd4c810..b8c77f5d6e69 100644 --- a/js/src/assembler/wtf/Platform.h +++ b/js/src/assembler/wtf/Platform.h @@ -1,6 +1,7 @@ /* * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. * Copyright (C) 2007-2009 Torch Mobile, Inc. + * Copyright (C) Research In Motion Limited 2010. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -27,206 +28,267 @@ #ifndef WTF_Platform_h #define WTF_Platform_h -/* Either XX(YY) --> WTF_XX_YY or XX(YY) --> XX_YY, depending on XX - - PLATFORM(YY) --> WTF_PLATFORM_YY - COMPILER(YY) --> WTF_COMPILER_YY - CPU(YY) --> WTF_CPU_YY - OS(YY) --> WTF_OS_YY - USE(YY) --> WTF_USE_YY - - HAVE(YY) --> HAVE_YY - ENABLE(YY) --> ENABLE_YY -*/ - /* ==== PLATFORM handles OS, operating environment, graphics API, and CPU. This macro will be phased out in favor of platform adaptation macros, policy decision macros, and top-level port definitions. ==== */ -//#define PLATFORM(WTF_FEATURE) (defined(WTF_PLATFORM_##WTF_FEATURE) && WTF_PLATFORM_##WTF_FEATURE) +#define PLATFORM(WTF_FEATURE) (defined WTF_PLATFORM_##WTF_FEATURE && WTF_PLATFORM_##WTF_FEATURE) /* ==== Platform adaptation macros: these describe properties of the target environment. 
==== */ /* COMPILER() - the compiler being used to build the project */ -//#define COMPILER(WTF_FEATURE) (defined(WTF_COMPILER_##WTF_FEATURE) && WTF_COMPILER_##WTF_FEATURE) +#define COMPILER(WTF_FEATURE) (defined WTF_COMPILER_##WTF_FEATURE && WTF_COMPILER_##WTF_FEATURE) /* CPU() - the target CPU architecture */ -//#define CPU(WTF_FEATURE) (defined(WTF_CPU_##WTF_FEATURE) && WTF_CPU_##WTF_FEATURE) +#define CPU(WTF_FEATURE) (defined WTF_CPU_##WTF_FEATURE && WTF_CPU_##WTF_FEATURE) /* HAVE() - specific system features (headers, functions or similar) that are present or not */ -//#define HAVE(WTF_FEATURE) (defined(HAVE_##WTF_FEATURE) && HAVE_##WTF_FEATURE) +#define HAVE(WTF_FEATURE) (defined HAVE_##WTF_FEATURE && HAVE_##WTF_FEATURE) /* OS() - underlying operating system; only to be used for mandated low-level services like virtual memory, not to choose a GUI toolkit */ -//#define OS(WTF_FEATURE) (defined(WTF_OS_##WTF_FEATURE) && WTF_OS_##WTF_FEATURE) +#define OS(WTF_FEATURE) (defined WTF_OS_##WTF_FEATURE && WTF_OS_##WTF_FEATURE) /* ==== Policy decision macros: these define policy choices for a particular port. 
==== */ /* USE() - use a particular third-party library or optional OS service */ -//#define USE(WTF_FEATURE) (defined(WTF_USE_##WTF_FEATURE) && WTF_USE_##WTF_FEATURE) +#define USE(WTF_FEATURE) (defined WTF_USE_##WTF_FEATURE && WTF_USE_##WTF_FEATURE) /* ENABLE() - turn on a specific feature of WebKit */ -//#define ENABLE(WTF_FEATURE) (defined(ENABLE_##WTF_FEATURE) && ENABLE_##WTF_FEATURE) +#define ENABLE(WTF_FEATURE) (defined ENABLE_##WTF_FEATURE && ENABLE_##WTF_FEATURE) /* ==== COMPILER() - the compiler being used to build the project ==== */ -/* COMPILER(MSVC) Microsoft Visual C++ */ -/* COMPILER(MSVC7) Microsoft Visual C++ v7 or lower*/ +/* WTF_COMPILER_MSVC Microsoft Visual C++ */ +/* WTF_COMPILER_MSVC7_OR_LOWER Microsoft Visual C++ 2003 or lower*/ +/* WTF_COMPILER_MSVC9_OR_LOWER Microsoft Visual C++ 2008 or lower*/ #if defined(_MSC_VER) #define WTF_COMPILER_MSVC 1 #if _MSC_VER < 1400 -#define WTF_COMPILER_MSVC7 1 +#define WTF_COMPILER_MSVC7_OR_LOWER 1 +#elif _MSC_VER < 1600 +#define WTF_COMPILER_MSVC9_OR_LOWER 1 #endif #endif -/* COMPILER(RVCT) - ARM RealView Compilation Tools */ +/* WTF_COMPILER_RVCT - ARM RealView Compilation Tools */ +/* WTF_COMPILER_RVCT4_OR_GREATER - ARM RealView Compilation Tools 4.0 or greater */ #if defined(__CC_ARM) || defined(__ARMCC__) #define WTF_COMPILER_RVCT 1 +#define RVCT_VERSION_AT_LEAST(major, minor, patch, build) (__ARMCC_VERSION >= (major * 100000 + minor * 10000 + patch * 1000 + build)) +#else +/* Define this for !RVCT compilers, just so we can write things like RVCT_VERSION_AT_LEAST(3, 0, 0, 0). 
*/ +#define RVCT_VERSION_AT_LEAST(major, minor, patch, build) 0 #endif -/* COMPILER(GCC) - GNU Compiler Collection */ +/* WTF_COMPILER_GCC - GNU Compiler Collection */ /* --gnu option of the RVCT compiler also defines __GNUC__ */ #if defined(__GNUC__) && !WTF_COMPILER_RVCT #define WTF_COMPILER_GCC 1 #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#define GCC_VERSION_AT_LEAST(major, minor, patch) (GCC_VERSION >= (major * 10000 + minor * 100 + patch)) +#else +/* Define this for !GCC compilers, just so we can write things like GCC_VERSION_AT_LEAST(4, 1, 0). */ +#define GCC_VERSION_AT_LEAST(major, minor, patch) 0 #endif -/* COMPILER(MINGW) - MinGW GCC */ -#if defined(MINGW) || defined(__MINGW32__) +/* WTF_COMPILER_MINGW - MinGW GCC */ +/* WTF_COMPILER_MINGW64 - mingw-w64 GCC - only used as additional check to exclude mingw.org specific functions */ +#if defined(__MINGW32__) #define WTF_COMPILER_MINGW 1 -#endif +#include <_mingw.h> /* private MinGW header */ + #if defined(__MINGW64_VERSION_MAJOR) /* best way to check for mingw-w64 vs mingw.org */ + #define WTF_COMPILER_MINGW64 1 + #endif /* __MINGW64_VERSION_MAJOR */ +#endif /* __MINGW32__ */ -/* COMPILER(WINSCW) - CodeWarrior for Symbian emulator */ +/* WTF_COMPILER_WINSCW - CodeWarrior for Symbian emulator */ #if defined(__WINSCW__) #define WTF_COMPILER_WINSCW 1 +/* cross-compiling, it is not really windows */ +#undef WIN32 +#undef _WIN32 #endif -/* COMPILER(SUNPRO) - Sun Studio for Solaris */ -#if defined(__SUNPRO_C) || defined(__SUNPRO_CC) -#define WTF_COMPILER_SUNPRO 1 +/* WTF_COMPILER_INTEL - Intel C++ Compiler */ +#if defined(__INTEL_COMPILER) +#define WTF_COMPILER_INTEL 1 #endif +/* WTF_COMPILER_SUNCC */ +#if defined(__SUNPRO_CC) || defined(__SUNPRO_C) +#define WTF_COMPILER_SUNCC 1 +#endif /* ==== CPU() - the target CPU architecture ==== */ -/* This also defines CPU(BIG_ENDIAN) or CPU(MIDDLE_ENDIAN) or neither, as appropriate. 
*/ +/* This also defines WTF_CPU_BIG_ENDIAN or WTF_CPU_MIDDLE_ENDIAN or neither, as appropriate. */ - -/* CPU(ALPHA) - DEC Alpha */ +/* WTF_CPU_ALPHA - DEC Alpha */ #if defined(__alpha__) #define WTF_CPU_ALPHA 1 #endif -/* CPU(IA64) - Itanium / IA-64 */ +/* WTF_CPU_IA64 - Itanium / IA-64 */ #if defined(__ia64__) #define WTF_CPU_IA64 1 +/* 32-bit mode on Itanium */ +#if !defined(__LP64__) +#define WTF_CPU_IA64_32 1 +#endif #endif -/* CPU(PPC) - PowerPC 32-bit */ +/* WTF_CPU_MIPS - MIPS 32-bit */ +/* Note: Only O32 ABI is tested, so we enable it for O32 ABI for now. */ +#if (defined(mips) || defined(__mips__) || defined(MIPS) || defined(_MIPS_)) \ + && defined(_ABIO32) +#define WTF_CPU_MIPS 1 +#if defined(__MIPSEB__) +#define WTF_CPU_BIG_ENDIAN 1 +#endif +#define WTF_MIPS_PIC (defined __PIC__) +#define WTF_MIPS_ARCH __mips +#define WTF_MIPS_ISA(v) (defined WTF_MIPS_ARCH && WTF_MIPS_ARCH == v) +#define WTF_MIPS_ISA_AT_LEAST(v) (defined WTF_MIPS_ARCH && WTF_MIPS_ARCH >= v) +#define WTF_MIPS_ARCH_REV __mips_isa_rev +#define WTF_MIPS_ISA_REV(v) (defined WTF_MIPS_ARCH_REV && WTF_MIPS_ARCH_REV == v) +#define WTF_MIPS_DOUBLE_FLOAT (defined __mips_hard_float && !defined __mips_single_float) +#define WTF_MIPS_FP64 (defined __mips_fpr && __mips_fpr == 64) +/* MIPS requires allocators to use aligned memory */ +#define WTF_USE_ARENA_ALLOC_ALIGNMENT_INTEGER 1 +#endif /* MIPS */ + +/* WTF_CPU_PPC - PowerPC 32-bit */ #if defined(__ppc__) \ - || defined(__PPC__) \ - || defined(__powerpc__) \ - || defined(__powerpc) \ - || defined(__POWERPC__) \ - || defined(_M_PPC) \ - || defined(__PPC) + || defined(__PPC__) \ + || defined(__powerpc__) \ + || defined(__powerpc) \ + || defined(__POWERPC__) \ + || defined(_M_PPC) \ + || defined(__PPC) #define WTF_CPU_PPC 1 #define WTF_CPU_BIG_ENDIAN 1 #endif -/* CPU(PPC64) - PowerPC 64-bit */ +/* WTF_CPU_PPC64 - PowerPC 64-bit */ #if defined(__ppc64__) \ - || defined(__PPC64__) + || defined(__PPC64__) #define WTF_CPU_PPC64 1 #define WTF_CPU_BIG_ENDIAN 
1 #endif -/* CPU(SH4) - SuperH SH-4 */ +/* WTF_CPU_SH4 - SuperH SH-4 */ #if defined(__SH4__) #define WTF_CPU_SH4 1 #endif -/* CPU(SPARC32) - SPARC 32-bit */ +/* WTF_CPU_SPARC32 - SPARC 32-bit */ #if defined(__sparc) && !defined(__arch64__) || defined(__sparcv8) #define WTF_CPU_SPARC32 1 #define WTF_CPU_BIG_ENDIAN 1 #endif -/* CPU(SPARC64) - SPARC 64-bit */ +/* WTF_CPU_SPARC64 - SPARC 64-bit */ #if defined(__sparc__) && defined(__arch64__) || defined (__sparcv9) #define WTF_CPU_SPARC64 1 #define WTF_CPU_BIG_ENDIAN 1 #endif -/* CPU(SPARC) - any SPARC, true for CPU(SPARC32) and CPU(SPARC64) */ +/* WTF_CPU_SPARC - any SPARC, true for WTF_CPU_SPARC32 and WTF_CPU_SPARC64 */ #if WTF_CPU_SPARC32 || WTF_CPU_SPARC64 #define WTF_CPU_SPARC 1 #endif -/* CPU(X86) - i386 / x86 32-bit */ +/* WTF_CPU_S390X - S390 64-bit */ +#if defined(__s390x__) +#define WTF_CPU_S390X 1 +#define WTF_CPU_BIG_ENDIAN 1 +#endif + +/* WTF_CPU_S390 - S390 32-bit */ +#if defined(__s390__) +#define WTF_CPU_S390 1 +#define WTF_CPU_BIG_ENDIAN 1 +#endif + +/* WTF_CPU_X86 - i386 / x86 32-bit */ #if defined(__i386__) \ - || defined(i386) \ - || defined(_M_IX86) \ - || defined(_X86_) \ - || defined(__THW_INTEL) + || defined(i386) \ + || defined(_M_IX86) \ + || defined(_X86_) \ + || defined(__THW_INTEL) #define WTF_CPU_X86 1 #endif -/* CPU(X86_64) - AMD64 / Intel64 / x86_64 64-bit */ +/* WTF_CPU_X86_64 - AMD64 / Intel64 / x86_64 64-bit */ #if defined(__x86_64__) \ - || defined(_M_X64) + || defined(_M_X64) #define WTF_CPU_X86_64 1 #endif -/* CPU(ARM) - ARM, any version*/ +/* WTF_CPU_ARM - ARM, any version*/ #if defined(arm) \ - || defined(__arm__) + || defined(__arm__) \ + || defined(ARM) \ + || defined(_ARM_) #define WTF_CPU_ARM 1 -#if defined(__ARMEB__) +#if defined(__ARMEB__) || (WTF_COMPILER_RVCT && defined(__BIG_ENDIAN)) #define WTF_CPU_BIG_ENDIAN 1 #elif !defined(__ARM_EABI__) \ - && !defined(__EABI__) \ - && !defined(__VFP_FP__) \ - && !defined(ANDROID) + && !defined(__EABI__) \ + && !defined(__VFP_FP__) \ 
+ && !defined(_WIN32_WCE) \ + && !defined(ANDROID) #define WTF_CPU_MIDDLE_ENDIAN 1 #endif -#define WTF_ARM_ARCH_AT_LEAST(N) (WTF_CPU_ARM && WTF_ARM_ARCH_VERSION >= N) +#define WTF_ARM_ARCH_AT_LEAST(N) (CPU(ARM) && WTF_ARM_ARCH_VERSION >= N) /* Set WTF_ARM_ARCH_VERSION */ #if defined(__ARM_ARCH_4__) \ - || defined(__ARM_ARCH_4T__) \ - || defined(__MARM_ARMV4__) \ - || defined(_ARMV4I_) + || defined(__ARM_ARCH_4T__) \ + || defined(__MARM_ARMV4__) \ + || defined(_ARMV4I_) #define WTF_ARM_ARCH_VERSION 4 #elif defined(__ARM_ARCH_5__) \ - || defined(__ARM_ARCH_5T__) \ - || defined(__ARM_ARCH_5E__) \ - || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) \ - || defined(__MARM_ARMV5__) + || defined(__ARM_ARCH_5T__) \ + || defined(__MARM_ARMV5__) #define WTF_ARM_ARCH_VERSION 5 +#elif defined(__ARM_ARCH_5E__) \ + || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +#define WTF_ARM_ARCH_VERSION 5 +/*ARMv5TE requires allocators to use aligned memory*/ +#define WTF_USE_ARENA_ALLOC_ALIGNMENT_INTEGER 1 + #elif defined(__ARM_ARCH_6__) \ - || defined(__ARM_ARCH_6J__) \ - || defined(__ARM_ARCH_6K__) \ - || defined(__ARM_ARCH_6Z__) \ - || defined(__ARM_ARCH_6ZK__) \ - || defined(__ARM_ARCH_6T2__) \ - || defined(__ARMV6__) + || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6K__) \ + || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6ZK__) \ + || defined(__ARM_ARCH_6T2__) \ + || defined(__ARMV6__) #define WTF_ARM_ARCH_VERSION 6 #elif defined(__ARM_ARCH_7A__) \ - || defined(__ARM_ARCH_7R__) + || defined(__ARM_ARCH_7R__) #define WTF_ARM_ARCH_VERSION 7 /* RVCT sets _TARGET_ARCH_ARM */ #elif defined(__TARGET_ARCH_ARM) #define WTF_ARM_ARCH_VERSION __TARGET_ARCH_ARM +#if defined(__TARGET_ARCH_5E) \ + || defined(__TARGET_ARCH_5TE) \ + || defined(__TARGET_ARCH_5TEJ) +/*ARMv5TE requires allocators to use aligned memory*/ +#define WTF_USE_ARENA_ALLOC_ALIGNMENT_INTEGER 1 +#endif + #else #define WTF_ARM_ARCH_VERSION 0 @@ -237,22 +299,22 @@ #define 
WTF_THUMB_ARCH_VERSION 1 #elif defined(__ARM_ARCH_5T__) \ - || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) + || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) #define WTF_THUMB_ARCH_VERSION 2 #elif defined(__ARM_ARCH_6J__) \ - || defined(__ARM_ARCH_6K__) \ - || defined(__ARM_ARCH_6Z__) \ - || defined(__ARM_ARCH_6ZK__) \ - || defined(__ARM_ARCH_6M__) + || defined(__ARM_ARCH_6K__) \ + || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6ZK__) \ + || defined(__ARM_ARCH_6M__) #define WTF_THUMB_ARCH_VERSION 3 #elif defined(__ARM_ARCH_6T2__) \ - || defined(__ARM_ARCH_7__) \ - || defined(__ARM_ARCH_7A__) \ - || defined(__ARM_ARCH_7R__) \ - || defined(__ARM_ARCH_7M__) + || defined(__ARM_ARCH_7__) \ + || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) \ + || defined(__ARM_ARCH_7M__) #define WTF_THUMB_ARCH_VERSION 4 /* RVCT sets __TARGET_ARCH_THUMB */ @@ -264,23 +326,23 @@ #endif -/* CPU(ARMV5_OR_LOWER) - ARM instruction set v5 or earlier */ +/* WTF_CPU_ARMV5_OR_LOWER - ARM instruction set v5 or earlier */ /* On ARMv5 and below the natural alignment is required. And there are some other differences for v5 or earlier. */ -#if !defined(ARMV5_OR_LOWER) /* && !CPU_ARM_ARCH_AT_LEAST(6) */ +#if !defined(ARMV5_OR_LOWER) && WTF_CPU_ARM && !(WTF_ARM_ARCH_VERSION >= 6) #define WTF_CPU_ARMV5_OR_LOWER 1 #endif -/* CPU(ARM_TRADITIONAL) - Thumb2 is not available, only traditional ARM (v4 or greater) */ -/* CPU(ARM_THUMB2) - Thumb2 instruction set is available */ +/* WTF_CPU_ARM_TRADITIONAL - Thumb2 is not available, only traditional ARM (v4 or greater) */ +/* WTF_CPU_ARM_THUMB2 - Thumb2 instruction set is available */ /* Only one of these will be defined. 
*/ #if !defined(WTF_CPU_ARM_TRADITIONAL) && !defined(WTF_CPU_ARM_THUMB2) # if defined(thumb2) || defined(__thumb2__) \ - || ((defined(__thumb) || defined(__thumb__)) && WTF_THUMB_ARCH_VERSION == 4) + || ((defined(__thumb) || defined(__thumb__)) && WTF_THUMB_ARCH_VERSION == 4) # define WTF_CPU_ARM_TRADITIONAL 1 # define WTF_CPU_ARM_THUMB2 0 -# elif WTF_ARM_ARCH_AT_LEAST(4) +# elif WTF_CPU_ARM && WTF_ARM_ARCH_VERSION >= 4 # define WTF_CPU_ARM_TRADITIONAL 1 # define WTF_CPU_ARM_THUMB2 0 # else @@ -288,19 +350,36 @@ # endif #elif WTF_CPU_ARM_TRADITIONAL && WTF_CPU_ARM_THUMB2 /* Sanity Check */ # error "Cannot use both of WTF_CPU_ARM_TRADITIONAL and WTF_CPU_ARM_THUMB2 platforms" -#endif // !defined(WTF_CPU_ARM_TRADITIONAL) && !defined(WTF_CPU_ARM_THUMB2) +#endif /* !defined(WTF_CPU_ARM_TRADITIONAL) && !defined(WTF_CPU_ARM_THUMB2) */ + +#if defined(__ARM_NEON__) && !defined(WTF_CPU_ARM_NEON) +#define WTF_CPU_ARM_NEON 1 +#endif #endif /* ARM */ +#if WTF_CPU_ARM || WTF_CPU_MIPS +#define WTF_CPU_NEEDS_ALIGNED_ACCESS 1 +#endif +/* ==== OS() - underlying operating system; only to be used for mandated low-level services like + virtual memory, not to choose a GUI toolkit ==== */ -/* Operating systems - low-level dependencies */ +/* WTF_OS_ANDROID - Android */ +#ifdef ANDROID +#define WTF_OS_ANDROID 1 +#endif -/* PLATFORM(DARWIN) */ -/* Operating system level dependencies for Mac OS X / Darwin that should */ -/* be used regardless of operating environment */ +/* WTF_OS_AIX - AIX */ +#ifdef _AIX +#define WTF_OS_AIX 1 +#endif + +/* WTF_OS_DARWIN - Any Darwin-based OS, including Mac OS X and iPhone OS */ #ifdef __APPLE__ -#define WTF_PLATFORM_DARWIN 1 +#define WTF_OS_DARWIN 1 + +/* FIXME: BUILDING_ON_.., and TARGETING... 
macros should be folded into the OS() system */ #include #if !defined(MAC_OS_X_VERSION_10_5) || MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_5 #define BUILDING_ON_TIGER 1 @@ -317,98 +396,97 @@ #define TARGETING_SNOW_LEOPARD 1 #endif #include + #endif -/* PLATFORM(WIN_OS) */ -/* Operating system level dependencies for Windows that should be used */ -/* regardless of operating environment */ -#if defined(WIN32) || defined(_WIN32) -#define WTF_PLATFORM_WIN_OS 1 +/* WTF_OS_IOS - iOS */ +/* WTF_OS_MAC_OS_X - Mac OS X (not including iOS) */ +#if WTF_OS_DARWIN && ((defined(TARGET_OS_EMBEDDED) && TARGET_OS_EMBEDDED) \ + || (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) \ + || (defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR)) +#define WTF_OS_IOS 1 +#elif WTF_OS_DARWIN && defined(TARGET_OS_MAC) && TARGET_OS_MAC +#define WTF_OS_MAC_OS_X 1 #endif -/* PLATFORM(LINUX) */ -/* Operating system level dependencies for Linux-like systems that */ -/* should be used regardless of operating environment */ +/* WTF_OS_FREEBSD - FreeBSD */ +#if defined(__FreeBSD__) || defined(__DragonFly__) +#define WTF_OS_FREEBSD 1 +#endif + +/* WTF_OS_HAIKU - Haiku */ +#ifdef __HAIKU__ +#define WTF_OS_HAIKU 1 +#endif + +/* WTF_OS_LINUX - Linux */ #ifdef __linux__ -#define WTF_PLATFORM_LINUX 1 +#define WTF_OS_LINUX 1 #endif -/* PLATFORM(FREEBSD) */ -/* Operating system level dependencies for FreeBSD-like systems that */ -/* should be used regardless of operating environment */ -#ifdef __FreeBSD__ -#define WTF_PLATFORM_FREEBSD 1 -#endif - -/* PLATFORM(OPENBSD) */ -/* Operating system level dependencies for OpenBSD systems that */ -/* should be used regardless of operating environment */ -#ifdef __OpenBSD__ -#define WTF_PLATFORM_OPENBSD 1 -#endif - -/* PLATFORM(SOLARIS) */ -/* Operating system level dependencies for Solaris that should be used */ -/* regardless of operating environment */ -#if defined(sun) || defined(__sun) -#define WTF_PLATFORM_SOLARIS 1 -#endif - -/* PLATFORM(OS2) */ 
-/* Operating system level dependencies for OS/2 that should be used */ -/* regardless of operating environment */ -#if defined(OS2) || defined(__OS2__) -#define WTF_PLATFORM_OS2 1 -#endif - -#if defined (__SYMBIAN32__) -/* we are cross-compiling, it is not really windows */ -#undef WTF_PLATFORM_WIN_OS -#undef WTF_PLATFORM_WIN -#define WTF_PLATFORM_SYMBIAN 1 -#endif - - -/* PLATFORM(NETBSD) */ -/* Operating system level dependencies for NetBSD that should be used */ -/* regardless of operating environment */ +/* WTF_OS_NETBSD - NetBSD */ #if defined(__NetBSD__) -#define WTF_PLATFORM_NETBSD 1 +#define WTF_OS_NETBSD 1 #endif -/* PLATFORM(QNX) */ -/* Operating system level dependencies for QNX that should be used */ -/* regardless of operating environment */ +/* WTF_OS_OPENBSD - OpenBSD */ +#ifdef __OpenBSD__ +#define WTF_OS_OPENBSD 1 +#endif + +/* WTF_OS_QNX - QNX */ #if defined(__QNXNTO__) -#define WTF_PLATFORM_QNX 1 +#define WTF_OS_QNX 1 #endif -/* PLATFORM(UNIX) */ -/* Operating system level dependencies for Unix-like systems that */ -/* should be used regardless of operating environment */ -#if WTF_PLATFORM_DARWIN \ - || WTF_PLATFORM_FREEBSD \ - || WTF_PLATFORM_SYMBIAN \ - || WTF_PLATFORM_NETBSD \ - || defined(unix) \ - || defined(__unix) \ - || defined(__unix__) \ - || defined(_AIX) \ - || defined(__HAIKU__) \ - || defined(__QNXNTO__) \ - || defined(ANDROID) -#define WTF_PLATFORM_UNIX 1 +/* WTF_OS_SOLARIS - Solaris */ +#if defined(sun) || defined(__sun) +#define WTF_OS_SOLARIS 1 +#endif + +/* WTF_OS_WINCE - Windows CE; note that for this platform WTF_OS_WINDOWS is also defined */ +#if defined(_WIN32_WCE) +#define WTF_OS_WINCE 1 +#endif + +/* WTF_OS_WINDOWS - Any version of Windows */ +#if defined(WIN32) || defined(_WIN32) +#define WTF_OS_WINDOWS 1 +#endif + +/* WTF_OS_SYMBIAN - Symbian */ +#if defined (__SYMBIAN32__) +#define WTF_OS_SYMBIAN 1 +#endif + +/* WTF_OS_UNIX - Any Unix-like system */ +#if WTF_OS_AIX \ + || WTF_OS_ANDROID \ + || WTF_OS_DARWIN \ + || 
WTF_OS_FREEBSD \ + || WTF_OS_HAIKU \ + || WTF_OS_LINUX \ + || WTF_OS_NETBSD \ + || WTF_OS_OPENBSD \ + || WTF_OS_QNX \ + || WTF_OS_SOLARIS \ + || WTF_OS_SYMBIAN \ + || defined(unix) \ + || defined(__unix) \ + || defined(__unix__) +#define WTF_OS_UNIX 1 #endif /* Operating environments */ -/* PLATFORM(CHROMIUM) */ -/* PLATFORM(QT) */ -/* PLATFORM(WX) */ -/* PLATFORM(GTK) */ -/* PLATFORM(HAIKU) */ -/* PLATFORM(MAC) */ -/* PLATFORM(WIN) */ +/* FIXME: these are all mixes of OS, operating environment and policy choices. */ +/* WTF_PLATFORM_CHROMIUM */ +/* WTF_PLATFORM_QT */ +/* WTF_PLATFORM_WX */ +/* WTF_PLATFORM_GTK */ +/* WTF_PLATFORM_HAIKU */ +/* WTF_PLATFORM_MAC */ +/* WTF_PLATFORM_WIN */ #if defined(BUILDING_CHROMIUM__) #define WTF_PLATFORM_CHROMIUM 1 #elif defined(BUILDING_QT__) @@ -419,142 +497,229 @@ #define WTF_PLATFORM_GTK 1 #elif defined(BUILDING_HAIKU__) #define WTF_PLATFORM_HAIKU 1 -#elif WTF_PLATFORM_DARWIN +#elif defined(BUILDING_BREWMP__) +#define WTF_PLATFORM_BREWMP 1 +#if defined(AEE_SIMULATOR) +#define WTF_PLATFORM_BREWMP_SIMULATOR 1 +#else +#define WTF_PLATFORM_BREWMP_SIMULATOR 0 +#endif +#undef WTF_OS_WINDOWS +#undef WTF_PLATFORM_WIN +#elif WTF_OS_DARWIN #define WTF_PLATFORM_MAC 1 -#elif WTF_PLATFORM_WIN_OS +#elif WTF_OS_WINDOWS #define WTF_PLATFORM_WIN 1 #endif -/* PLATFORM(IPHONE) */ +/* WTF_PLATFORM_IOS */ +/* FIXME: this is sometimes used as an OS switch and sometimes for higher-level things */ #if (defined(TARGET_OS_EMBEDDED) && TARGET_OS_EMBEDDED) || (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) -#define WTF_PLATFORM_IPHONE 1 +#define WTF_PLATFORM_IOS 1 #endif -/* PLATFORM(IPHONE_SIMULATOR) */ +/* WTF_PLATFORM_IOS_SIMULATOR */ #if defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR -#define WTF_PLATFORM_IPHONE 1 -#define WTF_PLATFORM_IPHONE_SIMULATOR 1 +#define WTF_PLATFORM_IOS 1 +#define WTF_PLATFORM_IOS_SIMULATOR 1 #else -#define WTF_PLATFORM_IPHONE_SIMULATOR 0 +#define WTF_PLATFORM_IOS_SIMULATOR 0 #endif -#if 
!defined(WTF_PLATFORM_IPHONE) -#define WTF_PLATFORM_IPHONE 0 +#if !defined(WTF_PLATFORM_IOS) +#define WTF_PLATFORM_IOS 0 #endif -/* PLATFORM(ANDROID) */ +/* WTF_PLATFORM_ANDROID */ +/* FIXME: this is sometimes used as an OS() switch, and other times to drive + policy choices */ #if defined(ANDROID) #define WTF_PLATFORM_ANDROID 1 #endif /* Graphics engines */ -/* PLATFORM(CG) and PLATFORM(CI) */ -#if WTF_PLATFORM_MAC || WTF_PLATFORM_IPHONE -#define WTF_PLATFORM_CG 1 +/* WTF_USE_CG and WTF_PLATFORM_CI */ +#if WTF_PLATFORM_MAC || WTF_PLATFORM_IOS +#define WTF_USE_CG 1 #endif -#if WTF_PLATFORM_MAC && !WTF_PLATFORM_IPHONE -#define WTF_PLATFORM_CI 1 +#if WTF_PLATFORM_MAC || WTF_PLATFORM_IOS || (WTF_PLATFORM_WIN && WTF_USE_CG) +#define WTF_USE_CA 1 #endif -/* PLATFORM(SKIA) for Win/Linux, CG/CI for Mac */ +/* WTF_USE_SKIA for Win/Linux, CG for Mac */ #if WTF_PLATFORM_CHROMIUM -#if WTF_PLATFORM_DARWIN -#define WTF_PLATFORM_CG 1 -#define WTF_PLATFORM_CI 1 +#if WTF_OS_DARWIN +#define WTF_USE_CG 1 #define WTF_USE_ATSUI 1 #define WTF_USE_CORE_TEXT 1 +#define WTF_USE_ICCJPEG 1 #else -#define WTF_PLATFORM_SKIA 1 +#define WTF_USE_SKIA 1 +#define WTF_USE_CHROMIUM_NET 1 #endif #endif +#if WTF_PLATFORM_BREWMP +#define WTF_USE_SKIA 1 +#endif + #if WTF_PLATFORM_GTK -#define WTF_PLATFORM_CAIRO 1 +#define WTF_USE_CAIRO 1 #endif -#if (WTF_PLATFORM_IPHONE || WTF_PLATFORM_MAC || WTF_PLATFORM_WIN || WTF_PLATFORM_OS2 || (WTF_PLATFORM_QT && WTF_PLATFORM_DARWIN && !ENABLE_SINGLE_THREADED)) && !defined(ENABLE_JSC_MULTIPLE_THREADS) +#if WTF_OS_WINCE +#include +#define WTF_USE_MERSENNE_TWISTER_19937 1 +#endif + +#if WTF_PLATFORM_QT && WTF_OS_UNIX && !WTF_OS_SYMBIAN && !WTF_OS_DARWIN +#define WTF_USE_PTHREAD_BASED_QT 1 +#endif + +#if (WTF_PLATFORM_GTK || WTF_PLATFORM_IOS || WTF_PLATFORM_MAC || WTF_PLATFORM_WIN || (WTF_PLATFORM_QT && (WTF_OS_DARWIN || WTF_USE_PTHREAD_BASED_QT) && !ENABLE_SINGLE_THREADED)) && !defined(ENABLE_JSC_MULTIPLE_THREADS) #define ENABLE_JSC_MULTIPLE_THREADS 1 #endif +#if 
ENABLE_JSC_MULTIPLE_THREADS +#define ENABLE_WTF_MULTIPLE_THREADS 1 +#endif + /* On Windows, use QueryPerformanceCounter by default */ -#if WTF_PLATFORM_WIN_OS +#if WTF_OS_WINDOWS #define WTF_USE_QUERY_PERFORMANCE_COUNTER 1 #endif +#if WTF_OS_WINCE && !WTF_PLATFORM_QT +#define NOMINMAX /* Windows min and max conflict with standard macros */ +#define NOSHLWAPI /* shlwapi.h not available on WinCe */ + +/* MSDN documentation says these functions are provided with uspce.lib. But we cannot find this file. */ +#define __usp10__ /* disable "usp10.h" */ + +#define _INC_ASSERT /* disable "assert.h" */ +#define assert(x) + +#endif /* WTF_OS_WINCE && !WTF_PLATFORM_QT */ + #if WTF_PLATFORM_QT #define WTF_USE_QT4_UNICODE 1 +#elif WTF_OS_WINCE +#define WTF_USE_WINCE_UNICODE 1 +#elif WTF_PLATFORM_BREWMP +#define WTF_USE_BREWMP_UNICODE 1 #elif WTF_PLATFORM_GTK /* The GTK+ Unicode backend is configurable */ #else #define WTF_USE_ICU_UNICODE 1 #endif -#if WTF_PLATFORM_MAC && !WTF_PLATFORM_IPHONE -#define WTF_PLATFORM_CF 1 -#define WTF_USE_PTHREADS 1 -#define HAVE_PTHREAD_RWLOCK 1 +#if WTF_PLATFORM_MAC && !WTF_PLATFORM_IOS #if !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_TIGER) && WTF_CPU_X86_64 #define WTF_USE_PLUGIN_HOST_PROCESS 1 #endif -#if !defined(ENABLE_MAC_JAVA_BRIDGE) -#define ENABLE_MAC_JAVA_BRIDGE 1 +#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_SNOW_LEOPARD) +#define ENABLE_GESTURE_EVENTS 1 +#define ENABLE_RUBBER_BANDING 1 +#define WTF_USE_WK_SCROLLBAR_PAINTER 1 +#endif +#if !defined(ENABLE_JAVA_BRIDGE) +#define ENABLE_JAVA_BRIDGE 1 #endif #if !defined(ENABLE_DASHBOARD_SUPPORT) #define ENABLE_DASHBOARD_SUPPORT 1 #endif +#define WTF_USE_CF 1 +#define WTF_USE_PTHREADS 1 +#define HAVE_PTHREAD_RWLOCK 1 #define HAVE_READLINE 1 #define HAVE_RUNLOOP_TIMER 1 -#endif /* PLATFORM(MAC) && !PLATFORM(IPHONE) */ +#define ENABLE_FULLSCREEN_API 1 +#define ENABLE_SMOOTH_SCROLLING 1 +#define ENABLE_WEB_ARCHIVE 1 +#endif /* WTF_PLATFORM_MAC 
&& !WTF_PLATFORM_IOS */ -#if WTF_PLATFORM_CHROMIUM && WTF_PLATFORM_DARWIN -#define WTF_PLATFORM_CF 1 +#if WTF_PLATFORM_CHROMIUM && WTF_OS_DARWIN +#define WTF_USE_CF 1 #define WTF_USE_PTHREADS 1 #define HAVE_PTHREAD_RWLOCK 1 #endif -#if WTF_PLATFORM_QT && WTF_PLATFORM_DARWIN -#define WTF_PLATFORM_CF 1 +#if WTF_PLATFORM_BREWMP +#define ENABLE_SINGLE_THREADED 1 #endif -#if WTF_PLATFORM_IPHONE +#if WTF_PLATFORM_QT && WTF_OS_DARWIN +#define WTF_USE_CF 1 +#endif + +#if WTF_OS_DARWIN && !defined(BUILDING_ON_TIGER) && !WTF_PLATFORM_GTK && !WTF_PLATFORM_QT +#define ENABLE_PURGEABLE_MEMORY 1 +#endif + +#if WTF_PLATFORM_IOS #define ENABLE_CONTEXT_MENUS 0 #define ENABLE_DRAG_SUPPORT 0 +#define ENABLE_DATA_TRANSFER_ITEMS 0 #define ENABLE_FTPDIR 1 #define ENABLE_GEOLOCATION 1 #define ENABLE_ICONDATABASE 0 #define ENABLE_INSPECTOR 0 -#define ENABLE_MAC_JAVA_BRIDGE 0 +#define ENABLE_JAVA_BRIDGE 0 #define ENABLE_NETSCAPE_PLUGIN_API 0 #define ENABLE_ORIENTATION_EVENTS 1 #define ENABLE_REPAINT_THROTTLING 1 #define HAVE_READLINE 1 -#define WTF_PLATFORM_CF 1 +#define WTF_USE_CF 1 #define WTF_USE_PTHREADS 1 #define HAVE_PTHREAD_RWLOCK 1 +#define ENABLE_WEB_ARCHIVE 1 #endif #if WTF_PLATFORM_ANDROID #define WTF_USE_PTHREADS 1 -#define WTF_PLATFORM_SGL 1 #define USE_SYSTEM_MALLOC 1 -#define ENABLE_MAC_JAVA_BRIDGE 1 +#define ENABLE_JAVA_BRIDGE 1 #define LOG_DISABLED 1 -// Prevents Webkit from drawing the caret in textfields and textareas -// This prevents unnecessary invals. +/* Prevents Webkit from drawing the caret in textfields and textareas + This prevents unnecessary invals. 
*/ #define ENABLE_TEXT_CARET 1 #define ENABLE_JAVASCRIPT_DEBUGGER 0 +#if !defined(ENABLE_JIT) && !ENABLE_ANDROID_JSC_JIT +#define ENABLE_JIT 0 +#endif #endif -#if WTF_PLATFORM_WIN -#define WTF_USE_WININET 1 +#if WTF_PLATFORM_WIN && !WTF_OS_WINCE +#define WTF_USE_CF 1 +#define WTF_USE_PTHREADS 0 +#endif + +#if WTF_PLATFORM_WIN && !WTF_OS_WINCE && !WTF_PLATFORM_CHROMIUM && !defined(WIN_CAIRO) +#define WTF_USE_CFNETWORK 1 +#endif + +#if WTF_USE_CFNETWORK || WTF_PLATFORM_MAC +#define WTF_USE_CFURLCACHE 1 +#define WTF_USE_CFURLSTORAGESESSIONS 1 +#endif + +#if WTF_PLATFORM_WIN && !WTF_OS_WINCE && !WTF_PLATFORM_CHROMIUM && !WTF_PLATFORM_QT +#define ENABLE_WEB_ARCHIVE 1 #endif #if WTF_PLATFORM_WX #define ENABLE_ASSEMBLER 1 -#if WTF_PLATFORM_DARWIN -#define WTF_PLATFORM_CF 1 +#define ENABLE_GLOBAL_FASTMALLOC_NEW 0 +#if WTF_OS_DARWIN +#define WTF_USE_CF 1 +#ifndef BUILDING_ON_TIGER +#define WTF_USE_CORE_TEXT 1 +#define ENABLE_WEB_ARCHIVE 1 +#else +#define WTF_USE_ATSUI 1 +#endif #endif #endif @@ -574,25 +739,39 @@ #define ENABLE_NETSCAPE_PLUGIN_API 0 #endif +#if WTF_PLATFORM_BREWMP +#define USE_SYSTEM_MALLOC 1 +#endif + +#if WTF_PLATFORM_BREWMP_SIMULATOR +#define ENABLE_JIT 0 +#endif + #if !defined(HAVE_ACCESSIBILITY) -#if WTF_PLATFORM_IPHONE || WTF_PLATFORM_MAC || WTF_PLATFORM_WIN || WTF_PLATFORM_GTK || WTF_PLATFORM_CHROMIUM +#if WTF_PLATFORM_IOS || WTF_PLATFORM_MAC || WTF_PLATFORM_WIN || WTF_PLATFORM_GTK || WTF_PLATFORM_CHROMIUM #define HAVE_ACCESSIBILITY 1 #endif #endif /* !defined(HAVE_ACCESSIBILITY) */ -#if WTF_PLATFORM_UNIX && !WTF_PLATFORM_SYMBIAN +#if WTF_OS_UNIX && !WTF_OS_SYMBIAN #define HAVE_SIGNAL_H 1 #endif -#if !WTF_PLATFORM_WIN_OS && !WTF_PLATFORM_SOLARIS && !WTF_PLATFORM_QNX \ - && !WTF_PLATFORM_SYMBIAN && !WTF_PLATFORM_HAIKU && !WTF_COMPILER_RVCT \ - && !WTF_PLATFORM_ANDROID && !WTF_PLATFORM_OS2 +#if !defined(HAVE_STRNSTR) +#if WTF_OS_DARWIN || WTF_OS_FREEBSD +#define HAVE_STRNSTR 1 +#endif +#endif + +#if !WTF_OS_WINDOWS && !WTF_OS_SOLARIS && !WTF_OS_QNX \ + 
&& !WTF_OS_SYMBIAN && !WTF_OS_HAIKU && !WTF_OS_RVCT \ + && !WTF_OS_ANDROID && !WTF_PLATFORM_BREWMP #define HAVE_TM_GMTOFF 1 #define HAVE_TM_ZONE 1 #define HAVE_TIMEGM 1 -#endif +#endif -#if WTF_PLATFORM_DARWIN +#if WTF_OS_DARWIN #define HAVE_ERRNO_H 1 #define HAVE_LANGINFO_H 1 @@ -603,23 +782,37 @@ #define HAVE_SYS_PARAM_H 1 #define HAVE_SYS_TIME_H 1 #define HAVE_SYS_TIMEB_H 1 +#define WTF_USE_ACCELERATE 1 -#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !WTF_PLATFORM_IPHONE && !WTF_PLATFORM_QT +#if !defined(TARGETING_TIGER) && !defined(TARGETING_LEOPARD) + +#define HAVE_DISPATCH_H 1 +#define HAVE_HOSTED_CORE_ANIMATION 1 + +#if !WTF_PLATFORM_IOS #define HAVE_MADV_FREE_REUSE 1 #define HAVE_MADV_FREE 1 #define HAVE_PTHREAD_SETNAME_NP 1 #endif -#if WTF_PLATFORM_IPHONE +#endif + +#if WTF_PLATFORM_IOS #define HAVE_MADV_FREE 1 #endif -#elif WTF_PLATFORM_WIN_OS +#elif WTF_OS_WINDOWS +#if WTF_OS_WINCE +#define HAVE_ERRNO_H 0 +#else #define HAVE_SYS_TIMEB_H 1 +#define HAVE_ALIGNED_MALLOC 1 +#define HAVE_ISDEBUGGERPRESENT 1 +#endif #define HAVE_VIRTUALALLOC 1 -#elif WTF_PLATFORM_SYMBIAN +#elif WTF_OS_SYMBIAN #define HAVE_ERRNO_H 1 #define HAVE_MMAP 0 @@ -632,7 +825,11 @@ #define HAVE_SYS_PARAM_H 1 #endif -#elif WTF_PLATFORM_QNX +#elif WTF_PLATFORM_BREWMP + +#define HAVE_ERRNO_H 1 + +#elif WTF_OS_QNX #define HAVE_ERRNO_H 1 #define HAVE_MMAP 1 @@ -641,7 +838,7 @@ #define HAVE_SYS_PARAM_H 1 #define HAVE_SYS_TIME_H 1 -#elif WTF_PLATFORM_ANDROID +#elif WTF_OS_ANDROID #define HAVE_ERRNO_H 1 #define HAVE_LANGINFO_H 0 @@ -651,23 +848,13 @@ #define HAVE_SYS_PARAM_H 1 #define HAVE_SYS_TIME_H 1 -#elif WTF_PLATFORM_OS2 - -#define HAVE_MMAP 1 -#define ENABLE_ASSEMBLER 1 -#define HAVE_ERRNO_H 1 -#define HAVE_STRINGS_H 1 -#define HAVE_SYS_PARAM_H 1 -#define HAVE_SYS_TIME_H 1 -#define HAVE_SYS_TIMEB_H 1 - #else /* FIXME: is this actually used or do other platforms generate their own config.h? 
*/ #define HAVE_ERRNO_H 1 /* As long as Haiku doesn't have a complete support of locale this will be disabled. */ -#if !WTF_PLATFORM_HAIKU +#if !WTF_OS_HAIKU #define HAVE_LANGINFO_H 1 #endif #define HAVE_MMAP 1 @@ -680,6 +867,14 @@ /* ENABLE macro defaults */ +#if WTF_PLATFORM_QT +/* We must not customize the global operator new and delete for the Qt port. */ +#define ENABLE_GLOBAL_FASTMALLOC_NEW 0 +#if !WTF_OS_UNIX || WTF_OS_SYMBIAN +#define USE_SYSTEM_MALLOC 1 +#endif +#endif + /* fastMalloc match validation allows for runtime verification that new is matched by delete, fastMalloc is matched by fastFree, etc. */ #if !defined(ENABLE_FAST_MALLOC_MATCH_VALIDATION) @@ -710,6 +905,10 @@ #define ENABLE_DRAG_SUPPORT 1 #endif +#if !defined(ENABLE_DATA_TRANSFER_ITEMS) +#define ENABLE_DATA_TRANSFER_ITEMS 0 +#endif + #if !defined(ENABLE_DASHBOARD_SUPPORT) #define ENABLE_DASHBOARD_SUPPORT 0 #endif @@ -718,14 +917,22 @@ #define ENABLE_INSPECTOR 1 #endif -#if !defined(ENABLE_MAC_JAVA_BRIDGE) -#define ENABLE_MAC_JAVA_BRIDGE 0 +#if !defined(ENABLE_JAVA_BRIDGE) +#define ENABLE_JAVA_BRIDGE 0 #endif #if !defined(ENABLE_NETSCAPE_PLUGIN_API) #define ENABLE_NETSCAPE_PLUGIN_API 1 #endif +#if !defined(ENABLE_NETSCAPE_PLUGIN_METADATA_CACHE) +#define ENABLE_NETSCAPE_PLUGIN_METADATA_CACHE 0 +#endif + +#if !defined(ENABLE_PURGEABLE_MEMORY) +#define ENABLE_PURGEABLE_MEMORY 0 +#endif + #if !defined(WTF_USE_PLUGIN_HOST_PROCESS) #define WTF_USE_PLUGIN_HOST_PROCESS 0 #endif @@ -738,6 +945,11 @@ #define ENABLE_OPCODE_STATS 0 #endif +#if !defined(ENABLE_GLOBAL_FASTMALLOC_NEW) +#define ENABLE_GLOBAL_FASTMALLOC_NEW 1 +#endif + +#define ENABLE_DEBUG_WITH_BREAKPOINT 0 #define ENABLE_SAMPLING_COUNTERS 0 #define ENABLE_SAMPLING_FLAGS 0 #define ENABLE_OPCODE_SAMPLING 0 @@ -753,10 +965,18 @@ #define ENABLE_GEOLOCATION 0 #endif +#if !defined(ENABLE_GESTURE_RECOGNIZER) +#define ENABLE_GESTURE_RECOGNIZER 0 +#endif + #if !defined(ENABLE_NOTIFICATIONS) #define ENABLE_NOTIFICATIONS 0 #endif +#if 
WTF_PLATFORM_IOS +#define ENABLE_TEXT_CARET 0 +#endif + #if !defined(ENABLE_TEXT_CARET) #define ENABLE_TEXT_CARET 1 #endif @@ -765,80 +985,88 @@ #define ENABLE_ON_FIRST_TEXTAREA_FOCUS_SELECT_ALL 0 #endif -#if !defined(WTF_USE_JSVALUE64) && !defined(WTF_USE_JSVALUE32) && !defined(WTF_USE_JSVALUE32_64) -#if (WTF_CPU_X86_64 && (WTF_PLATFORM_UNIX || WTF_PLATFORM_WIN_OS)) || WTF_CPU_IA64 || WTF_CPU_ALPHA +#if !defined(ENABLE_FULLSCREEN_API) +#define ENABLE_FULLSCREEN_API 0 +#endif + +#if !defined(WTF_USE_JSVALUE64) && !defined(WTF_USE_JSVALUE32_64) +#if (WTF_CPU_X86_64 && (WTF_OS_UNIX || WTF_OS_WINDOWS)) \ + || (WTF_CPU_IA64 && !WTF_CPU_IA64_32) \ + || WTF_CPU_ALPHA \ + || WTF_CPU_SPARC64 \ + || WTF_CPU_S390X \ + || WTF_CPU_PPC64 #define WTF_USE_JSVALUE64 1 -#elif WTF_CPU_ARM || WTF_CPU_PPC64 -#define WTF_USE_JSVALUE32 1 -#elif WTF_PLATFORM_WIN_OS && WTF_COMPILER_MINGW -/* Using JSVALUE32_64 causes padding/alignement issues for JITStubArg -on MinGW. See https://bugs.webkit.org/show_bug.cgi?id=29268 */ -#define WTF_USE_JSVALUE32 1 #else #define WTF_USE_JSVALUE32_64 1 #endif -#endif /* !defined(WTF_USE_JSVALUE64) && !defined(WTF_USE_JSVALUE32) && !defined(WTF_USE_JSVALUE32_64) */ +#endif /* !defined(WTF_USE_JSVALUE64) && !defined(WTF_USE_JSVALUE32_64) */ #if !defined(ENABLE_REPAINT_THROTTLING) #define ENABLE_REPAINT_THROTTLING 0 #endif -#if !defined(ENABLE_JIT) - -/* The JIT is tested & working on x86_64 Mac */ -#if WTF_CPU_X86_64 && WTF_PLATFORM_MAC - #define ENABLE_JIT 1 -/* The JIT is tested & working on x86 Mac */ -#elif WTF_CPU_X86 && WTF_PLATFORM_MAC - #define ENABLE_JIT 1 - #define WTF_USE_JIT_STUB_ARGUMENT_VA_LIST 1 -#elif WTF_CPU_ARM_THUMB2 && WTF_PLATFORM_IPHONE - #define ENABLE_JIT 1 -/* The JIT is tested & working on x86 OS/2 */ -#elif WTF_CPU_X86 && WTF_PLATFORM_OS2 - #define ENABLE_JIT 1 -/* The JIT is tested & working on x86 Windows */ -#elif WTF_CPU_X86 && WTF_PLATFORM_WIN - #define ENABLE_JIT 1 -#elif WTF_CPU_SPARC - #define ENABLE_JIT 1 +/* Disable the 
JIT on versions of GCC prior to 4.1 */ +#if !defined(ENABLE_JIT) && WTF_COMPILER_GCC && !GCC_VERSION_AT_LEAST(4, 1, 0) +#define ENABLE_JIT 0 #endif -#if WTF_PLATFORM_QT -#if WTF_CPU_X86_64 && WTF_PLATFORM_DARWIN - #define ENABLE_JIT 1 -#elif WTF_CPU_X86 && WTF_PLATFORM_DARWIN - #define ENABLE_JIT 1 - #define WTF_USE_JIT_STUB_ARGUMENT_VA_LIST 1 -#elif WTF_CPU_X86 && WTF_PLATFORM_WIN_OS && WTF_COMPILER_MINGW && GCC_VERSION >= 40100 - #define ENABLE_JIT 1 - #define WTF_USE_JIT_STUB_ARGUMENT_VA_LIST 1 -#elif WTF_CPU_X86 && WTF_PLATFORM_WIN_OS && WTF_COMPILER_MSVC - #define ENABLE_JIT 1 - #define WTF_USE_JIT_STUB_ARGUMENT_REGISTER 1 -#elif WTF_CPU_X86 && WTF_PLATFORM_LINUX && GCC_VERSION >= 40100 - #define ENABLE_JIT 1 - #define WTF_USE_JIT_STUB_ARGUMENT_VA_LIST 1 -#elif WTF_CPU_ARM_TRADITIONAL && WTF_PLATFORM_LINUX - #define ENABLE_JIT 1 +/* JIT is not implemented for 64 bit on MSVC */ +#if !defined(ENABLE_JIT) && WTF_COMPILER_MSVC && WTF_CPU_X86_64 +#define ENABLE_JIT 0 #endif -#endif /* PLATFORM(QT) */ -#endif /* !defined(ENABLE_JIT) */ +/* The JIT is enabled by default on all x86, x64-64, ARM & MIPS platforms. */ +#if !defined(ENABLE_JIT) \ + && (WTF_CPU_X86 || WTF_CPU_X86_64 || WTF_CPU_ARM || WTF_CPU_MIPS) \ + && (WTF_OS_DARWIN || !WTF_COMPILER_GCC || GCC_VERSION_AT_LEAST(4, 1, 0)) \ + && !WTF_OS_WINCE +#define ENABLE_JIT 1 +#endif +/* Currently only implemented for JSVALUE64, only tested on WTF_PLATFORM_MAC */ +#if ENABLE_JIT && WTF_USE_JSVALUE64 && WTF_PLATFORM_MAC +#define ENABLE_DFG_JIT 1 +/* Enabled with restrictions to circumvent known performance regressions. */ +#define ENABLE_DFG_JIT_RESTRICTIONS 1 +#endif + +/* Ensure that either the JIT or the interpreter has been enabled. 
*/ +#if !defined(ENABLE_INTERPRETER) && !ENABLE_JIT +#define ENABLE_INTERPRETER 1 +#endif +#if !(ENABLE_JIT || ENABLE_INTERPRETER) +#error You have to have at least one execution model enabled to build JSC +#endif + +#if WTF_CPU_SH4 && WTF_PLATFORM_QT +#define ENABLE_JIT 1 +#define ENABLE_YARR 1 +#define ENABLE_YARR_JIT 1 +#define WTF_USE_JIT_STUB_ARGUMENT_REGISTER 1 +#define ENABLE_ASSEMBLER 1 +#endif + +/* Configure the JIT */ #if ENABLE_JIT -#ifndef ENABLE_JIT_OPTIMIZE_CALL -#define ENABLE_JIT_OPTIMIZE_CALL 1 -#endif -#ifndef ENABLE_JIT_OPTIMIZE_NATIVE_CALL -#define ENABLE_JIT_OPTIMIZE_NATIVE_CALL 1 -#endif -#ifndef ENABLE_JIT_OPTIMIZE_PROPERTY_ACCESS -#define ENABLE_JIT_OPTIMIZE_PROPERTY_ACCESS 1 -#endif -#ifndef ENABLE_JIT_OPTIMIZE_METHOD_CALLS -#define ENABLE_JIT_OPTIMIZE_METHOD_CALLS 1 -#endif + #if WTF_CPU_ARM + #if !defined(ENABLE_JIT_USE_SOFT_MODULO) && WTF_CPU_ARM && WTF_ARM_ARCH_VERSION >= 5 + #define ENABLE_JIT_USE_SOFT_MODULO 1 + #endif + #endif + + #ifndef ENABLE_JIT_OPTIMIZE_CALL + #define ENABLE_JIT_OPTIMIZE_CALL 1 + #endif + #ifndef ENABLE_JIT_OPTIMIZE_NATIVE_CALL + #define ENABLE_JIT_OPTIMIZE_NATIVE_CALL 1 + #endif + #ifndef ENABLE_JIT_OPTIMIZE_PROPERTY_ACCESS + #define ENABLE_JIT_OPTIMIZE_PROPERTY_ACCESS 1 + #endif + #ifndef ENABLE_JIT_OPTIMIZE_METHOD_CALLS + #define ENABLE_JIT_OPTIMIZE_METHOD_CALLS 1 + #endif #endif #if WTF_CPU_X86 && WTF_COMPILER_MSVC @@ -849,80 +1077,89 @@ on MinGW. See https://bugs.webkit.org/show_bug.cgi?id=29268 */ #define JSC_HOST_CALL #endif -#if WTF_COMPILER_GCC && !ENABLE_JIT +/* Configure the interpreter */ +#if WTF_COMPILER_GCC || (RVCT_VERSION_AT_LEAST(4, 0, 0, 0) && defined(__GNUC__)) #define HAVE_COMPUTED_GOTO 1 #endif - -#if ENABLE_JIT && defined(COVERAGE) - #define WTF_USE_INTERPRETER 0 -#else - #define WTF_USE_INTERPRETER 1 +#if HAVE_COMPUTED_GOTO && ENABLE_INTERPRETER +#define ENABLE_COMPUTED_GOTO_INTERPRETER 1 #endif -/* Yet Another Regex Runtime. 
*/ -#if !defined(ENABLE_YARR_JIT) +/* Regular Expression Tracing - Set to 1 to trace RegExp's in jsc. Results dumped at exit */ +#define ENABLE_REGEXP_TRACING 0 -/* YARR supports x86 & x86-64, and has been tested on Mac and Windows. */ -#if (WTF_CPU_X86 \ - || WTF_CPU_X86_64 \ - || WTF_CPU_SPARC \ - || WTF_CPU_ARM_TRADITIONAL \ - || WTF_CPU_ARM_THUMB2 \ - || WTF_CPU_X86) -#define ENABLE_YARR_JIT 1 -#else +/* Yet Another Regex Runtime - turned on by default for JIT enabled ports. */ +#if WTF_PLATFORM_CHROMIUM #define ENABLE_YARR_JIT 0 + +#elif ENABLE_JIT && !defined(ENABLE_YARR_JIT) +#define ENABLE_YARR_JIT 1 + +/* Setting this flag compares JIT results with interpreter results. */ +#define ENABLE_YARR_JIT_DEBUG 0 #endif -#endif /* !defined(ENABLE_YARR_JIT) */ - -#if (ENABLE_JIT || ENABLE_YARR_JIT) +#if ENABLE_JIT || ENABLE_YARR_JIT #define ENABLE_ASSEMBLER 1 #endif /* Setting this flag prevents the assembler from using RWX memory; this may improve security but currectly comes at a significant performance cost. */ -#if WTF_PLATFORM_IPHONE +#if WTF_PLATFORM_IOS #define ENABLE_ASSEMBLER_WX_EXCLUSIVE 1 -#else -#define ENABLE_ASSEMBLER_WX_EXCLUSIVE 0 #endif -#if !defined(ENABLE_PAN_SCROLLING) && WTF_PLATFORM_WIN_OS +/* Pick which allocator to use; we only need an executable allocator if the assembler is compiled in. + On x86-64 we use a single fixed mmap, on other platforms we mmap on demand. 
*/ +#if ENABLE_ASSEMBLER +#if WTF_CPU_X86_64 +#define ENABLE_EXECUTABLE_ALLOCATOR_FIXED 1 +#else +#define ENABLE_EXECUTABLE_ALLOCATOR_DEMAND 1 +#endif +#endif + +#if !defined(ENABLE_PAN_SCROLLING) && WTF_OS_WINDOWS #define ENABLE_PAN_SCROLLING 1 #endif -/* Use the QXmlStreamReader implementation for XMLTokenizer */ +#if !defined(ENABLE_SMOOTH_SCROLLING) +#define ENABLE_SMOOTH_SCROLLING 0 +#endif + +#if !defined(ENABLE_WEB_ARCHIVE) +#define ENABLE_WEB_ARCHIVE 0 +#endif + +/* Use the QXmlStreamReader implementation for XMLDocumentParser */ /* Use the QXmlQuery implementation for XSLTProcessor */ #if WTF_PLATFORM_QT #define WTF_USE_QXMLSTREAM 1 #define WTF_USE_QXMLQUERY 1 #endif -#if !WTF_PLATFORM_QT -#define WTF_USE_FONT_FAST_PATH 1 +#if WTF_PLATFORM_MAC +/* Complex text framework */ +#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) +#define WTF_USE_ATSUI 0 +#define WTF_USE_CORE_TEXT 1 +#else +#define WTF_USE_ATSUI 1 +#define WTF_USE_CORE_TEXT 0 +#endif #endif /* Accelerated compositing */ -#if WTF_PLATFORM_MAC -#if !defined(BUILDING_ON_TIGER) -#define WTF_USE_ACCELERATED_COMPOSITING 1 -#endif -#endif - -#if WTF_PLATFORM_IPHONE +#if (WTF_PLATFORM_MAC && !defined(BUILDING_ON_TIGER)) || WTF_PLATFORM_IOS || WTF_PLATFORM_QT || (WTF_PLATFORM_WIN && !WTF_OS_WINCE &&!defined(WIN_CAIRO)) #define WTF_USE_ACCELERATED_COMPOSITING 1 #endif -/* FIXME: Defining ENABLE_3D_RENDERING here isn't really right, but it's always used with - with WTF_USE_ACCELERATED_COMPOSITING, and it allows the feature to be turned on and - off in one place. 
*/ -//#if WTF_PLATFORM_WIN -//#include "QuartzCorePresent.h" -//#if QUARTZCORE_PRESENT -//#define WTF_USE_ACCELERATED_COMPOSITING 1 -//#define ENABLE_3D_RENDERING 1 -//#endif -//#endif +#if (WTF_PLATFORM_MAC && !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD)) || WTF_PLATFORM_IOS +#define WTF_USE_PROTECTION_SPACE_AUTH_CALLBACK 1 +#endif + +#if WTF_PLATFORM_MAC && !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_SNOW_LEOPARD) +#define WTF_USE_AVFOUNDATION 1 +#endif #if WTF_COMPILER_GCC #define WARN_UNUSED_RETURN __attribute__ ((warn_unused_result)) @@ -930,7 +1167,7 @@ on MinGW. See https://bugs.webkit.org/show_bug.cgi?id=29268 */ #define WARN_UNUSED_RETURN #endif -#if !ENABLE_NETSCAPE_PLUGIN_API || (ENABLE_NETSCAPE_PLUGIN_API && ((WTF_PLATFORM_UNIX && (WTF_PLATFORM_QT || WTF_PLATFORM_WX)) || WTF_PLATFORM_GTK)) +#if !ENABLE_NETSCAPE_PLUGIN_API || (ENABLE_NETSCAPE_PLUGIN_API && ((WTF_OS_UNIX && (WTF_PLATFORM_QT || WTF_PLATFORM_WX)) || WTF_PLATFORM_GTK)) #define ENABLE_PLUGIN_PACKAGE_SIMPLE_HASH 1 #endif @@ -939,4 +1176,46 @@ on MinGW. See https://bugs.webkit.org/show_bug.cgi?id=29268 */ #define ENABLE_JSC_ZOMBIES 0 +/* FIXME: Eventually we should enable this for all platforms and get rid of the define. */ +#if WTF_PLATFORM_MAC || WTF_PLATFORM_WIN || WTF_PLATFORM_QT +#define WTF_USE_PLATFORM_STRATEGIES 1 +#endif + +#if WTF_PLATFORM_WIN +#define WTF_USE_CROSS_PLATFORM_CONTEXT_MENUS 1 +#endif + +/* Geolocation request policy. pre-emptive policy is to acquire user permission before acquiring location. + Client based implementations will have option to choose between pre-emptive and nonpre-emptive permission policy. + pre-emptive permission policy is enabled by default for all client-based implementations. 
*/ +#if ENABLE_CLIENT_BASED_GEOLOCATION +#define WTF_USE_PREEMPT_GEOLOCATION_PERMISSION 1 +#endif + +#if WTF_CPU_ARM_THUMB2 +#define ENABLE_BRANCH_COMPACTION 1 +#endif + +#if !defined(ENABLE_THREADING_OPENMP) && defined(_OPENMP) +#define ENABLE_THREADING_OPENMP 1 +#endif + +#if !defined(ENABLE_PARALLEL_JOBS) && !ENABLE_SINGLE_THREADED && (ENABLE_THREADING_GENERIC || ENABLE_THREADING_LIBDISPATCH || ENABLE_THREADING_OPENMP) +#define ENABLE_PARALLEL_JOBS 1 +#endif + +#if ENABLE_GLIB_SUPPORT +#include "GTypedefs.h" +#endif + +/* FIXME: This define won't be needed once #27551 is fully landed. However, + since most ports try to support sub-project independence, adding new headers + to WTF causes many ports to break, and so this way we can address the build + breakages one port at a time. */ +#define WTF_USE_EXPORT_MACROS 0 + +#if WTF_PLATFORM_QT || WTF_PLATFORM_GTK +#define WTF_USE_UNIX_DOMAIN_SOCKETS 1 +#endif + #endif /* WTF_Platform_h */ diff --git a/js/src/jit-test/tests/basic/bug632964-regexp.js b/js/src/jit-test/tests/basic/bug632964-regexp.js index 7151d3713647..75612dbc735d 100644 --- a/js/src/jit-test/tests/basic/bug632964-regexp.js +++ b/js/src/jit-test/tests/basic/bug632964-regexp.js @@ -1,5 +1,3 @@ -// |jit-test| error: InternalError: regular expression too complex - var sText = "s"; for (var i = 0; i < 250000; ++i) @@ -12,6 +10,5 @@ var match = sText.match(/s(\s|.)*?e/gi); //var match = sText.match(/s([\s\S]*?)e/gi); //var match = sText.match(/s(?:[\s\S]*?)e/gi); var end = new Date(); -print(end - start); assertEq(match.length, 1); diff --git a/js/src/jscompartment.cpp b/js/src/jscompartment.cpp index d210893a4791..ef0c73bf24b9 100644 --- a/js/src/jscompartment.cpp +++ b/js/src/jscompartment.cpp @@ -48,6 +48,7 @@ #include "jstracer.h" #include "jswrapper.h" #include "assembler/wtf/Platform.h" +#include "yarr/BumpPointerAllocator.h" #include "methodjit/MethodJIT.h" #include "methodjit/PolyIC.h" #include "methodjit/MonoIC.h" @@ -73,6 +74,9 @@ 
JSCompartment::JSCompartment(JSRuntime *rt) active(false), #ifdef JS_METHODJIT jaegerCompartment(NULL), +#endif +#if ENABLE_YARR_JIT + regExpAllocator(NULL), #endif propertyTree(thisForCtor()), emptyArgumentsShape(NULL), @@ -84,9 +88,6 @@ JSCompartment::JSCompartment(JSRuntime *rt) initialRegExpShape(NULL), initialStringShape(NULL), debugMode(rt->debugMode), -#if ENABLE_YARR_JIT - regExpAllocator(NULL), -#endif mathCache(NULL) { JS_INIT_CLIST(&scripts); @@ -135,11 +136,9 @@ JSCompartment::init() return false; #endif -#if ENABLE_YARR_JIT - regExpAllocator = rt->new_(); + regExpAllocator = rt->new_(); if (!regExpAllocator) return false; -#endif if (!backEdgeTable.init()) return false; diff --git a/js/src/jscompartment.h b/js/src/jscompartment.h index 5b3f16643050..8827a275f796 100644 --- a/js/src/jscompartment.h +++ b/js/src/jscompartment.h @@ -54,11 +54,8 @@ #pragma warning(disable:4251) /* Silence warning about JS_FRIEND_API and data members. */ #endif -namespace JSC { - -class ExecutableAllocator; - -} +namespace JSC { class ExecutableAllocator; } +namespace WTF { class BumpPointerAllocator; } namespace js { @@ -420,6 +417,7 @@ struct JS_FRIEND_API(JSCompartment) { */ size_t getMjitCodeSize() const; #endif + WTF::BumpPointerAllocator *regExpAllocator; /* * Shared scope property tree, and arena-pool for allocating its nodes. 
@@ -466,8 +464,6 @@ struct JS_FRIEND_API(JSCompartment) { bool debugMode; // true iff debug mode on JSCList scripts; // scripts in this compartment - JSC::ExecutableAllocator *regExpAllocator; - js::NativeIterCache nativeIterCache; typedef js::Maybe LazyToSourceCache; diff --git a/js/src/jsregexp.cpp b/js/src/jsregexp.cpp index 0df8ae536a47..3d9f09f56559 100644 --- a/js/src/jsregexp.cpp +++ b/js/src/jsregexp.cpp @@ -59,8 +59,6 @@ #include "jsobjinlines.h" #include "jsregexpinlines.h" -#include "yarr/RegexParser.h" - #ifdef JS_TRACER #include "jstracer.h" using namespace nanojit; @@ -193,11 +191,11 @@ js_ObjectIsRegExp(JSObject *obj) */ void -RegExp::handleYarrError(JSContext *cx, int error) +RegExp::reportYarrError(JSContext *cx, JSC::Yarr::ErrorCode error) { switch (error) { case JSC::Yarr::NoError: - JS_NOT_REACHED("Precondition violation: an error must have occurred."); + JS_NOT_REACHED("Called reportYarrError with value for no error"); return; #define COMPILE_EMSG(__code, __msg) \ case JSC::Yarr::__code: \ @@ -210,49 +208,16 @@ RegExp::handleYarrError(JSContext *cx, int error) COMPILE_EMSG(ParenthesesUnmatched, JSMSG_UNMATCHED_RIGHT_PAREN); COMPILE_EMSG(ParenthesesTypeInvalid, JSMSG_BAD_QUANTIFIER); /* "(?" 
with bad next char */ COMPILE_EMSG(CharacterClassUnmatched, JSMSG_BAD_CLASS_RANGE); + COMPILE_EMSG(CharacterClassInvalidRange, JSMSG_BAD_CLASS_RANGE); COMPILE_EMSG(CharacterClassOutOfOrder, JSMSG_BAD_CLASS_RANGE); - COMPILE_EMSG(CharacterClassRangeSingleChar, JSMSG_BAD_CLASS_RANGE); - COMPILE_EMSG(EscapeUnterminated, JSMSG_TRAILING_SLASH); COMPILE_EMSG(QuantifierTooLarge, JSMSG_BAD_QUANTIFIER); - COMPILE_EMSG(HitRecursionLimit, JSMSG_REGEXP_TOO_COMPLEX); + COMPILE_EMSG(EscapeUnterminated, JSMSG_TRAILING_SLASH); #undef COMPILE_EMSG default: - JS_NOT_REACHED("Precondition violation: unknown Yarr error code."); + JS_NOT_REACHED("Unknown Yarr error code"); } } -void -RegExp::handlePCREError(JSContext *cx, int error) -{ -#define REPORT(msg_) \ - JS_ReportErrorFlagsAndNumberUC(cx, JSREPORT_ERROR, js_GetErrorMessage, NULL, msg_); \ - return - switch (error) { - case -2: REPORT(JSMSG_REGEXP_TOO_COMPLEX); - case 0: JS_NOT_REACHED("Precondition violation: an error must have occurred."); - case 1: REPORT(JSMSG_TRAILING_SLASH); - case 2: REPORT(JSMSG_TRAILING_SLASH); - case 3: REPORT(JSMSG_REGEXP_TOO_COMPLEX); - case 4: REPORT(JSMSG_BAD_QUANTIFIER); - case 5: REPORT(JSMSG_BAD_QUANTIFIER); - case 6: REPORT(JSMSG_BAD_CLASS_RANGE); - case 7: REPORT(JSMSG_REGEXP_TOO_COMPLEX); - case 8: REPORT(JSMSG_BAD_CLASS_RANGE); - case 9: REPORT(JSMSG_BAD_QUANTIFIER); - case 10: REPORT(JSMSG_UNMATCHED_RIGHT_PAREN); - case 11: REPORT(JSMSG_REGEXP_TOO_COMPLEX); - case 12: REPORT(JSMSG_UNMATCHED_RIGHT_PAREN); - case 13: REPORT(JSMSG_REGEXP_TOO_COMPLEX); - case 14: REPORT(JSMSG_MISSING_PAREN); - case 15: REPORT(JSMSG_BAD_BACKREF); - case 16: REPORT(JSMSG_REGEXP_TOO_COMPLEX); - case 17: REPORT(JSMSG_REGEXP_TOO_COMPLEX); - default: - JS_NOT_REACHED("Precondition violation: unknown PCRE error code."); - } -#undef REPORT -} - bool RegExp::parseFlags(JSContext *cx, JSString *flagStr, uintN *flagsOut) { @@ -929,3 +894,4 @@ js_InitRegExpClass(JSContext *cx, JSObject *global) return proto; } + diff --git 
a/js/src/jsregexpinlines.h b/js/src/jsregexpinlines.h index 70db45413e1d..d1c073147225 100644 --- a/js/src/jsregexpinlines.h +++ b/js/src/jsregexpinlines.h @@ -48,12 +48,13 @@ #include "jsobjinlines.h" #include "jsstrinlines.h" +#include "methodjit/MethodJIT.h" #include "assembler/wtf/Platform.h" +#include "yarr/BumpPointerAllocator.h" +#include "yarr/Yarr.h" #if ENABLE_YARR_JIT -#include "yarr/yarr/RegexJIT.h" -#else -#include "yarr/pcre/pcre.h" +#include "yarr/YarrJIT.h" #endif namespace js { @@ -95,10 +96,10 @@ regexp_statics_construct(JSContext *cx, GlobalObject *parent) class RegExp { #if ENABLE_YARR_JIT - JSC::Yarr::RegexCodeBlock compiled; -#else - JSRegExp *compiled; + /* native code is valid only if codeBlock.isFallBack() == false */ + JSC::Yarr::YarrCodeBlock codeBlock; #endif + JSC::Yarr::BytecodePattern *byteCode; JSLinearString *source; size_t refCount; unsigned parenCount; /* Must be |unsigned| to interface with YARR. */ @@ -111,7 +112,11 @@ class RegExp #endif RegExp(JSLinearString *source, uint32 flags, JSCompartment *compartment) - : compiled(), source(source), refCount(1), parenCount(0), flags(flags) + : +#if ENABLE_YARR_JIT + codeBlock(), +#endif + byteCode(NULL), source(source), refCount(1), parenCount(0), flags(flags) #ifdef DEBUG , compartment(compartment) #endif @@ -120,17 +125,18 @@ class RegExp JS_DECLARE_ALLOCATION_FRIENDS_FOR_PRIVATE_CONSTRUCTOR; ~RegExp() { -#if !ENABLE_YARR_JIT - if (compiled) - jsRegExpFree(compiled); +#if ENABLE_YARR_JIT + codeBlock.release(); #endif + // YYY + if (byteCode) + delete byteCode; } bool compileHelper(JSContext *cx, JSLinearString &pattern); bool compile(JSContext *cx); static const uint32 allFlags = JSREG_FOLD | JSREG_GLOB | JSREG_MULTILINE | JSREG_STICKY; - void handlePCREError(JSContext *cx, int error); - void handleYarrError(JSContext *cx, int error); + void reportYarrError(JSContext *cx, JSC::Yarr::ErrorCode error); static inline bool initArena(JSContext *cx); static inline void 
checkMatchPairs(JSString *input, int *buf, size_t matchItemCount); static JSObject *createResult(JSContext *cx, JSString *input, int *buf, size_t matchItemCount); @@ -318,9 +324,6 @@ inline bool RegExp::executeInternal(JSContext *cx, RegExpStatics *res, JSString *inputstr, size_t *lastIndex, bool test, Value *rval) { -#if !ENABLE_YARR_JIT - JS_ASSERT(compiled); -#endif const size_t pairCount = parenCount + 1; const size_t bufCount = pairCount * 3; /* Should be x2, but PCRE has... needs. */ const size_t matchItemCount = pairCount * 2; @@ -360,27 +363,20 @@ RegExp::executeInternal(JSContext *cx, RegExpStatics *res, JSString *inputstr, inputOffset = *lastIndex; } + int result; #if ENABLE_YARR_JIT - int result = JSC::Yarr::executeRegex(cx, compiled, chars, *lastIndex - inputOffset, len, buf, - bufCount); + if (!codeBlock.isFallBack()) + result = JSC::Yarr::execute(codeBlock, chars, *lastIndex - inputOffset, len, buf); + else + result = JSC::Yarr::interpret(byteCode, chars, *lastIndex - inputOffset, len, buf); #else - int result = jsRegExpExecute(cx, compiled, chars, len, *lastIndex - inputOffset, buf, - bufCount); + result = JSC::Yarr::interpret(byteCode, chars, *lastIndex - inputOffset, len, buf); #endif if (result == -1) { *rval = NullValue(); return true; } - if (result < 0) { -#if ENABLE_YARR_JIT - handleYarrError(cx, result); -#else - handlePCREError(cx, result); -#endif - return false; - } - /* * Adjust buf for the inputOffset. Use of sticky is rare and the matchItemCount is small, so * just do another pass. @@ -460,53 +456,43 @@ RegExp::createObjectNoStatics(JSContext *cx, const jschar *chars, size_t length, return obj; } -#ifdef ANDROID -static bool -YarrJITIsBroken(JSContext *cx) +/* + * This function should be deleted once we can. See bug 604774. + */ +static inline bool +EnableYarrJIT(JSContext *cx) { -#if defined(JS_TRACER) && defined(JS_METHODJIT) - /* FIXME/bug 604774: dead code walking. 
- * - * If both JITs are disabled, assume they were disabled because - * we're running on a blacklisted device. - */ - return !cx->traceJitEnabled && !cx->methodJitEnabled; +#if defined ANDROID && defined(JS_TRACER) && defined(JS_METHODJIT) + return cx->traceJitEnabled || cx->methodJitEnabled; #else - return false; + return true; #endif } -#endif /* ANDROID */ inline bool RegExp::compileHelper(JSContext *cx, JSLinearString &pattern) { -#if ENABLE_YARR_JIT - bool fellBack = false; - int error = 0; - jitCompileRegex(*cx->compartment->regExpAllocator, compiled, pattern, parenCount, error, fellBack, ignoreCase(), multiline() -#ifdef ANDROID - /* Temporary gross hack to work around buggy kernels. */ - , YarrJITIsBroken(cx) -#endif -); - if (!error) - return true; - if (fellBack) - handlePCREError(cx, error); - else - handleYarrError(cx, error); - return false; -#else - int error = 0; - compiled = jsRegExpCompile(pattern.chars(), pattern.length(), - ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase, - multiline() ? 
JSRegExpMultiline : JSRegExpSingleLine, - &parenCount, &error); - if (!error) - return true; - handlePCREError(cx, error); - return false; + JSC::Yarr::ErrorCode yarrError; + JSC::Yarr::YarrPattern yarrPattern(pattern, ignoreCase(), multiline(), &yarrError); + if (yarrError) { + reportYarrError(cx, yarrError); + return false; + } + parenCount = yarrPattern.m_numSubpatterns; + +#if ENABLE_YARR_JIT && defined(JS_METHODJIT) + if (EnableYarrJIT(cx) && !yarrPattern.m_containsBackreferences) { + JSC::Yarr::JSGlobalData globalData(cx->compartment->jaegerCompartment->execAlloc()); + JSC::Yarr::jitCompile(yarrPattern, &globalData, codeBlock); + if (!codeBlock.isFallBack()) + return true; + } #endif + + codeBlock.setFallBack(true); + byteCode = JSC::Yarr::byteCompile(yarrPattern, cx->compartment->regExpAllocator).get(); + + return true; } inline bool diff --git a/js/src/jsvector.h b/js/src/jsvector.h index 3d413c1f64b6..4eaf58b6a4e7 100644 --- a/js/src/jsvector.h +++ b/js/src/jsvector.h @@ -208,12 +208,30 @@ class Vector : private AllocPolicy /* compute constants */ + /* + * Consider element size to be 1 for buffer sizing if there are + * 0 inline elements. This allows us to compile when the definition + * of the element type is not visible here. + * + * Explicit specialization is only allowed at namespace scope, so + * in order to keep everything here, we use a dummy template + * parameter with partial specialization. + */ + template + struct ElemSize { + static const size_t result = sizeof(T); + }; + template + struct ElemSize<0, Dummy> { + static const size_t result = 1; + }; + static const size_t sInlineCapacity = - tl::Min::result; + tl::Min::result>::result; /* Calculate inline buffer size; avoid 0-sized array. 
*/ static const size_t sInlineBytes = - tl::Max<1, sInlineCapacity * sizeof(T)>::result; + tl::Max<1, sInlineCapacity * ElemSize::result>::result; /* member data */ diff --git a/js/src/methodjit/Compiler.cpp b/js/src/methodjit/Compiler.cpp index f509598c4b7e..0db553b77d73 100644 --- a/js/src/methodjit/Compiler.cpp +++ b/js/src/methodjit/Compiler.cpp @@ -503,7 +503,7 @@ mjit::Compiler::finishThisUp(JITScript **jitp) analyze::Bytecode *opinfo = analysis->maybeCode(i); if (opinfo && opinfo->safePoint) { Label L = jumpMap[i]; - JS_ASSERT(L.isValid()); + JS_ASSERT(L.isSet()); jitNmap[ix].bcOff = i; jitNmap[ix].ncode = (uint8 *)(result + masm.distanceOf(L)); ix++; @@ -625,7 +625,7 @@ mjit::Compiler::finishThisUp(JITScript **jitp) cursor += sizeof(ic::EqualityICInfo) * jit->nEqualityICs; for (size_t i = 0; i < jit->nEqualityICs; i++) { uint32 offs = uint32(equalityICs[i].jumpTarget - script->code); - JS_ASSERT(jumpMap[offs].isValid()); + JS_ASSERT(jumpMap[offs].isSet()); jitEqualityICs[i].target = fullCode.locationOf(jumpMap[offs]); jitEqualityICs[i].stubEntry = stubCode.locationOf(equalityICs[i].stubEntry); jitEqualityICs[i].stubCall = stubCode.locationOf(equalityICs[i].stubCall); @@ -650,7 +650,7 @@ mjit::Compiler::finishThisUp(JITScript **jitp) continue; uint32 offs = uint32(traceICs[i].jumpTarget - script->code); - JS_ASSERT(jumpMap[offs].isValid()); + JS_ASSERT(jumpMap[offs].isSet()); jitTraceICs[i].traceHint = fullCode.locationOf(traceICs[i].traceHint); jitTraceICs[i].jumpTarget = fullCode.locationOf(jumpMap[offs]); jitTraceICs[i].stubEntry = stubCode.locationOf(traceICs[i].stubEntry); @@ -800,7 +800,7 @@ mjit::Compiler::finishThisUp(JITScript **jitp) for (size_t i = 0; i < jumpTableOffsets.length(); i++) { uint32 offset = jumpTableOffsets[i]; - JS_ASSERT(jumpMap[offset].isValid()); + JS_ASSERT(jumpMap[offset].isSet()); jumpVec[i] = (void *)(result + masm.distanceOf(jumpMap[offset])); } @@ -2089,7 +2089,7 @@ JSC::MacroAssembler::Label 
mjit::Compiler::labelOf(jsbytecode *pc) { uint32 offs = uint32(pc - script->code); - JS_ASSERT(jumpMap[offs].isValid()); + JS_ASSERT(jumpMap[offs].isSet()); return jumpMap[offs]; } diff --git a/js/src/methodjit/MethodJIT.cpp b/js/src/methodjit/MethodJIT.cpp index bd738b92ba2b..b55b0dba48a5 100644 --- a/js/src/methodjit/MethodJIT.cpp +++ b/js/src/methodjit/MethodJIT.cpp @@ -846,12 +846,7 @@ static inline void Destroy(T &t) mjit::JITScript::~JITScript() { -#if defined DEBUG && (defined JS_CPU_X86 || defined JS_CPU_X64) - void *addr = code.m_code.executableAddress(); - memset(addr, 0xcc, code.m_size); -#endif - - code.m_executablePool->release(); + code.release(); #if defined JS_POLYIC ic::GetElementIC *getElems_ = getElems(); diff --git a/js/src/methodjit/TrampolineCompiler.cpp b/js/src/methodjit/TrampolineCompiler.cpp index a6ac9d709f0e..77bb148ba311 100644 --- a/js/src/methodjit/TrampolineCompiler.cpp +++ b/js/src/methodjit/TrampolineCompiler.cpp @@ -93,7 +93,7 @@ TrampolineCompiler::compileTrampoline(Trampolines::TrampolinePtr *where, Label entry = masm.label(); CHECK_RESULT(generator(masm)); - JS_ASSERT(entry.isValid()); + JS_ASSERT(entry.isSet()); bool ok; JSC::LinkBuffer buffer(&masm, execAlloc, poolp, &ok); diff --git a/js/src/yarr/wtf/ASCIICType.h b/js/src/yarr/ASCIICType.h similarity index 81% rename from js/src/yarr/wtf/ASCIICType.h rename to js/src/yarr/ASCIICType.h index cf53d9ac0c87..a3ae9f4455e2 100644 --- a/js/src/yarr/wtf/ASCIICType.h +++ b/js/src/yarr/ASCIICType.h @@ -1,4 +1,7 @@ -/* +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** * Copyright (C) 2007, 2008, 2009 Apple Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -24,12 +27,13 @@ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ + * + * ***** END LICENSE BLOCK ***** */ #ifndef WTF_ASCIICType_h #define WTF_ASCIICType_h -#include "yarr/jswtfbridge.h" +#include "assembler/wtf/Assertions.h" // The behavior of many of the functions in the header is dependent // on the current locale. But in the WebKit project, all uses of those functions @@ -49,6 +53,7 @@ namespace WTF { inline bool isASCII(wchar_t c) { return !(c & ~0x7F); } #endif inline bool isASCII(int c) { return !(c & ~0x7F); } + inline bool isASCII(unsigned c) { return !(c & ~0x7F); } inline bool isASCIIAlpha(char c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; } inline bool isASCIIAlpha(unsigned short c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; } @@ -56,6 +61,7 @@ namespace WTF { inline bool isASCIIAlpha(wchar_t c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; } #endif inline bool isASCIIAlpha(int c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; } + inline bool isASCIIAlpha(unsigned c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; } inline bool isASCIIAlphanumeric(char c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); } inline bool isASCIIAlphanumeric(unsigned short c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); } @@ -63,6 +69,7 @@ namespace WTF { inline bool isASCIIAlphanumeric(wchar_t c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); } #endif inline bool isASCIIAlphanumeric(int c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); } + inline bool isASCIIAlphanumeric(unsigned c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); } inline bool isASCIIDigit(char c) 
{ return (c >= '0') & (c <= '9'); } inline bool isASCIIDigit(unsigned short c) { return (c >= '0') & (c <= '9'); } @@ -70,6 +77,7 @@ namespace WTF { inline bool isASCIIDigit(wchar_t c) { return (c >= '0') & (c <= '9'); } #endif inline bool isASCIIDigit(int c) { return (c >= '0') & (c <= '9'); } + inline bool isASCIIDigit(unsigned c) { return (c >= '0') & (c <= '9'); } inline bool isASCIIHexDigit(char c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); } inline bool isASCIIHexDigit(unsigned short c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); } @@ -77,6 +85,7 @@ namespace WTF { inline bool isASCIIHexDigit(wchar_t c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); } #endif inline bool isASCIIHexDigit(int c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); } + inline bool isASCIIHexDigit(unsigned c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); } inline bool isASCIIOctalDigit(char c) { return (c >= '0') & (c <= '7'); } inline bool isASCIIOctalDigit(unsigned short c) { return (c >= '0') & (c <= '7'); } @@ -84,6 +93,7 @@ namespace WTF { inline bool isASCIIOctalDigit(wchar_t c) { return (c >= '0') & (c <= '7'); } #endif inline bool isASCIIOctalDigit(int c) { return (c >= '0') & (c <= '7'); } + inline bool isASCIIOctalDigit(unsigned c) { return (c >= '0') & (c <= '7'); } inline bool isASCIILower(char c) { return c >= 'a' && c <= 'z'; } inline bool isASCIILower(unsigned short c) { return c >= 'a' && c <= 'z'; } @@ -91,6 +101,7 @@ namespace WTF { inline bool isASCIILower(wchar_t c) { return c >= 'a' && c <= 'z'; } #endif inline bool isASCIILower(int c) { return c >= 'a' && c <= 'z'; } + inline bool isASCIILower(unsigned c) { return c >= 'a' && c <= 'z'; } inline bool isASCIIUpper(char c) { return c >= 'A' && c <= 'Z'; } inline bool isASCIIUpper(unsigned short c) { return c >= 'A' && c <= 'Z'; } @@ -98,6 +109,7 @@ namespace WTF 
{ inline bool isASCIIUpper(wchar_t c) { return c >= 'A' && c <= 'Z'; } #endif inline bool isASCIIUpper(int c) { return c >= 'A' && c <= 'Z'; } + inline bool isASCIIUpper(unsigned c) { return c >= 'A' && c <= 'Z'; } /* Statistics from a run of Apple's page load test for callers of isASCIISpace: @@ -118,6 +130,7 @@ namespace WTF { inline bool isASCIISpace(wchar_t c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); } #endif inline bool isASCIISpace(int c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); } + inline bool isASCIISpace(unsigned c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); } inline char toASCIILower(char c) { return c | ((c >= 'A' && c <= 'Z') << 5); } inline unsigned short toASCIILower(unsigned short c) { return c | ((c >= 'A' && c <= 'Z') << 5); } @@ -125,20 +138,24 @@ namespace WTF { inline wchar_t toASCIILower(wchar_t c) { return c | ((c >= 'A' && c <= 'Z') << 5); } #endif inline int toASCIILower(int c) { return c | ((c >= 'A' && c <= 'Z') << 5); } + inline unsigned toASCIILower(unsigned c) { return c | ((c >= 'A' && c <= 'Z') << 5); } + // FIXME: Why do these need static_cast? inline char toASCIIUpper(char c) { return static_cast(c & ~((c >= 'a' && c <= 'z') << 5)); } inline unsigned short toASCIIUpper(unsigned short c) { return static_cast(c & ~((c >= 'a' && c <= 'z') << 5)); } #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED) inline wchar_t toASCIIUpper(wchar_t c) { return static_cast(c & ~((c >= 'a' && c <= 'z') << 5)); } #endif inline int toASCIIUpper(int c) { return static_cast(c & ~((c >= 'a' && c <= 'z') << 5)); } + inline unsigned toASCIIUpper(unsigned c) { return static_cast(c & ~((c >= 'a' && c <= 'z') << 5)); } - inline int toASCIIHexValue(char c) { JS_ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; } - inline int toASCIIHexValue(unsigned short c) { JS_ASSERT(isASCIIHexDigit(c)); return c < 'A' ? 
c - '0' : (c - 'A' + 10) & 0xF; } + inline int toASCIIHexValue(char c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; } + inline int toASCIIHexValue(unsigned short c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; } #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED) - inline int toASCIIHexValue(wchar_t c) { JS_ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; } + inline int toASCIIHexValue(wchar_t c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; } #endif - inline int toASCIIHexValue(int c) { JS_ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; } + inline int toASCIIHexValue(int c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; } + inline int toASCIIHexValue(unsigned c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; } inline bool isASCIIPrintable(char c) { return c >= ' ' && c <= '~'; } inline bool isASCIIPrintable(unsigned short c) { return c >= ' ' && c <= '~'; } @@ -146,7 +163,7 @@ namespace WTF { inline bool isASCIIPrintable(wchar_t c) { return c >= ' ' && c <= '~'; } #endif inline bool isASCIIPrintable(int c) { return c >= ' ' && c <= '~'; } - + inline bool isASCIIPrintable(unsigned c) { return c >= ' ' && c <= '~'; } } using WTF::isASCII; diff --git a/js/src/yarr/BumpPointerAllocator.h b/js/src/yarr/BumpPointerAllocator.h new file mode 100644 index 000000000000..8ef5a780f9d8 --- /dev/null +++ b/js/src/yarr/BumpPointerAllocator.h @@ -0,0 +1,254 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef BumpPointerAllocator_h +#define BumpPointerAllocator_h + +#include "PageAllocation.h" + +namespace WTF { + +#define MINIMUM_BUMP_POOL_SIZE 0x1000 + +class BumpPointerPool { +public: + // ensureCapacity will check whether the current pool has capacity to + // allocate 'size' bytes of memory If it does not, it will attempt to + // allocate a new pool (which will be added to this one in a chain). + // + // If allocation fails (out of memory) this method will return null. + // If the return value is non-null, then callers should update any + // references they have to this current (possibly full) BumpPointerPool + // to instead point to the newly returned BumpPointerPool. 
+ BumpPointerPool* ensureCapacity(size_t size) + { + void* allocationEnd = static_cast(m_current) + size; + ASSERT(allocationEnd > m_current); // check for overflow + if (allocationEnd <= static_cast(this)) + return this; + return ensureCapacityCrossPool(this, size); + } + + // alloc should only be called after calling ensureCapacity; as such + // alloc will never fail. + void* alloc(size_t size) + { + void* current = m_current; + void* allocationEnd = static_cast(current) + size; + ASSERT(allocationEnd > current); // check for overflow + ASSERT(allocationEnd <= static_cast(this)); + m_current = allocationEnd; + return current; + } + + // The dealloc method releases memory allocated using alloc. Memory + // must be released in a LIFO fashion, e.g. if the client calls alloc + // four times, returning pointer A, B, C, D, then the only valid order + // in which these may be deallocaed is D, C, B, A. + // + // The client may optionally skip some deallocations. In the example + // above, it would be valid to only explicitly dealloc C, A (D being + // dealloced along with C, B along with A). + // + // If pointer was not allocated from this pool (or pools) then dealloc + // will CRASH(). Callers should update any references they have to + // this current BumpPointerPool to instead point to the returned + // BumpPointerPool. + BumpPointerPool* dealloc(void* position) + { + if ((position >= m_start) && (position <= static_cast(this))) { + ASSERT(position <= m_current); + m_current = position; + return this; + } + return deallocCrossPool(this, position); + } + +private: + // Placement operator new, returns the last 'size' bytes of allocation for use as this. 
+ void* operator new(size_t size, const PageAllocation& allocation) + { + ASSERT(size < allocation.size()); + return reinterpret_cast(reinterpret_cast(allocation.base()) + allocation.size()) - size; + } + + BumpPointerPool(const PageAllocation& allocation) + : m_current(allocation.base()) + , m_start(allocation.base()) + , m_next(0) + , m_previous(0) + , m_allocation(allocation) + { + } + + static BumpPointerPool* create(size_t minimumCapacity = 0) + { + // Add size of BumpPointerPool object, check for overflow. + minimumCapacity += sizeof(BumpPointerPool); + if (minimumCapacity < sizeof(BumpPointerPool)) + return 0; + + size_t poolSize = MINIMUM_BUMP_POOL_SIZE; + while (poolSize < minimumCapacity) { + poolSize <<= 1; + // The following if check relies on MINIMUM_BUMP_POOL_SIZE being a power of 2! + ASSERT(!(MINIMUM_BUMP_POOL_SIZE & (MINIMUM_BUMP_POOL_SIZE - 1))); + if (!poolSize) + return 0; + } + + PageAllocation allocation = PageAllocation::allocate(poolSize); + if (!!allocation) + return new(allocation) BumpPointerPool(allocation); + return 0; + } + + void shrink() + { + ASSERT(!m_previous); + m_current = m_start; + while (m_next) { + BumpPointerPool* nextNext = m_next->m_next; + m_next->destroy(); + m_next = nextNext; + } + } + + void destroy() + { + m_allocation.deallocate(); + } + + static BumpPointerPool* ensureCapacityCrossPool(BumpPointerPool* previousPool, size_t size) + { + // The pool passed should not have capacity, so we'll start with the next one. + ASSERT(previousPool); + ASSERT((static_cast(previousPool->m_current) + size) > previousPool->m_current); // check for overflow + ASSERT((static_cast(previousPool->m_current) + size) > static_cast(previousPool)); + BumpPointerPool* pool = previousPool->m_next; + + while (true) { + if (!pool) { + // We've run to the end; allocate a new pool. 
+ pool = BumpPointerPool::create(size); + previousPool->m_next = pool; + pool->m_previous = previousPool; + return pool; + } + + // + void* current = pool->m_current; + void* allocationEnd = static_cast(current) + size; + ASSERT(allocationEnd > current); // check for overflow + if (allocationEnd <= static_cast(pool)) + return pool; + } + } + + static BumpPointerPool* deallocCrossPool(BumpPointerPool* pool, void* position) + { + // Should only be called if position is not in the current pool. + ASSERT((position < pool->m_start) || (position > static_cast(pool))); + + while (true) { + // Unwind the current pool to the start, move back in the chain to the previous pool. + pool->m_current = pool->m_start; + pool = pool->m_previous; + + // position was nowhere in the chain! + if (!pool) + CRASH(); + + if ((position >= pool->m_start) && (position <= static_cast(pool))) { + ASSERT(position <= pool->m_current); + pool->m_current = position; + return pool; + } + } + } + + void* m_current; + void* m_start; + BumpPointerPool* m_next; + BumpPointerPool* m_previous; + PageAllocation m_allocation; + + friend class BumpPointerAllocator; +}; + +// A BumpPointerAllocator manages a set of BumpPointerPool objects, which +// can be used for LIFO (stack like) allocation. +// +// To begin allocating using this class call startAllocator(). The result +// of this method will be null if the initial pool allocation fails, or a +// pointer to a BumpPointerPool object that can be used to perform +// allocations. Whilst running no memory will be released until +// stopAllocator() is called. At this point all allocations made through +// this allocator will be reaped, and underlying memory may be freed. +// +// (In practice we will still hold on to the initial pool to allow allocation +// to be quickly restared, but aditional pools will be freed). 
+// +// This allocator is non-renetrant, it is encumbant on the clients to ensure +// startAllocator() is not called again until stopAllocator() has been called. +class BumpPointerAllocator { +public: + BumpPointerAllocator() + : m_head(0) + { + } + + ~BumpPointerAllocator() + { + if (m_head) + m_head->destroy(); + } + + BumpPointerPool* startAllocator() + { + if (!m_head) + m_head = BumpPointerPool::create(); + return m_head; + } + + void stopAllocator() + { + if (m_head) + m_head->shrink(); + } + +private: + BumpPointerPool* m_head; +}; + +} + +using WTF::BumpPointerAllocator; + +#endif // BumpPointerAllocator_h diff --git a/js/src/yarr/Makefile b/js/src/yarr/Makefile deleted file mode 100644 index c824cdb96b6c..000000000000 --- a/js/src/yarr/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -INCLUDES := -I. -Iyarr -Iwtf -I../assembler/assembler -I../assembler - -all: - $(CXX) -g3 -c $(INCLUDES) yarr/*.cpp - $(CXX) -g3 $(INCLUDES) TestMain.cpp *.o diff --git a/js/src/yarr/OSAllocator.h b/js/src/yarr/OSAllocator.h new file mode 100644 index 000000000000..ecfdc3b042ec --- /dev/null +++ b/js/src/yarr/OSAllocator.h @@ -0,0 +1,103 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef OSAllocator_h +#define OSAllocator_h + +#include +#include "wtfbridge.h" +#include "assembler/wtf/VMTags.h" +#include "assembler/wtf/Assertions.h" + +namespace WTF { + +class OSAllocator { +public: + enum Usage { + UnknownUsage = -1, + FastMallocPages = VM_TAG_FOR_TCMALLOC_MEMORY, + JSGCHeapPages = VM_TAG_FOR_COLLECTOR_MEMORY, + JSVMStackPages = VM_TAG_FOR_REGISTERFILE_MEMORY, + JSJITCodePages = VM_TAG_FOR_EXECUTABLEALLOCATOR_MEMORY + }; + + // These methods are symmetric; reserveUncommitted allocates VM in an uncommitted state, + // releaseDecommitted should be called on a region of VM allocated by a single reservation, + // the memory must all currently be in a decommitted state. + static void* reserveUncommitted(size_t, Usage = UnknownUsage, bool writable = true, bool executable = false); + static void releaseDecommitted(void*, size_t); + + // These methods are symmetric; they commit or decommit a region of VM (uncommitted VM should + // never be accessed, since the OS may not have attached physical memory for these regions). + // Clients should only call commit on uncommitted regions and decommit on committed regions. 
+ static void commit(void*, size_t, bool writable, bool executable); + static void decommit(void*, size_t); + + // These methods are symmetric; reserveAndCommit allocates VM in an committed state, + // decommitAndRelease should be called on a region of VM allocated by a single reservation, + // the memory must all currently be in a committed state. + static void* reserveAndCommit(size_t, Usage = UnknownUsage, bool writable = true, bool executable = false); + static void decommitAndRelease(void* base, size_t size); + + // These methods are akin to reserveAndCommit/decommitAndRelease, above - however rather than + // committing/decommitting the entire region additional parameters allow a subregion to be + // specified. + static void* reserveAndCommit(size_t reserveSize, size_t commitSize, Usage = UnknownUsage, bool writable = true, bool executable = false); + static void decommitAndRelease(void* releaseBase, size_t releaseSize, void* decommitBase, size_t decommitSize); +}; + +inline void* OSAllocator::reserveAndCommit(size_t reserveSize, size_t commitSize, Usage usage, bool writable, bool executable) +{ + void* base = reserveUncommitted(reserveSize, usage, writable, executable); + commit(base, commitSize, writable, executable); + return base; +} + +inline void OSAllocator::decommitAndRelease(void* releaseBase, size_t releaseSize, void* decommitBase, size_t decommitSize) +{ + ASSERT(decommitBase >= releaseBase && (static_cast(decommitBase) + decommitSize) <= (static_cast(releaseBase) + releaseSize)); +#if WTF_OS_WINCE || WTF_OS_SYMBIAN + // On most platforms we can actually skip this final decommit; releasing the VM will + // implicitly decommit any physical memory in the region. This is not true on WINCE. 
+ // On Symbian, this makes implementation simpler and better aligned with the RChunk API + decommit(decommitBase, decommitSize); +#endif + releaseDecommitted(releaseBase, releaseSize); +} + +inline void OSAllocator::decommitAndRelease(void* base, size_t size) +{ + decommitAndRelease(base, size, base, size); +} + +} // namespace WTF + +using WTF::OSAllocator; + +#endif // OSAllocator_h diff --git a/js/src/yarr/OSAllocatorPosix.cpp b/js/src/yarr/OSAllocatorPosix.cpp new file mode 100644 index 000000000000..57c240b22fe5 --- /dev/null +++ b/js/src/yarr/OSAllocatorPosix.cpp @@ -0,0 +1,129 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#include "assembler/wtf/Platform.h" + +#if ENABLE_ASSEMBLER && WTF_OS_UNIX && !WTF_OS_SYMBIAN + +#include "OSAllocator.h" + +#include +#include +#include "wtf/Assertions.h" + +namespace WTF { + +void* OSAllocator::reserveUncommitted(size_t bytes, Usage usage, bool writable, bool executable) +{ + void* result = reserveAndCommit(bytes, usage, writable, executable); +#if HAVE_MADV_FREE_REUSE + // To support the "reserve then commit" model, we have to initially decommit. + while (madvise(result, bytes, MADV_FREE_REUSABLE) == -1 && errno == EAGAIN) { } +#endif + return result; +} + +void* OSAllocator::reserveAndCommit(size_t bytes, Usage usage, bool writable, bool executable) +{ + // All POSIX reservations start out logically committed. + int protection = PROT_READ; + if (writable) + protection |= PROT_WRITE; + if (executable) + protection |= PROT_EXEC; + + int flags = MAP_PRIVATE | MAP_ANON; + +#if WTF_OS_DARWIN && !defined(BUILDING_ON_TIGER) + int fd = usage; +#else + int fd = -1; +#endif + + void* result = 0; +#if (WTF_OS_DARWIN && WTF_CPU_X86_64) + if (executable) { + // Cook up an address to allocate at, using the following recipe: + // 17 bits of zero, stay in userspace kids. + // 26 bits of randomness for ASLR. + // 21 bits of zero, at least stay aligned within one level of the pagetables. + // + // But! 
- as a temporary workaround for some plugin problems (rdar://problem/6812854), + // for now instead of 2^26 bits of ASLR lets stick with 25 bits of randomization plus + // 2^24, which should put up somewhere in the middle of userspace (in the address range + // 0x200000000000 .. 0x5fffffffffff). + intptr_t randomLocation = 0; + randomLocation = arc4random() & ((1 << 25) - 1); + randomLocation += (1 << 24); + randomLocation <<= 21; + result = reinterpret_cast(randomLocation); + } +#endif + + result = mmap(result, bytes, protection, flags, fd, 0); + if (result == MAP_FAILED) + CRASH(); + return result; +} + +void OSAllocator::commit(void* address, size_t bytes, bool, bool) +{ +#if HAVE_MADV_FREE_REUSE + while (madvise(address, bytes, MADV_FREE_REUSE) == -1 && errno == EAGAIN) { } +#else + // Non-MADV_FREE_REUSE reservations automatically commit on demand. + UNUSED_PARAM(address); + UNUSED_PARAM(bytes); +#endif +} + +void OSAllocator::decommit(void* address, size_t bytes) +{ +#if HAVE_MADV_FREE_REUSE + while (madvise(address, bytes, MADV_FREE_REUSABLE) == -1 && errno == EAGAIN) { } +#elif HAVE_MADV_FREE + while (madvise(address, bytes, MADV_FREE) == -1 && errno == EAGAIN) { } +#elif HAVE_MADV_DONTNEED + while (madvise(address, bytes, MADV_DONTNEED) == -1 && errno == EAGAIN) { } +#else + UNUSED_PARAM(address); + UNUSED_PARAM(bytes); +#endif +} + +void OSAllocator::releaseDecommitted(void* address, size_t bytes) +{ + int result = munmap(address, bytes); + if (result == -1) + CRASH(); +} + +} // namespace WTF + +#endif diff --git a/js/src/yarr/OSAllocatorWin.cpp b/js/src/yarr/OSAllocatorWin.cpp new file mode 100644 index 000000000000..08df9e98aefb --- /dev/null +++ b/js/src/yarr/OSAllocatorWin.cpp @@ -0,0 +1,89 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2010 Apple Inc. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#include "assembler/wtf/Platform.h" + +#if ENABLE_ASSEMBLER && WTF_OS_WINDOWS + +#include "windows.h" +#include "wtf/Assertions.h" + +#include "OSAllocator.h" + +namespace WTF { + +static inline DWORD protection(bool writable, bool executable) +{ + return executable ? + (writable ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ) : + (writable ? 
PAGE_READWRITE : PAGE_READONLY); +} + +void* OSAllocator::reserveUncommitted(size_t bytes, Usage, bool writable, bool executable) +{ + void* result = VirtualAlloc(0, bytes, MEM_RESERVE, protection(writable, executable)); + if (!result) + CRASH(); + return result; +} + +void* OSAllocator::reserveAndCommit(size_t bytes, Usage, bool writable, bool executable) +{ + void* result = VirtualAlloc(0, bytes, MEM_RESERVE | MEM_COMMIT, protection(writable, executable)); + if (!result) + CRASH(); + return result; +} + +void OSAllocator::commit(void* address, size_t bytes, bool writable, bool executable) +{ + void* result = VirtualAlloc(address, bytes, MEM_COMMIT, protection(writable, executable)); + if (!result) + CRASH(); +} + +void OSAllocator::decommit(void* address, size_t bytes) +{ + bool result = VirtualFree(address, bytes, MEM_DECOMMIT); + if (!result) + CRASH(); +} + +void OSAllocator::releaseDecommitted(void* address, size_t bytes) +{ + // According to http://msdn.microsoft.com/en-us/library/aa366892(VS.85).aspx, + // dwSize must be 0 if dwFreeType is MEM_RELEASE. + bool result = VirtualFree(address, 0, MEM_RELEASE); + if (!result) + CRASH(); +} + +} // namespace WTF + +#endif diff --git a/js/src/yarr/PageAllocation.h b/js/src/yarr/PageAllocation.h new file mode 100644 index 000000000000..a86f37116e50 --- /dev/null +++ b/js/src/yarr/PageAllocation.h @@ -0,0 +1,131 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef PageAllocation_h +#define PageAllocation_h + +#include "wtfbridge.h" +#include "OSAllocator.h" +#include "PageBlock.h" +#include "assembler/wtf/VMTags.h" + +#if WTF_OS_DARWIN +#include +#include +#endif + +#if WTF_OS_HAIKU +#include +#endif + +#if WTF_OS_WINDOWS +#include +#include +#endif + +#if WTF_OS_SYMBIAN +#include +#include +#endif + +#if WTF_HAVE_ERRNO_H +#include +#endif + +#if WTF_HAVE_MMAP +#include +#include +#endif + +namespace WTF { + +/* + PageAllocation + + The PageAllocation class provides a cross-platform memory allocation interface + with similar capabilities to posix mmap/munmap. Memory is allocated by calling + PageAllocation::allocate, and deallocated by calling deallocate on the + PageAllocation object. The PageAllocation holds the allocation's base pointer + and size. + + The allocate method is passed the size required (which must be a multiple of + the system page size, which can be accessed using PageAllocation::pageSize). 
+ Callers may also optionally provide a flag indicating the usage (for use by + system memory usage tracking tools, where implemented), and boolean values + specifying the required protection (defaulting to writable, non-executable). +*/ + +class PageAllocation : private PageBlock { +public: + PageAllocation() + { + } + + using PageBlock::size; + using PageBlock::base; + +#ifndef __clang__ + using PageBlock::operator bool; +#else + // FIXME: This is a workaround for , wherein Clang incorrectly emits an access + // control warning when a client tries to use operator bool exposed above via "using PageBlock::operator bool". + operator bool() const { return PageBlock::operator bool(); } +#endif + + static PageAllocation allocate(size_t size, OSAllocator::Usage usage = OSAllocator::UnknownUsage, bool writable = true, bool executable = false) + { + ASSERT(isPageAligned(size)); + return PageAllocation(OSAllocator::reserveAndCommit(size, usage, writable, executable), size); + } + + void deallocate() + { + // Clear base & size before calling release; if this is *inside* allocation + // then we won't be able to clear them after deallocating the memory. + PageAllocation tmp; + JSC::std::swap(tmp, *this); + + ASSERT(tmp); + ASSERT(!*this); + + OSAllocator::decommitAndRelease(tmp.base(), tmp.size()); + } + +private: + PageAllocation(void* base, size_t size) + : PageBlock(base, size) + { + } +}; + +} // namespace WTF + +using WTF::PageAllocation; + +#endif // PageAllocation_h diff --git a/js/src/yarr/PageBlock.cpp b/js/src/yarr/PageBlock.cpp new file mode 100644 index 000000000000..0f435b772860 --- /dev/null +++ b/js/src/yarr/PageBlock.cpp @@ -0,0 +1,88 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2010 Apple Inc. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ***** END LICENSE BLOCK ***** */ + +#include "PageBlock.h" +#include "wtf/Assertions.h" + +#if WTF_OS_UNIX && !WTF_OS_SYMBIAN +#include +#endif + +#if WTF_OS_WINDOWS +#include +#include +#endif + +#if WTF_OS_SYMBIAN +#include +#include +#endif + +namespace WTF { + +static size_t s_pageSize; + +#if WTF_OS_UNIX && !WTF_OS_SYMBIAN + +inline size_t systemPageSize() +{ + return getpagesize(); +} + +#elif WTF_OS_WINDOWS + +inline size_t systemPageSize() +{ + static size_t size = 0; + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + size = system_info.dwPageSize; + return size; +} + +#elif WTF_OS_SYMBIAN + +inline size_t systemPageSize() +{ + static TInt page_size = 0; + UserHal::PageSizeInBytes(page_size); + return page_size; +} + +#endif + +size_t pageSize() +{ + if (!s_pageSize) + s_pageSize = systemPageSize(); + ASSERT(isPowerOfTwo(s_pageSize)); + return s_pageSize; +} + +} // namespace WTF diff --git a/js/src/yarr/PageBlock.h b/js/src/yarr/PageBlock.h new file mode 100644 index 000000000000..33751315e049 --- /dev/null +++ b/js/src/yarr/PageBlock.h @@ -0,0 +1,91 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef PageBlock_h +#define PageBlock_h + +#include +#include "jsstdint.h" +#include "assembler/wtf/Platform.h" + +namespace WTF { + +size_t pageSize(); +inline bool isPageAligned(void* address) { return !(reinterpret_cast(address) & (pageSize() - 1)); } +inline bool isPageAligned(size_t size) { return !(size & (pageSize() - 1)); } +inline bool isPowerOfTwo(size_t size) { return !(size & (size - 1)); } + +class PageBlock { +public: + PageBlock(); + PageBlock(const PageBlock&); + PageBlock(void*, size_t); + + void* base() const { return m_base; } + size_t size() const { return m_size; } + + operator bool() const { return !!m_base; } + + bool contains(void* containedBase, size_t containedSize) + { + return containedBase >= m_base + && (static_cast(containedBase) + containedSize) <= (static_cast(m_base) + m_size); + } + +private: + void* m_base; + size_t m_size; +}; + +inline PageBlock::PageBlock() + : m_base(0) + , m_size(0) +{ +} + +inline PageBlock::PageBlock(const PageBlock& other) + : m_base(other.m_base) + , m_size(other.m_size) +{ +} + +inline PageBlock::PageBlock(void* base, size_t size) + : m_base(base) + , m_size(size) +{ +} + +} // namespace WTF + +using WTF::pageSize; +using 
WTF::isPageAligned; +using WTF::isPageAligned; +using WTF::isPowerOfTwo; + +#endif // PageBlock_h diff --git a/js/src/yarr/yarr/RegExpJitTables.h b/js/src/yarr/RegExpJitTables.h similarity index 100% rename from js/src/yarr/yarr/RegExpJitTables.h rename to js/src/yarr/RegExpJitTables.h diff --git a/js/src/yarr/VMTags.h b/js/src/yarr/VMTags.h new file mode 100644 index 000000000000..fe6a006d3601 --- /dev/null +++ b/js/src/yarr/VMTags.h @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2009 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef VMTags_h +#define VMTags_h + +// On Mac OS X, the VM subsystem allows tagging memory requested from mmap and vm_map +// in order to aid tools that inspect system memory use. 
+#if WTF_OS_DARWIN + +#include + +#if !defined(TARGETING_TIGER) + +#if defined(VM_MEMORY_TCMALLOC) +#define VM_TAG_FOR_TCMALLOC_MEMORY VM_MAKE_TAG(VM_MEMORY_TCMALLOC) +#else +#define VM_TAG_FOR_TCMALLOC_MEMORY VM_MAKE_TAG(53) +#endif // defined(VM_MEMORY_TCMALLOC) + +#if defined(VM_MEMORY_JAVASCRIPT_JIT_EXECUTABLE_ALLOCATOR) +#define VM_TAG_FOR_EXECUTABLEALLOCATOR_MEMORY VM_MAKE_TAG(VM_MEMORY_JAVASCRIPT_JIT_EXECUTABLE_ALLOCATOR) +#else +#define VM_TAG_FOR_EXECUTABLEALLOCATOR_MEMORY VM_MAKE_TAG(64) +#endif // defined(VM_MEMORY_JAVASCRIPT_JIT_EXECUTABLE_ALLOCATOR) + +#if defined(VM_MEMORY_JAVASCRIPT_JIT_REGISTER_FILE) +#define VM_TAG_FOR_REGISTERFILE_MEMORY VM_MAKE_TAG(VM_MEMORY_JAVASCRIPT_JIT_REGISTER_FILE) +#else +#define VM_TAG_FOR_REGISTERFILE_MEMORY VM_MAKE_TAG(65) +#endif // defined(VM_MEMORY_JAVASCRIPT_JIT_REGISTER_FILE) + +#else // !defined(TARGETING_TIGER) + +// mmap on Tiger fails with tags that work on Leopard, so fall +// back to Tiger-compatible tags (that also work on Leopard) +// when targeting Tiger. +#define VM_TAG_FOR_TCMALLOC_MEMORY -1 +#define VM_TAG_FOR_EXECUTABLEALLOCATOR_MEMORY -1 +#define VM_TAG_FOR_REGISTERFILE_MEMORY -1 + +#endif // !defined(TARGETING_TIGER) + +// Tags for vm_map and vm_allocate work on both Tiger and Leopard. 
+ +#if defined(VM_MEMORY_JAVASCRIPT_CORE) +#define VM_TAG_FOR_COLLECTOR_MEMORY VM_MAKE_TAG(VM_MEMORY_JAVASCRIPT_CORE) +#else +#define VM_TAG_FOR_COLLECTOR_MEMORY VM_MAKE_TAG(63) +#endif // defined(VM_MEMORY_JAVASCRIPT_CORE) + +#if defined(VM_MEMORY_WEBCORE_PURGEABLE_BUFFERS) +#define VM_TAG_FOR_WEBCORE_PURGEABLE_MEMORY VM_MAKE_TAG(VM_MEMORY_WEBCORE_PURGEABLE_BUFFERS) +#else +#define VM_TAG_FOR_WEBCORE_PURGEABLE_MEMORY VM_MAKE_TAG(69) +#endif // defined(VM_MEMORY_WEBCORE_PURGEABLE_BUFFERS) + +#else // OS(DARWIN) + +#define VM_TAG_FOR_TCMALLOC_MEMORY -1 +#define VM_TAG_FOR_COLLECTOR_MEMORY -1 +#define VM_TAG_FOR_EXECUTABLEALLOCATOR_MEMORY -1 +#define VM_TAG_FOR_REGISTERFILE_MEMORY -1 +#define VM_TAG_FOR_WEBCORE_PURGEABLE_MEMORY -1 + +#endif // OS(DARWIN) + +#endif // VMTags_h diff --git a/js/src/yarr/Yarr.h b/js/src/yarr/Yarr.h new file mode 100644 index 000000000000..40ebcca096af --- /dev/null +++ b/js/src/yarr/Yarr.h @@ -0,0 +1,72 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2009 Apple Inc. All rights reserved. + * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef Yarr_h +#define Yarr_h + +#include + +#include "YarrInterpreter.h" +#include "YarrPattern.h" + +namespace JSC { namespace Yarr { + +#define YarrStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers. +#define YarrStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers. +#define YarrStackSpaceForBackTrackInfoBackReference 2 +#define YarrStackSpaceForBackTrackInfoAlternative 1 // One per alternative. +#define YarrStackSpaceForBackTrackInfoParentheticalAssertion 1 +#define YarrStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. +#define YarrStackSpaceForBackTrackInfoParenthesesTerminal 1 +#define YarrStackSpaceForBackTrackInfoParentheses 2 + +static const unsigned quantifyInfinite = UINT_MAX; + +// The below limit restricts the number of "recursive" match calls in order to +// avoid spending exponential time on complex regular expressions. 
+static const unsigned matchLimit = 1000000; + +enum JSRegExpResult { + JSRegExpMatch = 1, + JSRegExpNoMatch = 0, + JSRegExpErrorNoMatch = -1, + JSRegExpErrorHitLimit = -2, + JSRegExpErrorNoMemory = -3, + JSRegExpErrorInternal = -4 +}; + +PassOwnPtr byteCompile(YarrPattern&, BumpPointerAllocator*); +int interpret(BytecodePattern*, const UChar* input, unsigned start, unsigned length, int* output); + +} } // namespace JSC::Yarr + +#endif // Yarr_h + diff --git a/js/src/yarr/YarrInterpreter.cpp b/js/src/yarr/YarrInterpreter.cpp new file mode 100644 index 000000000000..8792d75db000 --- /dev/null +++ b/js/src/yarr/YarrInterpreter.cpp @@ -0,0 +1,1914 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2009 Apple Inc. All rights reserved. + * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#include "YarrInterpreter.h" + +#include "Yarr.h" +#include "BumpPointerAllocator.h" + +#ifndef NDEBUG +#include +#endif + +using namespace WTF; + +namespace JSC { namespace Yarr { + +class Interpreter { +public: + struct ParenthesesDisjunctionContext; + + struct BackTrackInfoPatternCharacter { + uintptr_t matchAmount; + }; + struct BackTrackInfoCharacterClass { + uintptr_t matchAmount; + }; + struct BackTrackInfoBackReference { + uintptr_t begin; // Not really needed for greedy quantifiers. + uintptr_t matchAmount; // Not really needed for fixed quantifiers. 
+ }; + struct BackTrackInfoAlternative { + uintptr_t offset; + }; + struct BackTrackInfoParentheticalAssertion { + uintptr_t begin; + }; + struct BackTrackInfoParenthesesOnce { + uintptr_t begin; + }; + struct BackTrackInfoParenthesesTerminal { + uintptr_t begin; + }; + struct BackTrackInfoParentheses { + uintptr_t matchAmount; + ParenthesesDisjunctionContext* lastContext; + }; + + static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context) + { + context->next = backTrack->lastContext; + backTrack->lastContext = context; + ++backTrack->matchAmount; + } + + static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack) + { + ASSERT(backTrack->matchAmount); + ASSERT(backTrack->lastContext); + backTrack->lastContext = backTrack->lastContext->next; + --backTrack->matchAmount; + } + + struct DisjunctionContext + { + DisjunctionContext() + : term(0) + { + } + + void* operator new(size_t, void* where) + { + return where; + } + + int term; + unsigned matchBegin; + unsigned matchEnd; + uintptr_t frame[1]; + }; + + DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction) + { + size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); + allocatorPool = allocatorPool->ensureCapacity(size); + if (!allocatorPool) + CRASH(); + return new(allocatorPool->alloc(size)) DisjunctionContext(); + } + + void freeDisjunctionContext(DisjunctionContext* context) + { + allocatorPool = allocatorPool->dealloc(context); + } + + struct ParenthesesDisjunctionContext + { + ParenthesesDisjunctionContext(int* output, ByteTerm& term) + : next(0) + { + unsigned firstSubpatternId = term.atom.subpatternId; + unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns; + + for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) { + subpatternBackup[i] = output[(firstSubpatternId << 1) + i]; + 
output[(firstSubpatternId << 1) + i] = -1; + } + + new(getDisjunctionContext(term)) DisjunctionContext(); + } + + void* operator new(size_t, void* where) + { + return where; + } + + void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns) + { + for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) + output[(firstSubpatternId << 1) + i] = subpatternBackup[i]; + } + + DisjunctionContext* getDisjunctionContext(ByteTerm& term) + { + return reinterpret_cast(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1])); + } + + ParenthesesDisjunctionContext* next; + int subpatternBackup[1]; + }; + + ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term) + { + size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(int) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(int) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); + allocatorPool = allocatorPool->ensureCapacity(size); + if (!allocatorPool) + CRASH(); + return new(allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term); + } + + void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context) + { + allocatorPool = allocatorPool->dealloc(context); + } + + class InputStream { + public: + InputStream(const UChar* input, unsigned start, unsigned length) + : input(input) + , pos(start) + , length(length) + { + } + + void next() + { + ++pos; + } + + void rewind(unsigned amount) + { + ASSERT(pos >= amount); + pos -= amount; + } + + int read() + { + ASSERT(pos < length); + if (pos < length) + return input[pos]; + return -1; + } + + int readPair() + { + ASSERT(pos + 1 < length); + return input[pos] | input[pos + 1] << 16; + } + + int readChecked(int position) + { + ASSERT(position < 0); + ASSERT(static_cast(-position) <= pos); + unsigned p = pos + position; + ASSERT(p < length); + return input[p]; + } 
+ + int reread(unsigned from) + { + ASSERT(from < length); + return input[from]; + } + + int prev() + { + ASSERT(!(pos > length)); + if (pos && length) + return input[pos - 1]; + return -1; + } + + unsigned getPos() + { + return pos; + } + + void setPos(unsigned p) + { + pos = p; + } + + bool atStart() + { + return pos == 0; + } + + bool atEnd() + { + return pos == length; + } + + bool checkInput(int count) + { + if ((pos + count) <= length) { + pos += count; + return true; + } + return false; + } + + void uncheckInput(int count) + { + pos -= count; + } + + bool atStart(int position) + { + return (pos + position) == 0; + } + + bool atEnd(int position) + { + return (pos + position) == length; + } + + bool isNotAvailableInput(int position) + { + return (pos + position) > length; + } + + private: + const UChar* input; + unsigned pos; + unsigned length; + }; + + bool testCharacterClass(CharacterClass* characterClass, int ch) + { + if (ch & 0xFF80) { + for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i) + if (ch == characterClass->m_matchesUnicode[i]) + return true; + for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i) + if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end)) + return true; + } else { + for (unsigned i = 0; i < characterClass->m_matches.size(); ++i) + if (ch == characterClass->m_matches[i]) + return true; + for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i) + if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end)) + return true; + } + + return false; + } + + bool checkCharacter(int testChar, int inputPosition) + { + return testChar == input.readChecked(inputPosition); + } + + bool checkCasedCharacter(int loChar, int hiChar, int inputPosition) + { + int ch = input.readChecked(inputPosition); + return (loChar == ch) || (hiChar == ch); + } + + bool checkCharacterClass(CharacterClass* characterClass, bool invert, int 
inputPosition) + { + bool match = testCharacterClass(characterClass, input.readChecked(inputPosition)); + return invert ? !match : match; + } + + bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset) + { + int matchSize = matchEnd - matchBegin; + + if (!input.checkInput(matchSize)) + return false; + + if (pattern->m_ignoreCase) { + for (int i = 0; i < matchSize; ++i) { + int ch = input.reread(matchBegin + i); + + int lo = Unicode::toLower(ch); + int hi = Unicode::toUpper(ch); + + if ((lo != hi) ? (!checkCasedCharacter(lo, hi, inputOffset - matchSize + i)) : (!checkCharacter(ch, inputOffset - matchSize + i))) { + input.uncheckInput(matchSize); + return false; + } + } + } else { + for (int i = 0; i < matchSize; ++i) { + if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) { + input.uncheckInput(matchSize); + return false; + } + } + } + + return true; + } + + bool matchAssertionBOL(ByteTerm& term) + { + return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1))); + } + + bool matchAssertionEOL(ByteTerm& term) + { + if (term.inputPosition) + return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition))); + + return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read())); + } + + bool matchAssertionWordBoundary(ByteTerm& term) + { + bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1)); + bool readIsWordchar; + if (term.inputPosition) + readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition)); + else + readIsWordchar = !input.atEnd() && 
testCharacterClass(pattern->wordcharCharacterClass, input.read()); + + bool wordBoundary = prevIsWordchar != readIsWordchar; + return term.invert() ? !wordBoundary : wordBoundary; + } + + bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context) + { + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + term.frameLocation); + + switch (term.atom.quantityType) { + case QuantifierFixedCount: + break; + + case QuantifierGreedy: + if (backTrack->matchAmount) { + --backTrack->matchAmount; + input.uncheckInput(1); + return true; + } + break; + + case QuantifierNonGreedy: + if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { + ++backTrack->matchAmount; + if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1)) + return true; + } + input.uncheckInput(backTrack->matchAmount); + break; + } + + return false; + } + + bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context) + { + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + term.frameLocation); + + switch (term.atom.quantityType) { + case QuantifierFixedCount: + break; + + case QuantifierGreedy: + if (backTrack->matchAmount) { + --backTrack->matchAmount; + input.uncheckInput(1); + return true; + } + break; + + case QuantifierNonGreedy: + if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { + ++backTrack->matchAmount; + if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1)) + return true; + } + input.uncheckInput(backTrack->matchAmount); + break; + } + + return false; + } + + bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeCharacterClass); + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + term.frameLocation); + + switch (term.atom.quantityType) { + case QuantifierFixedCount: { + for (unsigned matchAmount = 0; matchAmount 
< term.atom.quantityCount; ++matchAmount) { + if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount)) + return false; + } + return true; + } + + case QuantifierGreedy: { + unsigned matchAmount = 0; + while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) { + if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) { + input.uncheckInput(1); + break; + } + ++matchAmount; + } + backTrack->matchAmount = matchAmount; + + return true; + } + + case QuantifierNonGreedy: + backTrack->matchAmount = 0; + return true; + } + + ASSERT_NOT_REACHED(); + return false; + } + + bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeCharacterClass); + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + term.frameLocation); + + switch (term.atom.quantityType) { + case QuantifierFixedCount: + break; + + case QuantifierGreedy: + if (backTrack->matchAmount) { + --backTrack->matchAmount; + input.uncheckInput(1); + return true; + } + break; + + case QuantifierNonGreedy: + if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { + ++backTrack->matchAmount; + if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) + return true; + } + input.uncheckInput(backTrack->matchAmount); + break; + } + + return false; + } + + bool matchBackReference(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeBackReference); + BackTrackInfoBackReference* backTrack = reinterpret_cast(context->frame + term.frameLocation); + + int matchBegin = output[(term.atom.subpatternId << 1)]; + int matchEnd = output[(term.atom.subpatternId << 1) + 1]; + + // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that. 
+ // In this case the result of match is empty string like when it references to a parentheses with zero-width match. + // Eg.: /(a\1)/ + if (matchEnd == -1) + return true; + + ASSERT((matchBegin == -1) || (matchBegin <= matchEnd)); + + if (matchBegin == matchEnd) + return true; + + switch (term.atom.quantityType) { + case QuantifierFixedCount: { + backTrack->begin = input.getPos(); + for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { + if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { + input.setPos(backTrack->begin); + return false; + } + } + return true; + } + + case QuantifierGreedy: { + unsigned matchAmount = 0; + while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) + ++matchAmount; + backTrack->matchAmount = matchAmount; + return true; + } + + case QuantifierNonGreedy: + backTrack->begin = input.getPos(); + backTrack->matchAmount = 0; + return true; + } + + ASSERT_NOT_REACHED(); + return false; + } + + bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeBackReference); + BackTrackInfoBackReference* backTrack = reinterpret_cast(context->frame + term.frameLocation); + + int matchBegin = output[(term.atom.subpatternId << 1)]; + int matchEnd = output[(term.atom.subpatternId << 1) + 1]; + ASSERT((matchBegin == -1) || (matchBegin <= matchEnd)); + + if (matchBegin == matchEnd) + return false; + + switch (term.atom.quantityType) { + case QuantifierFixedCount: + // for quantityCount == 1, could rewind. 
+ input.setPos(backTrack->begin); + break; + + case QuantifierGreedy: + if (backTrack->matchAmount) { + --backTrack->matchAmount; + input.rewind(matchEnd - matchBegin); + return true; + } + break; + + case QuantifierNonGreedy: + if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { + ++backTrack->matchAmount; + return true; + } + input.setPos(backTrack->begin); + break; + } + + return false; + } + + void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context) + { + if (term.capture()) { + unsigned subpatternId = term.atom.subpatternId; + output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition; + output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition; + } + } + void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context) + { + unsigned firstSubpatternId = term.atom.subpatternId; + unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns; + context->restoreOutput(output, firstSubpatternId, count); + } + JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack) + { + while (backTrack->matchAmount) { + ParenthesesDisjunctionContext* context = backTrack->lastContext; + + JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true); + if (result == JSRegExpMatch) + return JSRegExpMatch; + + resetMatches(term, context); + popParenthesesDisjunctionContext(backTrack); + freeParenthesesDisjunctionContext(context); + + if (result != JSRegExpNoMatch) + return result; + } + + return JSRegExpNoMatch; + } + + bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); + ASSERT(term.atom.quantityCount == 1); + + BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast(context->frame + 
term.frameLocation); + + switch (term.atom.quantityType) { + case QuantifierGreedy: { + // set this speculatively; if we get to the parens end this will be true. + backTrack->begin = input.getPos(); + break; + } + case QuantifierNonGreedy: { + backTrack->begin = notFound; + context->term += term.atom.parenthesesWidth; + return true; + } + case QuantifierFixedCount: + break; + } + + if (term.capture()) { + unsigned subpatternId = term.atom.subpatternId; + output[(subpatternId << 1)] = input.getPos() + term.inputPosition; + } + + return true; + } + + bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); + ASSERT(term.atom.quantityCount == 1); + + if (term.capture()) { + unsigned subpatternId = term.atom.subpatternId; + output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition; + } + + if (term.atom.quantityType == QuantifierFixedCount) + return true; + + BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast(context->frame + term.frameLocation); + return backTrack->begin != input.getPos(); + } + + bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); + ASSERT(term.atom.quantityCount == 1); + + BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast(context->frame + term.frameLocation); + + if (term.capture()) { + unsigned subpatternId = term.atom.subpatternId; + output[(subpatternId << 1)] = -1; + output[(subpatternId << 1) + 1] = -1; + } + + switch (term.atom.quantityType) { + case QuantifierGreedy: + // if we backtrack to this point, there is another chance - try matching nothing. 
+ ASSERT(backTrack->begin != notFound); + backTrack->begin = notFound; + context->term += term.atom.parenthesesWidth; + return true; + case QuantifierNonGreedy: + ASSERT(backTrack->begin != notFound); + case QuantifierFixedCount: + break; + } + + return false; + } + + bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); + ASSERT(term.atom.quantityCount == 1); + + BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast(context->frame + term.frameLocation); + + switch (term.atom.quantityType) { + case QuantifierGreedy: + if (backTrack->begin == notFound) { + context->term -= term.atom.parenthesesWidth; + return false; + } + case QuantifierNonGreedy: + if (backTrack->begin == notFound) { + backTrack->begin = input.getPos(); + if (term.capture()) { + // Technically this access to inputPosition should be accessing the begin term's + // inputPosition, but for repeats other than fixed these values should be + // the same anyway! (We don't pre-check for greedy or non-greedy matches.) 
+ ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin); + ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition); + unsigned subpatternId = term.atom.subpatternId; + output[subpatternId << 1] = input.getPos() + term.inputPosition; + } + context->term -= term.atom.parenthesesWidth; + return true; + } + case QuantifierFixedCount: + break; + } + + return false; + } + + bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); + ASSERT(term.atom.quantityType == QuantifierGreedy); + ASSERT(term.atom.quantityCount == quantifyInfinite); + ASSERT(!term.capture()); + + BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast(context->frame + term.frameLocation); + backTrack->begin = input.getPos(); + return true; + } + + bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd); + + BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast(context->frame + term.frameLocation); + // Empty match is a failed match. + if (backTrack->begin == input.getPos()) + return false; + + // Successful match! Okay, what's next? - loop around and try to match moar! + context->term -= (term.atom.parenthesesWidth + 1); + return true; + } + + bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); + ASSERT(term.atom.quantityType == QuantifierGreedy); + ASSERT(term.atom.quantityCount == quantifyInfinite); + ASSERT(!term.capture()); + + // If we backtrack to this point, we have failed to match this iteration of the parens. + // Since this is greedy / zero minimum a failed is also accepted as a match! 
+ context->term += term.atom.parenthesesWidth; + return true; + } + + bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*) + { + // 'Terminal' parentheses are at the end of the regex, and as such a match past end + // should always be returned as a successful match - we should never backtrack to here. + ASSERT_NOT_REACHED(); + return false; + } + + bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); + ASSERT(term.atom.quantityCount == 1); + + BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast(context->frame + term.frameLocation); + + backTrack->begin = input.getPos(); + return true; + } + + bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); + ASSERT(term.atom.quantityCount == 1); + + BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast(context->frame + term.frameLocation); + + input.setPos(backTrack->begin); + + // We've reached the end of the parens; if they are inverted, this is failure. + if (term.invert()) { + context->term -= term.atom.parenthesesWidth; + return false; + } + + return true; + } + + bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); + ASSERT(term.atom.quantityCount == 1); + + // We've failed to match parens; if they are inverted, this is win! 
+ if (term.invert()) { + context->term += term.atom.parenthesesWidth; + return true; + } + + return false; + } + + bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); + ASSERT(term.atom.quantityCount == 1); + + BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast(context->frame + term.frameLocation); + + input.setPos(backTrack->begin); + + context->term -= term.atom.parenthesesWidth; + return false; + } + + JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); + + BackTrackInfoParentheses* backTrack = reinterpret_cast(context->frame + term.frameLocation); + ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; + + backTrack->matchAmount = 0; + backTrack->lastContext = 0; + + switch (term.atom.quantityType) { + case QuantifierFixedCount: { + // While we haven't yet reached our fixed limit, + while (backTrack->matchAmount < term.atom.quantityCount) { + // Try to do a match, and it it succeeds, add it to the list. + ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); + JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); + if (result == JSRegExpMatch) + appendParenthesesDisjunctionContext(backTrack, context); + else { + // The match failed; try to find an alternate point to carry on from. 
+ resetMatches(term, context); + freeParenthesesDisjunctionContext(context); + + if (result == JSRegExpNoMatch) { + JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); + if (backtrackResult != JSRegExpMatch) + return backtrackResult; + } else + return result; + } + } + + ASSERT(backTrack->matchAmount == term.atom.quantityCount); + ParenthesesDisjunctionContext* context = backTrack->lastContext; + recordParenthesesMatch(term, context); + return JSRegExpMatch; + } + + case QuantifierGreedy: { + while (backTrack->matchAmount < term.atom.quantityCount) { + ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); + JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); + if (result == JSRegExpMatch) + appendParenthesesDisjunctionContext(backTrack, context); + else { + resetMatches(term, context); + freeParenthesesDisjunctionContext(context); + + if (result != JSRegExpNoMatch) + return result; + + break; + } + } + + if (backTrack->matchAmount) { + ParenthesesDisjunctionContext* context = backTrack->lastContext; + recordParenthesesMatch(term, context); + } + return JSRegExpMatch; + } + + case QuantifierNonGreedy: + return JSRegExpMatch; + } + + ASSERT_NOT_REACHED(); + return JSRegExpErrorNoMatch; + } + + // Rules for backtracking differ depending on whether this is greedy or non-greedy. + // + // Greedy matches never should try just adding more - you should already have done + // the 'more' cases. Always backtrack, at least a leetle bit. However cases where + // you backtrack an item off the list needs checking, since we'll never have matched + // the one less case. Tracking forwards, still add as much as possible. + // + // Non-greedy, we've already done the one less case, so don't match on popping. + // We haven't done the one more case, so always try to add that. 
+ // + JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context) + { + ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); + + BackTrackInfoParentheses* backTrack = reinterpret_cast(context->frame + term.frameLocation); + ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; + + switch (term.atom.quantityType) { + case QuantifierFixedCount: { + ASSERT(backTrack->matchAmount == term.atom.quantityCount); + + ParenthesesDisjunctionContext* context = 0; + JSRegExpResult result = parenthesesDoBacktrack(term, backTrack); + + if (result != JSRegExpMatch) + return result; + + // While we haven't yet reached our fixed limit, + while (backTrack->matchAmount < term.atom.quantityCount) { + // Try to do a match, and it it succeeds, add it to the list. + context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); + result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); + + if (result == JSRegExpMatch) + appendParenthesesDisjunctionContext(backTrack, context); + else { + // The match failed; try to find an alternate point to carry on from. 
+ resetMatches(term, context); + freeParenthesesDisjunctionContext(context); + + if (result == JSRegExpNoMatch) { + JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); + if (backtrackResult != JSRegExpMatch) + return backtrackResult; + } else + return result; + } + } + + ASSERT(backTrack->matchAmount == term.atom.quantityCount); + context = backTrack->lastContext; + recordParenthesesMatch(term, context); + return JSRegExpMatch; + } + + case QuantifierGreedy: { + if (!backTrack->matchAmount) + return JSRegExpNoMatch; + + ParenthesesDisjunctionContext* context = backTrack->lastContext; + JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); + if (result == JSRegExpMatch) { + while (backTrack->matchAmount < term.atom.quantityCount) { + ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); + JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); + if (parenthesesResult == JSRegExpMatch) + appendParenthesesDisjunctionContext(backTrack, context); + else { + resetMatches(term, context); + freeParenthesesDisjunctionContext(context); + + if (parenthesesResult != JSRegExpNoMatch) + return parenthesesResult; + + break; + } + } + } else { + resetMatches(term, context); + popParenthesesDisjunctionContext(backTrack); + freeParenthesesDisjunctionContext(context); + + if (result != JSRegExpNoMatch) + return result; + } + + if (backTrack->matchAmount) { + ParenthesesDisjunctionContext* context = backTrack->lastContext; + recordParenthesesMatch(term, context); + } + return JSRegExpMatch; + } + + case QuantifierNonGreedy: { + // If we've not reached the limit, try to add one more match. 
+ if (backTrack->matchAmount < term.atom.quantityCount) { + ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); + JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); + if (result == JSRegExpMatch) { + appendParenthesesDisjunctionContext(backTrack, context); + recordParenthesesMatch(term, context); + return JSRegExpMatch; + } + + resetMatches(term, context); + freeParenthesesDisjunctionContext(context); + + if (result != JSRegExpNoMatch) + return result; + } + + // Nope - okay backtrack looking for an alternative. + while (backTrack->matchAmount) { + ParenthesesDisjunctionContext* context = backTrack->lastContext; + JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); + if (result == JSRegExpMatch) { + // successful backtrack! we're back in the game! + if (backTrack->matchAmount) { + context = backTrack->lastContext; + recordParenthesesMatch(term, context); + } + return JSRegExpMatch; + } + + // pop a match off the stack + resetMatches(term, context); + popParenthesesDisjunctionContext(backTrack); + freeParenthesesDisjunctionContext(context); + + return result; + } + + return JSRegExpNoMatch; + } + } + + ASSERT_NOT_REACHED(); + return JSRegExpErrorNoMatch; + } + + void lookupForBeginChars() + { + int character; + bool firstSingleCharFound; + + while (true) { + if (input.isNotAvailableInput(2)) + return; + + firstSingleCharFound = false; + + character = input.readPair(); + + for (unsigned i = 0; i < pattern->m_beginChars.size(); ++i) { + BeginChar bc = pattern->m_beginChars[i]; + + if (!firstSingleCharFound && bc.value <= 0xFFFF) { + firstSingleCharFound = true; + character &= 0xFFFF; + } + + if ((character | bc.mask) == bc.value) + return; + } + + input.next(); + } + } + +#define MATCH_NEXT() { ++context->term; goto matchAgain; } +#define BACKTRACK() { --context->term; goto backtrack; } +#define 
currentTerm() (disjunction->terms[context->term]) + JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false, bool isBody = false) + { + if (!--remainingMatchCount) + return JSRegExpErrorHitLimit; + + if (btrack) + BACKTRACK(); + + if (pattern->m_containsBeginChars && isBody) + lookupForBeginChars(); + + context->matchBegin = input.getPos(); + context->term = 0; + + matchAgain: + ASSERT(context->term < static_cast(disjunction->terms.size())); + + switch (currentTerm().type) { + case ByteTerm::TypeSubpatternBegin: + MATCH_NEXT(); + case ByteTerm::TypeSubpatternEnd: + context->matchEnd = input.getPos(); + return JSRegExpMatch; + + case ByteTerm::TypeBodyAlternativeBegin: + MATCH_NEXT(); + case ByteTerm::TypeBodyAlternativeDisjunction: + case ByteTerm::TypeBodyAlternativeEnd: + context->matchEnd = input.getPos(); + return JSRegExpMatch; + + case ByteTerm::TypeAlternativeBegin: + MATCH_NEXT(); + case ByteTerm::TypeAlternativeDisjunction: + case ByteTerm::TypeAlternativeEnd: { + int offset = currentTerm().alternative.end; + BackTrackInfoAlternative* backTrack = reinterpret_cast(context->frame + currentTerm().frameLocation); + backTrack->offset = offset; + context->term += offset; + MATCH_NEXT(); + } + + case ByteTerm::TypeAssertionBOL: + if (matchAssertionBOL(currentTerm())) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeAssertionEOL: + if (matchAssertionEOL(currentTerm())) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeAssertionWordBoundary: + if (matchAssertionWordBoundary(currentTerm())) + MATCH_NEXT(); + BACKTRACK(); + + case ByteTerm::TypePatternCharacterOnce: + case ByteTerm::TypePatternCharacterFixed: { + for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { + if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount)) + BACKTRACK(); + } + MATCH_NEXT(); + } + case ByteTerm::TypePatternCharacterGreedy: { + 
BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + currentTerm().frameLocation); + unsigned matchAmount = 0; + while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { + if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) { + input.uncheckInput(1); + break; + } + ++matchAmount; + } + backTrack->matchAmount = matchAmount; + + MATCH_NEXT(); + } + case ByteTerm::TypePatternCharacterNonGreedy: { + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + currentTerm().frameLocation); + backTrack->matchAmount = 0; + MATCH_NEXT(); + } + + case ByteTerm::TypePatternCasedCharacterOnce: + case ByteTerm::TypePatternCasedCharacterFixed: { + for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { + if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount)) + BACKTRACK(); + } + MATCH_NEXT(); + } + case ByteTerm::TypePatternCasedCharacterGreedy: { + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + currentTerm().frameLocation); + unsigned matchAmount = 0; + while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { + if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) { + input.uncheckInput(1); + break; + } + ++matchAmount; + } + backTrack->matchAmount = matchAmount; + + MATCH_NEXT(); + } + case ByteTerm::TypePatternCasedCharacterNonGreedy: { + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + currentTerm().frameLocation); + backTrack->matchAmount = 0; + MATCH_NEXT(); + } + + case ByteTerm::TypeCharacterClass: + if (matchCharacterClass(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeBackReference: + if (matchBackReference(currentTerm(), context)) + MATCH_NEXT(); 
+ BACKTRACK(); + case ByteTerm::TypeParenthesesSubpattern: { + JSRegExpResult result = matchParentheses(currentTerm(), context); + + if (result == JSRegExpMatch) { + MATCH_NEXT(); + } else if (result != JSRegExpNoMatch) + return result; + + BACKTRACK(); + } + case ByteTerm::TypeParenthesesSubpatternOnceBegin: + if (matchParenthesesOnceBegin(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeParenthesesSubpatternOnceEnd: + if (matchParenthesesOnceEnd(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeParenthesesSubpatternTerminalBegin: + if (matchParenthesesTerminalBegin(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeParenthesesSubpatternTerminalEnd: + if (matchParenthesesTerminalEnd(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeParentheticalAssertionBegin: + if (matchParentheticalAssertionBegin(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeParentheticalAssertionEnd: + if (matchParentheticalAssertionEnd(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + + case ByteTerm::TypeCheckInput: + if (input.checkInput(currentTerm().checkInputCount)) + MATCH_NEXT(); + BACKTRACK(); + + case ByteTerm::TypeUncheckInput: + input.uncheckInput(currentTerm().checkInputCount); + MATCH_NEXT(); + } + + // We should never fall-through to here. 
+ ASSERT_NOT_REACHED(); + + backtrack: + ASSERT(context->term < static_cast(disjunction->terms.size())); + + switch (currentTerm().type) { + case ByteTerm::TypeSubpatternBegin: + return JSRegExpNoMatch; + case ByteTerm::TypeSubpatternEnd: + ASSERT_NOT_REACHED(); + + case ByteTerm::TypeBodyAlternativeBegin: + case ByteTerm::TypeBodyAlternativeDisjunction: { + int offset = currentTerm().alternative.next; + context->term += offset; + if (offset > 0) + MATCH_NEXT(); + + if (input.atEnd()) + return JSRegExpNoMatch; + + input.next(); + + if (pattern->m_containsBeginChars && isBody) + lookupForBeginChars(); + + context->matchBegin = input.getPos(); + + if (currentTerm().alternative.onceThrough) + context->term += currentTerm().alternative.next; + + MATCH_NEXT(); + } + case ByteTerm::TypeBodyAlternativeEnd: + ASSERT_NOT_REACHED(); + + case ByteTerm::TypeAlternativeBegin: + case ByteTerm::TypeAlternativeDisjunction: { + int offset = currentTerm().alternative.next; + context->term += offset; + if (offset > 0) + MATCH_NEXT(); + BACKTRACK(); + } + case ByteTerm::TypeAlternativeEnd: { + // We should never backtrack back into an alternative of the main body of the regex. 
+ BackTrackInfoAlternative* backTrack = reinterpret_cast(context->frame + currentTerm().frameLocation); + unsigned offset = backTrack->offset; + context->term -= offset; + BACKTRACK(); + } + + case ByteTerm::TypeAssertionBOL: + case ByteTerm::TypeAssertionEOL: + case ByteTerm::TypeAssertionWordBoundary: + BACKTRACK(); + + case ByteTerm::TypePatternCharacterOnce: + case ByteTerm::TypePatternCharacterFixed: + case ByteTerm::TypePatternCharacterGreedy: + case ByteTerm::TypePatternCharacterNonGreedy: + if (backtrackPatternCharacter(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypePatternCasedCharacterOnce: + case ByteTerm::TypePatternCasedCharacterFixed: + case ByteTerm::TypePatternCasedCharacterGreedy: + case ByteTerm::TypePatternCasedCharacterNonGreedy: + if (backtrackPatternCasedCharacter(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeCharacterClass: + if (backtrackCharacterClass(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeBackReference: + if (backtrackBackReference(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeParenthesesSubpattern: { + JSRegExpResult result = backtrackParentheses(currentTerm(), context); + + if (result == JSRegExpMatch) { + MATCH_NEXT(); + } else if (result != JSRegExpNoMatch) + return result; + + BACKTRACK(); + } + case ByteTerm::TypeParenthesesSubpatternOnceBegin: + if (backtrackParenthesesOnceBegin(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeParenthesesSubpatternOnceEnd: + if (backtrackParenthesesOnceEnd(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeParenthesesSubpatternTerminalBegin: + if (backtrackParenthesesTerminalBegin(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeParenthesesSubpatternTerminalEnd: + if (backtrackParenthesesTerminalEnd(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case 
ByteTerm::TypeParentheticalAssertionBegin: + if (backtrackParentheticalAssertionBegin(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + case ByteTerm::TypeParentheticalAssertionEnd: + if (backtrackParentheticalAssertionEnd(currentTerm(), context)) + MATCH_NEXT(); + BACKTRACK(); + + case ByteTerm::TypeCheckInput: + input.uncheckInput(currentTerm().checkInputCount); + BACKTRACK(); + + case ByteTerm::TypeUncheckInput: + input.checkInput(currentTerm().checkInputCount); + BACKTRACK(); + } + + ASSERT_NOT_REACHED(); + return JSRegExpErrorNoMatch; + } + + JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) + { + JSRegExpResult result = matchDisjunction(disjunction, context, btrack); + + if (result == JSRegExpMatch) { + while (context->matchBegin == context->matchEnd) { + result = matchDisjunction(disjunction, context, true); + if (result != JSRegExpMatch) + return result; + } + return JSRegExpMatch; + } + + return result; + } + + int interpret() + { + allocatorPool = pattern->m_allocator->startAllocator(); + if (!allocatorPool) + CRASH(); + + for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i) + output[i] = -1; + + DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get()); + + JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false, true); + if (result == JSRegExpMatch) { + output[0] = context->matchBegin; + output[1] = context->matchEnd; + } + + freeDisjunctionContext(context); + + pattern->m_allocator->stopAllocator(); + + // RegExp.cpp currently expects all error to be converted to -1. 
+ ASSERT((result == JSRegExpMatch) == (output[0] != -1)); + return output[0]; + } + + Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length) + : pattern(pattern) + , output(output) + , input(inputChar, start, length) + , allocatorPool(0) + , remainingMatchCount(matchLimit) + { + } + +private: + BytecodePattern* pattern; + int* output; + InputStream input; + BumpPointerPool* allocatorPool; + unsigned remainingMatchCount; +}; + + + +class ByteCompiler { + struct ParenthesesStackEntry { + unsigned beginTerm; + unsigned savedAlternativeIndex; + // For js::Vector. Does not create a valid object. + ParenthesesStackEntry() {} + ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/) + : beginTerm(beginTerm) + , savedAlternativeIndex(savedAlternativeIndex) + { + } + }; + +public: + ByteCompiler(YarrPattern& pattern) + : m_pattern(pattern) + { + m_currentAlternativeIndex = 0; + } + + PassOwnPtr compile(BumpPointerAllocator* allocator) + { + regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough()); + emitDisjunction(m_pattern.m_body); + regexEnd(); + + return adoptPtr(js::OffTheBooks::new_(m_bodyDisjunction.release(), m_allParenthesesInfo, Ref(m_pattern), allocator)); + } + + void checkInput(unsigned count) + { + m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count)); + } + + void uncheckInput(unsigned count) + { + m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count)); + } + + void assertionBOL(int inputPosition) + { + m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition)); + } + + void assertionEOL(int inputPosition) + { + m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition)); + } + + void assertionWordBoundary(bool invert, int inputPosition) + { + m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition)); + } + + void 
atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) + { + if (m_pattern.m_ignoreCase) { + UChar lo = Unicode::toLower(ch); + UChar hi = Unicode::toUpper(ch); + + if (lo != hi) { + m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType)); + return; + } + } + + m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType)); + } + + void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) + { + m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition)); + + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount; + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; + } + + void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) + { + ASSERT(subpatternId); + + m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition)); + + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount; + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; + } + + void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) + { + int beginTerm = m_bodyDisjunction->terms.size(); + + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); + 
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; + m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; + + m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); + m_currentAlternativeIndex = beginTerm + 1; + } + + void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) + { + int beginTerm = m_bodyDisjunction->terms.size(); + + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition)); + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; + m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; + + m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); + m_currentAlternativeIndex = beginTerm + 1; + } + + void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) + { + // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin, + // then fix this up at the end! - simplifying this should make it much clearer. 
+ // https://bugs.webkit.org/show_bug.cgi?id=50136 + + int beginTerm = m_bodyDisjunction->terms.size(); + + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; + m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; + + m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); + m_currentAlternativeIndex = beginTerm + 1; + } + + void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation) + { + int beginTerm = m_bodyDisjunction->terms.size(); + + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0)); + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; + m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; + + m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); + m_currentAlternativeIndex = beginTerm + 1; + } + + void atomParentheticalAssertionEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) + { + unsigned beginTerm = popParenthesesStack(); + closeAlternative(beginTerm + 1); + unsigned endTerm = m_bodyDisjunction->terms.size(); + + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin); + + bool invert = m_bodyDisjunction->terms[beginTerm].invert(); + unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; + + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, 
invert, inputPosition)); + m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; + m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; + m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; + + m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount; + m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; + m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount; + m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; + } + + unsigned popParenthesesStack() + { + ASSERT(m_parenthesesStack.size()); + int stackEnd = m_parenthesesStack.size() - 1; + unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm; + m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex; + m_parenthesesStack.shrink(stackEnd); + + ASSERT(beginTerm < m_bodyDisjunction->terms.size()); + ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size()); + + return beginTerm; + } + +#ifndef NDEBUG + void dumpDisjunction(ByteDisjunction* disjunction) + { + printf("ByteDisjunction(%p):\n\t", (void *) disjunction); + for (unsigned i = 0; i < disjunction->terms.size(); ++i) + printf("{ %d } ", disjunction->terms[i].type); + printf("\n"); + } +#endif + + void closeAlternative(int beginTerm) + { + int origBeginTerm = beginTerm; + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin); + int endIndex = m_bodyDisjunction->terms.size(); + + unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; + + if (!m_bodyDisjunction->terms[beginTerm].alternative.next) + m_bodyDisjunction->terms.remove(beginTerm); + else { + while (m_bodyDisjunction->terms[beginTerm].alternative.next) { + beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction); + m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; 
+ m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; + } + + m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; + + m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd()); + m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; + } + } + + void closeBodyAlternative() + { + int beginTerm = 0; + int origBeginTerm = 0; + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin); + int endIndex = m_bodyDisjunction->terms.size(); + + unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; + + while (m_bodyDisjunction->terms[beginTerm].alternative.next) { + beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction); + m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; + m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; + } + + m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; + + m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd()); + m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; + } + + void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0) + { + unsigned beginTerm = popParenthesesStack(); + closeAlternative(beginTerm + 1); + unsigned endTerm = m_bodyDisjunction->terms.size(); + + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); + + ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm]; + + bool capture = parenthesesBegin.capture(); + unsigned subpatternId = parenthesesBegin.atom.subpatternId; + + unsigned numSubpatterns = lastSubpatternId - subpatternId + 1; + ByteDisjunction* parenthesesDisjunction = js::OffTheBooks::new_(numSubpatterns, callFrameSize); + + 
parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin()); + for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses) + parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]); + parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd()); + + m_bodyDisjunction->terms.shrink(beginTerm); + + m_allParenthesesInfo.append(parenthesesDisjunction); + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition)); + + m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount; + m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; + m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; + } + + void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) + { + unsigned beginTerm = popParenthesesStack(); + closeAlternative(beginTerm + 1); + unsigned endTerm = m_bodyDisjunction->terms.size(); + + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); + + bool capture = m_bodyDisjunction->terms[beginTerm].capture(); + unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; + + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition)); + m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; + m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; + m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; + + m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount; + m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; + m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount; + m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; + } + + void 
atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) + { + unsigned beginTerm = popParenthesesStack(); + closeAlternative(beginTerm + 1); + unsigned endTerm = m_bodyDisjunction->terms.size(); + + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); + + bool capture = m_bodyDisjunction->terms[beginTerm].capture(); + unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; + + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition)); + m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; + m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; + m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; + + m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount; + m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; + m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount; + m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; + } + + void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough) + { + m_bodyDisjunction = adoptPtr(js::OffTheBooks::new_(numSubpatterns, callFrameSize)); + m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough)); + m_bodyDisjunction->terms[0].frameLocation = 0; + m_currentAlternativeIndex = 0; + } + + void regexEnd() + { + closeBodyAlternative(); + } + + void alternativeBodyDisjunction(bool onceThrough) + { + int newAlternativeIndex = m_bodyDisjunction->terms.size(); + m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; + m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough)); + + m_currentAlternativeIndex = newAlternativeIndex; + } + + void alternativeDisjunction() + { 
+ int newAlternativeIndex = m_bodyDisjunction->terms.size(); + m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; + m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction()); + + m_currentAlternativeIndex = newAlternativeIndex; + } + + void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0) + { + for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { + unsigned currentCountAlreadyChecked = inputCountAlreadyChecked; + + PatternAlternative* alternative = disjunction->m_alternatives[alt]; + + if (alt) { + if (disjunction == m_pattern.m_body) + alternativeBodyDisjunction(alternative->onceThrough()); + else + alternativeDisjunction(); + } + + unsigned minimumSize = alternative->m_minimumSize; + int countToCheck = minimumSize - parenthesesInputCountAlreadyChecked; + + ASSERT(countToCheck >= 0); + if (countToCheck) { + checkInput(countToCheck); + currentCountAlreadyChecked += countToCheck; + } + + for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { + PatternTerm& term = alternative->m_terms[i]; + + switch (term.type) { + case PatternTerm::TypeAssertionBOL: + assertionBOL(term.inputPosition - currentCountAlreadyChecked); + break; + + case PatternTerm::TypeAssertionEOL: + assertionEOL(term.inputPosition - currentCountAlreadyChecked); + break; + + case PatternTerm::TypeAssertionWordBoundary: + assertionWordBoundary(term.invert(), term.inputPosition - currentCountAlreadyChecked); + break; + + case PatternTerm::TypePatternCharacter: + atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType); + break; + + case PatternTerm::TypeCharacterClass: + atomCharacterClass(term.characterClass, term.invert(), term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, 
term.quantityType); + break; + + case PatternTerm::TypeBackReference: + atomBackReference(term.backReferenceSubpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType); + break; + + case PatternTerm::TypeForwardReference: + break; + + case PatternTerm::TypeParenthesesSubpattern: { + unsigned disjunctionAlreadyCheckedCount = 0; + if (term.quantityCount == 1 && !term.parentheses.isCopy) { + unsigned alternativeFrameLocation = term.frameLocation; + // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame. + if (term.quantityType == QuantifierFixedCount) + disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize; + else + alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; + unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; + atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation); + emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); + atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); + } else if (term.parentheses.isTerminal) { + unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; + atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce); + emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); + atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); + } else { + unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; + 
atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0); + emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0); + atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize); + } + break; + } + + case PatternTerm::TypeParentheticalAssertion: { + unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion; + + ASSERT(currentCountAlreadyChecked >= static_cast(term.inputPosition)); + int positiveInputOffset = currentCountAlreadyChecked - term.inputPosition; + int uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize; + + if (uncheckAmount > 0) { + uncheckInput(uncheckAmount); + currentCountAlreadyChecked -= uncheckAmount; + } else + uncheckAmount = 0; + + atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation); + emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount); + atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType); + if (uncheckAmount) { + checkInput(uncheckAmount); + currentCountAlreadyChecked += uncheckAmount; + } + break; + } + } + } + } + } + +private: + YarrPattern& m_pattern; + OwnPtr m_bodyDisjunction; + unsigned m_currentAlternativeIndex; + Vector m_parenthesesStack; + Vector m_allParenthesesInfo; +}; + +PassOwnPtr byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator) +{ + return ByteCompiler(pattern).compile(allocator); +} + +int interpret(BytecodePattern* bytecode, const UChar* input, unsigned start, unsigned length, int* output) +{ + return Interpreter(bytecode, output, input, start, length).interpret(); +} + 
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce); +COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses); + + +} } diff --git a/js/src/yarr/YarrInterpreter.h b/js/src/yarr/YarrInterpreter.h new file mode 100644 index 000000000000..32b72858cad1 --- /dev/null +++ b/js/src/yarr/YarrInterpreter.h @@ -0,0 +1,380 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ***** END LICENSE BLOCK ***** */ + +#ifndef YarrInterpreter_h +#define YarrInterpreter_h + +#include "YarrPattern.h" + +namespace WTF { +class BumpPointerAllocator; +} +using WTF::BumpPointerAllocator; + +namespace JSC { namespace Yarr { + +class ByteDisjunction; + +struct ByteTerm { + enum Type { + TypeBodyAlternativeBegin, + TypeBodyAlternativeDisjunction, + TypeBodyAlternativeEnd, + TypeAlternativeBegin, + TypeAlternativeDisjunction, + TypeAlternativeEnd, + TypeSubpatternBegin, + TypeSubpatternEnd, + TypeAssertionBOL, + TypeAssertionEOL, + TypeAssertionWordBoundary, + TypePatternCharacterOnce, + TypePatternCharacterFixed, + TypePatternCharacterGreedy, + TypePatternCharacterNonGreedy, + TypePatternCasedCharacterOnce, + TypePatternCasedCharacterFixed, + TypePatternCasedCharacterGreedy, + TypePatternCasedCharacterNonGreedy, + TypeCharacterClass, + TypeBackReference, + TypeParenthesesSubpattern, + TypeParenthesesSubpatternOnceBegin, + TypeParenthesesSubpatternOnceEnd, + TypeParenthesesSubpatternTerminalBegin, + TypeParenthesesSubpatternTerminalEnd, + TypeParentheticalAssertionBegin, + TypeParentheticalAssertionEnd, + TypeCheckInput, + TypeUncheckInput + } type; + union { + struct { + union { + UChar patternCharacter; + struct { + UChar lo; + UChar hi; + } casedCharacter; + CharacterClass* characterClass; + unsigned subpatternId; + }; + union { + ByteDisjunction* parenthesesDisjunction; + unsigned parenthesesWidth; + }; + QuantifierType quantityType; + unsigned quantityCount; + } atom; + struct { + int next; + int end; + bool onceThrough; + } alternative; + unsigned checkInputCount; + }; + unsigned frameLocation; + bool m_capture : 1; + bool m_invert : 1; + int inputPosition; + + // For js::Vector. Does not create a valid object. 
+ ByteTerm() + { + } + + ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) + : frameLocation(frameLocation) + , m_capture(false) + , m_invert(false) + { + switch (quantityType) { + case QuantifierFixedCount: + type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed; + break; + case QuantifierGreedy: + type = ByteTerm::TypePatternCharacterGreedy; + break; + case QuantifierNonGreedy: + type = ByteTerm::TypePatternCharacterNonGreedy; + break; + } + + atom.patternCharacter = ch; + atom.quantityType = quantityType; + atom.quantityCount = quantityCount; + inputPosition = inputPos; + } + + ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) + : frameLocation(frameLocation) + , m_capture(false) + , m_invert(false) + { + switch (quantityType) { + case QuantifierFixedCount: + type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed; + break; + case QuantifierGreedy: + type = ByteTerm::TypePatternCasedCharacterGreedy; + break; + case QuantifierNonGreedy: + type = ByteTerm::TypePatternCasedCharacterNonGreedy; + break; + } + + atom.casedCharacter.lo = lo; + atom.casedCharacter.hi = hi; + atom.quantityType = quantityType; + atom.quantityCount = quantityCount; + inputPosition = inputPos; + } + + ByteTerm(CharacterClass* characterClass, bool invert, int inputPos) + : type(ByteTerm::TypeCharacterClass) + , m_capture(false) + , m_invert(invert) + { + atom.characterClass = characterClass; + atom.quantityType = QuantifierFixedCount; + atom.quantityCount = 1; + inputPosition = inputPos; + } + + ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos) + : type(type) + , m_capture(capture) + , m_invert(false) + { + atom.subpatternId = subpatternId; + atom.parenthesesDisjunction = parenthesesInfo; + 
atom.quantityType = QuantifierFixedCount; + atom.quantityCount = 1; + inputPosition = inputPos; + } + + ByteTerm(Type type, bool invert = false) + : type(type) + , m_capture(false) + , m_invert(invert) + { + atom.quantityType = QuantifierFixedCount; + atom.quantityCount = 1; + } + + ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos) + : type(type) + , m_capture(capture) + , m_invert(invert) + { + atom.subpatternId = subpatternId; + atom.quantityType = QuantifierFixedCount; + atom.quantityCount = 1; + inputPosition = inputPos; + } + + static ByteTerm BOL(int inputPos) + { + ByteTerm term(TypeAssertionBOL); + term.inputPosition = inputPos; + return term; + } + + static ByteTerm CheckInput(unsigned count) + { + ByteTerm term(TypeCheckInput); + term.checkInputCount = count; + return term; + } + + static ByteTerm UncheckInput(unsigned count) + { + ByteTerm term(TypeUncheckInput); + term.checkInputCount = count; + return term; + } + + static ByteTerm EOL(int inputPos) + { + ByteTerm term(TypeAssertionEOL); + term.inputPosition = inputPos; + return term; + } + + static ByteTerm WordBoundary(bool invert, int inputPos) + { + ByteTerm term(TypeAssertionWordBoundary, invert); + term.inputPosition = inputPos; + return term; + } + + static ByteTerm BackReference(unsigned subpatternId, int inputPos) + { + return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos); + } + + static ByteTerm BodyAlternativeBegin(bool onceThrough) + { + ByteTerm term(TypeBodyAlternativeBegin); + term.alternative.next = 0; + term.alternative.end = 0; + term.alternative.onceThrough = onceThrough; + return term; + } + + static ByteTerm BodyAlternativeDisjunction(bool onceThrough) + { + ByteTerm term(TypeBodyAlternativeDisjunction); + term.alternative.next = 0; + term.alternative.end = 0; + term.alternative.onceThrough = onceThrough; + return term; + } + + static ByteTerm BodyAlternativeEnd() + { + ByteTerm term(TypeBodyAlternativeEnd); + 
term.alternative.next = 0; + term.alternative.end = 0; + term.alternative.onceThrough = false; + return term; + } + + static ByteTerm AlternativeBegin() + { + ByteTerm term(TypeAlternativeBegin); + term.alternative.next = 0; + term.alternative.end = 0; + term.alternative.onceThrough = false; + return term; + } + + static ByteTerm AlternativeDisjunction() + { + ByteTerm term(TypeAlternativeDisjunction); + term.alternative.next = 0; + term.alternative.end = 0; + term.alternative.onceThrough = false; + return term; + } + + static ByteTerm AlternativeEnd() + { + ByteTerm term(TypeAlternativeEnd); + term.alternative.next = 0; + term.alternative.end = 0; + term.alternative.onceThrough = false; + return term; + } + + static ByteTerm SubpatternBegin() + { + return ByteTerm(TypeSubpatternBegin); + } + + static ByteTerm SubpatternEnd() + { + return ByteTerm(TypeSubpatternEnd); + } + + bool invert() + { + return m_invert; + } + + bool capture() + { + return m_capture; + } +}; + +class ByteDisjunction { + WTF_MAKE_FAST_ALLOCATED +public: + ByteDisjunction(unsigned numSubpatterns, unsigned frameSize) + : m_numSubpatterns(numSubpatterns) + , m_frameSize(frameSize) + { + } + + Vector terms; + unsigned m_numSubpatterns; + unsigned m_frameSize; +}; + +struct BytecodePattern { + WTF_MAKE_FAST_ALLOCATED +public: + BytecodePattern(PassOwnPtr body, Vector allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator) + : m_body(body) + , m_ignoreCase(pattern.m_ignoreCase) + , m_multiline(pattern.m_multiline) + , m_containsBeginChars(pattern.m_containsBeginChars) + , m_allocator(allocator) + { + newlineCharacterClass = pattern.newlineCharacterClass(); + wordcharCharacterClass = pattern.wordcharCharacterClass(); + + m_allParenthesesInfo.append(allParenthesesInfo); + m_userCharacterClasses.append(pattern.m_userCharacterClasses); + // 'Steal' the YarrPattern's CharacterClasses! We clear its + // array, so that it won't delete them on destruction. 
We'll + // take responsibility for that. + pattern.m_userCharacterClasses.clear(); + + m_beginChars.append(pattern.m_beginChars); + } + + ~BytecodePattern() + { + deleteAllValues(m_allParenthesesInfo); + deleteAllValues(m_userCharacterClasses); + } + + OwnPtr m_body; + bool m_ignoreCase; + bool m_multiline; + bool m_containsBeginChars; + // Each BytecodePattern is associated with a RegExp, each RegExp is associated + // with a JSGlobalData. Cache a pointer to our JSGlobalData's m_regExpAllocator. + BumpPointerAllocator* m_allocator; + + CharacterClass* newlineCharacterClass; + CharacterClass* wordcharCharacterClass; + + Vector m_beginChars; + +private: + Vector m_allParenthesesInfo; + Vector m_userCharacterClasses; +}; + +} } // namespace JSC::Yarr + +#endif // YarrInterpreter_h diff --git a/js/src/yarr/YarrJIT.cpp b/js/src/yarr/YarrJIT.cpp new file mode 100644 index 000000000000..c0187f240b6d --- /dev/null +++ b/js/src/yarr/YarrJIT.cpp @@ -0,0 +1,2405 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2009 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#include "YarrJIT.h" + +#include "assembler/assembler/LinkBuffer.h" +#include "Yarr.h" + +#if ENABLE_YARR_JIT + +using namespace WTF; + +namespace JSC { namespace Yarr { + +class YarrGenerator : private MacroAssembler { + friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); + +#if WTF_CPU_ARM + static const RegisterID input = ARMRegisters::r0; + static const RegisterID index = ARMRegisters::r1; + static const RegisterID length = ARMRegisters::r2; + static const RegisterID output = ARMRegisters::r4; + + static const RegisterID regT0 = ARMRegisters::r5; + static const RegisterID regT1 = ARMRegisters::r6; + + static const RegisterID returnRegister = ARMRegisters::r0; +#elif WTF_CPU_MIPS + static const RegisterID input = MIPSRegisters::a0; + static const RegisterID index = MIPSRegisters::a1; + static const RegisterID length = MIPSRegisters::a2; + static const RegisterID output = MIPSRegisters::a3; + + static const RegisterID regT0 = MIPSRegisters::t4; + static const RegisterID regT1 = MIPSRegisters::t5; + + static const RegisterID returnRegister = MIPSRegisters::v0; +#elif WTF_CPU_SH4 + static const RegisterID input = SH4Registers::r4; + static const RegisterID index = SH4Registers::r5; + static const RegisterID length = SH4Registers::r6; + static const RegisterID output = SH4Registers::r7; + + static const RegisterID 
regT0 = SH4Registers::r0; + static const RegisterID regT1 = SH4Registers::r1; + + static const RegisterID returnRegister = SH4Registers::r0; +#elif WTF_CPU_X86 + static const RegisterID input = X86Registers::eax; + static const RegisterID index = X86Registers::edx; + static const RegisterID length = X86Registers::ecx; + static const RegisterID output = X86Registers::edi; + + static const RegisterID regT0 = X86Registers::ebx; + static const RegisterID regT1 = X86Registers::esi; + + static const RegisterID returnRegister = X86Registers::eax; +#elif WTF_CPU_X86_64 + static const RegisterID input = X86Registers::edi; + static const RegisterID index = X86Registers::esi; + static const RegisterID length = X86Registers::edx; + static const RegisterID output = X86Registers::ecx; + + static const RegisterID regT0 = X86Registers::eax; + static const RegisterID regT1 = X86Registers::ebx; + + static const RegisterID returnRegister = X86Registers::eax; +#endif + + void optimizeAlternative(PatternAlternative* alternative) + { + if (!alternative->m_terms.size()) + return; + + for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) { + PatternTerm& term = alternative->m_terms[i]; + PatternTerm& nextTerm = alternative->m_terms[i + 1]; + + if ((term.type == PatternTerm::TypeCharacterClass) + && (term.quantityType == QuantifierFixedCount) + && (nextTerm.type == PatternTerm::TypePatternCharacter) + && (nextTerm.quantityType == QuantifierFixedCount)) { + PatternTerm termCopy = term; + alternative->m_terms[i] = nextTerm; + alternative->m_terms[i + 1] = termCopy; + } + } + } + + void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) + { + do { + // pick which range we're going to generate + int which = count >> 1; + char lo = ranges[which].begin; + char hi = ranges[which].end; + + // check if there are any ranges or matches below lo. 
If not, just jl to failure - + // if there is anything else to check, check that first, if it falls through jmp to failure. + if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { + Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); + + // generate code for all ranges before this one + if (which) + matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); + + while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { + matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); + ++*matchIndex; + } + failures.append(jump()); + + loOrAbove.link(this); + } else if (which) { + Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); + + matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); + failures.append(jump()); + + loOrAbove.link(this); + } else + failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); + + while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) + ++*matchIndex; + + matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); + // fall through to here, the value is above hi. + + // shuffle along & loop around if there are any more matches to handle. + unsigned next = which + 1; + ranges += next; + count -= next; + } while (count); + } + + void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) + { + if (charClass->m_table) { + ExtendedAddress tableEntry(character, reinterpret_cast(charClass->m_table->m_table)); + matchDest.append(branchTest8(charClass->m_table->m_inverted ? 
Zero : NonZero, tableEntry)); + return; + } + Jump unicodeFail; + if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { + Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f)); + + if (charClass->m_matchesUnicode.size()) { + for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { + UChar ch = charClass->m_matchesUnicode[i]; + matchDest.append(branch32(Equal, character, Imm32(ch))); + } + } + + if (charClass->m_rangesUnicode.size()) { + for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { + UChar lo = charClass->m_rangesUnicode[i].begin; + UChar hi = charClass->m_rangesUnicode[i].end; + + Jump below = branch32(LessThan, character, Imm32(lo)); + matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); + below.link(this); + } + } + + unicodeFail = jump(); + isAscii.link(this); + } + + if (charClass->m_ranges.size()) { + unsigned matchIndex = 0; + JumpList failures; + matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size()); + while (matchIndex < charClass->m_matches.size()) + matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); + + failures.link(this); + } else if (charClass->m_matches.size()) { + // optimization: gather 'a','A' etc back together, can mask & test once. 
+ Vector matchesAZaz; + + for (unsigned i = 0; i < charClass->m_matches.size(); ++i) { + char ch = charClass->m_matches[i]; + if (m_pattern.m_ignoreCase) { + if (isASCIILower(ch)) { + matchesAZaz.append(ch); + continue; + } + if (isASCIIUpper(ch)) + continue; + } + matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); + } + + if (unsigned countAZaz = matchesAZaz.size()) { + or32(TrustedImm32(32), character); + for (unsigned i = 0; i < countAZaz; ++i) + matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i]))); + } + } + + if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) + unicodeFail.link(this); + } + + // Jumps if input not available; will have (incorrectly) incremented already! + Jump jumpIfNoAvailableInput(unsigned countToCheck = 0) + { + if (countToCheck) + add32(Imm32(countToCheck), index); + return branch32(Above, index, length); + } + + Jump jumpIfAvailableInput(unsigned countToCheck) + { + add32(Imm32(countToCheck), index); + return branch32(BelowOrEqual, index, length); + } + + Jump checkInput() + { + return branch32(BelowOrEqual, index, length); + } + + Jump atEndOfInput() + { + return branch32(Equal, index, length); + } + + Jump notAtEndOfInput() + { + return branch32(NotEqual, index, length); + } + + Jump jumpIfCharEquals(UChar ch, int inputPosition) + { + return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); + } + + Jump jumpIfCharNotEquals(UChar ch, int inputPosition) + { + return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); + } + + void readCharacter(int inputPosition, RegisterID reg) + { + load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); + } + + void storeToFrame(RegisterID reg, unsigned frameLocation) + { + poke(reg, frameLocation); + } + + void storeToFrame(TrustedImm32 imm, unsigned frameLocation) + { + poke(imm, frameLocation); + } + + DataLabelPtr 
storeToFrameWithPatch(unsigned frameLocation) + { + return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); + } + + void loadFromFrame(unsigned frameLocation, RegisterID reg) + { + peek(reg, frameLocation); + } + + void loadFromFrameAndJump(unsigned frameLocation) + { + jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); + } + + enum YarrOpCode { + // These nodes wrap body alternatives - those in the main disjunction, + // rather than subpatterns or assertions. These are chained together in + // a doubly linked list, with a 'begin' node for the first alternative, + // a 'next' node for each subsequent alternative, and an 'end' node at + // the end. In the case of repeating alternatives, the 'end' node also + // has a reference back to 'begin'. + OpBodyAlternativeBegin, + OpBodyAlternativeNext, + OpBodyAlternativeEnd, + // Similar to the body alternatives, but used for subpatterns with two + // or more alternatives. + OpNestedAlternativeBegin, + OpNestedAlternativeNext, + OpNestedAlternativeEnd, + // Used for alternatives in subpatterns where there is only a single + // alternative (backtrackingis easier in these cases), or for alternatives + // which never need to be backtracked (those in parenthetical assertions, + // terminal subpatterns). + OpSimpleNestedAlternativeBegin, + OpSimpleNestedAlternativeNext, + OpSimpleNestedAlternativeEnd, + // Used to wrap 'Once' subpattern matches (quantityCount == 1). + OpParenthesesSubpatternOnceBegin, + OpParenthesesSubpatternOnceEnd, + // Used to wrap 'Terminal' subpattern matches (at the end of the regexp). + OpParenthesesSubpatternTerminalBegin, + OpParenthesesSubpatternTerminalEnd, + // Used to wrap parenthetical assertions. + OpParentheticalAssertionBegin, + OpParentheticalAssertionEnd, + // Wraps all simple terms (pattern characters, character classes). 
+ OpTerm, + // Where an expression contains only 'once through' body alternatives + // and no repeating ones, this op is used to return match failure. + OpMatchFailed + }; + + // This structure is used to hold the compiled opcode information, + // including reference back to the original PatternTerm/PatternAlternatives, + // and JIT compilation data structures. + struct YarrOp { + explicit YarrOp(PatternTerm* term) + : m_op(OpTerm) + , m_term(term) + , m_isDeadCode(false) + { + } + + explicit YarrOp(YarrOpCode op) + : m_op(op) + , m_isDeadCode(false) + { + } + + // The operation, as a YarrOpCode, and also a reference to the PatternTerm. + YarrOpCode m_op; + PatternTerm* m_term; + + // For alternatives, this holds the PatternAlternative and doubly linked + // references to this alternative's siblings. In the case of the + // OpBodyAlternativeEnd node at the end of a section of repeating nodes, + // m_nextOp will reference the OpBodyAlternativeBegin node of the first + // repeating alternative. + PatternAlternative* m_alternative; + size_t m_previousOp; + size_t m_nextOp; + + // Used to record a set of Jumps out of the generated code, typically + // used for jumps out to backtracking code, and a single reentry back + // into the code for a node (likely where a backtrack will trigger + // rematching). + Label m_reentry; + JumpList m_jumps; + + // This flag is used to null out the second pattern character, when + // two are fused to match a pair together. + bool m_isDeadCode; + + // Currently used in the case of some of the more complex management of + // 'm_checked', to cache the offset used in this alternative, to avoid + // recalculating it. + int m_checkAdjust; + + // Used by OpNestedAlternativeNext/End to hold the pointer to the + // value that will be pushed into the pattern's frame to return to, + // upon backtracking back into the disjunction. 
+ DataLabelPtr m_returnAddress; + }; + + // BacktrackingState + // This class encapsulates information about the state of code generation + // whilst generating the code for backtracking, when a term fails to match. + // Upon entry to code generation of the backtracking code for a given node, + // the Backtracking state will hold references to all control flow sources + // that are outputs in need of further backtracking from the prior node + // generated (which is the subsequent operation in the regular expression, + // and in the m_ops Vector, since we generated backtracking backwards). + // These references to control flow take the form of: + // - A jump list of jumps, to be linked to code that will backtrack them + // further. + // - A set of DataLabelPtr values, to be populated with values to be + // treated effectively as return addresses backtracking into complex + // subpatterns. + // - A flag indicating that the current sequence of generated code up to + // this point requires backtracking. + class BacktrackingState { + public: + BacktrackingState() + : m_pendingFallthrough(false) + { + } + + // Add a jump or jumps, a return address, or set the flag indicating + // that the current 'fallthrough' control flow requires backtracking. + void append(const Jump& jump) + { + m_laterFailures.append(jump); + } + void append(JumpList& jumpList) + { + m_laterFailures.append(jumpList); + } + void append(const DataLabelPtr& returnAddress) + { + m_pendingReturns.append(returnAddress); + } + void fallthrough() + { + ASSERT(!m_pendingFallthrough); + m_pendingFallthrough = true; + } + + // These methods clear the backtracking state, either linking to the + // current location, a provided label, or copying the backtracking out + // to a JumpList. All actions may require code generation to take place, + // and as such are passed a pointer to the assembler. 
+ void link(MacroAssembler* assembler) + { + if (m_pendingReturns.size()) { + Label here(assembler); + for (unsigned i = 0; i < m_pendingReturns.size(); ++i) + m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here)); + m_pendingReturns.clear(); + } + m_laterFailures.link(assembler); + m_laterFailures.clear(); + m_pendingFallthrough = false; + } + void linkTo(Label label, MacroAssembler* assembler) + { + if (m_pendingReturns.size()) { + for (unsigned i = 0; i < m_pendingReturns.size(); ++i) + m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label)); + m_pendingReturns.clear(); + } + if (m_pendingFallthrough) + assembler->jump(label); + m_laterFailures.linkTo(label, assembler); + m_laterFailures.clear(); + m_pendingFallthrough = false; + } + void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler) + { + if (m_pendingReturns.size()) { + Label here(assembler); + for (unsigned i = 0; i < m_pendingReturns.size(); ++i) + m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here)); + m_pendingReturns.clear(); + m_pendingFallthrough = true; + } + if (m_pendingFallthrough) + jumpList.append(assembler->jump()); + jumpList.append(m_laterFailures); + m_laterFailures.clear(); + m_pendingFallthrough = false; + } + + bool isEmpty() + { + return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough; + } + + // Called at the end of code generation to link all return addresses. 
+ void linkDataLabels(LinkBuffer& linkBuffer) + { + ASSERT(isEmpty()); + for (unsigned i = 0; i < m_backtrackRecords.size(); ++i) + linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation)); + } + + private: + struct ReturnAddressRecord { + ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation) + : m_dataLabel(dataLabel) + , m_backtrackLocation(backtrackLocation) + { + } + + DataLabelPtr m_dataLabel; + Label m_backtrackLocation; + }; + + JumpList m_laterFailures; + bool m_pendingFallthrough; + Vector m_pendingReturns; + Vector m_backtrackRecords; + }; + + // Generation methods: + // =================== + + // This method provides a default implementation of backtracking common + // to many terms; terms commonly jump out of the forwards matching path + // on any failed conditions, and add these jumps to the m_jumps list. If + // no special handling is required we can often just backtrack to m_jumps. + void backtrackTermDefault(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + m_backtrackingState.append(op.m_jumps); + } + + void generateAssertionBOL(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + if (m_pattern.m_multiline) { + const RegisterID character = regT0; + + JumpList matchDest; + if (!term->inputPosition) + matchDest.append(branch32(Equal, index, Imm32(m_checked))); + + readCharacter((term->inputPosition - m_checked) - 1, character); + matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); + op.m_jumps.append(jump()); + + matchDest.link(this); + } else { + // Erk, really should poison out these alternatives early. 
:-/ + if (term->inputPosition) + op.m_jumps.append(jump()); + else + op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked))); + } + } + void backtrackAssertionBOL(size_t opIndex) + { + backtrackTermDefault(opIndex); + } + + void generateAssertionEOL(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + if (m_pattern.m_multiline) { + const RegisterID character = regT0; + + JumpList matchDest; + if (term->inputPosition == m_checked) + matchDest.append(atEndOfInput()); + + readCharacter((term->inputPosition - m_checked), character); + matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); + op.m_jumps.append(jump()); + + matchDest.link(this); + } else { + if (term->inputPosition == m_checked) + op.m_jumps.append(notAtEndOfInput()); + // Erk, really should poison out these alternatives early. :-/ + else + op.m_jumps.append(jump()); + } + } + void backtrackAssertionEOL(size_t opIndex) + { + backtrackTermDefault(opIndex); + } + + // Also falls though on nextIsNotWordChar. 
+    // Emits the "is the character at this term's position a word character?"
+    // part of a word-boundary assertion. The character at
+    // (inputPosition - m_checked) is loaded into regT0 and tested against the
+    // pattern's wordchar class:
+    //  - jumps taken when the character is in the class are added to
+    //    nextIsWordChar;
+    //  - when the term sits exactly at the checked position, an end-of-input
+    //    jump is added to nextIsNotWordChar before the character is read;
+    //  - a character that is not in the class falls through.
+    void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
+    {
+        YarrOp& op = m_ops[opIndex];
+        PatternTerm* term = op.m_term;
+
+        const RegisterID character = regT0;
+
+        if (term->inputPosition == m_checked)
+            nextIsNotWordChar.append(atEndOfInput());
+
+        readCharacter((term->inputPosition - m_checked), character);
+        matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
+    }
+
+    // Generates the forward matching code for a word-boundary assertion.
+    // The character before the term's position is classified first; then
+    // matchAssertionWordchar() classifies the character at the position, once
+    // on each of the two resulting paths. Jump lists appended to op.m_jumps
+    // are the failure exits; the lists linked at the bottom of this method
+    // land at the end of the code emitted here.
+    void generateAssertionWordBoundary(size_t opIndex)
+    {
+        YarrOp& op = m_ops[opIndex];
+        PatternTerm* term = op.m_term;
+
+        const RegisterID character = regT0;
+
+        // When the term is at input position 0, a branch taken when
+        // index == m_checked skips the read of the previous character and its
+        // class test (presumably because we are then at the start of the input
+        // and there is no previous character - TODO confirm). atBegin is linked
+        // straight after the test, i.e. onto the 'previous character was not a
+        // wordchar' path.
+        Jump atBegin;
+        JumpList matchDest;
+        if (!term->inputPosition)
+            atBegin = branch32(Equal, index, Imm32(m_checked));
+        readCharacter((term->inputPosition - m_checked) - 1, character);
+        matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
+        if (!term->inputPosition)
+            atBegin.link(this);
+
+        // We fall through to here if the last character was not a wordchar.
+        // NOTE: for an inverted term the two lists are handed to
+        // matchAssertionWordchar() in swapped order, so the list names only
+        // describe the characters seen in the non-inverted case. In both cases
+        // the fallthrough out of the class test is routed with an explicit
+        // jump().
+        JumpList nonWordCharThenWordChar;
+        JumpList nonWordCharThenNonWordChar;
+        if (term->invert()) {
+            matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
+            nonWordCharThenWordChar.append(jump());
+        } else {
+            matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
+            nonWordCharThenNonWordChar.append(jump());
+        }
+        op.m_jumps.append(nonWordCharThenNonWordChar);
+
+        // We jump here if the last character was a wordchar.
+        matchDest.link(this);
+        JumpList wordCharThenWordChar;
+        JumpList wordCharThenNonWordChar;
+        if (term->invert()) {
+            matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
+            wordCharThenWordChar.append(jump());
+        } else {
+            matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
+            // This can fall-through! No jump() is emitted here: execution simply
+            // runs on to the point where the two lists below are linked.
+        }
+
+        op.m_jumps.append(wordCharThenWordChar);
+
+        nonWordCharThenWordChar.link(this);
+        wordCharThenNonWordChar.link(this);
+    }
+    // Backtracking for a word-boundary assertion needs no code of its own; it
+    // delegates to backtrackTermDefault().
+    void backtrackAssertionWordBoundary(size_t opIndex)
+    {
+        backtrackTermDefault(opIndex);
+    }
+
+    // Generates the forward matching code for a pattern character with a
+    // fixed quantity of one. Mismatch jumps are appended to op.m_jumps.
+    // If the following op is also a single fixed-count pattern character at
+    // the next input position, both characters are checked with one 32-bit
+    // compare and the following op is flagged as dead code so that it emits
+    // nothing when its turn comes.
+    void generatePatternCharacterOnce(size_t opIndex)
+    {
+        YarrOp& op = m_ops[opIndex];
+
+        // m_ops always ends with an OpBodyAlternativeEnd or OpMatchFailed
+        // node, so there must always be at least one more node.
+        ASSERT(opIndex + 1 < m_ops.size());
+        YarrOp& nextOp = m_ops[opIndex + 1];
+
+        // This op was already covered by the paired compare emitted for the
+        // preceding op.
+        if (op.m_isDeadCode)
+            return;
+
+        PatternTerm* term = op.m_term;
+        UChar ch = term->patternCharacter;
+
+        const RegisterID character = regT0;
+
+        if (nextOp.m_op == OpTerm) {
+            PatternTerm* nextTerm = nextOp.m_term;
+            if (nextTerm->type == PatternTerm::TypePatternCharacter
+                && nextTerm->quantityType == QuantifierFixedCount
+                && nextTerm->quantityCount == 1
+                && nextTerm->inputPosition == (term->inputPosition + 1)) {
+
+                UChar ch2 = nextTerm->patternCharacter;
+
+                // Pack both characters into one 32-bit value: ch in the low
+                // half, ch2 in the high half (this appears to assume the
+                // 32-bit load sees the first character in its low half, i.e.
+                // little-endian - TODO confirm). When ignoring case, 'mask'
+                // carries bit 32 for each half that holds an ASCII letter.
+                int mask = 0;
+                int chPair = ch | (ch2 << 16);
+
+                if (m_pattern.m_ignoreCase) {
+                    if (isASCIIAlpha(ch))
+                        mask |= 32;
+                    if (isASCIIAlpha(ch2))
+                        mask |= 32 << 16;
+                }
+
+                BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
+                if (mask) {
+                    // OR the same mask into the loaded input and the expected
+                    // pair, so the masked bit is ignored by the compare.
+                    load32WithUnalignedHalfWords(address, character);
+                    or32(Imm32(mask), character);
+                    op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask)));
+                } else
+                    op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, address, Imm32(chPair)));
+
+                nextOp.m_isDeadCode = true;
+                return;
+            }
+        }
+
+        // Single character: for an ASCII letter under ignore-case, OR bit 32
+        // into the input character and compare against the lower-case form;
+        // otherwise compare the character in memory directly.
+        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+            readCharacter(term->inputPosition - m_checked, character);
+            or32(TrustedImm32(32), character);
+            op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+        } else {
+            ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+            op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
+        }
+    }
+
void backtrackPatternCharacterOnce(size_t opIndex) + { + backtrackTermDefault(opIndex); + } + + void generatePatternCharacterFixed(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + UChar ch = term->patternCharacter; + + const RegisterID character = regT0; + const RegisterID countRegister = regT1; + + move(index, countRegister); + sub32(Imm32(term->quantityCount), countRegister); + + Label loop(this); + BaseIndex address(input, countRegister, TimesTwo, (term->inputPosition - m_checked + term->quantityCount) * sizeof(UChar)); + + if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { + load16(address, character); + or32(TrustedImm32(32), character); + op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); + } else { + ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); + op.m_jumps.append(branch16(NotEqual, address, Imm32(ch))); + } + add32(TrustedImm32(1), countRegister); + branch32(NotEqual, countRegister, index).linkTo(loop, this); + } + void backtrackPatternCharacterFixed(size_t opIndex) + { + backtrackTermDefault(opIndex); + } + + void generatePatternCharacterGreedy(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + UChar ch = term->patternCharacter; + + const RegisterID character = regT0; + const RegisterID countRegister = regT1; + + move(TrustedImm32(0), countRegister); + + JumpList failures; + Label loop(this); + failures.append(atEndOfInput()); + if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { + readCharacter(term->inputPosition - m_checked, character); + or32(TrustedImm32(32), character); + failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); + } else { + ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); + failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked)); + } + + add32(TrustedImm32(1), countRegister); + add32(TrustedImm32(1), index); + if 
(term->quantityCount == quantifyInfinite) + jump(loop); + else + branch32(NotEqual, countRegister, Imm32(term->quantityCount)).linkTo(loop, this); + + failures.link(this); + op.m_reentry = label(); + + storeToFrame(countRegister, term->frameLocation); + + } + void backtrackPatternCharacterGreedy(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + const RegisterID countRegister = regT1; + + m_backtrackingState.link(this); + + loadFromFrame(term->frameLocation, countRegister); + m_backtrackingState.append(branchTest32(Zero, countRegister)); + sub32(TrustedImm32(1), countRegister); + sub32(TrustedImm32(1), index); + jump(op.m_reentry); + } + + void generatePatternCharacterNonGreedy(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + const RegisterID countRegister = regT1; + + move(TrustedImm32(0), countRegister); + op.m_reentry = label(); + storeToFrame(countRegister, term->frameLocation); + } + void backtrackPatternCharacterNonGreedy(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + UChar ch = term->patternCharacter; + + const RegisterID character = regT0; + const RegisterID countRegister = regT1; + + JumpList nonGreedyFailures; + + m_backtrackingState.link(this); + + loadFromFrame(term->frameLocation, countRegister); + + nonGreedyFailures.append(atEndOfInput()); + if (term->quantityCount != quantifyInfinite) + nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount))); + if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { + readCharacter(term->inputPosition - m_checked, character); + or32(TrustedImm32(32), character); + nonGreedyFailures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); + } else { + ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); + nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked)); + } + + add32(TrustedImm32(1), countRegister); + 
add32(TrustedImm32(1), index); + + jump(op.m_reentry); + + nonGreedyFailures.link(this); + sub32(countRegister, index); + m_backtrackingState.fallthrough(); + } + + void generateCharacterClassOnce(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + const RegisterID character = regT0; + + JumpList matchDest; + readCharacter((term->inputPosition - m_checked), character); + matchCharacterClass(character, matchDest, term->characterClass); + + if (term->invert()) + op.m_jumps.append(matchDest); + else { + op.m_jumps.append(jump()); + matchDest.link(this); + } + } + void backtrackCharacterClassOnce(size_t opIndex) + { + backtrackTermDefault(opIndex); + } + + void generateCharacterClassFixed(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + const RegisterID character = regT0; + const RegisterID countRegister = regT1; + + move(index, countRegister); + sub32(Imm32(term->quantityCount), countRegister); + + Label loop(this); + JumpList matchDest; + load16(BaseIndex(input, countRegister, TimesTwo, (term->inputPosition - m_checked + term->quantityCount) * sizeof(UChar)), character); + matchCharacterClass(character, matchDest, term->characterClass); + + if (term->invert()) + op.m_jumps.append(matchDest); + else { + op.m_jumps.append(jump()); + matchDest.link(this); + } + + add32(TrustedImm32(1), countRegister); + branch32(NotEqual, countRegister, index).linkTo(loop, this); + } + void backtrackCharacterClassFixed(size_t opIndex) + { + backtrackTermDefault(opIndex); + } + + void generateCharacterClassGreedy(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + const RegisterID character = regT0; + const RegisterID countRegister = regT1; + + move(TrustedImm32(0), countRegister); + + JumpList failures; + Label loop(this); + failures.append(atEndOfInput()); + + if (term->invert()) { + readCharacter(term->inputPosition - m_checked, character); + matchCharacterClass(character, 
failures, term->characterClass); + } else { + JumpList matchDest; + readCharacter(term->inputPosition - m_checked, character); + matchCharacterClass(character, matchDest, term->characterClass); + failures.append(jump()); + matchDest.link(this); + } + + add32(TrustedImm32(1), countRegister); + add32(TrustedImm32(1), index); + if (term->quantityCount != quantifyInfinite) { + branch32(NotEqual, countRegister, Imm32(term->quantityCount)).linkTo(loop, this); + failures.append(jump()); + } else + jump(loop); + + failures.link(this); + op.m_reentry = label(); + + storeToFrame(countRegister, term->frameLocation); + } + void backtrackCharacterClassGreedy(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + const RegisterID countRegister = regT1; + + m_backtrackingState.link(this); + + loadFromFrame(term->frameLocation, countRegister); + m_backtrackingState.append(branchTest32(Zero, countRegister)); + sub32(TrustedImm32(1), countRegister); + sub32(TrustedImm32(1), index); + jump(op.m_reentry); + } + + void generateCharacterClassNonGreedy(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + const RegisterID countRegister = regT1; + + move(TrustedImm32(0), countRegister); + op.m_reentry = label(); + storeToFrame(countRegister, term->frameLocation); + } + void backtrackCharacterClassNonGreedy(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + const RegisterID character = regT0; + const RegisterID countRegister = regT1; + + JumpList nonGreedyFailures; + + m_backtrackingState.link(this); + + Label backtrackBegin(this); + loadFromFrame(term->frameLocation, countRegister); + + nonGreedyFailures.append(atEndOfInput()); + nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount))); + + JumpList matchDest; + readCharacter(term->inputPosition - m_checked, character); + matchCharacterClass(character, matchDest, term->characterClass); + + if (term->invert()) 
+ nonGreedyFailures.append(matchDest); + else { + nonGreedyFailures.append(jump()); + matchDest.link(this); + } + + add32(TrustedImm32(1), countRegister); + add32(TrustedImm32(1), index); + + jump(op.m_reentry); + + nonGreedyFailures.link(this); + sub32(countRegister, index); + m_backtrackingState.fallthrough(); + } + + // Code generation/backtracking for simple terms + // (pattern characters, character classes, and assertions). + // These methods farm out work to the set of functions above. + void generateTerm(size_t opIndex) + { + YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + switch (term->type) { + case PatternTerm::TypePatternCharacter: + switch (term->quantityType) { + case QuantifierFixedCount: + if (term->quantityCount == 1) + generatePatternCharacterOnce(opIndex); + else + generatePatternCharacterFixed(opIndex); + break; + case QuantifierGreedy: + generatePatternCharacterGreedy(opIndex); + break; + case QuantifierNonGreedy: + generatePatternCharacterNonGreedy(opIndex); + break; + } + break; + + case PatternTerm::TypeCharacterClass: + switch (term->quantityType) { + case QuantifierFixedCount: + if (term->quantityCount == 1) + generateCharacterClassOnce(opIndex); + else + generateCharacterClassFixed(opIndex); + break; + case QuantifierGreedy: + generateCharacterClassGreedy(opIndex); + break; + case QuantifierNonGreedy: + generateCharacterClassNonGreedy(opIndex); + break; + } + break; + + case PatternTerm::TypeAssertionBOL: + generateAssertionBOL(opIndex); + break; + + case PatternTerm::TypeAssertionEOL: + generateAssertionEOL(opIndex); + break; + + case PatternTerm::TypeAssertionWordBoundary: + generateAssertionWordBoundary(opIndex); + break; + + case PatternTerm::TypeForwardReference: + break; + + case PatternTerm::TypeParenthesesSubpattern: + case PatternTerm::TypeParentheticalAssertion: + ASSERT_NOT_REACHED(); + case PatternTerm::TypeBackReference: + m_shouldFallBack = true; + break; + } + } + void backtrackTerm(size_t opIndex) + { + 
YarrOp& op = m_ops[opIndex]; + PatternTerm* term = op.m_term; + + switch (term->type) { + case PatternTerm::TypePatternCharacter: + switch (term->quantityType) { + case QuantifierFixedCount: + if (term->quantityCount == 1) + backtrackPatternCharacterOnce(opIndex); + else + backtrackPatternCharacterFixed(opIndex); + break; + case QuantifierGreedy: + backtrackPatternCharacterGreedy(opIndex); + break; + case QuantifierNonGreedy: + backtrackPatternCharacterNonGreedy(opIndex); + break; + } + break; + + case PatternTerm::TypeCharacterClass: + switch (term->quantityType) { + case QuantifierFixedCount: + if (term->quantityCount == 1) + backtrackCharacterClassOnce(opIndex); + else + backtrackCharacterClassFixed(opIndex); + break; + case QuantifierGreedy: + backtrackCharacterClassGreedy(opIndex); + break; + case QuantifierNonGreedy: + backtrackCharacterClassNonGreedy(opIndex); + break; + } + break; + + case PatternTerm::TypeAssertionBOL: + backtrackAssertionBOL(opIndex); + break; + + case PatternTerm::TypeAssertionEOL: + backtrackAssertionEOL(opIndex); + break; + + case PatternTerm::TypeAssertionWordBoundary: + backtrackAssertionWordBoundary(opIndex); + break; + + case PatternTerm::TypeForwardReference: + break; + + case PatternTerm::TypeParenthesesSubpattern: + case PatternTerm::TypeParentheticalAssertion: + ASSERT_NOT_REACHED(); + case PatternTerm::TypeBackReference: + m_shouldFallBack = true; + break; + } + } + + void generate() + { + // Forwards generate the matching code. + ASSERT(m_ops.size()); + size_t opIndex = 0; + + do { + YarrOp& op = m_ops[opIndex]; + switch (op.m_op) { + + case OpTerm: + generateTerm(opIndex); + break; + + // OpBodyAlternativeBegin/Next/End + // + // These nodes wrap the set of alternatives in the body of the regular expression. 
+ // There may be either one or two chains of OpBodyAlternative nodes, one representing + // the 'once through' sequence of alternatives (if any exist), and one representing + // the repeating alternatives (again, if any exist). + // + // Upon normal entry to the Begin alternative, we will check that input is available. + // Reentry to the Begin alternative will take place after the check has taken place, + // and will assume that the input position has already been progressed as appropriate. + // + // Entry to subsequent Next/End alternatives occurs when the prior alternative has + // successfully completed a match - return a success state from JIT code. + // + // Next alternatives allow for reentry optimized to suit backtracking from its + // preceding alternative. It expects the input position to still be set to a position + // appropriate to its predecessor, and it will only perform an input check if the + // predecessor had a minimum size less than its own. + // + // In the case of 'once through' expressions, the End node will also have a reentry + // point to jump to when the last alternative fails. Again, this expects the input + // position to still reflect that expected by the prior alternative. + case OpBodyAlternativeBegin: { + PatternAlternative* alternative = op.m_alternative; + + // Upon entry at the head of the set of alternatives, check if input is available + // to run the first alternative. (This progresses the input position). + op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize)); + // We will reenter after the check, and assume the input position to have been + // set as appropriate to this alternative. 
+ op.m_reentry = label(); + + m_checked += alternative->m_minimumSize; + break; + } + case OpBodyAlternativeNext: + case OpBodyAlternativeEnd: { + PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; + PatternAlternative* alternative = op.m_alternative; + + // If we get here, the prior alternative matched - return success. + + // Adjust the stack pointer to remove the pattern's frame. + if (m_pattern.m_body->m_callFrameSize) + addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); + + // Load appropriate values into the return register and the first output + // slot, and return. In the case of pattern with a fixed size, we will + // not have yet set the value in the first output slot. + ASSERT(index != returnRegister); + if (m_pattern.m_body->m_hasFixedSize) { + move(index, returnRegister); + if (priorAlternative->m_minimumSize) + sub32(Imm32(priorAlternative->m_minimumSize), returnRegister); + store32(returnRegister, output); + } else + load32(Address(output), returnRegister); + store32(index, Address(output, 4)); + generateReturn(); + + // This is the divide between the tail of the prior alternative, above, and + // the head of the subsequent alternative, below. + + if (op.m_op == OpBodyAlternativeNext) { + // This is the reentry point for the Next alternative. We expect any code + // that jumps here to do so with the input position matching that of the + // PRIOR alternative, and we will only check input availability if we + // need to progress it forwards. + op.m_reentry = label(); + if (int delta = alternative->m_minimumSize - priorAlternative->m_minimumSize) { + add32(Imm32(delta), index); + if (delta > 0) + op.m_jumps.append(jumpIfNoAvailableInput()); + } + } else if (op.m_nextOp == notFound) { + // This is the reentry point for the End of 'once through' alternatives, + // jumped to when the last alternative fails to match. 
+ op.m_reentry = label(); + sub32(Imm32(priorAlternative->m_minimumSize), index); + } + + if (op.m_op == OpBodyAlternativeNext) + m_checked += alternative->m_minimumSize; + m_checked -= priorAlternative->m_minimumSize; + break; + } + + // OpSimpleNestedAlternativeBegin/Next/End + // OpNestedAlternativeBegin/Next/End + // + // These nodes are used to handle sets of alternatives that are nested within + // subpatterns and parenthetical assertions. The 'simple' forms are used where + // we do not need to be able to backtrack back into any alternative other than + // the last, the normal forms allow backtracking into any alternative. + // + // Each Begin/Next node is responsible for planting an input check to ensure + // sufficient input is available on entry. Next nodes additionally need to + // jump to the end - Next nodes use the End node's m_jumps list to hold this + // set of jumps. + // + // In the non-simple forms, successful alternative matches must store a + // 'return address' using a DataLabelPtr, used to store the address to jump + // to when backtracking, to get to the code for the appropriate alternative. + case OpSimpleNestedAlternativeBegin: + case OpNestedAlternativeBegin: { + PatternTerm* term = op.m_term; + PatternAlternative* alternative = op.m_alternative; + PatternDisjunction* disjunction = term->parentheses.disjunction; + + // Calculate how much input we need to check for, and if non-zero check. 
+ op.m_checkAdjust = alternative->m_minimumSize; + if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) + op.m_checkAdjust -= disjunction->m_minimumSize; + if (op.m_checkAdjust) + op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); + + m_checked += op.m_checkAdjust; + break; + } + case OpSimpleNestedAlternativeNext: + case OpNestedAlternativeNext: { + PatternTerm* term = op.m_term; + PatternAlternative* alternative = op.m_alternative; + PatternDisjunction* disjunction = term->parentheses.disjunction; + + // In the non-simple case, store a 'return address' so we can backtrack correctly. + if (op.m_op == OpNestedAlternativeNext) { + unsigned parenthesesFrameLocation = term->frameLocation; + unsigned alternativeFrameLocation = parenthesesFrameLocation; + if (term->quantityType != QuantifierFixedCount) + alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; + op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); + } + + // If we reach here then the last alternative has matched - jump to the + // End node, to skip over any further alternatives. + // + // FIXME: this is logically O(N^2) (though N can be expected to be very + // small). We could avoid this either by adding an extra jump to the JIT + // data structures, or by making backtracking code that jumps to Next + // alternatives are responsible for checking that input is available (if + // we didn't need to plant the input checks, then m_jumps would be free). + YarrOp* endOp = &m_ops[op.m_nextOp]; + while (endOp->m_nextOp != notFound) { + ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); + endOp = &m_ops[endOp->m_nextOp]; + } + ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); + endOp->m_jumps.append(jump()); + + // This is the entry point for the next alternative. 
+ op.m_reentry = label(); + + // Calculate how much input we need to check for, and if non-zero check. + op.m_checkAdjust = alternative->m_minimumSize; + if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) + op.m_checkAdjust -= disjunction->m_minimumSize; + if (op.m_checkAdjust) + op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); + + YarrOp& lastOp = m_ops[op.m_previousOp]; + m_checked -= lastOp.m_checkAdjust; + m_checked += op.m_checkAdjust; + break; + } + case OpSimpleNestedAlternativeEnd: + case OpNestedAlternativeEnd: { + PatternTerm* term = op.m_term; + + // In the non-simple case, store a 'return address' so we can backtrack correctly. + if (op.m_op == OpNestedAlternativeEnd) { + unsigned parenthesesFrameLocation = term->frameLocation; + unsigned alternativeFrameLocation = parenthesesFrameLocation; + if (term->quantityType != QuantifierFixedCount) + alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; + op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); + } + + // If this set of alternatives contains more than one alternative, + // then the Next nodes will have planted jumps to the End, and added + // them to this node's m_jumps list. + op.m_jumps.link(this); + op.m_jumps.clear(); + + YarrOp& lastOp = m_ops[op.m_previousOp]; + m_checked -= lastOp.m_checkAdjust; + break; + } + + // OpParenthesesSubpatternOnceBegin/End + // + // These nodes support (optionally) capturing subpatterns, that have a + // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). + case OpParenthesesSubpatternOnceBegin: { + PatternTerm* term = op.m_term; + unsigned parenthesesFrameLocation = term->frameLocation; + const RegisterID indexTemporary = regT0; + ASSERT(term->quantityCount == 1); + + // Upon entry to a Greedy quantified set of parentheses store the index. 
+ // We'll use this for two purposes: + // - To indicate which iteration we are on of matching the remainder of + // the expression after the parentheses - the first, including the + // match within the parentheses, or the second having skipped over them. + // - To check for empty matches, which must be rejected. + // + // At the head of a NonGreedy set of parentheses we'll immediately set the + // value on the stack to -1 (indicating a match skipping the subpattern), + // and plant a jump to the end. We'll also plant a label to backtrack to + // to reenter the subpattern later, with a store to set up index on the + // second iteration. + // + // FIXME: for capturing parens, could use the index in the capture array? + if (term->quantityType == QuantifierGreedy) + storeToFrame(index, parenthesesFrameLocation); + else if (term->quantityType == QuantifierNonGreedy) { + storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); + op.m_jumps.append(jump()); + op.m_reentry = label(); + storeToFrame(index, parenthesesFrameLocation); + } + + // If the parentheses are capturing, store the starting index value to the + // captures array, offsetting as necessary. + // + // FIXME: could avoid offsetting this value in JIT code, apply + // offsets only afterwards, at the point the results array is + // being accessed. 
+ if (term->capture()) { + int offsetId = term->parentheses.subpatternId << 1; + int inputOffset = term->inputPosition - m_checked; + if (term->quantityType == QuantifierFixedCount) + inputOffset -= term->parentheses.disjunction->m_minimumSize; + if (inputOffset) { + move(index, indexTemporary); + add32(Imm32(inputOffset), indexTemporary); + store32(indexTemporary, Address(output, offsetId * sizeof(int))); + } else + store32(index, Address(output, offsetId * sizeof(int))); + } + break; + } + case OpParenthesesSubpatternOnceEnd: { + PatternTerm* term = op.m_term; + unsigned parenthesesFrameLocation = term->frameLocation; + const RegisterID indexTemporary = regT0; + ASSERT(term->quantityCount == 1); + + // For Greedy/NonGreedy quantified parentheses, we must reject zero length + // matches. If the minimum size is known to be non-zero we need not check. + if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) + op.m_jumps.append(branch32(Equal, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)))); + + // If the parentheses are capturing, store the ending index value to the + // captures array, offsetting as necessary. + // + // FIXME: could avoid offsetting this value in JIT code, apply + // offsets only afterwards, at the point the results array is + // being accessed. + if (term->capture()) { + int offsetId = (term->parentheses.subpatternId << 1) + 1; + int inputOffset = term->inputPosition - m_checked; + if (inputOffset) { + move(index, indexTemporary); + add32(Imm32(inputOffset), indexTemporary); + store32(indexTemporary, Address(output, offsetId * sizeof(int))); + } else + store32(index, Address(output, offsetId * sizeof(int))); + } + + // If the parentheses are quantified Greedy then add a label to jump back + // to if we get a failed match from after the parentheses. For NonGreedy + // parentheses, link the jump from before the subpattern to here. 
+ if (term->quantityType == QuantifierGreedy) + op.m_reentry = label(); + else if (term->quantityType == QuantifierNonGreedy) { + YarrOp& beginOp = m_ops[op.m_previousOp]; + beginOp.m_jumps.link(this); + } + break; + } + + // OpParenthesesSubpatternTerminalBegin/End + case OpParenthesesSubpatternTerminalBegin: { + PatternTerm* term = op.m_term; + ASSERT(term->quantityType == QuantifierGreedy); + ASSERT(term->quantityCount == quantifyInfinite); + ASSERT(!term->capture()); + + // Upon entry set a label to loop back to. + op.m_reentry = label(); + + // Store the start index of the current match; we need to reject zero + // length matches. + storeToFrame(index, term->frameLocation); + break; + } + case OpParenthesesSubpatternTerminalEnd: { + PatternTerm* term = op.m_term; + + // Check for zero length matches - if the match is non-zero, then we + // can accept it & loop back up to the head of the subpattern. + YarrOp& beginOp = m_ops[op.m_previousOp]; + branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)), beginOp.m_reentry); + + // Reject the match - backtrack back into the subpattern. + op.m_jumps.append(jump()); + + // This is the entry point to jump to when we stop matching - we will + // do so once the subpattern cannot match any more. + op.m_reentry = label(); + break; + } + + // OpParentheticalAssertionBegin/End + case OpParentheticalAssertionBegin: { + PatternTerm* term = op.m_term; + + // Store the current index - assertions should not update index, so + // we will need to restore it upon a successful match. + unsigned parenthesesFrameLocation = term->frameLocation; + storeToFrame(index, parenthesesFrameLocation); + + // Check + op.m_checkAdjust = m_checked - term->inputPosition; + if (op.m_checkAdjust) + sub32(Imm32(op.m_checkAdjust), index); + + m_checked -= op.m_checkAdjust; + break; + } + case OpParentheticalAssertionEnd: { + PatternTerm* term = op.m_term; + + // Restore the input index value. 
+ unsigned parenthesesFrameLocation = term->frameLocation; + loadFromFrame(parenthesesFrameLocation, index); + + // If inverted, a successful match of the assertion must be treated + // as a failure, so jump to backtracking. + if (term->invert()) { + op.m_jumps.append(jump()); + op.m_reentry = label(); + } + + YarrOp& lastOp = m_ops[op.m_previousOp]; + m_checked += lastOp.m_checkAdjust; + break; + } + + case OpMatchFailed: + if (m_pattern.m_body->m_callFrameSize) + addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); + move(TrustedImm32(-1), returnRegister); + generateReturn(); + break; + } + + ++opIndex; + } while (opIndex < m_ops.size()); + } + + void backtrack() + { + // Backwards generate the backtracking code. + size_t opIndex = m_ops.size(); + ASSERT(opIndex); + + do { + --opIndex; + YarrOp& op = m_ops[opIndex]; + switch (op.m_op) { + + case OpTerm: + backtrackTerm(opIndex); + break; + + // OpBodyAlternativeBegin/Next/End + // + // For each Begin/Next node representing an alternative, we need to decide what to do + // in two circumstances: + // - If we backtrack back into this node, from within the alternative. + // - If the input check at the head of the alternative fails (if this exists). + // + // We treat these two cases differently since in the former case we have slightly + // more information - since we are backtracking out of a prior alternative we know + // that at least enough input was available to run it. For example, given the regular + // expression /a|b/, if we backtrack out of the first alternative (a failed pattern + // character match of 'a'), then we need not perform an additional input availability + // check before running the second alternative. + // + // Backtracking required differs for the last alternative, which in the case of the + // repeating set of alternatives must loop. 
The code generated for the last alternative + // will also be used to handle all input check failures from any prior alternatives - + // these require similar functionality, in seeking the next available alternative for + // which there is sufficient input. + // + // Since backtracking of all other alternatives simply requires us to link backtracks + // to the reentry point for the subsequent alternative, we will only be generating any + // code when backtracking the last alternative. + case OpBodyAlternativeBegin: + case OpBodyAlternativeNext: { + PatternAlternative* alternative = op.m_alternative; + + if (op.m_op == OpBodyAlternativeNext) { + PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; + m_checked += priorAlternative->m_minimumSize; + } + m_checked -= alternative->m_minimumSize; + + // Is this the last alternative? If not, then if we backtrack to this point we just + // need to jump to try to match the next alternative. + if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) { + m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this); + break; + } + YarrOp& endOp = m_ops[op.m_nextOp]; + + YarrOp* beginOp = &op; + while (beginOp->m_op != OpBodyAlternativeBegin) { + ASSERT(beginOp->m_op == OpBodyAlternativeNext); + beginOp = &m_ops[beginOp->m_previousOp]; + } + + bool onceThrough = endOp.m_nextOp == notFound; + + // First, generate code to handle cases where we backtrack out of an attempted match + // of the last alternative. If this is a 'once through' set of alternatives then we + // have nothing to do - link this straight through to the End. + if (onceThrough) + m_backtrackingState.linkTo(endOp.m_reentry, this); + else { + // Okay, we're going to need to loop. Calculate the delta between where the input + // position was, and where we want it to be allowing for the fact that we need to + // increment by 1. E.g. 
for the regexp /a|x/ we need to increment the position by + // 1 between loop iterations, but for /abcd|xyz/ we need to increment by two when + // looping from the last alternative to the first, for /a|xyz/ we need to decrement + // by 1, and for /a|xy/ we don't need to move the input position at all. + int deltaLastAlternativeToFirstAlternativePlusOne = (beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize) + 1; + + // If we don't need to move the input poistion, and the pattern has a fixed size + // (in which case we omit the store of the start index until the pattern has matched) + // then we can just link the backtrack out of the last alternative straight to the + // head of the first alternative. + if (!deltaLastAlternativeToFirstAlternativePlusOne && m_pattern.m_body->m_hasFixedSize) + m_backtrackingState.linkTo(beginOp->m_reentry, this); + else { + // We need to generate a trampoline of code to execute before looping back + // around to the first alternative. + m_backtrackingState.link(this); + + // If the pattern size is not fixed, then store the start index, for use if we match. + if (!m_pattern.m_body->m_hasFixedSize) { + if (alternative->m_minimumSize == 1) + store32(index, Address(output)); + else { + move(index, regT0); + if (alternative->m_minimumSize) + sub32(Imm32(alternative->m_minimumSize - 1), regT0); + else + add32(Imm32(1), regT0); + store32(regT0, Address(output)); + } + } + + if (deltaLastAlternativeToFirstAlternativePlusOne) + add32(Imm32(deltaLastAlternativeToFirstAlternativePlusOne), index); + + // Loop. Since this code is only reached when we backtrack out of the last + // alternative (and NOT linked to from the input check upon entry to the + // last alternative) we know that there must be at least enough input as + // required by the last alternative. As such, we only need to check if the + // first will require more to run - if the same or less is required we can + // unconditionally jump. 
+ if (deltaLastAlternativeToFirstAlternativePlusOne > 0) + checkInput().linkTo(beginOp->m_reentry, this); + else + jump(beginOp->m_reentry); + } + } + + // We can reach this point in the code in two ways: + // - Fallthrough from the code above (a repeating alternative backtracked out of its + // last alternative, and did not have sufficent input to run the first). + // - We will loop back up to the following label when a releating alternative loops, + // following a failed input check. + // + // Either way, we have just failed the input check for the first alternative. + Label firstInputCheckFailed(this); + + // Generate code to handle input check failures from alternatives except the last. + // prevOp is the alternative we're handling a bail out from (initially Begin), and + // nextOp is the alternative we will be attempting to reenter into. + // + // We will link input check failures from the forwards matching path back to the code + // that can handle them. + YarrOp* prevOp = beginOp; + YarrOp* nextOp = &m_ops[beginOp->m_nextOp]; + while (nextOp->m_op != OpBodyAlternativeEnd) { + prevOp->m_jumps.link(this); + + int delta = nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize; + if (delta) + add32(Imm32(delta), index); + + // We only get here if an input check fails, it is only worth checking again + // if the next alternative has a minimum size less than the last. + if (delta < 0) { + // FIXME: if we added an extra label to YarrOp, we could avoid needing to + // subtract delta back out, and reduce this code. Should performance test + // the benefit of this. + Jump fail = jumpIfNoAvailableInput(); + sub32(Imm32(delta), index); + jump(nextOp->m_reentry); + fail.link(this); + } + prevOp = nextOp; + nextOp = &m_ops[nextOp->m_nextOp]; + } + + // We fall through to here if there is insufficient input to run the last alternative. 
+ + // If there is insufficient input to run the last alternative, then for 'once through' + // alternatives we are done - just jump back up into the forwards matching path at the End. + if (onceThrough) { + op.m_jumps.linkTo(endOp.m_reentry, this); + jump(endOp.m_reentry); + break; + } + + // For repeating alternatives, link any input check failure from the last alternative to + // this point. + op.m_jumps.link(this); + + bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize; + + // Check for cases where input position is already incremented by 1 for the last + // alternative (this is particularly useful where the minimum size of the body + // disjunction is 0, e.g. /a*|b/). + if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) { + // index is already incremented by 1, so just store it now! + store32(index, Address(output)); + needsToUpdateMatchStart = false; + } + + // Check whether there is sufficient input to loop. Increment the input position by + // one, and check. Also add in the minimum disjunction size before checking - there + // is no point in looping if we're just going to fail all the input checks around + // the next iteration. + int deltaLastAlternativeToBodyMinimumPlusOne = (m_pattern.m_body->m_minimumSize + 1) - alternative->m_minimumSize; + if (deltaLastAlternativeToBodyMinimumPlusOne) + add32(Imm32(deltaLastAlternativeToBodyMinimumPlusOne), index); + Jump matchFailed = jumpIfNoAvailableInput(); + + if (needsToUpdateMatchStart) { + if (!m_pattern.m_body->m_minimumSize) + store32(index, Address(output)); + else { + move(index, regT0); + sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0); + store32(regT0, Address(output)); + } + } + + // Calculate how much more input the first alternative requires than the minimum + // for the body as a whole. If no more is needed then we dont need an additional + // input check here - jump straight back up to the start of the first alternative. 
+ int deltaBodyMinimumToFirstAlternative = beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize; + if (!deltaBodyMinimumToFirstAlternative) + jump(beginOp->m_reentry); + else { + add32(Imm32(deltaBodyMinimumToFirstAlternative), index); + checkInput().linkTo(beginOp->m_reentry, this); + jump(firstInputCheckFailed); + } + + // We jump to here if we iterate to the point that there is insufficient input to + // run any matches, and need to return a failure state from JIT code. + matchFailed.link(this); + + if (m_pattern.m_body->m_callFrameSize) + addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); + move(TrustedImm32(-1), returnRegister); + generateReturn(); + break; + } + case OpBodyAlternativeEnd: { + // We should never backtrack back into a body disjunction. + ASSERT(m_backtrackingState.isEmpty()); + + PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; + m_checked += priorAlternative->m_minimumSize; + break; + } + + // OpSimpleNestedAlternativeBegin/Next/End + // OpNestedAlternativeBegin/Next/End + // + // Generate code for when we backtrack back out of an alternative into + // a Begin or Next node, or when the entry input count check fails. If + // there are more alternatives we need to jump to the next alternative, + // if not we backtrack back out of the current set of parentheses. + // + // In the case of non-simple nested assertions we need to also link the + // 'return address' appropriately to backtrack back out into the correct + // alternative. 
+ case OpSimpleNestedAlternativeBegin: + case OpSimpleNestedAlternativeNext: + case OpNestedAlternativeBegin: + case OpNestedAlternativeNext: { + YarrOp& nextOp = m_ops[op.m_nextOp]; + bool isBegin = op.m_previousOp == notFound; + bool isLastAlternative = nextOp.m_nextOp == notFound; + ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin)); + ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd)); + + // Treat an input check failure the same as a failed match. + m_backtrackingState.append(op.m_jumps); + + // Set the backtracks to jump to the appropriate place. We may need + // to link the backtracks in one of three different way depending on + // the type of alternative we are dealing with: + // - A single alternative, with no simplings. + // - The last alternative of a set of two or more. + // - An alternative other than the last of a set of two or more. + // + // In the case of a single alternative on its own, we don't need to + // jump anywhere - if the alternative fails to match we can just + // continue to backtrack out of the parentheses without jumping. + // + // In the case of the last alternative in a set of more than one, we + // need to jump to return back out to the beginning. We'll do so by + // adding a jump to the End node's m_jumps list, and linking this + // when we come to generate the Begin node. For alternatives other + // than the last, we need to jump to the next alternative. + // + // If the alternative had adjusted the input position we must link + // backtracking to here, correct, and then jump on. If not we can + // link the backtracks directly to their destination. + if (op.m_checkAdjust) { + // Handle the cases where we need to link the backtracks here. + m_backtrackingState.link(this); + sub32(Imm32(op.m_checkAdjust), index); + if (!isLastAlternative) { + // An alternative that is not the last should jump to its successor. 
+ jump(nextOp.m_reentry); + } else if (!isBegin) { + // The last of more than one alternatives must jump back to the begnning. + nextOp.m_jumps.append(jump()); + } else { + // A single alternative on its own can fall through. + m_backtrackingState.fallthrough(); + } + } else { + // Handle the cases where we can link the backtracks directly to their destinations. + if (!isLastAlternative) { + // An alternative that is not the last should jump to its successor. + m_backtrackingState.linkTo(nextOp.m_reentry, this); + } else if (!isBegin) { + // The last of more than one alternatives must jump back to the begnning. + m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this); + } + // In the case of a single alternative on its own do nothing - it can fall through. + } + + // At this point we've handled the backtracking back into this node. + // Now link any backtracks that need to jump to here. + + // For non-simple alternatives, link the alternative's 'return address' + // so that we backtrack back out into the previous alternative. + if (op.m_op == OpNestedAlternativeNext) + m_backtrackingState.append(op.m_returnAddress); + + // If there is more than one alternative, then the last alternative will + // have planted a jump to be linked to the end. This jump was added to the + // End node's m_jumps list. If we are back at the beginning, link it here. 
+ if (isBegin) { + YarrOp* endOp = &m_ops[op.m_nextOp]; + while (endOp->m_nextOp != notFound) { + ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); + endOp = &m_ops[endOp->m_nextOp]; + } + ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); + m_backtrackingState.append(endOp->m_jumps); + } + + if (!isBegin) { + YarrOp& lastOp = m_ops[op.m_previousOp]; + m_checked += lastOp.m_checkAdjust; + } + m_checked -= op.m_checkAdjust; + break; + } + case OpSimpleNestedAlternativeEnd: + case OpNestedAlternativeEnd: { + PatternTerm* term = op.m_term; + + // If we backtrack into the end of a simple subpattern do nothing; + // just continue through into the last alternative. If we backtrack + // into the end of a non-simple set of alterntives we need to jump + // to the backtracking return address set up during generation. + if (op.m_op == OpNestedAlternativeEnd) { + m_backtrackingState.link(this); + + // Plant a jump to the return address. + unsigned parenthesesFrameLocation = term->frameLocation; + unsigned alternativeFrameLocation = parenthesesFrameLocation; + if (term->quantityType != QuantifierFixedCount) + alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; + loadFromFrameAndJump(alternativeFrameLocation); + + // Link the DataLabelPtr associated with the end of the last + // alternative to this point. + m_backtrackingState.append(op.m_returnAddress); + } + + YarrOp& lastOp = m_ops[op.m_previousOp]; + m_checked += lastOp.m_checkAdjust; + break; + } + + // OpParenthesesSubpatternOnceBegin/End + // + // When we are backtracking back out of a capturing subpattern we need + // to clear the start index in the matches output array, to record that + // this subpattern has not been captured. 
+ // + // When backtracking back out of a Greedy quantified subpattern we need + // to catch this, and try running the remainder of the alternative after + // the subpattern again, skipping the parentheses. + // + // Upon backtracking back into a quantified set of parentheses we need to + // check whether we were currently skipping the subpattern. If not, we + // can backtrack into them, if we were we need to either backtrack back + // out of the start of the parentheses, or jump back to the forwards + // matching start, depending of whether the match is Greedy or NonGreedy. + case OpParenthesesSubpatternOnceBegin: { + PatternTerm* term = op.m_term; + ASSERT(term->quantityCount == 1); + + // We only need to backtrack to thispoint if capturing or greedy. + if (term->capture() || term->quantityType == QuantifierGreedy) { + m_backtrackingState.link(this); + + // If capturing, clear the capture (we only need to reset start). + if (term->capture()) + store32(TrustedImm32(-1), Address(output, (term->parentheses.subpatternId << 1) * sizeof(int))); + + // If Greedy, jump to the end. + if (term->quantityType == QuantifierGreedy) { + // Clear the flag in the stackframe indicating we ran through the subpattern. + unsigned parenthesesFrameLocation = term->frameLocation; + storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); + // Jump to after the parentheses, skipping the subpattern. + jump(m_ops[op.m_nextOp].m_reentry); + // A backtrack from after the parentheses, when skipping the subpattern, + // will jump back to here. + op.m_jumps.link(this); + } + + m_backtrackingState.fallthrough(); + } + break; + } + case OpParenthesesSubpatternOnceEnd: { + PatternTerm* term = op.m_term; + + if (term->quantityType != QuantifierFixedCount) { + m_backtrackingState.link(this); + + // Check whether we should backtrack back into the parentheses, or if we + // are currently in a state where we had skipped over the subpattern + // (in which case the flag value on the stack will be -1). 
+ unsigned parenthesesFrameLocation = term->frameLocation; + Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1)); + + if (term->quantityType == QuantifierGreedy) { + // For Greedy parentheses, we skip after having already tried going + // through the subpattern, so if we get here we're done. + YarrOp& beginOp = m_ops[op.m_previousOp]; + beginOp.m_jumps.append(hadSkipped); + } else { + // For NonGreedy parentheses, we try skipping the subpattern first, + // so if we get here we need to try running through the subpattern + // next. Jump back to the start of the parentheses in the forwards + // matching path. + ASSERT(term->quantityType == QuantifierNonGreedy); + YarrOp& beginOp = m_ops[op.m_previousOp]; + hadSkipped.linkTo(beginOp.m_reentry, this); + } + + m_backtrackingState.fallthrough(); + } + + m_backtrackingState.append(op.m_jumps); + break; + } + + // OpParenthesesSubpatternTerminalBegin/End + // + // Terminal subpatterns will always match - there is nothing after them to + // force a backtrack, and they have a minimum count of 0, and as such will + // always produce an acceptable result. + case OpParenthesesSubpatternTerminalBegin: { + // We will backtrack to this point once the subpattern cannot match any + // more. Since no match is accepted as a successful match (we are Greedy + // quantified with a minimum of zero) jump back to the forwards matching + // path at the end. + YarrOp& endOp = m_ops[op.m_nextOp]; + m_backtrackingState.linkTo(endOp.m_reentry, this); + break; + } + case OpParenthesesSubpatternTerminalEnd: + // We should never be backtracking to here (hence the 'terminal' in the name). 
+ ASSERT(m_backtrackingState.isEmpty()); + m_backtrackingState.append(op.m_jumps); + break; + + // OpParentheticalAssertionBegin/End + case OpParentheticalAssertionBegin: { + PatternTerm* term = op.m_term; + YarrOp& endOp = m_ops[op.m_nextOp]; + + // We need to handle the backtracks upon backtracking back out + // of a parenthetical assertion if either we need to correct + // the input index, or the assertion was inverted. + if (op.m_checkAdjust || term->invert()) { + m_backtrackingState.link(this); + + if (op.m_checkAdjust) + add32(Imm32(op.m_checkAdjust), index); + + // In an inverted assertion failure to match the subpattern + // is treated as a successful match - jump to the end of the + // subpattern. We already have adjusted the input position + // back to that before the assertion, which is correct. + if (term->invert()) + jump(endOp.m_reentry); + + m_backtrackingState.fallthrough(); + } + + // The End node's jump list will contain any backtracks into + // the end of the assertion. Also, if inverted, we will have + // added the failure caused by a successful match to this. + m_backtrackingState.append(endOp.m_jumps); + + m_checked += op.m_checkAdjust; + break; + } + case OpParentheticalAssertionEnd: { + // FIXME: We should really be clearing any nested subpattern + // matches on bailing out from after the pattern. Firefox has + // this bug too (presumably because they use YARR!) + + // Never backtrack into an assertion; later failures bail to before the begin. + m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this); + + YarrOp& lastOp = m_ops[op.m_previousOp]; + m_checked -= lastOp.m_checkAdjust; + break; + } + + case OpMatchFailed: + break; + } + + } while (opIndex); + } + + // Compilation methods: + // ==================== + + // opCompileParenthesesSubpattern + // Emits ops for a subpattern (set of parentheses). These consist + // of a set of alternatives wrapped in an outer set of nodes for + // the parentheses. 
+    // Supported types of parentheses are 'Once' (quantityCount == 1)
+    // and 'Terminal' (non-capturing parentheses quantified as greedy
+    // and infinite).
+    // Alternatives will use the 'Simple' set of ops if either the
+    // subpattern is terminal (in which case we will never need to
+    // backtrack), or if the subpattern only contains one alternative.
+    // If the subpattern is of neither supported type no ops are emitted;
+    // m_shouldFallBack is set instead.
+    void opCompileParenthesesSubpattern(PatternTerm* term)
+    {
+        YarrOpCode parenthesesBeginOpCode;
+        YarrOpCode parenthesesEndOpCode;
+        // Default to the 'Simple' alternative nodes; upgraded below if needed.
+        YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
+        YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
+        YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
+
+        // We can currently only compile quantity 1 subpatterns that are
+        // not copies. We generate a copy in the case of a range quantifier,
+        // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
+        // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ respectively). The problem
+        // comes where the subpattern is capturing, in which case we would
+        // need to restore the capture from the first subpattern upon a
+        // failure in the second.
+        if (term->quantityCount == 1 && !term->parentheses.isCopy) {
+            // Select the 'Once' nodes.
+            parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
+            parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
+
+            // If there is more than one alternative we cannot use the 'simple' nodes.
+            if (term->parentheses.disjunction->m_alternatives.size() != 1) {
+                alternativeBeginOpCode = OpNestedAlternativeBegin;
+                alternativeNextOpCode = OpNestedAlternativeNext;
+                alternativeEndOpCode = OpNestedAlternativeEnd;
+            }
+        } else if (term->parentheses.isTerminal) {
+            // Select the 'Terminal' nodes.
+            parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
+            parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
+        } else {
+            // This subpattern is not supported by the JIT.
+            m_shouldFallBack = true;
+            return;
+        }
+
+        // Outer 'parentheses begin' node; its links are filled in at the end,
+        // once the index of the matching 'end' node is known.
+        size_t parenBegin = m_ops.size();
+        m_ops.append(parenthesesBeginOpCode);
+
+        // First node of the alternative list.
+        m_ops.append(alternativeBeginOpCode);
+        m_ops.last().m_previousOp = notFound;
+        m_ops.last().m_term = term;
+        Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
+        for (unsigned i = 0; i < alternatives.size(); ++i) {
+            // The Begin/Next node that opens this alternative is the last op
+            // emitted so far.
+            size_t lastOpIndex = m_ops.size() - 1;
+
+            PatternAlternative* nestedAlternative = alternatives[i];
+            opCompileAlternative(nestedAlternative);
+
+            // Close the alternative with a 'Next' node.
+            size_t thisOpIndex = m_ops.size();
+            m_ops.append(YarrOp(alternativeNextOpCode));
+
+            // The nodes are looked up by index only after the appends above.
+            // NOTE(review): presumably because append may move m_ops' storage - confirm.
+            YarrOp& lastOp = m_ops[lastOpIndex];
+            YarrOp& thisOp = m_ops[thisOpIndex];
+
+            // Link the two nodes to each other, and record the alternative
+            // on the node that opens it.
+            lastOp.m_alternative = nestedAlternative;
+            lastOp.m_nextOp = thisOpIndex;
+            thisOp.m_previousOp = lastOpIndex;
+            thisOp.m_term = term;
+        }
+        // The final 'Next' node has no alternative following it; turn it into
+        // the 'End' node of the list.
+        YarrOp& lastOp = m_ops.last();
+        ASSERT(lastOp.m_op == alternativeNextOpCode);
+        lastOp.m_op = alternativeEndOpCode;
+        lastOp.m_alternative = 0;
+        lastOp.m_nextOp = notFound;
+
+        size_t parenEnd = m_ops.size();
+        m_ops.append(parenthesesEndOpCode);
+
+        // Link the outer begin/end pair to each other.
+        m_ops[parenBegin].m_term = term;
+        m_ops[parenBegin].m_previousOp = notFound;
+        m_ops[parenBegin].m_nextOp = parenEnd;
+        m_ops[parenEnd].m_term = term;
+        m_ops[parenEnd].m_previousOp = parenBegin;
+        m_ops[parenEnd].m_nextOp = notFound;
+    }
+
+    // opCompileParentheticalAssertion
+    // Emits ops for a parenthetical assertion. These consist of an
+    // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
+    // the alternatives, with these wrapped by an outer pair of
+    // OpParentheticalAssertionBegin/End nodes.
+    // We can always use the OpSimpleNestedAlternative nodes in the
+    // case of parenthetical assertions since these only ever match
+    // once, and will never backtrack back into the assertion.
+    void opCompileParentheticalAssertion(PatternTerm* term)
+    {
+        // Outer 'assertion begin' node; its links are filled in at the end,
+        // once the index of the matching 'end' node is known.
+        size_t parenBegin = m_ops.size();
+        m_ops.append(OpParentheticalAssertionBegin);
+
+        // First node of the alternative list.
+        m_ops.append(OpSimpleNestedAlternativeBegin);
+        m_ops.last().m_previousOp = notFound;
+        m_ops.last().m_term = term;
+        Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
+        for (unsigned i = 0; i < alternatives.size(); ++i) {
+            // The Begin/Next node that opens this alternative is the last op
+            // emitted so far.
+            size_t lastOpIndex = m_ops.size() - 1;
+
+            PatternAlternative* nestedAlternative = alternatives[i];
+            opCompileAlternative(nestedAlternative);
+
+            // Close the alternative with a 'Next' node.
+            size_t thisOpIndex = m_ops.size();
+            m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
+
+            // The nodes are looked up by index only after the appends above.
+            // NOTE(review): presumably because append may move m_ops' storage - confirm.
+            YarrOp& lastOp = m_ops[lastOpIndex];
+            YarrOp& thisOp = m_ops[thisOpIndex];
+
+            // Link the two nodes to each other, and record the alternative
+            // on the node that opens it.
+            lastOp.m_alternative = nestedAlternative;
+            lastOp.m_nextOp = thisOpIndex;
+            thisOp.m_previousOp = lastOpIndex;
+            thisOp.m_term = term;
+        }
+        // The final 'Next' node has no alternative following it; turn it into
+        // the 'End' node of the list.
+        YarrOp& lastOp = m_ops.last();
+        ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
+        lastOp.m_op = OpSimpleNestedAlternativeEnd;
+        lastOp.m_alternative = 0;
+        lastOp.m_nextOp = notFound;
+
+        size_t parenEnd = m_ops.size();
+        m_ops.append(OpParentheticalAssertionEnd);
+
+        // Link the outer begin/end pair to each other.
+        m_ops[parenBegin].m_term = term;
+        m_ops[parenBegin].m_previousOp = notFound;
+        m_ops[parenBegin].m_nextOp = parenEnd;
+        m_ops[parenEnd].m_term = term;
+        m_ops[parenEnd].m_previousOp = parenBegin;
+        m_ops[parenEnd].m_nextOp = notFound;
+    }
+
+    // opCompileAlternative
+    // Called to emit nodes for all terms in an alternative.
+    void opCompileAlternative(PatternAlternative* alternative)
+    {
+        optimizeAlternative(alternative);
+
+        for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
+            PatternTerm* term = &alternative->m_terms[i];
+
+            switch (term->type) {
+            case PatternTerm::TypeParenthesesSubpattern:
+                opCompileParenthesesSubpattern(term);
+                break;
+
+            case PatternTerm::TypeParentheticalAssertion:
+                opCompileParentheticalAssertion(term);
+                break;
+
+            default:
+                // Every other term type becomes a single op wrapping the term.
+                m_ops.append(term);
+                break;
+            }
+        }
+    }
+
+    // opCompileBody
+    // This method compiles the body disjunction of the regular expression.
+    // The body consists of two sets of alternatives - zero or more 'once
+    // through' (BOL anchored) alternatives, followed by zero or more
+    // repeated alternatives.
+    // For each of these two sets of alternatives, if not empty they will be
+    // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
+    // 'begin' node referencing the first alternative, and 'next' nodes
+    // referencing any further alternatives). The begin/next/end nodes are
+    // linked together in a doubly linked list. In the case of repeating
+    // alternatives, the end node is also linked back to the beginning.
+    // If no repeating alternatives exist, then a OpMatchFailed node exists
+    // to return the failing result.
+    void opCompileBody(PatternDisjunction* disjunction)
+    {
+        Vector<PatternAlternative*>& alternatives = disjunction->m_alternatives;
+        size_t currentAlternativeIndex = 0;
+
+        // Emit the 'once through' alternatives.
+        if (alternatives.size() && alternatives[0]->onceThrough()) {
+            m_ops.append(YarrOp(OpBodyAlternativeBegin));
+            m_ops.last().m_previousOp = notFound;
+
+            do {
+                // The Begin/Next node that opens this alternative is the last
+                // op emitted so far.
+                size_t lastOpIndex = m_ops.size() - 1;
+                PatternAlternative* alternative = alternatives[currentAlternativeIndex];
+                opCompileAlternative(alternative);
+
+                size_t thisOpIndex = m_ops.size();
+                m_ops.append(YarrOp(OpBodyAlternativeNext));
+
+                // The nodes are looked up by index only after the appends above.
+                YarrOp& lastOp = m_ops[lastOpIndex];
+                YarrOp& thisOp = m_ops[thisOpIndex];
+
+                lastOp.m_alternative = alternative;
+                lastOp.m_nextOp = thisOpIndex;
+                thisOp.m_previousOp = lastOpIndex;
+
+                ++currentAlternativeIndex;
+            } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
+
+            // The final 'Next' node becomes the 'End' node of the once-through set.
+            YarrOp& lastOp = m_ops.last();
+
+            ASSERT(lastOp.m_op == OpBodyAlternativeNext);
+            lastOp.m_op = OpBodyAlternativeEnd;
+            lastOp.m_alternative = 0;
+            lastOp.m_nextOp = notFound;
+        }
+
+        // No alternatives remain, so there is nothing to repeat.
+        if (currentAlternativeIndex == alternatives.size()) {
+            m_ops.append(YarrOp(OpMatchFailed));
+            return;
+        }
+
+        // Emit the repeated alternatives.
+        size_t repeatLoop = m_ops.size();
+        m_ops.append(YarrOp(OpBodyAlternativeBegin));
+        m_ops.last().m_previousOp = notFound;
+        do {
+            size_t lastOpIndex = m_ops.size() - 1;
+            PatternAlternative* alternative = alternatives[currentAlternativeIndex];
+            ASSERT(!alternative->onceThrough());
+            opCompileAlternative(alternative);
+
+            size_t thisOpIndex = m_ops.size();
+            m_ops.append(YarrOp(OpBodyAlternativeNext));
+
+            // The nodes are looked up by index only after the appends above.
+            YarrOp& lastOp = m_ops[lastOpIndex];
+            YarrOp& thisOp = m_ops[thisOpIndex];
+
+            lastOp.m_alternative = alternative;
+            lastOp.m_nextOp = thisOpIndex;
+            thisOp.m_previousOp = lastOpIndex;
+
+            ++currentAlternativeIndex;
+        } while (currentAlternativeIndex < alternatives.size());
+        // The final 'Next' node becomes the 'End' node, linked back to the
+        // 'Begin' node of the repeated set.
+        YarrOp& lastOp = m_ops.last();
+        ASSERT(lastOp.m_op == OpBodyAlternativeNext);
+        lastOp.m_op = OpBodyAlternativeEnd;
+        lastOp.m_alternative = 0;
+        lastOp.m_nextOp = repeatLoop;
+    }
+
+    // generateEnter
+    // Emits the entry sequence of the generated function: pushes the
+    // registers saved for the selected CPU and, where the code below does
+    // so, loads arguments into the registers used by the generator.
+    // generateReturn() pops the same registers in reverse order.
+    void generateEnter()
+    {
+#if WTF_CPU_X86_64
+        push(X86Registers::ebp);
+        move(stackPointerRegister, X86Registers::ebp);
+        push(X86Registers::ebx);
+#elif WTF_CPU_X86
+        push(X86Registers::ebp);
+        move(stackPointerRegister, X86Registers::ebp);
+        // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
+        push(X86Registers::ebx);
+        push(X86Registers::edi);
+        push(X86Registers::esi);
+        // load output into edi (2 = saved ebp + return address).
+    #if WTF_COMPILER_MSVC
+        // With MSVC all four arguments are loaded from the stack.
+        loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
+        loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
+        loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
+        loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
+    #else
+        // Otherwise only 'output' is loaded from the stack here.
+        loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
+    #endif
+#elif WTF_CPU_ARM
+        push(ARMRegisters::r4);
+        push(ARMRegisters::r5);
+        push(ARMRegisters::r6);
+#if WTF_CPU_ARM_TRADITIONAL
+        push(ARMRegisters::r8); // scratch register
+#endif
+        move(ARMRegisters::r3, output);
+#elif WTF_CPU_SH4
+        push(SH4Registers::r11);
+        push(SH4Registers::r13);
+#elif WTF_CPU_MIPS
+        // Do nothing.
+#endif
+    }
+
+    // generateReturn
+    // Emits the exit sequence: pops the registers pushed by generateEnter()
+    // in reverse order, then returns.
+    void generateReturn()
+    {
+#if WTF_CPU_X86_64
+        pop(X86Registers::ebx);
+        pop(X86Registers::ebp);
+#elif WTF_CPU_X86
+        pop(X86Registers::esi);
+        pop(X86Registers::edi);
+        pop(X86Registers::ebx);
+        pop(X86Registers::ebp);
+#elif WTF_CPU_ARM
+#if WTF_CPU_ARM_TRADITIONAL
+        pop(ARMRegisters::r8); // scratch register
+#endif
+        pop(ARMRegisters::r6);
+        pop(ARMRegisters::r5);
+        pop(ARMRegisters::r4);
+#elif WTF_CPU_SH4
+        pop(SH4Registers::r13);
+        pop(SH4Registers::r11);
+#elif WTF_CPU_MIPS
+        // Do nothing
+#endif
+        ret();
+    }
+
+public:
+    YarrGenerator(YarrPattern& pattern)
+        : m_pattern(pattern)
+        , m_shouldFallBack(false)
+        , m_checked(0)
+    {
+    }
+
+    // compile
+    // Generates code for m_pattern and stores it in jitObject. If the
+    // pattern uses a construct the JIT does not support, no code is stored
+    // and jitObject is flagged as needing the fall back path instead.
+    void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
+    {
+        generateEnter();
+
+        if (!m_pattern.m_body->m_hasFixedSize)
+            store32(index, Address(output));
+
+        // Reserve the call frame requested by the pattern body.
+        if (m_pattern.m_body->m_callFrameSize)
+            subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+
+        // Compile the pattern to the internal 'YarrOp' representation.
+        opCompileBody(m_pattern.m_body);
+
+        // If we encountered anything we can't handle in the JIT code
+        // (e.g. backreferences) then return early.
+        if (m_shouldFallBack) {
+            jitObject.setFallBack(true);
+            return;
+        }
+
+        generate();
+        backtrack();
+
+        // Link & finalize the code.
+        // XXX yarr-oom
+        // NOTE(review): 'ok' is never inspected after the LinkBuffer is
+        // constructed, so a failure reported through it goes unnoticed here
+        // (presumably what the XXX above refers to) - confirm and handle.
+        ExecutablePool *pool;
+        bool ok;
+        LinkBuffer linkBuffer(this, globalData->regexAllocator, &pool, &ok);
+        m_backtrackingState.linkDataLabels(linkBuffer);
+        jitObject.set(linkBuffer.finalizeCode());
+        jitObject.setFallBack(m_shouldFallBack);
+    }
+
+private:
+    YarrPattern& m_pattern;
+
+    // Used to detect regular expression constructs that are not currently
+    // supported in the JIT; fall back to the interpreter when this is detected.
+    bool m_shouldFallBack;
+
+    // The regular expression expressed as a linear sequence of operations.
+    Vector<YarrOp, 128> m_ops;
+
+    // This records the current input offset being applied due to the current
+    // set of alternatives we are nested within. E.g. when matching the
+    // character 'b' within the regular expression /abc/, we will know that
+    // the minimum size for the alternative is 3, checked upon entry to the
+    // alternative, and that 'b' is at offset 1 from the start, and as such
+    // when matching 'b' we need to apply an offset of -2 to the load.
+    //
+    // FIXME: This should go away. Rather than tracking this value throughout
+    // code generation, we should gather this information up front & store it
+    // on the YarrOp structure.
+    int m_checked;
+
+    // This class records state whilst generating the backtracking path of code.
+    BacktrackingState m_backtrackingState;
+};
+
+// Compiles 'pattern' with a temporary YarrGenerator, storing the outcome in jitObject.
+void jitCompile(YarrPattern& pattern, JSGlobalData* globalData, YarrCodeBlock& jitObject)
+{
+    YarrGenerator(pattern).compile(globalData, jitObject);
+}
+
+// Runs the code held by jitObject; simply forwards to YarrCodeBlock::execute().
+int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output)
+{
+    return jitObject.execute(input, start, length, output);
+}
+
+}}
+
+#endif
diff --git a/js/src/yarr/YarrJIT.h b/js/src/yarr/YarrJIT.h
new file mode 100644
index 000000000000..4f0f47f8c548
--- /dev/null
+++ b/js/src/yarr/YarrJIT.h
@@ -0,0 +1,103 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sw=4 et tw=99 ft=cpp:
+ *
+ * ***** BEGIN LICENSE BLOCK *****
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef YarrJIT_h
+#define YarrJIT_h
+
+#include "assembler/wtf/Platform.h"
+
+#if ENABLE_YARR_JIT
+
+#include "assembler/assembler/MacroAssembler.h"
+#include "YarrPattern.h"
+
+// YARR_CALL is attached to the type of the generated matcher function. On
+// x86 with a non-MSVC compiler it expands to the regparm(3) attribute;
+// everywhere else it expands to nothing.
+#if WTF_CPU_X86 && !WTF_COMPILER_MSVC
+#define YARR_CALL __attribute__ ((regparm (3)))
+#else
+#define YARR_CALL
+#endif
+
+namespace JSC {
+
+class JSGlobalData;
+class ExecutablePool;
+
+namespace Yarr {
+
+// Holds the outcome of jitCompile(): a reference to the generated code, plus
+// a flag recording that the pattern could not be handled by the JIT and has
+// to take the fall back path instead (see setFallBack()/isFallBack()).
+class YarrCodeBlock {
+    // Signature of the generated matcher, called with the YARR_CALL convention.
+    typedef int (*YarrJITCode)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
+
+public:
+    YarrCodeBlock()
+        : m_needFallBack(false)
+    {
+    }
+
+    ~YarrCodeBlock()
+    {
+    }
+
+    void setFallBack(bool fallback) { m_needFallBack = fallback; }
+    bool isFallBack() { return m_needFallBack; }
+    void set(MacroAssembler::CodeRef ref) { m_ref = ref; }
+
+    // Calls the generated code with the given arguments and returns its
+    // result unchanged. The executable address is cast to YarrJITCode so
+    // that the call is made through the YARR_CALL function type.
+    int execute(const UChar* input, unsigned start, unsigned length, int* output)
+    {
+        return JS_EXTENSION((reinterpret_cast<YarrJITCode>(m_ref.m_code.executableAddress()))(input, start, length, output));
+    }
+
+#if ENABLE_REGEXP_TRACING
+    void *getAddr() { return m_ref.m_code.executableAddress(); }
+#endif
+
+    void release() { m_ref.release(); }
+
+private:
+    MacroAssembler::CodeRef m_ref;
+    bool m_needFallBack;
+};
+
+void jitCompile(YarrPattern&, JSGlobalData*, YarrCodeBlock& jitObject);
+int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output);
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // YarrJIT_h
diff --git
a/js/src/yarr/yarr/RegexParser.h b/js/src/yarr/YarrParser.h similarity index 81% rename from js/src/yarr/yarr/RegexParser.h rename to js/src/yarr/YarrParser.h index 1ae2c2fd049b..f2b50dd867e3 100644 --- a/js/src/yarr/yarr/RegexParser.h +++ b/js/src/yarr/YarrParser.h @@ -1,4 +1,7 @@ -/* +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** * Copyright (C) 2009 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -21,18 +24,18 @@ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ + * + * ***** END LICENSE BLOCK ***** */ -#ifndef RegexParser_h -#define RegexParser_h +#ifndef YarrParser_h +#define YarrParser_h -#include -#include -#include "yarr/jswtfbridge.h" -#include "yarr/yarr/RegexCommon.h" +#include "Yarr.h" namespace JSC { namespace Yarr { +#define REGEXP_ERROR_PREFIX "Invalid regular expression: " + enum BuiltInCharacterClassID { DigitClassID, SpaceClassID, @@ -45,7 +48,7 @@ template class Parser { private: template - friend int parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit); + friend ErrorCode parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit); /* * CharacterClassParserDelegate: @@ -61,10 +64,8 @@ private: CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err) : m_delegate(delegate) , m_err(err) - , m_state(empty) -#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 5 /* quell GCC overwarning */ - , m_character(0xFFFF) -#endif + , m_state(Empty) + , m_character(0) { } @@ -79,56 +80,62 @@ private: } /* - * atomPatternCharacterUnescaped(): + * atomPatternCharacter(): * - * This method is called directly from parseCharacterClass(), to report a new - * pattern character token. 
This method differs from atomPatternCharacter(), - * which will be called from parseEscape(), since a hypen provided via this - * method may be indicating a character range, but a hyphen parsed by - * parseEscape() cannot be interpreted as doing so. + * This method is called either from parseCharacterClass() (for an unescaped + * character in a character class), or from parseEscape(). In the former case + * the value true will be passed for the argument 'hyphenIsRange', and in this + * mode we will allow a hypen to be treated as indicating a range (i.e. /[a-z]/ + * is different to /[a\-z]/). */ - void atomPatternCharacterUnescaped(UChar ch) + void atomPatternCharacter(UChar ch, bool hyphenIsRange = false) { switch (m_state) { - case empty: - m_character = ch; - m_state = cachedCharacter; - break; + case AfterCharacterClass: + // Following a builtin character class we need look out for a hyphen. + // We're looking for invalid ranges, such as /[\d-x]/ or /[\d-\d]/. + // If we see a hyphen following a charater class then unlike usual + // we'll report it to the delegate immediately, and put ourself into + // a poisoned state. Any following calls to add another character or + // character class will result in an error. (A hypen following a + // character-class is itself valid, but only at the end of a regex). + if (hyphenIsRange && ch == '-') { + m_delegate.atomCharacterClassAtom('-'); + m_state = AfterCharacterClassHyphen; + return; + } + // Otherwise just fall through - cached character so treat this as Empty. 
- case cachedCharacter: - if (ch == '-') - m_state = cachedCharacterHyphen; + case Empty: + m_character = ch; + m_state = CachedCharacter; + return; + + case CachedCharacter: + if (hyphenIsRange && ch == '-') + m_state = CachedCharacterHyphen; else { m_delegate.atomCharacterClassAtom(m_character); m_character = ch; } - break; + return; - case cachedCharacterHyphen: - if (ch >= m_character) - m_delegate.atomCharacterClassRange(m_character, ch); - else + case CachedCharacterHyphen: + if (ch < m_character) { m_err = CharacterClassOutOfOrder; - m_state = empty; + return; + } + m_delegate.atomCharacterClassRange(m_character, ch); + m_state = Empty; + return; + + case AfterCharacterClassHyphen: + m_delegate.atomCharacterClassAtom(ch); + m_state = Empty; + return; } } - /* - * atomPatternCharacter(): - * - * Adds a pattern character, called by parseEscape(), as such will not - * interpret a hyphen as indicating a character range. - */ - void atomPatternCharacter(UChar ch) - { - // Flush if a character is already pending to prevent the - // hyphen from begin interpreted as indicating a range. - if((ch == '-') && (m_state == cachedCharacter)) - flush(); - - atomPatternCharacterUnescaped(ch); - } - /* * atomBuiltInCharacterClass(): * @@ -136,17 +143,28 @@ private: */ void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert) { - if (m_state == cachedCharacterHyphen) { - // If the RHS of a range does not contain exacly one character then a SyntaxError - // must be thrown. SpiderMonkey only errors out in the [c-\s] case as an extension. - // (This assumes none of the built in character classes contain a single - // character.) - m_err = CharacterClassRangeSingleChar; - m_state = empty; + switch (m_state) { + case CachedCharacter: + // Flush the currently cached character, then fall through. 
+ m_delegate.atomCharacterClassAtom(m_character); + + case Empty: + case AfterCharacterClass: + m_state = AfterCharacterClass; + m_delegate.atomCharacterClassBuiltIn(classID, invert); + return; + + case CachedCharacterHyphen: + // Error! We have a range that looks like [x-\d]. We require + // the end of the range to be a single character. + m_err = CharacterClassInvalidRange; + return; + + case AfterCharacterClassHyphen: + m_delegate.atomCharacterClassBuiltIn(classID, invert); + m_state = Empty; return; } - flush(); - m_delegate.atomCharacterClassBuiltIn(classID, invert); } /* @@ -156,31 +174,29 @@ private: */ void end() { - flush(); + if (m_state == CachedCharacter) + m_delegate.atomCharacterClassAtom(m_character); + else if (m_state == CachedCharacterHyphen) { + m_delegate.atomCharacterClassAtom(m_character); + m_delegate.atomCharacterClassAtom('-'); + } m_delegate.atomCharacterClassEnd(); } // parseEscape() should never call these delegate methods when // invoked with inCharacterClass set. 
- void assertionWordBoundary(bool) { JS_NOT_REACHED("parseEscape() should never call this"); } - void atomBackReference(unsigned) { JS_NOT_REACHED("parseEscape() should never call this"); } + void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); } + void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); } private: - void flush() - { - if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen - m_delegate.atomCharacterClassAtom(m_character); - if (m_state == cachedCharacterHyphen) - m_delegate.atomCharacterClassAtom('-'); - m_state = empty; - } - Delegate& m_delegate; ErrorCode& m_err; enum CharacterClassConstructionState { - empty, - cachedCharacter, - cachedCharacterHyphen + Empty, + CachedCharacter, + CachedCharacterHyphen, + AfterCharacterClass, + AfterCharacterClassHyphen } m_state; UChar m_character; }; @@ -189,7 +205,7 @@ private: : m_delegate(delegate) , m_backReferenceLimit(backReferenceLimit) , m_err(NoError) - , m_data(const_cast(pattern).chars()) + , m_data(pattern.chars()) , m_size(pattern.length()) , m_index(0) , m_parenthesesNestingDepth(0) @@ -219,8 +235,8 @@ private: template bool parseEscape(EscapeDelegate& delegate) { - JS_ASSERT(!m_err); - JS_ASSERT(peek() == '\\'); + ASSERT(!m_err); + ASSERT(peek() == '\\'); consume(); if (atEndOfPattern()) { @@ -292,7 +308,7 @@ private: unsigned backReference; if (!consumeNumber(backReference)) - return false; + break; if (backReference <= m_backReferenceLimit) { delegate.atomBackReference(backReference); break; @@ -402,14 +418,14 @@ private: /* * parseCharacterClass(): * - * Helper for parseTokens(); calls directly and indirectly (via parseCharacterClassEscape) + * Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape) * to an instance of CharacterClassParserDelegate, to describe the character class to the * delegate. 
*/ void parseCharacterClass() { - JS_ASSERT(!m_err); - JS_ASSERT(peek() == '['); + ASSERT(!m_err); + ASSERT(peek() == '['); consume(); CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err); @@ -428,7 +444,7 @@ private: break; default: - characterClassConstructor.atomPatternCharacterUnescaped(consume()); + characterClassConstructor.atomPatternCharacter(consume(), true); } if (m_err) @@ -445,8 +461,8 @@ private: */ void parseParenthesesBegin() { - JS_ASSERT(!m_err); - JS_ASSERT(peek() == '('); + ASSERT(!m_err); + ASSERT(peek() == '('); consume(); if (tryConsume('?')) { @@ -484,8 +500,8 @@ private: */ void parseParenthesesEnd() { - JS_ASSERT(!m_err); - JS_ASSERT(peek() == ')'); + ASSERT(!m_err); + ASSERT(peek() == ')'); consume(); if (m_parenthesesNestingDepth > 0) @@ -503,8 +519,8 @@ private: */ void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max) { - JS_ASSERT(!m_err); - JS_ASSERT(min <= max); + ASSERT(!m_err); + ASSERT(min <= max); if (lastTokenWasAnAtom) m_delegate.quantifyAtom(min, max, !tryConsume('?')); @@ -572,13 +588,13 @@ private: case '*': consume(); - parseQuantifier(lastTokenWasAnAtom, 0, UINT_MAX); + parseQuantifier(lastTokenWasAnAtom, 0, quantifyInfinite); lastTokenWasAnAtom = false; break; case '+': consume(); - parseQuantifier(lastTokenWasAnAtom, 1, UINT_MAX); + parseQuantifier(lastTokenWasAnAtom, 1, quantifyInfinite); lastTokenWasAnAtom = false; break; @@ -603,7 +619,7 @@ private: if (!consumeNumber(max)) break; } else { - max = UINT_MAX; + max = quantifyInfinite; } } @@ -636,26 +652,18 @@ private: /* * parse(): * - * This method calls regexBegin(), calls parseTokens() to parse over the input - * patterns, calls regexEnd() or regexError() as appropriate, and converts any + * This method calls parseTokens() to parse over the input and converts any * error code to a const char* for a result. 
*/ - int parse() + ErrorCode parse() { - m_delegate.regexBegin(); - if (m_size > MAX_PATTERN_SIZE) m_err = PatternTooLarge; else parseTokens(); - JS_ASSERT(atEndOfPattern() || m_err); + ASSERT(atEndOfPattern() || m_err); - if (m_err) - m_delegate.regexError(); - else - m_delegate.regexEnd(); - - return static_cast(m_err); + return m_err; } @@ -675,13 +683,13 @@ private: bool atEndOfPattern() { - JS_ASSERT(m_index <= m_size); + ASSERT(m_index <= m_size); return m_index == m_size; } int peek() { - JS_ASSERT(m_index < m_size); + ASSERT(m_index < m_size); return m_data[m_index]; } @@ -692,40 +700,40 @@ private: unsigned peekDigit() { - JS_ASSERT(peekIsDigit()); + ASSERT(peekIsDigit()); return peek() - '0'; } int consume() { - JS_ASSERT(m_index < m_size); + ASSERT(m_index < m_size); return m_data[m_index++]; } unsigned consumeDigit() { - JS_ASSERT(peekIsDigit()); + ASSERT(peekIsDigit()); return consume() - '0'; } - bool consumeNumber(unsigned &accum) - { - accum = consumeDigit(); - while (peekIsDigit()) { - unsigned newValue = accum * 10 + peekDigit(); - if (newValue < accum) { /* Overflow check. */ - m_err = QuantifierTooLarge; - return false; - } - accum = newValue; - consume(); - } - return true; + bool consumeNumber(unsigned &accum) + { + accum = consumeDigit(); + while (peekIsDigit()) { + unsigned newValue = accum * 10 + peekDigit(); + if (newValue < accum) { /* Overflow check. */ + m_err = QuantifierTooLarge; + return false; + } + accum = newValue; + consume(); + } + return true; } unsigned consumeOctal() { - JS_ASSERT(WTF::isASCIIOctalDigit(peek())); + ASSERT(WTF::isASCIIOctalDigit(peek())); unsigned n = consumeDigit(); while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek())) @@ -798,14 +806,6 @@ private: * * void disjunction(); * - * void regexBegin(); - * void regexEnd(); - * void regexError(); - * - * Before any call recording tokens are made, regexBegin() will be called on the - * delegate once. 
Once parsing is complete either regexEnd() or regexError() will - * be called, as appropriate. - * * The regular expression is described by a sequence of assertion*() and atom*() * callbacks to the delegate, describing the terms in the regular expression. * Following an atom a quantifyAtom() call may occur to indicate that the previous @@ -836,11 +836,11 @@ private: */ template -int parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = UINT_MAX) +ErrorCode parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = quantifyInfinite) { return Parser(delegate, pattern, backReferenceLimit).parse(); } } } // namespace JSC::Yarr -#endif // RegexParser_h +#endif // YarrParser_h diff --git a/js/src/yarr/yarr/RegexCompiler.cpp b/js/src/yarr/YarrPattern.cpp similarity index 52% rename from js/src/yarr/yarr/RegexCompiler.cpp rename to js/src/yarr/YarrPattern.cpp index 9b60cbd4a78b..413d342f1585 100644 --- a/js/src/yarr/yarr/RegexCompiler.cpp +++ b/js/src/yarr/YarrPattern.cpp @@ -1,5 +1,9 @@ -/* +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** * Copyright (C) 2009 Apple Inc. All rights reserved. + * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -21,12 +25,13 @@ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ + * + * ***** END LICENSE BLOCK ***** */ -#include "jsinttypes.h" -#include "RegexCompiler.h" +#include "YarrPattern.h" -#include "RegexPattern.h" +#include "Yarr.h" +#include "YarrParser.h" using namespace WTF; @@ -34,12 +39,6 @@ namespace JSC { namespace Yarr { #include "RegExpJitTables.h" -#if WTF_CPU_SPARC -#define BASE_FRAME_SIZE 24 -#else -#define BASE_FRAME_SIZE 0 -#endif - class CharacterClassConstructor { public: CharacterClassConstructor(bool isCaseInsensitive = false) @@ -57,13 +56,13 @@ public: void append(const CharacterClass* other) { - for (size_t i = 0; i < other->m_matches.length(); ++i) + for (size_t i = 0; i < other->m_matches.size(); ++i) addSorted(m_matches, other->m_matches[i]); - for (size_t i = 0; i < other->m_ranges.length(); ++i) + for (size_t i = 0; i < other->m_ranges.size(); ++i) addSortedRange(m_ranges, other->m_ranges[i].begin, other->m_ranges[i].end); - for (size_t i = 0; i < other->m_matchesUnicode.length(); ++i) + for (size_t i = 0; i < other->m_matchesUnicode.size(); ++i) addSorted(m_matchesUnicode, other->m_matchesUnicode[i]); - for (size_t i = 0; i < other->m_rangesUnicode.length(); ++i) + for (size_t i = 0; i < other->m_rangesUnicode.size(); ++i) addSortedRange(m_rangesUnicode, other->m_rangesUnicode[i].begin, other->m_rangesUnicode[i].end); } @@ -101,18 +100,18 @@ public: { if (lo <= 0x7f) { char asciiLo = lo; - char asciiHi = JS_MIN(hi, (UChar)0x7f); + char asciiHi = std::min(hi, (UChar)0x7f); addSortedRange(m_ranges, lo, asciiHi); if (m_isCaseInsensitive) { if ((asciiLo <= 'Z') && (asciiHi >= 'A')) - addSortedRange(m_ranges, JS_MAX(asciiLo, 'A')+('a'-'A'), JS_MIN(asciiHi, 'Z')+('a'-'A')); + addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A')); if ((asciiLo <= 'z') && (asciiHi >= 'a')) - addSortedRange(m_ranges, JS_MAX(asciiLo, 'a')+('A'-'a'), JS_MIN(asciiHi, 'z')+('A'-'a')); + addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a')); } } if (hi 
>= 0x80) { - uint32 unicodeCurr = JS_MAX(lo, (UChar)0x80); + uint32_t unicodeCurr = std::max(lo, (UChar)0x80); addSortedRange(m_rangesUnicode, unicodeCurr, hi); if (m_isCaseInsensitive) { @@ -122,7 +121,7 @@ public: // (if so we won't re-enter the loop, since the loop condition above // will definitely fail) - but this does mean we cannot use a UChar // to represent unicodeCurr, we must use a 32-bit value instead. - JS_ASSERT(unicodeCurr <= 0xffff); + ASSERT(unicodeCurr <= 0xffff); if (isUnicodeUpper(unicodeCurr)) { UChar lowerCaseRangeBegin = Unicode::toLower(unicodeCurr); @@ -145,8 +144,7 @@ public: CharacterClass* charClass() { - // FIXME: bug 574459 -- no NULL check - CharacterClass* characterClass = js::OffTheBooks::new_((CharacterClassTable*)NULL); + CharacterClass* characterClass = js::OffTheBooks::new_(PassRefPtr(0)); characterClass->m_matches.append(m_matches); characterClass->m_ranges.append(m_ranges); @@ -159,12 +157,10 @@ public: } private: - typedef js::Vector UChars; - typedef js::Vector CharacterRanges; - void addSorted(UChars& matches, UChar ch) + void addSorted(Vector& matches, UChar ch) { unsigned pos = 0; - unsigned range = matches.length(); + unsigned range = matches.size(); // binary chop, find position to insert char. while (range) { @@ -181,15 +177,15 @@ private: } } - if (pos == matches.length()) + if (pos == matches.size()) matches.append(ch); else - matches.insert(matches.begin() + pos, ch); + matches.insert(pos, ch); } - void addSortedRange(CharacterRanges& ranges, UChar lo, UChar hi) + void addSortedRange(Vector& ranges, UChar lo, UChar hi) { - unsigned end = ranges.length(); + unsigned end = ranges.size(); // Simple linear scan - I doubt there are that many ranges anyway... // feel free to fix this with something faster (eg binary chop). 
@@ -201,7 +197,7 @@ private: ranges[i].begin = lo; return; } - ranges.insert(ranges.begin() + i, CharacterRange(lo, hi)); + ranges.insert(i, CharacterRange(lo, hi)); return; } // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining @@ -209,17 +205,17 @@ private: // end of the last range they concatenate, which is just as good. if (lo <= (ranges[i].end + 1)) { // found an intersect! we'll replace this entry in the array. - ranges[i].begin = JS_MIN(ranges[i].begin, lo); - ranges[i].end = JS_MAX(ranges[i].end, hi); + ranges[i].begin = std::min(ranges[i].begin, lo); + ranges[i].end = std::max(ranges[i].end, hi); // now check if the new range can subsume any subsequent ranges. unsigned next = i+1; // each iteration of the loop we will either remove something from the list, or break the loop. - while (next < ranges.length()) { + while (next < ranges.size()) { if (ranges[next].begin <= (ranges[i].end + 1)) { // the next entry now overlaps / concatenates this one. 
- ranges[i].end = JS_MAX(ranges[i].end, ranges[next].end); - ranges.erase(ranges.begin() + next); + ranges[i].end = std::max(ranges[i].end, ranges[next].end); + ranges.remove(next); } else break; } @@ -234,21 +230,131 @@ private: bool m_isCaseInsensitive; - UChars m_matches; - CharacterRanges m_ranges; - UChars m_matchesUnicode; - CharacterRanges m_rangesUnicode; + Vector m_matches; + Vector m_ranges; + Vector m_matchesUnicode; + Vector m_rangesUnicode; }; -class RegexPatternConstructor { -public: - RegexPatternConstructor(RegexPattern& pattern) - : m_pattern(pattern) - , m_characterClassConstructor(pattern.m_ignoreCase) +struct BeginCharHelper { + BeginCharHelper(Vector* beginChars, bool isCaseInsensitive = false) + : m_beginChars(beginChars) + , m_isCaseInsensitive(isCaseInsensitive) + {} + + void addBeginChar(BeginChar beginChar, Vector* hotTerms, QuantifierType quantityType, unsigned quantityCount) { + if (quantityType == QuantifierFixedCount && quantityCount > 1) { + // We duplicate the first found character if the quantity of the term is more than one. eg.: /a{3}/ + beginChar.value |= beginChar.value << 16; + beginChar.mask |= beginChar.mask << 16; + addCharacter(beginChar); + } else if (quantityType == QuantifierFixedCount && quantityCount == 1 && hotTerms->size()) + // In case of characters with fixed quantifier we should check the next character as well. + linkHotTerms(beginChar, hotTerms); + else + // In case of greedy matching the next character checking is unnecessary therefore we just store + // the first character. + addCharacter(beginChar); } - ~RegexPatternConstructor() + // Merge two following BeginChars in the vector to reduce the number of character checks. 
+ void merge(unsigned size) + { + for (unsigned i = 0; i < size; i++) { + BeginChar* curr = &m_beginChars->at(i); + BeginChar* next = &m_beginChars->at(i + 1); + + // If the current and the next size of value is different we should skip the merge process + // because the 16bit and 32bit values are unmergable. + if (curr->value <= 0xFFFF && next->value > 0xFFFF) + continue; + + unsigned diff = curr->value ^ next->value; + + curr->mask |= diff; + curr->value |= curr->mask; + + m_beginChars->remove(i + 1); + size--; + } + } + +private: + void addCharacter(BeginChar beginChar) + { + unsigned pos = 0; + unsigned range = m_beginChars->size(); + + // binary chop, find position to insert char. + while (range) { + unsigned index = range >> 1; + + int val = m_beginChars->at(pos+index).value - beginChar.value; + if (!val) + return; + if (val < 0) + range = index; + else { + pos += (index+1); + range -= (index+1); + } + } + + if (pos == m_beginChars->size()) + m_beginChars->append(beginChar); + else + m_beginChars->insert(pos, beginChar); + } + + // Create BeginChar objects by appending each terms from a hotTerms vector to an existing BeginChar object. + void linkHotTerms(BeginChar beginChar, Vector* hotTerms) + { + for (unsigned i = 0; i < hotTerms->size(); i++) { + PatternTerm hotTerm = hotTerms->at(i).term; + ASSERT(hotTerm.type == PatternTerm::TypePatternCharacter); + + UChar characterNext = hotTerm.patternCharacter; + + // Append a character to an existing BeginChar object. 
+ if (characterNext <= 0x7f) { + unsigned mask = 0; + + if (m_isCaseInsensitive && isASCIIAlpha(characterNext)) { + mask = 32; + characterNext = toASCIILower(characterNext); + } + + addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask | (mask << 16))); + } else { + UChar upper, lower; + if (m_isCaseInsensitive && ((upper = Unicode::toUpper(characterNext)) != (lower = Unicode::toLower(characterNext)))) { + addCharacter(BeginChar(beginChar.value | (upper << 16), beginChar.mask)); + addCharacter(BeginChar(beginChar.value | (lower << 16), beginChar.mask)); + } else + addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask)); + } + } + } + + Vector* m_beginChars; + bool m_isCaseInsensitive; +}; + +class YarrPatternConstructor { +public: + YarrPatternConstructor(YarrPattern& pattern) + : m_pattern(pattern) + , m_characterClassConstructor(pattern.m_ignoreCase) + , m_beginCharHelper(&pattern.m_beginChars, pattern.m_ignoreCase) + , m_invertParentheticalAssertion(false) + { + m_pattern.m_body = js::OffTheBooks::new_(); + m_alternative = m_pattern.m_body->addNewAlternative(); + m_pattern.m_disjunctions.append(m_pattern.m_body); + } + + ~YarrPatternConstructor() { } @@ -256,10 +362,19 @@ public: { m_pattern.reset(); m_characterClassConstructor.reset(); + + m_pattern.m_body = js::OffTheBooks::new_(); + m_alternative = m_pattern.m_body->addNewAlternative(); + m_pattern.m_disjunctions.append(m_pattern.m_body); } void assertionBOL() { + if (!m_alternative->m_terms.size() & !m_invertParentheticalAssertion) { + m_alternative->m_startsWithBOL = true; + m_alternative->m_containsBOL = true; + m_pattern.m_containsBOL = true; + } m_alternative->m_terms.append(PatternTerm::BOL()); } void assertionEOL() @@ -318,7 +433,7 @@ public: void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert) { - JS_ASSERT(classID != NewlineClassID); + ASSERT(classID != NewlineClassID); switch (classID) { case DigitClassID: @@ -334,7 +449,7 @@ 
public: break; default: - JS_NOT_REACHED("Invalid character class."); + ASSERT_NOT_REACHED(); } } @@ -351,36 +466,56 @@ public: if (capture) m_pattern.m_numSubpatterns++; - // FIXME: bug 574459 -- no NULL check PatternDisjunction* parenthesesDisjunction = js::OffTheBooks::new_(m_alternative); m_pattern.m_disjunctions.append(parenthesesDisjunction); - m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture)); + m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, false)); m_alternative = parenthesesDisjunction->addNewAlternative(); } void atomParentheticalAssertionBegin(bool invert = false) { - // FIXME: bug 574459 -- no NULL check PatternDisjunction* parenthesesDisjunction = js::OffTheBooks::new_(m_alternative); m_pattern.m_disjunctions.append(parenthesesDisjunction); - m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, invert)); + m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, false, invert)); m_alternative = parenthesesDisjunction->addNewAlternative(); + m_invertParentheticalAssertion = invert; } void atomParenthesesEnd() { - JS_ASSERT(m_alternative->m_parent); - JS_ASSERT(m_alternative->m_parent->m_parent); + ASSERT(m_alternative->m_parent); + ASSERT(m_alternative->m_parent->m_parent); + + PatternDisjunction* parenthesesDisjunction = m_alternative->m_parent; m_alternative = m_alternative->m_parent->m_parent; - - m_alternative->lastTerm().parentheses.lastSubpatternId = m_pattern.m_numSubpatterns; + + PatternTerm& lastTerm = m_alternative->lastTerm(); + + unsigned numParenAlternatives = parenthesesDisjunction->m_alternatives.size(); + unsigned numBOLAnchoredAlts = 0; + + for (unsigned i = 0; i < numParenAlternatives; i++) { + // Bubble up BOL 
flags + if (parenthesesDisjunction->m_alternatives[i]->m_startsWithBOL) + numBOLAnchoredAlts++; + } + + if (numBOLAnchoredAlts) { + m_alternative->m_containsBOL = true; + // If all the alternatives in parens start with BOL, then so does this one + if (numBOLAnchoredAlts == numParenAlternatives) + m_alternative->m_startsWithBOL = true; + } + + lastTerm.parentheses.lastSubpatternId = m_pattern.m_numSubpatterns; + m_invertParentheticalAssertion = false; } void atomBackReference(unsigned subpatternId) { - JS_ASSERT(subpatternId); + ASSERT(subpatternId); m_pattern.m_containsBackreferences = true; - m_pattern.m_maxBackReference = JS_MAX(m_pattern.m_maxBackReference, subpatternId); + m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId); if (subpatternId > m_pattern.m_numSubpatterns) { m_alternative->m_terms.append(PatternTerm::ForwardReference()); @@ -388,14 +523,14 @@ public: } PatternAlternative* currentAlternative = m_alternative; - JS_ASSERT(currentAlternative); + ASSERT(currentAlternative); // Note to self: if we waited until the AST was baked, we could also remove forwards refs while ((currentAlternative = currentAlternative->m_parent->m_parent)) { PatternTerm& term = currentAlternative->lastTerm(); - JS_ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion)); + ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion)); - if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.invertOrCapture && (subpatternId == term.subpatternId)) { + if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.capture() && (subpatternId == term.parentheses.subpatternId)) { m_alternative->m_terms.append(PatternTerm::ForwardReference()); return; } @@ -404,37 +539,43 @@ public: m_alternative->m_terms.append(PatternTerm(subpatternId)); } - PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction) + // 
deep copy the argument disjunction. If filterStartsWithBOL is true, + // skip alternatives with m_startsWithBOL set true. + PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false) { - // FIXME: bug 574459 -- no NULL check - PatternDisjunction* newDisjunction = js::OffTheBooks::new_(); - - newDisjunction->m_parent = disjunction->m_parent; - for (unsigned alt = 0; alt < disjunction->m_alternatives.length(); ++alt) { + PatternDisjunction* newDisjunction = 0; + for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { PatternAlternative* alternative = disjunction->m_alternatives[alt]; - PatternAlternative* newAlternative = newDisjunction->addNewAlternative(); - for (unsigned i = 0; i < alternative->m_terms.length(); ++i) - newAlternative->m_terms.append(copyTerm(alternative->m_terms[i])); + if (!filterStartsWithBOL || !alternative->m_startsWithBOL) { + if (!newDisjunction) { + newDisjunction = js::OffTheBooks::new_(); + newDisjunction->m_parent = disjunction->m_parent; + } + PatternAlternative* newAlternative = newDisjunction->addNewAlternative(); + for (unsigned i = 0; i < alternative->m_terms.size(); ++i) + newAlternative->m_terms.append(copyTerm(alternative->m_terms[i], filterStartsWithBOL)); + } } - - m_pattern.m_disjunctions.append(newDisjunction); + + if (newDisjunction) + m_pattern.m_disjunctions.append(newDisjunction); return newDisjunction; } - - PatternTerm copyTerm(PatternTerm& term) + + PatternTerm copyTerm(PatternTerm& term, bool filterStartsWithBOL = false) { if ((term.type != PatternTerm::TypeParenthesesSubpattern) && (term.type != PatternTerm::TypeParentheticalAssertion)) return PatternTerm(term); - + PatternTerm termCopy = term; - termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction); + termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction, filterStartsWithBOL); return termCopy; } - + void quantifyAtom(unsigned min, unsigned max, 
bool greedy) { - JS_ASSERT(min <= max); - JS_ASSERT(m_alternative->m_terms.length()); + ASSERT(min <= max); + ASSERT(m_alternative->m_terms.size()); if (!max) { m_alternative->removeLastTerm(); @@ -442,8 +583,8 @@ public: } PatternTerm& term = m_alternative->lastTerm(); - JS_ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary); - JS_ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)); + ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary); + ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)); // For any assertion with a zero minimum, not matching is valid and has no effect, // remove it. Otherwise, we need to match as least once, but there is no point @@ -464,7 +605,7 @@ public: term.quantify(min, QuantifierFixedCount); m_alternative->m_terms.append(copyTerm(term)); // NOTE: this term is interesting from an analysis perspective, in that it can be ignored..... - m_alternative->lastTerm().quantify((max == UINT_MAX) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy); + m_alternative->lastTerm().quantify((max == quantifyInfinite) ? max : max - min, greedy ? 
QuantifierGreedy : QuantifierNonGreedy); if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern) m_alternative->lastTerm().parentheses.isCopy = true; } @@ -475,26 +616,12 @@ public: m_alternative = m_alternative->m_parent->addNewAlternative(); } - void regexBegin() - { - // FIXME: bug 574459 -- no NULL check - m_pattern.m_body = js::OffTheBooks::new_(); - m_alternative = m_pattern.m_body->addNewAlternative(); - m_pattern.m_disjunctions.append(m_pattern.m_body); - } - void regexEnd() - { - } - void regexError() - { - } - unsigned setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition) { alternative->m_hasFixedSize = true; unsigned currentInputPosition = initialInputPosition; - for (unsigned i = 0; i < alternative->m_terms.length(); ++i) { + for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { PatternTerm& term = alternative->m_terms[i]; switch (term.type) { @@ -507,7 +634,7 @@ public: case PatternTerm::TypeBackReference: term.inputPosition = currentInputPosition; term.frameLocation = currentCallFrameSize; - currentCallFrameSize += RegexStackSpaceForBackTrackInfoBackReference; + currentCallFrameSize += YarrStackSpaceForBackTrackInfoBackReference; alternative->m_hasFixedSize = false; break; @@ -518,7 +645,7 @@ public: term.inputPosition = currentInputPosition; if (term.quantityType != QuantifierFixedCount) { term.frameLocation = currentCallFrameSize; - currentCallFrameSize += RegexStackSpaceForBackTrackInfoPatternCharacter; + currentCallFrameSize += YarrStackSpaceForBackTrackInfoPatternCharacter; alternative->m_hasFixedSize = false; } else currentInputPosition += term.quantityCount; @@ -528,7 +655,7 @@ public: term.inputPosition = currentInputPosition; if (term.quantityType != QuantifierFixedCount) { term.frameLocation = currentCallFrameSize; - currentCallFrameSize += RegexStackSpaceForBackTrackInfoCharacterClass; + currentCallFrameSize += 
YarrStackSpaceForBackTrackInfoCharacterClass; alternative->m_hasFixedSize = false; } else currentInputPosition += term.quantityCount; @@ -539,20 +666,20 @@ public: term.frameLocation = currentCallFrameSize; if (term.quantityCount == 1 && !term.parentheses.isCopy) { if (term.quantityType != QuantifierFixedCount) - currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesOnce; + currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce; currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition); // If quantity is fixed, then pre-check its minimum size. if (term.quantityType == QuantifierFixedCount) currentInputPosition += term.parentheses.disjunction->m_minimumSize; term.inputPosition = currentInputPosition; } else if (term.parentheses.isTerminal) { - currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesTerminal; + currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesTerminal; currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition); term.inputPosition = currentInputPosition; } else { term.inputPosition = currentInputPosition; setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition); - currentCallFrameSize += RegexStackSpaceForBackTrackInfoParentheses; + currentCallFrameSize += YarrStackSpaceForBackTrackInfoParentheses; } // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length. 
alternative->m_hasFixedSize = false; @@ -561,7 +688,7 @@ public: case PatternTerm::TypeParentheticalAssertion: term.inputPosition = currentInputPosition; term.frameLocation = currentCallFrameSize; - currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + RegexStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition); + currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + YarrStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition); break; } } @@ -572,23 +699,23 @@ public: unsigned setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition) { - if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.length() > 1)) - initialCallFrameSize += RegexStackSpaceForBackTrackInfoAlternative; + if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1)) + initialCallFrameSize += YarrStackSpaceForBackTrackInfoAlternative; unsigned minimumInputSize = UINT_MAX; unsigned maximumCallFrameSize = 0; bool hasFixedSize = true; - for (unsigned alt = 0; alt < disjunction->m_alternatives.length(); ++alt) { + for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { PatternAlternative* alternative = disjunction->m_alternatives[alt]; unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition); - minimumInputSize = JS_MIN(minimumInputSize, alternative->m_minimumSize); - maximumCallFrameSize = JS_MAX(maximumCallFrameSize, currentAlternativeCallFrameSize); + minimumInputSize = std::min(minimumInputSize, alternative->m_minimumSize); + maximumCallFrameSize = std::max(maximumCallFrameSize, currentAlternativeCallFrameSize); hasFixedSize &= alternative->m_hasFixedSize; } - JS_ASSERT(minimumInputSize != UINT_MAX); - JS_ASSERT(maximumCallFrameSize >= initialCallFrameSize); + ASSERT(minimumInputSize != 
UINT_MAX); + ASSERT(maximumCallFrameSize >= initialCallFrameSize); disjunction->m_hasFixedSize = hasFixedSize; disjunction->m_minimumSize = minimumInputSize; @@ -598,13 +725,14 @@ public: void setupOffsets() { - setupDisjunctionOffsets(m_pattern.m_body, BASE_FRAME_SIZE, 0); + setupDisjunctionOffsets(m_pattern.m_body, 0, 0); } // This optimization identifies sets of parentheses that we will never need to backtrack. // In these cases we do not need to store state from prior iterations. // We can presently avoid backtracking for: - // * a set of parens at the end of the regular expression (last term in any of the alternatives of the main body disjunction). + // * where the parens are at the end of the regular expression (last term in any of the + // alternatives of the main body disjunction). // * where the parens are non-capturing, and quantified unbounded greedy (*). // * where the parens do not contain any capturing subpatterns. void checkForTerminalParentheses() @@ -614,57 +742,239 @@ public: if (m_pattern.m_numSubpatterns) return; - js::Vector& alternatives = m_pattern.m_body->m_alternatives; - for (unsigned i =0; i < alternatives.length(); ++i) { - js::Vector& terms = alternatives[i]->m_terms; - if (terms.length()) { - PatternTerm& term = terms.back(); + Vector& alternatives = m_pattern.m_body->m_alternatives; + for (size_t i = 0; i < alternatives.size(); ++i) { + Vector& terms = alternatives[i]->m_terms; + if (terms.size()) { + PatternTerm& term = terms.last(); if (term.type == PatternTerm::TypeParenthesesSubpattern && term.quantityType == QuantifierGreedy - && term.quantityCount == UINT_MAX + && term.quantityCount == quantifyInfinite && !term.capture()) term.parentheses.isTerminal = true; } } } + void optimizeBOL() + { + // Look for expressions containing beginning of line (^) anchoring and unroll them. + // e.g. 
/^a|^b|c/ becomes /^a|^b|c/ which is executed once followed by /c/ which loops + // This code relies on the parsing code tagging alternatives with m_containsBOL and + // m_startsWithBOL and rolling those up to containing alternatives. + // At this point, this is only valid for non-multiline expressions. + PatternDisjunction* disjunction = m_pattern.m_body; + + if (!m_pattern.m_containsBOL || m_pattern.m_multiline) + return; + + PatternDisjunction* loopDisjunction = copyDisjunction(disjunction, true); + + // Set alternatives in disjunction to "onceThrough" + for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) + disjunction->m_alternatives[alt]->setOnceThrough(); + + if (loopDisjunction) { + // Move alternatives from loopDisjunction to disjunction + for (unsigned alt = 0; alt < loopDisjunction->m_alternatives.size(); ++alt) + disjunction->m_alternatives.append(loopDisjunction->m_alternatives[alt]); + + loopDisjunction->m_alternatives.clear(); + } + } + + // This function collects the terms which are potentially matching the first number of depth characters in the result. + // If this function returns false then it found at least one term which makes the beginning character + // look-up optimization inefficient. 
+ bool setupDisjunctionBeginTerms(PatternDisjunction* disjunction, Vector* beginTerms, unsigned depth) + { + for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { + PatternAlternative* alternative = disjunction->m_alternatives[alt]; + + if (!setupAlternativeBeginTerms(alternative, beginTerms, 0, depth)) + return false; + } + + return true; + } + + bool setupAlternativeBeginTerms(PatternAlternative* alternative, Vector* beginTerms, unsigned termIndex, unsigned depth) + { + bool checkNext = true; + unsigned numTerms = alternative->m_terms.size(); + + while (checkNext && termIndex < numTerms) { + PatternTerm term = alternative->m_terms[termIndex]; + checkNext = false; + + switch (term.type) { + case PatternTerm::TypeAssertionBOL: + case PatternTerm::TypeAssertionEOL: + case PatternTerm::TypeAssertionWordBoundary: + return false; + + case PatternTerm::TypeBackReference: + case PatternTerm::TypeForwardReference: + return false; + + case PatternTerm::TypePatternCharacter: + if (termIndex != numTerms - 1) { + beginTerms->append(TermChain(term)); + termIndex++; + checkNext = true; + } else if (term.quantityType == QuantifierFixedCount) { + beginTerms->append(TermChain(term)); + if (depth < 2 && termIndex < numTerms - 1 && term.quantityCount == 1) + if (!setupAlternativeBeginTerms(alternative, &beginTerms->last().hotTerms, termIndex + 1, depth + 1)) + return false; + } + + break; + + case PatternTerm::TypeCharacterClass: + return false; + + case PatternTerm::TypeParentheticalAssertion: + if (term.invert()) + return false; + + case PatternTerm::TypeParenthesesSubpattern: + if (term.quantityType != QuantifierFixedCount) { + if (termIndex == numTerms - 1) + break; + + termIndex++; + checkNext = true; + } + + if (!setupDisjunctionBeginTerms(term.parentheses.disjunction, beginTerms, depth)) + return false; + + break; + } + } + + return true; + } + + void setupBeginChars() + { + Vector beginTerms; + bool containsFixedCharacter = false; + + if 
((!m_pattern.m_body->m_hasFixedSize || m_pattern.m_body->m_alternatives.size() > 1) + && setupDisjunctionBeginTerms(m_pattern.m_body, &beginTerms, 0)) { + unsigned size = beginTerms.size(); + + // If we haven't collected any terms we should abort the preparation of beginning character look-up optimization. + if (!size) + return; + + m_pattern.m_containsBeginChars = true; + + for (unsigned i = 0; i < size; i++) { + PatternTerm term = beginTerms[i].term; + + // We have just collected PatternCharacter terms, other terms are not allowed. + ASSERT(term.type == PatternTerm::TypePatternCharacter); + + if (term.quantityType == QuantifierFixedCount) + containsFixedCharacter = true; + + UChar character = term.patternCharacter; + unsigned mask = 0; + + if (character <= 0x7f) { + if (m_pattern.m_ignoreCase && isASCIIAlpha(character)) { + mask = 32; + character = toASCIILower(character); + } + + m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount); + } else { + UChar upper, lower; + if (m_pattern.m_ignoreCase && ((upper = Unicode::toUpper(character)) != (lower = Unicode::toLower(character)))) { + m_beginCharHelper.addBeginChar(BeginChar(upper, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount); + m_beginCharHelper.addBeginChar(BeginChar(lower, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount); + } else + m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount); + } + } + + // If the pattern doesn't contain terms with fixed quantifiers then the beginning character look-up optimization is inefficient. 
+ if (!containsFixedCharacter) { + m_pattern.m_containsBeginChars = false; + return; + } + + size = m_pattern.m_beginChars.size(); + + if (size > 2) + m_beginCharHelper.merge(size - 1); + else if (size <= 1) + m_pattern.m_containsBeginChars = false; + } + } + private: - RegexPattern& m_pattern; + YarrPattern& m_pattern; PatternAlternative* m_alternative; CharacterClassConstructor m_characterClassConstructor; + BeginCharHelper m_beginCharHelper; bool m_invertCharacterClass; + bool m_invertParentheticalAssertion; }; - -int compileRegex(const UString& patternString, RegexPattern& pattern) +ErrorCode YarrPattern::compile(const UString& patternString) { - RegexPatternConstructor constructor(pattern); + YarrPatternConstructor constructor(*this); - if (int error = parse(constructor, patternString)) + if (ErrorCode error = parse(constructor, patternString)) return error; // If the pattern contains illegal backreferences reset & reparse. // Quoting Netscape's "What's new in JavaScript 1.2", // "Note: if the number of left parentheses is less than the number specified // in \#, the \# is taken as an octal escape as described in the next row." 
- if (pattern.containsIllegalBackReference()) { - unsigned numSubpatterns = pattern.m_numSubpatterns; + if (containsIllegalBackReference()) { + unsigned numSubpatterns = m_numSubpatterns; constructor.reset(); -#ifdef DEBUG - int error = +#if !ASSERT_DISABLED + ErrorCode error = #endif parse(constructor, patternString, numSubpatterns); - JS_ASSERT(!error); - JS_ASSERT(numSubpatterns == pattern.m_numSubpatterns); + ASSERT(!error); + ASSERT(numSubpatterns == m_numSubpatterns); } constructor.checkForTerminalParentheses(); + constructor.optimizeBOL(); + constructor.setupOffsets(); + constructor.setupBeginChars(); - return 0; + return NoError; } +YarrPattern::YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, ErrorCode* error) + : m_ignoreCase(ignoreCase) + , m_multiline(multiline) + , m_containsBackreferences(false) + , m_containsBeginChars(false) + , m_containsBOL(false) + , m_numSubpatterns(0) + , m_maxBackReference(0) + , newlineCached(0) + , digitsCached(0) + , spacesCached(0) + , wordcharCached(0) + , nondigitsCached(0) + , nonspacesCached(0) + , nonwordcharCached(0) +{ + *error = compile(pattern); +} } } diff --git a/js/src/yarr/yarr/RegexPattern.h b/js/src/yarr/YarrPattern.h similarity index 70% rename from js/src/yarr/yarr/RegexPattern.h rename to js/src/yarr/YarrPattern.h index 9d9b286a653e..38ae10fcf289 100644 --- a/js/src/yarr/yarr/RegexPattern.h +++ b/js/src/yarr/YarrPattern.h @@ -1,5 +1,9 @@ -/* +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** * Copyright (C) 2009 Apple Inc. All rights reserved. 
+ * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -21,26 +25,32 @@ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ + * + * ***** END LICENSE BLOCK ***** */ -#ifndef RegexPattern_h -#define RegexPattern_h - -#include "jsvector.h" -#include "yarr/jswtfbridge.h" -#include "yarr/yarr/RegexCommon.h" +#ifndef YarrPattern_h +#define YarrPattern_h +#include "wtfbridge.h" +#include "ASCIICType.h" namespace JSC { namespace Yarr { -#define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers. -#define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers. -#define RegexStackSpaceForBackTrackInfoBackReference 2 -#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative. -#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1 -#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. -#define RegexStackSpaceForBackTrackInfoParenthesesTerminal 1 -#define RegexStackSpaceForBackTrackInfoParentheses 4 +enum ErrorCode { + NoError, + PatternTooLarge, + QuantifierOutOfOrder, + QuantifierWithoutAtom, + MissingParentheses, + ParenthesesUnmatched, + ParenthesesTypeInvalid, + CharacterClassUnmatched, + CharacterClassInvalidRange, + CharacterClassOutOfOrder, + EscapeUnterminated, + QuantifierTooLarge, + NumberOfErrorCodes +}; struct PatternDisjunction; @@ -55,60 +65,42 @@ struct CharacterRange { } }; -/* - * Wraps a table and indicates inversion. Can be efficiently borrowed - * between character classes, so it's refcounted. 
- */ -struct CharacterClassTable { - - JS_DECLARE_ALLOCATION_FRIENDS_FOR_PRIVATE_CONSTRUCTOR; - +struct CharacterClassTable : RefCounted { + friend class js::OffTheBooks; const char* m_table; bool m_inverted; - jsrefcount m_refcount; - - /* Ownership transferred to caller. */ - static CharacterClassTable *create(const char* table, bool inverted) + static PassRefPtr create(const char* table, bool inverted) { - // FIXME: bug 574459 -- no NULL checks done by any of the callers, all - // of which are in RegExpJitTables.h. - return js::OffTheBooks::new_(table, inverted); + return adoptRef(js::OffTheBooks::new_(table, inverted)); } - void incref() { JS_ATOMIC_INCREMENT(&m_refcount); } - void decref() { if (JS_ATOMIC_DECREMENT(&m_refcount) == 0) js::Foreground::delete_(this); } - private: CharacterClassTable(const char* table, bool inverted) : m_table(table) , m_inverted(inverted) - , m_refcount(0) { } }; struct CharacterClass { + WTF_MAKE_FAST_ALLOCATED +public: // All CharacterClass instances have to have the full set of matches and ranges, // they may have an optional table for faster lookups (which must match the // specified matches and ranges) - CharacterClass(CharacterClassTable *table) + CharacterClass(PassRefPtr table) : m_table(table) { - if (m_table) - m_table->incref(); } ~CharacterClass() { - if (m_table) - m_table->decref(); + js::Foreground::delete_(m_table.get()); } - typedef js::Vector UChars; - typedef js::Vector CharacterRanges; - UChars m_matches; - CharacterRanges m_ranges; - UChars m_matchesUnicode; - CharacterRanges m_rangesUnicode; - CharacterClassTable *m_table; + Vector m_matches; + Vector m_ranges; + Vector m_matchesUnicode; + Vector m_rangesUnicode; + RefPtr m_table; }; enum QuantifierType { @@ -129,11 +121,12 @@ struct PatternTerm { TypeParenthesesSubpattern, TypeParentheticalAssertion } type; - bool invertOrCapture; + bool m_capture :1; + bool m_invert :1; union { UChar patternCharacter; CharacterClass* characterClass; - unsigned 
subpatternId; + unsigned backReferenceSubpatternId; struct { PatternDisjunction* disjunction; unsigned subpatternId; @@ -147,8 +140,21 @@ struct PatternTerm { int inputPosition; unsigned frameLocation; + // No-argument constructor for js::Vector. + PatternTerm() + : type(PatternTerm::TypePatternCharacter) + , m_capture(false) + , m_invert(false) + { + patternCharacter = 0; + quantityType = QuantifierFixedCount; + quantityCount = 1; + } + PatternTerm(UChar ch) : type(PatternTerm::TypePatternCharacter) + , m_capture(false) + , m_invert(false) { patternCharacter = ch; quantityType = QuantifierFixedCount; @@ -157,16 +163,18 @@ struct PatternTerm { PatternTerm(CharacterClass* charClass, bool invert) : type(PatternTerm::TypeCharacterClass) - , invertOrCapture(invert) + , m_capture(false) + , m_invert(invert) { characterClass = charClass; quantityType = QuantifierFixedCount; quantityCount = 1; } - PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture) + PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false) : type(type) - , invertOrCapture(invertOrCapture) + , m_capture(capture) + , m_invert(invert) { parentheses.disjunction = disjunction; parentheses.subpatternId = subpatternId; @@ -178,7 +186,8 @@ struct PatternTerm { PatternTerm(Type type, bool invert = false) : type(type) - , invertOrCapture(invert) + , m_capture(false) + , m_invert(invert) { quantityType = QuantifierFixedCount; quantityCount = 1; @@ -186,9 +195,10 @@ struct PatternTerm { PatternTerm(unsigned spatternId) : type(TypeBackReference) - , invertOrCapture(false) + , m_capture(false) + , m_invert(false) { - subpatternId = spatternId; + backReferenceSubpatternId = spatternId; quantityType = QuantifierFixedCount; quantityCount = 1; } @@ -215,12 +225,12 @@ struct PatternTerm { bool invert() { - return invertOrCapture; + return m_invert; } bool capture() { - return invertOrCapture; + return m_capture; 
} void quantify(unsigned count, QuantifierType type) @@ -231,9 +241,8 @@ struct PatternTerm { }; struct PatternAlternative { - - JS_DECLARE_ALLOCATION_FRIENDS_FOR_PRIVATE_CONSTRUCTOR; - + WTF_MAKE_FAST_ALLOCATED +public: PatternAlternative(PatternDisjunction* disjunction) : m_parent(disjunction) , m_onceThrough(false) @@ -245,14 +254,14 @@ struct PatternAlternative { PatternTerm& lastTerm() { - JS_ASSERT(m_terms.length()); - return m_terms[m_terms.length() - 1]; + ASSERT(m_terms.size()); + return m_terms[m_terms.size() - 1]; } void removeLastTerm() { - JS_ASSERT(m_terms.length()); - m_terms.popBack(); + ASSERT(m_terms.size()); + m_terms.shrink(m_terms.size() - 1); } void setOnceThrough() @@ -265,7 +274,7 @@ struct PatternAlternative { return m_onceThrough; } - js::Vector m_terms; + Vector m_terms; PatternDisjunction* m_parent; unsigned m_minimumSize; bool m_onceThrough : 1; @@ -274,18 +283,9 @@ struct PatternAlternative { bool m_containsBOL : 1; }; -template -static inline void -deleteAllValues(js::Vector &vector) -{ - for (T** t = vector.begin(); t < vector.end(); ++t) - js::Foreground::delete_(*t); -} - struct PatternDisjunction { - - JS_DECLARE_ALLOCATION_FRIENDS_FOR_PRIVATE_CONSTRUCTOR; - + WTF_MAKE_FAST_ALLOCATED +public: PatternDisjunction(PatternAlternative* parent = 0) : m_parent(parent) , m_hasFixedSize(false) @@ -299,13 +299,12 @@ struct PatternDisjunction { PatternAlternative* addNewAlternative() { - // FIXME: bug 574459 -- no NULL check PatternAlternative* alternative = js::OffTheBooks::new_(this); m_alternatives.append(alternative); return alternative; } - js::Vector m_alternatives; + Vector m_alternatives; PatternAlternative* m_parent; unsigned m_minimumSize; unsigned m_callFrameSize; @@ -314,7 +313,7 @@ struct PatternDisjunction { // You probably don't want to be calling these functions directly // (please to be calling newlineCharacterClass() et al on your -// friendly neighborhood RegexPattern instance to get nicely +// friendly neighborhood 
YarrPattern instance to get nicely // cached copies). CharacterClass* newlineCreate(); CharacterClass* digitsCreate(); @@ -324,25 +323,34 @@ CharacterClass* nondigitsCreate(); CharacterClass* nonspacesCreate(); CharacterClass* nonwordcharCreate(); -struct RegexPattern { - RegexPattern(bool ignoreCase, bool multiline) - : m_ignoreCase(ignoreCase) - , m_multiline(multiline) - , m_containsBackreferences(false) - , m_containsBOL(false) - , m_numSubpatterns(0) - , m_maxBackReference(0) - , newlineCached(0) - , digitsCached(0) - , spacesCached(0) - , wordcharCached(0) - , nondigitsCached(0) - , nonspacesCached(0) - , nonwordcharCached(0) - { - } +struct TermChain { + TermChain(PatternTerm term) + : term(term) + {} - ~RegexPattern() + PatternTerm term; + Vector hotTerms; +}; + +struct BeginChar { + BeginChar() + : value(0) + , mask(0) + {} + + BeginChar(unsigned value, unsigned mask) + : value(value) + , mask(mask) + {} + + unsigned value; + unsigned mask; +}; + +struct YarrPattern { + YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, ErrorCode* error); + + ~YarrPattern() { deleteAllValues(m_disjunctions); deleteAllValues(m_userCharacterClasses); @@ -354,6 +362,7 @@ struct RegexPattern { m_maxBackReference = 0; m_containsBackreferences = false; + m_containsBeginChars = false; m_containsBOL = false; newlineCached = 0; @@ -368,6 +377,7 @@ struct RegexPattern { m_disjunctions.clear(); deleteAllValues(m_userCharacterClasses); m_userCharacterClasses.clear(); + m_beginChars.clear(); } bool containsIllegalBackReference() @@ -418,19 +428,21 @@ struct RegexPattern { return nonwordcharCached; } - typedef js::Vector PatternDisjunctions; - typedef js::Vector CharacterClasses; bool m_ignoreCase : 1; bool m_multiline : 1; bool m_containsBackreferences : 1; + bool m_containsBeginChars : 1; bool m_containsBOL : 1; unsigned m_numSubpatterns; unsigned m_maxBackReference; - PatternDisjunction *m_body; - PatternDisjunctions m_disjunctions; - CharacterClasses 
m_userCharacterClasses; + PatternDisjunction* m_body; + Vector m_disjunctions; + Vector m_userCharacterClasses; + Vector m_beginChars; private: + ErrorCode compile(const UString& patternString); + CharacterClass* newlineCached; CharacterClass* digitsCached; CharacterClass* spacesCached; @@ -442,4 +454,4 @@ private: } } // namespace JSC::Yarr -#endif // RegexPattern_h +#endif // YarrPattern_h diff --git a/js/src/yarr/yarr/RegexCommon.h b/js/src/yarr/YarrSyntaxChecker.cpp similarity index 52% rename from js/src/yarr/yarr/RegexCommon.h rename to js/src/yarr/YarrSyntaxChecker.cpp index 3ae337ea62cc..f36ac5a3f5bc 100644 --- a/js/src/yarr/yarr/RegexCommon.h +++ b/js/src/yarr/YarrSyntaxChecker.cpp @@ -1,5 +1,8 @@ -/* - * Copyright (C) 2009 Apple Inc. All rights reserved. +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2011 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -21,30 +24,39 @@ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ + * + * ***** END LICENSE BLOCK ***** */ -#ifndef RegexCommon_h -#define RegexCommon_h +#include "YarrSyntaxChecker.h" + +#include "YarrParser.h" namespace JSC { namespace Yarr { -enum ErrorCode { - HitRecursionLimit = -2, - NoError = 0, - PatternTooLarge, - QuantifierOutOfOrder, - QuantifierWithoutAtom, - MissingParentheses, - ParenthesesUnmatched, - ParenthesesTypeInvalid, - CharacterClassUnmatched, - CharacterClassOutOfOrder, - CharacterClassRangeSingleChar, - EscapeUnterminated, - QuantifierTooLarge, - NumberOfErrorCodes +class SyntaxChecker { +public: + void assertionBOL() {} + void assertionEOL() {} + void assertionWordBoundary(bool) {} + void atomPatternCharacter(UChar) {} + void atomBuiltInCharacterClass(BuiltInCharacterClassID, bool) {} + void atomCharacterClassBegin(bool = false) {} + void atomCharacterClassAtom(UChar) {} + void atomCharacterClassRange(UChar, UChar) {} + void atomCharacterClassBuiltIn(BuiltInCharacterClassID, bool) {} + void atomCharacterClassEnd() {} + void atomParenthesesSubpatternBegin(bool = true) {} + void atomParentheticalAssertionBegin(bool = false) {} + void atomParenthesesEnd() {} + void atomBackReference(unsigned) {} + void quantifyAtom(unsigned, unsigned, bool) {} + void disjunction() {} }; -}} +ErrorCode checkSyntax(const UString& pattern) +{ + SyntaxChecker syntaxChecker; + return parse(syntaxChecker, pattern); +} -#endif +}} // JSC::YARR diff --git a/js/src/yarr/yarr/RegexCompiler.h b/js/src/yarr/YarrSyntaxChecker.h similarity index 74% rename from js/src/yarr/yarr/RegexCompiler.h rename to js/src/yarr/YarrSyntaxChecker.h index 307c15866e59..87f2ed5093b0 100644 --- a/js/src/yarr/yarr/RegexCompiler.h +++ b/js/src/yarr/YarrSyntaxChecker.h @@ -1,5 +1,8 @@ -/* - * Copyright (C) 2009 Apple Inc. All rights reserved. +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2011 Apple Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -21,18 +24,20 @@ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ + * + * ***** END LICENSE BLOCK ***** */ -#ifndef RegexCompiler_h -#define RegexCompiler_h +#ifndef YarrSyntaxChecker_h +#define YarrSyntaxChecker_h -#include "RegexParser.h" -#include "RegexPattern.h" +#include "wtfbridge.h" +#include "YarrParser.h" namespace JSC { namespace Yarr { -int compileRegex(const UString& patternString, RegexPattern& pattern); +ErrorCode checkSyntax(const UString& pattern); -} } // namespace JSC::Yarr +}} // JSC::YARR + +#endif // YarrSyntaxChecker_h -#endif // RegexCompiler_h diff --git a/js/src/yarr/jswtfbridge.h b/js/src/yarr/jswtfbridge.h deleted file mode 100644 index b38f76ead5a8..000000000000 --- a/js/src/yarr/jswtfbridge.h +++ /dev/null @@ -1,61 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- - * vim: set ts=8 sw=4 et tw=99 ft=cpp: - * - * ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla SpiderMonkey JavaScript 1.9 code, released - * June 12, 2009. - * - * The Initial Developer of the Original Code is - * the Mozilla Corporation. 
- * - * Contributor(s): - * Chris Leary - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef jswtfbridge_h__ -#define jswtfbridge_h__ - -/* - * The JS/WTF Bridge to Bona-fide Quality. - */ - -#include "assembler/wtf/Platform.h" -#include "jsstr.h" -#include "jsprvtd.h" -#include "jstl.h" - -typedef jschar UChar; -typedef JSLinearString UString; - -class Unicode { - public: - static UChar toUpper(UChar c) { return JS_TOUPPER(c); } - static UChar toLower(UChar c) { return JS_TOLOWER(c); } -}; - -#endif diff --git a/js/src/yarr/pcre/AUTHORS b/js/src/yarr/pcre/AUTHORS deleted file mode 100644 index dbac2a54834b..000000000000 --- a/js/src/yarr/pcre/AUTHORS +++ /dev/null @@ -1,12 +0,0 @@ -Originally written by: Philip Hazel -Email local part: ph10 -Email domain: cam.ac.uk - -University of Cambridge Computing Service, -Cambridge, England. Phone: +44 1223 334714. - -Copyright (c) 1997-2005 University of Cambridge. All rights reserved. - -Adapted for JavaScriptCore and WebKit by Apple Inc. - -Copyright (c) 2005, 2006, 2007 Apple Inc. All rights reserved. 
diff --git a/js/src/yarr/pcre/COPYING b/js/src/yarr/pcre/COPYING deleted file mode 100644 index 6ffdc24342d5..000000000000 --- a/js/src/yarr/pcre/COPYING +++ /dev/null @@ -1,35 +0,0 @@ -PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - -This is JavaScriptCore's variant of the PCRE library. While this library -started out as a copy of PCRE, many of the features of PCRE have been -removed. - -Copyright (c) 1997-2005 University of Cambridge. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the name of Apple - Inc. nor the names of their contributors may be used to endorse or - promote products derived from this software without specific prior - written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. diff --git a/js/src/yarr/pcre/chartables.c b/js/src/yarr/pcre/chartables.c deleted file mode 100644 index 5c99db0b980f..000000000000 --- a/js/src/yarr/pcre/chartables.c +++ /dev/null @@ -1,96 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* This file is automatically written by the dftables auxiliary -program. If you edit it by hand, you might like to edit the Makefile to -prevent its ever being regenerated. - -This file contains the default tables for characters with codes less than -128 (ASCII characters). These tables are used when no external tables are -passed to PCRE. */ - -const unsigned char jsc_pcre_default_tables[480] = { - -/* This table is a lower casing table. 
*/ - - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, - 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, - 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, - -/* This table is a case flipping table. */ - - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, - 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, - 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, - 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, - 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, - -/* This table contains bit maps for various character classes. -Each map is 32 bytes long and the bits run from the least -significant end of each byte. The classes are: space, digit, word. 
*/ - - 0x00, 0x3E, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03, - 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - -/* This table identifies various classes of character by individual bits: - 0x01 white space character - 0x08 hexadecimal digit - 0x10 alphanumeric or '_' -*/ - - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0- 7 */ - 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, /* 8- 15 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 16- 23 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 24- 31 */ - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* - ' */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ( - / */ - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, /* 0 - 7 */ - 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8 - ? 
*/ - 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x10, /* @ - G */ - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, /* H - O */ - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, /* P - W */ - 0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x10, /* X - _ */ - 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x10, /* ` - g */ - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, /* h - o */ - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, /* p - w */ - 0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00}; /* x -127 */ - - -/* End of chartables.c */ diff --git a/js/src/yarr/pcre/dftables b/js/src/yarr/pcre/dftables deleted file mode 100644 index 669b948ffc91..000000000000 --- a/js/src/yarr/pcre/dftables +++ /dev/null @@ -1,273 +0,0 @@ -#!/usr/bin/perl -w -# -# This is JavaScriptCore's variant of the PCRE library. While this library -# started out as a copy of PCRE, many of the features of PCRE have been -# removed. This library now supports only the regular expression features -# required by the JavaScript language specification, and has only the functions -# needed by JavaScriptCore and the rest of WebKit. -# -# Originally written by Philip Hazel -# Copyright (c) 1997-2006 University of Cambridge -# Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. -# -# ----------------------------------------------------------------------------- -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# -# * Neither the name of the University of Cambridge nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# ----------------------------------------------------------------------------- - -# This is a freestanding support program to generate a file containing -# character tables. The tables are built according to the default C -# locale. - -use strict; - -use File::Basename; -use File::Spec; -use File::Temp qw(tempfile); -use Getopt::Long; - -sub readHeaderValues(); - -my %pcre_internal; - -if (scalar(@ARGV) < 1) { - print STDERR "Usage: ", basename($0), " [--preprocessor=program] output-file\n"; - exit 1; -} - -my $outputFile; -my $preprocessor; -GetOptions('preprocessor=s' => \$preprocessor); -if (not $preprocessor) { - $preprocessor = "cpp"; -} - -$outputFile = $ARGV[0]; -die('Must specify output file.') unless defined($outputFile); - -readHeaderValues(); - -open(OUT, ">", $outputFile) or die "$!"; -binmode(OUT); - -printf(OUT - "/*************************************************\n" . - "* Perl-Compatible Regular Expressions *\n" . 
- "*************************************************/\n\n" . - "/* This file is automatically written by the dftables auxiliary \n" . - "program. If you edit it by hand, you might like to edit the Makefile to \n" . - "prevent its ever being regenerated.\n\n"); -printf(OUT - "This file contains the default tables for characters with codes less than\n" . - "128 (ASCII characters). These tables are used when no external tables are\n" . - "passed to PCRE. */\n\n" . - "const unsigned char jsc_pcre_default_tables[%d] = {\n\n" . - "/* This table is a lower casing table. */\n\n", $pcre_internal{tables_length}); - -if ($pcre_internal{lcc_offset} != 0) { - die "lcc_offset != 0"; -} - -printf(OUT " "); -for (my $i = 0; $i < 128; $i++) { - if (($i & 7) == 0 && $i != 0) { - printf(OUT "\n "); - } - printf(OUT "0x%02X", ord(lc(chr($i)))); - if ($i != 127) { - printf(OUT ", "); - } -} -printf(OUT ",\n\n"); - -printf(OUT "/* This table is a case flipping table. */\n\n"); - -if ($pcre_internal{fcc_offset} != 128) { - die "fcc_offset != 128"; -} - -printf(OUT " "); -for (my $i = 0; $i < 128; $i++) { - if (($i & 7) == 0 && $i != 0) { - printf(OUT "\n "); - } - my $c = chr($i); - printf(OUT "0x%02X", $c =~ /[[:lower:]]/ ? ord(uc($c)) : ord(lc($c))); - if ($i != 127) { - printf(OUT ", "); - } -} -printf(OUT ",\n\n"); - -printf(OUT - "/* This table contains bit maps for various character classes.\n" . - "Each map is 32 bytes long and the bits run from the least\n" . - "significant end of each byte. The classes are: space, digit, word. 
*/\n\n"); - -if ($pcre_internal{cbits_offset} != $pcre_internal{fcc_offset} + 128) { - die "cbits_offset != fcc_offset + 128"; -} - -my @cbit_table = (0) x $pcre_internal{cbit_length}; -for (my $i = ord('0'); $i <= ord('9'); $i++) { - $cbit_table[$pcre_internal{cbit_digit} + $i / 8] |= 1 << ($i & 7); -} -$cbit_table[$pcre_internal{cbit_word} + ord('_') / 8] |= 1 << (ord('_') & 7); -for (my $i = 0; $i < 128; $i++) { - my $c = chr($i); - if ($c =~ /[[:alnum:]]/) { - $cbit_table[$pcre_internal{cbit_word} + $i / 8] |= 1 << ($i & 7); - } - if ($c =~ /[[:space:]]/) { - $cbit_table[$pcre_internal{cbit_space} + $i / 8] |= 1 << ($i & 7); - } -} - -printf(OUT " "); -for (my $i = 0; $i < $pcre_internal{cbit_length}; $i++) { - if (($i & 7) == 0 && $i != 0) { - if (($i & 31) == 0) { - printf(OUT "\n"); - } - printf(OUT "\n "); - } - printf(OUT "0x%02X", $cbit_table[$i]); - if ($i != $pcre_internal{cbit_length} - 1) { - printf(OUT ", "); - } -} -printf(OUT ",\n\n"); - -printf(OUT - "/* This table identifies various classes of character by individual bits:\n" . - " 0x%02x white space character\n" . - " 0x%02x hexadecimal digit\n" . 
- " 0x%02x alphanumeric or '_'\n*/\n\n", - $pcre_internal{ctype_space}, $pcre_internal{ctype_xdigit}, $pcre_internal{ctype_word}); - -if ($pcre_internal{ctypes_offset} != $pcre_internal{cbits_offset} + $pcre_internal{cbit_length}) { - die "ctypes_offset != cbits_offset + cbit_length"; -} - -printf(OUT " "); -for (my $i = 0; $i < 128; $i++) { - my $x = 0; - my $c = chr($i); - if ($c =~ /[[:space:]]/) { - $x += $pcre_internal{ctype_space}; - } - if ($c =~ /[[:xdigit:]]/) { - $x += $pcre_internal{ctype_xdigit}; - } - if ($c =~ /[[:alnum:]_]/) { - $x += $pcre_internal{ctype_word}; - } - printf(OUT "0x%02X", $x); - if ($i != 127) { - printf(OUT ", "); - } else { - printf(OUT "};"); - } - if (($i & 7) == 7) { - printf(OUT " /* "); - my $d = chr($i - 7); - if ($d =~ /[[:print:]]/) { - printf(OUT " %c -", $i - 7); - } else { - printf(OUT "%3d-", $i - 7); - } - if ($c =~ m/[[:print:]]/) { - printf(OUT " %c ", $i); - } else { - printf(OUT "%3d", $i); - } - printf(OUT " */\n"); - if ($i != 127) { - printf(OUT " "); - } - } -} - -if ($pcre_internal{tables_length} != $pcre_internal{ctypes_offset} + 128) { - die "tables_length != ctypes_offset + 128"; -} - -printf(OUT "\n\n/* End of chartables.c */\n"); - -close(OUT); - -exit 0; - -sub readHeaderValues() -{ - my @variables = qw( - cbit_digit - cbit_length - cbit_space - cbit_word - cbits_offset - ctype_space - ctype_word - ctype_xdigit - ctypes_offset - fcc_offset - lcc_offset - tables_length - ); - - local $/ = undef; - - my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h"); - - my ($fh, $tempFile) = tempfile( - basename($0) . "-XXXXXXXX", - DIR => File::Spec->tmpdir(), - SUFFIX => ".in", - UNLINK => 0, - ); - - print $fh "#define DFTABLES\n\n"; - - open(HEADER, "<", $headerPath) or die "$!"; - print $fh
; - close(HEADER); - - print $fh "\n\n"; - - for my $v (@variables) { - print $fh "\$pcre_internal{\"$v\"} = $v;\n"; - } - - close($fh); - - open(CPP, "$preprocessor \"$tempFile\" |") or die "$!"; - my $content = ; - close(CPP); - - eval $content; - die "$@" if $@; - unlink $tempFile; -} diff --git a/js/src/yarr/pcre/pcre.h b/js/src/yarr/pcre/pcre.h deleted file mode 100644 index 91d96b784905..000000000000 --- a/js/src/yarr/pcre/pcre.h +++ /dev/null @@ -1,68 +0,0 @@ -/* This is the public header file for JavaScriptCore's variant of the PCRE -library. While this library started out as a copy of PCRE, many of the -features of PCRE have been removed. This library now supports only the -regular expression features required by the JavaScript language -specification, and has only the functions needed by JavaScriptCore and the -rest of WebKit. - - Copyright (c) 1997-2005 University of Cambridge - Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved. - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -// FIXME: This file needs to be renamed to JSRegExp.h; it's no longer PCRE. - -#ifndef JSRegExp_h -#define JSRegExp_h - -#include "yarr/jswtfbridge.h" - -struct JSRegExp; -struct JSContext; - -enum JSRegExpIgnoreCaseOption { JSRegExpDoNotIgnoreCase, JSRegExpIgnoreCase }; -enum JSRegExpMultilineOption { JSRegExpSingleLine, JSRegExpMultiline }; - -/* jsRegExpExecute error codes */ -const int JSRegExpErrorNoMatch = -1; -const int JSRegExpErrorHitLimit = -2; -const int JSRegExpErrorInternal = -4; - -JSRegExp* jsRegExpCompile(const UChar* pattern, int patternLength, - JSRegExpIgnoreCaseOption, JSRegExpMultilineOption, - unsigned* numSubpatterns, int *error); - -int jsRegExpExecute(JSContext *, const JSRegExp*, - const UChar* subject, int subjectLength, int startOffset, - int* offsetsVector, int offsetsVectorLength); - -void jsRegExpFree(JSRegExp*); - -#endif diff --git a/js/src/yarr/pcre/pcre.pri b/js/src/yarr/pcre/pcre.pri deleted file mode 100644 index 4f59e17f4d91..000000000000 --- a/js/src/yarr/pcre/pcre.pri +++ /dev/null @@ -1,12 +0,0 @@ -# Perl Compatible Regular Expressions - Qt4 build info -VPATH += $$PWD -INCLUDEPATH += $$PWD $$OUTPUT_DIR/JavaScriptCore/tmp -DEPENDPATH += $$PWD - -SOURCES += \ - pcre_compile.cpp \ - pcre_exec.cpp \ - pcre_tables.cpp \ - pcre_ucp_searchfuncs.cpp \ - pcre_xclass.cpp - diff --git a/js/src/yarr/pcre/pcre_compile.cpp 
b/js/src/yarr/pcre/pcre_compile.cpp deleted file mode 100644 index 8d273bcbe5a6..000000000000 --- a/js/src/yarr/pcre/pcre_compile.cpp +++ /dev/null @@ -1,2702 +0,0 @@ -/* This is JavaScriptCore's variant of the PCRE library. While this library -started out as a copy of PCRE, many of the features of PCRE have been -removed. This library now supports only the regular expression features -required by the JavaScript language specification, and has only the functions -needed by JavaScriptCore and the rest of WebKit. - - Originally written by Philip Hazel - Copyright (c) 1997-2006 University of Cambridge - Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. - Copyright (C) 2007 Eric Seidel - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/* This module contains the external function jsRegExpExecute(), along with -supporting internal functions that are not used by other modules. */ - -#include "pcre_internal.h" - -#include -#include "yarr/wtf/ASCIICType.h" -#include "jsvector.h" - -using namespace WTF; - -/* Negative values for the firstchar and reqchar variables */ - -#define REQ_UNSET (-2) -#define REQ_NONE (-1) - -/************************************************* -* Code parameters and static tables * -*************************************************/ - -/* Maximum number of items on the nested bracket stacks at compile time. This -applies to the nesting of all kinds of parentheses. It does not limit -un-nested, non-capturing parentheses. This number can be made bigger if -necessary - it is used to dimension one int and one unsigned char vector at -compile time. */ - -#define BRASTACK_SIZE 200 - -/* Table for handling escaped characters in the range '0'-'z'. Positive returns -are simple data values; negative values are for special things like \d and so -on. Zero means further processing is needed (for things like \x), or the escape -is invalid. */ - -static const short escapes[] = { - 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ - 0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? 
*/ - '@', 0, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */ - 0, 0, 0, 0, 0, 0, 0, 0, /* H - O */ - 0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */ - 0, 0, 0, '[', '\\', ']', '^', '_', /* X - _ */ - '`', 7, -ESC_b, 0, -ESC_d, 0, '\f', 0, /* ` - g */ - 0, 0, 0, 0, 0, 0, '\n', 0, /* h - o */ - 0, 0, '\r', -ESC_s, '\t', 0, '\v', -ESC_w, /* p - w */ - 0, 0, 0 /* x - z */ -}; -static const unsigned OPCODE_LEN = 1; -static const unsigned BRAZERO_LEN = OPCODE_LEN; -static const unsigned BRA_NEST_SIZE = 2; -static const unsigned BRA_LEN = OPCODE_LEN + LINK_SIZE + BRA_NEST_SIZE; -static const unsigned KET_LEN = OPCODE_LEN + LINK_SIZE; - -/* Error code numbers. They are given names so that they can more easily be -tracked. */ - -enum ErrorCode { - ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, - ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17 -}; - -/* These are the error texts that correspond to the above error codes: - // 1 - "\\ at end of pattern\0" - "\\c at end of pattern\0" - "character value in \\x{...} sequence is too large\0" - "numbers out of order in {} quantifier\0" - // 5 - "number too big in {} quantifier\0" - "missing terminating ] for character class\0" - "internal error: code overflow\0" - "range out of order in character class\0" - "nothing to repeat\0" - // 10 - "unmatched parentheses\0" - "internal error: unexpected repeat\0" - "unrecognized character after (?\0" - "failed to get memory\0" - "missing )\0" - // 15 - "reference to non-existent subpattern\0" - "regular expression too large\0" - "parentheses nested too deeply" */ - -/* Structure for passing "static" information around between the functions -doing the compiling. 
*/ - -struct CompileData { - CompileData() { - topBackref = 0; - backrefMap = 0; - reqVaryOpt = 0; - needOuterBracket = false; - numCapturingBrackets = 0; - } - int topBackref; /* Maximum back reference */ - unsigned backrefMap; /* Bitmap of low back refs */ - int reqVaryOpt; /* "After variable item" flag for reqByte */ - bool needOuterBracket; - int numCapturingBrackets; -}; - -/* Definitions to allow mutual recursion */ - -static bool compileBracket(int, int*, unsigned char**, const UChar**, const UChar*, ErrorCode*, int, int*, int*, CompileData&); -static bool bracketIsAnchored(const unsigned char* code); -static bool bracketNeedsLineStart(const unsigned char* code, unsigned captureMap, unsigned backrefMap); -static int bracketFindFirstAssertedCharacter(const unsigned char* code, bool inassert); - -/************************************************* -* Handle escapes * -*************************************************/ - -/* This function is called when a \ has been encountered. It either returns a -positive value for a simple escape such as \n, or a negative value which -encodes one of the more complicated things such as \d. When UTF-8 is enabled, -a positive value greater than 255 may be returned. On entry, ptr is pointing at -the \. On exit, it is on the final character of the escape sequence. - -Arguments: - ptrPtr points to the pattern position pointer - errorCodePtr points to the errorcode variable - bracount number of previous extracting brackets - options the options bits - isClass true if inside a character class - -Returns: zero or positive => a data character - negative => a special escape sequence - on error, error is set -*/ - -static int checkEscape(const UChar** ptrPtr, const UChar* patternEnd, ErrorCode* errorCodePtr, int bracount, bool isClass) -{ - const UChar* ptr = *ptrPtr + 1; - - /* If backslash is at the end of the pattern, it's an error. 
*/ - if (ptr == patternEnd) { - *errorCodePtr = ERR1; - *ptrPtr = ptr; - return 0; - } - - int c = *ptr; - - /* Non-alphamerics are literals. For digits or letters, do an initial lookup in - a table. A non-zero result is something that can be returned immediately. - Otherwise further processing may be required. */ - - if (c < '0' || c > 'z') { /* Not alphameric */ - } else if (int escapeValue = escapes[c - '0']) { - c = escapeValue; - if (isClass) { - if (-c == ESC_b) - c = '\b'; /* \b is backslash in a class */ - else if (-c == ESC_B) - c = 'B'; /* and \B is a capital B in a class (in browsers event though ECMAScript 15.10.2.19 says it raises an error) */ - } - /* Escapes that need further processing, or are illegal. */ - - } else { - switch (c) { - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - /* Escape sequences starting with a non-zero digit are backreferences, - unless there are insufficient brackets, in which case they are octal - escape sequences. Those sequences end on the first non-octal character - or when we overflow 0-255, whichever comes first. */ - - if (!isClass) { - const UChar* oldptr = ptr; - c -= '0'; - while ((ptr + 1 < patternEnd) && isASCIIDigit(ptr[1]) && c <= bracount) - c = c * 10 + *(++ptr) - '0'; - if (c <= bracount) { - c = -(ESC_REF + c); - break; - } - ptr = oldptr; /* Put the pointer back and fall through */ - } - - /* Handle an octal number following \. If the first digit is 8 or 9, - this is not octal. */ - - if ((c = *ptr) >= '8') { - c = '\\'; - ptr -= 1; - break; - } - - /* \0 always starts an octal number, but we may drop through to here with a - larger first octal digit. 
*/ - - case '0': { - c -= '0'; - int i; - for (i = 1; i <= 2; ++i) { - if (ptr + i >= patternEnd || ptr[i] < '0' || ptr[i] > '7') - break; - int cc = c * 8 + ptr[i] - '0'; - if (cc > 255) - break; - c = cc; - } - ptr += i - 1; - break; - } - - case 'x': { - c = 0; - int i; - for (i = 1; i <= 2; ++i) { - if (ptr + i >= patternEnd || !isASCIIHexDigit(ptr[i])) { - c = 'x'; - i = 1; - break; - } - int cc = ptr[i]; - if (cc >= 'a') - cc -= 32; /* Convert to upper case */ - c = c * 16 + cc - ((cc < 'A') ? '0' : ('A' - 10)); - } - ptr += i - 1; - break; - } - - case 'u': { - c = 0; - int i; - for (i = 1; i <= 4; ++i) { - if (ptr + i >= patternEnd || !isASCIIHexDigit(ptr[i])) { - c = 'u'; - i = 1; - break; - } - int cc = ptr[i]; - if (cc >= 'a') - cc -= 32; /* Convert to upper case */ - c = c * 16 + cc - ((cc < 'A') ? '0' : ('A' - 10)); - } - ptr += i - 1; - break; - } - - case 'c': - if (++ptr == patternEnd) { - *errorCodePtr = ERR2; - return 0; - } - - c = *ptr; - - /* To match Firefox, inside a character class, we also accept - numbers and '_' as control characters */ - if ((!isClass && !isASCIIAlpha(c)) || (!isASCIIAlphanumeric(c) && c != '_')) { - c = '\\'; - ptr -= 2; - break; - } - - /* A letter is upper-cased; then the 0x40 bit is flipped. This coding - is ASCII-specific, but then the whole concept of \cx is ASCII-specific. */ - c = toASCIIUpper(c) ^ 0x40; - break; - } - } - - *ptrPtr = ptr; - return c; -} - -/************************************************* -* Check for counted repeat * -*************************************************/ - -/* This function is called when a '{' is encountered in a place where it might -start a quantifier. It looks ahead to see if it really is a quantifier or not. -It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd} -where the ddds are digits. 
- -Arguments: - p pointer to the first char after '{' - -Returns: true or false -*/ - -static bool isCountedRepeat(const UChar* p, const UChar* patternEnd) -{ - if (p >= patternEnd || !isASCIIDigit(*p)) - return false; - p++; - while (p < patternEnd && isASCIIDigit(*p)) - p++; - if (p < patternEnd && *p == '}') - return true; - - if (p >= patternEnd || *p++ != ',') - return false; - if (p < patternEnd && *p == '}') - return true; - - if (p >= patternEnd || !isASCIIDigit(*p)) - return false; - p++; - while (p < patternEnd && isASCIIDigit(*p)) - p++; - - return (p < patternEnd && *p == '}'); -} - -/************************************************* -* Read repeat counts * -*************************************************/ - -/* Read an item of the form {n,m} and return the values. This is called only -after isCountedRepeat() has confirmed that a repeat-count quantifier exists, -so the syntax is guaranteed to be correct, but we need to check the values. - -Arguments: - p pointer to first char after '{' - minp pointer to int for min - maxp pointer to int for max - returned as -1 if no max - errorCodePtr points to error code variable - -Returns: pointer to '}' on success; - current ptr on error, with errorCodePtr set non-zero -*/ - -static const UChar* readRepeatCounts(const UChar* p, int* minp, int* maxp, ErrorCode* errorCodePtr) -{ - int min = 0; - int max = -1; - - /* Read the minimum value and do a paranoid check: a negative value indicates - an integer overflow. */ - - while (isASCIIDigit(*p)) - min = min * 10 + *p++ - '0'; - if (min < 0 || min > 65535) { - *errorCodePtr = ERR5; - return p; - } - - /* Read the maximum value if there is one, and again do a paranoid on its size. - Also, max must not be less than min. 
*/ - - if (*p == '}') - max = min; - else { - if (*(++p) != '}') { - max = 0; - while (isASCIIDigit(*p)) - max = max * 10 + *p++ - '0'; - if (max < 0 || max > 65535) { - *errorCodePtr = ERR5; - return p; - } - if (max < min) { - *errorCodePtr = ERR4; - return p; - } - } - } - - /* Fill in the required variables, and pass back the pointer to the terminating - '}'. */ - - *minp = min; - *maxp = max; - return p; -} - -/************************************************* -* Find first significant op code * -*************************************************/ - -/* This is called by several functions that scan a compiled expression looking -for a fixed first character, or an anchoring op code etc. It skips over things -that do not influence this. - -Arguments: - code pointer to the start of the group -Returns: pointer to the first significant opcode -*/ - -static const unsigned char* firstSignificantOpcode(const unsigned char* code) -{ - while (*code == OP_BRANUMBER) - code += 3; - return code; -} - -static const unsigned char* firstSignificantOpcodeSkippingAssertions(const unsigned char* code) -{ - while (true) { - switch (*code) { - case OP_ASSERT_NOT: - advanceToEndOfBracket(code); - code += 1 + LINK_SIZE; - break; - case OP_WORD_BOUNDARY: - case OP_NOT_WORD_BOUNDARY: - ++code; - break; - case OP_BRANUMBER: - code += 3; - break; - default: - return code; - } - } -} - -/************************************************* -* Get othercase range * -*************************************************/ - -/* This function is passed the start and end of a class range, in UTF-8 mode -with UCP support. It searches up the characters, looking for internal ranges of -characters in the "other" case. Each call returns the next one, updating the -start address. 
- -Arguments: - cptr points to starting character value; updated - d end value - ocptr where to put start of othercase range - odptr where to put end of othercase range - -Yield: true when range returned; false when no more -*/ - -static bool getOthercaseRange(int* cptr, int d, int* ocptr, int* odptr) -{ - int c, othercase = 0; - - for (c = *cptr; c <= d; c++) { - if ((othercase = jsc_pcre_ucp_othercase(c)) >= 0) - break; - } - - if (c > d) - return false; - - *ocptr = othercase; - int next = othercase + 1; - - for (++c; c <= d; c++) { - if (jsc_pcre_ucp_othercase(c) != next) - break; - next++; - } - - *odptr = next - 1; - *cptr = c; - - return true; -} - -/************************************************* - * Convert character value to UTF-8 * - *************************************************/ - -/* This function takes an integer value in the range 0 - 0x7fffffff - and encodes it as a UTF-8 character in 0 to 6 bytes. - - Arguments: - cvalue the character value - buffer pointer to buffer for result - at least 6 bytes long - - Returns: number of characters placed in the buffer - */ - -static int encodeUTF8(int cvalue, unsigned char *buffer) -{ - int i; - for (i = 0; i < jsc_pcre_utf8_table1_size; i++) - if (cvalue <= jsc_pcre_utf8_table1[i]) - break; - buffer += i; - for (int j = i; j > 0; j--) { - *buffer-- = 0x80 | (cvalue & 0x3f); - cvalue >>= 6; - } - *buffer = jsc_pcre_utf8_table2[i] | cvalue; - return i + 1; -} - -/************************************************* -* Compile one branch * -*************************************************/ - -/* Scan the pattern, compiling it into the code vector. 
- -Arguments: - options the option bits - brackets points to number of extracting brackets used - codePtr points to the pointer to the current code point - ptrPtr points to the current pattern pointer - errorCodePtr points to error code variable - firstbyteptr set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE) - reqbyteptr set to the last literal character required, else < 0 - cd contains pointers to tables etc. - -Returns: true on success - false, with *errorCodePtr set non-zero on error -*/ - -static inline bool safelyCheckNextChar(const UChar* ptr, const UChar* patternEnd, UChar expected) -{ - return ((ptr + 1 < patternEnd) && ptr[1] == expected); -} - -static bool -compileBranch(int options, int* brackets, unsigned char** codePtr, - const UChar** ptrPtr, const UChar* patternEnd, ErrorCode* errorCodePtr, int *firstbyteptr, - int* reqbyteptr, CompileData& cd) -{ - int repeatType, opType; - int repeatMin = 0, repeat_max = 0; /* To please picky compilers */ - int bravalue = 0; - int reqvary, tempreqvary; - int c; - unsigned char* code = *codePtr; - unsigned char* tempcode; - bool didGroupSetFirstByte = false; - const UChar* ptr = *ptrPtr; - const UChar* tempptr; - unsigned char* previous = NULL; - unsigned char classbits[32]; - - bool class_utf8; - unsigned char* class_utf8data; - unsigned char utf8_char[6]; - - /* Initialize no first byte, no required byte. REQ_UNSET means "no char - matching encountered yet". It gets changed to REQ_NONE if we hit something that - matches a non-fixed char first char; reqByte just remains unset if we never - find one. - - When we hit a repeat whose minimum is zero, we may have to adjust these values - to take the zero repeat into account. This is implemented by setting them to - zeroFirstByte and zeroReqByte when such a repeat is encountered. The individual - item types that can be repeated set these backoff variables appropriately. 
*/ - - int firstByte = REQ_UNSET; - int reqByte = REQ_UNSET; - int zeroReqByte = REQ_UNSET; - int zeroFirstByte = REQ_UNSET; - - /* The variable reqCaseOpt contains either the REQ_IGNORE_CASE value or zero, - according to the current setting of the ignores-case flag. REQ_IGNORE_CASE is a bit - value > 255. It is added into the firstByte or reqByte variables to record the - case status of the value. This is used only for ASCII characters. */ - - int reqCaseOpt = (options & IgnoreCaseOption) ? REQ_IGNORE_CASE : 0; - - /* Switch on next character until the end of the branch */ - - for (;; ptr++) { - bool negateClass; - bool shouldFlipNegation; /* If a negative special such as \S is used, we should negate the whole class to properly support Unicode. */ - int classCharCount; - int classLastChar; - int skipBytes; - int subReqByte; - int subFirstByte; - int mcLength; - unsigned char mcbuffer[8]; - - /* Next byte in the pattern */ - - c = ptr < patternEnd ? *ptr : 0; - - /* Fill in length of a previous callout, except when the next thing is - a quantifier. */ - - bool isQuantifier = c == '*' || c == '+' || c == '?' || (c == '{' && isCountedRepeat(ptr + 1, patternEnd)); - - switch (c) { - /* The branch terminates at end of string, |, or ). */ - - case 0: - if (ptr < patternEnd) - goto NORMAL_CHAR; - // End of string; fall through - case '|': - case ')': - *firstbyteptr = firstByte; - *reqbyteptr = reqByte; - *codePtr = code; - *ptrPtr = ptr; - return true; - - /* Handle single-character metacharacters. In multiline mode, ^ disables - the setting of any following char as a first character. */ - - case '^': - if (options & MatchAcrossMultipleLinesOption) { - if (firstByte == REQ_UNSET) - firstByte = REQ_NONE; - *code++ = OP_BOL; - } else - *code++ = OP_CIRC; - previous = NULL; - break; - - case '$': - previous = NULL; - if (options & MatchAcrossMultipleLinesOption) - *code++ = OP_EOL; - else - *code++ = OP_DOLL; - break; - - /* There can never be a first char if '.' 
is first, whatever happens about - repeats. The value of reqByte doesn't change either. */ - - case '.': - if (firstByte == REQ_UNSET) - firstByte = REQ_NONE; - zeroFirstByte = firstByte; - zeroReqByte = reqByte; - previous = code; - *code++ = OP_NOT_NEWLINE; - break; - - /* Character classes. If the included characters are all < 256, we build a - 32-byte bitmap of the permitted characters, except in the special case - where there is only one such character. For negated classes, we build the - map as usual, then invert it at the end. However, we use a different opcode - so that data characters > 255 can be handled correctly. - - If the class contains characters outside the 0-255 range, a different - opcode is compiled. It may optionally have a bit map for characters < 256, - but those above are are explicitly listed afterwards. A flag byte tells - whether the bitmap is present, and whether this is a negated class or not. - */ - - case '[': { - previous = code; - shouldFlipNegation = false; - - /* PCRE supports POSIX class stuff inside a class. Perl gives an error if - they are encountered at the top level, so we'll do that too. */ - - /* If the first character is '^', set the negation flag and skip it. */ - - if (ptr + 1 >= patternEnd) { - *errorCodePtr = ERR6; - return false; - } - - if (ptr[1] == '^') { - negateClass = true; - ++ptr; - } else - negateClass = false; - - /* Keep a count of chars with values < 256 so that we can optimize the case - of just a single character (as long as it's < 256). For higher valued UTF-8 - characters, we don't yet do any optimization. */ - - classCharCount = 0; - classLastChar = -1; - - class_utf8 = false; /* No chars >= 256 */ - class_utf8data = code + LINK_SIZE + 34; /* For UTF-8 items */ - - /* Initialize the 32-char bit map to all zeros. We have to build the - map in a temporary bit of store, in case the class contains only 1 - character (< 256), because in that case the compiled code doesn't use the - bit map. 
*/ - - memset(classbits, 0, 32 * sizeof(unsigned char)); - - /* Process characters until ] is reached. The first pass - through the regex checked the overall syntax, so we don't need to be very - strict here. At the start of the loop, c contains the first byte of the - character. */ - - while ((++ptr < patternEnd) && (c = *ptr) != ']') { - /* Backslash may introduce a single character, or it may introduce one - of the specials, which just set a flag. Escaped items are checked for - validity in the pre-compiling pass. The sequence \b is a special case. - Inside a class (and only there) it is treated as backspace. Elsewhere - it marks a word boundary. Other escapes have preset maps ready to - or into the one we are building. We assume they have more than one - character in them, so set classCharCount bigger than one. */ - - if (c == '\\') { - c = checkEscape(&ptr, patternEnd, errorCodePtr, cd.numCapturingBrackets, true); - if (c < 0) { - classCharCount += 2; /* Greater than 1 is what matters */ - switch (-c) { - case ESC_d: - for (c = 0; c < 32; c++) - classbits[c] |= classBitmapForChar(c + cbit_digit); - continue; - - case ESC_D: - shouldFlipNegation = true; - for (c = 0; c < 32; c++) - classbits[c] |= ~classBitmapForChar(c + cbit_digit); - continue; - - case ESC_w: - for (c = 0; c < 32; c++) - classbits[c] |= classBitmapForChar(c + cbit_word); - continue; - - case ESC_W: - shouldFlipNegation = true; - for (c = 0; c < 32; c++) - classbits[c] |= ~classBitmapForChar(c + cbit_word); - continue; - - case ESC_s: - for (c = 0; c < 32; c++) - classbits[c] |= classBitmapForChar(c + cbit_space); - continue; - - case ESC_S: - shouldFlipNegation = true; - for (c = 0; c < 32; c++) - classbits[c] |= ~classBitmapForChar(c + cbit_space); - continue; - - /* Unrecognized escapes are faulted if PCRE is running in its - strict mode. By default, for compatibility with Perl, they are - treated as literals. 
*/ - - default: - c = *ptr; /* The final character */ - classCharCount -= 2; /* Undo the default count from above */ - } - } - - /* Fall through if we have a single character (c >= 0). This may be - > 256 in UTF-8 mode. */ - - } /* End of backslash handling */ - - /* A single character may be followed by '-' to form a range. However, - Perl does not permit ']' to be the end of the range. A '-' character - here is treated as a literal. */ - - if ((ptr + 2 < patternEnd) && ptr[1] == '-' && ptr[2] != ']') { - ptr += 2; - - int d = *ptr; - - /* The second part of a range can be a single-character escape, but - not any of the other escapes. Perl 5.6 treats a hyphen as a literal - in such circumstances. */ - - if (d == '\\') { - const UChar* oldptr = ptr; - d = checkEscape(&ptr, patternEnd, errorCodePtr, cd.numCapturingBrackets, true); - - /* \X is literal X; any other special means the '-' was literal */ - if (d < 0) { - ptr = oldptr - 2; - goto LONE_SINGLE_CHARACTER; /* A few lines below */ - } - } - - /* The check that the two values are in the correct order happens in - the pre-pass. Optimize one-character ranges */ - - if (d == c) - goto LONE_SINGLE_CHARACTER; /* A few lines below */ - - /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless - matching, we have to use an XCLASS with extra data items. Caseless - matching for characters > 127 is available only if UCP support is - available. */ - - if ((d > 255 || ((options & IgnoreCaseOption) && d > 127))) { - class_utf8 = true; - - /* With UCP support, we can find the other case equivalents of - the relevant characters. There may be several ranges. Optimize how - they fit with the basic range. 
*/ - - if (options & IgnoreCaseOption) { - int occ, ocd; - int cc = c; - int origd = d; - while (getOthercaseRange(&cc, origd, &occ, &ocd)) { - if (occ >= c && ocd <= d) - continue; /* Skip embedded ranges */ - - if (occ < c && ocd >= c - 1) /* Extend the basic range */ - { /* if there is overlap, */ - c = occ; /* noting that if occ < c */ - continue; /* we can't have ocd > d */ - } /* because a subrange is */ - if (ocd > d && occ <= d + 1) /* always shorter than */ - { /* the basic range. */ - d = ocd; - continue; - } - - if (occ == ocd) - *class_utf8data++ = XCL_SINGLE; - else { - *class_utf8data++ = XCL_RANGE; - class_utf8data += encodeUTF8(occ, class_utf8data); - } - class_utf8data += encodeUTF8(ocd, class_utf8data); - } - } - - /* Now record the original range, possibly modified for UCP caseless - overlapping ranges. */ - - *class_utf8data++ = XCL_RANGE; - class_utf8data += encodeUTF8(c, class_utf8data); - class_utf8data += encodeUTF8(d, class_utf8data); - - /* With UCP support, we are done. Without UCP support, there is no - caseless matching for UTF-8 characters > 127; we can use the bit map - for the smaller ones. */ - - continue; /* With next character in the class */ - } - - /* We use the bit map for all cases when not in UTF-8 mode; else - ranges that lie entirely within 0-127 when there is UCP support; else - for partial ranges without UCP support. */ - - for (; c <= d; c++) { - classbits[c/8] |= (1 << (c&7)); - if (options & IgnoreCaseOption) { - int uc = flipCase(c); - classbits[uc/8] |= (1 << (uc&7)); - } - classCharCount++; /* in case a one-char range */ - classLastChar = c; - } - - continue; /* Go get the next char in the class */ - } - - /* Handle a lone single character - we can get here for a normal - non-escape char, or after \ that introduces a single character or for an - apparent range that isn't. 
*/ - - LONE_SINGLE_CHARACTER: - - /* Handle a character that cannot go in the bit map */ - - if ((c > 255 || ((options & IgnoreCaseOption) && c > 127))) { - class_utf8 = true; - *class_utf8data++ = XCL_SINGLE; - class_utf8data += encodeUTF8(c, class_utf8data); - - if (options & IgnoreCaseOption) { - int othercase; - if ((othercase = jsc_pcre_ucp_othercase(c)) >= 0) { - *class_utf8data++ = XCL_SINGLE; - class_utf8data += encodeUTF8(othercase, class_utf8data); - } - } - } else { - /* Handle a single-byte character */ - classbits[c/8] |= (1 << (c&7)); - if (options & IgnoreCaseOption) { - c = flipCase(c); - classbits[c/8] |= (1 << (c&7)); - } - classCharCount++; - classLastChar = c; - } - } - - /* If classCharCount is 1, we saw precisely one character whose value is - less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we - can optimize the negative case only if there were no characters >= 128 - because OP_NOT and the related opcodes like OP_NOTSTAR operate on - single-bytes only. This is an historical hangover. Maybe one day we can - tidy these opcodes to handle multi-byte characters. - - The optimization throws away the bit map. We turn the item into a - 1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note - that OP_NOT does not support multibyte characters. In the positive case, it - can cause firstByte to be set. Otherwise, there can be no first char if - this item is first, whatever repeat count may follow. In the case of - reqByte, save the previous value for reinstating. */ - - if (classCharCount == 1 && (!class_utf8 && (!negateClass || classLastChar < 128))) { - zeroReqByte = reqByte; - - /* The OP_NOT opcode works on one-byte characters only. 
*/ - - if (negateClass) { - if (firstByte == REQ_UNSET) - firstByte = REQ_NONE; - zeroFirstByte = firstByte; - *code++ = OP_NOT; - *code++ = classLastChar; - break; - } - - /* For a single, positive character, get the value into c, and - then we can handle this with the normal one-character code. */ - - c = classLastChar; - goto NORMAL_CHAR; - } /* End of 1-char optimization */ - - /* The general case - not the one-char optimization. If this is the first - thing in the branch, there can be no first char setting, whatever the - repeat count. Any reqByte setting must remain unchanged after any kind of - repeat. */ - - if (firstByte == REQ_UNSET) firstByte = REQ_NONE; - zeroFirstByte = firstByte; - zeroReqByte = reqByte; - - /* If there are characters with values > 255, we have to compile an - extended class, with its own opcode. If there are no characters < 256, - we can omit the bitmap. */ - - if (class_utf8 && !shouldFlipNegation) { - *class_utf8data++ = XCL_END; /* Marks the end of extra data */ - *code++ = OP_XCLASS; - code += LINK_SIZE; - *code = negateClass? XCL_NOT : 0; - - /* If the map is required, install it, and move on to the end of - the extra data */ - - if (classCharCount > 0) { - *code++ |= XCL_MAP; - memcpy(code, classbits, 32); - code = class_utf8data; - } - - /* If the map is not required, slide down the extra data. */ - - else { - int len = class_utf8data - (code + 33); - memmove(code + 1, code + 33, len); - code += len + 1; - } - - /* Now fill in the complete length of the item */ - - putLinkValue(previous + 1, code - previous); - break; /* End of class handling */ - } - - /* If there are no characters > 255, negate the 32-byte map if necessary, - and copy it into the code vector. If this is the first thing in the branch, - there can be no first char setting, whatever the repeat count. Any reqByte - setting must remain unchanged after any kind of repeat. */ - - *code++ = (negateClass == shouldFlipNegation) ? 
OP_CLASS : OP_NCLASS; - if (negateClass) - for (c = 0; c < 32; c++) - code[c] = ~classbits[c]; - else - memcpy(code, classbits, 32); - code += 32; - break; - } - - /* Various kinds of repeat; '{' is not necessarily a quantifier, but this - has been tested above. */ - - case '{': - if (!isQuantifier) - goto NORMAL_CHAR; - ptr = readRepeatCounts(ptr + 1, &repeatMin, &repeat_max, errorCodePtr); - if (*errorCodePtr) - goto FAILED; - goto REPEAT; - - case '*': - repeatMin = 0; - repeat_max = -1; - goto REPEAT; - - case '+': - repeatMin = 1; - repeat_max = -1; - goto REPEAT; - - case '?': - repeatMin = 0; - repeat_max = 1; - - REPEAT: - if (!previous) { - *errorCodePtr = ERR9; - goto FAILED; - } - - if (repeatMin == 0) { - firstByte = zeroFirstByte; /* Adjust for zero repeat */ - reqByte = zeroReqByte; /* Ditto */ - } - - /* Remember whether this is a variable length repeat */ - - reqvary = (repeatMin == repeat_max) ? 0 : REQ_VARY; - - opType = 0; /* Default single-char op codes */ - - /* Save start of previous item, in case we have to move it up to make space - for an inserted OP_ONCE for the additional '+' extension. */ - /* FIXME: Probably don't need this because we don't use OP_ONCE. */ - - tempcode = previous; - - /* If the next character is '+', we have a possessive quantifier. This - implies greediness, whatever the setting of the PCRE_UNGREEDY option. - If the next character is '?' this is a minimizing repeat, by default, - but if PCRE_UNGREEDY is set, it works the other way round. We change the - repeat type to the non-default. */ - - if (safelyCheckNextChar(ptr, patternEnd, '?')) { - repeatType = 1; - ptr++; - } else - repeatType = 0; - - /* If previous was a character match, abolish the item and generate a - repeat item instead. If a char item has a minumum of more than one, ensure - that it is set in reqByte - it might not be if a sequence such as x{3} is - the first thing in a branch because the x will have gone into firstByte - instead. 
*/ - - if (*previous == OP_CHAR || *previous == OP_CHAR_IGNORING_CASE) { - /* Deal with UTF-8 characters that take up more than one byte. It's - easier to write this out separately than try to macrify it. Use c to - hold the length of the character in bytes, plus 0x80 to flag that it's a - length rather than a small character. */ - - if (code[-1] & 0x80) { - unsigned char *lastchar = code - 1; - while((*lastchar & 0xc0) == 0x80) - lastchar--; - c = code - lastchar; /* Length of UTF-8 character */ - memcpy(utf8_char, lastchar, c); /* Save the char */ - c |= 0x80; /* Flag c as a length */ - } - else { - c = code[-1]; - if (repeatMin > 1) - reqByte = c | reqCaseOpt | cd.reqVaryOpt; - } - - goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ - } - - else if (*previous == OP_ASCII_CHAR || *previous == OP_ASCII_LETTER_IGNORING_CASE) { - c = previous[1]; - if (repeatMin > 1) - reqByte = c | reqCaseOpt | cd.reqVaryOpt; - goto OUTPUT_SINGLE_REPEAT; - } - - /* If previous was a single negated character ([^a] or similar), we use - one of the special opcodes, replacing it. The code is shared with single- - character repeats by setting opt_type to add a suitable offset into - repeatType. OP_NOT is currently used only for single-byte chars. */ - - else if (*previous == OP_NOT) { - opType = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */ - c = previous[1]; - goto OUTPUT_SINGLE_REPEAT; - } - - /* If previous was a character type match (\d or similar), abolish it and - create a suitable repeat item. The code is shared with single-character - repeats by setting opType to add a suitable offset into repeatType. 
*/ - - else if (*previous <= OP_NOT_NEWLINE) { - opType = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ - c = *previous; - - OUTPUT_SINGLE_REPEAT: - int prop_type = -1; - int prop_value = -1; - - unsigned char* oldcode = code; - code = previous; /* Usually overwrite previous item */ - - /* If the maximum is zero then the minimum must also be zero; Perl allows - this case, so we do too - by simply omitting the item altogether. */ - - if (repeat_max == 0) - goto END_REPEAT; - - /* Combine the opType with the repeatType */ - - repeatType += opType; - - /* A minimum of zero is handled either as the special case * or ?, or as - an UPTO, with the maximum given. */ - - if (repeatMin == 0) { - if (repeat_max == -1) - *code++ = OP_STAR + repeatType; - else if (repeat_max == 1) - *code++ = OP_QUERY + repeatType; - else { - *code++ = OP_UPTO + repeatType; - put2ByteValueAndAdvance(code, repeat_max); - } - } - - /* A repeat minimum of 1 is optimized into some special cases. If the - maximum is unlimited, we use OP_PLUS. Otherwise, the original item it - left in place and, if the maximum is greater than 1, we use OP_UPTO with - one less than the maximum. */ - - else if (repeatMin == 1) { - if (repeat_max == -1) - *code++ = OP_PLUS + repeatType; - else { - code = oldcode; /* leave previous item in place */ - if (repeat_max == 1) - goto END_REPEAT; - *code++ = OP_UPTO + repeatType; - put2ByteValueAndAdvance(code, repeat_max - 1); - } - } - - /* The case {n,n} is just an EXACT, while the general case {n,m} is - handled as an EXACT followed by an UPTO. */ - - else { - *code++ = OP_EXACT + opType; /* NB EXACT doesn't have repeatType */ - put2ByteValueAndAdvance(code, repeatMin); - - /* If the maximum is unlimited, insert an OP_STAR. Before doing so, - we have to insert the character for the previous code. For a repeated - Unicode property match, there are two extra bytes that define the - required property. 
In UTF-8 mode, long characters have their length in - c, with the 0x80 bit as a flag. */ - - if (repeat_max < 0) { - if (c >= 128) { - memcpy(code, utf8_char, c & 7); - code += c & 7; - } else { - *code++ = c; - if (prop_type >= 0) { - *code++ = prop_type; - *code++ = prop_value; - } - } - *code++ = OP_STAR + repeatType; - } - - /* Else insert an UPTO if the max is greater than the min, again - preceded by the character, for the previously inserted code. */ - - else if (repeat_max != repeatMin) { - if (c >= 128) { - memcpy(code, utf8_char, c & 7); - code += c & 7; - } else - *code++ = c; - if (prop_type >= 0) { - *code++ = prop_type; - *code++ = prop_value; - } - repeat_max -= repeatMin; - *code++ = OP_UPTO + repeatType; - put2ByteValueAndAdvance(code, repeat_max); - } - } - - /* The character or character type itself comes last in all cases. */ - - if (c >= 128) { - memcpy(code, utf8_char, c & 7); - code += c & 7; - } else - *code++ = c; - - /* For a repeated Unicode property match, there are two extra bytes that - define the required property. */ - - if (prop_type >= 0) { - *code++ = prop_type; - *code++ = prop_value; - } - } - - /* If previous was a character class or a back reference, we put the repeat - stuff after it, but just skip the item if the repeat was {0,0}. 
*/ - - else if (*previous == OP_CLASS || - *previous == OP_NCLASS || - *previous == OP_XCLASS || - *previous == OP_REF) - { - if (repeat_max == 0) { - code = previous; - goto END_REPEAT; - } - - if (repeatMin == 0 && repeat_max == -1) - *code++ = OP_CRSTAR + repeatType; - else if (repeatMin == 1 && repeat_max == -1) - *code++ = OP_CRPLUS + repeatType; - else if (repeatMin == 0 && repeat_max == 1) - *code++ = OP_CRQUERY + repeatType; - else { - *code++ = OP_CRRANGE + repeatType; - put2ByteValueAndAdvance(code, repeatMin); - if (repeat_max == -1) - repeat_max = 0; /* 2-byte encoding for max */ - put2ByteValueAndAdvance(code, repeat_max); - } - } - - /* If previous was a bracket group, we may have to replicate it in certain - cases. */ - - else if (*previous >= OP_BRA) { - int ketoffset = 0; - int len = code - previous; - unsigned char* bralink = NULL; - int nested = get2ByteValue(previous + 1 + LINK_SIZE); - - /* If the maximum repeat count is unlimited, find the end of the bracket - by scanning through from the start, and compute the offset back to it - from the current code pointer. There may be an OP_OPT setting following - the final KET, so we can't find the end just by going back from the code - pointer. */ - - if (repeat_max == -1) { - const unsigned char* ket = previous; - advanceToEndOfBracket(ket); - ketoffset = code - ket; - } - - /* The case of a zero minimum is special because of the need to stick - OP_BRAZERO in front of it, and because the group appears once in the - data, whereas in other cases it appears the minimum number of times. For - this reason, it is simplest to treat this case separately, as otherwise - the code gets far too messy. There are several special subcases when the - minimum is zero. */ - - if (repeatMin == 0) { - /* If the maximum is also zero, we just omit the group from the output - altogether. 
*/ - - if (repeat_max == 0) { - code = previous; - goto END_REPEAT; - } - - /* If the maximum is 1 or unlimited, we just have to stick in the - BRAZERO and do no more at this point. However, we do need to adjust - any OP_RECURSE calls inside the group that refer to the group itself or - any internal group, because the offset is from the start of the whole - regex. Temporarily terminate the pattern while doing this. */ - - if (repeat_max <= 1) { - *code = OP_END; - memmove(previous+1, previous, len); - code++; - *previous++ = OP_BRAZERO + repeatType; - } - - /* If the maximum is greater than 1 and limited, we have to replicate - in a nested fashion, sticking OP_BRAZERO before each set of brackets. - The first one has to be handled carefully because it's the original - copy, which has to be moved up. The remainder can be handled by code - that is common with the non-zero minimum case below. We have to - adjust the value of repeat_max, since one less copy is required. */ - - else { - *code = OP_END; - memmove(previous + 4 + LINK_SIZE, previous, len); - code += 4 + LINK_SIZE; - *previous++ = OP_BRAZERO + repeatType; - *previous++ = OP_BRA; - - /* We chain together the bracket offset fields that have to be - filled in later when the ends of the brackets are reached. */ - - int offset = (!bralink) ? 0 : previous - bralink; - bralink = previous; - putLinkValueAllowZeroAndAdvance(previous, offset); - put2ByteValueAndAdvance(previous, nested); - } - - repeat_max--; - } - - /* If the minimum is greater than zero, replicate the group as many - times as necessary, and adjust the maximum to the number of subsequent - copies that we need. If we set a first char from the group, and didn't - set a required char, copy the latter from the former. 
*/ - - else { - if (repeatMin > 1) { - if (didGroupSetFirstByte && reqByte < 0) - reqByte = firstByte; - for (int i = 1; i < repeatMin; i++) { - memcpy(code, previous, len); - code += len; - } - } - if (repeat_max > 0) - repeat_max -= repeatMin; - } - - /* This code is common to both the zero and non-zero minimum cases. If - the maximum is limited, it replicates the group in a nested fashion, - remembering the bracket starts on a stack. In the case of a zero minimum, - the first one was set up above. In all cases the repeat_max now specifies - the number of additional copies needed. */ - - if (repeat_max >= 0) { - for (int i = repeat_max - 1; i >= 0; i--) { - *code++ = OP_BRAZERO + repeatType; - - /* All but the final copy start a new nesting, maintaining the - chain of brackets outstanding. */ - - if (i != 0) { - *code++ = OP_BRA; - int offset = (!bralink) ? 0 : code - bralink; - bralink = code; - putLinkValueAllowZeroAndAdvance(code, offset); - put2ByteValueAndAdvance(code, nested); - } - - memcpy(code, previous, len); - code += len; - } - - /* Now chain through the pending brackets, and fill in their length - fields (which are holding the chain links pro tem). */ - - while (bralink) { - int offset = code - bralink + 1; - unsigned char* bra = code - offset; - int oldlinkoffset = getLinkValueAllowZero(bra + 1); - bralink = (!oldlinkoffset) ? 0 : bralink - oldlinkoffset; - *code++ = OP_KET; - putLinkValueAndAdvance(code, offset); - putLinkValue(bra + 1, offset); - } - } - - /* If the maximum is unlimited, set a repeater in the final copy. We - can't just offset backwards from the current code point, because we - don't know if there's been an options resetting after the ket. The - correct offset was computed above. */ - - else - code[-ketoffset] = OP_KETRMAX + repeatType; - } - - // A quantifier after an assertion is mostly meaningless, but it - // can nullify the assertion if it has a 0 minimum. 
- else if (*previous == OP_ASSERT || *previous == OP_ASSERT_NOT) { - if (repeatMin == 0) { - code = previous; - goto END_REPEAT; - } - } - - /* Else there's some kind of shambles */ - - else { - *errorCodePtr = ERR11; - goto FAILED; - } - - /* In all case we no longer have a previous item. We also set the - "follows varying string" flag for subsequently encountered reqbytes if - it isn't already set and we have just passed a varying length item. */ - - END_REPEAT: - previous = NULL; - cd.reqVaryOpt |= reqvary; - break; - - /* Start of nested bracket sub-expression, or comment or lookahead or - lookbehind or option setting or condition. First deal with special things - that can come after a bracket; all are introduced by ?, and the appearance - of any of them means that this is not a referencing group. They were - checked for validity in the first pass over the string, so we don't have to - check for syntax errors here. */ - - case '(': - { - skipBytes = 2; - unsigned minBracket = *brackets + 1; - if (*(++ptr) == '?') { - switch (*(++ptr)) { - case ':': /* Non-extracting bracket */ - bravalue = OP_BRA; - ptr++; - break; - - case '=': /* Positive lookahead */ - bravalue = OP_ASSERT; - ptr++; - break; - - case '!': /* Negative lookahead */ - bravalue = OP_ASSERT_NOT; - ptr++; - break; - - /* Character after (? not specially recognized */ - - default: - *errorCodePtr = ERR12; - goto FAILED; - } - } - - /* Else we have a referencing group; adjust the opcode. If the bracket - number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and - arrange for the true number to follow later, in an OP_BRANUMBER item. */ - - else { - if (++(*brackets) > EXTRACT_BASIC_MAX) { - bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1; - code[3 + LINK_SIZE] = OP_BRANUMBER; - put2ByteValue(code + 4 + LINK_SIZE, *brackets); - skipBytes = 5; - } - else - bravalue = OP_BRA + *brackets; - } - - /* Process nested bracketed re. 
We copy code into a non-variable - in order to be able to pass its address because some compilers - complain otherwise. Pass in a new setting for the ims options - if they have changed. */ - - previous = code; - *code = bravalue; - tempcode = code; - tempreqvary = cd.reqVaryOpt; /* Save value before bracket */ - { - unsigned bracketsBeforeRecursion = *brackets; - if (!compileBracket( - options, - brackets, /* Extracting bracket count */ - &tempcode, /* Where to put code (updated) */ - &ptr, /* Input pointer (updated) */ - patternEnd, - errorCodePtr, /* Where to put an error message */ - skipBytes, /* Skip over OP_BRANUMBER */ - &subFirstByte, /* For possible first char */ - &subReqByte, /* For possible last char */ - cd)) /* Tables block */ - goto FAILED; - unsigned enclosedBrackets = (*brackets - bracketsBeforeRecursion); - unsigned limitBracket = minBracket + enclosedBrackets + (bravalue > OP_BRA); - if (!((minBracket & 0xff) == minBracket && (limitBracket & 0xff) == limitBracket)) { - *errorCodePtr = ERR17; - return false; - } - JS_ASSERT(minBracket <= limitBracket); - put2ByteValue(code + 1 + LINK_SIZE, minBracket << 8 | limitBracket); - } - - /* At the end of compiling, code is still pointing to the start of the - group, while tempcode has been updated to point past the end of the group - and any option resetting that may follow it. The pattern pointer (ptr) - is on the bracket. */ - - /* Handle updating of the required and first characters. Update for normal - brackets of all kinds, and conditions with two branches (see code above). - If the bracket is followed by a quantifier with zero repeat, we have to - back off. Hence the definition of zeroReqByte and zeroFirstByte outside the - main loop so that they can be accessed for the back off. 
*/ - - zeroReqByte = reqByte; - zeroFirstByte = firstByte; - didGroupSetFirstByte = false; - - if (bravalue >= OP_BRA) { - /* If we have not yet set a firstByte in this branch, take it from the - subpattern, remembering that it was set here so that a repeat of more - than one can replicate it as reqByte if necessary. If the subpattern has - no firstByte, set "none" for the whole branch. In both cases, a zero - repeat forces firstByte to "none". */ - - if (firstByte == REQ_UNSET) { - if (subFirstByte >= 0) { - firstByte = subFirstByte; - didGroupSetFirstByte = true; - } - else - firstByte = REQ_NONE; - zeroFirstByte = REQ_NONE; - } - - /* If firstByte was previously set, convert the subpattern's firstByte - into reqByte if there wasn't one, using the vary flag that was in - existence beforehand. */ - - else if (subFirstByte >= 0 && subReqByte < 0) - subReqByte = subFirstByte | tempreqvary; - - /* If the subpattern set a required byte (or set a first byte that isn't - really the first byte - see above), set it. */ - - if (subReqByte >= 0) - reqByte = subReqByte; - } - - /* For a forward assertion, we take the reqByte, if set. This can be - helpful if the pattern that follows the assertion doesn't set a different - char. For example, it's useful for /(?=abcde).+/. We can't set firstByte - for an assertion, however because it leads to incorrect effect for patterns - such as /(?=a)a.+/ when the "real" "a" would then become a reqByte instead - of a firstByte. This is overcome by a scan at the end if there's no - firstByte, looking for an asserted first char. */ - - else if (bravalue == OP_ASSERT && subReqByte >= 0) - reqByte = subReqByte; - - /* Now update the main code pointer to the end of the group. 
*/ - - code = tempcode; - - /* Error if hit end of pattern */ - - if (ptr >= patternEnd || *ptr != ')') { - *errorCodePtr = ERR14; - goto FAILED; - } - break; - - } - /* Check \ for being a real metacharacter; if not, fall through and handle - it as a data character at the start of a string. Escape items are checked - for validity in the pre-compiling pass. */ - - case '\\': - tempptr = ptr; - c = checkEscape(&ptr, patternEnd, errorCodePtr, cd.numCapturingBrackets, false); - - /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values - are arranged to be the negation of the corresponding OP_values. For the - back references, the values are ESC_REF plus the reference number. Only - back references and those types that consume a character may be repeated. - We can test for values between ESC_b and ESC_w for the latter; this may - have to change if any new ones are ever created. */ - - if (c < 0) { - /* For metasequences that actually match a character, we disable the - setting of a first character if it hasn't already been set. */ - - if (firstByte == REQ_UNSET && -c > ESC_b && -c <= ESC_w) - firstByte = REQ_NONE; - - /* Set values to reset to if this is followed by a zero repeat. */ - - zeroFirstByte = firstByte; - zeroReqByte = reqByte; - - /* Back references are handled specially */ - - if (-c >= ESC_REF) { - int number = -c - ESC_REF; - previous = code; - *code++ = OP_REF; - put2ByteValueAndAdvance(code, number); - } - - /* For the rest, we can obtain the OP value by negating the escape - value */ - - else { - previous = (-c > ESC_b && -c <= ESC_w) ? code : NULL; - *code++ = -c; - } - continue; - } - - /* Fall through. */ - - /* Handle a literal character. It is guaranteed not to be whitespace or # - when the extended flag is set. If we are in UTF-8 mode, it may be a - multi-byte literal character. 
*/ - - default: - NORMAL_CHAR: - - previous = code; - - if (c < 128) { - mcLength = 1; - mcbuffer[0] = c; - - if ((options & IgnoreCaseOption) && (c | 0x20) >= 'a' && (c | 0x20) <= 'z') { - *code++ = OP_ASCII_LETTER_IGNORING_CASE; - *code++ = c | 0x20; - } else { - *code++ = OP_ASCII_CHAR; - *code++ = c; - } - } else { - mcLength = encodeUTF8(c, mcbuffer); - - *code++ = (options & IgnoreCaseOption) ? OP_CHAR_IGNORING_CASE : OP_CHAR; - for (c = 0; c < mcLength; c++) - *code++ = mcbuffer[c]; - } - - /* Set the first and required bytes appropriately. If no previous first - byte, set it from this character, but revert to none on a zero repeat. - Otherwise, leave the firstByte value alone, and don't change it on a zero - repeat. */ - - if (firstByte == REQ_UNSET) { - zeroFirstByte = REQ_NONE; - zeroReqByte = reqByte; - - /* If the character is more than one byte long, we can set firstByte - only if it is not to be matched caselessly. */ - - if (mcLength == 1 || reqCaseOpt == 0) { - firstByte = mcbuffer[0] | reqCaseOpt; - if (mcLength != 1) - reqByte = code[-1] | cd.reqVaryOpt; - } - else - firstByte = reqByte = REQ_NONE; - } - - /* firstByte was previously set; we can set reqByte only the length is - 1 or the matching is caseful. */ - - else { - zeroFirstByte = firstByte; - zeroReqByte = reqByte; - if (mcLength == 1 || reqCaseOpt == 0) - reqByte = code[-1] | reqCaseOpt | cd.reqVaryOpt; - } - - break; /* End of literal character handling */ - } - } /* end of big loop */ - - /* Control never reaches here by falling through, only by a goto for all the - error states. Pass back the position in the pattern so that it can be displayed - to the user for diagnosing the error. 
*/ - -FAILED: - *ptrPtr = ptr; - return false; -} - -/************************************************* -* Compile sequence of alternatives * -*************************************************/ - -/* On entry, ptr is pointing past the bracket character, but on return -it points to the closing bracket, or vertical bar, or end of string. -The code variable is pointing at the byte into which the BRA operator has been -stored. If the ims options are changed at the start (for a (?ims: group) or -during any branch, we need to insert an OP_OPT item at the start of every -following branch to ensure they get set correctly at run time, and also pass -the new options into every subsequent branch compile. - -Argument: - options option bits, including any changes for this subpattern - brackets -> int containing the number of extracting brackets used - codePtr -> the address of the current code pointer - ptrPtr -> the address of the current pattern pointer - errorCodePtr -> pointer to error code variable - skipBytes skip this many bytes at start (for OP_BRANUMBER) - firstbyteptr place to put the first required character, or a negative number - reqbyteptr place to put the last required character, or a negative number - cd points to the data block with tables pointers etc. 
- -Returns: true on success -*/ - -static bool -compileBracket(int options, int* brackets, unsigned char** codePtr, - const UChar** ptrPtr, const UChar* patternEnd, ErrorCode* errorCodePtr, int skipBytes, - int* firstbyteptr, int* reqbyteptr, CompileData& cd) -{ - const UChar* ptr = *ptrPtr; - unsigned char* code = *codePtr; - unsigned char* lastBranch = code; - unsigned char* start_bracket = code; - int firstByte = REQ_UNSET; - int reqByte = REQ_UNSET; - - /* Offset is set zero to mark that this bracket is still open */ - - putLinkValueAllowZero(code + 1, 0); - code += 1 + LINK_SIZE + skipBytes; - - /* Loop for each alternative branch */ - - while (true) { - /* Now compile the branch */ - - int branchFirstByte; - int branchReqByte; - if (!compileBranch(options, brackets, &code, &ptr, patternEnd, errorCodePtr, - &branchFirstByte, &branchReqByte, cd)) { - *ptrPtr = ptr; - return false; - } - - /* If this is the first branch, the firstByte and reqByte values for the - branch become the values for the regex. */ - - if (*lastBranch != OP_ALT) { - firstByte = branchFirstByte; - reqByte = branchReqByte; - } - - /* If this is not the first branch, the first char and reqByte have to - match the values from all the previous branches, except that if the previous - value for reqByte didn't have REQ_VARY set, it can still match, and we set - REQ_VARY for the regex. */ - - else { - /* If we previously had a firstByte, but it doesn't match the new branch, - we have to abandon the firstByte for the regex, but if there was previously - no reqByte, it takes on the value of the old firstByte. */ - - if (firstByte >= 0 && firstByte != branchFirstByte) { - if (reqByte < 0) - reqByte = firstByte; - firstByte = REQ_NONE; - } - - /* If we (now or from before) have no firstByte, a firstByte from the - branch becomes a reqByte if there isn't a branch reqByte. 
*/ - - if (firstByte < 0 && branchFirstByte >= 0 && branchReqByte < 0) - branchReqByte = branchFirstByte; - - /* Now ensure that the reqbytes match */ - - if ((reqByte & ~REQ_VARY) != (branchReqByte & ~REQ_VARY)) - reqByte = REQ_NONE; - else - reqByte |= branchReqByte; /* To "or" REQ_VARY */ - } - - /* Reached end of expression, either ')' or end of pattern. Go back through - the alternative branches and reverse the chain of offsets, with the field in - the BRA item now becoming an offset to the first alternative. If there are - no alternatives, it points to the end of the group. The length in the - terminating ket is always the length of the whole bracketed item. - Return leaving the pointer at the terminating char. */ - - if (ptr >= patternEnd || *ptr != '|') { - int length = code - lastBranch; - do { - int prevLength = getLinkValueAllowZero(lastBranch + 1); - putLinkValue(lastBranch + 1, length); - length = prevLength; - lastBranch -= length; - } while (length > 0); - - /* Fill in the ket */ - - *code = OP_KET; - putLinkValue(code + 1, code - start_bracket); - code += 1 + LINK_SIZE; - - /* Set values to pass back */ - - *codePtr = code; - *ptrPtr = ptr; - *firstbyteptr = firstByte; - *reqbyteptr = reqByte; - return true; - } - - /* Another branch follows; insert an "or" node. Its length field points back - to the previous branch while the bracket remains open. At the end the chain - is reversed. It's done like this so that the start of the bracket has a - zero offset until it is closed, making it possible to detect recursion. */ - - *code = OP_ALT; - putLinkValue(code + 1, code - lastBranch); - lastBranch = code; - code += 1 + LINK_SIZE; - ptr++; - } - JS_NOT_REACHED("No fallthru."); -} - -/************************************************* -* Check for anchored expression * -*************************************************/ - -/* Try to find out if this is an anchored regular expression. Consider each -alternative branch. 
If they all start OP_CIRC, or with a bracket -all of whose alternatives start OP_CIRC (recurse ad lib), then -it's anchored. - -Arguments: - code points to start of expression (the bracket) - captureMap a bitmap of which brackets we are inside while testing; this - handles up to substring 31; all brackets after that share - the zero bit - backrefMap the back reference bitmap -*/ - -static bool branchIsAnchored(const unsigned char* code) -{ - const unsigned char* scode = firstSignificantOpcode(code); - int op = *scode; - - /* Brackets */ - if (op >= OP_BRA || op == OP_ASSERT) - return bracketIsAnchored(scode); - - /* Check for explicit anchoring */ - return op == OP_CIRC; -} - -static bool bracketIsAnchored(const unsigned char* code) -{ - do { - if (!branchIsAnchored(code + 1 + LINK_SIZE)) - return false; - code += getLinkValue(code + 1); - } while (*code == OP_ALT); /* Loop for each alternative */ - return true; -} - -/************************************************* -* Check for starting with ^ or .* * -*************************************************/ - -/* This is called to find out if every branch starts with ^ or .* so that -"first char" processing can be done to speed things up in multiline -matching and for non-DOTALL patterns that start with .* (which must start at -the beginning or after \n) - -Except when the .* appears inside capturing parentheses, and there is a -subsequent back reference to those parentheses. By keeping a bitmap of the -first 31 back references, we can catch some of the more common cases more -precisely; all the greater back references share a single bit. 
- -Arguments: - code points to start of expression (the bracket) - captureMap a bitmap of which brackets we are inside while testing; this - handles up to substring 31; all brackets after that share - the zero bit - backrefMap the back reference bitmap -*/ - -static bool branchNeedsLineStart(const unsigned char* code, unsigned captureMap, unsigned backrefMap) -{ - const unsigned char* scode = firstSignificantOpcode(code); - int op = *scode; - - /* Capturing brackets */ - if (op > OP_BRA) { - int captureNum = op - OP_BRA; - if (captureNum > EXTRACT_BASIC_MAX) - captureNum = get2ByteValue(scode + 2 + LINK_SIZE); - int bracketMask = (captureNum < 32) ? (1 << captureNum) : 1; - return bracketNeedsLineStart(scode, captureMap | bracketMask, backrefMap); - } - - /* Other brackets */ - if (op == OP_BRA || op == OP_ASSERT) - return bracketNeedsLineStart(scode, captureMap, backrefMap); - - /* .* means "start at start or after \n" if it isn't in brackets that - may be referenced. */ - - if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) - return scode[1] == OP_NOT_NEWLINE && !(captureMap & backrefMap); - - /* Explicit ^ */ - return op == OP_CIRC || op == OP_BOL; -} - -static bool bracketNeedsLineStart(const unsigned char* code, unsigned captureMap, unsigned backrefMap) -{ - do { - if (!branchNeedsLineStart(code + 1 + LINK_SIZE, captureMap, backrefMap)) - return false; - code += getLinkValue(code + 1); - } while (*code == OP_ALT); /* Loop for each alternative */ - return true; -} - -/************************************************* -* Check for asserted fixed first char * -*************************************************/ - -/* During compilation, the "first char" settings from forward assertions are -discarded, because they can cause conflicts with actual literals that follow. -However, if we end up without a first char setting for an unanchored pattern, -it is worth scanning the regex to see if there is an initial asserted first -char. 
If all branches start with the same asserted char, or with a bracket all -of whose alternatives start with the same asserted char (recurse ad lib), then -we return that char, otherwise -1. - -Arguments: - code points to start of expression (the bracket) - options pointer to the options (used to check casing changes) - inassert true if in an assertion - -Returns: -1 or the fixed first char -*/ - -static int branchFindFirstAssertedCharacter(const unsigned char* code, bool inassert) -{ - const unsigned char* scode = firstSignificantOpcodeSkippingAssertions(code); - int op = *scode; - - if (op >= OP_BRA) - op = OP_BRA; - - switch (op) { - default: - return -1; - - case OP_BRA: - case OP_ASSERT: - return bracketFindFirstAssertedCharacter(scode, op == OP_ASSERT); - - case OP_EXACT: - scode += 2; - /* Fall through */ - - case OP_CHAR: - case OP_CHAR_IGNORING_CASE: - case OP_ASCII_CHAR: - case OP_ASCII_LETTER_IGNORING_CASE: - case OP_PLUS: - case OP_MINPLUS: - if (!inassert) - return -1; - return scode[1]; - } -} - -static int bracketFindFirstAssertedCharacter(const unsigned char* code, bool inassert) -{ - int c = -1; - do { - int d = branchFindFirstAssertedCharacter(code + 1 + LINK_SIZE, inassert); - if (d < 0) - return -1; - if (c < 0) - c = d; - else if (c != d) - return -1; - code += getLinkValue(code + 1); - } while (*code == OP_ALT); - return c; -} - -static inline int multiplyWithOverflowCheck(int a, int b) -{ - if (!a || !b) - return 0; - if (a > MAX_PATTERN_SIZE / b) - return -1; - return a * b; -} - -static int calculateCompiledPatternLength(const UChar* pattern, int patternLength, JSRegExpIgnoreCaseOption ignoreCase, - CompileData& cd, ErrorCode& errorcode) -{ - /* Make a pass over the pattern to compute the - amount of store required to hold the compiled code. This does not have to be - perfect as long as errors are overestimates. */ - - if (patternLength > MAX_PATTERN_SIZE) { - errorcode = ERR16; - return -1; - } - - int length = BRA_LEN; /* For initial BRA. 
*/ - int branch_extra = 0; - int lastitemlength = 0; - unsigned brastackptr = 0; - int brastack[BRASTACK_SIZE]; - unsigned char bralenstack[BRASTACK_SIZE]; - int bracount = 0; - - const UChar* ptr = (const UChar*)(pattern - 1); - const UChar* patternEnd = (const UChar*)(pattern + patternLength); - - while (++ptr < patternEnd) { - int minRepeats = 0, maxRepeats = 0; - int c = *ptr; - - switch (c) { - /* A backslashed item may be an escaped data character or it may be a - character type. */ - - case '\\': - c = checkEscape(&ptr, patternEnd, &errorcode, cd.numCapturingBrackets, false); - if (errorcode != 0) - return -1; - - lastitemlength = 1; /* Default length of last item for repeats */ - - if (c >= 0) { /* Data character */ - length += 2; /* For a one-byte character */ - - if (c > 127) { - int i; - for (i = 0; i < jsc_pcre_utf8_table1_size; i++) - if (c <= jsc_pcre_utf8_table1[i]) break; - length += i; - lastitemlength += i; - } - - continue; - } - - /* Other escapes need one byte */ - - length++; - - /* A back reference needs an additional 2 bytes, plus either one or 5 - bytes for a repeat. We also need to keep the value of the highest - back reference. */ - - if (c <= -ESC_REF) { - int refnum = -c - ESC_REF; - cd.backrefMap |= (refnum < 32) ? 
(1 << refnum) : 1; - if (refnum > cd.topBackref) - cd.topBackref = refnum; - length += 2; /* For single back reference */ - if (safelyCheckNextChar(ptr, patternEnd, '{') && isCountedRepeat(ptr + 2, patternEnd)) { - ptr = readRepeatCounts(ptr + 2, &minRepeats, &maxRepeats, &errorcode); - if (errorcode) - return -1; - if ((minRepeats == 0 && (maxRepeats == 1 || maxRepeats == -1)) || - (minRepeats == 1 && maxRepeats == -1)) - length++; - else - length += 5; - if (safelyCheckNextChar(ptr, patternEnd, '?')) - ptr++; - } - } - continue; - - case '^': /* Single-byte metacharacters */ - case '.': - case '$': - length++; - lastitemlength = 1; - continue; - - case '*': /* These repeats won't be after brackets; */ - case '+': /* those are handled separately */ - case '?': - length++; - goto POSSESSIVE; - - /* This covers the cases of braced repeats after a single char, metachar, - class, or back reference. */ - - case '{': - if (!isCountedRepeat(ptr + 1, patternEnd)) - goto NORMAL_CHAR; - ptr = readRepeatCounts(ptr + 1, &minRepeats, &maxRepeats, &errorcode); - if (errorcode != 0) - return -1; - - /* These special cases just insert one extra opcode */ - - if ((minRepeats == 0 && (maxRepeats == 1 || maxRepeats == -1)) || - (minRepeats == 1 && maxRepeats == -1)) - length++; - - /* These cases might insert additional copies of a preceding character. */ - - else { - if (minRepeats != 1) { - length -= lastitemlength; /* Uncount the original char or metachar */ - if (minRepeats > 0) - length += 5 + lastitemlength; - } - length += lastitemlength + ((maxRepeats > 0) ? 5 : 1); - } - - if (safelyCheckNextChar(ptr, patternEnd, '?')) - ptr++; /* Needs no extra length */ - - POSSESSIVE: /* Test for possessive quantifier */ - if (safelyCheckNextChar(ptr, patternEnd, '+')) { - ptr++; - length += 2 + 2 * LINK_SIZE; /* Allow for atomic brackets */ - } - continue; - - /* An alternation contains an offset to the next branch or ket. 
If any ims - options changed in the previous branch(es), and/or if we are in a - lookbehind assertion, extra space will be needed at the start of the - branch. This is handled by branch_extra. */ - - case '|': - if (brastackptr == 0) - cd.needOuterBracket = true; - length += 1 + LINK_SIZE + branch_extra; - continue; - - /* A character class uses 33 characters provided that all the character - values are less than 256. Otherwise, it uses a bit map for low valued - characters, and individual items for others. Don't worry about character - types that aren't allowed in classes - they'll get picked up during the - compile. A character class that contains only one single-byte character - uses 2 or 3 bytes, depending on whether it is negated or not. Notice this - where we can. (In UTF-8 mode we can do this only for chars < 128.) */ - - case '[': { - int class_optcount; - if (*(++ptr) == '^') { - class_optcount = 10; /* Greater than one */ - ptr++; - } - else - class_optcount = 0; - - bool class_utf8 = false; - - for (; ptr < patternEnd && *ptr != ']'; ++ptr) { - /* Check for escapes */ - - if (*ptr == '\\') { - c = checkEscape(&ptr, patternEnd, &errorcode, cd.numCapturingBrackets, true); - if (errorcode != 0) - return -1; - - /* Handle escapes that turn into characters */ - - if (c >= 0) - goto NON_SPECIAL_CHARACTER; - - /* Escapes that are meta-things. The normal ones just affect the - bit map, but Unicode properties require an XCLASS extended item. */ - - else - class_optcount = 10; /* \d, \s etc; make sure > 1 */ - } - - /* Anything else increments the possible optimization count. We have to - detect ranges here so that we can compute the number of extra ranges for - caseless wide characters when UCP support is available. If there are wide - characters, we are going to have to use an XCLASS, even for single - characters. 
*/ - - else { - c = *ptr; - - /* Come here from handling \ above when it escapes to a char value */ - - NON_SPECIAL_CHARACTER: - class_optcount++; - - int d = -1; - if (safelyCheckNextChar(ptr, patternEnd, '-')) { - const UChar* hyptr = ptr++; - if (safelyCheckNextChar(ptr, patternEnd, '\\')) { - ptr++; - d = checkEscape(&ptr, patternEnd, &errorcode, cd.numCapturingBrackets, true); - if (errorcode != 0) - return -1; - } - else if ((ptr + 1 < patternEnd) && ptr[1] != ']') - d = *++ptr; - if (d < 0) - ptr = hyptr; /* go back to hyphen as data */ - } - - /* If d >= 0 we have a range. In UTF-8 mode, if the end is > 255, or > - 127 for caseless matching, we will need to use an XCLASS. */ - - if (d >= 0) { - class_optcount = 10; /* Ensure > 1 */ - if (d < c) { - errorcode = ERR8; - return -1; - } - - if ((d > 255 || (ignoreCase && d > 127))) { - unsigned char buffer[6]; - if (!class_utf8) /* Allow for XCLASS overhead */ - { - class_utf8 = true; - length += LINK_SIZE + 2; - } - - /* If we have UCP support, find out how many extra ranges are - needed to map the other case of characters within this range. We - have to mimic the range optimization here, because extending the - range upwards might push d over a boundary that makes it use - another byte in the UTF-8 representation. */ - - if (ignoreCase) { - int occ, ocd; - int cc = c; - int origd = d; - while (getOthercaseRange(&cc, origd, &occ, &ocd)) { - if (occ >= c && ocd <= d) - continue; /* Skip embedded */ - - if (occ < c && ocd >= c - 1) /* Extend the basic range */ - { /* if there is overlap, */ - c = occ; /* noting that if occ < c */ - continue; /* we can't have ocd > d */ - } /* because a subrange is */ - if (ocd > d && occ <= d + 1) /* always shorter than */ - { /* the basic range. */ - d = ocd; - continue; - } - - /* An extra item is needed */ - - length += 1 + encodeUTF8(occ, buffer) + - ((occ == ocd) ? 
0 : encodeUTF8(ocd, buffer)); - } - } - - /* The length of the (possibly extended) range */ - - length += 1 + encodeUTF8(c, buffer) + encodeUTF8(d, buffer); - } - - } - - /* We have a single character. There is nothing to be done unless we - are in UTF-8 mode. If the char is > 255, or 127 when caseless, we must - allow for an XCL_SINGLE item, doubled for caselessness if there is UCP - support. */ - - else { - if ((c > 255 || (ignoreCase && c > 127))) { - unsigned char buffer[6]; - class_optcount = 10; /* Ensure > 1 */ - if (!class_utf8) /* Allow for XCLASS overhead */ - { - class_utf8 = true; - length += LINK_SIZE + 2; - } - length += (ignoreCase ? 2 : 1) * (1 + encodeUTF8(c, buffer)); - } - } - } - } - - if (ptr >= patternEnd) { /* Missing terminating ']' */ - errorcode = ERR6; - return -1; - } - - /* We can optimize when there was only one optimizable character. - Note that this does not detect the case of a negated single character. - In that case we do an incorrect length computation, but it's not a serious - problem because the computed length is too large rather than too small. */ - - if (class_optcount == 1) - goto NORMAL_CHAR; - - /* Here, we handle repeats for the class opcodes. */ - { - length += 33; - - /* A repeat needs either 1 or 5 bytes. If it is a possessive quantifier, - we also need extra for wrapping the whole thing in a sub-pattern. 
*/ - - if (safelyCheckNextChar(ptr, patternEnd, '{') && isCountedRepeat(ptr + 2, patternEnd)) { - ptr = readRepeatCounts(ptr + 2, &minRepeats, &maxRepeats, &errorcode); - if (errorcode != 0) - return -1; - if ((minRepeats == 0 && (maxRepeats == 1 || maxRepeats == -1)) || - (minRepeats == 1 && maxRepeats == -1)) - length++; - else - length += 5; - if (safelyCheckNextChar(ptr, patternEnd, '+')) { - ptr++; - length += 2 + 2 * LINK_SIZE; - } else if (safelyCheckNextChar(ptr, patternEnd, '?')) - ptr++; - } - } - continue; - } - - /* Brackets may be genuine groups or special things */ - - case '(': { - int branch_newextra = 0; - int bracket_length = BRA_LEN; - bool capturing = false; - - /* Handle special forms of bracket, which all start (? */ - - if (safelyCheckNextChar(ptr, patternEnd, '?')) { - switch (c = (ptr + 2 < patternEnd ? ptr[2] : 0)) { - /* Non-referencing groups and lookaheads just move the pointer on, and - then behave like a non-special bracket, except that they don't increment - the count of extracting brackets. Ditto for the "once only" bracket, - which is in Perl from version 5.005. */ - - case ':': - case '=': - case '!': - ptr += 2; - break; - - /* Else loop checking valid options until ) is met. Anything else is an - error. If we are without any brackets, i.e. at top level, the settings - act as if specified in the options, so massage the options immediately. - This is for backward compatibility with Perl 5.004. */ - - default: - errorcode = ERR12; - return -1; - } - } else - capturing = true; - - /* Capturing brackets must be counted so we can process escapes in a - Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to need - an additional 3 bytes of memory per capturing bracket. */ - - if (capturing) { - bracount++; - if (bracount > EXTRACT_BASIC_MAX) - bracket_length += 3; - } - - /* Save length for computing whole length at end if there's a repeat that - requires duplication of the group. 
Also save the current value of - branch_extra, and start the new group with the new value. If non-zero, this - will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */ - - if (brastackptr >= sizeof(brastack)/sizeof(int)) { - errorcode = ERR17; - return -1; - } - - bralenstack[brastackptr] = branch_extra; - branch_extra = branch_newextra; - - brastack[brastackptr++] = length; - length += bracket_length; - continue; - } - - /* Handle ket. Look for subsequent maxRepeats/minRepeats; for certain sets of values we - have to replicate this bracket up to that many times. If brastackptr is - 0 this is an unmatched bracket which will generate an error, but take care - not to try to access brastack[-1] when computing the length and restoring - the branch_extra value. */ - - case ')': { - int duplength; - length += KET_LEN; - if (brastackptr > 0) { - duplength = length - brastack[--brastackptr]; - branch_extra = bralenstack[brastackptr]; - } - else - duplength = 0; - - /* Leave ptr at the final char; for readRepeatCounts this happens - automatically; for the others we need an increment. */ - - if ((ptr + 1 < patternEnd) && (c = ptr[1]) == '{' && isCountedRepeat(ptr + 2, patternEnd)) { - ptr = readRepeatCounts(ptr + 2, &minRepeats, &maxRepeats, &errorcode); - if (errorcode) - return -1; - } else if (c == '*') { - minRepeats = 0; - maxRepeats = -1; - ptr++; - } else if (c == '+') { - minRepeats = 1; - maxRepeats = -1; - ptr++; - } else if (c == '?') { - minRepeats = 0; - maxRepeats = 1; - ptr++; - } else { - minRepeats = 1; - maxRepeats = 1; - } - - /* If the minimum is zero, we have to allow for an OP_BRAZERO before the - group, and if the maximum is greater than zero, we have to replicate - maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting - bracket set. 
*/ - - int repeatsLength; - if (minRepeats == 0) { - length++; - if (maxRepeats > 0) { - repeatsLength = multiplyWithOverflowCheck(maxRepeats - 1, duplength + BRA_LEN + KET_LEN + OPCODE_LEN); - if (repeatsLength < 0) { - errorcode = ERR16; - return -1; - } - length += repeatsLength; - if (length > MAX_PATTERN_SIZE) { - errorcode = ERR16; - return -1; - } - } - } - - /* When the minimum is greater than zero, we have to replicate up to - minval-1 times, with no additions required in the copies. Then, if there - is a limited maximum we have to replicate up to maxval-1 times allowing - for a BRAZERO item before each optional copy and nesting brackets for all - but one of the optional copies. */ - - else { - repeatsLength = multiplyWithOverflowCheck(minRepeats - 1, duplength); - if (repeatsLength < 0) { - errorcode = ERR16; - return -1; - } - length += repeatsLength; - if (maxRepeats > minRepeats) { /* Need this test as maxRepeats=-1 means no limit */ - repeatsLength = multiplyWithOverflowCheck(maxRepeats - minRepeats, duplength + BRAZERO_LEN + BRA_LEN + KET_LEN); - if (repeatsLength < 0) { - errorcode = ERR16; - return -1; - } - length += repeatsLength - (2 + 2 * LINK_SIZE); - } - if (length > MAX_PATTERN_SIZE) { - errorcode = ERR16; - return -1; - } - } - - /* Allow space for once brackets for "possessive quantifier" */ - - if (safelyCheckNextChar(ptr, patternEnd, '+')) { - ptr++; - length += 2 + 2 * LINK_SIZE; - } - continue; - } - - /* Non-special character. It won't be space or # in extended mode, so it is - always a genuine character. If we are in a \Q...\E sequence, check for the - end; if not, we have a literal. 
*/ - - default: - NORMAL_CHAR: - length += 2; /* For a one-byte character */ - lastitemlength = 1; /* Default length of last item for repeats */ - - if (c > 127) { - int i; - for (i = 0; i < jsc_pcre_utf8_table1_size; i++) - if (c <= jsc_pcre_utf8_table1[i]) - break; - length += i; - lastitemlength += i; - } - - continue; - } - } - - length += KET_LEN + OPCODE_LEN; /* For final KET and END */ - - cd.numCapturingBrackets = bracount; - return length; -} - -/************************************************* -* Compile a Regular Expression * -*************************************************/ - -/* This function takes a string and returns a pointer to a block of store -holding a compiled version of the expression. The original API for this -function had no error code return variable; it is retained for backwards -compatibility. The new function is given a new name. - -Arguments: - pattern the regular expression - options various option bits - errorCodePtr pointer to error code variable (pcre_compile2() only) - can be NULL if you don't want a code value - error pointer to pointer to error text - erroroffset ptr offset in pattern where error was detected - tables pointer to character tables or NULL - -Returns: pointer to compiled data block, or NULL on error, - with error and erroroffset set -*/ - -static inline JSRegExp* returnError(ErrorCode errorcode, int *error) -{ - *error = static_cast(errorcode); - return 0; -} - -JSRegExp* jsRegExpCompile(const UChar* pattern, int patternLength, - JSRegExpIgnoreCaseOption ignoreCase, JSRegExpMultilineOption multiline, - unsigned* numSubpatterns, int *error) -{ - /* We can't pass back an error message if error is NULL; I guess the best we - can do is just return NULL, but we can set a code value if there is a code pointer. */ - if (!error) - return 0; - *error = 0; - - CompileData cd; - - ErrorCode errorcode = ERR0; - /* Call this once just to count the brackets. 
*/ - calculateCompiledPatternLength(pattern, patternLength, ignoreCase, cd, errorcode); - /* Call it again to compute the length. */ - int length = calculateCompiledPatternLength(pattern, patternLength, ignoreCase, cd, errorcode); - if (errorcode) - return returnError(errorcode, error); - - if (length > MAX_PATTERN_SIZE) - return returnError(ERR16, error); - - size_t size = length + sizeof(JSRegExp); - JSRegExp* re = reinterpret_cast(js::OffTheBooks::array_new(size)); - if (!re) - return returnError(ERR13, error); - - re->options = (ignoreCase ? IgnoreCaseOption : 0) | (multiline ? MatchAcrossMultipleLinesOption : 0); - - /* The starting points of the name/number translation table and of the code are - passed around in the compile data block. */ - - const unsigned char* codeStart = (const unsigned char*)(re + 1); - - /* Set up a starting, non-extracting bracket, then compile the expression. On - error, errorcode will be set non-zero, so we don't need to look at the result - of the function here. */ - - const UChar* ptr = (const UChar*)pattern; - const UChar* patternEnd = pattern + patternLength; - unsigned char* code = const_cast(codeStart); - int firstByte, reqByte; - int bracketCount = 0; - if (!cd.needOuterBracket) - compileBranch(re->options, &bracketCount, &code, &ptr, patternEnd, &errorcode, &firstByte, &reqByte, cd); - else { - *code = OP_BRA; - unsigned char * const codeBefore = code; - compileBracket(re->options, &bracketCount, &code, &ptr, patternEnd, &errorcode, 2, &firstByte, &reqByte, cd); - JS_ASSERT((bracketCount & 0xff) == bracketCount); - put2ByteValue(codeBefore + 1 + LINK_SIZE, 0 << 8 | (bracketCount & 0xff)); - } - re->topBracket = bracketCount; - re->topBackref = cd.topBackref; - - /* If not reached end of pattern on success, there's an excess bracket. 
*/ - - if (errorcode == 0 && ptr < patternEnd) - errorcode = ERR10; - - /* Fill in the terminating state and check for disastrous overflow, but - if debugging, leave the test till after things are printed out. */ - - *code++ = OP_END; - - JS_ASSERT(code - codeStart <= length); - if (code - codeStart > length) - errorcode = ERR7; - - /* Give an error if there's back reference to a non-existent capturing - subpattern. */ - - if (re->topBackref > re->topBracket) - errorcode = ERR15; - - /* Failed to compile, or error while post-processing */ - - if (errorcode != ERR0) { - js::Foreground::array_delete(reinterpret_cast(re)); - return returnError(errorcode, error); - } - - /* If the anchored option was not passed, set the flag if we can determine that - the pattern is anchored by virtue of ^ characters or \A or anything else (such - as starting with .* when DOTALL is set). - - Otherwise, if we know what the first character has to be, save it, because that - speeds up unanchored matches no end. If not, see if we can set the - UseMultiLineFirstByteOptimizationOption flag. This is helpful for multiline matches when all branches - start with ^. and also when all branches start with .* for non-DOTALL matches. - */ - - if (cd.needOuterBracket ? bracketIsAnchored(codeStart) : branchIsAnchored(codeStart)) - re->options |= IsAnchoredOption; - else { - if (firstByte < 0) { - firstByte = (cd.needOuterBracket - ? bracketFindFirstAssertedCharacter(codeStart, false) - : branchFindFirstAssertedCharacter(codeStart, false)) - | ((re->options & IgnoreCaseOption) ? REQ_IGNORE_CASE : 0); - } - if (firstByte >= 0) { - int ch = firstByte & 255; - if (ch < 127) { - re->firstByte = ((firstByte & REQ_IGNORE_CASE) && flipCase(ch) == ch) ? ch : firstByte; - re->options |= UseFirstByteOptimizationOption; - } - } else { - if (cd.needOuterBracket ? 
bracketNeedsLineStart(codeStart, 0, cd.backrefMap) : branchNeedsLineStart(codeStart, 0, cd.backrefMap)) - re->options |= UseMultiLineFirstByteOptimizationOption; - } - } - - /* For an anchored pattern, we use the "required byte" only if it follows a - variable length item in the regex. Remove the caseless flag for non-caseable - bytes. */ - - if (reqByte >= 0 && (!(re->options & IsAnchoredOption) || (reqByte & REQ_VARY))) { - int ch = reqByte & 255; - if (ch < 127) { - re->reqByte = ((reqByte & REQ_IGNORE_CASE) && flipCase(ch) == ch) ? (reqByte & ~REQ_IGNORE_CASE) : reqByte; - re->options |= UseRequiredByteOptimizationOption; - } - } - - if (numSubpatterns) - *numSubpatterns = re->topBracket; - - return re; -} - -void jsRegExpFree(JSRegExp* re) -{ - js::Foreground::array_delete(reinterpret_cast(re)); -} diff --git a/js/src/yarr/pcre/pcre_exec.cpp b/js/src/yarr/pcre/pcre_exec.cpp deleted file mode 100644 index c2d154d67b8e..000000000000 --- a/js/src/yarr/pcre/pcre_exec.cpp +++ /dev/null @@ -1,2193 +0,0 @@ -/* This is JavaScriptCore's variant of the PCRE library. While this library -started out as a copy of PCRE, many of the features of PCRE have been -removed. This library now supports only the regular expression features -required by the JavaScript language specification, and has only the functions -needed by JavaScriptCore and the rest of WebKit. - - Originally written by Philip Hazel - Copyright (c) 1997-2006 University of Cambridge - Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. - Copyright (C) 2007 Eric Seidel - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/* This module contains jsRegExpExecute(), the externally visible function -that does pattern matching using an NFA algorithm, following the rules from -the JavaScript specification. There are also some supporting functions. */ - -#include "pcre_internal.h" - -#include -#include "yarr/jswtfbridge.h" -#include "yarr/wtf/ASCIICType.h" -#include "jsarena.h" -#include "jscntxt.h" - -using namespace WTF; - -#if !WTF_COMPILER_MSVC && !WTF_COMPILER_SUNPRO -#define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION -#endif - -/* Note: Webkit sources have USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP disabled. 
*/ -/* Note: There are hardcoded constants all over the place, but in the port of - Yarr to TraceMonkey two bytes are added to the OP_BRA* opcodes, so the - instruction stream now looks like this at the start of a bracket group: - - OP_BRA* [link:LINK_SIZE] [minNestedBracket,maxNestedBracket:2] - - Both capturing and non-capturing brackets encode this information. */ - -/* Avoid warnings on Windows. */ -#undef min -#undef max - -#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION -typedef int ReturnLocation; -#else -typedef void* ReturnLocation; -#endif - -/* Node on a stack of brackets. This is used to detect and reject - matches of the empty string per ECMAScript repeat match rules. This - also prevents infinite loops on quantified empty matches. One node - represents the start state at the start of this bracket group. */ -struct BracketChainNode { - BracketChainNode* previousBracket; - const UChar* bracketStart; - /* True if the minimum number of matches was already satisfied - when we started matching this group. */ - bool minSatisfied; -}; - -struct MatchFrame { - ReturnLocation returnLocation; - struct MatchFrame* previousFrame; - int *savedOffsets; - /* The frame allocates saved offsets into the regular expression arena pool so - that they can be restored during backtracking. */ - size_t savedOffsetsSize; - JSArenaPool *regExpPool; - - MatchFrame() : savedOffsetsSize(0), regExpPool(0) {} - void init(JSArenaPool *regExpPool) { this->regExpPool = regExpPool; } - - /* Function arguments that may change */ - struct { - const UChar* subjectPtr; - const unsigned char* instructionPtr; - int offsetTop; - BracketChainNode* bracketChain; - } args; - - - /* PCRE uses "fake" recursion built off of gotos, thus - stack-based local variables are not safe to use. Instead we have to - store local variables on the current MatchFrame. 
*/ - struct { - const unsigned char* data; - const unsigned char* startOfRepeatingBracket; - const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare - const unsigned char* instructionPtrAtStartOfOnce; - - int repeatOthercase; - int savedSubjectOffset; - - int ctype; - int fc; - int fi; - int length; - int max; - int number; - int offset; - int skipBytes; - int minBracket; - int limitBracket; - int bracketsBefore; - bool minSatisfied; - - BracketChainNode bracketChainNode; - } locals; - - void saveOffsets(int minBracket, int limitBracket, int *offsets, int offsetEnd) { - JS_ASSERT(regExpPool); - JS_ASSERT(minBracket >= 0); - JS_ASSERT(limitBracket >= minBracket); - JS_ASSERT(offsetEnd >= 0); - if (minBracket == limitBracket) - return; - const size_t newSavedOffsetCount = 3 * (limitBracket - minBracket); - /* Increase saved offset space if necessary. */ - { - size_t targetSize = sizeof(*savedOffsets) * newSavedOffsetCount; - if (savedOffsetsSize < targetSize) { - JS_ARENA_ALLOCATE_CAST(savedOffsets, int *, regExpPool, targetSize); - JS_ASSERT(savedOffsets); /* FIXME: error code, bug 574459. 
*/ - savedOffsetsSize = targetSize; - } - } - for (unsigned i = 0; i < unsigned(limitBracket - minBracket); ++i) { - int bracketIter = minBracket + i; - JS_ASSERT(2 * bracketIter + 1 <= offsetEnd); - int start = offsets[2 * bracketIter]; - int end = offsets[2 * bracketIter + 1]; - JS_ASSERT(bracketIter <= offsetEnd); - int offset = offsets[offsetEnd - bracketIter]; - DPRINTF(("saving bracket %d; start: %d; end: %d; offset: %d\n", bracketIter, start, end, offset)); - JS_ASSERT(start <= end); - JS_ASSERT(i * 3 + 2 < newSavedOffsetCount); - savedOffsets[i * 3 + 0] = start; - savedOffsets[i * 3 + 1] = end; - savedOffsets[i * 3 + 2] = offset; - } - } - - void clobberOffsets(int minBracket, int limitBracket, int *offsets, int offsetEnd) { - for (int i = 0; i < limitBracket - minBracket; ++i) { - int bracketIter = minBracket + i; - JS_ASSERT(2 * bracketIter + 1 < offsetEnd); - offsets[2 * bracketIter + 0] = -1; - offsets[2 * bracketIter + 1] = -1; - } - } - - void restoreOffsets(int minBracket, int limitBracket, int *offsets, int offsetEnd) { - JS_ASSERT(regExpPool); - JS_ASSERT_IF(limitBracket > minBracket, savedOffsets); - for (int i = 0; i < limitBracket - minBracket; ++i) { - int bracketIter = minBracket + i; - int start = savedOffsets[i * 3 + 0]; - int end = savedOffsets[i * 3 + 1]; - int offset = savedOffsets[i * 3 + 2]; - DPRINTF(("restoring bracket %d; start: %d; end: %d; offset: %d\n", bracketIter, start, end, offset)); - JS_ASSERT(start <= end); - offsets[2 * bracketIter + 0] = start; - offsets[2 * bracketIter + 1] = end; - offsets[offsetEnd - bracketIter] = offset; - } - } - - /* Extract the bracket data after the current opcode/link at |instructionPtr| into the locals. 
*/ - void extractBrackets(const unsigned char *instructionPtr) { - uint16 bracketMess = get2ByteValue(instructionPtr + 1 + LINK_SIZE); - locals.minBracket = (bracketMess >> 8) & 0xff; - locals.limitBracket = (bracketMess & 0xff); - JS_ASSERT(locals.minBracket <= locals.limitBracket); - } - - /* At the start of a bracketed group, add the current subject pointer to the - stack of such pointers, to be re-instated at the end of the group when we hit - the closing ket. When match() is called in other circumstances, we don't add to - this stack. */ - void startNewGroup(bool minSatisfied) { - locals.bracketChainNode.previousBracket = args.bracketChain; - locals.bracketChainNode.bracketStart = args.subjectPtr; - locals.bracketChainNode.minSatisfied = minSatisfied; - args.bracketChain = &locals.bracketChainNode; - } -}; - -/* Structure for passing "static" information around between the functions -doing traditional NFA matching, so that they are thread-safe. */ - -struct MatchData { - int *offsetVector; /* Offset vector */ - int offsetEnd; /* One past the end */ - int offsetMax; /* The maximum usable for return data */ - bool offsetOverflow; /* Set if too many extractions */ - const UChar *startSubject; /* Start of the subject string */ - const UChar *endSubject; /* End of the subject string */ - const UChar *endMatchPtr; /* Subject position at end match */ - int endOffsetTop; /* Highwater mark at end of match */ - bool multiline; - bool ignoreCase; - - void setOffsetPair(size_t pairNum, int start, int end) { - JS_ASSERT(int(2 * pairNum + 1) < offsetEnd && int(pairNum) < offsetEnd); - JS_ASSERT(start <= end); - JS_ASSERT_IF(start < 0, start == end && start == -1); - DPRINTF(("setting offset pair at %u (%d, %d)\n", pairNum, start, end)); - offsetVector[2 * pairNum + 0] = start; - offsetVector[2 * pairNum + 1] = end; - } -}; - -/* The maximum remaining length of subject we are prepared to search for a -reqByte match. 
*/ - -#define REQ_BYTE_MAX 1000 - -/* The below limit restricts the number of "recursive" match calls in order to -avoid spending exponential time on complex regular expressions. */ - -static const unsigned matchLimit = 1000000; - -/************************************************* -* Match a back-reference * -*************************************************/ - -/* If a back reference hasn't been set, the length that is passed is greater -than the number of characters left in the string, so the match fails. - -Arguments: - offset index into the offset vector - subjectPtr points into the subject - length length to be matched - md points to match data block - -Returns: true if matched -*/ - -static bool matchRef(int offset, const UChar* subjectPtr, int length, const MatchData& md) -{ - const UChar* p = md.startSubject + md.offsetVector[offset]; - - /* Always fail if not enough characters left */ - - if (length > md.endSubject - subjectPtr) - return false; - - /* Separate the caselesss case for speed */ - - if (md.ignoreCase) { - while (length-- > 0) { - UChar c = *p++; - int othercase = jsc_pcre_ucp_othercase(c); - UChar d = *subjectPtr++; - if (c != d && othercase != d) - return false; - } - } - else { - while (length-- > 0) - if (*p++ != *subjectPtr++) - return false; - } - - return true; -} - -#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION - -/* Use numbered labels and switch statement at the bottom of the match function. */ - -#define RMATCH_WHERE(num) num -#define RRETURN_LABEL RRETURN_SWITCH - -#else - -/* Use GCC's computed goto extension. */ - -/* For one test case this is more than 40% faster than the switch statement. 
-We could avoid the use of the num argument entirely by using local labels, -but using it for the GCC case as well as the non-GCC case allows us to share -a bit more code and notice if we use conflicting numbers.*/ - -#define RMATCH_WHERE(num) JS_EXTENSION(&&RRETURN_##num) -#define RRETURN_LABEL *stack.currentFrame->returnLocation - -#endif - -#define RECURSIVE_MATCH_COMMON(num) \ - goto RECURSE;\ - RRETURN_##num: \ - stack.popCurrentFrame(); - -#define RECURSIVE_MATCH(num, ra, rb) \ - do { \ - stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \ - RECURSIVE_MATCH_COMMON(num) \ - } while (0) - -#define RECURSIVE_MATCH_NEW_GROUP(num, ra, rb, gm) \ - do { \ - stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \ - stack.currentFrame->startNewGroup(gm); \ - RECURSIVE_MATCH_COMMON(num) \ - } while (0) - -#define RRETURN do { JS_EXTENSION_(goto RRETURN_LABEL); } while (0) - -#define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0) - -/************************************************* -* Match from current position * -*************************************************/ - -/* On entry instructionPtr points to the first opcode, and subjectPtr to the first character -in the subject string, while substringStart holds the value of subjectPtr at the start of the -last bracketed group - used for breaking infinite loops matching zero-length -strings. This function is called recursively in many circumstances. Whenever it -returns a negative (error) response, the outer match() call must also return the -same response. - -Arguments: - subjectPtr pointer in subject - instructionPtr position in code - offsetTop current top pointer - md pointer to "static" info for the match - -Returns: 1 if matched ) these values are >= 0 - 0 if failed to match ) - a negative error value if aborted by an error condition - (e.g. 
stopped by repeated call or recursion limit) -*/ - -static const unsigned numFramesOnStack = 16; - -struct MatchStack { - JSArenaPool *regExpPool; - void *regExpPoolMark; - - MatchStack(JSArenaPool *regExpPool) - : regExpPool(regExpPool) - , regExpPoolMark(JS_ARENA_MARK(regExpPool)) - , framesEnd(frames + numFramesOnStack) - , currentFrame(frames) - , size(1) // match() creates accesses the first frame w/o calling pushNewFrame - { - JS_ASSERT((sizeof(frames) / sizeof(frames[0])) == numFramesOnStack); - JS_ASSERT(regExpPool); - for (size_t i = 0; i < numFramesOnStack; ++i) - frames[i].init(regExpPool); - } - - ~MatchStack() { JS_ARENA_RELEASE(regExpPool, regExpPoolMark); } - - MatchFrame frames[numFramesOnStack]; - MatchFrame* framesEnd; - MatchFrame* currentFrame; - unsigned size; - - bool canUseStackBufferForNextFrame() { - return size < numFramesOnStack; - } - - MatchFrame* allocateNextFrame() { - if (canUseStackBufferForNextFrame()) - return currentFrame + 1; - // FIXME: bug 574459 -- no NULL check - MatchFrame *frame = js::OffTheBooks::new_(); - frame->init(regExpPool); - return frame; - } - - void pushNewFrame(const unsigned char* instructionPtr, BracketChainNode* bracketChain, ReturnLocation returnLocation) { - MatchFrame* newframe = allocateNextFrame(); - newframe->previousFrame = currentFrame; - - newframe->args.subjectPtr = currentFrame->args.subjectPtr; - newframe->args.offsetTop = currentFrame->args.offsetTop; - newframe->args.instructionPtr = instructionPtr; - newframe->args.bracketChain = bracketChain; - newframe->returnLocation = returnLocation; - size++; - - currentFrame = newframe; - } - - void popCurrentFrame() { - MatchFrame* oldFrame = currentFrame; - currentFrame = currentFrame->previousFrame; - if (size > numFramesOnStack) - js::Foreground::delete_(oldFrame); - size--; - } - - void popAllFrames() { - while (size) - popCurrentFrame(); - } -}; - -static int matchError(int errorCode, MatchStack& stack) -{ - stack.popAllFrames(); - return 
errorCode; -} - -/* Get the next UTF-8 character, not advancing the pointer, incrementing length - if there are extra bytes. This is called when we know we are in UTF-8 mode. */ - -static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* subjectPtr, int& len) -{ - c = *subjectPtr; - if ((c & 0xc0) == 0xc0) { - int gcaa = jsc_pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ - int gcss = 6 * gcaa; - c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss; - for (int gcii = 1; gcii <= gcaa; gcii++) { - gcss -= 6; - c |= (subjectPtr[gcii] & 0x3f) << gcss; - } - len += gcaa; - } -} - -static inline void repeatInformationFromInstructionOffset(short instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats) -{ - // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR - static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 }; - static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 }; - - JS_ASSERT(instructionOffset >= 0); - JS_ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR)); - - minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2 - minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset]; - maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset]; -} - -/* Helper class for passing a flag value from one op to the next that runs. - This allows us to set the flag in certain ops. When the flag is read, it - will be true only if the previous op set the flag, otherwise it is false. 
*/ -class LinearFlag { -public: - LinearFlag() : flag(false) {} - - bool readAndClear() { - bool rv = flag; - flag = false; - return rv; - } - - void set() { - flag = true; - } - -private: - bool flag; -}; - -static int -match(JSArenaPool *regExpPool, const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md) -{ - bool isMatch = false; - int min; - bool minimize = false; /* Initialization not really needed, but some compilers think so. */ - unsigned remainingMatchCount = matchLimit; - int othercase; /* Declare here to avoid errors during jumps */ - bool minSatisfied; - - MatchStack stack(regExpPool); - LinearFlag minSatNextBracket; - - /* The opcode jump table. */ -#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP -#define EMIT_JUMP_TABLE_ENTRY(opcode) JS_EXTENSION(&&LABEL_OP_##opcode) - static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) }; -#undef EMIT_JUMP_TABLE_ENTRY -#endif - - /* One-time setup of the opcode jump table. */ -#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP - for (int i = 255; !opcodeJumpTable[i]; i--) - opcodeJumpTable[i] = &&CAPTURING_BRACKET; -#endif - -#ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION - // Shark shows this as a hot line - // Using a static const here makes this line disappear, but makes later access hotter (not sure why) - stack.currentFrame->returnLocation = JS_EXTENSION(&&RETURN); -#else - stack.currentFrame->returnLocation = 0; -#endif - stack.currentFrame->args.subjectPtr = subjectPtr; - stack.currentFrame->args.instructionPtr = instructionPtr; - stack.currentFrame->args.offsetTop = offsetTop; - stack.currentFrame->args.bracketChain = 0; - stack.currentFrame->startNewGroup(false); - - /* This is where control jumps back to to effect "recursion" */ - -RECURSE: - if (!--remainingMatchCount) - return matchError(JSRegExpErrorHitLimit, stack); - - /* Now start processing the operations. 
*/ - -#ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP - while (true) -#endif - { - -#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP -#define BEGIN_OPCODE(opcode) LABEL_OP_##opcode -#define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr] -#else -#define BEGIN_OPCODE(opcode) case OP_##opcode -#define NEXT_OPCODE continue -#endif -#define LOCALS(__ident) (stack.currentFrame->locals.__ident) - -#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP - NEXT_OPCODE; -#else - switch (*stack.currentFrame->args.instructionPtr) -#endif - { - /* Non-capturing bracket: optimized */ - - BEGIN_OPCODE(BRA): - NON_CAPTURING_BRACKET: - DPRINTF(("start non-capturing bracket\n")); - stack.currentFrame->extractBrackets(stack.currentFrame->args.instructionPtr); - /* If we see no ALT, we have to skip three bytes of bracket data (link plus nested - bracket data. */ - stack.currentFrame->locals.skipBytes = 3; - /* We must compute this value at the top, before we move the instruction pointer. */ - stack.currentFrame->locals.minSatisfied = minSatNextBracket.readAndClear(); - do { - /* We need to extract this into a variable so we can correctly pass it by value - through RECURSIVE_MATCH_NEW_GROUP, which modifies currentFrame. */ - minSatisfied = stack.currentFrame->locals.minSatisfied; - RECURSIVE_MATCH_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, stack.currentFrame->args.bracketChain, minSatisfied); - if (isMatch) { - DPRINTF(("non-capturing bracket succeeded\n")); - RRETURN; - } - stack.currentFrame->locals.skipBytes = 1; - stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); - } while (*stack.currentFrame->args.instructionPtr == OP_ALT); - DPRINTF(("non-capturing bracket failed\n")); - for (size_t i = LOCALS(minBracket); i < size_t(LOCALS(limitBracket)); ++i) - md.setOffsetPair(i, -1, -1); - RRETURN; - - /* Skip over large extraction number data if encountered. 
*/ - - BEGIN_OPCODE(BRANUMBER): - stack.currentFrame->args.instructionPtr += 3; - NEXT_OPCODE; - - /* End of the pattern. */ - - BEGIN_OPCODE(END): - md.endMatchPtr = stack.currentFrame->args.subjectPtr; /* Record where we ended */ - md.endOffsetTop = stack.currentFrame->args.offsetTop; /* and how many extracts were taken */ - isMatch = true; - RRETURN; - - /* Assertion brackets. Check the alternative branches in turn - the - matching won't pass the KET for an assertion. If any one branch matches, - the assertion is true. Lookbehind assertions have an OP_REVERSE item at the - start of each branch to move the current point backwards, so the code at - this level is identical to the lookahead case. */ - - BEGIN_OPCODE(ASSERT): - { - uint16 bracketMess = get2ByteValue(stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE); - LOCALS(minBracket) = (bracketMess >> 8) & 0xff; - LOCALS(limitBracket) = bracketMess & 0xff; - JS_ASSERT(LOCALS(minBracket) <= LOCALS(limitBracket)); - } - stack.currentFrame->locals.skipBytes = 3; - do { - RECURSIVE_MATCH_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, NULL, false); - if (isMatch) - break; - stack.currentFrame->locals.skipBytes = 1; - stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); - } while (*stack.currentFrame->args.instructionPtr == OP_ALT); - if (*stack.currentFrame->args.instructionPtr == OP_KET) { - for (size_t i = LOCALS(minBracket); i < size_t(LOCALS(limitBracket)); ++i) - md.setOffsetPair(i, -1, -1); - RRETURN_NO_MATCH; - } - - /* Continue from after the assertion, updating the offsets high water - mark, since extracts may have been taken during the assertion. 
*/ - - advanceToEndOfBracket(stack.currentFrame->args.instructionPtr); - stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE; - stack.currentFrame->args.offsetTop = md.endOffsetTop; - NEXT_OPCODE; - - /* Negative assertion: all branches must fail to match */ - - BEGIN_OPCODE(ASSERT_NOT): - stack.currentFrame->locals.skipBytes = 3; - { - unsigned bracketMess = get2ByteValue(stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE); - LOCALS(minBracket) = (bracketMess >> 8) & 0xff; - LOCALS(limitBracket) = bracketMess & 0xff; - } - JS_ASSERT(LOCALS(minBracket) <= LOCALS(limitBracket)); - do { - RECURSIVE_MATCH_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, NULL, false); - if (isMatch) - RRETURN_NO_MATCH; - stack.currentFrame->locals.skipBytes = 1; - stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); - } while (*stack.currentFrame->args.instructionPtr == OP_ALT); - - stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.skipBytes + LINK_SIZE; - NEXT_OPCODE; - - /* An alternation is the end of a branch; scan along to find the end of the - bracketed group and go to there. */ - - BEGIN_OPCODE(ALT): - advanceToEndOfBracket(stack.currentFrame->args.instructionPtr); - NEXT_OPCODE; - - /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating - that it may occur zero times. It may repeat infinitely, or not at all - - i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper - repeat limits are compiled as a number of copies, with the optional ones - preceded by BRAZERO or BRAMINZERO. 
*/ - - BEGIN_OPCODE(BRAZERO): { - stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1; - stack.currentFrame->extractBrackets(stack.currentFrame->args.instructionPtr + 1); - stack.currentFrame->saveOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd); - minSatNextBracket.set(); - RECURSIVE_MATCH_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.bracketChain, true); - if (isMatch) - RRETURN; - stack.currentFrame->restoreOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd); - advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket); - stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE; - NEXT_OPCODE; - } - - BEGIN_OPCODE(BRAMINZERO): { - stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1; - advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket); - RECURSIVE_MATCH_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain, false); - if (isMatch) - RRETURN; - stack.currentFrame->args.instructionPtr++; - NEXT_OPCODE; - } - - /* End of a group, repeated or non-repeating. If we are at the end of - an assertion "group", stop matching and return 1, but record the - current high water mark for use by positive assertions. Do this also - for the "once" (not-backup up) groups. 
*/ - - BEGIN_OPCODE(KET): - BEGIN_OPCODE(KETRMIN): - BEGIN_OPCODE(KETRMAX): - stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instructionPtr + 1); - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.bracketChain->bracketStart; - stack.currentFrame->locals.minSatisfied = stack.currentFrame->args.bracketChain->minSatisfied; - - /* Back up the stack of bracket start pointers. */ - - stack.currentFrame->args.bracketChain = stack.currentFrame->args.bracketChain->previousBracket; - - if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT) { - md.endOffsetTop = stack.currentFrame->args.offsetTop; - isMatch = true; - RRETURN; - } - - /* In all other cases except a conditional group we have to check the - group number back at the start and if necessary complete handling an - extraction by setting the offsets and bumping the high water mark. */ - - stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA; - - /* For extended extraction brackets (large number), we have to fish out - the number from a dummy opcode at the start. */ - - if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX) - stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->locals.instructionPtrAtStartOfOnce + 4 + LINK_SIZE); - stack.currentFrame->locals.offset = 2 * stack.currentFrame->locals.number; - - DPRINTF(("end bracket %d\n", stack.currentFrame->locals.number)); - - /* Test for a numbered group. This includes groups called as a result - of recursion. Note that whole-pattern recursion is coded as a recurse - into group 0, so it won't be picked up here. Instead, we catch it when - the OP_END is reached. 
*/ - - if (stack.currentFrame->locals.number > 0) { - if (stack.currentFrame->locals.offset >= md.offsetMax) - md.offsetOverflow = true; - else { - int start = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number]; - int end = stack.currentFrame->args.subjectPtr - md.startSubject; - if (start == end && stack.currentFrame->locals.minSatisfied) { - DPRINTF(("empty string while group already matched; bailing")); - RRETURN_NO_MATCH; - } - DPRINTF(("saving; start: %d; end: %d\n", start, end)); - JS_ASSERT(start <= end); - md.setOffsetPair(stack.currentFrame->locals.number, start, end); - if (stack.currentFrame->args.offsetTop <= stack.currentFrame->locals.offset) - stack.currentFrame->args.offsetTop = stack.currentFrame->locals.offset + 2; - } - } - - /* For a non-repeating ket, just continue at this level. This also - happens for a repeating ket if no characters were matched in the group. - This is the forcible breaking of infinite loops as implemented in Perl - 5.005. If there is an options reset, it will get obeyed in the normal - course of events. */ - - if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) { - DPRINTF(("non-repeating ket or empty match\n")); - if (stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction && stack.currentFrame->locals.minSatisfied) { - DPRINTF(("empty string while group already matched; bailing")); - RRETURN_NO_MATCH; - } - stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE; - NEXT_OPCODE; - } - - /* The repeating kets try the rest of the pattern or restart from the - preceding bracket, in the appropriate order. 
*/ - - stack.currentFrame->extractBrackets(LOCALS(instructionPtrAtStartOfOnce)); - JS_ASSERT_IF(LOCALS(number), LOCALS(minBracket) <= LOCALS(number) && LOCALS(number) < LOCALS(limitBracket)); - if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) { - stack.currentFrame->saveOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd); - RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - else - stack.currentFrame->restoreOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd); - DPRINTF(("recursively matching lazy group\n")); - minSatNextBracket.set(); - RECURSIVE_MATCH_NEW_GROUP(17, LOCALS(instructionPtrAtStartOfOnce), stack.currentFrame->args.bracketChain, true); - } else { /* OP_KETRMAX */ - stack.currentFrame->saveOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd); - stack.currentFrame->clobberOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd); - DPRINTF(("recursively matching greedy group\n")); - minSatNextBracket.set(); - RECURSIVE_MATCH_NEW_GROUP(18, LOCALS(instructionPtrAtStartOfOnce), stack.currentFrame->args.bracketChain, true); - if (isMatch) - RRETURN; - else - stack.currentFrame->restoreOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd); - RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain); - } - RRETURN; - - /* Start of subject. */ - - BEGIN_OPCODE(CIRC): - if (stack.currentFrame->args.subjectPtr != md.startSubject) - RRETURN_NO_MATCH; - stack.currentFrame->args.instructionPtr++; - NEXT_OPCODE; - - /* After internal newline if multiline. 
*/ - - BEGIN_OPCODE(BOL): - if (stack.currentFrame->args.subjectPtr != md.startSubject && !isNewline(stack.currentFrame->args.subjectPtr[-1])) - RRETURN_NO_MATCH; - stack.currentFrame->args.instructionPtr++; - NEXT_OPCODE; - - /* End of subject. */ - - BEGIN_OPCODE(DOLL): - if (stack.currentFrame->args.subjectPtr < md.endSubject) - RRETURN_NO_MATCH; - stack.currentFrame->args.instructionPtr++; - NEXT_OPCODE; - - /* Before internal newline if multiline. */ - - BEGIN_OPCODE(EOL): - if (stack.currentFrame->args.subjectPtr < md.endSubject && !isNewline(*stack.currentFrame->args.subjectPtr)) - RRETURN_NO_MATCH; - stack.currentFrame->args.instructionPtr++; - NEXT_OPCODE; - - /* Word boundary assertions */ - - BEGIN_OPCODE(NOT_WORD_BOUNDARY): - BEGIN_OPCODE(WORD_BOUNDARY): { - bool currentCharIsWordChar = false; - bool previousCharIsWordChar = false; - - if (stack.currentFrame->args.subjectPtr > md.startSubject) - previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]); - if (stack.currentFrame->args.subjectPtr < md.endSubject) - currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr); - - /* Now see if the situation is what we want */ - bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY); - if (wordBoundaryDesired ? 
currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar) - RRETURN_NO_MATCH; - NEXT_OPCODE; - } - - /* Match a single character type; inline for speed */ - - BEGIN_OPCODE(NOT_NEWLINE): - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN_NO_MATCH; - if (isNewline(*stack.currentFrame->args.subjectPtr++)) - RRETURN_NO_MATCH; - stack.currentFrame->args.instructionPtr++; - NEXT_OPCODE; - - BEGIN_OPCODE(NOT_DIGIT): - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN_NO_MATCH; - if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++)) - RRETURN_NO_MATCH; - stack.currentFrame->args.instructionPtr++; - NEXT_OPCODE; - - BEGIN_OPCODE(DIGIT): - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN_NO_MATCH; - if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++)) - RRETURN_NO_MATCH; - stack.currentFrame->args.instructionPtr++; - NEXT_OPCODE; - - BEGIN_OPCODE(NOT_WHITESPACE): - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN_NO_MATCH; - if (isSpaceChar(*stack.currentFrame->args.subjectPtr++)) - RRETURN_NO_MATCH; - stack.currentFrame->args.instructionPtr++; - NEXT_OPCODE; - - BEGIN_OPCODE(WHITESPACE): - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN_NO_MATCH; - if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++)) - RRETURN_NO_MATCH; - stack.currentFrame->args.instructionPtr++; - NEXT_OPCODE; - - BEGIN_OPCODE(NOT_WORDCHAR): - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN_NO_MATCH; - if (isWordChar(*stack.currentFrame->args.subjectPtr++)) - RRETURN_NO_MATCH; - stack.currentFrame->args.instructionPtr++; - NEXT_OPCODE; - - BEGIN_OPCODE(WORDCHAR): - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN_NO_MATCH; - if (!isWordChar(*stack.currentFrame->args.subjectPtr++)) - RRETURN_NO_MATCH; - stack.currentFrame->args.instructionPtr++; - NEXT_OPCODE; - - /* Match a back reference, possibly repeatedly. 
Look past the end of the - item to see if there is repeat information following. The code is similar - to that for character classes, but repeated for efficiency. Then obey - similar code to character type repeats - written out again for speed. - However, if the referenced string is the empty string, always treat - it as matched, any number of times (otherwise there could be infinite - loops). */ - - BEGIN_OPCODE(REF): - stack.currentFrame->locals.offset = get2ByteValue(stack.currentFrame->args.instructionPtr + 1) << 1; /* Doubled ref number */ - stack.currentFrame->args.instructionPtr += 3; /* Advance past item */ - - /* If the reference is unset, set the length to be longer than the amount - of subject left; this ensures that every attempt at a match fails. We - can't just fail here, because of the possibility of quantifiers with zero - minima. */ - - if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0) - stack.currentFrame->locals.length = 0; - else - stack.currentFrame->locals.length = md.offsetVector[stack.currentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset]; - - /* Set up for repetition, or handle the non-repeated case */ - - switch (*stack.currentFrame->args.instructionPtr) { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max); - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE); - min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1); - stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3); - if (stack.currentFrame->locals.max == 0) - stack.currentFrame->locals.max = INT_MAX; - stack.currentFrame->args.instructionPtr += 
5; - break; - - default: /* No repeat follows */ - if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md)) - RRETURN_NO_MATCH; - stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length; - NEXT_OPCODE; - } - - /* If the length of the reference is zero, just continue with the - main loop. */ - - if (stack.currentFrame->locals.length == 0) - NEXT_OPCODE; - - /* First, ensure the minimum number of matches are present. */ - - for (int i = 1; i <= min; i++) { - if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md)) - RRETURN_NO_MATCH; - stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length; - } - - /* If min = max, continue at the same level without recursion. - They are not both allowed to be zero. */ - - if (min == stack.currentFrame->locals.max) - NEXT_OPCODE; - - /* If minimizing, keep trying and advancing the pointer */ - - if (minimize) { - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { - RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md)) - RRETURN; - stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length; - } - /* Control never reaches here */ - } - - /* If maximizing, find the longest string and work backwards */ - - else { - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md)) - break; - stack.currentFrame->args.subjectPtr += 
stack.currentFrame->locals.length; - } - while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) { - RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length; - } - RRETURN_NO_MATCH; - } - /* Control never reaches here */ - - /* Match a bit-mapped character class, possibly repeatedly. This op code is - used when all the characters in the class have values in the range 0-255, - and either the matching is caseful, or the characters are in the range - 0-127 when UTF-8 processing is enabled. The only difference between - OP_CLASS and OP_NCLASS occurs when a data character outside the range is - encountered. - - First, look past the end of the item to see if there is repeat information - following. Then obey similar code to character type repeats - written out - again for speed. */ - - BEGIN_OPCODE(NCLASS): - BEGIN_OPCODE(CLASS): - stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1; /* Save for matching */ - stack.currentFrame->args.instructionPtr += 33; /* Advance past the item */ - - switch (*stack.currentFrame->args.instructionPtr) { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max); - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE); - min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1); - stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3); - if (stack.currentFrame->locals.max == 0) - stack.currentFrame->locals.max = INT_MAX; - stack.currentFrame->args.instructionPtr += 5; - break; - - default: /* No repeat follows 
*/ - min = stack.currentFrame->locals.max = 1; - break; - } - - /* First, ensure the minimum number of matches are present. */ - - for (int i = 1; i <= min; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN_NO_MATCH; - int c = *stack.currentFrame->args.subjectPtr++; - if (c > 255) { - if (stack.currentFrame->locals.data[-1] == OP_CLASS) - RRETURN_NO_MATCH; - } else { - if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7)))) - RRETURN_NO_MATCH; - } - } - - /* If max == min we can continue with the main loop without the - need to recurse. */ - - if (min == stack.currentFrame->locals.max) - NEXT_OPCODE; - - /* If minimizing, keep testing the rest of the expression and advancing - the pointer while it matches the class. */ - if (minimize) { - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { - RECURSIVE_MATCH(22, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN; - int c = *stack.currentFrame->args.subjectPtr++; - if (c > 255) { - if (stack.currentFrame->locals.data[-1] == OP_CLASS) - RRETURN; - } else { - if ((stack.currentFrame->locals.data[c/8] & (1 << (c&7))) == 0) - RRETURN; - } - } - /* Control never reaches here */ - } - /* If maximizing, find the longest possible run, then work backwards. 
*/ - else { - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; - - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - break; - int c = *stack.currentFrame->args.subjectPtr; - if (c > 255) { - if (stack.currentFrame->locals.data[-1] == OP_CLASS) - break; - } else { - if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7)))) - break; - } - ++stack.currentFrame->args.subjectPtr; - } - for (;;) { - RECURSIVE_MATCH(24, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) - break; /* Stop if tried at original pos */ - } - - RRETURN; - } - /* Control never reaches here */ - - /* Match an extended character class. */ - - BEGIN_OPCODE(XCLASS): - stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE; /* Save for matching */ - stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); /* Advance past the item */ - - switch (*stack.currentFrame->args.instructionPtr) { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max); - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE); - min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1); - stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3); - if (stack.currentFrame->locals.max == 0) - stack.currentFrame->locals.max = INT_MAX; - stack.currentFrame->args.instructionPtr += 5; - break; - - default: /* No repeat follows */ - min = 
stack.currentFrame->locals.max = 1; - } - - /* First, ensure the minimum number of matches are present. */ - - for (int i = 1; i <= min; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN_NO_MATCH; - int c = *stack.currentFrame->args.subjectPtr++; - if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data)) - RRETURN_NO_MATCH; - } - - /* If max == min we can continue with the main loop without the - need to recurse. */ - - if (min == stack.currentFrame->locals.max) - NEXT_OPCODE; - - /* If minimizing, keep testing the rest of the expression and advancing - the pointer while it matches the class. */ - - if (minimize) { - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { - RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN; - int c = *stack.currentFrame->args.subjectPtr++; - if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data)) - RRETURN; - } - /* Control never reaches here */ - } - - /* If maximizing, find the longest possible run, then work backwards. 
*/ - - else { - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - break; - int c = *stack.currentFrame->args.subjectPtr; - if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data)) - break; - ++stack.currentFrame->args.subjectPtr; - } - for(;;) { - RECURSIVE_MATCH(27, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) - break; /* Stop if tried at original pos */ - } - RRETURN; - } - - /* Control never reaches here */ - - /* Match a single character, casefully */ - - BEGIN_OPCODE(CHAR): - stack.currentFrame->locals.length = 1; - stack.currentFrame->args.instructionPtr++; - getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length); - stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length; - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN_NO_MATCH; - if (stack.currentFrame->locals.fc != *stack.currentFrame->args.subjectPtr++) - RRETURN_NO_MATCH; - NEXT_OPCODE; - - /* Match a single character, caselessly */ - - BEGIN_OPCODE(CHAR_IGNORING_CASE): { - stack.currentFrame->locals.length = 1; - stack.currentFrame->args.instructionPtr++; - getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length); - stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length; - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN_NO_MATCH; - int dc = *stack.currentFrame->args.subjectPtr++; - if (stack.currentFrame->locals.fc != dc && jsc_pcre_ucp_othercase(stack.currentFrame->locals.fc) != dc) - RRETURN_NO_MATCH; - NEXT_OPCODE; - } - - 
/* Match a single ASCII character. */ - - BEGIN_OPCODE(ASCII_CHAR): - if (md.endSubject == stack.currentFrame->args.subjectPtr) - RRETURN_NO_MATCH; - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->args.instructionPtr[1]) - RRETURN_NO_MATCH; - ++stack.currentFrame->args.subjectPtr; - stack.currentFrame->args.instructionPtr += 2; - NEXT_OPCODE; - - /* Match one of two cases of an ASCII letter. */ - - BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE): - if (md.endSubject == stack.currentFrame->args.subjectPtr) - RRETURN_NO_MATCH; - if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.currentFrame->args.instructionPtr[1]) - RRETURN_NO_MATCH; - ++stack.currentFrame->args.subjectPtr; - stack.currentFrame->args.instructionPtr += 2; - NEXT_OPCODE; - - /* Match a single character repeatedly; different opcodes share code. */ - - BEGIN_OPCODE(EXACT): - min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1); - minimize = false; - stack.currentFrame->args.instructionPtr += 3; - goto REPEATCHAR; - - BEGIN_OPCODE(UPTO): - BEGIN_OPCODE(MINUPTO): - min = 0; - stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1); - minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO; - stack.currentFrame->args.instructionPtr += 3; - goto REPEATCHAR; - - BEGIN_OPCODE(STAR): - BEGIN_OPCODE(MINSTAR): - BEGIN_OPCODE(PLUS): - BEGIN_OPCODE(MINPLUS): - BEGIN_OPCODE(QUERY): - BEGIN_OPCODE(MINQUERY): - repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max); - - /* Common code for all repeated single-character matches. We can give - up quickly if there are fewer than the minimum number of characters left in - the subject. 
*/ - - REPEATCHAR: - - stack.currentFrame->locals.length = 1; - getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length); - if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.endSubject - stack.currentFrame->args.subjectPtr) - RRETURN_NO_MATCH; - stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length; - - if (stack.currentFrame->locals.fc <= 0xFFFF) { - othercase = md.ignoreCase ? jsc_pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1; - - for (int i = 1; i <= min; i++) { - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase) - RRETURN_NO_MATCH; - ++stack.currentFrame->args.subjectPtr; - } - - if (min == stack.currentFrame->locals.max) - NEXT_OPCODE; - - if (minimize) { - stack.currentFrame->locals.repeatOthercase = othercase; - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { - RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN; - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeatOthercase) - RRETURN; - ++stack.currentFrame->args.subjectPtr; - } - /* Control never reaches here */ - } else { - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - break; - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase) - break; - ++stack.currentFrame->args.subjectPtr; - } - while 
(stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) { - RECURSIVE_MATCH(29, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - --stack.currentFrame->args.subjectPtr; - } - RRETURN_NO_MATCH; - } - /* Control never reaches here */ - } else { - /* No case on surrogate pairs, so no need to bother with "othercase". */ - - for (int i = 1; i <= min; i++) { - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc) - RRETURN_NO_MATCH; - stack.currentFrame->args.subjectPtr += 2; - } - - if (min == stack.currentFrame->locals.max) - NEXT_OPCODE; - - if (minimize) { - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { - RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN; - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc) - RRETURN; - stack.currentFrame->args.subjectPtr += 2; - } - /* Control never reaches here */ - } else { - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (stack.currentFrame->args.subjectPtr > md.endSubject - 2) - break; - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc) - break; - stack.currentFrame->args.subjectPtr += 2; - } - while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) { - RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - stack.currentFrame->args.subjectPtr -= 2; - } - RRETURN_NO_MATCH; - } - /* Control never reaches here */ - } - /* Control never reaches here */ - - /* Match a negated single 
one-byte character. */ - - BEGIN_OPCODE(NOT): { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN_NO_MATCH; - int b = stack.currentFrame->args.instructionPtr[1]; - int c = *stack.currentFrame->args.subjectPtr++; - stack.currentFrame->args.instructionPtr += 2; - if (md.ignoreCase) { - if (c < 128) - c = toLowerCase(c); - if (toLowerCase(b) == c) - RRETURN_NO_MATCH; - } else { - if (b == c) - RRETURN_NO_MATCH; - } - NEXT_OPCODE; - } - - /* Match a negated single one-byte character repeatedly. This is almost a - repeat of the code for a repeated single character, but I haven't found a - nice way of commoning these up that doesn't require a test of the - positive/negative option for each character match. Maybe that wouldn't add - very much to the time taken, but character matching *is* what this is all - about... */ - - BEGIN_OPCODE(NOTEXACT): - min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1); - minimize = false; - stack.currentFrame->args.instructionPtr += 3; - goto REPEATNOTCHAR; - - BEGIN_OPCODE(NOTUPTO): - BEGIN_OPCODE(NOTMINUPTO): - min = 0; - stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1); - minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO; - stack.currentFrame->args.instructionPtr += 3; - goto REPEATNOTCHAR; - - BEGIN_OPCODE(NOTSTAR): - BEGIN_OPCODE(NOTMINSTAR): - BEGIN_OPCODE(NOTPLUS): - BEGIN_OPCODE(NOTMINPLUS): - BEGIN_OPCODE(NOTQUERY): - BEGIN_OPCODE(NOTMINQUERY): - repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max); - - /* Common code for all repeated single-byte matches. We can give up quickly - if there are fewer than the minimum number of bytes left in the - subject. 
*/ - - REPEATNOTCHAR: - if (min > md.endSubject - stack.currentFrame->args.subjectPtr) - RRETURN_NO_MATCH; - stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++; - - /* The code is duplicated for the caseless and caseful cases, for speed, - since matching characters is likely to be quite common. First, ensure the - minimum number of matches are present. If min = max, continue at the same - level without recursing. Otherwise, if minimizing, keep trying the rest of - the expression and advancing one matching character if failing, up to the - maximum. Alternatively, if maximizing, find the maximum number of - characters and work backwards. */ - - DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max)); - - if (md.ignoreCase) { - if (stack.currentFrame->locals.fc < 128) - stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc); - - for (int i = 1; i <= min; i++) { - int d = *stack.currentFrame->args.subjectPtr++; - if (d < 128) - d = toLowerCase(d); - if (stack.currentFrame->locals.fc == d) - RRETURN_NO_MATCH; - } - - if (min == stack.currentFrame->locals.max) - NEXT_OPCODE; - - if (minimize) { - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { - RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - int d = *stack.currentFrame->args.subjectPtr++; - if (d < 128) - d = toLowerCase(d); - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d) - RRETURN; - } - /* Control never reaches here */ - } - - /* Maximize case */ - - else { - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; - - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - break; - int d = 
*stack.currentFrame->args.subjectPtr; - if (d < 128) - d = toLowerCase(d); - if (stack.currentFrame->locals.fc == d) - break; - ++stack.currentFrame->args.subjectPtr; - } - for (;;) { - RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) - break; /* Stop if tried at original pos */ - } - - RRETURN; - } - /* Control never reaches here */ - } - - /* Caseful comparisons */ - - else { - for (int i = 1; i <= min; i++) { - int d = *stack.currentFrame->args.subjectPtr++; - if (stack.currentFrame->locals.fc == d) - RRETURN_NO_MATCH; - } - - if (min == stack.currentFrame->locals.max) - NEXT_OPCODE; - - if (minimize) { - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { - RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - int d = *stack.currentFrame->args.subjectPtr++; - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d) - RRETURN; - } - /* Control never reaches here */ - } - - /* Maximize case */ - - else { - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; - - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - break; - int d = *stack.currentFrame->args.subjectPtr; - if (stack.currentFrame->locals.fc == d) - break; - ++stack.currentFrame->args.subjectPtr; - } - for (;;) { - RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) - break; /* Stop if tried at original pos */ - } - - RRETURN; - 
} - } - /* Control never reaches here */ - - /* Match a single character type repeatedly; several different opcodes - share code. This is very similar to the code for single characters, but we - repeat it in the interests of efficiency. */ - - BEGIN_OPCODE(TYPEEXACT): - min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1); - minimize = true; - stack.currentFrame->args.instructionPtr += 3; - goto REPEATTYPE; - - BEGIN_OPCODE(TYPEUPTO): - BEGIN_OPCODE(TYPEMINUPTO): - min = 0; - stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1); - minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO; - stack.currentFrame->args.instructionPtr += 3; - goto REPEATTYPE; - - BEGIN_OPCODE(TYPESTAR): - BEGIN_OPCODE(TYPEMINSTAR): - BEGIN_OPCODE(TYPEPLUS): - BEGIN_OPCODE(TYPEMINPLUS): - BEGIN_OPCODE(TYPEQUERY): - BEGIN_OPCODE(TYPEMINQUERY): - repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max); - - /* Common code for all repeated single character type matches. Note that - in UTF-8 mode, '.' matches a character of any length, but for the other - character types, the valid characters are all one-byte long. */ - - REPEATTYPE: - stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++; /* Code for the character type */ - - /* First, ensure the minimum number of matches are present. Use inline - code for maximizing the speed, and do the type test once at the start - (i.e. keep it out of the loop). Also we can test that there are at least - the minimum number of characters before we start. 
*/ - - if (min > md.endSubject - stack.currentFrame->args.subjectPtr) - RRETURN_NO_MATCH; - if (min > 0) { - switch (stack.currentFrame->locals.ctype) { - case OP_NOT_NEWLINE: - for (int i = 1; i <= min; i++) { - if (isNewline(*stack.currentFrame->args.subjectPtr)) - RRETURN_NO_MATCH; - ++stack.currentFrame->args.subjectPtr; - } - break; - - case OP_NOT_DIGIT: - for (int i = 1; i <= min; i++) { - if (isASCIIDigit(*stack.currentFrame->args.subjectPtr)) - RRETURN_NO_MATCH; - ++stack.currentFrame->args.subjectPtr; - } - break; - - case OP_DIGIT: - for (int i = 1; i <= min; i++) { - if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr)) - RRETURN_NO_MATCH; - ++stack.currentFrame->args.subjectPtr; - } - break; - - case OP_NOT_WHITESPACE: - for (int i = 1; i <= min; i++) { - if (isSpaceChar(*stack.currentFrame->args.subjectPtr)) - RRETURN_NO_MATCH; - ++stack.currentFrame->args.subjectPtr; - } - break; - - case OP_WHITESPACE: - for (int i = 1; i <= min; i++) { - if (!isSpaceChar(*stack.currentFrame->args.subjectPtr)) - RRETURN_NO_MATCH; - ++stack.currentFrame->args.subjectPtr; - } - break; - - case OP_NOT_WORDCHAR: - for (int i = 1; i <= min; i++) { - if (isWordChar(*stack.currentFrame->args.subjectPtr)) - RRETURN_NO_MATCH; - ++stack.currentFrame->args.subjectPtr; - } - break; - - case OP_WORDCHAR: - for (int i = 1; i <= min; i++) { - if (!isWordChar(*stack.currentFrame->args.subjectPtr)) - RRETURN_NO_MATCH; - ++stack.currentFrame->args.subjectPtr; - } - break; - - default: - JS_NOT_REACHED("Invalid character type."); - return matchError(JSRegExpErrorInternal, stack); - } /* End switch(stack.currentFrame->locals.ctype) */ - } - - /* If min = max, continue at the same level without recursing */ - - if (min == stack.currentFrame->locals.max) - NEXT_OPCODE; - - /* If minimizing, we have to test the rest of the pattern before each - subsequent match. 
*/ - - if (minimize) { - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { - RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject) - RRETURN; - - int c = *stack.currentFrame->args.subjectPtr++; - switch (stack.currentFrame->locals.ctype) { - case OP_NOT_NEWLINE: - if (isNewline(c)) - RRETURN; - break; - - case OP_NOT_DIGIT: - if (isASCIIDigit(c)) - RRETURN; - break; - - case OP_DIGIT: - if (!isASCIIDigit(c)) - RRETURN; - break; - - case OP_NOT_WHITESPACE: - if (isSpaceChar(c)) - RRETURN; - break; - - case OP_WHITESPACE: - if (!isSpaceChar(c)) - RRETURN; - break; - - case OP_NOT_WORDCHAR: - if (isWordChar(c)) - RRETURN; - break; - - case OP_WORDCHAR: - if (!isWordChar(c)) - RRETURN; - break; - - default: - JS_NOT_REACHED("Invalid character type."); - return matchError(JSRegExpErrorInternal, stack); - } - } - /* Control never reaches here */ - } - - /* If maximizing it is worth using inline code for speed, doing the type - test once at the start (i.e. keep it out of the loop). 
*/ - - else { - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; /* Remember where we started */ - - switch (stack.currentFrame->locals.ctype) { - case OP_NOT_NEWLINE: - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject || isNewline(*stack.currentFrame->args.subjectPtr)) - break; - stack.currentFrame->args.subjectPtr++; - } - break; - - case OP_NOT_DIGIT: - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - break; - int c = *stack.currentFrame->args.subjectPtr; - if (isASCIIDigit(c)) - break; - ++stack.currentFrame->args.subjectPtr; - } - break; - - case OP_DIGIT: - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - break; - int c = *stack.currentFrame->args.subjectPtr; - if (!isASCIIDigit(c)) - break; - ++stack.currentFrame->args.subjectPtr; - } - break; - - case OP_NOT_WHITESPACE: - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - break; - int c = *stack.currentFrame->args.subjectPtr; - if (isSpaceChar(c)) - break; - ++stack.currentFrame->args.subjectPtr; - } - break; - - case OP_WHITESPACE: - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - break; - int c = *stack.currentFrame->args.subjectPtr; - if (!isSpaceChar(c)) - break; - ++stack.currentFrame->args.subjectPtr; - } - break; - - case OP_NOT_WORDCHAR: - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if (stack.currentFrame->args.subjectPtr >= md.endSubject) - break; - int c = *stack.currentFrame->args.subjectPtr; - if (isWordChar(c)) - break; - ++stack.currentFrame->args.subjectPtr; - } - break; - - case OP_WORDCHAR: - for (int i = min; i < stack.currentFrame->locals.max; i++) { - if 
(stack.currentFrame->args.subjectPtr >= md.endSubject) - break; - int c = *stack.currentFrame->args.subjectPtr; - if (!isWordChar(c)) - break; - ++stack.currentFrame->args.subjectPtr; - } - break; - - default: - JS_NOT_REACHED("Invalid character type."); - return matchError(JSRegExpErrorInternal, stack); - } - - /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */ - - for (;;) { - RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); - if (isMatch) - RRETURN; - if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) - break; /* Stop if tried at original pos */ - } - - /* Get here if we can't make it match with any permitted repetitions */ - - RRETURN; - } - /* Control never reaches here */ - - BEGIN_OPCODE(CRMINPLUS): - BEGIN_OPCODE(CRMINQUERY): - BEGIN_OPCODE(CRMINRANGE): - BEGIN_OPCODE(CRMINSTAR): - BEGIN_OPCODE(CRPLUS): - BEGIN_OPCODE(CRQUERY): - BEGIN_OPCODE(CRRANGE): - BEGIN_OPCODE(CRSTAR): - JS_NOT_REACHED("Invalid opcode."); - return matchError(JSRegExpErrorInternal, stack); - -#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP - CAPTURING_BRACKET: -#else - default: -#endif - /* Opening capturing bracket. If there is space in the offset vector, save - the current subject position in the working slot at the top of the vector. We - mustn't change the current values of the data slot, because they may be set - from a previous iteration of this group, and be referred to by a reference - inside the group. - - If the bracket fails to match, we need to restore this value and also the - values of the final offsets, in case they were set by a previous iteration of - the same bracket. - - If there isn't enough space in the offset vector, treat this as if it were a - non-capturing bracket. Don't worry about setting the flag for the error case - here; that is handled in the code for KET. 
*/ - - JS_ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA); - - LOCALS(number) = *stack.currentFrame->args.instructionPtr - OP_BRA; - stack.currentFrame->extractBrackets(stack.currentFrame->args.instructionPtr); - DPRINTF(("opening capturing bracket %d\n", stack.currentFrame->locals.number)); - - /* For extended extraction brackets (large number), we have to fish out the - number from a dummy opcode at the start. */ - - if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX) - stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->args.instructionPtr + 4 + LINK_SIZE); - stack.currentFrame->locals.offset = 2 * stack.currentFrame->locals.number; - - JS_ASSERT_IF(LOCALS(number), LOCALS(minBracket) <= LOCALS(number) && LOCALS(number) < LOCALS(limitBracket)); - - if (stack.currentFrame->locals.offset < md.offsetMax) { - stack.currentFrame->locals.savedSubjectOffset = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number]; - DPRINTF(("setting subject offset for bracket to %d\n", stack.currentFrame->args.subjectPtr - md.startSubject)); - md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.startSubject; - stack.currentFrame->locals.skipBytes = 3; /* For OP_BRAs. */ - - /* We must compute this value at the top, before we move the instruction pointer. */ - stack.currentFrame->locals.minSatisfied = minSatNextBracket.readAndClear(); - do { - /* We need to extract this into a variable so we can correctly pass it by value - through RECURSIVE_MATCH_NEW_GROUP, which modifies currentFrame. */ - minSatisfied = stack.currentFrame->locals.minSatisfied; - RECURSIVE_MATCH_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, stack.currentFrame->args.bracketChain, minSatisfied); - if (isMatch) - RRETURN; - stack.currentFrame->locals.skipBytes = 1; /* For OP_ALTs. 
*/ - stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); - } while (*stack.currentFrame->args.instructionPtr == OP_ALT); - - DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number)); - for (size_t i = LOCALS(minBracket); i < size_t(LOCALS(limitBracket)); ++i) - md.setOffsetPair(i, -1, -1); - DPRINTF(("restoring subject offset for bracket to %d\n", stack.currentFrame->locals.savedSubjectOffset)); - md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->locals.savedSubjectOffset; - - RRETURN; - } - - /* Insufficient room for saving captured contents */ - - goto NON_CAPTURING_BRACKET; - } - - /* Do not stick any code in here without much thought; it is assumed - that "continue" in the code above comes out to here to repeat the main - loop. */ - - } /* End of main loop */ - - JS_NOT_REACHED("Loop does not fallthru."); - -#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION - -RRETURN_SWITCH: - switch (stack.currentFrame->returnLocation) { - case 0: goto RETURN; - case 1: goto RRETURN_1; - case 2: goto RRETURN_2; - case 6: goto RRETURN_6; - case 7: goto RRETURN_7; - case 14: goto RRETURN_14; - case 15: goto RRETURN_15; - case 16: goto RRETURN_16; - case 17: goto RRETURN_17; - case 18: goto RRETURN_18; - case 19: goto RRETURN_19; - case 20: goto RRETURN_20; - case 21: goto RRETURN_21; - case 22: goto RRETURN_22; - case 24: goto RRETURN_24; - case 26: goto RRETURN_26; - case 27: goto RRETURN_27; - case 28: goto RRETURN_28; - case 29: goto RRETURN_29; - case 30: goto RRETURN_30; - case 31: goto RRETURN_31; - case 38: goto RRETURN_38; - case 40: goto RRETURN_40; - case 42: goto RRETURN_42; - case 44: goto RRETURN_44; - case 48: goto RRETURN_48; - case 52: goto RRETURN_52; - } - - JS_NOT_REACHED("Bad computed return location."); - return matchError(JSRegExpErrorInternal, stack); - -#endif - -RETURN: - return isMatch; -} - - -/************************************************* -* Execute a 
Regular Expression * -*************************************************/ - -/* This function applies a compiled re to a subject string and picks out -portions of the string if it matches. Two elements in the vector are set for -each substring: the offsets to the start and end of the substring. - -Arguments: - re points to the compiled expression - extra_data points to extra data or is NULL - subject points to the subject string - length length of subject string (may contain binary zeros) - start_offset where to start in the subject string - options option bits - offsets points to a vector of ints to be filled in with offsets - offsetCount the number of elements in the vector - -Returns: > 0 => success; value is the number of elements filled in - = 0 => success, but offsets is not big enough - -1 => failed to match - < -1 => some kind of unexpected problem -*/ - -static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int firstByte, bool firstByteIsCaseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart) -{ - // If firstByte is set, try scanning to the first instance of that byte - // no need to try and match against any earlier part of the subject string. 
- if (firstByte >= 0) { - UChar firstChar = firstByte; - if (firstByteIsCaseless) - while (subjectPtr < endSubject) { - int c = *subjectPtr; - if (c > 127) - break; - if (toLowerCase(c) == firstChar) - break; - subjectPtr++; - } - else { - while (subjectPtr < endSubject && *subjectPtr != firstChar) - subjectPtr++; - } - } else if (useMultiLineFirstCharOptimization) { - /* Or to just after \n for a multiline match if possible */ - // I'm not sure why this != originalSubjectStart check is necessary -- ecs 11/18/07 - if (subjectPtr > originalSubjectStart) { - while (subjectPtr < endSubject && !isNewline(subjectPtr[-1])) - subjectPtr++; - } - } -} - -static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int reqByte, int reqByte2, bool reqByteIsCaseless, bool hasFirstByte, const UChar*& reqBytePtr) -{ - /* If reqByte is set, we know that that character must appear in the subject - for the match to succeed. If the first character is set, reqByte must be - later in the subject; otherwise the test starts at the match point. This - optimization can save a huge amount of backtracking in patterns with nested - unlimited repeats that aren't going to match. Writing separate code for - cased/caseless versions makes it go faster, as does using an autoincrement - and backing off on a match. - - HOWEVER: when the subject string is very, very long, searching to its end can - take a long time, and give bad performance on quite ordinary patterns. This - showed up when somebody was matching /^C/ on a 32-megabyte string... so we - don't do this when the string is sufficiently long. - */ - - if (reqByte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) { - const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0); - - /* We don't need to repeat the search if we haven't yet reached the - place we found it at last time. 
*/ - - if (p > reqBytePtr) { - if (reqByteIsCaseless) { - while (p < endSubject) { - int pp = *p++; - if (pp == reqByte || pp == reqByte2) { - p--; - break; - } - } - } else { - while (p < endSubject) { - if (*p++ == reqByte) { - p--; - break; - } - } - } - - /* If we can't find the required character, break the matching loop */ - - if (p >= endSubject) - return true; - - /* If we have found the required character, save the point where we - found it, so that we don't search again next time round the loop if - the start hasn't passed this character yet. */ - - reqBytePtr = p; - } - } - return false; -} - -int jsRegExpExecute(JSContext *cx, const JSRegExp* re, - const UChar* subject, int length, int start_offset, int* offsets, - int offsetCount) -{ - JS_ASSERT(re); - JS_ASSERT(subject || !length); - JS_ASSERT(offsetCount >= 0); - JS_ASSERT(offsets || offsetCount == 0); - - MatchData matchBlock; - matchBlock.startSubject = subject; - matchBlock.endSubject = matchBlock.startSubject + length; - const UChar* endSubject = matchBlock.endSubject; - - matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption); - matchBlock.ignoreCase = (re->options & IgnoreCaseOption); - - /* Use the vector supplied, rounding down its size to a multiple of 3. */ - int ocount = offsetCount - (offsetCount % 3); - - matchBlock.offsetVector = offsets; - matchBlock.offsetEnd = ocount; - matchBlock.offsetMax = (2*ocount)/3; - matchBlock.offsetOverflow = false; - - /* Compute the minimum number of offsets that we need to reset each time. Doing - this makes a huge difference to execution time when there aren't many brackets - in the pattern. */ - - int resetCount = 2 + re->topBracket * 2; - if (resetCount > offsetCount) - resetCount = ocount; - - /* Reset the working variable associated with each extraction. These should - never be used unless previously set, but they get saved and restored, and so we - initialize them to avoid reading uninitialized locations. 
*/ - - if (matchBlock.offsetVector) { - int* iptr = matchBlock.offsetVector + ocount; - int* iend = iptr - resetCount/2 + 1; - while (--iptr >= iend) - *iptr = -1; - } - - /* Set up the first character to match, if available. The firstByte value is - never set for an anchored regular expression, but the anchoring may be forced - at run time, so we have to test for anchoring. The first char may be unset for - an unanchored pattern, of course. If there's no first char and the pattern was - studied, there may be a bitmap of possible first characters. */ - - bool firstByteIsCaseless = false; - int firstByte = -1; - if (re->options & UseFirstByteOptimizationOption) { - firstByte = re->firstByte & 255; - if ((firstByteIsCaseless = (re->firstByte & REQ_IGNORE_CASE))) - firstByte = toLowerCase(firstByte); - } - - /* For anchored or unanchored matches, there may be a "last known required - character" set. */ - - bool reqByteIsCaseless = false; - int reqByte = -1; - int reqByte2 = -1; - if (re->options & UseRequiredByteOptimizationOption) { - reqByte = re->reqByte & 255; - reqByteIsCaseless = (re->reqByte & REQ_IGNORE_CASE); - reqByte2 = flipCase(reqByte); - } - - /* Loop for handling unanchored repeated matching attempts; for anchored regexs - the loop runs just once. */ - - const UChar* startMatch = subject + start_offset; - const UChar* reqBytePtr = startMatch - 1; - bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption; - - do { - /* Reset the maximum number of extractions we might see. 
*/ - if (matchBlock.offsetVector) { - int* iptr = matchBlock.offsetVector; - int* iend = iptr + resetCount; - while (iptr < iend) - *iptr++ = -1; - } - - tryFirstByteOptimization(startMatch, endSubject, firstByte, firstByteIsCaseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset); - if (tryRequiredByteOptimization(startMatch, endSubject, reqByte, reqByte2, reqByteIsCaseless, firstByte >= 0, reqBytePtr)) - break; - - /* When a match occurs, substrings will be set for all internal extractions; - we just need to set up the whole thing as substring 0 before returning. If - there were too many extractions, set the return code to zero. In the case - where we had to get some local store to hold offsets for backreferences, copy - those back references that we can. In this case there need not be overflow - if certain parts of the pattern were not used. */ - - /* The code starts after the JSRegExp block and the capture name table. */ - const unsigned char* start_code = (const unsigned char*)(re + 1); - - int returnCode = match(&cx->regExpPool, startMatch, start_code, 2, matchBlock); - - /* When the result is no match, advance the pointer to the next character - and continue. */ - if (returnCode == 0) { - startMatch++; - continue; - } - - if (returnCode != 1) { - JS_ASSERT(returnCode == JSRegExpErrorHitLimit); - DPRINTF((">>>> error: returning %d\n", returnCode)); - return returnCode; - } - - /* We have a match! */ - - returnCode = matchBlock.offsetOverflow ? 
0 : matchBlock.endOffsetTop / 2; - - if (offsetCount < 2) - returnCode = 0; - else { - offsets[0] = startMatch - matchBlock.startSubject; - offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject; - } - - JS_ASSERT(returnCode >= 0); - DPRINTF((">>>> returning %d\n", returnCode)); - return returnCode; - } while (!(re->options & IsAnchoredOption) && startMatch <= endSubject); - - DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n")); - return JSRegExpErrorNoMatch; -} diff --git a/js/src/yarr/pcre/pcre_internal.h b/js/src/yarr/pcre/pcre_internal.h deleted file mode 100644 index d677cfcfa255..000000000000 --- a/js/src/yarr/pcre/pcre_internal.h +++ /dev/null @@ -1,434 +0,0 @@ -/* This is JavaScriptCore's variant of the PCRE library. While this library -started out as a copy of PCRE, many of the features of PCRE have been -removed. This library now supports only the regular expression features -required by the JavaScript language specification, and has only the functions -needed by JavaScriptCore and the rest of WebKit. - - Originally written by Philip Hazel - Copyright (c) 1997-2006 University of Cambridge - Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/* This header contains definitions that are shared between the different -modules, but which are not relevant to the exported API. This includes some -functions whose names all begin with "_pcre_". */ - -#ifndef PCRE_INTERNAL_H -#define PCRE_INTERNAL_H - -/* Bit definitions for entries in the pcre_ctypes table. */ - -#define ctype_space 0x01 -#define ctype_xdigit 0x08 -#define ctype_word 0x10 /* alphameric or '_' */ - -/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set -of bits for a class map. Some classes are built by combining these tables. */ - -#define cbit_space 0 /* \s */ -#define cbit_digit 32 /* \d */ -#define cbit_word 64 /* \w */ -#define cbit_length 96 /* Length of the cbits table */ - -/* Offsets of the various tables from the base tables pointer, and -total length. 
*/ - -#define lcc_offset 0 -#define fcc_offset 128 -#define cbits_offset 256 -#define ctypes_offset (cbits_offset + cbit_length) -#define tables_length (ctypes_offset + 128) - -#ifndef DFTABLES - -#include "pcre.h" - -/* The value of LINK_SIZE determines the number of bytes used to store links as -offsets within the compiled regex. The default is 2, which allows for compiled -patterns up to 64K long. */ - -#define LINK_SIZE 3 - -/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef -inline, and there are *still* stupid compilers about that don't like indented -pre-processor statements, or at least there were when I first wrote this. After -all, it had only been about 10 years then... */ - -#ifdef DEBUG -#define DPRINTF(p) /*printf p; fflush(stdout);*/ -#else -#define DPRINTF(p) /*nothing*/ -#endif - -/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored -in big-endian order) by default. These are used, for example, to link from the -start of a subpattern to its alternatives and its end. The use of 2 bytes per -offset limits the size of the compiled regex to around 64K, which is big enough -for almost everybody. However, I received a request for an even bigger limit. -For this reason, and also to make the code easier to maintain, the storing and -loading of offsets from the byte string is now handled by the functions that are -defined here. */ - -/* PCRE uses some other 2-byte quantities that do not change when the size of -offsets changes. There are used for repeat counts and for other things such as -capturing parenthesis numbers in back references. 
*/ - -static inline void put2ByteValue(unsigned char* opcodePtr, int value) -{ - JS_ASSERT(value >= 0 && value <= 0xFFFF); - opcodePtr[0] = value >> 8; - opcodePtr[1] = value; -} - -static inline void put3ByteValue(unsigned char* opcodePtr, int value) -{ - JS_ASSERT(value >= 0 && value <= 0xFFFFFF); - opcodePtr[0] = value >> 16; - opcodePtr[1] = value >> 8; - opcodePtr[2] = value; -} - -static inline int get2ByteValue(const unsigned char* opcodePtr) -{ - return (opcodePtr[0] << 8) | opcodePtr[1]; -} - -static inline int get3ByteValue(const unsigned char* opcodePtr) -{ - return (opcodePtr[0] << 16) | (opcodePtr[1] << 8) | opcodePtr[2]; -} - -static inline void put2ByteValueAndAdvance(unsigned char*& opcodePtr, int value) -{ - put2ByteValue(opcodePtr, value); - opcodePtr += 2; -} - -static inline void put3ByteValueAndAdvance(unsigned char*& opcodePtr, int value) -{ - put3ByteValue(opcodePtr, value); - opcodePtr += 3; -} - -static inline void putLinkValueAllowZero(unsigned char* opcodePtr, int value) -{ -#if LINK_SIZE == 3 - put3ByteValue(opcodePtr, value); -#elif LINK_SIZE == 2 - put2ByteValue(opcodePtr, value); -#else -# error LINK_SIZE not supported. -#endif -} - -static inline int getLinkValueAllowZero(const unsigned char* opcodePtr) -{ -#if LINK_SIZE == 3 - return get3ByteValue(opcodePtr); -#elif LINK_SIZE == 2 - return get2ByteValue(opcodePtr); -#else -# error LINK_SIZE not supported. -#endif -} - -#define MAX_PATTERN_SIZE 1024 * 1024 // Derived by empirical testing of compile time in PCRE and WREC. 
-JS_STATIC_ASSERT(MAX_PATTERN_SIZE < (1 << (8 * LINK_SIZE))); - -static inline void putLinkValue(unsigned char* opcodePtr, int value) -{ - JS_ASSERT(value); - putLinkValueAllowZero(opcodePtr, value); -} - -static inline int getLinkValue(const unsigned char* opcodePtr) -{ - int value = getLinkValueAllowZero(opcodePtr); - JS_ASSERT(value); - return value; -} - -static inline void putLinkValueAndAdvance(unsigned char*& opcodePtr, int value) -{ - putLinkValue(opcodePtr, value); - opcodePtr += LINK_SIZE; -} - -static inline void putLinkValueAllowZeroAndAdvance(unsigned char*& opcodePtr, int value) -{ - putLinkValueAllowZero(opcodePtr, value); - opcodePtr += LINK_SIZE; -} - -// FIXME: These are really more of a "compiled regexp state" than "regexp options" -enum RegExpOptions { - UseFirstByteOptimizationOption = 0x40000000, /* firstByte is set */ - UseRequiredByteOptimizationOption = 0x20000000, /* reqByte is set */ - UseMultiLineFirstByteOptimizationOption = 0x10000000, /* start after \n for multiline */ - IsAnchoredOption = 0x02000000, /* can't use partial with this regex */ - IgnoreCaseOption = 0x00000001, - MatchAcrossMultipleLinesOption = 0x00000002 -}; - -/* Flags added to firstByte or reqByte; a "non-literal" item is either a -variable-length repeat, or a anything other than literal characters. */ - -#define REQ_IGNORE_CASE 0x0100 /* indicates should ignore case */ -#define REQ_VARY 0x0200 /* reqByte followed non-literal item */ - -/* Miscellaneous definitions */ - -/* Flag bits and data types for the extended class (OP_XCLASS) for classes that -contain UTF-8 characters with values greater than 255. 
*/ - -#define XCL_NOT 0x01 /* Flag: this is a negative class */ -#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ - -#define XCL_END 0 /* Marks end of individual items */ -#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ -#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ - -/* These are escaped items that aren't just an encoding of a particular data -value such as \n. They must have non-zero values, as check_escape() returns -their negation. Also, they must appear in the same order as in the opcode -definitions below, up to ESC_w. The final one must be -ESC_REF as subsequent values are used for \1, \2, \3, etc. There is are two -tests in the code for an escape > ESC_b and <= ESC_w to -detect the types that may be repeated. These are the types that consume -characters. If any new escapes are put in between that don't consume a -character, that code will have to change. */ - -enum { ESC_B = 1, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_REF }; - -/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets -that extract substrings. Starting from 1 (i.e. after OP_END), the values up to -OP_EOD must correspond in order to the list of escapes immediately above. -Note that whenever this list is updated, the two macro definitions that follow -must also be updated to match. 
*/ - -#define FOR_EACH_OPCODE(macro) \ - macro(END) \ - \ - , macro(NOT_WORD_BOUNDARY) \ - , macro(WORD_BOUNDARY) \ - , macro(NOT_DIGIT) \ - , macro(DIGIT) \ - , macro(NOT_WHITESPACE) \ - , macro(WHITESPACE) \ - , macro(NOT_WORDCHAR) \ - , macro(WORDCHAR) \ - \ - , macro(NOT_NEWLINE) \ - \ - , macro(CIRC) \ - , macro(DOLL) \ - , macro(BOL) \ - , macro(EOL) \ - , macro(CHAR) \ - , macro(CHAR_IGNORING_CASE) \ - , macro(ASCII_CHAR) \ - , macro(ASCII_LETTER_IGNORING_CASE) \ - , macro(NOT) \ - \ - , macro(STAR) \ - , macro(MINSTAR) \ - , macro(PLUS) \ - , macro(MINPLUS) \ - , macro(QUERY) \ - , macro(MINQUERY) \ - , macro(UPTO) \ - , macro(MINUPTO) \ - , macro(EXACT) \ - \ - , macro(NOTSTAR) \ - , macro(NOTMINSTAR) \ - , macro(NOTPLUS) \ - , macro(NOTMINPLUS) \ - , macro(NOTQUERY) \ - , macro(NOTMINQUERY) \ - , macro(NOTUPTO) \ - , macro(NOTMINUPTO) \ - , macro(NOTEXACT) \ - \ - , macro(TYPESTAR) \ - , macro(TYPEMINSTAR) \ - , macro(TYPEPLUS) \ - , macro(TYPEMINPLUS) \ - , macro(TYPEQUERY) \ - , macro(TYPEMINQUERY) \ - , macro(TYPEUPTO) \ - , macro(TYPEMINUPTO) \ - , macro(TYPEEXACT) \ - \ - , macro(CRSTAR) \ - , macro(CRMINSTAR) \ - , macro(CRPLUS) \ - , macro(CRMINPLUS) \ - , macro(CRQUERY) \ - , macro(CRMINQUERY) \ - , macro(CRRANGE) \ - , macro(CRMINRANGE) \ - \ - , macro(CLASS) \ - , macro(NCLASS) \ - , macro(XCLASS) \ - \ - , macro(REF) \ - \ - , macro(ALT) \ - , macro(KET) \ - , macro(KETRMAX) \ - , macro(KETRMIN) \ - \ - , macro(ASSERT) \ - , macro(ASSERT_NOT) \ - \ - , macro(BRAZERO) \ - , macro(BRAMINZERO) \ - , macro(BRANUMBER) \ - , macro(BRA) - -#define OPCODE_ENUM_VALUE(opcode) OP_##opcode -enum { FOR_EACH_OPCODE(OPCODE_ENUM_VALUE) }; - -/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and -study.c that all opcodes are less than 128 in value. This makes handling UTF-8 -character sequences easier. */ - -/* The highest extraction number before we have to start using additional -bytes. 
(Originally PCRE didn't have support for extraction counts higher than -this number.) The value is limited by the number of opcodes left after OP_BRA, -i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional -opcodes. */ - -/* FIXME: Note that OP_BRA + 100 is > 128, so the two comments above -are in conflict! */ - -#define EXTRACT_BASIC_MAX 100 - -/* The code vector runs on as long as necessary after the end. */ - -struct JSRegExp { - unsigned options; - - unsigned short topBracket; - unsigned short topBackref; - - unsigned short firstByte; - unsigned short reqByte; -}; - -/* Internal shared data tables. These are tables that are used by more than one - of the exported public functions. They have to be "external" in the C sense, - but are not part of the PCRE public API. The data for these tables is in the - pcre_tables.c module. */ - -#define jsc_pcre_utf8_table1_size 6 - -extern const int jsc_pcre_utf8_table1[6]; -extern const int jsc_pcre_utf8_table2[6]; -extern const int jsc_pcre_utf8_table3[6]; -extern const unsigned char jsc_pcre_utf8_table4[0x40]; - -extern const unsigned char jsc_pcre_default_tables[tables_length]; - -static inline unsigned char toLowerCase(unsigned char c) -{ - static const unsigned char* lowerCaseChars = jsc_pcre_default_tables + lcc_offset; - return lowerCaseChars[c]; -} - -static inline unsigned char flipCase(unsigned char c) -{ - static const unsigned char* flippedCaseChars = jsc_pcre_default_tables + fcc_offset; - return flippedCaseChars[c]; -} - -static inline unsigned char classBitmapForChar(unsigned char c) -{ - static const unsigned char* charClassBitmaps = jsc_pcre_default_tables + cbits_offset; - return charClassBitmaps[c]; -} - -static inline unsigned char charTypeForChar(unsigned char c) -{ - const unsigned char* charTypeMap = jsc_pcre_default_tables + ctypes_offset; - return charTypeMap[c]; -} - -static inline bool isWordChar(UChar c) -{ - return c < 128 && (charTypeForChar(c) & ctype_word); -} - 
-static inline bool isSpaceChar(UChar c) -{ - return (c < 128 && (charTypeForChar(c) & ctype_space)) || c == 0x00A0; -} - -static inline bool isNewline(UChar nl) -{ - return (nl == 0xA || nl == 0xD || nl == 0x2028 || nl == 0x2029); -} - -static inline bool isBracketStartOpcode(unsigned char opcode) -{ - if (opcode >= OP_BRA) - return true; - switch (opcode) { - case OP_ASSERT: - case OP_ASSERT_NOT: - return true; - default: - return false; - } -} - -static inline void advanceToEndOfBracket(const unsigned char*& opcodePtr) -{ - JS_ASSERT(isBracketStartOpcode(*opcodePtr) || *opcodePtr == OP_ALT); - do - opcodePtr += getLinkValue(opcodePtr + 1); - while (*opcodePtr == OP_ALT); -} - -/* Internal shared functions. These are functions that are used in more -that one of the source files. They have to have external linkage, but -but are not part of the public API and so not exported from the library. */ - -extern int jsc_pcre_ucp_othercase(unsigned); -extern bool jsc_pcre_xclass(int, const unsigned char*); - -#endif - -#endif - -/* End of pcre_internal.h */ diff --git a/js/src/yarr/pcre/pcre_tables.cpp b/js/src/yarr/pcre/pcre_tables.cpp deleted file mode 100644 index b1ac229d5912..000000000000 --- a/js/src/yarr/pcre/pcre_tables.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* This is JavaScriptCore's variant of the PCRE library. While this library -started out as a copy of PCRE, many of the features of PCRE have been -removed. This library now supports only the regular expression features -required by the JavaScript language specification, and has only the functions -needed by JavaScriptCore and the rest of WebKit. - - Originally written by Philip Hazel - Copyright (c) 1997-2006 University of Cambridge - Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 
- ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/* This module contains some fixed tables that are used by more than one of the -PCRE code modules. */ - -#include "pcre_internal.h" - -/************************************************* -* Tables for UTF-8 support * -*************************************************/ - -/* These are the breakpoints for different numbers of bytes in a UTF-8 -character. 
*/ - -const int jsc_pcre_utf8_table1[6] = - { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; - -/* These are the indicator bits and the mask for the data bits to set in the -first byte of a character, indexed by the number of additional bytes. */ - -const int jsc_pcre_utf8_table2[6] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; -const int jsc_pcre_utf8_table3[6] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; - -/* Table of the number of extra characters, indexed by the first character -masked with 0x3f. The highest number for a valid UTF-8 character is in fact -0x3d. */ - -const unsigned char jsc_pcre_utf8_table4[0x40] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; - -#include "chartables.c" diff --git a/js/src/yarr/pcre/pcre_ucp_searchfuncs.cpp b/js/src/yarr/pcre/pcre_ucp_searchfuncs.cpp deleted file mode 100644 index b97db921c981..000000000000 --- a/js/src/yarr/pcre/pcre_ucp_searchfuncs.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* This is JavaScriptCore's variant of the PCRE library. While this library -started out as a copy of PCRE, many of the features of PCRE have been -removed. This library now supports only the regular expression features -required by the JavaScript language specification, and has only the functions -needed by JavaScriptCore and the rest of WebKit. - - Originally written by Philip Hazel - Copyright (c) 1997-2006 University of Cambridge - Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains code for searching the table of Unicode character -properties. */ - -#include "pcre_internal.h" - -#include "ucpinternal.h" /* Internal table details */ -#include "ucptable.cpp" /* The table itself */ - -/************************************************* -* Search table and return other case * -*************************************************/ - -/* If the given character is a letter, and there is another case for the -letter, return the other case. Otherwise, return -1. 
- -Arguments: - c the character value - -Returns: the other case or -1 if none -*/ - -int jsc_pcre_ucp_othercase(unsigned c) -{ - int bot = 0; - int top = sizeof(ucp_table) / sizeof(cnode); - int mid; - - /* The table is searched using a binary chop. You might think that using - intermediate variables to hold some of the common expressions would speed - things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it - makes things a lot slower. */ - - for (;;) { - if (top <= bot) - return -1; - mid = (bot + top) >> 1; - if (c == (ucp_table[mid].f0 & f0_charmask)) - break; - if (c < (ucp_table[mid].f0 & f0_charmask)) - top = mid; - else { - if ((ucp_table[mid].f0 & f0_rangeflag) && (c <= (ucp_table[mid].f0 & f0_charmask) + (ucp_table[mid].f1 & f1_rangemask))) - break; - bot = mid + 1; - } - } - - /* Found an entry in the table. Return -1 for a range entry. Otherwise return - the other case if there is one, else -1. */ - - if (ucp_table[mid].f0 & f0_rangeflag) - return -1; - - int offset = ucp_table[mid].f1 & f1_casemask; - if (offset & f1_caseneg) - offset |= f1_caseneg; - return !offset ? -1 : c + offset; -} diff --git a/js/src/yarr/pcre/pcre_xclass.cpp b/js/src/yarr/pcre/pcre_xclass.cpp deleted file mode 100644 index 8e59018ead0c..000000000000 --- a/js/src/yarr/pcre/pcre_xclass.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* This is JavaScriptCore's variant of the PCRE library. While this library -started out as a copy of PCRE, many of the features of PCRE have been -removed. This library now supports only the regular expression features -required by the JavaScript language specification, and has only the functions -needed by JavaScriptCore and the rest of WebKit. - - Originally written by Philip Hazel - Copyright (c) 1997-2006 University of Cambridge - Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 
- ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/* This module contains an internal function that is used to match an extended -class (one that contains characters whose values are > 255). 
*/ - -#include "pcre_internal.h" - -/************************************************* -* Match character against an XCLASS * -*************************************************/ - -/* This function is called to match a character against an extended class that -might contain values > 255. - -Arguments: - c the character - data points to the flag byte of the XCLASS data - -Returns: true if character matches, else false -*/ - -/* Get the next UTF-8 character, advancing the pointer. This is called when we - know we are in UTF-8 mode. */ - -static inline void getUTF8CharAndAdvancePointer(int& c, const unsigned char*& subjectPtr) -{ - c = *subjectPtr++; - if ((c & 0xc0) == 0xc0) { - int gcaa = jsc_pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ - int gcss = 6 * gcaa; - c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss; - while (gcaa-- > 0) { - gcss -= 6; - c |= (*subjectPtr++ & 0x3f) << gcss; - } - } -} - -bool jsc_pcre_xclass(int c, const unsigned char* data) -{ - bool negated = (*data & XCL_NOT); - - /* Character values < 256 are matched against a bitmap, if one is present. If - not, we still carry on, because there may be ranges that start below 256 in the - additional data. */ - - if (c < 256) { - if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0) - return !negated; /* char found */ - } - - /* First skip the bit map if present. Then match against the list of Unicode - properties or large chars or ranges that end with a large char. We won't ever - encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. 
*/ - - if ((*data++ & XCL_MAP) != 0) - data += 32; - - int t; - while ((t = *data++) != XCL_END) { - if (t == XCL_SINGLE) { - int x; - getUTF8CharAndAdvancePointer(x, data); - if (c == x) - return !negated; - } - else if (t == XCL_RANGE) { - int x, y; - getUTF8CharAndAdvancePointer(x, data); - getUTF8CharAndAdvancePointer(y, data); - if (c >= x && c <= y) - return !negated; - } - } - - return negated; /* char did not match */ -} diff --git a/js/src/yarr/pcre/ucpinternal.h b/js/src/yarr/pcre/ucpinternal.h deleted file mode 100644 index c8bc4aab679c..000000000000 --- a/js/src/yarr/pcre/ucpinternal.h +++ /dev/null @@ -1,126 +0,0 @@ -/* This is JavaScriptCore's variant of the PCRE library. While this library -started out as a copy of PCRE, many of the features of PCRE have been -removed. This library now supports only the regular expression features -required by the JavaScript language specification, and has only the functions -needed by JavaScriptCore and the rest of WebKit. - - Originally written by Philip Hazel - Copyright (c) 1997-2006 University of Cambridge - Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved. - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/************************************************* -* Unicode Property Table handler * -*************************************************/ - -/* Internal header file defining the layout of the bits in each pair of 32-bit -words that form a data item in the table. */ - -typedef struct cnode { - unsigned f0; - unsigned f1; -} cnode; - -/* Things for the f0 field */ - -#define f0_scriptmask 0xff000000 /* Mask for script field */ -#define f0_scriptshift 24 /* Shift for script value */ -#define f0_rangeflag 0x00f00000 /* Flag for a range item */ -#define f0_charmask 0x001fffff /* Mask for code point value */ - -/* Things for the f1 field */ - -#define f1_typemask 0xfc000000 /* Mask for char type field */ -#define f1_typeshift 26 /* Shift for the type field */ -#define f1_rangemask 0x0000ffff /* Mask for a range offset */ -#define f1_casemask 0x0000ffff /* Mask for a case offset */ -#define f1_caseneg 0xffff8000 /* Bits for negation */ - -/* The data consists of a vector of structures of type cnode. The two unsigned -32-bit integers are used as follows: - -(f0) (1) The most significant byte holds the script number. 
The numbers are - defined by the enum in ucp.h. - - (2) The 0x00800000 bit is set if this entry defines a range of characters. - It is not set if this entry defines a single character - - (3) The 0x00600000 bits are spare. - - (4) The 0x001fffff bits contain the code point. No Unicode code point will - ever be greater than 0x0010ffff, so this should be OK for ever. - -(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are - defined by an enum in ucp.h. - - (2) The 0x03ff0000 bits are spare. - - (3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of - range if this entry defines a range, OR the *signed* offset to the - character's "other case" partner if this entry defines a single - character. There is no partner if the value is zero. - -------------------------------------------------------------------------------- -| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) | -------------------------------------------------------------------------------- - | | | | | - | | |-> spare | |-> spare - | | | - | |-> spare |-> spare - | - |-> range flag - -The upper/lower casing information is set only for characters that come in -pairs. The non-one-to-one mappings in the Unicode data are ignored. - -When searching the data, proceed as follows: - -(1) Set up for a binary chop search. - -(2) If the top is not greater than the bottom, the character is not in the - table. Its type must therefore be "Cn" ("Undefined"). - -(3) Find the middle vector element. - -(4) Extract the code point and compare. If equal, we are done. - -(5) If the test character is smaller, set the top to the current point, and - goto (2). - -(6) If the current entry defines a range, compute the last character by adding - the offset, and see if the test character is within the range. If it is, - we are done. - -(7) Otherwise, set the bottom to one element past the current point and goto - (2). 
-*/ - -/* End of ucpinternal.h */ diff --git a/js/src/yarr/pcre/ucptable.cpp b/js/src/yarr/pcre/ucptable.cpp deleted file mode 100644 index 011f7f572443..000000000000 --- a/js/src/yarr/pcre/ucptable.cpp +++ /dev/null @@ -1,2968 +0,0 @@ -/* This source module is automatically generated from the Unicode -property table. See ucpinternal.h for a description of the layout. */ - -static const cnode ucp_table[] = { - { 0x09800000, 0x0000001f }, - { 0x09000020, 0x74000000 }, - { 0x09800021, 0x54000002 }, - { 0x09000024, 0x5c000000 }, - { 0x09800025, 0x54000002 }, - { 0x09000028, 0x58000000 }, - { 0x09000029, 0x48000000 }, - { 0x0900002a, 0x54000000 }, - { 0x0900002b, 0x64000000 }, - { 0x0900002c, 0x54000000 }, - { 0x0900002d, 0x44000000 }, - { 0x0980002e, 0x54000001 }, - { 0x09800030, 0x34000009 }, - { 0x0980003a, 0x54000001 }, - { 0x0980003c, 0x64000002 }, - { 0x0980003f, 0x54000001 }, - { 0x21000041, 0x24000020 }, - { 0x21000042, 0x24000020 }, - { 0x21000043, 0x24000020 }, - { 0x21000044, 0x24000020 }, - { 0x21000045, 0x24000020 }, - { 0x21000046, 0x24000020 }, - { 0x21000047, 0x24000020 }, - { 0x21000048, 0x24000020 }, - { 0x21000049, 0x24000020 }, - { 0x2100004a, 0x24000020 }, - { 0x2100004b, 0x24000020 }, - { 0x2100004c, 0x24000020 }, - { 0x2100004d, 0x24000020 }, - { 0x2100004e, 0x24000020 }, - { 0x2100004f, 0x24000020 }, - { 0x21000050, 0x24000020 }, - { 0x21000051, 0x24000020 }, - { 0x21000052, 0x24000020 }, - { 0x21000053, 0x24000020 }, - { 0x21000054, 0x24000020 }, - { 0x21000055, 0x24000020 }, - { 0x21000056, 0x24000020 }, - { 0x21000057, 0x24000020 }, - { 0x21000058, 0x24000020 }, - { 0x21000059, 0x24000020 }, - { 0x2100005a, 0x24000020 }, - { 0x0900005b, 0x58000000 }, - { 0x0900005c, 0x54000000 }, - { 0x0900005d, 0x48000000 }, - { 0x0900005e, 0x60000000 }, - { 0x0900005f, 0x40000000 }, - { 0x09000060, 0x60000000 }, - { 0x21000061, 0x1400ffe0 }, - { 0x21000062, 0x1400ffe0 }, - { 0x21000063, 0x1400ffe0 }, - { 0x21000064, 0x1400ffe0 }, - { 0x21000065, 0x1400ffe0 
}, - { 0x21000066, 0x1400ffe0 }, - { 0x21000067, 0x1400ffe0 }, - { 0x21000068, 0x1400ffe0 }, - { 0x21000069, 0x1400ffe0 }, - { 0x2100006a, 0x1400ffe0 }, - { 0x2100006b, 0x1400ffe0 }, - { 0x2100006c, 0x1400ffe0 }, - { 0x2100006d, 0x1400ffe0 }, - { 0x2100006e, 0x1400ffe0 }, - { 0x2100006f, 0x1400ffe0 }, - { 0x21000070, 0x1400ffe0 }, - { 0x21000071, 0x1400ffe0 }, - { 0x21000072, 0x1400ffe0 }, - { 0x21000073, 0x1400ffe0 }, - { 0x21000074, 0x1400ffe0 }, - { 0x21000075, 0x1400ffe0 }, - { 0x21000076, 0x1400ffe0 }, - { 0x21000077, 0x1400ffe0 }, - { 0x21000078, 0x1400ffe0 }, - { 0x21000079, 0x1400ffe0 }, - { 0x2100007a, 0x1400ffe0 }, - { 0x0900007b, 0x58000000 }, - { 0x0900007c, 0x64000000 }, - { 0x0900007d, 0x48000000 }, - { 0x0900007e, 0x64000000 }, - { 0x0980007f, 0x00000020 }, - { 0x090000a0, 0x74000000 }, - { 0x090000a1, 0x54000000 }, - { 0x098000a2, 0x5c000003 }, - { 0x098000a6, 0x68000001 }, - { 0x090000a8, 0x60000000 }, - { 0x090000a9, 0x68000000 }, - { 0x210000aa, 0x14000000 }, - { 0x090000ab, 0x50000000 }, - { 0x090000ac, 0x64000000 }, - { 0x090000ad, 0x04000000 }, - { 0x090000ae, 0x68000000 }, - { 0x090000af, 0x60000000 }, - { 0x090000b0, 0x68000000 }, - { 0x090000b1, 0x64000000 }, - { 0x098000b2, 0x3c000001 }, - { 0x090000b4, 0x60000000 }, - { 0x090000b5, 0x140002e7 }, - { 0x090000b6, 0x68000000 }, - { 0x090000b7, 0x54000000 }, - { 0x090000b8, 0x60000000 }, - { 0x090000b9, 0x3c000000 }, - { 0x210000ba, 0x14000000 }, - { 0x090000bb, 0x4c000000 }, - { 0x098000bc, 0x3c000002 }, - { 0x090000bf, 0x54000000 }, - { 0x210000c0, 0x24000020 }, - { 0x210000c1, 0x24000020 }, - { 0x210000c2, 0x24000020 }, - { 0x210000c3, 0x24000020 }, - { 0x210000c4, 0x24000020 }, - { 0x210000c5, 0x24000020 }, - { 0x210000c6, 0x24000020 }, - { 0x210000c7, 0x24000020 }, - { 0x210000c8, 0x24000020 }, - { 0x210000c9, 0x24000020 }, - { 0x210000ca, 0x24000020 }, - { 0x210000cb, 0x24000020 }, - { 0x210000cc, 0x24000020 }, - { 0x210000cd, 0x24000020 }, - { 0x210000ce, 0x24000020 }, - { 0x210000cf, 
0x24000020 }, - { 0x210000d0, 0x24000020 }, - { 0x210000d1, 0x24000020 }, - { 0x210000d2, 0x24000020 }, - { 0x210000d3, 0x24000020 }, - { 0x210000d4, 0x24000020 }, - { 0x210000d5, 0x24000020 }, - { 0x210000d6, 0x24000020 }, - { 0x090000d7, 0x64000000 }, - { 0x210000d8, 0x24000020 }, - { 0x210000d9, 0x24000020 }, - { 0x210000da, 0x24000020 }, - { 0x210000db, 0x24000020 }, - { 0x210000dc, 0x24000020 }, - { 0x210000dd, 0x24000020 }, - { 0x210000de, 0x24000020 }, - { 0x210000df, 0x14000000 }, - { 0x210000e0, 0x1400ffe0 }, - { 0x210000e1, 0x1400ffe0 }, - { 0x210000e2, 0x1400ffe0 }, - { 0x210000e3, 0x1400ffe0 }, - { 0x210000e4, 0x1400ffe0 }, - { 0x210000e5, 0x1400ffe0 }, - { 0x210000e6, 0x1400ffe0 }, - { 0x210000e7, 0x1400ffe0 }, - { 0x210000e8, 0x1400ffe0 }, - { 0x210000e9, 0x1400ffe0 }, - { 0x210000ea, 0x1400ffe0 }, - { 0x210000eb, 0x1400ffe0 }, - { 0x210000ec, 0x1400ffe0 }, - { 0x210000ed, 0x1400ffe0 }, - { 0x210000ee, 0x1400ffe0 }, - { 0x210000ef, 0x1400ffe0 }, - { 0x210000f0, 0x1400ffe0 }, - { 0x210000f1, 0x1400ffe0 }, - { 0x210000f2, 0x1400ffe0 }, - { 0x210000f3, 0x1400ffe0 }, - { 0x210000f4, 0x1400ffe0 }, - { 0x210000f5, 0x1400ffe0 }, - { 0x210000f6, 0x1400ffe0 }, - { 0x090000f7, 0x64000000 }, - { 0x210000f8, 0x1400ffe0 }, - { 0x210000f9, 0x1400ffe0 }, - { 0x210000fa, 0x1400ffe0 }, - { 0x210000fb, 0x1400ffe0 }, - { 0x210000fc, 0x1400ffe0 }, - { 0x210000fd, 0x1400ffe0 }, - { 0x210000fe, 0x1400ffe0 }, - { 0x210000ff, 0x14000079 }, - { 0x21000100, 0x24000001 }, - { 0x21000101, 0x1400ffff }, - { 0x21000102, 0x24000001 }, - { 0x21000103, 0x1400ffff }, - { 0x21000104, 0x24000001 }, - { 0x21000105, 0x1400ffff }, - { 0x21000106, 0x24000001 }, - { 0x21000107, 0x1400ffff }, - { 0x21000108, 0x24000001 }, - { 0x21000109, 0x1400ffff }, - { 0x2100010a, 0x24000001 }, - { 0x2100010b, 0x1400ffff }, - { 0x2100010c, 0x24000001 }, - { 0x2100010d, 0x1400ffff }, - { 0x2100010e, 0x24000001 }, - { 0x2100010f, 0x1400ffff }, - { 0x21000110, 0x24000001 }, - { 0x21000111, 0x1400ffff }, - { 
0x21000112, 0x24000001 }, - { 0x21000113, 0x1400ffff }, - { 0x21000114, 0x24000001 }, - { 0x21000115, 0x1400ffff }, - { 0x21000116, 0x24000001 }, - { 0x21000117, 0x1400ffff }, - { 0x21000118, 0x24000001 }, - { 0x21000119, 0x1400ffff }, - { 0x2100011a, 0x24000001 }, - { 0x2100011b, 0x1400ffff }, - { 0x2100011c, 0x24000001 }, - { 0x2100011d, 0x1400ffff }, - { 0x2100011e, 0x24000001 }, - { 0x2100011f, 0x1400ffff }, - { 0x21000120, 0x24000001 }, - { 0x21000121, 0x1400ffff }, - { 0x21000122, 0x24000001 }, - { 0x21000123, 0x1400ffff }, - { 0x21000124, 0x24000001 }, - { 0x21000125, 0x1400ffff }, - { 0x21000126, 0x24000001 }, - { 0x21000127, 0x1400ffff }, - { 0x21000128, 0x24000001 }, - { 0x21000129, 0x1400ffff }, - { 0x2100012a, 0x24000001 }, - { 0x2100012b, 0x1400ffff }, - { 0x2100012c, 0x24000001 }, - { 0x2100012d, 0x1400ffff }, - { 0x2100012e, 0x24000001 }, - { 0x2100012f, 0x1400ffff }, - { 0x21000130, 0x2400ff39 }, - { 0x21000131, 0x1400ff18 }, - { 0x21000132, 0x24000001 }, - { 0x21000133, 0x1400ffff }, - { 0x21000134, 0x24000001 }, - { 0x21000135, 0x1400ffff }, - { 0x21000136, 0x24000001 }, - { 0x21000137, 0x1400ffff }, - { 0x21000138, 0x14000000 }, - { 0x21000139, 0x24000001 }, - { 0x2100013a, 0x1400ffff }, - { 0x2100013b, 0x24000001 }, - { 0x2100013c, 0x1400ffff }, - { 0x2100013d, 0x24000001 }, - { 0x2100013e, 0x1400ffff }, - { 0x2100013f, 0x24000001 }, - { 0x21000140, 0x1400ffff }, - { 0x21000141, 0x24000001 }, - { 0x21000142, 0x1400ffff }, - { 0x21000143, 0x24000001 }, - { 0x21000144, 0x1400ffff }, - { 0x21000145, 0x24000001 }, - { 0x21000146, 0x1400ffff }, - { 0x21000147, 0x24000001 }, - { 0x21000148, 0x1400ffff }, - { 0x21000149, 0x14000000 }, - { 0x2100014a, 0x24000001 }, - { 0x2100014b, 0x1400ffff }, - { 0x2100014c, 0x24000001 }, - { 0x2100014d, 0x1400ffff }, - { 0x2100014e, 0x24000001 }, - { 0x2100014f, 0x1400ffff }, - { 0x21000150, 0x24000001 }, - { 0x21000151, 0x1400ffff }, - { 0x21000152, 0x24000001 }, - { 0x21000153, 0x1400ffff }, - { 0x21000154, 
0x24000001 }, - { 0x21000155, 0x1400ffff }, - { 0x21000156, 0x24000001 }, - { 0x21000157, 0x1400ffff }, - { 0x21000158, 0x24000001 }, - { 0x21000159, 0x1400ffff }, - { 0x2100015a, 0x24000001 }, - { 0x2100015b, 0x1400ffff }, - { 0x2100015c, 0x24000001 }, - { 0x2100015d, 0x1400ffff }, - { 0x2100015e, 0x24000001 }, - { 0x2100015f, 0x1400ffff }, - { 0x21000160, 0x24000001 }, - { 0x21000161, 0x1400ffff }, - { 0x21000162, 0x24000001 }, - { 0x21000163, 0x1400ffff }, - { 0x21000164, 0x24000001 }, - { 0x21000165, 0x1400ffff }, - { 0x21000166, 0x24000001 }, - { 0x21000167, 0x1400ffff }, - { 0x21000168, 0x24000001 }, - { 0x21000169, 0x1400ffff }, - { 0x2100016a, 0x24000001 }, - { 0x2100016b, 0x1400ffff }, - { 0x2100016c, 0x24000001 }, - { 0x2100016d, 0x1400ffff }, - { 0x2100016e, 0x24000001 }, - { 0x2100016f, 0x1400ffff }, - { 0x21000170, 0x24000001 }, - { 0x21000171, 0x1400ffff }, - { 0x21000172, 0x24000001 }, - { 0x21000173, 0x1400ffff }, - { 0x21000174, 0x24000001 }, - { 0x21000175, 0x1400ffff }, - { 0x21000176, 0x24000001 }, - { 0x21000177, 0x1400ffff }, - { 0x21000178, 0x2400ff87 }, - { 0x21000179, 0x24000001 }, - { 0x2100017a, 0x1400ffff }, - { 0x2100017b, 0x24000001 }, - { 0x2100017c, 0x1400ffff }, - { 0x2100017d, 0x24000001 }, - { 0x2100017e, 0x1400ffff }, - { 0x2100017f, 0x1400fed4 }, - { 0x21000180, 0x14000000 }, - { 0x21000181, 0x240000d2 }, - { 0x21000182, 0x24000001 }, - { 0x21000183, 0x1400ffff }, - { 0x21000184, 0x24000001 }, - { 0x21000185, 0x1400ffff }, - { 0x21000186, 0x240000ce }, - { 0x21000187, 0x24000001 }, - { 0x21000188, 0x1400ffff }, - { 0x21000189, 0x240000cd }, - { 0x2100018a, 0x240000cd }, - { 0x2100018b, 0x24000001 }, - { 0x2100018c, 0x1400ffff }, - { 0x2100018d, 0x14000000 }, - { 0x2100018e, 0x2400004f }, - { 0x2100018f, 0x240000ca }, - { 0x21000190, 0x240000cb }, - { 0x21000191, 0x24000001 }, - { 0x21000192, 0x1400ffff }, - { 0x21000193, 0x240000cd }, - { 0x21000194, 0x240000cf }, - { 0x21000195, 0x14000061 }, - { 0x21000196, 0x240000d3 }, - { 
0x21000197, 0x240000d1 }, - { 0x21000198, 0x24000001 }, - { 0x21000199, 0x1400ffff }, - { 0x2100019a, 0x140000a3 }, - { 0x2100019b, 0x14000000 }, - { 0x2100019c, 0x240000d3 }, - { 0x2100019d, 0x240000d5 }, - { 0x2100019e, 0x14000082 }, - { 0x2100019f, 0x240000d6 }, - { 0x210001a0, 0x24000001 }, - { 0x210001a1, 0x1400ffff }, - { 0x210001a2, 0x24000001 }, - { 0x210001a3, 0x1400ffff }, - { 0x210001a4, 0x24000001 }, - { 0x210001a5, 0x1400ffff }, - { 0x210001a6, 0x240000da }, - { 0x210001a7, 0x24000001 }, - { 0x210001a8, 0x1400ffff }, - { 0x210001a9, 0x240000da }, - { 0x218001aa, 0x14000001 }, - { 0x210001ac, 0x24000001 }, - { 0x210001ad, 0x1400ffff }, - { 0x210001ae, 0x240000da }, - { 0x210001af, 0x24000001 }, - { 0x210001b0, 0x1400ffff }, - { 0x210001b1, 0x240000d9 }, - { 0x210001b2, 0x240000d9 }, - { 0x210001b3, 0x24000001 }, - { 0x210001b4, 0x1400ffff }, - { 0x210001b5, 0x24000001 }, - { 0x210001b6, 0x1400ffff }, - { 0x210001b7, 0x240000db }, - { 0x210001b8, 0x24000001 }, - { 0x210001b9, 0x1400ffff }, - { 0x210001ba, 0x14000000 }, - { 0x210001bb, 0x1c000000 }, - { 0x210001bc, 0x24000001 }, - { 0x210001bd, 0x1400ffff }, - { 0x210001be, 0x14000000 }, - { 0x210001bf, 0x14000038 }, - { 0x218001c0, 0x1c000003 }, - { 0x210001c4, 0x24000002 }, - { 0x210001c5, 0x2000ffff }, - { 0x210001c6, 0x1400fffe }, - { 0x210001c7, 0x24000002 }, - { 0x210001c8, 0x2000ffff }, - { 0x210001c9, 0x1400fffe }, - { 0x210001ca, 0x24000002 }, - { 0x210001cb, 0x2000ffff }, - { 0x210001cc, 0x1400fffe }, - { 0x210001cd, 0x24000001 }, - { 0x210001ce, 0x1400ffff }, - { 0x210001cf, 0x24000001 }, - { 0x210001d0, 0x1400ffff }, - { 0x210001d1, 0x24000001 }, - { 0x210001d2, 0x1400ffff }, - { 0x210001d3, 0x24000001 }, - { 0x210001d4, 0x1400ffff }, - { 0x210001d5, 0x24000001 }, - { 0x210001d6, 0x1400ffff }, - { 0x210001d7, 0x24000001 }, - { 0x210001d8, 0x1400ffff }, - { 0x210001d9, 0x24000001 }, - { 0x210001da, 0x1400ffff }, - { 0x210001db, 0x24000001 }, - { 0x210001dc, 0x1400ffff }, - { 0x210001dd, 
0x1400ffb1 }, - { 0x210001de, 0x24000001 }, - { 0x210001df, 0x1400ffff }, - { 0x210001e0, 0x24000001 }, - { 0x210001e1, 0x1400ffff }, - { 0x210001e2, 0x24000001 }, - { 0x210001e3, 0x1400ffff }, - { 0x210001e4, 0x24000001 }, - { 0x210001e5, 0x1400ffff }, - { 0x210001e6, 0x24000001 }, - { 0x210001e7, 0x1400ffff }, - { 0x210001e8, 0x24000001 }, - { 0x210001e9, 0x1400ffff }, - { 0x210001ea, 0x24000001 }, - { 0x210001eb, 0x1400ffff }, - { 0x210001ec, 0x24000001 }, - { 0x210001ed, 0x1400ffff }, - { 0x210001ee, 0x24000001 }, - { 0x210001ef, 0x1400ffff }, - { 0x210001f0, 0x14000000 }, - { 0x210001f1, 0x24000002 }, - { 0x210001f2, 0x2000ffff }, - { 0x210001f3, 0x1400fffe }, - { 0x210001f4, 0x24000001 }, - { 0x210001f5, 0x1400ffff }, - { 0x210001f6, 0x2400ff9f }, - { 0x210001f7, 0x2400ffc8 }, - { 0x210001f8, 0x24000001 }, - { 0x210001f9, 0x1400ffff }, - { 0x210001fa, 0x24000001 }, - { 0x210001fb, 0x1400ffff }, - { 0x210001fc, 0x24000001 }, - { 0x210001fd, 0x1400ffff }, - { 0x210001fe, 0x24000001 }, - { 0x210001ff, 0x1400ffff }, - { 0x21000200, 0x24000001 }, - { 0x21000201, 0x1400ffff }, - { 0x21000202, 0x24000001 }, - { 0x21000203, 0x1400ffff }, - { 0x21000204, 0x24000001 }, - { 0x21000205, 0x1400ffff }, - { 0x21000206, 0x24000001 }, - { 0x21000207, 0x1400ffff }, - { 0x21000208, 0x24000001 }, - { 0x21000209, 0x1400ffff }, - { 0x2100020a, 0x24000001 }, - { 0x2100020b, 0x1400ffff }, - { 0x2100020c, 0x24000001 }, - { 0x2100020d, 0x1400ffff }, - { 0x2100020e, 0x24000001 }, - { 0x2100020f, 0x1400ffff }, - { 0x21000210, 0x24000001 }, - { 0x21000211, 0x1400ffff }, - { 0x21000212, 0x24000001 }, - { 0x21000213, 0x1400ffff }, - { 0x21000214, 0x24000001 }, - { 0x21000215, 0x1400ffff }, - { 0x21000216, 0x24000001 }, - { 0x21000217, 0x1400ffff }, - { 0x21000218, 0x24000001 }, - { 0x21000219, 0x1400ffff }, - { 0x2100021a, 0x24000001 }, - { 0x2100021b, 0x1400ffff }, - { 0x2100021c, 0x24000001 }, - { 0x2100021d, 0x1400ffff }, - { 0x2100021e, 0x24000001 }, - { 0x2100021f, 0x1400ffff }, - { 
0x21000220, 0x2400ff7e }, - { 0x21000221, 0x14000000 }, - { 0x21000222, 0x24000001 }, - { 0x21000223, 0x1400ffff }, - { 0x21000224, 0x24000001 }, - { 0x21000225, 0x1400ffff }, - { 0x21000226, 0x24000001 }, - { 0x21000227, 0x1400ffff }, - { 0x21000228, 0x24000001 }, - { 0x21000229, 0x1400ffff }, - { 0x2100022a, 0x24000001 }, - { 0x2100022b, 0x1400ffff }, - { 0x2100022c, 0x24000001 }, - { 0x2100022d, 0x1400ffff }, - { 0x2100022e, 0x24000001 }, - { 0x2100022f, 0x1400ffff }, - { 0x21000230, 0x24000001 }, - { 0x21000231, 0x1400ffff }, - { 0x21000232, 0x24000001 }, - { 0x21000233, 0x1400ffff }, - { 0x21800234, 0x14000005 }, - { 0x2100023a, 0x24000000 }, - { 0x2100023b, 0x24000001 }, - { 0x2100023c, 0x1400ffff }, - { 0x2100023d, 0x2400ff5d }, - { 0x2100023e, 0x24000000 }, - { 0x2180023f, 0x14000001 }, - { 0x21000241, 0x24000053 }, - { 0x21800250, 0x14000002 }, - { 0x21000253, 0x1400ff2e }, - { 0x21000254, 0x1400ff32 }, - { 0x21000255, 0x14000000 }, - { 0x21000256, 0x1400ff33 }, - { 0x21000257, 0x1400ff33 }, - { 0x21000258, 0x14000000 }, - { 0x21000259, 0x1400ff36 }, - { 0x2100025a, 0x14000000 }, - { 0x2100025b, 0x1400ff35 }, - { 0x2180025c, 0x14000003 }, - { 0x21000260, 0x1400ff33 }, - { 0x21800261, 0x14000001 }, - { 0x21000263, 0x1400ff31 }, - { 0x21800264, 0x14000003 }, - { 0x21000268, 0x1400ff2f }, - { 0x21000269, 0x1400ff2d }, - { 0x2180026a, 0x14000004 }, - { 0x2100026f, 0x1400ff2d }, - { 0x21800270, 0x14000001 }, - { 0x21000272, 0x1400ff2b }, - { 0x21800273, 0x14000001 }, - { 0x21000275, 0x1400ff2a }, - { 0x21800276, 0x14000009 }, - { 0x21000280, 0x1400ff26 }, - { 0x21800281, 0x14000001 }, - { 0x21000283, 0x1400ff26 }, - { 0x21800284, 0x14000003 }, - { 0x21000288, 0x1400ff26 }, - { 0x21000289, 0x14000000 }, - { 0x2100028a, 0x1400ff27 }, - { 0x2100028b, 0x1400ff27 }, - { 0x2180028c, 0x14000005 }, - { 0x21000292, 0x1400ff25 }, - { 0x21000293, 0x14000000 }, - { 0x21000294, 0x1400ffad }, - { 0x21800295, 0x1400001a }, - { 0x218002b0, 0x18000011 }, - { 0x098002c2, 
0x60000003 }, - { 0x098002c6, 0x1800000b }, - { 0x098002d2, 0x6000000d }, - { 0x218002e0, 0x18000004 }, - { 0x098002e5, 0x60000008 }, - { 0x090002ee, 0x18000000 }, - { 0x098002ef, 0x60000010 }, - { 0x1b800300, 0x30000044 }, - { 0x1b000345, 0x30000054 }, - { 0x1b800346, 0x30000029 }, - { 0x13800374, 0x60000001 }, - { 0x1300037a, 0x18000000 }, - { 0x0900037e, 0x54000000 }, - { 0x13800384, 0x60000001 }, - { 0x13000386, 0x24000026 }, - { 0x09000387, 0x54000000 }, - { 0x13000388, 0x24000025 }, - { 0x13000389, 0x24000025 }, - { 0x1300038a, 0x24000025 }, - { 0x1300038c, 0x24000040 }, - { 0x1300038e, 0x2400003f }, - { 0x1300038f, 0x2400003f }, - { 0x13000390, 0x14000000 }, - { 0x13000391, 0x24000020 }, - { 0x13000392, 0x24000020 }, - { 0x13000393, 0x24000020 }, - { 0x13000394, 0x24000020 }, - { 0x13000395, 0x24000020 }, - { 0x13000396, 0x24000020 }, - { 0x13000397, 0x24000020 }, - { 0x13000398, 0x24000020 }, - { 0x13000399, 0x24000020 }, - { 0x1300039a, 0x24000020 }, - { 0x1300039b, 0x24000020 }, - { 0x1300039c, 0x24000020 }, - { 0x1300039d, 0x24000020 }, - { 0x1300039e, 0x24000020 }, - { 0x1300039f, 0x24000020 }, - { 0x130003a0, 0x24000020 }, - { 0x130003a1, 0x24000020 }, - { 0x130003a3, 0x24000020 }, - { 0x130003a4, 0x24000020 }, - { 0x130003a5, 0x24000020 }, - { 0x130003a6, 0x24000020 }, - { 0x130003a7, 0x24000020 }, - { 0x130003a8, 0x24000020 }, - { 0x130003a9, 0x24000020 }, - { 0x130003aa, 0x24000020 }, - { 0x130003ab, 0x24000020 }, - { 0x130003ac, 0x1400ffda }, - { 0x130003ad, 0x1400ffdb }, - { 0x130003ae, 0x1400ffdb }, - { 0x130003af, 0x1400ffdb }, - { 0x130003b0, 0x14000000 }, - { 0x130003b1, 0x1400ffe0 }, - { 0x130003b2, 0x1400ffe0 }, - { 0x130003b3, 0x1400ffe0 }, - { 0x130003b4, 0x1400ffe0 }, - { 0x130003b5, 0x1400ffe0 }, - { 0x130003b6, 0x1400ffe0 }, - { 0x130003b7, 0x1400ffe0 }, - { 0x130003b8, 0x1400ffe0 }, - { 0x130003b9, 0x1400ffe0 }, - { 0x130003ba, 0x1400ffe0 }, - { 0x130003bb, 0x1400ffe0 }, - { 0x130003bc, 0x1400ffe0 }, - { 0x130003bd, 0x1400ffe0 }, - { 
0x130003be, 0x1400ffe0 }, - { 0x130003bf, 0x1400ffe0 }, - { 0x130003c0, 0x1400ffe0 }, - { 0x130003c1, 0x1400ffe0 }, - { 0x130003c2, 0x1400ffe1 }, - { 0x130003c3, 0x1400ffe0 }, - { 0x130003c4, 0x1400ffe0 }, - { 0x130003c5, 0x1400ffe0 }, - { 0x130003c6, 0x1400ffe0 }, - { 0x130003c7, 0x1400ffe0 }, - { 0x130003c8, 0x1400ffe0 }, - { 0x130003c9, 0x1400ffe0 }, - { 0x130003ca, 0x1400ffe0 }, - { 0x130003cb, 0x1400ffe0 }, - { 0x130003cc, 0x1400ffc0 }, - { 0x130003cd, 0x1400ffc1 }, - { 0x130003ce, 0x1400ffc1 }, - { 0x130003d0, 0x1400ffc2 }, - { 0x130003d1, 0x1400ffc7 }, - { 0x138003d2, 0x24000002 }, - { 0x130003d5, 0x1400ffd1 }, - { 0x130003d6, 0x1400ffca }, - { 0x130003d7, 0x14000000 }, - { 0x130003d8, 0x24000001 }, - { 0x130003d9, 0x1400ffff }, - { 0x130003da, 0x24000001 }, - { 0x130003db, 0x1400ffff }, - { 0x130003dc, 0x24000001 }, - { 0x130003dd, 0x1400ffff }, - { 0x130003de, 0x24000001 }, - { 0x130003df, 0x1400ffff }, - { 0x130003e0, 0x24000001 }, - { 0x130003e1, 0x1400ffff }, - { 0x0a0003e2, 0x24000001 }, - { 0x0a0003e3, 0x1400ffff }, - { 0x0a0003e4, 0x24000001 }, - { 0x0a0003e5, 0x1400ffff }, - { 0x0a0003e6, 0x24000001 }, - { 0x0a0003e7, 0x1400ffff }, - { 0x0a0003e8, 0x24000001 }, - { 0x0a0003e9, 0x1400ffff }, - { 0x0a0003ea, 0x24000001 }, - { 0x0a0003eb, 0x1400ffff }, - { 0x0a0003ec, 0x24000001 }, - { 0x0a0003ed, 0x1400ffff }, - { 0x0a0003ee, 0x24000001 }, - { 0x0a0003ef, 0x1400ffff }, - { 0x130003f0, 0x1400ffaa }, - { 0x130003f1, 0x1400ffb0 }, - { 0x130003f2, 0x14000007 }, - { 0x130003f3, 0x14000000 }, - { 0x130003f4, 0x2400ffc4 }, - { 0x130003f5, 0x1400ffa0 }, - { 0x130003f6, 0x64000000 }, - { 0x130003f7, 0x24000001 }, - { 0x130003f8, 0x1400ffff }, - { 0x130003f9, 0x2400fff9 }, - { 0x130003fa, 0x24000001 }, - { 0x130003fb, 0x1400ffff }, - { 0x130003fc, 0x14000000 }, - { 0x138003fd, 0x24000002 }, - { 0x0c000400, 0x24000050 }, - { 0x0c000401, 0x24000050 }, - { 0x0c000402, 0x24000050 }, - { 0x0c000403, 0x24000050 }, - { 0x0c000404, 0x24000050 }, - { 0x0c000405, 
0x24000050 }, - { 0x0c000406, 0x24000050 }, - { 0x0c000407, 0x24000050 }, - { 0x0c000408, 0x24000050 }, - { 0x0c000409, 0x24000050 }, - { 0x0c00040a, 0x24000050 }, - { 0x0c00040b, 0x24000050 }, - { 0x0c00040c, 0x24000050 }, - { 0x0c00040d, 0x24000050 }, - { 0x0c00040e, 0x24000050 }, - { 0x0c00040f, 0x24000050 }, - { 0x0c000410, 0x24000020 }, - { 0x0c000411, 0x24000020 }, - { 0x0c000412, 0x24000020 }, - { 0x0c000413, 0x24000020 }, - { 0x0c000414, 0x24000020 }, - { 0x0c000415, 0x24000020 }, - { 0x0c000416, 0x24000020 }, - { 0x0c000417, 0x24000020 }, - { 0x0c000418, 0x24000020 }, - { 0x0c000419, 0x24000020 }, - { 0x0c00041a, 0x24000020 }, - { 0x0c00041b, 0x24000020 }, - { 0x0c00041c, 0x24000020 }, - { 0x0c00041d, 0x24000020 }, - { 0x0c00041e, 0x24000020 }, - { 0x0c00041f, 0x24000020 }, - { 0x0c000420, 0x24000020 }, - { 0x0c000421, 0x24000020 }, - { 0x0c000422, 0x24000020 }, - { 0x0c000423, 0x24000020 }, - { 0x0c000424, 0x24000020 }, - { 0x0c000425, 0x24000020 }, - { 0x0c000426, 0x24000020 }, - { 0x0c000427, 0x24000020 }, - { 0x0c000428, 0x24000020 }, - { 0x0c000429, 0x24000020 }, - { 0x0c00042a, 0x24000020 }, - { 0x0c00042b, 0x24000020 }, - { 0x0c00042c, 0x24000020 }, - { 0x0c00042d, 0x24000020 }, - { 0x0c00042e, 0x24000020 }, - { 0x0c00042f, 0x24000020 }, - { 0x0c000430, 0x1400ffe0 }, - { 0x0c000431, 0x1400ffe0 }, - { 0x0c000432, 0x1400ffe0 }, - { 0x0c000433, 0x1400ffe0 }, - { 0x0c000434, 0x1400ffe0 }, - { 0x0c000435, 0x1400ffe0 }, - { 0x0c000436, 0x1400ffe0 }, - { 0x0c000437, 0x1400ffe0 }, - { 0x0c000438, 0x1400ffe0 }, - { 0x0c000439, 0x1400ffe0 }, - { 0x0c00043a, 0x1400ffe0 }, - { 0x0c00043b, 0x1400ffe0 }, - { 0x0c00043c, 0x1400ffe0 }, - { 0x0c00043d, 0x1400ffe0 }, - { 0x0c00043e, 0x1400ffe0 }, - { 0x0c00043f, 0x1400ffe0 }, - { 0x0c000440, 0x1400ffe0 }, - { 0x0c000441, 0x1400ffe0 }, - { 0x0c000442, 0x1400ffe0 }, - { 0x0c000443, 0x1400ffe0 }, - { 0x0c000444, 0x1400ffe0 }, - { 0x0c000445, 0x1400ffe0 }, - { 0x0c000446, 0x1400ffe0 }, - { 0x0c000447, 0x1400ffe0 }, - { 
0x0c000448, 0x1400ffe0 }, - { 0x0c000449, 0x1400ffe0 }, - { 0x0c00044a, 0x1400ffe0 }, - { 0x0c00044b, 0x1400ffe0 }, - { 0x0c00044c, 0x1400ffe0 }, - { 0x0c00044d, 0x1400ffe0 }, - { 0x0c00044e, 0x1400ffe0 }, - { 0x0c00044f, 0x1400ffe0 }, - { 0x0c000450, 0x1400ffb0 }, - { 0x0c000451, 0x1400ffb0 }, - { 0x0c000452, 0x1400ffb0 }, - { 0x0c000453, 0x1400ffb0 }, - { 0x0c000454, 0x1400ffb0 }, - { 0x0c000455, 0x1400ffb0 }, - { 0x0c000456, 0x1400ffb0 }, - { 0x0c000457, 0x1400ffb0 }, - { 0x0c000458, 0x1400ffb0 }, - { 0x0c000459, 0x1400ffb0 }, - { 0x0c00045a, 0x1400ffb0 }, - { 0x0c00045b, 0x1400ffb0 }, - { 0x0c00045c, 0x1400ffb0 }, - { 0x0c00045d, 0x1400ffb0 }, - { 0x0c00045e, 0x1400ffb0 }, - { 0x0c00045f, 0x1400ffb0 }, - { 0x0c000460, 0x24000001 }, - { 0x0c000461, 0x1400ffff }, - { 0x0c000462, 0x24000001 }, - { 0x0c000463, 0x1400ffff }, - { 0x0c000464, 0x24000001 }, - { 0x0c000465, 0x1400ffff }, - { 0x0c000466, 0x24000001 }, - { 0x0c000467, 0x1400ffff }, - { 0x0c000468, 0x24000001 }, - { 0x0c000469, 0x1400ffff }, - { 0x0c00046a, 0x24000001 }, - { 0x0c00046b, 0x1400ffff }, - { 0x0c00046c, 0x24000001 }, - { 0x0c00046d, 0x1400ffff }, - { 0x0c00046e, 0x24000001 }, - { 0x0c00046f, 0x1400ffff }, - { 0x0c000470, 0x24000001 }, - { 0x0c000471, 0x1400ffff }, - { 0x0c000472, 0x24000001 }, - { 0x0c000473, 0x1400ffff }, - { 0x0c000474, 0x24000001 }, - { 0x0c000475, 0x1400ffff }, - { 0x0c000476, 0x24000001 }, - { 0x0c000477, 0x1400ffff }, - { 0x0c000478, 0x24000001 }, - { 0x0c000479, 0x1400ffff }, - { 0x0c00047a, 0x24000001 }, - { 0x0c00047b, 0x1400ffff }, - { 0x0c00047c, 0x24000001 }, - { 0x0c00047d, 0x1400ffff }, - { 0x0c00047e, 0x24000001 }, - { 0x0c00047f, 0x1400ffff }, - { 0x0c000480, 0x24000001 }, - { 0x0c000481, 0x1400ffff }, - { 0x0c000482, 0x68000000 }, - { 0x0c800483, 0x30000003 }, - { 0x0c800488, 0x2c000001 }, - { 0x0c00048a, 0x24000001 }, - { 0x0c00048b, 0x1400ffff }, - { 0x0c00048c, 0x24000001 }, - { 0x0c00048d, 0x1400ffff }, - { 0x0c00048e, 0x24000001 }, - { 0x0c00048f, 
0x1400ffff }, - { 0x0c000490, 0x24000001 }, - { 0x0c000491, 0x1400ffff }, - { 0x0c000492, 0x24000001 }, - { 0x0c000493, 0x1400ffff }, - { 0x0c000494, 0x24000001 }, - { 0x0c000495, 0x1400ffff }, - { 0x0c000496, 0x24000001 }, - { 0x0c000497, 0x1400ffff }, - { 0x0c000498, 0x24000001 }, - { 0x0c000499, 0x1400ffff }, - { 0x0c00049a, 0x24000001 }, - { 0x0c00049b, 0x1400ffff }, - { 0x0c00049c, 0x24000001 }, - { 0x0c00049d, 0x1400ffff }, - { 0x0c00049e, 0x24000001 }, - { 0x0c00049f, 0x1400ffff }, - { 0x0c0004a0, 0x24000001 }, - { 0x0c0004a1, 0x1400ffff }, - { 0x0c0004a2, 0x24000001 }, - { 0x0c0004a3, 0x1400ffff }, - { 0x0c0004a4, 0x24000001 }, - { 0x0c0004a5, 0x1400ffff }, - { 0x0c0004a6, 0x24000001 }, - { 0x0c0004a7, 0x1400ffff }, - { 0x0c0004a8, 0x24000001 }, - { 0x0c0004a9, 0x1400ffff }, - { 0x0c0004aa, 0x24000001 }, - { 0x0c0004ab, 0x1400ffff }, - { 0x0c0004ac, 0x24000001 }, - { 0x0c0004ad, 0x1400ffff }, - { 0x0c0004ae, 0x24000001 }, - { 0x0c0004af, 0x1400ffff }, - { 0x0c0004b0, 0x24000001 }, - { 0x0c0004b1, 0x1400ffff }, - { 0x0c0004b2, 0x24000001 }, - { 0x0c0004b3, 0x1400ffff }, - { 0x0c0004b4, 0x24000001 }, - { 0x0c0004b5, 0x1400ffff }, - { 0x0c0004b6, 0x24000001 }, - { 0x0c0004b7, 0x1400ffff }, - { 0x0c0004b8, 0x24000001 }, - { 0x0c0004b9, 0x1400ffff }, - { 0x0c0004ba, 0x24000001 }, - { 0x0c0004bb, 0x1400ffff }, - { 0x0c0004bc, 0x24000001 }, - { 0x0c0004bd, 0x1400ffff }, - { 0x0c0004be, 0x24000001 }, - { 0x0c0004bf, 0x1400ffff }, - { 0x0c0004c0, 0x24000000 }, - { 0x0c0004c1, 0x24000001 }, - { 0x0c0004c2, 0x1400ffff }, - { 0x0c0004c3, 0x24000001 }, - { 0x0c0004c4, 0x1400ffff }, - { 0x0c0004c5, 0x24000001 }, - { 0x0c0004c6, 0x1400ffff }, - { 0x0c0004c7, 0x24000001 }, - { 0x0c0004c8, 0x1400ffff }, - { 0x0c0004c9, 0x24000001 }, - { 0x0c0004ca, 0x1400ffff }, - { 0x0c0004cb, 0x24000001 }, - { 0x0c0004cc, 0x1400ffff }, - { 0x0c0004cd, 0x24000001 }, - { 0x0c0004ce, 0x1400ffff }, - { 0x0c0004d0, 0x24000001 }, - { 0x0c0004d1, 0x1400ffff }, - { 0x0c0004d2, 0x24000001 }, - { 
0x0c0004d3, 0x1400ffff }, - { 0x0c0004d4, 0x24000001 }, - { 0x0c0004d5, 0x1400ffff }, - { 0x0c0004d6, 0x24000001 }, - { 0x0c0004d7, 0x1400ffff }, - { 0x0c0004d8, 0x24000001 }, - { 0x0c0004d9, 0x1400ffff }, - { 0x0c0004da, 0x24000001 }, - { 0x0c0004db, 0x1400ffff }, - { 0x0c0004dc, 0x24000001 }, - { 0x0c0004dd, 0x1400ffff }, - { 0x0c0004de, 0x24000001 }, - { 0x0c0004df, 0x1400ffff }, - { 0x0c0004e0, 0x24000001 }, - { 0x0c0004e1, 0x1400ffff }, - { 0x0c0004e2, 0x24000001 }, - { 0x0c0004e3, 0x1400ffff }, - { 0x0c0004e4, 0x24000001 }, - { 0x0c0004e5, 0x1400ffff }, - { 0x0c0004e6, 0x24000001 }, - { 0x0c0004e7, 0x1400ffff }, - { 0x0c0004e8, 0x24000001 }, - { 0x0c0004e9, 0x1400ffff }, - { 0x0c0004ea, 0x24000001 }, - { 0x0c0004eb, 0x1400ffff }, - { 0x0c0004ec, 0x24000001 }, - { 0x0c0004ed, 0x1400ffff }, - { 0x0c0004ee, 0x24000001 }, - { 0x0c0004ef, 0x1400ffff }, - { 0x0c0004f0, 0x24000001 }, - { 0x0c0004f1, 0x1400ffff }, - { 0x0c0004f2, 0x24000001 }, - { 0x0c0004f3, 0x1400ffff }, - { 0x0c0004f4, 0x24000001 }, - { 0x0c0004f5, 0x1400ffff }, - { 0x0c0004f6, 0x24000001 }, - { 0x0c0004f7, 0x1400ffff }, - { 0x0c0004f8, 0x24000001 }, - { 0x0c0004f9, 0x1400ffff }, - { 0x0c000500, 0x24000001 }, - { 0x0c000501, 0x1400ffff }, - { 0x0c000502, 0x24000001 }, - { 0x0c000503, 0x1400ffff }, - { 0x0c000504, 0x24000001 }, - { 0x0c000505, 0x1400ffff }, - { 0x0c000506, 0x24000001 }, - { 0x0c000507, 0x1400ffff }, - { 0x0c000508, 0x24000001 }, - { 0x0c000509, 0x1400ffff }, - { 0x0c00050a, 0x24000001 }, - { 0x0c00050b, 0x1400ffff }, - { 0x0c00050c, 0x24000001 }, - { 0x0c00050d, 0x1400ffff }, - { 0x0c00050e, 0x24000001 }, - { 0x0c00050f, 0x1400ffff }, - { 0x01000531, 0x24000030 }, - { 0x01000532, 0x24000030 }, - { 0x01000533, 0x24000030 }, - { 0x01000534, 0x24000030 }, - { 0x01000535, 0x24000030 }, - { 0x01000536, 0x24000030 }, - { 0x01000537, 0x24000030 }, - { 0x01000538, 0x24000030 }, - { 0x01000539, 0x24000030 }, - { 0x0100053a, 0x24000030 }, - { 0x0100053b, 0x24000030 }, - { 0x0100053c, 
0x24000030 }, - { 0x0100053d, 0x24000030 }, - { 0x0100053e, 0x24000030 }, - { 0x0100053f, 0x24000030 }, - { 0x01000540, 0x24000030 }, - { 0x01000541, 0x24000030 }, - { 0x01000542, 0x24000030 }, - { 0x01000543, 0x24000030 }, - { 0x01000544, 0x24000030 }, - { 0x01000545, 0x24000030 }, - { 0x01000546, 0x24000030 }, - { 0x01000547, 0x24000030 }, - { 0x01000548, 0x24000030 }, - { 0x01000549, 0x24000030 }, - { 0x0100054a, 0x24000030 }, - { 0x0100054b, 0x24000030 }, - { 0x0100054c, 0x24000030 }, - { 0x0100054d, 0x24000030 }, - { 0x0100054e, 0x24000030 }, - { 0x0100054f, 0x24000030 }, - { 0x01000550, 0x24000030 }, - { 0x01000551, 0x24000030 }, - { 0x01000552, 0x24000030 }, - { 0x01000553, 0x24000030 }, - { 0x01000554, 0x24000030 }, - { 0x01000555, 0x24000030 }, - { 0x01000556, 0x24000030 }, - { 0x01000559, 0x18000000 }, - { 0x0180055a, 0x54000005 }, - { 0x01000561, 0x1400ffd0 }, - { 0x01000562, 0x1400ffd0 }, - { 0x01000563, 0x1400ffd0 }, - { 0x01000564, 0x1400ffd0 }, - { 0x01000565, 0x1400ffd0 }, - { 0x01000566, 0x1400ffd0 }, - { 0x01000567, 0x1400ffd0 }, - { 0x01000568, 0x1400ffd0 }, - { 0x01000569, 0x1400ffd0 }, - { 0x0100056a, 0x1400ffd0 }, - { 0x0100056b, 0x1400ffd0 }, - { 0x0100056c, 0x1400ffd0 }, - { 0x0100056d, 0x1400ffd0 }, - { 0x0100056e, 0x1400ffd0 }, - { 0x0100056f, 0x1400ffd0 }, - { 0x01000570, 0x1400ffd0 }, - { 0x01000571, 0x1400ffd0 }, - { 0x01000572, 0x1400ffd0 }, - { 0x01000573, 0x1400ffd0 }, - { 0x01000574, 0x1400ffd0 }, - { 0x01000575, 0x1400ffd0 }, - { 0x01000576, 0x1400ffd0 }, - { 0x01000577, 0x1400ffd0 }, - { 0x01000578, 0x1400ffd0 }, - { 0x01000579, 0x1400ffd0 }, - { 0x0100057a, 0x1400ffd0 }, - { 0x0100057b, 0x1400ffd0 }, - { 0x0100057c, 0x1400ffd0 }, - { 0x0100057d, 0x1400ffd0 }, - { 0x0100057e, 0x1400ffd0 }, - { 0x0100057f, 0x1400ffd0 }, - { 0x01000580, 0x1400ffd0 }, - { 0x01000581, 0x1400ffd0 }, - { 0x01000582, 0x1400ffd0 }, - { 0x01000583, 0x1400ffd0 }, - { 0x01000584, 0x1400ffd0 }, - { 0x01000585, 0x1400ffd0 }, - { 0x01000586, 0x1400ffd0 }, - { 
0x01000587, 0x14000000 }, - { 0x09000589, 0x54000000 }, - { 0x0100058a, 0x44000000 }, - { 0x19800591, 0x30000028 }, - { 0x198005bb, 0x30000002 }, - { 0x190005be, 0x54000000 }, - { 0x190005bf, 0x30000000 }, - { 0x190005c0, 0x54000000 }, - { 0x198005c1, 0x30000001 }, - { 0x190005c3, 0x54000000 }, - { 0x198005c4, 0x30000001 }, - { 0x190005c6, 0x54000000 }, - { 0x190005c7, 0x30000000 }, - { 0x198005d0, 0x1c00001a }, - { 0x198005f0, 0x1c000002 }, - { 0x198005f3, 0x54000001 }, - { 0x09800600, 0x04000003 }, - { 0x0000060b, 0x5c000000 }, - { 0x0980060c, 0x54000001 }, - { 0x0080060e, 0x68000001 }, - { 0x00800610, 0x30000005 }, - { 0x0900061b, 0x54000000 }, - { 0x0080061e, 0x54000001 }, - { 0x00800621, 0x1c000019 }, - { 0x09000640, 0x18000000 }, - { 0x00800641, 0x1c000009 }, - { 0x1b80064b, 0x30000013 }, - { 0x09800660, 0x34000009 }, - { 0x0080066a, 0x54000003 }, - { 0x0080066e, 0x1c000001 }, - { 0x1b000670, 0x30000000 }, - { 0x00800671, 0x1c000062 }, - { 0x000006d4, 0x54000000 }, - { 0x000006d5, 0x1c000000 }, - { 0x008006d6, 0x30000006 }, - { 0x090006dd, 0x04000000 }, - { 0x000006de, 0x2c000000 }, - { 0x008006df, 0x30000005 }, - { 0x008006e5, 0x18000001 }, - { 0x008006e7, 0x30000001 }, - { 0x000006e9, 0x68000000 }, - { 0x008006ea, 0x30000003 }, - { 0x008006ee, 0x1c000001 }, - { 0x008006f0, 0x34000009 }, - { 0x008006fa, 0x1c000002 }, - { 0x008006fd, 0x68000001 }, - { 0x000006ff, 0x1c000000 }, - { 0x31800700, 0x5400000d }, - { 0x3100070f, 0x04000000 }, - { 0x31000710, 0x1c000000 }, - { 0x31000711, 0x30000000 }, - { 0x31800712, 0x1c00001d }, - { 0x31800730, 0x3000001a }, - { 0x3180074d, 0x1c000020 }, - { 0x37800780, 0x1c000025 }, - { 0x378007a6, 0x3000000a }, - { 0x370007b1, 0x1c000000 }, - { 0x0e800901, 0x30000001 }, - { 0x0e000903, 0x28000000 }, - { 0x0e800904, 0x1c000035 }, - { 0x0e00093c, 0x30000000 }, - { 0x0e00093d, 0x1c000000 }, - { 0x0e80093e, 0x28000002 }, - { 0x0e800941, 0x30000007 }, - { 0x0e800949, 0x28000003 }, - { 0x0e00094d, 0x30000000 }, - { 0x0e000950, 
0x1c000000 }, - { 0x0e800951, 0x30000003 }, - { 0x0e800958, 0x1c000009 }, - { 0x0e800962, 0x30000001 }, - { 0x09800964, 0x54000001 }, - { 0x0e800966, 0x34000009 }, - { 0x09000970, 0x54000000 }, - { 0x0e00097d, 0x1c000000 }, - { 0x02000981, 0x30000000 }, - { 0x02800982, 0x28000001 }, - { 0x02800985, 0x1c000007 }, - { 0x0280098f, 0x1c000001 }, - { 0x02800993, 0x1c000015 }, - { 0x028009aa, 0x1c000006 }, - { 0x020009b2, 0x1c000000 }, - { 0x028009b6, 0x1c000003 }, - { 0x020009bc, 0x30000000 }, - { 0x020009bd, 0x1c000000 }, - { 0x028009be, 0x28000002 }, - { 0x028009c1, 0x30000003 }, - { 0x028009c7, 0x28000001 }, - { 0x028009cb, 0x28000001 }, - { 0x020009cd, 0x30000000 }, - { 0x020009ce, 0x1c000000 }, - { 0x020009d7, 0x28000000 }, - { 0x028009dc, 0x1c000001 }, - { 0x028009df, 0x1c000002 }, - { 0x028009e2, 0x30000001 }, - { 0x028009e6, 0x34000009 }, - { 0x028009f0, 0x1c000001 }, - { 0x028009f2, 0x5c000001 }, - { 0x028009f4, 0x3c000005 }, - { 0x020009fa, 0x68000000 }, - { 0x15800a01, 0x30000001 }, - { 0x15000a03, 0x28000000 }, - { 0x15800a05, 0x1c000005 }, - { 0x15800a0f, 0x1c000001 }, - { 0x15800a13, 0x1c000015 }, - { 0x15800a2a, 0x1c000006 }, - { 0x15800a32, 0x1c000001 }, - { 0x15800a35, 0x1c000001 }, - { 0x15800a38, 0x1c000001 }, - { 0x15000a3c, 0x30000000 }, - { 0x15800a3e, 0x28000002 }, - { 0x15800a41, 0x30000001 }, - { 0x15800a47, 0x30000001 }, - { 0x15800a4b, 0x30000002 }, - { 0x15800a59, 0x1c000003 }, - { 0x15000a5e, 0x1c000000 }, - { 0x15800a66, 0x34000009 }, - { 0x15800a70, 0x30000001 }, - { 0x15800a72, 0x1c000002 }, - { 0x14800a81, 0x30000001 }, - { 0x14000a83, 0x28000000 }, - { 0x14800a85, 0x1c000008 }, - { 0x14800a8f, 0x1c000002 }, - { 0x14800a93, 0x1c000015 }, - { 0x14800aaa, 0x1c000006 }, - { 0x14800ab2, 0x1c000001 }, - { 0x14800ab5, 0x1c000004 }, - { 0x14000abc, 0x30000000 }, - { 0x14000abd, 0x1c000000 }, - { 0x14800abe, 0x28000002 }, - { 0x14800ac1, 0x30000004 }, - { 0x14800ac7, 0x30000001 }, - { 0x14000ac9, 0x28000000 }, - { 0x14800acb, 0x28000001 }, - { 
0x14000acd, 0x30000000 }, - { 0x14000ad0, 0x1c000000 }, - { 0x14800ae0, 0x1c000001 }, - { 0x14800ae2, 0x30000001 }, - { 0x14800ae6, 0x34000009 }, - { 0x14000af1, 0x5c000000 }, - { 0x2b000b01, 0x30000000 }, - { 0x2b800b02, 0x28000001 }, - { 0x2b800b05, 0x1c000007 }, - { 0x2b800b0f, 0x1c000001 }, - { 0x2b800b13, 0x1c000015 }, - { 0x2b800b2a, 0x1c000006 }, - { 0x2b800b32, 0x1c000001 }, - { 0x2b800b35, 0x1c000004 }, - { 0x2b000b3c, 0x30000000 }, - { 0x2b000b3d, 0x1c000000 }, - { 0x2b000b3e, 0x28000000 }, - { 0x2b000b3f, 0x30000000 }, - { 0x2b000b40, 0x28000000 }, - { 0x2b800b41, 0x30000002 }, - { 0x2b800b47, 0x28000001 }, - { 0x2b800b4b, 0x28000001 }, - { 0x2b000b4d, 0x30000000 }, - { 0x2b000b56, 0x30000000 }, - { 0x2b000b57, 0x28000000 }, - { 0x2b800b5c, 0x1c000001 }, - { 0x2b800b5f, 0x1c000002 }, - { 0x2b800b66, 0x34000009 }, - { 0x2b000b70, 0x68000000 }, - { 0x2b000b71, 0x1c000000 }, - { 0x35000b82, 0x30000000 }, - { 0x35000b83, 0x1c000000 }, - { 0x35800b85, 0x1c000005 }, - { 0x35800b8e, 0x1c000002 }, - { 0x35800b92, 0x1c000003 }, - { 0x35800b99, 0x1c000001 }, - { 0x35000b9c, 0x1c000000 }, - { 0x35800b9e, 0x1c000001 }, - { 0x35800ba3, 0x1c000001 }, - { 0x35800ba8, 0x1c000002 }, - { 0x35800bae, 0x1c00000b }, - { 0x35800bbe, 0x28000001 }, - { 0x35000bc0, 0x30000000 }, - { 0x35800bc1, 0x28000001 }, - { 0x35800bc6, 0x28000002 }, - { 0x35800bca, 0x28000002 }, - { 0x35000bcd, 0x30000000 }, - { 0x35000bd7, 0x28000000 }, - { 0x35800be6, 0x34000009 }, - { 0x35800bf0, 0x3c000002 }, - { 0x35800bf3, 0x68000005 }, - { 0x35000bf9, 0x5c000000 }, - { 0x35000bfa, 0x68000000 }, - { 0x36800c01, 0x28000002 }, - { 0x36800c05, 0x1c000007 }, - { 0x36800c0e, 0x1c000002 }, - { 0x36800c12, 0x1c000016 }, - { 0x36800c2a, 0x1c000009 }, - { 0x36800c35, 0x1c000004 }, - { 0x36800c3e, 0x30000002 }, - { 0x36800c41, 0x28000003 }, - { 0x36800c46, 0x30000002 }, - { 0x36800c4a, 0x30000003 }, - { 0x36800c55, 0x30000001 }, - { 0x36800c60, 0x1c000001 }, - { 0x36800c66, 0x34000009 }, - { 0x1c800c82, 
0x28000001 }, - { 0x1c800c85, 0x1c000007 }, - { 0x1c800c8e, 0x1c000002 }, - { 0x1c800c92, 0x1c000016 }, - { 0x1c800caa, 0x1c000009 }, - { 0x1c800cb5, 0x1c000004 }, - { 0x1c000cbc, 0x30000000 }, - { 0x1c000cbd, 0x1c000000 }, - { 0x1c000cbe, 0x28000000 }, - { 0x1c000cbf, 0x30000000 }, - { 0x1c800cc0, 0x28000004 }, - { 0x1c000cc6, 0x30000000 }, - { 0x1c800cc7, 0x28000001 }, - { 0x1c800cca, 0x28000001 }, - { 0x1c800ccc, 0x30000001 }, - { 0x1c800cd5, 0x28000001 }, - { 0x1c000cde, 0x1c000000 }, - { 0x1c800ce0, 0x1c000001 }, - { 0x1c800ce6, 0x34000009 }, - { 0x24800d02, 0x28000001 }, - { 0x24800d05, 0x1c000007 }, - { 0x24800d0e, 0x1c000002 }, - { 0x24800d12, 0x1c000016 }, - { 0x24800d2a, 0x1c00000f }, - { 0x24800d3e, 0x28000002 }, - { 0x24800d41, 0x30000002 }, - { 0x24800d46, 0x28000002 }, - { 0x24800d4a, 0x28000002 }, - { 0x24000d4d, 0x30000000 }, - { 0x24000d57, 0x28000000 }, - { 0x24800d60, 0x1c000001 }, - { 0x24800d66, 0x34000009 }, - { 0x2f800d82, 0x28000001 }, - { 0x2f800d85, 0x1c000011 }, - { 0x2f800d9a, 0x1c000017 }, - { 0x2f800db3, 0x1c000008 }, - { 0x2f000dbd, 0x1c000000 }, - { 0x2f800dc0, 0x1c000006 }, - { 0x2f000dca, 0x30000000 }, - { 0x2f800dcf, 0x28000002 }, - { 0x2f800dd2, 0x30000002 }, - { 0x2f000dd6, 0x30000000 }, - { 0x2f800dd8, 0x28000007 }, - { 0x2f800df2, 0x28000001 }, - { 0x2f000df4, 0x54000000 }, - { 0x38800e01, 0x1c00002f }, - { 0x38000e31, 0x30000000 }, - { 0x38800e32, 0x1c000001 }, - { 0x38800e34, 0x30000006 }, - { 0x09000e3f, 0x5c000000 }, - { 0x38800e40, 0x1c000005 }, - { 0x38000e46, 0x18000000 }, - { 0x38800e47, 0x30000007 }, - { 0x38000e4f, 0x54000000 }, - { 0x38800e50, 0x34000009 }, - { 0x38800e5a, 0x54000001 }, - { 0x20800e81, 0x1c000001 }, - { 0x20000e84, 0x1c000000 }, - { 0x20800e87, 0x1c000001 }, - { 0x20000e8a, 0x1c000000 }, - { 0x20000e8d, 0x1c000000 }, - { 0x20800e94, 0x1c000003 }, - { 0x20800e99, 0x1c000006 }, - { 0x20800ea1, 0x1c000002 }, - { 0x20000ea5, 0x1c000000 }, - { 0x20000ea7, 0x1c000000 }, - { 0x20800eaa, 0x1c000001 }, - { 
0x20800ead, 0x1c000003 }, - { 0x20000eb1, 0x30000000 }, - { 0x20800eb2, 0x1c000001 }, - { 0x20800eb4, 0x30000005 }, - { 0x20800ebb, 0x30000001 }, - { 0x20000ebd, 0x1c000000 }, - { 0x20800ec0, 0x1c000004 }, - { 0x20000ec6, 0x18000000 }, - { 0x20800ec8, 0x30000005 }, - { 0x20800ed0, 0x34000009 }, - { 0x20800edc, 0x1c000001 }, - { 0x39000f00, 0x1c000000 }, - { 0x39800f01, 0x68000002 }, - { 0x39800f04, 0x5400000e }, - { 0x39800f13, 0x68000004 }, - { 0x39800f18, 0x30000001 }, - { 0x39800f1a, 0x68000005 }, - { 0x39800f20, 0x34000009 }, - { 0x39800f2a, 0x3c000009 }, - { 0x39000f34, 0x68000000 }, - { 0x39000f35, 0x30000000 }, - { 0x39000f36, 0x68000000 }, - { 0x39000f37, 0x30000000 }, - { 0x39000f38, 0x68000000 }, - { 0x39000f39, 0x30000000 }, - { 0x39000f3a, 0x58000000 }, - { 0x39000f3b, 0x48000000 }, - { 0x39000f3c, 0x58000000 }, - { 0x39000f3d, 0x48000000 }, - { 0x39800f3e, 0x28000001 }, - { 0x39800f40, 0x1c000007 }, - { 0x39800f49, 0x1c000021 }, - { 0x39800f71, 0x3000000d }, - { 0x39000f7f, 0x28000000 }, - { 0x39800f80, 0x30000004 }, - { 0x39000f85, 0x54000000 }, - { 0x39800f86, 0x30000001 }, - { 0x39800f88, 0x1c000003 }, - { 0x39800f90, 0x30000007 }, - { 0x39800f99, 0x30000023 }, - { 0x39800fbe, 0x68000007 }, - { 0x39000fc6, 0x30000000 }, - { 0x39800fc7, 0x68000005 }, - { 0x39000fcf, 0x68000000 }, - { 0x39800fd0, 0x54000001 }, - { 0x26801000, 0x1c000021 }, - { 0x26801023, 0x1c000004 }, - { 0x26801029, 0x1c000001 }, - { 0x2600102c, 0x28000000 }, - { 0x2680102d, 0x30000003 }, - { 0x26001031, 0x28000000 }, - { 0x26001032, 0x30000000 }, - { 0x26801036, 0x30000001 }, - { 0x26001038, 0x28000000 }, - { 0x26001039, 0x30000000 }, - { 0x26801040, 0x34000009 }, - { 0x2680104a, 0x54000005 }, - { 0x26801050, 0x1c000005 }, - { 0x26801056, 0x28000001 }, - { 0x26801058, 0x30000001 }, - { 0x100010a0, 0x24001c60 }, - { 0x100010a1, 0x24001c60 }, - { 0x100010a2, 0x24001c60 }, - { 0x100010a3, 0x24001c60 }, - { 0x100010a4, 0x24001c60 }, - { 0x100010a5, 0x24001c60 }, - { 0x100010a6, 
0x24001c60 }, - { 0x100010a7, 0x24001c60 }, - { 0x100010a8, 0x24001c60 }, - { 0x100010a9, 0x24001c60 }, - { 0x100010aa, 0x24001c60 }, - { 0x100010ab, 0x24001c60 }, - { 0x100010ac, 0x24001c60 }, - { 0x100010ad, 0x24001c60 }, - { 0x100010ae, 0x24001c60 }, - { 0x100010af, 0x24001c60 }, - { 0x100010b0, 0x24001c60 }, - { 0x100010b1, 0x24001c60 }, - { 0x100010b2, 0x24001c60 }, - { 0x100010b3, 0x24001c60 }, - { 0x100010b4, 0x24001c60 }, - { 0x100010b5, 0x24001c60 }, - { 0x100010b6, 0x24001c60 }, - { 0x100010b7, 0x24001c60 }, - { 0x100010b8, 0x24001c60 }, - { 0x100010b9, 0x24001c60 }, - { 0x100010ba, 0x24001c60 }, - { 0x100010bb, 0x24001c60 }, - { 0x100010bc, 0x24001c60 }, - { 0x100010bd, 0x24001c60 }, - { 0x100010be, 0x24001c60 }, - { 0x100010bf, 0x24001c60 }, - { 0x100010c0, 0x24001c60 }, - { 0x100010c1, 0x24001c60 }, - { 0x100010c2, 0x24001c60 }, - { 0x100010c3, 0x24001c60 }, - { 0x100010c4, 0x24001c60 }, - { 0x100010c5, 0x24001c60 }, - { 0x108010d0, 0x1c00002a }, - { 0x090010fb, 0x54000000 }, - { 0x100010fc, 0x18000000 }, - { 0x17801100, 0x1c000059 }, - { 0x1780115f, 0x1c000043 }, - { 0x178011a8, 0x1c000051 }, - { 0x0f801200, 0x1c000048 }, - { 0x0f80124a, 0x1c000003 }, - { 0x0f801250, 0x1c000006 }, - { 0x0f001258, 0x1c000000 }, - { 0x0f80125a, 0x1c000003 }, - { 0x0f801260, 0x1c000028 }, - { 0x0f80128a, 0x1c000003 }, - { 0x0f801290, 0x1c000020 }, - { 0x0f8012b2, 0x1c000003 }, - { 0x0f8012b8, 0x1c000006 }, - { 0x0f0012c0, 0x1c000000 }, - { 0x0f8012c2, 0x1c000003 }, - { 0x0f8012c8, 0x1c00000e }, - { 0x0f8012d8, 0x1c000038 }, - { 0x0f801312, 0x1c000003 }, - { 0x0f801318, 0x1c000042 }, - { 0x0f00135f, 0x30000000 }, - { 0x0f001360, 0x68000000 }, - { 0x0f801361, 0x54000007 }, - { 0x0f801369, 0x3c000013 }, - { 0x0f801380, 0x1c00000f }, - { 0x0f801390, 0x68000009 }, - { 0x088013a0, 0x1c000054 }, - { 0x07801401, 0x1c00026b }, - { 0x0780166d, 0x54000001 }, - { 0x0780166f, 0x1c000007 }, - { 0x28001680, 0x74000000 }, - { 0x28801681, 0x1c000019 }, - { 0x2800169b, 0x58000000 }, - { 
0x2800169c, 0x48000000 }, - { 0x2d8016a0, 0x1c00004a }, - { 0x098016eb, 0x54000002 }, - { 0x2d8016ee, 0x38000002 }, - { 0x32801700, 0x1c00000c }, - { 0x3280170e, 0x1c000003 }, - { 0x32801712, 0x30000002 }, - { 0x18801720, 0x1c000011 }, - { 0x18801732, 0x30000002 }, - { 0x09801735, 0x54000001 }, - { 0x06801740, 0x1c000011 }, - { 0x06801752, 0x30000001 }, - { 0x33801760, 0x1c00000c }, - { 0x3380176e, 0x1c000002 }, - { 0x33801772, 0x30000001 }, - { 0x1f801780, 0x1c000033 }, - { 0x1f8017b4, 0x04000001 }, - { 0x1f0017b6, 0x28000000 }, - { 0x1f8017b7, 0x30000006 }, - { 0x1f8017be, 0x28000007 }, - { 0x1f0017c6, 0x30000000 }, - { 0x1f8017c7, 0x28000001 }, - { 0x1f8017c9, 0x3000000a }, - { 0x1f8017d4, 0x54000002 }, - { 0x1f0017d7, 0x18000000 }, - { 0x1f8017d8, 0x54000002 }, - { 0x1f0017db, 0x5c000000 }, - { 0x1f0017dc, 0x1c000000 }, - { 0x1f0017dd, 0x30000000 }, - { 0x1f8017e0, 0x34000009 }, - { 0x1f8017f0, 0x3c000009 }, - { 0x25801800, 0x54000005 }, - { 0x25001806, 0x44000000 }, - { 0x25801807, 0x54000003 }, - { 0x2580180b, 0x30000002 }, - { 0x2500180e, 0x74000000 }, - { 0x25801810, 0x34000009 }, - { 0x25801820, 0x1c000022 }, - { 0x25001843, 0x18000000 }, - { 0x25801844, 0x1c000033 }, - { 0x25801880, 0x1c000028 }, - { 0x250018a9, 0x30000000 }, - { 0x22801900, 0x1c00001c }, - { 0x22801920, 0x30000002 }, - { 0x22801923, 0x28000003 }, - { 0x22801927, 0x30000001 }, - { 0x22801929, 0x28000002 }, - { 0x22801930, 0x28000001 }, - { 0x22001932, 0x30000000 }, - { 0x22801933, 0x28000005 }, - { 0x22801939, 0x30000002 }, - { 0x22001940, 0x68000000 }, - { 0x22801944, 0x54000001 }, - { 0x22801946, 0x34000009 }, - { 0x34801950, 0x1c00001d }, - { 0x34801970, 0x1c000004 }, - { 0x27801980, 0x1c000029 }, - { 0x278019b0, 0x28000010 }, - { 0x278019c1, 0x1c000006 }, - { 0x278019c8, 0x28000001 }, - { 0x278019d0, 0x34000009 }, - { 0x278019de, 0x54000001 }, - { 0x1f8019e0, 0x6800001f }, - { 0x05801a00, 0x1c000016 }, - { 0x05801a17, 0x30000001 }, - { 0x05801a19, 0x28000002 }, - { 0x05801a1e, 
0x54000001 }, - { 0x21801d00, 0x1400002b }, - { 0x21801d2c, 0x18000035 }, - { 0x21801d62, 0x14000015 }, - { 0x0c001d78, 0x18000000 }, - { 0x21801d79, 0x14000021 }, - { 0x21801d9b, 0x18000024 }, - { 0x1b801dc0, 0x30000003 }, - { 0x21001e00, 0x24000001 }, - { 0x21001e01, 0x1400ffff }, - { 0x21001e02, 0x24000001 }, - { 0x21001e03, 0x1400ffff }, - { 0x21001e04, 0x24000001 }, - { 0x21001e05, 0x1400ffff }, - { 0x21001e06, 0x24000001 }, - { 0x21001e07, 0x1400ffff }, - { 0x21001e08, 0x24000001 }, - { 0x21001e09, 0x1400ffff }, - { 0x21001e0a, 0x24000001 }, - { 0x21001e0b, 0x1400ffff }, - { 0x21001e0c, 0x24000001 }, - { 0x21001e0d, 0x1400ffff }, - { 0x21001e0e, 0x24000001 }, - { 0x21001e0f, 0x1400ffff }, - { 0x21001e10, 0x24000001 }, - { 0x21001e11, 0x1400ffff }, - { 0x21001e12, 0x24000001 }, - { 0x21001e13, 0x1400ffff }, - { 0x21001e14, 0x24000001 }, - { 0x21001e15, 0x1400ffff }, - { 0x21001e16, 0x24000001 }, - { 0x21001e17, 0x1400ffff }, - { 0x21001e18, 0x24000001 }, - { 0x21001e19, 0x1400ffff }, - { 0x21001e1a, 0x24000001 }, - { 0x21001e1b, 0x1400ffff }, - { 0x21001e1c, 0x24000001 }, - { 0x21001e1d, 0x1400ffff }, - { 0x21001e1e, 0x24000001 }, - { 0x21001e1f, 0x1400ffff }, - { 0x21001e20, 0x24000001 }, - { 0x21001e21, 0x1400ffff }, - { 0x21001e22, 0x24000001 }, - { 0x21001e23, 0x1400ffff }, - { 0x21001e24, 0x24000001 }, - { 0x21001e25, 0x1400ffff }, - { 0x21001e26, 0x24000001 }, - { 0x21001e27, 0x1400ffff }, - { 0x21001e28, 0x24000001 }, - { 0x21001e29, 0x1400ffff }, - { 0x21001e2a, 0x24000001 }, - { 0x21001e2b, 0x1400ffff }, - { 0x21001e2c, 0x24000001 }, - { 0x21001e2d, 0x1400ffff }, - { 0x21001e2e, 0x24000001 }, - { 0x21001e2f, 0x1400ffff }, - { 0x21001e30, 0x24000001 }, - { 0x21001e31, 0x1400ffff }, - { 0x21001e32, 0x24000001 }, - { 0x21001e33, 0x1400ffff }, - { 0x21001e34, 0x24000001 }, - { 0x21001e35, 0x1400ffff }, - { 0x21001e36, 0x24000001 }, - { 0x21001e37, 0x1400ffff }, - { 0x21001e38, 0x24000001 }, - { 0x21001e39, 0x1400ffff }, - { 0x21001e3a, 0x24000001 }, - { 
0x21001e3b, 0x1400ffff }, - { 0x21001e3c, 0x24000001 }, - { 0x21001e3d, 0x1400ffff }, - { 0x21001e3e, 0x24000001 }, - { 0x21001e3f, 0x1400ffff }, - { 0x21001e40, 0x24000001 }, - { 0x21001e41, 0x1400ffff }, - { 0x21001e42, 0x24000001 }, - { 0x21001e43, 0x1400ffff }, - { 0x21001e44, 0x24000001 }, - { 0x21001e45, 0x1400ffff }, - { 0x21001e46, 0x24000001 }, - { 0x21001e47, 0x1400ffff }, - { 0x21001e48, 0x24000001 }, - { 0x21001e49, 0x1400ffff }, - { 0x21001e4a, 0x24000001 }, - { 0x21001e4b, 0x1400ffff }, - { 0x21001e4c, 0x24000001 }, - { 0x21001e4d, 0x1400ffff }, - { 0x21001e4e, 0x24000001 }, - { 0x21001e4f, 0x1400ffff }, - { 0x21001e50, 0x24000001 }, - { 0x21001e51, 0x1400ffff }, - { 0x21001e52, 0x24000001 }, - { 0x21001e53, 0x1400ffff }, - { 0x21001e54, 0x24000001 }, - { 0x21001e55, 0x1400ffff }, - { 0x21001e56, 0x24000001 }, - { 0x21001e57, 0x1400ffff }, - { 0x21001e58, 0x24000001 }, - { 0x21001e59, 0x1400ffff }, - { 0x21001e5a, 0x24000001 }, - { 0x21001e5b, 0x1400ffff }, - { 0x21001e5c, 0x24000001 }, - { 0x21001e5d, 0x1400ffff }, - { 0x21001e5e, 0x24000001 }, - { 0x21001e5f, 0x1400ffff }, - { 0x21001e60, 0x24000001 }, - { 0x21001e61, 0x1400ffff }, - { 0x21001e62, 0x24000001 }, - { 0x21001e63, 0x1400ffff }, - { 0x21001e64, 0x24000001 }, - { 0x21001e65, 0x1400ffff }, - { 0x21001e66, 0x24000001 }, - { 0x21001e67, 0x1400ffff }, - { 0x21001e68, 0x24000001 }, - { 0x21001e69, 0x1400ffff }, - { 0x21001e6a, 0x24000001 }, - { 0x21001e6b, 0x1400ffff }, - { 0x21001e6c, 0x24000001 }, - { 0x21001e6d, 0x1400ffff }, - { 0x21001e6e, 0x24000001 }, - { 0x21001e6f, 0x1400ffff }, - { 0x21001e70, 0x24000001 }, - { 0x21001e71, 0x1400ffff }, - { 0x21001e72, 0x24000001 }, - { 0x21001e73, 0x1400ffff }, - { 0x21001e74, 0x24000001 }, - { 0x21001e75, 0x1400ffff }, - { 0x21001e76, 0x24000001 }, - { 0x21001e77, 0x1400ffff }, - { 0x21001e78, 0x24000001 }, - { 0x21001e79, 0x1400ffff }, - { 0x21001e7a, 0x24000001 }, - { 0x21001e7b, 0x1400ffff }, - { 0x21001e7c, 0x24000001 }, - { 0x21001e7d, 
0x1400ffff }, - { 0x21001e7e, 0x24000001 }, - { 0x21001e7f, 0x1400ffff }, - { 0x21001e80, 0x24000001 }, - { 0x21001e81, 0x1400ffff }, - { 0x21001e82, 0x24000001 }, - { 0x21001e83, 0x1400ffff }, - { 0x21001e84, 0x24000001 }, - { 0x21001e85, 0x1400ffff }, - { 0x21001e86, 0x24000001 }, - { 0x21001e87, 0x1400ffff }, - { 0x21001e88, 0x24000001 }, - { 0x21001e89, 0x1400ffff }, - { 0x21001e8a, 0x24000001 }, - { 0x21001e8b, 0x1400ffff }, - { 0x21001e8c, 0x24000001 }, - { 0x21001e8d, 0x1400ffff }, - { 0x21001e8e, 0x24000001 }, - { 0x21001e8f, 0x1400ffff }, - { 0x21001e90, 0x24000001 }, - { 0x21001e91, 0x1400ffff }, - { 0x21001e92, 0x24000001 }, - { 0x21001e93, 0x1400ffff }, - { 0x21001e94, 0x24000001 }, - { 0x21001e95, 0x1400ffff }, - { 0x21801e96, 0x14000004 }, - { 0x21001e9b, 0x1400ffc5 }, - { 0x21001ea0, 0x24000001 }, - { 0x21001ea1, 0x1400ffff }, - { 0x21001ea2, 0x24000001 }, - { 0x21001ea3, 0x1400ffff }, - { 0x21001ea4, 0x24000001 }, - { 0x21001ea5, 0x1400ffff }, - { 0x21001ea6, 0x24000001 }, - { 0x21001ea7, 0x1400ffff }, - { 0x21001ea8, 0x24000001 }, - { 0x21001ea9, 0x1400ffff }, - { 0x21001eaa, 0x24000001 }, - { 0x21001eab, 0x1400ffff }, - { 0x21001eac, 0x24000001 }, - { 0x21001ead, 0x1400ffff }, - { 0x21001eae, 0x24000001 }, - { 0x21001eaf, 0x1400ffff }, - { 0x21001eb0, 0x24000001 }, - { 0x21001eb1, 0x1400ffff }, - { 0x21001eb2, 0x24000001 }, - { 0x21001eb3, 0x1400ffff }, - { 0x21001eb4, 0x24000001 }, - { 0x21001eb5, 0x1400ffff }, - { 0x21001eb6, 0x24000001 }, - { 0x21001eb7, 0x1400ffff }, - { 0x21001eb8, 0x24000001 }, - { 0x21001eb9, 0x1400ffff }, - { 0x21001eba, 0x24000001 }, - { 0x21001ebb, 0x1400ffff }, - { 0x21001ebc, 0x24000001 }, - { 0x21001ebd, 0x1400ffff }, - { 0x21001ebe, 0x24000001 }, - { 0x21001ebf, 0x1400ffff }, - { 0x21001ec0, 0x24000001 }, - { 0x21001ec1, 0x1400ffff }, - { 0x21001ec2, 0x24000001 }, - { 0x21001ec3, 0x1400ffff }, - { 0x21001ec4, 0x24000001 }, - { 0x21001ec5, 0x1400ffff }, - { 0x21001ec6, 0x24000001 }, - { 0x21001ec7, 0x1400ffff }, - { 
0x21001ec8, 0x24000001 }, - { 0x21001ec9, 0x1400ffff }, - { 0x21001eca, 0x24000001 }, - { 0x21001ecb, 0x1400ffff }, - { 0x21001ecc, 0x24000001 }, - { 0x21001ecd, 0x1400ffff }, - { 0x21001ece, 0x24000001 }, - { 0x21001ecf, 0x1400ffff }, - { 0x21001ed0, 0x24000001 }, - { 0x21001ed1, 0x1400ffff }, - { 0x21001ed2, 0x24000001 }, - { 0x21001ed3, 0x1400ffff }, - { 0x21001ed4, 0x24000001 }, - { 0x21001ed5, 0x1400ffff }, - { 0x21001ed6, 0x24000001 }, - { 0x21001ed7, 0x1400ffff }, - { 0x21001ed8, 0x24000001 }, - { 0x21001ed9, 0x1400ffff }, - { 0x21001eda, 0x24000001 }, - { 0x21001edb, 0x1400ffff }, - { 0x21001edc, 0x24000001 }, - { 0x21001edd, 0x1400ffff }, - { 0x21001ede, 0x24000001 }, - { 0x21001edf, 0x1400ffff }, - { 0x21001ee0, 0x24000001 }, - { 0x21001ee1, 0x1400ffff }, - { 0x21001ee2, 0x24000001 }, - { 0x21001ee3, 0x1400ffff }, - { 0x21001ee4, 0x24000001 }, - { 0x21001ee5, 0x1400ffff }, - { 0x21001ee6, 0x24000001 }, - { 0x21001ee7, 0x1400ffff }, - { 0x21001ee8, 0x24000001 }, - { 0x21001ee9, 0x1400ffff }, - { 0x21001eea, 0x24000001 }, - { 0x21001eeb, 0x1400ffff }, - { 0x21001eec, 0x24000001 }, - { 0x21001eed, 0x1400ffff }, - { 0x21001eee, 0x24000001 }, - { 0x21001eef, 0x1400ffff }, - { 0x21001ef0, 0x24000001 }, - { 0x21001ef1, 0x1400ffff }, - { 0x21001ef2, 0x24000001 }, - { 0x21001ef3, 0x1400ffff }, - { 0x21001ef4, 0x24000001 }, - { 0x21001ef5, 0x1400ffff }, - { 0x21001ef6, 0x24000001 }, - { 0x21001ef7, 0x1400ffff }, - { 0x21001ef8, 0x24000001 }, - { 0x21001ef9, 0x1400ffff }, - { 0x13001f00, 0x14000008 }, - { 0x13001f01, 0x14000008 }, - { 0x13001f02, 0x14000008 }, - { 0x13001f03, 0x14000008 }, - { 0x13001f04, 0x14000008 }, - { 0x13001f05, 0x14000008 }, - { 0x13001f06, 0x14000008 }, - { 0x13001f07, 0x14000008 }, - { 0x13001f08, 0x2400fff8 }, - { 0x13001f09, 0x2400fff8 }, - { 0x13001f0a, 0x2400fff8 }, - { 0x13001f0b, 0x2400fff8 }, - { 0x13001f0c, 0x2400fff8 }, - { 0x13001f0d, 0x2400fff8 }, - { 0x13001f0e, 0x2400fff8 }, - { 0x13001f0f, 0x2400fff8 }, - { 0x13001f10, 
0x14000008 }, - { 0x13001f11, 0x14000008 }, - { 0x13001f12, 0x14000008 }, - { 0x13001f13, 0x14000008 }, - { 0x13001f14, 0x14000008 }, - { 0x13001f15, 0x14000008 }, - { 0x13001f18, 0x2400fff8 }, - { 0x13001f19, 0x2400fff8 }, - { 0x13001f1a, 0x2400fff8 }, - { 0x13001f1b, 0x2400fff8 }, - { 0x13001f1c, 0x2400fff8 }, - { 0x13001f1d, 0x2400fff8 }, - { 0x13001f20, 0x14000008 }, - { 0x13001f21, 0x14000008 }, - { 0x13001f22, 0x14000008 }, - { 0x13001f23, 0x14000008 }, - { 0x13001f24, 0x14000008 }, - { 0x13001f25, 0x14000008 }, - { 0x13001f26, 0x14000008 }, - { 0x13001f27, 0x14000008 }, - { 0x13001f28, 0x2400fff8 }, - { 0x13001f29, 0x2400fff8 }, - { 0x13001f2a, 0x2400fff8 }, - { 0x13001f2b, 0x2400fff8 }, - { 0x13001f2c, 0x2400fff8 }, - { 0x13001f2d, 0x2400fff8 }, - { 0x13001f2e, 0x2400fff8 }, - { 0x13001f2f, 0x2400fff8 }, - { 0x13001f30, 0x14000008 }, - { 0x13001f31, 0x14000008 }, - { 0x13001f32, 0x14000008 }, - { 0x13001f33, 0x14000008 }, - { 0x13001f34, 0x14000008 }, - { 0x13001f35, 0x14000008 }, - { 0x13001f36, 0x14000008 }, - { 0x13001f37, 0x14000008 }, - { 0x13001f38, 0x2400fff8 }, - { 0x13001f39, 0x2400fff8 }, - { 0x13001f3a, 0x2400fff8 }, - { 0x13001f3b, 0x2400fff8 }, - { 0x13001f3c, 0x2400fff8 }, - { 0x13001f3d, 0x2400fff8 }, - { 0x13001f3e, 0x2400fff8 }, - { 0x13001f3f, 0x2400fff8 }, - { 0x13001f40, 0x14000008 }, - { 0x13001f41, 0x14000008 }, - { 0x13001f42, 0x14000008 }, - { 0x13001f43, 0x14000008 }, - { 0x13001f44, 0x14000008 }, - { 0x13001f45, 0x14000008 }, - { 0x13001f48, 0x2400fff8 }, - { 0x13001f49, 0x2400fff8 }, - { 0x13001f4a, 0x2400fff8 }, - { 0x13001f4b, 0x2400fff8 }, - { 0x13001f4c, 0x2400fff8 }, - { 0x13001f4d, 0x2400fff8 }, - { 0x13001f50, 0x14000000 }, - { 0x13001f51, 0x14000008 }, - { 0x13001f52, 0x14000000 }, - { 0x13001f53, 0x14000008 }, - { 0x13001f54, 0x14000000 }, - { 0x13001f55, 0x14000008 }, - { 0x13001f56, 0x14000000 }, - { 0x13001f57, 0x14000008 }, - { 0x13001f59, 0x2400fff8 }, - { 0x13001f5b, 0x2400fff8 }, - { 0x13001f5d, 0x2400fff8 }, - { 
0x13001f5f, 0x2400fff8 }, - { 0x13001f60, 0x14000008 }, - { 0x13001f61, 0x14000008 }, - { 0x13001f62, 0x14000008 }, - { 0x13001f63, 0x14000008 }, - { 0x13001f64, 0x14000008 }, - { 0x13001f65, 0x14000008 }, - { 0x13001f66, 0x14000008 }, - { 0x13001f67, 0x14000008 }, - { 0x13001f68, 0x2400fff8 }, - { 0x13001f69, 0x2400fff8 }, - { 0x13001f6a, 0x2400fff8 }, - { 0x13001f6b, 0x2400fff8 }, - { 0x13001f6c, 0x2400fff8 }, - { 0x13001f6d, 0x2400fff8 }, - { 0x13001f6e, 0x2400fff8 }, - { 0x13001f6f, 0x2400fff8 }, - { 0x13001f70, 0x1400004a }, - { 0x13001f71, 0x1400004a }, - { 0x13001f72, 0x14000056 }, - { 0x13001f73, 0x14000056 }, - { 0x13001f74, 0x14000056 }, - { 0x13001f75, 0x14000056 }, - { 0x13001f76, 0x14000064 }, - { 0x13001f77, 0x14000064 }, - { 0x13001f78, 0x14000080 }, - { 0x13001f79, 0x14000080 }, - { 0x13001f7a, 0x14000070 }, - { 0x13001f7b, 0x14000070 }, - { 0x13001f7c, 0x1400007e }, - { 0x13001f7d, 0x1400007e }, - { 0x13001f80, 0x14000008 }, - { 0x13001f81, 0x14000008 }, - { 0x13001f82, 0x14000008 }, - { 0x13001f83, 0x14000008 }, - { 0x13001f84, 0x14000008 }, - { 0x13001f85, 0x14000008 }, - { 0x13001f86, 0x14000008 }, - { 0x13001f87, 0x14000008 }, - { 0x13001f88, 0x2000fff8 }, - { 0x13001f89, 0x2000fff8 }, - { 0x13001f8a, 0x2000fff8 }, - { 0x13001f8b, 0x2000fff8 }, - { 0x13001f8c, 0x2000fff8 }, - { 0x13001f8d, 0x2000fff8 }, - { 0x13001f8e, 0x2000fff8 }, - { 0x13001f8f, 0x2000fff8 }, - { 0x13001f90, 0x14000008 }, - { 0x13001f91, 0x14000008 }, - { 0x13001f92, 0x14000008 }, - { 0x13001f93, 0x14000008 }, - { 0x13001f94, 0x14000008 }, - { 0x13001f95, 0x14000008 }, - { 0x13001f96, 0x14000008 }, - { 0x13001f97, 0x14000008 }, - { 0x13001f98, 0x2000fff8 }, - { 0x13001f99, 0x2000fff8 }, - { 0x13001f9a, 0x2000fff8 }, - { 0x13001f9b, 0x2000fff8 }, - { 0x13001f9c, 0x2000fff8 }, - { 0x13001f9d, 0x2000fff8 }, - { 0x13001f9e, 0x2000fff8 }, - { 0x13001f9f, 0x2000fff8 }, - { 0x13001fa0, 0x14000008 }, - { 0x13001fa1, 0x14000008 }, - { 0x13001fa2, 0x14000008 }, - { 0x13001fa3, 
0x14000008 }, - { 0x13001fa4, 0x14000008 }, - { 0x13001fa5, 0x14000008 }, - { 0x13001fa6, 0x14000008 }, - { 0x13001fa7, 0x14000008 }, - { 0x13001fa8, 0x2000fff8 }, - { 0x13001fa9, 0x2000fff8 }, - { 0x13001faa, 0x2000fff8 }, - { 0x13001fab, 0x2000fff8 }, - { 0x13001fac, 0x2000fff8 }, - { 0x13001fad, 0x2000fff8 }, - { 0x13001fae, 0x2000fff8 }, - { 0x13001faf, 0x2000fff8 }, - { 0x13001fb0, 0x14000008 }, - { 0x13001fb1, 0x14000008 }, - { 0x13001fb2, 0x14000000 }, - { 0x13001fb3, 0x14000009 }, - { 0x13001fb4, 0x14000000 }, - { 0x13801fb6, 0x14000001 }, - { 0x13001fb8, 0x2400fff8 }, - { 0x13001fb9, 0x2400fff8 }, - { 0x13001fba, 0x2400ffb6 }, - { 0x13001fbb, 0x2400ffb6 }, - { 0x13001fbc, 0x2000fff7 }, - { 0x13001fbd, 0x60000000 }, - { 0x13001fbe, 0x1400e3db }, - { 0x13801fbf, 0x60000002 }, - { 0x13001fc2, 0x14000000 }, - { 0x13001fc3, 0x14000009 }, - { 0x13001fc4, 0x14000000 }, - { 0x13801fc6, 0x14000001 }, - { 0x13001fc8, 0x2400ffaa }, - { 0x13001fc9, 0x2400ffaa }, - { 0x13001fca, 0x2400ffaa }, - { 0x13001fcb, 0x2400ffaa }, - { 0x13001fcc, 0x2000fff7 }, - { 0x13801fcd, 0x60000002 }, - { 0x13001fd0, 0x14000008 }, - { 0x13001fd1, 0x14000008 }, - { 0x13801fd2, 0x14000001 }, - { 0x13801fd6, 0x14000001 }, - { 0x13001fd8, 0x2400fff8 }, - { 0x13001fd9, 0x2400fff8 }, - { 0x13001fda, 0x2400ff9c }, - { 0x13001fdb, 0x2400ff9c }, - { 0x13801fdd, 0x60000002 }, - { 0x13001fe0, 0x14000008 }, - { 0x13001fe1, 0x14000008 }, - { 0x13801fe2, 0x14000002 }, - { 0x13001fe5, 0x14000007 }, - { 0x13801fe6, 0x14000001 }, - { 0x13001fe8, 0x2400fff8 }, - { 0x13001fe9, 0x2400fff8 }, - { 0x13001fea, 0x2400ff90 }, - { 0x13001feb, 0x2400ff90 }, - { 0x13001fec, 0x2400fff9 }, - { 0x13801fed, 0x60000002 }, - { 0x13001ff2, 0x14000000 }, - { 0x13001ff3, 0x14000009 }, - { 0x13001ff4, 0x14000000 }, - { 0x13801ff6, 0x14000001 }, - { 0x13001ff8, 0x2400ff80 }, - { 0x13001ff9, 0x2400ff80 }, - { 0x13001ffa, 0x2400ff82 }, - { 0x13001ffb, 0x2400ff82 }, - { 0x13001ffc, 0x2000fff7 }, - { 0x13801ffd, 0x60000001 }, - { 
0x09802000, 0x7400000a }, - { 0x0980200b, 0x04000004 }, - { 0x09802010, 0x44000005 }, - { 0x09802016, 0x54000001 }, - { 0x09002018, 0x50000000 }, - { 0x09002019, 0x4c000000 }, - { 0x0900201a, 0x58000000 }, - { 0x0980201b, 0x50000001 }, - { 0x0900201d, 0x4c000000 }, - { 0x0900201e, 0x58000000 }, - { 0x0900201f, 0x50000000 }, - { 0x09802020, 0x54000007 }, - { 0x09002028, 0x6c000000 }, - { 0x09002029, 0x70000000 }, - { 0x0980202a, 0x04000004 }, - { 0x0900202f, 0x74000000 }, - { 0x09802030, 0x54000008 }, - { 0x09002039, 0x50000000 }, - { 0x0900203a, 0x4c000000 }, - { 0x0980203b, 0x54000003 }, - { 0x0980203f, 0x40000001 }, - { 0x09802041, 0x54000002 }, - { 0x09002044, 0x64000000 }, - { 0x09002045, 0x58000000 }, - { 0x09002046, 0x48000000 }, - { 0x09802047, 0x5400000a }, - { 0x09002052, 0x64000000 }, - { 0x09002053, 0x54000000 }, - { 0x09002054, 0x40000000 }, - { 0x09802055, 0x54000009 }, - { 0x0900205f, 0x74000000 }, - { 0x09802060, 0x04000003 }, - { 0x0980206a, 0x04000005 }, - { 0x09002070, 0x3c000000 }, - { 0x21002071, 0x14000000 }, - { 0x09802074, 0x3c000005 }, - { 0x0980207a, 0x64000002 }, - { 0x0900207d, 0x58000000 }, - { 0x0900207e, 0x48000000 }, - { 0x2100207f, 0x14000000 }, - { 0x09802080, 0x3c000009 }, - { 0x0980208a, 0x64000002 }, - { 0x0900208d, 0x58000000 }, - { 0x0900208e, 0x48000000 }, - { 0x21802090, 0x18000004 }, - { 0x098020a0, 0x5c000015 }, - { 0x1b8020d0, 0x3000000c }, - { 0x1b8020dd, 0x2c000003 }, - { 0x1b0020e1, 0x30000000 }, - { 0x1b8020e2, 0x2c000002 }, - { 0x1b8020e5, 0x30000006 }, - { 0x09802100, 0x68000001 }, - { 0x09002102, 0x24000000 }, - { 0x09802103, 0x68000003 }, - { 0x09002107, 0x24000000 }, - { 0x09802108, 0x68000001 }, - { 0x0900210a, 0x14000000 }, - { 0x0980210b, 0x24000002 }, - { 0x0980210e, 0x14000001 }, - { 0x09802110, 0x24000002 }, - { 0x09002113, 0x14000000 }, - { 0x09002114, 0x68000000 }, - { 0x09002115, 0x24000000 }, - { 0x09802116, 0x68000002 }, - { 0x09802119, 0x24000004 }, - { 0x0980211e, 0x68000005 }, - { 0x09002124, 
0x24000000 }, - { 0x09002125, 0x68000000 }, - { 0x13002126, 0x2400e2a3 }, - { 0x09002127, 0x68000000 }, - { 0x09002128, 0x24000000 }, - { 0x09002129, 0x68000000 }, - { 0x2100212a, 0x2400df41 }, - { 0x2100212b, 0x2400dfba }, - { 0x0980212c, 0x24000001 }, - { 0x0900212e, 0x68000000 }, - { 0x0900212f, 0x14000000 }, - { 0x09802130, 0x24000001 }, - { 0x09002132, 0x68000000 }, - { 0x09002133, 0x24000000 }, - { 0x09002134, 0x14000000 }, - { 0x09802135, 0x1c000003 }, - { 0x09002139, 0x14000000 }, - { 0x0980213a, 0x68000001 }, - { 0x0980213c, 0x14000001 }, - { 0x0980213e, 0x24000001 }, - { 0x09802140, 0x64000004 }, - { 0x09002145, 0x24000000 }, - { 0x09802146, 0x14000003 }, - { 0x0900214a, 0x68000000 }, - { 0x0900214b, 0x64000000 }, - { 0x0900214c, 0x68000000 }, - { 0x09802153, 0x3c00000c }, - { 0x09002160, 0x38000010 }, - { 0x09002161, 0x38000010 }, - { 0x09002162, 0x38000010 }, - { 0x09002163, 0x38000010 }, - { 0x09002164, 0x38000010 }, - { 0x09002165, 0x38000010 }, - { 0x09002166, 0x38000010 }, - { 0x09002167, 0x38000010 }, - { 0x09002168, 0x38000010 }, - { 0x09002169, 0x38000010 }, - { 0x0900216a, 0x38000010 }, - { 0x0900216b, 0x38000010 }, - { 0x0900216c, 0x38000010 }, - { 0x0900216d, 0x38000010 }, - { 0x0900216e, 0x38000010 }, - { 0x0900216f, 0x38000010 }, - { 0x09002170, 0x3800fff0 }, - { 0x09002171, 0x3800fff0 }, - { 0x09002172, 0x3800fff0 }, - { 0x09002173, 0x3800fff0 }, - { 0x09002174, 0x3800fff0 }, - { 0x09002175, 0x3800fff0 }, - { 0x09002176, 0x3800fff0 }, - { 0x09002177, 0x3800fff0 }, - { 0x09002178, 0x3800fff0 }, - { 0x09002179, 0x3800fff0 }, - { 0x0900217a, 0x3800fff0 }, - { 0x0900217b, 0x3800fff0 }, - { 0x0900217c, 0x3800fff0 }, - { 0x0900217d, 0x3800fff0 }, - { 0x0900217e, 0x3800fff0 }, - { 0x0900217f, 0x3800fff0 }, - { 0x09802180, 0x38000003 }, - { 0x09802190, 0x64000004 }, - { 0x09802195, 0x68000004 }, - { 0x0980219a, 0x64000001 }, - { 0x0980219c, 0x68000003 }, - { 0x090021a0, 0x64000000 }, - { 0x098021a1, 0x68000001 }, - { 0x090021a3, 0x64000000 }, - { 
0x098021a4, 0x68000001 }, - { 0x090021a6, 0x64000000 }, - { 0x098021a7, 0x68000006 }, - { 0x090021ae, 0x64000000 }, - { 0x098021af, 0x6800001e }, - { 0x098021ce, 0x64000001 }, - { 0x098021d0, 0x68000001 }, - { 0x090021d2, 0x64000000 }, - { 0x090021d3, 0x68000000 }, - { 0x090021d4, 0x64000000 }, - { 0x098021d5, 0x6800001e }, - { 0x098021f4, 0x6400010b }, - { 0x09802300, 0x68000007 }, - { 0x09802308, 0x64000003 }, - { 0x0980230c, 0x68000013 }, - { 0x09802320, 0x64000001 }, - { 0x09802322, 0x68000006 }, - { 0x09002329, 0x58000000 }, - { 0x0900232a, 0x48000000 }, - { 0x0980232b, 0x68000050 }, - { 0x0900237c, 0x64000000 }, - { 0x0980237d, 0x6800001d }, - { 0x0980239b, 0x64000018 }, - { 0x090023b4, 0x58000000 }, - { 0x090023b5, 0x48000000 }, - { 0x090023b6, 0x54000000 }, - { 0x098023b7, 0x68000024 }, - { 0x09802400, 0x68000026 }, - { 0x09802440, 0x6800000a }, - { 0x09802460, 0x3c00003b }, - { 0x0980249c, 0x68000019 }, - { 0x090024b6, 0x6800001a }, - { 0x090024b7, 0x6800001a }, - { 0x090024b8, 0x6800001a }, - { 0x090024b9, 0x6800001a }, - { 0x090024ba, 0x6800001a }, - { 0x090024bb, 0x6800001a }, - { 0x090024bc, 0x6800001a }, - { 0x090024bd, 0x6800001a }, - { 0x090024be, 0x6800001a }, - { 0x090024bf, 0x6800001a }, - { 0x090024c0, 0x6800001a }, - { 0x090024c1, 0x6800001a }, - { 0x090024c2, 0x6800001a }, - { 0x090024c3, 0x6800001a }, - { 0x090024c4, 0x6800001a }, - { 0x090024c5, 0x6800001a }, - { 0x090024c6, 0x6800001a }, - { 0x090024c7, 0x6800001a }, - { 0x090024c8, 0x6800001a }, - { 0x090024c9, 0x6800001a }, - { 0x090024ca, 0x6800001a }, - { 0x090024cb, 0x6800001a }, - { 0x090024cc, 0x6800001a }, - { 0x090024cd, 0x6800001a }, - { 0x090024ce, 0x6800001a }, - { 0x090024cf, 0x6800001a }, - { 0x090024d0, 0x6800ffe6 }, - { 0x090024d1, 0x6800ffe6 }, - { 0x090024d2, 0x6800ffe6 }, - { 0x090024d3, 0x6800ffe6 }, - { 0x090024d4, 0x6800ffe6 }, - { 0x090024d5, 0x6800ffe6 }, - { 0x090024d6, 0x6800ffe6 }, - { 0x090024d7, 0x6800ffe6 }, - { 0x090024d8, 0x6800ffe6 }, - { 0x090024d9, 
0x6800ffe6 }, - { 0x090024da, 0x6800ffe6 }, - { 0x090024db, 0x6800ffe6 }, - { 0x090024dc, 0x6800ffe6 }, - { 0x090024dd, 0x6800ffe6 }, - { 0x090024de, 0x6800ffe6 }, - { 0x090024df, 0x6800ffe6 }, - { 0x090024e0, 0x6800ffe6 }, - { 0x090024e1, 0x6800ffe6 }, - { 0x090024e2, 0x6800ffe6 }, - { 0x090024e3, 0x6800ffe6 }, - { 0x090024e4, 0x6800ffe6 }, - { 0x090024e5, 0x6800ffe6 }, - { 0x090024e6, 0x6800ffe6 }, - { 0x090024e7, 0x6800ffe6 }, - { 0x090024e8, 0x6800ffe6 }, - { 0x090024e9, 0x6800ffe6 }, - { 0x098024ea, 0x3c000015 }, - { 0x09802500, 0x680000b6 }, - { 0x090025b7, 0x64000000 }, - { 0x098025b8, 0x68000008 }, - { 0x090025c1, 0x64000000 }, - { 0x098025c2, 0x68000035 }, - { 0x098025f8, 0x64000007 }, - { 0x09802600, 0x6800006e }, - { 0x0900266f, 0x64000000 }, - { 0x09802670, 0x6800002c }, - { 0x098026a0, 0x68000011 }, - { 0x09802701, 0x68000003 }, - { 0x09802706, 0x68000003 }, - { 0x0980270c, 0x6800001b }, - { 0x09802729, 0x68000022 }, - { 0x0900274d, 0x68000000 }, - { 0x0980274f, 0x68000003 }, - { 0x09002756, 0x68000000 }, - { 0x09802758, 0x68000006 }, - { 0x09802761, 0x68000006 }, - { 0x09002768, 0x58000000 }, - { 0x09002769, 0x48000000 }, - { 0x0900276a, 0x58000000 }, - { 0x0900276b, 0x48000000 }, - { 0x0900276c, 0x58000000 }, - { 0x0900276d, 0x48000000 }, - { 0x0900276e, 0x58000000 }, - { 0x0900276f, 0x48000000 }, - { 0x09002770, 0x58000000 }, - { 0x09002771, 0x48000000 }, - { 0x09002772, 0x58000000 }, - { 0x09002773, 0x48000000 }, - { 0x09002774, 0x58000000 }, - { 0x09002775, 0x48000000 }, - { 0x09802776, 0x3c00001d }, - { 0x09002794, 0x68000000 }, - { 0x09802798, 0x68000017 }, - { 0x098027b1, 0x6800000d }, - { 0x098027c0, 0x64000004 }, - { 0x090027c5, 0x58000000 }, - { 0x090027c6, 0x48000000 }, - { 0x098027d0, 0x64000015 }, - { 0x090027e6, 0x58000000 }, - { 0x090027e7, 0x48000000 }, - { 0x090027e8, 0x58000000 }, - { 0x090027e9, 0x48000000 }, - { 0x090027ea, 0x58000000 }, - { 0x090027eb, 0x48000000 }, - { 0x098027f0, 0x6400000f }, - { 0x04802800, 0x680000ff }, - { 
0x09802900, 0x64000082 }, - { 0x09002983, 0x58000000 }, - { 0x09002984, 0x48000000 }, - { 0x09002985, 0x58000000 }, - { 0x09002986, 0x48000000 }, - { 0x09002987, 0x58000000 }, - { 0x09002988, 0x48000000 }, - { 0x09002989, 0x58000000 }, - { 0x0900298a, 0x48000000 }, - { 0x0900298b, 0x58000000 }, - { 0x0900298c, 0x48000000 }, - { 0x0900298d, 0x58000000 }, - { 0x0900298e, 0x48000000 }, - { 0x0900298f, 0x58000000 }, - { 0x09002990, 0x48000000 }, - { 0x09002991, 0x58000000 }, - { 0x09002992, 0x48000000 }, - { 0x09002993, 0x58000000 }, - { 0x09002994, 0x48000000 }, - { 0x09002995, 0x58000000 }, - { 0x09002996, 0x48000000 }, - { 0x09002997, 0x58000000 }, - { 0x09002998, 0x48000000 }, - { 0x09802999, 0x6400003e }, - { 0x090029d8, 0x58000000 }, - { 0x090029d9, 0x48000000 }, - { 0x090029da, 0x58000000 }, - { 0x090029db, 0x48000000 }, - { 0x098029dc, 0x6400001f }, - { 0x090029fc, 0x58000000 }, - { 0x090029fd, 0x48000000 }, - { 0x098029fe, 0x64000101 }, - { 0x09802b00, 0x68000013 }, - { 0x11002c00, 0x24000030 }, - { 0x11002c01, 0x24000030 }, - { 0x11002c02, 0x24000030 }, - { 0x11002c03, 0x24000030 }, - { 0x11002c04, 0x24000030 }, - { 0x11002c05, 0x24000030 }, - { 0x11002c06, 0x24000030 }, - { 0x11002c07, 0x24000030 }, - { 0x11002c08, 0x24000030 }, - { 0x11002c09, 0x24000030 }, - { 0x11002c0a, 0x24000030 }, - { 0x11002c0b, 0x24000030 }, - { 0x11002c0c, 0x24000030 }, - { 0x11002c0d, 0x24000030 }, - { 0x11002c0e, 0x24000030 }, - { 0x11002c0f, 0x24000030 }, - { 0x11002c10, 0x24000030 }, - { 0x11002c11, 0x24000030 }, - { 0x11002c12, 0x24000030 }, - { 0x11002c13, 0x24000030 }, - { 0x11002c14, 0x24000030 }, - { 0x11002c15, 0x24000030 }, - { 0x11002c16, 0x24000030 }, - { 0x11002c17, 0x24000030 }, - { 0x11002c18, 0x24000030 }, - { 0x11002c19, 0x24000030 }, - { 0x11002c1a, 0x24000030 }, - { 0x11002c1b, 0x24000030 }, - { 0x11002c1c, 0x24000030 }, - { 0x11002c1d, 0x24000030 }, - { 0x11002c1e, 0x24000030 }, - { 0x11002c1f, 0x24000030 }, - { 0x11002c20, 0x24000030 }, - { 0x11002c21, 
0x24000030 }, - { 0x11002c22, 0x24000030 }, - { 0x11002c23, 0x24000030 }, - { 0x11002c24, 0x24000030 }, - { 0x11002c25, 0x24000030 }, - { 0x11002c26, 0x24000030 }, - { 0x11002c27, 0x24000030 }, - { 0x11002c28, 0x24000030 }, - { 0x11002c29, 0x24000030 }, - { 0x11002c2a, 0x24000030 }, - { 0x11002c2b, 0x24000030 }, - { 0x11002c2c, 0x24000030 }, - { 0x11002c2d, 0x24000030 }, - { 0x11002c2e, 0x24000030 }, - { 0x11002c30, 0x1400ffd0 }, - { 0x11002c31, 0x1400ffd0 }, - { 0x11002c32, 0x1400ffd0 }, - { 0x11002c33, 0x1400ffd0 }, - { 0x11002c34, 0x1400ffd0 }, - { 0x11002c35, 0x1400ffd0 }, - { 0x11002c36, 0x1400ffd0 }, - { 0x11002c37, 0x1400ffd0 }, - { 0x11002c38, 0x1400ffd0 }, - { 0x11002c39, 0x1400ffd0 }, - { 0x11002c3a, 0x1400ffd0 }, - { 0x11002c3b, 0x1400ffd0 }, - { 0x11002c3c, 0x1400ffd0 }, - { 0x11002c3d, 0x1400ffd0 }, - { 0x11002c3e, 0x1400ffd0 }, - { 0x11002c3f, 0x1400ffd0 }, - { 0x11002c40, 0x1400ffd0 }, - { 0x11002c41, 0x1400ffd0 }, - { 0x11002c42, 0x1400ffd0 }, - { 0x11002c43, 0x1400ffd0 }, - { 0x11002c44, 0x1400ffd0 }, - { 0x11002c45, 0x1400ffd0 }, - { 0x11002c46, 0x1400ffd0 }, - { 0x11002c47, 0x1400ffd0 }, - { 0x11002c48, 0x1400ffd0 }, - { 0x11002c49, 0x1400ffd0 }, - { 0x11002c4a, 0x1400ffd0 }, - { 0x11002c4b, 0x1400ffd0 }, - { 0x11002c4c, 0x1400ffd0 }, - { 0x11002c4d, 0x1400ffd0 }, - { 0x11002c4e, 0x1400ffd0 }, - { 0x11002c4f, 0x1400ffd0 }, - { 0x11002c50, 0x1400ffd0 }, - { 0x11002c51, 0x1400ffd0 }, - { 0x11002c52, 0x1400ffd0 }, - { 0x11002c53, 0x1400ffd0 }, - { 0x11002c54, 0x1400ffd0 }, - { 0x11002c55, 0x1400ffd0 }, - { 0x11002c56, 0x1400ffd0 }, - { 0x11002c57, 0x1400ffd0 }, - { 0x11002c58, 0x1400ffd0 }, - { 0x11002c59, 0x1400ffd0 }, - { 0x11002c5a, 0x1400ffd0 }, - { 0x11002c5b, 0x1400ffd0 }, - { 0x11002c5c, 0x1400ffd0 }, - { 0x11002c5d, 0x1400ffd0 }, - { 0x11002c5e, 0x1400ffd0 }, - { 0x0a002c80, 0x24000001 }, - { 0x0a002c81, 0x1400ffff }, - { 0x0a002c82, 0x24000001 }, - { 0x0a002c83, 0x1400ffff }, - { 0x0a002c84, 0x24000001 }, - { 0x0a002c85, 0x1400ffff }, - { 
0x0a002c86, 0x24000001 }, - { 0x0a002c87, 0x1400ffff }, - { 0x0a002c88, 0x24000001 }, - { 0x0a002c89, 0x1400ffff }, - { 0x0a002c8a, 0x24000001 }, - { 0x0a002c8b, 0x1400ffff }, - { 0x0a002c8c, 0x24000001 }, - { 0x0a002c8d, 0x1400ffff }, - { 0x0a002c8e, 0x24000001 }, - { 0x0a002c8f, 0x1400ffff }, - { 0x0a002c90, 0x24000001 }, - { 0x0a002c91, 0x1400ffff }, - { 0x0a002c92, 0x24000001 }, - { 0x0a002c93, 0x1400ffff }, - { 0x0a002c94, 0x24000001 }, - { 0x0a002c95, 0x1400ffff }, - { 0x0a002c96, 0x24000001 }, - { 0x0a002c97, 0x1400ffff }, - { 0x0a002c98, 0x24000001 }, - { 0x0a002c99, 0x1400ffff }, - { 0x0a002c9a, 0x24000001 }, - { 0x0a002c9b, 0x1400ffff }, - { 0x0a002c9c, 0x24000001 }, - { 0x0a002c9d, 0x1400ffff }, - { 0x0a002c9e, 0x24000001 }, - { 0x0a002c9f, 0x1400ffff }, - { 0x0a002ca0, 0x24000001 }, - { 0x0a002ca1, 0x1400ffff }, - { 0x0a002ca2, 0x24000001 }, - { 0x0a002ca3, 0x1400ffff }, - { 0x0a002ca4, 0x24000001 }, - { 0x0a002ca5, 0x1400ffff }, - { 0x0a002ca6, 0x24000001 }, - { 0x0a002ca7, 0x1400ffff }, - { 0x0a002ca8, 0x24000001 }, - { 0x0a002ca9, 0x1400ffff }, - { 0x0a002caa, 0x24000001 }, - { 0x0a002cab, 0x1400ffff }, - { 0x0a002cac, 0x24000001 }, - { 0x0a002cad, 0x1400ffff }, - { 0x0a002cae, 0x24000001 }, - { 0x0a002caf, 0x1400ffff }, - { 0x0a002cb0, 0x24000001 }, - { 0x0a002cb1, 0x1400ffff }, - { 0x0a002cb2, 0x24000001 }, - { 0x0a002cb3, 0x1400ffff }, - { 0x0a002cb4, 0x24000001 }, - { 0x0a002cb5, 0x1400ffff }, - { 0x0a002cb6, 0x24000001 }, - { 0x0a002cb7, 0x1400ffff }, - { 0x0a002cb8, 0x24000001 }, - { 0x0a002cb9, 0x1400ffff }, - { 0x0a002cba, 0x24000001 }, - { 0x0a002cbb, 0x1400ffff }, - { 0x0a002cbc, 0x24000001 }, - { 0x0a002cbd, 0x1400ffff }, - { 0x0a002cbe, 0x24000001 }, - { 0x0a002cbf, 0x1400ffff }, - { 0x0a002cc0, 0x24000001 }, - { 0x0a002cc1, 0x1400ffff }, - { 0x0a002cc2, 0x24000001 }, - { 0x0a002cc3, 0x1400ffff }, - { 0x0a002cc4, 0x24000001 }, - { 0x0a002cc5, 0x1400ffff }, - { 0x0a002cc6, 0x24000001 }, - { 0x0a002cc7, 0x1400ffff }, - { 0x0a002cc8, 
0x24000001 }, - { 0x0a002cc9, 0x1400ffff }, - { 0x0a002cca, 0x24000001 }, - { 0x0a002ccb, 0x1400ffff }, - { 0x0a002ccc, 0x24000001 }, - { 0x0a002ccd, 0x1400ffff }, - { 0x0a002cce, 0x24000001 }, - { 0x0a002ccf, 0x1400ffff }, - { 0x0a002cd0, 0x24000001 }, - { 0x0a002cd1, 0x1400ffff }, - { 0x0a002cd2, 0x24000001 }, - { 0x0a002cd3, 0x1400ffff }, - { 0x0a002cd4, 0x24000001 }, - { 0x0a002cd5, 0x1400ffff }, - { 0x0a002cd6, 0x24000001 }, - { 0x0a002cd7, 0x1400ffff }, - { 0x0a002cd8, 0x24000001 }, - { 0x0a002cd9, 0x1400ffff }, - { 0x0a002cda, 0x24000001 }, - { 0x0a002cdb, 0x1400ffff }, - { 0x0a002cdc, 0x24000001 }, - { 0x0a002cdd, 0x1400ffff }, - { 0x0a002cde, 0x24000001 }, - { 0x0a002cdf, 0x1400ffff }, - { 0x0a002ce0, 0x24000001 }, - { 0x0a002ce1, 0x1400ffff }, - { 0x0a002ce2, 0x24000001 }, - { 0x0a002ce3, 0x1400ffff }, - { 0x0a002ce4, 0x14000000 }, - { 0x0a802ce5, 0x68000005 }, - { 0x0a802cf9, 0x54000003 }, - { 0x0a002cfd, 0x3c000000 }, - { 0x0a802cfe, 0x54000001 }, - { 0x10002d00, 0x1400e3a0 }, - { 0x10002d01, 0x1400e3a0 }, - { 0x10002d02, 0x1400e3a0 }, - { 0x10002d03, 0x1400e3a0 }, - { 0x10002d04, 0x1400e3a0 }, - { 0x10002d05, 0x1400e3a0 }, - { 0x10002d06, 0x1400e3a0 }, - { 0x10002d07, 0x1400e3a0 }, - { 0x10002d08, 0x1400e3a0 }, - { 0x10002d09, 0x1400e3a0 }, - { 0x10002d0a, 0x1400e3a0 }, - { 0x10002d0b, 0x1400e3a0 }, - { 0x10002d0c, 0x1400e3a0 }, - { 0x10002d0d, 0x1400e3a0 }, - { 0x10002d0e, 0x1400e3a0 }, - { 0x10002d0f, 0x1400e3a0 }, - { 0x10002d10, 0x1400e3a0 }, - { 0x10002d11, 0x1400e3a0 }, - { 0x10002d12, 0x1400e3a0 }, - { 0x10002d13, 0x1400e3a0 }, - { 0x10002d14, 0x1400e3a0 }, - { 0x10002d15, 0x1400e3a0 }, - { 0x10002d16, 0x1400e3a0 }, - { 0x10002d17, 0x1400e3a0 }, - { 0x10002d18, 0x1400e3a0 }, - { 0x10002d19, 0x1400e3a0 }, - { 0x10002d1a, 0x1400e3a0 }, - { 0x10002d1b, 0x1400e3a0 }, - { 0x10002d1c, 0x1400e3a0 }, - { 0x10002d1d, 0x1400e3a0 }, - { 0x10002d1e, 0x1400e3a0 }, - { 0x10002d1f, 0x1400e3a0 }, - { 0x10002d20, 0x1400e3a0 }, - { 0x10002d21, 0x1400e3a0 }, - { 
0x10002d22, 0x1400e3a0 }, - { 0x10002d23, 0x1400e3a0 }, - { 0x10002d24, 0x1400e3a0 }, - { 0x10002d25, 0x1400e3a0 }, - { 0x3a802d30, 0x1c000035 }, - { 0x3a002d6f, 0x18000000 }, - { 0x0f802d80, 0x1c000016 }, - { 0x0f802da0, 0x1c000006 }, - { 0x0f802da8, 0x1c000006 }, - { 0x0f802db0, 0x1c000006 }, - { 0x0f802db8, 0x1c000006 }, - { 0x0f802dc0, 0x1c000006 }, - { 0x0f802dc8, 0x1c000006 }, - { 0x0f802dd0, 0x1c000006 }, - { 0x0f802dd8, 0x1c000006 }, - { 0x09802e00, 0x54000001 }, - { 0x09002e02, 0x50000000 }, - { 0x09002e03, 0x4c000000 }, - { 0x09002e04, 0x50000000 }, - { 0x09002e05, 0x4c000000 }, - { 0x09802e06, 0x54000002 }, - { 0x09002e09, 0x50000000 }, - { 0x09002e0a, 0x4c000000 }, - { 0x09002e0b, 0x54000000 }, - { 0x09002e0c, 0x50000000 }, - { 0x09002e0d, 0x4c000000 }, - { 0x09802e0e, 0x54000008 }, - { 0x09002e17, 0x44000000 }, - { 0x09002e1c, 0x50000000 }, - { 0x09002e1d, 0x4c000000 }, - { 0x16802e80, 0x68000019 }, - { 0x16802e9b, 0x68000058 }, - { 0x16802f00, 0x680000d5 }, - { 0x09802ff0, 0x6800000b }, - { 0x09003000, 0x74000000 }, - { 0x09803001, 0x54000002 }, - { 0x09003004, 0x68000000 }, - { 0x16003005, 0x18000000 }, - { 0x09003006, 0x1c000000 }, - { 0x16003007, 0x38000000 }, - { 0x09003008, 0x58000000 }, - { 0x09003009, 0x48000000 }, - { 0x0900300a, 0x58000000 }, - { 0x0900300b, 0x48000000 }, - { 0x0900300c, 0x58000000 }, - { 0x0900300d, 0x48000000 }, - { 0x0900300e, 0x58000000 }, - { 0x0900300f, 0x48000000 }, - { 0x09003010, 0x58000000 }, - { 0x09003011, 0x48000000 }, - { 0x09803012, 0x68000001 }, - { 0x09003014, 0x58000000 }, - { 0x09003015, 0x48000000 }, - { 0x09003016, 0x58000000 }, - { 0x09003017, 0x48000000 }, - { 0x09003018, 0x58000000 }, - { 0x09003019, 0x48000000 }, - { 0x0900301a, 0x58000000 }, - { 0x0900301b, 0x48000000 }, - { 0x0900301c, 0x44000000 }, - { 0x0900301d, 0x58000000 }, - { 0x0980301e, 0x48000001 }, - { 0x09003020, 0x68000000 }, - { 0x16803021, 0x38000008 }, - { 0x1b80302a, 0x30000005 }, - { 0x09003030, 0x44000000 }, - { 0x09803031, 
0x18000004 }, - { 0x09803036, 0x68000001 }, - { 0x16803038, 0x38000002 }, - { 0x1600303b, 0x18000000 }, - { 0x0900303c, 0x1c000000 }, - { 0x0900303d, 0x54000000 }, - { 0x0980303e, 0x68000001 }, - { 0x1a803041, 0x1c000055 }, - { 0x1b803099, 0x30000001 }, - { 0x0980309b, 0x60000001 }, - { 0x1a80309d, 0x18000001 }, - { 0x1a00309f, 0x1c000000 }, - { 0x090030a0, 0x44000000 }, - { 0x1d8030a1, 0x1c000059 }, - { 0x090030fb, 0x54000000 }, - { 0x098030fc, 0x18000002 }, - { 0x1d0030ff, 0x1c000000 }, - { 0x03803105, 0x1c000027 }, - { 0x17803131, 0x1c00005d }, - { 0x09803190, 0x68000001 }, - { 0x09803192, 0x3c000003 }, - { 0x09803196, 0x68000009 }, - { 0x038031a0, 0x1c000017 }, - { 0x098031c0, 0x6800000f }, - { 0x1d8031f0, 0x1c00000f }, - { 0x17803200, 0x6800001e }, - { 0x09803220, 0x3c000009 }, - { 0x0980322a, 0x68000019 }, - { 0x09003250, 0x68000000 }, - { 0x09803251, 0x3c00000e }, - { 0x17803260, 0x6800001f }, - { 0x09803280, 0x3c000009 }, - { 0x0980328a, 0x68000026 }, - { 0x098032b1, 0x3c00000e }, - { 0x098032c0, 0x6800003e }, - { 0x09803300, 0x680000ff }, - { 0x16803400, 0x1c0019b5 }, - { 0x09804dc0, 0x6800003f }, - { 0x16804e00, 0x1c0051bb }, - { 0x3c80a000, 0x1c000014 }, - { 0x3c00a015, 0x18000000 }, - { 0x3c80a016, 0x1c000476 }, - { 0x3c80a490, 0x68000036 }, - { 0x0980a700, 0x60000016 }, - { 0x3080a800, 0x1c000001 }, - { 0x3000a802, 0x28000000 }, - { 0x3080a803, 0x1c000002 }, - { 0x3000a806, 0x30000000 }, - { 0x3080a807, 0x1c000003 }, - { 0x3000a80b, 0x30000000 }, - { 0x3080a80c, 0x1c000016 }, - { 0x3080a823, 0x28000001 }, - { 0x3080a825, 0x30000001 }, - { 0x3000a827, 0x28000000 }, - { 0x3080a828, 0x68000003 }, - { 0x1780ac00, 0x1c002ba3 }, - { 0x0980d800, 0x1000037f }, - { 0x0980db80, 0x1000007f }, - { 0x0980dc00, 0x100003ff }, - { 0x0980e000, 0x0c0018ff }, - { 0x1680f900, 0x1c00012d }, - { 0x1680fa30, 0x1c00003a }, - { 0x1680fa70, 0x1c000069 }, - { 0x2180fb00, 0x14000006 }, - { 0x0180fb13, 0x14000004 }, - { 0x1900fb1d, 0x1c000000 }, - { 0x1900fb1e, 0x30000000 }, - { 
0x1980fb1f, 0x1c000009 }, - { 0x1900fb29, 0x64000000 }, - { 0x1980fb2a, 0x1c00000c }, - { 0x1980fb38, 0x1c000004 }, - { 0x1900fb3e, 0x1c000000 }, - { 0x1980fb40, 0x1c000001 }, - { 0x1980fb43, 0x1c000001 }, - { 0x1980fb46, 0x1c00006b }, - { 0x0080fbd3, 0x1c00016a }, - { 0x0900fd3e, 0x58000000 }, - { 0x0900fd3f, 0x48000000 }, - { 0x0080fd50, 0x1c00003f }, - { 0x0080fd92, 0x1c000035 }, - { 0x0080fdf0, 0x1c00000b }, - { 0x0000fdfc, 0x5c000000 }, - { 0x0900fdfd, 0x68000000 }, - { 0x1b80fe00, 0x3000000f }, - { 0x0980fe10, 0x54000006 }, - { 0x0900fe17, 0x58000000 }, - { 0x0900fe18, 0x48000000 }, - { 0x0900fe19, 0x54000000 }, - { 0x1b80fe20, 0x30000003 }, - { 0x0900fe30, 0x54000000 }, - { 0x0980fe31, 0x44000001 }, - { 0x0980fe33, 0x40000001 }, - { 0x0900fe35, 0x58000000 }, - { 0x0900fe36, 0x48000000 }, - { 0x0900fe37, 0x58000000 }, - { 0x0900fe38, 0x48000000 }, - { 0x0900fe39, 0x58000000 }, - { 0x0900fe3a, 0x48000000 }, - { 0x0900fe3b, 0x58000000 }, - { 0x0900fe3c, 0x48000000 }, - { 0x0900fe3d, 0x58000000 }, - { 0x0900fe3e, 0x48000000 }, - { 0x0900fe3f, 0x58000000 }, - { 0x0900fe40, 0x48000000 }, - { 0x0900fe41, 0x58000000 }, - { 0x0900fe42, 0x48000000 }, - { 0x0900fe43, 0x58000000 }, - { 0x0900fe44, 0x48000000 }, - { 0x0980fe45, 0x54000001 }, - { 0x0900fe47, 0x58000000 }, - { 0x0900fe48, 0x48000000 }, - { 0x0980fe49, 0x54000003 }, - { 0x0980fe4d, 0x40000002 }, - { 0x0980fe50, 0x54000002 }, - { 0x0980fe54, 0x54000003 }, - { 0x0900fe58, 0x44000000 }, - { 0x0900fe59, 0x58000000 }, - { 0x0900fe5a, 0x48000000 }, - { 0x0900fe5b, 0x58000000 }, - { 0x0900fe5c, 0x48000000 }, - { 0x0900fe5d, 0x58000000 }, - { 0x0900fe5e, 0x48000000 }, - { 0x0980fe5f, 0x54000002 }, - { 0x0900fe62, 0x64000000 }, - { 0x0900fe63, 0x44000000 }, - { 0x0980fe64, 0x64000002 }, - { 0x0900fe68, 0x54000000 }, - { 0x0900fe69, 0x5c000000 }, - { 0x0980fe6a, 0x54000001 }, - { 0x0080fe70, 0x1c000004 }, - { 0x0080fe76, 0x1c000086 }, - { 0x0900feff, 0x04000000 }, - { 0x0980ff01, 0x54000002 }, - { 0x0900ff04, 
0x5c000000 }, - { 0x0980ff05, 0x54000002 }, - { 0x0900ff08, 0x58000000 }, - { 0x0900ff09, 0x48000000 }, - { 0x0900ff0a, 0x54000000 }, - { 0x0900ff0b, 0x64000000 }, - { 0x0900ff0c, 0x54000000 }, - { 0x0900ff0d, 0x44000000 }, - { 0x0980ff0e, 0x54000001 }, - { 0x0980ff10, 0x34000009 }, - { 0x0980ff1a, 0x54000001 }, - { 0x0980ff1c, 0x64000002 }, - { 0x0980ff1f, 0x54000001 }, - { 0x2100ff21, 0x24000020 }, - { 0x2100ff22, 0x24000020 }, - { 0x2100ff23, 0x24000020 }, - { 0x2100ff24, 0x24000020 }, - { 0x2100ff25, 0x24000020 }, - { 0x2100ff26, 0x24000020 }, - { 0x2100ff27, 0x24000020 }, - { 0x2100ff28, 0x24000020 }, - { 0x2100ff29, 0x24000020 }, - { 0x2100ff2a, 0x24000020 }, - { 0x2100ff2b, 0x24000020 }, - { 0x2100ff2c, 0x24000020 }, - { 0x2100ff2d, 0x24000020 }, - { 0x2100ff2e, 0x24000020 }, - { 0x2100ff2f, 0x24000020 }, - { 0x2100ff30, 0x24000020 }, - { 0x2100ff31, 0x24000020 }, - { 0x2100ff32, 0x24000020 }, - { 0x2100ff33, 0x24000020 }, - { 0x2100ff34, 0x24000020 }, - { 0x2100ff35, 0x24000020 }, - { 0x2100ff36, 0x24000020 }, - { 0x2100ff37, 0x24000020 }, - { 0x2100ff38, 0x24000020 }, - { 0x2100ff39, 0x24000020 }, - { 0x2100ff3a, 0x24000020 }, - { 0x0900ff3b, 0x58000000 }, - { 0x0900ff3c, 0x54000000 }, - { 0x0900ff3d, 0x48000000 }, - { 0x0900ff3e, 0x60000000 }, - { 0x0900ff3f, 0x40000000 }, - { 0x0900ff40, 0x60000000 }, - { 0x2100ff41, 0x1400ffe0 }, - { 0x2100ff42, 0x1400ffe0 }, - { 0x2100ff43, 0x1400ffe0 }, - { 0x2100ff44, 0x1400ffe0 }, - { 0x2100ff45, 0x1400ffe0 }, - { 0x2100ff46, 0x1400ffe0 }, - { 0x2100ff47, 0x1400ffe0 }, - { 0x2100ff48, 0x1400ffe0 }, - { 0x2100ff49, 0x1400ffe0 }, - { 0x2100ff4a, 0x1400ffe0 }, - { 0x2100ff4b, 0x1400ffe0 }, - { 0x2100ff4c, 0x1400ffe0 }, - { 0x2100ff4d, 0x1400ffe0 }, - { 0x2100ff4e, 0x1400ffe0 }, - { 0x2100ff4f, 0x1400ffe0 }, - { 0x2100ff50, 0x1400ffe0 }, - { 0x2100ff51, 0x1400ffe0 }, - { 0x2100ff52, 0x1400ffe0 }, - { 0x2100ff53, 0x1400ffe0 }, - { 0x2100ff54, 0x1400ffe0 }, - { 0x2100ff55, 0x1400ffe0 }, - { 0x2100ff56, 0x1400ffe0 }, - { 
0x2100ff57, 0x1400ffe0 }, - { 0x2100ff58, 0x1400ffe0 }, - { 0x2100ff59, 0x1400ffe0 }, - { 0x2100ff5a, 0x1400ffe0 }, - { 0x0900ff5b, 0x58000000 }, - { 0x0900ff5c, 0x64000000 }, - { 0x0900ff5d, 0x48000000 }, - { 0x0900ff5e, 0x64000000 }, - { 0x0900ff5f, 0x58000000 }, - { 0x0900ff60, 0x48000000 }, - { 0x0900ff61, 0x54000000 }, - { 0x0900ff62, 0x58000000 }, - { 0x0900ff63, 0x48000000 }, - { 0x0980ff64, 0x54000001 }, - { 0x1d80ff66, 0x1c000009 }, - { 0x0900ff70, 0x18000000 }, - { 0x1d80ff71, 0x1c00002c }, - { 0x0980ff9e, 0x18000001 }, - { 0x1780ffa0, 0x1c00001e }, - { 0x1780ffc2, 0x1c000005 }, - { 0x1780ffca, 0x1c000005 }, - { 0x1780ffd2, 0x1c000005 }, - { 0x1780ffda, 0x1c000002 }, - { 0x0980ffe0, 0x5c000001 }, - { 0x0900ffe2, 0x64000000 }, - { 0x0900ffe3, 0x60000000 }, - { 0x0900ffe4, 0x68000000 }, - { 0x0980ffe5, 0x5c000001 }, - { 0x0900ffe8, 0x68000000 }, - { 0x0980ffe9, 0x64000003 }, - { 0x0980ffed, 0x68000001 }, - { 0x0980fff9, 0x04000002 }, - { 0x0980fffc, 0x68000001 }, - { 0x23810000, 0x1c00000b }, - { 0x2381000d, 0x1c000019 }, - { 0x23810028, 0x1c000012 }, - { 0x2381003c, 0x1c000001 }, - { 0x2381003f, 0x1c00000e }, - { 0x23810050, 0x1c00000d }, - { 0x23810080, 0x1c00007a }, - { 0x09810100, 0x54000001 }, - { 0x09010102, 0x68000000 }, - { 0x09810107, 0x3c00002c }, - { 0x09810137, 0x68000008 }, - { 0x13810140, 0x38000034 }, - { 0x13810175, 0x3c000003 }, - { 0x13810179, 0x68000010 }, - { 0x1301018a, 0x3c000000 }, - { 0x29810300, 0x1c00001e }, - { 0x29810320, 0x3c000003 }, - { 0x12810330, 0x1c000019 }, - { 0x1201034a, 0x38000000 }, - { 0x3b810380, 0x1c00001d }, - { 0x3b01039f, 0x54000000 }, - { 0x2a8103a0, 0x1c000023 }, - { 0x2a8103c8, 0x1c000007 }, - { 0x2a0103d0, 0x68000000 }, - { 0x2a8103d1, 0x38000004 }, - { 0x0d010400, 0x24000028 }, - { 0x0d010401, 0x24000028 }, - { 0x0d010402, 0x24000028 }, - { 0x0d010403, 0x24000028 }, - { 0x0d010404, 0x24000028 }, - { 0x0d010405, 0x24000028 }, - { 0x0d010406, 0x24000028 }, - { 0x0d010407, 0x24000028 }, - { 0x0d010408, 
0x24000028 }, - { 0x0d010409, 0x24000028 }, - { 0x0d01040a, 0x24000028 }, - { 0x0d01040b, 0x24000028 }, - { 0x0d01040c, 0x24000028 }, - { 0x0d01040d, 0x24000028 }, - { 0x0d01040e, 0x24000028 }, - { 0x0d01040f, 0x24000028 }, - { 0x0d010410, 0x24000028 }, - { 0x0d010411, 0x24000028 }, - { 0x0d010412, 0x24000028 }, - { 0x0d010413, 0x24000028 }, - { 0x0d010414, 0x24000028 }, - { 0x0d010415, 0x24000028 }, - { 0x0d010416, 0x24000028 }, - { 0x0d010417, 0x24000028 }, - { 0x0d010418, 0x24000028 }, - { 0x0d010419, 0x24000028 }, - { 0x0d01041a, 0x24000028 }, - { 0x0d01041b, 0x24000028 }, - { 0x0d01041c, 0x24000028 }, - { 0x0d01041d, 0x24000028 }, - { 0x0d01041e, 0x24000028 }, - { 0x0d01041f, 0x24000028 }, - { 0x0d010420, 0x24000028 }, - { 0x0d010421, 0x24000028 }, - { 0x0d010422, 0x24000028 }, - { 0x0d010423, 0x24000028 }, - { 0x0d010424, 0x24000028 }, - { 0x0d010425, 0x24000028 }, - { 0x0d010426, 0x24000028 }, - { 0x0d010427, 0x24000028 }, - { 0x0d010428, 0x1400ffd8 }, - { 0x0d010429, 0x1400ffd8 }, - { 0x0d01042a, 0x1400ffd8 }, - { 0x0d01042b, 0x1400ffd8 }, - { 0x0d01042c, 0x1400ffd8 }, - { 0x0d01042d, 0x1400ffd8 }, - { 0x0d01042e, 0x1400ffd8 }, - { 0x0d01042f, 0x1400ffd8 }, - { 0x0d010430, 0x1400ffd8 }, - { 0x0d010431, 0x1400ffd8 }, - { 0x0d010432, 0x1400ffd8 }, - { 0x0d010433, 0x1400ffd8 }, - { 0x0d010434, 0x1400ffd8 }, - { 0x0d010435, 0x1400ffd8 }, - { 0x0d010436, 0x1400ffd8 }, - { 0x0d010437, 0x1400ffd8 }, - { 0x0d010438, 0x1400ffd8 }, - { 0x0d010439, 0x1400ffd8 }, - { 0x0d01043a, 0x1400ffd8 }, - { 0x0d01043b, 0x1400ffd8 }, - { 0x0d01043c, 0x1400ffd8 }, - { 0x0d01043d, 0x1400ffd8 }, - { 0x0d01043e, 0x1400ffd8 }, - { 0x0d01043f, 0x1400ffd8 }, - { 0x0d010440, 0x1400ffd8 }, - { 0x0d010441, 0x1400ffd8 }, - { 0x0d010442, 0x1400ffd8 }, - { 0x0d010443, 0x1400ffd8 }, - { 0x0d010444, 0x1400ffd8 }, - { 0x0d010445, 0x1400ffd8 }, - { 0x0d010446, 0x1400ffd8 }, - { 0x0d010447, 0x1400ffd8 }, - { 0x0d010448, 0x1400ffd8 }, - { 0x0d010449, 0x1400ffd8 }, - { 0x0d01044a, 0x1400ffd8 }, - { 
0x0d01044b, 0x1400ffd8 }, - { 0x0d01044c, 0x1400ffd8 }, - { 0x0d01044d, 0x1400ffd8 }, - { 0x0d01044e, 0x1400ffd8 }, - { 0x0d01044f, 0x1400ffd8 }, - { 0x2e810450, 0x1c00004d }, - { 0x2c8104a0, 0x34000009 }, - { 0x0b810800, 0x1c000005 }, - { 0x0b010808, 0x1c000000 }, - { 0x0b81080a, 0x1c00002b }, - { 0x0b810837, 0x1c000001 }, - { 0x0b01083c, 0x1c000000 }, - { 0x0b01083f, 0x1c000000 }, - { 0x1e010a00, 0x1c000000 }, - { 0x1e810a01, 0x30000002 }, - { 0x1e810a05, 0x30000001 }, - { 0x1e810a0c, 0x30000003 }, - { 0x1e810a10, 0x1c000003 }, - { 0x1e810a15, 0x1c000002 }, - { 0x1e810a19, 0x1c00001a }, - { 0x1e810a38, 0x30000002 }, - { 0x1e010a3f, 0x30000000 }, - { 0x1e810a40, 0x3c000007 }, - { 0x1e810a50, 0x54000008 }, - { 0x0981d000, 0x680000f5 }, - { 0x0981d100, 0x68000026 }, - { 0x0981d12a, 0x6800003a }, - { 0x0981d165, 0x28000001 }, - { 0x1b81d167, 0x30000002 }, - { 0x0981d16a, 0x68000002 }, - { 0x0981d16d, 0x28000005 }, - { 0x0981d173, 0x04000007 }, - { 0x1b81d17b, 0x30000007 }, - { 0x0981d183, 0x68000001 }, - { 0x1b81d185, 0x30000006 }, - { 0x0981d18c, 0x6800001d }, - { 0x1b81d1aa, 0x30000003 }, - { 0x0981d1ae, 0x6800002f }, - { 0x1381d200, 0x68000041 }, - { 0x1381d242, 0x30000002 }, - { 0x1301d245, 0x68000000 }, - { 0x0981d300, 0x68000056 }, - { 0x0981d400, 0x24000019 }, - { 0x0981d41a, 0x14000019 }, - { 0x0981d434, 0x24000019 }, - { 0x0981d44e, 0x14000006 }, - { 0x0981d456, 0x14000011 }, - { 0x0981d468, 0x24000019 }, - { 0x0981d482, 0x14000019 }, - { 0x0901d49c, 0x24000000 }, - { 0x0981d49e, 0x24000001 }, - { 0x0901d4a2, 0x24000000 }, - { 0x0981d4a5, 0x24000001 }, - { 0x0981d4a9, 0x24000003 }, - { 0x0981d4ae, 0x24000007 }, - { 0x0981d4b6, 0x14000003 }, - { 0x0901d4bb, 0x14000000 }, - { 0x0981d4bd, 0x14000006 }, - { 0x0981d4c5, 0x1400000a }, - { 0x0981d4d0, 0x24000019 }, - { 0x0981d4ea, 0x14000019 }, - { 0x0981d504, 0x24000001 }, - { 0x0981d507, 0x24000003 }, - { 0x0981d50d, 0x24000007 }, - { 0x0981d516, 0x24000006 }, - { 0x0981d51e, 0x14000019 }, - { 0x0981d538, 
0x24000001 }, - { 0x0981d53b, 0x24000003 }, - { 0x0981d540, 0x24000004 }, - { 0x0901d546, 0x24000000 }, - { 0x0981d54a, 0x24000006 }, - { 0x0981d552, 0x14000019 }, - { 0x0981d56c, 0x24000019 }, - { 0x0981d586, 0x14000019 }, - { 0x0981d5a0, 0x24000019 }, - { 0x0981d5ba, 0x14000019 }, - { 0x0981d5d4, 0x24000019 }, - { 0x0981d5ee, 0x14000019 }, - { 0x0981d608, 0x24000019 }, - { 0x0981d622, 0x14000019 }, - { 0x0981d63c, 0x24000019 }, - { 0x0981d656, 0x14000019 }, - { 0x0981d670, 0x24000019 }, - { 0x0981d68a, 0x1400001b }, - { 0x0981d6a8, 0x24000018 }, - { 0x0901d6c1, 0x64000000 }, - { 0x0981d6c2, 0x14000018 }, - { 0x0901d6db, 0x64000000 }, - { 0x0981d6dc, 0x14000005 }, - { 0x0981d6e2, 0x24000018 }, - { 0x0901d6fb, 0x64000000 }, - { 0x0981d6fc, 0x14000018 }, - { 0x0901d715, 0x64000000 }, - { 0x0981d716, 0x14000005 }, - { 0x0981d71c, 0x24000018 }, - { 0x0901d735, 0x64000000 }, - { 0x0981d736, 0x14000018 }, - { 0x0901d74f, 0x64000000 }, - { 0x0981d750, 0x14000005 }, - { 0x0981d756, 0x24000018 }, - { 0x0901d76f, 0x64000000 }, - { 0x0981d770, 0x14000018 }, - { 0x0901d789, 0x64000000 }, - { 0x0981d78a, 0x14000005 }, - { 0x0981d790, 0x24000018 }, - { 0x0901d7a9, 0x64000000 }, - { 0x0981d7aa, 0x14000018 }, - { 0x0901d7c3, 0x64000000 }, - { 0x0981d7c4, 0x14000005 }, - { 0x0981d7ce, 0x34000031 }, - { 0x16820000, 0x1c00a6d6 }, - { 0x1682f800, 0x1c00021d }, - { 0x090e0001, 0x04000000 }, - { 0x098e0020, 0x0400005f }, - { 0x1b8e0100, 0x300000ef }, - { 0x098f0000, 0x0c00fffd }, - { 0x09900000, 0x0c00fffd }, -}; diff --git a/js/src/yarr/wtfbridge.h b/js/src/yarr/wtfbridge.h new file mode 100644 index 000000000000..f11630c4bb11 --- /dev/null +++ b/js/src/yarr/wtfbridge.h @@ -0,0 +1,329 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sw=4 et tw=99 ft=cpp: + * + * ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the 
"License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla SpiderMonkey JavaScript 1.9 code, released + * June 12, 2009. + * + * The Initial Developer of the Original Code is + * the Mozilla Corporation. + * + * Contributor(s): + * David Mandelin + * + * Alternatively, the contents of this file may be used under the terms of + * either of the GNU General Public License Version 2 or later (the "GPL"), + * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef jswtfbridge_h__ +#define jswtfbridge_h__ + +/* + * WTF compatibility layer. This file provides various type and data + * definitions for use by Yarr. + */ + +#include "jsstr.h" +#include "jsprvtd.h" +#include "jstl.h" +#include "assembler/wtf/Platform.h" +#include "assembler/jit/ExecutableAllocator.h" + +namespace JSC { namespace Yarr { + +/* + * Basic type definitions. 
+ */ + +typedef jschar UChar; +typedef JSLinearString UString; + +class Unicode { + public: + static UChar toUpper(UChar c) { return JS_TOUPPER(c); } + static UChar toLower(UChar c) { return JS_TOLOWER(c); } +}; + +/* + * Do-nothing smart pointer classes. These have a compatible interface + * with the smart pointers used by Yarr, but they don't actually do + * reference counting. + */ +template +class RefCounted { +}; + +template +class RefPtr { + T *ptr; + public: + RefPtr(T *p) { ptr = p; } + operator bool() const { return ptr != NULL; } + const T *operator ->() const { return ptr; } + T *get() { return ptr; } +}; + +template +class PassRefPtr { + T *ptr; + public: + PassRefPtr(T *p) { ptr = p; } + operator T*() { return ptr; } +}; + +template +class PassOwnPtr { + T *ptr; + public: + PassOwnPtr(T *p) { ptr = p; } + + T *get() { return ptr; } +}; + +template +class OwnPtr { + T *ptr; + public: + OwnPtr() : ptr(NULL) { } + OwnPtr(PassOwnPtr p) : ptr(p.get()) { } + + ~OwnPtr() { + if (ptr) + js::Foreground::delete_(ptr); + } + + OwnPtr &operator=(PassOwnPtr p) { + ptr = p.get(); + return *this; + } + + T *operator ->() { return ptr; } + + T *get() { return ptr; } + + T *release() { + T *result = ptr; + ptr = NULL; + return result; + } +}; + +template +PassRefPtr adoptRef(T *p) { return PassRefPtr(p); } + +template +PassOwnPtr adoptPtr(T *p) { return PassOwnPtr(p); } + +#define WTF_MAKE_FAST_ALLOCATED + +template +class Ref { + T &val; + public: + Ref(T &val) : val(val) { } + operator T&() const { return val; } +}; + +/* + * Vector class for Yarr. This wraps js::Vector and provides all + * the API method signatures used by Yarr. 
+ */ +template +class Vector { + public: + js::Vector impl; + public: + Vector() {} + + Vector(const Vector &v) { + // XXX yarr-oom + (void) append(v); + } + + size_t size() const { + return impl.length(); + } + + T &operator[](size_t i) { + return impl[i]; + } + + const T &operator[](size_t i) const { + return impl[i]; + } + + T &at(size_t i) { + return impl[i]; + } + + const T *begin() const { + return impl.begin(); + } + + T &last() { + return impl.back(); + } + + bool isEmpty() const { + return impl.empty(); + } + + template + void append(const U &u) { + // XXX yarr-oom + (void) impl.append(static_cast(u)); + } + + template + void append(const Vector &v) { + // XXX yarr-oom + (void) impl.append(v.impl); + } + + void insert(size_t i, const T& t) { + // XXX yarr-oom + (void) impl.insert(&impl[i], t); + } + + void remove(size_t i) { + impl.erase(&impl[i]); + } + + void clear() { + return impl.clear(); + } + + void shrink(size_t newLength) { + // XXX yarr-oom + JS_ASSERT(newLength <= impl.length()); + (void) impl.resize(newLength); + } + + void deleteAllValues() { + for (T *p = impl.begin(); p != impl.end(); ++p) + js::Foreground::delete_(*p); + } +}; + +template +class Vector > { + public: + js::Vector impl; + public: + Vector() {} + + size_t size() const { + return impl.length(); + } + + void append(T *t) { + // XXX yarr-oom + (void) impl.append(t); + } + + PassOwnPtr operator[](size_t i) { + return PassOwnPtr(impl[i]); + } + + void clear() { + for (T **p = impl.begin(); p != impl.end(); ++p) + js::Foreground::delete_(*p); + return impl.clear(); + } +}; + +template +inline void +deleteAllValues(Vector &v) { + v.deleteAllValues(); +} + +/* + * Minimal JSGlobalData. This used by Yarr to get the allocator. + */ +class JSGlobalData { + public: + ExecutableAllocator *regexAllocator; + + JSGlobalData(ExecutableAllocator *regexAllocator) + : regexAllocator(regexAllocator) { } +}; + +/* + * Sentinel value used in Yarr. 
+ */ +const size_t notFound = size_t(-1); + + /* + * Do-nothing version of a macro used by WTF to avoid unused + * parameter warnings. + */ +#define UNUSED_PARAM(e) + +} /* namespace Yarr */ + +/* + * Replacements for std:: functions used in Yarr. We put them in + * namespace JSC::std so that they can still be called as std::X + * in Yarr. + */ +namespace std { + +/* + * windows.h defines a 'min' macro that would mangle the function + * name. + */ +#if WTF_COMPILER_MSVC +# undef min +# undef max +#endif + +template +inline T +min(T t1, T t2) +{ + return JS_MIN(t1, t2); +} + +template +inline T +max(T t1, T t2) +{ + return JS_MAX(t1, t2); +} + +template +inline void +swap(T &t1, T &t2) +{ + T tmp = t1; + t1 = t2; + t2 = tmp; +} +} /* namespace std */ + +} /* namespace JSC */ + +#endif diff --git a/js/src/yarr/yarr/RegexJIT.cpp b/js/src/yarr/yarr/RegexJIT.cpp deleted file mode 100644 index 1571c35b7125..000000000000 --- a/js/src/yarr/yarr/RegexJIT.cpp +++ /dev/null @@ -1,1589 +0,0 @@ -/* - * Copyright (C) 2009 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "RegexJIT.h" - -#if ENABLE_ASSEMBLER - -#include "assembler/assembler/LinkBuffer.h" -#include "assembler/assembler/MacroAssembler.h" -#include "RegexCompiler.h" - -#include "yarr/pcre/pcre.h" // temporary, remove when fallback is removed. - -using namespace WTF; - -namespace JSC { namespace Yarr { - -class JSGlobalData; - -class RegexGenerator : private MacroAssembler { - friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); - -#if WTF_CPU_ARM - static const RegisterID input = ARMRegisters::r0; - static const RegisterID index = ARMRegisters::r1; - static const RegisterID length = ARMRegisters::r2; - static const RegisterID output = ARMRegisters::r4; - - static const RegisterID regT0 = ARMRegisters::r5; - static const RegisterID regT1 = ARMRegisters::r6; - - static const RegisterID returnRegister = ARMRegisters::r0; -#elif WTF_CPU_MIPS - static const RegisterID input = MIPSRegisters::a0; - static const RegisterID index = MIPSRegisters::a1; - static const RegisterID length = MIPSRegisters::a2; - static const RegisterID output = MIPSRegisters::a3; - - static const RegisterID regT0 = MIPSRegisters::t4; - static const RegisterID regT1 = MIPSRegisters::t5; - - static const RegisterID returnRegister = MIPSRegisters::v0; -#elif WTF_CPU_SPARC - static const RegisterID input = SparcRegisters::i0; - static const RegisterID index = 
SparcRegisters::i1; - static const RegisterID length = SparcRegisters::i2; - static const RegisterID output = SparcRegisters::i3; - - static const RegisterID regT0 = SparcRegisters::i4; - static const RegisterID regT1 = SparcRegisters::i5; - - static const RegisterID returnRegister = SparcRegisters::i0; -#elif WTF_CPU_X86 - static const RegisterID input = X86Registers::eax; - static const RegisterID index = X86Registers::edx; - static const RegisterID length = X86Registers::ecx; - static const RegisterID output = X86Registers::edi; - - static const RegisterID regT0 = X86Registers::ebx; - static const RegisterID regT1 = X86Registers::esi; - - static const RegisterID returnRegister = X86Registers::eax; -#elif WTF_CPU_X86_64 -#if WTF_PLATFORM_WIN - static const RegisterID input = X86Registers::ecx; - static const RegisterID index = X86Registers::edx; - static const RegisterID length = X86Registers::r8; - static const RegisterID output = X86Registers::r9; -#else - static const RegisterID input = X86Registers::edi; - static const RegisterID index = X86Registers::esi; - static const RegisterID length = X86Registers::edx; - static const RegisterID output = X86Registers::ecx; -#endif - - static const RegisterID regT0 = X86Registers::eax; - static const RegisterID regT1 = X86Registers::ebx; - - static const RegisterID returnRegister = X86Registers::eax; -#endif - - void optimizeAlternative(PatternAlternative* alternative) - { - if (!alternative->m_terms.length()) - return; - - for (unsigned i = 0; i < alternative->m_terms.length() - 1; ++i) { - PatternTerm& term = alternative->m_terms[i]; - PatternTerm& nextTerm = alternative->m_terms[i + 1]; - - if ((term.type == PatternTerm::TypeCharacterClass) - && (term.quantityType == QuantifierFixedCount) - && (nextTerm.type == PatternTerm::TypePatternCharacter) - && (nextTerm.quantityType == QuantifierFixedCount)) { - PatternTerm termCopy = term; - alternative->m_terms[i] = nextTerm; - alternative->m_terms[i + 1] = termCopy; - } - } 
- } - - void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) - { - do { - // pick which range we're going to generate - int which = count >> 1; - char lo = ranges[which].begin; - char hi = ranges[which].end; - - // check if there are any ranges or matches below lo. If not, just jl to failure - - // if there is anything else to check, check that first, if it falls through jmp to failure. - if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { - Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); - - // generate code for all ranges before this one - if (which) - matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); - - while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { - matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); - ++*matchIndex; - } - failures.append(jump()); - - loOrAbove.link(this); - } else if (which) { - Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); - - matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); - failures.append(jump()); - - loOrAbove.link(this); - } else - failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); - - while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) - ++*matchIndex; - - matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); - // fall through to here, the value is above hi. - - // shuffle along & loop around if there are any more matches to handle. 
- unsigned next = which + 1; - ranges += next; - count -= next; - } while (count); - } - - void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) - { - if (charClass->m_table) { - ExtendedAddress tableEntry(character, reinterpret_cast(charClass->m_table->m_table)); - matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry)); - return; - } - Jump unicodeFail; - if (charClass->m_matchesUnicode.length() || charClass->m_rangesUnicode.length()) { - Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f)); - - if (charClass->m_matchesUnicode.length()) { - for (unsigned i = 0; i < charClass->m_matchesUnicode.length(); ++i) { - UChar ch = charClass->m_matchesUnicode[i]; - matchDest.append(branch32(Equal, character, Imm32(ch))); - } - } - - if (charClass->m_rangesUnicode.length()) { - for (unsigned i = 0; i < charClass->m_rangesUnicode.length(); ++i) { - UChar lo = charClass->m_rangesUnicode[i].begin; - UChar hi = charClass->m_rangesUnicode[i].end; - - Jump below = branch32(LessThan, character, Imm32(lo)); - matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); - below.link(this); - } - } - - unicodeFail = jump(); - isAscii.link(this); - } - - if (charClass->m_ranges.length()) { - unsigned matchIndex = 0; - JumpList failures; - matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.length(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.length()); - while (matchIndex < charClass->m_matches.length()) - matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); - - failures.link(this); - } else if (charClass->m_matches.length()) { - // optimization: gather 'a','A' etc back together, can mask & test once. 
- js::Vector matchesAZaz; - - for (unsigned i = 0; i < charClass->m_matches.length(); ++i) { - char ch = charClass->m_matches[i]; - if (m_pattern.m_ignoreCase) { - if (isASCIILower(ch)) { - matchesAZaz.append(ch); - continue; - } - if (isASCIIUpper(ch)) - continue; - } - matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); - } - - if (unsigned countAZaz = matchesAZaz.length()) { - or32(Imm32(32), character); - for (unsigned i = 0; i < countAZaz; ++i) - matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i]))); - } - } - - if (charClass->m_matchesUnicode.length() || charClass->m_rangesUnicode.length()) - unicodeFail.link(this); - } - - // Jumps if input not available; will have (incorrectly) incremented already! - Jump jumpIfNoAvailableInput(unsigned countToCheck) - { - add32(Imm32(countToCheck), index); - return branch32(Above, index, length); - } - - Jump jumpIfAvailableInput(unsigned countToCheck) - { - add32(Imm32(countToCheck), index); - return branch32(BelowOrEqual, index, length); - } - - Jump checkInput() - { - return branch32(BelowOrEqual, index, length); - } - - Jump atEndOfInput() - { - return branch32(Equal, index, length); - } - - Jump notAtEndOfInput() - { - return branch32(NotEqual, index, length); - } - - Jump jumpIfCharEquals(UChar ch, int inputPosition) - { - return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); - } - - Jump jumpIfCharNotEquals(UChar ch, int inputPosition) - { - return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); - } - - void readCharacter(int inputPosition, RegisterID reg) - { - load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); - } - - void storeToFrame(RegisterID reg, unsigned frameLocation) - { - poke(reg, frameLocation); - } - - void storeToFrame(Imm32 imm, unsigned frameLocation) - { - poke(imm, frameLocation); - } - - DataLabelPtr storeToFrameWithPatch(unsigned 
frameLocation) - { - return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); - } - - void loadFromFrame(unsigned frameLocation, RegisterID reg) - { - peek(reg, frameLocation); - } - - void loadFromFrameAndJump(unsigned frameLocation) - { - jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); - } - - struct AlternativeBacktrackRecord { - DataLabelPtr dataLabel; - Label backtrackLocation; - - AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation) - : dataLabel(dataLabel) - , backtrackLocation(backtrackLocation) - { - } - }; - - struct TermGenerationState { - TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal) - : disjunction(disjunction) - , checkedTotal(checkedTotal) - { - } - - void resetAlternative() - { - isBackTrackGenerated = false; - alt = 0; - } - bool alternativeValid() - { - return alt < disjunction->m_alternatives.length(); - } - void nextAlternative() - { - ++alt; - } - PatternAlternative* alternative() - { - return disjunction->m_alternatives[alt]; - } - - void resetTerm() - { - ASSERT(alternativeValid()); - t = 0; - } - bool termValid() - { - ASSERT(alternativeValid()); - return t < alternative()->m_terms.length(); - } - void nextTerm() - { - ASSERT(alternativeValid()); - ++t; - } - PatternTerm& term() - { - ASSERT(alternativeValid()); - return alternative()->m_terms[t]; - } - bool isLastTerm() - { - ASSERT(alternativeValid()); - return (t + 1) == alternative()->m_terms.length(); - } - bool isMainDisjunction() - { - return !disjunction->m_parent; - } - - PatternTerm& lookaheadTerm() - { - ASSERT(alternativeValid()); - ASSERT((t + 1) < alternative()->m_terms.length()); - return alternative()->m_terms[t + 1]; - } - bool isSinglePatternCharacterLookaheadTerm() - { - ASSERT(alternativeValid()); - return ((t + 1) < alternative()->m_terms.length()) - && (lookaheadTerm().type == PatternTerm::TypePatternCharacter) - && (lookaheadTerm().quantityType == 
QuantifierFixedCount) - && (lookaheadTerm().quantityCount == 1); - } - - int inputOffset() - { - return term().inputPosition - checkedTotal; - } - - void jumpToBacktrack(Jump jump, MacroAssembler* masm) - { - if (isBackTrackGenerated) - jump.linkTo(backtrackLabel, masm); - else - backTrackJumps.append(jump); - } - void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm) - { - if (isBackTrackGenerated) - jumps.linkTo(backtrackLabel, masm); - else - backTrackJumps.append(jumps); - } - bool plantJumpToBacktrackIfExists(MacroAssembler* masm) - { - if (isBackTrackGenerated) { - masm->jump(backtrackLabel); - return true; - } - return false; - } - void addBacktrackJump(Jump jump) - { - backTrackJumps.append(jump); - } - void setBacktrackGenerated(Label label) - { - isBackTrackGenerated = true; - backtrackLabel = label; - } - void linkAlternativeBacktracks(MacroAssembler* masm) - { - isBackTrackGenerated = false; - backTrackJumps.link(masm); - } - void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm) - { - isBackTrackGenerated = false; - backTrackJumps.linkTo(label, masm); - } - void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm) - { - jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm); - if (nestedParenthesesState.isBackTrackGenerated) - setBacktrackGenerated(nestedParenthesesState.backtrackLabel); - } - - PatternDisjunction* disjunction; - int checkedTotal; - private: - unsigned alt; - unsigned t; - JumpList backTrackJumps; - Label backtrackLabel; - bool isBackTrackGenerated; - }; - - void generateAssertionBOL(TermGenerationState& state) - { - PatternTerm& term = state.term(); - - if (m_pattern.m_multiline) { - const RegisterID character = regT0; - - JumpList matchDest; - if (!term.inputPosition) - matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal))); - - readCharacter(state.inputOffset() - 1, character); - matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); 
- state.jumpToBacktrack(jump(), this); - - matchDest.link(this); - } else { - // Erk, really should poison out these alternatives early. :-/ - if (term.inputPosition) - state.jumpToBacktrack(jump(), this); - else - state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this); - } - } - - void generateAssertionEOL(TermGenerationState& state) - { - PatternTerm& term = state.term(); - - if (m_pattern.m_multiline) { - const RegisterID character = regT0; - - JumpList matchDest; - if (term.inputPosition == state.checkedTotal) - matchDest.append(atEndOfInput()); - - readCharacter(state.inputOffset(), character); - matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); - state.jumpToBacktrack(jump(), this); - - matchDest.link(this); - } else { - if (term.inputPosition == state.checkedTotal) - state.jumpToBacktrack(notAtEndOfInput(), this); - // Erk, really should poison out these alternatives early. :-/ - else - state.jumpToBacktrack(jump(), this); - } - } - - // Also falls though on nextIsNotWordChar. 
- void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar) - { - const RegisterID character = regT0; - PatternTerm& term = state.term(); - - if (term.inputPosition == state.checkedTotal) - nextIsNotWordChar.append(atEndOfInput()); - - readCharacter(state.inputOffset(), character); - matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass()); - } - - void generateAssertionWordBoundary(TermGenerationState& state) - { - const RegisterID character = regT0; - PatternTerm& term = state.term(); - - Jump atBegin; - JumpList matchDest; - if (!term.inputPosition) - atBegin = branch32(Equal, index, Imm32(state.checkedTotal)); - readCharacter(state.inputOffset() - 1, character); - matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass()); - if (!term.inputPosition) - atBegin.link(this); - - // We fall through to here if the last character was not a wordchar. - JumpList nonWordCharThenWordChar; - JumpList nonWordCharThenNonWordChar; - if (term.invertOrCapture) { - matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar); - nonWordCharThenWordChar.append(jump()); - } else { - matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar); - nonWordCharThenNonWordChar.append(jump()); - } - state.jumpToBacktrack(nonWordCharThenNonWordChar, this); - - // We jump here if the last character was a wordchar. - matchDest.link(this); - JumpList wordCharThenWordChar; - JumpList wordCharThenNonWordChar; - if (term.invertOrCapture) { - matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar); - wordCharThenWordChar.append(jump()); - } else { - matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar); - // This can fall-though! 
- } - - state.jumpToBacktrack(wordCharThenWordChar, this); - - nonWordCharThenWordChar.link(this); - wordCharThenNonWordChar.link(this); - } - - void generatePatternCharacterSingle(TermGenerationState& state) - { - const RegisterID character = regT0; - UChar ch = state.term().patternCharacter; - - if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { - readCharacter(state.inputOffset(), character); - or32(Imm32(32), character); - state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this); - } else { - ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); - state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this); - } - } - - void generatePatternCharacterPair(TermGenerationState& state) - { - const RegisterID character = regT0; -#if WTF_CPU_BIG_ENDIAN - UChar ch2 = state.term().patternCharacter; - UChar ch1 = state.lookaheadTerm().patternCharacter; -#else - UChar ch1 = state.term().patternCharacter; - UChar ch2 = state.lookaheadTerm().patternCharacter; -#endif - - int mask = 0; - int chPair = ch1 | (ch2 << 16); - - if (m_pattern.m_ignoreCase) { - if (isASCIIAlpha(ch1)) - mask |= 32; - if (isASCIIAlpha(ch2)) - mask |= 32 << 16; - } - - if (mask) { - load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character); - or32(Imm32(mask), character); - state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this); - } else - state.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this); - } - - void generatePatternCharacterFixed(TermGenerationState& state) - { - const RegisterID character = regT0; - const RegisterID countRegister = regT1; - PatternTerm& term = state.term(); - UChar ch = term.patternCharacter; - - move(index, countRegister); - sub32(Imm32(term.quantityCount), countRegister); - - Label loop(this); - if (m_pattern.m_ignoreCase && 
isASCIIAlpha(ch)) { - load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); - or32(Imm32(32), character); - state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this); - } else { - ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); - state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this); - } - add32(Imm32(1), countRegister); - branch32(NotEqual, countRegister, index).linkTo(loop, this); - } - - void generatePatternCharacterGreedy(TermGenerationState& state) - { - const RegisterID character = regT0; - const RegisterID countRegister = regT1; - PatternTerm& term = state.term(); - UChar ch = term.patternCharacter; - - move(Imm32(0), countRegister); - - JumpList failures; - Label loop(this); - failures.append(atEndOfInput()); - if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { - readCharacter(state.inputOffset(), character); - or32(Imm32(32), character); - failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); - } else { - ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); - failures.append(jumpIfCharNotEquals(ch, state.inputOffset())); - } - - add32(Imm32(1), countRegister); - add32(Imm32(1), index); - if (term.quantityCount != 0xffffffff) { - branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); - failures.append(jump()); - } else - jump(loop); - - Label backtrackBegin(this); - loadFromFrame(term.frameLocation, countRegister); - state.jumpToBacktrack(branchTest32(Zero, countRegister), this); - sub32(Imm32(1), countRegister); - sub32(Imm32(1), index); - - failures.link(this); - - storeToFrame(countRegister, term.frameLocation); - - state.setBacktrackGenerated(backtrackBegin); - } - - void generatePatternCharacterNonGreedy(TermGenerationState& state) - { - 
const RegisterID character = regT0; - const RegisterID countRegister = regT1; - PatternTerm& term = state.term(); - UChar ch = term.patternCharacter; - - move(Imm32(0), countRegister); - - Jump firstTimeDoNothing = jump(); - - Label hardFail(this); - sub32(countRegister, index); - state.jumpToBacktrack(jump(), this); - - Label backtrackBegin(this); - loadFromFrame(term.frameLocation, countRegister); - - atEndOfInput().linkTo(hardFail, this); - if (term.quantityCount != 0xffffffff) - branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); - if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { - readCharacter(state.inputOffset(), character); - or32(Imm32(32), character); - branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this); - } else { - ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); - jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this); - } - - add32(Imm32(1), countRegister); - add32(Imm32(1), index); - - firstTimeDoNothing.link(this); - storeToFrame(countRegister, term.frameLocation); - - state.setBacktrackGenerated(backtrackBegin); - } - - void generateCharacterClassSingle(TermGenerationState& state) - { - const RegisterID character = regT0; - PatternTerm& term = state.term(); - - JumpList matchDest; - readCharacter(state.inputOffset(), character); - matchCharacterClass(character, matchDest, term.characterClass); - - if (term.invertOrCapture) - state.jumpToBacktrack(matchDest, this); - else { - state.jumpToBacktrack(jump(), this); - matchDest.link(this); - } - } - - void generateCharacterClassFixed(TermGenerationState& state) - { - const RegisterID character = regT0; - const RegisterID countRegister = regT1; - PatternTerm& term = state.term(); - - move(index, countRegister); - sub32(Imm32(term.quantityCount), countRegister); - - Label loop(this); - JumpList matchDest; - load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * 
sizeof(UChar)), character); - matchCharacterClass(character, matchDest, term.characterClass); - - if (term.invertOrCapture) - state.jumpToBacktrack(matchDest, this); - else { - state.jumpToBacktrack(jump(), this); - matchDest.link(this); - } - - add32(Imm32(1), countRegister); - branch32(NotEqual, countRegister, index).linkTo(loop, this); - } - - void generateCharacterClassGreedy(TermGenerationState& state) - { - const RegisterID character = regT0; - const RegisterID countRegister = regT1; - PatternTerm& term = state.term(); - - move(Imm32(0), countRegister); - - JumpList failures; - Label loop(this); - failures.append(atEndOfInput()); - - if (term.invertOrCapture) { - readCharacter(state.inputOffset(), character); - matchCharacterClass(character, failures, term.characterClass); - } else { - JumpList matchDest; - readCharacter(state.inputOffset(), character); - matchCharacterClass(character, matchDest, term.characterClass); - failures.append(jump()); - matchDest.link(this); - } - - add32(Imm32(1), countRegister); - add32(Imm32(1), index); - if (term.quantityCount != 0xffffffff) { - branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); - failures.append(jump()); - } else - jump(loop); - - Label backtrackBegin(this); - loadFromFrame(term.frameLocation, countRegister); - state.jumpToBacktrack(branchTest32(Zero, countRegister), this); - sub32(Imm32(1), countRegister); - sub32(Imm32(1), index); - - failures.link(this); - - storeToFrame(countRegister, term.frameLocation); - - state.setBacktrackGenerated(backtrackBegin); - } - - void generateCharacterClassNonGreedy(TermGenerationState& state) - { - const RegisterID character = regT0; - const RegisterID countRegister = regT1; - PatternTerm& term = state.term(); - - move(Imm32(0), countRegister); - - Jump firstTimeDoNothing = jump(); - - Label hardFail(this); - sub32(countRegister, index); - state.jumpToBacktrack(jump(), this); - - Label backtrackBegin(this); - loadFromFrame(term.frameLocation, 
countRegister); - - atEndOfInput().linkTo(hardFail, this); - branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); - - JumpList matchDest; - readCharacter(state.inputOffset(), character); - matchCharacterClass(character, matchDest, term.characterClass); - - if (term.invertOrCapture) - matchDest.linkTo(hardFail, this); - else { - jump(hardFail); - matchDest.link(this); - } - - add32(Imm32(1), countRegister); - add32(Imm32(1), index); - - firstTimeDoNothing.link(this); - storeToFrame(countRegister, term.frameLocation); - - state.setBacktrackGenerated(backtrackBegin); - } - - void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation) - { - ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion)); - ASSERT(parenthesesTerm.quantityCount == 1); - - PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; - unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0; - - if (disjunction->m_alternatives.length() == 1) { - state.resetAlternative(); - ASSERT(state.alternativeValid()); - PatternAlternative* alternative = state.alternative(); - optimizeAlternative(alternative); - - int countToCheck = alternative->m_minimumSize - preCheckedCount; - if (countToCheck) { - ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount)); - - // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists' - // will be forced to always trampoline into here, just to decrement the index. - // Ick. 
- Jump skip = jump(); - - Label backtrackBegin(this); - sub32(Imm32(countToCheck), index); - state.addBacktrackJump(jump()); - - skip.link(this); - - state.setBacktrackGenerated(backtrackBegin); - - state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this); - state.checkedTotal += countToCheck; - } - - for (state.resetTerm(); state.termValid(); state.nextTerm()) - generateTerm(state); - - state.checkedTotal -= countToCheck; - } else { - JumpList successes; - - for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) { - - PatternAlternative* alternative = state.alternative(); - optimizeAlternative(alternative); - - ASSERT(alternative->m_minimumSize >= preCheckedCount); - int countToCheck = alternative->m_minimumSize - preCheckedCount; - if (countToCheck) { - state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck)); - state.checkedTotal += countToCheck; - } - - for (state.resetTerm(); state.termValid(); state.nextTerm()) - generateTerm(state); - - // Matched an alternative. - DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation); - successes.append(jump()); - - // Alternative did not match. - Label backtrackLocation(this); - - // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one. - state.plantJumpToBacktrackIfExists(this); - - state.linkAlternativeBacktracks(this); - - if (countToCheck) { - sub32(Imm32(countToCheck), index); - state.checkedTotal -= countToCheck; - } - - m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation)); - } - // We fall through to here when the last alternative fails. - // Add a backtrack out of here for the parenthese handling code to link up. - state.addBacktrackJump(jump()); - - // Generate a trampoline for the parens code to backtrack to, to retry the - // next alternative. 
- state.setBacktrackGenerated(label()); - loadFromFrameAndJump(alternativeFrameLocation); - - // FIXME: both of the above hooks are a little inefficient, in that you - // may end up trampolining here, just to trampoline back out to the - // parentheses code, or vice versa. We can probably eliminate a jump - // by restructuring, but coding this way for now for simplicity during - // development. - - successes.link(this); - } - } - - void generateParenthesesSingle(TermGenerationState& state) - { - const RegisterID indexTemporary = regT0; - PatternTerm& term = state.term(); - PatternDisjunction* disjunction = term.parentheses.disjunction; - ASSERT(term.quantityCount == 1); - - unsigned preCheckedCount = (term.quantityType == QuantifierFixedCount) ? disjunction->m_minimumSize : 0; - - unsigned parenthesesFrameLocation = term.frameLocation; - unsigned alternativeFrameLocation = parenthesesFrameLocation; - if (term.quantityType != QuantifierFixedCount) - alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce; - - // optimized case - no capture & no quantifier can be handled in a light-weight manner. - if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) { - TermGenerationState parenthesesState(disjunction, state.checkedTotal); - generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); - // this expects that any backtracks back out of the parentheses will be in the - // parenthesesState's backTrackJumps vector, and that if they need backtracking - // they will have set an entry point on the parenthesesState's backtrackLabel. 
- state.propagateBacktrackingFrom(parenthesesState, this); - } else { - Jump nonGreedySkipParentheses; - Label nonGreedyTryParentheses; - if (term.quantityType == QuantifierGreedy) - storeToFrame(index, parenthesesFrameLocation); - else if (term.quantityType == QuantifierNonGreedy) { - storeToFrame(Imm32(-1), parenthesesFrameLocation); - nonGreedySkipParentheses = jump(); - nonGreedyTryParentheses = label(); - storeToFrame(index, parenthesesFrameLocation); - } - - // store the match start index - if (term.invertOrCapture) { - int inputOffset = state.inputOffset() - preCheckedCount; - if (inputOffset) { - move(index, indexTemporary); - add32(Imm32(inputOffset), indexTemporary); - store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); - } else - store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); - } - - // generate the body of the parentheses - TermGenerationState parenthesesState(disjunction, state.checkedTotal); - generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); - - Jump success = (term.quantityType == QuantifierFixedCount) ? - jump() : - branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*)))); - - // A failure AFTER the parens jumps here - Label backtrackFromAfterParens(this); - - if (term.quantityType == QuantifierGreedy) { - // If this is -1 we have now tested with both with and without the parens. - loadFromFrame(parenthesesFrameLocation, indexTemporary); - state.jumpToBacktrack(branch32(Equal, indexTemporary, Imm32(-1)), this); - } else if (term.quantityType == QuantifierNonGreedy) { - // If this is -1 we have now tested without the parens, now test with. 
- loadFromFrame(parenthesesFrameLocation, indexTemporary); - branch32(Equal, indexTemporary, Imm32(-1)).linkTo(nonGreedyTryParentheses, this); - } - - parenthesesState.plantJumpToBacktrackIfExists(this); - // A failure WITHIN the parens jumps here - parenthesesState.linkAlternativeBacktracks(this); - if (term.invertOrCapture) { - store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); -#if 0 - store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); -#endif - } - - if (term.quantityType == QuantifierGreedy) - storeToFrame(Imm32(-1), parenthesesFrameLocation); - else - state.jumpToBacktrack(jump(), this); - - state.setBacktrackGenerated(backtrackFromAfterParens); - if (term.quantityType == QuantifierNonGreedy) - nonGreedySkipParentheses.link(this); - success.link(this); - - // store the match end index - if (term.invertOrCapture) { - int inputOffset = state.inputOffset(); - if (inputOffset) { - move(index, indexTemporary); - add32(Imm32(state.inputOffset()), indexTemporary); - store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); - } else - store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); - } - } - } - - void generateParenthesesGreedyNoBacktrack(TermGenerationState& state) - { - PatternTerm& parenthesesTerm = state.term(); - PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; - ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern); - ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle. - - TermGenerationState parenthesesState(disjunction, state.checkedTotal); - - Label matchAgain(this); - - storeToFrame(index, parenthesesTerm.frameLocation); // Save the current index to check for zero len matches later. 
- - for (parenthesesState.resetAlternative(); parenthesesState.alternativeValid(); parenthesesState.nextAlternative()) { - - PatternAlternative* alternative = parenthesesState.alternative(); - optimizeAlternative(alternative); - - int countToCheck = alternative->m_minimumSize; - if (countToCheck) { - parenthesesState.addBacktrackJump(jumpIfNoAvailableInput(countToCheck)); - parenthesesState.checkedTotal += countToCheck; - } - - for (parenthesesState.resetTerm(); parenthesesState.termValid(); parenthesesState.nextTerm()) - generateTerm(parenthesesState); - - // If we get here, we matched! If the index advanced then try to match more since limit isn't supported yet. - branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesTerm.frameLocation * sizeof(void*))), matchAgain); - - // If we get here we matched, but we matched "" - cannot accept this alternative as is, so either backtrack, - // or fall through to try the next alternative if no backtrack is available. - parenthesesState.plantJumpToBacktrackIfExists(this); - - parenthesesState.linkAlternativeBacktracks(this); - // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop. - - if (countToCheck) { - sub32(Imm32(countToCheck), index); - parenthesesState.checkedTotal -= countToCheck; - } - } - - // If the last alternative falls through to here, we have a failed match... - // Which means that we match whatever we have matched up to this point (even if nothing). 
- } - - void generateParentheticalAssertion(TermGenerationState& state) - { - PatternTerm& term = state.term(); - PatternDisjunction* disjunction = term.parentheses.disjunction; - ASSERT(term.quantityCount == 1); - ASSERT(term.quantityType == QuantifierFixedCount); - - unsigned parenthesesFrameLocation = term.frameLocation; - unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion; - - int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition; - - if (term.invertOrCapture) { - // Inverted case - storeToFrame(index, parenthesesFrameLocation); - - state.checkedTotal -= countCheckedAfterAssertion; - if (countCheckedAfterAssertion) - sub32(Imm32(countCheckedAfterAssertion), index); - - TermGenerationState parenthesesState(disjunction, state.checkedTotal); - generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); - // Success! - which means - Fail! - loadFromFrame(parenthesesFrameLocation, index); - state.jumpToBacktrack(jump(), this); - - // And fail means success. - parenthesesState.linkAlternativeBacktracks(this); - loadFromFrame(parenthesesFrameLocation, index); - - state.checkedTotal += countCheckedAfterAssertion; - } else { - // Normal case - storeToFrame(index, parenthesesFrameLocation); - - state.checkedTotal -= countCheckedAfterAssertion; - if (countCheckedAfterAssertion) - sub32(Imm32(countCheckedAfterAssertion), index); - - TermGenerationState parenthesesState(disjunction, state.checkedTotal); - generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); - // Success! - which means - Success! 
- loadFromFrame(parenthesesFrameLocation, index); - Jump success = jump(); - - parenthesesState.linkAlternativeBacktracks(this); - loadFromFrame(parenthesesFrameLocation, index); - state.jumpToBacktrack(jump(), this); - - success.link(this); - - state.checkedTotal += countCheckedAfterAssertion; - } - } - - void generateTerm(TermGenerationState& state) - { - PatternTerm& term = state.term(); - - switch (term.type) { - case PatternTerm::TypeAssertionBOL: - generateAssertionBOL(state); - break; - - case PatternTerm::TypeAssertionEOL: - generateAssertionEOL(state); - break; - - case PatternTerm::TypeAssertionWordBoundary: - generateAssertionWordBoundary(state); - break; - - case PatternTerm::TypePatternCharacter: - switch (term.quantityType) { - case QuantifierFixedCount: - if (term.quantityCount == 1) { - if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) { - generatePatternCharacterPair(state); - state.nextTerm(); - } else - generatePatternCharacterSingle(state); - } else - generatePatternCharacterFixed(state); - break; - case QuantifierGreedy: - generatePatternCharacterGreedy(state); - break; - case QuantifierNonGreedy: - generatePatternCharacterNonGreedy(state); - break; - } - break; - - case PatternTerm::TypeCharacterClass: - switch (term.quantityType) { - case QuantifierFixedCount: - if (term.quantityCount == 1) - generateCharacterClassSingle(state); - else - generateCharacterClassFixed(state); - break; - case QuantifierGreedy: - generateCharacterClassGreedy(state); - break; - case QuantifierNonGreedy: - generateCharacterClassNonGreedy(state); - break; - } - break; - - case PatternTerm::TypeBackReference: - m_shouldFallBack = true; - break; - - case PatternTerm::TypeForwardReference: - break; - - case PatternTerm::TypeParenthesesSubpattern: - if (term.quantityCount == 1 && !term.parentheses.isCopy) - generateParenthesesSingle(state); - else if (term.parentheses.isTerminal) - 
generateParenthesesGreedyNoBacktrack(state); - else - m_shouldFallBack = true; - break; - - case PatternTerm::TypeParentheticalAssertion: - generateParentheticalAssertion(state); - break; - } - } - - void generateDisjunction(PatternDisjunction* disjunction) - { - TermGenerationState state(disjunction, 0); - state.resetAlternative(); - - // check availability for the next alternative - int countCheckedForCurrentAlternative = 0; - int countToCheckForFirstAlternative = 0; - bool hasShorterAlternatives = false; - bool setRepeatAlternativeLabels = false; - JumpList notEnoughInputForPreviousAlternative; - Label firstAlternative; - Label firstAlternativeInputChecked; - - // The label 'firstAlternative' is used to plant a check to see if there is - // sufficient input available to run the first repeating alternative. - // The label 'firstAlternativeInputChecked' will jump directly to matching - // the first repeating alternative having skipped this check. - - if (state.alternativeValid()) { - PatternAlternative* alternative = state.alternative(); - if (!alternative->onceThrough()) { - firstAlternative = Label(this); - setRepeatAlternativeLabels = true; - } - countToCheckForFirstAlternative = alternative->m_minimumSize; - state.checkedTotal += countToCheckForFirstAlternative; - if (countToCheckForFirstAlternative) - notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative)); - countCheckedForCurrentAlternative = countToCheckForFirstAlternative; - } - - if (setRepeatAlternativeLabels) - firstAlternativeInputChecked = Label(this); - - while (state.alternativeValid()) { - PatternAlternative* alternative = state.alternative(); - optimizeAlternative(alternative); - - // Track whether any alternatives are shorter than the first one. 
- if (!alternative->onceThrough()) - hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative); - - for (state.resetTerm(); state.termValid(); state.nextTerm()) - generateTerm(state); - - // If we get here, the alternative matched. - if (m_pattern.m_body->m_callFrameSize) - addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); - - ASSERT(index != returnRegister); - if (m_pattern.m_body->m_hasFixedSize) { - move(index, returnRegister); - if (alternative->m_minimumSize) - sub32(Imm32(alternative->m_minimumSize), returnRegister); - - store32(returnRegister, output); - } else - load32(Address(output), returnRegister); - - store32(index, Address(output, 4)); - - generateReturn(); - - state.nextAlternative(); - - // if there are any more alternatives, plant the check for input before looping. - if (state.alternativeValid()) { - PatternAlternative* nextAlternative = state.alternative(); - if (!setRepeatAlternativeLabels && !nextAlternative->onceThrough()) { - // We have handled non-repeating alternatives, jump to next iteration - // and loop over repeating alternatives. - state.jumpToBacktrack(jump(), this); - - countToCheckForFirstAlternative = nextAlternative->m_minimumSize; - - // If we get here, there the last input checked failed. - notEnoughInputForPreviousAlternative.link(this); - - state.linkAlternativeBacktracks(this); - - // Back up to start the looping alternatives. 
- if (countCheckedForCurrentAlternative) - sub32(Imm32(countCheckedForCurrentAlternative), index); - - firstAlternative = Label(this); - - state.checkedTotal = countToCheckForFirstAlternative; - if (countToCheckForFirstAlternative) - notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative)); - - countCheckedForCurrentAlternative = countToCheckForFirstAlternative; - - firstAlternativeInputChecked = Label(this); - - setRepeatAlternativeLabels = true; - } else { - int countToCheckForNextAlternative = nextAlternative->m_minimumSize; - - if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one. - // If we get here, then the last input checked failed. - notEnoughInputForPreviousAlternative.link(this); - - // Check if sufficent input available to run the next alternative - notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); - // We are now in the correct state to enter the next alternative; this add is only required - // to mirror and revert operation of the sub32, just below. - add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); - - // If we get here, then the last input checked passed. - state.linkAlternativeBacktracks(this); - // No need to check if we can run the next alternative, since it is shorter - - // just update index. - sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); - } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one. - // If we get here, then the last input checked failed. - // If there is insufficient input to run the current alternative, and the next alternative is longer, - // then there is definitely not enough input to run it - don't even check. 
Just adjust index, as if - // we had checked. - notEnoughInputForPreviousAlternative.link(this); - add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index); - notEnoughInputForPreviousAlternative.append(jump()); - - // The next alternative is longer than the current one; check the difference. - state.linkAlternativeBacktracks(this); - notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); - } else { // CASE 3: Both alternatives are the same length. - ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative); - - // If the next alterative is the same length as this one, then no need to check the input - - // if there was sufficent input to run the current alternative then there is sufficient - // input to run the next one; if not, there isn't. - state.linkAlternativeBacktracks(this); - } - state.checkedTotal -= countCheckedForCurrentAlternative; - countCheckedForCurrentAlternative = countToCheckForNextAlternative; - state.checkedTotal += countCheckedForCurrentAlternative; - } - } - } - - // If we get here, all Alternatives failed... - - state.checkedTotal -= countCheckedForCurrentAlternative; - - if (!setRepeatAlternativeLabels) { - // If there are no alternatives that need repeating (all are marked 'onceThrough') then just link - // the match failures to this point, and fall through to the return below. - state.linkAlternativeBacktracks(this); - notEnoughInputForPreviousAlternative.link(this); - } else { - // How much more input need there be to be able to retry from the first alternative? - // examples: - // /yarr_jit/ or /wrec|pcre/ - // In these examples we need check for one more input before looping. 
- // /yarr_jit|pcre/ - // In this case we need check for 5 more input to loop (+4 to allow for the first alterative - // being four longer than the last alternative checked, and another +1 to effectively move - // the start position along by one). - // /yarr|rules/ or /wrec|notsomuch/ - // In these examples, provided that there was sufficient input to have just been matching for - // the second alternative we can loop without checking for available input (since the second - // alternative is longer than the first). In the latter example we need to decrement index - // (by 4) so the start position is only progressed by 1 from the last iteration. - int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1; - - // First, deal with the cases where there was sufficient input to try the last alternative. - if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below. - state.linkAlternativeBacktracks(this); - else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop! - state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this); - else { // no need to check the input, but we do have some bookkeeping to do first. - state.linkAlternativeBacktracks(this); - - // Where necessary update our preserved start position. - if (!m_pattern.m_body->m_hasFixedSize) { - move(index, regT0); - sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); - store32(regT0, Address(output)); - } - - // Update index if necessary, and loop (without checking). - if (incrementForNextIter) - add32(Imm32(incrementForNextIter), index); - jump().linkTo(firstAlternativeInputChecked, this); - } - - notEnoughInputForPreviousAlternative.link(this); - // Update our idea of the start position, if we're tracking this. 
- if (!m_pattern.m_body->m_hasFixedSize) { - if (countCheckedForCurrentAlternative - 1) { - move(index, regT0); - sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); - store32(regT0, Address(output)); - } else - store32(index, Address(output)); - } - - // Check if there is sufficent input to run the first alternative again. - jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this); - // No - insufficent input to run the first alteranative, are there any other alternatives we - // might need to check? If so, the last check will have left the index incremented by - // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative - // LESS input is available, to have the effect of just progressing the start position by 1 - // from the last iteration. If this check passes we can just jump up to the check associated - // with the first alternative in the loop. This is a bit sad, since we'll end up trying the - // first alternative again, and this check will fail (otherwise the check planted just above - // here would have passed). This is a bit sad, however it saves trying to do something more - // complex here in compilation, and in the common case we should end up coallescing the checks. - // - // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least - // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150, - // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there - // is sufficient input to run either alternative (constantly failing). If there had been only - // one alternative, or if the shorter alternative had come first, we would have terminated - // immediately. 
:-/ - if (hasShorterAlternatives) - jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this); - // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true, - // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... - // but since we're about to return a failure this doesn't really matter!) - } - - if (m_pattern.m_body->m_callFrameSize) - addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); - - move(Imm32(-1), returnRegister); - - generateReturn(); - } - - void generateEnter() - { -#if WTF_CPU_X86_64 - push(X86Registers::ebp); - move(stackPointerRegister, X86Registers::ebp); - push(X86Registers::ebx); -#elif WTF_CPU_X86 - push(X86Registers::ebp); - move(stackPointerRegister, X86Registers::ebp); - // TODO: do we need spill registers to fill the output pointer if there are no sub captures? - push(X86Registers::ebx); - push(X86Registers::edi); - push(X86Registers::esi); - // load output into edi (2 = saved ebp + return address). - #if WTF_COMPILER_MSVC || WTF_COMPILER_SUNPRO - loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input); - loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index); - loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length); - loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output); - #else - loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output); - #endif -#elif WTF_CPU_ARM - push(ARMRegisters::r4); - push(ARMRegisters::r5); - push(ARMRegisters::r6); -#if WTF_CPU_ARM_TRADITIONAL - push(ARMRegisters::r8); // scratch register -#endif - move(ARMRegisters::r3, output); -#elif WTF_CPU_SPARC - save(Imm32(-m_pattern.m_body->m_callFrameSize * sizeof(void*))); - // set m_callFrameSize to 0 avoid and stack movement later. - m_pattern.m_body->m_callFrameSize = 0; -#elif WTF_CPU_MIPS - // Do nothing. 
-#endif - } - - void generateReturn() - { -#if WTF_CPU_X86_64 - pop(X86Registers::ebx); - pop(X86Registers::ebp); -#elif WTF_CPU_X86 - pop(X86Registers::esi); - pop(X86Registers::edi); - pop(X86Registers::ebx); - pop(X86Registers::ebp); -#elif WTF_CPU_ARM -#if WTF_CPU_ARM_TRADITIONAL - pop(ARMRegisters::r8); // scratch register -#endif - pop(ARMRegisters::r6); - pop(ARMRegisters::r5); - pop(ARMRegisters::r4); -#elif WTF_CPU_SPARC - ret_and_restore(); - return; -#elif WTF_CPU_MIPS - // Do nothing -#endif - ret(); - } - -public: - RegexGenerator(RegexPattern& pattern) - : m_pattern(pattern) - , m_shouldFallBack(false) - { - } - - void generate() - { - generateEnter(); - - if (!m_pattern.m_body->m_hasFixedSize) - store32(index, Address(output)); - - if (m_pattern.m_body->m_callFrameSize) - subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); - - generateDisjunction(m_pattern.m_body); - } - - void compile(ExecutableAllocator& allocator, RegexCodeBlock& jitObject) - { - generate(); - - if (oom()) { - m_shouldFallBack = true; - return; - } - - ExecutablePool *dummy; - bool ok; - LinkBuffer patchBuffer(this, &allocator, &dummy, &ok); - if (!ok) { - m_shouldFallBack = true; - return; - } - - for (unsigned i = 0; i < m_backtrackRecords.length(); ++i) - patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation)); - - jitObject.set(patchBuffer.finalizeCode()); - } - - bool shouldFallBack() - { - return m_shouldFallBack; - } - -private: - RegexPattern& m_pattern; - bool m_shouldFallBack; - js::Vector m_backtrackRecords; -}; - -void jitCompileRegex(ExecutableAllocator& allocator, RegexCodeBlock& jitObject, const UString&patternString, unsigned& numSubpatterns, int &error, bool &fellBack, bool ignoreCase, bool multiline -#ifdef ANDROID - , bool forceFallback -#endif -) -{ -#ifdef ANDROID - if (!forceFallback) { -#endif - fellBack = false; - RegexPattern pattern(ignoreCase, multiline); - if 
((error = compileRegex(patternString, pattern))) - return; - numSubpatterns = pattern.m_numSubpatterns; - - if (!pattern.m_containsBackreferences) { - RegexGenerator generator(pattern); - generator.compile(allocator, jitObject); - if (!generator.shouldFallBack()) - return; - } -#ifdef ANDROID - } // forceFallback -#endif - - fellBack = true; - JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase; - JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine; - jitObject.setFallback(jsRegExpCompile(reinterpret_cast(const_cast(patternString).chars()), patternString.length(), ignoreCaseOption, multilineOption, &numSubpatterns, &error)); -} - -}} - -#endif diff --git a/js/src/yarr/yarr/RegexJIT.h b/js/src/yarr/yarr/RegexJIT.h deleted file mode 100644 index 60a51b484c02..000000000000 --- a/js/src/yarr/yarr/RegexJIT.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (C) 2009 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef RegexJIT_h -#define RegexJIT_h - -#if ENABLE_ASSEMBLER - -#include "assembler/assembler/MacroAssembler.h" -#include "assembler/assembler/MacroAssemblerCodeRef.h" -#include "assembler/jit/ExecutableAllocator.h" -#include "RegexPattern.h" -#include "yarr/jswtfbridge.h" - -#include "yarr/pcre/pcre.h" -struct JSRegExp; // temporary, remove when fallback is removed. - -#if WTF_CPU_X86 && !WTF_COMPILER_MSVC && !WTF_COMPILER_SUNPRO -#define YARR_CALL __attribute__ ((regparm (3))) -#else -#define YARR_CALL -#endif - -struct JSContext; - -namespace JSC { - -namespace Yarr { - -class RegexCodeBlock { - typedef int (*RegexJITCode)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL; - -public: - RegexCodeBlock() - : m_fallback(0) - { - } - - ~RegexCodeBlock() - { - if (m_fallback) - jsRegExpFree(m_fallback); - if (m_ref.m_size) - m_ref.m_executablePool->release(); - } - - JSRegExp* getFallback() { return m_fallback; } - void setFallback(JSRegExp* fallback) { m_fallback = fallback; } - - bool operator!() { return (!m_ref.m_code.executableAddress() && !m_fallback); } - void set(MacroAssembler::CodeRef ref) { m_ref = ref; } - - int execute(const UChar* input, unsigned start, unsigned length, int* output) - { - void *code = m_ref.m_code.executableAddress(); - return JS_EXTENSION((reinterpret_cast(code))(input, start, length, output)); - } - -private: - MacroAssembler::CodeRef m_ref; - JSRegExp* m_fallback; -}; - -void 
jitCompileRegex(ExecutableAllocator &allocator, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, int& error, bool &fellBack, bool ignoreCase = false, bool multiline = false -#ifdef ANDROID - , bool forceFallback = false -#endif -); - -inline int executeRegex(JSContext *cx, RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output, int outputArraySize) -{ - if (JSRegExp* fallback = jitObject.getFallback()) { - int result = jsRegExpExecute(cx, fallback, input, length, start, output, outputArraySize); - - if (result == JSRegExpErrorHitLimit) - return HitRecursionLimit; - - // -1 represents no-match for both PCRE and YARR. - JS_ASSERT(result >= -1); - return result; - } - - return jitObject.execute(input, start, length, output); -} - -} } // namespace JSC::Yarr - -#endif /* ENABLE_ASSEMBLER */ - -#endif // RegexJIT_h diff --git a/toolkit/content/license.html b/toolkit/content/license.html index 50711ae8a547..7341d40d7f65 100644 --- a/toolkit/content/license.html +++ b/toolkit/content/license.html @@ -2032,7 +2032,7 @@ POSSIBILITY OF SUCH DAMAGE.

Apple License

-

This license applies to certain files in the directories js/src/assembler/assembler/, js/src/assembler/wtf/, js/src/yarr/wtf, js/src/yarr/yarr, and widget/src/cocoa.

+

This license applies to certain files in the directories js/src/assembler/assembler/, js/src/assembler/wtf/, js/src/yarr, and widget/src/cocoa.

 Copyright (C) 2008, 2009 Apple Inc. All rights reserved.