Bug 1562830 - Keep Baseline Interpreter bytecode pc in a register between VM/IC calls. r=djvj

This is both simpler and faster than the old scheme where the pc was stored in
a register but could be clobbered by R2.

On x64 this wins about 9-10%. On 32-bit x86 we don't have enough registers so
there we load the pc from the frame in more cases. That's about a 2-3%
regression and is a reasonable trade-off.

Differential Revision: https://phabricator.services.mozilla.com/D36583

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Jan de Mooij 2019-07-06 12:30:31 +00:00
parent 65bcd242ec
commit 4165bae74e
9 changed files with 257 additions and 119 deletions

View File

@ -49,13 +49,6 @@ using mozilla::Maybe;
namespace js {
namespace jit {
// When generating the Baseline Interpreter, this register is guaranteed to hold
// the bytecode pc at the start of a bytecode instruction. Instructions are free
// to clobber this register: the frame's interpreterPC is the canonical
// location for the pc. This register is useful to avoid loading the pc when
// compiling simple ops like JSOP_INT8 or JSOP_GETLOCAL.
static constexpr Register PCRegAtStart = R2.scratchReg();
BaselineCompilerHandler::BaselineCompilerHandler(JSContext* cx,
MacroAssembler& masm,
TempAllocator& alloc,
@ -383,20 +376,44 @@ MethodStatus BaselineCompiler::compile() {
return Method_Compiled;
}
static void LoadInt8Operand(MacroAssembler& masm, Register pc, Register dest) {
// On most platforms we use a dedicated bytecode PC register to avoid many
// dependent loads and stores for sequences of simple bytecode ops. This
// register must be saved/restored around VM and IC calls.
//
// On 32-bit x86 we don't have enough registers for this (because R0-R2 require
// 6 registers) so there we always store the pc on the frame.
//
// Returns true when the target defines a dedicated pc register, i.e. when
// InterpreterPCReg is something other than InvalidReg. The result is a
// compile-time constant, so callers can branch on it while generating code.
static constexpr bool HasInterpreterPCReg() {
return InterpreterPCReg != InvalidReg;
}
// Returns a register that holds the current bytecode pc.
//
// If the platform has a dedicated pc register, that register is returned and
// no code is emitted (|scratch| is left untouched). Otherwise code is emitted
// to load the pc from the frame's interpreterPC slot into |scratch|, and
// |scratch| is returned.
static Register LoadBytecodePC(MacroAssembler& masm, Register scratch) {
  if (!HasInterpreterPCReg()) {
    // No dedicated register: the frame slot is the only copy of the pc.
    const Address framePC(BaselineFrameReg,
                          BaselineFrame::reverseOffsetOfInterpreterPC());
    masm.loadPtr(framePC, scratch);
    return scratch;
  }
  return InterpreterPCReg;
}
// Emits code that loads the 8-bit operand located sizeof(jsbytecode) bytes
// past the current bytecode pc into |dest|, sign-extending it. |dest| doubles
// as the scratch register for the pc when there is no dedicated pc register.
static void LoadInt8Operand(MacroAssembler& masm, Register dest) {
  const Address operand(LoadBytecodePC(masm, dest), sizeof(jsbytecode));
  masm.load8SignExtend(operand, dest);
}
static void LoadUint8Operand(MacroAssembler& masm, Register pc, Register dest) {
// Emits code that loads the 8-bit operand located sizeof(jsbytecode) bytes
// past the current bytecode pc into |dest|, zero-extending it. |dest| doubles
// as the scratch register for the pc when there is no dedicated pc register.
static void LoadUint8Operand(MacroAssembler& masm, Register dest) {
  const Address operand(LoadBytecodePC(masm, dest), sizeof(jsbytecode));
  masm.load8ZeroExtend(operand, dest);
}
static void LoadUint16Operand(MacroAssembler& masm, Register pc,
Register dest) {
// Emits code that loads the 16-bit operand located sizeof(jsbytecode) bytes
// past the current bytecode pc into |dest|, zero-extending it. |dest| doubles
// as the scratch register for the pc when there is no dedicated pc register.
static void LoadUint16Operand(MacroAssembler& masm, Register dest) {
  const Address operand(LoadBytecodePC(masm, dest), sizeof(jsbytecode));
  masm.load16ZeroExtend(operand, dest);
}
static void LoadInt32Operand(MacroAssembler& masm, Register pc, Register dest) {
// Emits code that loads the 32-bit operand located sizeof(jsbytecode) bytes
// past the current bytecode pc into |dest|. |dest| doubles as the scratch
// register for the pc when there is no dedicated pc register.
static void LoadInt32Operand(MacroAssembler& masm, Register dest) {
  const Address operand(LoadBytecodePC(masm, dest), sizeof(jsbytecode));
  masm.load32(operand, dest);
}
@ -405,18 +422,19 @@ static void LoadInt32OperandSignExtendToPtr(MacroAssembler& masm, Register pc,
masm.load32SignExtendToPtr(Address(pc, sizeof(jsbytecode)), dest);
}
static void LoadUint24Operand(MacroAssembler& masm, Register pc, size_t offset,
// Emits code that loads a 24-bit unsigned operand into |dest|.
//
// A full 32-bit word is read starting |offset| bytes past the bytecode pc and
// then shifted right by 8 bits. This discards the lowest byte of the word (the
// opcode when |offset| is 0) and leaves the following three bytes as the
// result.
// NOTE(review): this appears to rely on little-endian byte order and on the
// target tolerating an unaligned 32-bit load -- confirm for all platforms.
static void LoadUint24Operand(MacroAssembler& masm, size_t offset,
                              Register dest) {
  const Address word(LoadBytecodePC(masm, dest), offset);
  masm.load32(word, dest);
  masm.rshift32(Imm32(8), dest);
}
static void LoadInlineValueOperand(MacroAssembler& masm, Register pc,
ValueOperand dest) {
// Emits code that loads the Value stored inline in the bytecode,
// sizeof(jsbytecode) bytes past the current pc, into |dest|. dest.scratchReg()
// is used to hold the pc when there is no dedicated pc register.
static void LoadInlineValueOperand(MacroAssembler& masm, ValueOperand dest) {
  // The Value in the bytecode stream might be unaligned. We rely on all our
  // platforms having appropriate support for unaligned accesses (except for
  // floating point instructions on ARM), hence the unaligned-load helper.
  const Address inlineValue(LoadBytecodePC(masm, dest.scratchReg()),
                            sizeof(jsbytecode));
  masm.loadUnalignedValue(inlineValue, dest);
}
@ -430,6 +448,26 @@ void BaselineInterpreterCodeGen::loadScript(Register dest) {
masm.loadPtr(frame.addressOfInterpreterScript(), dest);
}
// Baseline Compiler specialization: intentionally emits nothing.
// NOTE(review): presumably compiled code never keeps the bytecode pc in
// InterpreterPCReg, so there is nothing to spill -- confirm.
template <>
void BaselineCompilerCodeGen::saveInterpreterPCReg() {}
// Baseline Interpreter specialization: emits a store of InterpreterPCReg into
// the frame's interpreterPC slot. Emits nothing on platforms without a
// dedicated pc register.
template <>
void BaselineInterpreterCodeGen::saveInterpreterPCReg() {
  if (!HasInterpreterPCReg()) {
    return;
  }
  masm.storePtr(InterpreterPCReg, frame.addressOfInterpreterPC());
}
// Baseline Compiler specialization: intentionally emits nothing.
// NOTE(review): presumably compiled code never keeps the bytecode pc in
// InterpreterPCReg, so there is nothing to reload -- confirm.
template <>
void BaselineCompilerCodeGen::restoreInterpreterPCReg() {}
// Baseline Interpreter specialization: emits a load of the frame's
// interpreterPC slot back into InterpreterPCReg. Emits nothing on platforms
// without a dedicated pc register.
template <>
void BaselineInterpreterCodeGen::restoreInterpreterPCReg() {
  if (!HasInterpreterPCReg()) {
    return;
  }
  masm.loadPtr(frame.addressOfInterpreterPC(), InterpreterPCReg);
}
template <>
void BaselineCompilerCodeGen::loadScriptAtom(Register index, Register dest) {
MOZ_CRASH("BaselineCompiler shouldn't call loadScriptAtom");
@ -516,6 +554,8 @@ template <typename Handler>
bool BaselineCodeGen<Handler>::emitOutOfLinePostBarrierSlot() {
masm.bind(&postBarrierSlot_);
saveInterpreterPCReg();
Register objReg = R2.scratchReg();
AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
regs.take(R0);
@ -537,6 +577,8 @@ bool BaselineCodeGen<Handler>::emitOutOfLinePostBarrierSlot() {
masm.passABIArg(objReg);
masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, PostWriteBarrier));
restoreInterpreterPCReg();
masm.popValue(R0);
masm.ret();
return true;
@ -579,9 +621,11 @@ bool BaselineCompilerCodeGen::emitNextIC() {
// Emits the call into the IC chain for the current op in the interpreter.
//
// The pc register is spilled to the frame before the call and reloaded
// afterwards. In between, the frame's current ICEntry pointer is loaded into
// ICStubReg, replaced by that entry's first stub, and the stub's code is
// called. Always returns true.
template <>
bool BaselineInterpreterCodeGen::emitNextIC() {
  const Address firstStub(ICStubReg, ICEntry::offsetOfFirstStub());
  const Address stubCode(ICStubReg, ICStub::offsetOfStubCode());

  saveInterpreterPCReg();

  // ICStubReg := frame->interpreterICEntry->firstStub
  masm.loadPtr(frame.addressOfInterpreterICEntry(), ICStubReg);
  masm.loadPtr(firstStub, ICStubReg);
  masm.call(stubCode);

  restoreInterpreterPCReg();
  return true;
}
@ -663,6 +707,8 @@ bool BaselineCodeGen<Handler>::callVMInternal(VMFunctionId id,
// Assert all arguments were pushed.
MOZ_ASSERT(masm.framePushed() - pushedBeforeCall_ == argSize);
saveInterpreterPCReg();
Address frameSizeAddress(BaselineFrameReg,
BaselineFrame::reverseOffsetOfFrameSize());
uint32_t frameBaseSize =
@ -701,6 +747,8 @@ bool BaselineCodeGen<Handler>::callVMInternal(VMFunctionId id,
// Pop arguments from framePushed.
masm.implicitPop(fun.explicitStackSlots() * sizeof(void*));
restoreInterpreterPCReg();
#ifdef DEBUG
// Assert the frame does not have an override pc when we're executing JIT
// code.
@ -793,7 +841,11 @@ void BaselineInterpreterCodeGen::emitIsDebuggeeCheck() {
Label skipCheck;
CodeOffset toggleOffset = masm.toggledJump(&skipCheck);
EmitCallFrameIsDebuggeeCheck(masm);
{
saveInterpreterPCReg();
EmitCallFrameIsDebuggeeCheck(masm);
restoreInterpreterPCReg();
}
masm.bind(&skipCheck);
handler.setDebuggeeCheckOffset(toggleOffset);
}
@ -907,7 +959,11 @@ void BaselineCompilerCodeGen::pushBytecodePCArg() {
template <>
void BaselineInterpreterCodeGen::pushBytecodePCArg() {
pushArg(frame.addressOfInterpreterPC());
if (HasInterpreterPCReg()) {
pushArg(InterpreterPCReg);
} else {
pushArg(frame.addressOfInterpreterPC());
}
}
template <>
@ -921,8 +977,7 @@ void BaselineInterpreterCodeGen::pushScriptNameArg(Register scratch1,
Register scratch2) {
MOZ_ASSERT(scratch1 != scratch2);
masm.loadPtr(frame.addressOfInterpreterPC(), scratch1);
LoadInt32Operand(masm, scratch1, scratch1);
LoadInt32Operand(masm, scratch1);
loadScriptAtom(scratch1, scratch2);
pushArg(scratch2);
@ -958,8 +1013,7 @@ void BaselineInterpreterCodeGen::loadScriptGCThing(ScriptGCThingType type,
MOZ_ASSERT(dest != scratch);
// Load the index in |scratch|.
masm.loadPtr(frame.addressOfInterpreterPC(), scratch);
LoadInt32Operand(masm, scratch, scratch);
LoadInt32Operand(masm, scratch);
// Load the GCCellPtr.
loadScript(dest);
@ -1021,8 +1075,7 @@ void BaselineCompilerCodeGen::pushUint8BytecodeOperandArg(Register) {
template <>
void BaselineInterpreterCodeGen::pushUint8BytecodeOperandArg(Register scratch) {
masm.loadPtr(frame.addressOfInterpreterPC(), scratch);
LoadUint8Operand(masm, scratch, scratch);
LoadUint8Operand(masm, scratch);
pushArg(scratch);
}
@ -1035,8 +1088,7 @@ void BaselineCompilerCodeGen::pushUint16BytecodeOperandArg(Register) {
template <>
void BaselineInterpreterCodeGen::pushUint16BytecodeOperandArg(
Register scratch) {
masm.loadPtr(frame.addressOfInterpreterPC(), scratch);
LoadUint16Operand(masm, scratch, scratch);
LoadUint16Operand(masm, scratch);
pushArg(scratch);
}
@ -1051,8 +1103,7 @@ void BaselineCompilerCodeGen::loadInt32LengthBytecodeOperand(Register dest) {
template <>
void BaselineInterpreterCodeGen::loadInt32LengthBytecodeOperand(Register dest) {
masm.loadPtr(frame.addressOfInterpreterPC(), dest);
LoadInt32Operand(masm, dest, dest);
LoadInt32Operand(masm, dest);
}
template <>
@ -1068,8 +1119,7 @@ template <>
void BaselineInterpreterCodeGen::loadInt32IndexBytecodeOperand(
ValueOperand dest) {
Register scratch = dest.scratchReg();
masm.loadPtr(frame.addressOfInterpreterPC(), scratch);
LoadInt32Operand(masm, scratch, scratch);
LoadInt32Operand(masm, scratch);
masm.tagValue(JSVAL_TYPE_INT32, scratch, dest);
}
@ -1143,7 +1193,10 @@ void BaselineCompilerCodeGen::emitInitFrameFields() {
template <>
void BaselineInterpreterCodeGen::emitInitFrameFields() {
Register scratch1 = R0.scratchReg();
// If we have a dedicated PC register we use it as scratch1 to avoid a
// register move below.
Register scratch1 =
HasInterpreterPCReg() ? InterpreterPCReg : R0.scratchReg();
Register scratch2 = R2.scratchReg();
masm.store32(Imm32(BaselineFrame::RUNNING_IN_INTERPRETER),
@ -1174,12 +1227,18 @@ void BaselineInterpreterCodeGen::emitInitFrameFields() {
Address(scratch2, JitScript::offsetOfICEntries()), scratch2);
masm.storePtr(scratch2, frame.addressOfInterpreterICEntry());
// Initialize interpreterPC.
// Initialize interpreter pc.
masm.loadPtr(Address(scratch1, JSScript::offsetOfScriptData()), scratch1);
masm.load32(Address(scratch1, SharedScriptData::offsetOfCodeOffset()),
scratch2);
masm.addPtr(scratch2, scratch1);
masm.storePtr(scratch1, frame.addressOfInterpreterPC());
if (HasInterpreterPCReg()) {
MOZ_ASSERT(scratch1 == InterpreterPCReg,
"pc must be stored in the pc register");
} else {
masm.storePtr(scratch1, frame.addressOfInterpreterPC());
}
}
template <>
@ -1712,7 +1771,7 @@ bool BaselineCompilerCodeGen::emit_JSOP_POPN() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_POPN() {
LoadUint16Operand(masm, PCRegAtStart, R0.scratchReg());
LoadUint16Operand(masm, R0.scratchReg());
frame.popn(R0.scratchReg());
return true;
}
@ -1733,7 +1792,7 @@ bool BaselineCompilerCodeGen::emit_JSOP_DUPAT() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_DUPAT() {
LoadUint24Operand(masm, PCRegAtStart, 0, R0.scratchReg());
LoadUint24Operand(masm, 0, R0.scratchReg());
masm.loadValue(frame.addressOfStackValue(R0.scratchReg()), R0);
frame.push(R0);
return true;
@ -1806,17 +1865,18 @@ bool BaselineCompilerCodeGen::emit_JSOP_PICK() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_PICK() {
// First, move the value to move up into R0.
LoadUint8Operand(masm, PCRegAtStart, PCRegAtStart);
masm.loadValue(frame.addressOfStackValue(PCRegAtStart), R0);
Register scratch = R2.scratchReg();
LoadUint8Operand(masm, scratch);
masm.loadValue(frame.addressOfStackValue(scratch), R0);
// Move the other values down.
Label top, done;
masm.bind(&top);
masm.sub32(Imm32(1), PCRegAtStart);
masm.branchTest32(Assembler::Signed, PCRegAtStart, PCRegAtStart, &done);
masm.sub32(Imm32(1), scratch);
masm.branchTest32(Assembler::Signed, scratch, scratch, &done);
{
masm.loadValue(frame.addressOfStackValue(PCRegAtStart), R1);
masm.storeValue(R1, frame.addressOfStackValue(PCRegAtStart, sizeof(Value)));
masm.loadValue(frame.addressOfStackValue(scratch), R1);
masm.storeValue(R1, frame.addressOfStackValue(scratch, sizeof(Value)));
masm.jump(&top);
}
@ -1859,14 +1919,15 @@ bool BaselineCompilerCodeGen::emit_JSOP_UNPICK() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_UNPICK() {
LoadUint8Operand(masm, PCRegAtStart, PCRegAtStart);
Register scratch = R2.scratchReg();
LoadUint8Operand(masm, scratch);
// Move the top value into R0.
masm.loadValue(frame.addressOfStackValue(-1), R0);
// Overwrite the nth stack value with R0 but first save the old value in R1.
masm.loadValue(frame.addressOfStackValue(PCRegAtStart), R1);
masm.storeValue(R0, frame.addressOfStackValue(PCRegAtStart));
masm.loadValue(frame.addressOfStackValue(scratch), R1);
masm.storeValue(R0, frame.addressOfStackValue(scratch));
// Now for each slot x in [n-1, 1] do the following:
//
@ -1879,7 +1940,7 @@ bool BaselineInterpreterCodeGen::emit_JSOP_UNPICK() {
// values.
{
Label ok;
masm.branch32(Assembler::GreaterThan, PCRegAtStart, Imm32(0), &ok);
masm.branch32(Assembler::GreaterThan, scratch, Imm32(0), &ok);
masm.assumeUnreachable("JSOP_UNPICK with operand <= 0?");
masm.bind(&ok);
}
@ -1887,12 +1948,12 @@ bool BaselineInterpreterCodeGen::emit_JSOP_UNPICK() {
Label top, done;
masm.bind(&top);
masm.sub32(Imm32(1), PCRegAtStart);
masm.branchTest32(Assembler::Zero, PCRegAtStart, PCRegAtStart, &done);
masm.sub32(Imm32(1), scratch);
masm.branchTest32(Assembler::Zero, scratch, scratch, &done);
{
// Overwrite stack slot x with slot x + 1, saving the old value in R1.
masm.loadValue(frame.addressOfStackValue(PCRegAtStart), R0);
masm.storeValue(R1, frame.addressOfStackValue(PCRegAtStart));
masm.loadValue(frame.addressOfStackValue(scratch), R0);
masm.storeValue(R1, frame.addressOfStackValue(scratch));
masm.moveValue(R0, R1);
masm.jump(&top);
}
@ -1916,15 +1977,20 @@ void BaselineCompilerCodeGen::emitJump() {
template <>
void BaselineInterpreterCodeGen::emitJump() {
// We have to add the current pc's jump offset to the frame's pc. We can use
// R0 as scratch because we jump to the "next op" label and that assumes a
// synced stack.
Register scratch = R0.scratchReg();
masm.loadPtr(frame.addressOfInterpreterPC(), scratch);
LoadInt32OperandSignExtendToPtr(masm, scratch, scratch);
masm.addPtr(frame.addressOfInterpreterPC(), scratch);
masm.storePtr(scratch, frame.addressOfInterpreterPC());
masm.jump(handler.interpretOpLabel());
// We have to add the current pc's jump offset to the current pc. We can use
// R0 and R1 as scratch because we jump to the "next op" label so these
// registers aren't in use at this point.
Register scratch1 = R0.scratchReg();
Register scratch2 = R1.scratchReg();
Register pc = LoadBytecodePC(masm, scratch1);
LoadInt32OperandSignExtendToPtr(masm, pc, scratch2);
if (HasInterpreterPCReg()) {
masm.addPtr(scratch2, InterpreterPCReg);
} else {
masm.addPtr(pc, scratch2);
masm.storePtr(scratch2, frame.addressOfInterpreterPC());
}
masm.jump(handler.interpretOpWithPCRegLabel());
}
template <>
@ -2452,7 +2518,7 @@ bool BaselineCompilerCodeGen::emit_JSOP_INT8() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_INT8() {
LoadInt8Operand(masm, PCRegAtStart, R0.scratchReg());
LoadInt8Operand(masm, R0.scratchReg());
masm.tagValue(JSVAL_TYPE_INT32, R0.scratchReg(), R0);
frame.push(R0);
return true;
@ -2466,7 +2532,7 @@ bool BaselineCompilerCodeGen::emit_JSOP_INT32() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_INT32() {
LoadInt32Operand(masm, PCRegAtStart, R0.scratchReg());
LoadInt32Operand(masm, R0.scratchReg());
masm.tagValue(JSVAL_TYPE_INT32, R0.scratchReg(), R0);
frame.push(R0);
return true;
@ -2480,7 +2546,7 @@ bool BaselineCompilerCodeGen::emit_JSOP_UINT16() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_UINT16() {
LoadUint16Operand(masm, PCRegAtStart, R0.scratchReg());
LoadUint16Operand(masm, R0.scratchReg());
masm.tagValue(JSVAL_TYPE_INT32, R0.scratchReg(), R0);
frame.push(R0);
return true;
@ -2494,7 +2560,7 @@ bool BaselineCompilerCodeGen::emit_JSOP_UINT24() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_UINT24() {
LoadUint24Operand(masm, PCRegAtStart, 0, R0.scratchReg());
LoadUint24Operand(masm, 0, R0.scratchReg());
masm.tagValue(JSVAL_TYPE_INT32, R0.scratchReg(), R0);
frame.push(R0);
return true;
@ -2513,7 +2579,7 @@ bool BaselineCompilerCodeGen::emit_JSOP_DOUBLE() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_DOUBLE() {
LoadInlineValueOperand(masm, PCRegAtStart, R0);
LoadInlineValueOperand(masm, R0);
frame.push(R0);
return true;
}
@ -2545,7 +2611,7 @@ template <>
bool BaselineInterpreterCodeGen::emit_JSOP_STRING() {
Register scratch1 = R0.scratchReg();
Register scratch2 = R1.scratchReg();
LoadInt32Operand(masm, PCRegAtStart, scratch1);
LoadInt32Operand(masm, scratch1);
loadScriptAtom(scratch1, scratch2);
@ -2566,7 +2632,7 @@ template <>
bool BaselineInterpreterCodeGen::emit_JSOP_SYMBOL() {
Register scratch1 = R0.scratchReg();
Register scratch2 = R1.scratchReg();
LoadUint8Operand(masm, PCRegAtStart, scratch1);
LoadUint8Operand(masm, scratch1);
masm.movePtr(ImmPtr(cx->runtime()->wellKnownSymbols), scratch2);
masm.loadPtr(BaseIndex(scratch2, scratch1, ScalePointer), scratch1);
@ -3604,11 +3670,11 @@ Address BaselineCodeGen<Handler>::getEnvironmentCoordinateAddress(
// For a JOF_ENVCOORD op load the number of hops from the bytecode and skip this
// number of environment objects.
static void LoadAliasedVarEnv(MacroAssembler& masm, Register pc, Register env,
static void LoadAliasedVarEnv(MacroAssembler& masm, Register env,
Register scratch) {
static_assert(ENVCOORD_HOPS_LEN == 1,
"Code assumes number of hops is stored in uint8 operand");
LoadUint8Operand(masm, pc, scratch);
LoadUint8Operand(masm, scratch);
Label top, done;
masm.bind(&top);
@ -3637,12 +3703,12 @@ void BaselineInterpreterCodeGen::emitGetAliasedVar(ValueOperand dest) {
// Load the right environment object.
masm.loadPtr(frame.addressOfEnvironmentChain(), env);
LoadAliasedVarEnv(masm, PCRegAtStart, env, scratch);
LoadAliasedVarEnv(masm, env, scratch);
// Load the slot index.
static_assert(ENVCOORD_SLOT_LEN == 3,
"Code assumes slot is stored in uint24 operand");
LoadUint24Operand(masm, PCRegAtStart, ENVCOORD_HOPS_LEN, scratch);
LoadUint24Operand(masm, ENVCOORD_HOPS_LEN, scratch);
// Load the Value from a fixed or dynamic slot.
// See EnvironmentObject::nonExtensibleIsFixedSlot.
@ -3712,7 +3778,9 @@ bool BaselineInterpreterCodeGen::emit_JSOP_SETALIASEDVAR() {
AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
regs.take(BaselineFrameReg);
regs.take(R2);
MOZ_ASSERT(!regs.has(PCRegAtStart), "R2 contains PCRegAtStart");
if (HasInterpreterPCReg()) {
regs.take(InterpreterPCReg);
}
Register env = regs.takeAny();
Register scratch1 = regs.takeAny();
@ -3721,12 +3789,12 @@ bool BaselineInterpreterCodeGen::emit_JSOP_SETALIASEDVAR() {
// Load the right environment object.
masm.loadPtr(frame.addressOfEnvironmentChain(), env);
LoadAliasedVarEnv(masm, PCRegAtStart, env, scratch1);
LoadAliasedVarEnv(masm, env, scratch1);
// Load the slot index.
static_assert(ENVCOORD_SLOT_LEN == 3,
"Code assumes slot is stored in uint24 operand");
LoadUint24Operand(masm, PCRegAtStart, ENVCOORD_HOPS_LEN, scratch1);
LoadUint24Operand(masm, ENVCOORD_HOPS_LEN, scratch1);
// Store the RHS Value in R2.
masm.loadValue(frame.addressOfStackValue(-1), R2);
@ -4152,7 +4220,7 @@ static BaseValueIndex ComputeAddressOfLocal(MacroAssembler& masm,
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_GETLOCAL() {
Register scratch = R0.scratchReg();
LoadUint24Operand(masm, PCRegAtStart, 0, scratch);
LoadUint24Operand(masm, 0, scratch);
BaseValueIndex addr = ComputeAddressOfLocal(masm, scratch);
masm.loadValue(addr, R0);
frame.push(R0);
@ -4173,7 +4241,7 @@ bool BaselineCompilerCodeGen::emit_JSOP_SETLOCAL() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_SETLOCAL() {
Register scratch = R0.scratchReg();
LoadUint24Operand(masm, PCRegAtStart, 0, scratch);
LoadUint24Operand(masm, 0, scratch);
BaseValueIndex addr = ComputeAddressOfLocal(masm, scratch);
masm.loadValue(frame.addressOfStackValue(-1), R1);
masm.storeValue(R1, addr);
@ -4262,7 +4330,7 @@ bool BaselineInterpreterCodeGen::emitFormalArgAccess(JSOp op) {
// Load the index.
Register argReg = R1.scratchReg();
LoadUint16Operand(masm, PCRegAtStart, argReg);
LoadUint16Operand(masm, argReg);
// If the frame has no arguments object, this must be an unaliased access.
Label isUnaliased, done;
@ -4496,7 +4564,7 @@ bool BaselineCompilerCodeGen::emit_JSOP_CHECKLEXICAL() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_CHECKLEXICAL() {
Register scratch = R0.scratchReg();
LoadUint24Operand(masm, PCRegAtStart, 0, scratch);
LoadUint24Operand(masm, 0, scratch);
BaseValueIndex addr = ComputeAddressOfLocal(masm, scratch);
masm.loadValue(addr, R0);
return emitUninitializedLexicalCheck(R0);
@ -4559,7 +4627,7 @@ bool BaselineInterpreterCodeGen::emitCall(JSOp op) {
MOZ_ASSERT(IsCallOp(op));
// The IC expects argc in R0.
LoadUint16Operand(masm, PCRegAtStart, R0.scratchReg());
LoadUint16Operand(masm, R0.scratchReg());
if (!emitNextIC()) {
return false;
}
@ -4569,8 +4637,7 @@ bool BaselineInterpreterCodeGen::emitCall(JSOp op) {
Register scratch = R1.scratchReg();
uint32_t extraValuesToPop = IsConstructorCallOp(op) ? 3 : 2;
Register spReg = AsRegister(masm.getStackPointer());
masm.loadPtr(frame.addressOfInterpreterPC(), scratch);
LoadUint16Operand(masm, scratch, scratch);
LoadUint16Operand(masm, scratch);
masm.computeEffectiveAddress(
BaseValueIndex(spReg, scratch, extraValuesToPop * sizeof(Value)), spReg);
frame.push(R0);
@ -5414,10 +5481,9 @@ void BaselineInterpreterCodeGen::emitGetTableSwitchIndex(ValueOperand val,
masm.branchTestInt32(Assembler::NotEqual, val, &jumpToDefault);
masm.unboxInt32(val, dest);
masm.loadPtr(frame.addressOfInterpreterPC(), scratch1);
Address lowAddr(scratch1, sizeof(jsbytecode) + TableSwitchOpLowOffset);
Address highAddr(scratch1, sizeof(jsbytecode) + TableSwitchOpHighOffset);
Register pcReg = LoadBytecodePC(masm, scratch1);
Address lowAddr(pcReg, sizeof(jsbytecode) + TableSwitchOpLowOffset);
Address highAddr(pcReg, sizeof(jsbytecode) + TableSwitchOpHighOffset);
// Jump to default if val > high.
masm.branch32(Assembler::LessThan, highAddr, dest, &jumpToDefault);
@ -5458,9 +5524,7 @@ void BaselineInterpreterCodeGen::emitTableSwitchJump(Register key,
Register scratch1,
Register scratch2) {
// Load the op's firstResumeIndex in scratch1.
masm.loadPtr(frame.addressOfInterpreterPC(), scratch1);
LoadUint24Operand(masm, scratch1, TableSwitchOpFirstResumeIndexOffset,
scratch1);
LoadUint24Operand(masm, TableSwitchOpFirstResumeIndexOffset, scratch1);
masm.add32(key, scratch1);
jumpToResumeEntry(scratch1, key, scratch2);
@ -5539,6 +5603,9 @@ bool BaselineCodeGen<Handler>::emit_JSOP_ENDITER() {
AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
regs.take(BaselineFrameReg);
if (HasInterpreterPCReg()) {
regs.take(InterpreterPCReg);
}
Register obj = R0.scratchReg();
regs.take(obj);
@ -5608,7 +5675,7 @@ bool BaselineCompilerCodeGen::emit_JSOP_ENVCALLEE() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_ENVCALLEE() {
Register numHops = R0.scratchReg();
LoadUint8Operand(masm, PCRegAtStart, numHops);
LoadUint8Operand(masm, numHops);
Register env = R1.scratchReg();
masm.loadPtr(frame.addressOfEnvironmentChain(), env);
@ -6032,6 +6099,9 @@ bool BaselineCodeGen<Handler>::emitGeneratorResume(
AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
regs.take(BaselineFrameReg);
if (HasInterpreterPCReg()) {
regs.take(InterpreterPCReg);
}
// Load generator object.
Register genObj = regs.takeAny();
@ -6310,6 +6380,7 @@ bool BaselineCodeGen<Handler>::emitGeneratorResume(
} else {
masm.switchToBaselineFrameRealm(R2.scratchReg());
}
restoreInterpreterPCReg();
frame.popn(2);
frame.push(R0);
return true;
@ -6324,8 +6395,9 @@ bool BaselineCompilerCodeGen::emit_JSOP_RESUME() {
template <>
bool BaselineInterpreterCodeGen::emit_JSOP_RESUME() {
Register scratch = R0.scratchReg();
LoadUint8Operand(masm, scratch);
LoadUint8Operand(masm, PCRegAtStart, scratch);
saveInterpreterPCReg();
Label throw_, return_, done;
masm.branch32(Assembler::Equal, scratch,
@ -6404,7 +6476,7 @@ bool BaselineInterpreterCodeGen::emit_JSOP_JUMPTARGET() {
}
// Load icIndex in scratch1.
LoadInt32Operand(masm, PCRegAtStart, scratch1);
LoadInt32Operand(masm, scratch1);
// scratch1 := scratch1 * sizeof(ICEntry)
static_assert(sizeof(ICEntry) == 8 || sizeof(ICEntry) == 16,
@ -6896,27 +6968,29 @@ bool BaselineInterpreterGenerator::emitDebugTrap() {
return true;
}
// Register holding the bytecode pc during dispatch. This exists so the debug
// trap handler can reload the pc into this register when it's done.
//
// This is InterpreterPCReg on platforms that have a dedicated pc register and
// R0.scratchReg() on platforms that do not.
static constexpr Register InterpreterPCRegAtDispatch =
HasInterpreterPCReg() ? InterpreterPCReg : R0.scratchReg();
bool BaselineInterpreterGenerator::emitInterpreterLoop() {
Register scratch1 = R0.scratchReg();
Register scratch2 = R1.scratchReg();
Address pcAddr = frame.addressOfInterpreterPC();
// Entry point for interpreting a bytecode op. No registers are live except
// for InterpreterPCReg.
masm.bind(handler.interpretOpWithPCRegLabel());
// Entry point for interpreting a bytecode op. No registers are live. PC is
// loaded from frame->interpreterPC.
masm.bind(handler.interpretOpLabel());
interpretOpOffset_ = masm.currentOffset();
// Emit a patchable call for debugger breakpoints/stepping. Note: there must
// be no code between interpretOpOffset_ and this debug trap. EnterBaseline
// and BaselineCompileFromBaselineInterpreter depend on this.
// Emit a patchable call for debugger breakpoints/stepping.
if (!emitDebugTrap()) {
return false;
}
Label interpretOpAfterDebugTrap;
masm.bind(&interpretOpAfterDebugTrap);
// Load pc, bytecode op.
masm.loadPtr(pcAddr, PCRegAtStart);
masm.load8ZeroExtend(Address(PCRegAtStart, 0), scratch1);
Register pcReg = LoadBytecodePC(masm, scratch1);
masm.load8ZeroExtend(Address(pcReg, 0), scratch1);
// Jump to table[op].
{
@ -6944,17 +7018,23 @@ bool BaselineInterpreterGenerator::emitInterpreterLoop() {
frame.bumpInterpreterICEntry();
}
// Bump frame->interpreterPC, keep pc in PCRegAtStart.
masm.loadPtr(pcAddr, PCRegAtStart);
masm.addPtr(Imm32(opLength), PCRegAtStart);
masm.storePtr(PCRegAtStart, pcAddr);
// Bump bytecode PC.
if (HasInterpreterPCReg()) {
MOZ_ASSERT(InterpreterPCRegAtDispatch == InterpreterPCReg);
masm.addPtr(Imm32(opLength), InterpreterPCReg);
} else {
MOZ_ASSERT(InterpreterPCRegAtDispatch == scratch1);
masm.loadPtr(frame.addressOfInterpreterPC(), InterpreterPCRegAtDispatch);
masm.addPtr(Imm32(opLength), InterpreterPCRegAtDispatch);
masm.storePtr(InterpreterPCRegAtDispatch, frame.addressOfInterpreterPC());
}
if (!emitDebugTrap()) {
return false;
}
// Load the opcode, jump to table[op].
masm.load8ZeroExtend(Address(PCRegAtStart, 0), scratch1);
masm.load8ZeroExtend(Address(InterpreterPCRegAtDispatch, 0), scratch1);
CodeOffset label = masm.movWithPatch(ImmWord(uintptr_t(-1)), scratch2);
if (!tableLabels_.append(label)) {
return false;
@ -6979,6 +7059,19 @@ bool BaselineInterpreterGenerator::emitInterpreterLoop() {
OPCODE_LIST(EMIT_OP)
#undef EMIT_OP
// External entry point to start interpreting bytecode ops. This is used for
// things like exception handling and OSR.
masm.bind(handler.interpretOpLabel());
interpretOpOffset_ = masm.currentOffset();
restoreInterpreterPCReg();
masm.jump(handler.interpretOpWithPCRegLabel());
// Second external entry point: this skips the debug trap for the first op
// and is used by OSR.
interpretOpNoDebugTrapOffset_ = masm.currentOffset();
restoreInterpreterPCReg();
masm.jump(&interpretOpAfterDebugTrap);
// Emit code for JSOP_UNUSED* ops.
Label invalidOp;
masm.bind(&invalidOp);
@ -7029,16 +7122,18 @@ void BaselineInterpreterGenerator::emitOutOfLineCodeCoverageInstrumentation() {
masm.pushReturnAddress();
#endif
saveInterpreterPCReg();
masm.Push(BaselineFrameReg);
masm.Push(PCRegAtStart);
masm.setupUnalignedABICall(R0.scratchReg());
masm.loadBaselineFramePtr(BaselineFrameReg, R0.scratchReg());
masm.passABIArg(R0.scratchReg());
masm.passABIArg(PCRegAtStart);
Register pcReg = LoadBytecodePC(masm, R2.scratchReg());
masm.passABIArg(pcReg);
masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, jit::HandleCodeCoverageAtPC));
masm.Pop(PCRegAtStart);
masm.Pop(BaselineFrameReg);
restoreInterpreterPCReg();
masm.ret();
}
@ -7103,11 +7198,12 @@ bool BaselineInterpreterGenerator::generate(BaselineInterpreter& interpreter) {
vtune::MarkStub(code, "BaselineInterpreter");
#endif
interpreter.init(
code, interpretOpOffset_, profilerEnterFrameToggleOffset_.offset(),
profilerExitFrameToggleOffset_.offset(),
handler.debuggeeCheckOffset().offset(), std::move(debugTrapOffsets_),
std::move(handler.codeCoverageOffsets()));
interpreter.init(code, interpretOpOffset_, interpretOpNoDebugTrapOffset_,
profilerEnterFrameToggleOffset_.offset(),
profilerExitFrameToggleOffset_.offset(),
handler.debuggeeCheckOffset().offset(),
std::move(debugTrapOffsets_),
std::move(handler.codeCoverageOffsets()));
}
if (cx->runtime()->geckoProfiler().enabled()) {
@ -7128,7 +7224,9 @@ JitCode* JitRuntime::generateDebugTrapHandler(JSContext* cx,
AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
regs.takeUnchecked(BaselineFrameReg);
regs.takeUnchecked(ICStubReg);
regs.takeUnchecked(PCRegAtStart);
if (HasInterpreterPCReg()) {
regs.takeUnchecked(InterpreterPCReg);
}
#ifdef JS_CODEGEN_ARM
regs.takeUnchecked(BaselineSecondScratchReg);
masm.setSecondScratchReg(BaselineSecondScratchReg);
@ -7151,6 +7249,13 @@ JitCode* JitRuntime::generateDebugTrapHandler(JSContext* cx,
&hasDebugScript);
masm.abiret();
masm.bind(&hasDebugScript);
if (HasInterpreterPCReg()) {
// Update frame's bytecode pc because the debugger depends on it.
Address pcAddr(BaselineFrameReg,
BaselineFrame::reverseOffsetOfInterpreterPC());
masm.storePtr(InterpreterPCReg, pcAddr);
}
}
// Load the return address in scratch1.
@ -7185,7 +7290,7 @@ JitCode* JitRuntime::generateDebugTrapHandler(JSContext* cx,
// We have to reload the bytecode pc register.
Address pcAddr(BaselineFrameReg,
BaselineFrame::reverseOffsetOfInterpreterPC());
masm.loadPtr(pcAddr, PCRegAtStart);
masm.loadPtr(pcAddr, InterpreterPCRegAtDispatch);
}
masm.abiret();

View File

@ -337,6 +337,9 @@ class BaselineCodeGen {
// Loads the current JSScript* in dest.
void loadScript(Register dest);
void saveInterpreterPCReg();
void restoreInterpreterPCReg();
// Subtracts |script->nslots() * sizeof(Value)| from reg.
void subtractScriptSlotsSize(Register reg, Register scratch);
@ -667,7 +670,16 @@ class BaselineCompiler final : private BaselineCompilerCodeGen {
// Interface used by BaselineCodeGen for BaselineInterpreterGenerator.
class BaselineInterpreterHandler {
InterpreterFrameInfo frame_;
// Entry point to start interpreting a bytecode op. No registers are live. PC
// is loaded from the frame.
Label interpretOp_;
// Like interpretOp_ but at this point the PC is expected to be in
// InterpreterPCReg.
Label interpretOpWithPCReg_;
// Offset of toggled jump for prologue debugger instrumentation.
CodeOffset debuggeeCheckOffset_;
// Offsets of toggled jumps for code coverage instrumentation.
@ -684,6 +696,8 @@ class BaselineInterpreterHandler {
InterpreterFrameInfo& frame() { return frame_; }
// Returns the label for the entry point that starts interpreting a bytecode
// op with the pc loaded from the frame.
Label* interpretOpLabel() { return &interpretOp_; }
// Returns the label for the entry point that expects the pc to already be in
// InterpreterPCReg.
Label* interpretOpWithPCRegLabel() { return &interpretOpWithPCReg_; }
Label* codeCoverageAtPrologueLabel() { return &codeCoverageAtPrologueLabel_; }
Label* codeCoverageAtPCLabel() { return &codeCoverageAtPCLabel_; }
@ -732,6 +746,9 @@ class BaselineInterpreterGenerator final : private BaselineInterpreterCodeGen {
// Offset of the code to start interpreting a bytecode op.
uint32_t interpretOpOffset_ = 0;
// Like interpretOpOffset_ but skips the debug trap for the current op.
uint32_t interpretOpNoDebugTrapOffset_ = 0;
public:
explicit BaselineInterpreterGenerator(JSContext* cx);

View File

@ -150,13 +150,10 @@ JitExecStatus jit::EnterBaselineAtBranch(JSContext* cx, InterpreterFrame* fp,
data.jitcode += MacroAssembler::ToggledCallSize(data.jitcode);
}
} else {
// As above, use the entry point that skips the debug trap.
const BaselineInterpreter& interp =
cx->runtime()->jitRuntime()->baselineInterpreter();
data.jitcode = interp.interpretOpAddr().value;
if (fp->isDebuggee()) {
// Skip the debug trap emitted by emitInterpreterLoop.
data.jitcode += MacroAssembler::ToggledCallSize(data.jitcode);
}
data.jitcode = interp.interpretOpNoDebugTrapAddr().value;
}
// Note: keep this in sync with SetEnterJitData.
@ -1107,6 +1104,7 @@ void jit::ToggleBaselineTraceLoggerEngine(JSRuntime* runtime, bool enable) {
#endif
void BaselineInterpreter::init(JitCode* code, uint32_t interpretOpOffset,
uint32_t interpretOpNoDebugTrapOffset,
uint32_t profilerEnterToggleOffset,
uint32_t profilerExitToggleOffset,
uint32_t debuggeeCheckOffset,
@ -1114,6 +1112,7 @@ void BaselineInterpreter::init(JitCode* code, uint32_t interpretOpOffset,
CodeOffsetVector&& codeCoverageOffsets) {
code_ = code;
interpretOpOffset_ = interpretOpOffset;
interpretOpNoDebugTrapOffset_ = interpretOpNoDebugTrapOffset;
profilerEnterToggleOffset_ = profilerEnterToggleOffset;
profilerExitToggleOffset_ = profilerExitToggleOffset;
debuggeeCheckOffset_ = debuggeeCheckOffset;

View File

@ -631,6 +631,9 @@ class BaselineInterpreter {
// Offset of the code to start interpreting a bytecode op.
uint32_t interpretOpOffset_ = 0;
// Like interpretOpOffset_ but skips the debug trap for the current op.
uint32_t interpretOpNoDebugTrapOffset_ = 0;
// The offsets for the toggledJump instructions for profiler instrumentation.
uint32_t profilerEnterToggleOffset_ = 0;
uint32_t profilerExitToggleOffset_ = 0;
@ -654,6 +657,7 @@ class BaselineInterpreter {
void operator=(const BaselineInterpreter&) = delete;
void init(JitCode* code, uint32_t interpretOpOffset,
uint32_t interpretOpNoDebugTrapOffset,
uint32_t profilerEnterToggleOffset,
uint32_t profilerExitToggleOffset, uint32_t debuggeeCheckOffset,
CodeOffsetVector&& debugTrapOffsets,
@ -664,6 +668,9 @@ class BaselineInterpreter {
// Returns the address of the code that starts interpreting a bytecode op:
// the start of the interpreter code plus interpretOpOffset_.
TrampolinePtr interpretOpAddr() const {
return TrampolinePtr(codeRaw() + interpretOpOffset_);
}
// Like interpretOpAddr() but returns the entry point that skips the debug
// trap for the current op: the start of the interpreter code plus
// interpretOpNoDebugTrapOffset_.
TrampolinePtr interpretOpNoDebugTrapAddr() const {
return TrampolinePtr(codeRaw() + interpretOpNoDebugTrapOffset_);
}
void toggleProfilerInstrumentation(bool enable);
void toggleDebuggerInstrumentation(bool enable);

View File

@ -185,6 +185,8 @@ static constexpr Register WasmTableCallIndexReg = ABINonArgReg3;
static constexpr Register PreBarrierReg = r1;
static constexpr Register InterpreterPCReg = r9;
static constexpr Register InvalidReg{Registers::invalid_reg};
static constexpr FloatRegister InvalidFloatReg;

View File

@ -69,6 +69,8 @@ static constexpr Register CallTempReg5{Registers::x14};
static constexpr Register PreBarrierReg{Registers::x1};
static constexpr Register InterpreterPCReg{Registers::x9};
static constexpr Register ReturnReg{Registers::x0};
static constexpr Register64 ReturnReg64(ReturnReg);
static constexpr Register JSReturnReg{Registers::x2};

View File

@ -37,6 +37,7 @@ struct ScratchDoubleScope : FloatRegister {
static constexpr Register OsrFrameReg{Registers::invalid_reg};
static constexpr Register PreBarrierReg{Registers::invalid_reg};
static constexpr Register InterpreterPCReg{Registers::invalid_reg};
static constexpr Register CallTempReg0{Registers::invalid_reg};
static constexpr Register CallTempReg1{Registers::invalid_reg};
static constexpr Register CallTempReg2{Registers::invalid_reg};

View File

@ -235,6 +235,8 @@ static constexpr Register OsrFrameReg = IntArgReg3;
static constexpr Register PreBarrierReg = rdx;
static constexpr Register InterpreterPCReg = r14;
static constexpr uint32_t ABIStackAlignment = 16;
static constexpr uint32_t CodeAlignment = 16;
static constexpr uint32_t JitStackAlignment = 16;

View File

@ -129,6 +129,9 @@ static constexpr Register WasmTableCallIndexReg = ABINonArgReg3;
static constexpr Register OsrFrameReg = edx;
static constexpr Register PreBarrierReg = edx;
// Not enough registers for a PC register (R0-R2 use 2 registers each).
static constexpr Register InterpreterPCReg = InvalidReg;
// Registers used in RegExpMatcher instruction (do not use JSReturnOperand).
static constexpr Register RegExpMatcherRegExpReg = CallTempReg0;
static constexpr Register RegExpMatcherStringReg = CallTempReg1;