mirror of https://github.com/mozilla/gecko-dev.git
Bug 1499507 - Allow the compiler to generate a non-atomic increment instruction for the stack pointer increment. r=njn
This change reduces the binary size on macOS x64 by around 50KB. Here's a diff of the impact on the code generated for Attr_Binding::get_specified in the Mac build. It's a bit hard to read because %r12 and %rbx swap roles, but what happens in this method is that "movq %r12, %rcx" goes away, and the two instructions "leal 0x1(%r12), %eax" and "movl %eax, 0x10(%rbx)" turn into a single "incl 0x10(%r12)". So the old code was preserving the original value of profilingStack->stackPointer in a register and then using it later to compute the incremented stackPointer. The new code uses an "incl" instruction for the stackPointer increment and doesn't worry that the stackPointer value might have changed since the stack size check at the start of the function. (It can't have changed.)

before: %rbx has the ProfilingStack*, %r12 has profilingStack->stackPointer
after:  %r12 has the ProfilingStack*, %rbx has profilingStack->stackPointer

@@ -3,37 +3,35 @@
  movq %rsp, %rbp
  pushq %r15
  pushq %r14
  pushq %r12
  pushq %rbx
  subq $0x10, %rsp
  movq %rcx, %r14
  movq %rdx, %r15
- movq 0x80(%rdi), %rbx
- movq %rbx, -40(%rbp)
- testq %rbx, %rbx
+ movq 0x80(%rdi), %r12
+ movq %r12, -40(%rbp)
+ testq %r12, %r12
  je loc_xxxxx
- movl 0x10(%rbx), %r12d
- cmpl (%rbx), %r12d
+ movl 0x10(%r12), %ebx
+ cmpl (%r12), %ebx
  jae loc_xxxxx
- movq 0x8(%rbx), %rax
- movq %r12, %rcx
- shlq $0x5, %rcx
- leaq aAttr, %rdx ; "Attr"
- movq %rdx, (%rax,%rcx)
- leaq aSpecified, %rdx ; "specified"
- movq %rdx, 0x8(%rax,%rcx)
- leaq -40(%rbp), %rdx
- movq %rdx, 0x10(%rax,%rcx)
- movl $0x3a1, 0x1c(%rax,%rcx)
- leal 0x1(%r12), %eax
- movl %eax, 0x10(%rbx)
+ movq 0x8(%r12), %rax
+ shlq $0x5, %rbx
+ leaq aAttr, %rcx ; "Attr"
+ movq %rcx, (%rax,%rbx)
+ leaq aSpecified, %rcx ; "specified"
+ movq %rcx, 0x8(%rax,%rbx)
+ leaq -40(%rbp), %rcx
+ movq %rcx, 0x10(%rax,%rbx)
+ movl $0x3a1, 0x1c(%rax,%rbx)
+ incl 0x10(%r12)
  movq %r15, %rdi
  call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
  movzxl %al, %eax
  movabsq $0xfff9000000000000, %rcx
  orq %rax, %rcx
  movq %rcx, (%r14)
  movq -40(%rbp), %rax
@@ -47,11 +45,11 @@
  popq %rbx
  popq %r12
  popq %r14
  popq %r15
  popq %rbp
  ret ; endp
- movq %rbx, %rdi
+ movq %r12, %rdi
  call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
  jmp loc_xxxxx

Depends on D9205

Differential Revision: https://phabricator.services.mozilla.com/D9206

--HG--
extra : moz-landing-system : lando
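As an aside for readers following along outside the tree, here is a minimal stand-alone sketch of the pattern the patch relies on. It uses std::atomic with relaxed ordering only as a stand-in for the profiler's atomic stackPointer; the struct and function names are made up for illustration, and whether a given compiler actually folds the load/add/store into a single non-locked increment depends on the compiler and target.

#include <atomic>
#include <cstdint>

struct Stack {
  std::atomic<uint32_t> stackPointer{0};  // written by one thread, read by a sampler
  uint32_t capacity = 128;

  // Old shape: the value written back is computed from the register copy taken
  // for the bounds check, so that copy has to stay live across the whole body.
  void pushOld() {
    uint32_t old = stackPointer.load(std::memory_order_relaxed);
    if (old >= capacity) return;  // stand-in for ensureCapacitySlow()
    stackPointer.store(old + 1, std::memory_order_relaxed);
  }

  // New shape: the store re-reads the atomic, so the compiler is free to emit
  // a single memory-destination increment (a plain "incl", no "lock" prefix).
  void pushNew() {
    uint32_t val = stackPointer.load(std::memory_order_relaxed);
    if (val >= capacity) return;
    stackPointer.store(stackPointer.load(std::memory_order_relaxed) + 1,
                       std::memory_order_relaxed);
  }
};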
parent 42d8bddb50
commit ed18047939
@@ -394,12 +394,17 @@ class ProfilingStack final
   void pushLabelFrame(const char* label, const char* dynamicString, void* sp,
                       js::ProfilingStackFrame::Category category,
                       uint32_t flags = 0) {
-    uint32_t oldStackPointer = stackPointer;
+    // This thread is the only one that ever changes the value of
+    // stackPointer.
+    // Store the value of the atomic in a non-atomic local variable so that
+    // the compiler won't generate two separate loads from the atomic for
+    // the size check and the frames[] array indexing operation.
+    uint32_t stackPointerVal = stackPointer;

-    if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
+    if (MOZ_UNLIKELY(stackPointerVal >= capacity)) {
       ensureCapacitySlow();
     }
-    frames[oldStackPointer].initLabelFrame(label, dynamicString, sp,
+    frames[stackPointerVal].initLabelFrame(label, dynamicString, sp,
                                            category, flags);

     // This must happen at the end! The compiler will not reorder this
@@ -408,9 +413,10 @@ class ProfilingStack final
     // Do the read and the write as two separate statements, in order to
     // make it clear that we don't need an atomic increment, which would be
     // more expensive on x86 than the separate operations done here.
-    // This thread is the only one that ever changes the value of
-    // stackPointer.
-    stackPointer = oldStackPointer + 1;
+    // However, don't use stackPointerVal here; instead, allow the compiler
+    // to turn this store into a non-atomic increment instruction which
+    // takes up less code size.
+    stackPointer = stackPointer + 1;
   }

   void pushSpMarkerFrame(void* sp) {
@@ -427,6 +433,8 @@ class ProfilingStack final

   void pushJsFrame(const char* label, const char* dynamicString, JSScript* script,
                    jsbytecode* pc) {
+    // This thread is the only one that ever changes the value of
+    // stackPointer. Only load the atomic once.
     uint32_t oldStackPointer = stackPointer;

     if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
@@ -435,7 +443,7 @@ class ProfilingStack final
     frames[oldStackPointer].initJsFrame(label, dynamicString, script, pc);

     // This must happen at the end, see the comment in pushLabelFrame.
-    stackPointer = oldStackPointer + 1;
+    stackPointer = stackPointer + 1;
   }

   void pop() {
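A side note on the "we don't need an atomic increment" comment above: the sketch below (again using std::atomic purely as an illustration, with made-up names) contrasts an atomic read-modify-write with the separate load and store used in this patch. On x86 the former needs a lock-prefixed instruction, while the latter compiles to ordinary mov instructions and is only correct because a single thread performs every write.

#include <atomic>
#include <cstdint>

std::atomic<uint32_t> counter{0};

// Atomic read-modify-write: becomes a lock-prefixed add/xadd on x86.
// Correct with any number of writers, but comparatively expensive.
void bumpRmw() {
  counter.fetch_add(1, std::memory_order_relaxed);
}

// Separate load and store: plain mov instructions (possibly folded into a
// non-locked incl). Only safe because exactly one thread ever writes the
// counter; other threads may read it and will see either the old or new value.
void bumpSingleWriter() {
  counter.store(counter.load(std::memory_order_relaxed) + 1,
                std::memory_order_relaxed);
}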