This makes it easier to read these flags from JIT code. The patch also splits
them in MutableFlags and ImmutableFlags, this should let us simplify XDR and
CopyScript in the future.
Differential Revision: https://phabricator.services.mozilla.com/D10735
--HG--
extra : moz-landing-system : lando
Also uses JS_DefinePropertyById instead of JS_SetPropertyById because it's more natural.
Differential Revision: https://phabricator.services.mozilla.com/D11094
--HG--
extra : moz-landing-system : lando
This change reduces the binary size on macOS x64 by around 50KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build. It's a bit hard to read because %r12 and %rbx swap their
function, but what happens in this method is that "movq %r12, %rcx" goes
away, and the two instructions "leal 0x1(%r12) %eax" and
"movl %eax, 0x10(%rbx)" turn into an "incl 0x10(%r12)".
So the old code was preserving the original value of profilingStack->stackPointer
in a register, and then using it later to compute the incremented stackPointer.
The new code uses an "incl" instruction for the stackPointer increment and
doesn't worry that the stackPointer value might have changed since the stack
size check at the start of the function. (It can't have changed.)
before: %rbx has the ProfilingStack*, %r12 has profilingStack->stackPointer
after: %r12 has the ProfilingStack*, %rbx has profilingStack->stackPointer
@@ -3,37 +3,35 @@
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq 0x80(%rdi), %rbx
- movq %rbx, -40(%rbp)
- testq %rbx, %rbx
+ movq 0x80(%rdi), %r12
+ movq %r12, -40(%rbp)
+ testq %r12, %r12
je loc_xxxxx
- movl 0x10(%rbx), %r12d
- cmpl (%rbx), %r12d
+ movl 0x10(%r12), %ebx
+ cmpl (%r12), %ebx
jae loc_xxxxx
- movq 0x8(%rbx), %rax
- movq %r12, %rcx
- shlq $0x5, %rcx
- leaq aAttr, %rdx ; "Attr"
- movq %rdx, (%rax,%rcx)
- leaq aSpecified, %rdx ; "specified"
- movq %rdx, 0x8(%rax,%rcx)
- leaq -40(%rbp), %rdx
- movq %rdx, 0x10(%rax,%rcx)
- movl $0x3a1, 0x1c(%rax,%rcx)
- leal 0x1(%r12), %eax
- movl %eax, 0x10(%rbx)
+ movq 0x8(%r12), %rax
+ shlq $0x5, %rbx
+ leaq aAttr, %rcx ; "Attr"
+ movq %rcx, (%rax,%rbx)
+ leaq aSpecified, %rcx ; "specified"
+ movq %rcx, 0x8(%rax,%rbx)
+ leaq -40(%rbp), %rcx
+ movq %rcx, 0x10(%rax,%rbx)
+ movl $0x3a1, 0x1c(%rax,%rbx)
+ incl 0x10(%r12)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
movq %rcx, (%r14)
movq -40(%rbp), %rax
@@ -47,11 +45,11 @@
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
; endp
- movq %rbx, %rdi
+ movq %r12, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
jmp loc_xxxxx
Depends on D9205
Differential Revision: https://phabricator.services.mozilla.com/D9206
--HG--
extra : moz-landing-system : lando
These flags will be used by WebIDL APIs in an upcoming patch.
Depends on D9199
Differential Revision: https://phabricator.services.mozilla.com/D9203
--HG--
extra : moz-landing-system : lando
They were not displayed in the UI, and the instructions to initialize the line
field of a stack frame increased code size unnecessarily.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build:
@@ -20,17 +20,16 @@
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
- movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
Depends on D9193
Differential Revision: https://phabricator.services.mozilla.com/D9195
--HG--
extra : moz-landing-system : lando
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
This eliminates a few instructions from every profiler label and saves code size.
We have around 9000 WebIDL constructors + methods + getters + setters which all
have an inlined instance of this code.
This change reduces the binary size on Linux x64 by around 160KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build:
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq __ZN7mozilla8profiler6detail12RacyFeatures18sActiveAndFeaturesE@GOT, %rax ; __ZN7mozilla8profiler6detail12RacyFeatures18sActiveAndFeaturesE@GOT
- movl (%rax), %eax
- testl %eax, %eax
- js loc_xxxxx
-
- movq $0x0, -40(%rbp)
- jmp loc_xxxxx
-
- movq 0x78(%rdi), %rbx
+ movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
cmpl %r12d, (%rbx)
jbe loc_xxxxx
Differential Revision: https://phabricator.services.mozilla.com/D9192
--HG--
extra : moz-landing-system : lando
- Add JOF_ARGC, JOF_RESUMEINDEX instead of bare integer tags
- Fix some inconsistencies of JOF_ type and opcode size
- Reorganize JOF_ type list
Differential Revision: https://phabricator.services.mozilla.com/D10567
--HG--
extra : moz-landing-system : lando
This also reorders the remaining members of class ReadableStream a bit.
Depends on D10435
Differential Revision: https://phabricator.services.mozilla.com/D10436
--HG--
extra : moz-landing-system : lando
In passing, change ReadableStreamControllerPullSteps to use explicit downcasts,
so it's a little more obvious what's going on.
Depends on D10430
Differential Revision: https://phabricator.services.mozilla.com/D10431
--HG--
extra : moz-landing-system : lando
This commit changes ReadableStreamReaderGenericRelease to use
UnwrapInternalSlot when accessing reader.[[closedPromise]]. Before this commit,
the code assumed that there would be a promise in the slot, not a wrapper.
I think that was actually correct, because it's impossible for the stream to
still be readable and the reader to have a [[closedPromise]] that's a wrapper.
Still, it's a bit precarious (and I'm still not 100% sure about that), so I just
changed it to unwrap.
Depends on D10374
Differential Revision: https://phabricator.services.mozilla.com/D10428
--HG--
extra : moz-landing-system : lando
Note: JSOP_GOSUB has a use count of 2 even though it doesn't push/pop anything.
This isn't great but was done to match the old behavior (def count of 0, pushed
2 values).
Depends on D10571
Differential Revision: https://phabricator.services.mozilla.com/D10572
--HG--
extra : moz-landing-system : lando
This also reorders the remaining members of class ReadableStream a bit.
Depends on D10435
Differential Revision: https://phabricator.services.mozilla.com/D10436
--HG--
extra : moz-landing-system : lando
In passing, change ReadableStreamControllerPullSteps to use explicit downcasts,
so it's a little more obvious what's going on.
Depends on D10430
Differential Revision: https://phabricator.services.mozilla.com/D10431
--HG--
extra : moz-landing-system : lando
This commit changes ReadableStreamReaderGenericRelease to use
UnwrapInternalSlot when accessing reader.[[closedPromise]]. Before this commit,
the code assumed that there would be a promise in the slot, not a wrapper.
I think that was actually correct, because it's impossible for the stream to
still be readable and the reader to have a [[closedPromise]] that's a wrapper.
Still, it's a bit precarious (and I'm still not 100% sure about that), so I just
changed it to unwrap.
Depends on D10374
Differential Revision: https://phabricator.services.mozilla.com/D10428
--HG--
extra : moz-landing-system : lando
Switch over to mozilla::TimeStamp for timing. This should use clock_gettime, mach_absolute_time, ,and QueryPerformanceCounter timing implementations on Linux, OSX, and Windows which are all very efficient. It increases overhead a little bit, but allows for synchronization across cores and the profiler, and is portable. Longer term solution should be to move over to rdtscp for timing.
Differential Revision: https://phabricator.services.mozilla.com/D10588
--HG--
extra : moz-landing-system : lando
The arguments to the intrinsics here are not guaranteed to be int32s,
however, if a double enters the intrinsic, that's probably a bug in our
builtin javascript. Values should be converted to int32 before being
passed in, with `|0`.
Differential Revision: https://phabricator.services.mozilla.com/D10573
--HG--
extra : moz-landing-system : lando
This also, circumstantially, cleans up an ugly bug in the attachment logic
in the Binary and Compare IR generators where we would never update the
IC state.
Differential Revision: https://phabricator.services.mozilla.com/D10317
--HG--
extra : rebase_source : 109ed6d6b4332724f92fbde868076f5c7075c2c1
This reorganizes the JSScript constructor sequence into the following layers and responsibilities:
JSScript(...) The actual C++ constructor putting script in
GC-safe state.
JSScript::New(...) Performs allocation within GC.
JSScript::Create(...) Common initialization for all script
initialization pathways.
This is more in-line with how other GC-things are created. The bitfields
saved from compile options are now done at the JSScript::Create level.
Differential Revision: https://phabricator.services.mozilla.com/D9946
--HG--
extra : moz-landing-system : lando
This will later be used to store variable length data that hangs off of
each JSScript. This primarily is a refactor of existing JSScript code to
put in its own data structure.
- ScopeArray and friends now store offsets instead of pointers. This
saves memory on 64-bit platforms and simplifies cloning.
- GCPtr constructors are used instead of relying on pod_calloc. This
fixes C++ object-model violations.
- A packed bitfield is used to locate optional array headers instead of
previous daisy-chain approach. This also lets js::PrivateScriptData
understand array layout without coupling to JSScript.
The arguments to the intrinsics here are not guaranteed to be int32s,
however, if a double enters the intrinsic, that's probably a bug in our
builtin javascript. Values should be converted to int32 before being
passed in, with `|0`.
Differential Revision: https://phabricator.services.mozilla.com/D8446
--HG--
extra : moz-landing-system : lando
Make off-thread and main-thread parsing more consistent by using the
JSContext tempLifoAlloc always. This also makes BytecodeCompiler APIs
more consistent.
Remove BytecodeCompiler::alloc which was obscuring lifetimes and
conflicts of allocator.
Differential Revision: https://phabricator.services.mozilla.com/D9978
--HG--
extra : moz-landing-system : lando
Move LifoAllocScope out of BytecodeParser to avoid ordering issues that
may arise when LifoAllocScope is wrapped.
Differential Revision: https://phabricator.services.mozilla.com/D9977
--HG--
extra : moz-landing-system : lando
Directly construct a LifoAllocScope on cx->tempLifoAlloc inside
RegExpObject::create. The data allocated temporarily does not outlive
this function. Also simplify callers as a result.
Differential Revision: https://phabricator.services.mozilla.com/D9976
--HG--
extra : moz-landing-system : lando
ToggledCallSize() on ARM64 did not properly account for all the
possible places where a constant pool could be found; also, it can be
updated to use better abstractions.
WritePoolHeader() on ARM64 did not generate a correct header because
it used sizeof(Instructions), which is meaningless on this platform,
as Instruction has no data fields. Use kInstructionSize instead,
that's what it's for, and everyone else uses it.
This was only ever useful before we had compartment-based security isolation.
Now it is just a pervasive nuisance.
Differential Revision: https://phabricator.services.mozilla.com/D10795
--HG--
extra : rebase_source : 9af37bbd959b2db3d7a6c5ac723a474c47676f79
This is the equivalent of the rustc-workspace-hack used by the rust build to
ensure cargo and RLS see the same set of features for dependencies so that
these dependencies may be reused by invocations of cargo for these two
projects. The trivial crate added specifies the union of the set of
features activated for a particular crate for each time it appears in the
dependency tree so that cargo will understand these dependencies to be
re-usable across cargo implementations. This eliminates re-building jsrust
and some of its dependencies twice, and reduces the number of crates compiled
in the tree by about 90 in testing on linux.
Differential Revision: https://phabricator.services.mozilla.com/D9041
+ Comment currentStartChunk_ in more detail
+ Remove misplaced comment about allocation during maybeResizeNursery, this
code no-longer does any allocation.
--HG--
extra : source : 2e1fdb726acdb11bc62301693d1a7fce82ae64db
extra : histedit_source : 241bf679544db318bea08086f88b8bce36f92e41
Add a new function for a commonly accesses structure.
--HG--
extra : source : fd1d0c5f34ec8402895db683a163d3c1b26bc2b5
extra : histedit_source : c9335f652332b5462c47c60a0ab49f76ef5a61f7
This change reduces the binary size on macOS x64 by around 50KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build. It's a bit hard to read because %r12 and %rbx swap their
function, but what happens in this method is that "movq %r12, %rcx" goes
away, and the two instructions "leal 0x1(%r12) %eax" and
"movl %eax, 0x10(%rbx)" turn into an "incl 0x10(%r12)".
So the old code was preserving the original value of profilingStack->stackPointer
in a register, and then using it later to compute the incremented stackPointer.
The new code uses an "incl" instruction for the stackPointer increment and
doesn't worry that the stackPointer value might have changed since the stack
size check at the start of the function. (It can't have changed.)
before: %rbx has the ProfilingStack*, %r12 has profilingStack->stackPointer
after: %r12 has the ProfilingStack*, %rbx has profilingStack->stackPointer
@@ -3,37 +3,35 @@
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq 0x80(%rdi), %rbx
- movq %rbx, -40(%rbp)
- testq %rbx, %rbx
+ movq 0x80(%rdi), %r12
+ movq %r12, -40(%rbp)
+ testq %r12, %r12
je loc_xxxxx
- movl 0x10(%rbx), %r12d
- cmpl (%rbx), %r12d
+ movl 0x10(%r12), %ebx
+ cmpl (%r12), %ebx
jae loc_xxxxx
- movq 0x8(%rbx), %rax
- movq %r12, %rcx
- shlq $0x5, %rcx
- leaq aAttr, %rdx ; "Attr"
- movq %rdx, (%rax,%rcx)
- leaq aSpecified, %rdx ; "specified"
- movq %rdx, 0x8(%rax,%rcx)
- leaq -40(%rbp), %rdx
- movq %rdx, 0x10(%rax,%rcx)
- movl $0x3a1, 0x1c(%rax,%rcx)
- leal 0x1(%r12), %eax
- movl %eax, 0x10(%rbx)
+ movq 0x8(%r12), %rax
+ shlq $0x5, %rbx
+ leaq aAttr, %rcx ; "Attr"
+ movq %rcx, (%rax,%rbx)
+ leaq aSpecified, %rcx ; "specified"
+ movq %rcx, 0x8(%rax,%rbx)
+ leaq -40(%rbp), %rcx
+ movq %rcx, 0x10(%rax,%rbx)
+ movl $0x3a1, 0x1c(%rax,%rbx)
+ incl 0x10(%r12)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
movq %rcx, (%r14)
movq -40(%rbp), %rax
@@ -47,11 +45,11 @@
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
; endp
- movq %rbx, %rdi
+ movq %r12, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
jmp loc_xxxxx
Depends on D9205
Differential Revision: https://phabricator.services.mozilla.com/D9206
--HG--
extra : moz-landing-system : lando
These flags will be used by WebIDL APIs in an upcoming patch.
Depends on D9199
Differential Revision: https://phabricator.services.mozilla.com/D9203
--HG--
extra : moz-landing-system : lando
They were not displayed in the UI, and the instructions to initialize the line
field of a stack frame increased code size unnecessarily.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build:
@@ -20,17 +20,16 @@
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
- movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
Depends on D9193
Differential Revision: https://phabricator.services.mozilla.com/D9195
--HG--
extra : moz-landing-system : lando
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
This eliminates a few instructions from every profiler label and saves code size.
We have around 9000 WebIDL constructors + methods + getters + setters which all
have an inlined instance of this code.
This change reduces the binary size on Linux x64 by around 160KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build:
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq __ZN7mozilla8profiler6detail12RacyFeatures18sActiveAndFeaturesE@GOT, %rax ; __ZN7mozilla8profiler6detail12RacyFeatures18sActiveAndFeaturesE@GOT
- movl (%rax), %eax
- testl %eax, %eax
- js loc_xxxxx
-
- movq $0x0, -40(%rbp)
- jmp loc_xxxxx
-
- movq 0x78(%rdi), %rbx
+ movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
cmpl %r12d, (%rbx)
jbe loc_xxxxx
Differential Revision: https://phabricator.services.mozilla.com/D9192
--HG--
extra : moz-landing-system : lando
This change reduces the binary size on macOS x64 by around 50KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build. It's a bit hard to read because %r12 and %rbx swap their
function, but what happens in this method is that "movq %r12, %rcx" goes
away, and the two instructions "leal 0x1(%r12) %eax" and
"movl %eax, 0x10(%rbx)" turn into an "incl 0x10(%r12)".
So the old code was preserving the original value of profilingStack->stackPointer
in a register, and then using it later to compute the incremented stackPointer.
The new code uses an "incl" instruction for the stackPointer increment and
doesn't worry that the stackPointer value might have changed since the stack
size check at the start of the function. (It can't have changed.)
before: %rbx has the ProfilingStack*, %r12 has profilingStack->stackPointer
after: %r12 has the ProfilingStack*, %rbx has profilingStack->stackPointer
@@ -3,37 +3,35 @@
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq 0x80(%rdi), %rbx
- movq %rbx, -40(%rbp)
- testq %rbx, %rbx
+ movq 0x80(%rdi), %r12
+ movq %r12, -40(%rbp)
+ testq %r12, %r12
je loc_xxxxx
- movl 0x10(%rbx), %r12d
- cmpl (%rbx), %r12d
+ movl 0x10(%r12), %ebx
+ cmpl (%r12), %ebx
jae loc_xxxxx
- movq 0x8(%rbx), %rax
- movq %r12, %rcx
- shlq $0x5, %rcx
- leaq aAttr, %rdx ; "Attr"
- movq %rdx, (%rax,%rcx)
- leaq aSpecified, %rdx ; "specified"
- movq %rdx, 0x8(%rax,%rcx)
- leaq -40(%rbp), %rdx
- movq %rdx, 0x10(%rax,%rcx)
- movl $0x3a1, 0x1c(%rax,%rcx)
- leal 0x1(%r12), %eax
- movl %eax, 0x10(%rbx)
+ movq 0x8(%r12), %rax
+ shlq $0x5, %rbx
+ leaq aAttr, %rcx ; "Attr"
+ movq %rcx, (%rax,%rbx)
+ leaq aSpecified, %rcx ; "specified"
+ movq %rcx, 0x8(%rax,%rbx)
+ leaq -40(%rbp), %rcx
+ movq %rcx, 0x10(%rax,%rbx)
+ movl $0x3a1, 0x1c(%rax,%rbx)
+ incl 0x10(%r12)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
movq %rcx, (%r14)
movq -40(%rbp), %rax
@@ -47,11 +45,11 @@
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
; endp
- movq %rbx, %rdi
+ movq %r12, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
jmp loc_xxxxx
Depends on D9205
Differential Revision: https://phabricator.services.mozilla.com/D9206
--HG--
extra : moz-landing-system : lando
These flags will be used by WebIDL APIs in an upcoming patch.
Depends on D9199
Differential Revision: https://phabricator.services.mozilla.com/D9203
--HG--
extra : moz-landing-system : lando
They were not displayed in the UI, and the instructions to initialize the line
field of a stack frame increased code size unnecessarily.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build:
@@ -20,17 +20,16 @@
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
- movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
Depends on D9193
Differential Revision: https://phabricator.services.mozilla.com/D9195
--HG--
extra : moz-landing-system : lando
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
This eliminates a few instructions from every profiler label and saves code size.
We have around 9000 WebIDL constructors + methods + getters + setters which all
have an inlined instance of this code.
This change reduces the binary size on Linux x64 by around 160KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build:
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq __ZN7mozilla8profiler6detail12RacyFeatures18sActiveAndFeaturesE@GOT, %rax ; __ZN7mozilla8profiler6detail12RacyFeatures18sActiveAndFeaturesE@GOT
- movl (%rax), %eax
- testl %eax, %eax
- js loc_xxxxx
-
- movq $0x0, -40(%rbp)
- jmp loc_xxxxx
-
- movq 0x78(%rdi), %rbx
+ movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
cmpl %r12d, (%rbx)
jbe loc_xxxxx
Differential Revision: https://phabricator.services.mozilla.com/D9192
--HG--
extra : moz-landing-system : lando