Backed out 5 changesets (bug 1749665) for causing hazard failures. CLOSED TREE

Backed out changeset 750012d34b73 (bug 1749665)
Backed out changeset 7a30186c853a (bug 1749665)
Backed out changeset 1942703cc665 (bug 1749665)
Backed out changeset 012177f72587 (bug 1749665)
Backed out changeset e02c59ccee17 (bug 1749665)
Iulian Moraru 2022-01-24 00:05:05 +02:00
parent f77a9b12f4
commit 4d6f01e19a
21 changed files with 1474 additions and 985 deletions

View File

@ -65,7 +65,6 @@ included_inclnames_to_ignore = set(
"frontend/smoosh_generated.h", # generated in $OBJDIR
"gc/StatsPhasesGenerated.h", # generated in $OBJDIR
"gc/StatsPhasesGenerated.inc", # generated in $OBJDIR
"jit/AtomicOperationsGenerated.h", # generated in $OBJDIR
"jit/CacheIROpsGenerated.h", # generated in $OBJDIR
"jit/LIROpsGenerated.h", # generated in $OBJDIR
"jit/MIROpsGenerated.h", # generated in $OBJDIR

View File

@ -11,7 +11,6 @@
#include <string.h>
#include "jit/AtomicOperationsGenerated.h"
#include "vm/SharedMem.h"
namespace js {
@ -65,7 +64,7 @@ namespace jit {
*
* It's not a requirement that these functions be inlined; performance
* is not a great concern. On some platforms these functions may call
* functions that use inline assembly. See GenerateAtomicOperations.py.
* out to code that's generated at run time.
*
* In principle these functions will not be written in C++, thus
* making races defined behavior if all racy accesses from C++ go via
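(Illustration, not part of the patch.) To make the intent of that comment concrete, here is a minimal sketch of how racy C++ accesses to shared memory are meant to be routed through this class; ReadFlag/PublishFlag and the surrounding setup are hypothetical, but loadSafeWhenRacy and storeSeqCst are real entry points that appear later in this diff.

#include "jit/AtomicOperations.h"  // SpiderMonkey-internal header

using js::jit::AtomicOperations;

// `cell` is assumed to point into memory shared with JS agents
// (e.g. a SharedArrayBuffer's data block).
uint32_t ReadFlag(uint32_t* cell) {
  // An intentionally racy read: it goes through the atomics layer rather than
  // a plain C++ load, so the race is defined behavior as the comment requires.
  return AtomicOperations::loadSafeWhenRacy(cell);
}

void PublishFlag(uint32_t* cell, uint32_t v) {
  // Sequentially consistent store, matching the semantics of Atomics.store().
  AtomicOperations::storeSeqCst(cell, v);
}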
@ -150,6 +149,13 @@ class AtomicOperations {
size_t nbytes);
public:
// On some platforms we generate code for the atomics at run-time; that
// happens here.
static bool Initialize();
// Deallocate the code segment for generated atomics functions.
static void ShutDown();
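(Illustration, not part of the patch.) A hedged sketch of the lifecycle contract these two declarations describe; StartEngineAtomics/StopEngineAtomics are hypothetical names, and the real call sites live inside SpiderMonkey's startup and shutdown paths rather than in this diff.

#include "jit/AtomicOperations.h"

bool StartEngineAtomics() {
  // On platforms with generated atomics this allocates and fills the code
  // segment holding the stubs; elsewhere it is expected to be a no-op that
  // returns true.
  return js::jit::AtomicOperations::Initialize();
}

void StopEngineAtomics() {
  // Releases the generated code segment (again a no-op for non-JIT backends).
  js::jit::AtomicOperations::ShutDown();
}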
// Test lock-freedom for any int32 value. This implements the
// Atomics::isLockFree() operation in the ECMAScript Shared Memory and
// Atomics specification, as follows:
@ -341,12 +347,45 @@ constexpr inline bool AtomicOperations::isLockfreeJS(int32_t size) {
// participate in the memory exclusivity monitors implemented by the simulator.
// Such a solution is likely to be difficult.
#ifdef JS_HAVE_GENERATED_ATOMIC_OPS
# include "jit/shared/AtomicOperations-shared-jit.h"
#elif defined(JS_SIMULATOR_MIPS32) || defined(__mips__)
# include "jit/mips-shared/AtomicOperations-mips-shared.h"
#else
#if defined(JS_SIMULATOR_MIPS32)
# if defined(__clang__) || defined(__GNUC__)
# include "jit/mips-shared/AtomicOperations-mips-shared.h"
# else
# error "AtomicOperations on MIPS-32 for unknown compiler"
# endif
#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86)
# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
# include "jit/shared/AtomicOperations-shared-jit.h"
# else
# include "jit/shared/AtomicOperations-feeling-lucky.h"
# endif
#elif defined(__arm__)
# if defined(JS_CODEGEN_ARM)
# include "jit/shared/AtomicOperations-shared-jit.h"
# else
# include "jit/shared/AtomicOperations-feeling-lucky.h"
# endif
#elif defined(__aarch64__) || defined(_M_ARM64)
# if defined(JS_CODEGEN_ARM64)
# include "jit/shared/AtomicOperations-shared-jit.h"
# else
# include "jit/shared/AtomicOperations-feeling-lucky.h"
# endif
#elif defined(__mips__)
# if defined(__clang__) || defined(__GNUC__)
# include "jit/mips-shared/AtomicOperations-mips-shared.h"
# else
# error "AtomicOperations on MIPS for an unknown compiler"
# endif
#elif defined(__ppc__) || defined(__PPC__) || defined(__sparc__) || \
defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || \
defined(__PPC64LE__) || defined(__alpha__) || defined(__hppa__) || \
defined(__sh__) || defined(__s390__) || defined(__s390x__) || \
defined(__m68k__) || defined(__riscv) || defined(__wasi__)
# include "jit/shared/AtomicOperations-feeling-lucky.h"
#else
# error "No AtomicOperations support provided for this platform"
#endif
#endif // jit_AtomicOperations_h

View File

@ -8118,8 +8118,6 @@ bool CacheIRCompiler::emitAtomicsLoadResult(ObjOperandId objId,
// Load the value.
BaseIndex source(scratch, index, ScaleFromScalarType(elementType));
// NOTE: the generated code must match the assembly code in gen_load in
// GenerateAtomicOperations.py
auto sync = Synchronization::Load();
masm.memoryBarrierBefore(sync);
@ -8170,8 +8168,6 @@ bool CacheIRCompiler::emitAtomicsStoreResult(ObjOperandId objId,
// Store the value.
BaseIndex dest(scratch, index, ScaleFromScalarType(elementType));
// NOTE: the generated code must match the assembly code in gen_store in
// GenerateAtomicOperations.py
auto sync = Synchronization::Store();
masm.memoryBarrierBefore(sync);
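(Illustration, not part of the patch.) The NOTE lines being removed here referred to keeping Ion's emitted sequence in sync with the C++-callable stubs. For a sequentially consistent 32-bit load on x86_64, the shape both sides agree on, as produced by gen_load in the script deleted below, looks roughly like this; the _illustration suffix marks it as an example rather than the generated function itself.

inline uint32_t AtomicLoad32SeqCst_illustration(const uint32_t* arg) {
  uint32_t res;
  asm volatile("mfence\n\t"                  // barrier before the access
               "movl (%[arg]), %[res]\n\t"
               "mfence\n\t"                  // barrier after the access
               : [res] "=r"(res)
               : [arg] "r"(arg)
               : "memory");
  return res;
}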

View File

@ -1,861 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# This script generates jit/AtomicOperationsGenerated.h
#
# See the big comment in jit/AtomicOperations.h for an explanation.
import buildconfig
is_64bit = "JS_64BIT" in buildconfig.defines
cpu_arch = buildconfig.substs["CPU_ARCH"]
def fmt_insn(s):
return '"' + s + '\\n\\t"\n'
def gen_seqcst(fun_name):
if cpu_arch in ("x86", "x86_64"):
return r"""
inline void %(fun_name)s() {
asm volatile ("mfence\n\t" ::: "memory");
}""" % {
"fun_name": fun_name,
}
if cpu_arch == "aarch64":
return r"""
inline void %(fun_name)s() {
asm volatile ("dmb ish\n\t" ::: "memory");
}""" % {
"fun_name": fun_name,
}
if cpu_arch == "arm":
return r"""
inline void %(fun_name)s() {
asm volatile ("dmb sy\n\t" ::: "memory");
}""" % {
"fun_name": fun_name,
}
raise Exception("Unexpected arch")
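(Illustration, not part of the patch.) Substituting a function name the way generate_atomics_header does, the x86/x86_64 branch of gen_seqcst expands to the following generated C++; the aarch64 and arm branches differ only in using "dmb ish" and "dmb sy" respectively.

inline void AtomicFenceSeqCst() {
  // Full memory barrier on x86/x86_64.
  asm volatile ("mfence\n\t" ::: "memory");
}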
def gen_load(fun_name, cpp_type, size, barrier):
# NOTE: the assembly code must match the generated code in:
# - CacheIRCompiler::emitAtomicsLoadResult
# - LIRGenerator::visitLoadUnboxedScalar
# - CodeGenerator::visitAtomicLoad64 (on 64-bit platforms)
# - MacroAssembler::wasmLoad
if cpu_arch in ("x86", "x86_64"):
insns = ""
if barrier:
insns += fmt_insn("mfence")
if size == 8:
insns += fmt_insn("movb (%[arg]), %[res]")
elif size == 16:
insns += fmt_insn("movw (%[arg]), %[res]")
elif size == 32:
insns += fmt_insn("movl (%[arg]), %[res]")
else:
assert size == 64
insns += fmt_insn("movq (%[arg]), %[res]")
if barrier:
insns += fmt_insn("mfence")
return """
inline %(cpp_type)s %(fun_name)s(const %(cpp_type)s* arg) {
%(cpp_type)s res;
asm volatile (%(insns)s
: [res] "=r" (res)
: [arg] "r" (arg)
: "memory");
return res;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
if cpu_arch == "aarch64":
insns = ""
if barrier:
insns += fmt_insn("dmb ish")
if size == 8:
insns += fmt_insn("ldrb %w[res], [%x[arg]]")
elif size == 16:
insns += fmt_insn("ldrh %w[res], [%x[arg]]")
elif size == 32:
insns += fmt_insn("ldr %w[res], [%x[arg]]")
else:
assert size == 64
insns += fmt_insn("ldr %x[res], [%x[arg]]")
if barrier:
insns += fmt_insn("dmb ish")
return """
inline %(cpp_type)s %(fun_name)s(const %(cpp_type)s* arg) {
%(cpp_type)s res;
asm volatile (%(insns)s
: [res] "=r" (res)
: [arg] "r" (arg)
: "memory");
return res;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
if cpu_arch == "arm":
insns = ""
if barrier:
insns += fmt_insn("dmb sy")
if size == 8:
insns += fmt_insn("ldrb %[res], [%[arg]]")
elif size == 16:
insns += fmt_insn("ldrh %[res], [%[arg]]")
else:
assert size == 32
insns += fmt_insn("ldr %[res], [%[arg]]")
if barrier:
insns += fmt_insn("dmb sy")
return """
inline %(cpp_type)s %(fun_name)s(const %(cpp_type)s* arg) {
%(cpp_type)s res;
asm volatile (%(insns)s
: [res] "=r" (res)
: [arg] "r" (arg)
: "memory");
return res;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
raise Exception("Unexpected arch")
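(Illustration, not part of the patch.) For reference, the aarch64 branch above expands to generated code of this shape for a sequentially consistent 32-bit load (name as assigned by generate_atomics_header):

inline uint32_t AtomicLoad32SeqCst(const uint32_t* arg) {
  uint32_t res;
  asm volatile("dmb ish\n\t"                  // barrier before
               "ldr %w[res], [%x[arg]]\n\t"   // 32-bit load
               "dmb ish\n\t"                  // barrier after
               : [res] "=r"(res)
               : [arg] "r"(arg)
               : "memory");
  return res;
}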
def gen_store(fun_name, cpp_type, size, barrier):
# NOTE: the assembly code must match the generated code in:
# - CacheIRCompiler::emitAtomicsStoreResult
# - LIRGenerator::visitStoreUnboxedScalar
# - CodeGenerator::visitAtomicStore64 (on 64-bit platforms)
# - MacroAssembler::wasmStore
if cpu_arch in ("x86", "x86_64"):
insns = ""
if barrier:
insns += fmt_insn("mfence")
if size == 8:
insns += fmt_insn("movb %[val], (%[addr])")
elif size == 16:
insns += fmt_insn("movw %[val], (%[addr])")
elif size == 32:
insns += fmt_insn("movl %[val], (%[addr])")
else:
assert size == 64
insns += fmt_insn("movq %[val], (%[addr])")
if barrier:
insns += fmt_insn("mfence")
return """
inline void %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
asm volatile (%(insns)s
:
: [addr] "r" (addr), [val] "r"(val)
: "memory");
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
if cpu_arch == "aarch64":
insns = ""
if barrier:
insns += fmt_insn("dmb ish")
if size == 8:
insns += fmt_insn("strb %w[val], [%x[addr]]")
elif size == 16:
insns += fmt_insn("strh %w[val], [%x[addr]]")
elif size == 32:
insns += fmt_insn("str %w[val], [%x[addr]]")
else:
assert size == 64
insns += fmt_insn("str %x[val], [%x[addr]]")
if barrier:
insns += fmt_insn("dmb ish")
return """
inline void %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
asm volatile (%(insns)s
:
: [addr] "r" (addr), [val] "r"(val)
: "memory");
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
if cpu_arch == "arm":
insns = ""
if barrier:
insns += fmt_insn("dmb sy")
if size == 8:
insns += fmt_insn("strb %[val], [%[addr]]")
elif size == 16:
insns += fmt_insn("strh %[val], [%[addr]]")
else:
assert size == 32
insns += fmt_insn("str %[val], [%[addr]]")
if barrier:
insns += fmt_insn("dmb sy")
return """
inline void %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
asm volatile (%(insns)s
:
: [addr] "r" (addr), [val] "r"(val)
: "memory");
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
raise Exception("Unexpected arch")
def gen_exchange(fun_name, cpp_type, size):
# NOTE: the assembly code must match the generated code in:
# - MacroAssembler::atomicExchange
# - MacroAssembler::atomicExchange64 (on 64-bit platforms)
if cpu_arch in ("x86", "x86_64"):
# Request an input/output register for `val` so that we can simply XCHG it
# with *addr.
insns = ""
if size == 8:
insns += fmt_insn("xchgb %[val], (%[addr])")
elif size == 16:
insns += fmt_insn("xchgw %[val], (%[addr])")
elif size == 32:
insns += fmt_insn("xchgl %[val], (%[addr])")
else:
assert size == 64
insns += fmt_insn("xchgq %[val], (%[addr])")
return """
inline %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
asm volatile (%(insns)s
: [val] "+r" (val)
: [addr] "r" (addr)
: "memory");
return val;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
if cpu_arch == "aarch64":
insns = ""
insns += fmt_insn("dmb ish")
insns += fmt_insn("0:")
if size == 8:
insns += fmt_insn("ldxrb %w[res], [%x[addr]]")
insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]")
elif size == 16:
insns += fmt_insn("ldxrh %w[res], [%x[addr]]")
insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]")
elif size == 32:
insns += fmt_insn("ldxr %w[res], [%x[addr]]")
insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]")
else:
assert size == 64
insns += fmt_insn("ldxr %x[res], [%x[addr]]")
insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]")
insns += fmt_insn("cbnz %w[scratch], 0b")
insns += fmt_insn("dmb ish")
return """
inline %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
%(cpp_type)s res;
uint32_t scratch;
asm volatile (%(insns)s
: [res] "=&r"(res), [scratch] "=&r"(scratch)
: [addr] "r" (addr), [val] "r"(val)
: "memory", "cc");
return res;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
if cpu_arch == "arm":
insns = ""
insns += fmt_insn("dmb sy")
insns += fmt_insn("0:")
if size == 8:
insns += fmt_insn("ldrexb %[res], [%[addr]]")
insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]")
elif size == 16:
insns += fmt_insn("ldrexh %[res], [%[addr]]")
insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]")
else:
assert size == 32
insns += fmt_insn("ldrex %[res], [%[addr]]")
insns += fmt_insn("strex %[scratch], %[val], [%[addr]]")
insns += fmt_insn("cmp %[scratch], #1")
insns += fmt_insn("beq 0b")
insns += fmt_insn("dmb sy")
return """
inline %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
%(cpp_type)s res;
uint32_t scratch;
asm volatile (%(insns)s
: [res] "=&r"(res), [scratch] "=&r"(scratch)
: [addr] "r" (addr), [val] "r"(val)
: "memory", "cc");
return res;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
raise Exception("Unexpected arch")
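(Illustration, not part of the patch.) On 32-bit ARM the exchange above becomes an ldrex/strex retry loop bracketed by full barriers; expanded for the 32-bit case it reads as follows.

inline uint32_t AtomicExchange32SeqCst(uint32_t* addr, uint32_t val) {
  uint32_t res;
  uint32_t scratch;
  asm volatile("dmb sy\n\t"
               "0:\n\t"
               "ldrex %[res], [%[addr]]\n\t"              // load-exclusive old value
               "strex %[scratch], %[val], [%[addr]]\n\t"  // try to store new value
               "cmp %[scratch], #1\n\t"                   // store failed?
               "beq 0b\n\t"                               // then retry
               "dmb sy\n\t"
               : [res] "=&r"(res), [scratch] "=&r"(scratch)
               : [addr] "r"(addr), [val] "r"(val)
               : "memory", "cc");
  return res;
}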
def gen_cmpxchg(fun_name, cpp_type, size):
# NOTE: the assembly code must match the generated code in:
# - MacroAssembler::compareExchange
# - MacroAssembler::compareExchange64
if cpu_arch == "x86" and size == 64:
# Use a +A constraint to load `oldval` into EDX:EAX as input/output.
# `newval` is loaded into ECX:EBX.
return r"""
inline %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
%(cpp_type)s oldval,
%(cpp_type)s newval) {
asm volatile ("lock; cmpxchg8b (%%[addr])\n\t"
: "+A" (oldval)
: [addr] "r" (addr),
"b" (uint32_t(newval & 0xffff'ffff)),
"c" (uint32_t(newval >> 32))
: "memory", "cc");
return oldval;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
}
if cpu_arch == "arm" and size == 64:
return r"""
inline %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
%(cpp_type)s oldval,
%(cpp_type)s newval) {
uint32_t oldval0 = oldval & 0xffff'ffff;
uint32_t oldval1 = oldval >> 32;
uint32_t newval0 = newval & 0xffff'ffff;
uint32_t newval1 = newval >> 32;
asm volatile (
"dmb sy\n\t"
"0: ldrexd r0, r1, [%%[addr]]\n\t"
"cmp r0, %%[oldval0]\n\t"
"bne 1f\n\t"
"cmp r1, %%[oldval1]\n\t"
"bne 1f\n\t"
"mov r2, %%[newval0]\n\t"
"mov r3, %%[newval1]\n\t"
"strexd r4, r2, r3, [%%[addr]]\n\t"
"cmp r4, #1\n\t"
"beq 0b\n\t"
"1: dmb sy\n\t"
"mov %%[oldval0], r0\n\t"
"mov %%[oldval1], r1\n\t"
: [oldval0] "+&r" (oldval0), [oldval1] "+&r"(oldval1)
: [addr] "r" (addr), [newval0] "r" (newval0), [newval1] "r" (newval1)
: "memory", "cc", "r0", "r1", "r2", "r3", "r4");
return uint64_t(oldval0) | (uint64_t(oldval1) << 32);
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
}
if cpu_arch in ("x86", "x86_64"):
# Use a +a constraint to load `oldval` into RAX as input/output register.
insns = ""
if size == 8:
insns += fmt_insn("lock; cmpxchgb %[newval], (%[addr])")
elif size == 16:
insns += fmt_insn("lock; cmpxchgw %[newval], (%[addr])")
elif size == 32:
insns += fmt_insn("lock; cmpxchgl %[newval], (%[addr])")
else:
assert size == 64
insns += fmt_insn("lock; cmpxchgq %[newval], (%[addr])")
return """
inline %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
%(cpp_type)s oldval,
%(cpp_type)s newval) {
asm volatile (%(insns)s
: [oldval] "+a" (oldval)
: [addr] "r" (addr), [newval] "r" (newval)
: "memory", "cc");
return oldval;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
if cpu_arch == "aarch64":
insns = ""
insns += fmt_insn("dmb ish")
insns += fmt_insn("0:")
if size == 8:
insns += fmt_insn("uxtb %w[scratch], %w[oldval]")
insns += fmt_insn("ldxrb %w[res], [%x[addr]]")
insns += fmt_insn("cmp %w[res], %w[scratch]")
insns += fmt_insn("b.ne 1f")
insns += fmt_insn("stxrb %w[scratch], %w[newval], [%x[addr]]")
elif size == 16:
insns += fmt_insn("uxth %w[scratch], %w[oldval]")
insns += fmt_insn("ldxrh %w[res], [%x[addr]]")
insns += fmt_insn("cmp %w[res], %w[scratch]")
insns += fmt_insn("b.ne 1f")
insns += fmt_insn("stxrh %w[scratch], %w[newval], [%x[addr]]")
elif size == 32:
insns += fmt_insn("mov %w[scratch], %w[oldval]")
insns += fmt_insn("ldxr %w[res], [%x[addr]]")
insns += fmt_insn("cmp %w[res], %w[scratch]")
insns += fmt_insn("b.ne 1f")
insns += fmt_insn("stxr %w[scratch], %w[newval], [%x[addr]]")
else:
assert size == 64
insns += fmt_insn("mov %x[scratch], %x[oldval]")
insns += fmt_insn("ldxr %x[res], [%x[addr]]")
insns += fmt_insn("cmp %x[res], %x[scratch]")
insns += fmt_insn("b.ne 1f")
insns += fmt_insn("stxr %w[scratch], %x[newval], [%x[addr]]")
insns += fmt_insn("cbnz %w[scratch], 0b")
insns += fmt_insn("1: dmb ish")
return """
inline %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
%(cpp_type)s oldval,
%(cpp_type)s newval) {
%(cpp_type)s res, scratch;
asm volatile (%(insns)s
: [res] "=&r" (res), [scratch] "=&r" (scratch)
: [addr] "r" (addr), [oldval] "r"(oldval), [newval] "r" (newval)
: "memory", "cc");
return res;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
if cpu_arch == "arm":
insns = ""
insns += fmt_insn("dmb sy")
insns += fmt_insn("0:")
if size == 8:
insns += fmt_insn("uxtb %[scratch], %[oldval]")
insns += fmt_insn("ldrexb %[res], [%[addr]]")
insns += fmt_insn("cmp %[res], %[scratch]")
insns += fmt_insn("bne 1f")
insns += fmt_insn("strexb %[scratch], %[newval], [%[addr]]")
elif size == 16:
insns += fmt_insn("uxth %[scratch], %[oldval]")
insns += fmt_insn("ldrexh %[res], [%[addr]]")
insns += fmt_insn("cmp %[res], %[scratch]")
insns += fmt_insn("bne 1f")
insns += fmt_insn("strexh %[scratch], %[newval], [%[addr]]")
else:
assert size == 32
insns += fmt_insn("mov %[scratch], %[oldval]")
insns += fmt_insn("ldrex %[res], [%[addr]]")
insns += fmt_insn("cmp %[res], %[scratch]")
insns += fmt_insn("bne 1f")
insns += fmt_insn("strex %[scratch], %[newval], [%[addr]]")
insns += fmt_insn("cmp %[scratch], #1")
insns += fmt_insn("beq 0b")
insns += fmt_insn("1: dmb sy")
return """
inline %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
%(cpp_type)s oldval,
%(cpp_type)s newval) {
%(cpp_type)s res, scratch;
asm volatile (%(insns)s
: [res] "=&r" (res), [scratch] "=&r" (scratch)
: [addr] "r" (addr), [oldval] "r"(oldval), [newval] "r" (newval)
: "memory", "cc");
return res;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
raise Exception("Unexpected arch")
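(Illustration, not part of the patch.) The x86/x86_64 branch of gen_cmpxchg expands, for the 32-bit case, to the code below. The "+a" constraint pins oldval in EAX/RAX as CMPXCHG requires, and no explicit fence is emitted because a locked read-modify-write already acts as a full barrier on x86.

inline uint32_t AtomicCmpXchg32SeqCst(uint32_t* addr,
                                      uint32_t oldval,
                                      uint32_t newval) {
  asm volatile("lock; cmpxchgl %[newval], (%[addr])\n\t"
               : [oldval] "+a"(oldval)   // in: expected value, out: previous value
               : [addr] "r"(addr), [newval] "r"(newval)
               : "memory", "cc");
  return oldval;
}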
def gen_fetchop(fun_name, cpp_type, size, op):
# NOTE: the assembly code must match the generated code in:
# - MacroAssembler::atomicFetchOp
# - MacroAssembler::atomicFetchOp64 (on 64-bit platforms)
if cpu_arch in ("x86", "x86_64"):
# The `add` operation can be optimized with XADD.
if op == "add":
insns = ""
if size == 8:
insns += fmt_insn("lock; xaddb %[val], (%[addr])")
elif size == 16:
insns += fmt_insn("lock; xaddw %[val], (%[addr])")
elif size == 32:
insns += fmt_insn("lock; xaddl %[val], (%[addr])")
else:
assert size == 64
insns += fmt_insn("lock; xaddq %[val], (%[addr])")
return """
inline %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
asm volatile (%(insns)s
: [val] "+&r" (val)
: [addr] "r" (addr)
: "memory", "cc");
return val;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
# Use a +a constraint to ensure `res` is stored in RAX. This is required
# for the CMPXCHG instruction.
insns = ""
if size == 8:
insns += fmt_insn("movb (%[addr]), %[res]")
insns += fmt_insn("0: movb %[res], %[scratch]")
insns += fmt_insn("OPb %[val], %[scratch]")
insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])")
elif size == 16:
insns += fmt_insn("movw (%[addr]), %[res]")
insns += fmt_insn("0: movw %[res], %[scratch]")
insns += fmt_insn("OPw %[val], %[scratch]")
insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])")
elif size == 32:
insns += fmt_insn("movl (%[addr]), %[res]")
insns += fmt_insn("0: movl %[res], %[scratch]")
insns += fmt_insn("OPl %[val], %[scratch]")
insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])")
else:
assert size == 64
insns += fmt_insn("movq (%[addr]), %[res]")
insns += fmt_insn("0: movq %[res], %[scratch]")
insns += fmt_insn("OPq %[val], %[scratch]")
insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])")
insns = insns.replace("OP", op)
insns += fmt_insn("jnz 0b")
return """
inline %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
%(cpp_type)s res, scratch;
asm volatile (%(insns)s
: [res] "=&a" (res), [scratch] "=&r" (scratch)
: [addr] "r" (addr), [val] "r"(val)
: "memory", "cc");
return res;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
if cpu_arch == "aarch64":
insns = ""
insns += fmt_insn("dmb ish")
insns += fmt_insn("0:")
if size == 8:
insns += fmt_insn("ldxrb %w[res], [%x[addr]]")
insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]")
insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]")
elif size == 16:
insns += fmt_insn("ldxrh %w[res], [%x[addr]]")
insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]")
insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]")
elif size == 32:
insns += fmt_insn("ldxr %w[res], [%x[addr]]")
insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]")
insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]")
else:
assert size == 64
insns += fmt_insn("ldxr %x[res], [%x[addr]]")
insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]")
insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]")
cpu_op = op
if cpu_op == "or":
cpu_op = "orr"
if cpu_op == "xor":
cpu_op = "eor"
insns = insns.replace("OP", cpu_op)
insns += fmt_insn("cbnz %w[scratch2], 0b")
insns += fmt_insn("dmb ish")
return """
inline %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
%(cpp_type)s res;
uintptr_t scratch1, scratch2;
asm volatile (%(insns)s
: [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2)
: [addr] "r" (addr), [val] "r"(val)
: "memory", "cc");
return res;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
if cpu_arch == "arm":
insns = ""
insns += fmt_insn("dmb sy")
insns += fmt_insn("0:")
if size == 8:
insns += fmt_insn("ldrexb %[res], [%[addr]]")
insns += fmt_insn("OP %[scratch1], %[res], %[val]")
insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]")
elif size == 16:
insns += fmt_insn("ldrexh %[res], [%[addr]]")
insns += fmt_insn("OP %[scratch1], %[res], %[val]")
insns += fmt_insn("strexh %[scratch2], %[scratch1], [%[addr]]")
else:
assert size == 32
insns += fmt_insn("ldrex %[res], [%[addr]]")
insns += fmt_insn("OP %[scratch1], %[res], %[val]")
insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]")
cpu_op = op
if cpu_op == "or":
cpu_op = "orr"
if cpu_op == "xor":
cpu_op = "eor"
insns = insns.replace("OP", cpu_op)
insns += fmt_insn("cmp %[scratch2], #1")
insns += fmt_insn("beq 0b")
insns += fmt_insn("dmb sy")
return """
inline %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
%(cpp_type)s res;
uintptr_t scratch1, scratch2;
asm volatile (%(insns)s
: [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2)
: [addr] "r" (addr), [val] "r"(val)
: "memory", "cc");
return res;
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
raise Exception("Unexpected arch")
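(Illustration, not part of the patch.) For op == "add" on x86/x86_64 the fast path above uses XADD; expanded for 32 bits it looks like this. For and/or/xor, where x86 has no instruction that returns the old value, the script instead emits the CMPXCHG retry loop shown above.

inline uint32_t AtomicAdd32SeqCst(uint32_t* addr, uint32_t val) {
  asm volatile("lock; xaddl %[val], (%[addr])\n\t"  // val <- old, *addr <- old + val
               : [val] "+&r"(val)
               : [addr] "r"(addr)
               : "memory", "cc");
  return val;
}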
def gen_copy(fun_name, cpp_type, size, unroll, direction):
assert direction in ("down", "up")
offset = 0
if direction == "up":
offset = unroll - 1
insns = ""
for i in range(unroll):
if cpu_arch in ("x86", "x86_64"):
if size == 1:
insns += fmt_insn("movb OFFSET(%[src]), %[scratch]")
insns += fmt_insn("movb %[scratch], OFFSET(%[dst])")
elif size == 4:
insns += fmt_insn("movl OFFSET(%[src]), %[scratch]")
insns += fmt_insn("movl %[scratch], OFFSET(%[dst])")
else:
assert size == 8
insns += fmt_insn("movq OFFSET(%[src]), %[scratch]")
insns += fmt_insn("movq %[scratch], OFFSET(%[dst])")
elif cpu_arch == "aarch64":
if size == 1:
insns += fmt_insn("ldrb %w[scratch], [%x[src], OFFSET]")
insns += fmt_insn("strb %w[scratch], [%x[dst], OFFSET]")
else:
assert size == 8
insns += fmt_insn("ldr %x[scratch], [%x[src], OFFSET]")
insns += fmt_insn("str %x[scratch], [%x[dst], OFFSET]")
elif cpu_arch == "arm":
if size == 1:
insns += fmt_insn("ldrb %[scratch], [%[src], OFFSET]")
insns += fmt_insn("strb %[scratch], [%[dst], OFFSET]")
else:
assert size == 4
insns += fmt_insn("ldr %[scratch], [%[src], OFFSET]")
insns += fmt_insn("str %[scratch], [%[dst], OFFSET]")
else:
raise Exception("Unexpected arch")
insns = insns.replace("OFFSET", str(offset * size))
if direction == "down":
offset += 1
else:
offset -= 1
return """
inline void %(fun_name)s(uint8_t* dst, const uint8_t* src) {
%(cpp_type)s* dst_ = reinterpret_cast<%(cpp_type)s*>(dst);
const %(cpp_type)s* src_ = reinterpret_cast<const %(cpp_type)s*>(src);
%(cpp_type)s scratch;
asm volatile (%(insns)s
: [scratch] "=&r" (scratch)
: [dst] "r" (dst_), [src] "r"(src_)
: "memory");
}""" % {
"cpp_type": cpp_type,
"fun_name": fun_name,
"insns": insns,
}
HEADER_TEMPLATE = """\
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef jit_AtomicOperationsGenerated_h
#define jit_AtomicOperationsGenerated_h
/* This file is generated by jit/GenerateAtomicOperations.py. Do not edit! */
namespace js {
namespace jit {
%(contents)s
} // namespace jit
} // namespace js
#endif // jit_AtomicOperationsGenerated_h
"""
def generate_atomics_header(c_out):
contents = ""
if cpu_arch in ("x86", "x86_64", "arm", "aarch64"):
contents += "#define JS_HAVE_GENERATED_ATOMIC_OPS 1"
# `fence` performs a full memory barrier.
contents += gen_seqcst("AtomicFenceSeqCst")
contents += gen_load("AtomicLoad8SeqCst", "uint8_t", 8, True)
contents += gen_load("AtomicLoad16SeqCst", "uint16_t", 16, True)
contents += gen_load("AtomicLoad32SeqCst", "uint32_t", 32, True)
if is_64bit:
contents += gen_load("AtomicLoad64SeqCst", "uint64_t", 64, True)
# These are access-atomic up to sizeof(uintptr_t).
contents += gen_load("AtomicLoad8Unsynchronized", "uint8_t", 8, False)
contents += gen_load("AtomicLoad16Unsynchronized", "uint16_t", 16, False)
contents += gen_load("AtomicLoad32Unsynchronized", "uint32_t", 32, False)
if is_64bit:
contents += gen_load("AtomicLoad64Unsynchronized", "uint64_t", 64, False)
contents += gen_store("AtomicStore8SeqCst", "uint8_t", 8, True)
contents += gen_store("AtomicStore16SeqCst", "uint16_t", 16, True)
contents += gen_store("AtomicStore32SeqCst", "uint32_t", 32, True)
if is_64bit:
contents += gen_store("AtomicStore64SeqCst", "uint64_t", 64, True)
# These are access-atomic up to sizeof(uintptr_t).
contents += gen_store("AtomicStore8Unsynchronized", "uint8_t", 8, False)
contents += gen_store("AtomicStore16Unsynchronized", "uint16_t", 16, False)
contents += gen_store("AtomicStore32Unsynchronized", "uint32_t", 32, False)
if is_64bit:
contents += gen_store("AtomicStore64Unsynchronized", "uint64_t", 64, False)
# `exchange` takes a cell address and a value. It stores it in the cell and
# returns the value previously in the cell.
contents += gen_exchange("AtomicExchange8SeqCst", "uint8_t", 8)
contents += gen_exchange("AtomicExchange16SeqCst", "uint16_t", 16)
contents += gen_exchange("AtomicExchange32SeqCst", "uint32_t", 32)
if is_64bit:
contents += gen_exchange("AtomicExchange64SeqCst", "uint64_t", 64)
# `cmpxchg` takes a cell address, an expected value and a replacement value.
# If the value in the cell equals the expected value then the replacement value
# is stored in the cell. It always returns the value previously in the cell.
contents += gen_cmpxchg("AtomicCmpXchg8SeqCst", "uint8_t", 8)
contents += gen_cmpxchg("AtomicCmpXchg16SeqCst", "uint16_t", 16)
contents += gen_cmpxchg("AtomicCmpXchg32SeqCst", "uint32_t", 32)
contents += gen_cmpxchg("AtomicCmpXchg64SeqCst", "uint64_t", 64)
# `add` adds a value atomically to the cell and returns the old value in the
# cell. (There is no `sub`; just add the negated value.)
contents += gen_fetchop("AtomicAdd8SeqCst", "uint8_t", 8, "add")
contents += gen_fetchop("AtomicAdd16SeqCst", "uint16_t", 16, "add")
contents += gen_fetchop("AtomicAdd32SeqCst", "uint32_t", 32, "add")
if is_64bit:
contents += gen_fetchop("AtomicAdd64SeqCst", "uint64_t", 64, "add")
# `and` bitwise-ands a value atomically into the cell and returns the old value
# in the cell.
contents += gen_fetchop("AtomicAnd8SeqCst", "uint8_t", 8, "and")
contents += gen_fetchop("AtomicAnd16SeqCst", "uint16_t", 16, "and")
contents += gen_fetchop("AtomicAnd32SeqCst", "uint32_t", 32, "and")
if is_64bit:
contents += gen_fetchop("AtomicAnd64SeqCst", "uint64_t", 64, "and")
# `or` bitwise-ors a value atomically into the cell and returns the old value
# in the cell.
contents += gen_fetchop("AtomicOr8SeqCst", "uint8_t", 8, "or")
contents += gen_fetchop("AtomicOr16SeqCst", "uint16_t", 16, "or")
contents += gen_fetchop("AtomicOr32SeqCst", "uint32_t", 32, "or")
if is_64bit:
contents += gen_fetchop("AtomicOr64SeqCst", "uint64_t", 64, "or")
# `xor` bitwise-xors a value atomically into the cell and returns the old value
# in the cell.
contents += gen_fetchop("AtomicXor8SeqCst", "uint8_t", 8, "xor")
contents += gen_fetchop("AtomicXor16SeqCst", "uint16_t", 16, "xor")
contents += gen_fetchop("AtomicXor32SeqCst", "uint32_t", 32, "xor")
if is_64bit:
contents += gen_fetchop("AtomicXor64SeqCst", "uint64_t", 64, "xor")
# See comment in jit/AtomicOperations-shared-jit.cpp for an explanation.
wordsize = 8 if is_64bit else 4
words_in_block = 8
blocksize = words_in_block * wordsize
contents += gen_copy(
"AtomicCopyUnalignedBlockDownUnsynchronized",
"uint8_t",
1,
blocksize,
"down",
)
contents += gen_copy(
"AtomicCopyUnalignedBlockUpUnsynchronized", "uint8_t", 1, blocksize, "up"
)
contents += gen_copy(
"AtomicCopyUnalignedWordDownUnsynchronized", "uint8_t", 1, wordsize, "down"
)
contents += gen_copy(
"AtomicCopyUnalignedWordUpUnsynchronized", "uint8_t", 1, wordsize, "up"
)
contents += gen_copy(
"AtomicCopyBlockDownUnsynchronized",
"uintptr_t",
wordsize,
words_in_block,
"down",
)
contents += gen_copy(
"AtomicCopyBlockUpUnsynchronized",
"uintptr_t",
wordsize,
words_in_block,
"up",
)
contents += gen_copy(
"AtomicCopyWordUnsynchronized", "uintptr_t", wordsize, 1, "down"
)
contents += gen_copy("AtomicCopyByteUnsynchronized", "uint8_t", 1, 1, "down")
contents += "\n"
contents += (
"constexpr size_t JS_GENERATED_ATOMICS_BLOCKSIZE = "
+ str(blocksize)
+ ";\n"
)
contents += (
"constexpr size_t JS_GENERATED_ATOMICS_WORDSIZE = " + str(wordsize) + ";\n"
)
c_out.write(
HEADER_TEMPLATE
% {
"contents": contents,
}
)

View File

@ -13,7 +13,6 @@
#include "jit/CacheIRSpewer.h"
#include "jit/CompileWrappers.h"
#include "jit/Ion.h"
#include "jit/JitCode.h"
#include "jit/JitOptions.h"
#include "jit/JitSpewer.h"
@ -98,11 +97,6 @@ bool jit::InitializeJit() {
}
#endif
#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
// Compute flags.
js::jit::CPUInfo::GetSSEVersion();
#endif
#if defined(JS_CODEGEN_ARM)
InitARMFlags();
#endif
@ -111,10 +105,6 @@ bool jit::InitializeJit() {
ComputeJitSupportFlags();
CheckPerf();
#ifndef JS_CODEGEN_NONE
MOZ_ASSERT(js::jit::CPUFlagsHaveBeenComputed());
#endif
return true;
}

View File

@ -3764,8 +3764,6 @@ void LIRGenerator::visitLoadUnboxedScalar(MLoadUnboxedScalar* ins) {
const LAllocation index = useRegisterOrIndexConstant(
ins->index(), ins->storageType(), ins->offsetAdjustment());
// NOTE: the generated code must match the assembly code in gen_load in
// GenerateAtomicOperations.py
Synchronization sync = Synchronization::Load();
if (ins->requiresMemoryBarrier()) {
LMemoryBarrier* fence = new (alloc()) LMemoryBarrier(sync.barrierBefore);
@ -3953,9 +3951,6 @@ void LIRGenerator::visitStoreUnboxedScalar(MStoreUnboxedScalar* ins) {
// is a store instruction that incorporates the necessary
// barriers, and we could use that instead of separate barrier and
// store instructions. See bug #1077027.
//
// NOTE: the generated code must match the assembly code in gen_store in
// GenerateAtomicOperations.py
Synchronization sync = Synchronization::Store();
if (ins->requiresMemoryBarrier()) {
LMemoryBarrier* fence = new (alloc()) LMemoryBarrier(sync.barrierBefore);

View File

@ -4935,8 +4935,6 @@ static void CompareExchange(MacroAssembler& masm,
ScratchRegisterScope scratch(masm);
// NOTE: the generated code must match the assembly code in gen_cmpxchg in
// GenerateAtomicOperations.py
masm.memoryBarrierBefore(sync);
masm.bind(&again);
@ -5040,8 +5038,6 @@ static void AtomicExchange(MacroAssembler& masm,
ScratchRegisterScope scratch(masm);
// NOTE: the generated code must match the assembly code in gen_exchange in
// GenerateAtomicOperations.py
masm.memoryBarrierBefore(sync);
masm.bind(&again);
@ -5143,8 +5139,6 @@ static void AtomicFetchOp(MacroAssembler& masm,
SecondScratchRegisterScope scratch2(masm);
Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
// NOTE: the generated code must match the assembly code in gen_fetchop in
// GenerateAtomicOperations.py
masm.memoryBarrierBefore(sync);
ScratchRegisterScope scratch(masm);
@ -5400,8 +5394,6 @@ static void CompareExchange64(MacroAssembler& masm,
SecondScratchRegisterScope scratch2(masm);
Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
// NOTE: the generated code must match the assembly code in gen_cmpxchg in
// GenerateAtomicOperations.py
masm.memoryBarrierBefore(sync);
masm.bind(&again);
@ -6160,8 +6152,6 @@ void MacroAssemblerARM::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
type == Scalar::Int32 || type == Scalar::Int64;
unsigned byteSize = access.byteSize();
// NOTE: the generated code must match the assembly code in gen_load in
// GenerateAtomicOperations.py
asMasm().memoryBarrierBefore(access.sync());
BufferOffset load;
@ -6277,8 +6267,6 @@ void MacroAssemblerARM::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
}
}
// NOTE: the generated code must match the assembly code in gen_store in
// GenerateAtomicOperations.py
asMasm().memoryBarrierAfter(access.sync());
BufferOffset store;

View File

@ -1985,8 +1985,6 @@ void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) {
Scalar::Type storageType = mir->storageType();
// NOTE: the generated code must match the assembly code in gen_load in
// GenerateAtomicOperations.py
auto sync = Synchronization::Load();
masm.memoryBarrierBefore(sync);
@ -2013,8 +2011,6 @@ void CodeGenerator::visitAtomicStore64(LAtomicStore64* lir) {
masm.loadBigInt64(value, temp1);
// NOTE: the generated code must match the assembly code in gen_store in
// GenerateAtomicOperations.py
auto sync = Synchronization::Store();
masm.memoryBarrierBefore(sync);

View File

@ -474,8 +474,6 @@ void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
instructionsExpected++;
}
// NOTE: the generated code must match the assembly code in gen_load in
// GenerateAtomicOperations.py
asMasm().memoryBarrierBefore(access.sync());
{
@ -627,8 +625,6 @@ void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
MemOperand dstAddr, AnyRegister valany,
Register64 val64) {
// NOTE: the generated code must match the assembly code in gen_store in
// GenerateAtomicOperations.py
asMasm().memoryBarrierBefore(access.sync());
{
@ -2338,8 +2334,6 @@ static void CompareExchange(MacroAssembler& masm,
MOZ_ASSERT(ptr.base().asUnsized() != output);
// NOTE: the generated code must match the assembly code in gen_cmpxchg in
// GenerateAtomicOperations.py
masm.memoryBarrierBefore(sync);
Register scratch = temps.AcquireX().asUnsized();
@ -2371,8 +2365,6 @@ static void AtomicExchange(MacroAssembler& masm,
Register scratch2 = temps.AcquireX().asUnsized();
MemOperand ptr = ComputePointerForAtomic(masm, mem, scratch2);
// NOTE: the generated code must match the assembly code in gen_exchange in
// GenerateAtomicOperations.py
masm.memoryBarrierBefore(sync);
Register scratch = temps.AcquireX().asUnsized();
@ -2403,8 +2395,6 @@ static void AtomicFetchOp(MacroAssembler& masm,
Register scratch2 = temps.AcquireX().asUnsized();
MemOperand ptr = ComputePointerForAtomic(masm, mem, scratch2);
// NOTE: the generated code must match the assembly code in gen_fetchop in
// GenerateAtomicOperations.py
masm.memoryBarrierBefore(sync);
Register scratch = temps.AcquireX().asUnsized();

View File

@ -75,7 +75,6 @@ UNIFIED_SOURCES += [
"Safepoints.cpp",
"ScalarReplacement.cpp",
"shared/Assembler-shared.cpp",
"shared/AtomicOperations-shared-jit.cpp",
"shared/CodeGenerator-shared.cpp",
"shared/Disassembler-shared.cpp",
"shared/Lowering-shared.cpp",
@ -99,6 +98,7 @@ if CONFIG["JS_CODEGEN_NONE"]:
UNIFIED_SOURCES += ["none/Trampoline-none.cpp"]
elif CONFIG["JS_CODEGEN_X86"] or CONFIG["JS_CODEGEN_X64"]:
UNIFIED_SOURCES += [
"shared/AtomicOperations-shared-jit.cpp",
"x86-shared/Architecture-x86-shared.cpp",
"x86-shared/Assembler-x86-shared.cpp",
"x86-shared/AssemblerBuffer-x86-shared.cpp",
@ -139,6 +139,7 @@ elif CONFIG["JS_CODEGEN_ARM"]:
"arm/MacroAssembler-arm.cpp",
"arm/MoveEmitter-arm.cpp",
"arm/Trampoline-arm.cpp",
"shared/AtomicOperations-shared-jit.cpp",
]
if CONFIG["JS_SIMULATOR_ARM"]:
UNIFIED_SOURCES += ["arm/Simulator-arm.cpp"]
@ -167,6 +168,7 @@ elif CONFIG["JS_CODEGEN_ARM64"]:
"arm64/vixl/MozCpu-vixl.cpp",
"arm64/vixl/MozInstructions-vixl.cpp",
"arm64/vixl/Utils-vixl.cpp",
"shared/AtomicOperations-shared-jit.cpp",
]
vixl_werror_sources = [
"arm64/vixl/Disasm-vixl.cpp",
@ -246,12 +248,5 @@ GeneratedFile(
inputs=["CacheIROps.yaml"],
)
GeneratedFile(
"AtomicOperationsGenerated.h",
script="GenerateAtomicOperations.py",
entry_point="generate_atomics_header",
inputs=[],
)
if CONFIG["FUZZING_INTERFACES"] or CONFIG["FUZZING_JS_FUZZILLI"]:
include("/tools/fuzzing/libfuzzer-config.mozbuild")

View File

@ -30,9 +30,11 @@
// Explicitly exclude tier-1 platforms.
#if (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86) || defined(__arm__) || defined(__aarch64__))
# error "Do not use on a tier-1 platform where inline assembly is available"
#if ((defined(__x86_64__) || defined(_M_X64)) && defined(JS_CODEGEN_X64)) || \
((defined(__i386__) || defined(_M_IX86)) && defined(JS_CODEGEN_X86)) || \
(defined(__arm__) && defined(JS_CODEGEN_ARM)) || \
((defined(__aarch64__) || defined(_M_ARM64)) && defined(JS_CODEGEN_ARM64))
# error "Do not use this code on a tier-1 platform when a JIT is available"
#endif
#if !(defined(__clang__) || defined(__GNUC__))
@ -101,6 +103,15 @@
// Try to avoid platform #ifdefs below this point.
inline bool js::jit::AtomicOperations::Initialize() {
// Nothing
return true;
}
inline void js::jit::AtomicOperations::ShutDown() {
// Nothing
}
// When compiling with Clang on 32-bit linux it will be necessary to link with
// -latomic to get the proper 64-bit intrinsics.
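(Illustration, not part of the patch.) The feeling-lucky-gcc fallback is built on the compiler's __atomic builtins; the -latomic remark concerns cases like the hypothetical helper below, where a 64-bit seq-cst operation on a 32-bit target may be lowered to a libatomic call rather than an inline instruction.

#include <stdint.h>

static inline uint64_t LoadSeqCst64(uint64_t* addr) {
  // On 32-bit targets Clang may emit a call into libatomic here (e.g.
  // __atomic_load_8), hence the need to link with -latomic.
  return __atomic_load_n(addr, __ATOMIC_SEQ_CST);
}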

View File

@ -0,0 +1,373 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef jit_shared_AtomicOperations_feeling_lucky_msvc_h
#define jit_shared_AtomicOperations_feeling_lucky_msvc_h
#include "mozilla/Assertions.h"
#include "mozilla/Types.h"
// Explicitly exclude tier-1 platforms.
#if ((defined(__x86_64__) || defined(_M_X64)) && defined(JS_CODEGEN_X64)) || \
((defined(__i386__) || defined(_M_IX86)) && defined(JS_CODEGEN_X86)) || \
(defined(__arm__) && defined(JS_CODEGEN_ARM)) || \
((defined(__aarch64__) || defined(_M_ARM64)) && defined(JS_CODEGEN_ARM64))
# error "Do not use this code on a tier-1 platform when a JIT is available"
#endif
#if !defined(_MSC_VER)
# error "This file only for Microsoft Visual C++"
#endif
// For overall documentation, see jit/AtomicOperations.h.
// Below, _ReadWriteBarrier is a compiler directive, preventing reordering of
// instructions and reuse of memory values across it in the compiler, but having
// no impact on what the CPU does.
// Note, here we use MSVC intrinsics directly. But MSVC supports a slightly
// higher level of function which uses the intrinsic when possible (8, 16, and
// 32-bit operations, and 64-bit operations on 64-bit systems) and otherwise
// falls back on CMPXCHG8B for 64-bit operations on 32-bit systems. We could be
// using those functions in many cases here (though not all). I have not done
// so because I don't yet know how far back those functions are supported.
// Note, _InterlockedCompareExchange takes the *new* value as the second
// argument and the *comparand* (expected old value) as the third argument.
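(Illustration, not part of the patch.) A tiny example of the argument-order pitfall that note warns about; CompareAndSwap32 is a hypothetical wrapper, but the ordering is exactly what the MSC_CAS macro below relies on.

#include <intrin.h>

// CAS a 32-bit cell from `expected` to `desired`; returns the value that was
// actually observed in the cell. Note the order: destination, NEW value,
// then the comparand (expected old value).
long CompareAndSwap32(long volatile* cell, long expected, long desired) {
  return _InterlockedCompareExchange(cell, desired, expected);
}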
inline bool js::jit::AtomicOperations::Initialize() {
// Nothing
return true;
}
inline void js::jit::AtomicOperations::ShutDown() {
// Nothing
}
inline bool js::jit::AtomicOperations::hasAtomic8() { return true; }
inline bool js::jit::AtomicOperations::isLockfree8() {
// The MSDN docs suggest very strongly that if code is compiled for Pentium
// or better the 64-bit primitives will be lock-free, see eg the "Remarks"
// section of the page for _InterlockedCompareExchange64, currently here:
// https://msdn.microsoft.com/en-us/library/ttk2z1ws%28v=vs.85%29.aspx
//
// But I've found no way to assert that at compile time or run time, there
// appears to be no WinAPI is_lock_free() test.
return true;
}
inline void js::jit::AtomicOperations::fenceSeqCst() {
_ReadWriteBarrier();
#if defined(_M_IX86) || defined(_M_X64)
_mm_mfence();
#elif defined(_M_ARM64)
// MemoryBarrier is defined in winnt.h, which we don't want to include here.
// This expression is the expansion of MemoryBarrier.
__dmb(_ARM64_BARRIER_SY);
#else
# error "Unknown hardware for MSVC"
#endif
}
template <typename T>
inline T js::jit::AtomicOperations::loadSeqCst(T* addr) {
_ReadWriteBarrier();
T v = *addr;
_ReadWriteBarrier();
return v;
}
#ifdef _M_IX86
namespace js {
namespace jit {
# define MSC_LOADOP(T) \
template <> \
inline T AtomicOperations::loadSeqCst(T* addr) { \
_ReadWriteBarrier(); \
return (T)_InterlockedCompareExchange64((__int64 volatile*)addr, 0, 0); \
}
MSC_LOADOP(int64_t)
MSC_LOADOP(uint64_t)
# undef MSC_LOADOP
} // namespace jit
} // namespace js
#endif // _M_IX86
template <typename T>
inline void js::jit::AtomicOperations::storeSeqCst(T* addr, T val) {
_ReadWriteBarrier();
*addr = val;
fenceSeqCst();
}
#ifdef _M_IX86
namespace js {
namespace jit {
# define MSC_STOREOP(T) \
template <> \
inline void AtomicOperations::storeSeqCst(T* addr, T val) { \
_ReadWriteBarrier(); \
T oldval = *addr; \
for (;;) { \
T nextval = (T)_InterlockedCompareExchange64( \
(__int64 volatile*)addr, (__int64)val, (__int64)oldval); \
if (nextval == oldval) break; \
oldval = nextval; \
} \
_ReadWriteBarrier(); \
}
MSC_STOREOP(int64_t)
MSC_STOREOP(uint64_t)
# undef MSC_STOREOP
} // namespace jit
} // namespace js
#endif // _M_IX86
#define MSC_EXCHANGEOP(T, U, xchgop) \
template <> \
inline T AtomicOperations::exchangeSeqCst(T* addr, T val) { \
return (T)xchgop((U volatile*)addr, (U)val); \
}
#ifdef _M_IX86
# define MSC_EXCHANGEOP_CAS(T) \
template <> \
inline T AtomicOperations::exchangeSeqCst(T* addr, T val) { \
_ReadWriteBarrier(); \
T oldval = *addr; \
for (;;) { \
T nextval = (T)_InterlockedCompareExchange64( \
(__int64 volatile*)addr, (__int64)val, (__int64)oldval); \
if (nextval == oldval) break; \
oldval = nextval; \
} \
_ReadWriteBarrier(); \
return oldval; \
}
#endif // _M_IX86
namespace js {
namespace jit {
MSC_EXCHANGEOP(int8_t, char, _InterlockedExchange8)
MSC_EXCHANGEOP(uint8_t, char, _InterlockedExchange8)
MSC_EXCHANGEOP(int16_t, short, _InterlockedExchange16)
MSC_EXCHANGEOP(uint16_t, short, _InterlockedExchange16)
MSC_EXCHANGEOP(int32_t, long, _InterlockedExchange)
MSC_EXCHANGEOP(uint32_t, long, _InterlockedExchange)
#ifdef _M_IX86
MSC_EXCHANGEOP_CAS(int64_t)
MSC_EXCHANGEOP_CAS(uint64_t)
#else
MSC_EXCHANGEOP(int64_t, __int64, _InterlockedExchange64)
MSC_EXCHANGEOP(uint64_t, __int64, _InterlockedExchange64)
#endif
} // namespace jit
} // namespace js
#undef MSC_EXCHANGEOP
#undef MSC_EXCHANGEOP_CAS
#define MSC_CAS(T, U, cmpxchg) \
template <> \
inline T AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, \
T newval) { \
return (T)cmpxchg((U volatile*)addr, (U)newval, (U)oldval); \
}
namespace js {
namespace jit {
MSC_CAS(int8_t, char, _InterlockedCompareExchange8)
MSC_CAS(uint8_t, char, _InterlockedCompareExchange8)
MSC_CAS(int16_t, short, _InterlockedCompareExchange16)
MSC_CAS(uint16_t, short, _InterlockedCompareExchange16)
MSC_CAS(int32_t, long, _InterlockedCompareExchange)
MSC_CAS(uint32_t, long, _InterlockedCompareExchange)
MSC_CAS(int64_t, __int64, _InterlockedCompareExchange64)
MSC_CAS(uint64_t, __int64, _InterlockedCompareExchange64)
} // namespace jit
} // namespace js
#undef MSC_CAS
#define MSC_FETCHADDOP(T, U, xadd) \
template <> \
inline T AtomicOperations::fetchAddSeqCst(T* addr, T val) { \
return (T)xadd((U volatile*)addr, (U)val); \
}
#define MSC_FETCHSUBOP(T) \
template <> \
inline T AtomicOperations::fetchSubSeqCst(T* addr, T val) { \
return fetchAddSeqCst(addr, (T)(0 - val)); \
}
#ifdef _M_IX86
# define MSC_FETCHADDOP_CAS(T) \
template <> \
inline T AtomicOperations::fetchAddSeqCst(T* addr, T val) { \
_ReadWriteBarrier(); \
T oldval = *addr; \
for (;;) { \
T nextval = (T)_InterlockedCompareExchange64((__int64 volatile*)addr, \
(__int64)(oldval + val), \
(__int64)oldval); \
if (nextval == oldval) break; \
oldval = nextval; \
} \
_ReadWriteBarrier(); \
return oldval; \
}
#endif // _M_IX86
namespace js {
namespace jit {
MSC_FETCHADDOP(int8_t, char, _InterlockedExchangeAdd8)
MSC_FETCHADDOP(uint8_t, char, _InterlockedExchangeAdd8)
MSC_FETCHADDOP(int16_t, short, _InterlockedExchangeAdd16)
MSC_FETCHADDOP(uint16_t, short, _InterlockedExchangeAdd16)
MSC_FETCHADDOP(int32_t, long, _InterlockedExchangeAdd)
MSC_FETCHADDOP(uint32_t, long, _InterlockedExchangeAdd)
#ifdef _M_IX86
MSC_FETCHADDOP_CAS(int64_t)
MSC_FETCHADDOP_CAS(uint64_t)
#else
MSC_FETCHADDOP(int64_t, __int64, _InterlockedExchangeAdd64)
MSC_FETCHADDOP(uint64_t, __int64, _InterlockedExchangeAdd64)
#endif
MSC_FETCHSUBOP(int8_t)
MSC_FETCHSUBOP(uint8_t)
MSC_FETCHSUBOP(int16_t)
MSC_FETCHSUBOP(uint16_t)
MSC_FETCHSUBOP(int32_t)
MSC_FETCHSUBOP(uint32_t)
MSC_FETCHSUBOP(int64_t)
MSC_FETCHSUBOP(uint64_t)
} // namespace jit
} // namespace js
#undef MSC_FETCHADDOP
#undef MSC_FETCHADDOP_CAS
#undef MSC_FETCHSUBOP
#define MSC_FETCHBITOPX(T, U, name, op) \
template <> \
inline T AtomicOperations::name(T* addr, T val) { \
return (T)op((U volatile*)addr, (U)val); \
}
#define MSC_FETCHBITOP(T, U, andop, orop, xorop) \
MSC_FETCHBITOPX(T, U, fetchAndSeqCst, andop) \
MSC_FETCHBITOPX(T, U, fetchOrSeqCst, orop) \
MSC_FETCHBITOPX(T, U, fetchXorSeqCst, xorop)
#ifdef _M_IX86
# define AND_OP &
# define OR_OP |
# define XOR_OP ^
# define MSC_FETCHBITOPX_CAS(T, name, OP) \
template <> \
inline T AtomicOperations::name(T* addr, T val) { \
_ReadWriteBarrier(); \
T oldval = *addr; \
for (;;) { \
T nextval = (T)_InterlockedCompareExchange64((__int64 volatile*)addr, \
(__int64)(oldval OP val), \
(__int64)oldval); \
if (nextval == oldval) break; \
oldval = nextval; \
} \
_ReadWriteBarrier(); \
return oldval; \
}
# define MSC_FETCHBITOP_CAS(T) \
MSC_FETCHBITOPX_CAS(T, fetchAndSeqCst, AND_OP) \
MSC_FETCHBITOPX_CAS(T, fetchOrSeqCst, OR_OP) \
MSC_FETCHBITOPX_CAS(T, fetchXorSeqCst, XOR_OP)
#endif
namespace js {
namespace jit {
MSC_FETCHBITOP(int8_t, char, _InterlockedAnd8, _InterlockedOr8,
_InterlockedXor8)
MSC_FETCHBITOP(uint8_t, char, _InterlockedAnd8, _InterlockedOr8,
_InterlockedXor8)
MSC_FETCHBITOP(int16_t, short, _InterlockedAnd16, _InterlockedOr16,
_InterlockedXor16)
MSC_FETCHBITOP(uint16_t, short, _InterlockedAnd16, _InterlockedOr16,
_InterlockedXor16)
MSC_FETCHBITOP(int32_t, long, _InterlockedAnd, _InterlockedOr, _InterlockedXor)
MSC_FETCHBITOP(uint32_t, long, _InterlockedAnd, _InterlockedOr, _InterlockedXor)
#ifdef _M_IX86
MSC_FETCHBITOP_CAS(int64_t)
MSC_FETCHBITOP_CAS(uint64_t)
#else
MSC_FETCHBITOP(int64_t, __int64, _InterlockedAnd64, _InterlockedOr64,
_InterlockedXor64)
MSC_FETCHBITOP(uint64_t, __int64, _InterlockedAnd64, _InterlockedOr64,
_InterlockedXor64)
#endif
} // namespace jit
} // namespace js
#undef MSC_FETCHBITOPX_CAS
#undef MSC_FETCHBITOPX
#undef MSC_FETCHBITOP_CAS
#undef MSC_FETCHBITOP
template <typename T>
inline T js::jit::AtomicOperations::loadSafeWhenRacy(T* addr) {
// This is also appropriate for double, int64, and uint64 on 32-bit
// platforms since there are no guarantees of access-atomicity.
return *addr;
}
template <typename T>
inline void js::jit::AtomicOperations::storeSafeWhenRacy(T* addr, T val) {
// This is also appropriate for double, int64, and uint64 on 32-bit
// platforms since there are no guarantees of access-atomicity.
*addr = val;
}
inline void js::jit::AtomicOperations::memcpySafeWhenRacy(void* dest,
const void* src,
size_t nbytes) {
MOZ_ASSERT(!((char*)dest <= (char*)src && (char*)src < (char*)dest + nbytes));
MOZ_ASSERT(!((char*)src <= (char*)dest && (char*)dest < (char*)src + nbytes));
::memcpy(dest, src, nbytes);
}
inline void js::jit::AtomicOperations::memmoveSafeWhenRacy(void* dest,
const void* src,
size_t nbytes) {
::memmove(dest, src, nbytes);
}
#endif // jit_shared_AtomicOperations_feeling_lucky_msvc_h

View File

@ -9,6 +9,8 @@
#if defined(__clang__) || defined(__GNUC__)
# include "jit/shared/AtomicOperations-feeling-lucky-gcc.h"
#elif defined(_MSC_VER)
# include "jit/shared/AtomicOperations-feeling-lucky-msvc.h"
#else
# error "No AtomicOperations support for this platform+compiler combination"
#endif

View File

@ -4,15 +4,156 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/Atomics.h"
#ifdef JS_CODEGEN_ARM
# include "jit/arm/Architecture-arm.h"
#endif
#include "jit/AtomicOperations.h"
#include "jit/IonTypes.h"
#include "jit/MacroAssembler.h"
#include "jit/RegisterSets.h"
#include "js/ScalarType.h" // js::Scalar::Type
#include "util/Poison.h"
#ifdef JS_HAVE_GENERATED_ATOMIC_OPS
# include <atomic>
#include "jit/MacroAssembler-inl.h"
using namespace js;
using namespace js::jit;
// Assigned registers must follow these rules:
//
// - if they overlap the argument registers (for arguments we use) then they
//
// M M U U SSSS TTTTT
// ====\ MM MM U U S T /====
// =====> M M M U U SSS T <=====
// ====/ M M U U S T \====
// M M UUU SSSS T
//
// require no register movement, even for 64-bit registers. (If this becomes
// too complex to handle then we need to create an abstraction that uses the
// MoveResolver, see comments on bug 1394420.)
//
// - they should be volatile when possible so that we don't have to save and
// restore them.
//
// Note that the functions we're generating have a very limited number of
// signatures, and the register assignments need only work for these signatures.
// The signatures are these:
//
// ()
// (ptr)
// (ptr, val/val64)
// (ptr, ptr)
// (ptr, val/val64, val/val64)
//
// It would be nice to avoid saving and restoring all the nonvolatile registers
// for all the operations, and instead save and restore only the registers used
// by each specific operation, but the amount of protocol needed to accomplish
// that probably does not pay for itself.
#if defined(JS_CODEGEN_X64)
// Selected registers match the argument registers exactly, and none of them
// overlap the result register.
static const LiveRegisterSet AtomicNonVolatileRegs;
static constexpr Register AtomicPtrReg = IntArgReg0;
static constexpr Register AtomicPtr2Reg = IntArgReg1;
static constexpr Register AtomicValReg = IntArgReg1;
static constexpr Register64 AtomicValReg64(IntArgReg1);
static constexpr Register AtomicVal2Reg = IntArgReg2;
static constexpr Register64 AtomicVal2Reg64(IntArgReg2);
static constexpr Register AtomicTemp = IntArgReg3;
static constexpr Register64 AtomicTemp64(IntArgReg3);
static constexpr Register64 AtomicReturnReg64 = ReturnReg64;
#elif defined(JS_CODEGEN_ARM64)
// Selected registers match the argument registers, except that the Ptr is not
// in IntArgReg0 so as not to conflict with the result register.
static const LiveRegisterSet AtomicNonVolatileRegs;
static constexpr Register AtomicPtrReg = IntArgReg4;
static constexpr Register AtomicPtr2Reg = IntArgReg1;
static constexpr Register AtomicValReg = IntArgReg1;
static constexpr Register64 AtomicValReg64(IntArgReg1);
static constexpr Register AtomicVal2Reg = IntArgReg2;
static constexpr Register64 AtomicVal2Reg64(IntArgReg2);
static constexpr Register AtomicTemp = IntArgReg3;
static constexpr Register64 AtomicTemp64(IntArgReg3);
static constexpr Register64 AtomicReturnReg64 = ReturnReg64;
#elif defined(JS_CODEGEN_ARM)
// Assigned registers except temp are disjoint from the argument registers,
// since accounting for both 32-bit and 64-bit arguments and constraints on the
// result register is much too messy. The temp is in an argument register since
// it won't be used until we've moved all arguments to other registers.
//
// Save LR because it's the second scratch register. The first scratch register
// is r12 (IP). The atomics implementation in the MacroAssembler uses both.
static const LiveRegisterSet AtomicNonVolatileRegs = LiveRegisterSet(
GeneralRegisterSet(
(uint32_t(1) << Registers::r4) | (uint32_t(1) << Registers::r5) |
(uint32_t(1) << Registers::r6) | (uint32_t(1) << Registers::r7) |
(uint32_t(1) << Registers::r8) | (uint32_t(1) << Registers::lr)),
FloatRegisterSet(0));
static constexpr Register AtomicPtrReg = r8;
static constexpr Register AtomicPtr2Reg = r6;
static constexpr Register AtomicTemp = r3;
static constexpr Register AtomicValReg = r6;
static constexpr Register64 AtomicValReg64(r7, r6);
static constexpr Register AtomicVal2Reg = r4;
static constexpr Register64 AtomicVal2Reg64(r5, r4);
static constexpr Register64 AtomicReturnReg64 = ReturnReg64;
#elif defined(JS_CODEGEN_X86)
// There are no argument registers.
static const LiveRegisterSet AtomicNonVolatileRegs = LiveRegisterSet(
GeneralRegisterSet((1 << X86Encoding::rbx) | (1 << X86Encoding::rsi)),
FloatRegisterSet(0));
static constexpr Register AtomicPtrReg = esi;
static constexpr Register AtomicPtr2Reg = ebx;
static constexpr Register AtomicValReg = ebx;
static constexpr Register AtomicVal2Reg = ecx;
static constexpr Register AtomicTemp = edx;
// 64-bit registers for cmpxchg8b. ValReg/Val2Reg/Temp are not used in this
// case.
static constexpr Register64 AtomicValReg64(edx, eax);
static constexpr Register64 AtomicVal2Reg64(ecx, ebx);
// AtomicReturnReg64 is unused on x86.
#else
# error "Unsupported platform"
#endif
// These are useful shorthands and hide the meaningless uint/int distinction.
static constexpr Scalar::Type SIZE8 = Scalar::Uint8;
static constexpr Scalar::Type SIZE16 = Scalar::Uint16;
static constexpr Scalar::Type SIZE32 = Scalar::Uint32;
static constexpr Scalar::Type SIZE64 = Scalar::Int64;
#ifdef JS_64BIT
static constexpr Scalar::Type SIZEWORD = SIZE64;
#else
static constexpr Scalar::Type SIZEWORD = SIZE32;
#endif
// A "block" is a sequence of bytes that is a reasonable quantum to copy to
// amortize call overhead when implementing memcpy and memmove. A block will
// not fit in registers on all platforms and copying it without using
@ -25,45 +166,487 @@ using namespace js::jit;
// Blocks and words can be aligned or unaligned; specific (generated) copying
// functions handle this in platform-specific ways.
static constexpr size_t WORDSIZE = sizeof(uintptr_t);
static constexpr size_t WORDSIZE =
sizeof(uintptr_t); // Also see SIZEWORD above
static constexpr size_t BLOCKSIZE = 8 * WORDSIZE; // Must be a power of 2
static_assert(BLOCKSIZE % WORDSIZE == 0,
"A block is an integral number of words");
// Constants must match the ones in GenerateAtomicOperations.py
static_assert(JS_GENERATED_ATOMICS_BLOCKSIZE == BLOCKSIZE);
static_assert(JS_GENERATED_ATOMICS_WORDSIZE == WORDSIZE);
static constexpr size_t WORDMASK = WORDSIZE - 1;
static constexpr size_t BLOCKMASK = BLOCKSIZE - 1;
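(Illustration, not part of the patch.) A quick worked check of the constants that the static_asserts above tie together, assuming a 64-bit build; a 32-bit build yields 4 and 32 instead.

#include <cstddef>
#include <cstdint>

constexpr size_t kWordSize  = sizeof(uintptr_t);   // 8 on a 64-bit target
constexpr size_t kBlockSize = 8 * kWordSize;       // 64 == JS_GENERATED_ATOMICS_BLOCKSIZE
constexpr size_t kWordMask  = kWordSize - 1;       // 0x07
constexpr size_t kBlockMask = kBlockSize - 1;      // 0x3f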
struct ArgIterator {
ABIArgGenerator abi;
unsigned argBase = 0;
};
static void GenGprArg(MacroAssembler& masm, MIRType t, ArgIterator* iter,
Register reg) {
MOZ_ASSERT(t == MIRType::Pointer || t == MIRType::Int32);
ABIArg arg = iter->abi.next(t);
switch (arg.kind()) {
case ABIArg::GPR: {
if (arg.gpr() != reg) {
masm.movePtr(arg.gpr(), reg);
}
break;
}
case ABIArg::Stack: {
Address src(masm.getStackPointer(),
iter->argBase + arg.offsetFromArgBase());
masm.loadPtr(src, reg);
break;
}
default: {
MOZ_CRASH("Not possible");
}
}
}
static void GenGpr64Arg(MacroAssembler& masm, ArgIterator* iter,
Register64 reg) {
ABIArg arg = iter->abi.next(MIRType::Int64);
switch (arg.kind()) {
case ABIArg::GPR: {
if (arg.gpr64() != reg) {
masm.move64(arg.gpr64(), reg);
}
break;
}
case ABIArg::Stack: {
Address src(masm.getStackPointer(),
iter->argBase + arg.offsetFromArgBase());
#ifdef JS_64BIT
masm.load64(src, reg);
#else
masm.load32(LowWord(src), reg.low);
masm.load32(HighWord(src), reg.high);
#endif
break;
}
#if defined(JS_CODEGEN_REGISTER_PAIR)
case ABIArg::GPR_PAIR: {
if (arg.gpr64() != reg) {
masm.move32(arg.oddGpr(), reg.high);
masm.move32(arg.evenGpr(), reg.low);
}
break;
}
#endif
default: {
MOZ_CRASH("Not possible");
}
}
}
static uint32_t GenPrologue(MacroAssembler& masm, ArgIterator* iter) {
masm.assumeUnreachable("Shouldn't get here");
masm.flushBuffer();
masm.haltingAlign(CodeAlignment);
masm.setFramePushed(0);
uint32_t start = masm.currentOffset();
masm.PushRegsInMask(AtomicNonVolatileRegs);
#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
// The return address is among the nonvolatile registers, if pushed at all.
iter->argBase = masm.framePushed();
#elif defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
// The return address is pushed separately.
iter->argBase = sizeof(void*) + masm.framePushed();
#else
# error "Unsupported platform"
#endif
return start;
}
static void GenEpilogue(MacroAssembler& masm) {
masm.PopRegsInMask(AtomicNonVolatileRegs);
MOZ_ASSERT(masm.framePushed() == 0);
#if defined(JS_CODEGEN_ARM64)
masm.Ret();
#elif defined(JS_CODEGEN_ARM)
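// Assuming the assembler's mov(src, dest) operand order, this copies lr into
// pc, returning to the caller.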
masm.mov(lr, pc);
#elif defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
masm.ret();
#endif
}
#ifndef JS_64BIT
static uint32_t GenNop(MacroAssembler& masm) {
ArgIterator iter;
uint32_t start = GenPrologue(masm, &iter);
GenEpilogue(masm);
return start;
}
#endif
static uint32_t GenFenceSeqCst(MacroAssembler& masm) {
ArgIterator iter;
uint32_t start = GenPrologue(masm, &iter);
masm.memoryBarrier(MembarFull);
GenEpilogue(masm);
return start;
}
static uint32_t GenLoad(MacroAssembler& masm, Scalar::Type size,
Synchronization sync) {
ArgIterator iter;
uint32_t start = GenPrologue(masm, &iter);
GenGprArg(masm, MIRType::Pointer, &iter, AtomicPtrReg);
masm.memoryBarrier(sync.barrierBefore);
Address addr(AtomicPtrReg, 0);
switch (size) {
case SIZE8:
masm.load8ZeroExtend(addr, ReturnReg);
break;
case SIZE16:
masm.load16ZeroExtend(addr, ReturnReg);
break;
case SIZE32:
masm.load32(addr, ReturnReg);
break;
case SIZE64:
#if defined(JS_64BIT)
masm.load64(addr, AtomicReturnReg64);
break;
#else
MOZ_CRASH("64-bit atomic load not available on this platform");
#endif
default:
MOZ_CRASH("Unknown size");
}
masm.memoryBarrier(sync.barrierAfter);
GenEpilogue(masm);
return start;
}
static uint32_t GenStore(MacroAssembler& masm, Scalar::Type size,
Synchronization sync) {
ArgIterator iter;
uint32_t start = GenPrologue(masm, &iter);
GenGprArg(masm, MIRType::Pointer, &iter, AtomicPtrReg);
masm.memoryBarrier(sync.barrierBefore);
Address addr(AtomicPtrReg, 0);
switch (size) {
case SIZE8:
GenGprArg(masm, MIRType::Int32, &iter, AtomicValReg);
masm.store8(AtomicValReg, addr);
break;
case SIZE16:
GenGprArg(masm, MIRType::Int32, &iter, AtomicValReg);
masm.store16(AtomicValReg, addr);
break;
case SIZE32:
GenGprArg(masm, MIRType::Int32, &iter, AtomicValReg);
masm.store32(AtomicValReg, addr);
break;
case SIZE64:
#if defined(JS_64BIT)
GenGpr64Arg(masm, &iter, AtomicValReg64);
masm.store64(AtomicValReg64, addr);
break;
#else
MOZ_CRASH("64-bit atomic store not available on this platform");
#endif
default:
MOZ_CRASH("Unknown size");
}
masm.memoryBarrier(sync.barrierAfter);
GenEpilogue(masm);
return start;
}
enum class CopyDir {
DOWN,  // Move data down, i.e., iterate toward higher addresses
UP     // Move data up, i.e., iterate toward lower addresses
};
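// For example, GenCopy(masm, SIZE8, 8, CopyDir::UP) emits eight byte copies
// at offsets 7, 6, ..., 0, while the DOWN variant emits them at offsets
// 0, 1, ..., 7; see the offset bookkeeping in GenCopy below.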
static uint32_t GenCopy(MacroAssembler& masm, Scalar::Type size,
uint32_t unroll, CopyDir direction) {
ArgIterator iter;
uint32_t start = GenPrologue(masm, &iter);
Register dest = AtomicPtrReg;
Register src = AtomicPtr2Reg;
GenGprArg(masm, MIRType::Pointer, &iter, dest);
GenGprArg(masm, MIRType::Pointer, &iter, src);
uint32_t offset = direction == CopyDir::DOWN ? 0 : unroll - 1;
for (uint32_t i = 0; i < unroll; i++) {
switch (size) {
case SIZE8:
masm.load8ZeroExtend(Address(src, offset), AtomicTemp);
masm.store8(AtomicTemp, Address(dest, offset));
break;
case SIZE16:
masm.load16ZeroExtend(Address(src, offset * 2), AtomicTemp);
masm.store16(AtomicTemp, Address(dest, offset * 2));
break;
case SIZE32:
masm.load32(Address(src, offset * 4), AtomicTemp);
masm.store32(AtomicTemp, Address(dest, offset * 4));
break;
case SIZE64:
#if defined(JS_64BIT)
masm.load64(Address(src, offset * 8), AtomicTemp64);
masm.store64(AtomicTemp64, Address(dest, offset * 8));
break;
#else
MOZ_CRASH("64-bit atomic load/store not available on this platform");
#endif
default:
MOZ_CRASH("Unknown size");
}
offset += direction == CopyDir::DOWN ? 1 : -1;
}
GenEpilogue(masm);
return start;
}
static uint32_t GenCmpxchg(MacroAssembler& masm, Scalar::Type size,
Synchronization sync) {
ArgIterator iter;
uint32_t start = GenPrologue(masm, &iter);
GenGprArg(masm, MIRType::Pointer, &iter, AtomicPtrReg);
Address addr(AtomicPtrReg, 0);
switch (size) {
case SIZE8:
case SIZE16:
case SIZE32:
GenGprArg(masm, MIRType::Int32, &iter, AtomicValReg);
GenGprArg(masm, MIRType::Int32, &iter, AtomicVal2Reg);
masm.compareExchange(size, sync, addr, AtomicValReg, AtomicVal2Reg,
ReturnReg);
break;
case SIZE64:
GenGpr64Arg(masm, &iter, AtomicValReg64);
GenGpr64Arg(masm, &iter, AtomicVal2Reg64);
#if defined(JS_CODEGEN_X86)
static_assert(AtomicValReg64 == Register64(edx, eax));
static_assert(AtomicVal2Reg64 == Register64(ecx, ebx));
// The result is returned in edx:eax as required by the ABI; that register
// pair is not necessarily the same as ReturnReg64, so it is correct not to
// use ReturnReg64 here.
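// CMPXCHG8B compares edx:eax with the 8-byte memory operand; on a match it
// stores ecx:ebx there, otherwise it loads the current value into edx:eax.
// Either way edx:eax ends up holding the old value, which is this stub's
// return value, hence the register pinning asserted above.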
masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(addr));
#else
masm.compareExchange64(sync, addr, AtomicValReg64, AtomicVal2Reg64,
AtomicReturnReg64);
#endif
break;
default:
MOZ_CRASH("Unknown size");
}
GenEpilogue(masm);
return start;
}
static uint32_t GenExchange(MacroAssembler& masm, Scalar::Type size,
Synchronization sync) {
ArgIterator iter;
uint32_t start = GenPrologue(masm, &iter);
GenGprArg(masm, MIRType::Pointer, &iter, AtomicPtrReg);
Address addr(AtomicPtrReg, 0);
switch (size) {
case SIZE8:
case SIZE16:
case SIZE32:
GenGprArg(masm, MIRType::Int32, &iter, AtomicValReg);
masm.atomicExchange(size, sync, addr, AtomicValReg, ReturnReg);
break;
case SIZE64:
#if defined(JS_64BIT)
GenGpr64Arg(masm, &iter, AtomicValReg64);
masm.atomicExchange64(sync, addr, AtomicValReg64, AtomicReturnReg64);
break;
#else
MOZ_CRASH("64-bit atomic exchange not available on this platform");
#endif
default:
MOZ_CRASH("Unknown size");
}
GenEpilogue(masm);
return start;
}
static uint32_t GenFetchOp(MacroAssembler& masm, Scalar::Type size, AtomicOp op,
Synchronization sync) {
ArgIterator iter;
uint32_t start = GenPrologue(masm, &iter);
GenGprArg(masm, MIRType::Pointer, &iter, AtomicPtrReg);
Address addr(AtomicPtrReg, 0);
switch (size) {
case SIZE8:
case SIZE16:
case SIZE32: {
#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
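// Fetch-add and fetch-sub need no scratch register here, presumably because
// they can be emitted as LOCK XADD; the bitwise ops go through a
// compare-exchange loop (see ATOMIC_BITOP_BODY in the x86-shared
// MacroAssembler) and need AtomicTemp.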
Register tmp = op == AtomicFetchAddOp || op == AtomicFetchSubOp
? Register::Invalid()
: AtomicTemp;
#else
Register tmp = AtomicTemp;
#endif
GenGprArg(masm, MIRType::Int32, &iter, AtomicValReg);
masm.atomicFetchOp(size, sync, op, AtomicValReg, addr, tmp, ReturnReg);
break;
}
case SIZE64: {
#if defined(JS_64BIT)
# if defined(JS_CODEGEN_X64)
Register64 tmp = op == AtomicFetchAddOp || op == AtomicFetchSubOp
? Register64::Invalid()
: AtomicTemp64;
# else
Register64 tmp = AtomicTemp64;
# endif
GenGpr64Arg(masm, &iter, AtomicValReg64);
masm.atomicFetchOp64(sync, op, AtomicValReg64, addr, tmp,
AtomicReturnReg64);
break;
#else
MOZ_CRASH("64-bit atomic fetchOp not available on this platform");
#endif
}
default:
MOZ_CRASH("Unknown size");
}
GenEpilogue(masm);
return start;
}
namespace js {
namespace jit {
void (*AtomicFenceSeqCst)();
#ifndef JS_64BIT
void (*AtomicCompilerFence)();
#endif
uint8_t (*AtomicLoad8SeqCst)(const uint8_t* addr);
uint16_t (*AtomicLoad16SeqCst)(const uint16_t* addr);
uint32_t (*AtomicLoad32SeqCst)(const uint32_t* addr);
#ifdef JS_64BIT
uint64_t (*AtomicLoad64SeqCst)(const uint64_t* addr);
#endif
uint8_t (*AtomicLoad8Unsynchronized)(const uint8_t* addr);
uint16_t (*AtomicLoad16Unsynchronized)(const uint16_t* addr);
uint32_t (*AtomicLoad32Unsynchronized)(const uint32_t* addr);
#ifdef JS_64BIT
uint64_t (*AtomicLoad64Unsynchronized)(const uint64_t* addr);
#endif
uint8_t (*AtomicStore8SeqCst)(uint8_t* addr, uint8_t val);
uint16_t (*AtomicStore16SeqCst)(uint16_t* addr, uint16_t val);
uint32_t (*AtomicStore32SeqCst)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
uint64_t (*AtomicStore64SeqCst)(uint64_t* addr, uint64_t val);
#endif
uint8_t (*AtomicStore8Unsynchronized)(uint8_t* addr, uint8_t val);
uint16_t (*AtomicStore16Unsynchronized)(uint16_t* addr, uint16_t val);
uint32_t (*AtomicStore32Unsynchronized)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
uint64_t (*AtomicStore64Unsynchronized)(uint64_t* addr, uint64_t val);
#endif
// See the definitions of BLOCKSIZE and WORDSIZE earlier. The "unaligned"
// functions perform individual byte copies (and must always be "down" or "up").
// The others ignore alignment issues, and thus either depend on unaligned
// accesses being OK or not being invoked on unaligned addresses.
//
// src and dest point to the lower addresses of the respective data areas
// irrespective of "up" or "down".
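// As a rough illustration, a large unaligned copy is typically carved into a
// word-aligning prefix of byte copies, a run of block copies, a run of word
// copies, and a byte-copy tail, with each piece dispatched to one of the
// routines declared just below.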
static void (*AtomicCopyUnalignedBlockDownUnsynchronized)(uint8_t* dest,
const uint8_t* src);
static void (*AtomicCopyUnalignedBlockUpUnsynchronized)(uint8_t* dest,
const uint8_t* src);
static void (*AtomicCopyUnalignedWordDownUnsynchronized)(uint8_t* dest,
const uint8_t* src);
static void (*AtomicCopyUnalignedWordUpUnsynchronized)(uint8_t* dest,
const uint8_t* src);
static void (*AtomicCopyBlockDownUnsynchronized)(uint8_t* dest,
const uint8_t* src);
static void (*AtomicCopyBlockUpUnsynchronized)(uint8_t* dest,
const uint8_t* src);
static void (*AtomicCopyWordUnsynchronized)(uint8_t* dest, const uint8_t* src);
static void (*AtomicCopyByteUnsynchronized)(uint8_t* dest, const uint8_t* src);
uint8_t (*AtomicCmpXchg8SeqCst)(uint8_t* addr, uint8_t oldval, uint8_t newval);
uint16_t (*AtomicCmpXchg16SeqCst)(uint16_t* addr, uint16_t oldval,
uint16_t newval);
uint32_t (*AtomicCmpXchg32SeqCst)(uint32_t* addr, uint32_t oldval,
uint32_t newval);
uint64_t (*AtomicCmpXchg64SeqCst)(uint64_t* addr, uint64_t oldval,
uint64_t newval);
uint8_t (*AtomicExchange8SeqCst)(uint8_t* addr, uint8_t val);
uint16_t (*AtomicExchange16SeqCst)(uint16_t* addr, uint16_t val);
uint32_t (*AtomicExchange32SeqCst)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
uint64_t (*AtomicExchange64SeqCst)(uint64_t* addr, uint64_t val);
#endif
uint8_t (*AtomicAdd8SeqCst)(uint8_t* addr, uint8_t val);
uint16_t (*AtomicAdd16SeqCst)(uint16_t* addr, uint16_t val);
uint32_t (*AtomicAdd32SeqCst)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
uint64_t (*AtomicAdd64SeqCst)(uint64_t* addr, uint64_t val);
#endif
uint8_t (*AtomicAnd8SeqCst)(uint8_t* addr, uint8_t val);
uint16_t (*AtomicAnd16SeqCst)(uint16_t* addr, uint16_t val);
uint32_t (*AtomicAnd32SeqCst)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
uint64_t (*AtomicAnd64SeqCst)(uint64_t* addr, uint64_t val);
#endif
uint8_t (*AtomicOr8SeqCst)(uint8_t* addr, uint8_t val);
uint16_t (*AtomicOr16SeqCst)(uint16_t* addr, uint16_t val);
uint32_t (*AtomicOr32SeqCst)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
uint64_t (*AtomicOr64SeqCst)(uint64_t* addr, uint64_t val);
#endif
uint8_t (*AtomicXor8SeqCst)(uint8_t* addr, uint8_t val);
uint16_t (*AtomicXor16SeqCst)(uint16_t* addr, uint16_t val);
uint32_t (*AtomicXor32SeqCst)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
uint64_t (*AtomicXor64SeqCst)(uint64_t* addr, uint64_t val);
#endif
static bool UnalignedAccessesAreOK() {
# ifdef DEBUG
#ifdef DEBUG
const char* flag = getenv("JS_NO_UNALIGNED_MEMCPY");
if (flag && *flag == '1') return false;
# endif
# if defined(__x86_64__) || defined(__i386__)
#endif
#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
return true;
# elif defined(__arm__)
#elif defined(JS_CODEGEN_ARM)
return !HasAlignmentFault();
# elif defined(__aarch64__)
#elif defined(JS_CODEGEN_ARM64)
// This is not necessarily true but it's the best guess right now.
return true;
# else
# error "Unsupported platform"
# endif
#else
# error "Unsupported platform"
#endif
}
# ifndef JS_64BIT
void AtomicCompilerFence() {
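// std::atomic_signal_fence compiles to no machine instruction; it only
// prevents the compiler from reordering memory accesses across this point,
// which is all a compiler-only fence needs to do.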
std::atomic_signal_fence(std::memory_order_acq_rel);
}
# endif
void AtomicMemcpyDownUnsynchronized(uint8_t* dest, const uint8_t* src,
size_t nbytes) {
const uint8_t* lim = src + nbytes;
@ -164,7 +747,293 @@ void AtomicMemcpyUpUnsynchronized(uint8_t* dest, const uint8_t* src,
}
}
// These will be read and written only by the main thread during startup and
// shutdown.
static uint8_t* codeSegment;
static uint32_t codeSegmentSize;
bool InitializeJittedAtomics() {
// We should only initialize once.
MOZ_ASSERT(!codeSegment);
LifoAlloc lifo(4096);
TempAllocator alloc(&lifo);
JitContext jcx(&alloc);
StackMacroAssembler masm;
AutoCreatedBy acb(masm, "InitializeJittedAtomics");
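// All stubs are generated into a single buffer; each Gen* call returns its
// stub's offset within that buffer, and once the buffer has been copied into
// executable memory those offsets are added to the code base to form the
// function pointers installed at the end of this function.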
uint32_t fenceSeqCst = GenFenceSeqCst(masm);
#ifndef JS_64BIT
uint32_t nop = GenNop(masm);
#endif
Synchronization Full = Synchronization::Full();
Synchronization None = Synchronization::None();
uint32_t load8SeqCst = GenLoad(masm, SIZE8, Full);
uint32_t load16SeqCst = GenLoad(masm, SIZE16, Full);
uint32_t load32SeqCst = GenLoad(masm, SIZE32, Full);
#ifdef JS_64BIT
uint32_t load64SeqCst = GenLoad(masm, SIZE64, Full);
#endif
uint32_t load8Unsynchronized = GenLoad(masm, SIZE8, None);
uint32_t load16Unsynchronized = GenLoad(masm, SIZE16, None);
uint32_t load32Unsynchronized = GenLoad(masm, SIZE32, None);
#ifdef JS_64BIT
uint32_t load64Unsynchronized = GenLoad(masm, SIZE64, None);
#endif
uint32_t store8SeqCst = GenStore(masm, SIZE8, Full);
uint32_t store16SeqCst = GenStore(masm, SIZE16, Full);
uint32_t store32SeqCst = GenStore(masm, SIZE32, Full);
#ifdef JS_64BIT
uint32_t store64SeqCst = GenStore(masm, SIZE64, Full);
#endif
uint32_t store8Unsynchronized = GenStore(masm, SIZE8, None);
uint32_t store16Unsynchronized = GenStore(masm, SIZE16, None);
uint32_t store32Unsynchronized = GenStore(masm, SIZE32, None);
#ifdef JS_64BIT
uint32_t store64Unsynchronized = GenStore(masm, SIZE64, None);
#endif
uint32_t copyUnalignedBlockDownUnsynchronized =
GenCopy(masm, SIZE8, BLOCKSIZE, CopyDir::DOWN);
uint32_t copyUnalignedBlockUpUnsynchronized =
GenCopy(masm, SIZE8, BLOCKSIZE, CopyDir::UP);
uint32_t copyUnalignedWordDownUnsynchronized =
GenCopy(masm, SIZE8, WORDSIZE, CopyDir::DOWN);
uint32_t copyUnalignedWordUpUnsynchronized =
GenCopy(masm, SIZE8, WORDSIZE, CopyDir::UP);
uint32_t copyBlockDownUnsynchronized =
GenCopy(masm, SIZEWORD, BLOCKSIZE / WORDSIZE, CopyDir::DOWN);
uint32_t copyBlockUpUnsynchronized =
GenCopy(masm, SIZEWORD, BLOCKSIZE / WORDSIZE, CopyDir::UP);
uint32_t copyWordUnsynchronized = GenCopy(masm, SIZEWORD, 1, CopyDir::DOWN);
uint32_t copyByteUnsynchronized = GenCopy(masm, SIZE8, 1, CopyDir::DOWN);
uint32_t cmpxchg8SeqCst = GenCmpxchg(masm, SIZE8, Full);
uint32_t cmpxchg16SeqCst = GenCmpxchg(masm, SIZE16, Full);
uint32_t cmpxchg32SeqCst = GenCmpxchg(masm, SIZE32, Full);
uint32_t cmpxchg64SeqCst = GenCmpxchg(masm, SIZE64, Full);
uint32_t exchange8SeqCst = GenExchange(masm, SIZE8, Full);
uint32_t exchange16SeqCst = GenExchange(masm, SIZE16, Full);
uint32_t exchange32SeqCst = GenExchange(masm, SIZE32, Full);
#ifdef JS_64BIT
uint32_t exchange64SeqCst = GenExchange(masm, SIZE64, Full);
#endif
uint32_t add8SeqCst = GenFetchOp(masm, SIZE8, AtomicFetchAddOp, Full);
uint32_t add16SeqCst = GenFetchOp(masm, SIZE16, AtomicFetchAddOp, Full);
uint32_t add32SeqCst = GenFetchOp(masm, SIZE32, AtomicFetchAddOp, Full);
#ifdef JS_64BIT
uint32_t add64SeqCst = GenFetchOp(masm, SIZE64, AtomicFetchAddOp, Full);
#endif
uint32_t and8SeqCst = GenFetchOp(masm, SIZE8, AtomicFetchAndOp, Full);
uint32_t and16SeqCst = GenFetchOp(masm, SIZE16, AtomicFetchAndOp, Full);
uint32_t and32SeqCst = GenFetchOp(masm, SIZE32, AtomicFetchAndOp, Full);
#ifdef JS_64BIT
uint32_t and64SeqCst = GenFetchOp(masm, SIZE64, AtomicFetchAndOp, Full);
#endif
uint32_t or8SeqCst = GenFetchOp(masm, SIZE8, AtomicFetchOrOp, Full);
uint32_t or16SeqCst = GenFetchOp(masm, SIZE16, AtomicFetchOrOp, Full);
uint32_t or32SeqCst = GenFetchOp(masm, SIZE32, AtomicFetchOrOp, Full);
#ifdef JS_64BIT
uint32_t or64SeqCst = GenFetchOp(masm, SIZE64, AtomicFetchOrOp, Full);
#endif
uint32_t xor8SeqCst = GenFetchOp(masm, SIZE8, AtomicFetchXorOp, Full);
uint32_t xor16SeqCst = GenFetchOp(masm, SIZE16, AtomicFetchXorOp, Full);
uint32_t xor32SeqCst = GenFetchOp(masm, SIZE32, AtomicFetchXorOp, Full);
#ifdef JS_64BIT
uint32_t xor64SeqCst = GenFetchOp(masm, SIZE64, AtomicFetchXorOp, Full);
#endif
masm.finish();
if (masm.oom()) {
return false;
}
// Allocate executable memory.
uint32_t codeLength = masm.bytesNeeded();
size_t roundedCodeLength = RoundUp(codeLength, ExecutableCodePageSize);
uint8_t* code = (uint8_t*)AllocateExecutableMemory(
roundedCodeLength, ProtectionSetting::Writable,
MemCheckKind::MakeUndefined);
if (!code) {
return false;
}
// Zero the padding.
memset(code + codeLength, 0, roundedCodeLength - codeLength);
// Copy the code into place.
masm.executableCopy(code);
// Reprotect the whole region to avoid having separate RW and RX mappings.
if (!ExecutableAllocator::makeExecutableAndFlushICache(
FlushICacheSpec::LocalThreadOnly, code, roundedCodeLength)) {
DeallocateExecutableMemory(code, roundedCodeLength);
return false;
}
// Create the function pointers.
AtomicFenceSeqCst = (void (*)())(code + fenceSeqCst);
#ifndef JS_64BIT
AtomicCompilerFence = (void (*)())(code + nop);
#endif
AtomicLoad8SeqCst = (uint8_t(*)(const uint8_t* addr))(code + load8SeqCst);
AtomicLoad16SeqCst = (uint16_t(*)(const uint16_t* addr))(code + load16SeqCst);
AtomicLoad32SeqCst = (uint32_t(*)(const uint32_t* addr))(code + load32SeqCst);
#ifdef JS_64BIT
AtomicLoad64SeqCst = (uint64_t(*)(const uint64_t* addr))(code + load64SeqCst);
#endif
AtomicLoad8Unsynchronized =
(uint8_t(*)(const uint8_t* addr))(code + load8Unsynchronized);
AtomicLoad16Unsynchronized =
(uint16_t(*)(const uint16_t* addr))(code + load16Unsynchronized);
AtomicLoad32Unsynchronized =
(uint32_t(*)(const uint32_t* addr))(code + load32Unsynchronized);
#ifdef JS_64BIT
AtomicLoad64Unsynchronized =
(uint64_t(*)(const uint64_t* addr))(code + load64Unsynchronized);
#endif
AtomicStore8SeqCst =
(uint8_t(*)(uint8_t * addr, uint8_t val))(code + store8SeqCst);
AtomicStore16SeqCst =
(uint16_t(*)(uint16_t * addr, uint16_t val))(code + store16SeqCst);
AtomicStore32SeqCst =
(uint32_t(*)(uint32_t * addr, uint32_t val))(code + store32SeqCst);
#ifdef JS_64BIT
AtomicStore64SeqCst =
(uint64_t(*)(uint64_t * addr, uint64_t val))(code + store64SeqCst);
#endif
AtomicStore8Unsynchronized =
(uint8_t(*)(uint8_t * addr, uint8_t val))(code + store8Unsynchronized);
AtomicStore16Unsynchronized = (uint16_t(*)(uint16_t * addr, uint16_t val))(
code + store16Unsynchronized);
AtomicStore32Unsynchronized = (uint32_t(*)(uint32_t * addr, uint32_t val))(
code + store32Unsynchronized);
#ifdef JS_64BIT
AtomicStore64Unsynchronized = (uint64_t(*)(uint64_t * addr, uint64_t val))(
code + store64Unsynchronized);
#endif
AtomicCopyUnalignedBlockDownUnsynchronized =
(void (*)(uint8_t * dest, const uint8_t* src))(
code + copyUnalignedBlockDownUnsynchronized);
AtomicCopyUnalignedBlockUpUnsynchronized =
(void (*)(uint8_t * dest, const uint8_t* src))(
code + copyUnalignedBlockUpUnsynchronized);
AtomicCopyUnalignedWordDownUnsynchronized =
(void (*)(uint8_t * dest, const uint8_t* src))(
code + copyUnalignedWordDownUnsynchronized);
AtomicCopyUnalignedWordUpUnsynchronized =
(void (*)(uint8_t * dest, const uint8_t* src))(
code + copyUnalignedWordUpUnsynchronized);
AtomicCopyBlockDownUnsynchronized = (void (*)(
uint8_t * dest, const uint8_t* src))(code + copyBlockDownUnsynchronized);
AtomicCopyBlockUpUnsynchronized = (void (*)(
uint8_t * dest, const uint8_t* src))(code + copyBlockUpUnsynchronized);
AtomicCopyWordUnsynchronized = (void (*)(uint8_t * dest, const uint8_t* src))(
code + copyWordUnsynchronized);
AtomicCopyByteUnsynchronized = (void (*)(uint8_t * dest, const uint8_t* src))(
code + copyByteUnsynchronized);
AtomicCmpXchg8SeqCst = (uint8_t(*)(uint8_t * addr, uint8_t oldval,
uint8_t newval))(code + cmpxchg8SeqCst);
AtomicCmpXchg16SeqCst =
(uint16_t(*)(uint16_t * addr, uint16_t oldval, uint16_t newval))(
code + cmpxchg16SeqCst);
AtomicCmpXchg32SeqCst =
(uint32_t(*)(uint32_t * addr, uint32_t oldval, uint32_t newval))(
code + cmpxchg32SeqCst);
AtomicCmpXchg64SeqCst =
(uint64_t(*)(uint64_t * addr, uint64_t oldval, uint64_t newval))(
code + cmpxchg64SeqCst);
AtomicExchange8SeqCst =
(uint8_t(*)(uint8_t * addr, uint8_t val))(code + exchange8SeqCst);
AtomicExchange16SeqCst =
(uint16_t(*)(uint16_t * addr, uint16_t val))(code + exchange16SeqCst);
AtomicExchange32SeqCst =
(uint32_t(*)(uint32_t * addr, uint32_t val))(code + exchange32SeqCst);
#ifdef JS_64BIT
AtomicExchange64SeqCst =
(uint64_t(*)(uint64_t * addr, uint64_t val))(code + exchange64SeqCst);
#endif
AtomicAdd8SeqCst =
(uint8_t(*)(uint8_t * addr, uint8_t val))(code + add8SeqCst);
AtomicAdd16SeqCst =
(uint16_t(*)(uint16_t * addr, uint16_t val))(code + add16SeqCst);
AtomicAdd32SeqCst =
(uint32_t(*)(uint32_t * addr, uint32_t val))(code + add32SeqCst);
#ifdef JS_64BIT
AtomicAdd64SeqCst =
(uint64_t(*)(uint64_t * addr, uint64_t val))(code + add64SeqCst);
#endif
AtomicAnd8SeqCst =
(uint8_t(*)(uint8_t * addr, uint8_t val))(code + and8SeqCst);
AtomicAnd16SeqCst =
(uint16_t(*)(uint16_t * addr, uint16_t val))(code + and16SeqCst);
AtomicAnd32SeqCst =
(uint32_t(*)(uint32_t * addr, uint32_t val))(code + and32SeqCst);
#ifdef JS_64BIT
AtomicAnd64SeqCst =
(uint64_t(*)(uint64_t * addr, uint64_t val))(code + and64SeqCst);
#endif
AtomicOr8SeqCst = (uint8_t(*)(uint8_t * addr, uint8_t val))(code + or8SeqCst);
AtomicOr16SeqCst =
(uint16_t(*)(uint16_t * addr, uint16_t val))(code + or16SeqCst);
AtomicOr32SeqCst =
(uint32_t(*)(uint32_t * addr, uint32_t val))(code + or32SeqCst);
#ifdef JS_64BIT
AtomicOr64SeqCst =
(uint64_t(*)(uint64_t * addr, uint64_t val))(code + or64SeqCst);
#endif
AtomicXor8SeqCst =
(uint8_t(*)(uint8_t * addr, uint8_t val))(code + xor8SeqCst);
AtomicXor16SeqCst =
(uint16_t(*)(uint16_t * addr, uint16_t val))(code + xor16SeqCst);
AtomicXor32SeqCst =
(uint32_t(*)(uint32_t * addr, uint32_t val))(code + xor32SeqCst);
#ifdef JS_64BIT
AtomicXor64SeqCst =
(uint64_t(*)(uint64_t * addr, uint64_t val))(code + xor64SeqCst);
#endif
codeSegment = code;
codeSegmentSize = roundedCodeLength;
return true;
}
void ShutDownJittedAtomics() {
// Must have been initialized.
MOZ_ASSERT(codeSegment);
DeallocateExecutableMemory(codeSegment, codeSegmentSize);
codeSegment = nullptr;
codeSegmentSize = 0;
}
} // namespace jit
} // namespace js
#endif // JS_HAVE_GENERATED_ATOMIC_OPS

View File

@ -22,19 +22,118 @@
#include <stddef.h>
#include <stdint.h>
#include "jit/AtomicOperationsGenerated.h"
#include "js/GCAPI.h"
#include "vm/Uint8Clamped.h"
namespace js {
namespace jit {
// The function pointers in this section all point to jitted code.
//
// On 32-bit systems we assume, for simplicity's sake, that we have no 64-bit
// atomic operations except cmpxchg (this is a concession to x86, but it is
// not a hardship). We therefore implement the other 64-bit atomic operations
// in terms of cmpxchg, a little C++ code, and a local reordering fence that
// keeps the compiler from intermingling unrelated loads and stores with the
// operations that make up the synthesized atomic.
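// For example (a sketch mirroring the JIT_LOADOP_CAS macro further down), a
// seq-cst 64-bit load on a 32-bit target is synthesized roughly as:
//
//   AtomicCompilerFence();
//   return (uint64_t)AtomicCmpXchg64SeqCst(addr, 0, 0);  // CAS with old == new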
// `fence` performs a full memory barrier.
extern void (*AtomicFenceSeqCst)();
#ifndef JS_64BIT
// `AtomicCompilerFence` erects a reordering boundary for operations on the
// current thread. We use it to prevent the compiler from reordering loads and
// stores inside larger primitives that are synthesized from cmpxchg.
extern void AtomicCompilerFence();
// `compiler_fence` erects a reordering boundary for operations on the current
// thread. We use it to prevent the compiler from reordering loads and stores
// inside larger primitives that are synthesized from cmpxchg.
extern void (*AtomicCompilerFence)();
#endif
extern uint8_t (*AtomicLoad8SeqCst)(const uint8_t* addr);
extern uint16_t (*AtomicLoad16SeqCst)(const uint16_t* addr);
extern uint32_t (*AtomicLoad32SeqCst)(const uint32_t* addr);
#ifdef JS_64BIT
extern uint64_t (*AtomicLoad64SeqCst)(const uint64_t* addr);
#endif
// These are access-atomic up to sizeof(uintptr_t).
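// That is, a racy load of at most pointer size cannot tear; on 32-bit targets
// the 64-bit loadSafeWhenRacy/storeSafeWhenRacy accessors are instead allowed
// to tear and are assembled from two 32-bit halves (see the *_TEARING macros
// further down in this header).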
extern uint8_t (*AtomicLoad8Unsynchronized)(const uint8_t* addr);
extern uint16_t (*AtomicLoad16Unsynchronized)(const uint16_t* addr);
extern uint32_t (*AtomicLoad32Unsynchronized)(const uint32_t* addr);
#ifdef JS_64BIT
extern uint64_t (*AtomicLoad64Unsynchronized)(const uint64_t* addr);
#endif
extern uint8_t (*AtomicStore8SeqCst)(uint8_t* addr, uint8_t val);
extern uint16_t (*AtomicStore16SeqCst)(uint16_t* addr, uint16_t val);
extern uint32_t (*AtomicStore32SeqCst)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
extern uint64_t (*AtomicStore64SeqCst)(uint64_t* addr, uint64_t val);
#endif
// These are access-atomic up to sizeof(uintptr_t).
extern uint8_t (*AtomicStore8Unsynchronized)(uint8_t* addr, uint8_t val);
extern uint16_t (*AtomicStore16Unsynchronized)(uint16_t* addr, uint16_t val);
extern uint32_t (*AtomicStore32Unsynchronized)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
extern uint64_t (*AtomicStore64Unsynchronized)(uint64_t* addr, uint64_t val);
#endif
// `exchange` takes a cell address and a value. It stores the value in the
// cell and returns the value previously in the cell.
extern uint8_t (*AtomicExchange8SeqCst)(uint8_t* addr, uint8_t val);
extern uint16_t (*AtomicExchange16SeqCst)(uint16_t* addr, uint16_t val);
extern uint32_t (*AtomicExchange32SeqCst)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
extern uint64_t (*AtomicExchange64SeqCst)(uint64_t* addr, uint64_t val);
#endif
// `add` adds a value atomically to the cell and returns the old value in the
// cell. (There is no `sub`; just add the negated value.)
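// (fetchSubSeqCst is accordingly defined below, via JIT_FETCHSUBOP, as
// fetchAddSeqCst(addr, (T)(0 - val)), i.e., addition of the negated value.)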
extern uint8_t (*AtomicAdd8SeqCst)(uint8_t* addr, uint8_t val);
extern uint16_t (*AtomicAdd16SeqCst)(uint16_t* addr, uint16_t val);
extern uint32_t (*AtomicAdd32SeqCst)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
extern uint64_t (*AtomicAdd64SeqCst)(uint64_t* addr, uint64_t val);
#endif
// `and` bitwise-ands a value atomically into the cell and returns the old value
// in the cell.
extern uint8_t (*AtomicAnd8SeqCst)(uint8_t* addr, uint8_t val);
extern uint16_t (*AtomicAnd16SeqCst)(uint16_t* addr, uint16_t val);
extern uint32_t (*AtomicAnd32SeqCst)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
extern uint64_t (*AtomicAnd64SeqCst)(uint64_t* addr, uint64_t val);
#endif
// `or` bitwise-ors a value atomically into the cell and returns the old value
// in the cell.
extern uint8_t (*AtomicOr8SeqCst)(uint8_t* addr, uint8_t val);
extern uint16_t (*AtomicOr16SeqCst)(uint16_t* addr, uint16_t val);
extern uint32_t (*AtomicOr32SeqCst)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
extern uint64_t (*AtomicOr64SeqCst)(uint64_t* addr, uint64_t val);
#endif
// `xor` bitwise-xors a value atomically into the cell and returns the old value
// in the cell.
extern uint8_t (*AtomicXor8SeqCst)(uint8_t* addr, uint8_t val);
extern uint16_t (*AtomicXor16SeqCst)(uint16_t* addr, uint16_t val);
extern uint32_t (*AtomicXor32SeqCst)(uint32_t* addr, uint32_t val);
#ifdef JS_64BIT
extern uint64_t (*AtomicXor64SeqCst)(uint64_t* addr, uint64_t val);
#endif
// `cmpxchg` takes a cell address, an expected value and a replacement value.
// If the value in the cell equals the expected value then the replacement value
// is stored in the cell. It always returns the value previously in the cell.
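// The *_CAS macros later in this header build larger primitives from cmpxchg
// with the usual retry loop, roughly:
//
//   T oldval = *addr;
//   for (;;) {
//     T next = combine(oldval, val);         // e.g. oldval + val
//     T seen = cmpxchg(addr, oldval, next);
//     if (seen == oldval) return oldval;
//     oldval = seen;
//   }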
extern uint8_t (*AtomicCmpXchg8SeqCst)(uint8_t* addr, uint8_t oldval,
uint8_t newval);
extern uint16_t (*AtomicCmpXchg16SeqCst)(uint16_t* addr, uint16_t oldval,
uint16_t newval);
extern uint32_t (*AtomicCmpXchg32SeqCst)(uint32_t* addr, uint32_t oldval,
uint32_t newval);
extern uint64_t (*AtomicCmpXchg64SeqCst)(uint64_t* addr, uint64_t oldval,
uint64_t newval);
// `...MemcpyDown` moves bytes toward lower addresses in memory: dest <= src.
// `...MemcpyUp` moves bytes toward higher addresses in memory: dest >= src.
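// (memmoveSafeWhenRacy, defined at the bottom of this header, picks the Down
// variant when dest <= src and the Up variant otherwise, so overlapping
// ranges are copied correctly.)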
extern void AtomicMemcpyDownUnsynchronized(uint8_t* dest, const uint8_t* src,
@ -54,6 +153,7 @@ inline void js::jit::AtomicOperations::fenceSeqCst() { AtomicFenceSeqCst(); }
#define JIT_LOADOP(T, U, loadop) \
template <> \
inline T AtomicOperations::loadSeqCst(T* addr) { \
JS::AutoSuppressGCAnalysis nogc; \
return (T)loadop((U*)addr); \
}
@ -61,6 +161,7 @@ inline void js::jit::AtomicOperations::fenceSeqCst() { AtomicFenceSeqCst(); }
# define JIT_LOADOP_CAS(T) \
template <> \
inline T AtomicOperations::loadSeqCst(T* addr) { \
JS::AutoSuppressGCAnalysis nogc; \
AtomicCompilerFence(); \
return (T)AtomicCmpXchg64SeqCst((uint64_t*)addr, 0, 0); \
}
@ -93,6 +194,7 @@ JIT_LOADOP(uint64_t, uint64_t, AtomicLoad64SeqCst)
#define JIT_STOREOP(T, U, storeop) \
template <> \
inline void AtomicOperations::storeSeqCst(T* addr, T val) { \
JS::AutoSuppressGCAnalysis nogc; \
storeop((U*)addr, val); \
}
@ -100,6 +202,7 @@ JIT_LOADOP(uint64_t, uint64_t, AtomicLoad64SeqCst)
# define JIT_STOREOP_CAS(T) \
template <> \
inline void AtomicOperations::storeSeqCst(T* addr, T val) { \
JS::AutoSuppressGCAnalysis nogc; \
AtomicCompilerFence(); \
T oldval = *addr; /* good initial approximation */ \
for (;;) { \
@ -141,6 +244,7 @@ JIT_STOREOP(uint64_t, uint64_t, AtomicStore64SeqCst)
#define JIT_EXCHANGEOP(T, U, xchgop) \
template <> \
inline T AtomicOperations::exchangeSeqCst(T* addr, T val) { \
JS::AutoSuppressGCAnalysis nogc; \
return (T)xchgop((U*)addr, (U)val); \
}
@ -148,6 +252,7 @@ JIT_STOREOP(uint64_t, uint64_t, AtomicStore64SeqCst)
# define JIT_EXCHANGEOP_CAS(T) \
template <> \
inline T AtomicOperations::exchangeSeqCst(T* addr, T val) { \
JS::AutoSuppressGCAnalysis nogc; \
AtomicCompilerFence(); \
T oldval = *addr; \
for (;;) { \
@ -191,6 +296,7 @@ JIT_EXCHANGEOP(uint64_t, uint64_t, AtomicExchange64SeqCst)
template <> \
inline T AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, \
T newval) { \
JS::AutoSuppressGCAnalysis nogc; \
return (T)cmpxchg((U*)addr, (U)oldval, (U)newval); \
}
@ -214,12 +320,14 @@ JIT_CAS(uint64_t, uint64_t, AtomicCmpXchg64SeqCst)
#define JIT_FETCHADDOP(T, U, xadd) \
template <> \
inline T AtomicOperations::fetchAddSeqCst(T* addr, T val) { \
JS::AutoSuppressGCAnalysis nogc; \
return (T)xadd((U*)addr, (U)val); \
}
#define JIT_FETCHSUBOP(T) \
template <> \
inline T AtomicOperations::fetchSubSeqCst(T* addr, T val) { \
JS::AutoSuppressGCAnalysis nogc; \
return fetchAddSeqCst(addr, (T)(0 - val)); \
}
@ -227,6 +335,7 @@ JIT_CAS(uint64_t, uint64_t, AtomicCmpXchg64SeqCst)
# define JIT_FETCHADDOP_CAS(T) \
template <> \
inline T AtomicOperations::fetchAddSeqCst(T* addr, T val) { \
JS::AutoSuppressGCAnalysis nogc; \
AtomicCompilerFence(); \
T oldval = *addr; /* Good initial approximation */ \
for (;;) { \
@ -279,6 +388,7 @@ JIT_FETCHSUBOP(uint64_t)
#define JIT_FETCHBITOPX(T, U, name, op) \
template <> \
inline T AtomicOperations::name(T* addr, T val) { \
JS::AutoSuppressGCAnalysis nogc; \
return (T)op((U*)addr, (U)val); \
}
@ -296,6 +406,7 @@ JIT_FETCHSUBOP(uint64_t)
# define JIT_FETCHBITOPX_CAS(T, name, OP) \
template <> \
inline T AtomicOperations::name(T* addr, T val) { \
JS::AutoSuppressGCAnalysis nogc; \
AtomicCompilerFence(); \
T oldval = *addr; \
for (;;) { \
@ -354,6 +465,7 @@ JIT_FETCHBITOP(uint64_t, uint64_t, AtomicAnd64SeqCst, AtomicOr64SeqCst,
#define JIT_LOADSAFE(T, U, loadop) \
template <> \
inline T js::jit::AtomicOperations::loadSafeWhenRacy(T* addr) { \
JS::AutoSuppressGCAnalysis nogc; \
union { \
U u; \
T t; \
@ -366,6 +478,7 @@ JIT_FETCHBITOP(uint64_t, uint64_t, AtomicAnd64SeqCst, AtomicOr64SeqCst,
# define JIT_LOADSAFE_TEARING(T) \
template <> \
inline T js::jit::AtomicOperations::loadSafeWhenRacy(T* addr) { \
JS::AutoSuppressGCAnalysis nogc; \
MOZ_ASSERT(sizeof(T) == 8); \
union { \
uint32_t u[2]; \
@ -414,6 +527,7 @@ inline uint8_clamped js::jit::AtomicOperations::loadSafeWhenRacy(
#define JIT_STORESAFE(T, U, storeop) \
template <> \
inline void js::jit::AtomicOperations::storeSafeWhenRacy(T* addr, T val) { \
JS::AutoSuppressGCAnalysis nogc; \
union { \
U u; \
T t; \
@ -426,6 +540,7 @@ inline uint8_clamped js::jit::AtomicOperations::loadSafeWhenRacy(
# define JIT_STORESAFE_TEARING(T) \
template <> \
inline void js::jit::AtomicOperations::storeSafeWhenRacy(T* addr, T val) { \
JS::AutoSuppressGCAnalysis nogc; \
union { \
uint32_t u[2]; \
T t; \
@ -472,6 +587,7 @@ inline void js::jit::AtomicOperations::storeSafeWhenRacy(uint8_clamped* addr,
void js::jit::AtomicOperations::memcpySafeWhenRacy(void* dest, const void* src,
size_t nbytes) {
JS::AutoSuppressGCAnalysis nogc;
MOZ_ASSERT(!((char*)dest <= (char*)src && (char*)src < (char*)dest + nbytes));
MOZ_ASSERT(!((char*)src <= (char*)dest && (char*)dest < (char*)src + nbytes));
AtomicMemcpyDownUnsynchronized((uint8_t*)dest, (const uint8_t*)src, nbytes);
@ -480,6 +596,7 @@ void js::jit::AtomicOperations::memcpySafeWhenRacy(void* dest, const void* src,
inline void js::jit::AtomicOperations::memmoveSafeWhenRacy(void* dest,
const void* src,
size_t nbytes) {
JS::AutoSuppressGCAnalysis nogc;
if ((char*)dest <= (char*)src) {
AtomicMemcpyDownUnsynchronized((uint8_t*)dest, (const uint8_t*)src, nbytes);
} else {
@ -487,4 +604,19 @@ inline void js::jit::AtomicOperations::memmoveSafeWhenRacy(void* dest,
}
}
namespace js {
namespace jit {
extern bool InitializeJittedAtomics();
extern void ShutDownJittedAtomics();
} // namespace jit
} // namespace js
inline bool js::jit::AtomicOperations::Initialize() {
return InitializeJittedAtomics();
}
inline void js::jit::AtomicOperations::ShutDown() { ShutDownJittedAtomics(); }
#endif // jit_shared_AtomicOperations_shared_jit_h

View File

@ -325,8 +325,6 @@ void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) {
Scalar::Type storageType = mir->storageType();
// NOTE: the generated code must match the assembly code in gen_load in
// GenerateAtomicOperations.py
auto sync = Synchronization::Load();
masm.memoryBarrierBefore(sync);
@ -353,8 +351,6 @@ void CodeGenerator::visitAtomicStore64(LAtomicStore64* lir) {
masm.loadBigInt64(value, temp1);
// NOTE: the generated code must match the assembly code in gen_store in
// GenerateAtomicOperations.py
auto sync = Synchronization::Store();
masm.memoryBarrierBefore(sync);

View File

@ -922,8 +922,6 @@ void MacroAssembler::PushBoxed(FloatRegister reg) {
void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
Operand srcAddr, AnyRegister out) {
// NOTE: the generated code must match the assembly code in gen_load in
// GenerateAtomicOperations.py
memoryBarrierBefore(access.sync());
MOZ_ASSERT_IF(
@ -1017,8 +1015,6 @@ void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
Operand srcAddr, Register64 out) {
// NOTE: the generated code must match the assembly code in gen_load in
// GenerateAtomicOperations.py
memoryBarrierBefore(access.sync());
append(access, size());
@ -1061,8 +1057,6 @@ void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
AnyRegister value, Operand dstAddr) {
// NOTE: the generated code must match the assembly code in gen_store in
// GenerateAtomicOperations.py
memoryBarrierBefore(access.sync());
append(access, masm.size());
@ -1348,8 +1342,6 @@ static void AtomicFetchOp64(MacroAssembler& masm,
const wasm::MemoryAccessDesc* access, AtomicOp op,
Register value, const T& mem, Register temp,
Register output) {
// NOTE: the generated code must match the assembly code in gen_fetchop in
// GenerateAtomicOperations.py
if (op == AtomicFetchAddOp) {
if (value != output) {
masm.movq(value, output);
@ -1449,8 +1441,6 @@ void MacroAssembler::compareExchange64(const Synchronization&,
const Address& mem, Register64 expected,
Register64 replacement,
Register64 output) {
// NOTE: the generated code must match the assembly code in gen_cmpxchg in
// GenerateAtomicOperations.py
MOZ_ASSERT(output.reg == rax);
if (expected != output) {
movq(expected.reg, output.reg);
@ -1473,8 +1463,6 @@ void MacroAssembler::compareExchange64(const Synchronization&,
void MacroAssembler::atomicExchange64(const Synchronization&,
const Address& mem, Register64 value,
Register64 output) {
// NOTE: the generated code must match the assembly code in gen_exchange in
// GenerateAtomicOperations.py
if (value != output) {
movq(value.reg, output.reg);
}

View File

@ -1108,8 +1108,6 @@ static void CompareExchange(MacroAssembler& masm,
masm.append(*access, masm.size());
}
// NOTE: the generated code must match the assembly code in gen_cmpxchg in
// GenerateAtomicOperations.py
switch (Scalar::byteSize(type)) {
case 1:
CheckBytereg(newval);
@ -1155,8 +1153,7 @@ static void AtomicExchange(MacroAssembler& masm,
const wasm::MemoryAccessDesc* access,
Scalar::Type type, const T& mem, Register value,
Register output)
// NOTE: the generated code must match the assembly code in gen_exchange in
// GenerateAtomicOperations.py
{
if (value != output) {
masm.movl(value, output);
@ -1233,8 +1230,6 @@ static void AtomicFetchOp(MacroAssembler& masm,
const T& mem, Register temp, Register output) {
// Note value can be an Imm or a Register.
// NOTE: the generated code must match the assembly code in gen_fetchop in
// GenerateAtomicOperations.py
#define ATOMIC_BITOP_BODY(LOAD, OP, LOCK_CMPXCHG) \
do { \
MOZ_ASSERT(output != temp); \

View File

@ -958,8 +958,6 @@ void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
access.type() == Scalar::Float32 || access.type() == Scalar::Float64);
MOZ_ASSERT_IF(access.isWidenSimd128Load(), access.type() == Scalar::Float64);
// NOTE: the generated code must match the assembly code in gen_load in
// GenerateAtomicOperations.py
memoryBarrierBefore(access.sync());
append(access, size());
@ -1123,8 +1121,6 @@ void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
MOZ_ASSERT(dstAddr.kind() == Operand::MEM_REG_DISP ||
dstAddr.kind() == Operand::MEM_SCALE);
// NOTE: the generated code must match the assembly code in gen_store in
// GenerateAtomicOperations.py
memoryBarrierBefore(access.sync());
append(access, size());
@ -1221,8 +1217,6 @@ static void CompareExchange64(MacroAssembler& masm,
MOZ_ASSERT(replacement.high == ecx);
MOZ_ASSERT(replacement.low == ebx);
// NOTE: the generated code must match the assembly code in gen_cmpxchg in
// GenerateAtomicOperations.py
if (access) {
masm.append(*access, masm.size());
}

View File

@ -187,6 +187,8 @@ JS_PUBLIC_API const char* JS::detail::InitWithFailureDiagnostic(
RETURN_IF_FAIL(js::vtune::Initialize());
#endif
RETURN_IF_FAIL(js::jit::AtomicOperations::Initialize());
#if JS_HAS_INTL_API
if (mozilla::intl::ICU4CLibrary::Initialize().isErr()) {
return "ICU4CLibrary::Initialize() failed";
@ -207,7 +209,7 @@ JS_PUBLIC_API const char* JS::detail::InitWithFailureDiagnostic(
#endif
#ifndef JS_CODEGEN_NONE
// This is forced by InitializeJit.
// Normally this is forced by the compilation of atomic operations.
MOZ_ASSERT(js::jit::CPUFlagsHaveBeenComputed());
#endif
@ -273,6 +275,8 @@ JS_PUBLIC_API void JS_ShutDown(void) {
js::jit::SimulatorProcess::destroy();
#endif
js::jit::AtomicOperations::ShutDown();
#ifdef JS_TRACE_LOGGING
js::DestroyTraceLoggerThreadState();
js::DestroyTraceLoggerGraphState();

View File

@ -7,8 +7,6 @@
#ifndef vm_SharedMem_h
#define vm_SharedMem_h
#include "mozilla/Assertions.h"
#include <type_traits>
template <typename T>