Bug 1667736 - Update packed_simd to compile on Rust 1.48. r=glandium

Differential Revision: https://phabricator.services.mozilla.com/D91572
2024-10-08 02:14:43 +00:00 · 2020-10-30 08:29:53 +00:00 · 2020-10-30 08:29:53 +00:00 · 33c996c701
commit 33c996c701
parent 1fe9274aa4
76 changed files with 605 additions and 788 deletions
--- a/.cargo/config.in
+++ b/.cargo/config.in
@ -45,7 +45,7 @@ tag = "v0.4.2"
 [source."https://github.com/hsivonen/packed_simd"]
 git = "https://github.com/hsivonen/packed_simd"
 replace-with = "vendored-sources"
-rev = "3541e3818fdc7c2a24f87e3459151a4ce955a67a"
+rev = "0917fe780032a6bbb23d71be545f9c1834128d75"

 [source."https://github.com/gfx-rs/naga"]
 git = "https://github.com/gfx-rs/naga"
--- a/Cargo.lock
+++ b/Cargo.lock
@ -3578,8 +3578,8 @@ dependencies = [

 [[package]]
 name = "packed_simd"
-version = "0.3.3"
-source = "git+https://github.com/hsivonen/packed_simd?rev=3541e3818fdc7c2a24f87e3459151a4ce955a67a#3541e3818fdc7c2a24f87e3459151a4ce955a67a"
+version = "0.3.4"
+source = "git+https://github.com/hsivonen/packed_simd?rev=0917fe780032a6bbb23d71be545f9c1834128d75#0917fe780032a6bbb23d71be545f9c1834128d75"
 dependencies = [
 "cfg-if",
 ]
--- a/Cargo.toml
+++ b/Cargo.toml
@ -65,7 +65,7 @@ panic = "abort"

 [patch.crates-io]
 libudev-sys = { path = "dom/webauthn/libudev-sys" }
-packed_simd = { git = "https://github.com/hsivonen/packed_simd", rev="3541e3818fdc7c2a24f87e3459151a4ce955a67a" }
+packed_simd = { git = "https://github.com/hsivonen/packed_simd", rev="0917fe780032a6bbb23d71be545f9c1834128d75" }
 rlbox_lucet_sandbox = { git = "https://github.com/PLSysSec/rlbox_lucet_sandbox/", rev="ed8bac8812e9f335d5fadd0f4ece96981aba88a3" }
 nix = { git = "https://github.com/shravanrn/nix/", branch = "r0.13.1", rev="4af6c367603869a30fddb5ffb0aba2b9477ba92e" }
 spirv_cross = { git = "https://github.com/kvark/spirv_cross", branch = "wgpu3", rev = "20191ad2f370afd6d247edcb9ff9da32d3bedb9c" }
--- a/third_party/rust/packed_simd/.cargo-checksum.json
+++ b/third_party/rust/packed_simd/.cargo-checksum.json
--- a/third_party/rust/packed_simd/.travis.yml
+++ b/third_party/rust/packed_simd/.travis.yml
@ -1,192 +1,129 @@
 language: rust
-sudo: false
 rust: nightly
+os: linux
+dist: focal

 stages:
  - tools
-  - linux-tier1
-  - osx-tier1
-  - osx-tier2
-  - linux-tier2
-  - android
+  - build-test-verify # Passes full test suite, permit no regressions (unless it's rustup :/)
+  - 32bit-tier1
+  - 64bit-tier2
+  - 32bit-tier2

-matrix:
-  fast_finish: true    
+jobs:
+  fast_finish: true
  include:
    # Android:
-    - env: TARGET=x86_64-linux-android NOVERIFY=1
+    - env: TARGET=x86_64-linux-android
      name: "x86_64-unknown-linux-android + SSE2"
-      stage: android
+      stage: build-test-verify
    - env: TARGET=arm-linux-androideabi
      name: "arm-linux-androideabi"
-      stage: android
+      stage: build-test-verify
    - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon"
      name: "arm-linux-androideabi + NEON"
-      stage: android
-    - env: TARGET=aarch64-linux-android
-      name: "aarch64-unknown-linux-android"
-      stage: android
-    - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon"
-      name: "aarch64-unknown-linux-android + NEON"
-      stage: android
+      stage: build-test-verify
+    - name: "aarch64-unknown-linux-android + NEON"
+      env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon"
+      stage: build-test-verify
    - env: TARGET="thumbv7neon-linux-androideabi"
      name: "thumbv7neon-linux-androideabi"
-      stage: android
+      stage: 32bit-tier2
    # Linux:
    - env: TARGET=i586-unknown-linux-gnu
      name: "i586-unknown-linux-gnu"
-      stage: linux-tier2
+      stage: 32bit-tier2
    - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse"
      name: "i586-unknown-linux-gnu + SSE"
-      stage: linux-tier2
+      stage: 32bit-tier2
    - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse2"
      name: "i586-unknown-linux-gnu + SSE2"
-      stage: linux-tier2
+      stage: 32bit-tier2
    - env: TARGET=i686-unknown-linux-gnu
      name: "i686-unknown-linux-gnu + SSE2"
-      stage: linux-tier1
+      stage: 32bit-tier1
    - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2"
      name: "i686-unknown-linux-gnu + SSE4.2"
-      stage: linux-tier1
+      stage: 32bit-tier1
    - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2"
      name: "i686-unknown-linux-gnu + AVX2"
-      stage: linux-tier1
-    - env: TARGET=x86_64-unknown-linux-gnu
-      name: "x86_64-unknown-linux-gnu + SSE2"
-      install: rustup component add rustfmt-preview
-      stage: linux-tier1
+      stage: 32bit-tier1
    - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2"
      name: "x86_64-unknown-linux-gnu + SSE4.2"
      install: rustup component add rustfmt-preview
-      stage: linux-tier1
-    - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx"
-      name: "x86_64-unknown-linux-gnu + AVX"
-      install: rustup component add rustfmt-preview
-      stage: linux-tier1
+      stage: build-test-verify
    - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2"
      name: "x86_64-unknown-linux-gnu + AVX2"
      install: rustup component add rustfmt-preview
-      stage: linux-tier1
-    - env: TARGET=x86_64-unknown-linux-gnu-emulated
-      name: "Intel SDE + SSE2"
-      install: true
-      stage: linux-tier1
-    - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+sse4.2"
-      name: "Intel SDE + SSE4.2"
-      install: true
-      stage: linux-tier1
-    - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx"
-      name: "Intel SDE + AVX"
-      install: true
-      stage: linux-tier1
-    - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx2"
-      name: "Intel SDE + AVX2"
-      install: true
-      stage: linux-tier1
-    - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx-512f"
-      name: "Intel SDE + AVX-512"
-      install: true
-      stage: linux-tier1
-    - env: TARGET=arm-unknown-linux-gnueabi
-      name: "arm-unknown-linux-gnueabi"
-      stage: linux-tier2
+      stage: build-test-verify
    - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon"
      name: "arm-unknown-linux-gnueabi + NEON"
-      stage: linux-tier2
+      stage: build-test-verify
    - env: TARGET=arm-unknown-linux-gnueabihf
      name: "arm-unknown-linux-gnueabihf"
-      stage: linux-tier2
+      stage: build-test-verify
    - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon"
      name: "arm-unknown-linux-gnueabihf + NEON"
-      stage: linux-tier2
+      stage: build-test-verify
    - env: TARGET=armv7-unknown-linux-gnueabihf
      name: "armv7-unknown-linux-gnueabihf"
-      stage: linux-tier2
+      stage: build-test-verify
    - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon"
      name: "armv7-unknown-linux-gnueabihf + NEON"
-      stage: linux-tier2
+      stage: build-test-verify
    - env: TARGET="thumbv7neon-unknown-linux-gnueabihf"
      name: "thumbv7neon-unknown-linux-gnueabihf"
-      stage: linux-tier2
-    - env: TARGET=aarch64-unknown-linux-gnu
-      name: "aarch64-unknown-linux-gnu"
-      stage: linux-tier2
-    - env: TARGET=aarch64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+neon"
-      name: "aarch64-unknown-linux-gnu + NEON"
-      stage: linux-tier2
+      stage: 32bit-tier2
+    - name: "aarch64-unknown-linux-gnu + NEON"
+      env: TARGET=aarch64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+neon"
+      stage: build-test-verify
    - env: TARGET=mips-unknown-linux-gnu
      name: "mips-unknown-linux-gnu"
-      stage: linux-tier2
+      stage: 32bit-tier2
    - env: TARGET=mipsel-unknown-linux-musl
      name: "mipsel-unknown-linux-musl"
-      stage: linux-tier2
+      stage: 32bit-tier2
    - env: TARGET=mips64-unknown-linux-gnuabi64
      name: "mips64-unknown-linux-gnuabi64"
-      stage: linux-tier2
+      stage: 64bit-tier2
    - env: TARGET=mips64el-unknown-linux-gnuabi64
      name: "mips64el-unknown-linux-gnuabi64"
-      stage: linux-tier2
+      stage: 64bit-tier2
      # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/18
      # env: TARGET=mips64el-unknown-linux-gnuabi64 RUSTFLAGS="-C target-feature=+msa -C target-cpu=mips64r6"
    - env: TARGET=powerpc-unknown-linux-gnu
      name: "powerpc-unknown-linux-gnu"
-      stage: linux-tier2
+      stage: 32bit-tier2
    - env: TARGET=powerpc64-unknown-linux-gnu
      name: "powerpc64-unknown-linux-gnu"
-      stage: linux-tier2
-    - env: TARGET=powerpc64le-unknown-linux-gnu
-      name: "powerpc64le-unknown-linux-gnu"
-      stage: linux-tier2
-    - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec"
-      name: "powerpc64le-unknown-linux-gnu + ALTIVEC"
-      stage: linux-tier2
-    - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx"
-      name: "powerpc64le-unknown-linux-gnu + VSX"
-      stage: linux-tier2
-    - env: TARGET=s390x-unknown-linux-gnu
-      name: "s390x-unknown-linux-gnu"
-      stage: linux-tier2
+      stage: 64bit-tier2
+    - name: "powerpc64le-unknown-linux-gnu"
+      env: TARGET=powerpc64le-unknown-linux-gnu
+      stage: build-test-verify
+    - name: "powerpc64le-unknown-linux-gnu + ALTIVEC"
+      env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec"
+      stage: build-test-verify
+    - name: "powerpc64le-unknown-linux-gnu + VSX"
+      env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx"
+      stage: build-test-verify
+    - name: "s390x-unknown-linux-gnu"
+      env: TARGET=s390x-unknown-linux-gnu
+      stage: 64bit-tier2
    - env: TARGET=sparc64-unknown-linux-gnu
      name: "sparc64-unknown-linux-gnu"
-      stage: linux-tier2
+      stage: 64bit-tier2
    # WebAssembly:
    - env: TARGET=wasm32-unknown-unknown
      name: "wasm32-unknown-unknown"
-      stage: osx-tier1 # For now
+      stage: 32bit-tier2
    # MacOSX:
-    - os: osx
-      env: TARGET=i686-apple-darwin
-      name: "i686-apple-darwin + SSE2"
-      script: ci/run.sh
-      osx_image: xcode10
-      stage: osx-tier1
-    - os: osx
-      env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2"
-      name: "i686-apple-darwin + SSE4.2"
-      script: ci/run.sh
-      osx_image: xcode10
-      stage: osx-tier1
-      # Travis-CI OSX build bots do not support AVX2:
-    - os: osx
-      env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+avx"
-      name: "i686-apple-darwin + AVX"
-      script: ci/run.sh
-      osx_image: xcode10
-      stage: osx-tier1
-    - os: osx
-      env: TARGET=x86_64-apple-darwin
-      name: "x86_64-apple-darwin + SSE2"
-      install: true
-      script: ci/run.sh
-      osx_image: xcode10
-      stage: osx-tier1
    - os: osx
      env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2"
      name: "x86_64-apple-darwin + SSE4.2"
      install: true
      script: ci/run.sh
      osx_image: xcode10
-      stage: osx-tier1
+      stage: build-test-verify
      # Travis-CI OSX build bots do not support AVX2:
    - os: osx
      env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+avx"
@ -194,7 +131,7 @@ matrix:
      install: true
      script: ci/run.sh
      osx_image: xcode10
-      stage: osx-tier1
+      stage: build-test-verify
    # *BSDs:
    #- env: TARGET=i686-unknown-freebsd NORUN=1
    #  script: ci/run.sh
@ -206,81 +143,75 @@ matrix:
    #- env: TARGET=x86_64-sun-solaris NORUN=1
    #  script: ci/run.sh
    # iOS:
-    - os: osx
-      env: TARGET=i386-apple-ios
-      name: "i386-apple-ios"
-      script: ci/run.sh
-      osx_image: xcode9.4
-      stage: osx-tier2
    - os: osx
      env: TARGET=x86_64-apple-ios
      name: "x86_64-apple-ios + SSE2"
      script: ci/run.sh
      osx_image: xcode9.4
-      stage: osx-tier2
-    - os: osx
-      env: TARGET=armv7-apple-ios NORUN=1
-      name: "armv7-apple-ios [Build only]"
-      script: ci/run.sh
+      stage: 64bit-tier2
+    - name: "aarch64-apple-ios + NEON"
+      env: TARGET=aarch64-apple-ios RUSTFLAGS="-C target-feature=+neon"
+      os: osx
      osx_image: xcode9.4
-      stage: osx-tier2
-    - os: osx
-      env: TARGET=aarch64-apple-ios NORUN=1
-      name: "aarch64-apple-ios [Build only]"
      script: ci/run.sh
-      osx_image: xcode9.4
-      stage: osx-tier2
+      stage: 64bit-tier2
    # BENCHMARKS:
    - name: "Benchmarks - x86_64-unknown-linux-gnu"
      install: TARGET=x86_64-unknown-linux-gnu ./ci/setup_benchmarks.sh
-      script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh
+      # FIXME: Use `core_arch,sleef-sys` features once they work again
+      script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=ispc ci/benchmark.sh
      stage: tools
    - name: "Benchmarks - x86_64-apple-darwin"
      install: TARGET=x86_64-apple-darwin ./ci/setup_benchmarks.sh
-      script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh
+      # FIXME: Use `core_arch,sleef-sys` features once they work again
+      script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=ispc ci/benchmark.sh
      os: osx
      osx_image: xcode9.4
      stage: tools
    # TOOLS:
    - name: "Documentation"
-      install: cargo install mdbook
+      before_install:
+        - sudo add-apt-repository -y ppa:deadsnakes/ppa
+        - sudo apt-get update -y
+        - sudo apt-get install -y python3.9
+      install:
+        - cargo install mdbook
      script: ci/dox.sh
      stage: tools
    - name: "rustfmt"
      install: true
-      before_script: rustup component add rustfmt-preview
-      script: ci/all.sh check_fmt || true
+      script: |
+        if rustup component add rustfmt-preview ; then
+            ci/all.sh check_fmt || true
+        fi
      stage: tools
    - name: "clippy"
      install: true
-      before_script: rustup component add clippy-preview
-      script: ci/all.sh clippy
+      script: |
+        if rustup component add clippy-preview ; then
+            ci/all.sh clippy
+        fi
      stage: tools

  allow_failures:
    # FIXME: ISPC cannot be found?
    - name: "Benchmarks - x86_64-apple-darwin"
-    # FIXME: TBD
-    - env: TARGET=powerpc-unknown-linux-gnu
-    - env: TARGET=powerpc64-unknown-linux-gnu
-    - env: TARGET=powerpc64le-unknown-linux-gnu
-    - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec"
-    - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx"
+    # FIXME: i686 fails in inlining, apparently
+    - stage: 32bit-tier1
    #- env: TARGET=i686-unknown-freebsd NORUN=1
    #- env: TARGET=x86_64-unknown-freebsd NORUN=1
    #- env: TARGET=x86_64-unknown-netbsd NORUN=1
    #- env: TARGET=x86_64-sun-solaris NORUN=1

    # FIXME: TBD
-    - env: TARGET=arm-linux-androideabi
-    - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon"
-    - env: TARGET=aarch64-linux-android
-    - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon"
+    - stage: 64bit-tier2
+    - stage: 32bit-tier2

    # FIXME: iOS
    # https://github.com/rust-lang-nursery/packed_simd/issues/26
-    - env: TARGET=i386-apple-ios
    - env: TARGET=x86_64-apple-ios
+    # Is this related to the above? Mysterious test failure
+    - name: "aarch64-apple-ios + NEON"

    # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/182
    - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon"
--- a/third_party/rust/packed_simd/Cargo.toml
+++ b/third_party/rust/packed_simd/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "packed_simd"
-version = "0.3.3"
+version = "0.3.4"
 authors = ["Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>"]
 description = "Portable Packed SIMD vectors"
 documentation = "https://docs.rs/crate/packed_simd/"
@ -21,8 +21,8 @@ is-it-maintained-open-issues = { repository = "rust-lang-nursery/packed_simd" }
 maintenance = { status = "experimental" }

 [dependencies]
-cfg-if = "^0.1.6"
-core_arch = { version = "^0.1.3", optional = true }
+cfg-if = "0.1.10"
+core_arch = { version = "0.1.5", optional = true }

 [features]
 default = []
@ -31,12 +31,12 @@ libcore_neon = []

 [dev-dependencies]
 paste = "^0.1.3"
-arrayvec = { version = "^0.4", default-features = false }
+arrayvec = { version = "^0.5", default-features = false }

 [target.'cfg(target_arch = "x86_64")'.dependencies.sleef-sys]
-version = "^0.1.2"
+version = "0.1.2"
 optional = true

 [target.wasm32-unknown-unknown.dev-dependencies]
-wasm-bindgen = "=0.2.19"
-wasm-bindgen-test = "=0.2.19"
+wasm-bindgen = "=0.2.52"
+wasm-bindgen-test = "=0.3.2"
--- a/third_party/rust/packed_simd/README.md
+++ b/third_party/rust/packed_simd/README.md
@ -4,16 +4,14 @@

 [![Travis-CI Status]][travis] [![Appveyor Status]][appveyor] [![Latest Version]][crates.io] [![docs]][master_docs]

-> This aims to be a 100% conforming implementation of Rust RFC 2366 for stabilization.
-
-**WARNING**: this crate only supports the most recent nightly Rust toolchain.
+**WARNING**: this crate only supports the most recent nightly Rust toolchain
+and will be superseded by [stdsimd](https://github.com/rust-lang/stdsimd).

 ## Documentation

 * [API docs (`master` branch)][master_docs]
 * [Performance guide][perf_guide]
-* [API docs (`docs.rs`)][docs.rs]: **CURRENTLY DOWN** due to
-  https://github.com/rust-lang-nursery/packed_simd/issues/110
+* [API docs (`docs.rs`)][docs.rs]
 * [RFC2366 `std::simd`][rfc2366]: - contains motivation, design rationale,
  discussion, etc.

@ -39,17 +37,6 @@ Most of the examples come with both a scalar and a vectorized implementation.
  vector type as those of another vector type safely by just using the
  `.into_bits()` method.

-* `core_arch` (default: disabled): enable this feature to recompile `core::arch`
-  for the target-features enabled. `packed_simd` includes optimizations for some
-  target feature combinations that are enabled by this feature. Note, however,
-  that this is an unstable dependency, that rustc might break at any time.
-
-* `sleef-sys` (default: disabled - `x86_64` only): internally uses the [SLEEF]
-  short-vector math library when profitable via the [`sleef-sys`][sleef_sys]
-  crate. [SLEEF] is licensed under the [Boost Software License
-  v1.0][boost_license], an extremely permissive license, and can be statically
-  linked without issues.
-
 ## Performance

 The following [ISPC] examples are also part of `packed_simd`'s
@ -75,60 +62,40 @@ slowdown:

 ## Platform support

-The following table describes the supported platforms: `build` shows whether the
-library compiles without issues for a given target, while `run` shows whether
-the full testsuite passes on the target.
+The following table describes the supported platforms: `build` shows whether
+the library compiles without issues for a given target, while `run` shows
+whether the test suite passes for a given target.

-| Linux targets:                    | build     | run     |
-|-----------------------------------|-----------|---------|
-| `i586-unknown-linux-gnu`          | ✓         | ✓       |
-| `i686-unknown-linux-gnu`          | ✓         | ✓       |
-| `x86_64-unknown-linux-gnu`        | ✓         | ✓       |
-| `arm-unknown-linux-gnueabi`       | ✗         | ✗       |
-| `arm-unknown-linux-gnueabihf`     | ✓         | ✓       |
-| `armv7-unknown-linux-gnueabi`     | ✓         | ✓       |
-| `aarch64-unknown-linux-gnu`       | ✓         | ✓       |
-| `mips-unknown-linux-gnu`          | ✓         | ✓       |
-| `mipsel-unknown-linux-musl`       | ✓         | ✓       |
-| `mips64-unknown-linux-gnuabi64`   | ✓         | ✓       |
-| `mips64el-unknown-linux-gnuabi64` | ✓         | ✓       |
-| `powerpc-unknown-linux-gnu`       | ✗         | ✗       |
-| `powerpc64-unknown-linux-gnu`     | ✗         | ✗       |
-| `powerpc64le-unknown-linux-gnu`   | ✗         | ✗       |
-| `s390x-unknown-linux-gnu`         | ✓         | ✓*      |
-| `sparc64-unknown-linux-gnu`       | ✓         | ✓*      |
-| `thumbv7neon-unknown-linux-gnueabihf` | ✓         | ✓      |
-| **MacOSX targets:**               | **build** | **run** |
-| `x86_64-apple-darwin`             | ✓         | ✓       |
-| `i686-apple-darwin`               | ✓         | ✓       |
-| **Windows targets:**              | **build** | **run** |
-| `x86_64-pc-windows-msvc`          | ✓         | ✓       |
-| `i686-pc-windows-msvc`            | ✓         | ✓       |
-| `x86_64-pc-windows-gnu`           | ✗          | ✗        |
-| `i686-pc-windows-gnu`             | ✗          | ✗        |
-| **WebAssembly targets:**          | **build** | **run** |
-| `wasm32-unknown-unknown`          | ✓         | ✓      |
-| **Android targets:**              | **build** | **run** |
-| `x86_64-linux-android`            | ✓         | ✓       |
-| `arm-linux-androideabi`           | ✓         | ✓       |
-| `aarch64-linux-android`           | ✓         | ✗       |
-| `thumbv7neon-linux-androideabi`  | ✓         | ✓       |
-| **iOS targets:**                  | **build** | **run** |
-| `i386-apple-ios`                  | ✓         | ✗       |
-| `x86_64-apple-ios`                | ✓         | ✗       |
-| `armv7-apple-ios`                 | ✓         | ✗**     |
-| `aarch64-apple-ios`               | ✓         | ✗**     |
-| **xBSD targets:**                 | **build** | **run** |
-| `i686-unknown-freebsd`            | ✗         | ✗**     |
-| `x86_64-unknown-freebsd`          | ✗         | ✗**     |
-| `x86_64-unknown-netbsd`           | ✗         | ✗**     |
-| **Solaris targets:**              | **build** | **run** |
-| `x86_64-sun-solaris`              | ✗         | ✗**     |
+| **Linux**                             | **build** | **run** |
+|---------------------------------------|-----------|---------|
+| `i586-unknown-linux-gnu`              | ✓         | ✗       |
+| `i686-unknown-linux-gnu`              | ✓         | ✗       |
+| `x86_64-unknown-linux-gnu`            | ✓         | ✓       |
+| `arm-unknown-linux-gnueabi`           | ✗         | ✗       |
+| `arm-unknown-linux-gnueabihf`         | ✓         | ✓       |
+| `armv7-unknown-linux-gnueabi`         | ✓         | ✓       |
+| `aarch64-unknown-linux-gnu`           | ✓         | ✓       |
+| `mips-unknown-linux-gnu`              | ✓         | ✗       |
+| `mipsel-unknown-linux-musl`           | ✓         | ✗       |
+| `mips64-unknown-linux-gnuabi64`       | ✓         | ✗       |
+| `mips64el-unknown-linux-gnuabi64`     | ✓         | ✗       |
+| `powerpc-unknown-linux-gnu`           | ✗         | ✗       |
+| `powerpc64-unknown-linux-gnu`         | ✗         | ✗       |
+| `powerpc64le-unknown-linux-gnu`       | ✓         | ✓       |
+| `s390x-unknown-linux-gnu`             | ✗         | ✗       |
+| `sparc64-unknown-linux-gnu`           | ✓         | ✗       |
+| `thumbv7neon-unknown-linux-gnueabihf` | ✓         | ✓       |
+| **MacOSX**                            | **build** | **run** |
+| `x86_64-apple-darwin`                 | ✓         | ✓       |
+| **Android**                           | **build** | **run** |
+| `x86_64-linux-android`                | ✓         | ✓       |
+| `arm-linux-androideabi`               | ✓         | ✓       |
+| `aarch64-linux-android`               | ✓         | ✓       |
+| `thumbv7neon-linux-androideabi`       | ✗         | ✗       |
+| **iOS**                               | **build** | **run** |
+| `x86_64-apple-ios`                    | ✓         | ✗       |
+| `aarch64-apple-ios`                   | ✓         | ✗       |

-[*] most of the test suite passes correctly on these platform but
-there are correctness bugs open in the issue tracker.
-
-[**] it is currently not easily possible to run these platforms on CI.

 ## Machine code verification

@ -162,8 +129,8 @@ Unless you explicitly state otherwise, any contribution intentionally submitted
 for inclusion in `packed_simd` by you, as defined in the Apache-2.0 license, shall be
 dual licensed as above, without any additional terms or conditions.

-[travis]: https://travis-ci.org/rust-lang-nursery/packed_simd
-[Travis-CI Status]: https://travis-ci.org/rust-lang-nursery/packed_simd.svg?branch=master
+[travis]: https://travis-ci.com/rust-lang-nursery/packed_simd
+[Travis-CI Status]: https://travis-ci.com/rust-lang-nursery/packed_simd.svg?branch=master
 [appveyor]: https://ci.appveyor.com/project/gnzlbg/packed-simd
 [Appveyor Status]: https://ci.appveyor.com/api/projects/status/hd7v9dvr442hgdix?svg=true
 [Latest Version]: https://img.shields.io/crates/v/packed_simd.svg
--- a/third_party/rust/packed_simd/build.rs
+++ b/third_party/rust/packed_simd/build.rs
@ -1,5 +1,5 @@
 fn main() {
-	println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1");
+    println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1");
    let target = std::env::var("TARGET")
        .expect("TARGET environment variable not defined");
    if target.contains("neon") {
--- a/third_party/rust/packed_simd/ci/all.sh
+++ b/third_party/rust/packed_simd/ci/all.sh
@ -21,7 +21,7 @@ cargo_fmt() {
 }

 cargo_clippy() {
-    cargo clippy --all -- -D clippy::pedantic
+    cargo clippy --all -- -D clippy::perf
 }

 CMD="-1"
--- a/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
  ca-certificates \
--- a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
  ca-certificates \
--- a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
  ca-certificates \
--- a/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
  ca-certificates \
--- a/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc-multilib \
  libc6-dev \
--- a/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc-multilib \
  libc6-dev \
--- a/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
--- a/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
--- a/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
--- a/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile
@ -16,10 +16,10 @@ RUN mkdir /toolchain

 # Note that this originally came from:
 # https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2
-RUN curl -L https://s3-us-west-1.amazonaws.com/rust-lang-ci2/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \
+RUN curl -L https://ci-mirrors.rust-lang.org/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \
      tar xjf - -C /toolchain --strip-components=2

 ENV PATH=$PATH:/rust/bin:/toolchain/bin \
    CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \
    CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \
-    CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain"
+    CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain"
--- a/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
@ -9,4 +9,5 @@ RUN apt-get update && apt-get install -y --no-install-recommends \

 ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \
    CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu Vger -L /usr/powerpc-linux-gnu" \
+    CC=powerpc-linux-gnu-gcc \
    OBJDUMP=powerpc-linux-gnu-objdump
--- a/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04

 RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
--- a/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
--- a/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
  ca-certificates \
--- a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
+++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:17.10
+FROM ubuntu:18.04
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
  libc6-dev \
--- a/third_party/rust/packed_simd/ci/dox.sh
+++ b/third_party/rust/packed_simd/ci/dox.sh
@ -18,7 +18,10 @@ cp -r perf-guide/book target/doc/perf-guide

 # If we're on travis, not a PR, and on the right branch, publish!
 if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then
-  pip install ghp_import --install-option="--prefix=$HOME/.local"
-  $HOME/.local/bin/ghp-import -n target/doc
+  python3 -vV
+  pip -vV
+  python3.9 -vV
+  pip install ghp_import --user
+  ghp-import -n target/doc
  git push -qf https://${GH_PAGES}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages
 fi
--- a/third_party/rust/packed_simd/ci/run.sh
+++ b/third_party/rust/packed_simd/ci/run.sh
@ -78,9 +78,11 @@ fi

 if [[ "${TARGET}" == "x86_64-unknown-linux-gnu" ]] || [[ "${TARGET}" == "x86_64-pc-windows-msvc" ]]; then
    # use sleef on linux and windows x86_64 builds
-    cargo_test_impl --release --features=into_bits,core_arch,sleef-sys
+    # FIXME: Use `core_arch,sleef-sys` features once they work again
+    cargo_test_impl --release --features=into_bits
 else
-    cargo_test_impl --release --features=into_bits,core_arch
+    # FIXME: Use `core_arch` feature once it works again
+    cargo_test_impl --release --features=into_bits
 fi

 # Verify code generation
--- a/third_party/rust/packed_simd/ci/setup_benchmarks.sh
+++ b/third_party/rust/packed_simd/ci/setup_benchmarks.sh
@ -5,6 +5,3 @@ set -ex
 # Get latest ISPC binary for the target and put it in the path
 git clone https://github.com/gnzlbg/ispc-binaries
 cp ispc-binaries/ispc-${TARGET} ispc
-
-# Rust-bindgen requires RUSTFMT
-rustup component add rustfmt-preview
--- a/third_party/rust/packed_simd/src/api.rs
+++ b/third_party/rust/packed_simd/src/api.rs
@ -1,5 +1,7 @@
 //! Implements the Simd<[T; N]> APIs

+#[macro_use]
+mod bitmask;
 crate mod cast;
 #[macro_use]
 mod cmp;
@ -39,7 +41,7 @@ crate mod into_bits;

 macro_rules! impl_i {
    ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
-     | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),*
+     | $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),*
     | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
        impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
                          | $($elem_ids),* | $(#[$doc])*);
@ -93,6 +95,7 @@ macro_rules! impl_i {
        );
        impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
        impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
+        impl_bitmask!($tuple_id | $ibitmask_ty | (-1, 0) | $test_tt);

        test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt);
        test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
@ -102,7 +105,7 @@ macro_rules! impl_i {

 macro_rules! impl_u {
    ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
-     | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),*
+     | $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),*
     | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
        impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
                          | $($elem_ids),* | $(#[$doc])*);
@ -155,6 +158,8 @@ macro_rules! impl_u {
        );
        impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
        impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
+        impl_bitmask!($tuple_id | $ibitmask_ty | ($ielem_ty::max_value(), 0) |
+                      $test_tt);

        test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt);
        test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
@ -222,7 +227,8 @@ macro_rules! impl_f {
 }

 macro_rules! impl_m {
-    ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident | $ielem_ty:ident
+    ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident
+     | $ielem_ty:ident, $ibitmask_ty:ident
     | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),*
     | $(#[$doc:meta])*) => {
        impl_minimal_mask!(
@ -265,6 +271,7 @@ macro_rules! impl_m {
            [$elem_ty; $elem_n]: $tuple_id | $test_tt | (false, true)
        );
        impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+        impl_bitmask!($tuple_id | $ibitmask_ty | (true, false) | $test_tt);

        test_cmp_partial_ord_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
        test_shuffle1_dyn_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
--- a/third_party/rust/packed_simd/src/api/bit_manip.rs
+++ b/third_party/rust/packed_simd/src/api/bit_manip.rs
@ -37,6 +37,7 @@ macro_rules! impl_bit_manip {
            paste::item_with_macros! {
                #[allow(overflowing_literals)]
                pub mod [<$id _bit_manip>] {
+                    #![allow(const_item_mutation)]
                    use super::*;

                    const LANE_WIDTH: usize = mem::size_of::<$elem_ty>() * 8;
--- a/third_party/rust/packed_simd/src/api/bitmask.rs
+++ b/third_party/rust/packed_simd/src/api/bitmask.rs
@ -0,0 +1,82 @@
+//! Bitmask API
+
+macro_rules! impl_bitmask {
+    ($id:ident | $ibitmask_ty:ident | ($set:expr, $clear:expr)
+     | $test_tt:tt) => {
+        impl $id {
+            /// Creates a bitmask with the MSB of each vector lane.
+            ///
+            /// If the vector has fewer than 8 lanes, the bits that do not
+            /// correspond to any vector lanes are cleared.
+            #[inline]
+            pub fn bitmask(self) -> $ibitmask_ty {
+                unsafe { codegen::llvm::simd_bitmask(self.0) }
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                #[cfg(not(any(
+                    // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/210
+                    all(target_arch = "mips", target_endian = "big"),
+                    all(target_arch = "mips64", target_endian = "big"),
+                    target_arch = "sparc64",
+                    target_arch = "s390x",
+                )))]
+                pub mod [<$id _bitmask>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn bitmask() {
+                        // clear all lanes
+                        let vec = $id::splat($clear as _);
+                        let bitmask: $ibitmask_ty = 0;
+                        assert_eq!(vec.bitmask(), bitmask);
+
+                        // set even lanes
+                        let mut vec = $id::splat($clear as _);
+                        for i in 0..$id::lanes() {
+                            if i % 2 == 0 {
+                                vec = vec.replace(i, $set as _);
+                            }
+                        }
+                        // create bitmask with even lanes set:
+                        let mut bitmask: $ibitmask_ty = 0;
+                        for i in 0..$id::lanes() {
+                            if i % 2 == 0 {
+                                bitmask |= 1 << i;
+                            }
+                        }
+                        assert_eq!(vec.bitmask(), bitmask);
+
+
+                        // set odd lanes
+                        let mut vec = $id::splat($clear as _);
+                        for i in 0..$id::lanes() {
+                            if i % 2 != 0 {
+                                vec = vec.replace(i, $set as _);
+                            }
+                        }
+                        // create bitmask with odd lanes set:
+                        let mut bitmask: $ibitmask_ty = 0;
+                        for i in 0..$id::lanes() {
+                            if i % 2 != 0 {
+                                bitmask |= 1 << i;
+                            }
+                        }
+                        assert_eq!(vec.bitmask(), bitmask);
+
+                        // set all lanes
+                        let vec = $id::splat($set as _);
+                        let mut bitmask: $ibitmask_ty = 0;
+                        for i in 0..$id::lanes() {
+                            bitmask |= 1 << i;
+                        }
+                        assert_eq!(vec.bitmask(), bitmask);
+                    }
+                }
+            }
+        }
+    };
+}
--- a/third_party/rust/packed_simd/src/api/cast/v128.rs
+++ b/third_party/rust/packed_simd/src/api/cast/v128.rs
@ -1,5 +1,5 @@
 //! `FromCast` and `IntoCast` implementations for portable 128-bit wide vectors
-#![rustfmt::skip]
+#[rustfmt::skip]

 use crate::*;

--- a/third_party/rust/packed_simd/src/api/cast/v16.rs
+++ b/third_party/rust/packed_simd/src/api/cast/v16.rs
@ -1,5 +1,5 @@
 //! `FromCast` and `IntoCast` implementations for portable 16-bit wide vectors
-#![rustfmt::skip]
+#[rustfmt::skip]

 use crate::*;

--- a/third_party/rust/packed_simd/src/api/cast/v256.rs
+++ b/third_party/rust/packed_simd/src/api/cast/v256.rs
@ -1,5 +1,5 @@
 //! `FromCast` and `IntoCast` implementations for portable 256-bit wide vectors
-#![rustfmt::skip]
+#[rustfmt::skip]

 use crate::*;

--- a/third_party/rust/packed_simd/src/api/cast/v32.rs
+++ b/third_party/rust/packed_simd/src/api/cast/v32.rs
@ -1,5 +1,5 @@
 //! `FromCast` and `IntoCast` implementations for portable 32-bit wide vectors
-#![rustfmt::skip]
+#[rustfmt::skip]

 use crate::*;

--- a/third_party/rust/packed_simd/src/api/cast/v512.rs
+++ b/third_party/rust/packed_simd/src/api/cast/v512.rs
@ -1,5 +1,5 @@
 //! `FromCast` and `IntoCast` implementations for portable 512-bit wide vectors
-#![rustfmt::skip]
+#[rustfmt::skip]

 use crate::*;

--- a/third_party/rust/packed_simd/src/api/cast/v64.rs
+++ b/third_party/rust/packed_simd/src/api/cast/v64.rs
@ -1,5 +1,5 @@
 //! `FromCast` and `IntoCast` implementations for portable 64-bit wide vectors
-#![rustfmt::skip]
+#[rustfmt::skip]

 use crate::*;

--- a/third_party/rust/packed_simd/src/api/default.rs
+++ b/third_party/rust/packed_simd/src/api/default.rs
@ -12,6 +12,8 @@ macro_rules! impl_default {
        test_if!{
            $test_tt:
            paste::item! {
+                // Comparisons use integer casts within mantissa^1 range.
+                #[allow(clippy::float_cmp)]
                pub mod [<$id _default>] {
                    use super::*;
                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
--- a/third_party/rust/packed_simd/src/api/from/from_array.rs
+++ b/third_party/rust/packed_simd/src/api/from/from_array.rs
@ -56,6 +56,8 @@ macro_rules! impl_from_array {
        test_if! {
            $test_tt:
            paste::item! {
+                // Comparisons use integer casts within mantissa^1 range.
+                #[allow(clippy::float_cmp)]
                mod [<$id _from>] {
                    use super::*;
                    #[test]
--- a/third_party/rust/packed_simd/src/api/hash.rs
+++ b/third_party/rust/packed_simd/src/api/hash.rs
@ -36,6 +36,8 @@ macro_rules! impl_hash {
                        let mut v_hash = a_hash.clone();
                        a.hash(&mut a_hash);

+                        // Integer within mantissa^1 range.
+                        #[allow(clippy::float_cmp)]
                        let v = $id::splat(42 as $elem_ty);
                        v.hash(&mut v_hash);
                        assert_eq!(a_hash.finish(), v_hash.finish());
--- a/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs
+++ b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs
@ -1,6 +1,6 @@
 //! `FromBits` and `IntoBits` between portable vector types and the
 //! architecture-specific vector types.
-#![rustfmt::skip]
+#[rustfmt::skip]

 // FIXME: MIPS FromBits/IntoBits

@ -84,7 +84,6 @@ macro_rules! impl_arch {
 // FIXME: 64-bit single element types
 // FIXME: arm/aarch float16x4_t missing
 impl_arch!(
-    [x86["x86"]: __m64], [x86_64["x86_64"]: __m64],
    [arm["arm"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t,
     poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t,
     uint64x1_t],
--- a/third_party/rust/packed_simd/src/api/into_bits/v128.rs
+++ b/third_party/rust/packed_simd/src/api/into_bits/v128.rs
@ -1,5 +1,5 @@
 //! `FromBits` and `IntoBits` implementations for portable 128-bit wide vectors
-#![rustfmt::skip]
+#[rustfmt::skip]

 #[allow(unused)]  // wasm_bindgen_test
 use crate::*;
--- a/third_party/rust/packed_simd/src/api/into_bits/v16.rs
+++ b/third_party/rust/packed_simd/src/api/into_bits/v16.rs
@ -1,5 +1,5 @@
 //! `FromBits` and `IntoBits` implementations for portable 16-bit wide vectors
-#![rustfmt::skip]
+#[rustfmt::skip]

 #[allow(unused)]  // wasm_bindgen_test
 use crate::*;
--- a/third_party/rust/packed_simd/src/api/into_bits/v256.rs
+++ b/third_party/rust/packed_simd/src/api/into_bits/v256.rs
@ -1,5 +1,5 @@
 //! `FromBits` and `IntoBits` implementations for portable 256-bit wide vectors
-#![rustfmt::skip]
+#[rustfmt::skip]

 #[allow(unused)]  // wasm_bindgen_test
 use crate::*;
--- a/third_party/rust/packed_simd/src/api/into_bits/v32.rs
+++ b/third_party/rust/packed_simd/src/api/into_bits/v32.rs
@ -1,5 +1,5 @@
 //! `FromBits` and `IntoBits` implementations for portable 32-bit wide vectors
-#![rustfmt::skip]
+#[rustfmt::skip]

 #[allow(unused)]  // wasm_bindgen_test
 use crate::*;
--- a/third_party/rust/packed_simd/src/api/into_bits/v512.rs
+++ b/third_party/rust/packed_simd/src/api/into_bits/v512.rs
@ -1,5 +1,5 @@
 //! `FromBits` and `IntoBits` implementations for portable 512-bit wide vectors
-#![rustfmt::skip]
+#[rustfmt::skip]

 #[allow(unused)]  // wasm_bindgen_test
 use crate::*;
--- a/third_party/rust/packed_simd/src/api/into_bits/v64.rs
+++ b/third_party/rust/packed_simd/src/api/into_bits/v64.rs
@ -1,5 +1,5 @@
 //! `FromBits` and `IntoBits` implementations for portable 64-bit wide vectors
-#![rustfmt::skip]
+#[rustfmt::skip]

 #[allow(unused)]  // wasm_bindgen_test
 use crate::*;
--- a/third_party/rust/packed_simd/src/api/minimal/iuf.rs
+++ b/third_party/rust/packed_simd/src/api/minimal/iuf.rs
@ -53,7 +53,7 @@ macro_rules! impl_minimal_iuf {

            /// Extracts the value at `index`.
            ///
-            /// # Precondition
+            /// # Safety
            ///
            /// If `index >= Self::lanes()` the behavior is undefined.
            #[inline]
@ -80,7 +80,7 @@ macro_rules! impl_minimal_iuf {

            /// Returns a new vector where the value at `index` is replaced by `new_value`.
            ///
-            /// # Precondition
+            /// # Safety
            ///
            /// If `index >= Self::lanes()` the behavior is undefined.
            #[inline]
@ -101,6 +101,8 @@ macro_rules! impl_minimal_iuf {
        test_if!{
            $test_tt:
            paste::item! {
+                // Comparisons use integer casts within mantissa^1 range.
+                #[allow(clippy::float_cmp)]
                pub mod [<$id _minimal>] {
                    use super::*;
                    #[cfg_attr(not(target_arch = "wasm32"), test)]
--- a/third_party/rust/packed_simd/src/api/minimal/mask.rs
+++ b/third_party/rust/packed_simd/src/api/minimal/mask.rs
@ -58,6 +58,8 @@ macro_rules! impl_minimal_mask {

            /// Extracts the value at `index`.
            ///
+            /// # Safety
+            ///
            /// If `index >= Self::lanes()` the behavior is undefined.
            #[inline]
            pub unsafe fn extract_unchecked(self, index: usize) -> bool {
@ -85,9 +87,9 @@ macro_rules! impl_minimal_mask {
            /// Returns a new vector where the value at `index` is replaced by
            /// `new_value`.
            ///
-            /// # Panics
+            /// # Safety
            ///
-            /// If `index >= Self::lanes()`.
+            /// If `index >= Self::lanes()` the behavior is undefined.
            #[inline]
            #[must_use = "replace_unchecked does not modify the original value - \
                          it returns a new vector with the value at `index` \
--- a/third_party/rust/packed_simd/src/api/minimal/ptr.rs
+++ b/third_party/rust/packed_simd/src/api/minimal/ptr.rs
@ -68,7 +68,7 @@ macro_rules! impl_minimal_p {

            /// Extracts the value at `index`.
            ///
-            /// # Precondition
+            /// # Safety
            ///
            /// If `index >= Self::lanes()` the behavior is undefined.
            #[inline]
@ -96,7 +96,7 @@ macro_rules! impl_minimal_p {

            /// Returns a new vector where the value at `index` is replaced by `new_value`.
            ///
-            /// # Precondition
+            /// # Safety
            ///
            /// If `index >= Self::lanes()` the behavior is undefined.
            #[inline]
@ -215,7 +215,7 @@ macro_rules! impl_minimal_p {
                    f,
                    "{}<{}>(",
                    stringify!($id),
-                    unsafe { crate::intrinsics::type_name::<T>() }
+                    crate::intrinsics::type_name::<T>()
                )?;
                for i in 0..$elem_count {
                    if i > 0 {
@ -550,11 +550,7 @@ macro_rules! impl_minimal_p {
                        ];

                        for i in 0..$elem_count {
-                            let ptr = unsafe {
-                                crate::mem::transmute(
-                                    &values[i] as *const i32
-                                )
-                            };
+                            let ptr = &values[i] as *const i32 as *mut i32;
                            vec = vec.replace(i, ptr);
                            array[i] = ptr;
                        }
@ -611,7 +607,7 @@ macro_rules! impl_minimal_p {

            /// Instantiates a new vector with the values of the `slice`.
            ///
-            /// # Precondition
+            /// # Safety
            ///
            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
            /// to an `align_of::<Self>()` boundary, the behavior is undefined.
@ -624,7 +620,7 @@ macro_rules! impl_minimal_p {

            /// Instantiates a new vector with the values of the `slice`.
            ///
-            /// # Precondition
+            /// # Safety
            ///
            /// If `slice.len() < Self::lanes()` the behavior is undefined.
            #[inline]
@ -827,7 +823,7 @@ macro_rules! impl_minimal_p {

            /// Writes the values of the vector to the `slice`.
            ///
-            /// # Precondition
+            /// # Safety
            ///
            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
            /// aligned to an `align_of::<Self>()` boundary, the behavior is
@ -843,7 +839,7 @@ macro_rules! impl_minimal_p {

            /// Writes the values of the vector to the `slice`.
            ///
-            /// # Precondition
+            /// # Safety
            ///
            /// If `slice.len() < Self::lanes()` the behavior is undefined.
            #[inline]
@ -1025,11 +1021,7 @@ macro_rules! impl_minimal_p {
                        ];

                        for i in 0..$elem_count {
-                            let ptr = unsafe {
-                                crate::mem::transmute(
-                                    &values[i] as *const i32
-                                )
-                            };
+                            let ptr = &values[i] as *const i32 as *mut i32;
                            vec = vec.replace(i, ptr);
                            array[i] = ptr;
                        }
@ -1151,7 +1143,7 @@ macro_rules! impl_minimal_p {
            /// As such, memory acquired directly from allocators or memory
            /// mapped files may be too large to handle with this function.
            ///
-            /// Consider using wrapping_offset_from instead if these constraints
+            /// Consider using `wrapping_offset_from` instead if these constraints
            /// are difficult to satisfy. The only advantage of this method is
            /// that it enables more aggressive compiler optimizations.
            #[inline]
--- a/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs
+++ b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs
@ -26,6 +26,11 @@ macro_rules! impl_ops_vector_float_min_max {
        test_if!{
            $test_tt:
            paste::item! {
+                #[cfg(not(any(
+                    // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/223
+                    all(target_arch = "mips", target_endian = "big"),
+                    target_arch = "mips64",
+                )))]
                pub mod [<$id _ops_vector_min_max>] {
                    use super::*;
                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
--- a/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs
+++ b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs
@ -49,9 +49,9 @@ macro_rules! impl_ptr_read {
                        let mut ptr = $id::<i32>::null();

                        for i in 0..$elem_count {
-                            ptr = ptr.replace(i, unsafe {
-                                crate::mem::transmute(&v[i] as *const i32)
-                            });
+                            ptr = ptr.replace(i,
+                                &v[i] as *const i32 as *mut i32
+                            );
                        }

                        // all mask elements are true:
@ -135,32 +135,8 @@ macro_rules! impl_ptr_write {
                M: sealed::Mask,
                [M; $elem_count]: sealed::SimdArray,
            {
-                // FIXME:
-                // https://github.com/rust-lang-nursery/packed_simd/issues/85
-                #[cfg(not(target_arch = "mips"))]
-                {
-                    use crate::llvm::simd_scatter;
-                    simd_scatter(value.0, self.0, mask.0)
-                }
-                #[cfg(target_arch = "mips")]
-                {
-                    let m_ptr =
-                        &mask as *const Simd<[M; $elem_count]> as *const M;
-                    for i in 0..$elem_count {
-                        let m = ptr::read(m_ptr.add(i));
-                        if m.test() {
-                            let t_ptr = &self
-                                as *const Simd<[*mut T; $elem_count]>
-                                as *mut *mut T;
-                            let v_ptr = &value as *const Simd<[T; $elem_count]>
-                                as *const T;
-                            ptr::write(
-                                ptr::read(t_ptr.add(i)),
-                                ptr::read(v_ptr.add(i)),
-                            );
-                        }
-                    }
-                }
+                use crate::llvm::simd_scatter;
+                simd_scatter(value.0, self.0, mask.0)
            }
        }

@ -185,7 +161,7 @@ macro_rules! impl_ptr_write {
                        let mut ptr = $id::<i32>::null();
                        for i in 0..$elem_count {
                            ptr = ptr.replace(i, unsafe {
-                                crate::mem::transmute(arr.as_ptr().add(i))
+                                arr.as_ptr().add(i) as *mut i32
                            });
                        }
                        // ptr = [&arr[0], &arr[1], ...]
--- a/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs
+++ b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs
@ -93,6 +93,8 @@ macro_rules! impl_reduction_float_arithmetic {
        test_if! {
            $test_tt:
            paste::item! {
+                // Comparisons use integer casts within mantissa^1 range.
+                #[allow(clippy::float_cmp)]
                pub mod [<$id _reduction_float_arith>] {
                    use super::*;
                    fn alternating(x: usize) -> $id {
@ -225,7 +227,7 @@ macro_rules! impl_reduction_float_arithmetic {
                        let mut v = $id::splat(0. as $elem_ty);
                        for i in 0..$id::lanes() {
                            let c = if i % 2 == 0 { 1e3 } else { -1. };
-                            start *= 3.14 * c;
+                            start *= ::core::$elem_ty::consts::PI * c;
                            scalar_reduction += start;
                            v = v.replace(i, start);
                        }
@ -257,6 +259,7 @@ macro_rules! impl_reduction_float_arithmetic {
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    #[allow(unused, dead_code)]
                    fn product_roundoff() {
+                        use ::core::convert::TryInto;
                        // Performs a tree-reduction
                        fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty {
                            assert!(!a.is_empty());
@ -278,7 +281,7 @@ macro_rules! impl_reduction_float_arithmetic {
                        let mut v = $id::splat(0. as $elem_ty);
                        for i in 0..$id::lanes() {
                            let c = if i % 2 == 0 { 1e3 } else { -1. };
-                            start *= 3.14 * c;
+                            start *= ::core::$elem_ty::consts::PI * c;
                            scalar_reduction *= start;
                            v = v.replace(i, start);
                        }
@ -288,7 +291,9 @@ macro_rules! impl_reduction_float_arithmetic {
                        v.write_to_slice_unaligned(&mut a);
                        let tree_reduction = tree_reduce_product(&a);

-                        // tolerate 1 ULP difference:
+                        // FIXME: Too imprecise, even only for product(f32x8).
+                        // Figure out how to narrow this down.
+                        let ulp_limit = $id::lanes() / 2;
                        let red_bits = simd_reduction.to_bits();
                        let tree_bits = tree_reduction.to_bits();
                        assert!(
@ -296,7 +301,7 @@ macro_rules! impl_reduction_float_arithmetic {
                                red_bits - tree_bits
                            } else {
                                tree_bits - red_bits
-                            } < 2,
+                            } < ulp_limit.try_into().unwrap(),
                            "vector: {:?} | simd_reduction: {:?} | \
                             tree_reduction: {} | scalar_reduction: {}",
                            v,
--- a/third_party/rust/packed_simd/src/api/reductions/min_max.rs
+++ b/third_party/rust/packed_simd/src/api/reductions/min_max.rs
@ -76,6 +76,8 @@ macro_rules! impl_reduction_min_max {
        }
        test_if! {$test_tt:
        paste::item! {
+            // Comparisons use integer casts within mantissa^1 range.
+            #[allow(clippy::float_cmp)]
            pub mod [<$id _reduction_min_max>] {
                use super::*;
                #[cfg_attr(not(target_arch = "wasm32"), test)]
@ -124,6 +126,8 @@ macro_rules! test_reduction_float_min_max {
        test_if!{
            $test_tt:
            paste::item! {
+                // Comparisons use integer casts within mantissa^1 range.
+                #[allow(clippy::float_cmp)]
                pub mod [<$id _reduction_min_max_nan>] {
                    use super::*;
                    #[cfg_attr(not(target_arch = "wasm32"), test)]
--- a/third_party/rust/packed_simd/src/api/slice/from_slice.rs
+++ b/third_party/rust/packed_simd/src/api/slice/from_slice.rs
@ -38,7 +38,7 @@ macro_rules! impl_slice_from_slice {

            /// Instantiates a new vector with the values of the `slice`.
            ///
-            /// # Precondition
+            /// # Safety
            ///
            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
            /// to an `align_of::<Self>()` boundary, the behavior is undefined.
@ -59,7 +59,7 @@ macro_rules! impl_slice_from_slice {

            /// Instantiates a new vector with the values of the `slice`.
            ///
-            /// # Precondition
+            /// # Safety
            ///
            /// If `slice.len() < Self::lanes()` the behavior is undefined.
            #[inline]
@ -84,6 +84,8 @@ macro_rules! impl_slice_from_slice {
        test_if! {
            $test_tt:
            paste::item! {
+                // Comparisons use integer casts within mantissa^1 range.
+                #[allow(clippy::float_cmp)]
                pub mod [<$id _slice_from_slice>] {
                    use super::*;
                    use crate::iter::Iterator;
--- a/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs
+++ b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs
@ -39,7 +39,7 @@ macro_rules! impl_slice_write_to_slice {

            /// Writes the values of the vector to the `slice`.
            ///
-            /// # Precondition
+            /// # Safety
            ///
            /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
            /// aligned to an `align_of::<Self>()` boundary, the behavior is
@ -64,7 +64,7 @@ macro_rules! impl_slice_write_to_slice {

            /// Writes the values of the vector to the `slice`.
            ///
-            /// # Precondition
+            /// # Safety
            ///
            /// If `slice.len() < Self::lanes()` the behavior is undefined.
            #[inline]
@ -86,6 +86,8 @@ macro_rules! impl_slice_write_to_slice {
        test_if! {
            $test_tt:
            paste::item! {
+                // Comparisons use integer casts within mantissa^1 range.
+                #[allow(clippy::float_cmp)]
                pub mod [<$id _slice_write_to_slice>] {
                    use super::*;
                    use crate::iter::Iterator;
--- a/third_party/rust/packed_simd/src/codegen.rs
+++ b/third_party/rust/packed_simd/src/codegen.rs
@ -16,6 +16,8 @@ macro_rules! impl_simd_array {
        pub struct $tuple_id($(crate $elem_tys),*);
        //^^^^^^^ leaked through SimdArray

+        impl crate::sealed::Seal for [$elem_ty; $elem_count] {}
+
        impl crate::sealed::SimdArray for [$elem_ty; $elem_count] {
            type Tuple = $tuple_id;
            type T = $elem_ty;
@ -23,6 +25,7 @@ macro_rules! impl_simd_array {
            type NT = [u32; $elem_count];
        }

+        impl crate::sealed::Seal for $tuple_id {}
        impl crate::sealed::Simd for $tuple_id {
            type Element = $elem_ty;
            const LANES: usize = $elem_count;
--- a/third_party/rust/packed_simd/src/codegen/bit_manip.rs
+++ b/third_party/rust/packed_simd/src/codegen/bit_manip.rs
@ -1,5 +1,5 @@
 //! LLVM bit manipulation intrinsics.
-#![rustfmt::skip]
+#[rustfmt::skip]

 use crate::*;

--- a/third_party/rust/packed_simd/src/codegen/llvm.rs
+++ b/third_party/rust/packed_simd/src/codegen/llvm.rs
@ -10,31 +10,37 @@ extern "platform-intrinsic" {
    // FIXME: Passing this intrinsics an `idx` array with an index that is
    // out-of-bounds will produce a monomorphization-time error.
    // https://github.com/rust-lang-nursery/packed_simd/issues/21
+    #[rustc_args_required_const(2)]
    pub fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U
    where
        T: Simd,
        <T as Simd>::Element: Shuffle<[u32; 2], Output = U>;

+    #[rustc_args_required_const(2)]
    pub fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U
    where
        T: Simd,
        <T as Simd>::Element: Shuffle<[u32; 4], Output = U>;

+    #[rustc_args_required_const(2)]
    pub fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U
    where
        T: Simd,
        <T as Simd>::Element: Shuffle<[u32; 8], Output = U>;

+    #[rustc_args_required_const(2)]
    pub fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U
    where
        T: Simd,
        <T as Simd>::Element: Shuffle<[u32; 16], Output = U>;

+    #[rustc_args_required_const(2)]
    pub fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U
    where
        T: Simd,
        <T as Simd>::Element: Shuffle<[u32; 32], Output = U>;

+    #[rustc_args_required_const(2)]
    pub fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U
    where
        T: Simd,
@ -96,4 +102,6 @@ extern "platform-intrinsic" {

    crate fn simd_gather<T, P, M>(value: T, pointers: P, mask: M) -> T;
    crate fn simd_scatter<T, P, M>(value: T, pointers: P, mask: M);
+
+    crate fn simd_bitmask<T, U>(value: T) -> U;
 }
--- a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs
@ -19,13 +19,7 @@ mod avx2;
 /// x86 64-bit m8x8 implementation
 macro_rules! x86_m8x8_impl {
    ($id:ident) => {
-        cfg_if! {
-            if #[cfg(all(target_arch = "x86_64", target_feature = "sse"))] {
-                x86_m8x8_sse_impl!($id);
-            } else {
-                fallback_impl!($id);
-            }
-        }
+        fallback_impl!($id);
    };
 }

--- a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs
@ -34,35 +34,3 @@ macro_rules! x86_m32x4_sse_impl {
        }
    };
 }
-
-macro_rules! x86_m8x8_sse_impl {
-    ($id:ident) => {
-        impl All for $id {
-            #[inline]
-            #[target_feature(enable = "sse")]
-            unsafe fn all(self) -> bool {
-                #[cfg(target_arch = "x86")]
-                use crate::arch::x86::_mm_movemask_pi8;
-                #[cfg(target_arch = "x86_64")]
-                use crate::arch::x86_64::_mm_movemask_pi8;
-                // _mm_movemask_pi8(a) creates an 8bit mask containing the most
-                // significant bit of each byte of `a`. If all bits are set,
-                // then all 8 lanes of the mask are true.
-                _mm_movemask_pi8(crate::mem::transmute(self))
-                    == u8::max_value() as i32
-            }
-        }
-        impl Any for $id {
-            #[inline]
-            #[target_feature(enable = "sse")]
-            unsafe fn any(self) -> bool {
-                #[cfg(target_arch = "x86")]
-                use crate::arch::x86::_mm_movemask_pi8;
-                #[cfg(target_arch = "x86_64")]
-                use crate::arch::x86_64::_mm_movemask_pi8;
-
-                _mm_movemask_pi8(crate::mem::transmute(self)) != 0
-            }
-        }
-    };
-}
--- a/third_party/rust/packed_simd/src/codegen/shuffle.rs
+++ b/third_party/rust/packed_simd/src/codegen/shuffle.rs
@ -2,301 +2,149 @@
 //! lanes and vector element types.

 use crate::masks::*;
-use crate::sealed::Shuffle;
+use crate::sealed::{Shuffle, Seal};

-impl Shuffle<[u32; 2]> for i8 {
-    type Output = crate::codegen::i8x2;
-}
-impl Shuffle<[u32; 4]> for i8 {
-    type Output = crate::codegen::i8x4;
-}
-impl Shuffle<[u32; 8]> for i8 {
-    type Output = crate::codegen::i8x8;
-}
-impl Shuffle<[u32; 16]> for i8 {
-    type Output = crate::codegen::i8x16;
-}
-impl Shuffle<[u32; 32]> for i8 {
-    type Output = crate::codegen::i8x32;
-}
-impl Shuffle<[u32; 64]> for i8 {
-    type Output = crate::codegen::i8x64;
+macro_rules! impl_shuffle {
+    ($array:ty, $base:ty, $out:ty) => {
+        impl Seal<$array> for $base {}
+        impl Shuffle<$array> for $base {
+            type Output = $out;
+        }
+    }
 }

-impl Shuffle<[u32; 2]> for u8 {
-    type Output = crate::codegen::u8x2;
-}
-impl Shuffle<[u32; 4]> for u8 {
-    type Output = crate::codegen::u8x4;
-}
-impl Shuffle<[u32; 8]> for u8 {
-    type Output = crate::codegen::u8x8;
-}
-impl Shuffle<[u32; 16]> for u8 {
-    type Output = crate::codegen::u8x16;
-}
-impl Shuffle<[u32; 32]> for u8 {
-    type Output = crate::codegen::u8x32;
-}
-impl Shuffle<[u32; 64]> for u8 {
-    type Output = crate::codegen::u8x64;
-}
+impl_shuffle! { [u32; 2], i8, crate::codegen::i8x2 }
+impl_shuffle! { [u32; 4], i8, crate::codegen::i8x4 }
+impl_shuffle! { [u32; 8], i8, crate::codegen::i8x8 }
+impl_shuffle! { [u32; 16], i8, crate::codegen::i8x16 }
+impl_shuffle! { [u32; 32], i8, crate::codegen::i8x32 }
+impl_shuffle! { [u32; 64], i8, crate::codegen::i8x64 }

-impl Shuffle<[u32; 2]> for m8 {
-    type Output = crate::codegen::m8x2;
-}
-impl Shuffle<[u32; 4]> for m8 {
-    type Output = crate::codegen::m8x4;
-}
-impl Shuffle<[u32; 8]> for m8 {
-    type Output = crate::codegen::m8x8;
-}
-impl Shuffle<[u32; 16]> for m8 {
-    type Output = crate::codegen::m8x16;
-}
-impl Shuffle<[u32; 32]> for m8 {
-    type Output = crate::codegen::m8x32;
-}
-impl Shuffle<[u32; 64]> for m8 {
-    type Output = crate::codegen::m8x64;
-}
+impl_shuffle! { [u32; 2], u8, crate::codegen::u8x2 }
+impl_shuffle! { [u32; 4], u8, crate::codegen::u8x4 }
+impl_shuffle! { [u32; 8], u8, crate::codegen::u8x8 }
+impl_shuffle! { [u32; 16], u8, crate::codegen::u8x16 }
+impl_shuffle! { [u32; 32], u8, crate::codegen::u8x32 }
+impl_shuffle! { [u32; 64], u8, crate::codegen::u8x64 }

-impl Shuffle<[u32; 2]> for i16 {
-    type Output = crate::codegen::i16x2;
-}
-impl Shuffle<[u32; 4]> for i16 {
-    type Output = crate::codegen::i16x4;
-}
-impl Shuffle<[u32; 8]> for i16 {
-    type Output = crate::codegen::i16x8;
-}
-impl Shuffle<[u32; 16]> for i16 {
-    type Output = crate::codegen::i16x16;
-}
-impl Shuffle<[u32; 32]> for i16 {
-    type Output = crate::codegen::i16x32;
-}
+impl_shuffle! { [u32; 2], m8, crate::codegen::m8x2 }
+impl_shuffle! { [u32; 4], m8, crate::codegen::m8x4 }
+impl_shuffle! { [u32; 8], m8, crate::codegen::m8x8 }
+impl_shuffle! { [u32; 16], m8, crate::codegen::m8x16 }
+impl_shuffle! { [u32; 32], m8, crate::codegen::m8x32 }
+impl_shuffle! { [u32; 64], m8, crate::codegen::m8x64 }

-impl Shuffle<[u32; 2]> for u16 {
-    type Output = crate::codegen::u16x2;
-}
-impl Shuffle<[u32; 4]> for u16 {
-    type Output = crate::codegen::u16x4;
-}
-impl Shuffle<[u32; 8]> for u16 {
-    type Output = crate::codegen::u16x8;
-}
-impl Shuffle<[u32; 16]> for u16 {
-    type Output = crate::codegen::u16x16;
-}
-impl Shuffle<[u32; 32]> for u16 {
-    type Output = crate::codegen::u16x32;
-}
+impl_shuffle! { [u32; 2], i16, crate::codegen::i16x2 }
+impl_shuffle! { [u32; 4], i16, crate::codegen::i16x4 }
+impl_shuffle! { [u32; 8], i16, crate::codegen::i16x8 }
+impl_shuffle! { [u32; 16], i16, crate::codegen::i16x16 }
+impl_shuffle! { [u32; 32], i16, crate::codegen::i16x32 }

-impl Shuffle<[u32; 2]> for m16 {
-    type Output = crate::codegen::m16x2;
-}
-impl Shuffle<[u32; 4]> for m16 {
-    type Output = crate::codegen::m16x4;
-}
-impl Shuffle<[u32; 8]> for m16 {
-    type Output = crate::codegen::m16x8;
-}
-impl Shuffle<[u32; 16]> for m16 {
-    type Output = crate::codegen::m16x16;
-}
-impl Shuffle<[u32; 32]> for m16 {
-    type Output = crate::codegen::m16x32;
-}
+impl_shuffle! { [u32; 2], u16, crate::codegen::u16x2 }
+impl_shuffle! { [u32; 4], u16, crate::codegen::u16x4 }
+impl_shuffle! { [u32; 8], u16, crate::codegen::u16x8 }
+impl_shuffle! { [u32; 16], u16, crate::codegen::u16x16 }
+impl_shuffle! { [u32; 32], u16, crate::codegen::u16x32 }

-impl Shuffle<[u32; 2]> for i32 {
-    type Output = crate::codegen::i32x2;
-}
-impl Shuffle<[u32; 4]> for i32 {
-    type Output = crate::codegen::i32x4;
-}
-impl Shuffle<[u32; 8]> for i32 {
-    type Output = crate::codegen::i32x8;
-}
-impl Shuffle<[u32; 16]> for i32 {
-    type Output = crate::codegen::i32x16;
-}
+impl_shuffle! { [u32; 2], m16, crate::codegen::m16x2 }
+impl_shuffle! { [u32; 4], m16, crate::codegen::m16x4 }
+impl_shuffle! { [u32; 8], m16, crate::codegen::m16x8 }
+impl_shuffle! { [u32; 16], m16, crate::codegen::m16x16 }

-impl Shuffle<[u32; 2]> for u32 {
-    type Output = crate::codegen::u32x2;
-}
-impl Shuffle<[u32; 4]> for u32 {
-    type Output = crate::codegen::u32x4;
-}
-impl Shuffle<[u32; 8]> for u32 {
-    type Output = crate::codegen::u32x8;
-}
-impl Shuffle<[u32; 16]> for u32 {
-    type Output = crate::codegen::u32x16;
-}
+impl_shuffle! { [u32; 2], i32, crate::codegen::i32x2 }
+impl_shuffle! { [u32; 4], i32, crate::codegen::i32x4 }
+impl_shuffle! { [u32; 8], i32, crate::codegen::i32x8 }
+impl_shuffle! { [u32; 16], i32, crate::codegen::i32x16 }

-impl Shuffle<[u32; 2]> for f32 {
-    type Output = crate::codegen::f32x2;
-}
-impl Shuffle<[u32; 4]> for f32 {
-    type Output = crate::codegen::f32x4;
-}
-impl Shuffle<[u32; 8]> for f32 {
-    type Output = crate::codegen::f32x8;
-}
-impl Shuffle<[u32; 16]> for f32 {
-    type Output = crate::codegen::f32x16;
-}
+impl_shuffle! { [u32; 2], u32, crate::codegen::u32x2 }
+impl_shuffle! { [u32; 4], u32, crate::codegen::u32x4 }
+impl_shuffle! { [u32; 8], u32, crate::codegen::u32x8 }
+impl_shuffle! { [u32; 16], u32, crate::codegen::u32x16 }

-impl Shuffle<[u32; 2]> for m32 {
-    type Output = crate::codegen::m32x2;
-}
-impl Shuffle<[u32; 4]> for m32 {
-    type Output = crate::codegen::m32x4;
-}
-impl Shuffle<[u32; 8]> for m32 {
-    type Output = crate::codegen::m32x8;
-}
-impl Shuffle<[u32; 16]> for m32 {
-    type Output = crate::codegen::m32x16;
-}
+impl_shuffle! { [u32; 2], f32, crate::codegen::f32x2 }
+impl_shuffle! { [u32; 4], f32, crate::codegen::f32x4 }
+impl_shuffle! { [u32; 8], f32, crate::codegen::f32x8 }
+impl_shuffle! { [u32; 16], f32, crate::codegen::f32x16 }
+
+impl_shuffle! { [u32; 2], m32, crate::codegen::m32x2 }
+impl_shuffle! { [u32; 4], m32, crate::codegen::m32x4 }
+impl_shuffle! { [u32; 8], m32, crate::codegen::m32x8 }
+impl_shuffle! { [u32; 16], m32, crate::codegen::m32x16 }

 /* FIXME: 64-bit single element vector
-impl Shuffle<[u32; 1]> for i64 {
-    type Output = crate::codegen::i64x1;
-}
+impl_shuffle! { [u32; 1], i64, crate::codegen::i64x1 }
 */
-impl Shuffle<[u32; 2]> for i64 {
-    type Output = crate::codegen::i64x2;
-}
-impl Shuffle<[u32; 4]> for i64 {
-    type Output = crate::codegen::i64x4;
-}
-impl Shuffle<[u32; 8]> for i64 {
-    type Output = crate::codegen::i64x8;
-}
+impl_shuffle! { [u32; 2], i64, crate::codegen::i64x2 }
+impl_shuffle! { [u32; 4], i64, crate::codegen::i64x4 }
+impl_shuffle! { [u32; 8], i64, crate::codegen::i64x8 }

 /* FIXME: 64-bit single element vector
-impl Shuffle<[u32; 1]> for u64 {
-    type Output = crate::codegen::u64x1;
-}
+impl_shuffle! { [u32; 1], u64, crate::codegen::u64x1 }
 */
-impl Shuffle<[u32; 2]> for u64 {
-    type Output = crate::codegen::u64x2;
-}
-impl Shuffle<[u32; 4]> for u64 {
-    type Output = crate::codegen::u64x4;
-}
-impl Shuffle<[u32; 8]> for u64 {
-    type Output = crate::codegen::u64x8;
-}
+impl_shuffle! { [u32; 2], u64, crate::codegen::u64x2 }
+impl_shuffle! { [u32; 4], u64, crate::codegen::u64x4 }
+impl_shuffle! { [u32; 8], u64, crate::codegen::u64x8 }

 /* FIXME: 64-bit single element vector
-impl Shuffle<[u32; 1]> for f64 {
-    type Output = crate::codegen::f64x1;
-}
+impl_shuffle! { [u32; 1], f64, crate::codegen::f64x1 }
 */
-impl Shuffle<[u32; 2]> for f64 {
-    type Output = crate::codegen::f64x2;
-}
-impl Shuffle<[u32; 4]> for f64 {
-    type Output = crate::codegen::f64x4;
-}
-impl Shuffle<[u32; 8]> for f64 {
-    type Output = crate::codegen::f64x8;
-}
+impl_shuffle! { [u32; 2], f64, crate::codegen::f64x2 }
+impl_shuffle! { [u32; 4], f64, crate::codegen::f64x4 }
+impl_shuffle! { [u32; 8], f64, crate::codegen::f64x8 }

 /* FIXME: 64-bit single element vector
-impl Shuffle<[u32; 1]> for m64 {
-    type Output = crate::codegen::m64x1;
-}
+impl_shuffle! { [u32; 1], m64, crate::codegen::m64x1 }
 */
-impl Shuffle<[u32; 2]> for m64 {
-    type Output = crate::codegen::m64x2;
-}
-impl Shuffle<[u32; 4]> for m64 {
-    type Output = crate::codegen::m64x4;
-}
-impl Shuffle<[u32; 8]> for m64 {
-    type Output = crate::codegen::m64x8;
-}
+impl_shuffle! { [u32; 2], m64, crate::codegen::m64x2 }
+impl_shuffle! { [u32; 4], m64, crate::codegen::m64x4 }
+impl_shuffle! { [u32; 8], m64, crate::codegen::m64x8 }

-impl Shuffle<[u32; 2]> for isize {
-    type Output = crate::codegen::isizex2;
-}
-impl Shuffle<[u32; 4]> for isize {
-    type Output = crate::codegen::isizex4;
-}
-impl Shuffle<[u32; 8]> for isize {
-    type Output = crate::codegen::isizex8;
-}
+impl_shuffle! { [u32; 2], isize, crate::codegen::isizex2 }
+impl_shuffle! { [u32; 4], isize, crate::codegen::isizex4 }
+impl_shuffle! { [u32; 8], isize, crate::codegen::isizex8 }

-impl Shuffle<[u32; 2]> for usize {
-    type Output = crate::codegen::usizex2;
-}
-impl Shuffle<[u32; 4]> for usize {
-    type Output = crate::codegen::usizex4;
-}
-impl Shuffle<[u32; 8]> for usize {
-    type Output = crate::codegen::usizex8;
-}
+impl_shuffle! { [u32; 2], usize, crate::codegen::usizex2 }
+impl_shuffle! { [u32; 4], usize, crate::codegen::usizex4 }
+impl_shuffle! { [u32; 8], usize, crate::codegen::usizex8 }

+impl_shuffle! { [u32; 2], msize, crate::codegen::msizex2 }
+impl_shuffle! { [u32; 4], msize, crate::codegen::msizex4 }
+impl_shuffle! { [u32; 8], msize, crate::codegen::msizex8 }
+
+impl<T> Seal<[u32; 2]> for *const T {}
 impl<T> Shuffle<[u32; 2]> for *const T {
    type Output = crate::codegen::cptrx2<T>;
 }
+impl<T> Seal<[u32; 4]> for *const T {}
 impl<T> Shuffle<[u32; 4]> for *const T {
    type Output = crate::codegen::cptrx4<T>;
 }
+impl<T> Seal<[u32; 8]> for *const T {}
 impl<T> Shuffle<[u32; 8]> for *const T {
    type Output = crate::codegen::cptrx8<T>;
 }

+impl<T> Seal<[u32; 2]> for *mut T {}
 impl<T> Shuffle<[u32; 2]> for *mut T {
    type Output = crate::codegen::mptrx2<T>;
 }
+impl<T> Seal<[u32; 4]> for *mut T {}
 impl<T> Shuffle<[u32; 4]> for *mut T {
    type Output = crate::codegen::mptrx4<T>;
 }
+impl<T> Seal<[u32; 8]> for *mut T {}
 impl<T> Shuffle<[u32; 8]> for *mut T {
    type Output = crate::codegen::mptrx8<T>;
 }

-impl Shuffle<[u32; 2]> for msize {
-    type Output = crate::codegen::msizex2;
-}
-impl Shuffle<[u32; 4]> for msize {
-    type Output = crate::codegen::msizex4;
-}
-impl Shuffle<[u32; 8]> for msize {
-    type Output = crate::codegen::msizex8;
-}
+impl_shuffle! { [u32; 1], i128, crate::codegen::i128x1 }
+impl_shuffle! { [u32; 2], i128, crate::codegen::i128x2 }
+impl_shuffle! { [u32; 4], i128, crate::codegen::i128x4 }

-impl Shuffle<[u32; 1]> for i128 {
-    type Output = crate::codegen::i128x1;
-}
-impl Shuffle<[u32; 2]> for i128 {
-    type Output = crate::codegen::i128x2;
-}
-impl Shuffle<[u32; 4]> for i128 {
-    type Output = crate::codegen::i128x4;
-}
+impl_shuffle! { [u32; 1], u128, crate::codegen::u128x1 }
+impl_shuffle! { [u32; 2], u128, crate::codegen::u128x2 }
+impl_shuffle! { [u32; 4], u128, crate::codegen::u128x4 }

-impl Shuffle<[u32; 1]> for u128 {
-    type Output = crate::codegen::u128x1;
-}
-impl Shuffle<[u32; 2]> for u128 {
-    type Output = crate::codegen::u128x2;
-}
-impl Shuffle<[u32; 4]> for u128 {
-    type Output = crate::codegen::u128x4;
-}
-
-impl Shuffle<[u32; 1]> for m128 {
-    type Output = crate::codegen::m128x1;
-}
-impl Shuffle<[u32; 2]> for m128 {
-    type Output = crate::codegen::m128x2;
-}
-impl Shuffle<[u32; 4]> for m128 {
-    type Output = crate::codegen::m128x4;
-}
+impl_shuffle! { [u32; 1], m128, crate::codegen::m128x1 }
+impl_shuffle! { [u32; 2], m128, crate::codegen::m128x2 }
+impl_shuffle! { [u32; 4], m128, crate::codegen::m128x4 }
--- a/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs
+++ b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs
@ -28,31 +28,10 @@ macro_rules! impl_fallback {
 macro_rules! impl_shuffle1_dyn {
    (u8x8) => {
        cfg_if! {
-            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
-                         target_feature = "ssse3"))] {
-                impl Shuffle1Dyn for u8x8 {
-                    type Indices = Self;
-                    #[inline]
-                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
-                        #[cfg(target_arch = "x86")]
-                        use crate::arch::x86::_mm_shuffle_pi8;
-                        #[cfg(target_arch = "x86_64")]
-                        use crate::arch::x86_64::_mm_shuffle_pi8;
-
-                        unsafe {
-                            crate::mem::transmute(
-                                _mm_shuffle_pi8(
-                                    crate::mem::transmute(self.0),
-                                    crate::mem::transmute(indices.0)
-                                )
-                            )
-                        }
-                    }
-                }
-            } else if #[cfg(all(
+            if #[cfg(all(
                any(
-                    all(target_aarch = "aarch64", target_feature = "neon"),
-                    all(target_aarch = "arm", target_feature = "v7",
+                    all(target_arch = "aarch64", target_feature = "neon"),
+                    all(target_arch = "doesnotexist", target_feature = "v7",
                        target_feature = "neon")
                ),
                any(feature = "core_arch", libcore_neon)
@ -62,9 +41,9 @@ macro_rules! impl_shuffle1_dyn {
                    type Indices = Self;
                    #[inline]
                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
-                        #[cfg(targt_arch = "aarch64")]
+                        #[cfg(target_arch = "aarch64")]
                        use crate::arch::aarch64::vtbl1_u8;
-                        #[cfg(targt_arch = "arm")]
+                        #[cfg(target_arch = "doesnotexist")]
                        use crate::arch::arm::vtbl1_u8;

                        // This is safe because the binary is compiled with
@ -106,7 +85,7 @@ macro_rules! impl_shuffle1_dyn {
                        }
                    }
                }
-            } else if #[cfg(all(target_aarch = "aarch64", target_feature = "neon",
+            } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon",
                                any(feature = "core_arch", libcore_neon)))] {
                impl Shuffle1Dyn for u8x16 {
                    type Indices = Self;
@ -125,7 +104,7 @@ macro_rules! impl_shuffle1_dyn {
                        }
                    }
                }
-            } else if #[cfg(all(target_aarch = "arm", target_feature = "v7",
+            } else if #[cfg(all(target_arch = "doesnotexist", target_feature = "v7",
                                target_feature = "neon",
                                any(feature = "core_arch", libcore_neon)))] {
                impl Shuffle1Dyn for u8x16 {
--- a/third_party/rust/packed_simd/src/codegen/vPtr.rs
+++ b/third_party/rust/packed_simd/src/codegen/vPtr.rs
@ -8,6 +8,7 @@ macro_rules! impl_simd_ptr {
        pub struct $tuple_id<$ty>($(crate $tys),*);
        //^^^^^^^ leaked through SimdArray

+        impl<$ty> crate::sealed::Seal for [$ptr_ty; $elem_count] {}
        impl<$ty> crate::sealed::SimdArray for [$ptr_ty; $elem_count] {
            type Tuple = $tuple_id<$ptr_ty>;
            type T = $ptr_ty;
@ -15,6 +16,7 @@ macro_rules! impl_simd_ptr {
            type NT = [u32; $elem_count];
        }

+        impl<$ty> crate::sealed::Seal for $tuple_id<$ptr_ty> {}
        impl<$ty> crate::sealed::Simd for $tuple_id<$ptr_ty> {
            type Element = $ptr_ty;
            const LANES: usize = $elem_count;
--- a/third_party/rust/packed_simd/src/lib.rs
+++ b/third_party/rust/packed_simd/src/lib.rs
@ -201,6 +201,7 @@

 #![feature(
    repr_simd,
+    rustc_attrs,
    const_fn,
    platform_intrinsics,
    stdsimd,
@ -209,22 +210,26 @@
    link_llvm_intrinsics,
    core_intrinsics,
    stmt_expr_attributes,
-    align_offset,
-    mmx_target_feature,
    crate_visibility_modifier,
    custom_inner_attributes
 )]
 #![allow(non_camel_case_types, non_snake_case,
-         clippy::cast_possible_truncation,
-         clippy::cast_lossless,
-         clippy::cast_possible_wrap,
-         clippy::cast_precision_loss,
-         // This lint is currently broken for generic code
-         // See https://github.com/rust-lang/rust-clippy/issues/3410
-         clippy::use_self
+        // FIXME: these types are unsound in C FFI already
+        // See https://github.com/rust-lang/rust/issues/53346
+        improper_ctypes_definitions,
+        clippy::cast_possible_truncation,
+        clippy::cast_lossless,
+        clippy::cast_possible_wrap,
+        clippy::cast_precision_loss,
+        // TODO: manually add the `#[must_use]` attribute where appropriate
+        clippy::must_use_candidate,
+        // This lint is currently broken for generic code
+        // See https://github.com/rust-lang/rust-clippy/issues/3410
+        clippy::use_self,
+        clippy::wrong_self_convention,
 )]
 #![cfg_attr(test, feature(hashmap_internals))]
-#![deny(warnings, rust_2018_idioms, clippy::missing_inline_in_public_items)]
+#![deny(rust_2018_idioms, clippy::missing_inline_in_public_items)]
 #![no_std]

 use cfg_if::cfg_if;
@ -256,6 +261,8 @@ mod api;
 mod codegen;
 mod sealed;

+pub use crate::sealed::{Simd as SimdVector, Shuffle, SimdArray, Mask};
+
 /// Packed SIMD vector type.
 ///
 /// # Examples
@ -276,6 +283,8 @@ pub struct Simd<A: sealed::SimdArray>(
    #[doc(hidden)] pub <A as sealed::SimdArray>::Tuple,
 );

+impl<A: sealed::SimdArray> sealed::Seal for Simd<A> {}
+
 /// Wrapper over `T` implementing a lexicoraphical order via the `PartialOrd`
 /// and/or `Ord` traits.
 #[repr(transparent)]
--- a/third_party/rust/packed_simd/src/masks.rs
+++ b/third_party/rust/packed_simd/src/masks.rs
@ -6,7 +6,9 @@ macro_rules! impl_mask_ty {
        #[derive(Copy, Clone)]
        pub struct $id($elem_ty);

+        impl crate::sealed::Seal for $id {}
        impl crate::sealed::Mask for $id {
+            #[inline]
            fn test(&self) -> bool {
                $id::test(self)
            }
--- a/third_party/rust/packed_simd/src/sealed.rs
+++ b/third_party/rust/packed_simd/src/sealed.rs
@ -1,8 +1,11 @@
 //! Sealed traits

+/// A sealed trait: it is logically private to the crate
+/// and prevents implementations from outside the crate.
+pub trait Seal<T = ()> {}
+
 /// Trait implemented by arrays that can be SIMD types.
-#[doc(hidden)]
-pub trait SimdArray {
+pub trait SimdArray: Seal {
    /// The type of the #[repr(simd)] type.
    type Tuple: Copy + Clone;
    /// The element type of the vector.
@ -16,7 +19,7 @@ pub trait SimdArray {
 /// This traits is used to constraint the arguments
 /// and result type of the portable shuffles.
 #[doc(hidden)]
-pub trait Shuffle<Lanes> {
+pub trait Shuffle<Lanes>: Seal<Lanes> {
    // Lanes is a `[u32; N]` where `N` is the number of vector lanes

    /// The result type of the shuffle.
@ -24,8 +27,7 @@ pub trait Shuffle<Lanes> {
 }

 /// This trait is implemented by all SIMD vector types.
-#[doc(hidden)]
-pub trait Simd {
+pub trait Simd: Seal {
    /// Element type of the SIMD vector
    type Element;
    /// The number of elements in the SIMD vector.
@ -35,7 +37,6 @@ pub trait Simd {
 }

 /// This trait is implemented by all mask types
-#[doc(hidden)]
-pub trait Mask {
+pub trait Mask: Seal {
    fn test(&self) -> bool;
 }
--- a/third_party/rust/packed_simd/src/testing/utils.rs
+++ b/third_party/rust/packed_simd/src/testing/utils.rs
@ -1,6 +1,8 @@
 //! Testing utilities

 #![allow(dead_code)]
+// FIXME: Or don't. But it's true this is a problematic comparison.
+#![allow(clippy::neg_cmp_op_on_partial_ord)]

 use crate::{cmp::PartialOrd, fmt::Debug, LexicographicallyOrdered};

@ -19,14 +21,19 @@ pub fn test_lt<T>(
    assert!(a <= b, "{:?}, {:?}", a, b);
    assert!(b >= a, "{:?}, {:?}", a, b);

-    // Irreflexivity
-    assert!(!(a < a), "{:?}, {:?}", a, b);
-    assert!(!(b < b), "{:?}, {:?}", a, b);
-    assert!(!(a > a), "{:?}, {:?}", a, b);
-    assert!(!(b > b), "{:?}, {:?}", a, b);
+    // The elegance of the mathematical expression of irreflexivity is more
+    // than clippy can handle.
+    #[allow(clippy::eq_op)]
+    {
+        // Irreflexivity
+        assert!(!(a < a), "{:?}, {:?}", a, b);
+        assert!(!(b < b), "{:?}, {:?}", a, b);
+        assert!(!(a > a), "{:?}, {:?}", a, b);
+        assert!(!(b > b), "{:?}, {:?}", a, b);

-    assert!(a <= a, "{:?}, {:?}", a, b);
-    assert!(b <= b, "{:?}, {:?}", a, b);
+        assert!(a <= a, "{:?}, {:?}", a, b);
+        assert!(b <= b, "{:?}, {:?}", a, b);
+    }
 }

 /// Tests PartialOrd for `a` and `b` where `a <= b` is true.
@ -38,8 +45,8 @@ pub fn test_le<T>(
    assert!(a <= b, "{:?}, {:?}", a, b);
    assert!(b >= a, "{:?}, {:?}", a, b);

-    assert!(a == b || a < b, "{:?}, {:?}", a, b);
-    assert!(a == b || b > a, "{:?}, {:?}", a, b);
+    assert!(a <= b, "{:?}, {:?}", a, b);
+    assert!(b >= a, "{:?}, {:?}", a, b);

    if a == b {
        assert!(!(a < b), "{:?}, {:?}", a, b);
--- a/third_party/rust/packed_simd/src/v128.rs
+++ b/third_party/rust/packed_simd/src/v128.rs
@ -1,42 +1,42 @@
 //! 128-bit wide vector types
-#![rustfmt::skip]
+#[rustfmt::skip]

 use crate::*;

-impl_i!([i8; 16]: i8x16, m8x16 | i8 | test_v128 |
+impl_i!([i8; 16]: i8x16, m8x16 | i8, u16 | test_v128 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
        From: |
        /// A 128-bit vector with 16 `i8` lanes.
 );
-impl_u!([u8; 16]: u8x16, m8x16 | u8 | test_v128 |
+impl_u!([u8; 16]: u8x16, m8x16 | u8, u16 | test_v128 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
        From: |
        /// A 128-bit vector with 16 `u8` lanes.
 );
-impl_m!([m8; 16]: m8x16 | i8 | test_v128 |
+impl_m!([m8; 16]: m8x16 | i8, u16 | test_v128 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
        From: m16x16 |
        /// A 128-bit vector mask with 16 `m8` lanes.
 );

-impl_i!([i16; 8]: i16x8, m16x8 | i16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
+impl_i!([i16; 8]: i16x8, m16x8 | i16, u8 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
        From: i8x8, u8x8 |
        /// A 128-bit vector with 8 `i16` lanes.
 );
-impl_u!([u16; 8]: u16x8, m16x8 | u16| test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
+impl_u!([u16; 8]: u16x8, m16x8 | u16, u8 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
        From: u8x8 |
        /// A 128-bit vector with 8 `u16` lanes.
 );
-impl_m!([m16; 8]: m16x8 | i16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
+impl_m!([m16; 8]: m16x8 | i16, u8 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
        From: m8x8, m32x8 |
        /// A 128-bit vector mask with 8 `m16` lanes.
 );

-impl_i!([i32; 4]: i32x4, m32x4 | i32 | test_v128 | x0, x1, x2, x3 |
+impl_i!([i32; 4]: i32x4, m32x4 | i32, u8 | test_v128 | x0, x1, x2, x3 |
        From: i8x4, u8x4, i16x4, u16x4  |
        /// A 128-bit vector with 4 `i32` lanes.
 );
-impl_u!([u32; 4]: u32x4, m32x4 | u32| test_v128 | x0, x1, x2, x3 |
+impl_u!([u32; 4]: u32x4, m32x4 | u32, u8 | test_v128 | x0, x1, x2, x3 |
        From: u8x4, u16x4 |
        /// A 128-bit vector with 4 `u32` lanes.
 );
@ -44,16 +44,16 @@ impl_f!([f32; 4]: f32x4, m32x4 | f32 | test_v128 | x0, x1, x2, x3 |
        From: i8x4, u8x4, i16x4, u16x4 |
        /// A 128-bit vector with 4 `f32` lanes.
 );
-impl_m!([m32; 4]: m32x4 | i32 | test_v128 | x0, x1, x2, x3 |
+impl_m!([m32; 4]: m32x4 | i32, u8 | test_v128 | x0, x1, x2, x3 |
        From: m8x4, m16x4, m64x4 |
        /// A 128-bit vector mask with 4 `m32` lanes.
 );

-impl_i!([i64; 2]: i64x2, m64x2 | i64 | test_v128 | x0, x1 |
+impl_i!([i64; 2]: i64x2, m64x2 | i64, u8 | test_v128 | x0, x1 |
        From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2 |
        /// A 128-bit vector with 2 `i64` lanes.
 );
-impl_u!([u64; 2]: u64x2, m64x2 | u64 | test_v128 | x0, x1 |
+impl_u!([u64; 2]: u64x2, m64x2 | u64, u8 | test_v128 | x0, x1 |
        From: u8x2, u16x2, u32x2 |
        /// A 128-bit vector with 2 `u64` lanes.
 );
@ -61,20 +61,20 @@ impl_f!([f64; 2]: f64x2, m64x2 | f64 | test_v128 | x0, x1 |
        From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, f32x2 |
        /// A 128-bit vector with 2 `f64` lanes.
 );
-impl_m!([m64; 2]: m64x2 | i64 | test_v128 | x0, x1 |
+impl_m!([m64; 2]: m64x2 | i64, u8 | test_v128 | x0, x1 |
        From: m8x2, m16x2, m32x2, m128x2 |
        /// A 128-bit vector mask with 2 `m64` lanes.
 );

-impl_i!([i128; 1]: i128x1, m128x1 | i128 | test_v128 | x0 |
+impl_i!([i128; 1]: i128x1, m128x1 | i128, u8 | test_v128 | x0 |
        From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, i64x1, u64x1 */ | // FIXME: unary small vector types
        /// A 128-bit vector with 1 `i128` lane.
 );
-impl_u!([u128; 1]: u128x1, m128x1 | u128 | test_v128 | x0 |
+impl_u!([u128; 1]: u128x1, m128x1 | u128, u8 | test_v128 | x0 |
        From: /*u8x1, u16x1, u32x1, u64x1 */ | // FIXME: unary small vector types
        /// A 128-bit vector with 1 `u128` lane.
 );
-impl_m!([m128; 1]: m128x1 | i128 | test_v128 | x0 |
+impl_m!([m128; 1]: m128x1 | i128, u8 | test_v128 | x0 |
        From: /*m8x1, m16x1, m32x1, m64x1 */ | // FIXME: unary small vector types
        /// A 128-bit vector mask with 1 `m128` lane.
 );
--- a/third_party/rust/packed_simd/src/v16.rs
+++ b/third_party/rust/packed_simd/src/v16.rs
@ -2,15 +2,15 @@

 use crate::*;

-impl_i!([i8; 2]: i8x2, m8x2 | i8 | test_v16 | x0, x1 |
+impl_i!([i8; 2]: i8x2, m8x2 | i8, u8 | test_v16 | x0, x1 |
        From: |
        /// A 16-bit vector with 2 `i8` lanes.
 );
-impl_u!([u8; 2]: u8x2, m8x2 | u8 | test_v16 | x0, x1 |
+impl_u!([u8; 2]: u8x2, m8x2 | u8, u8 | test_v16 | x0, x1 |
        From: |
        /// A 16-bit vector with 2 `u8` lanes.
 );
-impl_m!([m8; 2]: m8x2 | i8 | test_v16 | x0, x1 |
+impl_m!([m8; 2]: m8x2 | i8, u8 | test_v16 | x0, x1 |
        From: m16x2, m32x2, m64x2, m128x2 |
        /// A 16-bit vector mask with 2 `m8` lanes.
 );
--- a/third_party/rust/packed_simd/src/v256.rs
+++ b/third_party/rust/packed_simd/src/v256.rs
@ -1,48 +1,48 @@
 //! 256-bit wide vector types
-#![rustfmt::skip]
+#[rustfmt::skip]

 use crate::*;

-impl_i!([i8; 32]: i8x32, m8x32 | i8 | test_v256 |
+impl_i!([i8; 32]: i8x32, m8x32 | i8, u32 | test_v256 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
        From: |
        /// A 256-bit vector with 32 `i8` lanes.
 );
-impl_u!([u8; 32]: u8x32, m8x32 | u8 | test_v256 |
+impl_u!([u8; 32]: u8x32, m8x32 | u8, u32 | test_v256 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
        From: |
        /// A 256-bit vector with 32 `u8` lanes.
 );
-impl_m!([m8; 32]: m8x32 | i8 | test_v256 |
+impl_m!([m8; 32]: m8x32 | i8, u32 | test_v256 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
        From:  |
        /// A 256-bit vector mask with 32 `m8` lanes.
 );

-impl_i!([i16; 16]: i16x16, m16x16 | i16 | test_v256 |
+impl_i!([i16; 16]: i16x16, m16x16 | i16, u16 | test_v256 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
        From: i8x16, u8x16 |
        /// A 256-bit vector with 16 `i16` lanes.
 );
-impl_u!([u16; 16]: u16x16, m16x16 | u16 | test_v256 |
+impl_u!([u16; 16]: u16x16, m16x16 | u16, u16 | test_v256 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
        From: u8x16 |
        /// A 256-bit vector with 16 `u16` lanes.
 );
-impl_m!([m16; 16]: m16x16 | i16 | test_v256 |
+impl_m!([m16; 16]: m16x16 | i16, u16 | test_v256 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
        From: m8x16 |
        /// A 256-bit vector mask with 16 `m16` lanes.
 );

-impl_i!([i32; 8]: i32x8, m32x8 | i32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7  |
+impl_i!([i32; 8]: i32x8, m32x8 | i32, u8 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7  |
        From: i8x8, u8x8, i16x8, u16x8 |
        /// A 256-bit vector with 8 `i32` lanes.
 );
-impl_u!([u32; 8]: u32x8, m32x8 | u32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
+impl_u!([u32; 8]: u32x8, m32x8 | u32, u8 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
        From: u8x8, u16x8 |
        /// A 256-bit vector with 8 `u32` lanes.
 );
@ -50,16 +50,16 @@ impl_f!([f32; 8]: f32x8, m32x8 | f32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x
        From: i8x8, u8x8, i16x8, u16x8 |
        /// A 256-bit vector with 8 `f32` lanes.
 );
-impl_m!([m32; 8]: m32x8 | i32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
+impl_m!([m32; 8]: m32x8 | i32, u8 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
        From: m8x8, m16x8 |
        /// A 256-bit vector mask with 8 `m32` lanes.
 );

-impl_i!([i64; 4]: i64x4, m64x4 | i64 | test_v256 | x0, x1, x2, x3 |
+impl_i!([i64; 4]: i64x4, m64x4 | i64, u8 | test_v256 | x0, x1, x2, x3 |
        From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4 |
        /// A 256-bit vector with 4 `i64` lanes.
 );
-impl_u!([u64; 4]: u64x4, m64x4 | u64 | test_v256 | x0, x1, x2, x3 |
+impl_u!([u64; 4]: u64x4, m64x4 | u64, u8 | test_v256 | x0, x1, x2, x3 |
        From: u8x4, u16x4, u32x4 |
        /// A 256-bit vector with 4 `u64` lanes.
 );
@ -67,20 +67,20 @@ impl_f!([f64; 4]: f64x4, m64x4 | f64 | test_v256 | x0, x1, x2, x3 |
        From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, f32x4 |
        /// A 256-bit vector with 4 `f64` lanes.
 );
-impl_m!([m64; 4]: m64x4 | i64 | test_v256 | x0, x1, x2, x3 |
+impl_m!([m64; 4]: m64x4 | i64, u8 | test_v256 | x0, x1, x2, x3 |
        From: m8x4, m16x4, m32x4 |
        /// A 256-bit vector mask with 4 `m64` lanes.
 );

-impl_i!([i128; 2]: i128x2, m128x2 | i128 | test_v256 | x0, x1 |
+impl_i!([i128; 2]: i128x2, m128x2 | i128, u8 | test_v256 | x0, x1 |
        From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, i64x2, u64x2 |
        /// A 256-bit vector with 2 `i128` lanes.
 );
-impl_u!([u128; 2]: u128x2, m128x2 | u128 | test_v256 | x0, x1 |
+impl_u!([u128; 2]: u128x2, m128x2 | u128, u8 | test_v256 | x0, x1 |
        From: u8x2, u16x2, u32x2, u64x2 |
        /// A 256-bit vector with 2 `u128` lanes.
 );
-impl_m!([m128; 2]: m128x2 | i128 | test_v256 | x0, x1 |
+impl_m!([m128; 2]: m128x2 | i128, u8 | test_v256 | x0, x1 |
        From: m8x2, m16x2, m32x2, m64x2 |
        /// A 256-bit vector mask with 2 `m128` lanes.
 );
--- a/third_party/rust/packed_simd/src/v32.rs
+++ b/third_party/rust/packed_simd/src/v32.rs
@ -2,28 +2,28 @@

 use crate::*;

-impl_i!([i8; 4]: i8x4, m8x4 | i8 | test_v32 | x0, x1, x2, x3 |
+impl_i!([i8; 4]: i8x4, m8x4 | i8, u8 | test_v32 | x0, x1, x2, x3 |
        From: |
        /// A 32-bit vector with 4 `i8` lanes.
 );
-impl_u!([u8; 4]: u8x4, m8x4 | u8 | test_v32 | x0, x1, x2, x3 |
+impl_u!([u8; 4]: u8x4, m8x4 | u8, u8 | test_v32 | x0, x1, x2, x3 |
        From: |
        /// A 32-bit vector with 4 `u8` lanes.
 );
-impl_m!([m8; 4]: m8x4 | i8 | test_v32 | x0, x1, x2, x3 |
+impl_m!([m8; 4]: m8x4 | i8, u8 | test_v32 | x0, x1, x2, x3 |
        From: m16x4, m32x4, m64x4 |
        /// A 32-bit vector mask with 4 `m8` lanes.
 );

-impl_i!([i16; 2]: i16x2, m16x2 | i16 | test_v32 | x0, x1 |
+impl_i!([i16; 2]: i16x2, m16x2 | i16, u8 | test_v32 | x0, x1 |
        From: i8x2, u8x2 |
        /// A 32-bit vector with 2 `i16` lanes.
 );
-impl_u!([u16; 2]: u16x2, m16x2 | u16 | test_v32 | x0, x1 |
+impl_u!([u16; 2]: u16x2, m16x2 | u16, u8 | test_v32 | x0, x1 |
        From: u8x2 |
        /// A 32-bit vector with 2 `u16` lanes.
 );
-impl_m!([m16; 2]: m16x2 | i16 | test_v32 | x0, x1 |
+impl_m!([m16; 2]: m16x2 | i16, u8 | test_v32 | x0, x1 |
        From: m8x2, m32x2, m64x2, m128x2 |
        /// A 32-bit vector mask with 2 `m16` lanes.
 );
--- a/third_party/rust/packed_simd/src/v512.rs
+++ b/third_party/rust/packed_simd/src/v512.rs
@ -1,9 +1,9 @@
 //! 512-bit wide vector types
-#![rustfmt::skip]
+#[rustfmt::skip]

 use crate::*;

-impl_i!([i8; 64]: i8x64, m8x64 | i8 | test_v512 |
+impl_i!([i8; 64]: i8x64, m8x64 | i8, u64 | test_v512 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
        x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
@ -11,7 +11,7 @@ impl_i!([i8; 64]: i8x64, m8x64 | i8 | test_v512 |
        From: |
        /// A 512-bit vector with 64 `i8` lanes.
 );
-impl_u!([u8; 64]: u8x64, m8x64 | u8 | test_v512 |
+impl_u!([u8; 64]: u8x64, m8x64 | u8, u64 | test_v512 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
        x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
@ -19,7 +19,7 @@ impl_u!([u8; 64]: u8x64, m8x64 | u8 | test_v512 |
        From: |
        /// A 512-bit vector with 64 `u8` lanes.
 );
-impl_m!([m8; 64]: m8x64 | i8 | test_v512 |
+impl_m!([m8; 64]: m8x64 | i8, u64 | test_v512 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
        x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
@ -28,31 +28,31 @@ impl_m!([m8; 64]: m8x64 | i8 | test_v512 |
        /// A 512-bit vector mask with 64 `m8` lanes.
 );

-impl_i!([i16; 32]: i16x32, m16x32 | i16 | test_v512 |
+impl_i!([i16; 32]: i16x32, m16x32 | i16, u32 | test_v512 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
        From: i8x32, u8x32 |
        /// A 512-bit vector with 32 `i16` lanes.
 );
-impl_u!([u16; 32]: u16x32, m16x32 | u16 | test_v512 |
+impl_u!([u16; 32]: u16x32, m16x32 | u16, u32 | test_v512 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
        From: u8x32 |
        /// A 512-bit vector with 32 `u16` lanes.
 );
-impl_m!([m16; 32]: m16x32 | i16 | test_v512 |
+impl_m!([m16; 32]: m16x32 | i16, u32 | test_v512 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
        x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
        From: m8x32 |
        /// A 512-bit vector mask with 32 `m16` lanes.
 );

-impl_i!([i32; 16]: i32x16, m32x16 | i32 | test_v512 |
+impl_i!([i32; 16]: i32x16, m32x16 | i32, u16 | test_v512 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
        From: i8x16, u8x16, i16x16, u16x16 |
        /// A 512-bit vector with 16 `i32` lanes.
 );
-impl_u!([u32; 16]: u32x16, m32x16 | u32 | test_v512 |
+impl_u!([u32; 16]: u32x16, m32x16 | u32, u16 | test_v512 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
        From: u8x16, u16x16 |
        /// A 512-bit vector with 16 `u32` lanes.
@ -62,17 +62,17 @@ impl_f!([f32; 16]: f32x16, m32x16 | f32 | test_v512 |
        From: i8x16, u8x16, i16x16, u16x16 |
        /// A 512-bit vector with 16 `f32` lanes.
 );
-impl_m!([m32; 16]: m32x16 | i32 | test_v512 |
+impl_m!([m32; 16]: m32x16 | i32, u16 | test_v512 |
        x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
        From: m8x16, m16x16 |
        /// A 512-bit vector mask with 16 `m32` lanes.
 );

-impl_i!([i64; 8]: i64x8, m64x8 | i64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
+impl_i!([i64; 8]: i64x8, m64x8 | i64, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
        From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8 |
        /// A 512-bit vector with 8 `i64` lanes.
 );
-impl_u!([u64; 8]: u64x8, m64x8 | u64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
+impl_u!([u64; 8]: u64x8, m64x8 | u64, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
        From: u8x8, u16x8, u32x8 |
        /// A 512-bit vector with 8 `u64` lanes.
 );
@ -80,20 +80,20 @@ impl_f!([f64; 8]: f64x8, m64x8 | f64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x
        From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8, f32x8 |
        /// A 512-bit vector with 8 `f64` lanes.
 );
-impl_m!([m64; 8]: m64x8 | i64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
+impl_m!([m64; 8]: m64x8 | i64, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
        From: m8x8, m16x8, m32x8 |
        /// A 512-bit vector mask with 8 `m64` lanes.
 );

-impl_i!([i128; 4]: i128x4, m128x4 | i128 | test_v512 | x0, x1, x2, x3 |
+impl_i!([i128; 4]: i128x4, m128x4 | i128, u8 | test_v512 | x0, x1, x2, x3 |
        From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, i64x4, u64x4 |
        /// A 512-bit vector with 4 `i128` lanes.
 );
-impl_u!([u128; 4]: u128x4, m128x4 | u128 | test_v512 | x0, x1, x2, x3 |
+impl_u!([u128; 4]: u128x4, m128x4 | u128, u8 | test_v512 | x0, x1, x2, x3 |
        From: u8x4, u16x4, u32x4, u64x4 |
        /// A 512-bit vector with 4 `u128` lanes.
 );
-impl_m!([m128; 4]: m128x4 | i128 | test_v512 | x0, x1, x2, x3 |
+impl_m!([m128; 4]: m128x4 | i128, u8 | test_v512 | x0, x1, x2, x3 |
        From: m8x4, m16x4, m32x4, m64x4 |
        /// A 512-bit vector mask with 4 `m128` lanes.
 );
--- a/third_party/rust/packed_simd/src/v64.rs
+++ b/third_party/rust/packed_simd/src/v64.rs
@ -1,43 +1,43 @@
 //! 64-bit wide vector types
-#![rustfmt::skip]
+#[rustfmt::skip]

 use super::*;

-impl_i!([i8; 8]: i8x8, m8x8 | i8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
+impl_i!([i8; 8]: i8x8, m8x8 | i8, u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
        From: |
        /// A 64-bit vector with 8 `i8` lanes.
 );
-impl_u!([u8; 8]: u8x8, m8x8 | u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
+impl_u!([u8; 8]: u8x8, m8x8 | u8, u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
        From: |
        /// A 64-bit vector with 8 `u8` lanes.
 );
-impl_m!([m8; 8]: m8x8 | i8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
+impl_m!([m8; 8]: m8x8 | i8, u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
        From: m16x8, m32x8 |
        /// A 64-bit vector mask with 8 `m8` lanes.
 );

-impl_i!([i16; 4]: i16x4, m16x4 | i16 | test_v64 | x0, x1, x2, x3 |
+impl_i!([i16; 4]: i16x4, m16x4 | i16, u8 | test_v64 | x0, x1, x2, x3 |
        From: i8x4, u8x4 |
        /// A 64-bit vector with 4 `i16` lanes.
 );
-impl_u!([u16; 4]: u16x4, m16x4 | u16 | test_v64 | x0, x1, x2, x3 |
+impl_u!([u16; 4]: u16x4, m16x4 | u16, u8 | test_v64 | x0, x1, x2, x3 |
        From: u8x4 |
        /// A 64-bit vector with 4 `u16` lanes.
 );
-impl_m!([m16; 4]: m16x4 | i16 | test_v64 | x0, x1, x2, x3 |
+impl_m!([m16; 4]: m16x4 | i16, u8 | test_v64 | x0, x1, x2, x3 |
        From: m8x4, m32x4, m64x4 |
        /// A 64-bit vector mask with 4 `m16` lanes.
 );

-impl_i!([i32; 2]: i32x2, m32x2 | i32 | test_v64 | x0, x1 |
+impl_i!([i32; 2]: i32x2, m32x2 | i32, u8 | test_v64 | x0, x1 |
        From: i8x2, u8x2, i16x2, u16x2 |
        /// A 64-bit vector with 2 `i32` lanes.
 );
-impl_u!([u32; 2]: u32x2, m32x2 | u32 | test_v64 | x0, x1 |
+impl_u!([u32; 2]: u32x2, m32x2 | u32, u8 | test_v64 | x0, x1 |
        From: u8x2, u16x2 |
        /// A 64-bit vector with 2 `u32` lanes.
 );
-impl_m!([m32; 2]: m32x2 | i32 | test_v64 | x0, x1 |
+impl_m!([m32; 2]: m32x2 | i32, u8 | test_v64 | x0, x1 |
        From: m8x2, m16x2, m64x2, m128x2 |
        /// A 64-bit vector mask with 2 `m32` lanes.
 );
@ -47,15 +47,15 @@ impl_f!([f32; 2]: f32x2, m32x2 | f32 | test_v64 | x0, x1 |
 );

 /*
-impl_i!([i64; 1]: i64x1, m64x1 | i64 | test_v64 | x0 |
+impl_i!([i64; 1]: i64x1, m64x1 | i64, u8 | test_v64 | x0 |
        From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1*/ |  // FIXME: primitive to vector conversion
        /// A 64-bit vector with 1 `i64` lanes.
 );
-impl_u!([u64; 1]: u64x1, m64x1 | u64 | test_v64 | x0 |
+impl_u!([u64; 1]: u64x1, m64x1 | u64, u8 | test_v64 | x0 |
        From: /*u8x1, u16x1, u32x1*/ | // FIXME: primitive to vector conversion
        /// A 64-bit vector with 1 `u64` lanes.
 );
-impl_m!([m64; 1]: m64x1 | i64 | test_v64 | x0 |
+impl_m!([m64; 1]: m64x1 | i64, u8 | test_v64 | x0 |
        From: /*m8x1, m16x1, m32x1, */ m128x1 | // FIXME: unary small vector types
        /// A 64-bit vector mask with 1 `m64` lanes.
 );
--- a/third_party/rust/packed_simd/src/vPtr.rs
+++ b/third_party/rust/packed_simd/src/vPtr.rs
@ -1,5 +1,5 @@
 //! Vectors of pointers
-#![rustfmt::skip]
+#[rustfmt::skip]

 use crate::*;

--- a/third_party/rust/packed_simd/src/vSize.rs
+++ b/third_party/rust/packed_simd/src/vSize.rs
@ -3,50 +3,50 @@
 use crate::codegen::pointer_sized_int::{isize_, usize_};
 use crate::*;

-impl_i!([isize; 2]: isizex2, msizex2 | isize_ | test_v128 |
+impl_i!([isize; 2]: isizex2, msizex2 | isize_, u8 | test_v128 |
        x0, x1|
        From: |
        /// A vector with 2 `isize` lanes.
 );

-impl_u!([usize; 2]: usizex2, msizex2 | usize_ | test_v128 |
+impl_u!([usize; 2]: usizex2, msizex2 | usize_, u8 | test_v128 |
        x0, x1|
        From: |
        /// A vector with 2 `usize` lanes.
 );
-impl_m!([msize; 2]: msizex2 | isize_ | test_v128 |
+impl_m!([msize; 2]: msizex2 | isize_, u8 | test_v128 |
        x0, x1 |
        From: |
        /// A vector mask with 2 `msize` lanes.
 );

-impl_i!([isize; 4]: isizex4, msizex4 | isize_ | test_v256 |
+impl_i!([isize; 4]: isizex4, msizex4 | isize_, u8 | test_v256 |
        x0, x1, x2, x3 |
        From: |
        /// A vector with 4 `isize` lanes.
 );
-impl_u!([usize; 4]: usizex4, msizex4 | usize_ | test_v256 |
+impl_u!([usize; 4]: usizex4, msizex4 | usize_, u8 | test_v256 |
        x0, x1, x2, x3|
        From: |
        /// A vector with 4 `usize` lanes.
 );
-impl_m!([msize; 4]: msizex4 | isize_ | test_v256 |
+impl_m!([msize; 4]: msizex4 | isize_, u8 | test_v256 |
        x0, x1, x2, x3 |
        From: |
        /// A vector mask with 4 `msize` lanes.
 );

-impl_i!([isize; 8]: isizex8, msizex8 | isize_ | test_v512 |
+impl_i!([isize; 8]: isizex8, msizex8 | isize_, u8 | test_v512 |
        x0, x1, x2, x3, x4, x5, x6, x7 |
        From: |
-        /// A vector with 4 `isize` lanes.
+        /// A vector with 8 `isize` lanes.
 );
-impl_u!([usize; 8]: usizex8, msizex8 | usize_ | test_v512 |
+impl_u!([usize; 8]: usizex8, msizex8 | usize_, u8 | test_v512 |
        x0, x1, x2, x3, x4, x5, x6, x7 |
        From: |
        /// A vector with 8 `usize` lanes.
 );
-impl_m!([msize; 8]: msizex8 | isize_ | test_v512 |
+impl_m!([msize; 8]: msizex8 | isize_, u8 | test_v512 |
        x0, x1, x2, x3, x4, x5, x6, x7 |
        From: |
        /// A vector mask with 8 `msize` lanes.
--- a/third_party/rust/packed_simd/tests/endianness.rs
+++ b/third_party/rust/packed_simd/tests/endianness.rs
@ -17,7 +17,7 @@ fn endian_indexing() {
 #[cfg_attr(not(target_arch = "wasm32"), test)]
 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 fn endian_bitcasts() {
-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let x = i8x16::new(
        0, 1, 2, 3, 4, 5, 6, 7,
        8, 9, 10, 11, 12, 13, 14, 15,
@ -34,13 +34,13 @@ fn endian_bitcasts() {
 #[cfg_attr(not(target_arch = "wasm32"), test)]
 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 fn endian_casts() {
-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let x = i8x16::new(
        0, 1, 2, 3, 4, 5, 6, 7,
        8, 9, 10, 11, 12, 13, 14, 15,
    );
    let t: i16x16 = x.into(); // simd_cast
-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let e = i16x16::new(
        0, 1, 2, 3, 4, 5, 6, 7,
        8, 9, 10, 11, 12, 13, 14, 15,
@ -51,7 +51,7 @@ fn endian_casts() {
 #[cfg_attr(not(target_arch = "wasm32"), test)]
 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 fn endian_load_and_stores() {
-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let x = i8x16::new(
        0, 1, 2, 3, 4, 5, 6, 7,
        8, 9, 10, 11, 12, 13, 14, 15,
@ -82,10 +82,15 @@ fn endian_array_union() {
        vec: f32x4,
    }
    let x: [f32; 4] = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data };
-    assert_eq!(x[0], 0_f32);
-    assert_eq!(x[1], 1_f32);
-    assert_eq!(x[2], 2_f32);
-    assert_eq!(x[3], 3_f32);
+    // All of these are small integers, which `f32` represents exactly (every
+    // integer up to 2^24 is exact), so comparing with exact equality is safe.
+    #[allow(clippy::float_cmp)]
+    {
+        assert_eq!(x[0], 0_f32);
+        assert_eq!(x[1], 1_f32);
+        assert_eq!(x[2], 2_f32);
+        assert_eq!(x[3], 3_f32);
+    }
    let y: f32x4 = unsafe { A { data: [3., 2., 1., 0.] }.vec };
    assert_eq!(y, f32x4::new(3., 2., 1., 0.));

@ -93,23 +98,23 @@ fn endian_array_union() {
        data: [i8; 16],
        vec: i8x16,
    }
-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let x = i8x16::new(
        0, 1, 2, 3, 4, 5, 6, 7,
        8, 9, 10, 11, 12, 13, 14, 15,
    );
    let x: [i8; 16] = unsafe { B { vec: x }.data };

-    for i in 0..16 {
-        assert_eq!(x[i], i as i8);
+    for (i, v) in x.iter().enumerate() {
+        assert_eq!(i as i8, *v);
    }

-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let y = [
        15, 14, 13, 12, 11, 19, 9, 8,
        7, 6, 5, 4, 3, 2, 1, 0
    ];
-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let e = i8x16::new(
        15, 14, 13, 12, 11, 19, 9, 8,
        7, 6, 5, 4, 3, 2, 1, 0
@ -121,7 +126,7 @@ fn endian_array_union() {
        data: [i16; 8],
        vec: i8x16,
    }
-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let x = i8x16::new(
        0, 1, 2, 3, 4, 5, 6, 7,
        8, 9, 10, 11, 12, 13, 14, 15,
@ -145,21 +150,26 @@ fn endian_tuple_access() {
        vec: f32x4,
    }
    let x: F32x4T = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data };
-    assert_eq!(x.0, 0_f32);
-    assert_eq!(x.1, 1_f32);
-    assert_eq!(x.2, 2_f32);
-    assert_eq!(x.3, 3_f32);
+    // All of these are small integers, which `f32` represents exactly (every
+    // integer up to 2^24 is exact), so comparing with exact equality is safe.
+    #[allow(clippy::float_cmp)]
+    {
+        assert_eq!(x.0, 0_f32);
+        assert_eq!(x.1, 1_f32);
+        assert_eq!(x.2, 2_f32);
+        assert_eq!(x.3, 3_f32);
+    }
    let y: f32x4 = unsafe { A { data: (3., 2., 1., 0.) }.vec };
    assert_eq!(y, f32x4::new(3., 2., 1., 0.));

-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    type I8x16T = (i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8);
    union B {
        data: I8x16T,
        vec: i8x16,
    }

-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let x = i8x16::new(
        0, 1, 2, 3, 4, 5, 6, 7,
        8, 9, 10, 11, 12, 13, 14, 15,
@ -183,27 +193,27 @@ fn endian_tuple_access() {
    assert_eq!(x.14, 14);
    assert_eq!(x.15, 15);

-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let y = (
        15, 14, 13, 12, 11, 10, 9, 8,
        7, 6, 5, 4, 3, 2, 1, 0
    );
    let z: i8x16 = unsafe { B { data: y }.vec };
-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let e = i8x16::new(
        15, 14, 13, 12, 11, 10, 9, 8,
        7, 6, 5, 4, 3, 2, 1, 0
    );
    assert_eq!(e, z);

-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    type I16x8T = (i16, i16, i16, i16, i16, i16, i16, i16);
    union C {
        data: I16x8T,
        vec: i8x16,
    }

-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let x = i8x16::new(
        0, 1, 2, 3, 4, 5, 6, 7,
        8, 9, 10, 11, 12, 13, 14, 15,
@ -224,7 +234,7 @@ fn endian_tuple_access() {
    assert_eq!(x.6, e[6]);
    assert_eq!(x.7, e[7]);

-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    #[repr(C)]
    #[derive(Copy ,Clone)]
    pub struct Tup(pub i8, pub i8, pub i16, pub i8, pub i8, pub i16,
@ -235,7 +245,7 @@ fn endian_tuple_access() {
        vec: i8x16,
    }

-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
    let x = i8x16::new(
        0, 1, 2, 3, 4, 5, 6, 7,
        8, 9, 10, 11, 12, 13, 14, 15,