Bug 1712411 - Update jpeg-xl to 44778c6902084bd239c5fb8eaa53bfd90dd9face r=saschanaz

Differential Revision: https://phabricator.services.mozilla.com/D115751
2024-11-23 21:01:08 +00:00 · 2021-05-22 13:48:06 +00:00 · 2021-05-22 13:48:06 +00:00 · d289e4d878
commit d289e4d878
parent cbbe40bbe5
28 changed files with 538 additions and 181 deletions
--- a/media/libjxl/moz.yaml
+++ b/media/libjxl/moz.yaml
@ -20,12 +20,12 @@ origin:

  # Human-readable identifier for this version/release
  # Generally "version NNN", "tag SSS", "bookmark SSS"
-  release: commit 100e3c7e8a051b7399d0505c22640488287a31a8 (2021-05-18T15:01:53.000+02:00).
+  release: commit 44778c6902084bd239c5fb8eaa53bfd90dd9face (2021-05-21T20:39:54.000+02:00).

  # Revision to pull in
  # Must be a long or short commit SHA (long preferred)
  # NOTE(krosylight): Update highway together when updating this!
-  revision: 100e3c7e8a051b7399d0505c22640488287a31a8
+  revision: 44778c6902084bd239c5fb8eaa53bfd90dd9face

  # The package's license, where possible using the mnemonic from
  # https://spdx.org/licenses/
--- a/third_party/jpeg-xl/ci.sh
+++ b/third_party/jpeg-xl/ci.sh
@ -233,12 +233,16 @@ MR_ANCESTOR_SHA=""
 # Populate MR_HEAD_SHA and MR_ANCESTOR_SHA.
 merge_request_commits() {
  { set +x; } 2>/dev/null
+  # GITHUB_SHA is the current reference being built in GitHub Actions.
+  if [[ -n "${GITHUB_SHA:-}" ]]; then
+    git -C "${MYDIR}" fetch -q origin "${GITHUB_SHA}"
+    MR_HEAD_SHA="${GITHUB_SHA}"
+  else
    # CI_BUILD_REF is the reference currently being build in the CI workflow.
    MR_HEAD_SHA=$(git -C "${MYDIR}" rev-parse -q "${CI_BUILD_REF:-HEAD}")
-  if [[ -z "${CI_MERGE_REQUEST_IID:-}" ]]; then
-    # We are in a local branch, not a merge request.
-    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q HEAD@{upstream} || true)
-  else
+  fi
+
+  if [[ -n "${CI_MERGE_REQUEST_IID:-}" ]]; then
    # Merge request pipeline in CI. In this case the upstream is called "origin"
    # but it refers to the forked project that's the source of the merge
    # request. We need to get the target of the merge request, for which we need
@ -248,15 +252,28 @@ merge_request_commits() {
    git -C "${MYDIR}" fetch "${CI_MERGE_REQUEST_PROJECT_URL}" \
      "${CI_MERGE_REQUEST_TARGET_BRANCH_NAME}"
    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q FETCH_HEAD)
+  elif [[ -n "${GITHUB_BASE_REF:-}" ]]; then
+    # Pull request workflow in GitHub Actions. GitHub checkout action uses
+    # "origin" as the remote for the git checkout.
+    git -C "${MYDIR}" fetch -q origin "${GITHUB_BASE_REF}"
+    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q FETCH_HEAD)
+  else
+    # We are in a local branch, not a merge request.
+    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q HEAD@{upstream} || true)
  fi
+
  if [[ -z "${MR_ANCESTOR_SHA}" ]]; then
    echo "Warning, not tracking any branch, using the last commit in HEAD.">&2
    # This prints the return value with just HEAD.
    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q "${MR_HEAD_SHA}^")
  else
-    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" merge-base --all \
+    # GitHub runs the pipeline on a merge commit, no need to look for the common
+    # ancestor in that case.
+    if [[ -z "${GITHUB_BASE_REF:-}" ]]; then
+      MR_ANCESTOR_SHA=$(git -C "${MYDIR}" merge-base \
        "${MR_ANCESTOR_SHA}" "${MR_HEAD_SHA}")
    fi
+  fi
  set -x
 }

--- a/third_party/jpeg-xl/examples/jxlinfo.c
+++ b/third_party/jpeg-xl/examples/jxlinfo.c
@ -46,7 +46,7 @@ int PrintBasicInfo(FILE* file) {
  int seen_basic_info = 0;

  for (;;) {
-    // The firs time, this will output JXL_DEC_NEED_MORE_INPUT because no
+    // The first time, this will output JXL_DEC_NEED_MORE_INPUT because no
    // input is set yet, this is ok since the input is set when handling this
    // event.
    JxlDecoderStatus status = JxlDecoderProcessInput(dec);
@ -55,7 +55,7 @@ int PrintBasicInfo(FILE* file) {
      fprintf(stderr, "Decoder error\n");
      break;
    } else if (status == JXL_DEC_NEED_MORE_INPUT) {
-      // The firstt time there is nothing to release and it returns 0, but that
+      // The first time there is nothing to release and it returns 0, but that
      // is ok.
      size_t remaining = JxlDecoderReleaseInput(dec);
      // move any remaining bytes to the front if necessary
@ -67,6 +67,10 @@ int PrintBasicInfo(FILE* file) {
      data = (uint8_t*)realloc(data, remaining + chunk_size);
      // append bytes read from the file behind the remaining bytes
      size_t read_size = fread(data + remaining, 1, chunk_size, file);
+      if (read_size == 0 && feof(file)) {
+        fprintf(stderr, "Unexpected EOF\n");
+        break;
+      }
      data_size = remaining + read_size;
      JxlDecoderSetInput(dec, data, data_size);
    } else if (status == JXL_DEC_SUCCESS) {
--- a/third_party/jpeg-xl/lib/extras/codec_png.cc
+++ b/third_party/jpeg-xl/lib/extras/codec_png.cc
@ -790,6 +790,7 @@ Status DecodeImagePNG(const Span<const uint8_t> bytes, ThreadPool* pool,
  JXL_RETURN_IF_ERROR(ok);
  io->dec_pixels = w * h;
  io->metadata.m.bit_depth.bits_per_sample = io->Main().DetectRealBitdepth();
+  io->metadata.m.xyb_encoded = false;
  SetIntensityTarget(io);
  if (!reader.HaveColorProfile()) {
    JXL_RETURN_IF_ERROR(ApplyHints(is_gray, io));
--- a/third_party/jpeg-xl/lib/extras/codec_psd.cc
+++ b/third_party/jpeg-xl/lib/extras/codec_psd.cc
@ -577,18 +577,26 @@ Status DecodeImagePSD(const Span<const uint8_t> bytes, ThreadPool* pool,
    std::vector<int> chan_id(real_nb_channels);
    std::iota(chan_id.begin(), chan_id.end(), 0);
    std::vector<bool> invert(real_nb_channels, false);
+    if (static_cast<int>(spotcolor.size()) + colormodel + 1 <
+        real_nb_channels) {
+      return JXL_FAILURE("Inconsistent layer configuration");
+    }
    if (!merged_has_alpha) {
+      if (colormodel >= real_nb_channels) {  // erase() needs a valid index
+        return JXL_FAILURE("Inconsistent layer configuration");
+      }
      chan_id.erase(chan_id.begin() + colormodel);
      invert.erase(invert.begin() + colormodel);
-    } else
+    } else {
      colormodel++;
+    }
    for (size_t i = colormodel; i < invert.size(); i++) {
      if (spotcolor[i - colormodel][5] == 2) invert[i] = true;
      if (spotcolor[i - colormodel][5] == 0) invert[i] = true;
    }
-    JXL_RETURN_IF_ERROR(decode_layer(pos, maxpos, layer, chan_id, invert,
-                                     layer.xsize(), layer.ysize(), version,
-                                     (have_only_merged ? 0 : colormodel), false, bitdepth));
+    JXL_RETURN_IF_ERROR(decode_layer(
+        pos, maxpos, layer, chan_id, invert, layer.xsize(), layer.ysize(),
+        version, (have_only_merged ? 0 : colormodel), false, bitdepth));
  }

  if (io->frames.empty()) return JXL_FAILURE("PSD: no layers");
--- a/third_party/jpeg-xl/lib/include/jxl/decode.h
+++ b/third_party/jpeg-xl/lib/include/jxl/decode.h
@ -135,10 +135,14 @@ typedef enum {
   */
  JXL_DEC_ERROR = 1,

-  /** The decoder needs more input bytes to continue. In the next
-   * JxlDecoderProcessInput call, next_in and avail_in must point to more
-   * bytes to continue. If *avail_in is not 0, the new bytes must be appended to
-   * the *avail_in last previous bytes.
+  /** The decoder needs more input bytes to continue. Before the next
+   * JxlDecoderProcessInput call, more input data must be set, by calling
+   * JxlDecoderReleaseInput (if input was set previously) and then calling
+   * JxlDecoderSetInput. JxlDecoderReleaseInput returns how many bytes are
+   * not yet processed. Before the next call to JxlDecoderProcessInput, all
+   * unprocessed bytes must be provided again (the address need not match, but
+   * the contents must), and more bytes must be concatenated after the
+   * unprocessed bytes.
   */
  JXL_DEC_NEED_MORE_INPUT = 2,

@ -277,8 +281,8 @@ JxlDecoderSetParallelRunner(JxlDecoder* dec, JxlParallelRunner parallel_runner,
 * Returns a hint indicating how many more bytes the decoder is expected to
 * need to make JxlDecoderGetBasicInfo available after the next
 * JxlDecoderProcessInput call. This is a suggested large enough value for
- * the *avail_in parameter, but it is not guaranteed to be an upper bound nor
- * a lower bound.
+ * the number of bytes to provide in the next JxlDecoderSetInput call, but it is
+ * not guaranteed to be an upper bound nor a lower bound.
 * Can be used before the first JxlDecoderProcessInput call, and is correct
 * the first time in most cases. If not, JxlDecoderSizeHintBasicInfo can be
 * called again to get an updated hint.
--- a/third_party/jpeg-xl/lib/jxl/alpha.cc
+++ b/third_party/jpeg-xl/lib/jxl/alpha.cc
@ -74,7 +74,7 @@ void PerformAlphaBlending(const float* bg, const float* bga, const float* fg,
 void PerformAlphaWeightedAdd(const float* bg, const float* fg, const float* fga,
                             float* out, size_t num_pixels, bool clamp) {
  if (fg == fga) {
-    memcpy(out, bg, num_pixels * sizeof(out));
+    memcpy(out, bg, num_pixels * sizeof(*out));
  } else {
    for (size_t x = 0; x < num_pixels; ++x) {
      out[x] = bg[x] + fg[x] * Clamp(fga[x]);
--- a/third_party/jpeg-xl/lib/jxl/ans_test.cc
+++ b/third_party/jpeg-xl/lib/jxl/ans_test.cc
@ -195,5 +195,95 @@ TEST(ANSTest, UintConfigRoundtrip) {
  }
 }

+void TestCheckpointing(bool ans, bool lz77) {
+  std::vector<std::vector<Token>> input_values(1);
+  for (size_t i = 0; i < 1024; i++) {
+    input_values[0].push_back(Token(0, i % 4));
+  }
+  // up to lz77 window size.
+  for (size_t i = 0; i < (1 << 20) - 1022; i++) {
+    input_values[0].push_back(Token(0, (i % 5) + 4));
+  }
+  // Ensure that when the window wraps around, new values are different.
+  input_values[0].push_back(Token(0, 0));
+  for (size_t i = 0; i < 1024; i++) {
+    input_values[0].push_back(Token(0, i % 4));
+  }
+
+  std::vector<uint8_t> context_map;
+  EntropyEncodingData codes;
+  HistogramParams params;
+  params.lz77_method = lz77 ? HistogramParams::LZ77Method::kLZ77
+                            : HistogramParams::LZ77Method::kNone;
+  params.force_huffman = !ans;
+
+  BitWriter writer;
+  {
+    auto input_values_copy = input_values;
+    BuildAndEncodeHistograms(params, 1, input_values_copy, &codes, &context_map,
+                             &writer, 0, nullptr);
+    WriteTokens(input_values_copy[0], codes, context_map, &writer, 0, nullptr);
+    writer.ZeroPadToByte();
+  }
+
+  // We do not truncate the output. Reading past the end reads out zeroes
+  // anyway.
+  BitReader br(writer.GetSpan());
+  Status status = true;
+  {
+    BitReaderScopedCloser bc(&br, &status);
+
+    std::vector<uint8_t> dec_context_map;
+    ANSCode decoded_codes;
+    ASSERT_TRUE(DecodeHistograms(&br, 1, &decoded_codes, &dec_context_map));
+    ASSERT_EQ(dec_context_map, context_map);
+    ANSSymbolReader reader(&decoded_codes, &br);
+
+    ANSSymbolReader::Checkpoint checkpoint;
+    size_t br_pos;
+    constexpr size_t kInterval = ANSSymbolReader::kMaxCheckpointInterval - 2;
+    for (size_t i = 0; i < input_values[0].size(); i++) {
+      if (i % kInterval == 0 && i > 0) {
+        reader.Restore(checkpoint);
+        ASSERT_TRUE(br.Close());
+        br = BitReader(writer.GetSpan());
+        br.SkipBits(br_pos);
+        for (size_t j = i - kInterval; j < i; j++) {
+          Token symbol = input_values[0][j];
+          uint32_t read_symbol =
+              reader.ReadHybridUint(symbol.context, &br, dec_context_map);
+          ASSERT_EQ(read_symbol, symbol.value) << "j = " << j;
+        }
+      }
+      if (i % kInterval == 0) {
+        reader.Save(&checkpoint);
+        br_pos = br.TotalBitsConsumed();
+      }
+      Token symbol = input_values[0][i];
+      uint32_t read_symbol =
+          reader.ReadHybridUint(symbol.context, &br, dec_context_map);
+      ASSERT_EQ(read_symbol, symbol.value) << "i = " << i;
+    }
+    ASSERT_TRUE(reader.CheckANSFinalState());
+  }
+  EXPECT_TRUE(status);
+}
+
+TEST(ANSTest, TestCheckpointingANS) {
+  TestCheckpointing(/*ans=*/true, /*lz77=*/false);
+}
+
+TEST(ANSTest, TestCheckpointingPrefix) {
+  TestCheckpointing(/*ans=*/false, /*lz77=*/false);
+}
+
+TEST(ANSTest, TestCheckpointingANSLZ77) {
+  TestCheckpointing(/*ans=*/true, /*lz77=*/true);
+}
+
+TEST(ANSTest, TestCheckpointingPrefixLZ77) {
+  TestCheckpointing(/*ans=*/false, /*lz77=*/true);
+}
+
 }  // namespace
 }  // namespace jxl
--- a/third_party/jpeg-xl/lib/jxl/blending.cc
+++ b/third_party/jpeg-xl/lib/jxl/blending.cc
@ -161,39 +161,24 @@ ImageBlender::RectBlender ImageBlender::PrepareRect(
    return blender;
  }

-  const Rect actual_input_rect(
-      input_rect.x0() + (blender.current_overlap_.x0() - rect.x0()),
-      input_rect.y0() + (blender.current_overlap_.y0() - rect.y0()),
-      blender.current_overlap_.xsize(), blender.current_overlap_.ysize());
-
  blender.current_cropbox_ =
      Rect(o_.x0 + blender.current_overlap_.x0(),
           o_.y0 + blender.current_overlap_.y0(),
           blender.current_overlap_.xsize(), blender.current_overlap_.ysize());
-  Image3F cropped_foreground(actual_input_rect.xsize(),
-                             actual_input_rect.ysize());
-  CopyImageTo(actual_input_rect, foreground, &cropped_foreground);
-  blender.foreground_ = ImageBundle(dest_->metadata());
-  blender.foreground_.SetFromImage(
-      std::move(cropped_foreground),
-      ColorEncoding::LinearSRGB() /* likely incorrect but unused anyway */);
-  if (!extra_channels.empty()) {
-    std::vector<ImageF> ec;
-    for (const auto& extra_channel : extra_channels) {
-      ImageF ec_image(actual_input_rect.xsize(), actual_input_rect.ysize());
-      CopyImageTo(actual_input_rect, extra_channel, &ec_image);
-      ec.push_back(std::move(ec_image));
-    }
-    blender.foreground_.SetExtraChannels(std::move(ec));
-  }

  // Turn current_overlap_ from being relative to the full foreground to being
-  // relative to the rect.
+  // relative to the rect or input_rect.
  blender.current_overlap_ =
      Rect(blender.current_overlap_.x0() - rect.x0(),
           blender.current_overlap_.y0() - rect.y0(),
           blender.current_overlap_.xsize(), blender.current_overlap_.ysize());

+  // And this one is relative to the `foreground` subimage.
+  const Rect input_overlap(blender.current_overlap_.x0() + input_rect.x0(),
+                           blender.current_overlap_.y0() + input_rect.y0(),
+                           blender.current_overlap_.xsize(),
+                           blender.current_overlap_.ysize());
+
  blender.blending_info_.resize(extra_channels.size() + 1);
  auto make_blending = [&](const BlendingInfo& info, PatchBlending* pb) {
    pb->alpha_channel = info.alpha_channel;
@ -222,13 +207,33 @@ ImageBlender::RectBlender ImageBlender::PrepareRect(
      default: {
        JXL_ABORT("Invalid blend mode");  // should have failed to decode
      }
-    };
+    }
  };
  make_blending(info_, &blender.blending_info_[0]);
  for (size_t i = 0; i < extra_channels.size(); i++) {
    make_blending((*ec_info_)[i], &blender.blending_info_[1 + i]);
  }

+  Rect cropbox_row = blender.current_cropbox_.Line(0);
+  Rect overlap_row = input_overlap.Line(0);
+  const auto num_ptrs = 3 + extra_channels.size();
+  blender.fg_ptrs_.reserve(num_ptrs);
+  blender.fg_strides_.reserve(num_ptrs);
+  blender.bg_ptrs_.reserve(num_ptrs);
+  blender.bg_strides_.reserve(num_ptrs);
+  for (size_t c = 0; c < 3; c++) {
+    blender.fg_ptrs_.push_back(overlap_row.ConstPlaneRow(foreground, c, 0));
+    blender.fg_strides_.push_back(foreground.PixelsPerRow());
+    blender.bg_ptrs_.push_back(cropbox_row.PlaneRow(dest_->color(), c, 0));
+    blender.bg_strides_.push_back(dest_->color()->PixelsPerRow());
+  }
+  for (size_t c = 0; c < extra_channels.size(); c++) {
+    blender.fg_ptrs_.push_back(overlap_row.ConstRow(extra_channels[c], 0));
+    blender.fg_strides_.push_back(extra_channels[c].PixelsPerRow());
+    blender.bg_ptrs_.push_back(cropbox_row.Row(&dest_->extra_channels()[c], 0));
+    blender.bg_strides_.push_back(dest_->extra_channels()[c].PixelsPerRow());
+  }
+
  return blender;
 }

@ -355,21 +360,15 @@ Status ImageBlender::RectBlender::DoBlending(size_t y) {
    return true;
  }
  y -= current_overlap_.y0();
-  Rect cropbox_row = current_cropbox_.Line(y);
-  Rect overlap_row = Rect(0, y, current_overlap_.xsize(), 1);
-  fg_ptrs_.resize(3 + foreground_.extra_channels().size());
-  bg_ptrs_.resize(3 + foreground_.extra_channels().size());
-  for (size_t c = 0; c < 3; c++) {
-    fg_ptrs_[c] = overlap_row.ConstPlaneRow(*foreground_.color(), c, 0);
-    bg_ptrs_[c] = cropbox_row.PlaneRow(dest_->color(), c, 0);
+  fg_row_ptrs_.resize(fg_ptrs_.size());
+  bg_row_ptrs_.resize(bg_ptrs_.size());
+  for (size_t c = 0; c < fg_row_ptrs_.size(); c++) {
+    fg_row_ptrs_[c] = fg_ptrs_[c] + y * fg_strides_[c];
+    bg_row_ptrs_[c] = bg_ptrs_[c] + y * bg_strides_[c];
  }
-  for (size_t c = 0; c < foreground_.extra_channels().size(); c++) {
-    fg_ptrs_[c + 3] = overlap_row.ConstRow(foreground_.extra_channels()[c], 0);
-    bg_ptrs_[c + 3] = cropbox_row.Row(&dest_->extra_channels()[c], 0);
-  }
-  return PerformBlending(bg_ptrs_.data(), fg_ptrs_.data(), bg_ptrs_.data(),
-                         current_overlap_.xsize(), blending_info_[0],
-                         blending_info_.data() + 1,
+  return PerformBlending(bg_row_ptrs_.data(), fg_row_ptrs_.data(),
+                         bg_row_ptrs_.data(), current_overlap_.xsize(),
+                         blending_info_[0], blending_info_.data() + 1,
                         dest_->metadata()->extra_channel_info);
 }

--- a/third_party/jpeg-xl/lib/jxl/blending.h
+++ b/third_party/jpeg-xl/lib/jxl/blending.h
@ -45,10 +45,13 @@ class ImageBlender {
    bool done_;
    Rect current_overlap_;
    Rect current_cropbox_;
-    ImageBundle foreground_;
    ImageBundle* dest_;
    std::vector<const float*> fg_ptrs_;
+    std::vector<size_t> fg_strides_;
    std::vector<float*> bg_ptrs_;
+    std::vector<size_t> bg_strides_;
+    std::vector<const float*> fg_row_ptrs_;
+    std::vector<float*> bg_row_ptrs_;
    std::vector<PatchBlending> blending_info_;
  };

--- a/third_party/jpeg-xl/lib/jxl/blending_test.cc
+++ b/third_party/jpeg-xl/lib/jxl/blending_test.cc
@ -48,5 +48,56 @@ TEST(BlendingTest, Crops) {
  }
 }

+TEST(BlendingTest, Offset) {
+  const PaddedBytes background_bytes = ReadTestData("jxl/splines.png");
+  CodecInOut background;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(background_bytes), &background));
+  const PaddedBytes foreground_bytes =
+      ReadTestData("jxl/grayscale_patches.png");
+  CodecInOut foreground;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(foreground_bytes), &foreground));
+
+  ImageBlender blender;
+  ImageBundle output;
+  CodecMetadata nonserialized_metadata;
+  ASSERT_TRUE(
+      nonserialized_metadata.size.Set(background.xsize(), background.ysize()));
+  PassesSharedState state;
+  state.frame_header.blending_info.mode = BlendMode::kReplace;
+  state.frame_header.blending_info.source = 0;
+  state.frame_header.nonserialized_metadata = &nonserialized_metadata;
+  state.metadata = &background.metadata;
+  state.reference_frames[0].frame = &background.Main();
+  PassesDecoderState dec_state;
+  dec_state.shared = &state;
+  const FrameOrigin foreground_origin = {-50, -50};
+  ASSERT_TRUE(blender.PrepareBlending(&dec_state, foreground_origin,
+                                      foreground.xsize(), foreground.ysize(),
+                                      background.Main().c_current(), &output));
+
+  static constexpr int kStep = 20;
+  for (size_t x0 = 0; x0 < foreground.xsize(); x0 += kStep) {
+    for (size_t y0 = 0; y0 < foreground.ysize(); y0 += kStep) {
+      const Rect rect =
+          Rect(x0, y0, kStep, kStep).Intersection(Rect(foreground.Main()));
+      Image3F foreground_crop(rect.xsize(), rect.ysize());
+      CopyImageTo(rect, *foreground.Main().color(), Rect(foreground_crop),
+                  &foreground_crop);
+      auto rect_blender =
+          blender.PrepareRect(rect, foreground_crop, {}, Rect(foreground_crop));
+      for (size_t y = 0; y < rect.ysize(); ++y) {
+        ASSERT_TRUE(rect_blender.DoBlending(y));
+      }
+    }
+  }
+
+  const PaddedBytes expected_bytes =
+      ReadTestData("jxl/blending/grayscale_patches_on_splines.png");
+  CodecInOut expected;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(expected_bytes), &expected));
+  VerifyRelativeError(*expected.Main().color(), *output.color(), 1. / (2 * 255),
+                      0);
+}
+
 }  // namespace
 }  // namespace jxl
--- a/third_party/jpeg-xl/lib/jxl/common.h
+++ b/third_party/jpeg-xl/lib/jxl/common.h
@ -87,7 +87,7 @@ constexpr size_t kGroupDimInBlocks = kGroupDim / kBlockDim;
 constexpr size_t kMaxNumPasses = 11;

 // Maximum number of reference frames.
-constexpr size_t kMaxNumReferenceFrames = 3;
+constexpr size_t kMaxNumReferenceFrames = 4;

 // Dimensions of a frame, in pixels, and other derived dimensions.
 // Computed from FrameHeader.
--- a/third_party/jpeg-xl/lib/jxl/dec_ans.h
+++ b/third_party/jpeg-xl/lib/jxl/dec_ans.h
@ -353,6 +353,55 @@ class ANSSymbolReader {
    return true;
  }

+  static constexpr size_t kMaxCheckpointInterval = 512;
+  struct Checkpoint {
+    uint32_t state;
+    uint32_t num_to_copy;
+    uint32_t copy_pos;
+    uint32_t num_decoded;
+    uint32_t lz77_window[kMaxCheckpointInterval];
+  };
+  void Save(Checkpoint* checkpoint) {
+    checkpoint->state = state_;
+    checkpoint->num_decoded = num_decoded_;
+    checkpoint->num_to_copy = num_to_copy_;
+    checkpoint->copy_pos = copy_pos_;
+    if (lz77_window_) {
+      size_t win_start = num_decoded_ & kWindowMask;
+      size_t win_end = (num_decoded_ + kMaxCheckpointInterval) & kWindowMask;
+      if (win_end > win_start) {
+        memcpy(checkpoint->lz77_window, lz77_window_ + win_start,
+               (win_end - win_start) * sizeof(*lz77_window_));
+      } else {
+        memcpy(checkpoint->lz77_window, lz77_window_ + win_start,
+               (kWindowSize - win_start) * sizeof(*lz77_window_));
+        memcpy(checkpoint->lz77_window + (kWindowSize - win_start),
+               lz77_window_, win_end * sizeof(*lz77_window_));
+      }
+    }
+  }
+  void Restore(const Checkpoint& checkpoint) {
+    state_ = checkpoint.state;
+    JXL_DASSERT(num_decoded_ <=
+                checkpoint.num_decoded + kMaxCheckpointInterval);
+    num_decoded_ = checkpoint.num_decoded;
+    num_to_copy_ = checkpoint.num_to_copy;
+    copy_pos_ = checkpoint.copy_pos;
+    if (lz77_window_) {
+      size_t win_start = num_decoded_ & kWindowMask;
+      size_t win_end = (num_decoded_ + kMaxCheckpointInterval) & kWindowMask;
+      if (win_end > win_start) {
+        memcpy(lz77_window_ + win_start, checkpoint.lz77_window,
+               (win_end - win_start) * sizeof(*lz77_window_));
+      } else {
+        memcpy(lz77_window_ + win_start, checkpoint.lz77_window,
+               (kWindowSize - win_start) * sizeof(*lz77_window_));
+        memcpy(lz77_window_, checkpoint.lz77_window + (kWindowSize - win_start),
+               win_end * sizeof(*lz77_window_));
+      }
+    }
+  }
+
 private:
  const AliasTable::Entry* JXL_RESTRICT alias_tables_;  // not owned
  const HuffmanDecodingData* huffman_data_;
--- a/third_party/jpeg-xl/lib/jxl/dec_external_image.cc
+++ b/third_party/jpeg-xl/lib/jxl/dec_external_image.cc
@ -59,8 +59,6 @@ void FloatToU32(const float* in, uint32_t* out, size_t num, float mul,
  const auto scale = Set(d, mul);
  for (size_t x = 0; x < vec_num; x += Lanes(d)) {
    auto v = Load(d, in + x);
-    // Check for NaNs.
-    JXL_DASSERT(AllTrue(v == v));
    // Clamp turns NaN to 'min'.
    v = Clamp(v, Zero(d), one);
    auto i = NearestInt(v * scale);
@ -68,7 +66,6 @@ void FloatToU32(const float* in, uint32_t* out, size_t num, float mul,
  }
  for (size_t x = vec_num; x < num; x++) {
    float v = in[x];
-    JXL_DASSERT(!std::isnan(v));
    // Inverted condition grants that NaN is mapped to 0.0f.
    v = (v >= 0.0f) ? (v > 1.0f ? mul : (v * mul)) : 0.0f;
    out[x] = static_cast<uint32_t>(v + 0.5f);
--- a/third_party/jpeg-xl/lib/jxl/dec_modular.cc
+++ b/third_party/jpeg-xl/lib/jxl/dec_modular.cc
@ -477,10 +477,15 @@ Status ModularFrameDecoder::FinalizeDecoding(PassesDecoderState* dec_state,
      if (gi.channel[c_in].w == 0 || gi.channel[c_in].h == 0) {
        return JXL_FAILURE("Empty image");
      }
+      size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_in].hshift);
+      size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_in].vshift);
+      if (ysize_shifted != gi.channel[c_in].h || xsize_shifted != gi.channel[c_in].w) {
+            return JXL_FAILURE("Dimension mismatch");
+      }
      if (frame_header.color_transform == ColorTransform::kXYB && c == 2) {
        JXL_ASSERT(!fp);
        RunOnPool(
-            pool, 0, ysize, jxl::ThreadPool::SkipInit(),
+            pool, 0, ysize_shifted, jxl::ThreadPool::SkipInit(),
            [&](const int task, const int thread) {
              const size_t y = task;
              const pixel_type* const JXL_RESTRICT row_in =
@ -489,25 +494,23 @@ Status ModularFrameDecoder::FinalizeDecoding(PassesDecoderState* dec_state,
                  gi.channel[0].Row(y);
              float* const JXL_RESTRICT row_out = decoded.PlaneRow(c, y);
              HWY_DYNAMIC_DISPATCH(MultiplySum)
-              (xsize, row_in, row_in_Y, factor, row_out);
+              (xsize_shifted, row_in, row_in_Y, factor, row_out);
            },
            "ModularIntToFloat");
      } else if (fp) {
        int bits = metadata->m.bit_depth.bits_per_sample;
        int exp_bits = metadata->m.bit_depth.exponent_bits_per_sample;
        RunOnPool(
-            pool, 0, ysize, jxl::ThreadPool::SkipInit(),
+            pool, 0, ysize_shifted, jxl::ThreadPool::SkipInit(),
            [&](const int task, const int thread) {
              const size_t y = task;
              const pixel_type* const JXL_RESTRICT row_in =
                  gi.channel[c_in].Row(y);
              float* const JXL_RESTRICT row_out = decoded.PlaneRow(c, y);
-              int_to_float(row_in, row_out, xsize, bits, exp_bits);
+              int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits);
            },
            "ModularIntToFloat_losslessfloat");
      } else {
-        size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_in].hshift);
-        size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_in].vshift);
        RunOnPool(
            pool, 0, ysize_shifted, jxl::ThreadPool::SkipInit(),
            [&](const int task, const int thread) {
--- a/third_party/jpeg-xl/lib/jxl/dec_reconstruct.cc
+++ b/third_party/jpeg-xl/lib/jxl/dec_reconstruct.cc
@ -154,9 +154,16 @@ Status UndoXYBInPlace(Image3F* idct, const Rect& rect,
            FastPowf(d, v, Set(d, output_encoding_info.inverse_gamma)));
      };
      for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {
+#if MEMORY_SANITIZER
+        const auto mask = Iota(d, x) < Set(d, rect.xsize());
+        const auto in_opsin_x = IfThenElseZero(mask, Load(d, row0 + x));
+        const auto in_opsin_y = IfThenElseZero(mask, Load(d, row1 + x));
+        const auto in_opsin_b = IfThenElseZero(mask, Load(d, row2 + x));
+#else
        const auto in_opsin_x = Load(d, row0 + x);
        const auto in_opsin_y = Load(d, row1 + x);
        const auto in_opsin_b = Load(d, row2 + x);
+#endif
        JXL_COMPILER_FENCE;
        auto linear_r = Undefined(d);
        auto linear_g = Undefined(d);
@ -973,7 +980,10 @@ Status FinalizeFrameDecoding(ImageBundle* decoded,
                             1 << frame_header.chroma_subsampling.VShift(c)));
      for (size_t i = 0; i < frame_header.chroma_subsampling.HShift(c); i++) {
        plane.InitializePaddingForUnalignedAccesses();
-        plane = UpsampleH2(plane, pool);
+        const size_t output_xsize =
+            DivCeil(frame_dim.xsize_padded,
+                    1 << (frame_header.chroma_subsampling.HShift(c) - i - 1));
+        plane = UpsampleH2(plane, output_xsize, pool);
      }
      for (size_t i = 0; i < frame_header.chroma_subsampling.VShift(c); i++) {
        plane.InitializePaddingForUnalignedAccesses();
--- a/third_party/jpeg-xl/lib/jxl/dec_xyb.cc
+++ b/third_party/jpeg-xl/lib/jxl/dec_xyb.cc
@ -222,12 +222,17 @@ ImageF UpsampleV2(const ImageF& src, ThreadPool* pool) {
 *  output:
 *   |o1 e1 o2 e2 o3 e3 o4 e4| =: (o, e)
 */
-ImageF UpsampleH2(const ImageF& src, ThreadPool* pool) {
+ImageF UpsampleH2(const ImageF& src, size_t output_xsize, ThreadPool* pool) {
  const size_t xsize = src.xsize();
  const size_t ysize = src.ysize();
  JXL_ASSERT(xsize != 0);
  JXL_ASSERT(ysize != 0);
-  ImageF dst(xsize * 2, ysize);
+  JXL_ASSERT(DivCeil(output_xsize, 2) == xsize);
+  // An extra output pixel could cost a whole extra vector of overhead, so we
+  // request a specific output size. This should be safe, because the last 2
+  // values are processed in non-vectorized form, and the "Plane" row padding
+  // only matters when an unaligned vector store is applied at the last pixel.
+  ImageF dst(output_xsize, ysize);

  constexpr size_t kGroupArea = kGroupDim * kGroupDim;
  const size_t lines_per_group = DivCeil(kGroupArea, xsize);
@ -306,8 +311,8 @@ ImageF UpsampleV2(const ImageF& src, ThreadPool* pool) {
 }

 HWY_EXPORT(UpsampleH2);
-ImageF UpsampleH2(const ImageF& src, ThreadPool* pool) {
-  return HWY_DYNAMIC_DISPATCH(UpsampleH2)(src, pool);
+ImageF UpsampleH2(const ImageF& src, size_t output_xsize, ThreadPool* pool) {
+  return HWY_DYNAMIC_DISPATCH(UpsampleH2)(src, output_xsize, pool);
 }

 HWY_EXPORT(HasFastXYBTosRGB8);
--- a/third_party/jpeg-xl/lib/jxl/dec_xyb.h
+++ b/third_party/jpeg-xl/lib/jxl/dec_xyb.h
@ -69,7 +69,7 @@ ImageF UpsampleV2(const ImageF& src, ThreadPool* pool);

 // WARNING: this uses unaligned accesses, so the caller must first call
 // src.InitializePaddingForUnalignedAccesses() to avoid msan crashes.
-ImageF UpsampleH2(const ImageF& src, ThreadPool* pool);
+ImageF UpsampleH2(const ImageF& src, size_t output_xsize, ThreadPool* pool);

 bool HasFastXYBTosRGB8();
 void FastXYBTosRGB8(const Image3F& input, const Rect& input_rect,
--- a/third_party/jpeg-xl/lib/jxl/decode.cc
+++ b/third_party/jpeg-xl/lib/jxl/decode.cc
@ -295,6 +295,7 @@ struct Sections {
  std::vector<char> section_received;
 };

+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
 struct JxlDecoderStruct {
  JxlDecoderStruct() = default;

@ -312,7 +313,9 @@ struct JxlDecoderStruct {
  // final box that uses size 0 to indicate the end.
  bool last_codestream_seen;
  bool got_basic_info;
+  size_t header_except_icc_bits = 0;  // To skip everything before ICC.
  bool got_all_headers;               // Codestream metadata headers
+  jxl::ICCReader icc_reader;

  // This means either we actually got the preview image, or determined we
  // cannot get it or there is none.
@ -451,7 +454,9 @@ void JxlDecoderReset(JxlDecoder* dec) {
  dec->first_codestream_seen = false;
  dec->last_codestream_seen = false;
  dec->got_basic_info = false;
+  dec->header_except_icc_bits = 0;
  dec->got_all_headers = false;
+  dec->icc_reader.Reset();
  dec->got_preview_image = false;
  dec->last_frame_reached = false;
  dec->file_pos = 0;
@ -656,6 +661,11 @@ JxlDecoderStatus JxlDecoderReadAllHeaders(JxlDecoder* dec, const uint8_t* in,

  Span<const uint8_t> span(in + pos, size - pos);
  auto reader = GetBitReader(span);
+
+  if (dec->header_except_icc_bits != 0) {
+    // Headers were decoded already.
+    reader->SkipBits(dec->header_except_icc_bits);
+  } else {
    SizeHeader dummy_size_header;
    JXL_API_RETURN_IF_ERROR(ReadBundle(span, reader.get(), &dummy_size_header));

@ -666,10 +676,12 @@ JxlDecoderStatus JxlDecoderReadAllHeaders(JxlDecoder* dec, const uint8_t* in,

    JXL_API_RETURN_IF_ERROR(
        ReadBundle(span, reader.get(), &dec->metadata.transform_data));
+  }
+
+  dec->header_except_icc_bits = reader->TotalBitsConsumed();

  if (dec->metadata.m.color_encoding.WantICC()) {
-    PaddedBytes icc;
-    jxl::Status status = ReadICC(reader.get(), &icc, memory_limit_base_);
+    jxl::Status status = dec->icc_reader.Init(reader.get(), memory_limit_base_);
    // Always check AllReadsWithinBounds, not all the C++ decoder implementation
    // handles reader out of bounds correctly  yet (e.g. context map). Not
    // checking AllReadsWithinBounds can cause reader->Close() to trigger an
@ -684,6 +696,15 @@ JxlDecoderStatus JxlDecoderReadAllHeaders(JxlDecoder* dec, const uint8_t* in,
      // Other non-successful status is an error
      return JXL_DEC_ERROR;
    }
+    PaddedBytes icc;
+    status = dec->icc_reader.Process(reader.get(), &icc);
+    if (!status) {
+      if (status.code() == StatusCode::kNotEnoughBytes) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+      // Other non-successful status is an error
+      return JXL_DEC_ERROR;
+    }
    if (!dec->metadata.m.color_encoding.SetICCRaw(std::move(icc))) {
      return JXL_DEC_ERROR;
    }
@ -763,6 +784,9 @@ JxlDecoderStatus ParseFrameHeader(JxlDecoder* dec,
                                  const uint8_t* in, size_t size, size_t pos,
                                  bool is_preview, size_t* frame_size,
                                  size_t* dc_size) {
+  if (pos >= size) {
+    return JXL_DEC_NEED_MORE_INPUT;
+  }
  Span<const uint8_t> span(in + pos, size - pos);
  auto reader = GetBitReader(span);

@ -983,6 +1007,9 @@ JxlDecoderStatus JxlDecoderProcessInternal(JxlDecoder* dec, const uint8_t* in,

    if (dec->frame_stage == FrameStage::kTOC) {
      size_t pos = dec->frame_start - dec->codestream_pos;
+      if (pos >= size) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
      Span<const uint8_t> span(in + pos, size - pos);
      auto reader = GetBitReader(span);

@ -1083,6 +1110,9 @@ JxlDecoderStatus JxlDecoderProcessInternal(JxlDecoder* dec, const uint8_t* in,
      }

      size_t pos = dec->frame_start - dec->codestream_pos;
+      if (pos >= size) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }

      bool get_dc = dec->is_last_of_still &&
                    (dec->frame_stage == FrameStage::kDC) && dec->dc_size != 0;
--- a/third_party/jpeg-xl/lib/jxl/enc_file.cc
+++ b/third_party/jpeg-xl/lib/jxl/enc_file.cc
@ -124,8 +124,10 @@ Status PrepareCodecMetadataFromIO(const CompressParams& cparams,
                                  const CodecInOut* io,
                                  CodecMetadata* metadata) {
  *metadata = io->metadata;
+  size_t ups = 1;
+  if (cparams.already_downsampled) ups = cparams.resampling;

-  JXL_RETURN_IF_ERROR(metadata->size.Set(io->xsize(), io->ysize()));
+  JXL_RETURN_IF_ERROR(metadata->size.Set(io->xsize() * ups, io->ysize() * ups));

  // Keep ICC profile in lossless modes because a reconstructed profile may be
  // slightly different (quantization).
--- a/third_party/jpeg-xl/lib/jxl/enc_frame.cc
+++ b/third_party/jpeg-xl/lib/jxl/enc_frame.cc
@ -340,11 +340,13 @@ Status MakeFrameHeader(const CompressParams& cparams,
  // Resized frames.
  if (frame_info.frame_type != FrameType::kDCFrame) {
    frame_header->frame_origin = ib.origin;
-    frame_header->frame_size.xsize = ib.xsize();
-    frame_header->frame_size.ysize = ib.ysize();
+    size_t ups = 1;
+    if (cparams.already_downsampled) ups = cparams.resampling;
+    frame_header->frame_size.xsize = ib.xsize() * ups;
+    frame_header->frame_size.ysize = ib.ysize() * ups;
    if (ib.origin.x0 != 0 || ib.origin.y0 != 0 ||
-        ib.xsize() != frame_header->default_xsize() ||
-        ib.ysize() != frame_header->default_ysize()) {
+        frame_header->frame_size.xsize != frame_header->default_xsize() ||
+        frame_header->frame_size.ysize != frame_header->default_ysize()) {
      frame_header->custom_size_or_origin = true;
    }
  }
@ -1055,9 +1057,7 @@ Status EncodeFrame(const CompressParams& cparams_orig,
    cparams.modular_mode = false;
  }

-  const size_t xsize = ib.xsize();
-  const size_t ysize = ib.ysize();
-  if (xsize == 0 || ysize == 0) return JXL_FAILURE("Empty image");
+  if (ib.xsize() == 0 || ib.ysize() == 0) return JXL_FAILURE("Empty image");

  // Assert that this metadata is correctly set up for the compression params,
  // this should have been done by enc_file.cc
@ -1184,7 +1184,7 @@ Status EncodeFrame(const CompressParams& cparams_orig,
      JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeEncodingData(
          ib_or_linear, &opsin, pool, modular_frame_encoder.get(), writer,
          frame_header.get()));
-    } else if (frame_header->upsampling != 1) {
+    } else if (frame_header->upsampling != 1 && !cparams.already_downsampled) {
      // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
      // after noise, if necessary.
      DownsampleImage(&opsin, frame_header->upsampling);
@ -1194,7 +1194,7 @@ Status EncodeFrame(const CompressParams& cparams_orig,
        &ib, &opsin, pool, modular_frame_encoder.get(), writer,
        frame_header.get()));
  }
-  if (cparams.ec_resampling != 1) {
+  if (cparams.ec_resampling != 1 && !cparams.already_downsampled) {
    extra_channels = &extra_channels_storage;
    for (size_t i = 0; i < ib.extra_channels().size(); i++) {
      extra_channels_storage.emplace_back(CopyImage(ib.extra_channels()[i]));
--- a/third_party/jpeg-xl/lib/jxl/enc_heuristics.cc
+++ b/third_party/jpeg-xl/lib/jxl/enc_heuristics.cc
@ -252,7 +252,7 @@ Status DefaultEncoderHeuristics::LossyFrameHeuristics(
      shared.frame_header.flags &= ~FrameHeader::kNoise;
    }
  }
-  if (enc_state->shared.frame_header.upsampling != 1) {
+  if (enc_state->shared.frame_header.upsampling != 1 && !cparams.already_downsampled) {
    // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
    // after noise, if necessary.
    DownsampleImage(opsin, cparams.resampling);
--- a/third_party/jpeg-xl/lib/jxl/enc_icc_codec.cc
+++ b/third_party/jpeg-xl/lib/jxl/enc_icc_codec.cc
@ -425,7 +425,8 @@ Status WriteICC(const PaddedBytes& icc, BitWriter* JXL_RESTRICT writer,
        enc[i]);
  }
  HistogramParams params;
-  params.lz77_method = HistogramParams::LZ77Method::kOptimal;
+  params.lz77_method = enc.size() < 4096 ? HistogramParams::LZ77Method::kOptimal
+                                         : HistogramParams::LZ77Method::kLZ77;
  EntropyEncodingData code;
  std::vector<uint8_t> context_map;
  params.force_huffman = true;
--- a/third_party/jpeg-xl/lib/jxl/enc_params.h
+++ b/third_party/jpeg-xl/lib/jxl/enc_params.h
@ -233,6 +233,8 @@ struct CompressParams {
  // Down/upsample the image before encoding / after decoding by this factor.
  size_t resampling = 1;
  size_t ec_resampling = 1;
+  // Skip the downsampling before encoding if this is true.
+  bool already_downsampled = false;
 };

 static constexpr float kMinButteraugliForDynamicAR = 0.5f;
--- a/third_party/jpeg-xl/lib/jxl/icc_codec.cc
+++ b/third_party/jpeg-xl/lib/jxl/icc_codec.cc
@ -314,69 +314,99 @@ Status UnpredictICC(const uint8_t* enc, size_t size, PaddedBytes* result) {
  return true;
 }

-Status ReadICC(BitReader* JXL_RESTRICT reader, PaddedBytes* JXL_RESTRICT icc,
-               size_t output_limit) {
-  icc->clear();
-  const auto checkEndOfInput = [&]() -> Status {
-    if (reader->AllReadsWithinBounds()) return true;
-    return JXL_STATUS(StatusCode::kNotEnoughBytes,
-                      "Not enough bytes for reading ICC profile");
-  };
-  JXL_RETURN_IF_ERROR(checkEndOfInput());
-  uint64_t enc_size = U64Coder::Read(reader);
-  if (enc_size > 268435456) {
+// Begins, or resumes, decoding of the compressed ICC stream at the current
+// position of `reader`.
+//
+// On the initial pass (bits_to_skip_ == 0) this reads the encoded size and
+// the histograms, sets up the ANS symbol reader, decodes the first symbols
+// (up to kPreambleSize of them when the stream is longer than that) and runs
+// CheckPreamble, to which `output_limit` is forwarded. When progress has
+// already been recorded, the decoder state is kept and `reader` is only
+// advanced by bits_to_skip_.
+//
+// Returns the CheckEOI() status (kNotEnoughBytes) when `reader` has read out
+// of bounds, and a failure when the encoded size exceeds 268435456 or when
+// DecodeHistograms / CheckPreamble fail.
+Status ICCReader::Init(BitReader* reader, size_t output_limit) {
+  JXL_RETURN_IF_ERROR(CheckEOI(reader));
+  used_bits_base_ = reader->TotalBitsConsumed();
+  // bits_to_skip_ stays 0 until a call has recorded progress (and is zeroed
+  // again by Reset()), so this branch performs the initial decode.
+  if (bits_to_skip_ == 0) {
+    enc_size_ = U64Coder::Read(reader);
+    if (enc_size_ > 268435456) {
      // Avoid too large memory allocation for invalid file.
-    // TODO(lode): a more accurate limit would be the filesize of the JXL file,
-    // if we can have it available here.
      return JXL_FAILURE("Too large encoded profile");
    }
-  PaddedBytes decompressed;
-  std::vector<uint8_t> context_map;
-  ANSCode code;
    JXL_RETURN_IF_ERROR(
-      DecodeHistograms(reader, kNumICCContexts, &code, &context_map));
-  ANSSymbolReader ans_reader(&code, reader);
-  size_t used_bits_base = reader->TotalBitsConsumed();
-  size_t i = 0;
-  decompressed.resize(std::min<size_t>(i + 0x400, enc_size));
+        DecodeHistograms(reader, kNumICCContexts, &code_, &context_map_));
+    ans_reader_ = ANSSymbolReader(&code_, reader);
+    i_ = 0;
+    decompressed_.resize(std::min<size_t>(i_ + 0x400, enc_size_));
+    // The first two symbols are decoded separately: their context substitutes
+    // 0 for the predecessors that do not exist yet.
+    for (; i_ < std::min<size_t>(2, enc_size_); i_++) {
+      decompressed_[i_] = ans_reader_.ReadHybridUint(
+          ICCANSContext(i_, i_ > 0 ? decompressed_[i_ - 1] : 0,
+                        i_ > 1 ? decompressed_[i_ - 2] : 0),
+          reader, context_map_);
+    }
+    // Only streams longer than kPreambleSize have the rest of their preamble
+    // decoded and validated here.
+    if (enc_size_ > kPreambleSize) {
+      for (; i_ < kPreambleSize; i_++) {
+        decompressed_[i_] = ans_reader_.ReadHybridUint(
+            ICCANSContext(i_, decompressed_[i_ - 1], decompressed_[i_ - 2]),
+            reader, context_map_);
+      }
+      JXL_RETURN_IF_ERROR(CheckEOI(reader));
+      JXL_RETURN_IF_ERROR(
+          CheckPreamble(decompressed_, enc_size_, output_limit));
+    }
+    // Record how many bits this stage consumed so that a later call can skip
+    // them instead of decoding again.
+    bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_;
+  } else {
+    // Decoder state from an earlier call is kept; just move the reader past
+    // the bits that were already consumed.
+    reader->SkipBits(bits_to_skip_);
+  }
+  return true;
+}

-  for (; i < std::min<size_t>(2, enc_size); i++) {
-    decompressed[i] = ans_reader.ReadHybridUint(
-        ICCANSContext(i, i > 0 ? decompressed[i - 1] : 0,
-                      i > 1 ? decompressed[i - 2] : 0),
-        reader, context_map);
+// Decodes the remaining symbols (from index i_ up to enc_size_) into
+// decompressed_ and, once all of them are available, writes the result of
+// UnpredictICC into `icc` (which is cleared first).
+//
+// If `reader` runs out of input, the ANS reader and i_ are rolled back to the
+// most recent checkpoint and the CheckEOI() status (kNotEnoughBytes) is
+// returned; bits_to_skip_ then still holds the bit offset of that checkpoint.
+// Returns a failure for a corrupted stream or a bad final ANS state.
+Status ICCReader::Process(BitReader* reader, PaddedBytes* icc) {
+  ANSSymbolReader::Checkpoint checkpoint;
+  size_t saved_i = 0;
+  // Captures the ANS reader state, the bit offset relative to
+  // used_bits_base_, and the current symbol index.
+  auto save = [&]() {
+    ans_reader_.Save(&checkpoint);
+    bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_;
+    saved_i = i_;
+  };
+  save();
+  // When the reader went out of bounds, rolls back to the last save() and
+  // returns the CheckEOI() status; otherwise returns success.
+  auto check_and_restore = [&]() {
+    Status status = CheckEOI(reader);
+    if (!status) {
+      // not enough bytes.
+      ans_reader_.Restore(checkpoint);
+      i_ = saved_i;
+      return status;
    }
-  if (enc_size > kPreambleSize) {
-    for (; i < kPreambleSize; i++) {
-      decompressed[i] = ans_reader.ReadHybridUint(
-          ICCANSContext(i, decompressed[i - 1], decompressed[i - 2]), reader,
-          context_map);
-    }
-    JXL_RETURN_IF_ERROR(checkEndOfInput());
-    JXL_RETURN_IF_ERROR(CheckPreamble(decompressed, enc_size, output_limit));
-  }
-  for (; i < enc_size; i++) {
-    if ((i & 0x3FF) == 0) {
-      JXL_RETURN_IF_ERROR(checkEndOfInput());
-      if ((i > 0) && (((i & 0xFFFF) == 0))) {
+    return Status(true);
+  };
+  for (; i_ < enc_size_; i_++) {
+    // At every non-zero multiple of kMaxCheckpointInterval: verify the input
+    // bounds, take a new checkpoint and grow the output buffer.
+    if (i_ % ANSSymbolReader::kMaxCheckpointInterval == 0 && i_ > 0) {
+      JXL_RETURN_IF_ERROR(check_and_restore());
+      save();
+      // Every 0x10000 symbols, reject streams that have produced more than
+      // 256 symbols per consumed input byte.
+      if ((i_ > 0) && (((i_ & 0xFFFF) == 0))) {
        float used_bytes =
-            (reader->TotalBitsConsumed() - used_bits_base) / 8.0f;
-        if (i > used_bytes * 256) return JXL_FAILURE("Corrupted stream");
+            (reader->TotalBitsConsumed() - used_bits_base_) / 8.0f;
+        if (i_ > used_bytes * 256) return JXL_FAILURE("Corrupted stream");
      }
-      decompressed.resize(std::min<size_t>(i + 0x400, enc_size));
+      decompressed_.resize(std::min<size_t>(i_ + 0x400, enc_size_));
    }
-    JXL_DASSERT(i >= 2);
-    decompressed[i] = ans_reader.ReadHybridUint(
-        ICCANSContext(i, decompressed[i - 1], decompressed[i - 2]), reader,
-        context_map);
+    JXL_DASSERT(i_ >= 2);
+    decompressed_[i_] = ans_reader_.ReadHybridUint(
+        ICCANSContext(i_, decompressed_[i_ - 1], decompressed_[i_ - 2]), reader,
+        context_map_);
  }
-  JXL_RETURN_IF_ERROR(checkEndOfInput());
-  if (!ans_reader.CheckANSFinalState()) {
+  JXL_RETURN_IF_ERROR(check_and_restore());
+  bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_;
+  if (!ans_reader_.CheckANSFinalState()) {
    return JXL_FAILURE("Corrupted ICC profile");
  }

-  JXL_RETURN_IF_ERROR(
-      UnpredictICC(decompressed.data(), decompressed.size(), icc));
+  icc->clear();
+  return UnpredictICC(decompressed_.data(), decompressed_.size(), icc);
+}
+
+// Returns success while every read performed on `reader` so far stayed within
+// its input; otherwise returns a kNotEnoughBytes status.
+Status ICCReader::CheckEOI(BitReader* reader) {
+  if (reader->AllReadsWithinBounds()) return true;
+  return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                    "Not enough bytes for reading ICC profile");
+}
+
+// One-shot ICC decode: runs Init() and then Process() on a temporary
+// ICCReader, propagating the first error status of either step.
+Status ReadICC(BitReader* JXL_RESTRICT reader, PaddedBytes* JXL_RESTRICT icc,
+               size_t output_limit) {
+  ICCReader icc_reader;
+  JXL_RETURN_IF_ERROR(icc_reader.Init(reader, output_limit));
+  JXL_RETURN_IF_ERROR(icc_reader.Process(reader, icc));
  return true;
 }

--- a/third_party/jpeg-xl/lib/jxl/icc_codec.h
+++ b/third_party/jpeg-xl/lib/jxl/icc_codec.h
@ -25,6 +25,7 @@
 #include "lib/jxl/base/compiler_specific.h"
 #include "lib/jxl/base/padded_bytes.h"
 #include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_ans.h"
 #include "lib/jxl/dec_bit_reader.h"
 #include "lib/jxl/enc_bit_writer.h"

@ -34,6 +35,26 @@ namespace jxl {
 Status WriteICC(const PaddedBytes& icc, BitWriter* JXL_RESTRICT writer,
                size_t layer, AuxOut* JXL_RESTRICT aux_out);

+// Decoder for a compressed ICC profile that keeps its state in member
+// variables, so decoding can be split across an Init() call followed by a
+// Process() call and repeated when more input becomes available.
+struct ICCReader {
+  // Performs the initial part of the decode, or, when progress was already
+  // recorded in bits_to_skip_, skips `reader` forward by that many bits.
+  Status Init(BitReader* reader, size_t output_limit);
+  // Decodes the remaining symbols and writes the resulting profile to `icc`.
+  Status Process(BitReader* reader, PaddedBytes* icc);
+  // Discards recorded progress so that the next Init() starts from scratch.
+  void Reset() {
+    bits_to_skip_ = 0;
+    decompressed_.clear();
+  }
+
+ private:
+  // Returns a kNotEnoughBytes status if `reader` has read out of bounds.
+  Status CheckEOI(BitReader* reader);
+  // Index of the next symbol to decode into decompressed_.
+  size_t i_ = 0;
+  // Bits consumed after used_bits_base_ up to the last recorded decoding
+  // state; 0 means no progress has been recorded.
+  size_t bits_to_skip_ = 0;
+  // reader->TotalBitsConsumed() as sampled at the start of the latest Init().
+  size_t used_bits_base_ = 0;
+  // Number of symbols in the encoded stream, as read from the bitstream.
+  uint64_t enc_size_ = 0;
+  // Entropy-coding state filled in by DecodeHistograms during Init().
+  std::vector<uint8_t> context_map_;
+  ANSCode code_;
+  ANSSymbolReader ans_reader_;
+  // Symbols decoded so far; input to UnpredictICC once complete.
+  PaddedBytes decompressed_;
+};
+
 // `icc` may be empty afterwards - if so, call CreateProfile. Does not append,
 // clears any original data that was in icc.
 // If `output_limit` is not 0, then returns error if resulting profile would be
--- a/third_party/jpeg-xl/lib/jxl/modular/encoding/context_predict.h
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/context_predict.h
@ -466,6 +466,48 @@ enum PredictorMode {
  kAllPredictions = 8,
 };

+// Returns the prediction that predictor `p` makes from the given neighbor
+// values. `wp_pred` is the already-computed result returned for
+// Predictor::Weighted. Predictors without a case below yield 0.
+// NOTE(review): the neighbor parameters are presumably named after their
+// position relative to the current sample; confirm against the caller.
+JXL_INLINE pixel_type_w PredictOne(Predictor p, pixel_type_w left,
+                                   pixel_type_w top, pixel_type_w toptop,
+                                   pixel_type_w topleft, pixel_type_w topright,
+                                   pixel_type_w leftleft,
+                                   pixel_type_w toprightright,
+                                   pixel_type_w wp_pred) {
+  switch (p) {
+    case Predictor::Zero:
+      return pixel_type_w{0};
+    case Predictor::Left:
+      return left;
+    case Predictor::Top:
+      return top;
+    case Predictor::Select:
+      return Select(left, top, topleft);
+    case Predictor::Weighted:
+      return wp_pred;
+    case Predictor::Gradient:
+      return pixel_type_w{ClampedGradient(left, top, topleft)};
+    case Predictor::TopLeft:
+      return topleft;
+    case Predictor::TopRight:
+      return topright;
+    case Predictor::LeftLeft:
+      return leftleft;
+    // Average0..Average3: mean of two neighbors, using integer division.
+    case Predictor::Average0:
+      return (left + top) / 2;
+    case Predictor::Average1:
+      return (left + topleft) / 2;
+    case Predictor::Average2:
+      return (topleft + top) / 2;
+    case Predictor::Average3:
+      return (top + topright) / 2;
+    // Average4: weighted sum of six neighbors; the weights add up to 16 and
+    // the + 8 is half of the divisor.
+    case Predictor::Average4:
+      return (6 * top - 2 * toptop + 7 * left + 1 * leftleft +
+              1 * toprightright + 3 * topright + 8) /
+             16;
+    default:
+      return pixel_type_w{0};
+  }
+}
+
 template <int mode>
 inline PredictionResult Predict(
    Properties *p, size_t w, const pixel_type *JXL_RESTRICT pp,
@ -528,28 +570,14 @@ inline PredictionResult Predict(
    result.multiplier = lr.multiplier;
    predictor = lr.predictor;
  }
-  pixel_type_w pred_storage[kNumModularPredictors];
-  if (!(mode & kAllPredictions)) {
-    predictions = pred_storage;
+  if (mode & kAllPredictions) {
+    for (size_t i = 0; i < kNumModularPredictors; i++) {
+      predictions[i] = PredictOne((Predictor)i, left, top, toptop, topleft,
+                                  topright, leftleft, toprightright, wp_pred);
    }
-  predictions[(int)Predictor::Zero] = 0;
-  predictions[(int)Predictor::Left] = left;
-  predictions[(int)Predictor::Top] = top;
-  predictions[(int)Predictor::Select] = Select(left, top, topleft);
-  predictions[(int)Predictor::Weighted] = wp_pred;
-  predictions[(int)Predictor::Gradient] = ClampedGradient(left, top, topleft);
-  predictions[(int)Predictor::TopLeft] = topleft;
-  predictions[(int)Predictor::TopRight] = topright;
-  predictions[(int)Predictor::LeftLeft] = leftleft;
-  predictions[(int)Predictor::Average0] = (left + top) / 2;
-  predictions[(int)Predictor::Average1] = (left + topleft) / 2;
-  predictions[(int)Predictor::Average2] = (topleft + top) / 2;
-  predictions[(int)Predictor::Average3] = (top + topright) / 2;
-  predictions[(int)Predictor::Average4] =
-      (6 * top - 2 * toptop + 7 * left + 1 * leftleft + 1 * toprightright +
-       3 * topright + 8) /
-      16;
-  result.guess += predictions[(int)predictor];
+  }
+  result.guess += PredictOne(predictor, left, top, toptop, topleft, topright,
+                             leftleft, toprightright, wp_pred);
  result.predictor = predictor;

  return result;
--- a/third_party/jpeg-xl/lib/jxl/modular/transform/palette.h
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.h
@ -44,6 +44,10 @@ static constexpr int kLargeCubeOffset = kSmallCube * kSmallCube * kSmallCube;
 // Inclusive.
 static constexpr int kMinImplicitPaletteIndex = -(2 * 72 - 1);

+// Computes value * (2^bit_depth - 1) / denom with truncating division and
+// returns the result as pixel_type.
+// The power of two is formed in pixel_type_w rather than from an int literal:
+// `1 << bit_depth` in int overflows (undefined behavior) once bit_depth
+// reaches 31. NOTE(review): this relies on pixel_type_w being wider than
+// int - confirm against its declaration.
+static constexpr pixel_type Scale(int value, int bit_depth, int denom) {
+  return (static_cast<pixel_type_w>(value) *
+          ((static_cast<pixel_type_w>(1) << bit_depth) - 1)) /
+         denom;
+}
+
 // The purpose of this function is solely to extend the interpretation of
 // palette indices to implicit values. If index < nb_deltas, indicating that the
 // result is a delta palette entry, it is the responsibility of the caller to
@ -96,8 +100,7 @@ static pixel_type GetPaletteValue(const pixel_type *const palette, int index,
      }
      index /= divisor;
    }
-    index %= kSmallCube;
-    return (index * ((1 << bit_depth) - 1)) / kSmallCube +
+    return Scale(index % kSmallCube, bit_depth, kSmallCube) +
           (1 << (std::max(0, bit_depth - 3)));
  } else if (palette_size + kLargeCubeOffset <= index) {
    if (c >= kCubePow) return 0;
@ -111,8 +114,7 @@ static pixel_type GetPaletteValue(const pixel_type *const palette, int index,
      }
      index /= divisor;
    }
-    index %= kLargeCube;
-    return (index * ((1 << bit_depth) - 1)) / (kLargeCube - 1);
+    return Scale(index % kLargeCube, bit_depth, kLargeCube - 1);
  }

  return palette[c * onerow + static_cast<size_t>(index)];
@ -232,7 +234,7 @@ static Status InvPalette(Image &input, uint32_t begin_c, uint32_t nb_colors,
  intptr_t onerow = input.channel[0].plane.PixelsPerRow();
  intptr_t onerow_image = input.channel[c0].plane.PixelsPerRow();
  const int bit_depth =
-      CeilLog2Nonzero(static_cast<unsigned>(input.maxval - input.minval + 1));
+      CeilLog2Nonzero(static_cast<unsigned>(input.maxval) - input.minval + 1);

  if (w == 0) {
    // Nothing to do.