mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-02-24 03:19:06 +00:00
Bug 1873510 - Update libjxl to b26041c708d523ac53bb7d95d4f5c4a5d3b1ce30 r=saschanaz
Differential Revision: https://phabricator.services.mozilla.com/D197950
This commit is contained in:
parent
a1a2edc0b9
commit
c02f0ef1c5
@ -10,9 +10,9 @@ origin:
|
||||
|
||||
url: https://github.com/libjxl/libjxl
|
||||
|
||||
release: 1d1006ce9dbe4224c1299992b57f28a4f62df9b7 (2023-12-29T13:36:27Z).
|
||||
release: b26041c708d523ac53bb7d95d4f5c4a5d3b1ce30 (2024-01-08T13:55:50Z).
|
||||
|
||||
revision: 1d1006ce9dbe4224c1299992b57f28a4f62df9b7
|
||||
revision: b26041c708d523ac53bb7d95d4f5c4a5d3b1ce30
|
||||
|
||||
license: Apache-2.0
|
||||
|
||||
|
1
third_party/jpeg-xl/BUILD.bazel
vendored
1
third_party/jpeg-xl/BUILD.bazel
vendored
@ -20,3 +20,4 @@ filegroup(
|
||||
"testdata/position_encoding/*.txt",
|
||||
]),
|
||||
)
|
||||
|
||||
|
2
third_party/jpeg-xl/deps.sh
vendored
2
third_party/jpeg-xl/deps.sh
vendored
@ -13,6 +13,7 @@ MYDIR=$(dirname $(realpath "$0"))
|
||||
|
||||
# Git revisions we use for the given submodules. Update these whenever you
|
||||
# update a git submodule.
|
||||
TESTDATA="873045a9c42ed60721756e26e2a6b32e17415205"
|
||||
THIRD_PARTY_BROTLI="36533a866ed1ca4b75cf049f4521e4ec5fe24727"
|
||||
THIRD_PARTY_HIGHWAY="ba0900a4957b929390ab73827235557959234fea"
|
||||
THIRD_PARTY_SKCMS="42030a771244ba67f86b1c1c76a6493f873c5f91"
|
||||
@ -79,6 +80,7 @@ EOF
|
||||
fi
|
||||
|
||||
# Sources downloaded from a tarball.
|
||||
download_github testdata libjxl/testdata
|
||||
download_github third_party/brotli google/brotli
|
||||
download_github third_party/highway google/highway
|
||||
download_github third_party/sjpeg webmproject/sjpeg
|
||||
|
4
third_party/jpeg-xl/lib/extras/codec.cc
vendored
4
third_party/jpeg-xl/lib/extras/codec.cc
vendored
@ -133,8 +133,8 @@ Status Encode(const CodecInOut& io, const ColorEncoding& c_desired,
|
||||
size_t bits_per_sample, const std::string& pathname,
|
||||
std::vector<uint8_t>* bytes, ThreadPool* pool) {
|
||||
std::string extension;
|
||||
const extras::Codec codec = extras::CodecFromPath(
|
||||
pathname, &bits_per_sample, /* filename */ nullptr, &extension);
|
||||
const extras::Codec codec =
|
||||
extras::CodecFromPath(pathname, &bits_per_sample, &extension);
|
||||
|
||||
// Warn about incorrect usage of PGM/PGX/PPM - only the latter supports
|
||||
// color, but CodecFromPath lumps them all together.
|
||||
|
37
third_party/jpeg-xl/lib/extras/dec/decode.cc
vendored
37
third_party/jpeg-xl/lib/extras/dec/decode.cc
vendored
@ -22,41 +22,30 @@ namespace {
|
||||
// Any valid encoding is larger (ensures codecs can read the first few bytes)
|
||||
constexpr size_t kMinBytes = 9;
|
||||
|
||||
void BasenameAndExtension(const std::string& path, std::string* filename,
|
||||
std::string* extension) {
|
||||
// Pattern: "png:name" or "png:-"
|
||||
size_t pos = path.find_first_of(':');
|
||||
if (pos != std::string::npos) {
|
||||
*extension = "." + path.substr(0, pos);
|
||||
*filename = path.substr(pos + 1);
|
||||
//+ ((path.length() == pos + 2 && path.substr(pos + 1, 1) == "-") ? "" :
|
||||
//*extension);
|
||||
return;
|
||||
}
|
||||
|
||||
std::string GetExtension(const std::string& path) {
|
||||
// Pattern: "name.png"
|
||||
pos = path.find_last_of('.');
|
||||
size_t pos = path.find_last_of('.');
|
||||
if (pos != std::string::npos) {
|
||||
*extension = path.substr(pos);
|
||||
*filename = path;
|
||||
return;
|
||||
return path.substr(pos);
|
||||
}
|
||||
|
||||
// Extension not found
|
||||
*filename = path;
|
||||
*extension = "";
|
||||
return "";
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Codec CodecFromPath(std::string path, size_t* JXL_RESTRICT bits_per_sample,
|
||||
std::string* filename, std::string* extension) {
|
||||
std::string* extension) {
|
||||
std::string base;
|
||||
std::string ext;
|
||||
BasenameAndExtension(path, &base, &ext);
|
||||
if (filename) *filename = base;
|
||||
if (extension) *extension = ext;
|
||||
|
||||
std::string ext = GetExtension(path);
|
||||
if (extension) {
|
||||
if (extension->empty()) {
|
||||
*extension = ext;
|
||||
} else {
|
||||
ext = *extension;
|
||||
}
|
||||
}
|
||||
std::transform(ext.begin(), ext.end(), ext.begin(), [](char c) {
|
||||
return std::tolower(c, std::locale::classic());
|
||||
});
|
||||
|
1
third_party/jpeg-xl/lib/extras/dec/decode.h
vendored
1
third_party/jpeg-xl/lib/extras/dec/decode.h
vendored
@ -42,7 +42,6 @@ bool CanDecode(Codec codec);
|
||||
// that Encode() would encode to PFM instead of PPM.
|
||||
Codec CodecFromPath(std::string path,
|
||||
size_t* JXL_RESTRICT bits_per_sample = nullptr,
|
||||
std::string* filename = nullptr,
|
||||
std::string* extension = nullptr);
|
||||
|
||||
// Decodes "bytes" info *ppf.
|
||||
|
4
third_party/jpeg-xl/lib/extras/dec/jxl.cc
vendored
4
third_party/jpeg-xl/lib/extras/dec/jxl.cc
vendored
@ -10,10 +10,12 @@
|
||||
#include <jxl/decode_cxx.h>
|
||||
#include <jxl/types.h>
|
||||
|
||||
#include <cinttypes>
|
||||
|
||||
#include "lib/extras/common.h"
|
||||
#include "lib/extras/dec/color_description.h"
|
||||
#include "lib/jxl/base/exif.h"
|
||||
#include "lib/jxl/base/printf_macros.h"
|
||||
#include "lib/jxl/exif.h"
|
||||
|
||||
namespace jxl {
|
||||
namespace extras {
|
||||
|
2
third_party/jpeg-xl/lib/extras/enc/jxl.cc
vendored
2
third_party/jpeg-xl/lib/extras/enc/jxl.cc
vendored
@ -8,7 +8,7 @@
|
||||
#include <jxl/encode.h>
|
||||
#include <jxl/encode_cxx.h>
|
||||
|
||||
#include "lib/jxl/exif.h"
|
||||
#include "lib/jxl/base/exif.h"
|
||||
|
||||
namespace jxl {
|
||||
namespace extras {
|
||||
|
6
third_party/jpeg-xl/lib/include/jxl/encode.h
vendored
6
third_party/jpeg-xl/lib/include/jxl/encode.h
vendored
@ -824,8 +824,8 @@ struct JxlChunkedFrameInputSource {
|
||||
|
||||
/**
|
||||
* Callback to retrieve a rectangle of color channel data at a specific
|
||||
* location. It is guaranteed that xpos and ypos are multiples of 128. xsize,
|
||||
* ysize will be multiples of 128, unless the resulting rectangle would be out
|
||||
* location. It is guaranteed that xpos and ypos are multiples of 8. xsize,
|
||||
* ysize will be multiples of 8, unless the resulting rectangle would be out
|
||||
* of image bounds. Moreover, xsize and ysize will be at most 2048. The
|
||||
* returned data will be assumed to be in the format returned by the
|
||||
* (preceding) call to get_color_channels_pixel_format, except the `align`
|
||||
@ -868,7 +868,7 @@ struct JxlChunkedFrameInputSource {
|
||||
/**
|
||||
* Callback to retrieve a rectangle of extra channel `ec_index` data at a
|
||||
* specific location. It is guaranteed that xpos and ypos are multiples of
|
||||
* 128. xsize, ysize will be multiples of 128, unless the resulting rectangle
|
||||
* 8. xsize, ysize will be multiples of 8, unless the resulting rectangle
|
||||
* would be out of image bounds. Moreover, xsize and ysize will be at most
|
||||
* 2048. The returned data will be assumed to be in the format returned by the
|
||||
* (preceding) call to get_extra_channels_pixel_format_at with the
|
||||
|
@ -11,7 +11,12 @@
|
||||
|
||||
#include <jxl/codestream_header.h>
|
||||
|
||||
#include "lib/jxl/image_metadata.h"
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "lib/jxl/base/byte_order.h"
|
||||
#include "lib/jxl/base/compiler_specific.h"
|
||||
|
||||
namespace jxl {
|
||||
|
||||
@ -19,7 +24,7 @@ constexpr uint16_t kExifOrientationTag = 274;
|
||||
|
||||
// Checks if a blob looks like Exif, and if so, sets bigendian
|
||||
// according to the tiff endianness
|
||||
inline bool IsExif(const std::vector<uint8_t>& exif, bool* bigendian) {
|
||||
JXL_INLINE bool IsExif(const std::vector<uint8_t>& exif, bool* bigendian) {
|
||||
if (exif.size() < 12) return false; // not enough bytes for a valid exif blob
|
||||
const uint8_t* t = exif.data();
|
||||
if (LoadLE32(t) == 0x2A004D4D) {
|
||||
@ -33,8 +38,8 @@ inline bool IsExif(const std::vector<uint8_t>& exif, bool* bigendian) {
|
||||
}
|
||||
|
||||
// Finds the position of an Exif tag, or 0 if it is not found
|
||||
inline size_t FindExifTagPosition(const std::vector<uint8_t>& exif,
|
||||
uint16_t tagname) {
|
||||
JXL_INLINE size_t FindExifTagPosition(const std::vector<uint8_t>& exif,
|
||||
uint16_t tagname) {
|
||||
bool bigendian;
|
||||
if (!IsExif(exif, &bigendian)) return 0;
|
||||
const uint8_t* t = exif.data() + 4;
|
||||
@ -62,8 +67,8 @@ inline size_t FindExifTagPosition(const std::vector<uint8_t>& exif,
|
||||
// Parses the Exif data just enough to extract any render-impacting info.
|
||||
// If the Exif data is invalid or could not be parsed, then it is treated
|
||||
// as a no-op.
|
||||
inline void InterpretExif(const std::vector<uint8_t>& exif,
|
||||
JxlOrientation* orientation) {
|
||||
JXL_INLINE void InterpretExif(const std::vector<uint8_t>& exif,
|
||||
JxlOrientation* orientation) {
|
||||
bool bigendian;
|
||||
if (!IsExif(exif, &bigendian)) return;
|
||||
size_t o_pos = FindExifTagPosition(exif, kExifOrientationTag);
|
@ -6,7 +6,6 @@
|
||||
#ifndef LIB_JXL_CMS_COLOR_ENCODING_CMS_H_
|
||||
#define LIB_JXL_CMS_COLOR_ENCODING_CMS_H_
|
||||
|
||||
#include <jxl/cms.h>
|
||||
#include <jxl/cms_interface.h>
|
||||
#include <jxl/color_encoding.h>
|
||||
#include <jxl/types.h>
|
||||
@ -20,7 +19,6 @@
|
||||
|
||||
#include "lib/jxl/base/common.h"
|
||||
#include "lib/jxl/base/status.h"
|
||||
#include "lib/jxl/cms/jxl_cms_internal.h"
|
||||
|
||||
namespace jxl {
|
||||
namespace cms {
|
||||
@ -521,7 +519,6 @@ struct ColorEncoding {
|
||||
new_icc.data(), new_icc.size(),
|
||||
&external, &new_cmyk));
|
||||
cmyk = new_cmyk;
|
||||
if (cmyk) return true;
|
||||
JXL_RETURN_IF_ERROR(FromExternal(external));
|
||||
icc = std::move(new_icc);
|
||||
return true;
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "lib/jxl/base/common.h"
|
||||
#include "lib/jxl/base/compiler_specific.h"
|
||||
#include "lib/jxl/base/matrix_ops.h"
|
||||
#include "lib/jxl/base/span.h" // Bytes
|
||||
@ -1065,14 +1066,15 @@ static Status MaybeCreateProfileImpl(const JxlColorEncoding& c,
|
||||
|
||||
// Returns a representation of the ColorEncoding fields (not icc).
|
||||
// Example description: "RGB_D65_SRG_Rel_Lin"
|
||||
static std::string ColorEncodingDescription(const JxlColorEncoding& c) {
|
||||
static JXL_MAYBE_UNUSED std::string ColorEncodingDescription(
|
||||
const JxlColorEncoding& c) {
|
||||
return detail::ColorEncodingDescriptionImpl(c);
|
||||
}
|
||||
|
||||
// NOTE: for XYB colorspace, the created profile can be used to transform a
|
||||
// *scaled* XYB image (created by ScaleXYB()) to another colorspace.
|
||||
static Status MaybeCreateProfile(const JxlColorEncoding& c,
|
||||
std::vector<uint8_t>* icc) {
|
||||
static JXL_MAYBE_UNUSED Status MaybeCreateProfile(const JxlColorEncoding& c,
|
||||
std::vector<uint8_t>* icc) {
|
||||
return detail::MaybeCreateProfileImpl(c, icc);
|
||||
}
|
||||
|
||||
|
@ -8,8 +8,10 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <utility>
|
||||
|
||||
#include "lib/jxl/base/common.h"
|
||||
#include "lib/jxl/base/compiler_specific.h"
|
||||
#include "lib/jxl/cms/transfer_functions.h"
|
||||
|
||||
namespace jxl {
|
||||
@ -122,9 +124,10 @@ class HlgOOTF_Base {
|
||||
const float blue_Y_;
|
||||
};
|
||||
|
||||
static void GamutMapScalar(float* red, float* green, float* blue,
|
||||
const float primaries_luminances[3],
|
||||
float preserve_saturation = 0.1f) {
|
||||
static JXL_MAYBE_UNUSED void GamutMapScalar(float* red, float* green,
|
||||
float* blue,
|
||||
const float primaries_luminances[3],
|
||||
float preserve_saturation = 0.1f) {
|
||||
const float luminance = primaries_luminances[0] * *red +
|
||||
primaries_luminances[1] * *green +
|
||||
primaries_luminances[2] * *blue;
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#include "lib/jxl/base/compiler_specific.h"
|
||||
#include "lib/jxl/base/status.h"
|
||||
|
||||
namespace jxl {
|
||||
|
@ -8,7 +8,6 @@
|
||||
|
||||
// Metadata for color space conversions.
|
||||
|
||||
#include <jxl/cms.h>
|
||||
#include <jxl/cms_interface.h>
|
||||
#include <jxl/color_encoding.h>
|
||||
#include <stddef.h>
|
||||
|
12
third_party/jpeg-xl/lib/jxl/convolve.h
vendored
12
third_party/jpeg-xl/lib/jxl/convolve.h
vendored
@ -76,18 +76,22 @@ void SlowSymmetric3(const ImageF& in, const Rect& rect,
|
||||
const WeightsSymmetric3& weights, ThreadPool* pool,
|
||||
ImageF* JXL_RESTRICT out);
|
||||
|
||||
void SlowSeparable5(const ImageF& in, const Rect& rect,
|
||||
void SlowSeparable5(const ImageF& in, const Rect& in_rect,
|
||||
const WeightsSeparable5& weights, ThreadPool* pool,
|
||||
ImageF* out);
|
||||
ImageF* out, const Rect& out_rect);
|
||||
|
||||
void SlowSeparable7(const ImageF& in, const Rect& rect,
|
||||
void SlowSeparable7(const ImageF& in, const Rect& in_rect,
|
||||
const WeightsSeparable7& weights, ThreadPool* pool,
|
||||
ImageF* out);
|
||||
ImageF* out, const Rect& out_rect);
|
||||
|
||||
void Symmetric3(const ImageF& in, const Rect& rect,
|
||||
const WeightsSymmetric3& weights, ThreadPool* pool,
|
||||
ImageF* out);
|
||||
|
||||
void Symmetric5(const ImageF& in, const Rect& in_rect,
|
||||
const WeightsSymmetric5& weights, ThreadPool* pool,
|
||||
ImageF* JXL_RESTRICT out, const Rect& out_rect);
|
||||
|
||||
void Symmetric5(const ImageF& in, const Rect& rect,
|
||||
const WeightsSymmetric5& weights, ThreadPool* pool,
|
||||
ImageF* JXL_RESTRICT out);
|
||||
|
@ -239,7 +239,7 @@ void Separable5(const ImageF& in, const Rect& rect,
|
||||
return Conv::Run(in, rect, weights, pool, out);
|
||||
}
|
||||
|
||||
return SlowSeparable5(in, rect, weights, pool, out);
|
||||
return SlowSeparable5(in, rect, weights, pool, out, Rect(*out));
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
||||
|
@ -263,7 +263,7 @@ void Separable7(const ImageF& in, const Rect& rect,
|
||||
return Conv::Run(in, rect, weights, pool, out);
|
||||
}
|
||||
|
||||
return SlowSeparable7(in, rect, weights, pool, out);
|
||||
return SlowSeparable7(in, rect, weights, pool, out, Rect(*out));
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
||||
|
62
third_party/jpeg-xl/lib/jxl/convolve_slow.cc
vendored
62
third_party/jpeg-xl/lib/jxl/convolve_slow.cc
vendored
@ -141,19 +141,19 @@ float SlowSeparablePixel(const ImageF& in, const Rect& rect, const int64_t x,
|
||||
const int64_t y, const int64_t radius,
|
||||
const float* JXL_RESTRICT horz_weights,
|
||||
const float* JXL_RESTRICT vert_weights) {
|
||||
const size_t xsize = rect.xsize();
|
||||
const size_t ysize = rect.ysize();
|
||||
const size_t xsize = in.xsize();
|
||||
const size_t ysize = in.ysize();
|
||||
const WrapMirror wrap;
|
||||
|
||||
float mul = 0.0f;
|
||||
for (int dy = -radius; dy <= radius; ++dy) {
|
||||
const float wy = vert_weights[std::abs(dy) * 4];
|
||||
const size_t sy = wrap(y + dy, ysize);
|
||||
const size_t sy = wrap(rect.y0() + y + dy, ysize);
|
||||
JXL_CHECK(sy < ysize);
|
||||
const float* const JXL_RESTRICT row = rect.ConstRow(in, sy);
|
||||
const float* const JXL_RESTRICT row = in.ConstRow(sy);
|
||||
for (int dx = -radius; dx <= radius; ++dx) {
|
||||
const float wx = horz_weights[std::abs(dx) * 4];
|
||||
const size_t sx = wrap(x + dx, xsize);
|
||||
const size_t sx = wrap(rect.x0() + x + dx, xsize);
|
||||
JXL_CHECK(sx < xsize);
|
||||
mul += row[sx] * wx * wy;
|
||||
}
|
||||
@ -161,48 +161,44 @@ float SlowSeparablePixel(const ImageF& in, const Rect& rect, const int64_t x,
|
||||
return mul;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void SlowSeparable5(const ImageF& in, const Rect& rect,
|
||||
const WeightsSeparable5& weights, ThreadPool* pool,
|
||||
ImageF* out) {
|
||||
template <int R, typename Weights>
|
||||
void SlowSeparable(const ImageF& in, const Rect& in_rect,
|
||||
const Weights& weights, ThreadPool* pool, ImageF* out,
|
||||
const Rect& out_rect) {
|
||||
JXL_ASSERT(in_rect.xsize() == out_rect.xsize());
|
||||
JXL_ASSERT(in_rect.ysize() == out_rect.ysize());
|
||||
JXL_ASSERT(in_rect.IsInside(Rect(in)));
|
||||
JXL_ASSERT(out_rect.IsInside(Rect(*out)));
|
||||
const float* horz_weights = &weights.horz[0];
|
||||
const float* vert_weights = &weights.vert[0];
|
||||
|
||||
const size_t ysize = rect.ysize();
|
||||
const size_t ysize = in_rect.ysize();
|
||||
JXL_CHECK(RunOnPool(
|
||||
pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
|
||||
[&](const uint32_t task, size_t /*thread*/) {
|
||||
const int64_t y = task;
|
||||
|
||||
float* const JXL_RESTRICT row_out = out->Row(y);
|
||||
for (size_t x = 0; x < rect.xsize(); ++x) {
|
||||
row_out[x] = SlowSeparablePixel(in, rect, x, y, /*radius=*/2,
|
||||
float* const JXL_RESTRICT row_out = out_rect.Row(out, y);
|
||||
for (size_t x = 0; x < in_rect.xsize(); ++x) {
|
||||
row_out[x] = SlowSeparablePixel(in, in_rect, x, y, /*radius=*/R,
|
||||
horz_weights, vert_weights);
|
||||
}
|
||||
},
|
||||
"SlowSeparable5"));
|
||||
"SlowSeparable"));
|
||||
}
|
||||
|
||||
void SlowSeparable7(const ImageF& in, const Rect& rect,
|
||||
} // namespace
|
||||
|
||||
void SlowSeparable5(const ImageF& in, const Rect& in_rect,
|
||||
const WeightsSeparable5& weights, ThreadPool* pool,
|
||||
ImageF* out, const Rect& out_rect) {
|
||||
SlowSeparable<2>(in, in_rect, weights, pool, out, out_rect);
|
||||
}
|
||||
|
||||
void SlowSeparable7(const ImageF& in, const Rect& in_rect,
|
||||
const WeightsSeparable7& weights, ThreadPool* pool,
|
||||
ImageF* out) {
|
||||
const float* horz_weights = &weights.horz[0];
|
||||
const float* vert_weights = &weights.vert[0];
|
||||
|
||||
const size_t ysize = rect.ysize();
|
||||
JXL_CHECK(RunOnPool(
|
||||
pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
|
||||
[&](const uint32_t task, size_t /*thread*/) {
|
||||
const int64_t y = task;
|
||||
|
||||
float* const JXL_RESTRICT row_out = out->Row(y);
|
||||
for (size_t x = 0; x < rect.xsize(); ++x) {
|
||||
row_out[x] = SlowSeparablePixel(in, rect, x, y, /*radius=*/3,
|
||||
horz_weights, vert_weights);
|
||||
}
|
||||
},
|
||||
"SlowSeparable7"));
|
||||
ImageF* out, const Rect& out_rect) {
|
||||
SlowSeparable<3>(in, in_rect, weights, pool, out, out_rect);
|
||||
}
|
||||
|
||||
} // namespace jxl
|
||||
|
@ -52,7 +52,7 @@ static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix,
|
||||
const auto in_p2 = LoadU(d, center + 2);
|
||||
const auto in_m1 = LoadU(d, center - 1);
|
||||
const auto in_p1 = LoadU(d, center + 1);
|
||||
const auto in_00 = Load(d, center);
|
||||
const auto in_00 = LoadU(d, center);
|
||||
const auto sum_2 = Mul(wx2, Add(in_m2, in_p2));
|
||||
const auto sum_1 = Mul(wx1, Add(in_m1, in_p1));
|
||||
const auto sum_0 = Mul(wx0, in_00);
|
||||
@ -61,8 +61,8 @@ static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix,
|
||||
|
||||
// Produces result for one pixel
|
||||
template <class WrapY>
|
||||
float Symmetric5Border(const ImageF& in, const Rect& rect, const int64_t ix,
|
||||
const int64_t iy, const WeightsSymmetric5& weights) {
|
||||
float Symmetric5Border(const ImageF& in, const int64_t ix, const int64_t iy,
|
||||
const WeightsSymmetric5& weights) {
|
||||
const float w0 = weights.c[0];
|
||||
const float w1 = weights.r[0];
|
||||
const float w2 = weights.R[0];
|
||||
@ -70,8 +70,8 @@ float Symmetric5Border(const ImageF& in, const Rect& rect, const int64_t ix,
|
||||
const float w5 = weights.L[0];
|
||||
const float w8 = weights.D[0];
|
||||
|
||||
const size_t xsize = rect.xsize();
|
||||
const size_t ysize = rect.ysize();
|
||||
const size_t xsize = in.xsize();
|
||||
const size_t ysize = in.ysize();
|
||||
const WrapY wrap_y;
|
||||
// Unrolled loop over all 5 rows of the kernel.
|
||||
float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2);
|
||||
@ -88,8 +88,8 @@ float Symmetric5Border(const ImageF& in, const Rect& rect, const int64_t ix,
|
||||
|
||||
// Produces result for one vector's worth of pixels
|
||||
template <class WrapY>
|
||||
static void Symmetric5Interior(const ImageF& in, const Rect& rect,
|
||||
const int64_t ix, const int64_t iy,
|
||||
static void Symmetric5Interior(const ImageF& in, const int64_t ix,
|
||||
const int64_t rix, const int64_t iy,
|
||||
const WeightsSymmetric5& weights,
|
||||
float* JXL_RESTRICT row_out) {
|
||||
const HWY_FULL(float) d;
|
||||
@ -101,7 +101,7 @@ static void Symmetric5Interior(const ImageF& in, const Rect& rect,
|
||||
const auto w5 = LoadDup128(d, weights.L);
|
||||
const auto w8 = LoadDup128(d, weights.D);
|
||||
|
||||
const size_t ysize = rect.ysize();
|
||||
const size_t ysize = in.ysize();
|
||||
const WrapY wrap_y;
|
||||
// Unrolled loop over all 5 rows of the kernel.
|
||||
auto sum0 = WeightedSum(in, wrap_y, ix, iy, ysize, w0, w1, w2);
|
||||
@ -112,7 +112,7 @@ static void Symmetric5Interior(const ImageF& in, const Rect& rect,
|
||||
sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 1, ysize, w1, w4, w5));
|
||||
sum1 = Add(sum1, WeightedSum(in, wrap_y, ix, iy + 1, ysize, w1, w4, w5));
|
||||
|
||||
Store(Add(sum0, sum1), d, row_out + ix);
|
||||
StoreU(Add(sum0, sum1), d, row_out + rix);
|
||||
}
|
||||
|
||||
template <class WrapY>
|
||||
@ -120,45 +120,44 @@ static void Symmetric5Row(const ImageF& in, const Rect& rect, const int64_t iy,
|
||||
const WeightsSymmetric5& weights,
|
||||
float* JXL_RESTRICT row_out) {
|
||||
const int64_t kRadius = 2;
|
||||
const size_t xsize = rect.xsize();
|
||||
const size_t xend = rect.x1();
|
||||
|
||||
size_t ix = 0;
|
||||
size_t rix = 0;
|
||||
size_t ix = rect.x0();
|
||||
const HWY_FULL(float) d;
|
||||
const size_t N = Lanes(d);
|
||||
const size_t aligned_x = RoundUpTo(kRadius, N);
|
||||
for (; ix < std::min(aligned_x, xsize); ++ix) {
|
||||
row_out[ix] = Symmetric5Border<WrapY>(in, rect, ix, iy, weights);
|
||||
for (; ix < std::min(aligned_x, xend); ++ix, ++rix) {
|
||||
row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights);
|
||||
}
|
||||
for (; ix + N + kRadius <= xsize; ix += N) {
|
||||
Symmetric5Interior<WrapY>(in, rect, ix, iy, weights, row_out);
|
||||
for (; ix + N + kRadius <= xend; ix += N, rix += N) {
|
||||
Symmetric5Interior<WrapY>(in, ix, rix, iy, weights, row_out);
|
||||
}
|
||||
for (; ix < xsize; ++ix) {
|
||||
row_out[ix] = Symmetric5Border<WrapY>(in, rect, ix, iy, weights);
|
||||
for (; ix < xend; ++ix, ++rix) {
|
||||
row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights);
|
||||
}
|
||||
}
|
||||
|
||||
static JXL_NOINLINE void Symmetric5BorderRow(const ImageF& in, const Rect& rect,
|
||||
const int64_t iy,
|
||||
const WeightsSymmetric5& weights,
|
||||
float* JXL_RESTRICT row_out) {
|
||||
return Symmetric5Row<WrapMirror>(in, rect, iy, weights, row_out);
|
||||
}
|
||||
|
||||
// Semi-vectorized (interior pixels Fonly); called directly like slow::, unlike
|
||||
// the fully vectorized strategies below.
|
||||
void Symmetric5(const ImageF& in, const Rect& rect,
|
||||
void Symmetric5(const ImageF& in, const Rect& in_rect,
|
||||
const WeightsSymmetric5& weights, ThreadPool* pool,
|
||||
ImageF* JXL_RESTRICT out) {
|
||||
const size_t ysize = rect.ysize();
|
||||
ImageF* JXL_RESTRICT out, const Rect& out_rect) {
|
||||
JXL_ASSERT(in_rect.xsize() == out_rect.xsize());
|
||||
JXL_ASSERT(in_rect.ysize() == out_rect.ysize());
|
||||
const size_t ysize = in_rect.ysize();
|
||||
JXL_CHECK(RunOnPool(
|
||||
pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
|
||||
[&](const uint32_t task, size_t /*thread*/) {
|
||||
const int64_t iy = task;
|
||||
const int64_t riy = task;
|
||||
const int64_t iy = in_rect.y0() + riy;
|
||||
|
||||
if (iy < 2 || iy >= static_cast<ssize_t>(ysize) - 2) {
|
||||
Symmetric5BorderRow(in, rect, iy, weights, out->Row(iy));
|
||||
if (iy < 2 || iy >= static_cast<ssize_t>(in.ysize()) - 2) {
|
||||
Symmetric5Row<WrapMirror>(in, in_rect, iy, weights,
|
||||
out_rect.Row(out, riy));
|
||||
} else {
|
||||
Symmetric5Row<WrapUnchanged>(in, rect, iy, weights, out->Row(iy));
|
||||
Symmetric5Row<WrapUnchanged>(in, in_rect, iy, weights,
|
||||
out_rect.Row(out, riy));
|
||||
}
|
||||
},
|
||||
"Symmetric5x5Convolution"));
|
||||
@ -173,10 +172,17 @@ HWY_AFTER_NAMESPACE();
|
||||
namespace jxl {
|
||||
|
||||
HWY_EXPORT(Symmetric5);
|
||||
void Symmetric5(const ImageF& in, const Rect& in_rect,
|
||||
const WeightsSymmetric5& weights, ThreadPool* pool,
|
||||
ImageF* JXL_RESTRICT out, const Rect& out_rect) {
|
||||
return HWY_DYNAMIC_DISPATCH(Symmetric5)(in, in_rect, weights, pool, out,
|
||||
out_rect);
|
||||
}
|
||||
|
||||
void Symmetric5(const ImageF& in, const Rect& rect,
|
||||
const WeightsSymmetric5& weights, ThreadPool* pool,
|
||||
ImageF* JXL_RESTRICT out) {
|
||||
return HWY_DYNAMIC_DISPATCH(Symmetric5)(in, rect, weights, pool, out);
|
||||
return Symmetric5(in, rect, weights, pool, out, Rect(*out));
|
||||
}
|
||||
|
||||
} // namespace jxl
|
||||
|
58
third_party/jpeg-xl/lib/jxl/convolve_test.cc
vendored
58
third_party/jpeg-xl/lib/jxl/convolve_test.cc
vendored
@ -81,21 +81,57 @@ void VerifySymmetric3(const size_t xsize, const size_t ysize, ThreadPool* pool,
|
||||
JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
|
||||
}
|
||||
|
||||
std::vector<Rect> GenerateTestRectangles(size_t xsize, size_t ysize) {
|
||||
std::vector<Rect> out;
|
||||
for (size_t tl : {0, 1, 13}) {
|
||||
for (size_t br : {0, 1, 13}) {
|
||||
if (xsize > tl + br && ysize > tl + br) {
|
||||
out.push_back(Rect(tl, tl, xsize - tl - br, ysize - tl - br));
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
// Ensures Symmetric and Separable give the same result.
|
||||
void VerifySymmetric5(const size_t xsize, const size_t ysize, ThreadPool* pool,
|
||||
Rng* rng) {
|
||||
const Rect rect(0, 0, xsize, ysize);
|
||||
|
||||
ImageF in(xsize, ysize);
|
||||
GenerateImage(*rng, &in, 0.0f, 1.0f);
|
||||
|
||||
ImageF out_expected(xsize, ysize);
|
||||
ImageF out_actual(xsize, ysize);
|
||||
for (const Rect& in_rect : GenerateTestRectangles(xsize, ysize)) {
|
||||
JXL_DEBUG(JXL_DEBUG_CONVOLVE,
|
||||
"in_rect: %" PRIuS "x%" PRIuS "+%" PRIuS ",%" PRIuS "",
|
||||
in_rect.xsize(), in_rect.ysize(), in_rect.x0(), in_rect.y0());
|
||||
{
|
||||
Rect out_rect = in_rect;
|
||||
ImageF out_expected(xsize, ysize);
|
||||
ImageF out_actual(xsize, ysize);
|
||||
FillImage(-1.0f, &out_expected);
|
||||
FillImage(-1.0f, &out_actual);
|
||||
|
||||
Separable5(in, Rect(in), WeightsSeparable5Lowpass(), pool, &out_expected);
|
||||
Symmetric5(in, rect, WeightsSymmetric5Lowpass(), pool, &out_actual);
|
||||
SlowSeparable5(in, in_rect, WeightsSeparable5Lowpass(), pool,
|
||||
&out_expected, out_rect);
|
||||
Symmetric5(in, in_rect, WeightsSymmetric5Lowpass(), pool, &out_actual,
|
||||
out_rect);
|
||||
|
||||
JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
|
||||
JXL_ASSERT_OK(
|
||||
VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
|
||||
}
|
||||
{
|
||||
Rect out_rect(0, 0, in_rect.xsize(), in_rect.ysize());
|
||||
ImageF out_expected(out_rect.xsize(), out_rect.ysize());
|
||||
ImageF out_actual(out_rect.xsize(), out_rect.ysize());
|
||||
|
||||
SlowSeparable5(in, in_rect, WeightsSeparable5Lowpass(), pool,
|
||||
&out_expected, out_rect);
|
||||
Symmetric5(in, in_rect, WeightsSymmetric5Lowpass(), pool, &out_actual,
|
||||
out_rect);
|
||||
|
||||
JXL_ASSERT_OK(
|
||||
VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VerifySeparable5(const size_t xsize, const size_t ysize, ThreadPool* pool,
|
||||
@ -109,8 +145,8 @@ void VerifySeparable5(const size_t xsize, const size_t ysize, ThreadPool* pool,
|
||||
ImageF out_actual(xsize, ysize);
|
||||
|
||||
const WeightsSeparable5& weights = WeightsSeparable5Lowpass();
|
||||
Separable5(in, Rect(in), weights, pool, &out_expected);
|
||||
SlowSeparable5(in, rect, weights, pool, &out_actual);
|
||||
SlowSeparable5(in, rect, weights, pool, &out_expected, rect);
|
||||
Separable5(in, rect, weights, pool, &out_actual);
|
||||
|
||||
JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
|
||||
}
|
||||
@ -131,8 +167,8 @@ void VerifySeparable7(const size_t xsize, const size_t ysize, ThreadPool* pool,
|
||||
{HWY_REP4(0.383103f), HWY_REP4(0.241843f),
|
||||
HWY_REP4(0.060626f), HWY_REP4(0.00598f)}};
|
||||
|
||||
SlowSeparable7(in, rect, weights, pool, &out_expected);
|
||||
Separable7(in, Rect(in), weights, pool, &out_actual);
|
||||
SlowSeparable7(in, rect, weights, pool, &out_expected, rect);
|
||||
Separable7(in, rect, weights, pool, &out_actual);
|
||||
|
||||
JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
|
||||
}
|
||||
|
64
third_party/jpeg-xl/lib/jxl/enc_ac_strategy.cc
vendored
64
third_party/jpeg-xl/lib/jxl/enc_ac_strategy.cc
vendored
@ -790,8 +790,9 @@ void FindBestFirstLevelDivisionForSquare(
|
||||
}
|
||||
}
|
||||
|
||||
void ProcessRectACS(PassesEncoderState* JXL_RESTRICT enc_state,
|
||||
const ACSConfig& config, const Rect& rect) {
|
||||
void ProcessRectACS(const CompressParams& cparams, const ACSConfig& config,
|
||||
const Rect& rect, const ColorCorrelationMap& cmap,
|
||||
AcStrategyImage* ac_strategy) {
|
||||
// Main philosophy here:
|
||||
// 1. First find best 8x8 transform for each area.
|
||||
// 2. Merging them into larger transforms where possibly, but
|
||||
@ -802,9 +803,7 @@ void ProcessRectACS(PassesEncoderState* JXL_RESTRICT enc_state,
|
||||
// maps happen to be at that resolution, and having
|
||||
// integral transforms cross these boundaries leads to
|
||||
// additional complications.
|
||||
const CompressParams& cparams = enc_state->cparams;
|
||||
const float butteraugli_target = cparams.butteraugli_distance;
|
||||
AcStrategyImage* ac_strategy = &enc_state->shared.ac_strategy;
|
||||
const size_t dct_scratch_size =
|
||||
3 * (MaxVectorSize() / sizeof(float)) * AcStrategy::kMaxBlockDim;
|
||||
// TODO(veluca): reuse allocations
|
||||
@ -821,11 +820,9 @@ void ProcessRectACS(PassesEncoderState* JXL_RESTRICT enc_state,
|
||||
size_t tx = bx / kColorTileDimInBlocks;
|
||||
size_t ty = by / kColorTileDimInBlocks;
|
||||
const float cmap_factors[3] = {
|
||||
enc_state->shared.cmap.YtoXRatio(
|
||||
enc_state->shared.cmap.ytox_map.ConstRow(ty)[tx]),
|
||||
cmap.YtoXRatio(cmap.ytox_map.ConstRow(ty)[tx]),
|
||||
0.0f,
|
||||
enc_state->shared.cmap.YtoBRatio(
|
||||
enc_state->shared.cmap.ytob_map.ConstRow(ty)[tx]),
|
||||
cmap.YtoBRatio(cmap.ytob_map.ConstRow(ty)[tx]),
|
||||
};
|
||||
if (cparams.speed_tier > SpeedTier::kHare) return;
|
||||
// First compute the best 8x8 transform for each square. Later, we do not
|
||||
@ -1040,28 +1037,26 @@ HWY_AFTER_NAMESPACE();
|
||||
namespace jxl {
|
||||
HWY_EXPORT(ProcessRectACS);
|
||||
|
||||
void AcStrategyHeuristics::Init(const Image3F& src,
|
||||
PassesEncoderState* enc_state) {
|
||||
this->enc_state = enc_state;
|
||||
config.dequant = &enc_state->shared.matrices;
|
||||
const CompressParams& cparams = enc_state->cparams;
|
||||
void AcStrategyHeuristics::Init(const Image3F& src, const Rect& rect_in,
|
||||
const ImageF& quant_field, const ImageF& mask,
|
||||
const ImageF& mask1x1,
|
||||
DequantMatrices* matrices) {
|
||||
config.dequant = matrices;
|
||||
|
||||
if (cparams.speed_tier >= SpeedTier::kCheetah) {
|
||||
JXL_CHECK(enc_state->shared.matrices.EnsureComputed(1)); // DCT8 only
|
||||
JXL_CHECK(matrices->EnsureComputed(1)); // DCT8 only
|
||||
} else {
|
||||
uint32_t acs_mask = 0;
|
||||
// All transforms up to 64x64.
|
||||
for (size_t i = 0; i < AcStrategy::DCT128X128; i++) {
|
||||
acs_mask |= (1 << i);
|
||||
}
|
||||
JXL_CHECK(enc_state->shared.matrices.EnsureComputed(acs_mask));
|
||||
JXL_CHECK(matrices->EnsureComputed(acs_mask));
|
||||
}
|
||||
|
||||
// Image row pointers and strides.
|
||||
config.quant_field_row = enc_state->initial_quant_field.Row(0);
|
||||
config.quant_field_stride = enc_state->initial_quant_field.PixelsPerRow();
|
||||
auto& mask = enc_state->initial_quant_masking;
|
||||
auto& mask1x1 = enc_state->initial_quant_masking1x1;
|
||||
config.quant_field_row = quant_field.Row(0);
|
||||
config.quant_field_stride = quant_field.PixelsPerRow();
|
||||
if (mask.xsize() > 0 && mask.ysize() > 0) {
|
||||
config.masking_field_row = mask.Row(0);
|
||||
config.masking_field_stride = mask.PixelsPerRow();
|
||||
@ -1071,9 +1066,9 @@ void AcStrategyHeuristics::Init(const Image3F& src,
|
||||
config.masking1x1_field_stride = mask1x1.PixelsPerRow();
|
||||
}
|
||||
|
||||
config.src_rows[0] = src.ConstPlaneRow(0, 0);
|
||||
config.src_rows[1] = src.ConstPlaneRow(1, 0);
|
||||
config.src_rows[2] = src.ConstPlaneRow(2, 0);
|
||||
config.src_rows[0] = rect_in.ConstPlaneRow(src, 0, 0);
|
||||
config.src_rows[1] = rect_in.ConstPlaneRow(src, 1, 0);
|
||||
config.src_rows[2] = rect_in.ConstPlaneRow(src, 2, 0);
|
||||
config.src_stride = src.PixelsPerRow();
|
||||
|
||||
// Entropy estimate is composed of two factors:
|
||||
@ -1093,25 +1088,23 @@ void AcStrategyHeuristics::Init(const Image3F& src,
|
||||
config.info_loss_multiplier *= pow(ratio, kPow1);
|
||||
config.zeros_mul *= pow(ratio, kPow2);
|
||||
config.cost_delta *= pow(ratio, kPow3);
|
||||
JXL_ASSERT(enc_state->shared.ac_strategy.xsize() ==
|
||||
enc_state->shared.frame_dim.xsize_blocks);
|
||||
JXL_ASSERT(enc_state->shared.ac_strategy.ysize() ==
|
||||
enc_state->shared.frame_dim.ysize_blocks);
|
||||
}
|
||||
|
||||
void AcStrategyHeuristics::ProcessRect(const Rect& rect) {
|
||||
const CompressParams& cparams = enc_state->cparams;
|
||||
void AcStrategyHeuristics::ProcessRect(const Rect& rect,
|
||||
const ColorCorrelationMap& cmap,
|
||||
AcStrategyImage* ac_strategy) {
|
||||
// In Falcon mode, use DCT8 everywhere and uniform quantization.
|
||||
if (cparams.speed_tier >= SpeedTier::kCheetah) {
|
||||
enc_state->shared.ac_strategy.FillDCT8(rect);
|
||||
ac_strategy->FillDCT8(rect);
|
||||
return;
|
||||
}
|
||||
HWY_DYNAMIC_DISPATCH(ProcessRectACS)
|
||||
(enc_state, config, rect);
|
||||
(cparams, config, rect, cmap, ac_strategy);
|
||||
}
|
||||
|
||||
void AcStrategyHeuristics::Finalize(AuxOut* aux_out) {
|
||||
const auto& ac_strategy = enc_state->shared.ac_strategy;
|
||||
void AcStrategyHeuristics::Finalize(const FrameDimensions& frame_dim,
|
||||
const AcStrategyImage& ac_strategy,
|
||||
AuxOut* aux_out) {
|
||||
// Accounting and debug output.
|
||||
if (aux_out != nullptr) {
|
||||
aux_out->num_small_blocks =
|
||||
@ -1147,10 +1140,9 @@ void AcStrategyHeuristics::Finalize(AuxOut* aux_out) {
|
||||
}
|
||||
|
||||
// if (JXL_DEBUG_AC_STRATEGY && WantDebugOutput(aux_out)) {
|
||||
if (JXL_DEBUG_AC_STRATEGY && WantDebugOutput(enc_state->cparams)) {
|
||||
DumpAcStrategy(ac_strategy, enc_state->shared.frame_dim.xsize,
|
||||
enc_state->shared.frame_dim.ysize, "ac_strategy", aux_out,
|
||||
enc_state->cparams);
|
||||
if (JXL_DEBUG_AC_STRATEGY && WantDebugOutput(cparams)) {
|
||||
DumpAcStrategy(ac_strategy, frame_dim.xsize, frame_dim.ysize, "ac_strategy",
|
||||
aux_out, cparams);
|
||||
}
|
||||
}
|
||||
|
||||
|
21
third_party/jpeg-xl/lib/jxl/enc_ac_strategy.h
vendored
21
third_party/jpeg-xl/lib/jxl/enc_ac_strategy.h
vendored
@ -25,11 +25,11 @@ struct AuxOut;
|
||||
|
||||
struct ACSConfig {
|
||||
const DequantMatrices* JXL_RESTRICT dequant;
|
||||
float* JXL_RESTRICT quant_field_row;
|
||||
const float* JXL_RESTRICT quant_field_row;
|
||||
size_t quant_field_stride;
|
||||
float* JXL_RESTRICT masking_field_row;
|
||||
const float* JXL_RESTRICT masking_field_row;
|
||||
size_t masking_field_stride;
|
||||
float* JXL_RESTRICT masking1x1_field_row;
|
||||
const float* JXL_RESTRICT masking1x1_field_row;
|
||||
size_t masking1x1_field_stride;
|
||||
const float* JXL_RESTRICT src_rows[3];
|
||||
size_t src_stride;
|
||||
@ -43,7 +43,7 @@ struct ACSConfig {
|
||||
JXL_DASSERT(masking_field_row[by * masking_field_stride + bx] > 0);
|
||||
return masking_field_row[by * masking_field_stride + bx];
|
||||
}
|
||||
float* MaskingPtr1x1(size_t bx, size_t by) const {
|
||||
const float* MaskingPtr1x1(size_t bx, size_t by) const {
|
||||
JXL_DASSERT(masking1x1_field_row[by * masking1x1_field_stride + bx] > 0);
|
||||
return &masking1x1_field_row[by * masking1x1_field_stride + bx];
|
||||
}
|
||||
@ -54,11 +54,16 @@ struct ACSConfig {
|
||||
};
|
||||
|
||||
struct AcStrategyHeuristics {
|
||||
void Init(const Image3F& src, PassesEncoderState* enc_state);
|
||||
void ProcessRect(const Rect& rect);
|
||||
void Finalize(AuxOut* aux_out);
|
||||
AcStrategyHeuristics(const CompressParams& cparams) : cparams(cparams) {}
|
||||
void Init(const Image3F& src, const Rect& rect_in, const ImageF& quant_field,
|
||||
const ImageF& mask, const ImageF& mask1x1,
|
||||
DequantMatrices* matrices);
|
||||
void ProcessRect(const Rect& rect, const ColorCorrelationMap& cmap,
|
||||
AcStrategyImage* ac_strategy);
|
||||
void Finalize(const FrameDimensions& frame_dim,
|
||||
const AcStrategyImage& ac_strategy, AuxOut* aux_out);
|
||||
const CompressParams& cparams;
|
||||
ACSConfig config;
|
||||
PassesEncoderState* enc_state;
|
||||
};
|
||||
|
||||
} // namespace jxl
|
||||
|
@ -164,7 +164,8 @@ V SimpleGamma(const D d, V v) {
|
||||
|
||||
template <class D, class V>
|
||||
V GammaModulation(const D d, const size_t x, const size_t y,
|
||||
const ImageF& xyb_x, const ImageF& xyb_y, const V out_val) {
|
||||
const ImageF& xyb_x, const ImageF& xyb_y, const Rect& rect,
|
||||
const V out_val) {
|
||||
const float kBias = 0.16f;
|
||||
JXL_DASSERT(kBias > jxl::cms::kOpsinAbsorbanceBias[0]);
|
||||
JXL_DASSERT(kBias > jxl::cms::kOpsinAbsorbanceBias[1]);
|
||||
@ -173,8 +174,8 @@ V GammaModulation(const D d, const size_t x, const size_t y,
|
||||
auto bias = Set(d, kBias);
|
||||
auto half = Set(d, 0.5f);
|
||||
for (size_t dy = 0; dy < 8; ++dy) {
|
||||
const float* const JXL_RESTRICT row_in_x = xyb_x.Row(y + dy);
|
||||
const float* const JXL_RESTRICT row_in_y = xyb_y.Row(y + dy);
|
||||
const float* const JXL_RESTRICT row_in_x = rect.ConstRow(xyb_x, y + dy);
|
||||
const float* const JXL_RESTRICT row_in_y = rect.ConstRow(xyb_y, y + dy);
|
||||
for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
|
||||
const auto iny = Add(Load(d, row_in_y + x + dx), bias);
|
||||
const auto inx = Load(d, row_in_x + x + dx);
|
||||
@ -201,7 +202,7 @@ V GammaModulation(const D d, const size_t x, const size_t y,
|
||||
// Change precision in 8x8 blocks that have high frequency content.
|
||||
template <class D, class V>
|
||||
V HfModulation(const D d, const size_t x, const size_t y, const ImageF& xyb,
|
||||
const V out_val) {
|
||||
const Rect& rect, const V out_val) {
|
||||
// Zero out the invalid differences for the rightmost value per row.
|
||||
const Rebind<uint32_t, D> du;
|
||||
HWY_ALIGN constexpr uint32_t kMaskRight[kBlockDim] = {~0u, ~0u, ~0u, ~0u,
|
||||
@ -212,9 +213,9 @@ V HfModulation(const D d, const size_t x, const size_t y, const ImageF& xyb,
|
||||
static const float valmin = 0.020602694503245016f;
|
||||
auto valminv = Set(d, valmin);
|
||||
for (size_t dy = 0; dy < 8; ++dy) {
|
||||
const float* JXL_RESTRICT row_in = xyb.Row(y + dy) + x;
|
||||
const float* JXL_RESTRICT row_in = rect.ConstRow(xyb, y + dy) + x;
|
||||
const float* JXL_RESTRICT row_in_next =
|
||||
dy == 7 ? row_in : xyb.Row(y + dy + 1) + x;
|
||||
dy == 7 ? row_in : rect.ConstRow(xyb, y + dy + 1) + x;
|
||||
|
||||
// In SCALAR, there is no guarantee of having extra row padding.
|
||||
// Hence, we need to ensure we don't access pixels outside the row itself.
|
||||
@ -252,11 +253,8 @@ V HfModulation(const D d, const size_t x, const size_t y, const ImageF& xyb,
|
||||
|
||||
void PerBlockModulations(const float butteraugli_target, const ImageF& xyb_x,
|
||||
const ImageF& xyb_y, const ImageF& xyb_b,
|
||||
const float scale, const Rect& rect, ImageF* out) {
|
||||
JXL_ASSERT(SameSize(xyb_x, xyb_y));
|
||||
JXL_ASSERT(DivCeil(xyb_x.xsize(), kBlockDim) == out->xsize());
|
||||
JXL_ASSERT(DivCeil(xyb_x.ysize(), kBlockDim) == out->ysize());
|
||||
|
||||
const Rect& rect_in, const float scale,
|
||||
const Rect& rect_out, ImageF* out) {
|
||||
float base_level = 0.48f * scale;
|
||||
float kDampenRampStart = 2.0f;
|
||||
float kDampenRampEnd = 14.0f;
|
||||
@ -270,16 +268,16 @@ void PerBlockModulations(const float butteraugli_target, const ImageF& xyb_x,
|
||||
}
|
||||
const float mul = scale * dampen;
|
||||
const float add = (1.0f - dampen) * base_level;
|
||||
for (size_t iy = rect.y0(); iy < rect.y0() + rect.ysize(); iy++) {
|
||||
for (size_t iy = rect_out.y0(); iy < rect_out.y1(); iy++) {
|
||||
const size_t y = iy * 8;
|
||||
float* const JXL_RESTRICT row_out = out->Row(iy);
|
||||
const HWY_CAPPED(float, kBlockDim) df;
|
||||
for (size_t ix = rect.x0(); ix < rect.x0() + rect.xsize(); ix++) {
|
||||
for (size_t ix = rect_out.x0(); ix < rect_out.x1(); ix++) {
|
||||
size_t x = ix * 8;
|
||||
auto out_val = Set(df, row_out[ix]);
|
||||
out_val = ComputeMask(df, out_val);
|
||||
out_val = HfModulation(df, x, y, xyb_y, out_val);
|
||||
out_val = GammaModulation(df, x, y, xyb_x, xyb_y, out_val);
|
||||
out_val = HfModulation(df, x, y, xyb_y, rect_in, out_val);
|
||||
out_val = GammaModulation(df, x, y, xyb_x, xyb_y, rect_in, out_val);
|
||||
// We want multiplicative quantization field, so everything
|
||||
// until this point has been modulating the exponent.
|
||||
row_out[ix] = FastPow2f(GetLane(out_val) * 1.442695041f) * mul + add;
|
||||
@ -399,13 +397,6 @@ void FuzzyErosion(const float butteraugli_target, const Rect& from_rect,
|
||||
}
|
||||
|
||||
struct AdaptiveQuantizationImpl {
|
||||
void Init(const Image3F& xyb) {
|
||||
JXL_DASSERT(xyb.xsize() % kBlockDim == 0);
|
||||
JXL_DASSERT(xyb.ysize() % kBlockDim == 0);
|
||||
const size_t xsize = xyb.xsize();
|
||||
const size_t ysize = xyb.ysize();
|
||||
aq_map = ImageF(xsize / kBlockDim, ysize / kBlockDim);
|
||||
}
|
||||
void PrepareBuffers(size_t num_threads) {
|
||||
diff_buffer = ImageF(kEncTileDim + 8, num_threads);
|
||||
for (size_t i = pre_erosion.size(); i < num_threads; i++) {
|
||||
@ -415,10 +406,10 @@ struct AdaptiveQuantizationImpl {
|
||||
}
|
||||
|
||||
void ComputeTile(float butteraugli_target, float scale, const Image3F& xyb,
|
||||
const Rect& rect, const int thread, ImageF* mask,
|
||||
ImageF* mask1x1) {
|
||||
const size_t xsize = xyb.xsize();
|
||||
const size_t ysize = xyb.ysize();
|
||||
const Rect& rect_in, const Rect& rect_out, const int thread,
|
||||
ImageF* mask, ImageF* mask1x1) {
|
||||
const size_t xsize = rect_in.xsize();
|
||||
const size_t ysize = rect_in.ysize();
|
||||
|
||||
// The XYB gamma is 3.0 to be able to decode faster with two muls.
|
||||
// Butteraugli's gamma is matching the gamma of human eye, around 2.6.
|
||||
@ -429,11 +420,11 @@ struct AdaptiveQuantizationImpl {
|
||||
|
||||
const HWY_FULL(float) df;
|
||||
|
||||
size_t y_start = rect.y0() * 8;
|
||||
size_t y_end = y_start + rect.ysize() * 8;
|
||||
size_t y_start = rect_out.y0() * 8;
|
||||
size_t y_end = y_start + rect_out.ysize() * 8;
|
||||
|
||||
size_t x_start = rect.x0() * 8;
|
||||
size_t x_end = x_start + rect.xsize() * 8;
|
||||
size_t x_start = rect_out.x0() * 8;
|
||||
size_t x_end = x_start + rect_out.xsize() * 8;
|
||||
|
||||
// Computes image (padded to multiple of 8x8) of local pixel differences.
|
||||
// Subsample both directions by 4.
|
||||
@ -441,9 +432,9 @@ struct AdaptiveQuantizationImpl {
|
||||
for (size_t y = y_start; y < y_end; ++y) {
|
||||
const size_t y2 = y + 1 < ysize ? y + 1 : y;
|
||||
const size_t y1 = y > 0 ? y - 1 : y;
|
||||
const float* row_in = xyb.PlaneRow(1, y);
|
||||
const float* row_in1 = xyb.PlaneRow(1, y1);
|
||||
const float* row_in2 = xyb.PlaneRow(1, y2);
|
||||
const float* row_in = rect_in.ConstPlaneRow(xyb, 1, y);
|
||||
const float* row_in1 = rect_in.ConstPlaneRow(xyb, 1, y1);
|
||||
const float* row_in2 = rect_in.ConstPlaneRow(xyb, 1, y2);
|
||||
float* mask1x1_out = mask1x1->Row(y);
|
||||
auto scalar_pixel1x1 = [&](size_t x) {
|
||||
const size_t x2 = x + 1 < xsize ? x + 1 : x;
|
||||
@ -466,9 +457,9 @@ struct AdaptiveQuantizationImpl {
|
||||
}
|
||||
|
||||
if (x_start != 0) x_start -= 4;
|
||||
if (x_end != xyb.xsize()) x_end += 4;
|
||||
if (x_end != rect_in.xsize()) x_end += 4;
|
||||
if (y_start != 0) y_start -= 4;
|
||||
if (y_end != xyb.ysize()) y_end += 4;
|
||||
if (y_end != rect_in.ysize()) y_end += 4;
|
||||
pre_erosion[thread].ShrinkTo((x_end - x_start) / 4, (y_end - y_start) / 4);
|
||||
|
||||
static const float limit = 0.2f;
|
||||
@ -476,9 +467,9 @@ struct AdaptiveQuantizationImpl {
|
||||
size_t y2 = y + 1 < ysize ? y + 1 : y;
|
||||
size_t y1 = y > 0 ? y - 1 : y;
|
||||
|
||||
const float* row_in = xyb.PlaneRow(1, y);
|
||||
const float* row_in1 = xyb.PlaneRow(1, y1);
|
||||
const float* row_in2 = xyb.PlaneRow(1, y2);
|
||||
const float* row_in = rect_in.ConstPlaneRow(xyb, 1, y);
|
||||
const float* row_in1 = rect_in.ConstPlaneRow(xyb, 1, y1);
|
||||
const float* row_in2 = rect_in.ConstPlaneRow(xyb, 1, y2);
|
||||
float* JXL_RESTRICT row_out = diff_buffer.Row(thread);
|
||||
|
||||
auto scalar_pixel = [&](size_t x) {
|
||||
@ -543,26 +534,25 @@ struct AdaptiveQuantizationImpl {
|
||||
}
|
||||
}
|
||||
Rect from_rect(x_start % 8 == 0 ? 0 : 1, y_start % 8 == 0 ? 0 : 1,
|
||||
rect.xsize() * 2, rect.ysize() * 2);
|
||||
FuzzyErosion(butteraugli_target, from_rect, pre_erosion[thread], rect,
|
||||
rect_out.xsize() * 2, rect_out.ysize() * 2);
|
||||
FuzzyErosion(butteraugli_target, from_rect, pre_erosion[thread], rect_out,
|
||||
&aq_map);
|
||||
for (size_t y = 0; y < rect.ysize(); ++y) {
|
||||
const float* aq_map_row = rect.ConstRow(aq_map, y);
|
||||
float* mask_row = rect.Row(mask, y);
|
||||
for (size_t x = 0; x < rect.xsize(); ++x) {
|
||||
for (size_t y = 0; y < rect_out.ysize(); ++y) {
|
||||
const float* aq_map_row = rect_out.ConstRow(aq_map, y);
|
||||
float* mask_row = rect_out.Row(mask, y);
|
||||
for (size_t x = 0; x < rect_out.xsize(); ++x) {
|
||||
mask_row[x] = ComputeMaskForAcStrategyUse(aq_map_row[x]);
|
||||
}
|
||||
}
|
||||
PerBlockModulations(butteraugli_target, xyb.Plane(0), xyb.Plane(1),
|
||||
xyb.Plane(2), scale, rect, &aq_map);
|
||||
xyb.Plane(2), rect_in, scale, rect_out, &aq_map);
|
||||
}
|
||||
std::vector<ImageF> pre_erosion;
|
||||
ImageF aq_map;
|
||||
ImageF diff_buffer;
|
||||
};
|
||||
|
||||
static void Blur1x1Masking(const FrameDimensions& frame_dim, ThreadPool* pool,
|
||||
ImageF* mask1x1) {
|
||||
static void Blur1x1Masking(ThreadPool* pool, ImageF* mask1x1) {
|
||||
// Blur the mask1x1 to obtain the masking image.
|
||||
// Before blurring it contains an image of absolute value of the
|
||||
// Laplacian of the intensity channel.
|
||||
@ -588,47 +578,47 @@ static void Blur1x1Masking(const FrameDimensions& frame_dim, ThreadPool* pool,
|
||||
{HWY_REP4(normalize_mul * kFilterMask1x1[1])},
|
||||
{HWY_REP4(normalize_mul * kFilterMask1x1[4])},
|
||||
{HWY_REP4(normalize_mul * kFilterMask1x1[3])}};
|
||||
Rect from_rect(0, 0, 8 * frame_dim.xsize_blocks, 8 * frame_dim.ysize_blocks);
|
||||
Rect from_rect(0, 0, mask1x1->xsize(), mask1x1->ysize());
|
||||
ImageF temp(mask1x1->xsize(), mask1x1->ysize());
|
||||
Symmetric5(*mask1x1, from_rect, weights, pool, &temp);
|
||||
CopyImageTo(temp, mask1x1); // TODO: make it a swap
|
||||
}
|
||||
|
||||
ImageF AdaptiveQuantizationMap(const float butteraugli_target,
|
||||
const Image3F& xyb,
|
||||
const FrameDimensions& frame_dim, float scale,
|
||||
ThreadPool* pool, ImageF* mask,
|
||||
const Image3F& xyb, const Rect& rect,
|
||||
float scale, ThreadPool* pool, ImageF* mask,
|
||||
ImageF* mask1x1) {
|
||||
JXL_DASSERT(rect.xsize() % kBlockDim == 0);
|
||||
JXL_DASSERT(rect.ysize() % kBlockDim == 0);
|
||||
AdaptiveQuantizationImpl impl;
|
||||
impl.Init(xyb);
|
||||
*mask = ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
|
||||
*mask1x1 = ImageF(8 * frame_dim.xsize_blocks, 8 * frame_dim.ysize_blocks);
|
||||
const size_t xsize_blocks = rect.xsize() / kBlockDim;
|
||||
const size_t ysize_blocks = rect.ysize() / kBlockDim;
|
||||
impl.aq_map = ImageF(xsize_blocks, ysize_blocks);
|
||||
*mask = ImageF(xsize_blocks, ysize_blocks);
|
||||
*mask1x1 = ImageF(rect.xsize(), rect.ysize());
|
||||
JXL_CHECK(RunOnPool(
|
||||
pool, 0,
|
||||
DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks) *
|
||||
DivCeil(frame_dim.ysize_blocks, kEncTileDimInBlocks),
|
||||
DivCeil(xsize_blocks, kEncTileDimInBlocks) *
|
||||
DivCeil(ysize_blocks, kEncTileDimInBlocks),
|
||||
[&](const size_t num_threads) {
|
||||
impl.PrepareBuffers(num_threads);
|
||||
return true;
|
||||
},
|
||||
[&](const uint32_t tid, const size_t thread) {
|
||||
size_t n_enc_tiles =
|
||||
DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks);
|
||||
size_t n_enc_tiles = DivCeil(xsize_blocks, kEncTileDimInBlocks);
|
||||
size_t tx = tid % n_enc_tiles;
|
||||
size_t ty = tid / n_enc_tiles;
|
||||
size_t by0 = ty * kEncTileDimInBlocks;
|
||||
size_t by1 =
|
||||
std::min((ty + 1) * kEncTileDimInBlocks, frame_dim.ysize_blocks);
|
||||
size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks, ysize_blocks);
|
||||
size_t bx0 = tx * kEncTileDimInBlocks;
|
||||
size_t bx1 =
|
||||
std::min((tx + 1) * kEncTileDimInBlocks, frame_dim.xsize_blocks);
|
||||
Rect r(bx0, by0, bx1 - bx0, by1 - by0);
|
||||
impl.ComputeTile(butteraugli_target, scale, xyb, r, thread, mask,
|
||||
mask1x1);
|
||||
size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks, xsize_blocks);
|
||||
Rect rect_out(bx0, by0, bx1 - bx0, by1 - by0);
|
||||
impl.ComputeTile(butteraugli_target, scale, xyb, rect, rect_out, thread,
|
||||
mask, mask1x1);
|
||||
},
|
||||
"AQ DiffPrecompute"));
|
||||
|
||||
Blur1x1Masking(frame_dim, pool, mask1x1);
|
||||
Blur1x1Masking(pool, mask1x1);
|
||||
return std::move(impl).aq_map;
|
||||
}
|
||||
|
||||
@ -770,8 +760,9 @@ ImageBundle RoundtripImage(const FrameHeader& frame_header,
|
||||
size_t num_special_frames = enc_state->special_frames.size();
|
||||
size_t num_passes = enc_state->progressive_splitter.GetNumPasses();
|
||||
ModularFrameEncoder modular_frame_encoder(frame_header, enc_state->cparams);
|
||||
JXL_CHECK(InitializePassesEncoder(frame_header, opsin, cms, pool, enc_state,
|
||||
&modular_frame_encoder, nullptr));
|
||||
JXL_CHECK(InitializePassesEncoder(frame_header, opsin, Rect(opsin), cms, pool,
|
||||
enc_state, &modular_frame_encoder,
|
||||
nullptr));
|
||||
JXL_CHECK(dec_state->Init(frame_header));
|
||||
JXL_CHECK(dec_state->InitForAC(num_passes, pool));
|
||||
|
||||
@ -830,7 +821,7 @@ constexpr int kMaxButteraugliIters = 4;
|
||||
|
||||
void FindBestQuantization(const FrameHeader& frame_header,
|
||||
const Image3F& linear, const Image3F& opsin,
|
||||
PassesEncoderState* enc_state,
|
||||
ImageF& quant_field, PassesEncoderState* enc_state,
|
||||
const JxlCmsInterface& cms, ThreadPool* pool,
|
||||
AuxOut* aux_out) {
|
||||
const CompressParams& cparams = enc_state->cparams;
|
||||
@ -843,7 +834,6 @@ void FindBestQuantization(const FrameHeader& frame_header,
|
||||
}
|
||||
Quantizer& quantizer = enc_state->shared.quantizer;
|
||||
ImageI& raw_quant_field = enc_state->shared.raw_quant_field;
|
||||
ImageF& quant_field = enc_state->initial_quant_field;
|
||||
|
||||
const float butteraugli_target = cparams.butteraugli_distance;
|
||||
const float original_butteraugli = cparams.original_butteraugli_distance;
|
||||
@ -998,7 +988,7 @@ void FindBestQuantization(const FrameHeader& frame_header,
|
||||
}
|
||||
|
||||
void FindBestQuantizationMaxError(const FrameHeader& frame_header,
|
||||
const Image3F& opsin,
|
||||
const Image3F& opsin, ImageF& quant_field,
|
||||
PassesEncoderState* enc_state,
|
||||
const JxlCmsInterface& cms, ThreadPool* pool,
|
||||
AuxOut* aux_out) {
|
||||
@ -1006,7 +996,6 @@ void FindBestQuantizationMaxError(const FrameHeader& frame_header,
|
||||
const CompressParams& cparams = enc_state->cparams;
|
||||
Quantizer& quantizer = enc_state->shared.quantizer;
|
||||
ImageI& raw_quant_field = enc_state->shared.raw_quant_field;
|
||||
ImageF& quant_field = enc_state->initial_quant_field;
|
||||
|
||||
// TODO(veluca): better choice of this value.
|
||||
const float initial_quant_dc =
|
||||
@ -1138,26 +1127,26 @@ float InitialQuantDC(float butteraugli_target) {
|
||||
}
|
||||
|
||||
ImageF InitialQuantField(const float butteraugli_target, const Image3F& opsin,
|
||||
const FrameDimensions& frame_dim, ThreadPool* pool,
|
||||
float rescale, ImageF* mask, ImageF* mask1x1) {
|
||||
const Rect& rect, ThreadPool* pool, float rescale,
|
||||
ImageF* mask, ImageF* mask1x1) {
|
||||
const float quant_ac = kAcQuant / butteraugli_target;
|
||||
return HWY_DYNAMIC_DISPATCH(AdaptiveQuantizationMap)(
|
||||
butteraugli_target, opsin, frame_dim, quant_ac * rescale, pool, mask,
|
||||
mask1x1);
|
||||
butteraugli_target, opsin, rect, quant_ac * rescale, pool, mask, mask1x1);
|
||||
}
|
||||
|
||||
void FindBestQuantizer(const FrameHeader& frame_header, const Image3F* linear,
|
||||
const Image3F& opsin, PassesEncoderState* enc_state,
|
||||
const Image3F& opsin, ImageF& quant_field,
|
||||
PassesEncoderState* enc_state,
|
||||
const JxlCmsInterface& cms, ThreadPool* pool,
|
||||
AuxOut* aux_out, double rescale) {
|
||||
const CompressParams& cparams = enc_state->cparams;
|
||||
if (cparams.max_error_mode) {
|
||||
FindBestQuantizationMaxError(frame_header, opsin, enc_state, cms, pool,
|
||||
aux_out);
|
||||
FindBestQuantizationMaxError(frame_header, opsin, quant_field, enc_state,
|
||||
cms, pool, aux_out);
|
||||
} else if (linear && cparams.speed_tier <= SpeedTier::kKitten) {
|
||||
// Normal encoding to a butteraugli score.
|
||||
FindBestQuantization(frame_header, *linear, opsin, enc_state, cms, pool,
|
||||
aux_out);
|
||||
FindBestQuantization(frame_header, *linear, opsin, quant_field, enc_state,
|
||||
cms, pool, aux_out);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -37,8 +37,8 @@ struct AuxOut;
|
||||
// fine-grained quantization should be enough. Returns a mask, too, which
|
||||
// can later be used to make better decisions about ac strategy.
|
||||
ImageF InitialQuantField(float butteraugli_target, const Image3F& opsin,
|
||||
const FrameDimensions& frame_dim, ThreadPool* pool,
|
||||
float rescale, ImageF* initial_quant_mask,
|
||||
const Rect& rect, ThreadPool* pool, float rescale,
|
||||
ImageF* initial_quant_mask,
|
||||
ImageF* initial_quant_mask1x1);
|
||||
|
||||
float InitialQuantDC(float butteraugli_target);
|
||||
@ -51,7 +51,8 @@ void AdjustQuantField(const AcStrategyImage& ac_strategy, const Rect& rect,
|
||||
// dequant_float_map and chosen quantization levels.
|
||||
// `linear` is only used in Kitten mode or slower.
|
||||
void FindBestQuantizer(const FrameHeader& frame_header, const Image3F* linear,
|
||||
const Image3F& opsin, PassesEncoderState* enc_state,
|
||||
const Image3F& opsin, ImageF& quant_field,
|
||||
PassesEncoderState* enc_state,
|
||||
const JxlCmsInterface& cms, ThreadPool* pool,
|
||||
AuxOut* aux_out, double rescale = 1.0);
|
||||
|
||||
|
@ -40,18 +40,14 @@ using hwy::HWY_NAMESPACE::Mul;
|
||||
using hwy::HWY_NAMESPACE::MulAdd;
|
||||
using hwy::HWY_NAMESPACE::Sqrt;
|
||||
|
||||
void ProcessTile(const FrameHeader& frame_header, const Image3F& opsin,
|
||||
PassesEncoderState* enc_state, const Rect& rect,
|
||||
void ProcessTile(const CompressParams& cparams, const FrameHeader& frame_header,
|
||||
const Image3F& opsin, const Rect& opsin_rect,
|
||||
const ImageF& quant_field, const AcStrategyImage& ac_strategy,
|
||||
ImageB* epf_sharpness, const Rect& rect,
|
||||
ArControlFieldHeuristics::TempImages* temp_image) {
|
||||
constexpr size_t N = kBlockDim;
|
||||
ImageB* JXL_RESTRICT epf_sharpness = &enc_state->shared.epf_sharpness;
|
||||
ImageF* JXL_RESTRICT quant = &enc_state->initial_quant_field;
|
||||
JXL_ASSERT(
|
||||
epf_sharpness->xsize() == enc_state->shared.frame_dim.xsize_blocks &&
|
||||
epf_sharpness->ysize() == enc_state->shared.frame_dim.ysize_blocks);
|
||||
|
||||
if (enc_state->cparams.butteraugli_distance < kMinButteraugliForDynamicAR ||
|
||||
enc_state->cparams.speed_tier > SpeedTier::kWombat ||
|
||||
if (cparams.butteraugli_distance < kMinButteraugliForDynamicAR ||
|
||||
cparams.speed_tier > SpeedTier::kWombat ||
|
||||
frame_header.loop_filter.epf_iters == 0) {
|
||||
FillPlane(static_cast<uint8_t>(4), epf_sharpness, rect);
|
||||
return;
|
||||
@ -76,11 +72,13 @@ void ProcessTile(const FrameHeader& frame_header, const Image3F& opsin,
|
||||
// (for example 32x32 dct). This relates to transforms ability
|
||||
// to propagate artefacts.
|
||||
size_t y0 = by0 == 0 ? 2 : 0;
|
||||
size_t y1 = by1 * N + 4 <= opsin.ysize() + 2 ? (by1 - by0) * N + 4
|
||||
: opsin.ysize() + 2 - by0 * N;
|
||||
size_t y1 = by1 * N + 4 <= opsin_rect.ysize() + 2
|
||||
? (by1 - by0) * N + 4
|
||||
: opsin_rect.ysize() + 2 - by0 * N;
|
||||
size_t x0 = bx0 == 0 ? 2 : 0;
|
||||
size_t x1 = bx1 * N + 4 <= opsin.xsize() + 2 ? (bx1 - bx0) * N + 4
|
||||
: opsin.xsize() + 2 - bx0 * N;
|
||||
size_t x1 = bx1 * N + 4 <= opsin_rect.xsize() + 2
|
||||
? (bx1 - bx0) * N + 4
|
||||
: opsin_rect.xsize() + 2 - bx0 * N;
|
||||
HWY_FULL(float) df;
|
||||
for (size_t y = y0; y < y1; y++) {
|
||||
float* JXL_RESTRICT laplacian_sqrsum_row = laplacian_sqrsum.Row(y);
|
||||
@ -89,14 +87,15 @@ void ProcessTile(const FrameHeader& frame_header, const Image3F& opsin,
|
||||
const float* JXL_RESTRICT in_row[3];
|
||||
const float* JXL_RESTRICT in_row_b[3];
|
||||
for (size_t c = 0; c < 3; c++) {
|
||||
in_row_t[c] = opsin.PlaneRow(c, cy > 0 ? cy - 1 : cy);
|
||||
in_row[c] = opsin.PlaneRow(c, cy);
|
||||
in_row_b[c] = opsin.PlaneRow(c, cy + 1 < opsin.ysize() ? cy + 1 : cy);
|
||||
in_row_t[c] = opsin_rect.ConstPlaneRow(opsin, c, cy > 0 ? cy - 1 : cy);
|
||||
in_row[c] = opsin_rect.ConstPlaneRow(opsin, c, cy);
|
||||
in_row_b[c] = opsin_rect.ConstPlaneRow(
|
||||
opsin, c, cy + 1 < opsin_rect.ysize() ? cy + 1 : cy);
|
||||
}
|
||||
auto compute_laplacian_scalar = [&](size_t x) {
|
||||
size_t cx = x + bx0 * N - 2;
|
||||
const size_t prevX = cx >= 1 ? cx - 1 : cx;
|
||||
const size_t nextX = cx + 1 < opsin.xsize() ? cx + 1 : cx;
|
||||
const size_t nextX = cx + 1 < opsin_rect.xsize() ? cx + 1 : cx;
|
||||
float sumsqr = 0;
|
||||
for (size_t c = 0; c < 3; c++) {
|
||||
float laplacian =
|
||||
@ -114,7 +113,8 @@ void ProcessTile(const FrameHeader& frame_header, const Image3F& opsin,
|
||||
compute_laplacian_scalar(x);
|
||||
}
|
||||
// Interior. One extra pixel of border as the last pixel is special.
|
||||
for (; x + Lanes(df) <= x1 && x + Lanes(df) + bx0 * N - 1 <= opsin.xsize();
|
||||
for (; x + Lanes(df) <= x1 &&
|
||||
x + Lanes(df) + bx0 * N - 1 <= opsin_rect.xsize();
|
||||
x += Lanes(df)) {
|
||||
size_t cx = x + bx0 * N - 2;
|
||||
auto sumsqr = Zero(df);
|
||||
@ -182,16 +182,16 @@ void ProcessTile(const FrameHeader& frame_header, const Image3F& opsin,
|
||||
// ignore pixels outside the image.
|
||||
// Y coordinates are relative to by0*8+y*4.
|
||||
size_t sy = y * 4 + by0 * 8 > 0 ? 0 : 2;
|
||||
size_t ey = y * 4 + by0 * 8 + 4 <= opsin.ysize() + 2
|
||||
size_t ey = y * 4 + by0 * 8 + 4 <= opsin_rect.ysize() + 2
|
||||
? 4
|
||||
: opsin.ysize() - y * 4 - by0 * 8 + 2;
|
||||
: opsin_rect.ysize() - y * 4 - by0 * 8 + 2;
|
||||
for (size_t x = 0; x < (bx1 - bx0) * 2 + 1; x++) {
|
||||
// ignore pixels outside the image.
|
||||
// X coordinates are relative to bx0*8.
|
||||
size_t sx = x * 4 + bx0 * 8 > 0 ? x * 4 : x * 4 + 2;
|
||||
size_t ex = x * 4 + bx0 * 8 + 4 <= opsin.xsize() + 2
|
||||
size_t ex = x * 4 + bx0 * 8 + 4 <= opsin_rect.xsize() + 2
|
||||
? x * 4 + 4
|
||||
: opsin.xsize() - bx0 * 8 + 2;
|
||||
: opsin_rect.xsize() - bx0 * 8 + 2;
|
||||
if (ex - sx == 4 && ey - sy == 4) {
|
||||
auto sum = Zero(df4);
|
||||
for (size_t iy = 0; iy < 4; iy++) {
|
||||
@ -212,9 +212,9 @@ void ProcessTile(const FrameHeader& frame_header, const Image3F& opsin,
|
||||
}
|
||||
}
|
||||
for (size_t by = by0; by < by1; by++) {
|
||||
AcStrategyRow acs_row = enc_state->shared.ac_strategy.ConstRow(by);
|
||||
AcStrategyRow acs_row = ac_strategy.ConstRow(by);
|
||||
uint8_t* JXL_RESTRICT out_row = epf_sharpness->Row(by);
|
||||
float* JXL_RESTRICT quant_row = quant->Row(by);
|
||||
const float* JXL_RESTRICT quant_row = quant_field.Row(by);
|
||||
for (size_t bx = bx0; bx < bx1; bx++) {
|
||||
AcStrategy acs = acs_row[bx];
|
||||
if (!acs.IsFirstBlock()) continue;
|
||||
@ -311,13 +311,14 @@ HWY_AFTER_NAMESPACE();
|
||||
namespace jxl {
|
||||
HWY_EXPORT(ProcessTile);
|
||||
|
||||
void ArControlFieldHeuristics::RunRect(const FrameHeader& frame_header,
|
||||
const Rect& block_rect,
|
||||
const Image3F& opsin,
|
||||
PassesEncoderState* enc_state,
|
||||
size_t thread) {
|
||||
void ArControlFieldHeuristics::RunRect(
|
||||
const CompressParams& cparams, const FrameHeader& frame_header,
|
||||
const Rect& block_rect, const Image3F& opsin, const Rect& opsin_rect,
|
||||
const ImageF& quant_field, const AcStrategyImage& ac_strategy,
|
||||
ImageB* epf_sharpness, size_t thread) {
|
||||
HWY_DYNAMIC_DISPATCH(ProcessTile)
|
||||
(frame_header, opsin, enc_state, block_rect, &temp_images[thread]);
|
||||
(cparams, frame_header, opsin, opsin_rect, quant_field, ac_strategy,
|
||||
epf_sharpness, block_rect, &temp_images[thread]);
|
||||
}
|
||||
|
||||
} // namespace jxl
|
||||
|
@ -36,14 +36,13 @@ struct ArControlFieldHeuristics {
|
||||
temp_images.resize(num_threads);
|
||||
}
|
||||
|
||||
void RunRect(const FrameHeader& frame_header, const Rect& block_rect,
|
||||
const Image3F& opsin, PassesEncoderState* enc_state,
|
||||
void RunRect(const CompressParams& cparams, const FrameHeader& frame_header,
|
||||
const Rect& block_rect, const Image3F& opsin,
|
||||
const Rect& opsin_rect, const ImageF& quant_field,
|
||||
const AcStrategyImage& ac_strategy, ImageB* epf_sharpness,
|
||||
size_t thread);
|
||||
|
||||
std::vector<TempImages> temp_images;
|
||||
ImageB* epf_sharpness;
|
||||
ImageF* quant;
|
||||
bool all_default;
|
||||
};
|
||||
|
||||
} // namespace jxl
|
||||
|
4
third_party/jpeg-xl/lib/jxl/enc_aux_out.cc
vendored
4
third_party/jpeg-xl/lib/jxl/enc_aux_out.cc
vendored
@ -5,6 +5,10 @@
|
||||
|
||||
#include "lib/jxl/enc_aux_out.h"
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS
|
||||
#endif
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
7
third_party/jpeg-xl/lib/jxl/enc_cache.cc
vendored
7
third_party/jpeg-xl/lib/jxl/enc_cache.cc
vendored
@ -35,8 +35,9 @@
|
||||
namespace jxl {
|
||||
|
||||
Status InitializePassesEncoder(const FrameHeader& frame_header,
|
||||
const Image3F& opsin, const JxlCmsInterface& cms,
|
||||
ThreadPool* pool, PassesEncoderState* enc_state,
|
||||
const Image3F& opsin, const Rect& rect,
|
||||
const JxlCmsInterface& cms, ThreadPool* pool,
|
||||
PassesEncoderState* enc_state,
|
||||
ModularFrameEncoder* modular_frame_encoder,
|
||||
AuxOut* aux_out) {
|
||||
PassesSharedState& JXL_RESTRICT shared = enc_state->shared;
|
||||
@ -68,7 +69,7 @@ Status InitializePassesEncoder(const FrameHeader& frame_header,
|
||||
JXL_RETURN_IF_ERROR(RunOnPool(
|
||||
pool, 0, shared.frame_dim.num_groups, ThreadPool::NoInit,
|
||||
[&](size_t group_idx, size_t _) {
|
||||
ComputeCoefficients(group_idx, enc_state, opsin, &dc);
|
||||
ComputeCoefficients(group_idx, enc_state, opsin, rect, &dc);
|
||||
},
|
||||
"Compute coeffs"));
|
||||
|
||||
|
8
third_party/jpeg-xl/lib/jxl/enc_cache.h
vendored
8
third_party/jpeg-xl/lib/jxl/enc_cache.h
vendored
@ -40,10 +40,6 @@ struct PassesEncoderState {
|
||||
bool initialize_global_state = true;
|
||||
size_t dc_group_index = 0;
|
||||
|
||||
ImageF initial_quant_field; // Invalid in Falcon mode.
|
||||
ImageF initial_quant_masking; // Invalid in Falcon mode.
|
||||
ImageF initial_quant_masking1x1; // Invalid in Falcon mode.
|
||||
|
||||
// Per-pass DCT coefficients for the image. One row per group.
|
||||
std::vector<std::unique_ptr<ACImage>> coeffs;
|
||||
|
||||
@ -77,8 +73,8 @@ struct PassesEncoderState {
|
||||
// Initialize per-frame information.
|
||||
class ModularFrameEncoder;
|
||||
Status InitializePassesEncoder(const FrameHeader& frame_header,
|
||||
const Image3F& opsin, const JxlCmsInterface& cms,
|
||||
ThreadPool* pool,
|
||||
const Image3F& opsin, const Rect& rect,
|
||||
const JxlCmsInterface& cms, ThreadPool* pool,
|
||||
PassesEncoderState* passes_enc_state,
|
||||
ModularFrameEncoder* modular_frame_encoder,
|
||||
AuxOut* aux_out);
|
||||
|
@ -205,22 +205,23 @@ void ComputeDC(const ImageF& dc_values, bool fast, int32_t* dc_x,
|
||||
jxl::cms::kYToBRatio, kDistanceMultiplierDC, fast);
|
||||
}
|
||||
|
||||
void ComputeTile(const Image3F& opsin, const DequantMatrices& dequant,
|
||||
void ComputeTile(const Image3F& opsin, const Rect& opsin_rect,
|
||||
const DequantMatrices& dequant,
|
||||
const AcStrategyImage* ac_strategy,
|
||||
const ImageI* raw_quant_field, const Quantizer* quantizer,
|
||||
const Rect& r, bool fast, bool use_dct8, ImageSB* map_x,
|
||||
const Rect& rect, bool fast, bool use_dct8, ImageSB* map_x,
|
||||
ImageSB* map_b, ImageF* dc_values, float* mem) {
|
||||
static_assert(kEncTileDimInBlocks == kColorTileDimInBlocks,
|
||||
"Invalid color tile dim");
|
||||
size_t xsize_blocks = opsin.xsize() / kBlockDim;
|
||||
size_t xsize_blocks = opsin_rect.xsize() / kBlockDim;
|
||||
constexpr float kDistanceMultiplierAC = 1e-9f;
|
||||
const size_t dct_scratch_size =
|
||||
3 * (MaxVectorSize() / sizeof(float)) * AcStrategy::kMaxBlockDim;
|
||||
|
||||
const size_t y0 = r.y0();
|
||||
const size_t x0 = r.x0();
|
||||
const size_t x1 = r.x0() + r.xsize();
|
||||
const size_t y1 = r.y0() + r.ysize();
|
||||
const size_t y0 = rect.y0();
|
||||
const size_t x0 = rect.x0();
|
||||
const size_t x1 = rect.x0() + rect.xsize();
|
||||
const size_t y1 = rect.y0() + rect.ysize();
|
||||
|
||||
int ty = y0 / kColorTileDimInBlocks;
|
||||
int tx = x0 / kColorTileDimInBlocks;
|
||||
@ -257,9 +258,12 @@ void ComputeTile(const Image3F& opsin, const DequantMatrices& dequant,
|
||||
size_t num_ac = 0;
|
||||
|
||||
for (size_t y = y0; y < y1; ++y) {
|
||||
const float* JXL_RESTRICT row_y = opsin.ConstPlaneRow(1, y * kBlockDim);
|
||||
const float* JXL_RESTRICT row_x = opsin.ConstPlaneRow(0, y * kBlockDim);
|
||||
const float* JXL_RESTRICT row_b = opsin.ConstPlaneRow(2, y * kBlockDim);
|
||||
const float* JXL_RESTRICT row_y =
|
||||
opsin_rect.ConstPlaneRow(opsin, 1, y * kBlockDim);
|
||||
const float* JXL_RESTRICT row_x =
|
||||
opsin_rect.ConstPlaneRow(opsin, 0, y * kBlockDim);
|
||||
const float* JXL_RESTRICT row_b =
|
||||
opsin_rect.ConstPlaneRow(opsin, 2, y * kBlockDim);
|
||||
size_t stride = opsin.PixelsPerRow();
|
||||
|
||||
for (size_t x = x0; x < x1; x++) {
|
||||
@ -362,14 +366,15 @@ HWY_EXPORT(InitDCStorage);
|
||||
HWY_EXPORT(ComputeDC);
|
||||
HWY_EXPORT(ComputeTile);
|
||||
|
||||
void CfLHeuristics::Init(const Image3F& opsin) {
|
||||
size_t xsize_blocks = opsin.xsize() / kBlockDim;
|
||||
size_t ysize_blocks = opsin.ysize() / kBlockDim;
|
||||
void CfLHeuristics::Init(const Rect& rect) {
|
||||
size_t xsize_blocks = rect.xsize() / kBlockDim;
|
||||
size_t ysize_blocks = rect.ysize() / kBlockDim;
|
||||
HWY_DYNAMIC_DISPATCH(InitDCStorage)
|
||||
(xsize_blocks * ysize_blocks, &dc_values);
|
||||
}
|
||||
|
||||
void CfLHeuristics::ComputeTile(const Rect& r, const Image3F& opsin,
|
||||
const Rect& opsin_rect,
|
||||
const DequantMatrices& dequant,
|
||||
const AcStrategyImage* ac_strategy,
|
||||
const ImageI* raw_quant_field,
|
||||
@ -377,8 +382,8 @@ void CfLHeuristics::ComputeTile(const Rect& r, const Image3F& opsin,
|
||||
size_t thread, ColorCorrelationMap* cmap) {
|
||||
bool use_dct8 = ac_strategy == nullptr;
|
||||
HWY_DYNAMIC_DISPATCH(ComputeTile)
|
||||
(opsin, dequant, ac_strategy, raw_quant_field, quantizer, r, fast, use_dct8,
|
||||
&cmap->ytox_map, &cmap->ytob_map, &dc_values,
|
||||
(opsin, opsin_rect, dequant, ac_strategy, raw_quant_field, quantizer, r, fast,
|
||||
use_dct8, &cmap->ytox_map, &cmap->ytob_map, &dc_values,
|
||||
mem.get() + thread * ItemsPerThread());
|
||||
}
|
||||
|
||||
|
@ -29,13 +29,13 @@ void ColorCorrelationMapEncodeDC(const ColorCorrelationMap& map,
|
||||
AuxOut* aux_out);
|
||||
|
||||
struct CfLHeuristics {
|
||||
void Init(const Image3F& opsin);
|
||||
void Init(const Rect& rect);
|
||||
|
||||
void PrepareForThreads(size_t num_threads) {
|
||||
mem = hwy::AllocateAligned<float>(num_threads * ItemsPerThread());
|
||||
}
|
||||
|
||||
void ComputeTile(const Rect& r, const Image3F& opsin,
|
||||
void ComputeTile(const Rect& r, const Image3F& opsin, const Rect& opsin_rect,
|
||||
const DequantMatrices& dequant,
|
||||
const AcStrategyImage* ac_strategy,
|
||||
const ImageI* raw_quant_field, const Quantizer* quantizer,
|
||||
|
40
third_party/jpeg-xl/lib/jxl/enc_cluster.cc
vendored
40
third_party/jpeg-xl/lib/jxl/enc_cluster.cc
vendored
@ -82,13 +82,33 @@ float HistogramDistance(const Histogram& a, const Histogram& b) {
|
||||
return total_distance - a.entropy_ - b.entropy_;
|
||||
}
|
||||
|
||||
bool HasNewSymbol(const Histogram& a, const Histogram& b) {
|
||||
for (size_t i = 0; i < a.data_.size(); ++i) {
|
||||
if (a.data_[i] > 0 && (i >= b.data_.size() || b.data_[i] == 0)) {
|
||||
return true;
|
||||
}
|
||||
constexpr const float kInfinity = std::numeric_limits<float>::infinity();
|
||||
|
||||
float HistogramKLDivergence(const Histogram& actual, const Histogram& coding) {
|
||||
if (actual.total_count_ == 0) return 0;
|
||||
if (coding.total_count_ == 0) return kInfinity;
|
||||
|
||||
const HWY_CAPPED(float, Histogram::kRounding) df;
|
||||
const HWY_CAPPED(int32_t, Histogram::kRounding) di;
|
||||
|
||||
const auto coding_inv = Set(df, 1.0f / coding.total_count_);
|
||||
auto cost_lanes = Zero(df);
|
||||
|
||||
for (size_t i = 0; i < actual.data_.size(); i += Lanes(di)) {
|
||||
const auto counts = LoadU(di, &actual.data_[i]);
|
||||
const auto coding_counts =
|
||||
coding.data_.size() > i ? LoadU(di, &coding.data_[i]) : Zero(di);
|
||||
const auto coding_probs = Mul(ConvertTo(df, coding_counts), coding_inv);
|
||||
const auto neg_coding_cost = BitCast(
|
||||
df,
|
||||
IfThenZeroElse(Eq(counts, Zero(di)),
|
||||
IfThenElse(Eq(coding_counts, Zero(di)),
|
||||
BitCast(di, Set(df, -kInfinity)),
|
||||
BitCast(di, FastLog2f(df, coding_probs)))));
|
||||
cost_lanes = NegMulAdd(ConvertTo(df, counts), neg_coding_cost, cost_lanes);
|
||||
}
|
||||
return false;
|
||||
const float total_cost = GetLane(SumOfLanes(df, cost_lanes));
|
||||
return total_cost - actual.entropy_;
|
||||
}
|
||||
|
||||
// First step of a k-means clustering with a fancy distance metric.
|
||||
@ -121,7 +141,7 @@ void FastClusterHistograms(const std::vector<Histogram>& in,
|
||||
for (size_t i = 0; i < in.size(); i++) {
|
||||
if (dists[i] == 0.0f) continue;
|
||||
for (size_t j = 0; j < prev_histograms; ++j) {
|
||||
dists[i] = std::min(HistogramDistance(in[i], (*out)[j]), dists[i]);
|
||||
dists[i] = std::min(HistogramKLDivergence(in[i], (*out)[j]), dists[i]);
|
||||
}
|
||||
}
|
||||
auto max_dist = std::max_element(dists.begin(), dists.end());
|
||||
@ -149,10 +169,8 @@ void FastClusterHistograms(const std::vector<Histogram>& in,
|
||||
size_t best = 0;
|
||||
float best_dist = std::numeric_limits<float>::max();
|
||||
for (size_t j = 0; j < out->size(); j++) {
|
||||
// TODO(szabadka) Choose a different distance metric for previous
|
||||
// histograms (i.e. the cost of the new histogram with the old one).
|
||||
if (j < prev_histograms && HasNewSymbol(in[i], (*out)[j])) continue;
|
||||
float dist = HistogramDistance(in[i], (*out)[j]);
|
||||
float dist = j < prev_histograms ? HistogramKLDivergence(in[i], (*out)[j])
|
||||
: HistogramDistance(in[i], (*out)[j]);
|
||||
if (dist < best_dist) {
|
||||
best = j;
|
||||
best_dist = dist;
|
||||
|
80
third_party/jpeg-xl/lib/jxl/enc_fast_lossless.cc
vendored
80
third_party/jpeg-xl/lib/jxl/enc_fast_lossless.cc
vendored
@ -706,12 +706,12 @@ void JxlFastLosslessPrepareHeader(JxlFastLosslessFrameState* frame,
|
||||
if (!is_last) {
|
||||
output->Write(2, 0b00); // can not be saved as reference
|
||||
}
|
||||
output->Write(2, 0b00); // a frame has no name
|
||||
output->Write(1, 0); // loop filter is not all_default
|
||||
output->Write(1, 0); // no gaborish
|
||||
output->Write(2, 0); // 0 EPF iters
|
||||
output->Write(2, 0b00); // No LF extensions
|
||||
output->Write(2, 0b00); // No FH extensions
|
||||
output->Write(2, 0b00); // a frame has no name
|
||||
output->Write(1, 0); // loop filter is not all_default
|
||||
output->Write(1, 0); // no gaborish
|
||||
output->Write(2, 0); // 0 EPF iters
|
||||
output->Write(2, 0b00); // No LF extensions
|
||||
output->Write(2, 0b00); // No FH extensions
|
||||
|
||||
output->Write(1, 0); // No TOC permutation
|
||||
output->ZeroPadToByte(); // TOC is byte-aligned.
|
||||
@ -3632,13 +3632,11 @@ bool detect_palette(const unsigned char* r, size_t width,
|
||||
return collided;
|
||||
}
|
||||
|
||||
// TODO(szabadka): Add some parameter to indicate whether the input is
|
||||
// truly streaming.
|
||||
template <typename BitDepth>
|
||||
JxlFastLosslessFrameState* LLPrepare(JxlChunkedFrameInputSource input,
|
||||
size_t width, size_t height,
|
||||
BitDepth bitdepth, size_t nb_chans,
|
||||
bool big_endian, int effort) {
|
||||
bool big_endian, int effort, int oneshot) {
|
||||
assert(width != 0);
|
||||
assert(height != 0);
|
||||
|
||||
@ -3647,7 +3645,7 @@ JxlFastLosslessFrameState* LLPrepare(JxlChunkedFrameInputSource input,
|
||||
std::vector<int16_t> lookup(kHashSize);
|
||||
lookup[0] = 0;
|
||||
int pcolors = 0;
|
||||
bool collided = effort < 2 || bitdepth.bitdepth != 8;
|
||||
bool collided = effort < 2 || bitdepth.bitdepth != 8 || !oneshot;
|
||||
for (size_t y0 = 0; y0 < height && !collided; y0 += 256) {
|
||||
size_t ys = std::min<size_t>(height - y0, 256);
|
||||
for (size_t x0 = 0; x0 < width && !collided; x0 += 256) {
|
||||
@ -3728,11 +3726,7 @@ JxlFastLosslessFrameState* LLPrepare(JxlChunkedFrameInputSource input,
|
||||
|
||||
bool onegroup = num_groups_x == 1 && num_groups_y == 1;
|
||||
|
||||
// sample the middle (effort * 2) rows of every group
|
||||
// TODO(szabadka): Optimize sampling rule for low-memory code-path.
|
||||
for (size_t g = 0; g < num_groups_y * num_groups_x; g++) {
|
||||
size_t xg = g % num_groups_x;
|
||||
size_t yg = g / num_groups_x;
|
||||
auto sample_rows = [&](size_t xg, size_t yg, size_t num_rows) {
|
||||
size_t y0 = yg * 256;
|
||||
size_t x0 = xg * 256;
|
||||
size_t ys = std::min<size_t>(height - y0, 256);
|
||||
@ -3742,12 +3736,31 @@ JxlFastLosslessFrameState* LLPrepare(JxlChunkedFrameInputSource input,
|
||||
input.get_color_channel_data_at(input.opaque, x0, y0, xs, ys, &stride);
|
||||
auto rgba = reinterpret_cast<const unsigned char*>(buffer);
|
||||
int y_begin = std::max<int>(0, ys - 2 * effort) / 2;
|
||||
int y_count = std::min<int>(2 * effort * ys / 256, y0 + ys - y_begin - 1);
|
||||
int y_count = std::min<int>(num_rows, y0 + ys - y_begin - 1);
|
||||
int x_max = xs / kChunkSize * kChunkSize;
|
||||
CollectSamples(rgba, 0, y_begin, x_max, stride, y_count, raw_counts,
|
||||
lz77_counts, onegroup, !collided, bitdepth, nb_chans,
|
||||
big_endian, lookup.data());
|
||||
input.release_buffer(input.opaque, buffer);
|
||||
};
|
||||
|
||||
// TODO(veluca): that `64` is an arbitrary constant, meant to correspond to
|
||||
// the point where the number of processed rows is large enough that loading
|
||||
// the entire image is cost-effective.
|
||||
if (oneshot || effort >= 64) {
|
||||
for (size_t g = 0; g < num_groups_y * num_groups_x; g++) {
|
||||
size_t xg = g % num_groups_x;
|
||||
size_t yg = g / num_groups_x;
|
||||
size_t y0 = yg * 256;
|
||||
size_t ys = std::min<size_t>(height - y0, 256);
|
||||
size_t num_rows = 2 * effort * ys / 256;
|
||||
sample_rows(xg, yg, num_rows);
|
||||
}
|
||||
} else {
|
||||
// sample the middle (effort * 2 * num_groups) rows of the center group
|
||||
// (possibly all of them).
|
||||
sample_rows((num_groups_x - 1) / 2, (num_groups_y - 1) / 2,
|
||||
2 * effort * num_groups_x * num_groups_y);
|
||||
}
|
||||
|
||||
// TODO(veluca): can probably improve this and make it bitdepth-dependent.
|
||||
@ -3946,24 +3959,25 @@ void LLProcess(JxlFastLosslessFrameState* frame_state, bool is_last,
|
||||
|
||||
JxlFastLosslessFrameState* JxlFastLosslessPrepareImpl(
|
||||
JxlChunkedFrameInputSource input, size_t width, size_t height,
|
||||
size_t nb_chans, size_t bitdepth, bool big_endian, int effort) {
|
||||
size_t nb_chans, size_t bitdepth, bool big_endian, int effort,
|
||||
int oneshot) {
|
||||
assert(bitdepth > 0);
|
||||
assert(nb_chans <= 4);
|
||||
assert(nb_chans != 0);
|
||||
if (bitdepth <= 8) {
|
||||
return LLPrepare(input, width, height, UpTo8Bits(bitdepth), nb_chans,
|
||||
big_endian, effort);
|
||||
big_endian, effort, oneshot);
|
||||
}
|
||||
if (bitdepth <= 13) {
|
||||
return LLPrepare(input, width, height, From9To13Bits(bitdepth), nb_chans,
|
||||
big_endian, effort);
|
||||
big_endian, effort, oneshot);
|
||||
}
|
||||
if (bitdepth == 14) {
|
||||
return LLPrepare(input, width, height, Exactly14Bits(bitdepth), nb_chans,
|
||||
big_endian, effort);
|
||||
big_endian, effort, oneshot);
|
||||
}
|
||||
return LLPrepare(input, width, height, MoreThan14Bits(bitdepth), nb_chans,
|
||||
big_endian, effort);
|
||||
big_endian, effort, oneshot);
|
||||
}
|
||||
|
||||
void JxlFastLosslessProcessFrameImpl(
|
||||
@ -4074,10 +4088,8 @@ class FJxlFrameInput {
|
||||
bytes_per_pixel_(bitdepth <= 8 ? nb_chans : 2 * nb_chans) {}
|
||||
|
||||
JxlChunkedFrameInputSource GetInputSource() {
|
||||
return JxlChunkedFrameInputSource{
|
||||
this,
|
||||
GetDataAt,
|
||||
[](void*, const void*) {}};
|
||||
return JxlChunkedFrameInputSource{this, GetDataAt,
|
||||
[](void*, const void*) {}};
|
||||
}
|
||||
|
||||
private:
|
||||
@ -4099,9 +4111,9 @@ size_t JxlFastLosslessEncode(const unsigned char* rgba, size_t width,
|
||||
unsigned char** output, void* runner_opaque,
|
||||
FJxlParallelRunner runner) {
|
||||
FJxlFrameInput input(rgba, row_stride, nb_chans, bitdepth);
|
||||
auto frame_state =
|
||||
JxlFastLosslessPrepareFrame(input.GetInputSource(), width, height,
|
||||
nb_chans, bitdepth, big_endian, effort);
|
||||
auto frame_state = JxlFastLosslessPrepareFrame(
|
||||
input.GetInputSource(), width, height, nb_chans, bitdepth, big_endian,
|
||||
effort, /*oneshot=*/true);
|
||||
JxlFastLosslessProcessFrame(frame_state, /*is_last=*/true, runner_opaque,
|
||||
runner, nullptr);
|
||||
JxlFastLosslessPrepareHeader(frame_state, /*add_image_header=*/1,
|
||||
@ -4121,25 +4133,25 @@ size_t JxlFastLosslessEncode(const unsigned char* rgba, size_t width,
|
||||
|
||||
JxlFastLosslessFrameState* JxlFastLosslessPrepareFrame(
|
||||
JxlChunkedFrameInputSource input, size_t width, size_t height,
|
||||
size_t nb_chans, size_t bitdepth, int big_endian, int effort) {
|
||||
size_t nb_chans, size_t bitdepth, int big_endian, int effort, int oneshot) {
|
||||
#if FJXL_ENABLE_AVX512
|
||||
if (__builtin_cpu_supports("avx512cd") &&
|
||||
__builtin_cpu_supports("avx512vbmi") &&
|
||||
__builtin_cpu_supports("avx512bw") && __builtin_cpu_supports("avx512f") &&
|
||||
__builtin_cpu_supports("avx512vl")) {
|
||||
return AVX512::JxlFastLosslessPrepareImpl(input, width, height, nb_chans,
|
||||
bitdepth, big_endian, effort);
|
||||
return AVX512::JxlFastLosslessPrepareImpl(
|
||||
input, width, height, nb_chans, bitdepth, big_endian, effort, oneshot);
|
||||
}
|
||||
#endif
|
||||
#if FJXL_ENABLE_AVX2
|
||||
if (__builtin_cpu_supports("avx2")) {
|
||||
return AVX2::JxlFastLosslessPrepareImpl(input, width, height, nb_chans,
|
||||
bitdepth, big_endian, effort);
|
||||
return AVX2::JxlFastLosslessPrepareImpl(
|
||||
input, width, height, nb_chans, bitdepth, big_endian, effort, oneshot);
|
||||
}
|
||||
#endif
|
||||
|
||||
return default_implementation::JxlFastLosslessPrepareImpl(
|
||||
input, width, height, nb_chans, bitdepth, big_endian, effort);
|
||||
input, width, height, nb_chans, bitdepth, big_endian, effort, oneshot);
|
||||
}
|
||||
|
||||
void JxlFastLosslessProcessFrame(
|
||||
|
@ -69,7 +69,7 @@ struct JxlFastLosslessFrameState;
|
||||
// JxlFastLosslessFreeFrameState.
|
||||
JxlFastLosslessFrameState* JxlFastLosslessPrepareFrame(
|
||||
JxlChunkedFrameInputSource input, size_t width, size_t height,
|
||||
size_t nb_chans, size_t bitdepth, int big_endian, int effort);
|
||||
size_t nb_chans, size_t bitdepth, int big_endian, int effort, int oneshot);
|
||||
|
||||
#if !FJXL_STANDALONE
|
||||
class JxlEncoderOutputProcessorWrapper;
|
||||
|
2
third_party/jpeg-xl/lib/jxl/enc_fields.cc
vendored
2
third_party/jpeg-xl/lib/jxl/enc_fields.cc
vendored
@ -5,6 +5,8 @@
|
||||
|
||||
#include "lib/jxl/enc_fields.h"
|
||||
|
||||
#include <cinttypes>
|
||||
|
||||
#include "lib/jxl/enc_aux_out.h"
|
||||
#include "lib/jxl/fields.h"
|
||||
|
||||
|
130
third_party/jpeg-xl/lib/jxl/enc_frame.cc
vendored
130
third_party/jpeg-xl/lib/jxl/enc_frame.cc
vendored
@ -121,8 +121,7 @@ uint32_t GetBitDepth(JxlBitDepth bit_depth, const T& metadata,
|
||||
}
|
||||
}
|
||||
|
||||
Status CopyColorChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
|
||||
size_t xsize, size_t ysize,
|
||||
Status CopyColorChannels(JxlChunkedFrameInputSource input, Rect rect,
|
||||
const FrameInfo& frame_info,
|
||||
const ImageMetadata& metadata, ThreadPool* pool,
|
||||
Image3F* color, ImageF* alpha,
|
||||
@ -133,7 +132,8 @@ Status CopyColorChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
|
||||
size_t bits_per_sample =
|
||||
GetBitDepth(frame_info.image_bit_depth, metadata, format);
|
||||
size_t row_offset;
|
||||
auto buffer = GetColorBuffer(input, x0, y0, xsize, ysize, &row_offset);
|
||||
auto buffer = GetColorBuffer(input, rect.x0(), rect.y0(), rect.xsize(),
|
||||
rect.ysize(), &row_offset);
|
||||
if (!buffer) {
|
||||
return JXL_FAILURE("no buffer for color channels given");
|
||||
}
|
||||
@ -145,12 +145,12 @@ Status CopyColorChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
|
||||
" color channels, received only %u channels",
|
||||
color_channels, format.num_channels);
|
||||
}
|
||||
*color = Image3F(xsize, ysize);
|
||||
*color = Image3F(rect.xsize(), rect.ysize());
|
||||
const uint8_t* data = reinterpret_cast<const uint8_t*>(buffer.get());
|
||||
for (size_t c = 0; c < color_channels; ++c) {
|
||||
JXL_RETURN_IF_ERROR(ConvertFromExternalNoSizeCheck(
|
||||
data, xsize, ysize, row_offset, bits_per_sample, format, c, pool,
|
||||
&color->Plane(c)));
|
||||
data, rect.xsize(), rect.ysize(), row_offset, bits_per_sample, format,
|
||||
c, pool, &color->Plane(c)));
|
||||
}
|
||||
if (color_channels == 1) {
|
||||
CopyImageTo(color->Plane(0), &color->Plane(1));
|
||||
@ -159,7 +159,7 @@ Status CopyColorChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
|
||||
if (alpha) {
|
||||
if (*has_interleaved_alpha) {
|
||||
JXL_RETURN_IF_ERROR(ConvertFromExternalNoSizeCheck(
|
||||
data, xsize, ysize, row_offset, bits_per_sample, format,
|
||||
data, rect.xsize(), rect.ysize(), row_offset, bits_per_sample, format,
|
||||
format.num_channels - 1, pool, alpha));
|
||||
} else {
|
||||
// if alpha is not passed, but it is expected, then assume
|
||||
@ -170,8 +170,7 @@ Status CopyColorChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
|
||||
return true;
|
||||
}
|
||||
|
||||
Status CopyExtraChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
|
||||
size_t xsize, size_t ysize,
|
||||
Status CopyExtraChannels(JxlChunkedFrameInputSource input, Rect rect,
|
||||
const FrameInfo& frame_info,
|
||||
const ImageMetadata& metadata,
|
||||
bool has_interleaved_alpha, ThreadPool* pool,
|
||||
@ -189,15 +188,16 @@ Status CopyExtraChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
|
||||
ec_format.num_channels = 1;
|
||||
size_t row_offset;
|
||||
auto buffer =
|
||||
GetExtraChannelBuffer(input, ec, x0, y0, xsize, ysize, &row_offset);
|
||||
GetExtraChannelBuffer(input, ec, rect.x0(), rect.y0(), rect.xsize(),
|
||||
rect.ysize(), &row_offset);
|
||||
if (!buffer) {
|
||||
return JXL_FAILURE("no buffer for extra channel given");
|
||||
}
|
||||
size_t bits_per_sample = GetBitDepth(
|
||||
frame_info.image_bit_depth, metadata.extra_channel_info[ec], ec_format);
|
||||
if (!ConvertFromExternalNoSizeCheck(
|
||||
reinterpret_cast<const uint8_t*>(buffer.get()), xsize, ysize,
|
||||
row_offset, bits_per_sample, ec_format, 0, pool,
|
||||
reinterpret_cast<const uint8_t*>(buffer.get()), rect.xsize(),
|
||||
rect.ysize(), row_offset, bits_per_sample, ec_format, 0, pool,
|
||||
&(*extra_channels)[ec])) {
|
||||
return JXL_FAILURE("Failed to set buffer for extra channel");
|
||||
}
|
||||
@ -258,12 +258,10 @@ Status LoopFilterFromParams(const CompressParams& cparams, bool streaming_mode,
|
||||
LoopFilter* loop_filter = &frame_header->loop_filter;
|
||||
|
||||
// Gaborish defaults to enabled in Hare or slower.
|
||||
// TODO(szabadka) Re-enable Gaborish for streaming mode.
|
||||
loop_filter->gab =
|
||||
ApplyOverride(cparams.gaborish,
|
||||
!streaming_mode && cparams.speed_tier <= SpeedTier::kHare &&
|
||||
frame_header->encoding == FrameEncoding::kVarDCT &&
|
||||
cparams.decoding_speed_tier < 4);
|
||||
loop_filter->gab = ApplyOverride(
|
||||
cparams.gaborish, cparams.speed_tier <= SpeedTier::kHare &&
|
||||
frame_header->encoding == FrameEncoding::kVarDCT &&
|
||||
cparams.decoding_speed_tier < 4);
|
||||
|
||||
if (cparams.epf != -1) {
|
||||
loop_filter->epf_iters = cparams.epf;
|
||||
@ -540,14 +538,14 @@ struct PixelStatsForChromacityAdjustment {
|
||||
float dx = 0;
|
||||
float db = 0;
|
||||
float exposed_blue = 0;
|
||||
float CalcPlane(const ImageF* JXL_RESTRICT plane) const {
|
||||
float CalcPlane(const ImageF* JXL_RESTRICT plane, const Rect& rect) const {
|
||||
float xmax = 0;
|
||||
float ymax = 0;
|
||||
for (size_t ty = 1; ty < plane->ysize(); ++ty) {
|
||||
for (size_t tx = 1; tx < plane->xsize(); ++tx) {
|
||||
float cur = plane->Row(ty)[tx];
|
||||
float prev_row = plane->Row(ty - 1)[tx];
|
||||
float prev = plane->Row(ty)[tx - 1];
|
||||
for (size_t ty = 1; ty < rect.ysize(); ++ty) {
|
||||
for (size_t tx = 1; tx < rect.xsize(); ++tx) {
|
||||
float cur = rect.Row(plane, ty)[tx];
|
||||
float prev_row = rect.Row(plane, ty - 1)[tx];
|
||||
float prev = rect.Row(plane, ty)[tx - 1];
|
||||
xmax = std::max(xmax, std::abs(cur - prev));
|
||||
ymax = std::max(ymax, std::abs(cur - prev_row));
|
||||
}
|
||||
@ -555,20 +553,20 @@ struct PixelStatsForChromacityAdjustment {
|
||||
return std::max(xmax, ymax);
|
||||
}
|
||||
void CalcExposedBlue(const ImageF* JXL_RESTRICT plane_y,
|
||||
const ImageF* JXL_RESTRICT plane_b) {
|
||||
const ImageF* JXL_RESTRICT plane_b, const Rect& rect) {
|
||||
float eb = 0;
|
||||
float xmax = 0;
|
||||
float ymax = 0;
|
||||
for (size_t ty = 1; ty < plane_y->ysize(); ++ty) {
|
||||
for (size_t tx = 1; tx < plane_y->xsize(); ++tx) {
|
||||
float cur_y = plane_y->Row(ty)[tx];
|
||||
float cur_b = plane_b->Row(ty)[tx];
|
||||
for (size_t ty = 1; ty < rect.ysize(); ++ty) {
|
||||
for (size_t tx = 1; tx < rect.xsize(); ++tx) {
|
||||
float cur_y = rect.Row(plane_y, ty)[tx];
|
||||
float cur_b = rect.Row(plane_b, ty)[tx];
|
||||
float exposed_b = cur_b - cur_y * 1.2;
|
||||
float diff_b = cur_b - cur_y;
|
||||
float prev_row = plane_b->Row(ty - 1)[tx];
|
||||
float prev = plane_b->Row(ty)[tx - 1];
|
||||
float diff_prev_row = prev_row - plane_y->Row(ty - 1)[tx];
|
||||
float diff_prev = prev - plane_y->Row(ty)[tx - 1];
|
||||
float prev_row = rect.Row(plane_b, ty - 1)[tx];
|
||||
float prev = rect.Row(plane_b, ty)[tx - 1];
|
||||
float diff_prev_row = prev_row - rect.Row(plane_y, ty - 1)[tx];
|
||||
float diff_prev = prev - rect.Row(plane_y, ty)[tx - 1];
|
||||
xmax = std::max(xmax, std::abs(diff_b - diff_prev));
|
||||
ymax = std::max(ymax, std::abs(diff_b - diff_prev_row));
|
||||
if (exposed_b >= 0) {
|
||||
@ -580,9 +578,9 @@ struct PixelStatsForChromacityAdjustment {
|
||||
exposed_blue = eb;
|
||||
db = std::max(xmax, ymax);
|
||||
}
|
||||
void Calc(const Image3F* JXL_RESTRICT opsin) {
|
||||
dx = CalcPlane(&opsin->Plane(0));
|
||||
CalcExposedBlue(&opsin->Plane(1), &opsin->Plane(2));
|
||||
void Calc(const Image3F* JXL_RESTRICT opsin, const Rect& rect) {
|
||||
dx = CalcPlane(&opsin->Plane(0), rect);
|
||||
CalcExposedBlue(&opsin->Plane(1), &opsin->Plane(2), rect);
|
||||
}
|
||||
int HowMuchIsXChannelPixelized() {
|
||||
if (dx >= 0.03) {
|
||||
@ -609,7 +607,7 @@ struct PixelStatsForChromacityAdjustment {
|
||||
};
|
||||
|
||||
void ComputeChromacityAdjustments(const CompressParams& cparams,
|
||||
const Image3F& opsin,
|
||||
const Image3F& opsin, const Rect& rect,
|
||||
FrameHeader* frame_header) {
|
||||
if (frame_header->encoding != FrameEncoding::kVarDCT ||
|
||||
cparams.max_error_mode) {
|
||||
@ -633,7 +631,7 @@ void ComputeChromacityAdjustments(const CompressParams& cparams,
|
||||
// the image would be based on the worst case pixel.
|
||||
PixelStatsForChromacityAdjustment pixel_stats;
|
||||
if (cparams.speed_tier <= SpeedTier::kWombat) {
|
||||
pixel_stats.Calc(&opsin);
|
||||
pixel_stats.Calc(&opsin, rect);
|
||||
}
|
||||
// For X take the most severe adjustment.
|
||||
frame_header->x_qm_scale = std::max<int>(
|
||||
@ -1041,18 +1039,19 @@ Status ComputeJPEGTranscodingData(const jpeg::JPEGData& jpeg_data,
|
||||
|
||||
Status ComputeVarDCTEncodingData(const FrameHeader& frame_header,
|
||||
const Image3F* linear,
|
||||
Image3F* JXL_RESTRICT opsin,
|
||||
Image3F* JXL_RESTRICT opsin, const Rect& rect,
|
||||
const JxlCmsInterface& cms, ThreadPool* pool,
|
||||
ModularFrameEncoder* enc_modular,
|
||||
PassesEncoderState* enc_state,
|
||||
AuxOut* aux_out) {
|
||||
JXL_ASSERT((opsin->xsize() % kBlockDim) == 0 &&
|
||||
(opsin->ysize() % kBlockDim) == 0);
|
||||
JXL_ASSERT((rect.xsize() % kBlockDim) == 0 &&
|
||||
(rect.ysize() % kBlockDim) == 0);
|
||||
JXL_RETURN_IF_ERROR(LossyFrameHeuristics(frame_header, enc_state, enc_modular,
|
||||
linear, opsin, cms, pool, aux_out));
|
||||
linear, opsin, rect, cms, pool,
|
||||
aux_out));
|
||||
|
||||
JXL_RETURN_IF_ERROR(InitializePassesEncoder(frame_header, *opsin, cms, pool,
|
||||
enc_state, enc_modular, aux_out));
|
||||
JXL_RETURN_IF_ERROR(InitializePassesEncoder(
|
||||
frame_header, *opsin, rect, cms, pool, enc_state, enc_modular, aux_out));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1364,6 +1363,7 @@ Status ComputeEncodingData(
|
||||
} else {
|
||||
shared.frame_dim = frame_header.ToFrameDimensions();
|
||||
}
|
||||
|
||||
shared.image_features.patches.SetPassesSharedState(&shared);
|
||||
const FrameDimensions& frame_dim = shared.frame_dim;
|
||||
shared.ac_strategy =
|
||||
@ -1389,6 +1389,14 @@ Status ComputeEncodingData(
|
||||
const size_t black_idx = black_eci - metadata->m.extra_channel_info.data();
|
||||
const ColorEncoding c_enc = metadata->m.color_encoding;
|
||||
|
||||
// Make the image patch bigger than the currently processed group in streaming
|
||||
// mode so that we can take into account border pixels around the group when
|
||||
// computing inverse Gaborish and adaptive quantization map.
|
||||
int max_border = enc_state.streaming_mode ? kBlockDim : 0;
|
||||
Rect frame_rect(0, 0, frame_data.xsize, frame_data.ysize);
|
||||
Rect patch_rect = Rect(x0, y0, xsize, ysize).Extend(max_border, frame_rect);
|
||||
JXL_ASSERT(patch_rect.IsInside(frame_rect));
|
||||
|
||||
Image3F color;
|
||||
std::vector<ImageF> extra_channels(num_extra_channels);
|
||||
for (auto& extra_channel : extra_channels) {
|
||||
@ -1399,11 +1407,11 @@ Status ComputeEncodingData(
|
||||
bool has_interleaved_alpha = false;
|
||||
JxlChunkedFrameInputSource input = frame_data.GetInputSource();
|
||||
if (!frame_data.IsJPEG()) {
|
||||
JXL_RETURN_IF_ERROR(CopyColorChannels(input, x0, y0, xsize, ysize,
|
||||
frame_info, metadata->m, pool, &color,
|
||||
alpha, &has_interleaved_alpha));
|
||||
JXL_RETURN_IF_ERROR(CopyColorChannels(input, patch_rect, frame_info,
|
||||
metadata->m, pool, &color, alpha,
|
||||
&has_interleaved_alpha));
|
||||
}
|
||||
JXL_RETURN_IF_ERROR(CopyExtraChannels(input, x0, y0, xsize, ysize, frame_info,
|
||||
JXL_RETURN_IF_ERROR(CopyExtraChannels(input, patch_rect, frame_info,
|
||||
metadata->m, has_interleaved_alpha,
|
||||
pool, &extra_channels));
|
||||
|
||||
@ -1416,13 +1424,14 @@ Status ComputeEncodingData(
|
||||
Image3F opsin;
|
||||
if (!jpeg_data) {
|
||||
// Allocating a large enough image avoids a copy when padding.
|
||||
opsin = Image3F(RoundUpToBlockDim(xsize), RoundUpToBlockDim(ysize));
|
||||
opsin.ShrinkTo(xsize, ysize);
|
||||
opsin = Image3F(RoundUpToBlockDim(color.xsize()),
|
||||
RoundUpToBlockDim(color.ysize()));
|
||||
opsin.ShrinkTo(color.xsize(), color.ysize());
|
||||
if (frame_header.color_transform == ColorTransform::kXYB &&
|
||||
frame_info.ib_needs_color_transform) {
|
||||
if (frame_header.encoding == FrameEncoding::kVarDCT &&
|
||||
cparams.speed_tier <= SpeedTier::kKitten) {
|
||||
linear_storage = Image3F(xsize, ysize);
|
||||
linear_storage = Image3F(color.xsize(), color.ysize());
|
||||
linear = &linear_storage;
|
||||
}
|
||||
ToXYB(color, c_enc, metadata->m.IntensityTarget(), black, pool, &opsin,
|
||||
@ -1447,9 +1456,16 @@ Status ComputeEncodingData(
|
||||
}
|
||||
PadImageToBlockMultipleInPlace(&opsin);
|
||||
}
|
||||
color = Image3F();
|
||||
|
||||
// Rectangle within opsin that corresponds to the currently processed group in
|
||||
// streaming mode.
|
||||
Rect opsin_rect(x0 - patch_rect.x0(), y0 - patch_rect.y0(),
|
||||
RoundUpToBlockDim(xsize), RoundUpToBlockDim(ysize));
|
||||
|
||||
if (enc_state.initialize_global_state && !jpeg_data) {
|
||||
ComputeChromacityAdjustments(cparams, opsin, &mutable_frame_header);
|
||||
ComputeChromacityAdjustments(cparams, opsin, opsin_rect,
|
||||
&mutable_frame_header);
|
||||
}
|
||||
|
||||
if (!enc_state.streaming_mode) {
|
||||
@ -1466,6 +1482,10 @@ Status ComputeEncodingData(
|
||||
}
|
||||
}
|
||||
|
||||
if (!enc_state.streaming_mode) {
|
||||
opsin_rect = Rect(opsin);
|
||||
}
|
||||
|
||||
if (frame_header.encoding == FrameEncoding::kVarDCT) {
|
||||
enc_state.passes.resize(enc_state.progressive_splitter.GetNumPasses());
|
||||
for (PassesEncoderState::PassData& pass : enc_state.passes) {
|
||||
@ -1475,9 +1495,9 @@ Status ComputeEncodingData(
|
||||
JXL_RETURN_IF_ERROR(ComputeJPEGTranscodingData(
|
||||
*jpeg_data, frame_header, pool, &enc_modular, &enc_state));
|
||||
} else {
|
||||
JXL_RETURN_IF_ERROR(
|
||||
ComputeVarDCTEncodingData(frame_header, linear, &opsin, cms, pool,
|
||||
&enc_modular, &enc_state, aux_out));
|
||||
JXL_RETURN_IF_ERROR(ComputeVarDCTEncodingData(
|
||||
frame_header, linear, &opsin, opsin_rect, cms, pool, &enc_modular,
|
||||
&enc_state, aux_out));
|
||||
}
|
||||
ComputeAllCoeffOrders(enc_state, frame_dim);
|
||||
if (!enc_state.streaming_mode) {
|
||||
|
11
third_party/jpeg-xl/lib/jxl/enc_gaborish.cc
vendored
11
third_party/jpeg-xl/lib/jxl/enc_gaborish.cc
vendored
@ -15,7 +15,8 @@
|
||||
|
||||
namespace jxl {
|
||||
|
||||
void GaborishInverse(Image3F* in_out, float mul[3], ThreadPool* pool) {
|
||||
void GaborishInverse(Image3F* in_out, const Rect& rect, float mul[3],
|
||||
ThreadPool* pool) {
|
||||
WeightsSymmetric5 weights[3];
|
||||
// Only an approximation. One or even two 3x3, and rank-1 (separable) 5x5
|
||||
// are insufficient. The numbers here have been obtained by butteraugli
|
||||
@ -48,11 +49,9 @@ void GaborishInverse(Image3F* in_out, float mul[3], ThreadPool* pool) {
|
||||
// image and reuse the existing planes of the in/out image.
|
||||
ImageF temp(in_out->Plane(2).xsize(), in_out->Plane(2).ysize());
|
||||
CopyImageTo(in_out->Plane(2), &temp);
|
||||
Symmetric5(in_out->Plane(0), Rect(*in_out), weights[0], pool,
|
||||
&in_out->Plane(2));
|
||||
Symmetric5(in_out->Plane(1), Rect(*in_out), weights[1], pool,
|
||||
&in_out->Plane(0));
|
||||
Symmetric5(temp, Rect(*in_out), weights[2], pool, &in_out->Plane(1));
|
||||
Symmetric5(in_out->Plane(0), rect, weights[0], pool, &in_out->Plane(2), rect);
|
||||
Symmetric5(in_out->Plane(1), rect, weights[1], pool, &in_out->Plane(0), rect);
|
||||
Symmetric5(temp, rect, weights[2], pool, &in_out->Plane(1), rect);
|
||||
// Now planes are 1, 2, 0.
|
||||
in_out->Plane(0).Swap(in_out->Plane(1));
|
||||
// 2 1 0
|
||||
|
3
third_party/jpeg-xl/lib/jxl/enc_gaborish.h
vendored
3
third_party/jpeg-xl/lib/jxl/enc_gaborish.h
vendored
@ -19,7 +19,8 @@ namespace jxl {
|
||||
// Used in encoder to reduce the impact of the decoder's smoothing.
|
||||
// This is not exact. Works in-place to reduce memory use.
|
||||
// The input is typically in XYB space.
|
||||
void GaborishInverse(Image3F* in_out, float mul[3], ThreadPool* pool);
|
||||
void GaborishInverse(Image3F* in_out, const Rect& rect, float mul[3],
|
||||
ThreadPool* pool);
|
||||
|
||||
} // namespace jxl
|
||||
|
||||
|
@ -47,7 +47,7 @@ void TestRoundTrip(const Image3F& in, float max_l1) {
|
||||
w,
|
||||
w,
|
||||
};
|
||||
GaborishInverse(&fwd, weights, null_pool);
|
||||
GaborishInverse(&fwd, Rect(fwd), weights, null_pool);
|
||||
JXL_ASSERT_OK(VerifyRelativeError(in, fwd, max_l1, 1E-4f, _));
|
||||
}
|
||||
|
||||
|
10
third_party/jpeg-xl/lib/jxl/enc_group.cc
vendored
10
third_party/jpeg-xl/lib/jxl/enc_group.cc
vendored
@ -357,15 +357,17 @@ void QuantizeRoundtripYBlockAC(PassesEncoderState* enc_state, const size_t size,
|
||||
}
|
||||
|
||||
void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
|
||||
const Image3F& opsin, Image3F* dc) {
|
||||
const Image3F& opsin, const Rect& rect, Image3F* dc) {
|
||||
const Rect block_group_rect =
|
||||
enc_state->shared.frame_dim.BlockGroupRect(group_idx);
|
||||
const Rect group_rect = enc_state->shared.frame_dim.GroupRect(group_idx);
|
||||
const Rect cmap_rect(
|
||||
block_group_rect.x0() / kColorTileDimInBlocks,
|
||||
block_group_rect.y0() / kColorTileDimInBlocks,
|
||||
DivCeil(block_group_rect.xsize(), kColorTileDimInBlocks),
|
||||
DivCeil(block_group_rect.ysize(), kColorTileDimInBlocks));
|
||||
const Rect group_rect =
|
||||
enc_state->shared.frame_dim.GroupRect(group_idx).Translate(rect.x0(),
|
||||
rect.y0());
|
||||
|
||||
const size_t xsize_blocks = block_group_rect.xsize();
|
||||
const size_t ysize_blocks = block_group_rect.ysize();
|
||||
@ -504,9 +506,9 @@ HWY_AFTER_NAMESPACE();
|
||||
namespace jxl {
|
||||
HWY_EXPORT(ComputeCoefficients);
|
||||
void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
|
||||
const Image3F& opsin, Image3F* dc) {
|
||||
const Image3F& opsin, const Rect& rect, Image3F* dc) {
|
||||
return HWY_DYNAMIC_DISPATCH(ComputeCoefficients)(group_idx, enc_state, opsin,
|
||||
dc);
|
||||
rect, dc);
|
||||
}
|
||||
|
||||
Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,
|
||||
|
2
third_party/jpeg-xl/lib/jxl/enc_group.h
vendored
2
third_party/jpeg-xl/lib/jxl/enc_group.h
vendored
@ -20,7 +20,7 @@ struct PassesEncoderState;
|
||||
|
||||
// Fills DC
|
||||
void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
|
||||
const Image3F& opsin, Image3F* dc);
|
||||
const Image3F& opsin, const Rect& rect, Image3F* dc);
|
||||
|
||||
Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,
|
||||
size_t histogram_idx,
|
||||
|
163
third_party/jpeg-xl/lib/jxl/enc_heuristics.cc
vendored
163
third_party/jpeg-xl/lib/jxl/enc_heuristics.cc
vendored
@ -30,8 +30,10 @@ namespace jxl {
|
||||
|
||||
struct AuxOut;
|
||||
|
||||
void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
|
||||
if (enc_state.cparams.decoding_speed_tier >= 1) {
|
||||
void FindBestBlockEntropyModel(const CompressParams& cparams, const ImageI& rqf,
|
||||
const AcStrategyImage& ac_strategy,
|
||||
BlockCtxMap* block_ctx_map) {
|
||||
if (cparams.decoding_speed_tier >= 1) {
|
||||
static constexpr uint8_t kSimpleCtxMap[] = {
|
||||
// Cluster all blocks together
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
|
||||
@ -42,20 +44,18 @@ void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
|
||||
3 * kNumOrders == sizeof(kSimpleCtxMap) / sizeof *kSimpleCtxMap,
|
||||
"Update simple context map");
|
||||
|
||||
auto bcm = enc_state.shared.block_ctx_map;
|
||||
auto bcm = *block_ctx_map;
|
||||
bcm.ctx_map.assign(std::begin(kSimpleCtxMap), std::end(kSimpleCtxMap));
|
||||
bcm.num_ctxs = 2;
|
||||
bcm.num_dc_ctxs = 1;
|
||||
return;
|
||||
}
|
||||
if (enc_state.cparams.speed_tier >= SpeedTier::kFalcon) {
|
||||
if (cparams.speed_tier >= SpeedTier::kFalcon) {
|
||||
return;
|
||||
}
|
||||
const ImageI& rqf = enc_state.shared.raw_quant_field;
|
||||
// No need to change context modeling for small images.
|
||||
size_t tot = rqf.xsize() * rqf.ysize();
|
||||
size_t size_for_ctx_model =
|
||||
(1 << 10) * enc_state.cparams.butteraugli_distance;
|
||||
size_t size_for_ctx_model = (1 << 10) * cparams.butteraugli_distance;
|
||||
if (tot < size_for_ctx_model) return;
|
||||
|
||||
struct OccCounters {
|
||||
@ -79,14 +79,13 @@ void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
|
||||
size_t ord_counts[kNumOrders] = {};
|
||||
};
|
||||
// The OccCounters struct is too big to allocate on the stack.
|
||||
std::unique_ptr<OccCounters> counters(
|
||||
new OccCounters(rqf, enc_state.shared.ac_strategy));
|
||||
std::unique_ptr<OccCounters> counters(new OccCounters(rqf, ac_strategy));
|
||||
|
||||
// Splitting the context model according to the quantization field seems to
|
||||
// mostly benefit only large images.
|
||||
size_t size_for_qf_split = (1 << 13) * enc_state.cparams.butteraugli_distance;
|
||||
size_t size_for_qf_split = (1 << 13) * cparams.butteraugli_distance;
|
||||
size_t num_qf_segments = tot < size_for_qf_split ? 1 : 2;
|
||||
std::vector<uint32_t>& qft = enc_state.shared.block_ctx_map.qf_thresholds;
|
||||
std::vector<uint32_t>& qft = block_ctx_map->qf_thresholds;
|
||||
qft.clear();
|
||||
// Divide the quant field in up to num_qf_segments segments.
|
||||
size_t cumsum = 0;
|
||||
@ -153,7 +152,7 @@ void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
|
||||
remap[i] = remap_remap[remap[i]];
|
||||
}
|
||||
// Write the block context map.
|
||||
auto& ctx_map = enc_state.shared.block_ctx_map.ctx_map;
|
||||
auto& ctx_map = block_ctx_map->ctx_map;
|
||||
ctx_map = remap;
|
||||
ctx_map.resize(remap.size() * 3);
|
||||
// for chroma, only use up to nb_clusters_chroma separate block contexts
|
||||
@ -162,14 +161,13 @@ void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
|
||||
ctx_map[i] = num + Clamp1((int)remap[i % remap.size()], 0,
|
||||
(int)nb_clusters_chroma - 1);
|
||||
}
|
||||
enc_state.shared.block_ctx_map.num_ctxs =
|
||||
block_ctx_map->num_ctxs =
|
||||
*std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
void FindBestDequantMatrices(const CompressParams& cparams,
|
||||
const Image3F& opsin,
|
||||
ModularFrameEncoder* modular_frame_encoder,
|
||||
DequantMatrices* dequant_matrices) {
|
||||
// TODO(veluca): quant matrices for no-gaborish.
|
||||
@ -701,38 +699,47 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
|
||||
PassesEncoderState* enc_state,
|
||||
ModularFrameEncoder* modular_frame_encoder,
|
||||
const Image3F* original_pixels, Image3F* opsin,
|
||||
const JxlCmsInterface& cms, ThreadPool* pool,
|
||||
AuxOut* aux_out) {
|
||||
CompressParams& cparams = enc_state->cparams;
|
||||
const Rect& rect, const JxlCmsInterface& cms,
|
||||
ThreadPool* pool, AuxOut* aux_out) {
|
||||
const CompressParams& cparams = enc_state->cparams;
|
||||
const bool streaming_mode = enc_state->streaming_mode;
|
||||
const bool initialize_global_state = enc_state->initialize_global_state;
|
||||
PassesSharedState& shared = enc_state->shared;
|
||||
const FrameDimensions& frame_dim = shared.frame_dim;
|
||||
ImageFeatures& image_features = shared.image_features;
|
||||
DequantMatrices& matrices = shared.matrices;
|
||||
Quantizer& quantizer = shared.quantizer;
|
||||
ImageI& raw_quant_field = shared.raw_quant_field;
|
||||
ColorCorrelationMap& cmap = shared.cmap;
|
||||
AcStrategyImage& ac_strategy = shared.ac_strategy;
|
||||
ImageB& epf_sharpness = shared.epf_sharpness;
|
||||
BlockCtxMap& block_ctx_map = shared.block_ctx_map;
|
||||
|
||||
// Find and subtract splines.
|
||||
if (!enc_state->streaming_mode &&
|
||||
cparams.speed_tier <= SpeedTier::kSquirrel) {
|
||||
if (!streaming_mode && cparams.speed_tier <= SpeedTier::kSquirrel) {
|
||||
if (cparams.custom_splines.HasAny()) {
|
||||
shared.image_features.splines = cparams.custom_splines;
|
||||
image_features.splines = cparams.custom_splines;
|
||||
} else {
|
||||
shared.image_features.splines = FindSplines(*opsin);
|
||||
image_features.splines = FindSplines(*opsin);
|
||||
}
|
||||
JXL_RETURN_IF_ERROR(shared.image_features.splines.InitializeDrawCache(
|
||||
opsin->xsize(), opsin->ysize(), shared.cmap));
|
||||
shared.image_features.splines.SubtractFrom(opsin);
|
||||
JXL_RETURN_IF_ERROR(image_features.splines.InitializeDrawCache(
|
||||
opsin->xsize(), opsin->ysize(), cmap));
|
||||
image_features.splines.SubtractFrom(opsin);
|
||||
}
|
||||
|
||||
// Find and subtract patches/dots.
|
||||
if (!enc_state->streaming_mode &&
|
||||
if (!streaming_mode &&
|
||||
ApplyOverride(cparams.patches,
|
||||
cparams.speed_tier <= SpeedTier::kSquirrel)) {
|
||||
FindBestPatchDictionary(*opsin, enc_state, cms, pool, aux_out);
|
||||
PatchDictionaryEncoder::SubtractFrom(shared.image_features.patches, opsin);
|
||||
PatchDictionaryEncoder::SubtractFrom(image_features.patches, opsin);
|
||||
}
|
||||
|
||||
static const float kAcQuant = 0.79f;
|
||||
const float quant_dc = InitialQuantDC(cparams.butteraugli_distance);
|
||||
Quantizer& quantizer = enc_state->shared.quantizer;
|
||||
// We don't know the quant field yet, but for computing the global scale
|
||||
// assuming that it will be the same as for Falcon mode is good enough.
|
||||
if (enc_state->initialize_global_state) {
|
||||
if (initialize_global_state) {
|
||||
quantizer.ComputeGlobalScaleAndQuant(
|
||||
quant_dc, kAcQuant / cparams.butteraugli_distance, 0);
|
||||
}
|
||||
@ -757,34 +764,35 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
|
||||
// output: Gaborished XYB, CfL, ACS, raw quant field, EPF control field.
|
||||
|
||||
ArControlFieldHeuristics ar_heuristics;
|
||||
AcStrategyHeuristics acs_heuristics;
|
||||
AcStrategyHeuristics acs_heuristics(cparams);
|
||||
CfLHeuristics cfl_heuristics;
|
||||
ImageF initial_quant_field;
|
||||
ImageF initial_quant_masking;
|
||||
ImageF initial_quant_masking1x1;
|
||||
|
||||
// Compute an initial estimate of the quantization field.
|
||||
// Call InitialQuantField only in Hare mode or slower. Otherwise, rely
|
||||
// on simple heuristics in FindBestAcStrategy, or set a constant for Falcon
|
||||
// mode.
|
||||
if (cparams.speed_tier > SpeedTier::kHare) {
|
||||
enc_state->initial_quant_field =
|
||||
ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
|
||||
enc_state->initial_quant_masking =
|
||||
ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
|
||||
initial_quant_field =
|
||||
ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
|
||||
initial_quant_masking =
|
||||
ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
|
||||
float q = kAcQuant / cparams.butteraugli_distance;
|
||||
FillImage(q, &enc_state->initial_quant_field);
|
||||
FillImage(1.0f / (q + 0.001f), &enc_state->initial_quant_masking);
|
||||
FillImage(q, &initial_quant_field);
|
||||
FillImage(1.0f / (q + 0.001f), &initial_quant_masking);
|
||||
} else {
|
||||
// Call this here, as it relies on pre-gaborish values.
|
||||
float butteraugli_distance_for_iqf = cparams.butteraugli_distance;
|
||||
if (!frame_header.loop_filter.gab) {
|
||||
butteraugli_distance_for_iqf *= 0.73f;
|
||||
}
|
||||
enc_state->initial_quant_field = InitialQuantField(
|
||||
butteraugli_distance_for_iqf, *opsin, shared.frame_dim, pool, 1.0f,
|
||||
&enc_state->initial_quant_masking,
|
||||
&enc_state->initial_quant_masking1x1);
|
||||
if (enc_state->initialize_global_state) {
|
||||
quantizer.SetQuantField(quant_dc, enc_state->initial_quant_field,
|
||||
nullptr);
|
||||
initial_quant_field = InitialQuantField(
|
||||
butteraugli_distance_for_iqf, *opsin, rect, pool, 1.0f,
|
||||
&initial_quant_masking, &initial_quant_masking1x1);
|
||||
if (initialize_global_state) {
|
||||
quantizer.SetQuantField(quant_dc, initial_quant_field, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -798,71 +806,67 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
|
||||
0.99406123118127299f,
|
||||
0.99719338015886894f,
|
||||
};
|
||||
GaborishInverse(opsin, weight, pool);
|
||||
GaborishInverse(opsin, rect, weight, pool);
|
||||
}
|
||||
|
||||
if (enc_state->initialize_global_state) {
|
||||
FindBestDequantMatrices(cparams, *opsin, modular_frame_encoder,
|
||||
&enc_state->shared.matrices);
|
||||
if (initialize_global_state) {
|
||||
FindBestDequantMatrices(cparams, modular_frame_encoder, &matrices);
|
||||
}
|
||||
|
||||
cfl_heuristics.Init(*opsin);
|
||||
acs_heuristics.Init(*opsin, enc_state);
|
||||
cfl_heuristics.Init(rect);
|
||||
acs_heuristics.Init(*opsin, rect, initial_quant_field, initial_quant_masking,
|
||||
initial_quant_masking1x1, &matrices);
|
||||
|
||||
auto process_tile = [&](const uint32_t tid, const size_t thread) {
|
||||
size_t n_enc_tiles =
|
||||
DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks);
|
||||
size_t n_enc_tiles = DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks);
|
||||
size_t tx = tid % n_enc_tiles;
|
||||
size_t ty = tid / n_enc_tiles;
|
||||
size_t by0 = ty * kEncTileDimInBlocks;
|
||||
size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks,
|
||||
enc_state->shared.frame_dim.ysize_blocks);
|
||||
size_t by1 =
|
||||
std::min((ty + 1) * kEncTileDimInBlocks, frame_dim.ysize_blocks);
|
||||
size_t bx0 = tx * kEncTileDimInBlocks;
|
||||
size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks,
|
||||
enc_state->shared.frame_dim.xsize_blocks);
|
||||
size_t bx1 =
|
||||
std::min((tx + 1) * kEncTileDimInBlocks, frame_dim.xsize_blocks);
|
||||
Rect r(bx0, by0, bx1 - bx0, by1 - by0);
|
||||
|
||||
// For speeds up to Wombat, we only compute the color correlation map
|
||||
// once we know the transform type and the quantization map.
|
||||
if (cparams.speed_tier <= SpeedTier::kSquirrel) {
|
||||
cfl_heuristics.ComputeTile(r, *opsin, enc_state->shared.matrices,
|
||||
cfl_heuristics.ComputeTile(r, *opsin, rect, matrices,
|
||||
/*ac_strategy=*/nullptr,
|
||||
/*raw_quant_field=*/nullptr,
|
||||
/*quantizer=*/nullptr, /*fast=*/false, thread,
|
||||
&enc_state->shared.cmap);
|
||||
&cmap);
|
||||
}
|
||||
|
||||
// Choose block sizes.
|
||||
acs_heuristics.ProcessRect(r);
|
||||
acs_heuristics.ProcessRect(r, cmap, &ac_strategy);
|
||||
|
||||
// Choose amount of post-processing smoothing.
|
||||
// TODO(veluca): should this go *after* AdjustQuantField?
|
||||
ar_heuristics.RunRect(frame_header, r, *opsin, enc_state, thread);
|
||||
ar_heuristics.RunRect(cparams, frame_header, r, *opsin, rect,
|
||||
initial_quant_field, ac_strategy, &epf_sharpness,
|
||||
thread);
|
||||
|
||||
// Always set the initial quant field, so we can compute the CfL map with
|
||||
// more accuracy. The initial quant field might change in slower modes, but
|
||||
// adjusting the quant field with butteraugli when all the other encoding
|
||||
// parameters are fixed is likely a more reliable choice anyway.
|
||||
AdjustQuantField(enc_state->shared.ac_strategy, r,
|
||||
cparams.butteraugli_distance,
|
||||
&enc_state->initial_quant_field);
|
||||
quantizer.SetQuantFieldRect(enc_state->initial_quant_field, r,
|
||||
&enc_state->shared.raw_quant_field);
|
||||
AdjustQuantField(ac_strategy, r, cparams.butteraugli_distance,
|
||||
&initial_quant_field);
|
||||
quantizer.SetQuantFieldRect(initial_quant_field, r, &raw_quant_field);
|
||||
|
||||
// Compute a non-default CfL map if we are at Hare speed, or slower.
|
||||
if (cparams.speed_tier <= SpeedTier::kHare) {
|
||||
cfl_heuristics.ComputeTile(
|
||||
r, *opsin, enc_state->shared.matrices, &enc_state->shared.ac_strategy,
|
||||
&enc_state->shared.raw_quant_field, &enc_state->shared.quantizer,
|
||||
/*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread,
|
||||
&enc_state->shared.cmap);
|
||||
r, *opsin, rect, matrices, &ac_strategy, &raw_quant_field, &quantizer,
|
||||
/*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread, &cmap);
|
||||
}
|
||||
};
|
||||
JXL_RETURN_IF_ERROR(RunOnPool(
|
||||
pool, 0,
|
||||
DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks) *
|
||||
DivCeil(enc_state->shared.frame_dim.ysize_blocks,
|
||||
kEncTileDimInBlocks),
|
||||
DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks) *
|
||||
DivCeil(frame_dim.ysize_blocks, kEncTileDimInBlocks),
|
||||
[&](const size_t num_threads) {
|
||||
ar_heuristics.PrepareForThreads(num_threads);
|
||||
cfl_heuristics.PrepareForThreads(num_threads);
|
||||
@ -870,23 +874,22 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
|
||||
},
|
||||
process_tile, "Enc Heuristics"));
|
||||
|
||||
acs_heuristics.Finalize(aux_out);
|
||||
if (cparams.speed_tier <= SpeedTier::kHare &&
|
||||
enc_state->initialize_global_state) {
|
||||
acs_heuristics.Finalize(frame_dim, ac_strategy, aux_out);
|
||||
if (cparams.speed_tier <= SpeedTier::kHare && initialize_global_state) {
|
||||
cfl_heuristics.ComputeDC(/*fast=*/cparams.speed_tier >= SpeedTier::kWombat,
|
||||
&enc_state->shared.cmap);
|
||||
&cmap);
|
||||
}
|
||||
|
||||
// Refine quantization levels.
|
||||
if (!enc_state->streaming_mode) {
|
||||
FindBestQuantizer(frame_header, original_pixels, *opsin, enc_state, cms,
|
||||
pool, aux_out);
|
||||
if (!streaming_mode) {
|
||||
FindBestQuantizer(frame_header, original_pixels, *opsin,
|
||||
initial_quant_field, enc_state, cms, pool, aux_out);
|
||||
}
|
||||
|
||||
// Choose a context model that depends on the amount of quantization for AC.
|
||||
if (cparams.speed_tier < SpeedTier::kFalcon &&
|
||||
enc_state->initialize_global_state) {
|
||||
FindBestBlockEntropyModel(*enc_state);
|
||||
if (cparams.speed_tier < SpeedTier::kFalcon && initialize_global_state) {
|
||||
FindBestBlockEntropyModel(cparams, raw_quant_field, ac_strategy,
|
||||
&block_ctx_map);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
4
third_party/jpeg-xl/lib/jxl/enc_heuristics.h
vendored
4
third_party/jpeg-xl/lib/jxl/enc_heuristics.h
vendored
@ -37,8 +37,8 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
|
||||
PassesEncoderState* enc_state,
|
||||
ModularFrameEncoder* modular_frame_encoder,
|
||||
const Image3F* original_pixels, Image3F* opsin,
|
||||
const JxlCmsInterface& cms, ThreadPool* pool,
|
||||
AuxOut* aux_out);
|
||||
const Rect& rect, const JxlCmsInterface& cms,
|
||||
ThreadPool* pool, AuxOut* aux_out);
|
||||
|
||||
void FindBestBlockEntropyModel(PassesEncoderState& enc_state);
|
||||
|
||||
|
2
third_party/jpeg-xl/lib/jxl/enc_modular.cc
vendored
2
third_party/jpeg-xl/lib/jxl/enc_modular.cc
vendored
@ -490,7 +490,7 @@ Status ModularFrameEncoder::ComputeEncodingData(
|
||||
if (do_color && frame_header.loop_filter.gab) {
|
||||
float w = 0.9908511000000001f;
|
||||
float weights[3] = {w, w, w};
|
||||
GaborishInverse(color, weights, pool);
|
||||
GaborishInverse(color, Rect(*color), weights, pool);
|
||||
}
|
||||
|
||||
if (do_color && metadata.bit_depth.bits_per_sample <= 16 &&
|
||||
|
4
third_party/jpeg-xl/lib/jxl/encode.cc
vendored
4
third_party/jpeg-xl/lib/jxl/encode.cc
vendored
@ -17,6 +17,7 @@
|
||||
#include "lib/jxl/base/byte_order.h"
|
||||
#include "lib/jxl/base/common.h"
|
||||
#include "lib/jxl/base/data_parallel.h"
|
||||
#include "lib/jxl/base/exif.h"
|
||||
#include "lib/jxl/base/printf_macros.h"
|
||||
#include "lib/jxl/base/span.h"
|
||||
#include "lib/jxl/base/status.h"
|
||||
@ -31,7 +32,6 @@
|
||||
#include "lib/jxl/enc_icc_codec.h"
|
||||
#include "lib/jxl/enc_params.h"
|
||||
#include "lib/jxl/encode_internal.h"
|
||||
#include "lib/jxl/exif.h"
|
||||
#include "lib/jxl/jpeg/enc_jpeg_data.h"
|
||||
#include "lib/jxl/luminance.h"
|
||||
#include "lib/jxl/memory_manager_internal.h"
|
||||
@ -2252,7 +2252,7 @@ JxlEncoderStatus JxlEncoderAddImageFrameInternal(
|
||||
auto frame_state = JxlFastLosslessPrepareFrame(
|
||||
frame_data.GetInputSource(), xsize, ysize, num_channels,
|
||||
frame_settings->enc->metadata.m.bit_depth.bits_per_sample, big_endian,
|
||||
/*effort=*/2);
|
||||
/*effort=*/2, /*oneshot=*/!frame_data.StreamingInput());
|
||||
if (!streaming) {
|
||||
JxlFastLosslessProcessFrame(frame_state, /*is_last=*/false,
|
||||
frame_settings->enc->thread_pool.get(),
|
||||
|
@ -267,6 +267,8 @@ class JxlEncoderChunkedFrameAdapter {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StreamingInput() const { return has_input_source_; }
|
||||
|
||||
const size_t xsize;
|
||||
const size_t ysize;
|
||||
|
||||
|
61
third_party/jpeg-xl/lib/jxl/encode_test.cc
vendored
61
third_party/jpeg-xl/lib/jxl/encode_test.cc
vendored
@ -2009,3 +2009,64 @@ TEST_P(EncoderStreamingTest, ChunkedAndOutputCallback) {
|
||||
JXL_GTEST_INSTANTIATE_TEST_SUITE_P(
|
||||
EncoderStreamingTest, EncoderStreamingTest,
|
||||
testing::ValuesIn(StreamingTestParam::All()));
|
||||
|
||||
TEST(EncoderTest, CMYK) {
|
||||
size_t xsize = 257;
|
||||
size_t ysize = 259;
|
||||
jxl::test::TestImage image;
|
||||
image.SetDimensions(xsize, ysize)
|
||||
.SetDataType(JXL_TYPE_UINT8)
|
||||
.SetChannels(3)
|
||||
.SetAllBitDepths(8);
|
||||
image.AddFrame().RandomFill();
|
||||
jxl::test::TestImage ec_image;
|
||||
ec_image.SetDataType(JXL_TYPE_UINT8)
|
||||
.SetDimensions(xsize, ysize)
|
||||
.SetChannels(1)
|
||||
.SetAllBitDepths(8);
|
||||
ec_image.AddFrame().RandomFill();
|
||||
const auto& frame = image.ppf().frames[0].color;
|
||||
const auto& ec_frame = ec_image.ppf().frames[0].color;
|
||||
JxlBasicInfo basic_info = image.ppf().info;
|
||||
basic_info.xsize = xsize;
|
||||
basic_info.ysize = ysize;
|
||||
basic_info.num_extra_channels = 1;
|
||||
basic_info.uses_original_profile = JXL_TRUE;
|
||||
|
||||
std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
|
||||
JxlEncoderPtr enc_ptr = JxlEncoderMake(nullptr);
|
||||
JxlEncoderStruct* enc = enc_ptr.get();
|
||||
ASSERT_NE(nullptr, enc);
|
||||
JxlEncoderFrameSettings* frame_settings =
|
||||
JxlEncoderFrameSettingsCreate(enc, NULL);
|
||||
|
||||
EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
|
||||
JxlExtraChannelInfo channel_info;
|
||||
JxlExtraChannelType channel_type = JXL_CHANNEL_BLACK;
|
||||
JxlEncoderInitExtraChannelInfo(channel_type, &channel_info);
|
||||
EXPECT_EQ(JXL_ENC_SUCCESS,
|
||||
JxlEncoderSetExtraChannelInfo(enc, 0, &channel_info));
|
||||
const std::vector<uint8_t> icc = jxl::test::ReadTestData(
|
||||
"external/Compact-ICC-Profiles/profiles/"
|
||||
"CGATS001Compat-v2-micro.icc");
|
||||
EXPECT_EQ(JXL_ENC_SUCCESS,
|
||||
JxlEncoderSetICCProfile(enc, icc.data(), icc.size()));
|
||||
EXPECT_EQ(JXL_ENC_SUCCESS,
|
||||
JxlEncoderAddImageFrame(frame_settings, &frame.format,
|
||||
frame.pixels(), frame.pixels_size));
|
||||
EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetExtraChannelBuffer(
|
||||
frame_settings, &ec_frame.format,
|
||||
ec_frame.pixels(), ec_frame.pixels_size, 0));
|
||||
JxlEncoderCloseInput(frame_settings->enc);
|
||||
uint8_t* next_out = compressed.data();
|
||||
size_t avail_out = compressed.size();
|
||||
ProcessEncoder(enc, compressed, next_out, avail_out);
|
||||
|
||||
jxl::extras::JXLDecompressParams dparams;
|
||||
dparams.accepted_formats = {
|
||||
{3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0},
|
||||
};
|
||||
jxl::extras::PackedPixelFile ppf;
|
||||
EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams,
|
||||
nullptr, &ppf, nullptr));
|
||||
}
|
||||
|
4
third_party/jpeg-xl/lib/jxl/fields.cc
vendored
4
third_party/jpeg-xl/lib/jxl/fields.cc
vendored
@ -5,10 +5,10 @@
|
||||
|
||||
#include "lib/jxl/fields.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cinttypes>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <hwy/base.h>
|
||||
|
||||
#include "lib/jxl/base/bits.h"
|
||||
|
11
third_party/jpeg-xl/lib/jxl/fields.h
vendored
11
third_party/jpeg-xl/lib/jxl/fields.h
vendored
@ -8,15 +8,12 @@
|
||||
|
||||
// Forward/backward-compatible 'bundles' with auto-serialized 'fields'.
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <cinttypes>
|
||||
#include <cmath> // abs
|
||||
#include <cstdarg>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "lib/jxl/base/bits.h"
|
||||
#include "lib/jxl/base/compiler_specific.h"
|
||||
|
@ -81,8 +81,8 @@ void AppendKeyword(const Tag& keyword, PaddedBytes* data) {
|
||||
}
|
||||
|
||||
// Checks if a + b > size, taking possible integer overflow into account.
|
||||
Status CheckOutOfBounds(size_t a, size_t b, size_t size) {
|
||||
size_t pos = a + b;
|
||||
Status CheckOutOfBounds(uint64_t a, uint64_t b, uint64_t size) {
|
||||
uint64_t pos = a + b;
|
||||
if (pos > size) return JXL_FAILURE("Out of bounds");
|
||||
if (pos < a) return JXL_FAILURE("Out of bounds"); // overflow happened
|
||||
return true;
|
||||
|
@ -92,7 +92,7 @@ void EncodeKeyword(const Tag& keyword, uint8_t* data, size_t size, size_t pos);
|
||||
void AppendKeyword(const Tag& keyword, PaddedBytes* data);
|
||||
|
||||
// Checks if a + b > size, taking possible integer overflow into account.
|
||||
Status CheckOutOfBounds(size_t a, size_t b, size_t size);
|
||||
Status CheckOutOfBounds(uint64_t a, uint64_t b, uint64_t size);
|
||||
Status CheckIs32Bit(uint64_t v);
|
||||
|
||||
const Span<const uint8_t> ICCInitialHeaderPrediction();
|
||||
|
12
third_party/jpeg-xl/lib/jxl/image.h
vendored
12
third_party/jpeg-xl/lib/jxl/image.h
vendored
@ -8,7 +8,11 @@
|
||||
|
||||
// SIMD/multicore-friendly planar image representation with row accessors.
|
||||
|
||||
#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
|
||||
defined(THREAD_SANITIZER)
|
||||
#include <inttypes.h>
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@ -324,6 +328,14 @@ class RectT {
|
||||
return CeilShiftRight(shift, shift);
|
||||
}
|
||||
|
||||
RectT<T> Extend(T border, RectT<T> parent) {
|
||||
T new_x0 = x0() > parent.x0() + border ? x0() - border : parent.x0();
|
||||
T new_y0 = y0() > parent.y0() + border ? y0() - border : parent.y0();
|
||||
T new_x1 = x1() + border > parent.x1() ? parent.x1() : x1() + border;
|
||||
T new_y1 = y1() + border > parent.y1() ? parent.y1() : y1() + border;
|
||||
return RectT<T>(new_x0, new_y0, new_x1 - new_x0, new_y1 - new_y0);
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
RectT<U> As() const {
|
||||
return RectT<U>(U(x0_), U(y0_), U(xsize_), U(ysize_));
|
||||
|
@ -5,8 +5,9 @@
|
||||
|
||||
#include "lib/jxl/modular/encoding/enc_debug_tree.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <cinttypes>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "lib/jxl/base/os_macros.h"
|
||||
#include "lib/jxl/base/printf_macros.h"
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#include "lib/jxl/modular/transform/transform.h"
|
||||
|
||||
#include <cinttypes>
|
||||
|
||||
#include "lib/jxl/base/printf_macros.h"
|
||||
#include "lib/jxl/fields.h"
|
||||
#include "lib/jxl/modular/modular_image.h"
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#include "lib/jxl/render_pipeline/stage_write.h"
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "lib/jxl/alpha.h"
|
||||
#include "lib/jxl/base/common.h"
|
||||
#include "lib/jxl/dec_cache.h"
|
||||
@ -21,6 +23,7 @@ namespace jxl {
|
||||
namespace HWY_NAMESPACE {
|
||||
|
||||
// These templates are not found via ADL.
|
||||
using hwy::HWY_NAMESPACE::Add;
|
||||
using hwy::HWY_NAMESPACE::Clamp;
|
||||
using hwy::HWY_NAMESPACE::Div;
|
||||
using hwy::HWY_NAMESPACE::Max;
|
||||
@ -30,6 +33,78 @@ using hwy::HWY_NAMESPACE::Or;
|
||||
using hwy::HWY_NAMESPACE::Rebind;
|
||||
using hwy::HWY_NAMESPACE::ShiftLeftSame;
|
||||
using hwy::HWY_NAMESPACE::ShiftRightSame;
|
||||
using hwy::HWY_NAMESPACE::VFromD;
|
||||
|
||||
// 8x8 ordered dithering pattern from
|
||||
// https://en.wikipedia.org/wiki/Ordered_dithering
|
||||
// scaled to have an average of 0 and be fully contained in (-0.5, 0.5).
|
||||
// Matrix is duplicated in width to avoid inconsistencies or out-of-bound-reads
|
||||
// if doing unaligned operations.
|
||||
const float kDither[(2 * 8) * 8] = {
|
||||
-0.4921875, 0.0078125, -0.3671875, 0.1328125, //
|
||||
-0.4609375, 0.0390625, -0.3359375, 0.1640625, //
|
||||
-0.4921875, 0.0078125, -0.3671875, 0.1328125, //
|
||||
-0.4609375, 0.0390625, -0.3359375, 0.1640625, //
|
||||
//
|
||||
0.2578125, -0.2421875, 0.3828125, -0.1171875, //
|
||||
0.2890625, -0.2109375, 0.4140625, -0.0859375, //
|
||||
0.2578125, -0.2421875, 0.3828125, -0.1171875, //
|
||||
0.2890625, -0.2109375, 0.4140625, -0.0859375, //
|
||||
//
|
||||
-0.3046875, 0.1953125, -0.4296875, 0.0703125, //
|
||||
-0.2734375, 0.2265625, -0.3984375, 0.1015625, //
|
||||
-0.3046875, 0.1953125, -0.4296875, 0.0703125, //
|
||||
-0.2734375, 0.2265625, -0.3984375, 0.1015625, //
|
||||
//
|
||||
0.4453125, -0.0546875, 0.3203125, -0.1796875, //
|
||||
0.4765625, -0.0234375, 0.3515625, -0.1484375, //
|
||||
0.4453125, -0.0546875, 0.3203125, -0.1796875, //
|
||||
0.4765625, -0.0234375, 0.3515625, -0.1484375, //
|
||||
//
|
||||
-0.4453125, 0.0546875, -0.3203125, 0.1796875, //
|
||||
-0.4765625, 0.0234375, -0.3515625, 0.1484375, //
|
||||
-0.4453125, 0.0546875, -0.3203125, 0.1796875, //
|
||||
-0.4765625, 0.0234375, -0.3515625, 0.1484375, //
|
||||
//
|
||||
0.3046875, -0.1953125, 0.4296875, -0.0703125, //
|
||||
0.2734375, -0.2265625, 0.3984375, -0.1015625, //
|
||||
0.3046875, -0.1953125, 0.4296875, -0.0703125, //
|
||||
0.2734375, -0.2265625, 0.3984375, -0.1015625, //
|
||||
//
|
||||
-0.2578125, 0.2421875, -0.3828125, 0.1171875, //
|
||||
-0.2890625, 0.2109375, -0.4140625, 0.0859375, //
|
||||
-0.2578125, 0.2421875, -0.3828125, 0.1171875, //
|
||||
-0.2890625, 0.2109375, -0.4140625, 0.0859375, //
|
||||
//
|
||||
0.4921875, -0.0078125, 0.3671875, -0.1328125, //
|
||||
0.4609375, -0.0390625, 0.3359375, -0.1640625, //
|
||||
0.4921875, -0.0078125, 0.3671875, -0.1328125, //
|
||||
0.4609375, -0.0390625, 0.3359375, -0.1640625, //
|
||||
};
|
||||
|
||||
using DF = HWY_FULL(float);
|
||||
|
||||
// Converts `v` to an appropriate value for the given unsigned type.
|
||||
// If the unsigned type is an 8-bit type, performs ordered dithering.
|
||||
template <typename T>
|
||||
VFromD<Rebind<T, DF>> MakeUnsigned(VFromD<DF> v, size_t x0, size_t y0,
|
||||
VFromD<DF> mul) {
|
||||
static_assert(std::is_unsigned<T>::value, "T must be an unsigned type");
|
||||
using DU = Rebind<T, DF>;
|
||||
v = Mul(v, mul);
|
||||
// TODO(veluca): if constexpr with C++17
|
||||
if (sizeof(T) == 1) {
|
||||
size_t pos = (y0 % 8) * (2 * 8) + (x0 % 8);
|
||||
#if HWY_TARGET != HWY_SCALAR
|
||||
auto dither = LoadDup128(DF(), kDither + pos);
|
||||
#else
|
||||
auto dither = LoadU(DF(), kDither + pos);
|
||||
#endif
|
||||
v = Add(v, dither);
|
||||
}
|
||||
v = Clamp(Zero(DF()), v, mul);
|
||||
return DemoteTo(DU(), NearestInt(v));
|
||||
}
|
||||
|
||||
class WriteToOutputStage : public RenderPipelineStage {
|
||||
public:
|
||||
@ -229,14 +304,14 @@ class WriteToOutputStage : public RenderPipelineStage {
|
||||
if (out.data_type_ == JXL_TYPE_UINT8) {
|
||||
uint8_t* JXL_RESTRICT temp =
|
||||
reinterpret_cast<uint8_t*>(temp_out_[thread_id].get());
|
||||
StoreUnsignedRow(out, input, len, temp);
|
||||
StoreUnsignedRow(out, input, len, temp, xstart, ypos);
|
||||
WriteToOutput(out, thread_id, ypos, xstart, len, temp);
|
||||
} else if (out.data_type_ == JXL_TYPE_UINT16 ||
|
||||
out.data_type_ == JXL_TYPE_FLOAT16) {
|
||||
uint16_t* JXL_RESTRICT temp =
|
||||
reinterpret_cast<uint16_t*>(temp_out_[thread_id].get());
|
||||
if (out.data_type_ == JXL_TYPE_UINT16) {
|
||||
StoreUnsignedRow(out, input, len, temp);
|
||||
StoreUnsignedRow(out, input, len, temp, xstart, ypos);
|
||||
} else {
|
||||
StoreFloat16Row(out, input, len, temp);
|
||||
}
|
||||
@ -289,10 +364,8 @@ class WriteToOutputStage : public RenderPipelineStage {
|
||||
|
||||
template <typename T>
|
||||
void StoreUnsignedRow(const Output& out, const float* input[4], size_t len,
|
||||
T* output) const {
|
||||
T* output, size_t xstart, size_t ypos) const {
|
||||
const HWY_FULL(float) d;
|
||||
auto zero = Zero(d);
|
||||
auto one = Set(d, 1.0f);
|
||||
auto mul = Set(d, (1u << (out.bits_per_sample_)) - 1);
|
||||
const Rebind<T, decltype(d)> du;
|
||||
const size_t padding = RoundUpTo(len, Lanes(d)) - len;
|
||||
@ -301,35 +374,32 @@ class WriteToOutputStage : public RenderPipelineStage {
|
||||
}
|
||||
if (out.num_channels_ == 1) {
|
||||
for (size_t i = 0; i < len; i += Lanes(d)) {
|
||||
auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
|
||||
StoreU(DemoteTo(du, NearestInt(v0)), du, &output[i]);
|
||||
StoreU(MakeUnsigned<T>(LoadU(d, &input[0][i]), xstart + i, ypos, mul),
|
||||
du, &output[i]);
|
||||
}
|
||||
} else if (out.num_channels_ == 2) {
|
||||
for (size_t i = 0; i < len; i += Lanes(d)) {
|
||||
auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
|
||||
auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul);
|
||||
StoreInterleaved2(DemoteTo(du, NearestInt(v0)),
|
||||
DemoteTo(du, NearestInt(v1)), du, &output[2 * i]);
|
||||
StoreInterleaved2(
|
||||
MakeUnsigned<T>(LoadU(d, &input[0][i]), xstart + i, ypos, mul),
|
||||
MakeUnsigned<T>(LoadU(d, &input[1][i]), xstart + i, ypos, mul), du,
|
||||
&output[2 * i]);
|
||||
}
|
||||
} else if (out.num_channels_ == 3) {
|
||||
for (size_t i = 0; i < len; i += Lanes(d)) {
|
||||
auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
|
||||
auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul);
|
||||
auto v2 = Mul(Clamp(zero, LoadU(d, &input[2][i]), one), mul);
|
||||
StoreInterleaved3(DemoteTo(du, NearestInt(v0)),
|
||||
DemoteTo(du, NearestInt(v1)),
|
||||
DemoteTo(du, NearestInt(v2)), du, &output[3 * i]);
|
||||
StoreInterleaved3(
|
||||
MakeUnsigned<T>(LoadU(d, &input[0][i]), xstart + i, ypos, mul),
|
||||
MakeUnsigned<T>(LoadU(d, &input[1][i]), xstart + i, ypos, mul),
|
||||
MakeUnsigned<T>(LoadU(d, &input[2][i]), xstart + i, ypos, mul), du,
|
||||
&output[3 * i]);
|
||||
}
|
||||
} else if (out.num_channels_ == 4) {
|
||||
for (size_t i = 0; i < len; i += Lanes(d)) {
|
||||
auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
|
||||
auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul);
|
||||
auto v2 = Mul(Clamp(zero, LoadU(d, &input[2][i]), one), mul);
|
||||
auto v3 = Mul(Clamp(zero, LoadU(d, &input[3][i]), one), mul);
|
||||
StoreInterleaved4(DemoteTo(du, NearestInt(v0)),
|
||||
DemoteTo(du, NearestInt(v1)),
|
||||
DemoteTo(du, NearestInt(v2)),
|
||||
DemoteTo(du, NearestInt(v3)), du, &output[4 * i]);
|
||||
StoreInterleaved4(
|
||||
MakeUnsigned<T>(LoadU(d, &input[0][i]), xstart + i, ypos, mul),
|
||||
MakeUnsigned<T>(LoadU(d, &input[1][i]), xstart + i, ypos, mul),
|
||||
MakeUnsigned<T>(LoadU(d, &input[2][i]), xstart + i, ypos, mul),
|
||||
MakeUnsigned<T>(LoadU(d, &input[3][i]), xstart + i, ypos, mul), du,
|
||||
&output[4 * i]);
|
||||
}
|
||||
}
|
||||
msan::PoisonMemory(output + out.num_channels_ * len,
|
||||
|
2
third_party/jpeg-xl/lib/jxl/sanitizers.h
vendored
2
third_party/jpeg-xl/lib/jxl/sanitizers.h
vendored
@ -6,7 +6,6 @@
|
||||
#ifndef LIB_JXL_SANITIZERS_H_
|
||||
#define LIB_JXL_SANITIZERS_H_
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "lib/jxl/base/compiler_specific.h"
|
||||
@ -14,6 +13,7 @@
|
||||
#include "lib/jxl/image.h"
|
||||
|
||||
#if JXL_MEMORY_SANITIZER
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
1
third_party/jpeg-xl/lib/jxl/splines.cc
vendored
1
third_party/jpeg-xl/lib/jxl/splines.cc
vendored
@ -6,6 +6,7 @@
|
||||
#include "lib/jxl/splines.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cinttypes>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
|
5
third_party/jpeg-xl/lib/jxl/test_utils.cc
vendored
5
third_party/jpeg-xl/lib/jxl/test_utils.cc
vendored
@ -7,6 +7,7 @@
|
||||
|
||||
#include <jxl/cms.h>
|
||||
#include <jxl/cms_interface.h>
|
||||
#include <jxl/types.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <fstream>
|
||||
@ -487,6 +488,10 @@ size_t ComparePixels(const uint8_t* a, const uint8_t* b, size_t xsize,
|
||||
// TODO(lode): Set the required precision back to 11 bits when possible.
|
||||
precision = 0.5 * threshold_multiplier / ((1ull << (bits - 1)) - 1ull);
|
||||
}
|
||||
if (format_b.data_type == JXL_TYPE_UINT8) {
|
||||
// Increase the threshold by the maximum difference introduced by dithering.
|
||||
precision += 63.0 / 128.0;
|
||||
}
|
||||
size_t numdiff = 0;
|
||||
for (size_t y = 0; y < ysize; y++) {
|
||||
for (size_t x = 0; x < xsize; x++) {
|
||||
|
2
third_party/jpeg-xl/lib/jxl_lists.bzl
vendored
2
third_party/jpeg-xl/lib/jxl_lists.bzl
vendored
@ -14,6 +14,7 @@ libjxl_base_sources = [
|
||||
"jxl/base/common.h",
|
||||
"jxl/base/compiler_specific.h",
|
||||
"jxl/base/data_parallel.h",
|
||||
"jxl/base/exif.h",
|
||||
"jxl/base/fast_math-inl.h",
|
||||
"jxl/base/float.h",
|
||||
"jxl/base/iaca.h",
|
||||
@ -185,7 +186,6 @@ libjxl_dec_sources = [
|
||||
"jxl/entropy_coder.h",
|
||||
"jxl/epf.cc",
|
||||
"jxl/epf.h",
|
||||
"jxl/exif.h",
|
||||
"jxl/fast_dct-inl.h",
|
||||
"jxl/fast_dct.cc",
|
||||
"jxl/fast_dct.h",
|
||||
|
2
third_party/jpeg-xl/lib/jxl_lists.cmake
vendored
2
third_party/jpeg-xl/lib/jxl_lists.cmake
vendored
@ -14,6 +14,7 @@ set(JPEGXL_INTERNAL_BASE_SOURCES
|
||||
jxl/base/common.h
|
||||
jxl/base/compiler_specific.h
|
||||
jxl/base/data_parallel.h
|
||||
jxl/base/exif.h
|
||||
jxl/base/fast_math-inl.h
|
||||
jxl/base/float.h
|
||||
jxl/base/iaca.h
|
||||
@ -185,7 +186,6 @@ set(JPEGXL_INTERNAL_DEC_SOURCES
|
||||
jxl/entropy_coder.h
|
||||
jxl/epf.cc
|
||||
jxl/epf.h
|
||||
jxl/exif.h
|
||||
jxl/fast_dct-inl.h
|
||||
jxl/fast_dct.cc
|
||||
jxl/fast_dct.h
|
||||
|
Loading…
x
Reference in New Issue
Block a user