Bug 1873510 - Update libjxl to b26041c708d523ac53bb7d95d4f5c4a5d3b1ce30 r=saschanaz

Differential Revision: https://phabricator.services.mozilla.com/D197950
This commit is contained in:
Updatebot 2024-01-09 08:26:00 +00:00
parent a1a2edc0b9
commit c02f0ef1c5
61 changed files with 805 additions and 564 deletions

View File

@ -10,9 +10,9 @@ origin:
url: https://github.com/libjxl/libjxl
release: 1d1006ce9dbe4224c1299992b57f28a4f62df9b7 (2023-12-29T13:36:27Z).
release: b26041c708d523ac53bb7d95d4f5c4a5d3b1ce30 (2024-01-08T13:55:50Z).
revision: 1d1006ce9dbe4224c1299992b57f28a4f62df9b7
revision: b26041c708d523ac53bb7d95d4f5c4a5d3b1ce30
license: Apache-2.0

View File

@ -20,3 +20,4 @@ filegroup(
"testdata/position_encoding/*.txt",
]),
)

View File

@ -13,6 +13,7 @@ MYDIR=$(dirname $(realpath "$0"))
# Git revisions we use for the given submodules. Update these whenever you
# update a git submodule.
TESTDATA="873045a9c42ed60721756e26e2a6b32e17415205"
THIRD_PARTY_BROTLI="36533a866ed1ca4b75cf049f4521e4ec5fe24727"
THIRD_PARTY_HIGHWAY="ba0900a4957b929390ab73827235557959234fea"
THIRD_PARTY_SKCMS="42030a771244ba67f86b1c1c76a6493f873c5f91"
@ -79,6 +80,7 @@ EOF
fi
# Sources downloaded from a tarball.
download_github testdata libjxl/testdata
download_github third_party/brotli google/brotli
download_github third_party/highway google/highway
download_github third_party/sjpeg webmproject/sjpeg

View File

@ -133,8 +133,8 @@ Status Encode(const CodecInOut& io, const ColorEncoding& c_desired,
size_t bits_per_sample, const std::string& pathname,
std::vector<uint8_t>* bytes, ThreadPool* pool) {
std::string extension;
const extras::Codec codec = extras::CodecFromPath(
pathname, &bits_per_sample, /* filename */ nullptr, &extension);
const extras::Codec codec =
extras::CodecFromPath(pathname, &bits_per_sample, &extension);
// Warn about incorrect usage of PGM/PGX/PPM - only the latter supports
// color, but CodecFromPath lumps them all together.

View File

@ -22,41 +22,30 @@ namespace {
// Any valid encoding is larger (ensures codecs can read the first few bytes)
constexpr size_t kMinBytes = 9;
void BasenameAndExtension(const std::string& path, std::string* filename,
std::string* extension) {
// Pattern: "png:name" or "png:-"
size_t pos = path.find_first_of(':');
if (pos != std::string::npos) {
*extension = "." + path.substr(0, pos);
*filename = path.substr(pos + 1);
//+ ((path.length() == pos + 2 && path.substr(pos + 1, 1) == "-") ? "" :
//*extension);
return;
}
std::string GetExtension(const std::string& path) {
// Pattern: "name.png"
pos = path.find_last_of('.');
size_t pos = path.find_last_of('.');
if (pos != std::string::npos) {
*extension = path.substr(pos);
*filename = path;
return;
return path.substr(pos);
}
// Extension not found
*filename = path;
*extension = "";
return "";
}
} // namespace
Codec CodecFromPath(std::string path, size_t* JXL_RESTRICT bits_per_sample,
std::string* filename, std::string* extension) {
std::string* extension) {
std::string base;
std::string ext;
BasenameAndExtension(path, &base, &ext);
if (filename) *filename = base;
if (extension) *extension = ext;
std::string ext = GetExtension(path);
if (extension) {
if (extension->empty()) {
*extension = ext;
} else {
ext = *extension;
}
}
std::transform(ext.begin(), ext.end(), ext.begin(), [](char c) {
return std::tolower(c, std::locale::classic());
});

View File

@ -42,7 +42,6 @@ bool CanDecode(Codec codec);
// that Encode() would encode to PFM instead of PPM.
Codec CodecFromPath(std::string path,
size_t* JXL_RESTRICT bits_per_sample = nullptr,
std::string* filename = nullptr,
std::string* extension = nullptr);
// Decodes "bytes" info *ppf.

View File

@ -10,10 +10,12 @@
#include <jxl/decode_cxx.h>
#include <jxl/types.h>
#include <cinttypes>
#include "lib/extras/common.h"
#include "lib/extras/dec/color_description.h"
#include "lib/jxl/base/exif.h"
#include "lib/jxl/base/printf_macros.h"
#include "lib/jxl/exif.h"
namespace jxl {
namespace extras {

View File

@ -8,7 +8,7 @@
#include <jxl/encode.h>
#include <jxl/encode_cxx.h>
#include "lib/jxl/exif.h"
#include "lib/jxl/base/exif.h"
namespace jxl {
namespace extras {

View File

@ -824,8 +824,8 @@ struct JxlChunkedFrameInputSource {
/**
* Callback to retrieve a rectangle of color channel data at a specific
* location. It is guaranteed that xpos and ypos are multiples of 128. xsize,
* ysize will be multiples of 128, unless the resulting rectangle would be out
* location. It is guaranteed that xpos and ypos are multiples of 8. xsize,
* ysize will be multiples of 8, unless the resulting rectangle would be out
* of image bounds. Moreover, xsize and ysize will be at most 2048. The
* returned data will be assumed to be in the format returned by the
* (preceding) call to get_color_channels_pixel_format, except the `align`
@ -868,7 +868,7 @@ struct JxlChunkedFrameInputSource {
/**
* Callback to retrieve a rectangle of extra channel `ec_index` data at a
* specific location. It is guaranteed that xpos and ypos are multiples of
* 128. xsize, ysize will be multiples of 128, unless the resulting rectangle
* 8. xsize, ysize will be multiples of 8, unless the resulting rectangle
* would be out of image bounds. Moreover, xsize and ysize will be at most
* 2048. The returned data will be assumed to be in the format returned by the
* (preceding) call to get_extra_channels_pixel_format_at with the

View File

@ -11,7 +11,12 @@
#include <jxl/codestream_header.h>
#include "lib/jxl/image_metadata.h"
#include <cstddef>
#include <cstdint>
#include <vector>
#include "lib/jxl/base/byte_order.h"
#include "lib/jxl/base/compiler_specific.h"
namespace jxl {
@ -19,7 +24,7 @@ constexpr uint16_t kExifOrientationTag = 274;
// Checks if a blob looks like Exif, and if so, sets bigendian
// according to the tiff endianness
inline bool IsExif(const std::vector<uint8_t>& exif, bool* bigendian) {
JXL_INLINE bool IsExif(const std::vector<uint8_t>& exif, bool* bigendian) {
if (exif.size() < 12) return false; // not enough bytes for a valid exif blob
const uint8_t* t = exif.data();
if (LoadLE32(t) == 0x2A004D4D) {
@ -33,8 +38,8 @@ inline bool IsExif(const std::vector<uint8_t>& exif, bool* bigendian) {
}
// Finds the position of an Exif tag, or 0 if it is not found
inline size_t FindExifTagPosition(const std::vector<uint8_t>& exif,
uint16_t tagname) {
JXL_INLINE size_t FindExifTagPosition(const std::vector<uint8_t>& exif,
uint16_t tagname) {
bool bigendian;
if (!IsExif(exif, &bigendian)) return 0;
const uint8_t* t = exif.data() + 4;
@ -62,8 +67,8 @@ inline size_t FindExifTagPosition(const std::vector<uint8_t>& exif,
// Parses the Exif data just enough to extract any render-impacting info.
// If the Exif data is invalid or could not be parsed, then it is treated
// as a no-op.
inline void InterpretExif(const std::vector<uint8_t>& exif,
JxlOrientation* orientation) {
JXL_INLINE void InterpretExif(const std::vector<uint8_t>& exif,
JxlOrientation* orientation) {
bool bigendian;
if (!IsExif(exif, &bigendian)) return;
size_t o_pos = FindExifTagPosition(exif, kExifOrientationTag);

View File

@ -6,7 +6,6 @@
#ifndef LIB_JXL_CMS_COLOR_ENCODING_CMS_H_
#define LIB_JXL_CMS_COLOR_ENCODING_CMS_H_
#include <jxl/cms.h>
#include <jxl/cms_interface.h>
#include <jxl/color_encoding.h>
#include <jxl/types.h>
@ -20,7 +19,6 @@
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/cms/jxl_cms_internal.h"
namespace jxl {
namespace cms {
@ -521,7 +519,6 @@ struct ColorEncoding {
new_icc.data(), new_icc.size(),
&external, &new_cmyk));
cmyk = new_cmyk;
if (cmyk) return true;
JXL_RETURN_IF_ERROR(FromExternal(external));
icc = std::move(new_icc);
return true;

View File

@ -18,6 +18,7 @@
#include <string>
#include <vector>
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/matrix_ops.h"
#include "lib/jxl/base/span.h" // Bytes
@ -1065,14 +1066,15 @@ static Status MaybeCreateProfileImpl(const JxlColorEncoding& c,
// Returns a representation of the ColorEncoding fields (not icc).
// Example description: "RGB_D65_SRG_Rel_Lin"
static std::string ColorEncodingDescription(const JxlColorEncoding& c) {
static JXL_MAYBE_UNUSED std::string ColorEncodingDescription(
const JxlColorEncoding& c) {
return detail::ColorEncodingDescriptionImpl(c);
}
// NOTE: for XYB colorspace, the created profile can be used to transform a
// *scaled* XYB image (created by ScaleXYB()) to another colorspace.
static Status MaybeCreateProfile(const JxlColorEncoding& c,
std::vector<uint8_t>* icc) {
static JXL_MAYBE_UNUSED Status MaybeCreateProfile(const JxlColorEncoding& c,
std::vector<uint8_t>* icc) {
return detail::MaybeCreateProfileImpl(c, icc);
}

View File

@ -8,8 +8,10 @@
#include <algorithm>
#include <cmath>
#include <utility>
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/cms/transfer_functions.h"
namespace jxl {
@ -122,9 +124,10 @@ class HlgOOTF_Base {
const float blue_Y_;
};
static void GamutMapScalar(float* red, float* green, float* blue,
const float primaries_luminances[3],
float preserve_saturation = 0.1f) {
static JXL_MAYBE_UNUSED void GamutMapScalar(float* red, float* green,
float* blue,
const float primaries_luminances[3],
float preserve_saturation = 0.1f) {
const float luminance = primaries_luminances[0] * *red +
primaries_luminances[1] * *green +
primaries_luminances[2] * *blue;

View File

@ -11,7 +11,6 @@
#include <algorithm>
#include <cmath>
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/status.h"
namespace jxl {

View File

@ -8,7 +8,6 @@
// Metadata for color space conversions.
#include <jxl/cms.h>
#include <jxl/cms_interface.h>
#include <jxl/color_encoding.h>
#include <stddef.h>

View File

@ -76,18 +76,22 @@ void SlowSymmetric3(const ImageF& in, const Rect& rect,
const WeightsSymmetric3& weights, ThreadPool* pool,
ImageF* JXL_RESTRICT out);
void SlowSeparable5(const ImageF& in, const Rect& rect,
void SlowSeparable5(const ImageF& in, const Rect& in_rect,
const WeightsSeparable5& weights, ThreadPool* pool,
ImageF* out);
ImageF* out, const Rect& out_rect);
void SlowSeparable7(const ImageF& in, const Rect& rect,
void SlowSeparable7(const ImageF& in, const Rect& in_rect,
const WeightsSeparable7& weights, ThreadPool* pool,
ImageF* out);
ImageF* out, const Rect& out_rect);
void Symmetric3(const ImageF& in, const Rect& rect,
const WeightsSymmetric3& weights, ThreadPool* pool,
ImageF* out);
void Symmetric5(const ImageF& in, const Rect& in_rect,
const WeightsSymmetric5& weights, ThreadPool* pool,
ImageF* JXL_RESTRICT out, const Rect& out_rect);
void Symmetric5(const ImageF& in, const Rect& rect,
const WeightsSymmetric5& weights, ThreadPool* pool,
ImageF* JXL_RESTRICT out);

View File

@ -239,7 +239,7 @@ void Separable5(const ImageF& in, const Rect& rect,
return Conv::Run(in, rect, weights, pool, out);
}
return SlowSeparable5(in, rect, weights, pool, out);
return SlowSeparable5(in, rect, weights, pool, out, Rect(*out));
}
// NOLINTNEXTLINE(google-readability-namespace-comments)

View File

@ -263,7 +263,7 @@ void Separable7(const ImageF& in, const Rect& rect,
return Conv::Run(in, rect, weights, pool, out);
}
return SlowSeparable7(in, rect, weights, pool, out);
return SlowSeparable7(in, rect, weights, pool, out, Rect(*out));
}
// NOLINTNEXTLINE(google-readability-namespace-comments)

View File

@ -141,19 +141,19 @@ float SlowSeparablePixel(const ImageF& in, const Rect& rect, const int64_t x,
const int64_t y, const int64_t radius,
const float* JXL_RESTRICT horz_weights,
const float* JXL_RESTRICT vert_weights) {
const size_t xsize = rect.xsize();
const size_t ysize = rect.ysize();
const size_t xsize = in.xsize();
const size_t ysize = in.ysize();
const WrapMirror wrap;
float mul = 0.0f;
for (int dy = -radius; dy <= radius; ++dy) {
const float wy = vert_weights[std::abs(dy) * 4];
const size_t sy = wrap(y + dy, ysize);
const size_t sy = wrap(rect.y0() + y + dy, ysize);
JXL_CHECK(sy < ysize);
const float* const JXL_RESTRICT row = rect.ConstRow(in, sy);
const float* const JXL_RESTRICT row = in.ConstRow(sy);
for (int dx = -radius; dx <= radius; ++dx) {
const float wx = horz_weights[std::abs(dx) * 4];
const size_t sx = wrap(x + dx, xsize);
const size_t sx = wrap(rect.x0() + x + dx, xsize);
JXL_CHECK(sx < xsize);
mul += row[sx] * wx * wy;
}
@ -161,48 +161,44 @@ float SlowSeparablePixel(const ImageF& in, const Rect& rect, const int64_t x,
return mul;
}
} // namespace
void SlowSeparable5(const ImageF& in, const Rect& rect,
const WeightsSeparable5& weights, ThreadPool* pool,
ImageF* out) {
template <int R, typename Weights>
void SlowSeparable(const ImageF& in, const Rect& in_rect,
const Weights& weights, ThreadPool* pool, ImageF* out,
const Rect& out_rect) {
JXL_ASSERT(in_rect.xsize() == out_rect.xsize());
JXL_ASSERT(in_rect.ysize() == out_rect.ysize());
JXL_ASSERT(in_rect.IsInside(Rect(in)));
JXL_ASSERT(out_rect.IsInside(Rect(*out)));
const float* horz_weights = &weights.horz[0];
const float* vert_weights = &weights.vert[0];
const size_t ysize = rect.ysize();
const size_t ysize = in_rect.ysize();
JXL_CHECK(RunOnPool(
pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
[&](const uint32_t task, size_t /*thread*/) {
const int64_t y = task;
float* const JXL_RESTRICT row_out = out->Row(y);
for (size_t x = 0; x < rect.xsize(); ++x) {
row_out[x] = SlowSeparablePixel(in, rect, x, y, /*radius=*/2,
float* const JXL_RESTRICT row_out = out_rect.Row(out, y);
for (size_t x = 0; x < in_rect.xsize(); ++x) {
row_out[x] = SlowSeparablePixel(in, in_rect, x, y, /*radius=*/R,
horz_weights, vert_weights);
}
},
"SlowSeparable5"));
"SlowSeparable"));
}
void SlowSeparable7(const ImageF& in, const Rect& rect,
} // namespace
void SlowSeparable5(const ImageF& in, const Rect& in_rect,
const WeightsSeparable5& weights, ThreadPool* pool,
ImageF* out, const Rect& out_rect) {
SlowSeparable<2>(in, in_rect, weights, pool, out, out_rect);
}
void SlowSeparable7(const ImageF& in, const Rect& in_rect,
const WeightsSeparable7& weights, ThreadPool* pool,
ImageF* out) {
const float* horz_weights = &weights.horz[0];
const float* vert_weights = &weights.vert[0];
const size_t ysize = rect.ysize();
JXL_CHECK(RunOnPool(
pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
[&](const uint32_t task, size_t /*thread*/) {
const int64_t y = task;
float* const JXL_RESTRICT row_out = out->Row(y);
for (size_t x = 0; x < rect.xsize(); ++x) {
row_out[x] = SlowSeparablePixel(in, rect, x, y, /*radius=*/3,
horz_weights, vert_weights);
}
},
"SlowSeparable7"));
ImageF* out, const Rect& out_rect) {
SlowSeparable<3>(in, in_rect, weights, pool, out, out_rect);
}
} // namespace jxl

View File

@ -52,7 +52,7 @@ static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix,
const auto in_p2 = LoadU(d, center + 2);
const auto in_m1 = LoadU(d, center - 1);
const auto in_p1 = LoadU(d, center + 1);
const auto in_00 = Load(d, center);
const auto in_00 = LoadU(d, center);
const auto sum_2 = Mul(wx2, Add(in_m2, in_p2));
const auto sum_1 = Mul(wx1, Add(in_m1, in_p1));
const auto sum_0 = Mul(wx0, in_00);
@ -61,8 +61,8 @@ static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix,
// Produces result for one pixel
template <class WrapY>
float Symmetric5Border(const ImageF& in, const Rect& rect, const int64_t ix,
const int64_t iy, const WeightsSymmetric5& weights) {
float Symmetric5Border(const ImageF& in, const int64_t ix, const int64_t iy,
const WeightsSymmetric5& weights) {
const float w0 = weights.c[0];
const float w1 = weights.r[0];
const float w2 = weights.R[0];
@ -70,8 +70,8 @@ float Symmetric5Border(const ImageF& in, const Rect& rect, const int64_t ix,
const float w5 = weights.L[0];
const float w8 = weights.D[0];
const size_t xsize = rect.xsize();
const size_t ysize = rect.ysize();
const size_t xsize = in.xsize();
const size_t ysize = in.ysize();
const WrapY wrap_y;
// Unrolled loop over all 5 rows of the kernel.
float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2);
@ -88,8 +88,8 @@ float Symmetric5Border(const ImageF& in, const Rect& rect, const int64_t ix,
// Produces result for one vector's worth of pixels
template <class WrapY>
static void Symmetric5Interior(const ImageF& in, const Rect& rect,
const int64_t ix, const int64_t iy,
static void Symmetric5Interior(const ImageF& in, const int64_t ix,
const int64_t rix, const int64_t iy,
const WeightsSymmetric5& weights,
float* JXL_RESTRICT row_out) {
const HWY_FULL(float) d;
@ -101,7 +101,7 @@ static void Symmetric5Interior(const ImageF& in, const Rect& rect,
const auto w5 = LoadDup128(d, weights.L);
const auto w8 = LoadDup128(d, weights.D);
const size_t ysize = rect.ysize();
const size_t ysize = in.ysize();
const WrapY wrap_y;
// Unrolled loop over all 5 rows of the kernel.
auto sum0 = WeightedSum(in, wrap_y, ix, iy, ysize, w0, w1, w2);
@ -112,7 +112,7 @@ static void Symmetric5Interior(const ImageF& in, const Rect& rect,
sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 1, ysize, w1, w4, w5));
sum1 = Add(sum1, WeightedSum(in, wrap_y, ix, iy + 1, ysize, w1, w4, w5));
Store(Add(sum0, sum1), d, row_out + ix);
StoreU(Add(sum0, sum1), d, row_out + rix);
}
template <class WrapY>
@ -120,45 +120,44 @@ static void Symmetric5Row(const ImageF& in, const Rect& rect, const int64_t iy,
const WeightsSymmetric5& weights,
float* JXL_RESTRICT row_out) {
const int64_t kRadius = 2;
const size_t xsize = rect.xsize();
const size_t xend = rect.x1();
size_t ix = 0;
size_t rix = 0;
size_t ix = rect.x0();
const HWY_FULL(float) d;
const size_t N = Lanes(d);
const size_t aligned_x = RoundUpTo(kRadius, N);
for (; ix < std::min(aligned_x, xsize); ++ix) {
row_out[ix] = Symmetric5Border<WrapY>(in, rect, ix, iy, weights);
for (; ix < std::min(aligned_x, xend); ++ix, ++rix) {
row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights);
}
for (; ix + N + kRadius <= xsize; ix += N) {
Symmetric5Interior<WrapY>(in, rect, ix, iy, weights, row_out);
for (; ix + N + kRadius <= xend; ix += N, rix += N) {
Symmetric5Interior<WrapY>(in, ix, rix, iy, weights, row_out);
}
for (; ix < xsize; ++ix) {
row_out[ix] = Symmetric5Border<WrapY>(in, rect, ix, iy, weights);
for (; ix < xend; ++ix, ++rix) {
row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights);
}
}
static JXL_NOINLINE void Symmetric5BorderRow(const ImageF& in, const Rect& rect,
const int64_t iy,
const WeightsSymmetric5& weights,
float* JXL_RESTRICT row_out) {
return Symmetric5Row<WrapMirror>(in, rect, iy, weights, row_out);
}
// Semi-vectorized (interior pixels Fonly); called directly like slow::, unlike
// the fully vectorized strategies below.
void Symmetric5(const ImageF& in, const Rect& rect,
void Symmetric5(const ImageF& in, const Rect& in_rect,
const WeightsSymmetric5& weights, ThreadPool* pool,
ImageF* JXL_RESTRICT out) {
const size_t ysize = rect.ysize();
ImageF* JXL_RESTRICT out, const Rect& out_rect) {
JXL_ASSERT(in_rect.xsize() == out_rect.xsize());
JXL_ASSERT(in_rect.ysize() == out_rect.ysize());
const size_t ysize = in_rect.ysize();
JXL_CHECK(RunOnPool(
pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
[&](const uint32_t task, size_t /*thread*/) {
const int64_t iy = task;
const int64_t riy = task;
const int64_t iy = in_rect.y0() + riy;
if (iy < 2 || iy >= static_cast<ssize_t>(ysize) - 2) {
Symmetric5BorderRow(in, rect, iy, weights, out->Row(iy));
if (iy < 2 || iy >= static_cast<ssize_t>(in.ysize()) - 2) {
Symmetric5Row<WrapMirror>(in, in_rect, iy, weights,
out_rect.Row(out, riy));
} else {
Symmetric5Row<WrapUnchanged>(in, rect, iy, weights, out->Row(iy));
Symmetric5Row<WrapUnchanged>(in, in_rect, iy, weights,
out_rect.Row(out, riy));
}
},
"Symmetric5x5Convolution"));
@ -173,10 +172,17 @@ HWY_AFTER_NAMESPACE();
namespace jxl {
HWY_EXPORT(Symmetric5);
void Symmetric5(const ImageF& in, const Rect& in_rect,
const WeightsSymmetric5& weights, ThreadPool* pool,
ImageF* JXL_RESTRICT out, const Rect& out_rect) {
return HWY_DYNAMIC_DISPATCH(Symmetric5)(in, in_rect, weights, pool, out,
out_rect);
}
void Symmetric5(const ImageF& in, const Rect& rect,
const WeightsSymmetric5& weights, ThreadPool* pool,
ImageF* JXL_RESTRICT out) {
return HWY_DYNAMIC_DISPATCH(Symmetric5)(in, rect, weights, pool, out);
return Symmetric5(in, rect, weights, pool, out, Rect(*out));
}
} // namespace jxl

View File

@ -81,21 +81,57 @@ void VerifySymmetric3(const size_t xsize, const size_t ysize, ThreadPool* pool,
JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
}
std::vector<Rect> GenerateTestRectangles(size_t xsize, size_t ysize) {
std::vector<Rect> out;
for (size_t tl : {0, 1, 13}) {
for (size_t br : {0, 1, 13}) {
if (xsize > tl + br && ysize > tl + br) {
out.push_back(Rect(tl, tl, xsize - tl - br, ysize - tl - br));
}
}
}
return out;
}
// Ensures Symmetric and Separable give the same result.
void VerifySymmetric5(const size_t xsize, const size_t ysize, ThreadPool* pool,
Rng* rng) {
const Rect rect(0, 0, xsize, ysize);
ImageF in(xsize, ysize);
GenerateImage(*rng, &in, 0.0f, 1.0f);
ImageF out_expected(xsize, ysize);
ImageF out_actual(xsize, ysize);
for (const Rect& in_rect : GenerateTestRectangles(xsize, ysize)) {
JXL_DEBUG(JXL_DEBUG_CONVOLVE,
"in_rect: %" PRIuS "x%" PRIuS "+%" PRIuS ",%" PRIuS "",
in_rect.xsize(), in_rect.ysize(), in_rect.x0(), in_rect.y0());
{
Rect out_rect = in_rect;
ImageF out_expected(xsize, ysize);
ImageF out_actual(xsize, ysize);
FillImage(-1.0f, &out_expected);
FillImage(-1.0f, &out_actual);
Separable5(in, Rect(in), WeightsSeparable5Lowpass(), pool, &out_expected);
Symmetric5(in, rect, WeightsSymmetric5Lowpass(), pool, &out_actual);
SlowSeparable5(in, in_rect, WeightsSeparable5Lowpass(), pool,
&out_expected, out_rect);
Symmetric5(in, in_rect, WeightsSymmetric5Lowpass(), pool, &out_actual,
out_rect);
JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
JXL_ASSERT_OK(
VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
}
{
Rect out_rect(0, 0, in_rect.xsize(), in_rect.ysize());
ImageF out_expected(out_rect.xsize(), out_rect.ysize());
ImageF out_actual(out_rect.xsize(), out_rect.ysize());
SlowSeparable5(in, in_rect, WeightsSeparable5Lowpass(), pool,
&out_expected, out_rect);
Symmetric5(in, in_rect, WeightsSymmetric5Lowpass(), pool, &out_actual,
out_rect);
JXL_ASSERT_OK(
VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
}
}
}
void VerifySeparable5(const size_t xsize, const size_t ysize, ThreadPool* pool,
@ -109,8 +145,8 @@ void VerifySeparable5(const size_t xsize, const size_t ysize, ThreadPool* pool,
ImageF out_actual(xsize, ysize);
const WeightsSeparable5& weights = WeightsSeparable5Lowpass();
Separable5(in, Rect(in), weights, pool, &out_expected);
SlowSeparable5(in, rect, weights, pool, &out_actual);
SlowSeparable5(in, rect, weights, pool, &out_expected, rect);
Separable5(in, rect, weights, pool, &out_actual);
JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
}
@ -131,8 +167,8 @@ void VerifySeparable7(const size_t xsize, const size_t ysize, ThreadPool* pool,
{HWY_REP4(0.383103f), HWY_REP4(0.241843f),
HWY_REP4(0.060626f), HWY_REP4(0.00598f)}};
SlowSeparable7(in, rect, weights, pool, &out_expected);
Separable7(in, Rect(in), weights, pool, &out_actual);
SlowSeparable7(in, rect, weights, pool, &out_expected, rect);
Separable7(in, rect, weights, pool, &out_actual);
JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
}

View File

@ -790,8 +790,9 @@ void FindBestFirstLevelDivisionForSquare(
}
}
void ProcessRectACS(PassesEncoderState* JXL_RESTRICT enc_state,
const ACSConfig& config, const Rect& rect) {
void ProcessRectACS(const CompressParams& cparams, const ACSConfig& config,
const Rect& rect, const ColorCorrelationMap& cmap,
AcStrategyImage* ac_strategy) {
// Main philosophy here:
// 1. First find best 8x8 transform for each area.
// 2. Merging them into larger transforms where possibly, but
@ -802,9 +803,7 @@ void ProcessRectACS(PassesEncoderState* JXL_RESTRICT enc_state,
// maps happen to be at that resolution, and having
// integral transforms cross these boundaries leads to
// additional complications.
const CompressParams& cparams = enc_state->cparams;
const float butteraugli_target = cparams.butteraugli_distance;
AcStrategyImage* ac_strategy = &enc_state->shared.ac_strategy;
const size_t dct_scratch_size =
3 * (MaxVectorSize() / sizeof(float)) * AcStrategy::kMaxBlockDim;
// TODO(veluca): reuse allocations
@ -821,11 +820,9 @@ void ProcessRectACS(PassesEncoderState* JXL_RESTRICT enc_state,
size_t tx = bx / kColorTileDimInBlocks;
size_t ty = by / kColorTileDimInBlocks;
const float cmap_factors[3] = {
enc_state->shared.cmap.YtoXRatio(
enc_state->shared.cmap.ytox_map.ConstRow(ty)[tx]),
cmap.YtoXRatio(cmap.ytox_map.ConstRow(ty)[tx]),
0.0f,
enc_state->shared.cmap.YtoBRatio(
enc_state->shared.cmap.ytob_map.ConstRow(ty)[tx]),
cmap.YtoBRatio(cmap.ytob_map.ConstRow(ty)[tx]),
};
if (cparams.speed_tier > SpeedTier::kHare) return;
// First compute the best 8x8 transform for each square. Later, we do not
@ -1040,28 +1037,26 @@ HWY_AFTER_NAMESPACE();
namespace jxl {
HWY_EXPORT(ProcessRectACS);
void AcStrategyHeuristics::Init(const Image3F& src,
PassesEncoderState* enc_state) {
this->enc_state = enc_state;
config.dequant = &enc_state->shared.matrices;
const CompressParams& cparams = enc_state->cparams;
void AcStrategyHeuristics::Init(const Image3F& src, const Rect& rect_in,
const ImageF& quant_field, const ImageF& mask,
const ImageF& mask1x1,
DequantMatrices* matrices) {
config.dequant = matrices;
if (cparams.speed_tier >= SpeedTier::kCheetah) {
JXL_CHECK(enc_state->shared.matrices.EnsureComputed(1)); // DCT8 only
JXL_CHECK(matrices->EnsureComputed(1)); // DCT8 only
} else {
uint32_t acs_mask = 0;
// All transforms up to 64x64.
for (size_t i = 0; i < AcStrategy::DCT128X128; i++) {
acs_mask |= (1 << i);
}
JXL_CHECK(enc_state->shared.matrices.EnsureComputed(acs_mask));
JXL_CHECK(matrices->EnsureComputed(acs_mask));
}
// Image row pointers and strides.
config.quant_field_row = enc_state->initial_quant_field.Row(0);
config.quant_field_stride = enc_state->initial_quant_field.PixelsPerRow();
auto& mask = enc_state->initial_quant_masking;
auto& mask1x1 = enc_state->initial_quant_masking1x1;
config.quant_field_row = quant_field.Row(0);
config.quant_field_stride = quant_field.PixelsPerRow();
if (mask.xsize() > 0 && mask.ysize() > 0) {
config.masking_field_row = mask.Row(0);
config.masking_field_stride = mask.PixelsPerRow();
@ -1071,9 +1066,9 @@ void AcStrategyHeuristics::Init(const Image3F& src,
config.masking1x1_field_stride = mask1x1.PixelsPerRow();
}
config.src_rows[0] = src.ConstPlaneRow(0, 0);
config.src_rows[1] = src.ConstPlaneRow(1, 0);
config.src_rows[2] = src.ConstPlaneRow(2, 0);
config.src_rows[0] = rect_in.ConstPlaneRow(src, 0, 0);
config.src_rows[1] = rect_in.ConstPlaneRow(src, 1, 0);
config.src_rows[2] = rect_in.ConstPlaneRow(src, 2, 0);
config.src_stride = src.PixelsPerRow();
// Entropy estimate is composed of two factors:
@ -1093,25 +1088,23 @@ void AcStrategyHeuristics::Init(const Image3F& src,
config.info_loss_multiplier *= pow(ratio, kPow1);
config.zeros_mul *= pow(ratio, kPow2);
config.cost_delta *= pow(ratio, kPow3);
JXL_ASSERT(enc_state->shared.ac_strategy.xsize() ==
enc_state->shared.frame_dim.xsize_blocks);
JXL_ASSERT(enc_state->shared.ac_strategy.ysize() ==
enc_state->shared.frame_dim.ysize_blocks);
}
void AcStrategyHeuristics::ProcessRect(const Rect& rect) {
const CompressParams& cparams = enc_state->cparams;
void AcStrategyHeuristics::ProcessRect(const Rect& rect,
const ColorCorrelationMap& cmap,
AcStrategyImage* ac_strategy) {
// In Falcon mode, use DCT8 everywhere and uniform quantization.
if (cparams.speed_tier >= SpeedTier::kCheetah) {
enc_state->shared.ac_strategy.FillDCT8(rect);
ac_strategy->FillDCT8(rect);
return;
}
HWY_DYNAMIC_DISPATCH(ProcessRectACS)
(enc_state, config, rect);
(cparams, config, rect, cmap, ac_strategy);
}
void AcStrategyHeuristics::Finalize(AuxOut* aux_out) {
const auto& ac_strategy = enc_state->shared.ac_strategy;
void AcStrategyHeuristics::Finalize(const FrameDimensions& frame_dim,
const AcStrategyImage& ac_strategy,
AuxOut* aux_out) {
// Accounting and debug output.
if (aux_out != nullptr) {
aux_out->num_small_blocks =
@ -1147,10 +1140,9 @@ void AcStrategyHeuristics::Finalize(AuxOut* aux_out) {
}
// if (JXL_DEBUG_AC_STRATEGY && WantDebugOutput(aux_out)) {
if (JXL_DEBUG_AC_STRATEGY && WantDebugOutput(enc_state->cparams)) {
DumpAcStrategy(ac_strategy, enc_state->shared.frame_dim.xsize,
enc_state->shared.frame_dim.ysize, "ac_strategy", aux_out,
enc_state->cparams);
if (JXL_DEBUG_AC_STRATEGY && WantDebugOutput(cparams)) {
DumpAcStrategy(ac_strategy, frame_dim.xsize, frame_dim.ysize, "ac_strategy",
aux_out, cparams);
}
}

View File

@ -25,11 +25,11 @@ struct AuxOut;
struct ACSConfig {
const DequantMatrices* JXL_RESTRICT dequant;
float* JXL_RESTRICT quant_field_row;
const float* JXL_RESTRICT quant_field_row;
size_t quant_field_stride;
float* JXL_RESTRICT masking_field_row;
const float* JXL_RESTRICT masking_field_row;
size_t masking_field_stride;
float* JXL_RESTRICT masking1x1_field_row;
const float* JXL_RESTRICT masking1x1_field_row;
size_t masking1x1_field_stride;
const float* JXL_RESTRICT src_rows[3];
size_t src_stride;
@ -43,7 +43,7 @@ struct ACSConfig {
JXL_DASSERT(masking_field_row[by * masking_field_stride + bx] > 0);
return masking_field_row[by * masking_field_stride + bx];
}
float* MaskingPtr1x1(size_t bx, size_t by) const {
const float* MaskingPtr1x1(size_t bx, size_t by) const {
JXL_DASSERT(masking1x1_field_row[by * masking1x1_field_stride + bx] > 0);
return &masking1x1_field_row[by * masking1x1_field_stride + bx];
}
@ -54,11 +54,16 @@ struct ACSConfig {
};
struct AcStrategyHeuristics {
void Init(const Image3F& src, PassesEncoderState* enc_state);
void ProcessRect(const Rect& rect);
void Finalize(AuxOut* aux_out);
AcStrategyHeuristics(const CompressParams& cparams) : cparams(cparams) {}
void Init(const Image3F& src, const Rect& rect_in, const ImageF& quant_field,
const ImageF& mask, const ImageF& mask1x1,
DequantMatrices* matrices);
void ProcessRect(const Rect& rect, const ColorCorrelationMap& cmap,
AcStrategyImage* ac_strategy);
void Finalize(const FrameDimensions& frame_dim,
const AcStrategyImage& ac_strategy, AuxOut* aux_out);
const CompressParams& cparams;
ACSConfig config;
PassesEncoderState* enc_state;
};
} // namespace jxl

View File

@ -164,7 +164,8 @@ V SimpleGamma(const D d, V v) {
template <class D, class V>
V GammaModulation(const D d, const size_t x, const size_t y,
const ImageF& xyb_x, const ImageF& xyb_y, const V out_val) {
const ImageF& xyb_x, const ImageF& xyb_y, const Rect& rect,
const V out_val) {
const float kBias = 0.16f;
JXL_DASSERT(kBias > jxl::cms::kOpsinAbsorbanceBias[0]);
JXL_DASSERT(kBias > jxl::cms::kOpsinAbsorbanceBias[1]);
@ -173,8 +174,8 @@ V GammaModulation(const D d, const size_t x, const size_t y,
auto bias = Set(d, kBias);
auto half = Set(d, 0.5f);
for (size_t dy = 0; dy < 8; ++dy) {
const float* const JXL_RESTRICT row_in_x = xyb_x.Row(y + dy);
const float* const JXL_RESTRICT row_in_y = xyb_y.Row(y + dy);
const float* const JXL_RESTRICT row_in_x = rect.ConstRow(xyb_x, y + dy);
const float* const JXL_RESTRICT row_in_y = rect.ConstRow(xyb_y, y + dy);
for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
const auto iny = Add(Load(d, row_in_y + x + dx), bias);
const auto inx = Load(d, row_in_x + x + dx);
@ -201,7 +202,7 @@ V GammaModulation(const D d, const size_t x, const size_t y,
// Change precision in 8x8 blocks that have high frequency content.
template <class D, class V>
V HfModulation(const D d, const size_t x, const size_t y, const ImageF& xyb,
const V out_val) {
const Rect& rect, const V out_val) {
// Zero out the invalid differences for the rightmost value per row.
const Rebind<uint32_t, D> du;
HWY_ALIGN constexpr uint32_t kMaskRight[kBlockDim] = {~0u, ~0u, ~0u, ~0u,
@ -212,9 +213,9 @@ V HfModulation(const D d, const size_t x, const size_t y, const ImageF& xyb,
static const float valmin = 0.020602694503245016f;
auto valminv = Set(d, valmin);
for (size_t dy = 0; dy < 8; ++dy) {
const float* JXL_RESTRICT row_in = xyb.Row(y + dy) + x;
const float* JXL_RESTRICT row_in = rect.ConstRow(xyb, y + dy) + x;
const float* JXL_RESTRICT row_in_next =
dy == 7 ? row_in : xyb.Row(y + dy + 1) + x;
dy == 7 ? row_in : rect.ConstRow(xyb, y + dy + 1) + x;
// In SCALAR, there is no guarantee of having extra row padding.
// Hence, we need to ensure we don't access pixels outside the row itself.
@ -252,11 +253,8 @@ V HfModulation(const D d, const size_t x, const size_t y, const ImageF& xyb,
void PerBlockModulations(const float butteraugli_target, const ImageF& xyb_x,
const ImageF& xyb_y, const ImageF& xyb_b,
const float scale, const Rect& rect, ImageF* out) {
JXL_ASSERT(SameSize(xyb_x, xyb_y));
JXL_ASSERT(DivCeil(xyb_x.xsize(), kBlockDim) == out->xsize());
JXL_ASSERT(DivCeil(xyb_x.ysize(), kBlockDim) == out->ysize());
const Rect& rect_in, const float scale,
const Rect& rect_out, ImageF* out) {
float base_level = 0.48f * scale;
float kDampenRampStart = 2.0f;
float kDampenRampEnd = 14.0f;
@ -270,16 +268,16 @@ void PerBlockModulations(const float butteraugli_target, const ImageF& xyb_x,
}
const float mul = scale * dampen;
const float add = (1.0f - dampen) * base_level;
for (size_t iy = rect.y0(); iy < rect.y0() + rect.ysize(); iy++) {
for (size_t iy = rect_out.y0(); iy < rect_out.y1(); iy++) {
const size_t y = iy * 8;
float* const JXL_RESTRICT row_out = out->Row(iy);
const HWY_CAPPED(float, kBlockDim) df;
for (size_t ix = rect.x0(); ix < rect.x0() + rect.xsize(); ix++) {
for (size_t ix = rect_out.x0(); ix < rect_out.x1(); ix++) {
size_t x = ix * 8;
auto out_val = Set(df, row_out[ix]);
out_val = ComputeMask(df, out_val);
out_val = HfModulation(df, x, y, xyb_y, out_val);
out_val = GammaModulation(df, x, y, xyb_x, xyb_y, out_val);
out_val = HfModulation(df, x, y, xyb_y, rect_in, out_val);
out_val = GammaModulation(df, x, y, xyb_x, xyb_y, rect_in, out_val);
// We want multiplicative quantization field, so everything
// until this point has been modulating the exponent.
row_out[ix] = FastPow2f(GetLane(out_val) * 1.442695041f) * mul + add;
@ -399,13 +397,6 @@ void FuzzyErosion(const float butteraugli_target, const Rect& from_rect,
}
struct AdaptiveQuantizationImpl {
void Init(const Image3F& xyb) {
JXL_DASSERT(xyb.xsize() % kBlockDim == 0);
JXL_DASSERT(xyb.ysize() % kBlockDim == 0);
const size_t xsize = xyb.xsize();
const size_t ysize = xyb.ysize();
aq_map = ImageF(xsize / kBlockDim, ysize / kBlockDim);
}
void PrepareBuffers(size_t num_threads) {
diff_buffer = ImageF(kEncTileDim + 8, num_threads);
for (size_t i = pre_erosion.size(); i < num_threads; i++) {
@ -415,10 +406,10 @@ struct AdaptiveQuantizationImpl {
}
void ComputeTile(float butteraugli_target, float scale, const Image3F& xyb,
const Rect& rect, const int thread, ImageF* mask,
ImageF* mask1x1) {
const size_t xsize = xyb.xsize();
const size_t ysize = xyb.ysize();
const Rect& rect_in, const Rect& rect_out, const int thread,
ImageF* mask, ImageF* mask1x1) {
const size_t xsize = rect_in.xsize();
const size_t ysize = rect_in.ysize();
// The XYB gamma is 3.0 to be able to decode faster with two muls.
// Butteraugli's gamma is matching the gamma of human eye, around 2.6.
@ -429,11 +420,11 @@ struct AdaptiveQuantizationImpl {
const HWY_FULL(float) df;
size_t y_start = rect.y0() * 8;
size_t y_end = y_start + rect.ysize() * 8;
size_t y_start = rect_out.y0() * 8;
size_t y_end = y_start + rect_out.ysize() * 8;
size_t x_start = rect.x0() * 8;
size_t x_end = x_start + rect.xsize() * 8;
size_t x_start = rect_out.x0() * 8;
size_t x_end = x_start + rect_out.xsize() * 8;
// Computes image (padded to multiple of 8x8) of local pixel differences.
// Subsample both directions by 4.
@ -441,9 +432,9 @@ struct AdaptiveQuantizationImpl {
for (size_t y = y_start; y < y_end; ++y) {
const size_t y2 = y + 1 < ysize ? y + 1 : y;
const size_t y1 = y > 0 ? y - 1 : y;
const float* row_in = xyb.PlaneRow(1, y);
const float* row_in1 = xyb.PlaneRow(1, y1);
const float* row_in2 = xyb.PlaneRow(1, y2);
const float* row_in = rect_in.ConstPlaneRow(xyb, 1, y);
const float* row_in1 = rect_in.ConstPlaneRow(xyb, 1, y1);
const float* row_in2 = rect_in.ConstPlaneRow(xyb, 1, y2);
float* mask1x1_out = mask1x1->Row(y);
auto scalar_pixel1x1 = [&](size_t x) {
const size_t x2 = x + 1 < xsize ? x + 1 : x;
@ -466,9 +457,9 @@ struct AdaptiveQuantizationImpl {
}
if (x_start != 0) x_start -= 4;
if (x_end != xyb.xsize()) x_end += 4;
if (x_end != rect_in.xsize()) x_end += 4;
if (y_start != 0) y_start -= 4;
if (y_end != xyb.ysize()) y_end += 4;
if (y_end != rect_in.ysize()) y_end += 4;
pre_erosion[thread].ShrinkTo((x_end - x_start) / 4, (y_end - y_start) / 4);
static const float limit = 0.2f;
@ -476,9 +467,9 @@ struct AdaptiveQuantizationImpl {
size_t y2 = y + 1 < ysize ? y + 1 : y;
size_t y1 = y > 0 ? y - 1 : y;
const float* row_in = xyb.PlaneRow(1, y);
const float* row_in1 = xyb.PlaneRow(1, y1);
const float* row_in2 = xyb.PlaneRow(1, y2);
const float* row_in = rect_in.ConstPlaneRow(xyb, 1, y);
const float* row_in1 = rect_in.ConstPlaneRow(xyb, 1, y1);
const float* row_in2 = rect_in.ConstPlaneRow(xyb, 1, y2);
float* JXL_RESTRICT row_out = diff_buffer.Row(thread);
auto scalar_pixel = [&](size_t x) {
@ -543,26 +534,25 @@ struct AdaptiveQuantizationImpl {
}
}
Rect from_rect(x_start % 8 == 0 ? 0 : 1, y_start % 8 == 0 ? 0 : 1,
rect.xsize() * 2, rect.ysize() * 2);
FuzzyErosion(butteraugli_target, from_rect, pre_erosion[thread], rect,
rect_out.xsize() * 2, rect_out.ysize() * 2);
FuzzyErosion(butteraugli_target, from_rect, pre_erosion[thread], rect_out,
&aq_map);
for (size_t y = 0; y < rect.ysize(); ++y) {
const float* aq_map_row = rect.ConstRow(aq_map, y);
float* mask_row = rect.Row(mask, y);
for (size_t x = 0; x < rect.xsize(); ++x) {
for (size_t y = 0; y < rect_out.ysize(); ++y) {
const float* aq_map_row = rect_out.ConstRow(aq_map, y);
float* mask_row = rect_out.Row(mask, y);
for (size_t x = 0; x < rect_out.xsize(); ++x) {
mask_row[x] = ComputeMaskForAcStrategyUse(aq_map_row[x]);
}
}
PerBlockModulations(butteraugli_target, xyb.Plane(0), xyb.Plane(1),
xyb.Plane(2), scale, rect, &aq_map);
xyb.Plane(2), rect_in, scale, rect_out, &aq_map);
}
std::vector<ImageF> pre_erosion;
ImageF aq_map;
ImageF diff_buffer;
};
static void Blur1x1Masking(const FrameDimensions& frame_dim, ThreadPool* pool,
ImageF* mask1x1) {
static void Blur1x1Masking(ThreadPool* pool, ImageF* mask1x1) {
// Blur the mask1x1 to obtain the masking image.
// Before blurring it contains an image of absolute value of the
// Laplacian of the intensity channel.
@ -588,47 +578,47 @@ static void Blur1x1Masking(const FrameDimensions& frame_dim, ThreadPool* pool,
{HWY_REP4(normalize_mul * kFilterMask1x1[1])},
{HWY_REP4(normalize_mul * kFilterMask1x1[4])},
{HWY_REP4(normalize_mul * kFilterMask1x1[3])}};
Rect from_rect(0, 0, 8 * frame_dim.xsize_blocks, 8 * frame_dim.ysize_blocks);
Rect from_rect(0, 0, mask1x1->xsize(), mask1x1->ysize());
ImageF temp(mask1x1->xsize(), mask1x1->ysize());
Symmetric5(*mask1x1, from_rect, weights, pool, &temp);
CopyImageTo(temp, mask1x1); // TODO: make it a swap
}
ImageF AdaptiveQuantizationMap(const float butteraugli_target,
const Image3F& xyb,
const FrameDimensions& frame_dim, float scale,
ThreadPool* pool, ImageF* mask,
const Image3F& xyb, const Rect& rect,
float scale, ThreadPool* pool, ImageF* mask,
ImageF* mask1x1) {
JXL_DASSERT(rect.xsize() % kBlockDim == 0);
JXL_DASSERT(rect.ysize() % kBlockDim == 0);
AdaptiveQuantizationImpl impl;
impl.Init(xyb);
*mask = ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
*mask1x1 = ImageF(8 * frame_dim.xsize_blocks, 8 * frame_dim.ysize_blocks);
const size_t xsize_blocks = rect.xsize() / kBlockDim;
const size_t ysize_blocks = rect.ysize() / kBlockDim;
impl.aq_map = ImageF(xsize_blocks, ysize_blocks);
*mask = ImageF(xsize_blocks, ysize_blocks);
*mask1x1 = ImageF(rect.xsize(), rect.ysize());
JXL_CHECK(RunOnPool(
pool, 0,
DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks) *
DivCeil(frame_dim.ysize_blocks, kEncTileDimInBlocks),
DivCeil(xsize_blocks, kEncTileDimInBlocks) *
DivCeil(ysize_blocks, kEncTileDimInBlocks),
[&](const size_t num_threads) {
impl.PrepareBuffers(num_threads);
return true;
},
[&](const uint32_t tid, const size_t thread) {
size_t n_enc_tiles =
DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks);
size_t n_enc_tiles = DivCeil(xsize_blocks, kEncTileDimInBlocks);
size_t tx = tid % n_enc_tiles;
size_t ty = tid / n_enc_tiles;
size_t by0 = ty * kEncTileDimInBlocks;
size_t by1 =
std::min((ty + 1) * kEncTileDimInBlocks, frame_dim.ysize_blocks);
size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks, ysize_blocks);
size_t bx0 = tx * kEncTileDimInBlocks;
size_t bx1 =
std::min((tx + 1) * kEncTileDimInBlocks, frame_dim.xsize_blocks);
Rect r(bx0, by0, bx1 - bx0, by1 - by0);
impl.ComputeTile(butteraugli_target, scale, xyb, r, thread, mask,
mask1x1);
size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks, xsize_blocks);
Rect rect_out(bx0, by0, bx1 - bx0, by1 - by0);
impl.ComputeTile(butteraugli_target, scale, xyb, rect, rect_out, thread,
mask, mask1x1);
},
"AQ DiffPrecompute"));
Blur1x1Masking(frame_dim, pool, mask1x1);
Blur1x1Masking(pool, mask1x1);
return std::move(impl).aq_map;
}
@ -770,8 +760,9 @@ ImageBundle RoundtripImage(const FrameHeader& frame_header,
size_t num_special_frames = enc_state->special_frames.size();
size_t num_passes = enc_state->progressive_splitter.GetNumPasses();
ModularFrameEncoder modular_frame_encoder(frame_header, enc_state->cparams);
JXL_CHECK(InitializePassesEncoder(frame_header, opsin, cms, pool, enc_state,
&modular_frame_encoder, nullptr));
JXL_CHECK(InitializePassesEncoder(frame_header, opsin, Rect(opsin), cms, pool,
enc_state, &modular_frame_encoder,
nullptr));
JXL_CHECK(dec_state->Init(frame_header));
JXL_CHECK(dec_state->InitForAC(num_passes, pool));
@ -830,7 +821,7 @@ constexpr int kMaxButteraugliIters = 4;
void FindBestQuantization(const FrameHeader& frame_header,
const Image3F& linear, const Image3F& opsin,
PassesEncoderState* enc_state,
ImageF& quant_field, PassesEncoderState* enc_state,
const JxlCmsInterface& cms, ThreadPool* pool,
AuxOut* aux_out) {
const CompressParams& cparams = enc_state->cparams;
@ -843,7 +834,6 @@ void FindBestQuantization(const FrameHeader& frame_header,
}
Quantizer& quantizer = enc_state->shared.quantizer;
ImageI& raw_quant_field = enc_state->shared.raw_quant_field;
ImageF& quant_field = enc_state->initial_quant_field;
const float butteraugli_target = cparams.butteraugli_distance;
const float original_butteraugli = cparams.original_butteraugli_distance;
@ -998,7 +988,7 @@ void FindBestQuantization(const FrameHeader& frame_header,
}
void FindBestQuantizationMaxError(const FrameHeader& frame_header,
const Image3F& opsin,
const Image3F& opsin, ImageF& quant_field,
PassesEncoderState* enc_state,
const JxlCmsInterface& cms, ThreadPool* pool,
AuxOut* aux_out) {
@ -1006,7 +996,6 @@ void FindBestQuantizationMaxError(const FrameHeader& frame_header,
const CompressParams& cparams = enc_state->cparams;
Quantizer& quantizer = enc_state->shared.quantizer;
ImageI& raw_quant_field = enc_state->shared.raw_quant_field;
ImageF& quant_field = enc_state->initial_quant_field;
// TODO(veluca): better choice of this value.
const float initial_quant_dc =
@ -1138,26 +1127,26 @@ float InitialQuantDC(float butteraugli_target) {
}
ImageF InitialQuantField(const float butteraugli_target, const Image3F& opsin,
const FrameDimensions& frame_dim, ThreadPool* pool,
float rescale, ImageF* mask, ImageF* mask1x1) {
const Rect& rect, ThreadPool* pool, float rescale,
ImageF* mask, ImageF* mask1x1) {
const float quant_ac = kAcQuant / butteraugli_target;
return HWY_DYNAMIC_DISPATCH(AdaptiveQuantizationMap)(
butteraugli_target, opsin, frame_dim, quant_ac * rescale, pool, mask,
mask1x1);
butteraugli_target, opsin, rect, quant_ac * rescale, pool, mask, mask1x1);
}
void FindBestQuantizer(const FrameHeader& frame_header, const Image3F* linear,
const Image3F& opsin, PassesEncoderState* enc_state,
const Image3F& opsin, ImageF& quant_field,
PassesEncoderState* enc_state,
const JxlCmsInterface& cms, ThreadPool* pool,
AuxOut* aux_out, double rescale) {
const CompressParams& cparams = enc_state->cparams;
if (cparams.max_error_mode) {
FindBestQuantizationMaxError(frame_header, opsin, enc_state, cms, pool,
aux_out);
FindBestQuantizationMaxError(frame_header, opsin, quant_field, enc_state,
cms, pool, aux_out);
} else if (linear && cparams.speed_tier <= SpeedTier::kKitten) {
// Normal encoding to a butteraugli score.
FindBestQuantization(frame_header, *linear, opsin, enc_state, cms, pool,
aux_out);
FindBestQuantization(frame_header, *linear, opsin, quant_field, enc_state,
cms, pool, aux_out);
}
}

View File

@ -37,8 +37,8 @@ struct AuxOut;
// fine-grained quantization should be enough. Returns a mask, too, which
// can later be used to make better decisions about ac strategy.
ImageF InitialQuantField(float butteraugli_target, const Image3F& opsin,
const FrameDimensions& frame_dim, ThreadPool* pool,
float rescale, ImageF* initial_quant_mask,
const Rect& rect, ThreadPool* pool, float rescale,
ImageF* initial_quant_mask,
ImageF* initial_quant_mask1x1);
float InitialQuantDC(float butteraugli_target);
@ -51,7 +51,8 @@ void AdjustQuantField(const AcStrategyImage& ac_strategy, const Rect& rect,
// dequant_float_map and chosen quantization levels.
// `linear` is only used in Kitten mode or slower.
void FindBestQuantizer(const FrameHeader& frame_header, const Image3F* linear,
const Image3F& opsin, PassesEncoderState* enc_state,
const Image3F& opsin, ImageF& quant_field,
PassesEncoderState* enc_state,
const JxlCmsInterface& cms, ThreadPool* pool,
AuxOut* aux_out, double rescale = 1.0);

View File

@ -40,18 +40,14 @@ using hwy::HWY_NAMESPACE::Mul;
using hwy::HWY_NAMESPACE::MulAdd;
using hwy::HWY_NAMESPACE::Sqrt;
void ProcessTile(const FrameHeader& frame_header, const Image3F& opsin,
PassesEncoderState* enc_state, const Rect& rect,
void ProcessTile(const CompressParams& cparams, const FrameHeader& frame_header,
const Image3F& opsin, const Rect& opsin_rect,
const ImageF& quant_field, const AcStrategyImage& ac_strategy,
ImageB* epf_sharpness, const Rect& rect,
ArControlFieldHeuristics::TempImages* temp_image) {
constexpr size_t N = kBlockDim;
ImageB* JXL_RESTRICT epf_sharpness = &enc_state->shared.epf_sharpness;
ImageF* JXL_RESTRICT quant = &enc_state->initial_quant_field;
JXL_ASSERT(
epf_sharpness->xsize() == enc_state->shared.frame_dim.xsize_blocks &&
epf_sharpness->ysize() == enc_state->shared.frame_dim.ysize_blocks);
if (enc_state->cparams.butteraugli_distance < kMinButteraugliForDynamicAR ||
enc_state->cparams.speed_tier > SpeedTier::kWombat ||
if (cparams.butteraugli_distance < kMinButteraugliForDynamicAR ||
cparams.speed_tier > SpeedTier::kWombat ||
frame_header.loop_filter.epf_iters == 0) {
FillPlane(static_cast<uint8_t>(4), epf_sharpness, rect);
return;
@ -76,11 +72,13 @@ void ProcessTile(const FrameHeader& frame_header, const Image3F& opsin,
// (for example 32x32 dct). This relates to transforms ability
// to propagate artefacts.
size_t y0 = by0 == 0 ? 2 : 0;
size_t y1 = by1 * N + 4 <= opsin.ysize() + 2 ? (by1 - by0) * N + 4
: opsin.ysize() + 2 - by0 * N;
size_t y1 = by1 * N + 4 <= opsin_rect.ysize() + 2
? (by1 - by0) * N + 4
: opsin_rect.ysize() + 2 - by0 * N;
size_t x0 = bx0 == 0 ? 2 : 0;
size_t x1 = bx1 * N + 4 <= opsin.xsize() + 2 ? (bx1 - bx0) * N + 4
: opsin.xsize() + 2 - bx0 * N;
size_t x1 = bx1 * N + 4 <= opsin_rect.xsize() + 2
? (bx1 - bx0) * N + 4
: opsin_rect.xsize() + 2 - bx0 * N;
HWY_FULL(float) df;
for (size_t y = y0; y < y1; y++) {
float* JXL_RESTRICT laplacian_sqrsum_row = laplacian_sqrsum.Row(y);
@ -89,14 +87,15 @@ void ProcessTile(const FrameHeader& frame_header, const Image3F& opsin,
const float* JXL_RESTRICT in_row[3];
const float* JXL_RESTRICT in_row_b[3];
for (size_t c = 0; c < 3; c++) {
in_row_t[c] = opsin.PlaneRow(c, cy > 0 ? cy - 1 : cy);
in_row[c] = opsin.PlaneRow(c, cy);
in_row_b[c] = opsin.PlaneRow(c, cy + 1 < opsin.ysize() ? cy + 1 : cy);
in_row_t[c] = opsin_rect.ConstPlaneRow(opsin, c, cy > 0 ? cy - 1 : cy);
in_row[c] = opsin_rect.ConstPlaneRow(opsin, c, cy);
in_row_b[c] = opsin_rect.ConstPlaneRow(
opsin, c, cy + 1 < opsin_rect.ysize() ? cy + 1 : cy);
}
auto compute_laplacian_scalar = [&](size_t x) {
size_t cx = x + bx0 * N - 2;
const size_t prevX = cx >= 1 ? cx - 1 : cx;
const size_t nextX = cx + 1 < opsin.xsize() ? cx + 1 : cx;
const size_t nextX = cx + 1 < opsin_rect.xsize() ? cx + 1 : cx;
float sumsqr = 0;
for (size_t c = 0; c < 3; c++) {
float laplacian =
@ -114,7 +113,8 @@ void ProcessTile(const FrameHeader& frame_header, const Image3F& opsin,
compute_laplacian_scalar(x);
}
// Interior. One extra pixel of border as the last pixel is special.
for (; x + Lanes(df) <= x1 && x + Lanes(df) + bx0 * N - 1 <= opsin.xsize();
for (; x + Lanes(df) <= x1 &&
x + Lanes(df) + bx0 * N - 1 <= opsin_rect.xsize();
x += Lanes(df)) {
size_t cx = x + bx0 * N - 2;
auto sumsqr = Zero(df);
@ -182,16 +182,16 @@ void ProcessTile(const FrameHeader& frame_header, const Image3F& opsin,
// ignore pixels outside the image.
// Y coordinates are relative to by0*8+y*4.
size_t sy = y * 4 + by0 * 8 > 0 ? 0 : 2;
size_t ey = y * 4 + by0 * 8 + 4 <= opsin.ysize() + 2
size_t ey = y * 4 + by0 * 8 + 4 <= opsin_rect.ysize() + 2
? 4
: opsin.ysize() - y * 4 - by0 * 8 + 2;
: opsin_rect.ysize() - y * 4 - by0 * 8 + 2;
for (size_t x = 0; x < (bx1 - bx0) * 2 + 1; x++) {
// ignore pixels outside the image.
// X coordinates are relative to bx0*8.
size_t sx = x * 4 + bx0 * 8 > 0 ? x * 4 : x * 4 + 2;
size_t ex = x * 4 + bx0 * 8 + 4 <= opsin.xsize() + 2
size_t ex = x * 4 + bx0 * 8 + 4 <= opsin_rect.xsize() + 2
? x * 4 + 4
: opsin.xsize() - bx0 * 8 + 2;
: opsin_rect.xsize() - bx0 * 8 + 2;
if (ex - sx == 4 && ey - sy == 4) {
auto sum = Zero(df4);
for (size_t iy = 0; iy < 4; iy++) {
@ -212,9 +212,9 @@ void ProcessTile(const FrameHeader& frame_header, const Image3F& opsin,
}
}
for (size_t by = by0; by < by1; by++) {
AcStrategyRow acs_row = enc_state->shared.ac_strategy.ConstRow(by);
AcStrategyRow acs_row = ac_strategy.ConstRow(by);
uint8_t* JXL_RESTRICT out_row = epf_sharpness->Row(by);
float* JXL_RESTRICT quant_row = quant->Row(by);
const float* JXL_RESTRICT quant_row = quant_field.Row(by);
for (size_t bx = bx0; bx < bx1; bx++) {
AcStrategy acs = acs_row[bx];
if (!acs.IsFirstBlock()) continue;
@ -311,13 +311,14 @@ HWY_AFTER_NAMESPACE();
namespace jxl {
HWY_EXPORT(ProcessTile);
void ArControlFieldHeuristics::RunRect(const FrameHeader& frame_header,
const Rect& block_rect,
const Image3F& opsin,
PassesEncoderState* enc_state,
size_t thread) {
void ArControlFieldHeuristics::RunRect(
const CompressParams& cparams, const FrameHeader& frame_header,
const Rect& block_rect, const Image3F& opsin, const Rect& opsin_rect,
const ImageF& quant_field, const AcStrategyImage& ac_strategy,
ImageB* epf_sharpness, size_t thread) {
HWY_DYNAMIC_DISPATCH(ProcessTile)
(frame_header, opsin, enc_state, block_rect, &temp_images[thread]);
(cparams, frame_header, opsin, opsin_rect, quant_field, ac_strategy,
epf_sharpness, block_rect, &temp_images[thread]);
}
} // namespace jxl

View File

@ -36,14 +36,13 @@ struct ArControlFieldHeuristics {
temp_images.resize(num_threads);
}
void RunRect(const FrameHeader& frame_header, const Rect& block_rect,
const Image3F& opsin, PassesEncoderState* enc_state,
void RunRect(const CompressParams& cparams, const FrameHeader& frame_header,
const Rect& block_rect, const Image3F& opsin,
const Rect& opsin_rect, const ImageF& quant_field,
const AcStrategyImage& ac_strategy, ImageB* epf_sharpness,
size_t thread);
std::vector<TempImages> temp_images;
ImageB* epf_sharpness;
ImageF* quant;
bool all_default;
};
} // namespace jxl

View File

@ -5,6 +5,10 @@
#include "lib/jxl/enc_aux_out.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>

View File

@ -35,8 +35,9 @@
namespace jxl {
Status InitializePassesEncoder(const FrameHeader& frame_header,
const Image3F& opsin, const JxlCmsInterface& cms,
ThreadPool* pool, PassesEncoderState* enc_state,
const Image3F& opsin, const Rect& rect,
const JxlCmsInterface& cms, ThreadPool* pool,
PassesEncoderState* enc_state,
ModularFrameEncoder* modular_frame_encoder,
AuxOut* aux_out) {
PassesSharedState& JXL_RESTRICT shared = enc_state->shared;
@ -68,7 +69,7 @@ Status InitializePassesEncoder(const FrameHeader& frame_header,
JXL_RETURN_IF_ERROR(RunOnPool(
pool, 0, shared.frame_dim.num_groups, ThreadPool::NoInit,
[&](size_t group_idx, size_t _) {
ComputeCoefficients(group_idx, enc_state, opsin, &dc);
ComputeCoefficients(group_idx, enc_state, opsin, rect, &dc);
},
"Compute coeffs"));

View File

@ -40,10 +40,6 @@ struct PassesEncoderState {
bool initialize_global_state = true;
size_t dc_group_index = 0;
ImageF initial_quant_field; // Invalid in Falcon mode.
ImageF initial_quant_masking; // Invalid in Falcon mode.
ImageF initial_quant_masking1x1; // Invalid in Falcon mode.
// Per-pass DCT coefficients for the image. One row per group.
std::vector<std::unique_ptr<ACImage>> coeffs;
@ -77,8 +73,8 @@ struct PassesEncoderState {
// Initialize per-frame information.
class ModularFrameEncoder;
Status InitializePassesEncoder(const FrameHeader& frame_header,
const Image3F& opsin, const JxlCmsInterface& cms,
ThreadPool* pool,
const Image3F& opsin, const Rect& rect,
const JxlCmsInterface& cms, ThreadPool* pool,
PassesEncoderState* passes_enc_state,
ModularFrameEncoder* modular_frame_encoder,
AuxOut* aux_out);

View File

@ -205,22 +205,23 @@ void ComputeDC(const ImageF& dc_values, bool fast, int32_t* dc_x,
jxl::cms::kYToBRatio, kDistanceMultiplierDC, fast);
}
void ComputeTile(const Image3F& opsin, const DequantMatrices& dequant,
void ComputeTile(const Image3F& opsin, const Rect& opsin_rect,
const DequantMatrices& dequant,
const AcStrategyImage* ac_strategy,
const ImageI* raw_quant_field, const Quantizer* quantizer,
const Rect& r, bool fast, bool use_dct8, ImageSB* map_x,
const Rect& rect, bool fast, bool use_dct8, ImageSB* map_x,
ImageSB* map_b, ImageF* dc_values, float* mem) {
static_assert(kEncTileDimInBlocks == kColorTileDimInBlocks,
"Invalid color tile dim");
size_t xsize_blocks = opsin.xsize() / kBlockDim;
size_t xsize_blocks = opsin_rect.xsize() / kBlockDim;
constexpr float kDistanceMultiplierAC = 1e-9f;
const size_t dct_scratch_size =
3 * (MaxVectorSize() / sizeof(float)) * AcStrategy::kMaxBlockDim;
const size_t y0 = r.y0();
const size_t x0 = r.x0();
const size_t x1 = r.x0() + r.xsize();
const size_t y1 = r.y0() + r.ysize();
const size_t y0 = rect.y0();
const size_t x0 = rect.x0();
const size_t x1 = rect.x0() + rect.xsize();
const size_t y1 = rect.y0() + rect.ysize();
int ty = y0 / kColorTileDimInBlocks;
int tx = x0 / kColorTileDimInBlocks;
@ -257,9 +258,12 @@ void ComputeTile(const Image3F& opsin, const DequantMatrices& dequant,
size_t num_ac = 0;
for (size_t y = y0; y < y1; ++y) {
const float* JXL_RESTRICT row_y = opsin.ConstPlaneRow(1, y * kBlockDim);
const float* JXL_RESTRICT row_x = opsin.ConstPlaneRow(0, y * kBlockDim);
const float* JXL_RESTRICT row_b = opsin.ConstPlaneRow(2, y * kBlockDim);
const float* JXL_RESTRICT row_y =
opsin_rect.ConstPlaneRow(opsin, 1, y * kBlockDim);
const float* JXL_RESTRICT row_x =
opsin_rect.ConstPlaneRow(opsin, 0, y * kBlockDim);
const float* JXL_RESTRICT row_b =
opsin_rect.ConstPlaneRow(opsin, 2, y * kBlockDim);
size_t stride = opsin.PixelsPerRow();
for (size_t x = x0; x < x1; x++) {
@ -362,14 +366,15 @@ HWY_EXPORT(InitDCStorage);
HWY_EXPORT(ComputeDC);
HWY_EXPORT(ComputeTile);
void CfLHeuristics::Init(const Image3F& opsin) {
size_t xsize_blocks = opsin.xsize() / kBlockDim;
size_t ysize_blocks = opsin.ysize() / kBlockDim;
void CfLHeuristics::Init(const Rect& rect) {
size_t xsize_blocks = rect.xsize() / kBlockDim;
size_t ysize_blocks = rect.ysize() / kBlockDim;
HWY_DYNAMIC_DISPATCH(InitDCStorage)
(xsize_blocks * ysize_blocks, &dc_values);
}
void CfLHeuristics::ComputeTile(const Rect& r, const Image3F& opsin,
const Rect& opsin_rect,
const DequantMatrices& dequant,
const AcStrategyImage* ac_strategy,
const ImageI* raw_quant_field,
@ -377,8 +382,8 @@ void CfLHeuristics::ComputeTile(const Rect& r, const Image3F& opsin,
size_t thread, ColorCorrelationMap* cmap) {
bool use_dct8 = ac_strategy == nullptr;
HWY_DYNAMIC_DISPATCH(ComputeTile)
(opsin, dequant, ac_strategy, raw_quant_field, quantizer, r, fast, use_dct8,
&cmap->ytox_map, &cmap->ytob_map, &dc_values,
(opsin, opsin_rect, dequant, ac_strategy, raw_quant_field, quantizer, r, fast,
use_dct8, &cmap->ytox_map, &cmap->ytob_map, &dc_values,
mem.get() + thread * ItemsPerThread());
}

View File

@ -29,13 +29,13 @@ void ColorCorrelationMapEncodeDC(const ColorCorrelationMap& map,
AuxOut* aux_out);
struct CfLHeuristics {
void Init(const Image3F& opsin);
void Init(const Rect& rect);
void PrepareForThreads(size_t num_threads) {
mem = hwy::AllocateAligned<float>(num_threads * ItemsPerThread());
}
void ComputeTile(const Rect& r, const Image3F& opsin,
void ComputeTile(const Rect& r, const Image3F& opsin, const Rect& opsin_rect,
const DequantMatrices& dequant,
const AcStrategyImage* ac_strategy,
const ImageI* raw_quant_field, const Quantizer* quantizer,

View File

@ -82,13 +82,33 @@ float HistogramDistance(const Histogram& a, const Histogram& b) {
return total_distance - a.entropy_ - b.entropy_;
}
bool HasNewSymbol(const Histogram& a, const Histogram& b) {
for (size_t i = 0; i < a.data_.size(); ++i) {
if (a.data_[i] > 0 && (i >= b.data_.size() || b.data_[i] == 0)) {
return true;
}
constexpr const float kInfinity = std::numeric_limits<float>::infinity();
float HistogramKLDivergence(const Histogram& actual, const Histogram& coding) {
if (actual.total_count_ == 0) return 0;
if (coding.total_count_ == 0) return kInfinity;
const HWY_CAPPED(float, Histogram::kRounding) df;
const HWY_CAPPED(int32_t, Histogram::kRounding) di;
const auto coding_inv = Set(df, 1.0f / coding.total_count_);
auto cost_lanes = Zero(df);
for (size_t i = 0; i < actual.data_.size(); i += Lanes(di)) {
const auto counts = LoadU(di, &actual.data_[i]);
const auto coding_counts =
coding.data_.size() > i ? LoadU(di, &coding.data_[i]) : Zero(di);
const auto coding_probs = Mul(ConvertTo(df, coding_counts), coding_inv);
const auto neg_coding_cost = BitCast(
df,
IfThenZeroElse(Eq(counts, Zero(di)),
IfThenElse(Eq(coding_counts, Zero(di)),
BitCast(di, Set(df, -kInfinity)),
BitCast(di, FastLog2f(df, coding_probs)))));
cost_lanes = NegMulAdd(ConvertTo(df, counts), neg_coding_cost, cost_lanes);
}
return false;
const float total_cost = GetLane(SumOfLanes(df, cost_lanes));
return total_cost - actual.entropy_;
}
// First step of a k-means clustering with a fancy distance metric.
@ -121,7 +141,7 @@ void FastClusterHistograms(const std::vector<Histogram>& in,
for (size_t i = 0; i < in.size(); i++) {
if (dists[i] == 0.0f) continue;
for (size_t j = 0; j < prev_histograms; ++j) {
dists[i] = std::min(HistogramDistance(in[i], (*out)[j]), dists[i]);
dists[i] = std::min(HistogramKLDivergence(in[i], (*out)[j]), dists[i]);
}
}
auto max_dist = std::max_element(dists.begin(), dists.end());
@ -149,10 +169,8 @@ void FastClusterHistograms(const std::vector<Histogram>& in,
size_t best = 0;
float best_dist = std::numeric_limits<float>::max();
for (size_t j = 0; j < out->size(); j++) {
// TODO(szabadka) Choose a different distance metric for previous
// histograms (i.e. the cost of the new histogram with the old one).
if (j < prev_histograms && HasNewSymbol(in[i], (*out)[j])) continue;
float dist = HistogramDistance(in[i], (*out)[j]);
float dist = j < prev_histograms ? HistogramKLDivergence(in[i], (*out)[j])
: HistogramDistance(in[i], (*out)[j]);
if (dist < best_dist) {
best = j;
best_dist = dist;

View File

@ -706,12 +706,12 @@ void JxlFastLosslessPrepareHeader(JxlFastLosslessFrameState* frame,
if (!is_last) {
output->Write(2, 0b00); // can not be saved as reference
}
output->Write(2, 0b00); // a frame has no name
output->Write(1, 0); // loop filter is not all_default
output->Write(1, 0); // no gaborish
output->Write(2, 0); // 0 EPF iters
output->Write(2, 0b00); // No LF extensions
output->Write(2, 0b00); // No FH extensions
output->Write(2, 0b00); // a frame has no name
output->Write(1, 0); // loop filter is not all_default
output->Write(1, 0); // no gaborish
output->Write(2, 0); // 0 EPF iters
output->Write(2, 0b00); // No LF extensions
output->Write(2, 0b00); // No FH extensions
output->Write(1, 0); // No TOC permutation
output->ZeroPadToByte(); // TOC is byte-aligned.
@ -3632,13 +3632,11 @@ bool detect_palette(const unsigned char* r, size_t width,
return collided;
}
// TODO(szabadka): Add some parameter to indicate whether the input is
// truly streaming.
template <typename BitDepth>
JxlFastLosslessFrameState* LLPrepare(JxlChunkedFrameInputSource input,
size_t width, size_t height,
BitDepth bitdepth, size_t nb_chans,
bool big_endian, int effort) {
bool big_endian, int effort, int oneshot) {
assert(width != 0);
assert(height != 0);
@ -3647,7 +3645,7 @@ JxlFastLosslessFrameState* LLPrepare(JxlChunkedFrameInputSource input,
std::vector<int16_t> lookup(kHashSize);
lookup[0] = 0;
int pcolors = 0;
bool collided = effort < 2 || bitdepth.bitdepth != 8;
bool collided = effort < 2 || bitdepth.bitdepth != 8 || !oneshot;
for (size_t y0 = 0; y0 < height && !collided; y0 += 256) {
size_t ys = std::min<size_t>(height - y0, 256);
for (size_t x0 = 0; x0 < width && !collided; x0 += 256) {
@ -3728,11 +3726,7 @@ JxlFastLosslessFrameState* LLPrepare(JxlChunkedFrameInputSource input,
bool onegroup = num_groups_x == 1 && num_groups_y == 1;
// sample the middle (effort * 2) rows of every group
// TODO(szabadka): Optimize sampling rule for low-memory code-path.
for (size_t g = 0; g < num_groups_y * num_groups_x; g++) {
size_t xg = g % num_groups_x;
size_t yg = g / num_groups_x;
auto sample_rows = [&](size_t xg, size_t yg, size_t num_rows) {
size_t y0 = yg * 256;
size_t x0 = xg * 256;
size_t ys = std::min<size_t>(height - y0, 256);
@ -3742,12 +3736,31 @@ JxlFastLosslessFrameState* LLPrepare(JxlChunkedFrameInputSource input,
input.get_color_channel_data_at(input.opaque, x0, y0, xs, ys, &stride);
auto rgba = reinterpret_cast<const unsigned char*>(buffer);
int y_begin = std::max<int>(0, ys - 2 * effort) / 2;
int y_count = std::min<int>(2 * effort * ys / 256, y0 + ys - y_begin - 1);
int y_count = std::min<int>(num_rows, y0 + ys - y_begin - 1);
int x_max = xs / kChunkSize * kChunkSize;
CollectSamples(rgba, 0, y_begin, x_max, stride, y_count, raw_counts,
lz77_counts, onegroup, !collided, bitdepth, nb_chans,
big_endian, lookup.data());
input.release_buffer(input.opaque, buffer);
};
// TODO(veluca): that `64` is an arbitrary constant, meant to correspond to
// the point where the number of processed rows is large enough that loading
// the entire image is cost-effective.
if (oneshot || effort >= 64) {
for (size_t g = 0; g < num_groups_y * num_groups_x; g++) {
size_t xg = g % num_groups_x;
size_t yg = g / num_groups_x;
size_t y0 = yg * 256;
size_t ys = std::min<size_t>(height - y0, 256);
size_t num_rows = 2 * effort * ys / 256;
sample_rows(xg, yg, num_rows);
}
} else {
// sample the middle (effort * 2 * num_groups) rows of the center group
// (possibly all of them).
sample_rows((num_groups_x - 1) / 2, (num_groups_y - 1) / 2,
2 * effort * num_groups_x * num_groups_y);
}
// TODO(veluca): can probably improve this and make it bitdepth-dependent.
@ -3946,24 +3959,25 @@ void LLProcess(JxlFastLosslessFrameState* frame_state, bool is_last,
JxlFastLosslessFrameState* JxlFastLosslessPrepareImpl(
JxlChunkedFrameInputSource input, size_t width, size_t height,
size_t nb_chans, size_t bitdepth, bool big_endian, int effort) {
size_t nb_chans, size_t bitdepth, bool big_endian, int effort,
int oneshot) {
assert(bitdepth > 0);
assert(nb_chans <= 4);
assert(nb_chans != 0);
if (bitdepth <= 8) {
return LLPrepare(input, width, height, UpTo8Bits(bitdepth), nb_chans,
big_endian, effort);
big_endian, effort, oneshot);
}
if (bitdepth <= 13) {
return LLPrepare(input, width, height, From9To13Bits(bitdepth), nb_chans,
big_endian, effort);
big_endian, effort, oneshot);
}
if (bitdepth == 14) {
return LLPrepare(input, width, height, Exactly14Bits(bitdepth), nb_chans,
big_endian, effort);
big_endian, effort, oneshot);
}
return LLPrepare(input, width, height, MoreThan14Bits(bitdepth), nb_chans,
big_endian, effort);
big_endian, effort, oneshot);
}
void JxlFastLosslessProcessFrameImpl(
@ -4074,10 +4088,8 @@ class FJxlFrameInput {
bytes_per_pixel_(bitdepth <= 8 ? nb_chans : 2 * nb_chans) {}
JxlChunkedFrameInputSource GetInputSource() {
return JxlChunkedFrameInputSource{
this,
GetDataAt,
[](void*, const void*) {}};
return JxlChunkedFrameInputSource{this, GetDataAt,
[](void*, const void*) {}};
}
private:
@ -4099,9 +4111,9 @@ size_t JxlFastLosslessEncode(const unsigned char* rgba, size_t width,
unsigned char** output, void* runner_opaque,
FJxlParallelRunner runner) {
FJxlFrameInput input(rgba, row_stride, nb_chans, bitdepth);
auto frame_state =
JxlFastLosslessPrepareFrame(input.GetInputSource(), width, height,
nb_chans, bitdepth, big_endian, effort);
auto frame_state = JxlFastLosslessPrepareFrame(
input.GetInputSource(), width, height, nb_chans, bitdepth, big_endian,
effort, /*oneshot=*/true);
JxlFastLosslessProcessFrame(frame_state, /*is_last=*/true, runner_opaque,
runner, nullptr);
JxlFastLosslessPrepareHeader(frame_state, /*add_image_header=*/1,
@ -4121,25 +4133,25 @@ size_t JxlFastLosslessEncode(const unsigned char* rgba, size_t width,
JxlFastLosslessFrameState* JxlFastLosslessPrepareFrame(
JxlChunkedFrameInputSource input, size_t width, size_t height,
size_t nb_chans, size_t bitdepth, int big_endian, int effort) {
size_t nb_chans, size_t bitdepth, int big_endian, int effort, int oneshot) {
#if FJXL_ENABLE_AVX512
if (__builtin_cpu_supports("avx512cd") &&
__builtin_cpu_supports("avx512vbmi") &&
__builtin_cpu_supports("avx512bw") && __builtin_cpu_supports("avx512f") &&
__builtin_cpu_supports("avx512vl")) {
return AVX512::JxlFastLosslessPrepareImpl(input, width, height, nb_chans,
bitdepth, big_endian, effort);
return AVX512::JxlFastLosslessPrepareImpl(
input, width, height, nb_chans, bitdepth, big_endian, effort, oneshot);
}
#endif
#if FJXL_ENABLE_AVX2
if (__builtin_cpu_supports("avx2")) {
return AVX2::JxlFastLosslessPrepareImpl(input, width, height, nb_chans,
bitdepth, big_endian, effort);
return AVX2::JxlFastLosslessPrepareImpl(
input, width, height, nb_chans, bitdepth, big_endian, effort, oneshot);
}
#endif
return default_implementation::JxlFastLosslessPrepareImpl(
input, width, height, nb_chans, bitdepth, big_endian, effort);
input, width, height, nb_chans, bitdepth, big_endian, effort, oneshot);
}
void JxlFastLosslessProcessFrame(

View File

@ -69,7 +69,7 @@ struct JxlFastLosslessFrameState;
// JxlFastLosslessFreeFrameState.
JxlFastLosslessFrameState* JxlFastLosslessPrepareFrame(
JxlChunkedFrameInputSource input, size_t width, size_t height,
size_t nb_chans, size_t bitdepth, int big_endian, int effort);
size_t nb_chans, size_t bitdepth, int big_endian, int effort, int oneshot);
#if !FJXL_STANDALONE
class JxlEncoderOutputProcessorWrapper;

View File

@ -5,6 +5,8 @@
#include "lib/jxl/enc_fields.h"
#include <cinttypes>
#include "lib/jxl/enc_aux_out.h"
#include "lib/jxl/fields.h"

View File

@ -121,8 +121,7 @@ uint32_t GetBitDepth(JxlBitDepth bit_depth, const T& metadata,
}
}
Status CopyColorChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
size_t xsize, size_t ysize,
Status CopyColorChannels(JxlChunkedFrameInputSource input, Rect rect,
const FrameInfo& frame_info,
const ImageMetadata& metadata, ThreadPool* pool,
Image3F* color, ImageF* alpha,
@ -133,7 +132,8 @@ Status CopyColorChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
size_t bits_per_sample =
GetBitDepth(frame_info.image_bit_depth, metadata, format);
size_t row_offset;
auto buffer = GetColorBuffer(input, x0, y0, xsize, ysize, &row_offset);
auto buffer = GetColorBuffer(input, rect.x0(), rect.y0(), rect.xsize(),
rect.ysize(), &row_offset);
if (!buffer) {
return JXL_FAILURE("no buffer for color channels given");
}
@ -145,12 +145,12 @@ Status CopyColorChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
" color channels, received only %u channels",
color_channels, format.num_channels);
}
*color = Image3F(xsize, ysize);
*color = Image3F(rect.xsize(), rect.ysize());
const uint8_t* data = reinterpret_cast<const uint8_t*>(buffer.get());
for (size_t c = 0; c < color_channels; ++c) {
JXL_RETURN_IF_ERROR(ConvertFromExternalNoSizeCheck(
data, xsize, ysize, row_offset, bits_per_sample, format, c, pool,
&color->Plane(c)));
data, rect.xsize(), rect.ysize(), row_offset, bits_per_sample, format,
c, pool, &color->Plane(c)));
}
if (color_channels == 1) {
CopyImageTo(color->Plane(0), &color->Plane(1));
@ -159,7 +159,7 @@ Status CopyColorChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
if (alpha) {
if (*has_interleaved_alpha) {
JXL_RETURN_IF_ERROR(ConvertFromExternalNoSizeCheck(
data, xsize, ysize, row_offset, bits_per_sample, format,
data, rect.xsize(), rect.ysize(), row_offset, bits_per_sample, format,
format.num_channels - 1, pool, alpha));
} else {
// if alpha is not passed, but it is expected, then assume
@ -170,8 +170,7 @@ Status CopyColorChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
return true;
}
Status CopyExtraChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
size_t xsize, size_t ysize,
Status CopyExtraChannels(JxlChunkedFrameInputSource input, Rect rect,
const FrameInfo& frame_info,
const ImageMetadata& metadata,
bool has_interleaved_alpha, ThreadPool* pool,
@ -189,15 +188,16 @@ Status CopyExtraChannels(JxlChunkedFrameInputSource input, size_t x0, size_t y0,
ec_format.num_channels = 1;
size_t row_offset;
auto buffer =
GetExtraChannelBuffer(input, ec, x0, y0, xsize, ysize, &row_offset);
GetExtraChannelBuffer(input, ec, rect.x0(), rect.y0(), rect.xsize(),
rect.ysize(), &row_offset);
if (!buffer) {
return JXL_FAILURE("no buffer for extra channel given");
}
size_t bits_per_sample = GetBitDepth(
frame_info.image_bit_depth, metadata.extra_channel_info[ec], ec_format);
if (!ConvertFromExternalNoSizeCheck(
reinterpret_cast<const uint8_t*>(buffer.get()), xsize, ysize,
row_offset, bits_per_sample, ec_format, 0, pool,
reinterpret_cast<const uint8_t*>(buffer.get()), rect.xsize(),
rect.ysize(), row_offset, bits_per_sample, ec_format, 0, pool,
&(*extra_channels)[ec])) {
return JXL_FAILURE("Failed to set buffer for extra channel");
}
@ -258,12 +258,10 @@ Status LoopFilterFromParams(const CompressParams& cparams, bool streaming_mode,
LoopFilter* loop_filter = &frame_header->loop_filter;
// Gaborish defaults to enabled in Hare or slower.
// TODO(szabadka) Re-enable Gaborish for streaming mode.
loop_filter->gab =
ApplyOverride(cparams.gaborish,
!streaming_mode && cparams.speed_tier <= SpeedTier::kHare &&
frame_header->encoding == FrameEncoding::kVarDCT &&
cparams.decoding_speed_tier < 4);
loop_filter->gab = ApplyOverride(
cparams.gaborish, cparams.speed_tier <= SpeedTier::kHare &&
frame_header->encoding == FrameEncoding::kVarDCT &&
cparams.decoding_speed_tier < 4);
if (cparams.epf != -1) {
loop_filter->epf_iters = cparams.epf;
@ -540,14 +538,14 @@ struct PixelStatsForChromacityAdjustment {
float dx = 0;
float db = 0;
float exposed_blue = 0;
float CalcPlane(const ImageF* JXL_RESTRICT plane) const {
float CalcPlane(const ImageF* JXL_RESTRICT plane, const Rect& rect) const {
float xmax = 0;
float ymax = 0;
for (size_t ty = 1; ty < plane->ysize(); ++ty) {
for (size_t tx = 1; tx < plane->xsize(); ++tx) {
float cur = plane->Row(ty)[tx];
float prev_row = plane->Row(ty - 1)[tx];
float prev = plane->Row(ty)[tx - 1];
for (size_t ty = 1; ty < rect.ysize(); ++ty) {
for (size_t tx = 1; tx < rect.xsize(); ++tx) {
float cur = rect.Row(plane, ty)[tx];
float prev_row = rect.Row(plane, ty - 1)[tx];
float prev = rect.Row(plane, ty)[tx - 1];
xmax = std::max(xmax, std::abs(cur - prev));
ymax = std::max(ymax, std::abs(cur - prev_row));
}
@ -555,20 +553,20 @@ struct PixelStatsForChromacityAdjustment {
return std::max(xmax, ymax);
}
void CalcExposedBlue(const ImageF* JXL_RESTRICT plane_y,
const ImageF* JXL_RESTRICT plane_b) {
const ImageF* JXL_RESTRICT plane_b, const Rect& rect) {
float eb = 0;
float xmax = 0;
float ymax = 0;
for (size_t ty = 1; ty < plane_y->ysize(); ++ty) {
for (size_t tx = 1; tx < plane_y->xsize(); ++tx) {
float cur_y = plane_y->Row(ty)[tx];
float cur_b = plane_b->Row(ty)[tx];
for (size_t ty = 1; ty < rect.ysize(); ++ty) {
for (size_t tx = 1; tx < rect.xsize(); ++tx) {
float cur_y = rect.Row(plane_y, ty)[tx];
float cur_b = rect.Row(plane_b, ty)[tx];
float exposed_b = cur_b - cur_y * 1.2;
float diff_b = cur_b - cur_y;
float prev_row = plane_b->Row(ty - 1)[tx];
float prev = plane_b->Row(ty)[tx - 1];
float diff_prev_row = prev_row - plane_y->Row(ty - 1)[tx];
float diff_prev = prev - plane_y->Row(ty)[tx - 1];
float prev_row = rect.Row(plane_b, ty - 1)[tx];
float prev = rect.Row(plane_b, ty)[tx - 1];
float diff_prev_row = prev_row - rect.Row(plane_y, ty - 1)[tx];
float diff_prev = prev - rect.Row(plane_y, ty)[tx - 1];
xmax = std::max(xmax, std::abs(diff_b - diff_prev));
ymax = std::max(ymax, std::abs(diff_b - diff_prev_row));
if (exposed_b >= 0) {
@ -580,9 +578,9 @@ struct PixelStatsForChromacityAdjustment {
exposed_blue = eb;
db = std::max(xmax, ymax);
}
void Calc(const Image3F* JXL_RESTRICT opsin) {
dx = CalcPlane(&opsin->Plane(0));
CalcExposedBlue(&opsin->Plane(1), &opsin->Plane(2));
void Calc(const Image3F* JXL_RESTRICT opsin, const Rect& rect) {
dx = CalcPlane(&opsin->Plane(0), rect);
CalcExposedBlue(&opsin->Plane(1), &opsin->Plane(2), rect);
}
int HowMuchIsXChannelPixelized() {
if (dx >= 0.03) {
@ -609,7 +607,7 @@ struct PixelStatsForChromacityAdjustment {
};
void ComputeChromacityAdjustments(const CompressParams& cparams,
const Image3F& opsin,
const Image3F& opsin, const Rect& rect,
FrameHeader* frame_header) {
if (frame_header->encoding != FrameEncoding::kVarDCT ||
cparams.max_error_mode) {
@ -633,7 +631,7 @@ void ComputeChromacityAdjustments(const CompressParams& cparams,
// the image would be based on the worst case pixel.
PixelStatsForChromacityAdjustment pixel_stats;
if (cparams.speed_tier <= SpeedTier::kWombat) {
pixel_stats.Calc(&opsin);
pixel_stats.Calc(&opsin, rect);
}
// For X take the most severe adjustment.
frame_header->x_qm_scale = std::max<int>(
@ -1041,18 +1039,19 @@ Status ComputeJPEGTranscodingData(const jpeg::JPEGData& jpeg_data,
Status ComputeVarDCTEncodingData(const FrameHeader& frame_header,
const Image3F* linear,
Image3F* JXL_RESTRICT opsin,
Image3F* JXL_RESTRICT opsin, const Rect& rect,
const JxlCmsInterface& cms, ThreadPool* pool,
ModularFrameEncoder* enc_modular,
PassesEncoderState* enc_state,
AuxOut* aux_out) {
JXL_ASSERT((opsin->xsize() % kBlockDim) == 0 &&
(opsin->ysize() % kBlockDim) == 0);
JXL_ASSERT((rect.xsize() % kBlockDim) == 0 &&
(rect.ysize() % kBlockDim) == 0);
JXL_RETURN_IF_ERROR(LossyFrameHeuristics(frame_header, enc_state, enc_modular,
linear, opsin, cms, pool, aux_out));
linear, opsin, rect, cms, pool,
aux_out));
JXL_RETURN_IF_ERROR(InitializePassesEncoder(frame_header, *opsin, cms, pool,
enc_state, enc_modular, aux_out));
JXL_RETURN_IF_ERROR(InitializePassesEncoder(
frame_header, *opsin, rect, cms, pool, enc_state, enc_modular, aux_out));
return true;
}
@ -1364,6 +1363,7 @@ Status ComputeEncodingData(
} else {
shared.frame_dim = frame_header.ToFrameDimensions();
}
shared.image_features.patches.SetPassesSharedState(&shared);
const FrameDimensions& frame_dim = shared.frame_dim;
shared.ac_strategy =
@ -1389,6 +1389,14 @@ Status ComputeEncodingData(
const size_t black_idx = black_eci - metadata->m.extra_channel_info.data();
const ColorEncoding c_enc = metadata->m.color_encoding;
// Make the image patch bigger than the currently processed group in streaming
// mode so that we can take into account border pixels around the group when
// computing inverse Gaborish and adaptive quantization map.
int max_border = enc_state.streaming_mode ? kBlockDim : 0;
Rect frame_rect(0, 0, frame_data.xsize, frame_data.ysize);
Rect patch_rect = Rect(x0, y0, xsize, ysize).Extend(max_border, frame_rect);
JXL_ASSERT(patch_rect.IsInside(frame_rect));
Image3F color;
std::vector<ImageF> extra_channels(num_extra_channels);
for (auto& extra_channel : extra_channels) {
@ -1399,11 +1407,11 @@ Status ComputeEncodingData(
bool has_interleaved_alpha = false;
JxlChunkedFrameInputSource input = frame_data.GetInputSource();
if (!frame_data.IsJPEG()) {
JXL_RETURN_IF_ERROR(CopyColorChannels(input, x0, y0, xsize, ysize,
frame_info, metadata->m, pool, &color,
alpha, &has_interleaved_alpha));
JXL_RETURN_IF_ERROR(CopyColorChannels(input, patch_rect, frame_info,
metadata->m, pool, &color, alpha,
&has_interleaved_alpha));
}
JXL_RETURN_IF_ERROR(CopyExtraChannels(input, x0, y0, xsize, ysize, frame_info,
JXL_RETURN_IF_ERROR(CopyExtraChannels(input, patch_rect, frame_info,
metadata->m, has_interleaved_alpha,
pool, &extra_channels));
@ -1416,13 +1424,14 @@ Status ComputeEncodingData(
Image3F opsin;
if (!jpeg_data) {
// Allocating a large enough image avoids a copy when padding.
opsin = Image3F(RoundUpToBlockDim(xsize), RoundUpToBlockDim(ysize));
opsin.ShrinkTo(xsize, ysize);
opsin = Image3F(RoundUpToBlockDim(color.xsize()),
RoundUpToBlockDim(color.ysize()));
opsin.ShrinkTo(color.xsize(), color.ysize());
if (frame_header.color_transform == ColorTransform::kXYB &&
frame_info.ib_needs_color_transform) {
if (frame_header.encoding == FrameEncoding::kVarDCT &&
cparams.speed_tier <= SpeedTier::kKitten) {
linear_storage = Image3F(xsize, ysize);
linear_storage = Image3F(color.xsize(), color.ysize());
linear = &linear_storage;
}
ToXYB(color, c_enc, metadata->m.IntensityTarget(), black, pool, &opsin,
@ -1447,9 +1456,16 @@ Status ComputeEncodingData(
}
PadImageToBlockMultipleInPlace(&opsin);
}
color = Image3F();
// Rectangle within opsin that corresponds to the currently processed group in
// streaming mode.
Rect opsin_rect(x0 - patch_rect.x0(), y0 - patch_rect.y0(),
RoundUpToBlockDim(xsize), RoundUpToBlockDim(ysize));
if (enc_state.initialize_global_state && !jpeg_data) {
ComputeChromacityAdjustments(cparams, opsin, &mutable_frame_header);
ComputeChromacityAdjustments(cparams, opsin, opsin_rect,
&mutable_frame_header);
}
if (!enc_state.streaming_mode) {
@ -1466,6 +1482,10 @@ Status ComputeEncodingData(
}
}
if (!enc_state.streaming_mode) {
opsin_rect = Rect(opsin);
}
if (frame_header.encoding == FrameEncoding::kVarDCT) {
enc_state.passes.resize(enc_state.progressive_splitter.GetNumPasses());
for (PassesEncoderState::PassData& pass : enc_state.passes) {
@ -1475,9 +1495,9 @@ Status ComputeEncodingData(
JXL_RETURN_IF_ERROR(ComputeJPEGTranscodingData(
*jpeg_data, frame_header, pool, &enc_modular, &enc_state));
} else {
JXL_RETURN_IF_ERROR(
ComputeVarDCTEncodingData(frame_header, linear, &opsin, cms, pool,
&enc_modular, &enc_state, aux_out));
JXL_RETURN_IF_ERROR(ComputeVarDCTEncodingData(
frame_header, linear, &opsin, opsin_rect, cms, pool, &enc_modular,
&enc_state, aux_out));
}
ComputeAllCoeffOrders(enc_state, frame_dim);
if (!enc_state.streaming_mode) {

View File

@ -15,7 +15,8 @@
namespace jxl {
void GaborishInverse(Image3F* in_out, float mul[3], ThreadPool* pool) {
void GaborishInverse(Image3F* in_out, const Rect& rect, float mul[3],
ThreadPool* pool) {
WeightsSymmetric5 weights[3];
// Only an approximation. One or even two 3x3, and rank-1 (separable) 5x5
// are insufficient. The numbers here have been obtained by butteraugli
@ -48,11 +49,9 @@ void GaborishInverse(Image3F* in_out, float mul[3], ThreadPool* pool) {
// image and reuse the existing planes of the in/out image.
ImageF temp(in_out->Plane(2).xsize(), in_out->Plane(2).ysize());
CopyImageTo(in_out->Plane(2), &temp);
Symmetric5(in_out->Plane(0), Rect(*in_out), weights[0], pool,
&in_out->Plane(2));
Symmetric5(in_out->Plane(1), Rect(*in_out), weights[1], pool,
&in_out->Plane(0));
Symmetric5(temp, Rect(*in_out), weights[2], pool, &in_out->Plane(1));
Symmetric5(in_out->Plane(0), rect, weights[0], pool, &in_out->Plane(2), rect);
Symmetric5(in_out->Plane(1), rect, weights[1], pool, &in_out->Plane(0), rect);
Symmetric5(temp, rect, weights[2], pool, &in_out->Plane(1), rect);
// Now planes are 1, 2, 0.
in_out->Plane(0).Swap(in_out->Plane(1));
// 2 1 0

View File

@ -19,7 +19,8 @@ namespace jxl {
// Used in encoder to reduce the impact of the decoder's smoothing.
// This is not exact. Works in-place to reduce memory use.
// The input is typically in XYB space.
void GaborishInverse(Image3F* in_out, float mul[3], ThreadPool* pool);
void GaborishInverse(Image3F* in_out, const Rect& rect, float mul[3],
ThreadPool* pool);
} // namespace jxl

View File

@ -47,7 +47,7 @@ void TestRoundTrip(const Image3F& in, float max_l1) {
w,
w,
};
GaborishInverse(&fwd, weights, null_pool);
GaborishInverse(&fwd, Rect(fwd), weights, null_pool);
JXL_ASSERT_OK(VerifyRelativeError(in, fwd, max_l1, 1E-4f, _));
}

View File

@ -357,15 +357,17 @@ void QuantizeRoundtripYBlockAC(PassesEncoderState* enc_state, const size_t size,
}
void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
const Image3F& opsin, Image3F* dc) {
const Image3F& opsin, const Rect& rect, Image3F* dc) {
const Rect block_group_rect =
enc_state->shared.frame_dim.BlockGroupRect(group_idx);
const Rect group_rect = enc_state->shared.frame_dim.GroupRect(group_idx);
const Rect cmap_rect(
block_group_rect.x0() / kColorTileDimInBlocks,
block_group_rect.y0() / kColorTileDimInBlocks,
DivCeil(block_group_rect.xsize(), kColorTileDimInBlocks),
DivCeil(block_group_rect.ysize(), kColorTileDimInBlocks));
const Rect group_rect =
enc_state->shared.frame_dim.GroupRect(group_idx).Translate(rect.x0(),
rect.y0());
const size_t xsize_blocks = block_group_rect.xsize();
const size_t ysize_blocks = block_group_rect.ysize();
@ -504,9 +506,9 @@ HWY_AFTER_NAMESPACE();
namespace jxl {
HWY_EXPORT(ComputeCoefficients);
void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
const Image3F& opsin, Image3F* dc) {
const Image3F& opsin, const Rect& rect, Image3F* dc) {
return HWY_DYNAMIC_DISPATCH(ComputeCoefficients)(group_idx, enc_state, opsin,
dc);
rect, dc);
}
Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,

View File

@ -20,7 +20,7 @@ struct PassesEncoderState;
// Fills DC
void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
const Image3F& opsin, Image3F* dc);
const Image3F& opsin, const Rect& rect, Image3F* dc);
Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,
size_t histogram_idx,

View File

@ -30,8 +30,10 @@ namespace jxl {
struct AuxOut;
void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
if (enc_state.cparams.decoding_speed_tier >= 1) {
void FindBestBlockEntropyModel(const CompressParams& cparams, const ImageI& rqf,
const AcStrategyImage& ac_strategy,
BlockCtxMap* block_ctx_map) {
if (cparams.decoding_speed_tier >= 1) {
static constexpr uint8_t kSimpleCtxMap[] = {
// Cluster all blocks together
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
@ -42,20 +44,18 @@ void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
3 * kNumOrders == sizeof(kSimpleCtxMap) / sizeof *kSimpleCtxMap,
"Update simple context map");
auto bcm = enc_state.shared.block_ctx_map;
auto bcm = *block_ctx_map;
bcm.ctx_map.assign(std::begin(kSimpleCtxMap), std::end(kSimpleCtxMap));
bcm.num_ctxs = 2;
bcm.num_dc_ctxs = 1;
return;
}
if (enc_state.cparams.speed_tier >= SpeedTier::kFalcon) {
if (cparams.speed_tier >= SpeedTier::kFalcon) {
return;
}
const ImageI& rqf = enc_state.shared.raw_quant_field;
// No need to change context modeling for small images.
size_t tot = rqf.xsize() * rqf.ysize();
size_t size_for_ctx_model =
(1 << 10) * enc_state.cparams.butteraugli_distance;
size_t size_for_ctx_model = (1 << 10) * cparams.butteraugli_distance;
if (tot < size_for_ctx_model) return;
struct OccCounters {
@ -79,14 +79,13 @@ void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
size_t ord_counts[kNumOrders] = {};
};
// The OccCounters struct is too big to allocate on the stack.
std::unique_ptr<OccCounters> counters(
new OccCounters(rqf, enc_state.shared.ac_strategy));
std::unique_ptr<OccCounters> counters(new OccCounters(rqf, ac_strategy));
// Splitting the context model according to the quantization field seems to
// mostly benefit only large images.
size_t size_for_qf_split = (1 << 13) * enc_state.cparams.butteraugli_distance;
size_t size_for_qf_split = (1 << 13) * cparams.butteraugli_distance;
size_t num_qf_segments = tot < size_for_qf_split ? 1 : 2;
std::vector<uint32_t>& qft = enc_state.shared.block_ctx_map.qf_thresholds;
std::vector<uint32_t>& qft = block_ctx_map->qf_thresholds;
qft.clear();
// Divide the quant field in up to num_qf_segments segments.
size_t cumsum = 0;
@ -153,7 +152,7 @@ void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
remap[i] = remap_remap[remap[i]];
}
// Write the block context map.
auto& ctx_map = enc_state.shared.block_ctx_map.ctx_map;
auto& ctx_map = block_ctx_map->ctx_map;
ctx_map = remap;
ctx_map.resize(remap.size() * 3);
// for chroma, only use up to nb_clusters_chroma separate block contexts
@ -162,14 +161,13 @@ void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
ctx_map[i] = num + Clamp1((int)remap[i % remap.size()], 0,
(int)nb_clusters_chroma - 1);
}
enc_state.shared.block_ctx_map.num_ctxs =
block_ctx_map->num_ctxs =
*std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
}
namespace {
void FindBestDequantMatrices(const CompressParams& cparams,
const Image3F& opsin,
ModularFrameEncoder* modular_frame_encoder,
DequantMatrices* dequant_matrices) {
// TODO(veluca): quant matrices for no-gaborish.
@ -701,38 +699,47 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
PassesEncoderState* enc_state,
ModularFrameEncoder* modular_frame_encoder,
const Image3F* original_pixels, Image3F* opsin,
const JxlCmsInterface& cms, ThreadPool* pool,
AuxOut* aux_out) {
CompressParams& cparams = enc_state->cparams;
const Rect& rect, const JxlCmsInterface& cms,
ThreadPool* pool, AuxOut* aux_out) {
const CompressParams& cparams = enc_state->cparams;
const bool streaming_mode = enc_state->streaming_mode;
const bool initialize_global_state = enc_state->initialize_global_state;
PassesSharedState& shared = enc_state->shared;
const FrameDimensions& frame_dim = shared.frame_dim;
ImageFeatures& image_features = shared.image_features;
DequantMatrices& matrices = shared.matrices;
Quantizer& quantizer = shared.quantizer;
ImageI& raw_quant_field = shared.raw_quant_field;
ColorCorrelationMap& cmap = shared.cmap;
AcStrategyImage& ac_strategy = shared.ac_strategy;
ImageB& epf_sharpness = shared.epf_sharpness;
BlockCtxMap& block_ctx_map = shared.block_ctx_map;
// Find and subtract splines.
if (!enc_state->streaming_mode &&
cparams.speed_tier <= SpeedTier::kSquirrel) {
if (!streaming_mode && cparams.speed_tier <= SpeedTier::kSquirrel) {
if (cparams.custom_splines.HasAny()) {
shared.image_features.splines = cparams.custom_splines;
image_features.splines = cparams.custom_splines;
} else {
shared.image_features.splines = FindSplines(*opsin);
image_features.splines = FindSplines(*opsin);
}
JXL_RETURN_IF_ERROR(shared.image_features.splines.InitializeDrawCache(
opsin->xsize(), opsin->ysize(), shared.cmap));
shared.image_features.splines.SubtractFrom(opsin);
JXL_RETURN_IF_ERROR(image_features.splines.InitializeDrawCache(
opsin->xsize(), opsin->ysize(), cmap));
image_features.splines.SubtractFrom(opsin);
}
// Find and subtract patches/dots.
if (!enc_state->streaming_mode &&
if (!streaming_mode &&
ApplyOverride(cparams.patches,
cparams.speed_tier <= SpeedTier::kSquirrel)) {
FindBestPatchDictionary(*opsin, enc_state, cms, pool, aux_out);
PatchDictionaryEncoder::SubtractFrom(shared.image_features.patches, opsin);
PatchDictionaryEncoder::SubtractFrom(image_features.patches, opsin);
}
static const float kAcQuant = 0.79f;
const float quant_dc = InitialQuantDC(cparams.butteraugli_distance);
Quantizer& quantizer = enc_state->shared.quantizer;
// We don't know the quant field yet, but for computing the global scale
// assuming that it will be the same as for Falcon mode is good enough.
if (enc_state->initialize_global_state) {
if (initialize_global_state) {
quantizer.ComputeGlobalScaleAndQuant(
quant_dc, kAcQuant / cparams.butteraugli_distance, 0);
}
@ -757,34 +764,35 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
// output: Gaborished XYB, CfL, ACS, raw quant field, EPF control field.
ArControlFieldHeuristics ar_heuristics;
AcStrategyHeuristics acs_heuristics;
AcStrategyHeuristics acs_heuristics(cparams);
CfLHeuristics cfl_heuristics;
ImageF initial_quant_field;
ImageF initial_quant_masking;
ImageF initial_quant_masking1x1;
// Compute an initial estimate of the quantization field.
// Call InitialQuantField only in Hare mode or slower. Otherwise, rely
// on simple heuristics in FindBestAcStrategy, or set a constant for Falcon
// mode.
if (cparams.speed_tier > SpeedTier::kHare) {
enc_state->initial_quant_field =
ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
enc_state->initial_quant_masking =
ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
initial_quant_field =
ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
initial_quant_masking =
ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
float q = kAcQuant / cparams.butteraugli_distance;
FillImage(q, &enc_state->initial_quant_field);
FillImage(1.0f / (q + 0.001f), &enc_state->initial_quant_masking);
FillImage(q, &initial_quant_field);
FillImage(1.0f / (q + 0.001f), &initial_quant_masking);
} else {
// Call this here, as it relies on pre-gaborish values.
float butteraugli_distance_for_iqf = cparams.butteraugli_distance;
if (!frame_header.loop_filter.gab) {
butteraugli_distance_for_iqf *= 0.73f;
}
enc_state->initial_quant_field = InitialQuantField(
butteraugli_distance_for_iqf, *opsin, shared.frame_dim, pool, 1.0f,
&enc_state->initial_quant_masking,
&enc_state->initial_quant_masking1x1);
if (enc_state->initialize_global_state) {
quantizer.SetQuantField(quant_dc, enc_state->initial_quant_field,
nullptr);
initial_quant_field = InitialQuantField(
butteraugli_distance_for_iqf, *opsin, rect, pool, 1.0f,
&initial_quant_masking, &initial_quant_masking1x1);
if (initialize_global_state) {
quantizer.SetQuantField(quant_dc, initial_quant_field, nullptr);
}
}
@ -798,71 +806,67 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
0.99406123118127299f,
0.99719338015886894f,
};
GaborishInverse(opsin, weight, pool);
GaborishInverse(opsin, rect, weight, pool);
}
if (enc_state->initialize_global_state) {
FindBestDequantMatrices(cparams, *opsin, modular_frame_encoder,
&enc_state->shared.matrices);
if (initialize_global_state) {
FindBestDequantMatrices(cparams, modular_frame_encoder, &matrices);
}
cfl_heuristics.Init(*opsin);
acs_heuristics.Init(*opsin, enc_state);
cfl_heuristics.Init(rect);
acs_heuristics.Init(*opsin, rect, initial_quant_field, initial_quant_masking,
initial_quant_masking1x1, &matrices);
auto process_tile = [&](const uint32_t tid, const size_t thread) {
size_t n_enc_tiles =
DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks);
size_t n_enc_tiles = DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks);
size_t tx = tid % n_enc_tiles;
size_t ty = tid / n_enc_tiles;
size_t by0 = ty * kEncTileDimInBlocks;
size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks,
enc_state->shared.frame_dim.ysize_blocks);
size_t by1 =
std::min((ty + 1) * kEncTileDimInBlocks, frame_dim.ysize_blocks);
size_t bx0 = tx * kEncTileDimInBlocks;
size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks,
enc_state->shared.frame_dim.xsize_blocks);
size_t bx1 =
std::min((tx + 1) * kEncTileDimInBlocks, frame_dim.xsize_blocks);
Rect r(bx0, by0, bx1 - bx0, by1 - by0);
// For speeds up to Wombat, we only compute the color correlation map
// once we know the transform type and the quantization map.
if (cparams.speed_tier <= SpeedTier::kSquirrel) {
cfl_heuristics.ComputeTile(r, *opsin, enc_state->shared.matrices,
cfl_heuristics.ComputeTile(r, *opsin, rect, matrices,
/*ac_strategy=*/nullptr,
/*raw_quant_field=*/nullptr,
/*quantizer=*/nullptr, /*fast=*/false, thread,
&enc_state->shared.cmap);
&cmap);
}
// Choose block sizes.
acs_heuristics.ProcessRect(r);
acs_heuristics.ProcessRect(r, cmap, &ac_strategy);
// Choose amount of post-processing smoothing.
// TODO(veluca): should this go *after* AdjustQuantField?
ar_heuristics.RunRect(frame_header, r, *opsin, enc_state, thread);
ar_heuristics.RunRect(cparams, frame_header, r, *opsin, rect,
initial_quant_field, ac_strategy, &epf_sharpness,
thread);
// Always set the initial quant field, so we can compute the CfL map with
// more accuracy. The initial quant field might change in slower modes, but
// adjusting the quant field with butteraugli when all the other encoding
// parameters are fixed is likely a more reliable choice anyway.
AdjustQuantField(enc_state->shared.ac_strategy, r,
cparams.butteraugli_distance,
&enc_state->initial_quant_field);
quantizer.SetQuantFieldRect(enc_state->initial_quant_field, r,
&enc_state->shared.raw_quant_field);
AdjustQuantField(ac_strategy, r, cparams.butteraugli_distance,
&initial_quant_field);
quantizer.SetQuantFieldRect(initial_quant_field, r, &raw_quant_field);
// Compute a non-default CfL map if we are at Hare speed, or slower.
if (cparams.speed_tier <= SpeedTier::kHare) {
cfl_heuristics.ComputeTile(
r, *opsin, enc_state->shared.matrices, &enc_state->shared.ac_strategy,
&enc_state->shared.raw_quant_field, &enc_state->shared.quantizer,
/*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread,
&enc_state->shared.cmap);
r, *opsin, rect, matrices, &ac_strategy, &raw_quant_field, &quantizer,
/*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread, &cmap);
}
};
JXL_RETURN_IF_ERROR(RunOnPool(
pool, 0,
DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks) *
DivCeil(enc_state->shared.frame_dim.ysize_blocks,
kEncTileDimInBlocks),
DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks) *
DivCeil(frame_dim.ysize_blocks, kEncTileDimInBlocks),
[&](const size_t num_threads) {
ar_heuristics.PrepareForThreads(num_threads);
cfl_heuristics.PrepareForThreads(num_threads);
@ -870,23 +874,22 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
},
process_tile, "Enc Heuristics"));
acs_heuristics.Finalize(aux_out);
if (cparams.speed_tier <= SpeedTier::kHare &&
enc_state->initialize_global_state) {
acs_heuristics.Finalize(frame_dim, ac_strategy, aux_out);
if (cparams.speed_tier <= SpeedTier::kHare && initialize_global_state) {
cfl_heuristics.ComputeDC(/*fast=*/cparams.speed_tier >= SpeedTier::kWombat,
&enc_state->shared.cmap);
&cmap);
}
// Refine quantization levels.
if (!enc_state->streaming_mode) {
FindBestQuantizer(frame_header, original_pixels, *opsin, enc_state, cms,
pool, aux_out);
if (!streaming_mode) {
FindBestQuantizer(frame_header, original_pixels, *opsin,
initial_quant_field, enc_state, cms, pool, aux_out);
}
// Choose a context model that depends on the amount of quantization for AC.
if (cparams.speed_tier < SpeedTier::kFalcon &&
enc_state->initialize_global_state) {
FindBestBlockEntropyModel(*enc_state);
if (cparams.speed_tier < SpeedTier::kFalcon && initialize_global_state) {
FindBestBlockEntropyModel(cparams, raw_quant_field, ac_strategy,
&block_ctx_map);
}
return true;
}

View File

@ -37,8 +37,8 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
PassesEncoderState* enc_state,
ModularFrameEncoder* modular_frame_encoder,
const Image3F* original_pixels, Image3F* opsin,
const JxlCmsInterface& cms, ThreadPool* pool,
AuxOut* aux_out);
const Rect& rect, const JxlCmsInterface& cms,
ThreadPool* pool, AuxOut* aux_out);
void FindBestBlockEntropyModel(PassesEncoderState& enc_state);

View File

@ -490,7 +490,7 @@ Status ModularFrameEncoder::ComputeEncodingData(
if (do_color && frame_header.loop_filter.gab) {
float w = 0.9908511000000001f;
float weights[3] = {w, w, w};
GaborishInverse(color, weights, pool);
GaborishInverse(color, Rect(*color), weights, pool);
}
if (do_color && metadata.bit_depth.bits_per_sample <= 16 &&

View File

@ -17,6 +17,7 @@
#include "lib/jxl/base/byte_order.h"
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/exif.h"
#include "lib/jxl/base/printf_macros.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/base/status.h"
@ -31,7 +32,6 @@
#include "lib/jxl/enc_icc_codec.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/encode_internal.h"
#include "lib/jxl/exif.h"
#include "lib/jxl/jpeg/enc_jpeg_data.h"
#include "lib/jxl/luminance.h"
#include "lib/jxl/memory_manager_internal.h"
@ -2252,7 +2252,7 @@ JxlEncoderStatus JxlEncoderAddImageFrameInternal(
auto frame_state = JxlFastLosslessPrepareFrame(
frame_data.GetInputSource(), xsize, ysize, num_channels,
frame_settings->enc->metadata.m.bit_depth.bits_per_sample, big_endian,
/*effort=*/2);
/*effort=*/2, /*oneshot=*/!frame_data.StreamingInput());
if (!streaming) {
JxlFastLosslessProcessFrame(frame_state, /*is_last=*/false,
frame_settings->enc->thread_pool.get(),

View File

@ -267,6 +267,8 @@ class JxlEncoderChunkedFrameAdapter {
return true;
}
bool StreamingInput() const { return has_input_source_; }
const size_t xsize;
const size_t ysize;

View File

@ -2009,3 +2009,64 @@ TEST_P(EncoderStreamingTest, ChunkedAndOutputCallback) {
JXL_GTEST_INSTANTIATE_TEST_SUITE_P(
EncoderStreamingTest, EncoderStreamingTest,
testing::ValuesIn(StreamingTestParam::All()));
TEST(EncoderTest, CMYK) {
size_t xsize = 257;
size_t ysize = 259;
jxl::test::TestImage image;
image.SetDimensions(xsize, ysize)
.SetDataType(JXL_TYPE_UINT8)
.SetChannels(3)
.SetAllBitDepths(8);
image.AddFrame().RandomFill();
jxl::test::TestImage ec_image;
ec_image.SetDataType(JXL_TYPE_UINT8)
.SetDimensions(xsize, ysize)
.SetChannels(1)
.SetAllBitDepths(8);
ec_image.AddFrame().RandomFill();
const auto& frame = image.ppf().frames[0].color;
const auto& ec_frame = ec_image.ppf().frames[0].color;
JxlBasicInfo basic_info = image.ppf().info;
basic_info.xsize = xsize;
basic_info.ysize = ysize;
basic_info.num_extra_channels = 1;
basic_info.uses_original_profile = JXL_TRUE;
std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
JxlEncoderPtr enc_ptr = JxlEncoderMake(nullptr);
JxlEncoderStruct* enc = enc_ptr.get();
ASSERT_NE(nullptr, enc);
JxlEncoderFrameSettings* frame_settings =
JxlEncoderFrameSettingsCreate(enc, NULL);
EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
JxlExtraChannelInfo channel_info;
JxlExtraChannelType channel_type = JXL_CHANNEL_BLACK;
JxlEncoderInitExtraChannelInfo(channel_type, &channel_info);
EXPECT_EQ(JXL_ENC_SUCCESS,
JxlEncoderSetExtraChannelInfo(enc, 0, &channel_info));
const std::vector<uint8_t> icc = jxl::test::ReadTestData(
"external/Compact-ICC-Profiles/profiles/"
"CGATS001Compat-v2-micro.icc");
EXPECT_EQ(JXL_ENC_SUCCESS,
JxlEncoderSetICCProfile(enc, icc.data(), icc.size()));
EXPECT_EQ(JXL_ENC_SUCCESS,
JxlEncoderAddImageFrame(frame_settings, &frame.format,
frame.pixels(), frame.pixels_size));
EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetExtraChannelBuffer(
frame_settings, &ec_frame.format,
ec_frame.pixels(), ec_frame.pixels_size, 0));
JxlEncoderCloseInput(frame_settings->enc);
uint8_t* next_out = compressed.data();
size_t avail_out = compressed.size();
ProcessEncoder(enc, compressed, next_out, avail_out);
jxl::extras::JXLDecompressParams dparams;
dparams.accepted_formats = {
{3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0},
};
jxl::extras::PackedPixelFile ppf;
EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams,
nullptr, &ppf, nullptr));
}

View File

@ -5,10 +5,10 @@
#include "lib/jxl/fields.h"
#include <stddef.h>
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <cstddef>
#include <hwy/base.h>
#include "lib/jxl/base/bits.h"

View File

@ -8,15 +8,12 @@
// Forward/backward-compatible 'bundles' with auto-serialized 'fields'.
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <cinttypes>
#include <cmath> // abs
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include "lib/jxl/base/bits.h"
#include "lib/jxl/base/compiler_specific.h"

View File

@ -81,8 +81,8 @@ void AppendKeyword(const Tag& keyword, PaddedBytes* data) {
}
// Checks if a + b > size, taking possible integer overflow into account.
Status CheckOutOfBounds(size_t a, size_t b, size_t size) {
size_t pos = a + b;
Status CheckOutOfBounds(uint64_t a, uint64_t b, uint64_t size) {
uint64_t pos = a + b;
if (pos > size) return JXL_FAILURE("Out of bounds");
if (pos < a) return JXL_FAILURE("Out of bounds"); // overflow happened
return true;

View File

@ -92,7 +92,7 @@ void EncodeKeyword(const Tag& keyword, uint8_t* data, size_t size, size_t pos);
void AppendKeyword(const Tag& keyword, PaddedBytes* data);
// Checks if a + b > size, taking possible integer overflow into account.
Status CheckOutOfBounds(size_t a, size_t b, size_t size);
Status CheckOutOfBounds(uint64_t a, uint64_t b, uint64_t size);
Status CheckIs32Bit(uint64_t v);
const Span<const uint8_t> ICCInitialHeaderPrediction();

View File

@ -8,7 +8,11 @@
// SIMD/multicore-friendly planar image representation with row accessors.
#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
defined(THREAD_SANITIZER)
#include <inttypes.h>
#endif
#include <stddef.h>
#include <stdint.h>
#include <string.h>
@ -324,6 +328,14 @@ class RectT {
return CeilShiftRight(shift, shift);
}
RectT<T> Extend(T border, RectT<T> parent) {
T new_x0 = x0() > parent.x0() + border ? x0() - border : parent.x0();
T new_y0 = y0() > parent.y0() + border ? y0() - border : parent.y0();
T new_x1 = x1() + border > parent.x1() ? parent.x1() : x1() + border;
T new_y1 = y1() + border > parent.y1() ? parent.y1() : y1() + border;
return RectT<T>(new_x0, new_y0, new_x1 - new_x0, new_y1 - new_y0);
}
template <typename U>
RectT<U> As() const {
return RectT<U>(U(x0_), U(y0_), U(xsize_), U(ysize_));

View File

@ -5,8 +5,9 @@
#include "lib/jxl/modular/encoding/enc_debug_tree.h"
#include <stdint.h>
#include <stdlib.h>
#include <cinttypes>
#include <cstdint>
#include <cstdlib>
#include "lib/jxl/base/os_macros.h"
#include "lib/jxl/base/printf_macros.h"

View File

@ -5,6 +5,8 @@
#include "lib/jxl/modular/transform/transform.h"
#include <cinttypes>
#include "lib/jxl/base/printf_macros.h"
#include "lib/jxl/fields.h"
#include "lib/jxl/modular/modular_image.h"

View File

@ -5,6 +5,8 @@
#include "lib/jxl/render_pipeline/stage_write.h"
#include <type_traits>
#include "lib/jxl/alpha.h"
#include "lib/jxl/base/common.h"
#include "lib/jxl/dec_cache.h"
@ -21,6 +23,7 @@ namespace jxl {
namespace HWY_NAMESPACE {
// These templates are not found via ADL.
using hwy::HWY_NAMESPACE::Add;
using hwy::HWY_NAMESPACE::Clamp;
using hwy::HWY_NAMESPACE::Div;
using hwy::HWY_NAMESPACE::Max;
@ -30,6 +33,78 @@ using hwy::HWY_NAMESPACE::Or;
using hwy::HWY_NAMESPACE::Rebind;
using hwy::HWY_NAMESPACE::ShiftLeftSame;
using hwy::HWY_NAMESPACE::ShiftRightSame;
using hwy::HWY_NAMESPACE::VFromD;
// 8x8 ordered dithering pattern from
// https://en.wikipedia.org/wiki/Ordered_dithering
// scaled to have an average of 0 and be fully contained in (-0.5, 0.5).
// Matrix is duplicated in width to avoid inconsistencies or out-of-bound-reads
// if doing unaligned operations.
const float kDither[(2 * 8) * 8] = {
-0.4921875, 0.0078125, -0.3671875, 0.1328125, //
-0.4609375, 0.0390625, -0.3359375, 0.1640625, //
-0.4921875, 0.0078125, -0.3671875, 0.1328125, //
-0.4609375, 0.0390625, -0.3359375, 0.1640625, //
//
0.2578125, -0.2421875, 0.3828125, -0.1171875, //
0.2890625, -0.2109375, 0.4140625, -0.0859375, //
0.2578125, -0.2421875, 0.3828125, -0.1171875, //
0.2890625, -0.2109375, 0.4140625, -0.0859375, //
//
-0.3046875, 0.1953125, -0.4296875, 0.0703125, //
-0.2734375, 0.2265625, -0.3984375, 0.1015625, //
-0.3046875, 0.1953125, -0.4296875, 0.0703125, //
-0.2734375, 0.2265625, -0.3984375, 0.1015625, //
//
0.4453125, -0.0546875, 0.3203125, -0.1796875, //
0.4765625, -0.0234375, 0.3515625, -0.1484375, //
0.4453125, -0.0546875, 0.3203125, -0.1796875, //
0.4765625, -0.0234375, 0.3515625, -0.1484375, //
//
-0.4453125, 0.0546875, -0.3203125, 0.1796875, //
-0.4765625, 0.0234375, -0.3515625, 0.1484375, //
-0.4453125, 0.0546875, -0.3203125, 0.1796875, //
-0.4765625, 0.0234375, -0.3515625, 0.1484375, //
//
0.3046875, -0.1953125, 0.4296875, -0.0703125, //
0.2734375, -0.2265625, 0.3984375, -0.1015625, //
0.3046875, -0.1953125, 0.4296875, -0.0703125, //
0.2734375, -0.2265625, 0.3984375, -0.1015625, //
//
-0.2578125, 0.2421875, -0.3828125, 0.1171875, //
-0.2890625, 0.2109375, -0.4140625, 0.0859375, //
-0.2578125, 0.2421875, -0.3828125, 0.1171875, //
-0.2890625, 0.2109375, -0.4140625, 0.0859375, //
//
0.4921875, -0.0078125, 0.3671875, -0.1328125, //
0.4609375, -0.0390625, 0.3359375, -0.1640625, //
0.4921875, -0.0078125, 0.3671875, -0.1328125, //
0.4609375, -0.0390625, 0.3359375, -0.1640625, //
};
using DF = HWY_FULL(float);
// Converts `v` to an appropriate value for the given unsigned type.
// If the unsigned type is an 8-bit type, performs ordered dithering.
template <typename T>
VFromD<Rebind<T, DF>> MakeUnsigned(VFromD<DF> v, size_t x0, size_t y0,
VFromD<DF> mul) {
static_assert(std::is_unsigned<T>::value, "T must be an unsigned type");
using DU = Rebind<T, DF>;
v = Mul(v, mul);
// TODO(veluca): if constexpr with C++17
if (sizeof(T) == 1) {
size_t pos = (y0 % 8) * (2 * 8) + (x0 % 8);
#if HWY_TARGET != HWY_SCALAR
auto dither = LoadDup128(DF(), kDither + pos);
#else
auto dither = LoadU(DF(), kDither + pos);
#endif
v = Add(v, dither);
}
v = Clamp(Zero(DF()), v, mul);
return DemoteTo(DU(), NearestInt(v));
}
class WriteToOutputStage : public RenderPipelineStage {
public:
@ -229,14 +304,14 @@ class WriteToOutputStage : public RenderPipelineStage {
if (out.data_type_ == JXL_TYPE_UINT8) {
uint8_t* JXL_RESTRICT temp =
reinterpret_cast<uint8_t*>(temp_out_[thread_id].get());
StoreUnsignedRow(out, input, len, temp);
StoreUnsignedRow(out, input, len, temp, xstart, ypos);
WriteToOutput(out, thread_id, ypos, xstart, len, temp);
} else if (out.data_type_ == JXL_TYPE_UINT16 ||
out.data_type_ == JXL_TYPE_FLOAT16) {
uint16_t* JXL_RESTRICT temp =
reinterpret_cast<uint16_t*>(temp_out_[thread_id].get());
if (out.data_type_ == JXL_TYPE_UINT16) {
StoreUnsignedRow(out, input, len, temp);
StoreUnsignedRow(out, input, len, temp, xstart, ypos);
} else {
StoreFloat16Row(out, input, len, temp);
}
@ -289,10 +364,8 @@ class WriteToOutputStage : public RenderPipelineStage {
template <typename T>
void StoreUnsignedRow(const Output& out, const float* input[4], size_t len,
T* output) const {
T* output, size_t xstart, size_t ypos) const {
const HWY_FULL(float) d;
auto zero = Zero(d);
auto one = Set(d, 1.0f);
auto mul = Set(d, (1u << (out.bits_per_sample_)) - 1);
const Rebind<T, decltype(d)> du;
const size_t padding = RoundUpTo(len, Lanes(d)) - len;
@ -301,35 +374,32 @@ class WriteToOutputStage : public RenderPipelineStage {
}
if (out.num_channels_ == 1) {
for (size_t i = 0; i < len; i += Lanes(d)) {
auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
StoreU(DemoteTo(du, NearestInt(v0)), du, &output[i]);
StoreU(MakeUnsigned<T>(LoadU(d, &input[0][i]), xstart + i, ypos, mul),
du, &output[i]);
}
} else if (out.num_channels_ == 2) {
for (size_t i = 0; i < len; i += Lanes(d)) {
auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul);
StoreInterleaved2(DemoteTo(du, NearestInt(v0)),
DemoteTo(du, NearestInt(v1)), du, &output[2 * i]);
StoreInterleaved2(
MakeUnsigned<T>(LoadU(d, &input[0][i]), xstart + i, ypos, mul),
MakeUnsigned<T>(LoadU(d, &input[1][i]), xstart + i, ypos, mul), du,
&output[2 * i]);
}
} else if (out.num_channels_ == 3) {
for (size_t i = 0; i < len; i += Lanes(d)) {
auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul);
auto v2 = Mul(Clamp(zero, LoadU(d, &input[2][i]), one), mul);
StoreInterleaved3(DemoteTo(du, NearestInt(v0)),
DemoteTo(du, NearestInt(v1)),
DemoteTo(du, NearestInt(v2)), du, &output[3 * i]);
StoreInterleaved3(
MakeUnsigned<T>(LoadU(d, &input[0][i]), xstart + i, ypos, mul),
MakeUnsigned<T>(LoadU(d, &input[1][i]), xstart + i, ypos, mul),
MakeUnsigned<T>(LoadU(d, &input[2][i]), xstart + i, ypos, mul), du,
&output[3 * i]);
}
} else if (out.num_channels_ == 4) {
for (size_t i = 0; i < len; i += Lanes(d)) {
auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul);
auto v2 = Mul(Clamp(zero, LoadU(d, &input[2][i]), one), mul);
auto v3 = Mul(Clamp(zero, LoadU(d, &input[3][i]), one), mul);
StoreInterleaved4(DemoteTo(du, NearestInt(v0)),
DemoteTo(du, NearestInt(v1)),
DemoteTo(du, NearestInt(v2)),
DemoteTo(du, NearestInt(v3)), du, &output[4 * i]);
StoreInterleaved4(
MakeUnsigned<T>(LoadU(d, &input[0][i]), xstart + i, ypos, mul),
MakeUnsigned<T>(LoadU(d, &input[1][i]), xstart + i, ypos, mul),
MakeUnsigned<T>(LoadU(d, &input[2][i]), xstart + i, ypos, mul),
MakeUnsigned<T>(LoadU(d, &input[3][i]), xstart + i, ypos, mul), du,
&output[4 * i]);
}
}
msan::PoisonMemory(output + out.num_channels_ * len,

View File

@ -6,7 +6,6 @@
#ifndef LIB_JXL_SANITIZERS_H_
#define LIB_JXL_SANITIZERS_H_
#include <inttypes.h>
#include <stddef.h>
#include "lib/jxl/base/compiler_specific.h"
@ -14,6 +13,7 @@
#include "lib/jxl/image.h"
#if JXL_MEMORY_SANITIZER
#include <inttypes.h>
#include <stdio.h>
#include <algorithm>

View File

@ -6,6 +6,7 @@
#include "lib/jxl/splines.h"
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <limits>

View File

@ -7,6 +7,7 @@
#include <jxl/cms.h>
#include <jxl/cms_interface.h>
#include <jxl/types.h>
#include <cstddef>
#include <fstream>
@ -487,6 +488,10 @@ size_t ComparePixels(const uint8_t* a, const uint8_t* b, size_t xsize,
// TODO(lode): Set the required precision back to 11 bits when possible.
precision = 0.5 * threshold_multiplier / ((1ull << (bits - 1)) - 1ull);
}
if (format_b.data_type == JXL_TYPE_UINT8) {
// Increase the threshold by the maximum difference introduced by dithering.
precision += 63.0 / 128.0;
}
size_t numdiff = 0;
for (size_t y = 0; y < ysize; y++) {
for (size_t x = 0; x < xsize; x++) {

View File

@ -14,6 +14,7 @@ libjxl_base_sources = [
"jxl/base/common.h",
"jxl/base/compiler_specific.h",
"jxl/base/data_parallel.h",
"jxl/base/exif.h",
"jxl/base/fast_math-inl.h",
"jxl/base/float.h",
"jxl/base/iaca.h",
@ -185,7 +186,6 @@ libjxl_dec_sources = [
"jxl/entropy_coder.h",
"jxl/epf.cc",
"jxl/epf.h",
"jxl/exif.h",
"jxl/fast_dct-inl.h",
"jxl/fast_dct.cc",
"jxl/fast_dct.h",

View File

@ -14,6 +14,7 @@ set(JPEGXL_INTERNAL_BASE_SOURCES
jxl/base/common.h
jxl/base/compiler_specific.h
jxl/base/data_parallel.h
jxl/base/exif.h
jxl/base/fast_math-inl.h
jxl/base/float.h
jxl/base/iaca.h
@ -185,7 +186,6 @@ set(JPEGXL_INTERNAL_DEC_SOURCES
jxl/entropy_coder.h
jxl/epf.cc
jxl/epf.h
jxl/exif.h
jxl/fast_dct-inl.h
jxl/fast_dct.cc
jxl/fast_dct.h