Bug 1568138 - Reverse order of Huffman keys;r=arai

In Context 0.1, Huffman keys are represented with a bit order that is the reverse of the bitstream. This
patch ensures that Huffman tables reverse the bit order of their keys upon initialization, so as to allow
fast comparison later in the code.

Differential Revision: https://phabricator.services.mozilla.com/D38979

--HG--
extra : moz-landing-system : lando
This commit is contained in:
David Teller 2019-07-26 14:38:56 +00:00
parent 8aec1c40bf
commit 21161933e5
2 changed files with 110 additions and 50 deletions

View File

@ -44,9 +44,9 @@ const uint8_t MAX_CODE_BIT_LENGTH = 20;
const uint8_t MAX_PREFIX_BIT_LENGTH = 32;
// The length of the bit buffer, in bits.
const uint8_t BIT_BUFFER_LENGTH = 64;
const uint8_t BIT_BUFFER_SIZE = 64;
// Number of bits into the BIT_BUFFER_LENGTH read at each step.
// Number of bits read into `bitBuffer` at each step.
const uint8_t BIT_BUFFER_READ_UNIT = 8;
// Hardcoded limits to avoid allocating too eagerly.
@ -1018,15 +1018,16 @@ JS::Result<Ok> BinASTTokenReaderContext::readHuffmanPrelude() {
return reader.run(HUFFMAN_STACK_INITIAL_CAPACITY);
}
BinASTTokenReaderContext::BitBuffer::BitBuffer() : bits(0), length(0) {
static_assert(sizeof(bits) * 8 == BIT_BUFFER_LENGTH,
"Expecting bitBuffer to match BIT_BUFFER_LENGTH");
BinASTTokenReaderContext::BitBuffer::BitBuffer() : bits(0), bitLength(0) {
static_assert(sizeof(bits) * 8 == BIT_BUFFER_SIZE,
"Expecting bitBuffer to match BIT_BUFFER_SIZE");
}
template <Compression C>
HuffmanLookup BinASTTokenReaderContext::BitBuffer::getHuffmanLookup() {
// Only keep the leading 32 bits.
const uint8_t bitLength = std::min<uint8_t>(length, MAX_PREFIX_BIT_LENGTH);
const uint8_t bitLength =
std::min<uint8_t>(this->bitLength, MAX_PREFIX_BIT_LENGTH);
const uint32_t bitsPrefix = bits & (uint64_t)0x00000000FFFFFFFF;
return HuffmanLookup(bitsPrefix, bitLength);
}
@ -1037,33 +1038,59 @@ BinASTTokenReaderContext::BitBuffer::advanceBitBuffer(
BinASTTokenReaderContext& owner, const uint8_t bitLength) {
// It should be impossible to call `advanceBitBuffer`
// with more bits than what we just handed out.
MOZ_ASSERT(bitLength <= this->length);
this->length -= bitLength;
// We're reading from the leading bits on. Since we have just read
// `bitLength` bits, we now need to shift everything else into
// position.
this->bits <<= bitLength;
if (length <= MAX_PREFIX_BIT_LENGTH) {
MOZ_ASSERT(bitLength <= this->bitLength);
// The algorithm is not intuitive, so consider an example, where the byte
// stream starts with `0b_HGFE_DCBA`, `0b_PONM_LKJI`, `0b_XWVU_TSRQ` (to keep
// things concise, in the example, we won't use the entire 64 bits).
//
// In each byte, bits are stored in the reverse order, so what we want
// is `0b_ABCD_EFGH`, `0b_IJKL_MNOP`, `0b_QRST_UVWX`.
// For the example, let's assume that we have already read
// `0b_ABCD_EFGH`, `0b_IJKL_MNOP` into `bits`, so before the call to
// `advanceBitBuffer`, `bits` initially contains
// `0b_XXXX_XXXX__XXXX_XXXX__ABCD_EFGH__IJKL_MNOP`, where `X` are bits that
// are beyond `this->bitLength`
// 1. We have consumed a few bits from the bit buffer, say `ABC`.
// `bits` is now `0b_XXXX_XXXX__XXXX_XXXX__XXXD_EFGH__IJKL_MNOP`.
this->bitLength -= bitLength;
if (this->bitLength <= MAX_PREFIX_BIT_LENGTH) {
// Keys can be up to MAX_PREFIX_BIT_LENGTH bits long. If we have fewer bits
// available, it's time to reload. We'll try and get as close to 64 bits as
// possible.
while (length <= BIT_BUFFER_LENGTH - BIT_BUFFER_READ_UNIT) {
while (this->bitLength <= BIT_BUFFER_SIZE - BIT_BUFFER_READ_UNIT) {
// Let's try and pull one byte.
uint8_t byte;
uint32_t byteLen = 1;
MOZ_TRY((owner.readBuf<C, EndOfFilePolicy::BestEffort>(&byte, byteLen)));
if (byteLen < 1) {
uint32_t readLen = 1;
MOZ_TRY((owner.readBuf<C, EndOfFilePolicy::BestEffort>(&byte, readLen)));
if (readLen < 1) {
// Ok, nothing left to read.
break;
}
// We have just read one byte.
// Append it to `bits`.
MOZ_ASSERT(bits <= 0x00FFFFFFFFFFFFFF);
// 2. We have just read to `byte`, here `0b_XWVU_TSRQ`. Let's reverse
// `byte` into `0b_QRST_UVWX`.
const uint8_t reversedByte =
(byte & 0b10000000) >> 7 | (byte & 0b01000000) >> 5 |
(byte & 0b00100000) >> 3 | (byte & 0b00010000) >> 1 |
(byte & 0b00001000) << 1 | (byte & 0b00000100) << 3 |
(byte & 0b00000010) << 5 | (byte & 0b00000001) << 7;
// 3. Make space for these bits at the end of the stream
// so shift `bits` into
// `0b_XXXX_XXXX__XXXD_EFGH__IJKL_MNOP__0000_0000`.
this->bits <<= BIT_BUFFER_READ_UNIT;
this->bits += byte;
this->length += BIT_BUFFER_READ_UNIT;
MOZ_ASSERT(bits >> this->length == 0);
// 4. Finally, add the reversed byte to `bits`, which gives
// `0b_XXXX_XXXX__XXXD_EFGH__IJKL_MNOP__QRST_UVWX`.
this->bits += reversedByte;
this->bitLength += BIT_BUFFER_READ_UNIT;
MOZ_ASSERT(bits >> this->bitLength == 0);
// 5. Continue as long as we don't have enough bits.
}
}
@ -1314,9 +1341,6 @@ HuffmanEntry<const T*> HuffmanTableImpl<T, N>::lookup(HuffmanLookup key) const {
const uint32_t keyBits = key.leadingBits(iter.key.bitLength);
if (keyBits == iter.key.bits) {
// FIXME: keyBits are actually stored in the reverse order from the
// stream.
// FIXME: We need to reverse either the one or the other.
// Entry found.
return HuffmanEntry<const T*>(iter.key.bits, iter.key.bitLength,
&iter.value);

View File

@ -62,50 +62,71 @@ struct NormalizedInterfaceAndField {
// the returned `HuffmanKey` and consume as many bits from the bit stream.
struct HuffmanLookup {
HuffmanLookup(uint32_t bits, uint8_t bitLength)
: bits(bits), bitLength(bitLength) {
// We zero out the highest `32 - bitLength` bits.
: bits(bits & (uint32_t(0xFFFFFFFF) >> (32 - bitLength))),
bitLength(bitLength) {
MOZ_ASSERT(bitLength <= 32);
MOZ_ASSERT(bits >> bitLength == 0);
}
// Return the `bitLength` leading bits of this superset.
// This only makes sense if `bitLength <= this.bitLength`.
// Return the `bitLength` leading bits of this superset, in the order
// expected to compare to a `HuffmanKey`. The order of bits and bytes
// is ensured by `BitBuffer`.
//
// Note: This only makes sense if `bitLength <= this->bitLength`.
//
// So, for instance, if `leadingBits(4)` returns
// `0b_0000_0000__0000_0000__0000_0000__0000_0100`, this is
// equal to Huffman Key `0100`.
uint32_t leadingBits(const uint8_t bitLength) const;
// The buffer holding the bits.
// FIXME: Document bit order.
// The buffer holding the bits. At this stage, bits are stored
// in the same order as `HuffmanKey`. See the implementation of
// `BitBuffer` methods for more details about how this order
// is implemented.
//
// If `bitLength < 32`, the unused highest bits are guaranteed
// to be 0.
uint32_t bits;
// The actual length of buffer `bits`:
// - if `bitLength == 0`, use 0 bits of `bits`, this entire value is `0`;
// - if `bitLength == 1`, only use the last bit of `bits`;
// - ...
// - if `bitLength == 32`, use the entire value of `bits`;
// - other values of `bitLength` are invalid.
// The actual length of buffer `bits`.
//
// Invariant: the first `32 - bitLength` bits are always 0.
// MUST be within `[0, 32]`.
//
// If `bitLength < 32`, it means that some of the highest bits are unused.
uint8_t bitLength;
};
// A Huffman Key.
struct HuffmanKey {
HuffmanKey(uint32_t bits, uint8_t bitLength)
// Construct the HuffmanKey.
//
// `bits` and `bitLength` define a buffer containing the standard Huffman
// code for this key.
//
// For instance, if the Huffman code is `0100`,
// - `bits = 0b0000_0000__0000_0000__0000_0000__0000_0100`;
// - `bitLength = 4`.
HuffmanKey(const uint32_t bits, const uint8_t bitLength)
: bits(bits), bitLength(bitLength) {
MOZ_ASSERT(bitLength <= 32);
MOZ_ASSERT(bits >> bitLength == 0);
}
// The buffer holding the bits.
// FIXME: Document bit order.
//
// For a Huffman code of `0100`
// - `bits = 0b0000_0000__0000_0000__0000_0000__0000_0100`;
//
// If `bitLength < 32`, the unused highest bits are guaranteed
// to be 0.
uint32_t bits;
// The actual length of buffer `bits`:
// - if `bitLength == 0`, use 0 bits of `bits`, this entire value is `0`;
// - if `bitLength == 1`, only use the last bit of `bits`;
// - ...
// - if `bitLength == 32`, use the entire value of `bits`;
// - other values of `bitLength` are invalid.
// The actual length of buffer `bits`.
//
// Invariant: the first `32 - bitLength` bits are always 0.
// MUST be within `[0, 32]`.
//
// If `bitLength < 32`, it means that some of the highest bits are unused.
uint8_t bitLength;
};
@ -397,15 +418,30 @@ class MOZ_STACK_CLASS BinASTTokenReaderContext : public BinASTTokenReaderBase {
private:
// The contents of the buffer.
//
// - Bytes are added in the same order as the bytestream.
// - Individual bits within bytes are mirrored.
//
// In other words, if the byte stream starts with
// `0b_HGFE_DCBA`, `0b_PONM_LKJI`, `0b_0000_0000`,
// .... `0b_0000_0000`, `bits` will hold
// `0b_0000_...0000__ABCD_EFGH__IJKL_MNOP`.
//
// Note: Unlike `HuffmanKey` or `HuffmanLookup`,
// the highest bits are NOT guaranteed to be `0`.
uint64_t bits;
// The number of elements in `bits`.
//
// Until we start lookup up into Huffman tables, `length == 0`.
// Until we start looking up into Huffman tables, `bitLength == 0`.
// Once we do, we refill the buffer before any lookup, i.e.
// `length == 32` until we reach the last few bytes of the stream,
// `MAX_PREFIX_BIT_LENGTH = 32 <= bitLength <= BIT_BUFFER_SIZE = 64`
// until we reach the last few bytes of the stream,
// in which case `bitLength` decreases monotonically to 0.
uint64_t length;
//
// If `bitLength < BIT_BUFFER_SIZE = 64`, some of the highest
// bits of `bits` are unused.
uint8_t bitLength;
} bitBuffer;
// The number of already decoded bytes.