Use the return of readBytes to find out if we are at the end of the stream.

This allows the removal of isObjectEnd and opens the way for reading 64 bits
at a time.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221804 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Rafael Espindola 2014-11-12 18:37:00 +00:00
parent bc8114f733
commit ea3c2111f4
4 changed files with 53 additions and 62 deletions

View File

@ -169,6 +169,9 @@ class BitstreamCursor {
BitstreamReader *BitStream; BitstreamReader *BitStream;
size_t NextChar; size_t NextChar;
// The size of the bitcode. 0 if we don't know it yet.
size_t Size;
/// This is the current data we have pulled from the stream but have not /// This is the current data we have pulled from the stream but have not
/// returned to the client. This is specifically and intentionally defined to /// returned to the client. This is specifically and intentionally defined to
/// follow the word size of the host machine for efficiency. We use word_t in /// follow the word size of the host machine for efficiency. We use word_t in
@ -208,17 +211,13 @@ public:
BitStream = R; BitStream = R;
NextChar = 0; NextChar = 0;
CurWord = 0; Size = 0;
BitsInCurWord = 0; BitsInCurWord = 0;
CurCodeSize = 2; CurCodeSize = 2;
} }
void freeState(); void freeState();
bool isEndPos(size_t pos) {
return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
}
bool canSkipToPos(size_t pos) const { bool canSkipToPos(size_t pos) const {
// pos can be skipped to if it is a valid address or one byte past the end. // pos can be skipped to if it is a valid address or one byte past the end.
return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
@ -226,7 +225,12 @@ public:
} }
bool AtEndOfStream() { bool AtEndOfStream() {
return BitsInCurWord == 0 && isEndPos(NextChar); if (BitsInCurWord != 0)
return false;
if (Size == NextChar)
return true;
fillCurWord();
return BitsInCurWord == 0;
} }
/// Return the number of bits used to encode an abbrev #. /// Return the number of bits used to encode an abbrev #.
@ -305,7 +309,6 @@ public:
// Move the cursor to the right word. // Move the cursor to the right word.
NextChar = ByteNo; NextChar = ByteNo;
BitsInCurWord = 0; BitsInCurWord = 0;
CurWord = 0;
// Skip over any bits that are already consumed. // Skip over any bits that are already consumed.
if (WordBitNo) { if (WordBitNo) {
@ -316,6 +319,31 @@ public:
} }
} }
/// Refill CurWord with the next sizeof(word_t) bytes of the stream, starting
/// at byte offset NextChar.
///
/// On a successful read, CurWord holds those bytes interpreted as a
/// little-endian word_t, NextChar is advanced past them, and BitsInCurWord is
/// set to the full bit width of word_t. If readBytes reports that zero bytes
/// were read, Size is set to NextChar and the function returns without
/// modifying CurWord, NextChar or BitsInCurWord.
void fillCurWord() {
// A Size of 0 means the end has not been recorded yet; once it has, refilling
// at or past it is treated as a caller error.
// NOTE(review): the cast narrows Size (a size_t) to unsigned, so where size_t
// is wider than unsigned this comparison uses a truncated Size - confirm
// whether that is intended.
assert(Size == 0 || NextChar < (unsigned)Size);
// Read the next word from the stream. The buffer is zero-initialized, so any
// byte readBytes does not write stays 0.
uint8_t Array[sizeof(word_t)] = {0};
uint64_t BytesRead =
BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
// If we run out of data, stop at the end of the stream: remember the current
// offset as the stream size and leave the rest of the cursor state untouched.
if (BytesRead == 0) {
Size = NextChar;
return;
}
// Only a full word or nothing at all is expected here.
// NOTE(review): a short, non-zero read is caught only by this assert; with
// asserts compiled out the code below proceeds as if a whole word was read.
assert(BytesRead == sizeof(Array));
// Handle big-endian byte-swapping if necessary: the raw bytes are copied into
// a little-endian, unaligned wrapper and converted to a host-order word_t.
support::detail::packed_endian_specific_integral<
word_t, support::little, support::unaligned> EndianValue;
memcpy(&EndianValue, Array, sizeof(Array));
CurWord = EndianValue;
// Account for the whole word that was just consumed from the stream.
NextChar += sizeof(word_t);
BitsInCurWord = sizeof(word_t) * 8;
}
uint32_t Read(unsigned NumBits) { uint32_t Read(unsigned NumBits) {
assert(NumBits && NumBits <= 32 && assert(NumBits && NumBits <= 32 &&
@ -324,48 +352,32 @@ public:
// If the field is fully contained by CurWord, return it quickly. // If the field is fully contained by CurWord, return it quickly.
if (BitsInCurWord >= NumBits) { if (BitsInCurWord >= NumBits) {
uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits)); uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits));
CurWord >>= NumBits;
// Use a mask to avoid undefined behavior.
CurWord >>= (NumBits & 0x1f);
BitsInCurWord -= NumBits; BitsInCurWord -= NumBits;
return R; return R;
} }
uint32_t R = BitsInCurWord ? uint32_t(CurWord) : 0;
unsigned BitsLeft = NumBits - BitsInCurWord;
fillCurWord();
// If we run out of data, stop at the end of the stream. // If we run out of data, stop at the end of the stream.
if (isEndPos(NextChar)) { if (BitsLeft > BitsInCurWord)
CurWord = 0;
BitsInCurWord = 0;
return 0; return 0;
}
uint32_t R = uint32_t(CurWord); uint32_t R2 = uint32_t(CurWord) & (~0U >> (sizeof(word_t) * 8 - BitsLeft));
// Read the next word from the stream. // Use a mask to avoid undefined behavior.
uint8_t Array[sizeof(word_t)] = {0}; CurWord >>= (BitsLeft & 0x1f);
BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar); BitsInCurWord -= BitsLeft;
// Handle big-endian byte-swapping if necessary. R |= uint32_t(R2 << (NumBits - BitsLeft));
support::detail::packed_endian_specific_integral
<word_t, support::little, support::unaligned> EndianValue;
memcpy(&EndianValue, Array, sizeof(Array));
CurWord = EndianValue;
NextChar += sizeof(word_t);
// Extract NumBits-BitsInCurWord from what we just read.
unsigned BitsLeft = NumBits-BitsInCurWord;
// Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive.
R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft)))
<< BitsInCurWord);
// BitsLeft bits have just been used up from CurWord. BitsLeft is in the
// range [1..32]/[1..64] so be careful how we shift.
if (BitsLeft != sizeof(word_t)*8)
CurWord >>= BitsLeft;
else
CurWord = 0;
BitsInCurWord = sizeof(word_t)*8-BitsLeft;
return R; return R;
} }
@ -426,7 +438,6 @@ private:
} }
BitsInCurWord = 0; BitsInCurWord = 0;
CurWord = 0;
} }
public: public:

View File

@ -22,9 +22,9 @@ namespace llvm {
/// to return the right result, getExtent must also wait for all the data to /// to return the right result, getExtent must also wait for all the data to
/// arrive; therefore it should not be called on objects which are actually /// arrive; therefore it should not be called on objects which are actually
/// streamed (this would defeat the purpose of streaming). Instead, /// streamed (this would defeat the purpose of streaming). Instead,
/// isValidAddress and isObjectEnd can be used to test addresses without knowing /// isValidAddress can be used to test addresses without knowing the exact size
/// the exact size of the stream. Finally, getPointer can be used instead of /// of the stream. Finally, getPointer can be used instead of readBytes to avoid
/// readBytes to avoid extra copying. /// extra copying.
class MemoryObject { class MemoryObject {
public: public:
virtual ~MemoryObject(); virtual ~MemoryObject();
@ -61,13 +61,6 @@ public:
/// @param address - address of the byte, in the same space as getBase() /// @param address - address of the byte, in the same space as getBase()
/// @result - true if the address may be read with readByte() /// @result - true if the address may be read with readByte()
virtual bool isValidAddress(uint64_t address) const = 0; virtual bool isValidAddress(uint64_t address) const = 0;
/// Returns true if the address is one past the end of the object (i.e. if it
/// is equal to base + extent). May block until (address - base) bytes have
/// been read
/// @param address - address of the byte, in the same space as getBase()
/// @result - true if the address is equal to base + extent
virtual bool isObjectEnd(uint64_t address) const = 0;
}; };
} }

View File

@ -38,7 +38,6 @@ public:
return nullptr; return nullptr;
} }
bool isValidAddress(uint64_t address) const override; bool isValidAddress(uint64_t address) const override;
bool isObjectEnd(uint64_t address) const override;
/// Drop s bytes from the front of the stream, pushing the positions of the /// Drop s bytes from the front of the stream, pushing the positions of the
/// remaining bytes down by s. This is used to skip past the bitcode header, /// remaining bytes down by s. This is used to skip past the bitcode header,

View File

@ -34,9 +34,6 @@ public:
bool isValidAddress(uint64_t address) const override { bool isValidAddress(uint64_t address) const override {
return validAddress(address); return validAddress(address);
} }
bool isObjectEnd(uint64_t address) const override {
return objectEnd(address);
}
private: private:
const uint8_t* const FirstChar; const uint8_t* const FirstChar;
@ -47,9 +44,6 @@ private:
bool validAddress(uint64_t address) const { bool validAddress(uint64_t address) const {
return static_cast<std::ptrdiff_t>(address) < LastChar - FirstChar; return static_cast<std::ptrdiff_t>(address) < LastChar - FirstChar;
} }
bool objectEnd(uint64_t address) const {
return static_cast<std::ptrdiff_t>(address) == LastChar - FirstChar;
}
RawMemoryObject(const RawMemoryObject&) LLVM_DELETED_FUNCTION; RawMemoryObject(const RawMemoryObject&) LLVM_DELETED_FUNCTION;
void operator=(const RawMemoryObject&) LLVM_DELETED_FUNCTION; void operator=(const RawMemoryObject&) LLVM_DELETED_FUNCTION;
@ -85,12 +79,6 @@ bool StreamingMemoryObject::isValidAddress(uint64_t address) const {
return fetchToPos(address); return fetchToPos(address);
} }
bool StreamingMemoryObject::isObjectEnd(uint64_t address) const {
if (ObjectSize) return address == ObjectSize;
fetchToPos(address);
return address == ObjectSize && address != 0;
}
uint64_t StreamingMemoryObject::getExtent() const { uint64_t StreamingMemoryObject::getExtent() const {
if (ObjectSize) return ObjectSize; if (ObjectSize) return ObjectSize;
size_t pos = BytesRead + kChunkSize; size_t pos = BytesRead + kChunkSize;