[astc] Decode directly into color_values_direct[] (no intermediate result_vector storage)

This commit is contained in:
wildcard
2026-01-31 09:17:30 +01:00
committed by crueter
parent 8ed0ed5828
commit 1bc06c82e9

View File

@@ -83,6 +83,12 @@ int result_index = 0;
uint result_vector_max_index;
bool result_limit_reached = false;
// avoid intermediate result_vector storage during color decode phase
// When true, ResultEmplaceBack() forwards each decoded value to EmitColorValue()
// instead of appending it to the result vector. DecodeColorValues() sets it only
// for the duration of its DecodeIntegerSequence() call.
bool write_color_values = false;
// Destination for the final color values. DecodeColorValues() zeroes all 32
// entries before decoding; the array is then handed to ComputeEndpoints().
uint color_values_direct[32];
// Index of the most recently written slot. EmitColorValue() pre-increments it
// before every store, so slot 0 is not written by that function.
uint color_out_index = 0;
// Number of color values wanted by the current decode; ResultEmplaceBack()
// stops emitting once color_out_index reaches this count.
uint color_num_values = 0;
// EncodingData helpers
uint Encoding(EncodingData val) {
return bitfieldExtract(val.data, 0, 8);
@@ -114,9 +120,110 @@ EncodingData CreateEncodingData(uint encoding, uint num_bits, uint bit_val, uint
return EncodingData(((encoding) << 0u) | ((num_bits) << 8u) |
((bit_val) << 16u) | ((quint_trit_val) << 24u));
}
// Forward declarations: EmitColorValue() below calls both helpers, whose
// definitions are not visible in this part of the shader.
uint ReplicateBitTo9(uint bit);
uint FastReplicateTo8(uint value, uint num_bits);
// Converts one decoded integer-sequence element into its final color value and
// stores it in the next slot of color_values_direct[].
//
// val: packed EncodingData; its encoding kind, bit count, bit value and
//      trit/quint digit are read through the accessor helpers.
//
// The slot index is pre-incremented (slots 1..N are written, slot 0 is left
// untouched). NOTE(review): this presumably matches how ComputeEndpoints()
// walks the array -- confirm against the reader before changing it.
//
// If the next slot would fall outside color_values_direct[], nothing is written
// and result_limit_reached is raised, mirroring ResultEmplaceBack(). Without
// this check a value count equal to the array size would store one element past
// the end, which is undefined behaviour in GLSL.
void EmitColorValue(EncodingData val) {
    const uint slot = color_out_index + 1u;
    if (slot >= uint(color_values_direct.length())) {
        // No room left: tell the sequence decoder to stop instead of writing
        // out of bounds.
        result_limit_reached = true;
        return;
    }

    const uint encoding = Encoding(val);
    const uint bitlen = NumBits(val);
    const uint bitval = BitValue(val);

    // Plain-bit values only need their bits replicated up to 8 bits.
    if (encoding == JUST_BITS) {
        color_out_index = slot;
        color_values_direct[slot] = FastReplicateTo8(bitval, bitlen);
        return;
    }

    // Trit/quint values are rebuilt from four terms:
    //   A - the lowest bit of bitval replicated across 9 bits,
    //   B - a bit pattern built from the remaining bits of bitval,
    //   C - a per-(encoding, bitlen) multiplier,
    //   D - the trit or quint digit itself.
    // The constants look like the ASTC color unquantization tables
    // (NOTE(review): verify against the specification).
    uint A = ReplicateBitTo9((bitval & 1));
    uint B = 0, C = 0, D = QuintTritValue(val);
    if (encoding == TRIT) {
        switch (bitlen) {
        case 1:
            C = 204;
            break;
        case 2: {
            C = 93;
            const uint b = (bitval >> 1) & 1;
            B = (b << 8) | (b << 4) | (b << 2) | (b << 1);
            break;
        }
        case 3: {
            C = 44;
            const uint cb = (bitval >> 1) & 3;
            B = (cb << 7) | (cb << 2) | cb;
            break;
        }
        case 4: {
            C = 22;
            const uint dcb = (bitval >> 1) & 7;
            B = (dcb << 6) | dcb;
            break;
        }
        case 5: {
            C = 11;
            const uint edcb = (bitval >> 1) & 0xF;
            B = (edcb << 5) | (edcb >> 2);
            break;
        }
        case 6: {
            C = 5;
            const uint fedcb = (bitval >> 1) & 0x1F;
            B = (fedcb << 4) | (fedcb >> 4);
            break;
        }
        }
    } else { // QUINT
        switch (bitlen) {
        case 1:
            C = 113;
            break;
        case 2: {
            C = 54;
            const uint b = (bitval >> 1) & 1;
            B = (b << 8) | (b << 3) | (b << 2);
            break;
        }
        case 3: {
            C = 26;
            const uint cb = (bitval >> 1) & 3;
            B = (cb << 7) | (cb << 1) | (cb >> 1);
            break;
        }
        case 4: {
            C = 13;
            const uint dcb = (bitval >> 1) & 7;
            B = (dcb << 6) | (dcb >> 1);
            break;
        }
        case 5: {
            C = 6;
            const uint edcb = (bitval >> 1) & 0xF;
            B = (edcb << 5) | (edcb >> 3);
            break;
        }
        }
    }
    // Any bit length not listed above leaves B and C at zero.

    // Combine the terms: scale the digit, add the bit pattern, fold in the
    // replicated low bit, then drop the two lowest bits while keeping bit 7 of A.
    uint T = (D * C) + B;
    T ^= A;
    T = (A & 0x80) | (T >> 2);
    color_out_index = slot;
    color_values_direct[slot] = T;
}
void ResultEmplaceBack(EncodingData val) {
if (write_color_values) {
if (color_out_index >= color_num_values) {
// avoid decoding more than needed by this phase
result_limit_reached = true;
return;
}
EmitColorValue(val);
return;
}
if (result_index >= result_vector_max_index) {
// Alert callers to avoid decoding more than needed by this phase
result_limit_reached = true;
@@ -457,7 +564,7 @@ void DecodeIntegerSequence(uint max_range, uint num_values) {
}
}
void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits, out uint color_values[32]) {
void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits) {
uint num_values = 0;
for (uint i = 0; i < num_partitions; i++) {
num_values += ((modes[i] >> 2) + 1) << 1;
@@ -471,104 +578,21 @@ void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits, o
break;
}
}
DecodeIntegerSequence(range - 1, num_values);
uint out_index = 0;
for (int itr = 0; itr < result_index; ++itr) {
if (out_index >= num_values) {
break;
}
const EncodingData val = GetEncodingFromVector(itr);
const uint encoding = Encoding(val);
const uint bitlen = NumBits(val);
const uint bitval = BitValue(val);
uint A = 0, B = 0, C = 0, D = 0;
A = ReplicateBitTo9((bitval & 1));
switch (encoding) {
case JUST_BITS:
color_values[++out_index] = FastReplicateTo8(bitval, bitlen);
break;
case TRIT: {
D = QuintTritValue(val);
switch (bitlen) {
case 1:
C = 204;
break;
case 2: {
C = 93;
const uint b = (bitval >> 1) & 1;
B = (b << 8) | (b << 4) | (b << 2) | (b << 1);
break;
}
case 3: {
C = 44;
const uint cb = (bitval >> 1) & 3;
B = (cb << 7) | (cb << 2) | cb;
break;
}
case 4: {
C = 22;
const uint dcb = (bitval >> 1) & 7;
B = (dcb << 6) | dcb;
break;
}
case 5: {
C = 11;
const uint edcb = (bitval >> 1) & 0xF;
B = (edcb << 5) | (edcb >> 2);
break;
}
case 6: {
C = 5;
const uint fedcb = (bitval >> 1) & 0x1F;
B = (fedcb << 4) | (fedcb >> 4);
break;
}
}
break;
}
case QUINT: {
D = QuintTritValue(val);
switch (bitlen) {
case 1:
C = 113;
break;
case 2: {
C = 54;
const uint b = (bitval >> 1) & 1;
B = (b << 8) | (b << 3) | (b << 2);
break;
}
case 3: {
C = 26;
const uint cb = (bitval >> 1) & 3;
B = (cb << 7) | (cb << 1) | (cb >> 1);
break;
}
case 4: {
C = 13;
const uint dcb = (bitval >> 1) & 7;
B = (dcb << 6) | (dcb >> 1);
break;
}
case 5: {
C = 6;
const uint edcb = (bitval >> 1) & 0xF;
B = (edcb << 5) | (edcb >> 3);
break;
}
}
break;
}
}
if (encoding != JUST_BITS) {
uint T = (D * C) + B;
T ^= A;
T = (A & 0x80) | (T >> 2);
color_values[++out_index] = T;
}
// Decode directly into color_values_direct[]
write_color_values = true;
color_out_index = 0;
color_num_values = num_values;
for (uint i = 0; i < 32; ++i) {
color_values_direct[i] = 0;
}
DecodeIntegerSequence(range - 1, num_values);
write_color_values = false;
}
ivec2 BitTransferSigned(int a, int b) {
ivec2 transferred;
transferred.y = b >> 1;
@@ -1069,13 +1093,12 @@ void DecompressBlock(ivec3 coord) {
uvec4 endpoints0[4];
uvec4 endpoints1[4];
{
// This decode phase should at most push 32 elements into the vector
result_vector_max_index = 32;
uint color_values[32];
// Decode directly into color_values_direct[] (no intermediate result_vector storage)
result_limit_reached = false;
uint colvals_index = 0;
DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits, color_values);
DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits);
for (uint i = 0; i < num_partitions; i++) {
ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i], color_values,
ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i], color_values_direct,
colvals_index);
}
}