I don't trust MediaTek's handling of signed integers or of division/remainder by constants

This commit is contained in:
lizzie
2026-01-30 03:19:32 +00:00
committed by crueter
parent 8d78acf8b6
commit 6a5f86f201

View File

@@ -81,12 +81,12 @@ const uint encoding_values[22] = uint[](
);
// Input ASTC texture globals
// NOTE(review): each *_bitsread counter below is declared twice (int, then uint). This looks like
// the removed and added sides of a diff whose +/- markers were lost — confirm which one is live.
// Running bit offset into local_buff; StreamBits adds the number of bits it read after each call.
int total_bitsread = 0;
uint total_bitsread = 0;
// Bits consumed by StreamBits; main() loads it from astc_data before calling DecompressBlock.
uvec4 local_buff;
// Color data globals
// Bits consumed by StreamColorBits.
uvec4 color_endpoint_data;
// Running bit offset into color_endpoint_data; StreamColorBits advances it after each read.
int color_bitsread = 0;
uint color_bitsread = 0;
// Global "vector" to be pushed into when decoding
// At most will require BLOCK_WIDTH x BLOCK_HEIGHT in single plane mode
@@ -226,45 +226,31 @@ uint Select2DPartition(uint seed, uvec2 pos, uint partition_count) {
}
}
// Returns `bits` bits of `payload`, starting at bit position `offset`, in the low bits of the
// result. `payload` is treated as four consecutive 32-bit words (x holds bits 0..31, y 32..63, ...).
// Returns 0 when `bits` is 0 (or negative, in the signed variant) or larger than 32.
// NOTE(review): two signatures and two copies of most statements appear below — a signed (int)
// variant and an unsigned (uint) variant with explicit int() casts for bitfieldExtract. This looks
// like a diff rendering with the +/- markers stripped; confirm which variant the file keeps.
uint ExtractBits(uvec4 payload, int offset, int bits) {
if (bits <= 0 || bits > 32) {
uint ExtractBits(uvec4 payload, uint offset, uint bits) {
if (bits == 0 || bits > 32)
return 0;
}
// offset >> 5 selects the 32-bit component, offset & 31 the bit position inside it.
const int last_offset = offset + bits - 1;
const int shifted_offset = offset >> 5;
// Fast path: first and last requested bit live in the same component.
if ((last_offset >> 5) == shifted_offset) {
return bitfieldExtract(payload[shifted_offset], offset & 31, bits);
}
// Number of bits left in the first component, counted from `offset` to the component's end.
const int first_bits = 32 - (offset & 31);
const uint last_offset = offset + bits - 1;
const uint shifted_offset = offset >> 5;
if ((last_offset >> 5) == shifted_offset)
return bitfieldExtract(payload[shifted_offset], int(offset & 31), int(bits));
const uint first_bits = 32 - (offset & 31);
// next[i] == payload[i + 1], with 0 after the last component, so the straddling read below
// never has to index payload[4].
const uvec4 next = uvec4(payload.yzw, 0);
// Straddling read: low part from the first component, remaining bits from the following one,
// shifted up by first_bits so they sit directly above the low part.
return bitfieldExtract(payload[shifted_offset], offset & 31, first_bits)
| (bitfieldExtract(next[shifted_offset], 0, bits - first_bits) << first_bits);
return bitfieldExtract(payload[shifted_offset], int(offset & 31), int(first_bits))
| (bitfieldExtract(next[shifted_offset], 0, int(bits - first_bits)) << first_bits);
}
// Reads the next num_bits bits of local_buff at the running offset total_bitsread and then
// advances that offset by num_bits. Because ExtractBits returns 0 for a width of 0 or above 32,
// such a call yields 0 while the offset still moves forward.
// NOTE(review): the int_bits statements and the direct num_bits statements look like the removed
// and added sides of a diff with its +/- markers stripped — confirm which pair is live.
uint StreamBits(uint num_bits) {
const int int_bits = int(num_bits);
const uint ret = ExtractBits(local_buff, total_bitsread, int_bits);
total_bitsread += int_bits;
const uint ret = ExtractBits(local_buff, total_bitsread, num_bits);
total_bitsread += num_bits;
return ret;
}
// Advances the running offset total_bitsread by num_bits without reading anything from local_buff.
void SkipBits(uint num_bits) {
// Converted to int before the add, matching the signed declaration of total_bitsread.
const int int_bits = int(num_bits);
total_bitsread += int_bits;
}
// Reads the next num_bits bits of color_endpoint_data at the running offset color_bitsread and
// then advances that offset by num_bits. Same contract as StreamBits, but on the color stream.
// NOTE(review): the int_bits statements and the direct num_bits statements look like the removed
// and added sides of a diff with its +/- markers stripped — confirm which pair is live.
uint StreamColorBits(uint num_bits) {
const int int_bits = int(num_bits);
const uint ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits);
color_bitsread += int_bits;
const uint ret = ExtractBits(color_endpoint_data, color_bitsread, num_bits);
color_bitsread += num_bits;
return ret;
}
// Wraps the raw uint stored at result_vector[index] in an EncodingData value.
// No bounds check is performed on index here.
EncodingData GetEncodingFromVector(uint index) {
const uint data = result_vector[index];
return EncodingData(data);
}
// Returns the number of bits required to encode n_vals values.
uint GetBitLength(uint n_vals, uint encoding_index) {
// uint Div3Floor(uint v) { return (v * 0x5556) >> 16; }
@@ -273,8 +259,9 @@ uint GetBitLength(uint n_vals, uint encoding_index) {
// uint Div5Ceil(uint v) { return Div5Floor(v + 4); }
const EncodingData encoding_value = EncodingData(encoding_values[encoding_index]);
const uint encoding = Encoding(encoding_value);
const uint num_bits = NumBits(encoding_value);
const uvec3 div_constant = uvec3(0, 0x5556, 0x3334);
return NumBits(encoding_value) * n_vals
return num_bits * n_vals
+ ((((n_vals * ((0x870 >> (encoding * 4)) & 0xf)) + ((0x420 >> (encoding * 4)) & 0xf))
* div_constant[encoding]) >> 16);
}
@@ -309,13 +296,16 @@ void DecodeQuintBlock(uint num_bits) {
if (BitsOp(qQ.w, 1, 2) == 3 && BitsOp(qQ.w, 5, 6) == 0) {
qQ.x = 4;
qQ.y = 4;
qQ.z = (BitsBracket(qQ.w, 0) << 2) | ((BitsBracket(qQ.w, 4) & ~BitsBracket(qQ.w, 0)) << 1) |
(BitsBracket(qQ.w, 3) & ~BitsBracket(qQ.w, 0));
qQ.z = (BitsBracket(qQ.w, 0) << 2)
| ((BitsBracket(qQ.w, 4) & ~BitsBracket(qQ.w, 0)) << 1)
| (BitsBracket(qQ.w, 3) & ~BitsBracket(qQ.w, 0));
} else {
uint C = 0;
if (BitsOp(qQ.w, 1, 2) == 3) {
qQ.z = 4;
C = (BitsOp(qQ.w, 3, 4) << 3) | ((~BitsOp(qQ.w, 5, 6) & 3) << 1) | BitsBracket(qQ.w, 0);
C = (BitsOp(qQ.w, 3, 4) << 3)
| ((~BitsOp(qQ.w, 5, 6) & 3) << 1)
| BitsBracket(qQ.w, 0);
} else {
qQ.z = BitsOp(qQ.w, 5, 6);
C = BitsOp(qQ.w, 0, 4);
@@ -328,39 +318,37 @@ void DecodeQuintBlock(uint num_bits) {
qQ.x = BitsOp(C, 0, 2);
}
}
for (uint i = 0; i < 3; i++) {
const EncodingData val = CreateEncodingData(QUINT, num_bits, m[i], qQ[i]);
ResultEmplaceBack(val);
}
for (uint i = 0; i < 3; i++)
ResultEmplaceBack(CreateEncodingData(QUINT, num_bits, m[i], qQ[i]));
}
void DecodeTritBlock(uint num_bits) {
uvec4 m;
uvec4 t;
uvec3 Tm5t5;
uvec3 tm5t5;
m[0] = StreamColorBits(num_bits);
Tm5t5.x = StreamColorBits(2);
tm5t5.x = StreamColorBits(2);
m[1] = StreamColorBits(num_bits);
Tm5t5.x |= StreamColorBits(2) << 2;
tm5t5.x |= StreamColorBits(2) << 2;
m[2] = StreamColorBits(num_bits);
Tm5t5.x |= StreamColorBits(1) << 4;
tm5t5.x |= StreamColorBits(1) << 4;
m[3] = StreamColorBits(num_bits);
Tm5t5.x |= StreamColorBits(2) << 5;
Tm5t5.y = StreamColorBits(num_bits);
Tm5t5.x |= StreamColorBits(1) << 7;
tm5t5.x |= StreamColorBits(2) << 5;
tm5t5.y = StreamColorBits(num_bits);
tm5t5.x |= StreamColorBits(1) << 7;
uint C = 0;
if (BitsOp(Tm5t5.x, 2, 4) == 7) {
C = (BitsOp(Tm5t5.x, 5, 7) << 2) | BitsOp(Tm5t5.x, 0, 1);
Tm5t5.z = 2;
if (BitsOp(tm5t5.x, 2, 4) == 7) {
C = (BitsOp(tm5t5.x, 5, 7) << 2) | BitsOp(tm5t5.x, 0, 1);
tm5t5.z = 2;
t[3] = 2;
} else {
C = BitsOp(Tm5t5.x, 0, 4);
if (BitsOp(Tm5t5.x, 5, 6) == 3) {
Tm5t5.z = 2;
t[3] = BitsBracket(Tm5t5.x, 7);
C = BitsOp(tm5t5.x, 0, 4);
if (BitsOp(tm5t5.x, 5, 6) == 3) {
tm5t5.z = 2;
t[3] = BitsBracket(tm5t5.x, 7);
} else {
Tm5t5.z = BitsBracket(Tm5t5.x, 7);
t[3] = BitsOp(Tm5t5.x, 5, 6);
tm5t5.z = BitsBracket(tm5t5.x, 7);
t[3] = BitsOp(tm5t5.x, 5, 6);
}
}
if (BitsOp(C, 0, 1) == 3) {
@@ -376,12 +364,9 @@ void DecodeTritBlock(uint num_bits) {
t[1] = BitsOp(C, 2, 3);
t[0] = (BitsBracket(C, 1) << 1) | (BitsBracket(C, 0) & ~BitsBracket(C, 1));
}
for (uint i = 0; i < 4; i++) {
const EncodingData val = CreateEncodingData(TRIT, num_bits, m[i], t[i]);
ResultEmplaceBack(val);
}
const EncodingData val = CreateEncodingData(TRIT, num_bits, Tm5t5.y, Tm5t5.z);
ResultEmplaceBack(val);
for (uint i = 0; i < 4; i++)
ResultEmplaceBack(CreateEncodingData(TRIT, num_bits, m[i], t[i]));
ResultEmplaceBack(CreateEncodingData(TRIT, num_bits, tm5t5.y, tm5t5.z));
}
void DecodeIntegerSequence(uint max_range, uint num_values) {
@@ -511,7 +496,7 @@ void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits, o
const uint upper_bound = num_values;
DecodeIntegerSequence(range - 1, num_values);
for (int i = 0; i < upper_bound; ++i) {
color_values[i + 1] = DecodeSingleColorValue(GetEncodingFromVector(i));
color_values[i + 1] = DecodeSingleColorValue(EncodingData(result_vector[i]));
}
}
@@ -539,12 +524,12 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, ui
// Declares a local `uvec4 V[2]` and fills its first N scalar slots, in order, with successive
// entries of color_values; colvals_index is pre-incremented before every read. Slot i lands in
// component (i mod 4) of V[i div 4].
// NOTE(review): two assignment lines are present (i / 4, i % 4 and i >> 2, i & 3). They index the
// same slot for unsigned i and look like the two sides of a diff — confirm which one is live.
#define READ_UINT_VALUES(N) \
uvec4 V[2]; \
for (uint i = 0; i < N; i++) { \
V[i / 4][i % 4] = color_values[++colvals_index]; \
V[i >> 2][i & 3] = color_values[++colvals_index]; \
}
// Declares a local `ivec4 V[2]` and fills its first N scalar slots, in order, with successive
// entries of color_values converted to int; colvals_index is pre-incremented before every read.
// Slot i lands in component (i mod 4) of V[i div 4].
// NOTE(review): two assignment lines are present (i / 4, i % 4 and i >> 2, i & 3). They index the
// same slot for unsigned i and look like the two sides of a diff — confirm which one is live.
#define READ_INT_VALUES(N) \
ivec4 V[2]; \
for (uint i = 0; i < N; i++) { \
V[i / 4][i % 4] = int(color_values[++colvals_index]); \
V[i >> 2][i & 3] = int(color_values[++colvals_index]); \
}
switch (color_endpoint_mode) {
@@ -687,7 +672,7 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
const uint area = size.x * size.y;
const uint loop_count = min(result_index, area * num_planes);
for (uint i = 0; i < loop_count; ++i) {
result_vector[i] = UnquantizeTexelWeight(GetEncodingFromVector(i));
result_vector[i] = UnquantizeTexelWeight(EncodingData(result_vector[i]));
}
}
@@ -969,12 +954,12 @@ void DecompressBlock(ivec3 coord) {
color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx;
const uint clear_byte_start = (weight_bits >> 3) + 1;
const uint byte_insert = ExtractBits(color_endpoint_data, int(clear_byte_start - 1) * 8, 8) & uint(((1 << (weight_bits % 8)) - 1));
const uint byte_insert = ExtractBits(color_endpoint_data, (clear_byte_start - 1) * 8, 8) & uint(((1 << (weight_bits & 7)) - 1));
const uint vec_index = (clear_byte_start - 1) >> 2;
color_endpoint_data[vec_index] = bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8);
color_endpoint_data[vec_index] = bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) & 3) * 8, 8);
for (uint i = clear_byte_start; i < 16; ++i) {
const uint idx = i >> 2;
color_endpoint_data[idx] = bitfieldInsert(color_endpoint_data[idx], 0, int(i % 4) * 8, 8);
color_endpoint_data[idx] = bitfieldInsert(color_endpoint_data[idx], 0, int(i & 3) * 8, 8);
}
// Re-init vector variables for next decode phase
@@ -998,7 +983,7 @@ void DecompressBlock(ivec3 coord) {
const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
const uvec4 weight_vec = GetUnquantizedWeightVector(j, i, size_params, plane_index, dual_plane);
const vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64);
const vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6);
const vec4 p = (Cf / 65535.0f);
imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar);
}
@@ -1030,6 +1015,6 @@ void main() {
if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
return;
}
local_buff = astc_data[offset / 16];
local_buff = astc_data[offset >> 4];
DecompressBlock(coord);
}