diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 19eed36d8c..aba908585d 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -81,12 +81,12 @@ const uint encoding_values[22] = uint[]( ); // Input ASTC texture globals -int total_bitsread = 0; +uint total_bitsread = 0; uvec4 local_buff; // Color data globals uvec4 color_endpoint_data; -int color_bitsread = 0; +uint color_bitsread = 0; // Global "vector" to be pushed into when decoding // At most will require BLOCK_WIDTH x BLOCK_HEIGHT in single plane mode @@ -226,45 +226,31 @@ uint Select2DPartition(uint seed, uvec2 pos, uint partition_count) { } } -uint ExtractBits(uvec4 payload, int offset, int bits) { - if (bits <= 0 || bits > 32) { +uint ExtractBits(uvec4 payload, uint offset, uint bits) { + if (bits == 0 || bits > 32) return 0; - } - const int last_offset = offset + bits - 1; - const int shifted_offset = offset >> 5; - if ((last_offset >> 5) == shifted_offset) { - return bitfieldExtract(payload[shifted_offset], offset & 31, bits); - } - const int first_bits = 32 - (offset & 31); + const uint last_offset = offset + bits - 1; + const uint shifted_offset = offset >> 5; + if ((last_offset >> 5) == shifted_offset) + return bitfieldExtract(payload[shifted_offset], int(offset & 31), int(bits)); + const uint first_bits = 32 - (offset & 31); const uvec4 next = uvec4(payload.yzw, 0); - return bitfieldExtract(payload[shifted_offset], offset & 31, first_bits) - | (bitfieldExtract(next[shifted_offset], 0, bits - first_bits) << first_bits); + return bitfieldExtract(payload[shifted_offset], int(offset & 31), int(first_bits)) + | (bitfieldExtract(next[shifted_offset], 0, int(bits - first_bits)) << first_bits); } uint StreamBits(uint num_bits) { - const int int_bits = int(num_bits); - const uint ret = ExtractBits(local_buff, total_bitsread, int_bits); - total_bitsread += int_bits; + const uint ret = ExtractBits(local_buff, total_bitsread, num_bits); + total_bitsread += num_bits; return ret; } -void SkipBits(uint num_bits) { - const int int_bits = int(num_bits); - total_bitsread += int_bits; -} - uint StreamColorBits(uint num_bits) { - const int int_bits = int(num_bits); - const uint ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits); - color_bitsread += int_bits; + const uint ret = ExtractBits(color_endpoint_data, color_bitsread, num_bits); + color_bitsread += num_bits; return ret; } -EncodingData GetEncodingFromVector(uint index) { - const uint data = result_vector[index]; - return EncodingData(data); -} - // Returns the number of bits required to encode n_vals values. uint GetBitLength(uint n_vals, uint encoding_index) { // uint Div3Floor(uint v) { return (v * 0x5556) >> 16; } @@ -273,8 +259,9 @@ uint GetBitLength(uint n_vals, uint encoding_index) { // uint Div5Ceil(uint v) { return Div5Floor(v + 4); } const EncodingData encoding_value = EncodingData(encoding_values[encoding_index]); const uint encoding = Encoding(encoding_value); + const uint num_bits = NumBits(encoding_value); const uvec3 div_constant = uvec3(0, 0x5556, 0x3334); - return NumBits(encoding_value) * n_vals + return num_bits * n_vals + ((((n_vals * ((0x870 >> (encoding * 4)) & 0xf)) + ((0x420 >> (encoding * 4)) & 0xf)) * div_constant[encoding]) >> 16); } @@ -309,13 +296,16 @@ void DecodeQuintBlock(uint num_bits) { if (BitsOp(qQ.w, 1, 2) == 3 && BitsOp(qQ.w, 5, 6) == 0) { qQ.x = 4; qQ.y = 4; - qQ.z = (BitsBracket(qQ.w, 0) << 2) | ((BitsBracket(qQ.w, 4) & ~BitsBracket(qQ.w, 0)) << 1) | - (BitsBracket(qQ.w, 3) & ~BitsBracket(qQ.w, 0)); + qQ.z = (BitsBracket(qQ.w, 0) << 2) + | ((BitsBracket(qQ.w, 4) & ~BitsBracket(qQ.w, 0)) << 1) + | (BitsBracket(qQ.w, 3) & ~BitsBracket(qQ.w, 0)); } else { uint C = 0; if (BitsOp(qQ.w, 1, 2) == 3) { qQ.z = 4; - C = (BitsOp(qQ.w, 3, 4) << 3) | ((~BitsOp(qQ.w, 5, 6) & 3) << 1) | BitsBracket(qQ.w, 0); + C = (BitsOp(qQ.w, 3, 4) << 3) + | ((~BitsOp(qQ.w, 5, 6) & 3) << 1) + | BitsBracket(qQ.w, 0); } else { qQ.z = BitsOp(qQ.w, 5, 6); C = BitsOp(qQ.w, 0, 4); @@ -328,39 +318,37 @@ void DecodeQuintBlock(uint num_bits) { qQ.x = BitsOp(C, 0, 2); } } - for (uint i = 0; i < 3; i++) { - const EncodingData val = CreateEncodingData(QUINT, num_bits, m[i], qQ[i]); - ResultEmplaceBack(val); - } + for (uint i = 0; i < 3; i++) + ResultEmplaceBack(CreateEncodingData(QUINT, num_bits, m[i], qQ[i])); } void DecodeTritBlock(uint num_bits) { uvec4 m; uvec4 t; - uvec3 Tm5t5; + uvec3 tm5t5; m[0] = StreamColorBits(num_bits); - Tm5t5.x = StreamColorBits(2); + tm5t5.x = StreamColorBits(2); m[1] = StreamColorBits(num_bits); - Tm5t5.x |= StreamColorBits(2) << 2; + tm5t5.x |= StreamColorBits(2) << 2; m[2] = StreamColorBits(num_bits); - Tm5t5.x |= StreamColorBits(1) << 4; + tm5t5.x |= StreamColorBits(1) << 4; m[3] = StreamColorBits(num_bits); - Tm5t5.x |= StreamColorBits(2) << 5; - Tm5t5.y = StreamColorBits(num_bits); - Tm5t5.x |= StreamColorBits(1) << 7; + tm5t5.x |= StreamColorBits(2) << 5; + tm5t5.y = StreamColorBits(num_bits); + tm5t5.x |= StreamColorBits(1) << 7; uint C = 0; - if (BitsOp(Tm5t5.x, 2, 4) == 7) { - C = (BitsOp(Tm5t5.x, 5, 7) << 2) | BitsOp(Tm5t5.x, 0, 1); - Tm5t5.z = 2; + if (BitsOp(tm5t5.x, 2, 4) == 7) { + C = (BitsOp(tm5t5.x, 5, 7) << 2) | BitsOp(tm5t5.x, 0, 1); + tm5t5.z = 2; t[3] = 2; } else { - C = BitsOp(Tm5t5.x, 0, 4); - if (BitsOp(Tm5t5.x, 5, 6) == 3) { - Tm5t5.z = 2; - t[3] = BitsBracket(Tm5t5.x, 7); + C = BitsOp(tm5t5.x, 0, 4); + if (BitsOp(tm5t5.x, 5, 6) == 3) { + tm5t5.z = 2; + t[3] = BitsBracket(tm5t5.x, 7); } else { - Tm5t5.z = BitsBracket(Tm5t5.x, 7); - t[3] = BitsOp(Tm5t5.x, 5, 6); + tm5t5.z = BitsBracket(tm5t5.x, 7); + t[3] = BitsOp(tm5t5.x, 5, 6); } } if (BitsOp(C, 0, 1) == 3) { @@ -376,12 +364,9 @@ void DecodeTritBlock(uint num_bits) { t[1] = BitsOp(C, 2, 3); t[0] = (BitsBracket(C, 1) << 1) | (BitsBracket(C, 0) & ~BitsBracket(C, 1)); } - for (uint i = 0; i < 4; i++) { - const EncodingData val = CreateEncodingData(TRIT, num_bits, m[i], t[i]); - ResultEmplaceBack(val); - } - const EncodingData val = CreateEncodingData(TRIT, num_bits, Tm5t5.y, Tm5t5.z); - ResultEmplaceBack(val); + for (uint i = 0; i < 4; i++) + ResultEmplaceBack(CreateEncodingData(TRIT, num_bits, m[i], t[i])); + ResultEmplaceBack(CreateEncodingData(TRIT, num_bits, tm5t5.y, tm5t5.z)); } void DecodeIntegerSequence(uint max_range, uint num_values) { @@ -511,7 +496,7 @@ void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits, o const uint upper_bound = num_values; DecodeIntegerSequence(range - 1, num_values); for (int i = 0; i < upper_bound; ++i) { - color_values[i + 1] = DecodeSingleColorValue(GetEncodingFromVector(i)); + color_values[i + 1] = DecodeSingleColorValue(EncodingData(result_vector[i])); } } @@ -539,12 +524,12 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, ui #define READ_UINT_VALUES(N) \ uvec4 V[2]; \ for (uint i = 0; i < N; i++) { \ - V[i / 4][i % 4] = color_values[++colvals_index]; \ + V[i >> 2][i & 3] = color_values[++colvals_index]; \ } #define READ_INT_VALUES(N) \ ivec4 V[2]; \ for (uint i = 0; i < N; i++) { \ - V[i / 4][i % 4] = int(color_values[++colvals_index]); \ + V[i >> 2][i & 3] = int(color_values[++colvals_index]); \ } switch (color_endpoint_mode) { @@ -687,7 +672,7 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { const uint area = size.x * size.y; const uint loop_count = min(result_index, area * num_planes); for (uint i = 0; i < loop_count; ++i) { - result_vector[i] = UnquantizeTexelWeight(GetEncodingFromVector(i)); + result_vector[i] = UnquantizeTexelWeight(EncodingData(result_vector[i])); } } @@ -969,12 +954,12 @@ void DecompressBlock(ivec3 coord) { color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; const uint clear_byte_start = (weight_bits >> 3) + 1; - const uint byte_insert = ExtractBits(color_endpoint_data, int(clear_byte_start - 1) * 8, 8) & uint(((1 << (weight_bits % 8)) - 1)); + const uint byte_insert = ExtractBits(color_endpoint_data, (clear_byte_start - 1) * 8, 8) & uint(((1 << (weight_bits & 7)) - 1)); const uint vec_index = (clear_byte_start - 1) >> 2; - color_endpoint_data[vec_index] = bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8); + color_endpoint_data[vec_index] = bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) & 3) * 8, 8); for (uint i = clear_byte_start; i < 16; ++i) { const uint idx = i >> 2; - color_endpoint_data[idx] = bitfieldInsert(color_endpoint_data[idx], 0, int(i % 4) * 8, 8); + color_endpoint_data[idx] = bitfieldInsert(color_endpoint_data[idx], 0, int(i & 3) * 8, 8); } // Re-init vector variables for next decode phase @@ -998,7 +983,7 @@ void DecompressBlock(ivec3 coord) { const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); const uvec4 weight_vec = GetUnquantizedWeightVector(j, i, size_params, plane_index, dual_plane); - const vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); + const vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6); const vec4 p = (Cf / 65535.0f); imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); } @@ -1030,6 +1015,6 @@ void main() { if (any(greaterThanEqual(coord, imageSize(dest_image)))) { return; } - local_buff = astc_data[offset / 16]; + local_buff = astc_data[offset >> 4]; DecompressBlock(coord); }