Merge pull request #9374 from MerryMage/clz

BitUtils: Add CountLeadingZeros
This commit is contained in:
LC 2020-12-27 22:18:46 -05:00 committed by GitHub
commit c163bc3187
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 54 additions and 64 deletions

View File

@ -26,20 +26,6 @@ namespace
const int kWRegSizeInBits = 32; const int kWRegSizeInBits = 32;
const int kXRegSizeInBits = 64; const int kXRegSizeInBits = 64;
// The below few functions are taken from V8.
// Counts how many consecutive zero bits sit at the top of the low `width` bits of `value`,
// scanning downwards from bit (width - 1). Returns `width` when none of those bits is set.
int CountLeadingZeros(uint64_t value, int width)
{
  // TODO(jbramley): Optimize this for ARM64 hosts.
  int zeros = 0;
  // `probe` starts at the highest bit of the field and walks towards bit 0.
  for (uint64_t probe = 1ULL << (width - 1); (zeros < width) && ((value & probe) == 0);
       probe >>= 1)
  {
    ++zeros;
  }
  return zeros;
}
uint64_t LargestPowerOf2Divisor(uint64_t value) uint64_t LargestPowerOf2Divisor(uint64_t value)
{ {
return value & -(int64_t)value; return value & -(int64_t)value;
@ -155,8 +141,8 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
// Compute the repeat distance d, and set up a bitmask covering the basic // Compute the repeat distance d, and set up a bitmask covering the basic
// unit of repetition (i.e. a word with the bottom d bits set). Also, in all // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
// of these cases the N bit of the output will be zero. // of these cases the N bit of the output will be zero.
clz_a = CountLeadingZeros(a, kXRegSizeInBits); clz_a = Common::CountLeadingZeros(a);
int clz_c = CountLeadingZeros(c, kXRegSizeInBits); int clz_c = Common::CountLeadingZeros(c);
d = clz_a - clz_c; d = clz_a - clz_c;
mask = ((UINT64_C(1) << d) - 1); mask = ((UINT64_C(1) << d) - 1);
out_n = 0; out_n = 0;
@ -182,7 +168,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
// of set bits in our word, meaning that we have the trivial case of // of set bits in our word, meaning that we have the trivial case of
// d == 64 and only one 'repetition'. Set up all the same variables as in // d == 64 and only one 'repetition'. Set up all the same variables as in
// the general case above, and set the N bit in the output. // the general case above, and set the N bit in the output.
clz_a = CountLeadingZeros(a, kXRegSizeInBits); clz_a = Common::CountLeadingZeros(a);
d = 64; d = 64;
mask = ~UINT64_C(0); mask = ~UINT64_C(0);
out_n = 1; out_n = 1;
@ -214,7 +200,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
0x5555555555555555UL, 0x5555555555555555UL,
}}; }};
int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57; int multiplier_idx = Common::CountLeadingZeros((u64)d) - 57;
// Ensure that the index to the multipliers array is within bounds. // Ensure that the index to the multipliers array is within bounds.
DEBUG_ASSERT((multiplier_idx >= 0) && (static_cast<size_t>(multiplier_idx) < multipliers.size())); DEBUG_ASSERT((multiplier_idx >= 0) && (static_cast<size_t>(multiplier_idx) < multipliers.size()));
@ -233,7 +219,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
// Count the set bits in our basic stretch. The special case of clz(0) == -1 // Count the set bits in our basic stretch. The special case of clz(0) == -1
// makes the answer come out right for stretches that reach the very top of // makes the answer come out right for stretches that reach the very top of
// the word (e.g. numbers like 0xffffc00000000000). // the word (e.g. numbers like 0xffffc00000000000).
int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSizeInBits); int clz_b = (b == 0) ? -1 : Common::CountLeadingZeros(b);
int s = clz_a - clz_b; int s = clz_a - clz_b;
// Decide how many bits to rotate right by, to put the low bit of that basic // Decide how many bits to rotate right by, to put the low bit of that basic

View File

@ -11,6 +11,10 @@
#include <initializer_list> #include <initializer_list>
#include <type_traits> #include <type_traits>
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace Common namespace Common
{ {
/// ///
@ -357,4 +361,44 @@ T ExpandValue(T value, size_t left_shift_amount)
(T(-ExtractBit<0>(value)) >> (BitSize<T>() - left_shift_amount)); (T(-ExtractBit<0>(value)) >> (BitSize<T>() - left_shift_amount));
} }
/// Counts the zero bits above the most significant set bit of a 64-bit value.
///
/// @param value The value to inspect.
/// @return The number of leading zero bits, in the range [0, 64]; 64 when value is 0.
constexpr int CountLeadingZeros(uint64_t value)
{
#if defined(__GNUC__)
  // __builtin_clzll has an undefined result for 0, so that input is handled explicitly.
  return value != 0 ? __builtin_clzll(value) : 64;
#else
  // Portable fallback. The earlier MSVC branches tested _M_ARM_64 / _M_X86_64, which MSVC does
  // not define (its macros are _M_ARM64 / _M_X64), so this loop is what MSVC builds were
  // already compiling. The MSVC bit-scan intrinsics are not constexpr, so they cannot simply be
  // switched on inside this constexpr function.
  int result = 64;
  while (value != 0)
  {
    --result;
    value >>= 1;
  }
  return result;
#endif
}
/// Counts the zero bits above the most significant set bit of a 32-bit value.
///
/// @param value The value to inspect.
/// @return The number of leading zero bits, in the range [0, 32]; 32 when value is 0.
constexpr int CountLeadingZeros(uint32_t value)
{
#if defined(__GNUC__)
  // __builtin_clz has an undefined result for 0, so that input is handled explicitly.
  return value != 0 ? __builtin_clz(value) : 32;
#else
  // Portable fallback. The earlier MSVC branches tested _M_ARM_64 / _M_X86_64, which MSVC does
  // not define (its macros are _M_ARM64 / _M_X64), so this loop is what MSVC builds were
  // already compiling. The MSVC bit-scan intrinsics are not constexpr, so they cannot simply be
  // switched on inside this constexpr function.
  int result = 32;
  while (value != 0)
  {
    --result;
    value >>= 1;
  }
  return result;
#endif
}
} // namespace Common } // namespace Common

View File

@ -9,12 +9,9 @@
#include <type_traits> #include <type_traits>
#include <vector> #include <vector>
#include "Common/BitUtils.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace MathUtil namespace MathUtil
{ {
constexpr double TAU = 6.2831853071795865; constexpr double TAU = 6.2831853071795865;
@ -154,21 +151,5 @@ float MathFloatVectorSum(const std::vector<float>&);
// Rounds down. 0 -> undefined // Rounds down. 0 -> undefined
inline int IntLog2(u64 val) inline int IntLog2(u64 val)
{ {
#if defined(__GNUC__) return 63 - Common::CountLeadingZeros(val);
return 63 - __builtin_clzll(val);
#elif defined(_MSC_VER)
unsigned long result = ULONG_MAX;
_BitScanReverse64(&result, val);
return result;
#else
int result = -1;
while (val != 0)
{
val >>= 1;
++result;
}
return result;
#endif
} }

View File

@ -236,17 +236,7 @@ void Interpreter::cmpl(UGeckoInstruction inst)
void Interpreter::cntlzwx(UGeckoInstruction inst) void Interpreter::cntlzwx(UGeckoInstruction inst)
{ {
u32 val = rGPR[inst.RS]; rGPR[inst.RA] = Common::CountLeadingZeros(rGPR[inst.RS]);
u32 mask = 0x80000000;
int i = 0;
for (; i < 32; i++, mask >>= 1)
{
if (val & mask)
break;
}
rGPR[inst.RA] = i;
if (inst.Rc) if (inst.Rc)
Helper_UpdateCR0(rGPR[inst.RA]); Helper_UpdateCR0(rGPR[inst.RA]);

View File

@ -1956,14 +1956,7 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
if (gpr.IsImm(s)) if (gpr.IsImm(s))
{ {
u32 mask = 0x80000000; gpr.SetImmediate32(a, Common::CountLeadingZeros(gpr.Imm32(s)));
u32 i = 0;
for (; i < 32; i++, mask >>= 1)
{
if (gpr.Imm32(s) & mask)
break;
}
gpr.SetImmediate32(a, i);
} }
else else
{ {

View File

@ -368,11 +368,7 @@ void JitArm64::cntlzwx(UGeckoInstruction inst)
if (gpr.IsImm(s)) if (gpr.IsImm(s))
{ {
#ifdef _MSC_VER gpr.SetImmediate(a, Common::CountLeadingZeros(gpr.GetImm(s)));
gpr.SetImmediate(a, _CountLeadingZeros(gpr.GetImm(s)));
#else
gpr.SetImmediate(a, __builtin_clz(gpr.GetImm(s)));
#endif
if (inst.Rc) if (inst.Rc)
ComputeRC0(gpr.GetImm(a)); ComputeRC0(gpr.GetImm(a));
} }