Fix WavePrefixCountBits() being off by one.

It was counting bits up to and including the current lane, whereas the
documentation says the current lane should be excluded. This now matches
dxc's behavior as well.

Fix #2929
This commit is contained in:
Ryp 2022-04-21 22:05:17 +03:00
parent 06ac141412
commit f906b895ec
2 changed files with 5 additions and 5 deletions

View File

@ -1126,7 +1126,7 @@ local_size = (32, 16, 1)
0:54 0 (const int)
0:54 Constant:
0:54 0 (const int)
0:54 subgroupBallotInclusiveBitCount ( temp uint)
0:54 subgroupBallotExclusiveBitCount ( temp uint)
0:54 subgroupBallot ( temp 4-component vector of uint)
0:54 Compare Equal ( temp bool)
0:54 direct index ( temp uint)
@ -2289,7 +2289,7 @@ local_size = (32, 16, 1)
0:54 0 (const int)
0:54 Constant:
0:54 0 (const int)
0:54 subgroupBallotInclusiveBitCount ( temp uint)
0:54 subgroupBallotExclusiveBitCount ( temp uint)
0:54 subgroupBallot ( temp 4-component vector of uint)
0:54 Compare Equal ( temp bool)
0:54 direct index ( temp uint)
@ -2818,7 +2818,7 @@ local_size = (32, 16, 1)
390: 6(int) Load 389
392: 391(bool) IEqual 390 26
393: 13(ivec4) GroupNonUniformBallot 35 392
394: 6(int) GroupNonUniformBallotBitCount 35 InclusiveScan 393
394: 6(int) GroupNonUniformBallotBitCount 35 ExclusiveScan 393
395: 42(ptr) AccessChain 24(data) 25 386 25 26
Store 395 394
Return

View File

@ -5430,7 +5430,7 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*&
}
case EOpWavePrefixCountBits:
{
// Mapped to subgroupBallotInclusiveBitCount(subgroupBallot())
// Mapped to subgroupBallotExclusiveBitCount(subgroupBallot())
// builtin
// uvec4 type.
@ -5444,7 +5444,7 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*&
TType uintType(EbtUint, EvqTemporary);
node = intermediate.addBuiltInFunctionCall(loc,
EOpSubgroupBallotInclusiveBitCount, true, res, uintType);
EOpSubgroupBallotExclusiveBitCount, true, res, uintType);
break;
}