WIP: HLSL: add Append/ConsumeBuffer support

This commit is contained in:
steve-lunarg 2017-04-13 18:42:58 -06:00
parent 8e26feb8f2
commit 12bc9aa9ce
6 changed files with 361 additions and 63 deletions

View File

@ -0,0 +1,223 @@
hlsl.structbuffer.append.frag
Shader version: 500
gl_FragCoord origin is upper left
0:? Sequence
0:7 Function Definition: @main(u1; ( temp 4-component vector of float)
0:7 Function Parameters:
0:7 'pos' ( in uint)
0:? Sequence
0:8 move second child to first child ( temp void)
0:8 indirect index (layout( row_major std430) buffer 4-component vector of float)
0:8 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of float)
0:8 'sbuf_a' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data})
0:8 Constant:
0:8 0 (const uint)
0:8 AtomicAdd ( temp uint)
0:8 @count: direct index for structure ( temp int)
0:8 'sbuf_a@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count})
0:8 Constant:
0:8 0 (const int)
0:8 Constant:
0:8 1 (const int)
0:? Constant:
0:? 1.000000
0:? 2.000000
0:? 3.000000
0:? 4.000000
0:10 Branch: Return with expression
0:10 indirect index (layout( row_major std430) buffer 4-component vector of float)
0:10 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of float)
0:10 'sbuf_c' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data})
0:10 Constant:
0:10 0 (const uint)
0:10 add ( temp uint)
0:10 AtomicAdd ( temp uint)
0:10 @count: direct index for structure ( temp int)
0:10 'sbuf_c@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count})
0:10 Constant:
0:10 0 (const int)
0:10 Constant:
0:10 -1 (const int)
0:10 Constant:
0:10 -1 (const int)
0:7 Function Definition: main( ( temp void)
0:7 Function Parameters:
0:? Sequence
0:7 move second child to first child ( temp uint)
0:? 'pos' ( temp uint)
0:? 'pos' (layout( location=0) in uint)
0:7 move second child to first child ( temp 4-component vector of float)
0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float)
0:7 Function Call: @main(u1; ( temp 4-component vector of float)
0:? 'pos' ( temp uint)
0:? Linker Objects
0:? 'sbuf_a' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data})
0:? 'sbuf_a@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count})
0:? 'sbuf_c' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data})
0:? 'sbuf_c@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count})
0:? 'sbuf_unused' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data})
0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float)
0:? 'pos' (layout( location=0) in uint)
Linked fragment stage:
Shader version: 500
gl_FragCoord origin is upper left
0:? Sequence
0:7 Function Definition: @main(u1; ( temp 4-component vector of float)
0:7 Function Parameters:
0:7 'pos' ( in uint)
0:? Sequence
0:8 move second child to first child ( temp void)
0:8 indirect index (layout( row_major std430) buffer 4-component vector of float)
0:8 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of float)
0:8 'sbuf_a' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data})
0:8 Constant:
0:8 0 (const uint)
0:8 AtomicAdd ( temp uint)
0:8 @count: direct index for structure ( temp int)
0:8 'sbuf_a@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count})
0:8 Constant:
0:8 0 (const int)
0:8 Constant:
0:8 1 (const int)
0:? Constant:
0:? 1.000000
0:? 2.000000
0:? 3.000000
0:? 4.000000
0:10 Branch: Return with expression
0:10 indirect index (layout( row_major std430) buffer 4-component vector of float)
0:10 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of float)
0:10 'sbuf_c' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data})
0:10 Constant:
0:10 0 (const uint)
0:10 add ( temp uint)
0:10 AtomicAdd ( temp uint)
0:10 @count: direct index for structure ( temp int)
0:10 'sbuf_c@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count})
0:10 Constant:
0:10 0 (const int)
0:10 Constant:
0:10 -1 (const int)
0:10 Constant:
0:10 -1 (const int)
0:7 Function Definition: main( ( temp void)
0:7 Function Parameters:
0:? Sequence
0:7 move second child to first child ( temp uint)
0:? 'pos' ( temp uint)
0:? 'pos' (layout( location=0) in uint)
0:7 move second child to first child ( temp 4-component vector of float)
0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float)
0:7 Function Call: @main(u1; ( temp 4-component vector of float)
0:? 'pos' ( temp uint)
0:? Linker Objects
0:? 'sbuf_a' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data})
0:? 'sbuf_a@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count})
0:? 'sbuf_c' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data})
0:? 'sbuf_c@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count})
0:? 'sbuf_unused' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data})
0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float)
0:? 'pos' (layout( location=0) in uint)
// Module Version 10000
// Generated by (magic number): 80001
// Id's are bound by 56
Capability Shader
1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450
EntryPoint Fragment 4 "main" 48 51
ExecutionMode 4 OriginUpperLeft
Source HLSL 500
Name 4 "main"
Name 12 "@main(u1;"
Name 11 "pos"
Name 15 "sbuf_a"
MemberName 15(sbuf_a) 0 "@data"
Name 17 "sbuf_a"
Name 20 "sbuf_a@count"
MemberName 20(sbuf_a@count) 0 "@count"
Name 22 "sbuf_a@count"
Name 36 "sbuf_c"
Name 37 "sbuf_c@count"
Name 46 "pos"
Name 48 "pos"
Name 51 "@entryPointOutput"
Name 52 "param"
Name 55 "sbuf_unused"
Decorate 14 ArrayStride 16
MemberDecorate 15(sbuf_a) 0 Offset 0
Decorate 15(sbuf_a) BufferBlock
Decorate 17(sbuf_a) DescriptorSet 0
MemberDecorate 20(sbuf_a@count) 0 Offset 0
Decorate 20(sbuf_a@count) BufferBlock
Decorate 22(sbuf_a@count) DescriptorSet 0
Decorate 36(sbuf_c) DescriptorSet 0
Decorate 37(sbuf_c@count) DescriptorSet 0
Decorate 48(pos) Location 0
Decorate 51(@entryPointOutput) Location 0
Decorate 55(sbuf_unused) DescriptorSet 0
2: TypeVoid
3: TypeFunction 2
6: TypeInt 32 0
7: TypePointer Function 6(int)
8: TypeFloat 32
9: TypeVector 8(float) 4
10: TypeFunction 9(fvec4) 7(ptr)
14: TypeRuntimeArray 9(fvec4)
15(sbuf_a): TypeStruct 14
16: TypePointer Uniform 15(sbuf_a)
17(sbuf_a): 16(ptr) Variable Uniform
18: TypeInt 32 1
19: 18(int) Constant 0
20(sbuf_a@count): TypeStruct 18(int)
21: TypePointer Uniform 20(sbuf_a@count)
22(sbuf_a@count): 21(ptr) Variable Uniform
23: TypePointer Uniform 18(int)
25: 18(int) Constant 1
26: 6(int) Constant 1
27: 6(int) Constant 0
29: 8(float) Constant 1065353216
30: 8(float) Constant 1073741824
31: 8(float) Constant 1077936128
32: 8(float) Constant 1082130432
33: 9(fvec4) ConstantComposite 29 30 31 32
34: TypePointer Uniform 9(fvec4)
36(sbuf_c): 16(ptr) Variable Uniform
37(sbuf_c@count): 21(ptr) Variable Uniform
39: 18(int) Constant 4294967295
47: TypePointer Input 6(int)
48(pos): 47(ptr) Variable Input
50: TypePointer Output 9(fvec4)
51(@entryPointOutput): 50(ptr) Variable Output
55(sbuf_unused): 16(ptr) Variable Uniform
4(main): 2 Function None 3
5: Label
46(pos): 7(ptr) Variable Function
52(param): 7(ptr) Variable Function
49: 6(int) Load 48(pos)
Store 46(pos) 49
53: 6(int) Load 46(pos)
Store 52(param) 53
54: 9(fvec4) FunctionCall 12(@main(u1;) 52(param)
Store 51(@entryPointOutput) 54
Return
FunctionEnd
12(@main(u1;): 9(fvec4) Function None 10
11(pos): 7(ptr) FunctionParameter
13: Label
24: 23(ptr) AccessChain 22(sbuf_a@count) 19
28: 6(int) AtomicIAdd 24 26 27 25
35: 34(ptr) AccessChain 17(sbuf_a) 19 28
Store 35 33
38: 23(ptr) AccessChain 37(sbuf_c@count) 19
40: 6(int) AtomicIAdd 38 26 27 39
41: 6(int) IAdd 40 39
42: 34(ptr) AccessChain 36(sbuf_c) 19 41
43: 9(fvec4) Load 42
ReturnValue 43
FunctionEnd

View File

@ -54,11 +54,14 @@ gl_FragCoord origin is upper left
0:16 Sequence
0:16 move second child to first child ( temp uint)
0:16 'c2' ( temp uint)
0:16 AtomicAdd ( temp uint)
0:16 @count: direct index for structure ( temp int)
0:16 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count})
0:16 add ( temp uint)
0:16 AtomicAdd ( temp uint)
0:16 @count: direct index for structure ( temp int)
0:16 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count})
0:16 Constant:
0:16 0 (const int)
0:16 Constant:
0:16 0 (const int)
0:16 -1 (const int)
0:16 Constant:
0:16 -1 (const int)
0:18 Branch: Return with expression
@ -155,11 +158,14 @@ gl_FragCoord origin is upper left
0:16 Sequence
0:16 move second child to first child ( temp uint)
0:16 'c2' ( temp uint)
0:16 AtomicAdd ( temp uint)
0:16 @count: direct index for structure ( temp int)
0:16 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count})
0:16 add ( temp uint)
0:16 AtomicAdd ( temp uint)
0:16 @count: direct index for structure ( temp int)
0:16 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count})
0:16 Constant:
0:16 0 (const int)
0:16 Constant:
0:16 0 (const int)
0:16 -1 (const int)
0:16 Constant:
0:16 -1 (const int)
0:18 Branch: Return with expression
@ -199,12 +205,12 @@ gl_FragCoord origin is upper left
// Module Version 10000
// Generated by (magic number): 80001
// Id's are bound by 69
// Id's are bound by 70
Capability Shader
1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450
EntryPoint Fragment 4 "main" 62 65
EntryPoint Fragment 4 "main" 63 66
ExecutionMode 4 OriginUpperLeft
Source HLSL 500
Name 4 "main"
@ -222,10 +228,10 @@ gl_FragCoord origin is upper left
Name 36 "sbuf_rw_i@count"
Name 42 "c2"
Name 43 "sbuf_rw_d@count"
Name 60 "pos"
Name 62 "pos"
Name 65 "@entryPointOutput"
Name 66 "param"
Name 61 "pos"
Name 63 "pos"
Name 66 "@entryPointOutput"
Name 67 "param"
Decorate 19 ArrayStride 16
MemberDecorate 20(sbuf_rw_i) 0 Offset 0
Decorate 20(sbuf_rw_i) BufferBlock
@ -236,8 +242,8 @@ gl_FragCoord origin is upper left
Decorate 34(sbuf_rw_i@count) BufferBlock
Decorate 36(sbuf_rw_i@count) DescriptorSet 0
Decorate 43(sbuf_rw_d@count) DescriptorSet 0
Decorate 62(pos) Location 0
Decorate 65(@entryPointOutput) Location 0
Decorate 63(pos) Location 0
Decorate 66(@entryPointOutput) Location 0
2: TypeVoid
3: TypeFunction 2
6: TypeInt 32 0
@ -270,20 +276,20 @@ gl_FragCoord origin is upper left
40: 6(int) Constant 1
43(sbuf_rw_d@count): 35(ptr) Variable Uniform
45: 23(int) Constant 4294967295
61: TypePointer Input 6(int)
62(pos): 61(ptr) Variable Input
64: TypePointer Output 9(fvec4)
65(@entryPointOutput): 64(ptr) Variable Output
62: TypePointer Input 6(int)
63(pos): 62(ptr) Variable Input
65: TypePointer Output 9(fvec4)
66(@entryPointOutput): 65(ptr) Variable Output
4(main): 2 Function None 3
5: Label
60(pos): 7(ptr) Variable Function
66(param): 7(ptr) Variable Function
63: 6(int) Load 62(pos)
Store 60(pos) 63
67: 6(int) Load 60(pos)
Store 66(param) 67
68: 9(fvec4) FunctionCall 12(@main(u1;) 66(param)
Store 65(@entryPointOutput) 68
61(pos): 7(ptr) Variable Function
67(param): 7(ptr) Variable Function
64: 6(int) Load 63(pos)
Store 61(pos) 64
68: 6(int) Load 61(pos)
Store 67(param) 68
69: 9(fvec4) FunctionCall 12(@main(u1;) 67(param)
Store 66(@entryPointOutput) 69
Return
FunctionEnd
12(@main(u1;): 9(fvec4) Function None 10
@ -300,17 +306,18 @@ gl_FragCoord origin is upper left
Store 33(c1) 41
44: 37(ptr) AccessChain 43(sbuf_rw_d@count) 24
46: 6(int) AtomicIAdd 44 40 17 45
Store 42(c2) 46
47: 7(ptr) AccessChain 16(result) 17
48: 6(int) Load 47
49: 8(float) ConvertUToF 48
50: 7(ptr) AccessChain 16(result) 40
51: 6(int) Load 50
52: 8(float) ConvertUToF 51
53: 6(int) Load 33(c1)
54: 8(float) ConvertUToF 53
55: 6(int) Load 42(c2)
56: 8(float) ConvertUToF 55
57: 9(fvec4) CompositeConstruct 49 52 54 56
ReturnValue 57
47: 6(int) IAdd 46 45
Store 42(c2) 47
48: 7(ptr) AccessChain 16(result) 17
49: 6(int) Load 48
50: 8(float) ConvertUToF 49
51: 7(ptr) AccessChain 16(result) 40
52: 6(int) Load 51
53: 8(float) ConvertUToF 52
54: 6(int) Load 33(c1)
55: 8(float) ConvertUToF 54
56: 6(int) Load 42(c2)
57: 8(float) ConvertUToF 56
58: 9(fvec4) CompositeConstruct 50 53 55 57
ReturnValue 58
FunctionEnd

View File

@ -0,0 +1,11 @@
AppendStructuredBuffer<float4> sbuf_a;  // target of the Append() call in main
ConsumeStructuredBuffer<float4> sbuf_c;  // source of the Consume() call in main
AppendStructuredBuffer<float4> sbuf_unused;  // declared but never referenced by main
float4 main(uint pos : FOO) : SV_Target0  // test entry point; 'pos' is not read by the body
{  // NOTE(review): the expected-output baseline's 0:N prefixes look like source line numbers - avoid inserting lines; TODO confirm
sbuf_a.Append(float4(1,2,3,4));  // appends the constant float4(1,2,3,4) to sbuf_a
return sbuf_c.Consume();  // the shader result is the element consumed from sbuf_c
}

View File

@ -245,6 +245,7 @@ INSTANTIATE_TEST_CASE_P(
{"hlsl.structarray.flatten.frag", "main"},
{"hlsl.structarray.flatten.geom", "main"},
{"hlsl.structbuffer.frag", "main"},
{"hlsl.structbuffer.append.frag", "main"},
{"hlsl.structbuffer.atomics.frag", "main"},
{"hlsl.structbuffer.byte.frag", "main"},
{"hlsl.structbuffer.coherent.frag", "main"},

View File

@ -2454,7 +2454,7 @@ bool HlslParseContext::hasStructBuffCounter(const TString& name) const
case EbvRWStructuredBuffer: // ...
return true;
default:
return false; // other builtin types do not have.
return false; // the other structured buffer types do not have a counter.
}
}
@ -2533,6 +2533,35 @@ void HlslParseContext::decomposeStructBufferMethods(const TSourceLoc& loc, TInte
bufferObj = arguments->getAsSymbolNode();
}
if (bufferObj == nullptr || bufferObj->getAsSymbolNode() == nullptr)
return;
TString bufferName(bufferObj->getAsSymbolNode()->getName());
const auto bivIt = structBufferBuiltIn.find(bufferName);
if (bivIt == structBufferBuiltIn.end())
return;
const TBuiltInVariable builtInType = bivIt->second;
// Several buffer methods operate on a hidden per-buffer counter, which is fetched through
// getStructBufferCounter(). This helper builds an EOpAtomicAdd node that adds 'incval' to
// that counter; the node evaluates to the counter's value from before the add.
// Yields nullptr when no counter could be obtained for the buffer.
const auto incDecCounter = [&](int incval) -> TIntermTyped* {
    // The constant is created first, then the counter member is looked up.
    TIntermTyped* const delta = intermediate.addConstantUnion(incval, loc, true);
    TIntermTyped* const counterMember = getStructBufferCounter(loc, bufferObj);

    if (counterMember != nullptr) {
        TIntermAggregate* const atomicAdd = new TIntermAggregate(EOpAtomicAdd);
        atomicAdd->setType(TType(EbtUint, EvqTemporary));
        atomicAdd->setLoc(loc);

        // Operand order: the counter being modified, then the amount to add.
        auto& operands = atomicAdd->getSequence();
        operands.push_back(counterMember);
        operands.push_back(delta);
        return atomicAdd;
    }

    return nullptr;
};
// Index to obtain the runtime sized array out of the buffer.
TIntermTyped* argArray = indexStructBufferContent(loc, bufferObj);
if (argArray == nullptr)
@ -2545,7 +2574,9 @@ void HlslParseContext::decomposeStructBufferMethods(const TSourceLoc& loc, TInte
// Byte address buffers index in bytes (only multiples of 4 permitted... not so much a byte address
// buffer then, but that's what it calls itself).
const bool isByteAddressBuffer = (argArray->getBasicType() == EbtUint);
const bool isByteAddressBuffer = (builtInType == EbvByteAddressBuffer ||
builtInType == EbvRWByteAddressBuffer);
if (isByteAddressBuffer)
argIndex = intermediate.addBinaryNode(EOpRightShift, argIndex, intermediate.addConstantUnion(2, loc, true),
loc, TType(EbtInt));
@ -2746,28 +2777,50 @@ void HlslParseContext::decomposeStructBufferMethods(const TSourceLoc& loc, TInte
}
break;
case EOpMethodIncrementCounter:
{
node = incDecCounter(1);
break;
}
case EOpMethodDecrementCounter:
{
// These methods require a hidden internal counter, obtained via getStructBufferCounter()
TIntermTyped* incrementValue = intermediate.addConstantUnion(op == EOpMethodIncrementCounter ? 1 : -1, loc, true);
TIntermTyped* counter = getStructBufferCounter(loc, bufferObj); // obtain the counter member
node = incrementValue;
if (counter == nullptr)
break;
TIntermAggregate* counterIncrement = new TIntermAggregate(EOpAtomicAdd);
counterIncrement->setType(TType(EbtUint, EvqTemporary));
counterIncrement->setLoc(loc);
counterIncrement->getSequence().push_back(counter);
counterIncrement->getSequence().push_back(incrementValue);
node = counterIncrement;
TIntermTyped* preIncValue = incDecCounter(-1); // result is original value
node = intermediate.addBinaryNode(EOpAdd, preIncValue, intermediate.addConstantUnion(-1, loc, true), loc,
preIncValue->getType());
break;
}
case EOpMethodAppend:
{
TIntermTyped* oldCounter = incDecCounter(1);
TIntermTyped* lValue = intermediate.addIndex(EOpIndexIndirect, argArray, oldCounter, loc);
TIntermTyped* rValue = argAggregate->getSequence()[1]->getAsTyped();
const TType derefType(argArray->getType(), 0);
lValue->setType(derefType);
node = intermediate.addAssign(EOpAssign, lValue, rValue, loc);
node->setType(TType(EbtVoid)); // Append is a void return type
break;
}
case EOpMethodConsume:
{
TIntermTyped* oldCounter = incDecCounter(-1);
TIntermTyped* newCounter = intermediate.addBinaryNode(EOpAdd, oldCounter, intermediate.addConstantUnion(-1, loc, true), loc,
oldCounter->getType());
node = intermediate.addIndex(EOpIndexIndirect, argArray, newCounter, loc);
const TType derefType(argArray->getType(), 0);
node->setType(derefType);
break;
}
break;
default:
break; // most pass through unchanged
@ -5954,7 +6007,9 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, TFunction
(candidateList[0]->getBuiltInOp() == EOpMethodAppend ||
candidateList[0]->getBuiltInOp() == EOpMethodRestartStrip ||
candidateList[0]->getBuiltInOp() == EOpMethodIncrementCounter ||
candidateList[0]->getBuiltInOp() == EOpMethodDecrementCounter)) {
candidateList[0]->getBuiltInOp() == EOpMethodDecrementCounter ||
candidateList[0]->getBuiltInOp() == EOpMethodAppend ||
candidateList[0]->getBuiltInOp() == EOpMethodConsume)) {
return candidateList[0];
}

View File

@ -873,6 +873,7 @@ void TBuiltInParseablesHlsl::initialize(int /*version*/, EProfile /*profile*/, c
{ "InterlockedXor", nullptr, nullptr, "-", "-", EShLangAll, true },
{ "IncrementCounter", nullptr, nullptr, "-", "-", EShLangAll, true },
{ "DecrementCounter", nullptr, nullptr, "-", "-", EShLangAll, true },
{ "Consume", nullptr, nullptr, "-", "-", EShLangAll, true },
// Mark end of list, since we want to avoid a range-based for, as some compilers don't handle it yet.
{ nullptr, nullptr, nullptr, nullptr, nullptr, 0, false },
@ -1184,7 +1185,7 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int /*version*/, EProfile /*profil
symbolTable.relateToOperator(BUILTIN_PREFIX "Store4", EOpMethodStore4);
symbolTable.relateToOperator(BUILTIN_PREFIX "IncrementCounter", EOpMethodIncrementCounter);
symbolTable.relateToOperator(BUILTIN_PREFIX "DecrementCounter", EOpMethodDecrementCounter);
symbolTable.relateToOperator(BUILTIN_PREFIX "Append", EOpMethodAppend);
// Append is also a GS method: we don't add it twice
symbolTable.relateToOperator(BUILTIN_PREFIX "Consume", EOpMethodConsume);
symbolTable.relateToOperator(BUILTIN_PREFIX "InterlockedAdd", EOpInterlockedAdd);