Implement extension GL_NV_shader_atomic_int64

Rex Xu 2017-09-26 15:42:56 +08:00
parent f21c173a05
commit e8fe8b0de9
8 changed files with 347 additions and 3 deletions

View File

@@ -4749,12 +4749,12 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv
    case glslang::EOpAtomicMin:
    case glslang::EOpImageAtomicMin:
    case glslang::EOpAtomicCounterMin:
        opCode = typeProxy == glslang::EbtUint ? spv::OpAtomicUMin : spv::OpAtomicSMin;
        opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ? spv::OpAtomicUMin : spv::OpAtomicSMin;
        break;
    case glslang::EOpAtomicMax:
    case glslang::EOpImageAtomicMax:
    case glslang::EOpAtomicCounterMax:
        opCode = typeProxy == glslang::EbtUint ? spv::OpAtomicUMax : spv::OpAtomicSMax;
        opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ? spv::OpAtomicUMax : spv::OpAtomicSMax;
        break;
    case glslang::EOpAtomicAnd:
    case glslang::EOpImageAtomicAnd:
@@ -4795,6 +4795,9 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv
        break;
    }
    if (typeProxy == glslang::EbtInt64 || typeProxy == glslang::EbtUint64)
        builder.addCapability(spv::CapabilityInt64Atomics);
    // Sort out the operands
    // - mapping from glslang -> SPV
    // - there are extra SPV operands with no glslang source

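For reference, a minimal GLSL sketch (not part of this change set; block and member names are illustrative) of what the hunk above handles: a uint64_t operand now selects the unsigned opcodes (OpAtomicUMin/OpAtomicUMax), and any int64_t/uint64_t operand makes the generated module declare the Int64Atomics capability, as the baseline disassembly below shows.

    #version 450 core
    #extension GL_ARB_gpu_shader_int64 : enable
    #extension GL_NV_shader_atomic_int64 : enable
    layout(local_size_x = 1) in;
    layout(binding = 0) buffer Counters        // illustrative block name
    {
        int64_t  signedValue;
        uint64_t unsignedValue;
    } counters;
    void main()
    {
        atomicMin(counters.signedValue, int64_t(-1));    // EbtInt64  -> OpAtomicSMin
        atomicMin(counters.unsignedValue, uint64_t(1));  // EbtUint64 -> OpAtomicUMin
        // Either call causes "Capability Int64Atomics" to be emitted.
    }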
View File

@@ -0,0 +1,215 @@
spv.atomicInt64.comp
// Module Version 10000
// Generated by (magic number): 80001
// Id's are bound by 149
Capability Shader
Capability Int64
Capability Int64Atomics
1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450
EntryPoint GLCompute 4 "main"
ExecutionMode 4 LocalSize 16 16 1
Source GLSL 450
SourceExtension "GL_ARB_gpu_shader_int64"
SourceExtension "GL_NV_shader_atomic_int64"
Name 4 "main"
Name 8 "i64"
Name 12 "u64"
Name 14 "Buffer"
MemberName 14(Buffer) 0 "i64"
MemberName 14(Buffer) 1 "u64"
Name 16 "buf"
Name 84 "Struct"
MemberName 84(Struct) 0 "i64"
MemberName 84(Struct) 1 "u64"
Name 86 "s"
MemberDecorate 14(Buffer) 0 Offset 0
MemberDecorate 14(Buffer) 1 Offset 8
Decorate 14(Buffer) BufferBlock
Decorate 16(buf) DescriptorSet 0
Decorate 16(buf) Binding 0
Decorate 148 BuiltIn WorkgroupSize
2: TypeVoid
3: TypeFunction 2
6: TypeInt 64 1
7: TypePointer Function 6(int)
9: 6(int) Constant 0 0
10: TypeInt 64 0
11: TypePointer Function 10(int)
13: 10(int) Constant 0 0
14(Buffer): TypeStruct 6(int) 10(int)
15: TypePointer Uniform 14(Buffer)
16(buf): 15(ptr) Variable Uniform
17: TypeInt 32 1
18: 17(int) Constant 0
19: TypePointer Uniform 6(int)
21: 6(int) Constant 4294967272 4294967295
22: TypeInt 32 0
23: 22(int) Constant 1
24: 22(int) Constant 0
28: 17(int) Constant 1
29: TypePointer Uniform 10(int)
31: 10(int) Constant 15 0
84(Struct): TypeStruct 6(int) 10(int)
85: TypePointer Workgroup 84(Struct)
86(s): 85(ptr) Variable Workgroup
87: TypePointer Workgroup 6(int)
92: TypePointer Workgroup 10(int)
146: TypeVector 22(int) 3
147: 22(int) Constant 16
148: 146(ivec3) ConstantComposite 147 147 23
4(main): 2 Function None 3
5: Label
8(i64): 7(ptr) Variable Function
12(u64): 11(ptr) Variable Function
Store 8(i64) 9
Store 12(u64) 13
20: 19(ptr) AccessChain 16(buf) 18
25: 6(int) AtomicSMin 20 23 24 21
26: 6(int) Load 8(i64)
27: 6(int) IAdd 26 25
Store 8(i64) 27
30: 29(ptr) AccessChain 16(buf) 28
32: 10(int) AtomicUMin 30 23 24 31
33: 10(int) Load 12(u64)
34: 10(int) IAdd 33 32
Store 12(u64) 34
35: 19(ptr) AccessChain 16(buf) 18
36: 6(int) AtomicSMax 35 23 24 21
37: 6(int) Load 8(i64)
38: 6(int) IAdd 37 36
Store 8(i64) 38
39: 29(ptr) AccessChain 16(buf) 28
40: 10(int) AtomicUMax 39 23 24 31
41: 10(int) Load 12(u64)
42: 10(int) IAdd 41 40
Store 12(u64) 42
43: 19(ptr) AccessChain 16(buf) 18
44: 6(int) AtomicAnd 43 23 24 21
45: 6(int) Load 8(i64)
46: 6(int) IAdd 45 44
Store 8(i64) 46
47: 29(ptr) AccessChain 16(buf) 28
48: 10(int) AtomicAnd 47 23 24 31
49: 10(int) Load 12(u64)
50: 10(int) IAdd 49 48
Store 12(u64) 50
51: 19(ptr) AccessChain 16(buf) 18
52: 6(int) AtomicOr 51 23 24 21
53: 6(int) Load 8(i64)
54: 6(int) IAdd 53 52
Store 8(i64) 54
55: 29(ptr) AccessChain 16(buf) 28
56: 10(int) AtomicOr 55 23 24 31
57: 10(int) Load 12(u64)
58: 10(int) IAdd 57 56
Store 12(u64) 58
59: 19(ptr) AccessChain 16(buf) 18
60: 6(int) AtomicXor 59 23 24 21
61: 6(int) Load 8(i64)
62: 6(int) IAdd 61 60
Store 8(i64) 62
63: 29(ptr) AccessChain 16(buf) 28
64: 10(int) AtomicXor 63 23 24 31
65: 10(int) Load 12(u64)
66: 10(int) IAdd 65 64
Store 12(u64) 66
67: 19(ptr) AccessChain 16(buf) 18
68: 6(int) AtomicIAdd 67 23 24 21
69: 6(int) Load 8(i64)
70: 6(int) IAdd 69 68
Store 8(i64) 70
71: 19(ptr) AccessChain 16(buf) 18
72: 6(int) AtomicExchange 71 23 24 21
73: 6(int) Load 8(i64)
74: 6(int) IAdd 73 72
Store 8(i64) 74
75: 19(ptr) AccessChain 16(buf) 18
76: 6(int) Load 8(i64)
77: 6(int) AtomicCompareExchange 75 23 24 24 76 21
78: 6(int) Load 8(i64)
79: 6(int) IAdd 78 77
Store 8(i64) 79
80: 6(int) Load 8(i64)
81: 19(ptr) AccessChain 16(buf) 18
Store 81 80
82: 10(int) Load 12(u64)
83: 29(ptr) AccessChain 16(buf) 28
Store 83 82
Store 8(i64) 9
Store 12(u64) 13
88: 87(ptr) AccessChain 86(s) 18
89: 6(int) AtomicSMin 88 23 24 21
90: 6(int) Load 8(i64)
91: 6(int) IAdd 90 89
Store 8(i64) 91
93: 92(ptr) AccessChain 86(s) 28
94: 10(int) AtomicUMin 93 23 24 31
95: 10(int) Load 12(u64)
96: 10(int) IAdd 95 94
Store 12(u64) 96
97: 87(ptr) AccessChain 86(s) 18
98: 6(int) AtomicSMax 97 23 24 21
99: 6(int) Load 8(i64)
100: 6(int) IAdd 99 98
Store 8(i64) 100
101: 92(ptr) AccessChain 86(s) 28
102: 10(int) AtomicUMax 101 23 24 31
103: 10(int) Load 12(u64)
104: 10(int) IAdd 103 102
Store 12(u64) 104
105: 87(ptr) AccessChain 86(s) 18
106: 6(int) AtomicAnd 105 23 24 21
107: 6(int) Load 8(i64)
108: 6(int) IAdd 107 106
Store 8(i64) 108
109: 92(ptr) AccessChain 86(s) 28
110: 10(int) AtomicAnd 109 23 24 31
111: 10(int) Load 12(u64)
112: 10(int) IAdd 111 110
Store 12(u64) 112
113: 87(ptr) AccessChain 86(s) 18
114: 6(int) AtomicOr 113 23 24 21
115: 6(int) Load 8(i64)
116: 6(int) IAdd 115 114
Store 8(i64) 116
117: 92(ptr) AccessChain 86(s) 28
118: 10(int) AtomicOr 117 23 24 31
119: 10(int) Load 12(u64)
120: 10(int) IAdd 119 118
Store 12(u64) 120
121: 87(ptr) AccessChain 86(s) 18
122: 6(int) AtomicXor 121 23 24 21
123: 6(int) Load 8(i64)
124: 6(int) IAdd 123 122
Store 8(i64) 124
125: 92(ptr) AccessChain 86(s) 28
126: 10(int) AtomicXor 125 23 24 31
127: 10(int) Load 12(u64)
128: 10(int) IAdd 127 126
Store 12(u64) 128
129: 87(ptr) AccessChain 86(s) 18
130: 6(int) AtomicIAdd 129 23 24 21
131: 6(int) Load 8(i64)
132: 6(int) IAdd 131 130
Store 8(i64) 132
133: 87(ptr) AccessChain 86(s) 18
134: 6(int) AtomicExchange 133 23 24 21
135: 6(int) Load 8(i64)
136: 6(int) IAdd 135 134
Store 8(i64) 136
137: 87(ptr) AccessChain 86(s) 18
138: 6(int) Load 8(i64)
139: 6(int) AtomicCompareExchange 137 23 24 24 138 21
140: 6(int) Load 8(i64)
141: 6(int) IAdd 140 139
Store 8(i64) 141
142: 6(int) Load 8(i64)
143: 87(ptr) AccessChain 86(s) 18
Store 143 142
144: 10(int) Load 12(u64)
145: 92(ptr) AccessChain 86(s) 28
Store 145 144
Return
FunctionEnd

Test/spv.atomicInt64.comp Normal file
View File

@@ -0,0 +1,79 @@
#version 450 core
#extension GL_ARB_gpu_shader_int64: enable
#extension GL_NV_shader_atomic_int64: enable
layout(local_size_x = 16, local_size_y = 16) in;
layout(binding = 0) buffer Buffer
{
    int64_t i64;
    uint64_t u64;
} buf;
struct Struct
{
    int64_t i64;
    uint64_t u64;
};
shared Struct s;
void main()
{
    const int64_t i64c = -24;
    const uint64_t u64c = 0xF00000000F;
    // Test shader storage block
    int64_t i64 = 0;
    uint64_t u64 = 0;
    i64 += atomicMin(buf.i64, i64c);
    u64 += atomicMin(buf.u64, u64c);
    i64 += atomicMax(buf.i64, i64c);
    u64 += atomicMax(buf.u64, u64c);
    i64 += atomicAnd(buf.i64, i64c);
    u64 += atomicAnd(buf.u64, u64c);
    i64 += atomicOr(buf.i64, i64c);
    u64 += atomicOr(buf.u64, u64c);
    i64 += atomicXor(buf.i64, i64c);
    u64 += atomicXor(buf.u64, u64c);
    i64 += atomicAdd(buf.i64, i64c);
    i64 += atomicExchange(buf.i64, i64c);
    i64 += atomicCompSwap(buf.i64, i64c, i64);
    buf.i64 = i64;
    buf.u64 = u64;
    // Test shared variable
    i64 = 0;
    u64 = 0;
    i64 += atomicMin(s.i64, i64c);
    u64 += atomicMin(s.u64, u64c);
    i64 += atomicMax(s.i64, i64c);
    u64 += atomicMax(s.u64, u64c);
    i64 += atomicAnd(s.i64, i64c);
    u64 += atomicAnd(s.u64, u64c);
    i64 += atomicOr(s.i64, i64c);
    u64 += atomicOr(s.u64, u64c);
    i64 += atomicXor(s.i64, i64c);
    u64 += atomicXor(s.u64, u64c);
    i64 += atomicAdd(s.i64, i64c);
    i64 += atomicExchange(s.i64, i64c);
    i64 += atomicCompSwap(s.i64, i64c, i64);
    s.i64 = i64;
    s.u64 = u64;
}

View File

@@ -923,6 +923,32 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
            "\n");
    }
#ifdef NV_EXTENSIONS
    if (profile != EEsProfile && version >= 440) {
        commonBuiltins.append(
            "uint64_t atomicMin(coherent volatile inout uint64_t, uint64_t);"
            " int64_t atomicMin(coherent volatile inout int64_t, int64_t);"
            "uint64_t atomicMax(coherent volatile inout uint64_t, uint64_t);"
            " int64_t atomicMax(coherent volatile inout int64_t, int64_t);"
            "uint64_t atomicAnd(coherent volatile inout uint64_t, uint64_t);"
            " int64_t atomicAnd(coherent volatile inout int64_t, int64_t);"
            "uint64_t atomicOr (coherent volatile inout uint64_t, uint64_t);"
            " int64_t atomicOr (coherent volatile inout int64_t, int64_t);"
            "uint64_t atomicXor(coherent volatile inout uint64_t, uint64_t);"
            " int64_t atomicXor(coherent volatile inout int64_t, int64_t);"
            " int64_t atomicAdd(coherent volatile inout int64_t, int64_t);"
            " int64_t atomicExchange(coherent volatile inout int64_t, int64_t);"
            " int64_t atomicCompSwap(coherent volatile inout int64_t, int64_t, int64_t);"
            "\n");
    }
#endif
    if ((profile == EEsProfile && version >= 310) ||
        (profile != EEsProfile && version >= 450)) {
        commonBuiltins.append(

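The declarations above add both int64_t and uint64_t overloads for atomicMin/Max/And/Or/Xor, but only int64_t overloads for atomicAdd, atomicExchange and atomicCompSwap. A small sketch of what should consequently resolve (illustrative names; the commented-out call is expected not to match any declared overload):

    #version 450 core
    #extension GL_ARB_gpu_shader_int64 : enable
    #extension GL_NV_shader_atomic_int64 : enable
    layout(local_size_x = 1) in;
    layout(binding = 0) buffer Data { int64_t i; uint64_t u; } data;   // illustrative
    void main()
    {
        atomicAnd(data.u, uint64_t(0xFF));   // uint64_t overload declared above
        atomicAdd(data.i, int64_t(1));       // int64_t overload declared above
        // atomicAdd(data.u, uint64_t(1));   // no uint64_t atomicAdd overload is declared above
    }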
View File

@@ -1551,6 +1551,23 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan
        break;
    }
#ifdef NV_EXTENSIONS
    case EOpAtomicAdd:
    case EOpAtomicMin:
    case EOpAtomicMax:
    case EOpAtomicAnd:
    case EOpAtomicOr:
    case EOpAtomicXor:
    case EOpAtomicExchange:
    case EOpAtomicCompSwap:
    {
        if (arg0->getType().getBasicType() == EbtInt64 || arg0->getType().getBasicType() == EbtUint64)
            requireExtensions(loc, 1, &E_GL_NV_shader_atomic_int64, fnCandidate.getName().c_str());
        break;
    }
#endif
    case EOpInterpolateAtCentroid:
    case EOpInterpolateAtSample:
    case EOpInterpolateAtOffset:

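With the check above, calling any of these atomic built-ins on a 64-bit operand requires GL_NV_shader_atomic_int64 to be enabled; otherwise requireExtensions() reports an error. A sketch of a shader that should be rejected for that reason (illustrative names):

    #version 450 core
    #extension GL_ARB_gpu_shader_int64 : enable
    // GL_NV_shader_atomic_int64 deliberately not enabled
    layout(local_size_x = 1) in;
    layout(binding = 0) buffer Data { int64_t i; } data;   // illustrative
    void main()
    {
        // arg0 is EbtInt64, so builtInOpCheck should require GL_NV_shader_atomic_int64 here.
        atomicAdd(data.i, int64_t(1));
    }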
View File

@@ -211,6 +211,7 @@ void TParseVersions::initializeExtensionBehavior()
    extensionBehavior[E_GL_NV_viewport_array2] = EBhDisable;
    extensionBehavior[E_GL_NV_stereo_view_rendering] = EBhDisable;
    extensionBehavior[E_GL_NVX_multiview_per_view_attributes] = EBhDisable;
    extensionBehavior[E_GL_NV_shader_atomic_int64] = EBhDisable;
#endif
    // AEP
@@ -343,6 +344,7 @@ void TParseVersions::getPreamble(std::string& preamble)
            "#define GL_NV_sample_mask_override_coverage 1\n"
            "#define GL_NV_geometry_shader_passthrough 1\n"
            "#define GL_NV_viewport_array2 1\n"
            "#define GL_NV_shader_atomic_int64 1\n"
#endif
            ;

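Because the preamble now defines GL_NV_shader_atomic_int64 as 1 (when glslang is built with NV_EXTENSIONS), shader code can feature-test the extension with the preprocessor before enabling it; a minimal sketch:

    #version 450 core
    #extension GL_ARB_gpu_shader_int64 : enable
    #ifdef GL_NV_shader_atomic_int64
    #extension GL_NV_shader_atomic_int64 : enable
    #endif
    layout(local_size_x = 1) in;
    layout(binding = 0) buffer Data { int64_t i; } data;   // illustrative
    void main()
    {
    #ifdef GL_NV_shader_atomic_int64
        atomicAdd(data.i, int64_t(1));
    #endif
    }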
View File

@@ -182,6 +182,7 @@ const char* const E_SPV_NV_geometry_shader_passthrough = "GL_NV_geometr
const char* const E_GL_NV_viewport_array2 = "GL_NV_viewport_array2";
const char* const E_GL_NV_stereo_view_rendering = "GL_NV_stereo_view_rendering";
const char* const E_GL_NVX_multiview_per_view_attributes = "GL_NVX_multiview_per_view_attributes";
const char* const E_GL_NV_shader_atomic_int64 = "GL_NV_shader_atomic_int64";
// Arrays of extensions for the above viewportEXTs duplications

View File

@@ -410,7 +410,7 @@ INSTANTIATE_TEST_CASE_P(
        "spv.int16.frag",
        "spv.shaderBallotAMD.comp",
        "spv.shaderFragMaskAMD.frag",
        "spv.textureGatherBiasLod.frag"
        "spv.textureGatherBiasLod.frag",
    })),
    FileNameAsCustomTestSuffix
);
@@ -428,6 +428,7 @@ INSTANTIATE_TEST_CASE_P(
        "spv.stereoViewRendering.tesc",
        "spv.multiviewPerViewAttributes.vert",
        "spv.multiviewPerViewAttributes.tesc",
        "spv.atomicInt64.comp",
    })),
    FileNameAsCustomTestSuffix
);