glslang/Test/spv.intcoopmat.comp

#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_NV_cooperative_matrix : enable
#extension GL_NV_integer_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_buffer_reference : enable

// Compute workgroup size: 64 x 1 x 1 invocations.
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// X is an ordinary compile-time constant.
const int X = 8;
// Y is a specialization constant (constant_id 0) whose default value is 2.
layout(constant_id = 0) const int Y = 2;
// Z is derived from the specialization constant Y; with the default Y it is 16.
const int Z = X*Y;

// Global cooperative matrices, signed (icoopmatNV) and unsigned (ucoopmatNV).
// Type parameters are <component bits, scope, rows, columns>: 8-bit components,
// subgroup scope, Z rows (Z depends on a specialization constant), 8 columns.
// Each flavor is declared once as a single matrix and once as an array of 3.
icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC;
icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC2[3];
ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC;
ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC2[3];

// Arrays whose sizes come from .length() of the global cooperative matrices,
// both directly (miC, muC) and through an array element (miC2[1], muC2[1]).
// Note that uarr and uarr2 are declared as int despite the "u" prefix.
int iarr[miC.length()];
int iarr2[miC2[1].length()];
int uarr[muC.length()];
int uarr2[muC2[1].length()];

// Constant cooperative matrices, each constructed from the scalar 1.
// mD: signed, 32-bit components, Z x 8 (row count depends on a specialization constant).
// mD2: unsigned, 8-bit components, 8 x 8.
const icoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = icoopmatNV<32, gl_ScopeSubgroup, Z, 8>(1);
const ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> mD2 = ucoopmatNV<8, gl_ScopeSubgroup, 8, 8>(1);

// Plain structure of three ints.
struct S { int a; int b; int c; };

// Constant instance of S built with the structure constructor.
// It is not referenced anywhere else in the visible shader.
const S s = S(12, 23, 34);

// Coherent buffer block declared with the buffer_reference layout qualifier.
// Holds a fixed-size uint array y (1024*1024 elements) followed by an unsized
// uint array x. The type name Block is also used as a member type inside
// Block16 (field b). It uses the same set/binding (0, 0) as Block16.
layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
    uint y[1024*1024];
    uint x[];
} block;

// Coherent storage buffer at set 0, binding 0, accessed through the instance
// name block8. Despite the "16" in the block name, its arrays hold int8_t
// elements: a fixed-size array y (1024*1024 elements) and an unsized array x.
layout(set = 0, binding = 0) coherent buffer Block16 {
    int8_t y[1024*1024];
    int8_t x[];

    // Member of the buffer_reference type Block; main() reaches block8.b.x through it.
    Block b;
} block8;

// Takes a signed 8-bit 8x8 cooperative matrix by value and returns its unary negation.
icoopmatNV<8, gl_ScopeSubgroup, 8, 8> ineg(icoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return -m; }
// Takes an unsigned 8-bit 8x8 cooperative matrix by value and returns it multiplied by the scalar uint8_t(2).
ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> umul(ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return m * uint8_t(2); }

// SC is a specialization constant (constant_id 2) whose default value is 1.
layout(constant_id = 2) const int SC = 1;
// Two-dimensional array of unsigned 32-bit cooperative matrices in which both
// the matrix dimensions and both array sizes are the specialization constant SC.
ucoopmatNV<32, gl_ScopeSubgroup, SC, SC> scm[SC][SC];

// sized for icoopmatNV<8, gl_ScopeSubgroup, 16, 16>
// The expression evaluates to 32 uvec4 elements (16*16*2 / 16).
// NOTE(review): the factor 2 reads like 2 bytes per component, whereas the
// matrix named above has 8-bit components; possibly carried over from a
// 16-bit variant of this test — confirm before relying on the size.
shared uvec4 shmatrix[16*16*2/16];

// Entry point. Exercises integer cooperative matrix features one after another:
// construction, arithmetic, conversion to float matrices, component indexing,
// load/store through several kinds of buffers, multiply-add, length(), arrays
// of matrices, function calls, compound assignment, and shared-memory load/store.
void main()
{
    // 16 x 8 matrices with 8-bit components, built from the scalar 2. The column
    // count is written as the constant expression (2>1?8:4), which evaluates to 8.
    ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mu = ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);
    icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mi = icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);

    // Matrix + matrix, matrix - matrix, unary negation, matrix * scalar.
    mu = mu + mu;
    mu = mu - mu;
    mi = -mi;
    mi = mi * int8_t(2);

    // Construct 16-bit and 32-bit float matrices of the same 16 x 8 shape from
    // the unsigned (mu) and signed (mi) integer matrices.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_0 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mu);
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_0 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mu);
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_1 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mi);
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_1 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mi);

    // Component access: read component 1 of mu, then write component 0 of mi
    // with the value converted to int8_t.
    uint8_t x = mu[1];
    mi[0] = int8_t(x);

    // Load/store through the unsized arrays: block.x (uint), block8.x (int8_t),
    // and block8.b.x (uint, reached through the Block member of block8).
    // The trailing arguments are element offset 16, stride 128, colMajor false
    // (argument order per GL_NV_cooperative_matrix).
    coopMatLoadNV(mi, block.x, 16, 128, false);
    coopMatStoreNV(mi, block.x, 16, 128, false);
    coopMatLoadNV(mu, block8.x, 16, 128, false);
    coopMatStoreNV(mu, block8.x, 16, 128, false);
    coopMatLoadNV(mi, block8.b.x, 16, 128, false);
    coopMatStoreNV(mi, block8.b.x, 16, 128, false);

    // Multiply-add with A (16 x 8), B (8 x 8) and C (16 x 8); the result is
    // stored in D (16 x 8). A, B and C are declared without initializers.
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> A;
    ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> B;
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> C;
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> D;
    D = coopMatMulAddNV(A, B, C);

    // length() called on a local matrix.
    int l = D.length();


    // Local array of five matrices; writes component 0 of element 3.
    icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
    a[3][0] = int8_t(1);

    // Reads component 1 of the global constant matrix mD (32-bit signed components).
    int md1 = mD[1];

    // Indexes the result of the compound assignment (mi += mi) with the literal
    // index 1234 and accumulates that component into md1.
    md1 += (mi += mi)[1234];

    // Whole-matrix assignment between elements of the global array muC2, then a
    // component of the signed array miC2 assigned to a component of the unsigned muC2.
    muC2[0] = muC2[1];
    muC2[1][0] = (miC2[2][0]);

    // Same load/store calls as above, this time through the fixed-size y arrays.
    coopMatLoadNV(mi, block.y, 16, 128, false);
    coopMatStoreNV(mi, block.y, 16, 128, false);
    coopMatLoadNV(mu, block8.y, 16, 128, false);
    coopMatStoreNV(mu, block8.y, 16, 128, false);

    // 8 x 8 matrices declared without initializers, used for the function-call
    // and compound-assignment cases below.
    icoopmatNV<8, gl_ScopeSubgroup, 8, 8> p1;
    ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> p2;

    // Pass matrices to, and return them from, user-defined functions.
    p1 = ineg(p1);
    p2 = umul(p2);

    // Compound division of each matrix by itself.
    p1 /= p1;
    p2 /= p2;

    // Compound multiplication by a scalar of the matching 8-bit type.
    p1 *= int8_t(2);
    p2 *= uint8_t(4);

    // Load and store a signed 16 x 8 matrix through the shared uvec4 array
    // shmatrix, with element offset 1, stride 2, colMajor false.
    icoopmatNV<8, gl_ScopeSubgroup, 16, 8> ms;
    coopMatLoadNV(ms, shmatrix, 1, 2, false);
    coopMatStoreNV(ms, shmatrix, 1, 2, false);

}
GL_NV_integer_cooperative_matrix support 2019-08-22 20:28:00 -05:00
			`#version 450 core`
			`#extension GL_KHR_memory_scope_semantics : enable`
			`#extension GL_NV_cooperative_matrix : enable`
			`#extension GL_NV_integer_cooperative_matrix : enable`
			`#extension GL_EXT_shader_explicit_arithmetic_types : enable`
			`#extension GL_EXT_buffer_reference : enable`

			`layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;`

			`const int X = 8;`
			`layout(constant_id = 0) const int Y = 2;`
			`const int Z = X*Y;`

			`icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC;`
			`icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC2[3];`
			`ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC;`
			`ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC2[3];`

			`int iarr[miC.length()];`
			`int iarr2[miC2[1].length()];`
			`int uarr[muC.length()];`
			`int uarr2[muC2[1].length()];`

			`const icoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = icoopmatNV<32, gl_ScopeSubgroup, Z, 8>(1);`
			`const ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> mD2 = ucoopmatNV<8, gl_ScopeSubgroup, 8, 8>(1);`

			`struct S { int a; int b; int c; };`

			`const S s = S(12, 23, 34);`

			`layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {`
			`uint y[1024*1024];`
			`uint x[];`
			`} block;`

			`layout(set = 0, binding = 0) coherent buffer Block16 {`
			`int8_t y[1024*1024];`
			`int8_t x[];`

			`Block b;`
			`} block8;`

			`icoopmatNV<8, gl_ScopeSubgroup, 8, 8> ineg(icoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return -m; }`
			`ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> umul(ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return m * uint8_t(2); }`

			`layout(constant_id = 2) const int SC = 1;`
			`ucoopmatNV<32, gl_ScopeSubgroup, SC, SC> scm[SC][SC];`

			`// sized for icoopmatNV<8, gl_ScopeSubgroup, 16, 16>`
			`shared uvec4 shmatrix[16*16*2/16];`

			`void main()`
			`{`
			`ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mu = ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);`
			`icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mi = icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);`

			`mu = mu + mu;`
			`mu = mu - mu;`
			`mi = -mi;`
			`mi = mi * int8_t(2);`

			`fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_0 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mu);`
			`fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_0 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mu);`
			`fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_1 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mi);`
			`fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_1 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mi);`

			`uint8_t x = mu[1];`
			`mi[0] = int8_t(x);`

			`coopMatLoadNV(mi, block.x, 16, 128, false);`
			`coopMatStoreNV(mi, block.x, 16, 128, false);`
			`coopMatLoadNV(mu, block8.x, 16, 128, false);`
			`coopMatStoreNV(mu, block8.x, 16, 128, false);`
			`coopMatLoadNV(mi, block8.b.x, 16, 128, false);`
			`coopMatStoreNV(mi, block8.b.x, 16, 128, false);`

			`ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> A;`
			`ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> B;`
			`ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> C;`
			`ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> D;`
			`D = coopMatMulAddNV(A, B, C);`

			`int l = D.length();`


			`icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];`
			`a[3][0] = int8_t(1);`

			`int md1 = mD[1];`

			`md1 += (mi += mi)[1234];`

			`muC2[0] = muC2[1];`
			`muC2[1][0] = (miC2[2][0]);`

			`coopMatLoadNV(mi, block.y, 16, 128, false);`
			`coopMatStoreNV(mi, block.y, 16, 128, false);`
			`coopMatLoadNV(mu, block8.y, 16, 128, false);`
			`coopMatStoreNV(mu, block8.y, 16, 128, false);`

			`icoopmatNV<8, gl_ScopeSubgroup, 8, 8> p1;`
			`ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> p2;`

			`p1 = ineg(p1);`
			`p2 = umul(p2);`

			`p1 /= p1;`
			`p2 /= p2;`

			`p1 *= int8_t(2);`
			`p2 *= uint8_t(4);`

			`icoopmatNV<8, gl_ScopeSubgroup, 16, 8> ms;`
			`coopMatLoadNV(ms, shmatrix, 1, 2, false);`
			`coopMatStoreNV(ms, shmatrix, 1, 2, false);`

			`}`