Squashed 'deps/SPIRV-Cross/' changes from 9c57364f18..8aa6731925

8aa6731925 Merge pull request #1065 from KhronosGroup/msvc-2013-workaround 909040e2eb MSVC 2013: Work around another compiler bug with array init. 53ab2144b9 Merge pull request #1064 from KhronosGroup/fix-1062 e0cd8595a4 Merge pull request #1063 from KhronosGroup/fix-1061 50342966c0 Fall back to complex loop if non-trivial continue block is found. fa9af7223a Add test shaders for NonUniformEXT propagation. d12b54bbb4 Propagate NonUniformEXT to dependent expressions. 13378ad1ac Add simple test for extended debug operations. 6d9c502a3a Merge branch 'master' of git://github.com/lifpan/SPIRV-Cross 5ca8779044 Parse SPIR-V debug information extended instructions, as well as OpNoLine. c5904dd245 Merge pull request #1059 from KhronosGroup/fix-1056 4056d0b74e Don't use scalar dot(). 041f103d44 MSL/HLSL: Support scalar reflect and refract. 9a6e2534e9 Merge pull request #1058 from KhronosGroup/fix-1054 fc9fe4e480 Fix variable scope when an if or else block dominates a variable. 3af18e741f Merge pull request #1055 from cdavis5e/msl21-frag-subgroup-builtins 31b6c93516 MSL: Support SubgroupLocalInvocationId and SubgroupSize in all stages. 41399fc899 Merge pull request #1051 from KhronosGroup/fix-1049 f8b084de61 MSL/HLSL: Support OpOuterProduct. 04e29895a3 Merge pull request #1001 from cdavis5e/msl-multiview 7eecf5a46b MSL: Support SPV_KHR_multiview. 8ee8e60f70 Merge pull request #1048 from KhronosGroup/fix-1047 ff87419607 Deal with scalar input values for distance/length/normalize. d1bdb6d491 Merge pull request #1046 from KhronosGroup/texture-fp16-coord 964ec44822 Merge pull request #1045 from KhronosGroup/c-api-get-declared-struct-member-size 1543bdaf7b Run format_all.sh. 581ed0fd59 HLSL: Does not support case-fallthrough. c76b99b711 Handle more cases with FP16 and texture sampling. 656d129c00 Add C API for get_declared_struct_member_size. 45805857e5 MSL: De-virtualize get_declared_struct_member_size. 02b2a1015d MSL: Fix minor XCode /analyze warning. 8f6939cb0d Merge pull request #1041 from KhronosGroup/fix-1011 4bbf343a7f Merge pull request #1043 from KhronosGroup/fix-1042 bcef66fbf3 Fix declaration of loop variables with a Phi helper copy. 845628cd4e Merge pull request #1040 from KhronosGroup/fix-1037 ab3798fd91 MSL: Add support for SubgroupSize / SubgroupInvocationID in fragment. 048f2380f3 MSL: Support custom bindings for argument buffer itself. git-subtree-dir: deps/SPIRV-Cross git-subtree-split: 8aa67319253b55f874d1fc7eb85d201299a8f488
2024-11-26 17:50:56 +00:00 · 2019-07-09 18:39:57 +02:00 · 2019-07-09 18:39:57 +02:00 · 695837ef77
commit 695837ef77
parent 7016d8b51f
97 changed files with 3467 additions and 61 deletions
--- a/deps/SPIRV-Cross/CMakeLists.txt
+++ b/deps/SPIRV-Cross/CMakeLists.txt
@ -287,7 +287,7 @@ if (SPIRV_CROSS_STATIC)
 endif()

 set(spirv-cross-abi-major 0)
-set(spirv-cross-abi-minor 14)
+set(spirv-cross-abi-minor 16)
 set(spirv-cross-abi-patch 0)

 if (SPIRV_CROSS_SHARED)
@ -457,6 +457,10 @@ if (SPIRV_CROSS_CLI)
 				target_link_libraries(spirv-cross-msl-constexpr-test spirv-cross-c)
 				set_target_properties(spirv-cross-msl-constexpr-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}")

+				add_executable(spirv-cross-msl-resource-binding-test tests-other/msl_resource_bindings.cpp)
+				target_link_libraries(spirv-cross-msl-resource-binding-test spirv-cross-c)
+				set_target_properties(spirv-cross-msl-resource-binding-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}")
+
 				if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang"))
 					target_compile_options(spirv-cross-c-api-test PRIVATE -std=c89 -Wall -Wextra)
 				endif()
@ -469,6 +473,8 @@ if (SPIRV_CROSS_CLI)
 						COMMAND $<TARGET_FILE:spirv-cross-small-vector-test>)
 				add_test(NAME spirv-cross-msl-constexpr-test
 						COMMAND $<TARGET_FILE:spirv-cross-msl-constexpr-test> ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_constexpr_test.spv)
+				add_test(NAME spirv-cross-msl-resource-binding-test
+						COMMAND $<TARGET_FILE:spirv-cross-msl-resource-binding-test> ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_resource_binding.spv)
 				add_test(NAME spirv-cross-test
 						COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --parallel
 						${spirv-cross-externals}
--- a/deps/SPIRV-Cross/main.cpp
+++ b/deps/SPIRV-Cross/main.cpp
@ -514,6 +514,7 @@ struct CLIArguments
 	bool msl_domain_lower_left = false;
 	bool msl_argument_buffers = false;
 	bool msl_texture_buffer_native = false;
+	bool msl_multiview = false;
 	bool glsl_emit_push_constant_as_ubo = false;
 	bool glsl_emit_ubo_as_plain_uniforms = false;
 	bool emit_line_directives = false;
@ -592,6 +593,7 @@ static void print_help()
 	                "\t[--msl-argument-buffers]\n"
 	                "\t[--msl-texture-buffer-native]\n"
 	                "\t[--msl-discrete-descriptor-set <index>]\n"
+	                "\t[--msl-multiview]\n"
 	                "\t[--hlsl]\n"
 	                "\t[--reflect]\n"
 	                "\t[--shader-model]\n"
@ -750,6 +752,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
 		msl_opts.tess_domain_origin_lower_left = args.msl_domain_lower_left;
 		msl_opts.argument_buffers = args.msl_argument_buffers;
 		msl_opts.texture_buffer_native = args.msl_texture_buffer_native;
+		msl_opts.multiview = args.msl_multiview;
 		msl_comp->set_msl_options(msl_opts);
 		for (auto &v : args.msl_discrete_descriptor_sets)
 			msl_comp->add_discrete_descriptor_set(v);
@ -1069,6 +1072,7 @@ static int main_inner(int argc, char *argv[])
 	cbs.add("--msl-discrete-descriptor-set",
 	        [&args](CLIParser &parser) { args.msl_discrete_descriptor_sets.push_back(parser.next_uint()); });
 	cbs.add("--msl-texture-buffer-native", [&args](CLIParser &) { args.msl_texture_buffer_native = true; });
+	cbs.add("--msl-multiview", [&args](CLIParser &) { args.msl_multiview = true; });
 	cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
 	cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
 		auto old_name = parser.next_string();
--- a/deps/SPIRV-Cross/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp
+++ b/deps/SPIRV-Cross/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp
@ -66,7 +66,7 @@ inline bool spvSubgroupAllEqual(bool value)

 kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
 {
-    uint4 gl_SubgroupEqMask = 27 > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
+    uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
    uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
    uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
    uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
--- a/deps/SPIRV-Cross/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag
+++ b/deps/SPIRV-Cross/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag
@ -30,16 +30,23 @@ void frag_main()
        case 0:
        {
            _30 = 3;
+            j = _30;
+            _31 = 0;
+            j = _31;
+            break;
        }
        default:
        {
            j = _30;
            _31 = 0;
+            j = _31;
+            break;
        }
        case 1:
        case 11:
        {
            j = _31;
+            break;
        }
        case 2:
        {
@ -59,6 +66,8 @@ void frag_main()
        }
        case 4:
        {
+            i = 0;
+            break;
        }
        case 5:
        {
--- a/deps/SPIRV-Cross/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp
+++ b/deps/SPIRV-Cross/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp
@ -66,7 +66,7 @@ inline bool spvSubgroupAllEqual(bool value)

 kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
 {
-    uint4 gl_SubgroupEqMask = 27 > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
+    uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
    uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
    uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
    uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
--- a/deps/SPIRV-Cross/spirv_cfg.cpp
+++ b/deps/SPIRV-Cross/spirv_cfg.cpp
@ -74,8 +74,14 @@ bool CFG::is_back_edge(uint32_t to) const
 	// We have a back edge if the visit order is set with the temporary magic value 0.
 	// Crossing edges will have already been recorded with a visit order.
 	auto itr = visit_order.find(to);
-	assert(itr != end(visit_order));
-	return itr->second.get() == 0;
+	return itr != end(visit_order) && itr->second.get() == 0;
+}
+
+bool CFG::has_visited_forward_edge(uint32_t to) const
+{
+	// If > 0, we have visited the edge already, and this is not a back edge branch.
+	auto itr = visit_order.find(to);
+	return itr != end(visit_order) && itr->second.get() > 0;
 }

 bool CFG::post_order_visit(uint32_t block_id)
@ -83,8 +89,10 @@ bool CFG::post_order_visit(uint32_t block_id)
 	// If we have already branched to this block (back edge), stop recursion.
 	// If our branches are back-edges, we do not record them.
 	// We have to record crossing edges however.
-	if (visit_order[block_id].get() >= 0)
-		return !is_back_edge(block_id);
+	if (has_visited_forward_edge(block_id))
+		return true;
+	else if (is_back_edge(block_id))
+		return false;

 	// Block back-edges from recursively revisiting ourselves.
 	visit_order[block_id].get() = 0;
@ -123,9 +131,39 @@ bool CFG::post_order_visit(uint32_t block_id)
 	// This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners.
 	// To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block.
 	// This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator.
-	if (block.merge == SPIRBlock::MergeLoop)
-		if (post_order_visit(block.merge_block))
-			add_branch(block_id, block.merge_block);
+	// We could use has_visited_forward_edge, but this break code-gen where the merge block is unreachable in the CFG.
+	if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block))
+		add_branch(block_id, block.merge_block);
+
+	// If this is a selection merge, add an implied branch to the merge target.
+	// This is needed to avoid cases where an inner branch dominates the outer branch.
+	// This can happen if one of the branches exit early, e.g.:
+	// if (cond) { ...; break; } else { var = 100 } use_var(var);
+	// We can use the variable without a Phi since there is only one possible parent here.
+	// However, in this case, we need to hoist out the inner variable to outside the branch.
+	// Use same strategy as loops.
+	if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block))
+	{
+		// If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup.
+		// Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement
+		// will be hoisted out to outside the selection merge.
+		// If size > 1, the variable will be automatically hoisted, so we should not mess with it.
+		// Adding fake branches unconditionally breaks parameter preservation analysis,
+		// which looks at how variables are accessed through the CFG.
+		auto pred_itr = preceding_edges.find(block.next_block);
+		if (pred_itr != end(preceding_edges))
+		{
+			auto &pred = pred_itr->second;
+			if (pred.size() == 1 && *pred.begin() != block_id)
+				add_branch(block_id, block.next_block);
+		}
+		else
+		{
+			// If the merge block does not have any preceding edges, i.e. unreachable, hallucinate it.
+			// We're going to do code-gen for it, and domination analysis requires that we have at least one preceding edge.
+			add_branch(block_id, block.next_block);
+		}
+	}

 	// Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges.
 	visit_order[block_id].get() = ++visit_count;
--- a/deps/SPIRV-Cross/spirv_cfg.hpp
+++ b/deps/SPIRV-Cross/spirv_cfg.hpp
@ -127,6 +127,7 @@ private:
 	uint32_t visit_count = 0;

 	bool is_back_edge(uint32_t to) const;
+	bool has_visited_forward_edge(uint32_t to) const;
 };

 class DominatorBuilder
--- a/deps/SPIRV-Cross/spirv_common.hpp
+++ b/deps/SPIRV-Cross/spirv_common.hpp
@ -476,6 +476,7 @@ struct SPIRExtension : IVariant
 	{
 		Unsupported,
 		GLSL,
+		SPV_debug_info,
 		SPV_AMD_shader_ballot,
 		SPV_AMD_shader_explicit_vertex_parameter,
 		SPV_AMD_shader_trinary_minmax,
--- a/deps/SPIRV-Cross/spirv_cross.cpp
+++ b/deps/SPIRV-Cross/spirv_cross.cpp
@ -3079,6 +3079,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3

 	case OpArrayLength:
 	case OpLine:
+	case OpNoLine:
 		// Uses literals, but cannot be a phi variable or temporary, so ignore.
 		break;

@ -4266,6 +4267,7 @@ bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &resul
 	case OpGroupCommitReadPipe:
 	case OpGroupCommitWritePipe:
 	case OpLine:
+	case OpNoLine:
 		return false;

 	default:
--- a/deps/SPIRV-Cross/spirv_cross.hpp
+++ b/deps/SPIRV-Cross/spirv_cross.hpp
@ -258,7 +258,7 @@ public:
 	size_t get_declared_struct_size_runtime_array(const SPIRType &struct_type, size_t array_size) const;

 	// Returns the effective size of a buffer block struct member.
-	virtual size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const;
+	size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const;

 	// Returns a set of all global variables which are statically accessed
 	// by the control flow graph from the current entry point.
--- a/deps/SPIRV-Cross/spirv_cross_c.cpp
+++ b/deps/SPIRV-Cross/spirv_cross_c.cpp
@ -1623,6 +1623,16 @@ spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler c
 	return SPVC_SUCCESS;
 }

+spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type struct_type, unsigned index, size_t *size)
+{
+	SPVC_BEGIN_SAFE_SCOPE
+	{
+		*size = compiler->compiler->get_declared_struct_member_size(*static_cast<const SPIRType *>(struct_type), index);
+	}
+	SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT)
+	return SPVC_SUCCESS;
+}
+
 spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *offset)
 {
 	SPVC_BEGIN_SAFE_SCOPE
--- a/deps/SPIRV-Cross/spirv_cross_c.h
+++ b/deps/SPIRV-Cross/spirv_cross_c.h
@ -33,7 +33,7 @@ extern "C" {
 /* Bumped if ABI or API breaks backwards compatibility. */
 #define SPVC_C_API_VERSION_MAJOR 0
 /* Bumped if APIs or enumerations are added in a backwards compatible way. */
-#define SPVC_C_API_VERSION_MINOR 14
+#define SPVC_C_API_VERSION_MINOR 16
 /* Bumped if internal implementation details change. */
 #define SPVC_C_API_VERSION_PATCH 0

@ -303,6 +303,7 @@ SPVC_PUBLIC_API void spvc_msl_resource_binding_init(spvc_msl_resource_binding *b
 #define SPVC_MSL_PUSH_CONSTANT_BINDING (0)
 #define SPVC_MSL_SWIZZLE_BUFFER_BINDING (~(1u))
 #define SPVC_MSL_BUFFER_SIZE_BUFFER_BINDING (~(2u))
+#define SPVC_MSL_ARGUMENT_BUFFER_BINDING (~(3u))

 /* Obsolete. Sticks around for backwards compatibility. */
 #define SPVC_MSL_AUX_BUFFER_STRUCT_VERSION 1
@ -659,6 +660,7 @@ SPVC_PUBLIC_API SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_typ
 SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, size_t *size);
 SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler,
                                                                                 spvc_type struct_type, size_t array_size, size_t *size);
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type type, unsigned index, size_t *size);

 SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler,
                                                                    spvc_type type, unsigned index, unsigned *offset);
--- a/deps/SPIRV-Cross/spirv_glsl.cpp
+++ b/deps/SPIRV-Cross/spirv_glsl.cpp
@ -832,8 +832,6 @@ void CompilerGLSL::emit_struct(SPIRType &type)
 string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
 {
 	string res;
-	if (flags.get(DecorationNonUniformEXT))
-		res += "nonuniformEXT ";
 	//if (flags & (1ull << DecorationSmooth))
 	//    res += "smooth ";
 	if (flags.get(DecorationFlat))
@ -4602,7 +4600,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)

 	SmallVector<uint32_t> inherited_expressions;

-	uint32_t result_type = ops[0];
+	uint32_t result_type_id = ops[0];
 	uint32_t id = ops[1];
 	uint32_t img = ops[2];
 	uint32_t coord = ops[3];
@ -4613,8 +4611,14 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 	bool fetch = false;
 	const uint32_t *opt = nullptr;

+	auto &result_type = get<SPIRType>(result_type_id);
+
 	inherited_expressions.push_back(coord);

+	// Make sure non-uniform decoration is back-propagated to where it needs to be.
+	if (has_decoration(img, DecorationNonUniformEXT))
+		propagate_nonuniform_qualifier(img);
+
 	switch (op)
 	{
 	case OpImageSampleDrefImplicitLod:
@ -4771,14 +4775,21 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 			image_is_depth = true;

 		if (image_is_depth)
-			expr = remap_swizzle(get<SPIRType>(result_type), 1, expr);
+			expr = remap_swizzle(result_type, 1, expr);
+	}
+
+	if (!backend.support_small_type_sampling_result && result_type.width < 32)
+	{
+		// Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
+		// Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
+		expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
 	}

 	// Deals with reads from MSL. We might need to downconvert to fewer components.
 	if (op == OpImageRead)
-		expr = remap_swizzle(get<SPIRType>(result_type), 4, expr);
+		expr = remap_swizzle(result_type, 4, expr);

-	emit_op(result_type, id, expr, forward);
+	emit_op(result_type_id, id, expr, forward);
 	for (auto &inherit : inherited_expressions)
 		inherit_expression_dependencies(id, inherit);

@ -7107,18 +7118,23 @@ string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
 	return expr;
 }

+void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
+{
+	if (var.allocate_temporary_copy)
+	{
+		auto &type = get<SPIRType>(var.basetype);
+		auto &flags = get_decoration_bitset(var.self);
+		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
+	}
+}
+
 void CompilerGLSL::flush_variable_declaration(uint32_t id)
 {
 	auto *var = maybe_get<SPIRVariable>(id);
 	if (var && var->deferred_declaration)
 	{
 		statement(variable_decl_function_local(*var), ";");
-		if (var->allocate_temporary_copy)
-		{
-			auto &type = get<SPIRType>(var->basetype);
-			auto &flags = ir.meta[id].decoration.decoration_flags;
-			statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", id, "_copy")), ";");
-		}
+		emit_variable_temporary_copies(*var);
 		var->deferred_declaration = false;
 	}
 }
@ -7553,8 +7569,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		unroll_array_from_complex_load(id, ptr, expr);

 		auto &type = get<SPIRType>(result_type);
-		if (has_decoration(id, DecorationNonUniformEXT))
+		// Shouldn't need to check for ID, but current glslang codegen requires it in some cases
+		// when loading Image/Sampler descriptors. It does not hurt to check ID as well.
+		if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT))
+		{
+			propagate_nonuniform_qualifier(ptr);
 			convert_non_uniform_expression(type, expr);
+		}

 		if (ptr_expression)
 			ptr_expression->need_transpose = old_need_transpose;
@ -7636,6 +7657,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	{
 		auto *var = maybe_get<SPIRVariable>(ops[0]);

+		if (has_decoration(ops[0], DecorationNonUniformEXT))
+			propagate_nonuniform_qualifier(ops[0]);
+
 		if (var && var->statically_assigned)
 			var->static_expression = ops[1];
 		else if (var && var->loop_variable && !var->loop_variable_enable)
@ -8018,7 +8042,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t rhs = ops[2];
 		bool pointer = get<SPIRType>(result_type).pointer;

-		if (expression_is_lvalue(rhs) && !pointer)
+		auto *chain = maybe_get<SPIRAccessChain>(rhs);
+		if (chain)
+		{
+			// Cannot lower to a SPIRExpression, just copy the object.
+			auto &e = set<SPIRAccessChain>(id, *chain);
+			e.self = id;
+		}
+		else if (expression_is_lvalue(rhs) && !pointer)
 		{
 			// Need a copy.
 			// For pointer types, we copy the pointer itself.
@ -8037,6 +8068,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 				auto *var = maybe_get_backing_variable(rhs);
 				e.loaded_from = var ? var->self : 0;
 			}
+
+			// If we're copying an access chain, need to inherit the read expressions.
+			auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
+			if (rhs_expr)
+				e.implied_read_expressions = rhs_expr->implied_read_expressions;
 		}
 		break;
 	}
@ -8971,6 +9007,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
 		emit_sampled_image_op(result_type, id, ops[2], ops[3]);
+		inherit_expression_dependencies(id, ops[2]);
+		inherit_expression_dependencies(id, ops[3]);
 		break;
 	}

@ -9392,6 +9430,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		{
 			emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
 		}
+		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
+		{
+			break; // Ignore SPIR-V debug information extended instructions.
+		}
 		else
 		{
 			statement("// unimplemented ext op ", instruction.op);
@ -9658,6 +9700,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;
 	}

+	case OpNoLine:
+		break;
+
 	default:
 		statement("// unimplemented op ", instruction.op);
 		break;
@ -9873,7 +9918,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B

 const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
 {
-	return flags_to_qualifiers_glsl(expression_type(id), ir.meta[id].decoration.decoration_flags);
+	auto &type = expression_type(id);
+	bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
+	if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
+	{
+		// Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
+		auto &result_type = get<SPIRType>(type.image.type);
+		if (result_type.width < 32)
+			return "mediump ";
+	}
+	return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
 }

 string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
@ -10089,15 +10143,22 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
 	switch (imagetype.basetype)
 	{
 	case SPIRType::Int:
+	case SPIRType::Short:
+	case SPIRType::SByte:
 		res = "i";
 		break;
 	case SPIRType::UInt:
+	case SPIRType::UShort:
+	case SPIRType::UByte:
 		res = "u";
 		break;
 	default:
 		break;
 	}

+	// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
+	// We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter.
+
 	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
 		return res + "subpassInput" + (type.image.ms ? "MS" : "");

@ -11189,7 +11250,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
 			}

 			default:
-				SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics.");
+				block.disable_block_optimization = true;
+				force_recompile();
+				begin_scope(); // We'll see an end_scope() later.
+				return false;
 			}

 			begin_scope();
@ -11263,7 +11327,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
 			}

 			default:
-				SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics.");
+				block.disable_block_optimization = true;
+				force_recompile();
+				begin_scope(); // We'll see an end_scope() later.
+				return false;
 			}

 			begin_scope();
@ -11325,8 +11392,13 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));

 	// If we have loop variables, stop masking out access to the variable now.
-	for (auto var : block.loop_variables)
-		get<SPIRVariable>(var).loop_variable_enable = true;
+	for (auto var_id : block.loop_variables)
+	{
+		auto &var = get<SPIRVariable>(var_id);
+		var.loop_variable_enable = true;
+		// We're not going to declare the variable directly, so emit a copy here.
+		emit_variable_temporary_copies(var);
+	}

 	// Remember deferred declaration state. We will restore it before returning.
 	SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
@ -11667,7 +11739,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 			}

 			auto &case_block = get<SPIRBlock>(target_block);
-			if (i + 1 < num_blocks &&
+			if (backend.support_case_fallthrough && i + 1 < num_blocks &&
 			    execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
 			{
 				// We will fall through here, so just terminate the block chain early.
@ -12190,3 +12262,37 @@ void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
 		statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
 	}
 }
+
+void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id)
+{
+	// SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen,
+	// we need to know NonUniformEXT a little earlier, when the resource is actually loaded.
+	// Back-propagate the qualifier based on the expression dependency chain.
+
+	if (!has_decoration(id, DecorationNonUniformEXT))
+	{
+		set_decoration(id, DecorationNonUniformEXT);
+		force_recompile();
+	}
+
+	auto *e = maybe_get<SPIRExpression>(id);
+	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
+	auto *chain = maybe_get<SPIRAccessChain>(id);
+	if (e)
+	{
+		for (auto &expr : e->expression_dependencies)
+			propagate_nonuniform_qualifier(expr);
+		for (auto &expr : e->implied_read_expressions)
+			propagate_nonuniform_qualifier(expr);
+	}
+	else if (combined)
+	{
+		propagate_nonuniform_qualifier(combined->image);
+		propagate_nonuniform_qualifier(combined->sampler);
+	}
+	else if (chain)
+	{
+		for (auto &expr : chain->implied_read_expressions)
+			propagate_nonuniform_qualifier(expr);
+	}
+}
--- a/deps/SPIRV-Cross/spirv_glsl.hpp
+++ b/deps/SPIRV-Cross/spirv_glsl.hpp
@ -404,6 +404,8 @@ protected:
 		bool array_is_value_type = true;
 		bool comparison_image_samples_scalar = false;
 		bool native_pointers = false;
+		bool support_small_type_sampling_result = false;
+		bool support_case_fallthrough = true;
 	} backend;

 	void emit_struct(SPIRType &type);
@ -432,6 +434,7 @@ protected:
 	bool flush_phi_required(uint32_t from, uint32_t to);
 	void flush_variable_declaration(uint32_t id);
 	void flush_undeclared_variables(SPIRBlock &block);
+	void emit_variable_temporary_copies(const SPIRVariable &var);

 	bool should_dereference(uint32_t id);
 	bool should_forward(uint32_t id);
@ -669,6 +672,8 @@ protected:
 	void fixup_type_alias();
 	void reorder_type_alias();

+	void propagate_nonuniform_qualifier(uint32_t id);
+
 private:
 	void init();
 };
--- a/deps/SPIRV-Cross/spirv_hlsl.cpp
+++ b/deps/SPIRV-Cross/spirv_hlsl.cpp
@ -1742,6 +1742,36 @@ void CompilerHLSL::emit_resources()
 		end_scope();
 		statement("");
 	}
+
+	if (requires_scalar_reflect)
+	{
+		// FP16/FP64? No templates in HLSL.
+		statement("float SPIRV_Cross_Reflect(float i, float n)");
+		begin_scope();
+		statement("return i - 2.0 * dot(n, i) * n;");
+		end_scope();
+		statement("");
+	}
+
+	if (requires_scalar_refract)
+	{
+		// FP16/FP64? No templates in HLSL.
+		statement("float SPIRV_Cross_Refract(float i, float n, float eta)");
+		begin_scope();
+		statement("float NoI = n * i;");
+		statement("float NoI2 = NoI * NoI;");
+		statement("float k = 1.0 - eta * eta * (1.0 - NoI2);");
+		statement("if (k < 0.0)");
+		begin_scope();
+		statement("return 0.0;");
+		end_scope();
+		statement("else");
+		begin_scope();
+		statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
+		end_scope();
+		end_scope();
+		statement("");
+	}
 }

 string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
@ -2449,6 +2479,10 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)

 	inherited_expressions.push_back(coord);

+	// Make sure non-uniform decoration is back-propagated to where it needs to be.
+	if (has_decoration(img, DecorationNonUniformEXT))
+		propagate_nonuniform_qualifier(img);
+
 	switch (op)
 	{
 	case OpImageSampleDrefImplicitLod:
@ -3245,6 +3279,45 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		break;
 	}

+	case GLSLstd450Normalize:
+		// HLSL does not support scalar versions here.
+		if (expression_type(args[0]).vecsize == 1)
+		{
+			// Returns -1 or 1 for valid input, sign() does the job.
+			emit_unary_func_op(result_type, id, args[0], "sign");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Reflect:
+		if (get<SPIRType>(result_type).vecsize == 1)
+		{
+			if (!requires_scalar_reflect)
+			{
+				requires_scalar_reflect = true;
+				force_recompile();
+			}
+			emit_binary_func_op(result_type, id, args[0], args[1], "SPIRV_Cross_Reflect");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Refract:
+		if (get<SPIRType>(result_type).vecsize == 1)
+		{
+			if (!requires_scalar_refract)
+			{
+				requires_scalar_refract = true;
+				force_recompile();
+			}
+			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_Refract");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
 	default:
 		CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
 		break;
@ -3389,6 +3462,9 @@ void CompilerHLSL::emit_load(const Instruction &instruction)
 		uint32_t id = ops[1];
 		uint32_t ptr = ops[2];

+		if (has_decoration(ptr, DecorationNonUniformEXT))
+			propagate_nonuniform_qualifier(ptr);
+
 		auto load_expr = read_access_chain(*chain);

 		bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
@ -3422,6 +3498,9 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 	// Make sure we trigger a read of the constituents in the access chain.
 	track_expression_read(chain.self);

+	if (has_decoration(chain.self, DecorationNonUniformEXT))
+		propagate_nonuniform_qualifier(chain.self);
+
 	SPIRType target_type;
 	target_type.basetype = SPIRType::UInt;
 	target_type.vecsize = type.vecsize;
@ -3954,6 +4033,31 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		break;
 	}

+	case OpOuterProduct:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		uint32_t a = ops[2];
+		uint32_t b = ops[3];
+
+		auto &type = get<SPIRType>(result_type);
+		string expr = type_to_glsl_constructor(type);
+		expr += "(";
+		for (uint32_t col = 0; col < type.columns; col++)
+		{
+			expr += to_enclosed_expression(a);
+			expr += " * ";
+			expr += to_extract_component_expression(b, col);
+			if (col + 1 < type.columns)
+				expr += ", ";
+		}
+		expr += ")";
+		emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
+		inherit_expression_dependencies(id, a);
+		inherit_expression_dependencies(id, b);
+		break;
+	}
+
 	case OpFMod:
 	{
 		if (!requires_op_fmod)
@ -4700,6 +4804,7 @@ string CompilerHLSL::compile()
 	backend.can_declare_arrays_inline = false;
 	backend.can_return_array = false;
 	backend.nonuniform_qualifier = "NonUniformResourceIndex";
+	backend.support_case_fallthrough = false;

 	fixup_type_alias();
 	reorder_type_alias();
--- a/deps/SPIRV-Cross/spirv_hlsl.hpp
+++ b/deps/SPIRV-Cross/spirv_hlsl.hpp
@ -167,6 +167,8 @@ private:
 	void replace_illegal_names() override;

 	Options hlsl_options;
+
+	// TODO: Refactor this to be more similar to MSL, maybe have some common system in place?
 	bool requires_op_fmod = false;
 	bool requires_fp16_packing = false;
 	bool requires_explicit_fp16_packing = false;
@ -179,6 +181,8 @@ private:
 	bool requires_inverse_2x2 = false;
 	bool requires_inverse_3x3 = false;
 	bool requires_inverse_4x4 = false;
+	bool requires_scalar_reflect = false;
+	bool requires_scalar_refract = false;
 	uint64_t required_textureSizeVariants = 0;
 	void require_texture_query_variant(const SPIRType &type);

--- a/deps/SPIRV-Cross/spirv_msl.cpp
+++ b/deps/SPIRV-Cross/spirv_msl.cpp
@ -105,8 +105,10 @@ void CompilerMSL::build_implicit_builtins()
 	    active_input_builtins.get(BuiltInSubgroupLtMask);
 	bool need_subgroup_ge_mask = !msl_options.is_ios() && (active_input_builtins.get(BuiltInSubgroupGeMask) ||
 	                                                       active_input_builtins.get(BuiltInSubgroupGtMask));
+	bool need_multiview = get_execution_model() == ExecutionModelVertex &&
+	                      (msl_options.multiview || active_input_builtins.get(BuiltInViewIndex));
 	if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
-	    needs_subgroup_invocation_id)
+	    need_multiview || needs_subgroup_invocation_id)
 	{
 		bool has_frag_coord = false;
 		bool has_sample_id = false;
@ -118,6 +120,7 @@ void CompilerMSL::build_implicit_builtins()
 		bool has_primitive_id = false;
 		bool has_subgroup_invocation_id = false;
 		bool has_subgroup_size = false;
+		bool has_view_idx = false;

 		ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
 			if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin)
@ -189,6 +192,22 @@ void CompilerMSL::build_implicit_builtins()
 				builtin_subgroup_size_id = var.self;
 				has_subgroup_size = true;
 			}
+
+			if (need_multiview)
+			{
+				if (builtin == BuiltInInstanceIndex)
+				{
+					// The view index here is derived from the instance index.
+					builtin_instance_idx_id = var.self;
+					has_instance_idx = true;
+				}
+
+				if (builtin == BuiltInViewIndex)
+				{
+					builtin_view_idx_id = var.self;
+					has_view_idx = true;
+				}
+			}
 		});

 		if (!has_frag_coord && need_subpass_input)
@ -246,7 +265,8 @@ void CompilerMSL::build_implicit_builtins()
 			mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var_id);
 		}

-		if (need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance))
+		if ((need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) ||
+		    (need_multiview && (!has_instance_idx || !has_view_idx)))
 		{
 			uint32_t offset = ir.increase_bound_by(2);
 			uint32_t type_id = offset;
@ -265,7 +285,7 @@ void CompilerMSL::build_implicit_builtins()
 			auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
 			ptr_type.self = type_id;

-			if (!has_vertex_idx)
+			if (need_vertex_params && !has_vertex_idx)
 			{
 				uint32_t var_id = ir.increase_bound_by(1);

@ -276,7 +296,7 @@ void CompilerMSL::build_implicit_builtins()
 				mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var_id);
 			}

-			if (!has_base_vertex)
+			if (need_vertex_params && !has_base_vertex)
 			{
 				uint32_t var_id = ir.increase_bound_by(1);

@ -287,7 +307,7 @@ void CompilerMSL::build_implicit_builtins()
 				mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var_id);
 			}

-			if (!has_instance_idx)
+			if (!has_instance_idx) // Needed by both multiview and tessellation
 			{
 				uint32_t var_id = ir.increase_bound_by(1);

@ -296,9 +316,30 @@ void CompilerMSL::build_implicit_builtins()
 				set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex);
 				builtin_instance_idx_id = var_id;
 				mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var_id);
+
+				if (need_multiview)
+				{
+					// Multiview shaders are not allowed to write to gl_Layer, ostensibly because
+					// it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
+					// Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
+					// gl_Layer is an output in vertex-pipeline shaders.
+					uint32_t type_ptr_out_id = ir.increase_bound_by(2);
+					SPIRType uint_type_ptr_out;
+					uint_type_ptr_out = uint_type;
+					uint_type_ptr_out.pointer = true;
+					uint_type_ptr_out.parent_type = type_id;
+					uint_type_ptr_out.storage = StorageClassOutput;
+					auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
+					ptr_out_type.self = type_id;
+					var_id = type_ptr_out_id + 1;
+					set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
+					set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
+					builtin_layer_id = var_id;
+					mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
+				}
 			}

-			if (!has_base_instance)
+			if (need_vertex_params && !has_base_instance)
 			{
 				uint32_t var_id = ir.increase_bound_by(1);

@ -308,6 +349,17 @@ void CompilerMSL::build_implicit_builtins()
 				builtin_base_instance_id = var_id;
 				mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id);
 			}
+
+			if (need_multiview && !has_view_idx)
+			{
+				uint32_t var_id = ir.increase_bound_by(1);
+
+				// Create gl_ViewIndex.
+				set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
+				set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex);
+				builtin_view_idx_id = var_id;
+				mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id);
+			}
 		}

 		if (need_tesc_params && (!has_invocation_id || !has_primitive_id))
@ -428,6 +480,17 @@ void CompilerMSL::build_implicit_builtins()
 		set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.buffer_size_buffer_index);
 		buffer_size_buffer_id = var_id;
 	}
+
+	if (needs_view_mask_buffer())
+	{
+		uint32_t var_id = build_constant_uint_array_pointer();
+		set_name(var_id, "spvViewMask");
+		// This should never match anything.
+		set_decoration(var_id, DecorationDescriptorSet, ~(4u));
+		set_decoration(var_id, DecorationBinding, msl_options.view_mask_buffer_index);
+		set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index);
+		view_mask_buffer_id = var_id;
+	}
 }

 void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id)
@ -708,10 +771,15 @@ string CompilerMSL::compile()
 	backend.comparison_image_samples_scalar = true;
 	backend.native_pointers = true;
 	backend.nonuniform_qualifier = "";
+	backend.support_small_type_sampling_result = true;

 	capture_output_to_buffer = msl_options.capture_output_to_buffer;
 	is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;

+	// Initialize array here rather than constructor, MSVC 2013 workaround.
+	for (auto &id : next_metal_resource_ids)
+		id = 0;
+
 	fixup_type_alias();
 	replace_illegal_names();

@ -731,6 +799,10 @@ string CompilerMSL::compile()
 		active_interface_variables.insert(swizzle_buffer_id);
 	if (buffer_size_buffer_id)
 		active_interface_variables.insert(buffer_size_buffer_id);
+	if (view_mask_buffer_id)
+		active_interface_variables.insert(view_mask_buffer_id);
+	if (builtin_layer_id)
+		active_interface_variables.insert(builtin_layer_id);

 	// Create structs to hold input, output and uniform variables.
 	// Do output first to ensure out. is declared at top of entry function.
@ -1020,7 +1092,6 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 					added_out = true;
 				}
 				type_id = get<SPIRVariable>(arg_id).basetype;
-				p_type = &get<SPIRType>(type_id);
 				uint32_t next_id = ir.increase_bound_by(1);
 				func.add_parameter(type_id, next_id, true);
 				set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
@ -2370,7 +2441,7 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
 		// Increment the current offset to be positioned immediately after the current member.
 		// Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here.
 		if (mbr_idx + 1 < mbr_cnt)
-			curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size(ib_type, mbr_idx));
+			curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size_msl(ib_type, mbr_idx));
 	}
 }

@ -3122,6 +3193,36 @@ void CompilerMSL::emit_custom_functions()
 			statement("");
 			break;

+		case SPVFuncImplReflectScalar:
+			// Metal does not support scalar versions of these functions.
+			statement("template<typename T>");
+			statement("inline T spvReflect(T i, T n)");
+			begin_scope();
+			statement("return i - T(2) * i * n * n;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplRefractScalar:
+			// Metal does not support scalar versions of these functions.
+			statement("template<typename T>");
+			statement("inline T spvRefract(T i, T n, T eta)");
+			begin_scope();
+			statement("T NoI = n * i;");
+			statement("T NoI2 = NoI * NoI;");
+			statement("T k = T(1) - eta * eta * (T(1) - NoI2);");
+			statement("if (k < T(0))");
+			begin_scope();
+			statement("return T(0);");
+			end_scope();
+			statement("else");
+			begin_scope();
+			statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
+			end_scope();
+			end_scope();
+			statement("");
+			break;
+
 		default:
 			break;
 		}
@ -4096,7 +4197,30 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		break;
 	}

-		// OpOuterProduct
+	case OpOuterProduct:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		uint32_t a = ops[2];
+		uint32_t b = ops[3];
+
+		auto &type = get<SPIRType>(result_type);
+		string expr = type_to_glsl_constructor(type);
+		expr += "(";
+		for (uint32_t col = 0; col < type.columns; col++)
+		{
+			expr += to_enclosed_expression(a);
+			expr += " * ";
+			expr += to_extract_component_expression(b, col);
+			if (col + 1 < type.columns)
+				expr += ", ";
+		}
+		expr += ")";
+		emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
+		inherit_expression_dependencies(id, a);
+		inherit_expression_dependencies(id, b);
+		break;
+	}

 	case OpIAddCarry:
 	case OpISubBorrow:
@ -4582,6 +4706,57 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		//        GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)
 		//        GLSLstd450InterpolateAtOffset

+	case GLSLstd450Distance:
+		// MSL does not support scalar versions here.
+		if (expression_type(args[0]).vecsize == 1)
+		{
+			// Equivalent to length(a - b) -> abs(a - b).
+			emit_op(result_type, id,
+			        join("abs(", to_unpacked_expression(args[0]), " - ", to_unpacked_expression(args[1]), ")"),
+			        should_forward(args[0]) && should_forward(args[1]));
+			inherit_expression_dependencies(id, args[0]);
+			inherit_expression_dependencies(id, args[1]);
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Length:
+		// MSL does not support scalar versions here.
+		if (expression_type(args[0]).vecsize == 1)
+		{
+			// Equivalent to abs().
+			emit_unary_func_op(result_type, id, args[0], "abs");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Normalize:
+		// MSL does not support scalar versions here.
+		if (expression_type(args[0]).vecsize == 1)
+		{
+			// Returns -1 or 1 for valid input, sign() does the job.
+			emit_unary_func_op(result_type, id, args[0], "sign");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Reflect:
+		if (get<SPIRType>(result_type).vecsize == 1)
+			emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect");
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Refract:
+		if (get<SPIRType>(result_type).vecsize == 1)
+			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract");
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
 	default:
 		CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
 		break;
@ -4766,6 +4941,21 @@ string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool
 	return fname;
 }

+string CompilerMSL::convert_to_f32(const string &expr, uint32_t components)
+{
+	SPIRType t;
+	t.basetype = SPIRType::Float;
+	t.vecsize = components;
+	t.columns = 1;
+	return join(type_to_glsl_constructor(t), "(", expr, ")");
+}
+
+static inline bool sampling_type_needs_f32_conversion(const SPIRType &type)
+{
+	// Double is not supported to begin with, but doesn't hurt to check for completion.
+	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Double;
+}
+
 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
 string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
                                     uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y,
@ -4804,6 +4994,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool

 		if (is_fetch)
 			tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+		else if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 1);

 		alt_coord_component = 1;
 		break;
@ -4839,6 +5031,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool

 		if (is_fetch)
 			tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+		else if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 2);

 		alt_coord_component = 2;
 		break;
@ -4849,6 +5043,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool

 		if (is_fetch)
 			tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+		else if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 3);

 		alt_coord_component = 3;
 		break;
@ -4866,6 +5062,9 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 				tex_coords = enclose_expression(tex_coords) + ".xyz";
 		}

+		if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 3);
+
 		alt_coord_component = 3;
 		break;

@ -4896,7 +5095,12 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool

 	// If projection, use alt coord as divisor
 	if (is_proj)
-		tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
+	{
+		if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords += " / " + convert_to_f32(to_extract_component_expression(coord, alt_coord_component), 1);
+		else
+			tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
+	}

 	if (!farg_str.empty())
 		farg_str += ", ";
@ -4930,11 +5134,19 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 		forward = forward && should_forward(dref);
 		farg_str += ", ";

+		auto &dref_type = expression_type(dref);
+
+		string dref_expr;
 		if (is_proj)
-			farg_str +=
-			    to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, alt_coord_component);
+			dref_expr =
+			    join(to_enclosed_expression(dref), " / ", to_extract_component_expression(coord, alt_coord_component));
 		else
-			farg_str += to_expression(dref);
+			dref_expr = to_expression(dref);
+
+		if (sampling_type_needs_f32_conversion(dref_type))
+			dref_expr = convert_to_f32(dref_expr, 1);
+
+		farg_str += dref_expr;

 		if (msl_options.is_macos() && (grad_x || grad_y))
 		{
@ -5626,6 +5838,10 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
 		{
 			switch (builtin)
 			{
+			case BuiltInViewIndex:
+				if (!msl_options.multiview)
+					break;
+				/* fallthrough */
 			case BuiltInFrontFacing:
 			case BuiltInPointCoord:
 			case BuiltInFragCoord:
@ -6013,7 +6229,12 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
 			    bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance && bi_type != BuiltInSubgroupEqMask &&
 			    bi_type != BuiltInBaryCoordNV && bi_type != BuiltInBaryCoordNoPerspNV &&
 			    bi_type != BuiltInSubgroupGeMask && bi_type != BuiltInSubgroupGtMask &&
-			    bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask)
+			    bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask &&
+			    ((get_execution_model() == ExecutionModelFragment && msl_options.multiview) ||
+			     bi_type != BuiltInViewIndex) &&
+			    (get_execution_model() == ExecutionModelGLCompute ||
+			     (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) ||
+			     (bi_type != BuiltInSubgroupLocalInvocationId && bi_type != BuiltInSubgroupSize)))
 			{
 				if (!ep_args.empty())
 					ep_args += ", ";
@ -6090,6 +6311,7 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
 string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
 {
 	string ep_args = entry_point_arg_stage_in();
+	Bitset claimed_bindings;

 	for (uint32_t i = 0; i < kMaxArgumentBuffers; i++)
 	{
@ -6104,12 +6326,30 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
 		if (!ep_args.empty())
 			ep_args += ", ";

-		ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id);
-		ep_args += " [[buffer(" + convert_to_string(i) + ")]]";
+		// Check if the argument buffer binding itself has been remapped.
+		uint32_t buffer_binding;
+		auto itr = resource_bindings.find({ get_entry_point().model, i, kArgumentBufferBinding });
+		if (itr != end(resource_bindings))
+		{
+			buffer_binding = itr->second.first.msl_buffer;
+			itr->second.second = true;
+		}
+		else
+		{
+			// As a fallback, directly map desc set <-> binding.
+			// If that was taken, take the next buffer binding.
+			if (claimed_bindings.get(i))
+				buffer_binding = next_metal_resource_index_buffer;
+			else
+				buffer_binding = i;
+		}

-		// Makes it more practical for testing, since the push constant block can occupy the first available
-		// buffer slot if it's not bound explicitly.
-		next_metal_resource_index_buffer = i + 1;
+		claimed_bindings.set(buffer_binding);
+
+		ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id);
+		ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]";
+
+		next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1);
 	}

 	entry_point_args_discrete_descriptors(ep_args);
@ -6401,6 +6641,50 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
 					entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); });
 				}
 				break;
+			case BuiltInSubgroupLocalInvocationId:
+				// This is natively supported in compute shaders.
+				if (get_execution_model() == ExecutionModelGLCompute)
+					break;
+
+				// This is natively supported in fragment shaders in MSL 2.2.
+				if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
+					break;
+
+				if (msl_options.is_ios())
+					SPIRV_CROSS_THROW(
+					    "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.2 on iOS.");
+
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW(
+					    "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.1.");
+
+				// Shaders other than compute shaders don't support the SIMD-group
+				// builtins directly, but we can emulate them using the SIMD-group
+				// functions. This might break if some of the subgroup terminated
+				// before reaching the entry point.
+				entry_func.fixup_hooks_in.push_back([=]() {
+					statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
+					          " = simd_prefix_exclusive_sum(1);");
+				});
+				break;
+			case BuiltInSubgroupSize:
+				// This is natively supported in compute shaders.
+				if (get_execution_model() == ExecutionModelGLCompute)
+					break;
+
+				// This is natively supported in fragment shaders in MSL 2.2.
+				if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
+					break;
+
+				if (msl_options.is_ios())
+					SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders on iOS.");
+
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders before Metal 2.1.");
+
+				entry_func.fixup_hooks_in.push_back(
+				    [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_sum(1);"); });
+				break;
 			case BuiltInSubgroupEqMask:
 				if (msl_options.is_ios())
 					SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
@ -6408,7 +6692,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
 					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
 				entry_func.fixup_hooks_in.push_back([=]() {
 					statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
-					          builtin_subgroup_invocation_id_id, " > 32 ? uint4(0, (1 << (",
+					          to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (",
 					          to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
 					          to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
 				});
@ -6489,6 +6773,44 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
 					          to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));");
 				});
 				break;
+			case BuiltInViewIndex:
+				if (!msl_options.multiview)
+				{
+					// According to the Vulkan spec, when not running under a multiview
+					// render pass, ViewIndex is 0.
+					entry_func.fixup_hooks_in.push_back([=]() {
+						statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;");
+					});
+				}
+				else if (get_execution_model() == ExecutionModelFragment)
+				{
+					// Because we adjusted the view index in the vertex shader, we have to
+					// adjust it back here.
+					entry_func.fixup_hooks_in.push_back([=]() {
+						statement(to_expression(var_id), " += ", to_expression(view_mask_buffer_id), "[0];");
+					});
+				}
+				else if (get_execution_model() == ExecutionModelVertex)
+				{
+					// Metal provides no special support for multiview, so we smuggle
+					// the view index in the instance index.
+					entry_func.fixup_hooks_in.push_back([=]() {
+						statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
+						          to_expression(view_mask_buffer_id), "[0] + ", to_expression(builtin_instance_idx_id),
+						          " % ", to_expression(view_mask_buffer_id), "[1];");
+						statement(to_expression(builtin_instance_idx_id), " /= ", to_expression(view_mask_buffer_id),
+						          "[1];");
+					});
+					// In addition to setting the variable itself, we also need to
+					// set the render_target_array_index with it on output. We have to
+					// offset this by the base view index, because Metal isn't in on
+					// our little game here.
+					entry_func.fixup_hooks_out.push_back([=]() {
+						statement(to_expression(builtin_layer_id), " = ", to_expression(var_id), " - ",
+						          to_expression(view_mask_buffer_id), "[0];");
+					});
+				}
+				break;
 			default:
 				break;
 			}
@ -7790,6 +8112,12 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 	case BuiltInSamplePosition:
 		// Shouldn't be reached.
 		SPIRV_CROSS_THROW("Sample position is retrieved by a function in MSL.");
+	case BuiltInViewIndex:
+		if (execution.model != ExecutionModelFragment)
+			SPIRV_CROSS_THROW("ViewIndex is handled specially outside fragment shaders.");
+		// The ViewIndex was implicitly used in the prior stages to set the render_target_array_index,
+		// so we can get it from there.
+		return "render_target_array_index";

 	// Fragment function out
 	case BuiltInFragDepth:
@ -7820,7 +8148,18 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 		return "thread_index_in_threadgroup";

 	case BuiltInSubgroupSize:
-		return "thread_execution_width";
+		if (execution.model == ExecutionModelFragment)
+		{
+			if (!msl_options.supports_msl_version(2, 2))
+				SPIRV_CROSS_THROW("threads_per_simdgroup requires Metal 2.2 in fragment shaders.");
+			return "threads_per_simdgroup";
+		}
+		else
+		{
+			// thread_execution_width is an alias for threads_per_simdgroup, and it's only available since 1.0,
+			// but not in fragment.
+			return "thread_execution_width";
+		}

 	case BuiltInNumSubgroups:
 		if (!msl_options.supports_msl_version(2))
@ -7833,9 +8172,18 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 		return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup";

 	case BuiltInSubgroupLocalInvocationId:
-		if (!msl_options.supports_msl_version(2))
-			SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
-		return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup";
+		if (execution.model == ExecutionModelFragment)
+		{
+			if (!msl_options.supports_msl_version(2, 2))
+				SPIRV_CROSS_THROW("thread_index_in_simdgroup requires Metal 2.2 in fragment shaders.");
+			return "thread_index_in_simdgroup";
+		}
+		else
+		{
+			if (!msl_options.supports_msl_version(2))
+				SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
+			return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup";
+		}

 	case BuiltInSubgroupEqMask:
 	case BuiltInSubgroupGeMask:
@ -7937,6 +8285,8 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id)
 		return "uint";
 	case BuiltInSamplePosition:
 		return "float2";
+	case BuiltInViewIndex:
+		return "uint";

 	// Fragment function out
 	case BuiltInFragDepth:
@ -7992,7 +8342,7 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma)
 }

 // Returns the byte size of a struct member.
-size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
+size_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const
 {
 	auto &type = get<SPIRType>(struct_type.member_types[index]);

@ -8358,7 +8708,7 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 		uint32_t extension_set = args[2];
 		if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
 		{
-			GLSLstd450 op_450 = static_cast<GLSLstd450>(args[3]);
+			auto op_450 = static_cast<GLSLstd450>(args[3]);
 			switch (op_450)
 			{
 			case GLSLstd450Radians:
@ -8373,6 +8723,22 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 				return SPVFuncImplFindUMsb;
 			case GLSLstd450SSign:
 				return SPVFuncImplSSign;
+			case GLSLstd450Reflect:
+			{
+				auto &type = compiler.get<SPIRType>(args[0]);
+				if (type.vecsize == 1)
+					return SPVFuncImplReflectScalar;
+				else
+					return SPVFuncImplNone;
+			}
+			case GLSLstd450Refract:
+			{
+				auto &type = compiler.get<SPIRType>(args[0]);
+				if (type.vecsize == 1)
+					return SPVFuncImplRefractScalar;
+				else
+					return SPVFuncImplNone;
+			}
 			case GLSLstd450MatrixInverse:
 			{
 				auto &mat_type = compiler.get<SPIRType>(args[0]);
@ -8519,6 +8885,9 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp
 	case BuiltInViewportIndex:
 	case BuiltInFragStencilRefEXT:
 	case BuiltInPrimitiveId:
+	case BuiltInSubgroupSize:
+	case BuiltInSubgroupLocalInvocationId:
+	case BuiltInViewIndex:
 		expected_type = SPIRType::UInt;
 		break;

@ -8561,6 +8930,7 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr
 	case BuiltInViewportIndex:
 	case BuiltInFragStencilRefEXT:
 	case BuiltInPrimitiveId:
+	case BuiltInViewIndex:
 		expected_type = SPIRType::UInt;
 		break;

--- a/deps/SPIRV-Cross/spirv_msl.hpp
+++ b/deps/SPIRV-Cross/spirv_msl.hpp
@ -161,6 +161,12 @@ static const uint32_t kSwizzleBufferBinding = ~(1u);
 // element to indicate the buffer binding for buffer size buffers to support OpArrayLength.
 static const uint32_t kBufferSizeBufferBinding = ~(2u);

+// Special constant used in a MSLResourceBinding binding
+// element to indicate the buffer binding used for the argument buffer itself.
+// This buffer binding should be kept as small as possible as all automatic bindings for buffers
+// will start at max(kArgumentBufferBinding) + 1.
+static const uint32_t kArgumentBufferBinding = ~(3u);
+
 static const uint32_t kMaxArgumentBuffers = 8;

 // Decompiles SPIR-V to Metal Shading Language
@ -185,12 +191,14 @@ public:
 		uint32_t shader_patch_output_buffer_index = 27;
 		uint32_t shader_tess_factor_buffer_index = 26;
 		uint32_t buffer_size_buffer_index = 25;
+		uint32_t view_mask_buffer_index = 24;
 		uint32_t shader_input_wg_index = 0;
 		bool enable_point_size_builtin = true;
 		bool disable_rasterization = false;
 		bool capture_output_to_buffer = false;
 		bool swizzle_texture_samples = false;
 		bool tess_domain_origin_lower_left = false;
+		bool multiview = false;

 		// Enable use of MSL 2.0 indirect argument buffers.
 		// MSL 2.0 must also be enabled.
@ -262,6 +270,13 @@ public:
 		return !buffers_requiring_array_length.empty();
 	}

+	// Provide feedback to calling API to allow it to pass a buffer
+	// containing the view mask for the current multiview subpass.
+	bool needs_view_mask_buffer() const
+	{
+		return msl_options.multiview;
+	}
+
 	// Provide feedback to calling API to allow it to pass an output
 	// buffer if the shader needs it.
 	bool needs_output_buffer() const
@ -386,6 +401,8 @@ protected:
 		SPVFuncImplSubgroupBallotFindMSB,
 		SPVFuncImplSubgroupBallotBitCount,
 		SPVFuncImplSubgroupAllEqual,
+		SPVFuncImplReflectScalar,
+		SPVFuncImplRefractScalar,
 		SPVFuncImplArrayCopyMultidimMax = 6
 	};

@ -406,7 +423,7 @@ protected:
 	std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override;
 	std::string sampler_type(const SPIRType &type);
 	std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override;
-	size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const override;
+	size_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const;
 	std::string to_func_call_arg(uint32_t id) override;
 	std::string to_name(uint32_t id, bool allow_alias = true) const override;
 	std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
@ -520,12 +537,15 @@ protected:
 	uint32_t builtin_base_vertex_id = 0;
 	uint32_t builtin_instance_idx_id = 0;
 	uint32_t builtin_base_instance_id = 0;
+	uint32_t builtin_view_idx_id = 0;
+	uint32_t builtin_layer_id = 0;
 	uint32_t builtin_invocation_id_id = 0;
 	uint32_t builtin_primitive_id_id = 0;
 	uint32_t builtin_subgroup_invocation_id_id = 0;
 	uint32_t builtin_subgroup_size_id = 0;
 	uint32_t swizzle_buffer_id = 0;
 	uint32_t buffer_size_buffer_id = 0;
+	uint32_t view_mask_buffer_id = 0;

 	void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
 	void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
@ -538,6 +558,8 @@ protected:

 	void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id);

+	std::string convert_to_f32(const std::string &expr, uint32_t components);
+
 	Options msl_options;
 	std::set<SPVFuncImpl> spv_function_implementations;
 	std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_location;
@ -571,10 +593,12 @@ protected:
 	};

 	std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings;
+
 	uint32_t next_metal_resource_index_buffer = 0;
 	uint32_t next_metal_resource_index_texture = 0;
 	uint32_t next_metal_resource_index_sampler = 0;
-	uint32_t next_metal_resource_ids[kMaxArgumentBuffers] = {};
+	// Intentionally uninitialized, works around MSVC 2013 bug.
+	uint32_t next_metal_resource_ids[kMaxArgumentBuffers];

 	uint32_t stage_in_var_id = 0;
 	uint32_t stage_out_var_id = 0;
--- a/deps/SPIRV-Cross/spirv_parser.cpp
+++ b/deps/SPIRV-Cross/spirv_parser.cpp
@ -162,7 +162,6 @@ void Parser::parse(const Instruction &instruction)
 	case OpSourceContinued:
 	case OpSourceExtension:
 	case OpNop:
-	case OpNoLine:
 	case OpModuleProcessed:
 		break;

@ -244,6 +243,8 @@ void Parser::parse(const Instruction &instruction)
 		auto ext = extract_string(ir.spirv, instruction.offset + 1);
 		if (ext == "GLSL.std.450")
 			set<SPIRExtension>(id, SPIRExtension::GLSL);
+		else if (ext == "DebugInfo")
+			set<SPIRExtension>(id, SPIRExtension::SPV_debug_info);
 		else if (ext == "SPV_AMD_shader_ballot")
 			set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_ballot);
 		else if (ext == "SPV_AMD_shader_explicit_vertex_parameter")
@ -260,6 +261,14 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}

+	case OpExtInst:
+	{
+		// The SPIR-V debug information extended instructions might come at global scope.
+		if (current_block)
+			current_block->ops.push_back(instruction);
+		break;
+	}
+
 	case OpEntryPoint:
 	{
 		auto itr =
@ -1057,6 +1066,14 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}

+	case OpNoLine:
+	{
+		// OpNoLine might come at global scope.
+		if (current_block)
+			current_block->ops.push_back(instruction);
+		break;
+	}
+
 	// Actual opcodes.
 	default:
 	{
--- a/deps/SPIRV-Cross/test_shaders.py
+++ b/deps/SPIRV-Cross/test_shaders.py
@ -203,6 +203,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
        msl_args.append('3')
    if '.line.' in shader:
        msl_args.append('--emit-line-directives')
+    if '.multiview.' in shader:
+        msl_args.append('--msl-multiview')

    subprocess.check_call(msl_args)

--- a/reference/opt/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
+++ b/reference/opt/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
@ -0,0 +1,29 @@
+Texture2D<float4> uTexture : register(t0);
+SamplerState _uTexture_sampler : register(s0);
+
+static min16float4 FragColor;
+static min16float2 UV;
+
+struct SPIRV_Cross_Input
+{
+    min16float2 UV : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    min16float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = min16float4(uTexture.Sample(_uTexture_sampler, UV));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    UV = stage_input.UV;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
--- a/reference/opt/shaders-hlsl/comp/outer-product.comp
+++ b/reference/opt/shaders-hlsl/comp/outer-product.comp
@ -0,0 +1,48 @@
+RWByteAddressBuffer _21 : register(u0);
+ByteAddressBuffer _26 : register(t1);
+
+void comp_main()
+{
+    float2x2 _32 = float2x2(asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).y);
+    _21.Store2(0, asuint(_32[0]));
+    _21.Store2(8, asuint(_32[1]));
+    float2x3 _41 = float2x3(asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).y);
+    _21.Store3(16, asuint(_41[0]));
+    _21.Store3(32, asuint(_41[1]));
+    float2x4 _50 = float2x4(asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).y);
+    _21.Store4(48, asuint(_50[0]));
+    _21.Store4(64, asuint(_50[1]));
+    float3x2 _58 = float3x2(asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).z);
+    _21.Store2(80, asuint(_58[0]));
+    _21.Store2(88, asuint(_58[1]));
+    _21.Store2(96, asuint(_58[2]));
+    float3x3 _66 = float3x3(asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).z);
+    _21.Store3(112, asuint(_66[0]));
+    _21.Store3(128, asuint(_66[1]));
+    _21.Store3(144, asuint(_66[2]));
+    float3x4 _74 = float3x4(asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).z);
+    _21.Store4(160, asuint(_74[0]));
+    _21.Store4(176, asuint(_74[1]));
+    _21.Store4(192, asuint(_74[2]));
+    float4x2 _82 = float4x2(asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).w);
+    _21.Store2(208, asuint(_82[0]));
+    _21.Store2(216, asuint(_82[1]));
+    _21.Store2(224, asuint(_82[2]));
+    _21.Store2(232, asuint(_82[3]));
+    float4x3 _90 = float4x3(asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).w);
+    _21.Store3(240, asuint(_90[0]));
+    _21.Store3(256, asuint(_90[1]));
+    _21.Store3(272, asuint(_90[2]));
+    _21.Store3(288, asuint(_90[3]));
+    float4x4 _98 = float4x4(asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).w);
+    _21.Store4(304, asuint(_98[0]));
+    _21.Store4(320, asuint(_98[1]));
+    _21.Store4(336, asuint(_98[2]));
+    _21.Store4(352, asuint(_98[3]));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
--- a/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp
+++ b/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp
@ -0,0 +1,14 @@
+RWByteAddressBuffer _9 : register(u0);
+
+void comp_main()
+{
+    _9.Store(8, asuint(distance(asfloat(_9.Load(0)), asfloat(_9.Load(4)))));
+    _9.Store(12, asuint(length(asfloat(_9.Load(0)))));
+    _9.Store(16, asuint(sign(asfloat(_9.Load(0)))));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
--- a/reference/opt/shaders-hlsl/frag/for-loop-continue-control-flow.frag
+++ b/reference/opt/shaders-hlsl/frag/for-loop-continue-control-flow.frag
@ -0,0 +1,25 @@
+static float4 FragColor;
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = 0.0f.xxxx;
+    for (int _43 = 0; _43 < 3; )
+    {
+        FragColor[_43] += float(_43);
+        _43++;
+        continue;
+    }
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
--- a/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag
+++ b/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag
@ -0,0 +1,49 @@
+static float FragColor;
+static float3 vRefract;
+
+struct SPIRV_Cross_Input
+{
+    float3 vRefract : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float FragColor : SV_Target0;
+};
+
+float SPIRV_Cross_Reflect(float i, float n)
+{
+    return i - 2.0 * dot(n, i) * n;
+}
+
+float SPIRV_Cross_Refract(float i, float n, float eta)
+{
+    float NoI = n * i;
+    float NoI2 = NoI * NoI;
+    float k = 1.0 - eta * eta * (1.0 - NoI2);
+    if (k < 0.0)
+    {
+        return 0.0;
+    }
+    else
+    {
+        return eta * i - (eta * NoI + sqrt(k)) * n;
+    }
+}
+
+void frag_main()
+{
+    FragColor = SPIRV_Cross_Refract(vRefract.x, vRefract.y, vRefract.z);
+    FragColor += SPIRV_Cross_Reflect(vRefract.x, vRefract.y);
+    FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+    FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vRefract = stage_input.vRefract;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
--- a/reference/opt/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
+++ b/reference/opt/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    half4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    half2 UV [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = half4(uTexture.sample(uTextureSmplr, float2(in.UV)));
+    return out;
+}
+
--- a/reference/opt/shaders-msl/comp/outer-product.comp
+++ b/reference/opt/shaders-msl/comp/outer-product.comp
@ -0,0 +1,38 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    float2x2 m22;
+    float2x3 m23;
+    float2x4 m24;
+    float3x2 m32;
+    float3x3 m33;
+    float3x4 m34;
+    float4x2 m42;
+    float4x3 m43;
+    float4x4 m44;
+};
+
+struct ReadSSBO
+{
+    float2 v2;
+    float3 v3;
+    float4 v4;
+};
+
+kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]])
+{
+    _21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y);
+    _21.m23 = float2x3(_26.v3 * _26.v2.x, _26.v3 * _26.v2.y);
+    _21.m24 = float2x4(_26.v4 * _26.v2.x, _26.v4 * _26.v2.y);
+    _21.m32 = float3x2(_26.v2 * _26.v3.x, _26.v2 * _26.v3.y, _26.v2 * _26.v3.z);
+    _21.m33 = float3x3(_26.v3 * _26.v3.x, _26.v3 * _26.v3.y, _26.v3 * _26.v3.z);
+    _21.m34 = float3x4(_26.v4 * _26.v3.x, _26.v4 * _26.v3.y, _26.v4 * _26.v3.z);
+    _21.m42 = float4x2(_26.v2 * _26.v4.x, _26.v2 * _26.v4.y, _26.v2 * _26.v4.z, _26.v2 * _26.v4.w);
+    _21.m43 = float4x3(_26.v3 * _26.v4.x, _26.v3 * _26.v4.y, _26.v3 * _26.v4.z, _26.v3 * _26.v4.w);
+    _21.m44 = float4x4(_26.v4 * _26.v4.x, _26.v4 * _26.v4.y, _26.v4 * _26.v4.z, _26.v4 * _26.v4.w);
+}
+
--- a/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp
+++ b/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp
@ -0,0 +1,21 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    float a;
+    float b;
+    float c;
+    float d;
+    float e;
+};
+
+kernel void main0(device SSBO& _9 [[buffer(0)]])
+{
+    _9.c = abs(_9.a - _9.b);
+    _9.d = abs(_9.a);
+    _9.e = sign(_9.a);
+}
+
--- a/reference/opt/shaders-msl/frag/for-loop-continue-control-flow.frag
+++ b/reference/opt/shaders-msl/frag/for-loop-continue-control-flow.frag
@ -0,0 +1,23 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor = float4(0.0);
+    for (int _43 = 0; _43 < 3; )
+    {
+        out.FragColor[_43] += float(_43);
+        _43++;
+        continue;
+    }
+    return out;
+}
+
--- a/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag
+++ b/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag
@ -0,0 +1,49 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 vRefract [[user(locn0)]];
+};
+
+template<typename T>
+inline T spvReflect(T i, T n)
+{
+    return i - T(2) * i * n * n;
+}
+
+template<typename T>
+inline T spvRefract(T i, T n, T eta)
+{
+    T NoI = n * i;
+    T NoI2 = NoI * NoI;
+    T k = T(1) - eta * eta * (T(1) - NoI2);
+    if (k < T(0))
+    {
+        return T(0);
+    }
+    else
+    {
+        return eta * i - (eta * NoI + sqrt(k)) * n;
+    }
+}
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    out.FragColor = spvRefract(in.vRefract.x, in.vRefract.y, in.vRefract.z);
+    out.FragColor += spvReflect(in.vRefract.x, in.vRefract.y);
+    out.FragColor += refract(in.vRefract.xy, in.vRefract.yz, in.vRefract.z).y;
+    out.FragColor += reflect(in.vRefract.xy, in.vRefract.zy).y;
+    return out;
+}
+
--- a/reference/opt/shaders-msl/frag/subgroup-builtins.msl22.frag
+++ b/reference/opt/shaders-msl/frag/subgroup-builtins.msl22.frag
@ -0,0 +1,18 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    uint2 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(uint gl_SubgroupSize [[threads_per_simdgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
+{
+    main0_out out = {};
+    out.FragColor.x = gl_SubgroupSize;
+    out.FragColor.y = gl_SubgroupInvocationID;
+    return out;
+}
+
--- a/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag
+++ b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag
@ -0,0 +1,32 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float4 vColor [[user(locn0)]];
+    float2 vTex_0 [[user(locn1)]];
+    float2 vTex_1 [[user(locn2)]];
+    float2 vTex_2 [[user(locn3)]];
+    float2 vTex_3 [[user(locn4)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d<float> uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]])
+{
+    main0_out out = {};
+    float2 vTex[4] = {};
+    vTex[0] = in.vTex_0;
+    vTex[1] = in.vTex_1;
+    vTex[2] = in.vTex_2;
+    vTex[3] = in.vTex_3;
+    gl_ViewIndex += spvViewMask[0];
+    out.FragColor = in.vColor * uTex.sample(uTexSmplr, vTex[int(gl_ViewIndex)]);
+    return out;
+}
+
--- a/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag
+++ b/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag
@ -0,0 +1,89 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+inline uint4 spvSubgroupBallot(bool value)
+{
+    simd_vote vote = simd_ballot(value);
+    // simd_ballot() returns a 64-bit integer-like object, but
+    // SPIR-V callers expect a uint4. We must convert.
+    // FIXME: This won't include higher bits if Apple ever supports
+    // 128 lanes in an SIMD-group.
+    return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0);
+}
+
+inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
+{
+    return !!extract_bits(ballot[bit / 32], bit % 32, 1);
+}
+
+inline uint spvSubgroupBallotFindLSB(uint4 ballot)
+{
+    return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
+}
+
+inline uint spvSubgroupBallotFindMSB(uint4 ballot)
+{
+    return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
+}
+
+inline uint spvSubgroupBallotBitCount(uint4 ballot)
+{
+    return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
+}
+
+inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
+    return spvSubgroupBallotBitCount(ballot & mask);
+}
+
+inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
+    return spvSubgroupBallotBitCount(ballot & mask);
+}
+
+template<typename T>
+inline bool spvSubgroupAllEqual(T value)
+{
+    return simd_all(value == simd_broadcast_first(value));
+}
+
+template<>
+inline bool spvSubgroupAllEqual(bool value)
+{
+    return simd_all(value) || !simd_any(value);
+}
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    uint gl_SubgroupSize = simd_sum(1);
+    uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1);
+    uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
+    uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
+    uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
+    uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
+    uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
+    out.FragColor = float(gl_SubgroupSize);
+    out.FragColor = float(gl_SubgroupInvocationID);
+    out.FragColor = float4(gl_SubgroupEqMask).x;
+    out.FragColor = float4(gl_SubgroupGeMask).x;
+    out.FragColor = float4(gl_SubgroupGtMask).x;
+    out.FragColor = float4(gl_SubgroupLeMask).x;
+    out.FragColor = float4(gl_SubgroupLtMask).x;
+    uint4 _63 = spvSubgroupBallot(true);
+    float4 _147 = simd_prefix_inclusive_product(simd_product(float4(20.0)));
+    int4 _149 = simd_prefix_inclusive_product(simd_product(int4(20)));
+    return out;
+}
+
--- a/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert
+++ b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert
@ -0,0 +1,31 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct MVPs
+{
+    float4x4 MVP[2];
+};
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+    uint gl_Layer [[render_target_array_index]];
+};
+
+struct main0_in
+{
+    float4 Position [[attribute(0)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]])
+{
+    main0_out out = {};
+    uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1];
+    gl_InstanceIndex /= spvViewMask[1];
+    out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position;
+    out.gl_Layer = gl_ViewIndex - spvViewMask[0];
+    return out;
+}
+
--- a/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert
+++ b/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert
@ -0,0 +1,29 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct MVPs
+{
+    float4x4 MVP[2];
+};
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+    uint gl_Layer [[render_target_array_index]];
+};
+
+struct main0_in
+{
+    float4 Position [[attribute(0)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]])
+{
+    main0_out out = {};
+    const uint gl_ViewIndex = 0;
+    out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position;
+    return out;
+}
+
--- a/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp
+++ b/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp
@ -0,0 +1,25 @@
+#version 450
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(binding = 1, rgba32f) uniform writeonly image2D outImageTexture;
+
+void main()
+{
+    int _30;
+    _30 = 7;
+    int _27_copy;
+    for (int _27 = 7; _27 >= 0; _27_copy = _27, _27--, _30 = _27_copy)
+    {
+        if (5.0 > float(_27))
+        {
+            break;
+        }
+        else
+        {
+            continue;
+        }
+        continue;
+    }
+    imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0));
+}
+
--- a/reference/opt/shaders/asm/extended-debug-extinst.invalid.asm.comp
+++ b/reference/opt/shaders/asm/extended-debug-extinst.invalid.asm.comp
@ -0,0 +1,18 @@
+#version 430
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _8_9
+{
+    float _m0[];
+} _9;
+
+layout(binding = 1, std430) buffer _8_10
+{
+    float _m0[];
+} _10;
+
+void main()
+{
+    _10._m0[gl_GlobalInvocationID.x] = -_9._m0[gl_GlobalInvocationID.x];
+}
+
--- a/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
+++ b/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
@ -0,0 +1,19 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_NV_gpu_shader5)
+#extension GL_NV_gpu_shader5 : require
+#else
+#error No extension available for FP16.
+#endif
+
+layout(binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out f16vec4 FragColor;
+layout(location = 0) in f16vec2 UV;
+
+void main()
+{
+    FragColor = f16vec4(texture(uTexture, UV));
+}
+
--- a/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk
+++ b/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk
@ -0,0 +1,20 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+
+layout(set = 0, binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out f16vec4 FragColor;
+layout(location = 0) in f16vec2 UV;
+
+void main()
+{
+    FragColor = f16vec4(texture(uTexture, UV));
+}
+
--- a/reference/opt/shaders/comp/outer-product.comp
+++ b/reference/opt/shaders/comp/outer-product.comp
@ -0,0 +1,36 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) writeonly buffer SSBO
+{
+    mat2 m22;
+    mat2x3 m23;
+    mat2x4 m24;
+    mat3x2 m32;
+    mat3 m33;
+    mat3x4 m34;
+    mat4x2 m42;
+    mat4x3 m43;
+    mat4 m44;
+} _21;
+
+layout(binding = 1, std430) readonly buffer ReadSSBO
+{
+    vec2 v2;
+    vec3 v3;
+    vec4 v4;
+} _26;
+
+void main()
+{
+    _21.m22 = outerProduct(_26.v2, _26.v2);
+    _21.m23 = outerProduct(_26.v3, _26.v2);
+    _21.m24 = outerProduct(_26.v4, _26.v2);
+    _21.m32 = outerProduct(_26.v2, _26.v3);
+    _21.m33 = outerProduct(_26.v3, _26.v3);
+    _21.m34 = outerProduct(_26.v4, _26.v3);
+    _21.m42 = outerProduct(_26.v2, _26.v4);
+    _21.m43 = outerProduct(_26.v3, _26.v4);
+    _21.m44 = outerProduct(_26.v4, _26.v4);
+}
+
--- a/reference/opt/shaders/comp/scalar-std450-distance-length-normalize.comp
+++ b/reference/opt/shaders/comp/scalar-std450-distance-length-normalize.comp
@ -0,0 +1,19 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+    float a;
+    float b;
+    float c;
+    float d;
+    float e;
+} _9;
+
+void main()
+{
+    _9.c = distance(_9.a, _9.b);
+    _9.d = length(_9.a);
+    _9.e = normalize(_9.a);
+}
+
--- a/reference/opt/shaders/frag/for-loop-continue-control-flow.frag
+++ b/reference/opt/shaders/frag/for-loop-continue-control-flow.frag
@ -0,0 +1,15 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = vec4(0.0);
+    for (int _43 = 0; _43 < 3; )
+    {
+        FragColor[_43] += float(_43);
+        _43++;
+        continue;
+    }
+}
+
--- a/reference/opt/shaders/frag/scalar-refract-reflect.frag
+++ b/reference/opt/shaders/frag/scalar-refract-reflect.frag
@ -0,0 +1,13 @@
+#version 450
+
+layout(location = 0) out float FragColor;
+layout(location = 0) in vec3 vRefract;
+
+void main()
+{
+    FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
+    FragColor += reflect(vRefract.x, vRefract.y);
+    FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+    FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
+
--- a/reference/opt/shaders/frag/selection-block-dominator.frag
+++ b/reference/opt/shaders/frag/selection-block-dominator.frag
@ -0,0 +1,19 @@
+#version 450
+
+layout(location = 0) flat in int vIndex;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    for (;;)
+    {
+        if (vIndex != 1)
+        {
+            FragColor = vec4(1.0);
+            break;
+        }
+        FragColor = vec4(10.0);
+        break;
+    }
+}
+
--- a/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag
+++ b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag
@ -0,0 +1,52 @@
+struct UBO_1_1
+{
+    float4 v[64];
+};
+
+ConstantBuffer<UBO_1_1> ubos[] : register(b0, space2);
+ByteAddressBuffer ssbos[] : register(t0, space3);
+Texture2D<float4> uSamplers[] : register(t0, space0);
+SamplerState uSamps[] : register(s0, space1);
+Texture2D<float4> uCombinedSamplers[] : register(t4, space0);
+SamplerState _uCombinedSamplers_sampler[] : register(s4, space0);
+
+static int vIndex;
+static float4 FragColor;
+static float2 vUV;
+
+struct SPIRV_Cross_Input
+{
+    nointerpolation int vIndex : TEXCOORD0;
+    float2 vUV : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    int i = vIndex;
+    int _59 = i + 10;
+    int _64 = i + 40;
+    FragColor = uSamplers[NonUniformResourceIndex(_59)].Sample(uSamps[NonUniformResourceIndex(_64)], vUV);
+    int _71 = i + 10;
+    FragColor = uCombinedSamplers[NonUniformResourceIndex(_71)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_71)], vUV);
+    int _77 = i + 20;
+    int _80 = i + 40;
+    FragColor += ubos[NonUniformResourceIndex(_77)].v[_80];
+    int _87 = i + 50;
+    int _90 = i + 60;
+    FragColor += asfloat(ssbos[NonUniformResourceIndex(_87)].Load4(_90 * 16 + 0));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vIndex = stage_input.vIndex;
+    vUV = stage_input.vUV;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
--- a/reference/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
+++ b/reference/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
@ -0,0 +1,29 @@
+Texture2D<float4> uTexture : register(t0);
+SamplerState _uTexture_sampler : register(s0);
+
+static min16float4 FragColor;
+static min16float2 UV;
+
+struct SPIRV_Cross_Input
+{
+    min16float2 UV : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    min16float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = min16float4(uTexture.Sample(_uTexture_sampler, UV));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    UV = stage_input.UV;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
--- a/reference/shaders-hlsl/comp/outer-product.comp
+++ b/reference/shaders-hlsl/comp/outer-product.comp
@ -0,0 +1,48 @@
+RWByteAddressBuffer _21 : register(u0);
+ByteAddressBuffer _26 : register(t1);
+
+void comp_main()
+{
+    float2x2 _32 = float2x2(asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).y);
+    _21.Store2(0, asuint(_32[0]));
+    _21.Store2(8, asuint(_32[1]));
+    float2x3 _41 = float2x3(asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).y);
+    _21.Store3(16, asuint(_41[0]));
+    _21.Store3(32, asuint(_41[1]));
+    float2x4 _50 = float2x4(asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).y);
+    _21.Store4(48, asuint(_50[0]));
+    _21.Store4(64, asuint(_50[1]));
+    float3x2 _58 = float3x2(asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).z);
+    _21.Store2(80, asuint(_58[0]));
+    _21.Store2(88, asuint(_58[1]));
+    _21.Store2(96, asuint(_58[2]));
+    float3x3 _66 = float3x3(asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).z);
+    _21.Store3(112, asuint(_66[0]));
+    _21.Store3(128, asuint(_66[1]));
+    _21.Store3(144, asuint(_66[2]));
+    float3x4 _74 = float3x4(asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).z);
+    _21.Store4(160, asuint(_74[0]));
+    _21.Store4(176, asuint(_74[1]));
+    _21.Store4(192, asuint(_74[2]));
+    float4x2 _82 = float4x2(asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).w);
+    _21.Store2(208, asuint(_82[0]));
+    _21.Store2(216, asuint(_82[1]));
+    _21.Store2(224, asuint(_82[2]));
+    _21.Store2(232, asuint(_82[3]));
+    float4x3 _90 = float4x3(asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).w);
+    _21.Store3(240, asuint(_90[0]));
+    _21.Store3(256, asuint(_90[1]));
+    _21.Store3(272, asuint(_90[2]));
+    _21.Store3(288, asuint(_90[3]));
+    float4x4 _98 = float4x4(asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).w);
+    _21.Store4(304, asuint(_98[0]));
+    _21.Store4(320, asuint(_98[1]));
+    _21.Store4(336, asuint(_98[2]));
+    _21.Store4(352, asuint(_98[3]));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
--- a/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp
+++ b/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp
@ -0,0 +1,14 @@
+RWByteAddressBuffer _9 : register(u0);
+
+void comp_main()
+{
+    _9.Store(8, asuint(distance(asfloat(_9.Load(0)), asfloat(_9.Load(4)))));
+    _9.Store(12, asuint(length(asfloat(_9.Load(0)))));
+    _9.Store(16, asuint(sign(asfloat(_9.Load(0)))));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
--- a/reference/shaders-hlsl/frag/for-loop-continue-control-flow.frag
+++ b/reference/shaders-hlsl/frag/for-loop-continue-control-flow.frag
@ -0,0 +1,44 @@
+static float4 FragColor;
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = 0.0f.xxxx;
+    int i = 0;
+    int _36;
+    for (;;)
+    {
+        if (i < 3)
+        {
+            int a = i;
+            FragColor[a] += float(i);
+            if (false)
+            {
+                _36 = 1;
+            }
+            else
+            {
+                int _41 = i;
+                i = _41 + 1;
+                _36 = _41;
+            }
+            continue;
+        }
+        else
+        {
+            break;
+        }
+    }
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
--- a/reference/shaders-hlsl/frag/scalar-refract-reflect.frag
+++ b/reference/shaders-hlsl/frag/scalar-refract-reflect.frag
@ -0,0 +1,49 @@
+static float FragColor;
+static float3 vRefract;
+
+struct SPIRV_Cross_Input
+{
+    float3 vRefract : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float FragColor : SV_Target0;
+};
+
+float SPIRV_Cross_Reflect(float i, float n)
+{
+    return i - 2.0 * dot(n, i) * n;
+}
+
+float SPIRV_Cross_Refract(float i, float n, float eta)
+{
+    float NoI = n * i;
+    float NoI2 = NoI * NoI;
+    float k = 1.0 - eta * eta * (1.0 - NoI2);
+    if (k < 0.0)
+    {
+        return 0.0;
+    }
+    else
+    {
+        return eta * i - (eta * NoI + sqrt(k)) * n;
+    }
+}
+
+void frag_main()
+{
+    FragColor = SPIRV_Cross_Refract(vRefract.x, vRefract.y, vRefract.z);
+    FragColor += SPIRV_Cross_Reflect(vRefract.x, vRefract.y);
+    FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+    FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vRefract = stage_input.vRefract;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
--- a/reference/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
+++ b/reference/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    half4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    half2 UV [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = half4(uTexture.sample(uTextureSmplr, float2(in.UV)));
+    return out;
+}
+
--- a/reference/shaders-msl/comp/outer-product.comp
+++ b/reference/shaders-msl/comp/outer-product.comp
@ -0,0 +1,38 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    float2x2 m22;
+    float2x3 m23;
+    float2x4 m24;
+    float3x2 m32;
+    float3x3 m33;
+    float3x4 m34;
+    float4x2 m42;
+    float4x3 m43;
+    float4x4 m44;
+};
+
+struct ReadSSBO
+{
+    float2 v2;
+    float3 v3;
+    float4 v4;
+};
+
+kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]])
+{
+    _21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y);
+    _21.m23 = float2x3(_26.v3 * _26.v2.x, _26.v3 * _26.v2.y);
+    _21.m24 = float2x4(_26.v4 * _26.v2.x, _26.v4 * _26.v2.y);
+    _21.m32 = float3x2(_26.v2 * _26.v3.x, _26.v2 * _26.v3.y, _26.v2 * _26.v3.z);
+    _21.m33 = float3x3(_26.v3 * _26.v3.x, _26.v3 * _26.v3.y, _26.v3 * _26.v3.z);
+    _21.m34 = float3x4(_26.v4 * _26.v3.x, _26.v4 * _26.v3.y, _26.v4 * _26.v3.z);
+    _21.m42 = float4x2(_26.v2 * _26.v4.x, _26.v2 * _26.v4.y, _26.v2 * _26.v4.z, _26.v2 * _26.v4.w);
+    _21.m43 = float4x3(_26.v3 * _26.v4.x, _26.v3 * _26.v4.y, _26.v3 * _26.v4.z, _26.v3 * _26.v4.w);
+    _21.m44 = float4x4(_26.v4 * _26.v4.x, _26.v4 * _26.v4.y, _26.v4 * _26.v4.z, _26.v4 * _26.v4.w);
+}
+
--- a/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp
+++ b/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp
@ -0,0 +1,21 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    float a;
+    float b;
+    float c;
+    float d;
+    float e;
+};
+
+kernel void main0(device SSBO& _9 [[buffer(0)]])
+{
+    _9.c = abs(_9.a - _9.b);
+    _9.d = abs(_9.a);
+    _9.e = sign(_9.a);
+}
+
--- a/reference/shaders-msl/frag/for-loop-continue-control-flow.frag
+++ b/reference/shaders-msl/frag/for-loop-continue-control-flow.frag
@ -0,0 +1,42 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor = float4(0.0);
+    int i = 0;
+    int _36;
+    for (;;)
+    {
+        if (i < 3)
+        {
+            int a = i;
+            out.FragColor[a] += float(i);
+            if (false)
+            {
+                _36 = 1;
+            }
+            else
+            {
+                int _41 = i;
+                i = _41 + 1;
+                _36 = _41;
+            }
+            continue;
+        }
+        else
+        {
+            break;
+        }
+    }
+    return out;
+}
+
--- a/reference/shaders-msl/frag/scalar-refract-reflect.frag
+++ b/reference/shaders-msl/frag/scalar-refract-reflect.frag
@ -0,0 +1,49 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 vRefract [[user(locn0)]];
+};
+
+template<typename T>
+inline T spvReflect(T i, T n)
+{
+    return i - T(2) * i * n * n;
+}
+
+template<typename T>
+inline T spvRefract(T i, T n, T eta)
+{
+    T NoI = n * i;
+    T NoI2 = NoI * NoI;
+    T k = T(1) - eta * eta * (T(1) - NoI2);
+    if (k < T(0))
+    {
+        return T(0);
+    }
+    else
+    {
+        return eta * i - (eta * NoI + sqrt(k)) * n;
+    }
+}
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    out.FragColor = spvRefract(in.vRefract.x, in.vRefract.y, in.vRefract.z);
+    out.FragColor += spvReflect(in.vRefract.x, in.vRefract.y);
+    out.FragColor += refract(in.vRefract.xy, in.vRefract.yz, in.vRefract.z).y;
+    out.FragColor += reflect(in.vRefract.xy, in.vRefract.zy).y;
+    return out;
+}
+
--- a/reference/shaders-msl/frag/subgroup-builtins.msl22.frag
+++ b/reference/shaders-msl/frag/subgroup-builtins.msl22.frag
@ -0,0 +1,18 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    uint2 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(uint gl_SubgroupSize [[threads_per_simdgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
+{
+    main0_out out = {};
+    out.FragColor.x = gl_SubgroupSize;
+    out.FragColor.y = gl_SubgroupInvocationID;
+    return out;
+}
+
--- a/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag
+++ b/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag
@ -0,0 +1,32 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float4 vColor [[user(locn0)]];
+    float2 vTex_0 [[user(locn1)]];
+    float2 vTex_1 [[user(locn2)]];
+    float2 vTex_2 [[user(locn3)]];
+    float2 vTex_3 [[user(locn4)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d<float> uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]])
+{
+    main0_out out = {};
+    float2 vTex[4] = {};
+    vTex[0] = in.vTex_0;
+    vTex[1] = in.vTex_1;
+    vTex[2] = in.vTex_2;
+    vTex[3] = in.vTex_3;
+    gl_ViewIndex += spvViewMask[0];
+    out.FragColor = in.vColor * uTex.sample(uTexSmplr, vTex[int(gl_ViewIndex)]);
+    return out;
+}
+
--- a/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag
+++ b/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag
@ -0,0 +1,143 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+inline uint4 spvSubgroupBallot(bool value)
+{
+    simd_vote vote = simd_ballot(value);
+    // simd_ballot() returns a 64-bit integer-like object, but
+    // SPIR-V callers expect a uint4. We must convert.
+    // FIXME: This won't include higher bits if Apple ever supports
+    // 128 lanes in an SIMD-group.
+    return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0);
+}
+
+inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
+{
+    return !!extract_bits(ballot[bit / 32], bit % 32, 1);
+}
+
+inline uint spvSubgroupBallotFindLSB(uint4 ballot)
+{
+    return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
+}
+
+inline uint spvSubgroupBallotFindMSB(uint4 ballot)
+{
+    return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
+}
+
+inline uint spvSubgroupBallotBitCount(uint4 ballot)
+{
+    return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
+}
+
+inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
+    return spvSubgroupBallotBitCount(ballot & mask);
+}
+
+inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
+    return spvSubgroupBallotBitCount(ballot & mask);
+}
+
+template<typename T>
+inline bool spvSubgroupAllEqual(T value)
+{
+    return simd_all(value == simd_broadcast_first(value));
+}
+
+template<>
+inline bool spvSubgroupAllEqual(bool value)
+{
+    return simd_all(value) || !simd_any(value);
+}
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    uint gl_SubgroupSize = simd_sum(1);
+    uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1);
+    uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
+    uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
+    uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
+    uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
+    uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
+    out.FragColor = float(gl_SubgroupSize);
+    out.FragColor = float(gl_SubgroupInvocationID);
+    bool elected = simd_is_first();
+    out.FragColor = float4(gl_SubgroupEqMask).x;
+    out.FragColor = float4(gl_SubgroupGeMask).x;
+    out.FragColor = float4(gl_SubgroupGtMask).x;
+    out.FragColor = float4(gl_SubgroupLeMask).x;
+    out.FragColor = float4(gl_SubgroupLtMask).x;
+    float4 broadcasted = simd_broadcast(float4(10.0), 8u);
+    float3 first = simd_broadcast_first(float3(20.0));
+    uint4 ballot_value = spvSubgroupBallot(true);
+    bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID);
+    bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u);
+    uint bit_count = spvSubgroupBallotBitCount(ballot_value);
+    uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
+    uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
+    uint lsb = spvSubgroupBallotFindLSB(ballot_value);
+    uint msb = spvSubgroupBallotFindMSB(ballot_value);
+    uint shuffled = simd_shuffle(10u, 8u);
+    uint shuffled_xor = simd_shuffle_xor(30u, 8u);
+    uint shuffled_up = simd_shuffle_up(20u, 4u);
+    uint shuffled_down = simd_shuffle_down(20u, 4u);
+    bool has_all = simd_all(true);
+    bool has_any = simd_any(true);
+    bool has_equal = spvSubgroupAllEqual(0);
+    has_equal = spvSubgroupAllEqual(true);
+    float4 added = simd_sum(float4(20.0));
+    int4 iadded = simd_sum(int4(20));
+    float4 multiplied = simd_product(float4(20.0));
+    int4 imultiplied = simd_product(int4(20));
+    float4 lo = simd_min(float4(20.0));
+    float4 hi = simd_max(float4(20.0));
+    int4 slo = simd_min(int4(20));
+    int4 shi = simd_max(int4(20));
+    uint4 ulo = simd_min(uint4(20u));
+    uint4 uhi = simd_max(uint4(20u));
+    uint4 anded = simd_and(ballot_value);
+    uint4 ored = simd_or(ballot_value);
+    uint4 xored = simd_xor(ballot_value);
+    added = simd_prefix_inclusive_sum(added);
+    iadded = simd_prefix_inclusive_sum(iadded);
+    multiplied = simd_prefix_inclusive_product(multiplied);
+    imultiplied = simd_prefix_inclusive_product(imultiplied);
+    added = simd_prefix_exclusive_sum(multiplied);
+    multiplied = simd_prefix_exclusive_product(multiplied);
+    iadded = simd_prefix_exclusive_sum(imultiplied);
+    imultiplied = simd_prefix_exclusive_product(imultiplied);
+    added = quad_sum(added);
+    multiplied = quad_product(multiplied);
+    iadded = quad_sum(iadded);
+    imultiplied = quad_product(imultiplied);
+    lo = quad_min(lo);
+    hi = quad_max(hi);
+    ulo = quad_min(ulo);
+    uhi = quad_max(uhi);
+    slo = quad_min(slo);
+    shi = quad_max(shi);
+    anded = quad_and(anded);
+    ored = quad_or(ored);
+    xored = quad_xor(xored);
+    float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u);
+    float4 swap_vertical = quad_shuffle_xor(float4(20.0), 2u);
+    float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u);
+    float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u);
+    return out;
+}
+
--- a/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert
+++ b/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert
@ -0,0 +1,31 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct MVPs
+{
+    float4x4 MVP[2];
+};
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+    uint gl_Layer [[render_target_array_index]];
+};
+
+struct main0_in
+{
+    float4 Position [[attribute(0)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]])
+{
+    main0_out out = {};
+    uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1];
+    gl_InstanceIndex /= spvViewMask[1];
+    out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position;
+    out.gl_Layer = gl_ViewIndex - spvViewMask[0];
+    return out;
+}
+
--- a/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert
+++ b/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert
@ -0,0 +1,29 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct MVPs
+{
+    float4x4 MVP[2];
+};
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+    uint gl_Layer [[render_target_array_index]];
+};
+
+struct main0_in
+{
+    float4 Position [[attribute(0)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]])
+{
+    main0_out out = {};
+    const uint gl_ViewIndex = 0;
+    out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position;
+    return out;
+}
+
--- a/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk
+++ b/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk
@ -0,0 +1,37 @@
+#version 450
+#extension GL_EXT_nonuniform_qualifier : require
+
+layout(set = 0, binding = 2, std140) uniform UBO
+{
+    vec4 v[64];
+} ubos[];
+
+layout(set = 0, binding = 3, std430) readonly buffer SSBO
+{
+    vec4 v[];
+} ssbos[];
+
+layout(set = 0, binding = 0) uniform texture2D uSamplers[];
+layout(set = 0, binding = 1) uniform sampler uSamps[];
+layout(set = 0, binding = 4) uniform sampler2D uCombinedSamplers[];
+
+layout(location = 0) flat in int vIndex;
+layout(location = 0) out vec4 FragColor;
+layout(location = 1) in vec2 vUV;
+
+void main()
+{
+    int i = vIndex;
+    int _59 = i + 10;
+    int _64 = i + 40;
+    FragColor = texture(sampler2D(uSamplers[nonuniformEXT(_59)], uSamps[nonuniformEXT(_64)]), vUV);
+    int _71 = i + 10;
+    FragColor = texture(uCombinedSamplers[nonuniformEXT(_71)], vUV);
+    int _77 = i + 20;
+    int _80 = i + 40;
+    FragColor += ubos[nonuniformEXT(_77)].v[_80];
+    int _87 = i + 50;
+    int _90 = i + 60;
+    FragColor += ssbos[nonuniformEXT(_87)].v[_90];
+}
+
--- a/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp
+++ b/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp
@ -0,0 +1,25 @@
+#version 450
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(binding = 1, rgba32f) uniform writeonly image2D outImageTexture;
+
+void main()
+{
+    int _30;
+    _30 = 7;
+    int _27_copy;
+    for (int _27 = 7; _27 >= 0; _27_copy = _27, _27--, _30 = _27_copy)
+    {
+        if (5.0 > float(_27))
+        {
+            break;
+        }
+        else
+        {
+            continue;
+        }
+        continue;
+    }
+    imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0));
+}
+
--- a/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp
+++ b/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp
@ -0,0 +1,18 @@
+#version 430
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _8_9
+{
+    float _m0[];
+} _9;
+
+layout(binding = 1, std430) buffer _8_10
+{
+    float _m0[];
+} _10;
+
+void main()
+{
+    _10._m0[gl_GlobalInvocationID.x] = -_9._m0[gl_GlobalInvocationID.x];
+}
+
--- a/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
+++ b/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
@ -0,0 +1,19 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_NV_gpu_shader5)
+#extension GL_NV_gpu_shader5 : require
+#else
+#error No extension available for FP16.
+#endif
+
+layout(binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out f16vec4 FragColor;
+layout(location = 0) in f16vec2 UV;
+
+void main()
+{
+    FragColor = f16vec4(texture(uTexture, UV));
+}
+
--- a/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk
+++ b/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk
@ -0,0 +1,20 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+
+layout(set = 0, binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out f16vec4 FragColor;
+layout(location = 0) in f16vec2 UV;
+
+void main()
+{
+    FragColor = f16vec4(texture(uTexture, UV));
+}
+
--- a/reference/shaders/comp/outer-product.comp
+++ b/reference/shaders/comp/outer-product.comp
@ -0,0 +1,36 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) writeonly buffer SSBO
+{
+    mat2 m22;
+    mat2x3 m23;
+    mat2x4 m24;
+    mat3x2 m32;
+    mat3 m33;
+    mat3x4 m34;
+    mat4x2 m42;
+    mat4x3 m43;
+    mat4 m44;
+} _21;
+
+layout(binding = 1, std430) readonly buffer ReadSSBO
+{
+    vec2 v2;
+    vec3 v3;
+    vec4 v4;
+} _26;
+
+void main()
+{
+    _21.m22 = outerProduct(_26.v2, _26.v2);
+    _21.m23 = outerProduct(_26.v3, _26.v2);
+    _21.m24 = outerProduct(_26.v4, _26.v2);
+    _21.m32 = outerProduct(_26.v2, _26.v3);
+    _21.m33 = outerProduct(_26.v3, _26.v3);
+    _21.m34 = outerProduct(_26.v4, _26.v3);
+    _21.m42 = outerProduct(_26.v2, _26.v4);
+    _21.m43 = outerProduct(_26.v3, _26.v4);
+    _21.m44 = outerProduct(_26.v4, _26.v4);
+}
+
--- a/reference/shaders/comp/scalar-std450-distance-length-normalize.comp
+++ b/reference/shaders/comp/scalar-std450-distance-length-normalize.comp
@ -0,0 +1,19 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+    float a;
+    float b;
+    float c;
+    float d;
+    float e;
+} _9;
+
+void main()
+{
+    _9.c = distance(_9.a, _9.b);
+    _9.d = length(_9.a);
+    _9.e = normalize(_9.a);
+}
+
--- a/reference/shaders/frag/for-loop-continue-control-flow.frag
+++ b/reference/shaders/frag/for-loop-continue-control-flow.frag
@ -0,0 +1,34 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = vec4(0.0);
+    int i = 0;
+    int _36;
+    for (;;)
+    {
+        if (i < 3)
+        {
+            int a = i;
+            FragColor[a] += float(i);
+            if (false)
+            {
+                _36 = 1;
+            }
+            else
+            {
+                int _41 = i;
+                i = _41 + 1;
+                _36 = _41;
+            }
+            continue;
+        }
+        else
+        {
+            break;
+        }
+    }
+}
+
--- a/reference/shaders/frag/scalar-refract-reflect.frag
+++ b/reference/shaders/frag/scalar-refract-reflect.frag
@ -0,0 +1,13 @@
+#version 450
+
+layout(location = 0) out float FragColor;
+layout(location = 0) in vec3 vRefract;
+
+void main()
+{
+    FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
+    FragColor += reflect(vRefract.x, vRefract.y);
+    FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+    FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
+
--- a/reference/shaders/frag/selection-block-dominator.frag
+++ b/reference/shaders/frag/selection-block-dominator.frag
@ -0,0 +1,20 @@
+#version 450
+
+layout(location = 0) flat in int vIndex;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    int v;
+    if (vIndex != 1)
+    {
+        FragColor = vec4(1.0);
+        return;
+    }
+    else
+    {
+        v = 10;
+    }
+    FragColor = vec4(float(v));
+}
+
--- a/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag
+++ b/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag
@ -0,0 +1,159 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 93
+; Schema: 0
+               OpCapability Shader
+               OpCapability ShaderNonUniformEXT
+               OpCapability RuntimeDescriptorArrayEXT
+               OpCapability UniformBufferArrayNonUniformIndexingEXT
+               OpCapability SampledImageArrayNonUniformIndexingEXT
+               OpCapability StorageBufferArrayNonUniformIndexingEXT
+               OpExtension "SPV_EXT_descriptor_indexing"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %vIndex %FragColor %vUV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_nonuniform_qualifier"
+               OpName %main "main"
+               OpName %i "i"
+               OpName %vIndex "vIndex"
+               OpName %FragColor "FragColor"
+               OpName %uSamplers "uSamplers"
+               OpName %uSamps "uSamps"
+               OpName %vUV "vUV"
+               OpName %uCombinedSamplers "uCombinedSamplers"
+               OpName %UBO "UBO"
+               OpMemberName %UBO 0 "v"
+               OpName %ubos "ubos"
+               OpName %SSBO "SSBO"
+               OpMemberName %SSBO 0 "v"
+               OpName %ssbos "ssbos"
+               OpDecorate %vIndex Flat
+               OpDecorate %vIndex Location 0
+               OpDecorate %FragColor Location 0
+               OpDecorate %uSamplers DescriptorSet 0
+               OpDecorate %uSamplers Binding 0
+
+               OpDecorate %sampled_image NonUniformEXT
+               OpDecorate %combined_sampler NonUniformEXT
+               OpDecorate %ubo_ptr_copy NonUniformEXT
+               OpDecorate %ssbo_ptr_copy NonUniformEXT
+
+               OpDecorate %uSamps DescriptorSet 1
+               OpDecorate %uSamps Binding 0
+               OpDecorate %vUV Location 1
+               OpDecorate %uCombinedSamplers DescriptorSet 0
+               OpDecorate %uCombinedSamplers Binding 4
+               OpDecorate %_arr_v4float_uint_64 ArrayStride 16
+               OpMemberDecorate %UBO 0 Offset 0
+               OpDecorate %UBO Block
+               OpDecorate %ubos DescriptorSet 2
+               OpDecorate %ubos Binding 0
+               OpDecorate %_runtimearr_v4float ArrayStride 16
+               OpMemberDecorate %SSBO 0 NonWritable
+               OpMemberDecorate %SSBO 0 Offset 0
+               OpDecorate %SSBO BufferBlock
+               OpDecorate %ssbos DescriptorSet 3
+               OpDecorate %ssbos Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+%_ptr_Input_int = OpTypePointer Input %int
+     %vIndex = OpVariable %_ptr_Input_int Input
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %16 = OpTypeImage %float 2D 0 0 0 1 Unknown
+%_runtimearr_16 = OpTypeRuntimeArray %16
+%_ptr_UniformConstant__runtimearr_16 = OpTypePointer UniformConstant %_runtimearr_16
+  %uSamplers = OpVariable %_ptr_UniformConstant__runtimearr_16 UniformConstant
+     %int_10 = OpConstant %int 10
+%_ptr_UniformConstant_16 = OpTypePointer UniformConstant %16
+         %27 = OpTypeSampler
+%_runtimearr_27 = OpTypeRuntimeArray %27
+%_ptr_UniformConstant__runtimearr_27 = OpTypePointer UniformConstant %_runtimearr_27
+     %uSamps = OpVariable %_ptr_UniformConstant__runtimearr_27 UniformConstant
+     %int_40 = OpConstant %int 40
+%_ptr_UniformConstant_27 = OpTypePointer UniformConstant %27
+         %38 = OpTypeSampledImage %16
+    %v2float = OpTypeVector %float 2
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+        %vUV = OpVariable %_ptr_Input_v2float Input
+%_runtimearr_38 = OpTypeRuntimeArray %38
+%_ptr_UniformConstant__runtimearr_38 = OpTypePointer UniformConstant %_runtimearr_38
+%uCombinedSamplers = OpVariable %_ptr_UniformConstant__runtimearr_38 UniformConstant
+%_ptr_UniformConstant_38 = OpTypePointer UniformConstant %38
+       %uint = OpTypeInt 32 0
+    %uint_64 = OpConstant %uint 64
+%_arr_v4float_uint_64 = OpTypeArray %v4float %uint_64
+        %UBO = OpTypeStruct %_arr_v4float_uint_64
+%_runtimearr_UBO = OpTypeRuntimeArray %UBO
+%_ptr_Uniform__runtimearr_UBO = OpTypePointer Uniform %_runtimearr_UBO
+       %ubos = OpVariable %_ptr_Uniform__runtimearr_UBO Uniform
+     %int_20 = OpConstant %int 20
+      %int_0 = OpConstant %int 0
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_runtimearr_v4float = OpTypeRuntimeArray %v4float
+       %SSBO = OpTypeStruct %_runtimearr_v4float
+%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO
+%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO
+      %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform
+     %int_50 = OpConstant %int 50
+     %int_60 = OpConstant %int 60
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+         %11 = OpLoad %int %vIndex
+               OpStore %i %11
+         %20 = OpLoad %int %i
+         %22 = OpIAdd %int %20 %int_10
+         %23 = OpCopyObject %int %22
+         %25 = OpAccessChain %_ptr_UniformConstant_16 %uSamplers %23
+         %26 = OpLoad %16 %25
+         %31 = OpLoad %int %i
+         %33 = OpIAdd %int %31 %int_40
+         %34 = OpCopyObject %int %33
+         %36 = OpAccessChain %_ptr_UniformConstant_27 %uSamps %34
+         %37 = OpLoad %27 %36
+         %sampled_image = OpSampledImage %38 %26 %37
+         %43 = OpLoad %v2float %vUV
+         %44 = OpImageSampleImplicitLod %v4float %sampled_image %43
+               OpStore %FragColor %44
+         %48 = OpLoad %int %i
+         %49 = OpIAdd %int %48 %int_10
+         %50 = OpCopyObject %int %49
+         %52 = OpAccessChain %_ptr_UniformConstant_38 %uCombinedSamplers %50
+         %combined_sampler = OpLoad %38 %52
+         %54 = OpLoad %v2float %vUV
+         %55 = OpImageSampleImplicitLod %v4float %combined_sampler %54
+               OpStore %FragColor %55
+         %63 = OpLoad %int %i
+         %65 = OpIAdd %int %63 %int_20
+         %66 = OpCopyObject %int %65
+         %68 = OpLoad %int %i
+         %69 = OpIAdd %int %68 %int_40
+         %70 = OpCopyObject %int %69
+         %ubo_ptr = OpAccessChain %_ptr_Uniform_v4float %ubos %66 %int_0 %70
+         %ubo_ptr_copy = OpCopyObject %_ptr_Uniform_v4float %ubo_ptr
+         %73 = OpLoad %v4float %ubo_ptr_copy
+         %74 = OpLoad %v4float %FragColor
+         %75 = OpFAdd %v4float %74 %73
+               OpStore %FragColor %75
+         %81 = OpLoad %int %i
+         %83 = OpIAdd %int %81 %int_50
+         %84 = OpCopyObject %int %83
+         %85 = OpLoad %int %i
+         %87 = OpIAdd %int %85 %int_60
+         %88 = OpCopyObject %int %87
+         %ssbo_ptr = OpAccessChain %_ptr_Uniform_v4float %ssbos %84 %int_0 %88
+         %ssbo_ptr_copy = OpCopyObject %_ptr_Uniform_v4float %ssbo_ptr
+         %90 = OpLoad %v4float %ssbo_ptr_copy
+         %91 = OpLoad %v4float %FragColor
+         %92 = OpFAdd %v4float %91 %90
+               OpStore %FragColor %92
+               OpReturn
+               OpFunctionEnd
--- a/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
+++ b/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 25
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageInputOutput16
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %UV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTexture "uTexture"
+               OpName %UV "UV"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+               OpDecorate %UV Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+       %float = OpTypeFloat 32
+     %v4half = OpTypeVector %half 4
+     %v4float = OpTypeVector %float 4
+%_ptr_Output_v4half = OpTypePointer Output %v4half
+  %FragColor = OpVariable %_ptr_Output_v4half Output
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %12 = OpTypeSampledImage %11
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
+   %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
+     %v2half = OpTypeVector %half 2
+%_ptr_Input_v2half = OpTypePointer Input %v2half
+         %UV = OpVariable %_ptr_Input_v2half Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %15 = OpLoad %12 %uTexture
+         %19 = OpLoad %v2half %UV
+         %23 = OpImageSampleImplicitLod %v4float %15 %19
+		 %24 = OpFConvert %v4half %23
+               OpStore %FragColor %24
+               OpReturn
+               OpFunctionEnd
--- a/shaders-hlsl/comp/outer-product.comp
+++ b/shaders-hlsl/comp/outer-product.comp
@ -0,0 +1,37 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(set = 0, binding = 0, std430) writeonly buffer SSBO
+{
+	mat2 m22;
+	mat2x3 m23;
+	mat2x4 m24;
+	mat3x2 m32;
+	mat3 m33;
+	mat3x4 m34;
+	mat4x2 m42;
+	mat4x3 m43;
+	mat4 m44;
+};
+
+layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO
+{
+	vec2 v2;
+	vec3 v3;
+	vec4 v4;
+};
+
+void main()
+{
+	m22 = outerProduct(v2, v2);
+	m23 = outerProduct(v3, v2);
+	m24 = outerProduct(v4, v2);
+
+	m32 = outerProduct(v2, v3);
+	m33 = outerProduct(v3, v3);
+	m34 = outerProduct(v4, v3);
+
+	m42 = outerProduct(v2, v4);
+	m43 = outerProduct(v3, v4);
+	m44 = outerProduct(v4, v4);
+}
--- a/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp
+++ b/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp
@ -0,0 +1,18 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std430, set = 0, binding = 0) buffer SSBO
+{
+	float a;
+	float b;
+	float c;
+	float d;
+	float e;
+};
+
+void main()
+{
+	c = distance(a, b);
+	d = length(a);
+	e = normalize(a);
+}
--- a/shaders-hlsl/frag/for-loop-continue-control-flow.frag
+++ b/shaders-hlsl/frag/for-loop-continue-control-flow.frag
@ -0,0 +1,11 @@
+#version 450
+layout(location = 0) out vec4 FragColor;
+void main()
+{
+	FragColor = vec4(0.0);
+	for (int i = 0; i < 3; (0 > 1) ? 1 : i ++)
+	{
+		int a = i;
+		FragColor[a] += float(i);
+	}
+}
--- a/shaders-hlsl/frag/scalar-refract-reflect.frag
+++ b/shaders-hlsl/frag/scalar-refract-reflect.frag
@ -0,0 +1,11 @@
+#version 450
+layout(location = 0) out float FragColor;
+layout(location = 0) in vec3 vRefract; 
+
+void main()
+{
+	FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
+	FragColor += reflect(vRefract.x, vRefract.y);
+	FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+	FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
--- a/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
+++ b/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 25
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageInputOutput16
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %UV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTexture "uTexture"
+               OpName %UV "UV"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+               OpDecorate %UV Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+       %float = OpTypeFloat 32
+     %v4half = OpTypeVector %half 4
+     %v4float = OpTypeVector %float 4
+%_ptr_Output_v4half = OpTypePointer Output %v4half
+  %FragColor = OpVariable %_ptr_Output_v4half Output
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %12 = OpTypeSampledImage %11
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
+   %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
+     %v2half = OpTypeVector %half 2
+%_ptr_Input_v2half = OpTypePointer Input %v2half
+         %UV = OpVariable %_ptr_Input_v2half Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %15 = OpLoad %12 %uTexture
+         %19 = OpLoad %v2half %UV
+         %23 = OpImageSampleImplicitLod %v4float %15 %19
+		 %24 = OpFConvert %v4half %23
+               OpStore %FragColor %24
+               OpReturn
+               OpFunctionEnd
--- a/shaders-msl/comp/outer-product.comp
+++ b/shaders-msl/comp/outer-product.comp
@ -0,0 +1,37 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(set = 0, binding = 0, std430) writeonly buffer SSBO
+{
+	mat2 m22;
+	mat2x3 m23;
+	mat2x4 m24;
+	mat3x2 m32;
+	mat3 m33;
+	mat3x4 m34;
+	mat4x2 m42;
+	mat4x3 m43;
+	mat4 m44;
+};
+
+layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO
+{
+	vec2 v2;
+	vec3 v3;
+	vec4 v4;
+};
+
+void main()
+{
+	m22 = outerProduct(v2, v2);
+	m23 = outerProduct(v3, v2);
+	m24 = outerProduct(v4, v2);
+
+	m32 = outerProduct(v2, v3);
+	m33 = outerProduct(v3, v3);
+	m34 = outerProduct(v4, v3);
+
+	m42 = outerProduct(v2, v4);
+	m43 = outerProduct(v3, v4);
+	m44 = outerProduct(v4, v4);
+}
--- a/shaders-msl/comp/scalar-std450-distance-length-normalize.comp
+++ b/shaders-msl/comp/scalar-std450-distance-length-normalize.comp
@ -0,0 +1,18 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std430, set = 0, binding = 0) buffer SSBO
+{
+	float a;
+	float b;
+	float c;
+	float d;
+	float e;
+};
+
+void main()
+{
+	c = distance(a, b);
+	d = length(a);
+	e = normalize(a);
+}
--- a/shaders-msl/frag/for-loop-continue-control-flow.frag
+++ b/shaders-msl/frag/for-loop-continue-control-flow.frag
@ -0,0 +1,11 @@
+#version 450
+layout(location = 0) out vec4 FragColor;
+void main()
+{
+	FragColor = vec4(0.0);
+	for (int i = 0; i < 3; (0 > 1) ? 1 : i ++)
+	{
+		int a = i;
+		FragColor[a] += float(i);
+	}
+}
--- a/shaders-msl/frag/scalar-refract-reflect.frag
+++ b/shaders-msl/frag/scalar-refract-reflect.frag
@ -0,0 +1,11 @@
+#version 450
+layout(location = 0) out float FragColor;
+layout(location = 0) in vec3 vRefract; 
+
+void main()
+{
+	FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
+	FragColor += reflect(vRefract.x, vRefract.y);
+	FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+	FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
--- a/shaders-msl/frag/subgroup-builtins.msl22.frag
+++ b/shaders-msl/frag/subgroup-builtins.msl22.frag
@ -0,0 +1,10 @@
+#version 450
+#extension GL_KHR_shader_subgroup_basic : require
+
+layout(location = 0) out uvec2 FragColor;
+
+void main()
+{
+	FragColor.x = gl_SubgroupSize;
+	FragColor.y = gl_SubgroupInvocationID;
+}
--- a/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag
+++ b/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag
@ -0,0 +1,14 @@
+#version 310 es
+#extension GL_EXT_multiview : require
+precision mediump float;
+
+layout(location = 0) in vec4 vColor;
+layout(location = 1) in vec2 vTex[4];
+layout(binding = 0) uniform sampler2D uTex;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = vColor * texture(uTex, vTex[gl_ViewIndex]);
+}
+
--- a/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag
+++ b/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag
@ -0,0 +1,119 @@
+#version 450
+#extension GL_KHR_shader_subgroup_basic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_shuffle : require
+#extension GL_KHR_shader_subgroup_shuffle_relative : require
+#extension GL_KHR_shader_subgroup_arithmetic : require
+#extension GL_KHR_shader_subgroup_clustered : require
+#extension GL_KHR_shader_subgroup_quad : require
+
+layout(location = 0) out float FragColor;
+
+void main()
+{
+	// basic
+	FragColor = float(gl_SubgroupSize);
+	FragColor = float(gl_SubgroupInvocationID);
+	subgroupBarrier();
+	subgroupMemoryBarrier();
+	subgroupMemoryBarrierBuffer();
+	subgroupMemoryBarrierImage();
+	bool elected = subgroupElect();
+
+	// ballot
+	FragColor = float(gl_SubgroupEqMask);
+	FragColor = float(gl_SubgroupGeMask);
+	FragColor = float(gl_SubgroupGtMask);
+	FragColor = float(gl_SubgroupLeMask);
+	FragColor = float(gl_SubgroupLtMask);
+	vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
+	vec3 first = subgroupBroadcastFirst(vec3(20.0));
+	uvec4 ballot_value = subgroupBallot(true);
+	bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
+	bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
+	uint bit_count = subgroupBallotBitCount(ballot_value);
+	uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
+	uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
+	uint lsb = subgroupBallotFindLSB(ballot_value);
+	uint msb = subgroupBallotFindMSB(ballot_value);
+
+	// shuffle
+	uint shuffled = subgroupShuffle(10u, 8u);
+	uint shuffled_xor = subgroupShuffleXor(30u, 8u);
+
+	// shuffle relative 
+	uint shuffled_up = subgroupShuffleUp(20u, 4u);
+	uint shuffled_down = subgroupShuffleDown(20u, 4u);
+
+	// vote
+	bool has_all = subgroupAll(true);
+	bool has_any = subgroupAny(true);
+	bool has_equal = subgroupAllEqual(0);
+	has_equal = subgroupAllEqual(true);
+
+	// arithmetic
+	vec4 added = subgroupAdd(vec4(20.0));
+	ivec4 iadded = subgroupAdd(ivec4(20));
+	vec4 multiplied = subgroupMul(vec4(20.0));
+	ivec4 imultiplied = subgroupMul(ivec4(20));
+	vec4 lo = subgroupMin(vec4(20.0));
+	vec4 hi = subgroupMax(vec4(20.0));
+	ivec4 slo = subgroupMin(ivec4(20));
+	ivec4 shi = subgroupMax(ivec4(20));
+	uvec4 ulo = subgroupMin(uvec4(20));
+	uvec4 uhi = subgroupMax(uvec4(20));
+	uvec4 anded = subgroupAnd(ballot_value);
+	uvec4 ored = subgroupOr(ballot_value);
+	uvec4 xored = subgroupXor(ballot_value);
+
+	added = subgroupInclusiveAdd(added);
+	iadded = subgroupInclusiveAdd(iadded);
+	multiplied = subgroupInclusiveMul(multiplied);
+	imultiplied = subgroupInclusiveMul(imultiplied);
+	//lo = subgroupInclusiveMin(lo);  // FIXME: Unsupported by Metal
+	//hi = subgroupInclusiveMax(hi);
+	//slo = subgroupInclusiveMin(slo);
+	//shi = subgroupInclusiveMax(shi);
+	//ulo = subgroupInclusiveMin(ulo);
+	//uhi = subgroupInclusiveMax(uhi);
+	//anded = subgroupInclusiveAnd(anded);
+	//ored = subgroupInclusiveOr(ored);
+	//xored = subgroupInclusiveXor(ored);
+	//added = subgroupExclusiveAdd(lo);
+
+	added = subgroupExclusiveAdd(multiplied);
+	multiplied = subgroupExclusiveMul(multiplied);
+	iadded = subgroupExclusiveAdd(imultiplied);
+	imultiplied = subgroupExclusiveMul(imultiplied);
+	//lo = subgroupExclusiveMin(lo);  // FIXME: Unsupported by Metal
+	//hi = subgroupExclusiveMax(hi);
+	//ulo = subgroupExclusiveMin(ulo);
+	//uhi = subgroupExclusiveMax(uhi);
+	//slo = subgroupExclusiveMin(slo);
+	//shi = subgroupExclusiveMax(shi);
+	//anded = subgroupExclusiveAnd(anded);
+	//ored = subgroupExclusiveOr(ored);
+	//xored = subgroupExclusiveXor(ored);
+
+	// clustered
+	added = subgroupClusteredAdd(added, 4u);
+	multiplied = subgroupClusteredMul(multiplied, 4u);
+	iadded = subgroupClusteredAdd(iadded, 4u);
+	imultiplied = subgroupClusteredMul(imultiplied, 4u);
+	lo = subgroupClusteredMin(lo, 4u);
+	hi = subgroupClusteredMax(hi, 4u);
+	ulo = subgroupClusteredMin(ulo, 4u);
+	uhi = subgroupClusteredMax(uhi, 4u);
+	slo = subgroupClusteredMin(slo, 4u);
+	shi = subgroupClusteredMax(shi, 4u);
+	anded = subgroupClusteredAnd(anded, 4u);
+	ored = subgroupClusteredOr(ored, 4u);
+	xored = subgroupClusteredXor(xored, 4u);
+
+	// quad
+	vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
+	vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
+	vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
+	vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
+}
--- a/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert
+++ b/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert
@ -0,0 +1,14 @@
+#version 310 es
+#extension GL_EXT_multiview : require
+
+layout(std140, binding = 0) uniform MVPs
+{
+	mat4 MVP[2];
+};
+
+layout(location = 0) in vec4 Position;
+
+void main()
+{
+	gl_Position = MVP[gl_ViewIndex] * Position;
+}
--- a/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert
+++ b/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert
@ -0,0 +1,14 @@
+#version 310 es
+#extension GL_EXT_multiview : require
+
+layout(std140, binding = 0) uniform MVPs
+{
+	mat4 MVP[2];
+};
+
+layout(location = 0) in vec4 Position;
+
+void main()
+{
+	gl_Position = MVP[gl_ViewIndex] * Position;
+}
--- a/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag
+++ b/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag
@ -0,0 +1,159 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 93
+; Schema: 0
+               OpCapability Shader
+               OpCapability ShaderNonUniformEXT
+               OpCapability RuntimeDescriptorArrayEXT
+               OpCapability UniformBufferArrayNonUniformIndexingEXT
+               OpCapability SampledImageArrayNonUniformIndexingEXT
+               OpCapability StorageBufferArrayNonUniformIndexingEXT
+               OpExtension "SPV_EXT_descriptor_indexing"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %vIndex %FragColor %vUV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_nonuniform_qualifier"
+               OpName %main "main"
+               OpName %i "i"
+               OpName %vIndex "vIndex"
+               OpName %FragColor "FragColor"
+               OpName %uSamplers "uSamplers"
+               OpName %uSamps "uSamps"
+               OpName %vUV "vUV"
+               OpName %uCombinedSamplers "uCombinedSamplers"
+               OpName %UBO "UBO"
+               OpMemberName %UBO 0 "v"
+               OpName %ubos "ubos"
+               OpName %SSBO "SSBO"
+               OpMemberName %SSBO 0 "v"
+               OpName %ssbos "ssbos"
+               OpDecorate %vIndex Flat
+               OpDecorate %vIndex Location 0
+               OpDecorate %FragColor Location 0
+               OpDecorate %uSamplers DescriptorSet 0
+               OpDecorate %uSamplers Binding 0
+
+               OpDecorate %sampled_image NonUniformEXT
+               OpDecorate %combined_sampler NonUniformEXT
+               OpDecorate %ubo_ptr_copy NonUniformEXT
+               OpDecorate %ssbo_ptr_copy NonUniformEXT
+
+               OpDecorate %uSamps DescriptorSet 0
+               OpDecorate %uSamps Binding 1
+               OpDecorate %vUV Location 1
+               OpDecorate %uCombinedSamplers DescriptorSet 0
+               OpDecorate %uCombinedSamplers Binding 4
+               OpDecorate %_arr_v4float_uint_64 ArrayStride 16
+               OpMemberDecorate %UBO 0 Offset 0
+               OpDecorate %UBO Block
+               OpDecorate %ubos DescriptorSet 0
+               OpDecorate %ubos Binding 2
+               OpDecorate %_runtimearr_v4float ArrayStride 16
+               OpMemberDecorate %SSBO 0 NonWritable
+               OpMemberDecorate %SSBO 0 Offset 0
+               OpDecorate %SSBO BufferBlock
+               OpDecorate %ssbos DescriptorSet 0
+               OpDecorate %ssbos Binding 3
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+%_ptr_Input_int = OpTypePointer Input %int
+     %vIndex = OpVariable %_ptr_Input_int Input
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %16 = OpTypeImage %float 2D 0 0 0 1 Unknown
+%_runtimearr_16 = OpTypeRuntimeArray %16
+%_ptr_UniformConstant__runtimearr_16 = OpTypePointer UniformConstant %_runtimearr_16
+  %uSamplers = OpVariable %_ptr_UniformConstant__runtimearr_16 UniformConstant
+     %int_10 = OpConstant %int 10
+%_ptr_UniformConstant_16 = OpTypePointer UniformConstant %16
+         %27 = OpTypeSampler
+%_runtimearr_27 = OpTypeRuntimeArray %27
+%_ptr_UniformConstant__runtimearr_27 = OpTypePointer UniformConstant %_runtimearr_27
+     %uSamps = OpVariable %_ptr_UniformConstant__runtimearr_27 UniformConstant
+     %int_40 = OpConstant %int 40
+%_ptr_UniformConstant_27 = OpTypePointer UniformConstant %27
+         %38 = OpTypeSampledImage %16
+    %v2float = OpTypeVector %float 2
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+        %vUV = OpVariable %_ptr_Input_v2float Input
+%_runtimearr_38 = OpTypeRuntimeArray %38
+%_ptr_UniformConstant__runtimearr_38 = OpTypePointer UniformConstant %_runtimearr_38
+%uCombinedSamplers = OpVariable %_ptr_UniformConstant__runtimearr_38 UniformConstant
+%_ptr_UniformConstant_38 = OpTypePointer UniformConstant %38
+       %uint = OpTypeInt 32 0
+    %uint_64 = OpConstant %uint 64
+%_arr_v4float_uint_64 = OpTypeArray %v4float %uint_64
+        %UBO = OpTypeStruct %_arr_v4float_uint_64
+%_runtimearr_UBO = OpTypeRuntimeArray %UBO
+%_ptr_Uniform__runtimearr_UBO = OpTypePointer Uniform %_runtimearr_UBO
+       %ubos = OpVariable %_ptr_Uniform__runtimearr_UBO Uniform
+     %int_20 = OpConstant %int 20
+      %int_0 = OpConstant %int 0
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_runtimearr_v4float = OpTypeRuntimeArray %v4float
+       %SSBO = OpTypeStruct %_runtimearr_v4float
+%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO
+%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO
+      %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform
+     %int_50 = OpConstant %int 50
+     %int_60 = OpConstant %int 60
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+         %11 = OpLoad %int %vIndex
+               OpStore %i %11
+         %20 = OpLoad %int %i
+         %22 = OpIAdd %int %20 %int_10
+         %23 = OpCopyObject %int %22
+         %25 = OpAccessChain %_ptr_UniformConstant_16 %uSamplers %23
+         %26 = OpLoad %16 %25
+         %31 = OpLoad %int %i
+         %33 = OpIAdd %int %31 %int_40
+         %34 = OpCopyObject %int %33
+         %36 = OpAccessChain %_ptr_UniformConstant_27 %uSamps %34
+         %37 = OpLoad %27 %36
+         %sampled_image = OpSampledImage %38 %26 %37
+         %43 = OpLoad %v2float %vUV
+         %44 = OpImageSampleImplicitLod %v4float %sampled_image %43
+               OpStore %FragColor %44
+         %48 = OpLoad %int %i
+         %49 = OpIAdd %int %48 %int_10
+         %50 = OpCopyObject %int %49
+         %52 = OpAccessChain %_ptr_UniformConstant_38 %uCombinedSamplers %50
+         %combined_sampler = OpLoad %38 %52
+         %54 = OpLoad %v2float %vUV
+         %55 = OpImageSampleImplicitLod %v4float %combined_sampler %54
+               OpStore %FragColor %55
+         %63 = OpLoad %int %i
+         %65 = OpIAdd %int %63 %int_20
+         %66 = OpCopyObject %int %65
+         %68 = OpLoad %int %i
+         %69 = OpIAdd %int %68 %int_40
+         %70 = OpCopyObject %int %69
+         %ubo_ptr = OpAccessChain %_ptr_Uniform_v4float %ubos %66 %int_0 %70
+         %ubo_ptr_copy = OpCopyObject %_ptr_Uniform_v4float %ubo_ptr
+         %73 = OpLoad %v4float %ubo_ptr_copy
+         %74 = OpLoad %v4float %FragColor
+         %75 = OpFAdd %v4float %74 %73
+               OpStore %FragColor %75
+         %81 = OpLoad %int %i
+         %83 = OpIAdd %int %81 %int_50
+         %84 = OpCopyObject %int %83
+         %85 = OpLoad %int %i
+         %87 = OpIAdd %int %85 %int_60
+         %88 = OpCopyObject %int %87
+         %ssbo_ptr = OpAccessChain %_ptr_Uniform_v4float %ssbos %84 %int_0 %88
+         %ssbo_ptr_copy = OpCopyObject %_ptr_Uniform_v4float %ssbo_ptr
+         %90 = OpLoad %v4float %ssbo_ptr_copy
+         %91 = OpLoad %v4float %FragColor
+         %92 = OpFAdd %v4float %91 %90
+               OpStore %FragColor %92
+               OpReturn
+               OpFunctionEnd
--- a/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp
+++ b/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp
@ -0,0 +1,68 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google spiregg; 0
+; Bound: 42
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %cs_test "main" %gl_GlobalInvocationID %gl_LocalInvocationIndex
+               OpExecutionMode %cs_test LocalSize 8 8 1
+               OpSource HLSL 600
+               OpName %type_2d_image "type.2d.image"
+               OpName %outImageTexture "outImageTexture"
+               OpName %cs_test "cs_test"
+               OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+               OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
+               OpDecorate %outImageTexture DescriptorSet 0
+               OpDecorate %outImageTexture Binding 1
+      %float = OpTypeFloat 32
+    %float_5 = OpConstant %float 5
+    %float_1 = OpConstant %float 1
+        %int = OpTypeInt 32 1
+      %int_7 = OpConstant %int 7
+      %int_0 = OpConstant %int 0
+      %int_1 = OpConstant %int 1
+%type_2d_image = OpTypeImage %float 2D 2 0 0 2 Rgba32f
+%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image
+       %uint = OpTypeInt 32 0
+     %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_Input_uint = OpTypePointer Input %uint
+       %void = OpTypeVoid
+         %19 = OpTypeFunction %void
+     %v2uint = OpTypeVector %uint 2
+    %v4float = OpTypeVector %float 4
+       %bool = OpTypeBool
+%outImageTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
+    %cs_test = OpFunction %void None %19
+         %23 = OpLabel
+         %24 = OpLoad %v3uint %gl_GlobalInvocationID
+         %25 = OpVectorShuffle %v2uint %24 %24 0 1
+               OpBranch %26
+         %26 = OpLabel
+         %27 = OpPhi %int %int_7 %23 %28 %29
+         %30 = OpPhi %int %int_7 %23 %27 %29
+         %31 = OpSGreaterThanEqual %bool %27 %int_0
+               OpLoopMerge %32 %29 None
+               OpBranchConditional %31 %33 %32
+         %33 = OpLabel
+         %34 = OpConvertSToF %float %27
+         %35 = OpFOrdGreaterThan %bool %float_5 %34
+               OpSelectionMerge %29 None
+               OpBranchConditional %35 %36 %29
+         %36 = OpLabel
+               OpBranch %32
+         %29 = OpLabel
+         %28 = OpISub %int %27 %int_1
+               OpBranch %26
+         %32 = OpLabel
+         %37 = OpISub %int %30 %int_1
+         %38 = OpConvertSToF %float %37
+         %39 = OpConvertSToF %float %30
+         %40 = OpCompositeConstruct %v4float %38 %39 %float_1 %float_1
+         %41 = OpLoad %type_2d_image %outImageTexture
+               OpImageWrite %41 %25 %40 None
+               OpReturn
+               OpFunctionEnd
--- a/shaders/asm/extended-debug-extinst.invalid.asm.comp
+++ b/shaders/asm/extended-debug-extinst.invalid.asm.comp
@ -0,0 +1,67 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "DebugInfo"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %2 "main" %3
+               OpExecutionMode %2 LocalSize 1 1 1
+          %4 = OpString "negateInputs.comp"
+          %5 = OpString "negateInputs"
+          %6 = OpString "main"
+          %7 = OpString ""
+               OpSource GLSL 430
+               OpName %2 "main"
+               OpName %3 "gl_GlobalInvocationID"
+               OpDecorate %3 BuiltIn GlobalInvocationId
+               OpDecorate %8 BufferBlock
+               OpDecorate %9 DescriptorSet 0
+               OpDecorate %9 Binding 0
+               OpDecorate %10 DescriptorSet 0
+               OpDecorate %10 Binding 1
+               OpDecorate %11 ArrayStride 4
+               OpMemberDecorate %8 0 Offset 0
+               OpLine %4 0 0
+         %12 = OpTypeBool
+         %13 = OpTypeVoid
+         %14 = OpTypeFunction %13
+         %15 = OpTypeInt 32 0
+         %16 = OpTypeInt 32 1
+         %17 = OpTypeFloat 32
+         %18 = OpTypeVector %15 3
+         %19 = OpTypeVector %17 3
+         %20 = OpTypePointer Input %18
+         %21 = OpTypePointer Uniform %16
+         %22 = OpTypePointer Uniform %17
+         %23 = OpTypeRuntimeArray %16
+         %11 = OpTypeRuntimeArray %17
+          %8 = OpTypeStruct %11
+         %24 = OpTypePointer Uniform %8
+          %9 = OpVariable %24 Uniform
+         %10 = OpVariable %24 Uniform
+               OpLine %4 0 1
+               OpLine %5 1 0
+               OpLine %4 1000 100000
+          %3 = OpVariable %20 Input
+         %25 = OpConstant %16 0
+		 	OpNoLine
+               OpLine %4 1 1
+         %26 = OpExtInst %13 %1 DebugInfoNone
+         %27 = OpExtInst %13 %1 DebugTypeFunction %13
+         %28 = OpExtInst %13 %1 DebugFunction %6 %27 %4 1 1 %4 %7 FlagIsDefinition|FlagPrototyped|FlagIsOptimized 1 %26 %26
+          %2 = OpFunction %13 None %14
+         %29 = OpLabel
+         %30 = OpExtInst %13 %1 DebugScope %28
+               OpLine %4 1 1
+         %31 = OpLoad %18 %3
+         %32 = OpCompositeExtract %15 %31 0
+         %33 = OpAccessChain %22 %9 %25 %32
+         %34 = OpLoad %17 %33
+         %35 = OpFNegate %17 %34
+         %36 = OpAccessChain %22 %10 %25 %32
+               OpStore %36 %35
+			   OpNoLine
+               OpReturn
+               OpFunctionEnd
--- a/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
+++ b/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 25
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageInputOutput16
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %UV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTexture "uTexture"
+               OpName %UV "UV"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+               OpDecorate %UV Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+       %float = OpTypeFloat 32
+     %v4half = OpTypeVector %half 4
+     %v4float = OpTypeVector %float 4
+%_ptr_Output_v4half = OpTypePointer Output %v4half
+  %FragColor = OpVariable %_ptr_Output_v4half Output
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %12 = OpTypeSampledImage %11
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
+   %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
+     %v2half = OpTypeVector %half 2
+%_ptr_Input_v2half = OpTypePointer Input %v2half
+         %UV = OpVariable %_ptr_Input_v2half Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %15 = OpLoad %12 %uTexture
+         %19 = OpLoad %v2half %UV
+         %23 = OpImageSampleImplicitLod %v4float %15 %19
+		 %24 = OpFConvert %v4half %23
+               OpStore %FragColor %24
+               OpReturn
+               OpFunctionEnd
--- a/shaders/comp/outer-product.comp
+++ b/shaders/comp/outer-product.comp
@ -0,0 +1,37 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(set = 0, binding = 0, std430) writeonly buffer SSBO
+{
+	mat2 m22;
+	mat2x3 m23;
+	mat2x4 m24;
+	mat3x2 m32;
+	mat3 m33;
+	mat3x4 m34;
+	mat4x2 m42;
+	mat4x3 m43;
+	mat4 m44;
+};
+
+layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO
+{
+	vec2 v2;
+	vec3 v3;
+	vec4 v4;
+};
+
+void main()
+{
+	m22 = outerProduct(v2, v2);
+	m23 = outerProduct(v3, v2);
+	m24 = outerProduct(v4, v2);
+
+	m32 = outerProduct(v2, v3);
+	m33 = outerProduct(v3, v3);
+	m34 = outerProduct(v4, v3);
+
+	m42 = outerProduct(v2, v4);
+	m43 = outerProduct(v3, v4);
+	m44 = outerProduct(v4, v4);
+}
--- a/shaders/comp/scalar-std450-distance-length-normalize.comp
+++ b/shaders/comp/scalar-std450-distance-length-normalize.comp
@ -0,0 +1,18 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std430, set = 0, binding = 0) buffer SSBO
+{
+	float a;
+	float b;
+	float c;
+	float d;
+	float e;
+};
+
+void main()
+{
+	c = distance(a, b);
+	d = length(a);
+	e = normalize(a);
+}
--- a/shaders/frag/for-loop-continue-control-flow.frag
+++ b/shaders/frag/for-loop-continue-control-flow.frag
@ -0,0 +1,11 @@
+#version 450
+layout(location = 0) out vec4 FragColor;
+void main()
+{
+	FragColor = vec4(0.0);
+	for (int i = 0; i < 3; (0 > 1) ? 1 : i ++)
+	{
+		int a = i;
+		FragColor[a] += float(i);
+	}
+}
--- a/shaders/frag/scalar-refract-reflect.frag
+++ b/shaders/frag/scalar-refract-reflect.frag
@ -0,0 +1,11 @@
+#version 450
+layout(location = 0) out float FragColor;
+layout(location = 0) in vec3 vRefract; 
+
+void main()
+{
+	FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
+	FragColor += reflect(vRefract.x, vRefract.y);
+	FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+	FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
--- a/shaders/frag/selection-block-dominator.frag
+++ b/shaders/frag/selection-block-dominator.frag
@ -0,0 +1,18 @@
+#version 450
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in int vIndex; 
+
+void main()
+{
+	int v;
+	if (vIndex != 1)
+	{
+		FragColor = vec4(1.0);
+		return;
+	}
+	else
+	{
+		v = 10;
+	}
+	FragColor = vec4(v);
+}
--- a/tests-other/msl_resource_binding.spv
+++ b/tests-other/msl_resource_binding.spv
--- a/tests-other/msl_resource_bindings.cpp
+++ b/tests-other/msl_resource_bindings.cpp
@ -0,0 +1,86 @@
+// Testbench for MSL resource binding APIs.
+// It does not validate output at the moment, but it's useful for ad-hoc testing.
+
+#include <spirv_cross_c.h>
+#include <vector>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define SPVC_CHECKED_CALL(x) do { \
+	if ((x) != SPVC_SUCCESS) { \
+		fprintf(stderr, "Failed at line %d.\n", __LINE__); \
+		exit(1); \
+	} \
+} while(0)
+
+static std::vector<SpvId> read_file(const char *path)
+{
+	long len;
+	FILE *file = fopen(path, "rb");
+
+	if (!file)
+		return {};
+
+	fseek(file, 0, SEEK_END);
+	len = ftell(file);
+	rewind(file);
+
+	std::vector<SpvId> buffer(len / sizeof(SpvId));
+	if (fread(buffer.data(), 1, len, file) != (size_t)len)
+	{
+		fclose(file);
+		return {};
+	}
+
+	fclose(file);
+	return buffer;
+}
+
+int main(int argc, char **argv)
+{
+	if (argc != 2)
+		return EXIT_FAILURE;
+
+	auto buffer = read_file(argv[1]);
+	if (buffer.empty())
+		return EXIT_FAILURE;
+
+	spvc_context ctx;
+	spvc_parsed_ir parsed_ir;
+	spvc_compiler compiler;
+
+	SPVC_CHECKED_CALL(spvc_context_create(&ctx));
+	SPVC_CHECKED_CALL(spvc_context_parse_spirv(ctx, buffer.data(), buffer.size(), &parsed_ir));
+	SPVC_CHECKED_CALL(spvc_context_create_compiler(ctx, SPVC_BACKEND_MSL, parsed_ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler));
+	SPVC_CHECKED_CALL(spvc_compiler_msl_add_discrete_descriptor_set(compiler, 3));
+
+	spvc_compiler_options opts;
+	SPVC_CHECKED_CALL(spvc_compiler_create_compiler_options(compiler, &opts));
+	SPVC_CHECKED_CALL(spvc_compiler_options_set_bool(opts, SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS, SPVC_TRUE));
+	SPVC_CHECKED_CALL(spvc_compiler_options_set_uint(opts, SPVC_COMPILER_OPTION_MSL_VERSION, 20000));
+	SPVC_CHECKED_CALL(spvc_compiler_install_compiler_options(compiler, opts));
+
+	spvc_msl_resource_binding binding;
+	spvc_msl_resource_binding_init(&binding);
+	binding.binding = SPVC_MSL_ARGUMENT_BUFFER_BINDING;
+	binding.stage = SpvExecutionModelFragment;
+	binding.desc_set = 0;
+	binding.msl_buffer = 2;
+	SPVC_CHECKED_CALL(spvc_compiler_msl_add_resource_binding(compiler, &binding));
+
+	binding.desc_set = 1;
+	binding.msl_buffer = 3;
+	SPVC_CHECKED_CALL(spvc_compiler_msl_add_resource_binding(compiler, &binding));
+
+	const char *str;
+	SPVC_CHECKED_CALL(spvc_compiler_compile(compiler, &str));
+
+	fprintf(stderr, "Output:\n%s\n", str);
+
+	if (!spvc_compiler_msl_is_resource_used(compiler, SpvExecutionModelFragment, 0, SPVC_MSL_ARGUMENT_BUFFER_BINDING))
+		return EXIT_FAILURE;
+
+	if (!spvc_compiler_msl_is_resource_used(compiler, SpvExecutionModelFragment, 1, SPVC_MSL_ARGUMENT_BUFFER_BINDING))
+		return EXIT_FAILURE;
+}
+