mirror of
https://gitee.com/openharmony/third_party_mesa3d
synced 2024-11-23 07:19:50 +00:00
microsoft: Add CLC frontend and kernel/compute support to DXIL converter
This adds a standalone library which can convert through the pipeline of OpenCL C -> SPIR -> SPIR-V -> NIR -> DXIL. It can add in the libclc implementations of various library functions in the NIR phase, and also massages the NIR to shift it more towards graphics-style compute. This is leveraged by the out-of-tree OpenCLOn12 runtime (https://github.com/microsoft/OpenCLOn12). This is the combination of a lot of commits from our development branch, containing code by several authors. Co-authored-by: Boris Brezillon <boris.brezillon@collabora.com> Co-authored-by: Daniel Stone <daniels@collabora.com> Co-authored-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Acked-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7565>
This commit is contained in:
parent
1885e356e6
commit
ff05da7f8d
33
meson.build
33
meson.build
@ -299,6 +299,26 @@ if with_aco_tests and not with_amd_vk
|
||||
error('ACO tests require Radv')
|
||||
endif
|
||||
|
||||
_microsoft_clc = get_option('microsoft-clc')
|
||||
if _microsoft_clc == 'auto'
|
||||
with_microsoft_clc = false
|
||||
else
|
||||
with_microsoft_clc = _microsoft_clc == 'true'
|
||||
endif
|
||||
|
||||
if with_microsoft_clc
|
||||
with_clc = true
|
||||
dep_clang = dependency(
|
||||
'clang',
|
||||
method: 'cmake',
|
||||
static: true,
|
||||
modules: [
|
||||
'clangBasic', 'clangCodeGen', 'clangDriver', 'clangFrontend', 'clangFrontendTool',
|
||||
'clangHandleCXX', 'clangHandleLLVM',
|
||||
],
|
||||
)
|
||||
endif
|
||||
|
||||
if host_machine.system() == 'darwin'
|
||||
with_dri_platform = 'apple'
|
||||
pre_args += '-DBUILDING_MESA'
|
||||
@ -1470,8 +1490,13 @@ if with_gallium_opencl
|
||||
'lto', 'option', 'objcarcopts', 'profiledata',
|
||||
]
|
||||
endif
|
||||
if with_microsoft_clc
|
||||
llvm_modules += ['target', 'linker', 'irreader', 'option', 'libdriver']
|
||||
endif
|
||||
|
||||
if with_amd_vk or with_gallium_radeonsi or with_gallium_opencl
|
||||
if with_microsoft_clc
|
||||
_llvm_version = '>= 10.0.0'
|
||||
elif with_amd_vk or with_gallium_radeonsi or with_gallium_opencl
|
||||
_llvm_version = '>= 8.0.0'
|
||||
elif with_gallium_swr
|
||||
_llvm_version = '>= 6.0.0'
|
||||
@ -1521,7 +1546,7 @@ if _llvm != 'disabled'
|
||||
optional_modules : llvm_optional_modules,
|
||||
required : (
|
||||
with_amd_vk or with_gallium_radeonsi or with_gallium_swr or
|
||||
with_gallium_opencl or _llvm == 'enabled'
|
||||
with_gallium_opencl or with_microsoft_clc or _llvm == 'enabled'
|
||||
),
|
||||
static : not _shared_llvm,
|
||||
method : _llvm_method,
|
||||
@ -1564,9 +1589,11 @@ elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
|
||||
error('The following drivers require LLVM: Radv, RadeonSI, SWR. One of these is enabled, but LLVM is disabled.')
|
||||
elif with_gallium_opencl
|
||||
error('The OpenCL "Clover" state tracker requires LLVM, but LLVM is disabled.')
|
||||
elif with_microsoft_clc
|
||||
error('The Microsoft CLC compiler requires LLVM, but LLVM is disabled.')
|
||||
endif
|
||||
|
||||
with_opencl_spirv = _opencl != 'disabled' and get_option('opencl-spirv')
|
||||
with_opencl_spirv = (_opencl != 'disabled' and get_option('opencl-spirv')) or with_microsoft_clc
|
||||
if with_opencl_spirv
|
||||
chosen_llvm_version_array = dep_llvm.version().split('.')
|
||||
chosen_llvm_version_major = chosen_llvm_version_array[0].to_int()
|
||||
|
@ -261,6 +261,13 @@ option(
|
||||
value : false,
|
||||
description : 'Enable GLVND support.'
|
||||
)
|
||||
option(
|
||||
'microsoft-clc',
|
||||
type : 'combo',
|
||||
value : 'auto',
|
||||
choices : ['auto', 'true', 'false'],
|
||||
description : 'Build support for the Microsoft CLC to DXIL compiler'
|
||||
)
|
||||
option(
|
||||
'glx-read-only-text',
|
||||
type : 'boolean',
|
||||
|
@ -946,9 +946,45 @@ load("global_ir3", [2, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN
|
||||
intrinsic("bindless_resource_ir3", [1], dest_comp=1, indices=[DESC_SET], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# DXIL specific intrinsics
|
||||
# src[] = { value, mask, index, offset }.
|
||||
intrinsic("store_ssbo_masked_dxil", [1, 1, 1, 1])
|
||||
# src[] = { value, index }.
|
||||
intrinsic("store_shared_dxil", [1, 1])
|
||||
# src[] = { value, mask, index }.
|
||||
intrinsic("store_shared_masked_dxil", [1, 1, 1])
|
||||
# src[] = { value, index }.
|
||||
intrinsic("store_scratch_dxil", [1, 1])
|
||||
# src[] = { index }.
|
||||
load("shared_dxil", [1], [], [CAN_ELIMINATE])
|
||||
# src[] = { index }.
|
||||
load("scratch_dxil", [1], [], [CAN_ELIMINATE])
|
||||
# src[] = { deref_var, offset }
|
||||
load("ptr_dxil", [1, 1], [], [])
|
||||
# src[] = { index, 16-byte-based-offset }
|
||||
load("ubo_dxil", [1, 1], [], [CAN_ELIMINATE])
|
||||
|
||||
# DXIL Shared atomic intrinsics
|
||||
#
|
||||
# All of the shared variable atomic memory operations read a value from
|
||||
# memory, compute a new value using one of the operations below, write the
|
||||
# new value to memory, and return the original value read.
|
||||
#
|
||||
# All operations take 2 sources:
|
||||
#
|
||||
# 0: The index in the i32 array for by the shared memory region
|
||||
# 1: The data parameter to the atomic function (i.e. the value to add
|
||||
# in shared_atomic_add, etc).
|
||||
intrinsic("shared_atomic_add_dxil", src_comp=[1, 1], dest_comp=1)
|
||||
intrinsic("shared_atomic_imin_dxil", src_comp=[1, 1], dest_comp=1)
|
||||
intrinsic("shared_atomic_umin_dxil", src_comp=[1, 1], dest_comp=1)
|
||||
intrinsic("shared_atomic_imax_dxil", src_comp=[1, 1], dest_comp=1)
|
||||
intrinsic("shared_atomic_umax_dxil", src_comp=[1, 1], dest_comp=1)
|
||||
intrinsic("shared_atomic_and_dxil", src_comp=[1, 1], dest_comp=1)
|
||||
intrinsic("shared_atomic_or_dxil", src_comp=[1, 1], dest_comp=1)
|
||||
intrinsic("shared_atomic_xor_dxil", src_comp=[1, 1], dest_comp=1)
|
||||
intrinsic("shared_atomic_exchange_dxil", src_comp=[1, 1], dest_comp=1)
|
||||
intrinsic("shared_atomic_comp_swap_dxil", src_comp=[1, 1, 1], dest_comp=1)
|
||||
|
||||
# Intrinsics used by the Midgard/Bifrost blend pipeline. These are defined
|
||||
# within a blend shader to read/write the raw value from the tile buffer,
|
||||
# without applying any format conversion in the process. If the shader needs
|
||||
|
@ -91,7 +91,7 @@ endif
|
||||
if with_any_intel
|
||||
subdir('intel')
|
||||
endif
|
||||
if with_gallium_d3d12
|
||||
if with_microsoft_clc or with_gallium_d3d12
|
||||
subdir('microsoft')
|
||||
endif
|
||||
subdir('mesa')
|
||||
|
1447
src/microsoft/clc/clc_compiler.c
Normal file
1447
src/microsoft/clc/clc_compiler.c
Normal file
File diff suppressed because it is too large
Load Diff
266
src/microsoft/clc/clc_compiler.h
Normal file
266
src/microsoft/clc/clc_compiler.h
Normal file
@ -0,0 +1,266 @@
|
||||
/*
|
||||
* Copyright © Microsoft Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CLC_COMPILER_H
|
||||
#define CLC_COMPILER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
struct clc_named_value {
|
||||
const char *name;
|
||||
const char *value;
|
||||
};
|
||||
|
||||
struct clc_compile_args {
|
||||
const struct clc_named_value *headers;
|
||||
unsigned num_headers;
|
||||
struct clc_named_value source;
|
||||
const char * const *args;
|
||||
unsigned num_args;
|
||||
};
|
||||
|
||||
struct clc_linker_args {
|
||||
const struct clc_object * const *in_objs;
|
||||
unsigned num_in_objs;
|
||||
unsigned create_library;
|
||||
};
|
||||
|
||||
typedef void (*clc_msg_callback)(void *priv, const char *msg);
|
||||
|
||||
struct clc_logger {
|
||||
void *priv;
|
||||
clc_msg_callback error;
|
||||
clc_msg_callback warning;
|
||||
};
|
||||
|
||||
struct spirv_binary {
|
||||
uint32_t *data;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
enum clc_kernel_arg_type_qualifier {
|
||||
CLC_KERNEL_ARG_TYPE_CONST = 1 << 0,
|
||||
CLC_KERNEL_ARG_TYPE_RESTRICT = 1 << 1,
|
||||
CLC_KERNEL_ARG_TYPE_VOLATILE = 1 << 2,
|
||||
};
|
||||
|
||||
enum clc_kernel_arg_access_qualifier {
|
||||
CLC_KERNEL_ARG_ACCESS_READ = 1 << 0,
|
||||
CLC_KERNEL_ARG_ACCESS_WRITE = 1 << 1,
|
||||
};
|
||||
|
||||
enum clc_kernel_arg_address_qualifier {
|
||||
CLC_KERNEL_ARG_ADDRESS_PRIVATE,
|
||||
CLC_KERNEL_ARG_ADDRESS_CONSTANT,
|
||||
CLC_KERNEL_ARG_ADDRESS_LOCAL,
|
||||
CLC_KERNEL_ARG_ADDRESS_GLOBAL,
|
||||
};
|
||||
|
||||
struct clc_kernel_arg {
|
||||
const char *name;
|
||||
const char *type_name;
|
||||
unsigned type_qualifier;
|
||||
unsigned access_qualifier;
|
||||
enum clc_kernel_arg_address_qualifier address_qualifier;
|
||||
};
|
||||
|
||||
enum clc_vec_hint_type {
|
||||
CLC_VEC_HINT_TYPE_CHAR = 0,
|
||||
CLC_VEC_HINT_TYPE_SHORT = 1,
|
||||
CLC_VEC_HINT_TYPE_INT = 2,
|
||||
CLC_VEC_HINT_TYPE_LONG = 3,
|
||||
CLC_VEC_HINT_TYPE_HALF = 4,
|
||||
CLC_VEC_HINT_TYPE_FLOAT = 5,
|
||||
CLC_VEC_HINT_TYPE_DOUBLE = 6
|
||||
};
|
||||
|
||||
struct clc_kernel_info {
|
||||
const char *name;
|
||||
size_t num_args;
|
||||
const struct clc_kernel_arg *args;
|
||||
|
||||
unsigned vec_hint_size;
|
||||
enum clc_vec_hint_type vec_hint_type;
|
||||
};
|
||||
|
||||
struct clc_object {
|
||||
struct spirv_binary spvbin;
|
||||
const struct clc_kernel_info *kernels;
|
||||
unsigned num_kernels;
|
||||
};
|
||||
|
||||
#define CLC_MAX_CONSTS 32
|
||||
#define CLC_MAX_BINDINGS_PER_ARG 3
|
||||
#define CLC_MAX_SAMPLERS 16
|
||||
|
||||
struct clc_dxil_metadata {
|
||||
struct {
|
||||
unsigned offset;
|
||||
unsigned size;
|
||||
union {
|
||||
struct {
|
||||
unsigned buf_ids[CLC_MAX_BINDINGS_PER_ARG];
|
||||
unsigned num_buf_ids;
|
||||
} image;
|
||||
struct {
|
||||
unsigned sampler_id;
|
||||
} sampler;
|
||||
struct {
|
||||
unsigned buf_id;
|
||||
} globconstptr;
|
||||
struct {
|
||||
unsigned sharedmem_offset;
|
||||
} localptr;
|
||||
};
|
||||
} *args;
|
||||
unsigned kernel_inputs_cbv_id;
|
||||
unsigned kernel_inputs_buf_size;
|
||||
unsigned work_properties_cbv_id;
|
||||
size_t num_uavs;
|
||||
size_t num_srvs;
|
||||
size_t num_samplers;
|
||||
|
||||
struct {
|
||||
void *data;
|
||||
size_t size;
|
||||
unsigned uav_id;
|
||||
} consts[CLC_MAX_CONSTS];
|
||||
size_t num_consts;
|
||||
|
||||
struct {
|
||||
unsigned sampler_id;
|
||||
unsigned addressing_mode;
|
||||
unsigned normalized_coords;
|
||||
unsigned filter_mode;
|
||||
} const_samplers[CLC_MAX_SAMPLERS];
|
||||
size_t num_const_samplers;
|
||||
size_t local_mem_size;
|
||||
size_t priv_mem_size;
|
||||
|
||||
uint16_t local_size[3];
|
||||
uint16_t local_size_hint[3];
|
||||
|
||||
int printf_uav_id;
|
||||
};
|
||||
|
||||
struct clc_dxil_object {
|
||||
const struct clc_kernel_info *kernel;
|
||||
struct clc_dxil_metadata metadata;
|
||||
struct {
|
||||
void *data;
|
||||
size_t size;
|
||||
} binary;
|
||||
};
|
||||
|
||||
struct clc_context {
|
||||
const void *libclc_nir;
|
||||
};
|
||||
|
||||
struct clc_context_options {
|
||||
unsigned optimize;
|
||||
};
|
||||
|
||||
struct clc_context *clc_context_new(const struct clc_logger *logger, const struct clc_context_options *options);
|
||||
|
||||
void clc_free_context(struct clc_context *ctx);
|
||||
|
||||
void clc_context_serialize(struct clc_context *ctx, void **serialized, size_t *size);
|
||||
void clc_context_free_serialized(void *serialized);
|
||||
struct clc_context *clc_context_deserialize(void *serialized, size_t size);
|
||||
|
||||
struct clc_object *
|
||||
clc_compile(struct clc_context *ctx,
|
||||
const struct clc_compile_args *args,
|
||||
const struct clc_logger *logger);
|
||||
|
||||
struct clc_object *
|
||||
clc_link(struct clc_context *ctx,
|
||||
const struct clc_linker_args *args,
|
||||
const struct clc_logger *logger);
|
||||
|
||||
void clc_free_object(struct clc_object *obj);
|
||||
|
||||
struct clc_runtime_arg_info {
|
||||
union {
|
||||
struct {
|
||||
unsigned size;
|
||||
} localptr;
|
||||
struct {
|
||||
unsigned normalized_coords;
|
||||
unsigned addressing_mode; /* See SPIR-V spec for value meanings */
|
||||
unsigned linear_filtering;
|
||||
} sampler;
|
||||
};
|
||||
};
|
||||
|
||||
struct clc_runtime_kernel_conf {
|
||||
uint16_t local_size[3];
|
||||
struct clc_runtime_arg_info *args;
|
||||
unsigned lower_bit_size;
|
||||
unsigned support_global_work_id_offsets;
|
||||
unsigned support_work_group_id_offsets;
|
||||
};
|
||||
|
||||
struct clc_dxil_object *
|
||||
clc_to_dxil(struct clc_context *ctx,
|
||||
const struct clc_object *obj,
|
||||
const char *entrypoint,
|
||||
const struct clc_runtime_kernel_conf *conf,
|
||||
const struct clc_logger *logger);
|
||||
|
||||
void clc_free_dxil_object(struct clc_dxil_object *dxil);
|
||||
|
||||
/* This struct describes the layout of data expected in the CB bound at global_work_offset_cbv_id */
|
||||
struct clc_work_properties_data {
|
||||
/* Returned from get_global_offset(), and added into get_global_id() */
|
||||
unsigned global_offset_x;
|
||||
unsigned global_offset_y;
|
||||
unsigned global_offset_z;
|
||||
/* Returned from get_work_dim() */
|
||||
unsigned work_dim;
|
||||
/* The number of work groups being launched (i.e. the parameters to Dispatch).
|
||||
* If the requested global size doesn't fit in a single Dispatch, these values should
|
||||
* indicate the total number of groups that *should* have been launched. */
|
||||
unsigned group_count_total_x;
|
||||
unsigned group_count_total_y;
|
||||
unsigned group_count_total_z;
|
||||
unsigned padding;
|
||||
/* If the requested global size doesn't fit in a single Dispatch, subsequent dispatches
|
||||
* should fill out these offsets to indicate how many groups have already been launched */
|
||||
unsigned group_id_offset_x;
|
||||
unsigned group_id_offset_y;
|
||||
unsigned group_id_offset_z;
|
||||
};
|
||||
|
||||
uint64_t clc_compiler_get_version();
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
2187
src/microsoft/clc/clc_compiler_test.cpp
Normal file
2187
src/microsoft/clc/clc_compiler_test.cpp
Normal file
File diff suppressed because it is too large
Load Diff
811
src/microsoft/clc/clc_helpers.cpp
Normal file
811
src/microsoft/clc/clc_helpers.cpp
Normal file
@ -0,0 +1,811 @@
|
||||
//
|
||||
// Copyright 2012-2016 Francisco Jerez
|
||||
// Copyright 2012-2016 Advanced Micro Devices, Inc.
|
||||
// Copyright 2014-2016 Jan Vesely
|
||||
// Copyright 2014-2015 Serge Martin
|
||||
// Copyright 2015 Zoltan Gilian
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a
|
||||
// copy of this software and associated documentation files (the "Software"),
|
||||
// to deal in the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include <llvm/ADT/ArrayRef.h>
|
||||
#include <llvm/IR/DiagnosticPrinter.h>
|
||||
#include <llvm/IR/DiagnosticInfo.h>
|
||||
#include <llvm/IR/LLVMContext.h>
|
||||
#include <llvm/IR/Type.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
#include <llvm-c/Core.h>
|
||||
#include <llvm-c/Target.h>
|
||||
#include <LLVMSPIRVLib/LLVMSPIRVLib.h>
|
||||
|
||||
#include <clang/CodeGen/CodeGenAction.h>
|
||||
#include <clang/Lex/PreprocessorOptions.h>
|
||||
#include <clang/Frontend/CompilerInstance.h>
|
||||
#include <clang/Frontend/TextDiagnosticBuffer.h>
|
||||
#include <clang/Frontend/TextDiagnosticPrinter.h>
|
||||
#include <clang/Basic/TargetInfo.h>
|
||||
|
||||
#include <spirv-tools/libspirv.hpp>
|
||||
#include <spirv-tools/linker.hpp>
|
||||
|
||||
#include "util/macros.h"
|
||||
#include "glsl_types.h"
|
||||
#include "nir.h"
|
||||
#include "nir_types.h"
|
||||
|
||||
#include "clc_helpers.h"
|
||||
#include "spirv.h"
|
||||
|
||||
#include "opencl-c.h.h"
|
||||
#include "opencl-c-base.h.h"
|
||||
|
||||
using ::llvm::Function;
|
||||
using ::llvm::LLVMContext;
|
||||
using ::llvm::Module;
|
||||
using ::llvm::raw_string_ostream;
|
||||
|
||||
static void
|
||||
llvm_log_handler(const ::llvm::DiagnosticInfo &di, void *data) {
|
||||
raw_string_ostream os { *reinterpret_cast<std::string *>(data) };
|
||||
::llvm::DiagnosticPrinterRawOStream printer { os };
|
||||
di.print(printer);
|
||||
}
|
||||
|
||||
class SPIRVKernelArg {
|
||||
public:
|
||||
SPIRVKernelArg(uint32_t id, uint32_t typeId) : id(id), typeId(typeId),
|
||||
addrQualifier(CLC_KERNEL_ARG_ADDRESS_PRIVATE),
|
||||
accessQualifier(0),
|
||||
typeQualifier(0) { }
|
||||
~SPIRVKernelArg() { }
|
||||
|
||||
uint32_t id;
|
||||
uint32_t typeId;
|
||||
std::string name;
|
||||
std::string typeName;
|
||||
enum clc_kernel_arg_address_qualifier addrQualifier;
|
||||
unsigned accessQualifier;
|
||||
unsigned typeQualifier;
|
||||
};
|
||||
|
||||
class SPIRVKernelInfo {
|
||||
public:
|
||||
SPIRVKernelInfo(uint32_t fid, const char *nm) : funcId(fid), name(nm), vecHint(0) { }
|
||||
~SPIRVKernelInfo() { }
|
||||
|
||||
uint32_t funcId;
|
||||
std::string name;
|
||||
std::vector<SPIRVKernelArg> args;
|
||||
unsigned vecHint;
|
||||
};
|
||||
|
||||
class SPIRVKernelParser {
|
||||
public:
|
||||
SPIRVKernelParser() : curKernel(NULL)
|
||||
{
|
||||
ctx = spvContextCreate(SPV_ENV_UNIVERSAL_1_0);
|
||||
}
|
||||
|
||||
~SPIRVKernelParser()
|
||||
{
|
||||
spvContextDestroy(ctx);
|
||||
}
|
||||
|
||||
void parseEntryPoint(const spv_parsed_instruction_t *ins)
|
||||
{
|
||||
assert(ins->num_operands >= 3);
|
||||
|
||||
const spv_parsed_operand_t *op = &ins->operands[1];
|
||||
|
||||
assert(op->type == SPV_OPERAND_TYPE_ID);
|
||||
|
||||
uint32_t funcId = ins->words[op->offset];
|
||||
|
||||
for (auto &iter : kernels) {
|
||||
if (funcId == iter.funcId)
|
||||
return;
|
||||
}
|
||||
|
||||
op = &ins->operands[2];
|
||||
assert(op->type == SPV_OPERAND_TYPE_LITERAL_STRING);
|
||||
const char *name = reinterpret_cast<const char *>(ins->words + op->offset);
|
||||
|
||||
kernels.push_back(SPIRVKernelInfo(funcId, name));
|
||||
}
|
||||
|
||||
void parseFunction(const spv_parsed_instruction_t *ins)
|
||||
{
|
||||
assert(ins->num_operands == 4);
|
||||
|
||||
const spv_parsed_operand_t *op = &ins->operands[1];
|
||||
|
||||
assert(op->type == SPV_OPERAND_TYPE_RESULT_ID);
|
||||
|
||||
uint32_t funcId = ins->words[op->offset];
|
||||
|
||||
SPIRVKernelInfo *kernel = NULL;
|
||||
|
||||
for (auto &kernel : kernels) {
|
||||
if (funcId == kernel.funcId && !kernel.args.size()) {
|
||||
curKernel = &kernel;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void parseFunctionParam(const spv_parsed_instruction_t *ins)
|
||||
{
|
||||
const spv_parsed_operand_t *op;
|
||||
uint32_t id, typeId;
|
||||
|
||||
if (!curKernel)
|
||||
return;
|
||||
|
||||
assert(ins->num_operands == 2);
|
||||
op = &ins->operands[0];
|
||||
assert(op->type == SPV_OPERAND_TYPE_TYPE_ID);
|
||||
typeId = ins->words[op->offset];
|
||||
op = &ins->operands[1];
|
||||
assert(op->type == SPV_OPERAND_TYPE_RESULT_ID);
|
||||
id = ins->words[op->offset];
|
||||
curKernel->args.push_back(SPIRVKernelArg(id, typeId));
|
||||
}
|
||||
|
||||
void parseName(const spv_parsed_instruction_t *ins)
|
||||
{
|
||||
const spv_parsed_operand_t *op;
|
||||
const char *name;
|
||||
uint32_t id;
|
||||
|
||||
assert(ins->num_operands == 2);
|
||||
|
||||
op = &ins->operands[0];
|
||||
assert(op->type == SPV_OPERAND_TYPE_ID);
|
||||
id = ins->words[op->offset];
|
||||
op = &ins->operands[1];
|
||||
assert(op->type == SPV_OPERAND_TYPE_LITERAL_STRING);
|
||||
name = reinterpret_cast<const char *>(ins->words + op->offset);
|
||||
|
||||
for (auto &kernel : kernels) {
|
||||
for (auto &arg : kernel.args) {
|
||||
if (arg.id == id && arg.name.empty()) {
|
||||
arg.name = name;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void parseTypePointer(const spv_parsed_instruction_t *ins)
|
||||
{
|
||||
enum clc_kernel_arg_address_qualifier addrQualifier;
|
||||
uint32_t typeId, targetTypeId, storageClass;
|
||||
const spv_parsed_operand_t *op;
|
||||
const char *typeName;
|
||||
|
||||
assert(ins->num_operands == 3);
|
||||
|
||||
op = &ins->operands[0];
|
||||
assert(op->type == SPV_OPERAND_TYPE_RESULT_ID);
|
||||
typeId = ins->words[op->offset];
|
||||
|
||||
op = &ins->operands[1];
|
||||
assert(op->type == SPV_OPERAND_TYPE_STORAGE_CLASS);
|
||||
storageClass = ins->words[op->offset];
|
||||
switch (storageClass) {
|
||||
case SpvStorageClassCrossWorkgroup:
|
||||
addrQualifier = CLC_KERNEL_ARG_ADDRESS_GLOBAL;
|
||||
break;
|
||||
case SpvStorageClassWorkgroup:
|
||||
addrQualifier = CLC_KERNEL_ARG_ADDRESS_LOCAL;
|
||||
break;
|
||||
case SpvStorageClassUniformConstant:
|
||||
addrQualifier = CLC_KERNEL_ARG_ADDRESS_CONSTANT;
|
||||
break;
|
||||
default:
|
||||
addrQualifier = CLC_KERNEL_ARG_ADDRESS_PRIVATE;
|
||||
break;
|
||||
}
|
||||
|
||||
for (auto &kernel : kernels) {
|
||||
for (auto &arg : kernel.args) {
|
||||
if (arg.typeId == typeId)
|
||||
arg.addrQualifier = addrQualifier;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void parseOpString(const spv_parsed_instruction_t *ins)
|
||||
{
|
||||
const spv_parsed_operand_t *op;
|
||||
std::string str;
|
||||
|
||||
assert(ins->num_operands == 2);
|
||||
|
||||
op = &ins->operands[1];
|
||||
assert(op->type == SPV_OPERAND_TYPE_LITERAL_STRING);
|
||||
str = reinterpret_cast<const char *>(ins->words + op->offset);
|
||||
|
||||
if (str.find("kernel_arg_type.") != 0)
|
||||
return;
|
||||
|
||||
size_t start = sizeof("kernel_arg_type.") - 1;
|
||||
|
||||
for (auto &kernel : kernels) {
|
||||
size_t pos;
|
||||
|
||||
pos = str.find(kernel.name, start);
|
||||
if (pos == std::string::npos ||
|
||||
pos != start || str[start + kernel.name.size()] != '.')
|
||||
continue;
|
||||
|
||||
pos = start + kernel.name.size();
|
||||
if (str[pos++] != '.')
|
||||
continue;
|
||||
|
||||
for (auto &arg : kernel.args) {
|
||||
if (arg.name.empty())
|
||||
break;
|
||||
|
||||
size_t typeEnd = str.find(',', pos);
|
||||
if (typeEnd == std::string::npos)
|
||||
break;
|
||||
|
||||
arg.typeName = str.substr(pos, typeEnd - pos);
|
||||
pos = typeEnd + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void applyDecoration(uint32_t id, const spv_parsed_instruction_t *ins)
|
||||
{
|
||||
auto iter = decorationGroups.find(id);
|
||||
if (iter != decorationGroups.end()) {
|
||||
for (uint32_t entry : iter->second)
|
||||
applyDecoration(entry, ins);
|
||||
return;
|
||||
}
|
||||
|
||||
const spv_parsed_operand_t *op;
|
||||
uint32_t decoration;
|
||||
|
||||
assert(ins->num_operands >= 2);
|
||||
|
||||
op = &ins->operands[1];
|
||||
assert(op->type == SPV_OPERAND_TYPE_DECORATION);
|
||||
decoration = ins->words[op->offset];
|
||||
|
||||
for (auto &kernel : kernels) {
|
||||
for (auto &arg : kernel.args) {
|
||||
if (arg.id == id) {
|
||||
switch (decoration) {
|
||||
case SpvDecorationVolatile:
|
||||
arg.typeQualifier |= CLC_KERNEL_ARG_TYPE_VOLATILE;
|
||||
break;
|
||||
case SpvDecorationConstant:
|
||||
arg.typeQualifier |= CLC_KERNEL_ARG_TYPE_CONST;
|
||||
break;
|
||||
case SpvDecorationRestrict:
|
||||
arg.typeQualifier |= CLC_KERNEL_ARG_TYPE_RESTRICT;
|
||||
break;
|
||||
case SpvDecorationFuncParamAttr:
|
||||
op = &ins->operands[2];
|
||||
assert(op->type == SPV_OPERAND_TYPE_FUNCTION_PARAMETER_ATTRIBUTE);
|
||||
switch (ins->words[op->offset]) {
|
||||
case SpvFunctionParameterAttributeNoAlias:
|
||||
arg.typeQualifier |= CLC_KERNEL_ARG_TYPE_RESTRICT;
|
||||
break;
|
||||
case SpvFunctionParameterAttributeNoWrite:
|
||||
arg.typeQualifier |= CLC_KERNEL_ARG_TYPE_CONST;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void parseOpDecorate(const spv_parsed_instruction_t *ins)
|
||||
{
|
||||
const spv_parsed_operand_t *op;
|
||||
uint32_t id, decoration;
|
||||
|
||||
assert(ins->num_operands >= 2);
|
||||
|
||||
op = &ins->operands[0];
|
||||
assert(op->type == SPV_OPERAND_TYPE_ID);
|
||||
id = ins->words[op->offset];
|
||||
|
||||
applyDecoration(id, ins);
|
||||
}
|
||||
|
||||
void parseOpGroupDecorate(const spv_parsed_instruction_t *ins)
|
||||
{
|
||||
assert(ins->num_operands >= 2);
|
||||
|
||||
const spv_parsed_operand_t *op = &ins->operands[0];
|
||||
assert(op->type == SPV_OPERAND_TYPE_ID);
|
||||
uint32_t groupId = ins->words[op->offset];
|
||||
|
||||
auto lowerBound = decorationGroups.lower_bound(groupId);
|
||||
if (lowerBound != decorationGroups.end() &&
|
||||
lowerBound->first == groupId)
|
||||
// Group already filled out
|
||||
return;
|
||||
|
||||
auto iter = decorationGroups.emplace_hint(lowerBound, groupId, std::vector<uint32_t>{});
|
||||
auto& vec = iter->second;
|
||||
vec.reserve(ins->num_operands - 1);
|
||||
for (uint32_t i = 1; i < ins->num_operands; ++i) {
|
||||
op = &ins->operands[i];
|
||||
assert(op->type == SPV_OPERAND_TYPE_ID);
|
||||
vec.push_back(ins->words[op->offset]);
|
||||
}
|
||||
}
|
||||
|
||||
void parseOpTypeImage(const spv_parsed_instruction_t *ins)
|
||||
{
|
||||
const spv_parsed_operand_t *op;
|
||||
uint32_t typeId;
|
||||
unsigned accessQualifier = CLC_KERNEL_ARG_ACCESS_READ;
|
||||
|
||||
op = &ins->operands[0];
|
||||
assert(op->type == SPV_OPERAND_TYPE_RESULT_ID);
|
||||
typeId = ins->words[op->offset];
|
||||
|
||||
if (ins->num_operands >= 9) {
|
||||
op = &ins->operands[8];
|
||||
assert(op->type == SPV_OPERAND_TYPE_ACCESS_QUALIFIER);
|
||||
switch (ins->words[op->offset]) {
|
||||
case SpvAccessQualifierReadOnly:
|
||||
accessQualifier = CLC_KERNEL_ARG_ACCESS_READ;
|
||||
break;
|
||||
case SpvAccessQualifierWriteOnly:
|
||||
accessQualifier = CLC_KERNEL_ARG_ACCESS_WRITE;
|
||||
break;
|
||||
case SpvAccessQualifierReadWrite:
|
||||
accessQualifier = CLC_KERNEL_ARG_ACCESS_WRITE |
|
||||
CLC_KERNEL_ARG_ACCESS_READ;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &kernel : kernels) {
|
||||
for (auto &arg : kernel.args) {
|
||||
if (arg.typeId == typeId) {
|
||||
arg.accessQualifier = accessQualifier;
|
||||
arg.addrQualifier = CLC_KERNEL_ARG_ADDRESS_GLOBAL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void parseExecutionMode(const spv_parsed_instruction_t *ins)
|
||||
{
|
||||
uint32_t executionMode = ins->words[ins->operands[1].offset];
|
||||
if (executionMode != SpvExecutionModeVecTypeHint)
|
||||
return;
|
||||
|
||||
uint32_t funcId = ins->words[ins->operands[0].offset];
|
||||
uint32_t vecHint = ins->words[ins->operands[2].offset];
|
||||
for (auto& kernel : kernels) {
|
||||
if (kernel.funcId == funcId)
|
||||
kernel.vecHint = vecHint;
|
||||
}
|
||||
}
|
||||
|
||||
static spv_result_t
|
||||
parseInstruction(void *data, const spv_parsed_instruction_t *ins)
|
||||
{
|
||||
SPIRVKernelParser *parser = reinterpret_cast<SPIRVKernelParser *>(data);
|
||||
|
||||
switch (ins->opcode) {
|
||||
case SpvOpName:
|
||||
parser->parseName(ins);
|
||||
break;
|
||||
case SpvOpEntryPoint:
|
||||
parser->parseEntryPoint(ins);
|
||||
break;
|
||||
case SpvOpFunction:
|
||||
parser->parseFunction(ins);
|
||||
break;
|
||||
case SpvOpFunctionParameter:
|
||||
parser->parseFunctionParam(ins);
|
||||
break;
|
||||
case SpvOpFunctionEnd:
|
||||
case SpvOpLabel:
|
||||
parser->curKernel = NULL;
|
||||
break;
|
||||
case SpvOpTypePointer:
|
||||
parser->parseTypePointer(ins);
|
||||
break;
|
||||
case SpvOpTypeImage:
|
||||
parser->parseOpTypeImage(ins);
|
||||
break;
|
||||
case SpvOpString:
|
||||
parser->parseOpString(ins);
|
||||
break;
|
||||
case SpvOpDecorate:
|
||||
parser->parseOpDecorate(ins);
|
||||
break;
|
||||
case SpvOpGroupDecorate:
|
||||
parser->parseOpGroupDecorate(ins);
|
||||
break;
|
||||
case SpvOpExecutionMode:
|
||||
parser->parseExecutionMode(ins);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return SPV_SUCCESS;
|
||||
}
|
||||
|
||||
bool parsingComplete()
|
||||
{
|
||||
for (auto &kernel : kernels) {
|
||||
if (kernel.name.empty())
|
||||
return false;
|
||||
|
||||
for (auto &arg : kernel.args) {
|
||||
if (arg.name.empty() || arg.typeName.empty())
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void parseBinary(const struct spirv_binary &spvbin)
|
||||
{
|
||||
/* 3 passes should be enough to retrieve all kernel information:
|
||||
* 1st pass: all entry point name and number of args
|
||||
* 2nd pass: argument names and type names
|
||||
* 3rd pass: pointer type names
|
||||
*/
|
||||
for (unsigned pass = 0; pass < 3; pass++) {
|
||||
spvBinaryParse(ctx, reinterpret_cast<void *>(this),
|
||||
spvbin.data, spvbin.size / 4,
|
||||
NULL, parseInstruction, NULL);
|
||||
|
||||
if (parsingComplete())
|
||||
return;
|
||||
}
|
||||
|
||||
assert(0);
|
||||
}
|
||||
|
||||
std::vector<SPIRVKernelInfo> kernels;
|
||||
std::map<uint32_t, std::vector<uint32_t>> decorationGroups;
|
||||
SPIRVKernelInfo *curKernel;
|
||||
spv_context ctx;
|
||||
};
|
||||
|
||||
const struct clc_kernel_info *
|
||||
clc_spirv_get_kernels_info(const struct spirv_binary *spvbin,
|
||||
unsigned *num_kernels)
|
||||
{
|
||||
struct clc_kernel_info *kernels;
|
||||
|
||||
SPIRVKernelParser parser;
|
||||
|
||||
parser.parseBinary(*spvbin);
|
||||
*num_kernels = parser.kernels.size();
|
||||
if (!*num_kernels)
|
||||
return NULL;
|
||||
|
||||
kernels = reinterpret_cast<struct clc_kernel_info *>(calloc(*num_kernels,
|
||||
sizeof(*kernels)));
|
||||
assert(kernels);
|
||||
for (unsigned i = 0; i < parser.kernels.size(); i++) {
|
||||
kernels[i].name = strdup(parser.kernels[i].name.c_str());
|
||||
kernels[i].num_args = parser.kernels[i].args.size();
|
||||
kernels[i].vec_hint_size = parser.kernels[i].vecHint >> 16;
|
||||
kernels[i].vec_hint_type = (enum clc_vec_hint_type)(parser.kernels[i].vecHint & 0xFFFF);
|
||||
if (!kernels[i].num_args)
|
||||
continue;
|
||||
|
||||
struct clc_kernel_arg *args;
|
||||
|
||||
args = reinterpret_cast<struct clc_kernel_arg *>(calloc(kernels[i].num_args,
|
||||
sizeof(*kernels->args)));
|
||||
kernels[i].args = args;
|
||||
assert(args);
|
||||
for (unsigned j = 0; j < kernels[i].num_args; j++) {
|
||||
if (!parser.kernels[i].args[j].name.empty())
|
||||
args[j].name = strdup(parser.kernels[i].args[j].name.c_str());
|
||||
args[j].type_name = strdup(parser.kernels[i].args[j].typeName.c_str());
|
||||
args[j].address_qualifier = parser.kernels[i].args[j].addrQualifier;
|
||||
args[j].type_qualifier = parser.kernels[i].args[j].typeQualifier;
|
||||
args[j].access_qualifier = parser.kernels[i].args[j].accessQualifier;
|
||||
}
|
||||
}
|
||||
|
||||
return kernels;
|
||||
}
|
||||
|
||||
void
|
||||
clc_free_kernels_info(const struct clc_kernel_info *kernels,
|
||||
unsigned num_kernels)
|
||||
{
|
||||
if (!kernels)
|
||||
return;
|
||||
|
||||
for (unsigned i = 0; i < num_kernels; i++) {
|
||||
if (kernels[i].args) {
|
||||
for (unsigned j = 0; j < kernels[i].num_args; j++) {
|
||||
free((void *)kernels[i].args[j].name);
|
||||
free((void *)kernels[i].args[j].type_name);
|
||||
}
|
||||
}
|
||||
free((void *)kernels[i].name);
|
||||
}
|
||||
|
||||
free((void *)kernels);
|
||||
}
|
||||
|
||||
int
|
||||
clc_to_spirv(const struct clc_compile_args *args,
|
||||
struct spirv_binary *spvbin,
|
||||
const struct clc_logger *logger)
|
||||
{
|
||||
LLVMInitializeAllTargets();
|
||||
LLVMInitializeAllTargetInfos();
|
||||
LLVMInitializeAllTargetMCs();
|
||||
LLVMInitializeAllAsmPrinters();
|
||||
|
||||
std::string log;
|
||||
std::unique_ptr<LLVMContext> llvm_ctx { new LLVMContext };
|
||||
llvm_ctx->setDiagnosticHandlerCallBack(llvm_log_handler, &log);
|
||||
|
||||
std::unique_ptr<clang::CompilerInstance> c { new clang::CompilerInstance };
|
||||
clang::DiagnosticsEngine diag { new clang::DiagnosticIDs,
|
||||
new clang::DiagnosticOptions,
|
||||
new clang::TextDiagnosticPrinter(*new raw_string_ostream(log),
|
||||
&c->getDiagnosticOpts(), true)};
|
||||
|
||||
std::vector<const char *> clang_opts = {
|
||||
args->source.name,
|
||||
"-triple", "spir64-unknown-unknown",
|
||||
// By default, clang prefers to use modules to pull in the default headers,
|
||||
// which doesn't work with our technique of embedding the headers in our binary
|
||||
"-finclude-default-header",
|
||||
// Add a default CL compiler version. Clang will pick the last one specified
|
||||
// on the command line, so the app can override this one.
|
||||
"-cl-std=cl1.2",
|
||||
// The LLVM-SPIRV-Translator doesn't support memset with variable size
|
||||
"-fno-builtin-memset",
|
||||
// LLVM's optimizations can produce code that the translator can't translate
|
||||
"-O0",
|
||||
};
|
||||
// We assume there's appropriate defines for __OPENCL_VERSION__ and __IMAGE_SUPPORT__
|
||||
// being provided by the caller here.
|
||||
clang_opts.insert(clang_opts.end(), args->args, args->args + args->num_args);
|
||||
|
||||
if (!clang::CompilerInvocation::CreateFromArgs(c->getInvocation(),
|
||||
#if LLVM_VERSION_MAJOR >= 10
|
||||
clang_opts,
|
||||
#else
|
||||
clang_opts.data(),
|
||||
clang_opts.data() + clang_opts.size(),
|
||||
#endif
|
||||
diag)) {
|
||||
log += "Couldn't create Clang invocation.\n";
|
||||
clc_error(logger, log.c_str());
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (diag.hasErrorOccurred()) {
|
||||
log += "Errors occurred during Clang invocation.\n";
|
||||
clc_error(logger, log.c_str());
|
||||
return -1;
|
||||
}
|
||||
|
||||
// This is a workaround for a Clang bug which causes the number
|
||||
// of warnings and errors to be printed to stderr.
|
||||
// http://www.llvm.org/bugs/show_bug.cgi?id=19735
|
||||
c->getDiagnosticOpts().ShowCarets = false;
|
||||
|
||||
c->createDiagnostics(new clang::TextDiagnosticPrinter(
|
||||
*new raw_string_ostream(log),
|
||||
&c->getDiagnosticOpts(), true));
|
||||
|
||||
c->setTarget(clang::TargetInfo::CreateTargetInfo(
|
||||
c->getDiagnostics(), c->getInvocation().TargetOpts));
|
||||
|
||||
c->getFrontendOpts().ProgramAction = clang::frontend::EmitLLVMOnly;
|
||||
c->getHeaderSearchOpts().UseBuiltinIncludes = false;
|
||||
c->getHeaderSearchOpts().UseStandardSystemIncludes = false;
|
||||
|
||||
// Add opencl-c generic search path
|
||||
{
|
||||
::llvm::SmallString<128> system_header_path;
|
||||
::llvm::sys::path::system_temp_directory(true, system_header_path);
|
||||
::llvm::sys::path::append(system_header_path, "openclon12");
|
||||
c->getHeaderSearchOpts().AddPath(system_header_path.str(),
|
||||
clang::frontend::Angled,
|
||||
false, false);
|
||||
|
||||
::llvm::sys::path::append(system_header_path, "opencl-c.h");
|
||||
c->getPreprocessorOpts().addRemappedFile(system_header_path.str(),
|
||||
::llvm::MemoryBuffer::getMemBuffer(llvm::StringRef(opencl_c_source, _countof(opencl_c_source) - 1)).release());
|
||||
|
||||
::llvm::sys::path::remove_filename(system_header_path);
|
||||
::llvm::sys::path::append(system_header_path, "opencl-c-base.h");
|
||||
c->getPreprocessorOpts().addRemappedFile(system_header_path.str(),
|
||||
::llvm::MemoryBuffer::getMemBuffer(llvm::StringRef(opencl_c_base_source, _countof(opencl_c_base_source) - 1)).release());
|
||||
}
|
||||
|
||||
if (args->num_headers) {
|
||||
::llvm::SmallString<128> tmp_header_path;
|
||||
::llvm::sys::path::system_temp_directory(true, tmp_header_path);
|
||||
::llvm::sys::path::append(tmp_header_path, "openclon12");
|
||||
|
||||
c->getHeaderSearchOpts().AddPath(tmp_header_path.str(),
|
||||
clang::frontend::Quoted,
|
||||
false, false);
|
||||
|
||||
for (size_t i = 0; i < args->num_headers; i++) {
|
||||
auto path_copy = tmp_header_path;
|
||||
::llvm::sys::path::append(path_copy, ::llvm::sys::path::convert_to_slash(args->headers[i].name));
|
||||
c->getPreprocessorOpts().addRemappedFile(path_copy.str(),
|
||||
::llvm::MemoryBuffer::getMemBufferCopy(args->headers[i].value).release());
|
||||
}
|
||||
}
|
||||
|
||||
c->getPreprocessorOpts().addRemappedFile(
|
||||
args->source.name,
|
||||
::llvm::MemoryBuffer::getMemBufferCopy(std::string(args->source.value)).release());
|
||||
|
||||
// Compile the code
|
||||
clang::EmitLLVMOnlyAction act(llvm_ctx.get());
|
||||
if (!c->ExecuteAction(act)) {
|
||||
log += "Error executing LLVM compilation action.\n";
|
||||
clc_error(logger, log.c_str());
|
||||
return -1;
|
||||
}
|
||||
|
||||
auto mod = act.takeModule();
|
||||
std::ostringstream spv_stream;
|
||||
if (!::llvm::writeSpirv(mod.get(), spv_stream, log)) {
|
||||
log += "Translation from LLVM IR to SPIR-V failed.\n";
|
||||
clc_error(logger, log.c_str());
|
||||
return -1;
|
||||
}
|
||||
|
||||
const std::string spv_out = spv_stream.str();
|
||||
spvbin->size = spv_out.size();
|
||||
spvbin->data = static_cast<uint32_t *>(malloc(spvbin->size));
|
||||
memcpy(spvbin->data, spv_out.data(), spvbin->size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char *
|
||||
spv_result_to_str(spv_result_t res)
|
||||
{
|
||||
switch (res) {
|
||||
case SPV_SUCCESS: return "success";
|
||||
case SPV_UNSUPPORTED: return "unsupported";
|
||||
case SPV_END_OF_STREAM: return "end of stream";
|
||||
case SPV_WARNING: return "warning";
|
||||
case SPV_FAILED_MATCH: return "failed match";
|
||||
case SPV_REQUESTED_TERMINATION: return "requested termination";
|
||||
case SPV_ERROR_INTERNAL: return "internal error";
|
||||
case SPV_ERROR_OUT_OF_MEMORY: return "out of memory";
|
||||
case SPV_ERROR_INVALID_POINTER: return "invalid pointer";
|
||||
case SPV_ERROR_INVALID_BINARY: return "invalid binary";
|
||||
case SPV_ERROR_INVALID_TEXT: return "invalid text";
|
||||
case SPV_ERROR_INVALID_TABLE: return "invalid table";
|
||||
case SPV_ERROR_INVALID_VALUE: return "invalid value";
|
||||
case SPV_ERROR_INVALID_DIAGNOSTIC: return "invalid diagnostic";
|
||||
case SPV_ERROR_INVALID_LOOKUP: return "invalid lookup";
|
||||
case SPV_ERROR_INVALID_ID: return "invalid id";
|
||||
case SPV_ERROR_INVALID_CFG: return "invalid config";
|
||||
case SPV_ERROR_INVALID_LAYOUT: return "invalid layout";
|
||||
case SPV_ERROR_INVALID_CAPABILITY: return "invalid capability";
|
||||
case SPV_ERROR_INVALID_DATA: return "invalid data";
|
||||
case SPV_ERROR_MISSING_EXTENSION: return "missing extension";
|
||||
case SPV_ERROR_WRONG_VERSION: return "wrong version";
|
||||
default: return "unknown error";
|
||||
}
|
||||
}
|
||||
|
||||
class SPIRVMessageConsumer {
|
||||
public:
|
||||
SPIRVMessageConsumer(const struct clc_logger *logger): logger(logger) {}
|
||||
|
||||
void operator()(spv_message_level_t level, const char *src,
|
||||
const spv_position_t &pos, const char *msg)
|
||||
{
|
||||
switch(level) {
|
||||
case SPV_MSG_FATAL:
|
||||
case SPV_MSG_INTERNAL_ERROR:
|
||||
case SPV_MSG_ERROR:
|
||||
clc_error(logger, "(file=%s,line=%ld,column=%ld,index=%ld): %s",
|
||||
src, pos.line, pos.column, pos.index, msg);
|
||||
break;
|
||||
|
||||
case SPV_MSG_WARNING:
|
||||
clc_warning(logger, "(file=%s,line=%ld,column=%ld,index=%ld): %s",
|
||||
src, pos.line, pos.column, pos.index, msg);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const struct clc_logger *logger;
|
||||
};
|
||||
|
||||
int
|
||||
clc_link_spirv_binaries(const struct clc_linker_args *args,
|
||||
struct spirv_binary *dst_bin,
|
||||
const struct clc_logger *logger)
|
||||
{
|
||||
std::vector<std::vector<uint32_t>> binaries;
|
||||
|
||||
for (unsigned i = 0; i < args->num_in_objs; i++) {
|
||||
std::vector<uint32_t> bin(args->in_objs[i]->spvbin.data,
|
||||
args->in_objs[i]->spvbin.data +
|
||||
(args->in_objs[i]->spvbin.size / 4));
|
||||
binaries.push_back(bin);
|
||||
}
|
||||
|
||||
SPIRVMessageConsumer msgconsumer(logger);
|
||||
spvtools::Context context(SPV_ENV_UNIVERSAL_1_0);
|
||||
context.SetMessageConsumer(msgconsumer);
|
||||
spvtools::LinkerOptions options;
|
||||
options.SetAllowPartialLinkage(args->create_library);
|
||||
options.SetCreateLibrary(args->create_library);
|
||||
std::vector<uint32_t> linkingResult;
|
||||
spv_result_t status = spvtools::Link(context, binaries, &linkingResult, options);
|
||||
if (status != SPV_SUCCESS) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
dst_bin->size = linkingResult.size() * 4;
|
||||
dst_bin->data = static_cast<uint32_t *>(malloc(dst_bin->size));
|
||||
memcpy(dst_bin->data, linkingResult.data(), dst_bin->size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
clc_dump_spirv(const struct spirv_binary *spvbin, FILE *f)
|
||||
{
|
||||
spvtools::SpirvTools tools(SPV_ENV_UNIVERSAL_1_0);
|
||||
std::vector<uint32_t> bin(spvbin->data, spvbin->data + (spvbin->size / 4));
|
||||
std::string out;
|
||||
tools.Disassemble(bin, &out,
|
||||
SPV_BINARY_TO_TEXT_OPTION_INDENT |
|
||||
SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
|
||||
fwrite(out.c_str(), out.size(), 1, f);
|
||||
}
|
||||
|
||||
void
|
||||
clc_free_spirv_binary(struct spirv_binary *spvbin)
|
||||
{
|
||||
free(spvbin->data);
|
||||
}
|
81
src/microsoft/clc/clc_helpers.h
Normal file
81
src/microsoft/clc/clc_helpers.h
Normal file
@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright © Microsoft Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CLC_TO_NIR_H
|
||||
#define CLC_TO_NIR_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "nir_types.h"
|
||||
|
||||
#include "clc_compiler.h"
|
||||
#include "util/u_string.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
const struct clc_kernel_info *
|
||||
clc_spirv_get_kernels_info(const struct spirv_binary *spvbin,
|
||||
unsigned *num_kernels);
|
||||
|
||||
void
|
||||
clc_free_kernels_info(const struct clc_kernel_info *kernels,
|
||||
unsigned num_kernels);
|
||||
|
||||
int
|
||||
clc_to_spirv(const struct clc_compile_args *args,
|
||||
struct spirv_binary *spvbin,
|
||||
const struct clc_logger *logger);
|
||||
|
||||
int
|
||||
clc_link_spirv_binaries(const struct clc_linker_args *args,
|
||||
struct spirv_binary *dst_bin,
|
||||
const struct clc_logger *logger);
|
||||
|
||||
void
|
||||
clc_dump_spirv(const struct spirv_binary *spvbin, FILE *f);
|
||||
|
||||
void
|
||||
clc_free_spirv_binary(struct spirv_binary *spvbin);
|
||||
|
||||
#define clc_log(logger, level, fmt, ...) do { \
|
||||
if (!logger || !logger->level) break; \
|
||||
char *msg = NULL; \
|
||||
asprintf(&msg, fmt, __VA_ARGS__); \
|
||||
assert(msg); \
|
||||
logger->level(logger->priv, msg); \
|
||||
free(msg); \
|
||||
} while (0)
|
||||
|
||||
#define clc_error(logger, fmt, ...) clc_log(logger, error, fmt, __VA_ARGS__)
|
||||
#define clc_warning(logger, fmt, ...) clc_log(logger, warning, fmt, __VA_ARGS__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
388
src/microsoft/clc/clc_nir.c
Normal file
388
src/microsoft/clc/clc_nir.c
Normal file
@ -0,0 +1,388 @@
|
||||
/*
|
||||
* Copyright © Microsoft Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "u_math.h"
|
||||
#include "nir.h"
|
||||
#include "glsl_types.h"
|
||||
#include "nir_types.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
#include "clc_nir.h"
|
||||
#include "clc_compiler.h"
|
||||
#include "../compiler/dxil_nir.h"
|
||||
|
||||
static bool
|
||||
lower_load_base_global_invocation_id(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
nir_variable *var)
|
||||
{
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
|
||||
nir_ssa_def *offset =
|
||||
build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
|
||||
nir_imm_int(b,
|
||||
offsetof(struct clc_work_properties_data,
|
||||
global_offset_x)),
|
||||
nir_dest_num_components(intr->dest),
|
||||
nir_dest_bit_size(intr->dest));
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(offset));
|
||||
nir_instr_remove(&intr->instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_load_work_dim(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
nir_variable *var)
|
||||
{
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
|
||||
nir_ssa_def *dim =
|
||||
build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
|
||||
nir_imm_int(b,
|
||||
offsetof(struct clc_work_properties_data,
|
||||
work_dim)),
|
||||
nir_dest_num_components(intr->dest),
|
||||
nir_dest_bit_size(intr->dest));
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(dim));
|
||||
nir_instr_remove(&intr->instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_load_local_group_size(nir_builder *b, nir_intrinsic_instr *intr)
|
||||
{
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
|
||||
nir_const_value v[3] = {
|
||||
nir_const_value_for_int(b->shader->info.cs.local_size[0], 32),
|
||||
nir_const_value_for_int(b->shader->info.cs.local_size[1], 32),
|
||||
nir_const_value_for_int(b->shader->info.cs.local_size[2], 32)
|
||||
};
|
||||
nir_ssa_def *size = nir_build_imm(b, 3, 32, v);
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(size));
|
||||
nir_instr_remove(&intr->instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_load_num_work_groups(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
nir_variable *var)
|
||||
{
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
|
||||
nir_ssa_def *count =
|
||||
build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
|
||||
nir_imm_int(b,
|
||||
offsetof(struct clc_work_properties_data,
|
||||
group_count_total_x)),
|
||||
nir_dest_num_components(intr->dest),
|
||||
nir_dest_bit_size(intr->dest));
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(count));
|
||||
nir_instr_remove(&intr->instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_load_base_work_group_id(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
nir_variable *var)
|
||||
{
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
|
||||
nir_ssa_def *offset =
|
||||
build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
|
||||
nir_imm_int(b,
|
||||
offsetof(struct clc_work_properties_data,
|
||||
group_id_offset_x)),
|
||||
nir_dest_num_components(intr->dest),
|
||||
nir_dest_bit_size(intr->dest));
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(offset));
|
||||
nir_instr_remove(&intr->instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
clc_nir_lower_system_values(nir_shader *nir, nir_variable *var)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
foreach_list_typed(nir_function, func, node, &nir->functions) {
|
||||
if (!func->is_entrypoint)
|
||||
continue;
|
||||
assert(func->impl);
|
||||
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, func->impl);
|
||||
|
||||
nir_foreach_block(block, func->impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_base_global_invocation_id:
|
||||
progress |= lower_load_base_global_invocation_id(&b, intr, var);
|
||||
break;
|
||||
case nir_intrinsic_load_work_dim:
|
||||
progress |= lower_load_work_dim(&b, intr, var);
|
||||
break;
|
||||
case nir_intrinsic_load_local_group_size:
|
||||
lower_load_local_group_size(&b, intr);
|
||||
break;
|
||||
case nir_intrinsic_load_num_work_groups:
|
||||
lower_load_num_work_groups(&b, intr, var);
|
||||
break;
|
||||
case nir_intrinsic_load_base_work_group_id:
|
||||
lower_load_base_work_group_id(&b, intr, var);
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_load_kernel_input(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
nir_variable *var)
|
||||
{
|
||||
nir_intrinsic_instr *load;
|
||||
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
unsigned bit_size = nir_dest_bit_size(intr->dest);
|
||||
enum glsl_base_type base_type;
|
||||
|
||||
switch (bit_size) {
|
||||
case 64:
|
||||
base_type = GLSL_TYPE_UINT64;
|
||||
break;
|
||||
case 32:
|
||||
base_type = GLSL_TYPE_UINT;
|
||||
break;
|
||||
case 16:
|
||||
base_type = GLSL_TYPE_UINT16;
|
||||
break;
|
||||
case 8:
|
||||
base_type = GLSL_TYPE_UINT8;
|
||||
break;
|
||||
}
|
||||
|
||||
const struct glsl_type *type =
|
||||
glsl_vector_type(base_type, nir_dest_num_components(intr->dest));
|
||||
nir_ssa_def *ptr = nir_vec2(b, nir_imm_int(b, var->data.binding),
|
||||
nir_u2u(b, intr->src[0].ssa, 32));
|
||||
nir_deref_instr *deref = nir_build_deref_cast(b, ptr, nir_var_mem_ubo, type,
|
||||
bit_size / 8);
|
||||
deref->cast.align_mul = nir_intrinsic_align_mul(intr);
|
||||
deref->cast.align_offset = nir_intrinsic_align_offset(intr);
|
||||
|
||||
nir_ssa_def *result =
|
||||
nir_load_deref(b, deref);
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(result));
|
||||
nir_instr_remove(&intr->instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
clc_nir_lower_kernel_input_loads(nir_shader *nir, nir_variable *var)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
foreach_list_typed(nir_function, func, node, &nir->functions) {
|
||||
if (!func->is_entrypoint)
|
||||
continue;
|
||||
assert(func->impl);
|
||||
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, func->impl);
|
||||
|
||||
nir_foreach_block(block, func->impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_load_kernel_input)
|
||||
progress |= lower_load_kernel_input(&b, intr, var);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
|
||||
static nir_variable *
|
||||
add_printf_var(struct nir_shader *nir, unsigned uav_id)
|
||||
{
|
||||
/* This size is arbitrary. Minimum required per spec is 1MB */
|
||||
const unsigned max_printf_size = 1 * 1024 * 1024;
|
||||
const unsigned printf_array_size = max_printf_size / sizeof(unsigned);
|
||||
nir_variable *var =
|
||||
nir_variable_create(nir, nir_var_mem_ssbo,
|
||||
glsl_array_type(glsl_uint_type(), printf_array_size, sizeof(unsigned)),
|
||||
"kernel_work_properies");
|
||||
var->data.binding = uav_id;
|
||||
return var;
|
||||
}
|
||||
|
||||
static void
|
||||
lower_printf_impl(nir_builder *b, nir_intrinsic_instr *instr, nir_variable *var)
|
||||
{
|
||||
/* Atomic add a buffer size counter to determine where to write.
|
||||
* If overflowed, return -1, otherwise, store the arguments and return 0.
|
||||
*/
|
||||
b->cursor = nir_before_instr(&instr->instr);
|
||||
nir_deref_instr *ssbo_deref = nir_build_deref_var(b, var);
|
||||
nir_deref_instr *counter_deref = nir_build_deref_array_imm(b, ssbo_deref, 0);
|
||||
nir_deref_instr *struct_deref = nir_instr_as_deref(instr->src[1].ssa->parent_instr);
|
||||
nir_variable *struct_var = nir_deref_instr_get_variable(struct_deref);
|
||||
const struct glsl_type *struct_type = struct_var->type;
|
||||
/* Align the struct size to 4 for natural SSBO alignment */
|
||||
int struct_size = align(glsl_get_cl_size(struct_type), 4);
|
||||
|
||||
/* Hardcoding 64bit pointers to simplify some code below */
|
||||
assert(instr->src[0].ssa->num_components == 1 && instr->src[0].ssa->bit_size == 64);
|
||||
|
||||
nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, nir_intrinsic_deref_atomic_add);
|
||||
nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
|
||||
atomic->src[0] = nir_src_for_ssa(&counter_deref->dest.ssa);
|
||||
atomic->src[1] = nir_src_for_ssa(nir_imm_int(b, struct_size + sizeof(uint64_t)));
|
||||
nir_builder_instr_insert(b, &atomic->instr);
|
||||
|
||||
int max_valid_offset =
|
||||
glsl_get_cl_size(var->type) - /* buffer size */
|
||||
struct_size - /* printf args size */
|
||||
sizeof(uint64_t) - /* format string */
|
||||
sizeof(int); /* the first int in the buffer is for the counter */
|
||||
nir_push_if(b, nir_ilt(b, &atomic->dest.ssa, nir_imm_int(b, max_valid_offset)));
|
||||
nir_ssa_def *printf_succ_val = nir_imm_int(b, 0);
|
||||
|
||||
nir_ssa_def *start_offset = nir_u2u64(b, nir_iadd(b, &atomic->dest.ssa, nir_imm_int(b, sizeof(int))));
|
||||
nir_deref_instr *as_byte_array = nir_build_deref_cast(b, &ssbo_deref->dest.ssa, nir_var_mem_ssbo, glsl_uint8_t_type(), 1);
|
||||
nir_deref_instr *as_offset_byte_array = nir_build_deref_ptr_as_array(b, as_byte_array, start_offset);
|
||||
nir_deref_instr *format_string_write_deref =
|
||||
nir_build_deref_cast(b, &as_offset_byte_array->dest.ssa, nir_var_mem_ssbo, glsl_uint64_t_type(), 8);
|
||||
nir_store_deref(b, format_string_write_deref, instr->src[0].ssa, ~0);
|
||||
|
||||
for (unsigned i = 0; i < glsl_get_length(struct_type); ++i) {
|
||||
nir_ssa_def *field_offset_from_start = nir_imm_int64(b, glsl_get_struct_field_offset(struct_type, i) + sizeof(uint64_t));
|
||||
nir_ssa_def *field_offset = nir_iadd(b, start_offset, field_offset_from_start);
|
||||
|
||||
const struct glsl_type *field_type = glsl_get_struct_field(struct_type, i);
|
||||
nir_deref_instr *field_read_deref = nir_build_deref_struct(b, struct_deref, i);
|
||||
nir_ssa_def *field_value = nir_load_deref(b, field_read_deref);
|
||||
|
||||
/* Clang does promotion of arguments to their "native" size. That means that any floats
|
||||
* have been converted to doubles for the call to printf. Since we don't support doubles,
|
||||
* convert them back here; copy-prop and other optimizations should remove all hint of doubles.
|
||||
*/
|
||||
if (glsl_get_base_type(field_type) == GLSL_TYPE_DOUBLE) {
|
||||
field_value = nir_f2f32(b, field_value);
|
||||
field_type = glsl_float_type();
|
||||
}
|
||||
|
||||
as_offset_byte_array = nir_build_deref_ptr_as_array(b, as_byte_array, field_offset);
|
||||
nir_deref_instr *field_write_deref =
|
||||
nir_build_deref_cast(b, &as_offset_byte_array->dest.ssa, nir_var_mem_ssbo, field_type, glsl_get_cl_size(field_type));
|
||||
|
||||
nir_store_deref(b, field_write_deref, field_value, ~0);
|
||||
}
|
||||
|
||||
nir_push_else(b, NULL);
|
||||
nir_ssa_def *printf_fail_val = nir_imm_int(b, -1);
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_ssa_def *return_value = nir_if_phi(b, printf_succ_val, printf_fail_val);
|
||||
nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(return_value));
|
||||
nir_instr_remove(&instr->instr);
|
||||
}
|
||||
|
||||
static nir_variable *
|
||||
find_identical_const_sampler(nir_shader *nir, nir_variable *sampler)
|
||||
{
|
||||
nir_foreach_variable_with_modes(uniform, nir, nir_var_uniform) {
|
||||
if (!glsl_type_is_sampler(uniform->type) || !uniform->data.sampler.is_inline_sampler)
|
||||
continue;
|
||||
if (uniform->data.sampler.addressing_mode == sampler->data.sampler.addressing_mode &&
|
||||
uniform->data.sampler.normalized_coordinates == sampler->data.sampler.normalized_coordinates &&
|
||||
uniform->data.sampler.filter_mode == sampler->data.sampler.filter_mode)
|
||||
return uniform;
|
||||
}
|
||||
unreachable("Should have at least found the input sampler");
|
||||
}
|
||||
|
||||
bool
|
||||
clc_nir_dedupe_const_samplers(nir_shader *nir)
|
||||
{
|
||||
bool progress = false;
|
||||
nir_foreach_function(func, nir) {
|
||||
if (!func->impl)
|
||||
continue;
|
||||
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, func->impl);
|
||||
|
||||
nir_foreach_block(block, func->impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_tex)
|
||||
continue;
|
||||
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
int sampler_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
|
||||
if (sampler_idx == -1)
|
||||
continue;
|
||||
|
||||
nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_idx].src);
|
||||
nir_variable *sampler = nir_deref_instr_get_variable(deref);
|
||||
if (!sampler)
|
||||
continue;
|
||||
|
||||
assert(sampler->data.mode == nir_var_uniform);
|
||||
|
||||
if (!sampler->data.sampler.is_inline_sampler)
|
||||
continue;
|
||||
|
||||
nir_variable *replacement = find_identical_const_sampler(nir, sampler);
|
||||
if (replacement == sampler)
|
||||
continue;
|
||||
|
||||
b.cursor = nir_before_instr(&tex->instr);
|
||||
nir_deref_instr *replacement_deref = nir_build_deref_var(&b, replacement);
|
||||
nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_idx].src,
|
||||
nir_src_for_ssa(&replacement_deref->dest.ssa));
|
||||
nir_deref_instr_remove_if_unused(deref);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
nir_metadata_preserve(func->impl, nir_metadata_block_index | nir_metadata_dominance);
|
||||
}
|
||||
}
|
||||
return progress;
|
||||
}
|
40
src/microsoft/clc/clc_nir.h
Normal file
40
src/microsoft/clc/clc_nir.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright © Microsoft Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CLC_NIR_H
|
||||
#define CLC_NIR_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "nir.h"
|
||||
|
||||
bool
|
||||
clc_nir_lower_system_values(nir_shader *nir, nir_variable *var);
|
||||
bool dxil_nir_lower_kernel_input_loads(nir_shader *nir, nir_variable *var);
|
||||
|
||||
bool
|
||||
clc_nir_lower_printf(nir_shader *nir, unsigned uav_id);
|
||||
|
||||
bool
|
||||
clc_nir_dedupe_const_samplers(nir_shader *nir);
|
||||
|
||||
#endif
|
12
src/microsoft/clc/clglon12compiler.def
Normal file
12
src/microsoft/clc/clglon12compiler.def
Normal file
@ -0,0 +1,12 @@
|
||||
EXPORTS
|
||||
clc_context_new
|
||||
clc_free_context
|
||||
clc_context_serialize
|
||||
clc_context_free_serialized
|
||||
clc_context_deserialize
|
||||
clc_compile
|
||||
clc_link
|
||||
clc_free_object
|
||||
clc_to_dxil
|
||||
clc_free_dxil_object
|
||||
clc_compiler_get_version
|
880
src/microsoft/clc/compute_test.cpp
Normal file
880
src/microsoft/clc/compute_test.cpp
Normal file
@ -0,0 +1,880 @@
|
||||
/*
|
||||
* Copyright © Microsoft Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <d3d12.h>
|
||||
#include <dxgi1_4.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <wrl.h>
|
||||
|
||||
#include "util/u_debug.h"
|
||||
#include "clc_compiler.h"
|
||||
#include "compute_test.h"
|
||||
#include "dxcapi.h"
|
||||
|
||||
using std::runtime_error;
|
||||
using Microsoft::WRL::ComPtr;
|
||||
|
||||
enum compute_test_debug_flags {
|
||||
COMPUTE_DEBUG_EXPERIMENTAL_SHADERS = 1 << 0,
|
||||
COMPUTE_DEBUG_USE_HW_D3D = 1 << 1,
|
||||
COMPUTE_DEBUG_OPTIMIZE_LIBCLC = 1 << 2,
|
||||
COMPUTE_DEBUG_SERIALIZE_LIBCLC = 1 << 3,
|
||||
};
|
||||
|
||||
static const struct debug_named_value debug_options[] = {
|
||||
{ "experimental_shaders", COMPUTE_DEBUG_EXPERIMENTAL_SHADERS, "Enable experimental shaders" },
|
||||
{ "use_hw_d3d", COMPUTE_DEBUG_USE_HW_D3D, "Use a hardware D3D device" },
|
||||
{ "optimize_libclc", COMPUTE_DEBUG_OPTIMIZE_LIBCLC, "Optimize the clc_context before using it" },
|
||||
{ "serialize_libclc", COMPUTE_DEBUG_SERIALIZE_LIBCLC, "Serialize and deserialize the clc_context" },
|
||||
DEBUG_NAMED_VALUE_END
|
||||
};
|
||||
|
||||
DEBUG_GET_ONCE_FLAGS_OPTION(debug_compute, "COMPUTE_TEST_DEBUG", debug_options, 0)
|
||||
|
||||
static void warning_callback(void *priv, const char *msg)
|
||||
{
|
||||
fprintf(stderr, "WARNING: %s\n", msg);
|
||||
}
|
||||
|
||||
static void error_callback(void *priv, const char *msg)
|
||||
{
|
||||
fprintf(stderr, "ERROR: %s\n", msg);
|
||||
}
|
||||
|
||||
static const struct clc_logger logger = {
|
||||
NULL,
|
||||
error_callback,
|
||||
warning_callback,
|
||||
};
|
||||
|
||||
void
|
||||
ComputeTest::enable_d3d12_debug_layer()
|
||||
{
|
||||
HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
|
||||
if (!hD3D12Mod) {
|
||||
fprintf(stderr, "D3D12: failed to load D3D12.DLL\n");
|
||||
return;
|
||||
}
|
||||
|
||||
typedef HRESULT(WINAPI * PFN_D3D12_GET_DEBUG_INTERFACE)(REFIID riid,
|
||||
void **ppFactory);
|
||||
PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(hD3D12Mod, "D3D12GetDebugInterface");
|
||||
if (!D3D12GetDebugInterface) {
|
||||
fprintf(stderr, "D3D12: failed to load D3D12GetDebugInterface from D3D12.DLL\n");
|
||||
return;
|
||||
}
|
||||
|
||||
ID3D12Debug *debug;
|
||||
if (FAILED(D3D12GetDebugInterface(__uuidof(ID3D12Debug), (void **)& debug))) {
|
||||
fprintf(stderr, "D3D12: D3D12GetDebugInterface failed\n");
|
||||
return;
|
||||
}
|
||||
|
||||
debug->EnableDebugLayer();
|
||||
}
|
||||
|
||||
IDXGIFactory4 *
|
||||
ComputeTest::get_dxgi_factory()
|
||||
{
|
||||
static const GUID IID_IDXGIFactory4 = {
|
||||
0x1bc6ea02, 0xef36, 0x464f,
|
||||
{ 0xbf, 0x0c, 0x21, 0xca, 0x39, 0xe5, 0x16, 0x8a }
|
||||
};
|
||||
|
||||
typedef HRESULT(WINAPI * PFN_CREATE_DXGI_FACTORY)(REFIID riid,
|
||||
void **ppFactory);
|
||||
PFN_CREATE_DXGI_FACTORY CreateDXGIFactory;
|
||||
|
||||
HMODULE hDXGIMod = LoadLibrary("DXGI.DLL");
|
||||
if (!hDXGIMod)
|
||||
throw runtime_error("Failed to load DXGI.DLL");
|
||||
|
||||
CreateDXGIFactory = (PFN_CREATE_DXGI_FACTORY)GetProcAddress(hDXGIMod, "CreateDXGIFactory");
|
||||
if (!CreateDXGIFactory)
|
||||
throw runtime_error("Failed to load CreateDXGIFactory from DXGI.DLL");
|
||||
|
||||
IDXGIFactory4 *factory = NULL;
|
||||
HRESULT hr = CreateDXGIFactory(IID_IDXGIFactory4, (void **)&factory);
|
||||
if (FAILED(hr))
|
||||
throw runtime_error("CreateDXGIFactory failed");
|
||||
|
||||
return factory;
|
||||
}
|
||||
|
||||
IDXGIAdapter1 *
|
||||
ComputeTest::choose_adapter(IDXGIFactory4 *factory)
|
||||
{
|
||||
IDXGIAdapter1 *ret;
|
||||
|
||||
if (debug_get_option_debug_compute() & COMPUTE_DEBUG_USE_HW_D3D) {
|
||||
for (unsigned i = 0; SUCCEEDED(factory->EnumAdapters1(i, &ret)); i++) {
|
||||
DXGI_ADAPTER_DESC1 desc;
|
||||
ret->GetDesc1(&desc);
|
||||
if (!(desc.Flags & D3D_DRIVER_TYPE_SOFTWARE))
|
||||
return ret;
|
||||
}
|
||||
throw runtime_error("Failed to enum hardware adapter");
|
||||
} else {
|
||||
if (FAILED(factory->EnumWarpAdapter(__uuidof(IDXGIAdapter1),
|
||||
(void **)& ret)))
|
||||
throw runtime_error("Failed to enum warp adapter");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
ID3D12Device *
|
||||
ComputeTest::create_device(IDXGIAdapter1 *adapter)
|
||||
{
|
||||
typedef HRESULT(WINAPI *PFN_D3D12CREATEDEVICE)(IUnknown *, D3D_FEATURE_LEVEL, REFIID, void **);
|
||||
PFN_D3D12CREATEDEVICE D3D12CreateDevice;
|
||||
|
||||
HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
|
||||
if (!hD3D12Mod)
|
||||
throw runtime_error("failed to load D3D12.DLL");
|
||||
|
||||
if (debug_get_option_debug_compute() & COMPUTE_DEBUG_EXPERIMENTAL_SHADERS) {
|
||||
typedef HRESULT(WINAPI *PFN_D3D12ENABLEEXPERIMENTALFEATURES)(UINT, const IID *, void *, UINT *);
|
||||
PFN_D3D12ENABLEEXPERIMENTALFEATURES D3D12EnableExperimentalFeatures;
|
||||
D3D12EnableExperimentalFeatures = (PFN_D3D12ENABLEEXPERIMENTALFEATURES)
|
||||
GetProcAddress(hD3D12Mod, "D3D12EnableExperimentalFeatures");
|
||||
if (FAILED(D3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModels, NULL, NULL)))
|
||||
throw runtime_error("failed to enable experimental shader models");
|
||||
}
|
||||
|
||||
D3D12CreateDevice = (PFN_D3D12CREATEDEVICE)GetProcAddress(hD3D12Mod, "D3D12CreateDevice");
|
||||
if (!D3D12CreateDevice)
|
||||
throw runtime_error("failed to load D3D12CreateDevice from D3D12.DLL");
|
||||
|
||||
ID3D12Device *dev;
|
||||
if (FAILED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_12_0,
|
||||
__uuidof(ID3D12Device), (void **)& dev)))
|
||||
throw runtime_error("D3D12CreateDevice failed");
|
||||
|
||||
return dev;
|
||||
}
|
||||
|
||||
ComPtr<ID3D12RootSignature>
|
||||
ComputeTest::create_root_signature(const ComputeTest::Resources &resources)
|
||||
{
|
||||
D3D12_ROOT_PARAMETER1 root_param;
|
||||
root_param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
root_param.DescriptorTable.NumDescriptorRanges = resources.ranges.size();
|
||||
root_param.DescriptorTable.pDescriptorRanges = resources.ranges.data();
|
||||
root_param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
|
||||
D3D12_ROOT_SIGNATURE_DESC1 root_sig_desc;
|
||||
root_sig_desc.NumParameters = 1;
|
||||
root_sig_desc.pParameters = &root_param;
|
||||
root_sig_desc.NumStaticSamplers = 0;
|
||||
root_sig_desc.pStaticSamplers = NULL;
|
||||
root_sig_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
|
||||
|
||||
D3D12_VERSIONED_ROOT_SIGNATURE_DESC versioned_desc;
|
||||
versioned_desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
|
||||
versioned_desc.Desc_1_1 = root_sig_desc;
|
||||
|
||||
ID3DBlob *sig, *error;
|
||||
if (FAILED(D3D12SerializeVersionedRootSignature(&versioned_desc,
|
||||
&sig, &error)))
|
||||
throw runtime_error("D3D12SerializeVersionedRootSignature failed");
|
||||
|
||||
ComPtr<ID3D12RootSignature> ret;
|
||||
if (FAILED(dev->CreateRootSignature(0,
|
||||
sig->GetBufferPointer(),
|
||||
sig->GetBufferSize(),
|
||||
__uuidof(ret),
|
||||
(void **)& ret)))
|
||||
throw runtime_error("CreateRootSignature failed");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
ComPtr<ID3D12PipelineState>
|
||||
ComputeTest::create_pipeline_state(ComPtr<ID3D12RootSignature> &root_sig,
|
||||
const struct clc_dxil_object &dxil)
|
||||
{
|
||||
D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc = { root_sig.Get() };
|
||||
pipeline_desc.CS.pShaderBytecode = dxil.binary.data;
|
||||
pipeline_desc.CS.BytecodeLength = dxil.binary.size;
|
||||
|
||||
ComPtr<ID3D12PipelineState> pipeline_state;
|
||||
if (FAILED(dev->CreateComputePipelineState(&pipeline_desc,
|
||||
__uuidof(pipeline_state),
|
||||
(void **)& pipeline_state)))
|
||||
throw runtime_error("Failed to create pipeline state");
|
||||
return pipeline_state;
|
||||
}
|
||||
|
||||
ComPtr<ID3D12Resource>
|
||||
ComputeTest::create_buffer(int size, D3D12_HEAP_TYPE heap_type)
|
||||
{
|
||||
D3D12_RESOURCE_DESC desc;
|
||||
desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
|
||||
desc.Width = size;
|
||||
desc.Height = 1;
|
||||
desc.DepthOrArraySize = 1;
|
||||
desc.MipLevels = 1;
|
||||
desc.SampleDesc.Count = 1;
|
||||
desc.SampleDesc.Quality = 0;
|
||||
desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||
desc.Flags = heap_type == D3D12_HEAP_TYPE_DEFAULT ? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE;
|
||||
desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
||||
|
||||
D3D12_HEAP_PROPERTIES heap_pris = dev->GetCustomHeapProperties(0, heap_type);
|
||||
|
||||
D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COMMON;
|
||||
switch (heap_type) {
|
||||
case D3D12_HEAP_TYPE_UPLOAD:
|
||||
initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;
|
||||
break;
|
||||
|
||||
case D3D12_HEAP_TYPE_READBACK:
|
||||
initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||
break;
|
||||
}
|
||||
|
||||
ComPtr<ID3D12Resource> res;
|
||||
if (FAILED(dev->CreateCommittedResource(&heap_pris,
|
||||
D3D12_HEAP_FLAG_NONE, &desc, initial_state,
|
||||
NULL, __uuidof(ID3D12Resource), (void **)&res)))
|
||||
throw runtime_error("CreateCommittedResource failed");
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
ComPtr<ID3D12Resource>
|
||||
ComputeTest::create_upload_buffer_with_data(const void *data, size_t size)
|
||||
{
|
||||
auto upload_res = create_buffer(size, D3D12_HEAP_TYPE_UPLOAD);
|
||||
|
||||
void *ptr = NULL;
|
||||
D3D12_RANGE res_range = { 0, (SIZE_T)size };
|
||||
if (FAILED(upload_res->Map(0, &res_range, (void **)&ptr)))
|
||||
throw runtime_error("Failed to map upload-buffer");
|
||||
assert(ptr);
|
||||
memcpy(ptr, data, size);
|
||||
upload_res->Unmap(0, &res_range);
|
||||
return upload_res;
|
||||
}
|
||||
|
||||
ComPtr<ID3D12Resource>
|
||||
ComputeTest::create_sized_buffer_with_data(size_t buffer_size,
|
||||
const void *data,
|
||||
size_t data_size)
|
||||
{
|
||||
auto upload_res = create_upload_buffer_with_data(data, data_size);
|
||||
|
||||
auto res = create_buffer(buffer_size, D3D12_HEAP_TYPE_DEFAULT);
|
||||
resource_barrier(res, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
cmdlist->CopyBufferRegion(res.Get(), 0, upload_res.Get(), 0, data_size);
|
||||
resource_barrier(res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COMMON);
|
||||
execute_cmdlist();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void
|
||||
ComputeTest::get_buffer_data(ComPtr<ID3D12Resource> res,
|
||||
void *buf, size_t size)
|
||||
{
|
||||
auto readback_res = create_buffer(align(size, 4), D3D12_HEAP_TYPE_READBACK);
|
||||
resource_barrier(res, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
cmdlist->CopyResource(readback_res.Get(), res.Get());
|
||||
resource_barrier(res, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COMMON);
|
||||
execute_cmdlist();
|
||||
|
||||
void *ptr = NULL;
|
||||
D3D12_RANGE res_range = { 0, size };
|
||||
if (FAILED(readback_res->Map(0, &res_range, &ptr)))
|
||||
throw runtime_error("Failed to map readback-buffer");
|
||||
|
||||
memcpy(buf, ptr, size);
|
||||
|
||||
D3D12_RANGE empty_range = { 0, 0 };
|
||||
readback_res->Unmap(0, &empty_range);
|
||||
}
|
||||
|
||||
void
|
||||
ComputeTest::resource_barrier(ComPtr<ID3D12Resource> &res,
|
||||
D3D12_RESOURCE_STATES state_before,
|
||||
D3D12_RESOURCE_STATES state_after)
|
||||
{
|
||||
D3D12_RESOURCE_BARRIER barrier;
|
||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
||||
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
||||
barrier.Transition.pResource = res.Get();
|
||||
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
||||
barrier.Transition.StateBefore = state_before;
|
||||
barrier.Transition.StateAfter = state_after;
|
||||
cmdlist->ResourceBarrier(1, &barrier);
|
||||
}
|
||||
|
||||
void
|
||||
ComputeTest::execute_cmdlist()
|
||||
{
|
||||
if (FAILED(cmdlist->Close()))
|
||||
throw runtime_error("Closing ID3D12GraphicsCommandList failed");
|
||||
|
||||
ID3D12CommandList *cmdlists[] = { cmdlist };
|
||||
cmdqueue->ExecuteCommandLists(1, cmdlists);
|
||||
cmdqueue_fence->SetEventOnCompletion(fence_value, event);
|
||||
cmdqueue->Signal(cmdqueue_fence, fence_value);
|
||||
fence_value++;
|
||||
WaitForSingleObject(event, INFINITE);
|
||||
|
||||
if (FAILED(cmdalloc->Reset()))
|
||||
throw runtime_error("resetting ID3D12CommandAllocator failed");
|
||||
|
||||
if (FAILED(cmdlist->Reset(cmdalloc, NULL)))
|
||||
throw runtime_error("resetting ID3D12GraphicsCommandList failed");
|
||||
}
|
||||
|
||||
void
|
||||
ComputeTest::create_uav_buffer(ComPtr<ID3D12Resource> res,
|
||||
size_t width, size_t byte_stride,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
|
||||
{
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
|
||||
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
||||
uav_desc.Buffer.FirstElement = 0;
|
||||
uav_desc.Buffer.NumElements = DIV_ROUND_UP(width * byte_stride, 4);
|
||||
uav_desc.Buffer.StructureByteStride = 0;
|
||||
uav_desc.Buffer.CounterOffsetInBytes = 0;
|
||||
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
|
||||
|
||||
dev->CreateUnorderedAccessView(res.Get(), NULL, &uav_desc, cpu_handle);
|
||||
}
|
||||
|
||||
void
|
||||
ComputeTest::create_cbv(ComPtr<ID3D12Resource> res, size_t size,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
|
||||
{
|
||||
D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc;
|
||||
cbv_desc.BufferLocation = res ? res->GetGPUVirtualAddress() : 0;
|
||||
cbv_desc.SizeInBytes = size;
|
||||
|
||||
dev->CreateConstantBufferView(&cbv_desc, cpu_handle);
|
||||
}
|
||||
|
||||
ComPtr<ID3D12Resource>
|
||||
ComputeTest::add_uav_resource(ComputeTest::Resources &resources,
|
||||
unsigned spaceid, unsigned resid,
|
||||
const void *data, size_t num_elems,
|
||||
size_t elem_size)
|
||||
{
|
||||
size_t size = align(elem_size * num_elems, 4);
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle;
|
||||
ComPtr<ID3D12Resource> res;
|
||||
handle = uav_heap->GetCPUDescriptorHandleForHeapStart();
|
||||
handle = offset_cpu_handle(handle, resources.descs.size() * uav_heap_incr);
|
||||
|
||||
if (size) {
|
||||
if (data)
|
||||
res = create_buffer_with_data(data, size);
|
||||
else
|
||||
res = create_buffer(size, D3D12_HEAP_TYPE_DEFAULT);
|
||||
|
||||
resource_barrier(res, D3D12_RESOURCE_STATE_COMMON,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
}
|
||||
create_uav_buffer(res, num_elems, elem_size, handle);
|
||||
resources.add(res, D3D12_DESCRIPTOR_RANGE_TYPE_UAV, spaceid, resid);
|
||||
return res;
|
||||
}
|
||||
|
||||
ComPtr<ID3D12Resource>
|
||||
ComputeTest::add_cbv_resource(ComputeTest::Resources &resources,
|
||||
unsigned spaceid, unsigned resid,
|
||||
const void *data, size_t size)
|
||||
{
|
||||
unsigned aligned_size = align(size, 256);
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle;
|
||||
ComPtr<ID3D12Resource> res;
|
||||
handle = uav_heap->GetCPUDescriptorHandleForHeapStart();
|
||||
handle = offset_cpu_handle(handle, resources.descs.size() * uav_heap_incr);
|
||||
|
||||
if (size) {
|
||||
assert(data);
|
||||
res = create_sized_buffer_with_data(aligned_size, data, size);
|
||||
}
|
||||
create_cbv(res, aligned_size, handle);
|
||||
resources.add(res, D3D12_DESCRIPTOR_RANGE_TYPE_CBV, spaceid, resid);
|
||||
return res;
|
||||
}
|
||||
|
||||
void
|
||||
ComputeTest::run_shader_with_raw_args(Shader shader,
|
||||
const CompileArgs &compile_args,
|
||||
const std::vector<RawShaderArg *> &args)
|
||||
{
|
||||
if (args.size() < 1)
|
||||
throw runtime_error("no inputs");
|
||||
|
||||
static HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
|
||||
if (!hD3D12Mod)
|
||||
throw runtime_error("Failed to load D3D12.DLL");
|
||||
|
||||
D3D12SerializeVersionedRootSignature = (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)GetProcAddress(hD3D12Mod, "D3D12SerializeVersionedRootSignature");
|
||||
|
||||
if (args.size() != shader.dxil->kernel->num_args)
|
||||
throw runtime_error("incorrect number of inputs");
|
||||
|
||||
struct clc_runtime_kernel_conf conf = { 0 };
|
||||
|
||||
// Older WARP and some hardware doesn't support int64, so for these tests, unconditionally lower away int64
|
||||
// A more complex runtime can be smarter about detecting when this needs to be done
|
||||
conf.lower_bit_size = 64;
|
||||
|
||||
if (!shader.dxil->metadata.local_size[0])
|
||||
conf.local_size[0] = compile_args.x;
|
||||
else
|
||||
conf.local_size[0] = shader.dxil->metadata.local_size[0];
|
||||
|
||||
if (!shader.dxil->metadata.local_size[1])
|
||||
conf.local_size[1] = compile_args.y;
|
||||
else
|
||||
conf.local_size[1] = shader.dxil->metadata.local_size[1];
|
||||
|
||||
if (!shader.dxil->metadata.local_size[2])
|
||||
conf.local_size[2] = compile_args.z;
|
||||
else
|
||||
conf.local_size[2] = shader.dxil->metadata.local_size[2];
|
||||
|
||||
if (compile_args.x % conf.local_size[0] ||
|
||||
compile_args.y % conf.local_size[1] ||
|
||||
compile_args.z % conf.local_size[2])
|
||||
throw runtime_error("invalid global size must be a multiple of local size");
|
||||
|
||||
std::vector<struct clc_runtime_arg_info> argsinfo(args.size());
|
||||
|
||||
conf.args = argsinfo.data();
|
||||
conf.support_global_work_id_offsets =
|
||||
compile_args.work_props.global_offset_x != 0 ||
|
||||
compile_args.work_props.global_offset_y != 0 ||
|
||||
compile_args.work_props.global_offset_z != 0;
|
||||
conf.support_work_group_id_offsets =
|
||||
compile_args.work_props.group_id_offset_x != 0 ||
|
||||
compile_args.work_props.group_id_offset_y != 0 ||
|
||||
compile_args.work_props.group_id_offset_z != 0;
|
||||
|
||||
for (unsigned i = 0; i < shader.dxil->kernel->num_args; ++i) {
|
||||
RawShaderArg *arg = args[i];
|
||||
size_t size = arg->get_elem_size() * arg->get_num_elems();
|
||||
|
||||
switch (shader.dxil->kernel->args[i].address_qualifier) {
|
||||
case CLC_KERNEL_ARG_ADDRESS_LOCAL:
|
||||
argsinfo[i].localptr.size = size;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
configure(shader, &conf);
|
||||
validate(shader);
|
||||
|
||||
std::shared_ptr<struct clc_dxil_object> &dxil = shader.dxil;
|
||||
|
||||
std::vector<uint8_t> argsbuf(dxil->metadata.kernel_inputs_buf_size);
|
||||
std::vector<ComPtr<ID3D12Resource>> argres(shader.dxil->kernel->num_args);
|
||||
clc_work_properties_data work_props = compile_args.work_props;
|
||||
if (!conf.support_work_group_id_offsets) {
|
||||
work_props.group_count_total_x = compile_args.x / conf.local_size[0];
|
||||
work_props.group_count_total_y = compile_args.y / conf.local_size[1];
|
||||
work_props.group_count_total_z = compile_args.z / conf.local_size[2];
|
||||
}
|
||||
if (work_props.work_dim == 0)
|
||||
work_props.work_dim = 3;
|
||||
Resources resources;
|
||||
|
||||
for (unsigned i = 0; i < dxil->kernel->num_args; ++i) {
|
||||
RawShaderArg *arg = args[i];
|
||||
size_t size = arg->get_elem_size() * arg->get_num_elems();
|
||||
void *slot = argsbuf.data() + dxil->metadata.args[i].offset;
|
||||
|
||||
switch (dxil->kernel->args[i].address_qualifier) {
|
||||
case CLC_KERNEL_ARG_ADDRESS_CONSTANT:
|
||||
case CLC_KERNEL_ARG_ADDRESS_GLOBAL: {
|
||||
assert(dxil->metadata.args[i].size == sizeof(uint64_t));
|
||||
uint64_t *ptr_slot = (uint64_t *)slot;
|
||||
if (arg->get_data())
|
||||
*ptr_slot = (uint64_t)dxil->metadata.args[i].globconstptr.buf_id << 32;
|
||||
else
|
||||
*ptr_slot = ~0ull;
|
||||
break;
|
||||
}
|
||||
case CLC_KERNEL_ARG_ADDRESS_LOCAL: {
|
||||
assert(dxil->metadata.args[i].size == sizeof(uint64_t));
|
||||
uint64_t *ptr_slot = (uint64_t *)slot;
|
||||
*ptr_slot = dxil->metadata.args[i].localptr.sharedmem_offset;
|
||||
break;
|
||||
}
|
||||
case CLC_KERNEL_ARG_ADDRESS_PRIVATE: {
|
||||
assert(size == dxil->metadata.args[i].size);
|
||||
memcpy(slot, arg->get_data(), size);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < dxil->kernel->num_args; ++i) {
|
||||
RawShaderArg *arg = args[i];
|
||||
|
||||
if (dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL ||
|
||||
dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_CONSTANT) {
|
||||
argres[i] = add_uav_resource(resources, 0,
|
||||
dxil->metadata.args[i].globconstptr.buf_id,
|
||||
arg->get_data(), arg->get_num_elems(),
|
||||
arg->get_elem_size());
|
||||
}
|
||||
}
|
||||
|
||||
if (dxil->metadata.printf_uav_id > 0)
|
||||
add_uav_resource(resources, 0, dxil->metadata.printf_uav_id, NULL, 1024 * 1024 / 4, 4);
|
||||
|
||||
for (unsigned i = 0; i < dxil->metadata.num_consts; ++i)
|
||||
add_uav_resource(resources, 0, dxil->metadata.consts[i].uav_id,
|
||||
dxil->metadata.consts[i].data,
|
||||
dxil->metadata.consts[i].size / 4, 4);
|
||||
|
||||
if (argsbuf.size())
|
||||
add_cbv_resource(resources, 0, dxil->metadata.kernel_inputs_cbv_id,
|
||||
argsbuf.data(), argsbuf.size());
|
||||
|
||||
add_cbv_resource(resources, 0, dxil->metadata.work_properties_cbv_id,
|
||||
&work_props, sizeof(work_props));
|
||||
|
||||
auto root_sig = create_root_signature(resources);
|
||||
auto pipeline_state = create_pipeline_state(root_sig, *dxil);
|
||||
|
||||
cmdlist->SetDescriptorHeaps(1, &uav_heap);
|
||||
cmdlist->SetComputeRootSignature(root_sig.Get());
|
||||
cmdlist->SetComputeRootDescriptorTable(0, uav_heap->GetGPUDescriptorHandleForHeapStart());
|
||||
cmdlist->SetPipelineState(pipeline_state.Get());
|
||||
|
||||
cmdlist->Dispatch(compile_args.x / conf.local_size[0],
|
||||
compile_args.y / conf.local_size[1],
|
||||
compile_args.z / conf.local_size[2]);
|
||||
|
||||
for (auto &range : resources.ranges) {
|
||||
if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) {
|
||||
for (unsigned i = range.OffsetInDescriptorsFromTableStart;
|
||||
i < range.NumDescriptors; i++) {
|
||||
if (!resources.descs[i].Get())
|
||||
continue;
|
||||
|
||||
resource_barrier(resources.descs[i],
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_COMMON);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
execute_cmdlist();
|
||||
|
||||
for (unsigned i = 0; i < args.size(); i++) {
|
||||
if (!(args[i]->get_direction() & SHADER_ARG_OUTPUT))
|
||||
continue;
|
||||
|
||||
assert(dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL);
|
||||
get_buffer_data(argres[i], args[i]->get_data(),
|
||||
args[i]->get_elem_size() * args[i]->get_num_elems());
|
||||
}
|
||||
|
||||
ComPtr<ID3D12InfoQueue> info_queue;
|
||||
dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
|
||||
if (info_queue)
|
||||
{
|
||||
EXPECT_EQ(0, info_queue->GetNumStoredMessages());
|
||||
for (unsigned i = 0; i < info_queue->GetNumStoredMessages(); ++i) {
|
||||
SIZE_T message_size = 0;
|
||||
info_queue->GetMessageA(i, nullptr, &message_size);
|
||||
D3D12_MESSAGE* message = (D3D12_MESSAGE*)malloc(message_size);
|
||||
info_queue->GetMessageA(i, message, &message_size);
|
||||
FAIL() << message->pDescription;
|
||||
free(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ComputeTest::SetUp()
|
||||
{
|
||||
static struct clc_context *compiler_ctx_g = nullptr;
|
||||
|
||||
if (!compiler_ctx_g) {
|
||||
clc_context_options options = { };
|
||||
options.optimize = (debug_get_option_debug_compute() & COMPUTE_DEBUG_OPTIMIZE_LIBCLC) != 0;
|
||||
|
||||
compiler_ctx_g = clc_context_new(&logger, &options);
|
||||
if (!compiler_ctx_g)
|
||||
throw runtime_error("failed to create CLC compiler context");
|
||||
|
||||
if (debug_get_option_debug_compute() & COMPUTE_DEBUG_SERIALIZE_LIBCLC) {
|
||||
void *serialized = nullptr;
|
||||
size_t serialized_size = 0;
|
||||
clc_context_serialize(compiler_ctx_g, &serialized, &serialized_size);
|
||||
if (!serialized)
|
||||
throw runtime_error("failed to serialize CLC compiler context");
|
||||
|
||||
clc_free_context(compiler_ctx_g);
|
||||
compiler_ctx_g = nullptr;
|
||||
|
||||
compiler_ctx_g = clc_context_deserialize(serialized, serialized_size);
|
||||
if (!compiler_ctx_g)
|
||||
throw runtime_error("failed to deserialize CLC compiler context");
|
||||
|
||||
clc_context_free_serialized(serialized);
|
||||
}
|
||||
}
|
||||
compiler_ctx = compiler_ctx_g;
|
||||
|
||||
enable_d3d12_debug_layer();
|
||||
|
||||
factory = get_dxgi_factory();
|
||||
if (!factory)
|
||||
throw runtime_error("failed to create DXGI factory");
|
||||
|
||||
adapter = choose_adapter(factory);
|
||||
if (!adapter)
|
||||
throw runtime_error("failed to choose adapter");
|
||||
|
||||
dev = create_device(adapter);
|
||||
if (!dev)
|
||||
throw runtime_error("failed to create device");
|
||||
|
||||
if (FAILED(dev->CreateFence(0, D3D12_FENCE_FLAG_NONE,
|
||||
__uuidof(cmdqueue_fence),
|
||||
(void **)&cmdqueue_fence)))
|
||||
throw runtime_error("failed to create fence\n");
|
||||
|
||||
D3D12_COMMAND_QUEUE_DESC queue_desc;
|
||||
queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
|
||||
queue_desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
|
||||
queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
|
||||
queue_desc.NodeMask = 0;
|
||||
if (FAILED(dev->CreateCommandQueue(&queue_desc,
|
||||
__uuidof(cmdqueue),
|
||||
(void **)&cmdqueue)))
|
||||
throw runtime_error("failed to create command queue");
|
||||
|
||||
if (FAILED(dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE,
|
||||
__uuidof(cmdalloc), (void **)&cmdalloc)))
|
||||
throw runtime_error("failed to create command allocator");
|
||||
|
||||
if (FAILED(dev->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE,
|
||||
cmdalloc, NULL, __uuidof(cmdlist), (void **)&cmdlist)))
|
||||
throw runtime_error("failed to create command list");
|
||||
|
||||
D3D12_DESCRIPTOR_HEAP_DESC heap_desc;
|
||||
heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
|
||||
heap_desc.NumDescriptors = 1000;
|
||||
heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
|
||||
heap_desc.NodeMask = 0;
|
||||
if (FAILED(dev->CreateDescriptorHeap(&heap_desc,
|
||||
__uuidof(uav_heap), (void **)&uav_heap)))
|
||||
throw runtime_error("failed to create descriptor heap");
|
||||
|
||||
uav_heap_incr = dev->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
|
||||
event = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
if (!event)
|
||||
throw runtime_error("Failed to create event");
|
||||
fence_value = 1;
|
||||
}
|
||||
|
||||
void
|
||||
ComputeTest::TearDown()
|
||||
{
|
||||
CloseHandle(event);
|
||||
|
||||
uav_heap->Release();
|
||||
cmdlist->Release();
|
||||
cmdalloc->Release();
|
||||
cmdqueue->Release();
|
||||
cmdqueue_fence->Release();
|
||||
dev->Release();
|
||||
adapter->Release();
|
||||
factory->Release();
|
||||
}
|
||||
|
||||
PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE ComputeTest::D3D12SerializeVersionedRootSignature;
|
||||
|
||||
bool
|
||||
validate_module(const struct clc_dxil_object &dxil)
|
||||
{
|
||||
static HMODULE hmod = LoadLibrary("DXIL.DLL");
|
||||
if (!hmod) {
|
||||
/* Enabling experimental shaders allows us to run unsigned shader code,
|
||||
* such as when under the debugger where we can't run the validator. */
|
||||
if (debug_get_option_debug_compute() & COMPUTE_DEBUG_EXPERIMENTAL_SHADERS)
|
||||
return true;
|
||||
else
|
||||
throw runtime_error("failed to load DXIL.DLL");
|
||||
}
|
||||
|
||||
DxcCreateInstanceProc pfnDxcCreateInstance =
|
||||
(DxcCreateInstanceProc)GetProcAddress(hmod, "DxcCreateInstance");
|
||||
if (!pfnDxcCreateInstance)
|
||||
throw runtime_error("failed to load DxcCreateInstance");
|
||||
|
||||
struct shader_blob : public IDxcBlob {
|
||||
shader_blob(void *data, size_t size) : data(data), size(size) {}
|
||||
LPVOID STDMETHODCALLTYPE GetBufferPointer() override { return data; }
|
||||
SIZE_T STDMETHODCALLTYPE GetBufferSize() override { return size; }
|
||||
HRESULT STDMETHODCALLTYPE QueryInterface(REFIID, void **) override { return E_NOINTERFACE; }
|
||||
ULONG STDMETHODCALLTYPE AddRef() override { return 1; }
|
||||
ULONG STDMETHODCALLTYPE Release() override { return 0; }
|
||||
void *data;
|
||||
size_t size;
|
||||
} blob(dxil.binary.data, dxil.binary.size);
|
||||
|
||||
IDxcValidator *validator;
|
||||
if (FAILED(pfnDxcCreateInstance(CLSID_DxcValidator, __uuidof(IDxcValidator),
|
||||
(void **)&validator)))
|
||||
throw runtime_error("failed to create IDxcValidator");
|
||||
|
||||
IDxcOperationResult *result;
|
||||
if (FAILED(validator->Validate(&blob, DxcValidatorFlags_InPlaceEdit,
|
||||
&result)))
|
||||
throw runtime_error("Validate failed");
|
||||
|
||||
HRESULT hr;
|
||||
if (FAILED(result->GetStatus(&hr)) ||
|
||||
FAILED(hr)) {
|
||||
IDxcBlobEncoding *message;
|
||||
result->GetErrorBuffer(&message);
|
||||
fprintf(stderr, "D3D12: validation failed: %*s\n",
|
||||
(int)message->GetBufferSize(),
|
||||
(char *)message->GetBufferPointer());
|
||||
message->Release();
|
||||
validator->Release();
|
||||
result->Release();
|
||||
return false;
|
||||
}
|
||||
|
||||
validator->Release();
|
||||
result->Release();
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
dump_blob(const char *path, const struct clc_dxil_object &dxil)
|
||||
{
|
||||
FILE *fp = fopen(path, "wb");
|
||||
if (fp) {
|
||||
fwrite(dxil.binary.data, 1, dxil.binary.size, fp);
|
||||
fclose(fp);
|
||||
printf("D3D12: wrote '%s'...\n", path);
|
||||
}
|
||||
}
|
||||
|
||||
ComputeTest::Shader
|
||||
ComputeTest::compile(const std::vector<const char *> &sources,
|
||||
const std::vector<const char *> &compile_args,
|
||||
bool create_library)
|
||||
{
|
||||
struct clc_compile_args args = { 0 };
|
||||
args.args = compile_args.data();
|
||||
args.num_args = (unsigned)compile_args.size();
|
||||
struct clc_dxil_object *dxil;
|
||||
ComputeTest::Shader shader;
|
||||
|
||||
std::vector<Shader> shaders;
|
||||
|
||||
args.source.name = "obj.cl";
|
||||
|
||||
for (unsigned i = 0; i < sources.size(); i++) {
|
||||
args.source.value = sources[i];
|
||||
|
||||
auto obj = clc_compile(compiler_ctx, &args, &logger);
|
||||
if (!obj)
|
||||
throw runtime_error("failed to compile object!");
|
||||
|
||||
Shader shader;
|
||||
shader.obj = std::shared_ptr<struct clc_object>(obj, clc_free_object);
|
||||
shaders.push_back(shader);
|
||||
}
|
||||
|
||||
if (shaders.size() == 1 && create_library)
|
||||
return shaders[0];
|
||||
|
||||
return link(shaders, create_library);
|
||||
}
|
||||
|
||||
ComputeTest::Shader
|
||||
ComputeTest::link(const std::vector<Shader> &sources,
|
||||
bool create_library)
|
||||
{
|
||||
std::vector<const clc_object*> objs;
|
||||
for (auto& source : sources)
|
||||
objs.push_back(&*source.obj);
|
||||
|
||||
struct clc_linker_args link_args = {};
|
||||
link_args.in_objs = objs.data();
|
||||
link_args.num_in_objs = (unsigned)objs.size();
|
||||
link_args.create_library = create_library;
|
||||
struct clc_object *obj = clc_link(compiler_ctx,
|
||||
&link_args,
|
||||
&logger);
|
||||
if (!obj)
|
||||
throw runtime_error("failed to link objects!");
|
||||
|
||||
ComputeTest::Shader shader;
|
||||
shader.obj = std::shared_ptr<struct clc_object>(obj, clc_free_object);
|
||||
if (!link_args.create_library)
|
||||
configure(shader, NULL);
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
||||
void
|
||||
ComputeTest::configure(Shader &shader,
|
||||
const struct clc_runtime_kernel_conf *conf)
|
||||
{
|
||||
struct clc_dxil_object *dxil;
|
||||
|
||||
dxil = clc_to_dxil(compiler_ctx, shader.obj.get(), "main_test", conf, &logger);
|
||||
if (!dxil)
|
||||
throw runtime_error("failed to compile kernel!");
|
||||
|
||||
shader.dxil = std::shared_ptr<struct clc_dxil_object>(dxil, clc_free_dxil_object);
|
||||
}
|
||||
|
||||
void
|
||||
ComputeTest::validate(ComputeTest::Shader &shader)
|
||||
{
|
||||
dump_blob("unsigned.cso", *shader.dxil);
|
||||
if (!validate_module(*shader.dxil))
|
||||
throw runtime_error("failed to validate module!");
|
||||
|
||||
dump_blob("signed.cso", *shader.dxil);
|
||||
}
|
324
src/microsoft/clc/compute_test.h
Normal file
324
src/microsoft/clc/compute_test.h
Normal file
@ -0,0 +1,324 @@
|
||||
/*
|
||||
* Copyright © Microsoft Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <d3d12.h>
|
||||
#include <dxgi1_4.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <wrl.h>
|
||||
|
||||
#include "clc_compiler.h"
|
||||
|
||||
using std::runtime_error;
|
||||
using Microsoft::WRL::ComPtr;
|
||||
|
||||
inline D3D12_CPU_DESCRIPTOR_HANDLE
|
||||
offset_cpu_handle(D3D12_CPU_DESCRIPTOR_HANDLE handle, UINT offset)
|
||||
{
|
||||
handle.ptr += offset;
|
||||
return handle;
|
||||
}
|
||||
|
||||
inline size_t
|
||||
align(size_t value, unsigned alignment)
|
||||
{
|
||||
assert(alignment > 0);
|
||||
return ((value + (alignment - 1)) / alignment) * alignment;
|
||||
}
|
||||
|
||||
class ComputeTest : public ::testing::Test {
|
||||
protected:
|
||||
struct Shader {
|
||||
std::shared_ptr<struct clc_object> obj;
|
||||
std::shared_ptr<struct clc_dxil_object> dxil;
|
||||
};
|
||||
|
||||
static void
|
||||
enable_d3d12_debug_layer();
|
||||
|
||||
static IDXGIFactory4 *
|
||||
get_dxgi_factory();
|
||||
|
||||
static IDXGIAdapter1 *
|
||||
choose_adapter(IDXGIFactory4 *factory);
|
||||
|
||||
static ID3D12Device *
|
||||
create_device(IDXGIAdapter1 *adapter);
|
||||
|
||||
struct Resources {
|
||||
void add(ComPtr<ID3D12Resource> res,
|
||||
D3D12_DESCRIPTOR_RANGE_TYPE type,
|
||||
unsigned spaceid,
|
||||
unsigned resid)
|
||||
{
|
||||
descs.push_back(res);
|
||||
|
||||
if(!ranges.empty() &&
|
||||
ranges.back().RangeType == type &&
|
||||
ranges.back().RegisterSpace == spaceid &&
|
||||
ranges.back().BaseShaderRegister + ranges.back().NumDescriptors == resid) {
|
||||
ranges.back().NumDescriptors++;
|
||||
return;
|
||||
}
|
||||
|
||||
D3D12_DESCRIPTOR_RANGE1 range;
|
||||
|
||||
range.RangeType = type;
|
||||
range.NumDescriptors = 1;
|
||||
range.BaseShaderRegister = resid;
|
||||
range.RegisterSpace = spaceid;
|
||||
range.OffsetInDescriptorsFromTableStart = descs.size() - 1;
|
||||
range.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS;
|
||||
ranges.push_back(range);
|
||||
}
|
||||
|
||||
std::vector<D3D12_DESCRIPTOR_RANGE1> ranges;
|
||||
std::vector<ComPtr<ID3D12Resource>> descs;
|
||||
};
|
||||
|
||||
ComPtr<ID3D12RootSignature>
|
||||
create_root_signature(const Resources &resources);
|
||||
|
||||
ComPtr<ID3D12PipelineState>
|
||||
create_pipeline_state(ComPtr<ID3D12RootSignature> &root_sig,
|
||||
const struct clc_dxil_object &dxil);
|
||||
|
||||
ComPtr<ID3D12Resource>
|
||||
create_buffer(int size, D3D12_HEAP_TYPE heap_type);
|
||||
|
||||
ComPtr<ID3D12Resource>
|
||||
create_upload_buffer_with_data(const void *data, size_t size);
|
||||
|
||||
ComPtr<ID3D12Resource>
|
||||
create_sized_buffer_with_data(size_t buffer_size, const void *data,
|
||||
size_t data_size);
|
||||
|
||||
ComPtr<ID3D12Resource>
|
||||
create_buffer_with_data(const void *data, size_t size)
|
||||
{
|
||||
return create_sized_buffer_with_data(size, data, size);
|
||||
}
|
||||
|
||||
void
|
||||
get_buffer_data(ComPtr<ID3D12Resource> res,
|
||||
void *buf, size_t size);
|
||||
|
||||
void
|
||||
resource_barrier(ComPtr<ID3D12Resource> &res,
|
||||
D3D12_RESOURCE_STATES state_before,
|
||||
D3D12_RESOURCE_STATES state_after);
|
||||
|
||||
void
|
||||
execute_cmdlist();
|
||||
|
||||
void
|
||||
create_uav_buffer(ComPtr<ID3D12Resource> res,
|
||||
size_t width, size_t byte_stride,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle);
|
||||
|
||||
void create_cbv(ComPtr<ID3D12Resource> res, size_t size,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle);
|
||||
|
||||
ComPtr<ID3D12Resource>
|
||||
add_uav_resource(Resources &resources, unsigned spaceid, unsigned resid,
|
||||
const void *data = NULL, size_t num_elems = 0,
|
||||
size_t elem_size = 0);
|
||||
|
||||
ComPtr<ID3D12Resource>
|
||||
add_cbv_resource(Resources &resources, unsigned spaceid, unsigned resid,
|
||||
const void *data, size_t size);
|
||||
|
||||
void
|
||||
SetUp() override;
|
||||
|
||||
void
|
||||
TearDown() override;
|
||||
|
||||
Shader
|
||||
compile(const std::vector<const char *> &sources,
|
||||
const std::vector<const char *> &compile_args = {},
|
||||
bool create_library = false);
|
||||
|
||||
Shader
|
||||
link(const std::vector<Shader> &sources,
|
||||
bool create_library = false);
|
||||
|
||||
void
|
||||
configure(Shader &shader,
|
||||
const struct clc_runtime_kernel_conf *conf);
|
||||
|
||||
void
|
||||
validate(Shader &shader);
|
||||
|
||||
enum ShaderArgDirection {
|
||||
SHADER_ARG_INPUT = 1,
|
||||
SHADER_ARG_OUTPUT = 2,
|
||||
SHADER_ARG_INOUT = SHADER_ARG_INPUT | SHADER_ARG_OUTPUT,
|
||||
};
|
||||
|
||||
class RawShaderArg {
|
||||
public:
|
||||
RawShaderArg(enum ShaderArgDirection dir) : dir(dir) { }
|
||||
virtual size_t get_elem_size() const = 0;
|
||||
virtual size_t get_num_elems() const = 0;
|
||||
virtual const void *get_data() const = 0;
|
||||
virtual void *get_data() = 0;
|
||||
enum ShaderArgDirection get_direction() { return dir; }
|
||||
private:
|
||||
enum ShaderArgDirection dir;
|
||||
};
|
||||
|
||||
class NullShaderArg : public RawShaderArg {
|
||||
public:
|
||||
NullShaderArg() : RawShaderArg(SHADER_ARG_INPUT) { }
|
||||
size_t get_elem_size() const override { return 0; }
|
||||
size_t get_num_elems() const override { return 0; }
|
||||
const void *get_data() const override { return NULL; }
|
||||
void *get_data() override { return NULL; }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class ShaderArg : public std::vector<T>, public RawShaderArg
|
||||
{
|
||||
public:
|
||||
ShaderArg(const T &v, enum ShaderArgDirection dir = SHADER_ARG_INOUT) :
|
||||
std::vector<T>({ v }), RawShaderArg(dir) { }
|
||||
ShaderArg(const std::vector<T> &v, enum ShaderArgDirection dir = SHADER_ARG_INOUT) :
|
||||
std::vector<T>(v), RawShaderArg(dir) { }
|
||||
ShaderArg(const std::initializer_list<T> v, enum ShaderArgDirection dir = SHADER_ARG_INOUT) :
|
||||
std::vector<T>(v), RawShaderArg(dir) { }
|
||||
|
||||
ShaderArg<T>& operator =(const T &v)
|
||||
{
|
||||
this->clear();
|
||||
this->push_back(v);
|
||||
return *this;
|
||||
}
|
||||
|
||||
operator T&() { return this->at(0); }
|
||||
operator const T&() const { return this->at(0); }
|
||||
|
||||
ShaderArg<T>& operator =(const std::vector<T> &v)
|
||||
{
|
||||
*this = v;
|
||||
return *this;
|
||||
}
|
||||
|
||||
ShaderArg<T>& operator =(std::initializer_list<T> v)
|
||||
{
|
||||
*this = v;
|
||||
return *this;
|
||||
}
|
||||
|
||||
size_t get_elem_size() const override { return sizeof(T); }
|
||||
size_t get_num_elems() const override { return this->size(); }
|
||||
const void *get_data() const override { return this->data(); }
|
||||
void *get_data() override { return this->data(); }
|
||||
};
|
||||
|
||||
struct CompileArgs
|
||||
{
|
||||
unsigned x, y, z;
|
||||
std::vector<const char *> compiler_command_line;
|
||||
clc_work_properties_data work_props;
|
||||
};
|
||||
|
||||
private:
|
||||
void gather_args(std::vector<RawShaderArg *> &args) { }
|
||||
|
||||
template <typename T, typename... Rest>
|
||||
void gather_args(std::vector<RawShaderArg *> &args, T &arg, Rest&... rest)
|
||||
{
|
||||
args.push_back(&arg);
|
||||
gather_args(args, rest...);
|
||||
}
|
||||
|
||||
void run_shader_with_raw_args(Shader shader,
|
||||
const CompileArgs &compile_args,
|
||||
const std::vector<RawShaderArg *> &args);
|
||||
|
||||
protected:
|
||||
template <typename... Args>
|
||||
void run_shader(Shader shader,
|
||||
const CompileArgs &compile_args,
|
||||
Args&... args)
|
||||
{
|
||||
std::vector<RawShaderArg *> raw_args;
|
||||
gather_args(raw_args, args...);
|
||||
run_shader_with_raw_args(shader, compile_args, raw_args);
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
void run_shader(const std::vector<const char *> &sources,
|
||||
unsigned x, unsigned y, unsigned z,
|
||||
Args&... args)
|
||||
{
|
||||
std::vector<RawShaderArg *> raw_args;
|
||||
gather_args(raw_args, args...);
|
||||
CompileArgs compile_args = { x, y, z };
|
||||
run_shader_with_raw_args(compile(sources), compile_args, raw_args);
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
void run_shader(const std::vector<const char *> &sources,
|
||||
const CompileArgs &compile_args,
|
||||
Args&... args)
|
||||
{
|
||||
std::vector<RawShaderArg *> raw_args;
|
||||
gather_args(raw_args, args...);
|
||||
run_shader_with_raw_args(
|
||||
compile(sources, compile_args.compiler_command_line),
|
||||
compile_args, raw_args);
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
void run_shader(const char *source,
|
||||
unsigned x, unsigned y, unsigned z,
|
||||
Args&... args)
|
||||
{
|
||||
std::vector<RawShaderArg *> raw_args;
|
||||
gather_args(raw_args, args...);
|
||||
CompileArgs compile_args = { x, y, z };
|
||||
run_shader_with_raw_args(compile({ source }), compile_args, raw_args);
|
||||
}
|
||||
|
||||
IDXGIFactory4 *factory;
|
||||
IDXGIAdapter1 *adapter;
|
||||
ID3D12Device *dev;
|
||||
ID3D12Fence *cmdqueue_fence;
|
||||
ID3D12CommandQueue *cmdqueue;
|
||||
ID3D12CommandAllocator *cmdalloc;
|
||||
ID3D12GraphicsCommandList *cmdlist;
|
||||
ID3D12DescriptorHeap *uav_heap;
|
||||
|
||||
struct clc_context *compiler_ctx;
|
||||
|
||||
UINT uav_heap_incr;
|
||||
int fence_value;
|
||||
|
||||
HANDLE event;
|
||||
static PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE D3D12SerializeVersionedRootSignature;
|
||||
};
|
59
src/microsoft/clc/meson.build
Normal file
59
src/microsoft/clc/meson.build
Normal file
@ -0,0 +1,59 @@
|
||||
# Copyright © Microsoft Corporation
|
||||
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
clang_resource_dir = join_paths(
|
||||
dep_clang.get_variable(cmake: 'CLANG_INCLUDE_DIRS'), '..',
|
||||
'lib', 'clang', dep_clang.version(), 'include'
|
||||
)
|
||||
|
||||
opencl_c_h = custom_target(
|
||||
'opencl-c.h',
|
||||
input : [files_xxd, join_paths(clang_resource_dir, 'opencl-c.h')],
|
||||
output : 'opencl-c.h.h',
|
||||
command : [prog_python, '@INPUT@', '@OUTPUT@', '-n', 'opencl_c_source'],
|
||||
)
|
||||
opencl_c_base_h = custom_target(
|
||||
'opencl-c-base.h',
|
||||
input : [files_xxd, join_paths(clang_resource_dir, 'opencl-c-base.h')],
|
||||
output : 'opencl-c-base.h.h',
|
||||
command : [prog_python, '@INPUT@', '@OUTPUT@', '-n', 'opencl_c_base_source'],
|
||||
)
|
||||
|
||||
libclc_compiler = shared_library(
|
||||
'clglon12compiler',
|
||||
'clc_compiler.c',
|
||||
'clc_nir.c',
|
||||
'clc_helpers.cpp',
|
||||
opencl_c_h,
|
||||
opencl_c_base_h,
|
||||
vs_module_defs : 'clglon12compiler.def',
|
||||
include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_compiler, inc_gallium, inc_spirv],
|
||||
dependencies: [idep_nir_headers, dep_clang, dep_llvm, cc.find_library('version'),
|
||||
dep_llvmspirvlib, idep_mesautil, idep_libdxil_compiler, idep_nir, dep_spirv_tools]
|
||||
)
|
||||
|
||||
clc_compiler_test = executable('clc_compiler_test',
|
||||
['clc_compiler_test.cpp', 'compute_test.cpp'],
|
||||
link_with : [libclc_compiler],
|
||||
dependencies : [idep_gtest, idep_mesautil],
|
||||
include_directories : [inc_include, inc_src])
|
||||
|
||||
test('clc_compiler_test', clc_compiler_test, timeout: 120)
|
676
src/microsoft/compiler/dxcapi.h
Normal file
676
src/microsoft/compiler/dxcapi.h
Normal file
@ -0,0 +1,676 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// //
|
||||
// dxcapi.h //
|
||||
// Copyright (C) Microsoft Corporation. All rights reserved. //
|
||||
// This file is distributed under the University of Illinois Open Source //
|
||||
// License. See LICENSE.TXT for details. //
|
||||
// //
|
||||
// Provides declarations for the DirectX Compiler API entry point. //
|
||||
// //
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef __DXC_API__
|
||||
#define __DXC_API__
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef DXC_API_IMPORT
|
||||
#define DXC_API_IMPORT __declspec(dllimport)
|
||||
#endif
|
||||
#else
|
||||
#ifndef DXC_API_IMPORT
|
||||
#define DXC_API_IMPORT __attribute__ ((visibility ("default")))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#define DECLARE_CROSS_PLATFORM_UUIDOF(T)
|
||||
#define DEFINE_CROSS_PLATFORM_UUIDOF(T)
|
||||
#else
|
||||
#include <dlfcn.h>
|
||||
#include "dxc/Support/WinAdapter.h"
|
||||
#endif
|
||||
|
||||
struct IMalloc;
|
||||
|
||||
struct IDxcIncludeHandler;
|
||||
|
||||
typedef HRESULT (__stdcall *DxcCreateInstanceProc)(
|
||||
_In_ REFCLSID rclsid,
|
||||
_In_ REFIID riid,
|
||||
_Out_ LPVOID* ppv
|
||||
);
|
||||
|
||||
typedef HRESULT(__stdcall *DxcCreateInstance2Proc)(
|
||||
_In_ IMalloc *pMalloc,
|
||||
_In_ REFCLSID rclsid,
|
||||
_In_ REFIID riid,
|
||||
_Out_ LPVOID* ppv
|
||||
);
|
||||
|
||||
/// <summary>
|
||||
/// Creates a single uninitialized object of the class associated with a specified CLSID.
|
||||
/// </summary>
|
||||
/// <param name="rclsid">
|
||||
/// The CLSID associated with the data and code that will be used to create the object.
|
||||
/// </param>
|
||||
/// <param name="riid">
|
||||
/// A reference to the identifier of the interface to be used to communicate
|
||||
/// with the object.
|
||||
/// </param>
|
||||
/// <param name="ppv">
|
||||
/// Address of pointer variable that receives the interface pointer requested
|
||||
/// in riid. Upon successful return, *ppv contains the requested interface
|
||||
/// pointer. Upon failure, *ppv contains NULL.</param>
|
||||
/// <remarks>
|
||||
/// While this function is similar to CoCreateInstance, there is no COM involvement.
|
||||
/// </remarks>
|
||||
|
||||
extern "C"
|
||||
DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance(
|
||||
_In_ REFCLSID rclsid,
|
||||
_In_ REFIID riid,
|
||||
_Out_ LPVOID* ppv
|
||||
);
|
||||
|
||||
extern "C"
|
||||
DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance2(
|
||||
_In_ IMalloc *pMalloc,
|
||||
_In_ REFCLSID rclsid,
|
||||
_In_ REFIID riid,
|
||||
_Out_ LPVOID* ppv
|
||||
);
|
||||
|
||||
// For convenience, equivalent definitions to CP_UTF8 and CP_UTF16.
|
||||
#define DXC_CP_UTF8 65001
|
||||
#define DXC_CP_UTF16 1200
|
||||
// Use DXC_CP_ACP for: Binary; ANSI Text; Autodetect UTF with BOM
|
||||
#define DXC_CP_ACP 0
|
||||
|
||||
// This flag indicates that the shader hash was computed taking into account source information (-Zss)
|
||||
#define DXC_HASHFLAG_INCLUDES_SOURCE 1
|
||||
|
||||
// Hash digest type for ShaderHash
|
||||
typedef struct DxcShaderHash {
|
||||
UINT32 Flags; // DXC_HASHFLAG_*
|
||||
BYTE HashDigest[16];
|
||||
} DxcShaderHash;
|
||||
|
||||
#define DXC_FOURCC(ch0, ch1, ch2, ch3) ( \
|
||||
(UINT32)(UINT8)(ch0) | (UINT32)(UINT8)(ch1) << 8 | \
|
||||
(UINT32)(UINT8)(ch2) << 16 | (UINT32)(UINT8)(ch3) << 24 \
|
||||
)
|
||||
#define DXC_PART_PDB DXC_FOURCC('I', 'L', 'D', 'B')
|
||||
#define DXC_PART_PDB_NAME DXC_FOURCC('I', 'L', 'D', 'N')
|
||||
#define DXC_PART_PRIVATE_DATA DXC_FOURCC('P', 'R', 'I', 'V')
|
||||
#define DXC_PART_ROOT_SIGNATURE DXC_FOURCC('R', 'T', 'S', '0')
|
||||
#define DXC_PART_DXIL DXC_FOURCC('D', 'X', 'I', 'L')
|
||||
#define DXC_PART_REFLECTION_DATA DXC_FOURCC('R', 'D', 'A', 'T')
|
||||
#define DXC_PART_SHADER_HASH DXC_FOURCC('H', 'A', 'S', 'H')
|
||||
#define DXC_PART_INPUT_SIGNATURE DXC_FOURCC('I', 'S', 'G', '1')
|
||||
#define DXC_PART_OUTPUT_SIGNATURE DXC_FOURCC('O', 'S', 'G', '1')
|
||||
#define DXC_PART_PATCH_CONSTANT_SIGNATURE DXC_FOURCC('P', 'S', 'G', '1')
|
||||
|
||||
// Some option arguments are defined here for continuity with D3DCompile interface
|
||||
#define DXC_ARG_DEBUG L"-Zi"
|
||||
#define DXC_ARG_SKIP_VALIDATION L"-Vd"
|
||||
#define DXC_ARG_SKIP_OPTIMIZATIONS L"-Od"
|
||||
#define DXC_ARG_PACK_MATRIX_ROW_MAJOR L"-Zpr"
|
||||
#define DXC_ARG_PACK_MATRIX_COLUMN_MAJOR L"-Zpc"
|
||||
#define DXC_ARG_AVOID_FLOW_CONTROL L"-Gfa"
|
||||
#define DXC_ARG_PREFER_FLOW_CONTROL L"-Gfp"
|
||||
#define DXC_ARG_ENABLE_STRICTNESS L"-Ges"
|
||||
#define DXC_ARG_ENABLE_BACKWARDS_COMPATIBILITY L"-Gec"
|
||||
#define DXC_ARG_IEEE_STRICTNESS L"-Gis"
|
||||
#define DXC_ARG_OPTIMIZATION_LEVEL0 L"-O0"
|
||||
#define DXC_ARG_OPTIMIZATION_LEVEL1 L"-O1"
|
||||
#define DXC_ARG_OPTIMIZATION_LEVEL2 L"-O2"
|
||||
#define DXC_ARG_OPTIMIZATION_LEVEL3 L"-O3"
|
||||
#define DXC_ARG_WARNINGS_ARE_ERRORS L"-WX"
|
||||
#define DXC_ARG_RESOURCES_MAY_ALIAS L"-res_may_alias"
|
||||
#define DXC_ARG_ALL_RESOURCES_BOUND L"-all_resources_bound"
|
||||
#define DXC_ARG_DEBUG_NAME_FOR_SOURCE L"-Zss"
|
||||
#define DXC_ARG_DEBUG_NAME_FOR_BINARY L"-Zsb"
|
||||
|
||||
// IDxcBlob is an alias of ID3D10Blob and ID3DBlob
|
||||
struct __declspec(uuid("8BA5FB08-5195-40e2-AC58-0D989C3A0102"))
|
||||
IDxcBlob : public IUnknown {
|
||||
public:
|
||||
virtual LPVOID STDMETHODCALLTYPE GetBufferPointer(void) = 0;
|
||||
virtual SIZE_T STDMETHODCALLTYPE GetBufferSize(void) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlob)
|
||||
};
|
||||
|
||||
struct __declspec(uuid("7241d424-2646-4191-97c0-98e96e42fc68"))
|
||||
IDxcBlobEncoding : public IDxcBlob {
|
||||
public:
|
||||
virtual HRESULT STDMETHODCALLTYPE GetEncoding(_Out_ BOOL *pKnown,
|
||||
_Out_ UINT32 *pCodePage) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobEncoding)
|
||||
};
|
||||
|
||||
// Notes on IDxcBlobUtf16 and IDxcBlobUtf8
|
||||
// These guarantee null-terminated text and the stated encoding.
|
||||
// GetBufferSize() will return the size in bytes, including null-terminator
|
||||
// GetStringLength() will return the length in characters, excluding the null-terminator
|
||||
// Name strings will use IDxcBlobUtf16, while other string output blobs,
|
||||
// such as errors/warnings, preprocessed HLSL, or other text will be based
|
||||
// on the -encoding option.
|
||||
|
||||
// The API will use this interface for output name strings
|
||||
struct __declspec(uuid("A3F84EAB-0FAA-497E-A39C-EE6ED60B2D84"))
|
||||
IDxcBlobUtf16 : public IDxcBlobEncoding {
|
||||
public:
|
||||
virtual LPCWSTR STDMETHODCALLTYPE GetStringPointer(void) = 0;
|
||||
virtual SIZE_T STDMETHODCALLTYPE GetStringLength(void) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobUtf16)
|
||||
};
|
||||
struct __declspec(uuid("3DA636C9-BA71-4024-A301-30CBF125305B"))
|
||||
IDxcBlobUtf8 : public IDxcBlobEncoding {
|
||||
public:
|
||||
virtual LPCSTR STDMETHODCALLTYPE GetStringPointer(void) = 0;
|
||||
virtual SIZE_T STDMETHODCALLTYPE GetStringLength(void) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobUtf8)
|
||||
};
|
||||
|
||||
struct __declspec(uuid("7f61fc7d-950d-467f-b3e3-3c02fb49187c"))
|
||||
IDxcIncludeHandler : public IUnknown {
|
||||
virtual HRESULT STDMETHODCALLTYPE LoadSource(
|
||||
_In_z_ LPCWSTR pFilename, // Candidate filename.
|
||||
_COM_Outptr_result_maybenull_ IDxcBlob **ppIncludeSource // Resultant source object for included file, nullptr if not found.
|
||||
) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcIncludeHandler)
|
||||
};
|
||||
|
||||
// Structure for supplying bytes or text input to Dxc APIs.
|
||||
// Use Encoding = 0 for non-text bytes, ANSI text, or unknown with BOM.
|
||||
typedef struct DxcBuffer {
|
||||
LPCVOID Ptr;
|
||||
SIZE_T Size;
|
||||
UINT Encoding;
|
||||
} DxcText;
|
||||
|
||||
struct DxcDefine {
|
||||
LPCWSTR Name;
|
||||
_Maybenull_ LPCWSTR Value;
|
||||
};
|
||||
|
||||
struct __declspec(uuid("73EFFE2A-70DC-45F8-9690-EFF64C02429D"))
|
||||
IDxcCompilerArgs : public IUnknown {
|
||||
// Pass GetArguments() and GetCount() to Compile
|
||||
virtual LPCWSTR* STDMETHODCALLTYPE GetArguments() = 0;
|
||||
virtual UINT32 STDMETHODCALLTYPE GetCount() = 0;
|
||||
|
||||
// Add additional arguments or defines here, if desired.
|
||||
virtual HRESULT STDMETHODCALLTYPE AddArguments(
|
||||
_In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments to add
|
||||
_In_ UINT32 argCount // Number of arguments to add
|
||||
) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE AddArgumentsUTF8(
|
||||
_In_opt_count_(argCount)LPCSTR *pArguments, // Array of pointers to UTF-8 arguments to add
|
||||
_In_ UINT32 argCount // Number of arguments to add
|
||||
) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE AddDefines(
|
||||
_In_count_(defineCount) const DxcDefine *pDefines, // Array of defines
|
||||
_In_ UINT32 defineCount // Number of defines
|
||||
) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompilerArgs)
|
||||
};
|
||||
|
||||
//////////////////////////
|
||||
// Legacy Interfaces
|
||||
/////////////////////////
|
||||
|
||||
// NOTE: IDxcUtils replaces IDxcLibrary
|
||||
struct __declspec(uuid("e5204dc7-d18c-4c3c-bdfb-851673980fe7"))
|
||||
IDxcLibrary : public IUnknown {
|
||||
virtual HRESULT STDMETHODCALLTYPE SetMalloc(_In_opt_ IMalloc *pMalloc) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateBlobFromBlob(
|
||||
_In_ IDxcBlob *pBlob, UINT32 offset, UINT32 length, _COM_Outptr_ IDxcBlob **ppResult) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateBlobFromFile(
|
||||
_In_z_ LPCWSTR pFileName, _In_opt_ UINT32* codePage,
|
||||
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingFromPinned(
|
||||
_In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
|
||||
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnHeapCopy(
|
||||
_In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
|
||||
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnMalloc(
|
||||
_In_bytecount_(size) LPCVOID pText, IMalloc *pIMalloc, UINT32 size, UINT32 codePage,
|
||||
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateIncludeHandler(
|
||||
_COM_Outptr_ IDxcIncludeHandler **ppResult) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateStreamFromBlobReadOnly(
|
||||
_In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf8(
|
||||
_In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf16(
|
||||
_In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcLibrary)
|
||||
};
|
||||
|
||||
// NOTE: IDxcResult replaces IDxcOperationResult
|
||||
struct __declspec(uuid("CEDB484A-D4E9-445A-B991-CA21CA157DC2"))
|
||||
IDxcOperationResult : public IUnknown {
|
||||
virtual HRESULT STDMETHODCALLTYPE GetStatus(_Out_ HRESULT *pStatus) = 0;
|
||||
|
||||
// GetResult returns the main result of the operation.
|
||||
// This corresponds to:
|
||||
// DXC_OUT_OBJECT - Compile() with shader or library target
|
||||
// DXC_OUT_DISASSEMBLY - Disassemble()
|
||||
// DXC_OUT_HLSL - Compile() with -P
|
||||
// DXC_OUT_ROOT_SIGNATURE - Compile() with rootsig_* target
|
||||
virtual HRESULT STDMETHODCALLTYPE GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **ppResult) = 0;
|
||||
|
||||
// GetErrorBuffer Corresponds to DXC_OUT_ERRORS.
|
||||
virtual HRESULT STDMETHODCALLTYPE GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **ppErrors) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcOperationResult)
|
||||
};
|
||||
|
||||
// NOTE: IDxcCompiler3 replaces IDxcCompiler and IDxcCompiler2
|
||||
struct __declspec(uuid("8c210bf3-011f-4422-8d70-6f9acb8db617"))
|
||||
IDxcCompiler : public IUnknown {
|
||||
// Compile a single entry point to the target shader model
|
||||
virtual HRESULT STDMETHODCALLTYPE Compile(
|
||||
_In_ IDxcBlob *pSource, // Source text to compile
|
||||
_In_opt_z_ LPCWSTR pSourceName, // Optional file name for pSource. Used in errors and include handlers.
|
||||
_In_opt_z_ LPCWSTR pEntryPoint, // entry point name
|
||||
_In_z_ LPCWSTR pTargetProfile, // shader profile to compile
|
||||
_In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
|
||||
_In_ UINT32 argCount, // Number of arguments
|
||||
_In_count_(defineCount)
|
||||
const DxcDefine *pDefines, // Array of defines
|
||||
_In_ UINT32 defineCount, // Number of defines
|
||||
_In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
|
||||
_COM_Outptr_ IDxcOperationResult **ppResult // Compiler output status, buffer, and errors
|
||||
) = 0;
|
||||
|
||||
// Preprocess source text
|
||||
virtual HRESULT STDMETHODCALLTYPE Preprocess(
|
||||
_In_ IDxcBlob *pSource, // Source text to preprocess
|
||||
_In_opt_z_ LPCWSTR pSourceName, // Optional file name for pSource. Used in errors and include handlers.
|
||||
_In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
|
||||
_In_ UINT32 argCount, // Number of arguments
|
||||
_In_count_(defineCount)
|
||||
const DxcDefine *pDefines, // Array of defines
|
||||
_In_ UINT32 defineCount, // Number of defines
|
||||
_In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
|
||||
_COM_Outptr_ IDxcOperationResult **ppResult // Preprocessor output status, buffer, and errors
|
||||
) = 0;
|
||||
|
||||
// Disassemble a program.
|
||||
virtual HRESULT STDMETHODCALLTYPE Disassemble(
|
||||
_In_ IDxcBlob *pSource, // Program to disassemble.
|
||||
_COM_Outptr_ IDxcBlobEncoding **ppDisassembly // Disassembly text.
|
||||
) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompiler)
|
||||
};
|
||||
|
||||
// NOTE: IDxcCompiler3 replaces IDxcCompiler and IDxcCompiler2
|
||||
struct __declspec(uuid("A005A9D9-B8BB-4594-B5C9-0E633BEC4D37"))
|
||||
IDxcCompiler2 : public IDxcCompiler {
|
||||
// Compile a single entry point to the target shader model with debug information.
|
||||
virtual HRESULT STDMETHODCALLTYPE CompileWithDebug(
|
||||
_In_ IDxcBlob *pSource, // Source text to compile
|
||||
_In_opt_z_ LPCWSTR pSourceName, // Optional file name for pSource. Used in errors and include handlers.
|
||||
_In_opt_z_ LPCWSTR pEntryPoint, // Entry point name
|
||||
_In_z_ LPCWSTR pTargetProfile, // Shader profile to compile
|
||||
_In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
|
||||
_In_ UINT32 argCount, // Number of arguments
|
||||
_In_count_(defineCount)
|
||||
const DxcDefine *pDefines, // Array of defines
|
||||
_In_ UINT32 defineCount, // Number of defines
|
||||
_In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
|
||||
_COM_Outptr_ IDxcOperationResult **ppResult, // Compiler output status, buffer, and errors
|
||||
_Outptr_opt_result_z_ LPWSTR *ppDebugBlobName,// Suggested file name for debug blob. (Must be HeapFree()'d!)
|
||||
_COM_Outptr_opt_ IDxcBlob **ppDebugBlob // Debug blob
|
||||
) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompiler2)
|
||||
};
|
||||
|
||||
struct __declspec(uuid("F1B5BE2A-62DD-4327-A1C2-42AC1E1E78E6"))
|
||||
IDxcLinker : public IUnknown {
|
||||
public:
|
||||
// Register a library with name to ref it later.
|
||||
virtual HRESULT RegisterLibrary(
|
||||
_In_opt_ LPCWSTR pLibName, // Name of the library.
|
||||
_In_ IDxcBlob *pLib // Library blob.
|
||||
) = 0;
|
||||
|
||||
// Links the shader and produces a shader blob that the Direct3D runtime can
|
||||
// use.
|
||||
virtual HRESULT STDMETHODCALLTYPE Link(
|
||||
_In_opt_ LPCWSTR pEntryName, // Entry point name
|
||||
_In_ LPCWSTR pTargetProfile, // shader profile to link
|
||||
_In_count_(libCount)
|
||||
const LPCWSTR *pLibNames, // Array of library names to link
|
||||
_In_ UINT32 libCount, // Number of libraries to link
|
||||
_In_opt_count_(argCount) const LPCWSTR *pArguments, // Array of pointers to arguments
|
||||
_In_ UINT32 argCount, // Number of arguments
|
||||
_COM_Outptr_
|
||||
IDxcOperationResult **ppResult // Linker output status, buffer, and errors
|
||||
) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcLinker)
|
||||
};
|
||||
|
||||
/////////////////////////
|
||||
// Latest interfaces. Please use these
|
||||
////////////////////////
|
||||
|
||||
// NOTE: IDxcUtils replaces IDxcLibrary
|
||||
struct __declspec(uuid("4605C4CB-2019-492A-ADA4-65F20BB7D67F"))
|
||||
IDxcUtils : public IUnknown {
|
||||
// Create a sub-blob that holds a reference to the outer blob and points to its memory.
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateBlobFromBlob(
|
||||
_In_ IDxcBlob *pBlob, UINT32 offset, UINT32 length, _COM_Outptr_ IDxcBlob **ppResult) = 0;
|
||||
|
||||
// For codePage, use 0 (or DXC_CP_ACP) for raw binary or ANSI code page
|
||||
|
||||
// Creates a blob referencing existing memory, with no copy.
|
||||
// User must manage the memory lifetime separately.
|
||||
// (was: CreateBlobWithEncodingFromPinned)
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateBlobFromPinned(
|
||||
_In_bytecount_(size) LPCVOID pData, UINT32 size, UINT32 codePage,
|
||||
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
|
||||
|
||||
// Create blob, taking ownership of memory allocated with supplied allocator.
|
||||
// (was: CreateBlobWithEncodingOnMalloc)
|
||||
virtual HRESULT STDMETHODCALLTYPE MoveToBlob(
|
||||
_In_bytecount_(size) LPCVOID pData, IMalloc *pIMalloc, UINT32 size, UINT32 codePage,
|
||||
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
|
||||
|
||||
////
|
||||
// New blobs and copied contents are allocated with the current allocator
|
||||
|
||||
// Copy blob contents to memory owned by the new blob.
|
||||
// (was: CreateBlobWithEncodingOnHeapCopy)
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateBlob(
|
||||
_In_bytecount_(size) LPCVOID pData, UINT32 size, UINT32 codePage,
|
||||
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
|
||||
|
||||
// (was: CreateBlobFromFile)
|
||||
virtual HRESULT STDMETHODCALLTYPE LoadFile(
|
||||
_In_z_ LPCWSTR pFileName, _In_opt_ UINT32* pCodePage,
|
||||
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
|
||||
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateReadOnlyStreamFromBlob(
|
||||
_In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0;
|
||||
|
||||
// Create default file-based include handler
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateDefaultIncludeHandler(
|
||||
_COM_Outptr_ IDxcIncludeHandler **ppResult) = 0;
|
||||
|
||||
// Convert or return matching encoded text blobs
|
||||
virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf8(
|
||||
_In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobUtf8 **pBlobEncoding) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf16(
|
||||
_In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobUtf16 **pBlobEncoding) = 0;
|
||||
|
||||
virtual HRESULT STDMETHODCALLTYPE GetDxilContainerPart(
|
||||
_In_ const DxcBuffer *pShader,
|
||||
_In_ UINT32 DxcPart,
|
||||
_Outptr_result_nullonfailure_ void **ppPartData,
|
||||
_Out_ UINT32 *pPartSizeInBytes) = 0;
|
||||
|
||||
// Create reflection interface from serialized Dxil container, or DXC_PART_REFLECTION_DATA.
|
||||
// TBD: Require part header for RDAT? (leaning towards yes)
|
||||
virtual HRESULT STDMETHODCALLTYPE CreateReflection(
|
||||
_In_ const DxcBuffer *pData, REFIID iid, void **ppvReflection) = 0;
|
||||
|
||||
virtual HRESULT STDMETHODCALLTYPE BuildArguments(
|
||||
_In_opt_z_ LPCWSTR pSourceName, // Optional file name for pSource. Used in errors and include handlers.
|
||||
_In_opt_z_ LPCWSTR pEntryPoint, // Entry point name. (-E)
|
||||
_In_z_ LPCWSTR pTargetProfile, // Shader profile to compile. (-T)
|
||||
_In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
|
||||
_In_ UINT32 argCount, // Number of arguments
|
||||
_In_count_(defineCount)
|
||||
const DxcDefine *pDefines, // Array of defines
|
||||
_In_ UINT32 defineCount, // Number of defines
|
||||
_COM_Outptr_ IDxcCompilerArgs **ppArgs // Arguments you can use with Compile() method
|
||||
) = 0;
|
||||
|
||||
// Takes the shader PDB and returns the hash and the container inside it
|
||||
virtual HRESULT STDMETHODCALLTYPE GetPDBContents(
|
||||
_In_ IDxcBlob *pPDBBlob, _COM_Outptr_ IDxcBlob **ppHash, _COM_Outptr_ IDxcBlob **ppContainer) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcUtils)
|
||||
};
|
||||
|
||||
// For use with IDxcResult::[Has|Get]Output dxcOutKind argument
|
||||
// Note: text outputs returned from version 2 APIs are UTF-8 or UTF-16 based on -encoding option
|
||||
typedef enum DXC_OUT_KIND {
|
||||
DXC_OUT_NONE = 0,
|
||||
DXC_OUT_OBJECT = 1, // IDxcBlob - Shader or library object
|
||||
DXC_OUT_ERRORS = 2, // IDxcBlobUtf8 or IDxcBlobUtf16
|
||||
DXC_OUT_PDB = 3, // IDxcBlob
|
||||
DXC_OUT_SHADER_HASH = 4, // IDxcBlob - DxcShaderHash of shader or shader with source info (-Zsb/-Zss)
|
||||
DXC_OUT_DISASSEMBLY = 5, // IDxcBlobUtf8 or IDxcBlobUtf16 - from Disassemble
|
||||
DXC_OUT_HLSL = 6, // IDxcBlobUtf8 or IDxcBlobUtf16 - from Preprocessor or Rewriter
|
||||
DXC_OUT_TEXT = 7, // IDxcBlobUtf8 or IDxcBlobUtf16 - other text, such as -ast-dump or -Odump
|
||||
DXC_OUT_REFLECTION = 8, // IDxcBlob - RDAT part with reflection data
|
||||
DXC_OUT_ROOT_SIGNATURE = 9, // IDxcBlob - Serialized root signature output
|
||||
|
||||
DXC_OUT_FORCE_DWORD = 0xFFFFFFFF
|
||||
} DXC_OUT_KIND;
|
||||
|
||||
struct __declspec(uuid("58346CDA-DDE7-4497-9461-6F87AF5E0659"))
|
||||
IDxcResult : public IDxcOperationResult {
|
||||
virtual BOOL STDMETHODCALLTYPE HasOutput(_In_ DXC_OUT_KIND dxcOutKind) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetOutput(_In_ DXC_OUT_KIND dxcOutKind,
|
||||
_In_ REFIID iid, _COM_Outptr_opt_result_maybenull_ void **ppvObject,
|
||||
_COM_Outptr_ IDxcBlobUtf16 **ppOutputName) = 0;
|
||||
|
||||
virtual UINT32 GetNumOutputs() = 0;
|
||||
virtual DXC_OUT_KIND GetOutputByIndex(UINT32 Index) = 0;
|
||||
virtual DXC_OUT_KIND PrimaryOutput() = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcResult)
|
||||
};
|
||||
|
||||
struct __declspec(uuid("228B4687-5A6A-4730-900C-9702B2203F54"))
|
||||
IDxcCompiler3 : public IUnknown {
|
||||
// Compile a single entry point to the target shader model,
|
||||
// Compile a library to a library target (-T lib_*),
|
||||
// Compile a root signature (-T rootsig_*), or
|
||||
// Preprocess HLSL source (-P)
|
||||
virtual HRESULT STDMETHODCALLTYPE Compile(
|
||||
_In_ const DxcBuffer *pSource, // Source text to compile
|
||||
_In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
|
||||
_In_ UINT32 argCount, // Number of arguments
|
||||
_In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
|
||||
_In_ REFIID riid, _Out_ LPVOID *ppResult // IDxcResult: status, buffer, and errors
|
||||
) = 0;
|
||||
|
||||
// Disassemble a program.
|
||||
virtual HRESULT STDMETHODCALLTYPE Disassemble(
|
||||
_In_ const DxcBuffer *pObject, // Program to disassemble: dxil container or bitcode.
|
||||
_In_ REFIID riid, _Out_ LPVOID *ppResult // IDxcResult: status, disassembly text, and errors
|
||||
) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompiler3)
|
||||
};
|
||||
|
||||
static const UINT32 DxcValidatorFlags_Default = 0;
|
||||
static const UINT32 DxcValidatorFlags_InPlaceEdit = 1; // Validator is allowed to update shader blob in-place.
|
||||
static const UINT32 DxcValidatorFlags_RootSignatureOnly = 2;
|
||||
static const UINT32 DxcValidatorFlags_ModuleOnly = 4;
|
||||
static const UINT32 DxcValidatorFlags_ValidMask = 0x7;
|
||||
|
||||
struct __declspec(uuid("A6E82BD2-1FD7-4826-9811-2857E797F49A"))
|
||||
IDxcValidator : public IUnknown {
|
||||
// Validate a shader.
|
||||
virtual HRESULT STDMETHODCALLTYPE Validate(
|
||||
_In_ IDxcBlob *pShader, // Shader to validate.
|
||||
_In_ UINT32 Flags, // Validation flags.
|
||||
_COM_Outptr_ IDxcOperationResult **ppResult // Validation output status, buffer, and errors
|
||||
) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcValidator)
|
||||
};
|
||||
|
||||
struct __declspec(uuid("334b1f50-2292-4b35-99a1-25588d8c17fe"))
|
||||
IDxcContainerBuilder : public IUnknown {
|
||||
virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pDxilContainerHeader) = 0; // Loads DxilContainer to the builder
|
||||
virtual HRESULT STDMETHODCALLTYPE AddPart(_In_ UINT32 fourCC, _In_ IDxcBlob *pSource) = 0; // Part to add to the container
|
||||
virtual HRESULT STDMETHODCALLTYPE RemovePart(_In_ UINT32 fourCC) = 0; // Remove the part with fourCC
|
||||
virtual HRESULT STDMETHODCALLTYPE SerializeContainer(_Out_ IDxcOperationResult **ppResult) = 0; // Builds a container of the given container builder state
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcContainerBuilder)
|
||||
};
|
||||
|
||||
struct __declspec(uuid("091f7a26-1c1f-4948-904b-e6e3a8a771d5"))
|
||||
IDxcAssembler : public IUnknown {
|
||||
// Assemble dxil in ll or llvm bitcode to DXIL container.
|
||||
virtual HRESULT STDMETHODCALLTYPE AssembleToContainer(
|
||||
_In_ IDxcBlob *pShader, // Shader to assemble.
|
||||
_COM_Outptr_ IDxcOperationResult **ppResult // Assembly output status, buffer, and errors
|
||||
) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcAssembler)
|
||||
};
|
||||
|
||||
struct __declspec(uuid("d2c21b26-8350-4bdc-976a-331ce6f4c54c"))
|
||||
IDxcContainerReflection : public IUnknown {
|
||||
virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pContainer) = 0; // Container to load.
|
||||
virtual HRESULT STDMETHODCALLTYPE GetPartCount(_Out_ UINT32 *pResult) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetPartKind(UINT32 idx, _Out_ UINT32 *pResult) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetPartContent(UINT32 idx, _COM_Outptr_ IDxcBlob **ppResult) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE FindFirstPartKind(UINT32 kind, _Out_ UINT32 *pResult) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetPartReflection(UINT32 idx, REFIID iid, void **ppvObject) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcContainerReflection)
|
||||
};
|
||||
|
||||
struct __declspec(uuid("AE2CD79F-CC22-453F-9B6B-B124E7A5204C"))
|
||||
IDxcOptimizerPass : public IUnknown {
|
||||
virtual HRESULT STDMETHODCALLTYPE GetOptionName(_COM_Outptr_ LPWSTR *ppResult) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetDescription(_COM_Outptr_ LPWSTR *ppResult) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetOptionArgCount(_Out_ UINT32 *pCount) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetOptionArgName(UINT32 argIndex, _COM_Outptr_ LPWSTR *ppResult) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetOptionArgDescription(UINT32 argIndex, _COM_Outptr_ LPWSTR *ppResult) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcOptimizerPass)
|
||||
};
|
||||
|
||||
struct __declspec(uuid("25740E2E-9CBA-401B-9119-4FB42F39F270"))
|
||||
IDxcOptimizer : public IUnknown {
|
||||
virtual HRESULT STDMETHODCALLTYPE GetAvailablePassCount(_Out_ UINT32 *pCount) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetAvailablePass(UINT32 index, _COM_Outptr_ IDxcOptimizerPass** ppResult) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE RunOptimizer(IDxcBlob *pBlob,
|
||||
_In_count_(optionCount) LPCWSTR *ppOptions, UINT32 optionCount,
|
||||
_COM_Outptr_ IDxcBlob **pOutputModule,
|
||||
_COM_Outptr_opt_ IDxcBlobEncoding **ppOutputText) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcOptimizer)
|
||||
};
|
||||
|
||||
static const UINT32 DxcVersionInfoFlags_None = 0;
|
||||
static const UINT32 DxcVersionInfoFlags_Debug = 1; // Matches VS_FF_DEBUG
|
||||
static const UINT32 DxcVersionInfoFlags_Internal = 2; // Internal Validator (non-signing)
|
||||
|
||||
struct __declspec(uuid("b04f5b50-2059-4f12-a8ff-a1e0cde1cc7e"))
|
||||
IDxcVersionInfo : public IUnknown {
|
||||
virtual HRESULT STDMETHODCALLTYPE GetVersion(_Out_ UINT32 *pMajor, _Out_ UINT32 *pMinor) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetFlags(_Out_ UINT32 *pFlags) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcVersionInfo)
|
||||
};
|
||||
|
||||
struct __declspec(uuid("fb6904c4-42f0-4b62-9c46-983af7da7c83"))
|
||||
IDxcVersionInfo2 : public IDxcVersionInfo {
|
||||
virtual HRESULT STDMETHODCALLTYPE GetCommitInfo(_Out_ UINT32 *pCommitCount, _Out_ char **pCommitHash) = 0;
|
||||
|
||||
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcVersionInfo2)
|
||||
};
|
||||
|
||||
// Note: __declspec(selectany) requires 'extern'
|
||||
// On Linux __declspec(selectany) is removed and using 'extern' results in link error.
|
||||
#ifdef _MSC_VER
|
||||
#define CLSID_SCOPE __declspec(selectany) extern
|
||||
#else
|
||||
#define CLSID_SCOPE
|
||||
#endif
|
||||
|
||||
CLSID_SCOPE const CLSID CLSID_DxcCompiler = {
|
||||
0x73e22d93,
|
||||
0xe6ce,
|
||||
0x47f3,
|
||||
{0xb5, 0xbf, 0xf0, 0x66, 0x4f, 0x39, 0xc1, 0xb0}};
|
||||
|
||||
// {EF6A8087-B0EA-4D56-9E45-D07E1A8B7806}
|
||||
CLSID_SCOPE const GUID CLSID_DxcLinker = {
|
||||
0xef6a8087,
|
||||
0xb0ea,
|
||||
0x4d56,
|
||||
{0x9e, 0x45, 0xd0, 0x7e, 0x1a, 0x8b, 0x78, 0x6}};
|
||||
|
||||
// {CD1F6B73-2AB0-484D-8EDC-EBE7A43CA09F}
|
||||
CLSID_SCOPE const CLSID CLSID_DxcDiaDataSource = {
|
||||
0xcd1f6b73,
|
||||
0x2ab0,
|
||||
0x484d,
|
||||
{0x8e, 0xdc, 0xeb, 0xe7, 0xa4, 0x3c, 0xa0, 0x9f}};
|
||||
|
||||
// {3E56AE82-224D-470F-A1A1-FE3016EE9F9D}
|
||||
CLSID_SCOPE const CLSID CLSID_DxcCompilerArgs = {
|
||||
0x3e56ae82,
|
||||
0x224d,
|
||||
0x470f,
|
||||
{0xa1, 0xa1, 0xfe, 0x30, 0x16, 0xee, 0x9f, 0x9d}};
|
||||
|
||||
// {6245D6AF-66E0-48FD-80B4-4D271796748C}
|
||||
CLSID_SCOPE const GUID CLSID_DxcLibrary = {
|
||||
0x6245d6af,
|
||||
0x66e0,
|
||||
0x48fd,
|
||||
{0x80, 0xb4, 0x4d, 0x27, 0x17, 0x96, 0x74, 0x8c}};
|
||||
|
||||
CLSID_SCOPE const GUID CLSID_DxcUtils = CLSID_DxcLibrary;
|
||||
|
||||
// {8CA3E215-F728-4CF3-8CDD-88AF917587A1}
|
||||
CLSID_SCOPE const GUID CLSID_DxcValidator = {
|
||||
0x8ca3e215,
|
||||
0xf728,
|
||||
0x4cf3,
|
||||
{0x8c, 0xdd, 0x88, 0xaf, 0x91, 0x75, 0x87, 0xa1}};
|
||||
|
||||
// {D728DB68-F903-4F80-94CD-DCCF76EC7151}
|
||||
CLSID_SCOPE const GUID CLSID_DxcAssembler = {
|
||||
0xd728db68,
|
||||
0xf903,
|
||||
0x4f80,
|
||||
{0x94, 0xcd, 0xdc, 0xcf, 0x76, 0xec, 0x71, 0x51}};
|
||||
|
||||
// {b9f54489-55b8-400c-ba3a-1675e4728b91}
|
||||
CLSID_SCOPE const GUID CLSID_DxcContainerReflection = {
|
||||
0xb9f54489,
|
||||
0x55b8,
|
||||
0x400c,
|
||||
{0xba, 0x3a, 0x16, 0x75, 0xe4, 0x72, 0x8b, 0x91}};
|
||||
|
||||
// {AE2CD79F-CC22-453F-9B6B-B124E7A5204C}
|
||||
CLSID_SCOPE const GUID CLSID_DxcOptimizer = {
|
||||
0xae2cd79f,
|
||||
0xcc22,
|
||||
0x453f,
|
||||
{0x9b, 0x6b, 0xb1, 0x24, 0xe7, 0xa5, 0x20, 0x4c}};
|
||||
|
||||
// {94134294-411f-4574-b4d0-8741e25240d2}
|
||||
CLSID_SCOPE const GUID CLSID_DxcContainerBuilder = {
|
||||
0x94134294,
|
||||
0x411f,
|
||||
0x4574,
|
||||
{0xb4, 0xd0, 0x87, 0x41, 0xe2, 0x52, 0x40, 0xd2}};
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
@ -32,6 +32,14 @@ bool dxil_nir_lower_8bit_conv(nir_shader *shader);
|
||||
bool dxil_nir_lower_16bit_conv(nir_shader *shader);
|
||||
bool dxil_nir_lower_x2b(nir_shader *shader);
|
||||
bool dxil_nir_lower_inot(nir_shader *shader);
|
||||
bool dxil_nir_lower_ubo_to_temp(nir_shader *shader);
|
||||
bool dxil_nir_lower_loads_stores_to_dxil(nir_shader *shader);
|
||||
bool dxil_nir_lower_atomics_to_dxil(nir_shader *shader);
|
||||
bool dxil_nir_lower_deref_ssbo(nir_shader *shader);
|
||||
bool dxil_nir_opt_alu_deref_srcs(nir_shader *shader);
|
||||
bool dxil_nir_lower_memcpy_deref(nir_shader *shader);
|
||||
bool dxil_nir_lower_upcast_phis(nir_shader *shader, unsigned min_bit_size);
|
||||
bool dxil_nir_lower_fp16_casts(nir_shader *shader);
|
||||
|
||||
nir_ssa_def *
|
||||
build_load_ubo_dxil(nir_builder *b, nir_ssa_def *buffer,
|
||||
|
@ -97,8 +97,10 @@ nir_options = {
|
||||
.lower_pack_32_2x16_split = true,
|
||||
.lower_unpack_64_2x32_split = true,
|
||||
.lower_unpack_32_2x16_split = true,
|
||||
.use_scoped_barrier = true,
|
||||
.vertex_id_zero_based = true,
|
||||
.lower_base_vertex = true,
|
||||
.has_cs_global_id = true,
|
||||
};
|
||||
|
||||
const nir_shader_compiler_options*
|
||||
@ -808,6 +810,48 @@ emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned binding, unsigned
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_globals(struct ntd_context *ctx, nir_shader *s, unsigned size)
|
||||
{
|
||||
nir_foreach_variable_with_modes(var, s, nir_var_mem_ssbo)
|
||||
size++;
|
||||
|
||||
if (!size)
|
||||
return true;
|
||||
|
||||
const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, 32);
|
||||
if (!type)
|
||||
return false;
|
||||
|
||||
const struct dxil_type *struct_type =
|
||||
dxil_module_get_struct_type(&ctx->mod, NULL, &type, 1);
|
||||
if (!struct_type)
|
||||
return false;
|
||||
|
||||
const struct dxil_type *array_type =
|
||||
dxil_module_get_array_type(&ctx->mod, struct_type, size);
|
||||
if (!array_type)
|
||||
return false;
|
||||
|
||||
resource_array_layout layout = {0, 0, size};
|
||||
const struct dxil_mdnode *uav_meta =
|
||||
emit_uav_metadata(&ctx->mod, array_type,
|
||||
"globals", &layout,
|
||||
DXIL_COMP_TYPE_INVALID,
|
||||
DXIL_RESOURCE_KIND_RAW_BUFFER);
|
||||
if (!uav_meta)
|
||||
return false;
|
||||
|
||||
ctx->uav_metadata_nodes[ctx->num_uav_arrays++] = uav_meta;
|
||||
if (ctx->num_uav_arrays > 8)
|
||||
ctx->mod.feats.use_64uavs = 1;
|
||||
/* Handles to UAVs used for kernel globals are created on-demand */
|
||||
ctx->num_uavs += size;
|
||||
add_resource(ctx, DXIL_RES_UAV_RAW, &layout);
|
||||
ctx->mod.raw_and_structured_buffers = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_uav(struct ntd_context *ctx, nir_variable *var, unsigned count)
|
||||
{
|
||||
@ -936,6 +980,53 @@ var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c,
|
||||
unreachable("unknown GLSL type in var_fill_const_array");
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_global_consts(struct ntd_context *ctx, nir_shader *s)
|
||||
{
|
||||
nir_foreach_variable_with_modes(var, s, nir_var_shader_temp) {
|
||||
struct dxil_value *ret;
|
||||
bool err;
|
||||
|
||||
assert(var->constant_initializer);
|
||||
|
||||
unsigned int num_members = DIV_ROUND_UP(glsl_get_cl_size(var->type), 4);
|
||||
uint32_t *const_ints = ralloc_array(ctx->ralloc_ctx, uint32_t, num_members);
|
||||
err = var_fill_const_array(ctx, var->constant_initializer, var->type,
|
||||
const_ints, 0);
|
||||
if (!err)
|
||||
return false;
|
||||
const struct dxil_value **const_vals =
|
||||
ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, num_members);
|
||||
if (!const_vals)
|
||||
return false;
|
||||
for (int i = 0; i < num_members; i++)
|
||||
const_vals[i] = dxil_module_get_int32_const(&ctx->mod, const_ints[i]);
|
||||
|
||||
const struct dxil_type *elt_type = dxil_module_get_int_type(&ctx->mod, 32);
|
||||
if (!elt_type)
|
||||
return false;
|
||||
const struct dxil_type *type =
|
||||
dxil_module_get_array_type(&ctx->mod, elt_type, num_members);
|
||||
if (!type)
|
||||
return false;
|
||||
const struct dxil_value *agg_vals =
|
||||
dxil_module_get_array_const(&ctx->mod, type, const_vals);
|
||||
if (!agg_vals)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, type,
|
||||
DXIL_AS_DEFAULT, 4,
|
||||
agg_vals);
|
||||
if (!gvar)
|
||||
return false;
|
||||
|
||||
if (!_mesa_hash_table_insert(ctx->consts, var, (void *)gvar))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_cbv(struct ntd_context *ctx, unsigned binding,
|
||||
unsigned size, char *name)
|
||||
@ -1882,6 +1973,8 @@ emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
|
||||
case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
|
||||
case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
|
||||
case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
|
||||
case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
|
||||
case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
|
||||
|
||||
case nir_op_fddx:
|
||||
case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]);
|
||||
@ -1966,6 +2059,120 @@ load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
|
||||
return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
const struct dxil_value *opcode, *mode;
|
||||
const struct dxil_func *func;
|
||||
uint32_t flags = 0;
|
||||
|
||||
if (nir_intrinsic_execution_scope(intr) == NIR_SCOPE_WORKGROUP)
|
||||
flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
|
||||
|
||||
nir_variable_mode modes = nir_intrinsic_memory_modes(intr);
|
||||
nir_scope mem_scope = nir_intrinsic_memory_scope(intr);
|
||||
|
||||
if (modes & ~(nir_var_mem_ssbo | nir_var_mem_global | nir_var_mem_shared))
|
||||
return false;
|
||||
|
||||
if (mem_scope != NIR_SCOPE_DEVICE && mem_scope != NIR_SCOPE_WORKGROUP)
|
||||
return false;
|
||||
|
||||
if (modes & (nir_var_mem_ssbo | nir_var_mem_global)) {
|
||||
if (mem_scope == NIR_SCOPE_DEVICE)
|
||||
flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
|
||||
else
|
||||
flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
|
||||
}
|
||||
|
||||
if (modes & nir_var_mem_shared)
|
||||
flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
|
||||
|
||||
func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
|
||||
if (!func)
|
||||
return false;
|
||||
|
||||
opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
|
||||
if (!opcode)
|
||||
return false;
|
||||
|
||||
mode = dxil_module_get_int32_const(&ctx->mod, flags);
|
||||
if (!mode)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *args[] = { opcode, mode };
|
||||
|
||||
return dxil_emit_call_void(&ctx->mod, func,
|
||||
args, ARRAY_SIZE(args));
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_load_global_invocation_id(struct ntd_context *ctx,
|
||||
nir_intrinsic_instr *intr)
|
||||
{
|
||||
assert(intr->dest.is_ssa);
|
||||
nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
|
||||
|
||||
for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
|
||||
if (comps & (1 << i)) {
|
||||
const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
|
||||
if (!idx)
|
||||
return false;
|
||||
const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
|
||||
|
||||
if (!globalid)
|
||||
return false;
|
||||
|
||||
store_dest_value(ctx, &intr->dest, i, globalid);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_load_local_invocation_id(struct ntd_context *ctx,
|
||||
nir_intrinsic_instr *intr)
|
||||
{
|
||||
assert(intr->dest.is_ssa);
|
||||
nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
|
||||
|
||||
for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
|
||||
if (comps & (1 << i)) {
|
||||
const struct dxil_value
|
||||
*idx = dxil_module_get_int32_const(&ctx->mod, i);
|
||||
if (!idx)
|
||||
return false;
|
||||
const struct dxil_value
|
||||
*threadidingroup = emit_threadidingroup_call(ctx, idx);
|
||||
if (!threadidingroup)
|
||||
return false;
|
||||
store_dest_value(ctx, &intr->dest, i, threadidingroup);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_load_local_work_group_id(struct ntd_context *ctx,
|
||||
nir_intrinsic_instr *intr)
|
||||
{
|
||||
assert(intr->dest.is_ssa);
|
||||
nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
|
||||
|
||||
for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
|
||||
if (comps & (1 << i)) {
|
||||
const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
|
||||
if (!idx)
|
||||
return false;
|
||||
const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
|
||||
if (!groupid)
|
||||
return false;
|
||||
store_dest_value(ctx, &intr->dest, i, groupid);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_load_primitiveid(struct ntd_context *ctx,
|
||||
nir_intrinsic_instr *intr)
|
||||
@ -2000,6 +2207,249 @@ get_int32_undef(struct dxil_module *m)
|
||||
return dxil_module_get_undef(m, int32_type);
|
||||
}
|
||||
|
||||
static const struct dxil_value *
|
||||
offset_to_index(struct dxil_module *m, const struct dxil_value *offset,
|
||||
unsigned bit_size)
|
||||
{
|
||||
unsigned shift_amt = util_logbase2(bit_size / 8);
|
||||
const struct dxil_value *shift =
|
||||
dxil_module_get_int32_const(m, shift_amt);
|
||||
if (!shift)
|
||||
return NULL;
|
||||
|
||||
return dxil_emit_binop(m, DXIL_BINOP_LSHR, offset, shift, 0);
|
||||
}
|
||||
|
||||
static const struct dxil_value *
|
||||
index_to_offset(struct dxil_module *m, const struct dxil_value *index,
|
||||
unsigned bit_size)
|
||||
{
|
||||
unsigned shift_amt = util_logbase2(bit_size / 8);
|
||||
const struct dxil_value *shift =
|
||||
dxil_module_get_int32_const(m, shift_amt);
|
||||
if (!shift)
|
||||
return NULL;
|
||||
|
||||
return dxil_emit_binop(m, DXIL_BINOP_SHL, index, shift, 0);
|
||||
}
|
||||
|
||||
static const struct dxil_value *
|
||||
emit_gep_for_index(struct ntd_context *ctx, const nir_variable *var,
|
||||
const struct dxil_value *index)
|
||||
{
|
||||
assert(var->data.mode == nir_var_shader_temp);
|
||||
|
||||
struct hash_entry *he = _mesa_hash_table_search(ctx->consts, var);
|
||||
assert(he != NULL);
|
||||
const struct dxil_value *ptr = he->data;
|
||||
|
||||
const struct dxil_value *zero = dxil_module_get_int32_const(&ctx->mod, 0);
|
||||
if (!zero)
|
||||
return NULL;
|
||||
|
||||
const struct dxil_value *ops[] = { ptr, zero, index };
|
||||
return dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
|
||||
const struct dxil_value *buffer =
|
||||
get_src(ctx, &intr->src[0], 0, nir_type_uint);
|
||||
const struct dxil_value *offset =
|
||||
get_src(ctx, &intr->src[1], 0, nir_type_uint);
|
||||
if (!int32_undef || !buffer || !offset)
|
||||
return false;
|
||||
|
||||
assert(nir_src_bit_size(intr->src[0]) == 32);
|
||||
assert(nir_intrinsic_dest_components(intr) <= 4);
|
||||
|
||||
const struct dxil_value *handle =
|
||||
emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_UAV, 0, buffer,
|
||||
nir_src_is_const(intr->src[0]));
|
||||
if (!handle)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *coord[2] = {
|
||||
offset,
|
||||
int32_undef
|
||||
};
|
||||
|
||||
const struct dxil_value *load = emit_bufferload_call(ctx, handle, coord);
|
||||
if (!load)
|
||||
return false;
|
||||
|
||||
for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
|
||||
const struct dxil_value *val =
|
||||
dxil_emit_extractval(&ctx->mod, load, i);
|
||||
if (!val)
|
||||
return false;
|
||||
store_dest_value(ctx, &intr->dest, i, val);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
const struct dxil_value *buffer =
|
||||
get_src(ctx, &intr->src[1], 0, nir_type_uint);
|
||||
const struct dxil_value *offset =
|
||||
get_src(ctx, &intr->src[2], 0, nir_type_uint);
|
||||
if (!buffer || !offset)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *handle =
|
||||
emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_UAV, 0, buffer,
|
||||
nir_src_is_const(intr->src[1]));
|
||||
if (!handle)
|
||||
return false;
|
||||
|
||||
assert(nir_src_bit_size(intr->src[0]) == 32);
|
||||
unsigned num_components = nir_src_num_components(intr->src[0]);
|
||||
assert(num_components <= 4);
|
||||
const struct dxil_value *value[4];
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
value[i] = get_src(ctx, &intr->src[0], i, nir_type_uint);
|
||||
if (!value[i])
|
||||
return false;
|
||||
}
|
||||
|
||||
const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
|
||||
if (!int32_undef)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *coord[2] = {
|
||||
offset,
|
||||
int32_undef
|
||||
};
|
||||
|
||||
for (int i = num_components; i < 4; ++i)
|
||||
value[i] = int32_undef;
|
||||
|
||||
const struct dxil_value *write_mask =
|
||||
dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
|
||||
if (!write_mask)
|
||||
return false;
|
||||
|
||||
return emit_bufferstore_call(ctx, handle, coord, value, write_mask, DXIL_I32);
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
const struct dxil_value *value =
|
||||
get_src(ctx, &intr->src[0], 0, nir_type_uint);
|
||||
const struct dxil_value *mask =
|
||||
get_src(ctx, &intr->src[1], 0, nir_type_uint);
|
||||
const struct dxil_value *buffer =
|
||||
get_src(ctx, &intr->src[2], 0, nir_type_uint);
|
||||
const struct dxil_value *offset =
|
||||
get_src(ctx, &intr->src[3], 0, nir_type_uint);
|
||||
if (!value || !mask || !buffer || !offset)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *handle =
|
||||
emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_UAV, 0, buffer,
|
||||
nir_src_is_const(intr->src[2]));
|
||||
if (!handle)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
|
||||
if (!int32_undef)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *coord[3] = {
|
||||
offset, int32_undef, int32_undef
|
||||
};
|
||||
|
||||
return
|
||||
emit_atomic_binop(ctx, handle, DXIL_ATOMIC_AND, coord, mask) != NULL &&
|
||||
emit_atomic_binop(ctx, handle, DXIL_ATOMIC_OR, coord, value) != NULL;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_store_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
const struct dxil_value *zero, *index;
|
||||
unsigned bit_size = nir_src_bit_size(intr->src[0]);
|
||||
|
||||
/* All shared mem accesses should have been lowered to scalar 32bit
|
||||
* accesses.
|
||||
*/
|
||||
assert(bit_size == 32);
|
||||
assert(nir_src_num_components(intr->src[0]) == 1);
|
||||
|
||||
zero = dxil_module_get_int32_const(&ctx->mod, 0);
|
||||
if (!zero)
|
||||
return false;
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
|
||||
index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
|
||||
else
|
||||
index = get_src(ctx, &intr->src[2], 0, nir_type_uint);
|
||||
if (!index)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
|
||||
const struct dxil_value *ptr, *value;
|
||||
|
||||
ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
|
||||
if (!ptr)
|
||||
return false;
|
||||
|
||||
value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
|
||||
return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
|
||||
|
||||
const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint);
|
||||
|
||||
if (!dxil_emit_atomicrmw(&ctx->mod, mask, ptr, DXIL_RMWOP_AND, false,
|
||||
DXIL_ATOMIC_ORDERING_ACQREL,
|
||||
DXIL_SYNC_SCOPE_CROSSTHREAD))
|
||||
return false;
|
||||
|
||||
if (!dxil_emit_atomicrmw(&ctx->mod, value, ptr, DXIL_RMWOP_OR, false,
|
||||
DXIL_ATOMIC_ORDERING_ACQREL,
|
||||
DXIL_SYNC_SCOPE_CROSSTHREAD))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_store_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
const struct dxil_value *zero, *index;
|
||||
unsigned bit_size = nir_src_bit_size(intr->src[0]);
|
||||
|
||||
/* All scratch mem accesses should have been lowered to scalar 32bit
|
||||
* accesses.
|
||||
*/
|
||||
assert(bit_size == 32);
|
||||
assert(nir_src_num_components(intr->src[0]) == 1);
|
||||
|
||||
zero = dxil_module_get_int32_const(&ctx->mod, 0);
|
||||
if (!zero)
|
||||
return false;
|
||||
|
||||
index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
|
||||
if (!index)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
|
||||
const struct dxil_value *ptr, *value;
|
||||
|
||||
ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
|
||||
if (!ptr)
|
||||
return false;
|
||||
|
||||
value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
|
||||
return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
@ -2224,6 +2674,97 @@ emit_load_input(struct ntd_context *ctx, nir_intrinsic_instr *intr,
|
||||
return emit_load_input_flat(ctx, intr, input);
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_load_ptr(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
struct nir_variable *var =
|
||||
nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
|
||||
const struct dxil_value *index =
|
||||
get_src(ctx, &intr->src[1], 0, nir_type_uint);
|
||||
|
||||
const struct dxil_value *ptr = emit_gep_for_index(ctx, var, index);
|
||||
if (!ptr)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *retval =
|
||||
dxil_emit_load(&ctx->mod, ptr, 4, false);
|
||||
|
||||
store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_load_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
const struct dxil_value *zero, *index;
|
||||
unsigned bit_size = nir_dest_bit_size(intr->dest);
|
||||
unsigned align = bit_size / 8;
|
||||
|
||||
/* All shared mem accesses should have been lowered to scalar 32bit
|
||||
* accesses.
|
||||
*/
|
||||
assert(bit_size == 32);
|
||||
assert(nir_dest_num_components(intr->dest) == 1);
|
||||
|
||||
zero = dxil_module_get_int32_const(&ctx->mod, 0);
|
||||
if (!zero)
|
||||
return false;
|
||||
|
||||
index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
|
||||
if (!index)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
|
||||
const struct dxil_value *ptr, *retval;
|
||||
|
||||
ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
|
||||
if (!ptr)
|
||||
return false;
|
||||
|
||||
retval = dxil_emit_load(&ctx->mod, ptr, align, false);
|
||||
if (!retval)
|
||||
return false;
|
||||
|
||||
store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_load_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
const struct dxil_value *zero, *one, *index;
|
||||
unsigned bit_size = nir_dest_bit_size(intr->dest);
|
||||
unsigned align = bit_size / 8;
|
||||
|
||||
/* All scratch mem accesses should have been lowered to scalar 32bit
|
||||
* accesses.
|
||||
*/
|
||||
assert(bit_size == 32);
|
||||
assert(nir_dest_num_components(intr->dest) == 1);
|
||||
|
||||
zero = dxil_module_get_int32_const(&ctx->mod, 0);
|
||||
if (!zero)
|
||||
return false;
|
||||
|
||||
index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
|
||||
if (!index)
|
||||
return false;
|
||||
|
||||
const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
|
||||
const struct dxil_value *ptr, *retval;
|
||||
|
||||
ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
|
||||
if (!ptr)
|
||||
return false;
|
||||
|
||||
retval = dxil_emit_load(&ctx->mod, ptr, align, false);
|
||||
if (!retval)
|
||||
return false;
|
||||
|
||||
store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
@ -2573,10 +3114,31 @@ static bool
|
||||
emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_global_invocation_id:
|
||||
case nir_intrinsic_load_global_invocation_id_zero_base:
|
||||
return emit_load_global_invocation_id(ctx, intr);
|
||||
case nir_intrinsic_load_local_invocation_id:
|
||||
return emit_load_local_invocation_id(ctx, intr);
|
||||
case nir_intrinsic_load_work_group_id:
|
||||
case nir_intrinsic_load_work_group_id_zero_base:
|
||||
return emit_load_local_work_group_id(ctx, intr);
|
||||
case nir_intrinsic_load_ssbo:
|
||||
return emit_load_ssbo(ctx, intr);
|
||||
case nir_intrinsic_store_ssbo:
|
||||
return emit_store_ssbo(ctx, intr);
|
||||
case nir_intrinsic_store_ssbo_masked_dxil:
|
||||
return emit_store_ssbo_masked(ctx, intr);
|
||||
case nir_intrinsic_store_deref:
|
||||
return emit_store_deref(ctx, intr);
|
||||
case nir_intrinsic_store_shared_dxil:
|
||||
case nir_intrinsic_store_shared_masked_dxil:
|
||||
return emit_store_shared(ctx, intr);
|
||||
case nir_intrinsic_store_scratch_dxil:
|
||||
return emit_store_scratch(ctx, intr);
|
||||
case nir_intrinsic_load_deref:
|
||||
return emit_load_deref(ctx, intr);
|
||||
case nir_intrinsic_load_ptr_dxil:
|
||||
return emit_load_ptr(ctx, intr);
|
||||
case nir_intrinsic_load_ubo:
|
||||
return emit_load_ubo(ctx, intr);
|
||||
case nir_intrinsic_load_ubo_dxil:
|
||||
@ -2592,6 +3154,10 @@ emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
ctx->system_value[SYSTEM_VALUE_INSTANCE_ID]);
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
return emit_load_primitiveid(ctx, intr);
|
||||
case nir_intrinsic_load_shared_dxil:
|
||||
return emit_load_shared(ctx, intr);
|
||||
case nir_intrinsic_load_scratch_dxil:
|
||||
return emit_load_scratch(ctx, intr);
|
||||
case nir_intrinsic_discard_if:
|
||||
return emit_discard_if(ctx, intr);
|
||||
case nir_intrinsic_discard:
|
||||
@ -2600,7 +3166,55 @@ emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
|
||||
return emit_emit_vertex(ctx, intr);
|
||||
case nir_intrinsic_end_primitive:
|
||||
return emit_end_primitive(ctx, intr);
|
||||
case nir_intrinsic_scoped_barrier:
|
||||
return emit_barrier(ctx, intr);
|
||||
case nir_intrinsic_ssbo_atomic_add:
|
||||
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int);
|
||||
case nir_intrinsic_ssbo_atomic_imin:
|
||||
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int);
|
||||
case nir_intrinsic_ssbo_atomic_umin:
|
||||
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint);
|
||||
case nir_intrinsic_ssbo_atomic_imax:
|
||||
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int);
|
||||
case nir_intrinsic_ssbo_atomic_umax:
|
||||
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint);
|
||||
case nir_intrinsic_ssbo_atomic_and:
|
||||
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint);
|
||||
case nir_intrinsic_ssbo_atomic_or:
|
||||
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint);
|
||||
case nir_intrinsic_ssbo_atomic_xor:
|
||||
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint);
|
||||
case nir_intrinsic_ssbo_atomic_exchange:
|
||||
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_int);
|
||||
case nir_intrinsic_ssbo_atomic_comp_swap:
|
||||
return emit_ssbo_atomic_comp_swap(ctx, intr);
|
||||
case nir_intrinsic_shared_atomic_add_dxil:
|
||||
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_ADD, nir_type_int);
|
||||
case nir_intrinsic_shared_atomic_imin_dxil:
|
||||
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MIN, nir_type_int);
|
||||
case nir_intrinsic_shared_atomic_umin_dxil:
|
||||
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMIN, nir_type_uint);
|
||||
case nir_intrinsic_shared_atomic_imax_dxil:
|
||||
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MAX, nir_type_int);
|
||||
case nir_intrinsic_shared_atomic_umax_dxil:
|
||||
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMAX, nir_type_uint);
|
||||
case nir_intrinsic_shared_atomic_and_dxil:
|
||||
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_AND, nir_type_uint);
|
||||
case nir_intrinsic_shared_atomic_or_dxil:
|
||||
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_OR, nir_type_uint);
|
||||
case nir_intrinsic_shared_atomic_xor_dxil:
|
||||
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XOR, nir_type_uint);
|
||||
case nir_intrinsic_shared_atomic_exchange_dxil:
|
||||
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XCHG, nir_type_int);
|
||||
case nir_intrinsic_shared_atomic_comp_swap_dxil:
|
||||
return emit_shared_atomic_comp_swap(ctx, intr);
|
||||
case nir_intrinsic_image_store:
|
||||
return emit_image_store(ctx, intr);
|
||||
case nir_intrinsic_image_size:
|
||||
return emit_image_size(ctx, intr);
|
||||
|
||||
case nir_intrinsic_load_num_work_groups:
|
||||
case nir_intrinsic_load_local_group_size:
|
||||
default:
|
||||
NIR_INSTR_UNSUPPORTED(&intr->instr);
|
||||
assert("Unimplemented intrinsic instruction");
|
||||
@ -3266,18 +3880,88 @@ prepare_phi_values(struct ntd_context *ctx, nir_shader *shader)
|
||||
static bool
|
||||
emit_cbvs(struct ntd_context *ctx, nir_shader *s)
|
||||
{
|
||||
for (int i = ctx->opts->ubo_binding_offset; i < s->info.num_ubos; ++i) {
|
||||
char name[64];
|
||||
snprintf(name, sizeof(name), "__ubo%d", i);
|
||||
if (!emit_cbv(ctx, i, 16384 /*4096 vec4's*/, name))
|
||||
return false;
|
||||
if (s->info.stage == MESA_SHADER_KERNEL) {
|
||||
nir_foreach_variable_with_modes(var, s, nir_var_mem_ubo) {
|
||||
if (!emit_ubo_var(ctx, var))
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
for (int i = ctx->opts->ubo_binding_offset; i < s->info.num_ubos; ++i) {
|
||||
char name[64];
|
||||
snprintf(name, sizeof(name), "__ubo%d", i);
|
||||
if (!emit_cbv(ctx, i, 16384 /*4096 vec4's*/, name))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_module(struct ntd_context *ctx, nir_shader *s)
|
||||
emit_scratch(struct ntd_context *ctx, nir_shader *s)
|
||||
{
|
||||
if (s->scratch_size) {
|
||||
/*
|
||||
* We always allocate an u32 array, no matter the actual variable types.
|
||||
* According to the DXIL spec, the minimum load/store granularity is
|
||||
* 32-bit, anything smaller requires using a read-extract/read-write-modify
|
||||
* approach.
|
||||
*/
|
||||
unsigned size = ALIGN_POT(s->scratch_size, sizeof(uint32_t));
|
||||
const struct dxil_type *int32 = dxil_module_get_int_type(&ctx->mod, 32);
|
||||
const struct dxil_value *array_length = dxil_module_get_int32_const(&ctx->mod, size / sizeof(uint32_t));
|
||||
if (!int32 || !array_length)
|
||||
return false;
|
||||
|
||||
const struct dxil_type *type = dxil_module_get_array_type(
|
||||
&ctx->mod, int32, size / sizeof(uint32_t));
|
||||
if (!type)
|
||||
return false;
|
||||
|
||||
ctx->scratchvars = dxil_emit_alloca(&ctx->mod, type, int32, array_length, 4);
|
||||
if (!ctx->scratchvars)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* The validator complains if we don't have ops that reference a global variable. */
|
||||
static bool
|
||||
shader_has_shared_ops(struct nir_shader *s)
|
||||
{
|
||||
nir_foreach_function(func, s) {
|
||||
if (!func->impl)
|
||||
continue;
|
||||
nir_foreach_block(block, func->impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_shared_dxil:
|
||||
case nir_intrinsic_store_shared_dxil:
|
||||
case nir_intrinsic_shared_atomic_add_dxil:
|
||||
case nir_intrinsic_shared_atomic_and_dxil:
|
||||
case nir_intrinsic_shared_atomic_comp_swap_dxil:
|
||||
case nir_intrinsic_shared_atomic_exchange_dxil:
|
||||
case nir_intrinsic_shared_atomic_imax_dxil:
|
||||
case nir_intrinsic_shared_atomic_imin_dxil:
|
||||
case nir_intrinsic_shared_atomic_or_dxil:
|
||||
case nir_intrinsic_shared_atomic_umax_dxil:
|
||||
case nir_intrinsic_shared_atomic_umin_dxil:
|
||||
case nir_intrinsic_shared_atomic_xor_dxil:
|
||||
return true;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
emit_module(struct ntd_context *ctx, nir_shader *s, const struct nir_to_dxil_options *opts)
|
||||
{
|
||||
unsigned binding;
|
||||
|
||||
@ -3314,6 +3998,45 @@ emit_module(struct ntd_context *ctx, nir_shader *s)
|
||||
}
|
||||
}
|
||||
|
||||
if (s->info.cs.shared_size && shader_has_shared_ops(s)) {
|
||||
const struct dxil_type *type;
|
||||
unsigned size;
|
||||
|
||||
/*
|
||||
* We always allocate an u32 array, no matter the actual variable types.
|
||||
* According to the DXIL spec, the minimum load/store granularity is
|
||||
* 32-bit, anything smaller requires using a read-extract/read-write-modify
|
||||
* approach. Non-atomic 64-bit accesses are allowed, but the
|
||||
* GEP(cast(gvar, u64[] *), offset) and cast(GEP(gvar, offset), u64 *))
|
||||
* sequences don't seem to be accepted by the DXIL validator when the
|
||||
* pointer is in the groupshared address space, making the 32-bit -> 64-bit
|
||||
* pointer cast impossible.
|
||||
*/
|
||||
size = ALIGN_POT(s->info.cs.shared_size, sizeof(uint32_t));
|
||||
type = dxil_module_get_array_type(&ctx->mod,
|
||||
dxil_module_get_int_type(&ctx->mod, 32),
|
||||
size / sizeof(uint32_t));
|
||||
ctx->sharedvars = dxil_add_global_ptr_var(&ctx->mod, "shared", type,
|
||||
DXIL_AS_GROUPSHARED,
|
||||
ffs(sizeof(uint64_t)),
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (!emit_scratch(ctx, s))
|
||||
return false;
|
||||
|
||||
/* UAVs */
|
||||
if (s->info.stage == MESA_SHADER_KERNEL) {
|
||||
if (!emit_globals(ctx, s, opts->num_kernel_globals))
|
||||
return false;
|
||||
|
||||
ctx->consts = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
|
||||
if (!ctx->consts)
|
||||
return false;
|
||||
if (!emit_global_consts(ctx, s))
|
||||
return false;
|
||||
}
|
||||
|
||||
nir_foreach_variable_with_modes(var, s, nir_var_uniform) {
|
||||
unsigned count = glsl_type_get_image_count(var->type);
|
||||
if (var->data.mode == nir_var_uniform && count) {
|
||||
@ -3383,6 +4106,7 @@ get_dxil_shader_kind(struct nir_shader *s)
|
||||
return DXIL_GEOMETRY_SHADER;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
return DXIL_PIXEL_SHADER;
|
||||
case MESA_SHADER_KERNEL:
|
||||
case MESA_SHADER_COMPUTE:
|
||||
return DXIL_COMPUTE_SHADER;
|
||||
default:
|
||||
@ -3437,11 +4161,16 @@ optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
|
||||
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
|
||||
NIR_PASS(progress, s, nir_opt_algebraic);
|
||||
NIR_PASS(progress, s, dxil_nir_lower_x2b);
|
||||
if (s->options->lower_int64_options)
|
||||
NIR_PASS(progress, s, nir_lower_int64);
|
||||
NIR_PASS(progress, s, nir_lower_alu);
|
||||
NIR_PASS(progress, s, dxil_nir_lower_inot);
|
||||
NIR_PASS(progress, s, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, s, nir_opt_undef);
|
||||
NIR_PASS(progress, s, nir_lower_undef_to_zero);
|
||||
NIR_PASS(progress, s, nir_opt_deref);
|
||||
NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
|
||||
NIR_PASS(progress, s, nir_lower_64bit_phis);
|
||||
NIR_PASS_V(s, nir_lower_system_values);
|
||||
} while (progress);
|
||||
|
||||
@ -3602,7 +4331,7 @@ nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
|
||||
if (debug_dxil & DXIL_DEBUG_VERBOSE)
|
||||
nir_print_shader(s, stderr);
|
||||
|
||||
if (!emit_module(ctx, s)) {
|
||||
if (!emit_module(ctx, s, opts)) {
|
||||
debug_printf("D3D12: dxil_container_add_module failed\n");
|
||||
retval = false;
|
||||
goto out;
|
||||
|
@ -52,6 +52,7 @@ struct nir_to_dxil_options {
|
||||
bool disable_math_refactoring;
|
||||
unsigned ubo_binding_offset;
|
||||
unsigned provoking_vertex;
|
||||
unsigned num_kernel_globals;
|
||||
};
|
||||
|
||||
bool
|
||||
|
@ -20,4 +20,7 @@
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
subdir('compiler')
|
||||
if with_microsoft_clc
|
||||
subdir('clc')
|
||||
endif
|
||||
subdir('resource_state_manager')
|
||||
|
Loading…
Reference in New Issue
Block a user