mirror of
https://gitee.com/openharmony/third_party_mesa3d
synced 2024-11-24 16:00:56 +00:00
radeonsi: drop support for LLVM 3.8
LLVM 3.8:
- had broken indirect resource indexing
- didn't have scratch coalescing
- was the last user of problematic v16i8
- only supported OpenGL 4.1

This leaves us with LLVM 3.9 and LLVM 4.0 support for Mesa 17.2.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
4d32b4ac99
commit
12beef0374
@ -102,8 +102,8 @@ ZLIB_REQUIRED=1.2.8
|
||||
dnl LLVM versions
|
||||
LLVM_REQUIRED_GALLIUM=3.3.0
|
||||
LLVM_REQUIRED_OPENCL=3.6.0
|
||||
LLVM_REQUIRED_R600=3.8.0
|
||||
LLVM_REQUIRED_RADEONSI=3.8.0
|
||||
LLVM_REQUIRED_R600=3.9.0
|
||||
LLVM_REQUIRED_RADEONSI=3.9.0
|
||||
LLVM_REQUIRED_RADV=3.9.0
|
||||
LLVM_REQUIRED_SWR=3.9.0
|
||||
|
||||
|
@ -233,42 +233,16 @@ build_cube_intrinsic(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef in[3],
|
||||
struct cube_selection_coords *out)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->builder;
|
||||
LLVMTypeRef f32 = ctx->f32;
|
||||
|
||||
if (HAVE_LLVM >= 0x0309) {
|
||||
LLVMTypeRef f32 = ctx->f32;
|
||||
|
||||
out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
|
||||
f32, in, 3, AC_FUNC_ATTR_READNONE);
|
||||
out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
|
||||
f32, in, 3, AC_FUNC_ATTR_READNONE);
|
||||
out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
|
||||
f32, in, 3, AC_FUNC_ATTR_READNONE);
|
||||
out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
|
||||
f32, in, 3, AC_FUNC_ATTR_READNONE);
|
||||
} else {
|
||||
LLVMValueRef c[4] = {
|
||||
in[0],
|
||||
in[1],
|
||||
in[2],
|
||||
LLVMGetUndef(LLVMTypeOf(in[0]))
|
||||
};
|
||||
LLVMValueRef vec = ac_build_gather_values(ctx, c, 4);
|
||||
|
||||
LLVMValueRef tmp =
|
||||
ac_build_intrinsic(ctx, "llvm.AMDGPU.cube",
|
||||
LLVMTypeOf(vec), &vec, 1,
|
||||
AC_FUNC_ATTR_READNONE);
|
||||
|
||||
out->stc[1] = LLVMBuildExtractElement(builder, tmp,
|
||||
LLVMConstInt(ctx->i32, 0, 0), "");
|
||||
out->stc[0] = LLVMBuildExtractElement(builder, tmp,
|
||||
LLVMConstInt(ctx->i32, 1, 0), "");
|
||||
out->ma = LLVMBuildExtractElement(builder, tmp,
|
||||
LLVMConstInt(ctx->i32, 2, 0), "");
|
||||
out->id = LLVMBuildExtractElement(builder, tmp,
|
||||
LLVMConstInt(ctx->i32, 3, 0), "");
|
||||
}
|
||||
out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
|
||||
f32, in, 3, AC_FUNC_ATTR_READNONE);
|
||||
out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
|
||||
f32, in, 3, AC_FUNC_ATTR_READNONE);
|
||||
out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
|
||||
f32, in, 3, AC_FUNC_ATTR_READNONE);
|
||||
out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
|
||||
f32, in, 3, AC_FUNC_ATTR_READNONE);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -558,7 +532,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
|
||||
bool has_add_tid)
|
||||
{
|
||||
/* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
|
||||
if (HAVE_LLVM >= 0x0309 && !has_add_tid) {
|
||||
if (!has_add_tid) {
|
||||
/* Split 3 channel stores, because LLVM doesn't support 3-channel
|
||||
* intrinsics. */
|
||||
if (num_channels == 3) {
|
||||
@ -663,73 +637,39 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
|
||||
{
|
||||
unsigned func = CLAMP(num_channels, 1, 3) - 1;
|
||||
|
||||
if (HAVE_LLVM >= 0x309) {
|
||||
LLVMValueRef args[] = {
|
||||
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
|
||||
vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
|
||||
LLVMConstInt(ctx->i32, inst_offset, 0),
|
||||
LLVMConstInt(ctx->i1, glc, 0),
|
||||
LLVMConstInt(ctx->i1, slc, 0)
|
||||
};
|
||||
LLVMValueRef args[] = {
|
||||
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
|
||||
vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
|
||||
LLVMConstInt(ctx->i32, inst_offset, 0),
|
||||
LLVMConstInt(ctx->i1, glc, 0),
|
||||
LLVMConstInt(ctx->i1, slc, 0)
|
||||
};
|
||||
|
||||
LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
|
||||
ctx->v4f32};
|
||||
const char *type_names[] = {"f32", "v2f32", "v4f32"};
|
||||
char name[256];
|
||||
LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
|
||||
ctx->v4f32};
|
||||
const char *type_names[] = {"f32", "v2f32", "v4f32"};
|
||||
char name[256];
|
||||
|
||||
if (voffset) {
|
||||
args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
|
||||
"");
|
||||
}
|
||||
|
||||
if (soffset) {
|
||||
args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
|
||||
"");
|
||||
}
|
||||
|
||||
snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
|
||||
type_names[func]);
|
||||
|
||||
return ac_build_intrinsic(ctx, name, types[func], args,
|
||||
ARRAY_SIZE(args),
|
||||
/* READNONE means writes can't
|
||||
* affect it, while READONLY means
|
||||
* that writes can affect it. */
|
||||
readonly_memory && HAVE_LLVM >= 0x0400 ?
|
||||
AC_FUNC_ATTR_READNONE :
|
||||
AC_FUNC_ATTR_READONLY);
|
||||
} else {
|
||||
LLVMValueRef args[] = {
|
||||
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""),
|
||||
voffset ? voffset : vindex,
|
||||
soffset,
|
||||
LLVMConstInt(ctx->i32, inst_offset, 0),
|
||||
LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen
|
||||
LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), //idxen
|
||||
LLVMConstInt(ctx->i32, glc, 0),
|
||||
LLVMConstInt(ctx->i32, slc, 0),
|
||||
LLVMConstInt(ctx->i32, 0, 0), // TFE
|
||||
};
|
||||
|
||||
LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2),
|
||||
ctx->v4i32};
|
||||
const char *type_names[] = {"i32", "v2i32", "v4i32"};
|
||||
const char *arg_type = "i32";
|
||||
char name[256];
|
||||
|
||||
if (voffset && vindex) {
|
||||
LLVMValueRef vaddr[] = {vindex, voffset};
|
||||
|
||||
arg_type = "v2i32";
|
||||
args[1] = ac_build_gather_values(ctx, vaddr, 2);
|
||||
}
|
||||
|
||||
snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s",
|
||||
type_names[func], arg_type);
|
||||
|
||||
return ac_build_intrinsic(ctx, name, types[func], args,
|
||||
ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY);
|
||||
if (voffset) {
|
||||
args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
|
||||
"");
|
||||
}
|
||||
|
||||
if (soffset) {
|
||||
args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
|
||||
"");
|
||||
}
|
||||
|
||||
snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
|
||||
type_names[func]);
|
||||
|
||||
return ac_build_intrinsic(ctx, name, types[func], args,
|
||||
ARRAY_SIZE(args),
|
||||
/* READNONE means writes can't affect it, while
|
||||
* READONLY means that writes can affect it. */
|
||||
readonly_memory && HAVE_LLVM >= 0x0400 ?
|
||||
AC_FUNC_ATTR_READNONE :
|
||||
AC_FUNC_ATTR_READONLY);
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
|
||||
@ -738,35 +678,22 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef voffset,
|
||||
bool readonly_memory)
|
||||
{
|
||||
if (HAVE_LLVM >= 0x0309) {
|
||||
LLVMValueRef args [] = {
|
||||
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
|
||||
vindex,
|
||||
voffset,
|
||||
LLVMConstInt(ctx->i1, 0, 0), /* glc */
|
||||
LLVMConstInt(ctx->i1, 0, 0), /* slc */
|
||||
};
|
||||
|
||||
return ac_build_intrinsic(ctx,
|
||||
"llvm.amdgcn.buffer.load.format.v4f32",
|
||||
ctx->v4f32, args, ARRAY_SIZE(args),
|
||||
/* READNONE means writes can't
|
||||
* affect it, while READONLY means
|
||||
* that writes can affect it. */
|
||||
readonly_memory && HAVE_LLVM >= 0x0400 ?
|
||||
AC_FUNC_ATTR_READNONE :
|
||||
AC_FUNC_ATTR_READONLY);
|
||||
}
|
||||
|
||||
LLVMValueRef args[] = {
|
||||
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""),
|
||||
voffset,
|
||||
LLVMValueRef args [] = {
|
||||
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
|
||||
vindex,
|
||||
voffset,
|
||||
LLVMConstInt(ctx->i1, 0, 0), /* glc */
|
||||
LLVMConstInt(ctx->i1, 0, 0), /* slc */
|
||||
};
|
||||
return ac_build_intrinsic(ctx, "llvm.SI.vs.load.input",
|
||||
ctx->v4f32, args, 3,
|
||||
AC_FUNC_ATTR_READNONE |
|
||||
AC_FUNC_ATTR_LEGACY);
|
||||
|
||||
return ac_build_intrinsic(ctx,
|
||||
"llvm.amdgcn.buffer.load.format.v4f32",
|
||||
ctx->v4f32, args, ARRAY_SIZE(args),
|
||||
/* READNONE means writes can't affect it, while
|
||||
* READONLY means that writes can affect it. */
|
||||
readonly_memory && HAVE_LLVM >= 0x0400 ?
|
||||
AC_FUNC_ATTR_READNONE :
|
||||
AC_FUNC_ATTR_READONLY);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -105,17 +105,10 @@ static const char *ac_get_llvm_processor_name(enum radeon_family family)
|
||||
return "fiji";
|
||||
case CHIP_STONEY:
|
||||
return "stoney";
|
||||
#if HAVE_LLVM == 0x0308
|
||||
case CHIP_POLARIS10:
|
||||
return "tonga";
|
||||
case CHIP_POLARIS11:
|
||||
return "tonga";
|
||||
#else
|
||||
case CHIP_POLARIS10:
|
||||
return "polaris10";
|
||||
case CHIP_POLARIS11:
|
||||
return "polaris11";
|
||||
#endif
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
|
@ -1001,10 +1001,10 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
|
||||
case CHIP_STONEY:
|
||||
return "stoney";
|
||||
case CHIP_POLARIS10:
|
||||
return HAVE_LLVM >= 0x0309 ? "polaris10" : "carrizo";
|
||||
return "polaris10";
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12: /* same as polaris11 */
|
||||
return HAVE_LLVM >= 0x0309 ? "polaris11" : "carrizo";
|
||||
return "polaris11";
|
||||
case CHIP_VEGA10:
|
||||
return "gfx900";
|
||||
default:
|
||||
@ -1066,7 +1066,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
|
||||
if (ret) {
|
||||
uint64_t *block_size = ret;
|
||||
if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
|
||||
if (rscreen->chip_class >= SI &&
|
||||
ir_type == PIPE_SHADER_IR_TGSI) {
|
||||
block_size[0] = 2048;
|
||||
block_size[1] = 2048;
|
||||
@ -1082,7 +1082,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
|
||||
if (ret) {
|
||||
uint64_t *max_threads_per_block = ret;
|
||||
if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
|
||||
if (rscreen->chip_class >= SI &&
|
||||
ir_type == PIPE_SHADER_IR_TGSI)
|
||||
*max_threads_per_block = 2048;
|
||||
else
|
||||
@ -1174,7 +1174,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
|
||||
if (ret) {
|
||||
uint64_t *max_variable_threads_per_block = ret;
|
||||
if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
|
||||
if (rscreen->chip_class >= SI &&
|
||||
ir_type == PIPE_SHADER_IR_TGSI)
|
||||
*max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
|
||||
else
|
||||
|
@ -327,8 +327,7 @@ static bool si_have_tgsi_compute(struct si_screen *sscreen)
|
||||
{
|
||||
/* Old kernels disallowed some register writes for SI
|
||||
* that are used for indirect dispatches. */
|
||||
return HAVE_LLVM >= 0x309 &&
|
||||
(sscreen->b.chip_class >= CIK ||
|
||||
return (sscreen->b.chip_class >= CIK ||
|
||||
sscreen->b.info.drm_major == 3 ||
|
||||
(sscreen->b.info.drm_major == 2 &&
|
||||
sscreen->b.info.drm_minor >= 45));
|
||||
@ -422,12 +421,10 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_DOUBLES:
|
||||
case PIPE_CAP_TGSI_TEX_TXF_LZ:
|
||||
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_INT64:
|
||||
case PIPE_CAP_INT64_DIVMOD:
|
||||
case PIPE_CAP_TGSI_CLOCK:
|
||||
return HAVE_LLVM >= 0x0309;
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_TGSI_VOTE:
|
||||
return HAVE_LLVM >= 0x0400;
|
||||
@ -458,15 +455,13 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
|
||||
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
|
||||
case PIPE_CAP_MAX_VERTEX_STREAMS:
|
||||
return 4;
|
||||
|
||||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
return HAVE_LLVM >= 0x0309 ? 4 : 0;
|
||||
return 4;
|
||||
|
||||
case PIPE_CAP_GLSL_FEATURE_LEVEL:
|
||||
if (si_have_tgsi_compute(sscreen))
|
||||
return 450;
|
||||
return HAVE_LLVM >= 0x0309 ? 420 : 410;
|
||||
return 420;
|
||||
|
||||
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
|
||||
return MIN2(sscreen->b.info.max_alloc_size, INT_MAX);
|
||||
@ -656,9 +651,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen,
|
||||
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
|
||||
return SI_NUM_SAMPLERS;
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
return HAVE_LLVM >= 0x0309 ? SI_NUM_SHADER_BUFFERS : 0;
|
||||
return SI_NUM_SHADER_BUFFERS;
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
|
||||
return HAVE_LLVM >= 0x0309 ? SI_NUM_IMAGES : 0;
|
||||
return SI_NUM_IMAGES;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
@ -887,9 +882,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
|
||||
sscreen->b.info.pfp_fw_version >= 121 &&
|
||||
sscreen->b.info.me_fw_version >= 87);
|
||||
|
||||
sscreen->has_ds_bpermute = HAVE_LLVM >= 0x0309 &&
|
||||
sscreen->b.chip_class >= VI;
|
||||
|
||||
sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI;
|
||||
sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
|
||||
sscreen->b.family <= CHIP_POLARIS12) ||
|
||||
sscreen->b.family == CHIP_VEGA10;
|
||||
|
@ -592,13 +592,6 @@ static LLVMValueRef get_bounded_indirect_index(struct si_shader_context *ctx,
|
||||
{
|
||||
LLVMValueRef result = get_indirect_index(ctx, ind, rel_index);
|
||||
|
||||
/* LLVM 3.8: If indirect resource indexing is used:
|
||||
* - SI & CIK hang
|
||||
* - VI crashes
|
||||
*/
|
||||
if (HAVE_LLVM == 0x0308)
|
||||
return LLVMGetUndef(ctx->i32);
|
||||
|
||||
return si_llvm_bound_index(ctx, result, num);
|
||||
}
|
||||
|
||||
@ -1638,17 +1631,12 @@ static void declare_system_value(struct si_shader_context *ctx,
|
||||
break;
|
||||
|
||||
case TGSI_SEMANTIC_HELPER_INVOCATION:
|
||||
if (HAVE_LLVM >= 0x0309) {
|
||||
value = lp_build_intrinsic(gallivm->builder,
|
||||
"llvm.amdgcn.ps.live",
|
||||
ctx->i1, NULL, 0,
|
||||
LP_FUNC_ATTR_READNONE);
|
||||
value = LLVMBuildNot(gallivm->builder, value, "");
|
||||
value = LLVMBuildSExt(gallivm->builder, value, ctx->i32, "");
|
||||
} else {
|
||||
assert(!"TGSI_SEMANTIC_HELPER_INVOCATION unsupported");
|
||||
return;
|
||||
}
|
||||
value = lp_build_intrinsic(gallivm->builder,
|
||||
"llvm.amdgcn.ps.live",
|
||||
ctx->i1, NULL, 0,
|
||||
LP_FUNC_ATTR_READNONE);
|
||||
value = LLVMBuildNot(gallivm->builder, value, "");
|
||||
value = LLVMBuildSExt(gallivm->builder, value, ctx->i32, "");
|
||||
break;
|
||||
|
||||
case TGSI_SEMANTIC_SUBGROUP_SIZE:
|
||||
@ -4283,12 +4271,10 @@ static void atomic_emit_memory(struct si_shader_context *ctx,
|
||||
|
||||
new_data = LLVMBuildBitCast(builder, new_data, ctx->i32, "");
|
||||
|
||||
#if HAVE_LLVM >= 0x309
|
||||
result = LLVMBuildAtomicCmpXchg(builder, ptr, arg, new_data,
|
||||
LLVMAtomicOrderingSequentiallyConsistent,
|
||||
LLVMAtomicOrderingSequentiallyConsistent,
|
||||
false);
|
||||
#endif
|
||||
|
||||
result = LLVMBuildExtractValue(builder, result, 0, "");
|
||||
} else {
|
||||
@ -5689,16 +5675,14 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
|
||||
* The real barrier instruction isn’t needed, because an entire patch
|
||||
* always fits into a single wave.
|
||||
*/
|
||||
if (HAVE_LLVM >= 0x0309 &&
|
||||
ctx->screen->b.chip_class == SI &&
|
||||
if (ctx->screen->b.chip_class == SI &&
|
||||
ctx->type == PIPE_SHADER_TESS_CTRL) {
|
||||
emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
|
||||
return;
|
||||
}
|
||||
|
||||
lp_build_intrinsic(gallivm->builder,
|
||||
HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier"
|
||||
: "llvm.AMDGPU.barrier.local",
|
||||
"llvm.amdgcn.s.barrier",
|
||||
ctx->voidt, NULL, 0, LP_FUNC_ATTR_CONVERGENT);
|
||||
}
|
||||
|
||||
@ -6519,14 +6503,8 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
|
||||
uint32_t scratch_rsrc_dword1 =
|
||||
S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
|
||||
|
||||
/* Enable scratch coalescing if LLVM sets ELEMENT_SIZE & INDEX_STRIDE
|
||||
* correctly.
|
||||
*/
|
||||
if (HAVE_LLVM >= 0x0309)
|
||||
scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
|
||||
else
|
||||
scratch_rsrc_dword1 |=
|
||||
S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
|
||||
/* Enable scratch coalescing. */
|
||||
scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
|
||||
|
||||
for (i = 0 ; i < shader->binary.reloc_count; i++) {
|
||||
const struct ac_shader_reloc *reloc =
|
||||
|
@ -701,8 +701,7 @@ static void emit_fdiv(const struct lp_build_tgsi_action *action,
|
||||
emit_data->args[0], emit_data->args[1], "");
|
||||
|
||||
/* Use v_rcp_f32 instead of precise division. */
|
||||
if (HAVE_LLVM >= 0x0309 &&
|
||||
!LLVMIsConstant(emit_data->output[emit_data->chan]))
|
||||
if (!LLVMIsConstant(emit_data->output[emit_data->chan]))
|
||||
LLVMSetMetadata(emit_data->output[emit_data->chan],
|
||||
ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
|
||||
}
|
||||
@ -748,8 +747,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
|
||||
bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
|
||||
bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
|
||||
bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
|
||||
bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name =
|
||||
HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.rsq.f64" : "llvm.AMDGPU.rsq.f64";
|
||||
bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.amdgcn.rsq.f64";
|
||||
bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
|
||||
bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
|
||||
bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
|
||||
|
@ -50,20 +50,6 @@ struct si_llvm_flow {
|
||||
LLVMBasicBlockRef loop_entry_block;
|
||||
};
|
||||
|
||||
#define CPU_STRING_LEN 30
|
||||
#define FS_STRING_LEN 30
|
||||
#define TRIPLE_STRING_LEN 7
|
||||
|
||||
/**
|
||||
* Shader types for the LLVM backend.
|
||||
*/
|
||||
enum si_llvm_shader_type {
|
||||
RADEON_LLVM_SHADER_PS = 0,
|
||||
RADEON_LLVM_SHADER_VS = 1,
|
||||
RADEON_LLVM_SHADER_GS = 2,
|
||||
RADEON_LLVM_SHADER_CS = 3,
|
||||
};
|
||||
|
||||
enum si_llvm_calling_convention {
|
||||
RADEON_LLVM_AMDGPU_VS = 87,
|
||||
RADEON_LLVM_AMDGPU_GS = 88,
|
||||
@ -86,36 +72,28 @@ void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
|
||||
*/
|
||||
void si_llvm_shader_type(LLVMValueRef F, unsigned type)
|
||||
{
|
||||
enum si_llvm_shader_type llvm_type;
|
||||
enum si_llvm_calling_convention calling_conv;
|
||||
|
||||
switch (type) {
|
||||
case PIPE_SHADER_VERTEX:
|
||||
case PIPE_SHADER_TESS_CTRL:
|
||||
case PIPE_SHADER_TESS_EVAL:
|
||||
llvm_type = RADEON_LLVM_SHADER_VS;
|
||||
calling_conv = RADEON_LLVM_AMDGPU_VS;
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
llvm_type = RADEON_LLVM_SHADER_GS;
|
||||
calling_conv = RADEON_LLVM_AMDGPU_GS;
|
||||
break;
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
llvm_type = RADEON_LLVM_SHADER_PS;
|
||||
calling_conv = RADEON_LLVM_AMDGPU_PS;
|
||||
break;
|
||||
case PIPE_SHADER_COMPUTE:
|
||||
llvm_type = RADEON_LLVM_SHADER_CS;
|
||||
calling_conv = RADEON_LLVM_AMDGPU_CS;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unhandle shader type");
|
||||
}
|
||||
|
||||
if (HAVE_LLVM >= 0x309)
|
||||
LLVMSetFunctionCallConv(F, calling_conv);
|
||||
else
|
||||
si_llvm_add_attribute(F, "ShaderType", llvm_type);
|
||||
LLVMSetFunctionCallConv(F, calling_conv);
|
||||
}
|
||||
|
||||
static void init_amdgpu_target()
|
||||
@ -848,10 +826,8 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
|
||||
* FIXME: We shouldn't need to have the non-alloca
|
||||
* code path for arrays. LLVM should be smart enough to
|
||||
* promote allocas into registers when profitable.
|
||||
*
|
||||
* LLVM 3.8 crashes with this.
|
||||
*/
|
||||
if ((HAVE_LLVM >= 0x0309 && array_size > 16) ||
|
||||
if (array_size > 16 ||
|
||||
/* TODO: VGPR indexing is buggy on GFX9. */
|
||||
ctx->screen->b.chip_class == GFX9) {
|
||||
array_alloca = LLVMBuildAlloca(builder,
|
||||
@ -1274,13 +1250,11 @@ void si_llvm_context_init(struct si_shader_context *ctx,
|
||||
ctx->gallivm.context);
|
||||
LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
|
||||
|
||||
#if HAVE_LLVM >= 0x0309
|
||||
LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
|
||||
char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
|
||||
LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
|
||||
LLVMDisposeTargetData(data_layout);
|
||||
LLVMDisposeMessage(data_layout_str);
|
||||
#endif
|
||||
|
||||
bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
|
||||
enum lp_float_mode float_mode =
|
||||
|
Loading…
Reference in New Issue
Block a user