mirror of
https://gitee.com/openharmony/third_party_mesa3d
synced 2024-11-23 23:41:13 +00:00
intel/fs: Implement nir_intrinsic_global_atomic_*
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
a91f392073
commit
e644ed468f
@ -439,10 +439,13 @@ static const char *const dp_dc1_msg_type_hsw[32] = {
|
||||
[HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE] = "DC typed surface write",
|
||||
[GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ] = "DC A64 scattered read",
|
||||
[GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ] = "DC A64 untyped surface read",
|
||||
[GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP] = "DC A64 untyped atomic op",
|
||||
[GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE] = "DC A64 untyped surface write",
|
||||
[GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE] = "DC A64 scattered write",
|
||||
[GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP] =
|
||||
"DC untyped atomic float op",
|
||||
[GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP] =
|
||||
"DC A64 untyped atomic float op",
|
||||
};
|
||||
|
||||
static const char *const aop[16] = {
|
||||
@ -1940,6 +1943,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
||||
case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2:
|
||||
case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2:
|
||||
case HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2:
|
||||
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
|
||||
control(file, "atomic op", aop, msg_ctrl & 0xf, &space);
|
||||
break;
|
||||
case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
|
||||
@ -1954,6 +1958,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
||||
break;
|
||||
}
|
||||
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
|
||||
case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
|
||||
format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16);
|
||||
control(file, "atomic float op", aop_float, msg_ctrl & 0xf,
|
||||
&space);
|
||||
|
@ -749,6 +749,46 @@ brw_dp_a64_byte_scattered_rw_desc(const struct gen_device_info *devinfo,
|
||||
return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_dp_a64_untyped_atomic_desc(const struct gen_device_info *devinfo,
|
||||
unsigned exec_size, /**< 0 for SIMD4x2 */
|
||||
unsigned bit_size,
|
||||
unsigned atomic_op,
|
||||
bool response_expected)
|
||||
{
|
||||
assert(exec_size == 8);
|
||||
assert(devinfo->gen >= 8);
|
||||
assert(bit_size == 32 || bit_size == 64);
|
||||
|
||||
const unsigned msg_type = GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;
|
||||
|
||||
const unsigned msg_control =
|
||||
SET_BITS(atomic_op, 3, 0) |
|
||||
SET_BITS(bit_size == 64, 4, 4) |
|
||||
SET_BITS(response_expected, 5, 5);
|
||||
|
||||
return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info *devinfo,
|
||||
unsigned exec_size,
|
||||
unsigned atomic_op,
|
||||
bool response_expected)
|
||||
{
|
||||
assert(exec_size == 8);
|
||||
assert(devinfo->gen >= 9);
|
||||
|
||||
assert(exec_size > 0);
|
||||
const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP;
|
||||
|
||||
const unsigned msg_control =
|
||||
SET_BITS(atomic_op, 1, 0) |
|
||||
SET_BITS(response_expected, 5, 5);
|
||||
|
||||
return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_dp_typed_atomic_desc(const struct gen_device_info *devinfo,
|
||||
unsigned exec_size,
|
||||
|
@ -424,6 +424,8 @@ enum opcode {
|
||||
SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL,
|
||||
SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL,
|
||||
SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL,
|
||||
SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
|
||||
SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
||||
|
||||
SHADER_OPCODE_TYPED_ATOMIC,
|
||||
SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
|
||||
@ -1185,9 +1187,11 @@ enum brw_message_target {
|
||||
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13
|
||||
#define GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ 0x10
|
||||
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ 0x11
|
||||
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP 0x12
|
||||
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE 0x19
|
||||
#define GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE 0x1a
|
||||
#define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP 0x1b
|
||||
#define GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP 0x1d
|
||||
|
||||
/* GEN9 */
|
||||
#define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE 12
|
||||
|
@ -797,6 +797,35 @@ fs_inst::components_read(unsigned i) const
|
||||
assert(src[2].file == IMM);
|
||||
return i == 1 ? src[2].ud : 1;
|
||||
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||
assert(src[2].file == IMM);
|
||||
if (i == 1) {
|
||||
/* Data source */
|
||||
const unsigned op = src[2].ud;
|
||||
switch (op) {
|
||||
case BRW_AOP_INC:
|
||||
case BRW_AOP_DEC:
|
||||
case BRW_AOP_PREDEC:
|
||||
return 0;
|
||||
case BRW_AOP_CMPWR:
|
||||
return 2;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||
assert(src[2].file == IMM);
|
||||
if (i == 1) {
|
||||
/* Data source */
|
||||
const unsigned op = src[2].ud;
|
||||
return op == BRW_AOP_FCMPWR ? 2 : 1;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
|
||||
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
|
||||
/* Scattered logical opcodes use the following params:
|
||||
* src[0] Surface coordinates
|
||||
@ -5292,6 +5321,18 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
true /* write */);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||
desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 32,
|
||||
arg, /* atomic_op */
|
||||
!inst->dst.is_null());
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
|
||||
arg, /* atomic_op */
|
||||
!inst->dst.is_null());
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Unknown A64 logical instruction");
|
||||
}
|
||||
@ -5492,6 +5533,8 @@ fs_visitor::lower_logical_sends()
|
||||
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
|
||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||
lower_a64_logical_send(ibld, inst);
|
||||
break;
|
||||
|
||||
@ -5998,6 +6041,10 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
|
||||
return devinfo->gen <= 8 ? 8 : MIN2(16, inst->exec_size);
|
||||
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||
return 8;
|
||||
|
||||
case SHADER_OPCODE_URB_READ_SIMD8:
|
||||
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
||||
|
@ -233,6 +233,10 @@ public:
|
||||
int op, nir_intrinsic_instr *instr);
|
||||
void nir_emit_shared_atomic_float(const brw::fs_builder &bld,
|
||||
int op, nir_intrinsic_instr *instr);
|
||||
void nir_emit_global_atomic(const brw::fs_builder &bld,
|
||||
int op, nir_intrinsic_instr *instr);
|
||||
void nir_emit_global_atomic_float(const brw::fs_builder &bld,
|
||||
int op, nir_intrinsic_instr *instr);
|
||||
void nir_emit_texture(const brw::fs_builder &bld,
|
||||
nir_tex_instr *instr);
|
||||
void nir_emit_jump(const brw::fs_builder &bld,
|
||||
|
@ -4029,6 +4029,46 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_global_atomic_add:
|
||||
nir_emit_global_atomic(bld, get_op_for_atomic_add(instr, 1), instr);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_imin:
|
||||
nir_emit_global_atomic(bld, BRW_AOP_IMIN, instr);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_umin:
|
||||
nir_emit_global_atomic(bld, BRW_AOP_UMIN, instr);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_imax:
|
||||
nir_emit_global_atomic(bld, BRW_AOP_IMAX, instr);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_umax:
|
||||
nir_emit_global_atomic(bld, BRW_AOP_UMAX, instr);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_and:
|
||||
nir_emit_global_atomic(bld, BRW_AOP_AND, instr);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_or:
|
||||
nir_emit_global_atomic(bld, BRW_AOP_OR, instr);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_xor:
|
||||
nir_emit_global_atomic(bld, BRW_AOP_XOR, instr);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_exchange:
|
||||
nir_emit_global_atomic(bld, BRW_AOP_MOV, instr);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_comp_swap:
|
||||
nir_emit_global_atomic(bld, BRW_AOP_CMPWR, instr);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_fmin:
|
||||
nir_emit_global_atomic_float(bld, BRW_AOP_FMIN, instr);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_fmax:
|
||||
nir_emit_global_atomic_float(bld, BRW_AOP_FMAX, instr);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_fcomp_swap:
|
||||
nir_emit_global_atomic_float(bld, BRW_AOP_FCMPWR, instr);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ssbo: {
|
||||
assert(devinfo->gen >= 7);
|
||||
|
||||
@ -4702,6 +4742,60 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
|
||||
bld.MOV(dest, atomic_result);
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_emit_global_atomic(const fs_builder &bld,
|
||||
int op, nir_intrinsic_instr *instr)
|
||||
{
|
||||
if (stage == MESA_SHADER_FRAGMENT)
|
||||
brw_wm_prog_data(prog_data)->has_side_effects = true;
|
||||
|
||||
fs_reg dest;
|
||||
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
|
||||
dest = get_nir_dest(instr->dest);
|
||||
|
||||
fs_reg addr = get_nir_src(instr->src[0]);
|
||||
|
||||
fs_reg data;
|
||||
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
|
||||
data = get_nir_src(instr->src[1]);
|
||||
|
||||
if (op == BRW_AOP_CMPWR) {
|
||||
fs_reg tmp = bld.vgrf(data.type, 2);
|
||||
fs_reg sources[2] = { data, get_nir_src(instr->src[2]) };
|
||||
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
||||
data = tmp;
|
||||
}
|
||||
|
||||
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
|
||||
dest, addr, data, brw_imm_ud(op));
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_emit_global_atomic_float(const fs_builder &bld,
|
||||
int op, nir_intrinsic_instr *instr)
|
||||
{
|
||||
if (stage == MESA_SHADER_FRAGMENT)
|
||||
brw_wm_prog_data(prog_data)->has_side_effects = true;
|
||||
|
||||
assert(nir_intrinsic_infos[instr->intrinsic].has_dest);
|
||||
fs_reg dest = get_nir_dest(instr->dest);
|
||||
|
||||
fs_reg addr = get_nir_src(instr->src[0]);
|
||||
|
||||
assert(op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC);
|
||||
fs_reg data = get_nir_src(instr->src[1]);
|
||||
|
||||
if (op == BRW_AOP_FCMPWR) {
|
||||
fs_reg tmp = bld.vgrf(data.type, 2);
|
||||
fs_reg sources[2] = { data, get_nir_src(instr->src[2]) };
|
||||
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
||||
data = tmp;
|
||||
}
|
||||
|
||||
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
|
||||
dest, addr, data, brw_imm_ud(op));
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
|
||||
{
|
||||
|
@ -499,6 +499,8 @@ schedule_node::set_latency_gen7(bool is_haswell)
|
||||
case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2:
|
||||
case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP:
|
||||
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
|
||||
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
|
||||
case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
|
||||
/* See also SHADER_OPCODE_UNTYPED_ATOMIC */
|
||||
latency = 14000;
|
||||
break;
|
||||
|
@ -302,6 +302,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
||||
return "a64_byte_scattered_read_logical";
|
||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||
return "a64_byte_scattered_write_logical";
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||
return "a64_untyped_atomic_logical";
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||
return "a64_untyped_atomic_float_logical";
|
||||
case SHADER_OPCODE_TYPED_ATOMIC:
|
||||
return "typed_atomic";
|
||||
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
||||
@ -1020,6 +1024,8 @@ backend_instruction::has_side_effects() const
|
||||
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||
case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
|
||||
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_TYPED_ATOMIC:
|
||||
|
Loading…
Reference in New Issue
Block a user