aco: improve vector optimization with sub-dword vectors

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4507>
This commit is contained in:
Rhys Perry 2020-04-10 13:09:54 +01:00 committed by Marge Bot
parent 849eb0a776
commit 41ac44e1b3

View File

@ -925,7 +925,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
}
if (instr->operands.size() == 1 && instr->operands[0].isTemp())
ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
else if (instr->definitions[0].getTemp().size() == instr->operands.size())
else
ctx.info[instr->definitions[0].tempId()].set_vec(instr.get());
break;
}
@ -933,10 +933,17 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
if (!ctx.info[instr->operands[0].tempId()].is_vec())
break;
Instruction* vec = ctx.info[instr->operands[0].tempId()].instr;
if (instr->definitions.size() != vec->operands.size())
break;
for (unsigned i = 0; i < instr->definitions.size(); i++) {
Operand vec_op = vec->operands[i];
unsigned split_offset = 0;
unsigned vec_offset = 0;
unsigned vec_index = 0;
for (unsigned i = 0; i < instr->definitions.size(); split_offset += instr->definitions[i++].bytes()) {
while (vec_offset < split_offset && vec_index < vec->operands.size())
vec_offset += vec->operands[vec_index++].bytes();
if (vec_offset != split_offset || vec->operands[vec_index].bytes() != instr->definitions[i].bytes())
continue;
Operand vec_op = vec->operands[vec_index];
if (vec_op.isConstant()) {
if (vec_op.isLiteral())
ctx.info[instr->definitions[i].tempId()].set_literal(vec_op.constantValue());
@ -2557,10 +2564,12 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr)
if (instr->opcode == aco_opcode::p_split_vector) {
unsigned num_used = 0;
unsigned idx = 0;
for (unsigned i = 0; i < instr->definitions.size(); i++) {
unsigned split_offset = 0;
for (unsigned i = 0, offset = 0; i < instr->definitions.size(); offset += instr->definitions[i++].bytes()) {
if (ctx.uses[instr->definitions[i].tempId()]) {
num_used++;
idx = i;
split_offset = offset;
}
}
bool done = false;
@ -2571,13 +2580,13 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr)
unsigned off = 0;
Operand op;
for (Operand& vec_op : vec->operands) {
if (off == idx * instr->definitions[0].size()) {
if (off == split_offset) {
op = vec_op;
break;
}
off += vec_op.size();
off += vec_op.bytes();
}
if (off != instr->operands[0].size()) {
if (off != instr->operands[0].bytes() && op.bytes() == instr->definitions[idx].bytes()) {
ctx.uses[instr->operands[0].tempId()]--;
for (Operand& vec_op : vec->operands) {
if (vec_op.isTemp())
@ -2595,10 +2604,12 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr)
}
}
if (!done && num_used == 1) {
if (!done && num_used == 1 &&
instr->operands[0].bytes() % instr->definitions[idx].bytes() == 0 &&
split_offset % instr->definitions[idx].bytes() == 0) {
aco_ptr<Pseudo_instruction> extract{create_instruction<Pseudo_instruction>(aco_opcode::p_extract_vector, Format::PSEUDO, 2, 1)};
extract->operands[0] = instr->operands[0];
extract->operands[1] = Operand((uint32_t) idx);
extract->operands[1] = Operand((uint32_t) split_offset / instr->definitions[idx].bytes());
extract->definitions[0] = instr->definitions[idx];
instr.reset(extract.release());
}