tcg: Put opcodes in a linked list

The previous setup required ops and args to be completely sequential,
and was error-prone when it came to both iteration and optimization.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Richard Henderson 2014-09-19 13:49:15 -07:00
parent fe700adb3d
commit c45cb8bb89
5 changed files with 438 additions and 508 deletions
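
The shape of the change, condensed: each op becomes a small TCGOp holding its
opcode, an index into a shared argument buffer, and prev/next indices linking
the ops into a doubly linked list, so passes can iterate and rewrite in place.
A minimal, self-contained sketch of the scheme (simplified, hypothetical types
and names; the real fields and loops appear in the diffs below):

#include <stdio.h>

typedef struct Op {
    int opc;            /* opcode */
    int args;           /* index of first argument in parm_buf, or -1 */
    int prev, next;     /* list links; -1 terminates */
} Op;

static Op op_buf[8];
static int parm_buf[16];
static int first_op = -1, last_op = -1, next_op, next_parm;

/* Emit an op with nargs arguments, optimistically filling the forward
   link; the list is terminated once, after all emission is done. */
static int emit(int opc, int nargs, const int *a)
{
    int oi = next_op++;
    op_buf[oi] = (Op){ .opc = opc, .args = nargs ? next_parm : -1,
                       .prev = oi - 1, .next = oi + 1 };
    for (int i = 0; i < nargs; i++) {
        parm_buf[next_parm++] = a[i];
    }
    if (first_op < 0) {
        first_op = oi;
    }
    last_op = oi;
    return oi;
}

int main(void)
{
    emit(1, 2, (int[]){ 10, 11 });
    emit(2, 1, (int[]){ 12 });
    op_buf[last_op].next = -1;                /* terminate the list */

    for (int oi = first_op; oi >= 0; ) {
        Op *op = &op_buf[oi];
        int oi_next = op->next;               /* read the link first */
        printf("op %d: opc=%d args@%d\n", oi, op->opc, op->args);
        oi = oi_next;
    }
    return 0;
}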


@ -11,8 +11,8 @@ static int exitreq_label;
static inline void gen_tb_start(TranslationBlock *tb)
{
TCGv_i32 count;
TCGv_i32 flag;
TCGv_i32 count, flag, imm;
int i;
exitreq_label = gen_new_label();
flag = tcg_temp_new_i32();
@ -21,16 +21,25 @@ static inline void gen_tb_start(TranslationBlock *tb)
tcg_gen_brcondi_i32(TCG_COND_NE, flag, 0, exitreq_label);
tcg_temp_free_i32(flag);
if (!(tb->cflags & CF_USE_ICOUNT))
if (!(tb->cflags & CF_USE_ICOUNT)) {
return;
}
icount_label = gen_new_label();
count = tcg_temp_local_new_i32();
tcg_gen_ld_i32(count, cpu_env,
-ENV_OFFSET + offsetof(CPUState, icount_decr.u32));
imm = tcg_temp_new_i32();
tcg_gen_movi_i32(imm, 0xdeadbeef);
/* This is a horrid hack to allow fixing up the value later. */
icount_arg = tcg_ctx.gen_opparam_ptr + 1;
tcg_gen_subi_i32(count, count, 0xdeadbeef);
i = tcg_ctx.gen_last_op_idx;
i = tcg_ctx.gen_op_buf[i].args;
icount_arg = &tcg_ctx.gen_opparam_buf[i + 1];
tcg_gen_sub_i32(count, count, imm);
tcg_temp_free_i32(imm);
tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, icount_label);
tcg_gen_st16_i32(count, cpu_env,
@ -49,7 +58,8 @@ static void gen_tb_end(TranslationBlock *tb, int num_insns)
tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_ICOUNT_EXPIRED);
}
*tcg_ctx.gen_opc_ptr = INDEX_op_end;
/* Terminate the linked list. */
tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1;
}
static inline void gen_io_start(void)
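
The "horrid hack" above now works through the new indices: the placeholder
constant is the second operand of the movi just emitted, found via the op's
args index. Condensed sketch of the locate-and-patch sequence (ai renames the
reused i from the hunk; the final store happens in gen_tb_end, outside the
lines shown here):

int i = tcg_ctx.gen_last_op_idx;                  /* the movi just emitted  */
int ai = tcg_ctx.gen_op_buf[i].args;              /* its argument base      */
icount_arg = &tcg_ctx.gen_opparam_buf[ai + 1];    /* slot 1 holds the value */

/* ... later, once the TB's instruction count is known: */
*icount_arg = num_insns;                          /* replaces 0xdeadbeef */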


@ -162,13 +162,13 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
return false;
}
static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args,
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
TCGOpcode old_op, TCGArg dst, TCGArg src)
{
TCGOpcode new_op = op_to_mov(old_op);
tcg_target_ulong mask;
s->gen_opc_buf[op_index] = new_op;
op->opc = new_op;
reset_temp(dst);
mask = temps[src].mask;
@ -193,17 +193,17 @@ static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args,
temps[src].next_copy = dst;
}
gen_args[0] = dst;
gen_args[1] = src;
args[0] = dst;
args[1] = src;
}
static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args,
static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
TCGOpcode old_op, TCGArg dst, TCGArg val)
{
TCGOpcode new_op = op_to_movi(old_op);
tcg_target_ulong mask;
s->gen_opc_buf[op_index] = new_op;
op->opc = new_op;
reset_temp(dst);
temps[dst].state = TCG_TEMP_CONST;
@ -215,8 +215,8 @@ static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args,
}
temps[dst].mask = mask;
gen_args[0] = dst;
gen_args[1] = val;
args[0] = dst;
args[1] = val;
}
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
@ -533,11 +533,9 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
}
/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
TCGArg *args, TCGOpDef *tcg_op_defs)
static void tcg_constant_folding(TCGContext *s)
{
int nb_ops, op_index, nb_temps, nb_globals;
TCGArg *gen_args;
int oi, oi_next, nb_temps, nb_globals;
/* Array VALS has an element for each temp.
If this temp holds a constant then its value is kept in VALS' element.
@ -548,24 +546,23 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
nb_globals = s->nb_globals;
reset_all_temps(nb_temps);
nb_ops = tcg_opc_ptr - s->gen_opc_buf;
gen_args = args;
for (op_index = 0; op_index < nb_ops; op_index++) {
TCGOpcode op = s->gen_opc_buf[op_index];
const TCGOpDef *def = &tcg_op_defs[op];
for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
tcg_target_ulong mask, partmask, affected;
int nb_oargs, nb_iargs, nb_args, i;
int nb_oargs, nb_iargs, i;
TCGArg tmp;
if (op == INDEX_op_call) {
*gen_args++ = tmp = *args++;
nb_oargs = tmp >> 16;
nb_iargs = tmp & 0xffff;
nb_args = nb_oargs + nb_iargs + def->nb_cargs;
TCGOp * const op = &s->gen_op_buf[oi];
TCGArg * const args = &s->gen_opparam_buf[op->args];
TCGOpcode opc = op->opc;
const TCGOpDef *def = &tcg_op_defs[opc];
oi_next = op->next;
if (opc == INDEX_op_call) {
nb_oargs = op->callo;
nb_iargs = op->calli;
} else {
nb_oargs = def->nb_oargs;
nb_iargs = def->nb_iargs;
nb_args = def->nb_args;
}
/* Do copy propagation */
@ -576,7 +573,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
/* For commutative operations make constant second argument */
switch (op) {
switch (opc) {
CASE_OP_32_64(add):
CASE_OP_32_64(mul):
CASE_OP_32_64(and):
@ -634,7 +631,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
and "sub r, 0, a => neg r, a" case. */
switch (op) {
switch (opc) {
CASE_OP_32_64(shl):
CASE_OP_32_64(shr):
CASE_OP_32_64(sar):
@ -642,9 +639,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(rotr):
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[1]].val == 0) {
tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
args += 3;
gen_args += 2;
tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
continue;
}
break;
@ -657,7 +652,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Proceed with possible constant folding. */
break;
}
if (op == INDEX_op_sub_i32) {
if (opc == INDEX_op_sub_i32) {
neg_op = INDEX_op_neg_i32;
have_neg = TCG_TARGET_HAS_neg_i32;
} else {
@ -669,12 +664,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[1]].val == 0) {
s->gen_opc_buf[op_index] = neg_op;
op->opc = neg_op;
reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[2];
args += 3;
gen_args += 2;
args[1] = args[2];
continue;
}
}
@ -728,12 +720,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
if (!have_not) {
break;
}
s->gen_opc_buf[op_index] = not_op;
op->opc = not_op;
reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[i];
args += 3;
gen_args += 2;
args[1] = args[i];
continue;
}
default:
@ -741,7 +730,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
/* Simplify expression for "op r, a, const => mov r, a" cases */
switch (op) {
switch (opc) {
CASE_OP_32_64(add):
CASE_OP_32_64(sub):
CASE_OP_32_64(shl):
@ -769,12 +758,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
break;
do_mov3:
if (temps_are_copies(args[0], args[1])) {
s->gen_opc_buf[op_index] = INDEX_op_nop;
op->opc = INDEX_op_nop;
} else {
tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
gen_args += 2;
tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
}
args += 3;
continue;
default:
break;
@ -784,7 +771,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
output argument is supported. */
mask = -1;
affected = -1;
switch (op) {
switch (opc) {
CASE_OP_32_64(ext8s):
if ((temps[args[1]].mask & 0x80) != 0) {
break;
@ -923,38 +910,31 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
if (partmask == 0) {
assert(nb_oargs == 1);
tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
args += nb_args;
gen_args += 2;
tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
continue;
}
if (affected == 0) {
assert(nb_oargs == 1);
if (temps_are_copies(args[0], args[1])) {
s->gen_opc_buf[op_index] = INDEX_op_nop;
op->opc = INDEX_op_nop;
} else if (temps[args[1]].state != TCG_TEMP_CONST) {
tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
gen_args += 2;
tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
} else {
tcg_opt_gen_movi(s, op_index, gen_args, op,
tcg_opt_gen_movi(s, op, args, opc,
args[0], temps[args[1]].val);
gen_args += 2;
}
args += nb_args;
continue;
}
/* Simplify expression for "op r, a, 0 => movi r, 0" cases */
switch (op) {
switch (opc) {
CASE_OP_32_64(and):
CASE_OP_32_64(mul):
CASE_OP_32_64(muluh):
CASE_OP_32_64(mulsh):
if ((temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0)) {
tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
args += 3;
gen_args += 2;
tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
continue;
}
break;
@ -963,18 +943,15 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
/* Simplify expression for "op r, a, a => mov r, a" cases */
switch (op) {
switch (opc) {
CASE_OP_32_64(or):
CASE_OP_32_64(and):
if (temps_are_copies(args[1], args[2])) {
if (temps_are_copies(args[0], args[1])) {
s->gen_opc_buf[op_index] = INDEX_op_nop;
op->opc = INDEX_op_nop;
} else {
tcg_opt_gen_mov(s, op_index, gen_args, op,
args[0], args[1]);
gen_args += 2;
tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
}
args += 3;
continue;
}
break;
@ -983,14 +960,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
/* Simplify expression for "op r, a, a => movi r, 0" cases */
switch (op) {
switch (opc) {
CASE_OP_32_64(andc):
CASE_OP_32_64(sub):
CASE_OP_32_64(xor):
if (temps_are_copies(args[1], args[2])) {
tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
gen_args += 2;
args += 3;
tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
continue;
}
break;
@ -1001,17 +976,14 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Propagate constants through copy operations and do constant
folding. Constants will be substituted to arguments by register
allocator where needed and possible. Also detect copies. */
switch (op) {
switch (opc) {
CASE_OP_32_64(mov):
if (temps_are_copies(args[0], args[1])) {
args += 2;
s->gen_opc_buf[op_index] = INDEX_op_nop;
op->opc = INDEX_op_nop;
break;
}
if (temps[args[1]].state != TCG_TEMP_CONST) {
tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
gen_args += 2;
args += 2;
tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
break;
}
/* Source argument is constant. Rewrite the operation and
@ -1019,9 +991,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
args[1] = temps[args[1]].val;
/* fallthrough */
CASE_OP_32_64(movi):
tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], args[1]);
gen_args += 2;
args += 2;
tcg_opt_gen_movi(s, op, args, opc, args[0], args[1]);
break;
CASE_OP_32_64(not):
@ -1033,20 +1003,16 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
case INDEX_op_ext32s_i64:
case INDEX_op_ext32u_i64:
if (temps[args[1]].state == TCG_TEMP_CONST) {
tmp = do_constant_folding(op, temps[args[1]].val, 0);
tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
gen_args += 2;
args += 2;
tmp = do_constant_folding(opc, temps[args[1]].val, 0);
tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
break;
}
goto do_default;
case INDEX_op_trunc_shr_i32:
if (temps[args[1]].state == TCG_TEMP_CONST) {
tmp = do_constant_folding(op, temps[args[1]].val, args[2]);
tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
gen_args += 2;
args += 3;
tmp = do_constant_folding(opc, temps[args[1]].val, args[2]);
tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
break;
}
goto do_default;
@ -1075,11 +1041,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(remu):
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[2]].state == TCG_TEMP_CONST) {
tmp = do_constant_folding(op, temps[args[1]].val,
tmp = do_constant_folding(opc, temps[args[1]].val,
temps[args[2]].val);
tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
gen_args += 2;
args += 3;
tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
break;
}
goto do_default;
@ -1089,54 +1053,44 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
&& temps[args[2]].state == TCG_TEMP_CONST) {
tmp = deposit64(temps[args[1]].val, args[3], args[4],
temps[args[2]].val);
tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
gen_args += 2;
args += 5;
tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
break;
}
goto do_default;
CASE_OP_32_64(setcond):
tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
if (tmp != 2) {
tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
gen_args += 2;
args += 4;
tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
break;
}
goto do_default;
CASE_OP_32_64(brcond):
tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
if (tmp != 2) {
if (tmp) {
reset_all_temps(nb_temps);
s->gen_opc_buf[op_index] = INDEX_op_br;
gen_args[0] = args[3];
gen_args += 1;
op->opc = INDEX_op_br;
args[0] = args[3];
} else {
s->gen_opc_buf[op_index] = INDEX_op_nop;
op->opc = INDEX_op_nop;
}
args += 4;
break;
}
goto do_default;
CASE_OP_32_64(movcond):
tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
if (tmp != 2) {
if (temps_are_copies(args[0], args[4-tmp])) {
s->gen_opc_buf[op_index] = INDEX_op_nop;
op->opc = INDEX_op_nop;
} else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
tcg_opt_gen_movi(s, op_index, gen_args, op,
tcg_opt_gen_movi(s, op, args, opc,
args[0], temps[args[4-tmp]].val);
gen_args += 2;
} else {
tcg_opt_gen_mov(s, op_index, gen_args, op,
args[0], args[4-tmp]);
gen_args += 2;
tcg_opt_gen_mov(s, op, args, opc, args[0], args[4-tmp]);
}
args += 6;
break;
}
goto do_default;
@ -1154,24 +1108,31 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
uint64_t a = ((uint64_t)ah << 32) | al;
uint64_t b = ((uint64_t)bh << 32) | bl;
TCGArg rl, rh;
TCGOp *op2;
TCGArg *args2;
if (op == INDEX_op_add2_i32) {
if (opc == INDEX_op_add2_i32) {
a += b;
} else {
a -= b;
}
/* We emit the extra nop when we emit the add2/sub2. */
assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
op2 = &s->gen_op_buf[oi_next];
assert(op2->opc == INDEX_op_nop);
/* But we still have to allocate args for the op. */
op2->args = s->gen_next_parm_idx;
s->gen_next_parm_idx += 2;
args2 = &s->gen_opparam_buf[op2->args];
rl = args[0];
rh = args[1];
tcg_opt_gen_movi(s, op_index, &gen_args[0],
op, rl, (uint32_t)a);
tcg_opt_gen_movi(s, ++op_index, &gen_args[2],
op, rh, (uint32_t)(a >> 32));
gen_args += 4;
args += 6;
tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a);
tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(a >> 32));
/* We've done all we need to do with the movi. Skip it. */
oi_next = op2->next;
break;
}
goto do_default;
@ -1183,18 +1144,25 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
uint32_t b = temps[args[3]].val;
uint64_t r = (uint64_t)a * b;
TCGArg rl, rh;
TCGOp *op2;
TCGArg *args2;
/* We emit the extra nop when we emit the mulu2. */
assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
op2 = &s->gen_op_buf[oi_next];
assert(op2->opc == INDEX_op_nop);
/* But we still have to allocate args for the op. */
op2->args = s->gen_next_parm_idx;
s->gen_next_parm_idx += 2;
args2 = &s->gen_opparam_buf[op2->args];
rl = args[0];
rh = args[1];
tcg_opt_gen_movi(s, op_index, &gen_args[0],
op, rl, (uint32_t)r);
tcg_opt_gen_movi(s, ++op_index, &gen_args[2],
op, rh, (uint32_t)(r >> 32));
gen_args += 4;
args += 4;
tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)r);
tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(r >> 32));
/* We've done all we need to do with the movi. Skip it. */
oi_next = op2->next;
break;
}
goto do_default;
@ -1205,12 +1173,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
if (tmp) {
do_brcond_true:
reset_all_temps(nb_temps);
s->gen_opc_buf[op_index] = INDEX_op_br;
gen_args[0] = args[5];
gen_args += 1;
op->opc = INDEX_op_br;
args[0] = args[5];
} else {
do_brcond_false:
s->gen_opc_buf[op_index] = INDEX_op_nop;
op->opc = INDEX_op_nop;
}
} else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
&& temps[args[2]].state == TCG_TEMP_CONST
@ -1221,12 +1188,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
vs the high word of the input. */
do_brcond_high:
reset_all_temps(nb_temps);
s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
gen_args[0] = args[1];
gen_args[1] = args[3];
gen_args[2] = args[4];
gen_args[3] = args[5];
gen_args += 4;
op->opc = INDEX_op_brcond_i32;
args[0] = args[1];
args[1] = args[3];
args[2] = args[4];
args[3] = args[5];
} else if (args[4] == TCG_COND_EQ) {
/* Simplify EQ comparisons where one of the pairs
can be simplified. */
@ -1246,12 +1212,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
do_brcond_low:
reset_all_temps(nb_temps);
s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
gen_args[0] = args[0];
gen_args[1] = args[2];
gen_args[2] = args[4];
gen_args[3] = args[5];
gen_args += 4;
op->opc = INDEX_op_brcond_i32;
args[1] = args[2];
args[2] = args[4];
args[3] = args[5];
} else if (args[4] == TCG_COND_NE) {
/* Simplify NE comparisons where one of the pairs
can be simplified. */
@ -1273,15 +1237,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
} else {
goto do_default;
}
args += 6;
break;
case INDEX_op_setcond2_i32:
tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
if (tmp != 2) {
do_setcond_const:
tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
gen_args += 2;
tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
} else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
&& temps[args[3]].state == TCG_TEMP_CONST
&& temps[args[4]].state == TCG_TEMP_CONST
@ -1290,14 +1252,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Simplify LT/GE comparisons vs zero to a single compare
vs the high word of the input. */
do_setcond_high:
s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
reset_temp(args[0]);
temps[args[0]].mask = 1;
gen_args[0] = args[0];
gen_args[1] = args[2];
gen_args[2] = args[4];
gen_args[3] = args[5];
gen_args += 4;
op->opc = INDEX_op_setcond_i32;
args[1] = args[2];
args[2] = args[4];
args[3] = args[5];
} else if (args[5] == TCG_COND_EQ) {
/* Simplify EQ comparisons where one of the pairs
can be simplified. */
@ -1318,12 +1278,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
do_setcond_low:
reset_temp(args[0]);
temps[args[0]].mask = 1;
s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
gen_args[0] = args[0];
gen_args[1] = args[1];
gen_args[2] = args[3];
gen_args[3] = args[5];
gen_args += 4;
op->opc = INDEX_op_setcond_i32;
args[2] = args[3];
args[3] = args[5];
} else if (args[5] == TCG_COND_NE) {
/* Simplify NE comparisons where one of the pairs
can be simplified. */
@ -1345,7 +1302,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
} else {
goto do_default;
}
args += 6;
break;
case INDEX_op_call:
@ -1377,22 +1333,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
}
}
for (i = 0; i < nb_args; i++) {
gen_args[i] = args[i];
}
args += nb_args;
gen_args += nb_args;
break;
}
}
return gen_args;
}
TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
TCGArg *args, TCGOpDef *tcg_op_defs)
void tcg_optimize(TCGContext *s)
{
TCGArg *res;
res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
return res;
tcg_constant_folding(s);
}
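
With a single op stream the optimizer no longer maintains a separate output
cursor: tcg_opt_gen_mov/movi overwrite op->opc and the args array in place,
and a removed op simply becomes INDEX_op_nop. The double-word folds reuse the
nop emitted alongside add2/sub2 and mulu2 (per the comments above); the
pattern, condensed from those cases (rl, rh, lo, hi stand for the
destinations and folded halves computed there):

TCGOp *op2 = &s->gen_op_buf[oi_next];           /* the trailing nop         */
assert(op2->opc == INDEX_op_nop);
op2->args = s->gen_next_parm_idx;               /* the nop carried no args; */
s->gen_next_parm_idx += 2;                      /* allocate two for a movi  */
TCGArg *args2 = &s->gen_opparam_buf[op2->args];

tcg_opt_gen_movi(s, op,  args,  opc, rl, lo);   /* fold low half in place   */
tcg_opt_gen_movi(s, op2, args2, opc, rh, hi);   /* fold high half into nop  */
oi_next = op2->next;                            /* skip the finished movi   */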


@ -35,100 +35,116 @@ extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_HIGH TCGV_HIGH_link_error
#endif
/* Note that this is optimized for sequential allocation during translate.
Up to and including filling in the forward link immediately. We'll do
proper termination of the end of the list after we finish translation. */
static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args)
{
int oi = ctx->gen_next_op_idx;
int ni = oi + 1;
int pi = oi - 1;
tcg_debug_assert(oi < OPC_BUF_SIZE);
ctx->gen_last_op_idx = oi;
ctx->gen_next_op_idx = ni;
ctx->gen_op_buf[oi] = (TCGOp){
.opc = opc,
.args = args,
.prev = pi,
.next = ni
};
}
void tcg_gen_op0(TCGContext *ctx, TCGOpcode opc)
{
*ctx->gen_opc_ptr++ = opc;
tcg_emit_op(ctx, opc, -1);
}
void tcg_gen_op1(TCGContext *ctx, TCGOpcode opc, TCGArg a1)
{
uint16_t *op = ctx->gen_opc_ptr;
TCGArg *opp = ctx->gen_opparam_ptr;
int pi = ctx->gen_next_parm_idx;
op[0] = opc;
opp[0] = a1;
tcg_debug_assert(pi + 1 <= OPPARAM_BUF_SIZE);
ctx->gen_next_parm_idx = pi + 1;
ctx->gen_opparam_buf[pi] = a1;
ctx->gen_opc_ptr = op + 1;
ctx->gen_opparam_ptr = opp + 1;
tcg_emit_op(ctx, opc, pi);
}
void tcg_gen_op2(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2)
{
uint16_t *op = ctx->gen_opc_ptr;
TCGArg *opp = ctx->gen_opparam_ptr;
int pi = ctx->gen_next_parm_idx;
op[0] = opc;
opp[0] = a1;
opp[1] = a2;
tcg_debug_assert(pi + 2 <= OPPARAM_BUF_SIZE);
ctx->gen_next_parm_idx = pi + 2;
ctx->gen_opparam_buf[pi + 0] = a1;
ctx->gen_opparam_buf[pi + 1] = a2;
ctx->gen_opc_ptr = op + 1;
ctx->gen_opparam_ptr = opp + 2;
tcg_emit_op(ctx, opc, pi);
}
void tcg_gen_op3(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
TCGArg a2, TCGArg a3)
{
uint16_t *op = ctx->gen_opc_ptr;
TCGArg *opp = ctx->gen_opparam_ptr;
int pi = ctx->gen_next_parm_idx;
op[0] = opc;
opp[0] = a1;
opp[1] = a2;
opp[2] = a3;
tcg_debug_assert(pi + 3 <= OPPARAM_BUF_SIZE);
ctx->gen_next_parm_idx = pi + 3;
ctx->gen_opparam_buf[pi + 0] = a1;
ctx->gen_opparam_buf[pi + 1] = a2;
ctx->gen_opparam_buf[pi + 2] = a3;
ctx->gen_opc_ptr = op + 1;
ctx->gen_opparam_ptr = opp + 3;
tcg_emit_op(ctx, opc, pi);
}
void tcg_gen_op4(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
TCGArg a2, TCGArg a3, TCGArg a4)
{
uint16_t *op = ctx->gen_opc_ptr;
TCGArg *opp = ctx->gen_opparam_ptr;
int pi = ctx->gen_next_parm_idx;
op[0] = opc;
opp[0] = a1;
opp[1] = a2;
opp[2] = a3;
opp[3] = a4;
tcg_debug_assert(pi + 4 <= OPPARAM_BUF_SIZE);
ctx->gen_next_parm_idx = pi + 4;
ctx->gen_opparam_buf[pi + 0] = a1;
ctx->gen_opparam_buf[pi + 1] = a2;
ctx->gen_opparam_buf[pi + 2] = a3;
ctx->gen_opparam_buf[pi + 3] = a4;
ctx->gen_opc_ptr = op + 1;
ctx->gen_opparam_ptr = opp + 4;
tcg_emit_op(ctx, opc, pi);
}
void tcg_gen_op5(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5)
{
uint16_t *op = ctx->gen_opc_ptr;
TCGArg *opp = ctx->gen_opparam_ptr;
int pi = ctx->gen_next_parm_idx;
op[0] = opc;
opp[0] = a1;
opp[1] = a2;
opp[2] = a3;
opp[3] = a4;
opp[4] = a5;
tcg_debug_assert(pi + 5 <= OPPARAM_BUF_SIZE);
ctx->gen_next_parm_idx = pi + 5;
ctx->gen_opparam_buf[pi + 0] = a1;
ctx->gen_opparam_buf[pi + 1] = a2;
ctx->gen_opparam_buf[pi + 2] = a3;
ctx->gen_opparam_buf[pi + 3] = a4;
ctx->gen_opparam_buf[pi + 4] = a5;
ctx->gen_opc_ptr = op + 1;
ctx->gen_opparam_ptr = opp + 5;
tcg_emit_op(ctx, opc, pi);
}
void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2,
TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6)
{
uint16_t *op = ctx->gen_opc_ptr;
TCGArg *opp = ctx->gen_opparam_ptr;
int pi = ctx->gen_next_parm_idx;
op[0] = opc;
opp[0] = a1;
opp[1] = a2;
opp[2] = a3;
opp[3] = a4;
opp[4] = a5;
opp[5] = a6;
tcg_debug_assert(pi + 6 <= OPPARAM_BUF_SIZE);
ctx->gen_next_parm_idx = pi + 6;
ctx->gen_opparam_buf[pi + 0] = a1;
ctx->gen_opparam_buf[pi + 1] = a2;
ctx->gen_opparam_buf[pi + 2] = a3;
ctx->gen_opparam_buf[pi + 3] = a4;
ctx->gen_opparam_buf[pi + 4] = a5;
ctx->gen_opparam_buf[pi + 5] = a6;
ctx->gen_opc_ptr = op + 1;
ctx->gen_opparam_ptr = opp + 6;
tcg_emit_op(ctx, opc, pi);
}
/* 32 bit ops */
@ -1862,53 +1878,57 @@ static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
return op;
}
static inline void tcg_add_param_i32(TCGv_i32 val)
static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
TCGMemOp memop, TCGArg idx)
{
*tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(val);
}
static inline void tcg_add_param_i64(TCGv_i64 val)
{
if (TCG_TARGET_REG_BITS == 32) {
*tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_LOW(val));
*tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_HIGH(val));
} else {
*tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val);
}
}
#if TARGET_LONG_BITS == 32
# define tcg_add_param_tl tcg_add_param_i32
tcg_gen_op4ii_i32(opc, val, addr, memop, idx);
#else
# define tcg_add_param_tl tcg_add_param_i64
if (TCG_TARGET_REG_BITS == 32) {
tcg_gen_op5ii_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr),
memop, idx);
} else {
tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I32(val), GET_TCGV_I64(addr),
memop, idx);
}
#endif
}
static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
TCGMemOp memop, TCGArg idx)
{
#if TARGET_LONG_BITS == 32
if (TCG_TARGET_REG_BITS == 32) {
tcg_gen_op5ii_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
addr, memop, idx);
} else {
tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I64(val), GET_TCGV_I32(addr),
memop, idx);
}
#else
if (TCG_TARGET_REG_BITS == 32) {
tcg_gen_op6ii_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
TCGV_LOW(addr), TCGV_HIGH(addr), memop, idx);
} else {
tcg_gen_op4ii_i64(opc, val, addr, memop, idx);
}
#endif
}
void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
memop = tcg_canonicalize_memop(memop, 0, 0);
*tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32;
tcg_add_param_i32(val);
tcg_add_param_tl(addr);
*tcg_ctx.gen_opparam_ptr++ = memop;
*tcg_ctx.gen_opparam_ptr++ = idx;
gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
}
void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
memop = tcg_canonicalize_memop(memop, 0, 1);
*tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32;
tcg_add_param_i32(val);
tcg_add_param_tl(addr);
*tcg_ctx.gen_opparam_ptr++ = memop;
*tcg_ctx.gen_opparam_ptr++ = idx;
gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
}
void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
memop = tcg_canonicalize_memop(memop, 1, 0);
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
if (memop & MO_SIGN) {
@ -1919,25 +1939,17 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
return;
}
*tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64;
tcg_add_param_i64(val);
tcg_add_param_tl(addr);
*tcg_ctx.gen_opparam_ptr++ = memop;
*tcg_ctx.gen_opparam_ptr++ = idx;
memop = tcg_canonicalize_memop(memop, 1, 0);
gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
}
void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
memop = tcg_canonicalize_memop(memop, 1, 1);
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
return;
}
*tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64;
tcg_add_param_i64(val);
tcg_add_param_tl(addr);
*tcg_ctx.gen_opparam_ptr++ = memop;
*tcg_ctx.gen_opparam_ptr++ = idx;
memop = tcg_canonicalize_memop(memop, 1, 1);
gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
}
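
The tcg_add_param_* helpers could be deleted because operands are no longer
appended at a shared cursor: gen_ldst_i32/_i64 pick a fixed-arity emitter,
and that emitter both stores the TCGArgs and links the op. For a 32-bit
guest, a load therefore becomes one linked op (sketch; oi and pi stand for
the op and parameter indices tcg_emit_op records at emission time):

tcg_gen_qemu_ld_i32(val, addr, idx, memop);
/* emits:
 *   gen_op_buf[oi]            = { .opc = INDEX_op_qemu_ld_i32, .args = pi,
 *                                 .prev = oi - 1, .next = oi + 1 }
 *   gen_opparam_buf[pi..pi+3] = { val, addr, memop, idx }
 */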

tcg/tcg.c

@ -407,7 +407,6 @@ void tcg_func_start(TCGContext *s)
/* No temps have been previously allocated for size or locality. */
memset(s->free_temps, 0, sizeof(s->free_temps));
s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS);
s->nb_labels = 0;
s->current_frame_offset = s->frame_start;
@ -415,8 +414,10 @@ void tcg_func_start(TCGContext *s)
s->goto_tb_issue_mask = 0;
#endif
s->gen_opc_ptr = s->gen_opc_buf;
s->gen_opparam_ptr = s->gen_opparam_buf;
s->gen_first_op_idx = 0;
s->gen_last_op_idx = -1;
s->gen_next_op_idx = 0;
s->gen_next_parm_idx = 0;
s->be = tcg_malloc(sizeof(TCGBackendData));
}
@ -703,9 +704,8 @@ int tcg_check_temp_count(void)
void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
int nargs, TCGArg *args)
{
int i, real_args, nb_rets;
int i, real_args, nb_rets, pi, pi_first;
unsigned sizemask, flags;
TCGArg *nparam;
TCGHelperInfo *info;
info = g_hash_table_lookup(s->helpers, (gpointer)func);
@ -758,8 +758,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
}
#endif /* TCG_TARGET_EXTEND_ARGS */
*s->gen_opc_ptr++ = INDEX_op_call;
nparam = s->gen_opparam_ptr++;
pi_first = pi = s->gen_next_parm_idx;
if (ret != TCG_CALL_DUMMY_ARG) {
#if defined(__sparc__) && !defined(__arch64__) \
&& !defined(CONFIG_TCG_INTERPRETER)
@ -769,25 +768,25 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
two return temporaries, and reassemble below. */
retl = tcg_temp_new_i64();
reth = tcg_temp_new_i64();
*s->gen_opparam_ptr++ = GET_TCGV_I64(reth);
*s->gen_opparam_ptr++ = GET_TCGV_I64(retl);
s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
nb_rets = 2;
} else {
*s->gen_opparam_ptr++ = ret;
s->gen_opparam_buf[pi++] = ret;
nb_rets = 1;
}
#else
if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
*s->gen_opparam_ptr++ = ret + 1;
*s->gen_opparam_ptr++ = ret;
s->gen_opparam_buf[pi++] = ret + 1;
s->gen_opparam_buf[pi++] = ret;
#else
*s->gen_opparam_ptr++ = ret;
*s->gen_opparam_ptr++ = ret + 1;
s->gen_opparam_buf[pi++] = ret;
s->gen_opparam_buf[pi++] = ret + 1;
#endif
nb_rets = 2;
} else {
*s->gen_opparam_ptr++ = ret;
s->gen_opparam_buf[pi++] = ret;
nb_rets = 1;
}
#endif
@ -801,7 +800,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
/* some targets want aligned 64 bit args */
if (real_args & 1) {
*s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
real_args++;
}
#endif
@ -816,26 +815,42 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
have to get more complicated to differentiate between
stack arguments and register arguments. */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
*s->gen_opparam_ptr++ = args[i] + 1;
*s->gen_opparam_ptr++ = args[i];
s->gen_opparam_buf[pi++] = args[i] + 1;
s->gen_opparam_buf[pi++] = args[i];
#else
*s->gen_opparam_ptr++ = args[i];
*s->gen_opparam_ptr++ = args[i] + 1;
s->gen_opparam_buf[pi++] = args[i];
s->gen_opparam_buf[pi++] = args[i] + 1;
#endif
real_args += 2;
continue;
}
*s->gen_opparam_ptr++ = args[i];
s->gen_opparam_buf[pi++] = args[i];
real_args++;
}
*s->gen_opparam_ptr++ = (uintptr_t)func;
*s->gen_opparam_ptr++ = flags;
s->gen_opparam_buf[pi++] = (uintptr_t)func;
s->gen_opparam_buf[pi++] = flags;
*nparam = (nb_rets << 16) | real_args;
i = s->gen_next_op_idx;
tcg_debug_assert(i < OPC_BUF_SIZE);
tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);
/* total parameters, needed to go backward in the instruction stream */
*s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
/* Set links for sequential allocation during translation. */
s->gen_op_buf[i] = (TCGOp){
.opc = INDEX_op_call,
.callo = nb_rets,
.calli = real_args,
.args = pi_first,
.prev = i - 1,
.next = i + 1
};
/* Make sure the calli field didn't overflow. */
tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
s->gen_last_op_idx = i;
s->gen_next_op_idx = i + 1;
s->gen_next_parm_idx = pi;
#if defined(__sparc__) && !defined(__arch64__) \
&& !defined(CONFIG_TCG_INTERPRETER)
@ -972,20 +987,21 @@ static const char * const ldst_name[] =
void tcg_dump_ops(TCGContext *s)
{
const uint16_t *opc_ptr;
const TCGArg *args;
TCGArg arg;
TCGOpcode c;
int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn;
const TCGOpDef *def;
char buf[128];
TCGOp *op;
int oi;
first_insn = 1;
opc_ptr = s->gen_opc_buf;
args = s->gen_opparam_buf;
while (opc_ptr < s->gen_opc_ptr) {
c = *opc_ptr++;
for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
int i, k, nb_oargs, nb_iargs, nb_cargs;
const TCGOpDef *def;
const TCGArg *args;
TCGOpcode c;
op = &s->gen_op_buf[oi];
c = op->opc;
def = &tcg_op_defs[c];
args = &s->gen_opparam_buf[op->args];
if (c == INDEX_op_debug_insn_start) {
uint64_t pc;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@ -993,21 +1009,14 @@ void tcg_dump_ops(TCGContext *s)
#else
pc = args[0];
#endif
if (!first_insn) {
if (oi != s->gen_first_op_idx) {
qemu_log("\n");
}
qemu_log(" ---- 0x%" PRIx64, pc);
first_insn = 0;
nb_oargs = def->nb_oargs;
nb_iargs = def->nb_iargs;
nb_cargs = def->nb_cargs;
} else if (c == INDEX_op_call) {
TCGArg arg;
/* variable number of arguments */
arg = *args++;
nb_oargs = arg >> 16;
nb_iargs = arg & 0xffff;
nb_oargs = op->callo;
nb_iargs = op->calli;
nb_cargs = def->nb_cargs;
/* function name, flags, out args */
@ -1028,26 +1037,20 @@ void tcg_dump_ops(TCGContext *s)
}
} else {
qemu_log(" %s ", def->name);
if (c == INDEX_op_nopn) {
/* variable number of arguments */
nb_cargs = *args;
nb_oargs = 0;
nb_iargs = 0;
} else {
nb_oargs = def->nb_oargs;
nb_iargs = def->nb_iargs;
nb_cargs = def->nb_cargs;
}
nb_oargs = def->nb_oargs;
nb_iargs = def->nb_iargs;
nb_cargs = def->nb_cargs;
k = 0;
for(i = 0; i < nb_oargs; i++) {
for (i = 0; i < nb_oargs; i++) {
if (k != 0) {
qemu_log(",");
}
qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
args[k++]));
}
for(i = 0; i < nb_iargs; i++) {
for (i = 0; i < nb_iargs; i++) {
if (k != 0) {
qemu_log(",");
}
@ -1085,16 +1088,14 @@ void tcg_dump_ops(TCGContext *s)
i = 0;
break;
}
for(; i < nb_cargs; i++) {
for (; i < nb_cargs; i++) {
if (k != 0) {
qemu_log(",");
}
arg = args[k++];
qemu_log("$0x%" TCG_PRIlx, arg);
qemu_log("$0x%" TCG_PRIlx, args[k++]);
}
}
qemu_log("\n");
args += nb_iargs + nb_oargs + nb_cargs;
}
}
@ -1244,20 +1245,6 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
}
#ifdef USE_LIVENESS_ANALYSIS
/* set a nop for an operation using 'nb_args' */
static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr,
TCGArg *args, int nb_args)
{
if (nb_args == 0) {
*opc_ptr = INDEX_op_nop;
} else {
*opc_ptr = INDEX_op_nopn;
args[0] = nb_args;
args[nb_args - 1] = nb_args;
}
}
/* liveness analysis: end of function: all temps are dead, and globals
should be in memory. */
static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
@ -1287,19 +1274,10 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
temporaries are removed. */
static void tcg_liveness_analysis(TCGContext *s)
{
int i, op_index, nb_args, nb_iargs, nb_oargs, nb_ops;
TCGOpcode op, op_new, op_new2;
TCGArg *args, arg;
const TCGOpDef *def;
uint8_t *dead_temps, *mem_temps;
uint16_t dead_args;
uint8_t sync_args;
bool have_op_new2;
s->gen_opc_ptr++; /* skip end */
nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
int oi, oi_prev, nb_ops;
nb_ops = s->gen_next_op_idx;
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
@ -1307,25 +1285,31 @@ static void tcg_liveness_analysis(TCGContext *s)
mem_temps = tcg_malloc(s->nb_temps);
tcg_la_func_end(s, dead_temps, mem_temps);
args = s->gen_opparam_ptr;
op_index = nb_ops - 1;
while (op_index >= 0) {
op = s->gen_opc_buf[op_index];
def = &tcg_op_defs[op];
switch(op) {
for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
int i, nb_iargs, nb_oargs;
TCGOpcode opc_new, opc_new2;
bool have_opc_new2;
uint16_t dead_args;
uint8_t sync_args;
TCGArg arg;
TCGOp * const op = &s->gen_op_buf[oi];
TCGArg * const args = &s->gen_opparam_buf[op->args];
TCGOpcode opc = op->opc;
const TCGOpDef *def = &tcg_op_defs[opc];
oi_prev = op->prev;
switch (opc) {
case INDEX_op_call:
{
int call_flags;
nb_args = args[-1];
args -= nb_args;
arg = *args++;
nb_iargs = arg & 0xffff;
nb_oargs = arg >> 16;
nb_oargs = op->callo;
nb_iargs = op->calli;
call_flags = args[nb_oargs + nb_iargs + 1];
/* pure functions can be removed if their result is not
used */
/* pure functions can be removed if their result is unused */
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
@ -1333,8 +1317,7 @@ static void tcg_liveness_analysis(TCGContext *s)
goto do_not_remove_call;
}
}
tcg_set_nop(s, s->gen_opc_buf + op_index,
args - 1, nb_args);
goto do_remove;
} else {
do_not_remove_call:
@ -1373,41 +1356,33 @@ static void tcg_liveness_analysis(TCGContext *s)
dead_temps[arg] = 0;
}
}
s->op_dead_args[op_index] = dead_args;
s->op_sync_args[op_index] = sync_args;
s->op_dead_args[oi] = dead_args;
s->op_sync_args[oi] = sync_args;
}
args--;
}
break;
case INDEX_op_debug_insn_start:
args -= def->nb_args;
break;
case INDEX_op_nopn:
nb_args = args[-1];
args -= nb_args;
case INDEX_op_nop:
case INDEX_op_end:
break;
case INDEX_op_discard:
args--;
/* mark the temporary as dead */
dead_temps[args[0]] = 1;
mem_temps[args[0]] = 0;
break;
case INDEX_op_end:
break;
case INDEX_op_add2_i32:
op_new = INDEX_op_add_i32;
opc_new = INDEX_op_add_i32;
goto do_addsub2;
case INDEX_op_sub2_i32:
op_new = INDEX_op_sub_i32;
opc_new = INDEX_op_sub_i32;
goto do_addsub2;
case INDEX_op_add2_i64:
op_new = INDEX_op_add_i64;
opc_new = INDEX_op_add_i64;
goto do_addsub2;
case INDEX_op_sub2_i64:
op_new = INDEX_op_sub_i64;
opc_new = INDEX_op_sub_i64;
do_addsub2:
args -= 6;
nb_iargs = 4;
nb_oargs = 2;
/* Test if the high part of the operation is dead, but not
@ -1418,12 +1393,11 @@ static void tcg_liveness_analysis(TCGContext *s)
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
goto do_remove;
}
/* Create the single operation plus nop. */
s->gen_opc_buf[op_index] = op = op_new;
/* Replace the opcode and adjust the args in place,
leaving 3 unused args at the end. */
op->opc = opc = opc_new;
args[1] = args[2];
args[2] = args[4];
assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3);
/* Fall through and mark the single-word operation live. */
nb_iargs = 2;
nb_oargs = 1;
@ -1431,27 +1405,26 @@ static void tcg_liveness_analysis(TCGContext *s)
goto do_not_remove;
case INDEX_op_mulu2_i32:
op_new = INDEX_op_mul_i32;
op_new2 = INDEX_op_muluh_i32;
have_op_new2 = TCG_TARGET_HAS_muluh_i32;
opc_new = INDEX_op_mul_i32;
opc_new2 = INDEX_op_muluh_i32;
have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
goto do_mul2;
case INDEX_op_muls2_i32:
op_new = INDEX_op_mul_i32;
op_new2 = INDEX_op_mulsh_i32;
have_op_new2 = TCG_TARGET_HAS_mulsh_i32;
opc_new = INDEX_op_mul_i32;
opc_new2 = INDEX_op_mulsh_i32;
have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
goto do_mul2;
case INDEX_op_mulu2_i64:
op_new = INDEX_op_mul_i64;
op_new2 = INDEX_op_muluh_i64;
have_op_new2 = TCG_TARGET_HAS_muluh_i64;
opc_new = INDEX_op_mul_i64;
opc_new2 = INDEX_op_muluh_i64;
have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
goto do_mul2;
case INDEX_op_muls2_i64:
op_new = INDEX_op_mul_i64;
op_new2 = INDEX_op_mulsh_i64;
have_op_new2 = TCG_TARGET_HAS_mulsh_i64;
opc_new = INDEX_op_mul_i64;
opc_new2 = INDEX_op_mulsh_i64;
have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
goto do_mul2;
do_mul2:
args -= 4;
nb_iargs = 2;
nb_oargs = 2;
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
@ -1460,28 +1433,25 @@ static void tcg_liveness_analysis(TCGContext *s)
goto do_remove;
}
/* The high part of the operation is dead; generate the low. */
s->gen_opc_buf[op_index] = op = op_new;
op->opc = opc = opc_new;
args[1] = args[2];
args[2] = args[3];
} else if (have_op_new2 && dead_temps[args[0]]
} else if (have_opc_new2 && dead_temps[args[0]]
&& !mem_temps[args[0]]) {
/* The low part of the operation is dead; generate the high. */
s->gen_opc_buf[op_index] = op = op_new2;
/* The low part of the operation is dead; generate the high. */
op->opc = opc = opc_new2;
args[0] = args[1];
args[1] = args[2];
args[2] = args[3];
} else {
goto do_not_remove;
}
assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
/* Mark the single-word operation live. */
nb_oargs = 1;
goto do_not_remove;
default:
/* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
args -= def->nb_args;
nb_iargs = def->nb_iargs;
nb_oargs = def->nb_oargs;
@ -1489,24 +1459,23 @@ static void tcg_liveness_analysis(TCGContext *s)
its outputs are dead. We assume that nb_oargs == 0
implies side effects */
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
for(i = 0; i < nb_oargs; i++) {
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (!dead_temps[arg] || mem_temps[arg]) {
goto do_not_remove;
}
}
do_remove:
tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args);
op->opc = INDEX_op_nop;
#ifdef CONFIG_PROFILER
s->del_op_count++;
#endif
} else {
do_not_remove:
/* output args are dead */
dead_args = 0;
sync_args = 0;
for(i = 0; i < nb_oargs; i++) {
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
dead_args |= (1 << i);
@ -1527,23 +1496,18 @@ static void tcg_liveness_analysis(TCGContext *s)
}
/* input args are live */
for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
dead_args |= (1 << i);
}
dead_temps[arg] = 0;
}
s->op_dead_args[op_index] = dead_args;
s->op_sync_args[op_index] = sync_args;
s->op_dead_args[oi] = dead_args;
s->op_sync_args[oi] = sync_args;
}
break;
}
op_index--;
}
if (args != s->gen_opparam_buf) {
tcg_abort();
}
}
#else
@ -2110,11 +2074,11 @@ static void tcg_reg_alloc_op(TCGContext *s,
#define STACK_DIR(x) (x)
#endif
static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
TCGOpcode opc, const TCGArg *args,
uint16_t dead_args, uint8_t sync_args)
static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
const TCGArg * const args, uint16_t dead_args,
uint8_t sync_args)
{
int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
int flags, nb_regs, i, reg;
TCGArg arg;
TCGTemp *ts;
intptr_t stack_offset;
@ -2123,22 +2087,16 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
int allocate_args;
TCGRegSet allocated_regs;
arg = *args++;
nb_oargs = arg >> 16;
nb_iargs = arg & 0xffff;
nb_params = nb_iargs;
func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
flags = args[nb_oargs + nb_iargs + 1];
nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
if (nb_regs > nb_params) {
nb_regs = nb_params;
if (nb_regs > nb_iargs) {
nb_regs = nb_iargs;
}
/* assign stack slots first */
call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long);
call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
~(TCG_TARGET_STACK_ALIGN - 1);
allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
@ -2149,7 +2107,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
}
stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
for(i = nb_regs; i < nb_params; i++) {
for(i = nb_regs; i < nb_iargs; i++) {
arg = args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
stack_offset -= sizeof(tcg_target_long);
@ -2256,8 +2214,6 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
}
}
}
return nb_iargs + nb_oargs + def->nb_cargs + 1;
}
#ifdef CONFIG_PROFILER
@ -2285,10 +2241,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
tcg_insn_unit *gen_code_buf,
long search_pc)
{
TCGOpcode opc;
int op_index;
const TCGOpDef *def;
const TCGArg *args;
int oi, oi_next;
#ifdef DEBUG_DISAS
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
@ -2303,8 +2256,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
#endif
#ifdef USE_TCG_OPTIMIZATIONS
s->gen_opparam_ptr =
tcg_optimize(s, s->gen_opc_ptr, s->gen_opparam_buf, tcg_op_defs);
tcg_optimize(s);
#endif
#ifdef CONFIG_PROFILER
@ -2333,42 +2285,31 @@ static inline int tcg_gen_code_common(TCGContext *s,
tcg_out_tb_init(s);
args = s->gen_opparam_buf;
op_index = 0;
for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
TCGOp * const op = &s->gen_op_buf[oi];
TCGArg * const args = &s->gen_opparam_buf[op->args];
TCGOpcode opc = op->opc;
const TCGOpDef *def = &tcg_op_defs[opc];
uint16_t dead_args = s->op_dead_args[oi];
uint8_t sync_args = s->op_sync_args[oi];
for(;;) {
opc = s->gen_opc_buf[op_index];
oi_next = op->next;
#ifdef CONFIG_PROFILER
tcg_table_op_count[opc]++;
#endif
def = &tcg_op_defs[opc];
#if 0
printf("%s: %d %d %d\n", def->name,
def->nb_oargs, def->nb_iargs, def->nb_cargs);
// dump_regs(s);
#endif
switch(opc) {
switch (opc) {
case INDEX_op_mov_i32:
case INDEX_op_mov_i64:
tcg_reg_alloc_mov(s, def, args, s->op_dead_args[op_index],
s->op_sync_args[op_index]);
tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
break;
case INDEX_op_movi_i32:
case INDEX_op_movi_i64:
tcg_reg_alloc_movi(s, args, s->op_dead_args[op_index],
s->op_sync_args[op_index]);
tcg_reg_alloc_movi(s, args, dead_args, sync_args);
break;
case INDEX_op_debug_insn_start:
/* debug instruction */
break;
case INDEX_op_nop:
case INDEX_op_nop1:
case INDEX_op_nop2:
case INDEX_op_nop3:
break;
case INDEX_op_nopn:
args += args[0];
goto next;
case INDEX_op_discard:
temp_dead(s, args[0]);
break;
@ -2377,12 +2318,9 @@ static inline int tcg_gen_code_common(TCGContext *s,
tcg_out_label(s, args[0], s->code_ptr);
break;
case INDEX_op_call:
args += tcg_reg_alloc_call(s, def, opc, args,
s->op_dead_args[op_index],
s->op_sync_args[op_index]);
goto next;
case INDEX_op_end:
goto the_end;
tcg_reg_alloc_call(s, op->callo, op->calli, args,
dead_args, sync_args);
break;
default:
/* Sanity check that we've not introduced any unhandled opcodes. */
if (def->flags & TCG_OPF_NOT_PRESENT) {
@ -2391,21 +2329,17 @@ static inline int tcg_gen_code_common(TCGContext *s,
/* Note: in order to speed up the code, it would be much
faster to have specialized register allocator functions for
some common argument patterns */
tcg_reg_alloc_op(s, def, opc, args, s->op_dead_args[op_index],
s->op_sync_args[op_index]);
tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
break;
}
args += def->nb_args;
next:
if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) {
return op_index;
return oi;
}
op_index++;
#ifndef NDEBUG
check_regs(s);
#endif
}
the_end:
/* Generate TB finalization at the end of block */
tcg_out_tb_finalize(s);
return -1;
@ -2416,14 +2350,18 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
#ifdef CONFIG_PROFILER
{
int n;
n = (s->gen_opc_ptr - s->gen_opc_buf);
s->op_count += n;
if (n > s->op_count_max)
s->op_count_max = n;
s->temp_count += s->nb_temps;
if (s->nb_temps > s->temp_count_max)
s->temp_count_max = s->nb_temps;
n = s->gen_last_op_idx + 1;
s->op_count += n;
if (n > s->op_count_max) {
s->op_count_max = n;
}
n = s->nb_temps;
s->temp_count += n;
if (n > s->temp_count_max) {
s->temp_count_max = n;
}
}
#endif
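
Liveness analysis is where the prev links pay off: the pass walks the same
list backward and rewrites ops in place, with no args cursor to unwind and no
nopn padding to maintain. The traversal skeleton, condensed from the loop
above (all_outputs_dead, dead_args and sync_args are placeholders for the
tests and bookkeeping elided here):

for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
    TCGOp *op = &s->gen_op_buf[oi];
    oi_prev = op->prev;              /* save before op may be rewritten */

    if (all_outputs_dead) {          /* condition elided; see above */
        op->opc = INDEX_op_nop;      /* removal is an in-place rewrite */
    } else {
        s->op_dead_args[oi] = dead_args;   /* per-op results keyed by oi */
        s->op_sync_args[oi] = sync_args;
    }
}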


@ -448,10 +448,28 @@ typedef struct TCGTempSet {
unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
} TCGTempSet;
typedef struct TCGOp {
TCGOpcode opc : 8;
/* The number of out and in parameters for a call. */
unsigned callo : 2;
unsigned calli : 6;
/* Index of the arguments for this op, or -1 for zero-operand ops. */
signed args : 16;
/* Index of the prev/next op, or -1 for the end of the list. */
signed prev : 16;
signed next : 16;
} TCGOp;
QEMU_BUILD_BUG_ON(NB_OPS > 0xff);
QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff);
QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff);
struct TCGContext {
uint8_t *pool_cur, *pool_end;
TCGPool *pool_first, *pool_current, *pool_first_large;
TCGLabel *labels;
int nb_labels;
int nb_globals;
int nb_temps;
@ -469,9 +487,6 @@ struct TCGContext {
corresponding output argument needs to be
sync to memory. */
/* tells in which temporary a given register is. It does not take
into account fixed registers */
int reg_to_temp[TCG_TARGET_NB_REGS];
TCGRegSet reserved_regs;
intptr_t current_frame_offset;
intptr_t frame_start;
@ -479,8 +494,6 @@ struct TCGContext {
int frame_reg;
tcg_insn_unit *code_ptr;
TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
GHashTable *helpers;
@ -508,14 +521,10 @@ struct TCGContext {
int goto_tb_issue_mask;
#endif
uint16_t gen_opc_buf[OPC_BUF_SIZE];
TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
uint16_t *gen_opc_ptr;
TCGArg *gen_opparam_ptr;
target_ulong gen_opc_pc[OPC_BUF_SIZE];
uint16_t gen_opc_icount[OPC_BUF_SIZE];
uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
int gen_first_op_idx;
int gen_last_op_idx;
int gen_next_op_idx;
int gen_next_parm_idx;
/* Code generation. Note that we specifically do not use tcg_insn_unit
here, because there's too much arithmetic throughout that relies
@ -533,6 +542,22 @@ struct TCGContext {
/* The TCGBackendData structure is private to tcg-target.c. */
struct TCGBackendData *be;
TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
/* tells in which temporary a given register is. It does not take
into account fixed registers */
int reg_to_temp[TCG_TARGET_NB_REGS];
TCGOp gen_op_buf[OPC_BUF_SIZE];
TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
target_ulong gen_opc_pc[OPC_BUF_SIZE];
uint16_t gen_opc_icount[OPC_BUF_SIZE];
uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
TCGLabel labels[TCG_MAX_LABELS];
};
extern TCGContext tcg_ctx;
@ -540,7 +565,7 @@ extern TCGContext tcg_ctx;
/* The number of opcodes emitted so far. */
static inline int tcg_op_buf_count(void)
{
return tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
return tcg_ctx.gen_next_op_idx;
}
/* Test for whether to terminate the TB for using too many opcodes. */
@ -718,8 +743,7 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
void tcg_gen_callN(TCGContext *s, void *func,
TCGArg ret, int nargs, TCGArg *args);
TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args,
TCGOpDef *tcg_op_def);
void tcg_optimize(TCGContext *s);
/* only used for debugging purposes */
void tcg_dump_ops(TCGContext *s);
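
One consequence of the packed TCGOp above: args, prev and next are signed
16-bit bitfields, so every buffer index, plus the -1 terminator, must fit in
[-32768, 32767], which is what the QEMU_BUILD_BUG_ON guards enforce. A
standalone sketch of the packing (stand-in enum; the size shown holds on
common ABIs, where adjacent bitfields share storage units):

#include <stdio.h>

typedef enum { OP_NOP } TCGOpcode;     /* stand-in for tcg's enum */

typedef struct TCGOp {
    TCGOpcode opc : 8;
    unsigned callo : 2;
    unsigned calli : 6;
    signed args : 16;
    signed prev : 16;
    signed next : 16;
} TCGOp;

int main(void)
{
    /* 8+2+6+16+16+16 = 64 bits: one machine word on 64-bit hosts. */
    printf("sizeof(TCGOp) = %zu\n", sizeof(TCGOp));
    return 0;
}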