diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index d7e59e79c5..5357909fff 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -854,11 +854,11 @@ static void tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, switch (vece) { case MO_8: /* ??? With zero in a register, use PSHUFB. */ - tcg_out_vex_modrm(s, OPC_PUNPCKLBW, r, 0, a); + tcg_out_vex_modrm(s, OPC_PUNPCKLBW, r, a, a); a = r; /* FALLTHRU */ case MO_16: - tcg_out_vex_modrm(s, OPC_PUNPCKLWD, r, 0, a); + tcg_out_vex_modrm(s, OPC_PUNPCKLWD, r, a, a); a = r; /* FALLTHRU */ case MO_32: @@ -867,7 +867,7 @@ static void tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, tcg_out8(s, 0); break; case MO_64: - tcg_out_vex_modrm(s, OPC_PUNPCKLQDQ, r, 0, a); + tcg_out_vex_modrm(s, OPC_PUNPCKLQDQ, r, a, a); break; default: g_assert_not_reached(); diff --git a/tcg/tcg.c b/tcg/tcg.c index 551caf1c53..6eeebe0624 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -866,6 +866,7 @@ void tcg_func_start(TCGContext *s) /* No temps have been previously allocated for size or locality. */ memset(s->free_temps, 0, sizeof(s->free_temps)); + s->nb_ops = 0; s->nb_labels = 0; s->current_frame_offset = s->frame_start; @@ -1956,6 +1957,7 @@ void tcg_op_remove(TCGContext *s, TCGOp *op) { QTAILQ_REMOVE(&s->ops, op, link); QTAILQ_INSERT_TAIL(&s->free_ops, op, link); + s->nb_ops--; #ifdef CONFIG_PROFILER atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); @@ -1975,6 +1977,7 @@ static TCGOp *tcg_op_alloc(TCGOpcode opc) } memset(op, 0, offsetof(TCGOp, link)); op->opc = opc; + s->nb_ops++; return op; } diff --git a/tcg/tcg.h b/tcg/tcg.h index 75fbad128b..88378be310 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -655,6 +655,7 @@ struct TCGContext { int nb_globals; int nb_temps; int nb_indirects; + int nb_ops; /* goto_tb support */ tcg_insn_unit *code_buf; @@ -844,7 +845,12 @@ static inline TCGOp *tcg_last_op(void) /* Test for whether to terminate the TB for using too many opcodes. 
*/ static inline bool tcg_op_buf_full(void) { - return false; + /* Soft cap, not a hard limit: true once tcg_ctx->nb_ops reaches 8000, + * i.e. "enough" opcodes to stop translation. The aim is a TB small + * enough that a RISC host can reasonably use a 16-bit signed branch + * within it. NOTE(review): 8000 counts ops, not bytes -- confirm. + */ + return tcg_ctx->nb_ops >= 8000; } /* pool based memory allocation */