tcg: Add floating point support

* Accelerate x87 emulation using new TCG FP ops
* Implement FP support on x86-64 target using SSE2
Matt Borgerson 2021-09-29 17:20:41 -07:00 committed by mborgerson
parent 631c818c1e
commit 55ea6adf5d
11 changed files with 1570 additions and 125 deletions
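For orientation, a minimal sketch (not part of the commit) of what the new primitives enable: instead of calling a softfloat helper per x87 instruction, the translator can keep stack values in host FP temporaries and emit native ops. The helper names below are the PREC-expanded forms introduced in target/i386/ops_fpu.h further down; the wrapper function itself is hypothetical.

static void gen_fadd_ST0_FT0(DisasContext *s)
{
    /* Lazily load the 80-bit ST0/FT0 values into host f64 temporaries... */
    TCGv_f64 st0 = get_st0_f64(s);
    TCGv_f64 ft0 = get_ft0_f64(s);
    /* ...then emit one host op (a single ADDSD on the x86-64 backend). */
    tcg_gen_add_f64(st0, st0, ft0);
}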


@@ -346,6 +346,221 @@ static inline void tcg_gen_discard_i32(TCGv_i32 arg)
tcg_gen_op1_i32(INDEX_op_discard, arg);
}
static inline void tcg_gen_flcr(TCGv_i32 arg)
{
tcg_gen_op1_i32(INDEX_op_flcr, arg);
}
static inline void tcg_gen_st80f_f32(TCGv_f32 arg, TCGv_ptr dst)
{
tcg_gen_op2(INDEX_op_st80f_f32, tcgv_f32_arg(arg), tcgv_ptr_arg(dst));
}
static inline void tcg_gen_st80f_f64(TCGv_f64 arg, TCGv_ptr dst)
{
tcg_gen_op2(INDEX_op_st80f_f64, tcgv_f64_arg(arg), tcgv_ptr_arg(dst));
}
static inline void tcg_gen_ld80f_f32(TCGv_f32 ret, TCGv_ptr src)
{
tcg_gen_op2(INDEX_op_ld80f_f32, tcgv_f32_arg(ret), tcgv_ptr_arg(src));
}
static inline void tcg_gen_ld80f_f64(TCGv_f64 ret, TCGv_ptr src)
{
tcg_gen_op2(INDEX_op_ld80f_f64, tcgv_f64_arg(ret), tcgv_ptr_arg(src));
}
static inline void tcg_gen_abs_f32(TCGv_f32 ret, TCGv_f32 src)
{
tcg_gen_op2(INDEX_op_abs_f32, tcgv_f32_arg(ret), tcgv_f32_arg(src));
}
static inline void tcg_gen_abs_f64(TCGv_f64 ret, TCGv_f64 src)
{
tcg_gen_op2(INDEX_op_abs_f64, tcgv_f64_arg(ret), tcgv_f64_arg(src));
}
static inline void tcg_gen_add_f32(TCGv_f32 ret, TCGv_f32 arg1, TCGv_f32 arg2)
{
tcg_gen_op3(INDEX_op_add_f32,
tcgv_f32_arg(ret), tcgv_f32_arg(arg1), tcgv_f32_arg(arg2));
}
static inline void tcg_gen_add_f64(TCGv_f64 ret, TCGv_f64 arg1, TCGv_f64 arg2)
{
tcg_gen_op3(INDEX_op_add_f64,
tcgv_f64_arg(ret), tcgv_f64_arg(arg1), tcgv_f64_arg(arg2));
}
static inline void tcg_gen_chs_f32(TCGv_f32 ret, TCGv_f32 src)
{
tcg_gen_op2(INDEX_op_chs_f32, tcgv_f32_arg(ret), tcgv_f32_arg(src));
}
static inline void tcg_gen_chs_f64(TCGv_f64 ret, TCGv_f64 src)
{
tcg_gen_op2(INDEX_op_chs_f64, tcgv_f64_arg(ret), tcgv_f64_arg(src));
}
static inline void tcg_gen_com_f32(TCGv_i64 ret, TCGv_f32 arg1, TCGv_f32 arg2)
{
tcg_gen_op3(INDEX_op_com_f32,
tcgv_i64_arg(ret), tcgv_f32_arg(arg1), tcgv_f32_arg(arg2));
}
static inline void tcg_gen_com_f64(TCGv_i64 ret, TCGv_f64 arg1, TCGv_f64 arg2)
{
tcg_gen_op3(INDEX_op_com_f64,
tcgv_i64_arg(ret), tcgv_f64_arg(arg1), tcgv_f64_arg(arg2));
}
static inline void tcg_gen_cos_f32(TCGv_f32 ret, TCGv_f32 arg)
{
tcg_gen_op2(INDEX_op_cos_f32, tcgv_f32_arg(ret), tcgv_f32_arg(arg));
}
static inline void tcg_gen_cos_f64(TCGv_f64 ret, TCGv_f64 arg)
{
tcg_gen_op2(INDEX_op_cos_f64, tcgv_f64_arg(ret), tcgv_f64_arg(arg));
}
static inline void tcg_gen_cvt32f_f64(TCGv_f64 ret, TCGv_f32 arg)
{
tcg_gen_op2(INDEX_op_cvt32f_f64, tcgv_f64_arg(ret), tcgv_f32_arg(arg));
}
static inline void tcg_gen_cvt32f_i32(TCGv_i32 ret, TCGv_f32 arg)
{
tcg_gen_op2(INDEX_op_cvt32f_i32, tcgv_i32_arg(ret), tcgv_f32_arg(arg));
}
static inline void tcg_gen_cvt32f_i64(TCGv_i64 ret, TCGv_f32 arg)
{
tcg_gen_op2(INDEX_op_cvt32f_i64, tcgv_i64_arg(ret), tcgv_f32_arg(arg));
}
static inline void tcg_gen_cvt32i_f32(TCGv_f32 ret, TCGv_i32 arg)
{
tcg_gen_op2(INDEX_op_cvt32i_f32, tcgv_f32_arg(ret), tcgv_i32_arg(arg));
}
static inline void tcg_gen_cvt32i_f64(TCGv_f64 ret, TCGv_i32 arg)
{
tcg_gen_op2(INDEX_op_cvt32i_f64, tcgv_f64_arg(ret), tcgv_i32_arg(arg));
}
static inline void tcg_gen_cvt64f_f32(TCGv_f32 ret, TCGv_f64 arg)
{
tcg_gen_op2(INDEX_op_cvt64f_f32, tcgv_f32_arg(ret), tcgv_f64_arg(arg));
}
static inline void tcg_gen_cvt64f_i32(TCGv_i32 ret, TCGv_f64 src)
{
tcg_gen_op2(INDEX_op_cvt64f_i32, tcgv_i32_arg(ret), tcgv_f64_arg(src));
}
static inline void tcg_gen_cvt64f_i64(TCGv_i64 ret, TCGv_f64 src)
{
tcg_gen_op2(INDEX_op_cvt64f_i64, tcgv_i64_arg(ret), tcgv_f64_arg(src));
}
static inline void tcg_gen_cvt64i_f32(TCGv_f32 ret, TCGv_i64 arg)
{
tcg_gen_op2(INDEX_op_cvt64i_f32, tcgv_f32_arg(ret), tcgv_i64_arg(arg));
}
static inline void tcg_gen_cvt64i_f64(TCGv_f64 ret, TCGv_i64 arg)
{
tcg_gen_op2(INDEX_op_cvt64i_f64, tcgv_f64_arg(ret), tcgv_i64_arg(arg));
}
static inline void tcg_gen_div_f32(TCGv_f32 ret, TCGv_f32 arg1, TCGv_f32 arg2)
{
tcg_gen_op3(INDEX_op_div_f32,
tcgv_f32_arg(ret), tcgv_f32_arg(arg1), tcgv_f32_arg(arg2));
}
static inline void tcg_gen_div_f64(TCGv_f64 ret, TCGv_f64 arg1, TCGv_f64 arg2)
{
tcg_gen_op3(INDEX_op_div_f64,
tcgv_f64_arg(ret), tcgv_f64_arg(arg1), tcgv_f64_arg(arg2));
}
static inline void tcg_gen_mov32f_i32(TCGv_i32 ret, TCGv_f32 src)
{
tcg_gen_op2(INDEX_op_mov32f_i32, tcgv_i32_arg(ret), tcgv_f32_arg(src));
}
static inline void tcg_gen_mov32i_f32(TCGv_f32 ret, TCGv_i32 arg)
{
tcg_gen_op2(INDEX_op_mov32i_f32, tcgv_f32_arg(ret), tcgv_i32_arg(arg));
}
static inline void tcg_gen_mov64f_i64(TCGv_i64 ret, TCGv_f64 src)
{
tcg_gen_op2(INDEX_op_mov64f_i64, tcgv_i64_arg(ret), tcgv_f64_arg(src));
}
static inline void tcg_gen_mov64i_f64(TCGv_f64 ret, TCGv_i64 arg)
{
tcg_gen_op2(INDEX_op_mov64i_f64, tcgv_f64_arg(ret), tcgv_i64_arg(arg));
}
static inline void tcg_gen_mov_f32(TCGv_f32 ret, TCGv_f32 src)
{
tcg_gen_op2(INDEX_op_mov_f32, tcgv_f32_arg(ret), tcgv_f32_arg(src));
}
static inline void tcg_gen_mov_f64(TCGv_f64 ret, TCGv_f64 src)
{
tcg_gen_op2(INDEX_op_mov_f64, tcgv_f64_arg(ret), tcgv_f64_arg(src));
}
static inline void tcg_gen_mul_f32(TCGv_f32 ret, TCGv_f32 arg1, TCGv_f32 arg2)
{
tcg_gen_op3(INDEX_op_mul_f32,
tcgv_f32_arg(ret), tcgv_f32_arg(arg1), tcgv_f32_arg(arg2));
}
static inline void tcg_gen_mul_f64(TCGv_f64 ret, TCGv_f64 arg1, TCGv_f64 arg2)
{
tcg_gen_op3(INDEX_op_mul_f64,
tcgv_f64_arg(ret), tcgv_f64_arg(arg1), tcgv_f64_arg(arg2));
}
static inline void tcg_gen_sin_f32(TCGv_f32 ret, TCGv_f32 arg)
{
tcg_gen_op2(INDEX_op_sin_f32, tcgv_f32_arg(ret), tcgv_f32_arg(arg));
}
static inline void tcg_gen_sin_f64(TCGv_f64 ret, TCGv_f64 arg)
{
tcg_gen_op2(INDEX_op_sin_f64, tcgv_f64_arg(ret), tcgv_f64_arg(arg));
}
static inline void tcg_gen_sqrt_f32(TCGv_f32 ret, TCGv_f32 arg)
{
tcg_gen_op2(INDEX_op_sqrt_f32, tcgv_f32_arg(ret), tcgv_f32_arg(arg));
}
static inline void tcg_gen_sqrt_f64(TCGv_f64 ret, TCGv_f64 arg)
{
tcg_gen_op2(INDEX_op_sqrt_f64, tcgv_f64_arg(ret), tcgv_f64_arg(arg));
}
static inline void tcg_gen_sub_f32(TCGv_f32 ret, TCGv_f32 arg1, TCGv_f32 arg2)
{
tcg_gen_op3(INDEX_op_sub_f32,
tcgv_f32_arg(ret), tcgv_f32_arg(arg1), tcgv_f32_arg(arg2));
}
static inline void tcg_gen_sub_f64(TCGv_f64 ret, TCGv_f64 arg1, TCGv_f64 arg2)
{
tcg_gen_op3(INDEX_op_sub_f64,
tcgv_f64_arg(ret), tcgv_f64_arg(arg1), tcgv_f64_arg(arg2));
}
static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg)
{
if (ret != arg) {


@@ -213,6 +213,49 @@ DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
IMPL(TCG_TARGET_HAS_qemu_st8_i32))
/* Host floating point support. */
DEF(flcr, 0, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(ld80f_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(ld80f_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(st80f_f32, 0, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(st80f_f64, 0, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(abs_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(abs_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(add_f32, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(add_f64, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(chs_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(chs_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(com_f32, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(com_f64, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(cos_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(cos_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(cvt32f_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(cvt32f_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(cvt32f_i64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(cvt32i_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(cvt32i_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(cvt64f_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(cvt64f_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(cvt64f_i64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(cvt64i_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(cvt64i_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(div_f32, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(div_f64, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(mov32f_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(mov32i_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(mov64f_i64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(mov64i_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(mov_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(mov_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(mul_f32, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(mul_f64, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(sin_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(sin_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(sqrt_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(sqrt_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(sub_f32, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
DEF(sub_f64, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
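A reading aid, since the DEF() convention is inherited rather than new here: the numeric fields are output-argument, input-argument, and constant-argument counts, followed by implementation flags. For example:

DEF(add_f32, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu))
/* 1 output, 2 inputs, 0 constant args; the opcode is only available
 * when the backend defines TCG_TARGET_HAS_fpu. */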
/* Host vector support. */
#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)


@@ -207,6 +207,10 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_v256 0
#endif
#ifndef TCG_TARGET_HAS_fpu
#define TCG_TARGET_HAS_fpu 0
#endif
#ifndef TARGET_INSN_START_EXTRA_WORDS
# define TARGET_INSN_START_WORDS 1
#else
@@ -287,6 +291,9 @@ typedef enum TCGType {
TCG_TYPE_I32,
TCG_TYPE_I64,
TCG_TYPE_F32,
TCG_TYPE_F64,
TCG_TYPE_V64,
TCG_TYPE_V128,
TCG_TYPE_V256,
@@ -355,6 +362,8 @@ typedef tcg_target_ulong TCGArg;
* TCGv_ptr : a host pointer type
* TCGv_vec : a host vector type; the exact size is not exposed
to the CPU front-end code.
* TCGv_f32 : 32 bit floating point type
* TCGv_f64 : 64 bit floating point type
* TCGv : an integer type the same size as target_ulong
(an alias for either TCGv_i32 or TCGv_i64)
The compiler's type checking will complain if you mix them
@@ -378,6 +387,8 @@ typedef struct TCGv_i32_d *TCGv_i32;
typedef struct TCGv_i64_d *TCGv_i64;
typedef struct TCGv_ptr_d *TCGv_ptr;
typedef struct TCGv_vec_d *TCGv_vec;
typedef struct TCGv_f32_d *TCGv_f32;
typedef struct TCGv_f64_d *TCGv_f64;
typedef TCGv_ptr TCGv_env;
#if TARGET_LONG_BITS == 32
#define TCGv TCGv_i32
@@ -625,6 +636,8 @@ struct TCGContext {
/* Exit to translator on overflow. */
sigjmp_buf jmp_trans;
void *disas_ctx;
};
static inline bool temp_readonly(TCGTemp *ts)
@@ -697,6 +710,16 @@ static inline TCGTemp *tcgv_vec_temp(TCGv_vec v)
return tcgv_i32_temp((TCGv_i32)v);
}
static inline TCGTemp *tcgv_f32_temp(TCGv_f32 v)
{
return tcgv_i32_temp((TCGv_i32)v);
}
static inline TCGTemp *tcgv_f64_temp(TCGv_f64 v)
{
return tcgv_i32_temp((TCGv_i32)v);
}
static inline TCGArg tcgv_i32_arg(TCGv_i32 v)
{
return temp_arg(tcgv_i32_temp(v));
@@ -717,6 +740,16 @@ static inline TCGArg tcgv_vec_arg(TCGv_vec v)
return temp_arg(tcgv_vec_temp(v));
}
static inline TCGArg tcgv_f32_arg(TCGv_f32 v)
{
return temp_arg(tcgv_f32_temp(v));
}
static inline TCGArg tcgv_f64_arg(TCGv_f64 v)
{
return temp_arg(tcgv_f64_temp(v));
}
static inline TCGv_i32 temp_tcgv_i32(TCGTemp *t)
{
(void)temp_idx(t); /* trigger embedded assert */
@@ -738,6 +771,16 @@ static inline TCGv_vec temp_tcgv_vec(TCGTemp *t)
return (TCGv_vec)temp_tcgv_i32(t);
}
static inline TCGv_f32 temp_tcgv_f32(TCGTemp *t)
{
return (TCGv_f32)temp_tcgv_i32(t);
}
static inline TCGv_f64 temp_tcgv_f64(TCGTemp *t)
{
return (TCGv_f64)temp_tcgv_i32(t);
}
#if TCG_TARGET_REG_BITS == 32
static inline TCGv_i32 TCGV_LOW(TCGv_i64 t)
{
@@ -876,6 +919,16 @@ static inline void tcg_temp_free_vec(TCGv_vec arg)
tcg_temp_free_internal(tcgv_vec_temp(arg));
}
static inline void tcg_temp_free_f32(TCGv_f32 arg)
{
tcg_temp_free_internal(tcgv_f32_temp(arg));
}
static inline void tcg_temp_free_f64(TCGv_f64 arg)
{
tcg_temp_free_internal(tcgv_f64_temp(arg));
}
static inline TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t offset,
const char *name)
{
@@ -933,6 +986,44 @@ static inline TCGv_ptr tcg_temp_local_new_ptr(void)
return temp_tcgv_ptr(t);
}
static inline TCGv_f32 tcg_global_mem_new_f32(TCGv_ptr reg, intptr_t offset,
const char *name)
{
TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_F32, reg, offset, name);
return temp_tcgv_f32(t);
}
static inline TCGv_f32 tcg_temp_new_f32(void)
{
TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_F32, false);
return temp_tcgv_f32(t);
}
static inline TCGv_f32 tcg_temp_local_new_f32(void)
{
TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_F32, true);
return temp_tcgv_f32(t);
}
static inline TCGv_f64 tcg_global_mem_new_f64(TCGv_ptr reg, intptr_t offset,
const char *name)
{
TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_F64, reg, offset, name);
return temp_tcgv_f64(t);
}
static inline TCGv_f64 tcg_temp_new_f64(void)
{
TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_F64, false);
return temp_tcgv_f64(t);
}
static inline TCGv_f64 tcg_temp_local_new_f64(void)
{
TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_F64, true);
return temp_tcgv_f64(t);
}
#if defined(CONFIG_DEBUG_TCG)
/* If you call tcg_clear_temp_count() at the start of a section of
* code which is not supposed to leak any TCG temporaries, then
@@ -1452,4 +1543,6 @@ static inline const TCGOpcode *tcg_swap_vecop_list(const TCGOpcode *n)
bool tcg_can_emit_vecop_list(const TCGOpcode *, TCGType, unsigned);
void gen_bb_epilogue(void); /* translate.c */
#endif /* TCG_H */


@@ -167,6 +167,7 @@ typedef enum X86Seg {
#define HF_IOBPT_SHIFT 24 /* an io breakpoint enabled */
#define HF_MPX_EN_SHIFT 25 /* MPX Enabled (CR4+XCR0+BNDCFGx) */
#define HF_MPX_IU_SHIFT 26 /* BND registers in-use */
#define HF_FPU_PC_SHIFT 27 /* FPU Precision Control */
#define HF_CPL_MASK (3 << HF_CPL_SHIFT)
#define HF_INHIBIT_IRQ_MASK (1 << HF_INHIBIT_IRQ_SHIFT)
@@ -192,6 +193,7 @@ typedef enum X86Seg {
#define HF_IOBPT_MASK (1 << HF_IOBPT_SHIFT)
#define HF_MPX_EN_MASK (1 << HF_MPX_EN_SHIFT)
#define HF_MPX_IU_MASK (1 << HF_MPX_IU_SHIFT)
#define HF_FPU_PC_MASK (1 << HF_FPU_PC_SHIFT)
/* hflags2 */
@@ -2145,6 +2147,13 @@ static inline void cpu_set_fpuc(CPUX86State *env, uint16_t fpuc)
if (tcg_enabled()) {
update_fp_status(env);
}
/*
* XXX: Currently emulating double extended precision with double precision
* when using hard floats.
*/
env->hflags &= ~HF_FPU_PC_MASK;
env->hflags |= ((env->fpuc >> 9) & 1) << HF_FPU_PC_SHIFT;
}
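For reference, the bit test above follows the standard x87 precision-control encoding (FPUC bits 9:8): 00 = single, 10 = double, 11 = double extended, 01 reserved. Bit 9 alone therefore separates single precision from double-or-wider, which is all the hard-float path needs while extended precision is approximated with doubles. A hypothetical helper spelling this out:

static inline int fpuc_precision_is_double_or_wider(uint16_t fpuc)
{
    return (fpuc >> 9) & 1; /* PC = bits 9:8; bit 9 set means 10 or 11 */
}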
/* mem_helper.c */

target/i386/ops_fpu.h (new file, 317 lines)

@@ -0,0 +1,317 @@
/*
* x87 FPU support
*
* Copyright (c) 2021 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#define PRECf glue(PREC, f)
#define fPREC glue(f, PREC)
#define PREC_SUFFIX glue(_, fPREC)
#define PREC_TYPE glue(TCGv_, fPREC)
#define tcg_temp_new_fp glue(tcg_temp_new_, fPREC)
#define tcg_temp_free_fp glue(tcg_temp_free_, fPREC)
#define tcg_gen_st80f_fp glue(tcg_gen_st80f, PREC_SUFFIX)
#define tcg_gen_ld80f_fp glue(tcg_gen_ld80f, PREC_SUFFIX)
#define get_ft0 glue(get_ft0, PREC_SUFFIX)
#define get_stn glue(get_stn, PREC_SUFFIX)
#define get_st0 glue(get_st0, PREC_SUFFIX)
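The glue() macros suggest this header follows QEMU's multiple-inclusion pattern: the translator (its translate.c diff is suppressed below) would include it once per precision, with the local macros redefined or #undef'd between inclusions. Assumed usage, not shown in this diff:

#define PREC 32
#include "ops_fpu.h" /* stamps out get_st0_f32, gen_fcom_f32, ... */
#undef PREC
#define PREC 64
#include "ops_fpu.h" /* stamps out get_st0_f64, gen_fcom_f64, ... */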
static PREC_TYPE get_ft0(DisasContext *s)
{
gen_flcr(s);
PREC_TYPE *v = (PREC_TYPE *)&s->ft0;
if (*v == NULL) {
*v = tcg_temp_new_fp();
TCGv_ptr p = gen_ft0_ptr();
tcg_gen_ld80f_fp(*v, p);
tcg_temp_free_ptr(p);
}
return *v;
}
static PREC_TYPE get_stn(DisasContext *s, int opreg)
{
assert(!(opreg & ~7));
gen_flcr(s);
PREC_TYPE *t = (PREC_TYPE *)&s->fpregs[(s->fpstt_delta + opreg) & 7];
if (*t == NULL) {
*t = tcg_temp_new_fp();
TCGv_ptr p = gen_stn_ptr(opreg);
tcg_gen_ld80f_fp(*t, p);
tcg_temp_free_ptr(p);
}
return *t;
}
static PREC_TYPE get_st0(DisasContext *s)
{
return get_stn(s, 0);
}
static void glue(flush_fp_regs, PREC_SUFFIX)(DisasContext *s)
{
for (int i = 0; i < 8; i++) {
PREC_TYPE *t = (PREC_TYPE *)&s->fpregs[(s->fpstt_delta + i) & 7];
if (*t) {
TCGv_ptr ptr = gen_stn_ptr(i);
tcg_gen_st80f_fp(*t, ptr);
tcg_temp_free_fp(*t);
tcg_temp_free_ptr(ptr);
*t = NULL;
}
}
if (s->ft0) {
TCGv_ptr ptr = gen_ft0_ptr();
tcg_gen_st80f_fp((PREC_TYPE)s->ft0, ptr);
tcg_temp_free_ptr(ptr);
s->ft0 = NULL;
}
}
static void glue(gen_fpop, PREC_SUFFIX)(DisasContext *s)
{
PREC_TYPE *t = (PREC_TYPE *)&s->fpregs[s->fpstt_delta & 7];
if (*t) {
tcg_temp_free_fp(*t);
*t = NULL;
}
}
static void glue(gen_fcom, PREC_SUFFIX)(DisasContext *s, PREC_TYPE arg1,
PREC_TYPE arg2)
{
TCGv_i64 res = tcg_temp_new_i64();
glue(tcg_gen_com, PREC_SUFFIX)(res, arg1, arg2);
/*
* The result uses the EFLAGS register format, as follows:
*
* C3 C2 C0
* arg1 > arg2 0 0 0
* arg1 < arg2 0 0 1
* arg1 = arg2 1 0 0
* unordered 1 1 1
*
* C3,C2,C0 = ZF,PF,CF = bits 6,2,0
*
* fpus values by relation: '<' 0x0100, '=' 0x4000, '>' 0x0000,
* unordered 0x4500
*/
tcg_gen_andi_i64(res, res, 0x45);
tcg_gen_shli_i64(res, res, 8);
TCGv_i64 fpus = tcg_temp_new_i64();
tcg_gen_ld16u_i64(fpus, cpu_env, offsetof(CPUX86State, fpus));
tcg_gen_andi_i64(fpus, fpus, ~0x4500);
tcg_gen_or_i64(fpus, fpus, res);
tcg_gen_st16_i64(fpus, cpu_env, offsetof(CPUX86State, fpus));
tcg_temp_free_i64(fpus);
tcg_temp_free_i64(res);
/* FIXME: Exceptions */
}
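/*
 * How the masking above works: COMISS/COMISD, which implement the com op
 * in the x86-64 backend, set ZF/PF/CF at EFLAGS bits 6/2/0. ANDing with
 * 0x45 keeps exactly those three bits, and shifting left by 8 lands them
 * on FPSW bits 14/10/8, i.e. C3/C2/C0, matching the table in the comment.
 */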
/* FIXME: This decode logic should be shared with helper variant */
static void glue(gen_helper_fp_arith_ST0_FT0, PREC_SUFFIX)(DisasContext *s,
int op)
{
PREC_TYPE st0 = get_st0(s);
PREC_TYPE ft0 = get_ft0(s);
switch (op) {
case 0:
glue(tcg_gen_add, PREC_SUFFIX)(st0, st0, ft0);
break;
case 1:
glue(tcg_gen_mul, PREC_SUFFIX)(st0, st0, ft0);
break;
case 2:
case 3:
glue(gen_fcom, PREC_SUFFIX)(s, st0, ft0);
break;
case 4:
glue(tcg_gen_sub, PREC_SUFFIX)(st0, st0, ft0);
break;
case 5:
glue(tcg_gen_sub, PREC_SUFFIX)(st0, ft0, st0);
break;
case 6:
glue(tcg_gen_div, PREC_SUFFIX)(st0, st0, ft0);
break;
case 7:
glue(tcg_gen_div, PREC_SUFFIX)(st0, ft0, st0);
break;
default:
g_assert_not_reached();
}
}
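/*
 * The op index follows the x87 ModRM reg-field encoding: 0 FADD, 1 FMUL,
 * 2 FCOM, 3 FCOMP, 4 FSUB, 5 FSUBR, 6 FDIV, 7 FDIVR. That is why cases 2
 * and 3 both funnel into gen_fcom, and why cases 5 and 7 swap operands.
 */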
static void glue(gen_helper_fp_arith_STN_ST0, PREC_SUFFIX)(DisasContext *s,
int op,
int opreg)
{
PREC_TYPE stn = get_stn(s, opreg);
PREC_TYPE st0 = get_st0(s);
switch (op) {
case 0:
glue(tcg_gen_add, PREC_SUFFIX)(stn, stn, st0);
break;
case 1:
glue(tcg_gen_mul, PREC_SUFFIX)(stn, stn, st0);
break;
case 4:
glue(tcg_gen_sub, PREC_SUFFIX)(stn, st0, stn);
break;
case 5:
glue(tcg_gen_sub, PREC_SUFFIX)(stn, stn, st0);
break;
case 6:
glue(tcg_gen_div, PREC_SUFFIX)(stn, st0, stn);
break;
case 7:
glue(tcg_gen_div, PREC_SUFFIX)(stn, stn, st0);
break;
default:
g_assert_not_reached();
}
}
static void glue(gen_fmov_FT0_STN, PREC_SUFFIX)(DisasContext *s, int st_index)
{
glue(tcg_gen_mov, PREC_SUFFIX)(get_ft0(s), get_stn(s, st_index));
}
static void glue(gen_fmov_ST0_STN, PREC_SUFFIX)(DisasContext *s, int st_index)
{
glue(tcg_gen_mov, PREC_SUFFIX)(get_st0(s), get_stn(s, st_index));
}
static void glue(gen_fmov_STN_ST0, PREC_SUFFIX)(DisasContext *s, int st_index)
{
glue(tcg_gen_mov, PREC_SUFFIX)(get_stn(s, st_index), get_st0(s));
}
static void glue(gen_flds_FT0, PREC_SUFFIX)(DisasContext *s, TCGv_i32 arg)
{
glue(gen_mov32i, PREC_SUFFIX)(get_ft0(s), arg);
}
static void glue(gen_flds_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i32 arg)
{
glue(gen_mov32i, PREC_SUFFIX)(get_st0(s), arg);
}
static void glue(gen_fldl_FT0, PREC_SUFFIX)(DisasContext *s, TCGv_i64 arg)
{
glue(gen_mov64i, PREC_SUFFIX)(get_ft0(s), arg);
}
static void glue(gen_fldl_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i64 arg)
{
glue(gen_mov64i, PREC_SUFFIX)(get_st0(s), arg);
}
static void glue(gen_fildl_FT0, PREC_SUFFIX)(DisasContext *s, TCGv_i32 arg)
{
glue(tcg_gen_cvt32i, PREC_SUFFIX)(get_ft0(s), arg);
}
static void glue(gen_fildl_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i32 arg)
{
glue(tcg_gen_cvt32i, PREC_SUFFIX)(get_st0(s), arg);
}
static void glue(gen_fildll_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i64 arg)
{
glue(tcg_gen_cvt64i, PREC_SUFFIX)(get_st0(s), arg);
}
static void glue(gen_fistl_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i32 arg)
{
glue(glue(tcg_gen_cvt, PRECf), _i32)(arg, get_st0(s));
}
static void glue(gen_fistll_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i64 arg)
{
glue(glue(tcg_gen_cvt, PRECf), _i64)(arg, get_st0(s));
}
static void glue(gen_fsts_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i32 arg)
{
glue(glue(gen_mov, PRECf), _i32)(arg, get_st0(s));
}
static void glue(gen_fstl_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i64 arg)
{
glue(glue(gen_mov, PRECf), _i64)(arg, get_st0(s));
}
static void glue(gen_fchs_ST0, PREC_SUFFIX)(DisasContext *s)
{
PREC_TYPE st0 = get_st0(s);
glue(tcg_gen_chs, PREC_SUFFIX)(st0, st0);
}
static void glue(gen_fabs_ST0, PREC_SUFFIX)(DisasContext *s)
{
PREC_TYPE st0 = get_st0(s);
glue(tcg_gen_abs, PREC_SUFFIX)(st0, st0);
}
static void glue(gen_fsqrt, PREC_SUFFIX)(DisasContext *s)
{
PREC_TYPE st0 = get_st0(s);
glue(tcg_gen_sqrt, PREC_SUFFIX)(st0, st0);
}
static void glue(gen_fsin, PREC_SUFFIX)(DisasContext *s)
{
PREC_TYPE st0 = get_st0(s);
glue(tcg_gen_sin, PREC_SUFFIX)(st0, st0);
}
static void glue(gen_fcos, PREC_SUFFIX)(DisasContext *s)
{
PREC_TYPE st0 = get_st0(s);
glue(tcg_gen_cos, PREC_SUFFIX)(st0, st0);
}
static void glue(gen_fld1_ST0, PREC_SUFFIX)(DisasContext *s)
{
glue(gen_movi, PREC_SUFFIX)(s, get_st0(s), 1.0);
}
static void glue(gen_fldz_ST0, PREC_SUFFIX)(DisasContext *s)
{
glue(gen_movi, PREC_SUFFIX)(s, get_st0(s), 0.0);
}
static void glue(gen_fldz_FT0, PREC_SUFFIX)(DisasContext *s)
{
glue(gen_movi, PREC_SUFFIX)(s, get_ft0(s), 0.0);
}

File diff suppressed because it is too large.


@@ -20,6 +20,7 @@ C_O0_I2(ri, r)
C_O0_I2(r, re)
C_O0_I2(s, L)
C_O0_I2(x, r)
C_O0_I2(x, L)
C_O0_I3(L, L, L)
C_O0_I3(s, L, L)
C_O0_I4(L, L, L, L)
@@ -30,6 +31,8 @@ C_O1_I1(r, q)
C_O1_I1(r, r)
C_O1_I1(x, r)
C_O1_I1(x, x)
C_O1_I1(x, L)
C_O1_I1(r, x)
C_O1_I2(Q, 0, Q)
C_O1_I2(q, r, re)
C_O1_I2(r, 0, ci)
@@ -43,6 +46,7 @@ C_O1_I2(r, r, re)
C_O1_I2(r, r, ri)
C_O1_I2(r, r, rI)
C_O1_I2(x, x, x)
C_O1_I2(r, x, x)
C_N1_I2(r, r, r)
C_N1_I2(r, r, rW)
C_O1_I3(x, x, x, x)
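(For reference, the constraint letters follow existing tcg/i386 usage rather than anything new in this commit: 'r' is any general-purpose register, 'x' any xmm register, and 'L' a general-purpose register excluding those reserved by the qemu_ld/st slow path. The entries added here serve the new FP opcodes.)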


@@ -262,6 +262,8 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN (0xf2 | P_EXT38)
#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_ADDSD (0x58 | P_EXT | P_SIMDF2)
#define OPC_ADDSS (0x58 | P_EXT | P_SIMDF3)
#define OPC_AND_GvEv (OPC_ARITH_GvEv | (ARITH_AND << 3))
#define OPC_BLENDPS (0x0c | P_EXT3A | P_DATA16)
#define OPC_BSF (0xbc | P_EXT)
@@ -270,7 +272,23 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_CALL_Jz (0xe8)
#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_COMISD (0x2f | P_EXT | P_DATA16)
#define OPC_COMISS (0x2f | P_EXT)
#define OPC_CVTSI2SD (0x2a | P_EXT | P_SIMDF2)
#define OPC_CVTSI2SS (0x2a | P_EXT | P_SIMDF3)
#define OPC_CVTSS2SD (0x5a | P_EXT | P_SIMDF3)
#define OPC_CVTSD2SI (0x2d | P_EXT | P_SIMDF2)
#define OPC_CVTSS2SI (0x2d | P_EXT | P_SIMDF3)
#define OPC_CVTSD2SS (0x5a | P_EXT | P_SIMDF2)
#define OPC_DEC_r32 (0x48)
#define OPC_DIVSD (0x5e | P_EXT | P_SIMDF2)
#define OPC_DIVSS (0x5e | P_EXT | P_SIMDF3)
#define OPC_FLD_m32fp (0xd9)
#define OPC_FLD_m64fp (0xdd)
#define OPC_FLD_m80fp (0xdb)
#define OPC_FSTP_m32fp (0xd9)
#define OPC_FSTP_m64fp (0xdd)
#define OPC_FSTP_m80fp (0xdb)
#define OPC_IMUL_GvEv (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
@@ -279,6 +297,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_JCC_short (0x70) /* ... plus condition code */
#define OPC_JMP_long (0xe9)
#define OPC_JMP_short (0xeb)
#define OPC_LDMXCSR (0xae | P_EXT)
#define OPC_LEA (0x8d)
#define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3)
#define OPC_MOVB_EvGv (0x88) /* stores, more or less */
@@ -303,6 +322,8 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_MOVSLQ (0x63 | P_REXW)
#define OPC_MOVZBL (0xb6 | P_EXT)
#define OPC_MOVZWL (0xb7 | P_EXT)
#define OPC_MULSD (0x59 | P_EXT | P_SIMDF2)
#define OPC_MULSS (0x59 | P_EXT | P_SIMDF3)
#define OPC_PABSB (0x1c | P_EXT38 | P_DATA16)
#define OPC_PABSW (0x1d | P_EXT38 | P_DATA16)
#define OPC_PABSD (0x1e | P_EXT38 | P_DATA16)
@@ -387,6 +408,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_PUSH_r32 (0x50)
#define OPC_PUSH_Iv (0x68)
#define OPC_PUSH_Ib (0x6a)
#define OPC_PUSHF (0x9c)
#define OPC_RET (0xc3)
#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1 (0xd1)
@@ -397,6 +419,10 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_SHRD_Ib (0xac | P_EXT)
#define OPC_SQRTSD (0x51 | P_EXT | P_SIMDF2)
#define OPC_SQRTSS (0x51 | P_EXT | P_SIMDF3)
#define OPC_SUBSD (0x5c | P_EXT | P_SIMDF2)
#define OPC_SUBSS (0x5c | P_EXT | P_SIMDF3)
#define OPC_TESTL (0x85)
#define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3)
#define OPC_UD2 (0x0b | P_EXT)
@@ -771,9 +797,11 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
}
switch (type) {
case TCG_TYPE_I64:
case TCG_TYPE_F64:
rexw = P_REXW;
/* fallthru */
case TCG_TYPE_I32:
case TCG_TYPE_F32:
if (ret < 16) {
if (arg < 16) {
tcg_out_modrm(s, OPC_MOVL_GvEv + rexw, ret, arg);
@@ -1027,6 +1055,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
{
switch (type) {
case TCG_TYPE_I32:
case TCG_TYPE_F32:
if (ret < 16) {
tcg_out_modrm_offset(s, OPC_MOVL_GvEv, ret, arg1, arg2);
} else {
@@ -1034,6 +1063,7 @@
}
break;
case TCG_TYPE_I64:
case TCG_TYPE_F64:
if (ret < 16) {
tcg_out_modrm_offset(s, OPC_MOVL_GvEv | P_REXW, ret, arg1, arg2);
break;
@@ -1072,6 +1102,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
{
switch (type) {
case TCG_TYPE_I32:
case TCG_TYPE_F32:
if (arg < 16) {
tcg_out_modrm_offset(s, OPC_MOVL_EvGv, arg, arg1, arg2);
} else {
@@ -1079,6 +1110,7 @@
}
break;
case TCG_TYPE_I64:
case TCG_TYPE_F64:
if (arg < 16) {
tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_REXW, arg, arg1, arg2);
break;
@@ -2174,12 +2206,64 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
#endif
}
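/*
 * The helpers below bounce values between xmm registers and the x87 stack
 * through a scratch slot just below the stack pointer. Using ESP-0x10
 * without adjusting the stack appears to rely on the 128-byte red zone of
 * the SysV x86-64 ABI (an inference; the commit does not say so).
 */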
static inline void tcg_out_stash_xmm(TCGContext *s, TCGArg a0)
{
tcg_out_modrm_offset(s, OPC_MOVQ_WqVq, a0, TCG_REG_ESP, -0x10);
}
static inline void tcg_out_unstash_xmm(TCGContext *s, TCGArg a0)
{
tcg_out_modrm_offset(s, OPC_MOVQ_VqWq, a0, TCG_REG_ESP, -0x10);
}
static inline void tcg_out_fld(TCGContext *s, TCGArg a0)
{
tcg_out_modrm_offset(s, OPC_FLD_m80fp, 5, a0, 0);
}
static inline void tcg_out_fld_xmm(TCGContext *s, TCGArg a0, bool dp)
{
tcg_out_stash_xmm(s, a0);
tcg_out_modrm_offset(s, dp ? OPC_FLD_m64fp : OPC_FLD_m32fp, 0, TCG_REG_ESP,
-0x10);
}
static inline void tcg_out_fstp(TCGContext *s, TCGArg a0)
{
tcg_out_modrm_offset(s, OPC_FSTP_m80fp, 7, a0, 0);
}
static inline void tcg_out_fstp_xmm(TCGContext *s, TCGArg a0, bool dp)
{
tcg_out_modrm_offset(s, dp ? OPC_FSTP_m64fp : OPC_FSTP_m32fp, 3,
TCG_REG_ESP, -0x10);
tcg_out_unstash_xmm(s, a0);
}
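/*
 * 0xd9 0xfe and 0xd9 0xff are the raw x87 FSIN and FCOS encodings; sin and
 * cos have no SSE equivalent, so the value is round-tripped through the
 * x87 stack.
 */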
static inline void tcg_out_fsin(TCGContext *s)
{
tcg_out8(s, 0xd9);
tcg_out8(s, 0xfe);
}
static inline void tcg_out_fcos(TCGContext *s)
{
tcg_out8(s, 0xd9);
tcg_out8(s, 0xff);
}
static const uint64_t fchs_mask32 = 0x80000000;
static const uint64_t fchs_mask64 = 0x8000000000000000L;
static const uint64_t fabs_mask32 = 0x7FFFFFFF;
static const uint64_t fabs_mask64 = 0x7FFFFFFFFFFFFFFFL;
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg args[TCG_MAX_OP_ARGS],
const int const_args[TCG_MAX_OP_ARGS])
{
TCGArg a0, a1, a2;
int c, const_a2, vexop, rexw = 0;
bool dp = false;
#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
@@ -2191,6 +2275,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case glue(glue(INDEX_op_, x), _i32)
#endif
#define OP_f32_f64(x) \
case glue(glue(INDEX_op_, x), _f64): \
dp = true; /* FALLTHRU */ \
case glue(glue(INDEX_op_, x), _f32)
/* Hoist the loads of the most common arguments. */
a0 = args[0];
a1 = args[1];
@@ -2232,6 +2321,134 @@
/* jmp to the given host address (could be epilogue) */
tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0);
break;
/* FIXME: Exceptions */
case INDEX_op_flcr:
tcg_out_st(s, TCG_TYPE_I32, a0, TCG_REG_ESP, -8);
tcg_out_modrm_offset(s, OPC_LDMXCSR, 2, TCG_REG_ESP, -8);
break;
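/*
 * flcr (presumably "load FP control register") spills the 32-bit guest
 * value just below the stack pointer and reloads it with LDMXCSR, so
 * guest rounding-mode changes reach the host SSE unit.
 */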
OP_f32_f64(st80f):
tcg_out_fld_xmm(s, a0, dp);
tcg_out_fstp(s, a1);
break;
OP_f32_f64(ld80f):
tcg_out_fld(s, a1);
tcg_out_fstp_xmm(s, a0, dp);
break;
case INDEX_op_cvt32f_f64:
tcg_out_modrm(s, OPC_CVTSS2SD, a0, a1);
break;
OP_32_64(cvt32f):
tcg_out_modrm(s, OPC_CVTSS2SI + rexw, a0, a1);
break;
OP_f32_f64(cvt32i):
tcg_out_modrm(s, OPC_PXOR, a0, a0);
tcg_out_modrm(s, dp ? OPC_CVTSI2SD : OPC_CVTSI2SS, a0, a1);
break;
case INDEX_op_cvt64f_f32:
tcg_out_modrm(s, OPC_CVTSD2SS, a0, a1);
break;
OP_32_64(cvt64f):
tcg_out_modrm(s, OPC_CVTSD2SI + rexw, a0, a1);
break;
OP_f32_f64(cvt64i):
tcg_out_modrm(s, OPC_PXOR, a0, a0);
tcg_out_modrm(s, (dp ? OPC_CVTSI2SD : OPC_CVTSI2SS) | P_REXW, a0, a1);
break;
case INDEX_op_mov64f_i64:
case INDEX_op_mov64i_f64:
case INDEX_op_mov_f64:
dp = true; /* FALLTHRU */
case INDEX_op_mov32f_i32:
case INDEX_op_mov32i_f32:
case INDEX_op_mov_f32:
tcg_out_mov(s, dp ? TCG_TYPE_F64 : TCG_TYPE_F32, a0, a1);
break;
OP_f32_f64(add): {
int mopc = dp ? OPC_ADDSD : OPC_ADDSS;
if (a0 == a1) {
tcg_out_modrm(s, mopc, a1, a2);
} else if (a0 == a2) {
tcg_out_modrm(s, mopc, a2, a1);
} else {
tcg_out_stash_xmm(s, a1);
tcg_out_modrm(s, mopc, a1, a2);
tcg_out_mov(s, dp ? TCG_TYPE_F64 : TCG_TYPE_F32, a0, a1);
tcg_out_unstash_xmm(s, a1);
/* FIXME: AVX,reg,stack */
}
break;
}
OP_f32_f64(sub): {
int mopc = dp ? OPC_SUBSD : OPC_SUBSS;
if (a0 == a1) {
tcg_out_modrm(s, mopc, a1, a2);
} else {
tcg_out_stash_xmm(s, a1);
tcg_out_modrm(s, mopc, a1, a2);
tcg_out_mov(s, dp ? TCG_TYPE_F64 : TCG_TYPE_F32, a0, a1);
tcg_out_unstash_xmm(s, a1);
/* FIXME: AVX,reg,stack */
}
break;
}
OP_f32_f64(mul): {
int mopc = dp ? OPC_MULSD : OPC_MULSS;
if (a0 == a1) {
tcg_out_modrm(s, mopc, a1, a2);
} else if (a0 == a2) {
tcg_out_modrm(s, mopc, a2, a1);
} else {
/* FIXME: Handle 3 unique operand variant (AVX,reg,stack) */
assert(0);
}
break;
}
OP_f32_f64(div): {
int mopc = dp ? OPC_DIVSD : OPC_DIVSS;
if (a0 == a1) {
tcg_out_modrm(s, mopc, a1, a2);
} else {
tcg_out_stash_xmm(s, a1);
tcg_out_modrm(s, mopc, a1, a2);
tcg_out_mov(s, dp ? TCG_TYPE_F64 : TCG_TYPE_F32, a0, a1);
tcg_out_unstash_xmm(s, a1);
/* FIXME: AVX,reg,stack */
}
break;
}
OP_f32_f64(abs):
assert(a0 == a1); /* FIXME: add mov */
tcg_out_modrm_pool(s, OPC_PAND, a0);
new_pool_l2(s, R_386_PC32, s->code_ptr - 4, -4,
dp ? fabs_mask64 : fabs_mask32, 0);
break;
OP_f32_f64(chs):
assert(a0 == a1); /* FIXME: add mov */
tcg_out_modrm_pool(s, OPC_PXOR, a0);
new_pool_l2(s, R_386_PC32, s->code_ptr - 4, -4,
dp ? fchs_mask64 : fchs_mask32, 0);
break;
OP_f32_f64(com):
tcg_out_modrm(s, dp ? OPC_COMISD : OPC_COMISS, a1, a2);
tcg_out8(s, OPC_PUSHF);
tcg_out_pop(s, a0);
break;
OP_f32_f64(sqrt):
tcg_out_modrm(s, dp ? OPC_SQRTSD : OPC_SQRTSS, a0, a1);
break;
OP_f32_f64(sin):
tcg_out_fld_xmm(s, a1, dp);
tcg_out_fsin(s);
tcg_out_fstp_xmm(s, a0, dp);
break;
OP_f32_f64(cos):
tcg_out_fld_xmm(s, a1, dp);
tcg_out_fcos(s);
tcg_out_fstp_xmm(s, a0, dp);
break;
case INDEX_op_br:
tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
break;
@@ -2918,6 +3135,55 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_goto_ptr:
return C_O0_I1(r);
case INDEX_op_ld80f_f32:
case INDEX_op_ld80f_f64:
return C_O1_I1(x, L);
case INDEX_op_st80f_f32:
case INDEX_op_st80f_f64:
return C_O0_I2(x, L);
case INDEX_op_flcr:
return C_O0_I1(r);
case INDEX_op_mul_f32:
case INDEX_op_mul_f64:
case INDEX_op_div_f32:
case INDEX_op_div_f64:
case INDEX_op_add_f32:
case INDEX_op_add_f64:
case INDEX_op_sub_f32:
case INDEX_op_sub_f64:
return C_O1_I2(x, x, x);
case INDEX_op_cvt32i_f32:
case INDEX_op_cvt32i_f64:
case INDEX_op_cvt64i_f32:
case INDEX_op_cvt64i_f64:
case INDEX_op_mov32i_f32:
case INDEX_op_mov64i_f64:
return C_O1_I1(x, r);
case INDEX_op_mov_f32:
case INDEX_op_mov_f64:
case INDEX_op_abs_f32:
case INDEX_op_abs_f64:
case INDEX_op_chs_f32:
case INDEX_op_chs_f64:
case INDEX_op_sqrt_f32:
case INDEX_op_sqrt_f64:
case INDEX_op_cos_f32:
case INDEX_op_cos_f64:
case INDEX_op_sin_f32:
case INDEX_op_sin_f64:
case INDEX_op_cvt32f_f64:
case INDEX_op_cvt64f_f32:
return C_O1_I1(x, x);
case INDEX_op_com_f32:
case INDEX_op_com_f64:
return C_O1_I2(r, x, x);
case INDEX_op_cvt32f_i32:
case INDEX_op_cvt32f_i64:
case INDEX_op_cvt64f_i32:
case INDEX_op_cvt64f_i64:
case INDEX_op_mov32f_i32:
case INDEX_op_mov64f_i64:
return C_O1_I1(r, x);
case INDEX_op_ld8u_i32:
case INDEX_op_ld8u_i64:
case INDEX_op_ld8s_i32:
@@ -3796,6 +4062,8 @@ static void tcg_target_init(TCGContext *s)
if (TCG_TARGET_REG_BITS == 64) {
tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
}
tcg_target_available_regs[TCG_TYPE_F32] = ALL_VECTOR_REGS;
tcg_target_available_regs[TCG_TYPE_F64] = ALL_VECTOR_REGS;
if (have_avx1) {
tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;


@@ -178,6 +178,8 @@ extern bool have_movbe;
#define TCG_TARGET_HAS_qemu_st8_i32 1
#endif
#define TCG_TARGET_HAS_fpu (TCG_TARGET_REG_BITS == 64)
/* We do not support older SSE systems, only beginning with AVX1. */
#define TCG_TARGET_HAS_v64 have_avx1
#define TCG_TARGET_HAS_v128 have_avx1


@@ -223,6 +223,8 @@ void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
{
gen_bb_epilogue();
if (cond == TCG_COND_ALWAYS) {
tcg_gen_br(l);
} else if (cond != TCG_COND_NEVER) {
@@ -233,6 +235,8 @@ void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
{
gen_bb_epilogue();
if (cond == TCG_COND_ALWAYS) {
tcg_gen_br(l);
} else if (cond != TCG_COND_NEVER) {
@@ -1446,6 +1450,8 @@ void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
{
gen_bb_epilogue();
if (cond == TCG_COND_ALWAYS) {
tcg_gen_br(l);
} else if (cond != TCG_COND_NEVER) {
@@ -1463,6 +1469,8 @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
{
gen_bb_epilogue();
if (TCG_TARGET_REG_BITS == 64) {
tcg_gen_brcond_i64(cond, arg1, tcg_constant_i64(arg2), l);
} else if (cond == TCG_COND_ALWAYS) {
@@ -2713,6 +2721,9 @@ void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
* This requires coordination with targets that do not use
* the translator_loop.
*/
gen_bb_epilogue();
uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx;
if (tb == NULL) {
@@ -2734,6 +2745,8 @@ void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
void tcg_gen_goto_tb(unsigned idx)
{
gen_bb_epilogue();
/* We tested CF_NO_GOTO_TB in translator_use_goto_tb. */
tcg_debug_assert(!(tcg_ctx->tb_cflags & CF_NO_GOTO_TB));
/* We only support two chained exits. */
@@ -2756,6 +2769,8 @@ void tcg_gen_lookup_and_goto_ptr(void)
return;
}
gen_bb_epilogue();
plugin_gen_disable_mem_helpers();
ptr = tcg_temp_new_ptr();
gen_helper_lookup_tb_ptr(ptr, cpu_env);


@@ -1456,6 +1456,49 @@ bool tcg_op_supported(TCGOpcode op)
case INDEX_op_cmpsel_vec:
return have_vec && TCG_TARGET_HAS_cmpsel_vec;
case INDEX_op_flcr:
case INDEX_op_ld80f_f32:
case INDEX_op_ld80f_f64:
case INDEX_op_st80f_f32:
case INDEX_op_st80f_f64:
case INDEX_op_abs_f32:
case INDEX_op_abs_f64:
case INDEX_op_add_f32:
case INDEX_op_add_f64:
case INDEX_op_chs_f32:
case INDEX_op_chs_f64:
case INDEX_op_com_f32:
case INDEX_op_com_f64:
case INDEX_op_cos_f32:
case INDEX_op_cos_f64:
case INDEX_op_cvt32f_f64:
case INDEX_op_cvt32f_i32:
case INDEX_op_cvt32f_i64:
case INDEX_op_cvt32i_f32:
case INDEX_op_cvt32i_f64:
case INDEX_op_cvt64f_f32:
case INDEX_op_cvt64f_i32:
case INDEX_op_cvt64f_i64:
case INDEX_op_cvt64i_f32:
case INDEX_op_cvt64i_f64:
case INDEX_op_div_f32:
case INDEX_op_div_f64:
case INDEX_op_mov32f_i32:
case INDEX_op_mov32i_f32:
case INDEX_op_mov64f_i64:
case INDEX_op_mov64i_f64:
case INDEX_op_mov_f32:
case INDEX_op_mov_f64:
case INDEX_op_mul_f32:
case INDEX_op_mul_f64:
case INDEX_op_sin_f32:
case INDEX_op_sin_f64:
case INDEX_op_sqrt_f32:
case INDEX_op_sqrt_f64:
case INDEX_op_sub_f32:
case INDEX_op_sub_f64:
return TCG_TARGET_HAS_fpu;
default:
tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
return true;
@@ -1472,6 +1515,8 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
const TCGHelperInfo *info;
TCGOp *op;
gen_bb_epilogue();
info = g_hash_table_lookup(helper_table, (gpointer)func);
typemask = info->typemask;
@@ -1721,6 +1766,12 @@ static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
break;
#endif
case TCG_TYPE_F32:
snprintf(buf, buf_size, "$%f", *(float *)&ts->val);
break;
case TCG_TYPE_F64:
snprintf(buf, buf_size, "$%g", *(double *)&ts->val);
break;
case TCG_TYPE_V64:
case TCG_TYPE_V128:
case TCG_TYPE_V256:
@@ -3058,9 +3109,11 @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
switch (ts->type) {
case TCG_TYPE_I32:
case TCG_TYPE_F32:
size = align = 4;
break;
case TCG_TYPE_I64:
case TCG_TYPE_F64:
case TCG_TYPE_V64:
size = align = 8;
break;
@@ -3268,6 +3321,8 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
preferred_regs, ts->indirect_base);
if (ts->type <= TCG_TYPE_I64) {
tcg_out_movi(s, ts->type, reg, ts->val);
} else if (ts->type == TCG_TYPE_F32 || ts->type == TCG_TYPE_F64) {
assert(0); /* FIXME */
} else {
uint64_t val = ts->val;
MemOp vece = MO_64;