diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index 2a654f350c..3cc670ae29 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -346,6 +346,221 @@ static inline void tcg_gen_discard_i32(TCGv_i32 arg) tcg_gen_op1_i32(INDEX_op_discard, arg); } +static inline void tcg_gen_flcr(TCGv_i32 arg) +{ + tcg_gen_op1_i32(INDEX_op_flcr, arg); +} + +static inline void tcg_gen_st80f_f32(TCGv_f32 arg, TCGv_ptr dst) +{ + tcg_gen_op2(INDEX_op_st80f_f32, tcgv_f32_arg(arg), tcgv_ptr_arg(dst)); +} + +static inline void tcg_gen_st80f_f64(TCGv_f64 arg, TCGv_ptr dst) +{ + tcg_gen_op2(INDEX_op_st80f_f64, tcgv_f64_arg(arg), tcgv_ptr_arg(dst)); +} + +static inline void tcg_gen_ld80f_f32(TCGv_f32 ret, TCGv_ptr src) +{ + tcg_gen_op2(INDEX_op_ld80f_f32, tcgv_f32_arg(ret), tcgv_ptr_arg(src)); +} + +static inline void tcg_gen_ld80f_f64(TCGv_f64 ret, TCGv_ptr src) +{ + tcg_gen_op2(INDEX_op_ld80f_f64, tcgv_f64_arg(ret), tcgv_ptr_arg(src)); +} + +static inline void tcg_gen_abs_f32(TCGv_f32 ret, TCGv_f32 src) +{ + tcg_gen_op2(INDEX_op_abs_f32, tcgv_f32_arg(ret), tcgv_f32_arg(src)); +} + +static inline void tcg_gen_abs_f64(TCGv_f64 ret, TCGv_f64 src) +{ + tcg_gen_op2(INDEX_op_abs_f64, tcgv_f64_arg(ret), tcgv_f64_arg(src)); +} + +static inline void tcg_gen_add_f32(TCGv_f32 ret, TCGv_f32 arg1, TCGv_f32 arg2) +{ + tcg_gen_op3(INDEX_op_add_f32, + tcgv_f32_arg(ret), tcgv_f32_arg(arg1), tcgv_f32_arg(arg2)); +} + +static inline void tcg_gen_add_f64(TCGv_f64 ret, TCGv_f64 arg1, TCGv_f64 arg2) +{ + tcg_gen_op3(INDEX_op_add_f64, + tcgv_f64_arg(ret), tcgv_f64_arg(arg1), tcgv_f64_arg(arg2)); +} + +static inline void tcg_gen_chs_f32(TCGv_f32 ret, TCGv_f32 src) +{ + tcg_gen_op2(INDEX_op_chs_f32, tcgv_f32_arg(ret), tcgv_f32_arg(src)); +} + +static inline void tcg_gen_chs_f64(TCGv_f64 ret, TCGv_f64 src) +{ + tcg_gen_op2(INDEX_op_chs_f64, tcgv_f64_arg(ret), tcgv_f64_arg(src)); +} + +static inline void tcg_gen_com_f32(TCGv_i64 ret, TCGv_f32 arg1, TCGv_f32 arg2) +{ + tcg_gen_op3(INDEX_op_com_f32, + tcgv_i64_arg(ret), tcgv_f32_arg(arg1), tcgv_f32_arg(arg2)); +} + +static inline void tcg_gen_com_f64(TCGv_i64 ret, TCGv_f64 arg1, TCGv_f64 arg2) +{ + tcg_gen_op3(INDEX_op_com_f64, + tcgv_i64_arg(ret), tcgv_f64_arg(arg1), tcgv_f64_arg(arg2)); +} + +static inline void tcg_gen_cos_f32(TCGv_f32 ret, TCGv_f32 arg) +{ + tcg_gen_op2(INDEX_op_cos_f32, tcgv_f32_arg(ret), tcgv_f32_arg(arg)); +} + +static inline void tcg_gen_cos_f64(TCGv_f64 ret, TCGv_f64 arg) +{ + tcg_gen_op2(INDEX_op_cos_f64, tcgv_f64_arg(ret), tcgv_f64_arg(arg)); +} + +static inline void tcg_gen_cvt32f_f64(TCGv_f64 ret, TCGv_f32 arg) +{ + tcg_gen_op2(INDEX_op_cvt32f_f64, tcgv_f64_arg(ret), tcgv_f32_arg(arg)); +} + +static inline void tcg_gen_cvt32f_i32(TCGv_i32 ret, TCGv_f32 arg) +{ + tcg_gen_op2(INDEX_op_cvt32f_i32, tcgv_i32_arg(ret), tcgv_f32_arg(arg)); +} + +static inline void tcg_gen_cvt32f_i64(TCGv_i64 ret, TCGv_f32 arg) +{ + tcg_gen_op2(INDEX_op_cvt32f_i64, tcgv_i64_arg(ret), tcgv_f32_arg(arg)); +} + +static inline void tcg_gen_cvt32i_f32(TCGv_f32 ret, TCGv_i32 arg) +{ + tcg_gen_op2(INDEX_op_cvt32i_f32, tcgv_f32_arg(ret), tcgv_i32_arg(arg)); +} + +static inline void tcg_gen_cvt32i_f64(TCGv_f64 ret, TCGv_i32 arg) +{ + tcg_gen_op2(INDEX_op_cvt32i_f64, tcgv_f64_arg(ret), tcgv_i32_arg(arg)); +} + +static inline void tcg_gen_cvt64f_f32(TCGv_f32 ret, TCGv_f64 arg) +{ + tcg_gen_op2(INDEX_op_cvt64f_f32, tcgv_f32_arg(ret), tcgv_f64_arg(arg)); +} + +static inline void tcg_gen_cvt64f_i32(TCGv_i32 ret, TCGv_f64 src) +{ + tcg_gen_op2(INDEX_op_cvt64f_i32, tcgv_i32_arg(ret), 
tcgv_f64_arg(src)); +} + +static inline void tcg_gen_cvt64f_i64(TCGv_i64 ret, TCGv_f64 src) +{ + tcg_gen_op2(INDEX_op_cvt64f_i64, tcgv_i64_arg(ret), tcgv_f64_arg(src)); +} + +static inline void tcg_gen_cvt64i_f32(TCGv_f32 ret, TCGv_i64 arg) +{ + tcg_gen_op2(INDEX_op_cvt64i_f32, tcgv_f32_arg(ret), tcgv_i64_arg(arg)); +} + +static inline void tcg_gen_cvt64i_f64(TCGv_f64 ret, TCGv_i64 arg) +{ + tcg_gen_op2(INDEX_op_cvt64i_f64, tcgv_f64_arg(ret), tcgv_i64_arg(arg)); +} + +static inline void tcg_gen_div_f32(TCGv_f32 ret, TCGv_f32 arg1, TCGv_f32 arg2) +{ + tcg_gen_op3(INDEX_op_div_f32, + tcgv_f32_arg(ret), tcgv_f32_arg(arg1), tcgv_f32_arg(arg2)); +} + +static inline void tcg_gen_div_f64(TCGv_f64 ret, TCGv_f64 arg1, TCGv_f64 arg2) +{ + tcg_gen_op3(INDEX_op_div_f64, + tcgv_f64_arg(ret), tcgv_f64_arg(arg1), tcgv_f64_arg(arg2)); +} + +static inline void tcg_gen_mov32f_i32(TCGv_i32 ret, TCGv_f32 src) +{ + tcg_gen_op2(INDEX_op_mov32f_i32, tcgv_i32_arg(ret), tcgv_f32_arg(src)); +} + +static inline void tcg_gen_mov32i_f32(TCGv_f32 ret, TCGv_i32 arg) +{ + tcg_gen_op2(INDEX_op_mov32i_f32, tcgv_f32_arg(ret), tcgv_i32_arg(arg)); +} + +static inline void tcg_gen_mov64f_i64(TCGv_i64 ret, TCGv_f64 src) +{ + tcg_gen_op2(INDEX_op_mov64f_i64, tcgv_i64_arg(ret), tcgv_f64_arg(src)); +} + +static inline void tcg_gen_mov64i_f64(TCGv_f64 ret, TCGv_i64 arg) +{ + tcg_gen_op2(INDEX_op_mov64i_f64, tcgv_f64_arg(ret), tcgv_i64_arg(arg)); +} + +static inline void tcg_gen_mov_f32(TCGv_f32 ret, TCGv_f32 src) +{ + tcg_gen_op2(INDEX_op_mov_f32, tcgv_f32_arg(ret), tcgv_f32_arg(src)); +} + +static inline void tcg_gen_mov_f64(TCGv_f64 ret, TCGv_f64 src) +{ + tcg_gen_op2(INDEX_op_mov_f64, tcgv_f64_arg(ret), tcgv_f64_arg(src)); +} + +static inline void tcg_gen_mul_f32(TCGv_f32 ret, TCGv_f32 arg1, TCGv_f32 arg2) +{ + tcg_gen_op3(INDEX_op_mul_f32, + tcgv_f32_arg(ret), tcgv_f32_arg(arg1), tcgv_f32_arg(arg2)); +} + +static inline void tcg_gen_mul_f64(TCGv_f64 ret, TCGv_f64 arg1, TCGv_f64 arg2) +{ + tcg_gen_op3(INDEX_op_mul_f64, + tcgv_f64_arg(ret), tcgv_f64_arg(arg1), tcgv_f64_arg(arg2)); +} + +static inline void tcg_gen_sin_f32(TCGv_f32 ret, TCGv_f32 arg) +{ + tcg_gen_op2(INDEX_op_sin_f32, tcgv_f32_arg(ret), tcgv_f32_arg(arg)); +} + +static inline void tcg_gen_sin_f64(TCGv_f64 ret, TCGv_f64 arg) +{ + tcg_gen_op2(INDEX_op_sin_f64, tcgv_f64_arg(ret), tcgv_f64_arg(arg)); +} + +static inline void tcg_gen_sqrt_f32(TCGv_f32 ret, TCGv_f32 arg) +{ + tcg_gen_op2(INDEX_op_sqrt_f32, tcgv_f32_arg(ret), tcgv_f32_arg(arg)); +} + +static inline void tcg_gen_sqrt_f64(TCGv_f64 ret, TCGv_f64 arg) +{ + tcg_gen_op2(INDEX_op_sqrt_f64, tcgv_f64_arg(ret), tcgv_f64_arg(arg)); +} + +static inline void tcg_gen_sub_f32(TCGv_f32 ret, TCGv_f32 arg1, TCGv_f32 arg2) +{ + tcg_gen_op3(INDEX_op_sub_f32, + tcgv_f32_arg(ret), tcgv_f32_arg(arg1), tcgv_f32_arg(arg2)); +} + +static inline void tcg_gen_sub_f64(TCGv_f64 ret, TCGv_f64 arg1, TCGv_f64 arg2) +{ + tcg_gen_op3(INDEX_op_sub_f64, + tcgv_f64_arg(ret), tcgv_f64_arg(arg1), tcgv_f64_arg(arg2)); +} + static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg) { if (ret != arg) { diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 675873e200..73f4fca225 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -213,6 +213,49 @@ DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | IMPL(TCG_TARGET_HAS_qemu_st8_i32)) +/* Host floating point support. 
*/ +DEF(flcr, 0, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(ld80f_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(ld80f_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(st80f_f32, 0, 2, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(st80f_f64, 0, 2, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(abs_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(abs_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(add_f32, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(add_f64, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(chs_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(chs_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(com_f32, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(com_f64, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(cos_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(cos_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(cvt32f_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(cvt32f_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(cvt32f_i64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(cvt32i_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(cvt32i_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(cvt64f_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(cvt64f_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(cvt64f_i64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(cvt64i_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(cvt64i_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(div_f32, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(div_f64, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(mov32f_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(mov32i_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(mov64f_i64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(mov64i_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(mov_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(mov_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(mul_f32, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(mul_f64, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(sin_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(sin_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(sqrt_f32, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(sqrt_f64, 1, 1, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(sub_f32, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu)) +DEF(sub_f64, 1, 2, 0, IMPL(TCG_TARGET_HAS_fpu)) + /* Host vector support. */ #define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 255ec1446f..595dfdbc55 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -207,6 +207,10 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_v256 0 #endif +#ifndef TCG_TARGET_HAS_fpu +#define TCG_TARGET_HAS_fpu 0 +#endif + #ifndef TARGET_INSN_START_EXTRA_WORDS # define TARGET_INSN_START_WORDS 1 #else @@ -287,6 +291,9 @@ typedef enum TCGType { TCG_TYPE_I32, TCG_TYPE_I64, + TCG_TYPE_F32, + TCG_TYPE_F64, + TCG_TYPE_V64, TCG_TYPE_V128, TCG_TYPE_V256, @@ -355,6 +362,8 @@ typedef tcg_target_ulong TCGArg; * TCGv_ptr : a host pointer type * TCGv_vec : a host vector type; the exact size is not exposed to the CPU front-end code. + * TCGv_f32 : 32 bit floating point type + * TCGv_f64 : 64 bit floating point type * TCGv : an integer type the same size as target_ulong (an alias for either TCGv_i32 or TCGv_i64) The compiler's type checking will complain if you mix them @@ -378,6 +387,8 @@ typedef struct TCGv_i32_d *TCGv_i32; typedef struct TCGv_i64_d *TCGv_i64; typedef struct TCGv_ptr_d *TCGv_ptr; typedef struct TCGv_vec_d *TCGv_vec; +typedef struct TCGv_f32_d *TCGv_f32; +typedef struct TCGv_f64_d *TCGv_f64; typedef TCGv_ptr TCGv_env; #if TARGET_LONG_BITS == 32 #define TCGv TCGv_i32 @@ -625,6 +636,8 @@ struct TCGContext { /* Exit to translator on overflow. 
*/ sigjmp_buf jmp_trans; + + void *disas_ctx; }; static inline bool temp_readonly(TCGTemp *ts) @@ -697,6 +710,16 @@ static inline TCGTemp *tcgv_vec_temp(TCGv_vec v) return tcgv_i32_temp((TCGv_i32)v); } +static inline TCGTemp *tcgv_f32_temp(TCGv_f32 v) +{ + return tcgv_i32_temp((TCGv_i32)v); +} + +static inline TCGTemp *tcgv_f64_temp(TCGv_f64 v) +{ + return tcgv_i32_temp((TCGv_i32)v); +} + static inline TCGArg tcgv_i32_arg(TCGv_i32 v) { return temp_arg(tcgv_i32_temp(v)); @@ -717,6 +740,16 @@ static inline TCGArg tcgv_vec_arg(TCGv_vec v) return temp_arg(tcgv_vec_temp(v)); } +static inline TCGArg tcgv_f32_arg(TCGv_f32 v) +{ + return temp_arg(tcgv_f32_temp(v)); +} + +static inline TCGArg tcgv_f64_arg(TCGv_f64 v) +{ + return temp_arg(tcgv_f64_temp(v)); +} + static inline TCGv_i32 temp_tcgv_i32(TCGTemp *t) { (void)temp_idx(t); /* trigger embedded assert */ @@ -738,6 +771,16 @@ static inline TCGv_vec temp_tcgv_vec(TCGTemp *t) return (TCGv_vec)temp_tcgv_i32(t); } +static inline TCGv_f32 temp_tcgv_f32(TCGTemp *t) +{ + return (TCGv_f32)temp_tcgv_i32(t); +} + +static inline TCGv_f64 temp_tcgv_f64(TCGTemp *t) +{ + return (TCGv_f64)temp_tcgv_i32(t); +} + #if TCG_TARGET_REG_BITS == 32 static inline TCGv_i32 TCGV_LOW(TCGv_i64 t) { @@ -876,6 +919,16 @@ static inline void tcg_temp_free_vec(TCGv_vec arg) tcg_temp_free_internal(tcgv_vec_temp(arg)); } +static inline void tcg_temp_free_f32(TCGv_f32 arg) +{ + tcg_temp_free_internal(tcgv_f32_temp(arg)); +} + +static inline void tcg_temp_free_f64(TCGv_f64 arg) +{ + tcg_temp_free_internal(tcgv_f64_temp(arg)); +} + static inline TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t offset, const char *name) { @@ -933,6 +986,44 @@ static inline TCGv_ptr tcg_temp_local_new_ptr(void) return temp_tcgv_ptr(t); } +static inline TCGv_f32 tcg_global_mem_new_f32(TCGv_ptr reg, intptr_t offset, + const char *name) +{ + TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_F32, reg, offset, name); + return temp_tcgv_f32(t); +} + +static inline TCGv_f32 tcg_temp_new_f32(void) +{ + TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_F32, false); + return temp_tcgv_f32(t); +} + +static inline TCGv_f32 tcg_temp_local_new_f32(void) +{ + TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_F32, true); + return temp_tcgv_f32(t); +} + +static inline TCGv_f64 tcg_global_mem_new_f64(TCGv_ptr reg, intptr_t offset, + const char *name) +{ + TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_F64, reg, offset, name); + return temp_tcgv_f64(t); +} + +static inline TCGv_f64 tcg_temp_new_f64(void) +{ + TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_F64, false); + return temp_tcgv_f64(t); +} + +static inline TCGv_f64 tcg_temp_local_new_f64(void) +{ + TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_F64, true); + return temp_tcgv_f64(t); +} + #if defined(CONFIG_DEBUG_TCG) /* If you call tcg_clear_temp_count() at the start of a section of * code which is not supposed to leak any TCG temporaries, then @@ -1452,4 +1543,6 @@ static inline const TCGOpcode *tcg_swap_vecop_list(const TCGOpcode *n) bool tcg_can_emit_vecop_list(const TCGOpcode *, TCGType, unsigned); +void gen_bb_epilogue(void); /* translate.c */ + #endif /* TCG_H */ diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 6c50d3ab4f..687d8924f5 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -167,6 +167,7 @@ typedef enum X86Seg { #define HF_IOBPT_SHIFT 24 /* an io breakpoint enabled */ #define HF_MPX_EN_SHIFT 25 /* MPX Enabled (CR4+XCR0+BNDCFGx) */ #define HF_MPX_IU_SHIFT 26 /* BND registers in-use */ +#define HF_FPU_PC_SHIFT 27 /* FPU Precision Control */ 
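[Annotation, not part of the patch: the new hflag caches the x87 precision-control field so translated code can pick f32 or f64 generators without reloading the control word each time. A minimal sketch of the mapping performed by cpu_set_fpuc() in the next hunk; fpu_pc_hflag() is a hypothetical helper for illustration only.]

#include <assert.h>
#include <stdint.h>

#define HF_FPU_PC_SHIFT 27
#define HF_FPU_PC_MASK  (1 << HF_FPU_PC_SHIFT)

/* FPUC[9:8] is the x87 precision-control (PC) field:
 *   00 = single (24-bit), 10 = double (53-bit), 11 = extended (64-bit).
 * Bit 9 alone separates single from double/extended, and the patch
 * emulates extended precision with host doubles anyway, so only
 * bit 9 is cached in hflags. */
static uint32_t fpu_pc_hflag(uint16_t fpuc)
{
    return ((uint32_t)((fpuc >> 9) & 1)) << HF_FPU_PC_SHIFT;
}

int main(void)
{
    assert(fpu_pc_hflag(0x007f) == 0);              /* PC=00: single   */
    assert(fpu_pc_hflag(0x027f) == HF_FPU_PC_MASK); /* PC=10: double   */
    assert(fpu_pc_hflag(0x037f) == HF_FPU_PC_MASK); /* PC=11: extended */
    return 0;
}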
#define HF_CPL_MASK (3 << HF_CPL_SHIFT) #define HF_INHIBIT_IRQ_MASK (1 << HF_INHIBIT_IRQ_SHIFT) @@ -192,6 +193,7 @@ typedef enum X86Seg { #define HF_IOBPT_MASK (1 << HF_IOBPT_SHIFT) #define HF_MPX_EN_MASK (1 << HF_MPX_EN_SHIFT) #define HF_MPX_IU_MASK (1 << HF_MPX_IU_SHIFT) +#define HF_FPU_PC_MASK (1 << HF_FPU_PC_SHIFT) /* hflags2 */ @@ -2145,6 +2147,13 @@ static inline void cpu_set_fpuc(CPUX86State *env, uint16_t fpuc) if (tcg_enabled()) { update_fp_status(env); } + + /* + * XXX: Currently emulating double extended precision with double precision + * when using hard floats. + */ + env->hflags &= ~HF_FPU_PC_MASK; + env->hflags |= ((env->fpuc >> 9) & 1) << HF_FPU_PC_SHIFT; } /* mem_helper.c */ diff --git a/target/i386/ops_fpu.h b/target/i386/ops_fpu.h new file mode 100644 index 0000000000..7deb487289 --- /dev/null +++ b/target/i386/ops_fpu.h @@ -0,0 +1,317 @@ +/* + * x87 FPU support + * + * Copyright (c) 2021 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#define PRECf glue(PREC, f) +#define fPREC glue(f, PREC) +#define PREC_SUFFIX glue(_, fPREC) +#define PREC_TYPE glue(TCGv_, fPREC) +#define tcg_temp_new_fp glue(tcg_temp_new_, fPREC) +#define tcg_temp_free_fp glue(tcg_temp_free_, fPREC) +#define tcg_gen_st80f_fp glue(tcg_gen_st80f, PREC_SUFFIX) +#define tcg_gen_ld80f_fp glue(tcg_gen_ld80f, PREC_SUFFIX) +#define get_ft0 glue(get_ft0, PREC_SUFFIX) +#define get_stn glue(get_stn, PREC_SUFFIX) +#define get_st0 glue(get_st0, PREC_SUFFIX) + +static PREC_TYPE get_ft0(DisasContext *s) +{ + gen_flcr(s); + + PREC_TYPE *v = (PREC_TYPE *)&s->ft0; + + if (*v == NULL) { + *v = tcg_temp_new_fp(); + TCGv_ptr p = gen_ft0_ptr(); + tcg_gen_ld80f_fp(*v, p); + tcg_temp_free_ptr(p); + } + + return *v; +} + +static PREC_TYPE get_stn(DisasContext *s, int opreg) +{ + assert(!(opreg & ~7)); + gen_flcr(s); + + PREC_TYPE *t = (PREC_TYPE *)&s->fpregs[(s->fpstt_delta + opreg) & 7]; + + if (*t == NULL) { + *t = tcg_temp_new_fp(); + TCGv_ptr p = gen_stn_ptr(opreg); + tcg_gen_ld80f_fp(*t, p); + tcg_temp_free_ptr(p); + } + + return *t; +} + +static PREC_TYPE get_st0(DisasContext *s) +{ + return get_stn(s, 0); +} + +static void glue(flush_fp_regs, PREC_SUFFIX)(DisasContext *s) +{ + for (int i = 0; i < 8; i++) { + PREC_TYPE *t = (PREC_TYPE *)&s->fpregs[(s->fpstt_delta + i) & 7]; + if (*t) { + TCGv_ptr ptr = gen_stn_ptr(i); + tcg_gen_st80f_fp(*t, ptr); + tcg_temp_free_fp(*t); + tcg_temp_free_ptr(ptr); + *t = NULL; + } + } + + if (s->ft0) { + TCGv_ptr ptr = gen_ft0_ptr(); + tcg_gen_st80f_fp((PREC_TYPE)s->ft0, ptr); + tcg_temp_free_ptr(ptr); + s->ft0 = NULL; + } +} + +static void glue(gen_fpop, PREC_SUFFIX)(DisasContext *s) +{ + PREC_TYPE *t = (PREC_TYPE *)&s->fpregs[s->fpstt_delta & 7]; + if (*t) { + tcg_temp_free_fp(*t); + *t = NULL; + } +} + +static void glue(gen_fcom, PREC_SUFFIX)(DisasContext *s, PREC_TYPE arg1, + PREC_TYPE arg2) +{ + TCGv_i64 res = tcg_temp_new_i64(); + + glue(tcg_gen_com, PREC_SUFFIX)(res, 
arg1, arg2); + + /* + * Result is EFLAGS register format as follows + * + * C3 C2 C0 + * arg1 > arg2 0 0 0 + * arg1 < arg2 0 0 1 + * arg1 = arg2 1 0 0 + * unordered 1 1 1 + * + * C3,C2,C0 = ZF,PF,CF = Bit 6,2,0 + * + * fpus = {0x0100, 0x4000, 0x0000, 0x4500}; + * < = > UO + */ + + tcg_gen_andi_i64(res, res, 0x45); + tcg_gen_shli_i64(res, res, 8); + + TCGv_i64 fpus = tcg_temp_new_i64(); + tcg_gen_ld16u_i64(fpus, cpu_env, offsetof(CPUX86State, fpus)); + tcg_gen_andi_i64(fpus, fpus, ~0x4500); + tcg_gen_or_i64(fpus, fpus, res); + tcg_gen_st16_i64(fpus, cpu_env, offsetof(CPUX86State, fpus)); + + tcg_temp_free_i64(fpus); + tcg_temp_free_i64(res); + + /* FIXME: Exceptions */ +} + +/* FIXME: This decode logic should be shared with helper variant */ + +static void glue(gen_helper_fp_arith_ST0_FT0, PREC_SUFFIX)(DisasContext *s, + int op) +{ + PREC_TYPE st0 = get_st0(s); + PREC_TYPE ft0 = get_ft0(s); + + switch (op) { + case 0: + glue(tcg_gen_add, PREC_SUFFIX)(st0, st0, ft0); + break; + case 1: + glue(tcg_gen_mul, PREC_SUFFIX)(st0, st0, ft0); + break; + case 2: + case 3: + glue(gen_fcom, PREC_SUFFIX)(s, st0, ft0); + break; + case 4: + glue(tcg_gen_sub, PREC_SUFFIX)(st0, st0, ft0); + break; + case 5: + glue(tcg_gen_sub, PREC_SUFFIX)(st0, ft0, st0); + break; + case 6: + glue(tcg_gen_div, PREC_SUFFIX)(st0, st0, ft0); + break; + case 7: + glue(tcg_gen_div, PREC_SUFFIX)(st0, ft0, st0); + break; + default: + g_assert_not_reached(); + } +} + +static void glue(gen_helper_fp_arith_STN_ST0, PREC_SUFFIX)(DisasContext *s, + int op, + int opreg) +{ + PREC_TYPE stn = get_stn(s, opreg); + PREC_TYPE st0 = get_st0(s); + + switch (op) { + case 0: + glue(tcg_gen_add, PREC_SUFFIX)(stn, stn, st0); + break; + case 1: + glue(tcg_gen_mul, PREC_SUFFIX)(stn, stn, st0); + break; + case 4: + glue(tcg_gen_sub, PREC_SUFFIX)(stn, st0, stn); + break; + case 5: + glue(tcg_gen_sub, PREC_SUFFIX)(stn, stn, st0); + break; + case 6: + glue(tcg_gen_div, PREC_SUFFIX)(stn, st0, stn); + break; + case 7: + glue(tcg_gen_div, PREC_SUFFIX)(stn, stn, st0); + break; + default: + g_assert_not_reached(); + } +} + +static void glue(gen_fmov_FT0_STN, PREC_SUFFIX)(DisasContext *s, int st_index) +{ + glue(tcg_gen_mov, PREC_SUFFIX)(get_ft0(s), get_stn(s, st_index)); +} + +static void glue(gen_fmov_ST0_STN, PREC_SUFFIX)(DisasContext *s, int st_index) +{ + glue(tcg_gen_mov, PREC_SUFFIX)(get_st0(s), get_stn(s, st_index)); +} + +static void glue(gen_fmov_STN_ST0, PREC_SUFFIX)(DisasContext *s, int st_index) +{ + glue(tcg_gen_mov, PREC_SUFFIX)(get_stn(s, st_index), get_st0(s)); +} + +static void glue(gen_flds_FT0, PREC_SUFFIX)(DisasContext *s, TCGv_i32 arg) +{ + glue(gen_mov32i, PREC_SUFFIX)(get_ft0(s), arg); +} + +static void glue(gen_flds_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i32 arg) +{ + glue(gen_mov32i, PREC_SUFFIX)(get_st0(s), arg); +} + +static void glue(gen_fldl_FT0, PREC_SUFFIX)(DisasContext *s, TCGv_i64 arg) +{ + glue(gen_mov64i, PREC_SUFFIX)(get_ft0(s), arg); +} + +static void glue(gen_fldl_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i64 arg) +{ + glue(gen_mov64i, PREC_SUFFIX)(get_st0(s), arg); +} + +static void glue(gen_fildl_FT0, PREC_SUFFIX)(DisasContext *s, TCGv_i32 arg) +{ + glue(tcg_gen_cvt32i, PREC_SUFFIX)(get_ft0(s), arg); +} + +static void glue(gen_fildl_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i32 arg) +{ + glue(tcg_gen_cvt32i, PREC_SUFFIX)(get_st0(s), arg); +} + +static void glue(gen_fildll_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i64 arg) +{ + glue(tcg_gen_cvt64i, PREC_SUFFIX)(get_st0(s), arg); +} + +static void glue(gen_fistl_ST0, 
PREC_SUFFIX)(DisasContext *s, TCGv_i32 arg) +{ + glue(glue(tcg_gen_cvt, PRECf), _i32)(arg, get_st0(s)); +} + +static void glue(gen_fistll_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i64 arg) +{ + glue(glue(tcg_gen_cvt, PRECf), _i64)(arg, get_st0(s)); +} + +static void glue(gen_fsts_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i32 arg) +{ + glue(glue(gen_mov, PRECf), _i32)(arg, get_st0(s)); +} + +static void glue(gen_fstl_ST0, PREC_SUFFIX)(DisasContext *s, TCGv_i64 arg) +{ + glue(glue(gen_mov, PRECf), _i64)(arg, get_st0(s)); +} + +static void glue(gen_fchs_ST0, PREC_SUFFIX)(DisasContext *s) +{ + PREC_TYPE st0 = get_st0(s); + glue(tcg_gen_chs, PREC_SUFFIX)(st0, st0); +} + +static void glue(gen_fabs_ST0, PREC_SUFFIX)(DisasContext *s) +{ + PREC_TYPE st0 = get_st0(s); + glue(tcg_gen_abs, PREC_SUFFIX)(st0, st0); +} + +static void glue(gen_fsqrt, PREC_SUFFIX)(DisasContext *s) +{ + PREC_TYPE st0 = get_st0(s); + glue(tcg_gen_sqrt, PREC_SUFFIX)(st0, st0); +} + +static void glue(gen_fsin, PREC_SUFFIX)(DisasContext *s) +{ + PREC_TYPE st0 = get_st0(s); + glue(tcg_gen_sin, PREC_SUFFIX)(st0, st0); +} + +static void glue(gen_fcos, PREC_SUFFIX)(DisasContext *s) +{ + PREC_TYPE st0 = get_st0(s); + glue(tcg_gen_cos, PREC_SUFFIX)(st0, st0); +} + +static void glue(gen_fld1_ST0, PREC_SUFFIX)(DisasContext *s) +{ + glue(gen_movi, PREC_SUFFIX)(s, get_st0(s), 1.0); +} + +static void glue(gen_fldz_ST0, PREC_SUFFIX)(DisasContext *s) +{ + glue(gen_movi, PREC_SUFFIX)(s, get_st0(s), 0.0); +} + +static void glue(gen_fldz_FT0, PREC_SUFFIX)(DisasContext *s) +{ + glue(gen_movi, PREC_SUFFIX)(s, get_ft0(s), 0.0); +} diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 7691ff9485..20fb88a564 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -32,9 +32,10 @@ #include "exec/log.h" +static int g_use_hard_fpu; + #if defined(XBOX) && defined(__x86_64__) #include "ui/xemu-settings.h" -static int g_use_hard_fpu; #define MAP_GEN_HELPER_SOFT_HARD(name) \ (g_use_hard_fpu ? gen_helper_##name##__hard : gen_helper_##name##__soft) #define gen_helper_flds_FT0 MAP_GEN_HELPER_SOFT_HARD(flds_FT0) @@ -156,9 +157,13 @@ static TCGv cpu_regs[CPU_NB_REGS]; static TCGv cpu_seg_base[6]; static TCGv_i64 cpu_bndl[4]; static TCGv_i64 cpu_bndu[4]; +static TCGv_i32 fpstt; + #include "exec/gen-icount.h" +typedef struct TCGv_fp_d *TCGv_fp; + typedef struct DisasContext { DisasContextBase base; @@ -217,6 +222,12 @@ typedef struct DisasContext { TCGv_i64 tmp1_i64; sigjmp_buf jmpbuf; + + /* Floating point */ + bool flcr_set; + int fpstt_delta; + TCGv_fp fpregs[8]; + TCGv_fp ft0; } DisasContext; /* The environment in which user-only runs is constrained. 
*/ @@ -1363,62 +1374,450 @@ GEN_REPZ(outs) GEN_REPZ2(scas) GEN_REPZ2(cmps) -static void gen_helper_fp_arith_ST0_FT0(int op) +static TCGv_ptr gen_stn_ptr(int opreg) { - switch (op) { - case 0: - gen_helper_fadd_ST0_FT0(cpu_env); - break; - case 1: - gen_helper_fmul_ST0_FT0(cpu_env); - break; - case 2: - gen_helper_fcom_ST0_FT0(cpu_env); - break; - case 3: - gen_helper_fcom_ST0_FT0(cpu_env); - break; - case 4: - gen_helper_fsub_ST0_FT0(cpu_env); - break; - case 5: - gen_helper_fsubr_ST0_FT0(cpu_env); - break; - case 6: - gen_helper_fdiv_ST0_FT0(cpu_env); - break; - case 7: - gen_helper_fdivr_ST0_FT0(cpu_env); - break; + TCGv_i32 offset = tcg_temp_new_i32(); + tcg_gen_mov_i32(offset, fpstt); + + if (opreg != 0) { + tcg_gen_addi_i32(offset, offset, opreg); + tcg_gen_andi_i32(offset, offset, 7); + } + + tcg_gen_muli_i32(offset, offset, sizeof(FPReg)); + tcg_gen_addi_i32(offset, offset, offsetof(CPUX86State, fpregs[0].d)); + TCGv_ptr ptr = tcg_temp_new_ptr(); + tcg_gen_ext_i32_ptr(ptr, offset); + tcg_gen_add_ptr(ptr, ptr, cpu_env); + + tcg_temp_free_i32(offset); + return ptr; +} + +static TCGv_ptr gen_ft0_ptr(void) +{ + TCGv_ptr ft0 = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(ft0, cpu_env, offsetof(CPUX86State, ft0)); + return ft0; +} + +static void gen_set_fptag(int offs, int value) +{ + TCGv_ptr p = tcg_temp_new_ptr(); + tcg_gen_ext_i32_ptr(p, fpstt); + tcg_gen_add_ptr(p, cpu_env, p); + TCGv_i32 tmp = tcg_const_i32(value); + tcg_gen_st8_i32(tmp, p, offsetof(CPUX86State, fptags[0]) + offs); + tcg_temp_free_i32(tmp); + tcg_temp_free_ptr(p); +} + +static bool fpu_using_double_precision(DisasContext *s) +{ + /* + * XXX: Currently emulating double extended precision with double precision + * when using hard floats. + */ + return s->flags & HF_FPU_PC_MASK; +} + +static void gen_movi_f32(DisasContext *s, TCGv_f32 ret, float arg) +{ + tcg_gen_mov32i_f32(ret, tcg_constant_i32(*(uint32_t *)&arg)); +} + +static void gen_movi_f64(DisasContext *s, TCGv_f64 ret, double arg) +{ + tcg_gen_mov64i_f64(ret, tcg_constant_i64(*(uint64_t *)&arg)); +} + +static void gen_flcr(DisasContext *s) +{ + /* TODO: Oversynchronized */ + if (s->flcr_set) { + return; + } + + TCGv_i32 v = tcg_temp_new_i32(); + tcg_gen_ld16u_i32(v, cpu_env, offsetof(CPUX86State, fpuc)); + tcg_gen_andi_i32(v, v, 0xc00); + tcg_gen_shli_i32(v, v, 3); + tcg_gen_ori_i32(v, v, 0x1f80); + tcg_gen_flcr(v); + tcg_temp_free_i32(v); + s->flcr_set = true; +} + +static void gen_mov32f_i64(TCGv_i64 ret, TCGv_f32 arg) +{ + TCGv_f64 t = tcg_temp_new_f64(); + tcg_gen_cvt32f_f64(t, arg); + tcg_gen_mov64f_i64(ret, t); + tcg_temp_free_f64(t); +} + +static void gen_mov32f_i32(TCGv_i32 ret, TCGv_f32 arg) +{ + tcg_gen_mov32f_i32(ret, arg); +} + +static void gen_mov32i_f64(TCGv_f64 ret, TCGv_i32 arg) +{ + TCGv_f32 t = tcg_temp_new_f32(); + tcg_gen_mov32i_f32(t, arg); + tcg_gen_cvt32f_f64(ret, t); + tcg_temp_free_f32(t); +} + +static void gen_mov32i_f32(TCGv_f32 ret, TCGv_i32 arg) +{ + tcg_gen_mov32i_f32(ret, arg); +} + +static void gen_mov64f_i32(TCGv_i32 ret, TCGv_f64 arg) +{ + TCGv_f32 t = tcg_temp_new_f32(); + tcg_gen_cvt64f_f32(t, arg); + tcg_gen_mov32f_i32(ret, t); + tcg_temp_free_f32(t); +} + +static void gen_mov64f_i64(TCGv_i64 ret, TCGv_f64 arg) +{ + tcg_gen_mov64f_i64(ret, arg); +} + +static void gen_mov64i_f32(TCGv_f32 ret, TCGv_i64 arg) +{ + TCGv_f64 t = tcg_temp_new_f64(); + tcg_gen_mov64i_f64(t, arg); + tcg_gen_cvt64f_f32(ret, t); + tcg_temp_free_f64(t); +} + +static void gen_mov64i_f64(TCGv_f64 ret, TCGv_i64 arg) +{ + tcg_gen_mov64i_f64(ret, arg); +} + 
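[Annotation, not part of the patch: gen_fcom in ops_fpu.h above relies on COMISS/COMISD leaving ZF/PF/CF at EFLAGS bits 6/2/0, which line up with the FSW condition codes C3/C2/C0 at bits 14/10/8 once masked with 0x45 and shifted left by 8. A standalone check of that arithmetic; eflags_to_fsw_cc() is a hypothetical helper mirroring the emitted TCG ops.]

#include <assert.h>
#include <stdint.h>

/* Mirror of gen_fcom's flag plumbing: keep ZF|PF|CF (0x45), shift them
 * into the C3|C2|C0 positions, and merge into the saved fpus word. */
static uint16_t eflags_to_fsw_cc(uint64_t eflags, uint16_t fpus)
{
    return (uint16_t)((fpus & ~0x4500) | ((eflags & 0x45) << 8));
}

int main(void)
{
    assert(eflags_to_fsw_cc(0x40, 0) == 0x4000); /* equal:     ZF -> C3 */
    assert(eflags_to_fsw_cc(0x01, 0) == 0x0100); /* less:      CF -> C0 */
    assert(eflags_to_fsw_cc(0x00, 0) == 0x0000); /* greater:   none set */
    assert(eflags_to_fsw_cc(0x45, 0) == 0x4500); /* unordered: C3|C2|C0 */
    return 0;
}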
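[Annotation, not part of the patch: the two includes below stamp out _f32 and _f64 variants of every generator in ops_fpu.h by token pasting with QEMU's glue() macro (from include/qemu/compiler.h). A minimal standalone sketch of the same multiple-instantiation idiom, with DEFINE_ADD standing in for the template header:]

#include <stdio.h>

#define xglue(a, b) a##b
#define glue(a, b)  xglue(a, b) /* expand arguments first, then paste */

/* One template body, instantiated per precision, just as ops_fpu.h is
 * included once with PREC defined to 32 and once with it defined to 64. */
#define DEFINE_ADD(PREC, TYPE) \
    static TYPE glue(add_f, PREC)(TYPE a, TYPE b) { return a + b; }

DEFINE_ADD(32, float)  /* defines add_f32() */
DEFINE_ADD(64, double) /* defines add_f64() */

int main(void)
{
    printf("%f %f\n", add_f32(1.5f, 2.25f), add_f64(1.5, 2.25));
    return 0;
}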
+#define PREC 32 +#include "ops_fpu.h" +#undef PREC + +#define PREC 64 +#include "ops_fpu.h" +#undef PREC + +#define fp_pc_wrapper(f) \ + (fpu_using_double_precision(s) ? glue(f, _f64) : glue(f, _f32)) + +static void gen_flush_fp(DisasContext *s) +{ + fp_pc_wrapper(flush_fp_regs)(s); + s->fpstt_delta = 0; + s->flcr_set = false; +} + +/* + * Ugly macros to handle soft FPU helper generation + */ +#define GEN_HELPER_FALLBACK_v_v(func) do { \ + if (!g_use_hard_fpu) { \ + gen_helper_ ## func(cpu_env); \ + return; \ + }} while(0) + +#define GEN_HELPER_FALLBACK_v_i(func, arg) do { \ + if (!g_use_hard_fpu) { \ + gen_helper_ ## func(cpu_env, tcg_const_i32(arg)); \ + return; \ + }} while(0) + +#define GEN_HELPER_FALLBACK_v_T(func, arg) do { \ + if (!g_use_hard_fpu) { \ + gen_helper_ ## func(cpu_env, arg); \ + return; \ + }} while(0) + +#define GEN_HELPER_FALLBACK_T_v(func, arg) do { \ + if (!g_use_hard_fpu) { \ + gen_helper_ ## func(arg, cpu_env); \ + return; \ + }} while(0) + +static void gen_fpush(DisasContext *s) +{ + GEN_HELPER_FALLBACK_v_v(fpush); + + tcg_gen_subi_i32(fpstt, fpstt, 1); + tcg_gen_andi_i32(fpstt, fpstt, 7); + gen_set_fptag(0, 0); /* validate stack entry */ + + s->fpstt_delta -= 1; +} + +static void gen_fpop(DisasContext *s) +{ + GEN_HELPER_FALLBACK_v_v(fpop); + + gen_set_fptag(0, 1); /* invalidate stack entry */ + tcg_gen_addi_i32(fpstt, fpstt, 1); + tcg_gen_andi_i32(fpstt, fpstt, 7); + + fp_pc_wrapper(gen_fpop)(s); + s->fpstt_delta += 1; +} + +static void gen_fmov_FT0_STN(DisasContext *s, int st_index) +{ + GEN_HELPER_FALLBACK_v_i(fmov_FT0_STN, st_index); + fp_pc_wrapper(gen_fmov_FT0_STN)(s, st_index); +} + +static void gen_fmov_ST0_STN(DisasContext *s, int st_index) +{ + GEN_HELPER_FALLBACK_v_i(fmov_ST0_STN, st_index); + fp_pc_wrapper(gen_fmov_ST0_STN)(s, st_index); +} + +static void gen_fmov_STN_ST0(DisasContext *s, int st_index) +{ + GEN_HELPER_FALLBACK_v_i(fmov_STN_ST0, st_index); + fp_pc_wrapper(gen_fmov_STN_ST0)(s, st_index); +} + +static void gen_fxchg_ST0_STN(DisasContext *s, int st_index) +{ + GEN_HELPER_FALLBACK_v_i(fxchg_ST0_STN, st_index); + + /* Ensure ST0, STN are loaded */ + if (fpu_using_double_precision(s)) { + get_stn_f64(s, 0); + get_stn_f64(s, st_index); + } else { + get_stn_f32(s, 0); + get_stn_f32(s, st_index); + } + TCGv_fp i = s->fpregs[(s->fpstt_delta + 0) & 7]; + s->fpregs[(s->fpstt_delta + 0) & 7] = + s->fpregs[(s->fpstt_delta + st_index) & 7]; + s->fpregs[(s->fpstt_delta + st_index) & 7] = i; +} + +static void gen_enter_mmx(DisasContext *s) +{ + GEN_HELPER_FALLBACK_v_v(enter_mmx); + + gen_flush_fp((DisasContext *)tcg_ctx->disas_ctx); + + tcg_gen_movi_i32(fpstt, 0); + + TCGv_i32 v = tcg_const_i32(0); + for (int i = 0; i < 8; i++) { + tcg_gen_st8_i32(v, cpu_env, offsetof(CPUX86State, fptags[0]) + i); + } + tcg_temp_free_i32(v); +} + +static void gen_flds_FT0(DisasContext *s, TCGv_i32 arg) +{ + GEN_HELPER_FALLBACK_v_T(flds_FT0, arg); + fp_pc_wrapper(gen_flds_FT0)(s, arg); +} + +static void gen_flds_ST0(DisasContext *s, TCGv_i32 arg) +{ + GEN_HELPER_FALLBACK_v_T(flds_ST0, arg); + gen_fpush(s); + fp_pc_wrapper(gen_flds_ST0)(s, arg); +} + +static void gen_fldl_FT0(DisasContext *s, TCGv_i64 arg) +{ + GEN_HELPER_FALLBACK_v_T(fldl_FT0, arg); + fp_pc_wrapper(gen_fldl_FT0)(s, arg); +} + +static void gen_fldl_ST0(DisasContext *s, TCGv_i64 arg) +{ + GEN_HELPER_FALLBACK_v_T(fldl_ST0, arg); + gen_fpush(s); + fp_pc_wrapper(gen_fldl_ST0)(s, arg); +} + +static void gen_fildl_FT0(DisasContext *s, TCGv_i32 arg) +{ + GEN_HELPER_FALLBACK_v_T(fildl_FT0, arg); + 
fp_pc_wrapper(gen_fildl_FT0)(s, arg); +} + +static void gen_fildl_ST0(DisasContext *s, TCGv_i32 arg) +{ + GEN_HELPER_FALLBACK_v_T(fildl_ST0, arg); + gen_fpush(s); + fp_pc_wrapper(gen_fildl_ST0)(s, arg); +} + +static void gen_fildll_ST0(DisasContext *s, TCGv_i64 arg) +{ + GEN_HELPER_FALLBACK_v_T(fildll_ST0, arg); + gen_fpush(s); + fp_pc_wrapper(gen_fildll_ST0)(s, arg); +} + +static void gen_fsts_ST0(DisasContext *s, TCGv_i32 arg) +{ + GEN_HELPER_FALLBACK_T_v(fsts_ST0, arg); + fp_pc_wrapper(gen_fsts_ST0)(s, arg); +} + +static void gen_fstl_ST0(DisasContext *s, TCGv_i64 arg) +{ + GEN_HELPER_FALLBACK_T_v(fstl_ST0, arg); + fp_pc_wrapper(gen_fstl_ST0)(s, arg); +} + +static void gen_fistl_ST0(DisasContext *s, TCGv_i32 arg) +{ + GEN_HELPER_FALLBACK_T_v(fistl_ST0, arg); + fp_pc_wrapper(gen_fistl_ST0)(s, arg); +} + +static void gen_fistll_ST0(DisasContext *s, TCGv_i64 arg) +{ + GEN_HELPER_FALLBACK_T_v(fistll_ST0, arg); + fp_pc_wrapper(gen_fistll_ST0)(s, arg); +} + +static void gen_fchs_ST0(DisasContext *s) +{ + GEN_HELPER_FALLBACK_v_v(fchs_ST0); + fp_pc_wrapper(gen_fchs_ST0)(s); +} + +static void gen_fabs_ST0(DisasContext *s) +{ + GEN_HELPER_FALLBACK_v_v(fabs_ST0); + fp_pc_wrapper(gen_fabs_ST0)(s); +} + +static void gen_fsqrt(DisasContext *s) +{ + GEN_HELPER_FALLBACK_v_v(fsqrt); + fp_pc_wrapper(gen_fsqrt)(s); +} + +static void gen_clear_fpus_c2(DisasContext *s) +{ + TCGv_i32 v = tcg_temp_new_i32(); + tcg_gen_ld16u_i32(v, cpu_env, offsetof(CPUX86State, fpus)); + tcg_gen_andi_i32(v, v, ~0x400); /* C2 <-- 0 */ + tcg_gen_st16_i32(v, cpu_env, offsetof(CPUX86State, fpus)); +} + +static void gen_fsin(DisasContext *s) +{ + GEN_HELPER_FALLBACK_v_v(fsin); + fp_pc_wrapper(gen_fsin)(s); + gen_clear_fpus_c2(s); /* FIXME: Does not check range correctly */ +} + +static void gen_fcos(DisasContext *s) +{ + GEN_HELPER_FALLBACK_v_v(fcos); + fp_pc_wrapper(gen_fcos)(s); + gen_clear_fpus_c2(s); /* FIXME: Does not check range correctly */ +} + +static void gen_helper_fp_arith_ST0_FT0(DisasContext *s, int op) +{ + if (g_use_hard_fpu) { + fp_pc_wrapper(gen_helper_fp_arith_ST0_FT0)(s, op); + } else { + switch (op) { + case 0: + gen_helper_fadd_ST0_FT0(cpu_env); + break; + case 1: + gen_helper_fmul_ST0_FT0(cpu_env); + break; + case 2: + gen_helper_fcom_ST0_FT0(cpu_env); + break; + case 3: + gen_helper_fcom_ST0_FT0(cpu_env); + break; + case 4: + gen_helper_fsub_ST0_FT0(cpu_env); + break; + case 5: + gen_helper_fsubr_ST0_FT0(cpu_env); + break; + case 6: + gen_helper_fdiv_ST0_FT0(cpu_env); + break; + case 7: + gen_helper_fdivr_ST0_FT0(cpu_env); + break; + } } } -/* NOTE the exception in "r" op ordering */ -static void gen_helper_fp_arith_STN_ST0(int op, int opreg) +static void gen_fcom_ST0_FT0(DisasContext *s) { - TCGv_i32 tmp = tcg_const_i32(opreg); - switch (op) { - case 0: - gen_helper_fadd_STN_ST0(cpu_env, tmp); - break; - case 1: - gen_helper_fmul_STN_ST0(cpu_env, tmp); - break; - case 4: - gen_helper_fsubr_STN_ST0(cpu_env, tmp); - break; - case 5: - gen_helper_fsub_STN_ST0(cpu_env, tmp); - break; - case 6: - gen_helper_fdivr_STN_ST0(cpu_env, tmp); - break; - case 7: - gen_helper_fdiv_STN_ST0(cpu_env, tmp); - break; + gen_helper_fp_arith_ST0_FT0(s, 2); +} + +/* NOTE the exception in "r" op ordering */ +static void gen_helper_fp_arith_STN_ST0(DisasContext *s, int op, int opreg) +{ + if (g_use_hard_fpu) { + fp_pc_wrapper(gen_helper_fp_arith_STN_ST0)(s, op, opreg); + } else { + TCGv_i32 tmp = tcg_const_i32(opreg); + + switch (op) { + case 0: + gen_helper_fadd_STN_ST0(cpu_env, tmp); + break; + case 1: + 
gen_helper_fmul_STN_ST0(cpu_env, tmp); + break; + case 4: + gen_helper_fsubr_STN_ST0(cpu_env, tmp); + break; + case 5: + gen_helper_fsub_STN_ST0(cpu_env, tmp); + break; + case 6: + gen_helper_fdivr_STN_ST0(cpu_env, tmp); + break; + case 7: + gen_helper_fdiv_STN_ST0(cpu_env, tmp); + break; + } } } +static void gen_fld1_ST0(DisasContext *s) +{ + GEN_HELPER_FALLBACK_v_v(fld1_ST0); + fp_pc_wrapper(gen_fld1_ST0)(s); +} + +static void gen_fldz_ST0(DisasContext *s) +{ + GEN_HELPER_FALLBACK_v_v(fldz_ST0); + fp_pc_wrapper(gen_fldz_ST0)(s); + /* FIXME: Set tag word */ +} + +static void gen_fldz_FT0(DisasContext *s) +{ + GEN_HELPER_FALLBACK_v_v(fldz_FT0); + fp_pc_wrapper(gen_fldz_FT0)(s); +} + static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) { gen_update_cc_op(s); @@ -3260,7 +3659,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, /* prepare MMX state (XXX: optimize by storing fptt and fptags in the static cpu state) */ if (!is_xmm) { - gen_helper_enter_mmx(cpu_env); + gen_enter_mmx(s); } modrm = x86_ldub_code(env, s); @@ -3661,7 +4060,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; case 0x02a: /* cvtpi2ps */ case 0x12a: /* cvtpi2pd */ - gen_helper_enter_mmx(cpu_env); + gen_enter_mmx(s); if (mod != 3) { gen_lea_modrm(env, s, modrm); op2_offset = offsetof(CPUX86State,mmx_t0); @@ -3706,7 +4105,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x12c: /* cvttpd2pi */ case 0x02d: /* cvtps2pi */ case 0x12d: /* cvtpd2pi */ - gen_helper_enter_mmx(cpu_env); + gen_enter_mmx(s); if (mod != 3) { gen_lea_modrm(env, s, modrm); op2_offset = offsetof(CPUX86State,xmm_t0); @@ -3818,14 +4217,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } break; case 0x2d6: /* movq2dq */ - gen_helper_enter_mmx(cpu_env); + gen_enter_mmx(s); rm = (modrm & 7); gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), offsetof(CPUX86State,fpregs[rm].mmx)); gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1))); break; case 0x3d6: /* movdq2q */ - gen_helper_enter_mmx(cpu_env); + gen_enter_mmx(s); rm = (modrm & 7) | REX_B(s); gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx), offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); @@ -6034,30 +6433,30 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0: tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_helper_flds_FT0(cpu_env, s->tmp2_i32); + gen_flds_FT0(s, s->tmp2_i32); break; case 1: tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_helper_fildl_FT0(cpu_env, s->tmp2_i32); + gen_fildl_FT0(s, s->tmp2_i32); break; case 2: tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); - gen_helper_fldl_FT0(cpu_env, s->tmp1_i64); + gen_fldl_FT0(s, s->tmp1_i64); break; case 3: default: tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LESW); - gen_helper_fildl_FT0(cpu_env, s->tmp2_i32); + gen_fildl_FT0(s, s->tmp2_i32); break; } - gen_helper_fp_arith_ST0_FT0(op1); + gen_helper_fp_arith_ST0_FT0(s, op1); if (op1 == 3) { /* fcomp needs pop */ - gen_helper_fpop(cpu_env); + gen_fpop(s); } } break; @@ -6073,23 +6472,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0: tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_helper_flds_ST0(cpu_env, s->tmp2_i32); + gen_flds_ST0(s, s->tmp2_i32); break; case 1: tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_helper_fildl_ST0(cpu_env, s->tmp2_i32); + gen_fildl_ST0(s, s->tmp2_i32); break; case 2: 
tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); - gen_helper_fldl_ST0(cpu_env, s->tmp1_i64); + gen_fldl_ST0(s, s->tmp1_i64); break; case 3: default: tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LESW); - gen_helper_fildl_ST0(cpu_env, s->tmp2_i32); + gen_fildl_ST0(s, s->tmp2_i32); break; } break; @@ -6113,22 +6512,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) s->mem_index, MO_LEUW); break; } - gen_helper_fpop(cpu_env); + gen_fpop(s); break; default: switch (op >> 4) { case 0: - gen_helper_fsts_ST0(s->tmp2_i32, cpu_env); + gen_fsts_ST0(s, s->tmp2_i32); tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); break; case 1: - gen_helper_fistl_ST0(s->tmp2_i32, cpu_env); + gen_fistl_ST0(s, s->tmp2_i32); tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); break; case 2: - gen_helper_fstl_ST0(s->tmp1_i64, cpu_env); + gen_fstl_ST0(s, s->tmp1_i64); tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); break; @@ -6140,7 +6539,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } if ((op & 7) == 3) { - gen_helper_fpop(cpu_env); + gen_fpop(s); } break; } @@ -6149,12 +6548,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1)); update_fip = update_fdp = false; + gen_jmp_im(s, s->pc - s->cs_base); + gen_eob(s); break; case 0x0d: /* fldcw mem */ tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUW); gen_helper_fldcw(cpu_env, s->tmp2_i32); update_fip = update_fdp = false; + gen_jmp_im(s, s->pc - s->cs_base); + gen_eob(s); break; case 0x0e: /* fnstenv mem */ gen_helper_fstenv(cpu_env, s->A0, @@ -6172,7 +6575,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; case 0x1f: /* fstpt mem */ gen_helper_fstt_ST0(cpu_env, s->A0); - gen_helper_fpop(cpu_env); + gen_fpop(s); break; case 0x2c: /* frstor mem */ gen_helper_frstor(cpu_env, s->A0, @@ -6195,18 +6598,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; case 0x3e: /* fbstp */ gen_helper_fbst_ST0(cpu_env, s->A0); - gen_helper_fpop(cpu_env); + gen_fpop(s); break; case 0x3d: /* fildll */ tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); - gen_helper_fildll_ST0(cpu_env, s->tmp1_i64); + gen_fildll_ST0(s, s->tmp1_i64); break; case 0x3f: /* fistpll */ - gen_helper_fistll_ST0(s->tmp1_i64, cpu_env); + gen_fistll_ST0(s, s->tmp1_i64); tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); - gen_helper_fpop(cpu_env); + gen_fpop(s); break; default: goto unknown_op; @@ -6230,14 +6633,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) switch (op) { case 0x08: /* fld sti */ - gen_helper_fpush(cpu_env); - gen_helper_fmov_ST0_STN(cpu_env, - tcg_const_i32((opreg + 1) & 7)); + gen_fpush(s); + gen_fmov_ST0_STN(s, (opreg + 1) & 7); break; case 0x09: /* fxchg sti */ case 0x29: /* fxchg4 sti, undocumented op */ case 0x39: /* fxchg7 sti, undocumented op */ - gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg)); + gen_fxchg_ST0_STN(s, opreg); break; case 0x0a: /* grp d9/2 */ switch (rm) { @@ -6253,14 +6655,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x0c: /* grp d9/4 */ switch (rm) { case 0: /* fchs */ - gen_helper_fchs_ST0(cpu_env); + gen_fchs_ST0(s); break; case 1: /* fabs */ - gen_helper_fabs_ST0(cpu_env); + gen_fabs_ST0(s); break; case 4: /* ftst */ - gen_helper_fldz_FT0(cpu_env); - gen_helper_fcom_ST0_FT0(cpu_env); + gen_fldz_FT0(s); + gen_fcom_ST0_FT0(s); break; case 5: /* fxam */ 
gen_helper_fxam_ST0(cpu_env); @@ -6273,32 +6675,32 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) { switch (rm) { case 0: - gen_helper_fpush(cpu_env); - gen_helper_fld1_ST0(cpu_env); + gen_fpush(s); + gen_fld1_ST0(s); break; case 1: - gen_helper_fpush(cpu_env); + gen_fpush(s); gen_helper_fldl2t_ST0(cpu_env); break; case 2: - gen_helper_fpush(cpu_env); + gen_fpush(s); gen_helper_fldl2e_ST0(cpu_env); break; case 3: - gen_helper_fpush(cpu_env); + gen_fpush(s); gen_helper_fldpi_ST0(cpu_env); break; case 4: - gen_helper_fpush(cpu_env); + gen_fpush(s); gen_helper_fldlg2_ST0(cpu_env); break; case 5: - gen_helper_fpush(cpu_env); + gen_fpush(s); gen_helper_fldln2_ST0(cpu_env); break; case 6: - gen_helper_fpush(cpu_env); - gen_helper_fldz_ST0(cpu_env); + gen_fpush(s); + gen_fldz_ST0(s); break; default: goto unknown_op; @@ -6343,7 +6745,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_helper_fyl2xp1(cpu_env); break; case 2: /* fsqrt */ - gen_helper_fsqrt(cpu_env); + gen_fsqrt(s); break; case 3: /* fsincos */ gen_helper_fsincos(cpu_env); @@ -6355,11 +6757,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_helper_frndint(cpu_env); break; case 6: /* fsin */ - gen_helper_fsin(cpu_env); + gen_fsin(s); break; default: case 7: /* fcos */ - gen_helper_fcos(cpu_env); + gen_fcos(s); break; } break; @@ -6371,36 +6773,35 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) op1 = op & 7; if (op >= 0x20) { - gen_helper_fp_arith_STN_ST0(op1, opreg); + gen_helper_fp_arith_STN_ST0(s, op1, opreg); if (op >= 0x30) { - gen_helper_fpop(cpu_env); + gen_fpop(s); } } else { - gen_helper_fmov_FT0_STN(cpu_env, - tcg_const_i32(opreg)); - gen_helper_fp_arith_ST0_FT0(op1); + gen_fmov_FT0_STN(s, opreg); + gen_helper_fp_arith_ST0_FT0(s, op1); } } break; case 0x02: /* fcom */ case 0x22: /* fcom2, undocumented op */ - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); - gen_helper_fcom_ST0_FT0(cpu_env); + gen_fmov_FT0_STN(s, opreg); + gen_fcom_ST0_FT0(s); break; case 0x03: /* fcomp */ case 0x23: /* fcomp3, undocumented op */ case 0x32: /* fcomp5, undocumented op */ - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); - gen_helper_fcom_ST0_FT0(cpu_env); - gen_helper_fpop(cpu_env); + gen_fmov_FT0_STN(s, opreg); + gen_fcom_ST0_FT0(s); + gen_fpop(s); break; case 0x15: /* da/5 */ switch (rm) { case 1: /* fucompp */ - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1)); + gen_fmov_FT0_STN(s, 1); gen_helper_fucom_ST0_FT0(cpu_env); - gen_helper_fpop(cpu_env); - gen_helper_fpop(cpu_env); + gen_fpop(s); + gen_fpop(s); break; default: goto unknown_op; @@ -6419,6 +6820,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 3: /* fninit */ gen_helper_fninit(cpu_env); update_fip = false; + gen_jmp_im(s, s->pc - s->cs_base); + gen_eob(s); break; case 4: /* fsetpm (287 only, just do nop here) */ break; @@ -6431,7 +6834,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); + gen_fmov_FT0_STN(s, opreg); gen_helper_fucomi_ST0_FT0(cpu_env); set_cc_op(s, CC_OP_EFLAGS); break; @@ -6440,7 +6843,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); + gen_fmov_FT0_STN(s, opreg); gen_helper_fcomi_ST0_FT0(cpu_env); set_cc_op(s, CC_OP_EFLAGS); break; @@ -6448,31 +6851,31 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) 
gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg)); break; case 0x2a: /* fst sti */ - gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg)); + gen_fmov_STN_ST0(s, opreg); break; case 0x2b: /* fstp sti */ case 0x0b: /* fstp1 sti, undocumented op */ case 0x3a: /* fstp8 sti, undocumented op */ case 0x3b: /* fstp9 sti, undocumented op */ - gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg)); - gen_helper_fpop(cpu_env); + gen_fmov_STN_ST0(s, opreg); + gen_fpop(s); break; case 0x2c: /* fucom st(i) */ - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); + gen_fmov_FT0_STN(s, opreg); gen_helper_fucom_ST0_FT0(cpu_env); break; case 0x2d: /* fucomp st(i) */ - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); + gen_fmov_FT0_STN(s, opreg); gen_helper_fucom_ST0_FT0(cpu_env); - gen_helper_fpop(cpu_env); + gen_fpop(s); break; case 0x33: /* de/3 */ switch (rm) { case 1: /* fcompp */ - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1)); - gen_helper_fcom_ST0_FT0(cpu_env); - gen_helper_fpop(cpu_env); - gen_helper_fpop(cpu_env); + gen_fmov_FT0_STN(s, 1); + gen_fcom_ST0_FT0(s); + gen_fpop(s); + gen_fpop(s); break; default: goto unknown_op; @@ -6480,7 +6883,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; case 0x38: /* ffreep sti, undocumented op */ gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg)); - gen_helper_fpop(cpu_env); + gen_fpop(s); break; case 0x3c: /* df/4 */ switch (rm) { @@ -6498,9 +6901,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); + gen_fmov_FT0_STN(s, opreg); gen_helper_fucomi_ST0_FT0(cpu_env); - gen_helper_fpop(cpu_env); + gen_fpop(s); set_cc_op(s, CC_OP_EFLAGS); break; case 0x3e: /* fcomip */ @@ -6508,9 +6911,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); + gen_fmov_FT0_STN(s, opreg); gen_helper_fcomi_ST0_FT0(cpu_env); - gen_helper_fpop(cpu_env); + gen_fpop(s); set_cc_op(s, CC_OP_EFLAGS); break; case 0x10 ... 
0x13: /* fcmovxx */ @@ -6531,7 +6934,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1); l1 = gen_new_label(); gen_jcc1_noeob(s, op1, l1); - gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg)); + gen_fmov_ST0_STN(s, opreg); gen_set_label(l1); } break; @@ -8314,6 +8717,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } gen_lea_modrm(env, s, modrm); gen_helper_fxrstor(cpu_env, s->A0); + gen_jmp_im(s, s->pc - s->cs_base); + gen_eob(s); break; CASE_MODRM_MEM_OP(2): /* ldmxcsr */ @@ -8637,6 +9042,9 @@ void tcg_x86_init(void) bnd_regu_names[i]); } + fpstt = tcg_global_mem_new_i32(cpu_env, + offsetof(CPUX86State, fpstt), "fpstt"); + #if defined(XBOX) && defined(__x86_64__) xemu_settings_get_bool(XEMU_SETTINGS_SYSTEM_HARD_FPU, &g_use_hard_fpu); #endif @@ -8705,6 +9113,13 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) dc->ptr0 = tcg_temp_new_ptr(); dc->ptr1 = tcg_temp_new_ptr(); dc->cc_srcT = tcg_temp_local_new(); + + for (int i = 0; i < 8; i++) { + dc->fpregs[i] = NULL; + } + dc->fpstt_delta = 0; + dc->ft0 = NULL; + dc->flcr_set = false; } static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -8766,6 +9181,8 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); + gen_flush_fp(dc); + if (dc->base.is_jmp == DISAS_TOO_MANY) { gen_jmp_im(dc, dc->base.pc_next - dc->cs_base); gen_eob(dc); @@ -8795,7 +9212,9 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) { DisasContext dc; + tcg_ctx->disas_ctx = &dc; translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns); + tcg_ctx->disas_ctx = NULL; } void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, @@ -8807,3 +9226,8 @@ void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, env->cc_op = cc_op; } } + +void gen_bb_epilogue(void) +{ + gen_flush_fp((DisasContext *)tcg_ctx->disas_ctx); +} diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h index 78774d1005..369e38d153 100644 --- a/tcg/i386/tcg-target-con-set.h +++ b/tcg/i386/tcg-target-con-set.h @@ -20,6 +20,7 @@ C_O0_I2(ri, r) C_O0_I2(r, re) C_O0_I2(s, L) C_O0_I2(x, r) +C_O0_I2(x, L) C_O0_I3(L, L, L) C_O0_I3(s, L, L) C_O0_I4(L, L, L, L) @@ -30,6 +31,8 @@ C_O1_I1(r, q) C_O1_I1(r, r) C_O1_I1(x, r) C_O1_I1(x, x) +C_O1_I1(x, L) +C_O1_I1(r, x) C_O1_I2(Q, 0, Q) C_O1_I2(q, r, re) C_O1_I2(r, 0, ci) @@ -43,6 +46,7 @@ C_O1_I2(r, r, re) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) C_O1_I2(x, x, x) +C_O1_I2(r, x, x) C_N1_I2(r, r, r) C_N1_I2(r, r, rW) C_O1_I3(x, x, x, x) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 98d924b91a..ba5f6053f5 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -262,6 +262,8 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ #define OPC_ANDN (0xf2 | P_EXT38) #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) +#define OPC_ADDSD (0x58 | P_EXT | P_SIMDF2) +#define OPC_ADDSS (0x58 | P_EXT | P_SIMDF3) #define OPC_AND_GvEv (OPC_ARITH_GvEv | (ARITH_AND << 3)) #define OPC_BLENDPS (0x0c | P_EXT3A | P_DATA16) #define OPC_BSF (0xbc | P_EXT) @@ -270,7 +272,23 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_CALL_Jz (0xe8) #define OPC_CMOVCC (0x40 | P_EXT) /* ... 
plus condition code */ #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) +#define OPC_COMISD (0x2f | P_EXT | P_DATA16) +#define OPC_COMISS (0x2f | P_EXT) +#define OPC_CVTSI2SD (0x2a | P_EXT | P_SIMDF2) +#define OPC_CVTSI2SS (0x2a | P_EXT | P_SIMDF3) +#define OPC_CVTSS2SD (0x5a | P_EXT | P_SIMDF3) +#define OPC_CVTSD2SI (0x2d | P_EXT | P_SIMDF2) +#define OPC_CVTSS2SI (0x2d | P_EXT | P_SIMDF3) +#define OPC_CVTSD2SS (0x5a | P_EXT | P_SIMDF2) #define OPC_DEC_r32 (0x48) +#define OPC_DIVSD (0x5e | P_EXT | P_SIMDF2) +#define OPC_DIVSS (0x5e | P_EXT | P_SIMDF3) +#define OPC_FLD_m32fp (0xd9) +#define OPC_FLD_m64fp (0xdd) +#define OPC_FLD_m80fp (0xdb) +#define OPC_FSTP_m32fp (0xd9) +#define OPC_FSTP_m64fp (0xdd) +#define OPC_FSTP_m80fp (0xdb) #define OPC_IMUL_GvEv (0xaf | P_EXT) #define OPC_IMUL_GvEvIb (0x6b) #define OPC_IMUL_GvEvIz (0x69) @@ -279,6 +297,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_JCC_short (0x70) /* ... plus condition code */ #define OPC_JMP_long (0xe9) #define OPC_JMP_short (0xeb) +#define OPC_LDMXCSR (0xae | P_EXT) #define OPC_LEA (0x8d) #define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3) #define OPC_MOVB_EvGv (0x88) /* stores, more or less */ @@ -303,6 +322,8 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_MOVSLQ (0x63 | P_REXW) #define OPC_MOVZBL (0xb6 | P_EXT) #define OPC_MOVZWL (0xb7 | P_EXT) +#define OPC_MULSD (0x59 | P_EXT | P_SIMDF2) +#define OPC_MULSS (0x59 | P_EXT | P_SIMDF3) #define OPC_PABSB (0x1c | P_EXT38 | P_DATA16) #define OPC_PABSW (0x1d | P_EXT38 | P_DATA16) #define OPC_PABSD (0x1e | P_EXT38 | P_DATA16) @@ -387,6 +408,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_PUSH_r32 (0x50) #define OPC_PUSH_Iv (0x68) #define OPC_PUSH_Ib (0x6a) +#define OPC_PUSHF (0x9c) #define OPC_RET (0xc3) #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... 
plus cc */ #define OPC_SHIFT_1 (0xd1) @@ -397,6 +419,10 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) #define OPC_SHRD_Ib (0xac | P_EXT) +#define OPC_SQRTSD (0x51 | P_EXT | P_SIMDF2) +#define OPC_SQRTSS (0x51 | P_EXT | P_SIMDF3) +#define OPC_SUBSD (0x5c | P_EXT | P_SIMDF2) +#define OPC_SUBSS (0x5c | P_EXT | P_SIMDF3) #define OPC_TESTL (0x85) #define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3) #define OPC_UD2 (0x0b | P_EXT) @@ -771,9 +797,11 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) } switch (type) { case TCG_TYPE_I64: + case TCG_TYPE_F64: rexw = P_REXW; /* fallthru */ case TCG_TYPE_I32: + case TCG_TYPE_F32: if (ret < 16) { if (arg < 16) { tcg_out_modrm(s, OPC_MOVL_GvEv + rexw, ret, arg); @@ -1027,6 +1055,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, { switch (type) { case TCG_TYPE_I32: + case TCG_TYPE_F32: if (ret < 16) { tcg_out_modrm_offset(s, OPC_MOVL_GvEv, ret, arg1, arg2); } else { @@ -1034,6 +1063,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, } break; case TCG_TYPE_I64: + case TCG_TYPE_F64: if (ret < 16) { tcg_out_modrm_offset(s, OPC_MOVL_GvEv | P_REXW, ret, arg1, arg2); break; @@ -1072,6 +1102,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, { switch (type) { case TCG_TYPE_I32: + case TCG_TYPE_F32: if (arg < 16) { tcg_out_modrm_offset(s, OPC_MOVL_EvGv, arg, arg1, arg2); } else { @@ -1079,6 +1110,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, } break; case TCG_TYPE_I64: + case TCG_TYPE_F64: if (arg < 16) { tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_REXW, arg, arg1, arg2); break; @@ -2174,12 +2206,64 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #endif } +static inline void tcg_out_stash_xmm(TCGContext *s, TCGArg a0) +{ + tcg_out_modrm_offset(s, OPC_MOVQ_WqVq, a0, TCG_REG_ESP, -0x10); +} + +static inline void tcg_out_unstash_xmm(TCGContext *s, TCGArg a0) +{ + tcg_out_modrm_offset(s, OPC_MOVQ_VqWq, a0, TCG_REG_ESP, -0x10); +} + +static inline void tcg_out_fld(TCGContext *s, TCGArg a0) +{ + tcg_out_modrm_offset(s, OPC_FLD_m80fp, 5, a0, 0); +} + +static inline void tcg_out_fld_xmm(TCGContext *s, TCGArg a0, bool dp) +{ + tcg_out_stash_xmm(s, a0); + tcg_out_modrm_offset(s, dp ? OPC_FLD_m64fp : OPC_FLD_m32fp, 0, TCG_REG_ESP, + -0x10); +} + +static inline void tcg_out_fstp(TCGContext *s, TCGArg a0) +{ + tcg_out_modrm_offset(s, OPC_FSTP_m80fp, 7, a0, 0); +} + +static inline void tcg_out_fstp_xmm(TCGContext *s, TCGArg a0, bool dp) +{ + tcg_out_modrm_offset(s, dp ? 
OPC_FSTP_m64fp : OPC_FSTP_m32fp, 3, + TCG_REG_ESP, -0x10); + tcg_out_unstash_xmm(s, a0); +} + +static inline void tcg_out_fsin(TCGContext *s) +{ + tcg_out8(s, 0xd9); + tcg_out8(s, 0xfe); +} + +static inline void tcg_out_fcos(TCGContext *s) +{ + tcg_out8(s, 0xd9); + tcg_out8(s, 0xff); +} + +static const uint64_t fchs_mask32 = 0x80000000; +static const uint64_t fchs_mask64 = 0x8000000000000000L; +static const uint64_t fabs_mask32 = 0x7FFFFFFF; +static const uint64_t fabs_mask64 = 0x7FFFFFFFFFFFFFFFL; + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { TCGArg a0, a1, a2; int c, const_a2, vexop, rexw = 0; + bool dp = false; #if TCG_TARGET_REG_BITS == 64 # define OP_32_64(x) \ @@ -2191,6 +2275,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case glue(glue(INDEX_op_, x), _i32) #endif +#define OP_f32_f64(x) \ + case glue(glue(INDEX_op_, x), _f64): \ + dp = true; /* FALLTHRU */ \ + case glue(glue(INDEX_op_, x), _f32) + /* Hoist the loads of the most common arguments. */ a0 = args[0]; a1 = args[1]; @@ -2232,6 +2321,134 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, /* jmp to the given host address (could be epilogue) */ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); break; + + /* FIXME: Exceptions */ + case INDEX_op_flcr: + tcg_out_st(s, TCG_TYPE_I32, a0, TCG_REG_ESP, -8); + tcg_out_modrm_offset(s, OPC_LDMXCSR, 2, TCG_REG_ESP, -8); + break; + + OP_f32_f64(st80f): + tcg_out_fld_xmm(s, a0, dp); + tcg_out_fstp(s, a1); + break; + OP_f32_f64(ld80f): + tcg_out_fld(s, a1); + tcg_out_fstp_xmm(s, a0, dp); + break; + case INDEX_op_cvt32f_f64: + tcg_out_modrm(s, OPC_CVTSS2SD, a0, a1); + break; + OP_32_64(cvt32f): + tcg_out_modrm(s, OPC_CVTSS2SI + rexw, a0, a1); + break; + OP_f32_f64(cvt32i): + tcg_out_modrm(s, OPC_PXOR, a0, a0); + tcg_out_modrm(s, dp ? OPC_CVTSI2SD : OPC_CVTSI2SS, a0, a1); + break; + case INDEX_op_cvt64f_f32: + tcg_out_modrm(s, OPC_CVTSD2SS, a0, a1); + break; + OP_32_64(cvt64f): + tcg_out_modrm(s, OPC_CVTSD2SI + rexw, a0, a1); + break; + OP_f32_f64(cvt64i): + tcg_out_modrm(s, OPC_PXOR, a0, a0); + tcg_out_modrm(s, (dp ? OPC_CVTSI2SD : OPC_CVTSI2SS) | P_REXW, a0, a1); + break; + case INDEX_op_mov64f_i64: + case INDEX_op_mov64i_f64: + case INDEX_op_mov_f64: + dp = true; /* FALLTHRU */ + case INDEX_op_mov32f_i32: + case INDEX_op_mov32i_f32: + case INDEX_op_mov_f32: + tcg_out_mov(s, dp ? TCG_TYPE_F64 : TCG_TYPE_F32, a0, a1); + break; + OP_f32_f64(add): { + int mopc = dp ? OPC_ADDSD : OPC_ADDSS; + if (a0 == a1) { + tcg_out_modrm(s, mopc, a1, a2); + } else if (a0 == a2) { + tcg_out_modrm(s, mopc, a2, a1); + } else { + tcg_out_stash_xmm(s, a1); + tcg_out_modrm(s, mopc, a1, a2); + tcg_out_mov(s, dp ? TCG_TYPE_F64 : TCG_TYPE_F32, a0, a1); + tcg_out_unstash_xmm(s, a1); + /* FIXME: AVX,reg,stack */ + } + break; + } + OP_f32_f64(sub): { + int mopc = dp ? OPC_SUBSD : OPC_SUBSS; + if (a0 == a1) { + tcg_out_modrm(s, mopc, a1, a2); + } else { + tcg_out_stash_xmm(s, a1); + tcg_out_modrm(s, mopc, a1, a2); + tcg_out_mov(s, dp ? TCG_TYPE_F64 : TCG_TYPE_F32, a0, a1); + tcg_out_unstash_xmm(s, a1); + /* FIXME: AVX,reg,stack */ + } + break; + } + OP_f32_f64(mul): { + int mopc = dp ? OPC_MULSD : OPC_MULSS; + if (a0 == a1) { + tcg_out_modrm(s, mopc, a1, a2); + } else if (a0 == a2) { + tcg_out_modrm(s, mopc, a2, a1); + } else { + /* FIXME: Handle 3 unique operand variant (AVX,reg,stack) */ + assert(0); + } + break; + } + OP_f32_f64(div): { + int mopc = dp ? 
+    OP_f32_f64(div): {
+        int mopc = dp ? OPC_DIVSD : OPC_DIVSS;
+        if (a0 == a1) {
+            tcg_out_modrm(s, mopc, a1, a2);
+        } else {
+            tcg_out_stash_xmm(s, a1);
+            tcg_out_modrm(s, mopc, a1, a2);
+            tcg_out_mov(s, dp ? TCG_TYPE_F64 : TCG_TYPE_F32, a0, a1);
+            tcg_out_unstash_xmm(s, a1);
+            /* FIXME: AVX,reg,stack */
+        }
+        break;
+    }
+    OP_f32_f64(abs):
+        assert(a0 == a1); /* FIXME: add mov */
+        tcg_out_modrm_pool(s, OPC_PAND, a0);
+        new_pool_l2(s, R_386_PC32, s->code_ptr - 4, -4,
+                    dp ? fabs_mask64 : fabs_mask32, 0);
+        break;
+    OP_f32_f64(chs):
+        assert(a0 == a1); /* FIXME: add mov */
+        tcg_out_modrm_pool(s, OPC_PXOR, a0);
+        new_pool_l2(s, R_386_PC32, s->code_ptr - 4, -4,
+                    dp ? fchs_mask64 : fchs_mask32, 0);
+        break;
+    OP_f32_f64(com):
+        tcg_out_modrm(s, dp ? OPC_COMISD : OPC_COMISS, a1, a2);
+        tcg_out8(s, OPC_PUSHF);
+        tcg_out_pop(s, a0);
+        break;
+    OP_f32_f64(sqrt):
+        tcg_out_modrm(s, dp ? OPC_SQRTSD : OPC_SQRTSS, a0, a1);
+        break;
+    OP_f32_f64(sin):
+        tcg_out_fld_xmm(s, a1, dp);
+        tcg_out_fsin(s);
+        tcg_out_fstp_xmm(s, a0, dp);
+        break;
+    OP_f32_f64(cos):
+        tcg_out_fld_xmm(s, a1, dp);
+        tcg_out_fcos(s);
+        tcg_out_fstp_xmm(s, a0, dp);
+        break;
+
     case INDEX_op_br:
         tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
         break;
@@ -2918,6 +3135,55 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_goto_ptr:
         return C_O0_I1(r);
 
+    case INDEX_op_ld80f_f32:
+    case INDEX_op_ld80f_f64:
+    case INDEX_op_st80f_f32:
+    case INDEX_op_st80f_f64:
+        return C_O1_I1(x, L);
+    case INDEX_op_flcr:
+        return C_O0_I1(r);
+    case INDEX_op_mul_f32:
+    case INDEX_op_mul_f64:
+    case INDEX_op_div_f32:
+    case INDEX_op_div_f64:
+    case INDEX_op_add_f32:
+    case INDEX_op_add_f64:
+    case INDEX_op_sub_f32:
+    case INDEX_op_sub_f64:
+        return C_O1_I2(x, x, x);
+    case INDEX_op_cvt32i_f32:
+    case INDEX_op_cvt32i_f64:
+    case INDEX_op_cvt64i_f32:
+    case INDEX_op_cvt64i_f64:
+    case INDEX_op_mov32i_f32:
+    case INDEX_op_mov64i_f64:
+        return C_O1_I1(x, r);
+    case INDEX_op_mov_f32:
+    case INDEX_op_mov_f64:
+    case INDEX_op_abs_f32:
+    case INDEX_op_abs_f64:
+    case INDEX_op_chs_f32:
+    case INDEX_op_chs_f64:
+    case INDEX_op_sqrt_f32:
+    case INDEX_op_sqrt_f64:
+    case INDEX_op_cos_f32:
+    case INDEX_op_cos_f64:
+    case INDEX_op_sin_f32:
+    case INDEX_op_sin_f64:
+    case INDEX_op_cvt32f_f64:
+    case INDEX_op_cvt64f_f32:
+        return C_O1_I1(x, x);
+    case INDEX_op_com_f32:
+    case INDEX_op_com_f64:
+        return C_O1_I2(r, x, x);
+    case INDEX_op_cvt32f_i32:
+    case INDEX_op_cvt32f_i64:
+    case INDEX_op_cvt64f_i32:
+    case INDEX_op_cvt64f_i64:
+    case INDEX_op_mov32f_i32:
+    case INDEX_op_mov64f_i64:
+        return C_O1_I1(r, x);
+
     case INDEX_op_ld8u_i32:
     case INDEX_op_ld8u_i64:
     case INDEX_op_ld8s_i32:
@@ -3796,6 +4062,8 @@ static void tcg_target_init(TCGContext *s)
     if (TCG_TARGET_REG_BITS == 64) {
         tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
     }
+    tcg_target_available_regs[TCG_TYPE_F32] = ALL_VECTOR_REGS;
+    tcg_target_available_regs[TCG_TYPE_F64] = ALL_VECTOR_REGS;
     if (have_avx1) {
         tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
         tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
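
With the constraints and the register file in place, a frontend can emit the new opcodes through the generators declared in tcg-op.h. An editor's sketch of a hypothetical frontend fragment follows; tcg_temp_new_f64() and tcg_temp_free_f64() are assumed by analogy with the existing i64 temp API and are not shown in this series:

    /* Hypothetical frontend fragment: add two guest doubles whose bit
     * patterns live in i64 temps, using the ops added by this patch. */
    static void gen_fadd_d(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
    {
        TCGv_f64 fa = tcg_temp_new_f64();   /* assumed helper */
        TCGv_f64 fb = tcg_temp_new_f64();   /* assumed helper */

        tcg_gen_mov64i_f64(fa, a);          /* i64 bits -> f64 register */
        tcg_gen_mov64i_f64(fb, b);
        tcg_gen_add_f64(fa, fa, fb);        /* selects ADDSD on this host */
        tcg_gen_mov64f_i64(ret, fa);        /* f64 bits back to an i64 temp */

        tcg_temp_free_f64(fa);              /* assumed helper */
        tcg_temp_free_f64(fb);
    }
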
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index b00a6da293..6cff3b8e3c 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -178,6 +178,8 @@ extern bool have_movbe;
 #define TCG_TARGET_HAS_qemu_st8_i32     1
 #endif
 
+#define TCG_TARGET_HAS_fpu              (TCG_TARGET_REG_BITS == 64)
+
 /* We do not support older SSE systems, only beginning with AVX1. */
 #define TCG_TARGET_HAS_v64              have_avx1
 #define TCG_TARGET_HAS_v128             have_avx1
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index c754396575..1e74dd7b41 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -223,6 +223,8 @@ void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 
 void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
 {
+    gen_bb_epilogue();
+
     if (cond == TCG_COND_ALWAYS) {
         tcg_gen_br(l);
     } else if (cond != TCG_COND_NEVER) {
@@ -233,6 +235,8 @@ void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
 
 void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
 {
+    gen_bb_epilogue();
+
     if (cond == TCG_COND_ALWAYS) {
         tcg_gen_br(l);
     } else if (cond != TCG_COND_NEVER) {
@@ -1446,6 +1450,8 @@ void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 
 void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
 {
+    gen_bb_epilogue();
+
     if (cond == TCG_COND_ALWAYS) {
         tcg_gen_br(l);
     } else if (cond != TCG_COND_NEVER) {
@@ -1463,6 +1469,8 @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
 
 void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
 {
+    gen_bb_epilogue();
+
     if (TCG_TARGET_REG_BITS == 64) {
         tcg_gen_brcond_i64(cond, arg1, tcg_constant_i64(arg2), l);
     } else if (cond == TCG_COND_ALWAYS) {
@@ -2713,6 +2721,9 @@ void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
      * This requires coordination with targets that do not use
      * the translator_loop.
      */
+
+    gen_bb_epilogue();
+
     uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx;
 
     if (tb == NULL) {
@@ -2734,6 +2745,8 @@ void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
 
 void tcg_gen_goto_tb(unsigned idx)
 {
+    gen_bb_epilogue();
+
     /* We tested CF_NO_GOTO_TB in translator_use_goto_tb. */
     tcg_debug_assert(!(tcg_ctx->tb_cflags & CF_NO_GOTO_TB));
     /* We only support two chained exits. */
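
gen_bb_epilogue() is not defined in these hunks; it presumably comes earlier in the series. The pattern is that every generator that can end a basic block (brcond*, exit_tb, goto_tb, and below, lookup_and_goto_ptr and helper calls) now runs it first, so per-block FPU bookkeeping is flushed before control can leave the block. An editor's guess at the shape of such a hook, purely illustrative:

    /* Editor's sketch only: the real gen_bb_epilogue() is defined
     * elsewhere in the series.  The invariant the callers establish is
     * that it runs before any op that terminates the basic block. */
    static void gen_bb_epilogue_sketch(void)
    {
        /* e.g. write back lazily-held FP state (control word, dirty
         * 80-bit values) so the next block sees a consistent view. */
    }
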
@@ -2756,6 +2769,8 @@ void tcg_gen_lookup_and_goto_ptr(void)
         return;
     }
 
+    gen_bb_epilogue();
+
     plugin_gen_disable_mem_helpers();
     ptr = tcg_temp_new_ptr();
     gen_helper_lookup_tb_ptr(ptr, cpu_env);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 712b17551d..76159db04f 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1456,6 +1456,49 @@ bool tcg_op_supported(TCGOpcode op)
     case INDEX_op_cmpsel_vec:
         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
 
+    case INDEX_op_flcr:
+    case INDEX_op_ld80f_f32:
+    case INDEX_op_ld80f_f64:
+    case INDEX_op_st80f_f32:
+    case INDEX_op_st80f_f64:
+    case INDEX_op_abs_f32:
+    case INDEX_op_abs_f64:
+    case INDEX_op_add_f32:
+    case INDEX_op_add_f64:
+    case INDEX_op_chs_f32:
+    case INDEX_op_chs_f64:
+    case INDEX_op_com_f32:
+    case INDEX_op_com_f64:
+    case INDEX_op_cos_f32:
+    case INDEX_op_cos_f64:
+    case INDEX_op_cvt32f_f64:
+    case INDEX_op_cvt32f_i32:
+    case INDEX_op_cvt32f_i64:
+    case INDEX_op_cvt32i_f32:
+    case INDEX_op_cvt32i_f64:
+    case INDEX_op_cvt64f_f32:
+    case INDEX_op_cvt64f_i32:
+    case INDEX_op_cvt64f_i64:
+    case INDEX_op_cvt64i_f32:
+    case INDEX_op_cvt64i_f64:
+    case INDEX_op_div_f32:
+    case INDEX_op_div_f64:
+    case INDEX_op_mov32f_i32:
+    case INDEX_op_mov32i_f32:
+    case INDEX_op_mov64f_i64:
+    case INDEX_op_mov64i_f64:
+    case INDEX_op_mov_f32:
+    case INDEX_op_mov_f64:
+    case INDEX_op_mul_f32:
+    case INDEX_op_mul_f64:
+    case INDEX_op_sin_f32:
+    case INDEX_op_sin_f64:
+    case INDEX_op_sqrt_f32:
+    case INDEX_op_sqrt_f64:
+    case INDEX_op_sub_f32:
+    case INDEX_op_sub_f64:
+        return TCG_TARGET_HAS_fpu;
+
     default:
         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
         return true;
@@ -1472,6 +1515,8 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
     const TCGHelperInfo *info;
     TCGOp *op;
 
+    gen_bb_epilogue();
+
     info = g_hash_table_lookup(helper_table, (gpointer)func);
     typemask = info->typemask;
@@ -1721,6 +1766,12 @@ static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
         snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
         break;
 #endif
+    case TCG_TYPE_F32:
+        snprintf(buf, buf_size, "$%f", *(float *)&ts->val);
+        break;
+    case TCG_TYPE_F64:
+        snprintf(buf, buf_size, "$%g", *(double *)&ts->val);
+        break;
     case TCG_TYPE_V64:
     case TCG_TYPE_V128:
     case TCG_TYPE_V256:
@@ -3058,9 +3109,11 @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
 
     switch (ts->type) {
     case TCG_TYPE_I32:
+    case TCG_TYPE_F32:
         size = align = 4;
         break;
     case TCG_TYPE_I64:
+    case TCG_TYPE_F64:
     case TCG_TYPE_V64:
         size = align = 8;
         break;
@@ -3268,6 +3321,8 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                             preferred_regs, ts->indirect_base);
         if (ts->type <= TCG_TYPE_I64) {
             tcg_out_movi(s, ts->type, reg, ts->val);
+        } else if (ts->type == TCG_TYPE_F32 || ts->type == TCG_TYPE_F64) {
+            assert(0); /* FIXME */
         } else {
             uint64_t val = ts->val;
             MemOp vece = MO_64;
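
The assert(0) above leaves F32/F64 constants unimplemented in temp_load. An editor's sketch of one way the FIXME could be resolved: move the constant's bit pattern through a scratch integer register into the xmm register, mirroring what the mov32i_f32/mov64i_f64 paths already do for temps. This is hypothetical; scratch-register selection and interaction with the register allocator are hand-waved.

    /* Editor's sketch, not part of the patch: materialize an F32/F64
     * constant via an integer scratch register. */
    static void temp_load_fp_const_sketch(TCGContext *s, TCGType type,
                                          TCGReg reg, int64_t val)
    {
        TCGReg scratch = TCG_REG_RAX;   /* assume a free general register */

        /* Put the IEEE bit pattern in a gpr... */
        tcg_out_movi(s, type == TCG_TYPE_F32 ? TCG_TYPE_I32 : TCG_TYPE_I64,
                     scratch, val);
        /* ...then let the patched tcg_out_mov do the gpr -> xmm move. */
        tcg_out_mov(s, type, reg, scratch);
    }
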