Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20200228' into staging

Fix race in cpu_exec_step_atomic.
Work around compile failure with -fno-inline.
Expand tcg/arm epilogue inline.
Adjustments to the default code gen buffer size.

# gpg: Signature made Sat 29 Feb 2020 02:13:43 GMT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20200228:
  accel/tcg: increase default code gen buffer size for 64 bit
  accel/tcg: only USE_STATIC_CODE_GEN_BUFFER on 32 bit hosts
  accel/tcg: remove link between guest ram and TCG cache size
  accel/tcg: use units.h for defining code gen buffer sizes
  tcg/arm: Expand epilogue inline
  tcg/arm: Split out tcg_out_epilogue
  compiler.h: Don't use compile-time assert when __NO_INLINE__ is defined
  accel/tcg: fix race in cpu_exec_step_atomic (bug 1863025)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

commit 9f1750ed68
Author: Peter Maydell <peter.maydell@linaro.org>
Date:   2020-03-02 10:45:46 +00:00

4 changed files with 60 additions and 53 deletions

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c

@@ -240,6 +240,8 @@ void cpu_exec_step_atomic(CPUState *cpu)
     uint32_t cf_mask = cflags & CF_HASH_MASK;
 
     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
+        start_exclusive();
+
         tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
         if (tb == NULL) {
             mmap_lock();
@@ -247,8 +249,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
             mmap_unlock();
         }
 
-        start_exclusive();
-
         /* Since we got here, we know that parallel_cpus must be true.  */
         parallel_cpus = false;
         cc->cpu_exec_enter(cpu);
@@ -271,14 +271,15 @@ void cpu_exec_step_atomic(CPUState *cpu)
         qemu_plugin_disable_mem_helpers(cpu);
     }
 
-    if (cpu_in_exclusive_context(cpu)) {
-        /* We might longjump out of either the codegen or the
-         * execution, so must make sure we only end the exclusive
-         * region if we started it.
-         */
-        parallel_cpus = true;
-        end_exclusive();
-    }
+
+    /*
+     * As we start the exclusive region before codegen we must still
+     * be in the region if we longjump out of either the codegen or
+     * the execution.
+     */
+    g_assert(cpu_in_exclusive_context(cpu));
+    parallel_cpus = true;
+    end_exclusive();
 }
 
 struct tb_desc {

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c

@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu-common.h"
 
 #define NO_CPU_IO_DEFS
@@ -891,43 +892,61 @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
     }
 }
 
-#if defined(CONFIG_USER_ONLY)
-/* Currently it is not recommended to allocate big chunks of data in
-   user mode. It will change when a dedicated libc will be used.  */
-/* ??? 64-bit hosts ought to have no problem mmaping data outside the
-   region in which the guest needs to run.  Revisit this.  */
-#define USE_STATIC_CODE_GEN_BUFFER
-#endif
-
 /* Minimum size of the code gen buffer.  This number is randomly chosen,
    but not so small that we can't have a fair number of TB's live.  */
-#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
+#define MIN_CODE_GEN_BUFFER_SIZE     (1 * MiB)
 
 /* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
    indicated, this is constrained by the range of direct branches on the
    host cpu, as used by the TCG implementation of goto_tb.  */
 #if defined(__x86_64__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
 #elif defined(__sparc__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
 #elif defined(__powerpc64__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
 #elif defined(__powerpc__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (32u * 1024 * 1024)
+# define MAX_CODE_GEN_BUFFER_SIZE  (32 * MiB)
 #elif defined(__aarch64__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
 #elif defined(__s390x__)
   /* We have a +- 4GB range on the branches; leave some slop.  */
-# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
+# define MAX_CODE_GEN_BUFFER_SIZE  (3 * GiB)
 #elif defined(__mips__)
   /* We have a 256MB branch region, but leave room to make sure the
      main executable is also within that region.  */
-# define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
+# define MAX_CODE_GEN_BUFFER_SIZE  (128 * MiB)
 #else
 # define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 #endif
 
-#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
+#if TCG_TARGET_REG_BITS == 32
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
+#ifdef CONFIG_USER_ONLY
+/*
+ * For user mode on smaller 32 bit systems we may run into trouble
+ * allocating big chunks of data in the right place. On these systems
+ * we utilise a static code generation buffer directly in the binary.
+ */
+#define USE_STATIC_CODE_GEN_BUFFER
+#endif
+#else /* TCG_TARGET_REG_BITS == 64 */
+#ifdef CONFIG_USER_ONLY
+/*
+ * As user-mode emulation typically means running multiple instances
+ * of the translator don't go too nuts with our default code gen
+ * buffer lest we make things too hard for the OS.
+ */
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
+#else
+/*
+ * We expect most system emulation to run one or two guests per host.
+ * Users running large scale system emulation may want to tweak their
+ * runtime setup via the tb-size control on the command line.
+ */
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
+#endif
+#endif
 
 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
   (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
@@ -937,15 +956,7 @@ static inline size_t size_code_gen_buffer(size_t tb_size)
 {
     /* Size the buffer.  */
     if (tb_size == 0) {
-#ifdef USE_STATIC_CODE_GEN_BUFFER
         tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-#else
-        /* ??? Needs adjustments.  */
-        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
-           static buffer, we could size this on RESERVED_VA, on the text
-           segment size of the executable, or continue to use the default.  */
-        tb_size = (unsigned long)(ram_size / 4);
-#endif
     }
     if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
         tb_size = MIN_CODE_GEN_BUFFER_SIZE;
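
To make the new sizing concrete, here is a small stand-alone program that mirrors the clamping arithmetic of DEFAULT_CODE_GEN_BUFFER_SIZE outside of QEMU. The MiB/GiB definitions follow qemu/units.h and the x86-64 maximum is taken from the table above; this is an illustration of the macro logic, not QEMU code.

#include <stdio.h>

#define MiB (1024LL * 1024)
#define GiB (1024LL * MiB)

int main(void)
{
    long long max_x86_64   = 2 * GiB;   /* MAX_CODE_GEN_BUFFER_SIZE, x86-64 */
    long long def_system64 = 1 * GiB;   /* 64-bit host, system emulation */
    long long def_user64   = 128 * MiB; /* 64-bit host, user-mode emulation */
    long long def_32bit    = 32 * MiB;  /* any 32-bit host */

    /* DEFAULT_CODE_GEN_BUFFER_SIZE clamps the default to the host maximum. */
    long long chosen = def_system64 < max_x86_64 ? def_system64 : max_x86_64;

    printf("64-bit system emulation: %lld MiB\n", chosen / MiB);
    printf("64-bit user mode:        %lld MiB\n", def_user64 / MiB);
    printf("32-bit host:             %lld MiB\n", def_32bit / MiB);
    return 0;
}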

diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h

@@ -236,7 +236,7 @@
  * supports QEMU_ERROR, this will be reported at compile time; otherwise
  * this will be reported at link time due to the missing symbol.
  */
-#ifdef __OPTIMIZE__
+#if defined(__OPTIMIZE__) && !defined(__NO_INLINE__)
 extern void QEMU_NORETURN QEMU_ERROR("code path is reachable")
     qemu_build_not_reached(void);
 #else
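
The extra guard matters because the link-time half of this trick only works when calls in provably-dead branches are optimised away, and -fno-inline (which defines __NO_INLINE__) can defeat that. A self-contained sketch of the pattern follows, using a hypothetical link_time_error() in place of QEMU's qemu_build_not_reached():

/* Hypothetical symbol, deliberately declared but never defined. */
extern void link_time_error(void);

static inline int size_class(int n)
{
    if (n <= 16) {
        return 0;
    }
    if (n <= 64) {
        return 1;
    }
    /* Reachable only for n > 64.  In an inlined, optimised build a
     * constant argument lets the compiler prove this branch dead and
     * drop the call.  With -fno-inline the call can survive in the
     * emitted function body, leaving an undefined reference at link
     * time; hence the !defined(__NO_INLINE__) guard above. */
    link_time_error();
    return -1;
}

int main(void)
{
    /* Links with e.g. gcc -O2; may fail to link with gcc -O2 -fno-inline. */
    return size_class(8);
}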

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c

@@ -1745,7 +1745,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 #endif
 }
 
-static tcg_insn_unit *tb_ret_addr;
+static void tcg_out_epilogue(TCGContext *s);
 
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                               const TCGArg *args, const int *const_args)
@@ -1755,14 +1755,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     switch (opc) {
     case INDEX_op_exit_tb:
-        /* Reuse the zeroing that exists for goto_ptr.  */
-        a0 = args[0];
-        if (a0 == 0) {
-            tcg_out_goto(s, COND_AL, s->code_gen_epilogue);
-        } else {
-            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
-            tcg_out_goto(s, COND_AL, tb_ret_addr);
-        }
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);
+        tcg_out_epilogue(s);
         break;
     case INDEX_op_goto_tb:
         {
@@ -2284,19 +2278,17 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
                   + TCG_TARGET_STACK_ALIGN - 1) \
                  & -TCG_TARGET_STACK_ALIGN)
 
+#define STACK_ADDEND (FRAME_SIZE - PUSH_SIZE)
+
 static void tcg_target_qemu_prologue(TCGContext *s)
 {
-    int stack_addend;
-
     /* Calling convention requires us to save r4-r11 and lr.  */
     /* stmdb sp!, { r4 - r11, lr } */
     tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
 
     /* Reserve callee argument and tcg temp space.  */
-    stack_addend = FRAME_SIZE - PUSH_SIZE;
-
-    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
-                   TCG_REG_CALL_STACK, stack_addend, 1);
+    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
+                   TCG_REG_CALL_STACK, STACK_ADDEND, 1);
     tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 
@@ -2310,11 +2302,14 @@ static void tcg_target_qemu_prologue(TCGContext *s)
      */
     s->code_gen_epilogue = s->code_ptr;
     tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
+    tcg_out_epilogue(s);
+}
 
-    /* TB epilogue */
-    tb_ret_addr = s->code_ptr;
+static void tcg_out_epilogue(TCGContext *s)
+{
+    /* Release local stack frame.  */
     tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
-                   TCG_REG_CALL_STACK, stack_addend, 1);
+                   TCG_REG_CALL_STACK, STACK_ADDEND, 1);
 
     /* ldmia sp!, { r4 - r11, pc } */
     tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
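
The effect of the two tcg/arm patches is easiest to see side by side: exit_tb used to branch to a single shared return path at tb_ret_addr, and now the two-instruction epilogue is expanded at every exit site instead, trading a few bytes of code for one fewer taken branch per TB exit. Below is a toy model of the two emission strategies that prints pseudo-assembly rather than encoding real instructions; it is an illustration of the design choice, not the backend code.

#include <stdio.h>

/* Toy stand-in for the backend's instruction emitters. */
static void emit(const char *insn) { printf("    %s\n", insn); }

static void emit_epilogue(void)
{
    /* Mirrors tcg_out_epilogue(): release the frame, pop r4-r11 and pc. */
    emit("add  sp, sp, #STACK_ADDEND");
    emit("ldmia sp!, { r4 - r11, pc }");
}

static void emit_exit_tb_old(long ret)
{
    /* Old scheme: a branch to one shared copy of the epilogue. */
    if (ret == 0) {
        emit("b    code_gen_epilogue   @ reuses the r0-zeroing path");
    } else {
        printf("    mov  r0, #%ld\n", ret);
        emit("b    tb_ret_addr         @ extra taken branch per exit");
    }
}

static void emit_exit_tb_new(long ret)
{
    /* New scheme: expand the epilogue inline at the exit site. */
    printf("    mov  r0, #%ld\n", ret);
    emit_epilogue();
}

int main(void)
{
    puts("exit_tb, old:");
    emit_exit_tb_old(1);
    puts("exit_tb, new:");
    emit_exit_tb_new(1);
    return 0;
}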