diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 61e44cb859..da6eb67cfb 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4819,7 +4819,11 @@ static void x86_cpu_reset(CPUState *s)
     memset(env->mtrr_fixed, 0, sizeof(env->mtrr_fixed));
 
     env->interrupt_injected = -1;
-    env->exception_injected = -1;
+    env->exception_nr = -1;
+    env->exception_pending = 0;
+    env->exception_injected = 0;
+    env->exception_has_payload = false;
+    env->exception_payload = 0;
     env->nmi_injected = false;
 #if !defined(CONFIG_USER_ONLY)
     /* We hard-wire the BSP to the first CPU. */
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 17116ef954..93345792f4 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1348,10 +1348,14 @@ typedef struct CPUX86State {
 
     /* For KVM */
     uint32_t mp_state;
-    int32_t exception_injected;
+    int32_t exception_nr;
     int32_t interrupt_injected;
     uint8_t soft_interrupt;
+    uint8_t exception_pending;
+    uint8_t exception_injected;
     uint8_t has_error_code;
+    uint8_t exception_has_payload;
+    uint64_t exception_payload;
     uint32_t ins_len;
     uint32_t sipi_vector;
     bool tsc_valid;
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 2751c8125c..dc4bb63536 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -605,7 +605,9 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in
     X86CPU *x86_cpu = X86_CPU(cpu);
     CPUX86State *env = &x86_cpu->env;
 
-    env->exception_injected = -1;
+    env->exception_nr = -1;
+    env->exception_pending = 0;
+    env->exception_injected = 0;
     env->interrupt_injected = -1;
     env->nmi_injected = false;
     if (idtvec_info & VMCS_IDT_VEC_VALID) {
@@ -619,7 +621,8 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in
             break;
         case VMCS_IDT_VEC_HWEXCEPTION:
         case VMCS_IDT_VEC_SWEXCEPTION:
-            env->exception_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
+            env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
+            env->exception_injected = 1;
             break;
         case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
         default:
@@ -912,7 +915,8 @@ int hvf_vcpu_exec(CPUState *cpu)
             macvm_set_rip(cpu, rip + ins_len);
             break;
         case VMX_REASON_VMCALL:
-            env->exception_injected = EXCP0D_GPF;
+            env->exception_nr = EXCP0D_GPF;
+            env->exception_injected = 1;
             env->has_error_code = true;
             env->error_code = 0;
             break;
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index df8e946fbc..e0ea02d631 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -362,8 +362,8 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
     if (env->interrupt_injected != -1) {
         vector = env->interrupt_injected;
         intr_type = VMCS_INTR_T_SWINTR;
-    } else if (env->exception_injected != -1) {
-        vector = env->exception_injected;
+    } else if (env->exception_nr != -1) {
+        vector = env->exception_nr;
         if (vector == EXCP03_INT3 || vector == EXCP04_INTO) {
             intr_type = VMCS_INTR_T_SWEXCEPTION;
         } else {
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index e924663f32..c931e9dd7b 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -104,6 +104,7 @@ static uint32_t num_architectural_pmu_fixed_counters;
 static int has_xsave;
 static int has_xcrs;
 static int has_pit_state2;
+static int has_exception_payload;
 
 static bool has_msr_mcg_ext_ctl;
 
@@ -584,15 +585,56 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
     /* Hope we are lucky for AO MCE */
 }
 
+static void kvm_reset_exception(CPUX86State *env)
+{
+    env->exception_nr = -1;
+    env->exception_pending = 0;
+    env->exception_injected = 0;
+    env->exception_has_payload = false;
+    env->exception_payload = 0;
+}
+
+static void kvm_queue_exception(CPUX86State *env,
+                                int32_t exception_nr,
+                                uint8_t exception_has_payload,
+                                uint64_t exception_payload)
+{
+    assert(env->exception_nr == -1);
+    assert(!env->exception_pending);
+    assert(!env->exception_injected);
+    assert(!env->exception_has_payload);
+
+    env->exception_nr = exception_nr;
+
+    if (has_exception_payload) {
+        env->exception_pending = 1;
+
+        env->exception_has_payload = exception_has_payload;
+        env->exception_payload = exception_payload;
+    } else {
+        env->exception_injected = 1;
+
+        if (exception_nr == EXCP01_DB) {
+            assert(exception_has_payload);
+            env->dr[6] = exception_payload;
+        } else if (exception_nr == EXCP0E_PAGE) {
+            assert(exception_has_payload);
+            env->cr[2] = exception_payload;
+        } else {
+            assert(!exception_has_payload);
+        }
+    }
+}
+
 static int kvm_inject_mce_oldstyle(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
 
-    if (!kvm_has_vcpu_events() && env->exception_injected == EXCP12_MCHK) {
+    if (!kvm_has_vcpu_events() && env->exception_nr == EXCP12_MCHK) {
         unsigned int bank, bank_num = env->mcg_cap & 0xff;
         struct kvm_x86_mce mce;
 
-        env->exception_injected = -1;
+        kvm_reset_exception(env);
 
         /*
          * There must be at least one bank in use if an MCE is pending.
@@ -1943,6 +1985,16 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
 
     hv_vpindex_settable = kvm_check_extension(s, KVM_CAP_HYPERV_VP_INDEX);
 
+    has_exception_payload = kvm_check_extension(s, KVM_CAP_EXCEPTION_PAYLOAD);
+    if (has_exception_payload) {
+        ret = kvm_vm_enable_cap(s, KVM_CAP_EXCEPTION_PAYLOAD, 0, true);
+        if (ret < 0) {
+            error_report("kvm: Failed to enable exception payload cap: %s",
+                         strerror(-ret));
+            return ret;
+        }
+    }
+
     ret = kvm_get_supported_msrs(s);
     if (ret < 0) {
         return ret;
@@ -3253,8 +3305,16 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
         return 0;
     }
 
-    events.exception.injected = (env->exception_injected >= 0);
-    events.exception.nr = env->exception_injected;
+    events.flags = 0;
+
+    if (has_exception_payload) {
+        events.flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
+        events.exception.pending = env->exception_pending;
+        events.exception_has_payload = env->exception_has_payload;
+        events.exception_payload = env->exception_payload;
+    }
+    events.exception.nr = env->exception_nr;
+    events.exception.injected = env->exception_injected;
     events.exception.has_error_code = env->has_error_code;
     events.exception.error_code = env->error_code;
 
@@ -3267,7 +3327,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
     events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);
 
     events.sipi_vector = env->sipi_vector;
-    events.flags = 0;
 
     if (has_msr_smbase) {
         events.smi.smm = !!(env->hflags & HF_SMM_MASK);
@@ -3317,8 +3376,19 @@ static int kvm_get_vcpu_events(X86CPU *cpu)
     if (ret < 0) {
         return ret;
     }
-    env->exception_injected =
-        events.exception.injected ? events.exception.nr : -1;
+
+    if (events.flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
+        env->exception_pending = events.exception.pending;
+        env->exception_has_payload = events.exception_has_payload;
+        env->exception_payload = events.exception_payload;
+    } else {
+        env->exception_pending = 0;
+        env->exception_has_payload = false;
+    }
+    env->exception_injected = events.exception.injected;
+    env->exception_nr =
+        (env->exception_pending || env->exception_injected) ?
+        events.exception.nr : -1;
     env->has_error_code = events.exception.has_error_code;
     env->error_code = events.exception.error_code;
 
@@ -3370,12 +3440,12 @@ static int kvm_guest_debug_workarounds(X86CPU *cpu)
     unsigned long reinject_trap = 0;
 
     if (!kvm_has_vcpu_events()) {
-        if (env->exception_injected == EXCP01_DB) {
+        if (env->exception_nr == EXCP01_DB) {
             reinject_trap = KVM_GUESTDBG_INJECT_DB;
         } else if (env->exception_injected == EXCP03_INT3) {
             reinject_trap = KVM_GUESTDBG_INJECT_BP;
         }
-        env->exception_injected = -1;
+        kvm_reset_exception(env);
     }
 
     /*
@@ -3751,13 +3821,13 @@ int kvm_arch_process_async_events(CPUState *cs)
 
         kvm_cpu_synchronize_state(cs);
 
-        if (env->exception_injected == EXCP08_DBLE) {
+        if (env->exception_nr == EXCP08_DBLE) {
             /* this means triple fault */
             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
             cs->exit_request = 1;
             return 0;
         }
-        env->exception_injected = EXCP12_MCHK;
+        kvm_queue_exception(env, EXCP12_MCHK, 0, 0);
         env->has_error_code = 0;
 
         cs->halted = 0;
@@ -3972,14 +4042,13 @@ static int kvm_handle_debug(X86CPU *cpu,
     }
     if (ret == 0) {
         cpu_synchronize_state(cs);
-        assert(env->exception_injected == -1);
+        assert(env->exception_nr == -1);
 
         /* pass to guest */
-        env->exception_injected = arch_info->exception;
+        kvm_queue_exception(env, arch_info->exception,
+                            arch_info->exception == EXCP01_DB,
+                            arch_info->dr6);
         env->has_error_code = 0;
-        if (arch_info->exception == EXCP01_DB) {
-            env->dr[6] = arch_info->dr6;
-        }
     }
 
     return ret;
diff --git a/target/i386/machine.c b/target/i386/machine.c
index a6afdf8720..fc49e5ad94 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -240,6 +240,41 @@ static int cpu_pre_save(void *opaque)
     }
 #endif
 
+    /*
+     * When the vCPU is running L2 and an exception is still pending,
+     * it can potentially be intercepted by the L1 hypervisor,
+     * in contrast to an injected exception, which cannot be
+     * intercepted anymore.
+     *
+     * Furthermore, when an L2 exception is intercepted by the L1
+     * hypervisor, its exception payload (CR2/DR6 on #PF/#DB)
+     * should not yet be set in the respective vCPU register.
+     * Thus, in case an exception is pending, it is
+     * important to save the exception payload separately.
+     *
+     * Therefore, if an exception is not in a pending state
+     * or the vCPU is not in guest-mode, it is not important to
+     * distinguish between a pending and an injected exception
+     * and we don't need to store the exception payload separately.
+     *
+     * In order to preserve better backwards-compatible migration,
+     * convert a pending exception to an injected exception in
+     * case it is not important to distinguish between them
+     * as described above.
+     */
+    if (env->exception_pending && !(env->hflags & HF_GUEST_MASK)) {
+        env->exception_pending = 0;
+        env->exception_injected = 1;
+
+        if (env->exception_has_payload) {
+            if (env->exception_nr == EXCP01_DB) {
+                env->dr[6] = env->exception_payload;
+            } else if (env->exception_nr == EXCP0E_PAGE) {
+                env->cr[2] = env->exception_payload;
+            }
+        }
+    }
+
     return 0;
 }
 
@@ -297,6 +332,23 @@ static int cpu_post_load(void *opaque, int version_id)
     }
 #endif
 
+    /*
+     * There are cases where we can get a valid exception_nr with both
+     * exception_pending and exception_injected being cleared.
+     * This can happen in one of the following scenarios:
+     * 1) Source is an older QEMU without KVM_CAP_EXCEPTION_PAYLOAD support.
+     * 2) Source is running on a kernel without KVM_CAP_EXCEPTION_PAYLOAD support.
+ * 3) "cpu/exception_info" subsection not sent because there is no exception + * pending or guest wasn't running L2 (See comment in cpu_pre_save()). + * + * In those cases, we can just deduce that a valid exception_nr means + * we can treat the exception as already injected. + */ + if ((env->exception_nr != -1) && + !env->exception_pending && !env->exception_injected) { + env->exception_injected = 1; + } + env->fpstt = (env->fpus_vmstate >> 11) & 7; env->fpus = env->fpus_vmstate & ~0x3800; env->fptag_vmstate ^= 0xff; @@ -342,6 +394,35 @@ static bool steal_time_msr_needed(void *opaque) return cpu->env.steal_time_msr != 0; } +static bool exception_info_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + /* + * It is important to save exception-info only in case + * we need to distingiush between a pending and injected + * exception. Which is only required in case there is a + * pending exception and vCPU is running L2. + * For more info, refer to comment in cpu_pre_save(). + */ + return env->exception_pending && (env->hflags & HF_GUEST_MASK); +} + +static const VMStateDescription vmstate_exception_info = { + .name = "cpu/exception_info", + .version_id = 1, + .minimum_version_id = 1, + .needed = exception_info_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(env.exception_pending, X86CPU), + VMSTATE_UINT8(env.exception_injected, X86CPU), + VMSTATE_UINT8(env.exception_has_payload, X86CPU), + VMSTATE_UINT64(env.exception_payload, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_steal_time_msr = { .name = "cpu/steal_time_msr", .version_id = 1, @@ -1251,7 +1332,7 @@ VMStateDescription vmstate_x86_cpu = { VMSTATE_INT32(env.interrupt_injected, X86CPU), VMSTATE_UINT32(env.mp_state, X86CPU), VMSTATE_UINT64(env.tsc, X86CPU), - VMSTATE_INT32(env.exception_injected, X86CPU), + VMSTATE_INT32(env.exception_nr, X86CPU), VMSTATE_UINT8(env.soft_interrupt, X86CPU), VMSTATE_UINT8(env.nmi_injected, X86CPU), VMSTATE_UINT8(env.nmi_pending, X86CPU), @@ -1275,6 +1356,7 @@ VMStateDescription vmstate_x86_cpu = { /* The above list is not sorted /wrt version numbers, watch out! */ }, .subsections = (const VMStateDescription*[]) { + &vmstate_exception_info, &vmstate_async_pf_msr, &vmstate_pv_eoi_msr, &vmstate_steal_time_msr,