From 15eafc2e602ff8c37c6e132eb8c63fec8fc17175 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Dec 2015 17:16:08 +0100 Subject: [PATCH] kvm: x86: add support for KVM_CAP_SPLIT_IRQCHIP This patch adds support for split IRQ chip mode. When KVM_CAP_SPLIT_IRQCHIP is enabled: 1.) The PIC, PIT, and IOAPIC are implemented in userspace while the LAPIC is implemented by KVM. 2.) The software IOAPIC delivers interrupts to the KVM LAPIC via kvm_set_irq. Interrupt delivery is configured via the MSI routing table, for which routes are reserved in target-i386/kvm.c then configured in hw/intc/ioapic.c 3.) KVM delivers IOAPIC EOIs via a new exit KVM_EXIT_IOAPIC_EOI, which is handled in target-i386/kvm.c and relayed to the software IOAPIC via ioapic_eoi_broadcast. Signed-off-by: Matt Gingell Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 5 ++-- hw/i386/pc_piix.c | 4 +-- hw/intc/ioapic.c | 68 ++++++++++++++++++++++++++++++++++++++++-- include/hw/i386/pc.h | 13 ++++++++ include/sysemu/kvm.h | 6 +++- kvm-all.c | 10 +++++-- stubs/kvm.c | 2 +- target-arm/kvm.c | 8 ++++- target-i386/cpu.c | 2 +- target-i386/kvm.c | 42 ++++++++++++++++++++++++-- target-i386/kvm_i386.h | 2 ++ 11 files changed, 148 insertions(+), 14 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 5e20e07b6f..426424a721 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -65,6 +65,7 @@ #include "hw/mem/pc-dimm.h" #include "qapi/visitor.h" #include "qapi-visit.h" +#include "qom/cpu.h" /* debug PC/ISA interrupts */ //#define DEBUG_IRQ @@ -1517,7 +1518,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, qemu_register_boot_set(pc_boot_set, *rtc_state); if (!xen_enabled()) { - if (kvm_irqchip_in_kernel()) { + if (kvm_pit_in_kernel()) { pit = kvm_pit_init(isa_bus, 0x40); } else { pit = pit_init(isa_bus, 0x40, pit_isa_irq, pit_alt_irq); @@ -1592,7 +1593,7 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name) SysBusDevice *d; unsigned int i; - if (kvm_irqchip_in_kernel()) { + if (kvm_ioapic_in_kernel()) { dev = qdev_create(NULL, "kvm-ioapic"); } else { dev = qdev_create(NULL, "ioapic"); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 6a498a20d3..319497e966 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -182,7 +182,7 @@ static void pc_init1(MachineState *machine, } gsi_state = g_malloc0(sizeof(*gsi_state)); - if (kvm_irqchip_in_kernel()) { + if (kvm_ioapic_in_kernel()) { kvm_pc_setup_irq_routing(pci_enabled); gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, GSI_NUM_PINS); @@ -206,7 +206,7 @@ static void pc_init1(MachineState *machine, } isa_bus_irqs(isa_bus, gsi); - if (kvm_irqchip_in_kernel()) { + if (kvm_pic_in_kernel()) { i8259 = kvm_i8259_init(isa_bus); } else if (xen_enabled()) { i8259 = xen_interrupt_controller_init(); diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index de2dd4ba8e..e4bfaeade2 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -25,6 +25,8 @@ #include "hw/i386/pc.h" #include "hw/i386/ioapic.h" #include "hw/i386/ioapic_internal.h" +#include "include/hw/pci/msi.h" +#include "sysemu/kvm.h" //#define DEBUG_IOAPIC @@ -35,6 +37,10 @@ #define DPRINTF(fmt, ...) #endif +#define APIC_DELIVERY_MODE_SHIFT 8 +#define APIC_POLARITY_SHIFT 14 +#define APIC_TRIG_MODE_SHIFT 15 + static IOAPICCommonState *ioapics[MAX_IOAPICS]; /* global variable from ioapic_common.c */ @@ -54,6 +60,8 @@ static void ioapic_service(IOAPICCommonState *s) for (i = 0; i < IOAPIC_NUM_PINS; i++) { mask = 1 << i; if (s->irr & mask) { + int coalesce = 0; + entry = s->ioredtbl[i]; if (!(entry & IOAPIC_LVT_MASKED)) { trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1); @@ -64,6 +72,7 @@ static void ioapic_service(IOAPICCommonState *s) if (trig_mode == IOAPIC_TRIGGER_EDGE) { s->irr &= ~mask; } else { + coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR; s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR; } if (delivery_mode == IOAPIC_DM_EXTINT) { @@ -71,8 +80,23 @@ static void ioapic_service(IOAPICCommonState *s) } else { vector = entry & IOAPIC_VECTOR_MASK; } - apic_deliver_irq(dest, dest_mode, delivery_mode, - vector, trig_mode); +#ifdef CONFIG_KVM + if (kvm_irqchip_is_split()) { + if (trig_mode == IOAPIC_TRIGGER_EDGE) { + kvm_set_irq(kvm_state, i, 1); + kvm_set_irq(kvm_state, i, 0); + } else { + if (!coalesce) { + kvm_set_irq(kvm_state, i, 1); + } + } + continue; + } +#else + (void)coalesce; +#endif + apic_deliver_irq(dest, dest_mode, delivery_mode, vector, + trig_mode); } } } @@ -116,6 +140,44 @@ static void ioapic_set_irq(void *opaque, int vector, int level) } } +static void ioapic_update_kvm_routes(IOAPICCommonState *s) +{ +#ifdef CONFIG_KVM + int i; + + if (kvm_irqchip_is_split()) { + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + uint64_t entry = s->ioredtbl[i]; + uint8_t trig_mode; + uint8_t delivery_mode; + uint8_t dest; + uint8_t dest_mode; + uint64_t pin_polarity; + MSIMessage msg; + + trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1); + dest = entry >> IOAPIC_LVT_DEST_SHIFT; + dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1; + pin_polarity = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1; + delivery_mode = + (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) & IOAPIC_DM_MASK; + + msg.address = APIC_DEFAULT_ADDRESS; + msg.address |= dest_mode << 2; + msg.address |= dest << 12; + + msg.data = entry & IOAPIC_VECTOR_MASK; + msg.data |= delivery_mode << APIC_DELIVERY_MODE_SHIFT; + msg.data |= pin_polarity << APIC_POLARITY_SHIFT; + msg.data |= trig_mode << APIC_TRIG_MODE_SHIFT; + + kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); + } + kvm_irqchip_commit_routes(kvm_state); + } +#endif +} + void ioapic_eoi_broadcast(int vector) { IOAPICCommonState *s; @@ -229,6 +291,8 @@ ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val, } break; } + + ioapic_update_kvm_routes(s); } static const MemoryRegionOps ioapic_io_ops = { diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 854c330b66..4bf4faf050 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -20,6 +20,19 @@ #define HPET_INTCAP "hpet-intcap" +#ifdef CONFIG_KVM +#define kvm_pit_in_kernel() \ + (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) +#define kvm_pic_in_kernel() \ + (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) +#define kvm_ioapic_in_kernel() \ + (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) +#else +#define kvm_pit_in_kernel() 0 +#define kvm_pic_in_kernel() 0 +#define kvm_ioapic_in_kernel() 0 +#endif + /** * PCMachineState: * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index c8f43dc82d..7741f91f9c 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -174,6 +174,7 @@ extern bool kvm_ioeventfd_any_length_allowed; #else #define kvm_enabled() (0) #define kvm_irqchip_in_kernel() (false) +#define kvm_irqchip_is_split() (false) #define kvm_async_interrupts_enabled() (false) #define kvm_halt_in_kernel() (false) #define kvm_eventfds_enabled() (false) @@ -317,6 +318,8 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run); int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run); +int kvm_arch_handle_ioapic_eoi(CPUState *cpu, struct kvm_run *run); + int kvm_arch_process_async_events(CPUState *cpu); int kvm_arch_get_registers(CPUState *cpu); @@ -484,6 +487,7 @@ void kvm_init_irq_routing(KVMState *s); /** * kvm_arch_irqchip_create: * @KVMState: The KVMState pointer + * @MachineState: The MachineState pointer * * Allow architectures to create an in-kernel irq chip themselves. * @@ -491,7 +495,7 @@ void kvm_init_irq_routing(KVMState *s); * 0: irq chip was not created * > 0: irq chip was created */ -int kvm_arch_irqchip_create(KVMState *s); +int kvm_arch_irqchip_create(MachineState *ms, KVMState *s); /** * kvm_set_one_reg - set a register value in KVM via KVM_SET_ONE_REG ioctl diff --git a/kvm-all.c b/kvm-all.c index e78a3786d6..bd9e7641b2 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -99,6 +99,7 @@ struct KVMState KVMState *kvm_state; bool kvm_kernel_irqchip; +bool kvm_split_irqchip; bool kvm_async_interrupts_allowed; bool kvm_halt_in_kernel_allowed; bool kvm_eventfds_allowed; @@ -1430,9 +1431,14 @@ static void kvm_irqchip_create(MachineState *machine, KVMState *s) /* First probe and see if there's a arch-specific hook to create the * in-kernel irqchip for us */ - ret = kvm_arch_irqchip_create(s); + ret = kvm_arch_irqchip_create(machine, s); if (ret == 0) { - ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP); + if (machine_kernel_irqchip_split(machine)) { + perror("Split IRQ chip mode not supported."); + exit(1); + } else { + ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP); + } } if (ret < 0) { fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret)); diff --git a/stubs/kvm.c b/stubs/kvm.c index e7c60b6e0c..828b824f2c 100644 --- a/stubs/kvm.c +++ b/stubs/kvm.c @@ -1,7 +1,7 @@ #include "qemu-common.h" #include "sysemu/kvm.h" -int kvm_arch_irqchip_create(KVMState *s) +int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) { return 0; } diff --git a/target-arm/kvm.c b/target-arm/kvm.c index 84974bbd32..eca3a0037d 100644 --- a/target-arm/kvm.c +++ b/target-arm/kvm.c @@ -25,6 +25,7 @@ #include "internals.h" #include "hw/arm/arm.h" #include "exec/memattrs.h" +#include "hw/boards.h" const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_LAST_INFO @@ -578,8 +579,13 @@ void kvm_arch_init_irq_routing(KVMState *s) { } -int kvm_arch_irqchip_create(KVMState *s) +int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) { + if (machine_kernel_irqchip_split(ms)) { + perror("-machine kernel_irqchip=split is not supported on ARM."); + exit(1); + } + /* If we can create the VGIC using the newer device control API, we * let the device do this when it initializes itself, otherwise we * fall back to the old API */ diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 92f7cc1f7d..0d447b5690 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -2743,7 +2743,7 @@ static void x86_cpu_apic_create(X86CPU *cpu, Error **errp) APICCommonState *apic; const char *apic_type = "apic"; - if (kvm_irqchip_in_kernel()) { + if (kvm_apic_in_kernel()) { apic_type = "kvm-apic"; } else if (xen_enabled()) { apic_type = "xen-apic"; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 7692b5984d..7b01328ffe 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -39,6 +39,7 @@ #include "exec/ioport.h" #include "standard-headers/asm-x86/hyperv.h" #include "hw/pci/pci.h" +#include "hw/pci/msi.h" #include "migration/migration.h" #include "exec/memattrs.h" @@ -2597,7 +2598,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) } } - if (!kvm_irqchip_in_kernel()) { + if (!kvm_pic_in_kernel()) { qemu_mutex_lock_iothread(); } @@ -2615,7 +2616,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) } } - if (!kvm_irqchip_in_kernel()) { + if (!kvm_pic_in_kernel()) { /* Try to inject an interrupt if the guest can accept it */ if (run->ready_for_interrupt_injection && (cpu->interrupt_request & CPU_INTERRUPT_HARD) && @@ -3017,6 +3018,10 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) case KVM_EXIT_HYPERV: ret = kvm_hv_handle_exit(cpu, &run->hyperv); break; + case KVM_EXIT_IOAPIC_EOI: + ioapic_eoi_broadcast(run->eoi.vector); + ret = 0; + break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); ret = -1; @@ -3051,6 +3056,39 @@ void kvm_arch_init_irq_routing(KVMState *s) */ kvm_msi_via_irqfd_allowed = true; kvm_gsi_routing_allowed = true; + + if (kvm_irqchip_is_split()) { + int i; + + /* If the ioapic is in QEMU and the lapics are in KVM, reserve + MSI routes for signaling interrupts to the local apics. */ + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + struct MSIMessage msg = { 0x0, 0x0 }; + if (kvm_irqchip_add_msi_route(s, msg, NULL) < 0) { + error_report("Could not enable split IRQ mode."); + exit(1); + } + } + } +} + +int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) +{ + int ret; + if (machine_kernel_irqchip_split(ms)) { + ret = kvm_vm_enable_cap(s, KVM_CAP_SPLIT_IRQCHIP, 0, 24); + if (ret) { + error_report("Could not enable split irqchip mode: %s\n", + strerror(-ret)); + exit(1); + } else { + DPRINTF("Enabled KVM_CAP_SPLIT_IRQCHIP\n"); + kvm_split_irqchip = true; + return 1; + } + } else { + return 0; + } } /* Classic KVM device assignment interface. Will remain x86 only. */ diff --git a/target-i386/kvm_i386.h b/target-i386/kvm_i386.h index c1b312ba2a..42b00af1b1 100644 --- a/target-i386/kvm_i386.h +++ b/target-i386/kvm_i386.h @@ -13,6 +13,8 @@ #include "sysemu/kvm.h" +#define kvm_apic_in_kernel() (kvm_irqchip_in_kernel()) + bool kvm_allows_irq0_override(void); bool kvm_has_smm(void); void kvm_synchronize_all_tsc(void);