mirror of
https://github.com/reactos/syzkaller.git
synced 2024-11-27 05:10:43 +00:00
812 lines
24 KiB
C
812 lines
24 KiB
C
// Copyright 2017 syzkaller project authors. All rights reserved.
|
||
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
|
||
|
||
// This file is shared between executor and csource package.
|
||
|
||
// Implementation of syz_kvm_setup_cpu pseudo-syscall.
|
||
// See Intel Software Developer’s Manual Volume 3: System Programming Guide
|
||
// for details on what happens here.
|
||
|
||
#include "kvm.S.h"
|
||
#include "kvm.h"
|
||
|
||
// Define KVM_SMI locally when the included headers do not provide it.
#if !defined(KVM_SMI)
#define KVM_SMI _IO(KVMIO, 0xb7)
#endif
|
||
|
||
// CR0 control register bits.
// The constants are unsigned: (1 << 31) overflows a signed int (undefined
// behavior) and a negative int would sign-extend when combined with the
// 64-bit register values in struct kvm_sregs.
#define CR0_PE (1U << 0)
#define CR0_MP (1U << 1)
#define CR0_EM (1U << 2)
#define CR0_TS (1U << 3)
#define CR0_ET (1U << 4)
#define CR0_NE (1U << 5)
#define CR0_WP (1U << 16)
#define CR0_AM (1U << 18)
#define CR0_NW (1U << 29)
#define CR0_CD (1U << 30)
#define CR0_PG (1U << 31)
|
||
|
||
// CR4 control register bits.
#define CR4_VME 1
#define CR4_PVI (1 << 1)
#define CR4_TSD (1 << 2)
#define CR4_DE (1 << 3)
#define CR4_PSE (1 << 4)
#define CR4_PAE (1 << 5)
#define CR4_MCE (1 << 6)
#define CR4_PGE (1 << 7)
#define CR4_PCE (1 << 8)
// OSFXSR is bit 9; it must not alias CR4_PCE (bit 8).
#define CR4_OSFXSR (1 << 9)
#define CR4_OSXMMEXCPT (1 << 10)
#define CR4_UMIP (1 << 11)
#define CR4_VMXE (1 << 13)
#define CR4_SMXE (1 << 14)
#define CR4_FSGSBASE (1 << 16)
#define CR4_PCIDE (1 << 17)
#define CR4_OSXSAVE (1 << 18)
#define CR4_SMEP (1 << 20)
#define CR4_SMAP (1 << 21)
#define CR4_PKE (1 << 22)
|
||
|
||
// EFER MSR bits, spelled as hex masks (bit number in the trailing comment).
#define EFER_SCE 0x0001 // bit 0
#define EFER_LME 0x0100 // bit 8
#define EFER_LMA 0x0400 // bit 10
#define EFER_NXE 0x0800 // bit 11
#define EFER_SVME 0x1000 // bit 12
#define EFER_LMSLE 0x2000 // bit 13
#define EFER_FFXSR 0x4000 // bit 14
#define EFER_TCE 0x8000 // bit 15
|
||
|
||
// Flag bits of a 32-bit page directory entry (bit number in the trailing comment).
#define PDE32_PRESENT 0x01 // bit 0
#define PDE32_RW 0x02 // bit 1
#define PDE32_USER 0x04 // bit 2
#define PDE32_PS 0x80 // bit 7
|
||
|
||
// Flag bits shared by the 64-bit paging-structure entries
// (bit number in the trailing comment).
#define PDE64_PRESENT 0x001 // bit 0
#define PDE64_RW 0x002 // bit 1
#define PDE64_USER 0x004 // bit 2
#define PDE64_ACCESSED 0x020 // bit 5
#define PDE64_DIRTY 0x040 // bit 6
#define PDE64_PS 0x080 // bit 7
#define PDE64_G 0x100 // bit 8
|
||
|
||
// Packed image of a 16-bit task state segment: 22 consecutive 16-bit
// fields (44 bytes). syz_kvm_setup_cpu fills one on the stack and copies
// it into guest memory.
struct tss16 {
	uint16_t prev;
	// Stack pointer / stack segment pairs, one per entry 0..2.
	uint16_t sp0, ss0;
	uint16_t sp1, ss1;
	uint16_t sp2, ss2;
	uint16_t ip, flags;
	// General-purpose register slots.
	uint16_t ax, cx, dx, bx;
	uint16_t sp, bp, si, di;
	// Segment selector slots.
	uint16_t es, cs, ss, ds;
	uint16_t ldt;
} __attribute__((packed));
|
||
|
||
// Packed image of a 32-bit task state segment (104 bytes).
// Each 16-bit selector is followed by a 16-bit "...h" filler field so that
// the next member starts on a 4-byte boundary.
struct tss32 {
	uint16_t prev;
	uint16_t prevh;
	uint32_t sp0;
	uint16_t ss0;
	uint16_t ss0h;
	uint32_t sp1;
	uint16_t ss1;
	uint16_t ss1h;
	uint32_t sp2;
	uint16_t ss2;
	uint16_t ss2h;
	uint32_t cr3;
	uint32_t ip;
	uint32_t flags;
	// General-purpose register slots.
	uint32_t ax, cx, dx, bx;
	uint32_t sp, bp, si, di;
	// Segment selector slots with their fillers.
	uint16_t es;
	uint16_t esh;
	uint16_t cs;
	uint16_t csh;
	uint16_t ss;
	uint16_t ssh;
	uint16_t ds;
	uint16_t dsh;
	uint16_t fs;
	uint16_t fsh;
	uint16_t gs;
	uint16_t gsh;
	uint16_t ldt;
	uint16_t ldth;
	uint16_t trace;
	uint16_t io_bitmap;
} __attribute__((packed));
|
||
|
||
// Packed image of a 64-bit task state segment (104 bytes).
// The last two fields are 16-bit: a reserved word at offset 100 followed by
// the I/O map base at offset 102. Declaring them as 32-bit values put
// io_bitmap at offset 104, past the end of the 104-byte hardware layout.
struct tss64 {
	uint32_t reserved0;
	uint64_t rsp[3];
	uint64_t reserved1;
	uint64_t ist[7];
	uint64_t reserved2;
	uint16_t reserved3;
	uint16_t io_bitmap;
} __attribute__((packed));
|
||
|
||
static void fill_segment_descriptor(uint64_t* dt, uint64_t* lt, struct kvm_segment* seg)
|
||
{
|
||
uint16_t index = seg->selector >> 3;
|
||
uint64_t limit = seg->g ? seg->limit >> 12 : seg->limit;
|
||
uint64_t sd = (limit & 0xffff) | (seg->base & 0xffffff) << 16 | (uint64_t)seg->type << 40 | (uint64_t)seg->s << 44 | (uint64_t)seg->dpl << 45 | (uint64_t)seg->present << 47 | (limit & 0xf0000ULL) << 48 | (uint64_t)seg->avl << 52 | (uint64_t)seg->l << 53 | (uint64_t)seg->db << 54 | (uint64_t)seg->g << 55 | (seg->base & 0xff000000ULL) << 56;
|
||
NONFAILING(dt[index] = sd);
|
||
NONFAILING(lt[index] = sd);
|
||
}
|
||
|
||
static void fill_segment_descriptor_dword(uint64_t* dt, uint64_t* lt, struct kvm_segment* seg)
|
||
{
|
||
fill_segment_descriptor(dt, lt, seg);
|
||
uint16_t index = seg->selector >> 3;
|
||
NONFAILING(dt[index + 1] = 0);
|
||
NONFAILING(lt[index + 1] = 0);
|
||
}
|
||
|
||
static void setup_syscall_msrs(int cpufd, uint16_t sel_cs, uint16_t sel_cs_cpl3)
|
||
{
|
||
char buf[sizeof(struct kvm_msrs) + 5 * sizeof(struct kvm_msr_entry)];
|
||
memset(buf, 0, sizeof(buf));
|
||
struct kvm_msrs* msrs = (struct kvm_msrs*)buf;
|
||
msrs->nmsrs = 5;
|
||
msrs->entries[0].index = MSR_IA32_SYSENTER_CS;
|
||
msrs->entries[0].data = sel_cs;
|
||
msrs->entries[1].index = MSR_IA32_SYSENTER_ESP;
|
||
msrs->entries[1].data = ADDR_STACK0;
|
||
msrs->entries[2].index = MSR_IA32_SYSENTER_EIP;
|
||
msrs->entries[2].data = ADDR_VAR_SYSEXIT;
|
||
msrs->entries[3].index = MSR_IA32_STAR;
|
||
msrs->entries[3].data = ((uint64_t)sel_cs << 32) | ((uint64_t)sel_cs_cpl3 << 48);
|
||
msrs->entries[4].index = MSR_IA32_LSTAR;
|
||
msrs->entries[4].data = ADDR_VAR_SYSRET;
|
||
ioctl(cpufd, KVM_SET_MSRS, msrs);
|
||
}
|
||
|
||
static void setup_32bit_idt(struct kvm_sregs* sregs, char* host_mem, uintptr_t guest_mem)
|
||
{
|
||
sregs->idt.base = guest_mem + ADDR_VAR_IDT;
|
||
sregs->idt.limit = 0x1ff;
|
||
uint64_t* idt = (uint64_t*)(host_mem + sregs->idt.base);
|
||
int i;
|
||
for (i = 0; i < 32; i++) {
|
||
struct kvm_segment gate;
|
||
gate.selector = i << 3;
|
||
switch (i % 6) {
|
||
case 0:
|
||
// 16-bit interrupt gate
|
||
gate.type = 6;
|
||
gate.base = SEL_CS16;
|
||
break;
|
||
case 1:
|
||
// 16-bit trap gate
|
||
gate.type = 7;
|
||
gate.base = SEL_CS16;
|
||
break;
|
||
case 2:
|
||
// 16-bit task gate
|
||
gate.type = 3;
|
||
gate.base = SEL_TGATE16;
|
||
break;
|
||
case 3:
|
||
// 32-bit interrupt gate
|
||
gate.type = 14;
|
||
gate.base = SEL_CS32;
|
||
break;
|
||
case 4:
|
||
// 32-bit trap gate
|
||
gate.type = 15;
|
||
gate.base = SEL_CS32;
|
||
break;
|
||
case 6:
|
||
// 32-bit task gate
|
||
gate.type = 11;
|
||
gate.base = SEL_TGATE32;
|
||
break;
|
||
}
|
||
gate.limit = guest_mem + ADDR_VAR_USER_CODE2; // entry offset
|
||
gate.present = 1;
|
||
gate.dpl = 0;
|
||
gate.s = 0;
|
||
gate.g = 0;
|
||
gate.db = 0;
|
||
gate.l = 0;
|
||
gate.avl = 0;
|
||
fill_segment_descriptor(idt, idt, &gate);
|
||
}
|
||
}
|
||
|
||
static void setup_64bit_idt(struct kvm_sregs* sregs, char* host_mem, uintptr_t guest_mem)
|
||
{
|
||
sregs->idt.base = guest_mem + ADDR_VAR_IDT;
|
||
sregs->idt.limit = 0x1ff;
|
||
uint64_t* idt = (uint64_t*)(host_mem + sregs->idt.base);
|
||
int i;
|
||
for (i = 0; i < 32; i++) {
|
||
struct kvm_segment gate;
|
||
gate.selector = (i * 2) << 3;
|
||
gate.type = (i & 1) ? 14 : 15; // interrupt or trap gate
|
||
gate.base = SEL_CS64;
|
||
gate.limit = guest_mem + ADDR_VAR_USER_CODE2; // entry offset
|
||
gate.present = 1;
|
||
gate.dpl = 0;
|
||
gate.s = 0;
|
||
gate.g = 0;
|
||
gate.db = 0;
|
||
gate.l = 0;
|
||
gate.avl = 0;
|
||
fill_segment_descriptor_dword(idt, idt, &gate);
|
||
}
|
||
}
|
||
|
||
// One guest code blob passed to syz_kvm_setup_cpu.
struct kvm_text {
	uintptr_t typ; // compared against 8, 16 and 32 by syz_kvm_setup_cpu; anything else takes the 64-bit path
	const void* text; // start of the code bytes
	uintptr_t size; // number of code bytes (syz_kvm_setup_cpu caps it at 1000)
};
|
||
|
||
// One setup option passed to syz_kvm_setup_cpu.
struct kvm_opt {
	uint64_t typ; // option kind; syz_kvm_setup_cpu dispatches on typ % 9
	uint64_t val; // option payload, masked per kind before use
};
|
||
|
||
// Bit flags accepted in the `flags` argument of syz_kvm_setup_cpu
// (bit number in the trailing comment).
#define KVM_SETUP_PAGING 0x01 // bit 0
#define KVM_SETUP_PAE 0x02 // bit 1
#define KVM_SETUP_PROTECTED 0x04 // bit 2
#define KVM_SETUP_CPL3 0x08 // bit 3
#define KVM_SETUP_VIRT86 0x10 // bit 4
#define KVM_SETUP_SMM 0x20 // bit 5
#define KVM_SETUP_VM 0x40 // bit 6
|
||
|
||
// syz_kvm_setup_cpu(fd fd_kvmvm, cpufd fd_kvmcpu, usermem vma[24], text ptr[in, array[kvm_text, 1]], ntext len[text], flags flags[kvm_setup_flags], opts ptr[in, array[kvm_setup_opt, 0:2]], nopt len[opts])
|
||
static uintptr_t syz_kvm_setup_cpu(uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7)
|
||
{
|
||
const int vmfd = a0;
|
||
const int cpufd = a1;
|
||
char* const host_mem = (char*)a2;
|
||
const struct kvm_text* const text_array_ptr = (struct kvm_text*)a3;
|
||
const uintptr_t text_count = a4;
|
||
const uintptr_t flags = a5;
|
||
const struct kvm_opt* const opt_array_ptr = (struct kvm_opt*)a6;
|
||
uintptr_t opt_count = a7;
|
||
|
||
const uintptr_t page_size = 4 << 10;
|
||
const uintptr_t ioapic_page = 10;
|
||
const uintptr_t guest_mem_size = 24 * page_size;
|
||
const uintptr_t guest_mem = 0;
|
||
|
||
(void)text_count; // fuzzer can spoof count and we need just 1 text, so ignore text_count
|
||
int text_type = 0;
|
||
const void* text = 0;
|
||
uintptr_t text_size = 0;
|
||
NONFAILING(text_type = text_array_ptr[0].typ);
|
||
NONFAILING(text = text_array_ptr[0].text);
|
||
NONFAILING(text_size = text_array_ptr[0].size);
|
||
|
||
uintptr_t i;
|
||
for (i = 0; i < guest_mem_size / page_size; i++) {
|
||
struct kvm_userspace_memory_region memreg;
|
||
memreg.slot = i;
|
||
memreg.flags = 0; // can be KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_READONLY
|
||
memreg.guest_phys_addr = guest_mem + i * page_size;
|
||
if (i == ioapic_page)
|
||
memreg.guest_phys_addr = 0xfec00000;
|
||
memreg.memory_size = page_size;
|
||
memreg.userspace_addr = (uintptr_t)host_mem + i * page_size;
|
||
ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg);
|
||
}
|
||
// SMRAM
|
||
struct kvm_userspace_memory_region memreg;
|
||
memreg.slot = 1 + (1 << 16);
|
||
memreg.flags = 0;
|
||
memreg.guest_phys_addr = 0x30000;
|
||
memreg.memory_size = 64 << 10;
|
||
memreg.userspace_addr = (uintptr_t)host_mem;
|
||
ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg);
|
||
|
||
struct kvm_sregs sregs;
|
||
if (ioctl(cpufd, KVM_GET_SREGS, &sregs))
|
||
return -1;
|
||
|
||
struct kvm_regs regs;
|
||
memset(®s, 0, sizeof(regs));
|
||
regs.rip = guest_mem + ADDR_TEXT;
|
||
regs.rsp = ADDR_STACK0;
|
||
|
||
sregs.gdt.base = guest_mem + ADDR_GDT;
|
||
sregs.gdt.limit = 256 * sizeof(uint64_t) - 1;
|
||
uint64_t* gdt = (uint64_t*)(host_mem + sregs.gdt.base);
|
||
|
||
struct kvm_segment seg_ldt;
|
||
seg_ldt.selector = SEL_LDT;
|
||
seg_ldt.type = 2;
|
||
seg_ldt.base = guest_mem + ADDR_LDT;
|
||
seg_ldt.limit = 256 * sizeof(uint64_t) - 1;
|
||
seg_ldt.present = 1;
|
||
seg_ldt.dpl = 0;
|
||
seg_ldt.s = 0;
|
||
seg_ldt.g = 0;
|
||
seg_ldt.db = 1;
|
||
seg_ldt.l = 0;
|
||
sregs.ldt = seg_ldt;
|
||
uint64_t* ldt = (uint64_t*)(host_mem + sregs.ldt.base);
|
||
|
||
struct kvm_segment seg_cs16;
|
||
seg_cs16.selector = SEL_CS16;
|
||
seg_cs16.type = 11;
|
||
seg_cs16.base = 0;
|
||
seg_cs16.limit = 0xfffff;
|
||
seg_cs16.present = 1;
|
||
seg_cs16.dpl = 0;
|
||
seg_cs16.s = 1;
|
||
seg_cs16.g = 0;
|
||
seg_cs16.db = 0;
|
||
seg_cs16.l = 0;
|
||
|
||
struct kvm_segment seg_ds16 = seg_cs16;
|
||
seg_ds16.selector = SEL_DS16;
|
||
seg_ds16.type = 3;
|
||
|
||
struct kvm_segment seg_cs16_cpl3 = seg_cs16;
|
||
seg_cs16_cpl3.selector = SEL_CS16_CPL3;
|
||
seg_cs16_cpl3.dpl = 3;
|
||
|
||
struct kvm_segment seg_ds16_cpl3 = seg_ds16;
|
||
seg_ds16_cpl3.selector = SEL_DS16_CPL3;
|
||
seg_ds16_cpl3.dpl = 3;
|
||
|
||
struct kvm_segment seg_cs32 = seg_cs16;
|
||
seg_cs32.selector = SEL_CS32;
|
||
seg_cs32.db = 1;
|
||
|
||
struct kvm_segment seg_ds32 = seg_ds16;
|
||
seg_ds32.selector = SEL_DS32;
|
||
seg_ds32.db = 1;
|
||
|
||
struct kvm_segment seg_cs32_cpl3 = seg_cs32;
|
||
seg_cs32_cpl3.selector = SEL_CS32_CPL3;
|
||
seg_cs32_cpl3.dpl = 3;
|
||
|
||
struct kvm_segment seg_ds32_cpl3 = seg_ds32;
|
||
seg_ds32_cpl3.selector = SEL_DS32_CPL3;
|
||
seg_ds32_cpl3.dpl = 3;
|
||
|
||
struct kvm_segment seg_cs64 = seg_cs16;
|
||
seg_cs64.selector = SEL_CS64;
|
||
seg_cs64.l = 1;
|
||
|
||
struct kvm_segment seg_ds64 = seg_ds32;
|
||
seg_ds64.selector = SEL_DS64;
|
||
|
||
struct kvm_segment seg_cs64_cpl3 = seg_cs64;
|
||
seg_cs64_cpl3.selector = SEL_CS64_CPL3;
|
||
seg_cs64_cpl3.dpl = 3;
|
||
|
||
struct kvm_segment seg_ds64_cpl3 = seg_ds64;
|
||
seg_ds64_cpl3.selector = SEL_DS64_CPL3;
|
||
seg_ds64_cpl3.dpl = 3;
|
||
|
||
struct kvm_segment seg_tss32;
|
||
seg_tss32.selector = SEL_TSS32;
|
||
seg_tss32.type = 9;
|
||
seg_tss32.base = ADDR_VAR_TSS32;
|
||
seg_tss32.limit = 0x1ff;
|
||
seg_tss32.present = 1;
|
||
seg_tss32.dpl = 0;
|
||
seg_tss32.s = 0;
|
||
seg_tss32.g = 0;
|
||
seg_tss32.db = 0;
|
||
seg_tss32.l = 0;
|
||
|
||
struct kvm_segment seg_tss32_2 = seg_tss32;
|
||
seg_tss32_2.selector = SEL_TSS32_2;
|
||
seg_tss32_2.base = ADDR_VAR_TSS32_2;
|
||
|
||
struct kvm_segment seg_tss32_cpl3 = seg_tss32;
|
||
seg_tss32_cpl3.selector = SEL_TSS32_CPL3;
|
||
seg_tss32_cpl3.base = ADDR_VAR_TSS32_CPL3;
|
||
|
||
struct kvm_segment seg_tss32_vm86 = seg_tss32;
|
||
seg_tss32_vm86.selector = SEL_TSS32_VM86;
|
||
seg_tss32_vm86.base = ADDR_VAR_TSS32_VM86;
|
||
|
||
struct kvm_segment seg_tss16 = seg_tss32;
|
||
seg_tss16.selector = SEL_TSS16;
|
||
seg_tss16.base = ADDR_VAR_TSS16;
|
||
seg_tss16.limit = 0xff;
|
||
seg_tss16.type = 1;
|
||
|
||
struct kvm_segment seg_tss16_2 = seg_tss16;
|
||
seg_tss16_2.selector = SEL_TSS16_2;
|
||
seg_tss16_2.base = ADDR_VAR_TSS16_2;
|
||
seg_tss16_2.dpl = 0;
|
||
|
||
struct kvm_segment seg_tss16_cpl3 = seg_tss16;
|
||
seg_tss16_cpl3.selector = SEL_TSS16_CPL3;
|
||
seg_tss16_cpl3.base = ADDR_VAR_TSS16_CPL3;
|
||
seg_tss16_cpl3.dpl = 3;
|
||
|
||
struct kvm_segment seg_tss64 = seg_tss32;
|
||
seg_tss64.selector = SEL_TSS64;
|
||
seg_tss64.base = ADDR_VAR_TSS64;
|
||
seg_tss64.limit = 0x1ff;
|
||
|
||
struct kvm_segment seg_tss64_cpl3 = seg_tss64;
|
||
seg_tss64_cpl3.selector = SEL_TSS64_CPL3;
|
||
seg_tss64_cpl3.base = ADDR_VAR_TSS64_CPL3;
|
||
seg_tss64_cpl3.dpl = 3;
|
||
|
||
struct kvm_segment seg_cgate16;
|
||
seg_cgate16.selector = SEL_CGATE16;
|
||
seg_cgate16.type = 4;
|
||
seg_cgate16.base = SEL_CS16 | (2 << 16); // selector + param count
|
||
seg_cgate16.limit = ADDR_VAR_USER_CODE2; // entry offset
|
||
seg_cgate16.present = 1;
|
||
seg_cgate16.dpl = 0;
|
||
seg_cgate16.s = 0;
|
||
seg_cgate16.g = 0;
|
||
seg_cgate16.db = 0;
|
||
seg_cgate16.l = 0;
|
||
seg_cgate16.avl = 0;
|
||
|
||
struct kvm_segment seg_tgate16 = seg_cgate16;
|
||
seg_tgate16.selector = SEL_TGATE16;
|
||
seg_tgate16.type = 3;
|
||
seg_cgate16.base = SEL_TSS16_2;
|
||
seg_tgate16.limit = 0;
|
||
|
||
struct kvm_segment seg_cgate32 = seg_cgate16;
|
||
seg_cgate32.selector = SEL_CGATE32;
|
||
seg_cgate32.type = 12;
|
||
seg_cgate32.base = SEL_CS32 | (2 << 16); // selector + param count
|
||
|
||
struct kvm_segment seg_tgate32 = seg_cgate32;
|
||
seg_tgate32.selector = SEL_TGATE32;
|
||
seg_tgate32.type = 11;
|
||
seg_tgate32.base = SEL_TSS32_2;
|
||
seg_tgate32.limit = 0;
|
||
|
||
struct kvm_segment seg_cgate64 = seg_cgate16;
|
||
seg_cgate64.selector = SEL_CGATE64;
|
||
seg_cgate64.type = 12;
|
||
seg_cgate64.base = SEL_CS64;
|
||
|
||
int kvmfd = open("/dev/kvm", O_RDWR);
|
||
char buf[sizeof(struct kvm_cpuid2) + 128 * sizeof(struct kvm_cpuid_entry2)];
|
||
memset(buf, 0, sizeof(buf));
|
||
struct kvm_cpuid2* cpuid = (struct kvm_cpuid2*)buf;
|
||
cpuid->nent = 128;
|
||
ioctl(kvmfd, KVM_GET_SUPPORTED_CPUID, cpuid);
|
||
ioctl(cpufd, KVM_SET_CPUID2, cpuid);
|
||
close(kvmfd);
|
||
|
||
const char* text_prefix = 0;
|
||
int text_prefix_size = 0;
|
||
char* host_text = host_mem + ADDR_TEXT;
|
||
|
||
if (text_type == 8) {
|
||
if (flags & KVM_SETUP_SMM) {
|
||
if (flags & KVM_SETUP_PROTECTED) {
|
||
sregs.cs = seg_cs16;
|
||
sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds16;
|
||
sregs.cr0 |= CR0_PE;
|
||
} else {
|
||
sregs.cs.selector = 0;
|
||
sregs.cs.base = 0;
|
||
}
|
||
|
||
NONFAILING(*(host_mem + ADDR_TEXT) = 0xf4); // hlt for rsm
|
||
host_text = host_mem + 0x8000;
|
||
|
||
ioctl(cpufd, KVM_SMI, 0);
|
||
} else if (flags & KVM_SETUP_VIRT86) {
|
||
sregs.cs = seg_cs32;
|
||
sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32;
|
||
sregs.cr0 |= CR0_PE;
|
||
sregs.efer |= EFER_SCE;
|
||
|
||
setup_syscall_msrs(cpufd, SEL_CS32, SEL_CS32_CPL3);
|
||
setup_32bit_idt(&sregs, host_mem, guest_mem);
|
||
|
||
if (flags & KVM_SETUP_PAGING) {
|
||
uint64_t pd_addr = guest_mem + ADDR_PD;
|
||
uint64_t* pd = (uint64_t*)(host_mem + ADDR_PD);
|
||
// A single 4MB page to cover the memory region
|
||
NONFAILING(pd[0] = PDE32_PRESENT | PDE32_RW | PDE32_USER | PDE32_PS);
|
||
sregs.cr3 = pd_addr;
|
||
sregs.cr4 |= CR4_PSE;
|
||
|
||
text_prefix = kvm_asm32_paged_vm86;
|
||
text_prefix_size = sizeof(kvm_asm32_paged_vm86) - 1;
|
||
} else {
|
||
text_prefix = kvm_asm32_vm86;
|
||
text_prefix_size = sizeof(kvm_asm32_vm86) - 1;
|
||
}
|
||
} else {
|
||
sregs.cs.selector = 0;
|
||
sregs.cs.base = 0;
|
||
}
|
||
} else if (text_type == 16) {
|
||
if (flags & KVM_SETUP_CPL3) {
|
||
sregs.cs = seg_cs16;
|
||
sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds16;
|
||
|
||
text_prefix = kvm_asm16_cpl3;
|
||
text_prefix_size = sizeof(kvm_asm16_cpl3) - 1;
|
||
} else {
|
||
sregs.cr0 |= CR0_PE;
|
||
sregs.cs = seg_cs16;
|
||
sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds16;
|
||
}
|
||
} else if (text_type == 32) {
|
||
sregs.cr0 |= CR0_PE;
|
||
sregs.efer |= EFER_SCE;
|
||
|
||
setup_syscall_msrs(cpufd, SEL_CS32, SEL_CS32_CPL3);
|
||
setup_32bit_idt(&sregs, host_mem, guest_mem);
|
||
|
||
if (flags & KVM_SETUP_SMM) {
|
||
sregs.cs = seg_cs32;
|
||
sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32;
|
||
|
||
NONFAILING(*(host_mem + ADDR_TEXT) = 0xf4); // hlt for rsm
|
||
host_text = host_mem + 0x8000;
|
||
|
||
ioctl(cpufd, KVM_SMI, 0);
|
||
} else if (flags & KVM_SETUP_PAGING) {
|
||
sregs.cs = seg_cs32;
|
||
sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32;
|
||
|
||
uint64_t pd_addr = guest_mem + ADDR_PD;
|
||
uint64_t* pd = (uint64_t*)(host_mem + ADDR_PD);
|
||
// A single 4MB page to cover the memory region
|
||
NONFAILING(pd[0] = PDE32_PRESENT | PDE32_RW | PDE32_USER | PDE32_PS);
|
||
sregs.cr3 = pd_addr;
|
||
sregs.cr4 |= CR4_PSE;
|
||
|
||
text_prefix = kvm_asm32_paged;
|
||
text_prefix_size = sizeof(kvm_asm32_paged) - 1;
|
||
} else if (flags & KVM_SETUP_CPL3) {
|
||
sregs.cs = seg_cs32_cpl3;
|
||
sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32_cpl3;
|
||
} else {
|
||
sregs.cs = seg_cs32;
|
||
sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32;
|
||
}
|
||
} else {
|
||
sregs.efer |= EFER_LME | EFER_SCE;
|
||
sregs.cr0 |= CR0_PE;
|
||
|
||
setup_syscall_msrs(cpufd, SEL_CS64, SEL_CS64_CPL3);
|
||
setup_64bit_idt(&sregs, host_mem, guest_mem);
|
||
|
||
sregs.cs = seg_cs32;
|
||
sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32;
|
||
|
||
uint64_t pml4_addr = guest_mem + ADDR_PML4;
|
||
uint64_t* pml4 = (uint64_t*)(host_mem + ADDR_PML4);
|
||
uint64_t pdpt_addr = guest_mem + ADDR_PDP;
|
||
uint64_t* pdpt = (uint64_t*)(host_mem + ADDR_PDP);
|
||
uint64_t pd_addr = guest_mem + ADDR_PD;
|
||
uint64_t* pd = (uint64_t*)(host_mem + ADDR_PD);
|
||
NONFAILING(pml4[0] = PDE64_PRESENT | PDE64_RW | PDE64_USER | pdpt_addr);
|
||
NONFAILING(pdpt[0] = PDE64_PRESENT | PDE64_RW | PDE64_USER | pd_addr);
|
||
NONFAILING(pd[0] = PDE64_PRESENT | PDE64_RW | PDE64_USER | PDE64_PS);
|
||
sregs.cr3 = pml4_addr;
|
||
sregs.cr4 |= CR4_PAE;
|
||
|
||
if (flags & KVM_SETUP_VM) {
|
||
sregs.cr0 |= CR0_NE;
|
||
|
||
NONFAILING(*((uint64_t*)(host_mem + ADDR_VAR_VMXON_PTR)) = ADDR_VAR_VMXON);
|
||
NONFAILING(*((uint64_t*)(host_mem + ADDR_VAR_VMCS_PTR)) = ADDR_VAR_VMCS);
|
||
NONFAILING(memcpy(host_mem + ADDR_VAR_VMEXIT_CODE, kvm_asm64_vm_exit, sizeof(kvm_asm64_vm_exit) - 1));
|
||
NONFAILING(*((uint64_t*)(host_mem + ADDR_VAR_VMEXIT_PTR)) = ADDR_VAR_VMEXIT_CODE);
|
||
|
||
text_prefix = kvm_asm64_init_vm;
|
||
text_prefix_size = sizeof(kvm_asm64_init_vm) - 1;
|
||
} else if (flags & KVM_SETUP_CPL3) {
|
||
text_prefix = kvm_asm64_cpl3;
|
||
text_prefix_size = sizeof(kvm_asm64_cpl3) - 1;
|
||
} else {
|
||
text_prefix = kvm_asm64_enable_long;
|
||
text_prefix_size = sizeof(kvm_asm64_enable_long) - 1;
|
||
}
|
||
}
|
||
|
||
struct tss16 tss16;
|
||
memset(&tss16, 0, sizeof(tss16));
|
||
tss16.ss0 = tss16.ss1 = tss16.ss2 = SEL_DS16;
|
||
tss16.sp0 = tss16.sp1 = tss16.sp2 = ADDR_STACK0;
|
||
tss16.ip = ADDR_VAR_USER_CODE2;
|
||
tss16.flags = (1 << 1);
|
||
tss16.cs = SEL_CS16;
|
||
tss16.es = tss16.ds = tss16.ss = SEL_DS16;
|
||
tss16.ldt = SEL_LDT;
|
||
struct tss16* tss16_addr = (struct tss16*)(host_mem + seg_tss16_2.base);
|
||
NONFAILING(memcpy(tss16_addr, &tss16, sizeof(tss16)));
|
||
|
||
memset(&tss16, 0, sizeof(tss16));
|
||
tss16.ss0 = tss16.ss1 = tss16.ss2 = SEL_DS16;
|
||
tss16.sp0 = tss16.sp1 = tss16.sp2 = ADDR_STACK0;
|
||
tss16.ip = ADDR_VAR_USER_CODE2;
|
||
tss16.flags = (1 << 1);
|
||
tss16.cs = SEL_CS16_CPL3;
|
||
tss16.es = tss16.ds = tss16.ss = SEL_DS16_CPL3;
|
||
tss16.ldt = SEL_LDT;
|
||
struct tss16* tss16_cpl3_addr = (struct tss16*)(host_mem + seg_tss16_cpl3.base);
|
||
NONFAILING(memcpy(tss16_cpl3_addr, &tss16, sizeof(tss16)));
|
||
|
||
struct tss32 tss32;
|
||
memset(&tss32, 0, sizeof(tss32));
|
||
tss32.ss0 = tss32.ss1 = tss32.ss2 = SEL_DS32;
|
||
tss32.sp0 = tss32.sp1 = tss32.sp2 = ADDR_STACK0;
|
||
tss32.ip = ADDR_VAR_USER_CODE;
|
||
tss32.flags = (1 << 1) | (1 << 17);
|
||
tss32.ldt = SEL_LDT;
|
||
tss32.cr3 = sregs.cr3;
|
||
tss32.io_bitmap = offsetof(struct tss32, io_bitmap);
|
||
struct tss32* tss32_addr = (struct tss32*)(host_mem + seg_tss32_vm86.base);
|
||
NONFAILING(memcpy(tss32_addr, &tss32, sizeof(tss32)));
|
||
|
||
memset(&tss32, 0, sizeof(tss32));
|
||
tss32.ss0 = tss32.ss1 = tss32.ss2 = SEL_DS32;
|
||
tss32.sp0 = tss32.sp1 = tss32.sp2 = ADDR_STACK0;
|
||
tss32.ip = ADDR_VAR_USER_CODE;
|
||
tss32.flags = (1 << 1);
|
||
tss32.cr3 = sregs.cr3;
|
||
tss32.es = tss32.ds = tss32.ss = tss32.gs = tss32.fs = SEL_DS32;
|
||
tss32.cs = SEL_CS32;
|
||
tss32.ldt = SEL_LDT;
|
||
tss32.cr3 = sregs.cr3;
|
||
tss32.io_bitmap = offsetof(struct tss32, io_bitmap);
|
||
struct tss32* tss32_cpl3_addr = (struct tss32*)(host_mem + seg_tss32_2.base);
|
||
NONFAILING(memcpy(tss32_cpl3_addr, &tss32, sizeof(tss32)));
|
||
|
||
struct tss64 tss64;
|
||
memset(&tss64, 0, sizeof(tss64));
|
||
tss64.rsp[0] = ADDR_STACK0;
|
||
tss64.rsp[1] = ADDR_STACK0;
|
||
tss64.rsp[2] = ADDR_STACK0;
|
||
tss64.io_bitmap = offsetof(struct tss64, io_bitmap);
|
||
struct tss64* tss64_addr = (struct tss64*)(host_mem + seg_tss64.base);
|
||
NONFAILING(memcpy(tss64_addr, &tss64, sizeof(tss64)));
|
||
|
||
memset(&tss64, 0, sizeof(tss64));
|
||
tss64.rsp[0] = ADDR_STACK0;
|
||
tss64.rsp[1] = ADDR_STACK0;
|
||
tss64.rsp[2] = ADDR_STACK0;
|
||
tss64.io_bitmap = offsetof(struct tss64, io_bitmap);
|
||
struct tss64* tss64_cpl3_addr = (struct tss64*)(host_mem + seg_tss64_cpl3.base);
|
||
NONFAILING(memcpy(tss64_cpl3_addr, &tss64, sizeof(tss64)));
|
||
|
||
if (text_size > 1000)
|
||
text_size = 1000;
|
||
if (text_prefix) {
|
||
NONFAILING(memcpy(host_text, text_prefix, text_prefix_size));
|
||
void* patch = 0;
|
||
// Replace 0xbadc0de in LJMP with offset of a next instruction.
|
||
NONFAILING(patch = memmem(host_text, text_prefix_size, "\xde\xc0\xad\x0b", 4));
|
||
if (patch)
|
||
NONFAILING(*((uint32_t*)patch) = guest_mem + ADDR_TEXT + ((char*)patch - host_text) + 6);
|
||
uint16_t magic = PREFIX_SIZE;
|
||
patch = 0;
|
||
NONFAILING(patch = memmem(host_text, text_prefix_size, &magic, sizeof(magic)));
|
||
if (patch)
|
||
NONFAILING(*((uint16_t*)patch) = guest_mem + ADDR_TEXT + text_prefix_size);
|
||
}
|
||
NONFAILING(memcpy((void*)(host_text + text_prefix_size), text, text_size));
|
||
NONFAILING(*(host_text + text_prefix_size + text_size) = 0xf4); // hlt
|
||
|
||
NONFAILING(memcpy(host_mem + ADDR_VAR_USER_CODE, text, text_size));
|
||
NONFAILING(*(host_mem + ADDR_VAR_USER_CODE + text_size) = 0xf4); // hlt
|
||
|
||
NONFAILING(*(host_mem + ADDR_VAR_HLT) = 0xf4); // hlt
|
||
NONFAILING(memcpy(host_mem + ADDR_VAR_SYSRET, "\x0f\x07\xf4", 3));
|
||
NONFAILING(memcpy(host_mem + ADDR_VAR_SYSEXIT, "\x0f\x35\xf4", 3));
|
||
|
||
NONFAILING(*(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_FLD) = 0);
|
||
NONFAILING(*(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_VAL) = 0);
|
||
|
||
if (opt_count > 2)
|
||
opt_count = 2;
|
||
for (i = 0; i < opt_count; i++) {
|
||
uint64_t typ = 0;
|
||
uint64_t val = 0;
|
||
NONFAILING(typ = opt_array_ptr[i].typ);
|
||
NONFAILING(val = opt_array_ptr[i].val);
|
||
switch (typ % 9) {
|
||
case 0:
|
||
sregs.cr0 ^= val & (CR0_MP | CR0_EM | CR0_ET | CR0_NE | CR0_WP | CR0_AM | CR0_NW | CR0_CD);
|
||
break;
|
||
case 1:
|
||
sregs.cr4 ^= val & (CR4_VME | CR4_PVI | CR4_TSD | CR4_DE | CR4_MCE | CR4_PGE | CR4_PCE |
|
||
CR4_OSFXSR | CR4_OSXMMEXCPT | CR4_UMIP | CR4_VMXE | CR4_SMXE | CR4_FSGSBASE | CR4_PCIDE |
|
||
CR4_OSXSAVE | CR4_SMEP | CR4_SMAP | CR4_PKE);
|
||
break;
|
||
case 2:
|
||
sregs.efer ^= val & (EFER_SCE | EFER_NXE | EFER_SVME | EFER_LMSLE | EFER_FFXSR | EFER_TCE);
|
||
break;
|
||
case 3:
|
||
val &= ((1 << 8) | (1 << 9) | (1 << 10) | (1 << 12) | (1 << 13) | (1 << 14) |
|
||
(1 << 15) | (1 << 18) | (1 << 19) | (1 << 20) | (1 << 21));
|
||
regs.rflags ^= val;
|
||
NONFAILING(tss16_addr->flags ^= val);
|
||
NONFAILING(tss16_cpl3_addr->flags ^= val);
|
||
NONFAILING(tss32_addr->flags ^= val);
|
||
NONFAILING(tss32_cpl3_addr->flags ^= val);
|
||
break;
|
||
case 4:
|
||
seg_cs16.type = val & 0xf;
|
||
seg_cs32.type = val & 0xf;
|
||
seg_cs64.type = val & 0xf;
|
||
break;
|
||
case 5:
|
||
seg_cs16_cpl3.type = val & 0xf;
|
||
seg_cs32_cpl3.type = val & 0xf;
|
||
seg_cs64_cpl3.type = val & 0xf;
|
||
break;
|
||
case 6:
|
||
seg_ds16.type = val & 0xf;
|
||
seg_ds32.type = val & 0xf;
|
||
seg_ds64.type = val & 0xf;
|
||
break;
|
||
case 7:
|
||
seg_ds16_cpl3.type = val & 0xf;
|
||
seg_ds32_cpl3.type = val & 0xf;
|
||
seg_ds64_cpl3.type = val & 0xf;
|
||
break;
|
||
case 8:
|
||
NONFAILING(*(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_FLD) = (val & 0xffff));
|
||
NONFAILING(*(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_VAL) = (val >> 16));
|
||
break;
|
||
default:
|
||
fail("bad kvm setup opt");
|
||
}
|
||
}
|
||
regs.rflags |= 2; // bit 1 is always set
|
||
|
||
fill_segment_descriptor(gdt, ldt, &seg_ldt);
|
||
fill_segment_descriptor(gdt, ldt, &seg_cs16);
|
||
fill_segment_descriptor(gdt, ldt, &seg_ds16);
|
||
fill_segment_descriptor(gdt, ldt, &seg_cs16_cpl3);
|
||
fill_segment_descriptor(gdt, ldt, &seg_ds16_cpl3);
|
||
fill_segment_descriptor(gdt, ldt, &seg_cs32);
|
||
fill_segment_descriptor(gdt, ldt, &seg_ds32);
|
||
fill_segment_descriptor(gdt, ldt, &seg_cs32_cpl3);
|
||
fill_segment_descriptor(gdt, ldt, &seg_ds32_cpl3);
|
||
fill_segment_descriptor(gdt, ldt, &seg_cs64);
|
||
fill_segment_descriptor(gdt, ldt, &seg_ds64);
|
||
fill_segment_descriptor(gdt, ldt, &seg_cs64_cpl3);
|
||
fill_segment_descriptor(gdt, ldt, &seg_ds64_cpl3);
|
||
fill_segment_descriptor(gdt, ldt, &seg_tss32);
|
||
fill_segment_descriptor(gdt, ldt, &seg_tss32_2);
|
||
fill_segment_descriptor(gdt, ldt, &seg_tss32_cpl3);
|
||
fill_segment_descriptor(gdt, ldt, &seg_tss32_vm86);
|
||
fill_segment_descriptor(gdt, ldt, &seg_tss16);
|
||
fill_segment_descriptor(gdt, ldt, &seg_tss16_2);
|
||
fill_segment_descriptor(gdt, ldt, &seg_tss16_cpl3);
|
||
fill_segment_descriptor_dword(gdt, ldt, &seg_tss64);
|
||
fill_segment_descriptor_dword(gdt, ldt, &seg_tss64_cpl3);
|
||
fill_segment_descriptor(gdt, ldt, &seg_cgate16);
|
||
fill_segment_descriptor(gdt, ldt, &seg_tgate16);
|
||
fill_segment_descriptor(gdt, ldt, &seg_cgate32);
|
||
fill_segment_descriptor(gdt, ldt, &seg_tgate32);
|
||
fill_segment_descriptor_dword(gdt, ldt, &seg_cgate64);
|
||
|
||
if (ioctl(cpufd, KVM_SET_SREGS, &sregs))
|
||
return -1;
|
||
if (ioctl(cpufd, KVM_SET_REGS, ®s))
|
||
return -1;
|
||
return 0;
|
||
}
|