/*
 * QEMU dump
 *
 * Copyright Fujitsu, Corp. 2011, 2012
 *
 * Authors:
 *     Wen Congyang
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu-common.h"
#include "elf.h"
#include "cpu.h"
#include "exec/cpu-all.h"
#include "exec/hwaddr.h"
#include "monitor/monitor.h"
#include "sysemu/kvm.h"
#include "sysemu/dump.h"
#include "sysemu/sysemu.h"
#include "sysemu/memory_mapping.h"
#include "sysemu/cpus.h"
#include "qapi/error.h"
#include "qmp-commands.h"

/*
 * Convert a 16-bit host value to the byte order of the dump target:
 * little-endian when endian is ELFDATA2LSB, big-endian otherwise.
 */
static uint16_t cpu_convert_to_target16(uint16_t val, int endian)
{
    if (endian == ELFDATA2LSB) {
        val = cpu_to_le16(val);
    } else {
        val = cpu_to_be16(val);
    }

    return val;
}

/* 32-bit variant of cpu_convert_to_target16(). */
static uint32_t cpu_convert_to_target32(uint32_t val, int endian)
{
    if (endian == ELFDATA2LSB) {
        val = cpu_to_le32(val);
    } else {
        val = cpu_to_be32(val);
    }

    return val;
}

/* 64-bit variant of cpu_convert_to_target16(). */
static uint64_t cpu_convert_to_target64(uint64_t val, int endian)
{
    if (endian == ELFDATA2LSB) {
        val = cpu_to_le64(val);
    } else {
        val = cpu_to_be64(val);
    }

    return val;
}

/* All state of one guest memory dump. */
typedef struct DumpState {
    GuestPhysBlockList guest_phys_blocks;
    ArchDumpInfo dump_info;     /* ELF class, endianness and machine */
    MemoryMappingList list;     /* mappings turned into PT_LOAD entries */
    uint16_t phdr_num;          /* value written to e_phnum */
    uint32_t sh_info;           /* program header count when have_section */
    bool have_section;          /* a section header carries the real count */
    bool resume;                /* restart the VM in dump_cleanup() */
    size_t note_size;           /* size of the ELF note area */
    hwaddr memory_offset;       /* file offset where guest memory starts */
    int fd;                     /* output file descriptor */

    RAMBlock *block;            /* RAM block currently being written */
    ram_addr_t start;           /* start offset inside that block */
    bool has_filter;            /* dump only [begin, begin + length) */
    int64_t begin;
    int64_t length;
    Error **errp;
} DumpState;

/*
 * Release everything the dump holds: the guest physical block list, the
 * memory mapping list and the output fd (unless it is -1).  The VM is
 * restarted if s->resume is set.  Always returns 0.
 */
static int dump_cleanup(DumpState *s)
{
    int ret = 0;

    guest_phys_blocks_free(&s->guest_phys_blocks);
    memory_mapping_list_free(&s->list);
    if (s->fd != -1) {
        close(s->fd);
    }
    if (s->resume) {
        vm_start();
    }

    return ret;
}

/*
 * Abort the dump after a failure.  Only performs the cleanup; the reason
 * string is currently not reported anywhere.
 */
static void dump_error(DumpState *s, const char *reason)
{
    dump_cleanup(s);
}

/*
 * Write size bytes from buf to the dump fd.  opaque is the DumpState.
 * Returns 0 when everything was written, -1 otherwise.
 */
static int fd_write_vmcore(void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;
    size_t written_size;

    written_size = qemu_write_full(s->fd, buf, size);
    if (written_size != size) {
        return -1;
    }

    return 0;
}

/*
 * Build the 64-bit ELF header in target byte order and write it.
 * On a write failure the dump is torn down and -1 is returned.
 */
static int write_elf64_header(DumpState *s)
{
    Elf64_Ehdr elf_header;
    int ret;
    int endian = s->dump_info.d_endian;

    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
    elf_header.e_ident[EI_DATA] = endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
                                                   endian);
    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
    /* the program header table directly follows the ELF header */
    elf_header.e_phoff = cpu_convert_to_target64(sizeof(Elf64_Ehdr), endian);
    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf64_Phdr),
                                                     endian);
    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
    if (s->have_section) {
        /* the single section header follows all sh_info program headers */
        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_convert_to_target64(shoff, endian);
        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf64_Shdr),
                                                         endian);
        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write elf header.\n");
        return -1;
    }

    return 0;
}

/* 32-bit counterpart of write_elf64_header(). */
static int write_elf32_header(DumpState *s)
{
    Elf32_Ehdr elf_header;
    int ret;
    int endian = s->dump_info.d_endian;

    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
    elf_header.e_ident[EI_DATA] = endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
                                                   endian);
    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
    /* the program header table directly follows the ELF header */
    elf_header.e_phoff = cpu_convert_to_target32(sizeof(Elf32_Ehdr), endian);
    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf32_Phdr),
                                                     endian);
    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
    if (s->have_section) {
        /* the single section header follows all sh_info program headers */
        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_convert_to_target32(shoff, endian);
        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf32_Shdr),
                                                         endian);
        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write elf header.\n");
        return -1;
    }

    return 0;
}

/*
 * Write one 64-bit PT_LOAD program header for memory_mapping.
 *
 * offset is the file offset of the mapping's data and filesz the number of
 * bytes actually present in the file; p_memsz is the full mapping length,
 * which must not be smaller than filesz.  phdr_index is not used here.
 * Returns 0 on success, -1 after tearing the dump down on a write failure.
 */
static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
                            int phdr_index, hwaddr offset,
                            hwaddr filesz)
{
    Elf64_Phdr phdr;
    int ret;
    int endian = s->dump_info.d_endian;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
    phdr.p_offset = cpu_convert_to_target64(offset, endian);
    phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
    phdr.p_filesz = cpu_convert_to_target64(filesz, endian);
    phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
    phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, endian);

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
        return -1;
    }

    return 0;
}

/* 32-bit counterpart of write_elf64_load(). */
static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
                            int phdr_index, hwaddr offset,
                            hwaddr filesz)
{
    Elf32_Phdr phdr;
    int ret;
    int endian = s->dump_info.d_endian;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
    phdr.p_offset = cpu_convert_to_target32(offset, endian);
    phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, endian);
    phdr.p_filesz = cpu_convert_to_target32(filesz, endian);
    phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
    phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, endian);

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
        return -1;
    }

    return 0;
}

/*
 * Write the 64-bit PT_NOTE program header.  The note area occupies the
 * note_size bytes that end at memory_offset.
 */
static int write_elf64_note(DumpState *s)
{
    Elf64_Phdr phdr;
    int endian = s->dump_info.d_endian;
    hwaddr begin = s->memory_offset - s->note_size;
    int ret;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
    phdr.p_offset = cpu_convert_to_target64(begin, endian);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_convert_to_target64(s->note_size, endian);
    phdr.p_memsz = cpu_convert_to_target64(s->note_size, endian);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
        return -1;
    }

    return 0;
}

/* Id used for a CPU in the ELF notes: its cpu_index plus one. */
static inline int cpu_index(CPUState *cpu)
{
    return cpu->cpu_index + 1;
}

/*
 * Write the 64-bit note area: first one ELF note per CPU, then one QEMU
 * specific note per CPU.  Returns 0 on success, -1 after tearing the dump
 * down on the first failure.
 */
static int write_elf64_notes(DumpState *s)
{
    CPUState *cpu;
    int ret;
    int id;

    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf64_note(fd_write_vmcore, cpu, id, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write elf notes.\n");
            return -1;
        }
    }

    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
        ret = cpu_write_elf64_qemunote(fd_write_vmcore, cpu, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write CPU status.\n");
            return -1;
        }
    }

    return 0;
}

/* 32-bit counterpart of write_elf64_note(). */
static int write_elf32_note(DumpState *s)
{
    hwaddr begin = s->memory_offset - s->note_size;
    Elf32_Phdr phdr;
    int endian = s->dump_info.d_endian;
    int ret;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
    phdr.p_offset = cpu_convert_to_target32(begin, endian);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_convert_to_target32(s->note_size, endian);
    phdr.p_memsz = cpu_convert_to_target32(s->note_size, endian);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
        return -1;
    }

    return 0;
}

/* 32-bit counterpart of write_elf64_notes(). */
static int write_elf32_notes(DumpState *s)
{
    CPUState *cpu;
    int ret;
    int id;

    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf32_note(fd_write_vmcore, cpu, id, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write elf notes.\n");
            return -1;
        }
    }

    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
        ret = cpu_write_elf32_qemunote(fd_write_vmcore, cpu, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write CPU status.\n");
            return -1;
        }
    }

    return 0;
}

/*
 * Write the single section header whose sh_info field holds the real
 * number of program headers.
 *
 * type selects the ELF class: 0 writes an Elf32_Shdr, anything else an
 * Elf64_Shdr.  Returns 0 on success, -1 after tearing the dump down on a
 * write failure.
 */
static int write_elf_section(DumpState *s, int type)
{
    Elf32_Shdr shdr32;
    Elf64_Shdr shdr64;
    int endian = s->dump_info.d_endian;
    int shdr_size;
    void *shdr;
    int ret;

    if (type == 0) {
        shdr_size = sizeof(Elf32_Shdr);
        memset(&shdr32, 0, shdr_size);
        shdr32.sh_info = cpu_convert_to_target32(s->sh_info, endian);
        shdr = &shdr32;
    } else {
        shdr_size = sizeof(Elf64_Shdr);
        memset(&shdr64, 0, shdr_size);
        shdr64.sh_info = cpu_convert_to_target32(s->sh_info, endian);
        shdr = &shdr64;
    }

    /*
     * Pass the header itself, not the address of the local pointer:
     * writing from &shdr would emit shdr_size bytes of unrelated stack.
     */
    ret = fd_write_vmcore(shdr, shdr_size, s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write section header table.\n");
        return -1;
    }

    return 0;
}

/*
 * Write length bytes of guest memory from buf to the vmcore.
 * Returns 0 on success, -1 after tearing the dump down on a write failure.
 */
static int write_data(DumpState *s, void *buf, int length)
{
    int ret;

    ret = fd_write_vmcore(buf, length, s);
    if (ret < 0) {
        dump_error(s, "dump: failed to save memory.\n");
        return -1;
    }

    return 0;
}

/* write the memory to vmcore. 1 page per I/O.
*/ static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start, int64_t size) { int64_t i; int ret; for (i = 0; i < size / TARGET_PAGE_SIZE; i++) { ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE, TARGET_PAGE_SIZE); if (ret < 0) { return ret; } } if ((size % TARGET_PAGE_SIZE) != 0) { ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE, size % TARGET_PAGE_SIZE); if (ret < 0) { return ret; } } return 0; } /* get the memory's offset and size in the vmcore */ static void get_offset_range(hwaddr phys_addr, ram_addr_t mapping_length, DumpState *s, hwaddr *p_offset, hwaddr *p_filesz) { RAMBlock *block; hwaddr offset = s->memory_offset; int64_t size_in_block, start; /* When the memory is not stored into vmcore, offset will be -1 */ *p_offset = -1; *p_filesz = 0; if (s->has_filter) { if (phys_addr < s->begin || phys_addr >= s->begin + s->length) { return; } } QTAILQ_FOREACH(block, &ram_list.blocks, next) { if (s->has_filter) { if (block->offset >= s->begin + s->length || block->offset + block->length <= s->begin) { /* This block is out of the range */ continue; } if (s->begin <= block->offset) { start = block->offset; } else { start = s->begin; } size_in_block = block->length - (start - block->offset); if (s->begin + s->length < block->offset + block->length) { size_in_block -= block->offset + block->length - (s->begin + s->length); } } else { start = block->offset; size_in_block = block->length; } if (phys_addr >= start && phys_addr < start + size_in_block) { *p_offset = phys_addr - start + offset; /* The offset range mapped from the vmcore file must not spill over * the RAMBlock, clamp it. The rest of the mapping will be * zero-filled in memory at load time; see * . */ *p_filesz = phys_addr + mapping_length <= start + size_in_block ? 
mapping_length : size_in_block - (phys_addr - start); return; } offset += size_in_block; } } static int write_elf_loads(DumpState *s) { hwaddr offset, filesz; MemoryMapping *memory_mapping; uint32_t phdr_index = 1; int ret; uint32_t max_index; if (s->have_section) { max_index = s->sh_info; } else { max_index = s->phdr_num; } QTAILQ_FOREACH(memory_mapping, &s->list.head, next) { get_offset_range(memory_mapping->phys_addr, memory_mapping->length, s, &offset, &filesz); if (s->dump_info.d_class == ELFCLASS64) { ret = write_elf64_load(s, memory_mapping, phdr_index++, offset, filesz); } else { ret = write_elf32_load(s, memory_mapping, phdr_index++, offset, filesz); } if (ret < 0) { return -1; } if (phdr_index >= max_index) { break; } } return 0; } /* write elf header, PT_NOTE and elf note to vmcore. */ static int dump_begin(DumpState *s) { int ret; /* * the vmcore's format is: * -------------- * | elf header | * -------------- * | PT_NOTE | * -------------- * | PT_LOAD | * -------------- * | ...... | * -------------- * | PT_LOAD | * -------------- * | sec_hdr | * -------------- * | elf note | * -------------- * | memory | * -------------- * * we only know where the memory is saved after we write elf note into * vmcore. 
*/ /* write elf header to vmcore */ if (s->dump_info.d_class == ELFCLASS64) { ret = write_elf64_header(s); } else { ret = write_elf32_header(s); } if (ret < 0) { return -1; } if (s->dump_info.d_class == ELFCLASS64) { /* write PT_NOTE to vmcore */ if (write_elf64_note(s) < 0) { return -1; } /* write all PT_LOAD to vmcore */ if (write_elf_loads(s) < 0) { return -1; } /* write section to vmcore */ if (s->have_section) { if (write_elf_section(s, 1) < 0) { return -1; } } /* write notes to vmcore */ if (write_elf64_notes(s) < 0) { return -1; } } else { /* write PT_NOTE to vmcore */ if (write_elf32_note(s) < 0) { return -1; } /* write all PT_LOAD to vmcore */ if (write_elf_loads(s) < 0) { return -1; } /* write section to vmcore */ if (s->have_section) { if (write_elf_section(s, 0) < 0) { return -1; } } /* write notes to vmcore */ if (write_elf32_notes(s) < 0) { return -1; } } return 0; } /* write PT_LOAD to vmcore */ static int dump_completed(DumpState *s) { dump_cleanup(s); return 0; } static int get_next_block(DumpState *s, RAMBlock *block) { while (1) { block = QTAILQ_NEXT(block, next); if (!block) { /* no more block */ return 1; } s->start = 0; s->block = block; if (s->has_filter) { if (block->offset >= s->begin + s->length || block->offset + block->length <= s->begin) { /* This block is out of the range */ continue; } if (s->begin > block->offset) { s->start = s->begin - block->offset; } } return 0; } } /* write all memory to vmcore */ static int dump_iterate(DumpState *s) { RAMBlock *block; int64_t size; int ret; while (1) { block = s->block; size = block->length; if (s->has_filter) { size -= s->start; if (s->begin + s->length < block->offset + block->length) { size -= block->offset + block->length - (s->begin + s->length); } } ret = write_memory(s, block, s->start, size); if (ret == -1) { return ret; } ret = get_next_block(s, block); if (ret == 1) { dump_completed(s); return 0; } } } static int create_vmcore(DumpState *s) { int ret; ret = dump_begin(s); if (ret < 
0) { return -1; } ret = dump_iterate(s); if (ret < 0) { return -1; } return 0; } static ram_addr_t get_start_block(DumpState *s) { RAMBlock *block; if (!s->has_filter) { s->block = QTAILQ_FIRST(&ram_list.blocks); return 0; } QTAILQ_FOREACH(block, &ram_list.blocks, next) { if (block->offset >= s->begin + s->length || block->offset + block->length <= s->begin) { /* This block is out of the range */ continue; } s->block = block; if (s->begin > block->offset) { s->start = s->begin - block->offset; } else { s->start = 0; } return s->start; } return -1; } static int dump_init(DumpState *s, int fd, bool paging, bool has_filter, int64_t begin, int64_t length, Error **errp) { CPUState *cpu; int nr_cpus; Error *err = NULL; int ret; if (runstate_is_running()) { vm_stop(RUN_STATE_SAVE_VM); s->resume = true; } else { s->resume = false; } /* If we use KVM, we should synchronize the registers before we get dump * info or physmap info. */ cpu_synchronize_all_states(); nr_cpus = 0; for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) { nr_cpus++; } s->errp = errp; s->fd = fd; s->has_filter = has_filter; s->begin = begin; s->length = length; guest_phys_blocks_init(&s->guest_phys_blocks); guest_phys_blocks_append(&s->guest_phys_blocks); s->start = get_start_block(s); if (s->start == -1) { error_set(errp, QERR_INVALID_PARAMETER, "begin"); goto cleanup; } /* get dump info: endian, class and architecture. * If the target architecture is not supported, cpu_get_dump_info() will * return -1. 
*/ ret = cpu_get_dump_info(&s->dump_info); if (ret < 0) { error_set(errp, QERR_UNSUPPORTED); goto cleanup; } s->note_size = cpu_get_note_size(s->dump_info.d_class, s->dump_info.d_machine, nr_cpus); if (ret < 0) { error_set(errp, QERR_UNSUPPORTED); goto cleanup; } /* get memory mapping */ memory_mapping_list_init(&s->list); if (paging) { qemu_get_guest_memory_mapping(&s->list, &err); if (err != NULL) { error_propagate(errp, err); goto cleanup; } } else { qemu_get_guest_simple_memory_mapping(&s->list); } if (s->has_filter) { memory_mapping_filter(&s->list, s->begin, s->length); } /* * calculate phdr_num * * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow */ s->phdr_num = 1; /* PT_NOTE */ if (s->list.num < UINT16_MAX - 2) { s->phdr_num += s->list.num; s->have_section = false; } else { s->have_section = true; s->phdr_num = PN_XNUM; s->sh_info = 1; /* PT_NOTE */ /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */ if (s->list.num <= UINT32_MAX - 1) { s->sh_info += s->list.num; } else { s->sh_info = UINT32_MAX; } } if (s->dump_info.d_class == ELFCLASS64) { if (s->have_section) { s->memory_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info + sizeof(Elf64_Shdr) + s->note_size; } else { s->memory_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num + s->note_size; } } else { if (s->have_section) { s->memory_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info + sizeof(Elf32_Shdr) + s->note_size; } else { s->memory_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num + s->note_size; } } return 0; cleanup: guest_phys_blocks_free(&s->guest_phys_blocks); if (s->resume) { vm_start(); } return -1; } void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin, int64_t begin, bool has_length, int64_t length, Error **errp) { const char *p; int fd = -1; DumpState *s; int ret; if (has_begin && !has_length) { error_set(errp, QERR_MISSING_PARAMETER, "length"); return; } if (!has_begin && 
has_length) { error_set(errp, QERR_MISSING_PARAMETER, "begin"); return; } #if !defined(WIN32) if (strstart(file, "fd:", &p)) { fd = monitor_get_fd(cur_mon, p, errp); if (fd == -1) { return; } } #endif if (strstart(file, "file:", &p)) { fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR); if (fd < 0) { error_setg_file_open(errp, errno, p); return; } } if (fd == -1) { error_set(errp, QERR_INVALID_PARAMETER, "protocol"); return; } s = g_malloc0(sizeof(DumpState)); ret = dump_init(s, fd, paging, has_begin, begin, length, errp); if (ret < 0) { g_free(s); return; } if (create_vmcore(s) < 0 && !error_is_set(s->errp)) { error_set(errp, QERR_IO_ERROR); } g_free(s); }