Merge remote-tracking branch 'stefanha/block' into staging

# By Fam Zheng (8) and others
# Via Stefan Hajnoczi
* stefanha/block:
  qemu-iotests: Filter out 'adapter_type'
  nbd: support large NBD requests
  nbd: use g_slice_new() instead of a freelist
  qemu-iotests: Filter out vmdk creation options
  vmdk: add bdrv_co_write_zeroes
  vmdk: store fields of VmdkMetaData in cpu endian
  vmdk: change magic number to macro
  vmdk: Add option to create zeroed-grain image
  vmdk: add support for “zeroed‐grain” GTE
  vmdk: named return code.
  blockdev: Replace "undefined error" in qmp_block_resize
  block: add read-only support to VHDX image format.
  block: initial VHDX driver support framework - supports open and probe
  block: vhdx header for the QEMU support of VHDX images
  qemu: add castagnoli crc32c checksum algorithm
This commit is contained in:
Anthony Liguori 2013-05-03 11:20:02 -05:00
commit 25565e8595
11 changed files with 1618 additions and 87 deletions

View File

@ -2,6 +2,7 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-y += vhdx.o
block-obj-y += parallels.o blkdebug.o blkverify.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o

972
block/vhdx.c Normal file
View File

@ -0,0 +1,972 @@
/*
* Block driver for Hyper-V VHDX Images
*
* Copyright (c) 2013 Red Hat, Inc.,
*
* Authors:
* Jeff Cody <jcody@redhat.com>
*
* This is based on the "VHDX Format Specification v0.95", published 4/12/2012
* by Microsoft:
* https://www.microsoft.com/en-us/download/details.aspx?id=29681
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
*
*/
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include "qemu/crc32c.h"
#include "block/vhdx.h"
/* Several metadata and region table data entries are identified by
* guids in a MS-specific GUID format. The constants below are compared
* (via guid_eq) against GUIDs read from the image after those have been
* converted with leguid_to_cpus(). */
/* ------- Known Region Table GUIDs ---------------------- */
/* Region holding the block allocation table (BAT); looked up in
* vhdx_open_region_tables() and stored in BDRVVHDXState.bat_rt */
static const MSGUID bat_guid = { .data1 = 0x2dc27766,
.data2 = 0xf623,
.data3 = 0x4200,
.data4 = { 0x9d, 0x64, 0x11, 0x5e,
0x9b, 0xfd, 0x4a, 0x08} };
/* Region holding the metadata table; looked up in
* vhdx_open_region_tables() and stored in BDRVVHDXState.metadata_rt */
static const MSGUID metadata_guid = { .data1 = 0x8b7ca206,
.data2 = 0x4790,
.data3 = 0x4b9a,
.data4 = { 0xb8, 0xfe, 0x57, 0x5f,
0x05, 0x0f, 0x88, 0x6e} };
/* ------- Known Metadata Entry GUIDs ---------------------- */
/* File Parameters item (block size, has-parent flag) */
static const MSGUID file_param_guid = { .data1 = 0xcaa16737,
.data2 = 0xfa36,
.data3 = 0x4d43,
.data4 = { 0xb3, 0xb6, 0x33, 0xf0,
0xaa, 0x44, 0xe7, 0x6b} };
/* Virtual Disk Size item (size of the virtual drive, in bytes) */
static const MSGUID virtual_size_guid = { .data1 = 0x2FA54224,
.data2 = 0xcd1b,
.data3 = 0x4876,
.data4 = { 0xb2, 0x11, 0x5d, 0xbe,
0xd8, 0x3b, 0xf4, 0xb8} };
/* Page 83 Data item (scsi page 83 guid) */
static const MSGUID page83_guid = { .data1 = 0xbeca12ab,
.data2 = 0xb2e6,
.data3 = 0x4523,
.data4 = { 0x93, 0xef, 0xc3, 0x09,
0xe0, 0x00, 0xc7, 0x46} };
/* Physical Sector Size item */
static const MSGUID phys_sector_guid = { .data1 = 0xcda348c7,
.data2 = 0x445d,
.data3 = 0x4471,
.data4 = { 0x9c, 0xc9, 0xe9, 0x88,
0x52, 0x51, 0xc5, 0x56} };
/* Parent Locator item; only expected for differencing images */
static const MSGUID parent_locator_guid = { .data1 = 0xa8d35f2d,
.data2 = 0xb30b,
.data3 = 0x454d,
.data4 = { 0xab, 0xf7, 0xd3,
0xd8, 0x48, 0x34,
0xab, 0x0c} };
/* Logical Sector Size item */
static const MSGUID logical_sector_guid = { .data1 = 0x8141bf1d,
.data2 = 0xa96f,
.data3 = 0x4709,
.data4 = { 0xba, 0x47, 0xf2,
0x33, 0xa8, 0xfa,
0xab, 0x5f} };
/* Each parent type must have a valid GUID; this is for parent images
* of type 'VHDX'. If we were to allow e.g. a QCOW2 parent, we would
* need to make up our own QCOW2 GUID type */
static const MSGUID parent_vhdx_guid = { .data1 = 0xb04aefb7,
.data2 = 0xd19e,
.data3 = 0x4a81,
.data4 = { 0xb7, 0x89, 0x25, 0xb8,
0xe9, 0x44, 0x59, 0x13} };
/* Bit flags recorded in VHDXMetadataEntries.present, one per known
* metadata item, set by vhdx_parse_metadata() when the item is found. */
#define META_FILE_PARAMETER_PRESENT 0x01
#define META_VIRTUAL_DISK_SIZE_PRESENT 0x02
#define META_PAGE_83_PRESENT 0x04
#define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08
#define META_PHYS_SECTOR_SIZE_PRESENT 0x10
#define META_PARENT_LOCATOR_PRESENT 0x20
/* The five items that are always required. The parent locator bit is
* deliberately not part of this mask. */
#define META_ALL_PRESENT \
(META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT | \
META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \
META_PHYS_SECTOR_SIZE_PRESENT)
/* Host-endian copies of the metadata table entries this driver knows
* about, filled in by vhdx_parse_metadata(). */
typedef struct VHDXMetadataEntries {
VHDXMetadataTableEntry file_parameters_entry;
VHDXMetadataTableEntry virtual_disk_size_entry;
VHDXMetadataTableEntry page83_data_entry;
VHDXMetadataTableEntry logical_sector_size_entry;
VHDXMetadataTableEntry phys_sector_size_entry;
VHDXMetadataTableEntry parent_locator_entry;
uint16_t present; /* bitmask of META_*_PRESENT for the entries found */
} VHDXMetadataEntries;
/* Output of vhdx_block_translate(): where a run of sectors lives in the
* image file, and how much of the request fits in that payload block. */
typedef struct VHDXSectorInfo {
uint32_t bat_idx; /* BAT entry index */
uint32_t sectors_avail; /* sectors available in payload block */
uint32_t bytes_left; /* bytes left in the block after data to r/w */
uint32_t bytes_avail; /* bytes available in payload block */
uint64_t file_offset; /* absolute offset in bytes, in file */
uint64_t block_offset; /* block offset, in bytes */
} VHDXSectorInfo;
/* Per-image driver state (bs->opaque), populated by vhdx_open() and the
* vhdx_parse_* helpers it calls. */
typedef struct BDRVVHDXState {
CoMutex lock; /* held by vhdx_co_readv() except around the file read */
int curr_header; /* index into headers[] of the active header */
VHDXHeader *headers[2]; /* host-endian copies of both on-disk headers */
VHDXRegionTableHeader rt;
VHDXRegionTableEntry bat_rt; /* region table for the BAT */
VHDXRegionTableEntry metadata_rt; /* region table for the metadata */
VHDXMetadataTableHeader metadata_hdr;
VHDXMetadataEntries metadata_entries;
VHDXFileParameters params;
uint32_t block_size; /* copy of params.block_size */
uint32_t block_size_bits; /* log2(block_size) */
uint32_t sectors_per_block; /* block_size / logical_sector_size */
uint32_t sectors_per_block_bits; /* log2(sectors_per_block) */
uint64_t virtual_disk_size; /* bytes, read from the metadata region */
uint32_t logical_sector_size;
uint32_t physical_sector_size;
uint64_t chunk_ratio; /* see computation in vhdx_parse_metadata() */
uint32_t chunk_ratio_bits; /* log2(chunk_ratio) */
uint32_t logical_sector_size_bits; /* log2(logical_sector_size) */
uint32_t bat_entries; /* number of BAT entries byte-swapped at open */
VHDXBatEntry *bat; /* in-memory copy of the BAT region */
uint64_t bat_offset; /* file offset of the BAT (bat_rt.file_offset) */
/* NOTE(review): the two parent fields are freed on close but are not
* assigned by any code visible in this file - presumably reserved for
* differencing-image support; confirm. */
VHDXParentLocatorHeader parent_header;
VHDXParentLocatorEntry *parent_entries;
} BDRVVHDXState;
/*
 * Run crc32c() over 'size' bytes of 'buf', seeded with 'crc'.
 *
 * When crc_offset is greater than zero, the 4 bytes at buf + crc_offset
 * are temporarily replaced with zeroes for the duration of the
 * calculation and put back afterwards; with crc_offset <= 0 the buffer
 * is checksummed untouched.
 *
 * Returns the value produced by crc32c().
 */
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
                            int crc_offset)
{
    const bool mask_field = crc_offset > 0;
    uint32_t saved_field;
    uint32_t result;

    assert(buf != NULL);

    if (mask_field) {
        uint8_t *field = buf + crc_offset;

        memcpy(&saved_field, field, sizeof(saved_field));
        memset(field, 0, sizeof(saved_field));
    }

    result = crc32c(crc, buf, size);

    if (mask_field) {
        /* put the original checksum bytes back */
        memcpy(buf + crc_offset, &saved_field, sizeof(saved_field));
    }

    return result;
}
/* Validates the checksum of the buffer, with an in-place CRC.
 *
 * The stored checksum is read from the buffer as a little-endian 32-bit
 * value and compared with a freshly calculated one. During the
 * calculation the crc field is replaced by zero and restored afterwards,
 * so the buffer is modified for a short time; this may not be suitable
 * for multi-threaded use.
 *
 * crc_offset:  byte offset in buf of the buffer crc
 * buf:         buffer pointer
 * size:        size of buffer (must be > crc_offset+4)
 *
 * returns true if checksum is valid, false otherwise
 */
bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset)
{
    uint32_t stored_le;

    assert(buf != NULL);
    assert(size > (crc_offset + 4));

    /* grab the on-disk value before the calculation touches the field */
    memcpy(&stored_le, buf + crc_offset, sizeof(stored_le));

    return vhdx_checksum_calc(0xffffffff, buf, size, crc_offset) ==
           le32_to_cpu(stored_le);
}
/*
 * Per the MS VHDX Specification, for every VHDX file:
 *   - The header section is fixed size - 1 MB
 *   - The header section is always the first "object"
 *   - The first 64KB of the header is the File Identifier
 *   - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
 *   - The following 512 bytes constitute a UTF-16 string identifying the
 *     software that created the file, and is optional and diagnostic only.
 *
 * Probing therefore only looks at the first 8 bytes: a score of 100 is
 * returned when they equal "vhdxfile", and 0 otherwise (including when
 * fewer than 8 bytes were supplied). 'filename' is not consulted.
 */
static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    static const char file_signature[] = "vhdxfile";
    const int signature_len = 8;

    if (buf_size < signature_len) {
        return 0;
    }

    return memcmp(buf, file_signature, signature_len) == 0 ? 100 : 0;
}
/* All VHDX structures on disk are little endian.
 *
 * Converts every field of a header that was copied verbatim from the
 * image into host byte order, in place. */
static void vhdx_header_le_import(VHDXHeader *h)
{
    assert(h != NULL);

    /* GUID fields */
    leguid_to_cpus(&h->file_write_guid);
    leguid_to_cpus(&h->data_write_guid);
    leguid_to_cpus(&h->log_guid);

    /* 64-bit fields */
    le64_to_cpus(&h->sequence_number);
    le64_to_cpus(&h->log_offset);

    /* 32-bit fields */
    le32_to_cpus(&h->signature);
    le32_to_cpus(&h->checksum);
    le32_to_cpus(&h->log_length);

    /* 16-bit fields */
    le16_to_cpus(&h->log_version);
    le16_to_cpus(&h->version);
}
/* opens the specified header block from the VHDX file header section */
static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
{
int ret = 0;
VHDXHeader *header1;
VHDXHeader *header2;
bool h1_valid = false;
bool h2_valid = false;
uint64_t h1_seq = 0;
uint64_t h2_seq = 0;
uint8_t *buffer;
header1 = qemu_blockalign(bs, sizeof(VHDXHeader));
header2 = qemu_blockalign(bs, sizeof(VHDXHeader));
buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE);
s->headers[0] = header1;
s->headers[1] = header2;
/* We have to read the whole VHDX_HEADER_SIZE instead of
* sizeof(VHDXHeader), because the checksum is over the whole
* region */
ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
}
/* copy over just the relevant portion that we need */
memcpy(header1, buffer, sizeof(VHDXHeader));
vhdx_header_le_import(header1);
if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
!memcmp(&header1->signature, "head", 4) &&
header1->version == 1) {
h1_seq = header1->sequence_number;
h1_valid = true;
}
ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
}
/* copy over just the relevant portion that we need */
memcpy(header2, buffer, sizeof(VHDXHeader));
vhdx_header_le_import(header2);
if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
!memcmp(&header2->signature, "head", 4) &&
header2->version == 1) {
h2_seq = header2->sequence_number;
h2_valid = true;
}
/* If there is only 1 valid header (or no valid headers), we
* don't care what the sequence numbers are */
if (h1_valid && !h2_valid) {
s->curr_header = 0;
} else if (!h1_valid && h2_valid) {
s->curr_header = 1;
} else if (!h1_valid && !h2_valid) {
ret = -EINVAL;
goto fail;
} else {
/* If both headers are valid, then we choose the active one by the
* highest sequence number. If the sequence numbers are equal, that is
* invalid */
if (h1_seq > h2_seq) {
s->curr_header = 0;
} else if (h2_seq > h1_seq) {
s->curr_header = 1;
} else {
ret = -EINVAL;
goto fail;
}
}
ret = 0;
goto exit;
fail:
qerror_report(ERROR_CLASS_GENERIC_ERROR, "No valid VHDX header found");
qemu_vfree(header1);
qemu_vfree(header2);
s->headers[0] = NULL;
s->headers[1] = NULL;
exit:
qemu_vfree(buffer);
return ret;
}
/* Reads and validates the region table, and records the two regions this
 * driver needs: the BAT (s->bat_rt) and the metadata region
 * (s->metadata_rt).
 *
 * Returns 0 on success. Returns -EINVAL if the checksum or signature is
 * bad, the entry count exceeds 2047, a known region appears twice, or
 * either of the two needed regions is missing. Returns -ENOTSUP if an
 * unrecognized region is flagged as required. A read failure returns the
 * negative value from bdrv_pread().
 */
static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
{
    int ret = 0;
    uint8_t *buffer;
    int offset = 0;
    VHDXRegionTableEntry rt_entry;
    uint32_t i;
    bool bat_rt_found = false;
    bool metadata_rt_found = false;

    /* We have to read the whole 64KB block, because the crc32 is over the
     * whole block */
    buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);

    ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
                     VHDX_HEADER_BLOCK_SIZE);
    if (ret < 0) {
        goto fail;
    }
    memcpy(&s->rt, buffer, sizeof(s->rt));
    le32_to_cpus(&s->rt.signature);
    le32_to_cpus(&s->rt.checksum);
    le32_to_cpus(&s->rt.entry_count);
    le32_to_cpus(&s->rt.reserved);
    offset += sizeof(s->rt);

    /* The signature is an ASCII tag: compare the raw on-disk bytes, not
     * the byte-swapped s->rt.signature, so that the check also works on
     * big-endian hosts. */
    if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
        memcmp(buffer, "regi", 4)) {
        ret = -EINVAL;
        goto fail;
    }

    /* Per spec, maximum region table entry count is 2047 */
    if (s->rt.entry_count > 2047) {
        ret = -EINVAL;
        goto fail;
    }

    for (i = 0; i < s->rt.entry_count; i++) {
        memcpy(&rt_entry, buffer + offset, sizeof(rt_entry));
        offset += sizeof(rt_entry);

        leguid_to_cpus(&rt_entry.guid);
        le64_to_cpus(&rt_entry.file_offset);
        le32_to_cpus(&rt_entry.length);
        le32_to_cpus(&rt_entry.data_bits);

        /* see if we recognize the entry */
        if (guid_eq(rt_entry.guid, bat_guid)) {
            /* must be unique; if we have already found it this is invalid */
            if (bat_rt_found) {
                ret = -EINVAL;
                goto fail;
            }
            bat_rt_found = true;
            s->bat_rt = rt_entry;
            continue;
        }

        if (guid_eq(rt_entry.guid, metadata_guid)) {
            /* must be unique; if we have already found it this is invalid */
            if (metadata_rt_found) {
                ret = -EINVAL;
                goto fail;
            }
            metadata_rt_found = true;
            s->metadata_rt = rt_entry;
            continue;
        }

        if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) {
            /* cannot read vhdx file - required region table entry that
             * we do not understand.  per spec, we must fail to open */
            ret = -ENOTSUP;
            goto fail;
        }
    }

    /* Both the BAT and the metadata region are mandatory. Without this
     * check the callers would go on to use a zeroed s->bat_rt /
     * s->metadata_rt and read from file offset 0. */
    if (!bat_rt_found || !metadata_rt_found) {
        ret = -EINVAL;
        goto fail;
    }

    ret = 0;

fail:
    qemu_vfree(buffer);
    return ret;
}
/* Metadata initial parser
*
* This loads all the metadata entry fields. This may cause additional
* fields to be processed (e.g. parent locator, etc..).
*
* There are 5 Metadata items that are always required:
* - File Parameters (block size, has a parent)
* - Virtual Disk Size (size, in bytes, of the virtual drive)
* - Page 83 Data (scsi page 83 guid)
* - Logical Sector Size (logical sector size in bytes, either 512 or
* 4096. We only support 512 currently)
* - Physical Sector Size (512 or 4096)
*
* Also, if the File Parameters indicate this is a differencing file,
* we must also look for the Parent Locator metadata item.
*/
static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
{
int ret = 0;
uint8_t *buffer;
int offset = 0;
uint32_t i = 0;
VHDXMetadataTableEntry md_entry;
buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
VHDX_METADATA_TABLE_MAX_SIZE);
if (ret < 0) {
goto exit;
}
memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
offset += sizeof(s->metadata_hdr);
le64_to_cpus(&s->metadata_hdr.signature);
le16_to_cpus(&s->metadata_hdr.reserved);
le16_to_cpus(&s->metadata_hdr.entry_count);
if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.present = 0;
if ((s->metadata_hdr.entry_count * sizeof(md_entry)) >
(VHDX_METADATA_TABLE_MAX_SIZE - offset)) {
ret = -EINVAL;
goto exit;
}
for (i = 0; i < s->metadata_hdr.entry_count; i++) {
memcpy(&md_entry, buffer + offset, sizeof(md_entry));
offset += sizeof(md_entry);
leguid_to_cpus(&md_entry.item_id);
le32_to_cpus(&md_entry.offset);
le32_to_cpus(&md_entry.length);
le32_to_cpus(&md_entry.data_bits);
le32_to_cpus(&md_entry.reserved2);
if (guid_eq(md_entry.item_id, file_param_guid)) {
if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.file_parameters_entry = md_entry;
s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT;
continue;
}
if (guid_eq(md_entry.item_id, virtual_size_guid)) {
if (s->metadata_entries.present & META_VIRTUAL_DISK_SIZE_PRESENT) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.virtual_disk_size_entry = md_entry;
s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT;
continue;
}
if (guid_eq(md_entry.item_id, page83_guid)) {
if (s->metadata_entries.present & META_PAGE_83_PRESENT) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.page83_data_entry = md_entry;
s->metadata_entries.present |= META_PAGE_83_PRESENT;
continue;
}
if (guid_eq(md_entry.item_id, logical_sector_guid)) {
if (s->metadata_entries.present &
META_LOGICAL_SECTOR_SIZE_PRESENT) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.logical_sector_size_entry = md_entry;
s->metadata_entries.present |= META_LOGICAL_SECTOR_SIZE_PRESENT;
continue;
}
if (guid_eq(md_entry.item_id, phys_sector_guid)) {
if (s->metadata_entries.present & META_PHYS_SECTOR_SIZE_PRESENT) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.phys_sector_size_entry = md_entry;
s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT;
continue;
}
if (guid_eq(md_entry.item_id, parent_locator_guid)) {
if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.parent_locator_entry = md_entry;
s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT;
continue;
}
if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) {
/* cannot read vhdx file - required region table entry that
* we do not understand. per spec, we must fail to open */
ret = -ENOTSUP;
goto exit;
}
}
if (s->metadata_entries.present != META_ALL_PRESENT) {
ret = -ENOTSUP;
goto exit;
}
ret = bdrv_pread(bs->file,
s->metadata_entries.file_parameters_entry.offset
+ s->metadata_rt.file_offset,
&s->params,
sizeof(s->params));
if (ret < 0) {
goto exit;
}
le32_to_cpus(&s->params.block_size);
le32_to_cpus(&s->params.data_bits);
/* We now have the file parameters, so we can tell if this is a
* differencing file (i.e.. has_parent), is dynamic or fixed
* sized (leave_blocks_allocated), and the block size */
/* The parent locator required iff the file parameters has_parent set */
if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
/* TODO: parse parent locator fields */
ret = -ENOTSUP; /* temp, until differencing files are supported */
goto exit;
} else {
/* if has_parent is set, but there is not parent locator present,
* then that is an invalid combination */
ret = -EINVAL;
goto exit;
}
}
/* determine virtual disk size, logical sector size,
* and phys sector size */
ret = bdrv_pread(bs->file,
s->metadata_entries.virtual_disk_size_entry.offset
+ s->metadata_rt.file_offset,
&s->virtual_disk_size,
sizeof(uint64_t));
if (ret < 0) {
goto exit;
}
ret = bdrv_pread(bs->file,
s->metadata_entries.logical_sector_size_entry.offset
+ s->metadata_rt.file_offset,
&s->logical_sector_size,
sizeof(uint32_t));
if (ret < 0) {
goto exit;
}
ret = bdrv_pread(bs->file,
s->metadata_entries.phys_sector_size_entry.offset
+ s->metadata_rt.file_offset,
&s->physical_sector_size,
sizeof(uint32_t));
if (ret < 0) {
goto exit;
}
le64_to_cpus(&s->virtual_disk_size);
le32_to_cpus(&s->logical_sector_size);
le32_to_cpus(&s->physical_sector_size);
if (s->logical_sector_size == 0 || s->params.block_size == 0) {
ret = -EINVAL;
goto exit;
}
/* both block_size and sector_size are guaranteed powers of 2 */
s->sectors_per_block = s->params.block_size / s->logical_sector_size;
s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
(uint64_t)s->logical_sector_size /
(uint64_t)s->params.block_size;
/* These values are ones we will want to use for division / multiplication
* later on, and they are all guaranteed (per the spec) to be powers of 2,
* so we can take advantage of that for shift operations during
* reads/writes */
if (s->logical_sector_size & (s->logical_sector_size - 1)) {
ret = -EINVAL;
goto exit;
}
if (s->sectors_per_block & (s->sectors_per_block - 1)) {
ret = -EINVAL;
goto exit;
}
if (s->chunk_ratio & (s->chunk_ratio - 1)) {
ret = -EINVAL;
goto exit;
}
s->block_size = s->params.block_size;
if (s->block_size & (s->block_size - 1)) {
ret = -EINVAL;
goto exit;
}
s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block);
s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio);
s->block_size_bits = 31 - clz32(s->block_size);
ret = 0;
exit:
qemu_vfree(buffer);
return ret;
}
/* Parse the replay log.  Per the VHDX spec, if the log is present
 * it must be replayed prior to opening the file, even read-only.
 *
 * If read-only, we must replay the log in RAM (or refuse to open
 * a dirty VHDX file read-only).
 *
 * Log replay is not implemented, so this only decides whether the image
 * can be opened as-is.
 *
 * Returns 0 if there is no log to replay (log GUID is all zero, or the
 * log length is zero), -EINVAL if the log version is not 0, and -ENOTSUP
 * if a log that would need replaying is present.
 */
static int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s)
{
    VHDXHeader *hdr = s->headers[s->curr_header];
    uint8_t data4_bits = 0;
    size_t i;

    /* Fold data4 into a dedicated accumulator. Reusing the return-code
     * variable for this would leak a positive garbage value to the caller
     * on the log_length == 0 path. */
    for (i = 0; i < sizeof(hdr->log_guid.data4); i++) {
        data4_bits |= hdr->log_guid.data4[i];
    }

    /* if either the log guid or the log length is zero,
     * then a replay log is not present */
    if (hdr->log_guid.data1 == 0 &&
        hdr->log_guid.data2 == 0 &&
        hdr->log_guid.data3 == 0 &&
        data4_bits == 0) {
        return 0;
    }

    /* per spec, only log version of 0 is supported */
    if (hdr->log_version != 0) {
        return -EINVAL;
    }

    if (hdr->log_length == 0) {
        return 0;
    }

    /* We currently do not support images with logs to replay */
    return -ENOTSUP;
}
/* Opens a VHDX image: checks the file signature, then parses the headers,
* the log state, the region table and the metadata, and finally loads the
* BAT into memory.
*
* Returns 0 on success. On any failure the buffers allocated so far are
* freed and a non-zero error value is returned. Opening with BDRV_O_RDWR
* always fails with -ENOTSUP, but only after the image itself has been
* validated. */
static int vhdx_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVVHDXState *s = bs->opaque;
int ret = 0;
uint32_t i;
uint64_t signature;
uint32_t data_blocks_cnt, bitmap_blocks_cnt;
s->bat = NULL;
qemu_co_mutex_init(&s->lock);
/* validate the file signature */
ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
if (ret < 0) {
goto fail;
}
/* raw on-disk bytes are compared, so no endian conversion is needed */
if (memcmp(&signature, "vhdxfile", 8)) {
ret = -EINVAL;
goto fail;
}
ret = vhdx_parse_header(bs, s);
if (ret) {
goto fail;
}
ret = vhdx_parse_log(bs, s);
if (ret) {
goto fail;
}
ret = vhdx_open_region_tables(bs, s);
if (ret) {
goto fail;
}
ret = vhdx_parse_metadata(bs, s);
if (ret) {
goto fail;
}
/* vhdx_parse_metadata() has already stored this same value */
s->block_size = s->params.block_size;
/* the VHDX spec dictates that virtual_disk_size is always a multiple of
* logical_sector_size */
bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits;
/* number of payload blocks, rounded up to cover a partial last block */
data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
data_blocks_cnt++;
}
/* one sector bitmap block per chunk_ratio payload blocks, rounded up */
bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits;
if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) {
bitmap_blocks_cnt++;
}
/* NOTE(review): s->parent_entries is not assigned by any code visible
* in this file, so this presumably always takes the else branch and
* relies on bs->opaque having been zero-initialized - confirm. */
if (s->parent_entries) {
s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
} else {
s->bat_entries = data_blocks_cnt +
((data_blocks_cnt - 1) >> s->chunk_ratio_bits);
}
s->bat_offset = s->bat_rt.file_offset;
if (s->bat_entries > s->bat_rt.length / sizeof(VHDXBatEntry)) {
/* BAT allocation is not large enough for all entries */
ret = -EINVAL;
goto fail;
}
/* the whole BAT region is read, but only the first bat_entries entries
* are converted to host byte order below */
s->bat = qemu_blockalign(bs, s->bat_rt.length);
ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
if (ret < 0) {
goto fail;
}
for (i = 0; i < s->bat_entries; i++) {
le64_to_cpus(&s->bat[i]);
}
if (flags & BDRV_O_RDWR) {
ret = -ENOTSUP;
goto fail;
}
/* TODO: differencing files, write */
return 0;
fail:
/* vhdx_parse_header() resets headers[] to NULL on its own failure path */
qemu_vfree(s->headers[0]);
qemu_vfree(s->headers[1]);
qemu_vfree(s->bat);
qemu_vfree(s->parent_entries);
return ret;
}
/* Reopen hook registered as .bdrv_reopen_prepare.
 *
 * Nothing is inspected or staged; every reopen request is accepted by
 * returning 0. */
static int vhdx_reopen_prepare(BDRVReopenState *state,
                               BlockReopenQueue *queue, Error **errp)
{
    /* no per-driver state to validate */
    return 0;
}
/*
* Perform sector to block offset translations, to get various
* sector and file offsets into the image. See VHDXSectorInfo
*
* s: driver state; the *_bits shift counts and s->bat must be set up
* sector_num: first sector of the request
* nb_sectors: number of sectors requested
* sinfo: filled in with the result
*
* Only the part of the request that lies inside a single payload block is
* described; the caller loops for the remainder. If the BAT entry carries
* a zero file offset, sinfo->file_offset is left at 0.
*
* NOTE(review): sinfo->bat_idx is used to index s->bat without being
* compared against s->bat_entries here - presumably the caller guarantees
* sector_num is within the virtual disk; confirm.
*/
static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num,
int nb_sectors, VHDXSectorInfo *sinfo)
{
uint32_t block_offset;
/* index of the payload block containing sector_num */
sinfo->bat_idx = sector_num >> s->sectors_per_block_bits;
/* effectively a modulo - this gives us the offset into the block
* (in sector sizes) for our sector number */
block_offset = sector_num - (sinfo->bat_idx << s->sectors_per_block_bits);
/* the chunk ratio gives us the interleaving of the sector
* bitmaps, so we need to advance our page block index by the
* sector bitmaps entry number */
sinfo->bat_idx += sinfo->bat_idx >> s->chunk_ratio_bits;
/* the number of sectors we can read/write in this cycle */
sinfo->sectors_avail = s->sectors_per_block - block_offset;
/* computed before sectors_avail is clamped to the request size */
sinfo->bytes_left = sinfo->sectors_avail << s->logical_sector_size_bits;
if (sinfo->sectors_avail > nb_sectors) {
sinfo->sectors_avail = nb_sectors;
}
sinfo->bytes_avail = sinfo->sectors_avail << s->logical_sector_size_bits;
/* drop the low (state/reserved) bits; what remains is in 1MB units */
sinfo->file_offset = s->bat[sinfo->bat_idx] >> VHDX_BAT_FILE_OFF_BITS;
sinfo->block_offset = block_offset << s->logical_sector_size_bits;
/* The file offset must be past the header section, so must be > 0 */
if (sinfo->file_offset == 0) {
return;
}
/* block offset is the offset in vhdx logical sectors, in
* the payload data block. Convert that to a byte offset
* in the block, and add in the payload data block offset
* in the file, in bytes, to get the final read address */
sinfo->file_offset <<= 20; /* now in bytes, rather than 1MB units */
sinfo->file_offset += sinfo->block_offset;
}
/* Reads nb_sectors sectors starting at sector_num into qiov.
*
* The request is processed one payload block at a time: each chunk is
* translated with vhdx_block_translate() and then either zero-filled or
* read from the underlying file, depending on the block state stored in
* the low bits of its BAT entry.
*
* Returns 0 on success, -ENOTSUP if the image is flagged as having a
* parent, -EIO for a partially-present or unknown block state, or the
* negative value returned by bdrv_co_readv() on the underlying file. */
static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BDRVVHDXState *s = bs->opaque;
int ret = 0;
VHDXSectorInfo sinfo;
uint64_t bytes_done = 0;
QEMUIOVector hd_qiov;
qemu_iovec_init(&hd_qiov, qiov->niov);
qemu_co_mutex_lock(&s->lock);
while (nb_sectors > 0) {
/* We are a differencing file, so we need to inspect the sector bitmap
* to see if we have the data or not */
if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
/* not supported yet */
ret = -ENOTSUP;
goto exit;
} else {
vhdx_block_translate(s, sector_num, nb_sectors, &sinfo);
/* hd_qiov becomes a window onto the part of qiov for this chunk */
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, sinfo.bytes_avail);
/* check the payload block state */
switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) {
case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
case PAYLOAD_BLOCK_UNDEFINED: /* fall through */
case PAYLOAD_BLOCK_UNMAPPED: /* fall through */
case PAYLOAD_BLOCK_ZERO:
/* return zero */
qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail);
break;
case PAYLOAD_BLOCK_FULL_PRESENT:
/* the lock is released for the duration of the file read and
* re-taken before the result is examined */
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_readv(bs->file,
sinfo.file_offset >> BDRV_SECTOR_BITS,
sinfo.sectors_avail, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto exit;
}
break;
case PAYLOAD_BLOCK_PARTIALLY_PRESENT:
/* we don't yet support difference files, fall through
* to error */
default:
ret = -EIO;
goto exit;
break;
}
/* advance to the next chunk of the request */
nb_sectors -= sinfo.sectors_avail;
sector_num += sinfo.sectors_avail;
bytes_done += sinfo.bytes_avail;
}
}
ret = 0;
exit:
/* every path reaching here holds the lock */
qemu_co_mutex_unlock(&s->lock);
qemu_iovec_destroy(&hd_qiov);
return ret;
}
/* Write hook registered as .bdrv_co_writev.
 *
 * Writing is not implemented: every request is rejected with -ENOTSUP
 * and none of the arguments are examined. */
static coroutine_fn int vhdx_co_writev(BlockDriverState *bs,
                                       int64_t sector_num,
                                       int nb_sectors, QEMUIOVector *qiov)
{
    /* images are read-only for now */
    return -ENOTSUP;
}
/* Close hook: releases the buffers held in the driver state (both header
 * copies, the in-memory BAT and the parent entries). */
static void vhdx_close(BlockDriverState *bs)
{
    BDRVVHDXState *state = bs->opaque;
    size_t idx;

    for (idx = 0;
         idx < sizeof(state->headers) / sizeof(state->headers[0]);
         idx++) {
        qemu_vfree(state->headers[idx]);
    }

    qemu_vfree(state->bat);
    qemu_vfree(state->parent_entries);
}
/* Driver description handed to bdrv_register(). The write hook is
* present but vhdx_co_writev() always returns -ENOTSUP. */
static BlockDriver bdrv_vhdx = {
.format_name = "vhdx",
.instance_size = sizeof(BDRVVHDXState),
.bdrv_probe = vhdx_probe,
.bdrv_open = vhdx_open,
.bdrv_close = vhdx_close,
.bdrv_reopen_prepare = vhdx_reopen_prepare,
.bdrv_co_readv = vhdx_co_readv,
.bdrv_co_writev = vhdx_co_writev,
};
/* Registers the "vhdx" block driver; hooked up through block_init(). */
static void bdrv_vhdx_init(void)
{
bdrv_register(&bdrv_vhdx);
}
block_init(bdrv_vhdx_init);

325
block/vhdx.h Normal file
View File

@ -0,0 +1,325 @@
/*
* Block driver for Hyper-V VHDX Images
*
* Copyright (c) 2013 Red Hat, Inc.,
*
* Authors:
* Jeff Cody <jcody@redhat.com>
*
* This is based on the "VHDX Format Specification v0.95", published 4/12/2012
* by Microsoft:
* https://www.microsoft.com/en-us/download/details.aspx?id=29681
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
*
*/
#ifndef BLOCK_VHDX_H
#define BLOCK_VHDX_H
/* Structures and fields present in the VHDX file */
/* The header section has the following blocks,
* each block is 64KB:
*
* _____________________________________________________________________________
* | File Id. | Header 1 | Header 2 | Region Table | Reserved (768KB) |
* |----------|---------------|------------|--------------|--------------------|
* | | | | | |
* 0.........64KB...........128KB........192KB..........256KB................1MB
*/
/* Byte offsets of the 64KB blocks that make up the header section, in
* the order shown in the diagram above. */
#define VHDX_HEADER_BLOCK_SIZE (64*1024)
#define VHDX_FILE_ID_OFFSET 0
#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE*1)
#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE*2)
#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE*3)
/*
* A note on the use of MS-GUID fields. For more details on the GUID,
* please see: https://en.wikipedia.org/wiki/Globally_unique_identifier.
*
* The VHDX specification only states that these are MS GUIDs, and which
* bytes are data1-data4. It makes no mention of what algorithm should be used
* to generate the GUID, nor what standard. However, looking at the specified
* known GUID fields, it appears the GUIDs are:
* Standard/DCE GUID type (noted by 10b in the MSB of byte 0 of .data4)
* Random algorithm (noted by 0x4XXX for .data3)
*/
/* ---- HEADER SECTION STRUCTURES ---- */
/* These structures are ones that are defined in the VHDX specification
* document */
/* Start of the File Identifier block at the very beginning of the file */
typedef struct VHDXFileIdentifier {
uint64_t signature; /* "vhdxfile" in ASCII */
uint16_t creator[256]; /* optional; utf-16 string to identify
the vhdx file creator. Diagnostic
only */
} VHDXFileIdentifier;
/* the guid is a 16 byte unique ID - the definition for this used by
* Microsoft is not just 16 bytes though - it is a structure that is defined,
* so we need to follow it here so that endianness does not trip us up.
* data1..data3 are multi-byte integers; data4 is a plain byte array. */
typedef struct MSGUID {
uint32_t data1;
uint16_t data2;
uint16_t data3;
uint8_t data4[8];
} MSGUID;
/* Byte-wise equality of two MSGUID objects. Both arguments have their
* address taken, so they must be lvalues, and both must be in the same
* byte order for the comparison to be meaningful. */
#define guid_eq(a, b) \
(memcmp(&(a), &(b), sizeof(MSGUID)) == 0)
#define VHDX_HEADER_SIZE (4*1024) /* although the vhdx_header struct in disk
is only 582 bytes, for purposes of crc
the header is the first 4KB of the 64KB
block */
/* The full header is 4KB, although the actual header data is much smaller.
* But for the checksum calculation, it is over the entire 4KB structure,
* not just the defined portion of it */
typedef struct QEMU_PACKED VHDXHeader {
uint32_t signature; /* "head" in ASCII */
uint32_t checksum; /* CRC-32C hash of the whole header */
uint64_t sequence_number; /* Seq number of this header. Each
VHDX file has 2 of these headers,
and only the header with the highest
sequence number is valid */
MSGUID file_write_guid; /* 128 bit unique identifier. Must be
updated to new, unique value before
the first modification is made to
file */
MSGUID data_write_guid; /* 128 bit unique identifier. Must be
updated to new, unique value before
the first modification is made to
visible data. Visible data is
defined as:
- system & user metadata
- raw block data
- disk size
- any change that will
cause the virtual disk
sector read to differ
This does not need to change if
blocks are re-arranged */
MSGUID log_guid; /* 128 bit unique identifier. If zero,
there is no valid log. If non-zero,
log entries with this guid are
valid. */
uint16_t log_version; /* version of the log format. The driver
(vhdx_parse_log) only accepts zero
here when log_guid is non-zero */
uint16_t version; /* version of the vhdx file. Currently,
only supported version is "1" */
uint32_t log_length; /* length of the log. Must be multiple
of 1MB */
uint64_t log_offset; /* byte offset in the file of the log.
Must also be a multiple of 1MB */
} VHDXHeader;
/* Header for the region table block. The table entries
* (VHDXRegionTableEntry) follow it directly within the same 64KB block. */
typedef struct QEMU_PACKED VHDXRegionTableHeader {
uint32_t signature; /* "regi" in ASCII */
uint32_t checksum; /* CRC-32C hash of the 64KB table */
uint32_t entry_count; /* number of valid entries */
uint32_t reserved;
} VHDXRegionTableHeader;
/* Individual region table entry. There may be a maximum of 2047 of these
*
* There are two known region table properties. Both are required.
* BAT (block allocation table): 2DC27766F62342009D64115E9BFD4A08
* Metadata: 8B7CA20647904B9AB8FE575F050F886E
*/
#define VHDX_REGION_ENTRY_REQUIRED 0x01 /* if set, parser must understand
this entry in order to open
file */
typedef struct QEMU_PACKED VHDXRegionTableEntry {
MSGUID guid; /* 128-bit unique identifier */
uint64_t file_offset; /* offset of the object in the file.
Must be multiple of 1MB */
uint32_t length; /* length, in bytes, of the object */
uint32_t data_bits; /* flags; see VHDX_REGION_ENTRY_REQUIRED */
} VHDXRegionTableEntry;
/* ---- LOG ENTRY STRUCTURES ---- */
#define VHDX_LOG_HDR_SIZE 64
/* Header of one log entry. The field sizes below add up to
* VHDX_LOG_HDR_SIZE bytes. */
typedef struct QEMU_PACKED VHDXLogEntryHeader {
uint32_t signature; /* "loge" in ASCII */
/* NOTE(review): "64KB table" below looks copied from the region table
* header - presumably the checksum covers the log entry; confirm. */
uint32_t checksum; /* CRC-32C hash of the 64KB table */
uint32_t entry_length; /* length in bytes, multiple of 1MB */
uint32_t tail; /* byte offset of first log entry of a
seq, where this entry is the last
entry */
uint64_t sequence_number; /* incremented with each log entry.
May not be zero. */
uint32_t descriptor_count; /* number of descriptors in this log
entry, must be >= 0 */
uint32_t reserved;
MSGUID log_guid; /* value of the log_guid from
vhdx_header. If not found in
vhdx_header, it is invalid */
uint64_t flushed_file_offset; /* see spec for full details - this
should be vhdx file size in bytes */
uint64_t last_file_offset; /* size in bytes that all allocated
file structures fit into */
} VHDXLogEntryHeader;
/* Size in bytes of VHDXLogDescriptor: 4 + 4 + 8 + 8 + 8 = 32. */
#define VHDX_LOG_DESC_SIZE 32
/* Log descriptor. The signature selects how the two anonymous unions are
* interpreted: as a "zero" descriptor or as a "desc" (data) descriptor. */
typedef struct QEMU_PACKED VHDXLogDescriptor {
uint32_t signature; /* "zero" or "desc" in ASCII */
union {
uint32_t reserved; /* zero desc */
uint32_t trailing_bytes; /* data desc: bytes 4092-4095 of the
data sector (a 4-byte field) */
};
union {
uint64_t zero_length; /* zero desc: length of the section to
zero */
uint64_t leading_bytes; /* data desc: bytes 0-7 of the data
sector */
};
uint64_t file_offset; /* file offset to write zeros - multiple
of 4kB */
uint64_t sequence_number; /* must match same field in
vhdx_log_entry_header */
} VHDXLogDescriptor;
/* One log data sector: 4 + 4 + 4084 + 4 = 4096 bytes when packed. */
typedef struct QEMU_PACKED VHDXLogDataSector {
uint32_t data_signature; /* "data" in ASCII */
uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */
uint8_t data[4084]; /* raw data, bytes 8-4091 (inclusive).
see the data descriptor field for the
other missing bytes */
uint32_t sequence_low; /* 4 LSB of 8 byte sequence_number */
} VHDXLogDataSector;
/* block states - different state values depending on whether it is a
* payload block, or a sector block. */
/* states of a payload block */
#define PAYLOAD_BLOCK_NOT_PRESENT 0
#define PAYLOAD_BLOCK_UNDEFINED 1
#define PAYLOAD_BLOCK_ZERO 2
#define PAYLOAD_BLOCK_UNMAPPED 5
#define PAYLOAD_BLOCK_FULL_PRESENT 6
#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7
/* states of a sector bitmap (SB) block */
#define SB_BLOCK_NOT_PRESENT 0
#define SB_BLOCK_PRESENT 6
/* per the spec */
#define VHDX_MAX_SECTORS_PER_BLOCK (1<<23)
/* A BAT entry is one 64-bit word: the upper 44 bits are the file offset in
* 1MB units, the lower 3 bits are the state, and the other bits are
* reserved. */
#define VHDX_BAT_STATE_BIT_MASK 0x07 /* selects the 3 state bits */
#define VHDX_BAT_FILE_OFF_BITS (64-44) /* 20: number of bits below the
44-bit file offset field */
typedef uint64_t VHDXBatEntry;
/* ---- METADATA REGION STRUCTURES ---- */
#define VHDX_METADATA_ENTRY_SIZE 32
#define VHDX_METADATA_MAX_ENTRIES 2047 /* not including the header */
/* header slot plus the maximum number of entries: 32 * 2048 = 64KB */
#define VHDX_METADATA_TABLE_MAX_SIZE \
(VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1))
/* Metadata table header; the packed fields total 8 + 2 + 2 + 20 = 32 bytes,
* i.e. the same size as one table entry. */
typedef struct QEMU_PACKED VHDXMetadataTableHeader {
uint64_t signature; /* "metadata" in ASCII */
uint16_t reserved;
uint16_t entry_count; /* number table entries. <= 2047 */
uint32_t reserved2[5];
} VHDXMetadataTableHeader;
/* Flag bits stored in VHDXMetadataTableEntry.data_bits */
#define VHDX_META_FLAGS_IS_USER 0x01 /* max 1024 entries */
#define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02 /* virtual disk metadata if set,
otherwise file metadata */
#define VHDX_META_FLAGS_IS_REQUIRED 0x04 /* parser must understand this
entry to open the file */
/* One metadata table entry; packed size is 16 + 4 + 4 + 4 + 4 = 32 bytes
* (VHDX_METADATA_ENTRY_SIZE). */
typedef struct QEMU_PACKED VHDXMetadataTableEntry {
MSGUID item_id; /* 128-bit identifier for metadata */
uint32_t offset; /* byte offset of the metadata. At
least 64kB. Relative to start of
metadata region */
/* note: if length = 0, so is offset */
uint32_t length; /* length of metadata. <= 1MB. */
uint32_t data_bits; /* least-significant 3 bits are flags, the
rest are reserved (see above) */
uint32_t reserved2;
} VHDXMetadataTableEntry;
/* Flag bits stored in VHDXFileParameters.data_bits */
#define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01 /* Do not change any blocks to
be BLOCK_NOT_PRESENT.
If set indicates a fixed
size VHDX file */
#define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */
/* "File parameters" metadata item: payload block size plus two flag bits. */
typedef struct QEMU_PACKED VHDXFileParameters {
uint32_t block_size; /* size of each payload block, always
power of 2, <= 256MB and >= 1MB. */
uint32_t data_bits; /* least-significant 2 bits are flags, the rest
are reserved (see above) */
} VHDXFileParameters;
/* "Virtual disk size" metadata item: a single 64-bit byte count. */
typedef struct QEMU_PACKED VHDXVirtualDiskSize {
uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes.
Must be multiple of the sector size,
max of 64TB */
} VHDXVirtualDiskSize;
/* "Page 83 data" metadata item.
 *
 * The item is a single GUID.  MSGUID is already a 128-bit (16-byte) type, so
 * the field must not be declared as an array of 16 MSGUIDs: that made the
 * structure 256 bytes long instead of 16. */
typedef struct QEMU_PACKED VHDXPage83Data {
    MSGUID page_83_data;                /* unique id for scsi devices that
                                           support page 0x83 */
} VHDXPage83Data;
/* "Logical sector size" metadata item: one 32-bit value. */
typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize {
uint32_t logical_sector_size; /* virtual disk sector size (in bytes).
Can only be 512 or 4096 bytes */
} VHDXVirtualDiskLogicalSectorSize;
/* "Physical sector size" metadata item: one 32-bit value. */
typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize {
uint32_t physical_sector_size; /* physical sector size (in bytes).
Can only be 512 or 4096 bytes */
} VHDXVirtualDiskPhysicalSectorSize;
/* Header of the "parent locator" metadata item.
 *
 * locator_type is a single GUID.  MSGUID is already a 128-bit (16-byte)
 * type, so declaring it as MSGUID[16] made the field 256 bytes long and
 * pushed reserved / key_value_count 240 bytes past their real position.
 * Packed layout: 16 + 2 + 2 = 20 bytes. */
typedef struct QEMU_PACKED VHDXParentLocatorHeader {
    MSGUID      locator_type;           /* type of the parent virtual disk. */
    uint16_t    reserved;
    uint16_t    key_value_count;        /* number of key/value pairs for this
                                           locator */
} VHDXParentLocatorHeader;
/* One key/value pair of a parent locator (4 + 4 + 2 + 2 = 12 bytes packed).
* key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs */
typedef struct QEMU_PACKED VHDXParentLocatorEntry {
uint32_t key_offset; /* offset in metadata for key, > 0 */
uint32_t value_offset; /* offset in metadata for value, >0 */
uint16_t key_length; /* length of entry key, > 0 */
uint16_t value_length; /* length of entry value, > 0 */
} VHDXParentLocatorEntry;
/* ----- END VHDX SPECIFICATION STRUCTURES ---- */
/* Checksum helpers; the definitions are not in this header.
*
* NOTE(review): presumably crc is the seed value, buf/size the region to
* hash, and crc_offset the byte offset of the 32-bit checksum field inside
* buf - confirm against the implementation. */
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
int crc_offset);
/* NOTE(review): presumably returns true when the checksum stored at
* crc_offset matches the one computed over buf - confirm. */
bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
/*
 * Convert a GUID read from disk to host byte order, in place.
 *
 * Only data1 (32 bit), data2 and data3 (16 bit each) are byte-swapped from
 * little-endian; no other member of MSGUID is touched.
 *
 * Declared "static inline" because this definition lives in a header: a
 * plain "static" function would be reported as unused in every including
 * file that does not call it.
 */
static inline void leguid_to_cpus(MSGUID *guid)
{
    le32_to_cpus(&guid->data1);
    le16_to_cpus(&guid->data2);
    le16_to_cpus(&guid->data3);
}
#endif

View File

@ -32,11 +32,25 @@
/* Magic numbers: four ASCII characters packed into one 32-bit value, first
* character in the most significant byte. */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
/* value of the header compressAlgorithm field for a deflate-compressed
* extent */
#define VMDK4_COMPRESSION_DEFLATE 1
/* Bits of the VMDK4 header flags field */
#define VMDK4_FLAG_NL_DETECT (1 << 0)
#define VMDK4_FLAG_RGD (1 << 1)
/* Zeroed-grain enable bit */
#define VMDK4_FLAG_ZERO_GRAIN (1 << 2)
#define VMDK4_FLAG_COMPRESS (1 << 16)
#define VMDK4_FLAG_MARKER (1 << 17)
#define VMDK4_GD_AT_END 0xffffffffffffffffULL
/* grain table entry value that marks a zeroed grain (only honoured when the
* extent has the zeroed-grain flag set) */
#define VMDK_GTE_ZEROED 0x1
/* VMDK internal error codes */
#define VMDK_OK 0
#define VMDK_ERROR (-1)
/* Cluster not allocated */
#define VMDK_UNALLOC (-2)
/* Cluster is marked as a zeroed grain */
#define VMDK_ZEROED (-3)
/* name of the image creation option that enables zeroed grains */
#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
typedef struct {
uint32_t version;
uint32_t flags;
@ -73,6 +87,8 @@ typedef struct VmdkExtent {
bool flat;
bool compressed;
bool has_marker;
bool has_zero_grain;
int version;
int64_t sectors;
int64_t end_sector;
int64_t flat_start_offset;
@ -108,6 +124,7 @@ typedef struct VmdkMetaData {
unsigned int l2_index;
unsigned int l2_offset;
int valid;
uint32_t *l2_cache_entry;
} VmdkMetaData;
typedef struct VmdkGrainMarker {
@ -561,6 +578,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
extent->compressed =
le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
extent->version = le32_to_cpu(header.version);
extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
ret = vmdk_init_tables(bs, extent);
if (ret) {
/* free extent allocated by vmdk_add_extent */
@ -578,22 +597,22 @@ static int vmdk_parse_description(const char *desc, const char *opt_name,
opt_pos = strstr(desc, opt_name);
if (!opt_pos) {
return -1;
return VMDK_ERROR;
}
/* Skip "=\"" following opt_name */
opt_pos += strlen(opt_name) + 2;
if (opt_pos >= end) {
return -1;
return VMDK_ERROR;
}
opt_end = opt_pos;
while (opt_end < end && *opt_end != '"') {
opt_end++;
}
if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
return -1;
return VMDK_ERROR;
}
pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
return 0;
return VMDK_OK;
}
/* Open an extent file and append to bs array */
@ -772,7 +791,7 @@ static int get_whole_cluster(BlockDriverState *bs,
int ret;
if (!vmdk_is_cid_valid(bs)) {
return -1;
return VMDK_ERROR;
}
/* floor offset to cluster */
@ -780,30 +799,31 @@ static int get_whole_cluster(BlockDriverState *bs,
ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
extent->cluster_sectors);
if (ret < 0) {
return -1;
return VMDK_ERROR;
}
/* Write grain only into the active image */
ret = bdrv_write(extent->file, cluster_offset, whole_grain,
extent->cluster_sectors);
if (ret < 0) {
return -1;
return VMDK_ERROR;
}
}
return 0;
return VMDK_OK;
}
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
{
uint32_t offset;
QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset));
offset = cpu_to_le32(m_data->offset);
/* update L2 table */
if (bdrv_pwrite_sync(
extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(m_data->offset)),
&(m_data->offset),
sizeof(m_data->offset)
) < 0) {
return -1;
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
/* update backup L2 table */
if (extent->l1_backup_table_offset != 0) {
@ -812,13 +832,15 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(m_data->offset)),
&(m_data->offset), sizeof(m_data->offset)
) < 0) {
return -1;
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
}
if (m_data->l2_cache_entry) {
*m_data->l2_cache_entry = offset;
}
return 0;
return VMDK_OK;
}
static int get_cluster_offset(BlockDriverState *bs,
@ -830,24 +852,25 @@ static int get_cluster_offset(BlockDriverState *bs,
{
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
uint32_t min_count, *l2_table, tmp = 0;
uint32_t min_count, *l2_table;
bool zeroed = false;
if (m_data) {
m_data->valid = 0;
}
if (extent->flat) {
*cluster_offset = extent->flat_start_offset;
return 0;
return VMDK_OK;
}
offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
l1_index = (offset >> 9) / extent->l1_entry_sectors;
if (l1_index >= extent->l1_size) {
return -1;
return VMDK_ERROR;
}
l2_offset = extent->l1_table[l1_index];
if (!l2_offset) {
return -1;
return VMDK_UNALLOC;
}
for (i = 0; i < L2_CACHE_SIZE; i++) {
if (l2_offset == extent->l2_cache_offsets[i]) {
@ -877,7 +900,7 @@ static int get_cluster_offset(BlockDriverState *bs,
l2_table,
extent->l2_size * sizeof(uint32_t)
) != extent->l2_size * sizeof(uint32_t)) {
return -1;
return VMDK_ERROR;
}
extent->l2_cache_offsets[min_index] = l2_offset;
@ -886,9 +909,21 @@ static int get_cluster_offset(BlockDriverState *bs,
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
*cluster_offset = le32_to_cpu(l2_table[l2_index]);
if (!*cluster_offset) {
if (m_data) {
m_data->valid = 1;
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
m_data->offset = *cluster_offset;
m_data->l2_offset = l2_offset;
m_data->l2_cache_entry = &l2_table[l2_index];
}
if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
zeroed = true;
}
if (!*cluster_offset || zeroed) {
if (!allocate) {
return -1;
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
}
/* Avoid the L2 tables update for the images that have snapshots. */
@ -901,8 +936,7 @@ static int get_cluster_offset(BlockDriverState *bs,
}
*cluster_offset >>= 9;
tmp = cpu_to_le32(*cluster_offset);
l2_table[l2_index] = tmp;
l2_table[l2_index] = cpu_to_le32(*cluster_offset);
/* First of all we write grain itself, to avoid race condition
* that may to corrupt the image.
@ -911,19 +945,15 @@ static int get_cluster_offset(BlockDriverState *bs,
*/
if (get_whole_cluster(
bs, extent, *cluster_offset, offset, allocate) == -1) {
return -1;
return VMDK_ERROR;
}
if (m_data) {
m_data->offset = tmp;
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
m_data->l2_offset = l2_offset;
m_data->valid = 1;
m_data->offset = *cluster_offset;
}
}
*cluster_offset <<= 9;
return 0;
return VMDK_OK;
}
static VmdkExtent *find_extent(BDRVVmdkState *s,
@ -959,8 +989,8 @@ static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
ret = get_cluster_offset(bs, extent, NULL,
sector_num * 512, 0, &offset);
qemu_co_mutex_unlock(&s->lock);
/* get_cluster_offset returning 0 means success */
ret = !ret;
ret = (ret == VMDK_OK || ret == VMDK_ZEROED);
index_in_cluster = sector_num % extent->cluster_sectors;
n = extent->cluster_sectors - index_in_cluster;
@ -1103,9 +1133,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
if (n > nb_sectors) {
n = nb_sectors;
}
if (ret) {
if (ret != VMDK_OK) {
/* if not allocated, try to read from parent image, if exist */
if (bs->backing_hd) {
if (bs->backing_hd && ret != VMDK_ZEROED) {
if (!vmdk_is_cid_valid(bs)) {
return -EINVAL;
}
@ -1142,8 +1172,17 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
return ret;
}
/**
* vmdk_write:
* @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature
* if possible, otherwise return -ENOTSUP.
* @zero_dry_run: used for zeroed == true only, don't update L2 table, just
* check whether the zeroed write is possible.
*
* Returns: error code with 0 for success.
*/
static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
const uint8_t *buf, int nb_sectors,
bool zeroed, bool zero_dry_run)
{
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent = NULL;
@ -1173,7 +1212,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
sector_num << 9, !extent->compressed,
&cluster_offset);
if (extent->compressed) {
if (ret == 0) {
if (ret == VMDK_OK) {
/* Refuse write to allocated cluster for streamOptimized */
fprintf(stderr,
"VMDK: can't write to allocated cluster"
@ -1189,7 +1228,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
&cluster_offset);
}
}
if (ret) {
if (ret == VMDK_ERROR) {
return -EINVAL;
}
extent_begin_sector = extent->end_sector - extent->sectors;
@ -1199,17 +1238,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
if (n > nb_sectors) {
n = nb_sectors;
}
ret = vmdk_write_extent(extent,
cluster_offset, index_in_cluster * 512,
buf, n, sector_num);
if (ret) {
return ret;
}
if (m_data.valid) {
/* update L2 tables */
if (vmdk_L2update(extent, &m_data) == -1) {
return -EIO;
if (zeroed) {
/* Do zeroed write, buf is ignored */
if (extent->has_zero_grain &&
index_in_cluster == 0 &&
n >= extent->cluster_sectors) {
n = extent->cluster_sectors;
if (!zero_dry_run) {
m_data.offset = VMDK_GTE_ZEROED;
/* update L2 tables */
if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
return -EIO;
}
}
} else {
return -ENOTSUP;
}
} else {
ret = vmdk_write_extent(extent,
cluster_offset, index_in_cluster * 512,
buf, n, sector_num);
if (ret) {
return ret;
}
if (m_data.valid) {
/* update L2 tables */
if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
return -EIO;
}
}
}
nb_sectors -= n;
@ -1235,14 +1291,29 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
int ret;
BDRVVmdkState *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
ret = vmdk_write(bs, sector_num, buf, nb_sectors);
ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
/*
 * Write zeroes to @nb_sectors sectors starting at @sector_num.
 *
 * vmdk_write() is invoked twice in zeroed mode while holding the state
 * lock: first as a dry run, and only if that returns 0 a second time with
 * the dry-run flag cleared.  The result of the last call made is returned.
 */
static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
                                             int64_t sector_num,
                                             int nb_sectors)
{
    BDRVVmdkState *state = bs->opaque;
    int rc;

    qemu_co_mutex_lock(&state->lock);

    /* pass 1: dry run */
    rc = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
    if (rc == 0) {
        /* pass 2: the dry run succeeded, do the real zeroed write */
        rc = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
    }

    qemu_co_mutex_unlock(&state->lock);

    return rc;
}
static int vmdk_create_extent(const char *filename, int64_t filesize,
bool flat, bool compress)
bool flat, bool compress, bool zeroed_grain)
{
int ret, i;
int fd = 0;
@ -1264,9 +1335,10 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
}
magic = cpu_to_be32(VMDK4_MAGIC);
memset(&header, 0, sizeof(header));
header.version = 1;
header.flags =
3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0);
header.version = zeroed_grain ? 2 : 1;
header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
| (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
| (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
header.capacity = filesize / 512;
header.granularity = 128;
@ -1357,7 +1429,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
if (filename == NULL || !strlen(filename)) {
fprintf(stderr, "Vmdk: no filename provided.\n");
return -1;
return VMDK_ERROR;
}
p = strrchr(filename, '/');
if (p == NULL) {
@ -1369,7 +1441,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
if (p != NULL) {
p++;
if (p - filename >= buf_len) {
return -1;
return VMDK_ERROR;
}
pstrcpy(path, p - filename + 1, filename);
} else {
@ -1382,12 +1454,12 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
postfix[0] = '\0';
} else {
if (q - p >= buf_len) {
return -1;
return VMDK_ERROR;
}
pstrcpy(prefix, q - p + 1, p);
pstrcpy(postfix, buf_len, q);
}
return 0;
return VMDK_OK;
}
static int relative_path(char *dest, int dest_size,
@ -1403,11 +1475,11 @@ static int relative_path(char *dest, int dest_size,
#endif
if (!(dest && base && target)) {
return -1;
return VMDK_ERROR;
}
if (path_is_absolute(target)) {
pstrcpy(dest, dest_size, target);
return 0;
return VMDK_OK;
}
while (base[i] == target[i]) {
i++;
@ -1426,7 +1498,7 @@ static int relative_path(char *dest, int dest_size,
pstrcat(dest, dest_size, sep);
}
pstrcat(dest, dest_size, q);
return 0;
return VMDK_OK;
}
static int vmdk_create(const char *filename, QEMUOptionParameter *options)
@ -1447,6 +1519,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
char parent_desc_line[BUF_SIZE] = "";
uint32_t parent_cid = 0xffffffff;
uint32_t number_heads = 16;
bool zeroed_grain = false;
const char desc_template[] =
"# Disk DescriptorFile\n"
"version=1\n"
@ -1482,6 +1555,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
} else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
fmt = options->value.s;
} else if (!strcmp(options->name, BLOCK_OPT_ZEROED_GRAIN)) {
zeroed_grain |= options->value.n;
}
options++;
}
@ -1568,7 +1643,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
snprintf(ext_filename, sizeof(ext_filename), "%s%s",
path, desc_filename);
if (vmdk_create_extent(ext_filename, size, flat, compress)) {
if (vmdk_create_extent(ext_filename, size,
flat, compress, zeroed_grain)) {
return -EINVAL;
}
filesize -= size;
@ -1694,6 +1770,11 @@ static QEMUOptionParameter vmdk_create_options[] = {
"VMDK flat extent format, can be one of "
"{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
},
{
.name = BLOCK_OPT_ZEROED_GRAIN,
.type = OPT_FLAG,
.help = "Enable efficient zero writes using the zeroed-grain GTE feature"
},
{ NULL }
};
@ -1705,6 +1786,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_reopen_prepare = vmdk_reopen_prepare,
.bdrv_read = vmdk_co_read,
.bdrv_write = vmdk_co_write,
.bdrv_co_write_zeroes = vmdk_co_write_zeroes,
.bdrv_close = vmdk_close,
.bdrv_create = vmdk_create,
.bdrv_co_flush_to_disk = vmdk_co_flush,

View File

@ -1118,6 +1118,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
void qmp_block_resize(const char *device, int64_t size, Error **errp)
{
BlockDriverState *bs;
int ret;
bs = bdrv_find(device);
if (!bs) {
@ -1133,7 +1134,8 @@ void qmp_block_resize(const char *device, int64_t size, Error **errp)
/* complete all in-flight operations before resizing the device */
bdrv_drain_all();
switch (bdrv_truncate(bs, size)) {
ret = bdrv_truncate(bs, size);
switch (ret) {
case 0:
break;
case -ENOMEDIUM:
@ -1149,7 +1151,7 @@ void qmp_block_resize(const char *device, int64_t size, Error **errp)
error_set(errp, QERR_DEVICE_IN_USE, device);
break;
default:
error_set(errp, QERR_UNDEFINED_ERROR);
error_setg_errno(errp, -ret, "Could not resize");
break;
}
}

View File

@ -58,7 +58,8 @@ enum {
#define NBD_DEFAULT_PORT 10809
#define NBD_BUFFER_SIZE (1024*1024)
/* Maximum size of a single READ/WRITE data buffer */
#define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024)
ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read);
int tcp_socket_incoming(const char *address, uint16_t port);

35
include/qemu/crc32c.h Normal file
View File

@ -0,0 +1,35 @@
/*
* Castagnoli CRC32C Checksum Algorithm
*
* Polynomial: 0x11EDC6F41
*
* Castagnoli93: Guy Castagnoli and Stefan Braeuer and Martin Herrmann
* "Optimization of Cyclic Redundancy-Check Codes with 24
* and 32 Parity Bits", IEEE Transactions on Communications,
* Volume 41, Number 6, June 1993
*
* Copyright (c) 2013 Red Hat, Inc.,
*
* Authors:
* Jeff Cody <jcody@redhat.com>
*
* Based on the Linux kernel cryptographic crc32c module,
*
* Copyright (c) 2004 Cisco Systems, Inc.
* Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#ifndef QEMU_CRC32C_H
#define QEMU_CRC32C_H
#include "qemu-common.h"
/*
 * Feed @length bytes at @data into a running CRC-32C (Castagnoli) value.
 *
 * @crc:    starting value; pass 0xffffffff to obtain the standard CRC-32C
 *          of the buffer
 * @data:   bytes to checksum
 * @length: number of bytes at @data
 *
 * Returns the updated value XORed with 0xffffffff.  Note that this final
 * inversion is applied on every call.
 */
uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length);
#endif

36
nbd.c
View File

@ -98,7 +98,6 @@ struct NBDExport {
off_t size;
uint32_t nbdflags;
QTAILQ_HEAD(, NBDClient) clients;
QSIMPLEQ_HEAD(, NBDRequest) requests;
QTAILQ_ENTRY(NBDExport) next;
};
@ -845,18 +844,11 @@ void nbd_client_close(NBDClient *client)
static NBDRequest *nbd_request_get(NBDClient *client)
{
NBDRequest *req;
NBDExport *exp = client->exp;
assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
client->nb_requests++;
if (QSIMPLEQ_EMPTY(&exp->requests)) {
req = g_malloc0(sizeof(NBDRequest));
req->data = qemu_blockalign(exp->bs, NBD_BUFFER_SIZE);
} else {
req = QSIMPLEQ_FIRST(&exp->requests);
QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
}
req = g_slice_new0(NBDRequest);
nbd_client_get(client);
req->client = client;
return req;
@ -865,7 +857,12 @@ static NBDRequest *nbd_request_get(NBDClient *client)
static void nbd_request_put(NBDRequest *req)
{
NBDClient *client = req->client;
QSIMPLEQ_INSERT_HEAD(&client->exp->requests, req, entry);
if (req->data) {
qemu_vfree(req->data);
}
g_slice_free(NBDRequest, req);
if (client->nb_requests-- == MAX_NBD_REQUESTS) {
qemu_notify_event();
}
@ -877,7 +874,6 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
void (*close)(NBDExport *))
{
NBDExport *exp = g_malloc0(sizeof(NBDExport));
QSIMPLEQ_INIT(&exp->requests);
exp->refcount = 1;
QTAILQ_INIT(&exp->clients);
exp->bs = bs;
@ -953,13 +949,6 @@ void nbd_export_put(NBDExport *exp)
exp->close(exp);
}
while (!QSIMPLEQ_EMPTY(&exp->requests)) {
NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests);
QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
qemu_vfree(first->data);
g_free(first);
}
g_free(exp);
}
}
@ -1018,6 +1007,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
{
NBDClient *client = req->client;
int csock = client->sock;
uint32_t command;
ssize_t rc;
client->recv_coroutine = qemu_coroutine_self();
@ -1029,9 +1019,9 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
goto out;
}
if (request->len > NBD_BUFFER_SIZE) {
if (request->len > NBD_MAX_BUFFER_SIZE) {
LOG("len (%u) is larger than max len (%u)",
request->len, NBD_BUFFER_SIZE);
request->len, NBD_MAX_BUFFER_SIZE);
rc = -EINVAL;
goto out;
}
@ -1045,7 +1035,11 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
TRACE("Decoding type");
if ((request->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
command = request->type & NBD_CMD_MASK_COMMAND;
if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
req->data = qemu_blockalign(client->exp->bs, request->len);
}
if (command == NBD_CMD_WRITE) {
TRACE("Reading %u byte(s)", request->len);
if (qemu_co_recv(csock, req->data, request->len) != request->len) {

View File

@ -127,6 +127,9 @@ _make_test_img()
-e "s# compat='[^']*'##g" \
-e "s# compat6=\\(on\\|off\\)##g" \
-e "s# static=\\(on\\|off\\)##g" \
-e "s# zeroed_grain=\\(on\\|off\\)##g" \
-e "s# subformat='[^']*'##g" \
-e "s# adapter_type='[^']*'##g" \
-e "s# lazy_refcounts=\\(on\\|off\\)##g"
# Start an NBD server on the image file, which is what we'll be talking to

View File

@ -10,3 +10,4 @@ util-obj-$(CONFIG_POSIX) += compatfd.o
util-obj-y += iov.o aes.o qemu-config.o qemu-sockets.o uri.o notify.o
util-obj-y += qemu-option.o qemu-progress.o
util-obj-y += hexdump.o
util-obj-y += crc32c.o

115
util/crc32c.c Normal file
View File

@ -0,0 +1,115 @@
/*
* Castagnoli CRC32C Checksum Algorithm
*
* Polynomial: 0x11EDC6F41
*
* Castagnoli93: Guy Castagnoli and Stefan Braeuer and Martin Herrmann
* "Optimization of Cyclic Redundancy-Check Codes with 24
* and 32 Parity Bits", IEEE Transactions on Communications,
* Volume 41, Number 6, June 1993
*
* Copyright (c) 2013 Red Hat, Inc.,
*
* Authors:
* Jeff Cody <jcody@redhat.com>
*
* Based on the Linux kernel cryptographic crc32c module,
*
* Copyright (c) 2004 Cisco Systems, Inc.
* Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include "qemu-common.h"
#include "qemu/crc32c.h"
/*
 * Byte-wise lookup table for CRC-32C.
 *
 * Generator parameters:
 *   width                = 32 bits
 *   poly                 = 0x1EDC6F41
 *   reflect input bytes  = true
 *   reflect output bytes = true
 */
static const uint32_t crc32c_table[256] = {
    0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L,
    0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL,
    0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL,
    0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L,
    0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
    0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L,
    0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L,
    0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL,
    0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL,
    0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
    0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L,
    0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL,
    0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L,
    0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL,
    0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
    0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L,
    0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L,
    0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L,
    0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L,
    0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
    0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L,
    0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L,
    0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L,
    0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L,
    0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
    0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L,
    0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L,
    0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L,
    0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L,
    0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
    0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L,
    0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L,
    0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL,
    0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L,
    0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
    0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL,
    0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L,
    0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL,
    0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL,
    0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
    0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L,
    0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL,
    0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL,
    0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L,
    0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
    0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L,
    0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L,
    0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL,
    0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L,
    0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
    0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL,
    0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L,
    0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL,
    0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L,
    0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
    0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL,
    0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL,
    0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L,
    0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L,
    0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
    0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L,
    0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL,
    0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL,
    0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L
};

/*
 * Fold @length bytes starting at @data into the running value @crc, one
 * table lookup per byte, and return the result XORed with 0xffffffff.
 * Nothing is read from @data when @length is 0.
 */
uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length)
{
    unsigned int pos;

    for (pos = 0; pos < length; pos++) {
        /* low byte of the running value combined with the next input byte */
        uint32_t slot = (crc ^ data[pos]) & 0xFFL;

        crc = (crc >> 8) ^ crc32c_table[slot];
    }

    return crc ^ 0xffffffff;
}