/* * Copyright (C) 2009-2011 Joel Rosdahl * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 3 of the License, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 51 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "ccache.h" #include "hashtable_itr.h" #include "hashutil.h" #include "manifest.h" #include "murmurhashneutral2.h" #include /* * Sketchy specification of the manifest disk format: * * magic number (4 bytes) * file format version (1 byte unsigned int) * size of the hash fields (in bytes) (1 byte unsigned int) * reserved for future use (2 bytes) * ---------------------------------------------------------------------------- * number of include file paths (4 bytes unsigned int) * path to include file (NUL-terminated string, * ... at most 1024 bytes) * * ---------------------------------------------------------------------------- * number of include file hash entries (4 bytes unsigned int) * index of include file path (4 bytes unsigned int) * hash of include file ( bytes) * size of include file (4 bytes unsigned int) * ... * * * * ---------------------------------------------------------------------------- * number of object name entries (4 bytes unsigned int) * number of include file hash indexes (4 bytes unsigned int) * include file hash index (4 bytes unsigned int) * ... * * hash part of object name ( bytes) * size part of object name (4 bytes unsigned int) * ... * number of include file hash indexes * include file hash index * ... * * * */ static const uint32_t MAGIC = 0x63436d46U; static const uint8_t VERSION = 0; static const uint32_t MAX_MANIFEST_ENTRIES = 100; #define static_assert(e) do { enum { static_assert__ = 1/(e) }; } while (0) struct file_info { /* Index to n_files. */ uint32_t index; /* Hash of referenced file. */ uint8_t hash[16]; /* Size of referenced file. */ uint32_t size; }; struct object { /* Number of entries in file_info_indexes. */ uint32_t n_file_info_indexes; /* Indexes to file_infos. */ uint32_t *file_info_indexes; /* Hash of the object itself. */ struct file_hash hash; }; struct manifest { /* Version of decoded file. */ uint8_t version; /* Reserved for future use. */ uint16_t reserved; /* Size of hash fields (in bytes). */ uint8_t hash_size; /* Referenced include files. */ uint32_t n_files; char **files; /* Information about referenced include files. */ uint32_t n_file_infos; struct file_info *file_infos; /* Object names plus references to include file hashes. */ uint32_t n_objects; struct object *objects; }; static unsigned int hash_from_file_info(void *key) { static_assert(sizeof(struct file_info) == 24); /* No padding. */ return murmurhashneutral2(key, sizeof(struct file_info), 0); } static int file_infos_equal(void *key1, void *key2) { struct file_info *fi1 = (struct file_info *)key1; struct file_info *fi2 = (struct file_info *)key2; return fi1->index == fi2->index && memcmp(fi1->hash, fi2->hash, 16) == 0 && fi1->size == fi2->size; } static void free_manifest(struct manifest *mf) { uint16_t i; for (i = 0; i < mf->n_files; i++) { free(mf->files[i]); } free(mf->files); free(mf->file_infos); for (i = 0; i < mf->n_objects; i++) { free(mf->objects[i].file_info_indexes); } free(mf->objects); } #define READ_INT(size, var) \ do { \ int ch_; \ size_t i_; \ (var) = 0; \ for (i_ = 0; i_ < (size); i_++) { \ ch_ = gzgetc(f); \ if (ch_ == EOF) { \ goto error; \ } \ (var) <<= 8; \ (var) |= ch_ & 0xFF; \ } \ } while (0) #define READ_STR(var) \ do { \ char buf_[1024]; \ size_t i_; \ int ch_; \ for (i_ = 0; i_ < sizeof(buf_); i_++) { \ ch_ = gzgetc(f); \ if (ch_ == EOF) { \ goto error; \ } \ buf_[i_] = ch_; \ if (ch_ == '\0') { \ break; \ } \ } \ if (i_ == sizeof(buf_)) { \ goto error; \ } \ (var) = x_strdup(buf_); \ } while (0) #define READ_BYTES(n, var) \ do { \ size_t i_; \ int ch_; \ for (i_ = 0; i_ < (n); i_++) { \ ch_ = gzgetc(f); \ if (ch_ == EOF) { \ goto error; \ } \ (var)[i_] = ch_; \ } \ } while (0) static struct manifest * create_empty_manifest(void) { struct manifest *mf; mf = x_malloc(sizeof(*mf)); mf->hash_size = 16; mf->n_files = 0; mf->files = NULL; mf->n_file_infos = 0; mf->file_infos = NULL; mf->n_objects = 0; mf->objects = NULL; return mf; } static struct manifest * read_manifest(gzFile f) { struct manifest *mf; uint16_t i, j; uint32_t magic; mf = create_empty_manifest(); READ_INT(4, magic); if (magic != MAGIC) { cc_log("Manifest file has bad magic number %u", magic); free_manifest(mf); return NULL; } READ_INT(1, mf->version); if (mf->version != VERSION) { cc_log("Manifest file has unknown version %u", mf->version); free_manifest(mf); return NULL; } READ_INT(1, mf->hash_size); if (mf->hash_size != 16) { /* Temporary measure until we support different hash algorithms. */ cc_log("Manifest file has unsupported hash size %u", mf->hash_size); free_manifest(mf); return NULL; } READ_INT(2, mf->reserved); READ_INT(4, mf->n_files); mf->files = x_calloc(mf->n_files, sizeof(*mf->files)); for (i = 0; i < mf->n_files; i++) { READ_STR(mf->files[i]); } READ_INT(4, mf->n_file_infos); mf->file_infos = x_calloc(mf->n_file_infos, sizeof(*mf->file_infos)); for (i = 0; i < mf->n_file_infos; i++) { READ_INT(4, mf->file_infos[i].index); READ_BYTES(mf->hash_size, mf->file_infos[i].hash); READ_INT(4, mf->file_infos[i].size); } READ_INT(4, mf->n_objects); mf->objects = x_calloc(mf->n_objects, sizeof(*mf->objects)); for (i = 0; i < mf->n_objects; i++) { READ_INT(4, mf->objects[i].n_file_info_indexes); mf->objects[i].file_info_indexes = x_calloc(mf->objects[i].n_file_info_indexes, sizeof(*mf->objects[i].file_info_indexes)); for (j = 0; j < mf->objects[i].n_file_info_indexes; j++) { READ_INT(4, mf->objects[i].file_info_indexes[j]); } READ_BYTES(mf->hash_size, mf->objects[i].hash.hash); READ_INT(4, mf->objects[i].hash.size); } return mf; error: cc_log("Corrupt manifest file"); free_manifest(mf); return NULL; } #define WRITE_INT(size, var) \ do { \ uint8_t ch_; \ size_t i_; \ for (i_ = 0; i_ < (size); i_++) { \ ch_ = ((var) >> (8 * ((size) - i_ - 1))); \ if (gzputc(f, ch_) == EOF) { \ goto error; \ } \ } \ } while (0) #define WRITE_STR(var) \ do { \ if (gzputs(f, var) == EOF || gzputc(f, '\0') == EOF) { \ goto error; \ } \ } while (0) #define WRITE_BYTES(n, var) \ do { \ size_t i_; \ for (i_ = 0; i_ < (n); i_++) { \ if (gzputc(f, (var)[i_]) == EOF) { \ goto error; \ } \ } \ } while (0) static int write_manifest(gzFile f, const struct manifest *mf) { uint16_t i, j; WRITE_INT(4, MAGIC); WRITE_INT(1, VERSION); WRITE_INT(1, 16); WRITE_INT(2, 0); WRITE_INT(4, mf->n_files); for (i = 0; i < mf->n_files; i++) { WRITE_STR(mf->files[i]); } WRITE_INT(4, mf->n_file_infos); for (i = 0; i < mf->n_file_infos; i++) { WRITE_INT(4, mf->file_infos[i].index); WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash); WRITE_INT(4, mf->file_infos[i].size); } WRITE_INT(4, mf->n_objects); for (i = 0; i < mf->n_objects; i++) { WRITE_INT(4, mf->objects[i].n_file_info_indexes); for (j = 0; j < mf->objects[i].n_file_info_indexes; j++) { WRITE_INT(4, mf->objects[i].file_info_indexes[j]); } WRITE_BYTES(mf->hash_size, mf->objects[i].hash.hash); WRITE_INT(4, mf->objects[i].hash.size); } return 1; error: cc_log("Error writing to manifest file"); return 0; } static int verify_object(struct conf *conf, struct manifest *mf, struct object *obj, struct hashtable *hashed_files) { uint32_t i; struct file_info *fi; struct file_hash *actual; struct mdfour hash; int result; for (i = 0; i < obj->n_file_info_indexes; i++) { fi = &mf->file_infos[obj->file_info_indexes[i]]; actual = hashtable_search(hashed_files, mf->files[fi->index]); if (!actual) { actual = x_malloc(sizeof(*actual)); hash_start(&hash); result = hash_source_code_file(conf, &hash, mf->files[fi->index]); if (result & HASH_SOURCE_CODE_ERROR) { cc_log("Failed hashing %s", mf->files[fi->index]); free(actual); return 0; } if (result & HASH_SOURCE_CODE_FOUND_TIME) { free(actual); return 0; } hash_result_as_bytes(&hash, actual->hash); actual->size = hash.totalN; hashtable_insert(hashed_files, x_strdup(mf->files[fi->index]), actual); } if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0 || fi->size != actual->size) { return 0; } } return 1; } static struct hashtable * create_string_index_map(char **strings, uint32_t len) { uint32_t i; struct hashtable *h; uint32_t *index; h = create_hashtable(1000, hash_from_string, strings_equal); for (i = 0; i < len; i++) { index = x_malloc(sizeof(*index)); *index = i; hashtable_insert(h, x_strdup(strings[i]), index); } return h; } static struct hashtable * create_file_info_index_map(struct file_info *infos, uint32_t len) { uint32_t i; struct hashtable *h; struct file_info *fi; uint32_t *index; h = create_hashtable(1000, hash_from_file_info, file_infos_equal); for (i = 0; i < len; i++) { fi = x_malloc(sizeof(*fi)); *fi = infos[i]; index = x_malloc(sizeof(*index)); *index = i; hashtable_insert(h, fi, index); } return h; } static uint32_t get_include_file_index(struct manifest *mf, char *path, struct hashtable *mf_files) { uint32_t *index; uint32_t n; index = hashtable_search(mf_files, path); if (index) { return *index; } n = mf->n_files; mf->files = x_realloc(mf->files, (n + 1) * sizeof(*mf->files)); mf->n_files++; mf->files[n] = x_strdup(path); return n; } static uint32_t get_file_hash_index(struct manifest *mf, char *path, struct file_hash *file_hash, struct hashtable *mf_files, struct hashtable *mf_file_infos) { struct file_info fi; uint32_t *fi_index; uint32_t n; fi.index = get_include_file_index(mf, path, mf_files); memcpy(fi.hash, file_hash->hash, sizeof(fi.hash)); fi.size = file_hash->size; fi_index = hashtable_search(mf_file_infos, &fi); if (fi_index) { return *fi_index; } n = mf->n_file_infos; mf->file_infos = x_realloc(mf->file_infos, (n + 1) * sizeof(*mf->file_infos)); mf->n_file_infos++; mf->file_infos[n] = fi; return n; } static void add_file_info_indexes(uint32_t *indexes, uint32_t size, struct manifest *mf, struct hashtable *included_files) { struct hashtable_itr *iter; uint32_t i; char *path; struct file_hash *file_hash; struct hashtable *mf_files; /* path --> index */ struct hashtable *mf_file_infos; /* struct file_info --> index */ if (size == 0) { return; } mf_files = create_string_index_map(mf->files, mf->n_files); mf_file_infos = create_file_info_index_map(mf->file_infos, mf->n_file_infos); iter = hashtable_iterator(included_files); i = 0; do { path = hashtable_iterator_key(iter); file_hash = hashtable_iterator_value(iter); indexes[i] = get_file_hash_index(mf, path, file_hash, mf_files, mf_file_infos); i++; } while (hashtable_iterator_advance(iter)); assert(i == size); hashtable_destroy(mf_file_infos, 1); hashtable_destroy(mf_files, 1); } static void add_object_entry(struct manifest *mf, struct file_hash *object_hash, struct hashtable *included_files) { struct object *obj; uint32_t n; n = mf->n_objects; mf->objects = x_realloc(mf->objects, (n + 1) * sizeof(*mf->objects)); mf->n_objects++; obj = &mf->objects[n]; n = hashtable_count(included_files); obj->n_file_info_indexes = n; obj->file_info_indexes = x_malloc(n * sizeof(*obj->file_info_indexes)); add_file_info_indexes(obj->file_info_indexes, n, mf, included_files); memcpy(obj->hash.hash, object_hash->hash, mf->hash_size); obj->hash.size = object_hash->size; } /* * Try to get the object hash from a manifest file. Caller frees. Returns NULL * on failure. */ struct file_hash * manifest_get(struct conf *conf, const char *manifest_path) { int fd; gzFile f = NULL; struct manifest *mf = NULL; struct hashtable *hashed_files = NULL; /* path --> struct file_hash */ uint32_t i; struct file_hash *fh = NULL; fd = open(manifest_path, O_RDONLY | O_BINARY); if (fd == -1) { /* Cache miss. */ cc_log("No such manifest file"); goto out; } f = gzdopen(fd, "rb"); if (!f) { close(fd); cc_log("Failed to gzdopen manifest file"); goto out; } mf = read_manifest(f); if (!mf) { cc_log("Error reading manifest file"); goto out; } hashed_files = create_hashtable(1000, hash_from_string, strings_equal); /* Check newest object first since it's a bit more likely to match. */ for (i = mf->n_objects; i > 0; i--) { if (verify_object(conf, mf, &mf->objects[i - 1], hashed_files)) { fh = x_malloc(sizeof(*fh)); *fh = mf->objects[i - 1].hash; goto out; } } out: if (hashed_files) { hashtable_destroy(hashed_files, 1); } if (f) { gzclose(f); } if (mf) { free_manifest(mf); } return fh; } /* * Put the object name into a manifest file given a set of included files. * Returns true on success, otherwise false. */ bool manifest_put(const char *manifest_path, struct file_hash *object_hash, struct hashtable *included_files) { int ret = 0; int fd1; int fd2; gzFile f2 = NULL; struct manifest *mf = NULL; char *tmp_file = NULL; /* * We don't bother to acquire a lock when writing the manifest to disk. A * race between two processes will only result in one lost entry, which is * not a big deal, and it's also very unlikely. */ fd1 = open(manifest_path, O_RDONLY | O_BINARY); if (fd1 == -1) { /* New file. */ mf = create_empty_manifest(); } else { gzFile f1 = gzdopen(fd1, "rb"); if (!f1) { cc_log("Failed to gzdopen manifest file"); close(fd1); goto out; } mf = read_manifest(f1); gzclose(f1); if (!mf) { cc_log("Failed to read manifest file; deleting it"); x_unlink(manifest_path); mf = create_empty_manifest(); } } if (mf->n_objects > MAX_MANIFEST_ENTRIES) { /* * Normally, there shouldn't be many object entries in the manifest since * new entries are added only if an include file has changed but not the * source file, and you typically change source files more often than * header files. However, it's certainly possible to imagine cases where * the manifest will grow large (for instance, a generated header file that * changes for every build), and this must be taken care of since * processing an ever growing manifest eventually will take too much time. * A good way of solving this would be to maintain the object entries in * LRU order and discarding the old ones. An easy way is to throw away all * entries when there are too many. Let's do that for now. */ cc_log("More than %u entries in manifest file; discarding", MAX_MANIFEST_ENTRIES); free_manifest(mf); mf = create_empty_manifest(); } tmp_file = format("%s.tmp.%s", manifest_path, tmp_string()); fd2 = safe_create_wronly(tmp_file); if (fd2 == -1) { cc_log("Failed to open %s", tmp_file); goto out; } f2 = gzdopen(fd2, "wb"); if (!f2) { cc_log("Failed to gzdopen %s", tmp_file); goto out; } add_object_entry(mf, object_hash, included_files); if (write_manifest(f2, mf)) { gzclose(f2); f2 = NULL; if (x_rename(tmp_file, manifest_path) == 0) { ret = 1; } else { cc_log("Failed to rename %s to %s", tmp_file, manifest_path); goto out; } } else { cc_log("Failed to write manifest file"); goto out; } out: if (mf) { free_manifest(mf); } if (tmp_file) { free(tmp_file); } if (f2) { gzclose(f2); } return ret; } bool manifest_dump(const char *manifest_path, FILE *stream) { struct manifest *mf = NULL; int fd; gzFile *f = NULL; bool ret = false; unsigned i, j; fd = open(manifest_path, O_RDONLY | O_BINARY); if (fd == -1) { fprintf(stderr, "No such manifest file: %s\n", manifest_path); goto out; } f = gzdopen(fd, "rb"); if (!f) { fprintf(stderr, "Failed to dzopen manifest file\n"); close(fd); goto out; } mf = read_manifest(f); if (!mf) { fprintf(stderr, "Error reading manifest file\n"); goto out; } fprintf(stream, "Magic: %c%c%c%c\n", (MAGIC >> 24) & 0xFF, (MAGIC >> 16) & 0xFF, (MAGIC >> 8) & 0xFF, MAGIC & 0xFF); fprintf(stream, "Version: %u\n", mf->version); fprintf(stream, "Hash size: %u\n", (unsigned)mf->hash_size); fprintf(stream, "Reserved field: %u\n", (unsigned)mf->reserved); fprintf(stream, "File paths (%u):\n", (unsigned)mf->n_files); for (i = 0; i < mf->n_files; ++i) { fprintf(stream, " %u: %s\n", i, mf->files[i]); } fprintf(stream, "File infos (%u):\n", (unsigned)mf->n_file_infos); for (i = 0; i < mf->n_file_infos; ++i) { char *hash; fprintf(stream, " %u:\n", i); fprintf(stream, " Path index: %u\n", mf->file_infos[i].index); hash = format_hash_as_string(mf->file_infos[i].hash, -1); fprintf(stream, " Hash: %s\n", hash); free(hash); fprintf(stream, " Size: %u\n", mf->file_infos[i].size); } fprintf(stream, "Results (%u):\n", (unsigned)mf->n_objects); for (i = 0; i < mf->n_objects; ++i) { char *hash; fprintf(stream, " %u:\n", i); fprintf(stream, " File hash indexes:"); for (j = 0; j < mf->objects[i].n_file_info_indexes; ++j) { fprintf(stream, " %u", mf->objects[i].file_info_indexes[j]); } fprintf(stream, "\n"); hash = format_hash_as_string(mf->objects[i].hash.hash, -1); fprintf(stream, " Hash: %s\n", hash); free(hash); fprintf(stream, " Size: %u\n", (unsigned)mf->objects[i].hash.size); } ret = true; out: if (mf) { free_manifest(mf); } if (f) { gzclose(f); } return ret; }