Fix NFS object corruption

Several months ago I reported a problem with NFS corruption from three
simultaneous NFS users of ccache on the same file; two writers to the cache
and one reader.

I believe I have tracked this issue down to a race related to the use of
unlink. On NFS, unlink() is NOT atomic; so what seemed to be happening was
the second writer unlink()ed the first's object, then the reader got the
partially unlinked (truncated) object.

The following patch fixes this issue by always calling rename before a
unlink - a new x_unlink function. There are some places where temp files
are being unlinked; for performance these can remain as the ordinary
unlink.
This commit is contained in:
Wilson Snyder 2010-10-08 07:28:06 -04:00 committed by Joel Rosdahl
parent b91097778f
commit d887888cd2
8 changed files with 76 additions and 40 deletions

View File

@ -27,6 +27,8 @@ Idioms
* Use str_eq() instead of strcmp() when testing for string (in)equality.
* Consider using str_startswith() instead of strncmp().
* Use bool, true and false for boolean values.
* Use tmp_unlink() or x_unlink() instead of unlink().
* Use x_rename() instead of rename().
Other
-----

View File

@ -236,7 +236,7 @@ clean_up_tmp_files()
/* delete intermediate pre-processor file if needed */
if (i_tmpfile) {
if (!direct_i_file) {
unlink(i_tmpfile);
tmp_unlink(i_tmpfile);
}
free(i_tmpfile);
i_tmpfile = NULL;
@ -244,7 +244,7 @@ clean_up_tmp_files()
/* delete the cpp stderr file if necessary */
if (cpp_stderr) {
unlink(cpp_stderr);
tmp_unlink(cpp_stderr);
free(cpp_stderr);
cpp_stderr = NULL;
}
@ -519,12 +519,12 @@ to_cache(struct args *args)
if (stat(tmp_stdout, &st) != 0 || st.st_size != 0) {
cc_log("Compiler produced stdout");
stats_update(STATS_STDOUT);
unlink(tmp_stdout);
unlink(tmp_stderr);
unlink(tmp_obj);
tmp_unlink(tmp_stdout);
tmp_unlink(tmp_stderr);
tmp_unlink(tmp_obj);
failed();
}
unlink(tmp_stdout);
tmp_unlink(tmp_stdout);
/*
* Merge stderr from the preprocessor (if any) and stderr from the real
@ -561,7 +561,7 @@ to_cache(struct args *args)
close(fd_cpp_stderr);
close(fd_real_stderr);
close(fd_result);
unlink(tmp_stderr2);
tmp_unlink(tmp_stderr2);
free(tmp_stderr2);
}
@ -579,13 +579,13 @@ to_cache(struct args *args)
/* we can use a quick method of getting the failed output */
copy_fd(fd, 2);
close(fd);
unlink(tmp_stderr);
tmp_unlink(tmp_stderr);
exit(status);
}
}
unlink(tmp_stderr);
unlink(tmp_obj);
tmp_unlink(tmp_stderr);
tmp_unlink(tmp_obj);
failed();
}
@ -619,7 +619,7 @@ to_cache(struct args *args)
added_bytes += file_size(&st);
added_files += 1;
} else {
unlink(tmp_stderr);
tmp_unlink(tmp_stderr);
}
if (move_uncompressed_file(tmp_obj, cached_obj, enable_compression) != 0) {
cc_log("Failed to move %s to %s", tmp_obj, cached_obj);
@ -703,9 +703,9 @@ get_object_name_from_cpp(struct args *args, struct mdfour *hash)
if (status != 0) {
if (!direct_i_file) {
unlink(path_stdout);
tmp_unlink(path_stdout);
}
unlink(path_stderr);
tmp_unlink(path_stderr);
cc_log("Preprocessor gave exit status %d", status);
stats_update(STATS_PREPROCESSOR);
failed();
@ -722,7 +722,7 @@ get_object_name_from_cpp(struct args *args, struct mdfour *hash)
hash_delimiter(hash, "unifycpp");
if (unify_hash(hash, path_stdout) != 0) {
stats_update(STATS_ERROR);
unlink(path_stderr);
tmp_unlink(path_stderr);
cc_log("Failed to unify %s", path_stdout);
failed();
}
@ -730,7 +730,7 @@ get_object_name_from_cpp(struct args *args, struct mdfour *hash)
hash_delimiter(hash, "cpp");
if (!process_preprocessed_file(hash, path_stdout)) {
stats_update(STATS_ERROR);
unlink(path_stderr);
tmp_unlink(path_stderr);
failed();
}
}
@ -750,7 +750,7 @@ get_object_name_from_cpp(struct args *args, struct mdfour *hash)
*/
cpp_stderr = path_stderr;
} else {
unlink(path_stderr);
tmp_unlink(path_stderr);
free(path_stderr);
}
@ -993,7 +993,7 @@ from_cache(enum fromcache_call_mode mode, bool put_object_in_manifest)
if (str_eq(output_obj, "/dev/null")) {
ret = 0;
} else {
unlink(output_obj);
x_unlink(output_obj);
/* only make a hardlink if the cache file is uncompressed */
if (getenv("CCACHE_HARDLINK") && !file_is_compressed(cached_obj)) {
ret = link(cached_obj, output_obj);
@ -1013,17 +1013,17 @@ from_cache(enum fromcache_call_mode mode, bool put_object_in_manifest)
stats_update(STATS_ERROR);
failed();
}
unlink(output_obj);
unlink(cached_stderr);
unlink(cached_obj);
unlink(cached_dep);
x_unlink(output_obj);
x_unlink(cached_stderr);
x_unlink(cached_obj);
x_unlink(cached_dep);
return;
} else {
cc_log("Created %s from %s", output_obj, cached_obj);
}
if (produce_dep_file) {
unlink(output_dep);
x_unlink(output_dep);
/* only make a hardlink if the cache file is uncompressed */
if (getenv("CCACHE_HARDLINK") && !file_is_compressed(cached_dep)) {
ret = link(cached_dep, output_dep);
@ -1045,11 +1045,11 @@ from_cache(enum fromcache_call_mode mode, bool put_object_in_manifest)
stats_update(STATS_ERROR);
failed();
}
unlink(output_obj);
unlink(output_dep);
unlink(cached_stderr);
unlink(cached_obj);
unlink(cached_dep);
x_unlink(output_obj);
x_unlink(output_dep);
x_unlink(cached_stderr);
x_unlink(cached_obj);
x_unlink(cached_dep);
return;
} else {
cc_log("Created %s from %s", output_dep, cached_dep);
@ -1874,7 +1874,7 @@ ccache(int argc, char *argv[])
cc_log("Hash from manifest doesn't match preprocessor output");
cc_log("Likely reason: different CCACHE_BASEDIRs used");
cc_log("Removing manifest as a safety measure");
unlink(manifest_path);
x_unlink(manifest_path);
put_object_in_manifest = true;
}

View File

@ -139,6 +139,8 @@ bool is_absolute_path(const char *path);
bool is_full_path(const char *path);
void update_mtime(const char *path);
int x_rename(const char *oldpath, const char *newpath);
int x_unlink(const char *path);
int tmp_unlink(const char *path);
char *x_readlink(const char *path);
char *read_text_file(const char *path);
bool read_file(const char *path, size_t size_hint, char **data, size_t *size);

View File

@ -73,7 +73,7 @@ traverse_fn(const char *fname, struct stat *st)
if (strstr(p, ".tmp.") != NULL) {
/* delete any tmp files older than 1 hour */
if (st->st_mtime + 3600 < time(NULL)) {
unlink(fname);
x_unlink(fname);
goto out;
}
}
@ -98,7 +98,7 @@ out:
static void
delete_file(const char *path, size_t size)
{
if (unlink(path) == 0) {
if (x_unlink(path) == 0) {
cache_size -= size;
files_in_cache--;
} else if (errno != ENOENT) {
@ -242,7 +242,7 @@ static void wipe_fn(const char *fname, struct stat *st)
}
free(p);
unlink(fname);
x_unlink(fname);
}
/* wipe all cached files in all subdirs */

View File

@ -175,7 +175,7 @@ execute(char **argv, const char *path_stdout, const char *path_stderr)
if (pid == 0) {
int fd;
unlink(path_stdout);
tmp_unlink(path_stdout);
fd = open(path_stdout, O_WRONLY|O_CREAT|O_TRUNC|O_EXCL|O_BINARY, 0666);
if (fd == -1) {
exit(1);
@ -183,7 +183,7 @@ execute(char **argv, const char *path_stdout, const char *path_stderr)
dup2(fd, 1);
close(fd);
unlink(path_stderr);
tmp_unlink(path_stderr);
fd = open(path_stderr, O_WRONLY|O_CREAT|O_TRUNC|O_EXCL|O_BINARY, 0666);
if (fd == -1) {
exit(1);

View File

@ -84,7 +84,7 @@ lockfile_acquire(const char *path, unsigned staleness_limit)
if (write(fd, my_content, strlen(my_content)) == -1) {
cc_log("lockfile_acquire: write %s: %s", lockfile, strerror(errno));
close(fd);
unlink(lockfile);
x_unlink(lockfile);
goto out;
}
close(fd);
@ -178,7 +178,7 @@ lockfile_release(const char *path)
{
char *lockfile = format("%s.lock", path);
cc_log("Releasing lock %s", lockfile);
unlink(lockfile);
tmp_unlink(lockfile);
free(lockfile);
}

View File

@ -335,7 +335,7 @@ stats_zero(void)
char *fname;
fname = format("%s/stats", cache_dir);
unlink(fname);
x_unlink(fname);
free(fname);
for (dir = 0; dir <= 0xF; dir++) {

40
util.c
View File

@ -268,7 +268,7 @@ copy_file(const char *src, const char *dest, int compress_dest)
gzclose(gz_out);
}
close(fd_out);
unlink(tmp_name);
tmp_unlink(tmp_name);
free(tmp_name);
return -1;
}
@ -312,7 +312,7 @@ error:
if (fd_out != -1) {
close(fd_out);
}
unlink(tmp_name);
tmp_unlink(tmp_name);
free(tmp_name);
return -1;
}
@ -325,7 +325,7 @@ move_file(const char *src, const char *dest, int compress_dest)
ret = copy_file(src, dest, compress_dest);
if (ret != -1) {
unlink(src);
x_unlink(src);
}
return ret;
}
@ -1048,11 +1048,43 @@ x_rename(const char *oldpath, const char *newpath)
{
#ifdef _WIN32
/* Windows' rename() refuses to overwrite an existing file. */
unlink(newpath);
unlink(newpath); /* not x_unlink, as x_unlink calls x_rename */
#endif
return rename(oldpath, newpath);
}
/*
* Remove path, NFS hazardous, for temporary files that will not exist
* on other systems only. IE the "path" should include tmp_string().
*/
int
tmp_unlink(const char *path)
{
cc_log("Unlink %s (as-tmp)", path);
return (unlink(path));
}
/*
* Remove path, safely for NFS
*/
int
x_unlink(const char *path)
{
/* If path is on an NFS share, unlink isn't atomic, so we rename to a temp file */
/* We don't care if tempfile is trashed, so it's always safe to unlink it first */
const char* tmp_name = format("%s.%s.rmXXXXXX", path, tmp_string());
cc_log("Unlink %s via %s", path, tmp_name);
if (x_rename(path, tmp_name) == -1) {
/* let caller report the error, or not */
return -1;
}
if (unlink(tmp_name) == -1) {
/* let caller report the error, or not */
return -1;
}
return 0;
}
#ifndef _WIN32
/* Like readlink() but returns the string or NULL on failure. Caller frees. */
char *