diff --git a/Makefile b/Makefile index 80bb15b..49fb4c9 100644 --- a/Makefile +++ b/Makefile @@ -1,18 +1,14 @@ CFLAGS=-W -Wall -O2 CC=gcc -OBJS= ccache.o mdfour.o hash.o execute.o util.o args.o -CLEAN_OBJS= ccache_clean.o util.o +OBJS= ccache.o mdfour.o hash.o execute.o util.o args.o stats.o cleanup.o HEADERS = ccache.h mdfour.h -all: ccache ccache_clean +all: ccache ccache: $(OBJS) $(HEADERS) $(CC) -o $@ $(OBJS) -ccache_clean: $(CLEAN_OBJS) $(HEADERS) - $(CC) -o $@ $(CLEAN_OBJS) - clean: - /bin/rm -f $(OBJS) *~ ccache ccache_clean + /bin/rm -f $(OBJS) *~ ccache diff --git a/README b/README index c2ec165..4baf89e 100644 --- a/README +++ b/README @@ -12,29 +12,41 @@ shell-script version. Installation ------------ -To install ccache first compile it, then place the "ccache" somewhere -in your path. You then need to create symbolic links from the ccache -executable to symlinks of the same name as your compiler. These -symlinks must come before the location of your real compiler in your -PATH. +There are two ways to use ccache. You can either prefix your compile +commands with "ccache" or you can create a symbolic link between +ccache and the names of your compilers. The first method is most +convenient if you just want to try out ccache or wish to use it for +some specific projects. The second method is most useful for when you +wish to use ccache for all your compiles. -For example the following will work on many systems: +To install for usage by the first method just copy ccache to somewhere +in your path. - make - cp ccache ccache_clean /usr/local/bin/ +To install for the second method do something like this: + + cp ccache /usr/local/bin/ ln -s /usr/local/bin/ccache /usr/local/bin/gcc ln -s /usr/local/bin/ccache /usr/local/bin/cc This will work as long as /usr/local/bin comes before the path to gcc (which is usually in /usr/bin). After installing you may wish to run -"rehash; type gcc" to make sure that the correct link is being used. 
+"which gcc" to make sure that the correct link is being used. + +Setting cache limits +-------------------- + +Run "ccache -h" to see a list of options. The main ones you may wish +to look at are "ccache -M" and "ccache -F" for setting the cache size +limits. + +You can use "ccache -s" to look at the cache hit/miss statistics. Configuration ------------- -Configuration of ccache is done via a number of environment -variables. In most cases you won't need any of these as the defaults -will be fine. +Configuration of ccache is done via a number of environment variables +and via ccache commands. In most cases you won't need any of these as +the defaults will be fine. CCACHE_DIR @@ -72,7 +84,11 @@ are: - ccache is written in C, which makes it a bit faster (calling out to external programs is mostly what slowed down the scripts). -- ccache can automatically find the real compiler on Linux +- ccache can automatically find the real compiler + +- ccache keeps statistics on hits/misses + +- ccache can do automatic cache management - ccache can cache compiler output that includes warnings. In many cases this gives ccache a much higher cache hit rate. @@ -99,21 +115,16 @@ compiling rsync I get: ccache uncached 24.6 seconds ccache cached 4.6 seconds +Cache size management +--------------------- -Cleaning the cache ------------------- +By default ccache has no limit on the cache size. You can set a limit +using the "ccache -M" and "ccache -F" options, which set the size and +number of files limits. -ccache tends to quickly fill up the cache directory. You may find the -ccache_clean utility useful for removing old cache files. If called -with no arguments it will trim the cache to be less than 1 Gigabyte, -deleting the oldest files first. You can also pass a single argument -specifying the size limit on the cache, for example: - ccache_clean 2G -would clear the oldest files to bring the cache below 2G in size. 
You -can use 'M' for megabytes, 'G' for gigabytes or 'K' for kilobytes. - -You may wish to call ccache_clean from a cron job to keep your disk -space usage reasonable. +When these limits are reached ccache will reduce the cache to 20% +below the numbers you specified in order to avoid doing the cache +clean operation too often. How it works ------------ diff --git a/ccache.c b/ccache.c index d5358ba..2574a0d 100644 --- a/ccache.c +++ b/ccache.c @@ -22,12 +22,13 @@ #include "ccache.h" -static char *cache_dir; +char *cache_dir = NULL; char *cache_logfile = NULL; static ARGS *stripped_args; static ARGS *orig_args; static char *output_file; static char *hashname; +char *stats_file = NULL; static int found_debug; /* @@ -45,7 +46,7 @@ static void to_cache(ARGS *args) { char *path_stderr; char *tmp_stdout, *tmp_stderr, *tmp_hashname; - struct stat st; + struct stat st1, st2; int status; x_asprintf(&tmp_stdout, "%s/tmp.stdout.%d", cache_dir, getpid()); @@ -57,8 +58,9 @@ static void to_cache(ARGS *args) status = execute(args->argv, tmp_stdout, tmp_stderr); args_pop(args, 2); - if (stat(tmp_stdout, &st) != 0 || st.st_size != 0) { + if (stat(tmp_stdout, &st1) != 0 || st1.st_size != 0) { cc_log("compiler produced stdout for %s\n", output_file); + stats_update(STATS_STDOUT); unlink(tmp_stdout); unlink(tmp_stderr); unlink(tmp_hashname); @@ -69,6 +71,7 @@ static void to_cache(ARGS *args) if (status != 0) { int fd; cc_log("compile of %s gave status = %d\n", output_file, status); + stats_update(STATS_STATUS); fd = open(tmp_stderr, O_RDONLY); if (fd != -1 && @@ -87,13 +90,17 @@ static void to_cache(ARGS *args) x_asprintf(&path_stderr, "%s.stderr", hashname); - if (rename(tmp_hashname, hashname) != 0 || + if (stat(tmp_stderr, &st1) != 0 || + stat(tmp_hashname, &st2) != 0 || + rename(tmp_hashname, hashname) != 0 || rename(tmp_stderr, path_stderr) != 0) { cc_log("failed to rename tmp files\n"); + stats_update(STATS_ERROR); failed(); } cc_log("Placed %s into cache\n", output_file); + 
stats_tocache(file_size(&st1) + file_size(&st2)); free(tmp_hashname); free(tmp_stderr); @@ -115,6 +122,7 @@ static void stabs_hash(const char *fname) fd = open(fname, O_RDONLY); if (fd == -1 || fstat(fd, &st) != 0) { cc_log("Failed to open preprocessor output %s\n", fname); + stats_update(STATS_PREPROCESSOR); failed(); } @@ -193,6 +201,7 @@ static void find_hash(ARGS *args) to try and detect compiler upgrades. It is not 100% reliable */ if (stat(args->argv[0], &st) != 0) { cc_log("Couldn't stat the compiler!?\n"); + stats_update(STATS_COMPILER); failed(); } hash_int(st.st_size); @@ -210,6 +219,7 @@ static void find_hash(ARGS *args) unlink(path_stdout); unlink(path_stderr); cc_log("the preprocessor gave %d\n", status); + stats_update(STATS_PREPROCESSOR); failed(); } @@ -240,6 +250,7 @@ static void find_hash(ARGS *args) failed(); } x_asprintf(&hashname, "%s/%s", hash_dir, s+1); + x_asprintf(&stats_file, "%s/stats", hash_dir); free(hash_dir); } @@ -253,6 +264,7 @@ static void from_cache(int first) int fd_stderr; char *stderr_file; int ret; + struct stat st; x_asprintf(&stderr_file, "%s.stderr", hashname); fd_stderr = open(stderr_file, O_RDONLY); @@ -261,6 +273,15 @@ static void from_cache(int first) free(stderr_file); return; } + + /* make sure the output is there too */ + if (stat(hashname, &st) != 0) { + close(fd_stderr); + unlink(stderr_file); + free(stderr_file); + return; + } + utime(stderr_file, NULL); unlink(output_file); @@ -269,6 +290,7 @@ static void from_cache(int first) /* the hash file might have been deleted by some external process */ if (ret == -1 && errno == ENOENT) { cc_log("hashfile missing for %s\n", output_file); + stats_update(STATS_MISSING); close(fd_stderr); unlink(stderr_file); return; @@ -280,6 +302,7 @@ static void from_cache(int first) if (ret == -1) { cc_log("failed to copy %s -> %s (%s)\n", hashname, output_file, strerror(errno)); + stats_update(STATS_ERROR); failed(); } } @@ -295,6 +318,7 @@ static void from_cache(int first) /* and exit 
with the right status code */ if (first) { cc_log("got cached result for %s\n", output_file); + stats_update(STATS_CACHED); } exit(0); @@ -421,6 +445,7 @@ static void process_args(int argc, char **argv) if (strcmp(argv[i], "-o") == 0) { if (i == argc-1) { cc_log("missing argument to %s\n", argv[i]); + stats_update(STATS_ARGS); failed(); } output_file = argv[i+1]; @@ -447,6 +472,7 @@ static void process_args(int argc, char **argv) strcmp(argv[i], "-isystem") == 0) { if (i == argc-1) { cc_log("missing argument to %s\n", argv[i]); + stats_update(STATS_ARGS); failed(); } @@ -473,6 +499,7 @@ static void process_args(int argc, char **argv) if (input_file) { cc_log("multiple input files (%s and %s)\n", input_file, argv[i]); + stats_update(STATS_LINK); failed(); } @@ -482,11 +509,13 @@ static void process_args(int argc, char **argv) if (!input_file) { cc_log("No input file found\n"); + stats_update(STATS_ARGS); failed(); } if (!found_c_opt) { cc_log("No -c option found for %s\n", input_file); + stats_update(STATS_LINK); failed(); } @@ -499,6 +528,7 @@ static void process_args(int argc, char **argv) p = strrchr(output_file, '.'); if (!p || !p[1]) { cc_log("badly formed output_file %s\n", output_file); + stats_update(STATS_ARGS); failed(); } p[1] = found_S_opt ? 's' : 'o'; @@ -539,19 +569,76 @@ static void ccache(int argc, char *argv[]) /* oh oh! 
*/ cc_log("secondary from_cache failed!\n"); + stats_update(STATS_ERROR); failed(); } static void usage(void) { - printf("Usage: read the docs\n"); + printf("ccache, a compiler cache\n"); + printf("Copyright Andrew Tridgell, 2002\n\n"); + + printf("Usage:\n"); + printf("\tccache [options]\n"); + printf("\tccache compiler [compile options]\n"); + printf("\tcompiler [compile options] (via symbolic link)\n"); + printf("\nOptions:\n"); + + printf("-h this help page\n"); + printf("-s show statistics summary\n"); + printf("-h zero statistics\n"); + printf("-c run a cache cleanup\n"); + printf("-F set maximum files in cache\n"); + printf("-M set maximum size of cache (use G, M or K)\n"); } +/* the main program when not doing a compile */ static int ccache_main(int argc, char *argv[]) { - usage(); - return 1; + extern int optind; + int c; + size_t v; + + while ((c = getopt(argc, argv, "hszcF:M:")) != -1) { + switch (c) { + case 'h': + usage(); + exit(0); + + case 's': + stats_summary(); + break; + + case 'c': + cleanup_all(cache_dir); + printf("Cleaned cached\n"); + break; + + case 'z': + stats_zero(); + printf("Statistics cleared\n"); + break; + + case 'F': + v = atoi(optarg); + stats_set_limits(v, -1); + printf("Set cache file limit to %u\n", (unsigned)v); + break; + + case 'M': + v = value_units(optarg); + stats_set_limits(-1, v); + printf("Set cache size limit to %uk\n", (unsigned)v); + break; + + default: + usage(); + exit(1); + } + } + + return 0; } int main(int argc, char *argv[]) diff --git a/ccache.h b/ccache.h index fd184f2..6aaee51 100644 --- a/ccache.h +++ b/ccache.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,29 @@ #define MYNAME "ccache" +#define LIMIT_MULTIPLE 0.8 + +enum stats { + STATS_NONE=0, + STATS_STDOUT, + STATS_STATUS, + STATS_ERROR, + STATS_TOCACHE, + STATS_PREPROCESSOR, + STATS_COMPILER, + STATS_MISSING, + STATS_CACHED, + STATS_ARGS, + STATS_LINK, + + STATS_NUMFILES, + STATS_TOTALSIZE, + 
STATS_MAXFILES, + STATS_MAXSIZE, + + STATS_END +}; + typedef unsigned uint32; #include "mdfour.h" @@ -47,6 +71,23 @@ void *x_realloc(void *ptr, size_t size); void *x_malloc(size_t size); void traverse(const char *dir, void (*fn)(const char *, struct stat *)); char *basename(const char *s); +char *dirname(char *s); +int lock_fd(int fd); +size_t file_size(struct stat *st); +int safe_open(const char *fname); + +void stats_update(enum stats stat); +void stats_zero(void); +void stats_summary(void); +void stats_tocache(size_t size); +void stats_read(const char *stats_file, unsigned counters[STATS_END]); +void stats_set_limits(long maxfiles, long maxsize); +size_t value_units(const char *s); +void stats_set_sizes(const char *dir, size_t num_files, size_t total_size); + + +void cleanup_dir(const char *dir, size_t maxfiles, size_t maxsize); +void cleanup_all(const char *dir); int execute(char **argv, const char *path_stdout, diff --git a/cleanup.c b/cleanup.c new file mode 100644 index 0000000..4d0d649 --- /dev/null +++ b/cleanup.c @@ -0,0 +1,149 @@ +/* + Copyright (C) Andrew Tridgell 2002 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+*/ +/* + functions to cleanup the cache directory when it gets too large + */ + +#include "ccache.h" + +static struct files { + char *fname; + time_t mtime; + size_t size; +} **files; +static unsigned allocated; +static unsigned num_files; +static size_t total_size; +static size_t total_files; +static size_t size_threshold; +static size_t files_threshold; + +/* file comparison function to try to delete the oldest files first */ +static int files_compare(struct files **f1, struct files **f2) +{ + if ((*f2)->mtime == (*f1)->mtime) { + return strcmp((*f2)->fname, (*f1)->fname); + } + if ((*f2)->mtime > (*f1)->mtime) { + return -1; + } + return 1; +} + +/* this builds the list of files in the cache */ +static void traverse_fn(const char *fname, struct stat *st) +{ + char *p; + + if (!S_ISREG(st->st_mode)) return; + + p = basename(fname); + if (strcmp(p, "stats") == 0) { + free(p); + return; + } + free(p); + + if (num_files == allocated) { + allocated = 10000 + num_files*2; + files = x_realloc(files, sizeof(struct files *)*allocated); + } + + files[num_files] = x_malloc(sizeof(struct files *)); + files[num_files]->fname = x_strdup(fname); + files[num_files]->mtime = st->st_mtime; + files[num_files]->size = file_size(st) / 1024; + total_size += files[num_files]->size; + num_files++; +} + +/* sort the files we've found and delete the oldest ones until we are + below the thresholds */ +static void sort_and_clean(void) +{ + unsigned i; + + if (num_files > 1) { + /* sort in ascending data order */ + qsort(files, num_files, sizeof(struct files *), files_compare); + } + + /* delete enough files to bring us below the threshold */ + for (i=0;ifname) != 0 && errno != ENOENT) { + fprintf(stderr, "unlink %s - %s\n", + files[i]->fname, strerror(errno)); + continue; + } + + total_size -= files[i]->size; + } + + total_files = num_files - i; +} + +/* cleanup in one cache subdir */ +void cleanup_dir(const char *dir, size_t maxfiles, size_t maxsize) +{ + unsigned i; + + size_threshold = 
maxsize * LIMIT_MULTIPLE; + files_threshold = maxfiles * LIMIT_MULTIPLE; + + /* build a list of files */ + traverse(dir, traverse_fn); + + /* clean the cache */ + sort_and_clean(); + + stats_set_sizes(dir, total_files, total_size); + + /* free it up */ + for (i=0;ifname); + free(files[i]); + } + + num_files = 0; + total_size = 0; +} + +/* cleanup in all cache subdirs */ +void cleanup_all(const char *dir) +{ + unsigned counters[STATS_END]; + char *dname, *sfile; + int i; + + for (i=0;i<=0xF;i++) { + x_asprintf(&dname, "%s/%1x", dir, i); + x_asprintf(&sfile, "%s/%1x/stats", dir, i); + + memset(counters, 0, sizeof(counters)); + stats_read(sfile, counters); + + cleanup_dir(dname, + counters[STATS_MAXFILES], + counters[STATS_MAXSIZE]); + free(dname); + free(sfile); + } +} diff --git a/stats.c b/stats.c new file mode 100644 index 0000000..8db2dba --- /dev/null +++ b/stats.c @@ -0,0 +1,317 @@ +/* + Copyright (C) Andrew Tridgell 2002 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+*/ +/* + routines to handle the stats files + + the stats file is stored one per cache subdirectory to make this more + scalable + */ + +#include "ccache.h" + +extern char *stats_file; +extern char *cache_dir; + +#define STATS_VERSION 1 + +static struct { + enum stats stat; + char *message; +} stats_messages[] = { + { STATS_TOCACHE, "cache miss" }, + { STATS_CACHED, "cache hit" }, + { STATS_LINK, "called for link" }, + { STATS_STDOUT, "compiler produced stdout" }, + { STATS_STATUS, "compile failed" }, + { STATS_ERROR, "ccache internal error" }, + { STATS_PREPROCESSOR, "preprocessor error" }, + { STATS_COMPILER, "couldn't find the compiler" }, + { STATS_MISSING, "cache file missing" }, + { STATS_ARGS, "bad compiler arguments" }, + { STATS_NUMFILES, "files in cache" }, + { STATS_TOTALSIZE, "cache size" }, + { STATS_MAXFILES, "max files" }, + { STATS_MAXSIZE, "max cache size" }, + { STATS_NONE, NULL } +}; + +/* return a string description of a statistic */ +static char *stats_message(enum stats stat) +{ + int i; + for (i=0;stats_messages[i].stat != STATS_NONE; i++) { + if (stats_messages[i].stat == stat) { + return stats_messages[i].message; + } + } + return "unknown"; +} + +/* parse a stats file from a buffer - adding to the counters */ +static void parse_stats(unsigned counters[STATS_END], char *buf) +{ + int i; + char *p, *p2; + + p = buf; + for (i=0;i= (int)sizeof(buf)-1) fatal("stats too long?!"); + } + len += snprintf(buf+len, sizeof(buf)-(len+1), "\n"); + if (len >= (int)sizeof(buf)-1) fatal("stats too long?!"); + + lseek(fd, 0, SEEK_SET); + write(fd, buf, len); +} + +/* read in the stats from one dir and add to the counters */ +static void stats_read_fd(int fd, unsigned counters[STATS_END]) +{ + char buf[1024]; + int len; + len = read(fd, buf, sizeof(buf)-1); + if (len <= 0) { + return; + } + buf[len] = 0; + parse_stats(counters, buf); +} + +/* update the stats counter for this compile */ +static void stats_update_size(enum stats stat, size_t size) +{ + int 
fd; + unsigned counters[STATS_END]; + int need_cleanup = 0; + + if (!stats_file) { + if (!cache_dir) return; + x_asprintf(&stats_file, "%s/stats", cache_dir); + } + + /* open safely to try to prevent symlink races */ + fd = safe_open(stats_file); + + /* still can't get it? don't bother ... */ + if (fd == -1) return; + if (lock_fd(fd) != 0) return; + + /* read in the old stats */ + memset(counters, 0, sizeof(counters)); + stats_read_fd(fd, counters); + + /* update them */ + counters[stat]++; + + /* on a cache miss we up the file count and size */ + if (stat == STATS_TOCACHE) { + counters[STATS_NUMFILES] += 2; + counters[STATS_TOTALSIZE] += size; + + /* we might need to cleanup if the cache has now got too big */ + if (counters[STATS_MAXFILES] != 0 && + counters[STATS_NUMFILES] > counters[STATS_MAXFILES]) { + need_cleanup = 1; + } + if (counters[STATS_MAXSIZE] != 0 && + counters[STATS_TOTALSIZE] > counters[STATS_MAXSIZE]) { + need_cleanup = 1; + } + } + + /* and write them out */ + write_stats(fd, counters); + close(fd); + + if (need_cleanup) { + char *p = dirname(stats_file); + cleanup_dir(p, counters[STATS_MAXFILES], counters[STATS_MAXSIZE]); + free(p); + } +} + +/* record a cache miss */ +void stats_tocache(size_t size) +{ + /* convert size to kilobytes */ + size = size / 1024; + + stats_update_size(STATS_TOCACHE, size); +} + +/* update a normal stat */ +void stats_update(enum stats stat) +{ + stats_update_size(stat, 0); +} + +/* read in the stats from one dir and add to the counters */ +void stats_read(const char *stats_file, unsigned counters[STATS_END]) +{ + int fd; + + fd = open(stats_file, O_RDONLY); + if (fd == -1) return; + lock_fd(fd); + stats_read_fd(fd, counters); + close(fd); +} + +/* sum and display the total stats for all cache dirs */ +void stats_summary(void) +{ + int dir, i; + unsigned counters[STATS_END]; + + memset(counters, 0, sizeof(counters)); + + /* add up the stats in each directory */ + for (dir=-1;dir<=0xF;dir++) { + char *fname; + + if 
(dir == -1) { + x_asprintf(&fname, "%s/stats", cache_dir); + } else { + x_asprintf(&fname, "%s/%1x/stats", cache_dir, dir); + } + + stats_read(fname, counters); + free(fname); + } + + /* and display them */ + for (i=0;ist_blocks * 512; +} + + +/* a safe open/create for read-write */ +int safe_open(const char *fname) +{ + int fd = open(fname, O_RDWR); + if (fd == -1 && errno == ENOENT) { + fd = open(fname, O_RDWR|O_CREAT|O_EXCL, 0666); + if (fd == -1 && errno == EEXIST) { + fd = open(fname, O_RDWR); + } + } + return fd; +} + +/* return a value in multiples of 1024 give a string that can end + in K, M or G +*/ +size_t value_units(const char *s) +{ + char m; + size_t v = atoi(s); + m = s[strlen(s)-1]; + switch (m) { + case 'G': + case 'g': + default: + v *= 1024*1024; + break; + case 'M': + case 'm': + v *= 1024; + break; + case 'K': + case 'k': + v *= 1; + break; + } + return v; +}