Unify comments when hashing source code to increase hit rate

This commit is contained in:
Joel Rosdahl 2009-12-14 22:31:28 +01:00
parent 78beb6c38b
commit 0114296fcb
6 changed files with 206 additions and 12 deletions

View File

@ -19,11 +19,11 @@ libs = @LIBS@ -lm
sources = \
ccache.c mdfour.c hash.c execute.c util.c args.c stats.c \
cleanup.c snprintf.c unify.c manifest.c hashtable.c hashtable_itr.c \
murmurhashneutral2.c hashutil.c
murmurhashneutral2.c hashutil.c comments.c
headers = \
ccache.h hashtable.h hashtable_itr.h hashtable_private.h hashutil.h \
manifest.h mdfour.h murmurhashneutral2.h
manifest.h mdfour.h murmurhashneutral2.h comments.h
objs = $(sources:.c=.o)

View File

@ -27,6 +27,7 @@
#include "hashtable_itr.h"
#include "hashutil.h"
#include "manifest.h"
#include "comments.h"
#include <getopt.h>
@ -244,7 +245,7 @@ static void remember_include_file(char *path, size_t path_len)
struct mdfour fhash;
struct stat st;
int fd = -1;
int ret;
char *data = (char *)-1;
if (!included_files) {
goto ignore;
@ -284,19 +285,20 @@ static void remember_include_file(char *path, size_t path_len)
cc_log("Include file \"%s\" too new\n", path);
goto failure;
}
hash_start(&fhash);
ret = hash_fd(&fhash, fd);
if (!ret) {
cc_log("Failed hashing include file \"%s\"\n", path);
data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (data == (char *)-1) {
cc_log("Failed to mmap %s\n", path);
goto failure;
}
/* Hashing OK. */
hash_start(&fhash);
hash_string_ignoring_comments(&fhash, data, st.st_size);
h = x_malloc(sizeof(*h));
hash_result_as_bytes(&fhash, h->hash);
h->size = fhash.totalN;
hashtable_insert(included_files, path, h);
close(fd);
munmap(data, st.st_size);
return;
failure:
@ -307,6 +309,9 @@ failure:
/* Fall through. */
ignore:
free(path);
if (data != (char *)-1) {
munmap(data, st.st_size);
}
if (fd != -1) {
close(fd);
}
@ -786,7 +791,7 @@ static int find_hash(ARGS *args, enum findhash_call_mode mode)
switch (mode) {
case FINDHASH_DIRECT_MODE:
if (!hash_file(&hash, input_file)) {
if (!hash_file_ignoring_comments(&hash, input_file)) {
cc_log("Failed hashing %s\n", input_file);
failed();
}
@ -980,7 +985,8 @@ static void from_cache(enum fromcache_call_mode mode, int put_object_in_manifest
/* Create or update the manifest file. */
if (put_object_in_manifest && included_files) {
if (manifest_put(manifest_path, object_hash, included_files)) {
cc_log("Added object file hash to manifest\n");
cc_log("Added object file hash to manifest %s\n",
manifest_path);
/* Update timestamp for LRU cleanup. */
#ifdef HAVE_UTIMES
utimes(manifest_path, NULL);

135
comments.c Normal file
View File

@ -0,0 +1,135 @@
/*
* Copyright (C) Joel Rosdahl 2009
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 675 Mass
* Ave, Cambridge, MA 02139, USA.
*/
#include "ccache.h"
#include "comments.h"
/*
 * Stage one character for hashing. The macro expects three names to be in
 * scope at the point of use: a char array `hashbuf`, its fill level
 * `hashbuflen` and the hash state pointer `hash`. Once the staging array is
 * full it is handed to hash_buffer() in one piece and the fill level is reset.
 */
#define HASH(ch) \
	do { \
		hashbuf[hashbuflen++] = (ch); \
		if (hashbuflen == sizeof(hashbuf)) { \
			hash_buffer(hash, hashbuf, sizeof(hashbuf)); \
			hashbuflen = 0; \
		} \
	} while (0)
/*
 * Add the len bytes starting at str to hash, leaving out comment text.
 *
 * - A block comment is replaced by a single space (so that the tokens on
 *   either side are not pasted together) followed by one newline for every
 *   newline inside the comment (so that line numbers are preserved).
 * - A line comment (two slashes) is dropped up to, but not including, its
 *   terminating newline. Newlines that are spliced into the comment with a
 *   trailing backslash are still hashed, again to preserve line numbers.
 * - String literals and character literals are hashed verbatim, so
 *   comment-like text inside them is significant. A backslash inside a
 *   literal always escapes the following character, which means a literal
 *   ending in an escaped backslash is terminated correctly. A character
 *   literal additionally ends at an unescaped newline; this keeps a stray
 *   apostrophe (e.g. in preprocessor text) from swallowing later lines.
 *
 * An unterminated block comment or literal simply runs to the end of input.
 */
void hash_string_ignoring_comments(
	struct mdfour *hash, const char *str, size_t len)
{
	const char *p = str;
	const char *end = str + len;
	char hashbuf[64];
	size_t hashbuflen = 0;
	char quote;

	while (p < end) {
		switch (*p) {
		case '/':
			if (end - p < 2) {
				/* Lone slash at the very end: ordinary character. */
				break;
			}
			if (p[1] == '*') {
				/* Don't paste tokens together when removing the
				 * comment. */
				HASH(' ');
				p += 2;
				/* Comparing remaining length instead of p+1 keeps
				 * the pointer within [str, end]. */
				while (end - p >= 2
				       && (p[0] != '*' || p[1] != '/')) {
					if (*p == '\n') {
						/* Keep line numbers. */
						HASH('\n');
					}
					p++;
				}
				if (end - p < 2) {
					/* Comment is not closed before end of input. */
					goto done;
				}
				p += 2;
				continue;
			}
			if (p[1] == '/') {
				p += 2;
				while (p < end
				       && (*p != '\n' || p[-1] == '\\')) {
					if (*p == '\n') {
						/* Backslash-newline continues the
						 * comment; keep line numbers. */
						HASH('\n');
					}
					p++;
				}
				/* p is now at the terminating newline (or at end);
				 * the newline itself is hashed by the next round. */
				continue;
			}
			break;
		case '"':
		case '\'':
			quote = *p;
			HASH(*p);
			p++;
			while (p < end && *p != quote) {
				if (quote == '\'' && *p == '\n') {
					/* Not a real character literal; resync. */
					break;
				}
				if (*p == '\\' && end - p >= 2) {
					/* Hash the backslash here and the escaped
					 * character below, so that an escaped quote
					 * or backslash cannot end the literal. */
					HASH(*p);
					p++;
				}
				HASH(*p);
				p++;
			}
			if (p == end) {
				goto done;
			}
			/* The closing quote (or newline) is hashed below. */
			break;
		default:
			break;
		}
		HASH(*p);
		p++;
	}

done:
	/* Flush whatever is left in the staging buffer. */
	hash_buffer(hash, hashbuf, hashbuflen);
}
/*
 * Add contents of a file to a hash, but don't hash comments. Returns 1 on
 * success, otherwise 0.
 *
 * The file is opened read-only and mapped into memory for the duration of the
 * hashing. An empty file is a success: mmap() rejects a zero-length mapping,
 * so that case is handled without mapping and contributes no bytes.
 */
int hash_file_ignoring_comments(struct mdfour *hash, const char *path)
{
	int fd;
	struct stat st;
	char *data;
	size_t size;

	fd = open(path, O_RDONLY);
	if (fd == -1) {
		return 0;
	}
	if (fstat(fd, &st) == -1) {
		close(fd);
		return 0;
	}
	if (st.st_size == 0) {
		/* Nothing to map; go through the same hashing entry point with
		 * zero bytes so that the empty case is not reported as an error. */
		close(fd);
		hash_string_ignoring_comments(hash, "", 0);
		return 1;
	}
	size = (size_t)st.st_size;
	data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
	/* The mapping, if any, stays valid after the descriptor is closed. */
	close(fd);
	if (data == MAP_FAILED) {
		return 0;
	}
	hash_string_ignoring_comments(hash, data, size);
	munmap(data, size);
	return 1;
}

10
comments.h Normal file
View File

@ -0,0 +1,10 @@
#ifndef COMMENTS_H
#define COMMENTS_H
#include "mdfour.h"
/*
 * Add the len bytes starting at str to hash, skipping over comments so that
 * changes to comment text do not alter the resulting hash.
 */
void hash_string_ignoring_comments(
struct mdfour *hash, const char *str, size_t len);
/*
 * Add the contents of the file at path to hash, skipping over comments.
 * Returns 1 on success, otherwise 0.
 */
int hash_file_ignoring_comments(struct mdfour *hash, const char *path);
#endif

View File

@ -29,6 +29,7 @@
#include "hashutil.h"
#include "manifest.h"
#include "murmurhashneutral2.h"
#include "comments.h"
extern char *temp_dir;
@ -345,7 +346,8 @@ static int verify_object(struct manifest *mf, struct object *obj,
if (!actual) {
actual = x_malloc(sizeof(*actual));
hash_start(&hash);
if (!hash_file(&hash, mf->files[fi->index])) {
if (!hash_file_ignoring_comments(
&hash, mf->files[fi->index])) {
cc_log("Failed hashing %s\n",
mf->files[fi->index]);
free(actual);

41
test.sh
View File

@ -442,6 +442,7 @@ EOF
##################################################################
# Check that -Wp,-MMD,file.d works.
testname="-Wp,-MMD"
$CCACHE -C >/dev/null
$CCACHE -z >/dev/null
$CCACHE $COMPILER -c -Wp,-MMD,other.d test.c
checkstat 'cache hit (direct)' 0
@ -530,6 +531,7 @@ EOF
##################################################################
# Check that -MF works.
testname="-MF"
$CCACHE -C >/dev/null
$CCACHE -z >/dev/null
$CCACHE $COMPILER -c -MD -MF other.d test.c
checkstat 'cache hit (direct)' 0
@ -603,6 +605,45 @@ EOF
checkstat 'cache miss' 1
checkfile stderr-mf.txt "`cat stderr-orig.txt`"
##################################################################
# Check that changes in comments are ignored when hashing.
testname="changes in comments"
$CCACHE -C >/dev/null
$CCACHE -z >/dev/null
cat <<EOF >comments.h
/*
* /* foo comment
*/
EOF
cat <<'EOF' >comments.c
#include "comments.h"
char test[] = "\
/* apple */ // banana"; // foo comment
EOF
sleep 1 # Sleep to make the include file trusted.
$CCACHE $COMPILER -c comments.c
checkstat 'cache hit (direct)' 0
checkstat 'cache hit (preprocessed)' 0
checkstat 'cache miss' 1
sed -i 's/foo/ignored/' comments.h comments.c
sleep 1 # Sleep to make the include file trusted.
$CCACHE $COMPILER -c comments.c
checkstat 'cache hit (direct)' 1
checkstat 'cache hit (preprocessed)' 0
checkstat 'cache miss' 1
# Check that comment-like string contents are hashed.
sed -i 's/apple/orange/' comments.c
sleep 1 # Sleep to make the include file trusted.
$CCACHE $COMPILER -c comments.c
checkstat 'cache hit (direct)' 1
checkstat 'cache hit (preprocessed)' 0
checkstat 'cache miss' 2
##################################################################
# Reset things.
CCACHE_NODIRECT=1