Improve speed of temporal macro search

This commit is contained in:
Justin Lebar 2010-11-29 20:24:06 +01:00 committed by Joel Rosdahl
parent 90e9de12fa
commit fe732f041e
3 changed files with 122 additions and 145 deletions

View File

@ -19,6 +19,7 @@
#include "ccache.h"
#include "hashutil.h"
#include "murmurhashneutral2.h"
#include "macroskip.h"
unsigned
hash_from_string(void *str)
@ -45,125 +46,84 @@ file_hashes_equal(struct file_hash *fh1, struct file_hash *fh2)
&& fh1->size == fh2->size;
}
#define HASH(ch) \
do {\
hashbuf[hashbuflen] = ch; \
hashbuflen++; \
if (hashbuflen == sizeof(hashbuf)) {\
hash_buffer(hash, hashbuf, sizeof(hashbuf)); \
hashbuflen = 0; \
} \
} while (0)
/*
* Search for the strings "__DATE__" and "__TIME__" in str.
*
* Returns a bitmask with HASH_SOURCE_CODE_FOUND_DATE and
* HASH_SOURCE_CODE_FOUND_TIME set appropriately.
*/
static int
check_for_temporal_macros(const char *str, size_t len)
{
int result = 0;
/*
* We're using the Boyer-Moore-Horspool algorithm, which searches
* starting from the *end* of the needle. Our needles are 8 characters
* long, so i starts at 7.
*/
size_t i = 7;
while (i < len) {
/*
* Check whether the substring ending at str[i] has the form
* '__...E__'. On the assumption that 'E' is less common in
* source than '_', we check str[i-2] first.
*/
if (str[i - 2] == 'E' &&
str[i - 0] == '_' &&
str[i - 7] == '_' &&
str[i - 1] == '_' &&
str[i - 6] == '_') {
/*
* Check the remaining characters to see if the
* substring is '__DATE__' or '__TIME__'.
*/
if (str[i - 5] == 'D' && str[i - 4] == 'A' &&
str[i - 3] == 'T') {
result |= HASH_SOURCE_CODE_FOUND_DATE;
}
else if (str[i - 5] == 'T' && str[i - 4] == 'I' &&
str[i - 3] == 'M') {
result |= HASH_SOURCE_CODE_FOUND_TIME;
}
}
/*
* macro_skip tells us how far we can skip forward upon seeing
* str[i] at the end of a substring.
*/
i += macro_skip[(uint8_t)str[i]];
}
return result;
}
/*
* Hash a string ignoring comments. Returns a bitmask of HASH_SOURCE_CODE_*
* results.
* Hash a string. Returns a bitmask of HASH_SOURCE_CODE_* results.
*/
int
hash_source_code_string(
struct mdfour *hash, const char *str, size_t len, const char *path)
{
const char *p;
const char *end;
char hashbuf[64];
size_t hashbuflen = 0;
int result = HASH_SOURCE_CODE_OK;
extern unsigned sloppiness;
p = str;
end = str + len;
while (1) {
if (p >= end) {
goto end;
}
switch (*p) {
/* Potential start of comment. */
case '/':
if (p+1 == end) {
break;
}
switch (*(p+1)) {
case '*':
HASH(' '); /* Don't paste tokens together when removing the comment. */
p += 2;
while (p+1 < end
&& (*p != '*' || *(p+1) != '/')) {
if (*p == '\n') {
/* Keep line numbers. */
HASH('\n');
}
p++;
}
if (p+1 == end) {
goto end;
}
p += 2;
continue;
case '/':
p += 2;
while (p < end
&& (*p != '\n' || *(p-1) == '\\')) {
p++;
}
continue;
default:
break;
}
break;
/* Start of string. */
case '"':
HASH(*p);
p++;
while (p < end && (*p != '"' || *(p-1) == '\\')) {
HASH(*p);
p++;
}
if (p == end) {
goto end;
}
break;
/* Potential start of volatile macro. */
case '_':
if (p + 7 < end
&& p[1] == '_' && p[5] == 'E'
&& p[6] == '_' && p[7] == '_') {
if (p[2] == 'D' && p[3] == 'A'
&& p[4] == 'T') {
result |= HASH_SOURCE_CODE_FOUND_DATE;
} else if (p[2] == 'T' && p[3] == 'I'
&& p[4] == 'M') {
result |= HASH_SOURCE_CODE_FOUND_TIME;
}
/*
* Of course, we can't be sure that we have found a __{DATE,TIME}__
* that's actually used, but better safe than sorry. And if you do
* something like
*
* #define TIME __TI ## ME__
*
* in your code, you deserve to get a false cache hit.
*/
}
break;
default:
break;
}
HASH(*p);
p++;
/*
* Check for __DATE__ and __TIME__ if the sloppiness argument tells us
* we have to.
*/
if (!(sloppiness & SLOPPY_TIME_MACROS)) {
result |= check_for_temporal_macros(str, len);
}
end:
hash_buffer(hash, hashbuf, hashbuflen);
/*
* Hash the source string.
*/
hash_buffer(hash, str, len);
if (sloppiness & SLOPPY_TIME_MACROS) {
return 0;
}
if (result & HASH_SOURCE_CODE_FOUND_DATE) {
/*
* Make sure that the hash sum changes if the (potential) expansion of

56
macroskip.h Normal file
View File

@ -0,0 +1,56 @@
/*
* A Boyer-Moore-Horspool skip table used for searching for the strings
* "__TIME__" and "__DATE__".
*
* macro_skip[c] = 8 for all c not in "__TIME__" and "__DATE__".
*
* The other characters map as follows:
*
* _ -> 1
* A -> 5
* D -> 6
* E -> 3
* I -> 5
* M -> 4
* T -> 4
*
*
* This was generated with the following Python script:
*
* m = {'_': 1,
* 'A': 5,
* 'D': 6,
* 'E': 3,
* 'I': 5,
* 'M': 4,
* 'T': 4}
*
* for i in range(0, 256):
* if chr(i) in m:
* num = m[chr(i)]
* else:
* num = 8
* print ("%d, " % num),
*
* if i % 16 == 15:
* print ""
*/
static const uint32_t macro_skip[] = {
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 5, 8, 8, 6, 3, 8, 8, 8, 5, 8, 8, 8, 4, 8, 8,
8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
};

39
test.sh
View File

@ -866,45 +866,6 @@ EOF
checkstat 'cache miss' 1
checkfile stderr-mf.txt "`cat stderr-orig.txt`"
##################################################################
# Check that changes in comments are ignored when hashing.
testname="changes in comments"
$CCACHE -C >/dev/null
$CCACHE -z >/dev/null
cat <<EOF >comments.h
/*
* /* foo comment
*/
EOF
backdate comments.h
cat <<'EOF' >comments.c
#include "comments.h"
char test[] = "\
/* apple */ // banana"; // foo comment
EOF
$CCACHE $COMPILER -c comments.c
checkstat 'cache hit (direct)' 0
checkstat 'cache hit (preprocessed)' 0
checkstat 'cache miss' 1
sed_in_place 's/foo/ignored/' comments.h comments.c
backdate comments.h
$CCACHE $COMPILER -c comments.c
checkstat 'cache hit (direct)' 1
checkstat 'cache hit (preprocessed)' 0
checkstat 'cache miss' 1
# Check that comment-like string contents are hashed.
sed_in_place 's/apple/orange/' comments.c
backdate comments.h
$CCACHE $COMPILER -c comments.c
checkstat 'cache hit (direct)' 1
checkstat 'cache hit (preprocessed)' 0
checkstat 'cache miss' 2
##################################################################
# Check that it is possible to compile and cache an empty source code file.
testname="empty source file"