mirror of
https://github.com/reactos/ccache.git
synced 2024-11-26 21:20:32 +00:00
Improve speed of temporal macro search
This commit is contained in:
parent
90e9de12fa
commit
fe732f041e
172
hashutil.c
172
hashutil.c
@ -19,6 +19,7 @@
|
||||
#include "ccache.h"
|
||||
#include "hashutil.h"
|
||||
#include "murmurhashneutral2.h"
|
||||
#include "macroskip.h"
|
||||
|
||||
unsigned
|
||||
hash_from_string(void *str)
|
||||
@ -45,125 +46,84 @@ file_hashes_equal(struct file_hash *fh1, struct file_hash *fh2)
|
||||
&& fh1->size == fh2->size;
|
||||
}
|
||||
|
||||
#define HASH(ch) \
|
||||
do {\
|
||||
hashbuf[hashbuflen] = ch; \
|
||||
hashbuflen++; \
|
||||
if (hashbuflen == sizeof(hashbuf)) {\
|
||||
hash_buffer(hash, hashbuf, sizeof(hashbuf)); \
|
||||
hashbuflen = 0; \
|
||||
} \
|
||||
} while (0)
|
||||
/*
|
||||
* Search for the strings "__DATE__" and "__TIME__" in str.
|
||||
*
|
||||
* Returns a bitmask with HASH_SOURCE_CODE_FOUND_DATE and
|
||||
* HASH_SOURCE_CODE_FOUND_TIME set appropriately.
|
||||
*/
|
||||
static int
|
||||
check_for_temporal_macros(const char *str, size_t len)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
/*
|
||||
* We're using the Boyer-Moore-Horspool algorithm, which searches
|
||||
* starting from the *end* of the needle. Our needles are 8 characters
|
||||
* long, so i starts at 7.
|
||||
*/
|
||||
size_t i = 7;
|
||||
|
||||
while (i < len) {
|
||||
/*
|
||||
* Check whether the substring ending at str[i] has the form
|
||||
* '__...E__'. On the assumption that 'E' is less common in
|
||||
* source than '_', we check str[i-2] first.
|
||||
*/
|
||||
if (str[i - 2] == 'E' &&
|
||||
str[i - 0] == '_' &&
|
||||
str[i - 7] == '_' &&
|
||||
str[i - 1] == '_' &&
|
||||
str[i - 6] == '_') {
|
||||
|
||||
/*
|
||||
* Check the remaining characters to see if the
|
||||
* substring is '__DATE__' or '__TIME__'.
|
||||
*/
|
||||
|
||||
if (str[i - 5] == 'D' && str[i - 4] == 'A' &&
|
||||
str[i - 3] == 'T') {
|
||||
result |= HASH_SOURCE_CODE_FOUND_DATE;
|
||||
}
|
||||
else if (str[i - 5] == 'T' && str[i - 4] == 'I' &&
|
||||
str[i - 3] == 'M') {
|
||||
result |= HASH_SOURCE_CODE_FOUND_TIME;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* macro_skip tells us how far we can skip forward upon seeing
|
||||
* str[i] at the end of a substring.
|
||||
*/
|
||||
i += macro_skip[(uint8_t)str[i]];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Hash a string ignoring comments. Returns a bitmask of HASH_SOURCE_CODE_*
|
||||
* results.
|
||||
* Hash a string. Returns a bitmask of HASH_SOURCE_CODE_* results.
|
||||
*/
|
||||
int
|
||||
hash_source_code_string(
|
||||
struct mdfour *hash, const char *str, size_t len, const char *path)
|
||||
{
|
||||
const char *p;
|
||||
const char *end;
|
||||
char hashbuf[64];
|
||||
size_t hashbuflen = 0;
|
||||
int result = HASH_SOURCE_CODE_OK;
|
||||
extern unsigned sloppiness;
|
||||
|
||||
p = str;
|
||||
end = str + len;
|
||||
while (1) {
|
||||
if (p >= end) {
|
||||
goto end;
|
||||
}
|
||||
switch (*p) {
|
||||
/* Potential start of comment. */
|
||||
case '/':
|
||||
if (p+1 == end) {
|
||||
break;
|
||||
}
|
||||
switch (*(p+1)) {
|
||||
case '*':
|
||||
HASH(' '); /* Don't paste tokens together when removing the comment. */
|
||||
p += 2;
|
||||
while (p+1 < end
|
||||
&& (*p != '*' || *(p+1) != '/')) {
|
||||
if (*p == '\n') {
|
||||
/* Keep line numbers. */
|
||||
HASH('\n');
|
||||
}
|
||||
p++;
|
||||
}
|
||||
if (p+1 == end) {
|
||||
goto end;
|
||||
}
|
||||
p += 2;
|
||||
continue;
|
||||
|
||||
case '/':
|
||||
p += 2;
|
||||
while (p < end
|
||||
&& (*p != '\n' || *(p-1) == '\\')) {
|
||||
p++;
|
||||
}
|
||||
continue;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/* Start of string. */
|
||||
case '"':
|
||||
HASH(*p);
|
||||
p++;
|
||||
while (p < end && (*p != '"' || *(p-1) == '\\')) {
|
||||
HASH(*p);
|
||||
p++;
|
||||
}
|
||||
if (p == end) {
|
||||
goto end;
|
||||
}
|
||||
break;
|
||||
|
||||
/* Potential start of volatile macro. */
|
||||
case '_':
|
||||
if (p + 7 < end
|
||||
&& p[1] == '_' && p[5] == 'E'
|
||||
&& p[6] == '_' && p[7] == '_') {
|
||||
if (p[2] == 'D' && p[3] == 'A'
|
||||
&& p[4] == 'T') {
|
||||
result |= HASH_SOURCE_CODE_FOUND_DATE;
|
||||
} else if (p[2] == 'T' && p[3] == 'I'
|
||||
&& p[4] == 'M') {
|
||||
result |= HASH_SOURCE_CODE_FOUND_TIME;
|
||||
}
|
||||
/*
|
||||
* Of course, we can't be sure that we have found a __{DATE,TIME}__
|
||||
* that's actually used, but better safe than sorry. And if you do
|
||||
* something like
|
||||
*
|
||||
* #define TIME __TI ## ME__
|
||||
*
|
||||
* in your code, you deserve to get a false cache hit.
|
||||
*/
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
HASH(*p);
|
||||
p++;
|
||||
/*
|
||||
* Check for __DATE__ and __TIME__ if the sloppiness argument tells us
|
||||
* we have to.
|
||||
*/
|
||||
if (!(sloppiness & SLOPPY_TIME_MACROS)) {
|
||||
result |= check_for_temporal_macros(str, len);
|
||||
}
|
||||
|
||||
end:
|
||||
hash_buffer(hash, hashbuf, hashbuflen);
|
||||
/*
|
||||
* Hash the source string.
|
||||
*/
|
||||
hash_buffer(hash, str, len);
|
||||
|
||||
if (sloppiness & SLOPPY_TIME_MACROS) {
|
||||
return 0;
|
||||
}
|
||||
if (result & HASH_SOURCE_CODE_FOUND_DATE) {
|
||||
/*
|
||||
* Make sure that the hash sum changes if the (potential) expansion of
|
||||
|
56
macroskip.h
Normal file
56
macroskip.h
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* A Boyer-Moore-Horspool skip table used for searching for the strings
|
||||
* "__TIME__" and "__DATE__".
|
||||
*
|
||||
* macro_skip[c] = 8 for all c not in "__TIME__" and "__DATE__".
|
||||
*
|
||||
* The other characters map as follows:
|
||||
*
|
||||
* _ -> 1
|
||||
* A -> 5
|
||||
* D -> 6
|
||||
* E -> 3
|
||||
* I -> 5
|
||||
* M -> 4
|
||||
* T -> 4
|
||||
*
|
||||
*
|
||||
* This was generated with the following Python script:
|
||||
*
|
||||
* m = {'_': 1,
|
||||
* 'A': 5,
|
||||
* 'D': 6,
|
||||
* 'E': 3,
|
||||
* 'I': 5,
|
||||
* 'M': 4,
|
||||
* 'T': 4}
|
||||
*
|
||||
* for i in range(0, 256):
|
||||
* if chr(i) in m:
|
||||
* num = m[chr(i)]
|
||||
* else:
|
||||
* num = 8
|
||||
* print ("%d, " % num),
|
||||
*
|
||||
* if i % 16 == 15:
|
||||
* print ""
|
||||
*/
|
||||
|
||||
static const uint32_t macro_skip[] = {
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 5, 8, 8, 6, 3, 8, 8, 8, 5, 8, 8, 8, 4, 8, 8,
|
||||
8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
};
|
39
test.sh
39
test.sh
@ -866,45 +866,6 @@ EOF
|
||||
checkstat 'cache miss' 1
|
||||
checkfile stderr-mf.txt "`cat stderr-orig.txt`"
|
||||
|
||||
##################################################################
|
||||
# Check that changes in comments are ignored when hashing.
|
||||
testname="changes in comments"
|
||||
$CCACHE -C >/dev/null
|
||||
$CCACHE -z >/dev/null
|
||||
cat <<EOF >comments.h
|
||||
/*
|
||||
* /* foo comment
|
||||
*/
|
||||
EOF
|
||||
backdate comments.h
|
||||
cat <<'EOF' >comments.c
|
||||
#include "comments.h"
|
||||
char test[] = "\
|
||||
/* apple */ // banana"; // foo comment
|
||||
EOF
|
||||
|
||||
$CCACHE $COMPILER -c comments.c
|
||||
checkstat 'cache hit (direct)' 0
|
||||
checkstat 'cache hit (preprocessed)' 0
|
||||
checkstat 'cache miss' 1
|
||||
|
||||
sed_in_place 's/foo/ignored/' comments.h comments.c
|
||||
backdate comments.h
|
||||
|
||||
$CCACHE $COMPILER -c comments.c
|
||||
checkstat 'cache hit (direct)' 1
|
||||
checkstat 'cache hit (preprocessed)' 0
|
||||
checkstat 'cache miss' 1
|
||||
|
||||
# Check that comment-like string contents are hashed.
|
||||
sed_in_place 's/apple/orange/' comments.c
|
||||
backdate comments.h
|
||||
|
||||
$CCACHE $COMPILER -c comments.c
|
||||
checkstat 'cache hit (direct)' 1
|
||||
checkstat 'cache hit (preprocessed)' 0
|
||||
checkstat 'cache miss' 2
|
||||
|
||||
##################################################################
|
||||
# Check that it is possible to compile and cache an empty source code file.
|
||||
testname="empty source file"
|
||||
|
Loading…
Reference in New Issue
Block a user