mirror of
https://github.com/reactos/ccache.git
synced 2025-02-17 01:29:06 +00:00
Improve speed of temporal macro search
This commit is contained in:
parent
90e9de12fa
commit
fe732f041e
172
hashutil.c
172
hashutil.c
@ -19,6 +19,7 @@
|
|||||||
#include "ccache.h"
|
#include "ccache.h"
|
||||||
#include "hashutil.h"
|
#include "hashutil.h"
|
||||||
#include "murmurhashneutral2.h"
|
#include "murmurhashneutral2.h"
|
||||||
|
#include "macroskip.h"
|
||||||
|
|
||||||
unsigned
|
unsigned
|
||||||
hash_from_string(void *str)
|
hash_from_string(void *str)
|
||||||
@ -45,125 +46,84 @@ file_hashes_equal(struct file_hash *fh1, struct file_hash *fh2)
|
|||||||
&& fh1->size == fh2->size;
|
&& fh1->size == fh2->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define HASH(ch) \
|
/*
|
||||||
do {\
|
* Search for the strings "__DATE__" and "__TIME__" in str.
|
||||||
hashbuf[hashbuflen] = ch; \
|
*
|
||||||
hashbuflen++; \
|
* Returns a bitmask with HASH_SOURCE_CODE_FOUND_DATE and
|
||||||
if (hashbuflen == sizeof(hashbuf)) {\
|
* HASH_SOURCE_CODE_FOUND_TIME set appropriately.
|
||||||
hash_buffer(hash, hashbuf, sizeof(hashbuf)); \
|
*/
|
||||||
hashbuflen = 0; \
|
static int
|
||||||
} \
|
check_for_temporal_macros(const char *str, size_t len)
|
||||||
} while (0)
|
{
|
||||||
|
int result = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We're using the Boyer-Moore-Horspool algorithm, which searches
|
||||||
|
* starting from the *end* of the needle. Our needles are 8 characters
|
||||||
|
* long, so i starts at 7.
|
||||||
|
*/
|
||||||
|
size_t i = 7;
|
||||||
|
|
||||||
|
while (i < len) {
|
||||||
|
/*
|
||||||
|
* Check whether the substring ending at str[i] has the form
|
||||||
|
* '__...E__'. On the assumption that 'E' is less common in
|
||||||
|
* source than '_', we check str[i-2] first.
|
||||||
|
*/
|
||||||
|
if (str[i - 2] == 'E' &&
|
||||||
|
str[i - 0] == '_' &&
|
||||||
|
str[i - 7] == '_' &&
|
||||||
|
str[i - 1] == '_' &&
|
||||||
|
str[i - 6] == '_') {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check the remaining characters to see if the
|
||||||
|
* substring is '__DATE__' or '__TIME__'.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (str[i - 5] == 'D' && str[i - 4] == 'A' &&
|
||||||
|
str[i - 3] == 'T') {
|
||||||
|
result |= HASH_SOURCE_CODE_FOUND_DATE;
|
||||||
|
}
|
||||||
|
else if (str[i - 5] == 'T' && str[i - 4] == 'I' &&
|
||||||
|
str[i - 3] == 'M') {
|
||||||
|
result |= HASH_SOURCE_CODE_FOUND_TIME;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* macro_skip tells us how far we can skip forward upon seeing
|
||||||
|
* str[i] at the end of a substring.
|
||||||
|
*/
|
||||||
|
i += macro_skip[(uint8_t)str[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Hash a string ignoring comments. Returns a bitmask of HASH_SOURCE_CODE_*
|
* Hash a string. Returns a bitmask of HASH_SOURCE_CODE_* results.
|
||||||
* results.
|
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
hash_source_code_string(
|
hash_source_code_string(
|
||||||
struct mdfour *hash, const char *str, size_t len, const char *path)
|
struct mdfour *hash, const char *str, size_t len, const char *path)
|
||||||
{
|
{
|
||||||
const char *p;
|
|
||||||
const char *end;
|
|
||||||
char hashbuf[64];
|
|
||||||
size_t hashbuflen = 0;
|
|
||||||
int result = HASH_SOURCE_CODE_OK;
|
int result = HASH_SOURCE_CODE_OK;
|
||||||
extern unsigned sloppiness;
|
extern unsigned sloppiness;
|
||||||
|
|
||||||
p = str;
|
/*
|
||||||
end = str + len;
|
* Check for __DATE__ and __TIME__ if the sloppiness argument tells us
|
||||||
while (1) {
|
* we have to.
|
||||||
if (p >= end) {
|
*/
|
||||||
goto end;
|
if (!(sloppiness & SLOPPY_TIME_MACROS)) {
|
||||||
}
|
result |= check_for_temporal_macros(str, len);
|
||||||
switch (*p) {
|
|
||||||
/* Potential start of comment. */
|
|
||||||
case '/':
|
|
||||||
if (p+1 == end) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
switch (*(p+1)) {
|
|
||||||
case '*':
|
|
||||||
HASH(' '); /* Don't paste tokens together when removing the comment. */
|
|
||||||
p += 2;
|
|
||||||
while (p+1 < end
|
|
||||||
&& (*p != '*' || *(p+1) != '/')) {
|
|
||||||
if (*p == '\n') {
|
|
||||||
/* Keep line numbers. */
|
|
||||||
HASH('\n');
|
|
||||||
}
|
|
||||||
p++;
|
|
||||||
}
|
|
||||||
if (p+1 == end) {
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
p += 2;
|
|
||||||
continue;
|
|
||||||
|
|
||||||
case '/':
|
|
||||||
p += 2;
|
|
||||||
while (p < end
|
|
||||||
&& (*p != '\n' || *(p-1) == '\\')) {
|
|
||||||
p++;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Start of string. */
|
|
||||||
case '"':
|
|
||||||
HASH(*p);
|
|
||||||
p++;
|
|
||||||
while (p < end && (*p != '"' || *(p-1) == '\\')) {
|
|
||||||
HASH(*p);
|
|
||||||
p++;
|
|
||||||
}
|
|
||||||
if (p == end) {
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Potential start of volatile macro. */
|
|
||||||
case '_':
|
|
||||||
if (p + 7 < end
|
|
||||||
&& p[1] == '_' && p[5] == 'E'
|
|
||||||
&& p[6] == '_' && p[7] == '_') {
|
|
||||||
if (p[2] == 'D' && p[3] == 'A'
|
|
||||||
&& p[4] == 'T') {
|
|
||||||
result |= HASH_SOURCE_CODE_FOUND_DATE;
|
|
||||||
} else if (p[2] == 'T' && p[3] == 'I'
|
|
||||||
&& p[4] == 'M') {
|
|
||||||
result |= HASH_SOURCE_CODE_FOUND_TIME;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* Of course, we can't be sure that we have found a __{DATE,TIME}__
|
|
||||||
* that's actually used, but better safe than sorry. And if you do
|
|
||||||
* something like
|
|
||||||
*
|
|
||||||
* #define TIME __TI ## ME__
|
|
||||||
*
|
|
||||||
* in your code, you deserve to get a false cache hit.
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
HASH(*p);
|
|
||||||
p++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
end:
|
/*
|
||||||
hash_buffer(hash, hashbuf, hashbuflen);
|
* Hash the source string.
|
||||||
|
*/
|
||||||
|
hash_buffer(hash, str, len);
|
||||||
|
|
||||||
if (sloppiness & SLOPPY_TIME_MACROS) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (result & HASH_SOURCE_CODE_FOUND_DATE) {
|
if (result & HASH_SOURCE_CODE_FOUND_DATE) {
|
||||||
/*
|
/*
|
||||||
* Make sure that the hash sum changes if the (potential) expansion of
|
* Make sure that the hash sum changes if the (potential) expansion of
|
||||||
|
56
macroskip.h
Normal file
56
macroskip.h
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
/*
|
||||||
|
* A Boyer-Moore-Horspool skip table used for searching for the strings
|
||||||
|
* "__TIME__" and "__DATE__".
|
||||||
|
*
|
||||||
|
* macro_skip[c] = 8 for all c not in "__TIME__" and "__DATE__".
|
||||||
|
*
|
||||||
|
* The other characters map as follows:
|
||||||
|
*
|
||||||
|
* _ -> 1
|
||||||
|
* A -> 5
|
||||||
|
* D -> 6
|
||||||
|
* E -> 3
|
||||||
|
* I -> 5
|
||||||
|
* M -> 4
|
||||||
|
* T -> 4
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* This was generated with the following Python script:
|
||||||
|
*
|
||||||
|
* m = {'_': 1,
|
||||||
|
* 'A': 5,
|
||||||
|
* 'D': 6,
|
||||||
|
* 'E': 3,
|
||||||
|
* 'I': 5,
|
||||||
|
* 'M': 4,
|
||||||
|
* 'T': 4}
|
||||||
|
*
|
||||||
|
* for i in range(0, 256):
|
||||||
|
* if chr(i) in m:
|
||||||
|
* num = m[chr(i)]
|
||||||
|
* else:
|
||||||
|
* num = 8
|
||||||
|
* print ("%d, " % num),
|
||||||
|
*
|
||||||
|
* if i % 16 == 15:
|
||||||
|
* print ""
|
||||||
|
*/
|
||||||
|
|
||||||
|
static const uint32_t macro_skip[] = {
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 5, 8, 8, 6, 3, 8, 8, 8, 5, 8, 8, 8, 4, 8, 8,
|
||||||
|
8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||||
|
};
|
39
test.sh
39
test.sh
@ -866,45 +866,6 @@ EOF
|
|||||||
checkstat 'cache miss' 1
|
checkstat 'cache miss' 1
|
||||||
checkfile stderr-mf.txt "`cat stderr-orig.txt`"
|
checkfile stderr-mf.txt "`cat stderr-orig.txt`"
|
||||||
|
|
||||||
##################################################################
|
|
||||||
# Check that changes in comments are ignored when hashing.
|
|
||||||
testname="changes in comments"
|
|
||||||
$CCACHE -C >/dev/null
|
|
||||||
$CCACHE -z >/dev/null
|
|
||||||
cat <<EOF >comments.h
|
|
||||||
/*
|
|
||||||
* /* foo comment
|
|
||||||
*/
|
|
||||||
EOF
|
|
||||||
backdate comments.h
|
|
||||||
cat <<'EOF' >comments.c
|
|
||||||
#include "comments.h"
|
|
||||||
char test[] = "\
|
|
||||||
/* apple */ // banana"; // foo comment
|
|
||||||
EOF
|
|
||||||
|
|
||||||
$CCACHE $COMPILER -c comments.c
|
|
||||||
checkstat 'cache hit (direct)' 0
|
|
||||||
checkstat 'cache hit (preprocessed)' 0
|
|
||||||
checkstat 'cache miss' 1
|
|
||||||
|
|
||||||
sed_in_place 's/foo/ignored/' comments.h comments.c
|
|
||||||
backdate comments.h
|
|
||||||
|
|
||||||
$CCACHE $COMPILER -c comments.c
|
|
||||||
checkstat 'cache hit (direct)' 1
|
|
||||||
checkstat 'cache hit (preprocessed)' 0
|
|
||||||
checkstat 'cache miss' 1
|
|
||||||
|
|
||||||
# Check that comment-like string contents are hashed.
|
|
||||||
sed_in_place 's/apple/orange/' comments.c
|
|
||||||
backdate comments.h
|
|
||||||
|
|
||||||
$CCACHE $COMPILER -c comments.c
|
|
||||||
checkstat 'cache hit (direct)' 1
|
|
||||||
checkstat 'cache hit (preprocessed)' 0
|
|
||||||
checkstat 'cache miss' 2
|
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
# Check that it is possible to compile and cache an empty source code file.
|
# Check that it is possible to compile and cache an empty source code file.
|
||||||
testname="empty source file"
|
testname="empty source file"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user