2002-03-31 14:15:33 +00:00
|
|
|
/*
|
2002-03-31 14:26:49 +00:00
|
|
|
Copyright (C) Andrew Tridgell 2002
|
2009-11-07 08:09:39 +00:00
|
|
|
|
2002-03-31 14:26:49 +00:00
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
2009-11-07 08:09:39 +00:00
|
|
|
|
2002-03-31 14:26:49 +00:00
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
2009-11-07 08:09:39 +00:00
|
|
|
|
2002-03-31 14:26:49 +00:00
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
2002-03-31 14:15:33 +00:00
|
|
|
*/
|
2002-03-31 14:26:49 +00:00
|
|
|
/*
|
|
|
|
C/C++ unifier
|
2002-03-31 14:15:33 +00:00
|
|
|
|
2002-04-01 03:20:51 +00:00
|
|
|
the idea is that changes that don't affect the resulting C code
|
|
|
|
should not change the hash. This is achieved by folding white-space
|
|
|
|
and other non-semantic fluff in the input into a single unified format.
|
|
|
|
|
|
|
|
This unifier was designed to match the output of the unifier in
|
|
|
|
compilercache, which is flex based. The major difference is that
|
|
|
|
this unifier is much faster (about 2x) and more forgiving of
|
|
|
|
syntactic errors. Continuing on syntactic errors is important to
|
|
|
|
cope with C/C++ extensions in the local compiler (for example,
|
2009-11-07 08:09:39 +00:00
|
|
|
inline assembly systems).
|
2002-03-31 14:26:49 +00:00
|
|
|
*/
|
2002-03-31 14:15:33 +00:00
|
|
|
|
2002-03-31 14:26:49 +00:00
|
|
|
#include "ccache.h"
|
2002-03-31 14:15:33 +00:00
|
|
|
|
2010-02-27 23:02:29 +00:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
2010-02-17 19:53:05 +00:00
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <ctype.h>
|
2010-02-27 23:02:29 +00:00
|
|
|
#include <fcntl.h>
|
2010-02-17 19:53:05 +00:00
|
|
|
#include <string.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
2010-03-18 11:43:40 +00:00
|
|
|
/*
 * Operators and punctuators recognised by the unifier, including the
 * digraphs "<%", "%>", "<:" and ":>".  The list ends with a null pointer.
 *
 * build_table() files each entry under its first character, keeping the
 * order used here, and unify() emits the first filed entry that matches
 * the input.  The order is therefore significant: an entry can only be
 * matched if no earlier entry is a prefix of it, and reordering the list
 * changes the hashes that are produced.
 */
static const char *const s_tokens[] = {
	/* three characters */
	"...", ">>=", "<<=",
	/* compound assignment */
	"+=", "-=", "*=", "/=", "%=", "&=", "^=", "|=",
	/* other two-character operators */
	">>", "<<", "++", "--", "->", "&&", "||", "<=", ">=", "==", "!=",
	/* punctuation, with digraphs mixed in */
	";", "{", "<%", "}", "%>", ",", ":", "=",
	"(", ")", "[", "<:", "]", ":>", ".", "&", "!", "~",
	/* single-character operators */
	"-", "+", "*", "/", "%", "<", ">", "^", "|", "?",
	NULL
};
|
|
|
|
|
|
|
|
/*
 * Character-class bits kept in tokens[].type.  One character can belong
 * to several classes, so every class has a bit of its own.
 */
#define C_ALPHA (1 << 0) /* letter or '_' */
#define C_SPACE (1 << 1) /* whitespace according to isspace() */
#define C_TOKEN (1 << 2) /* first character of an s_tokens entry */
#define C_QUOTE (1 << 3) /* ' or " */
#define C_DIGIT (1 << 4) /* decimal digit */
#define C_HEX   (1 << 5) /* hexadecimal digit according to isxdigit() */
#define C_FLOAT (1 << 6) /* one of l, L, f, F, u, U (number suffixes) */
#define C_SIGN  (1 << 7) /* '+' or '-' */
|
|
|
|
|
|
|
|
/*
 * Lookup table with one slot per possible unsigned char value.  It is
 * zeroed and then filled in by build_table().
 */
static struct {
	/* bitwise OR of the C_* class bits that apply to this character */
	unsigned char type;
	/* how many entries of toks[] are in use */
	unsigned char num_toks;
	/* the s_tokens entries whose first character is this character */
	const char *toks[7];
} tokens[256];
|
|
|
|
|
2002-03-31 14:26:49 +00:00
|
|
|
/* build up the table used by the unifier */
|
2002-03-31 14:15:33 +00:00
|
|
|
static void build_table(void)
|
|
|
|
{
|
|
|
|
unsigned char c;
|
|
|
|
int i;
|
2002-03-31 14:26:49 +00:00
|
|
|
static int done;
|
|
|
|
|
|
|
|
if (done) return;
|
|
|
|
done = 1;
|
2002-03-31 14:15:33 +00:00
|
|
|
|
|
|
|
memset(tokens, 0, sizeof(tokens));
|
|
|
|
for (c=0;c<128;c++) {
|
|
|
|
if (isalpha(c) || c == '_') tokens[c].type |= C_ALPHA;
|
|
|
|
if (isdigit(c)) tokens[c].type |= C_DIGIT;
|
|
|
|
if (isspace(c)) tokens[c].type |= C_SPACE;
|
|
|
|
if (isxdigit(c)) tokens[c].type |= C_HEX;
|
|
|
|
}
|
|
|
|
tokens['\''].type |= C_QUOTE;
|
|
|
|
tokens['"'].type |= C_QUOTE;
|
|
|
|
tokens['l'].type |= C_FLOAT;
|
|
|
|
tokens['L'].type |= C_FLOAT;
|
|
|
|
tokens['f'].type |= C_FLOAT;
|
|
|
|
tokens['F'].type |= C_FLOAT;
|
|
|
|
tokens['U'].type |= C_FLOAT;
|
|
|
|
tokens['u'].type |= C_FLOAT;
|
|
|
|
|
|
|
|
tokens['-'].type |= C_SIGN;
|
|
|
|
tokens['+'].type |= C_SIGN;
|
|
|
|
|
|
|
|
for (i=0;s_tokens[i];i++) {
|
|
|
|
c = s_tokens[i][0];
|
|
|
|
tokens[c].type |= C_TOKEN;
|
|
|
|
tokens[c].toks[tokens[c].num_toks] = s_tokens[i];
|
|
|
|
tokens[c].num_toks++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-03-31 14:26:49 +00:00
|
|
|
/*
 * Queue one character of unified output for hashing.
 *
 * Characters accumulate in a 64-byte static buffer, which is passed to
 * hash_buffer() whenever it becomes full.  c == 0 is not data but a flush
 * request: anything still buffered is hashed, and hash_buffer() is then
 * called once more with a NULL buffer and a length of 0.
 *
 * The buffer and its fill level are static, so they persist between calls
 * and are shared no matter which hash pointer is passed in.
 */
static void pushchar(struct mdfour *hash, unsigned char c)
{
	static unsigned char pending[64];
	static size_t used;

	if (c != 0) {
		pending[used] = c;
		used++;
		if (used < sizeof(pending)) {
			return;
		}
		/* buffer is full: hand it over and start again */
		hash_buffer(hash, (char *)pending, used);
		used = 0;
		return;
	}

	/* flush request */
	if (used > 0) {
		hash_buffer(hash, (char *)pending, used);
		used = 0;
	}
	hash_buffer(hash, NULL, 0);
}
|
|
|
|
|
2002-03-31 14:26:49 +00:00
|
|
|
/* hash some C/C++ code after unifying */
|
2009-11-08 19:13:27 +00:00
|
|
|
static void unify(struct mdfour *hash, unsigned char *p, size_t size)
|
2002-03-31 14:15:33 +00:00
|
|
|
{
|
|
|
|
size_t ofs;
|
|
|
|
unsigned char q;
|
|
|
|
int i;
|
|
|
|
|
2002-03-31 14:26:49 +00:00
|
|
|
build_table();
|
|
|
|
|
2002-03-31 14:15:33 +00:00
|
|
|
for (ofs=0; ofs<size;) {
|
|
|
|
if (p[ofs] == '#') {
|
|
|
|
if ((size-ofs) > 2 && p[ofs+1] == ' ' && isdigit(p[ofs+2])) {
|
|
|
|
do {
|
|
|
|
ofs++;
|
|
|
|
} while (ofs < size && p[ofs] != '\n');
|
|
|
|
ofs++;
|
|
|
|
} else {
|
|
|
|
do {
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, p[ofs]);
|
2002-03-31 14:15:33 +00:00
|
|
|
ofs++;
|
|
|
|
} while (ofs < size && p[ofs] != '\n');
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, '\n');
|
2002-03-31 14:15:33 +00:00
|
|
|
ofs++;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tokens[p[ofs]].type & C_ALPHA) {
|
|
|
|
do {
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, p[ofs]);
|
2002-03-31 14:15:33 +00:00
|
|
|
ofs++;
|
2009-11-07 08:09:39 +00:00
|
|
|
} while (ofs < size &&
|
2002-03-31 14:15:33 +00:00
|
|
|
(tokens[p[ofs]].type & (C_ALPHA|C_DIGIT)));
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, '\n');
|
2002-03-31 14:15:33 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tokens[p[ofs]].type & C_DIGIT) {
|
|
|
|
do {
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, p[ofs]);
|
2002-03-31 14:15:33 +00:00
|
|
|
ofs++;
|
2009-11-07 08:09:39 +00:00
|
|
|
} while (ofs < size &&
|
2002-04-01 01:00:48 +00:00
|
|
|
((tokens[p[ofs]].type & C_DIGIT) || p[ofs] == '.'));
|
|
|
|
if (ofs < size && (p[ofs] == 'x' || p[ofs] == 'X')) {
|
2002-03-31 14:15:33 +00:00
|
|
|
do {
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, p[ofs]);
|
2002-03-31 14:15:33 +00:00
|
|
|
ofs++;
|
2002-04-01 01:00:48 +00:00
|
|
|
} while (ofs < size && (tokens[p[ofs]].type & C_HEX));
|
2002-03-31 14:15:33 +00:00
|
|
|
}
|
2002-04-01 01:00:48 +00:00
|
|
|
if (ofs < size && (p[ofs] == 'E' || p[ofs] == 'e')) {
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, p[ofs]);
|
2002-03-31 14:15:33 +00:00
|
|
|
ofs++;
|
2009-11-07 08:09:39 +00:00
|
|
|
while (ofs < size &&
|
2002-04-01 01:00:48 +00:00
|
|
|
(tokens[p[ofs]].type & (C_DIGIT|C_SIGN))) {
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, p[ofs]);
|
2002-03-31 14:15:33 +00:00
|
|
|
ofs++;
|
|
|
|
}
|
|
|
|
}
|
2002-04-01 01:00:48 +00:00
|
|
|
while (ofs < size && (tokens[p[ofs]].type & C_FLOAT)) {
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, p[ofs]);
|
2002-03-31 14:15:33 +00:00
|
|
|
ofs++;
|
|
|
|
}
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, '\n');
|
2002-03-31 14:15:33 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tokens[p[ofs]].type & C_SPACE) {
|
|
|
|
do {
|
|
|
|
ofs++;
|
2002-04-01 01:00:48 +00:00
|
|
|
} while (ofs < size && (tokens[p[ofs]].type & C_SPACE));
|
2002-03-31 14:15:33 +00:00
|
|
|
continue;
|
|
|
|
}
|
2009-11-07 08:09:39 +00:00
|
|
|
|
2002-03-31 14:15:33 +00:00
|
|
|
if (tokens[p[ofs]].type & C_QUOTE) {
|
|
|
|
q = p[ofs];
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, p[ofs]);
|
2002-03-31 14:15:33 +00:00
|
|
|
do {
|
|
|
|
ofs++;
|
|
|
|
while (ofs < size-1 && p[ofs] == '\\') {
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, p[ofs]);
|
|
|
|
pushchar(hash, p[ofs+1]);
|
2002-03-31 14:15:33 +00:00
|
|
|
ofs+=2;
|
|
|
|
}
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, p[ofs]);
|
2002-03-31 14:15:33 +00:00
|
|
|
} while (ofs < size && p[ofs] != q);
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, '\n');
|
2002-03-31 14:15:33 +00:00
|
|
|
ofs++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tokens[p[ofs]].type & C_TOKEN) {
|
|
|
|
q = p[ofs];
|
2002-04-01 03:26:49 +00:00
|
|
|
for (i=0;i<tokens[q].num_toks;i++) {
|
2002-04-01 03:41:44 +00:00
|
|
|
unsigned char *s = (unsigned char *)tokens[q].toks[i];
|
|
|
|
int len = strlen((char *)s);
|
2002-04-01 03:26:49 +00:00
|
|
|
if (size >= ofs+len && memcmp(&p[ofs], s, len) == 0) {
|
2002-03-31 14:15:33 +00:00
|
|
|
int j;
|
|
|
|
for (j=0;s[j];j++) {
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, s[j]);
|
2002-03-31 14:15:33 +00:00
|
|
|
ofs++;
|
|
|
|
}
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, '\n');
|
2002-03-31 14:15:33 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2002-04-01 03:26:49 +00:00
|
|
|
if (i < tokens[q].num_toks) {
|
|
|
|
continue;
|
|
|
|
}
|
2002-03-31 14:15:33 +00:00
|
|
|
}
|
|
|
|
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, p[ofs]);
|
|
|
|
pushchar(hash, '\n');
|
2002-03-31 14:15:33 +00:00
|
|
|
ofs++;
|
|
|
|
}
|
2009-11-08 19:13:27 +00:00
|
|
|
pushchar(hash, 0);
|
2002-03-31 14:15:33 +00:00
|
|
|
}
|
|
|
|
|
2002-04-01 00:23:31 +00:00
|
|
|
|
2009-11-07 08:09:39 +00:00
|
|
|
/* hash a file that consists of preprocessor output, but remove any line
|
2002-04-01 00:23:31 +00:00
|
|
|
number information from the hash
|
|
|
|
*/
|
2009-11-08 19:13:27 +00:00
|
|
|
int unify_hash(struct mdfour *hash, const char *fname)
|
2002-04-01 00:23:31 +00:00
|
|
|
{
|
|
|
|
int fd;
|
2009-11-07 08:09:39 +00:00
|
|
|
struct stat st;
|
2002-04-01 00:23:31 +00:00
|
|
|
char *map;
|
|
|
|
|
2004-09-06 12:24:05 +00:00
|
|
|
fd = open(fname, O_RDONLY|O_BINARY);
|
2002-04-01 00:23:31 +00:00
|
|
|
if (fd == -1 || fstat(fd, &st) != 0) {
|
2010-02-24 19:46:17 +00:00
|
|
|
cc_log("Failed to open preprocessor output %s", fname);
|
2002-04-01 00:23:31 +00:00
|
|
|
stats_update(STATS_PREPROCESSOR);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* we use mmap() to make it easy to handle arbitrarily long
|
|
|
|
lines in preprocessor output. I have seen lines of over
|
|
|
|
100k in length, so this is well worth it */
|
|
|
|
map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
|
2010-02-28 00:46:13 +00:00
|
|
|
close(fd);
|
2002-04-01 00:23:31 +00:00
|
|
|
if (map == (char *)-1) {
|
2010-02-24 19:46:17 +00:00
|
|
|
cc_log("Failed to mmap %s", fname);
|
2002-04-01 00:23:31 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* pass it through the unifier */
|
2009-11-08 19:13:27 +00:00
|
|
|
unify(hash, (unsigned char *)map, st.st_size);
|
2002-04-01 00:23:31 +00:00
|
|
|
|
|
|
|
munmap(map, st.st_size);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|