Remove path component limit from input of CanonicalizePath in windows

This commit is contained in:
Takuto Ikuta 2017-08-30 11:05:13 +09:00
parent 7bbc708ff0
commit 86f606fe3b
2 changed files with 50 additions and 59 deletions

View File

@ -102,18 +102,13 @@ bool CanonicalizePath(string* path, uint64_t* slash_bits, string* err) {
return true;
}
static bool IsPathSeparator(char c) {
#ifdef _WIN32
static uint64_t ShiftOverBit(int offset, uint64_t bits) {
// e.g. for |offset| == 2:
// | ... 9 8 7 6 5 4 3 2 1 0 |
// \_________________/ \_/
// above below
// So we drop the bit at offset and move above "down" into its place.
uint64_t above = bits & ~((1 << (offset + 1)) - 1);
uint64_t below = bits & ((1 << offset) - 1);
return (above >> 1) | below;
}
return c == '/' || c == '\\';
#else
return c == '/';
#endif
}
bool CanonicalizePath(char* path, size_t* len, uint64_t* slash_bits,
string* err) {
@ -134,37 +129,13 @@ bool CanonicalizePath(char* path, size_t* len, uint64_t* slash_bits,
const char* src = start;
const char* end = start + *len;
if (IsPathSeparator(*src)) {
#ifdef _WIN32
uint64_t bits = 0;
uint64_t bits_mask = 1;
int bits_offset = 0;
// Convert \ to /, setting a bit in |bits| for each \ encountered.
for (char* c = path; c < end; ++c) {
switch (*c) {
case '\\':
bits |= bits_mask;
*c = '/';
// Intentional fallthrough.
case '/':
bits_mask <<= 1;
bits_offset++;
}
}
if (bits_offset > 64) {
*err = "too many path components";
return false;
}
bits_offset = 0;
#endif
if (*src == '/') {
#ifdef _WIN32
bits_offset++;
// network path starts with //
if (*len > 1 && *(src + 1) == '/') {
if (*len > 1 && IsPathSeparator(*(src + 1))) {
src += 2;
dst += 2;
bits_offset++;
} else {
++src;
++dst;
@ -177,24 +148,16 @@ bool CanonicalizePath(char* path, size_t* len, uint64_t* slash_bits,
while (src < end) {
if (*src == '.') {
if (src + 1 == end || src[1] == '/') {
if (src + 1 == end || IsPathSeparator(src[1])) {
// '.' component; eliminate.
src += 2;
#ifdef _WIN32
bits = ShiftOverBit(bits_offset, bits);
#endif
continue;
} else if (src[1] == '.' && (src + 2 == end || src[2] == '/')) {
} else if (src[1] == '.' && (src + 2 == end || IsPathSeparator(src[2]))) {
// '..' component. Back up if possible.
if (component_count > 0) {
dst = components[component_count - 1];
src += 3;
--component_count;
#ifdef _WIN32
bits = ShiftOverBit(bits_offset, bits);
bits_offset--;
bits = ShiftOverBit(bits_offset, bits);
#endif
} else {
*dst++ = *src++;
*dst++ = *src++;
@ -204,11 +167,8 @@ bool CanonicalizePath(char* path, size_t* len, uint64_t* slash_bits,
}
}
if (*src == '/') {
if (IsPathSeparator(*src)) {
src++;
#ifdef _WIN32
bits = ShiftOverBit(bits_offset, bits);
#endif
continue;
}
@ -217,11 +177,8 @@ bool CanonicalizePath(char* path, size_t* len, uint64_t* slash_bits,
components[component_count] = dst;
++component_count;
while (*src != '/' && src != end)
while (src != end && !IsPathSeparator(*src))
*dst++ = *src++;
#ifdef _WIN32
bits_offset++;
#endif
*dst++ = *src++; // Copy '/' or final \0 character as well.
}
@ -232,6 +189,20 @@ bool CanonicalizePath(char* path, size_t* len, uint64_t* slash_bits,
*len = dst - start - 1;
#ifdef _WIN32
uint64_t bits = 0;
uint64_t bits_mask = 1;
for (char* c = start; c < start + *len; ++c) {
switch (*c) {
case '\\':
bits |= bits_mask;
*c = '/';
// Intentional fallthrough.
case '/':
bits_mask <<= 1;
}
}
*slash_bits = bits;
#else
*slash_bits = 0;

View File

@ -292,12 +292,12 @@ TEST(CanonicalizePath, TooManyComponents) {
EXPECT_TRUE(CanonicalizePath(&path, &slash_bits, &err));
EXPECT_EQ(slash_bits, 0xffffffff);
// 65 is not.
// 65 is OK if #component is less than 60 after path canonicalization.
err = "";
path = "a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./"
"a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./a/./x/y.h";
EXPECT_FALSE(CanonicalizePath(&path, &slash_bits, &err));
EXPECT_EQ(err, "too many path components");
EXPECT_TRUE(CanonicalizePath(&path, &slash_bits, &err));
EXPECT_EQ(slash_bits, 0x0);
// Backslashes version.
err = "";
@ -306,8 +306,28 @@ TEST(CanonicalizePath, TooManyComponents) {
"a\\.\\a\\.\\a\\.\\a\\.\\a\\.\\a\\.\\a\\.\\a\\.\\"
"a\\.\\a\\.\\a\\.\\a\\.\\a\\.\\a\\.\\a\\.\\a\\.\\"
"a\\.\\a\\.\\a\\.\\a\\.\\a\\.\\a\\.\\a\\.\\a\\.\\x\\y.h";
EXPECT_FALSE(CanonicalizePath(&path, &slash_bits, &err));
EXPECT_EQ(err, "too many path components");
EXPECT_TRUE(CanonicalizePath(&path, &slash_bits, &err));
EXPECT_EQ(slash_bits, 0x1ffffffff);
// 59 after canonicalization is OK.
err = "";
path = "a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/"
"a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/x/y.h";
EXPECT_EQ(58, std::count(path.begin(), path.end(), '/'));
EXPECT_TRUE(CanonicalizePath(&path, &slash_bits, &err));
EXPECT_EQ(slash_bits, 0x0);
// Backslashes version.
err = "";
path =
"a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\"
"a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\"
"a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\a\\"
"a\\a\\a\\a\\a\\a\\a\\a\\a\\x\\y.h";
EXPECT_EQ(58, std::count(path.begin(), path.end(), '\\'));
EXPECT_TRUE(CanonicalizePath(&path, &slash_bits, &err));
EXPECT_EQ(slash_bits, 0x3ffffffffffffff);
}
#endif