mirror of
https://github.com/reactos/CMake.git
synced 2024-11-25 12:40:06 +00:00
86578eccf2
Per-source copyright/license notice headers that spell out copyright holder names and years are hard to maintain and often out-of-date or plain wrong. Precise contributor information is already maintained automatically by the version control tool. Ultimately it is the receiver of a file who is responsible for determining its licensing status, and per-source notices are merely a convenience. Therefore it is simpler and more accurate for each source to have a generic notice of the license name and references to more detailed information on copyright holders and full license terms. Our `Copyright.txt` file now contains a list of Contributors whose names appeared in source-level copyright notices. It also references version control history for more precise information. Therefore we no longer need to spell out the list of Contributors in each source file notice. Replace CMake per-source copyright/license notice headers with a short description of the license and links to `Copyright.txt` and online information available from "https://cmake.org/licensing". The online URL also handles cases of modules being copied out of our source into other projects, so we can drop our notices about replacing links with full license text. Run the `Utilities/Scripts/filter-notices.bash` script to perform the majority of the replacements mechanically. Manually fix up shebang lines and trailing newlines in a few files. Manually update the notices in a few files that the script does not handle.
78 lines
2.6 KiB
C
78 lines
2.6 KiB
C
/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
|
file Copyright.txt or https://cmake.org/licensing for details. */
|
|
#include "cm_utf8.h"
|
|
|
|
/*
|
|
RFC 3629
|
|
07-bit: 0xxxxxxx
|
|
11-bit: 110xxxxx 10xxxxxx
|
|
16-bit: 1110xxxx 10xxxxxx 10xxxxxx
|
|
21-bit: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
|
|
Pre-RFC Compatibility
|
|
26-bit: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
31-bit: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
*/
|
|
|
|
/* Lookup table indexed by byte value: how many consecutive one bits the
   byte starts with, counting from the most significant bit, before the
   first zero bit is reached (8 for 0xFF, which has no zero bit).  */
static unsigned char const cm_utf8_ones[256] = {
  /* 0x00 - 0x7F: 0xxxxxxx */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 - 0xBF: 10xxxxxx */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xC0 - 0xDF: 110xxxxx */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xE0 - 0xEF: 1110xxxx */
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0xF0 - 0xFF: 11110xxx, 111110xx, 1111110x, 0xFE, 0xFF */
  4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8
};
|
|
|
|
/* Mask away control bits from bytes with n leading ones, leaving only the
   payload bits.  Entry n keeps the bits below the n leading ones and the
   zero bit that terminates them:
     n = 0: 0xxxxxxx -> 7 payload bits (0x7F)
     n = 1: 10xxxxxx -> 6 payload bits (0x3F), continuation byte
     n = 2: 110xxxxx -> 5 payload bits (0x1F)
     n = 3: 1110xxxx -> 4 payload bits (0x0F)
     n = 4: 11110xxx -> 3 payload bits (0x07)
     n = 5: 111110xx -> 2 payload bits (0x03)
     n = 6: 1111110x -> 1 payload bit  (0x01)  */
static unsigned char const cm_utf8_mask[7] = { 0x7F, 0x3F, 0x1F, 0x0F,
                                               0x07, 0x03, 0x01 };
|
|
|
|
/* Smallest value that may be encoded by a sequence whose first byte has n
   leading ones (index n).  Each multi-byte entry is one past the largest
   value representable by the next shorter form: 2^7, 2^11, 2^16, 2^21,
   2^26.  Indices 0 and 1 carry no lower bound.  */
static unsigned int const cm_utf8_min[7] = {
  0u,         /* n = 0 */
  0u,         /* n = 1 */
  0x80u,      /* n = 2 */
  0x800u,     /* n = 3 */
  0x10000u,   /* n = 4 */
  0x200000u,  /* n = 5 */
  0x4000000u  /* n = 6 */
};
|
|
|
|
const char* cm_utf8_decode_character(const char* first, const char* last,
|
|
unsigned int* pc)
|
|
{
|
|
/* Count leading ones in the first byte. */
|
|
unsigned char c = (unsigned char)*first++;
|
|
unsigned char const ones = cm_utf8_ones[c];
|
|
switch (ones) {
|
|
case 0:
|
|
*pc = c;
|
|
return first; /* One-byte character. */
|
|
case 1:
|
|
case 7:
|
|
case 8:
|
|
return 0; /* Invalid leading byte. */
|
|
default:
|
|
break;
|
|
}
|
|
|
|
/* Extract bits from this multi-byte character. */
|
|
{
|
|
unsigned int uc = c & cm_utf8_mask[ones];
|
|
int left;
|
|
for (left = ones - 1; left && first != last; --left) {
|
|
c = (unsigned char)*first++;
|
|
if (cm_utf8_ones[c] != 1) {
|
|
return 0;
|
|
}
|
|
uc = (uc << 6) | (c & cm_utf8_mask[1]);
|
|
}
|
|
|
|
if (left > 0 || uc < cm_utf8_min[ones]) {
|
|
return 0;
|
|
}
|
|
|
|
*pc = uc;
|
|
return first;
|
|
}
|
|
}
|