ls: unicode fixes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2010-01-31 05:15:38 +01:00
parent ed910c750d
commit d8528b8e56
8 changed files with 573 additions and 234 deletions

View File

@ -7,7 +7,7 @@ dumpleases
Applets which may need unicode handling (more extensive than sanitizing
of filenames in error messages):
ls - uses unicode_strlen, not scrlen
ls - work in progress
expand, unexpand - uses unicode_strlen, not scrlen
ash, hush through lineedit - uses unicode_strlen, not scrlen
top - need to sanitize process args

View File

@ -241,9 +241,6 @@ struct dnode {
IF_SELINUX(security_context_t sid;)
};
static struct dnode **list_dir(const char *, unsigned *);
static unsigned list_single(const struct dnode *);
struct globals {
#if ENABLE_FEATURE_LS_COLOR
smallint show_color;
@ -528,242 +525,66 @@ static void dnsort(struct dnode **dn, int size)
#endif
static void showfiles(struct dnode **dn, unsigned nfiles)
static unsigned calc_name_len(const char *name)
{
unsigned i, ncols, nrows, row, nc;
unsigned column = 0;
unsigned nexttab = 0;
unsigned column_width = 0; /* for STYLE_LONG and STYLE_SINGLE not used */
unsigned len;
uni_stat_t uni_stat;
/* Never happens:
if (dn == NULL || nfiles < 1)
return;
*/
// TODO: quote tab as \t, etc, if -Q
name = printable_string(&uni_stat, name);
if (all_fmt & STYLE_LONG) {
ncols = 1;
} else {
/* find the longest file name, use that as the column width */
for (i = 0; dn[i]; i++) {
int len = unicode_strlen(dn[i]->name);
if (column_width < len)
column_width = len;
if (!(option_mask32 & OPT_Q)) {
return uni_stat.unicode_width;
}
len = 2 + uni_stat.unicode_width;
while (*name) {
if (*name == '"' || *name == '\\') {
len++;
}
column_width += tabstops +
IF_SELINUX( ((all_fmt & LIST_CONTEXT) ? 33 : 0) + )
((all_fmt & LIST_INO) ? 8 : 0) +
((all_fmt & LIST_BLOCKS) ? 5 : 0);
ncols = (int) (terminal_width / column_width);
}
if (ncols > 1) {
nrows = nfiles / ncols;
if (nrows * ncols < nfiles)
nrows++; /* round up fractionals */
} else {
nrows = nfiles;
ncols = 1;
}
for (row = 0; row < nrows; row++) {
for (nc = 0; nc < ncols; nc++) {
/* reach into the array based on the column and row */
if (all_fmt & DISP_ROWS)
i = (row * ncols) + nc; /* display across row */
else
i = (nc * nrows) + row; /* display by column */
if (i < nfiles) {
if (column > 0) {
nexttab -= column;
printf("%*s", nexttab, "");
column += nexttab;
}
nexttab = column + column_width;
column += list_single(dn[i]);
}
}
putchar('\n');
column = 0;
name++;
}
return len;
}
#if ENABLE_DESKTOP
/* http://www.opengroup.org/onlinepubs/9699919799/utilities/ls.html
* If any of the -l, -n, -s options is specified, each list
* of files within the directory shall be preceded by a
* status line indicating the number of file system blocks
* occupied by files in the directory in 512-byte units if
* the -k option is not specified, or 1024-byte units if the
* -k option is specified, rounded up to the next integral
* number of units.
/* Return the number of used columns.
* Note that only STYLE_COLUMNS uses return value.
* STYLE_SINGLE and STYLE_LONG don't care.
* coreutils 7.2 also supports:
* ls -b (--escape) = octal escapes (although it doesn't look like working)
* ls -N (--literal) = not escape at all
*/
/* by Jorgen Overgaard (jorgen AT antistaten.se) */
static off_t calculate_blocks(struct dnode **dn)
static unsigned print_name(const char *name)
{
uoff_t blocks = 1;
if (dn) {
while (*dn) {
/* st_blocks is in 512 byte blocks */
blocks += (*dn)->dstat.st_blocks;
dn++;
}
unsigned len;
uni_stat_t uni_stat;
// TODO: quote tab as \t, etc, if -Q
name = printable_string(&uni_stat, name);
if (!(option_mask32 & OPT_Q)) {
fputs(name, stdout);
return uni_stat.unicode_width;
}
/* Even though standard says use 512 byte blocks, coreutils use 1k */
/* Actually, we round up by calculating (blocks + 1) / 2,
* "+ 1" was done when we initialized blocks to 1 */
return blocks >> 1;
}
#endif
static void showdirs(struct dnode **dn, int first)
{
unsigned nfiles;
unsigned dndirs;
struct dnode **subdnp;
struct dnode **dnd;
/* Never happens:
if (dn == NULL || ndirs < 1) {
return;
}
*/
for (; *dn; dn++) {
if (all_fmt & (DISP_DIRNAME | DISP_RECURSIVE)) {
if (!first)
bb_putchar('\n');
first = 0;
printf("%s:\n", (*dn)->fullname);
}
subdnp = list_dir((*dn)->fullname, &nfiles);
#if ENABLE_DESKTOP
if ((all_fmt & STYLE_MASK) == STYLE_LONG)
printf("total %"OFF_FMT"u\n", calculate_blocks(subdnp));
#endif
if (nfiles > 0) {
/* list all files at this level */
dnsort(subdnp, nfiles);
showfiles(subdnp, nfiles);
if (ENABLE_FEATURE_LS_RECURSIVE
&& (all_fmt & DISP_RECURSIVE)
) {
/* recursive - list the sub-dirs */
dnd = splitdnarray(subdnp, SPLIT_SUBDIR);
dndirs = count_dirs(subdnp, SPLIT_SUBDIR);
if (dndirs > 0) {
dnsort(dnd, dndirs);
showdirs(dnd, 0);
/* free the array of dnode pointers to the dirs */
free(dnd);
}
}
/* free the dnodes and the fullname mem */
dfree(subdnp);
len = 2 + uni_stat.unicode_width;
putchar('"');
while (*name) {
if (*name == '"' || *name == '\\') {
putchar('\\');
len++;
}
putchar(*name++);
}
putchar('"');
return len;
}
/* Returns NULL-terminated malloced vector of pointers (or NULL) */
/* Read directory 'path' and collect a dnode for every entry that should be
 * listed. The entry count is stored in *nfiles_p (0 on failure or when
 * nothing qualifies). Returns the vector obtained from dnalloc() filled with
 * the collected nodes, or NULL if the directory could not be opened or no
 * entry qualified.
 */
static struct dnode **list_dir(const char *path, unsigned *nfiles_p)
{
	struct dnode *head, *node, **vec;
	struct dirent *de;
	DIR *dirp;
	unsigned count, idx;

	*nfiles_p = 0;
	dirp = warn_opendir(path);
	if (!dirp) {
		exit_code = EXIT_FAILURE;
		return NULL; /* directory could not be opened */
	}

	head = NULL;
	count = 0;
	for (;;) {
		const char *fname;
		char *fullname;

		de = readdir(dirp);
		if (!de)
			break;
		fname = de->d_name;

		/* Names starting with '.' are filtered: "." and ".." need
		 * DISP_DOT, and every dot-name additionally needs DISP_HIDDEN */
		if (fname[0] == '.') {
			int dot_or_dotdot = (fname[1] == '\0')
				|| (fname[1] == '.' && fname[2] == '\0');

			if (dot_or_dotdot && !(all_fmt & DISP_DOT))
				continue;
			if (!(all_fmt & DISP_HIDDEN))
				continue;
		}

		fullname = concat_path_file(path, fname);
		node = my_stat(fullname, bb_basename(fullname), 0);
		if (!node) {
			free(fullname);
			continue;
		}
		/* presumably tells the cleanup code that fullname is heap-allocated */
		node->fname_allocated = 1;
		/* prepend: the list ends up in reverse readdir order */
		node->next = head;
		head = node;
		count++;
	}
	closedir(dirp);

	if (!head)
		return NULL;

	/* The number of entries is known now: move the list into a vector */
	*nfiles_p = count;
	vec = dnalloc(count);
	for (idx = 0; head != NULL; idx++) {
		vec[idx] = head;
		head = head->next;
	}
	return vec;
}
/* Print a file name to stdout and return the length accounted for it.
 *
 * With -Q (OPT_Q) the name is wrapped in double quotes, and every '"' and
 * '\' inside it is preceded by a backslash, so that the quoted form stays
 * unambiguous. The returned length then includes the two quotes and every
 * inserted backslash.
 * Without -Q the name is printed as-is.
 */
static int print_name(const char *name)
{
	if (option_mask32 & OPT_Q) {
#if ENABLE_FEATURE_ASSUME_UNICODE
		/* 2 = enclosing quotes; the name itself is measured up front */
		unsigned len = 2 + unicode_strlen(name);
#else
		/* 2 = enclosing quotes; name bytes are counted in the loop below */
		unsigned len = 2;
#endif
		putchar('"');
		while (*name) {
			/* A bare backslash has to be escaped too: otherwise a name
			 * ending in '\' would seem to escape the closing quote */
			if (*name == '"' || *name == '\\') {
				putchar('\\');
				len++;
			}
			putchar(*name++);
			if (!ENABLE_FEATURE_ASSUME_UNICODE)
				len++;
		}
		putchar('"');
		return len;
	}
	/* No -Q: */
#if ENABLE_FEATURE_ASSUME_UNICODE
	fputs(name, stdout);
	return unicode_strlen(name);
#else
	return printf("%s", name);
#endif
}
/* Return the number of used columns.
* Note that only STYLE_COLUMNS uses return value,
* STYLE_SINGLE and STYLE_LONG don't care.
*/
static NOINLINE unsigned list_single(const struct dnode *dn)
{
unsigned column = 0;
@ -914,6 +735,207 @@ static NOINLINE unsigned list_single(const struct dnode *dn)
return column;
}
/* Print the nfiles entries of the NULL-terminated vector dn.
 *
 * When the STYLE_LONG bit is set in all_fmt, one entry is printed per line.
 * Otherwise the entries are laid out in columns sized after the longest
 * name: filled row by row if DISP_ROWS is set, column by column if not.
 */
static void showfiles(struct dnode **dn, unsigned nfiles)
{
	unsigned idx, ncols, nrows, r, c;
	unsigned cur_col = 0;    /* columns already printed on this line */
	unsigned tab_target = 0; /* column at which the next entry starts */
	unsigned col_width = 0;  /* used only by STYLE_COLUMNS */

	if (all_fmt & STYLE_LONG) { /* STYLE_LONG or STYLE_SINGLE */
		ncols = 1;
	} else {
		/* the widest name determines the width of every column */
		for (idx = 0; dn[idx] != NULL; idx++) {
			unsigned name_len = calc_name_len(dn[idx]->name);

			if (name_len > col_width)
				col_width = name_len;
		}
		/* add room for the gap and for the optional per-entry prefixes */
		col_width += tabstops +
			IF_SELINUX( ((all_fmt & LIST_CONTEXT) ? 33 : 0) + )
			((all_fmt & LIST_INO) ? 8 : 0) +
			((all_fmt & LIST_BLOCKS) ? 5 : 0);
		ncols = (int) (terminal_width / col_width);
	}

	if (ncols <= 1) {
		ncols = 1;
		nrows = nfiles;
	} else {
		nrows = nfiles / ncols;
		if (nrows * ncols < nfiles)
			nrows++; /* a partially filled last row still needs a line */
	}

	for (r = 0; r < nrows; r++) {
		for (c = 0; c < ncols; c++) {
			/* map (row, column) to an index in dn[] */
			idx = (all_fmt & DISP_ROWS)
				? (r * ncols) + c  /* display across row */
				: (c * nrows) + r; /* display by column */
			if (idx >= nfiles)
				continue;

			if (cur_col > 0) {
				/* pad with spaces up to the start of this column */
				tab_target -= cur_col;
				printf("%*s", tab_target, "");
				cur_col += tab_target;
			}
			tab_target = cur_col + col_width;
			cur_col += list_single(dn[idx]);
		}
		putchar('\n');
		cur_col = 0;
	}
}
#if ENABLE_DESKTOP
/* http://www.opengroup.org/onlinepubs/9699919799/utilities/ls.html
* If any of the -l, -n, -s options is specified, each list
* of files within the directory shall be preceded by a
* status line indicating the number of file system blocks
* occupied by files in the directory in 512-byte units if
* the -k option is not specified, or 1024-byte units if the
* -k option is specified, rounded up to the next integral
* number of units.
*/
/* by Jorgen Overgaard (jorgen AT antistaten.se) */
static off_t calculate_blocks(struct dnode **dn)
{
uoff_t blocks = 1;
if (dn) {
while (*dn) {
/* st_blocks is in 512 byte blocks */
blocks += (*dn)->dstat.st_blocks;
dn++;
}
}
/* Even though standard says use 512 byte blocks, coreutils use 1k */
/* Actually, we round up by calculating (blocks + 1) / 2,
* "+ 1" was done when we initialized blocks to 1 */
return blocks >> 1;
}
#endif
static struct dnode **list_dir(const char *, unsigned *);
/* List the contents of every directory in the NULL-terminated vector dn.
 *
 * A "name:" header is printed before each listing when DISP_DIRNAME or
 * DISP_RECURSIVE is set; 'first' being nonzero suppresses the blank line
 * that otherwise separates the header from preceding output.
 * With DISP_RECURSIVE (and ENABLE_FEATURE_LS_RECURSIVE) the function calls
 * itself for the subdirectories found in each listing.
 */
static void showdirs(struct dnode **dn, int first)
{
	struct dnode **entries; /* contents of the current directory */
	struct dnode **subdirs; /* subset of entries that are subdirectories */
	unsigned entry_cnt;
	unsigned subdir_cnt;

	while (*dn != NULL) {
		struct dnode *dir = *dn++;

		if (all_fmt & (DISP_DIRNAME | DISP_RECURSIVE)) {
			if (first)
				first = 0;
			else
				bb_putchar('\n');
			printf("%s:\n", dir->fullname);
		}

		entries = list_dir(dir->fullname, &entry_cnt);
#if ENABLE_DESKTOP
		if ((all_fmt & STYLE_MASK) == STYLE_LONG)
			printf("total %"OFF_FMT"u\n", calculate_blocks(entries));
#endif
		if (entry_cnt == 0)
			continue;

		/* list all files at this level */
		dnsort(entries, entry_cnt);
		showfiles(entries, entry_cnt);

		if (ENABLE_FEATURE_LS_RECURSIVE
		 && (all_fmt & DISP_RECURSIVE)
		) {
			/* recursive - descend into the sub-dirs */
			subdirs = splitdnarray(entries, SPLIT_SUBDIR);
			subdir_cnt = count_dirs(entries, SPLIT_SUBDIR);
			if (subdir_cnt > 0) {
				dnsort(subdirs, subdir_cnt);
				showdirs(subdirs, 0);
				/* only the pointer array is released here */
				free(subdirs);
			}
		}
		/* release the dnodes and the fullname memory */
		dfree(entries);
	}
}
/* Returns NULL-terminated malloced vector of pointers (or NULL) */
/* Read directory 'path' and collect a dnode for every entry that should be
 * listed. The entry count is stored in *nfiles_p (0 on failure or when
 * nothing qualifies). Returns the vector obtained from dnalloc() filled with
 * the collected nodes, or NULL if the directory could not be opened or no
 * entry qualified.
 */
static struct dnode **list_dir(const char *path, unsigned *nfiles_p)
{
	struct dnode *head, *node, **vec;
	struct dirent *de;
	DIR *dirp;
	unsigned count, idx;

	*nfiles_p = 0;
	dirp = warn_opendir(path);
	if (!dirp) {
		exit_code = EXIT_FAILURE;
		return NULL; /* directory could not be opened */
	}

	head = NULL;
	count = 0;
	for (;;) {
		const char *fname;
		char *fullname;

		de = readdir(dirp);
		if (!de)
			break;
		fname = de->d_name;

		/* Names starting with '.' are filtered: "." and ".." need
		 * DISP_DOT, and every dot-name additionally needs DISP_HIDDEN */
		if (fname[0] == '.') {
			int dot_or_dotdot = (fname[1] == '\0')
				|| (fname[1] == '.' && fname[2] == '\0');

			if (dot_or_dotdot && !(all_fmt & DISP_DOT))
				continue;
			if (!(all_fmt & DISP_HIDDEN))
				continue;
		}

		fullname = concat_path_file(path, fname);
		node = my_stat(fullname, bb_basename(fullname), 0);
		if (!node) {
			free(fullname);
			continue;
		}
		/* presumably tells the cleanup code that fullname is heap-allocated */
		node->fname_allocated = 1;
		/* prepend: the list ends up in reverse readdir order */
		node->next = head;
		head = node;
		count++;
	}
	closedir(dirp);

	if (!head)
		return NULL;

	/* The number of entries is known now: move the list into a vector */
	*nfiles_p = count;
	vec = dnalloc(count);
	for (idx = 0; head != NULL; idx++) {
		vec[idx] = head;
		head = head->next;
	}
	return vec;
}
int ls_main(int argc UNUSED_PARAM, char **argv)
{

View File

@ -577,11 +577,6 @@ char *strncpy_IFNAMSIZ(char *dst, const char *src) FAST_FUNC;
* But potentially slow, don't use in one-billion-times loops */
int bb_putchar(int ch) FAST_FUNC;
char *xasprintf(const char *format, ...) __attribute__ ((format(printf, 1, 2))) FAST_FUNC RETURNS_MALLOC;
/* Prints unprintable chars ch as ^C or M-c to file
* (M-c is used only if ch is ORed with PRINTABLE_META),
* else it is printed as-is (except for ch = 0x9b) */
enum { PRINTABLE_META = 0x100 };
void fputc_printable(int ch, FILE *file) FAST_FUNC;
// gcc-4.1.1 still isn't good enough at optimizing it
// (+200 bytes compared to macro)
//static ALWAYS_INLINE
@ -594,6 +589,20 @@ void fputc_printable(int ch, FILE *file) FAST_FUNC;
#define NOT_LONE_CHAR(s,c) ((s)[0] != (c) || (s)[1])
#define DOT_OR_DOTDOT(s) ((s)[0] == '.' && (!(s)[1] || ((s)[1] == '.' && !(s)[2])))
/* Measurements of a string, reported through the 'stats' argument of
 * printable_string(). For a string made only of printable ASCII
 * all three fields are set to its length in bytes. */
typedef struct uni_stat_t {
/* length in bytes */
unsigned byte_count;
/* presumably the number of Unicode characters - TODO confirm */
unsigned unicode_count;
/* presumably the width in screen columns - TODO confirm */
unsigned unicode_width;
} uni_stat_t;
/* Returns a string with unprintable chars replaced by '?' or
 * SUBST_WCHAR. This function is unicode-aware.
 * If str contains only printable ASCII, str itself is returned.
 * Otherwise the result is a converted copy owned by the function:
 * it keeps the four most recent copies and frees the oldest one on
 * each new conversion, so the caller must not free the result and
 * must not keep it across more than three further conversions.
 * stats, if not NULL, receives the counts for the returned string. */
const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str);
/* Prints unprintable char ch as ^C or M-c to file
* (M-c is used only if ch is ORed with PRINTABLE_META),
* else it is printed as-is (except for ch = 0x9b) */
enum { PRINTABLE_META = 0x100 };
void fputc_printable(int ch, FILE *file) FAST_FUNC;
/* dmalloc will redefine these to its own implementation. It is safe
* to have the prototypes here unconditionally. */
void *malloc_or_warn(size_t size) FAST_FUNC RETURNS_MALLOC;

View File

@ -23,11 +23,6 @@ size_t FAST_FUNC unicode_strlen(const char *string);
enum {
UNI_FLAG_PAD = (1 << 0),
};
/* Measurements of a string; unicode_conv_to_printable() takes a pointer
 * to this struct as its 'stats' argument. */
typedef struct uni_stat_t {
/* presumably the length in bytes - TODO confirm */
unsigned byte_count;
/* presumably the number of Unicode characters - TODO confirm */
unsigned unicode_count;
/* presumably the width in screen columns - TODO confirm */
unsigned unicode_width;
} uni_stat_t;
//UNUSED: unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src);
//UNUSED: char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags);
char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src);

View File

@ -73,6 +73,7 @@ lib-y += perror_nomsg_and_die.o
lib-y += pidfile.o
lib-y += platform.o
lib-y += printable.o
lib-y += printable_string.o
lib-y += print_flags.o
lib-y += process_escape_sequence.o
lib-y += procps.o

65
libbb/printable_string.c Normal file
View File

@ -0,0 +1,65 @@
/* vi: set sw=4 ts=4: */
/*
* Unicode support routines.
*
* Copyright (C) 2010 Denys Vlasenko
*
* Licensed under GPL version 2, see file LICENSE in this tarball for details.
*/
#include "libbb.h"
#include "unicode.h"
/* Return a version of str that is safe to print.
 *
 * If str consists only of printable ASCII (bytes 0x20..0x7e), str itself is
 * returned. Otherwise a converted copy is returned: with
 * ENABLE_FEATURE_ASSUME_UNICODE it comes from unicode_conv_to_printable(),
 * without it every byte outside 0x20..0x7e is replaced by '?'.
 *
 * The copy is owned by this function. The four most recent copies are kept
 * in a ring and the oldest one is freed on each new conversion, so the
 * caller must not free the result.
 *
 * stats, if not NULL, is filled with the counts for the returned string
 * (on the Unicode path it is simply handed to unicode_conv_to_printable()).
 */
const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
{
	static char *saved[4];
	static unsigned cur_saved; /* = 0 */

	const char *p;
	char *converted;

	/* Look for the first byte that is not printable ASCII */
	for (p = str; ; p++) {
		unsigned char ch = *p;

		if (ch == '\0') {
			/* 99+% of inputs do not need conversion */
			if (stats) {
				stats->byte_count = (p - str);
				stats->unicode_count = (p - str);
				stats->unicode_width = (p - str);
			}
			return str;
		}
		if (ch < ' ' || ch >= 0x7f)
			break;
	}

#if ENABLE_FEATURE_ASSUME_UNICODE
	converted = unicode_conv_to_printable(stats, str);
#else
	{
		char *q;

		converted = xstrdup(str);
		for (q = converted; *q != '\0'; q++) {
			unsigned char ch = *q;

			if (ch < ' ' || ch >= 0x7f)
				*q = '?';
		}
		/* one byte == one character == one column here */
		if (stats) {
			stats->byte_count = (q - converted);
			stats->unicode_count = (q - converted);
			stats->unicode_width = (q - converted);
		}
	}
#endif

	/* Reuse the oldest ring slot; ARRAY_SIZE(saved) is a power of two,
	 * so masking wraps the index */
	free(saved[cur_saved]);
	saved[cur_saved] = converted;
	cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);

	return converted;
}

111
testsuite/ls.mk_uni_tests Normal file
View File

@ -0,0 +1,111 @@
# DO NOT EDIT THIS FILE! MOST TEXT EDITORS WILL DAMAGE IT!
>'0001_1__Some_correct_UTF-8_text___________________________________________|'
>'0002_2__Boundary_condition_test_cases_____________________________________|'
>'0003_2.1__First_possible_sequence_of_a_certain_length_____________________|'
>'0004_2.1.2__2_bytes__U-00000080_:________"€"______________________________|'
>'0005_2.1.3__3_bytes__U-00000800_:________"à €"______________________________|'
>'0006_2.1.4__4_bytes__U-00010000_:________"ð<>€€"______________________________|'
>'0007_2.1.5__5_bytes__U-00200000_:________"øˆ€€€"______________________________|'
>'0008_2.1.6__6_bytes__U-04000000_:________"ü„€€€€"______________________________|'
>'0009_2.2__Last_possible_sequence_of_a_certain_length______________________|'
>'0010_2.2.1__1_byte___U-0000007F_:________""______________________________|'
>'0011_2.2.2__2_bytes__U-000007FF_:________"ß¿"______________________________|'
>'0012_2.2.3__3_bytes__U-0000FFFF_:________"ï¿¿"______________________________|'
>'0013_2.2.4__4_bytes__U-001FFFFF_:________"÷¿¿¿"______________________________|'
>'0014_2.2.5__5_bytes__U-03FFFFFF_:________"û¿¿¿¿"______________________________|'
>'0015_2.2.6__6_bytes__U-7FFFFFFF_:________"ý¿¿¿¿¿"______________________________|'
>'0016_2.3__Other_boundary_conditions_______________________________________|'
>'0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"퟿"___________________________________|'
>'0018_2.3.2__U-0000E000_=_ee_80_80_=_""___________________________________|'
>'0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"�"___________________________________|'
>'0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"ô<>¿¿"________________________________|'
>'0021_2.3.5__U-00110000_=_f4_90_80_80_=_"ô<>€€"________________________________|'
>'0022_3__Malformed_sequences_______________________________________________|'
>'0023_3.1__Unexpected_continuation_bytes___________________________________|'
>'0024_3.1.1__First_continuation_byte_0x80:_"€"_____________________________|'
>'0025_3.1.2__Last__continuation_byte_0xbf:_"¿"_____________________________|'
>'0026_3.1.3__2_continuation_bytes:_"€¿"____________________________________|'
>'0027_3.1.4__3_continuation_bytes:_"€¿€"___________________________________|'
>'0028_3.1.5__4_continuation_bytes:_"€¿€¿"__________________________________|'
>'0029_3.1.6__5_continuation_bytes:_"€¿€¿€"_________________________________|'
>'0030_3.1.7__6_continuation_bytes:_"€¿€¿€¿"________________________________|'
>'0031_3.1.8__7_continuation_bytes:_"€¿€¿€¿€"_______________________________|'
>'0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___|'
>'0033____"€<>ƒ„…†‡ˆ‰ŠŒ<E280B9>Ž<EFBFBD>_________________________________________________|'
>'0034_____<5F>“”•˜™šœ<E280BA>žŸ_________________________________________________|'
>'0035_____ ¡¢£¤¥¦§¨©ª«¬­®¯_________________________________________________|'
>'0036_____°±²³´µ¶·¸¹º»¼½¾¿"________________________________________________|'
>'0037_3.2__Lonely_start_characters_________________________________________|'
>'0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________|'
>'0039________each_followed_by_a_space_character:___________________________|'
>'0040____"À_Á_Â_Ã_Ä_Å_Æ_Ç_È_É_Ê_Ë_Ì_Í_Î_Ï__________________________________|'
>'0041_____Ð_Ñ_Ò_Ó_Ô_Õ_Ö_×_Ø_Ù_Ú_Û_Ü_Ý_Þ_ß_"________________________________|'
>'0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________|'
>'0043________each_followed_by_a_space_character:___________________________|'
>'0044____"à_á_â_ã_ä_å_æ_ç_è_é_ê_ë_ì_í_î_ï_"________________________________|'
>'0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________|'
>'0046________each_followed_by_a_space_character:___________________________|'
>'0047____"ð_ñ_ò_ó_ô_õ_ö_÷_"________________________________________________|'
>'0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________|'
>'0049________each_followed_by_a_space_character:___________________________|'
>'0050____"ø_ù_ú_û_"________________________________________________________|'
>'0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________|'
>'0052________each_followed_by_a_space_character:___________________________|'
>'0053____"ü_ý_"____________________________________________________________|'
>'0054_3.3__Sequences_with_last_continuation_byte_missing___________________|'
>'0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____"À"______|'
>'0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____"à€"______|'
>'0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____"ð€€"______|'
>'0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____"ø€€€"______|'
>'0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____"ü€€€€"______|'
>'0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_"ß"______|'
>'0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_"ï¿"______|'
>'0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_"÷¿¿"______|'
>'0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_"û¿¿¿"______|'
>'0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_"ý¿¿¿¿"______|'
>'0065_3.4__Concatenation_of_incomplete_sequences___________________________|'
>'0066____"Àà€ð€€ø€€€ü€€€€ßï¿÷¿¿û¿¿¿ý¿¿¿¿"______________________________________________________|'
>'0067_3.5__Impossible_bytes________________________________________________|'
>'0068_3.5.1__fe_=_"þ"______________________________________________________|'
>'0069_3.5.2__ff_=_"ÿ"______________________________________________________|'
>'0070_3.5.3__fe_fe_ff_ff_=_"þþÿÿ"__________________________________________|'
>'0071_4__Overlong_sequences________________________________________________|'
>'0072_4.1__Examples_of_an_overlong_ASCII_character_________________________|'
>'0073_4.1.1_U+002F_=_c0_af_____________=_"À¯"_______________________________|'
>'0074_4.1.2_U+002F_=_e0_80_af__________=_"à€¯"_______________________________|'
>'0075_4.1.3_U+002F_=_f0_80_80_af_______=_"ð€€¯"_______________________________|'
>'0076_4.1.4_U+002F_=_f8_80_80_80_af____=_"ø€€€¯"_______________________________|'
>'0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_"ü€€€€¯"_______________________________|'
>'0078_4.2__Maximum_overlong_sequences______________________________________|'
>'0079_4.2.1__U-0000007F_=_c1_bf_____________=_"Á¿"__________________________|'
>'0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_"àŸ¿"__________________________|'
>'0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_"ð<>¿¿"__________________________|'
>'0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_"ø‡¿¿¿"__________________________|'
>'0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_"üƒ¿¿¿¿"__________________________|'
>'0084_4.3__Overlong_representation_of_the_NUL_character____________________|'
>'0085_4.3.1__U+0000_=_c0_80_____________=_"À€"______________________________|'
>'0086_4.3.2__U+0000_=_e0_80_80__________=_"à€€"______________________________|'
>'0087_4.3.3__U+0000_=_f0_80_80_80_______=_"ð€€€"______________________________|'
>'0088_4.3.4__U+0000_=_f8_80_80_80_80____=_"ø€€€€"______________________________|'
>'0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_"ü€€€€€"______________________________|'
>'0090_5__Illegal_code_positions____________________________________________|'
>'0091_5.1_Single_UTF-16_surrogates_________________________________________|'
>'0092_5.1.1__U+D800_=_ed_a0_80_=_"í €"_______________________________________|'
>'0093_5.1.2__U+DB7F_=_ed_ad_bf_=_"í­¿"_______________________________________|'
>'0094_5.1.3__U+DB80_=_ed_ae_80_=_"í®€"_______________________________________|'
>'0095_5.1.4__U+DBFF_=_ed_af_bf_=_"í¯¿"_______________________________________|'
>'0096_5.1.5__U+DC00_=_ed_b0_80_=_"í°€"_______________________________________|'
>'0097_5.1.6__U+DF80_=_ed_be_80_=_"í¾€"_______________________________________|'
>'0098_5.1.7__U+DFFF_=_ed_bf_bf_=_"í¿¿"_______________________________________|'
>'0099_5.2_Paired_UTF-16_surrogates_________________________________________|'
>'0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_"𐀀"______________________|'
>'0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_"𐏿"______________________|'
>'0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_"í­¿í°€"______________________|'
>'0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_"í­¿í¿¿"______________________|'
>'0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_"󰀀"______________________|'
>'0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_"󰏿"______________________|'
>'0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_"􏰀"______________________|'
>'0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_"􏿿"______________________|'
>'0108_5.3_Other_illegal_code_positions_____________________________________|'
>'0109_5.3.1__U+FFFE_=_ef_bf_be_=_"￾"_______________________________________|'
>'0110_5.3.2__U+FFFF_=_ef_bf_bf_=_"ï¿¿"_______________________________________|'

136
testsuite/ls.tests Executable file
View File

@ -0,0 +1,136 @@
#!/bin/sh
# Copyright 2010 by Denys Vlasenko
# Licensed under GPL v2, see file LICENSE for details.

# Pull in the test helpers and, if present, the build configuration
# (the CONFIG_* variables tested below).
. ./testing.sh
test -f "$bindir/.config" && . "$bindir/.config"

# Start from an empty scratch directory. rm reports problems on stderr,
# so that is the stream to silence (same as in the cleanup at the end);
# a real failure is still caught by the mkdir check.
rm -rf ls.testdir 2>/dev/null
mkdir ls.testdir || exit 1
# testing "test name" "command" "expected result" "file input" "stdin"
# The test isn't passing correctly now - all | chars should line up
# perfectly in the correctly passed test.
test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
&& test x"$CONFIG_SUBST_WCHAR" = x"63" \
&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"767" \
&& testing "ls unicode test" \
"(cd ls.testdir && sh ../ls.mk_uni_tests) && ls -1 ls.testdir" \
'0001_1__Some_correct_UTF-8_text___________________________________________|
0002_2__Boundary_condition_test_cases_____________________________________|
0003_2.1__First_possible_sequence_of_a_certain_length_____________________|
0004_2.1.2__2_bytes__U-00000080_:________"?"______________________________|
0005_2.1.3__3_bytes__U-00000800_:________"?"______________________________|
0006_2.1.4__4_bytes__U-00010000_:________"?"______________________________|
0007_2.1.5__5_bytes__U-00200000_:________"?"______________________________|
0008_2.1.6__6_bytes__U-04000000_:________"?"______________________________|
0009_2.2__Last_possible_sequence_of_a_certain_length______________________|
0010_2.2.1__1_byte___U-0000007F_:________"?"______________________________|
0011_2.2.2__2_bytes__U-000007FF_:________"?"______________________________|
0012_2.2.3__3_bytes__U-0000FFFF_:________"?"______________________________|
0013_2.2.4__4_bytes__U-001FFFFF_:________"?"______________________________|
0014_2.2.5__5_bytes__U-03FFFFFF_:________"?"______________________________|
0015_2.2.6__6_bytes__U-7FFFFFFF_:________"?"______________________________|
0016_2.3__Other_boundary_conditions_______________________________________|
0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"?"___________________________________|
0018_2.3.2__U-0000E000_=_ee_80_80_=_"?"___________________________________|
0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"?"___________________________________|
0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"?"________________________________|
0021_2.3.5__U-00110000_=_f4_90_80_80_=_"?"________________________________|
0022_3__Malformed_sequences_______________________________________________|
0023_3.1__Unexpected_continuation_bytes___________________________________|
0024_3.1.1__First_continuation_byte_0x80:_"?"_____________________________|
0025_3.1.2__Last__continuation_byte_0xbf:_"?"_____________________________|
0026_3.1.3__2_continuation_bytes:_"??"____________________________________|
0027_3.1.4__3_continuation_bytes:_"???"___________________________________|
0028_3.1.5__4_continuation_bytes:_"????"__________________________________|
0029_3.1.6__5_continuation_bytes:_"?????"_________________________________|
0030_3.1.7__6_continuation_bytes:_"??????"________________________________|
0031_3.1.8__7_continuation_bytes:_"???????"_______________________________|
0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___|
0033____"????????????????_________________________________________________|
0034_____????????????????_________________________________________________|
0035_____????????????????_________________________________________________|
0036_____????????????????"________________________________________________|
0037_3.2__Lonely_start_characters_________________________________________|
0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________|
0039________each_followed_by_a_space_character:___________________________|
0040____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?__________________________________|
0041_____?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________|
0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________|
0043________each_followed_by_a_space_character:___________________________|
0044____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________|
0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________|
0046________each_followed_by_a_space_character:___________________________|
0047____"?_?_?_?_?_?_?_?_"________________________________________________|
0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________|
0049________each_followed_by_a_space_character:___________________________|
0050____"?_?_?_?_"________________________________________________________|
0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________|
0052________each_followed_by_a_space_character:___________________________|
0053____"?_?_"____________________________________________________________|
0054_3.3__Sequences_with_last_continuation_byte_missing___________________|
0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______|
0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____"??"______|
0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____"???"______|
0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____"????"______|
0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____"?????"______|
0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_"?"______|
0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_"??"______|
0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_"???"______|
0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_"????"______|
0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_"?????"______|
0065_3.4__Concatenation_of_incomplete_sequences___________________________|
0066____"??????????????????????????????"______________________________________________________|
0067_3.5__Impossible_bytes________________________________________________|
0068_3.5.1__fe_=_"?"______________________________________________________|
0069_3.5.2__ff_=_"?"______________________________________________________|
0070_3.5.3__fe_fe_ff_ff_=_"????"__________________________________________|
0071_4__Overlong_sequences________________________________________________|
0072_4.1__Examples_of_an_overlong_ASCII_character_________________________|
0073_4.1.1_U+002F_=_c0_af_____________=_"??"_______________________________|
0074_4.1.2_U+002F_=_e0_80_af__________=_"???"_______________________________|
0075_4.1.3_U+002F_=_f0_80_80_af_______=_"????"_______________________________|
0076_4.1.4_U+002F_=_f8_80_80_80_af____=_"?????"_______________________________|
0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_"??????"_______________________________|
0078_4.2__Maximum_overlong_sequences______________________________________|
0079_4.2.1__U-0000007F_=_c1_bf_____________=_"??"__________________________|
0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_"?"__________________________|
0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_"?"__________________________|
0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_"?"__________________________|
0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_"?"__________________________|
0084_4.3__Overlong_representation_of_the_NUL_character____________________|
0085_4.3.1__U+0000_=_c0_80_____________=_"??"______________________________|
0086_4.3.2__U+0000_=_e0_80_80__________=_"???"______________________________|
0087_4.3.3__U+0000_=_f0_80_80_80_______=_"????"______________________________|
0088_4.3.4__U+0000_=_f8_80_80_80_80____=_"?????"______________________________|
0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_"??????"______________________________|
0090_5__Illegal_code_positions____________________________________________|
0091_5.1_Single_UTF-16_surrogates_________________________________________|
0092_5.1.1__U+D800_=_ed_a0_80_=_"?"_______________________________________|
0093_5.1.2__U+DB7F_=_ed_ad_bf_=_"?"_______________________________________|
0094_5.1.3__U+DB80_=_ed_ae_80_=_"?"_______________________________________|
0095_5.1.4__U+DBFF_=_ed_af_bf_=_"?"_______________________________________|
0096_5.1.5__U+DC00_=_ed_b0_80_=_"?"_______________________________________|
0097_5.1.6__U+DF80_=_ed_be_80_=_"?"_______________________________________|
0098_5.1.7__U+DFFF_=_ed_bf_bf_=_"?"_______________________________________|
0099_5.2_Paired_UTF-16_surrogates_________________________________________|
0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_"??"______________________|
0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_"??"______________________|
0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_"??"______________________|
0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_"??"______________________|
0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_"??"______________________|
0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_"??"______________________|
0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_"??"______________________|
0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_"??"______________________|
0108_5.3_Other_illegal_code_positions_____________________________________|
0109_5.3.1__U+FFFE_=_ef_bf_be_=_"?"_______________________________________|
0110_5.3.2__U+FFFF_=_ef_bf_bf_=_"?"_______________________________________|
' "" ""
# Clean up: remove the scratch directory, discarding any error output
rm -rf ls.testdir 2>/dev/null
# FAILCOUNT is not set in this script; presumably testing.sh maintains it
# as the number of failed tests - TODO confirm
exit $FAILCOUNT