From d8528b8e56bab7643722e4453121882d23c23c07 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 31 Jan 2010 05:15:38 +0100 Subject: [PATCH] ls: unicode fixes Signed-off-by: Denys Vlasenko --- TODO_unicode | 2 +- coreutils/ls.c | 468 ++++++++++++++++++++------------------ include/libbb.h | 19 +- include/unicode.h | 5 - libbb/Kbuild | 1 + libbb/printable_string.c | 65 ++++++ testsuite/ls.mk_uni_tests | 111 +++++++++ testsuite/ls.tests | 136 +++++++++++ 8 files changed, 573 insertions(+), 234 deletions(-) create mode 100644 libbb/printable_string.c create mode 100644 testsuite/ls.mk_uni_tests create mode 100755 testsuite/ls.tests diff --git a/TODO_unicode b/TODO_unicode index c29fd933b..b310e8d4d 100644 --- a/TODO_unicode +++ b/TODO_unicode @@ -7,7 +7,7 @@ dumpleases Applets which may need unicode handling (more extensive than sanitizing of filenames in error messages): -ls - uses unicode_strlen, not scrlen +ls - work in progress expand, unexpand - uses unicode_strlen, not scrlen ash, hush through lineedit - uses unicode_strlen, not scrlen top - need to sanitize process args diff --git a/coreutils/ls.c b/coreutils/ls.c index 6c898b793..d004ce8b1 100644 --- a/coreutils/ls.c +++ b/coreutils/ls.c @@ -241,9 +241,6 @@ struct dnode { IF_SELINUX(security_context_t sid;) }; -static struct dnode **list_dir(const char *, unsigned *); -static unsigned list_single(const struct dnode *); - struct globals { #if ENABLE_FEATURE_LS_COLOR smallint show_color; @@ -528,242 +525,66 @@ static void dnsort(struct dnode **dn, int size) #endif -static void showfiles(struct dnode **dn, unsigned nfiles) +static unsigned calc_name_len(const char *name) { - unsigned i, ncols, nrows, row, nc; - unsigned column = 0; - unsigned nexttab = 0; - unsigned column_width = 0; /* for STYLE_LONG and STYLE_SINGLE not used */ + unsigned len; + uni_stat_t uni_stat; - /* Never happens: - if (dn == NULL || nfiles < 1) - return; - */ + // TODO: quote tab as \t, etc, if -Q + name = printable_string(&uni_stat, name); - if (all_fmt & STYLE_LONG) { - ncols = 1; - } else { - /* find the longest file name, use that as the column width */ - for (i = 0; dn[i]; i++) { - int len = unicode_strlen(dn[i]->name); - if (column_width < len) - column_width = len; + if (!(option_mask32 & OPT_Q)) { + return uni_stat.unicode_width; + } + + len = 2 + uni_stat.unicode_width; + while (*name) { + if (*name == '"' || *name == '\\') { + len++; } - column_width += tabstops + - IF_SELINUX( ((all_fmt & LIST_CONTEXT) ? 33 : 0) + ) - ((all_fmt & LIST_INO) ? 8 : 0) + - ((all_fmt & LIST_BLOCKS) ? 5 : 0); - ncols = (int) (terminal_width / column_width); - } - - if (ncols > 1) { - nrows = nfiles / ncols; - if (nrows * ncols < nfiles) - nrows++; /* round up fractionals */ - } else { - nrows = nfiles; - ncols = 1; - } - - for (row = 0; row < nrows; row++) { - for (nc = 0; nc < ncols; nc++) { - /* reach into the array based on the column and row */ - if (all_fmt & DISP_ROWS) - i = (row * ncols) + nc; /* display across row */ - else - i = (nc * nrows) + row; /* display by column */ - if (i < nfiles) { - if (column > 0) { - nexttab -= column; - printf("%*s", nexttab, ""); - column += nexttab; - } - nexttab = column + column_width; - column += list_single(dn[i]); - } - } - putchar('\n'); - column = 0; + name++; } + return len; } -#if ENABLE_DESKTOP -/* http://www.opengroup.org/onlinepubs/9699919799/utilities/ls.html - * If any of the -l, -n, -s options is specified, each list - * of files within the directory shall be preceded by a - * status line indicating the number of file system blocks - * occupied by files in the directory in 512-byte units if - * the -k option is not specified, or 1024-byte units if the - * -k option is specified, rounded up to the next integral - * number of units. +/* Return the number of used columns. + * Note that only STYLE_COLUMNS uses return value. + * STYLE_SINGLE and STYLE_LONG don't care. + * coreutils 7.2 also supports: + * ls -b (--escape) = octal escapes (although it doesn't look like working) + * ls -N (--literal) = not escape at all */ -/* by Jorgen Overgaard (jorgen AT antistaten.se) */ -static off_t calculate_blocks(struct dnode **dn) +static unsigned print_name(const char *name) { - uoff_t blocks = 1; - if (dn) { - while (*dn) { - /* st_blocks is in 512 byte blocks */ - blocks += (*dn)->dstat.st_blocks; - dn++; - } + unsigned len; + uni_stat_t uni_stat; + + // TODO: quote tab as \t, etc, if -Q + name = printable_string(&uni_stat, name); + + if (!(option_mask32 & OPT_Q)) { + fputs(name, stdout); + return uni_stat.unicode_width; } - /* Even though standard says use 512 byte blocks, coreutils use 1k */ - /* Actually, we round up by calculating (blocks + 1) / 2, - * "+ 1" was done when we initialized blocks to 1 */ - return blocks >> 1; -} -#endif - - -static void showdirs(struct dnode **dn, int first) -{ - unsigned nfiles; - unsigned dndirs; - struct dnode **subdnp; - struct dnode **dnd; - - /* Never happens: - if (dn == NULL || ndirs < 1) { - return; - } - */ - - for (; *dn; dn++) { - if (all_fmt & (DISP_DIRNAME | DISP_RECURSIVE)) { - if (!first) - bb_putchar('\n'); - first = 0; - printf("%s:\n", (*dn)->fullname); - } - subdnp = list_dir((*dn)->fullname, &nfiles); -#if ENABLE_DESKTOP - if ((all_fmt & STYLE_MASK) == STYLE_LONG) - printf("total %"OFF_FMT"u\n", calculate_blocks(subdnp)); -#endif - if (nfiles > 0) { - /* list all files at this level */ - dnsort(subdnp, nfiles); - showfiles(subdnp, nfiles); - if (ENABLE_FEATURE_LS_RECURSIVE - && (all_fmt & DISP_RECURSIVE) - ) { - /* recursive - list the sub-dirs */ - dnd = splitdnarray(subdnp, SPLIT_SUBDIR); - dndirs = count_dirs(subdnp, SPLIT_SUBDIR); - if (dndirs > 0) { - dnsort(dnd, dndirs); - showdirs(dnd, 0); - /* free the array of dnode pointers to the dirs */ - free(dnd); - } - } - /* free the dnodes and the fullname mem */ - dfree(subdnp); + len = 2 + uni_stat.unicode_width; + putchar('"'); + while (*name) { + if (*name == '"' || *name == '\\') { + putchar('\\'); + len++; } + putchar(*name++); } + putchar('"'); + return len; } - -/* Returns NULL-terminated malloced vector of pointers (or NULL) */ -static struct dnode **list_dir(const char *path, unsigned *nfiles_p) -{ - struct dnode *dn, *cur, **dnp; - struct dirent *entry; - DIR *dir; - unsigned i, nfiles; - - /* Never happens: - if (path == NULL) - return NULL; - */ - - *nfiles_p = 0; - dir = warn_opendir(path); - if (dir == NULL) { - exit_code = EXIT_FAILURE; - return NULL; /* could not open the dir */ - } - dn = NULL; - nfiles = 0; - while ((entry = readdir(dir)) != NULL) { - char *fullname; - - /* are we going to list the file- it may be . or .. or a hidden file */ - if (entry->d_name[0] == '.') { - if ((!entry->d_name[1] || (entry->d_name[1] == '.' && !entry->d_name[2])) - && !(all_fmt & DISP_DOT) - ) { - continue; - } - if (!(all_fmt & DISP_HIDDEN)) - continue; - } - fullname = concat_path_file(path, entry->d_name); - cur = my_stat(fullname, bb_basename(fullname), 0); - if (!cur) { - free(fullname); - continue; - } - cur->fname_allocated = 1; - cur->next = dn; - dn = cur; - nfiles++; - } - closedir(dir); - - if (dn == NULL) - return NULL; - - /* now that we know how many files there are - * allocate memory for an array to hold dnode pointers - */ - *nfiles_p = nfiles; - dnp = dnalloc(nfiles); - for (i = 0; /* i < nfiles - detected via !dn below */; i++) { - dnp[i] = dn; /* save pointer to node in array */ - dn = dn->next; - if (!dn) - break; - } - - return dnp; -} - - -static int print_name(const char *name) -{ - if (option_mask32 & OPT_Q) { -#if ENABLE_FEATURE_ASSUME_UNICODE - unsigned len = 2 + unicode_strlen(name); -#else - unsigned len = 2; -#endif - putchar('"'); - while (*name) { - if (*name == '"') { - putchar('\\'); - len++; - } - putchar(*name++); - if (!ENABLE_FEATURE_ASSUME_UNICODE) - len++; - } - putchar('"'); - return len; - } - /* No -Q: */ -#if ENABLE_FEATURE_ASSUME_UNICODE - fputs(name, stdout); - return unicode_strlen(name); -#else - return printf("%s", name); -#endif -} - - +/* Return the number of used columns. + * Note that only STYLE_COLUMNS uses return value, + * STYLE_SINGLE and STYLE_LONG don't care. + */ static NOINLINE unsigned list_single(const struct dnode *dn) { unsigned column = 0; @@ -914,6 +735,207 @@ static NOINLINE unsigned list_single(const struct dnode *dn) return column; } +static void showfiles(struct dnode **dn, unsigned nfiles) +{ + unsigned i, ncols, nrows, row, nc; + unsigned column = 0; + unsigned nexttab = 0; + unsigned column_width = 0; /* used only by STYLE_COLUMNS */ + + if (all_fmt & STYLE_LONG) { /* STYLE_LONG or STYLE_SINGLE */ + ncols = 1; + } else { + /* find the longest file name, use that as the column width */ + for (i = 0; dn[i]; i++) { + int len = calc_name_len(dn[i]->name); + if (column_width < len) + column_width = len; + } + column_width += tabstops + + IF_SELINUX( ((all_fmt & LIST_CONTEXT) ? 33 : 0) + ) + ((all_fmt & LIST_INO) ? 8 : 0) + + ((all_fmt & LIST_BLOCKS) ? 5 : 0); + ncols = (int) (terminal_width / column_width); + } + + if (ncols > 1) { + nrows = nfiles / ncols; + if (nrows * ncols < nfiles) + nrows++; /* round up fractionals */ + } else { + nrows = nfiles; + ncols = 1; + } + + for (row = 0; row < nrows; row++) { + for (nc = 0; nc < ncols; nc++) { + /* reach into the array based on the column and row */ + if (all_fmt & DISP_ROWS) + i = (row * ncols) + nc; /* display across row */ + else + i = (nc * nrows) + row; /* display by column */ + if (i < nfiles) { + if (column > 0) { + nexttab -= column; + printf("%*s", nexttab, ""); + column += nexttab; + } + nexttab = column + column_width; + column += list_single(dn[i]); + } + } + putchar('\n'); + column = 0; + } +} + + +#if ENABLE_DESKTOP +/* http://www.opengroup.org/onlinepubs/9699919799/utilities/ls.html + * If any of the -l, -n, -s options is specified, each list + * of files within the directory shall be preceded by a + * status line indicating the number of file system blocks + * occupied by files in the directory in 512-byte units if + * the -k option is not specified, or 1024-byte units if the + * -k option is specified, rounded up to the next integral + * number of units. + */ +/* by Jorgen Overgaard (jorgen AT antistaten.se) */ +static off_t calculate_blocks(struct dnode **dn) +{ + uoff_t blocks = 1; + if (dn) { + while (*dn) { + /* st_blocks is in 512 byte blocks */ + blocks += (*dn)->dstat.st_blocks; + dn++; + } + } + + /* Even though standard says use 512 byte blocks, coreutils use 1k */ + /* Actually, we round up by calculating (blocks + 1) / 2, + * "+ 1" was done when we initialized blocks to 1 */ + return blocks >> 1; +} +#endif + + +static struct dnode **list_dir(const char *, unsigned *); + +static void showdirs(struct dnode **dn, int first) +{ + unsigned nfiles; + unsigned dndirs; + struct dnode **subdnp; + struct dnode **dnd; + + /* Never happens: + if (dn == NULL || ndirs < 1) { + return; + } + */ + + for (; *dn; dn++) { + if (all_fmt & (DISP_DIRNAME | DISP_RECURSIVE)) { + if (!first) + bb_putchar('\n'); + first = 0; + printf("%s:\n", (*dn)->fullname); + } + subdnp = list_dir((*dn)->fullname, &nfiles); +#if ENABLE_DESKTOP + if ((all_fmt & STYLE_MASK) == STYLE_LONG) + printf("total %"OFF_FMT"u\n", calculate_blocks(subdnp)); +#endif + if (nfiles > 0) { + /* list all files at this level */ + dnsort(subdnp, nfiles); + showfiles(subdnp, nfiles); + if (ENABLE_FEATURE_LS_RECURSIVE + && (all_fmt & DISP_RECURSIVE) + ) { + /* recursive - list the sub-dirs */ + dnd = splitdnarray(subdnp, SPLIT_SUBDIR); + dndirs = count_dirs(subdnp, SPLIT_SUBDIR); + if (dndirs > 0) { + dnsort(dnd, dndirs); + showdirs(dnd, 0); + /* free the array of dnode pointers to the dirs */ + free(dnd); + } + } + /* free the dnodes and the fullname mem */ + dfree(subdnp); + } + } +} + + +/* Returns NULL-terminated malloced vector of pointers (or NULL) */ +static struct dnode **list_dir(const char *path, unsigned *nfiles_p) +{ + struct dnode *dn, *cur, **dnp; + struct dirent *entry; + DIR *dir; + unsigned i, nfiles; + + /* Never happens: + if (path == NULL) + return NULL; + */ + + *nfiles_p = 0; + dir = warn_opendir(path); + if (dir == NULL) { + exit_code = EXIT_FAILURE; + return NULL; /* could not open the dir */ + } + dn = NULL; + nfiles = 0; + while ((entry = readdir(dir)) != NULL) { + char *fullname; + + /* are we going to list the file- it may be . or .. or a hidden file */ + if (entry->d_name[0] == '.') { + if ((!entry->d_name[1] || (entry->d_name[1] == '.' && !entry->d_name[2])) + && !(all_fmt & DISP_DOT) + ) { + continue; + } + if (!(all_fmt & DISP_HIDDEN)) + continue; + } + fullname = concat_path_file(path, entry->d_name); + cur = my_stat(fullname, bb_basename(fullname), 0); + if (!cur) { + free(fullname); + continue; + } + cur->fname_allocated = 1; + cur->next = dn; + dn = cur; + nfiles++; + } + closedir(dir); + + if (dn == NULL) + return NULL; + + /* now that we know how many files there are + * allocate memory for an array to hold dnode pointers + */ + *nfiles_p = nfiles; + dnp = dnalloc(nfiles); + for (i = 0; /* i < nfiles - detected via !dn below */; i++) { + dnp[i] = dn; /* save pointer to node in array */ + dn = dn->next; + if (!dn) + break; + } + + return dnp; +} + int ls_main(int argc UNUSED_PARAM, char **argv) { diff --git a/include/libbb.h b/include/libbb.h index 73aea409e..a86d64400 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -577,11 +577,6 @@ char *strncpy_IFNAMSIZ(char *dst, const char *src) FAST_FUNC; * But potentially slow, don't use in one-billion-times loops */ int bb_putchar(int ch) FAST_FUNC; char *xasprintf(const char *format, ...) __attribute__ ((format(printf, 1, 2))) FAST_FUNC RETURNS_MALLOC; -/* Prints unprintable chars ch as ^C or M-c to file - * (M-c is used only if ch is ORed with PRINTABLE_META), - * else it is printed as-is (except for ch = 0x9b) */ -enum { PRINTABLE_META = 0x100 }; -void fputc_printable(int ch, FILE *file) FAST_FUNC; // gcc-4.1.1 still isn't good enough at optimizing it // (+200 bytes compared to macro) //static ALWAYS_INLINE @@ -594,6 +589,20 @@ void fputc_printable(int ch, FILE *file) FAST_FUNC; #define NOT_LONE_CHAR(s,c) ((s)[0] != (c) || (s)[1]) #define DOT_OR_DOTDOT(s) ((s)[0] == '.' && (!(s)[1] || ((s)[1] == '.' && !(s)[2]))) +typedef struct uni_stat_t { + unsigned byte_count; + unsigned unicode_count; + unsigned unicode_width; +} uni_stat_t; +/* Returns a string with unprintable chars replaced by '?' or + * SUBST_WCHAR. This function is unicode-aware. */ +const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str); +/* Prints unprintable char ch as ^C or M-c to file + * (M-c is used only if ch is ORed with PRINTABLE_META), + * else it is printed as-is (except for ch = 0x9b) */ +enum { PRINTABLE_META = 0x100 }; +void fputc_printable(int ch, FILE *file) FAST_FUNC; + /* dmalloc will redefine these to it's own implementation. It is safe * to have the prototypes here unconditionally. */ void *malloc_or_warn(size_t size) FAST_FUNC RETURNS_MALLOC; diff --git a/include/unicode.h b/include/unicode.h index f32e56599..25ef7407e 100644 --- a/include/unicode.h +++ b/include/unicode.h @@ -23,11 +23,6 @@ size_t FAST_FUNC unicode_strlen(const char *string); enum { UNI_FLAG_PAD = (1 << 0), }; -typedef struct uni_stat_t { - unsigned byte_count; - unsigned unicode_count; - unsigned unicode_width; -} uni_stat_t; //UNUSED: unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src); //UNUSED: char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags); char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src); diff --git a/libbb/Kbuild b/libbb/Kbuild index 243626d67..7e793109e 100644 --- a/libbb/Kbuild +++ b/libbb/Kbuild @@ -73,6 +73,7 @@ lib-y += perror_nomsg_and_die.o lib-y += pidfile.o lib-y += platform.o lib-y += printable.o +lib-y += printable_string.o lib-y += print_flags.o lib-y += process_escape_sequence.o lib-y += procps.o diff --git a/libbb/printable_string.c b/libbb/printable_string.c new file mode 100644 index 000000000..47565de0d --- /dev/null +++ b/libbb/printable_string.c @@ -0,0 +1,65 @@ +/* vi: set sw=4 ts=4: */ +/* + * Unicode support routines. + * + * Copyright (C) 2010 Denys Vlasenko + * + * Licensed under GPL version 2, see file LICENSE in this tarball for details. + */ +#include "libbb.h" +#include "unicode.h" + +const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str) +{ + static char *saved[4]; + static unsigned cur_saved; /* = 0 */ + + char *dst; + const char *s; + + s = str; + while (1) { + unsigned char c = *s; + if (c == '\0') { + /* 99+% of inputs do not need conversion */ + if (stats) { + stats->byte_count = (s - str); + stats->unicode_count = (s - str); + stats->unicode_width = (s - str); + } + return str; + } + if (c < ' ') + break; + if (c >= 0x7f) + break; + s++; + } + +#if ENABLE_FEATURE_ASSUME_UNICODE + dst = unicode_conv_to_printable(stats, str); +#else + { + char *d = dst = xstrdup(str); + while (1) { + unsigned char c = *d; + if (c == '\0') + break; + if (c < ' ' || c >= 0x7f) + *d = '?'; + d++; + } + if (stats) { + stats->byte_count = (d - dst); + stats->unicode_count = (d - dst); + stats->unicode_width = (d - dst); + } + } +#endif + + free(saved[cur_saved]); + saved[cur_saved] = dst; + cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1); + + return dst; +} diff --git a/testsuite/ls.mk_uni_tests b/testsuite/ls.mk_uni_tests new file mode 100644 index 000000000..da0c29f29 --- /dev/null +++ b/testsuite/ls.mk_uni_tests @@ -0,0 +1,111 @@ +# DO NOT EDIT THIS FILE! MOST TEXT EDITORS WILL DAMAGE IT! +>'0001_1__Some_correct_UTF-8_text___________________________________________|' +>'0002_2__Boundary_condition_test_cases_____________________________________|' +>'0003_2.1__First_possible_sequence_of_a_certain_length_____________________|' +>'0004_2.1.2__2_bytes__U-00000080_:________"€"______________________________|' +>'0005_2.1.3__3_bytes__U-00000800_:________"ࠀ"______________________________|' +>'0006_2.1.4__4_bytes__U-00010000_:________"𐀀"______________________________|' +>'0007_2.1.5__5_bytes__U-00200000_:________""______________________________|' +>'0008_2.1.6__6_bytes__U-04000000_:________""______________________________|' +>'0009_2.2__Last_possible_sequence_of_a_certain_length______________________|' +>'0010_2.2.1__1_byte___U-0000007F_:________""______________________________|' +>'0011_2.2.2__2_bytes__U-000007FF_:________"߿"______________________________|' +>'0012_2.2.3__3_bytes__U-0000FFFF_:________"￿"______________________________|' +>'0013_2.2.4__4_bytes__U-001FFFFF_:________""______________________________|' +>'0014_2.2.5__5_bytes__U-03FFFFFF_:________""______________________________|' +>'0015_2.2.6__6_bytes__U-7FFFFFFF_:________""______________________________|' +>'0016_2.3__Other_boundary_conditions_______________________________________|' +>'0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"퟿"___________________________________|' +>'0018_2.3.2__U-0000E000_=_ee_80_80_=_""___________________________________|' +>'0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"�"___________________________________|' +>'0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"􏿿"________________________________|' +>'0021_2.3.5__U-00110000_=_f4_90_80_80_=_""________________________________|' +>'0022_3__Malformed_sequences_______________________________________________|' +>'0023_3.1__Unexpected_continuation_bytes___________________________________|' +>'0024_3.1.1__First_continuation_byte_0x80:_""_____________________________|' +>'0025_3.1.2__Last__continuation_byte_0xbf:_""_____________________________|' +>'0026_3.1.3__2_continuation_bytes:_""____________________________________|' +>'0027_3.1.4__3_continuation_bytes:_""___________________________________|' +>'0028_3.1.5__4_continuation_bytes:_""__________________________________|' +>'0029_3.1.6__5_continuation_bytes:_""_________________________________|' +>'0030_3.1.7__6_continuation_bytes:_""________________________________|' +>'0031_3.1.8__7_continuation_bytes:_""_______________________________|' +>'0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___|' +>'0033____"_________________________________________________|' +>'0034______________________________________________________|' +>'0035______________________________________________________|' +>'0036_____"________________________________________________|' +>'0037_3.2__Lonely_start_characters_________________________________________|' +>'0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________|' +>'0039________each_followed_by_a_space_character:___________________________|' +>'0040____"_________________________________________________|' +>'0041_____________________"________________________________|' +>'0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________|' +>'0043________each_followed_by_a_space_character:___________________________|' +>'0044____"________________"________________________________|' +>'0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________|' +>'0046________each_followed_by_a_space_character:___________________________|' +>'0047____"________"________________________________________________|' +>'0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________|' +>'0049________each_followed_by_a_space_character:___________________________|' +>'0050____"____"________________________________________________________|' +>'0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________|' +>'0052________each_followed_by_a_space_character:___________________________|' +>'0053____"__"____________________________________________________________|' +>'0054_3.3__Sequences_with_last_continuation_byte_missing___________________|' +>'0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____""______|' +>'0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____""______|' +>'0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____""______|' +>'0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____""______|' +>'0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____""______|' +>'0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_""______|' +>'0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_""______|' +>'0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_""______|' +>'0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_""______|' +>'0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_""______|' +>'0065_3.4__Concatenation_of_incomplete_sequences___________________________|' +>'0066____""______________________________________________________|' +>'0067_3.5__Impossible_bytes________________________________________________|' +>'0068_3.5.1__fe_=_""______________________________________________________|' +>'0069_3.5.2__ff_=_""______________________________________________________|' +>'0070_3.5.3__fe_fe_ff_ff_=_""__________________________________________|' +>'0071_4__Overlong_sequences________________________________________________|' +>'0072_4.1__Examples_of_an_overlong_ASCII_character_________________________|' +>'0073_4.1.1_U+002F_=_c0_af_____________=_""_______________________________|' +>'0074_4.1.2_U+002F_=_e0_80_af__________=_""_______________________________|' +>'0075_4.1.3_U+002F_=_f0_80_80_af_______=_""_______________________________|' +>'0076_4.1.4_U+002F_=_f8_80_80_80_af____=_""_______________________________|' +>'0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_""_______________________________|' +>'0078_4.2__Maximum_overlong_sequences______________________________________|' +>'0079_4.2.1__U-0000007F_=_c1_bf_____________=_""__________________________|' +>'0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_""__________________________|' +>'0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_""__________________________|' +>'0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_""__________________________|' +>'0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_""__________________________|' +>'0084_4.3__Overlong_representation_of_the_NUL_character____________________|' +>'0085_4.3.1__U+0000_=_c0_80_____________=_""______________________________|' +>'0086_4.3.2__U+0000_=_e0_80_80__________=_""______________________________|' +>'0087_4.3.3__U+0000_=_f0_80_80_80_______=_""______________________________|' +>'0088_4.3.4__U+0000_=_f8_80_80_80_80____=_""______________________________|' +>'0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_""______________________________|' +>'0090_5__Illegal_code_positions____________________________________________|' +>'0091_5.1_Single_UTF-16_surrogates_________________________________________|' +>'0092_5.1.1__U+D800_=_ed_a0_80_=_""_______________________________________|' +>'0093_5.1.2__U+DB7F_=_ed_ad_bf_=_""_______________________________________|' +>'0094_5.1.3__U+DB80_=_ed_ae_80_=_""_______________________________________|' +>'0095_5.1.4__U+DBFF_=_ed_af_bf_=_""_______________________________________|' +>'0096_5.1.5__U+DC00_=_ed_b0_80_=_""_______________________________________|' +>'0097_5.1.6__U+DF80_=_ed_be_80_=_""_______________________________________|' +>'0098_5.1.7__U+DFFF_=_ed_bf_bf_=_""_______________________________________|' +>'0099_5.2_Paired_UTF-16_surrogates_________________________________________|' +>'0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_""______________________|' +>'0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_""______________________|' +>'0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_""______________________|' +>'0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_""______________________|' +>'0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_""______________________|' +>'0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_""______________________|' +>'0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_""______________________|' +>'0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_""______________________|' +>'0108_5.3_Other_illegal_code_positions_____________________________________|' +>'0109_5.3.1__U+FFFE_=_ef_bf_be_=_"￾"_______________________________________|' +>'0110_5.3.2__U+FFFF_=_ef_bf_bf_=_"￿"_______________________________________|' diff --git a/testsuite/ls.tests b/testsuite/ls.tests new file mode 100755 index 000000000..b0c5da7f9 --- /dev/null +++ b/testsuite/ls.tests @@ -0,0 +1,136 @@ +#!/bin/sh +# Copyright 2010 by Denys Vlasenko +# Licensed under GPL v2, see file LICENSE for details. + +. ./testing.sh + +test -f "$bindir/.config" && . "$bindir/.config" + +rm -rf ls.testdir >/dev/null +mkdir ls.testdir || exit 1 + +# testing "test name" "command" "expected result" "file input" "stdin" + +# The test isn't passing correctly now - all | chars should line up +# perfectly in the correctly passed test. +test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \ +&& test x"$CONFIG_SUBST_WCHAR" = x"63" \ +&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"767" \ +&& testing "ls unicode test" \ +"(cd ls.testdir && sh ../ls.mk_uni_tests) && ls -1 ls.testdir" \ +'0001_1__Some_correct_UTF-8_text___________________________________________| +0002_2__Boundary_condition_test_cases_____________________________________| +0003_2.1__First_possible_sequence_of_a_certain_length_____________________| +0004_2.1.2__2_bytes__U-00000080_:________"?"______________________________| +0005_2.1.3__3_bytes__U-00000800_:________"?"______________________________| +0006_2.1.4__4_bytes__U-00010000_:________"?"______________________________| +0007_2.1.5__5_bytes__U-00200000_:________"?"______________________________| +0008_2.1.6__6_bytes__U-04000000_:________"?"______________________________| +0009_2.2__Last_possible_sequence_of_a_certain_length______________________| +0010_2.2.1__1_byte___U-0000007F_:________"?"______________________________| +0011_2.2.2__2_bytes__U-000007FF_:________"?"______________________________| +0012_2.2.3__3_bytes__U-0000FFFF_:________"?"______________________________| +0013_2.2.4__4_bytes__U-001FFFFF_:________"?"______________________________| +0014_2.2.5__5_bytes__U-03FFFFFF_:________"?"______________________________| +0015_2.2.6__6_bytes__U-7FFFFFFF_:________"?"______________________________| +0016_2.3__Other_boundary_conditions_______________________________________| +0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"?"___________________________________| +0018_2.3.2__U-0000E000_=_ee_80_80_=_"?"___________________________________| +0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"?"___________________________________| +0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"?"________________________________| +0021_2.3.5__U-00110000_=_f4_90_80_80_=_"?"________________________________| +0022_3__Malformed_sequences_______________________________________________| +0023_3.1__Unexpected_continuation_bytes___________________________________| +0024_3.1.1__First_continuation_byte_0x80:_"?"_____________________________| +0025_3.1.2__Last__continuation_byte_0xbf:_"?"_____________________________| +0026_3.1.3__2_continuation_bytes:_"??"____________________________________| +0027_3.1.4__3_continuation_bytes:_"???"___________________________________| +0028_3.1.5__4_continuation_bytes:_"????"__________________________________| +0029_3.1.6__5_continuation_bytes:_"?????"_________________________________| +0030_3.1.7__6_continuation_bytes:_"??????"________________________________| +0031_3.1.8__7_continuation_bytes:_"???????"_______________________________| +0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___| +0033____"????????????????_________________________________________________| +0034_____????????????????_________________________________________________| +0035_____????????????????_________________________________________________| +0036_____????????????????"________________________________________________| +0037_3.2__Lonely_start_characters_________________________________________| +0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________| +0039________each_followed_by_a_space_character:___________________________| +0040____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?__________________________________| +0041_____?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________| +0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________| +0043________each_followed_by_a_space_character:___________________________| +0044____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________| +0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________| +0046________each_followed_by_a_space_character:___________________________| +0047____"?_?_?_?_?_?_?_?_"________________________________________________| +0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________| +0049________each_followed_by_a_space_character:___________________________| +0050____"?_?_?_?_"________________________________________________________| +0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________| +0052________each_followed_by_a_space_character:___________________________| +0053____"?_?_"____________________________________________________________| +0054_3.3__Sequences_with_last_continuation_byte_missing___________________| +0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______| +0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____"??"______| +0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____"???"______| +0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____"????"______| +0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____"?????"______| +0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_"?"______| +0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_"??"______| +0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_"???"______| +0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_"????"______| +0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_"?????"______| +0065_3.4__Concatenation_of_incomplete_sequences___________________________| +0066____"??????????????????????????????"______________________________________________________| +0067_3.5__Impossible_bytes________________________________________________| +0068_3.5.1__fe_=_"?"______________________________________________________| +0069_3.5.2__ff_=_"?"______________________________________________________| +0070_3.5.3__fe_fe_ff_ff_=_"????"__________________________________________| +0071_4__Overlong_sequences________________________________________________| +0072_4.1__Examples_of_an_overlong_ASCII_character_________________________| +0073_4.1.1_U+002F_=_c0_af_____________=_"??"_______________________________| +0074_4.1.2_U+002F_=_e0_80_af__________=_"???"_______________________________| +0075_4.1.3_U+002F_=_f0_80_80_af_______=_"????"_______________________________| +0076_4.1.4_U+002F_=_f8_80_80_80_af____=_"?????"_______________________________| +0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_"??????"_______________________________| +0078_4.2__Maximum_overlong_sequences______________________________________| +0079_4.2.1__U-0000007F_=_c1_bf_____________=_"??"__________________________| +0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_"?"__________________________| +0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_"?"__________________________| +0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_"?"__________________________| +0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_"?"__________________________| +0084_4.3__Overlong_representation_of_the_NUL_character____________________| +0085_4.3.1__U+0000_=_c0_80_____________=_"??"______________________________| +0086_4.3.2__U+0000_=_e0_80_80__________=_"???"______________________________| +0087_4.3.3__U+0000_=_f0_80_80_80_______=_"????"______________________________| +0088_4.3.4__U+0000_=_f8_80_80_80_80____=_"?????"______________________________| +0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_"??????"______________________________| +0090_5__Illegal_code_positions____________________________________________| +0091_5.1_Single_UTF-16_surrogates_________________________________________| +0092_5.1.1__U+D800_=_ed_a0_80_=_"?"_______________________________________| +0093_5.1.2__U+DB7F_=_ed_ad_bf_=_"?"_______________________________________| +0094_5.1.3__U+DB80_=_ed_ae_80_=_"?"_______________________________________| +0095_5.1.4__U+DBFF_=_ed_af_bf_=_"?"_______________________________________| +0096_5.1.5__U+DC00_=_ed_b0_80_=_"?"_______________________________________| +0097_5.1.6__U+DF80_=_ed_be_80_=_"?"_______________________________________| +0098_5.1.7__U+DFFF_=_ed_bf_bf_=_"?"_______________________________________| +0099_5.2_Paired_UTF-16_surrogates_________________________________________| +0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_"??"______________________| +0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_"??"______________________| +0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_"??"______________________| +0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_"??"______________________| +0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_"??"______________________| +0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_"??"______________________| +0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_"??"______________________| +0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_"??"______________________| +0108_5.3_Other_illegal_code_positions_____________________________________| +0109_5.3.1__U+FFFE_=_ef_bf_be_=_"?"_______________________________________| +0110_5.3.2__U+FFFF_=_ef_bf_bf_=_"?"_______________________________________| +' "" "" + +# Clean up +rm -rf ls.testdir 2>/dev/null + +exit $FAILCOUNT