darling-gdb/gdb/charset.h
Tom Tromey 6c7a06a3fa gdb:
2009-03-19  Tom Tromey  <tromey@redhat.com>
	    Julian Brown  <julian@codesourcery.com>

	PR i18n/7220, PR i18n/7821, PR exp/8815, PR exp/9103,
	PR i18n/9401, PR exp/9613:
	* NEWS: Update
	* value.h (value_typed_string): Declare.
	(val_print_string): Update.
	* valprint.h (print_char_chars): Update.
	* valprint.c (print_char_chars): Add type argument.  Update.
	(val_print_string): Likewise.
	* valops.c (value_typed_string): New function.
	* utils.c (host_char_to_target): New function.
	(parse_escape): Use host_char_to_target, host_hex_value.  Update.
	Remove '^' case.
	(no_control_char_error): Remove.
	* typeprint.c (print_type_scalar): Update.
	* scm-valprint.c (scm_scmval_print): Update.
	* scm-lang.h (scm_printchar, scm_printstr): Update.
	* scm-lang.c (scm_printchar): Add type argument.
	(scm_printstr): Likewise.
	* printcmd.c (print_formatted): Update.
	(print_scalar_formatted): Update.
	(printf_command) <wide_string_arg, wide_char_arg>: New constants.
	Handle '%lc' and '%ls'.
	* parser-defs.h (struct typed_stoken): New type.
	(struct stoken_vector): Likewise.
	(write_exp_string_vector): Declare.
	* parse.c (write_exp_string_vector): New function.
	* p-valprint.c (pascal_val_print): Update.
	* p-lang.h (is_pascal_string_type, pascal_printchar,
	pascal_printstr): Update.
	* p-lang.c (is_pascal_string_type): Remove 'char_size' argument.
	Add 'char_type' argument.
	(pascal_emit_char): Add type argument.
	(pascal_printchar): Likewise.
	(pascal_printstr): Likewise.
	* objc-lang.c (objc_emit_char): Add type argument.
	(objc_printchar): Likewise.
	(objc_printstr): Likewise.
	* macroexp.c (get_character_constant): Handle unicode characters.
	Use c_parse_escape.
	(get_string_literal): Handle unicode strings.  Use
	c_parse_escape.
	* m2-valprint.c (print_unpacked_pointer): Update.
	(m2_print_array_contents): Update.
	(m2_val_print): Update.
	* m2-lang.c (m2_emit_char): Add type argument.
	(m2_printchar): Likewise.
	(m2_printstr): Likewise.
	* language.h (struct language_defn) <la_printchar>: Add type
	argument.
	<la_printstr, la_emitchar>: Likewise.
	(LA_PRINT_CHAR): Likewise.
	(LA_PRINT_STRING): Likewise.
	(LA_EMIT_CHAR): Likewise.
	* language.c (unk_lang_emit_char): Add type argument.
	(unk_lang_printchar): Likewise.
	(unk_lang_printstr): Likewise.
	* jv-valprint.c (java_val_print): Update.
	* jv-lang.c (java_emit_char): Add type argument.
	* f-valprint.c (f_val_print): Update.
	* f-lang.c (f_emit_char): Add type argument.
	(f_printchar): Likewise.
	(f_printstr): Likewise.
	* expprint.c (print_subexp_standard): Update.
	* charset.h (target_wide_charset): Declare.
	(c_target_char_has_backslash_escape, c_parse_backslash,
	host_char_print_literally, host_char_to_target,
	target_char_to_host, target_char_to_control_char): Remove.
	(enum transliterations): New type.
	(convert_between_encodings): Declare.
	(HOST_ESCAPE_CHAR): New define.
	(host_letter_to_control_character, host_hex_value): Declare.
	(enum wchar_iterate_result): New enum.
	(struct wchar_iterator): Declare.
	(make_wchar_iterator, make_cleanup_wchar_iterator, wchar_iterator,
	wchar_push_back): Declare.
	* charset-list.h: New file.
	* c-valprint.c (textual_name): New function.
	(textual_element_type): Handle wide character types.
	(c_val_print): Pass original type to textual_element_type.  Handle
	wide character types.
	(c_value_print): Use textual_element_type.  Pass original type of
	value to val_print.
	* c-lang.h (enum c_string_type): New type.
	(c_printchar, c_printstr): Update.
	* c-lang.c (classify_type): New function.
	(print_wchar): Likewise.
	(c_emit_char): Add type argument.  Handle wide characters.
	(c_printchar): Likewise.
	(c_printstr): Add type argument.  Handle wide and multibyte
	character sets.
	(convert_ucn): New function.
	(emit_numeric_character): Likewise.
	(convert_octal): Likewise.
	(convert_hex): Likewise.
	(ADVANCE): New macro.
	(convert_escape): New function.
	(parse_one_string): Likewise.
	(evaluate_subexp_c): Likewise.
	(exp_descriptor_c): New global.
	(c_language_defn): Use exp_descriptor_c.
	(cplus_language_defn): Likewise.
	(asm_language_defn): Likewise.
	(minimal_language_defn): Likewise.
	(charset_for_string_type): New function.
	* c-exp.y (%union): Add 'svec' and 'tsval'.
	(CHAR): New token.
	(exp): Add CHAR production.
	(string_exp): Rewrite.
	(exp) <string_exp>: Rewrite.
	(tempbuf): Now global.
	(tempbuf_init): New global.
	(parse_string_or_char): New function.
	(yylex) <tempbuf>: Now global.
	<tokptr, tempbufindex, tempbufsize, token_string, class_prefix>:
	Remove.
	Handle 'u', 'U', and 'L' prefixes.  Call parse_string_or_char.
	(c_parse_escape): New function.
	* auxv.c (fprint_target_auxv): Update.
	* ada-valprint.c (ada_emit_char): Add type argument.
	(ada_printchar): Likewise.
	(ada_print_scalar): Update.
	(printstr): Add type argument.  Update calls to ada_emit_char.
	(ada_printstr): Add type argument.
	(ada_val_print_array): Update.
	(ada_val_print_1): Likewise.
	* ada-lang.c (emit_char): Add type argument.
	* ada-lang.h (ada_emit_char, ada_printchar, ada_printstr): Add
	type arguments.
	* gdb_locale.h: Include langinfo.h.
	* charset.c (_initialize_charset): Set default host charset from
	the locale.  Don't register charsets.  Add target-wide-charset
	commands.  Call find_charset_names.
	(struct charset, struct translation): Remove.
	(GDB_DEFAULT_HOST_CHARSET): Remove.
	(GDB_DEFAULT_TARGET_WIDE_CHARSET): New define.
	(target_wide_charset_name): New global.
	(show_host_charset_name): Handle "auto".
	(show_target_wide_charset_name): New function.
	(host_charset_enum, target_charset_enum): Remove.
	(charset_enum): New global.
	(all_charsets, register_charset, lookup_charset, all_translations,
	register_translation, lookup_translation): Remove.
	(simple_charset, ascii_print_literally, ascii_to_control): Remove.
	(iso_8859_print_literally, iso_8859_to_control,
	iso_8859_family_charset): Remove.
	(ebcdic_print_literally, ebcdic_to_control,
	ebcdic_family_charset): Remove.
	(struct cached_iconv, check_iconv_cache, cached_iconv_convert,
	register_iconv_charsets): Remove.
	(target_wide_charset_be_name, target_wide_charset_le_name): New
	globals.
	(identity_either_char_to_other): Remove.
	(set_be_le_names, validate): New functions.
	(backslashable, backslashed, represented): Remove.
	(default_c_target_char_has_backslash_escape): Remove.
	(default_c_parse_backslash, iconv_convert): Remove.
	(ascii_to_iso_8859_1_table, ascii_to_ebcdic_us_table,
	ascii_to_ibm1047_table, iso_8859_1_to_ascii_table,
	iso_8859_1_to_ebcdic_us_table, iso_8859_1_to_ibm1047_table,
	ebcdic_us_to_ascii_table, ebcdic_us_to_iso_8859_1_table,
	ebcdic_us_to_ibm1047_table, ibm1047_to_ascii_table,
	ibm1047_to_iso_8859_1_table, ibm1047_to_ebcdic_us_table): Remove.
	(table_convert_char, table_translation, simple_table_translation):
	Remove.
	(current_host_charset, current_target_charset,
	c_target_char_has_backslash_escape_func,
	c_target_char_has_backslash_escape_baton): Remove.
	(c_parse_backslash_func, c_parse_backslash_baton): Remove.
	(host_char_to_target_func, host_char_to_target_baton): Remove.
	(target_char_to_host_func, target_char_to_host_baton): Remove.
	(cached_iconv_host_to_target, cached_iconv_target_to_host):
	Remove.
	(lookup_charset_or_error, check_valid_host_charset): Remove.
	(set_host_and_target_charsets): Remove.
	(set_host_charset, set_target_charset): Remove.
	(set_host_charset_sfunc, set_target_charset_sfunc): Rewrite.
	(set_target_wide_charset_sfunc): New function.
	(show_charset): Print target wide character set.
	(host_charset, target_charset): Rewrite.
	(target_wide_charset): New function.
	(c_target_char_has_backslash_escape): Remove.
	(c_parse_backslash): Remove.
	(host_letter_to_control_character): New function.
	(host_char_print_literally): Remove.
	(host_hex_value): New function.
	(target_char_to_control_char): Remove.
	(cleanup_iconv): New function.
	(convert_between_encodings): New function.
	(target_char_to_host): Remove.
	(struct wchar_iterator): Define.
	(make_wchar_iterator, make_cleanup_wchar_iterator, wchar_iterator,
	wchar_push_back): New functions.
	(do_cleanup_iterator): New function.
	(char_ptr): New typedef.
	(charsets): New global.
	(add_one, find_charset_names): New functions.
	(default_charset_names): New global.
	(auto_host_charset_name): Likewise.
	* aclocal.m4, config.in, configure: Rebuild.
	* configure.ac: Call AM_LANGINFO_CODESET.
	(GDB_DEFAULT_HOST_CHARSET): Default to UTF-8.
	(AM_ICONV): Invoke earlier.
	* acinclude.m4: Include codeset.m4.  Subst LIBICONV_INCLUDE and
	LIBICONV_LIBDIR.  Check for libiconv in build tree.
	* Makefile.in (LIBICONV_LIBDIR, LIBICONV_INCLUDE): New macros.
	(INTERNAL_CFLAGS_BASE): Add LIBICONV_INCLUDE.
	(INTERNAL_LDFLAGS): Add LIBICONV_LIBDIR.
	* gdb_obstack.h (obstack_grow_wstr): New define.
        * gdb_wchar.h: New file.
        * defs.h: Include it.
gdb/testsuite:
	* gdb.base/store.exp: Update for change to escape output.
	* gdb.base/callfuncs.exp (fetch_all_registers): Update for change
	to escape output.
	* gdb.base/pointers.exp: Update for change to escape output.
	* gdb.base/long_long.exp (gdb_test_long_long): Update for change
	to escape output.
	* gdb.base/constvars.exp (do_constvar_tests): Update for change to
	escape output.
	* gdb.base/call-rt-st.exp (print_struct_call): Update for change
	to escape output.
	* gdb.cp/ref-types.exp (gdb_start_again): Update for change to
	escape output.
	* gdb.base/setvar.exp: Update for change to escape output.
	* lib/gdb.exp (default_gdb_start): Set LC_CTYPE to C.
	* gdb.base/printcmds.exp (test_print_all_chars): Update for change
	to escape output.
	(test_print_string_constants): Likewise.
	* gdb.base/charset.exp (valid_host_charset): Check size of
	wchar_t.  Handle UCS-2 and UCS-4.  Add tests for wide and unicode
	cases.  Handle "auto"-related output.
	* gdb.base/charset.c (char16_t, char32_t): New typedefs.
	(uvar, Uvar): New globals.
gdb/doc:
	* gdb.texinfo (Character Sets): Remove obsolete text.  Document
	set target-wide-charset.
	(Requirements): Mention iconv.
2009-03-20 23:04:40 +00:00

148 lines
5.4 KiB
C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* Character set conversion support for GDB.
Copyright (C) 2001, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GDB.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#ifndef CHARSET_H
#define CHARSET_H
/* If the target program uses a different character set than the host,
GDB has some support for translating between the two; GDB converts
characters and strings to the host character set before displaying
them, and converts characters and strings appearing in expressions
entered by the user to the target character set.
GDB's code pretty much assumes that the host character set is some
superset of ASCII; there are plenty if ('0' + n) expressions and
the like. */
/* Return the name of the current host/target character set. The
result is owned by the charset module; the caller should not free
it. */
const char *host_charset (void);
const char *target_charset (void);
const char *target_wide_charset (void);
/* These values are used to specify the type of transliteration done
by convert_between_encodings. */
enum transliterations
{
/* Error on failure to convert. */
translit_none,
/* Transliterate to host char. */
translit_char
};
/* Convert between two encodings.
FROM is the name of the source encoding.
TO is the name of the target encoding.
BYTES holds the bytes to convert; this is assumed to be characters
in the target encoding.
NUM_BYTES is the number of bytes.
WIDTH is the width of a character from the FROM charset, in bytes.
For a variable width encoding, WIDTH should be the size of a "base
character".
OUTPUT is an obstack where the converted data is written. The
caller is responsible for initializing the obstack, and for
destroying the obstack should an error occur.
TRANSLIT specifies how invalid conversions should be handled. */
void convert_between_encodings (const char *from, const char *to,
const gdb_byte *bytes, unsigned int num_bytes,
int width, struct obstack *output,
enum transliterations translit);
/* These values are used by wchar_iterate to report errors. */
enum wchar_iterate_result
{
/* Ordinary return. */
wchar_iterate_ok,
/* Invalid input sequence. */
wchar_iterate_invalid,
/* Incomplete input sequence at the end of the input. */
wchar_iterate_incomplete,
/* EOF. */
wchar_iterate_eof
};
/* Declaration of the opaque wchar iterator type. */
struct wchar_iterator;
/* Create a new character iterator which returns wchar_t's. INPUT is
the input buffer. BYTES is the number of bytes in the input
buffer. CHARSET is the name of the character set in which INPUT is
encoded. WIDTH is the number of bytes in a base character of
CHARSET.
This function either returns a new character set iterator, or calls
error. The result can be freed using a cleanup; see
make_cleanup_wchar_iterator. */
struct wchar_iterator *make_wchar_iterator (const gdb_byte *input, size_t bytes,
const char *charset,
size_t width);
/* Return a new cleanup suitable for destroying the wchar iterator
ITER. */
struct cleanup *make_cleanup_wchar_iterator (struct wchar_iterator *iter);
/* Perform a single iteration of a wchar_t iterator.
Returns the number of characters converted. A negative result
means that EOF has been reached. A positive result indicates the
number of valid wchar_ts in the result; *OUT_CHARS is updated to
point to the first valid character.
In all cases aside from EOF, *PTR is set to point to the first
converted target byte. *LEN is set to the number of bytes
converted.
A zero result means one of several unusual results. *OUT_RESULT is
set to indicate the type of un-ordinary return.
wchar_iterate_invalid means that an invalid input character was
seen. The iterator is advanced by WIDTH (the argument to
make_wchar_iterator) bytes.
wchar_iterate_incomplete means that an incomplete character was
seen at the end of the input sequence.
wchar_iterate_eof means that all bytes were successfully
converted. The other output arguments are not set. */
int wchar_iterate (struct wchar_iterator *iter,
enum wchar_iterate_result *out_result,
gdb_wchar_t **out_chars,
const gdb_byte **ptr, size_t *len);
/* GDB needs to know a few details of its execution character set.
This knowledge is isolated here and in charset.c. */
/* The escape character. */
#define HOST_ESCAPE_CHAR 27
/* Convert a letter, like 'c', to its corresponding control
character. */
char host_letter_to_control_character (char c);
/* Convert a hex digit character to its numeric value. E.g., 'f' is
converted to 15. This function assumes that C is a valid hex
digit. Both upper- and lower-case letters are recognized. */
int host_hex_value (char c);
#endif /* CHARSET_H */