mirror of
https://github.com/openharmony/third_party_gettext.git
synced 2026-07-01 10:25:03 -04:00
6c39c38b09
* gettext-tools/src/x-ruby.h: New file. * gettext-tools/src/x-ruby.c: New file. * gettext-tools/src/xgettext.h (verbose): New declaration. * gettext-tools/src/xgettext.c: Include x-ruby.h. (verbose): New declaration. (flag_table_ruby): New variable. (long_options): Add '--verbose'. (main): Update for Ruby. Handle '-v'/'--verbose' option. (usage): Document the '-L Ruby' and '-v' options. (xgettext_record_flag, language_to_extractor, extension_to_language): Update for Ruby. * gettext-tools/src/Makefile.am (noinst_HEADERS): Add x-ruby.h. (xgettext_SOURCES): Add x-ruby.c. * gettext-tools/src/FILES: Mention x-ruby.h, x-ruby.c. * gettext-tools/tests/xgettext-ruby-1: New file. * gettext-tools/tests/format-ruby-1: New file. * gettext-tools/tests/format-ruby-2: New file. * gettext-tools/tests/Makefile.am (TESTS): Add them. * gettext-tools/doc/gettext.texi (Ruby): New section. * gettext-tools/doc/xgettext.texi: Document the '-L Ruby' and '-v' options. * HACKING: Document the recommended Ruby packages. * NEWS: Mention the Ruby support.
13232 lines
484 KiB
Plaintext
13232 lines
484 KiB
Plaintext
\input texinfo @c -*-texinfo-*-
|
||
@c %**start of header
|
||
@setfilename gettext.info
|
||
@c The @ifset makeinfo ... @end ifset conditional evaluates to true in makeinfo
|
||
@c for info and html output, but to false in texi2html.
|
||
@ifnottex
|
||
@ifclear texi2html
|
||
@set makeinfo
|
||
@end ifclear
|
||
@end ifnottex
|
||
@c The @documentencoding is needed for makeinfo; texi2html 1.52
|
||
@c doesn't recognize it.
|
||
@ifset makeinfo
|
||
@documentencoding UTF-8
|
||
@end ifset
|
||
@settitle GNU @code{gettext} utilities
|
||
@finalout
|
||
@c Indices:
|
||
@c am = autoconf macro @amindex
|
||
@c cp = concept @cindex
|
||
@c ef = emacs function @efindex
|
||
@c em = emacs mode @emindex
|
||
@c ev = emacs variable @evindex
|
||
@c fn = function @findex
|
||
@c kw = keyword @kwindex
|
||
@c op = option @opindex
|
||
@c pg = program @pindex
|
||
@c vr = variable @vindex
|
||
@c Unused predefined indices:
|
||
@c tp = type @tindex
|
||
@c ky = keystroke @kindex
|
||
@defcodeindex am
|
||
@defcodeindex ef
|
||
@defindex em
|
||
@defcodeindex ev
|
||
@defcodeindex kw
|
||
@defcodeindex op
|
||
@syncodeindex ef em
|
||
@syncodeindex ev em
|
||
@syncodeindex fn cp
|
||
@syncodeindex kw cp
|
||
@ifclear texi2html
|
||
@firstparagraphindent insert
|
||
@end ifclear
|
||
@c %**end of header
|
||
|
||
@include version.texi
|
||
|
||
@ifinfo
|
||
@dircategory GNU Gettext Utilities
|
||
@direntry
|
||
* gettext: (gettext). GNU gettext utilities.
|
||
* autopoint: (gettext)autopoint Invocation. Copy gettext infrastructure.
|
||
* envsubst: (gettext)envsubst Invocation. Expand environment variables.
|
||
* gettextize: (gettext)gettextize Invocation. Prepare a package for gettext.
|
||
* msgattrib: (gettext)msgattrib Invocation. Select part of a PO file.
|
||
* msgcat: (gettext)msgcat Invocation. Combine several PO files.
|
||
* msgcmp: (gettext)msgcmp Invocation. Compare a PO file and template.
|
||
* msgcomm: (gettext)msgcomm Invocation. Match two PO files.
|
||
* msgconv: (gettext)msgconv Invocation. Convert PO file to encoding.
|
||
* msgen: (gettext)msgen Invocation. Create an English PO file.
|
||
* msgexec: (gettext)msgexec Invocation. Process a PO file.
|
||
* msgfilter: (gettext)msgfilter Invocation. Pipe a PO file through a filter.
|
||
* msgfmt: (gettext)msgfmt Invocation. Make MO files out of PO files.
|
||
* msggrep: (gettext)msggrep Invocation. Select part of a PO file.
|
||
* msginit: (gettext)msginit Invocation. Create a fresh PO file.
|
||
* msgmerge: (gettext)msgmerge Invocation. Update a PO file from template.
|
||
* msgunfmt: (gettext)msgunfmt Invocation. Uncompile MO file into PO file.
|
||
* msguniq: (gettext)msguniq Invocation. Unify duplicates for PO file.
|
||
* ngettext: (gettext)ngettext Invocation. Translate a message with plural.
|
||
* xgettext: (gettext)xgettext Invocation. Extract strings into a PO file.
|
||
* ISO639: (gettext)Language Codes. ISO 639 language codes.
|
||
* ISO3166: (gettext)Country Codes. ISO 3166 country codes.
|
||
@end direntry
|
||
@end ifinfo
|
||
|
||
@ifinfo
|
||
This file provides documentation for GNU @code{gettext} utilities.
|
||
It also serves as a reference for the free Translation Project.
|
||
|
||
@copying
|
||
Copyright (C) 1995-1998, 2001-2020 Free Software Foundation, Inc.
|
||
|
||
This manual is free documentation. It is dually licensed under the
|
||
GNU FDL and the GNU GPL. This means that you can redistribute this
|
||
manual under either of these two licenses, at your choice.
|
||
|
||
This manual is covered by the GNU FDL. Permission is granted to copy,
|
||
distribute and/or modify this document under the terms of the
|
||
GNU Free Documentation License (FDL), either version 1.2 of the
|
||
License, or (at your option) any later version published by the
|
||
Free Software Foundation (FSF); with no Invariant Sections, with no
|
||
Front-Cover Text, and with no Back-Cover Texts.
|
||
A copy of the license is included in @ref{GNU FDL}.
|
||
|
||
This manual is covered by the GNU GPL. You can redistribute it and/or
|
||
modify it under the terms of the GNU General Public License (GPL), either
|
||
version 2 of the License, or (at your option) any later version published
|
||
by the Free Software Foundation (FSF).
|
||
A copy of the license is included in @ref{GNU GPL}.
|
||
@end copying
|
||
@end ifinfo
|
||
|
||
@titlepage
|
||
@title GNU gettext tools, version @value{VERSION}
|
||
@subtitle Native Language Support Library and Tools
|
||
@subtitle Edition @value{EDITION}, @value{UPDATED}
|
||
@author Ulrich Drepper
|
||
@author Jim Meyering
|
||
@author Fran@,{c}ois Pinard
|
||
@author Bruno Haible
|
||
|
||
@ifnothtml
|
||
@page
|
||
@vskip 0pt plus 1filll
|
||
@c @insertcopying
|
||
Copyright (C) 1995-1998, 2001-2020 Free Software Foundation, Inc.
|
||
|
||
This manual is free documentation. It is dually licensed under the
|
||
GNU FDL and the GNU GPL. This means that you can redistribute this
|
||
manual under either of these two licenses, at your choice.
|
||
|
||
This manual is covered by the GNU FDL. Permission is granted to copy,
|
||
distribute and/or modify this document under the terms of the
|
||
GNU Free Documentation License (FDL), either version 1.2 of the
|
||
License, or (at your option) any later version published by the
|
||
Free Software Foundation (FSF); with no Invariant Sections, with no
|
||
Front-Cover Text, and with no Back-Cover Texts.
|
||
A copy of the license is included in @ref{GNU FDL}.
|
||
|
||
This manual is covered by the GNU GPL. You can redistribute it and/or
|
||
modify it under the terms of the GNU General Public License (GPL), either
|
||
version 2 of the License, or (at your option) any later version published
|
||
by the Free Software Foundation (FSF).
|
||
A copy of the license is included in @ref{GNU GPL}.
|
||
@end ifnothtml
|
||
@end titlepage
|
||
|
||
@c Table of Contents
|
||
@contents
|
||
|
||
@ifnottex
|
||
@node Top
|
||
@top GNU @code{gettext} utilities
|
||
|
||
This manual documents the GNU gettext tools and the GNU libintl library,
|
||
version @value{VERSION}.
|
||
|
||
@menu
|
||
* Introduction:: Introduction
|
||
* Users:: The User's View
|
||
* PO Files:: The Format of PO Files
|
||
* Sources:: Preparing Program Sources
|
||
* Template:: Making the PO Template File
|
||
* Creating:: Creating a New PO File
|
||
* Updating:: Updating Existing PO Files
|
||
* Editing:: Editing PO Files
|
||
* Manipulating:: Manipulating PO Files
|
||
* Binaries:: Producing Binary MO Files
|
||
* Programmers:: The Programmer's View
|
||
* Translators:: The Translator's View
|
||
* Maintainers:: The Maintainer's View
|
||
* Installers:: The Installer's and Distributor's View
|
||
* Programming Languages:: Other Programming Languages
|
||
* Data Formats:: Other Data Formats
|
||
* Conclusion:: Concluding Remarks
|
||
|
||
* Language Codes:: ISO 639 language codes
|
||
* Country Codes:: ISO 3166 country codes
|
||
* Licenses:: Licenses
|
||
|
||
* Program Index:: Index of Programs
|
||
* Option Index:: Index of Command-Line Options
|
||
* Variable Index:: Index of Environment Variables
|
||
* PO Mode Index:: Index of Emacs PO Mode Commands
|
||
* Autoconf Macro Index:: Index of Autoconf Macros
|
||
* Index:: General Index
|
||
|
||
@detailmenu
|
||
--- The Detailed Node Listing ---
|
||
|
||
Introduction
|
||
|
||
* Why:: The Purpose of GNU @code{gettext}
|
||
* Concepts:: I18n, L10n, and Such
|
||
* Aspects:: Aspects in Native Language Support
|
||
* Files:: Files Conveying Translations
|
||
* Overview:: Overview of GNU @code{gettext}
|
||
|
||
The User's View
|
||
|
||
* System Installation:: Questions During Operating System Installation
|
||
* Setting the GUI Locale:: How to Specify the Locale Used by GUI Programs
|
||
* Setting the POSIX Locale:: How to Specify the Locale According to POSIX
|
||
* Working in a Windows console:: Obtaining good output in a Windows console
|
||
* Installing Localizations:: How to Install Additional Translations
|
||
|
||
Setting the Locale through Environment Variables
|
||
|
||
* Locale Names:: What a Locale Specification Looks Like
|
||
* Locale Environment Variables:: Which Environment Variable Specifies What
|
||
* The LANGUAGE variable:: How to Specify a Priority List of Languages
|
||
|
||
Preparing Program Sources
|
||
|
||
* Importing:: Importing the @code{gettext} declaration
|
||
* Triggering:: Triggering @code{gettext} Operations
|
||
* Preparing Strings:: Preparing Translatable Strings
|
||
* Mark Keywords:: How Marks Appear in Sources
|
||
* Marking:: Marking Translatable Strings
|
||
* c-format Flag:: Telling something about the following string
|
||
* Special cases:: Special Cases of Translatable Strings
|
||
* Bug Report Address:: Letting Users Report Translation Bugs
|
||
* Names:: Marking Proper Names for Translation
|
||
* Libraries:: Preparing Library Sources
|
||
|
||
Making the PO Template File
|
||
|
||
* xgettext Invocation:: Invoking the @code{xgettext} Program
|
||
|
||
Creating a New PO File
|
||
|
||
* msginit Invocation:: Invoking the @code{msginit} Program
|
||
* Header Entry:: Filling in the Header Entry
|
||
|
||
Updating Existing PO Files
|
||
|
||
* msgmerge Invocation:: Invoking the @code{msgmerge} Program
|
||
|
||
Editing PO Files
|
||
|
||
* KBabel:: KDE's PO File Editor
|
||
* Gtranslator:: GNOME's PO File Editor
|
||
* PO Mode:: Emacs's PO File Editor
|
||
* Compendium:: Using Translation Compendia
|
||
|
||
Emacs's PO File Editor
|
||
|
||
* Installation:: Completing GNU @code{gettext} Installation
|
||
* Main PO Commands:: Main Commands
|
||
* Entry Positioning:: Entry Positioning
|
||
* Normalizing:: Normalizing Strings in Entries
|
||
* Translated Entries:: Translated Entries
|
||
* Fuzzy Entries:: Fuzzy Entries
|
||
* Untranslated Entries:: Untranslated Entries
|
||
* Obsolete Entries:: Obsolete Entries
|
||
* Modifying Translations:: Modifying Translations
|
||
* Modifying Comments:: Modifying Comments
|
||
* Subedit:: Mode for Editing Translations
|
||
* C Sources Context:: C Sources Context
|
||
* Auxiliary:: Consulting Auxiliary PO Files
|
||
|
||
Using Translation Compendia
|
||
|
||
* Creating Compendia:: Merging translations for later use
|
||
* Using Compendia:: Using older translations if they fit
|
||
|
||
Manipulating PO Files
|
||
|
||
* msgcat Invocation:: Invoking the @code{msgcat} Program
|
||
* msgconv Invocation:: Invoking the @code{msgconv} Program
|
||
* msggrep Invocation:: Invoking the @code{msggrep} Program
|
||
* msgfilter Invocation:: Invoking the @code{msgfilter} Program
|
||
* msguniq Invocation:: Invoking the @code{msguniq} Program
|
||
* msgcomm Invocation:: Invoking the @code{msgcomm} Program
|
||
* msgcmp Invocation:: Invoking the @code{msgcmp} Program
|
||
* msgattrib Invocation:: Invoking the @code{msgattrib} Program
|
||
* msgen Invocation:: Invoking the @code{msgen} Program
|
||
* msgexec Invocation:: Invoking the @code{msgexec} Program
|
||
* Colorizing:: Highlighting parts of PO files
|
||
* Other tools:: Other tools for manipulating PO files
|
||
* libgettextpo:: Writing your own programs that process PO files
|
||
|
||
Highlighting parts of PO files
|
||
|
||
* The --color option:: Triggering colorized output
|
||
* The TERM variable:: The environment variable @code{TERM}
|
||
* The --style option:: The @code{--style} option
|
||
* Style rules:: Style rules for PO files
|
||
* Customizing less:: Customizing @code{less} for viewing PO files
|
||
|
||
Producing Binary MO Files
|
||
|
||
* msgfmt Invocation:: Invoking the @code{msgfmt} Program
|
||
* msgunfmt Invocation:: Invoking the @code{msgunfmt} Program
|
||
* MO Files:: The Format of GNU MO Files
|
||
|
||
The Programmer's View
|
||
|
||
* catgets:: About @code{catgets}
|
||
* gettext:: About @code{gettext}
|
||
* Comparison:: Comparing the two interfaces
|
||
* Using libintl.a:: Using libintl.a in own programs
|
||
* gettext grok:: Being a @code{gettext} grok
|
||
* Temp Programmers:: Temporary Notes for the Programmers Chapter
|
||
|
||
About @code{catgets}
|
||
|
||
* Interface to catgets:: The interface
|
||
* Problems with catgets:: Problems with the @code{catgets} interface?!
|
||
|
||
About @code{gettext}
|
||
|
||
* Interface to gettext:: The interface
|
||
* Ambiguities:: Solving ambiguities
|
||
* Locating Catalogs:: Locating message catalog files
|
||
* Charset conversion:: How to request conversion to Unicode
|
||
* Contexts:: Solving ambiguities in GUI programs
|
||
* Plural forms:: Additional functions for handling plurals
|
||
* Optimized gettext:: Optimization of the *gettext functions
|
||
|
||
Temporary Notes for the Programmers Chapter
|
||
|
||
* Temp Implementations:: Temporary - Two Possible Implementations
|
||
* Temp catgets:: Temporary - About @code{catgets}
|
||
* Temp WSI:: Temporary - Why a single implementation
|
||
* Temp Notes:: Temporary - Notes
|
||
|
||
The Translator's View
|
||
|
||
* Trans Intro 0:: Introduction 0
|
||
* Trans Intro 1:: Introduction 1
|
||
* Discussions:: Discussions
|
||
* Organization:: Organization
|
||
* Information Flow:: Information Flow
|
||
* Translating plural forms:: How to fill in @code{msgstr[0]}, @code{msgstr[1]}
|
||
* Prioritizing messages:: How to find which messages to translate first
|
||
|
||
Organization
|
||
|
||
* Central Coordination:: Central Coordination
|
||
* National Teams:: National Teams
|
||
* Mailing Lists:: Mailing Lists
|
||
|
||
National Teams
|
||
|
||
* Sub-Cultures:: Sub-Cultures
|
||
* Organizational Ideas:: Organizational Ideas
|
||
|
||
The Maintainer's View
|
||
|
||
* Flat and Non-Flat:: Flat or Non-Flat Directory Structures
|
||
* Prerequisites:: Prerequisite Works
|
||
* gettextize Invocation:: Invoking the @code{gettextize} Program
|
||
* Adjusting Files:: Files You Must Create or Alter
|
||
* autoconf macros:: Autoconf macros for use in @file{configure.ac}
|
||
* Version Control Issues::
|
||
* Release Management:: Creating a Distribution Tarball
|
||
|
||
Files You Must Create or Alter
|
||
|
||
* po/POTFILES.in:: @file{POTFILES.in} in @file{po/}
|
||
* po/LINGUAS:: @file{LINGUAS} in @file{po/}
|
||
* po/Makevars:: @file{Makevars} in @file{po/}
|
||
* po/Rules-*:: Extending @file{Makefile} in @file{po/}
|
||
* configure.ac:: @file{configure.ac} at top level
|
||
* config.guess:: @file{config.guess}, @file{config.sub} at top level
|
||
* mkinstalldirs:: @file{mkinstalldirs} at top level
|
||
* aclocal:: @file{aclocal.m4} at top level
|
||
* config.h.in:: @file{config.h.in} at top level
|
||
* Makefile:: @file{Makefile.in} at top level
|
||
* src/Makefile:: @file{Makefile.in} in @file{src/}
|
||
* lib/gettext.h:: @file{gettext.h} in @file{lib/}
|
||
|
||
Autoconf macros for use in @file{configure.ac}
|
||
|
||
* AM_GNU_GETTEXT:: AM_GNU_GETTEXT in @file{gettext.m4}
|
||
* AM_GNU_GETTEXT_VERSION:: AM_GNU_GETTEXT_VERSION in @file{gettext.m4}
|
||
* AM_GNU_GETTEXT_NEED:: AM_GNU_GETTEXT_NEED in @file{gettext.m4}
|
||
* AM_PO_SUBDIRS:: AM_PO_SUBDIRS in @file{po.m4}
|
||
* AM_XGETTEXT_OPTION:: AM_XGETTEXT_OPTION in @file{po.m4}
|
||
* AM_ICONV:: AM_ICONV in @file{iconv.m4}
|
||
|
||
Integrating with Version Control Systems
|
||
|
||
* Distributed Development:: Avoiding version mismatch in distributed development
|
||
* Files under Version Control:: Files to put under version control
|
||
* Translations under Version Control:: Put PO Files under Version Control
|
||
* autopoint Invocation:: Invoking the @code{autopoint} Program
|
||
|
||
Other Programming Languages
|
||
|
||
* Language Implementors:: The Language Implementor's View
|
||
* Programmers for other Languages:: The Programmer's View
|
||
* Translators for other Languages:: The Translator's View
|
||
* Maintainers for other Languages:: The Maintainer's View
|
||
* List of Programming Languages:: Individual Programming Languages
|
||
|
||
The Translator's View
|
||
|
||
* c-format:: C Format Strings
|
||
* objc-format:: Objective C Format Strings
|
||
* sh-format:: Shell Format Strings
|
||
* python-format:: Python Format Strings
|
||
* lisp-format:: Lisp Format Strings
|
||
* elisp-format:: Emacs Lisp Format Strings
|
||
* librep-format:: librep Format Strings
|
||
* scheme-format:: Scheme Format Strings
|
||
* smalltalk-format:: Smalltalk Format Strings
|
||
* java-format:: Java Format Strings
|
||
* csharp-format:: C# Format Strings
|
||
* awk-format:: awk Format Strings
|
||
* object-pascal-format:: Object Pascal Format Strings
|
||
* ycp-format:: YCP Format Strings
|
||
* tcl-format:: Tcl Format Strings
|
||
* perl-format:: Perl Format Strings
|
||
* php-format:: PHP Format Strings
|
||
* ruby-format:: Ruby Format Strings
|
||
* gcc-internal-format:: GCC internal Format Strings
|
||
* gfc-internal-format:: GFC internal Format Strings
|
||
* qt-format:: Qt Format Strings
|
||
* qt-plural-format:: Qt Plural Format Strings
|
||
* kde-format:: KDE Format Strings
|
||
* boost-format:: Boost Format Strings
|
||
* lua-format:: Lua Format Strings
|
||
* javascript-format:: JavaScript Format Strings
|
||
|
||
Individual Programming Languages
|
||
|
||
* C:: C, C++, Objective C
|
||
* sh:: sh - Shell Script
|
||
* bash:: bash - Bourne-Again Shell Script
|
||
* Python:: Python
|
||
* Common Lisp:: GNU clisp - Common Lisp
|
||
* clisp C:: GNU clisp C sources
|
||
* Emacs Lisp:: Emacs Lisp
|
||
* librep:: librep
|
||
* Scheme:: GNU guile - Scheme
|
||
* Smalltalk:: GNU Smalltalk
|
||
* Java:: Java
|
||
* C#:: C#
|
||
* gawk:: GNU awk
|
||
* Pascal:: Pascal - Free Pascal Compiler
|
||
* wxWidgets:: wxWidgets library
|
||
* YCP:: YCP - YaST2 scripting language
|
||
* Tcl:: Tcl - Tk's scripting language
|
||
* Perl:: Perl
|
||
* PHP:: PHP Hypertext Preprocessor
|
||
* Ruby:: Ruby
|
||
* Pike:: Pike
|
||
* GCC-source:: GNU Compiler Collection sources
|
||
* Lua:: Lua
|
||
* JavaScript:: JavaScript
|
||
* Vala:: Vala
|
||
|
||
sh - Shell Script
|
||
|
||
* Preparing Shell Scripts:: Preparing Shell Scripts for Internationalization
|
||
* gettext.sh:: Contents of @code{gettext.sh}
|
||
* gettext Invocation:: Invoking the @code{gettext} program
|
||
* ngettext Invocation:: Invoking the @code{ngettext} program
|
||
* envsubst Invocation:: Invoking the @code{envsubst} program
|
||
* eval_gettext Invocation:: Invoking the @code{eval_gettext} function
|
||
* eval_ngettext Invocation:: Invoking the @code{eval_ngettext} function
|
||
* eval_pgettext Invocation:: Invoking the @code{eval_pgettext} function
|
||
* eval_npgettext Invocation:: Invoking the @code{eval_npgettext} function
|
||
|
||
Perl
|
||
|
||
* General Problems:: General Problems Parsing Perl Code
|
||
* Default Keywords:: Which Keywords Will xgettext Look For?
|
||
* Special Keywords:: How to Extract Hash Keys
|
||
* Quote-like Expressions:: What are Strings And Quote-like Expressions?
|
||
* Interpolation I:: Invalid String Interpolation
|
||
* Interpolation II:: Valid String Interpolation
|
||
* Parentheses:: When To Use Parentheses
|
||
* Long Lines:: How To Grok with Long Lines
|
||
* Perl Pitfalls:: Bugs, Pitfalls, and Things That Do Not Work
|
||
|
||
Other Data Formats
|
||
|
||
* Internationalizable Data:: Internationalizable Data Formats
|
||
* Localized Data:: Localized Data Formats
|
||
|
||
Internationalizable Data Formats
|
||
|
||
* POT:: POT - Portable Object Template
|
||
* RST:: Resource String Table
|
||
* Glade:: Glade - GNOME user interface description
|
||
* GSettings:: GSettings - GNOME user configuration schema
|
||
* AppData:: AppData - freedesktop.org application description
|
||
* Preparing ITS Rules:: Preparing Rules for XML Internationalization
|
||
|
||
Localized Data Formats
|
||
|
||
* Editable Message Catalogs:: Editable Message Catalogs
|
||
* Compiled Message Catalogs:: Compiled Message Catalogs
|
||
* Desktop Entry:: Desktop Entry files
|
||
* XML:: XML files
|
||
|
||
Editable Message Catalogs
|
||
|
||
* PO:: PO - Portable Object
|
||
* Java .properties:: Java .properties
|
||
* GNUstep .strings:: NeXTstep/GNUstep .strings
|
||
|
||
Compiled Message Catalogs
|
||
|
||
* MO:: MO - Machine Object
|
||
* Java ResourceBundle:: Java ResourceBundle
|
||
* C# Satellite Assembly:: C# Satellite Assembly
|
||
* C# Resource:: C# Resource
|
||
* Tcl message catalog:: Tcl message catalog
|
||
* Qt message catalog:: Qt message catalog
|
||
|
||
Concluding Remarks
|
||
|
||
* History:: History of GNU @code{gettext}
|
||
* The original ABOUT-NLS:: Historical introduction
|
||
* References:: Related Readings
|
||
|
||
Language Codes
|
||
|
||
* Usual Language Codes:: Two-letter ISO 639 language codes
|
||
* Rare Language Codes:: Three-letter ISO 639 language codes
|
||
|
||
Licenses
|
||
|
||
* GNU GPL:: GNU General Public License
|
||
* GNU LGPL:: GNU Lesser General Public License
|
||
* GNU FDL:: GNU Free Documentation License
|
||
|
||
@end detailmenu
|
||
@end menu
|
||
|
||
@end ifnottex
|
||
|
||
@node Introduction
|
||
@chapter Introduction
|
||
|
||
This chapter explains the goals sought in the creation
|
||
of GNU @code{gettext} and the free Translation Project.
|
||
Then, it explains a few broad concepts around
|
||
Native Language Support, and positions message translation with regard
|
||
to other aspects of national and cultural variance, as they apply
|
||
to programs. It also surveys those files used to convey the
|
||
translations. It explains how the various tools interact in the
|
||
initial generation of these files, and later, how the maintenance
|
||
cycle should usually operate.
|
||
|
||
@cindex sex
|
||
@cindex he, she, and they
|
||
@cindex she, he, and they
|
||
In this manual, we use @emph{he} when speaking of the programmer or
|
||
maintainer, @emph{she} when speaking of the translator, and @emph{they}
|
||
when speaking of the installers or end users of the translated program.
|
||
This is only a convenience for clarifying the documentation. It is
|
||
@emph{absolutely} not meant to imply that some roles are more appropriate
|
||
to males or females. Besides, as you might guess, GNU @code{gettext}
|
||
is meant to be useful for people using computers, whatever their sex,
|
||
race, religion or nationality!
|
||
|
||
@cindex bug report address
|
||
Please submit suggestions and corrections
|
||
@itemize @bullet
|
||
@item
|
||
either in the bug tracker at @url{https://savannah.gnu.org/projects/gettext}
|
||
@item
|
||
or by email to @code{bug-gettext@@gnu.org}.
|
||
@end itemize
|
||
|
||
@noindent
|
||
Please include the manual's edition number and update date in your messages.
|
||
|
||
@menu
|
||
* Why:: The Purpose of GNU @code{gettext}
|
||
* Concepts:: I18n, L10n, and Such
|
||
* Aspects:: Aspects in Native Language Support
|
||
* Files:: Files Conveying Translations
|
||
* Overview:: Overview of GNU @code{gettext}
|
||
@end menu
|
||
|
||
@node Why
|
||
@section The Purpose of GNU @code{gettext}
|
||
|
||
Usually, programs are written and documented in English, and use
|
||
English at execution time to interact with users. This is true
|
||
not only of GNU software, but also of a great deal of proprietary
|
||
and free software. Using a common language is quite handy for
|
||
communication between developers, maintainers and users from all
|
||
countries. On the other hand, most people are less comfortable with
|
||
English than with their own native language, and would prefer to
|
||
use their mother tongue for day-to-day work, as far as possible.
|
||
Many would simply @emph{love} to see their computer screen showing
|
||
a lot less of English, and far more of their own language.
|
||
|
||
@cindex Translation Project
|
||
However, to many people, this dream might appear so far-fetched that
|
||
they may believe it is not even worth spending time thinking about
|
||
it. They have no confidence at all that the dream might ever
|
||
become true. Yet some have not lost hope, and have organized themselves.
|
||
The Translation Project is a formalization of this hope into a
|
||
workable structure, which has a good chance to get all of us nearer
|
||
the achievement of a truly multi-lingual set of programs.
|
||
|
||
GNU @code{gettext} is an important step for the Translation Project,
|
||
as it is an asset on which we may build many other steps. This package
|
||
offers to programmers, translators and even users, a well integrated
|
||
set of tools and documentation. Specifically, the GNU @code{gettext}
|
||
utilities are a set of tools that provides a framework within which
|
||
other free packages may produce multi-lingual messages. These tools
|
||
include
|
||
|
||
@itemize @bullet
|
||
@item
|
||
A set of conventions about how programs should be written to support
|
||
message catalogs.
|
||
|
||
@item
|
||
A directory and file naming organization for the message catalogs
|
||
themselves.
|
||
|
||
@item
|
||
A runtime library supporting the retrieval of translated messages.
|
||
|
||
@item
|
||
A few stand-alone programs to massage in various ways the sets of
|
||
translatable strings, or already translated strings.
|
||
|
||
@item
|
||
A library supporting the parsing and creation of files containing
|
||
translated messages.
|
||
|
||
@item
|
||
A special mode for Emacs@footnote{In this manual, all mentions of Emacs
|
||
refer to either GNU Emacs or to XEmacs, which people sometimes call FSF
|
||
Emacs and Lucid Emacs, respectively.} which helps preparing these sets
|
||
and bringing them up to date.
|
||
@end itemize
|
||
|
||
GNU @code{gettext} is designed to minimize the impact of
|
||
internationalization on program sources, keeping this impact as small
|
||
and hardly noticeable as possible. Internationalization has better
|
||
chances of succeeding if it is very light weighted, or at least,
|
||
appears to be so, when looking at program sources.
|
||
|
||
The Translation Project also uses the GNU @code{gettext} distribution
|
||
as a vehicle for documenting its structure and methods. This goes
|
||
beyond the strict technicalities of documenting the GNU @code{gettext}
|
||
proper. By so doing, translators will find in a single place, as
|
||
far as possible, all they need to know for properly doing their
|
||
translating work. Also, this supplemental documentation might
|
||
help programmers, and even curious users, in understanding how GNU
|
||
@code{gettext} is related to the remainder of the Translation
|
||
Project, and consequently, have a glimpse at the @emph{big picture}.
|
||
|
||
@node Concepts
|
||
@section I18n, L10n, and Such
|
||
|
||
@cindex i18n
|
||
@cindex l10n
|
||
Two long words appear all the time when we discuss support of native
|
||
language in programs, and these words have a precise meaning, worth
|
||
being explained here, once and for all in this document. The words are
|
||
@emph{internationalization} and @emph{localization}. Many people,
|
||
tired of writing these long words over and over again, took the
|
||
habit of writing @dfn{i18n} and @dfn{l10n} instead, quoting the first
|
||
and last letter of each word, and replacing the run of intermediate
|
||
letters by a number merely telling how many such letters there are.
|
||
But in this manual, for the sake of clarity, we will patiently write
|
||
the names in full, each time@dots{}
|
||
|
||
@cindex internationalization
|
||
By @dfn{internationalization}, one refers to the operation by which a
|
||
program, or a set of programs turned into a package, is made aware of and
|
||
able to support multiple languages. This is a generalization process,
|
||
by which the programs are untied from calling only English strings or
|
||
other English specific habits, and connected to generic ways of doing
|
||
the same, instead. Program developers may use various techniques to
|
||
internationalize their programs. Some of these have been standardized.
|
||
GNU @code{gettext} offers one of these standards. @xref{Programmers}.
|
||
|
||
@cindex localization
|
||
By @dfn{localization}, one means the operation by which, in a set
|
||
of programs already internationalized, one gives the program all
|
||
needed information so that it can adapt itself to handle its input
|
||
and output in a fashion which is correct for some native language and
|
||
cultural habits. This is a particularisation process, by which generic
|
||
methods already implemented in an internationalized program are used
|
||
in specific ways. The programming environment puts several functions
|
||
at the programmer's disposal which allow this runtime configuration.
|
||
The formal description of a specific set of cultural habits for some
|
||
country, together with all associated translations targeted to the
|
||
same native language, is called the @dfn{locale} for this language
|
||
or country. Users achieve localization of programs by setting proper
|
||
values to special environment variables, prior to executing those
|
||
programs, identifying which locale should be used.
|
||
|
||
In fact, locale message support is only one component of the cultural
|
||
data that makes up a particular locale. There are a whole host of
|
||
routines and functions provided to aid programmers in developing
|
||
internationalized software and which allow them to access the data
|
||
stored in a particular locale. When someone presently refers to a
|
||
particular locale, they are obviously referring to the data stored
|
||
within that particular locale. Similarly, if a programmer is referring
|
||
to ``accessing the locale routines'', they are referring to the
|
||
complete suite of routines that access all of the locale's information.
|
||
|
||
@cindex NLS
|
||
@cindex Native Language Support
|
||
@cindex Natural Language Support
|
||
One uses the expression @dfn{Native Language Support}, or merely NLS,
|
||
for speaking of the overall activity or feature encompassing both
|
||
internationalization and localization, allowing for multi-lingual
|
||
interactions in a program. In a nutshell, one could say that
|
||
internationalization is the operation by which further localizations
|
||
are made possible.
|
||
|
||
Also, very roughly said, when it comes to multi-lingual messages,
|
||
internationalization is usually taken care of by programmers, and
|
||
localization is usually taken care of by translators.
|
||
|
||
@node Aspects
|
||
@section Aspects in Native Language Support
|
||
|
||
@cindex translation aspects
|
||
For a totally multi-lingual distribution, there are many things to
|
||
translate beyond output messages.
|
||
|
||
@itemize @bullet
|
||
@item
|
||
As of today, GNU @code{gettext} offers a complete toolset for
|
||
translating messages output by C programs. Perl scripts and shell
|
||
scripts will also need to be translated. Even if there are today some hooks
|
||
by which this can be done, these hooks are not integrated as well as they
|
||
should be.
|
||
|
||
@item
|
||
Some programs, like @code{autoconf} or @code{bison}, are able
|
||
to produce other programs (or scripts). Even if the generating
|
||
programs themselves are internationalized, the generated programs they
|
||
produce may need internationalization on their own, and this indirect
|
||
internationalization could be automated right from the generating
|
||
program. In fact, quite usually, generating and generated programs
|
||
could be internationalized independently, as the effort needed is
|
||
fairly orthogonal.
|
||
|
||
@item
|
||
A few programs include textual tables which might need translation
|
||
themselves, independently of the strings contained in the program
|
||
itself. For example, @w{RFC 1345} gives an English description for each
|
||
character which the @code{recode} program is able to reconstruct at execution.
|
||
Since these descriptions are extracted from the RFC by mechanical means,
|
||
translating them properly would require a prior translation of the RFC
|
||
itself.
|
||
|
||
@item
|
||
Almost all programs accept options, which are often worded out so as to
|
||
be descriptive for the English readers; one might want to consider
|
||
offering translated versions for program options as well.
|
||
|
||
@item
|
||
Many programs read, interpret, compile, or are somewhat driven by
|
||
input files which are texts containing keywords, identifiers, or
|
||
replies which are inherently translatable. For example, one may want
|
||
@code{gcc} to allow diacriticized characters in identifiers or use
|
||
translated keywords; @samp{rm -i} might accept something other than
|
||
@samp{y} or @samp{n} for replies, etc. Even if the program will
|
||
eventually make most of its output in the foreign languages, one has
|
||
to decide whether the input syntax, option values, etc., are to be
|
||
localized or not.
|
||
|
||
@item
|
||
The manual accompanying a package, as well as all documentation files
|
||
in the distribution, could surely be translated, too. Translating a
|
||
manual, with the intent of later keeping up with updates, is a major
|
||
undertaking in itself, generally.
|
||
|
||
@end itemize
|
||
|
||
As we already stressed, translation is only one aspect of locales.
|
||
Other internationalization aspects are system services and are handled
|
||
in GNU @code{libc}. There
|
||
are many attributes that are needed to define a country's cultural
|
||
conventions. These attributes include, besides the country's native
|
||
language, the formatting of the date and time, the representation of
|
||
numbers, the symbols for currency, etc. These local @dfn{rules} are
|
||
termed the country's locale. The locale represents the knowledge
|
||
needed to support the country's native attributes.
|
||
|
||
@cindex locale categories
|
||
There are a few major areas which may vary between countries and
|
||
hence, define what a locale must describe. The following list helps
|
||
putting multi-lingual messages into the proper context of other tasks
|
||
related to locales. See the GNU @code{libc} manual for details.
|
||
|
||
@table @emph
|
||
|
||
@item Characters and Codesets
|
||
@cindex codeset
|
||
@cindex encoding
|
||
@cindex character encoding
|
||
@cindex locale category, LC_CTYPE
|
||
|
||
The codeset most commonly used throughout the USA and most English
|
||
speaking parts of the world is the ASCII codeset. However, there are
|
||
many characters needed by various locales that are not found within
|
||
this codeset. The 8-bit @w{ISO 8859-1} code set has most of the special
|
||
characters needed to handle the major European languages. However, in
|
||
many cases, choosing @w{ISO 8859-1} is nevertheless not adequate: it
|
||
doesn't even handle the major European currency. Hence each locale
|
||
will need to specify which codeset they need to use and will need
|
||
to have the appropriate character handling routines to cope with
|
||
the codeset.
|
||
|
||
@item Currency
|
||
@cindex currency symbols
|
||
@cindex locale category, LC_MONETARY
|
||
|
||
The symbols used vary from country to country as does the position
|
||
used by the symbol. Software needs to be able to transparently
|
||
display currency figures in the native mode for each locale.
|
||
|
||
@item Dates
|
||
@cindex date format
|
||
@cindex locale category, LC_TIME
|
||
|
||
The format of date varies between locales. For example, Christmas day
|
||
in 1994 is written as 12/25/94 in the USA and as 25/12/94 in Australia.
|
||
Other countries might use @w{ISO 8601} dates, etc.
|
||
|
||
Time of the day may be noted as @var{hh}:@var{mm}, @var{hh}.@var{mm},
|
||
or otherwise. Some locales require time to be specified in 24-hour
|
||
mode rather than as AM or PM. Further, the nature and yearly extent
|
||
of the Daylight Saving correction vary widely between countries.
|
||
|
||
@item Numbers
|
||
@cindex number format
|
||
@cindex locale category, LC_NUMERIC
|
||
|
||
Numbers can be represented differently in different locales.
|
||
For example, the following numbers are all written correctly for
|
||
their respective locales:
|
||
|
||
@example
|
||
12,345.67 English
|
||
12.345,67 German
|
||
12345,67 French
|
||
1,2345.67 Asia
|
||
@end example
|
||
|
||
Some programs could go further and use different unit systems, like
|
||
English units or Metric units, or even take into account variants
|
||
about how numbers are spelled in full.
|
||
|
||
@item Messages
|
||
@cindex messages
|
||
@cindex locale category, LC_MESSAGES
|
||
|
||
The most obvious area is the language support within a locale. This is
|
||
where GNU @code{gettext} provides the means for developers and users to
|
||
easily change the language that the software uses to communicate to
|
||
the user.
|
||
|
||
@end table
|
||
|
||
@cindex locale categories
|
||
These areas of cultural conventions are called @emph{locale categories}.
|
||
It is an unfortunate term; @emph{locale aspects} or @emph{locale feature
|
||
categories} would be a better term, because each ``locale category''
|
||
describes an area or task that requires localization. The concrete data
|
||
that describes the cultural conventions for such an area and for a particular
|
||
culture is also called a @emph{locale category}. In this sense, a locale
|
||
is composed of several locale categories: the locale category describing
|
||
the codeset, the locale category describing the formatting of numbers,
|
||
the locale category containing the translated messages, and so on.
|
||
|
||
@cindex Linux
|
||
Components of locale outside of message handling are standardized in
|
||
the ISO C standard and the POSIX:2001 standard (also known as the SUSV3
|
||
specification). GNU @code{libc}
|
||
fully implements this, and most other modern systems provide a more
|
||
or less reasonable support for at least some of the missing components.
|
||
|
||
@node Files
|
||
@section Files Conveying Translations
|
||
|
||
@cindex files, @file{.po} and @file{.mo}
|
||
The letters PO in @file{.po} files mean Portable Object, to
|
||
distinguish it from @file{.mo} files, where MO stands for Machine
|
||
Object. This paradigm, as well as the PO file format, is inspired
|
||
by the NLS standard developed by Uniforum, and first implemented by
|
||
Sun in their Solaris system.
|
||
|
||
PO files are meant to be read and edited by humans, and associate each
|
||
original, translatable string of a given package with its translation
|
||
in a particular target language. A single PO file is dedicated to
|
||
a single target language. If a package supports many languages,
|
||
there is one such PO file per language supported, and each package
|
||
has its own set of PO files. These PO files are best created by
|
||
the @code{xgettext} program, and later updated or refreshed through
|
||
the @code{msgmerge} program. Program @code{xgettext} extracts all
|
||
marked messages from a set of C files and initializes a PO file with
|
||
empty translations. Program @code{msgmerge} takes care of adjusting
|
||
PO files between releases of the corresponding sources, commenting
|
||
obsolete entries, initializing new ones, and updating all source
|
||
line references. Files ending with @file{.pot} are kind of base
|
||
translation files found in distributions, in PO file format.
|
||
|
||
MO files are meant to be read by programs, and are binary in nature.
|
||
A few systems already offer tools for creating and handling MO files
|
||
as part of the Native Language Support coming with the system, but the
|
||
format of these MO files is often different from system to system,
|
||
and non-portable. The tools already provided with these systems don't
|
||
support all the features of GNU @code{gettext}. Therefore GNU
|
||
@code{gettext} uses its own format for MO files. Files ending with
|
||
@file{.gmo} are really MO files, when it is known that these files use
|
||
the GNU format.
|
||
|
||
@node Overview
|
||
@section Overview of GNU @code{gettext}
|
||
|
||
@cindex overview of @code{gettext}
|
||
@cindex big picture
|
||
@cindex tutorial of @code{gettext} usage
|
||
The following diagram summarizes the relation between the files
|
||
handled by GNU @code{gettext} and the tools acting on these files.
|
||
It is followed by somewhat detailed explanations, which you should
|
||
read while keeping an eye on the diagram. Having a clear understanding
|
||
of these interrelations will surely help programmers, translators
|
||
and maintainers.
|
||
|
||
@ifhtml
|
||
@example
|
||
@group
|
||
Original C Sources ───> Preparation ───> Marked C Sources ───╮
|
||
│
|
||
╭─────────<─── GNU gettext Library │
|
||
╭─── make <───┤ │
|
||
│ ╰─────────<────────────────────┬───────────────╯
|
||
│ │
|
||
│ ╭─────<─── PACKAGE.pot <─── xgettext <───╯ ╭───<─── PO Compendium
|
||
│ │ │ ↑
|
||
│ │ ╰───╮ │
|
||
│ ╰───╮ ├───> PO editor ───╮
|
||
│ ├────> msgmerge ──────> LANG.po ────>────────╯ │
|
||
│ ╭───╯ │
|
||
│ │ │
|
||
│ ╰─────────────<───────────────╮ │
|
||
│ ├─── New LANG.po <────────────────────╯
|
||
│ ╭─── LANG.gmo <─── msgfmt <───╯
|
||
│ │
|
||
│ ╰───> install ───> /.../LANG/PACKAGE.mo ───╮
|
||
│ ├───> "Hello world!"
|
||
╰───────> install ───> /.../bin/PROGRAM ───────╯
|
||
@end group
|
||
@end example
|
||
@end ifhtml
|
||
@ifnothtml
|
||
@example
|
||
@group
|
||
Original C Sources ---> Preparation ---> Marked C Sources ---.
|
||
|
|
||
.---------<--- GNU gettext Library |
|
||
.--- make <---+ |
|
||
| `---------<--------------------+---------------'
|
||
| |
|
||
| .-----<--- PACKAGE.pot <--- xgettext <---' .---<--- PO Compendium
|
||
| | | ^
|
||
| | `---. |
|
||
| `---. +---> PO editor ---.
|
||
| +----> msgmerge ------> LANG.po ---->--------' |
|
||
| .---' |
|
||
| | |
|
||
| `-------------<---------------. |
|
||
| +--- New LANG.po <--------------------'
|
||
| .--- LANG.gmo <--- msgfmt <---'
|
||
| |
|
||
| `---> install ---> /.../LANG/PACKAGE.mo ---.
|
||
| +---> "Hello world!"
|
||
`-------> install ---> /.../bin/PROGRAM -------'
|
||
@end group
|
||
@end example
|
||
@end ifnothtml
|
||
|
||
@cindex marking translatable strings
|
||
As a programmer, the first step to bringing GNU @code{gettext}
|
||
into your package is identifying, right in the C sources, those strings
|
||
which are meant to be translatable, and those which are untranslatable.
|
||
This tedious job can be done a little more comfortably using emacs PO
|
||
mode, but you can use any means familiar to you for modifying your
|
||
C sources. Besides this, some other simple, standard changes are needed to
|
||
properly initialize the translation library. @xref{Sources}, for
|
||
more information about all this.
|
||
|
||
For newly written software the strings of course can and should be
|
||
marked while writing it. The @code{gettext} approach makes this
|
||
very easy. Simply put the following lines at the beginning of each file
|
||
or in a central header file:
|
||
|
||
@example
|
||
@group
|
||
#define _(String) (String)
|
||
#define N_(String) String
|
||
#define textdomain(Domain)
|
||
#define bindtextdomain(Package, Directory)
|
||
@end group
|
||
@end example
|
||
|
||
@noindent
|
||
Doing this allows you to prepare the sources for internationalization.
|
||
Later when you feel ready for the step to use the @code{gettext} library
|
||
simply replace these definitions by the following:
|
||
|
||
@cindex include file @file{libintl.h}
|
||
@example
|
||
@group
|
||
#include <libintl.h>
|
||
#define _(String) gettext (String)
|
||
#define gettext_noop(String) String
|
||
#define N_(String) gettext_noop (String)
|
||
@end group
|
||
@end example
|
||
|
||
@cindex link with @file{libintl}
|
||
@cindex Linux
|
||
@noindent
|
||
and link against @file{libintl.a} or @file{libintl.so}. Note that on
|
||
GNU systems, you don't need to link with @code{libintl} because the
|
||
@code{gettext} library functions are already contained in GNU libc.
|
||
That is all you have to change.
|
||
|
||
@cindex template PO file
|
||
@cindex files, @file{.pot}
|
||
Once the C sources have been modified, the @code{xgettext} program
|
||
is used to find and extract all translatable strings, and create a
|
||
PO template file out of all these. This @file{@var{package}.pot} file
|
||
contains all original program strings. It has sets of pointers to
|
||
exactly where in C sources each string is used. All translations
|
||
are set to empty. The letter @code{t} in @file{.pot} marks this as
|
||
a Template PO file, not yet oriented towards any particular language.
|
||
@xref{xgettext Invocation}, for more details about how one calls the
|
||
@code{xgettext} program. If you are @emph{really} lazy, you might
|
||
be interested in working a lot more right away, and preparing the
|
||
whole distribution setup (@pxref{Maintainers}). By doing so, you
|
||
spare yourself typing the @code{xgettext} command, as @code{make}
|
||
should now generate the proper things automatically for you!
|
||
|
||
The first time through, there is no @file{@var{lang}.po} yet, so the
|
||
@code{msgmerge} step may be skipped and replaced by a mere copy of
|
||
@file{@var{package}.pot} to @file{@var{lang}.po}, where @var{lang}
|
||
represents the target language. See @ref{Creating} for details.
|
||
|
||
Then comes the initial translation of messages. Translation in
|
||
itself is a whole matter, still exclusively meant for humans,
|
||
and whose complexity far overwhelms the level of this manual.
|
||
Nevertheless, a few hints are given in some other chapter of this
|
||
manual (@pxref{Translators}). You will also find there indications
|
||
about how to contact translating teams, or becoming part of them,
|
||
for sharing your translating concerns with others who target the same
|
||
native language.
|
||
|
||
While adding the translated messages into the @file{@var{lang}.po}
|
||
PO file, if you are not using one of the dedicated PO file editors
|
||
(@pxref{Editing}), you are on your own
|
||
for ensuring that your efforts fully respect the PO file format, and quoting
|
||
conventions (@pxref{PO Files}). This is surely not an impossible task,
|
||
as this is the way many people have handled PO files around 1995.
|
||
On the other hand, by using a PO file editor, most details
|
||
of PO file format are taken care of for you, but you have to acquire
|
||
some familiarity with the PO file editor itself.
|
||
|
||
If some common translations have already been saved into a compendium
|
||
PO file, translators may use PO mode for initializing untranslated
|
||
entries from the compendium, and also save selected translations into
|
||
the compendium, updating it (@pxref{Compendium}). Compendium files
|
||
are meant to be exchanged between members of a given translation team.
|
||
|
||
Programs, or packages of programs, are dynamic in nature: users write
|
||
bug reports and suggestions for improvements, maintainers react by
|
||
modifying programs in various ways. The fact that a package has
|
||
already been internationalized should not make maintainers shy
|
||
of adding new strings, or modifying strings already translated.
|
||
They just do their job the best they can. For the Translation
|
||
Project to work smoothly, it is important that maintainers do not
|
||
carry translation concerns on their already loaded shoulders, and that
|
||
translators be kept as free as possible of programming concerns.
|
||
|
||
The only concern maintainers should have is carefully marking new
|
||
strings as translatable, when they should be, and do not otherwise
|
||
worry about them being translated, as this will come in proper time.
|
||
Consequently, when programs and their strings are adjusted in various
|
||
ways by maintainers, and for matters usually unrelated to translation,
|
||
@code{xgettext} would construct @file{@var{package}.pot} files which are
|
||
evolving over time, so the translations carried by @file{@var{lang}.po}
|
||
are slowly fading out of date.
|
||
|
||
@cindex evolution of packages
|
||
It is important for translators (and even maintainers) to understand
|
||
that package translation is a continuous process in the lifetime of a
|
||
package, and not something which is done once and for all at the start.
|
||
After an initial burst of translation activity for a given package,
|
||
interventions are needed once in a while, because here and there,
|
||
translated entries become obsolete, and new untranslated entries
|
||
appear, needing translation.
|
||
|
||
The @code{msgmerge} program has the purpose of refreshing an already
|
||
existing @file{@var{lang}.po} file, by comparing it with a newer
|
||
@file{@var{package}.pot} template file, extracted by @code{xgettext}
|
||
out of recent C sources. The refreshing operation adjusts all
|
||
references to C source locations for strings, since these strings
|
||
move as programs are modified. Also, @code{msgmerge} comments out as
|
||
obsolete, in @file{@var{lang}.po}, those already translated entries
|
||
which are no longer used in the program sources (@pxref{Obsolete
|
||
Entries}). It finally discovers new strings and inserts them in
|
||
the resulting PO file as untranslated entries (@pxref{Untranslated
|
||
Entries}). @xref{msgmerge Invocation}, for more information about what
|
||
@code{msgmerge} really does.
|
||
|
||
Whatever route or means taken, the goal is to obtain an updated
|
||
@file{@var{lang}.po} file offering translations for all strings.
|
||
|
||
The temporal mobility, or fluidity of PO files, is an integral part of
|
||
the translation game, and should be well understood, and accepted.
|
||
People resisting it will have a hard time participating in the
|
||
Translation Project, or will give a hard time to other participants! In
|
||
particular, maintainers should relax and include all available official
|
||
PO files in their distributions, even if these have not recently been
|
||
updated, without exerting pressure on the translator teams to get the
|
||
job done. The pressure should rather come
|
||
from the community of users speaking a particular language, and
|
||
maintainers should consider themselves fairly relieved of any concern
|
||
about the adequacy of translation files. On the other hand, translators
|
||
should reasonably try updating the PO files they are responsible for,
|
||
while the package is undergoing pretest, prior to an official
|
||
distribution.
|
||
|
||
Once the PO file is complete and dependable, the @code{msgfmt} program
|
||
is used for turning the PO file into a machine-oriented format, which
|
||
may yield efficient retrieval of translations by the programs of the
|
||
package, whenever needed at runtime (@pxref{MO Files}). @xref{msgfmt
|
||
Invocation}, for more information about all modes of execution
|
||
for the @code{msgfmt} program.
|
||
|
||
Finally, the modified and marked C sources are compiled and linked
|
||
with the GNU @code{gettext} library, usually through the operation of
|
||
@code{make}, given a suitable @file{Makefile} exists for the project,
|
||
and the resulting executable is installed somewhere users will find it.
|
||
The MO files themselves should also be properly installed. Given the
|
||
appropriate environment variables are set (@pxref{Setting the POSIX Locale}),
|
||
the program should localize itself automatically, whenever it executes.
|
||
|
||
The remainder of this manual has the purpose of explaining in depth the various
|
||
steps outlined above.
|
||
|
||
@node Users
|
||
@chapter The User's View
|
||
|
||
Nowadays, when users log into a computer, they usually find that all
|
||
their programs show messages in their native language -- at least for
|
||
users of languages with an active free software community, like French or
|
||
German; to a lesser extent for languages with a smaller participation in
|
||
free software and the GNU project, like Hindi and Filipino.
|
||
|
||
How does this work? How can the user influence the language that is used
|
||
by the programs? This chapter will answer it.
|
||
|
||
@menu
|
||
* System Installation:: Questions During Operating System Installation
|
||
* Setting the GUI Locale:: How to Specify the Locale Used by GUI Programs
|
||
* Setting the POSIX Locale:: How to Specify the Locale According to POSIX
|
||
* Working in a Windows console:: Obtaining good output in a Windows console
|
||
* Installing Localizations:: How to Install Additional Translations
|
||
@end menu
|
||
|
||
@node System Installation
|
||
@section Operating System Installation
|
||
|
||
The default language is often already specified during operating system
|
||
installation. When the operating system is installed, the installer
|
||
typically asks for the language used for the installation process and,
|
||
separately, for the language to use in the installed system. Some OS
|
||
installers only ask for the language once.
|
||
|
||
This determines the system-wide default language for all users. But the
|
||
installers often give the possibility to install extra localizations for
|
||
additional languages. For example, the localizations of KDE (the K
|
||
Desktop Environment) and OpenOffice.org are often bundled separately,
|
||
as one installable package per language.
|
||
|
||
At this point it is good to consider the intended use of the machine: If
|
||
it is a machine designated for personal use, additional localizations are
|
||
probably not necessary. If, however, the machine is in use in an
|
||
organization or company that has international relationships, one can
|
||
consider the needs of guest users. If you have a guest from abroad, for
|
||
a week, what could be his preferred locales? It may be worth installing
|
||
these additional localizations ahead of time, since they cost only a bit
|
||
of disk space at this point.
|
||
|
||
The system-wide default language is the locale configuration that is used
|
||
when a new user account is created. But the user can have his own locale
|
||
configuration that is different from the one of the other users of the
|
||
same machine. He can specify it, typically after the first login, as
|
||
described in the next section.
|
||
|
||
@node Setting the GUI Locale
|
||
@section Setting the Locale Used by GUI Programs
|
||
|
||
The immediately available programs in a user's desktop come from a group
|
||
of programs called a ``desktop environment''; it usually includes the window
|
||
manager, a web browser, a text editor, and more. The most common free
|
||
desktop environments are KDE, GNOME, and Xfce.
|
||
|
||
The locale used by GUI programs of the desktop environment can be specified
|
||
in a configuration screen called ``control center'', ``language settings''
|
||
or ``country settings''.
|
||
|
||
Individual GUI programs that are not part of the desktop environment can
|
||
have their locale specified either in a settings panel, or through environment
|
||
variables.
|
||
|
||
For some programs, it is possible to specify the locale through environment
|
||
variables, possibly even to a different locale than the desktop's locale.
|
||
This means, instead of starting a program through a menu or from the file
|
||
system, you can start it from the command-line, after having set some
|
||
environment variables. The environment variables can be those specified
|
||
in the next section (@ref{Setting the POSIX Locale}); for some versions of
|
||
KDE, however, the locale is specified through a variable @code{KDE_LANG},
|
||
rather than @code{LANG} or @code{LC_ALL}.
|
||
|
||
@node Setting the POSIX Locale
|
||
@section Setting the Locale through Environment Variables
|
||
|
||
As a user, if your language has been installed for this package, in the
|
||
simplest case, you only have to set the @code{LANG} environment variable
|
||
to the appropriate @samp{@var{ll}_@var{CC}} combination. For example,
|
||
let's suppose that you speak German and live in Germany. At the shell
|
||
prompt, merely execute
|
||
@w{@samp{setenv LANG de_DE}} (in @code{csh}),
|
||
@w{@samp{export LANG; LANG=de_DE}} (in @code{sh}) or
|
||
@w{@samp{export LANG=de_DE}} (in @code{bash}). This can be done from your
|
||
@file{.login} or @file{.profile} file, once and for all.
|
||
|
||
@menu
|
||
* Locale Names:: What a Locale Specification Looks Like
|
||
* Locale Environment Variables:: Which Environment Variable Specifies What
|
||
* The LANGUAGE variable:: How to Specify a Priority List of Languages
|
||
@end menu
|
||
|
||
@node Locale Names
|
||
@subsection Locale Names
|
||
|
||
A locale name usually has the form @samp{@var{ll}_@var{CC}}. Here
|
||
@samp{@var{ll}} is an @w{ISO 639} two-letter language code, and
|
||
@samp{@var{CC}} is an @w{ISO 3166} two-letter country code. For example,
|
||
for German in Germany, @var{ll} is @code{de}, and @var{CC} is @code{DE}.
|
||
You find a list of the language codes in appendix @ref{Language Codes} and
|
||
a list of the country codes in appendix @ref{Country Codes}.
|
||
|
||
You might think that the country code specification is redundant. But in
|
||
fact, some languages have dialects in different countries. For example,
|
||
@samp{de_AT} is used for Austria, and @samp{pt_BR} for Brazil. The country
|
||
code serves to distinguish the dialects.
|
||
|
||
Many locale names have an extended syntax
|
||
@samp{@var{ll}_@var{CC}.@var{encoding}} that also specifies the character
|
||
encoding. These are in use because between 2000 and 2005, most users have
|
||
switched to locales in UTF-8 encoding. For example, the German locale on
|
||
glibc systems is nowadays @samp{de_DE.UTF-8}. The older name @samp{de_DE}
|
||
still refers to the German locale as of 2000 that stores characters in
|
||
ISO-8859-1 encoding -- a text encoding that cannot even accommodate the Euro
|
||
currency sign.
|
||
|
||
Some locale names use @samp{@var{ll}_@var{CC}@@@var{variant}} instead of
|
||
@samp{@var{ll}_@var{CC}}. The @samp{@@@var{variant}} can denote any kind of
|
||
characteristic that is not already implied by the language @var{ll} and
|
||
the country @var{CC}. It can denote a particular monetary unit. For example,
|
||
on glibc systems, @samp{de_DE@@euro} denotes the locale that uses the Euro
|
||
currency, in contrast to the older locale @samp{de_DE} which implies the use
|
||
of the currency before 2002. It can also denote a dialect of the language,
|
||
or the script used to write text (for example, @samp{sr_RS@@latin} uses the
|
||
Latin script, whereas @samp{sr_RS} uses the Cyrillic script to write Serbian),
|
||
or the orthography rules, or similar.
|
||
|
||
On other systems, some variations of this scheme are used, such as
|
||
@samp{@var{ll}}. You can get the list of locales supported by your system
|
||
for your language by running the command @samp{locale -a | grep '^@var{ll}'}.
|
||
|
||
There is also a special locale, called @samp{C}.
|
||
@c Don't mention that this locale also has the name "POSIX". When we talk about
|
||
@c the "POSIX locale", we mean the "locale as specified in the POSIX way", and
|
||
@c mentioning a locale called "POSIX" would bring total confusion.
|
||
When it is used, it disables all localization: in this locale, all programs
|
||
standardized by POSIX use English messages and an unspecified character
|
||
encoding (often US-ASCII, but sometimes also ISO-8859-1 or UTF-8, depending on
|
||
the operating system).
|
||
|
||
@node Locale Environment Variables
|
||
@subsection Locale Environment Variables
|
||
@cindex setting up @code{gettext} at run time
|
||
@cindex selecting message language
|
||
@cindex language selection
|
||
|
||
A locale is composed of several @emph{locale categories}, see @ref{Aspects}.
|
||
When a program looks up locale dependent values, it does this according to
|
||
the following environment variables, in priority order:
|
||
|
||
@enumerate
|
||
@vindex LANGUAGE@r{, environment variable}
|
||
@item @code{LANGUAGE}
|
||
@vindex LC_ALL@r{, environment variable}
|
||
@item @code{LC_ALL}
|
||
@vindex LC_CTYPE@r{, environment variable}
|
||
@vindex LC_NUMERIC@r{, environment variable}
|
||
@vindex LC_TIME@r{, environment variable}
|
||
@vindex LC_COLLATE@r{, environment variable}
|
||
@vindex LC_MONETARY@r{, environment variable}
|
||
@vindex LC_MESSAGES@r{, environment variable}
|
||
@item @code{LC_xxx}, according to selected locale category:
|
||
@code{LC_CTYPE}, @code{LC_NUMERIC}, @code{LC_TIME}, @code{LC_COLLATE},
|
||
@code{LC_MONETARY}, @code{LC_MESSAGES}, ...
|
||
@vindex LANG@r{, environment variable}
|
||
@item @code{LANG}
|
||
@end enumerate
|
||
|
||
Variables whose value is set but is empty are ignored in this lookup.
|
||
|
||
@code{LANG} is the normal environment variable for specifying a locale.
|
||
As a user, you normally set this variable (unless some of the other variables
|
||
have already been set by the system, in @file{/etc/profile} or similar
|
||
initialization files).
|
||
|
||
@code{LC_CTYPE}, @code{LC_NUMERIC}, @code{LC_TIME}, @code{LC_COLLATE},
|
||
@code{LC_MONETARY}, @code{LC_MESSAGES}, and so on, are the environment
|
||
variables meant to override @code{LANG} and affecting a single locale
|
||
category only. For example, assume you are a Swedish user in Spain, and you
|
||
want your programs to handle numbers and dates according to Spanish
|
||
conventions, and only the messages should be in Swedish. Then you could
|
||
create a locale named @samp{sv_ES} or @samp{sv_ES.UTF-8} by use of the
|
||
@code{localedef} program. But it is simpler, and achieves the same effect,
|
||
to set the @code{LANG} variable to @code{es_ES.UTF-8} and the
|
||
@code{LC_MESSAGES} variable to @code{sv_SE.UTF-8}; these two locales come
|
||
already preinstalled with the operating system.
|
||
|
||
@code{LC_ALL} is an environment variable that overrides all of these.
|
||
It is typically used in scripts that run particular programs. For example,
|
||
@code{configure} scripts generated by GNU autoconf use @code{LC_ALL} to make
|
||
sure that the configuration tests don't operate in locale dependent ways.
|
||
|
||
Some systems, unfortunately, set @code{LC_ALL} in @file{/etc/profile} or in
|
||
similar initialization files. As a user, you therefore have to unset this
|
||
variable if you want to set @code{LANG} and optionally some of the other
|
||
@code{LC_xxx} variables.
|
||
|
||
The @code{LANGUAGE} variable is described in the next subsection.
|
||
|
||
@node The LANGUAGE variable
|
||
@subsection Specifying a Priority List of Languages
|
||
|
||
Not all programs have translations for all languages. By default, an
|
||
English message is shown in place of a nonexistent translation. If you
|
||
understand other languages, you can set up a priority list of languages.
|
||
This is done through a different environment variable, called
|
||
@code{LANGUAGE}. GNU @code{gettext} gives preference to @code{LANGUAGE}
|
||
over @code{LC_ALL} and @code{LANG} for the purpose of message handling,
|
||
but you still need to have @code{LANG} (or @code{LC_ALL}) set to the primary
|
||
language; this is required by other parts of the system libraries.
|
||
For example, some Swedish users who would rather read translations in
|
||
German than English when Swedish is not available, set @code{LANGUAGE}
|
||
to @samp{sv:de} while leaving @code{LANG} set to @samp{sv_SE}.
|
||
|
||
Special advice for Norwegian users: The language code for Norwegian
|
||
bokm@ringaccent{a}l changed from @samp{no} to @samp{nb} recently (in 2003).
|
||
During the transition period, while some message catalogs for this language
|
||
are installed under @samp{nb} and some older ones under @samp{no}, it is
|
||
recommended for Norwegian users to set @code{LANGUAGE} to @samp{nb:no} so that
|
||
both newer and older translations are used.
|
||
|
||
In the @code{LANGUAGE} environment variable, but not in the other
|
||
environment variables, @samp{@var{ll}_@var{CC}} combinations can be
|
||
abbreviated as @samp{@var{ll}} to denote the language's main dialect.
|
||
For example, @samp{de} is equivalent to @samp{de_DE} (German as spoken in
|
||
Germany), and @samp{pt} to @samp{pt_PT} (Portuguese as spoken in Portugal)
|
||
in this context.
|
||
|
||
Note: The variable @code{LANGUAGE} is ignored if the locale is set to
|
||
@samp{C}. In other words, you have to first enable localization, by setting
|
||
@code{LANG} (or @code{LC_ALL}) to a value other than @samp{C}, before you can
|
||
use a language priority list through the @code{LANGUAGE} variable.
|
||
|
||
@node Working in a Windows console
|
||
@section Obtaining good output in a Windows console
|
||
@cindex Windows
|
||
@cindex ANSI encoding
|
||
@cindex OEM encoding
|
||
@vindex OUTPUT_CHARSET@r{, environment variable}
|
||
|
||
On Windows, consoles such as the one started by the @code{cmd.exe}
|
||
program do input and output in an encoding, called ``OEM code page'',
|
||
that is different from the encoding that text-mode programs usually use,
|
||
called ``ANSI code page''. (Note: This problem does not exist for
|
||
Cygwin consoles; these consoles do input and output in the UTF-8
|
||
encoding.) As a workaround, you may request that the programs produce
|
||
output in this ``OEM'' encoding. To do so, set the environment variable
|
||
@code{OUTPUT_CHARSET} to the ``OEM'' encoding, through a command such as
|
||
@smallexample
|
||
set OUTPUT_CHARSET=CP850
|
||
@end smallexample
|
||
Note: This has an effect only on strings looked up in message catalogs;
|
||
other categories of text are usually not affected by this setting.
|
||
Note also that this environment variable also affects output sent to a
|
||
file or to a pipe; output to a file is most often expected to be in the
|
||
``ANSI'' or in the UTF-8 encoding.
|
||
|
||
Here are examples of the ``ANSI'' and ``OEM'' code pages:
|
||
|
||
@multitable @columnfractions .5 .25 .25
|
||
@headitem Territories @tie{} @tab @tie{} ANSI encoding @tie{} @tab @tie{} OEM encoding
|
||
@item Western Europe @tie{} @tab @tie{} CP1252 @tie{} @tab @tie{} CP850
|
||
@item Slavic countries (Latin 2) @tie{} @tab @tie{} CP1250 @tie{} @tab @tie{} CP852
|
||
@item Baltic countries @tie{} @tab @tie{} CP1257 @tie{} @tab @tie{} CP775
|
||
@item Russia @tie{} @tab @tie{} CP1251 @tie{} @tab @tie{} CP866
|
||
@end multitable
|
||
|
||
@node Installing Localizations
|
||
@section Installing Translations for Particular Programs
|
||
@cindex Translation Matrix
|
||
@cindex available translations
|
||
|
||
Languages are not equally well supported in all packages using GNU
|
||
@code{gettext}, and more translations are added over time. Usually, you
|
||
use the translations that are shipped with the operating system
|
||
or with particular packages that you install afterwards. But you can also
|
||
install newer localizations directly. For doing this, you will need an
|
||
understanding of where each localization file is stored on the file system.
|
||
|
||
@cindex @file{ABOUT-NLS} file
|
||
For programs that participate in the Translation Project, you can start
|
||
looking for translations here:
|
||
@url{https://translationproject.org/team/index.html}.
|
||
|
||
For programs that are part of the KDE project, the starting point is:
|
||
@url{https://l10n.kde.org/}.
|
||
|
||
For programs that are part of the GNOME project, the starting point is:
|
||
@url{https://wiki.gnome.org/TranslationProject}.
|
||
|
||
For other programs, you may check whether the program's source code package
|
||
contains some @file{@var{ll}.po} files; often they are kept together in a
|
||
directory called @file{po/}. Each @file{@var{ll}.po} file contains the
|
||
message translations for the language whose abbreviation is @var{ll}.
|
||
|
||
@node PO Files
|
||
@chapter The Format of PO Files
|
||
@cindex PO files' format
|
||
@cindex file format, @file{.po}
|
||
|
||
The GNU @code{gettext} toolset helps programmers and translators
|
||
in producing, updating and using translation files, mainly those
|
||
PO files which are textual, editable files. This chapter explains
|
||
the format of PO files.
|
||
|
||
A PO file is made up of many entries, each entry holding the relation
|
||
between an original untranslated string and its corresponding
|
||
translation. All entries in a given PO file usually pertain
|
||
to a single project, and all translations are expressed in a single
|
||
target language. One PO file @dfn{entry} has the following schematic
|
||
structure:
|
||
|
||
@example
|
||
@var{white-space}
|
||
# @var{translator-comments}
|
||
#. @var{extracted-comments}
|
||
#: @var{reference}@dots{}
|
||
#, @var{flag}@dots{}
|
||
#| msgid @var{previous-untranslated-string}
|
||
msgid @var{untranslated-string}
|
||
msgstr @var{translated-string}
|
||
@end example
|
||
|
||
The general structure of a PO file should be well understood by
|
||
the translator. When using PO mode, very little has to be known
|
||
about the format details, as PO mode takes care of them for her.
|
||
|
||
A simple entry can look like this:
|
||
|
||
@example
|
||
#: lib/error.c:116
|
||
msgid "Unknown system error"
|
||
msgstr "Error desconegut del sistema"
|
||
@end example
|
||
|
||
@cindex comments, translator
|
||
@cindex comments, automatic
|
||
@cindex comments, extracted
|
||
Entries begin with some optional white space. Usually, when generated
|
||
through GNU @code{gettext} tools, there is exactly one blank line
|
||
between entries. Then comments follow, on lines all starting with the
|
||
character @code{#}. There are two kinds of comments: those which have
|
||
some white space immediately following the @code{#} - the @var{translator
|
||
comments} -, which comments are created and maintained exclusively by the
|
||
translator, and those which have some non-white character just after the
|
||
@code{#} - the @var{automatic comments} -, which comments are created and
|
||
maintained automatically by GNU @code{gettext} tools. Comment lines
|
||
starting with @code{#.} contain comments given by the programmer, directed
|
||
at the translator; these comments are called @var{extracted comments}
|
||
because the @code{xgettext} program extracts them from the program's
|
||
source code. Comment lines starting with @code{#:} contain references to
|
||
the program's source code. Comment lines starting with @code{#,} contain
|
||
flags; more about these below. Comment lines starting with @code{#|}
|
||
contain the previous untranslated string for which the translator gave
|
||
a translation.
|
||
|
||
All comments, of either kind, are optional.
|
||
|
||
@kwindex msgid
|
||
@kwindex msgstr
|
||
After white space and comments, entries show two strings, namely
|
||
first the untranslated string as it appears in the original program
|
||
sources, and then, the translation of this string. The original
|
||
string is introduced by the keyword @code{msgid}, and the translation,
|
||
by @code{msgstr}. The two strings, untranslated and translated,
|
||
are quoted in various ways in the PO file, using @code{"}
|
||
delimiters and @code{\} escapes, but the translator does not really
|
||
have to pay attention to the precise quoting format, as PO mode fully
|
||
takes care of quoting for her.
|
||
|
||
The @code{msgid} strings, as well as automatic comments, are produced
|
||
and managed by other GNU @code{gettext} tools, and PO mode does not
|
||
provide means for the translator to alter these. The most she can
|
||
do is merely deleting them, and only by deleting the whole entry.
|
||
On the other hand, the @code{msgstr} string, as well as translator
|
||
comments, are really meant for the translator, and PO mode gives her
|
||
the full control she needs.
|
||
|
||
The comment lines beginning with @code{#,} are special because they are
|
||
not completely ignored by the programs as comments generally are. The
|
||
comma separated list of @var{flag}s is used by the @code{msgfmt}
|
||
program to give the user some better diagnostic messages. Currently
|
||
there are two forms of flags defined:
|
||
|
||
@table @code
|
||
@item fuzzy
|
||
@kwindex fuzzy@r{ flag}
|
||
This flag can be generated by the @code{msgmerge} program or it can be
|
||
inserted by the translator herself. It shows that the @code{msgstr}
|
||
string might not be a correct translation (anymore). Only the translator
|
||
can judge if the translation requires further modification, or is
|
||
acceptable as is. Once satisfied with the translation, she then removes
|
||
this @code{fuzzy} attribute. The @code{msgmerge} program inserts this
|
||
when it combined the @code{msgid} and @code{msgstr} entries after fuzzy
|
||
search only. @xref{Fuzzy Entries}.
|
||
|
||
@item c-format
|
||
@kwindex c-format@r{ flag}
|
||
@itemx no-c-format
|
||
@kwindex no-c-format@r{ flag}
|
||
These flags should not be added by a human. Instead only the
|
||
@code{xgettext} program adds them. In an automated PO file processing
|
||
system as proposed here, the user's changes would be thrown away again as
|
||
soon as the @code{xgettext} program generates a new template file.
|
||
|
||
The @code{c-format} flag indicates that the untranslated string and the
|
||
translation are supposed to be C format strings. The @code{no-c-format}
|
||
flag indicates that they are not C format strings, even though the untranslated
|
||
string happens to look like a C format string (with @samp{%} directives).
|
||
|
||
When the @code{c-format} flag is given for a string the @code{msgfmt}
|
||
program does some more tests to check the validity of the translation.
|
||
@xref{msgfmt Invocation}, @ref{c-format Flag} and @ref{c-format}.
|
||
|
||
@item objc-format
|
||
@kwindex objc-format@r{ flag}
|
||
@itemx no-objc-format
|
||
@kwindex no-objc-format@r{ flag}
|
||
Likewise for Objective C, see @ref{objc-format}.
|
||
|
||
@item sh-format
|
||
@kwindex sh-format@r{ flag}
|
||
@itemx no-sh-format
|
||
@kwindex no-sh-format@r{ flag}
|
||
Likewise for Shell, see @ref{sh-format}.
|
||
|
||
@item python-format
|
||
@kwindex python-format@r{ flag}
|
||
@itemx no-python-format
|
||
@kwindex no-python-format@r{ flag}
|
||
Likewise for Python, see @ref{python-format}.
|
||
|
||
@item python-brace-format
|
||
@kwindex python-brace-format@r{ flag}
|
||
@itemx no-python-brace-format
|
||
@kwindex no-python-brace-format@r{ flag}
|
||
Likewise for Python brace, see @ref{python-format}.
|
||
|
||
@item lisp-format
|
||
@kwindex lisp-format@r{ flag}
|
||
@itemx no-lisp-format
|
||
@kwindex no-lisp-format@r{ flag}
|
||
Likewise for Lisp, see @ref{lisp-format}.
|
||
|
||
@item elisp-format
|
||
@kwindex elisp-format@r{ flag}
|
||
@itemx no-elisp-format
|
||
@kwindex no-elisp-format@r{ flag}
|
||
Likewise for Emacs Lisp, see @ref{elisp-format}.
|
||
|
||
@item librep-format
|
||
@kwindex librep-format@r{ flag}
|
||
@itemx no-librep-format
|
||
@kwindex no-librep-format@r{ flag}
|
||
Likewise for librep, see @ref{librep-format}.
|
||
|
||
@item scheme-format
|
||
@kwindex scheme-format@r{ flag}
|
||
@itemx no-scheme-format
|
||
@kwindex no-scheme-format@r{ flag}
|
||
Likewise for Scheme, see @ref{scheme-format}.
|
||
|
||
@item smalltalk-format
|
||
@kwindex smalltalk-format@r{ flag}
|
||
@itemx no-smalltalk-format
|
||
@kwindex no-smalltalk-format@r{ flag}
|
||
Likewise for Smalltalk, see @ref{smalltalk-format}.
|
||
|
||
@item java-format
|
||
@kwindex java-format@r{ flag}
|
||
@itemx no-java-format
|
||
@kwindex no-java-format@r{ flag}
|
||
Likewise for Java @code{MessageFormat} format strings, see @ref{java-format}.
|
||
|
||
@item java-printf-format
|
||
@kwindex java-printf-format@r{ flag}
|
||
@itemx no-java-printf-format
|
||
@kwindex no-java-printf-format@r{ flag}
|
||
Likewise for Java @code{printf} format strings, see @ref{java-format}.
|
||
|
||
@item csharp-format
|
||
@kwindex csharp-format@r{ flag}
|
||
@itemx no-csharp-format
|
||
@kwindex no-csharp-format@r{ flag}
|
||
Likewise for C#, see @ref{csharp-format}.
|
||
|
||
@item awk-format
|
||
@kwindex awk-format@r{ flag}
|
||
@itemx no-awk-format
|
||
@kwindex no-awk-format@r{ flag}
|
||
Likewise for awk, see @ref{awk-format}.
|
||
|
||
@item object-pascal-format
|
||
@kwindex object-pascal-format@r{ flag}
|
||
@itemx no-object-pascal-format
|
||
@kwindex no-object-pascal-format@r{ flag}
|
||
Likewise for Object Pascal, see @ref{object-pascal-format}.
|
||
|
||
@item ycp-format
|
||
@kwindex ycp-format@r{ flag}
|
||
@itemx no-ycp-format
|
||
@kwindex no-ycp-format@r{ flag}
|
||
Likewise for YCP, see @ref{ycp-format}.
|
||
|
||
@item tcl-format
|
||
@kwindex tcl-format@r{ flag}
|
||
@itemx no-tcl-format
|
||
@kwindex no-tcl-format@r{ flag}
|
||
Likewise for Tcl, see @ref{tcl-format}.
|
||
|
||
@item perl-format
|
||
@kwindex perl-format@r{ flag}
|
||
@itemx no-perl-format
|
||
@kwindex no-perl-format@r{ flag}
|
||
Likewise for Perl, see @ref{perl-format}.
|
||
|
||
@item perl-brace-format
|
||
@kwindex perl-brace-format@r{ flag}
|
||
@itemx no-perl-brace-format
|
||
@kwindex no-perl-brace-format@r{ flag}
|
||
Likewise for Perl brace, see @ref{perl-format}.
|
||
|
||
@item php-format
|
||
@kwindex php-format@r{ flag}
|
||
@itemx no-php-format
|
||
@kwindex no-php-format@r{ flag}
|
||
Likewise for PHP, see @ref{php-format}.
|
||
|
||
@item ruby-format
|
||
@kwindex ruby-format@r{ flag}
|
||
@itemx no-ruby-format
|
||
@kwindex no-ruby-format@r{ flag}
|
||
Likewise for Ruby, see @ref{ruby-format}.
|
||
|
||
@item gcc-internal-format
|
||
@kwindex gcc-internal-format@r{ flag}
|
||
@itemx no-gcc-internal-format
|
||
@kwindex no-gcc-internal-format@r{ flag}
|
||
Likewise for the GCC sources, see @ref{gcc-internal-format}.
|
||
|
||
@item gfc-internal-format
|
||
@kwindex gfc-internal-format@r{ flag}
|
||
@itemx no-gfc-internal-format
|
||
@kwindex no-gfc-internal-format@r{ flag}
|
||
Likewise for the GNU Fortran Compiler sources, see @ref{gfc-internal-format}.
|
||
|
||
@item qt-format
|
||
@kwindex qt-format@r{ flag}
|
||
@itemx no-qt-format
|
||
@kwindex no-qt-format@r{ flag}
|
||
Likewise for Qt, see @ref{qt-format}.
|
||
|
||
@item qt-plural-format
|
||
@kwindex qt-plural-format@r{ flag}
|
||
@itemx no-qt-plural-format
|
||
@kwindex no-qt-plural-format@r{ flag}
|
||
Likewise for Qt plural forms, see @ref{qt-plural-format}.
|
||
|
||
@item kde-format
|
||
@kwindex kde-format@r{ flag}
|
||
@itemx no-kde-format
|
||
@kwindex no-kde-format@r{ flag}
|
||
Likewise for KDE, see @ref{kde-format}.
|
||
|
||
@item boost-format
|
||
@kwindex boost-format@r{ flag}
|
||
@itemx no-boost-format
|
||
@kwindex no-boost-format@r{ flag}
|
||
Likewise for Boost, see @ref{boost-format}.
|
||
|
||
@item lua-format
|
||
@kwindex lua-format@r{ flag}
|
||
@itemx no-lua-format
|
||
@kwindex no-lua-format@r{ flag}
|
||
Likewise for Lua, see @ref{lua-format}.
|
||
|
||
@item javascript-format
|
||
@kwindex javascript-format@r{ flag}
|
||
@itemx no-javascript-format
|
||
@kwindex no-javascript-format@r{ flag}
|
||
Likewise for JavaScript, see @ref{javascript-format}.
|
||
|
||
@end table
|
||
|
||
@kwindex msgctxt
|
||
@cindex context, in PO files
|
||
It is also possible to have entries with a context specifier. They look like
|
||
this:
|
||
|
||
@example
|
||
@var{white-space}
|
||
# @var{translator-comments}
|
||
#. @var{extracted-comments}
|
||
#: @var{reference}@dots{}
|
||
#, @var{flag}@dots{}
|
||
#| msgctxt @var{previous-context}
|
||
#| msgid @var{previous-untranslated-string}
|
||
msgctxt @var{context}
|
||
msgid @var{untranslated-string}
|
||
msgstr @var{translated-string}
|
||
@end example
|
||
|
||
The context serves to disambiguate messages with the same
|
||
@var{untranslated-string}. It is possible to have several entries with
|
||
the same @var{untranslated-string} in a PO file, provided that they each
|
||
have a different @var{context}. Note that an empty @var{context} string
|
||
and an absent @code{msgctxt} line do not mean the same thing.
|
||
|
||
@kwindex msgid_plural
|
||
@cindex plural forms, in PO files
|
||
A different kind of entry is used for translations which involve
|
||
plural forms.
|
||
|
||
@example
|
||
@var{white-space}
|
||
# @var{translator-comments}
|
||
#. @var{extracted-comments}
|
||
#: @var{reference}@dots{}
|
||
#, @var{flag}@dots{}
|
||
#| msgid @var{previous-untranslated-string-singular}
|
||
#| msgid_plural @var{previous-untranslated-string-plural}
|
||
msgid @var{untranslated-string-singular}
|
||
msgid_plural @var{untranslated-string-plural}
|
||
msgstr[0] @var{translated-string-case-0}
|
||
...
|
||
msgstr[N] @var{translated-string-case-n}
|
||
@end example
|
||
|
||
Such an entry can look like this:
|
||
|
||
@example
|
||
#: src/msgcmp.c:338 src/po-lex.c:699
|
||
#, c-format
|
||
msgid "found %d fatal error"
|
||
msgid_plural "found %d fatal errors"
|
||
msgstr[0] "s'ha trobat %d error fatal"
|
||
msgstr[1] "s'han trobat %d errors fatals"
|
||
@end example
|
||
|
||
Here also, a @code{msgctxt} context can be specified before @code{msgid},
|
||
like above.
|
||
|
||
Here, additional kinds of flags can be used:
|
||
|
||
@table @code
|
||
@item range:
|
||
@kwindex range:@r{ flag}
|
||
This flag is followed by a range of non-negative numbers, using the syntax
|
||
@code{range: @var{minimum-value}..@var{maximum-value}}. It designates the
|
||
possible values that the numeric parameter of the message can take. In some
|
||
languages, translators may produce slightly better translations if they know
|
||
that the value can only take on values between 0 and 10, for example.
|
||
@end table
|
||
|
||
The @var{previous-untranslated-string} is optionally inserted by the
|
||
@code{msgmerge} program, at the same time when it marks a message fuzzy.
|
||
It helps the translator to see which changes were done by the developers
|
||
on the @var{untranslated-string}.
|
||
|
||
It happens that some lines, usually whitespace or comments, follow the
|
||
very last entry of a PO file. Such lines are not part of any entry,
|
||
and will be dropped when the PO file is processed by the tools, or may
|
||
disturb some PO file editors.
|
||
|
||
The remainder of this section may be safely skipped by those using
|
||
a PO file editor, yet it may be interesting for everybody to have a better
|
||
idea of the precise format of a PO file. On the other hand, those
|
||
wishing to modify PO files by hand should carefully continue reading on.
|
||
|
||
An empty @var{untranslated-string} is reserved to contain the header
|
||
entry with the meta information (@pxref{Header Entry}). This header
|
||
entry should be the first entry of the file. The empty
|
||
@var{untranslated-string} is reserved for this purpose and must
|
||
not be used anywhere else.
|
||
|
||
Each of @var{untranslated-string} and @var{translated-string} respects
|
||
the C syntax for a character string, including the surrounding quotes
|
||
and embedded backslashed escape sequences. When the time comes
|
||
to write multi-line strings, one should not use escaped newlines.
|
||
Instead, a closing quote should follow the last character on the
|
||
line to be continued, and an opening quote should resume the string
|
||
at the beginning of the following PO file line. For example:
|
||
|
||
@example
|
||
msgid ""
|
||
"Here is an example of how one might continue a very long string\n"
|
||
"for the common case the string represents multi-line output.\n"
|
||
@end example
|
||
|
||
@noindent
|
||
In this example, the empty string is used on the first line, to
|
||
allow better alignment of the @code{H} from the word @samp{Here}
|
||
over the @code{f} from the word @samp{for}. In this example, the
|
||
@code{msgid} keyword is followed by three strings, which are meant
|
||
to be concatenated. Concatenating the empty string does not change
|
||
the resulting overall string, but it is a way for us to comply with
|
||
the necessity of @code{msgid} to be followed by a string on the same
|
||
line, while keeping the multi-line presentation left-justified, as
|
||
we find this to be a cleaner disposition. The empty string could have
|
||
been omitted, but only if the string starting with @samp{Here} was
|
||
promoted on the first line, right after @code{msgid}.@footnote{This
|
||
limitation is not imposed by GNU @code{gettext}, but is for compatibility
|
||
with the @code{msgfmt} implementation on Solaris.} It was not really necessary
|
||
either to switch between the two last quoted strings immediately after
|
||
the newline @samp{\n}, the switch could have occurred after @emph{any}
|
||
other character, we just did it this way because it is neater.
|
||
|
||
@cindex newlines in PO files
|
||
One should carefully distinguish between end of lines marked as
|
||
@samp{\n} @emph{inside} quotes, which are part of the represented
|
||
string, and end of lines in the PO file itself, outside string quotes,
|
||
which have no effect on the represented string.
|
||
|
||
@cindex comments in PO files
|
||
Outside strings, white lines and comments may be used freely.
|
||
Comments start at the beginning of a line with @samp{#} and extend
|
||
until the end of the PO file line. Comments written by translators
|
||
should have the initial @samp{#} immediately followed by some white
|
||
space. If the @samp{#} is not immediately followed by white space,
|
||
this comment is most likely generated and managed by specialized GNU
|
||
tools, and might disappear or be replaced unexpectedly when the PO
|
||
file is given to @code{msgmerge}.
|
||
|
||
@node Sources
|
||
@chapter Preparing Program Sources
|
||
@cindex preparing programs for translation
|
||
|
||
@c FIXME: Rewrite (the whole chapter).
|
||
|
||
For the programmer, changes to the C source code fall into three
|
||
categories. First, you have to make the localization functions
|
||
known to all modules needing message translation. Second, you should
|
||
properly trigger the operation of GNU @code{gettext} when the program
|
||
initializes, usually from the @code{main} function. Last, you should
|
||
identify, adjust and mark all constant strings in your program
|
||
needing translation.
|
||
|
||
@menu
|
||
* Importing:: Importing the @code{gettext} declaration
|
||
* Triggering:: Triggering @code{gettext} Operations
|
||
* Preparing Strings:: Preparing Translatable Strings
|
||
* Mark Keywords:: How Marks Appear in Sources
|
||
* Marking:: Marking Translatable Strings
|
||
* c-format Flag:: Telling something about the following string
|
||
* Special cases:: Special Cases of Translatable Strings
|
||
* Bug Report Address:: Letting Users Report Translation Bugs
|
||
* Names:: Marking Proper Names for Translation
|
||
* Libraries:: Preparing Library Sources
|
||
@end menu
|
||
|
||
@node Importing
|
||
@section Importing the @code{gettext} declaration
|
||
|
||
Presuming that your set of programs, or package, has been adjusted
|
||
so all needed GNU @code{gettext} files are available, and your
|
||
@file{Makefile} files are adjusted (@pxref{Maintainers}), each C module
|
||
having translated C strings should contain the line:
|
||
|
||
@cindex include file @file{libintl.h}
|
||
@example
|
||
#include <libintl.h>
|
||
@end example
|
||
|
||
Similarly, each C module containing @code{printf()}/@code{fprintf()}/...
|
||
calls with a format string that could be a translated C string (even if
|
||
the C string comes from a different C module) should contain the line:
|
||
|
||
@example
|
||
#include <libintl.h>
|
||
@end example
|
||
|
||
@node Triggering
|
||
@section Triggering @code{gettext} Operations
|
||
|
||
@cindex initialization
|
||
The initialization of locale data should be done with more or less
|
||
the same code in every program, as demonstrated below:
|
||
|
||
@example
|
||
@group
|
||
int
|
||
main (int argc, char *argv[])
|
||
@{
|
||
@dots{}
|
||
setlocale (LC_ALL, "");
|
||
bindtextdomain (PACKAGE, LOCALEDIR);
|
||
textdomain (PACKAGE);
|
||
@dots{}
|
||
@}
|
||
@end group
|
||
@end example
|
||
|
||
@var{PACKAGE} and @var{LOCALEDIR} should be provided either by
|
||
@file{config.h} or by the Makefile. For now consult the @code{gettext}
|
||
or @code{hello} sources for more information.
|
||
|
||
@cindex locale category, LC_ALL
|
||
@cindex locale category, LC_CTYPE
|
||
The use of @code{LC_ALL} might not be appropriate for you.
|
||
@code{LC_ALL} includes all locale categories and especially
|
||
@code{LC_CTYPE}. This latter category is responsible for determining
|
||
character classes with the @code{isalnum} etc. functions from
|
||
@file{ctype.h} which could especially for programs, which process some
|
||
kind of input language, be wrong. For example this would mean that a
|
||
source code using the @,{c} (c-cedilla character) is runnable in
|
||
France but not in the U.S.
|
||
|
||
Some systems also have problems with parsing numbers using the
|
||
@code{scanf} functions if a locale category other than @code{LC_ALL} is
|
||
used.  The standards say that additional formats besides the one known in the
|
||
@code{"C"} locale might be recognized. But some systems seem to reject
|
||
numbers in the @code{"C"} locale format.  In some situations, it might
|
||
also be a problem with the notation itself which makes it impossible to
|
||
recognize whether the number is in the @code{"C"} locale or the local
|
||
format. This can happen if thousands separator characters are used.
|
||
Some locales define this character according to the national
|
||
conventions to @code{'.'} which is the same character used in the
|
||
@code{"C"} locale to denote the decimal point.
|
||
|
||
So it is sometimes necessary to replace the @code{LC_ALL} line in the
|
||
code above by a sequence of @code{setlocale} lines
|
||
|
||
@example
|
||
@group
|
||
@{
|
||
@dots{}
|
||
setlocale (LC_CTYPE, "");
|
||
setlocale (LC_MESSAGES, "");
|
||
@dots{}
|
||
@}
|
||
@end group
|
||
@end example
|
||
|
||
@cindex locale category, LC_CTYPE
|
||
@cindex locale category, LC_COLLATE
|
||
@cindex locale category, LC_MONETARY
|
||
@cindex locale category, LC_NUMERIC
|
||
@cindex locale category, LC_TIME
|
||
@cindex locale category, LC_MESSAGES
|
||
@cindex locale category, LC_RESPONSES
|
||
@noindent
|
||
On all POSIX conformant systems the locale categories @code{LC_CTYPE},
|
||
@code{LC_MESSAGES}, @code{LC_COLLATE}, @code{LC_MONETARY},
|
||
@code{LC_NUMERIC}, and @code{LC_TIME} are available. On some systems
|
||
which are only ISO C compliant, @code{LC_MESSAGES} is missing, but
|
||
a substitute for it is defined in GNU gettext's @code{<libintl.h>} and
|
||
in GNU gnulib's @code{<locale.h>}.
|
||
|
||
Note that changing the @code{LC_CTYPE} also affects the functions
|
||
declared in the @code{<ctype.h>} standard header and some functions
|
||
declared in the @code{<string.h>} and @code{<stdlib.h>} standard headers.
|
||
If this is not
|
||
desirable in your application (for example in a compiler's parser),
|
||
you can use a set of substitute functions which hardwire the C locale,
|
||
such as found in the modules @samp{c-ctype}, @samp{c-strcase},
|
||
@samp{c-strcasestr}, @samp{c-strtod}, @samp{c-strtold} in the GNU gnulib
|
||
source distribution.
|
||
|
||
It is also possible to switch the locale back and forth between the
|
||
environment dependent locale and the C locale, but this approach is
|
||
normally avoided because a @code{setlocale} call is expensive,
|
||
because it is tedious to determine the places where a locale switch
|
||
is needed in a large program's source, and because switching a locale
|
||
is not multithread-safe.
|
||
|
||
@node Preparing Strings
|
||
@section Preparing Translatable Strings
|
||
|
||
@cindex marking strings, preparations
|
||
Before strings can be marked for translations, they sometimes need to
|
||
be adjusted. Usually preparing a string for translation is done right
|
||
before marking it, during the marking phase which is described in the
|
||
next sections. What you have to keep in mind while doing that is the
|
||
following.
|
||
|
||
@itemize @bullet
|
||
@item
|
||
Decent English style.
|
||
|
||
@item
|
||
Entire sentences.
|
||
|
||
@item
|
||
Split at paragraphs.
|
||
|
||
@item
|
||
Use format strings instead of string concatenation.
|
||
|
||
@item
|
||
Use placeholders in format strings instead of embedded URLs.
|
||
|
||
@item
|
||
Avoid unusual markup and unusual control characters.
|
||
@end itemize
|
||
|
||
@noindent
|
||
Let's look at some examples of these guidelines.
|
||
|
||
@subheading Decent English style
|
||
|
||
@cindex style
|
||
Translatable strings should be in good English style. If slang language
|
||
with abbreviations and shortcuts is used, often translators will not
|
||
understand the message and will produce very inappropriate translations.
|
||
|
||
@example
|
||
"%s: is parameter\n"
|
||
@end example
|
||
|
||
@noindent
|
||
This is nearly untranslatable: Is the displayed item @emph{a} parameter or
|
||
@emph{the} parameter?
|
||
|
||
@example
|
||
"No match"
|
||
@end example
|
||
|
||
@noindent
|
||
The ambiguity in this message makes it unintelligible: Is the program
|
||
attempting to set something on fire? Does it mean "The given object does
|
||
not match the template"? Does it mean "The template does not fit for any
|
||
of the objects"?
|
||
|
||
@cindex ambiguities
|
||
In both cases, adding more words to the message will help both the
|
||
translator and the English speaking user.
|
||
|
||
@subheading Entire sentences
|
||
|
||
@cindex sentences
|
||
Translatable strings should be entire sentences. It is often not possible
|
||
to translate single verbs or adjectives in a substitutable way.
|
||
|
||
@example
|
||
printf ("File %s is %s protected", filename, rw ? "write" : "read");
|
||
@end example
|
||
|
||
@noindent
|
||
Most translators will not look at the source and will thus only see the
|
||
string @code{"File %s is %s protected"}, which is unintelligible. Change
|
||
this to
|
||
|
||
@example
|
||
printf (rw ? "File %s is write protected" : "File %s is read protected",
|
||
filename);
|
||
@end example
|
||
|
||
@noindent
|
||
This way the translator will not only understand the message, she will
|
||
also be able to find the appropriate grammatical construction. A French
|
||
translator for example translates "write protected" like "protected
|
||
against writing".
|
||
|
||
Entire sentences are also important because in many languages, the
|
||
declension of some word in a sentence depends on the gender or the
|
||
number (singular/plural) of another part of the sentence. There are
|
||
usually more interdependencies between words than in English. The
|
||
consequence is that asking a translator to translate two half-sentences
|
||
and then combining these two half-sentences through dumb string concatenation
|
||
will not work, for many languages, even though it would work for English.
|
||
That's why translators need to handle entire sentences.
|
||
|
||
Often sentences don't fit into a single line. If a sentence is output
|
||
using two subsequent @code{printf} statements, like this
|
||
|
||
@example
|
||
printf ("Locale charset \"%s\" is different from\n", lcharset);
|
||
printf ("input file charset \"%s\".\n", fcharset);
|
||
@end example
|
||
|
||
@noindent
|
||
the translator would have to translate two half sentences, but nothing
|
||
in the POT file would tell her that the two half sentences belong together.
|
||
It is necessary to merge the two @code{printf} statements so that the
|
||
translator can handle the entire sentence at once and decide at which
|
||
place to insert a line break in the translation (if at all):
|
||
|
||
@example
|
||
printf ("Locale charset \"%s\" is different from\n\
|
||
input file charset \"%s\".\n", lcharset, fcharset);
|
||
@end example
|
||
|
||
You may now ask: how about two or more adjacent sentences? Like in this case:
|
||
|
||
@example
|
||
puts ("Apollo 13 scenario: Stack overflow handling failed.");
|
||
puts ("On the next stack overflow we will crash!!!");
|
||
@end example
|
||
|
||
@noindent
|
||
Should these two statements be merged into a single one? I would recommend to
|
||
merge them if the two sentences are related to each other, because then it
|
||
makes it easier for the translator to understand and translate both. On
|
||
the other hand, if one of the two messages is a stereotypic one, occurring
|
||
in other places as well, you will do a favour to the translator by not
|
||
merging the two. (Identical messages occurring in several places are
|
||
combined by xgettext, so the translator has to handle them once only.)
|
||
|
||
@subheading Split at paragraphs
|
||
|
||
@cindex paragraphs
|
||
Translatable strings should be limited to one paragraph; don't let a
|
||
single message be longer than ten lines. The reason is that when the
|
||
translatable string changes, the translator is faced with the task of
|
||
updating the entire translated string. Maybe only a single word will
|
||
have changed in the English string, but the translator doesn't see that
|
||
(with the current translation tools), therefore she has to proofread
|
||
the entire message.
|
||
|
||
@cindex help option
|
||
Many GNU programs have a @samp{--help} output that extends over several
|
||
screen pages. It is a courtesy towards the translators to split such a
|
||
message into several ones of five to ten lines each. While doing that,
|
||
you can also attempt to split the documented options into groups,
|
||
such as the input options, the output options, and the informative
|
||
output options. This will help every user to find the option he is
|
||
looking for.
|
||
|
||
@subheading No string concatenation
|
||
|
||
@cindex string concatenation
|
||
@cindex concatenation of strings
|
||
Hardcoded string concatenation is sometimes used to construct English
|
||
strings:
|
||
|
||
@example
|
||
strcpy (s, "Replace ");
|
||
strcat (s, object1);
|
||
strcat (s, " with ");
|
||
strcat (s, object2);
|
||
strcat (s, "?");
|
||
@end example
|
||
|
||
@noindent
|
||
In order to present to the translator only entire sentences, and also
|
||
because in some languages the translator might want to swap the order
|
||
of @code{object1} and @code{object2}, it is necessary to change this
|
||
to use a format string:
|
||
|
||
@example
|
||
sprintf (s, "Replace %s with %s?", object1, object2);
|
||
@end example
|
||
|
||
@cindex @code{inttypes.h}
|
||
A similar case is compile time concatenation of strings. The ISO C 99
|
||
include file @code{<inttypes.h>} contains a macro @code{PRId64} that
|
||
can be used as a formatting directive for outputting an @samp{int64_t}
|
||
integer through @code{printf}. It expands to a constant string, usually
|
||
"d" or "ld" or "lld" or something like this, depending on the platform.
|
||
Assume you have code like
|
||
|
||
@example
|
||
printf ("The amount is %0" PRId64 "\n", number);
|
||
@end example
|
||
|
||
@noindent
|
||
The @code{gettext} tools and library have special support for these
|
||
@code{<inttypes.h>} macros. You can therefore simply write
|
||
|
||
@example
|
||
printf (gettext ("The amount is %0" PRId64 "\n"), number);
|
||
@end example
|
||
|
||
@noindent
|
||
The PO file will contain the string "The amount is %0<PRId64>\n".
|
||
The translators will provide a translation containing "%0<PRId64>"
|
||
as well, and at runtime the @code{gettext} function's result will
|
||
contain the appropriate constant string, "d" or "ld" or "lld".
|
||
|
||
This works only for the predefined @code{<inttypes.h>} macros. If
|
||
you have defined your own similar macros, let's say @samp{MYPRId64},
|
||
that are not known to @code{xgettext}, the solution for this problem
|
||
is to change the code like this:
|
||
|
||
@example
|
||
char buf1[100];
|
||
sprintf (buf1, "%0" MYPRId64, number);
|
||
printf (gettext ("The amount is %s\n"), buf1);
|
||
@end example
|
||
|
||
This means, you put the platform dependent code in one statement, and the
|
||
internationalization code in a different statement. Note that a buffer length
|
||
of 100 is safe, because all available hardware integer types are limited to
|
||
128 bits, and to print a 128 bit integer one needs at most 54 characters,
|
||
regardless of whether in decimal, octal or hexadecimal.
|
||
|
||
@cindex Java, string concatenation
|
||
@cindex C#, string concatenation
|
||
All this applies to other programming languages as well. For example, in
|
||
Java and C#, string concatenation is very frequently used, because it is a
|
||
compiler built-in operator. Like in C, in Java, you would change
|
||
|
||
@example
|
||
System.out.println("Replace "+object1+" with "+object2+"?");
|
||
@end example
|
||
|
||
@noindent
|
||
into a statement involving a format string:
|
||
|
||
@example
|
||
System.out.println(
|
||
MessageFormat.format("Replace @{0@} with @{1@}?",
|
||
new Object[] @{ object1, object2 @}));
|
||
@end example
|
||
|
||
@noindent
|
||
Similarly, in C#, you would change
|
||
|
||
@example
|
||
Console.WriteLine("Replace "+object1+" with "+object2+"?");
|
||
@end example
|
||
|
||
@noindent
|
||
into a statement involving a format string:
|
||
|
||
@example
|
||
Console.WriteLine(
|
||
String.Format("Replace @{0@} with @{1@}?", object1, object2));
|
||
@end example
|
||
|
||
@subheading No embedded URLs
|
||
|
||
It is good to not embed URLs in translatable strings, for several reasons:
|
||
@itemize @bullet
|
||
@item
|
||
It avoids possible mistakes during copy and paste.
|
||
@item
|
||
Translators cannot translate the URLs or, by mistake, use the URLs from
|
||
other packages that are present in their compendium.
|
||
@item
|
||
When the URLs change, translators don't need to revisit the translation
|
||
of the string.
|
||
@end itemize
|
||
|
||
The same holds for email addresses.
|
||
|
||
So, you would change
|
||
|
||
@example
|
||
fputs (_("GNU GPL version 3 <https://gnu.org/licenses/gpl.html>\n"),
|
||
stream);
|
||
@end example
|
||
|
||
@noindent
|
||
to
|
||
|
||
@example
|
||
fprintf (stream, _("GNU GPL version 3 <%s>\n"),
|
||
"https://gnu.org/licenses/gpl.html");
|
||
@end example
|
||
|
||
@subheading No unusual markup
|
||
|
||
@cindex markup
|
||
@cindex control characters
|
||
Unusual markup or control characters should not be used in translatable
|
||
strings. Translators will likely not understand the particular meaning
|
||
of the markup or control characters.
|
||
|
||
For example, if you have a convention that @samp{|} delimits the
|
||
left-hand and right-hand part of some GUI elements, translators will
|
||
often not understand it without specific comments. It might be
|
||
better to have the translator translate the left-hand and right-hand
|
||
part separately.
|
||
|
||
Another example is the @samp{argp} convention to use a single @samp{\v}
|
||
(vertical tab) control character to delimit two sections inside a
|
||
string. This is flawed. Some translators may convert it to a simple
|
||
newline, some to blank lines. With some PO file editors it may not be
|
||
easy to even enter a vertical tab control character. So, you cannot
|
||
be sure that the translation will contain a @samp{\v} character, at the
|
||
corresponding position. The solution is, again, to let the translator
|
||
translate two separate strings and combine at run-time the two translated
|
||
strings with the @samp{\v} required by the convention.
|
||
|
||
HTML markup, however, is common enough that it's probably ok to use in
|
||
translatable strings. But please bear in mind that the GNU gettext tools
|
||
don't verify that the translations are well-formed HTML.
|
||
|
||
@node Mark Keywords
|
||
@section How Marks Appear in Sources
|
||
@cindex marking strings that require translation
|
||
|
||
All strings requiring translation should be marked in the C sources. Marking
|
||
is done in such a way that each translatable string appears to be
|
||
the sole argument of some function or preprocessor macro. There are
|
||
only a few such possible functions or macros meant for translation,
|
||
and their names are said to be marking keywords. The marking is
|
||
attached to strings themselves, rather than to what we do with them.
|
||
This approach has more uses. A blatant example is an error message
|
||
produced by formatting. The format string needs translation, as
|
||
well as some strings inserted through some @samp{%s} specification
|
||
in the format, while the result from @code{sprintf} may have so many
|
||
different instances that it is impractical to list them all in some
|
||
@samp{error_string_out()} routine, say.
|
||
|
||
This marking operation has two goals. The first goal of marking
|
||
is for triggering the retrieval of the translation, at run time.
|
||
The keyword is possibly resolved into a routine able to dynamically
|
||
return the proper translation, as far as possible or wanted, for the
|
||
argument string. Most localizable strings are found in executable
|
||
positions, that is, attached to variables or given as parameters to
|
||
functions. But this is not universal usage, and some translatable
|
||
strings appear in structured initializations. @xref{Special cases}.
|
||
|
||
The second goal of the marking operation is to help @code{xgettext}
|
||
at properly extracting all translatable strings when it scans a set
|
||
of program sources and produces PO file templates.
|
||
|
||
The canonical keyword for marking translatable strings is
|
||
@samp{gettext}, it gave its name to the whole GNU @code{gettext}
|
||
package. For packages making only light use of the @samp{gettext}
|
||
keyword, macro or function, it is easily used @emph{as is}. However,
|
||
for packages using the @code{gettext} interface more heavily, it
|
||
is usually more convenient to give the main keyword a shorter, less
|
||
obtrusive name. Indeed, the keyword might appear on a lot of strings
|
||
all over the package, and programmers usually do not want nor need
|
||
their program sources to remind them forcefully, all the time, that they
|
||
are internationalized. Further, a long keyword has the disadvantage
|
||
of using more horizontal space, forcing more indentation work on
|
||
sources for those trying to keep them within 79 or 80 columns.
|
||
|
||
@cindex @code{_}, a macro to mark strings for translation
|
||
Many packages use @samp{_} (a simple underline) as a keyword,
|
||
and write @samp{_("Translatable string")} instead of @samp{gettext
|
||
("Translatable string")}. Further, the coding rule, from GNU standards,
|
||
wanting that there is a space between the keyword and the opening
|
||
parenthesis is relaxed, in practice, for this particular usage.
|
||
So, the textual overhead per translatable string is reduced to
|
||
only three characters: the underline and the two parentheses.
|
||
However, even if GNU @code{gettext} uses this convention internally,
|
||
it does not offer it officially. The real, genuine keyword is truly
|
||
@samp{gettext} indeed. It is fairly easy for those wanting to use
|
||
@samp{_} instead of @samp{gettext} to declare:
|
||
|
||
@example
|
||
#include <libintl.h>
|
||
#define _(String) gettext (String)
|
||
@end example
|
||
|
||
@noindent
|
||
instead of merely using @samp{#include <libintl.h>}.
|
||
|
||
The marking keywords @samp{gettext} and @samp{_} take the translatable
|
||
string as sole argument. It is also possible to define marking functions
|
||
that take it at another argument position. It is even possible to make
|
||
the marked argument position depend on the total number of arguments of
|
||
the function call; this is useful in C++. All this is achieved using
|
||
@code{xgettext}'s @samp{--keyword} option. How to pass such an option
|
||
to @code{xgettext}, assuming that @code{gettextize} is used, is described
|
||
in @ref{po/Makevars} and @ref{AM_XGETTEXT_OPTION}.
|
||
|
||
Note also that long strings can be split across lines, into multiple
|
||
adjacent string tokens. Automatic string concatenation is performed
|
||
at compile time according to ISO C and ISO C++; @code{xgettext} also
|
||
supports this syntax.
|
||
|
||
Later on, the maintenance is relatively easy. If, as a programmer,
|
||
you add or modify a string, you will have to ask yourself if the
|
||
new or altered string requires translation, and include it within
|
||
@samp{_()} if you think it should be translated. For example, @samp{"%s"}
|
||
is an example of string @emph{not} requiring translation. But
|
||
@samp{"%s: %d"} @emph{does} require translation, because in French, unlike
|
||
in English, it's customary to put a space before a colon.
|
||
|
||
@node Marking
|
||
@section Marking Translatable Strings
|
||
@emindex marking strings for translation
|
||
|
||
In PO mode, one set of features is meant more for the programmer than
|
||
for the translator, and allows him to interactively mark which strings,
|
||
in a set of program sources, are translatable, and which are not.
|
||
Even if it is a fairly easy job for a programmer to find and mark
|
||
such strings by other means, using any editor of his choice, PO mode
|
||
makes this work more comfortable. Further, this gives translators
|
||
who feel a little like programmers, or programmers who feel a little
|
||
like translators, a tool letting them work at marking translatable
|
||
strings in the program sources, while simultaneously producing a set of
|
||
translation in some language, for the package being internationalized.
|
||
|
||
@emindex @code{etags}, using for marking strings
|
||
The set of program sources, targeted by the PO mode commands described
|
||
here, should have an Emacs tags table constructed for your project,
|
||
prior to using these PO file commands. This is easy to do. In any
|
||
shell window, change the directory to the root of your project, then
|
||
execute a command resembling:
|
||
|
||
@example
|
||
etags src/*.[hc] lib/*.[hc]
|
||
@end example
|
||
|
||
@noindent
|
||
presuming here you want to process all @file{.h} and @file{.c} files
|
||
from the @file{src/} and @file{lib/} directories. This command will
|
||
explore all said files and create a @file{TAGS} file in your root
|
||
directory, somewhat summarizing the contents using a special file
|
||
format Emacs can understand.
|
||
|
||
@emindex @file{TAGS}, and marking translatable strings
|
||
For packages following the GNU coding standards, there is
|
||
a make goal @code{tags} or @code{TAGS} which constructs the tag files in
|
||
all directories and for all files containing source code.
|
||
|
||
Once your @file{TAGS} file is ready, the following commands assist
|
||
the programmer at marking translatable strings in his set of sources.
|
||
But these commands are necessarily driven from within a PO file
|
||
window, and it is likely that you do not even have such a PO file yet.
|
||
This is not a problem at all, as you may safely open a new, empty PO
|
||
file, mainly for using these commands. This empty PO file will slowly
|
||
fill in while you mark strings as translatable in your program sources.
|
||
|
||
@table @kbd
|
||
@item ,
|
||
@efindex ,@r{, PO Mode command}
|
||
Search through program sources for a string which looks like a
|
||
candidate for translation (@code{po-tags-search}).
|
||
|
||
@item M-,
|
||
@efindex M-,@r{, PO Mode command}
|
||
Mark the last string found with @samp{_()} (@code{po-mark-translatable}).
|
||
|
||
@item M-.
|
||
@efindex M-.@r{, PO Mode command}
|
||
Mark the last string found with a keyword taken from a set of possible
|
||
keywords. This command with a prefix allows some management of these
|
||
keywords (@code{po-select-mark-and-mark}).
|
||
|
||
@end table
|
||
|
||
@efindex po-tags-search@r{, PO Mode command}
|
||
The @kbd{,} (@code{po-tags-search}) command searches for the next
|
||
occurrence of a string which looks like a possible candidate for
|
||
translation, and displays the program source in another Emacs window,
|
||
positioned in such a way that the string is near the top of this other
|
||
window. If the string is too big to fit whole in this window, it is
|
||
positioned so only its end is shown. In any case, the cursor
|
||
is left in the PO file window. If the shown string would be better
|
||
presented differently in different native languages, you may mark it
|
||
using @kbd{M-,} or @kbd{M-.}. Otherwise, you might rather ignore it
|
||
and skip to the next string by merely repeating the @kbd{,} command.
|
||
|
||
A string is a good candidate for translation if it contains a sequence
|
||
of three or more letters. A string containing at most two letters in
|
||
a row will be considered as a candidate if it has more letters than
|
||
non-letters. The command disregards strings containing no letters,
|
||
or isolated letters only. It also disregards strings within comments,
|
||
or strings already marked with some keyword PO mode knows (see below).
|
||
|
||
If you have never told Emacs about some @file{TAGS} file to use, the
|
||
command will request that you specify one from the minibuffer, the
|
||
first time you use the command. You may later change your @file{TAGS}
|
||
file by using the regular Emacs command @w{@kbd{M-x visit-tags-table}},
|
||
which will ask you to name the precise @file{TAGS} file you want
|
||
to use. @xref{Tags, , Tag Tables, emacs, The Emacs Editor}.
|
||
|
||
Each time you use the @kbd{,} command, the search resumes from where it was
|
||
left by the previous search, and goes through all program sources,
|
||
obeying the @file{TAGS} file, until all sources have been processed.
|
||
However, by giving a prefix argument to the command @w{(@kbd{C-u
|
||
,})}, you may request that the search be restarted all over again
|
||
from the first program source; but in this case, strings that you
|
||
recently marked as translatable will be automatically skipped.
|
||
|
||
Using this @kbd{,} command does not prevent the use of other regular
|
||
Emacs tags commands. For example, regular @code{tags-search} or
|
||
@code{tags-query-replace} commands may be used without disrupting the
|
||
independent @kbd{,} search sequence. However, as implemented, the
|
||
@emph{initial} @kbd{,} command (or the @kbd{,} command used with a
|
||
prefix) might also reinitialize the regular Emacs tags searching to the
|
||
first tags file; this reinitialization might be considered spurious.
|
||
|
||
@efindex po-mark-translatable@r{, PO Mode command}
|
||
@efindex po-select-mark-and-mark@r{, PO Mode command}
|
||
The @kbd{M-,} (@code{po-mark-translatable}) command will mark the
|
||
recently found string with the @samp{_} keyword. The @kbd{M-.}
|
||
(@code{po-select-mark-and-mark}) command will request that you type
|
||
one keyword from the minibuffer and use that keyword for marking
|
||
the string. Both commands will automatically create a new PO file
|
||
untranslated entry for the string being marked, and make it the
|
||
current entry (making it easy for you to immediately proceed to its
|
||
translation, if you feel like doing it right away). It is possible
|
||
that the modifications made to the program source by @kbd{M-,} or
|
||
@kbd{M-.} render some source line longer than 80 columns, forcing you
|
||
to break and re-indent this line differently. You may use the @kbd{O}
|
||
command from PO mode, or any other window changing command from
|
||
Emacs, to break out into the program source window, and do any
|
||
needed adjustments. You will have to use some regular Emacs command
|
||
to return the cursor to the PO file window, if you want command
|
||
@kbd{,} for the next string, say.
|
||
|
||
The @kbd{M-.} command has a few built-in speedups, so you do not
|
||
have to explicitly type all keywords all the time. The first such
|
||
speedup is that you are presented with a @emph{preferred} keyword,
|
||
which you may accept by merely typing @kbd{@key{RET}} at the prompt.
|
||
The second speedup is that you may type any non-ambiguous prefix of the
|
||
keyword you really mean, and the command will complete it automatically
|
||
for you. This also means that PO mode has to @emph{know} all
|
||
your possible keywords, and that it will not accept mistyped keywords.
|
||
|
||
If you reply @kbd{?} to the keyword request, the command gives a
|
||
list of all known keywords, from which you may choose. When the
|
||
command is prefixed by an argument @w{(@kbd{C-u M-.})}, it inhibits
|
||
updating any program source or PO file buffer, and does some simple
|
||
keyword management instead. In this case, the command asks for a
|
||
keyword, written in full, which becomes a new allowed keyword for
|
||
later @kbd{M-.} commands. Moreover, this new keyword automatically
|
||
becomes the @emph{preferred} keyword for later commands. By typing
|
||
an already known keyword in response to @w{@kbd{C-u M-.}}, one merely
|
||
changes the @emph{preferred} keyword and does nothing more.
|
||
|
||
All keywords known for @kbd{M-.} are recognized by the @kbd{,} command
|
||
when scanning for strings, and strings already marked by any of those
|
||
known keywords are automatically skipped. If many PO files are opened
|
||
simultaneously, each one has its own independent set of known keywords.
|
||
There is no provision in PO mode, currently, for deleting a known
|
||
keyword; you have to quit the file (maybe using @kbd{q}) and reopen
|
||
it afresh. When a PO file is newly brought up in an Emacs window, only
|
||
@samp{gettext} and @samp{_} are known as keywords, and @samp{gettext}
|
||
is preferred for the @kbd{M-.} command. In fact, it is not useful to
|
||
prefer @samp{_}, as this one is already built in the @kbd{M-,} command.
|
||
|
||
@node c-format Flag
|
||
@section Special Comments preceding Keywords
|
||
|
||
@c FIXME document c-format and no-c-format.
|
||
|
||
@cindex format strings
|
||
In C programs strings are often used within calls of functions from the
|
||
@code{printf} family. The special thing about these format strings is
|
||
that they can contain format specifiers introduced with @kbd{%}. Assume
|
||
we have the code
|
||
|
||
@example
|
||
printf (gettext ("String `%s' has %d characters\n"), s, strlen (s));
|
||
@end example
|
||
|
||
@noindent
|
||
A possible German translation for the above string might be:
|
||
|
||
@example
|
||
"%d Zeichen lang ist die Zeichenkette `%s'"
|
||
@end example
|
||
|
||
A C programmer, even if he cannot speak German, will recognize that
|
||
there is something wrong here. The order of the two format specifiers
|
||
is changed but of course the order of the arguments in the @code{printf} call is not.
|
||
This will most probably lead to problems because now the length of the
|
||
string is regarded as the address.
|
||
|
||
To prevent errors at runtime caused by translations, the @code{msgfmt}
|
||
tool can check statically whether the arguments in the original and the
|
||
translation string match in type and number. If this is not the case
|
||
and the @samp{-c} option has been passed to @code{msgfmt}, @code{msgfmt}
|
||
will give an error and refuse to produce a MO file. Thus consistent
|
||
use of @samp{msgfmt -c} will catch the error, so that it cannot cause
|
||
problems at runtime.
|
||
|
||
@noindent
|
||
For the word order in the above German translation to be correct, one
|
||
would have to write
|
||
|
||
@example
|
||
"%2$d Zeichen lang ist die Zeichenkette `%1$s'"
|
||
@end example
|
||
|
||
@noindent
|
||
The routines in @code{msgfmt} know about this special notation.
|
||
|
||
Because not all strings in a program will be format strings, it is not
|
||
useful for @code{msgfmt} to test all the strings in the @file{.po} file.
|
||
This might cause problems because the string might contain what looks
|
||
like a format specifier, but the string is not used in @code{printf}.
|
||
|
||
Therefore @code{xgettext} adds a special tag to those messages it
|
||
thinks might be a format string. There is no absolute rule for this,
|
||
only a heuristic. In the @file{.po} file the entry is marked using the
|
||
@code{c-format} flag in the @code{#,} comment line (@pxref{PO Files}).
|
||
|
||
@kwindex c-format@r{, and @code{xgettext}}
|
||
@kwindex no-c-format@r{, and @code{xgettext}}
|
||
The careful reader now might say that this again can cause problems.
|
||
The heuristic might guess it wrong. This is true and therefore
|
||
@code{xgettext} knows about a special kind of comment which lets
|
||
the programmer take over the decision. If in the same line as or
|
||
the immediately preceding line to the @code{gettext} keyword
|
||
the @code{xgettext} program finds a comment containing the words
|
||
@code{xgettext:c-format}, it will mark the string in any case with
|
||
the @code{c-format} flag. This kind of comment should be used when
|
||
@code{xgettext} does not recognize the string as a format string but
|
||
it really is one and it should be tested. Please note that when the
|
||
comment is in the same line as the @code{gettext} keyword, it must be
|
||
before the string to be translated.
|
||
|
||
This situation happens quite often. The @code{printf} function is often
|
||
called with strings which do not contain a format specifier. Of course
|
||
one would normally use @code{fputs} but it does happen. In this case
|
||
@code{xgettext} does not recognize this as a format string but what
|
||
happens if the translation introduces a valid format specifier? The
|
||
@code{printf} function will try to access one of the parameters but none
|
||
exists because the original code does not pass any parameters.
|
||
|
||
@code{xgettext} of course could make a wrong decision the other way
|
||
round, i.e.@: a string marked as a format string actually is not a format
|
||
string. In this case the @code{msgfmt} might give too many warnings and
|
||
would prevent translating the @file{.po} file. The method to prevent
|
||
this wrong decision is similar to the one used above, only the comment
|
||
to use must contain the string @code{xgettext:no-c-format}.
|
||
|
||
If a string is marked with @code{c-format} and this is not correct the
|
||
user can find out who is responsible for the decision. See
|
||
@ref{xgettext Invocation} to see how the @code{--debug} option can be
|
||
used for solving this problem.
|
||
|
||
@node Special cases
|
||
@section Special Cases of Translatable Strings
|
||
|
||
@cindex marking string initializers
|
||
The attentive reader might now point out that it is not always possible
|
||
to mark translatable strings with @code{gettext} or something like this.
|
||
Consider the following case:
|
||
|
||
@example
|
||
@group
|
||
@{
|
||
static const char *messages[] = @{
|
||
"some very meaningful message",
|
||
"and another one"
|
||
@};
|
||
const char *string;
|
||
@dots{}
|
||
string
|
||
= index > 1 ? "a default message" : messages[index];
|
||
|
||
fputs (string);
|
||
@dots{}
|
||
@}
|
||
@end group
|
||
@end example
|
||
|
||
While it is no problem to mark the string @code{"a default message"} it
|
||
is not possible to mark the string initializers for @code{messages}.
|
||
What is to be done? We have to fulfill two tasks. First we have to mark the
|
||
strings so that the @code{xgettext} program (@pxref{xgettext Invocation})
|
||
can find them, and second we have to translate the strings at runtime
|
||
before printing them.
|
||
|
||
The first task can be fulfilled by creating a new keyword, which names a
|
||
no-op. For the second we have to mark all access points to a string
|
||
from the array. So one solution can look like this:
|
||
|
||
@example
|
||
@group
|
||
#define gettext_noop(String) String
|
||
|
||
@{
|
||
static const char *messages[] = @{
|
||
gettext_noop ("some very meaningful message"),
|
||
gettext_noop ("and another one")
|
||
@};
|
||
const char *string;
|
||
@dots{}
|
||
string
|
||
= index > 1 ? gettext ("a default message") : gettext (messages[index]);
|
||
|
||
fputs (string);
|
||
@dots{}
|
||
@}
|
||
@end group
|
||
@end example
|
||
|
||
Please convince yourself that the string which is written by
|
||
@code{fputs} is translated in any case. How to get @code{xgettext} to know
|
||
the additional keyword @code{gettext_noop} is explained in @ref{xgettext
|
||
Invocation}.
|
||
|
||
The above is of course not the only solution. You could also come along
|
||
with the following one:
|
||
|
||
@example
|
||
@group
|
||
#define gettext_noop(String) String
|
||
|
||
@{
|
||
static const char *messages[] = @{
|
||
gettext_noop ("some very meaningful message"),
|
||
gettext_noop ("and another one")
|
||
@};
|
||
const char *string;
|
||
@dots{}
|
||
string
|
||
= index > 1 ? gettext_noop ("a default message") : messages[index];
|
||
|
||
fputs (gettext (string));
|
||
@dots{}
|
||
@}
|
||
@end group
|
||
@end example
|
||
|
||
But this has a drawback. The programmer has to take care that
|
||
he uses @code{gettext_noop} for the string @code{"a default message"}.
|
||
A use of @code{gettext} could have in rare cases unpredictable results.
|
||
|
||
One advantage is that you need not make control flow analysis to make
|
||
sure the output is really translated in any case. But this analysis is
|
||
generally not very difficult. If it should be difficult in some situation, you can
|
||
use this second method in this situation.
|
||
|
||
@node Bug Report Address
|
||
@section Letting Users Report Translation Bugs
|
||
|
||
Code sometimes has bugs, but translations sometimes have bugs too. The
|
||
users need to be able to report them. Reporting translation bugs to the
|
||
programmer or maintainer of a package is not very useful, since the
|
||
maintainer must never change a translation, except on behalf of the
|
||
translator. Hence the translation bugs must be reported to the
|
||
translators.
|
||
|
||
Here is a way to organize this so that the maintainer does not need to
|
||
forward translation bug reports, nor even keep a list of the addresses of
|
||
the translators or their translation teams.
|
||
|
||
Every program has a place where it shows the bug report address. For
|
||
GNU programs, it is the code which handles the ``--help'' option,
|
||
typically in a function called ``usage''. In this place, instruct the
|
||
translator to add her own bug reporting address. For example, if that
|
||
code has a statement
|
||
|
||
@example
|
||
@group
|
||
printf (_("Report bugs to <%s>.\n"), PACKAGE_BUGREPORT);
|
||
@end group
|
||
@end example
|
||
|
||
you can add some translator instructions like this:
|
||
|
||
@example
|
||
@group
|
||
/* TRANSLATORS: The placeholder indicates the bug-reporting address
|
||
for this package. Please add _another line_ saying
|
||
"Report translation bugs to <...>\n" with the address for translation
|
||
bugs (typically your translation team's web or email address). */
|
||
printf (_("Report bugs to <%s>.\n"), PACKAGE_BUGREPORT);
|
||
@end group
|
||
@end example
|
||
|
||
These will be extracted by @samp{xgettext}, leading to a .pot file that
|
||
contains this:
|
||
|
||
@example
|
||
@group
|
||
#. TRANSLATORS: The placeholder indicates the bug-reporting address
|
||
#. for this package. Please add _another line_ saying
|
||
#. "Report translation bugs to <...>\n" with the address for translation
|
||
#. bugs (typically your translation team's web or email address).
|
||
#: src/hello.c:178
|
||
#, c-format
|
||
msgid "Report bugs to <%s>.\n"
|
||
msgstr ""
|
||
@end group
|
||
@end example
|
||
|
||
@node Names
|
||
@section Marking Proper Names for Translation
|
||
|
||
Should names of persons, cities, locations etc. be marked for translation
|
||
or not? People who only know languages that can be written with Latin
|
||
letters (English, Spanish, French, German, etc.) are tempted to say ``no'',
|
||
because names usually do not change when transported between these languages.
|
||
However, in general when translating from one script to another, names
|
||
are translated too, usually phonetically or by transliteration. For
|
||
example, Russian or Greek names are converted to the Latin alphabet when
|
||
being translated to English, and English or French names are converted
|
||
to the Katakana script when being translated to Japanese. This is
|
||
necessary because the speakers of the target language in general cannot
|
||
read the script the name is originally written in.
|
||
|
||
As a programmer, you should therefore make sure that names are marked
|
||
for translation, with a special comment telling the translators that it
|
||
is a proper name and how to pronounce it. In its simple form, it looks
|
||
like this:
|
||
|
||
@example
|
||
@group
|
||
printf (_("Written by %s.\n"),
|
||
/* TRANSLATORS: This is a proper name. See the gettext
|
||
manual, section Names. Note this is actually a non-ASCII
|
||
name: The first name is (with Unicode escapes)
|
||
"Fran\u00e7ois" or (with HTML entities) "Fran&ccedil;ois".
|
||
Pronunciation is like "fraa-swa pee-nar". */
|
||
_("Francois Pinard"));
|
||
@end group
|
||
@end example
|
||
|
||
@noindent
|
||
The GNU gnulib library offers a module @samp{propername}
|
||
(@url{https://www.gnu.org/software/gnulib/MODULES.html#module=propername})
|
||
which takes care to automatically append the original name, in parentheses,
|
||
to the translated name. For names that cannot be written in ASCII, it
|
||
also frees the translator from the task of entering the appropriate non-ASCII
|
||
characters if no script change is needed. In this more comfortable form,
|
||
it looks like this:
|
||
|
||
@example
|
||
@group
|
||
printf (_("Written by %s and %s.\n"),
|
||
proper_name ("Ulrich Drepper"),
|
||
/* TRANSLATORS: This is a proper name. See the gettext
|
||
manual, section Names. Note this is actually a non-ASCII
|
||
name: The first name is (with Unicode escapes)
|
||
"Fran\u00e7ois" or (with HTML entities) "Fran&ccedil;ois".
|
||
Pronunciation is like "fraa-swa pee-nar". */
|
||
proper_name_utf8 ("Francois Pinard", "Fran\303\247ois Pinard"));
|
||
@end group
|
||
@end example
|
||
|
||
@noindent
|
||
You can also write the original name directly in Unicode (rather than with
|
||
Unicode escapes or HTML entities) and denote the pronunciation using the
|
||
International Phonetic Alphabet (see
|
||
@url{https://en.wikipedia.org/wiki/International_Phonetic_Alphabet}).
|
||
|
||
As a translator, you should use some care when translating names, because
|
||
it is frustrating if people see their names mutilated or distorted.
|
||
|
||
If your language uses the Latin script, all you need to do is to reproduce
|
||
the name as perfectly as you can within the usual character set of your
|
||
language. In this particular case, this means to provide a translation
|
||
containing the c-cedilla character. If your language uses a different
|
||
script and the people speaking it don't usually read Latin words, it means
|
||
transliteration. If the programmer used the simple case, you should still
|
||
give, in parentheses, the original writing of the name -- for the sake of
|
||
the people that do read the Latin script. If the programmer used the
|
||
@samp{propername} module mentioned above, you don't need to give the original
|
||
writing of the name in parentheses, because the program will already do so.
|
||
Here is an example, using Greek as the target script:
|
||
|
||
@example
|
||
@group
|
||
#. This is a proper name. See the gettext
|
||
#. manual, section Names. Note this is actually a non-ASCII
|
||
#. name: The first name is (with Unicode escapes)
|
||
#. "Fran\u00e7ois" or (with HTML entities) "Fran&ccedil;ois".
|
||
#. Pronunciation is like "fraa-swa pee-nar".
|
||
msgid "Francois Pinard"
|
||
msgstr "\phi\rho\alpha\sigma\omicron\alpha \pi\iota\nu\alpha\rho"
|
||
" (Francois Pinard)"
|
||
@end group
|
||
@end example
|
||
|
||
Because translation of names is such a sensitive domain, it is a good
|
||
idea to test your translation before submitting it.
|
||
|
||
@node Libraries
|
||
@section Preparing Library Sources
|
||
|
||
When you are preparing a library, not a program, for the use of
|
||
@code{gettext}, only a few details are different. Here we assume that
|
||
the library has a translation domain and a POT file of its own. (If
|
||
it uses the translation domain and POT file of the main program, then
|
||
the previous sections apply without changes.)
|
||
|
||
@enumerate
|
||
@item
|
||
The library code doesn't call @code{setlocale (LC_ALL, "")}. It's the
|
||
responsibility of the main program to set the locale. The library's
|
||
documentation should mention this fact, so that developers of programs
|
||
using the library are aware of it.
|
||
|
||
@item
|
||
The library code doesn't call @code{textdomain (PACKAGE)}, because it
|
||
would interfere with the text domain set by the main program.
|
||
|
||
@item
|
||
The initialization code for a program was
|
||
|
||
@smallexample
|
||
setlocale (LC_ALL, "");
|
||
bindtextdomain (PACKAGE, LOCALEDIR);
|
||
textdomain (PACKAGE);
|
||
@end smallexample
|
||
|
||
@noindent
|
||
For a library it is reduced to
|
||
|
||
@smallexample
|
||
bindtextdomain (PACKAGE, LOCALEDIR);
|
||
@end smallexample
|
||
|
||
@noindent
|
||
If your library's API doesn't already have an initialization function,
|
||
you need to create one, containing at least the @code{bindtextdomain}
|
||
invocation. However, you usually don't need to export and document this
|
||
initialization function: It is sufficient that all entry points of the
|
||
library call the initialization function if it hasn't been called before.
|
||
The typical idiom used to achieve this is a static boolean variable that
|
||
indicates whether the initialization function has been called. Like this:
|
||
|
||
@example
|
||
@group
|
||
static bool libfoo_initialized;
|
||
|
||
static void
|
||
libfoo_initialize (void)
|
||
@{
|
||
bindtextdomain (PACKAGE, LOCALEDIR);
|
||
libfoo_initialized = true;
|
||
@}
|
||
|
||
/* This function is part of the exported API. */
|
||
struct foo *
|
||
create_foo (...)
|
||
@{
|
||
/* Must ensure the initialization is performed. */
|
||
if (!libfoo_initialized)
|
||
libfoo_initialize ();
|
||
...
|
||
@}
|
||
|
||
/* This function is part of the exported API. The argument must be
|
||
non-NULL and have been created through create_foo(). */
|
||
int
|
||
foo_refcount (struct foo *argument)
|
||
@{
|
||
/* No need to invoke the initialization function here, because
|
||
create_foo() must already have been called before. */
|
||
...
|
||
@}
|
||
@end group
|
||
@end example
|
||
|
||
@item
|
||
The usual declaration of the @samp{_} macro in each source file was
|
||
|
||
@smallexample
|
||
#include <libintl.h>
|
||
#define _(String) gettext (String)
|
||
@end smallexample
|
||
|
||
@noindent
|
||
for a program. For a library, which has its own translation domain,
|
||
it reads like this:
|
||
|
||
@smallexample
|
||
#include <libintl.h>
|
||
#define _(String) dgettext (PACKAGE, String)
|
||
@end smallexample
|
||
|
||
In other words, @code{dgettext} is used instead of @code{gettext}.
|
||
Similarly, the @code{dngettext} function should be used in place of the
|
||
@code{ngettext} function.
|
||
@end enumerate
|
||
|
||
@node Template
|
||
@chapter Making the PO Template File
|
||
@cindex PO template file
|
||
|
||
After preparing the sources, the programmer creates a PO template file.
|
||
This chapter explains how to use @code{xgettext} for this purpose.
|
||
|
||
@code{xgettext} creates a file named @file{@var{domainname}.po}. You
|
||
should then rename it to @file{@var{domainname}.pot}. (Why doesn't
|
||
@code{xgettext} create it under the name @file{@var{domainname}.pot}
|
||
right away? The answer is: for historical reasons. When @code{xgettext}
|
||
was specified, the distinction between a PO file and PO file template
|
||
was fuzzy, and the suffix @samp{.pot} wasn't in use at that time.)
|
||
|
||
@c FIXME: Rewrite.
|
||
|
||
@menu
|
||
* xgettext Invocation:: Invoking the @code{xgettext} Program
|
||
@end menu
|
||
|
||
@node xgettext Invocation
|
||
@section Invoking the @code{xgettext} Program
|
||
|
||
@include xgettext.texi
|
||
|
||
@node Creating
|
||
@chapter Creating a New PO File
|
||
@cindex creating a new PO file
|
||
|
||
When starting a new translation, the translator creates a file called
|
||
@file{@var{LANG}.po}, as a copy of the @file{@var{package}.pot} template
|
||
file with modifications in the initial comments (at the beginning of the file)
|
||
and in the header entry (the first entry, near the beginning of the file).
|
||
|
||
The easiest way to do so is by use of the @samp{msginit} program.
|
||
For example:
|
||
|
||
@example
|
||
$ cd @var{PACKAGE}-@var{VERSION}
|
||
$ cd po
|
||
$ msginit
|
||
@end example
|
||
|
||
The alternative way is to do the copy and modifications by hand.
|
||
To do so, the translator copies @file{@var{package}.pot} to
|
||
@file{@var{LANG}.po}. Then she modifies the initial comments and
|
||
the header entry of this file.
|
||
|
||
@menu
|
||
* msginit Invocation:: Invoking the @code{msginit} Program
|
||
* Header Entry:: Filling in the Header Entry
|
||
@end menu
|
||
|
||
@node msginit Invocation
|
||
@section Invoking the @code{msginit} Program
|
||
|
||
@include msginit.texi
|
||
|
||
@node Header Entry
|
||
@section Filling in the Header Entry
|
||
@cindex header entry of a PO file
|
||
|
||
The initial comments "SOME DESCRIPTIVE TITLE", "YEAR" and
|
||
"FIRST AUTHOR <EMAIL@@ADDRESS>, YEAR" ought to be replaced by sensible
|
||
information. This can be done in any text editor; if Emacs is used
|
||
and it switched to PO mode automatically (because it has recognized
|
||
the file's suffix), you can disable it by typing @kbd{M-x fundamental-mode}.
|
||
|
||
Modifying the header entry can already be done using PO mode: in Emacs,
|
||
type @kbd{M-x po-mode RET} and then @kbd{RET} again to start editing the
|
||
entry. You should fill in the following fields.
|
||
|
||
@table @asis
|
||
@item Project-Id-Version
|
||
This is the name and version of the package. Fill it in if it has not
|
||
already been filled in by @code{xgettext}.
|
||
|
||
@item Report-Msgid-Bugs-To
|
||
This has already been filled in by @code{xgettext}. It contains an email
|
||
address or URL where you can report bugs in the untranslated strings:
|
||
|
||
@itemize -
|
||
@item Strings which are not entire sentences, see the maintainer guidelines
|
||
in @ref{Preparing Strings}.
|
||
@item Strings which use unclear terms or require additional context to be
|
||
understood.
|
||
@item Strings which make invalid assumptions about notation of date, time or
|
||
money.
|
||
@item Pluralisation problems.
|
||
@item Incorrect English spelling.
|
||
@item Incorrect formatting.
|
||
@end itemize
|
||
|
||
@item POT-Creation-Date
|
||
This has already been filled in by @code{xgettext}.
|
||
|
||
@item PO-Revision-Date
|
||
You don't need to fill this in. It will be filled by the PO file editor
|
||
when you save the file.
|
||
|
||
@item Last-Translator
|
||
Fill in your name and email address (without double quotes).
|
||
|
||
@item Language-Team
|
||
Fill in the English name of the language, and the email address or
|
||
homepage URL of the language team you are part of.
|
||
|
||
Before starting a translation, it is a good idea to get in touch with
|
||
your translation team, not only to make sure you don't do duplicated work,
|
||
but also to coordinate difficult linguistic issues.
|
||
|
||
@cindex list of translation teams, where to find
|
||
In the Free Translation Project, each translation team has its own mailing
|
||
list. The up-to-date list of teams can be found at the Free Translation
|
||
Project's homepage, @uref{https://translationproject.org/}, in the "Teams"
|
||
area.
|
||
|
||
@item Language
|
||
@c The purpose of this field is to make it possible to automatically
|
||
@c - convert PO files to translation memory,
|
||
@c - initialize a spell checker based on the PO file,
|
||
@c - perform language specific checks.
|
||
Fill in the language code of the language. This can be in one of three
|
||
forms:
|
||
|
||
@itemize -
|
||
@item
|
||
@samp{@var{ll}}, an @w{ISO 639} two-letter language code (lowercase).
|
||
See @ref{Language Codes} for the list of codes.
|
||
|
||
@item
|
||
@samp{@var{ll}_@var{CC}}, where @samp{@var{ll}} is an @w{ISO 639} two-letter
|
||
language code (lowercase) and @samp{@var{CC}} is an @w{ISO 3166} two-letter
|
||
country code (uppercase). The country code specification is not redundant:
|
||
Some languages have dialects in different countries. For example,
|
||
@samp{de_AT} is used for Austria, and @samp{pt_BR} for Brazil. The country
|
||
code serves to distinguish the dialects. See @ref{Language Codes} and
|
||
@ref{Country Codes} for the lists of codes.
|
||
|
||
@item
|
||
@samp{@var{ll}_@var{CC}@@@var{variant}}, where @samp{@var{ll}} is an
|
||
@w{ISO 639} two-letter language code (lowercase), @samp{@var{CC}} is an
|
||
@w{ISO 3166} two-letter country code (uppercase), and @samp{@var{variant}} is
|
||
a variant designator. The variant designator (lowercase) can be a script
|
||
designator, such as @samp{latin} or @samp{cyrillic}.
|
||
@end itemize
|
||
|
||
The naming convention @samp{@var{ll}_@var{CC}} is also the way locales are
|
||
named on systems based on GNU libc. But there are three important differences:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
In this PO file field, but not in locale names, @samp{@var{ll}_@var{CC}}
|
||
combinations denoting a language's main dialect are abbreviated as
|
||
@samp{@var{ll}}. For example, @samp{de} is equivalent to @samp{de_DE}
|
||
(German as spoken in Germany), and @samp{pt} to @samp{pt_PT} (Portuguese as
|
||
spoken in Portugal) in this context.
|
||
|
||
@item
|
||
In this PO file field, suffixes like @samp{.@var{encoding}} are not used.
|
||
|
||
@item
|
||
In this PO file field, variant designators that are not relevant to message
|
||
translation, such as @samp{@@euro}, are not used.
|
||
@end itemize
|
||
|
||
So, if your locale name is @samp{de_DE.UTF-8}, the language specification in
|
||
PO files is just @samp{de}.
|
||
|
||
@item Content-Type
|
||
@cindex encoding of PO files
|
||
@cindex charset of PO files
|
||
Replace @samp{CHARSET} with the character encoding used for your language,
|
||
in your locale, or UTF-8. This field is needed for correct operation of the
|
||
@code{msgmerge} and @code{msgfmt} programs, as well as for users whose
|
||
locale's character encoding differs from yours (see @ref{Charset conversion}).
|
||
|
||
@cindex @code{locale} program
|
||
You get the character encoding of your locale by running the shell command
|
||
@samp{locale charmap}. If the result is @samp{C} or @samp{ANSI_X3.4-1968},
|
||
which is equivalent to @samp{ASCII} (= @samp{US-ASCII}), it means that your
|
||
locale is not correctly configured. In this case, ask your translation
|
||
team which charset to use. @samp{ASCII} is not usable for any language
|
||
except Latin.
|
||
|
||
@cindex encoding list
|
||
Because the PO files must be portable to operating systems with less advanced
|
||
internationalization facilities, the character encodings that can be used
|
||
are limited to those supported by both GNU @code{libc} and GNU
|
||
@code{libiconv}. These are:
|
||
@code{ASCII}, @code{ISO-8859-1}, @code{ISO-8859-2}, @code{ISO-8859-3},
|
||
@code{ISO-8859-4}, @code{ISO-8859-5}, @code{ISO-8859-6}, @code{ISO-8859-7},
|
||
@code{ISO-8859-8}, @code{ISO-8859-9}, @code{ISO-8859-13}, @code{ISO-8859-14},
|
||
@code{ISO-8859-15},
|
||
@code{KOI8-R}, @code{KOI8-U}, @code{KOI8-T},
|
||
@code{CP850}, @code{CP866}, @code{CP874},
|
||
@code{CP932}, @code{CP949}, @code{CP950}, @code{CP1250}, @code{CP1251},
|
||
@code{CP1252}, @code{CP1253}, @code{CP1254}, @code{CP1255}, @code{CP1256},
|
||
@code{CP1257}, @code{GB2312}, @code{EUC-JP}, @code{EUC-KR}, @code{EUC-TW},
|
||
@code{BIG5}, @code{BIG5-HKSCS}, @code{GBK}, @code{GB18030}, @code{SHIFT_JIS},
|
||
@code{JOHAB}, @code{TIS-620}, @code{VISCII}, @code{GEORGIAN-PS}, @code{UTF-8}.
|
||
|
||
@c This data is taken from glibc/localedata/SUPPORTED.
|
||
@cindex Linux
|
||
In the GNU system, the following encodings are frequently used for the
|
||
corresponding languages.
|
||
|
||
@cindex encoding for your language
|
||
@itemize
|
||
@item @code{ISO-8859-1} for
|
||
Afrikaans, Albanian, Basque, Breton, Catalan, Cornish, Danish, Dutch,
|
||
English, Estonian, Faroese, Finnish, French, Galician, German,
|
||
Greenlandic, Icelandic, Indonesian, Irish, Italian, Malay, Manx,
|
||
Norwegian, Occitan, Portuguese, Spanish, Swedish, Tagalog, Uzbek,
|
||
Walloon,
|
||
@item @code{ISO-8859-2} for
|
||
Bosnian, Croatian, Czech, Hungarian, Polish, Romanian, Serbian, Slovak,
|
||
Slovenian,
|
||
@item @code{ISO-8859-3} for Maltese,
|
||
@item @code{ISO-8859-5} for Macedonian, Serbian,
|
||
@item @code{ISO-8859-6} for Arabic,
|
||
@item @code{ISO-8859-7} for Greek,
|
||
@item @code{ISO-8859-8} for Hebrew,
|
||
@item @code{ISO-8859-9} for Turkish,
|
||
@item @code{ISO-8859-13} for Latvian, Lithuanian, Maori,
|
||
@item @code{ISO-8859-14} for Welsh,
|
||
@item @code{ISO-8859-15} for
|
||
Basque, Catalan, Dutch, English, Finnish, French, Galician, German, Irish,
|
||
Italian, Portuguese, Spanish, Swedish, Walloon,
|
||
@item @code{KOI8-R} for Russian,
|
||
@item @code{KOI8-U} for Ukrainian,
|
||
@item @code{KOI8-T} for Tajik,
|
||
@item @code{CP1251} for Bulgarian, Belarusian,
|
||
@item @code{GB2312}, @code{GBK}, @code{GB18030}
|
||
for simplified writing of Chinese,
|
||
@item @code{BIG5}, @code{BIG5-HKSCS}
|
||
for traditional writing of Chinese,
|
||
@item @code{EUC-JP} for Japanese,
|
||
@item @code{EUC-KR} for Korean,
|
||
@item @code{TIS-620} for Thai,
|
||
@item @code{GEORGIAN-PS} for Georgian,
|
||
@item @code{UTF-8} for any language, including those listed above.
|
||
@end itemize
|
||
|
||
@cindex quote characters, use in PO files
|
||
@cindex quotation marks
|
||
When single quote characters or double quote characters are used in
|
||
translations for your language, and your locale's encoding is one of the
|
||
ISO-8859-* charsets, it is best if you create your PO files in UTF-8
|
||
encoding, instead of your locale's encoding. This is because in UTF-8
|
||
the real quote characters can be represented (single quote characters:
|
||
U+2018, U+2019, double quote characters: U+201C, U+201D), whereas none of
|
||
ISO-8859-* charsets has them all. Users in UTF-8 locales will see the
|
||
real quote characters, whereas users in ISO-8859-* locales will see the
|
||
vertical apostrophe and the vertical double quote instead (because that's
|
||
what the character set conversion will transliterate them to).
|
||
|
||
@cindex @code{xmodmap} program, and typing quotation marks
|
||
To enter such quote characters under X11, you can change your keyboard
|
||
mapping using the @code{xmodmap} program. The X11 names of the quote
|
||
characters are "leftsinglequotemark", "rightsinglequotemark",
|
||
"leftdoublequotemark", "rightdoublequotemark", "singlelowquotemark",
|
||
"doublelowquotemark".
|
||
|
||
Note that only recent versions of GNU Emacs support the UTF-8 encoding:
|
||
Emacs 20 with Mule-UCS, and Emacs 21. As of January 2001, XEmacs doesn't
|
||
support the UTF-8 encoding.
|
||
|
||
The character encoding name can be written in either upper or lower case.
|
||
Usually upper case is preferred.
|
||
|
||
@item Content-Transfer-Encoding
|
||
Set this to @code{8bit}.
|
||
|
||
@item Plural-Forms
|
||
This field is optional. It is only needed if the PO file has plural forms.
|
||
You can find them by searching for the @samp{msgid_plural} keyword. The
|
||
format of the plural forms field is described in @ref{Plural forms} and
|
||
@ref{Translating plural forms}.
|
||
@end table
|
||
|
||
@node Updating
|
||
@chapter Updating Existing PO Files
|
||
|
||
@menu
|
||
* msgmerge Invocation:: Invoking the @code{msgmerge} Program
|
||
@end menu
|
||
|
||
@node msgmerge Invocation
|
||
@section Invoking the @code{msgmerge} Program
|
||
|
||
@include msgmerge.texi
|
||
|
||
@node Editing
|
||
@chapter Editing PO Files
|
||
@cindex Editing PO Files
|
||
|
||
@menu
|
||
* KBabel:: KDE's PO File Editor
|
||
* Gtranslator:: GNOME's PO File Editor
|
||
* PO Mode:: Emacs's PO File Editor
|
||
* Compendium:: Using Translation Compendia
|
||
@end menu
|
||
|
||
@node KBabel
|
||
@section KDE's PO File Editor
|
||
@cindex KDE PO file editor
|
||
|
||
@node Gtranslator
|
||
@section GNOME's PO File Editor
|
||
@cindex GNOME PO file editor
|
||
|
||
@node PO Mode
|
||
@section Emacs's PO File Editor
|
||
@cindex Emacs PO Mode
|
||
|
||
@c FIXME: Rewrite.
|
||
|
||
For those of you being
|
||
the lucky users of Emacs, PO mode has been specifically created
|
||
for providing a cozy environment for editing or modifying PO files.
|
||
While editing a PO file, PO mode allows for the easy browsing of
|
||
auxiliary and compendium PO files, as well as for following references into
|
||
the set of C program sources from which PO files have been derived.
|
||
It has a few special features, among which are the interactive marking
|
||
of program strings as translatable, and the validation of PO files
|
||
with easy repositioning to PO file lines showing errors.
|
||
|
||
For the beginning, besides main PO mode commands
|
||
(@pxref{Main PO Commands}), you should know how to move between entries
|
||
(@pxref{Entry Positioning}), and how to handle untranslated entries
|
||
(@pxref{Untranslated Entries}).
|
||
|
||
@menu
|
||
* Installation:: Completing GNU @code{gettext} Installation
|
||
* Main PO Commands:: Main Commands
|
||
* Entry Positioning:: Entry Positioning
|
||
* Normalizing:: Normalizing Strings in Entries
|
||
* Translated Entries:: Translated Entries
|
||
* Fuzzy Entries:: Fuzzy Entries
|
||
* Untranslated Entries:: Untranslated Entries
|
||
* Obsolete Entries:: Obsolete Entries
|
||
* Modifying Translations:: Modifying Translations
|
||
* Modifying Comments:: Modifying Comments
|
||
* Subedit:: Mode for Editing Translations
|
||
* C Sources Context:: C Sources Context
|
||
* Auxiliary:: Consulting Auxiliary PO Files
|
||
@end menu
|
||
|
||
@node Installation
|
||
@subsection Completing GNU @code{gettext} Installation
|
||
|
||
@cindex installing @code{gettext}
|
||
@cindex @code{gettext} installation
|
||
Once you have received, unpacked, configured and compiled the GNU
|
||
@code{gettext} distribution, the @samp{make install} command puts in
|
||
place the programs @code{xgettext}, @code{msgfmt}, @code{gettext}, and
|
||
@code{msgmerge}, as well as their available message catalogs. To
|
||
top off a comfortable installation, you might also want to make the
|
||
PO mode available to your Emacs users.
|
||
|
||
@emindex @file{.emacs} customizations
|
||
@emindex installing PO mode
|
||
During the installation of the PO mode, you might want to modify your
|
||
file @file{.emacs}, once and for all, so it contains a few lines looking
|
||
like:
|
||
|
||
@example
|
||
(setq auto-mode-alist
|
||
(cons '("\\.po\\'\\|\\.po\\." . po-mode) auto-mode-alist))
|
||
(autoload 'po-mode "po-mode" "Major mode for translators to edit PO files" t)
|
||
@end example
|
||
|
||
Later, whenever you edit some @file{.po}
|
||
file, or any file having the string @samp{.po.} within its name,
|
||
Emacs loads @file{po-mode.elc} (or @file{po-mode.el}) as needed, and
|
||
automatically activates PO mode commands for the associated buffer.
|
||
The string @emph{PO} appears in the mode line for any buffer for
|
||
which PO mode is active. Many PO files may be active at once in a
|
||
single Emacs session.
|
||
|
||
If you are using Emacs version 20 or newer, and have already installed
|
||
the appropriate international fonts on your system, you may also tell
|
||
Emacs how to determine automatically the coding system of every PO file.
|
||
This will often (but not always) cause the necessary fonts to be loaded
|
||
and used for displaying the translations on your Emacs screen. For this
|
||
to happen, add the lines:
|
||
|
||
@example
|
||
(modify-coding-system-alist 'file "\\.po\\'\\|\\.po\\."
|
||
'po-find-file-coding-system)
|
||
(autoload 'po-find-file-coding-system "po-mode")
|
||
@end example
|
||
|
||
@noindent
|
||
to your @file{.emacs} file. If, with this, you still see boxes instead
|
||
of international characters, try a different font set (via Shift Mouse
|
||
button 1).
|
||
|
||
@node Main PO Commands
|
||
@subsection Main PO mode Commands
|
||
|
||
@cindex PO mode (Emacs) commands
|
||
@emindex commands
|
||
After setting up Emacs with something similar to the lines in
|
||
@ref{Installation}, PO mode is activated for a window when Emacs finds a
|
||
PO file in that window. This puts the window read-only and establishes a
|
||
po-mode-map, which is a genuine Emacs mode, in a way that is not derived
|
||
from text mode in any way. Functions found on @code{po-mode-hook},
|
||
if any, will be executed.
|
||
|
||
When PO mode is active in a window, the letters @samp{PO} appear
|
||
in the mode line for that window. The mode line also displays how
|
||
many entries of each kind are held in the PO file. For example,
|
||
the string @samp{132t+3f+10u+2o} would tell the translator that the
|
||
PO file contains 132 translated entries (@pxref{Translated Entries}),
|
||
3 fuzzy entries (@pxref{Fuzzy Entries}), 10 untranslated entries
|
||
(@pxref{Untranslated Entries}) and 2 obsolete entries (@pxref{Obsolete
|
||
Entries}). Items with a zero count are not shown. So, in this example, if
|
||
the fuzzy entries were unfuzzied, the untranslated entries were translated
|
||
and the obsolete entries were deleted, the mode line would merely display
|
||
@samp{145t} for the counters.
|
||
|
||
The main PO commands are those which do not fit into the other categories of
|
||
subsequent sections. These allow for quitting PO mode or for managing windows
|
||
in special ways.
|
||
|
||
@table @kbd
|
||
@item _
|
||
@efindex _@r{, PO Mode command}
|
||
Undo last modification to the PO file (@code{po-undo}).
|
||
|
||
@item Q
|
||
@efindex Q@r{, PO Mode command}
|
||
Quit processing and save the PO file (@code{po-quit}).
|
||
|
||
@item q
|
||
@efindex q@r{, PO Mode command}
|
||
Quit processing, possibly after confirmation (@code{po-confirm-and-quit}).
|
||
|
||
@item 0
|
||
@efindex 0@r{, PO Mode command}
|
||
Temporarily leave the PO file window (@code{po-other-window}).
|
||
|
||
@item ?
|
||
@itemx h
|
||
@efindex ?@r{, PO Mode command}
|
||
@efindex h@r{, PO Mode command}
|
||
Show help about PO mode (@code{po-help}).
|
||
|
||
@item =
|
||
@efindex =@r{, PO Mode command}
|
||
Give some PO file statistics (@code{po-statistics}).
|
||
|
||
@item V
|
||
@efindex V@r{, PO Mode command}
|
||
Batch validate the format of the whole PO file (@code{po-validate}).
|
||
|
||
@end table
|
||
|
||
@efindex _@r{, PO Mode command}
|
||
@efindex po-undo@r{, PO Mode command}
|
||
The command @kbd{_} (@code{po-undo}) interfaces to the Emacs
|
||
@emph{undo} facility. @xref{Undo, , Undoing Changes, emacs, The Emacs
|
||
Editor}. Each time @kbd{_} is typed, modifications which the translator
|
||
did to the PO file are undone a little more. For the purpose of
|
||
undoing, each PO mode command is atomic. This is especially true for
|
||
the @kbd{@key{RET}} command: the whole edition made by using a single
|
||
use of this command is undone at once, even if the edition itself
|
||
implied several actions. However, while in the editing window, one
|
||
can undo the edition work quite parsimoniously.
|
||
|
||
@efindex Q@r{, PO Mode command}
|
||
@efindex q@r{, PO Mode command}
|
||
@efindex po-quit@r{, PO Mode command}
|
||
@efindex po-confirm-and-quit@r{, PO Mode command}
|
||
The commands @kbd{Q} (@code{po-quit}) and @kbd{q}
|
||
(@code{po-confirm-and-quit}) are used when the translator is done with the
|
||
PO file. The former is a bit less verbose than the latter. If the file
|
||
has been modified, it is saved to disk first. In both cases, and prior to
|
||
all this, the commands check if any untranslated messages remain in the
|
||
PO file and, if so, the translator is asked if she really wants to leave
|
||
off working with this PO file. This is the preferred way of getting rid
|
||
of an Emacs PO file buffer. Merely killing it through the usual command
|
||
@w{@kbd{C-x k}} (@code{kill-buffer}) is not the tidiest way to proceed.
|
||
|
||
@efindex 0@r{, PO Mode command}
|
||
@efindex po-other-window@r{, PO Mode command}
|
||
The command @kbd{0} (@code{po-other-window}) is another, softer way,
|
||
to leave PO mode, temporarily. It just moves the cursor to some other
|
||
Emacs window, and pops one if necessary. For example, if the translator
|
||
just got PO mode to show some source context in some other window, she might
|
||
discover some apparent bug in the program source that needs correction.
|
||
This command allows the translator to change sex, become a programmer,
|
||
and have the cursor right into the window containing the program she
|
||
(or rather @emph{he}) wants to modify. By later getting the cursor back
|
||
in the PO file window, or by asking Emacs to edit this file once again,
|
||
PO mode is then recovered.
|
||
|
||
@efindex ?@r{, PO Mode command}
|
||
@efindex h@r{, PO Mode command}
|
||
@efindex po-help@r{, PO Mode command}
|
||
The command @kbd{h} (@code{po-help}) displays a summary of all available PO
|
||
mode commands. The translator should then type any character to resume
|
||
normal PO mode operations. The command @kbd{?} has the same effect
|
||
as @kbd{h}.
|
||
|
||
@efindex =@r{, PO Mode command}
|
||
@efindex po-statistics@r{, PO Mode command}
|
||
The command @kbd{=} (@code{po-statistics}) computes the total number of
|
||
entries in the PO file, the ordinal of the current entry (counted from
|
||
1), the number of untranslated entries, the number of obsolete entries,
|
||
and displays all these numbers.
|
||
|
||
@efindex V@r{, PO Mode command}
|
||
@efindex po-validate@r{, PO Mode command}
|
||
The command @kbd{V} (@code{po-validate}) launches @code{msgfmt} in
|
||
checking and verbose
|
||
mode over the current PO file. This command first offers to save the
|
||
current PO file on disk. The @code{msgfmt} tool, from GNU @code{gettext},
|
||
has the purpose of creating a MO file out of a PO file, and PO mode uses
|
||
the features of this program for checking the overall format of a PO file,
|
||
as well as all individual entries.
|
||
|
||
@efindex next-error@r{, stepping through PO file validation results}
|
||
The program @code{msgfmt} runs asynchronously with Emacs, so the
|
||
translator regains control immediately while her PO file is being studied.
|
||
Error output is collected in the Emacs @samp{*compilation*} buffer,
|
||
displayed in another window. The regular Emacs command @kbd{C-x `}
|
||
(@code{next-error}), as well as other usual compile commands, allow the
|
||
translator to reposition quickly to the offending parts of the PO file.
|
||
Once the cursor is on the line in error, the translator may decide on
|
||
any PO mode action which would help correct the error.
|
||
|
||
@node Entry Positioning
|
||
@subsection Entry Positioning
|
||
|
||
@emindex current entry of a PO file
|
||
The cursor in a PO file window is almost always part of
|
||
an entry. The only exceptions are the special case when the cursor
|
||
is after the last entry in the file, or when the PO file is
|
||
empty. The entry where the cursor is found to be is said to be the
|
||
current entry. Many PO mode commands operate on the current entry,
|
||
so moving the cursor does more than allowing the translator to browse
|
||
the PO file; it also selects the entry on which commands operate.
|
||
|
||
@emindex moving through a PO file
|
||
Some PO mode commands alter the position of the cursor in a specialized
|
||
way. A few of those special purpose positioning commands are described here,
|
||
the others are described in following sections (for a complete list try
|
||
@kbd{C-h m}):
|
||
|
||
@table @kbd
|
||
|
||
@item .
|
||
@efindex .@r{, PO Mode command}
|
||
Redisplay the current entry (@code{po-current-entry}).
|
||
|
||
@item n
|
||
@efindex n@r{, PO Mode command}
|
||
Select the entry after the current one (@code{po-next-entry}).
|
||
|
||
@item p
|
||
@efindex p@r{, PO Mode command}
|
||
Select the entry before the current one (@code{po-previous-entry}).
|
||
|
||
@item <
|
||
@efindex <@r{, PO Mode command}
|
||
Select the first entry in the PO file (@code{po-first-entry}).
|
||
|
||
@item >
|
||
@efindex >@r{, PO Mode command}
|
||
Select the last entry in the PO file (@code{po-last-entry}).
|
||
|
||
@item m
|
||
@efindex m@r{, PO Mode command}
|
||
Record the location of the current entry for later use
|
||
(@code{po-push-location}).
|
||
|
||
@item r
|
||
@efindex r@r{, PO Mode command}
|
||
Return to a previously saved entry location (@code{po-pop-location}).
|
||
|
||
@item x
|
||
@efindex x@r{, PO Mode command}
|
||
Exchange the current entry location with the previously saved one
|
||
(@code{po-exchange-location}).
|
||
|
||
@end table
|
||
|
||
@efindex .@r{, PO Mode command}
|
||
@efindex po-current-entry@r{, PO Mode command}
|
||
Any Emacs command able to reposition the cursor may be used
|
||
to select the current entry in PO mode, including commands which
|
||
move by characters, lines, paragraphs, screens or pages, and search
|
||
commands. However, there is a kind of standard way to display the
|
||
current entry in PO mode, which usual Emacs commands moving
|
||
the cursor do not especially try to enforce. The command @kbd{.}
|
||
(@code{po-current-entry}) has the sole purpose of redisplaying the
|
||
current entry properly, after the current entry has been changed by
|
||
means external to PO mode, or the Emacs screen otherwise altered.
|
||
|
||
It is yet to be decided if PO mode helps the translator, or otherwise
|
||
irritates her, by forcing a rigid window disposition while she
|
||
is doing her work. We originally had quite precise ideas about
|
||
how windows should behave, but on the other hand, anyone used to
|
||
Emacs is often happy to keep full control. Maybe a fixed window
|
||
disposition might be offered as a PO mode option that the translator
|
||
might activate or deactivate at will, so it could be offered on an
|
||
experimental basis. If nobody feels a real need for using it, or
|
||
a compulsion for writing it, we should drop this whole idea.
|
||
The incentive for doing it should come from translators rather than
|
||
programmers, as opinions from an experienced translator are surely
|
||
worth more to me than opinions from programmers @emph{thinking} about
|
||
how @emph{others} should do translation.
|
||
|
||
@efindex n@r{, PO Mode command}
|
||
@efindex po-next-entry@r{, PO Mode command}
|
||
@efindex p@r{, PO Mode command}
|
||
@efindex po-previous-entry@r{, PO Mode command}
|
||
The commands @kbd{n} (@code{po-next-entry}) and @kbd{p}
|
||
(@code{po-previous-entry}) move the cursor to the entry following,
|
||
or preceding, the current one. If @kbd{n} is given while the
|
||
cursor is on the last entry of the PO file, or if @kbd{p}
|
||
is given while the cursor is on the first entry, no move is done.
|
||
|
||
@efindex <@r{, PO Mode command}
|
||
@efindex po-first-entry@r{, PO Mode command}
|
||
@efindex >@r{, PO Mode command}
|
||
@efindex po-last-entry@r{, PO Mode command}
|
||
The commands @kbd{<} (@code{po-first-entry}) and @kbd{>}
|
||
(@code{po-last-entry}) move the cursor to the first entry, or last
|
||
entry, of the PO file. When the cursor is located past the last
|
||
entry in a PO file, most PO mode commands will return an error saying
|
||
@samp{After last entry}. Moreover, the commands @kbd{<} and @kbd{>}
|
||
have the special property of being able to work even when the cursor
|
||
is not inside any PO file entry, and one may use them for nicely
|
||
correcting this situation. But even these commands will fail on a
|
||
truly empty PO file. There are development plans for the PO mode for it
|
||
to interactively fill an empty PO file from sources. @xref{Marking}.
|
||
|
||
The translator may decide, before working at the translation of
|
||
a particular entry, that she needs to browse the remainder of the
|
||
PO file, maybe for finding the terminology or phraseology used
|
||
in related entries. She can of course use the standard Emacs idioms
|
||
for saving the current cursor location in some register, and use that
|
||
register for getting back, or else, use the location ring.
|
||
|
||
@efindex m@r{, PO Mode command}
|
||
@efindex po-push-location@r{, PO Mode command}
|
||
@efindex r@r{, PO Mode command}
|
||
@efindex po-pop-location@r{, PO Mode command}
|
||
PO mode offers another approach, by which cursor locations may be saved
|
||
onto a special stack. The command @kbd{m} (@code{po-push-location})
|
||
merely adds the location of current entry to the stack, pushing
|
||
the already saved locations under the new one. The command
|
||
@kbd{r} (@code{po-pop-location}) consumes the top stack element and
|
||
repositions the cursor to the entry associated with that top element.
|
||
This position is then lost, for the next @kbd{r} will move the cursor
|
||
to the previously saved location, and so on until no locations remain
|
||
on the stack.
|
||
|
||
If the translator wants the position to be kept on the location stack,
|
||
maybe for taking a look at the entry associated with the top
|
||
element, then go elsewhere with the intent of getting back later, she
|
||
ought to use @kbd{m} immediately after @kbd{r}.
|
||
|
||
@efindex x@r{, PO Mode command}
|
||
@efindex po-exchange-location@r{, PO Mode command}
|
||
The command @kbd{x} (@code{po-exchange-location}) simultaneously
|
||
repositions the cursor to the entry associated with the top element of
|
||
the stack of saved locations, and replaces that top element with the
|
||
location of the current entry before the move. Consequently, repeating
|
||
the @kbd{x} command toggles alternatively between two entries.
|
||
For achieving this, the translator will position the cursor on the
|
||
first entry, use @kbd{m}, then position to the second entry, and
|
||
merely use @kbd{x} for making the switch.
|
||
|
||
@node Normalizing
|
||
@subsection Normalizing Strings in Entries
|
||
@cindex string normalization in entries
|
||
|
||
There are many different ways for encoding a particular string into a
|
||
PO file entry, because there are so many different ways to split and
|
||
quote multi-line strings, and even, to represent special characters
|
||
by backslashed escaped sequences. Some features of PO mode rely on
|
||
the ability for PO mode to scan an already existing PO file for a
|
||
particular string encoded into the @code{msgid} field of some entry.
|
||
Even if PO mode has internally all the built-in machinery for
|
||
implementing this recognition easily, doing it fast is technically
|
||
difficult. To facilitate a solution to this efficiency problem,
|
||
we decided on a canonical representation for strings.
|
||
|
||
A conventional representation of strings in a PO file is currently
|
||
under discussion, and PO mode experiments with a canonical representation.
|
||
Having both @code{xgettext} and PO mode converging towards a uniform
|
||
way of representing equivalent strings would be useful, as the internal
|
||
normalization needed by PO mode could be automatically satisfied
|
||
when using @code{xgettext} from GNU @code{gettext}. An explicit
|
||
PO mode normalization should then be only necessary for PO files
|
||
imported from elsewhere, or for when the convention itself evolves.
|
||
|
||
So, for achieving normalization of at least the strings of a given
|
||
PO file needing a canonical representation, the following PO mode
|
||
command is available:
|
||
|
||
@emindex string normalization in entries
|
||
@table @kbd
|
||
@item M-x po-normalize
|
||
@efindex po-normalize@r{, PO Mode command}
|
||
Tidy the whole PO file by making entries more uniform.
|
||
|
||
@end table
|
||
|
||
The special command @kbd{M-x po-normalize}, which has no associated
|
||
keys, revises all entries, ensuring that strings of both original
|
||
and translated entries use uniform internal quoting in the PO file.
|
||
It also removes any crumb after the last entry. This command may be
|
||
useful for PO files freshly imported from elsewhere, or if we ever
|
||
improve on the canonical quoting format we use. This canonical format
|
||
is not only meant for getting cleaner PO files, but also for greatly
|
||
speeding up @code{msgid} string lookup for some other PO mode commands.
|
||
|
||
@kbd{M-x po-normalize} presently makes three passes over the entries.
|
||
The first implements heuristics for converting PO files for GNU
|
||
@code{gettext} 0.6 and earlier, in which @code{msgid} and @code{msgstr}
|
||
fields were using K&R style C string syntax for multi-line strings.
|
||
These heuristics may fail for comments not related to obsolete
|
||
entries and ending with a backslash; they also depend on subsequent
|
||
passes for finalizing the proper commenting of continued lines for
|
||
obsolete entries. This first pass might disappear once all oldish PO
|
||
files have been adjusted. The second and third passes normalize
|
||
all @code{msgid} and @code{msgstr} strings respectively. They also
|
||
clean out those trailing backslashes used by XView's @code{msgfmt}
|
||
for continued lines.
|
||
|
||
@cindex importing PO files
|
||
Having such an explicit normalizing command allows for importing PO
|
||
files from other sources, but also eases the evolution of the current
|
||
convention, evolution driven mostly by aesthetic concerns, as of now.
|
||
It is easy to make suggested adjustments at a later time, as the
|
||
normalizing command and eventually, other GNU @code{gettext} tools
|
||
should greatly automate conformance. A description of the canonical
|
||
string format is given below, for the particular benefit of those not
|
||
having Emacs handy, and who would nevertheless want to handcraft
|
||
their PO files in nice ways.
|
||
|
||
@cindex multi-line strings
|
||
Right now, in PO mode, strings are single line or multi-line. A string
|
||
goes multi-line if and only if it has @emph{embedded} newlines, that
|
||
is, if it matches @samp{[^\n]\n+[^\n]}. So, we would have:
|
||
|
||
@example
|
||
msgstr "\n\nHello, world!\n\n\n"
|
||
@end example
|
||
|
||
but, replacing the space by a newline, this becomes:
|
||
|
||
@example
|
||
msgstr ""
|
||
"\n"
|
||
"\n"
|
||
"Hello,\n"
|
||
"world!\n"
|
||
"\n"
|
||
"\n"
|
||
@end example
|
||
|
||
We are deliberately using a caricatural example, here, to make the
|
||
point clearer. Usually, multi-lines are not that bad looking.
|
||
It is probable that we will implement the following suggestion.
|
||
We might lump together all initial newlines into the empty string,
|
||
and also all newlines introducing empty lines (that is, for @w{@var{n}
|
||
> 1}, the @var{n}-1'th last newlines would go together on a separate
|
||
string), so making the previous example appear:
|
||
|
||
@example
|
||
msgstr "\n\n"
|
||
"Hello,\n"
|
||
"world!\n"
|
||
"\n\n"
|
||
@end example
|
||
|
||
There are a few yet undecided little points about string normalization,
|
||
to be documented in this manual, once these questions settle.
|
||
|
||
@node Translated Entries
|
||
@subsection Translated Entries
|
||
@cindex translated entries
|
||
|
||
Each PO file entry for which the @code{msgstr} field has been filled with
|
||
a translation, and which is not marked as fuzzy (@pxref{Fuzzy Entries}),
|
||
is said to be a @dfn{translated} entry. Only translated entries will
|
||
later be compiled by GNU @code{msgfmt} and become usable in programs.
|
||
Other entry types will be excluded; translation will not occur for them.
|
||
|
||
@emindex moving by translated entries
|
||
Some commands are more specifically related to translated entry processing.
|
||
|
||
@table @kbd
|
||
@item t
|
||
@efindex t@r{, PO Mode command}
|
||
Find the next translated entry (@code{po-next-translated-entry}).
|
||
|
||
@item T
|
||
@efindex T@r{, PO Mode command}
|
||
Find the previous translated entry (@code{po-previous-translated-entry}).
|
||
|
||
@end table
|
||
|
||
@efindex t@r{, PO Mode command}
|
||
@efindex po-next-translated-entry@r{, PO Mode command}
|
||
@efindex T@r{, PO Mode command}
|
||
@efindex po-previous-translated-entry@r{, PO Mode command}
|
||
The commands @kbd{t} (@code{po-next-translated-entry}) and @kbd{T}
|
||
(@code{po-previous-translated-entry}) move forwards or backwards, chasing
|
||
for a translated entry. If none is found, the search is extended and
|
||
wraps around in the PO file buffer.
|
||
|
||
@evindex po-auto-fuzzy-on-edit@r{, PO Mode variable}
|
||
Translated entries usually result from the translator having edited in
|
||
a translation for them, @ref{Modifying Translations}. However, if the
|
||
variable @code{po-auto-fuzzy-on-edit} is not @code{nil}, the entry having
|
||
received a new translation first becomes a fuzzy entry, which ought to
|
||
be later unfuzzied before becoming an official, genuine translated entry.
|
||
@xref{Fuzzy Entries}.
|
||
|
||
@node Fuzzy Entries
|
||
@subsection Fuzzy Entries
|
||
@cindex fuzzy entries
|
||
|
||
@cindex attributes of a PO file entry
|
||
@cindex attribute, fuzzy
|
||
Each PO file entry may have a set of @dfn{attributes}, which are
|
||
qualities given a name and explicitly associated with the translation,
|
||
using a special system comment. One of these attributes
|
||
has the name @code{fuzzy}, and entries having this attribute are said
|
||
to have a fuzzy translation. They are called fuzzy entries, for short.
|
||
|
||
Fuzzy entries, even if they account for translated entries for
|
||
most other purposes, usually call for revision by the translator.
|
||
Those may be produced by applying the program @code{msgmerge} to
|
||
update an older translated PO file according to a new PO template
|
||
file, when this tool hypothesises that some new @code{msgid} has
|
||
been modified only slightly out of an older one, and chooses to pair
|
||
what it thinks to be the old translation for the new modified entry.
|
||
The slight alteration in the original string (the @code{msgid} string)
|
||
should often be reflected in the translated string, and this requires
|
||
the intervention of the translator. For this reason, @code{msgmerge}
|
||
might mark some entries as being fuzzy.
|
||
|
||
@emindex moving by fuzzy entries
|
||
Also, the translator may decide herself to mark an entry as fuzzy
|
||
for her own convenience, when she wants to remember that the entry
|
||
has to be later revisited. So, some commands are more specifically
|
||
related to fuzzy entry processing.
|
||
|
||
@table @kbd
|
||
@item f
|
||
@efindex f@r{, PO Mode command}
|
||
@c better append "-entry" all the time. -ke-
|
||
Find the next fuzzy entry (@code{po-next-fuzzy-entry}).
|
||
|
||
@item F
|
||
@efindex F@r{, PO Mode command}
|
||
Find the previous fuzzy entry (@code{po-previous-fuzzy-entry}).
|
||
|
||
@item @key{TAB}
|
||
@efindex TAB@r{, PO Mode command}
|
||
Remove the fuzzy attribute of the current entry (@code{po-unfuzzy}).
|
||
|
||
@end table
|
||
|
||
@efindex f@r{, PO Mode command}
|
||
@efindex po-next-fuzzy-entry@r{, PO Mode command}
|
||
@efindex F@r{, PO Mode command}
|
||
@efindex po-previous-fuzzy-entry@r{, PO Mode command}
|
||
The commands @kbd{f} (@code{po-next-fuzzy-entry}) and @kbd{F}
|
||
(@code{po-previous-fuzzy-entry}) move forwards or backwards, chasing for
|
||
a fuzzy entry. If none is found, the search is extended and wraps
|
||
around in the PO file buffer.
|
||
|
||
@efindex TAB@r{, PO Mode command}
|
||
@efindex po-unfuzzy@r{, PO Mode command}
|
||
@evindex po-auto-select-on-unfuzzy@r{, PO Mode variable}
|
||
The command @kbd{@key{TAB}} (@code{po-unfuzzy}) removes the fuzzy
|
||
attribute associated with an entry, usually leaving it translated.
|
||
Further, if the variable @code{po-auto-select-on-unfuzzy} has not
|
||
the @code{nil} value, the @kbd{@key{TAB}} command will automatically chase
|
||
for another interesting entry to work on. The initial value of
|
||
@code{po-auto-select-on-unfuzzy} is @code{nil}.
|
||
|
||
The initial value of @code{po-auto-fuzzy-on-edit} is @code{nil}. However,
|
||
if the variable @code{po-auto-fuzzy-on-edit} is set to @code{t}, any entry
|
||
edited through the @kbd{@key{RET}} command is marked fuzzy, as a way to
|
||
ensure some kind of double check, later. In this case, the usual paradigm
|
||
is that an entry becomes fuzzy (if not already) whenever the translator
|
||
modifies it. If she is satisfied with the translation, she then uses
|
||
@kbd{@key{TAB}} to pick another entry to work on, clearing the fuzzy attribute
|
||
on the same blow. If she is not satisfied yet, she merely uses @kbd{@key{SPC}}
|
||
to chase another entry, leaving the entry fuzzy.
|
||
|
||
@efindex DEL@r{, PO Mode command}
|
||
@efindex po-fade-out-entry@r{, PO Mode command}
|
||
The translator may also use the @kbd{@key{DEL}} command
|
||
(@code{po-fade-out-entry}) over any translated entry to mark it as being
|
||
fuzzy, when she wants to easily leave a reminder that she intends to return
|
||
to this entry later.
|
||
|
||
Also, when time comes to quit working on a PO file buffer with the @kbd{q}
|
||
command, the translator is asked for confirmation, if some fuzzy string
|
||
still exists.
|
||
|
||
@node Untranslated Entries
|
||
@subsection Untranslated Entries
|
||
@cindex untranslated entries
|
||
|
||
When @code{xgettext} originally creates a PO file, unless told
|
||
otherwise, it initializes the @code{msgid} field with the untranslated
|
||
string, and leaves the @code{msgstr} string to be empty. Such entries,
|
||
having an empty translation, are said to be @dfn{untranslated} entries.
|
||
Later, when the programmer slightly modifies some string right in
|
||
the program, this change is later reflected in the PO file
|
||
by the appearance of a new untranslated entry for the modified string.
|
||
|
||
The usual commands moving from entry to entry consider untranslated
|
||
entries on the same level as active entries. Untranslated entries
|
||
are easily recognizable by the fact they end with @w{@samp{msgstr ""}}.
|
||
|
||
@emindex moving by untranslated entries
|
||
The work of the translator might be (quite naively) seen as the process
|
||
of seeking for an untranslated entry, editing a translation for
|
||
it, and repeating these actions until no untranslated entries remain.
|
||
Some commands are more specifically related to untranslated entry
|
||
processing.
|
||
|
||
@table @kbd
|
||
@item u
|
||
@efindex u@r{, PO Mode command}
|
||
Find the next untranslated entry (@code{po-next-untranslated-entry}).
|
||
|
||
@item U
|
||
@efindex U@r{, PO Mode command}
|
||
Find the previous untranslated entry (@code{po-previous-untranslated-entry}).
|
||
|
||
@item k
|
||
@efindex k@r{, PO Mode command}
|
||
Turn the current entry into an untranslated one (@code{po-kill-msgstr}).
|
||
|
||
@end table
|
||
|
||
@efindex u@r{, PO Mode command}
|
||
@efindex po-next-untranslated-entry@r{, PO Mode command}
|
||
@efindex U@r{, PO Mode command}
|
||
@efindex po-previous-untranslated-entry@r{, PO Mode command}
|
||
The commands @kbd{u} (@code{po-next-untranslated-entry}) and @kbd{U}
|
||
(@code{po-previous-untranslated-entry}) move forwards or backwards,
|
||
chasing for an untranslated entry. If none is found, the search is
|
||
extended and wraps around in the PO file buffer.
|
||
|
||
@efindex k@r{, PO Mode command}
|
||
@efindex po-kill-msgstr@r{, PO Mode command}
|
||
An entry can be turned back into an untranslated entry by
|
||
merely emptying its translation, using the command @kbd{k}
|
||
(@code{po-kill-msgstr}). @xref{Modifying Translations}.
|
||
|
||
Also, when time comes to quit working on a PO file buffer
|
||
with the @kbd{q} command, the translator is asked for confirmation,
|
||
if some untranslated string still exists.
|
||
|
||
@node Obsolete Entries
|
||
@subsection Obsolete Entries
|
||
@cindex obsolete entries
|
||
|
||
By @dfn{obsolete} PO file entries, we mean those entries which are
|
||
commented out, usually by @code{msgmerge} when it found that the
|
||
translation is not needed anymore by the package being localized.
|
||
|
||
The usual commands moving from entry to entry consider obsolete
|
||
entries on the same level as active entries. Obsolete entries are
|
||
easily recognizable by the fact that all their lines start with
|
||
@code{#}, even those lines containing @code{msgid} or @code{msgstr}.
|
||
|
||
Commands exist for emptying the translation or reinitializing it
|
||
to the original untranslated string. Commands interfacing with the
|
||
kill ring may force some previously saved text into the translation.
|
||
The user may interactively edit the translation. All these commands
|
||
may apply to obsolete entries, carefully leaving the entry obsolete
|
||
after the fact.
|
||
|
||
@emindex moving by obsolete entries
|
||
Moreover, some commands are more specifically related to obsolete
|
||
entry processing.
|
||
|
||
@table @kbd
|
||
@item o
|
||
@efindex o@r{, PO Mode command}
|
||
Find the next obsolete entry (@code{po-next-obsolete-entry}).
|
||
|
||
@item O
|
||
@efindex O@r{, PO Mode command}
|
||
Find the previous obsolete entry (@code{po-previous-obsolete-entry}).
|
||
|
||
@item @key{DEL}
|
||
@efindex DEL@r{, PO Mode command}
|
||
Make an active entry obsolete, or zap out an obsolete entry
|
||
(@code{po-fade-out-entry}).
|
||
|
||
@end table
|
||
|
||
@efindex o@r{, PO Mode command}
|
||
@efindex po-next-obsolete-entry@r{, PO Mode command}
|
||
@efindex O@r{, PO Mode command}
|
||
@efindex po-previous-obsolete-entry@r{, PO Mode command}
|
||
The commands @kbd{o} (@code{po-next-obsolete-entry}) and @kbd{O}
|
||
(@code{po-previous-obsolete-entry}) move forwards or backwards,
|
||
chasing for an obsolete entry. If none is found, the search is
|
||
extended and wraps around in the PO file buffer.
|
||
|
||
PO mode does not provide ways for un-commenting an obsolete entry
|
||
and making it active, because this would reintroduce an original
|
||
untranslated string which does not correspond to any marked string
|
||
in the program sources. This goes with the philosophy of never
|
||
introducing useless @code{msgid} values.
|
||
|
||
@efindex DEL@r{, PO Mode command}
|
||
@efindex po-fade-out-entry@r{, PO Mode command}
|
||
@emindex obsolete active entry
|
||
@emindex comment out PO file entry
|
||
However, it is possible to comment out an active entry, so making
|
||
it obsolete. GNU @code{gettext} utilities will later react to the
|
||
disappearance of a translation by using the untranslated string.
|
||
The command @kbd{@key{DEL}} (@code{po-fade-out-entry}) pushes the current entry
|
||
a little further towards annihilation. If the entry is active (it is a
|
||
translated entry), then it is first made fuzzy. If it is already fuzzy,
|
||
then the entry is merely commented out, with confirmation. If the entry
|
||
is already obsolete, then it is completely deleted from the PO file.
|
||
It is easy to recycle the translation so deleted into some other PO file
|
||
entry, usually one which is untranslated. @xref{Modifying Translations}.
|
||
|
||
Here is a quite interesting problem to solve for later development of
|
||
PO mode, for those nights you are not sleepy. The idea would be that
|
||
PO mode might become bright enough, one of these days, to make good
|
||
guesses at retrieving the most probable candidate, among all obsolete
|
||
entries, for initializing the translation of a newly appeared string.
|
||
I think it might be a quite hard problem to do this algorithmically, as
|
||
we have to develop good and efficient measures of string similarity.
|
||
Right now, PO mode completely leaves the decision to the translator,
|
||
when the time comes to find the adequate obsolete translation, it
|
||
merely tries to provide handy tools for helping her to do so.
|
||
|
||
@node Modifying Translations
|
||
@subsection Modifying Translations
|
||
@cindex editing translations
|
||
@emindex editing translations
|
||
|
||
PO mode prevents direct modification of the PO file, by the usual
|
||
means Emacs gives for altering a buffer's contents. By doing so,
|
||
it aims to help the translator avoid little clerical errors
|
||
about the overall file format, or the proper quoting of strings,
|
||
as those errors would be easily made. Other kinds of errors are
|
||
still possible, but some may be caught and diagnosed by the batch
|
||
validation process, which the translator may always trigger by the
|
||
@kbd{V} command. For all other errors, the translator has to rely on
|
||
her own judgment, and also on the linguistic reports submitted to her
|
||
by the users of the translated package, having the same mother tongue.
|
||
|
||
When the time comes to create a translation, correct an error diagnosed
|
||
mechanically or reported by a user, the translators have to resort to
|
||
using the following commands for modifying the translations.
|
||
|
||
@table @kbd
|
||
@item @key{RET}
|
||
@efindex RET@r{, PO Mode command}
|
||
Interactively edit the translation (@code{po-edit-msgstr}).
|
||
|
||
@item @key{LFD}
|
||
@itemx C-j
|
||
@efindex LFD@r{, PO Mode command}
|
||
@efindex C-j@r{, PO Mode command}
|
||
Reinitialize the translation with the original, untranslated string
|
||
(@code{po-msgid-to-msgstr}).
|
||
|
||
@item k
|
||
@efindex k@r{, PO Mode command}
|
||
Save the translation on the kill ring, and delete it (@code{po-kill-msgstr}).
|
||
|
||
@item w
|
||
@efindex w@r{, PO Mode command}
|
||
Save the translation on the kill ring, without deleting it
|
||
(@code{po-kill-ring-save-msgstr}).
|
||
|
||
@item y
|
||
@efindex y@r{, PO Mode command}
|
||
Replace the translation, taking the new from the kill ring
|
||
(@code{po-yank-msgstr}).
|
||
|
||
@end table
|
||
|
||
@efindex RET@r{, PO Mode command}
|
||
@efindex po-edit-msgstr@r{, PO Mode command}
|
||
The command @kbd{@key{RET}} (@code{po-edit-msgstr}) opens a new Emacs
|
||
window meant to edit in a new translation, or to modify an already existing
|
||
translation. The new window contains a copy of the translation taken from
|
||
the current PO file entry, all ready for edition, expunged of all quoting
|
||
marks, fully modifiable and with the complete extent of Emacs modifying
|
||
commands. When the translator is done with her modifications, she may use
|
||
@w{@kbd{C-c C-c}} to close the subedit window with the automatically requoted
|
||
results, or @w{@kbd{C-c C-k}} to abort her modifications. @xref{Subedit},
|
||
for more information.
|
||
|
||
@efindex LFD@r{, PO Mode command}
|
||
@efindex C-j@r{, PO Mode command}
|
||
@efindex po-msgid-to-msgstr@r{, PO Mode command}
|
||
The command @kbd{@key{LFD}} (@code{po-msgid-to-msgstr}) initializes, or
|
||
reinitializes the translation with the original string. This command is
|
||
normally used when the translator wants to redo a fresh translation of
|
||
the original string, disregarding any previous work.
|
||
|
||
@evindex po-auto-edit-with-msgid@r{, PO Mode variable}
|
||
It is possible to arrange that, whenever editing an untranslated
|
||
entry, the @kbd{@key{LFD}} command be automatically executed. If you set
|
||
@code{po-auto-edit-with-msgid} to @code{t}, the translation gets
|
||
initialised with the original string, in case none exists already.
|
||
The default value for @code{po-auto-edit-with-msgid} is @code{nil}.
|
||
|
||
@emindex starting a string translation
|
||
In fact, whether it is best to start a translation with an empty
|
||
string, or rather with a copy of the original string, is a matter of
|
||
taste or habit. Sometimes, the source language and the
|
||
target language are so different that it is simply best to start writing
|
||
on an empty page. At other times, the source and target languages
|
||
are so close that it would be a waste to retype a number of words
|
||
already being written in the original string. A translator may also
|
||
like having the original string right under her eyes, as she will
|
||
progressively overwrite the original text with the translation, even
|
||
if this requires some extra editing work to get rid of the original.
|
||
|
||
@emindex cut and paste for translated strings
|
||
@efindex k@r{, PO Mode command}
|
||
@efindex po-kill-msgstr@r{, PO Mode command}
|
||
@efindex w@r{, PO Mode command}
|
||
@efindex po-kill-ring-save-msgstr@r{, PO Mode command}
|
||
The command @kbd{k} (@code{po-kill-msgstr}) merely empties the
|
||
translation string, so turning the entry into an untranslated
|
||
one. But while doing so, its previous contents are set aside in
|
||
a special place, known as the kill ring. The command @kbd{w}
|
||
(@code{po-kill-ring-save-msgstr}) has also the effect of taking a
|
||
copy of the translation onto the kill ring, but it otherwise leaves
|
||
the entry alone, and does @emph{not} remove the translation from the
|
||
entry. Both commands use exactly the Emacs kill ring, which is shared
|
||
between buffers, and which is well known already to Emacs lovers.
|
||
|
||
The translator may use @kbd{k} or @kbd{w} many times in the course
|
||
of her work, as the kill ring may hold several saved translations.
|
||
From the kill ring, strings may later be reinserted in various
|
||
Emacs buffers. In particular, the kill ring may be used for moving
|
||
translation strings between different entries of a single PO file
|
||
buffer, or if the translator is handling many such buffers at once,
|
||
even between PO files.
|
||
|
||
To facilitate exchanges with buffers which are not in PO mode, the
|
||
translation string put on the kill ring by the @kbd{k} command is fully
|
||
unquoted before being saved: external quotes are removed, multi-line
|
||
strings are concatenated, and backslash escaped sequences are turned
|
||
into their corresponding characters. In the special case of obsolete
|
||
entries, the translation is also uncommented prior to saving.
|
||
|
||
@efindex y@r{, PO Mode command}
|
||
@efindex po-yank-msgstr@r{, PO Mode command}
|
||
The command @kbd{y} (@code{po-yank-msgstr}) completely replaces the
|
||
translation of the current entry by a string taken from the kill ring.
|
||
Following Emacs terminology, we then say that the replacement
|
||
string is @dfn{yanked} into the PO file buffer.
|
||
@xref{Yanking, , , emacs, The Emacs Editor}.
|
||
The first time @kbd{y} is used, the translation receives the value of
|
||
the most recent addition to the kill ring. If @kbd{y} is typed once
|
||
again, immediately, without intervening keystrokes, the translation
|
||
just inserted is taken away and replaced by the second most recent
|
||
addition to the kill ring. By repeating @kbd{y} many times in a row,
|
||
the translator may travel along the kill ring for saved strings,
|
||
until she finds the string she really wanted.
|
||
|
||
When a string is yanked into a PO file entry, it is fully and
|
||
automatically requoted for complying with the format PO files should
|
||
have. Further, if the entry is obsolete, PO mode then appropriately
|
||
pushes the inserted string inside comments. Once again, translators
|
||
should not burden themselves with quoting considerations besides, of
|
||
course, the necessity of the translated string itself respective to
|
||
the program using it.
|
||
|
||
Note that @kbd{k} or @kbd{w} are not the only commands pushing strings
|
||
on the kill ring, as almost any PO mode command replacing translation
|
||
strings (or the translator comments) automatically saves the old string
|
||
on the kill ring. The main exceptions to this general rule are the
|
||
yanking commands themselves.
|
||
|
||
@emindex using obsolete translations to make new entries
|
||
To better illustrate the operation of killing and yanking, let's
|
||
use an actual example, taken from a common situation. When the
|
||
programmer slightly modifies some string right in the program, his
|
||
change is later reflected in the PO file by the appearance
|
||
of a new untranslated entry for the modified string, and the fact
|
||
that the entry translating the original or unmodified string becomes
|
||
obsolete. In many cases, the translator might spare herself some work
|
||
by retrieving the unmodified translation from the obsolete entry,
|
||
then initializing the untranslated entry @code{msgstr} field with
|
||
this retrieved translation. Once this is done, the obsolete entry is
|
||
not wanted anymore, and may be safely deleted.
|
||
|
||
When the translator finds an untranslated entry and suspects that a
|
||
slight variant of the translation exists, she immediately uses @kbd{m}
|
||
to mark the current entry location, then starts chasing obsolete
|
||
entries with @kbd{o}, hoping to find some translation corresponding
|
||
to the unmodified string. Once found, she uses the @kbd{@key{DEL}} command
|
||
for deleting the obsolete entry, knowing that @kbd{@key{DEL}} also @emph{kills}
|
||
the translation, that is, pushes the translation on the kill ring.
|
||
Then, @kbd{r} returns to the initial untranslated entry, and @kbd{y}
|
||
then @emph{yanks} the saved translation right into the @code{msgstr}
|
||
field. The translator is then free to use @kbd{@key{RET}} for fine
|
||
tuning the translation contents, and maybe to later use @kbd{u},
|
||
then @kbd{m} again, for going on with the next untranslated string.
|
||
|
||
When some sequence of keys has to be typed over and over again, the
|
||
translator may find it useful to become better acquainted with the Emacs
|
||
capability of learning these sequences and playing them back under request.
|
||
@xref{Keyboard Macros, , , emacs, The Emacs Editor}.
|
||
|
||
@node Modifying Comments
|
||
@subsection Modifying Comments
|
||
@cindex editing comments in PO files
|
||
@emindex editing comments
|
||
|
||
Any translation work done seriously will raise many linguistic
|
||
difficulties, for which decisions have to be made, and the choices
|
||
further documented. These documents may be saved within the
|
||
PO file in form of translator comments, which the translator
|
||
is free to create, delete, or modify at will. These comments may
|
||
be useful to herself when she returns to this PO file after a while.
|
||
|
||
Comments not having whitespace after the initial @samp{#}, for example,
|
||
those beginning with @samp{#.} or @samp{#:}, are @emph{not} translator
|
||
comments, they are exclusively created by other @code{gettext} tools.
|
||
So, the commands below will never alter such system added comments,
|
||
they are not meant for the translator to modify. @xref{PO Files}.
|
||
|
||
The following commands are somewhat similar to those modifying translations,
|
||
so the general indications given for those apply here. @xref{Modifying
|
||
Translations}.
|
||
|
||
@table @kbd
|
||
|
||
@item #
|
||
@efindex #@r{, PO Mode command}
|
||
Interactively edit the translator comments (@code{po-edit-comment}).
|
||
|
||
@item K
|
||
@efindex K@r{, PO Mode command}
|
||
Save the translator comments on the kill ring, and delete them
|
||
(@code{po-kill-comment}).
|
||
|
||
@item W
|
||
@efindex W@r{, PO Mode command}
|
||
Save the translator comments on the kill ring, without deleting them
|
||
(@code{po-kill-ring-save-comment}).
|
||
|
||
@item Y
|
||
@efindex Y@r{, PO Mode command}
|
||
Replace the translator comments, taking the new from the kill ring
|
||
(@code{po-yank-comment}).
|
||
|
||
@end table
|
||
|
||
These commands parallel PO mode commands for modifying the translation
|
||
strings, and behave much the same way as they do, except that they handle
|
||
this part of PO file comments meant for translator usage, rather
|
||
than the translation strings. So, if the descriptions given below are
|
||
slightly succinct, it is because the full details have already been given.
|
||
@xref{Modifying Translations}.
|
||
|
||
@efindex #@r{, PO Mode command}
|
||
@efindex po-edit-comment@r{, PO Mode command}
|
||
The command @kbd{#} (@code{po-edit-comment}) opens a new Emacs window
|
||
containing a copy of the translator comments on the current PO file entry.
|
||
If there are no such comments, PO mode understands that the translator wants
|
||
to add a comment to the entry, and she is presented with an empty screen.
|
||
Comment marks (@code{#}) and the space following them are automatically
|
||
removed before edition, and reinstated after. For translator comments
|
||
pertaining to obsolete entries, the uncommenting and recommenting operations
|
||
are done twice. Once in the editing window, the keys @w{@kbd{C-c C-c}}
|
||
allow the translator to tell she is finished with editing the comment.
|
||
@xref{Subedit}, for further details.
|
||
|
||
@evindex po-subedit-mode-hook@r{, PO Mode variable}
|
||
Functions found on @code{po-subedit-mode-hook}, if any, are executed after
|
||
the string has been inserted in the edit buffer.
|
||
|
||
@efindex K@r{, PO Mode command}
|
||
@efindex po-kill-comment@r{, PO Mode command}
|
||
@efindex W@r{, PO Mode command}
|
||
@efindex po-kill-ring-save-comment@r{, PO Mode command}
|
||
@efindex Y@r{, PO Mode command}
|
||
@efindex po-yank-comment@r{, PO Mode command}
|
||
The command @kbd{K} (@code{po-kill-comment}) gets rid of all
|
||
translator comments, while saving those comments on the kill ring.
|
||
The command @kbd{W} (@code{po-kill-ring-save-comment}) takes
|
||
a copy of the translator comments on the kill ring, but leaves
|
||
them undisturbed in the current entry. The command @kbd{Y}
|
||
(@code{po-yank-comment}) completely replaces the translator comments
|
||
by a string taken at the front of the kill ring. When this command
|
||
is immediately repeated, the comments just inserted are withdrawn,
|
||
and replaced by other strings taken along the kill ring.
|
||
|
||
On the kill ring, all strings have the same nature. There is no
|
||
distinction between @emph{translation} strings and @emph{translator
|
||
comments} strings. So, for example, let's presume the translator
|
||
has just finished editing a translation, and wants to create a new
|
||
translator comment to document why the previous translation was
|
||
not good, just to remember what was the problem. Foreseeing that she
|
||
will do that in her documentation, the translator may want to quote
|
||
the previous translation in her translator comments. To do so, she
|
||
may initialize the translator comments with the previous translation,
|
||
still at the head of the kill ring. Because editing already pushed the
|
||
previous translation on the kill ring, she merely has to type @kbd{M-w}
|
||
prior to @kbd{#}, and the previous translation will be right there,
|
||
all ready for being introduced by some explanatory text.
|
||
|
||
On the other hand, presume there are some translator comments already
|
||
and that the translator wants to add to those comments, instead
|
||
of wholly replacing them. Then, she should edit the comment right
|
||
away with @kbd{#}. Once inside the editing window, she can use the
|
||
regular Emacs commands @kbd{C-y} (@code{yank}) and @kbd{M-y}
|
||
(@code{yank-pop}) to get the previous translation where she likes.
|
||
|
||
@node Subedit
|
||
@subsection Details of Sub Edition
|
||
@emindex subedit minor mode
|
||
|
||
The PO subedit minor mode has a few peculiarities worth being described
|
||
in fuller detail. It installs a few commands over the usual editing set
|
||
of Emacs, which are described below.
|
||
|
||
@table @kbd
|
||
@item C-c C-c
|
||
@efindex C-c C-c@r{, PO Mode command}
|
||
Complete edition (@code{po-subedit-exit}).
|
||
|
||
@item C-c C-k
|
||
@efindex C-c C-k@r{, PO Mode command}
|
||
Abort edition (@code{po-subedit-abort}).
|
||
|
||
@item C-c C-a
|
||
@efindex C-c C-a@r{, PO Mode command}
|
||
Consult auxiliary PO files (@code{po-subedit-cycle-auxiliary}).
|
||
|
||
@end table
|
||
|
||
@emindex exiting PO subedit
|
||
@efindex C-c C-c@r{, PO Mode command}
|
||
@efindex po-subedit-exit@r{, PO Mode command}
|
||
The window's contents represents a translation for a given message,
|
||
or a translator comment. The translator may modify this window to
|
||
her heart's content. Once this is done, the command @w{@kbd{C-c C-c}}
|
||
(@code{po-subedit-exit}) may be used to return the edited translation into
|
||
the PO file, replacing the original translation, even if it moved out of
|
||
sight or if buffers were switched.
|
||
|
||
@efindex C-c C-k@r{, PO Mode command}
|
||
@efindex po-subedit-abort@r{, PO Mode command}
|
||
If the translator becomes unsatisfied with her translation or comment,
|
||
to the extent she prefers keeping what was existent prior to the
|
||
@kbd{@key{RET}} or @kbd{#} command, she may use the command @w{@kbd{C-c C-k}}
|
||
(@code{po-subedit-abort}) to merely get rid of edition, while preserving
|
||
the original translation or comment. Another way would be for her to exit
|
||
normally with @w{@kbd{C-c C-c}}, then type @code{U} once for undoing the
|
||
whole effect of last edition.
|
||
|
||
@efindex C-c C-a@r{, PO Mode command}
|
||
@efindex po-subedit-cycle-auxiliary@r{, PO Mode command}
|
||
The command @w{@kbd{C-c C-a}} (@code{po-subedit-cycle-auxiliary})
|
||
allows for glancing through translations
|
||
already achieved in other languages, directly while editing the current
|
||
translation. This may be quite convenient when the translator is fluent
|
||
at many languages, but of course, only makes sense when such completed
|
||
auxiliary PO files are already available to her (@pxref{Auxiliary}).
|
||
|
||
Functions found on @code{po-subedit-mode-hook}, if any, are executed after
|
||
the string has been inserted in the edit buffer.
|
||
|
||
While editing her translation, the translator should pay attention to not
|
||
inserting unwanted @kbd{@key{RET}} (newline) characters at the end of
|
||
the translated string if those are not meant to be there, or to removing
|
||
such characters when they are required. Since these characters are not
|
||
visible in the editing buffer, they are easily introduced by mistake.
|
||
To help her, @kbd{@key{RET}} automatically puts the character @code{<}
|
||
at the end of the string being edited, but this @code{<} is not really
|
||
part of the string. On exiting the editing window with @w{@kbd{C-c C-c}},
|
||
PO mode automatically removes such @kbd{<} and all whitespace added after
|
||
it. If the translator adds characters after the terminating @code{<}, it
|
||
loses its delimiting property and integrally becomes part of the string.
|
||
If she removes the delimiting @code{<}, then the edited string is taken
|
||
@emph{as is}, with all trailing newlines, even if invisible. Also, if
|
||
the translated string ought to end itself with a genuine @code{<}, then
|
||
the delimiting @code{<} may not be removed; so the string should appear,
|
||
in the editing window, as ending with two @code{<} in a row.
|
||
|
||
@emindex editing multiple entries
|
||
When a translation (or a comment) is being edited, the translator may move
|
||
the cursor back into the PO file buffer and freely move to other entries,
|
||
browsing at will. If, with an edition pending, the translator wanders in the
|
||
PO file buffer, she may decide to start modifying another entry. Each entry
|
||
being edited has its own subedit buffer. It is possible to simultaneously
|
||
edit the translation @emph{and} the comment of a single entry, or to
|
||
edit entries in different PO files, all at once. Typing @kbd{@key{RET}}
|
||
on a field already being edited merely resumes that particular edit. Yet,
|
||
the translator should better be comfortable at handling many Emacs windows!
|
||
|
||
@emindex pending subedits
|
||
Pending subedits may be completed or aborted in any order, regardless
|
||
of how or when they were started. When many subedits are pending and the
|
||
translator asks for quitting the PO file (with the @kbd{q} command), subedits
|
||
are automatically resumed one at a time, so she may decide for each of them.
|
||
|
||
@node C Sources Context
|
||
@subsection C Sources Context
|
||
@emindex consulting program sources
|
||
@emindex looking at the source to aid translation
|
||
@emindex use the source, Luke
|
||
|
||
PO mode is particularly powerful when used with PO files
|
||
created through GNU @code{gettext} utilities, as those utilities
|
||
insert special comments in the PO files they generate.
|
||
Some of these special comments relate the PO file entry to
|
||
exactly where the untranslated string appears in the program sources.
|
||
|
||
When the translator gets to an untranslated entry, she is fairly
|
||
often faced with an original string which is not as informative as
|
||
it normally should be, being succinct, cryptic, or otherwise ambiguous.
|
||
Before choosing how to translate the string, she needs to understand
|
||
better what the string really means and how tight the translation has
|
||
to be. Most of the time, when problems arise, the only way left to make
|
||
her judgment is looking at the true program sources from where this
|
||
string originated, searching for surrounding comments the programmer
|
||
might have put in there, and looking around for helping clues of
|
||
@emph{any} kind.
|
||
|
||
Surely, when looking at program sources, the translator will receive
|
||
more help if she is a fluent programmer. However, even if she is
|
||
not versed in programming and feels a little lost in C code, the
|
||
translator should not be shy at taking a look, once in a while.
|
||
It is most probable that she will still be able to find some of the
|
||
hints she needs. She will learn quickly to not feel uncomfortable
|
||
in program code, paying more attention to programmer's comments,
|
||
variable and function names (if he dared choosing them well), and
|
||
overall organization, than to the program code itself.
|
||
|
||
@emindex find source fragment for a PO file entry
|
||
The following commands are meant to help the translator at getting
|
||
program source context for a PO file entry.
|
||
|
||
@table @kbd
|
||
@item s
|
||
@efindex s@r{, PO Mode command}
|
||
Resume the display of a program source context, or cycle through them
|
||
(@code{po-cycle-source-reference}).
|
||
|
||
@item M-s
|
||
@efindex M-s@r{, PO Mode command}
|
||
Display of a program source context selected by menu
|
||
(@code{po-select-source-reference}).
|
||
|
||
@item S
|
||
@efindex S@r{, PO Mode command}
|
||
Add a directory to the search path for source files
|
||
(@code{po-consider-source-path}).
|
||
|
||
@item M-S
|
||
@efindex M-S@r{, PO Mode command}
|
||
Delete a directory from the search path for source files
|
||
(@code{po-ignore-source-path}).
|
||
|
||
@end table
|
||
|
||
@efindex s@r{, PO Mode command}
|
||
@efindex po-cycle-source-reference@r{, PO Mode command}
|
||
@efindex M-s@r{, PO Mode command}
|
||
@efindex po-select-source-reference@r{, PO Mode command}
|
||
The commands @kbd{s} (@code{po-cycle-source-reference}) and @kbd{M-s}
|
||
(@code{po-select-source-reference}) both open another window displaying
|
||
some source program file, and already positioned in such a way that
|
||
it shows an actual use of the string to be translated. By doing
|
||
so, the command gives source program context for the string. But if
|
||
the entry has no source context references, or if all references
|
||
are unresolved along the search path for program sources, then the
|
||
command diagnoses this as an error.
|
||
|
||
Even if @kbd{s} (or @kbd{M-s}) opens a new window, the cursor stays
|
||
in the PO file window. If the translator really wants to
|
||
get into the program source window, she ought to do it explicitly,
|
||
maybe by using command @kbd{O}.
|
||
|
||
When @kbd{s} is typed for the first time, or for a PO file entry which
|
||
is different from the last one used for getting source context, then the
|
||
command reacts by giving the first context available for this entry,
|
||
if any. If some context has already been recently displayed for the
|
||
current PO file entry, and the translator wandered off to do other
|
||
things, typing @kbd{s} again will merely resume, in another window,
|
||
the context last displayed. In particular, if the translator moved
|
||
the cursor away from the context in the source file, the command will
|
||
bring the cursor back to the context. By using @kbd{s} many times
|
||
in a row, with no other commands intervening, PO mode will cycle to
|
||
the next available contexts for this particular entry, getting back
|
||
to the first context once the last has been shown.
|
||
|
||
The command @kbd{M-s} behaves differently. Instead of cycling through
|
||
references, it lets the translator choose a particular reference among
|
||
many, and displays that reference. It is best used with completion,
|
||
if the translator types @kbd{@key{TAB}} immediately after @kbd{M-s}, in
|
||
response to the question, she will be offered a menu of all possible
|
||
references, as a reminder of which are the acceptable answers.
|
||
This command is useful only where there are really many contexts
|
||
available for a single string to translate.
|
||
|
||
@efindex S@r{, PO Mode command}
|
||
@efindex po-consider-source-path@r{, PO Mode command}
|
||
@efindex M-S@r{, PO Mode command}
|
||
@efindex po-ignore-source-path@r{, PO Mode command}
|
||
Program source files are usually found relative to where the PO
|
||
file stands. As a special provision, when this fails, the file is
|
||
also looked for, but relative to the directory immediately above it.
|
||
Those two cases take proper care of most PO files. However, it might
|
||
happen that a PO file has been moved, or is edited in a different
|
||
place than its normal location. When this happens, the translator
|
||
should tell PO mode in which directory normally sits the genuine PO
|
||
file. Many such directories may be specified, and all together, they
|
||
constitute what is called the @dfn{search path} for program sources.
|
||
The command @kbd{S} (@code{po-consider-source-path}) is used to interactively
|
||
enter a new directory at the front of the search path, and the command
|
||
@kbd{M-S} (@code{po-ignore-source-path}) is used to select, with completion,
|
||
one of the directories she does not want anymore on the search path.
|
||
|
||
@node Auxiliary
|
||
@subsection Consulting Auxiliary PO Files
|
||
@emindex consulting translations to other languages
|
||
|
||
PO mode is able to help the knowledgeable translator, being fluent in
|
||
many languages, at taking advantage of translations already achieved
|
||
in other languages she just happens to know. It provides these other
|
||
language translations as additional context for her own work. Moreover,
|
||
it has features to ease the production of translations for many languages
|
||
at once, for translators preferring to work in this way.
|
||
|
||
@cindex auxiliary PO file
|
||
@emindex auxiliary PO file
|
||
An @dfn{auxiliary} PO file is an existing PO file meant for the same
|
||
package the translator is working on, but targeted to a different mother
|
||
tongue language. Commands exist for declaring and handling auxiliary
|
||
PO files, and also for showing contexts for the entry under work.
|
||
|
||
Here are the auxiliary file commands available in PO mode.
|
||
|
||
@table @kbd
|
||
@item a
|
||
@efindex a@r{, PO Mode command}
|
||
Seek auxiliary files for another translation for the same entry
|
||
(@code{po-cycle-auxiliary}).
|
||
|
||
@item C-c C-a
|
||
@efindex C-c C-a@r{, PO Mode command}
|
||
Switch to a particular auxiliary file (@code{po-select-auxiliary}).
|
||
|
||
@item A
|
||
@efindex A@r{, PO Mode command}
|
||
Declare this PO file as an auxiliary file (@code{po-consider-as-auxiliary}).
|
||
|
||
@item M-A
|
||
@efindex M-A@r{, PO Mode command}
|
||
Remove this PO file from the list of auxiliary files
|
||
(@code{po-ignore-as-auxiliary}).
|
||
|
||
@end table
|
||
|
||
@efindex A@r{, PO Mode command}
|
||
@efindex po-consider-as-auxiliary@r{, PO Mode command}
|
||
@efindex M-A@r{, PO Mode command}
|
||
@efindex po-ignore-as-auxiliary@r{, PO Mode command}
|
||
Command @kbd{A} (@code{po-consider-as-auxiliary}) adds the current
|
||
PO file to the list of auxiliary files, while command @kbd{M-A}
|
||
(@code{po-ignore-as-auxiliary}) just removes it.
|
||
|
||
@efindex a@r{, PO Mode command}
|
||
@efindex po-cycle-auxiliary@r{, PO Mode command}
|
||
The command @kbd{a} (@code{po-cycle-auxiliary}) seeks all auxiliary PO
|
||
files, round-robin, searching for a translated entry in some other language
|
||
having an @code{msgid} field identical to the one for the current entry.
|
||
The found PO file, if any, takes the place of the current PO file in
|
||
the display (its window gets on top). Before doing so, the current PO
|
||
file is also made into an auxiliary file, if not already. So, @kbd{a}
|
||
in this newly displayed PO file will seek another PO file, and so on,
|
||
so repeating @kbd{a} will eventually yield back the original PO file.
|
||
|
||
@efindex C-c C-a@r{, PO Mode command}
|
||
@efindex po-select-auxiliary@r{, PO Mode command}
|
||
The command @kbd{C-c C-a} (@code{po-select-auxiliary}) asks the translator
|
||
for her choice of a particular auxiliary file, with completion, and
|
||
then switches to that selected PO file. The command also checks if
|
||
the selected file has an @code{msgid} field identical to the one for
|
||
the current entry, and if yes, this entry becomes current. Otherwise,
|
||
the cursor of the selected file is left undisturbed.
|
||
|
||
For all this to work fully, auxiliary PO files will have to be normalized,
|
||
in such a way that @code{msgid} fields should be written @emph{exactly}
|
||
the same way. It is possible to write @code{msgid} fields in various
|
||
ways for representing the same string, but different writings would break the
|
||
proper behaviour of the auxiliary file commands of PO mode. This is not
|
||
expected to be much of a problem in practice, as most existing PO files have
|
||
their @code{msgid} entries written by the same GNU @code{gettext} tools.
|
||
|
||
@efindex normalize@r{, PO Mode command}
|
||
However, PO files initially created by PO mode itself, while marking
|
||
strings in source files, are normalised differently. So are PO
|
||
files resulting from the @samp{M-x normalize} command. Until these
|
||
discrepancies between PO mode and other GNU @code{gettext} tools get
|
||
fully resolved, the translator should stay aware of normalisation issues.
|
||
|
||
@node Compendium
|
||
@section Using Translation Compendia
|
||
@emindex using translation compendia
|
||
|
||
@cindex compendium
|
||
A @dfn{compendium} is a special PO file containing a set of
|
||
translations recurring in many different packages. The translator can
|
||
use gettext tools to build a new compendium, to add entries to her
|
||
compendium, and to initialize untranslated entries, or to update
|
||
already translated entries, from translations kept in the compendium.
|
||
|
||
@menu
|
||
* Creating Compendia:: Merging translations for later use
|
||
* Using Compendia:: Using older translations if they fit
|
||
@end menu
|
||
|
||
@node Creating Compendia
|
||
@subsection Creating Compendia
|
||
@cindex creating compendia
|
||
@cindex compendium, creating
|
||
|
||
Basically every PO file consisting of translated entries only can be
|
||
declared as a valid compendium. Often the translator wants to have
|
||
special compendia; let's consider two cases: @cite{concatenating PO
|
||
files} and @cite{extracting a message subset from a PO file}.
|
||
|
||
@subsubsection Concatenate PO Files
|
||
|
||
@cindex concatenating PO files into a compendium
|
||
@cindex accumulating translations
|
||
To concatenate several valid PO files into one compendium file you can
|
||
use @samp{msgcomm} or @samp{msgcat} (the latter preferred):
|
||
|
||
@example
|
||
msgcat -o compendium.po file1.po file2.po
|
||
@end example
|
||
|
||
By default, @code{msgcat} will accumulate divergent translations
|
||
for the same string. Those occurrences will be marked as @code{fuzzy}
|
||
and highly visibly decorated; calling @code{msgcat} on
|
||
@file{file1.po}:
|
||
|
||
@example
|
||
#: src/hello.c:200
|
||
#, c-format
|
||
msgid "Report bugs to <%s>.\n"
|
||
msgstr "Comunicar `bugs' a <%s>.\n"
|
||
@end example
|
||
|
||
@noindent
|
||
and @file{file2.po}:
|
||
|
||
@example
|
||
#: src/bye.c:100
|
||
#, c-format
|
||
msgid "Report bugs to <%s>.\n"
|
||
msgstr "Comunicar \"bugs\" a <%s>.\n"
|
||
@end example
|
||
|
||
@noindent
|
||
will result in:
|
||
|
||
@example
|
||
#: src/hello.c:200 src/bye.c:100
|
||
#, fuzzy, c-format
|
||
msgid "Report bugs to <%s>.\n"
|
||
msgstr ""
|
||
"#-#-#-#-# file1.po #-#-#-#-#\n"
|
||
"Comunicar `bugs' a <%s>.\n"
|
||
"#-#-#-#-# file2.po #-#-#-#-#\n"
|
||
"Comunicar \"bugs\" a <%s>.\n"
|
||
@end example
|
||
|
||
@noindent
|
||
The translator will have to resolve this ``conflict'' manually; she
|
||
has to decide whether the first or the second version is appropriate
|
||
(or provide a new translation), to delete the ``marker lines'', and
|
||
finally to remove the @code{fuzzy} mark.
|
||
|
||
If the translator knows in advance the first found translation of a
|
||
message is always the best translation, she can make use of the
|
||
@samp{--use-first} switch:
|
||
|
||
@example
|
||
msgcat --use-first -o compendium.po file1.po file2.po
|
||
@end example
|
||
|
||
A good compendium file must not contain @code{fuzzy} or untranslated
|
||
entries. If input files are ``dirty'' you must preprocess the input
|
||
files or postprocess the result using @samp{msgattrib --translated --no-fuzzy}.
|
||
|
||
@subsubsection Extract a Message Subset from a PO File
|
||
@cindex extracting parts of a PO file into a compendium
|
||
|
||
Nobody wants to translate the same messages again and again; thus you
|
||
may wish to have a compendium file containing @file{getopt.c} messages.
|
||
|
||
To extract a message subset (e.g., all @file{getopt.c} messages) from an
|
||
existing PO file into one compendium file you can use @samp{msggrep}:
|
||
|
||
@example
|
||
msggrep --location src/getopt.c -o compendium.po file.po
|
||
@end example
|
||
|
||
@node Using Compendia
|
||
@subsection Using Compendia
|
||
|
||
You can use a compendium file to initialize a translation from scratch
|
||
or to update an already existing translation.
|
||
|
||
@subsubsection Initialize a New Translation File
|
||
@cindex initialize translations from a compendium
|
||
|
||
Since a PO file with translations does not exist the translator can
|
||
merely use @file{/dev/null} to fake the ``old'' translation file.
|
||
|
||
@example
|
||
msgmerge --compendium compendium.po -o file.po /dev/null file.pot
|
||
@end example
|
||
|
||
@subsubsection Update an Existing Translation File
|
||
@cindex update translations from a compendium
|
||
|
||
Concatenate the compendium file(s) and the existing PO, merge the
|
||
result with the POT file and remove the obsolete entries (optional,
|
||
here done using @samp{msgattrib}):
|
||
|
||
@example
|
||
msgcat --use-first -o update.po compendium1.po compendium2.po file.po
|
||
msgmerge update.po file.pot | msgattrib --no-obsolete > file.po
|
||
@end example
|
||
|
||
@node Manipulating
|
||
@chapter Manipulating PO Files
|
||
@cindex manipulating PO files
|
||
|
||
Sometimes it is necessary to manipulate PO files in a way that is better
|
||
performed automatically than by hand. GNU @code{gettext} includes a
|
||
complete set of tools for this purpose.
|
||
|
||
@cindex merging two PO files
|
||
When merging two packages into a single package, the resulting POT file
|
||
will be the concatenation of the two packages' POT files. Thus the
|
||
maintainer must concatenate the two existing package translations into
|
||
a single translation catalog, for each language. This is best performed
|
||
using @samp{msgcat}. It is then the translators' duty to deal with any
|
||
possible conflicts that arose during the merge.
|
||
|
||
@cindex encoding conversion
|
||
When a translator takes over the translation job from another translator,
|
||
but she uses a different character encoding in her locale, she will
|
||
convert the catalog to her character encoding. This is best done through
|
||
the @samp{msgconv} program.
|
||
|
||
When a maintainer takes a source file with tagged messages from another
|
||
package, he should also take the existing translations for this source
|
||
file (and not let the translators do the same job twice). One way to do
|
||
this is through @samp{msggrep}, another is to create a POT file for
|
||
that source file and use @samp{msgmerge}.
|
||
|
||
@cindex dialect
|
||
@cindex orthography
|
||
When a translator wants to adjust some translation catalog for a special
|
||
dialect or orthography --- for example, German as written in Switzerland
|
||
versus German as written in Germany --- she needs to apply some text
|
||
processing to every message in the catalog. The tool for doing this is
|
||
@samp{msgfilter}.
|
||
|
||
Another use of @code{msgfilter} is to produce approximately the POT file for
|
||
which a given PO file was made. This can be done through a filter command
|
||
like @samp{msgfilter sed -e d | sed -e '/^# /d'}. Note that the original
|
||
POT file may have had different comments and different plural message counts,
|
||
that's why it's better to use the original POT file if available.
|
||
|
||
@cindex checking of translations
|
||
When a translator wants to check her translations, for example according
|
||
to orthography rules or using a non-interactive spell checker, she can do
|
||
so using the @samp{msgexec} program.
|
||
|
||
@cindex duplicate elimination
|
||
When third party tools create PO or POT files, sometimes duplicates cannot
|
||
be avoided. But the GNU @code{gettext} tools give an error when they
|
||
encounter duplicate msgids in the same file and in the same domain.
|
||
To merge duplicates, the @samp{msguniq} program can be used.
|
||
|
||
@samp{msgcomm} is a more general tool for keeping or throwing away
|
||
duplicates, occurring in different files.
|
||
|
||
@samp{msgcmp} can be used to check whether a translation catalog is
|
||
completely translated.
|
||
|
||
@cindex attributes, manipulating
|
||
@samp{msgattrib} can be used to select and extract only the fuzzy
|
||
or untranslated messages of a translation catalog.
|
||
|
||
@samp{msgen} is useful as a first step for preparing English translation
|
||
catalogs. It copies each message's msgid to its msgstr.
|
||
|
||
Finally, for those applications where all these various programs are not
|
||
sufficient, a library @samp{libgettextpo} is provided that can be used to
|
||
write other specialized programs that process PO files.
|
||
|
||
@menu
|
||
* msgcat Invocation:: Invoking the @code{msgcat} Program
|
||
* msgconv Invocation:: Invoking the @code{msgconv} Program
|
||
* msggrep Invocation:: Invoking the @code{msggrep} Program
|
||
* msgfilter Invocation:: Invoking the @code{msgfilter} Program
|
||
* msguniq Invocation:: Invoking the @code{msguniq} Program
|
||
* msgcomm Invocation:: Invoking the @code{msgcomm} Program
|
||
* msgcmp Invocation:: Invoking the @code{msgcmp} Program
|
||
* msgattrib Invocation:: Invoking the @code{msgattrib} Program
|
||
* msgen Invocation:: Invoking the @code{msgen} Program
|
||
* msgexec Invocation:: Invoking the @code{msgexec} Program
|
||
* Colorizing:: Highlighting parts of PO files
|
||
* Other tools:: Other tools for manipulating PO files
|
||
* libgettextpo:: Writing your own programs that process PO files
|
||
@end menu
|
||
|
||
@node msgcat Invocation
|
||
@section Invoking the @code{msgcat} Program
|
||
|
||
@include msgcat.texi
|
||
|
||
@node msgconv Invocation
|
||
@section Invoking the @code{msgconv} Program
|
||
|
||
@include msgconv.texi
|
||
|
||
@node msggrep Invocation
|
||
@section Invoking the @code{msggrep} Program
|
||
|
||
@include msggrep.texi
|
||
|
||
@node msgfilter Invocation
|
||
@section Invoking the @code{msgfilter} Program
|
||
|
||
@include msgfilter.texi
|
||
|
||
@node msguniq Invocation
|
||
@section Invoking the @code{msguniq} Program
|
||
|
||
@include msguniq.texi
|
||
|
||
@node msgcomm Invocation
|
||
@section Invoking the @code{msgcomm} Program
|
||
|
||
@include msgcomm.texi
|
||
|
||
@node msgcmp Invocation
|
||
@section Invoking the @code{msgcmp} Program
|
||
|
||
@include msgcmp.texi
|
||
|
||
@node msgattrib Invocation
|
||
@section Invoking the @code{msgattrib} Program
|
||
|
||
@include msgattrib.texi
|
||
|
||
@node msgen Invocation
|
||
@section Invoking the @code{msgen} Program
|
||
|
||
@include msgen.texi
|
||
|
||
@node msgexec Invocation
|
||
@section Invoking the @code{msgexec} Program
|
||
|
||
@include msgexec.texi
|
||
|
||
@node Colorizing
|
||
@section Highlighting parts of PO files
|
||
|
||
Translators are usually only interested in seeing the untranslated and
|
||
fuzzy messages of a PO file. Also, when a message is set fuzzy because
|
||
the msgid changed, they want to see the differences between the previous
|
||
msgid and the current one (especially if the msgid is long and only few
|
||
words in it have changed). Finally, it's always welcome to highlight the
|
||
different sections of a message in a PO file (comments, msgid, msgstr, etc.).
|
||
|
||
Such highlighting is possible through the options @samp{--color} and
|
||
@samp{--style}. They are supported by all the programs that produce
|
||
a PO file on standard output, such as @code{msgcat}, @code{msgmerge},
|
||
and @code{msgunfmt}.
|
||
|
||
@menu
|
||
* The --color option:: Triggering colorized output
|
||
* The TERM variable:: The environment variable @code{TERM}
|
||
* The --style option:: The @code{--style} option
|
||
* Style rules:: Style rules for PO files
|
||
* Customizing less:: Customizing @code{less} for viewing PO files
|
||
@end menu
|
||
|
||
@node The --color option
|
||
@subsection The @code{--color} option
|
||
|
||
@opindex --color@r{, @code{msgcat} option}
|
||
The @samp{--color=@var{when}} option specifies under which conditions
|
||
colorized output should be generated. The @var{when} part can be one of
|
||
the following:
|
||
|
||
@table @code
|
||
@item always
|
||
@itemx yes
|
||
The output will be colorized.
|
||
|
||
@item never
|
||
@itemx no
|
||
The output will not be colorized.
|
||
|
||
@item auto
|
||
@itemx tty
|
||
The output will be colorized if the output device is a tty, i.e.@: when the
|
||
output goes directly to a text screen or terminal emulator window.
|
||
|
||
@item html
|
||
The output will be colorized and be in HTML format.
|
||
|
||
@item test
|
||
This is a special value, understood only by the @code{msgcat} program. It
|
||
is explained in the next section (@ref{The TERM variable}).
|
||
@end table
|
||
|
||
@noindent
|
||
@samp{--color} is equivalent to @samp{--color=yes}. The default is
|
||
@samp{--color=auto}.
|
||
|
||
Thus, a command like @samp{msgcat vi.po} will produce colorized output
|
||
when called by itself in a command window. Whereas in a pipe, such as
|
||
@samp{msgcat vi.po | less -R}, it will not produce colorized output. To
|
||
get colorized output in this situation nevertheless, use the command
|
||
@samp{msgcat --color vi.po | less -R}.
|
||
|
||
The @samp{--color=html} option will produce output that can be viewed in
|
||
a browser. This can be useful, for example, for Indic languages,
|
||
because the rendering of Indic scripts in browsers is usually better than
|
||
in terminal emulators.
|
||
|
||
Note that the output produced with the @code{--color} option is @emph{not}
|
||
a valid PO file in itself. It contains additional terminal-specific escape
|
||
sequences or HTML tags. A PO file reader will give a syntax error when
|
||
confronted with such content. Except for the @samp{--color=html} case,
|
||
you therefore normally don't need to save output produced with the
|
||
@code{--color} option in a file.
|
||
|
||
@node The TERM variable
|
||
@subsection The environment variable @code{TERM}
|
||
|
||
@vindex TERM@r{, environment variable}
|
||
The environment variable @code{TERM} contains an identifier for the text
|
||
window's capabilities. You can get a detailed list of these capabilities
|
||
by using the @samp{infocmp} command, using @samp{man 5 terminfo} as a
|
||
reference.
|
||
|
||
When producing text with embedded color directives, @code{msgcat} looks
|
||
at the @code{TERM} variable. Text windows today typically support at least
|
||
8 colors. Often, however, the text window supports 16 or more colors,
|
||
even though the @code{TERM} variable is set to an identifier denoting only
|
||
8 supported colors. It can be worth setting the @code{TERM} variable to
|
||
a different value in these cases:
|
||
|
||
@table @code
|
||
@item xterm
|
||
@code{xterm} is in most cases built with support for 16 colors. It can also
|
||
be built with support for 88 or 256 colors (but not both). You can try to
|
||
set @code{TERM} to either @code{xterm-16color}, @code{xterm-88color}, or
|
||
@code{xterm-256color}.
|
||
|
||
@item rxvt
|
||
@code{rxvt} is often built with support for 16 colors. You can try to set
|
||
@code{TERM} to @code{rxvt-16color}.
|
||
|
||
@item konsole
|
||
@code{konsole} too is often built with support for 16 colors. You can try to
|
||
set @code{TERM} to @code{konsole-16color} or @code{xterm-16color}.
|
||
@end table
|
||
|
||
After setting @code{TERM}, you can verify it by invoking
|
||
@samp{msgcat --color=test} and seeing whether the output looks like a
|
||
reasonable color map.
|
||
|
||
@node The --style option
|
||
@subsection The @code{--style} option
|
||
|
||
@opindex --style@r{, @code{msgcat} option}
|
||
The @samp{--style=@var{style_file}} option specifies the style file to use
|
||
when colorizing. It has an effect only when the @code{--color} option is
|
||
effective.
|
||
|
||
@vindex PO_STYLE@r{, environment variable}
|
||
If the @code{--style} option is not specified, the environment variable
|
||
@code{PO_STYLE} is considered. It is meant to point to the user's
|
||
preferred style for PO files.
|
||
|
||
The default style file is @file{$prefix/share/gettext/styles/po-default.css},
|
||
where @code{$prefix} is the installation location.
|
||
|
||
A few style files are predefined:
|
||
@table @file
|
||
@item po-vim.css
|
||
This style imitates the look used by vim 7.
|
||
|
||
@item po-emacs-x.css
|
||
This style imitates the look used by GNU Emacs 21 and 22 in an X11 window.
|
||
|
||
@item po-emacs-xterm.css
|
||
@itemx po-emacs-xterm16.css
|
||
@itemx po-emacs-xterm256.css
|
||
This style imitates the look used by GNU Emacs 22 in a terminal of type
|
||
@samp{xterm} (8 colors) or @samp{xterm-16color} (16 colors) or
|
||
@samp{xterm-256color} (256 colors), respectively.
|
||
@end table
|
||
|
||
@noindent
|
||
You can use these styles without specifying a directory. They are actually
|
||
located in @file{$prefix/share/gettext/styles/}, where @code{$prefix} is the
|
||
installation location.
|
||
|
||
You can also design your own styles. This is described in the next section.
|
||
|
||
|
||
@node Style rules
|
||
@subsection Style rules for PO files
|
||
|
||
The same style file can be used for styling of a PO file, for terminal
|
||
output and for HTML output. It is written in CSS (Cascading Style Sheet)
|
||
syntax. See @url{https://www.w3.org/TR/css2/cover.html} for a formal
|
||
definition of CSS. Many HTML authoring tutorials also contain explanations
|
||
of CSS.
|
||
|
||
In the case of HTML output, the style file is embedded in the HTML output.
|
||
In the case of text output, the style file is interpreted by the
|
||
@code{msgcat} program. This means, in particular, that when
|
||
@code{@@import} is used with relative file names, the file names are
|
||
|
||
@itemize @minus
|
||
@item
|
||
relative to the resulting HTML file, in the case of HTML output,
|
||
|
||
@item
|
||
relative to the style sheet containing the @code{@@import}, in the case of
|
||
text output. (Actually, @code{@@import}s are not yet supported in this case,
|
||
due to a limitation in @code{libcroco}.)
|
||
@end itemize
|
||
|
||
CSS rules are built up from selectors and declarations. The declarations
|
||
specify graphical properties; the selectors specify when they apply.
|
||
|
||
In PO files, the following simple selectors (based on "CSS classes", see
|
||
the CSS2 spec, section 5.8.3) are supported.
|
||
|
||
@itemize @bullet
|
||
@item
|
||
Selectors that apply to entire messages:
|
||
|
||
@table @code
|
||
@item .header
|
||
This matches the header entry of a PO file.
|
||
|
||
@item .translated
|
||
This matches a translated message.
|
||
|
||
@item .untranslated
|
||
This matches an untranslated message (i.e.@: a message with empty translation).
|
||
|
||
@item .fuzzy
|
||
This matches a fuzzy message (i.e.@: a message which has a translation that
|
||
needs review by the translator).
|
||
|
||
@item .obsolete
|
||
This matches an obsolete message (i.e.@: a message that was translated but is
|
||
not needed by the current POT file any more).
|
||
@end table
|
||
|
||
@item
|
||
Selectors that apply to parts of a message in PO syntax. Recall the general
|
||
structure of a message in PO syntax:
|
||
|
||
@example
|
||
@var{white-space}
|
||
# @var{translator-comments}
|
||
#. @var{extracted-comments}
|
||
#: @var{reference}@dots{}
|
||
#, @var{flag}@dots{}
|
||
#| msgid @var{previous-untranslated-string}
|
||
msgid @var{untranslated-string}
|
||
msgstr @var{translated-string}
|
||
@end example
|
||
|
||
@table @code
|
||
@item .comment
|
||
This matches all comments (translator comments, extracted comments,
|
||
source file reference comments, flag comments, previous message comments,
|
||
as well as the entire obsolete messages).
|
||
|
||
@item .translator-comment
|
||
This matches the translator comments.
|
||
|
||
@item .extracted-comment
|
||
This matches the extracted comments, i.e.@: the comments placed by the
|
||
programmer for the attention of the translator.
|
||
|
||
@item .reference-comment
|
||
This matches the source file reference comments (entire lines).
|
||
|
||
@item .reference
|
||
This matches the individual source file references inside the source file
|
||
reference comment lines.
|
||
|
||
@item .flag-comment
|
||
This matches the flag comment lines (entire lines).
|
||
|
||
@item .flag
|
||
This matches the individual flags inside flag comment lines.
|
||
|
||
@item .fuzzy-flag
|
||
This matches the `fuzzy' flag inside flag comment lines.
|
||
|
||
@item .previous-comment
|
||
This matches the comments containing the previous untranslated string (entire
|
||
lines).
|
||
|
||
@item .previous
|
||
This matches the previous untranslated string including the string delimiters,
|
||
the associated keywords (@code{msgid} etc.) and the spaces between them.
|
||
|
||
@item .msgid
|
||
This matches the untranslated string including the string delimiters,
|
||
the associated keywords (@code{msgid} etc.) and the spaces between them.
|
||
|
||
@item .msgstr
|
||
This matches the translated string including the string delimiters,
|
||
the associated keywords (@code{msgstr} etc.) and the spaces between them.
|
||
|
||
@item .keyword
|
||
This matches the keywords (@code{msgid}, @code{msgstr}, etc.).
|
||
|
||
@item .string
|
||
This matches strings, including the string delimiters (double quotes).
|
||
@end table
|
||
|
||
@item
|
||
Selectors that apply to parts of strings:
|
||
|
||
@table @code
|
||
@item .text
|
||
This matches the entire contents of a string (excluding the string delimiters,
|
||
i.e.@: the double quotes).
|
||
|
||
@item .escape-sequence
|
||
This matches an escape sequence (starting with a backslash).
|
||
|
||
@item .format-directive
|
||
This matches a format string directive (starting with a @samp{%} sign in the
|
||
case of most programming languages, with a @samp{@{} in the case of
|
||
@code{java-format} and @code{csharp-format}, with a @samp{~} in the case of
|
||
@code{lisp-format} and @code{scheme-format}, or with @samp{$} in the case of
|
||
@code{sh-format}).
|
||
|
||
@item .invalid-format-directive
|
||
This matches an invalid format string directive.
|
||
|
||
@item .added
|
||
In an untranslated string, this matches a part of the string that was not
|
||
present in the previous untranslated string. (Not yet implemented in this
|
||
release.)
|
||
|
||
@item .changed
|
||
In an untranslated string or in a previous untranslated string, this matches
|
||
a part of the string that is changed or replaced. (Not yet implemented in
|
||
this release.)
|
||
|
||
@item .removed
|
||
In a previous untranslated string, this matches a part of the string that
|
||
is not present in the current untranslated string. (Not yet implemented in
|
||
this release.)
|
||
@end table
|
||
@end itemize
|
||
|
||
These selectors can be combined into hierarchical selectors. For example,
|
||
|
||
@smallexample
|
||
.msgstr .invalid-format-directive @{ color: red; @}
|
||
@end smallexample
|
||
|
||
@noindent
|
||
will highlight the invalid format directives in the translated strings.
|
||
|
||
In text mode, pseudo-classes (CSS2 spec, section 5.11) and pseudo-elements
|
||
(CSS2 spec, section 5.12) are not supported.
|
||
|
||
The declarations in HTML mode are not limited; any graphical attribute
|
||
supported by the browsers can be used.
|
||
|
||
The declarations in text mode are limited to the following properties. Other
|
||
properties will be silently ignored.
|
||
|
||
@table @asis
|
||
@item @code{color} (CSS2 spec, section 14.1)
|
||
@itemx @code{background-color} (CSS2 spec, section 14.2.1)
|
||
These properties are supported. Colors will be adjusted to match the terminal's
|
||
capabilities. Note that many terminals support only 8 colors.
|
||
|
||
@item @code{font-weight} (CSS2 spec, section 15.2.3)
|
||
This property is supported, but most terminals can only render two different
|
||
weights: @code{normal} and @code{bold}. Values >= 600 are rendered as
|
||
@code{bold}.
|
||
|
||
@item @code{font-style} (CSS2 spec, section 15.2.3)
|
||
This property is supported. The values @code{italic} and @code{oblique} are
|
||
rendered the same way.
|
||
|
||
@item @code{text-decoration} (CSS2 spec, section 16.3.1)
|
||
This property is supported, limited to the values @code{none} and
|
||
@code{underline}.
|
||
@end table
|
||
|
||
@node Customizing less
|
||
@subsection Customizing @code{less} for viewing PO files
|
||
|
||
The @samp{less} program is a popular text file browser for use in a text
|
||
screen or terminal emulator. It also supports text with embedded escape
|
||
sequences for colors and text decorations.
|
||
|
||
You can use @code{less} to view a PO file like this (assuming a UTF-8
|
||
environment):
|
||
|
||
@smallexample
|
||
msgcat --to-code=UTF-8 --color xyz.po | less -R
|
||
@end smallexample
|
||
|
||
You can simplify this to this simple command:
|
||
|
||
@smallexample
|
||
less xyz.po
|
||
@end smallexample
|
||
|
||
@noindent
|
||
after these three preparations:
|
||
|
||
@enumerate
|
||
@item
|
||
Add the options @samp{-R} and @samp{-f} to the @code{LESS} environment
|
||
variable. In sh shells:
|
||
@smallexample
|
||
$ LESS="$LESS -R -f"
|
||
$ export LESS
|
||
@end smallexample
|
||
|
||
@item
|
||
If your system does not already have the @file{lessopen.sh} and
|
||
@file{lessclose.sh} scripts, create them and set the @code{LESSOPEN} and
|
||
@code{LESSCLOSE} environment variables, as indicated in the manual page
|
||
(@samp{man less}).
|
||
|
||
@item
|
||
Add to @file{lessopen.sh} a piece of script that recognizes PO files
|
||
through their file extension and invokes @code{msgcat} on them, producing
|
||
a temporary file. Like this:
|
||
|
||
@smallexample
|
||
case "$1" in
|
||
*.po)
|
||
tmpfile=`mktemp "$@{TMPDIR-/tmp@}/less.XXXXXX"`
|
||
msgcat --to-code=UTF-8 --color "$1" > "$tmpfile"
|
||
echo "$tmpfile"
|
||
exit 0
|
||
;;
|
||
esac
|
||
@end smallexample
|
||
@end enumerate
|
||
|
||
@node Other tools
|
||
@section Other tools for manipulating PO files
|
||
|
||
@cindex Pology
|
||
The ``Pology'' package is a Free Software package for manipulating PO files.
|
||
It features, in particular:
|
||
|
||
@itemize
|
||
@item
|
||
Examination and in-place modification of collections of PO files.
|
||
@item
|
||
Format-aware diffing and patching of PO files.
|
||
@item
|
||
Handling of version-control branches.
|
||
@item
|
||
Fine-grained asynchronous review workflow.
|
||
@item
|
||
Custom translation validation.
|
||
@item
|
||
Language and project specific support.
|
||
@end itemize
|
||
|
||
Its home page is at @url{http://pology.nedohodnik.net/}.
|
||
|
||
@node libgettextpo
|
||
@section Writing your own programs that process PO files
|
||
|
||
For the tasks for which a combination of @samp{msgattrib}, @samp{msgcat} etc.
|
||
is not sufficient, a set of C functions is provided in a library, to make it
|
||
possible to process PO files in your own programs. When you use this library,
|
||
you don't need to write routines to parse the PO file; instead, you retrieve
|
||
a pointer in memory to each of the messages contained in the PO file. Functions
|
||
for writing PO files are not provided at this time.
|
||
|
||
The functions are declared in the header file @samp{<gettext-po.h>}, and are
|
||
defined in a library called @samp{libgettextpo}.
|
||
|
||
@deftp {Data Type} po_file_t
|
||
This is a pointer type that refers to the contents of a PO file, after it has
|
||
been read into memory.
|
||
@end deftp
|
||
|
||
@deftp {Data Type} po_message_iterator_t
|
||
This is a pointer type that refers to an iterator that produces a sequence of
|
||
messages.
|
||
@end deftp
|
||
|
||
@deftp {Data Type} po_message_t
|
||
This is a pointer type that refers to a message of a PO file, including its
|
||
translation.
|
||
@end deftp
|
||
|
||
@deftypefun po_file_t po_file_read (const char *@var{filename})
|
||
The @code{po_file_read} function reads a PO file into memory. The file name
|
||
is given as argument. The return value is a handle to the PO file's contents,
|
||
valid until @code{po_file_free} is called on it. In case of error, the return
|
||
value is @code{NULL}, and @code{errno} is set.
|
||
@end deftypefun
|
||
|
||
@deftypefun void po_file_free (po_file_t @var{file})
|
||
The @code{po_file_free} function frees a PO file's contents from memory,
|
||
including all messages that are only implicitly accessible through iterators.
|
||
@end deftypefun
|
||
|
||
@deftypefun {const char * const *} po_file_domains (po_file_t @var{file})
|
||
The @code{po_file_domains} function returns the domains for which the given
|
||
PO file has messages. The return value is a @code{NULL} terminated array
|
||
which is valid as long as the @var{file} handle is valid. For PO files which
|
||
contain no @samp{domain} directive, the return value contains only one domain,
|
||
namely the default domain @code{"messages"}.
|
||
@end deftypefun
|
||
|
||
@deftypefun po_message_iterator_t po_message_iterator (po_file_t @var{file}, const char *@var{domain})
|
||
The @code{po_message_iterator} returns an iterator that will produce the
|
||
messages of @var{file} that belong to the given @var{domain}. If @var{domain}
|
||
is @code{NULL}, the default domain is used instead. To list the messages,
|
||
use the function @code{po_next_message} repeatedly.
|
||
@end deftypefun
|
||
|
||
@deftypefun void po_message_iterator_free (po_message_iterator_t @var{iterator})
|
||
The @code{po_message_iterator_free} function frees an iterator previously
|
||
allocated through the @code{po_message_iterator} function.
|
||
@end deftypefun
|
||
|
||
@deftypefun po_message_t po_next_message (po_message_iterator_t @var{iterator})
|
||
The @code{po_next_message} function returns the next message from
|
||
@var{iterator} and advances the iterator. It returns @code{NULL} when the
|
||
iterator has reached the end of its message list.
|
||
@end deftypefun
|
||
|
||
The following functions return details of a @code{po_message_t}. Recall
|
||
that the results are valid as long as the @var{file} handle is valid.
|
||
|
||
@deftypefun {const char *} po_message_msgid (po_message_t @var{message})
|
||
The @code{po_message_msgid} function returns the @code{msgid} (untranslated
|
||
English string) of a message. This is guaranteed to be non-@code{NULL}.
|
||
@end deftypefun
|
||
|
||
@deftypefun {const char *} po_message_msgid_plural (po_message_t @var{message})
|
||
The @code{po_message_msgid_plural} function returns the @code{msgid_plural}
|
||
(untranslated English plural string) of a message with plurals, or @code{NULL}
|
||
for a message without plural.
|
||
@end deftypefun
|
||
|
||
@deftypefun {const char *} po_message_msgstr (po_message_t @var{message})
|
||
The @code{po_message_msgstr} function returns the @code{msgstr} (translation)
|
||
of a message. For an untranslated message, the return value is an empty
|
||
string.
|
||
@end deftypefun
|
||
|
||
@deftypefun {const char *} po_message_msgstr_plural (po_message_t @var{message}, int @var{index})
|
||
The @code{po_message_msgstr_plural} function returns the
|
||
@code{msgstr[@var{index}]} of a message with plurals, or @code{NULL} when
|
||
the @var{index} is out of range or for a message without plural.
|
||
@end deftypefun
|
||
|
||
Here is some example code showing how these functions can be used.
|
||
|
||
@example
|
||
const char *filename = @dots{};
|
||
po_file_t file = po_file_read (filename);
|
||
|
||
if (file == NULL)
|
||
error (EXIT_FAILURE, errno, "couldn't open the PO file %s", filename);
|
||
@{
|
||
const char * const *domains = po_file_domains (file);
|
||
const char * const *domainp;
|
||
|
||
for (domainp = domains; *domainp; domainp++)
|
||
@{
|
||
const char *domain = *domainp;
|
||
po_message_iterator_t iterator = po_message_iterator (file, domain);
|
||
|
||
for (;;)
|
||
@{
|
||
po_message_t message = po_next_message (iterator);
|
||
|
||
if (message == NULL)
|
||
break;
|
||
@{
|
||
const char *msgid = po_message_msgid (message);
|
||
const char *msgstr = po_message_msgstr (message);
|
||
|
||
@dots{}
|
||
@}
|
||
@}
|
||
po_message_iterator_free (iterator);
|
||
@}
|
||
@}
|
||
po_file_free (file);
|
||
@end example
|
||
|
||
@node Binaries
|
||
@chapter Producing Binary MO Files
|
||
|
||
@c FIXME: Rewrite.
|
||
|
||
@menu
|
||
* msgfmt Invocation:: Invoking the @code{msgfmt} Program
|
||
* msgunfmt Invocation:: Invoking the @code{msgunfmt} Program
|
||
* MO Files:: The Format of GNU MO Files
|
||
@end menu
|
||
|
||
@node msgfmt Invocation
|
||
@section Invoking the @code{msgfmt} Program
|
||
|
||
@include msgfmt.texi
|
||
|
||
@node msgunfmt Invocation
|
||
@section Invoking the @code{msgunfmt} Program
|
||
|
||
@include msgunfmt.texi
|
||
|
||
@node MO Files
|
||
@section The Format of GNU MO Files
|
||
@cindex MO file's format
|
||
@cindex file format, @file{.mo}
|
||
|
||
The format of the generated MO files is best described by a picture,
|
||
which appears below.
|
||
|
||
@cindex magic signature of MO files
|
||
The first two words serve the identification of the file. The magic
|
||
number will always signal GNU MO files. The number is stored in the
|
||
byte order used when the MO file was generated, so the magic number
|
||
really is two numbers: @code{0x950412de} and @code{0xde120495}.
|
||
|
||
The second word describes the current revision of the file format,
|
||
composed of a major and a minor revision number. The revision numbers
|
||
ensure that the readers of MO files can distinguish new formats from
|
||
old ones and handle their contents, as far as possible. For now the
|
||
major revision is 0 or 1, and the minor revision is also 0 or 1. More
|
||
revisions might be added in the future. A program seeing an unexpected
|
||
major revision number should stop reading the MO file entirely; whereas
|
||
an unexpected minor revision number means that the file can be read but
|
||
will not reveal its full contents, when parsed by a program that
|
||
supports only smaller minor revision numbers.
|
||
|
||
The version is kept
|
||
separate from the magic number, instead of using different magic
|
||
numbers for different formats, mainly because @file{/etc/magic} is
|
||
not updated often.
|
||
|
||
Then follow a number of pointers to later tables in the file, allowing
|
||
for the extension of the prefix part of MO files without having to
|
||
recompile programs reading them. This might become useful for later
|
||
inserting a few flag bits, indication about the charset used, new
|
||
tables, or other things.
|
||
|
||
Then, at offset @var{O} and offset @var{T} in the picture, two tables
|
||
of string descriptors can be found. In both tables, each string
|
||
descriptor uses two 32-bit integers, one for the string length,
|
||
another for the offset of the string in the MO file, counting in bytes
|
||
from the start of the file. The first table contains descriptors
|
||
for the original strings, and is sorted so the original strings
|
||
are in increasing lexicographical order. The second table contains
|
||
descriptors for the translated strings, and is parallel to the first
|
||
table: to find the corresponding translation one has to access the
|
||
array slot in the second array with the same index.
|
||
|
||
Having the original strings sorted enables the use of simple binary
|
||
search, for when the MO file does not contain a hashing table, or
|
||
for when it is not practical to use the hashing table provided in
|
||
the MO file. This also has another advantage, as the empty string
|
||
in a PO file GNU @code{gettext} is usually @emph{translated} into
|
||
some system information attached to that particular MO file, and the
|
||
empty string necessarily becomes the first in both the original and
|
||
translated tables, making the system information very easy to find.
|
||
|
||
@cindex hash table, inside MO files
|
||
The size @var{S} of the hash table can be zero. In this case, the
|
||
hash table itself is not contained in the MO file. Some people might
|
||
prefer this because a precomputed hashing table takes disk space, and
|
||
does not win @emph{that} much speed. The hash table contains indices
|
||
to the sorted array of strings in the MO file. Conflict resolution is
|
||
done by double hashing. The precise hashing algorithm used is fairly
|
||
dependent on GNU @code{gettext} code, and is not documented here.
|
||
|
||
As for the strings themselves, they follow the hash table, and each
|
||
is terminated with a @key{NUL}, and this @key{NUL} is not counted in
|
||
the length which appears in the string descriptor. The @code{msgfmt}
|
||
program has an option selecting the alignment for MO file strings.
|
||
With this option, each string is separately aligned so it starts at
|
||
an offset which is a multiple of the alignment value. On some RISC
|
||
machines, a correct alignment will speed things up.
|
||
|
||
@cindex context, in MO files
|
||
Contexts are stored by storing the concatenation of the context, a
|
||
@key{EOT} byte, and the original string, instead of the original string.
|
||
|
||
@cindex plural forms, in MO files
|
||
Plural forms are stored by letting the plural of the original string
|
||
follow the singular of the original string, separated through a
|
||
@key{NUL} byte. The length which appears in the string descriptor
|
||
includes both. However, only the singular of the original string
|
||
takes part in the hash table lookup. The plural variants of the
|
||
translation are all stored consecutively, separated through a
|
||
@key{NUL} byte. Here also, the length in the string descriptor
|
||
includes all of them.
|
||
|
||
Nothing prevents a MO file from having embedded @key{NUL}s in strings.
|
||
However, the program interface currently used already presumes
|
||
that strings are @key{NUL} terminated, so embedded @key{NUL}s are
|
||
somewhat useless. But the MO file format is general enough so other
|
||
interfaces would be later possible, if for example, we ever want to
|
||
implement wide characters right in MO files, where @key{NUL} bytes may
|
||
accidentally appear. (No, we don't want to have wide characters in MO
|
||
files. They would make the file unnecessarily large, and the
|
||
@samp{wchar_t} type being platform dependent, MO files would be
|
||
platform dependent as well.)
|
||
|
||
This particular issue has been strongly debated in the GNU
|
||
@code{gettext} development forum, and it is to be expected that the MO file
|
||
format will evolve or change over time. It is even possible that many
|
||
formats may later be supported concurrently. But surely, we have to
|
||
start somewhere, and the MO file format described here is a good start.
|
||
Nothing is cast in concrete, and the format may later evolve fairly
|
||
easily, so we should feel comfortable with the current approach.
|
||
|
||
@example
|
||
@group
|
||
byte
|
||
+------------------------------------------+
|
||
0 | magic number = 0x950412de |
|
||
| |
|
||
4 | file format revision = 0 |
|
||
| |
|
||
8 | number of strings | == N
|
||
| |
|
||
12 | offset of table with original strings | == O
|
||
| |
|
||
16 | offset of table with translation strings | == T
|
||
| |
|
||
20 | size of hashing table | == S
|
||
| |
|
||
24 | offset of hashing table | == H
|
||
| |
|
||
. .
|
||
. (possibly more entries later) .
|
||
. .
|
||
| |
|
||
O | length & offset 0th string ----------------.
|
||
O + 8 | length & offset 1st string ------------------.
|
||
... ... | |
|
||
O + ((N-1)*8)| length & offset (N-1)th string | | |
|
||
| | | |
|
||
T | length & offset 0th translation ---------------.
|
||
T + 8 | length & offset 1st translation -----------------.
|
||
... ... | | | |
|
||
T + ((N-1)*8)| length & offset (N-1)th translation | | | | |
|
||
| | | | | |
|
||
H | start hash table | | | | |
|
||
... ... | | | |
|
||
H + S * 4 | end hash table | | | | |
|
||
| | | | | |
|
||
| NUL terminated 0th string <----------------' | | |
|
||
| | | | |
|
||
| NUL terminated 1st string <------------------' | |
|
||
| | | |
|
||
... ... | |
|
||
| | | |
|
||
| NUL terminated 0th translation <---------------' |
|
||
| | |
|
||
| NUL terminated 1st translation <-----------------'
|
||
| |
|
||
... ...
|
||
| |
|
||
+------------------------------------------+
|
||
@end group
|
||
@end example
|
||
|
||
@node Programmers
|
||
@chapter The Programmer's View
|
||
|
||
@c FIXME: Reorganize whole chapter.
|
||
|
||
One aim of the current message catalog implementation provided by
|
||
GNU @code{gettext} was to use the system's message catalog handling, if the
|
||
installer wishes to do so. So we perhaps should first take a look at
|
||
the solutions we know about. The people in the POSIX committee did not
|
||
manage to agree on one of the semi-official standards which we'll
|
||
describe below. In fact they couldn't agree on anything, so they decided
|
||
only to include an example of an interface. The major Unix vendors
|
||
are split in the usage of the two most important specifications: X/Open's
|
||
catgets vs. Uniforum's gettext interface. We'll describe them both and
|
||
later explain our solution of this dilemma.
|
||
|
||
@menu
|
||
* catgets:: About @code{catgets}
|
||
* gettext:: About @code{gettext}
|
||
* Comparison:: Comparing the two interfaces
|
||
* Using libintl.a:: Using libintl.a in own programs
|
||
* gettext grok:: Being a @code{gettext} grok
|
||
* Temp Programmers:: Temporary Notes for the Programmers Chapter
|
||
@end menu
|
||
|
||
@node catgets
|
||
@section About @code{catgets}
|
||
@cindex @code{catgets}, X/Open specification
|
||
|
||
The @code{catgets} implementation is defined in the X/Open Portability
|
||
Guide, Volume 3, XSI Supplementary Definitions, Chapter 5. But the
|
||
process of creating this standard seemed to be too slow for some of
|
||
the Unix vendors so they created their implementations on preliminary
|
||
versions of the standard. Of course this leads again to problems while
|
||
writing platform independent programs: even the usage of @code{catgets}
|
||
does not guarantee a unique interface.
|
||
|
||
Another, personal comment on this is that only a bunch of committee members
|
||
could have made this interface. They never really tried to program
|
||
using this interface. It is a fast, memory-saving implementation, a
|
||
user can happily live with it. But programmers hate it (at least I and
|
||
some others do@dots{})
|
||
|
||
But we must not forget one point: after all the trouble with transferring
|
||
the rights on Unix they at last came to X/Open, the very same who
|
||
published this specification. This leads me to making the prediction
|
||
that this interface will be in future Unix standards (e.g.@: Spec1170) and
|
||
therefore part of all Unix implementations (implementations, which are
|
||
@emph{allowed} to wear this name).
|
||
|
||
@menu
|
||
* Interface to catgets:: The interface
|
||
* Problems with catgets:: Problems with the @code{catgets} interface?!
|
||
@end menu
|
||
|
||
@node Interface to catgets
|
||
@subsection The Interface
|
||
@cindex interface to @code{catgets}
|
||
|
||
The interface to the @code{catgets} implementation consists of three
|
||
functions which correspond to those used in file access: @code{catopen}
|
||
to open the catalog for using, @code{catgets} for accessing the message
|
||
tables, and @code{catclose} for closing after work is done. Prototypes
|
||
for the functions and the needed definitions are in the
|
||
@code{<nl_types.h>} header file.
|
||
|
||
@cindex @code{catopen}, a @code{catgets} function
|
||
@code{catopen} is used like this:
|
||
|
||
@example
|
||
nl_catd catd = catopen ("catalog_name", 0);
|
||
@end example
|
||
|
||
The function takes as the argument the name of the catalog. This usually
|
||
refers to the name of the program or the package. The second parameter
|
||
is not further specified in the standard. I don't even know whether it
|
||
is implemented consistently among various systems. So the common advice
|
||
is to use @code{0} as the value. The return value is a handle to the
|
||
message catalog, equivalent to handles to file returned by @code{open}.
|
||
|
||
@cindex @code{catgets}, a @code{catgets} function
|
||
This handle is of course used in the @code{catgets} function which can
|
||
be used like this:
|
||
|
||
@example
|
||
char *translation = catgets (catd, set_no, msg_id, "original string");
|
||
@end example
|
||
|
||
The first parameter is this catalog descriptor. The second parameter
|
||
specifies the set of messages in this catalog, in which the message
|
||
described by @code{msg_id} is obtained. @code{catgets} therefore uses a
|
||
three-stage addressing:
|
||
|
||
@display
|
||
catalog name @result{} set number @result{} message ID @result{} translation
|
||
@end display
|
||
|
||
@c Anybody else loving Haskell??? :-) -- Uli
|
||
|
||
The fourth argument is not used to address the translation. It is given
|
||
as a default value in case one of the addressing stages fails. One
|
||
important thing to remember is that although the return type of catgets
|
||
is @code{char *} the resulting string @emph{must not} be changed. It
|
||
should better be @code{const char *}, but the standard was published in
|
||
1988, one year before ANSI C.
|
||
|
||
@noindent
|
||
@cindex @code{catclose}, a @code{catgets} function
|
||
The last of these functions is used and behaves as expected:
|
||
|
||
@example
|
||
catclose (catd);
|
||
@end example
|
||
|
||
After this no @code{catgets} call using the descriptor is legal anymore.
|
||
|
||
@node Problems with catgets
|
||
@subsection Problems with the @code{catgets} Interface?!
|
||
@cindex problems with @code{catgets} interface
|
||
|
||
Now that this description seemed to be really easy --- where are the
|
||
problems we speak of? In fact the interface could be used in a
|
||
reasonable way, but constructing the message catalogs is a pain. The
|
||
reason for this lies in the third argument of @code{catgets}: the unique
|
||
message ID. This has to be a numeric value for all messages in a single
|
||
set. Perhaps you could imagine the problems keeping such a list while
|
||
changing the source code. Add a new message here, remove one there. Of
|
||
course there have been developed a lot of tools helping to organize this
|
||
chaos but one as the other fails in one aspect or the other. We don't
|
||
want to say that the other approach has no problems but they are far
|
||
easier to manage.
|
||
|
||
@node gettext
|
||
@section About @code{gettext}
|
||
@cindex @code{gettext}, a programmer's view
|
||
|
||
The definition of the @code{gettext} interface comes from a Uniforum
|
||
proposal. It was submitted there by Sun, who had implemented the
|
||
@code{gettext} function in SunOS 4, around 1990. Nowadays, the
|
||
@code{gettext} interface is specified by the OpenI18N standard.
|
||
|
||
The main point about this solution is that it does not follow the
|
||
method of normal file handling (open-use-close) and that it does not
|
||
burden the programmer with so many tasks, especially the unique key handling.
|
||
Of course here also a unique key is needed, but this key is the message
|
||
itself (however long or short it is). See @ref{Comparison} for a more
|
||
detailed comparison of the two methods.
|
||
|
||
The following section contains a rather detailed description of the
|
||
interface. We make it that detailed because this is the interface
|
||
we chose for the GNU @code{gettext} Library. Programmers interested
|
||
in using this library will be interested in this description.
|
||
|
||
@menu
|
||
* Interface to gettext:: The interface
|
||
* Ambiguities:: Solving ambiguities
|
||
* Locating Catalogs:: Locating message catalog files
|
||
* Charset conversion:: How to request conversion to Unicode
|
||
* Contexts:: Solving ambiguities in GUI programs
|
||
* Plural forms:: Additional functions for handling plurals
|
||
* Optimized gettext:: Optimization of the *gettext functions
|
||
@end menu
|
||
|
||
@node Interface to gettext
|
||
@subsection The Interface
|
||
@cindex @code{gettext} interface
|
||
|
||
The minimal functionality an interface must have is a) to select a
|
||
domain the strings are coming from (a single domain for all programs is
|
||
not reasonable because its construction and maintenance is difficult,
|
||
perhaps impossible) and b) to access a string in a selected domain.
|
||
|
||
This is principally the description of the @code{gettext} interface. It
|
||
has a global domain which unqualified usages reference. Of course this
|
||
domain is selectable by the user.
|
||
|
||
@example
|
||
char *textdomain (const char *domain_name);
|
||
@end example
|
||
|
||
This provides the possibility to change or query the current status of
|
||
the current global domain of the @code{LC_MESSAGES} category. The
|
||
argument is a null-terminated string, whose characters must be legal in
|
||
the use in filenames. If the @var{domain_name} argument is @code{NULL},
|
||
the function returns the current value. If no value has been set
|
||
before, the name of the default domain is returned: @emph{messages}.
|
||
Please note that although the return value of @code{textdomain} is of
|
||
type @code{char *} no changing is allowed. It is also important to know
|
||
that no checks of the availability are made. If the name is not
|
||
available you will see this by the fact that no translations are provided.
|
||
|
||
@noindent
|
||
To use a domain set by @code{textdomain} the function
|
||
|
||
@example
|
||
char *gettext (const char *msgid);
|
||
@end example
|
||
|
||
@noindent
|
||
is to be used. This is the simplest reasonable form one can imagine.
|
||
The translation of the string @var{msgid} is returned if it is available
|
||
in the current domain. If it is not available, the argument itself is
|
||
returned. If the argument is @code{NULL} the result is undefined.
|
||
|
||
One thing which should come to mind is that no explicit dependency on
|
||
the used domain is given. The current value of the domain is used.
|
||
If this changes between two
|
||
executions of the same @code{gettext} call in the program, both calls
|
||
reference a different message catalog.
|
||
|
||
For the easiest case, which is normally used in internationalized
|
||
packages, once at the beginning of execution a call to @code{textdomain}
|
||
is issued, setting the domain to a unique name, normally the package
|
||
name. In the following code all strings which have to be translated are
|
||
filtered through the gettext function. That's all, the package speaks
|
||
your language.
|
||
|
||
@node Ambiguities
|
||
@subsection Solving Ambiguities
|
||
@cindex several domains
|
||
@cindex domain ambiguities
|
||
@cindex large package
|
||
|
||
While this single name domain works well for most applications there
|
||
might be the need to get translations from more than one domain. Of
|
||
course one could switch between different domains with calls to
|
||
@code{textdomain}, but this is really not convenient nor is it fast. A
|
||
possible situation could be one case subject to discussion during this
|
||
writing: all
|
||
error messages of functions in the set of commonly used functions should
|
||
go into a separate domain @code{error}. By this means we would only need
|
||
to translate them once.
|
||
Another case are messages from a library, as these @emph{have} to be
|
||
independent of the current domain set by the application.
|
||
|
||
@noindent
|
||
For these reasons there are two more functions to retrieve strings:
|
||
|
||
@example
|
||
char *dgettext (const char *domain_name, const char *msgid);
|
||
char *dcgettext (const char *domain_name, const char *msgid,
|
||
int category);
|
||
@end example
|
||
|
||
Both take an additional argument at the first place, which corresponds
|
||
to the argument of @code{textdomain}. The third argument of
|
||
@code{dcgettext} allows using a locale category other than @code{LC_MESSAGES}.
|
||
But I really don't know where this can be useful. If the
|
||
@var{domain_name} is @code{NULL} or @var{category} has a value besides
|
||
the known ones, the result is undefined. It should also be noted that
|
||
this function is not part of the second known implementation of this
|
||
function family, the one found in Solaris.
|
||
|
||
A second ambiguity can arise from the fact that perhaps more than one
|
||
domain has the same name. This can be solved by specifying where the
|
||
needed message catalog files can be found.
|
||
|
||
@example
|
||
char *bindtextdomain (const char *domain_name,
|
||
const char *dir_name);
|
||
@end example
|
||
|
||
Calling this function binds the given domain to a file in the specified
|
||
directory (how this file is determined follows below). Especially a
|
||
file in the system's default place is not favored against the specified
|
||
file anymore (as it would be by solely using @code{textdomain}). A
|
||
@code{NULL} pointer for the @var{dir_name} parameter returns the binding
|
||
associated with @var{domain_name}. If @var{domain_name} itself is
|
||
@code{NULL} nothing happens and a @code{NULL} pointer is returned. Here
|
||
again as for all the other functions is true that none of the return
|
||
values must be changed!
|
||
|
||
It is important to remember that relative path names for the
|
||
@var{dir_name} parameter can be trouble. Since the path is always
|
||
computed relative to the current directory different results will be
|
||
achieved when the program executes a @code{chdir} command. Relative
|
||
paths should always be avoided to avoid dependencies and
|
||
unreliabilities.
|
||
|
||
@example
|
||
wchar_t *wbindtextdomain (const char *domain_name,
|
||
const wchar_t *dir_name);
|
||
@end example
|
||
|
||
This function is provided only on native Windows platforms. It is like
|
||
@code{bindtextdomain}, except that the @var{dir_name} parameter is a
|
||
wide string (in UTF-16 encoding, as usual on Windows).
|
||
|
||
@node Locating Catalogs
|
||
@subsection Locating Message Catalog Files
|
||
@cindex message catalog files location
|
||
|
||
Because many different languages for many different packages have to be
|
||
stored we need some way to add this information to message catalog
|
||
files. The way usually used in Unix environments is to have this encoding
|
||
in the file name. This is also done here. The directory name given in
|
||
@code{bindtextdomain}'s second argument (or the default directory),
|
||
followed by the name of the locale, the locale category, and the domain name
|
||
are concatenated:
|
||
|
||
@example
|
||
@var{dir_name}/@var{locale}/LC_@var{category}/@var{domain_name}.mo
|
||
@end example
|
||
|
||
The default value for @var{dir_name} is system specific. For the GNU
|
||
library, and for packages adhering to its conventions, it's:
|
||
@example
|
||
/usr/local/share/locale
|
||
@end example
|
||
|
||
@noindent
|
||
@var{locale} is the name of the locale category which is designated by
|
||
@code{LC_@var{category}}. For @code{gettext} and @code{dgettext} this
|
||
@code{LC_@var{category}} is always @code{LC_MESSAGES}.@footnote{Some
|
||
systems, e.g.@: mingw, don't have @code{LC_MESSAGES}. Here we use a more or
|
||
less arbitrary value for it, namely 1729, the smallest positive integer
|
||
which can be represented in two different ways as the sum of two cubes.}
|
||
The name of the locale category is determined through
|
||
@code{setlocale (LC_@var{category}, NULL)}.
|
||
@footnote{When the system does not support @code{setlocale} its behavior
|
||
in setting the locale values is simulated by looking at the environment
|
||
variables.}
|
||
When using the function @code{dcgettext}, you can specify the locale category
|
||
through the third argument.
|
||
|
||
@node Charset conversion
|
||
@subsection How to specify the output character set @code{gettext} uses
|
||
@cindex charset conversion at runtime
|
||
@cindex encoding conversion at runtime
|
||
|
||
@code{gettext} not only looks up a translation in a message catalog. It
|
||
also converts the translation on the fly to the desired output character
|
||
set. This is useful if the user is working in a different character set
|
||
than the translator who created the message catalog, because it avoids
|
||
distributing variants of message catalogs which differ only in the
|
||
character set.
|
||
|
||
The output character set is, by default, the value of @code{nl_langinfo
|
||
(CODESET)}, which depends on the @code{LC_CTYPE} part of the current
|
||
locale. But programs which store strings in a locale independent way
|
||
(e.g.@: UTF-8) can request that @code{gettext} and related functions
|
||
return the translations in that encoding, by use of the
|
||
@code{bind_textdomain_codeset} function.
|
||
|
||
Note that the @var{msgid} argument to @code{gettext} is not subject to
|
||
character set conversion. Also, when @code{gettext} does not find a
|
||
translation for @var{msgid}, it returns @var{msgid} unchanged --
|
||
independently of the current output character set. It is therefore
|
||
recommended that all @var{msgid}s be US-ASCII strings.
|
||
|
||
@deftypefun {char *} bind_textdomain_codeset (const char *@var{domainname}, const char *@var{codeset})
|
||
The @code{bind_textdomain_codeset} function can be used to specify the
|
||
output character set for message catalogs for domain @var{domainname}.
|
||
The @var{codeset} argument must be a valid codeset name which can be used
|
||
for the @code{iconv_open} function, or a null pointer.
|
||
|
||
If the @var{codeset} parameter is the null pointer,
|
||
@code{bind_textdomain_codeset} returns the currently selected codeset
|
||
for the domain with the name @var{domainname}. It returns @code{NULL} if
|
||
no codeset has yet been selected.
|
||
|
||
The @code{bind_textdomain_codeset} function can be used several times.
|
||
If used multiple times with the same @var{domainname} argument, the
|
||
later call overrides the settings made by the earlier one.
|
||
|
||
The @code{bind_textdomain_codeset} function returns a pointer to a
|
||
string containing the name of the selected codeset. The string is
|
||
allocated internally in the function and must not be changed by the
|
||
user. If the system went out of core during the execution of
|
||
@code{bind_textdomain_codeset}, the return value is @code{NULL} and the
|
||
global variable @var{errno} is set accordingly.
|
||
@end deftypefun
|
||
|
||
@node Contexts
|
||
@subsection Using contexts for solving ambiguities
|
||
@cindex context
|
||
@cindex GUI programs
|
||
@cindex translating menu entries
|
||
@cindex menu entries
|
||
|
||
One place where the @code{gettext} functions, if used normally, have big
|
||
problems is within programs with graphical user interfaces (GUIs). The
|
||
problem is that many of the strings which have to be translated are very
|
||
short. They have to appear in pull-down menus which restricts the
|
||
length. But strings which are not containing entire sentences or at
|
||
least large fragments of a sentence may appear in more than one
|
||
situation in the program but might have different translations. This is
|
||
especially true for the one-word strings which are frequently used in
|
||
GUI programs.
|
||
|
||
As a consequence many people say that the @code{gettext} approach is
|
||
wrong and instead @code{catgets} should be used which indeed does not
|
||
have this problem. But there is a very simple and powerful method to
|
||
handle this kind of problems with the @code{gettext} functions.
|
||
|
||
Contexts can be added to strings to be translated. A context dependent
|
||
translation lookup is when a translation for a given string is searched,
|
||
that is limited to a given context. The translation for the same string
|
||
in a different context can be different. The different translations of
|
||
the same string in different contexts can be stored in the same
|
||
MO file, and can be edited by the translator in the same PO file.
|
||
|
||
The @file{gettext.h} include file contains the lookup macros for strings
|
||
with contexts. They are implemented as thin macros and inline functions
|
||
over the functions from @code{<libintl.h>}.
|
||
|
||
@findex pgettext
|
||
@example
|
||
const char *pgettext (const char *msgctxt, const char *msgid);
|
||
@end example
|
||
|
||
In a call of this macro, @var{msgctxt} and @var{msgid} must be string
|
||
literals. The macro returns the translation of @var{msgid}, restricted
|
||
to the context given by @var{msgctxt}.
|
||
|
||
The @var{msgctxt} string is visible in the PO file to the translator.
|
||
You should try to make it somehow canonical and never changing. Because
|
||
every time you change an @var{msgctxt}, the translator will have to review
|
||
the translation of @var{msgid}.
|
||
|
||
Finding a canonical @var{msgctxt} string that doesn't change over time can
|
||
be hard. But you shouldn't use the file name or class name containing the
|
||
@code{pgettext} call -- because it is a common development task to rename
|
||
a file or a class, and it shouldn't cause translator work. Also you shouldn't
|
||
use a comment in the form of a complete English sentence as @var{msgctxt} --
|
||
because orthography or grammar changes are often applied to such sentences,
|
||
and again, it shouldn't force the translator to do a review.
|
||
|
||
The @samp{p} in @samp{pgettext} stands for ``particular'': @code{pgettext}
|
||
fetches a particular translation of the @var{msgid}.
|
||
|
||
@findex dpgettext
|
||
@findex dcpgettext
|
||
@example
|
||
const char *dpgettext (const char *domain_name,
|
||
const char *msgctxt, const char *msgid);
|
||
const char *dcpgettext (const char *domain_name,
|
||
const char *msgctxt, const char *msgid,
|
||
int category);
|
||
@end example
|
||
|
||
These are generalizations of @code{pgettext}. They behave similarly to
|
||
@code{dgettext} and @code{dcgettext}, respectively. The @var{domain_name}
|
||
argument defines the translation domain. The @var{category} argument
|
||
allows using a locale category other than @code{LC_MESSAGES}.
|
||
|
||
As an example consider the following fictional situation. A GUI program
|
||
has a menu bar with the following entries:
|
||
|
||
@smallexample
|
||
+------------+------------+--------------------------------------+
|
||
| File | Printer | |
|
||
+------------+------------+--------------------------------------+
|
||
| Open | | Select |
|
||
| New | | Open |
|
||
+----------+ | Connect |
|
||
+----------+
|
||
@end smallexample
|
||
|
||
To have the strings @code{File}, @code{Printer}, @code{Open},
|
||
@code{New}, @code{Select}, and @code{Connect} translated there has to be
|
||
at some point in the code a call to a function of the @code{gettext}
|
||
family. But in two places the string passed into the function would be
|
||
@code{Open}. The translations might not be the same and therefore we
|
||
are in the dilemma described above.
|
||
|
||
What distinguishes the two places is the menu path from the menu root to
|
||
the particular menu entries:
|
||
|
||
@smallexample
|
||
Menu|File
|
||
Menu|Printer
|
||
Menu|File|Open
|
||
Menu|File|New
|
||
Menu|Printer|Select
|
||
Menu|Printer|Open
|
||
Menu|Printer|Connect
|
||
@end smallexample
|
||
|
||
The context is thus the menu path without its last part. So, the calls
|
||
look like this:
|
||
|
||
@smallexample
|
||
pgettext ("Menu|", "File")
|
||
pgettext ("Menu|", "Printer")
|
||
pgettext ("Menu|File|", "Open")
|
||
pgettext ("Menu|File|", "New")
|
||
pgettext ("Menu|Printer|", "Select")
|
||
pgettext ("Menu|Printer|", "Open")
|
||
pgettext ("Menu|Printer|", "Connect")
|
||
@end smallexample
|
||
|
||
Whether or not to use the @samp{|} character at the end of the context is a
|
||
matter of style.
|
||
|
||
For more complex cases, where the @var{msgctxt} or @var{msgid} are not
|
||
string literals, more general macros are available:
|
||
|
||
@findex pgettext_expr
|
||
@findex dpgettext_expr
|
||
@findex dcpgettext_expr
|
||
@example
|
||
const char *pgettext_expr (const char *msgctxt, const char *msgid);
|
||
const char *dpgettext_expr (const char *domain_name,
|
||
const char *msgctxt, const char *msgid);
|
||
const char *dcpgettext_expr (const char *domain_name,
|
||
const char *msgctxt, const char *msgid,
|
||
int category);
|
||
@end example
|
||
|
||
Here @var{msgctxt} and @var{msgid} can be arbitrary string-valued expressions.
|
||
These macros are more general. But in the case that both argument expressions
|
||
are string literals, the macros without the @samp{_expr} suffix are more
|
||
efficient.
|
||
|
||
@node Plural forms
|
||
@subsection Additional functions for plural forms
|
||
@cindex plural forms
|
||
|
||
The functions of the @code{gettext} family described so far (and all the
|
||
@code{catgets} functions as well) have one problem in the real world
|
||
which has been neglected completely in all existing approaches. What
|
||
is meant here is the handling of plural forms.
|
||
|
||
Looking through Unix source code before the time anybody thought about
|
||
internationalization (and, sadly, even afterwards) one can often find
|
||
code similar to the following:
|
||
|
||
@smallexample
|
||
printf ("%d file%s deleted", n, n == 1 ? "" : "s");
|
||
@end smallexample
|
||
|
||
@noindent
|
||
After the first complaints from people internationalizing the code people
|
||
either completely avoided formulations like this or used strings like
|
||
@code{"file(s)"}. Both look unnatural and should be avoided. First
|
||
tries to solve the problem correctly looked like this:
|
||
|
||
@smallexample
|
||
if (n == 1)
|
||
printf ("%d file deleted", n);
|
||
else
|
||
printf ("%d files deleted", n);
|
||
@end smallexample
|
||
|
||
But this does not solve the problem. It helps languages where the
|
||
plural form of a noun is not simply constructed by adding an
|
||
@ifhtml
|
||
‘s’
|
||
@end ifhtml
|
||
@ifnothtml
|
||
`s'
|
||
@end ifnothtml
|
||
but that is all. Once again people fell into the trap of believing the
|
||
rules their language is using are universal. But the handling of plural
|
||
forms differs widely between the language families. For example,
|
||
Rafal Maszkowski @code{<rzm@@mat.uni.torun.pl>} reports:
|
||
|
||
@quotation
|
||
In Polish we use e.g.@: plik (file) this way:
|
||
@example
|
||
1 plik
|
||
2,3,4 pliki
|
||
5-21 pliko'w
|
||
22-24 pliki
|
||
25-31 pliko'w
|
||
@end example
|
||
and so on (o' means 8859-2 oacute which should be rather okreska,
|
||
similar to aogonek).
|
||
@end quotation
|
||
|
||
There are two things which can differ between languages (and even inside
|
||
language families):
|
||
|
||
@itemize @bullet
|
||
@item
|
||
The form how plural forms are built differs. This is a problem with
|
||
languages which have many irregularities. German, for instance, is a
|
||
drastic case. Though English and German are part of the same language
|
||
family (Germanic), the almost regular forming of plural noun forms
|
||
(appending an
|
||
@ifhtml
|
||
‘s’)
|
||
@end ifhtml
|
||
@ifnothtml
|
||
`s')
|
||
@end ifnothtml
|
||
is hardly found in German.
|
||
|
||
@item
|
||
The number of plural forms differs. This is somewhat surprising for
|
||
those who only have experiences with Romanic and Germanic languages
|
||
since here the number is the same (there are two).
|
||
|
||
But other language families have only one form or many forms. More
|
||
information on this in an extra section.
|
||
@end itemize
|
||
|
||
The consequence of this is that application writers should not try to
|
||
solve the problem in their code. This would be localization since it is
|
||
only usable for certain, hardcoded language environments. Instead the
|
||
extended @code{gettext} interface should be used.
|
||
|
||
These extra functions are taking instead of the one key string two
|
||
strings and a numerical argument. The idea behind this is that using
|
||
the numerical argument and the first string as a key, the implementation
|
||
can select using rules specified by the translator the right plural
|
||
form. The two string arguments then will be used to provide a return
|
||
value in case no message catalog is found (similar to the normal
|
||
@code{gettext} behavior). In this case the rules for Germanic languages
|
||
are used and it is assumed that the first string argument is the singular
|
||
form, the second the plural form.
|
||
|
||
This has the consequence that programs without language catalogs can
|
||
display the correct strings only if the program itself is written using
|
||
a Germanic language. This is a limitation but since the GNU C library
|
||
(as well as the GNU @code{gettext} package) are written as part of the
|
||
GNU package and the coding standards for the GNU project require programs
|
||
to be written in English, this solution nevertheless fulfills its
|
||
purpose.
|
||
|
||
@deftypefun {char *} ngettext (const char *@var{msgid1}, const char *@var{msgid2}, unsigned long int @var{n})
|
||
The @code{ngettext} function is similar to the @code{gettext} function
|
||
as it finds the message catalogs in the same way. But it takes two
|
||
extra arguments. The @var{msgid1} parameter must contain the singular
|
||
form of the string to be converted. It is also used as the key for the
|
||
search in the catalog. The @var{msgid2} parameter is the plural form.
|
||
The parameter @var{n} is used to determine the plural form. If no
|
||
message catalog is found @var{msgid1} is returned if @code{n == 1},
|
||
otherwise @var{msgid2}.
|
||
|
||
An example for the use of this function is:
|
||
|
||
@smallexample
|
||
printf (ngettext ("%d file removed", "%d files removed", n), n);
|
||
@end smallexample
|
||
|
||
Please note that the numeric value @var{n} has to be passed to the
|
||
@code{printf} function as well. It is not sufficient to pass it only to
|
||
@code{ngettext}.
|
||
|
||
In the English singular case, the number -- always 1 -- can be replaced with
|
||
"one":
|
||
|
||
@smallexample
|
||
printf (ngettext ("One file removed", "%d files removed", n), n);
|
||
@end smallexample
|
||
|
||
@noindent
|
||
This works because the @samp{printf} function discards excess arguments that
|
||
are not consumed by the format string.
|
||
|
||
If this function is meant to yield a format string that takes two or more
|
||
arguments, you can not use it like this:
|
||
|
||
@smallexample
|
||
printf (ngettext ("%d file removed from directory %s",
|
||
"%d files removed from directory %s",
|
||
n),
|
||
n, dir);
|
||
@end smallexample
|
||
|
||
@noindent
|
||
because in many languages the translators want to replace the @samp{%d}
|
||
with an explicit word in the singular case, just like ``one'' in English,
|
||
and C format strings cannot consume the second argument but skip the first
|
||
argument. Instead, you have to reorder the arguments so that @samp{n}
|
||
comes last:
|
||
|
||
@smallexample
|
||
printf (ngettext ("%2$d file removed from directory %1$s",
|
||
"%2$d files removed from directory %1$s",
|
||
n),
|
||
dir, n);
|
||
@end smallexample
|
||
|
||
@noindent
|
||
See @ref{c-format} for details about this argument reordering syntax.
|
||
|
||
When you know that the value of @code{n} is within a given range, you can
|
||
specify it as a comment directed to the @code{xgettext} tool. This
|
||
information may help translators to use more adequate translations. Like
|
||
this:
|
||
|
||
@smallexample
|
||
if (days > 7 && days < 14)
|
||
/* xgettext: range: 1..6 */
|
||
printf (ngettext ("one week and one day", "one week and %d days",
|
||
days - 7),
|
||
days - 7);
|
||
@end smallexample
|
||
|
||
It is also possible to use this function when the strings don't contain a
|
||
cardinal number:
|
||
|
||
@smallexample
|
||
puts (ngettext ("Delete the selected file?",
|
||
"Delete the selected files?",
|
||
n));
|
||
@end smallexample
|
||
|
||
In this case the number @var{n} is only used to choose the plural form.
|
||
@end deftypefun
|
||
|
||
@deftypefun {char *} dngettext (const char *@var{domain}, const char *@var{msgid1}, const char *@var{msgid2}, unsigned long int @var{n})
|
||
The @code{dngettext} is similar to the @code{dgettext} function in the
|
||
way the message catalog is selected. The difference is that it takes
|
||
two extra parameters to provide the correct plural form. These two
|
||
parameters are handled in the same way @code{ngettext} handles them.
|
||
@end deftypefun
|
||
|
||
@deftypefun {char *} dcngettext (const char *@var{domain}, const char *@var{msgid1}, const char *@var{msgid2}, unsigned long int @var{n}, int @var{category})
|
||
The @code{dcngettext} is similar to the @code{dcgettext} function in the
|
||
way the message catalog is selected. The difference is that it takes
|
||
two extra parameters to provide the correct plural form. These two
|
||
parameters are handled in the same way @code{ngettext} handles them.
|
||
@end deftypefun
|
||
|
||
Now, how do these functions solve the problem of the plural forms?
|
||
Without the input of linguists (which was not available) it was not
|
||
possible to determine whether there are only a few different forms in
|
||
which plural forms are formed or whether the number can increase with
|
||
every new supported language.
|
||
|
||
Therefore the solution implemented is to allow the translator to specify
|
||
the rules of how to select the plural form. Since the formula varies
|
||
with every language this is the only viable solution except for
|
||
hardcoding the information in the code (which still would require the
|
||
possibility of extensions to not prevent the use of new languages).
|
||
|
||
@cindex specifying plural form in a PO file
|
||
@kwindex nplurals@r{, in a PO file header}
|
||
@kwindex plural@r{, in a PO file header}
|
||
The information about the plural form selection has to be stored in the
|
||
header entry of the PO file (the one with the empty @code{msgid} string).
|
||
The plural form information looks like this:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=2; plural=n == 1 ? 0 : 1;
|
||
@end smallexample
|
||
|
||
The @code{nplurals} value must be a decimal number which specifies how
|
||
many different plural forms exist for this language. The string
|
||
following @code{plural} is an expression which is using the C language
|
||
syntax. Exceptions are that no negative numbers are allowed, numbers
|
||
must be decimal, and the only variable allowed is @code{n}. Spaces are
|
||
allowed in the expression, but backslash-newlines are not; in the
|
||
examples below the backslash-newlines are present for formatting purposes
|
||
only. This expression will be evaluated whenever one of the functions
|
||
@code{ngettext}, @code{dngettext}, or @code{dcngettext} is called. The
|
||
numeric value passed to these functions is then substituted for all uses
|
||
of the variable @code{n} in the expression. The resulting value then
|
||
must be greater than or equal to zero and smaller than the value given as the
|
||
value of @code{nplurals}.
|
||
|
||
@noindent
|
||
@cindex plural form formulas
|
||
The following rules are known at this point. The languages are listed
|
||
with their families. But this does not necessarily mean the information can be
|
||
generalized for the whole family (as can be easily seen in the table
|
||
below).@footnote{Additions are welcome. Send appropriate information to
|
||
@email{bug-gettext@@gnu.org} and @email{bug-glibc-manual@@gnu.org}.
|
||
The Unicode CLDR Project (@uref{http://cldr.unicode.org}) provides a
|
||
comprehensive set of plural forms in a different format. The
|
||
@code{msginit} program has preliminary support for the format so you can
|
||
use it as a baseline (@pxref{msginit Invocation}).}
|
||
|
||
@table @asis
|
||
@item Only one form:
|
||
Some languages only require one single form. There is no distinction
|
||
between the singular and plural form. An appropriate header entry
|
||
would look like this:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=1; plural=0;
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Languages with this property include:
|
||
|
||
@table @asis
|
||
@item Asian family
|
||
Japanese, @c 122.1 million speakers
|
||
Vietnamese, @c 68.6 million speakers
|
||
Korean @c 66.3 million speakers
|
||
@item Tai-Kadai family
|
||
Thai @c 20.4 million speakers
|
||
@end table
|
||
|
||
@item Two forms, singular used for one only
|
||
This is the form used in most existing programs since it is what English
|
||
is using. A header entry would look like this:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=2; plural=n != 1;
|
||
@end smallexample
|
||
|
||
(Note: this uses the feature of C expressions that boolean expressions
|
||
have the value zero or one.)
|
||
|
||
@noindent
|
||
Languages with this property include:
|
||
|
||
@table @asis
|
||
@item Germanic family
|
||
English, @c 328.0 million speakers
|
||
German, @c 96.9 million speakers
|
||
Dutch, @c 21.7 million speakers
|
||
Swedish, @c 8.3 million speakers
|
||
Danish, @c 5.6 million speakers
|
||
Norwegian, @c 4.6 million speakers
|
||
Faroese @c 0.05 million speakers
|
||
@item Romanic family
|
||
Spanish, @c 328.5 million speakers
|
||
Portuguese, @c 178.0 million speakers - 163 million Brazilian Portuguese
|
||
Italian @c 61.7 million speakers
|
||
@item Latin/Greek family
|
||
Greek @c 13.1 million speakers
|
||
@item Slavic family
|
||
Bulgarian @c 9.1 million speakers
|
||
@item Finno-Ugric family
|
||
Finnish, @c 5.0 million speakers
|
||
Estonian @c 1.0 million speakers
|
||
@item Semitic family
|
||
Hebrew @c 5.3 million speakers
|
||
@item Austronesian family
|
||
Bahasa Indonesian @c 23.2 million speakers
|
||
@item Artificial
|
||
Esperanto @c 2 million speakers
|
||
@end table
|
||
|
||
@noindent
|
||
Other languages using the same header entry are:
|
||
|
||
@table @asis
|
||
@item Finno-Ugric family
|
||
Hungarian @c 12.5 million speakers
|
||
@item Turkic/Altaic family
|
||
Turkish @c 50.8 million speakers
|
||
@end table
|
||
|
||
Hungarian does not appear to have a plural if you look at sentences involving
|
||
cardinal numbers. For example, ``1 apple'' is ``1 alma'', and ``123 apples'' is
|
||
``123 alma''. But when the number is not explicit, the distinction between
|
||
singular and plural exists: ``the apple'' is ``az alma'', and ``the apples'' is
|
||
``az alm@'{a}k''. Since @code{ngettext} has to support both types of sentences,
|
||
it is classified here, under ``two forms''.
|
||
|
||
The same holds for Turkish: ``1 apple'' is ``1 elma'', and ``123 apples'' is
|
||
``123 elma''. But when the number is omitted, the distinction between singular
|
||
and plural exists: ``the apple'' is ``elma'', and ``the apples'' is
|
||
``elmalar''.
|
||
|
||
@item Two forms, singular used for zero and one
|
||
Exceptional case in the language family. The header entry would be:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=2; plural=n>1;
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Languages with this property include:
|
||
|
||
@table @asis
|
||
@item Romanic family
|
||
Brazilian Portuguese, @c 163 million speakers
|
||
French @c 67.8 million speakers
|
||
@end table
|
||
|
||
@item Three forms, special case for zero
|
||
The header entry would be:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2;
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Languages with this property include:
|
||
|
||
@table @asis
|
||
@item Baltic family
|
||
Latvian @c 1.5 million speakers
|
||
@end table
|
||
|
||
@item Three forms, special cases for one and two
|
||
The header entry would be:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=3; plural=n==1 ? 0 : n==2 ? 1 : 2;
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Languages with this property include:
|
||
|
||
@table @asis
|
||
@item Celtic
|
||
Gaeilge (Irish) @c 0.4 million speakers
|
||
@end table
|
||
|
||
@item Three forms, special case for numbers ending in 00 or [2-9][0-9]
|
||
The header entry would be:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=3; \
|
||
plural=n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2;
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Languages with this property include:
|
||
|
||
@table @asis
|
||
@item Romanic family
|
||
Romanian @c 23.4 million speakers
|
||
@end table
|
||
|
||
@item Three forms, special case for numbers ending in 1[2-9]
|
||
The header entry would look like this:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=3; \
|
||
plural=n%10==1 && n%100!=11 ? 0 : \
|
||
n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2;
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Languages with this property include:
|
||
|
||
@table @asis
|
||
@item Baltic family
|
||
Lithuanian @c 3.2 million speakers
|
||
@end table
|
||
|
||
@item Three forms, special cases for numbers ending in 1 and 2, 3, 4, except those ending in 1[1-4]
|
||
The header entry would look like this:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=3; \
|
||
plural=n%10==1 && n%100!=11 ? 0 : \
|
||
n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Languages with this property include:
|
||
|
||
@table @asis
|
||
@item Slavic family
|
||
Russian, @c 143.6 million speakers
|
||
Ukrainian, @c 37.0 million speakers
|
||
Belarusian, @c 8.6 million speakers
|
||
Serbian, @c 7.0 million speakers
|
||
Croatian @c 5.5 million speakers
|
||
@end table
|
||
|
||
@item Three forms, special cases for 1 and 2, 3, 4
|
||
The header entry would look like this:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=3; \
|
||
plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Languages with this property include:
|
||
|
||
@table @asis
|
||
@item Slavic family
|
||
Czech, @c 9.5 million speakers
|
||
Slovak @c 5.0 million speakers
|
||
@end table
|
||
|
||
@item Three forms, special case for one and some numbers ending in 2, 3, or 4
|
||
The header entry would look like this:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=3; \
|
||
plural=n==1 ? 0 : \
|
||
n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Languages with this property include:
|
||
|
||
@table @asis
|
||
@item Slavic family
|
||
Polish @c 40.0 million speakers
|
||
@end table
|
||
|
||
@item Four forms, special case for one and all numbers ending in 02, 03, or 04
|
||
The header entry would look like this:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=4; \
|
||
plural=n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3;
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Languages with this property include:
|
||
|
||
@table @asis
|
||
@item Slavic family
|
||
Slovenian @c 1.9 million speakers
|
||
@end table
|
||
|
||
@item Six forms, special cases for one, two, all numbers ending in 02, 03, @dots{} 10, all numbers ending in 11 @dots{} 99, and others
|
||
The header entry would look like this:
|
||
|
||
@smallexample
|
||
Plural-Forms: nplurals=6; \
|
||
plural=n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 \
|
||
: n%100>=11 ? 4 : 5;
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Languages with this property include:
|
||
|
||
@table @asis
|
||
@item Afroasiatic family
|
||
Arabic @c 246.0 million speakers
|
||
@end table
|
||
@end table
|
||
|
||
You might now ask, @code{ngettext} handles only numbers @var{n} of type
|
||
@samp{unsigned long}. What about larger integer types? What about negative
|
||
numbers? What about floating-point numbers?
|
||
|
||
About larger integer types, such as @samp{uintmax_t} or
|
||
@samp{unsigned long long}: they can be handled by reducing the value to a
|
||
range that fits in an @samp{unsigned long}. Simply casting the value to
|
||
@samp{unsigned long} would not do the right thing, since it would treat
|
||
@code{ULONG_MAX + 1} like zero, @code{ULONG_MAX + 2} like singular, and
|
||
the like. Here you can exploit the fact that all mentioned plural form
|
||
formulas eventually become periodic, with a period that is a divisor of 100
|
||
(or 1000 or 1000000). So, when you reduce a large value to another one in
|
||
the range [1000000, 1999999] that ends in the same 6 decimal digits, you
|
||
can assume that it will lead to the same plural form selection. This code
|
||
does this:
|
||
|
||
@smallexample
|
||
#include <inttypes.h>
|
||
uintmax_t nbytes = ...;
|
||
printf (ngettext ("The file has %"PRIuMAX" byte.",
|
||
"The file has %"PRIuMAX" bytes.",
|
||
(nbytes > ULONG_MAX
|
||
? (nbytes % 1000000) + 1000000
|
||
: nbytes)),
|
||
nbytes);
|
||
@end smallexample
|
||
|
||
Negative and floating-point values usually represent physical entities for
|
||
which singular and plural don't clearly apply. In such cases, there is no
|
||
need to use @code{ngettext}; a simple @code{gettext} call with a form suitable
|
||
for all values will do. For example:
|
||
|
||
@smallexample
|
||
printf (gettext ("Time elapsed: %.3f seconds"),
|
||
num_milliseconds * 0.001);
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Even if @var{num_milliseconds} happens to be a multiple of 1000, the output
|
||
@smallexample
|
||
Time elapsed: 1.000 seconds
|
||
@end smallexample
|
||
@noindent
|
||
is acceptable in English, and similarly for other languages.
|
||
|
||
The translators' perspective regarding plural forms is explained in
|
||
@ref{Translating plural forms}.
|
||
|
||
@node Optimized gettext
|
||
@subsection Optimization of the *gettext functions
|
||
@cindex optimization of @code{gettext} functions
|
||
|
||
At this point of the discussion we should talk about an advantage of the
|
||
GNU @code{gettext} implementation. Some readers might have pointed out
|
||
that an internationalized program might have poor performance if some
|
||
string has to be translated in an inner loop. While this is unavoidable
|
||
when the string varies from one run of the loop to the other it is
|
||
simply a waste of time when the string is always the same. Take the
|
||
following example:
|
||
|
||
@example
|
||
@group
|
||
@{
|
||
while (@dots{})
|
||
@{
|
||
puts (gettext ("Hello world"));
|
||
@}
|
||
@}
|
||
@end group
|
||
@end example
|
||
|
||
@noindent
|
||
When the locale selection does not change between two runs the resulting
|
||
string is always the same. One way to use this is:
|
||
|
||
@example
|
||
@group
|
||
@{
|
||
str = gettext ("Hello world");
|
||
while (@dots{})
|
||
@{
|
||
puts (str);
|
||
@}
|
||
@}
|
||
@end group
|
||
@end example
|
||
|
||
@noindent
|
||
But this solution is not usable in all situations (e.g.@: when the locale
|
||
selection changes) nor does it lead to legible code.
|
||
|
||
For this reason, GNU @code{gettext} caches previous translation results.
|
||
When the same translation is requested twice, with no new message
|
||
catalogs being loaded in between, @code{gettext} will, the second time,
|
||
find the result through a single cache lookup.
|
||
|
||
@node Comparison
|
||
@section Comparing the Two Interfaces
|
||
@cindex @code{gettext} vs @code{catgets}
|
||
@cindex comparison of interfaces
|
||
|
||
@c FIXME: arguments to catgets vs. gettext
|
||
@c Partly done 950718 -- drepper
|
||
|
||
The following discussion is perhaps a little bit colored. As said
|
||
above we implemented GNU @code{gettext} following the Uniforum
|
||
proposal and this surely has its reasons. But it should show how we
|
||
came to this decision.
|
||
|
||
First we take a look at the developing process. When we write an
|
||
application using NLS provided by @code{gettext} we proceed as always.
|
||
Only when we come to a string which might be seen by the users and thus
|
||
has to be translated we use @code{gettext("@dots{}")} instead of
|
||
@code{"@dots{}"}. At the beginning of each source file (or in a central
|
||
header file) we define
|
||
|
||
@example
|
||
#define gettext(String) (String)
|
||
@end example
|
||
|
||
Even this definition can be avoided when the system supports the
|
||
@code{gettext} function in its C library. When we compile this code the
|
||
result is the same as if no NLS code is used. When you take a look at
|
||
the GNU @code{gettext} code you will see that we use @code{_("@dots{}")}
|
||
instead of @code{gettext("@dots{}")}. This reduces the number of
|
||
additional characters per translatable string to @emph{3} (in words:
|
||
three).
|
||
|
||
When now a production version of the program is needed we simply replace
|
||
the definition
|
||
|
||
@example
|
||
#define _(String) (String)
|
||
@end example
|
||
|
||
@noindent
|
||
by
|
||
|
||
@cindex include file @file{libintl.h}
|
||
@example
|
||
#include <libintl.h>
|
||
#define _(String) gettext (String)
|
||
@end example
|
||
|
||
@noindent
|
||
Additionally we run the program @file{xgettext} on all source code files
|
||
which contain translatable strings and that's it: we have a running
|
||
program which does not depend on translations to be available, but which
|
||
can use any that becomes available.
|
||
|
||
@cindex @code{N_}, a convenience macro
|
||
The same procedure can be done for the @code{gettext_noop} invocations
|
||
(@pxref{Special cases}). One usually defines @code{gettext_noop} as a
|
||
no-op macro. So you should consider the following code for your project:
|
||
|
||
@example
|
||
#define gettext_noop(String) String
|
||
#define N_(String) gettext_noop (String)
|
||
@end example
|
||
|
||
@code{N_} is a short form similar to @code{_}. The @file{Makefile} in
|
||
the @file{po/} directory of GNU @code{gettext} knows by default both of the
|
||
mentioned short forms so you are invited to follow this proposal for
|
||
your own ease.
|
||
|
||
Now to @code{catgets}. The main problem is the work for the
|
||
programmer. Every time he comes to a translatable string he has to
|
||
define a number (or a symbolic constant) which also has to be defined in
|
||
the message catalog file. He also has to take care of duplicate
|
||
entries, duplicate message IDs etc. If he wants to have the same
|
||
quality in the message catalog as the GNU @code{gettext} program
|
||
provides he also has to put the descriptive comments for the strings and
|
||
the location in all source code files in the message catalog. This is
|
||
nearly a Mission: Impossible.
|
||
|
||
But there are also some points people might call advantages speaking for
|
||
@code{catgets}. If you have a single word in a string and this string
|
||
is used in different contexts it is likely that in one or the other
|
||
language the word has different translations. Example:
|
||
|
||
@example
|
||
printf ("%s: %d", gettext ("number"), number_of_errors)
|
||
|
||
printf ("you should see %d %s", number_count,
|
||
number_count == 1 ? gettext ("number") : gettext ("numbers"))
|
||
@end example
|
||
|
||
Here we have to translate two times the string @code{"number"}. Even
|
||
if you do not speak a language besides English it might be possible to
|
||
recognize that the two words have a different meaning. In German the
|
||
first appearance has to be translated to @code{"Anzahl"} and the second
|
||
to @code{"Zahl"}.
|
||
|
||
Now you can say that this example is really esoteric. And you are
|
||
right! This is exactly how we felt about this problem and decided that
|
||
it does not weigh that much. The solution for the above problem could
|
||
be very easy:
|
||
|
||
@example
|
||
printf ("%s %d", gettext ("number:"), number_of_errors)
|
||
|
||
printf (number_count == 1 ? gettext ("you should see %d number")
|
||
: gettext ("you should see %d numbers"),
|
||
number_count)
|
||
@end example
|
||
|
||
We believe that we can solve all conflicts with this method. If it is
|
||
difficult one can also consider changing one of the conflicting strings a
|
||
little bit. But it is not impossible to overcome.
|
||
|
||
@code{catgets} allows the same original entry to have different translations,
|
||
but @code{gettext} has another, scalable approach for solving ambiguities
|
||
of this kind: @xref{Ambiguities}.
|
||
|
||
@node Using libintl.a
|
||
@section Using libintl.a in own programs
|
||
|
||
Starting with version 0.9.4 the library @file{libintl.a} should be
|
||
self-contained. I.e., you can use it in your own programs without
|
||
providing additional functions. The @file{Makefile} will put the header
|
||
and the library in directories selected using the @code{$(prefix)}.
|
||
|
||
@node gettext grok
|
||
@section Being a @code{gettext} grok
|
||
|
||
@strong{ NOTE: } This documentation section is outdated and needs to be
|
||
revised.
|
||
|
||
To fully exploit the functionality of the GNU @code{gettext} library it
|
||
is surely helpful to read the source code. But for those who don't want
|
||
to spend that much time in reading the (sometimes complicated) code here
|
||
is a list of comments:
|
||
|
||
@itemize @bullet
|
||
@item Changing the language at runtime
|
||
@cindex language selection at runtime
|
||
|
||
For interactive programs it might be useful to offer a selection of the
|
||
used language at runtime. To understand how to do this one needs to know
|
||
how the used language is determined while executing the @code{gettext}
|
||
function. The method which is presented here only works correctly
|
||
with the GNU implementation of the @code{gettext} functions.
|
||
|
||
In the function @code{dcgettext} at every call the current setting of
|
||
the highest priority environment variable is determined and used.
|
||
Highest priority means here the following list with decreasing
|
||
priority:
|
||
|
||
@enumerate
|
||
@vindex LANGUAGE@r{, environment variable}
|
||
@item @code{LANGUAGE}
|
||
@vindex LC_ALL@r{, environment variable}
|
||
@item @code{LC_ALL}
|
||
@vindex LC_CTYPE@r{, environment variable}
|
||
@vindex LC_NUMERIC@r{, environment variable}
|
||
@vindex LC_TIME@r{, environment variable}
|
||
@vindex LC_COLLATE@r{, environment variable}
|
||
@vindex LC_MONETARY@r{, environment variable}
|
||
@vindex LC_MESSAGES@r{, environment variable}
|
||
@item @code{LC_xxx}, according to selected locale category
|
||
@vindex LANG@r{, environment variable}
|
||
@item @code{LANG}
|
||
@end enumerate
|
||
|
||
Afterwards the path is constructed using the found value and the
|
||
translation file is loaded if available.
|
||
|
||
What happens now when the value for, say, @code{LANGUAGE} changes? According
|
||
to the process explained above the new value of this variable is found
|
||
as soon as the @code{dcgettext} function is called. But this also means
|
||
the (perhaps) different message catalog file is loaded. In other
|
||
words: the used language is changed.
|
||
|
||
But there is one little hook. The code for gcc-2.7.0 and up provides
|
||
some optimization. This optimization normally prevents the calling of
|
||
the @code{dcgettext} function as long as no new catalog is loaded. But
|
||
if @code{dcgettext} is not called the program also cannot detect that the
|
||
@code{LANGUAGE} variable has changed (@pxref{Optimized gettext}). A
|
||
solution for this is very easy. Include the following code in the
|
||
language switching function.
|
||
|
||
@example
|
||
/* Change language. */
|
||
setenv ("LANGUAGE", "fr", 1);
|
||
|
||
/* Make change known. */
|
||
@{
|
||
extern int _nl_msg_cat_cntr;
|
||
++_nl_msg_cat_cntr;
|
||
@}
|
||
@end example
|
||
|
||
@cindex @code{_nl_msg_cat_cntr}
|
||
The variable @code{_nl_msg_cat_cntr} is defined in @file{loadmsgcat.c}.
|
||
You don't need to know what this is for. But it can be used to detect
|
||
whether a @code{gettext} implementation is GNU gettext and not a non-GNU
|
||
system's native gettext implementation.
|
||
|
||
@end itemize
|
||
|
||
@node Temp Programmers
|
||
@section Temporary Notes for the Programmers Chapter
|
||
|
||
@strong{ NOTE: } This documentation section is outdated and needs to be
|
||
revised.
|
||
|
||
@menu
|
||
* Temp Implementations:: Temporary - Two Possible Implementations
|
||
* Temp catgets:: Temporary - About @code{catgets}
|
||
* Temp WSI:: Temporary - Why a single implementation
|
||
* Temp Notes:: Temporary - Notes
|
||
@end menu
|
||
|
||
@node Temp Implementations
|
||
@subsection Temporary - Two Possible Implementations
|
||
|
||
There are two competing methods for language independent messages:
|
||
the X/Open @code{catgets} method, and the Uniforum @code{gettext}
|
||
method. The @code{catgets} method indexes messages by integers; the
|
||
@code{gettext} method indexes them by their English translations.
|
||
The @code{catgets} method has been around longer and is supported
|
||
by more vendors. The @code{gettext} method is supported by Sun,
|
||
and it has been heard that the COSE multi-vendor initiative is
|
||
supporting it. Neither method is a POSIX standard; the POSIX.1
|
||
committee had a lot of disagreement in this area.
|
||
|
||
Neither one is in the POSIX standard. There was much disagreement
|
||
in the POSIX.1 committee about using the @code{gettext} routines
|
||
vs. @code{catgets} (XPG). In the end the committee couldn't
|
||
agree on anything, so no messaging system was included as part
|
||
of the standard. I believe the informative annex of the standard
|
||
includes the XPG3 messaging interfaces, ``@dots{}as an example of
|
||
a messaging system that has been implemented@dots{}''
|
||
|
||
They were very careful not to say anywhere that you should use one
|
||
set of interfaces over the other. For more on this topic please
|
||
see the Programming for Internationalization FAQ.
|
||
|
||
@node Temp catgets
|
||
@subsection Temporary - About @code{catgets}
|
||
|
||
There have been a few discussions of late on the use of
|
||
@code{catgets} as a base. I think it important to present both
|
||
sides of the argument and hence am opting to play devil's advocate
|
||
for a little bit.
|
||
|
||
I'll not deny the fact that @code{catgets} could have been designed
|
||
a lot better. It currently has quite a number of limitations and
|
||
these have already been pointed out.
|
||
|
||
However there is a great deal to be said for consistency and
|
||
standardization. A common recurring problem when writing Unix
|
||
software is the myriad portability problems across Unix platforms.
|
||
It seems as if every Unix vendor had a look at the operating system
|
||
and found parts they could improve upon. Undoubtedly, these
|
||
modifications are probably innovative and solve real problems.
|
||
However, software developers have a hard time keeping up with all
|
||
these changes across so many platforms.
|
||
|
||
And this has prompted the Unix vendors to begin to standardize their
|
||
systems. Hence the impetus for Spec1170. Every major Unix vendor
|
||
has committed to supporting this standard and every Unix software
|
||
developer waits with glee the day they can write software to this
|
||
standard and simply recompile (without having to use autoconf)
|
||
across different platforms.
|
||
|
||
As I understand it, Spec1170 is roughly based upon version 4 of the
|
||
X/Open Portability Guidelines (XPG4). Because @code{catgets} and
|
||
friends are defined in XPG4, I'm led to believe that @code{catgets}
|
||
is a part of Spec1170 and hence will become a standardized component
|
||
of all Unix systems.
|
||
|
||
@node Temp WSI
|
||
@subsection Temporary - Why a single implementation
|
||
|
||
Now it seems kind of wasteful to me to have two different systems
|
||
installed for accessing message catalogs. If we do want to remedy
|
||
@code{catgets} deficiencies why don't we try to expand @code{catgets}
|
||
(in a compatible manner) rather than implement an entirely new system?
|
||
Otherwise, we'll end up with two message catalog access systems installed
|
||
with an operating system - one set of routines for packages using GNU
|
||
@code{gettext} for their internationalization, and another set of routines
|
||
(catgets) for all other software. Bloated?
|
||
|
||
Supposing another catalog access system is implemented. Which do
|
||
we recommend? At least for Linux, we need to attract as many
|
||
software developers as possible. Hence we need to make it as easy
|
||
for them to port their software as possible. Which means supporting
|
||
@code{catgets}. We will be implementing the @code{libintl} code
|
||
within our @code{libc}, but does this mean we also have to incorporate
|
||
another message catalog access scheme within our @code{libc} as well?
|
||
And what about people who are going to be using the @code{libintl}
|
||
+ non-@code{catgets} routines? When they port their software to
|
||
other platforms, they're now going to have to include the front-end
|
||
(@code{libintl}) code plus the back-end code (the non-@code{catgets}
|
||
access routines) with their software instead of just including the
|
||
@code{libintl} code with their software.
|
||
|
||
Message catalog support is however only the tip of the iceberg.
|
||
What about the data for the other locale categories? They also have
|
||
a number of deficiencies. Are we going to abandon them as well and
|
||
develop another duplicate set of routines (should @code{libintl}
|
||
expand beyond message catalog support)?
|
||
|
||
Like many parts of Unix that can be improved upon, we're stuck with balancing
|
||
compatibility with the past with useful improvements and innovations for
|
||
the future.
|
||
|
||
@node Temp Notes
|
||
@subsection Temporary - Notes
|
||
|
||
X/Open agreed very late on the standard form so that many
|
||
implementations differ from the final form. Both of my systems (old
|
||
Linux catgets and Ultrix-4) have a strange variation.
|
||
|
||
OK. After incorporating the last changes I have to spend some time on
|
||
making the GNU/Linux @code{libc} @code{gettext} functions. So in future
|
||
Solaris is not the only system having @code{gettext}.
|
||
|
||
@node Translators
|
||
@chapter The Translator's View
|
||
|
||
@c FIXME: Reorganize whole chapter.
|
||
|
||
@menu
|
||
* Trans Intro 0:: Introduction 0
|
||
* Trans Intro 1:: Introduction 1
|
||
* Discussions:: Discussions
|
||
* Organization:: Organization
|
||
* Information Flow:: Information Flow
|
||
* Translating plural forms:: How to fill in @code{msgstr[0]}, @code{msgstr[1]}
|
||
* Prioritizing messages:: How to find which messages to translate first
|
||
@end menu
|
||
|
||
@node Trans Intro 0
|
||
@section Introduction 0
|
||
|
||
@strong{ NOTE: } This documentation section is outdated and needs to be
|
||
revised.
|
||
|
||
Free software is going international! The Translation Project is a way
|
||
to get maintainers, translators and users all together, so free software
|
||
will gradually become able to speak many native languages.
|
||
|
||
The GNU @code{gettext} tool set contains @emph{everything} maintainers
|
||
need for internationalizing their packages for messages. It also
|
||
contains quite useful tools for helping translators at localizing
|
||
messages to their native language, once a package has already been
|
||
internationalized.
|
||
|
||
To achieve the Translation Project, we need many interested
|
||
people who like their own language and write it well, and who are also
|
||
able to synergize with other translators speaking the same language.
|
||
If you'd like to volunteer to @emph{work} at translating messages,
|
||
please send mail to your translating team.
|
||
|
||
Each team has its own mailing list, courtesy of Linux
|
||
International. You may reach your translating team at the address
|
||
@file{@var{ll}@@li.org}, replacing @var{ll} by the two-letter @w{ISO 639}
|
||
code for your language. Language codes are @emph{not} the same as
|
||
country codes given in @w{ISO 3166}. The following translating teams
|
||
exist:
|
||
|
||
@quotation
|
||
Chinese @code{zh}, Czech @code{cs}, Danish @code{da}, Dutch @code{nl},
|
||
Esperanto @code{eo}, Finnish @code{fi}, French @code{fr}, Irish
|
||
@code{ga}, German @code{de}, Greek @code{el}, Italian @code{it},
|
||
Japanese @code{ja}, Indonesian @code{in}, Norwegian @code{no}, Polish
|
||
@code{pl}, Portuguese @code{pt}, Russian @code{ru}, Spanish @code{es},
|
||
Swedish @code{sv} and Turkish @code{tr}.
|
||
@end quotation
|
||
|
||
@noindent
|
||
For example, you may reach the Chinese translating team by writing to
|
||
@file{zh@@li.org}. When you become a member of the translating team
|
||
for your own language, you may subscribe to its list. For example,
|
||
Swedish people can send a message to @w{@file{sv-request@@li.org}},
|
||
having this message body:
|
||
|
||
@example
|
||
subscribe
|
||
@end example
|
||
|
||
Keep in mind that team members should be interested in @emph{working}
|
||
at translations, or at solving translational difficulties, rather than
|
||
merely lurking around. If your team does not exist yet and you want to
|
||
start one, please write to @w{@file{coordinator@@translationproject.org}};
|
||
you will then reach the coordinator for all translator teams.
|
||
|
||
A handful of GNU packages have already been adapted and provided
|
||
with message translations for several languages. Translation
|
||
teams have begun to organize, using these packages as a starting
|
||
point. But there are many more packages and many languages for
|
||
which we have no volunteer translators. If you would like to
|
||
volunteer to work at translating messages, please send mail to
|
||
@file{coordinator@@translationproject.org} indicating what language(s)
|
||
you can work on.
|
||
|
||
@node Trans Intro 1
|
||
@section Introduction 1
|
||
|
||
@strong{ NOTE: } This documentation section is outdated and needs to be
|
||
revised.
|
||
|
||
This is now official, GNU is going international! Here is the
|
||
announcement submitted for the January 1995 GNU Bulletin:
|
||
|
||
@quotation
|
||
A handful of GNU packages have already been adapted and provided
|
||
with message translations for several languages. Translation
|
||
teams have begun to organize, using these packages as a starting
|
||
point. But there are many more packages and many languages
|
||
for which we have no volunteer translators. If you'd like to
|
||
volunteer to work at translating messages, please send mail to
|
||
@samp{coordinator@@translationproject.org} indicating what language(s)
|
||
you can work on.
|
||
@end quotation
|
||
|
||
This document should answer many questions for those who are curious about
|
||
the process or would like to contribute. Please at least skim over it,
|
||
hoping to cut down a little of the high volume of e-mail generated by this
|
||
collective effort towards internationalization of free software.
|
||
|
||
Most free programming which is widely shared is done in English, and
|
||
currently, English is used as the main communicating language between
|
||
national communities collaborating on free software. This very document
|
||
is written in English. This will not change in the foreseeable future.
|
||
|
||
However, there is a strong appetite from national communities for
|
||
having more software able to write using national language and habits,
|
||
and there is an on-going effort to modify free software in such a way
|
||
that it becomes able to do so. The experiments driven so far raised
|
||
an enthusiastic response from pretesters, so we believe that
|
||
internationalization of free software is destined to succeed.
|
||
|
||
To suggest clarifications, additions or corrections to this
|
||
document, please e-mail to @file{coordinator@@translationproject.org}.
|
||
|
||
@node Discussions
|
||
@section Discussions
|
||
|
||
@strong{ NOTE: } This documentation section is outdated and needs to be
|
||
revised.
|
||
|
||
Facing this internationalization effort, a few users expressed their
|
||
concerns. Some of these doubts are presented and discussed, here.
|
||
|
||
@itemize @bullet
|
||
@item Smaller groups
|
||
|
||
Some languages are not spoken by a very large number of people, so people
|
||
speaking them sometimes consider that there may not be all that much
|
||
demand for such versions of free software packages. Moreover, many people
|
||
being @emph{into computers}, in some countries, generally seem to prefer
|
||
English versions of their software.
|
||
|
||
On the other hand, people might enjoy their own language a lot, and be
|
||
very motivated at providing to themselves the pleasure of having their
|
||
beloved free software speaking their mother tongue. They do themselves
|
||
a personal favor, and do not pay that much attention to the number of
|
||
people benefiting from their work.
|
||
|
||
@item Misinterpretation
|
||
|
||
Other users are shy to push forward their own language, seeing in this
|
||
some kind of misplaced propaganda. Someone thought there must be some
|
||
users of the language over the networks pestering other people with it.
|
||
|
||
But any spoken language is worth localization, because there are
|
||
people behind the language for whom the language is important and
|
||
dear to their hearts.
|
||
|
||
@item Odd translations
|
||
|
||
The biggest problem is to find the right translations so that
|
||
everybody can understand the messages. Translations are usually a
|
||
little odd. Some people get used to English, to the extent they may
|
||
find translations into their own language ``rather pushy, obnoxious
|
||
and sometimes even hilarious.'' As a French speaking man, I have
|
||
the experience of those instruction manuals for goods, so poorly
|
||
translated in French in Korea or Taiwan@dots{}
|
||
|
||
The fact is that we sometimes have to create a kind of national
|
||
computer culture, and this is not easy without the collaboration of
|
||
many people liking their mother tongue. This is why translations are
|
||
better achieved by people knowing and loving their own language, and
|
||
ready to work together at improving the results they obtain.
|
||
|
||
@item Dependencies over the GPL or LGPL
|
||
|
||
Some people wonder if using GNU @code{gettext} necessarily brings their
|
||
package under the protective wing of the GNU General Public License or
|
||
the GNU Lesser General Public License, when they do not want to make
|
||
their program free, or want other kinds of freedom. The simplest
|
||
answer is ``normally not''.
|
||
|
||
The @code{gettext-runtime} part of GNU @code{gettext}, i.e.@: the
|
||
contents of @code{libintl}, is covered by the GNU Lesser General Public
|
||
License. The @code{gettext-tools} part of GNU @code{gettext}, i.e.@: the
|
||
rest of the GNU @code{gettext} package, is covered by the GNU General
|
||
Public License.
|
||
|
||
The mere marking of localizable strings in a package, or conditional
|
||
inclusion of a few lines for initialization, is not really including
|
||
GPL'ed or LGPL'ed code. However, since the localization routines in
|
||
@code{libintl} are under the LGPL, the LGPL needs to be considered.
|
||
It gives the right to distribute the complete unmodified source of
|
||
@code{libintl} even with non-free programs. It also gives the right
|
||
to use @code{libintl} as a shared library, even for non-free programs.
|
||
But it gives the right to use @code{libintl} as a static library or
|
||
to incorporate @code{libintl} into another library only to free
|
||
software.
|
||
|
||
@end itemize
|
||
|
||
@node Organization
|
||
@section Organization
|
||
|
||
@strong{ NOTE: } This documentation section is outdated and needs to be
|
||
revised.
|
||
|
||
On a larger scale, the true solution would be to organize some kind of
|
||
fairly precise setup in which volunteers could participate. I gave
|
||
some thought to this idea lately, and realize there will be some
|
||
touchy points. I thought of writing to Richard Stallman to launch
|
||
such a project, but feel it might be good to shake out the ideas
|
||
between ourselves first. Most probably Linux International has
|
||
some experience in the field already, or would like to orchestrate
|
||
the volunteer work, maybe. Food for thought, in any case!
|
||
|
||
I guess we have to set up something early, somehow, that will help
|
||
many possible contributors of the same language to interlock and avoid
|
||
work duplication, and further be put in contact for solving together
|
||
problems particular to their tongue (in most languages, there are many
|
||
difficulties peculiar to translating technical English). My Swedish
|
||
contributor acknowledged these difficulties, and I'm well aware of
|
||
them for French.
|
||
|
||
This is surely not a technical issue, but we should manage so the
|
||
effort of locale contributors be maximally useful, despite the national
|
||
team layer interface between contributors and maintainers.
|
||
|
||
The Translation Project needs some setup for coordinating language
|
||
coordinators. Localizing evolving programs will surely
|
||
become a permanent and continuous activity in the free software community,
|
||
once well started.
|
||
The setup should be minimally completed and tested before GNU
|
||
@code{gettext} becomes an official reality. The e-mail address
|
||
@file{coordinator@@translationproject.org} has been set up for receiving
|
||
offers from volunteers and general e-mail on these topics. This address
|
||
reaches the Translation Project coordinator.
|
||
|
||
@menu
|
||
* Central Coordination:: Central Coordination
|
||
* National Teams:: National Teams
|
||
* Mailing Lists:: Mailing Lists
|
||
@end menu
|
||
|
||
@node Central Coordination
|
||
@subsection Central Coordination
|
||
|
||
I also think GNU will need sooner than it thinks, that someone set up
|
||
a way to organize and coordinate these groups. Some kind of group
|
||
of groups. My opinion is that it would be good that GNU delegates
|
||
this task to a small group of collaborating volunteers, shortly.
|
||
Perhaps in @file{gnu.announce} a list of these national committees
|
||
can be published.
|
||
|
||
My role as coordinator would simply be to refer to Ulrich any German
|
||
speaking volunteer interested in localization of free software packages, and
|
||
maybe helping national groups to initially organize, while maintaining
|
||
national registries until national groups are ready to take over.
|
||
In fact, the coordinator should ease volunteers to get in contact with
|
||
one another for creating national teams, which should then select
|
||
one coordinator per language, or country (regionalized language).
|
||
If well done, the coordination should be useful without being an
|
||
overwhelming task, the time to put delegations in place.
|
||
|
||
@node National Teams
|
||
@subsection National Teams
|
||
|
||
I suggest we look for volunteer coordinators/editors for individual
|
||
languages. These people will scan contributions of translation files
|
||
for various programs, for their own languages, and will ensure high
|
||
and uniform standards of diction.
|
||
|
||
From my current experience with other people in these days, those who
|
||
provide localizations are very enthusiastic about the process, and are
|
||
more interested in the localization process than in the program they
|
||
localize, and want to do many programs, not just one. This seems
|
||
to confirm that having a coordinator/editor for each language is a
|
||
good idea.
|
||
|
||
We need to choose someone who is good at writing clear and concise
|
||
prose in the language in question. That is hard---we can't check
|
||
it ourselves. So we need to ask a few people to judge each others'
|
||
writing and select the one who is best.
|
||
|
||
I announced my prerelease to a few dozen people, and you would not
|
||
believe all the discussions it generated already. I shudder to think
|
||
what will happen when this will be launched, for true, officially,
|
||
world wide. Who am I to arbitrate between two Czechoslovak users
|
||
contradicting each other, for example?
|
||
|
||
I assume that your German is not much better than my French so that
|
||
I would not be able to judge about these formulations. What I would
|
||
suggest is that for each language there is a group of people who
|
||
maintain the PO files and judge about changes. I suspect there will
|
||
be cultural differences between how such groups of people will behave.
|
||
Some will have relaxed ways, reach consensus easily, and have anyone
|
||
of the group relate to the maintainers, while others will fight to
|
||
death, organize heavy administrations up to national standards, and
|
||
use strict channels.
|
||
|
||
The German team is putting out a good example. Right now, they are
|
||
maybe half a dozen people revising translations of each other and
|
||
discussing the linguistic issues. I do not even have all the names.
|
||
Ulrich Drepper is taking care of coordinating the German team.
|
||
He subscribed to all my pretest lists, so I do not even have to warn
|
||
him specifically of incoming releases.
|
||
|
||
I'm sure that it is a good idea to get teams for each language working
|
||
on translations. That will make the translations better and more
|
||
consistent.
|
||
|
||
@menu
|
||
* Sub-Cultures:: Sub-Cultures
|
||
* Organizational Ideas:: Organizational Ideas
|
||
@end menu
|
||
|
||
@node Sub-Cultures
|
||
@subsubsection Sub-Cultures
|
||
|
||
Taking French for example, there are a few sub-cultures around computers
|
||
which developed diverging vocabularies. Picking volunteers here and
|
||
there without addressing this problem in an organized way, soon in the
|
||
project, might produce a distasteful mix of internationalized programs,
|
||
and possibly trigger endless quarrels among those who really care.
|
||
|
||
Keeping some kind of unity in the way French localization of
|
||
internationalized programs is achieved is a difficult (and delicate) job.
|
||
Knowing the latin character of French people (:-), if we take this
|
||
the wrong way, we could end up nowhere, or spoil a lot of energies.
|
||
Maybe we should begin to address this problem seriously @emph{before}
|
||
GNU @code{gettext} becomes officially published. And I suspect that this
|
||
means soon!
|
||
|
||
@node Organizational Ideas
|
||
@subsubsection Organizational Ideas
|
||
|
||
I expect the next big changes after the official release. Please note
|
||
that I use the German translation of the short GPL message. We need
|
||
to set a few good examples before the localization goes out for true
|
||
in the free software community. Here are a few points to discuss:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
Each group should have one FTP server (at least one master).
|
||
|
||
@item
|
||
The files on the server should reflect the latest version (of
|
||
course!) and it should also contain a RCS directory with the
|
||
corresponding archives (I don't have this now).
|
||
|
||
@item
|
||
There should also be a ChangeLog file (this is more useful than the
|
||
RCS archive but can be generated automatically from the latter by
|
||
Emacs).
|
||
|
||
@item
|
||
A @dfn{core group} should judge about questionable changes (for now
|
||
this group consists solely of me but I ask some others occasionally;
|
||
this also seems to work).
|
||
|
||
@end itemize
|
||
|
||
@node Mailing Lists
|
||
@subsection Mailing Lists
|
||
|
||
If we get any inquiries about GNU @code{gettext}, send them on to:
|
||
|
||
@example
|
||
@file{coordinator@@translationproject.org}
|
||
@end example
|
||
|
||
The @file{*-pretest} lists are quite useful to me, maybe the idea could
|
||
be generalized to many GNU, and non-GNU packages. But each maintainer
|
||
his/her way!
|
||
|
||
Fran@,{c}ois, we have a mechanism in place here at
|
||
@file{gnu.ai.mit.edu} to track teams, support mailing lists for
|
||
them and log members. We have a slight preference that you use it.
|
||
If this is OK with you, I can get you clued in.
|
||
|
||
Things are changing! A few years ago, when Daniel Fekete and I
|
||
asked for a mailing list for GNU localization, nested at the FSF, we
|
||
were politely invited to organize it anywhere else, and so did we.
|
||
For communicating with my pretesters, I later made a handful of
|
||
mailing lists located at iro.umontreal.ca and administrated by
|
||
@code{majordomo}. These lists have been @emph{very} dependable
|
||
so far@dots{}
|
||
|
||
I suspect that the German team will organize itself a mailing list
|
||
located in Germany, and so forth for other countries. But before they
|
||
organize for true, it could surely be useful to offer mailing lists
|
||
located at the FSF to each national team. So yes, please explain to me
|
||
how I should proceed to create and handle them.
|
||
|
||
We should create temporary mailing lists, one per country, to help
|
||
people organize. Temporary, because once regrouped and structured, it
|
||
would be fair that the volunteers from each country bring back @emph{their} list
|
||
in there and manage it as they want. My feeling is that, in the long
|
||
run, each team should run its own list, from within their country.
|
||
There also should be some central list to which all teams could
|
||
subscribe as they see fit, as long as each team is represented in it.
|
||
|
||
@node Information Flow
|
||
@section Information Flow
|
||
|
||
@strong{ NOTE: } This documentation section is outdated and needs to be
|
||
revised.
|
||
|
||
There will surely be some discussion about these messages after the
|
||
packages are finally released. If people now send you some proposals
|
||
for better messages, how do you proceed? Jim, please note that
|
||
right now, as I put forward nearly a dozen localizable programs, I
|
||
receive both the translations and the coordination concerns about them.
|
||
|
||
If I put one of my things to pretest, Ulrich receives the announcement
|
||
and passes it on to the German team, who make last minute revisions.
|
||
Then he submits the translation files to me @emph{as the maintainer}.
|
||
For free packages I do not maintain, I would not even hear about it.
|
||
This scheme could be made to work for the whole Translation Project,
|
||
I think. For security reasons, maybe Ulrich (national coordinators,
|
||
in fact) should update the central registry kept at the Translation Project
|
||
(Jim, me, or Len's recruits) once in a while.
|
||
|
||
In December/January, I was aggressively ready to internationalize
|
||
all of GNU, giving myself the duty of one small GNU package per week
|
||
or so, taking many weeks or months for bigger packages. But it does
|
||
not work this way. I first did all the things I'm responsible for.
|
||
I've nothing against some missionary work on other maintainers, but
|
||
I'm also losing a lot of energy over it---same debates over again.
|
||
|
||
And when the first localized packages are released we'll get a lot of
|
||
responses about ugly translations :-). Surely, and we need to have
|
||
beforehand a fairly good idea about how to handle the information
|
||
flow between the national teams and the package maintainers.
|
||
|
||
Please start saving somewhere a quick history of each PO file. I know
|
||
for sure that the file format will change, allowing for comments.
|
||
It would be nice that each file has a kind of log, and references for
|
||
those who want to submit comments or gripes, or otherwise contribute.
|
||
I sent a proposal for a fast and flexible format, but it is not
|
||
receiving acceptance yet by the GNU deciders. I'll tell you when I
|
||
have more information about this.
|
||
|
||
@node Translating plural forms
|
||
@section Translating plural forms
|
||
|
||
@cindex plural forms, translating
|
||
Suppose you are translating a PO file, and it contains an entry like this:
|
||
|
||
@smallexample
|
||
#, c-format
|
||
msgid "One file removed"
|
||
msgid_plural "%d files removed"
|
||
msgstr[0] ""
|
||
msgstr[1] ""
|
||
@end smallexample
|
||
|
||
@noindent
|
||
What does this mean? How do you fill it in?
|
||
|
||
Such an entry denotes a message with plural forms, that is, a message where
|
||
the text depends on a cardinal number. The general form of the message,
|
||
in English, is the @code{msgid_plural} line. The @code{msgid} line is the
|
||
English singular form, that is, the form for when the number is equal to 1.
|
||
More details about plural forms are explained in @ref{Plural forms}.
|
||
|
||
The first thing you need to look at is the @code{Plural-Forms} line in the
|
||
header entry of the PO file. It contains the number of plural forms and a
|
||
formula. If the PO file does not yet have such a line, you have to add it.
|
||
It only depends on the language into which you are translating. You can
|
||
get this info by using the @code{msginit} command (see @ref{Creating}) --
|
||
it contains a database of known plural formulas -- or by asking other
|
||
members of your translation team.
|
||
|
||
Suppose the line looks as follows:
|
||
|
||
@smallexample
|
||
"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && n"
|
||
"%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;\n"
|
||
@end smallexample
|
||
|
||
It's logically one line; recall that the PO file formatting is allowed to
|
||
break long lines so that each physical line fits in 80 monospaced columns.
|
||
|
||
The value of @code{nplurals} here tells you that there are three plural
|
||
forms. The first thing you need to do is to ensure that the entry contains
|
||
an @code{msgstr} line for each of the forms:
|
||
|
||
@smallexample
|
||
#, c-format
|
||
msgid "One file removed"
|
||
msgid_plural "%d files removed"
|
||
msgstr[0] ""
|
||
msgstr[1] ""
|
||
msgstr[2] ""
|
||
@end smallexample
|
||
|
||
Then translate the @code{msgid_plural} line and fill it in into each
|
||
@code{msgstr} line:
|
||
|
||
@smallexample
|
||
#, c-format
|
||
msgid "One file removed"
|
||
msgid_plural "%d files removed"
|
||
msgstr[0] "%d slika uklonjenih"
|
||
msgstr[1] "%d slika uklonjenih"
|
||
msgstr[2] "%d slika uklonjenih"
|
||
@end smallexample
|
||
|
||
Now you can refine the translation so that it matches the plural form.
|
||
According to the formula above, @code{msgstr[0]} is used when the number
|
||
ends in 1 but does not end in 11; @code{msgstr[1]} is used when the number
|
||
ends in 2, 3, 4, but not in 12, 13, 14; and @code{msgstr[2]} is used in
|
||
all other cases. With this knowledge, you can refine the translations:
|
||
|
||
@smallexample
|
||
#, c-format
|
||
msgid "One file removed"
|
||
msgid_plural "%d files removed"
|
||
msgstr[0] "%d slika je uklonjena"
|
||
msgstr[1] "%d datoteke uklonjenih"
|
||
msgstr[2] "%d slika uklonjenih"
|
||
@end smallexample
|
||
|
||
You noticed that in the English singular form (@code{msgid}) the number
|
||
placeholder could be omitted and replaced by the numeral word ``one''.
|
||
Can you do this in your translation as well?
|
||
|
||
@smallexample
|
||
msgstr[0] "jednom datotekom je uklonjen"
|
||
@end smallexample
|
||
|
||
@noindent
|
||
Well, it depends on whether @code{msgstr[0]} applies only to the number 1,
|
||
or to other numbers as well. If, according to the plural formula,
|
||
@code{msgstr[0]} applies only to @code{n == 1}, then you can use the
|
||
specialized translation without the number placeholder. In our case,
|
||
however, @code{msgstr[0]} also applies to the numbers 21, 31, 41, etc.,
|
||
and therefore you cannot omit the placeholder.
|
||
|
||
@node Prioritizing messages
|
||
@section Prioritizing messages: How to determine which messages to translate first
|
||
|
||
A translator sometimes has only a limited amount of time per week to
|
||
spend on a package, and some packages have quite large message catalogs
|
||
(over 1000 messages). Therefore she wishes to translate the messages
|
||
first that are the most visible to the user, or that occur most frequently.
|
||
This section describes how to determine these "most urgent" messages.
|
||
It also applies to determine the "next most urgent" messages after the
|
||
message catalog has already been partially translated.
|
||
|
||
In a first step, she uses the programs like a user would do. While she
|
||
does this, the GNU @code{gettext} library logs into a file the not yet
|
||
translated messages for which a translation was requested from the program.
|
||
|
||
In a second step, she uses the PO mode to translate precisely this set
|
||
of messages.
|
||
|
||
@vindex GETTEXT_LOG_UNTRANSLATED@r{, environment variable}
|
||
Here are more details. The GNU @code{libintl} library (but not the
|
||
corresponding functions in GNU @code{libc}) supports an environment variable
|
||
@code{GETTEXT_LOG_UNTRANSLATED}, whose value names a log file. The GNU @code{libintl} library will
|
||
log into this file the messages for which @code{gettext()} and related
|
||
functions couldn't find the translation. If the file doesn't exist, it
|
||
will be created as needed. On systems with GNU @code{libc} a shared library
|
||
@samp{preloadable_libintl.so} is provided that can be used with the ELF
|
||
@samp{LD_PRELOAD} mechanism.
|
||
|
||
So, in the first step, the translator uses these commands on systems with
|
||
GNU @code{libc}:
|
||
|
||
@smallexample
|
||
$ LD_PRELOAD=/usr/local/lib/preloadable_libintl.so
|
||
$ export LD_PRELOAD
|
||
$ GETTEXT_LOG_UNTRANSLATED=$HOME/gettextlogused
|
||
$ export GETTEXT_LOG_UNTRANSLATED
|
||
@end smallexample
|
||
|
||
@noindent
|
||
and these commands on other systems:
|
||
|
||
@smallexample
|
||
$ GETTEXT_LOG_UNTRANSLATED=$HOME/gettextlogused
|
||
$ export GETTEXT_LOG_UNTRANSLATED
|
||
@end smallexample
|
||
|
||
Then she uses and peruses the programs. (It is a good and recommended
|
||
practice to use the programs for which you provide translations: it
|
||
gives you the needed context.) When done, she removes the environment
|
||
variables:
|
||
|
||
@smallexample
|
||
$ unset LD_PRELOAD
|
||
$ unset GETTEXT_LOG_UNTRANSLATED
|
||
@end smallexample
|
||
|
||
The second step starts with removing duplicates:
|
||
|
||
@smallexample
|
||
$ msguniq $HOME/gettextlogused > missing.po
|
||
@end smallexample
|
||
|
||
The result is a PO file, but needs some preprocessing before a PO file editor
|
||
can be used with it. First, it is a multi-domain PO file, containing
|
||
messages from many translation domains. Second, it lacks all translator
|
||
comments and source references. Here is how to get a list of the affected
|
||
translation domains:
|
||
|
||
@smallexample
|
||
$ sed -n -e 's,^domain "\(.*\)"$,\1,p' < missing.po | sort | uniq
|
||
@end smallexample
|
||
|
||
Then the translator can handle the domains one by one. For simplicity,
|
||
let's use environment variables to denote the language, domain and source
|
||
package.
|
||
|
||
@smallexample
|
||
$ lang=nl # your language
|
||
$ domain=coreutils # the name of the domain to be handled
|
||
$ package=/usr/src/gnu/coreutils-4.5.4 # the package where it comes from
|
||
@end smallexample
|
||
|
||
She takes the latest copy of @file{$lang.po} from the Translation Project,
|
||
or from the package (in most cases, @file{$package/po/$lang.po}), or
|
||
creates a fresh one if she's the first translator (see @ref{Creating}).
|
||
She then uses the following commands to mark the not urgent messages as
|
||
"obsolete". (This doesn't mean that these messages - translated and
|
||
untranslated ones - will go away. It simply means that the PO file editor
|
||
will ignore them in the following editing session.)
|
||
|
||
@smallexample
|
||
$ msggrep --domain=$domain missing.po | grep -v '^domain' \
|
||
> $domain-missing.po
|
||
$ msgattrib --set-obsolete --ignore-file $domain-missing.po $domain.$lang.po \
|
||
> $domain.$lang-urgent.po
|
||
@end smallexample
|
||
|
||
Then she translates @file{$domain.$lang-urgent.po} by use of a PO file editor
|
||
(@pxref{Editing}).
|
||
(FIXME: I don't know whether @code{KBabel} and @code{gtranslator} also
|
||
preserve obsolete messages, as they should.)
|
||
Finally she restores the not urgent messages (with their earlier
|
||
translations, for those which were already translated) through this command:
|
||
|
||
@smallexample
|
||
$ msgmerge --no-fuzzy-matching $domain.$lang-urgent.po $package/po/$domain.pot \
|
||
> $domain.$lang.po
|
||
@end smallexample
|
||
|
||
Then she can submit @file{$domain.$lang.po} and proceed to the next domain.
|
||
|
||
@node Maintainers
|
||
@chapter The Maintainer's View
|
||
@cindex package maintainer's view of @code{gettext}
|
||
|
||
The maintainer of a package has many responsibilities. One of them
|
||
is ensuring that the package will install easily on many platforms,
|
||
and that the magic we described earlier (@pxref{Users}) will work
|
||
for installers and end users.
|
||
|
||
Of course, there are many possible ways by which GNU @code{gettext}
|
||
might be integrated in a distribution, and this chapter does not cover
|
||
them in all generality. Instead, it details one possible approach which
|
||
is especially adequate for many free software distributions following GNU
|
||
standards, or even better, Gnits standards, because GNU @code{gettext}
|
||
is purposely for helping the internationalization of the whole GNU
|
||
project, and as many other good free packages as possible. So, the
|
||
maintainer's view presented here presumes that the package already has
|
||
a @file{configure.ac} file and uses GNU Autoconf.
|
||
|
||
Nevertheless, GNU @code{gettext} may surely be useful for free packages
|
||
not following GNU standards and conventions, but the maintainers of such
|
||
packages might have to show imagination and initiative in organizing
|
||
their distributions so @code{gettext} works for them in all situations.
|
||
There are surely many, out there.
|
||
|
||
Even if @code{gettext} methods are now stabilizing, slight adjustments
|
||
might be needed between successive @code{gettext} versions, so you
|
||
should ideally review this chapter in subsequent releases, looking
|
||
for changes.
|
||
|
||
@menu
|
||
* Flat and Non-Flat:: Flat or Non-Flat Directory Structures
|
||
* Prerequisites:: Prerequisite Works
|
||
* gettextize Invocation:: Invoking the @code{gettextize} Program
|
||
* Adjusting Files:: Files You Must Create or Alter
|
||
* autoconf macros:: Autoconf macros for use in @file{configure.ac}
|
||
* Version Control Issues::
|
||
* Release Management:: Creating a Distribution Tarball
|
||
@end menu
|
||
|
||
@node Flat and Non-Flat
|
||
@section Flat or Non-Flat Directory Structures
|
||
|
||
Some free software packages are distributed as @code{tar} files which unpack
|
||
in a single directory; these are said to be @dfn{flat} distributions.
|
||
Other free software packages have a one level hierarchy of subdirectories, using
|
||
for example a subdirectory named @file{doc/} for the Texinfo manual and
|
||
man pages, another called @file{lib/} for holding functions meant to
|
||
replace or complement C libraries, and a subdirectory @file{src/} for
|
||
holding the proper sources for the package. These other distributions
|
||
are said to be @dfn{non-flat}.
|
||
|
||
We cannot say much about flat distributions. A flat
|
||
directory structure has the disadvantage of increasing the difficulty
|
||
of updating to a new version of GNU @code{gettext}. Also, if you have
|
||
many PO files, this could somewhat pollute your single directory.
|
||
Also, GNU @code{gettext}'s libintl sources consist of C sources, shell
|
||
scripts, @code{sed} scripts and complicated Makefile rules, which don't
|
||
fit well into an existing flat structure. For these reasons, we
|
||
recommend using the non-flat approach in this case as well.
|
||
|
||
Maybe because GNU @code{gettext} itself has a non-flat structure,
|
||
we have more experience with this approach, and this is what will be
|
||
described in the remainder of this chapter. Some maintainers might
|
||
use this as an opportunity to unflatten their package structure.
|
||
|
||
@node Prerequisites
|
||
@section Prerequisite Works
|
||
@cindex converting a package to use @code{gettext}
|
||
@cindex migration from earlier versions of @code{gettext}
|
||
@cindex upgrading to new versions of @code{gettext}
|
||
|
||
There are some works which are required for using GNU @code{gettext}
|
||
in one of your packages.  These works have some kind of generality
|
||
that escapes the point by point descriptions used in the remainder
|
||
of this chapter. So, we describe them here.
|
||
|
||
@itemize @bullet
|
||
@item
|
||
Before attempting to use @code{gettextize} you should install some
|
||
other packages first.
|
||
Ensure that recent versions of GNU @code{m4}, GNU Autoconf and GNU
|
||
@code{gettext} are already installed at your site, and if not, proceed
|
||
to do this first. If you get to install these things, beware that
|
||
GNU @code{m4} must be fully installed before GNU Autoconf is even
|
||
@emph{configured}.
|
||
|
||
To further ease the task of a package maintainer the @code{automake}
|
||
package was designed and implemented. GNU @code{gettext} now uses this
|
||
tool and the @file{Makefile} in the @file{po/} directory therefore
|
||
knows about all the goals necessary for using @code{automake}.
|
||
|
||
Those four packages are only needed by you, as a maintainer; the
|
||
installers of your own package and end users do not really need any of
|
||
GNU @code{m4}, GNU Autoconf, GNU @code{gettext}, or GNU @code{automake}
|
||
for successfully installing and running your package, with messages
|
||
properly translated. But this is not completely true if you provide
|
||
internationalized shell scripts within your own package: GNU
|
||
@code{gettext} shall then be installed at the user site if the end users
|
||
want to see the translation of shell script messages.
|
||
|
||
@item
|
||
Your package should use Autoconf and have a @file{configure.ac} or
|
||
@file{configure.in} file.
|
||
If it does not, you have to learn how. The Autoconf documentation
|
||
is quite well written; it is a good idea to print it and get
|
||
familiar with it.
|
||
|
||
@item
|
||
Your C sources should have already been modified according to
|
||
instructions given earlier in this manual. @xref{Sources}.
|
||
|
||
@item
|
||
Your @file{po/} directory should receive all PO files submitted to you
|
||
by the translator teams, each having @file{@var{ll}.po} as a name.
|
||
It is not usually easy to get translation
|
||
work done before your package gets internationalized and available!
|
||
Since the cycle has to start somewhere, the easiest for the maintainer
|
||
is to start with absolutely no PO files, and wait until various
|
||
translator teams get interested in your package, and submit PO files.
|
||
|
||
@end itemize
|
||
|
||
It is worth adding here a few words about how the maintainer should
|
||
ideally behave with PO file submissions.  As a maintainer, your role is
|
||
to authenticate the origin of the submission as being the representative
|
||
of the appropriate translating teams of the Translation Project (forward
|
||
the submission to @file{coordinator@@translationproject.org} in case of doubt),
|
||
to ensure that the PO file format is not severely broken and does not
|
||
prevent successful installation, and for the rest, to merely put these
|
||
PO files in @file{po/} for distribution.
|
||
|
||
As a maintainer, you do not have to take on your shoulders the
|
||
responsibility of checking if the translations are adequate or
|
||
complete, and should avoid diving into linguistic matters. Translation
|
||
teams drive themselves and are fully responsible for their linguistic
|
||
choices for the Translation Project. Keep in mind that translator teams are @emph{not}
|
||
driven by maintainers. You can help by carefully redirecting all
|
||
communications and reports from users about linguistic matters to the
|
||
appropriate translation team, or explain to users how to reach or join
|
||
their team.
|
||
|
||
Maintainers should @emph{never ever} apply PO file bug reports
|
||
themselves, short-cutting translation teams. If some translator has
|
||
difficulty getting some of her points through her team, it should not be
|
||
an option for her to directly negotiate translations with maintainers.
|
||
Teams ought to settle their problems themselves, if any. If you, as
|
||
a maintainer, ever think there is a real problem with a team, please
|
||
never try to @emph{solve} a team's problem on your own.
|
||
|
||
@node gettextize Invocation
|
||
@section Invoking the @code{gettextize} Program
|
||
|
||
@include gettextize.texi
|
||
|
||
@node Adjusting Files
|
||
@section Files You Must Create or Alter
|
||
@cindex @code{gettext} files
|
||
|
||
Besides files which are automatically added through @code{gettextize},
|
||
there are many files needing revision for properly interacting with
|
||
GNU @code{gettext}. If you are closely following GNU standards for
|
||
Makefile engineering and auto-configuration, the adaptations should
|
||
be easier to achieve. Here is a point by point description of the
|
||
changes needed in each.
|
||
|
||
So, here comes a list of files, each one followed by a description of
|
||
all alterations it needs.  Many examples are taken from the GNU
|
||
@code{gettext} @value{VERSION} distribution itself, or from the GNU
|
||
@code{hello} distribution (@uref{https://www.gnu.org/software/hello}).
|
||
You may indeed refer to the source code of the GNU @code{gettext} and
|
||
GNU @code{hello} packages, as they are intended to be good examples for
|
||
using GNU gettext functionality.
|
||
|
||
@menu
|
||
* po/POTFILES.in:: @file{POTFILES.in} in @file{po/}
|
||
* po/LINGUAS:: @file{LINGUAS} in @file{po/}
|
||
* po/Makevars:: @file{Makevars} in @file{po/}
|
||
* po/Rules-*:: Extending @file{Makefile} in @file{po/}
|
||
* configure.ac:: @file{configure.ac} at top level
|
||
* config.guess:: @file{config.guess}, @file{config.sub} at top level
|
||
* mkinstalldirs:: @file{mkinstalldirs} at top level
|
||
* aclocal:: @file{aclocal.m4} at top level
|
||
* config.h.in:: @file{config.h.in} at top level
|
||
* Makefile:: @file{Makefile.in} at top level
|
||
* src/Makefile:: @file{Makefile.in} in @file{src/}
|
||
* lib/gettext.h:: @file{gettext.h} in @file{lib/}
|
||
@end menu
|
||
|
||
@node po/POTFILES.in
|
||
@subsection @file{POTFILES.in} in @file{po/}
|
||
@cindex @file{POTFILES.in} file
|
||
|
||
The @file{po/} directory should receive a file named
|
||
@file{POTFILES.in}. This file tells which files, among all program
|
||
sources, have marked strings needing translation. Here is an example
|
||
of such a file:
|
||
|
||
@example
|
||
@group
|
||
# List of source files containing translatable strings.
|
||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||
|
||
# Common library files
|
||
lib/error.c
|
||
lib/getopt.c
|
||
lib/xmalloc.c
|
||
|
||
# Package source files
|
||
src/gettext.c
|
||
src/msgfmt.c
|
||
src/xgettext.c
|
||
@end group
|
||
@end example
|
||
|
||
@noindent
|
||
Hash-marked comments and white lines are ignored. All other lines
|
||
list those source files containing strings marked for translation
|
||
(@pxref{Mark Keywords}), in a notation relative to the top level
|
||
of your whole distribution, rather than the location of the
|
||
@file{POTFILES.in} file itself.
|
||
|
||
When a C file is automatically generated by a tool, like @code{flex} or
|
||
@code{bison}, that doesn't introduce translatable strings by itself,
|
||
it is recommended to list in @file{po/POTFILES.in} the real source file
|
||
(ending in @file{.l} in the case of @code{flex}, or in @file{.y} in the
|
||
case of @code{bison}), not the generated C file.
|
||
|
||
@node po/LINGUAS
|
||
@subsection @file{LINGUAS} in @file{po/}
|
||
@cindex @file{LINGUAS} file
|
||
|
||
The @file{po/} directory should also receive a file named
|
||
@file{LINGUAS}. This file contains the list of available translations.
|
||
It is a whitespace separated list. Hash-marked comments and white lines
|
||
are ignored. Here is an example file:
|
||
|
||
@example
|
||
@group
|
||
# Set of available languages.
|
||
de fr
|
||
@end group
|
||
@end example
|
||
|
||
@noindent
|
||
This example means that German and French PO files are available, so
|
||
that these languages are currently supported by your package. If you
|
||
want to further restrict, at installation time, the set of installed
|
||
languages, this should not be done by modifying the @file{LINGUAS} file,
|
||
but rather by using the @code{LINGUAS} environment variable
|
||
(@pxref{Installers}).
|
||
|
||
It is recommended that you add the "languages" @samp{en@@quot} and
|
||
@samp{en@@boldquot} to the @code{LINGUAS} file. @code{en@@quot} is a
|
||
variant of English message catalogs (@code{en}) which uses real quotation
|
||
marks instead of the ugly looking asymmetric ASCII substitutes @samp{`}
|
||
and @samp{'}. @code{en@@boldquot} is a variant of @code{en@@quot} that
|
||
additionally outputs quoted pieces of text in a bold font, when used in
|
||
a terminal emulator which supports the VT100 escape sequences (such as
|
||
@code{xterm} or the Linux console, but not Emacs in @kbd{M-x shell} mode).
|
||
|
||
These extra message catalogs @samp{en@@quot} and @samp{en@@boldquot}
|
||
are constructed automatically, not by translators; to support them, you
|
||
need the files @file{Rules-quot}, @file{quot.sed}, @file{boldquot.sed},
|
||
@file{en@@quot.header}, @file{en@@boldquot.header}, @file{insert-header.sin}
|
||
in the @file{po/} directory. You can copy them from GNU gettext's @file{po/}
|
||
directory; they are also installed by running @code{gettextize}.
|
||
|
||
@node po/Makevars
|
||
@subsection @file{Makevars} in @file{po/}
|
||
@cindex @file{Makevars} file
|
||
|
||
The @file{po/} directory also has a file named @file{Makevars}. It
|
||
contains variables that are specific to your project. @file{po/Makevars}
|
||
gets inserted into the @file{po/Makefile} when the latter is created.
|
||
The variables thus take effect when the POT file is created or updated,
|
||
and when the message catalogs get installed.
|
||
|
||
The first three variables can be left unmodified if your package has a
|
||
single message domain and, accordingly, a single @file{po/} directory.
|
||
Only packages which have multiple @file{po/} directories at different
|
||
locations need to adjust the first three variables defined in
|
||
@file{Makevars}.
|
||
|
||
As an alternative to the @code{XGETTEXT_OPTIONS} variable, it is also
|
||
possible to specify @code{xgettext} options through the
|
||
@code{AM_XGETTEXT_OPTION} autoconf macro. See @ref{AM_XGETTEXT_OPTION}.
|
||
|
||
@node po/Rules-*
|
||
@subsection Extending @file{Makefile} in @file{po/}
|
||
@cindex @file{Makefile.in.in} extensions
|
||
|
||
All files called @file{Rules-*} in the @file{po/} directory get appended to
|
||
the @file{po/Makefile} when it is created. They present an opportunity to
|
||
add rules for special PO files to the Makefile, without needing to mess
|
||
with @file{po/Makefile.in.in}.
|
||
|
||
@cindex quotation marks
|
||
@vindex LANGUAGE@r{, environment variable}
|
||
GNU gettext comes with a @file{Rules-quot} file, containing rules for
|
||
building catalogs @file{en@@quot.po} and @file{en@@boldquot.po}. The
|
||
effect of @file{en@@quot.po} is that people who set their @code{LANGUAGE}
|
||
environment variable to @samp{en@@quot} will get messages with proper
|
||
looking symmetric Unicode quotation marks instead of abusing the ASCII
|
||
grave accent and the ASCII apostrophe for indicating quotations. To
|
||
enable this catalog, simply add @code{en@@quot} to the @file{po/LINGUAS}
|
||
file. The effect of @file{en@@boldquot.po} is that people who set
|
||
@code{LANGUAGE} to @samp{en@@boldquot} will get not only proper quotation
|
||
marks, but also the quoted text will be shown in a bold font on terminals
|
||
and consoles. This catalog is useful only for command-line programs, not
|
||
GUI programs. To enable it, similarly add @code{en@@boldquot} to the
|
||
@file{po/LINGUAS} file.
|
||
|
||
Similarly, you can create rules for building message catalogs for the
|
||
@file{sr@@latin} locale -- Serbian written with the Latin alphabet --
|
||
from those for the @file{sr} locale -- Serbian written with Cyrillic
|
||
letters. See @ref{msgfilter Invocation}.
|
||
|
||
@node configure.ac
|
||
@subsection @file{configure.ac} at top level
|
||
|
||
@file{configure.ac} or @file{configure.in} - this is the source from which
|
||
@code{autoconf} generates the @file{configure} script.
|
||
|
||
@enumerate
|
||
@item Declare the package and version.
|
||
@cindex package and version declaration in @file{configure.ac}
|
||
|
||
This is done by a set of lines like these:
|
||
|
||
@example
|
||
PACKAGE=gettext
|
||
VERSION=@value{VERSION}
|
||
AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE")
|
||
AC_DEFINE_UNQUOTED(VERSION, "$VERSION")
|
||
AC_SUBST(PACKAGE)
|
||
AC_SUBST(VERSION)
|
||
@end example
|
||
|
||
@noindent
|
||
or, if you are using GNU @code{automake}, by a line like this:
|
||
|
||
@example
|
||
AM_INIT_AUTOMAKE(gettext, @value{VERSION})
|
||
@end example
|
||
|
||
@noindent
|
||
Of course, you replace @samp{gettext} with the name of your package,
|
||
and @samp{@value{VERSION}} by its version numbers, exactly as they
|
||
should appear in the packaged @code{tar} file name of your distribution
|
||
(@file{gettext-@value{VERSION}.tar.gz}, here).
|
||
|
||
@item Check for internationalization support.
|
||
|
||
Here is the main @code{m4} macro for triggering internationalization
|
||
support. Just add this line to @file{configure.ac}:
|
||
|
||
@example
|
||
AM_GNU_GETTEXT([external])
|
||
@end example
|
||
|
||
@noindent
|
||
This call is purposely simple, even if it generates a lot of configure
|
||
time checking and actions.
|
||
|
||
@item Have output files created.
|
||
|
||
The @code{AC_OUTPUT} directive, at the end of your @file{configure.ac}
|
||
file, needs to be modified in two ways:
|
||
|
||
@example
|
||
AC_OUTPUT([@var{existing configuration files} po/Makefile.in],
|
||
[@var{existing additional actions}])
|
||
@end example
|
||
|
||
The modification to the first argument to @code{AC_OUTPUT} asks
|
||
for substitution in the @file{po/} directory.
|
||
Note the @samp{.in} suffix used for @file{po/} only. This is because
|
||
the distributed file is really @file{po/Makefile.in.in}.
|
||
|
||
@end enumerate
|
||
|
||
@node config.guess
|
||
@subsection @file{config.guess}, @file{config.sub} at top level
|
||
|
||
You need to add the GNU @file{config.guess} and @file{config.sub} files
|
||
to your distribution. They are needed because the @code{AM_ICONV} macro
|
||
contains knowledge about specific platforms and therefore needs to
|
||
identify the platform.
|
||
|
||
You can obtain the newest version of @file{config.guess} and
|
||
@file{config.sub} from the @samp{config} project at
|
||
@file{https://savannah.gnu.org/}. The commands to fetch them are
|
||
@smallexample
|
||
$ wget -O config.guess 'https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD'
|
||
$ wget -O config.sub 'https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD'
|
||
@end smallexample
|
||
@noindent
|
||
Less recent versions are also contained in the GNU @code{automake} and
|
||
GNU @code{libtool} packages.
|
||
|
||
Normally, @file{config.guess} and @file{config.sub} are put at the
|
||
top level of a distribution. But it is also possible to put them in a
|
||
subdirectory, together with other configuration support files like
|
||
@file{install-sh}, @file{ltconfig}, @file{ltmain.sh} or @file{missing}.
|
||
All you need to do, other than moving the files, is to add the following line
|
||
to your @file{configure.ac}.
|
||
|
||
@example
|
||
AC_CONFIG_AUX_DIR([@var{subdir}])
|
||
@end example
|
||
|
||
@node mkinstalldirs
|
||
@subsection @file{mkinstalldirs} at top level
|
||
@cindex @file{mkinstalldirs} file
|
||
|
||
With earlier versions of GNU gettext, you needed to add the GNU
|
||
@file{mkinstalldirs} script to your distribution. This is not needed any
|
||
more. You can remove it.
|
||
|
||
@node aclocal
|
||
@subsection @file{aclocal.m4} at top level
|
||
@cindex @file{aclocal.m4} file
|
||
|
||
If you do not have an @file{aclocal.m4} file in your distribution,
|
||
the simplest is to concatenate the files @file{gettext.m4},
|
||
@file{host-cpu-c-abi.m4}, @file{intlmacosx.m4}, @file{iconv.m4},
|
||
@file{lib-ld.m4}, @file{lib-link.m4}, @file{lib-prefix.m4}, @file{nls.m4},
|
||
@file{po.m4}, @file{progtest.m4} from GNU @code{gettext}'s @file{m4/}
|
||
directory into a single file.
|
||
|
||
If you already have an @file{aclocal.m4} file, then you will have
|
||
to merge the said macro files into your @file{aclocal.m4}. Note that if
|
||
you are upgrading from a previous release of GNU @code{gettext}, you
|
||
should most probably @emph{replace} the macros (@code{AM_GNU_GETTEXT},
|
||
etc.), as they usually
|
||
change a little from one release of GNU @code{gettext} to the next.
|
||
Their contents may vary as we get more experience with strange systems
|
||
out there.
|
||
|
||
You should be using GNU @code{automake} 1.9 or newer. With it, you need
|
||
to copy the files @file{gettext.m4}, @file{host-cpu-c-abi.m4},
|
||
@file{intlmacosx.m4}, @file{iconv.m4}, @file{lib-ld.m4}, @file{lib-link.m4},
|
||
@file{lib-prefix.m4}, @file{nls.m4}, @file{po.m4}, @file{progtest.m4} from
|
||
GNU @code{gettext}'s @file{m4/} directory to a subdirectory named @file{m4/}
|
||
and add the line
|
||
|
||
@example
|
||
ACLOCAL_AMFLAGS = -I m4
|
||
@end example
|
||
|
||
@noindent
|
||
to your top level @file{Makefile.am}.
|
||
|
||
If you are using GNU @code{automake} 1.10 or newer, it is even easier:
|
||
Add the line
|
||
|
||
@example
|
||
ACLOCAL_AMFLAGS = --install -I m4
|
||
@end example
|
||
|
||
@noindent
|
||
to your top level @file{Makefile.am}, and run @samp{aclocal --install -I m4}.
|
||
This will copy the needed files to the @file{m4/} subdirectory automatically,
|
||
before updating @file{aclocal.m4}.
|
||
|
||
These macros check for the internationalization support functions
|
||
and related information.  Hopefully, once stabilized, these macros
|
||
might be integrated in the standard Autoconf set, because this
|
||
piece of @code{m4} code will be the same for all projects using GNU
|
||
@code{gettext}.
|
||
|
||
@node config.h.in
|
||
@subsection @file{config.h.in} at top level
|
||
@cindex @file{config.h.in} file
|
||
|
||
The include file template that holds the C macros to be defined by
|
||
@code{configure} is usually called @file{config.h.in} and may be
|
||
maintained either manually or automatically.
|
||
|
||
If it is maintained automatically, by use of the @samp{autoheader}
|
||
program, you need to do nothing about it. This is the case in particular
|
||
if you are using GNU @code{automake}.
|
||
|
||
If it is maintained manually, you can get away with adding the
|
||
following lines to @file{config.h.in}:
|
||
|
||
@example
|
||
/* Define to 1 if translation of program messages to the user's
|
||
native language is requested. */
|
||
#undef ENABLE_NLS
|
||
@end example
|
||
|
||
@node Makefile
|
||
@subsection @file{Makefile.in} at top level
|
||
|
||
Here are a few modifications you need to make to your main, top-level
|
||
@file{Makefile.in} file.
|
||
|
||
@enumerate
|
||
@item
|
||
Add the following lines near the beginning of your @file{Makefile.in},
|
||
so the @samp{dist:} goal will work properly (as explained further down):
|
||
|
||
@example
|
||
PACKAGE = @@PACKAGE@@
|
||
VERSION = @@VERSION@@
|
||
@end example
|
||
|
||
@item
|
||
Wherever you process subdirectories in your @file{Makefile.in}, be sure
|
||
you also process the subdirectory @samp{po}. Special
|
||
rules in the @file{Makefiles} take care of the case where no
|
||
internationalization is wanted.
|
||
|
||
If you are using Makefiles, either generated by automake, or hand-written
|
||
so they carefully follow the GNU coding standards, the affected goals for
|
||
which the new subdirectories must be handled include @samp{installdirs},
|
||
@samp{install}, @samp{uninstall}, @samp{clean}, @samp{distclean}.
|
||
|
||
Here is an example of a canonical order of processing. In this
|
||
example, we also define @code{SUBDIRS} in @code{Makefile.in} for it
|
||
to be further used in the @samp{dist:} goal.
|
||
|
||
@example
|
||
SUBDIRS = doc lib src po
|
||
@end example
|
||
|
||
@item
|
||
A delicate point is the @samp{dist:} goal, as @file{po/Makefile} will later
|
||
assume that the proper directory has been set up from the main @file{Makefile}.
|
||
Here is an example of what the @samp{dist:} goal might look like:
|
||
|
||
@example
|
||
distdir = $(PACKAGE)-$(VERSION)
|
||
dist: Makefile
|
||
rm -fr $(distdir)
|
||
mkdir $(distdir)
|
||
chmod 777 $(distdir)
|
||
for file in $(DISTFILES); do \
|
||
ln $$file $(distdir) 2>/dev/null || cp -p $$file $(distdir); \
|
||
done
|
||
for subdir in $(SUBDIRS); do \
|
||
mkdir $(distdir)/$$subdir || exit 1; \
|
||
chmod 777 $(distdir)/$$subdir; \
|
||
(cd $$subdir && $(MAKE) $@@) || exit 1; \
|
||
done
|
||
tar chozf $(distdir).tar.gz $(distdir)
|
||
rm -fr $(distdir)
|
||
@end example
|
||
|
||
@end enumerate
|
||
|
||
Note that if you are using GNU @code{automake}, @file{Makefile.in} is
|
||
automatically generated from @file{Makefile.am}, and all needed changes
|
||
to @file{Makefile.am} are already made by running @samp{gettextize}.
|
||
|
||
@node src/Makefile
|
||
@subsection @file{Makefile.in} in @file{src/}
|
||
|
||
Some of the modifications made in the main @file{Makefile.in} will
|
||
also be needed in the @file{Makefile.in} from your package sources,
|
||
which we assume here to be in the @file{src/} subdirectory. Here are
|
||
all the modifications needed in @file{src/Makefile.in}:
|
||
|
||
@enumerate
|
||
@item
|
||
In view of the @samp{dist:} goal, you should have these lines near the
|
||
beginning of @file{src/Makefile.in}:
|
||
|
||
@example
|
||
PACKAGE = @@PACKAGE@@
|
||
VERSION = @@VERSION@@
|
||
@end example
|
||
|
||
@item
|
||
If not done already, you should guarantee that @code{top_srcdir}
|
||
gets defined. This will serve for @code{cpp} include files. Just add
|
||
the line:
|
||
|
||
@example
|
||
top_srcdir = @@top_srcdir@@
|
||
@end example
|
||
|
||
@item
|
||
You might also want to define @code{subdir} as @samp{src}, later
|
||
allowing for almost uniform @samp{dist:} goals in all your
|
||
@file{Makefile.in}.  At least, the @samp{dist:} goal below assumes that
|
||
you used:
|
||
|
||
@example
|
||
subdir = src
|
||
@end example
|
||
|
||
@item
|
||
The @code{main} function of your program will normally call
|
||
@code{bindtextdomain} (@pxref{Triggering}), like this:
|
||
|
||
@example
|
||
bindtextdomain (@var{PACKAGE}, LOCALEDIR);
|
||
textdomain (@var{PACKAGE});
|
||
@end example
|
||
|
||
On native Windows platforms, the @code{main} function may call
|
||
@code{wbindtextdomain} instead of @code{bindtextdomain}.
|
||
|
||
To make LOCALEDIR known to the program, add the following lines to
|
||
@file{Makefile.in}:
|
||
|
||
@example
|
||
datadir = @@datadir@@
|
||
datarootdir= @@datarootdir@@
|
||
localedir = @@localedir@@
|
||
DEFS = -DLOCALEDIR=\"$(localedir)\" @@DEFS@@
|
||
@end example
|
||
|
||
Note that @code{@@datadir@@} defaults to @samp{$(prefix)/share}, and
|
||
@code{$(localedir)} defaults to @samp{$(prefix)/share/locale}.
|
||
|
||
@item
|
||
You should ensure that the final linking will use @code{@@LIBINTL@@} or
|
||
@code{@@LTLIBINTL@@} as a library. @code{@@LIBINTL@@} is for use without
|
||
@code{libtool}, @code{@@LTLIBINTL@@} is for use with @code{libtool}. An
|
||
easy way to achieve this is to arrange for it to get into @code{LIBS}, like
|
||
this:
|
||
|
||
@example
|
||
LIBS = @@LIBINTL@@ @@LIBS@@
|
||
@end example
|
||
|
||
In most packages internationalized with GNU @code{gettext}, one will
|
||
find a directory @file{lib/} in which a library containing some helper
|
||
functions will be built.  (You need at least the few functions which the
|
||
GNU @code{gettext} Library itself needs.) However some of the functions
|
||
in the @file{lib/} also give messages to the user which of course should be
|
||
translated, too. Taking care of this, the support library (say
|
||
@file{libsupport.a}) should be placed before @code{@@LIBINTL@@} and
|
||
@code{@@LIBS@@} in the above example. So one has to write this:
|
||
|
||
@example
|
||
LIBS = ../lib/libsupport.a @@LIBINTL@@ @@LIBS@@
|
||
@end example
|
||
|
||
@item
|
||
Your @samp{dist:} goal has to conform with others. Here is a
|
||
reasonable definition for it:
|
||
|
||
@example
|
||
distdir = ../$(PACKAGE)-$(VERSION)/$(subdir)
|
||
dist: Makefile $(DISTFILES)
|
||
for file in $(DISTFILES); do \
|
||
ln $$file $(distdir) 2>/dev/null || cp -p $$file $(distdir) || exit 1; \
|
||
done
|
||
@end example
|
||
|
||
@end enumerate
|
||
|
||
Note that if you are using GNU @code{automake}, @file{Makefile.in} is
|
||
automatically generated from @file{Makefile.am}, and the first three
|
||
changes and the last change are not necessary. The remaining needed
|
||
@file{Makefile.am} modifications are the following:
|
||
|
||
@enumerate
|
||
@item
|
||
To make LOCALEDIR known to the program, add the following to
|
||
@file{Makefile.am}:
|
||
|
||
@example
|
||
<module>_CPPFLAGS = -DLOCALEDIR=\"$(localedir)\"
|
||
@end example
|
||
|
||
@noindent
|
||
for each specific module or compilation unit, or
|
||
|
||
@example
|
||
AM_CPPFLAGS = -DLOCALEDIR=\"$(localedir)\"
|
||
@end example
|
||
|
||
for all modules and compilation units together. Furthermore, if you are
|
||
using an Autoconf version older than 2.60, add this line to define
|
||
@samp{localedir}:
|
||
|
||
@example
|
||
localedir = $(datadir)/locale
|
||
@end example
|
||
|
||
@item
|
||
To ensure that the final linking will use @code{@@LIBINTL@@} or
|
||
@code{@@LTLIBINTL@@} as a library, add the following to
|
||
@file{Makefile.am}:
|
||
|
||
@example
|
||
<program>_LDADD = @@LIBINTL@@
|
||
@end example
|
||
|
||
@noindent
|
||
for each specific program, or
|
||
|
||
@example
|
||
LDADD = @@LIBINTL@@
|
||
@end example
|
||
|
||
for all programs together. Remember that when you use @code{libtool}
|
||
to link a program, you need to use @code{@@LTLIBINTL@@} instead of @code{@@LIBINTL@@}
|
||
for that program.
|
||
|
||
@end enumerate
|
||
|
||
@node lib/gettext.h
|
||
@subsection @file{gettext.h} in @file{lib/}
|
||
@cindex @file{gettext.h} file
|
||
@cindex turning off NLS support
|
||
@cindex disabling NLS
|
||
|
||
Internationalization of packages, as provided by GNU @code{gettext}, is
|
||
optional. It can be turned off in two situations:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
When the installer has specified @samp{./configure --disable-nls}. This
|
||
can be useful when small binaries are more important than features, for
|
||
example when building utilities for boot diskettes. It can also be useful
|
||
in order to get some specific C compiler warnings about code quality with
|
||
some older versions of GCC (older than 3.0).
|
||
|
||
@item
|
||
When the libintl.h header (with its associated libintl library, if any) is
|
||
not already installed on the system, it is preferable that the package builds
|
||
without internationalization support, rather than to give a compilation
|
||
error.
|
||
@end itemize
|
||
|
||
A C preprocessor macro can be used to detect these two cases. Usually,
|
||
when @code{libintl.h} was found and not explicitly disabled, the
|
||
@code{ENABLE_NLS} macro will be defined to 1 in the autoconf generated
|
||
configuration file (usually called @file{config.h}). In the two negative
|
||
situations, however, this macro will not be defined, thus it will evaluate
|
||
to 0 in C preprocessor expressions.
|
||
|
||
@cindex include file @file{libintl.h}
|
||
@file{gettext.h} is a convenience header file for conditional use of
|
||
@file{<libintl.h>}, depending on the @code{ENABLE_NLS} macro. If
|
||
@code{ENABLE_NLS} is set, it includes @file{<libintl.h>}; otherwise it
|
||
defines no-op substitutes for the libintl.h functions. We recommend
|
||
the use of @code{"gettext.h"} over direct use of @file{<libintl.h>},
|
||
so that portability to older systems is guaranteed and installers can
|
||
turn off internationalization if they want to. In the C code, you will
|
||
then write
|
||
|
||
@example
|
||
#include "gettext.h"
|
||
@end example
|
||
|
||
@noindent
|
||
instead of
|
||
|
||
@example
|
||
#include <libintl.h>
|
||
@end example
|
||
|
||
The location of @code{gettext.h} is usually in a directory containing
|
||
auxiliary include files. In many GNU packages, there is a directory
|
||
@file{lib/} containing helper functions; @file{gettext.h} fits there.
|
||
In other packages, it can go into the @file{src} directory.
|
||
|
||
Do not install the @code{gettext.h} file in public locations. Every
|
||
package that needs it should contain a copy of it on its own.
|
||
|
||
@node autoconf macros
|
||
@section Autoconf macros for use in @file{configure.ac}
|
||
@cindex autoconf macros for @code{gettext}
|
||
|
||
GNU @code{gettext} installs macros for use in a package's
|
||
@file{configure.ac} or @file{configure.in}.
|
||
@xref{Top, , Introduction, autoconf, The Autoconf Manual}.
|
||
The primary macro is, of course, @code{AM_GNU_GETTEXT}.
|
||
|
||
@menu
|
||
* AM_GNU_GETTEXT:: AM_GNU_GETTEXT in @file{gettext.m4}
|
||
* AM_GNU_GETTEXT_VERSION:: AM_GNU_GETTEXT_VERSION in @file{gettext.m4}
|
||
* AM_GNU_GETTEXT_NEED:: AM_GNU_GETTEXT_NEED in @file{gettext.m4}
|
||
* AM_PO_SUBDIRS:: AM_PO_SUBDIRS in @file{po.m4}
|
||
* AM_XGETTEXT_OPTION:: AM_XGETTEXT_OPTION in @file{po.m4}
|
||
* AM_ICONV:: AM_ICONV in @file{iconv.m4}
|
||
@end menu
|
||
|
||
@node AM_GNU_GETTEXT
|
||
@subsection AM_GNU_GETTEXT in @file{gettext.m4}
|
||
|
||
@amindex AM_GNU_GETTEXT
|
||
The @code{AM_GNU_GETTEXT} macro tests for the presence of the GNU gettext
|
||
function family in either the C library or a separate @code{libintl}
|
||
library (shared or static libraries are both supported). It also invokes
|
||
@code{AM_PO_SUBDIRS}, thus preparing the @file{po/} directories of the
|
||
package for building.
|
||
|
||
@code{AM_GNU_GETTEXT} accepts up to two optional arguments.  The general
|
||
syntax is
|
||
|
||
@example
|
||
AM_GNU_GETTEXT([@var{intlsymbol}], [@var{needsymbol}])
|
||
@end example
|
||
|
||
@c We don't document @var{intlsymbol} = @samp{use-libtool} here, because
|
||
@c it is of no use for packages other than GNU gettext itself. (Such packages
|
||
@c are not allowed to install the shared libintl. But if they use libtool,
|
||
@c then it is in order to install shared libraries that depend on libintl.)
|
||
@var{intlsymbol} should always be @samp{external}.
|
||
|
||
If @var{needsymbol} is specified and is @samp{need-ngettext}, then GNU
|
||
gettext implementations (in libc or libintl) without the @code{ngettext()}
|
||
function will be ignored. If @var{needsymbol} is specified and is
|
||
@samp{need-formatstring-macros}, then GNU gettext implementations that don't
|
||
support the ISO C 99 @file{<inttypes.h>} formatstring macros will be ignored.
|
||
Only one @var{needsymbol} can be specified. These requirements can also be
|
||
specified by using the macro @code{AM_GNU_GETTEXT_NEED} elsewhere. To specify
|
||
more than one requirement, just specify the strongest one among them, or
|
||
invoke the @code{AM_GNU_GETTEXT_NEED} macro several times. The hierarchy
|
||
among the various alternatives is as follows: @samp{need-formatstring-macros}
|
||
implies @samp{need-ngettext}.
|
||
|
||
The @code{AM_GNU_GETTEXT} macro determines whether GNU gettext is
|
||
available and should be used. If so, it sets the @code{USE_NLS} variable
|
||
to @samp{yes}; it defines @code{ENABLE_NLS} to 1 in the autoconf
|
||
generated configuration file (usually called @file{config.h}); it sets
|
||
the variables @code{LIBINTL} and @code{LTLIBINTL} to the linker options
|
||
for use in a Makefile (@code{LIBINTL} for use without libtool,
|
||
@code{LTLIBINTL} for use with libtool); it adds an @samp{-I} option to
|
||
@code{CPPFLAGS} if necessary. In the negative case, it sets
|
||
@code{USE_NLS} to @samp{no}; it sets @code{LIBINTL} and @code{LTLIBINTL}
|
||
to empty and doesn't change @code{CPPFLAGS}.
|
||
|
||
The complexities that @code{AM_GNU_GETTEXT} deals with are the following:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
@cindex @code{libintl} library
|
||
Some operating systems have @code{gettext} in the C library, for example
|
||
glibc. Some have it in a separate library @code{libintl}. GNU @code{libintl}
|
||
might have been installed as part of the GNU @code{gettext} package.
|
||
|
||
@item
|
||
GNU @code{libintl}, if installed, is not necessarily already in the search
|
||
path (@code{CPPFLAGS} for the include file search path, @code{LDFLAGS} for
|
||
the library search path).
|
||
|
||
@item
|
||
Except for glibc, the operating system's native @code{gettext} cannot
|
||
exploit the GNU mo files, doesn't have the necessary locale dependency
|
||
features, and cannot convert messages from the catalog's text encoding
|
||
to the user's locale encoding.
|
||
|
||
@item
|
||
GNU @code{libintl}, if installed, is not necessarily already in the
|
||
run time library search path. To avoid the need for setting an environment
|
||
variable like @code{LD_LIBRARY_PATH}, the macro adds the appropriate
|
||
run time search path options to the @code{LIBINTL} and @code{LTLIBINTL}
|
||
variables. This works on most systems, but not on some operating systems
|
||
with limited shared library support, like SCO.
|
||
|
||
@item
|
||
GNU @code{libintl} relies on POSIX/XSI @code{iconv}. The macro checks for
|
||
linker options needed to use iconv and appends them to the @code{LIBINTL}
|
||
and @code{LTLIBINTL} variables.
|
||
@end itemize
|
||
|
||
@node AM_GNU_GETTEXT_VERSION
|
||
@subsection AM_GNU_GETTEXT_VERSION in @file{gettext.m4}
|
||
|
||
@amindex AM_GNU_GETTEXT_VERSION
|
||
The @code{AM_GNU_GETTEXT_VERSION} macro declares the version number of
|
||
the GNU gettext infrastructure that is used by the package.
|
||
|
||
The use of this macro is optional; only the @code{autopoint} program makes
|
||
use of it (@pxref{Version Control Issues}).
|
||
|
||
@node AM_GNU_GETTEXT_NEED
|
||
@subsection AM_GNU_GETTEXT_NEED in @file{gettext.m4}
|
||
|
||
@amindex AM_GNU_GETTEXT_NEED
|
||
The @code{AM_GNU_GETTEXT_NEED} macro declares a constraint regarding the
|
||
GNU gettext implementation. The syntax is
|
||
|
||
@example
|
||
AM_GNU_GETTEXT_NEED([@var{needsymbol}])
|
||
@end example
|
||
|
||
If @var{needsymbol} is @samp{need-ngettext}, then GNU gettext implementations
|
||
(in libc or libintl) without the @code{ngettext()} function will be ignored.
|
||
If @var{needsymbol} is @samp{need-formatstring-macros}, then GNU gettext
|
||
implementations that don't support the ISO C 99 @file{<inttypes.h>}
|
||
formatstring macros will be ignored.
|
||
|
||
The optional second argument of @code{AM_GNU_GETTEXT} is also taken into
|
||
account.
|
||
|
||
The @code{AM_GNU_GETTEXT_NEED} invocations can occur before or after
|
||
the @code{AM_GNU_GETTEXT} invocation; the order doesn't matter.
|
||
|
||
@node AM_PO_SUBDIRS
|
||
@subsection AM_PO_SUBDIRS in @file{po.m4}
|
||
|
||
@amindex AM_PO_SUBDIRS
|
||
The @code{AM_PO_SUBDIRS} macro prepares the @file{po/} directories of the
|
||
package for building. This macro should be used in internationalized
|
||
programs written in programming languages other than C, C++, Objective C,
|
||
for example @code{sh}, @code{Python}, @code{Lisp}. See @ref{Programming
|
||
Languages} for a list of programming languages that support localization
|
||
through PO files.
|
||
|
||
The @code{AM_PO_SUBDIRS} macro determines whether internationalization
|
||
should be used. If so, it sets the @code{USE_NLS} variable to @samp{yes},
|
||
otherwise to @samp{no}. It also determines the right values for Makefile
|
||
variables in each @file{po/} directory.
|
||
|
||
@node AM_XGETTEXT_OPTION
|
||
@subsection AM_XGETTEXT_OPTION in @file{po.m4}
|
||
|
||
@amindex AM_XGETTEXT_OPTION
|
||
The @code{AM_XGETTEXT_OPTION} macro registers a command-line option to be
|
||
used in the invocations of @code{xgettext} in the @file{po/} directories
|
||
of the package.
|
||
|
||
For example, if you have a source file that defines a function
|
||
@samp{error_at_line} whose fifth argument is a format string, you can use
|
||
@example
|
||
AM_XGETTEXT_OPTION([--flag=error_at_line:5:c-format])
|
||
@end example
|
||
@noindent
|
||
to instruct @code{xgettext} to mark all translatable strings in @samp{gettext}
|
||
invocations that occur as fifth argument to this function as @samp{c-format}.
|
||
|
||
See @ref{xgettext Invocation} for the list of options that @code{xgettext}
|
||
accepts.
|
||
|
||
The use of this macro is an alternative to the use of the
|
||
@samp{XGETTEXT_OPTIONS} variable in @file{po/Makevars}.
|
||
|
||
@node AM_ICONV
|
||
@subsection AM_ICONV in @file{iconv.m4}
|
||
|
||
@amindex AM_ICONV
|
||
The @code{AM_ICONV} macro tests for the presence of the POSIX/XSI
|
||
@code{iconv} function family in either the C library or a separate
|
||
@code{libiconv} library. If found, it sets the @code{am_cv_func_iconv}
|
||
variable to @samp{yes}; it defines @code{HAVE_ICONV} to 1 in the autoconf
|
||
generated configuration file (usually called @file{config.h}); it defines
|
||
@code{ICONV_CONST} to @samp{const} or to empty, depending on whether the
|
||
second argument of @code{iconv()} is of type @samp{const char **} or
|
||
@samp{char **}; it sets the variables @code{LIBICONV} and
|
||
@code{LTLIBICONV} to the linker options for use in a Makefile
|
||
(@code{LIBICONV} for use without libtool, @code{LTLIBICONV} for use with
|
||
libtool); it adds an @samp{-I} option to @code{CPPFLAGS} if
|
||
necessary. If not found, it sets @code{LIBICONV} and @code{LTLIBICONV} to
|
||
empty and doesn't change @code{CPPFLAGS}.
|
||
|
||
The complexities that @code{AM_ICONV} deals with are the following:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
@cindex @code{libiconv} library
|
||
Some operating systems have @code{iconv} in the C library, for example
|
||
glibc. Some have it in a separate library @code{libiconv}, for example
|
||
OSF/1 or FreeBSD. Regardless of the operating system, GNU @code{libiconv}
|
||
might have been installed. In that case, it should be used instead of the
|
||
operating system's native @code{iconv}.
|
||
|
||
@item
|
||
GNU @code{libiconv}, if installed, is not necessarily already in the search
|
||
path (@code{CPPFLAGS} for the include file search path, @code{LDFLAGS} for
|
||
the library search path).
|
||
|
||
@item
|
||
GNU @code{libiconv} is binary incompatible with some operating systems'
|
||
native @code{iconv}, for example on FreeBSD. Use of an @file{iconv.h}
|
||
and @file{libiconv.so} that don't fit together would produce program
|
||
crashes.
|
||
|
||
@item
|
||
GNU @code{libiconv}, if installed, is not necessarily already in the
|
||
run time library search path. To avoid the need for setting an environment
|
||
variable like @code{LD_LIBRARY_PATH}, the macro adds the appropriate
|
||
run time search path options to the @code{LIBICONV} variable. This works
|
||
on most systems, but not on some operating systems with limited shared
|
||
library support, like SCO.
|
||
@end itemize
|
||
|
||
@file{iconv.m4} is distributed with the GNU gettext package because
|
||
@file{gettext.m4} relies on it.
|
||
|
||
@node Version Control Issues
|
||
@section Integrating with Version Control Systems
|
||
|
||
Many projects use version control systems for distributed development
|
||
and source backup. This section gives some advice on how to manage the
|
||
uses of @code{gettextize}, @code{autopoint} and @code{autoconf} on
|
||
version controlled files.
|
||
|
||
@menu
|
||
* Distributed Development:: Avoiding version mismatch in distributed development
|
||
* Files under Version Control:: Files to put under version control
|
||
* Translations under Version Control:: Put PO Files under Version Control
|
||
* autopoint Invocation:: Invoking the @code{autopoint} Program
|
||
@end menu
|
||
|
||
@node Distributed Development
|
||
@subsection Avoiding version mismatch in distributed development
|
||
|
||
In a project with multiple developers, there should be a
|
||
single developer who occasionally - when there is a desire to upgrade to
|
||
a new @code{gettext} version - runs @code{gettextize} and performs the
|
||
changes listed in @ref{Adjusting Files}, and then commits his changes
|
||
to the repository.
|
||
|
||
It is highly recommended that all developers on a project use the same
|
||
version of GNU @code{gettext} in the package. In other words, if a
|
||
developer runs @code{gettextize}, he should go the whole way, make the
|
||
necessary remaining changes and commit his changes to the repository.
|
||
Otherwise the following problems will likely occur:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
Apparent version mismatch between developers. Since some @code{gettext}
|
||
specific portions in @file{configure.ac}, @file{configure.in} and
|
||
@code{Makefile.am}, @code{Makefile.in} files depend on the @code{gettext}
|
||
version, the use of infrastructure files belonging to different
|
||
@code{gettext} versions can easily lead to build errors.
|
||
|
||
@item
|
||
Hidden version mismatch. Such version mismatch can also lead to
|
||
malfunctioning of the package, which may go undiscovered by the developers.
|
||
The worst case of hidden version mismatch is that internationalization
|
||
of the package doesn't work at all.
|
||
|
||
@item
|
||
Release risks. All developers implicitly perform constant testing on
|
||
a package. This is important in the days and weeks before a release.
|
||
If the guy who makes the release tar files uses a different version
|
||
of GNU @code{gettext} than the other developers, the distribution will
|
||
be less well tested than if all had been using the same @code{gettext}
|
||
version. For example, it is possible that a platform specific bug goes
|
||
undiscovered due to this constellation.
|
||
@end itemize
|
||
|
||
@node Files under Version Control
|
||
@subsection Files to put under version control
|
||
|
||
There are basically three ways to deal with generated files in the
|
||
context of a version controlled repository, such as @file{configure}
|
||
generated from @file{configure.ac}, @code{@var{parser}.c} generated
|
||
from @code{@var{parser}.y}, or @code{po/Makefile.in.in} autoinstalled
|
||
by @code{gettextize} or @code{autopoint}.
|
||
|
||
@enumerate
|
||
@item
|
||
All generated files are always committed into the repository.
|
||
|
||
@item
|
||
All generated files are committed into the repository occasionally,
|
||
for example each time a release is made.
|
||
|
||
@item
|
||
Generated files are never committed into the repository.
|
||
@end enumerate
|
||
|
||
Each of these three approaches has different advantages and drawbacks.
|
||
|
||
@enumerate
|
||
@item
|
||
The advantage is that anyone can check out the source at any moment and
|
||
gets a working build. The drawbacks are: 1a. It requires some frequent
|
||
"push" actions by the maintainers. 1b. The repository grows in size
|
||
quite fast.
|
||
|
||
@item
|
||
The advantage is that anyone can check out the source, and the usual
|
||
"./configure; make" will work. The drawbacks are: 2a. The one who
|
||
checks out the repository needs tools like GNU @code{automake}, GNU
|
||
@code{autoconf}, GNU @code{m4} installed in his PATH; sometimes he
|
||
even needs particular versions of them. 2b. When a release is made
|
||
and a commit is made on the generated files, the other developers get
|
||
conflicts on the generated files when merging the local work back to
|
||
the repository. Although these conflicts are easy to resolve, they
|
||
are annoying.
|
||
|
||
@item
|
||
The advantage is less work for the maintainers. The drawback is that
|
||
anyone who checks out the source not only needs tools like GNU
|
||
@code{automake}, GNU @code{autoconf}, GNU @code{m4} installed in his
|
||
PATH, but also that he needs to perform a package specific pre-build
|
||
step before being able to "./configure; make".
|
||
@end enumerate
|
||
|
||
For the first and second approach, all files modified or brought in
|
||
by the occasional @code{gettextize} invocation and update should be
|
||
committed into the repository.
|
||
|
||
For the third approach, the maintainer can omit from the repository
|
||
all the files that @code{gettextize} mentions as "copy". Instead, he
|
||
adds to the @file{configure.ac} or @file{configure.in} a line of the
|
||
form
|
||
|
||
@example
|
||
AM_GNU_GETTEXT_VERSION(@value{ARCHIVE-VERSION})
|
||
@end example
|
||
|
||
@noindent
|
||
and adds to the package's pre-build script an invocation of
|
||
@samp{autopoint}. For everyone who checks out the source, this
|
||
@code{autopoint} invocation will copy into the right place the
|
||
@code{gettext} infrastructure files that have been omitted from the repository.
|
||
|
||
The version number used as argument to @code{AM_GNU_GETTEXT_VERSION} is
|
||
the version of the @code{gettext} infrastructure that the package wants
|
||
to use. It is also the minimum version number of the @samp{autopoint}
|
||
program. So, if you write @code{AM_GNU_GETTEXT_VERSION(0.11.5)} then the
|
||
developers can have any version >= 0.11.5 installed; the package will work
|
||
with the 0.11.5 infrastructure in all developers' builds. When the
|
||
maintainer then runs gettextize from, say, version 0.12.1 on the package,
|
||
the occurrence of @code{AM_GNU_GETTEXT_VERSION(0.11.5)} will be changed
|
||
into @code{AM_GNU_GETTEXT_VERSION(0.12.1)}, and all other developers that
|
||
use the repository will henceforth need to have GNU @code{gettext} 0.12.1 or newer
|
||
installed.
|
||
|
||
@node Translations under Version Control
|
||
@subsection Put PO Files under Version Control
|
||
|
||
Since translations are valuable assets, just like the source code, it
|
||
would make sense to put them under version control. The GNU gettext
|
||
infrastructure supports two ways to deal with translations in the
|
||
context of a version controlled repository.
|
||
|
||
@enumerate
|
||
@item
|
||
Both POT file and PO files are committed into the repository.
|
||
|
||
@item
|
||
Only PO files are committed into the repository.
|
||
|
||
@end enumerate
|
||
|
||
If a POT file is absent when building, it will be generated by
|
||
scanning the source files with @code{xgettext}, and then the PO files
|
||
are regenerated as a dependency. On the other hand, some maintainers
|
||
want to keep the POT file unchanged during the development phase. So,
|
||
even if a POT file is present and older than the source code, it won't
|
||
be updated automatically. You can manually update it with @code{make
|
||
$(DOMAIN).pot-update}, and commit it at a certain point.
|
||
|
||
Special advice for particular version control systems:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
Recent version control systems, Git for instance, ignore a file's
|
||
timestamp. In that case, PO files can be accidentally updated even if
|
||
a POT file is not updated. To prevent this, you can set
|
||
@samp{PO_DEPENDS_ON_POT} variable to @code{no} in the @file{Makevars}
|
||
file and do @code{make update-po} manually.
|
||
|
||
@item
|
||
Location comments such as @code{#: lib/error.c:116} are sometimes
|
||
annoying, since these comments are volatile and may introduce unwanted
|
||
change to the working copy when building. To mitigate this, you can
|
||
decide to omit those comments from the PO files in the repository.
|
||
|
||
This is possible with the @code{--no-location} option of the
|
||
@code{msgmerge} command@footnote{You can also use it through the
|
||
@samp{MSGMERGE_OPTIONS} option from @file{Makevars}}. The drawback is
|
||
that, if the location information is needed, translators have to
|
||
recover the location comments by running @code{msgmerge} again.
|
||
|
||
@end itemize
|
||
|
||
@node autopoint Invocation
|
||
@subsection Invoking the @code{autopoint} Program
|
||
|
||
@include autopoint.texi
|
||
|
||
@node Release Management
|
||
@section Creating a Distribution Tarball
|
||
|
||
@cindex release
|
||
@cindex distribution tarball
|
||
In projects that use GNU @code{automake}, the usual commands for creating
|
||
a distribution tarball, @samp{make dist} or @samp{make distcheck},
|
||
automatically update the PO files as needed.
|
||
|
||
If GNU @code{automake} is not used, the maintainer needs to perform this
|
||
update before making a release:
|
||
|
||
@example
|
||
$ ./configure
|
||
$ (cd po; make update-po)
|
||
$ make distclean
|
||
@end example
|
||
|
||
@node Installers
|
||
@chapter The Installer's and Distributor's View
|
||
@cindex package installer's view of @code{gettext}
|
||
@cindex package distributor's view of @code{gettext}
|
||
@cindex package build and installation options
|
||
@cindex setting up @code{gettext} at build time
|
||
|
||
By default, packages fully using GNU @code{gettext}, internally,
|
||
are installed in such a way as to allow translation of
|
||
messages. At @emph{configuration} time, those packages should
|
||
automatically detect whether the underlying host system already provides
|
||
the GNU @code{gettext} functions. If not,
|
||
the GNU @code{gettext} library should be automatically prepared
|
||
and used. Installers may use special options at configuration
|
||
time for changing this behavior. The command @samp{./configure
|
||
--with-included-gettext} bypasses system @code{gettext} to
|
||
use the included GNU @code{gettext} instead,
|
||
while @samp{./configure --disable-nls}
|
||
produces programs totally unable to translate messages.
|
||
|
||
@vindex LINGUAS@r{, environment variable}
|
||
Internationalized packages usually have many @file{@var{ll}.po}
|
||
files. Unless
|
||
translations are disabled, all those available are installed together
|
||
with the package. However, the environment variable @code{LINGUAS}
|
||
may be set, prior to configuration, to limit the installed set.
|
||
@code{LINGUAS} should then contain a space separated list of two-letter
|
||
codes, stating which languages are allowed.
|
||
|
||
@node Programming Languages
|
||
@chapter Other Programming Languages
|
||
|
||
While the presentation of @code{gettext} focuses mostly on C and
|
||
implicitly applies to C++ as well, its scope is far broader than that:
|
||
Many programming languages, scripting languages and other textual data
|
||
like GUI resources or package descriptions can make use of the gettext
|
||
approach.
|
||
|
||
@menu
|
||
* Language Implementors:: The Language Implementor's View
|
||
* Programmers for other Languages:: The Programmer's View
|
||
* Translators for other Languages:: The Translator's View
|
||
* Maintainers for other Languages:: The Maintainer's View
|
||
* List of Programming Languages:: Individual Programming Languages
|
||
@end menu
|
||
|
||
@node Language Implementors
|
||
@section The Language Implementor's View
|
||
@cindex programming languages
|
||
@cindex scripting languages
|
||
|
||
All programming and scripting languages that have the notion of strings
|
||
are eligible for supporting @code{gettext}. Supporting @code{gettext}
|
||
means the following:
|
||
|
||
@enumerate
|
||
@item
|
||
You should add to the language a syntax for translatable strings. In
|
||
principle, a function call of @code{gettext} would do, but a shorthand
|
||
syntax helps to preserve the legibility of internationalized programs. For
|
||
example, in C we use the syntax @code{_("string")}, and in GNU awk we use
|
||
the shorthand @code{_"string"}.
|
||
|
||
@item
|
||
You should arrange that evaluation of such a translatable string at
|
||
runtime calls the @code{gettext} function, or performs equivalent
|
||
processing.
|
||
|
||
@item
|
||
Similarly, you should make the functions @code{ngettext},
|
||
@code{dcgettext}, @code{dcngettext} available from within the language.
|
||
These functions are less often used, but are nevertheless necessary for
|
||
particular purposes: @code{ngettext} for correct plural handling, and
|
||
@code{dcgettext} and @code{dcngettext} for obeying other locale-related
|
||
environment variables than @code{LC_MESSAGES}, such as @code{LC_TIME} or
|
||
@code{LC_MONETARY}. For these latter functions, you need to make the
|
||
@code{LC_*} constants, available in the C header @code{<locale.h>},
|
||
referenceable from within the language, usually either as enumeration
|
||
values or as strings.
|
||
|
||
@item
|
||
You should allow the programmer to designate a message domain, either by
|
||
making the @code{textdomain} function available from within the
|
||
language, or by introducing a magic variable called @code{TEXTDOMAIN}.
|
||
Similarly, you should allow the programmer to designate where to search
|
||
for message catalogs, by providing access to the @code{bindtextdomain}
|
||
function or --- on native Windows platforms --- to the @code{wbindtextdomain}
|
||
function.
|
||
|
||
@item
|
||
You should either perform a @code{setlocale (LC_ALL, "")} call during
|
||
the startup of your language runtime, or allow the programmer to do so.
|
||
Remember that gettext will act as a no-op if the @code{LC_MESSAGES} and
|
||
@code{LC_CTYPE} locale categories are not both set.
|
||
|
||
@item
|
||
A programmer should have a way to extract translatable strings from a
|
||
program into a PO file. The GNU @code{xgettext} program is being
|
||
extended to support very different programming languages. Please
|
||
contact the GNU @code{gettext} maintainers to help them do this.
|
||
The GNU @code{gettext} maintainers will need from you a formal
|
||
description of the lexical structure of source files. It should
|
||
answer the questions:
|
||
@itemize @bullet
|
||
@item
|
||
What does a token look like?
|
||
@item
|
||
What does a string literal look like? What escape characters exist
|
||
inside a string?
|
||
@item
|
||
What escape characters exist outside of strings? If Unicode escapes
|
||
are supported, are they applied before or after tokenization?
|
||
@item
|
||
What is the syntax for function calls? How are consecutive arguments
|
||
in the same function call separated?
|
||
@item
|
||
What is the syntax for comments?
|
||
@end itemize
|
||
@noindent Based on this description, the GNU @code{gettext} maintainers
|
||
can add support to @code{xgettext}.
|
||
|
||
If the string extractor is best integrated into your language's parser,
|
||
GNU @code{xgettext} can function as a front end to your string extractor.
|
||
|
||
@item
|
||
The language's library should have a string formatting facility.
|
||
Additionally:
|
||
@enumerate
|
||
@item
|
||
There must be a way, in the format string, to denote the arguments by a
|
||
positional number or a name. This is needed because for some languages
|
||
and some messages with more than one substitutable argument, the
|
||
translation will need to output the substituted arguments in a different
|
||
order. @xref{c-format Flag}.
|
||
@item
|
||
The syntax of format strings must be documented in a way that translators
|
||
can understand. The GNU @code{gettext} manual will be extended to
|
||
include a pointer to this documentation.
|
||
@end enumerate
|
||
Based on this, the GNU @code{gettext} maintainers can add a format string
|
||
equivalence checker to @code{msgfmt}, so that translators get told
|
||
immediately when they have made a mistake during the translation of a
|
||
format string.
|
||
|
||
@item
|
||
If the language has more than one implementation, and not all of the
|
||
implementations use @code{gettext}, but the programs should be portable
|
||
across implementations, you should provide a no-i18n emulation, that
|
||
makes the other implementations accept programs written for yours,
|
||
without actually translating the strings.
|
||
|
||
@item
|
||
To help the programmer in the task of marking translatable strings,
|
||
which is sometimes performed using the Emacs PO mode (@pxref{Marking}),
|
||
you are welcome to
|
||
contact the GNU @code{gettext} maintainers, so they can add support for
|
||
your language to @file{po-mode.el}.
|
||
@end enumerate
|
||
|
||
On the implementation side, two approaches are possible, with
|
||
different effects on portability and copyright:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
You may link against GNU @code{gettext} functions if they are found in
|
||
the C library. For example, an autoconf test for @code{gettext()} and
|
||
@code{ngettext()} will detect this situation. For the moment, this test
|
||
will succeed on GNU systems and on Solaris 11 platforms. No severe
|
||
copyright restrictions apply, except if you want to distribute statically
|
||
linked binaries.
|
||
|
||
@item
|
||
You may emulate or reimplement the GNU @code{gettext} functionality.
|
||
This has the advantage of full portability and no copyright
|
||
restrictions, but also the drawback that you have to reimplement the GNU
|
||
@code{gettext} features (such as the @code{LANGUAGE} environment
|
||
variable, the locale aliases database, the automatic charset conversion,
|
||
and plural handling).
|
||
@end itemize
|
||
|
||
@node Programmers for other Languages
|
||
@section The Programmer's View
|
||
|
||
For the programmer, the general procedure is the same as for the C
|
||
language. The Emacs PO mode marking supports other languages, and the GNU
|
||
@code{xgettext} string extractor recognizes other languages based on the
|
||
file extension or a command-line option. In some languages,
|
||
@code{setlocale} is not needed because it is already performed by the
|
||
underlying language runtime.
|
||
|
||
@node Translators for other Languages
|
||
@section The Translator's View
|
||
|
||
The translator works exactly as in the C language case. The only
|
||
difference is that when translating format strings, she has to be aware
|
||
of the language's particular syntax for positional arguments in format
|
||
strings.
|
||
|
||
@menu
|
||
* c-format:: C Format Strings
|
||
* objc-format:: Objective C Format Strings
|
||
* sh-format:: Shell Format Strings
|
||
* python-format:: Python Format Strings
|
||
* lisp-format:: Lisp Format Strings
|
||
* elisp-format:: Emacs Lisp Format Strings
|
||
* librep-format:: librep Format Strings
|
||
* scheme-format:: Scheme Format Strings
|
||
* smalltalk-format:: Smalltalk Format Strings
|
||
* java-format:: Java Format Strings
|
||
* csharp-format:: C# Format Strings
|
||
* awk-format:: awk Format Strings
|
||
* object-pascal-format:: Object Pascal Format Strings
|
||
* ycp-format:: YCP Format Strings
|
||
* tcl-format:: Tcl Format Strings
|
||
* perl-format:: Perl Format Strings
|
||
* php-format:: PHP Format Strings
|
||
* ruby-format:: Ruby Format Strings
|
||
* gcc-internal-format:: GCC internal Format Strings
|
||
* gfc-internal-format:: GFC internal Format Strings
|
||
* qt-format:: Qt Format Strings
|
||
* qt-plural-format:: Qt Plural Format Strings
|
||
* kde-format:: KDE Format Strings
|
||
* kde-kuit-format:: KUIT Format Strings
|
||
* boost-format:: Boost Format Strings
|
||
* lua-format:: Lua Format Strings
|
||
* javascript-format:: JavaScript Format Strings
|
||
@end menu
|
||
|
||
@node c-format
|
||
@subsection C Format Strings
|
||
|
||
C format strings are described in POSIX (IEEE P1003.1 2001), section
|
||
XSH 3 fprintf(),
|
||
@uref{http://www.opengroup.org/onlinepubs/007904975/functions/fprintf.html}.
|
||
See also the fprintf() manual page,
|
||
@uref{http://www.linuxvalley.it/encyclopedia/ldp/manpage/man3/printf.3.php},
|
||
@uref{http://informatik.fh-wuerzburg.de/student/i510/man/printf.html}.
|
||
|
||
Although format strings with positions that reorder arguments, such as
|
||
|
||
@example
|
||
"Only %2$d bytes free on '%1$s'."
|
||
@end example
|
||
|
||
@noindent
|
||
which is semantically equivalent to
|
||
|
||
@example
|
||
"'%s' has only %d bytes free."
|
||
@end example
|
||
|
||
@noindent
|
||
are a POSIX/XSI feature and not specified by ISO C 99, translators can rely
|
||
on this reordering ability: On the few platforms where @code{printf()},
|
||
@code{fprintf()} etc. don't support this feature natively, @file{libintl.a}
|
||
or @file{libintl.so} provides replacement functions, and GNU @code{<libintl.h>}
|
||
activates these replacement functions automatically.
|
||
|
||
@cindex outdigits
|
||
@cindex Arabic digits
|
||
As a special feature for Farsi (Persian) and maybe Arabic, translators can
|
||
insert an @samp{I} flag into numeric format directives. For example, the
|
||
translation of @code{"%d"} can be @code{"%Id"}. The effect of this flag,
|
||
on systems with GNU @code{libc}, is that in the output, the ASCII digits are
|
||
replaced with the @samp{outdigits} defined in the @code{LC_CTYPE} locale
|
||
category. On other systems, the @code{gettext} function removes this flag,
|
||
so that it has no effect.
|
||
|
||
Note that the programmer should @emph{not} put this flag into the
|
||
untranslated string. (Putting the @samp{I} format directive flag into an
|
||
@var{msgid} string would lead to undefined behaviour on platforms without
|
||
glibc when NLS is disabled.)
|
||
|
||
@node objc-format
|
||
@subsection Objective C Format Strings
|
||
|
||
Objective C format strings are like C format strings. They support an
|
||
additional format directive: "%@@", which when executed consumes an argument
|
||
of type @code{Object *}.
|
||
|
||
@node sh-format
|
||
@subsection Shell Format Strings
|
||
|
||
Shell format strings, as supported by GNU gettext and the @samp{envsubst}
|
||
program, are strings with references to shell variables in the form
|
||
@code{$@var{variable}} or @code{$@{@var{variable}@}}. References of the form
|
||
@code{$@{@var{variable}-@var{default}@}},
|
||
@code{$@{@var{variable}:-@var{default}@}},
|
||
@code{$@{@var{variable}=@var{default}@}},
|
||
@code{$@{@var{variable}:=@var{default}@}},
|
||
@code{$@{@var{variable}+@var{replacement}@}},
|
||
@code{$@{@var{variable}:+@var{replacement}@}},
|
||
@code{$@{@var{variable}?@var{ignored}@}},
|
||
@code{$@{@var{variable}:?@var{ignored}@}},
|
||
that would be valid inside shell scripts, are not supported. The
|
||
@var{variable} names must consist solely of alphanumeric or underscore
|
||
ASCII characters, not start with a digit and be nonempty; otherwise such
|
||
a variable reference is ignored.
|
||
|
||
@node python-format
|
||
@subsection Python Format Strings
|
||
|
||
There are two kinds of format strings in Python: those acceptable to
|
||
the Python built-in format operator @code{%}, labelled as
|
||
@samp{python-format}, and those acceptable to the @code{format} method
|
||
of the @samp{str} object, labelled as @samp{python-brace-format}.
|
||
|
||
Python @code{%} format strings are described in
|
||
@w{Python Library reference} /
|
||
@w{5. Built-in Types} /
|
||
@w{5.6. Sequence Types} /
|
||
@w{5.6.2. String Formatting Operations},
|
||
@uref{https://docs.python.org/2/library/stdtypes.html#string-formatting-operations}.
|
||
|
||
Python brace format strings are described in @w{PEP 3101 -- Advanced
|
||
String Formatting}, @uref{https://www.python.org/dev/peps/pep-3101/}.
|
||
|
||
@node lisp-format
|
||
@subsection Lisp Format Strings
|
||
|
||
Lisp format strings are described in the Common Lisp HyperSpec,
|
||
chapter 22.3 @w{Formatted Output},
|
||
@uref{http://www.ai.mit.edu/projects/iiip/doc/CommonLISP/HyperSpec/Body/sec_22-3.html}.
|
||
|
||
@node elisp-format
|
||
@subsection Emacs Lisp Format Strings
|
||
|
||
Emacs Lisp format strings are documented in the Emacs Lisp reference,
|
||
section @w{Formatting Strings},
|
||
@uref{https://www.gnu.org/manual/elisp-manual-21-2.8/html_chapter/elisp_4.html#SEC75}.
|
||
Note that as of version 21, XEmacs supports numbered argument specifications
|
||
in format strings while FSF Emacs doesn't.
|
||
|
||
@node librep-format
|
||
@subsection librep Format Strings
|
||
|
||
librep format strings are documented in the librep manual, section
|
||
@w{Formatted Output},
|
||
@url{http://librep.sourceforge.net/librep-manual.html#Formatted%20Output},
|
||
@url{http://www.gwinnup.org/research/docs/librep.html#SEC122}.
|
||
|
||
@node scheme-format
|
||
@subsection Scheme Format Strings
|
||
|
||
Scheme format strings are documented in the SLIB manual, section
|
||
@w{Format Specification}.
|
||
|
||
@node smalltalk-format
|
||
@subsection Smalltalk Format Strings
|
||
|
||
Smalltalk format strings are described in the GNU Smalltalk documentation,
|
||
class @code{CharArray}, methods @samp{bindWith:} and
|
||
@samp{bindWithArguments:},
|
||
@uref{https://www.gnu.org/software/smalltalk/gst-manual/gst_68.html#SEC238}.
|
||
In summary, a directive starts with @samp{%} and is followed by @samp{%}
|
||
or a nonzero digit (@samp{1} to @samp{9}).
|
||
|
||
@node java-format
|
||
@subsection Java Format Strings
|
||
|
||
There are two kinds of format strings in Java: those acceptable to the
|
||
@code{MessageFormat.format} function, labelled as @samp{java-format},
|
||
and those acceptable to the @code{String.format} and
|
||
@code{PrintStream.printf} functions, labelled as @samp{java-printf-format}.
|
||
|
||
Java format strings are described in the JDK documentation for class
|
||
@code{java.text.MessageFormat},
|
||
@uref{https://docs.oracle.com/javase/7/docs/api/java/text/MessageFormat.html}.
|
||
See also the ICU documentation
|
||
@uref{http://icu-project.org/apiref/icu4j/com/ibm/icu/text/MessageFormat.html}.
|
||
|
||
Java @code{printf} format strings are described in the JDK documentation
|
||
for class @code{java.util.Formatter},
|
||
@uref{https://docs.oracle.com/javase/7/docs/api/java/util/Formatter.html}.
|
||
|
||
@node csharp-format
|
||
@subsection C# Format Strings
|
||
|
||
C# format strings are described in the .NET documentation for class
|
||
@code{System.String} and in
|
||
@uref{http://msdn.microsoft.com/library/default.asp?url=/library/en-us/cpguide/html/cpConFormattingOverview.asp}.
|
||
|
||
@node awk-format
|
||
@subsection awk Format Strings
|
||
|
||
awk format strings are described in the gawk documentation, section
|
||
@w{Printf},
|
||
@uref{https://www.gnu.org/manual/gawk/html_node/Printf.html#Printf}.
|
||
|
||
@node object-pascal-format
|
||
@subsection Object Pascal Format Strings
|
||
|
||
Object Pascal format strings are described in the documentation of the
|
||
Free Pascal runtime library, section Format,
|
||
@uref{https://www.freepascal.org/docs-html/rtl/sysutils/format.html}.
|
||
|
||
@node ycp-format
|
||
@subsection YCP Format Strings
|
||
|
||
YCP sformat strings are described in the libycp documentation
|
||
@uref{file:/usr/share/doc/packages/libycp/YCP-builtins.html}.
|
||
In summary, a directive starts with @samp{%} and is followed by @samp{%}
|
||
or a nonzero digit (@samp{1} to @samp{9}).
|
||
|
||
@node tcl-format
|
||
@subsection Tcl Format Strings
|
||
|
||
Tcl format strings are described in the @file{format.n} manual page,
|
||
@uref{http://www.scriptics.com/man/tcl8.3/TclCmd/format.htm}.
|
||
|
||
@node perl-format
|
||
@subsection Perl Format Strings
|
||
|
||
There are two kinds of format strings in Perl: those acceptable to the
|
||
Perl built-in function @code{printf}, labelled as @samp{perl-format},
|
||
and those acceptable to the @code{libintl-perl} function @code{__x},
|
||
labelled as @samp{perl-brace-format}.
|
||
|
||
Perl @code{printf} format strings are described in the @code{sprintf}
|
||
section of @samp{man perlfunc}.
|
||
|
||
Perl brace format strings are described in the
|
||
@file{Locale::TextDomain(3pm)} manual page of the CPAN package
|
||
libintl-perl.  In brief, Perl brace format strings use placeholders put between
|
||
braces (@samp{@{} and @samp{@}}). The placeholder must have the syntax
|
||
of simple identifiers.
|
||
|
||
@node php-format
|
||
@subsection PHP Format Strings
|
||
|
||
PHP format strings are described in the documentation of the PHP function
|
||
@code{sprintf}, in @file{phpdoc/manual/function.sprintf.html} or
|
||
@uref{http://www.php.net/manual/en/function.sprintf.php}.
|
||
|
||
@node ruby-format
|
||
@subsection Ruby Format Strings
|
||
|
||
Ruby format strings are described in the documentation of the Ruby
|
||
functions @code{format} and @code{sprintf}, in
|
||
@uref{https://ruby-doc.org/core-2.7.1/Kernel.html#method-i-sprintf}.
|
||
|
||
There are two kinds of format strings in Ruby:
|
||
@itemize @bullet
|
||
@item
|
||
Those that take a list of arguments without names. They support
|
||
argument reordering by use of the @code{%@var{n}$} syntax. Note
|
||
that if one argument uses this syntax, all must use this syntax.
|
||
@item
|
||
Those that take a hash table, containing named arguments. The
|
||
syntax is @code{%<@var{name}>}. Note that @code{%@{@var{name}@}} is
|
||
equivalent to @code{%<@var{name}>s}.
|
||
@end itemize
|
||
|
||
@node gcc-internal-format
|
||
@subsection GCC internal Format Strings
|
||
|
||
These format strings are used inside the GCC sources. In such a format
|
||
string, a directive starts with @samp{%}, is optionally followed by a
|
||
size specifier @samp{l}, an optional flag @samp{+}, another optional flag
|
||
@samp{#}, and is finished by a specifier: @samp{%} denotes a literal
|
||
percent sign, @samp{c} denotes a character, @samp{s} denotes a string,
|
||
@samp{i} and @samp{d} denote an integer, @samp{o}, @samp{u}, @samp{x}
|
||
denote an unsigned integer, @samp{.*s} denotes a string preceded by a
|
||
width specification, @samp{H} denotes a @samp{location_t *} pointer,
|
||
@samp{D} denotes a general declaration, @samp{F} denotes a function
|
||
declaration, @samp{T} denotes a type, @samp{A} denotes a function argument,
|
||
@samp{C} denotes a tree code, @samp{E} denotes an expression, @samp{L}
|
||
denotes a programming language, @samp{O} denotes a binary operator,
|
||
@samp{P} denotes a function parameter, @samp{Q} denotes an assignment
|
||
operator, @samp{V} denotes a const/volatile qualifier.
|
||
|
||
@node gfc-internal-format
|
||
@subsection GFC internal Format Strings
|
||
|
||
These format strings are used inside the GNU Fortran Compiler sources,
|
||
that is, the Fortran frontend in the GCC sources. In such a format
|
||
string, a directive starts with @samp{%} and is finished by a
|
||
specifier: @samp{%} denotes a literal percent sign, @samp{C} denotes the
|
||
current source location, @samp{L} denotes a source location, @samp{c}
|
||
denotes a character, @samp{s} denotes a string, @samp{i} and @samp{d}
|
||
denote an integer, @samp{u} denotes an unsigned integer. @samp{i},
|
||
@samp{d}, and @samp{u} may be preceded by a size specifier @samp{l}.
|
||
|
||
@node qt-format
|
||
@subsection Qt Format Strings
|
||
|
||
Qt format strings are described in the documentation of the QString class
|
||
@uref{file:/usr/lib/qt-4.3.0/doc/html/qstring.html}.
|
||
In summary, a directive consists of a @samp{%} followed by a digit. The same
|
||
directive cannot occur more than once in a format string.
|
||
|
||
@node qt-plural-format
|
||
@subsection Qt Plural Format Strings
|
||
|
||
Qt plural format strings are described in the documentation of the QObject::tr method
|
||
@uref{file:/usr/lib/qt-4.3.0/doc/html/qobject.html}.
|
||
In summary, the only allowed directive is @samp{%n}.
|
||
|
||
@node kde-format
|
||
@subsection KDE Format Strings
|
||
|
||
KDE 4 format strings are defined as follows:
|
||
A directive consists of a @samp{%} followed by a non-zero decimal number.
|
||
If a @samp{%n} occurs in a format string, all of @samp{%1}, ..., @samp{%(n-1)}
|
||
must occur as well, except possibly one of them.
|
||
|
||
@node kde-kuit-format
|
||
@subsection KUIT Format Strings
|
||
|
||
KUIT (KDE User Interface Text) is compatible with KDE 4 format strings,
|
||
while it also allows programmers to add semantic information to a format
|
||
string, through XML markup tags. For example, if the first format
|
||
directive in a string is a filename, programmers could indicate that
|
||
with a @samp{filename} tag, like @samp{<filename>%1</filename>}.
|
||
|
||
KUIT format strings are described in
|
||
@uref{https://api.kde.org/frameworks/ki18n/html/prg_guide.html#kuit_markup}.
|
||
|
||
@node boost-format
|
||
@subsection Boost Format Strings
|
||
|
||
Boost format strings are described in the documentation of the
|
||
@code{boost::format} class, at
|
||
@uref{https://www.boost.org/libs/format/doc/format.html}.
|
||
In summary, a directive has either the same syntax as in a C format string,
|
||
such as @samp{%1$+5d}, or may be surrounded by vertical bars, such as
|
||
@samp{%|1$+5d|} or @samp{%|1$+5|}, or consists of just an argument number
|
||
between percent signs, such as @samp{%1%}.
|
||
|
||
@node lua-format
|
||
@subsection Lua Format Strings
|
||
|
||
Lua format strings are described in the Lua reference manual, section @w{String Manipulation},
|
||
@uref{https://www.lua.org/manual/5.1/manual.html#pdf-string.format}.
|
||
|
||
@node javascript-format
|
||
@subsection JavaScript Format Strings
|
||
|
||
Although the JavaScript specification itself does not define any format
|
||
strings, many JavaScript implementations provide printf-like
|
||
functions. @code{xgettext} understands a set of common format strings
|
||
used in popular JavaScript implementations including Gjs, Seed, and
|
||
Node.JS. In such a format string, a directive starts with @samp{%}
|
||
and is finished by a specifier: @samp{%} denotes a literal percent
|
||
sign, @samp{c} denotes a character, @samp{s} denotes a string,
|
||
@samp{b}, @samp{d}, @samp{o}, @samp{x}, @samp{X} denote an integer,
|
||
@samp{f} denotes a floating-point number, @samp{j} denotes a JSON
|
||
object.
|
||
|
||
|
||
@node Maintainers for other Languages
|
||
@section The Maintainer's View
|
||
|
||
For the maintainer, the general procedure differs from the C language
|
||
case:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
If only a single programming language is used, the @code{XGETTEXT_OPTIONS}
|
||
variable in @file{po/Makevars} (@pxref{po/Makevars}) should be adjusted to
|
||
match the @code{xgettext} options for that particular programming language.
|
||
If the package uses more than one programming language with @code{gettext}
|
||
support, it becomes necessary to change the POT file construction rule
|
||
in @file{po/Makefile.in.in}. It is recommended to make one @code{xgettext}
|
||
invocation per programming language, each with the options appropriate for
|
||
that language, and to combine the resulting files using @code{msgcat}.
|
||
@end itemize
|
||
|
||
@node List of Programming Languages
|
||
@section Individual Programming Languages
|
||
|
||
@c Here is a list of programming languages, as used for Free Software projects
|
||
@c on SourceForge/Freshmeat, as of February 2002. Those supported by gettext
|
||
@c are marked with a star.
|
||
@c C 3580 *
|
||
@c Perl 1911 *
|
||
@c C++ 1379 *
|
||
@c Java 1200 *
|
||
@c PHP 1051 *
|
||
@c Python 613 *
|
||
@c Unix Shell 357 *
|
||
@c Tcl 266 *
|
||
@c SQL 174
|
||
@c JavaScript 118
|
||
@c Assembly 108
|
||
@c Scheme 51
|
||
@c Ruby 47
|
||
@c Lisp 45 *
|
||
@c Objective C 39 *
|
||
@c PL/SQL 29
|
||
@c Fortran 25
|
||
@c Ada 24
|
||
@c Delphi 22
|
||
@c Awk 19 *
|
||
@c Pascal 19
|
||
@c ML 19
|
||
@c Eiffel 17
|
||
@c Emacs-Lisp 14 *
|
||
@c Zope 14
|
||
@c ASP 12
|
||
@c Forth 12
|
||
@c Cold Fusion 10
|
||
@c Haskell 9
|
||
@c Visual Basic 9
|
||
@c C# 6 *
|
||
@c Smalltalk 6 *
|
||
@c Basic 5
|
||
@c Erlang 5
|
||
@c Modula 5
|
||
@c Object Pascal 5 *
|
||
@c Rexx 5
|
||
@c Dylan 4
|
||
@c Prolog 4
|
||
@c APL 3
|
||
@c PROGRESS 2
|
||
@c Euler 1
|
||
@c Euphoria 1
|
||
@c Pliant 1
|
||
@c Simula 1
|
||
@c XBasic 1
|
||
@c Logo 0
|
||
@c Other Scripting Engines 49
|
||
@c Other 116
|
||
|
||
@menu
|
||
* C:: C, C++, Objective C
|
||
* sh:: sh - Shell Script
|
||
* bash:: bash - Bourne-Again Shell Script
|
||
* Python:: Python
|
||
* Common Lisp:: GNU clisp - Common Lisp
|
||
* clisp C:: GNU clisp C sources
|
||
* Emacs Lisp:: Emacs Lisp
|
||
* librep:: librep
|
||
* Scheme:: GNU guile - Scheme
|
||
* Smalltalk:: GNU Smalltalk
|
||
* Java:: Java
|
||
* C#:: C#
|
||
* gawk:: GNU awk
|
||
* Pascal:: Pascal - Free Pascal Compiler
|
||
* wxWidgets:: wxWidgets library
|
||
* YCP:: YCP - YaST2 scripting language
|
||
* Tcl:: Tcl - Tk's scripting language
|
||
* Perl:: Perl
|
||
* PHP:: PHP Hypertext Preprocessor
|
||
* Ruby:: Ruby
|
||
* Pike:: Pike
|
||
* GCC-source:: GNU Compiler Collection sources
|
||
* Lua:: Lua
|
||
* JavaScript:: JavaScript
|
||
* Vala:: Vala
|
||
@end menu
|
||
|
||
@node C
|
||
@subsection C, C++, Objective C
|
||
@cindex C and C-like languages
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
gcc, gpp, gobjc, glibc, gettext
|
||
|
||
@item Ubuntu packages
|
||
gcc, g++, gobjc, libc6-dev, libasprintf-dev
|
||
|
||
@item File extension
|
||
For C: @code{c}, @code{h}.
|
||
@*For C++: @code{C}, @code{c++}, @code{cc}, @code{cxx}, @code{cpp}, @code{hpp}.
|
||
@*For Objective C: @code{m}.
|
||
|
||
@item String syntax
|
||
@code{"abc"}
|
||
|
||
@item gettext shorthand
|
||
@code{_("abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext}, @code{dgettext}, @code{dcgettext}, @code{ngettext},
|
||
@code{dngettext}, @code{dcngettext}
|
||
|
||
@item textdomain
|
||
@code{textdomain} function
|
||
|
||
@item bindtextdomain
|
||
@code{bindtextdomain} and @code{wbindtextdomain} functions
|
||
|
||
@item setlocale
|
||
Programmer must call @code{setlocale (LC_ALL, "")}
|
||
|
||
@item Prerequisite
|
||
@code{#include <libintl.h>}
|
||
@*@code{#include <locale.h>}
|
||
@*@code{#define _(string) gettext (string)}
|
||
|
||
@item Use or emulate GNU gettext
|
||
Use
|
||
|
||
@item Extractor
|
||
@code{xgettext -k_}
|
||
|
||
@item Formatting with positions
|
||
@code{fprintf "%2$d %1$d"}
|
||
@*In C++: @code{autosprintf "%2$d %1$d"}
|
||
(@pxref{Top, , Introduction, autosprintf, GNU autosprintf})
|
||
|
||
@item Portability
|
||
autoconf (gettext.m4) and #if ENABLE_NLS
|
||
|
||
@item po-mode marking
|
||
yes
|
||
@end table
|
||
|
||
The following examples are available in the @file{examples} directory:
|
||
@code{hello-c}, @code{hello-c-gnome}, @code{hello-c++}, @code{hello-c++-qt},
|
||
@code{hello-c++-kde}, @code{hello-c++-gnome}, @code{hello-c++-wxwidgets},
|
||
@code{hello-objc}, @code{hello-objc-gnustep}, @code{hello-objc-gnome}.
|
||
|
||
@node sh
|
||
@subsection sh - Shell Script
|
||
@cindex shell scripts
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
bash, gettext
|
||
|
||
@item Ubuntu packages
|
||
bash, gettext-base
|
||
|
||
@item File extension
|
||
@code{sh}
|
||
|
||
@item String syntax
|
||
@code{"abc"}, @code{'abc'}, @code{abc}
|
||
|
||
@item gettext shorthand
|
||
@code{"`gettext \"abc\"`"}
|
||
|
||
@item gettext/ngettext functions
|
||
@pindex gettext
|
||
@pindex ngettext
|
||
@code{gettext}, @code{ngettext} programs
|
||
@*@code{eval_gettext}, @code{eval_ngettext}, @code{eval_pgettext},
|
||
@code{eval_npgettext} shell functions
|
||
|
||
@item textdomain
|
||
@vindex TEXTDOMAIN@r{, environment variable}
|
||
environment variable @code{TEXTDOMAIN}
|
||
|
||
@item bindtextdomain
|
||
@vindex TEXTDOMAINDIR@r{, environment variable}
|
||
environment variable @code{TEXTDOMAINDIR}
|
||
|
||
@item setlocale
|
||
automatic
|
||
|
||
@item Prerequisite
|
||
@code{. gettext.sh}
|
||
|
||
@item Use or emulate GNU gettext
|
||
use
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
---
|
||
|
||
@item Portability
|
||
fully portable
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
An example is available in the @file{examples} directory: @code{hello-sh}.
|
||
|
||
@menu
|
||
* Preparing Shell Scripts:: Preparing Shell Scripts for Internationalization
|
||
* gettext.sh:: Contents of @code{gettext.sh}
|
||
* gettext Invocation:: Invoking the @code{gettext} program
|
||
* ngettext Invocation:: Invoking the @code{ngettext} program
|
||
* envsubst Invocation:: Invoking the @code{envsubst} program
|
||
* eval_gettext Invocation:: Invoking the @code{eval_gettext} function
|
||
* eval_ngettext Invocation:: Invoking the @code{eval_ngettext} function
|
||
* eval_pgettext Invocation:: Invoking the @code{eval_pgettext} function
|
||
* eval_npgettext Invocation:: Invoking the @code{eval_npgettext} function
|
||
@end menu
|
||
|
||
@node Preparing Shell Scripts
|
||
@subsubsection Preparing Shell Scripts for Internationalization
|
||
@cindex preparing shell scripts for translation
|
||
|
||
Preparing a shell script for internationalization is conceptually similar
|
||
to the steps described in @ref{Sources}. The concrete steps for shell
|
||
scripts are as follows.
|
||
|
||
@enumerate
|
||
@item
|
||
Insert the line
|
||
|
||
@smallexample
|
||
. gettext.sh
|
||
@end smallexample
|
||
|
||
near the top of the script. @code{gettext.sh} is a shell function library
|
||
that provides the functions
|
||
@code{eval_gettext} (see @ref{eval_gettext Invocation}),
|
||
@code{eval_ngettext} (see @ref{eval_ngettext Invocation}),
|
||
@code{eval_pgettext} (see @ref{eval_pgettext Invocation}), and
|
||
@code{eval_npgettext} (see @ref{eval_npgettext Invocation}).
|
||
You have to ensure that @code{gettext.sh} can be found in the @code{PATH}.
|
||
|
||
@item
|
||
Set and export the @code{TEXTDOMAIN} and @code{TEXTDOMAINDIR} environment
|
||
variables. Usually @code{TEXTDOMAIN} is the package or program name, and
|
||
@code{TEXTDOMAINDIR} is the absolute pathname corresponding to
|
||
@code{$prefix/share/locale}, where @code{$prefix} is the installation location.
|
||
|
||
@smallexample
|
||
TEXTDOMAIN=@@PACKAGE@@
|
||
export TEXTDOMAIN
|
||
TEXTDOMAINDIR=@@LOCALEDIR@@
|
||
export TEXTDOMAINDIR
|
||
@end smallexample
|
||
|
||
@item
|
||
Prepare the strings for translation, as described in @ref{Preparing Strings}.
|
||
|
||
@item
|
||
Simplify translatable strings so that they don't contain command substitution
|
||
(@code{"`...`"} or @code{"$(...)"}), variable access with defaulting (like
|
||
@code{$@{@var{variable}-@var{default}@}}), access to positional arguments
|
||
(like @code{$0}, @code{$1}, ...) or highly volatile shell variables (like
|
||
@code{$?}). This can always be done through simple local code restructuring.
|
||
For example,
|
||
|
||
@smallexample
|
||
echo "Usage: $0 [OPTION] FILE..."
|
||
@end smallexample
|
||
|
||
becomes
|
||
|
||
@smallexample
|
||
program_name=$0
|
||
echo "Usage: $program_name [OPTION] FILE..."
|
||
@end smallexample
|
||
|
||
Similarly,
|
||
|
||
@smallexample
|
||
echo "Remaining files: `ls | wc -l`"
|
||
@end smallexample
|
||
|
||
becomes
|
||
|
||
@smallexample
|
||
filecount="`ls | wc -l`"
|
||
echo "Remaining files: $filecount"
|
||
@end smallexample
|
||
|
||
@item
|
||
For each translatable string, change the output command @samp{echo} or
|
||
@samp{$echo} to @samp{gettext} (if the string contains no references to
|
||
shell variables) or to @samp{eval_gettext} (if it refers to shell variables),
|
||
followed by a no-argument @samp{echo} command (to account for the terminating
|
||
newline). Similarly, for cases with plural handling, replace a conditional
|
||
@samp{echo} command with an invocation of @samp{ngettext} or
|
||
@samp{eval_ngettext}, followed by a no-argument @samp{echo} command.
|
||
|
||
When doing this, you also need to add an extra backslash before the dollar
|
||
sign in references to shell variables, so that the @samp{eval_gettext}
|
||
function receives the translatable string before the variable values are
|
||
substituted into it. For example,
|
||
|
||
@smallexample
|
||
echo "Remaining files: $filecount"
|
||
@end smallexample
|
||
|
||
becomes
|
||
|
||
@smallexample
|
||
eval_gettext "Remaining files: \$filecount"; echo
|
||
@end smallexample
|
||
|
||
If the output command is not @samp{echo}, you can make it use @samp{echo}
|
||
nevertheless, through the use of backquotes. However, note that inside
|
||
backquotes, backslashes must be doubled to be effective (because the
|
||
backquoting eats one level of backslashes). For example, assuming that
|
||
@samp{error} is a shell function that signals an error,
|
||
|
||
@smallexample
|
||
error "file not found: $filename"
|
||
@end smallexample
|
||
|
||
is first transformed into
|
||
|
||
@smallexample
|
||
error "`echo \"file not found: \$filename\"`"
|
||
@end smallexample
|
||
|
||
which then becomes
|
||
|
||
@smallexample
|
||
error "`eval_gettext \"file not found: \\\$filename\"`"
|
||
@end smallexample
|
||
@end enumerate
|
||
|
||
@node gettext.sh
|
||
@subsubsection Contents of @code{gettext.sh}
|
||
|
||
@code{gettext.sh}, contained in the run-time package of GNU gettext, provides
|
||
the following:
|
||
|
||
@itemize @bullet
|
||
@item $echo
|
||
The variable @code{echo} is set to a command that outputs its first argument
|
||
and a newline, without interpreting backslashes in the argument string.
|
||
|
||
@item eval_gettext
|
||
See @ref{eval_gettext Invocation}.
|
||
|
||
@item eval_ngettext
|
||
See @ref{eval_ngettext Invocation}.
|
||
|
||
@item eval_pgettext
|
||
See @ref{eval_pgettext Invocation}.
|
||
|
||
@item eval_npgettext
|
||
See @ref{eval_npgettext Invocation}.
|
||
@end itemize
|
||
|
||
@node gettext Invocation
|
||
@subsubsection Invoking the @code{gettext} program
|
||
|
||
@include rt-gettext.texi
|
||
|
||
Note: @code{xgettext} supports only the one-argument form of the
|
||
@code{gettext} invocation, where no options are present and the
|
||
@var{textdomain} is implicit, from the environment.
|
||
|
||
@node ngettext Invocation
|
||
@subsubsection Invoking the @code{ngettext} program
|
||
|
||
@include rt-ngettext.texi
|
||
|
||
Note: @code{xgettext} supports only the three-argument form of the
|
||
@code{ngettext} invocation, where no options are present and the
|
||
@var{textdomain} is implicit, from the environment.
|
||
|
||
@node envsubst Invocation
|
||
@subsubsection Invoking the @code{envsubst} program
|
||
|
||
@include rt-envsubst.texi
|
||
|
||
@node eval_gettext Invocation
|
||
@subsubsection Invoking the @code{eval_gettext} function
|
||
|
||
@cindex @code{eval_gettext} function, usage
|
||
@example
|
||
eval_gettext @var{msgid}
|
||
@end example
|
||
|
||
@cindex lookup message translation
|
||
This function outputs the native language translation of a textual message,
|
||
performing dollar-substitution on the result. Note that only shell variables
|
||
mentioned in @var{msgid} will be dollar-substituted in the result.
|
||
|
||
@node eval_ngettext Invocation
|
||
@subsubsection Invoking the @code{eval_ngettext} function
|
||
|
||
@cindex @code{eval_ngettext} function, usage
|
||
@example
|
||
eval_ngettext @var{msgid} @var{msgid-plural} @var{count}
|
||
@end example
|
||
|
||
@cindex lookup plural message translation
|
||
This function outputs the native language translation of a textual message
|
||
whose grammatical form depends on a number, performing dollar-substitution
|
||
on the result. Note that only shell variables mentioned in @var{msgid} or
|
||
@var{msgid-plural} will be dollar-substituted in the result.
|
||
|
||
@node eval_pgettext Invocation
|
||
@subsubsection Invoking the @code{eval_pgettext} function
|
||
|
||
@cindex @code{eval_pgettext} function, usage
|
||
@example
|
||
eval_pgettext @var{msgctxt} @var{msgid}
|
||
@end example
|
||
|
||
@cindex lookup message translation with context
|
||
This function outputs the native language translation of a textual message
|
||
in the given context @var{msgctxt} (see @ref{Contexts}), performing
|
||
dollar-substitution on the result. Note that only shell variables mentioned
|
||
in @var{msgid} will be dollar-substituted in the result.
|
||
|
||
@node eval_npgettext Invocation
|
||
@subsubsection Invoking the @code{eval_npgettext} function
|
||
|
||
@cindex @code{eval_npgettext} function, usage
|
||
@example
|
||
eval_npgettext @var{msgctxt} @var{msgid} @var{msgid-plural} @var{count}
|
||
@end example
|
||
|
||
@cindex lookup plural message translation with context
|
||
This function outputs the native language translation of a textual message
|
||
whose grammatical form depends on a number in the given context @var{msgctxt}
|
||
(see @ref{Contexts}), performing dollar-substitution on the result. Note
|
||
that only shell variables mentioned in @var{msgid} or @var{msgid-plural}
|
||
will be dollar-substituted in the result.
|
||
|
||
@node bash
|
||
@subsection bash - Bourne-Again Shell Script
|
||
@cindex bash
|
||
|
||
GNU @code{bash} 2.0 or newer has a special shorthand for translating a
|
||
string and substituting variable values in it: @code{$"msgid"}. But
|
||
the use of this construct is @strong{discouraged}, due to the security
|
||
holes it opens and due to its portability problems.
|
||
|
||
The security holes of @code{$"..."} come from the fact that after looking up
|
||
the translation of the string, @code{bash} processes it like it processes
|
||
any double-quoted string: dollar and backquote processing, like @samp{eval}
|
||
does.
|
||
|
||
@enumerate
|
||
@item
|
||
In a locale whose encoding is one of BIG5, BIG5-HKSCS, GBK, GB18030, SHIFT_JIS,
|
||
JOHAB, some double-byte characters have a second byte whose value is
|
||
@code{0x60}. For example, the byte sequence @code{\xe0\x60} is a single
|
||
character in these locales. Many versions of @code{bash} (all versions
|
||
up to bash-2.05, and newer versions on platforms without @code{mbsrtowcs()}
|
||
function) don't know about character boundaries and see a backquote character
|
||
where there is only a particular Chinese character. Thus it can start
|
||
executing part of the translation as a command list. This situation can occur
|
||
even without the translator being aware of it: if the translator provides
|
||
translations in the UTF-8 encoding, it is the @code{gettext()} function which
|
||
will, during its conversion from the translator's encoding to the user's
|
||
locale's encoding, produce the dangerous @code{\x60} bytes.
|
||
|
||
@item
|
||
A translator could---voluntarily or inadvertently---use backquotes
|
||
@code{"`...`"} or dollar-parentheses @code{"$(...)"} in her translations.
|
||
The enclosed strings would be executed as command lists by the shell.
|
||
@end enumerate
|
||
|
||
The portability problem is that @code{bash} must be built with
|
||
internationalization support; this is normally not the case on systems
|
||
that don't have the @code{gettext()} function in libc.
|
||
|
||
@node Python
|
||
@subsection Python
|
||
@cindex Python
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
python
|
||
|
||
@item Ubuntu packages
|
||
python
|
||
|
||
@item File extension
|
||
@code{py}
|
||
|
||
@item String syntax
|
||
@code{'abc'}, @code{u'abc'}, @code{r'abc'}, @code{ur'abc'},
|
||
@*@code{"abc"}, @code{u"abc"}, @code{r"abc"}, @code{ur"abc"},
|
||
@*@code{'''abc'''}, @code{u'''abc'''}, @code{r'''abc'''}, @code{ur'''abc'''},
|
||
@*@code{"""abc"""}, @code{u"""abc"""}, @code{r"""abc"""}, @code{ur"""abc"""}
|
||
|
||
@item gettext shorthand
|
||
@code{_('abc')} etc.
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext.gettext}, @code{gettext.dgettext},
|
||
@code{gettext.ngettext}, @code{gettext.dngettext},
|
||
also @code{ugettext}, @code{ungettext}
|
||
|
||
@item textdomain
|
||
@code{gettext.textdomain} function, or
|
||
@code{gettext.install(@var{domain})} function
|
||
|
||
@item bindtextdomain
|
||
@code{gettext.bindtextdomain} function, or
|
||
@code{gettext.install(@var{domain},@var{localedir})} function
|
||
|
||
@item setlocale
|
||
not used by the gettext emulation
|
||
|
||
@item Prerequisite
|
||
@code{import gettext}
|
||
|
||
@item Use or emulate GNU gettext
|
||
emulate
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
@code{'...%(ident)d...' % @{ 'ident': value @}}
|
||
|
||
@item Portability
|
||
fully portable
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
An example is available in the @file{examples} directory: @code{hello-python}.
|
||
|
||
A note about format strings: Python supports format strings with unnamed
|
||
arguments, such as @code{'...%d...'}, and format strings with named arguments,
|
||
such as @code{'...%(ident)d...'}. The latter are preferable for
|
||
internationalized programs, for two reasons:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
When a format string takes more than one argument, the translator can provide
|
||
a translation that uses the arguments in a different order, if the format
|
||
string uses named arguments. For example, the translator can reformulate
|
||
@smallexample
|
||
"'%(volume)s' has only %(freespace)d bytes free."
|
||
@end smallexample
|
||
@noindent
|
||
to
|
||
@smallexample
|
||
"Only %(freespace)d bytes free on '%(volume)s'."
|
||
@end smallexample
|
||
@noindent
|
||
Additionally, the identifiers also provide some context to the translator.
|
||
|
||
@item
|
||
In the context of plural forms, the format string used for the singular form
|
||
does not use the numeric argument in many languages. Even in English, one
|
||
prefers to write @code{"one hour"} instead of @code{"1 hour"}. Omitting
|
||
individual arguments from format strings like this is only possible with
|
||
the named argument syntax. (With unnamed arguments, Python -- unlike C --
|
||
verifies that the format string uses all supplied arguments.)
|
||
@end itemize
|
||
|
||
@node Common Lisp
|
||
@subsection GNU clisp - Common Lisp
|
||
@cindex Common Lisp
|
||
@cindex Lisp
|
||
@cindex clisp
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
clisp 2.28 or newer
|
||
|
||
@item Ubuntu packages
|
||
clisp
|
||
|
||
@item File extension
|
||
@code{lisp}
|
||
|
||
@item String syntax
|
||
@code{"abc"}
|
||
|
||
@item gettext shorthand
|
||
@code{(_ "abc")}, @code{(ENGLISH "abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{i18n:gettext}, @code{i18n:ngettext}
|
||
|
||
@item textdomain
|
||
@code{i18n:textdomain}
|
||
|
||
@item bindtextdomain
|
||
@code{i18n:textdomaindir}
|
||
|
||
@item setlocale
|
||
automatic
|
||
|
||
@item Prerequisite
|
||
---
|
||
|
||
@item Use or emulate GNU gettext
|
||
use
|
||
|
||
@item Extractor
|
||
@code{xgettext -k_ -kENGLISH}
|
||
|
||
@item Formatting with positions
|
||
@code{format "~1@@*~D ~0@@*~D"}
|
||
|
||
@item Portability
|
||
On platforms without gettext, no translation.
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
An example is available in the @file{examples} directory: @code{hello-clisp}.
|
||
|
||
@node clisp C
|
||
@subsection GNU clisp C sources
|
||
@cindex clisp C sources
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
clisp
|
||
|
||
@item Ubuntu packages
|
||
clisp
|
||
|
||
@item File extension
|
||
@code{d}
|
||
|
||
@item String syntax
|
||
@code{"abc"}
|
||
|
||
@item gettext shorthand
|
||
@code{ENGLISH ? "abc" : ""}
|
||
@*@code{GETTEXT("abc")}
|
||
@*@code{GETTEXTL("abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{clgettext}, @code{clgettextl}
|
||
|
||
@item textdomain
|
||
---
|
||
|
||
@item bindtextdomain
|
||
---
|
||
|
||
@item setlocale
|
||
automatic
|
||
|
||
@item Prerequisite
|
||
@code{#include "lispbibl.c"}
|
||
|
||
@item Use or emulate GNU gettext
|
||
use
|
||
|
||
@item Extractor
|
||
@code{clisp-xgettext}
|
||
|
||
@item Formatting with positions
|
||
@code{fprintf "%2$d %1$d"}
|
||
|
||
@item Portability
|
||
On platforms without gettext, no translation.
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
@node Emacs Lisp
|
||
@subsection Emacs Lisp
|
||
@cindex Emacs Lisp
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
emacs, xemacs
|
||
|
||
@item Ubuntu packages
|
||
emacs, xemacs21
|
||
|
||
@item File extension
|
||
@code{el}
|
||
|
||
@item String syntax
|
||
@code{"abc"}
|
||
|
||
@item gettext shorthand
|
||
@code{(_"abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext}, @code{dgettext} (xemacs only)
|
||
|
||
@item textdomain
|
||
@code{domain} special form (xemacs only)
|
||
|
||
@item bindtextdomain
|
||
@code{bind-text-domain} function (xemacs only)
|
||
|
||
@item setlocale
|
||
automatic
|
||
|
||
@item Prerequisite
|
||
---
|
||
|
||
@item Use or emulate GNU gettext
|
||
use
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
@code{format "%2$d %1$d"}
|
||
|
||
@item Portability
|
||
Only XEmacs. Without @code{I18N3} defined at build time, no translation.
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
@node librep
|
||
@subsection librep
|
||
@cindex @code{librep} Lisp
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
librep 0.15.3 or newer
|
||
|
||
@item Ubuntu packages
|
||
librep16
|
||
|
||
@item File extension
|
||
@code{jl}
|
||
|
||
@item String syntax
|
||
@code{"abc"}
|
||
|
||
@item gettext shorthand
|
||
@code{(_"abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext}
|
||
|
||
@item textdomain
|
||
@code{textdomain} function
|
||
|
||
@item bindtextdomain
|
||
@code{bindtextdomain} function
|
||
|
||
@item setlocale
|
||
---
|
||
|
||
@item Prerequisite
|
||
@code{(require 'rep.i18n.gettext)}
|
||
|
||
@item Use or emulate GNU gettext
|
||
use
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
@code{format "%2$d %1$d"}
|
||
|
||
@item Portability
|
||
On platforms without gettext, no translation.
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
An example is available in the @file{examples} directory: @code{hello-librep}.
|
||
|
||
@node Scheme
|
||
@subsection GNU guile - Scheme
|
||
@cindex Scheme
|
||
@cindex guile
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
guile
|
||
|
||
@item Ubuntu packages
|
||
guile-2.0
|
||
|
||
@item File extension
|
||
@code{scm}
|
||
|
||
@item String syntax
|
||
@code{"abc"}
|
||
|
||
@item gettext shorthand
|
||
@code{(_ "abc")}, @code{_"abc"} (GIMP script-fu extension)
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext}, @code{ngettext}
|
||
|
||
@item textdomain
|
||
@code{textdomain}
|
||
|
||
@item bindtextdomain
|
||
@code{bindtextdomain}
|
||
|
||
@item setlocale
|
||
@code{(catch #t (lambda () (setlocale LC_ALL "")) (lambda args #f))}
|
||
|
||
@item Prerequisite
|
||
@code{(use-modules (ice-9 format))}
|
||
|
||
@item Use or emulate GNU gettext
|
||
use
|
||
|
||
@item Extractor
|
||
@code{xgettext -k_}
|
||
|
||
@item Formatting with positions
|
||
@c @code{format "~1@@*~D ~0@@*~D~2@@*"}, requires @code{(use-modules (ice-9 format))}
|
||
@c not yet supported
|
||
---
|
||
|
||
@item Portability
|
||
On platforms without gettext, no translation.
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
An example is available in the @file{examples} directory: @code{hello-guile}.
|
||
|
||
@node Smalltalk
|
||
@subsection GNU Smalltalk
|
||
@cindex Smalltalk
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
smalltalk
|
||
|
||
@item Ubuntu packages
|
||
gnu-smalltalk
|
||
|
||
@item File extension
|
||
@code{st}
|
||
|
||
@item String syntax
|
||
@code{'abc'}
|
||
|
||
@item gettext shorthand
|
||
@code{NLS ? 'abc'}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{LcMessagesDomain>>#at:}, @code{LcMessagesDomain>>#at:plural:with:}
|
||
|
||
@item textdomain
|
||
@code{LcMessages>>#domain:localeDirectory:} (returns a @code{LcMessagesDomain}
|
||
object).@*
|
||
Example: @code{I18N Locale default messages domain: 'gettext' localeDirectory: '/usr/local/share/locale'}
|
||
|
||
@item bindtextdomain
|
||
@code{LcMessages>>#domain:localeDirectory:}, see above.
|
||
|
||
@item setlocale
|
||
Automatic if you use @code{I18N Locale default}.
|
||
|
||
@item Prerequisite
|
||
@code{PackageLoader fileInPackage: 'I18N'!}
|
||
|
||
@item Use or emulate GNU gettext
|
||
emulate
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
@code{'%1 %2' bindWith: 'Hello' with: 'world'}
|
||
|
||
@item Portability
|
||
fully portable
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
An example is available in the @file{examples} directory:
|
||
@code{hello-smalltalk}.
|
||
|
||
@node Java
|
||
@subsection Java
|
||
@cindex Java
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
java, java2
|
||
|
||
@item Ubuntu packages
|
||
default-jdk
|
||
|
||
@item File extension
|
||
@code{java}
|
||
|
||
@item String syntax
|
||
@code{"abc"}, @code{"""text block"""}
|
||
|
||
@item gettext shorthand
|
||
@code{i18n("abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{GettextResource.gettext}, @code{GettextResource.ngettext},
|
||
@code{GettextResource.pgettext}, @code{GettextResource.npgettext}
|
||
|
||
@item textdomain
|
||
---, use @code{ResourceBundle.getBundle} instead
|
||
|
||
@item bindtextdomain
|
||
---, use CLASSPATH instead
|
||
|
||
@item setlocale
|
||
automatic
|
||
|
||
@item Prerequisite
|
||
---
|
||
|
||
@item Use or emulate GNU gettext
|
||
---, uses a Java specific message catalog format
|
||
|
||
@item Extractor
|
||
@code{xgettext -ki18n}
|
||
|
||
@item Formatting with positions
|
||
@code{MessageFormat.format "@{1,number@} @{0,number@}"}
|
||
or @code{String.format "%2$d %1$d"}
|
||
|
||
@item Portability
|
||
fully portable
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
Before marking strings as internationalizable, uses of the string
|
||
concatenation operator need to be converted to @code{MessageFormat}
|
||
applications. For example, @code{"file "+filename+" not found"} becomes
|
||
@code{MessageFormat.format("file @{0@} not found", new Object[] @{ filename @})}.
|
||
Only after this is done, can the strings be marked and extracted.
|
||
|
||
GNU gettext uses the native Java internationalization mechanism, namely
|
||
@code{ResourceBundle}s. There are two formats of @code{ResourceBundle}s:
|
||
@code{.properties} files and @code{.class} files. The @code{.properties}
|
||
format is a text file which the translators can directly edit, like PO
|
||
files, but which doesn't support plural forms. Whereas the @code{.class}
|
||
format is compiled from @code{.java} source code and can support plural
|
||
forms (provided it is accessed through an appropriate API, see below).
|
||
|
||
To convert a PO file to a @code{.properties} file, the @code{msgcat}
|
||
program can be used with the option @code{--properties-output}. To convert
|
||
a @code{.properties} file back to a PO file, the @code{msgcat} program
|
||
can be used with the option @code{--properties-input}. All the tools
|
||
that manipulate PO files can work with @code{.properties} files as well,
|
||
if given the @code{--properties-input} and/or @code{--properties-output}
|
||
option.
|
||
|
||
To convert a PO file to a ResourceBundle class, the @code{msgfmt} program
|
||
can be used with the option @code{--java} or @code{--java2}. To convert a
|
||
ResourceBundle back to a PO file, the @code{msgunfmt} program can be used
|
||
with the option @code{--java}.
|
||
|
||
Two different programmatic APIs can be used to access ResourceBundles.
|
||
Note that both APIs work with all kinds of ResourceBundles, whether
|
||
GNU gettext generated classes, or other @code{.class} or @code{.properties}
|
||
files.
|
||
|
||
@enumerate
|
||
@item
|
||
The @code{java.util.ResourceBundle} API.
|
||
|
||
In particular, its @code{getString} function returns a string translation.
|
||
Note that a missing translation yields a @code{MissingResourceException}.
|
||
|
||
This has the advantage of being the standard API. And it does not require
|
||
any additional libraries, only the @code{msgcat} generated @code{.properties}
|
||
files or the @code{msgfmt} generated @code{.class} files. But it cannot do
|
||
plural handling, even if the resource was generated by @code{msgfmt} from
|
||
a PO file with plural handling.
|
||
|
||
@item
|
||
The @code{gnu.gettext.GettextResource} API.
|
||
|
||
Reference documentation in Javadoc 1.1 style format is in the
|
||
@uref{javadoc2/index.html,javadoc2 directory}.
|
||
|
||
Its @code{gettext} function returns a string translation. Note that when
|
||
a translation is missing, the @var{msgid} argument is returned unchanged.
|
||
|
||
This has the advantage of having the @code{ngettext} function for plural
|
||
handling and the @code{pgettext} and @code{npgettext} functions for strings constrained
|
||
to a particular context.
|
||
|
||
@cindex @code{libintl} for Java
|
||
To use this API, one needs the @code{libintl.jar} file which is part of
|
||
the GNU gettext package and distributed under the LGPL.
|
||
@end enumerate
|
||
|
||
Four examples, using the second API, are available in the @file{examples}
|
||
directory: @code{hello-java}, @code{hello-java-awt}, @code{hello-java-swing},
|
||
@code{hello-java-qtjambi}.
|
||
|
||
Now, to make use of the API and define a shorthand for @samp{getString},
|
||
there are three idioms that you can choose from:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
(This one assumes Java 1.5 or newer.)
|
||
In a unique class of your project, say @samp{Util}, define a static variable
|
||
holding the @code{ResourceBundle} instance and the shorthand:
|
||
|
||
@smallexample
|
||
private static ResourceBundle myResources =
|
||
ResourceBundle.getBundle("domain-name");
|
||
public static String i18n(String s) @{
|
||
return myResources.getString(s);
|
||
@}
|
||
@end smallexample
|
||
|
||
All classes containing internationalized strings then contain
|
||
|
||
@smallexample
|
||
import static Util.i18n;
|
||
@end smallexample
|
||
|
||
@noindent
|
||
and the shorthand is used like this:
|
||
|
||
@smallexample
|
||
System.out.println(i18n("Operation completed."));
|
||
@end smallexample
|
||
|
||
@item
|
||
In a unique class of your project, say @samp{Util}, define a static variable
|
||
holding the @code{ResourceBundle} instance:
|
||
|
||
@smallexample
|
||
public static ResourceBundle myResources =
|
||
ResourceBundle.getBundle("domain-name");
|
||
@end smallexample
|
||
|
||
All classes containing internationalized strings then contain
|
||
|
||
@smallexample
|
||
private static ResourceBundle res = Util.myResources;
|
||
private static String i18n(String s) @{ return res.getString(s); @}
|
||
@end smallexample
|
||
|
||
@noindent
|
||
and the shorthand is used like this:
|
||
|
||
@smallexample
|
||
System.out.println(i18n("Operation completed."));
|
||
@end smallexample
|
||
|
||
@item
|
||
You add a class with a very short name, say @samp{S}, containing just the
|
||
definition of the resource bundle and of the shorthand:
|
||
|
||
@smallexample
|
||
public class S @{
|
||
public static ResourceBundle myResources =
|
||
ResourceBundle.getBundle("domain-name");
|
||
public static String i18n(String s) @{
|
||
return myResources.getString(s);
|
||
@}
|
||
@}
|
||
@end smallexample
|
||
|
||
@noindent
|
||
and the shorthand is used like this:
|
||
|
||
@smallexample
|
||
System.out.println(S.i18n("Operation completed."));
|
||
@end smallexample
|
||
@end itemize
|
||
|
||
Which of the three idioms you choose will depend on whether your project
|
||
requires portability to Java versions prior to Java 1.5 and, if so, whether
|
||
copying two lines of code into every class is more acceptable in your project
|
||
than a class with a single-letter name.
|
||
|
||
@node C#
|
||
@subsection C#
|
||
@cindex C#
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
mono
|
||
|
||
@item Ubuntu packages
|
||
mono-mcs
|
||
|
||
@item File extension
|
||
@code{cs}
|
||
|
||
@item String syntax
|
||
@code{"abc"}, @code{@@"abc"}
|
||
|
||
@item gettext shorthand
|
||
@code{_("abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{GettextResourceManager.GetString},
|
||
@code{GettextResourceManager.GetPluralString},
|
||
@code{GettextResourceManager.GetParticularString},
|
||
@code{GettextResourceManager.GetParticularPluralString}
|
||
|
||
@item textdomain
|
||
@code{new GettextResourceManager(domain)}
|
||
|
||
@item bindtextdomain
|
||
---, compiled message catalogs are located in subdirectories of the directory
|
||
containing the executable
|
||
|
||
@item setlocale
|
||
automatic
|
||
|
||
@item Prerequisite
|
||
---
|
||
|
||
@item Use or emulate GNU gettext
|
||
---, uses a C# specific message catalog format
|
||
|
||
@item Extractor
|
||
@code{xgettext -k_}
|
||
|
||
@item Formatting with positions
|
||
@code{String.Format "@{1@} @{0@}"}
|
||
|
||
@item Portability
|
||
fully portable
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
Before marking strings as internationalizable, uses of the string
|
||
concatenation operator need to be converted to @code{String.Format}
|
||
invocations. For example, @code{"file "+filename+" not found"} becomes
|
||
@code{String.Format("file @{0@} not found", filename)}.
|
||
Only after this is done, can the strings be marked and extracted.
|
||
|
||
GNU gettext uses the native C#/.NET internationalization mechanism, namely
|
||
the classes @code{ResourceManager} and @code{ResourceSet}. Applications
|
||
use the @code{ResourceManager} methods to retrieve the native language
|
||
translation of strings. An instance of @code{ResourceSet} is the in-memory
|
||
representation of a message catalog file. The @code{ResourceManager} loads
|
||
and accesses @code{ResourceSet} instances as needed to look up the
|
||
translations.
|
||
|
||
There are two formats of @code{ResourceSet}s that can be directly loaded by
|
||
the C# runtime: @code{.resources} files and @code{.dll} files.
|
||
|
||
@itemize @bullet
|
||
@item
|
||
The @code{.resources} format is a binary file usually generated through the
|
||
@code{resgen} or @code{monoresgen} utility, but which doesn't support plural
|
||
forms. @code{.resources} files can also be embedded in .NET @code{.exe} files.
|
||
This only affects whether a file system access is performed to load the message
|
||
catalog; it doesn't affect the contents of the message catalog.
|
||
|
||
@item
|
||
On the other hand, the @code{.dll} format is a binary file that is compiled
|
||
from @code{.cs} source code and can support plural forms (provided it is
|
||
accessed through the GNU gettext API, see below).
|
||
@end itemize
|
||
|
||
Note that these .NET @code{.dll} and @code{.exe} files are not tied to a
|
||
particular platform; their file format and GNU gettext for C# can be used
|
||
on any platform.
|
||
|
||
To convert a PO file to a @code{.resources} file, the @code{msgfmt} program
|
||
can be used with the option @samp{--csharp-resources}. To convert a
|
||
@code{.resources} file back to a PO file, the @code{msgunfmt} program can be
|
||
used with the option @samp{--csharp-resources}. You can also, in some cases,
|
||
use the @code{monoresgen} program (from the @code{mono}/@code{mcs} package).
|
||
This program can also convert a @code{.resources} file back to a PO file. But
|
||
beware: as of this writing (January 2004), the @code{monoresgen} converter is
|
||
quite buggy.
|
||
|
||
To convert a PO file to a @code{.dll} file, the @code{msgfmt} program can be
|
||
used with the option @code{--csharp}. The result will be a @code{.dll} file
|
||
containing a subclass of @code{GettextResourceSet}, which itself is a subclass
|
||
of @code{ResourceSet}. To convert a @code{.dll} file containing a
|
||
@code{GettextResourceSet} subclass back to a PO file, the @code{msgunfmt}
|
||
program can be used with the option @code{--csharp}.
|
||
|
||
The advantages of the @code{.dll} format over the @code{.resources} format
|
||
are:
|
||
|
||
@enumerate
|
||
@item
|
||
Freedom to localize: Users can add their own translations to an application
|
||
after it has been built and distributed. Whereas when the programmer uses
|
||
a @code{ResourceManager} constructor provided by the system, the set of
|
||
@code{.resources} files for an application must be specified when the
|
||
application is built and cannot be extended afterwards.
|
||
@c If this were the only issue with the @code{.resources} format, one could
|
||
@c use the @code{ResourceManager.CreateFileBasedResourceManager} function.
|
||
|
||
@item
|
||
Plural handling: A message catalog in @code{.dll} format supports the plural
|
||
handling function @code{GetPluralString}. Whereas @code{.resources} files can
|
||
only contain data and only support lookups that depend on a single string.
|
||
|
||
@item
|
||
Context handling: A message catalog in @code{.dll} format supports the
|
||
query-with-context functions @code{GetParticularString} and
|
||
@code{GetParticularPluralString}. Whereas @code{.resources} files can
|
||
only contain data and only support lookups that depend on a single string.
|
||
|
||
@item
|
||
The @code{GettextResourceManager} that loads the message catalogs in
|
||
@code{.dll} format also provides for inheritance on a per-message basis.
|
||
For example, in Austrian (@code{de_AT}) locale, translations from the German
|
||
(@code{de}) message catalog will be used for messages not found in the
|
||
Austrian message catalog. This has the consequence that the Austrian
|
||
translators need only translate those few messages for which the translation
|
||
into Austrian differs from the German one. Whereas when working with
|
||
@code{.resources} files, each message catalog must provide the translations
|
||
of all messages by itself.
|
||
|
||
@item
|
||
The @code{GettextResourceManager} that loads the message catalogs in
|
||
@code{.dll} format also provides for a fallback: The English @var{msgid} is
|
||
returned when no translation can be found. Whereas when working with
|
||
@code{.resources} files, a language-neutral @code{.resources} file must
|
||
explicitly be provided as a fallback.
|
||
@end enumerate
|
||
|
||
On the side of the programmatic APIs, the programmer can use either the
|
||
standard @code{ResourceManager} API or the GNU @code{GettextResourceManager}
|
||
API. The latter is an extension of the former, because
|
||
@code{GettextResourceManager} is a subclass of @code{ResourceManager}.
|
||
|
||
@enumerate
|
||
@item
|
||
The @code{System.Resources.ResourceManager} API.
|
||
|
||
This API works with resources in @code{.resources} format.
|
||
|
||
The creation of the @code{ResourceManager} is done through
|
||
@smallexample
|
||
new ResourceManager(domainname, Assembly.GetExecutingAssembly())
|
||
@end smallexample
|
||
@noindent
|
||
|
||
The @code{GetString} function returns a string's translation. Note that this
|
||
function returns null when a translation is missing (i.e.@: not even found in
|
||
the fallback resource file).
|
||
|
||
@item
|
||
The @code{GNU.Gettext.GettextResourceManager} API.
|
||
|
||
This API works with resources in @code{.dll} format.
|
||
|
||
Reference documentation is in the
|
||
@uref{csharpdoc/index.html,csharpdoc directory}.
|
||
|
||
The creation of the @code{ResourceManager} is done through
|
||
@smallexample
|
||
new GettextResourceManager(domainname)
|
||
@end smallexample
|
||
|
||
The @code{GetString} function returns a string's translation. Note that when
|
||
a translation is missing, the @var{msgid} argument is returned unchanged.
|
||
|
||
The @code{GetPluralString} function returns a string translation with plural
|
||
handling, like the @code{ngettext} function in C.
|
||
|
||
The @code{GetParticularString} function returns a string's translation,
|
||
specific to a particular context, like the @code{pgettext} function in C.
|
||
Note that when a translation is missing, the @var{msgid} argument is returned
|
||
unchanged.
|
||
|
||
The @code{GetParticularPluralString} function returns a string translation,
|
||
specific to a particular context, with plural handling, like the
|
||
@code{npgettext} function in C.
|
||
|
||
@cindex @code{libintl} for C#
|
||
To use this API, one needs the @code{GNU.Gettext.dll} file which is part of
|
||
the GNU gettext package and distributed under the LGPL.
|
||
@end enumerate
|
||
|
||
You can also mix both approaches: use the
|
||
@code{GNU.Gettext.GettextResourceManager} constructor, but otherwise use
|
||
only the @code{ResourceManager} type and only the @code{GetString} method.
|
||
This is appropriate when you want to profit from the tools for PO files,
|
||
but don't want to change an existing source code that uses
|
||
@code{ResourceManager} and don't (yet) need the @code{GetPluralString} method.
|
||
|
||
Two examples, using the second API, are available in the @file{examples}
|
||
directory: @code{hello-csharp}, @code{hello-csharp-forms}.
|
||
|
||
Now, to make use of the API and define a shorthand for @samp{GetString},
|
||
there are two idioms that you can choose from:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
In a unique class of your project, say @samp{Util}, define a static variable
|
||
holding the @code{ResourceManager} instance:
|
||
|
||
@smallexample
|
||
public static GettextResourceManager MyResourceManager =
|
||
new GettextResourceManager("domain-name");
|
||
@end smallexample
|
||
|
||
All classes containing internationalized strings then contain
|
||
|
||
@smallexample
|
||
private static GettextResourceManager Res = Util.MyResourceManager;
|
||
private static String _(String s) @{ return Res.GetString(s); @}
|
||
@end smallexample
|
||
|
||
@noindent
|
||
and the shorthand is used like this:
|
||
|
||
@smallexample
|
||
Console.WriteLine(_("Operation completed."));
|
||
@end smallexample
|
||
|
||
@item
|
||
You add a class with a very short name, say @samp{S}, containing just the
|
||
definition of the resource manager and of the shorthand:
|
||
|
||
@smallexample
|
||
public class S @{
|
||
public static GettextResourceManager MyResourceManager =
|
||
new GettextResourceManager("domain-name");
|
||
public static String _(String s) @{
|
||
return MyResourceManager.GetString(s);
|
||
@}
|
||
@}
|
||
@end smallexample
|
||
|
||
@noindent
|
||
and the shorthand is used like this:
|
||
|
||
@smallexample
|
||
Console.WriteLine(S._("Operation completed."));
|
||
@end smallexample
|
||
@end itemize
|
||
|
||
Which of the two idioms you choose will depend on whether copying two lines
|
||
of code into every class is more acceptable in your project than a class
|
||
with a single-letter name.
|
||
|
||
@node gawk
|
||
@subsection GNU awk
|
||
@cindex awk
|
||
@cindex gawk
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
gawk 3.1 or newer
|
||
|
||
@item Ubuntu packages
|
||
gawk
|
||
|
||
@item File extension
|
||
@code{awk}, @code{gawk}, @code{twjr}.
|
||
The file extension @code{twjr} is used by TexiWeb Jr
|
||
(@uref{https://github.com/arnoldrobbins/texiwebjr}).
|
||
|
||
@item String syntax
|
||
@code{"abc"}
|
||
|
||
@item gettext shorthand
|
||
@code{_"abc"}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{dcgettext}, missing @code{dcngettext} in gawk-3.1.0
|
||
|
||
@item textdomain
|
||
@code{TEXTDOMAIN} variable
|
||
|
||
@item bindtextdomain
|
||
@code{bindtextdomain} function
|
||
|
||
@item setlocale
|
||
automatic, but missing @code{setlocale (LC_MESSAGES, "")} in gawk-3.1.0
|
||
|
||
@item Prerequisite
|
||
---
|
||
|
||
@item Use or emulate GNU gettext
|
||
use
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
@code{printf "%2$d %1$d"} (GNU awk only)
|
||
|
||
@item Portability
|
||
On platforms without gettext, no translation. On non-GNU awks, you must
|
||
define @code{dcgettext}, @code{dcngettext} and @code{bindtextdomain}
|
||
yourself.
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
An example is available in the @file{examples} directory: @code{hello-gawk}.
|
||
|
||
@node Pascal
|
||
@subsection Pascal - Free Pascal Compiler
|
||
@cindex Pascal
|
||
@cindex Free Pascal
|
||
@cindex Object Pascal
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
fpk
|
||
|
||
@item Ubuntu packages
|
||
fp-compiler, fp-units-fcl
|
||
|
||
@item File extension
|
||
@code{pp}, @code{pas}
|
||
|
||
@item String syntax
|
||
@code{'abc'}
|
||
|
||
@item gettext shorthand
|
||
automatic
|
||
|
||
@item gettext/ngettext functions
|
||
---, use @code{ResourceString} data type instead
|
||
|
||
@item textdomain
|
||
---, use @code{TranslateResourceStrings} function instead
|
||
|
||
@item bindtextdomain
|
||
---, use @code{TranslateResourceStrings} function instead
|
||
|
||
@item setlocale
|
||
automatic, but uses only LANG, not LC_MESSAGES or LC_ALL
|
||
|
||
@item Prerequisite
|
||
@code{@{$mode delphi@}} or @code{@{$mode objfpc@}}@*@code{uses gettext;}
|
||
|
||
@item Use or emulate GNU gettext
|
||
emulate partially
|
||
|
||
@item Extractor
|
||
@code{ppc386} followed by @code{xgettext} or @code{rstconv}
|
||
|
||
@item Formatting with positions
|
||
@code{uses sysutils;}@*@code{format "%1:d %0:d"}
|
||
|
||
@item Portability
|
||
?
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
The Pascal compiler has special support for the @code{ResourceString} data
|
||
type. It generates a @code{.rst} file. This is then converted to a
|
||
@code{.pot} file by use of @code{xgettext} or @code{rstconv}. At runtime,
|
||
a @code{.mo} file corresponding to translations of this @code{.pot} file
|
||
can be loaded using the @code{TranslateResourceStrings} function in the
|
||
@code{gettext} unit.
|
||
|
||
An example is available in the @file{examples} directory: @code{hello-pascal}.
|
||
|
||
@node wxWidgets
|
||
@subsection wxWidgets library
|
||
@cindex @code{wxWidgets} library
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
wxGTK, gettext
|
||
|
||
@item Ubuntu packages
|
||
libwxgtk3.0-dev
|
||
|
||
@item File extension
|
||
@code{cpp}
|
||
|
||
@item String syntax
|
||
@code{"abc"}
|
||
|
||
@item gettext shorthand
|
||
@code{_("abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{wxLocale::GetString}, @code{wxGetTranslation}
|
||
|
||
@item textdomain
|
||
@code{wxLocale::AddCatalog}
|
||
|
||
@item bindtextdomain
|
||
@code{wxLocale::AddCatalogLookupPathPrefix}
|
||
|
||
@item setlocale
|
||
@code{wxLocale::Init}, @code{wxSetLocale}
|
||
|
||
@item Prerequisite
|
||
@code{#include <wx/intl.h>}
|
||
|
||
@item Use or emulate GNU gettext
|
||
emulate, see @code{include/wx/intl.h} and @code{src/common/intl.cpp}
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
@code{wxString::Format} supports positions if and only if the system has
|
||
@code{wprintf()}, @code{vswprintf()} functions and they support positions
|
||
according to POSIX.
|
||
|
||
@item Portability
|
||
fully portable
|
||
|
||
@item po-mode marking
|
||
yes
|
||
@end table
|
||
|
||
@node YCP
|
||
@subsection YCP - YaST2 scripting language
|
||
@cindex YCP
|
||
@cindex YaST2 scripting language
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
libycp, libycp-devel, yast2-core, yast2-core-devel
|
||
|
||
@item Ubuntu packages
|
||
---
|
||
|
||
@item File extension
|
||
@code{ycp}
|
||
|
||
@item String syntax
|
||
@code{"abc"}
|
||
|
||
@item gettext shorthand
|
||
@code{_("abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{_()} with 1 or 3 arguments
|
||
|
||
@item textdomain
|
||
@code{textdomain} statement
|
||
|
||
@item bindtextdomain
|
||
---
|
||
|
||
@item setlocale
|
||
---
|
||
|
||
@item Prerequisite
|
||
---
|
||
|
||
@item Use or emulate GNU gettext
|
||
use
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
@code{sformat "%2 %1"}
|
||
|
||
@item Portability
|
||
fully portable
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
An example is available in the @file{examples} directory: @code{hello-ycp}.
|
||
|
||
@node Tcl
|
||
@subsection Tcl - Tk's scripting language
|
||
@cindex Tcl
|
||
@cindex Tk's scripting language
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
tcl
|
||
|
||
@item Ubuntu packages
|
||
tcl
|
||
|
||
@item File extension
|
||
@code{tcl}
|
||
|
||
@item String syntax
|
||
@code{"abc"}
|
||
|
||
@item gettext shorthand
|
||
@code{[_ "abc"]}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{::msgcat::mc}
|
||
|
||
@item textdomain
|
||
---
|
||
|
||
@item bindtextdomain
|
||
---, use @code{::msgcat::mcload} instead
|
||
|
||
@item setlocale
|
||
automatic, uses LANG, but ignores LC_MESSAGES and LC_ALL
|
||
|
||
@item Prerequisite
|
||
@code{package require msgcat}
|
||
@*@code{proc _ @{s@} @{return [::msgcat::mc $s]@}}
|
||
|
||
@item Use or emulate GNU gettext
|
||
---, uses a Tcl specific message catalog format
|
||
|
||
@item Extractor
|
||
@code{xgettext -k_}
|
||
|
||
@item Formatting with positions
|
||
@code{format "%2\$d %1\$d"}
|
||
|
||
@item Portability
|
||
fully portable
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
Two examples are available in the @file{examples} directory:
|
||
@code{hello-tcl}, @code{hello-tcl-tk}.
|
||
|
||
Before marking strings as internationalizable, substitutions of variables
|
||
into the string need to be converted to @code{format} applications. For
|
||
example, @code{"file $filename not found"} becomes
|
||
@code{[format "file %s not found" $filename]}.
|
||
Only after this is done, can the strings be marked and extracted.
|
||
After marking, this example becomes
|
||
@code{[format [_ "file %s not found"] $filename]} or
|
||
@code{[msgcat::mc "file %s not found" $filename]}. Note that the
|
||
@code{msgcat::mc} function implicitly calls @code{format} when more than one
|
||
argument is given.
|
||
|
||
@node Perl
|
||
@subsection Perl
|
||
@cindex Perl
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
perl
|
||
|
||
@item Ubuntu packages
|
||
perl, libintl-perl
|
||
|
||
@item File extension
|
||
@code{pl}, @code{PL}, @code{pm}, @code{perl}, @code{cgi}
|
||
|
||
@item String syntax
|
||
@itemize @bullet
|
||
|
||
@item @code{"abc"}
|
||
|
||
@item @code{'abc'}
|
||
|
||
@item @code{qq (abc)}
|
||
|
||
@item @code{q (abc)}
|
||
|
||
@item @code{qr /abc/}
|
||
|
||
@item @code{qx (/bin/date)}
|
||
|
||
@item @code{/pattern match/}
|
||
|
||
@item @code{?pattern match?}
|
||
|
||
@item @code{s/substitution/operators/}
|
||
|
||
@item @code{$tied_hash@{"message"@}}
|
||
|
||
@item @code{$tied_hash_reference->@{"message"@}}
|
||
|
||
@item etc., issue the command @samp{man perlsyn} for details
|
||
|
||
@end itemize
|
||
|
||
@item gettext shorthand
|
||
@code{__} (double underscore)
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext}, @code{dgettext}, @code{dcgettext}, @code{ngettext},
|
||
@code{dngettext}, @code{dcngettext}, @code{pgettext}, @code{dpgettext},
|
||
@code{dcpgettext}, @code{npgettext}, @code{dnpgettext},
|
||
@code{dcnpgettext}
|
||
|
||
@item textdomain
|
||
@code{textdomain} function
|
||
|
||
@item bindtextdomain
|
||
@code{bindtextdomain} function
|
||
|
||
@item bind_textdomain_codeset
|
||
@code{bind_textdomain_codeset} function
|
||
|
||
@item setlocale
|
||
Use @code{setlocale (LC_ALL, "");}
|
||
|
||
@item Prerequisite
|
||
@code{use POSIX;}
|
||
@*@code{use Locale::TextDomain;} (included in the package libintl-perl
|
||
which is available on the Comprehensive Perl Archive Network CPAN,
|
||
https://www.cpan.org/).
|
||
|
||
@item Use or emulate GNU gettext
|
||
platform dependent: gettext_pp emulates, gettext_xs uses GNU gettext
|
||
|
||
@item Extractor
|
||
@code{xgettext -k__ -k\$__ -k%__ -k__x -k__n:1,2 -k__nx:1,2 -k__xn:1,2
|
||
-kN__ -kN__n:1,2 -k__p:1c,2 -k__np:1c,2,3 -kN__p:1c,2 -kN__np:1c,2,3}
|
||
|
||
@item Formatting with positions
|
||
Both kinds of format strings support formatting with positions.
|
||
@*@code{printf "%2\$d %1\$d", ...} (requires Perl 5.8.0 or newer)
|
||
@*@code{__expand("[new] replaces [old]", old => $oldvalue, new => $newvalue)}
|
||
|
||
@item Portability
|
||
The @code{libintl-perl} package is platform independent but is not
|
||
part of the Perl core. The programmer is responsible for
|
||
providing a dummy implementation of the required functions if the
|
||
package is not installed on the target system.
|
||
|
||
@item po-mode marking
|
||
---
|
||
|
||
@item Documentation
|
||
Included in @code{libintl-perl}, available on CPAN
|
||
(https://www.cpan.org/).
|
||
|
||
@end table
|
||
|
||
An example is available in the @file{examples} directory: @code{hello-perl}.
|
||
|
||
@cindex marking Perl sources
|
||
|
||
The @code{xgettext} parser backend for Perl differs significantly from
|
||
the parser backends for other programming languages, just as Perl
|
||
itself differs significantly from other programming languages. The
|
||
Perl parser backend offers many more string marking facilities than
|
||
the other backends but it also has some Perl specific limitations, the
|
||
worst probably being its imperfectness.
|
||
|
||
@menu
|
||
* General Problems:: General Problems Parsing Perl Code
|
||
* Default Keywords:: Which Keywords Will xgettext Look For?
|
||
* Special Keywords:: How to Extract Hash Keys
|
||
* Quote-like Expressions:: What are Strings And Quote-like Expressions?
|
||
* Interpolation I:: Invalid String Interpolation
|
||
* Interpolation II:: Valid String Interpolation
|
||
* Parentheses:: When To Use Parentheses
|
||
* Long Lines:: How To Grok with Long Lines
|
||
* Perl Pitfalls:: Bugs, Pitfalls, and Things That Do Not Work
|
||
@end menu
|
||
|
||
@node General Problems
|
||
@subsubsection General Problems Parsing Perl Code
|
||
|
||
It is often heard that only Perl can parse Perl. This is not true.
|
||
Perl cannot be @emph{parsed} at all, it can only be @emph{executed}.
|
||
Perl has various built-in ambiguities that can only be resolved at runtime.
|
||
|
||
The following example may illustrate one common problem:
|
||
|
||
@example
|
||
print gettext "Hello World!";
|
||
@end example
|
||
|
||
Although this example looks like a bullet-proof case of a function
|
||
invocation, it is not:
|
||
|
||
@example
|
||
open gettext, ">testfile" or die;
|
||
print gettext "Hello world!"
|
||
@end example
|
||
|
||
In this context, the string @code{gettext} looks more like a
|
||
file handle. But not necessarily:
|
||
|
||
@example
|
||
use Locale::Messages qw (:libintl_h);
|
||
open gettext ">testfile" or die;
|
||
print gettext "Hello world!";
|
||
@end example
|
||
|
||
Now, the file is probably syntactically incorrect, provided that the module
|
||
@code{Locale::Messages} found first in the Perl include path exports a
|
||
function @code{gettext}. But what if the module
|
||
@code{Locale::Messages} really looks like this?
|
||
|
||
@example
|
||
use vars qw (*gettext);
|
||
|
||
1;
|
||
@end example
|
||
|
||
In this case, the string @code{gettext} will be interpreted as a file
|
||
handle again, and the above example will create a file @file{testfile}
|
||
and write the string ``Hello world!'' into it. Even advanced
|
||
control flow analysis will not really help:
|
||
|
||
@example
|
||
if (0.5 < rand) @{
|
||
eval "use Sane";
|
||
@} else @{
|
||
eval "use InSane";
|
||
@}
|
||
print gettext "Hello world!";
|
||
@end example
|
||
|
||
If the module @code{Sane} exports a function @code{gettext} that does
|
||
what we expect, and the module @code{InSane} opens a file for writing
|
||
and associates the @emph{handle} @code{gettext} with this output
|
||
stream, we are clueless again about what will happen at runtime. It is
|
||
completely unpredictable. The truth is that Perl has so many ways to
|
||
fill its symbol table at runtime that it is impossible to interpret a
|
||
particular piece of code without executing it.
|
||
|
||
Of course, @code{xgettext} will not execute your Perl sources while
|
||
scanning for translatable strings, but rather use heuristics in order
|
||
to guess what you meant.
|
||
|
||
Another problem is the ambiguity of the slash and the question mark.
|
||
Their interpretation depends on the context:
|
||
|
||
@example
|
||
# A pattern match.
|
||
print "OK\n" if /foobar/;
|
||
|
||
# A division.
|
||
print 1 / 2;
|
||
|
||
# Another pattern match.
|
||
print "OK\n" if ?foobar?;
|
||
|
||
# Conditional.
|
||
print $x ? "foo" : "bar";
|
||
@end example
|
||
|
||
The slash may either act as the division operator or introduce a
|
||
pattern match, whereas the question mark may act as the ternary
|
||
conditional operator or as a pattern match, too. Other programming
|
||
languages like @code{awk} present similar problems, but the consequences of a
|
||
misinterpretation are particularly nasty with Perl sources. In @code{awk}
|
||
for instance, a statement can never exceed one line and the parser
|
||
can recover from a parsing error at the next newline and interpret
|
||
the rest of the input stream correctly. Perl is different, as a
|
||
pattern match is terminated by the next appearance of the delimiter
|
||
(the slash or the question mark) in the input stream, regardless of
|
||
the semantic context. If a slash is really a division sign but
|
||
mis-interpreted as a pattern match, the rest of the input file is most
|
||
probably parsed incorrectly.
|
||
|
||
There are certain cases where the ambiguity cannot be resolved at all:
|
||
|
||
@example
|
||
$x = wantarray ? 1 : 0;
|
||
@end example
|
||
|
||
The Perl built-in function @code{wantarray} does not accept any arguments.
|
||
The Perl parser therefore knows that the question mark does not start
|
||
a regular expression but is the ternary conditional operator.
|
||
|
||
@example
|
||
sub wantarrays @{@}
|
||
$x = wantarrays ? 1 : 0;
|
||
@end example
|
||
|
||
Now the situation is different. The function @code{wantarrays} takes
|
||
a variable number of arguments (like any non-prototyped Perl function).
|
||
The question mark is now the delimiter of a pattern match, and hence
|
||
the piece of code does not compile.
|
||
|
||
@example
|
||
sub wantarrays() @{@}
|
||
$x = wantarrays ? 1 : 0;
|
||
@end example
|
||
|
||
Now the function is prototyped, Perl knows that it does not accept any
|
||
arguments, and the question mark is therefore interpreted as the
|
||
ternary operator again. But that unfortunately outsmarts @code{xgettext}.
|
||
|
||
The Perl parser in @code{xgettext} cannot know whether a function has
|
||
a prototype and what that prototype would look like. It therefore makes
|
||
an educated guess. If a function is known to be a Perl built-in and
|
||
this function does not accept any arguments, a following question mark
|
||
or slash is treated as an operator, otherwise as the delimiter of a
|
||
following regular expression. The Perl built-ins that do not accept
|
||
arguments are @code{wantarray}, @code{fork}, @code{time}, @code{times},
|
||
@code{getlogin}, @code{getppid}, @code{getpwent}, @code{getgrent},
|
||
@code{gethostent}, @code{getnetent}, @code{getprotoent}, @code{getservent},
|
||
@code{setpwent}, @code{setgrent}, @code{endpwent}, @code{endgrent},
|
||
@code{endhostent}, @code{endnetent}, @code{endprotoent}, and
|
||
@code{endservent}.
|
||
|
||
If you find that @code{xgettext} fails to extract strings from
|
||
portions of your sources, you should therefore look out for slashes
|
||
and/or question marks preceding these sections. You may have come
|
||
across a bug in @code{xgettext}'s Perl parser (and of course you
|
||
should report that bug). In the meantime you should consider
|
||
reformulating your code in a manner less challenging to @code{xgettext}.
|
||
|
||
In particular, if the parser is too dumb to see that a function
|
||
does not accept arguments, use parentheses:
|
||
|
||
@example
|
||
$x = somefunc() ? 1 : 0;
|
||
$y = (somefunc) ? 1 : 0;
|
||
@end example
|
||
|
||
In fact the Perl parser itself has similar problems and warns you
|
||
about such constructs.
|
||
|
||
@node Default Keywords
|
||
@subsubsection Which Keywords Will xgettext Look For?
|
||
@cindex Perl default keywords
|
||
|
||
Unless you instruct @code{xgettext} otherwise by invoking it with one
|
||
of the options @code{--keyword} or @code{-k}, it will recognize the
|
||
following keywords in your Perl sources:
|
||
|
||
@itemize @bullet
|
||
|
||
@item @code{gettext}
|
||
|
||
@item @code{dgettext:2}
|
||
|
||
The second argument will be extracted.
|
||
|
||
@item @code{dcgettext:2}
|
||
|
||
The second argument will be extracted.
|
||
|
||
@item @code{ngettext:1,2}
|
||
|
||
The first (singular) and the second (plural) argument will be
|
||
extracted.
|
||
|
||
@item @code{dngettext:2,3}
|
||
|
||
The second (singular) and the third (plural) argument will be
|
||
extracted.
|
||
|
||
@item @code{dcngettext:2,3}
|
||
|
||
The second (singular) and the third (plural) argument will be
|
||
extracted.
|
||
|
||
@item @code{pgettext:1c,2}
|
||
|
||
The first (message context) and the second argument will be extracted.
|
||
|
||
@item @code{dpgettext:2c,3}
|
||
|
||
The second (message context) and the third argument will be extracted.
|
||
|
||
@item @code{dcpgettext:2c,3}
|
||
|
||
The second (message context) and the third argument will be extracted.
|
||
|
||
@item @code{npgettext:1c,2,3}
|
||
|
||
The first (message context), second (singular), and third (plural)
|
||
argument will be extracted.
|
||
|
||
@item @code{dnpgettext:2c,3,4}
|
||
|
||
The second (message context), third (singular), and fourth (plural)
|
||
argument will be extracted.
|
||
|
||
@item @code{dcnpgettext:2c,3,4}
|
||
|
||
The second (message context), third (singular), and fourth (plural)
|
||
argument will be extracted.
|
||
|
||
@item @code{gettext_noop}
|
||
|
||
@item @code{%gettext}
|
||
|
||
The keys of lookups into the hash @code{%gettext} will be extracted.
|
||
|
||
@item @code{$gettext}
|
||
|
||
The keys of lookups into the hash reference @code{$gettext} will be extracted.
|
||
|
||
@end itemize
|
||
|
||
@node Special Keywords
|
||
@subsubsection How to Extract Hash Keys
|
||
@cindex Perl special keywords for hash-lookups
|
||
|
||
Translating messages at runtime is normally performed by looking up the
|
||
original string in the translation database and returning the
|
||
translated version. The ``natural'' Perl implementation is a hash
|
||
lookup, and, of course, @code{xgettext} supports such practice.
|
||
|
||
@example
|
||
print __"Hello world!";
|
||
print $__@{"Hello world!"@};
|
||
print $__->@{"Hello world!"@};
|
||
print $$__@{"Hello world!"@};
|
||
@end example
|
||
|
||
The above four lines all do the same thing. The Perl module
|
||
@code{Locale::TextDomain} exports by default a hash @code{%__} that
|
||
is tied to the function @code{__()}. It also exports a reference
|
||
@code{$__} to @code{%__}.
|
||
|
||
If an argument to the @code{xgettext} option @code{--keyword}
|
||
or @code{-k} starts with a percent sign, the rest of the keyword is
|
||
interpreted as the name of a hash. If it starts with a dollar
|
||
sign, the rest of the keyword is interpreted as a reference to a
|
||
hash.
|
||
|
||
Note that you can omit the quotation marks (single or double) around
|
||
the hash key (almost) whenever Perl itself allows it:
|
||
|
||
@example
|
||
print $gettext@{Error@};
|
||
@end example
|
||
|
||
The exact rule is: You can omit the surrounding quotes, when the hash
|
||
key is a valid C (!) identifier, i.e.@: when it starts with an
|
||
underscore or an ASCII letter and is followed by an arbitrary number
|
||
of underscores, ASCII letters or digits. Other Unicode characters
|
||
are @emph{not} allowed, regardless of the @code{use utf8} pragma.
|
||
|
||
@node Quote-like Expressions
|
||
@subsubsection What are Strings And Quote-like Expressions?
|
||
@cindex Perl quote-like expressions
|
||
|
||
Perl offers a plethora of different string constructs. Those that can
|
||
be used either as arguments to functions or inside braces for hash
|
||
lookups are generally supported by @code{xgettext}.
|
||
|
||
@itemize @bullet
|
||
@item @strong{double-quoted strings}
|
||
@*
|
||
@example
|
||
print gettext "Hello World!";
|
||
@end example
|
||
|
||
@item @strong{single-quoted strings}
|
||
@*
|
||
@example
|
||
print gettext 'Hello World!';
|
||
@end example
|
||
|
||
@item @strong{the operator qq}
|
||
@*
|
||
@example
|
||
print gettext qq |Hello World!|;
|
||
print gettext qq <E-mail: <guido\@@imperia.net>>;
|
||
@end example
|
||
|
||
The operator @code{qq} is fully supported. You can use arbitrary
|
||
delimiters, including the four bracketing delimiters (round, angle,
|
||
square, curly) that nest.
|
||
|
||
@item @strong{the operator q}
|
||
@*
|
||
@example
|
||
print gettext q |Hello World!|;
|
||
print gettext q <E-mail: <guido@@imperia.net>>;
|
||
@end example
|
||
|
||
The operator @code{q} is fully supported. You can use arbitrary
|
||
delimiters, including the four bracketing delimiters (round, angle,
|
||
square, curly) that nest.
|
||
|
||
@item @strong{the operator qx}
|
||
@*
|
||
@example
|
||
print gettext qx ;LANGUAGE=C /bin/date;
|
||
print gettext qx [/usr/bin/ls | grep '^[A-Z]*'];
|
||
@end example
|
||
|
||
The operator @code{qx} is fully supported. You can use arbitrary
|
||
delimiters, including the four bracketing delimiters (round, angle,
|
||
square, curly) that nest.
|
||
|
||
The example is actually a useless use of @code{gettext}. It will
|
||
invoke the @code{gettext} function on the output of the command
|
||
specified with the @code{qx} operator. The feature was included
|
||
in order to make the interface consistent (the parser will extract
|
||
all strings and quote-like expressions).
|
||
|
||
@item @strong{here documents}
|
||
@*
|
||
@example
|
||
@group
|
||
print gettext <<'EOF';
|
||
program not found in $PATH
|
||
EOF
|
||
|
||
print ngettext <<EOF, <<"EOF";
|
||
one file deleted
|
||
EOF
|
||
several files deleted
|
||
EOF
|
||
@end group
|
||
@end example
|
||
|
||
Here-documents are recognized. If the delimiter is enclosed in single
|
||
quotes, the string is not interpolated. If it is enclosed in double
|
||
quotes or has no quotes at all, the string is interpolated.
|
||
|
||
Delimiters that start with a digit are not supported!
|
||
|
||
@end itemize
|
||
|
||
@node Interpolation I
|
||
@subsubsection Invalid Uses Of String Interpolation
|
||
@cindex Perl invalid string interpolation
|
||
|
||
Perl is capable of interpolating variables into strings. This offers
|
||
some nice features in localized programs but can also lead to
|
||
problems.
|
||
|
||
A common error is a construct like the following:
|
||
|
||
@example
|
||
print gettext "This is the program $0!\n";
|
||
@end example
|
||
|
||
Perl will interpolate at runtime the value of the variable @code{$0}
|
||
into the argument of the @code{gettext()} function. Hence, this
|
||
argument is not a string constant but a variable argument (@code{$0}
|
||
is a global variable that holds the name of the Perl script being
|
||
executed). The interpolation is performed by Perl before the string
|
||
argument is passed to @code{gettext()} and will therefore depend on
|
||
the name of the script which can only be determined at runtime.
|
||
Consequently, it is almost impossible that a translation can be looked
|
||
up at runtime (except if, by accident, the interpolated string is found
|
||
in the message catalog).
|
||
|
||
The @code{xgettext} program will therefore terminate parsing with a fatal
|
||
error if it encounters a variable inside of an extracted string. In
|
||
general, this will happen for all kinds of string interpolations that
|
||
cannot be safely performed at compile time. If you absolutely know
|
||
what you are doing, you can always circumvent this behavior:
|
||
|
||
@example
|
||
my $know_what_i_am_doing = "This is program $0!\n";
|
||
print gettext $know_what_i_am_doing;
|
||
@end example
|
||
|
||
Since the parser only recognizes strings and quote-like expressions,
|
||
but not variables or other terms, the above construct will be
|
||
accepted. You will have to find another way, however, to let your
|
||
original string make it into your message catalog.
|
||
|
||
If invoked with the option @code{--extract-all} or @code{-a},
|
||
variable interpolation will be accepted. Rationale: You will
|
||
generally use this option in order to prepare your sources for
|
||
internationalization.
|
||
|
||
Please see the manual page @samp{man perlop} for details of strings and
|
||
quote-like expressions that are subject to interpolation and those
|
||
that are not. Safe interpolations (that will not lead to a fatal
|
||
error) are:
|
||
|
||
@itemize @bullet
|
||
|
||
@item the escape sequences @code{\t} (tab, HT, TAB), @code{\n}
|
||
(newline, NL), @code{\r} (return, CR), @code{\f} (form feed, FF),
|
||
@code{\b} (backspace, BS), @code{\a} (alarm, bell, BEL), and @code{\e}
|
||
(escape, ESC).
|
||
|
||
@item octal chars, like @code{\033}
|
||
@*
|
||
Note that octal escapes in the range of 400-777 are translated into a
|
||
UTF-8 representation, regardless of the presence of the @code{use utf8} pragma.
|
||
|
||
@item hex chars, like @code{\x1b}
|
||
|
||
@item wide hex chars, like @code{\x@{263a@}}
|
||
@*
|
||
Note that this escape is translated into a UTF-8 representation,
|
||
regardless of the presence of the @code{use utf8} pragma.
|
||
|
||
@item control chars, like @code{\c[} (CTRL-[)
|
||
|
||
@item named Unicode chars, like @code{\N@{LATIN CAPITAL LETTER C WITH CEDILLA@}}
|
||
@*
|
||
Note that this escape is translated into a UTF-8 representation,
|
||
regardless of the presence of the @code{use utf8} pragma.
|
||
@end itemize
|
||
|
||
The following escapes are considered partially safe:
|
||
|
||
@itemize @bullet
|
||
|
||
@item @code{\l} lowercase next char
|
||
|
||
@item @code{\u} uppercase next char
|
||
|
||
@item @code{\L} lowercase till \E
|
||
|
||
@item @code{\U} uppercase till \E
|
||
|
||
@item @code{\E} end case modification
|
||
|
||
@item @code{\Q} quote non-word characters till \E
|
||
|
||
@end itemize
|
||
|
||
These escapes are only considered safe if the string consists of
|
||
ASCII characters only. Translation of characters outside the range
|
||
defined by ASCII is locale-dependent and can actually only be performed
|
||
at runtime; @code{xgettext} doesn't do these locale-dependent translations
|
||
at extraction time.
|
||
|
||
Except for the modifier @code{\Q}, these translations, albeit valid,
|
||
are generally useless and only obfuscate your sources. If a
|
||
translation can be safely performed at compile time you can just as
|
||
well write what you mean.
|
||
|
||
@node Interpolation II
|
||
@subsubsection Valid Uses Of String Interpolation
|
||
@cindex Perl valid string interpolation
|
||
|
||
Perl is often used to generate sources for other programming languages
|
||
or arbitrary file formats. Web applications that output HTML code
|
||
make a prominent example for such usage.
|
||
|
||
You will often come across situations where you want to intersperse
|
||
code written in the target (programming) language with translatable
|
||
messages, like in the following HTML example:
|
||
|
||
@example
|
||
print gettext <<EOF;
|
||
<h1>My Homepage</h1>
|
||
<script language="JavaScript"><!--
|
||
for (i = 0; i < 100; ++i) @{
|
||
alert ("Thank you so much for visiting my homepage!");
|
||
@}
|
||
//--></script>
|
||
EOF
|
||
@end example
|
||
|
||
The parser will extract the entire here document, and it will appear
|
||
entirely in the resulting PO file, including the JavaScript snippet
|
||
embedded in the HTML code. If you exaggerate with constructs like
|
||
the above, you will run the risk that the translators of your package
|
||
will look out for a less challenging project. You should consider an
|
||
alternative expression here:
|
||
|
||
@example
|
||
print <<EOF;
|
||
<h1>$gettext@{"My Homepage"@}</h1>
|
||
<script language="JavaScript"><!--
|
||
for (i = 0; i < 100; ++i) @{
|
||
alert ("$gettext@{'Thank you so much for visiting my homepage!'@}");
|
||
@}
|
||
//--></script>
|
||
EOF
|
||
@end example
|
||
|
||
Only the translatable portions of the code will be extracted here, and
|
||
the resulting PO file will begrudgingly improve in terms of readability.
|
||
|
||
You can interpolate hash lookups in all strings or quote-like
|
||
expressions that are subject to interpolation (see the manual page
|
||
@samp{man perlop} for details). Double interpolation is invalid, however:
|
||
|
||
@example
|
||
# TRANSLATORS: Replace "the earth" with the name of your planet.
|
||
print gettext qq@{Welcome to $gettext->@{"the earth"@}@};
|
||
@end example
|
||
|
||
The @code{qq}-quoted string is recognized as an argument to @code{gettext} in
|
||
the first place, and checked for invalid variable interpolation. The
|
||
dollar sign of hash-dereferencing will therefore terminate the parser
|
||
with an ``invalid interpolation'' error.
|
||
|
||
It is valid to interpolate hash lookups in regular expressions:
|
||
|
||
@example
|
||
if ($var =~ /$gettext@{"the earth"@}/) @{
|
||
print gettext "Match!\n";
|
||
@}
|
||
s/$gettext@{"U. S. A."@}/$gettext@{"U. S. A."@} $gettext@{"(dial +0)"@}/g;
|
||
@end example
|
||
|
||
@node Parentheses
|
||
@subsubsection When To Use Parentheses
|
||
@cindex Perl parentheses
|
||
|
||
In Perl, parentheses around function arguments are mostly optional.
|
||
@code{xgettext} will always assume that all
|
||
recognized keywords (except for hashes and hash references) are names
|
||
of properly prototyped functions, and will (hopefully) only require
|
||
parentheses where Perl itself requires them. All constructs in the
|
||
following example are therefore ok to use:
|
||
|
||
@example
|
||
@group
|
||
print gettext ("Hello World!\n");
|
||
print gettext "Hello World!\n";
|
||
print dgettext ($package => "Hello World!\n");
|
||
print dgettext $package, "Hello World!\n";
|
||
|
||
# The "fat comma" => turns the left-hand side argument into a
|
||
# single-quoted string!
|
||
print dgettext smellovision => "Hello World!\n";
|
||
|
||
# The following assignment only works with prototyped functions.
|
||
# Otherwise, the functions will act as "greedy" list operators and
|
||
# eat up all following arguments.
|
||
my $anonymous_hash = @{
|
||
planet => gettext "earth",
|
||
cakes => ngettext "one cake", "several cakes", $n,
|
||
still => $works,
|
||
@};
|
||
# The same without fat comma:
|
||
my $other_hash = @{
|
||
'planet', gettext "earth",
|
||
'cakes', ngettext "one cake", "several cakes", $n,
|
||
'still', $works,
|
||
@};
|
||
|
||
# Parentheses are only significant for the first argument.
|
||
print dngettext 'package', ("one cake", "several cakes", $n), $discarded;
|
||
@end group
|
||
@end example
|
||
|
||
@node Long Lines
|
||
@subsubsection How To Grok with Long Lines
|
||
@cindex Perl long lines
|
||
|
||
The necessity of long messages can often lead to a cumbersome or
|
||
unreadable coding style. Perl has several options that may prevent
|
||
you from writing unreadable code, and
|
||
@code{xgettext} does its best to do likewise. This is where the dot
|
||
operator (the string concatenation operator) may come in handy:
|
||
|
||
@example
|
||
@group
|
||
print gettext ("This is a very long"
|
||
. " message that is still"
|
||
. " readable, because"
|
||
. " it is split into"
|
||
. " multiple lines.\n");
|
||
@end group
|
||
@end example
|
||
|
||
Perl is smart enough to concatenate these constant string fragments
|
||
into one long string at compile time, and so is
|
||
@code{xgettext}. You will only find one long message in the resulting
|
||
POT file.
|
||
|
||
Note that the future Perl 6 will probably use the underscore
|
||
(@samp{_}) as the string concatenation operator, and the dot
|
||
(@samp{.}) for dereferencing. This new syntax is not yet supported by
|
||
@code{xgettext}.
|
||
|
||
If embedded newline characters are not an issue, or even desired, you
|
||
may also insert newline characters inside quoted strings wherever you
|
||
feel like it:
|
||
|
||
@example
|
||
@group
|
||
print gettext ("<em>In HTML output
|
||
embedded newlines are generally no
|
||
problem, since adjacent whitespace
|
||
is always rendered into a single
|
||
space character.</em>");
|
||
@end group
|
||
@end example
|
||
|
||
You may also consider using here documents:
|
||
|
||
@example
|
||
@group
|
||
print gettext <<EOF;
|
||
<em>In HTML output
|
||
embedded newlines are generally no
|
||
problem, since adjacent whitespace
|
||
is always rendered into a single
|
||
space character.</em>
|
||
EOF
|
||
@end group
|
||
@end example
|
||
|
||
Please do not forget that the line breaks are real, i.e.@: they
|
||
translate into newline characters that will consequently show up in
|
||
the resulting POT file.
|
||
|
||
@node Perl Pitfalls
|
||
@subsubsection Bugs, Pitfalls, And Things That Do Not Work
|
||
@cindex Perl pitfalls
|
||
|
||
The foregoing sections should have proven that
|
||
@code{xgettext} is quite smart in extracting translatable strings from
|
||
Perl sources. Yet, some more or less exotic constructs that could be
|
||
expected to work, actually do not work.
|
||
|
||
One of the more relevant limitations can be found in the
|
||
implementation of variable interpolation inside quoted strings. Only
|
||
simple hash lookups can be used there:
|
||
|
||
@example
|
||
print <<EOF;
|
||
$gettext@{"The dot operator"
|
||
. " does not work"
|
||
. "here!"@}
|
||
Likewise, you cannot @@@{[ gettext ("interpolate function calls") ]@}
|
||
inside quoted strings or quote-like expressions.
|
||
EOF
|
||
@end example
|
||
|
||
This is valid Perl code and will actually trigger invocations of the
|
||
@code{gettext} function at runtime. Yet, the Perl parser in
|
||
@code{xgettext} will fail to recognize the strings. A less obvious
|
||
example can be found in the interpolation of regular expressions:
|
||
|
||
@example
|
||
s/<!--START_OF_WEEK-->/gettext ("Sunday")/e;
|
||
@end example
|
||
|
||
The modifier @code{e} will cause the substitution to be interpreted as
|
||
an evaluable statement. Consequently, at runtime the function
|
||
@code{gettext()} is called, but again, the parser fails to extract the
|
||
string ``Sunday''. Use a temporary variable as a simple workaround if
|
||
you really happen to need this feature:
|
||
|
||
@example
|
||
my $sunday = gettext "Sunday";
|
||
s/<!--START_OF_WEEK-->/$sunday/;
|
||
@end example
|
||
|
||
Hash slices would also be handy but are not recognized:
|
||
|
||
@example
|
||
my @@weekdays = @@gettext@{'Sunday', 'Monday', 'Tuesday', 'Wednesday',
|
||
'Thursday', 'Friday', 'Saturday'@};
|
||
# Or even:
|
||
@@weekdays = @@gettext@{qw (Sunday Monday Tuesday Wednesday Thursday
|
||
Friday Saturday) @};
|
||
@end example
|
||
|
||
This is perfectly valid usage of the tied hash @code{%gettext} but the
|
||
strings are not recognized and therefore will not be extracted.
|
||
|
||
Another caveat of the current version is its rudimentary support for
|
||
non-ASCII characters in identifiers. You may encounter serious
|
||
problems if you use identifiers with characters outside the range of
|
||
'A'-'Z', 'a'-'z', '0'-'9' and the underscore '_'.
|
||
|
||
Maybe some of these missing features will be implemented in future
|
||
versions, but since you can always make do without them at minimal effort,
|
||
these todos have very low priority.
|
||
|
||
A nasty problem arises with brace format strings that already contain braces
|
||
as part of the normal text, for example the usage strings typically
|
||
encountered in programs:
|
||
|
||
@example
|
||
die "usage: $0 @{OPTIONS@} FILENAME...\n";
|
||
@end example
|
||
|
||
If you want to internationalize this code with Perl brace format strings,
|
||
you will run into a problem:
|
||
|
||
@example
|
||
die __x ("usage: @{program@} @{OPTIONS@} FILENAME...\n", program => $0);
|
||
@end example
|
||
|
||
Whereas @samp{@{program@}} is a placeholder, @samp{@{OPTIONS@}}
|
||
is not and should probably be translated. Yet, there is no way to teach
|
||
the Perl parser in @code{xgettext} to recognize the first one, and leave
|
||
the other one alone.
|
||
|
||
There are two possible work-arounds for this problem. If you are
|
||
sure that your program will run under Perl 5.8.0 or newer (these
|
||
Perl versions handle positional parameters in @code{printf()}) or
|
||
if you are sure that the translator will not have to reorder the arguments
|
||
in her translation -- for example if you have only one brace placeholder
|
||
in your string, or if it describes a syntax, like in this one --, you can
|
||
mark the string as @code{no-perl-brace-format} and use @code{printf()}:
|
||
|
||
@example
|
||
# xgettext: no-perl-brace-format
|
||
die sprintf ("usage: %s @{OPTIONS@} FILENAME...\n", $0);
|
||
@end example
|
||
|
||
If you want to use the more portable Perl brace format, you will have to
|
||
put placeholders in place of the literal braces:
|
||
|
||
@example
|
||
die __x ("usage: @{program@} @{[@}OPTIONS@{]@} FILENAME...\n",
|
||
program => $0, '[' => '@{', ']' => '@}');
|
||
@end example
|
||
|
||
Perl brace format strings know no escaping mechanism. No matter what this
|
||
escaping mechanism looked like, it would either give the programmer a
|
||
hard time, make translating Perl brace format strings heavy-going, or
|
||
result in a performance penalty at runtime, when the format directives
|
||
get executed. Most of the time you will happily get along with
|
||
@code{printf()} for this special case.
|
||
|
||
@node PHP
|
||
@subsection PHP Hypertext Preprocessor
|
||
@cindex PHP
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
mod_php4, mod_php4-core, phpdoc
|
||
|
||
@item Ubuntu packages
|
||
php
|
||
|
||
@item File extension
|
||
@code{php}, @code{php3}, @code{php4}
|
||
|
||
@item String syntax
|
||
@code{"abc"}, @code{'abc'}
|
||
|
||
@item gettext shorthand
|
||
@code{_("abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext}, @code{dgettext}, @code{dcgettext}; starting with PHP 4.2.0
|
||
also @code{ngettext}, @code{dngettext}, @code{dcngettext}
|
||
|
||
@item textdomain
|
||
@code{textdomain} function
|
||
|
||
@item bindtextdomain
|
||
@code{bindtextdomain} function
|
||
|
||
@item setlocale
|
||
Programmer must call @code{setlocale (LC_ALL, "")}
|
||
|
||
@item Prerequisite
|
||
---
|
||
|
||
@item Use or emulate GNU gettext
|
||
use
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
@code{printf "%2\$d %1\$d"}
|
||
|
||
@item Portability
|
||
On platforms without gettext, the functions are not available.
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
An example is available in the @file{examples} directory: @code{hello-php}.
|
||
|
||
@node Ruby
|
||
@subsection Ruby
|
||
@cindex Ruby
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
ruby, ruby-gettext
|
||
|
||
@item Ubuntu packages
|
||
ruby, ruby-gettext
|
||
|
||
@item File extension
|
||
@code{rb}
|
||
|
||
@item String syntax
|
||
@code{"abc"}, @code{'abc'}, @code{%q/abc/} etc.,
|
||
@code{%q(abc)}, @code{%q[abc]}, @code{%q@{abc@}}
|
||
|
||
@item gettext shorthand
|
||
@code{_("abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext}, @code{ngettext}
|
||
|
||
@item textdomain
|
||
---
|
||
|
||
@item bindtextdomain
|
||
@code{bindtextdomain} function
|
||
|
||
@item setlocale
|
||
---
|
||
|
||
@item Prerequisite
|
||
@code{require 'gettext'}
|
||
@code{include GetText}
|
||
|
||
@item Use or emulate GNU gettext
|
||
emulate
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
@code{sprintf("%2$d %1$d", x, y)}
|
||
@*@code{"%@{new@} replaces %@{old@}" % @{:old => oldvalue, :new => newvalue@}}
|
||
|
||
@item Portability
|
||
fully portable
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
@c An example is available in the @file{examples} directory: @code{hello-ruby}.
|
||
|
||
@node Pike
|
||
@subsection Pike
|
||
@cindex Pike
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
roxen
|
||
|
||
@item Ubuntu packages
|
||
pike8.0 or pike7.8
|
||
|
||
@item File extension
|
||
@code{pike}
|
||
|
||
@item String syntax
|
||
@code{"abc"}
|
||
|
||
@item gettext shorthand
|
||
---
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext}, @code{dgettext}, @code{dcgettext}
|
||
|
||
@item textdomain
|
||
@code{textdomain} function
|
||
|
||
@item bindtextdomain
|
||
@code{bindtextdomain} function
|
||
|
||
@item setlocale
|
||
@code{setlocale} function
|
||
|
||
@item Prerequisite
|
||
@code{import Locale.Gettext;}
|
||
|
||
@item Use or emulate GNU gettext
|
||
use
|
||
|
||
@item Extractor
|
||
---
|
||
|
||
@item Formatting with positions
|
||
---
|
||
|
||
@item Portability
|
||
On platforms without gettext, the functions are not available.
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
@node GCC-source
|
||
@subsection GNU Compiler Collection sources
|
||
@cindex GCC-source
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
gcc
|
||
|
||
@item Ubuntu packages
|
||
gcc
|
||
|
||
@item File extension
|
||
@code{c}, @code{h}.
|
||
|
||
@item String syntax
|
||
@code{"abc"}
|
||
|
||
@item gettext shorthand
|
||
@code{_("abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext}, @code{dgettext}, @code{dcgettext}, @code{ngettext},
|
||
@code{dngettext}, @code{dcngettext}
|
||
|
||
@item textdomain
|
||
@code{textdomain} function
|
||
|
||
@item bindtextdomain
|
||
@code{bindtextdomain} function
|
||
|
||
@item setlocale
|
||
Programmer must call @code{setlocale (LC_ALL, "")}
|
||
|
||
@item Prerequisite
|
||
@code{#include "intl.h"}
|
||
|
||
@item Use or emulate GNU gettext
|
||
Use
|
||
|
||
@item Extractor
|
||
@code{xgettext -k_}
|
||
|
||
@item Formatting with positions
|
||
---
|
||
|
||
@item Portability
|
||
Uses autoconf macros
|
||
|
||
@item po-mode marking
|
||
yes
|
||
@end table
|
||
|
||
@node Lua
|
||
@subsection Lua
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
lua
|
||
|
||
@item Ubuntu packages
|
||
lua, lua-gettext
|
||
@*
|
||
You need to install the @code{lua-gettext} package from
|
||
@url{https://gitlab.com/sukhichev/lua-gettext/blob/master/README.us.md}.
|
||
Debian and Ubuntu packages of it are available. Download the
|
||
appropriate one, and install it through
|
||
@samp{sudo dpkg -i lua-gettext_0.0_amd64.deb}.
|
||
|
||
@item File extension
|
||
@code{lua}
|
||
|
||
@item String syntax
|
||
@itemize @bullet
|
||
|
||
@item @code{"abc"}
|
||
|
||
@item @code{'abc'}
|
||
|
||
@item @code{[[abc]]}
|
||
|
||
@item @code{[=[abc]=]}
|
||
|
||
@item @code{[==[abc]==]}
|
||
|
||
@item ...
|
||
|
||
@end itemize
|
||
|
||
@item gettext shorthand
|
||
@code{_("abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext.gettext}, @code{gettext.dgettext}, @code{gettext.dcgettext},
|
||
@code{gettext.ngettext}, @code{gettext.dngettext}, @code{gettext.dcngettext}
|
||
|
||
@item textdomain
|
||
@code{textdomain} function
|
||
|
||
@item bindtextdomain
|
||
@code{bindtextdomain} function
|
||
|
||
@item setlocale
|
||
automatic
|
||
|
||
@item Prerequisite
|
||
@code{require 'gettext'} or running lua interpreter with @code{-l gettext} option
|
||
|
||
@item Use or emulate GNU gettext
|
||
use
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
---
|
||
|
||
@item Portability
|
||
On platforms without gettext, the functions are not available.
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
@node JavaScript
|
||
@subsection JavaScript
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
js
|
||
|
||
@item Ubuntu packages
|
||
gjs
|
||
|
||
@item File extension
|
||
@code{js}
|
||
|
||
@item String syntax
|
||
@itemize @bullet
|
||
|
||
@item @code{"abc"}
|
||
|
||
@item @code{'abc'}
|
||
|
||
@item @code{`abc`}
|
||
|
||
@end itemize
|
||
|
||
@item gettext shorthand
|
||
@code{_("abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext}, @code{dgettext}, @code{dcgettext}, @code{ngettext},
|
||
@code{dngettext}
|
||
|
||
@item textdomain
|
||
@code{textdomain} function
|
||
|
||
@item bindtextdomain
|
||
@code{bindtextdomain} function
|
||
|
||
@item setlocale
|
||
automatic
|
||
|
||
@item Prerequisite
|
||
---
|
||
|
||
@item Use or emulate GNU gettext
|
||
use, or emulate
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
---
|
||
|
||
@item Portability
|
||
On platforms without gettext, the functions are not available.
|
||
|
||
@item po-mode marking
|
||
---
|
||
@end table
|
||
|
||
@node Vala
|
||
@subsection Vala
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
vala
|
||
|
||
@item Ubuntu packages
|
||
valac
|
||
|
||
@item File extension
|
||
@code{vala}
|
||
|
||
@item String syntax
|
||
@itemize @bullet
|
||
|
||
@item @code{"abc"}
|
||
|
||
@item @code{"""abc"""}
|
||
|
||
@end itemize
|
||
|
||
@item gettext shorthand
|
||
@code{_("abc")}
|
||
|
||
@item gettext/ngettext functions
|
||
@code{gettext}, @code{dgettext}, @code{dcgettext}, @code{ngettext},
|
||
@code{dngettext}, @code{dpgettext}, @code{dpgettext2}
|
||
|
||
@item textdomain
|
||
@code{textdomain} function, defined under the @code{Intl} namespace
|
||
|
||
@item bindtextdomain
|
||
@code{bindtextdomain} function, defined under the @code{Intl} namespace
|
||
|
||
@item setlocale
|
||
Programmer must call @code{Intl.setlocale (LocaleCategory.ALL, "")}
|
||
|
||
@item Prerequisite
|
||
---
|
||
|
||
@item Use or emulate GNU gettext
|
||
Use
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
|
||
@item Formatting with positions
|
||
Same as for the C language.
|
||
|
||
@item Portability
|
||
autoconf (gettext.m4) and #if ENABLE_NLS
|
||
|
||
@item po-mode marking
|
||
yes
|
||
@end table
|
||
|
||
@c This is the template for new languages.
|
||
@ignore
|
||
|
||
@ node
|
||
@ subsection
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
|
||
@item Ubuntu packages
|
||
|
||
@item File extension
|
||
|
||
@item String syntax
|
||
|
||
@item gettext shorthand
|
||
|
||
@item gettext/ngettext functions
|
||
|
||
@item textdomain
|
||
|
||
@item bindtextdomain
|
||
|
||
@item setlocale
|
||
|
||
@item Prerequisite
|
||
|
||
@item Use or emulate GNU gettext
|
||
|
||
@item Extractor
|
||
|
||
@item Formatting with positions
|
||
|
||
@item Portability
|
||
|
||
@item po-mode marking
|
||
@end table
|
||
|
||
@end ignore
|
||
|
||
@node Data Formats
|
||
@chapter Other Data Formats
|
||
|
||
While the GNU gettext tools deal mainly with POT and PO files, they can
|
||
also manipulate a couple of other data formats.
|
||
|
||
@menu
|
||
* Internationalizable Data:: Internationalizable Data Formats
|
||
* Localized Data:: Localized Data Formats
|
||
@end menu
|
||
|
||
@node Internationalizable Data
|
||
@section Internationalizable Data Formats
|
||
|
||
Here is a list of other data formats which can be internationalized
|
||
using GNU gettext.
|
||
|
||
@menu
|
||
* POT:: POT - Portable Object Template
|
||
* RST:: Resource String Table
|
||
* Glade:: Glade - GNOME user interface description
|
||
* GSettings:: GSettings - GNOME user configuration schema
|
||
* AppData:: AppData - freedesktop.org application description
|
||
* Preparing ITS Rules:: Preparing Rules for XML Internationalization
|
||
@end menu
|
||
|
||
@node POT
|
||
@subsection POT - Portable Object Template
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
gettext
|
||
|
||
@item Ubuntu packages
|
||
gettext
|
||
|
||
@item File extension
|
||
@code{pot}, @code{po}
|
||
|
||
@item Extractor
|
||
@code{xgettext}
|
||
@end table
|
||
|
||
@node RST
|
||
@subsection Resource String Table
|
||
@cindex RST
|
||
@cindex RSJ
|
||
|
||
RST is the format of resource string table files of the Free Pascal compiler
|
||
versions older than 3.0.0. RSJ is the new format of resource string table
|
||
files, created by the Free Pascal compiler version 3.0.0 or newer.
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
fpk
|
||
|
||
@item Ubuntu packages
|
||
fp-compiler
|
||
|
||
@item File extension
|
||
@code{rst}, @code{rsj}
|
||
|
||
@item Extractor
|
||
@code{xgettext}, @code{rstconv}
|
||
@end table
|
||
|
||
@node Glade
|
||
@subsection Glade - GNOME user interface description
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
glade, libglade, glade2, libglade2, intltool
|
||
|
||
@item Ubuntu packages
|
||
glade, libglade2-dev, intltool
|
||
|
||
@item File extension
|
||
@code{glade}, @code{glade2}, @code{ui}
|
||
|
||
@item Extractor
|
||
@code{xgettext}, @code{libglade-xgettext}, @code{xml-i18n-extract}, @code{intltool-extract}
|
||
@end table
|
||
|
||
@node GSettings
|
||
@subsection GSettings - GNOME user configuration schema
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
glib2
|
||
|
||
@item Ubuntu packages
|
||
libglib2.0-dev
|
||
|
||
@item File extension
|
||
@code{gschema.xml}
|
||
|
||
@item Extractor
|
||
@code{xgettext}, @code{intltool-extract}
|
||
@end table
|
||
|
||
@node AppData
|
||
@subsection AppData - freedesktop.org application description
|
||
|
||
This file format is specified in
|
||
@url{https://www.freedesktop.org/software/appstream/docs/}.
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
appdata-tools, appstream, libappstream-glib, libappstream-glib-builder
|
||
|
||
@item Ubuntu packages
|
||
appdata-tools, appstream, libappstream-glib-dev
|
||
|
||
@item File extension
|
||
@code{appdata.xml}, @code{metainfo.xml}
|
||
|
||
@item Extractor
|
||
@code{xgettext}, @code{intltool-extract}, @code{itstool}
|
||
@end table
|
||
|
||
@node Preparing ITS Rules
|
||
@subsection Preparing Rules for XML Internationalization
|
||
@cindex preparing rules for XML translation
|
||
|
||
Marking translatable strings in an XML file is done through a separate
|
||
"rule" file, making use of the Internationalization Tag Set standard
|
||
(ITS, @uref{https://www.w3.org/TR/its20/}). The currently supported ITS
|
||
data categories are: @samp{Translate}, @samp{Localization Note},
|
||
@samp{Elements Within Text}, and @samp{Preserve Space}. In addition to
|
||
them, @code{xgettext} also recognizes the following extended data
|
||
categories:
|
||
|
||
@table @samp
|
||
@item Context
|
||
|
||
This data category associates @code{msgctxt} to the extracted text. In
|
||
the global rule, the @code{contextRule} element contains the following:
|
||
|
||
@itemize
|
||
@item
|
||
A required @code{selector} attribute. It contains an absolute selector
|
||
that selects the nodes to which this rule applies.
|
||
|
||
@item
|
||
A required @code{contextPointer} attribute that contains a relative
|
||
selector pointing to a node that holds the @code{msgctxt} value.
|
||
|
||
@item
|
||
An optional @code{textPointer} attribute that contains a relative
|
||
selector pointing to a node that holds the @code{msgid} value.
|
||
@end itemize
|
||
|
||
@item Escape Special Characters
|
||
|
||
This data category indicates whether the special XML characters
|
||
(@code{<}, @code{>}, @code{&}, @code{"}) are escaped with entity
|
||
references. In the global rule, the @code{escapeRule} element contains
|
||
the following:
|
||
|
||
@itemize
|
||
@item
|
||
A required @code{selector} attribute. It contains an absolute selector
|
||
that selects the nodes to which this rule applies.
|
||
|
||
@item
|
||
A required @code{escape} attribute with the value @code{yes} or @code{no}.
|
||
@end itemize
|
||
|
||
@item Extended Preserve Space
|
||
|
||
This data category extends the standard @samp{Preserve Space} data
|
||
category with the additional values @samp{trim} and @samp{paragraph}.
|
||
@samp{trim} means to remove the leading and trailing whitespaces of the
|
||
content, but not to normalize whitespaces in the middle.
|
||
@samp{paragraph} means to normalize the content but keep the paragraph
|
||
boundaries. In the global
|
||
rule, the @code{preserveSpaceRule} element contains the following:
|
||
|
||
@itemize
|
||
@item
|
||
A required @code{selector} attribute. It contains an absolute selector
|
||
that selects the nodes to which this rule applies.
|
||
|
||
@item
|
||
A required @code{space} attribute with the value @code{default},
|
||
@code{preserve}, @code{trim}, or @code{paragraph}.
|
||
@end itemize
|
||
|
||
@end table
|
||
|
||
All those extended data categories can only be expressed with global
|
||
rules, and the rule elements have to have the
|
||
@code{https://www.gnu.org/s/gettext/ns/its/extensions/1.0} namespace.
|
||
|
||
Given the following XML document in a file @file{messages.xml}:
|
||
|
||
@example
|
||
<?xml version="1.0"?>
|
||
<messages>
|
||
<message>
|
||
<p>A translatable string</p>
|
||
</message>
|
||
<message>
|
||
<p translatable="no">A non-translatable string</p>
|
||
</message>
|
||
</messages>
|
||
@end example
|
||
|
||
To extract the first text content ("A translatable string"), but not the
|
||
second ("A non-translatable string"), the following ITS rules can be used:
|
||
|
||
@example
|
||
<?xml version="1.0"?>
|
||
<its:rules xmlns:its="http://www.w3.org/2005/11/its" version="1.0">
|
||
<its:translateRule selector="/messages" translate="no"/>
|
||
<its:translateRule selector="//message/p" translate="yes"/>
|
||
|
||
<!-- If 'p' has an attribute 'translatable' with the value 'no', then
|
||
the content is not translatable. -->
|
||
<its:translateRule selector="//message/p[@@translatable = 'no']"
|
||
translate="no"/>
|
||
</its:rules>
|
||
@end example
|
||
|
||
@samp{xgettext} needs another file called "locating rule" to associate
|
||
an ITS rule with an XML file. If the above ITS file is saved as
|
||
@file{messages.its}, the locating rule would look like:
|
||
|
||
@example
|
||
<?xml version="1.0"?>
|
||
<locatingRules>
|
||
<locatingRule name="Messages" pattern="*.xml">
|
||
<documentRule localName="messages" target="messages.its"/>
|
||
</locatingRule>
|
||
<locatingRule name="Messages" pattern="*.msg" target="messages.its"/>
|
||
</locatingRules>
|
||
@end example
|
||
|
||
The @code{locatingRule} element must have a @code{pattern} attribute,
|
||
which denotes either a literal file name or a wildcard pattern of the
|
||
XML file@footnote{Note that the file name matching is done after
|
||
removing any @code{.in} suffix from the input file name. Thus the
|
||
@code{pattern} attribute must not include a pattern matching @code{.in}.
|
||
For example, if the input file name is @file{foo.msg.in}, the pattern
|
||
should be either @code{*.msg} or just @code{*}, rather than
|
||
@code{*.in}.}. The @code{locatingRule} element can have a child
|
||
@code{documentRule} element, which adds checks on the content of the XML
|
||
file.
|
||
|
||
The first rule matches any file with the @file{.xml} file extension, but
|
||
it only applies to XML files whose root element is @samp{<messages>}.
|
||
|
||
The second rule indicates that the same ITS rule file is also
|
||
applicable to any file with the @file{.msg} file extension. The
|
||
optional @code{name} attribute of @code{locatingRule} allows choosing
|
||
rules by name, typically with @code{xgettext}'s @code{-L} option.
|
||
|
||
The associated ITS rule file is indicated by the @code{target} attribute
|
||
of @code{locatingRule} or @code{documentRule}. If it is specified in a
|
||
@code{documentRule} element, the parent @code{locatingRule} shouldn't
|
||
have the @code{target} attribute.
|
||
|
||
Locating rule files must have the @file{.loc} file extension. Both ITS
|
||
rule files and locating rule files must be installed in the
|
||
@file{$prefix/share/gettext/its} directory. Once those files are
|
||
properly installed, @code{xgettext} can extract translatable strings
|
||
from the matching XML files.
|
||
|
||
@subsubsection Two Use-cases of Translated Strings in XML
|
||
|
||
For XML, there are two use-cases of translated strings. One is the case
|
||
where the translated strings are directly consumed by programs, and the
|
||
other is the case where the translated strings are merged back to the
|
||
original XML document. In the former case, special characters in the
|
||
extracted strings shouldn't be escaped, while they should in the latter
|
||
case. To control whether to escape special characters, the @samp{Escape
|
||
Special Characters} data category can be used.
|
||
|
||
To merge the translations, the @samp{msgfmt} program can be used with
|
||
the option @code{--xml}. @xref{msgfmt Invocation}, for more details
|
||
about how one calls the @samp{msgfmt} program. @samp{msgfmt}'s
|
||
@code{--xml} option doesn't perform character escaping, so translated
|
||
strings can have arbitrary XML constructs, such as elements for markup.
|
||
|
||
@c This is the template for new data formats.
|
||
@ignore
|
||
|
||
@ node
|
||
@ subsection
|
||
|
||
@table @asis
|
||
@item RPMs
|
||
|
||
@item Ubuntu packages
|
||
|
||
@item File extension
|
||
|
||
@item Extractor
|
||
@end table
|
||
|
||
@end ignore
|
||
|
||
@node Localized Data
|
||
@section Localized Data Formats
|
||
|
||
Here is a list of file formats that contain localized data and that the
|
||
GNU gettext tools can manipulate.
|
||
|
||
@menu
|
||
* Editable Message Catalogs:: Editable Message Catalogs
|
||
* Compiled Message Catalogs:: Compiled Message Catalogs
|
||
* Desktop Entry:: Desktop Entry files
|
||
* XML:: XML files
|
||
@end menu
|
||
|
||
@node Editable Message Catalogs
|
||
@subsection Editable Message Catalogs
|
||
|
||
These file formats can be used with all of the @code{msg*} tools and with
|
||
the @code{xgettext} program.
|
||
|
||
If you just want to convert among these formats, you can use the
|
||
@code{msgcat} program (with the appropriate option) or the @code{xgettext}
|
||
program.
|
||
|
||
@menu
|
||
* PO:: PO - Portable Object
|
||
* Java .properties:: Java .properties
|
||
* GNUstep .strings:: NeXTstep/GNUstep .strings
|
||
@end menu
|
||
|
||
@node PO
|
||
@subsubsection PO - Portable Object
|
||
|
||
@table @asis
|
||
@item File extension
|
||
@code{po}
|
||
@end table
|
||
|
||
@node Java .properties
|
||
@subsubsection Java .properties
|
||
|
||
@table @asis
|
||
@item File extension
|
||
@code{properties}
|
||
@end table
|
||
|
||
@node GNUstep .strings
|
||
@subsubsection NeXTstep/GNUstep .strings
|
||
|
||
@table @asis
|
||
@item File extension
|
||
@code{strings}
|
||
@end table
|
||
|
||
@node Compiled Message Catalogs
|
||
@subsection Compiled Message Catalogs
|
||
|
||
These file formats can be created through @code{msgfmt} and converted back
|
||
to PO format through @code{msgunfmt}.
|
||
|
||
@menu
|
||
* MO:: MO - Machine Object
|
||
* Java ResourceBundle:: Java ResourceBundle
|
||
* C# Satellite Assembly:: C# Satellite Assembly
|
||
* C# Resource:: C# Resource
|
||
* Tcl message catalog:: Tcl message catalog
|
||
* Qt message catalog:: Qt message catalog
|
||
@end menu
|
||
|
||
@node MO
|
||
@subsubsection MO - Machine Object
|
||
|
||
@table @asis
|
||
@item File extension
|
||
@code{mo}
|
||
@end table
|
||
|
||
See section @ref{MO Files} for details.
|
||
|
||
@node Java ResourceBundle
|
||
@subsubsection Java ResourceBundle
|
||
|
||
@table @asis
|
||
@item File extension
|
||
@code{class}
|
||
@end table
|
||
|
||
For more information, see the section @ref{Java} and the examples
|
||
@code{hello-java}, @code{hello-java-awt}, @code{hello-java-swing}.
|
||
|
||
@node C# Satellite Assembly
|
||
@subsubsection C# Satellite Assembly
|
||
|
||
@table @asis
|
||
@item File extension
|
||
@code{dll}
|
||
@end table
|
||
|
||
For more information, see the section @ref{C#}.
|
||
|
||
@node C# Resource
|
||
@subsubsection C# Resource
|
||
|
||
@table @asis
|
||
@item File extension
|
||
@code{resources}
|
||
@end table
|
||
|
||
For more information, see the section @ref{C#}.
|
||
|
||
@node Tcl message catalog
|
||
@subsubsection Tcl message catalog
|
||
|
||
@table @asis
|
||
@item File extension
|
||
@code{msg}
|
||
@end table
|
||
|
||
For more information, see the section @ref{Tcl} and the examples
|
||
@code{hello-tcl}, @code{hello-tcl-tk}.
|
||
|
||
@node Qt message catalog
|
||
@subsubsection Qt message catalog
|
||
|
||
@table @asis
|
||
@item File extension
|
||
@code{qm}
|
||
@end table
|
||
|
||
For more information, see the examples @code{hello-c++-qt} and
|
||
@code{hello-c++-kde}.
|
||
|
||
@node Desktop Entry
|
||
@subsection Desktop Entry files
|
||
|
||
The programmer produces a desktop entry file template with only the
|
||
English strings. These strings get included in the POT file, by way of
|
||
@code{xgettext} (usually by listing the template in @code{po/POTFILES.in}).
|
||
The translators produce PO files, one for each language. Finally, an
|
||
@code{msgfmt --desktop} invocation collects all the translations in the
|
||
desktop entry file.
|
||
|
||
For more information, see the example @code{hello-c-gnome3}.
|
||
|
||
@menu
|
||
* Icons:: Handling icons
|
||
@end menu
|
||
|
||
@node Icons
|
||
@subsubsection How to handle icons in Desktop Entry files
|
||
|
||
Icons are generally locale dependent, for the following reasons:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
Icons may contain signs that are considered rude in some cultures. For
|
||
example, the high-five sign, in some cultures, is perceived as an
|
||
unfriendly ``stop'' sign.
|
||
@item
|
||
Icons may contain metaphors that are culture specific. For example, a
|
||
mailbox in the U.S. looks different than mailboxes all around the world.
|
||
@item
|
||
Icons may need to be mirrored for right-to-left locales.
|
||
@item
|
||
Icons may contain text strings (a bad practice, but anyway).
|
||
@end itemize
|
||
|
||
However, icons are not covered by GNU gettext localization, because
|
||
@itemize @bullet
|
||
@item
|
||
Icons cannot be easily embedded in PO files,
|
||
@item
|
||
The need to localize an icon is rare, and the ability to do so in a PO
|
||
file would introduce translator mistakes.
|
||
@c https://lists.freedesktop.org/archives/xdg/2019-June/014168.html
|
||
@end itemize
|
||
|
||
Desktop Entry files may contain an @samp{Icon} property, and this
|
||
property is localizable. If a translator wishes to localize an icon,
|
||
she should do so by bypassing the normal workflow with PO files:
|
||
@enumerate
|
||
@item
|
||
The translator contacts the package developers directly, sending them
|
||
the icon appropriate for her locale, with a request to change the
|
||
template file.
|
||
@item
|
||
The package developers add the icon file to their repository, and a
|
||
line
|
||
@smallexample
|
||
Icon[@var{locale}]=@var{icon_file_name}
|
||
@end smallexample
|
||
@noindent
|
||
to the template file.
|
||
@end enumerate
|
||
@noindent
|
||
This line remains in place when this template file is merged with the
|
||
translators' PO files, through @code{msgfmt}.
|
||
|
||
@node XML
|
||
@subsection XML files
|
||
|
||
See the section @ref{Preparing ITS Rules} and
|
||
@ref{msgfmt Invocation}, subsection ``XML mode operations''.
|
||
|
||
@node Conclusion
|
||
@chapter Concluding Remarks
|
||
|
||
We would like to conclude this GNU @code{gettext} manual by presenting
|
||
a history of the Translation Project so far. We finally give
|
||
a few pointers for those who want to do further research or readings
|
||
about Native Language Support matters.
|
||
|
||
@menu
|
||
* History:: History of GNU @code{gettext}
|
||
* The original ABOUT-NLS:: Historical introduction
|
||
* References:: Related Readings
|
||
@end menu
|
||
|
||
@node History
|
||
@section History of GNU @code{gettext}
|
||
@cindex history of GNU @code{gettext}
|
||
|
||
Internationalization concerns and algorithms have been informally
|
||
and casually discussed for years in GNU, sometimes around GNU
|
||
@code{libc}, maybe around the incoming @code{Hurd}, or otherwise
|
||
(nobody clearly remembers). And even then, when the work started for
|
||
real, this was somewhat independently of these previous discussions.
|
||
|
||
This all began in July 1994, when Patrick D'Cruze had the idea and
|
||
initiative of internationalizing version 3.9.2 of GNU @code{fileutils}.
|
||
He then asked Jim Meyering, the maintainer, how to get those changes
|
||
folded into an official release. That first draft was full of
|
||
@code{#ifdef}s and somewhat disconcerting, and Jim wanted to find
|
||
nicer ways. Patrick and Jim shared some tries and experimentations
|
||
in this area. Then, feeling that this might eventually have a deeper
|
||
impact on GNU, Jim wanted to know what standards were, and contacted
|
||
Richard Stallman, who very quickly and verbally described an overall
|
||
design for what was meant to become @code{glocale}, at that time.
|
||
|
||
Jim implemented @code{glocale} and got a lot of exhausting feedback
|
||
from Patrick and Richard, of course, but also from Mitchum DSouza
|
||
(who wrote a @code{catgets}-like package), Roland McGrath, maybe David
|
||
MacKenzie, Fran@,{c}ois Pinard, and Paul Eggert, all pushing and
|
||
pulling in various directions, not always compatible, to the extent
|
||
that after a couple of test releases, @code{glocale} was torn apart.
|
||
In particular, Paul Eggert -- always keeping an eye on developments
|
||
in Solaris -- advocated the use of the @code{gettext} API over
|
||
@code{glocale}'s @code{catgets}-based API.
|
||
|
||
While Jim took some distance and time and became dad for a second
|
||
time, Roland wanted to get GNU @code{libc} internationalized, and
|
||
got Ulrich Drepper involved in that project. Instead of starting
|
||
from @code{glocale}, Ulrich rewrote something from scratch, but
|
||
more conforming to the set of guidelines that emerged out of the
|
||
@code{glocale} effort. Then, Ulrich got people from the previous
|
||
forum to involve themselves in this new project, and the switch
|
||
from @code{glocale} to what was first named @code{msgutils}, renamed
|
||
@code{nlsutils}, and later @code{gettext}, became officially accepted
|
||
by Richard in May 1995 or so.
|
||
|
||
Let's summarize by saying that Ulrich Drepper wrote GNU @code{gettext}
|
||
in April 1995. The first official release of the package, including
|
||
PO mode, occurred in July 1995, and was numbered 0.7. Other people
|
||
contributed to the effort by providing a discussion forum around
|
||
Ulrich, writing little pieces of code, or testing. These are quoted
|
||
in the @code{THANKS} file which comes with the GNU @code{gettext}
|
||
distribution.
|
||
|
||
While this was being done, Fran@,{c}ois adapted half a dozen
|
||
GNU packages to @code{glocale} first, then later to @code{gettext},
|
||
putting them in pretest, so providing along the way an effective
|
||
user environment for fine tuning the evolving tools. He also took
|
||
the responsibility of organizing and coordinating the Translation
|
||
Project. After nearly a year of informal exchanges between people from
|
||
many countries, translator teams started to exist in May 1995, through
|
||
the creation and support by Patrick D'Cruze of twenty unmoderated
|
||
mailing lists for that many native languages, and two moderated
|
||
lists: one for reaching all teams at once, the other for reaching
|
||
all willing maintainers of internationalized free software packages.
|
||
|
||
Fran@,{c}ois also wrote PO mode in June 1995 with the collaboration
|
||
of Greg McGary, as a kind of contribution to Ulrich's package.
|
||
He also gave a hand with the GNU @code{gettext} Texinfo manual.
|
||
|
||
In 1997, Ulrich Drepper released the GNU libc 2.0, which included the
|
||
@code{gettext}, @code{textdomain} and @code{bindtextdomain} functions.
|
||
|
||
In 2000, Ulrich Drepper added plural form handling (the @code{ngettext}
|
||
function) to GNU libc. Later, in 2001, he released GNU libc 2.2.x,
|
||
which is the first free C library with full internationalization support.
|
||
|
||
Ulrich being quite busy in his role of General Maintainer of GNU libc,
|
||
he handed over the GNU @code{gettext} maintenance to Bruno Haible in
|
||
2000. Bruno added the plural form handling to the tools as well, added
|
||
support for UTF-8 and CJK locales, and wrote a few new tools for
|
||
manipulating PO files.
|
||
|
||
@include nls.texi
|
||
|
||
@node References
|
||
@section Related Readings
|
||
@cindex related reading
|
||
@cindex bibliography
|
||
|
||
@strong{ NOTE: } This documentation section is outdated and needs to be
|
||
revised.
|
||
|
||
Eugene H. Dorr (@file{dorre@@well.com}) maintains an interesting
|
||
bibliography on internationalization matters, called
|
||
@cite{Internationalization Reference List}, which is available as:
|
||
@example
|
||
ftp://ftp.ora.com/pub/examples/nutshell/ujip/doc/i18n-books.txt
|
||
@end example
|
||
|
||
Michael Gschwind (@file{mike@@vlsivie.tuwien.ac.at}) maintains a
|
||
Frequently Asked Questions (FAQ) list, entitled @cite{Programming for
|
||
Internationalisation}. This FAQ discusses writing programs which
|
||
can handle different language conventions, character sets, etc.;
|
||
and is applicable to all character set encodings, with particular
|
||
emphasis on @w{ISO 8859-1}. It is regularly published in Usenet
|
||
groups @file{comp.unix.questions}, @file{comp.std.internat},
|
||
@file{comp.software.international}, @file{comp.lang.c},
|
||
@file{comp.windows.x}, @file{comp.std.c}, @file{comp.answers}
|
||
and @file{news.answers}. The home location of this document is:
|
||
@example
|
||
ftp://ftp.vlsivie.tuwien.ac.at/pub/8bit/ISO-programming
|
||
@end example
|
||
|
||
Patrick D'Cruze (@file{pdcruze@@li.org}) wrote a tutorial about NLS
|
||
matters, and Jochen Hein (@file{Hein@@student.tu-clausthal.de}) took
|
||
over the responsibility of maintaining it. It may be found as:
|
||
@example
|
||
ftp://sunsite.unc.edu/pub/Linux/utils/nls/catalogs/Incoming/...
|
||
...locale-tutorial-0.8.txt.gz
|
||
@end example
|
||
@noindent
|
||
This site is mirrored in:
|
||
@example
|
||
ftp://ftp.ibp.fr/pub/linux/sunsite/
|
||
@end example
|
||
|
||
A French version of the same tutorial should be findable at:
|
||
@example
|
||
ftp://ftp.ibp.fr/pub/linux/french/docs/
|
||
@end example
|
||
@noindent
|
||
together with French translations of many Linux-related documents.
|
||
|
||
@node Language Codes
|
||
@appendix Language Codes
|
||
@cindex language codes
|
||
@cindex ISO 639
|
||
|
||
The @w{ISO 639} standard defines two-letter codes for many languages, and
|
||
three-letter codes for more rarely used languages.
|
||
All abbreviations for languages used in the Translation Project should
|
||
come from this standard.
|
||
|
||
@menu
|
||
* Usual Language Codes:: Two-letter ISO 639 language codes
|
||
* Rare Language Codes:: Three-letter ISO 639 language codes
|
||
@end menu
|
||
|
||
@node Usual Language Codes
|
||
@appendixsec Usual Language Codes
|
||
|
||
For the commonly used languages, the @w{ISO 639-1} standard defines two-letter
|
||
codes.
|
||
|
||
@table @samp
|
||
@include iso-639.texi
|
||
@end table
|
||
|
||
@node Rare Language Codes
|
||
@appendixsec Rare Language Codes
|
||
|
||
For rarely used languages, the @w{ISO 639-2} standard defines three-letter
|
||
codes. Here is the current list, reduced to only living languages with at least
|
||
one million speakers.
|
||
|
||
@table @samp
|
||
@include iso-639-2.texi
|
||
@end table
|
||
|
||
@node Country Codes
|
||
@appendix Country Codes
|
||
@cindex country codes
|
||
@cindex ISO 3166
|
||
|
||
The @w{ISO 3166} standard defines two-character codes for many countries
|
||
and territories. All abbreviations for countries used in the Translation
|
||
Project should come from this standard.
|
||
|
||
@table @samp
|
||
@include iso-3166.texi
|
||
@end table
|
||
|
||
@node Licenses
|
||
@appendix Licenses
|
||
@cindex Licenses
|
||
|
||
The files of this package are covered by the licenses indicated in each
|
||
particular file or directory. Here is a summary:
|
||
|
||
@itemize @bullet
|
||
@item
|
||
The @code{libintl} and @code{libasprintf} libraries are covered by the
|
||
GNU Lesser General Public License (LGPL).
|
||
A copy of the license is included in @ref{GNU LGPL}.
|
||
|
||
@item
|
||
The executable programs of this package and the @code{libgettextpo} library
|
||
are covered by the GNU General Public License (GPL).
|
||
A copy of the license is included in @ref{GNU GPL}.
|
||
|
||
@item
|
||
This manual is free documentation. It is dually licensed under the
|
||
GNU FDL and the GNU GPL. This means that you can redistribute this
|
||
manual under either of these two licenses, at your choice.
|
||
@*
|
||
This manual is covered by the GNU FDL. Permission is granted to copy,
|
||
distribute and/or modify this document under the terms of the
|
||
GNU Free Documentation License (FDL), either version 1.2 of the
|
||
License, or (at your option) any later version published by the
|
||
Free Software Foundation (FSF); with no Invariant Sections, with no
|
||
Front-Cover Text, and with no Back-Cover Texts.
|
||
A copy of the license is included in @ref{GNU FDL}.
|
||
@*
|
||
This manual is covered by the GNU GPL. You can redistribute it and/or
|
||
modify it under the terms of the GNU General Public License (GPL), either
|
||
version 2 of the License, or (at your option) any later version published
|
||
by the Free Software Foundation (FSF).
|
||
A copy of the license is included in @ref{GNU GPL}.
|
||
@end itemize
|
||
|
||
@menu
|
||
* GNU GPL:: GNU General Public License
|
||
* GNU LGPL:: GNU Lesser General Public License
|
||
* GNU FDL:: GNU Free Documentation License
|
||
@end menu
|
||
|
||
@page
|
||
@node GNU GPL
|
||
@appendixsec GNU GENERAL PUBLIC LICENSE
|
||
@cindex GPL, GNU General Public License
|
||
@cindex License, GNU GPL
|
||
@include gpl.texi
|
||
@page
|
||
@node GNU LGPL
|
||
@appendixsec GNU LESSER GENERAL PUBLIC LICENSE
|
||
@cindex LGPL, GNU Lesser General Public License
|
||
@cindex License, GNU LGPL
|
||
@include lgpl.texi
|
||
@page
|
||
@node GNU FDL
|
||
@appendixsec GNU Free Documentation License
|
||
@cindex FDL, GNU Free Documentation License
|
||
@cindex License, GNU FDL
|
||
@include fdl.texi
|
||
|
||
@node Program Index
|
||
@unnumbered Program Index
|
||
|
||
@printindex pg
|
||
|
||
@node Option Index
|
||
@unnumbered Option Index
|
||
|
||
@printindex op
|
||
|
||
@node Variable Index
|
||
@unnumbered Variable Index
|
||
|
||
@printindex vr
|
||
|
||
@node PO Mode Index
|
||
@unnumbered PO Mode Index
|
||
|
||
@printindex em
|
||
|
||
@node Autoconf Macro Index
|
||
@unnumbered Autoconf Macro Index
|
||
|
||
@printindex am
|
||
|
||
@node Index
|
||
@unnumbered General Index
|
||
|
||
@printindex cp
|
||
|
||
@bye
|
||
|
||
@c Local variables:
|
||
@c texinfo-column-for-description: 32
|
||
@c End:
|