i18n: Implement po file parsing in create_translations tool

Until now the parsing was done by the po2c perl script, which
generated a messages.h file. The create_translations executable
then had to be recompiled before being executed. This commit
removes the po2c perl script; the parsing is now done directly by
the create_translations tool.

The parsing has also been extended to support the msgctxt strings.
This is not dumped yet in translations.dat but will be once I have
finished implementing context support for the translations.

svn-id: r52284
This commit is contained in:
Thierry Crozat 2010-08-22 18:49:19 +00:00
parent 66f7e4306c
commit c89462c2a6
8 changed files with 480 additions and 2893 deletions

View File

@ -35,10 +35,8 @@ updatepot:
#$(srcdir)/common/messages.cpp: $(POFILES)
# perl $(srcdir)/tools/po2c $^ > $(srcdir)/common/messages.cpp
translations-dat:
perl $(srcdir)/tools/create_translations/po2c $(POFILES) > $(srcdir)/tools/create_translations/messages.h
make tools/create_translations
tools/create_translations/create_translations
translations-dat: tools/create_translations
tools/create_translations/create_translations $(POFILES)
mv translations.dat gui/themes/
update-translations: updatepot $(POFILES) translations-dat

View File

@ -33,11 +33,7 @@
#endif // main
#include "create_translations.h"
// Include messages
// This file is generated from the po files by the script po2c:
// tools/create_translations/po2c po/*.po > tools/create_translations/messages.h
#include "messages.h"
#include "po_parser.h"
#define TRANSLATIONS_DAT_VER 1 // 1 byte
@ -58,6 +54,8 @@ void writeUint16BE(FILE *fp, uint16 value) {
int stringSize(const char* string) {
// Each string is preceded by its size coded on 2 bytes
if (string == NULL)
return 2;
int len = strlen(string) + 1;
return 2 + len;
// The two lines below are an example if we want to align string writes
@ -67,6 +65,10 @@ int stringSize(const char* string) {
void writeString(FILE *fp, const char* string) {
// Each string is preceded by its size coded on 2 bytes
if (string == NULL) {
writeUint16BE(fp, 0);
return;
}
int len = strlen(string) + 1;
writeUint16BE(fp, len);
fwrite(string, len, 1, fp);
@ -78,22 +80,20 @@ void writeString(FILE *fp, const char* string) {
// fwrite(padBuf, pad, 1, fp);
}
int translationArraySize(const PoMessageEntry *msgs) {
// ARRAYSIZE() macro does not work on _translations[index].msgs
// We rely on the fact that the item of the array has an id of 1 instead.
int size = 0;
while (msgs[size].msgid != -1) {
size++;
}
return size;
}
// Main
int main(int argc, char *argv[]) {
// Build the translation list
PoMessageList messageIds;
PoMessageEntryList** translations = new PoMessageEntryList*[argc - 1];
int numLangs = 0;
for (int i = 1 ; i < argc ; ++i) {
translations[numLangs] = parsePoFile(argv[i], messageIds);
if (translations[numLangs] != NULL)
++numLangs;
}
FILE* outFile;
int numLangs = ARRAYSIZE(_translations) - 1;
int numMessages = ARRAYSIZE(_messageIds) - 1;
int i, lang, nb;
int i, lang;
int len;
// Padding buffer initialization (filled with 0)
@ -112,9 +112,9 @@ int main(int argc, char *argv[]) {
writeUint16BE(outFile, numLangs);
// Write the length of each data block here.
// We could write it at the start of each block but that would mean than
// We could write it at the start of each block but that would mean that
// to go to block 4 we would have to go at the start of each preceding block,
// read it size and skip it until we arrive at the block we want.
// read its size and skip it until we arrive at the block we want.
// By having all the sizes at the start we just need to read the start of the
// file and can then skip to the block we want.
// Blocks are:
@ -128,8 +128,8 @@ int main(int argc, char *argv[]) {
// Each description
len = 0;
for (lang = 0; lang < numLangs; lang++) {
len += stringSize(_translations[lang].lang);
len += stringSize(_translations[lang].langname);
len += stringSize(translations[lang]->language());
len += stringSize(translations[lang]->languageName());
}
writeUint16BE(outFile, len);
@ -137,8 +137,8 @@ int main(int argc, char *argv[]) {
// It starts with the number of strings coded on 2 bytes followed by each
// string (two bytes for the number of chars and the string itself).
len = 2;
for (i = 0; i < numMessages ; ++i)
len += stringSize(_messageIds[i]);
for (i = 0; i < messageIds.size() ; ++i)
len += stringSize(messageIds[i]);
writeUint16BE(outFile, len);
// Then comes the size of each translation block.
@ -146,37 +146,40 @@ int main(int argc, char *argv[]) {
// For each string we have the string id (on two bytes) followed by
// the string size (two bytes for the number of chars and the string itself).
for (lang = 0; lang < numLangs; lang++) {
len = 2 + stringSize(_translations[lang].charset);
nb = translationArraySize(_translations[lang].msgs);
for (i = 0; i < nb ; ++i)
len += 2 + stringSize(_translations[lang].msgs[i].msgstr);
len = 2 + stringSize(translations[lang]->charset());
for (i = 0; i < translations[lang]->size() ; ++i)
len += 2 + stringSize(translations[lang]->entry(i)->msgstr);
writeUint16BE(outFile, len);
}
// Write list of languages
for (lang = 0; lang < numLangs; lang++) {
writeString(outFile, _translations[lang].lang);
writeString(outFile, _translations[lang].langname);
writeString(outFile, translations[lang]->language());
writeString(outFile, translations[lang]->languageName());
}
// Write original messages
writeUint16BE(outFile, numMessages);
for (i = 0; i < numMessages ; ++i) {
writeString(outFile, _messageIds[i]);
writeUint16BE(outFile, messageIds.size());
for (i = 0; i < messageIds.size() ; ++i) {
writeString(outFile, messageIds[i]);
}
// Write translations
for (lang = 0; lang < numLangs; lang++) {
nb = translationArraySize(_translations[lang].msgs);
writeUint16BE(outFile, nb);
writeString(outFile, _translations[lang].charset);
for (i = 0; i < nb ; ++i) {
writeUint16BE(outFile, _translations[lang].msgs[i].msgid);
writeString(outFile, _translations[lang].msgs[i].msgstr);
writeUint16BE(outFile, translations[lang]->size());
writeString(outFile, translations[lang]->charset());
for (i = 0; i < translations[lang]->size() ; ++i) {
writeUint16BE(outFile, messageIds.findIndex(translations[lang]->entry(i)->msgid));
writeString(outFile, translations[lang]->entry(i)->msgstr);
}
}
fclose(outFile);
// Clean the memory
for (i = 0 ; i < numLangs ; ++i)
delete translations[i];
delete [] translations;
return 0;
}

View File

@ -23,8 +23,6 @@
#ifndef CREATE_TRANSLATIONS_H
#define CREATE_TRANSLATIONS_H
#define ARRAYSIZE(x) ((int)(sizeof(x) / sizeof(x[0])))
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef signed short int16;

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
MODULE := tools/create_translations
MODULE_OBJS := \
po_parser.o \
create_translations.o
# Set the name of the executable

View File

@ -1,190 +0,0 @@
#!/usr/bin/perl
#
# po2c - Converts .po files to C code
#
# Copyright (C) 2004 Angel Ortega <angel@triptico.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# http://www.triptico.com
#
# This program has been modified to suit the needs of the ScummVM project.
#
# Version string printed in the banner of the generated code.
$VERSION = "1.0.2-scummvm";

# At least one .po file must be given on the command line.
unless (@ARGV) {
    print "Usage: po2c {po file[s]}\n";
    exit 1;
}
# %msgs maps a language code (the .po file's base name) to a reference to
# that language's msgid => msgstr hash.
%msgs = ();
# %msgids counts, for every msgid, how many times it was stored across all
# the files read below.
%msgids = ();
# stage 1: loading
# arguments are .po files
foreach my $f (@ARGV)
{
my ($lang);
# NOTE(review): $langDesc is declared but never used in this loop.
my ($langDesc);
# The language code is the file name without directory and ".po" suffix;
# arguments that do not end in ".po" are skipped.
next unless(($lang) = ($f =~ /([^\/]+)\.po$/));
# Files that cannot be opened are silently ignored.
if(open F, $f)
{
# $val accumulates the string currently being read (msgid or msgstr);
# $msgid holds the completed msgid once its msgstr line has been seen;
# %a collects the msgid => msgstr pairs of this file.
my ($msgid, $val, %a);
while(<F>)
{
chomp;
# ignore blank lines or comments
next if /^$/ or /^#/;
if(/^msgid\s+\"(.*)\"\s*$/)
{
# store previous msgid
# (at this point $val holds the msgstr of the previous entry)
if(defined($msgid))
{
$a{$msgid} = $val;
$msgids{$msgid} ++;
}
# start of msgid
$val = $1;
}
elsif(/^msgstr\s+\"(.*)\"\s*$/)
{
# store previous msgid
# ($val holds the complete msgid text, including continuation lines)
$msgid = $val;
# start of msgstr
$val = $1;
}
elsif(/^\"(.*)\"\s*$/)
{
# add to current value
# (continuation line of whichever string is currently being read)
$val .= $1;
}
}
# store previous msgid
# (the last entry of the file has no following msgid line to flush it)
if(defined($msgid))
{
$a{$msgid} = $val;
$msgids{$msgid} ++;
}
close F;
# add to the global message pool
$msgs{$lang} = \%a;
}
}
# stage 2: number the messages
# Gather every msgid seen in any file, in sorted order.
@msgids = sort keys %msgids;
# Overwrite the counters in %msgids with each msgid's position in the
# sorted list, so %msgids now maps msgid => index.
@msgids{@msgids} = (0 .. $#msgids);
# stage 3: emit the C++ source on standard output
print "// generated by po2c $VERSION - Do not modify\n\n";

# Table of all original messages, terminated by NULL.
print "const char * const _messageIds[] = {\n";
foreach my $index (0 .. $#msgids) {
    print "\t/* $index */ \"$msgids[$index]\",\n";
}
print "\tNULL\n};\n\n";

# Declaration of one translation entry (index into _messageIds + text).
print "struct PoMessageEntry {\n\tint msgid;\n\tconst char *msgstr;\n};\n\n";

# One table per language, terminated by a { -1, NULL } entry.
foreach my $code (keys %msgs) {
    my $table = $msgs{$code};
    print "const PoMessageEntry _translation_${code}\[\] = {\n";
    foreach my $id (sort keys %$table) {
        # Make the translation 7-bit safe: every character above 0x7f is
        # replaced by a backslash followed by its octal code.
        (my $escaped = $table->{$id}) =~ s/([^\x00-\x7f])/sprintf("\\%o", ord($1))/ge;
        # Entries whose translation is false in Perl terms are not emitted.
        next unless $escaped;
        print "\t{ $msgids{$id}, \"$escaped\" },\n";
    }
    print "\t{ -1, NULL }\n};\n\n";
}
# finally, dump the languages
print "struct PoLangEntry {\n";
print "\tconst char *lang;\n";
print "\tconst char *charset;\n";
print "\tconst char *langname;\n";
print "\tconst PoMessageEntry *msgs;\n";
print "};\n\n";

print "const PoLangEntry _translations[] = {\n";
foreach my $l (keys(%msgs))
{
    # The translation stored for the empty msgid is the .po header.
    $header = $msgs{$l}->{""};
    $header = "" unless defined($header);

    # charset
    # The capture is taken from the match's return list, not from $1:
    # after a failed match $1 still holds the capture of the previous
    # successful one, so reading it unconditionally can pick up stale data.
    ($charset) = ($header =~ /charset=([^\\]+)/);
    $charset = "" unless defined($charset);

    # user readable language name; falls back to the language code when the
    # header has no "Language:" field or the field is empty
    my ($name) = ($header =~ /Language:[\s]*([^\\]*)/);
    $lang = (defined($name) && $name ne "") ? $name : $l;

    print "\t{ \"" . $l . "\", \"" . $charset . "\", \"" . $lang . "\", _translation_${l} },\n";
}
print "\t{ NULL, NULL, NULL, NULL }\n};\n\n";

exit 0;

View File

@ -0,0 +1,325 @@
/* ScummVM - Graphic Adventure Engine
*
* ScummVM is the legal property of its developers, whose names
* are too numerous to list here. Please refer to the COPYRIGHT
* file distributed with this source distribution.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* This is a utility for creating the translations.dat file from all the po files.
* The generated file is used by ScummVM to offer translations of its GUI.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "po_parser.h"
/**
 * Create an empty list. No storage is allocated here; the pointer array
 * starts out NULL with a size and capacity of zero.
 */
PoMessageList::PoMessageList()
	: _messages(NULL),
	  _size(0),
	  _allocated(0) {
}
/**
 * Free every stored message copy, then the pointer array that held them.
 */
PoMessageList::~PoMessageList() {
	int remaining = _size;
	while (remaining > 0) {
		--remaining;
		delete[] _messages[remaining];
	}
	delete[] _messages;
}
/**
 * Add a copy of msg to the list, keeping the list sorted in strcmp() order.
 *
 * NULL and empty messages are ignored, and so is a message that is already
 * present. The pointer array grows by 100 slots whenever it is full.
 */
void PoMessageList::insert(const char *msg) {
	if (msg == NULL || msg[0] == '\0')
		return;

	// Binary search over the half-open range [low, high) for the slot
	// where msg belongs.
	int low = 0;
	int high = _size;
	while (low < high) {
		const int middle = low + (high - low) / 2;
		const int order = strcmp(msg, _messages[middle]);
		if (order == 0)
			return; // already stored
		if (order < 0)
			high = middle;
		else
			low = middle + 1;
	}
	const int position = low;

	if (_size >= _allocated) {
		// Full: move to a larger array, leaving a gap at position.
		_allocated += 100;
		char **grown = new char*[_allocated];
		for (int i = 0; i < _size; ++i)
			grown[i < position ? i : i + 1] = _messages[i];
		delete[] _messages;
		_messages = grown;
	} else {
		// Enough room: open the gap by shifting the tail up one slot.
		int slot = _size;
		while (slot > position) {
			_messages[slot] = _messages[slot - 1];
			--slot;
		}
	}

	char *copy = new char[strlen(msg) + 1];
	strcpy(copy, msg);
	_messages[position] = copy;
	++_size;
}
/**
 * Look up msg in the sorted list.
 *
 * @return the index of the message, or -1 when msg is NULL, empty or not
 *         stored in the list.
 */
int PoMessageList::findIndex(const char *msg) {
	if (msg == NULL || msg[0] == '\0')
		return -1;

	// Binary search over the half-open range [low, high).
	int low = 0;
	int high = _size;
	while (low < high) {
		const int middle = low + (high - low) / 2;
		const int order = strcmp(msg, _messages[middle]);
		if (order == 0)
			return middle;
		if (order < 0)
			high = middle;
		else
			low = middle + 1;
	}
	return -1;
}
/**
 * @return the number of messages currently stored.
 */
int PoMessageList::size() const {
	return _size;
}
/**
 * @return the message stored at index, or NULL when index is out of range.
 */
const char *PoMessageList::operator[](int index) const {
	const bool inRange = (index >= 0) && (index < _size);
	return inRange ? _messages[index] : NULL;
}
/**
 * Create an empty entry list for the language code lang.
 *
 * Until a header entry is added, the charset is the empty string and the
 * language name is the language code itself.
 */
PoMessageEntryList::PoMessageEntryList(const char *lang)
	: _lang(NULL), _charset(NULL), _langName(NULL),
	  _list(NULL), _size(0), _allocated(0) {
	const size_t codeBytes = strlen(lang) + 1;

	// Private copy of the language code.
	_lang = new char[codeBytes];
	strcpy(_lang, lang);

	// Default charset: empty string.
	_charset = new char[1];
	*_charset = '\0';

	// Default language name: the language code.
	_langName = new char[codeBytes];
	strcpy(_langName, lang);
}
/**
 * Free the owned strings, every stored entry and the entry pointer array.
 */
PoMessageEntryList::~PoMessageEntryList() {
	int remaining = _size;
	while (remaining > 0) {
		--remaining;
		delete _list[remaining];
	}
	delete[] _list;

	delete[] _langName;
	delete[] _charset;
	delete[] _lang;
}
void PoMessageEntryList::addMessageEntry(const char *translation, const char *message, const char *context) {
if (*message == '\0') {
// This is the header.
// We get the charset and the language name from the translation string
char* str = parseLine(translation, "Language:");
if (str != NULL) {
delete [] _langName;
_langName = str;
}
str = parseLine(translation, "charset=");
if (str != NULL) {
delete [] _charset;
_charset = str;
}
return;
}
// binary-search for the insertion index
int leftIndex = 0;
int rightIndex = _size - 1;
while (rightIndex >= leftIndex) {
int midIndex = (leftIndex + rightIndex) / 2;
int compareResult = strcmp(message, _list[midIndex]->msgid);
if (compareResult == 0) {
if (context == NULL) {
if (_list[midIndex]->msgctxt == NULL)
return;
compareResult = -1;
} else {
if (_list[midIndex]->msgctxt == NULL)
compareResult = 1;
else {
compareResult = strcmp(context, _list[midIndex]->msgctxt);
if (compareResult == 0)
return;
}
}
}
if (compareResult < 0)
rightIndex = midIndex - 1;
else
leftIndex = midIndex + 1;
}
// We now have rightIndex = leftIndex - 1 and we need to insert the new message
// between the two (i.a. at leftIndex).
if (_size + 1 > _allocated) {
_allocated += 100;
PoMessageEntry** newList = new PoMessageEntry*[_allocated];
for (int i = 0 ; i < leftIndex ; ++i)
newList[i] = _list[i];
for (int i = leftIndex ; i < _size ; ++i)
newList[i+1] = _list[i];
delete [] _list;
_list = newList;
} else {
for (int i = _size - 1 ; i >= leftIndex ; --i)
_list[i+1] = _list[i];
}
_list[leftIndex] = new PoMessageEntry(translation, message, context);
++_size;
}
/**
 * @return the language code this list was created with.
 */
const char *PoMessageEntryList::language() const {
	return _lang;
}
/**
 * @return the language name: the "Language:" field of the po header when one
 *         was found, otherwise the language code.
 */
const char *PoMessageEntryList::languageName() const {
	return _langName;
}
/**
 * @return the "charset=" field of the po header when one was found,
 *         otherwise the empty string.
 */
const char *PoMessageEntryList::charset() const {
	return _charset;
}
/**
 * @return the number of translation entries stored (the header is not one).
 */
int PoMessageEntryList::size() const {
	return _size;
}
/**
 * @return the entry stored at index, or NULL when index is out of range.
 */
const PoMessageEntry *PoMessageEntryList::entry(int index) const {
	const bool inRange = (index >= 0) && (index < _size);
	return inRange ? _list[index] : NULL;
}
PoMessageEntryList* parsePoFile(const char* file, PoMessageList& messages) {
FILE* inFile = fopen(file, "r");
if (!inFile)
return NULL;
char msgidBuf[1024], msgctxtBuf[1024], msgstrBuf[1024];
char line[1024], *currentBuf = NULL;
// Get language from file name and create PoMessageEntryList
int index = 0, start_index = strlen(file) - 1;
while (start_index > 0 && file[start_index-1] != '/' && file[start_index-1] != '\\') {
--start_index;
}
while (file[start_index+index] != '.' && file[start_index+index] != '\0') {
msgidBuf[index] = file[start_index+index];
++index;
}
msgidBuf[index] = '\0';
PoMessageEntryList *list = new PoMessageEntryList(msgidBuf);
// Parse the file line by line.
// The msgstr is always the last line of an entry (i.e. msgid and msgctxt always
// precede the corresponding msgstr).
msgidBuf[0] = msgstrBuf[0] = msgctxtBuf[0] = '\0';
while (!feof(inFile) && fgets(line, 1024, inFile)) {
// Skip empty and comment line
if (*line == '\n' || *line == '#')
continue;
if (strncmp(line, "msgid", 5) == 0) {
if (currentBuf == msgstrBuf) {
// add previous entry
if (*msgstrBuf != '\0') {
messages.insert(msgidBuf);
list->addMessageEntry(msgstrBuf, msgidBuf, msgctxtBuf);
}
msgidBuf[0] = msgstrBuf[0] = msgctxtBuf[0] = '\0';
}
strcpy(msgidBuf, stripLine(line));
currentBuf = msgidBuf;
} else if (strncmp(line, "msgctxt", 7) == 0) {
if (currentBuf == msgstrBuf) {
// add previous entry
if (*msgstrBuf != '\0') {
messages.insert(msgidBuf);
list->addMessageEntry(msgstrBuf, msgidBuf, msgctxtBuf);
}
msgidBuf[0] = msgstrBuf[0] = msgctxtBuf[0] = '\0';
}
strcpy(msgctxtBuf, stripLine(line));
currentBuf = msgctxtBuf;
} else if (strncmp(line, "msgstr", 6) == 0) {
strcpy(msgstrBuf, stripLine(line));
currentBuf = msgstrBuf;
} else {
// concatenate the string at the end of the current buffer
if (currentBuf)
strcat(currentBuf, stripLine(line));
}
}
fclose(inFile);
return list;
}
/**
 * Reduce line, in place, to the text between its first two unescaped quotes.
 *
 * The text is moved to the start of the buffer and NUL-terminated. Escape
 * sequences are kept as written (they are not decoded). When the line has no
 * quote at all the result is the empty string.
 *
 * A quote is escaped when it is preceded by an odd number of backslashes;
 * after an even run (e.g. the end of "C:\\") the backslashes escape each
 * other and the quote closes the string.
 *
 * @return line
 */
char *stripLine(char *line) {
	const size_t len = strlen(line);

	// Look for the opening quote and step past it.
	size_t start = 0;
	while (start < len && line[start] != '"')
		++start;
	if (start < len)
		++start;

	// Shift characters down until the end of the line or the closing quote.
	// out always trails in, so no unread character is overwritten.
	size_t out = 0;
	size_t backslashRun = 0; // backslashes directly before the current character
	for (size_t in = start; in < len; ++in) {
		const char c = line[in];
		if (c == '"' && backslashRun % 2 == 0)
			break;
		backslashRun = (c == '\\') ? backslashRun + 1 : 0;
		line[out++] = c;
	}
	line[out] = '\0';
	return line;
}
/**
 * Extract the value of a header field.
 *
 * The value starts after the first occurrence of field in line, with leading
 * white space skipped, and stops at the first backslash (in the unprocessed
 * header text each field is followed by the two characters '\' 'n') or at
 * the end of the string.
 *
 * It is used to parse the header of the po files to find the language name
 * and the charset.
 *
 * @param line  header text to search
 * @param field field marker, e.g. "Language:" or "charset="
 * @return a new char[] the caller must delete[], or NULL when an argument is
 *         NULL, the field is not found or its value is empty
 */
char *parseLine(const char *line, const char *field) {
	if (line == NULL || field == NULL)
		return NULL;

	// strstr() on a const string yields a const pointer in C++.
	const char *value = strstr(line, field);
	if (value == NULL)
		return NULL;
	value += strlen(field);

	// Skip spaces. The cast keeps isspace() defined for bytes above 0x7f.
	while (*value != '\0' && isspace((unsigned char)*value))
		++value;

	// Find the value length (stop at the first '\\').
	size_t len = 0;
	while (value[len] != '\0' && value[len] != '\\')
		++len;
	if (len == 0)
		return NULL;

	// Create result string
	char *result = new char[len + 1];
	memcpy(result, value, len);
	result[len] = '\0';
	return result;
}

View File

@ -0,0 +1,110 @@
/* ScummVM - Graphic Adventure Engine
*
* ScummVM is the legal property of its developers, whose names
* are too numerous to list here. Please refer to the COPYRIGHT
* file distributed with this source distribution.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
#ifndef PO_PARSER_H
#define PO_PARSER_H
/**
 * List of english messages.
 *
 * The list owns a private copy of every message, keeps the copies sorted in
 * strcmp() order and never holds the same message twice.
 */
class PoMessageList {
public:
// Creates an empty list.
PoMessageList();
// Frees all stored message copies.
~PoMessageList();
// Stores a copy of msg at its sorted position. NULL, empty and already
// present messages are ignored.
void insert(const char *msg);
// Returns the index of msg in the list, or -1 when msg is NULL, empty or
// not in the list.
int findIndex(const char *msg);
// Returns the number of messages stored.
int size() const;
// Returns the message at the given index, or NULL when it is out of range.
const char* operator[](int) const;
private:
// Array of owned message strings, sorted; only the first _size slots are used.
char** _messages;
// Number of messages stored.
int _size;
// Number of slots allocated in _messages.
int _allocated;
};
/**
 * One translation entry: the translated text, the original message and an
 * optional context.
 *
 * Every non-empty constructor argument is stored as a private heap copy that
 * the destructor frees. A NULL or empty argument leaves the matching field
 * NULL.
 */
struct PoMessageEntry {
	char *msgstr;
	char *msgid;
	char *msgctxt;

	PoMessageEntry(const char *translation, const char *message, const char *context = NULL)
		: msgstr(copyIfNotEmpty(translation)),
		  msgid(copyIfNotEmpty(message)),
		  msgctxt(copyIfNotEmpty(context)) {
	}

	~PoMessageEntry() {
		delete[] msgctxt;
		delete[] msgid;
		delete[] msgstr;
	}

private:
	// Returns a new[]-allocated copy of text, or NULL for a NULL or empty text.
	static char *copyIfNotEmpty(const char *text) {
		if (text == NULL || *text == '\0')
			return NULL;
		char *copy = new char[strlen(text) + 1];
		return strcpy(copy, text);
	}
};
/**
 * List of translation entries for one language.
 *
 * Besides the entries, which are kept sorted by message and then context,
 * the list holds the language code, the language name and the charset.
 */
class PoMessageEntryList {
public:
// Creates an empty list for the given language code. The language name
// defaults to the code and the charset to the empty string.
PoMessageEntryList(const char* language);
// Frees the stored strings and all entries.
~PoMessageEntryList();
// Adds one entry. An empty message marks the po header, whose "Language:"
// and "charset=" fields update the language name and charset instead of
// being stored as an entry.
void addMessageEntry(const char *translation, const char *message, const char *context = NULL);
// Returns the language code given to the constructor.
const char* language() const;
// Returns the language name.
const char* languageName() const;
// Returns the charset.
const char* charset() const;
// Returns the number of entries stored.
int size() const;
// Returns the entry at the given index, or NULL when it is out of range.
const PoMessageEntry* entry(int) const;
private:
// Owned copy of the language code.
char* _lang;
// Owned charset string.
char* _charset;
// Owned language name string.
char* _langName;
// Array of owned entries; only the first _size slots are used.
PoMessageEntry** _list;
// Number of entries stored.
int _size;
// Number of slots allocated in _list.
int _allocated;
};
// Parses the given po file. The msgids of its translated entries are inserted
// into the message list passed by reference. Returns a newly allocated entry
// list, or NULL when the file cannot be opened.
PoMessageEntryList* parsePoFile(const char* file, PoMessageList&);
// Modifies the line in place, keeping only the text found between its first
// two unprotected quotes, and returns it.
char* stripLine(char*);
// Returns a newly allocated copy of the value following field in line, or
// NULL when the field is not found or its value is empty.
char* parseLine(const char* line, const char* field);
#endif /* PO_PARSER_H */