gecko-dev/webtools/mozbot/babel.pm

160 lines
4.1 KiB
Perl
Raw Normal View History

# -*- Mode: perl; indent-tabs-mode: nil -*-
#
# The contents of this file are subject to the Mozilla Public License
# Version 1.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
# License for the specific language governing rights and limitations
# under the License.
#
# The Original Code is the Bugzilla Bug Tracking System.
#
# The Initial Developer of the Original Code is Netscape Communications
# Corporation. Portions created by Netscape are Copyright (C) 1998
# Netscape Communications Corporation. All Rights Reserved.
#
# Contributor(s): Terry Weissman <terry@mozilla.org>
#
#
#
# Most of this module is rudely swiped from infobot. Here are the various
# licensing words I found with it:
# --
#
# The license for this stuff is yet to be written.
# Please don't do anything good with it without at
# least mentioning this work, perhaps the author (Kevin
# Lenzo, lenzo@cs.cmu.edu), and the Carnegie Mellon
# University, which supports my study.
#
# Also, there is work being done on various bits of this
# now by various people; if you have any corrections
# or contributions, please send them to me. Flat
# ascii files of databases, made with dump_db, are
# wonderful things to share, and a repository will be
# set up.
#
# ---
#
# This program is copyright Jonathan Feinberg 1999.
#
# This program is distributed under the same terms as infobot.
#
# Jonathan Feinberg
# jdf@pobox.com
# http://pobox.com/~jdf/
#
# Version 1.0
# First public release.
#
# ---------------------- (End of licensing words) ------------------------
package babel;
use strict;
use diagnostics;
my $no_babel;
BEGIN {
eval "use URI::Escape"; # utility functions for encoding the
if ($@) { $no_babel++}; # babelfish request
eval "use LWP::UserAgent";
if ($@) { $no_babel++};
}
BEGIN {
# Translate some feasible abbreviations into the ones babelfish
# expects.
use vars qw!%lang_code $lang_regex!;
%lang_code = (
fr => 'fr',
sp => 'es',
po => 'pt',
pt => 'pt',
it => 'it',
ge => 'de',
de => 'de',
gr => 'de',
en => 'en'
);
# Here's how we recognize the language you're asking for. It looks
# like RTSL saves you a few keystrokes in #perl, huh?
$lang_regex = join '|', keys %lang_code;
}
sub babelfish {
return '' if $no_babel;
my ($direction, $lang, $phrase) = @_;
$lang = $lang_code{$lang};
my $ua = new LWP::UserAgent;
$ua->timeout(4);
my $req =
HTTP::Request->new('POST',
'http://babelfish.altavista.digital.com/cgi-bin/translate');
$req->content_type('application/x-www-form-urlencoded');
my $tolang = "en_$lang";
my $toenglish = "${lang}_en";
if ($direction eq 'to') {
return translate($phrase, $tolang, $req, $ua);
}
elsif ($direction eq 'from') {
return translate($phrase, $toenglish, $req, $ua);
}
my $last_english = $phrase;
my $last_lang;
my %results = ();
my $i = 0;
while ($i++ < 7) {
last if $results{$phrase}++;
$last_lang = $phrase = translate($phrase, $tolang, $req, $ua);
last if $results{$phrase}++;
$last_english = $phrase = translate($phrase, $toenglish, $req, $ua);
}
return $last_english;
}
sub translate {
return '' if $no_babel;
my ($phrase, $languagepair, $req, $ua) = @_;
my $urltext = uri_escape($phrase);
$req->content("urltext=$urltext&lp=$languagepair&doit=done");
my $res = $ua->request($req);
if ($res->is_success) {
my $html = $res->content;
# This method subject to change with the whims of Altavista's design
# staff.
my ($translated) =
($html =~ m{<br>
\s+
<font\ face="arial,\ helvetica">
\s*
(?:\*\*\s+time\ out\s+\*\*)?
\s*
([^<]*)
}sx);
$translated =~ s/\n/ /g;
$translated =~ s/\s*$//;
return $translated;
} else {
return ":("; # failure
}
}
"Hello. I'm a true value.";