gecko-dev/browser/components/translation/BingTranslator.jsm

432 lines
14 KiB
JavaScript

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";
const {classes: Cc, interfaces: Ci, utils: Cu} = Components;
this.EXPORTED_SYMBOLS = [ "BingTranslator" ];
Cu.import("resource://gre/modules/Services.jsm");
Cu.import("resource://gre/modules/Log.jsm");
Cu.import("resource://gre/modules/Promise.jsm");
Cu.import("resource://gre/modules/Task.jsm");
Cu.import("resource://services-common/utils.js");
Cu.import("resource://services-common/rest.js");
// The maximum amount of net data allowed per request on Bing's API.
const MAX_REQUEST_DATA = 5000; // Documentation says 10000 but anywhere
// close to that is refused by the service.
// The maximum number of chunks allowed to be translated in a single
// request.
const MAX_REQUEST_CHUNKS = 1000; // Documentation says 2000.
// Self-imposed limit of 15 requests. This means that a page that would need
// to be broken in more than 15 requests won't be fully translated.
// The maximum amount of data that we will translate for a single page
// is MAX_REQUESTS * MAX_REQUEST_DATA.
const MAX_REQUESTS = 15;
/**
* Translates a webpage using Bing's Translation API.
*
* @param translationDocument The TranslationDocument object that represents
* the webpage to be translated
* @param sourceLanguage The source language of the document
* @param targetLanguage The target language for the translation
*
* @returns {Promise} A promise that will resolve when the translation
* task is finished.
*/
this.BingTranslator = function(translationDocument, sourceLanguage, targetLanguage) {
this.translationDocument = translationDocument;
this.sourceLanguage = sourceLanguage;
this.targetLanguage = targetLanguage;
this._pendingRequests = 0;
this._partialSuccess = false;
this._serviceUnavailable = false;
this._translatedCharacterCount = 0;
};
this.BingTranslator.prototype = {
/**
* Performs the translation, splitting the document into several chunks
* respecting the data limits of the API.
*
* @returns {Promise} A promise that will resolve when the translation
* task is finished.
*/
translate: function() {
return Task.spawn(function *() {
let currentIndex = 0;
this._onFinishedDeferred = Promise.defer();
// Let's split the document into various requests to be sent to
// Bing's Translation API.
for (let requestCount = 0; requestCount < MAX_REQUESTS; requestCount++) {
// Generating the text for each request can be expensive, so
// let's take the opportunity of the chunkification process to
// allow for the event loop to attend other pending events
// before we continue.
yield CommonUtils.laterTickResolvingPromise();
// Determine the data for the next request.
let request = this._generateNextTranslationRequest(currentIndex);
// Create a real request to the server, and put it on the
// pending requests list.
let bingRequest = new BingRequest(request.data,
this.sourceLanguage,
this.targetLanguage);
this._pendingRequests++;
bingRequest.fireRequest().then(this._chunkCompleted.bind(this),
this._chunkFailed.bind(this));
currentIndex = request.lastIndex;
if (request.finished) {
break;
}
}
return this._onFinishedDeferred.promise;
}.bind(this));
},
/**
* Function called when a request sent to the server completed successfully.
* This function handles calling the function to parse the result and the
* function to resolve the promise returned by the public `translate()`
* method when there's no pending request left.
*
* @param request The BingRequest sent to the server.
*/
_chunkCompleted: function(bingRequest) {
if (this._parseChunkResult(bingRequest)) {
this._partialSuccess = true;
// Count the number of characters successfully translated.
this._translatedCharacterCount += bingRequest.characterCount;
}
this._checkIfFinished();
},
/**
* Function called when a request sent to the server has failed.
* This function handles deciding if the error is transient or means the
* service is unavailable (zero balance on the key) and calling the
* function to resolve the promise returned by the public `translate()`
* method when there's no pending request left.
*
* @param aError [optional] The RESTRequest that failed.
*/
_chunkFailed: function(aError) {
if (aError instanceof RESTRequest &&
aError.response.status == 400) {
let body = aError.response.body;
if (body.contains("TranslateApiException") && body.contains("balance"))
this._serviceUnavailable = true;
}
this._checkIfFinished();
},
/**
* Function called when a request sent to the server has completed.
* This function handles resolving the promise
* returned by the public `translate()` method when all chunks are completed.
*/
_checkIfFinished: function() {
// Check if all pending requests have been
// completed and then resolves the promise.
// If at least one chunk was successful, the
// promise will be resolved positively which will
// display the "Success" state for the infobar. Otherwise,
// the "Error" state will appear.
if (--this._pendingRequests == 0) {
if (this._partialSuccess) {
this._onFinishedDeferred.resolve({
characterCount: this._translatedCharacterCount
});
} else {
let error = this._serviceUnavailable ? "unavailable" : "failure";
this._onFinishedDeferred.reject(error);
}
}
},
/**
* This function parses the result returned by Bing's Http.svc API,
* which is a XML file that contains a number of elements. To our
* particular interest, the only part of the response that matters
* are the <TranslatedText> nodes, which contains the resulting
* items that were sent to be translated.
*
* @param request The request sent to the server.
* @returns boolean True if parsing of this chunk was successful.
*/
_parseChunkResult: function(bingRequest) {
let domParser = Cc["@mozilla.org/xmlextras/domparser;1"]
.createInstance(Ci.nsIDOMParser);
let results;
try {
let doc = domParser.parseFromString(bingRequest.networkRequest
.response.body, "text/xml");
results = doc.querySelectorAll("TranslatedText");
} catch (e) {
return false;
}
let len = results.length;
if (len != bingRequest.translationData.length) {
// This should never happen, but if the service returns a different number
// of items (from the number of items submitted), we can't use this chunk
// because all items would be paired incorrectly.
return false;
}
let error = false;
for (let i = 0; i < len; i++) {
try {
let result = results[i].firstChild.nodeValue;
let root = bingRequest.translationData[i][0];
if (root.isSimpleRoot) {
// Workaround for Bing's service problem in which "&" chars in
// plain-text TranslationItems are double-escaped.
result = result.replace("&amp;", "&", "g");
}
root.parseResult(result);
} catch (e) { error = true; }
}
return !error;
},
/**
* This function will determine what is the data to be used for
* the Nth request we are generating, based on the input params.
*
* @param startIndex What is the index, in the roots list, that the
* chunk should start.
*/
_generateNextTranslationRequest: function(startIndex) {
let currentDataSize = 0;
let currentChunks = 0;
let output = [];
let rootsList = this.translationDocument.roots;
for (let i = startIndex; i < rootsList.length; i++) {
let root = rootsList[i];
let text = this.translationDocument.generateTextForItem(root);
if (!text) {
continue;
}
text = escapeXML(text);
let newCurSize = currentDataSize + text.length;
let newChunks = currentChunks + 1;
if (newCurSize > MAX_REQUEST_DATA ||
newChunks > MAX_REQUEST_CHUNKS) {
// If we've reached the API limits, let's stop accumulating data
// for this request and return. We return information useful for
// the caller to pass back on the next call, so that the function
// can keep working from where it stopped.
return {
data: output,
finished: false,
lastIndex: i
};
}
currentDataSize = newCurSize;
currentChunks = newChunks;
output.push([root, text]);
}
return {
data: output,
finished: true,
lastIndex: 0
};
}
};
/**
* Represents a request (for 1 chunk) sent off to Bing's service.
*
* @params translationData The data to be used for this translation,
* generated by the generateNextTranslationRequest...
* function.
* @param sourceLanguage The source language of the document.
* @param targetLanguage The target language for the translation.
*
*/
function BingRequest(translationData, sourceLanguage, targetLanguage) {
this.translationData = translationData;
this.sourceLanguage = sourceLanguage;
this.targetLanguage = targetLanguage;
this.characterCount = 0;
}
BingRequest.prototype = {
/**
* Initiates the request
*/
fireRequest: function() {
return Task.spawn(function *(){
let token = yield BingTokenManager.getToken();
let auth = "Bearer " + token;
let url = getUrlParam("https://api.microsofttranslator.com/v2/Http.svc/TranslateArray",
"browser.translation.bing.translateArrayURL",
false);
let request = new RESTRequest(url);
request.setHeader("Content-type", "text/xml");
request.setHeader("Authorization", auth);
let requestString =
'<TranslateArrayRequest>' +
'<AppId/>' +
'<From>' + this.sourceLanguage + '</From>' +
'<Options>' +
'<ContentType xmlns="http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2">text/html</ContentType>' +
'<ReservedFlags xmlns="http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2" />' +
'</Options>' +
'<Texts xmlns:s="http://schemas.microsoft.com/2003/10/Serialization/Arrays">';
for (let [, text] of this.translationData) {
requestString += '<s:string>' + text + '</s:string>';
this.characterCount += text.length;
}
requestString += '</Texts>' +
'<To>' + this.targetLanguage + '</To>' +
'</TranslateArrayRequest>';
let utf8 = CommonUtils.encodeUTF8(requestString);
let deferred = Promise.defer();
request.post(utf8, function(err) {
if (request.error || !request.response.success)
deferred.reject(request);
deferred.resolve(this);
}.bind(this));
this.networkRequest = request;
return deferred.promise;
}.bind(this));
}
};
/**
* Authentication Token manager for the API
*/
let BingTokenManager = {
_currentToken: null,
_currentExpiryTime: 0,
_pendingRequest: null,
/**
* Get a valid, non-expired token to be used for the API calls.
*
* @returns {Promise} A promise that resolves with the token
* string once it is obtained. The token returned
* can be the same one used in the past if it is still
* valid.
*/
getToken: function() {
if (this._pendingRequest) {
return this._pendingRequest;
}
let remainingMs = this._currentExpiryTime - new Date();
// Our existing token is still good for more than a minute, let's use it.
if (remainingMs > 60 * 1000) {
return Promise.resolve(this._currentToken);
}
return this._getNewToken();
},
/**
* Generates a new token from the server.
*
* @returns {Promise} A promise that resolves with the token
* string once it is obtained.
*/
_getNewToken: function() {
let url = getUrlParam("https://datamarket.accesscontrol.windows.net/v2/OAuth2-13",
"browser.translation.bing.authURL",
false);
let request = new RESTRequest(url);
request.setHeader("Content-type", "application/x-www-form-urlencoded");
let params = [
"grant_type=client_credentials",
"scope=" + encodeURIComponent("http://api.microsofttranslator.com"),
"client_id=" +
getUrlParam("%BING_API_CLIENTID%", "browser.translation.bing.clientIdOverride"),
"client_secret=" +
getUrlParam("%BING_API_KEY%", "browser.translation.bing.apiKeyOverride")
];
let deferred = Promise.defer();
this._pendingRequest = deferred.promise;
request.post(params.join("&"), function(err) {
BingTokenManager._pendingRequest = null;
if (err) {
deferred.reject(err);
}
try {
let json = JSON.parse(this.response.body);
if (json.error) {
deferred.reject(json.error);
return;
}
let token = json.access_token;
let expires_in = json.expires_in;
BingTokenManager._currentToken = token;
BingTokenManager._currentExpiryTime = new Date(Date.now() + expires_in * 1000);
deferred.resolve(token);
} catch (e) {
deferred.reject(e);
}
});
return deferred.promise;
}
};
/**
* Escape a string to be valid XML content.
*/
function escapeXML(aStr) {
return aStr.toString()
.replace("&", "&amp;", "g")
.replace('"', "&quot;", "g")
.replace("'", "&apos;", "g")
.replace("<", "&lt;", "g")
.replace(">", "&gt;", "g");
}
/**
* Fetch an auth token (clientID or client secret), which may be overridden by
* a pref if it's set.
*/
function getUrlParam(paramValue, prefName, encode = true) {
if (Services.prefs.getPrefType(prefName))
paramValue = Services.prefs.getCharPref(prefName);
paramValue = Services.urlFormatter.formatURL(paramValue);
return encode ? encodeURIComponent(paramValue) : paramValue;
}