mirror of
https://github.com/libretro/RetroArch.git
synced 2024-11-27 02:00:41 +00:00
1808 lines
55 KiB
C
1808 lines
55 KiB
C
/* RetroArch - A frontend for libretro.
|
|
* Copyright (C) 2010-2014 - Hans-Kristian Arntzen
|
|
* Copyright (C) 2011-2021 - Daniel De Matteis
|
|
*
|
|
* RetroArch is free software: you can redistribute it and/or modify it under the terms
|
|
* of the GNU General Public License as published by the Free Software Found-
|
|
* ation, either version 3 of the License, or (at your option) any later version.
|
|
*
|
|
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along with RetroArch.
|
|
* If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
|
|
#include <boolean.h>
|
|
#include <compat/strl.h>
|
|
#include <string/stdstring.h>
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "../config.h"
|
|
#endif
|
|
|
|
#include <encodings/base64.h>
|
|
#include <formats/rbmp.h>
|
|
#include <formats/rpng.h>
|
|
#include <formats/rjson.h>
|
|
#include <formats/rjson_helpers.h>
|
|
#include <gfx/scaler/pixconv.h>
|
|
#include <gfx/scaler/scaler.h>
|
|
#include <gfx/video_frame.h>
|
|
#include <retro_timers.h>
|
|
#include "../translation_defines.h"
|
|
|
|
#ifdef HAVE_GFX_WIDGETS
|
|
#include "../gfx/gfx_widgets.h"
|
|
#endif
|
|
|
|
#include "../accessibility.h"
|
|
#include "../audio/audio_driver.h"
|
|
#include "../gfx/video_driver.h"
|
|
#include "../frontend/frontend_driver.h"
|
|
#include "../input/input_driver.h"
|
|
#include "../command.h"
|
|
#include "../paths.h"
|
|
#include "../runloop.h"
|
|
#include "../verbosity.h"
|
|
#include "../msg_hash.h"
|
|
|
|
#include "tasks_internal.h"
|
|
|
|
/* Retropad button labels accepted in the service's "press" response.
 * The index of a label is used directly as the gamepad-state index, so
 * the order appears to mirror the libretro RETRO_DEVICE_ID_JOYPAD_*
 * numbering -- confirm before reordering. */
static const char* ACCESS_INPUT_LABELS[] =
{
   "b", "y", "select", "start", "up", "down", "left", "right",
   "a", "x", "l", "r", "l2", "r2", "l3", "r3"
};
|
|
|
|
/* JSON keys recognized in the translation server's response. Order
 * matters: the index of each key doubles as the case label used by
 * parse_response_json() (0=image ... 6=text_position). */
static const char* ACCESS_RESPONSE_KEYS[] =
{
   "image", "sound", "text", "error", "auto", "press", "text_position"
};
|
|
|
|
/* A captured video frame plus the geometry needed to locate the game
 * content inside the viewport. (The fields are not referenced in this
 * part of the file; presumably filled by the request-building code --
 * TODO(review): confirm against the rest of the file.) */
typedef struct
{
   uint8_t *data;            /* Raw pixel buffer */
   unsigned size;            /* Size of {data} in bytes */
   unsigned width;           /* Frame width in pixels */
   unsigned height;          /* Frame height in pixels */

   unsigned content_x;       /* Content rectangle within the viewport */
   unsigned content_y;
   unsigned content_width;
   unsigned content_height;
   unsigned viewport_width;  /* Full viewport dimensions */
   unsigned viewport_height;
} access_frame_t;
|
|
|
|
/* A base64-encoded payload, its length, and a short image-format tag
 * (3 chars + NUL). Not referenced in this part of the file; presumably
 * used when building the outgoing request -- TODO(review): confirm. */
typedef struct
{
   char *data;      /* Base64 string */
   int length;      /* Length of {data} */
   char format[4];  /* Image format tag, e.g. "png"/"bmp" (assumed) */
} access_base64_t;
|
|
|
|
/* Parameters of an outgoing translation request. Not referenced in this
 * part of the file -- TODO(review): confirm semantics against the
 * request-building code. */
typedef struct
{
   char *inputs;  /* presumably comma-separated input labels -- confirm */
   bool paused;   /* presumably whether the core was paused -- confirm */
} access_request_t;
|
|
|
|
/* Structured data parsed from the translation server's JSON response by
 * parse_response_json(). Only the fields present in the response are
 * populated; everything else stays zero-initialized (calloc'd). */
typedef struct
{
   char *image;        /* Decoded image data ("image" key), or NULL */
   int image_size;     /* Decoded size of {image} in bytes */
#ifdef HAVE_AUDIOMIXER
   void *sound;        /* Decoded audio data ("sound" key), or NULL */
   int sound_size;     /* Decoded size of {sound} in bytes */
#endif
   char *error;        /* Server-reported error message ("error" key) */
   char *text;         /* Translated text ("text" key) */
   char *recall;       /* "auto" key payload (automatic-mode control) */
   char *input;        /* "press" key: comma-separated retropad inputs */
   int text_position;  /* On-screen text placement ("text_position") */
} access_response_t;
|
|
|
|
/* UTILITIES ---------------------------------------------------------------- */
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
/**
|
|
* Returns true if the accessibility narrator is currently playing audio.
|
|
*/
|
|
#ifdef HAVE_ACCESSIBILITY
bool is_narrator_running(bool accessibility_enable)
{
   frontend_ctx_driver_t *frontend;
   access_state_t *access_st = access_state_get_ptr();

   /* Narrator can only be active when accessibility is on, either via
    * the user setting or RetroArch's internal override. */
   if (!is_accessibility_enabled(accessibility_enable, access_st->enabled))
      return false;

   frontend = frontend_state_get_ptr()->current_frontend_ctx;

   /* Delegate to the frontend driver when it can report narrator state */
   if (frontend && frontend->is_narrator_running)
      return frontend->is_narrator_running();

   return false;
}
#endif
|
|
|
|
/**
|
|
* Returns true if array {a} and {b}, both of the same size {size} are equal.
|
|
* This method prevents a potential bug with memcmp on some platforms.
|
|
*/
|
|
/*
 * Returns true if byte arrays {a} and {b}, both of size {size}, hold the
 * same contents. A non-positive {size} compares as equal (nothing can
 * differ). The explicit loop is kept instead of memcmp() on purpose:
 * memcmp is reported buggy on some supported platforms (see comment
 * above). Pointers are const-qualified since neither array is written.
 */
static bool u8_array_equal(const uint8_t *a, const uint8_t *b, int size)
{
   int i;
   for (i = 0; i < size; i++)
   {
      if (a[i] != b[i])
         return false;
   }
   return true;
}
|
|
|
|
/**
|
|
* Helper method to simplify accessibility speech usage. This method will only
|
|
* use TTS to read the provided text if accessibility has been enabled in the
|
|
* frontend or by RetroArch's internal override mechanism.
|
|
*/
|
|
static void accessibility_speak(const char *text)
{
#ifdef HAVE_ACCESSIBILITY
   settings_t *settings = config_get_ptr();
   /* Narrator speech speed as configured by the user */
   unsigned speed       = settings->uints.accessibility_narrator_speech_speed;
   bool narrator_on     = settings->bools.accessibility_enable;

   /* Priority 10 -- NOTE(review): meaning assumed from
    * accessibility_speak_priority(); confirm against its definition. */
   accessibility_speak_priority(narrator_on, speed, text, 10);
#endif
}
|
|
|
|
/**
|
|
* Speaks the provided text using TTS. This only happens if the narrator has
|
|
* been enabled or the service is running in Narrator mode, in which case it
|
|
* must been used even if the user has disabled it.
|
|
*/
|
|
static void translation_speak(const char *text)
{
#ifdef HAVE_ACCESSIBILITY
   settings_t *settings      = config_get_ptr();
   access_state_t *access_st = access_state_get_ptr();

   unsigned mode    = settings->uints.ai_service_mode;
   unsigned speed   = settings->uints.accessibility_narrator_speech_speed;
   bool narrator_on = settings->bools.accessibility_enable;

   /* Force the use of the narrator in Narrator modes (TTS).
    * Modes 2/4/5 are Narrator, Text+Narrator and Image+Narrator --
    * same numbering as the cases in call_auto_translate_hndl(). */
   if (mode == 2 || mode == 4 || mode == 5 || narrator_on || access_st->enabled)
      accessibility_speak_priority(true, speed, text, 10);
#endif
}
|
|
|
|
/**
|
|
* Displays the given message on screen and returns true. Returns false if no
|
|
* {message} is provided (i.e. it is NULL). The message will be displayed as
|
|
* information or error depending on the {error} boolean. In addition, it will
|
|
* be logged if {error} is true, or if this is a debug build. The message will
|
|
* also be played by the accessibility narrator if the user enabled it.
|
|
*/
|
|
static bool translation_user_message(const char *message, bool error)
{
   if (!message)
      return false;

   /* Narrate the message when accessibility is enabled */
   accessibility_speak(message);

   runloop_msg_queue_push(message, 1, 180, true, NULL,
         MESSAGE_QUEUE_ICON_DEFAULT,
         error ? MESSAGE_QUEUE_CATEGORY_ERROR : MESSAGE_QUEUE_CATEGORY_INFO);

   /* Errors are always logged; info only in debug builds */
   if (error)
      RARCH_ERR("[Translate] %s\n", message);
#ifdef DEBUG
   else
      RARCH_LOG("[Translate] %s\n", message);
#endif

   return true;
}
|
|
|
|
/**
|
|
* Displays the given hash on screen and returns true. Returns false if no
|
|
* {hash} is provided (i.e. it is NULL). The message will be displayed as
|
|
* information or error depending on the {error} boolean. In addition, it will
|
|
* be logged if {error} is true, or if this is a debug build. The message will
|
|
* also be played by the accessibility narrator if the user enabled it.
|
|
*/
|
|
static bool translation_hash_message(enum msg_hash_enums hash, bool error)
{
   const char *message;
   const char *intl;

   if (!hash)
      return false;

   /* Localized text for display, untranslated (US) text for the logs */
   message = msg_hash_to_str(hash);
   intl    = msg_hash_to_str_us(hash);

   accessibility_speak(message);

   runloop_msg_queue_push(message, 1, 180, true, NULL,
         MESSAGE_QUEUE_ICON_DEFAULT,
         error ? MESSAGE_QUEUE_CATEGORY_ERROR : MESSAGE_QUEUE_CATEGORY_INFO);

   if (error)
      RARCH_ERR("[Translate] %s\n", intl);
#ifdef DEBUG
   else
      RARCH_LOG("[Translate] %s\n", intl);
#endif

   return true;
}
|
|
|
|
/**
|
|
* Displays the given message on screen and returns true. Returns false if no
|
|
* {message} is provided (i.e. it is NULL). The message will be displayed as
|
|
* an error and it will be logged. The message will also be played by the
|
|
* accessibility narrator if the user enabled it.
|
|
*/
|
|
static INLINE bool translation_user_error(const char *message)
{
   /* Error variant: shown as an error and always logged */
   return translation_user_message(message, true);
}
|
|
|
|
/**
|
|
* Displays the given message on screen and returns true. Returns false if no
|
|
* {message} is provided (i.e. it is NULL). The message will be displayed as
|
|
* information and will only be logged if this is a debug build. The message
|
|
* will also be played by the accessibility narrator if the user enabled it.
|
|
*/
|
|
static INLINE bool translation_user_info(const char *message)
{
   /* Info variant: shown as information, logged only in debug builds */
   return translation_user_message(message, false);
}
|
|
|
|
/**
|
|
* Displays the given hash on screen and returns true. Returns false if no
|
|
* {hash} is provided (i.e. it is NULL). The message will be displayed as
|
|
* an error and it will be logged. The message will also be played by the
|
|
* accessibility narrator if the user enabled it.
|
|
*/
|
|
static INLINE bool translation_hash_error(enum msg_hash_enums hash)
{
   /* Error variant: shown as an error and always logged */
   return translation_hash_message(hash, true);
}
|
|
|
|
/**
|
|
* Displays the given hash on screen and returns true. Returns false if no
|
|
* {hash} is provided (i.e. it is NULL). The message will be displayed as
|
|
* information and will only be logged if this is a debug build. The message
|
|
* will also be played by the accessibility narrator if the user enabled it.
|
|
*/
|
|
static INLINE bool translation_hash_info(enum msg_hash_enums hash)
{
   /* Info variant: shown as information, logged only in debug builds */
   return translation_hash_message(hash, false);
}
|
|
|
|
/**
|
|
* Releases all data held by the service and stops it as soon as possible.
|
|
* If {inform} is true, a message will be displayed to the user if the service
|
|
* was running in automatic mode to warn them that it is now stopping.
|
|
*/
|
|
void translation_release(bool inform)
{
#ifdef HAVE_GFX_WIDGETS
   dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr();
#endif
   access_state_t *access_st = access_state_get_ptr();
   /* Remember whether automatic mode was active before switching it off,
    * so we can inform the user below. */
   unsigned service_auto_prev = access_st->ai_service_auto;
   access_st->ai_service_auto = 0;

#ifdef DEBUG
   RARCH_LOG("[Translate]: AI Service is now stopping.\n");
#endif

   /* Cancel any in-flight request/response tasks */
   if (access_st->request_task)
      task_set_cancelled(access_st->request_task, true);
   if (access_st->response_task)
      task_set_cancelled(access_st->response_task, true);

   /* Drop the cached frame, under the image lock when threading is on.
    * NOTE(review): with HAVE_THREADS defined, the cleanup below only runs
    * when image_lock is non-NULL -- confirm that is always the case. */
#ifdef HAVE_THREADS
   if (access_st->image_lock)
   {
      slock_lock(access_st->image_lock);
#endif
   if (access_st->last_image)
      free(access_st->last_image);

   access_st->last_image = NULL;
   access_st->last_image_size = 0;

#ifdef HAVE_THREADS
      slock_unlock(access_st->image_lock);
   }
#endif

   /* Remove any translation overlay still on screen */
#ifdef HAVE_GFX_WIDGETS
   if (p_dispwidget->ai_service_overlay_state != 0)
      gfx_widgets_ai_service_overlay_unload();
#endif

   if (inform && service_auto_prev != 0)
      translation_hash_info(MSG_AI_AUTO_MODE_DISABLED);
}
|
|
|
|
/**
|
|
* Returns the string representation of the translation language enum value.
|
|
*/
|
|
static const char* ai_service_get_str(enum translation_lang id)
|
|
{
|
|
switch (id)
|
|
{
|
|
case TRANSLATION_LANG_EN:
|
|
return "en";
|
|
case TRANSLATION_LANG_ES:
|
|
return "es";
|
|
case TRANSLATION_LANG_FR:
|
|
return "fr";
|
|
case TRANSLATION_LANG_IT:
|
|
return "it";
|
|
case TRANSLATION_LANG_DE:
|
|
return "de";
|
|
case TRANSLATION_LANG_JP:
|
|
return "ja";
|
|
case TRANSLATION_LANG_NL:
|
|
return "nl";
|
|
case TRANSLATION_LANG_CS:
|
|
return "cs";
|
|
case TRANSLATION_LANG_DA:
|
|
return "da";
|
|
case TRANSLATION_LANG_SV:
|
|
return "sv";
|
|
case TRANSLATION_LANG_HR:
|
|
return "hr";
|
|
case TRANSLATION_LANG_KO:
|
|
return "ko";
|
|
case TRANSLATION_LANG_ZH_CN:
|
|
return "zh-CN";
|
|
case TRANSLATION_LANG_ZH_TW:
|
|
return "zh-TW";
|
|
case TRANSLATION_LANG_CA:
|
|
return "ca";
|
|
case TRANSLATION_LANG_BG:
|
|
return "bg";
|
|
case TRANSLATION_LANG_BN:
|
|
return "bn";
|
|
case TRANSLATION_LANG_EU:
|
|
return "eu";
|
|
case TRANSLATION_LANG_AZ:
|
|
return "az";
|
|
case TRANSLATION_LANG_AR:
|
|
return "ar";
|
|
case TRANSLATION_LANG_AST:
|
|
return "ast";
|
|
case TRANSLATION_LANG_SQ:
|
|
return "sq";
|
|
case TRANSLATION_LANG_AF:
|
|
return "af";
|
|
case TRANSLATION_LANG_EO:
|
|
return "eo";
|
|
case TRANSLATION_LANG_ET:
|
|
return "et";
|
|
case TRANSLATION_LANG_TL:
|
|
return "tl";
|
|
case TRANSLATION_LANG_FI:
|
|
return "fi";
|
|
case TRANSLATION_LANG_GL:
|
|
return "gl";
|
|
case TRANSLATION_LANG_KA:
|
|
return "ka";
|
|
case TRANSLATION_LANG_EL:
|
|
return "el";
|
|
case TRANSLATION_LANG_GU:
|
|
return "gu";
|
|
case TRANSLATION_LANG_HT:
|
|
return "ht";
|
|
case TRANSLATION_LANG_HE:
|
|
return "he";
|
|
case TRANSLATION_LANG_HI:
|
|
return "hi";
|
|
case TRANSLATION_LANG_HU:
|
|
return "hu";
|
|
case TRANSLATION_LANG_IS:
|
|
return "is";
|
|
case TRANSLATION_LANG_ID:
|
|
return "id";
|
|
case TRANSLATION_LANG_GA:
|
|
return "ga";
|
|
case TRANSLATION_LANG_KN:
|
|
return "kn";
|
|
case TRANSLATION_LANG_LA:
|
|
return "la";
|
|
case TRANSLATION_LANG_LV:
|
|
return "lv";
|
|
case TRANSLATION_LANG_LT:
|
|
return "lt";
|
|
case TRANSLATION_LANG_MK:
|
|
return "mk";
|
|
case TRANSLATION_LANG_MS:
|
|
return "ms";
|
|
case TRANSLATION_LANG_MT:
|
|
return "mt";
|
|
case TRANSLATION_LANG_NO:
|
|
return "no";
|
|
case TRANSLATION_LANG_FA:
|
|
return "fa";
|
|
case TRANSLATION_LANG_PL:
|
|
return "pl";
|
|
case TRANSLATION_LANG_PT:
|
|
return "pt";
|
|
case TRANSLATION_LANG_RO:
|
|
return "ro";
|
|
case TRANSLATION_LANG_RU:
|
|
return "ru";
|
|
case TRANSLATION_LANG_SR:
|
|
return "sr";
|
|
case TRANSLATION_LANG_SK:
|
|
return "sk";
|
|
case TRANSLATION_LANG_SL:
|
|
return "sl";
|
|
case TRANSLATION_LANG_SW:
|
|
return "sw";
|
|
case TRANSLATION_LANG_TA:
|
|
return "ta";
|
|
case TRANSLATION_LANG_TE:
|
|
return "te";
|
|
case TRANSLATION_LANG_TH:
|
|
return "th";
|
|
case TRANSLATION_LANG_TR:
|
|
return "tr";
|
|
case TRANSLATION_LANG_UK:
|
|
return "uk";
|
|
case TRANSLATION_LANG_BE:
|
|
return "be";
|
|
case TRANSLATION_LANG_UR:
|
|
return "ur";
|
|
case TRANSLATION_LANG_VI:
|
|
return "vi";
|
|
case TRANSLATION_LANG_CY:
|
|
return "cy";
|
|
case TRANSLATION_LANG_YI:
|
|
return "yi";
|
|
case TRANSLATION_LANG_DONT_CARE:
|
|
case TRANSLATION_LANG_LAST:
|
|
break;
|
|
}
|
|
|
|
return "";
|
|
}
|
|
|
|
/* AUTOMATION --------------------------------------------------------------- */
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
/**
|
|
* Handler invoking the next automatic request. This method simply waits for
|
|
* any previous request to terminate before re-invoking the translation service.
|
|
* By delegating this to a task handler we can safely do so in the task thread
|
|
* instead of hogging the main thread.
|
|
*/
|
|
static void call_auto_translate_hndl(retro_task_t *task)
{
   int *mode_ptr             = (int*)task->user_data;
   uint32_t runloop_flags    = runloop_get_flags();
   access_state_t *access_st = access_state_get_ptr();
   settings_t *settings      = config_get_ptr();

   if (task_get_cancelled(task))
      goto finish;

   /* Wait until the previous response has finished playing before the
    * next request is issued. */
   switch (*mode_ptr)
   {
      case 1: /* Speech Mode */
#ifdef HAVE_AUDIOMIXER
         if (!audio_driver_is_ai_service_speech_running())
            goto finish;
#endif
         break;
      case 2: /* Narrator Mode */
      case 3: /* Text Mode */
      case 4: /* Text + Narrator */
      case 5: /* Image + Narrator */
#ifdef HAVE_ACCESSIBILITY
         if (!is_narrator_running(settings->bools.accessibility_enable))
            goto finish;
#endif
         break;
      default:
         goto finish;
   }

   /* Still busy: return without finishing so the handler runs again */
   return;

finish:
   task_set_finished(task, true);

   if (task->user_data)
      free(task->user_data);

   /* Final check to see if the user did not disable the service altogether */
   if (access_st->ai_service_auto != 0)
   {
      bool was_paused = runloop_flags & RUNLOOP_FLAG_PAUSED;
      command_event(CMD_EVENT_AI_SERVICE_CALL, &was_paused);
   }
}
|
|
|
|
/**
|
|
* Invokes the next automatic request. This method delegates the invokation to
|
|
* a task to allow for threading. The task will only execute after the polling
|
|
* delay configured by the user has been honored since the last request.
|
|
*/
|
|
static void call_auto_translate_task(settings_t *settings)
{
   int *mode                 = NULL;
   access_state_t *access_st = access_state_get_ptr();
   int ai_service_mode       = settings->uints.ai_service_mode;
   unsigned delay            = settings->uints.ai_service_poll_delay;
   retro_task_t *task        = task_init();

   if (!task)
      return;

   /* Fix: the allocation was previously dereferenced without a NULL
    * check; bail out (and release the task) if it fails. */
   if (!(mode = (int*)malloc(sizeof(int))))
   {
      free(task);
      return;
   }
   *mode           = ai_service_mode;

   task->handler   = call_auto_translate_hndl;
   task->user_data = mode;  /* freed by the handler when it finishes */
   task->mute      = true;
   /* Honor the user-configured polling delay (ms) since the last call */
   task->when      = access_st->last_call + (delay * 1000);
   task_queue_push(task);
}
|
|
|
|
/* RESPONSE ----------------------------------------------------------------- */
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
/**
|
|
* Parses the JSON returned by the translation server and returns structured
|
|
* data. May return NULL if the parsing cannot be completed or the JSON is
|
|
* malformed. If unsupported keys are provided in the JSON, they will simply
|
|
* be ignored. Only the available data will be populated in the returned object
|
|
* and everything else will be zero-initialized.
|
|
*/
|
|
static access_response_t* parse_response_json(http_transfer_data_t *data)
{
   int key                     = -1;   /* index into ACCESS_RESPONSE_KEYS */
   rjson_t *json               = NULL;
   access_response_t *response = NULL;
   enum rjson_type type        = RJSON_DONE;

   if (!data || !data->data)
      goto finish;
   if (!(json = rjson_open_buffer(data->data, data->len)))
      goto finish;
   if (!(response = (access_response_t*)calloc(1, sizeof(access_response_t))))
      goto finish;

   for (;;)
   {
      size_t length      = 0;
      const char *string = NULL;

      type = rjson_next(json);

      if (type == RJSON_DONE || type == RJSON_ERROR)
         break;
      /* Only key/value pairs of the top-level object are interesting */
      if (rjson_get_context_type(json) != RJSON_OBJECT)
         continue;

      if (type == RJSON_STRING && (rjson_get_context_count(json) & 1) == 1)
      {
         /* Odd position inside the object: this is a key name */
         unsigned i;
         string = rjson_get_string(json, &length);
         for (i = 0; i < ARRAY_SIZE(ACCESS_RESPONSE_KEYS) && key == -1; i++)
         {
            if (string_is_equal(string, ACCESS_RESPONSE_KEYS[i]))
               key = i;
         }
      }
      else
      {
         /* Value: keys 0-5 expect string payloads; key 6 may be a number */
         if (type != RJSON_STRING && key < 6)
            continue;
         else
            string = rjson_get_string(json, &length);

         switch (key)
         {
            case 0: /* image (base64-encoded) */
               response->image = (length == 0) ? NULL : (char*)unbase64(
                     string, (int)length, &response->image_size);
               break;
#ifdef HAVE_AUDIOMIXER
            case 1: /* sound (base64-encoded) */
               response->sound = (length == 0) ? NULL : (void*)unbase64(
                     string, (int)length, &response->sound_size);
               break;
#endif
            case 2: /* text */
               response->text = strdup(string);
               break;
            case 3: /* error */
               response->error = strdup(string);
               break;
            case 4: /* auto */
               response->recall = strdup(string);
               break;
            case 5: /* press */
               response->input = strdup(string);
               break;
            case 6: /* text_position */
               if (type == RJSON_NUMBER)
                  response->text_position = rjson_get_int(json);
               break;
            default: /* unknown key: value is ignored */
               break;
         }
         key = -1;
      }
   }

   if (type == RJSON_ERROR)
   {
      RARCH_LOG("[Translate] JSON error: %s\n", rjson_get_error(json));
      translation_user_error("Service returned a malformed JSON");
      /* Fix: release any members parsed before the error was hit --
       * previously only {response} itself was freed, leaking them.
       * free(NULL) is a no-op, so unset members are safe. */
      free(response->image);
#ifdef HAVE_AUDIOMIXER
      free(response->sound);
#endif
      free(response->text);
      free(response->error);
      free(response->recall);
      free(response->input);
      free(response);
      response = NULL;
   }

finish:
   if (json)
      rjson_free(json);
   else
      translation_user_error("Internal error parsing returned JSON.");

   return response;
}
|
|
|
|
/**
|
|
* Parses the image data of given type and displays it using widgets. If the
|
|
* image widget is already shown, it will be unloaded first automatically.
|
|
* This method will disable automatic translation if the widget could not be
|
|
* loaded to prevent further errors.
|
|
*/
|
|
#ifdef HAVE_GFX_WIDGETS
|
|
static void translation_response_image_widget(
|
|
char *image, int image_length, enum image_type_enum *image_type)
|
|
{
|
|
video_driver_state_t *video_st = video_state_get_ptr();
|
|
dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr();
|
|
access_state_t *access_st = access_state_get_ptr();
|
|
|
|
bool ai_res;
|
|
bool gfx_widgets_paused = video_st->flags & VIDEO_FLAG_WIDGETS_PAUSED;
|
|
|
|
if (p_dispwidget->ai_service_overlay_state != 0)
|
|
gfx_widgets_ai_service_overlay_unload();
|
|
|
|
ai_res = gfx_widgets_ai_service_overlay_load(
|
|
image, (unsigned)image_length, (*image_type));
|
|
|
|
if (!ai_res)
|
|
{
|
|
translation_hash_error(MSG_AI_VIDEO_DRIVER_NOT_SUPPORTED);
|
|
translation_release(true);
|
|
}
|
|
else if (gfx_widgets_paused)
|
|
{
|
|
/* Unpause for a frame otherwise widgets won't be displayed */
|
|
p_dispwidget->ai_service_overlay_state = 2;
|
|
command_event(CMD_EVENT_UNPAUSE, NULL);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* Parses the image buffer, converting the data to the raw image format we need
|
|
* to display the image within RetroArch. Writes the raw image data in {body}
|
|
* as well as its {width} and {height} as determined by the image header.
|
|
* Returns true if the process was successful.
|
|
*/
|
|
static bool translation_get_image_body(
      char *image, int image_size, enum image_type_enum *image_type,
      void *body, unsigned *width, unsigned *height)
{
#ifdef HAVE_RPNG
   rpng_t *rpng = NULL;
   void *rpng_alpha = NULL;
   int rpng_ret = 0;
#endif

   /* NOTE(review): {body} is received by value, so the malloc() results
    * below are stored in a local copy and never reach the caller -- the
    * caller's pointer stays unchanged and the allocation leaks.
    * Presumably the parameter was meant to be void** (out-parameter);
    * confirm against the caller in translation_response_image_direct. */

   if ((*image_type) == IMAGE_TYPE_BMP)
   {
      /* Require the 54-byte BMP header plus at least one payload byte */
      if (image_size < 55)
         return false;

      /* Little-endian 32-bit width/height at header offsets 18 and 22 */
      *width = ((uint32_t) ((uint8_t)image[21]) << 24)
            + ((uint32_t) ((uint8_t)image[20]) << 16)
            + ((uint32_t) ((uint8_t)image[19]) << 8)
            + ((uint32_t) ((uint8_t)image[18]) << 0);
      *height = ((uint32_t) ((uint8_t)image[25]) << 24)
            + ((uint32_t) ((uint8_t)image[24]) << 16)
            + ((uint32_t) ((uint8_t)image[23]) << 8)
            + ((uint32_t) ((uint8_t)image[22]) << 0);

      /* Copy the 24-bit pixel payload that follows the 54-byte header */
      image_size = (*width) * (*height) * 3 * sizeof(uint8_t);
      body = (void*)malloc(image_size);
      if (!body)
         return false;

      memcpy(body, image + 54 * sizeof(uint8_t), image_size);
      return true;
   }

#ifdef HAVE_RPNG
   else if ((*image_type) == IMAGE_TYPE_PNG)
   {
      /* Need the 8-byte signature plus the IHDR chunk through height */
      if (image_size < 24)
         return false;
      if (!(rpng = rpng_alloc()))
         return false;

      /* Big-endian 32-bit width/height from IHDR at offsets 16 and 20 */
      *width = ((uint32_t) ((uint8_t)image[16]) << 24)
            + ((uint32_t) ((uint8_t)image[17]) << 16)
            + ((uint32_t) ((uint8_t)image[18]) << 8)
            + ((uint32_t) ((uint8_t)image[19]) << 0);
      *height = ((uint32_t) ((uint8_t)image[20]) << 24)
            + ((uint32_t) ((uint8_t)image[21]) << 16)
            + ((uint32_t) ((uint8_t)image[22]) << 8)
            + ((uint32_t) ((uint8_t)image[23]) << 0);

      rpng_set_buf_ptr(rpng, image, (size_t)image_size);
      rpng_start(rpng);
      while (rpng_iterate_image(rpng));

      do
      {
         rpng_ret = rpng_process_image(
               rpng, &rpng_alpha, (size_t)image_size, width, height);
      } while (rpng_ret == IMAGE_PROCESS_NEXT);

      /*
       * Returned output from the png processor is an upside down RGBA
       * image, so we have to change that to RGB first. This should
       * probably be replaced with a scaler call.
       */
      {
         int d = 0;
         int tw, th, tc;
         unsigned ui;
         image_size = (*width) * (*height) * 3 * sizeof(uint8_t);
         body = (void*)malloc(image_size);
         if (!body)
         {
            free(rpng_alpha);
            rpng_free(rpng);
            return false;
         }

         /* Drop every 4th byte (alpha) and mirror the rows vertically */
         for (ui = 0; ui < (*width) * (*height) * 4; ui++)
         {
            if (ui % 4 != 3)
            {
               tc = d % 3;
               th = (*height) - d / (3 * (*width)) - 1;
               tw = (d % ((*width) * 3)) / 3;
               ((uint8_t*) body)[tw * 3 + th * 3 * (*width) + tc]
                  = ((uint8_t*)rpng_alpha)[ui];
               d++;
            }
         }
      }
      free(rpng_alpha);
      rpng_free(rpng);
      return true;
   }
#endif

   return false;
}
|
|
|
|
/**
|
|
* Displays the raw image on screen by directly writing to the frame buffer.
|
|
* This method may fail depending on the current video driver.
|
|
*/
|
|
/* TODO/FIXME: Does nothing with Vulkan apparently? */
|
|
static void translation_response_image_direct(
      char *image, int image_size, enum image_type_enum *image_type)
{
   size_t pitch;
   unsigned width;
   unsigned height;
   unsigned vp_width;
   unsigned vp_height;

   void *image_body = NULL;
   uint8_t *raw_output_data = NULL;
   size_t raw_output_size = 0;
   const void *dummy_data = NULL;
   struct scaler_ctx *scaler = NULL;
   video_driver_state_t *video_st = video_state_get_ptr();
   const enum retro_pixel_format video_driver_pix_fmt = video_st->pix_fmt;

   /* NOTE(review): translation_get_image_body() assigns its malloc'd
    * buffer to a by-value copy of this pointer, so image_body remains
    * NULL here even on success -- the scaler call below would then read
    * near NULL. Looks like a latent bug; confirm and consider passing
    * &image_body with a void** parameter. */
   if (!(translation_get_image_body(
         image, image_size, image_type, image_body, &width, &height)))
      goto finish;

   if (!(scaler = (struct scaler_ctx*)calloc(1, sizeof(struct scaler_ctx))))
      goto finish;

   dummy_data = video_st->frame_cache_data;
   vp_width = video_st->frame_cache_width;
   vp_height = video_st->frame_cache_height;
   pitch = video_st->frame_cache_pitch;

   if (!vp_width || !vp_height)
      goto finish;

   if (dummy_data == RETRO_HW_FRAME_BUFFER_VALID)
   {
      /* In this case, we used the viewport to grab the image and translate it,
       * and we have the translated image in the image_body buffer. */
      translation_user_error("Video driver unsupported for hardware frame.");
      translation_release(true);
      goto finish;
   }

   /*
    * The assigned pitch may not be reliable. The width of the video frame can
    * change during run-time, but the pitch may not, so we just assign it as
    * the width times the byte depth.
    */
   if (video_driver_pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888)
   {
      raw_output_size = vp_width * vp_height * 4 * sizeof(uint8_t);
      raw_output_data = (uint8_t*)malloc(raw_output_size);
      scaler->out_fmt = SCALER_FMT_ARGB8888;
      scaler->out_stride = vp_width * 4;
      pitch = vp_width * 4;
   }
   else
   {
      raw_output_size = vp_width * vp_height * 2 * sizeof(uint8_t);
      raw_output_data = (uint8_t*)malloc(raw_output_size);
      scaler->out_fmt = SCALER_FMT_RGB565;
      /* NOTE(review): out_stride of vp_width * 1 looks inconsistent with
       * 2-byte RGB565 rows (the ARGB path uses vp_width * 4); confirm the
       * stride unit expected by the scaler. */
      scaler->out_stride = vp_width * 1;
      pitch = vp_width * 2;
   }

   if (!raw_output_data)
      goto finish;

   /* Source is 24-bit BGR, scaled point-sampled up to the viewport size */
   scaler->in_fmt = SCALER_FMT_BGR24;
   scaler->in_width = width;
   scaler->in_height = height;
   scaler->out_width = vp_width;
   scaler->out_height = vp_height;
   scaler->scaler_type = SCALER_TYPE_POINT;
   scaler_ctx_gen_filter(scaler);

   /* Negative stride + pointer to the last row flips the image vertically.
    * NOTE(review): the stride uses vp_width while source rows are {width}
    * pixels wide -- confirm. */
   scaler->in_stride = -1 * vp_width * 3;

   scaler_ctx_scale_direct(
         scaler, raw_output_data,
         (uint8_t*)image_body + (height - 1) * width * 3);
   /* NOTE(review): passes the source image dimensions together with the
    * viewport pitch -- verify this is what video_driver_frame expects. */
   video_driver_frame(raw_output_data, width, height, pitch);

finish:
   if (image_body)
      free(image_body);
   if (scaler)
      free(scaler);
   if (raw_output_data)
      free(raw_output_data);
}
|
|
|
|
/**
|
|
* Parses image data received by the server following a translation request.
|
|
* This method assumes that image data is present in the response, it cannot
|
|
* be null. If widgets are supported, this method will prefer using them to
|
|
* overlay the picture on top of the video, otherwise it will try to write the
|
|
* data directly into the frame buffer, which is much less reliable.
|
|
*/
|
|
static void translation_response_image_hndl(retro_task_t *task)
{
   /*
    * TODO/FIXME: Moved processing to the callback to fix an issue with
    * texture loading off the main thread in OpenGL. I'm leaving the original
    * structure here so we can move back to the handler if it becomes possible
    * in the future.
    */
   /* Finish immediately; the actual work happens in
    * translation_response_image_cb on the main thread. */
   task_set_finished(task, true);
}
|
|
|
|
/**
|
|
* Callback invoked once the image data received from the server has been
|
|
* processed and eventually displayed. This is necessary to ensure that the
|
|
* next automatic request will be invoked once the task is finished.
|
|
*/
|
|
static void translation_response_image_cb(
      retro_task_t *task, void *task_data, void *user_data, const char *error)
{
   settings_t* settings = config_get_ptr();
   access_state_t *access_st = access_state_get_ptr();

   enum image_type_enum image_type;
   access_response_t *response = (access_response_t*)task->user_data;
   video_driver_state_t *video_st = video_state_get_ptr();

   /* Need at least 4 bytes to sniff the image signature below */
   if (task_get_cancelled(task) || response->image_size < 4)
      goto finish;

   /* BMP files start with "BM" */
   if ( response->image[0] == 'B'
      && response->image[1] == 'M')
      image_type = IMAGE_TYPE_BMP;
#ifdef HAVE_RPNG
   /* The PNG signature is \x89 P N G ...; bytes 1-3 are checked here */
   else if (response->image[1] == 'P'
      && response->image[2] == 'N'
      && response->image[3] == 'G')
      image_type = IMAGE_TYPE_PNG;
#endif
   else
   {
      translation_user_error("Service returned an unsupported image type.");
      translation_release(true);
      goto finish;
   }

   /* Prefer the widget overlay when the driver can load/unload textures;
    * otherwise fall back to writing into the frame buffer directly. */
#ifdef HAVE_GFX_WIDGETS
   if ( video_st->poke
      && video_st->poke->load_texture
      && video_st->poke->unload_texture)
      translation_response_image_widget(
            response->image, response->image_size, &image_type);
   else
#endif
      translation_response_image_direct(
            response->image, response->image_size, &image_type);

finish:
   /* free(NULL) is a no-op, so a missing image is safe here */
   free(response->image);
   free(response);

   /* Chain the next request when automatic mode is still active */
   if (access_st->ai_service_auto != 0)
      call_auto_translate_task(settings);
}
|
|
|
|
/**
|
|
* Processes text data received by the server following a translation request.
|
|
* Does nothing if the response does not contain any text data (NULL). Text
|
|
* is either forcibly read by the narrator, even if it is disabled in the
|
|
* front-end (Narrator Mode) or displayed on screen (in Text Mode). In the
|
|
* later, it will only be read if the front-end narrator is enabled.
|
|
*/
|
|
static void translation_response_text(access_response_t *response)
{
   settings_t *settings      = config_get_ptr();
   unsigned service_mode     = settings->uints.ai_service_mode;
   access_state_t *access_st = access_state_get_ptr();

   /* In the text-capable modes (2=Narrator, 3=Text, 4=Text+Narrator),
    * tell the user when there was nothing to translate -- but only in
    * manual mode, to avoid spamming during automatic polling. */
   if ( (!response->text || string_is_empty(response->text))
      && (service_mode == 2 || service_mode == 3 || service_mode == 4)
      && access_st->ai_service_auto == 0)
   {
      translation_hash_info(MSG_AI_NOTHING_TO_TRANSLATE);
      return;
   }

   if (response->text)
   {
      /* The text should be displayed on screen in Text or Text+Narrator mode */
      if (service_mode == 3 || service_mode == 4)
      {
#ifdef HAVE_GFX_WIDGETS
         if (settings->bools.menu_enable_widgets)
         {
            dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr();

            if (p_dispwidget->ai_service_overlay_state == 1)
               gfx_widgets_ai_service_overlay_unload();

            /* NOTE(review): hard-coded 255 -- presumably the capacity of
             * ai_service_text; confirm and prefer sizeof. */
            strlcpy(p_dispwidget->ai_service_text, response->text, 255);

            if (response->text_position > 0)
               p_dispwidget->ai_service_text_position
                  = (unsigned)response->text_position;
            else
               p_dispwidget->ai_service_text_position = 0;

            p_dispwidget->ai_service_overlay_state = 1;
         }
         else
         {
#endif
            /*
             * TODO/FIXME: Obviously this will not be as good as using widgets,
             * since messages run on a timer but it's an alternative at least.
             * Maybe split the message here so it fits the viewport.
             */
            runloop_msg_queue_push(
                  response->text, 2, 180,
                  true, NULL, MESSAGE_QUEUE_ICON_DEFAULT,
                  MESSAGE_QUEUE_CATEGORY_INFO);

#ifdef HAVE_GFX_WIDGETS
         }
#endif
      }
      /* Narrator reads the text when enabled (forced in Narrator modes) */
      translation_speak(&response->text[0]);
      free(response->text);
   }
}
|
|
|
|
/**
|
|
* Processes audio data received by the server following a translation request.
|
|
* Does nothing if the response does not contain any audio data (NULL). Audio
|
|
* data is simply played as soon as possible using the audio driver.
|
|
*/
|
|
static void translation_response_sound(access_response_t *response)
{
#ifdef HAVE_AUDIOMIXER
   if (response->sound)
   {
      audio_mixer_stream_params_t params;

      params.volume = 1.0f;
      /* user->slot_selection_type; */
      params.slot_selection_type = AUDIO_MIXER_SLOT_SELECTION_MANUAL;
      params.slot_selection_idx = 10;
      /* user->stream_type; */
      params.stream_type = AUDIO_STREAM_TYPE_SYSTEM;
      params.type = AUDIO_MIXER_TYPE_WAV;
      params.state = AUDIO_STREAM_STATE_PLAYING;
      params.buf = response->sound;
      params.bufsize = response->sound_size;
      params.cb = NULL;
      params.basename = NULL;

      audio_driver_mixer_add_stream(&params);
      /* NOTE(review): the buffer is freed immediately after queuing the
       * stream -- this assumes the mixer copies the data; confirm. */
      free(response->sound);
   }
#endif
}
|
|
|
|
/**
|
|
* Processes input data received by the server following a translation request.
|
|
* Does nothing if the response does not contain any input data (NULL). This
|
|
* method will try to forcibly press all the retropad keys listed in the input
|
|
* string (comma-separated).
|
|
*/
|
|
static void translation_response_input(access_response_t *response)
|
|
{
|
|
if (response->input)
|
|
{
|
|
#ifdef HAVE_ACCESSIBILITY
|
|
input_driver_state_t *input_st = input_state_get_ptr();
|
|
#endif
|
|
int length = strlen(response->input);
|
|
char *token = strtok(response->input, ",");
|
|
|
|
while (token)
|
|
{
|
|
if (string_is_equal(token, "pause"))
|
|
command_event(CMD_EVENT_PAUSE, NULL);
|
|
else if (string_is_equal(token, "unpause"))
|
|
command_event(CMD_EVENT_UNPAUSE, NULL);
|
|
#ifdef HAVE_ACCESSIBILITY
|
|
else
|
|
{
|
|
unsigned i = 0;
|
|
bool found = false;
|
|
|
|
for (; i < ARRAY_SIZE(ACCESS_INPUT_LABELS) && !found; i++)
|
|
found = string_is_equal(ACCESS_INPUT_LABELS[i], response->input);
|
|
|
|
if (found)
|
|
input_st->ai_gamepad_state[i] = 2;
|
|
}
|
|
#endif
|
|
token = strtok(NULL, ",");
|
|
}
|
|
free(response->input);
|
|
}
|
|
}
|
|
|
|
/**
 * Callback invoked when the server responds to our translation request. If the
 * service is still running by then, this method will parse the JSON payload
 * and process the data, eventually re-invoking the translation service for
 * a new request if the server allowed automatic translation.
 *
 * Ownership: the parsed response (and its text/sound/input/image/recall
 * buffers) is released here, except when an image task is spawned, in which
 * case the task takes ownership of the whole response object.
 */
static void translation_response_cb(
      retro_task_t *task, void *task_data, void *user_data, const char *error)
{
   http_transfer_data_t *data  = (http_transfer_data_t*)task_data;
   access_state_t *access_st   = access_state_get_ptr();
   settings_t *settings        = config_get_ptr();
   access_response_t *response = NULL;
   bool auto_mode_prev         = access_st->ai_service_auto;
   unsigned service_mode       = settings->uints.ai_service_mode;

   /* We asked the service to stop by calling translation_release, so bail */
   if (!access_st->last_image)
      goto finish;
   if (translation_user_error(error))
      goto abort;
   if (!(response = parse_response_json(data)))
      goto abort;
   if (translation_user_error(response->error))
      goto abort;

   /* A non-NULL 'recall' field means the server wants automatic mode on;
    * notify the user whenever that state flips. */
   access_st->ai_service_auto = (response->recall == NULL) ? 0 : 1;
   if (auto_mode_prev != access_st->ai_service_auto)
      translation_hash_info(auto_mode_prev
            ? MSG_AI_AUTO_MODE_DISABLED : MSG_AI_AUTO_MODE_ENABLED);

   /*
    * We want to skip the data on auto=continue, unless automatic translation
    * has just been enabled, meaning data must be displayed again to the user.
    */
   if ( !string_is_equal(response->recall, "continue")
         || (auto_mode_prev == 0 && access_st->ai_service_auto == 1))
   {
#ifdef HAVE_GFX_WIDGETS
      dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr();
      if (p_dispwidget->ai_service_overlay_state != 0)
         gfx_widgets_ai_service_overlay_unload();
#endif
      /* Each of these consumes (frees) its corresponding response field */
      translation_response_text(response);
      translation_response_sound(response);
      translation_response_input(response);

      if (response->image)
      {
         /* Note: this inner 'task' intentionally shadows the callback's
          * parameter; image decoding is deferred to its own task. */
         retro_task_t *task = task_init();
         if (!task)
            goto finish;

         task->handler   = translation_response_image_hndl;
         task->callback  = translation_response_image_cb;
         task->user_data = response;
         task->mute      = true;
         access_st->response_task = task;
         task_queue_push(task);

         /* Leave memory clean-up and auto callback to the task itself */
         return;
      }
      else if (access_st->ai_service_auto == 0
            && (service_mode == 0 || service_mode == 5))
         translation_hash_info(MSG_AI_NOTHING_TO_TRANSLATE);
   }
   goto finish;

abort:
   /* User-visible error: stop the whole service */
   translation_release(true);
   if (response && response->error)
      free(response->error);

finish:
   if (response)
   {
      if (response->image)
         free(response->image);
      if (response->recall)
         free(response->recall);
      free(response);

      /* Chain the next request when the server keeps auto mode running */
      if (access_st->ai_service_auto != 0)
         call_auto_translate_task(settings);
   }
}
|
|
|
|
/* REQUEST ------------------------------------------------------------------ */
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
/**
 * Grabs and returns a frame from the video driver. If the frame buffer cannot
 * be accessed, this method will try to obtain a capture of the viewport as a
 * fallback, although this frame may be altered by any filter or shader enabled
 * by the user. Returns NULL if both methods fail.
 *
 * The returned frame (and its 24-bit BGR pixel buffer, frame->data) is owned
 * by the caller, which must free both.
 */
static access_frame_t* translation_grab_frame(void)
{
   size_t pitch;
   struct video_viewport vp               = {0};
   const void *data                       = NULL;
   uint8_t *bit24_image_prev              = NULL;
   struct scaler_ctx *scaler              = NULL;
   access_frame_t *frame                  = NULL;
   video_driver_state_t *video_st         = video_state_get_ptr();
   const enum retro_pixel_format pix_fmt  = video_st->pix_fmt;

   if (!(scaler = (struct scaler_ctx*)calloc(1, sizeof(struct scaler_ctx))))
      goto finish;
   if (!(frame = (access_frame_t*)malloc(sizeof(access_frame_t))))
      goto finish;

   /* Snapshot the core's frame cache dimensions */
   data          = video_st->frame_cache_data;
   frame->data   = NULL;
   frame->width  = video_st->frame_cache_width;
   frame->height = video_st->frame_cache_height;
   pitch         = video_st->frame_cache_pitch;

   if (!data)
      goto finish;

   video_driver_get_viewport_info(&vp);
   if (!vp.width || !vp.height)
      goto finish;

   frame->content_x       = vp.x;
   frame->content_y       = vp.y;
   frame->content_width   = vp.width;
   frame->content_height  = vp.height;
   frame->viewport_width  = vp.full_width;
   frame->viewport_height = vp.full_height;
   /* 3 bytes per pixel: output is always 24-bit BGR */
   frame->size            = frame->width * frame->height * 3;

   if (!(frame->data = (uint8_t*)malloc(frame->size)))
      goto finish;

   if (data == RETRO_HW_FRAME_BUFFER_VALID)
   {
      /* Direct frame capture failed, fallback on viewport capture */
      if (!(bit24_image_prev = (uint8_t*)malloc(vp.width * vp.height * 3)))
         goto finish;

      if (!( video_st->current_video->read_viewport
            && video_st->current_video->read_viewport(
                  video_st->data, bit24_image_prev, false)))
      {
         translation_user_error("Could not read viewport.");
         translation_release(true);
         goto finish;
      }

      /* TODO: Rescale down to regular resolution */
      /* Viewport capture is at display size; point-scale it back down to
       * the core's native frame dimensions. */
      scaler->in_fmt      = SCALER_FMT_BGR24;
      scaler->out_fmt     = SCALER_FMT_BGR24;
      scaler->scaler_type = SCALER_TYPE_POINT;
      scaler->in_width    = vp.width;
      scaler->in_height   = vp.height;
      scaler->out_width   = frame->width;
      scaler->out_height  = frame->height;
      scaler_ctx_gen_filter(scaler);

      scaler->in_stride   = vp.width * 3;
      scaler->out_stride  = frame->width * 3;
      scaler_ctx_scale_direct(scaler, frame->data, bit24_image_prev);
   }
   else
   {
      /* This is a software core, so just change the pixel format to 24-bit */
      if (pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888)
         scaler->in_fmt = SCALER_FMT_ARGB8888;
      else
         scaler->in_fmt = SCALER_FMT_RGB565;

      video_frame_convert_to_bgr24(
            scaler, frame->data, (const uint8_t*)data,
            frame->width, frame->height, (int)pitch);
   }
   scaler_ctx_gen_reset(scaler);

finish:
   if (bit24_image_prev)
      free(bit24_image_prev);
   if (scaler)
      free(scaler);

   /* frame->data doubles as the success flag: set only when a capture
    * actually completed */
   if (frame)
   {
      if (frame->data)
         return frame;
      free(frame);
   }
   return NULL;
}
|
|
|
|
/**
|
|
* Returns true if the {frame} passed in parameter is a duplicate of the last
|
|
* frame the service was invoked on. This method effectively helps to prevent
|
|
* the service from spamming the server with the same request over and over
|
|
* again when running in automatic mode. This method will also save the image
|
|
* in the {frame} structure as the new last image for the service.
|
|
*/
|
|
static bool translation_dupe_fail(access_frame_t *frame)
|
|
{
|
|
access_state_t *access_st = access_state_get_ptr();
|
|
bool size_equal = (frame->size == access_st->last_image_size);
|
|
bool has_failed = false;
|
|
|
|
#ifdef HAVE_THREADS
|
|
slock_lock(access_st->image_lock);
|
|
#endif
|
|
if (access_st->last_image && access_st->ai_service_auto != 0)
|
|
{
|
|
if ( size_equal
|
|
&& u8_array_equal(frame->data, access_st->last_image, frame->size))
|
|
has_failed = true;
|
|
}
|
|
|
|
/* Init last image or reset buffer size if image size changed */
|
|
if (!has_failed && (!access_st->last_image || !size_equal))
|
|
{
|
|
if (access_st->last_image)
|
|
free(access_st->last_image);
|
|
|
|
access_st->last_image_size = frame->size;
|
|
if (!(access_st->last_image = (uint8_t*)malloc(frame->size)))
|
|
has_failed = true;
|
|
}
|
|
|
|
if (!has_failed)
|
|
memcpy(access_st->last_image, frame->data, frame->size);
|
|
|
|
#ifdef HAVE_THREADS
|
|
slock_unlock(access_st->image_lock);
|
|
#endif
|
|
return has_failed;
|
|
}
|
|
|
|
/**
|
|
* Converts and returns the {frame} as a base64 encoded PNG or BMP. The
|
|
* selected image type will be available in the returned object, and will
|
|
* favor PNG if possible. Returns NULL on failure.
|
|
*/
|
|
static access_base64_t* translation_frame_encode(access_frame_t *frame)
|
|
{
|
|
uint8_t header[54];
|
|
uint8_t *buffer = NULL;
|
|
uint64_t bytes = 0;
|
|
access_base64_t *encode = NULL;
|
|
|
|
if (!(encode = (access_base64_t*)malloc(sizeof(access_base64_t))))
|
|
goto finish;
|
|
|
|
#ifdef HAVE_RPNG
|
|
strcpy(encode->format, "png");
|
|
buffer = rpng_save_image_bgr24_string(
|
|
frame->data, frame->width, frame->height,
|
|
frame->width * 3, &bytes);
|
|
#else
|
|
strcpy(encode->format, "bmp");
|
|
form_bmp_header(header, frame->width, frame->height, false);
|
|
if (!(buffer = (uint8_t*)malloc(frame->size + 54)))
|
|
goto finish;
|
|
|
|
memcpy(buffer, header, 54 * sizeof(uint8_t));
|
|
memcpy(buffer + 54, frame->data, frame->size * sizeof(uint8_t));
|
|
bytes = sizeof(uint8_t) * (frame->size + 54);
|
|
#endif
|
|
|
|
encode->data = base64(
|
|
(void*)buffer, (int)(bytes * sizeof(uint8_t)), &encode->length);
|
|
|
|
finish:
|
|
if (buffer)
|
|
free(buffer);
|
|
|
|
if (encode->data)
|
|
return encode;
|
|
else
|
|
free(encode);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/**
 * Returns a newly allocated string describing the content and core currently
 * running. The string will contain the name of the core (or 'core') followed
 * by a double underscore (__) and the name of the content. Returns NULL on
 * failure. The caller owns the returned string and must free it.
 */
static char* translation_get_content_label(void)
{
   const char *label     = NULL;
   char* system_label    = NULL;
   core_info_t *core_info = NULL;

   core_info_get_current_core(&core_info);
   if (core_info)
   {
      const struct playlist_entry *entry = NULL;
      playlist_t *current_playlist       = playlist_get_cached();
      const char *system_id;
      size_t system_id_len;
      size_t label_len;

      system_id     = (core_info->system_id) ? core_info->system_id : "core";
      system_id_len = strlen(system_id);

      /* Prefer the playlist label for the running content, if present */
      if (current_playlist)
      {
         playlist_get_index_by_path(
               current_playlist, path_get(RARCH_PATH_CONTENT), &entry);

         if (entry && !string_is_empty(entry->label))
            label = entry->label;
      }

      /* Otherwise fall back on the content file's base name.
       * NOTE(review): assumes path_basename() never returns NULL here --
       * confirm behavior when no content is loaded, since strlen(NULL)
       * would be undefined behavior. */
      if (!label)
         label = path_basename(path_get(RARCH_PATH_BASENAME));

      label_len = strlen(label);
      /* +3: "__" separator plus the NUL terminator */
      if (!(system_label = (char*)malloc(label_len + system_id_len + 3)))
         return NULL;

      /* Build "<system_id>__<label>" */
      memcpy(system_label, system_id, system_id_len);
      memcpy(system_label + system_id_len, "__", 2);
      memcpy(system_label + 2 + system_id_len, label, label_len);
      system_label[system_id_len + 2 + label_len] = '\0';
   }

   return system_label;
}
|
|
|
|
/**
 * Creates and returns a JSON writer containing the payload to send alongside
 * the translation request. {label} may be NULL, in which case no label will
 * be supplied in the JSON. Returns NULL if the writer cannot be initialized.
 *
 * Payload shape:
 *   { "image": <base64>, "format": "png"|"bmp",
 *     "coords": [x, y, w, h], "viewport": [w, h],
 *     "label": "<system__content>" (optional),
 *     "state": { "paused": 0|1, "<input label>": 0|1, ... } }
 */
static rjsonwriter_t* build_request_json(
      access_base64_t *image, access_request_t *request,
      access_frame_t *frame, char *label)
{
   unsigned i;
   rjsonwriter_t* writer = NULL;

   if (!(writer = rjsonwriter_open_memory()))
      return NULL;

   rjsonwriter_add_start_object(writer);
   {
      /* Base64-encoded screenshot plus its encoding format */
      rjsonwriter_add_string(writer, "image");
      rjsonwriter_add_colon(writer);
      rjsonwriter_add_string_len(writer, image->data, image->length);

      rjsonwriter_add_comma(writer);
      rjsonwriter_add_string(writer, "format");
      rjsonwriter_add_colon(writer);
      rjsonwriter_add_string(writer, image->format);

      /* Content rectangle within the viewport */
      rjsonwriter_add_comma(writer);
      rjsonwriter_add_string(writer, "coords");
      rjsonwriter_add_colon(writer);
      rjsonwriter_add_start_array(writer);
      {
         rjsonwriter_add_unsigned(writer, frame->content_x);
         rjsonwriter_add_comma(writer);
         rjsonwriter_add_unsigned(writer, frame->content_y);
         rjsonwriter_add_comma(writer);
         rjsonwriter_add_unsigned(writer, frame->content_width);
         rjsonwriter_add_comma(writer);
         rjsonwriter_add_unsigned(writer, frame->content_height);
      }
      rjsonwriter_add_end_array(writer);

      /* Full display viewport dimensions */
      rjsonwriter_add_comma(writer);
      rjsonwriter_add_string(writer, "viewport");
      rjsonwriter_add_colon(writer);
      rjsonwriter_add_start_array(writer);
      {
         rjsonwriter_add_unsigned(writer, frame->viewport_width);
         rjsonwriter_add_comma(writer);
         rjsonwriter_add_unsigned(writer, frame->viewport_height);
      }
      rjsonwriter_add_end_array(writer);

      /* Optional core/content identifier, lets servers cache per game */
      if (label)
      {
         rjsonwriter_add_comma(writer);
         rjsonwriter_add_string(writer, "label");
         rjsonwriter_add_colon(writer);
         rjsonwriter_add_string(writer, label);
      }

      /* Frontend state snapshot: pause status and retropad key states */
      rjsonwriter_add_comma(writer);
      rjsonwriter_add_string(writer, "state");
      rjsonwriter_add_colon(writer);
      rjsonwriter_add_start_object(writer);
      {
         rjsonwriter_add_string(writer, "paused");
         rjsonwriter_add_colon(writer);
         rjsonwriter_add_unsigned(writer, (request->paused ? 1 : 0));

         for (i = 0; i < ARRAY_SIZE(ACCESS_INPUT_LABELS); i++)
         {
            rjsonwriter_add_comma(writer);
            rjsonwriter_add_string(writer, ACCESS_INPUT_LABELS[i]);
            rjsonwriter_add_colon(writer);
            rjsonwriter_add_unsigned(writer, request->inputs[i]);
         }
         rjsonwriter_add_end_object(writer);
      }
      rjsonwriter_add_end_object(writer);
   }

   return writer;
}
|
|
|
|
/**
|
|
* Writes in the provided {buffer} the URL for the translation request. The
|
|
* buffer is guaranteed to contain the server URL as well as an 'output' param
|
|
* specifying the accepted data types for this service.
|
|
*/
|
|
static void build_request_url(char *buffer, size_t length, settings_t *settings)
|
|
{
|
|
char token[2];
|
|
size_t _len;
|
|
bool poke_supported = false;
|
|
unsigned service_source_lang = settings->uints.ai_service_source_lang;
|
|
unsigned service_target_lang = settings->uints.ai_service_target_lang;
|
|
const char *service_url = settings->arrays.ai_service_url;
|
|
unsigned ai_service_mode = settings->uints.ai_service_mode;
|
|
#ifdef HAVE_GFX_WIDGETS
|
|
video_driver_state_t *video_st = video_state_get_ptr();
|
|
poke_supported = video_st->poke
|
|
&& video_st->poke->load_texture
|
|
&& video_st->poke->unload_texture;
|
|
#endif
|
|
|
|
token[1] = '\0';
|
|
if (strrchr(service_url, '?'))
|
|
token[0] = '&';
|
|
else
|
|
token[0] = '?';
|
|
|
|
_len = strlcpy(buffer, service_url, length);
|
|
buffer += _len ;
|
|
length -= _len;
|
|
|
|
if (service_source_lang != TRANSLATION_LANG_DONT_CARE)
|
|
{
|
|
const char *lang_source
|
|
= ai_service_get_str((enum translation_lang)service_source_lang);
|
|
|
|
if (!string_is_empty(lang_source))
|
|
{
|
|
_len = strlcpy(buffer, token, length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
|
|
_len = strlcpy(buffer, "source_lang=", length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
|
|
_len = strlcpy(buffer, lang_source, length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
token[0] = '&';
|
|
}
|
|
}
|
|
|
|
if (service_target_lang != TRANSLATION_LANG_DONT_CARE)
|
|
{
|
|
const char *lang_target
|
|
= ai_service_get_str((enum translation_lang)service_target_lang);
|
|
|
|
if (!string_is_empty(lang_target))
|
|
{
|
|
_len = strlcpy(buffer, token, length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
|
|
_len = strlcpy(buffer, "target_lang=", length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
|
|
_len = strlcpy(buffer, lang_target, length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
token[0] = '&';
|
|
}
|
|
}
|
|
|
|
_len = strlcpy(buffer, token, length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
|
|
_len = strlcpy(buffer, "output=", length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
|
|
switch (ai_service_mode)
|
|
{
|
|
case 0: /* Image Mode */
|
|
_len = strlcpy(buffer, "image,bmp", length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
#ifdef HAVE_RPNG
|
|
_len = strlcpy(buffer, ",png", length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
if (poke_supported)
|
|
{
|
|
strlcpy(buffer, ",png-a", length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
}
|
|
#endif
|
|
break;
|
|
|
|
case 1: /* Speech Mode */
|
|
_len = strlcpy(buffer, "sound,wav", length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
break;
|
|
|
|
case 2: /* Narrator Mode */
|
|
_len = strlcpy(buffer, "text", length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
break;
|
|
|
|
case 3: /* Text Mode */
|
|
case 4: /* Text + Narrator */
|
|
_len = strlcpy(buffer, "text,subs", length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
break;
|
|
|
|
case 5: /* Image + Narrator */
|
|
_len = strlcpy(buffer, "text,image,bmp", length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
#ifdef HAVE_RPNG
|
|
_len = strlcpy(buffer, ",png", length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
if (poke_supported)
|
|
{
|
|
_len = strlcpy(buffer, ",png-a", length);
|
|
buffer += _len;
|
|
length -= _len;
|
|
}
|
|
#endif
|
|
break;
|
|
}
|
|
}
|
|
|
|
/**
 * Captures a frame from the currently running core and sends a request to the
 * translation server. Processing and encoding this data comes with a cost, so
 * it is offloaded to the task thread.
 *
 * Ownership: this handler frees the {request} (and its inputs array) passed
 * as task user_data, plus every intermediate buffer it allocates. The HTTP
 * transfer is fire-and-forget; translation_response_cb handles the reply.
 */
static void translation_request_hndl(retro_task_t *task)
{
   access_request_t *request = (access_request_t*)task->user_data;
   settings_t *settings      = config_get_ptr();
   access_state_t *access_st = access_state_get_ptr();
   access_frame_t *frame     = NULL;
   access_base64_t *encode   = NULL;
   char *label               = NULL;
   rjsonwriter_t *writer     = NULL;
   const char *json          = NULL;
   bool sent                 = false;
   char url[PATH_MAX_LENGTH];

   /* Cancellation is re-checked after each expensive step below */
   if (task_get_cancelled(task))
      goto finish;

   access_st->last_call = cpu_features_get_time_usec();

   frame = translation_grab_frame();
   if (task_get_cancelled(task) || !frame)
      goto finish;

   /* Skip identical frames in auto mode to avoid spamming the server */
   if (translation_dupe_fail(frame))
      goto finish;

   encode = translation_frame_encode(frame);
   if (task_get_cancelled(task) || !encode)
      goto finish;

   label  = translation_get_content_label();
   writer = build_request_json(encode, request, frame, label);
   if (task_get_cancelled(task) || !writer)
      goto finish;

   json = rjsonwriter_get_memory_buffer(writer, NULL);
   build_request_url(url, PATH_MAX_LENGTH, settings);
   if (task_get_cancelled(task) || !json)
      goto finish;

#ifdef DEBUG
   if (access_st->ai_service_auto == 0)
      RARCH_LOG("[Translate]: Sending request to: %s\n", url);
#endif
   sent = true;
   /* NOTE(review): {json} points into {writer}'s buffer, which is freed
    * below -- assumes task_push_http_post_transfer copies the body; confirm. */
   task_push_http_post_transfer(
         url, json, true, NULL, translation_response_cb, NULL);

finish:
   task_set_finished(task, true);

   if (frame)
   {
      if (frame->data)
         free(frame->data);
      free(frame);
   }
   if (encode)
   {
      if (encode->data)
         free(encode->data);
      free(encode);
   }
   if (label)
      free(label);
   if (writer)
      rjsonwriter_free(writer);
   if (request)
   {
      if (request->inputs)
         free(request->inputs);
      free(request);
   }

   /* Plan next auto-request if this one was skipped */
   if (!sent && access_st->ai_service_auto != 0)
      call_auto_translate_task(settings);
}
|
|
|
|
/**
|
|
* Invokes the translation service. Captures a frame from the current content
|
|
* core and sends it over HTTP to the translation server. Once the server
|
|
* responds, the translation data is displayed accordingly to the preferences
|
|
* of the user. Returns true if the request could be built and sent.
|
|
*/
|
|
bool run_translation_service(settings_t *settings, bool paused)
|
|
{
|
|
unsigned i;
|
|
retro_task_t *task = NULL;
|
|
access_request_t *request = NULL;
|
|
access_state_t *access_st = access_state_get_ptr();
|
|
#ifdef HAVE_ACCESSIBILITY
|
|
input_driver_state_t *input_st = input_state_get_ptr();
|
|
#endif
|
|
|
|
if (!(request = (access_request_t*)malloc(sizeof(access_request_t))))
|
|
goto failure;
|
|
|
|
#ifdef HAVE_THREADS
|
|
if (!access_st->image_lock)
|
|
{
|
|
if (!(access_st->image_lock = slock_new()))
|
|
goto failure;
|
|
}
|
|
#endif
|
|
|
|
task = task_init();
|
|
if (!task)
|
|
goto failure;
|
|
|
|
/* Freeze frontend state while we're still running on the main thread */
|
|
request->paused = paused;
|
|
request->inputs = (char*)malloc(
|
|
sizeof(char) * ARRAY_SIZE(ACCESS_INPUT_LABELS));
|
|
|
|
#ifdef HAVE_ACCESSIBILITY
|
|
for (i = 0; i < ARRAY_SIZE(ACCESS_INPUT_LABELS); i++)
|
|
request->inputs[i] = input_st->ai_gamepad_state[i] ? 1 : 0;
|
|
#endif
|
|
|
|
task->handler = translation_request_hndl;
|
|
task->user_data = request;
|
|
task->mute = true;
|
|
access_st->request_task = task;
|
|
task_queue_push(task);
|
|
|
|
return true;
|
|
|
|
failure:
|
|
if (request)
|
|
free(request);
|
|
|
|
return false;
|
|
}
|