diff --git a/accessibility.h b/accessibility.h index df965df2bf..f52e6f4676 100644 --- a/accessibility.h +++ b/accessibility.h @@ -31,11 +31,36 @@ #endif #include "configuration.h" +#include "tasks/tasks_internal.h" + +#ifdef HAVE_THREADS +#include "rthreads/rthreads.h" +#endif typedef struct { + /* The last request task, used to prepare and send the translation */ + retro_task_t *request_task; + + /* The last response task, used to parse costly translation data */ + retro_task_t *response_task; + + /* Timestamp of the last translation request */ + retro_time_t last_call; + + #ifdef HAVE_THREADS + /* Necessary because last_image is manipulated by task handlers */ + slock_t *image_lock; + #endif + + /* Frame captured during the last call to the translation service */ + uint8_t *last_image; + int last_image_size; + + /* 1 if the automatic mode has been enabled, 0 otherwise */ int ai_service_auto; - /* Is text-to-speech accessibility turned on? */ + + /* Text-to-speech narrator override flag */ bool enabled; } access_state_t; @@ -46,42 +71,73 @@ bool is_narrator_running(bool accessibility_enable); #endif /* - This function does all the stuff needed to translate the game screen, - using the URL given in the settings. Once the image from the frame - buffer is sent to the server, the callback will write the translated - image to the screen. + Invoke this method to send a request to the AI service. + It makes the following POST request using URL params: + – source_lang (optional): language code of the content currently running. + – target_lang (optional): language of the content to return. + – output: comma-separated list of formats that must be provided by the + service. Also lists supported sub-formats. + + The currently supported formats are: + – sound: raw audio to playback. (wav) + – text: text to be read through internal text-to-speech capabilities. + 'subs' can be specified on top of that to explain that we are looking + for short text response in the manner of subtitles. + – image: image to display on top of the video feed. Widgets will be used + first if possible, otherwise we'll try to draw it directly on the + video buffer. (bmp, png, png-a) [All in 24-bits BGR formats] + + In addition, the request contains a JSON payload, formatted as such: + – image: captured frame from the currently running content (in base64). + – format: format of the captured frame ("png", or "bmp"). + – coords: array describing the coordinates of the image within the + viewport space (x, y, width, height). + – viewport: array describing the size of the viewport (width, height). + – label: a text string describing the content (__). + – state: a JSON object describing the state of the frontend, containing: + – paused: 1 if the content has been paused, 0 otherwise. + – : the name of a retropad input, valued 1 if pressed. + (a, b, x, y, l, r, l2, r2, l3, r3) + (up, down, left, right, start, select) + + The translation component then expects a response from the AI service in the + form of a JSON payload, formatted as such: + – image: base64 representation of an image in a supported format. + – sound: base64 representation of a sound byte in a supported format. + – text: results from the service as a string. + – text_position: hint for the position of the text when the service is + running in text mode (ie subtitles). Position is a number, + 1 for Bottom or 2 for Top (defaults to bottom). + – press: a list of retropad input to forcibly press. On top of the + expected keys (cf. 
'state' above) values 'pause' and 'unpause' can be + specified to control the flow of the content. + – error: any error encountered with the request. + – auto: either 'auto' or 'continue' to control automatic requests. + + All fields are optional, but at least one of them must be present. + If 'error' is set, the error is shown to the user and everything else is + ignored, even 'auto' settings. + + With 'auto' on 'auto', RetroArch will automatically send a new request + (with a minimum delay enforced by uints.ai_service_poll_delay), with a value + of 'continue', RetroArch will ignore the returned content and skip to the + next automatic request. This allows the service to specify that the returned + content is the same as the one previously sent, so RetroArch does not need to + update its display unless necessary. With 'continue' the service *must* + still send the content, as we may need to display it if the user paused the + AI service for instance. - Supported client/services (thus far) - -VGTranslate client ( www.gitlab.com/spherebeaker/vg_translate ) - -Ztranslate client/service ( www.ztranslate.net/docs/service ) - - To use a client, download the relevant code/release, configure - them, and run them on your local machine, or network. Set the - retroarch configuration to point to your local client (usually - listening on localhost:4404 ) and enable translation service. - - If you don't want to run a client, you can also use a service, - which is basically like someone running a client for you. The - downside here is that your retroarch device will have to have - an internet connection, and you may have to sign up for it. - - To make your own server, it must listen for a POST request, which - will consist of a JSON body, with the "image" field as a base64 - encoded string of a 24bit-BMP/PNG that the will be translated. - The server must output the translated image in the form of a - JSON body, with the "image" field also as a base64 encoded - 24bit-BMP, or as an alpha channel png. - - "paused" boolean is passed in to indicate if the current call - was made during a paused frame. Due to how the menu widgets work, - if the ai service is called in "auto" mode, then this call will - be made while the menu widgets unpause the core for a frame to update - the on-screen widgets. To tell the ai service what the pause - mode is honestly, we store the runloop_paused variable from before - the handle_translation_cb wipes the widgets, and pass that in here. + {paused} boolean is passed in to indicate if the current call was made + during a paused frame. Due to how the menu widgets work, if the AI service + is called in 'auto' mode, then this call will be made while the menu widgets + unpause the core for a frame to update the on-screen widgets. To tell the AI + service what the pause mode is honestly, we store the runloop_paused + variable from before the service wipes the widgets, and pass that in here. 
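+
+   For illustration only, a single exchange could look as follows. Every value
+   below is hypothetical and the base64 payload is truncated; the URL is the
+   default service address:
+
+   POST http://localhost:4404/?source_lang=ja&target_lang=en&output=text,subs
+
+   {
+     "image": "iVBORw0KGgo...",
+     "format": "png",
+     "coords": [0, 0, 320, 240],
+     "viewport": [320, 240],
+     "label": "core__Some Game (USA)",
+     "state": { "paused": 1, "b": 1 }
+   }
+
+   to which the service might reply with:
+
+   {
+     "text": "PRESS START",
+     "text_position": 1,
+     "auto": "auto"
+   }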
*/ bool run_translation_service(settings_t *settings, bool paused); +void translation_release(bool inform); + bool accessibility_speak_priority( bool accessibility_enable, unsigned accessibility_narrator_speech_speed, diff --git a/config.def.h b/config.def.h index c6294a5fc0..1ebbb626f1 100644 --- a/config.def.h +++ b/config.def.h @@ -1749,8 +1749,14 @@ #define DEFAULT_AI_SERVICE_MODE 1 +#define DEFAULT_AI_SERVICE_TEXT_POSITION 0 +#define DEFAULT_AI_SERVICE_TEXT_PADDING 5 + #define DEFAULT_AI_SERVICE_URL "http://localhost:4404/" +#define DEFAULT_AI_SERVICE_POLL_DELAY 0 +#define MAXIMUM_AI_SERVICE_POLL_DELAY 500 + #if defined(HAVE_FFMPEG) || defined(HAVE_MPV) #define DEFAULT_BUILTIN_MEDIAPLAYER_ENABLE true #else diff --git a/configuration.c b/configuration.c index 1bb55826bd..6448f23087 100644 --- a/configuration.c +++ b/configuration.c @@ -2477,11 +2477,13 @@ static struct config_uint_setting *populate_settings_uint( SETTING_UINT("cheevos_appearance_anchor", &settings->uints.cheevos_appearance_anchor, true, DEFAULT_CHEEVOS_APPEARANCE_ANCHOR, false); SETTING_UINT("cheevos_visibility_summary", &settings->uints.cheevos_visibility_summary, true, DEFAULT_CHEEVOS_VISIBILITY_SUMMARY, false); #endif - SETTING_UINT("accessibility_narrator_speech_speed", &settings->uints.accessibility_narrator_speech_speed, true, DEFAULT_ACCESSIBILITY_NARRATOR_SPEECH_SPEED, false); - SETTING_UINT("ai_service_mode", &settings->uints.ai_service_mode, true, DEFAULT_AI_SERVICE_MODE, false); - SETTING_UINT("ai_service_target_lang", &settings->uints.ai_service_target_lang, true, 0, false); - SETTING_UINT("ai_service_source_lang", &settings->uints.ai_service_source_lang, true, 0, false); + SETTING_UINT("ai_service_mode", &settings->uints.ai_service_mode, true, DEFAULT_AI_SERVICE_MODE, false); + SETTING_UINT("ai_service_target_lang", &settings->uints.ai_service_target_lang, true, 0, false); + SETTING_UINT("ai_service_source_lang", &settings->uints.ai_service_source_lang, true, 0, false); + SETTING_UINT("ai_service_poll_delay", &settings->uints.ai_service_poll_delay, true, DEFAULT_AI_SERVICE_POLL_DELAY, false); + SETTING_UINT("ai_service_text_position", &settings->uints.ai_service_text_position, true, DEFAULT_AI_SERVICE_TEXT_POSITION, false); + SETTING_UINT("ai_service_text_padding", &settings->uints.ai_service_text_padding, true, DEFAULT_AI_SERVICE_TEXT_PADDING, false); #ifdef HAVE_LIBNX SETTING_UINT("libnx_overclock", &settings->uints.libnx_overclock, true, SWITCH_DEFAULT_CPU_PROFILE, false); diff --git a/configuration.h b/configuration.h index b951533465..a44ae3fbcc 100644 --- a/configuration.h +++ b/configuration.h @@ -334,6 +334,9 @@ typedef struct settings unsigned ai_service_mode; unsigned ai_service_target_lang; unsigned ai_service_source_lang; + unsigned ai_service_poll_delay; + unsigned ai_service_text_position; + unsigned ai_service_text_padding; unsigned core_updater_auto_backup_history_size; unsigned video_black_frame_insertion; diff --git a/frontend/drivers/platform_win32.c b/frontend/drivers/platform_win32.c index 3c50c8205d..ffccd7c8c2 100644 --- a/frontend/drivers/platform_win32.c +++ b/frontend/drivers/platform_win32.c @@ -1064,9 +1064,12 @@ static bool accessibility_speak_windows(int speed, if (!wc || res != 0) { RARCH_ERR("Error communicating with NVDA\n"); + /* Fallback on powershell immediately and retry */ + g_plat_win32_flags &= ~PLAT_WIN32_FLAG_USE_NVDA; + g_plat_win32_flags |= PLAT_WIN32_FLAG_USE_POWERSHELL; if (wc) free(wc); - return false; + return accessibility_speak_windows(speed, speak_text, 
priority); } nvdaController_cancelSpeech_func(); diff --git a/gfx/gfx_widgets.c b/gfx/gfx_widgets.c index 989decd85f..453dddc663 100644 --- a/gfx/gfx_widgets.c +++ b/gfx/gfx_widgets.c @@ -1471,6 +1471,67 @@ static void INLINE gfx_widgets_font_unbind(gfx_widget_font_data_t *font_data) font_driver_bind_block(font_data->font, NULL); } +#ifdef HAVE_TRANSLATE +static void gfx_widgets_ai_line( + video_frame_info_t *video, char *line, int line_idx, int line_total) +{ + settings_t *settings = config_get_ptr(); + gfx_display_t *p_disp = (gfx_display_t*)video->disp_userdata; + dispgfx_widget_t *p_widget = (dispgfx_widget_t*)video->widgets_userdata; + void *userdata = video->userdata; + unsigned video_width = video->width; + unsigned video_height = video->height; + + int line_width = font_driver_get_message_width( + p_widget->gfx_widget_fonts.regular.font, + line, strlen(line), 1.0f); + + int hpadding = p_widget->simple_widget_padding; + int vpadding = settings->uints.ai_service_text_padding; + int half_vw = video_width * 0.5f; + int block_width = line_width + hpadding * 2; + int block_height = p_widget->simple_widget_height; + int block_x = half_vw - block_width * 0.5f; + int block_y = 0; + int line_y = 0; + + int position = (settings->uints.ai_service_text_position > 0) + ? settings->uints.ai_service_text_position + : p_widget->ai_service_text_position; + + switch (position) + { + case 0: /* Undef. */ + case 1: /* Bottom */ + block_y = (video_height * (100 - vpadding) * 0.01f) + - ((line_total - line_idx) * block_height); + break; + case 2: /* Top */ + block_y = (video_height * (vpadding * 0.01f)) + + (line_idx * block_height); + break; + } + + line_y = block_y + block_height * 0.5f + + p_widget->gfx_widget_fonts.regular.line_centre_offset; + + gfx_display_set_alpha(p_widget->backdrop_orig, DEFAULT_BACKDROP); + + gfx_display_draw_quad( + p_disp, userdata, video_width, video_height, + block_x, block_y, block_width, block_height, + video_width, video_height, + p_widget->backdrop_orig, + NULL); + + gfx_widgets_draw_text( + &p_widget->gfx_widget_fonts.regular, + line, half_vw, line_y, + video_width, video_height, + 0xFFFFFFFF, TEXT_ALIGN_CENTER, true); +} +#endif + void gfx_widgets_frame(void *data) { size_t i; @@ -1520,12 +1581,8 @@ void gfx_widgets_frame(void *data) /* AI Service overlay */ if (p_dispwidget->ai_service_overlay_state > 0) { - float outline_color[16] = { - 0.00, 1.00, 0.00, 1.00, - 0.00, 1.00, 0.00, 1.00, - 0.00, 1.00, 0.00, 1.00, - 0.00, 1.00, 0.00, 1.00, - }; + int text_length = strlen(p_dispwidget->ai_service_text); + gfx_display_set_alpha(p_dispwidget->pure_white, 1.0f); if (p_dispwidget->ai_service_overlay_texture) @@ -1550,63 +1607,46 @@ void gfx_widgets_frame(void *data) if (dispctx->blend_end) dispctx->blend_end(userdata); } - - /* top line */ - gfx_display_draw_quad( - p_disp, - userdata, - video_width, video_height, - 0, 0, - video_width, - p_dispwidget->divider_width_1px, - video_width, - video_height, - outline_color, - NULL - ); - /* bottom line */ - gfx_display_draw_quad( - p_disp, - userdata, - video_width, video_height, - 0, - video_height - p_dispwidget->divider_width_1px, - video_width, - p_dispwidget->divider_width_1px, - video_width, - video_height, - outline_color, - NULL - ); - /* left line */ - gfx_display_draw_quad( - p_disp, - userdata, - video_width, - video_height, - 0, - 0, - p_dispwidget->divider_width_1px, - video_height, - video_width, - video_height, - outline_color, - NULL - ); - /* right line */ - gfx_display_draw_quad( - p_disp, - userdata, - 
video_width, video_height, - video_width - p_dispwidget->divider_width_1px, - 0, - p_dispwidget->divider_width_1px, - video_height, - video_width, - video_height, - outline_color, - NULL - ); + + /* AI Service subtitle overlay widget */ + if (text_length > 0) + { + int padding = p_dispwidget->simple_widget_padding; + int text_width = font_driver_get_message_width( + p_dispwidget->gfx_widget_fonts.regular.font, + p_dispwidget->ai_service_text, + text_length, 1.0f); + + if (text_width > (video_width * 0.9f - padding * 2)) + { + int text_half = text_length / 2; + char *extra_line = (char*)malloc(sizeof(char) * text_length); + for (; text_half > 0; text_half--) + { + if (p_dispwidget->ai_service_text[text_half] == ' ') + { + p_dispwidget->ai_service_text[text_half] = '\0'; + gfx_widgets_ai_line( + video_info, p_dispwidget->ai_service_text, 0, 2); + strlcpy( + extra_line, + p_dispwidget->ai_service_text + text_half + 1, + text_length - text_half); + gfx_widgets_ai_line( + video_info, extra_line, 1, 2); + + p_dispwidget->ai_service_text[text_half] = ' '; + free(extra_line); + break; + } + } + } + else + { + gfx_widgets_ai_line( + video_info, p_dispwidget->ai_service_text, 0, 1); + } + } if (p_dispwidget->ai_service_overlay_state == 2) p_dispwidget->ai_service_overlay_state = 3; @@ -2149,6 +2189,7 @@ void gfx_widgets_ai_service_overlay_unload(void) if (p_dispwidget->ai_service_overlay_state == 1) { video_driver_texture_unload(&p_dispwidget->ai_service_overlay_texture); + p_dispwidget->ai_service_text[0] = '\0'; p_dispwidget->ai_service_overlay_texture = 0; p_dispwidget->ai_service_overlay_state = 0; } diff --git a/gfx/gfx_widgets.h b/gfx/gfx_widgets.h index 3b8195dc70..65bb9e9909 100644 --- a/gfx/gfx_widgets.h +++ b/gfx/gfx_widgets.h @@ -236,6 +236,8 @@ typedef struct dispgfx_widget #ifdef HAVE_TRANSLATE unsigned ai_service_overlay_width; unsigned ai_service_overlay_height; + unsigned ai_service_text_position; + char ai_service_text[255]; #endif uint8_t flags; diff --git a/intl/msg_hash_lbl.h b/intl/msg_hash_lbl.h index fbdef2afff..8f1dd793e1 100644 --- a/intl/msg_hash_lbl.h +++ b/intl/msg_hash_lbl.h @@ -6031,6 +6031,18 @@ MSG_HASH( MENU_ENUM_LABEL_AI_SERVICE_SOURCE_LANG, "ai_service_source_lang" ) +MSG_HASH( + MENU_ENUM_LABEL_AI_SERVICE_POLL_DELAY, + "ai_service_poll_delay" + ) +MSG_HASH( + MENU_ENUM_LABEL_AI_SERVICE_TEXT_POSITION, + "ai_service_text_position" + ) +MSG_HASH( + MENU_ENUM_LABEL_AI_SERVICE_TEXT_PADDING, + "ai_service_text_padding" + ) MSG_HASH( MENU_ENUM_LABEL_SETTINGS_SHOW_DRIVERS, "settings_show_drivers" diff --git a/intl/msg_hash_us.h b/intl/msg_hash_us.h index 9da8361c63..833568778d 100644 --- a/intl/msg_hash_us.h +++ b/intl/msg_hash_us.h @@ -6565,9 +6565,9 @@ MSG_HASH( MENU_ENUM_LABEL_VALUE_AI_SERVICE_MODE, "AI Service Output" ) -MSG_HASH( /* FIXME What does the Narrator mode do? */ +MSG_HASH( MENU_ENUM_SUBLABEL_AI_SERVICE_MODE, - "Show translation as a text overlay (Image Mode), or play as Text-To-Speech (Speech Mode)." + "Show translation as an image overlay (Image Mode), as direct audio (Speech), text-to-speech (Narrator), or text overlay (Text)." ) MSG_HASH( MENU_ENUM_LABEL_VALUE_AI_SERVICE_URL, @@ -6609,6 +6609,30 @@ MSG_HASH( MENU_ENUM_SUBLABEL_AI_SERVICE_TARGET_LANG, "The language the service will translate to. 'Default' is English." ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_POLL_DELAY, + "AI Service Auto-Polling Delay" + ) +MSG_HASH( + MENU_ENUM_SUBLABEL_AI_SERVICE_POLL_DELAY, + "Minimum delay in ms between automatic calls. 
Lowers reactivity but increases CPU performance." + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION, + "AI Service Text Position Override" + ) +MSG_HASH( + MENU_ENUM_SUBLABEL_AI_SERVICE_TEXT_POSITION, + "Override for the position of the overlay, when the service is in Text mode." + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_PADDING, + "AI Service Text Padding (%)" + ) +MSG_HASH( + MENU_ENUM_SUBLABEL_AI_SERVICE_TEXT_PADDING, + "Vertical padding to apply to the text overlay, when the service is in Text mode. More padding will push the text towards the center of the screen." + ) /* Settings > Accessibility */ @@ -10176,6 +10200,26 @@ MSG_HASH( MENU_ENUM_LABEL_VALUE_AI_SERVICE_NARRATOR_MODE, "Narrator Mode" ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_MODE, + "Text Mode" + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_NARRATOR_MODE, + "Text + Narrator" + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_IMAGE_NARRATOR_MODE, + "Image + Narrator" + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION_BOTTOM, + "Bottom" + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION_TOP, + "Top" + ) MSG_HASH( MENU_ENUM_LABEL_VALUE_PLAYLIST_ENTRY_REMOVE_ENABLE_HIST_FAV, "History & Favorites" @@ -13239,6 +13283,22 @@ MSG_HASH( /* FIXME Should be MSG_ */ MENU_ENUM_LABEL_VALUE_SIDELOAD_CORE_ERROR, "Core installation failed" ) +MSG_HASH( + MSG_AI_VIDEO_DRIVER_NOT_SUPPORTED, + "Video driver not supported for AI Service." + ) +MSG_HASH( + MSG_AI_AUTO_MODE_ENABLED, + "Automatic translation enabled." + ) +MSG_HASH( + MSG_AI_AUTO_MODE_DISABLED, + "Automatic translation disabled." + ) +MSG_HASH( + MSG_AI_NOTHING_TO_TRANSLATE, + "Nothing to translate." + ) MSG_HASH( MSG_CHEAT_DELETE_ALL_INSTRUCTIONS, "Press right five times to delete all cheats." 
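The 'Text Position' and 'Text Padding (%)' strings above correspond to the placement math added to gfx_widgets_ai_line() earlier in this patch. As a rough standalone sketch of that computation (the helper name and signature are illustrative only, not part of the patch; position uses the same 0 = unset, 1 = Bottom, 2 = Top encoding):

static int ai_text_block_y(unsigned video_height, unsigned position,
      unsigned vpadding, int block_height, int line_idx, int line_total)
{
   /* vpadding is ai_service_text_padding, a percentage of the screen height;
    * line_idx/line_total account for the two-line split applied to wide text. */
   if (position == 2) /* Top: pad down from the top edge, stack lines downwards */
      return (int)(video_height * (vpadding * 0.01f))
            + line_idx * block_height;
   /* 0 and 1: pad up from the bottom edge, stack lines upwards */
   return (int)(video_height * (100 - vpadding) * 0.01f)
         - (line_total - line_idx) * block_height;
}

With the default padding of 5 and a 1080-line viewport, a single bottom-positioned line is therefore drawn with its top edge at roughly 0.95 * 1080 - block_height = 1026 - block_height pixels.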
diff --git a/menu/cbs/menu_cbs_sublabel.c b/menu/cbs/menu_cbs_sublabel.c index 40d8eba072..380c11c41f 100644 --- a/menu/cbs/menu_cbs_sublabel.c +++ b/menu/cbs/menu_cbs_sublabel.c @@ -269,6 +269,9 @@ DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_target_lang, MENU_ENUM_S DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_source_lang, MENU_ENUM_SUBLABEL_AI_SERVICE_SOURCE_LANG) DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_url, MENU_ENUM_SUBLABEL_AI_SERVICE_URL) DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_enable, MENU_ENUM_SUBLABEL_AI_SERVICE_ENABLE) +DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_poll_delay, MENU_ENUM_SUBLABEL_AI_SERVICE_POLL_DELAY) +DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_text_position, MENU_ENUM_SUBLABEL_AI_SERVICE_TEXT_POSITION) +DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_ai_service_text_padding, MENU_ENUM_SUBLABEL_AI_SERVICE_TEXT_PADDING) DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_power_management_settings_list, MENU_ENUM_SUBLABEL_POWER_MANAGEMENT_SETTINGS) DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_privacy_settings_list, MENU_ENUM_SUBLABEL_PRIVACY_SETTINGS) DEFAULT_SUBLABEL_MACRO(action_bind_sublabel_midi_settings_list, MENU_ENUM_SUBLABEL_MIDI_SETTINGS) @@ -5001,6 +5004,15 @@ int menu_cbs_init_bind_sublabel(menu_file_list_cbs_t *cbs, case MENU_ENUM_LABEL_AI_SERVICE_ENABLE: BIND_ACTION_SUBLABEL(cbs, action_bind_sublabel_ai_service_enable); break; + case MENU_ENUM_LABEL_AI_SERVICE_POLL_DELAY: + BIND_ACTION_SUBLABEL(cbs, action_bind_sublabel_ai_service_poll_delay); + break; + case MENU_ENUM_LABEL_AI_SERVICE_TEXT_POSITION: + BIND_ACTION_SUBLABEL(cbs, action_bind_sublabel_ai_service_text_position); + break; + case MENU_ENUM_LABEL_AI_SERVICE_TEXT_PADDING: + BIND_ACTION_SUBLABEL(cbs, action_bind_sublabel_ai_service_text_padding); + break; case MENU_ENUM_LABEL_AI_SERVICE_SETTINGS: BIND_ACTION_SUBLABEL(cbs, action_bind_sublabel_ai_service_settings_list); break; diff --git a/menu/menu_displaylist.c b/menu/menu_displaylist.c index 094616d06e..5667760e51 100644 --- a/menu/menu_displaylist.c +++ b/menu/menu_displaylist.c @@ -5938,12 +5938,14 @@ void menu_displaylist_info_init(menu_displaylist_info_t *info) info->setting = NULL; } -typedef struct menu_displaylist_build_info { +typedef struct menu_displaylist_build_info +{ enum msg_hash_enums enum_idx; enum menu_displaylist_parse_type parse_type; } menu_displaylist_build_info_t; -typedef struct menu_displaylist_build_info_selective { +typedef struct menu_displaylist_build_info_selective +{ enum msg_hash_enums enum_idx; enum menu_displaylist_parse_type parse_type; bool checked; @@ -6683,7 +6685,8 @@ unsigned menu_displaylist_build_list( bool playlist_show_sublabels = settings->bools.playlist_show_sublabels; bool history_list_enable = settings->bools.history_list_enable; bool truncate_playlist = settings->bools.ozone_truncate_playlist_name; - menu_displaylist_build_info_selective_t build_list[] = { + menu_displaylist_build_info_selective_t build_list[] = + { {MENU_ENUM_LABEL_HISTORY_LIST_ENABLE, PARSE_ONLY_BOOL, true}, {MENU_ENUM_LABEL_CONTENT_HISTORY_SIZE, PARSE_ONLY_UINT, false}, {MENU_ENUM_LABEL_CONTENT_FAVORITES_SIZE, PARSE_ONLY_INT, true}, @@ -7736,12 +7739,15 @@ unsigned menu_displaylist_build_list( bool ai_service_enable = settings->bools.ai_service_enable; menu_displaylist_build_info_selective_t build_list[] = { - {MENU_ENUM_LABEL_AI_SERVICE_ENABLE, PARSE_ONLY_BOOL, true}, - {MENU_ENUM_LABEL_AI_SERVICE_MODE, PARSE_ONLY_UINT, false}, - {MENU_ENUM_LABEL_AI_SERVICE_URL, 
PARSE_ONLY_STRING, false}, - {MENU_ENUM_LABEL_AI_SERVICE_PAUSE, PARSE_ONLY_BOOL, false}, - {MENU_ENUM_LABEL_AI_SERVICE_SOURCE_LANG, PARSE_ONLY_UINT, false}, - {MENU_ENUM_LABEL_AI_SERVICE_TARGET_LANG, PARSE_ONLY_UINT, false}, + {MENU_ENUM_LABEL_AI_SERVICE_ENABLE, PARSE_ONLY_BOOL, true}, + {MENU_ENUM_LABEL_AI_SERVICE_MODE, PARSE_ONLY_UINT, false}, + {MENU_ENUM_LABEL_AI_SERVICE_URL, PARSE_ONLY_STRING, false}, + {MENU_ENUM_LABEL_AI_SERVICE_PAUSE, PARSE_ONLY_BOOL, false}, + {MENU_ENUM_LABEL_AI_SERVICE_POLL_DELAY, PARSE_ONLY_UINT, false}, + {MENU_ENUM_LABEL_AI_SERVICE_SOURCE_LANG, PARSE_ONLY_UINT, false}, + {MENU_ENUM_LABEL_AI_SERVICE_TARGET_LANG, PARSE_ONLY_UINT, false}, + {MENU_ENUM_LABEL_AI_SERVICE_TEXT_POSITION, PARSE_ONLY_UINT, false}, + {MENU_ENUM_LABEL_AI_SERVICE_TEXT_PADDING, PARSE_ONLY_UINT, false}, }; for (i = 0; i < ARRAY_SIZE(build_list); i++) @@ -7751,8 +7757,11 @@ unsigned menu_displaylist_build_list( case MENU_ENUM_LABEL_AI_SERVICE_MODE: case MENU_ENUM_LABEL_AI_SERVICE_URL: case MENU_ENUM_LABEL_AI_SERVICE_PAUSE: + case MENU_ENUM_LABEL_AI_SERVICE_POLL_DELAY: case MENU_ENUM_LABEL_AI_SERVICE_SOURCE_LANG: case MENU_ENUM_LABEL_AI_SERVICE_TARGET_LANG: + case MENU_ENUM_LABEL_AI_SERVICE_TEXT_POSITION: + case MENU_ENUM_LABEL_AI_SERVICE_TEXT_PADDING: if (ai_service_enable) build_list[i].checked = true; break; diff --git a/menu/menu_setting.c b/menu/menu_setting.c index 352d5a2762..e23a3d2586 100644 --- a/menu/menu_setting.c +++ b/menu/menu_setting.c @@ -3014,6 +3014,42 @@ static void setting_get_string_representation_uint_ai_service_mode( case 2: enum_idx = MENU_ENUM_LABEL_VALUE_AI_SERVICE_NARRATOR_MODE; break; + case 3: + enum_idx = MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_MODE; + break; + case 4: + enum_idx = MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_NARRATOR_MODE; + break; + case 5: + enum_idx = MENU_ENUM_LABEL_VALUE_AI_SERVICE_IMAGE_NARRATOR_MODE; + break; + default: + break; + } + + if (enum_idx != 0) + strlcpy(s, msg_hash_to_str(enum_idx), len); +} + +static void setting_get_string_representation_uint_ai_service_text_position( + rarch_setting_t *setting, + char *s, size_t len) +{ + enum msg_hash_enums enum_idx = MSG_UNKNOWN; + if (!setting) + return; + + switch (*setting->value.target.unsigned_integer) + { + case 0: + enum_idx = MENU_ENUM_LABEL_VALUE_NONE; + break; + case 1: + enum_idx = MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION_BOTTOM; + break; + case 2: + enum_idx = MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION_TOP; + break; default: break; } @@ -19234,7 +19270,7 @@ static bool setting_append_list( (*list)[list_info->index - 1].get_string_representation = &setting_get_string_representation_uint_ai_service_mode; (*list)[list_info->index - 1].action_ok = &setting_action_ok_uint; - menu_settings_list_current_add_range(list, list_info, 0, 2, 1, true, true); + menu_settings_list_current_add_range(list, list_info, 0, 5, 1, true, true); CONFIG_STRING( list, list_info, @@ -19316,6 +19352,50 @@ static bool setting_append_list( &setting_get_string_representation_uint_ai_service_lang; (*list)[list_info->index - 1].action_ok = &setting_action_ok_uint; menu_settings_list_current_add_range(list, list_info, TRANSLATION_LANG_DONT_CARE, (TRANSLATION_LANG_LAST-1), 1, true, true); + + CONFIG_UINT( + list, list_info, + &settings->uints.ai_service_poll_delay, + MENU_ENUM_LABEL_AI_SERVICE_POLL_DELAY, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_POLL_DELAY, + DEFAULT_AI_SERVICE_POLL_DELAY, + &group_info, + &subgroup_info, + parent_group, + general_write_handler, + general_read_handler); + (*list)[list_info->index - 
1].action_ok = &setting_action_ok_uint; + menu_settings_list_current_add_range(list, list_info, 0, MAXIMUM_AI_SERVICE_POLL_DELAY, 50, true, true); + + CONFIG_UINT( + list, list_info, + &settings->uints.ai_service_text_position, + MENU_ENUM_LABEL_AI_SERVICE_TEXT_POSITION, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION, + DEFAULT_AI_SERVICE_TEXT_POSITION, + &group_info, + &subgroup_info, + parent_group, + general_write_handler, + general_read_handler); + (*list)[list_info->index - 1].get_string_representation = + &setting_get_string_representation_uint_ai_service_text_position; + (*list)[list_info->index - 1].action_ok = &setting_action_ok_uint; + menu_settings_list_current_add_range(list, list_info, 0, 2, 1, true, true); + + CONFIG_UINT( + list, list_info, + &settings->uints.ai_service_text_padding, + MENU_ENUM_LABEL_AI_SERVICE_TEXT_PADDING, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_PADDING, + DEFAULT_AI_SERVICE_TEXT_PADDING, + &group_info, + &subgroup_info, + parent_group, + general_write_handler, + general_read_handler); + (*list)[list_info->index - 1].action_ok = &setting_action_ok_uint; + menu_settings_list_current_add_range(list, list_info, 0, 20, 1, true, true); END_SUB_GROUP(list, list_info, parent_group); END_GROUP(list, list_info, parent_group); diff --git a/msg_hash.h b/msg_hash.h index 99fddc252f..8fc17474a9 100644 --- a/msg_hash.h +++ b/msg_hash.h @@ -569,6 +569,10 @@ enum msg_hash_enums MSG_FAILED_TO_ENTER_GAMEMODE_LINUX, MSG_VRR_RUNLOOP_ENABLED, MSG_VRR_RUNLOOP_DISABLED, + MSG_AI_VIDEO_DRIVER_NOT_SUPPORTED, + MSG_AI_AUTO_MODE_ENABLED, + MSG_AI_AUTO_MODE_DISABLED, + MSG_AI_NOTHING_TO_TRANSLATE, MSG_VIDEO_REFRESH_RATE_CHANGED, MSG_IOS_TOUCH_MOUSE_ENABLED, @@ -2785,6 +2789,9 @@ enum msg_hash_enums MENU_LABEL(AI_SERVICE_URL), MENU_LABEL(AI_SERVICE_ENABLE), MENU_LABEL(AI_SERVICE_PAUSE), + MENU_LABEL(AI_SERVICE_POLL_DELAY), + MENU_LABEL(AI_SERVICE_TEXT_POSITION), + MENU_LABEL(AI_SERVICE_TEXT_PADDING), MENU_LABEL(ON), MENU_LABEL(OFF), @@ -3477,6 +3484,11 @@ enum msg_hash_enums MENU_ENUM_LABEL_VALUE_AI_SERVICE_IMAGE_MODE, MENU_ENUM_LABEL_VALUE_AI_SERVICE_SPEECH_MODE, MENU_ENUM_LABEL_VALUE_AI_SERVICE_NARRATOR_MODE, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_MODE, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_NARRATOR_MODE, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_IMAGE_NARRATOR_MODE, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION_TOP, + MENU_ENUM_LABEL_VALUE_AI_SERVICE_TEXT_POSITION_BOTTOM, MENU_ENUM_LABEL_VALUE_NONE, MENU_ENUM_LABEL_VALUE_NO_INFORMATION_AVAILABLE, diff --git a/retroarch.c b/retroarch.c index 39593dbfd8..f4193e1fa7 100644 --- a/retroarch.c +++ b/retroarch.c @@ -2236,6 +2236,9 @@ bool command_event(enum event_command cmd, void *data) #if defined(HAVE_ACCESSIBILITY) || defined(HAVE_TRANSLATE) access_state_t *access_st = access_state_get_ptr(); #endif +#if defined(HAVE_TRANSLATE) && defined(HAVE_GFX_WIDGETS) + dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); +#endif #ifdef HAVE_MENU struct menu_state *menu_st = menu_state_get_ptr(); #endif @@ -2252,12 +2255,12 @@ bool command_event(enum event_command cmd, void *data) #ifdef HAVE_OVERLAY input_overlay_unload(); #endif -#if defined(HAVE_TRANSLATE) && defined(HAVE_GFX_WIDGETS) - /* Because the overlay is a display widget, - * it's going to be written - * over the menu, so we unset it here. 
*/ - if (dispwidget_get_ptr()->ai_service_overlay_state != 0) +#ifdef HAVE_TRANSLATE + translation_release(true); +#ifdef HAVE_GFX_WIDGETS + if (p_dispwidget->ai_service_overlay_state != 0) gfx_widgets_ai_service_overlay_unload(); +#endif #endif break; case CMD_EVENT_OVERLAY_INIT: @@ -2331,6 +2334,11 @@ bool command_event(enum event_command cmd, void *data) accessibility_narrator_speech_speed, (char*)msg_hash_to_str(MSG_UNPAUSED), 10); #endif +#ifdef HAVE_GFX_WIDGETS + if (p_dispwidget->ai_service_overlay_state != 0) + gfx_widgets_ai_service_overlay_unload(); +#endif + translation_release(true); command_event(CMD_EVENT_UNPAUSE, NULL); } else /* Pause on call */ @@ -2349,17 +2357,25 @@ bool command_event(enum event_command cmd, void *data) * Also, this mode is required for "auto" translation * packages, since you don't want to pause for that. */ - if (access_st->ai_service_auto == 2) + if (access_st->ai_service_auto != 0) { /* Auto mode was turned on, but we pressed the * toggle button, so turn it off now. */ - access_st->ai_service_auto = 0; -#ifdef HAVE_MENU_WIDGETS - gfx_widgets_ai_service_overlay_unload(); + translation_release(true); +#ifdef HAVE_GFX_WIDGETS + if (p_dispwidget->ai_service_overlay_state != 0) + gfx_widgets_ai_service_overlay_unload(); #endif } - else - command_event(CMD_EVENT_AI_SERVICE_CALL, NULL); + else + { +#ifdef HAVE_GFX_WIDGETS + if (p_dispwidget->ai_service_overlay_state != 0) + gfx_widgets_ai_service_overlay_unload(); + else +#endif + command_event(CMD_EVENT_AI_SERVICE_CALL, NULL); + } } #endif break; @@ -4473,10 +4489,6 @@ bool command_event(enum event_command cmd, void *data) if (data) paused = *((bool*)data); - if ( (access_st->ai_service_auto == 0) - && !settings->bools.ai_service_pause) - access_st->ai_service_auto = 1; - run_translation_service(settings, paused); } #endif @@ -7165,6 +7177,9 @@ bool retroarch_main_quit(void) video_driver_state_t*video_st = video_state_get_ptr(); settings_t *settings = config_get_ptr(); bool config_save_on_exit = settings->bools.config_save_on_exit; +#ifdef HAVE_ACCESSIBILITY + access_state_t *access_st = access_state_get_ptr(); +#endif struct retro_system_av_info *av_info = &video_st->av_info; /* Restore video driver before saving */ @@ -7263,6 +7278,17 @@ bool retroarch_main_quit(void) retroarch_menu_running_finished(true); #endif +#ifdef HAVE_ACCESSIBILITY + translation_release(false); +#ifdef HAVE_THREADS + if (access_st->image_lock) + { + slock_free(access_st->image_lock); + access_st->image_lock = NULL; + } +#endif +#endif + return true; } diff --git a/tasks/task_translation.c b/tasks/task_translation.c index 7082a273ca..f989ce595d 100644 --- a/tasks/task_translation.c +++ b/tasks/task_translation.c @@ -29,9 +29,11 @@ #include #include #include +#include #include #include #include +#include #include "../translation_defines.h" #ifdef HAVE_GFX_WIDGETS @@ -47,588 +49,290 @@ #include "../paths.h" #include "../runloop.h" #include "../verbosity.h" +#include "../msg_hash.h" #include "tasks_internal.h" -static void task_auto_translate_handler(retro_task_t *task) +static const char* ACCESS_INPUT_LABELS[] = +{ + "b", "y", "select", "start", "up", "down", "left", "right", + "a", "x", "l", "r", "l2", "r2", "l3", "r3" +}; + +static const char* ACCESS_RESPONSE_KEYS[] = +{ + "image", "sound", "text", "error", "auto", "press", "text_position" +}; + +typedef struct { - int *mode_ptr = (int*)task->user_data; - uint32_t runloop_flags = runloop_get_flags(); - access_state_t *access_st = access_state_get_ptr(); -#ifdef HAVE_ACCESSIBILITY 
- settings_t *settings = config_get_ptr(); -#endif + uint8_t *data; + unsigned size; + unsigned width; + unsigned height; + + unsigned content_x; + unsigned content_y; + unsigned content_width; + unsigned content_height; + unsigned viewport_width; + unsigned viewport_height; +} access_frame_t; - if (task_get_cancelled(task)) - goto task_finished; +typedef struct +{ + char *data; + int length; + char format[4]; +} access_base64_t; - switch (*mode_ptr) - { - case 1: /* Speech Mode */ +typedef struct +{ + char *inputs; + bool paused; +} access_request_t; + +typedef struct +{ + char *image; + int image_size; #ifdef HAVE_AUDIOMIXER - if (!audio_driver_is_ai_service_speech_running()) - goto task_finished; + void *sound; + int sound_size; #endif - break; - case 2: /* Narrator Mode */ + char *error; + char *text; + char *recall; + char *input; + int text_position; +} access_response_t; + +/* UTILITIES ---------------------------------------------------------------- */ +/* -------------------------------------------------------------------------- */ + +/** + * Returns true if the accessibility narrator is currently playing audio. + */ #ifdef HAVE_ACCESSIBILITY - if (!is_narrator_running( - settings->bools.accessibility_enable)) - goto task_finished; -#endif - break; - default: - break; - } - - return; - -task_finished: - if (access_st->ai_service_auto == 1) - access_st->ai_service_auto = 2; - - task_set_finished(task, true); - - if (*mode_ptr == 1 || *mode_ptr == 2) - { - bool was_paused = (runloop_flags & RUNLOOP_FLAG_PAUSED) ? true : false; - command_event(CMD_EVENT_AI_SERVICE_CALL, &was_paused); - } - if (task->user_data) - free(task->user_data); -} - -static void call_auto_translate_task( - settings_t *settings, - bool *was_paused) +bool is_narrator_running(bool accessibility_enable) { - int ai_service_mode = settings->uints.ai_service_mode; access_state_t *access_st = access_state_get_ptr(); - - /*Image Mode*/ - if (ai_service_mode == 0) - { - if (access_st->ai_service_auto == 1) - access_st->ai_service_auto = 2; - - command_event(CMD_EVENT_AI_SERVICE_CALL, was_paused); - } - else /* Speech or Narrator Mode */ - { - int* mode = NULL; - retro_task_t *t = task_init(); - if (!t) - return; - - mode = (int*)malloc(sizeof(int)); - *mode = ai_service_mode; - - t->handler = task_auto_translate_handler; - t->user_data = mode; - t->mute = true; - task_queue_push(t); - } -} - -static void handle_translation_cb( - retro_task_t *task, void *task_data, - void *user_data, const char *error) -{ - uint8_t* raw_output_data = NULL; - char* raw_image_file_data = NULL; - struct scaler_ctx* scaler = NULL; - http_transfer_data_t *data = (http_transfer_data_t*)task_data; - int new_image_size = 0; -#ifdef HAVE_AUDIOMIXER - int new_sound_size = 0; -#endif - void* raw_image_data = NULL; - void* raw_image_data_alpha = NULL; - void* raw_sound_data = NULL; - rjson_t *json = NULL; - int json_current_key = 0; - char* err_str = NULL; - char* txt_str = NULL; - char* auto_str = NULL; - char* key_str = NULL; - settings_t* settings = config_get_ptr(); - uint32_t runloop_flags = runloop_get_flags(); -#ifdef HAVE_ACCESSIBILITY - input_driver_state_t *input_st = input_state_get_ptr(); -#endif - video_driver_state_t - *video_st = video_state_get_ptr(); - const enum retro_pixel_format - video_driver_pix_fmt = video_st->pix_fmt; - access_state_t *access_st = access_state_get_ptr(); -#ifdef HAVE_GFX_WIDGETS - bool gfx_widgets_paused = (video_st->flags & - VIDEO_FLAG_WIDGETS_PAUSED) ? 
true : false; - dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); -#endif -#ifdef HAVE_ACCESSIBILITY - bool accessibility_enable = settings->bools.accessibility_enable; - unsigned accessibility_narrator_speech_speed = settings->uints.accessibility_narrator_speech_speed; -#ifdef HAVE_GFX_WIDGETS - /* When auto mode is on, we turn off the overlay - * once we have the result for the next call.*/ - if (p_dispwidget->ai_service_overlay_state != 0 - && access_st->ai_service_auto == 2) - gfx_widgets_ai_service_overlay_unload(); -#endif -#endif - -#ifdef DEBUG - if (access_st->ai_service_auto != 2) - RARCH_LOG("RESULT FROM AI SERVICE...\n"); -#endif - - if (!data || error || !data->data) - goto finish; - - if (!(json = rjson_open_buffer(data->data, data->len))) - goto finish; - - /* Parse JSON body for the image and sound data */ - for (;;) - { - static const char* keys[] = { "image", "sound", "text", "error", "auto", "press" }; - - const char *str = NULL; - size_t str_len = 0; - enum rjson_type json_type = rjson_next(json); - - if (json_type == RJSON_DONE || json_type == RJSON_ERROR) - break; - if (json_type != RJSON_STRING) - continue; - if (rjson_get_context_type(json) != RJSON_OBJECT) - continue; - str = rjson_get_string(json, &str_len); - - if ((rjson_get_context_count(json) & 1) == 1) - { - int i; - json_current_key = -1; - - for (i = 0; i < (int)ARRAY_SIZE(keys); i++) - { - if (string_is_equal(str, keys[i])) - { - json_current_key = i; - break; - } - } - } - else - { - switch (json_current_key) - { - case 0: /* image */ - raw_image_file_data = (char*)unbase64(str, - (int)str_len, &new_image_size); - break; -#ifdef HAVE_AUDIOMIXER - case 1: /* sound */ - raw_sound_data = (void*)unbase64(str, - (int)str_len, &new_sound_size); - break; -#endif - case 2: /* text */ - txt_str = strdup(str); - break; - case 3: /* error */ - err_str = strdup(str); - break; - case 4: /* auto */ - auto_str = strdup(str); - break; - case 5: /* press */ - key_str = strdup(str); - break; - } - json_current_key = -1; - } - } - - if (string_is_equal(err_str, "No text found.")) - { -#ifdef DEBUG - RARCH_LOG("No text found...\n"); -#endif - if (txt_str) - { - free(txt_str); - txt_str = NULL; - } - - txt_str = (char*)malloc(15); - strlcpy(txt_str, err_str, 15); -#ifdef HAVE_GFX_WIDGETS - if (gfx_widgets_paused) - { - /* In this case we have to unpause and then repause for a frame */ - p_dispwidget->ai_service_overlay_state = 2; - command_event(CMD_EVENT_UNPAUSE, NULL); - } -#endif - } - - if ( !raw_image_file_data - && !raw_sound_data - && !txt_str - && !key_str - && (access_st->ai_service_auto != 2)) - { - error = "Invalid JSON body."; - goto finish; - } - - if (raw_image_file_data) - { - unsigned image_width, image_height; - /* Get the video frame dimensions reference */ - const void *dummy_data = video_st->frame_cache_data; - unsigned width = video_st->frame_cache_width; - unsigned height = video_st->frame_cache_height; - - /* try two different modes for text display * - * In the first mode, we use display widget overlays, but they require - * the video poke interface to be able to load image buffers. - * - * The other method is to draw to the video buffer directly, which needs - * a software core to be running. 
*/ -#ifdef HAVE_GFX_WIDGETS - if ( video_st->poke - && video_st->poke->load_texture - && video_st->poke->unload_texture) - { - enum image_type_enum image_type; - /* Write to overlay */ - if ( raw_image_file_data[0] == 'B' - && raw_image_file_data[1] == 'M') - image_type = IMAGE_TYPE_BMP; - else if ( raw_image_file_data[1] == 'P' - && raw_image_file_data[2] == 'N' - && raw_image_file_data[3] == 'G') - image_type = IMAGE_TYPE_PNG; - else - { - RARCH_LOG("Invalid image type returned from server.\n"); - goto finish; - } - - if (!gfx_widgets_ai_service_overlay_load( - raw_image_file_data, (unsigned)new_image_size, - image_type)) - { - RARCH_LOG("Video driver not supported for AI Service."); - runloop_msg_queue_push( - /* msg_hash_to_str(MSG_VIDEO_DRIVER_NOT_SUPPORTED), */ - "Video driver not supported.", - 1, 180, true, - NULL, MESSAGE_QUEUE_ICON_DEFAULT, MESSAGE_QUEUE_CATEGORY_INFO); - } - else if (gfx_widgets_paused) - { - /* In this case we have to unpause and then repause for a frame */ - /* Unpausing state */ - p_dispwidget->ai_service_overlay_state = 2; - command_event(CMD_EVENT_UNPAUSE, NULL); - } - } - else -#endif - /* Can't use display widget overlays, so try writing to video buffer */ - { - size_t pitch; - /* Write to video buffer directly (software cores only) */ - - /* This is a BMP file coming back. */ - if ( raw_image_file_data[0] == 'B' - && raw_image_file_data[1] == 'M') - { - /* Get image data (24 bit), and convert to the emulated pixel format */ - image_width = - ((uint32_t) ((uint8_t)raw_image_file_data[21]) << 24) + - ((uint32_t) ((uint8_t)raw_image_file_data[20]) << 16) + - ((uint32_t) ((uint8_t)raw_image_file_data[19]) << 8) + - ((uint32_t) ((uint8_t)raw_image_file_data[18]) << 0); - - image_height = - ((uint32_t) ((uint8_t)raw_image_file_data[25]) << 24) + - ((uint32_t) ((uint8_t)raw_image_file_data[24]) << 16) + - ((uint32_t) ((uint8_t)raw_image_file_data[23]) << 8) + - ((uint32_t) ((uint8_t)raw_image_file_data[22]) << 0); - raw_image_data = (void*)malloc(image_width * image_height * 3 * sizeof(uint8_t)); - memcpy(raw_image_data, - raw_image_file_data + 54 * sizeof(uint8_t), - image_width * image_height * 3 * sizeof(uint8_t)); - } - /* PNG coming back from the url */ - else if (raw_image_file_data[1] == 'P' - && raw_image_file_data[2] == 'N' - && raw_image_file_data[3] == 'G') - { - int retval = 0; - rpng_t *rpng = NULL; - image_width = - ((uint32_t) ((uint8_t)raw_image_file_data[16]) << 24)+ - ((uint32_t) ((uint8_t)raw_image_file_data[17]) << 16)+ - ((uint32_t) ((uint8_t)raw_image_file_data[18]) << 8)+ - ((uint32_t) ((uint8_t)raw_image_file_data[19]) << 0); - image_height = - ((uint32_t) ((uint8_t)raw_image_file_data[20]) << 24)+ - ((uint32_t) ((uint8_t)raw_image_file_data[21]) << 16)+ - ((uint32_t) ((uint8_t)raw_image_file_data[22]) << 8)+ - ((uint32_t) ((uint8_t)raw_image_file_data[23]) << 0); - - if (!(rpng = rpng_alloc())) - { - error = "Can't allocate memory."; - goto finish; - } - - rpng_set_buf_ptr(rpng, raw_image_file_data, (size_t)new_image_size); - rpng_start(rpng); - while (rpng_iterate_image(rpng)); - - do - { - retval = rpng_process_image(rpng, &raw_image_data_alpha, - (size_t)new_image_size, &image_width, &image_height); - } while (retval == IMAGE_PROCESS_NEXT); - - /* Returned output from the png processor is an upside down RGBA - * image, so we have to change that to RGB first. 
This should - * probably be replaced with a scaler call.*/ - { - unsigned ui; - int tw, th, tc; - int d = 0; - raw_image_data = (void*)malloc(image_width*image_height*3*sizeof(uint8_t)); - for (ui = 0; ui < image_width * image_height * 4; ui++) - { - if (ui % 4 != 3) - { - tc = d % 3; - th = image_height-d / (image_width * 3) - 1; - tw = (d % (image_width * 3)) / 3; - ((uint8_t*) raw_image_data)[tw * 3 + th * 3 * image_width + tc] = ((uint8_t *)raw_image_data_alpha)[ui]; - d += 1; - } - } - } - rpng_free(rpng); - } - else - { - RARCH_LOG("Output from URL not a valid file type, or is not supported.\n"); - goto finish; - } - - if (!(scaler = (struct scaler_ctx*)calloc(1, sizeof(struct scaler_ctx)))) - goto finish; - - if (dummy_data == RETRO_HW_FRAME_BUFFER_VALID) - { - /* - In this case, we used the viewport to grab the image - and translate it, and we have the translated image in - the raw_image_data buffer. - */ - RARCH_LOG("Hardware frame buffer core, but selected video driver isn't supported.\n"); - goto finish; - } - - /* The assigned pitch may not be reliable. The width of - the video frame can change during run-time, but the - pitch may not, so we just assign it as the width - times the byte depth. - */ - - if (video_driver_pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888) - { - raw_output_data = (uint8_t*)malloc(width * height * 4 * sizeof(uint8_t)); - scaler->out_fmt = SCALER_FMT_ARGB8888; - pitch = width * 4; - scaler->out_stride = (int)pitch; - } - else - { - raw_output_data = (uint8_t*)malloc(width * height * 2 * sizeof(uint8_t)); - scaler->out_fmt = SCALER_FMT_RGB565; - pitch = width * 2; - scaler->out_stride = width; - } - - if (!raw_output_data) - goto finish; - - scaler->in_fmt = SCALER_FMT_BGR24; - scaler->in_width = image_width; - scaler->in_height = image_height; - scaler->out_width = width; - scaler->out_height = height; - scaler->scaler_type = SCALER_TYPE_POINT; - scaler_ctx_gen_filter(scaler); - scaler->in_stride = -1 * width * 3; - - scaler_ctx_scale_direct(scaler, raw_output_data, - (uint8_t*)raw_image_data + (image_height - 1) * width * 3); - video_driver_frame(raw_output_data, image_width, image_height, pitch); - } - } - -#ifdef HAVE_AUDIOMIXER - if (raw_sound_data) - { - audio_mixer_stream_params_t params; - - params.volume = 1.0f; - params.slot_selection_type = AUDIO_MIXER_SLOT_SELECTION_MANUAL; /* user->slot_selection_type; */ - params.slot_selection_idx = 10; - params.stream_type = AUDIO_STREAM_TYPE_SYSTEM; /* user->stream_type; */ - params.type = AUDIO_MIXER_TYPE_WAV; - params.state = AUDIO_STREAM_STATE_PLAYING; - params.buf = raw_sound_data; - params.bufsize = new_sound_size; - params.cb = NULL; - params.basename = NULL; - - audio_driver_mixer_add_stream(¶ms); - - if (raw_sound_data) - { - free(raw_sound_data); - raw_sound_data = NULL; - } - } -#endif - - if (key_str) - { - size_t i; - char key[8]; - size_t length = strlen(key_str); - size_t start = 0; - - for (i = 1; i < length; i++) - { - char t = key_str[i]; - if (i == length - 1 || t == ' ' || t == ',') - { - if (i == length - 1 && t != ' ' && t!= ',') - i++; - - if (i-start > 7) - { - start = i; - continue; - } - - strncpy(key, key_str + start, i-start); - key[i-start] = '\0'; - -#ifdef HAVE_ACCESSIBILITY - if (string_is_equal(key, "b")) - input_st->ai_gamepad_state[0] = 2; - if (string_is_equal(key, "y")) - input_st->ai_gamepad_state[1] = 2; - if (string_is_equal(key, "select")) - input_st->ai_gamepad_state[2] = 2; - if (string_is_equal(key, "start")) - input_st->ai_gamepad_state[3] = 2; - - if 
(string_is_equal(key, "up")) - input_st->ai_gamepad_state[4] = 2; - if (string_is_equal(key, "down")) - input_st->ai_gamepad_state[5] = 2; - if (string_is_equal(key, "left")) - input_st->ai_gamepad_state[6] = 2; - if (string_is_equal(key, "right")) - input_st->ai_gamepad_state[7] = 2; - - if (string_is_equal(key, "a")) - input_st->ai_gamepad_state[8] = 2; - if (string_is_equal(key, "x")) - input_st->ai_gamepad_state[9] = 2; - if (string_is_equal(key, "l")) - input_st->ai_gamepad_state[10] = 2; - if (string_is_equal(key, "r")) - input_st->ai_gamepad_state[11] = 2; - - if (string_is_equal(key, "l2")) - input_st->ai_gamepad_state[12] = 2; - if (string_is_equal(key, "r2")) - input_st->ai_gamepad_state[13] = 2; - if (string_is_equal(key, "l3")) - input_st->ai_gamepad_state[14] = 2; - if (string_is_equal(key, "r3")) - input_st->ai_gamepad_state[15] = 2; -#endif - - if (string_is_equal(key, "pause")) - command_event(CMD_EVENT_PAUSE, NULL); - if (string_is_equal(key, "unpause")) - command_event(CMD_EVENT_UNPAUSE, NULL); - - start = i+1; - } - } - } - -#ifdef HAVE_ACCESSIBILITY - if ( txt_str - && is_accessibility_enabled( + if (is_accessibility_enabled( accessibility_enable, access_st->enabled)) - accessibility_speak_priority( - accessibility_enable, - accessibility_narrator_speech_speed, - txt_str, 10); + { + frontend_ctx_driver_t *frontend = + frontend_state_get_ptr()->current_frontend_ctx; + if (frontend && frontend->is_narrator_running) + return frontend->is_narrator_running(); + } + return false; +} #endif -finish: - if (error) - RARCH_ERR("%s: %s\n", msg_hash_to_str(MSG_DOWNLOAD_FAILED), error); - - if (user_data) - free(user_data); - - if (json) - rjson_free(json); - if (raw_image_file_data) - free(raw_image_file_data); - if (raw_image_data_alpha) - free(raw_image_data_alpha); - if (raw_image_data) - free(raw_image_data); - if (scaler) - free(scaler); - if (err_str) - free(err_str); - if (txt_str) - free(txt_str); - if (raw_output_data) - free(raw_output_data); - - if (auto_str) +/** + * Returns true if array {a} and {b}, both of the same size {size} are equal. + * This method prevents a potential bug with memcmp on some platforms. + */ +static bool u8_array_equal(uint8_t *a, uint8_t *b, int size) +{ + int i = 0; + for (; i < size; i++) { - if (string_is_equal(auto_str, "auto")) - { - bool was_paused = (runloop_flags & RUNLOOP_FLAG_PAUSED) ? true : false; - if ( (access_st->ai_service_auto != 0) - && !settings->bools.ai_service_pause) - call_auto_translate_task(settings, &was_paused); - } - free(auto_str); + if (a[i] != b[i]) + return false; } - if (key_str) - free(key_str); + return true; } -static const char *ai_service_get_str(enum translation_lang id) +/** + * Helper method to simplify accessibility speech usage. This method will only + * use TTS to read the provided text if accessibility has been enabled in the + * frontend or by RetroArch's internal override mechanism. + */ +static void accessibility_speak(const char *text) +{ +#ifdef HAVE_ACCESSIBILITY + settings_t *settings = config_get_ptr(); + unsigned speed = settings->uints.accessibility_narrator_speech_speed; + bool narrator_on = settings->bools.accessibility_enable; + + accessibility_speak_priority(narrator_on, speed, text, 10); +#endif +} + +/** + * Speaks the provided text using TTS. This only happens if the narrator has + * been enabled or the service is running in Narrator mode, in which case it + * must been used even if the user has disabled it. 
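+ *
+ * For reference, the ai_service_mode values used throughout this file follow
+ * the new menu entries: 0 = Image, 1 = Speech, 2 = Narrator, 3 = Text,
+ * 4 = Text + Narrator, 5 = Image + Narrator; hence the check below for
+ * modes 2, 4 and 5, the ones that include the narrator.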
+ */ +static void translation_speak(const char *text) +{ +#ifdef HAVE_ACCESSIBILITY + settings_t *settings = config_get_ptr(); + access_state_t *access_st = access_state_get_ptr(); + + unsigned mode = settings->uints.ai_service_mode; + unsigned speed = settings->uints.accessibility_narrator_speech_speed; + bool narrator_on = settings->bools.accessibility_enable; + + /* Force the use of the narrator in Narrator modes (TTS) */ + if (mode == 2 || mode == 4 || mode == 5 || narrator_on || access_st->enabled) + accessibility_speak_priority(true, speed, text, 10); +#endif +} + +/** + * Displays the given message on screen and returns true. Returns false if no + * {message} is provided (i.e. it is NULL). The message will be displayed as + * information or error depending on the {error} boolean. In addition, it will + * be logged if {error} is true, or if this is a debug build. The message will + * also be played by the accessibility narrator if the user enabled it. + */ +static bool translation_user_message(const char *message, bool error) +{ + if (message) + { + accessibility_speak(message); + runloop_msg_queue_push( + message, 1, 180, true, NULL, MESSAGE_QUEUE_ICON_DEFAULT, + error ? MESSAGE_QUEUE_CATEGORY_ERROR : MESSAGE_QUEUE_CATEGORY_INFO); + if (error) + RARCH_ERR("[Translate] %s\n", message); +#ifdef DEBUG + else + RARCH_LOG("[Translate] %s\n", message); +#endif + return true; + } + return false; +} + +/** + * Displays the given hash on screen and returns true. Returns false if no + * {hash} is provided (i.e. it is NULL). The message will be displayed as + * information or error depending on the {error} boolean. In addition, it will + * be logged if {error} is true, or if this is a debug build. The message will + * also be played by the accessibility narrator if the user enabled it. + */ +static bool translation_hash_message(enum msg_hash_enums hash, bool error) +{ + if (hash) + { + const char *message = msg_hash_to_str(hash); + const char *intl = msg_hash_to_str_us(hash); + + accessibility_speak(message); + runloop_msg_queue_push( + message, 1, 180, true, NULL, MESSAGE_QUEUE_ICON_DEFAULT, + error ? MESSAGE_QUEUE_CATEGORY_ERROR : MESSAGE_QUEUE_CATEGORY_INFO); + if (error) + RARCH_ERR("[Translate] %s\n", intl); +#ifdef DEBUG + else + RARCH_LOG("[Translate] %s\n", intl); +#endif + return true; + } + return false; +} + +/** + * Displays the given message on screen and returns true. Returns false if no + * {message} is provided (i.e. it is NULL). The message will be displayed as + * an error and it will be logged. The message will also be played by the + * accessibility narrator if the user enabled it. + */ +static INLINE bool translation_user_error(const char *message) +{ + return translation_user_message(message, true); +} + +/** + * Displays the given message on screen and returns true. Returns false if no + * {message} is provided (i.e. it is NULL). The message will be displayed as + * information and will only be logged if this is a debug build. The message + * will also be played by the accessibility narrator if the user enabled it. + */ +static INLINE bool translation_user_info(const char *message) +{ + return translation_user_message(message, false); +} + +/** + * Displays the given hash on screen and returns true. Returns false if no + * {hash} is provided (i.e. it is NULL). The message will be displayed as + * an error and it will be logged. The message will also be played by the + * accessibility narrator if the user enabled it. 
+ */ +static INLINE bool translation_hash_error(enum msg_hash_enums hash) +{ + return translation_hash_message(hash, true); +} + +/** + * Displays the given hash on screen and returns true. Returns false if no + * {hash} is provided (i.e. it is NULL). The message will be displayed as + * information and will only be logged if this is a debug build. The message + * will also be played by the accessibility narrator if the user enabled it. + */ +static INLINE bool translation_hash_info(enum msg_hash_enums hash) +{ + return translation_hash_message(hash, false); +} + +/** + * Releases all data held by the service and stops it as soon as possible. + * If {inform} is true, a message will be displayed to the user if the service + * was running in automatic mode to warn them that it is now stopping. + */ +void translation_release(bool inform) +{ +#ifdef HAVE_GFX_WIDGETS + dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); +#endif + access_state_t *access_st = access_state_get_ptr(); + unsigned service_auto_prev = access_st->ai_service_auto; + access_st->ai_service_auto = 0; + +#ifdef DEBUG + RARCH_LOG("[Translate]: AI Service is now stopping.\n"); +#endif + + if (access_st->request_task) + task_set_cancelled(access_st->request_task, true); + if (access_st->response_task) + task_set_cancelled(access_st->response_task, true); + +#ifdef HAVE_THREADS + if (access_st->image_lock) + { + slock_lock(access_st->image_lock); +#endif + if (access_st->last_image) + free(access_st->last_image); + + access_st->last_image = NULL; + access_st->last_image_size = 0; + +#ifdef HAVE_THREADS + slock_unlock(access_st->image_lock); + } +#endif + +#ifdef HAVE_GFX_WIDGETS + if (p_dispwidget->ai_service_overlay_state != 0) + gfx_widgets_ai_service_overlay_unload(); +#endif + + if (inform && service_auto_prev != 0) + translation_hash_info(MSG_AI_AUTO_MODE_DISABLED); +} + +/** + * Returns the string representation of the translation language enum value. + */ +static const char* ai_service_get_str(enum translation_lang id) { switch (id) { @@ -768,120 +472,790 @@ static const char *ai_service_get_str(enum translation_lang id) return ""; } -bool run_translation_service(settings_t *settings, bool paused) +/* AUTOMATION --------------------------------------------------------------- */ +/* -------------------------------------------------------------------------- */ + +/** + * Handler invoking the next automatic request. This method simply waits for + * any previous request to terminate before re-invoking the translation service. + * By delegating this to a task handler we can safely do so in the task thread + * instead of hogging the main thread. 
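+ *
+ * Concretely, the handler below keeps returning (and thus stays queued) while
+ * Speech audio or the narrator is still playing; once playback ends or the
+ * task is cancelled it marks itself finished and, if automatic mode is still
+ * enabled, issues the next CMD_EVENT_AI_SERVICE_CALL.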
+ */ +static void call_auto_translate_hndl(retro_task_t *task) { - struct video_viewport vp; - uint8_t header[54]; - size_t pitch; - unsigned width, height; - const void *data = NULL; - uint8_t *bit24_image = NULL; - uint8_t *bit24_image_prev = NULL; - struct scaler_ctx *scaler = (struct scaler_ctx*) - calloc(1, sizeof(struct scaler_ctx)); - bool error = false; + int *mode_ptr = (int*)task->user_data; + uint32_t runloop_flags = runloop_get_flags(); + access_state_t *access_st = access_state_get_ptr(); + settings_t *settings = config_get_ptr(); - uint8_t *bmp_buffer = NULL; - uint64_t buffer_bytes = 0; - char *bmp64_buffer = NULL; - rjsonwriter_t *jsonwriter = NULL; - const char *json_buffer = NULL; - int bmp64_length = 0; - bool TRANSLATE_USE_BMP = false; - char *sys_lbl = NULL; - core_info_t *core_info = NULL; - video_driver_state_t *video_st = video_state_get_ptr(); - access_state_t *access_st = access_state_get_ptr(); + if (task_get_cancelled(task)) + goto finish; + + switch (*mode_ptr) + { + case 1: /* Speech Mode */ +#ifdef HAVE_AUDIOMIXER + if (!audio_driver_is_ai_service_speech_running()) + goto finish; +#endif + break; + case 2: /* Narrator Mode */ + case 3: /* Text Mode */ + case 4: /* Text + Narrator */ + case 5: /* Image + Narrator */ #ifdef HAVE_ACCESSIBILITY - input_driver_state_t *input_st = input_state_get_ptr(); + if (!is_narrator_running(settings->bools.accessibility_enable)) + goto finish; #endif -#ifdef HAVE_GFX_WIDGETS - dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); - /* For the case when ai service pause is disabled. */ - if ( (p_dispwidget->ai_service_overlay_state != 0) - && (access_st->ai_service_auto == 1)) - { - gfx_widgets_ai_service_overlay_unload(); - goto finish; + break; + default: + goto finish; } -#endif + return; +finish: + task_set_finished(task, true); - /* get the core info here so we can pass long the game name */ - core_info_get_current_core(&core_info); + if (task->user_data) + free(task->user_data); - if (core_info) + /* Final check to see if the user did not disable the service altogether */ + if (access_st->ai_service_auto != 0) { - size_t lbl_len; - const char *lbl = NULL; - const char *sys_id = core_info->system_id - ? core_info->system_id : "core"; - size_t sys_id_len = strlen(sys_id); - const struct playlist_entry *entry = NULL; - playlist_t *current_playlist = playlist_get_cached(); + bool was_paused = runloop_flags & RUNLOOP_FLAG_PAUSED; + command_event(CMD_EVENT_AI_SERVICE_CALL, &was_paused); + } +} - if (current_playlist) +/** + * Invokes the next automatic request. This method delegates the invokation to + * a task to allow for threading. The task will only execute after the polling + * delay configured by the user has been honored since the last request. 
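+ *
+ * For example, with ai_service_poll_delay set to 200 (ms), the task queued
+ * below gets task->when = access_st->last_call + 200 * 1000 (microseconds),
+ * so the next automatic request fires no sooner than 200 ms after the
+ * previous request was sent.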
+ */ +static void call_auto_translate_task(settings_t *settings) +{ + int* mode = NULL; + access_state_t *access_st = access_state_get_ptr(); + int ai_service_mode = settings->uints.ai_service_mode; + unsigned delay = settings->uints.ai_service_poll_delay; + retro_task_t *task = task_init(); + if (!task) + return; + + mode = (int*)malloc(sizeof(int)); + *mode = ai_service_mode; + + task->handler = call_auto_translate_hndl; + task->user_data = mode; + task->mute = true; + task->when = access_st->last_call + (delay * 1000); + task_queue_push(task); +} + +/* RESPONSE ----------------------------------------------------------------- */ +/* -------------------------------------------------------------------------- */ + +/** + * Parses the JSON returned by the translation server and returns structured + * data. May return NULL if the parsing cannot be completed or the JSON is + * malformed. If unsupported keys are provided in the JSON, they will simply + * be ignored. Only the available data will be populated in the returned object + * and everything else will be zero-initialized. + */ +static access_response_t* parse_response_json(http_transfer_data_t *data) +{ + int key = -1; + rjson_t* json = NULL; + char* image_data = NULL; + int image_size = 0; +#ifdef HAVE_AUDIOMIXER + void *sound_data = NULL; + int sound_size = 0; +#endif + access_response_t *response = NULL; + bool empty = true; + enum rjson_type type; + + if (!data || !data->data) + goto finish; + if (!(json = rjson_open_buffer(data->data, data->len))) + goto finish; + if (!(response = (access_response_t*)calloc(1, sizeof(access_response_t)))) + goto finish; + + for (;;) + { + size_t length = 0; + const char *string = NULL; + type = rjson_next(json); + + if (type == RJSON_DONE || type == RJSON_ERROR) + break; + if (rjson_get_context_type(json) != RJSON_OBJECT) + continue; + + if (type == RJSON_STRING && (rjson_get_context_count(json) & 1) == 1) { - playlist_get_index_by_path( - current_playlist, path_get(RARCH_PATH_CONTENT), &entry); - - if (entry && !string_is_empty(entry->label)) - lbl = entry->label; + int i; + string = rjson_get_string(json, &length); + for (i = 0; i < ARRAY_SIZE(ACCESS_RESPONSE_KEYS) && key == -1; i++) + { + if (string_is_equal(string, ACCESS_RESPONSE_KEYS[i])) + key = i; + } + } + else + { + if (type != RJSON_STRING && key < 6) + continue; + else + string = rjson_get_string(json, &length); + + switch (key) + { + case 0: /* image */ + response->image = (length == 0) ? NULL : (char*)unbase64( + string, (int)length, &response->image_size); + break; +#ifdef HAVE_AUDIOMIXER + case 1: /* sound */ + response->sound = (length == 0) ? 
NULL : (void*)unbase64( + string, (int)length, &response->sound_size); + break; +#endif + case 2: /* text */ + response->text = strdup(string); + break; + case 3: /* error */ + response->error = strdup(string); + break; + case 4: /* auto */ + response->recall = strdup(string); + break; + case 5: /* press */ + response->input = strdup(string); + break; + case 6: /* text_position */ + if (type == RJSON_NUMBER) + response->text_position = rjson_get_int(json); + break; + } + key = -1; } - - if (!lbl) - lbl = path_basename(path_get(RARCH_PATH_BASENAME)); - lbl_len = strlen(lbl); - sys_lbl = (char*)malloc(lbl_len + sys_id_len + 3); - memcpy(sys_lbl, sys_id, sys_id_len); - memcpy(sys_lbl + sys_id_len, "__", 2); - memcpy(sys_lbl + 2 + sys_id_len, lbl, lbl_len); - sys_lbl[sys_id_len + 2 + lbl_len] = '\0'; } + + if (type == RJSON_ERROR) + { + RARCH_LOG("[Translate] JSON error: %s\n", rjson_get_error(json)); + translation_user_error("Service returned a malformed JSON"); + free(response); + response = NULL; + } + +finish: + if (json) + rjson_free(json); + else + translation_user_error("Internal error parsing returned JSON."); + + return response; +} - if (!scaler) +/** + * Parses the image data of given type and displays it using widgets. If the + * image widget is already shown, it will be unloaded first automatically. + * This method will disable automatic translation if the widget could not be + * loaded to prevent further errors. + */ +#ifdef HAVE_GFX_WIDGETS +static void translation_response_image_widget( + char *image, int image_length, enum image_type_enum *image_type) +{ + video_driver_state_t *video_st = video_state_get_ptr(); + dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); + access_state_t *access_st = access_state_get_ptr(); + + bool ai_res; + bool gfx_widgets_paused = video_st->flags & VIDEO_FLAG_WIDGETS_PAUSED; + + if (p_dispwidget->ai_service_overlay_state != 0) + gfx_widgets_ai_service_overlay_unload(); + + ai_res = gfx_widgets_ai_service_overlay_load( + image, (unsigned)image_length, (*image_type)); + + if (!ai_res) + { + translation_hash_error(MSG_AI_VIDEO_DRIVER_NOT_SUPPORTED); + translation_release(true); + } + else if (gfx_widgets_paused) + { + /* Unpause for a frame otherwise widgets won't be displayed */ + p_dispwidget->ai_service_overlay_state = 2; + command_event(CMD_EVENT_UNPAUSE, NULL); + } +} +#endif + +/** + * Parses the image buffer, converting the data to the raw image format we need + * to display the image within RetroArch. Writes the raw image data in {body} + * as well as its {width} and {height} as determined by the image header. + * Returns true if the process was successful. 
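+ *
+ * For instance, in the BMP branch the width is read little-endian from header
+ * bytes 18..21: bytes 0x40 0x01 0x00 0x00 decode to 0x140 = 320 pixels, and
+ * the height is read the same way from bytes 22..25. The PNG branch instead
+ * reads the big-endian IHDR dimensions stored at bytes 16..23.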
+ */ +static bool translation_get_image_body( + char *image, int image_size, enum image_type_enum *image_type, + void *body, unsigned *width, unsigned *height) +{ +#ifdef HAVE_RPNG + rpng_t *rpng = NULL; + void *rpng_alpha = NULL; + int rpng_ret = 0; +#endif + + if ((*image_type) == IMAGE_TYPE_BMP) + { + if (image_size < 55) + return false; + + *width = ((uint32_t) ((uint8_t)image[21]) << 24) + + ((uint32_t) ((uint8_t)image[20]) << 16) + + ((uint32_t) ((uint8_t)image[19]) << 8) + + ((uint32_t) ((uint8_t)image[18]) << 0); + *height = ((uint32_t) ((uint8_t)image[25]) << 24) + + ((uint32_t) ((uint8_t)image[24]) << 16) + + ((uint32_t) ((uint8_t)image[23]) << 8) + + ((uint32_t) ((uint8_t)image[22]) << 0); + + image_size = (*width) * (*height) * 3 * sizeof(uint8_t); + body = (void*)malloc(image_size); + if (!body) + return false; + + memcpy(body, image + 54 * sizeof(uint8_t), image_size); + return true; + } + +#ifdef HAVE_RPNG + else if ((*image_type) == IMAGE_TYPE_PNG) + { + if (image_size < 24) + return false; + if (!(rpng = rpng_alloc())) + return false; + + *width = ((uint32_t) ((uint8_t)image[16]) << 24) + + ((uint32_t) ((uint8_t)image[17]) << 16) + + ((uint32_t) ((uint8_t)image[18]) << 8) + + ((uint32_t) ((uint8_t)image[19]) << 0); + *height = ((uint32_t) ((uint8_t)image[20]) << 24) + + ((uint32_t) ((uint8_t)image[21]) << 16) + + ((uint32_t) ((uint8_t)image[22]) << 8) + + ((uint32_t) ((uint8_t)image[23]) << 0); + + rpng_set_buf_ptr(rpng, image, (size_t)image_size); + rpng_start(rpng); + while (rpng_iterate_image(rpng)); + + do + { + rpng_ret = rpng_process_image( + rpng, &rpng_alpha, (size_t)image_size, width, height); + } while (rpng_ret == IMAGE_PROCESS_NEXT); + + /* + * Returned output from the png processor is an upside down RGBA + * image, so we have to change that to RGB first. This should + * probably be replaced with a scaler call. + */ + { + int d = 0; + int tw, th, tc; + unsigned ui; + image_size = (*width) * (*height) * 3 * sizeof(uint8_t); + body = (void*)malloc(image_size); + if (!body) + { + free(rpng_alpha); + rpng_free(rpng); + return false; + } + + for (ui = 0; ui < (*width) * (*height) * 4; ui++) + { + if (ui % 4 != 3) + { + tc = d % 3; + th = (*height) - d / (3 * (*width)) - 1; + tw = (d % ((*width) * 3)) / 3; + ((uint8_t*) body)[tw * 3 + th * 3 * (*width) + tc] + = ((uint8_t*)rpng_alpha)[ui]; + d++; + } + } + } + free(rpng_alpha); + rpng_free(rpng); + return true; + } +#endif + + return false; +} + +/** + * Displays the raw image on screen by directly writing to the frame buffer. + * This method may fail depending on the current video driver. + */ + /* TODO/FIXME: Does nothing with Vulkan apparently? 
*/ +static void translation_response_image_direct( + char *image, int image_size, enum image_type_enum *image_type) +{ + size_t pitch; + unsigned width; + unsigned height; + unsigned vp_width; + unsigned vp_height; + + void *image_body = NULL; + uint8_t *raw_output_data = NULL; + size_t raw_output_size = 0; + const void *dummy_data = NULL; + struct scaler_ctx *scaler = NULL; + video_driver_state_t *video_st = video_state_get_ptr(); + const enum retro_pixel_format video_driver_pix_fmt = video_st->pix_fmt; + + if (!(translation_get_image_body( + image, image_size, image_type, image_body, &width, &height))) goto finish; - data = video_st->frame_cache_data; - width = video_st->frame_cache_width; - height = video_st->frame_cache_height; - pitch = video_st->frame_cache_pitch; + if (!(scaler = (struct scaler_ctx*)calloc(1, sizeof(struct scaler_ctx)))) + goto finish; + + dummy_data = video_st->frame_cache_data; + vp_width = video_st->frame_cache_width; + vp_height = video_st->frame_cache_height; + pitch = video_st->frame_cache_pitch; + + if (!vp_width || !vp_height) + goto finish; + + if (dummy_data == RETRO_HW_FRAME_BUFFER_VALID) + { + /* In this case, we used the viewport to grab the image and translate it, + * and we have the translated image in the image_body buffer. */ + translation_user_error("Video driver unsupported for hardware frame."); + translation_release(true); + goto finish; + } + + /* + * The assigned pitch may not be reliable. The width of the video frame can + * change during run-time, but the pitch may not, so we just assign it as + * the width times the byte depth. + */ + if (video_driver_pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888) + { + raw_output_size = vp_width * vp_height * 4 * sizeof(uint8_t); + raw_output_data = (uint8_t*)malloc(raw_output_size); + scaler->out_fmt = SCALER_FMT_ARGB8888; + scaler->out_stride = vp_width * 4; + pitch = vp_width * 4; + } + else + { + raw_output_size = vp_width * vp_height * 2 * sizeof(uint8_t); + raw_output_data = (uint8_t*)malloc(raw_output_size); + scaler->out_fmt = SCALER_FMT_RGB565; + scaler->out_stride = vp_width * 1; + pitch = vp_width * 2; + } + + if (!raw_output_data) + goto finish; + + scaler->in_fmt = SCALER_FMT_BGR24; + scaler->in_width = width; + scaler->in_height = height; + scaler->out_width = vp_width; + scaler->out_height = vp_height; + scaler->scaler_type = SCALER_TYPE_POINT; + scaler_ctx_gen_filter(scaler); + + scaler->in_stride = -1 * vp_width * 3; + + scaler_ctx_scale_direct( + scaler, raw_output_data, + (uint8_t*)image_body + (height - 1) * width * 3); + video_driver_frame(raw_output_data, width, height, pitch); + +finish: + if (image_body) + free(image_body); + if (scaler) + free(scaler); + if (raw_output_data) + free(raw_output_data); +} + +/** + * Parses image data received by the server following a translation request. + * This method assumes that image data is present in the response, it cannot + * be null. If widgets are supported, this method will prefer using them to + * overlay the picture on top of the video, otherwise it will try to write the + * data directly into the frame buffer, which is much less reliable. + */ +static void translation_response_image_hndl(retro_task_t *task) +{ + /* + * TODO/FIXME: Moved processing to the callback to fix an issue with + * texture loading off the main thread in OpenGL. I'm leaving the original + * structure here so we can move back to the handler if it becomes possible + * in the future. 
+ */ + task_set_finished(task, true); +} + +/** + * Callback invoked once the image data received from the server has been + * processed and eventually displayed. This is necessary to ensure that the + * next automatic request will be invoked once the task is finished. + */ +static void translation_response_image_cb( + retro_task_t *task, void *task_data, void *user_data, const char *error) +{ + settings_t* settings = config_get_ptr(); + access_state_t *access_st = access_state_get_ptr(); + + enum image_type_enum image_type; + access_response_t *response = (access_response_t*)task->user_data; + video_driver_state_t *video_st = video_state_get_ptr(); + + if (task_get_cancelled(task) || response->image_size < 4) + goto finish; + + if ( response->image[0] == 'B' + && response->image[1] == 'M') + image_type = IMAGE_TYPE_BMP; +#ifdef HAVE_RPNG + else if (response->image[1] == 'P' + && response->image[2] == 'N' + && response->image[3] == 'G') + image_type = IMAGE_TYPE_PNG; +#endif + else + { + translation_user_error("Service returned an unsupported image type."); + translation_release(true); + goto finish; + } + +#ifdef HAVE_GFX_WIDGETS + if ( video_st->poke + && video_st->poke->load_texture + && video_st->poke->unload_texture) + translation_response_image_widget( + response->image, response->image_size, &image_type); + else +#endif + translation_response_image_direct( + response->image, response->image_size, &image_type); + +finish: + free(response->image); + free(response); + + if (access_st->ai_service_auto != 0) + call_auto_translate_task(settings); +} + +/** + * Processes text data received by the server following a translation request. + * Does nothing if the response does not contain any text data (NULL). Text + * is either forcibly read by the narrator, even if it is disabled in the + * front-end (Narrator Mode) or displayed on screen (in Text Mode). In the + * later, it will only be read if the front-end narrator is enabled. + */ +static void translation_response_text(access_response_t *response) +{ + settings_t *settings = config_get_ptr(); + unsigned service_mode = settings->uints.ai_service_mode; + access_state_t *access_st = access_state_get_ptr(); + + if ( (!response->text || string_is_empty(response->text)) + && (service_mode == 2 || service_mode == 3 || service_mode == 4) + && access_st->ai_service_auto == 0) + { + translation_hash_info(MSG_AI_NOTHING_TO_TRANSLATE); + return; + } + + if (response->text) + { + /* The text should be displayed on screen in Text or Text+Narrator mode */ + if (service_mode == 3 || service_mode == 4) + { +#ifdef HAVE_GFX_WIDGETS + if (settings->bools.menu_enable_widgets) + { + dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); + + if (p_dispwidget->ai_service_overlay_state == 1) + gfx_widgets_ai_service_overlay_unload(); + + strlcpy(p_dispwidget->ai_service_text, response->text, 255); + + if (response->text_position > 0) + p_dispwidget->ai_service_text_position + = (unsigned)response->text_position; + else + p_dispwidget->ai_service_text_position = 0; + + p_dispwidget->ai_service_overlay_state = 1; + } + else + { +#endif + /* + * TODO/FIXME: Obviously this will not be as good as using widgets, + * since messages run on a timer but it's an alternative at least. + * Maybe split the message here so it fits the viewport. 
+             */
+            runloop_msg_queue_push(
+                  response->text, 2, 180,
+                  true, NULL, MESSAGE_QUEUE_ICON_DEFAULT,
+                  MESSAGE_QUEUE_CATEGORY_INFO);
+
+#ifdef HAVE_GFX_WIDGETS
+         }
+#endif
+      }
+      translation_speak(&response->text[0]);
+      free(response->text);
+   }
+}
+
+/**
+ * Processes audio data received by the server following a translation request.
+ * Does nothing if the response does not contain any audio data (NULL). Audio
+ * data is simply played as soon as possible using the audio driver.
+ */
+static void translation_response_sound(access_response_t *response)
+{
+#ifdef HAVE_AUDIOMIXER
+   if (response->sound)
+   {
+      audio_mixer_stream_params_t params;
+
+      params.volume = 1.0f;
+      /* user->slot_selection_type; */
+      params.slot_selection_type = AUDIO_MIXER_SLOT_SELECTION_MANUAL;
+      params.slot_selection_idx = 10;
+      /* user->stream_type; */
+      params.stream_type = AUDIO_STREAM_TYPE_SYSTEM;
+      params.type = AUDIO_MIXER_TYPE_WAV;
+      params.state = AUDIO_STREAM_STATE_PLAYING;
+      params.buf = response->sound;
+      params.bufsize = response->sound_size;
+      params.cb = NULL;
+      params.basename = NULL;
+
+      audio_driver_mixer_add_stream(&params);
+      free(response->sound);
+   }
+#endif
+}
+
+/**
+ * Processes input data received by the server following a translation request.
+ * Does nothing if the response does not contain any input data (NULL). This
+ * method will try to forcibly press all the retropad keys listed in the input
+ * string (comma-separated).
+ */
+static void translation_response_input(access_response_t *response)
+{
+   if (response->input)
+   {
+#ifdef HAVE_ACCESSIBILITY
+      input_driver_state_t *input_st = input_state_get_ptr();
+#endif
+      char *token = strtok(response->input, ",");
+
+      while (token)
+      {
+         if (string_is_equal(token, "pause"))
+            command_event(CMD_EVENT_PAUSE, NULL);
+         else if (string_is_equal(token, "unpause"))
+            command_event(CMD_EVENT_UNPAUSE, NULL);
+#ifdef HAVE_ACCESSIBILITY
+         else
+         {
+            int i;
+
+            /* Compare the current token (not the whole input string) to the
+             * known retropad labels and press the key that matches. */
+            for (i = 0; i < ARRAY_SIZE(ACCESS_INPUT_LABELS); i++)
+            {
+               if (string_is_equal(ACCESS_INPUT_LABELS[i], token))
+               {
+                  input_st->ai_gamepad_state[i] = 2;
+                  break;
+               }
+            }
+         }
+#endif
+         token = strtok(NULL, ",");
+      }
+      free(response->input);
+   }
+}
+
+/**
+ * Callback invoked when the server responds to our translation request. If the
+ * service is still running by then, this method will parse the JSON payload
+ * and process the data, eventually re-invoking the translation service for
+ * a new request if the server allowed automatic translation.
+ */
+static void translation_response_cb(
+      retro_task_t *task, void *task_data, void *user_data, const char *error)
+{
+   http_transfer_data_t *data = (http_transfer_data_t*)task_data;
+   access_state_t *access_st = access_state_get_ptr();
+   settings_t *settings = config_get_ptr();
+   access_response_t *response = NULL;
+   bool auto_mode_prev = access_st->ai_service_auto;
+   unsigned service_mode = settings->uints.ai_service_mode;
+
+   /* We asked the service to stop by calling translation_release, so bail */
+   if (!access_st->last_image)
+      goto finish;
+   if (translation_user_error(error))
+      goto abort;
+   if (!(response = parse_response_json(data)))
+      goto abort;
+   if (translation_user_error(response->error))
+      goto abort;
+
+   access_st->ai_service_auto = (response->recall == NULL) ? 0 : 1;
+   if (auto_mode_prev != access_st->ai_service_auto)
+      translation_hash_info(auto_mode_prev
+            ?
MSG_AI_AUTO_MODE_DISABLED : MSG_AI_AUTO_MODE_ENABLED); + + /* + * We want to skip the data on auto=continue, unless automatic translation + * has just been enabled, meaning data must be displayed again to the user. + */ + if ( !string_is_equal(response->recall, "continue") + || (auto_mode_prev == 0 && access_st->ai_service_auto == 1)) + { +#ifdef HAVE_GFX_WIDGETS + dispgfx_widget_t *p_dispwidget = dispwidget_get_ptr(); + if (p_dispwidget->ai_service_overlay_state != 0) + gfx_widgets_ai_service_overlay_unload(); +#endif + translation_response_text(response); + translation_response_sound(response); + translation_response_input(response); + + if (response->image) + { + retro_task_t *task = task_init(); + if (!task) + goto finish; + + task->handler = translation_response_image_hndl; + task->callback = translation_response_image_cb; + task->user_data = response; + task->mute = true; + access_st->response_task = task; + task_queue_push(task); + + /* Leave memory clean-up and auto callback to the task itself */ + return; + } + else if (access_st->ai_service_auto == 0 + && (service_mode == 0 || service_mode == 5)) + translation_hash_info(MSG_AI_NOTHING_TO_TRANSLATE); + } + goto finish; + +abort: + translation_release(true); + if (response && response->error) + free(response->error); + +finish: + if (response) + { + if (response->image) + free(response->image); + if (response->recall) + free(response->recall); + free(response); + + if (access_st->ai_service_auto != 0) + call_auto_translate_task(settings); + } +} + +/* REQUEST ------------------------------------------------------------------ */ +/* -------------------------------------------------------------------------- */ + +/** + * Grabs and returns a frame from the video driver. If the frame buffer cannot + * be accessed, this method will try to obtain a capture of the viewport as a + * fallback, although this frame may be altered by any filter or shader enabled + * by the user. Returns null if both methods fail. + */ +static access_frame_t* translation_grab_frame() +{ + size_t pitch; + struct video_viewport vp = {0}; + const void *data = NULL; + uint8_t *bit24_image_prev = NULL; + struct scaler_ctx *scaler = NULL; + access_frame_t *frame = NULL; + video_driver_state_t *video_st = video_state_get_ptr(); + const enum retro_pixel_format pix_fmt = video_st->pix_fmt; + + if (!(scaler = (struct scaler_ctx*)calloc(1, sizeof(struct scaler_ctx)))) + goto finish; + if (!(frame = (access_frame_t*)malloc(sizeof(access_frame_t)))) + goto finish; + + data = video_st->frame_cache_data; + frame->width = video_st->frame_cache_width; + frame->height = video_st->frame_cache_height; + pitch = video_st->frame_cache_pitch; if (!data) goto finish; + video_driver_get_viewport_info(&vp); + if (!vp.width || !vp.height) + goto finish; + + frame->content_x = vp.x; + frame->content_y = vp.y; + frame->content_width = vp.width; + frame->content_height = vp.height; + frame->viewport_width = vp.full_width; + frame->viewport_height = vp.full_height; + frame->size = frame->width * frame->height * 3; + + if (!(frame->data = (uint8_t*)malloc(frame->size))) + goto finish; + if (data == RETRO_HW_FRAME_BUFFER_VALID) { - /* - The direct frame capture didn't work, so try getting it - from the viewport instead. This isn't as good as the - raw frame buffer, since the viewport may us bilinear - filtering, or other shaders that will completely trash - the OCR, but it's better than nothing. 
- */ - vp.x = 0; - vp.y = 0; - vp.width = 0; - vp.height = 0; - vp.full_width = 0; - vp.full_height = 0; - - video_driver_get_viewport_info(&vp); - - if (!vp.width || !vp.height) - goto finish; - - bit24_image_prev = (uint8_t*)malloc(vp.width * vp.height * 3); - bit24_image = (uint8_t*)malloc(width * height * 3); - - if (!bit24_image_prev || !bit24_image) + /* Direct frame capture failed, fallback on viewport capture */ + if (!(bit24_image_prev = (uint8_t*)malloc(vp.width * vp.height * 3))) goto finish; if (!( video_st->current_video->read_viewport && video_st->current_video->read_viewport( video_st->data, bit24_image_prev, false))) { - RARCH_LOG("Could not read viewport for translation service...\n"); + translation_user_error("Could not read viewport."); + translation_release(true); goto finish; } @@ -891,275 +1265,535 @@ bool run_translation_service(settings_t *settings, bool paused) scaler->scaler_type = SCALER_TYPE_POINT; scaler->in_width = vp.width; scaler->in_height = vp.height; - scaler->out_width = width; - scaler->out_height = height; + scaler->out_width = frame->width; + scaler->out_height = frame->height; scaler_ctx_gen_filter(scaler); - scaler->in_stride = vp.width*3; - scaler->out_stride = width*3; - scaler_ctx_scale_direct(scaler, bit24_image, bit24_image_prev); + scaler->in_stride = vp.width * 3; + scaler->out_stride = frame->width * 3; + scaler_ctx_scale_direct(scaler, frame->data, bit24_image_prev); } else { - const enum retro_pixel_format - video_driver_pix_fmt = video_st->pix_fmt; - /* This is a software core, so just change the pixel format to 24-bit. */ - if (!(bit24_image = (uint8_t*)malloc(width * height * 3))) - goto finish; - - if (video_driver_pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888) + /* This is a software core, so just change the pixel format to 24-bit */ + if (pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888) scaler->in_fmt = SCALER_FMT_ARGB8888; else scaler->in_fmt = SCALER_FMT_RGB565; + video_frame_convert_to_bgr24( - scaler, - (uint8_t *)bit24_image, - (const uint8_t*)data + ((int)height - 1)*pitch, - width, height, - (int)-pitch); + scaler, frame->data, (const uint8_t*)data, + frame->width, frame->height, (int)pitch); } scaler_ctx_gen_reset(scaler); - - if (!bit24_image) - { - error = true; - goto finish; - } - - if (TRANSLATE_USE_BMP) - { - /* - At this point, we should have a screenshot in the buffer, - so allocate an array to contain the BMP image along with - the BMP header as bytes, and then covert that to a - b64 encoded array for transport in JSON. - */ - form_bmp_header(header, width, height, false); - if (!(bmp_buffer = (uint8_t*)malloc(width * height * 3 + 54))) - goto finish; - - memcpy(bmp_buffer, header, 54 * sizeof(uint8_t)); - memcpy(bmp_buffer + 54, - bit24_image, - width * height * 3 * sizeof(uint8_t)); - buffer_bytes = sizeof(uint8_t) * (width * height * 3 + 54); - } - else - { - pitch = width * 3; - bmp_buffer = rpng_save_image_bgr24_string( - bit24_image + width * (height-1) * 3, - width, height, (signed)-pitch, &buffer_bytes); - } - - if (!(bmp64_buffer = base64((void *)bmp_buffer, - (int)(sizeof(uint8_t) * buffer_bytes), - &bmp64_length))) - goto finish; - - if (!(jsonwriter = rjsonwriter_open_memory())) - goto finish; - - rjsonwriter_raw(jsonwriter, "{", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string(jsonwriter, "image"); - rjsonwriter_raw(jsonwriter, ":", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string_len(jsonwriter, bmp64_buffer, bmp64_length); - - /* Form request... 
*/ - if (sys_lbl) - { - rjsonwriter_raw(jsonwriter, ",", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string(jsonwriter, "label"); - rjsonwriter_raw(jsonwriter, ":", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string(jsonwriter, sys_lbl); - } - - rjsonwriter_raw(jsonwriter, ",", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string(jsonwriter, "state"); - rjsonwriter_raw(jsonwriter, ":", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_raw(jsonwriter, "{", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string(jsonwriter, "paused"); - rjsonwriter_raw(jsonwriter, ":", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_rawf(jsonwriter, "%u", (paused ? 1 : 0)); - { - static const char* state_labels[] = { "b", "y", "select", "start", "up", "down", "left", "right", "a", "x", "l", "r", "l2", "r2", "l3", "r3" }; - int i; - for (i = 0; i < (int)ARRAY_SIZE(state_labels); i++) - { - rjsonwriter_raw(jsonwriter, ",", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_add_string(jsonwriter, state_labels[i]); - rjsonwriter_raw(jsonwriter, ":", 1); - rjsonwriter_raw(jsonwriter, " ", 1); -#ifdef HAVE_ACCESSIBILITY - rjsonwriter_rawf(jsonwriter, "%u", - (input_st->ai_gamepad_state[i] ? 1 : 0)); -#else - rjsonwriter_rawf(jsonwriter, "%u", 0); -#endif - } - } - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_raw(jsonwriter, "}", 1); - rjsonwriter_raw(jsonwriter, " ", 1); - rjsonwriter_raw(jsonwriter, "}", 1); - - if (!(json_buffer = rjsonwriter_get_memory_buffer(jsonwriter, NULL))) - goto finish; /* ran out of memory */ - -#ifdef DEBUG - if (access_st->ai_service_auto != 2) - RARCH_LOG("Request size: %d\n", bmp64_length); -#endif - { - char new_ai_service_url[PATH_MAX_LENGTH]; - char separator = '?'; - unsigned ai_service_source_lang = settings->uints.ai_service_source_lang; - unsigned ai_service_target_lang = settings->uints.ai_service_target_lang; - const char *ai_service_url = settings->arrays.ai_service_url; - size_t _len = strlcpy(new_ai_service_url, - ai_service_url, sizeof(new_ai_service_url)); - - /* if query already exists in url, then use &'s instead */ - if (strrchr(new_ai_service_url, '?')) - separator = '&'; - - /* source lang */ - if (ai_service_source_lang != TRANSLATION_LANG_DONT_CARE) - { - const char *lang_source = ai_service_get_str( - (enum translation_lang)ai_service_source_lang); - - if (!string_is_empty(lang_source)) - { - new_ai_service_url[ _len] = separator; - new_ai_service_url[++_len] = '\0'; - _len += strlcpy(new_ai_service_url + _len, - "source_lang=", - sizeof(new_ai_service_url) - _len); - _len += strlcpy(new_ai_service_url + _len, - lang_source, - sizeof(new_ai_service_url) - _len); - separator = '&'; - } - } - - /* target lang */ - if (ai_service_target_lang != TRANSLATION_LANG_DONT_CARE) - { - const char *lang_target = ai_service_get_str( - (enum translation_lang)ai_service_target_lang); - - if (!string_is_empty(lang_target)) - { - new_ai_service_url[ _len] = separator; - new_ai_service_url[++_len] = '\0'; - _len += strlcpy(new_ai_service_url + _len, - "target_lang=", - sizeof(new_ai_service_url) - _len); - _len += strlcpy(new_ai_service_url + _len, - lang_target, - sizeof(new_ai_service_url) - _len); - separator = '&'; - } - } - - /* mode */ - { - unsigned ai_service_mode = settings->uints.ai_service_mode; - /*"image" is included for backwards compatability with - * vgtranslate < 1.04 */ - - new_ai_service_url[ _len] = separator; - new_ai_service_url[++_len] = '\0'; - _len += 
strlcpy(new_ai_service_url + _len, - "output=", - sizeof(new_ai_service_url) - _len); - - switch (ai_service_mode) - { - case 2: - strlcpy(new_ai_service_url + _len, - "text", - sizeof(new_ai_service_url) - _len); - break; - case 1: - case 3: - _len += strlcpy(new_ai_service_url + _len, - "sound,wav", - sizeof(new_ai_service_url) - _len); - if (ai_service_mode == 1) - break; - /* fall-through intentional for ai_service_mode == 3 */ - case 0: - _len += strlcpy(new_ai_service_url + _len, - "image,png", - sizeof(new_ai_service_url) - _len); -#ifdef HAVE_GFX_WIDGETS - if ( video_st->poke - && video_st->poke->load_texture - && video_st->poke->unload_texture) - strlcpy(new_ai_service_url + _len, - ",png-a", - sizeof(new_ai_service_url) - _len); -#endif - break; - default: - break; - } - - } -#ifdef DEBUG - if (access_st->ai_service_auto != 2) - RARCH_LOG("SENDING... %s\n", new_ai_service_url); -#endif - task_push_http_post_transfer(new_ai_service_url, - json_buffer, true, NULL, handle_translation_cb, NULL); - } - - error = false; + finish: if (bit24_image_prev) free(bit24_image_prev); - if (bit24_image) - free(bit24_image); - if (scaler) free(scaler); - if (bmp_buffer) - free(bmp_buffer); - - if (bmp64_buffer) - free(bmp64_buffer); - if (sys_lbl) - free(sys_lbl); - if (jsonwriter) - rjsonwriter_free(jsonwriter); - return !error; -} - -#ifdef HAVE_ACCESSIBILITY -bool is_narrator_running(bool accessibility_enable) -{ - access_state_t *access_st = access_state_get_ptr(); - if (is_accessibility_enabled( - accessibility_enable, - access_st->enabled)) + if (frame) { - frontend_ctx_driver_t *frontend = - frontend_state_get_ptr()->current_frontend_ctx; - if (frontend && frontend->is_narrator_running) - return frontend->is_narrator_running(); + if (frame->data) + return frame; + else + free(frame); } - return true; + return NULL; } + +/** + * Returns true if the {frame} passed in parameter is a duplicate of the last + * frame the service was invoked on. This method effectively helps to prevent + * the service from spamming the server with the same request over and over + * again when running in automatic mode. This method will also save the image + * in the {frame} structure as the new last image for the service. + */ +static bool translation_dupe_fail(access_frame_t *frame) +{ + access_state_t *access_st = access_state_get_ptr(); + bool size_equal = (frame->size == access_st->last_image_size); + bool has_failed = false; + +#ifdef HAVE_THREADS + slock_lock(access_st->image_lock); #endif + if (access_st->last_image && access_st->ai_service_auto != 0) + { + if ( size_equal + && u8_array_equal(frame->data, access_st->last_image, frame->size)) + has_failed = true; + } + + /* Init last image or reset buffer size if image size changed */ + if (!has_failed && (!access_st->last_image || !size_equal)) + { + if (access_st->last_image) + free(access_st->last_image); + + access_st->last_image_size = frame->size; + if (!(access_st->last_image = (uint8_t*)malloc(frame->size))) + has_failed = true; + } + + if (!has_failed) + memcpy(access_st->last_image, frame->data, frame->size); + +#ifdef HAVE_THREADS + slock_unlock(access_st->image_lock); +#endif + return has_failed; +} + +/** + * Converts and returns the {frame} as a base64 encoded PNG or BMP. The + * selected image type will be available in the returned object, and will + * favor PNG if possible. Returns NULL on failure. 
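+ *
+ * Usage sketch: on success the base64 payload is available in encode->data
+ * (encode->length characters) and encode->format names the encoded format,
+ * "png" when HAVE_RPNG is available, "bmp" otherwise.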
+ */ +static access_base64_t* translation_frame_encode(access_frame_t *frame) +{ + uint8_t header[54]; + uint8_t *buffer = NULL; + uint64_t bytes = 0; + access_base64_t *encode = NULL; + + if (!(encode = (access_base64_t*)malloc(sizeof(access_base64_t)))) + goto finish; + +#ifdef HAVE_RPNG + strcpy(encode->format, "png"); + buffer = rpng_save_image_bgr24_string( + frame->data, frame->width, frame->height, + frame->width * 3, &bytes); +#else + strcpy(encode->format, "bmp"); + form_bmp_header(header, frame->width, frame->height, false); + if (!(buffer = (uint8_t*)malloc(frame->size + 54))) + goto finish; + + memcpy(buffer, header, 54 * sizeof(uint8_t)); + memcpy(buffer + 54, frame->data, frame->size * sizeof(uint8_t)); + bytes = sizeof(uint8_t) * (frame->size + 54); +#endif + + encode->data = base64( + (void*)buffer, (int)(bytes * sizeof(uint8_t)), &encode->length); + +finish: + if (buffer) + free(buffer); + + if (encode->data) + return encode; + else + free(encode); + + return NULL; +} + +/** + * Returns a newly allocated string describing the content and core currently + * running. The string will contains the name of the core (or 'core') followed + * by a double underscore (_) and the name of the content. Returns NULL on + * failure. + */ +static char* translation_get_content_label() +{ + const char *label = NULL; + char* system_label = NULL; + core_info_t *core_info = NULL; + + core_info_get_current_core(&core_info); + if (core_info) + { + const struct playlist_entry *entry = NULL; + playlist_t *current_playlist = playlist_get_cached(); + const char *system_id; + size_t system_id_len; + size_t label_len; + + system_id = (core_info->system_id) ? core_info->system_id : "core"; + system_id_len = strlen(system_id); + + if (current_playlist) + { + playlist_get_index_by_path( + current_playlist, path_get(RARCH_PATH_CONTENT), &entry); + + if (entry && !string_is_empty(entry->label)) + label = entry->label; + } + + if (!label) + label = path_basename(path_get(RARCH_PATH_BASENAME)); + + label_len = strlen(label); + if (!(system_label = (char*)malloc(label_len + system_id_len + 3))) + return NULL; + + memcpy(system_label, system_id, system_id_len); + memcpy(system_label + system_id_len, "__", 2); + memcpy(system_label + 2 + system_id_len, label, label_len); + system_label[system_id_len + 2 + label_len] = '\0'; + } + + return system_label; +} + +/** + * Creates and returns a JSON writer containing the payload to send alongside + * the translation request. {label} may be NULL, in which case no label will + * be supplied in the JSON. Returns NULL if the writer cannot be initialized. 
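+ *
+ * The resulting payload looks roughly like this (values are illustrative and
+ * the input keys come from ACCESS_INPUT_LABELS):
+ *
+ *   {
+ *     "image": "<base64>", "format": "png",
+ *     "coords": [0, 0, 640, 480], "viewport": [640, 480],
+ *     "label": "core__MyGame",
+ *     "state": { "paused": 0, "a": 0, "b": 0, ... }
+ *   }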
+ */ +static rjsonwriter_t* build_request_json( + access_base64_t *image, access_request_t *request, + access_frame_t *frame, char *label) +{ + unsigned i; + rjsonwriter_t* writer = NULL; + + if (!(writer = rjsonwriter_open_memory())) + return NULL; + + rjsonwriter_add_start_object(writer); + { + rjsonwriter_add_string(writer, "image"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_string_len(writer, image->data, image->length); + + rjsonwriter_add_comma(writer); + rjsonwriter_add_string(writer, "format"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_string(writer, image->format); + + rjsonwriter_add_comma(writer); + rjsonwriter_add_string(writer, "coords"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_start_array(writer); + { + rjsonwriter_add_unsigned(writer, frame->content_x); + rjsonwriter_add_comma(writer); + rjsonwriter_add_unsigned(writer, frame->content_y); + rjsonwriter_add_comma(writer); + rjsonwriter_add_unsigned(writer, frame->content_width); + rjsonwriter_add_comma(writer); + rjsonwriter_add_unsigned(writer, frame->content_height); + } + rjsonwriter_add_end_array(writer); + + rjsonwriter_add_comma(writer); + rjsonwriter_add_string(writer, "viewport"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_start_array(writer); + { + rjsonwriter_add_unsigned(writer, frame->viewport_width); + rjsonwriter_add_comma(writer); + rjsonwriter_add_unsigned(writer, frame->viewport_height); + } + rjsonwriter_add_end_array(writer); + + if (label) + { + rjsonwriter_add_comma(writer); + rjsonwriter_add_string(writer, "label"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_string(writer, label); + } + + rjsonwriter_add_comma(writer); + rjsonwriter_add_string(writer, "state"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_start_object(writer); + { + rjsonwriter_add_string(writer, "paused"); + rjsonwriter_add_colon(writer); + rjsonwriter_add_unsigned(writer, (request->paused ? 1 : 0)); + + for (i = 0; i < ARRAY_SIZE(ACCESS_INPUT_LABELS); i++) + { + rjsonwriter_add_comma(writer); + rjsonwriter_add_string(writer, ACCESS_INPUT_LABELS[i]); + rjsonwriter_add_colon(writer); + rjsonwriter_add_unsigned(writer, request->inputs[i]); + } + rjsonwriter_add_end_object(writer); + } + rjsonwriter_add_end_object(writer); + } + + return writer; +} + +/** + * Writes in the provided {buffer} the URL for the translation request. The + * buffer is guaranteed to contain the server URL as well as an 'output' param + * specifying the accepted data types for this service. 
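+ *
+ * For example, with a hypothetical server at http://localhost:4404, source and
+ * target languages resolving to "ja" and "en", and Image Mode selected on a
+ * build with RPNG support, the generated URL would look like:
+ *
+ *   http://localhost:4404?source_lang=ja&target_lang=en&output=image,bmp,png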
+ */
+static void build_request_url(char *buffer, size_t length, settings_t *settings)
+{
+   char token[2];
+   size_t _len;
+   bool poke_supported = false;
+   unsigned service_source_lang = settings->uints.ai_service_source_lang;
+   unsigned service_target_lang = settings->uints.ai_service_target_lang;
+   const char *service_url = settings->arrays.ai_service_url;
+   unsigned ai_service_mode = settings->uints.ai_service_mode;
+#ifdef HAVE_GFX_WIDGETS
+   video_driver_state_t *video_st = video_state_get_ptr();
+   poke_supported = video_st->poke
+         && video_st->poke->load_texture
+         && video_st->poke->unload_texture;
+#endif
+
+   _len = strlcpy(buffer, service_url, length);
+   buffer += _len;
+   length -= _len;
+
+   /* Check the configured URL (not the advanced buffer pointer) to decide
+    * whether a query string is already present. */
+   token[1] = '\0';
+   if (strrchr(service_url, '?'))
+      token[0] = '&';
+   else
+      token[0] = '?';
+
+   if (service_source_lang != TRANSLATION_LANG_DONT_CARE)
+   {
+      const char *lang_source
+            = ai_service_get_str((enum translation_lang)service_source_lang);
+
+      if (!string_is_empty(lang_source))
+      {
+         _len = strlcpy(buffer, token, length);
+         buffer += _len;
+         length -= _len;
+
+         _len = strlcpy(buffer, "source_lang=", length);
+         buffer += _len;
+         length -= _len;
+
+         _len = strlcpy(buffer, lang_source, length);
+         buffer += _len;
+         length -= _len;
+         token[0] = '&';
+      }
+   }
+
+   if (service_target_lang != TRANSLATION_LANG_DONT_CARE)
+   {
+      const char *lang_target
+            = ai_service_get_str((enum translation_lang)service_target_lang);
+
+      if (!string_is_empty(lang_target))
+      {
+         _len = strlcpy(buffer, token, length);
+         buffer += _len;
+         length -= _len;
+
+         _len = strlcpy(buffer, "target_lang=", length);
+         buffer += _len;
+         length -= _len;
+
+         _len = strlcpy(buffer, lang_target, length);
+         buffer += _len;
+         length -= _len;
+         token[0] = '&';
+      }
+   }
+
+   _len = strlcpy(buffer, token, length);
+   buffer += _len;
+   length -= _len;
+
+   _len = strlcpy(buffer, "output=", length);
+   buffer += _len;
+   length -= _len;
+
+   switch (ai_service_mode)
+   {
+      case 0: /* Image Mode */
+         _len = strlcpy(buffer, "image,bmp", length);
+         buffer += _len;
+         length -= _len;
+#ifdef HAVE_RPNG
+         _len = strlcpy(buffer, ",png", length);
+         buffer += _len;
+         length -= _len;
+         if (poke_supported)
+         {
+            _len = strlcpy(buffer, ",png-a", length);
+            buffer += _len;
+            length -= _len;
+         }
+#endif
+         break;
+
+      case 1: /* Speech Mode */
+         _len = strlcpy(buffer, "sound,wav", length);
+         buffer += _len;
+         length -= _len;
+         break;
+
+      case 2: /* Narrator Mode */
+         _len = strlcpy(buffer, "text", length);
+         buffer += _len;
+         length -= _len;
+         break;
+
+      case 3: /* Text Mode */
+      case 4: /* Text + Narrator */
+         _len = strlcpy(buffer, "text,subs", length);
+         buffer += _len;
+         length -= _len;
+         break;
+
+      case 5: /* Image + Narrator */
+         _len = strlcpy(buffer, "text,image,bmp", length);
+         buffer += _len;
+         length -= _len;
+#ifdef HAVE_RPNG
+         _len = strlcpy(buffer, ",png", length);
+         buffer += _len;
+         length -= _len;
+         if (poke_supported)
+         {
+            _len = strlcpy(buffer, ",png-a", length);
+            buffer += _len;
+            length -= _len;
+         }
+#endif
+         break;
+   }
+}
+
+/**
+ * Captures a frame from the currently running core and sends a request to the
+ * translation server. Processing and encoding this data comes with a cost, so
+ * it is offloaded to the task thread.
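+ *
+ * In short, the handler runs the following pipeline, bailing out early if the
+ * task gets cancelled at any point:
+ *
+ *   translation_grab_frame -> translation_dupe_fail -> translation_frame_encode
+ *   -> build_request_json / build_request_url -> task_push_http_post_transfer,
+ *   whose response is then handled by translation_response_cb.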
+ */
+static void translation_request_hndl(retro_task_t *task)
+{
+   access_request_t *request = (access_request_t*)task->user_data;
+   settings_t *settings = config_get_ptr();
+   access_state_t *access_st = access_state_get_ptr();
+   access_frame_t *frame = NULL;
+   access_base64_t *encode = NULL;
+   char *label = NULL;
+   rjsonwriter_t *writer = NULL;
+   const char *json = NULL;
+   bool sent = false;
+   char url[PATH_MAX_LENGTH];
+
+   if (task_get_cancelled(task))
+      goto finish;
+
+   access_st->last_call = cpu_features_get_time_usec();
+
+   frame = translation_grab_frame();
+   if (task_get_cancelled(task) || !frame)
+      goto finish;
+
+   if (translation_dupe_fail(frame))
+      goto finish;
+
+   encode = translation_frame_encode(frame);
+   if (task_get_cancelled(task) || !encode)
+      goto finish;
+
+   label = translation_get_content_label();
+   writer = build_request_json(encode, request, frame, label);
+   if (task_get_cancelled(task) || !writer)
+      goto finish;
+
+   json = rjsonwriter_get_memory_buffer(writer, NULL);
+   build_request_url(url, PATH_MAX_LENGTH, settings);
+   if (task_get_cancelled(task) || !json)
+      goto finish;
+
+#ifdef DEBUG
+   if (access_st->ai_service_auto == 0)
+      RARCH_LOG("[Translate]: Sending request to: %s\n", url);
+#endif
+   sent = true;
+   task_push_http_post_transfer(
+         url, json, true, NULL, translation_response_cb, NULL);
+
+finish:
+   task_set_finished(task, true);
+
+   if (frame && frame->data)
+      free(frame->data);
+   if (frame)
+      free(frame);
+   if (encode && encode->data)
+      free(encode->data);
+   if (encode)
+      free(encode);
+   if (label)
+      free(label);
+   if (writer)
+      rjsonwriter_free(writer);
+   if (request && request->inputs)
+      free(request->inputs);
+   if (request)
+      free(request);
+
+   /* Plan next auto-request if this one was skipped */
+   if (!sent && access_st->ai_service_auto != 0)
+      call_auto_translate_task(settings);
+}
+
+/**
+ * Invokes the translation service. Captures a frame from the currently running
+ * content and sends it over HTTP to the translation server. Once the server
+ * responds, the translation data is displayed according to the user's
+ * preferences. Returns true if the request task could be created and queued.
+ */
+bool run_translation_service(settings_t *settings, bool paused)
+{
+   retro_task_t *task = NULL;
+   access_request_t *request = NULL;
+   access_state_t *access_st = access_state_get_ptr();
+#ifdef HAVE_ACCESSIBILITY
+   unsigned i;
+   input_driver_state_t *input_st = input_state_get_ptr();
+#endif
+
+   if (!(request = (access_request_t*)malloc(sizeof(access_request_t))))
+      goto failure;
+
+#ifdef HAVE_THREADS
+   if (!access_st->image_lock)
+   {
+      if (!(access_st->image_lock = slock_new()))
+         goto failure;
+   }
+#endif
+
+   task = task_init();
+   if (!task)
+      goto failure;
+
+   /* Freeze frontend state while we're still running on the main thread */
+   request->paused = paused;
+   /* calloc keeps the retropad state zeroed when HAVE_ACCESSIBILITY is not
+    * compiled in, so the JSON payload never contains uninitialized values */
+   request->inputs = (char*)calloc(
+         ARRAY_SIZE(ACCESS_INPUT_LABELS), sizeof(char));
+
+#ifdef HAVE_ACCESSIBILITY
+   for (i = 0; i < ARRAY_SIZE(ACCESS_INPUT_LABELS); i++)
+      request->inputs[i] = input_st->ai_gamepad_state[i] ? 1 : 0;
+#endif
+
+   task->handler = translation_request_hndl;
+   task->user_data = request;
+   task->mute = true;
+   access_st->request_task = task;
+   task_queue_push(task);
+
+   return true;
+
+failure:
+   if (request)
+      free(request);
+
+   return false;
+}