From 717b9768750dfef39120d4457a832ce7558bd879 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 26 Feb 2013 13:49:00 -0600 Subject: [PATCH] ARM Support without GLSL --- CMakeLists.txt | 344 +-- Source/Core/AudioCommon/CMakeLists.txt | 5 + Source/Core/AudioCommon/Src/AudioCommon.cpp | 7 +- .../Core/AudioCommon/Src/OpenSLESStream.cpp | 145 ++ Source/Core/AudioCommon/Src/OpenSLESStream.h | 48 + Source/Core/Common/CMakeLists.txt | 23 +- Source/Core/Common/Common.vcxproj | 9 +- Source/Core/Common/Common.vcxproj.filters | 9 +- Source/Core/Common/Src/ArmCPUDetect.cpp | 160 ++ Source/Core/Common/Src/ArmEmitter.cpp | 967 ++++++++ Source/Core/Common/Src/ArmEmitter.h | 587 +++++ Source/Core/Common/Src/CPUDetect.h | 23 +- Source/Core/Common/Src/Common.h | 6 +- Source/Core/Common/Src/CommonFuncs.h | 14 +- Source/Core/Common/Src/CommonPaths.h | 5 + Source/Core/Common/Src/FPURoundMode.h | 51 + Source/Core/Common/Src/FileUtil.cpp | 5 +- .../Core/Common/Src/GenericFPURoundMode.cpp | 41 + Source/Core/Common/Src/LogManager.cpp | 7 +- Source/Core/Common/Src/MathUtil.cpp | 24 - Source/Core/Common/Src/MathUtil.h | 13 +- Source/Core/Common/Src/MemArena.cpp | 34 + Source/Core/Common/Src/MemoryUtil.cpp | 5 +- Source/Core/Common/Src/StdConditionVariable.h | 2 +- Source/Core/Common/Src/StdMutex.h | 2 +- Source/Core/Common/Src/StdThread.h | 2 +- Source/Core/Common/Src/StringUtil.cpp | 38 +- Source/Core/Common/Src/Thread.cpp | 2 +- Source/Core/Common/Src/Thread.h | 2 - .../Core/Common/Src/{ABI.cpp => x64ABI.cpp} | 2 +- Source/Core/Common/Src/{ABI.h => x64ABI.h} | 0 .../Src/{CPUDetect.cpp => x64CPUDetect.cpp} | 6 +- Source/Core/Common/Src/x64Emitter.cpp | 2 +- Source/Core/Common/Src/x64Emitter.h | 2 +- Source/Core/Common/Src/x64FPURoundMode.cpp | 120 + .../Common/Src/{Thunk.cpp => x64Thunk.cpp} | 3 +- Source/Core/Core/CMakeLists.txt | 36 +- Source/Core/Core/Core.vcxproj | 12 +- Source/Core/Core/Core.vcxproj.filters | 9 +- Source/Core/Core/Src/ArmMemTools.cpp | 103 + Source/Core/Core/Src/ConfigManager.h | 2 +- Source/Core/Core/Src/Core.cpp | 7 +- Source/Core/Core/Src/DSP/DSPEmitter.cpp | 2 +- Source/Core/Core/Src/DSP/DSPHWInterface.cpp | 2 + .../Core/Src/DSP/Jit/DSPJitArithmetic.cpp | 2 +- Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp | 2 +- Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp | 2 +- Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp | 2 +- .../Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp | 2 +- Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp | 2 +- .../Core/Src/DSP/Jit/DSPJitMultiplier.cpp | 2 +- Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp | 2 +- Source/Core/Core/Src/HW/Memmap.cpp | 126 - Source/Core/Core/Src/HW/Memmap.h | 5 - .../Interpreter/Interpreter_FloatingPoint.cpp | 4 +- .../Interpreter/Interpreter_LoadStore.cpp | 21 +- .../Interpreter_SystemRegisters.cpp | 51 +- .../Interpreter/Interpreter_Tables.cpp | 2 +- Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 4 +- Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 2 +- Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp | 6 +- Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h | 1 - .../Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp | 2 +- .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 2 +- .../Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 2 +- .../Src/PowerPC/Jit64/Jit_SystemRegisters.cpp | 2 +- .../Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp | 10 +- .../Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp | 2 +- Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h | 2 +- .../Core/Src/PowerPC/Jit64IL/JitILAsm.cpp | 8 +- .../Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp | 2 +- .../Jit64IL/JitIL_LoadStoreFloating.cpp | 2 +- .../PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp | 2 +- .../PowerPC/Jit64IL/JitIL_SystemRegisters.cpp | 2 +- Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp | 498 ++++ Source/Core/Core/Src/PowerPC/JitArm32/Jit.h | 186 ++ .../Core/Src/PowerPC/JitArm32/JitArmCache.cpp | 46 + .../Core/Src/PowerPC/JitArm32/JitArmCache.h | 33 + .../Src/PowerPC/JitArm32/JitArm_BackPatch.cpp | 164 ++ .../Src/PowerPC/JitArm32/JitArm_Branch.cpp | 347 +++ .../PowerPC/JitArm32/JitArm_FloatingPoint.cpp | 80 + .../Src/PowerPC/JitArm32/JitArm_Integer.cpp | 297 +++ .../Src/PowerPC/JitArm32/JitArm_LoadStore.cpp | 414 ++++ .../JitArm32/JitArm_LoadStoreFloating.cpp | 72 + .../JitArm32/JitArm_SystemRegisters.cpp | 111 + .../Src/PowerPC/JitArm32/JitArm_Tables.cpp | 502 ++++ .../Core/Src/PowerPC/JitArm32/JitArm_Tables.h | 29 + .../Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp | 174 ++ .../Core/Core/Src/PowerPC/JitArm32/JitAsm.h | 43 + .../Core/Src/PowerPC/JitArm32/JitRegCache.cpp | 167 ++ .../Core/Src/PowerPC/JitArm32/JitRegCache.h | 92 + .../Src/PowerPC/JitCommon/JitAsmCommon.cpp | 4 +- .../Core/Src/PowerPC/JitCommon/JitAsmCommon.h | 25 +- .../Src/PowerPC/JitCommon/JitBackpatch.cpp | 4 +- .../Core/Src/PowerPC/JitCommon/JitBackpatch.h | 4 + .../Core/Core/Src/PowerPC/JitCommon/JitBase.h | 26 +- .../Core/Src/PowerPC/JitCommon/JitCache.cpp | 94 +- .../Core/Src/PowerPC/JitCommon/JitCache.h | 16 +- .../Core/Src/PowerPC/JitCommon/Jit_Util.cpp | 2 +- Source/Core/Core/Src/PowerPC/JitInterface.cpp | 350 +++ Source/Core/Core/Src/PowerPC/JitInterface.h | 56 + Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp | 3 +- Source/Core/Core/Src/PowerPC/PPCCache.cpp | 7 +- Source/Core/Core/Src/PowerPC/PPCTables.cpp | 18 +- Source/Core/Core/Src/PowerPC/PowerPC.cpp | 70 +- Source/Core/Core/Src/PowerPC/Profiler.cpp | 78 +- Source/Core/Core/Src/PowerPC/Profiler.h | 9 + .../Src/{MemTools.cpp => x64MemTools.cpp} | 12 +- Source/Core/DiscIO/Src/BannerLoader.cpp | 6 + Source/Core/DolphinWX/CMakeLists.txt | 187 +- .../DolphinWX/Src/Debugger/CodeWindow.cpp | 14 +- Source/Core/DolphinWX/Src/Frame.cpp | 2 +- Source/Core/DolphinWX/Src/GLInterface.h | 13 +- .../Src/GLInterface/{EGL.cpp => EGL_X11.cpp} | 2 +- .../Src/GLInterface/{EGL.h => EGL_X11.h} | 0 .../DolphinWX/Src/GLInterface/InterfaceBase.h | 10 +- Source/Core/DolphinWX/Src/MainAndroid.cpp | 149 ++ Source/Core/VideoCommon/CMakeLists.txt | 10 +- Source/Core/VideoCommon/Src/Fifo.cpp | 6 +- .../Core/VideoCommon/Src/GenericDLCache.cpp | 52 + .../VideoCommon/Src/GenericTextureDecoder.cpp | 2182 +++++++++++++++++ Source/Core/VideoCommon/Src/VertexLoader.cpp | 18 +- Source/Core/VideoCommon/Src/VertexLoader.h | 7 + .../Src/{DLCache.cpp => x64DLCache.cpp} | 2 +- ...xtureDecoder.cpp => x64TextureDecoder.cpp} | 93 - Source/Core/VideoCommon/VideoCommon.vcxproj | 10 +- .../VideoCommon/VideoCommon.vcxproj.filters | 12 +- .../Src/NativeVertexFormat.cpp | 2 +- Source/Plugins/Plugin_VideoOGL/Src/GLUtil.cpp | 4 +- .../Src/NativeVertexFormat.cpp | 2 +- Source/Plugins/Plugin_VideoOGL/Src/main.cpp | 4 + .../Src/OpcodeDecoder.cpp | 6 +- .../Src/SWCommandProcessor.cpp | 6 +- 133 files changed, 9048 insertions(+), 948 deletions(-) create mode 100644 Source/Core/AudioCommon/Src/OpenSLESStream.cpp create mode 100644 Source/Core/AudioCommon/Src/OpenSLESStream.h create mode 100644 Source/Core/Common/Src/ArmCPUDetect.cpp create mode 100644 Source/Core/Common/Src/ArmEmitter.cpp create mode 100644 Source/Core/Common/Src/ArmEmitter.h create mode 100644 Source/Core/Common/Src/FPURoundMode.h create mode 100644 Source/Core/Common/Src/GenericFPURoundMode.cpp rename Source/Core/Common/Src/{ABI.cpp => x64ABI.cpp} (99%) rename Source/Core/Common/Src/{ABI.h => x64ABI.h} (100%) rename Source/Core/Common/Src/{CPUDetect.cpp => x64CPUDetect.cpp} (98%) create mode 100644 Source/Core/Common/Src/x64FPURoundMode.cpp rename Source/Core/Common/Src/{Thunk.cpp => x64Thunk.cpp} (98%) create mode 100644 Source/Core/Core/Src/ArmMemTools.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/Jit.h create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitArmCache.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitArmCache.h create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitArm_BackPatch.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.h create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.h create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitRegCache.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitArm32/JitRegCache.h create mode 100644 Source/Core/Core/Src/PowerPC/JitInterface.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitInterface.h rename Source/Core/Core/Src/{MemTools.cpp => x64MemTools.cpp} (96%) rename Source/Core/DolphinWX/Src/GLInterface/{EGL.cpp => EGL_X11.cpp} (99%) rename Source/Core/DolphinWX/Src/GLInterface/{EGL.h => EGL_X11.h} (100%) create mode 100644 Source/Core/DolphinWX/Src/MainAndroid.cpp create mode 100644 Source/Core/VideoCommon/Src/GenericDLCache.cpp create mode 100644 Source/Core/VideoCommon/Src/GenericTextureDecoder.cpp rename Source/Core/VideoCommon/Src/{DLCache.cpp => x64DLCache.cpp} (99%) rename Source/Core/VideoCommon/Src/{TextureDecoder.cpp => x64TextureDecoder.cpp} (97%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4598eaa9e2..ea531f030c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,9 +106,22 @@ if(DOLPHIN_IS_STABLE) else() set(DOLPHIN_VERSION_PATCH ${DOLPHIN_WC_REVISION}) endif() +message(${CMAKE_SYSTEM_PROCESSOR}) +if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm") + set(_M_GENERIC 1) + set(_M_ARM 1) + add_definitions(-marm -march=armv7-a) + add_definitions(-D_M_ARM=1) + add_definitions(-D_M_GENERIC=1) +endif() +# Set these next two lines to test generic +#set(_M_GENERIC 1) +#add_definitions(-D_M_GENERIC=1) # Various compile flags -add_definitions(-msse2) +if(NOT _M_GENERIC) + add_definitions(-msse2) +endif() include(CheckCXXCompilerFlag) macro(check_and_add_flag var flag) @@ -258,135 +271,141 @@ if(USE_EGL) endif() add_definitions(-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE) +option(ANDROID "Enables a build for Android" OFF) +if(ANDROID) + message("Building for Android") + add_definitions(-DANDROID) +endif() ######################################## # Dependency checking # # TODO: We should have options for dependencies included in the externals to # override autodetection of system libraries and force the usage of the # externals. +if(NOT ANDROID) + include(CheckLib) -include(CheckLib) - -include(FindOpenGL) -include_directories(${OPENGL_INCLUDE_DIR}) -if(NOT OPENGL_GLU_FOUND) - message(FATAL_ERROR "GLU is required but not found") -endif() - -if(OPENMP) - include(FindOpenMP OPTIONAL) - if(OPENMP_FOUND) - message("OpenMP parallelization enabled") - add_definitions("${OpenMP_CXX_FLAGS}") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}") + include(FindOpenGL) + include_directories(${OPENGL_INCLUDE_DIR}) + if(NOT OPENGL_GLU_FOUND) + message(FATAL_ERROR "GLU is required but not found") endif() -endif() -if(NOT OPENMP_FOUND) - add_definitions(-Wno-unknown-pragmas) - message("OpenMP parallelization disabled") -endif() -include(FindALSA OPTIONAL) -if(ALSA_FOUND) - add_definitions(-DHAVE_ALSA=1) - message("ALSA found, enabling ALSA sound backend") -else() - add_definitions(-DHAVE_ALSA=0) - message("ALSA NOT found, disabling ALSA sound backend") -endif(ALSA_FOUND) + if(OPENMP) + include(FindOpenMP OPTIONAL) + if(OPENMP_FOUND) + message("OpenMP parallelization enabled") + add_definitions("${OpenMP_CXX_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}") + endif() + endif() + if(NOT OPENMP_FOUND) + add_definitions(-Wno-unknown-pragmas) + message("OpenMP parallelization disabled") + endif() -check_lib(AO ao QUIET) -if(AO_FOUND) - add_definitions(-DHAVE_AO=1) - message("ao found, enabling ao sound backend") -else() - add_definitions(-DHAVE_AO=0) - message("ao NOT found, disabling ao sound backend") -endif(AO_FOUND) + include(FindALSA OPTIONAL) + if(ALSA_FOUND) + add_definitions(-DHAVE_ALSA=1) + message("ALSA found, enabling ALSA sound backend") + else() + add_definitions(-DHAVE_ALSA=0) + message("ALSA NOT found, disabling ALSA sound backend") + endif(ALSA_FOUND) -check_lib(BLUEZ bluez QUIET) -if(BLUEZ_FOUND) - add_definitions(-DHAVE_BLUEZ=1) - message("bluez found, enabling bluetooth support") -else() - add_definitions(-DHAVE_BLUEZ=0) - message("bluez NOT found, disabling bluetooth support") -endif(BLUEZ_FOUND) + check_lib(AO ao QUIET) + if(AO_FOUND) + add_definitions(-DHAVE_AO=1) + message("ao found, enabling ao sound backend") + else() + add_definitions(-DHAVE_AO=0) + message("ao NOT found, disabling ao sound backend") + endif(AO_FOUND) -check_lib(PULSEAUDIO libpulse-simple QUIET) -if(PULSEAUDIO_FOUND) - add_definitions(-DHAVE_PULSEAUDIO=1) - message("PulseAudio found, enabling PulseAudio sound backend") -else() - add_definitions(-DHAVE_PULSEAUDIO=0) - message("PulseAudio NOT found, disabling PulseAudio sound backend") -endif(PULSEAUDIO_FOUND) + check_lib(BLUEZ bluez QUIET) + if(BLUEZ_FOUND) + add_definitions(-DHAVE_BLUEZ=1) + message("bluez found, enabling bluetooth support") + else() + add_definitions(-DHAVE_BLUEZ=0) + message("bluez NOT found, disabling bluetooth support") + endif(BLUEZ_FOUND) -include(FindOpenAL OPTIONAL) -if(OPENAL_FOUND) - add_definitions(-DHAVE_OPENAL=1) - include_directories(${OPENAL_INCLUDE_DIR}) - message("OpenAL found, enabling OpenAL sound backend") -else() - add_definitions(-DHAVE_OPENAL=0) - message("OpenAL NOT found, disabling OpenAL sound backend") -endif(OPENAL_FOUND) + check_lib(PULSEAUDIO libpulse QUIET) + if(PULSEAUDIO_FOUND) + add_definitions(-DHAVE_PULSEAUDIO=1) + message("PulseAudio found, enabling PulseAudio sound backend") + else() + add_definitions(-DHAVE_PULSEAUDIO=0) + message("PulseAudio NOT found, disabling PulseAudio sound backend") + endif(PULSEAUDIO_FOUND) + + include(FindOpenAL OPTIONAL) + if(OPENAL_FOUND) + add_definitions(-DHAVE_OPENAL=1) + include_directories(${OPENAL_INCLUDE_DIR}) + message("OpenAL found, enabling OpenAL sound backend") + else() + add_definitions(-DHAVE_OPENAL=0) + message("OpenAL NOT found, disabling OpenAL sound backend") + endif(OPENAL_FOUND) # Note: We do not need to explicitly check for X11 as it is done in the cmake # FindOpenGL module on linux. -if(UNIX AND NOT APPLE) + if(UNIX AND NOT APPLE) + if(X11_FOUND) + add_definitions(-DHAVE_X11=1) + include_directories(${X11_INCLUDE_DIR}) + message("X11 found") + else() + message(FATAL_ERROR "X11 is required but not found") + endif(X11_FOUND) + else() + add_definitions(-DHAVE_X11=0) + endif() + if(X11_FOUND) - add_definitions(-DHAVE_X11=1) - include_directories(${X11_INCLUDE_DIR}) - message("X11 found") + check_lib(XRANDR Xrandr) + endif() + if(XRANDR_FOUND) + add_definitions(-DHAVE_XRANDR=1) else() - message(FATAL_ERROR "X11 is required but not found") - endif(X11_FOUND) -else() - add_definitions(-DHAVE_X11=0) -endif() + add_definitions(-DHAVE_XRANDR=0) + endif(XRANDR_FOUND) -if(X11_FOUND) - check_lib(XRANDR Xrandr) -endif() -if(XRANDR_FOUND) - add_definitions(-DHAVE_XRANDR=1) -else() - add_definitions(-DHAVE_XRANDR=0) -endif(XRANDR_FOUND) + if(ENCODE_FRAMEDUMPS) + check_libav() + endif() -if(ENCODE_FRAMEDUMPS) - check_libav() -endif() - -include(CheckCXXSourceRuns) -set(CMAKE_REQUIRED_LIBRARIES portaudio) -CHECK_CXX_SOURCE_RUNS( - "#include - int main(int argc, char **argv) - { if(Pa_GetVersion() >= 1890) return 0; else return 1; }" - PORTAUDIO) -if(PORTAUDIO) - message("PortAudio found, enabling mic support") - add_definitions(-DHAVE_PORTAUDIO=1) - set(PORTAUDIO_FOUND TRUE) -else() - message("PortAudio not found, disabling mic support") - add_definitions(-DHAVE_PORTAUDIO=0) - set(PORTAUDIO_FOUND FALSE) -endif(PORTAUDIO) - -if(OPROFILING) - check_lib(OPROFILE opagent opagent.h) - check_lib(BFD bfd bfd.h) - if(OPROFILE_FOUND AND BFD_FOUND) - message("oprofile found, enabling profiling support") - add_definitions(-DUSE_OPROFILE=1) + include(CheckCXXSourceRuns) + set(CMAKE_REQUIRED_LIBRARIES portaudio) + CHECK_CXX_SOURCE_RUNS( + "#include + int main(int argc, char **argv) + { if(Pa_GetVersion() >= 1890) return 0; else return 1; }" + PORTAUDIO) + if(PORTAUDIO) + message("PortAudio found, enabling mic support") + add_definitions(-DHAVE_PORTAUDIO=1) + set(PORTAUDIO_FOUND TRUE) else() - message(FATAL_ERROR "oprofile or bfd not found. Can't build profiling support.") + message("PortAudio not found, disabling mic support") + add_definitions(-DHAVE_PORTAUDIO=0) + set(PORTAUDIO_FOUND FALSE) + endif(PORTAUDIO) + + option(OPROFILING "Enable profiling" OFF) + if(OPROFILING) + check_lib(OPROFILE opagent opagent.h) + check_lib(BFD bfd bfd.h) + if(OPROFILE_FOUND AND BFD_FOUND) + message("oprofile found, enabling profiling support") + add_definitions(-DUSE_OPROFILE=1) + else() + message(FATAL_ERROR "oprofile or bfd not found. Can't build profiling support.") + endif() endif() endif() - ######################################## # Setup include directories (and make sure they are preferred over the Externals) # @@ -401,7 +420,6 @@ include_directories(Source/Core/InputCommon/Src) include_directories(Source/Core/VideoCommon/Src) include_directories(Source/Core/VideoUICommon/Src) - ######################################## # Process externals and setup their include directories # @@ -415,7 +433,7 @@ include_directories(Source/Core/VideoUICommon/Src) add_subdirectory(Externals/Bochs_disasm) include_directories(Externals/Bochs_disasm) -if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") +if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" AND NOT ANDROID) check_lib(LZO lzo2 lzo/lzo1x.h QUIET) endif() if(LZO_FOUND) @@ -440,15 +458,16 @@ if(OPENAL_FOUND) endif() endif() -if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - include(FindSDL2 OPTIONAL) -endif() -if(SDL2_FOUND) - message("Using shared SDL2") - include_directories(${SDL2_INCLUDE_DIR}) -else(SDL2_FOUND) - # SDL2 not found, try SDL +if(NOT ANDROID) if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + include(FindSDL2 OPTIONAL) + endif() + if(SDL2_FOUND) + message("Using shared SDL2") + include_directories(${SDL2_INCLUDE_DIR}) + else(SDL2_FOUND) + # SDL2 not found, try SDL + if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") include(FindSDL OPTIONAL) endif() if(SDL_FOUND) @@ -460,12 +479,13 @@ else(SDL2_FOUND) include_directories(Externals/SDL/SDL Externals/SDL Externals/SDL/include) add_subdirectory(Externals/SDL) endif(SDL_FOUND) -endif(SDL2_FOUND) + endif(SDL2_FOUND) +endif() set(SFML_FIND_VERSION TRUE) set(SFML_FIND_VERSION_MAJOR 1) set(SFML_FIND_VERSION_MINOR 5) -if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") +if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" AND NOT ANDROID) include(FindSFML OPTIONAL) endif() if(SFML_FOUND AND NOT SFML_VERSION_MAJOR) # SFML 1.x doesn't define SFML_VERSION_MAJOR @@ -476,7 +496,7 @@ else() include_directories(Externals/SFML/include) endif() -if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") +if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" AND NOT ANDROID) check_lib(SOIL SOIL SOIL/SOIL.h QUIET) endif() if(SOIL_FOUND) @@ -506,26 +526,19 @@ if(WIN32) find_library(GLEW glew32s PATHS Externals/GLew) include_directories(Externals/GLew/include) else() - if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - check_lib(GLEW GLEW GL/glew.h) + if(NOT ANDROID) + if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + check_lib(GLEW GLEW GL/glew.h) + endif() + if(NOT GLEW_FOUND) + message("Using static GLEW from Externals") + add_subdirectory(Externals/GLew) + include_directories(Externals/GLew/include) + endif(NOT GLEW_FOUND) endif() - if(NOT GLEW_FOUND) - message("Using static GLEW from Externals") - add_subdirectory(Externals/GLew) - include_directories(Externals/GLew/include) - endif(NOT GLEW_FOUND) - - if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - check_lib(CG Cg Cg/cg.h) - endif() - if(NOT CG_FOUND) - message("Using static Cg from Externals") - include_directories(Externals) - endif(NOT CG_FOUND) - check_lib(CGGL CgGL Cg/cgGL.h) endif() -if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") +if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin" AND NOT ANDROID) find_library(CL OpenCL) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-weak_framework,OpenCL") else() @@ -533,10 +546,25 @@ else() add_subdirectory(Externals/CLRun) endif() -if(NOT DISABLE_WX) +if(NOT DISABLE_WX AND NOT ANDROID) include(FindwxWidgets OPTIONAL) FIND_PACKAGE(wxWidgets COMPONENTS core aui adv) + if(wxWidgets_FOUND) + EXECUTE_PROCESS( + COMMAND sh "${wxWidgets_CONFIG_EXECUTABLE}" + ${wxWidgets_CONFIG_OPTIONS} --version + OUTPUT_VARIABLE wxWidgets_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + ) + message("Found wxWidgets version ${wxWidgets_VERSION}") + if(${wxWidgets_VERSION} VERSION_LESS "2.8.9") + message("At least 2.8.9 is required; ignoring found version") + unset(wxWidgets_FOUND) + endif() + endif(wxWidgets_FOUND) + if(wxWidgets_FOUND) EXECUTE_PROCESS( COMMAND sh "${wxWidgets_CONFIG_EXECUTABLE}" @@ -557,26 +585,26 @@ if(NOT DISABLE_WX) endif() endif(wxWidgets_FOUND) - if(UNIX AND NOT APPLE) - # There is a bug in the FindGTK module in cmake version 2.8.2 that - # does not find gdk-pixbuf-2.0. On the other hand some 2.8.3 - # users have complained that pkg-config does not find - # gdk-pixbuf-2.0. On yet another hand, cmake version 2.8.3 in - # Ubuntu Natty does not find the glib libraries correctly. - # Ugly!!! - execute_process(COMMAND lsb_release -c -s - OUTPUT_VARIABLE DIST_NAME - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION} - VERSION_EQUAL 2.8.2 OR "${DIST_NAME}" STREQUAL "natty") - check_lib(GTK2 gtk+-2.0 gtk.h REQUIRED) - else() - include(FindGTK2) - if(GTK2_FOUND) - include_directories(${GTK2_INCLUDE_DIRS}) + if(UNIX AND NOT APPLE) + # There is a bug in the FindGTK module in cmake version 2.8.2 that + # does not find gdk-pixbuf-2.0. On the other hand some 2.8.3 + # users have complained that pkg-config does not find + # gdk-pixbuf-2.0. On yet another hand, cmake version 2.8.3 in + # Ubuntu Natty does not find the glib libraries correctly. + # Ugly!!! + execute_process(COMMAND lsb_release -c -s + OUTPUT_VARIABLE DIST_NAME + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION} + VERSION_EQUAL 2.8.2 OR "${DIST_NAME}" STREQUAL "natty") + check_lib(GTK2 gtk+-2.0 gtk.h REQUIRED) + else() + include(FindGTK2) + if(GTK2_FOUND) + include_directories(${GTK2_INCLUDE_DIRS}) + endif() endif() endif() - endif() if(wxWidgets_FOUND) include(${wxWidgets_USE_FILE}) @@ -608,7 +636,7 @@ if(NOT DISABLE_WX) set(wxWidgets_LIBRARIES "wx") endif(wxWidgets_FOUND) add_definitions(-DHAVE_WX=1) -endif(NOT DISABLE_WX) +endif(NOT DISABLE_WX AND NOT ANDROID) ######################################## @@ -682,4 +710,4 @@ list(APPEND CPACK_SOURCE_IGNORE_FILES "${CMAKE_BINARY_DIR}") # CPack must be included after the CPACK_* variables are set in order for those # variables to take effect. -include(CPack) +Include(CPack) diff --git a/Source/Core/AudioCommon/CMakeLists.txt b/Source/Core/AudioCommon/CMakeLists.txt index 3b6d0cea4d..939956460d 100644 --- a/Source/Core/AudioCommon/CMakeLists.txt +++ b/Source/Core/AudioCommon/CMakeLists.txt @@ -6,6 +6,11 @@ set(SRCS Src/AudioCommon.cpp set(LIBS "") +if(ANDROID) + set(SRCS ${SRCS} Src/OpenSLESStream.cpp) + set(LIBS ${LIBS} OpenSLES) +endif(ANDROID) + if(ALSA_FOUND) set(SRCS ${SRCS} Src/AlsaSoundStream.cpp) set(LIBS ${LIBS} ${ALSA_LIBRARIES}) diff --git a/Source/Core/AudioCommon/Src/AudioCommon.cpp b/Source/Core/AudioCommon/Src/AudioCommon.cpp index e14b9ac45e..2f299d0edb 100644 --- a/Source/Core/AudioCommon/Src/AudioCommon.cpp +++ b/Source/Core/AudioCommon/Src/AudioCommon.cpp @@ -26,6 +26,7 @@ #include "CoreAudioSoundStream.h" #include "OpenALStream.h" #include "PulseAudioStream.h" +#include "OpenSLESStream.h" #include "../../Core/Src/Movie.h" #include "../../Core/Src/ConfigManager.h" @@ -55,7 +56,8 @@ namespace AudioCommon soundStream = new CoreAudioSound(mixer); else if (backend == BACKEND_PULSEAUDIO && PulseAudio::isValid()) soundStream = new PulseAudio(mixer); - + else if (backend == BACKEND_OPENSLES && OpenSLESStream::isValid()) + soundStream = new OpenSLESStream(mixer); if (soundStream != NULL) { UpdateSoundStream(); @@ -116,7 +118,8 @@ namespace AudioCommon backends.push_back(BACKEND_PULSEAUDIO); if (OpenALStream::isValid()) backends.push_back(BACKEND_OPENAL); - + if (OpenSLESStream::isValid()) + backends.push_back(BACKEND_OPENSLES); return backends; } diff --git a/Source/Core/AudioCommon/Src/OpenSLESStream.cpp b/Source/Core/AudioCommon/Src/OpenSLESStream.cpp new file mode 100644 index 0000000000..b0913a1895 --- /dev/null +++ b/Source/Core/AudioCommon/Src/OpenSLESStream.cpp @@ -0,0 +1,145 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifdef ANDROID +#include "Common.h" +#include +#include "OpenSLESStream.h" + +#include +#include + +// engine interfaces +static SLObjectItf engineObject; +static SLEngineItf engineEngine; +static SLObjectItf outputMixObject; + +// buffer queue player interfaces +static SLObjectItf bqPlayerObject = NULL; +static SLPlayItf bqPlayerPlay; +static SLAndroidSimpleBufferQueueItf bqPlayerBufferQueue; +static SLMuteSoloItf bqPlayerMuteSolo; +static SLVolumeItf bqPlayerVolume; +static CMixer *g_mixer; +#define BUFFER_SIZE 512 +#define BUFFER_SIZE_IN_SAMPLES (BUFFER_SIZE / 2) + +// Double buffering. +static short buffer[2][BUFFER_SIZE]; +static int curBuffer = 0; + +static void bqPlayerCallback(SLAndroidSimpleBufferQueueItf bq, void *context) { + assert(bq == bqPlayerBufferQueue); + assert(NULL == context); + + short *nextBuffer = buffer[curBuffer]; + int nextSize = sizeof(buffer[0]); + + SLresult result = (*bqPlayerBufferQueue)->Enqueue(bqPlayerBufferQueue, nextBuffer, nextSize); + + // Comment from sample code: + // the most likely other result is SL_RESULT_BUFFER_INSUFFICIENT, + // which for this code example would indicate a programming error + assert(SL_RESULT_SUCCESS == result); + + curBuffer ^= 1; // Switch buffer + // Render to the fresh buffer + g_mixer->Mix(reinterpret_cast(buffer[curBuffer]), BUFFER_SIZE_IN_SAMPLES); +} +bool OpenSLESStream::Start() +{ + SLresult result; + // create engine + result = slCreateEngine(&engineObject, 0, NULL, 0, NULL, NULL); + assert(SL_RESULT_SUCCESS == result); + result = (*engineObject)->Realize(engineObject, SL_BOOLEAN_FALSE); + assert(SL_RESULT_SUCCESS == result); + result = (*engineObject)->GetInterface(engineObject, SL_IID_ENGINE, &engineEngine); + assert(SL_RESULT_SUCCESS == result); + result = (*engineEngine)->CreateOutputMix(engineEngine, &outputMixObject, 0, 0, 0); + assert(SL_RESULT_SUCCESS == result); + result = (*outputMixObject)->Realize(outputMixObject, SL_BOOLEAN_FALSE); + assert(SL_RESULT_SUCCESS == result); + + SLDataLocator_AndroidSimpleBufferQueue loc_bufq = {SL_DATALOCATOR_ANDROIDSIMPLEBUFFERQUEUE, 2}; + SLDataFormat_PCM format_pcm = { + SL_DATAFORMAT_PCM, + 2, + SL_SAMPLINGRATE_44_1, + SL_PCMSAMPLEFORMAT_FIXED_16, + SL_PCMSAMPLEFORMAT_FIXED_16, + SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT, + SL_BYTEORDER_LITTLEENDIAN + }; + + SLDataSource audioSrc = {&loc_bufq, &format_pcm}; + + // configure audio sink + SLDataLocator_OutputMix loc_outmix = {SL_DATALOCATOR_OUTPUTMIX, outputMixObject}; + SLDataSink audioSnk = {&loc_outmix, NULL}; + + // create audio player + const SLInterfaceID ids[2] = {SL_IID_BUFFERQUEUE, SL_IID_VOLUME}; + const SLboolean req[2] = {SL_BOOLEAN_TRUE, SL_BOOLEAN_TRUE}; + result = (*engineEngine)->CreateAudioPlayer(engineEngine, &bqPlayerObject, &audioSrc, &audioSnk, 2, ids, req); + assert(SL_RESULT_SUCCESS == result); + + result = (*bqPlayerObject)->Realize(bqPlayerObject, SL_BOOLEAN_FALSE); + assert(SL_RESULT_SUCCESS == result); + result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_PLAY, &bqPlayerPlay); + assert(SL_RESULT_SUCCESS == result); + result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_BUFFERQUEUE, + &bqPlayerBufferQueue); + assert(SL_RESULT_SUCCESS == result); + result = (*bqPlayerBufferQueue)->RegisterCallback(bqPlayerBufferQueue, bqPlayerCallback, NULL); + assert(SL_RESULT_SUCCESS == result); + result = (*bqPlayerPlay)->SetPlayState(bqPlayerPlay, SL_PLAYSTATE_PLAYING); + assert(SL_RESULT_SUCCESS == result); + + // Render and enqueue a first buffer. (or should we just play the buffer empty?) + curBuffer = 0; + + result = (*bqPlayerBufferQueue)->Enqueue(bqPlayerBufferQueue, buffer[curBuffer], sizeof(buffer[curBuffer])); + if (SL_RESULT_SUCCESS != result) { + return false; + } + curBuffer ^= 1; + g_mixer = m_mixer; + return true; +} + +void OpenSLESStream::Stop() +{ + if (bqPlayerObject != NULL) { + (*bqPlayerObject)->Destroy(bqPlayerObject); + bqPlayerObject = NULL; + bqPlayerPlay = NULL; + bqPlayerBufferQueue = NULL; + bqPlayerMuteSolo = NULL; + bqPlayerVolume = NULL; + } + if (outputMixObject != NULL) { + (*outputMixObject)->Destroy(outputMixObject); + outputMixObject = NULL; + } + if (engineObject != NULL) { + (*engineObject)->Destroy(engineObject); + engineObject = NULL; + engineEngine = NULL; + } +} +#endif diff --git a/Source/Core/AudioCommon/Src/OpenSLESStream.h b/Source/Core/AudioCommon/Src/OpenSLESStream.h new file mode 100644 index 0000000000..f49160c4bc --- /dev/null +++ b/Source/Core/AudioCommon/Src/OpenSLESStream.h @@ -0,0 +1,48 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _OPENSLSTREAM_H_ +#define _OPENSLSTREAM_H_ + +#include "Thread.h" +#include "SoundStream.h" + +class OpenSLESStream : public SoundStream +{ +#ifdef ANDROID +public: + OpenSLESStream(CMixer *mixer, void *hWnd = NULL) + : SoundStream(mixer) + {}; + + virtual ~OpenSLESStream() {}; + + virtual bool Start(); + virtual void Stop(); + static bool isValid() { return true; } + virtual bool usesMixer() const { return true; } + +private: + std::thread thread; + Common::Event soundSyncEvent; +#else +public: + OpenSLESStream(CMixer *mixer, void *hWnd = NULL): SoundStream(mixer) {} +#endif // HAVE_OPENSL +}; + +#endif diff --git a/Source/Core/Common/CMakeLists.txt b/Source/Core/Common/CMakeLists.txt index 92d2bec668..480a08a0fc 100644 --- a/Source/Core/Common/CMakeLists.txt +++ b/Source/Core/Common/CMakeLists.txt @@ -1,9 +1,7 @@ -set(SRCS Src/ABI.cpp - Src/BreakPoints.cpp +set(SRCS Src/BreakPoints.cpp Src/CDUtils.cpp Src/ColorUtil.cpp Src/ConsoleListener.cpp - Src/CPUDetect.cpp Src/FileSearch.cpp Src/FileUtil.cpp Src/Hash.cpp @@ -20,10 +18,10 @@ set(SRCS Src/ABI.cpp Src/SymbolDB.cpp Src/SysConf.cpp Src/Thread.cpp - Src/Thunk.cpp Src/Timer.cpp Src/Version.cpp Src/VideoBackendBase.cpp + Src/x64ABI.cpp Src/x64Analyzer.cpp Src/x64Emitter.cpp Src/Crypto/aes_cbc.cpp @@ -33,6 +31,23 @@ set(SRCS Src/ABI.cpp Src/Crypto/md5.cpp Src/Crypto/sha1.cpp) +if(_M_ARM) #ARM + set(SRCS ${SRCS} + Src/ArmCPUDetect.cpp + Src/ArmEmitter.cpp) +else() + if(NOT _M_GENERIC) #X86 + set(SRCS ${SRCS} + Src/x64FPURoundMode.cpp + Src/x64Thunk.cpp + ) + endif() + set(SRCS ${SRCS} Src/x64CPUDetect.cpp) +endif() +if(_M_GENERIC) #Generic + set(SRCS ${SRCS} + Src/GenericFPURoundMode.cpp) +endif() if(WIN32) set(SRCS ${SRCS} Src/ExtendedTrace.cpp) endif(WIN32) diff --git a/Source/Core/Common/Common.vcxproj b/Source/Core/Common/Common.vcxproj index 0128a75a09..626a4df909 100644 --- a/Source/Core/Common/Common.vcxproj +++ b/Source/Core/Common/Common.vcxproj @@ -158,12 +158,10 @@ - - @@ -195,15 +193,17 @@ - + + + + - @@ -251,6 +251,7 @@ + diff --git a/Source/Core/Common/Common.vcxproj.filters b/Source/Core/Common/Common.vcxproj.filters index 8b05ecbba4..d427b70cdd 100644 --- a/Source/Core/Common/Common.vcxproj.filters +++ b/Source/Core/Common/Common.vcxproj.filters @@ -1,11 +1,9 @@  - - @@ -23,7 +21,6 @@ - @@ -53,9 +50,12 @@ Crypto + + + + - @@ -121,6 +121,7 @@ + diff --git a/Source/Core/Common/Src/ArmCPUDetect.cpp b/Source/Core/Common/Src/ArmCPUDetect.cpp new file mode 100644 index 0000000000..c03ab08c48 --- /dev/null +++ b/Source/Core/Common/Src/ArmCPUDetect.cpp @@ -0,0 +1,160 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "Common.h" +#include "CPUDetect.h" +#include "StringUtil.h" + +const char procfile[] = "/proc/cpuinfo"; + +char *GetCPUString() +{ + const char marker[] = "Hardware\t: "; + char *cpu_string = 0; + // Count the number of processor lines in /proc/cpuinfo + char buf[1024]; + FILE *fp; + + fp = fopen(procfile, "r"); + if (!fp) + return 0; + + while (fgets(buf, sizeof(buf), fp)) + { + if (strncmp(buf, marker, sizeof(marker) - 1)) + continue; + cpu_string = buf + sizeof(marker) - 1; + cpu_string = strndup(cpu_string, strlen(cpu_string) - 1); // Strip the newline + break; + } + return cpu_string; +} +bool CheckCPUFeature(const char *feature) +{ + const char marker[] = "Features\t: "; + char buf[1024]; + FILE *fp; + + fp = fopen(procfile, "r"); + if (!fp) + return 0; + + while (fgets(buf, sizeof(buf), fp)) + { + if (strncmp(buf, marker, sizeof(marker) - 1)) + continue; + char *featurestring = buf + sizeof(marker) - 1; + char *token = strtok(featurestring, " "); + while (token != NULL) + { + if (strstr(token, feature)) + return true; + token = strtok(NULL, " "); + } + } + return false; +} +int GetCoreCount() +{ + const char marker[] = "processor\t: "; + int cores = 0; + char buf[1024]; + FILE *fp; + + fp = fopen(procfile, "r"); + if (!fp) + return 0; + + while (fgets(buf, sizeof(buf), fp)) + { + if (strncmp(buf, marker, sizeof(marker) - 1)) + continue; + ++cores; + } + return cores; +} + +CPUInfo cpu_info; + +CPUInfo::CPUInfo() { + Detect(); +} + +// Detects the various cpu features +void CPUInfo::Detect() +{ + // Set some defaults here + // When ARMv8 cpus come out, these need to be updated. + HTT = false; + OS64bit = false; + CPU64bit = false; + Mode64bit = false; + vendor = VENDOR_ARM; + + // Get the information about the CPU + strncpy(cpu_string, GetCPUString(), sizeof(cpu_string)); + num_cores = GetCoreCount(); + bSwp = CheckCPUFeature("swp"); + bHalf = CheckCPUFeature("half"); + bThumb = CheckCPUFeature("thumb"); + bFastMult = CheckCPUFeature("fastmult"); + bVFP = CheckCPUFeature("vfp"); + bEDSP = CheckCPUFeature("edsp"); + bThumbEE = CheckCPUFeature("thumbee"); + bNEON = CheckCPUFeature("neon"); + bVFPv3 = CheckCPUFeature("vfpv3"); + bTLS = CheckCPUFeature("tls"); + bVFPv4 = CheckCPUFeature("vfpv4"); + bIDIVa = CheckCPUFeature("idiva"); + bIDIVt = CheckCPUFeature("idivt"); + // These two are ARMv8 specific. + bFP = CheckCPUFeature("fp"); + bASIMD = CheckCPUFeature("asimd"); + + +#if defined(__ARM_ARCH_7A__) + bArmV7 = true; +#else + bArmV7 = false; +#endif +} + +// Turn the cpu info into a string we can show +std::string CPUInfo::Summarize() +{ + std::string sum; + if (num_cores == 1) + sum = StringFromFormat("%s, %i core", cpu_string, num_cores); + else + sum = StringFromFormat("%s, %i cores", cpu_string, num_cores); + + if (bSwp) sum += ", SWP"; + if (bHalf) sum += ", Half"; + if (bThumb) sum += ", Thumb"; + if (bFastMult) sum += ", FastMult"; + if (bVFP) sum += ", VFP"; + if (bEDSP) sum += ", EDSP"; + if (bThumbEE) sum += ", ThumbEE"; + if (bNEON) sum += ", NEON"; + if (bVFPv3) sum += ", VFPv3"; + if (bTLS) sum += ", TLS"; + if (bVFPv4) sum += ", VFPv4"; + if (bIDIVa) sum += ", IDIVa"; + if (bIDIVt) sum += ", IDIVt"; + + return sum; +} diff --git a/Source/Core/Common/Src/ArmEmitter.cpp b/Source/Core/Common/Src/ArmEmitter.cpp new file mode 100644 index 0000000000..6463dbe2ef --- /dev/null +++ b/Source/Core/Common/Src/ArmEmitter.cpp @@ -0,0 +1,967 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "Common.h" +#include "ArmEmitter.h" +#include "CPUDetect.h" + +#include +#include + +// For cache flushing on Symbian/Blackberry +#ifdef __SYMBIAN32__ +#include +#endif + +#ifdef BLACKBERRY +#include +#endif + +namespace ArmGen +{ + +inline u32 RotR(u32 a, int amount) { + if (!amount) return a; + return (a >> amount) | (a << (32 - amount)); +} + +inline u32 RotL(u32 a, int amount) { + if (!amount) return a; + return (a << amount) | (a >> (32 - amount)); +} + +bool TryMakeOperand2(u32 imm, Operand2 &op2) { + // Just brute force it. + for (int i = 0; i < 16; i++) { + int mask = RotR(0xFF, i * 2); + if ((imm & mask) == imm) { + op2 = Operand2((u8)(RotL(imm, i * 2)), (u8)i); + return true; + } + } + return false; +} + +bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse) +{ + if (!TryMakeOperand2(imm, op2)) { + *inverse = true; + return TryMakeOperand2(~imm, op2); + } else { + *inverse = false; + return true; + } +} + +bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated) +{ + if (!TryMakeOperand2(imm, op2)) { + *negated = true; + return TryMakeOperand2(-imm, op2); + } else { + *negated = false; + return true; + } +} + +void ARMXEmitter::MOVI2R(ARMReg reg, u32 val, bool optimize) +{ + Operand2 op2; + bool inverse; + if (!optimize) + { + // Only used in backpatch atm + // Only support ARMv7 right now + if (cpu_info.bArmV7) { + MOVW(reg, val & 0xFFFF); + MOVT(reg, val, true); + } + else + { + // ARMv6 version won't use backpatch for now + // Run again with optimizations + MOVI2R(reg, val); + } + } else if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) { + if (!inverse) + MOV(reg, op2); + else + MVN(reg, op2); + } else { + if (cpu_info.bArmV7) { + // ARMv7 - can use MOVT/MOVW, best choice + MOVW(reg, val & 0xFFFF); + if(val & 0xFFFF0000) + MOVT(reg, val, true); + } else { + // ARMv6 - fallback sequence. + // TODO: Optimize further. Can for example choose negation etc. + // Literal pools is another way to do this but much more complicated + // so I can't really be bothered for an outdated CPU architecture like ARMv6. + bool first = true; + int shift = 16; + for (int i = 0; i < 4; i++) { + if (val & 0xFF) { + if (first) { + MOV(reg, Operand2((u8)val, (u8)(shift & 0xF))); + first = false; + } else { + ORR(reg, reg, Operand2((u8)val, (u8)(shift & 0xF))); + } + } + shift -= 4; + val >>= 8; + } + } + } +} +// Moves IMM to memory location +void ARMXEmitter::ARMABI_MOVI2M(Operand2 op, Operand2 val) +{ + // This moves imm to a memory location + MOVW(R14, val); MOVT(R14, val, true); + MOVW(R12, op); MOVT(R12, op, true); + STR(R12, R14); // R10 is what we want to store +} +void ARMXEmitter::QuickCallFunction(ARMReg reg, void *func) { + MOVI2R(reg, (u32)(func)); + BL(reg); +} + +void ARMXEmitter::SetCodePtr(u8 *ptr) +{ + code = ptr; + startcode = code; +} + +const u8 *ARMXEmitter::GetCodePtr() const +{ + return code; +} + +u8 *ARMXEmitter::GetWritableCodePtr() +{ + return code; +} + +void ARMXEmitter::ReserveCodeSpace(u32 bytes) +{ + for (u32 i = 0; i < bytes/4; i++) + Write32(0xE1200070); //bkpt 0 +} + +const u8 *ARMXEmitter::AlignCode16() +{ + ReserveCodeSpace((-(s32)code) & 15); + return code; +} + +const u8 *ARMXEmitter::AlignCodePage() +{ + ReserveCodeSpace((-(s32)code) & 4095); + return code; +} + +void ARMXEmitter::FlushIcache() +{ + FlushIcacheSection(lastCacheFlushEnd, code); + lastCacheFlushEnd = code; +} + +void ARMXEmitter::FlushIcacheSection(u8 *start, u8 *end) +{ +#ifdef __SYMBIAN32__ + User::IMB_Range( start, end); +#elif defined(BLACKBERRY) + msync(start, end - start, MS_SYNC | MS_INVALIDATE_ICACHE); +#else +#ifndef _WIN32 +#ifdef ANDROID + __builtin___clear_cache (start, end); +#else + // If on Linux, we HAVE to clear from start addr or else everything gets /really/ unstable + __builtin___clear_cache (startcode, end); +#endif +#endif +#endif +} + +void ARMXEmitter::SetCC(CCFlags cond) +{ + condition = cond << 28; +} + +void ARMXEmitter::NOP(int count) +{ + for (int i = 0; i < count; i++) { + Write32(condition | 0x01A00000); + } +} + +void ARMXEmitter::SETEND(bool BE) +{ + //SETEND is non-conditional + Write32( 0xF1010000 | (BE << 9)); +} +void ARMXEmitter::BKPT(u16 arg) +{ + Write32(condition | 0x01200070 | (arg << 4 & 0x000FFF00) | (arg & 0x0000000F)); +} +void ARMXEmitter::YIELD() +{ + Write32(condition | 0x0320F001); +} + +FixupBranch ARMXEmitter::B() +{ + FixupBranch branch; + branch.type = 0; // Zero for B + branch.ptr = code; + branch.condition = condition; + //We'll write NOP here for now. + Write32(condition | 0x01A00000); + return branch; +} +FixupBranch ARMXEmitter::BL() +{ + FixupBranch branch; + branch.type = 1; // Zero for B + branch.ptr = code; + branch.condition = condition; + //We'll write NOP here for now. + Write32(condition | 0x01A00000); + return branch; +} + +FixupBranch ARMXEmitter::B_CC(CCFlags Cond) +{ + FixupBranch branch; + branch.type = 0; // Zero for B + branch.ptr = code; + branch.condition = Cond << 28; + //We'll write NOP here for now. + Write32(condition | 0x01A00000); + return branch; +} +void ARMXEmitter::B_CC(CCFlags Cond, const void *fnptr) +{ + s32 distance = (s32)fnptr - (s32(code) + 8); + _assert_msg_(DYNA_REC, distance > -33554432 + && distance <= 33554432, + "B_CC out of range (%p calls %p)", code, fnptr); + + Write32((Cond << 28) | 0x0A000000 | ((distance >> 2) & 0x00FFFFFF)); +} +FixupBranch ARMXEmitter::BL_CC(CCFlags Cond) +{ + FixupBranch branch; + branch.type = 1; // Zero for B + branch.ptr = code; + branch.condition = Cond << 28; + //We'll write NOP here for now. + Write32(condition | 0x01A00000); + return branch; +} +void ARMXEmitter::SetJumpTarget(FixupBranch const &branch) +{ + s32 distance = (s32(code) - 8) - (s32)branch.ptr; + _assert_msg_(DYNA_REC, distance > -33554432 + && distance <= 33554432, + "SetJumpTarget out of range (%p calls %p)", code, + branch.ptr); + if(branch.type == 0) // B + *(u32*)branch.ptr = (u32)(branch.condition | (10 << 24) | ((distance >> 2) & + 0x00FFFFFF)); + else // BL + *(u32*)branch.ptr = (u32)(branch.condition | 0x0B000000 | ((distance >> 2) + & 0x00FFFFFF)); +} +void ARMXEmitter::B (const void *fnptr) +{ + s32 distance = (s32)fnptr - (s32(code) + 8); + _assert_msg_(DYNA_REC, distance > -33554432 + && distance <= 33554432, + "B out of range (%p calls %p)", code, fnptr); + + Write32(condition | 0x0A000000 | ((distance >> 2) & 0x00FFFFFF)); +} + +void ARMXEmitter::B(ARMReg src) +{ + Write32(condition | 0x12FFF10 | src); +} + +void ARMXEmitter::BL(const void *fnptr) +{ + s32 distance = (s32)fnptr - (s32(code) + 8); + _assert_msg_(DYNA_REC, distance > -33554432 + && distance <= 33554432, + "BL out of range (%p calls %p)", code, fnptr); + Write32(condition | 0x0B000000 | ((distance >> 2) & 0x00FFFFFF)); +} +void ARMXEmitter::BL(ARMReg src) +{ + Write32(condition | 0x12FFF30 | src); +} +void ARMXEmitter::PUSH(const int num, ...) +{ + u16 RegList = 0; + u8 Reg; + int i; + va_list vl; + va_start(vl, num); + for (i=0;i> 16 : Rm); } + +void ARMXEmitter::WriteInstruction (u32 Op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags) // This can get renamed later +{ + s32 op = InstOps[Op][Rm.GetType()]; // Type always decided by last operand + u32 Data = Rm.GetData(); + if (Rm.GetType() == TYPE_IMM) + { + switch (Op) + { + // MOV cases that support IMM16 + case 16: + case 17: + Data = Rm.Imm16(); + break; + default: + break; + } + } + if (op == -1) + _assert_msg_(DYNA_REC, false, "%s not yet support %d", InstNames[Op], Rm.GetType()); + Write32(condition | (op << 21) | (SetFlags ? (1 << 20) : 0) | Rn << 16 | Rd << 12 | Data); +} + +// Data Operations +void ARMXEmitter::WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2) +{ + Write32(condition | (0x7 << 24) | (Op << 20) | (dest << 16) | (Op2 << 12) | (r1 << 8) | (Op3 << 5) | (1 << 4) | r2); +} +void ARMXEmitter::UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor) +{ + if (!cpu_info.bIDIVa) + PanicAlert("Trying to use integer divide on hardware that doesn't support it. Bad programmer."); + WriteSignedMultiply(3, 0xF, 0, dest, divisor, dividend); +} +void ARMXEmitter::SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor) +{ + if (!cpu_info.bIDIVa) + PanicAlert("Trying to use integer divide on hardware that doesn't support it. Bad programmer."); + WriteSignedMultiply(1, 0xF, 0, dest, divisor, dividend); +} +void ARMXEmitter::LSL (ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, false, dest, src, op2);} +void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, true, dest, src, op2);} +void ARMXEmitter::LSL (ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(1, false, dest, src, op2);} +void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(1, true, dest, src, op2);} +void ARMXEmitter::MUL (ARMReg dest, ARMReg src, ARMReg op2) +{ + Write32(condition | (dest << 16) | (src << 8) | (9 << 4) | op2); +} +void ARMXEmitter::MULS(ARMReg dest, ARMReg src, ARMReg op2) +{ + Write32(condition | (1 << 20) | (dest << 16) | (src << 8) | (9 << 4) | op2); +} + +void ARMXEmitter::Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) { + Write32(condition | (op << 20) | (destHi << 16) | (destLo << 12) | (rm << 8) | (9 << 4) | rn); +} + +void ARMXEmitter::UMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) +{ + Write4OpMultiply(0x8, destLo, destHi, rn, rm); +} + +void ARMXEmitter::SMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) +{ + Write4OpMultiply(0xC, destLo, destHi, rn, rm); +} +void ARMXEmitter::SXTB (ARMReg dest, ARMReg op2) +{ + Write32(condition | (0x6AF << 16) | (dest << 12) | (7 << 4) | op2); +} +void ARMXEmitter::SXTH (ARMReg dest, ARMReg op2, u8 rotation) +{ + SXTAH(dest, (ARMReg)15, op2, rotation); +} +void ARMXEmitter::SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation) +{ + // bits ten and 11 are the rotation amount, see 8.8.232 for more + // information + Write32(condition | (0x6B << 20) | (src << 16) | (dest << 12) | (rotation << 10) | (7 << 4) | op2); +} +void ARMXEmitter::REV (ARMReg dest, ARMReg src ) +{ + Write32(condition | (107 << 20) | (15 << 16) | (dest << 12) | (243 << 4) | src); +} +void ARMXEmitter::REV16(ARMReg dest, ARMReg src) +{ + Write32(condition | (0x3DF << 16) | (dest << 12) | (0xFD << 4) | src); +} + +void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g, Operand2 op2) +{ + Write32(condition | (0x320F << 12) | (write_nzcvq << 19) | (write_g << 18) | op2.Imm12Mod()); +} +void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g, ARMReg src) +{ + Write32(condition | (0x120F << 12) | (write_nzcvq << 19) | (write_g << 18) | src); +} +void ARMXEmitter::MRS (ARMReg dest) +{ + Write32(condition | (16 << 20) | (15 << 16) | (dest << 12)); +} +void ARMXEmitter::WriteStoreOp(u32 op, ARMReg dest, ARMReg src, Operand2 op2) +{ + if (op2.GetData() == 0) // Don't index + Write32(condition | 0x01800000 | (op << 20) | (dest << 16) | (src << 12) | op2.Imm12()); + else + Write32(condition | (op << 20) | (3 << 23) | (dest << 16) | (src << 12) | op2.Imm12()); +} +void ARMXEmitter::STR (ARMReg dest, ARMReg src, Operand2 op) { WriteStoreOp(0x40, dest, src, op);} +void ARMXEmitter::STRB(ARMReg dest, ARMReg src, Operand2 op) { WriteStoreOp(0x44, dest, src, op);} +void ARMXEmitter::STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) +{ + Write32(condition | (0x60 << 20) | (Index << 24) | (Add << 23) | (dest << 16) | (base << 12) | offset); +} +void ARMXEmitter::LDREX(ARMReg dest, ARMReg base) +{ + Write32(condition | (25 << 20) | (base << 16) | (dest << 12) | 0xF9F); +} +void ARMXEmitter::STREX(ARMReg dest, ARMReg base, ARMReg op) +{ + _assert_msg_(DYNA_REC, (dest != base && dest != op), "STREX dest can't be other two registers"); + Write32(condition | (24 << 20) | (base << 16) | (dest << 12) | (0xF9 << 4) | op); +} +void ARMXEmitter::DMB () +{ + Write32(0xF57FF05E); +} +void ARMXEmitter::SVC(Operand2 op) +{ + Write32(condition | (0x0F << 24) | op.Imm24()); +} + +void ARMXEmitter::LDR (ARMReg dest, ARMReg src, Operand2 op) { WriteStoreOp(0x41, src, dest, op);} +void ARMXEmitter::LDRH(ARMReg dest, ARMReg src, Operand2 op) +{ + u8 Imm = op.Imm8(); + Write32(condition | (0x05 << 20) | (src << 16) | (dest << 12) | ((Imm >> 4) << 8) | (0xB << 4) | (Imm & 0x0F)); +} +void ARMXEmitter::LDRB(ARMReg dest, ARMReg src, Operand2 op) { WriteStoreOp(0x45, src, dest, op);} + +void ARMXEmitter::LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) +{ + Write32(condition | (0x61 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | offset); +} +void ARMXEmitter::WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList) +{ + Write32(condition | (op << 20) | (WriteBack << 21) | (dest << 16) | RegList); +} +void ARMXEmitter::STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...) +{ + u16 RegList = 0; + u8 Reg; + int i; + va_list vl; + va_start(vl, Regnum); + for (i=0;i= S0) + { + if (Reg >= D0) + { + if (Reg >= Q0) + return (ARMReg)((Reg - Q0) * 2); // Always gets encoded as a double register + return (ARMReg)(Reg - D0); + } + return (ARMReg)(Reg - S0); + } + return Reg; +} +// NEON Specific +void ARMXEmitter::VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to VADD(integer)"); + _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use VADD(integer) when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + + // Gets encoded as a double register + Vd = SubBase(Vd); + Vn = SubBase(Vn); + Vm = SubBase(Vm); + + Write32((0xF2 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \ + | ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \ + | ((Vm & 0x10) << 2) | (Vm & 0xF)); + +} +void ARMXEmitter::VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to VSUB(integer)"); + _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use VSUB(integer) when CPU doesn't support it"); + + // Gets encoded as a double register + Vd = SubBase(Vd); + Vn = SubBase(Vn); + Vm = SubBase(Vm); + + Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \ + | ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (1 << 6) \ + | ((Vm & 0x10) << 2) | (Vm & 0xF)); + +} + +// VFP Specific + +void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, u16 op) +{ + _assert_msg_(DYNA_REC, Dest >= S0 && Dest <= D31, "Passed Invalid dest register to VLDR"); + _assert_msg_(DYNA_REC, Base <= R15, "Passed invalid Base register to VLDR"); + _assert_msg_(DYNA_REC, !(op & 3), "Offset needs to be word aligned"); + bool single_reg = Dest < D0; + + Dest = SubBase(Dest); + + if (single_reg) + { + Write32(NO_COND | (0x1B << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \ + | ((Dest & 0x1E) << 11) | (10 << 8) | (op >> 2)); + + } + else + { + Write32(NO_COND | (0x1B << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \ + | ((Dest & 0xF) << 12) | (11 << 8) | (op >> 2)); + } +} +void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, u16 op) +{ + _assert_msg_(DYNA_REC, Src >= S0 && Src <= D31, "Passed invalid src register to VSTR"); + _assert_msg_(DYNA_REC, Base <= R15, "Passed invalid base register to VSTR"); + _assert_msg_(DYNA_REC, !(op & 3), "Offset needs to be word aligned"); + bool single_reg = Src < D0; + + Src = SubBase(Src); + + if (single_reg) + { + Write32(NO_COND | (0x1B << 23) | ((Src & 0x1) << 22) | (Base << 16) \ + | ((Src & 0x1E) << 11) | (10 << 8) | (op >> 2)); + + } + else + { + Write32(NO_COND | (0x1B << 23) | ((Src & 0x10) << 18) | (Base << 16) \ + | ((Src & 0xF) << 12) | (11 << 8) | (op >> 2)); + } +} +void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm) +{ + _assert_msg_(DYNA_REC, Vd < Q0, "Passed invalid Vd to VCMP"); + bool single_reg = Vd < D0; + + Vd = SubBase(Vd); + Vm = SubBase(Vm); + + if (single_reg) + { + Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x34 << 16) | ((Vd & 0x1E) << 11) \ + | (0x2B << 6) | ((Vm & 0x1) << 5) | (Vm >> 1)); + } + else + { + Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x34 << 16) | ((Vd & 0xF) << 12) \ + | (0x2F << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF)); + } +} +void ARMXEmitter::VCMP(ARMReg Vd) +{ + _assert_msg_(DYNA_REC, Vd < Q0, "Passed invalid Vd to VCMP"); + bool single_reg = Vd < D0; + + Vd = SubBase(Vd); + + if (single_reg) + { + Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x35 << 16) | ((Vd & 0x1E) << 11) \ + | (0x2B << 6)); + } + else + { + Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x35 << 16) | ((Vd & 0xF) << 12) \ + | (0x2F << 6)); + } +} +void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _assert_msg_(DYNA_REC, Vd < Q0, "Pased invalid dest register to VSQRT"); + _assert_msg_(DYNA_REC, Vn < Q0, "Passed invalid Vn to VSQRT"); + _assert_msg_(DYNA_REC, Vm < Q0, "Passed invalid Vm to VSQRT"); + bool single_reg = Vd < D0; + + Vd = SubBase(Vd); + Vn = SubBase(Vn); + Vm = SubBase(Vm); + + if (single_reg) + { + Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | ((Vn & 0x1E) << 15) \ + | ((Vd & 0x1E) << 11) | (0xA << 8) | ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) \ + | (Vm >> 1)); + } + else + { + Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | ((Vn & 0xF) << 16) \ + | ((Vd & 0xF) << 12) | (0xB << 8) | ((Vn & 0x10) << 3) | ((Vm & 0x10) << 2) \ + | (Vm & 0xF)); + } +} +void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm) +{ + _assert_msg_(DYNA_REC, Vd < Q0, "Pased invalid dest register to VSQRT"); + _assert_msg_(DYNA_REC, Vm < Q0, "Passed invalid Vm to VSQRT"); + bool single_reg = Vd < D0; + + Vd = SubBase(Vd); + Vm = SubBase(Vm); + + if (single_reg) + { + Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x31 << 16) \ + | ((Vd & 0x1E) << 11) | (0x2B << 6) | ((Vm & 0x1) << 5) | (Vm >> 1)); + } + else + { + Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x31 << 16) \ + | ((Vd & 0xF) << 12) | (0x2F << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF)); + } +} +// VFP and ASIMD +void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm) +{ + _assert_msg_(DYNA_REC, Vd < Q0, "VABS doesn't currently support Quad reg"); + _assert_msg_(DYNA_REC, Vd >= S0, "VABS doesn't support ARM Regs"); + bool single_reg = Vd < D0; + + Vd = SubBase(Vd); + Vm = SubBase(Vm); + + if (single_reg) + { + Write32(NO_COND | (0xEB << 20) | ((Vd & 0x1) << 6) | ((Vd & 0x1E) << 11) \ + | (0xAC << 4) | ((Vm & 0x1) << 5) | (Vm >> 1)); + } + else + { + Write32(NO_COND | (0xEB << 20) | ((Vd & 0x10) << 18) | ((Vd & 0xF) << 12) \ + | (0xBC << 4) | ((Vm & 0x10) << 1) | (Vm & 0xF)); + } +} +void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _assert_msg_(DYNA_REC, Vd >= S0, "Passed invalid dest register to VADD"); + _assert_msg_(DYNA_REC, Vn >= S0, "Passed invalid Vn to VADD"); + _assert_msg_(DYNA_REC, Vm >= S0, "Passed invalid Vm to VADD"); + bool single_reg = Vd < D0; + bool double_reg = Vd < Q0; + + Vd = SubBase(Vd); + Vn = SubBase(Vn); + Vm = SubBase(Vm); + + if (single_reg) + { + Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \ + | ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \ + | ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) | (Vm >> 1)); + } + else + { + if (double_reg) + { + Write32(NO_COND | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) \ + | ((Vn & 0xF) << 16) | ((Vd & 0xF) << 12) | (0xB << 8) \ + | ((Vn & 0x10) << 3) | ((Vm & 0x10) << 2) | (Vm & 0xF)); + } + else + { + _assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use VADD with Quad Reg without support!"); + Write32((0xF2 << 24) | ((Vd & 0x10) << 18) | ((Vn & 0xF) << 16) \ + | ((Vd & 0xF) << 12) | (0xD << 8) | ((Vn & 0x10) << 3) \ + | (1 << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF)); + } + } +} +void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _assert_msg_(DYNA_REC, Vd >= S0, "Passed invalid dest register to VSUB"); + _assert_msg_(DYNA_REC, Vn >= S0, "Passed invalid Vn to VSUB"); + _assert_msg_(DYNA_REC, Vm >= S0, "Passed invalid Vm to VSUB"); + bool single_reg = Vd < D0; + bool double_reg = Vd < Q0; + + Vd = SubBase(Vd); + Vn = SubBase(Vn); + Vm = SubBase(Vm); + + if (single_reg) + { + Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \ + | ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \ + | ((Vn & 0x1) << 7) | (1 << 6) | ((Vm & 0x1) << 5) | (Vm >> 1)); + } + else + { + if (double_reg) + { + Write32(NO_COND | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) \ + | ((Vn & 0xF) << 16) | ((Vd & 0xF) << 12) | (0xB << 8) \ + | ((Vn & 0x10) << 3) | (1 << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF)); + } + else + { + _assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use VADD with Quad Reg without support!"); + Write32((0xF2 << 24) | (1 << 21) | ((Vd & 0x10) << 18) | ((Vn & 0xF) << 16) \ + | ((Vd & 0xF) << 12) | (0xD << 8) | ((Vn & 0x10) << 3) \ + | (1 << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF)); + } + } +} + +void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src, bool high) +{ + _assert_msg_(DYNA_REC, Src < S0, "This VMOV doesn't support SRC other than ARM Reg"); + _assert_msg_(DYNA_REC, Dest >= D0, "This VMOV doesn't support DEST other than VFP"); + + Dest = SubBase(Dest); + + Write32(NO_COND | (0xE << 24) | (high << 21) | ((Dest & 0xF) << 16) | (Src << 12) \ + | (11 << 8) | ((Dest & 0x10) << 3) | (1 << 4)); +} +void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) +{ + if (Dest > R15) + { + if (Src < S0) + { + if (Dest < D0) + { + // Moving to a Neon register FROM ARM Reg + Dest = (ARMReg)(Dest - S0); + Write32(NO_COND | (0xE0 << 20) | ((Dest & 0x1E) << 15) | (Src << 12) \ + | (0xA << 8) | ((Dest & 0x1) << 7) | (1 << 4)); + return; + } + else + { + // Move 64bit from Arm reg + _assert_msg_(DYNA_REC, false, "This VMOV doesn't support moving 64bit ARM to NEON"); + return; + } + } + } + else + { + if (Src > R15) + { + if (Src < D0) + { + // Moving to ARM Reg from Neon Register + Src = (ARMReg)(Src - S0); + Write32(NO_COND | (0xE1 << 20) | ((Src & 0x1E) << 15) | (Dest << 12) \ + | (0xA << 8) | ((Src & 0x1) << 7) | (1 << 4)); + return; + } + else + { + // Move 64bit To Arm reg + _assert_msg_(DYNA_REC, false, "This VMOV doesn't support moving 64bit ARM From NEON"); + return; + } + } + else + { + // Move Arm reg to Arm reg + _assert_msg_(DYNA_REC, false, "VMOV doesn't support moving ARM registers"); + } + } + // Moving NEON registers + int SrcSize = Src < D0 ? 1 : Src < Q0 ? 2 : 4; + int DestSize = Dest < D0 ? 1 : Dest < Q0 ? 2 : 4; + bool Single = DestSize == 1; + bool Quad = DestSize == 4; + + _assert_msg_(DYNA_REC, SrcSize == DestSize, "VMOV doesn't support moving different register sizes"); + + Dest = SubBase(Dest); + Src = SubBase(Src); + + if (Single) + { + Write32(NO_COND | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | ((Dest & 0x1E) << 11) \ + | (0x5 << 9) | (1 << 6) | ((Src & 0x1) << 5) | ((Src & 0x1E) >> 1)); + } + else + { + // Double and quad + if (Quad) + { + _assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use quad registers when you don't support ASIMD."); + // Gets encoded as a Double register + Write32((0xF2 << 24) | ((Dest & 0x10) << 18) | (2 << 20) | ((Src & 0xF) << 16) \ + | ((Dest & 0xF) << 12) | (1 << 8) | ((Src & 0x10) << 3) | (1 << 6) \ + | ((Src & 0x10) << 1) | (1 << 4) | (Src & 0xF)); + + } + else + { + Write32(NO_COND | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | ((Dest & 0xF) << 12) \ + | (0x2D << 6) | ((Src & 0x10) << 1) | (Src & 0xF)); + } + } +} + +} diff --git a/Source/Core/Common/Src/ArmEmitter.h b/Source/Core/Common/Src/ArmEmitter.h new file mode 100644 index 0000000000..13635867a9 --- /dev/null +++ b/Source/Core/Common/Src/ArmEmitter.h @@ -0,0 +1,587 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!! + +#ifndef _DOLPHIN_ARM_CODEGEN_ +#define _DOLPHIN_ARM_CODEGEN_ + +#include "Common.h" +#include "MemoryUtil.h" +#if defined(__SYMBIAN32__) || defined(PANDORA) +#include +#endif + +#undef _IP +#undef R0 +#undef _SP +#undef _LR +#undef _PC + +namespace ArmGen +{ +enum ARMReg +{ + // GPRs + R0 = 0, R1, R2, R3, R4, R5, + R6, R7, R8, R9, R10, R11, + + // SPRs + // R13 - R15 are SP, LR, and PC. + // Almost always referred to by name instead of register number + R12 = 12, R13 = 13, R14 = 14, R15 = 15, + _IP = 12, _SP = 13, _LR = 14, _PC = 15, + + + // VFP single precision registers + S0, S1, S2, S3, S4, S5, S6, + S7, S8, S9, S10, S11, S12, S13, + S14, S15, S16, S17, S18, S19, S20, + S21, S22, S23, S24, S25, S26, S27, + S28, S29, S30, S31, + + // VFP Double Precision registers + D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15, + D16, D17, D18, D19, D20, D21, D22, D23, + D24, D25, D26, D27, D28, D29, D30, D31, + + // ASIMD Quad-Word registers + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, + Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, + INVALID_REG = 0xFFFFFFFF +}; + +enum CCFlags +{ + CC_EQ = 0, // Equal + CC_NEQ, // Not equal + CC_CS, // Carry Set + CC_CC, // Carry Clear + CC_MI, // Minus (Negative) + CC_PL, // Plus + CC_VS, // Overflow + CC_VC, // No Overflow + CC_HI, // Unsigned higher + CC_LS, // Unsigned lower or same + CC_GE, // Signed greater than or equal + CC_LT, // Signed less than + CC_GT, // Signed greater than + CC_LE, // Signed less than or equal + CC_AL, // Always (unconditional) 14 + CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same + CC_LO = CC_CC, // Alias of CC_CC Unsigned lower +}; +const u32 NO_COND = 0xE0000000; + +enum ShiftType +{ + ST_LSL = 0, + ST_ASL = 0, + ST_LSR = 1, + ST_ASR = 2, + ST_ROR = 3, + ST_RRX = 4 +}; +enum IntegerSize +{ + I_I8 = 0, + I_I16, + I_I32, + I_I64 +}; + +enum +{ + NUMGPRs = 13, +}; + +class ARMXEmitter; + +enum OpType +{ + TYPE_IMM = 0, + TYPE_REG, + TYPE_IMMSREG, + TYPE_RSR, + TYPE_MEM +}; + +// This is no longer a proper operand2 class. Need to split up. +class Operand2 +{ + friend class ARMXEmitter; +protected: + u32 Value; + +private: + OpType Type; + + // IMM types + u8 Rotation; // Only for u8 values + + // Register types + u8 IndexOrShift; + ShiftType Shift; +public: + OpType GetType() + { + return Type; + } + Operand2() {} + Operand2(u32 imm, OpType type = TYPE_IMM) + { + Type = type; + Value = imm; + Rotation = 0; + } + + Operand2(ARMReg Reg) + { + Type = TYPE_REG; + Value = Reg; + Rotation = 0; + } + Operand2(u8 imm, u8 rotation) + { + Type = TYPE_IMM; + Value = imm; + Rotation = rotation; + } + Operand2(ARMReg base, ShiftType type, ARMReg shift) // RSR + { + Type = TYPE_RSR; + _assert_msg_(DYNA_REC, type != ST_RRX, "Invalid Operand2: RRX does not take a register shift amount"); + IndexOrShift = shift; + Shift = type; + Value = base; + } + + Operand2(u8 shift, ShiftType type, ARMReg base)// For IMM shifted register + { + if(shift == 32) shift = 0; + switch (type) + { + case ST_LSL: + _assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: LSL %u", shift); + break; + case ST_LSR: + _assert_msg_(DYNA_REC, shift <= 32, "Invalid Operand2: LSR %u", shift); + if (!shift) + type = ST_LSL; + if (shift == 32) + shift = 0; + break; + case ST_ASR: + _assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: LSR %u", shift); + if (!shift) + type = ST_LSL; + if (shift == 32) + shift = 0; + break; + case ST_ROR: + _assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: ROR %u", shift); + if (!shift) + type = ST_LSL; + break; + case ST_RRX: + _assert_msg_(DYNA_REC, shift == 0, "Invalid Operand2: RRX does not take an immediate shift amount"); + type = ST_ROR; + break; + } + IndexOrShift = shift; + Shift = type; + Value = base; + Type = TYPE_IMMSREG; + } + const u32 GetData() + { + switch(Type) + { + case TYPE_IMM: + return Imm12Mod(); // This'll need to be changed later + case TYPE_REG: + return Rm(); + case TYPE_IMMSREG: + return IMMSR(); + case TYPE_RSR: + return RSR(); + default: + _assert_msg_(DYNA_REC, false, "GetData with Invalid Type"); + return 0; + } + } + const u32 IMMSR() // IMM shifted register + { + _assert_msg_(DYNA_REC, Type = TYPE_IMMSREG, "IMMSR must be imm shifted register"); + return ((IndexOrShift & 0x1f) << 7 | (Shift << 5) | Value); + } + const u32 RSR() // Register shifted register + { + _assert_msg_(DYNA_REC, Type == TYPE_RSR, "RSR must be RSR Of Course"); + return (IndexOrShift << 8) | (Shift << 5) | 0x10 | Value; + } + const u32 Rm() + { + _assert_msg_(DYNA_REC, Type == TYPE_REG, "Rm must be with Reg"); + return Value; + } + + const u32 Imm5() + { + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm5 not IMM value"); + return ((Value & 0x0000001F) << 7); + } + const u32 Imm8() + { + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8Rot not IMM value"); + return Value & 0xFF; + } + const u32 Imm8Rot() // IMM8 with Rotation + { + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8Rot not IMM value"); + _assert_msg_(DYNA_REC, (Rotation & 0xE1) != 0, "Invalid Operand2: immediate rotation %u", Rotation); + return (1 << 25) | (Rotation << 7) | (Value & 0x000000FF); + } + const u32 Imm12() + { + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm12 not IMM"); + return (Value & 0x00000FFF); + } + + const u32 Imm12Mod() + { + // This is a IMM12 with the top four bits being rotation and the + // bottom eight being a IMM. This is for instructions that need to + // expand a 8bit IMM to a 32bit value and gives you some rotation as + // well. + // Each rotation rotates to the right by 2 bits + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm12Mod not IMM"); + return ((Rotation & 0xF) << 8) | (Value & 0xFF); + } + const u32 Imm16() + { + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm16 not IMM"); + return ( (Value & 0xF000) << 4) | (Value & 0x0FFF); + } + const u32 Imm16Low() + { + return Imm16(); + } + const u32 Imm16High() // Returns high 16bits + { + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm16 not IMM"); + return ( ((Value >> 16) & 0xF000) << 4) | ((Value >> 16) & 0x0FFF); + } + const u32 Imm24() + { + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm16 not IMM"); + return (Value & 0x0FFFFFFF); + } + // NEON and ASIMD specific + const u32 Imm8ASIMD() + { + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8ASIMD not IMM"); + return ((Value & 0x80) << 17) | ((Value & 0x70) << 12) | (Value & 0xF); + } + const u32 Imm8VFP() + { + _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8VFP not IMM"); + return ((Value & 0xF0) << 12) | (Value & 0xF); + } +}; + +// Use these when you don't know if an imm can be represented as an operand2. +// This lets you generate both an optimal and a fallback solution by checking +// the return value, which will be false if these fail to find a Operand2 that +// represents your 32-bit imm value. +bool TryMakeOperand2(u32 imm, Operand2 &op2); +bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse); +bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated); + +inline Operand2 R(ARMReg Reg) { return Operand2(Reg, TYPE_REG); } +inline Operand2 IMM(u32 Imm) { return Operand2(Imm, TYPE_IMM); } +inline Operand2 Mem(void *ptr) { return Operand2((u32)ptr, TYPE_IMM); } +//usage: struct {int e;} s; STRUCT_OFFSET(s,e) +#define STRUCT_OFF(str,elem) ((u32)((u32)&(str).elem-(u32)&(str))) + + +struct FixupBranch +{ + u8 *ptr; + u32 condition; // Remembers our codition at the time + int type; //0 = B 1 = BL +}; + +typedef const u8* JumpTarget; + +class ARMXEmitter +{ + friend struct OpArg; // for Write8 etc +private: + u8 *code, *startcode; + u8 *lastCacheFlushEnd; + u32 condition; + + void WriteStoreOp(u32 op, ARMReg dest, ARMReg src, Operand2 op2); + void WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList); + void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, ARMReg op2); + void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2); + void WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2); + + void Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); + + // New Ops + void WriteInstruction(u32 op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags = false); + +protected: + inline void Write32(u32 value) {*(u32*)code = value; code+=4;} + +public: + ARMXEmitter() : code(0), startcode(0), lastCacheFlushEnd(0) { + condition = CC_AL << 28; + } + ARMXEmitter(u8 *code_ptr) { + code = code_ptr; + lastCacheFlushEnd = code_ptr; + startcode = code_ptr; + condition = CC_AL << 28; + } + virtual ~ARMXEmitter() {} + + void SetCodePtr(u8 *ptr); + void ReserveCodeSpace(u32 bytes); + const u8 *AlignCode16(); + const u8 *AlignCodePage(); + const u8 *GetCodePtr() const; + void FlushIcache(); + void FlushIcacheSection(u8 *start, u8 *end); + u8 *GetWritableCodePtr(); + + void SetCC(CCFlags cond = CC_AL); + + // Special purpose instructions + + // Dynamic Endian Switching + void SETEND(bool BE); + // Debug Breakpoint + void BKPT(u16 arg); + + // Hint instruction + void YIELD(); + + // Do nothing + void NOP(int count = 1); //nop padding - TODO: fast nop slides, for amd and intel (check their manuals) + +#ifdef CALL +#undef CALL +#endif + + // Branching + FixupBranch B(); + FixupBranch B_CC(CCFlags Cond); + void B_CC(CCFlags Cond, const void *fnptr); + FixupBranch BL(); + FixupBranch BL_CC(CCFlags Cond); + void SetJumpTarget(FixupBranch const &branch); + + void B (const void *fnptr); + void B (ARMReg src); + void BL(const void *fnptr); + void BL(ARMReg src); + + void PUSH(const int num, ...); + void POP(const int num, ...); + + // New Data Ops + void AND (ARMReg Rd, ARMReg Rn, Operand2 Rm); + void ANDS(ARMReg Rd, ARMReg Rn, Operand2 Rm); + void EOR (ARMReg dest, ARMReg src, Operand2 op2); + void EORS(ARMReg dest, ARMReg src, Operand2 op2); + void SUB (ARMReg dest, ARMReg src, Operand2 op2); + void SUBS(ARMReg dest, ARMReg src, Operand2 op2); + void RSB (ARMReg dest, ARMReg src, Operand2 op2); + void RSBS(ARMReg dest, ARMReg src, Operand2 op2); + void ADD (ARMReg dest, ARMReg src, Operand2 op2); + void ADDS(ARMReg dest, ARMReg src, Operand2 op2); + void ADC (ARMReg dest, ARMReg src, Operand2 op2); + void ADCS(ARMReg dest, ARMReg src, Operand2 op2); + void LSL (ARMReg dest, ARMReg src, Operand2 op2); + void LSL (ARMReg dest, ARMReg src, ARMReg op2); + void LSLS(ARMReg dest, ARMReg src, Operand2 op2); + void LSLS(ARMReg dest, ARMReg src, ARMReg op2); + void SBC (ARMReg dest, ARMReg src, Operand2 op2); + void SBCS(ARMReg dest, ARMReg src, Operand2 op2); + void REV (ARMReg dest, ARMReg src); + void REV16 (ARMReg dest, ARMReg src); + void RSC (ARMReg dest, ARMReg src, Operand2 op2); + void RSCS(ARMReg dest, ARMReg src, Operand2 op2); + void TST ( ARMReg src, Operand2 op2); + void TEQ ( ARMReg src, Operand2 op2); + void CMP ( ARMReg src, Operand2 op2); + void CMN ( ARMReg src, Operand2 op2); + void ORR (ARMReg dest, ARMReg src, Operand2 op2); + void ORRS(ARMReg dest, ARMReg src, Operand2 op2); + void MOV (ARMReg dest, Operand2 op2); + void MOVS(ARMReg dest, Operand2 op2); + void BIC (ARMReg dest, ARMReg src, Operand2 op2); // BIC = ANDN + void BICS(ARMReg dest, ARMReg src, Operand2 op2); + void MVN (ARMReg dest, Operand2 op2); + void MVNS(ARMReg dest, Operand2 op2); + void MOVW(ARMReg dest, Operand2 op2); + void MOVT(ARMReg dest, Operand2 op2, bool TopBits = false); + + // UDIV and SDIV are only available on CPUs that have + // the idiva hardare capacity + void UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor); + void SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor); + + void MUL (ARMReg dest, ARMReg src, ARMReg op2); + void MULS(ARMReg dest, ARMReg src, ARMReg op2); + + void UMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); + void SMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); + + void SXTB(ARMReg dest, ARMReg op2); + void SXTH(ARMReg dest, ARMReg op2, u8 rotation = 0); + void SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation = 0); + // Using just MSR here messes with our defines on the PPC side of stuff (when this code was in dolphin...) + // Just need to put an underscore here, bit annoying. + void _MSR (bool nzcvq, bool g, Operand2 op2); + void _MSR (bool nzcvq, bool g, ARMReg src ); + void MRS (ARMReg dest); + + // Memory load/store operations + void LDR (ARMReg dest, ARMReg src, Operand2 op2 = 0); + // Offset adds to the base register in LDR + void LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); + void LDRH(ARMReg dest, ARMReg src, Operand2 op = 0); + void LDRB(ARMReg dest, ARMReg src, Operand2 op2 = 0); + void STR (ARMReg dest, ARMReg src, Operand2 op2 = 0); + // Offset adds on to the destination register in STR + void STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); + + void STRB(ARMReg dest, ARMReg src, Operand2 op2 = 0); + void STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...); + void LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...); + + // Exclusive Access operations + void LDREX(ARMReg dest, ARMReg base); + // dest contains the result if the instruction managed to store the value + void STREX(ARMReg dest, ARMReg base, ARMReg op); + void DMB (); + void SVC(Operand2 op); + + // NEON and ASIMD instructions + // None of these will be created with conditional since ARM + // is deprecating conditional execution of ASIMD instructions. + // ASIMD instructions don't even have a conditional encoding. + + // Subtracts the base from the register to give us the real one + ARMReg SubBase(ARMReg Reg); + // NEON Only + void VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + + // VFP Only + void VLDR(ARMReg Dest, ARMReg Base, u16 op); + void VSTR(ARMReg Src, ARMReg Base, u16 op); + void VCMP(ARMReg Vd, ARMReg Vm); + // Compares against zero + void VCMP(ARMReg Vd); + void VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VSQRT(ARMReg Vd, ARMReg Vm); + + // NEON and VFP + void VABS(ARMReg Vd, ARMReg Vm); + void VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm); + + void VMOV(ARMReg Dest, ARMReg Src, bool high); + void VMOV(ARMReg Dest, ARMReg Src); + + void QuickCallFunction(ARMReg scratchreg, void *func); + // Utility functions + void MOVI2R(ARMReg reg, u32 val, bool optimize = true); + void ARMABI_MOVI2M(Operand2 op, Operand2 val); +}; // class ARMXEmitter + + +// Everything that needs to generate X86 code should inherit from this. +// You get memory management for free, plus, you can use all the MOV etc functions without +// having to prefix them with gen-> or something similar. +class ARMXCodeBlock : public ARMXEmitter +{ +protected: + u8 *region; + size_t region_size; + +public: + ARMXCodeBlock() : region(NULL), region_size(0) {} + virtual ~ARMXCodeBlock() { if (region) FreeCodeSpace(); } + + // Call this before you generate any code. + void AllocCodeSpace(int size) + { + region_size = size; + region = (u8*)AllocateExecutableMemory(region_size); + SetCodePtr(region); + } + + // Always clear code space with breakpoints, so that if someone accidentally executes + // uninitialized, it just breaks into the debugger. + void ClearCodeSpace() + { + // x86/64: 0xCC = breakpoint + memset(region, 0xCC, region_size); + ResetCodePtr(); + } + + // Call this when shutting down. Don't rely on the destructor, even though it'll do the job. + void FreeCodeSpace() + { + FreeMemoryPages(region, region_size); + region = NULL; + region_size = 0; + } + + bool IsInSpace(u8 *ptr) + { + return ptr >= region && ptr < region + region_size; + } + + // Cannot currently be undone. Will write protect the entire code region. + // Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()). + void WriteProtect() + { + WriteProtectMemory(region, region_size, true); + } + + void ResetCodePtr() + { + SetCodePtr(region); + } + + size_t GetSpaceLeft() const + { + return region_size - (GetCodePtr() - region); + } +}; + +} // namespace + +#endif // _DOLPHIN_INTEL_CODEGEN_ diff --git a/Source/Core/Common/Src/CPUDetect.h b/Source/Core/Common/Src/CPUDetect.h index 928b34c109..351120e3a2 100644 --- a/Source/Core/Common/Src/CPUDetect.h +++ b/Source/Core/Common/Src/CPUDetect.h @@ -25,7 +25,8 @@ enum CPUVendor { VENDOR_INTEL = 0, VENDOR_AMD = 1, - VENDOR_OTHER = 2, + VENDOR_ARM = 2, + VENDOR_OTHER = 3, }; struct CPUInfo @@ -55,6 +56,26 @@ struct CPUInfo bool bAES; bool bLAHFSAHF64; bool bLongMode; + + // ARM specific CPUInfo + bool bSwp; + bool bHalf; + bool bThumb; + bool bFastMult; + bool bVFP; + bool bEDSP; + bool bThumbEE; + bool bNEON; + bool bVFPv3; + bool bTLS; + bool bVFPv4; + bool bIDIVa; + bool bIDIVt; + bool bArmV7; // enable MOVT, MOVW etc + + // ARMv8 specific + bool bFP; + bool bASIMD; // Call Detect() explicit CPUInfo(); diff --git a/Source/Core/Common/Src/Common.h b/Source/Core/Common/Src/Common.h index 0132915a5c..904095e255 100644 --- a/Source/Core/Common/Src/Common.h +++ b/Source/Core/Common/Src/Common.h @@ -133,7 +133,9 @@ private: // wxWidgets does not have a true dummy macro for this. #define _trans(a) a -#if defined __GNUC__ +#if defined _M_GENERIC +# define _M_SSE 0x0 +#elif defined __GNUC__ # if defined __SSE4_2__ # define _M_SSE 0x402 # elif defined __SSE4_1__ @@ -144,7 +146,7 @@ private: # define _M_SSE 0x300 # endif #elif (_MSC_VER >= 1500) || __INTEL_COMPILER // Visual Studio 2008 -# define _M_SSE 0x402 +# define _M_SSE 0x402 #endif // Host communication. diff --git a/Source/Core/Common/Src/CommonFuncs.h b/Source/Core/Common/Src/CommonFuncs.h index 585fe0999a..7c6bcdc703 100644 --- a/Source/Core/Common/Src/CommonFuncs.h +++ b/Source/Core/Common/Src/CommonFuncs.h @@ -35,7 +35,7 @@ template<> struct CompileTimeAssert {}; #define b32(x) (b16(x) | (b16(x) >>16) ) #define ROUND_UP_POW2(x) (b32(x - 1) + 1) -#if defined __GNUC__ && !defined __SSSE3__ +#if defined __GNUC__ && !defined __SSSE3__ && !defined _M_GENERIC #include static __inline __m128i __attribute__((__always_inline__)) _mm_shuffle_epi8(__m128i a, __m128i mask) @@ -60,6 +60,8 @@ _mm_shuffle_epi8(__m128i a, __m128i mask) // go to debugger mode #ifdef GEKKO #define Crash() + #elif defined _M_GENERIC + #define Crash() { exit(1); } #else #define Crash() {asm ("int $3");} #endif @@ -136,6 +138,15 @@ inline u8 swap8(u8 _data) {return _data;} inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);} inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);} inline u64 swap64(u64 _data) {return _byteswap_uint64(_data);} +#elif _M_ARM +#ifdef ANDROID +#undef swap16 +#undef swap32 +#undef swap64 +#endif +inline u16 swap16 (u16 _data) { u32 data = _data; __asm__ ("rev16 %0, %1\n" : "=l" (data) : "l" (data)); return (u16)data;} +inline u32 swap32 (u32 _data) {__asm__ ("rev %0, %1\n" : "=l" (_data) : "l" (_data)); return _data;} +inline u64 swap64(u64 _data) {return ((u64)swap32(_data) << 32) | swap32(_data >> 32);} #elif __linux__ inline u16 swap16(u16 _data) {return bswap_16(_data);} inline u32 swap32(u32 _data) {return bswap_32(_data);} @@ -161,7 +172,6 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3 inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);} inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);} inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);} - } // Namespace Common #endif // _COMMONFUNCS_H_ diff --git a/Source/Core/Common/Src/CommonPaths.h b/Source/Core/Common/Src/CommonPaths.h index 9e14d7cf7e..cab7ce3762 100644 --- a/Source/Core/Common/Src/CommonPaths.h +++ b/Source/Core/Common/Src/CommonPaths.h @@ -36,6 +36,9 @@ // You can use the File::GetUserPath() util for this #define USERDATA_DIR "Contents/Resources/User" #define DOLPHIN_DATA_DIR "Library/Application Support/Dolphin" +#elif defined ANDROID + #define USERDATA_DIR "user" + #define DOLPHIN_DATA_DIR "/sdcard/dolphin-emu" #else #define USERDATA_DIR "user" #ifdef USER_DIR @@ -52,6 +55,8 @@ #define SYSDATA_DIR "Contents/Resources/Sys" #define SHARED_USER_DIR File::GetBundleDirectory() + \ DIR_SEP USERDATA_DIR DIR_SEP +#elif defined ANDROID + #define SYSDATA_DIR "/sdcard/dolphin-emu" #else #ifdef DATA_DIR #define SYSDATA_DIR DATA_DIR "sys" diff --git a/Source/Core/Common/Src/FPURoundMode.h b/Source/Core/Common/Src/FPURoundMode.h new file mode 100644 index 0000000000..acfd89c313 --- /dev/null +++ b/Source/Core/Common/Src/FPURoundMode.h @@ -0,0 +1,51 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ +#ifndef FPU_ROUND_MODE_H_ +#define FPU_ROUND_MODE_H_ +#include "Common.h" + +namespace FPURoundMode +{ + enum RoundModes + { + ROUND_NEAR = 0, + ROUND_CHOP, + ROUND_UP, + ROUND_DOWN + }; + enum PrecisionModes { + PREC_24 = 0, + PREC_53, + PREC_64 + }; + void SetRoundMode(u32 mode); + + void SetPrecisionMode(u32 mode); + + void SetSIMDMode(u32 mode); + + /* + There are two different flavors of float to int conversion: + _mm_cvtps_epi32() and _mm_cvttps_epi32(). The first rounds + according to the MXCSR rounding bits. The second one always + uses round towards zero. + */ + void SaveSIMDState(); + void LoadSIMDState(); + void LoadDefaultSIMDState(); +} +#endif diff --git a/Source/Core/Common/Src/FileUtil.cpp b/Source/Core/Common/Src/FileUtil.cpp index 491db88e41..f42c204396 100644 --- a/Source/Core/Common/Src/FileUtil.cpp +++ b/Source/Core/Common/Src/FileUtil.cpp @@ -668,9 +668,10 @@ std::string &GetUserPath(const unsigned int DirIDX, const std::string &newPath) if (File::Exists(ROOT_DIR DIR_SEP USERDATA_DIR)) paths[D_USER_IDX] = ROOT_DIR DIR_SEP USERDATA_DIR DIR_SEP; else - paths[D_USER_IDX] = std::string(getenv("HOME") ? getenv("HOME") : getenv("PWD")) + DIR_SEP DOLPHIN_DATA_DIR DIR_SEP; + paths[D_USER_IDX] = std::string(getenv("HOME") ? + getenv("HOME") : getenv("PWD") ? + getenv("PWD") : "") + DIR_SEP DOLPHIN_DATA_DIR DIR_SEP; #endif - INFO_LOG(COMMON, "GetUserPath: Setting user directory to %s:", paths[D_USER_IDX].c_str()); paths[D_GCUSER_IDX] = paths[D_USER_IDX] + GC_USER_DIR DIR_SEP; paths[D_WIIROOT_IDX] = paths[D_USER_IDX] + WII_USER_DIR; diff --git a/Source/Core/Common/Src/GenericFPURoundMode.cpp b/Source/Core/Common/Src/GenericFPURoundMode.cpp new file mode 100644 index 0000000000..cc878291a1 --- /dev/null +++ b/Source/Core/Common/Src/GenericFPURoundMode.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "FPURoundMode.h" + +// Generic, do nothing +namespace FPURoundMode +{ + void SetRoundMode(u32 mode) + { + } + void SetPrecisionMode(u32 mode) + { + } + void SetSIMDMode(u32 mode) + { + } + void SaveSIMDState() + { + } + void LoadSIMDState() + { + } + void LoadDefaultSIMDState() + { + } +} diff --git a/Source/Core/Common/Src/LogManager.cpp b/Source/Core/Common/Src/LogManager.cpp index a79c413cf8..3beae08c15 100644 --- a/Source/Core/Common/Src/LogManager.cpp +++ b/Source/Core/Common/Src/LogManager.cpp @@ -17,6 +17,9 @@ #include +#ifdef ANDROID +#include "Host.h" +#endif #include "LogManager.h" #include "ConsoleListener.h" #include "Timer.h" @@ -132,7 +135,9 @@ void LogManager::Log(LogTypes::LOG_LEVELS level, LogTypes::LOG_TYPE type, Common::Timer::GetTimeFormatted().c_str(), file, line, level_to_char[(int)level], log->GetShortName(), temp); - +#ifdef ANDROID + Host_SysMessage(msg); +#endif log->Trigger(level, msg); } diff --git a/Source/Core/Common/Src/MathUtil.cpp b/Source/Core/Common/Src/MathUtil.cpp index e078db2e9d..b97d55e62c 100644 --- a/Source/Core/Common/Src/MathUtil.cpp +++ b/Source/Core/Common/Src/MathUtil.cpp @@ -21,13 +21,6 @@ #include #include -namespace { - -static u32 saved_sse_state = _mm_getcsr(); -static const u32 default_sse_state = _mm_getcsr(); - -} - namespace MathUtil { @@ -114,23 +107,6 @@ u32 ClassifyFloat(float fvalue) } // namespace -void LoadDefaultSSEState() -{ - _mm_setcsr(default_sse_state); -} - - -void LoadSSEState() -{ - _mm_setcsr(saved_sse_state); -} - - -void SaveSSEState() -{ - saved_sse_state = _mm_getcsr(); -} - inline void MatrixMul(int n, const float *a, const float *b, float *result) { for (int i = 0; i < n; ++i) diff --git a/Source/Core/Common/Src/MathUtil.h b/Source/Core/Common/Src/MathUtil.h index a6290ff602..114a91bf3c 100644 --- a/Source/Core/Common/Src/MathUtil.h +++ b/Source/Core/Common/Src/MathUtil.h @@ -20,8 +20,8 @@ #include "Common.h" -#include #include +#include "FPURoundMode.h" namespace MathUtil { @@ -147,17 +147,6 @@ struct Rectangle inline float pow2f(float x) {return x * x;} inline double pow2(double x) {return x * x;} - -/* - There are two different flavors of float to int conversion: - _mm_cvtps_epi32() and _mm_cvttps_epi32(). The first rounds - according to the MXCSR rounding bits. The second one always - uses round towards zero. - */ - -void SaveSSEState(); -void LoadSSEState(); -void LoadDefaultSSEState(); float MathFloatVectorSum(const std::vector&); #define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1)) diff --git a/Source/Core/Common/Src/MemArena.cpp b/Source/Core/Common/Src/MemArena.cpp index 1dcdef6672..0a2b9e0306 100644 --- a/Source/Core/Common/Src/MemArena.cpp +++ b/Source/Core/Common/Src/MemArena.cpp @@ -27,6 +27,10 @@ #include #include #include +#ifdef ANDROID +#include +#include +#endif #endif #if defined(__APPLE__) @@ -34,11 +38,41 @@ static const char* ram_temp_file = "/tmp/gc_mem.tmp"; #elif !defined(_WIN32) // non OSX unixes static const char* ram_temp_file = "/dev/shm/gc_mem.tmp"; #endif +#ifdef ANDROID +#define ASHMEM_DEVICE "/dev/ashmem" + +int AshmemCreateFileMapping(const char *name, size_t size) +{ + int fd, ret; + fd = open(ASHMEM_DEVICE, O_RDWR); + if (fd < 0) + return fd; + + // We don't really care if we can't set the name, it is optional + ret = ioctl(fd, ASHMEM_SET_NAME, name); + + ret = ioctl(fd, ASHMEM_SET_SIZE, size); + if (ret < 0) + { + close(fd); + NOTICE_LOG(MEMMAP, "Ashmem returned error: 0x%08x", ret); + return ret; + } + return fd; +} +#endif void MemArena::GrabLowMemSpace(size_t size) { #ifdef _WIN32 hMemoryMapping = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, (DWORD)(size), NULL); +#elif defined(ANDROID) + fd = AshmemCreateFileMapping("Dolphin-emu", size); + if (fd < 0) + { + NOTICE_LOG(MEMMAP, "Ashmem allocation failed"); + return; + } #else mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; fd = open(ram_temp_file, O_RDWR | O_CREAT, mode); diff --git a/Source/Core/Common/Src/MemoryUtil.cpp b/Source/Core/Common/Src/MemoryUtil.cpp index 0888746d42..ff50347799 100644 --- a/Source/Core/Common/Src/MemoryUtil.cpp +++ b/Source/Core/Common/Src/MemoryUtil.cpp @@ -117,9 +117,12 @@ void* AllocateAlignedMemory(size_t size,size_t alignment) void* ptr = _aligned_malloc(size,alignment); #else void* ptr = NULL; +#ifdef ANDROID + ptr = memalign(alignment, size); +#else if (posix_memalign(&ptr, alignment, size) != 0) ERROR_LOG(MEMMAP, "Failed to allocate aligned memory"); -; +#endif #endif // printf("Mapped memory at %p (size %ld)\n", ptr, diff --git a/Source/Core/Common/Src/StdConditionVariable.h b/Source/Core/Common/Src/StdConditionVariable.h index 1b81766ee0..11de6536b4 100644 --- a/Source/Core/Common/Src/StdConditionVariable.h +++ b/Source/Core/Common/Src/StdConditionVariable.h @@ -5,7 +5,7 @@ #define GCC_VER(x,y,z) ((x) * 10000 + (y) * 100 + (z)) #define GCC_VERSION GCC_VER(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#if GCC_VERSION >= GCC_VER(4,4,0) && __GXX_EXPERIMENTAL_CXX0X__ +#if GCC_VERSION >= GCC_VER(4,4,0) && __GXX_EXPERIMENTAL_CXX0X__ && !ANDROID // GCC 4.4 provides #include #else diff --git a/Source/Core/Common/Src/StdMutex.h b/Source/Core/Common/Src/StdMutex.h index 8949e905ac..8a5d22f928 100644 --- a/Source/Core/Common/Src/StdMutex.h +++ b/Source/Core/Common/Src/StdMutex.h @@ -5,7 +5,7 @@ #define GCC_VER(x,y,z) ((x) * 10000 + (y) * 100 + (z)) #define GCC_VERSION GCC_VER(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#if GCC_VERSION >= GCC_VER(4,4,0) && __GXX_EXPERIMENTAL_CXX0X__ +#if GCC_VERSION >= GCC_VER(4,4,0) && __GXX_EXPERIMENTAL_CXX0X__ && !ANDROID // GCC 4.4 provides #include #else diff --git a/Source/Core/Common/Src/StdThread.h b/Source/Core/Common/Src/StdThread.h index fea058c874..6897235658 100644 --- a/Source/Core/Common/Src/StdThread.h +++ b/Source/Core/Common/Src/StdThread.h @@ -5,7 +5,7 @@ #define GCC_VER(x,y,z) ((x) * 10000 + (y) * 100 + (z)) #define GCC_VERSION GCC_VER(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#if GCC_VERSION >= GCC_VER(4,4,0) && __GXX_EXPERIMENTAL_CXX0X__ +#if GCC_VERSION >= GCC_VER(4,4,0) && __GXX_EXPERIMENTAL_CXX0X__ && !ANDROID // GCC 4.4 provides #ifndef _GLIBCXX_USE_SCHED_YIELD #define _GLIBCXX_USE_SCHED_YIELD diff --git a/Source/Core/Common/Src/StringUtil.cpp b/Source/Core/Common/Src/StringUtil.cpp index e5c70b2c6b..6ef3d91a73 100644 --- a/Source/Core/Common/Src/StringUtil.cpp +++ b/Source/Core/Common/Src/StringUtil.cpp @@ -245,7 +245,7 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st while(1) { const int pos = result.find(src); - if (pos == -1) break; + if (pos == 16) break; result.replace(pos, src.size(), dest); } return result; @@ -263,25 +263,25 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st const char HEX2DEC[256] = { /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ - /* 0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - /* 1 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - /* 2 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - /* 3 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1, + /* 0 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* 1 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* 2 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* 3 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,16,16, 16,16,16,16, - /* 4 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, - /* 5 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - /* 6 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, - /* 7 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + /* 4 */ 16,10,11,12, 13,14,15,16, 16,16,16,16, 16,16,16,16, + /* 5 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* 6 */ 16,10,11,12, 13,14,15,16, 16,16,16,16, 16,16,16,16, + /* 7 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, - /* 8 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - /* 9 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - /* A */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - /* B */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + /* 8 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* 9 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* A */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* B */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, - /* C */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - /* D */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - /* E */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - /* F */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1 + /* C */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* D */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* E */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* F */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16 }; std::string UriDecode(const std::string & sSrc) @@ -303,8 +303,8 @@ std::string UriDecode(const std::string & sSrc) if (*pSrc == '%') { char dec1, dec2; - if (-1 != (dec1 = HEX2DEC[*(pSrc + 1)]) - && -1 != (dec2 = HEX2DEC[*(pSrc + 2)])) + if (16 != (dec1 = HEX2DEC[*(pSrc + 1)]) + && 16 != (dec2 = HEX2DEC[*(pSrc + 2)])) { *pEnd++ = (dec1 << 4) + dec2; pSrc += 3; diff --git a/Source/Core/Common/Src/Thread.cpp b/Source/Core/Common/Src/Thread.cpp index 73f83c204f..3e8440b74f 100644 --- a/Source/Core/Common/Src/Thread.cpp +++ b/Source/Core/Common/Src/Thread.cpp @@ -105,7 +105,7 @@ void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) #ifdef __APPLE__ thread_policy_set(pthread_mach_thread_np(thread), THREAD_AFFINITY_POLICY, (integer_t *)&mask, 1); -#elif defined __linux__ || defined BSD4_4 +#elif (defined __linux__ || defined BSD4_4) && !(defined ANDROID) cpu_set_t cpu_set; CPU_ZERO(&cpu_set); diff --git a/Source/Core/Common/Src/Thread.h b/Source/Core/Common/Src/Thread.h index e711fcab5c..8b38be1de1 100644 --- a/Source/Core/Common/Src/Thread.h +++ b/Source/Core/Common/Src/Thread.h @@ -33,8 +33,6 @@ #define INFINITE 0xffffffff #endif -#include - //for gettimeofday and struct time(spec|val) #include #include diff --git a/Source/Core/Common/Src/ABI.cpp b/Source/Core/Common/Src/x64ABI.cpp similarity index 99% rename from Source/Core/Common/Src/ABI.cpp rename to Source/Core/Common/Src/x64ABI.cpp index 63a8e76934..cd2f410dbf 100644 --- a/Source/Core/Common/Src/ABI.cpp +++ b/Source/Core/Common/Src/x64ABI.cpp @@ -17,7 +17,7 @@ #include "Common.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" using namespace Gen; diff --git a/Source/Core/Common/Src/ABI.h b/Source/Core/Common/Src/x64ABI.h similarity index 100% rename from Source/Core/Common/Src/ABI.h rename to Source/Core/Common/Src/x64ABI.h diff --git a/Source/Core/Common/Src/CPUDetect.cpp b/Source/Core/Common/Src/x64CPUDetect.cpp similarity index 98% rename from Source/Core/Common/Src/CPUDetect.cpp rename to Source/Core/Common/Src/x64CPUDetect.cpp index 282929e7b2..2898b91bec 100644 --- a/Source/Core/Common/Src/CPUDetect.cpp +++ b/Source/Core/Common/Src/x64CPUDetect.cpp @@ -30,7 +30,9 @@ #else //#include +#ifndef _M_GENERIC #include +#endif #if defined __FreeBSD__ #include @@ -39,7 +41,9 @@ static inline void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { -#ifdef _LP64 +#if defined _M_GENERIC + (*eax) = (*ebx) = (*ecx) = (*edx) = 0; +#elif defined _LP64 // Note: EBX is reserved on Mac OS X and in PIC on Linux, so it has to // restored at the end of the asm block. __asm__ ( diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index 5f8d0a66f1..9f15845875 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -17,7 +17,7 @@ #include "Common.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "CPUDetect.h" namespace Gen diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index 3b699c81a0..6b8c59b5a5 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -757,7 +757,7 @@ public: region_size = 0; } - bool IsInCodeSpace(u8 *ptr) + bool IsInSpace(u8 *ptr) { return ptr >= region && ptr < region + region_size; } diff --git a/Source/Core/Common/Src/x64FPURoundMode.cpp b/Source/Core/Common/Src/x64FPURoundMode.cpp new file mode 100644 index 0000000000..0beefd860a --- /dev/null +++ b/Source/Core/Common/Src/x64FPURoundMode.cpp @@ -0,0 +1,120 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "Common.h" +#include "FPURoundMode.h" + +#ifndef _WIN32 +static const unsigned short FPU_ROUND_NEAR = 0 << 10; +static const unsigned short FPU_ROUND_DOWN = 1 << 10; +static const unsigned short FPU_ROUND_UP = 2 << 10; +static const unsigned short FPU_ROUND_CHOP = 3 << 10; +static const unsigned short FPU_ROUND_MASK = 3 << 10; +#include +#endif + +const u32 MASKS = 0x1F80; // mask away the interrupts. +const u32 DAZ = 0x40; +const u32 FTZ = 0x8000; + +namespace FPURoundMode +{ + // Get the default SSE states here. + static u32 saved_sse_state = _mm_getcsr(); + static const u32 default_sse_state = _mm_getcsr(); + + void SetRoundMode(u32 mode) + { + // Set FPU rounding mode to mimic the PowerPC's + #ifdef _M_IX86 + // This shouldn't really be needed anymore since we use SSE + #ifdef _WIN32 + const int table[4] = + { + _RC_NEAR, + _RC_CHOP, + _RC_UP, + _RC_DOWN + }; + _set_controlfp(_MCW_RC, table[mode]); + #else + const unsigned short table[4] = + { + FPU_ROUND_NEAR, + FPU_ROUND_CHOP, + FPU_ROUND_UP, + FPU_ROUND_DOWN + }; + unsigned short _mode; + asm ("fstcw %0" : "=m" (_mode) : ); + _mode = (_mode & ~FPU_ROUND_MASK) | table[mode]; + asm ("fldcw %0" : : "m" (_mode)); + #endif + #endif + } + + void SetPrecisionMode(u32 mode) + { + #ifdef _M_IX86 + // sets the floating-point lib to 53-bit + // PowerPC has a 53bit floating pipeline only + // eg: sscanf is very sensitive + #ifdef _WIN32 + _control87(_PC_53, MCW_PC); + #else + const unsigned short table[4] = { + 0 << 8, // FPU_PREC_24 + 2 << 8, // FPU_PREC_53 + 3 << 8, // FPU_PREC_64 + 3 << 8, // FPU_PREC_MASK + }; + unsigned short _mode; + asm ("fstcw %0" : : "m" (_mode)); + _mode = (_mode & ~table[4]) | table[mode]; + asm ("fldcw %0" : : "m" (_mode)); + #endif + #else + //x64 doesn't need this - fpu is done with SSE + //but still - set any useful sse options here + #endif + } + void SetSIMDMode(u32 mode) + { + static const u32 ssetable[4] = + { + (0 << 13) | MASKS, + (3 << 13) | MASKS, + (2 << 13) | MASKS, + (1 << 13) | MASKS, + }; + u32 csr = ssetable[mode]; + _mm_setcsr(csr); + } + + void SaveSIMDState() + { + saved_sse_state = _mm_getcsr(); + } + void LoadSIMDState() + { + _mm_setcsr(saved_sse_state); + } + void LoadDefaultSIMDState() + { + _mm_setcsr(default_sse_state); + } +} diff --git a/Source/Core/Common/Src/Thunk.cpp b/Source/Core/Common/Src/x64Thunk.cpp similarity index 98% rename from Source/Core/Common/Src/Thunk.cpp rename to Source/Core/Common/Src/x64Thunk.cpp index 1792df3e77..b9fd54f354 100644 --- a/Source/Core/Common/Src/Thunk.cpp +++ b/Source/Core/Common/Src/x64Thunk.cpp @@ -18,9 +18,8 @@ #include #include "Common.h" -#include "x64Emitter.h" #include "MemoryUtil.h" -#include "ABI.h" +#include "x64ABI.h" #include "Thunk.h" #define THUNK_ARENA_SIZE 1024*1024*1 diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 6ffeeaec3a..b8976f42ea 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -9,7 +9,6 @@ set(SRCS Src/ActionReplay.cpp Src/DSPEmulator.cpp Src/GeckoCodeConfig.cpp Src/GeckoCode.cpp - Src/MemTools.cpp Src/Movie.cpp Src/NetPlay.cpp Src/NetPlayClient.cpp @@ -153,6 +152,7 @@ set(SRCS Src/ActionReplay.cpp Src/PowerPC/PPCTables.cpp Src/PowerPC/Profiler.cpp Src/PowerPC/SignatureDB.cpp + Src/PowerPC/JitInterface.cpp Src/PowerPC/Interpreter/Interpreter_Branch.cpp Src/PowerPC/Interpreter/Interpreter.cpp Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp @@ -162,6 +162,15 @@ set(SRCS Src/ActionReplay.cpp Src/PowerPC/Interpreter/Interpreter_Paired.cpp Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp Src/PowerPC/Interpreter/Interpreter_Tables.cpp + Src/PowerPC/JitCommon/JitAsmCommon.cpp + Src/PowerPC/JitCommon/JitBackpatch.cpp + Src/PowerPC/JitCommon/JitBase.cpp + Src/PowerPC/JitCommon/JitCache.cpp + Src/PowerPC/JitCommon/Jit_Util.cpp) + +if(NOT _M_GENERIC) + set(SRCS ${SRCS} + Src/x64MemTools.cpp Src/PowerPC/Jit64IL/IR.cpp Src/PowerPC/Jit64IL/IR_X86.cpp Src/PowerPC/Jit64IL/JitILAsm.cpp @@ -186,12 +195,25 @@ set(SRCS Src/ActionReplay.cpp Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp Src/PowerPC/Jit64/Jit_Paired.cpp Src/PowerPC/Jit64/JitRegCache.cpp - Src/PowerPC/Jit64/Jit_SystemRegisters.cpp - Src/PowerPC/JitCommon/JitAsmCommon.cpp - Src/PowerPC/JitCommon/JitBackpatch.cpp - Src/PowerPC/JitCommon/JitBase.cpp - Src/PowerPC/JitCommon/JitCache.cpp - Src/PowerPC/JitCommon/Jit_Util.cpp) + Src/PowerPC/Jit64/Jit_SystemRegisters.cpp) +endif() +if(_M_ARM) + set(SRCS ${SRCS} + Src/ArmMemTools.cpp + Src/PowerPC/JitArm32/Jit.cpp + Src/PowerPC/JitArm32/JitAsm.cpp + Src/PowerPC/JitArm32/JitArm_BackPatch.cpp + Src/PowerPC/JitArm32/JitArm_Tables.cpp + Src/PowerPC/JitArm32/JitArmCache.cpp + Src/PowerPC/JitArm32/JitRegCache.cpp + Src/PowerPC/JitArm32/JitFPRCache.cpp + Src/PowerPC/JitArm32/JitArm_Branch.cpp + Src/PowerPC/JitArm32/JitArm_Integer.cpp + Src/PowerPC/JitArm32/JitArm_LoadStore.cpp + Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp + Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp + Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp) +endif() set(LIBS bdisasm inputcommon videosoftware sfml-network) diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj index bd3d97f11b..1dd547b2ea 100644 --- a/Source/Core/Core/Core.vcxproj +++ b/Source/Core/Core/Core.vcxproj @@ -332,7 +332,7 @@ - + @@ -378,6 +378,7 @@ + @@ -387,10 +388,10 @@ - false - false - false - + false + false + false + Create Create @@ -563,6 +564,7 @@ + diff --git a/Source/Core/Core/Core.vcxproj.filters b/Source/Core/Core/Core.vcxproj.filters index 246dafd972..b4f3f47be7 100644 --- a/Source/Core/Core/Core.vcxproj.filters +++ b/Source/Core/Core/Core.vcxproj.filters @@ -5,9 +5,8 @@ - - + @@ -562,6 +561,9 @@ HW %28Flipper/Hollywood%29\GCMemcard + + PowerPC + @@ -1048,6 +1050,9 @@ HW %28Flipper/Hollywood%29\GCMemcard + + PowerPC + diff --git a/Source/Core/Core/Src/ArmMemTools.cpp b/Source/Core/Core/Src/ArmMemTools.cpp new file mode 100644 index 0000000000..34a0c34b7d --- /dev/null +++ b/Source/Core/Core/Src/ArmMemTools.cpp @@ -0,0 +1,103 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + + +#include +#include +#ifdef ANDROID +#include +#else +#include // Look in here for the context definition. +#include +#endif + +#include "Common.h" +#include "MemTools.h" +#include "HW/Memmap.h" +#include "PowerPC/PowerPC.h" +#include "PowerPC/JitInterface.h" +#include "PowerPC/JitCommon/JitBase.h" + +namespace EMM +{ +#ifdef ANDROID +typedef struct sigcontext mcontext_t; +typedef struct ucontext { + uint32_t uc_flags; + struct ucontext* uc_link; + stack_t uc_stack; + mcontext_t uc_mcontext; + // Other fields are not used by Google Breakpad. Don't define them. +} ucontext_t; +#endif + +void sigsegv_handler(int signal, siginfo_t *info, void *raw_context) +{ + if (signal != SIGSEGV) + { + // We are not interested in other signals - handle it as usual. + return; + } + ucontext_t *context = (ucontext_t *)raw_context; + int sicode = info->si_code; + if (sicode != SEGV_MAPERR && sicode != SEGV_ACCERR) + { + // Huh? Return. + return; + } + + + // Get all the information we can out of the context. + mcontext_t *ctx = &context->uc_mcontext; + + void *fault_memory_ptr = (void*)ctx->arm_r10; + u8 *fault_instruction_ptr = (u8 *)ctx->arm_pc; + + if (!JitInterface::IsInCodeSpace(fault_instruction_ptr)) { + // Let's not prevent debugging. + return; + } + + u64 bad_address = (u64)fault_memory_ptr; + u64 memspace_bottom = (u64)Memory::base; + if (bad_address < memspace_bottom) { + PanicAlertT("Exception handler - access below memory space. %08llx%08llx", + bad_address >> 32, bad_address); + } + + u32 em_address = (u32)(bad_address - memspace_bottom); + + int access_type = 0; + + CONTEXT fake_ctx; + fake_ctx.reg_pc = ctx->arm_pc; + const u8 *new_rip = jit->BackPatch(fault_instruction_ptr, access_type, em_address, &fake_ctx); + if (new_rip) { + ctx->arm_pc = fake_ctx.reg_pc; + } +} + +void InstallExceptionHandler() +{ + struct sigaction sa; + sa.sa_handler = 0; + sa.sa_sigaction = &sigsegv_handler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaction(SIGSEGV, &sa, NULL); +} +} // namespace diff --git a/Source/Core/Core/Src/ConfigManager.h b/Source/Core/Core/Src/ConfigManager.h index 0c4ceffa9f..4a36b37335 100644 --- a/Source/Core/Core/Src/ConfigManager.h +++ b/Source/Core/Core/Src/ConfigManager.h @@ -35,7 +35,7 @@ #define BACKEND_OPENAL "OpenAL" #define BACKEND_PULSEAUDIO "Pulse" #define BACKEND_XAUDIO2 "XAudio2" - +#define BACKEND_OPENSLES "OpenSLES" struct SConfig : NonCopyable { // Wii Devices diff --git a/Source/Core/Core/Src/Core.cpp b/Source/Core/Core/Src/Core.cpp index a6c07eeaaa..f7337def45 100644 --- a/Source/Core/Core/Src/Core.cpp +++ b/Source/Core/Core/Src/Core.cpp @@ -54,7 +54,6 @@ #include "IPC_HLE/WII_IPC_HLE_Device_usb.h" #include "PowerPC/PowerPC.h" -#include "PowerPC/JitCommon/JitBase.h" #include "DSPEmulator.h" #include "ConfigManager.h" @@ -140,7 +139,7 @@ void DisplayMessage(const char *message, int time_in_ms) if (_CoreParameter.bRenderToMain && SConfig::GetInstance().m_InterfaceStatusbar) { - Host_UpdateStatusBar(message); + Host_UpdateStatusBar(message); } else Host_UpdateTitle(message); @@ -189,7 +188,7 @@ bool IsGPUThread() return IsCPUThread(); } } - + // This is called from the GUI thread. See the booting call schedule in // BootManager.cpp bool Init() @@ -310,7 +309,7 @@ void CpuThread() g_video_backend->Video_Prepare(); } - #if defined(_M_X64) + #if defined(_M_X64) || _M_ARM EMM::InstallExceptionHandler(); // Let's run under memory watch #endif diff --git a/Source/Core/Core/Src/DSP/DSPEmitter.cpp b/Source/Core/Core/Src/DSP/DSPEmitter.cpp index 2ce441bf31..12dac52e31 100644 --- a/Source/Core/Core/Src/DSP/DSPEmitter.cpp +++ b/Source/Core/Core/Src/DSP/DSPEmitter.cpp @@ -25,7 +25,7 @@ #include "DSPAnalyzer.h" #include "Jit/DSPJitUtil.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #define MAX_BLOCK_SIZE 250 #define DSP_IDLE_SKIP_CYCLES 0x1000 diff --git a/Source/Core/Core/Src/DSP/DSPHWInterface.cpp b/Source/Core/Core/Src/DSP/DSPHWInterface.cpp index f2cbbc2b9f..e4c81e4a7a 100644 --- a/Source/Core/Core/Src/DSP/DSPHWInterface.cpp +++ b/Source/Core/Core/Src/DSP/DSPHWInterface.cpp @@ -252,7 +252,9 @@ static void gdsp_idma_out(u16 dsp_addr, u32 addr, u32 size) ERROR_LOG(DSPLLE, "*** idma_out IRAM_DSP (0x%04x) -> RAM (0x%08x) : size (0x%08x)", dsp_addr / 2, addr, size); } +#if _M_SSE >= 0x301 static const __m128i s_mask = _mm_set_epi32(0x0E0F0C0DL, 0x0A0B0809L, 0x06070405L, 0x02030001L); +#endif // TODO: These should eat clock cycles. static void gdsp_ddma_in(u16 dsp_addr, u32 addr, u32 size) diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp index 0b75431de6..185ada8ba3 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp @@ -25,7 +25,7 @@ #include "DSPJitUtil.h" #endif #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" using namespace Gen; // CLR $acR diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp index 8ca770e35d..252550b84c 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp @@ -21,7 +21,7 @@ #include "../DSPAnalyzer.h" #include "DSPJitUtil.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" using namespace Gen; diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp index e382b6114a..be7675378d 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp @@ -24,7 +24,7 @@ #include "../DSPEmitter.h" #include "DSPJitUtil.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" using namespace Gen; // In: RAX: s64 _Value diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp index 8203f7cde1..40e39a89e9 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp @@ -18,7 +18,7 @@ #include "../DSPEmitter.h" #include "DSPJitUtil.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" using namespace Gen; diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp index a784e2dd2d..248ed8f945 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp @@ -22,7 +22,7 @@ #include "../DSPEmitter.h" #include "DSPJitUtil.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" using namespace Gen; // SRS @M, $(0x18+S) diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp index 8d8a635536..d0bf7a3822 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp @@ -20,7 +20,7 @@ #include "../DSPEmitter.h" #include "DSPJitUtil.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" using namespace Gen; //clobbers: diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitMultiplier.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitMultiplier.cpp index 1323de002d..0dcbf41877 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitMultiplier.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitMultiplier.cpp @@ -27,7 +27,7 @@ #include "DSPJitUtil.h" #endif #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" using namespace Gen; // Returns s64 in RAX diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp index b9e166f698..2fb18f83da 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp @@ -20,7 +20,7 @@ #include "../DSPEmitter.h" #include "DSPJitUtil.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" using namespace Gen; diff --git a/Source/Core/Core/Src/HW/Memmap.cpp b/Source/Core/Core/Src/HW/Memmap.cpp index 88f6b2c358..6a6fb14212 100644 --- a/Source/Core/Core/Src/HW/Memmap.cpp +++ b/Source/Core/Core/Src/HW/Memmap.cpp @@ -419,132 +419,6 @@ u32 Read_Instruction(const u32 em_address) return inst.hex; } -u32 Read_Opcode_JIT_Uncached(const u32 _Address) -{ - u8* iCache; - u32 addr; - if (_Address & JIT_ICACHE_VMEM_BIT) - { - iCache = jit->GetBlockCache()->GetICacheVMEM(); - addr = _Address & JIT_ICACHE_MASK; - } - else if (_Address & JIT_ICACHE_EXRAM_BIT) - { - iCache = jit->GetBlockCache()->GetICacheEx(); - addr = _Address & JIT_ICACHEEX_MASK; - } - else - { - iCache = jit->GetBlockCache()->GetICache(); - addr = _Address & JIT_ICACHE_MASK; - } - u32 inst = *(u32*)(iCache + addr); - if (inst == JIT_ICACHE_INVALID_WORD) - { - u32 cache_block_start = addr & ~0x1f; - u32 mem_block_start = _Address & ~0x1f; - u8 *pMem = Memory::GetPointer(mem_block_start); - memcpy(iCache + cache_block_start, pMem, 32); - inst = *(u32*)(iCache + addr); - } - inst = Common::swap32(inst); - - if ((inst & 0xfc000000) == 0) - { - inst = jit->GetBlockCache()->GetOriginalFirstOp(inst); - } - - return inst; -} - -u32 Read_Opcode_JIT(u32 _Address) -{ -#ifdef FAST_ICACHE - if (bMMU && !bFakeVMEM && (_Address & ADDR_MASK_MEM1)) - { - _Address = Memory::TranslateAddress(_Address, FLAG_OPCODE); - if (_Address == 0) - { - return 0; - } - } - u32 inst = 0; - - // Bypass the icache for the external interrupt exception handler - if ( (_Address & 0x0FFFFF00) == 0x00000500 ) - inst = Read_Opcode_JIT_Uncached(_Address); - else - inst = PowerPC::ppcState.iCache.ReadInstruction(_Address); -#else - u32 inst = Memory::ReadUnchecked_U32(_Address); -#endif - return inst; -} - -// The following function is deprecated in favour of FAST_ICACHE -u32 Read_Opcode_JIT_LC(const u32 _Address) -{ -#ifdef JIT_UNLIMITED_ICACHE - if ((_Address & ~JIT_ICACHE_MASK) != 0x80000000 && (_Address & ~JIT_ICACHE_MASK) != 0x00000000 && - (_Address & ~JIT_ICACHE_MASK) != 0x7e000000 && // TLB area - (_Address & ~JIT_ICACHEEX_MASK) != 0x90000000 && (_Address & ~JIT_ICACHEEX_MASK) != 0x10000000) - { - PanicAlertT("iCacheJIT: Reading Opcode from %x. Please report.", _Address); - ERROR_LOG(MEMMAP, "iCacheJIT: Reading Opcode from %x. Please report.", _Address); - return 0; - } - u8* iCache; - u32 addr; - if (_Address & JIT_ICACHE_VMEM_BIT) - { - iCache = jit->GetBlockCache()->GetICacheVMEM(); - addr = _Address & JIT_ICACHE_MASK; - } - else if (_Address & JIT_ICACHE_EXRAM_BIT) - { - iCache = jit->GetBlockCache()->GetICacheEx(); - addr = _Address & JIT_ICACHEEX_MASK; - } - else - { - iCache = jit->GetBlockCache()->GetICache(); - addr = _Address & JIT_ICACHE_MASK; - } - u32 inst = *(u32*)(iCache + addr); - if (inst == JIT_ICACHE_INVALID_WORD) - inst = Memory::ReadUnchecked_U32(_Address); - else - inst = Common::swap32(inst); -#else - u32 inst = Memory::ReadUnchecked_U32(_Address); -#endif - if ((inst & 0xfc000000) == 0) - { - inst = jit->GetBlockCache()->GetOriginalFirstOp(inst); - } - return inst; -} - -// WARNING! No checks! -// We assume that _Address is cached -void Write_Opcode_JIT(const u32 _Address, const u32 _Value) -{ -#ifdef JIT_UNLIMITED_ICACHE - if (_Address & JIT_ICACHE_VMEM_BIT) - { - *(u32*)(jit->GetBlockCache()->GetICacheVMEM() + (_Address & JIT_ICACHE_MASK)) = Common::swap32(_Value); - } - else if (_Address & JIT_ICACHE_EXRAM_BIT) - { - *(u32*)(jit->GetBlockCache()->GetICacheEx() + (_Address & JIT_ICACHEEX_MASK)) = Common::swap32(_Value); - } - else - *(u32*)(jit->GetBlockCache()->GetICache() + (_Address & JIT_ICACHE_MASK)) = Common::swap32(_Value); -#else - Memory::WriteUnchecked_U32(_Value, _Address); -#endif -} - void WriteBigEData(const u8 *_pData, const u32 _Address, const u32 _iSize) { memcpy(GetPointer(_Address), _pData, _iSize); diff --git a/Source/Core/Core/Src/HW/Memmap.h b/Source/Core/Core/Src/HW/Memmap.h index 42ceb737ca..8662804185 100644 --- a/Source/Core/Core/Src/HW/Memmap.h +++ b/Source/Core/Core/Src/HW/Memmap.h @@ -119,11 +119,6 @@ inline u32 ReadFast32(const u32 _Address) // used by interpreter to read instructions, uses iCache u32 Read_Opcode(const u32 _Address); -// used by JIT to read instructions -u32 Read_Opcode_JIT(const u32 _Address); -// used by JIT. uses iCacheJIT. Reads in the "Locked cache" mode -u32 Read_Opcode_JIT_LC(const u32 _Address); -void Write_Opcode_JIT(const u32 _Address, const u32 _Value); // this is used by Debugger a lot. // For now, just reads from memory! u32 Read_Instruction(const u32 _Address); diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp index 1d6ea2e70d..ffaea0e868 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp @@ -28,8 +28,6 @@ #undef _interlockedbittestandreset #undef _interlockedbittestandset64 #undef _interlockedbittestandreset64 -#else -#include #endif #include "../../Core.h" @@ -514,4 +512,4 @@ void Interpreter::fsqrtx(UGeckoInstruction _inst) rPS0(_inst.FD) = sqrt(b); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); -} \ No newline at end of file +} diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 5e665eb38e..8e406dbfab 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -24,8 +24,7 @@ #include "Interpreter.h" #include "../../Core.h" -#include "../JitCommon/JitBase.h" -#include "../JitCommon/JitCache.h" +#include "../JitInterface.h" #include "Interpreter_FPUtils.h" @@ -363,34 +362,24 @@ void Interpreter::dcbf(UGeckoInstruction _inst) { NPC = PC + 12; }*/ - // Invalidate the jit block cache on dcbf - if (jit) - { u32 address = Helper_Get_EA_X(_inst); - jit->GetBlockCache()->InvalidateICache(address & ~0x1f, 32); - } + JitInterface::InvalidateICache(address & ~0x1f, 32); } void Interpreter::dcbi(UGeckoInstruction _inst) { // Removes a block from data cache. Since we don't emulate the data cache, we don't need to do anything to the data cache // However, we invalidate the jit block cache on dcbi - if (jit) - { u32 address = Helper_Get_EA_X(_inst); - jit->GetBlockCache()->InvalidateICache(address & ~0x1f, 32); - } + JitInterface::InvalidateICache(address & ~0x1f, 32); } void Interpreter::dcbst(UGeckoInstruction _inst) { // Cache line flush. Since we don't emulate the data cache, we don't need to do anything. // Invalidate the jit block cache on dcbst in case new code has been loaded via the data cache - if (jit) - { u32 address = Helper_Get_EA_X(_inst); - jit->GetBlockCache()->InvalidateICache(address & ~0x1f, 32); - } + JitInterface::InvalidateICache(address & ~0x1f, 32); } void Interpreter::dcbt(UGeckoInstruction _inst) @@ -409,7 +398,7 @@ void Interpreter::dcbz(UGeckoInstruction _inst) // HACK but works... we think if (!Core::g_CoreStartupParameter.bDCBZOFF) Memory::Memset(Helper_Get_EA_X(_inst) & (~31), 0, 32); - if (!jit) + if (!JitInterface::GetCore()) PowerPC::CheckExceptions(); } diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 5a199b56c8..ff65f52471 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -26,13 +26,6 @@ #undef _interlockedbittestandreset #undef _interlockedbittestandset64 #undef _interlockedbittestandreset64 -#else -static const unsigned short FPU_ROUND_NEAR = 0 << 10; -static const unsigned short FPU_ROUND_DOWN = 1 << 10; -static const unsigned short FPU_ROUND_UP = 2 << 10; -static const unsigned short FPU_ROUND_CHOP = 3 << 10; -static const unsigned short FPU_ROUND_MASK = 3 << 10; -#include #endif #include "CPUDetect.h" @@ -43,6 +36,7 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10; #include "../../HW/SystemTimers.h" #include "../../Core.h" #include "Interpreter.h" +#include "FPURoundMode.h" #include "Interpreter_FPUtils.h" @@ -61,38 +55,11 @@ mffsx: 80036650 (huh?) // That is, set rounding mode etc when entering jit code or the interpreter loop // Restore rounding mode when calling anything external -const u32 MASKS = 0x1F80; // mask away the interrupts. -const u32 DAZ = 0x40; -const u32 FTZ = 0x8000; - static void FPSCRtoFPUSettings(UReg_FPSCR fp) { - // Set FPU rounding mode to mimic the PowerPC's -#ifdef _M_IX86 - // This shouldn't really be needed anymore since we use SSE -#ifdef _WIN32 - const int table[4] = - { - _RC_NEAR, - _RC_CHOP, - _RC_UP, - _RC_DOWN - }; - _set_controlfp(_MCW_RC, table[fp.RN]); -#else - const unsigned short table[4] = - { - FPU_ROUND_NEAR, - FPU_ROUND_CHOP, - FPU_ROUND_UP, - FPU_ROUND_DOWN - }; - unsigned short mode; - asm ("fstcw %0" : "=m" (mode) : ); - mode = (mode & ~FPU_ROUND_MASK) | table[fp.RN]; - asm ("fldcw %0" : : "m" (mode)); -#endif -#endif + + FPURoundMode::SetRoundMode(fp.RN); + if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE) { //PanicAlert("FPSCR - exceptions enabled. Please report. VE=%i OE=%i UE=%i ZE=%i XE=%i", @@ -101,14 +68,6 @@ static void FPSCRtoFPUSettings(UReg_FPSCR fp) } // Also corresponding SSE rounding mode setting - static const u32 ssetable[4] = - { - (0 << 13) | MASKS, - (3 << 13) | MASKS, - (2 << 13) | MASKS, - (1 << 13) | MASKS, - }; - u32 csr = ssetable[FPSCR.RN]; if (FPSCR.NI) { // Either one of these two breaks Beyond Good & Evil. @@ -116,7 +75,7 @@ static void FPSCRtoFPUSettings(UReg_FPSCR fp) // csr |= DAZ; // csr |= FTZ; } - _mm_setcsr(csr); + FPURoundMode::SetSIMDMode(FPSCR.RN); } void Interpreter::mtfsb0x(UGeckoInstruction _inst) diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Tables.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Tables.cpp index 07219218a2..fee8ed70d8 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Tables.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Tables.cpp @@ -267,7 +267,7 @@ static GekkoOPTemplate table31[] = {19, Interpreter::mfcr, {"mfcr", OPTYPE_SYSTEM, FL_OUT_D, 0, 0, 0, 0}}, {83, Interpreter::mfmsr, {"mfmsr", OPTYPE_SYSTEM, FL_OUT_D, 0, 0, 0, 0}}, {144, Interpreter::mtcrf, {"mtcrf", OPTYPE_SYSTEM, 0, 0, 0, 0, 0}}, - {146, Interpreter::mtmsr, {"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK, 0, 0, 0, 0}}, + {146, Interpreter::mtmsr, {"mtmsr", OPTYPE_SYSTEM, FL_IN_S | FL_ENDBLOCK, 0, 0, 0, 0}}, {210, Interpreter::mtsr, {"mtsr", OPTYPE_SYSTEM, 0, 0, 0, 0, 0}}, {242, Interpreter::mtsrin, {"mtsrin", OPTYPE_SYSTEM, 0, 0, 0, 0, 0}}, {339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 0, 0, 0, 0}}, diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index 900a8475f7..710984571e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -24,7 +24,7 @@ #include "Common.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "Thunk.h" #include "../../HLE/HLE.h" #include "../../Core.h" @@ -357,7 +357,7 @@ void Jit64::WriteExternalExceptionExit() MOV(32, R(EAX), M(&PC)); MOV(32, M(&NPC), R(EAX)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); - SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); + SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 1ebd476134..1d856d6291 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -62,7 +62,7 @@ if (js.memcheck) \ SetJumpTarget(memException); -class Jit64 : public JitBase +class Jit64 : public Jitx86Base { private: GPRRegCache gpr; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp index f7082cd1cc..df27a3a641 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp @@ -15,7 +15,7 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -#include "ABI.h" +#include "x64ABI.h" #include "x64Emitter.h" #include "../../HW/Memmap.h" @@ -24,7 +24,7 @@ #include "../../CoreTiming.h" #include "MemoryUtil.h" -#include "ABI.h" +#include "x64ABI.h" #include "Jit.h" #include "../JitCommon/JitCache.h" @@ -204,7 +204,7 @@ void Jit64AsmRoutineManager::Generate() MOV(32, M(&NPC), R(EAX)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); SetJumpTarget(noExtException); - + TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); J_CC(CC_Z, outerLoop, true); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h index e8df0877f7..19e1f41ba8 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h @@ -18,7 +18,6 @@ #ifndef _JIT64ASM_H #define _JIT64ASM_H -#include "x64Emitter.h" #include "../JitCommon/JitAsmCommon.h" // In Dolphin, we don't use inline assembly. Instead, we generate all machine-near diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index 04e4d09cc9..7c1d60ad09 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -28,7 +28,7 @@ #include "../../HW/Memmap.h" #include "../PPCTables.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "Jit.h" #include "JitAsm.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 9f61e097b8..5766eb3177 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -27,7 +27,7 @@ #include "../PPCTables.h" #include "CPUDetect.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "Jit.h" #include "JitAsm.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 3b14179cd4..924758ab3a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -28,7 +28,7 @@ #include "../PPCTables.h" #include "CPUDetect.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "Jit.h" #include "JitAsm.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp index 14827129f8..0b6c0462d9 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -23,7 +23,7 @@ #include "../PowerPC.h" #include "../PPCTables.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "Thunk.h" #include "Jit.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp index ce47e2c3ac..3638ba8da6 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp @@ -1991,8 +1991,10 @@ void JitIL::WriteCode() { } void ProfiledReJit() { - jit->SetCodePtr(jit->js.rewriteStart); - DoWriteCode(&((JitIL *)jit)->ibuild, (JitIL *)jit, true, false); - jit->js.curBlock->codeSize = (int)(jit->GetCodePtr() - jit->js.rewriteStart); - jit->GetBlockCache()->FinalizeBlock(jit->js.curBlock->blockNum, jit->jo.enableBlocklink, jit->js.curBlock->normalEntry); + JitIL *jitil = (JitIL *)jit; + jitil->SetCodePtr(jitil->js.rewriteStart); + DoWriteCode(&jitil->ibuild, jitil, true, false); + jitil->js.curBlock->codeSize = (int)(jitil->GetCodePtr() - jitil->js.rewriteStart); + jitil->GetBlockCache()->FinalizeBlock(jitil->js.curBlock->blockNum, jitil->jo.enableBlocklink, + jitil->js.curBlock->normalEntry); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp index d56c9ffee8..dc21228858 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp @@ -19,7 +19,7 @@ #include "Common.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "Thunk.h" #include "../../HLE/HLE.h" #include "../../Core.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h index fdda098e44..8c579fa4bd 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h @@ -57,7 +57,7 @@ #define DISABLE64 #endif -class JitIL : public JitBase +class JitIL : public Jitx86Base { private: diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp index 74c290aa9c..aaa155072e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp @@ -15,7 +15,7 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -#include "ABI.h" +#include "x64ABI.h" #include "x64Emitter.h" #include "../../HW/Memmap.h" @@ -25,7 +25,7 @@ #include "MemoryUtil.h" #include "CPUDetect.h" -#include "ABI.h" +#include "x64ABI.h" #include "Thunk.h" #include "../../HW/GPFifo.h" @@ -211,14 +211,14 @@ void JitILAsmRoutineManager::Generate() doTiming = GetCodePtr(); ABI_CallFunction(reinterpret_cast(&CoreTiming::Advance)); - + testExceptions = GetCodePtr(); MOV(32, R(EAX), M(&PC)); MOV(32, M(&NPC), R(EAX)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); MOV(32, R(EAX), M(&NPC)); MOV(32, M(&PC), R(EAX)); - + TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); J_CC(CC_Z, outer_loop, true); //Landing pad for drec space diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp index 45ceda694a..96902434a6 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp @@ -27,7 +27,7 @@ #include "../../HW/Memmap.h" #include "../PPCTables.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "JitIL.h" #include "JitILAsm.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStoreFloating.cpp index e06d289cc4..c74abfd8e9 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStoreFloating.cpp @@ -27,7 +27,7 @@ #include "../PPCTables.h" #include "CPUDetect.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "JitIL.h" #include "JitILAsm.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp index 2f0a0762fd..6e86804bec 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp @@ -25,7 +25,7 @@ #include "../PPCTables.h" #include "CPUDetect.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "JitIL.h" #include "JitILAsm.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp index f3e25dc2dd..70a6ea4843 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp @@ -23,7 +23,7 @@ #include "../PowerPC.h" #include "../PPCTables.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "Thunk.h" #include "JitIL.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp new file mode 100644 index 0000000000..e4b0ffd714 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp @@ -0,0 +1,498 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include + +#include "Common.h" +#include "../../HLE/HLE.h" +#include "../../Core.h" +#include "../../PatchEngine.h" +#include "../../CoreTiming.h" +#include "../../ConfigManager.h" +#include "../PowerPC.h" +#include "../Profiler.h" +#include "../PPCTables.h" +#include "../PPCAnalyst.h" +#include "../../HW/Memmap.h" +#include "../../HW/GPFifo.h" +#include "Jit.h" +#include "JitArm_Tables.h" +#include "ArmEmitter.h" +#include "../JitInterface.h" + +using namespace ArmGen; +using namespace PowerPC; + +static int CODE_SIZE = 1024*1024*32; +namespace CPUCompare +{ + extern u32 m_BlockStart; +} + +void JitArm::Init() +{ + AllocCodeSpace(CODE_SIZE); + blocks.Init(); + asm_routines.Init(); + // TODO: Investigate why the register cache crashes when only doing Init with + // the pointer to this. Seems for some reason it doesn't set the emitter pointer + // In the class for some reason? + gpr.Init(this); + gpr.SetEmitter(this); + fpr.Init(this); + fpr.SetEmitter(this); + jo.enableBlocklink = true; + jo.optimizeGatherPipe = false; +} + +void JitArm::ClearCache() +{ + ClearCodeSpace(); + blocks.Clear(); +} + +void JitArm::Shutdown() +{ + FreeCodeSpace(); + blocks.Shutdown(); + asm_routines.Shutdown(); +} + +// This is only called by Default() in this file. It will execute an instruction with the interpreter functions. +void JitArm::WriteCallInterpreter(UGeckoInstruction inst) +{ + gpr.Flush(); + fpr.Flush(); + Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); + MOVI2R(R0, inst.hex); + MOVI2R(R12, (u32)instr); + BL(R12); +} +void JitArm::unknown_instruction(UGeckoInstruction inst) +{ + PanicAlert("unknown_instruction %08x - Fix me ;)", inst.hex); +} + +void JitArm::Default(UGeckoInstruction _inst) +{ + WriteCallInterpreter(_inst.hex); +} + +void JitArm::HLEFunction(UGeckoInstruction _inst) +{ + gpr.Flush(); + fpr.Flush(); + MOVI2R(R0, js.compilerPC); + MOVI2R(R1, _inst.hex); + QuickCallFunction(R14, (void*)&HLE::Execute); + ARMReg rA = gpr.GetReg(); + LDR(rA, R9, STRUCT_OFF(PowerPC::ppcState, npc)); + WriteExitDestInR(rA); +} + +void JitArm::DoNothing(UGeckoInstruction _inst) +{ + // Yup, just don't do anything. +} + +static const bool ImHereDebug = false; +static const bool ImHereLog = false; +static std::map been_here; + +static void ImHere() +{ + static File::IOFile f; + if (ImHereLog) + { + if (!f) + { + f.Open("log32.txt", "w"); + } + fprintf(f.GetHandle(), "%08x\n", PC); + } + if (been_here.find(PC) != been_here.end()) + { + been_here.find(PC)->second++; + if ((been_here.find(PC)->second) & 1023) + return; + } + DEBUG_LOG(DYNA_REC, "I'm here - PC = %08x , LR = %08x", PC, LR); + been_here[PC] = 1; +} + +void JitArm::Cleanup() +{ + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0) + QuickCallFunction(R14, (void*)&GPFifo::CheckGatherPipe); +} +void JitArm::DoDownCount() +{ + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + MOVI2R(rA, (u32)&CoreTiming::downcount); + LDR(rB, rA); + if(js.downcountAmount < 255) // We can enlarge this if we used rotations + { + SUBS(rB, rB, js.downcountAmount); + STR(rA, rB); + } + else + { + ARMReg rC = gpr.GetReg(false); + MOVI2R(rC, js.downcountAmount); + SUBS(rB, rB, rC); + STR(rA, rB); + } + gpr.Unlock(rA, rB); +} +void JitArm::WriteExitDestInR(ARMReg Reg) +{ + STR(R9, Reg, STRUCT_OFF(PowerPC::ppcState, pc)); + Cleanup(); + DoDownCount(); + MOVI2R(Reg, (u32)asm_routines.dispatcher); + B(Reg); + gpr.Unlock(Reg); +} +void JitArm::WriteRfiExitDestInR(ARMReg Reg) +{ + STR(R9, Reg, STRUCT_OFF(PowerPC::ppcState, pc)); + Cleanup(); + DoDownCount(); + + MOVI2R(Reg, (u32)asm_routines.testExceptions); + B(Reg); + gpr.Unlock(Reg); // This was locked in the instruction beforehand +} +void JitArm::WriteExceptionExit() +{ + ARMReg A = gpr.GetReg(false); + Cleanup(); + DoDownCount(); + + MOVI2R(A, (u32)asm_routines.testExceptions); + B(A); +} +void JitArm::WriteExit(u32 destination, int exit_num) +{ + Cleanup(); + + DoDownCount(); + //If nobody has taken care of this yet (this can be removed when all branches are done) + JitBlock *b = js.curBlock; + b->exitAddress[exit_num] = destination; + b->exitPtrs[exit_num] = GetWritableCodePtr(); + + // Link opportunity! + int block = blocks.GetBlockNumberFromStartAddress(destination); + if (block >= 0 && jo.enableBlocklink) + { + // It exists! Joy of joy! + B(blocks.GetBlock(block)->checkedEntry); + b->linkStatus[exit_num] = true; + } + else + { + ARMReg A = gpr.GetReg(false); + MOVI2R(A, destination); + STR(R9, A, STRUCT_OFF(PowerPC::ppcState, pc)); + MOVI2R(A, (u32)asm_routines.dispatcher); + B(A); + } +} + +void STACKALIGN JitArm::Run() +{ + CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + pExecAddr(); +} + +void JitArm::SingleStep() +{ + CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + pExecAddr(); +} + +void JitArm::Trace() +{ + char regs[500] = ""; + char fregs[750] = ""; + +#ifdef JIT_LOG_GPR + for (int i = 0; i < 32; i++) + { + char reg[50]; + sprintf(reg, "r%02d: %08x ", i, PowerPC::ppcState.gpr[i]); + strncat(regs, reg, 500); + } +#endif + +#ifdef JIT_LOG_FPR + for (int i = 0; i < 32; i++) + { + char reg[50]; + sprintf(reg, "f%02d: %016x ", i, riPS0(i)); + strncat(fregs, reg, 750); + } +#endif + + DEBUG_LOG(DYNA_REC, "JITARM PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s", + PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], + PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, + PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs, fregs); +} + +void JitArm::PrintDebug(UGeckoInstruction inst, u32 level) +{ + if (level > 0) + printf("Start: %08x OP '%s' Info\n", (u32)GetCodePtr(), PPCTables::GetInstructionName(inst)); + if (level > 1) + { + GekkoOPInfo* Info = GetOpInfo(inst.hex); + printf("\tOuts\n"); + if (Info->flags & FL_OUT_A) + printf("\t-OUT_A: %x\n", inst.RA); + if(Info->flags & FL_OUT_D) + printf("\t-OUT_D: %x\n", inst.RD); + printf("\tIns\n"); + // A, AO, B, C, S + if(Info->flags & FL_IN_A) + printf("\t-IN_A: %x\n", inst.RA); + if(Info->flags & FL_IN_A0) + printf("\t-IN_A0: %x\n", inst.RA); + if(Info->flags & FL_IN_B) + printf("\t-IN_B: %x\n", inst.RB); + if(Info->flags & FL_IN_C) + printf("\t-IN_C: %x\n", inst.RC); + if(Info->flags & FL_IN_S) + printf("\t-IN_S: %x\n", inst.RS); + } +} + +void STACKALIGN JitArm::Jit(u32 em_address) +{ + if (GetSpaceLeft() < 0x10000 || blocks.IsFull() || Core::g_CoreStartupParameter.bJITNoBlockCache) + { + ClearCache(); + } + + int block_num = blocks.AllocateBlock(em_address); + JitBlock *b = blocks.GetBlock(block_num); + const u8* BlockPtr = DoJit(em_address, &code_buffer, b); + blocks.FinalizeBlock(block_num, jo.enableBlocklink, BlockPtr); +} +void JitArm::Break(UGeckoInstruction inst) +{ + BKPT(0x4444); +} + +const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b) +{ + int blockSize = code_buf->GetSize(); + + // Memory exception on instruction fetch + bool memory_exception = false; + + // A broken block is a block that does not end in a branch + bool broken_block = false; + + if (Core::g_CoreStartupParameter.bEnableDebugging) + { + // Comment out the following to disable breakpoints (speed-up) + blockSize = 1; + broken_block = true; + Trace(); + } + + if (em_address == 0) + { + Core::SetState(Core::CORE_PAUSE); + PanicAlert("ERROR: Compiling at 0. LR=%08x CTR=%08x", LR, CTR); + } + + if (Core::g_CoreStartupParameter.bMMU && (em_address & JIT_ICACHE_VMEM_BIT)) + { + if (!Memory::TranslateAddress(em_address, Memory::FLAG_OPCODE)) + { + // Memory exception occurred during instruction fetch + memory_exception = true; + } + } + + + int size = 0; + js.isLastInstruction = false; + js.blockStart = em_address; + js.fifoBytesThisBlock = 0; + js.curBlock = b; + js.block_flags = 0; + js.cancel = false; + + // Analyze the block, collect all instructions it is made of (including inlining, + // if that is enabled), reorder instructions for optimal performance, and join joinable instructions. + u32 nextPC = em_address; + u32 merged_addresses[32]; + const int capacity_of_merged_addresses = sizeof(merged_addresses) / sizeof(merged_addresses[0]); + int size_of_merged_addresses = 0; + if (!memory_exception) + { + // If there is a memory exception inside a block (broken_block==true), compile up to that instruction. + nextPC = PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, broken_block, code_buf, blockSize, merged_addresses, capacity_of_merged_addresses, size_of_merged_addresses); + } + PPCAnalyst::CodeOp *ops = code_buf->codebuffer; + + const u8 *start = GetCodePtr(); + b->checkedEntry = start; + b->runCount = 0; + + // Downcount flag check, Only valid for linked blocks + { + FixupBranch skip = B_CC(CC_PL); + ARMABI_MOVI2M((u32)&PC, js.blockStart); + ARMReg rA = gpr.GetReg(false); + MOVI2R(rA, (u32)asm_routines.doTiming); + B(rA); + SetJumpTarget(skip); + } + + const u8 *normalEntry = GetCodePtr(); + b->normalEntry = normalEntry; + + if(ImHereDebug) + QuickCallFunction(R14, (void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful + + if (js.fpa.any) + { + // This block uses FPU - needs to add FP exception bailout + ARMReg A = gpr.GetReg(); + ARMReg C = gpr.GetReg(); + Operand2 Shift(2, 10); // 1 << 13 + MOVI2R(C, js.blockStart); // R3 + LDR(A, R9, STRUCT_OFF(PowerPC::ppcState, msr)); + TST(A, Shift); + FixupBranch b1 = B_CC(CC_NEQ); + STR(R9, C, STRUCT_OFF(PowerPC::ppcState, pc)); + MOVI2R(A, (u32)asm_routines.fpException); + B(A); + SetJumpTarget(b1); + gpr.Unlock(A, C); + } + // Conditionally add profiling code. + if (Profiler::g_ProfileBlocks) { + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + MOVI2R(rA, (u32)&b->runCount); // Load in to register + LDR(rB, rA); // Load the actual value in to R11. + ADD(rB, rB, 1); // Add one to the value + STR(rA, rB); // Now store it back in the memory location + // get start tic + PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStart); + gpr.Unlock(rA, rB); + } + gpr.Start(js.gpa); + fpr.Start(js.fpa); + js.downcountAmount = 0; + if (!Core::g_CoreStartupParameter.bEnableDebugging) + { + for (int i = 0; i < size_of_merged_addresses; ++i) + { + const u32 address = merged_addresses[i]; + js.downcountAmount += PatchEngine::GetSpeedhackCycles(address); + } + } + + js.skipnext = false; + js.blockSize = size; + js.compilerPC = nextPC; + // Translate instructions + for (int i = 0; i < (int)size; i++) + { + js.compilerPC = ops[i].address; + js.op = &ops[i]; + js.instructionNumber = i; + const GekkoOPInfo *opinfo = ops[i].opinfo; + js.downcountAmount += (opinfo->numCyclesMinusOne + 1); + + if (i == (int)size - 1) + { + // WARNING - cmp->branch merging will screw this up. + js.isLastInstruction = true; + js.next_inst = 0; + if (Profiler::g_ProfileBlocks) { + // CAUTION!!! push on stack regs you use, do your stuff, then pop + PROFILER_VPUSH; + // get end tic + PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop); + // tic counter += (end tic - start tic) + PROFILER_ADD_DIFF_LARGE_INTEGER(&b->ticCounter, &b->ticStop, &b->ticStart); + PROFILER_VPOP; + } + } + else + { + // help peephole optimizations + js.next_inst = ops[i + 1].inst; + js.next_compilerPC = ops[i + 1].address; + } + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) + { + js.fifoBytesThisBlock -= 32; + // TODO: This needs thunkmanager for ARM + //ARMABI_CallFunction(thunks.ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0)); + } + if (Core::g_CoreStartupParameter.bEnableDebugging) + { + // Add run count + ARMReg RA = gpr.GetReg(); + ARMReg RB = gpr.GetReg(); + MOVI2R(RA, (u32)&opinfo->runCount); + LDR(RB, RA); + ADD(RB, RB, 1); + STR(RA, RB); + gpr.Unlock(RA, RB); + } + if (!ops[i].skip) + { + PrintDebug(ops[i].inst, 0); + if (js.memcheck && (opinfo->flags & FL_USE_FPU)) + { + // Don't do this yet + BKPT(0x7777); + } + JitArmTables::CompileInstruction(ops[i]); + if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) + { + // Don't do this yet + BKPT(0x666); + } + } + } + if (memory_exception) + BKPT(0x500); + if (broken_block) + { + printf("Broken Block going to 0x%08x\n", nextPC); + WriteExit(nextPC, 0); + } + + b->flags = js.block_flags; + b->codeSize = (u32)(GetCodePtr() - normalEntry); + b->originalSize = size; + FlushIcache(); + return start; +} + diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h new file mode 100644 index 0000000000..c736ae77a1 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h @@ -0,0 +1,186 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +// ======================== +// See comments in Jit.cpp. +// ======================== + +// Mystery: Capcom vs SNK 800aa278 + +// CR flags approach: +// * Store that "N+Z flag contains CR0" or "S+Z flag contains CR3". +// * All flag altering instructions flush this +// * A flush simply does a conditional write to the appropriate CRx. +// * If flag available, branch code can become absolutely trivial. + +// Settings +// ---------- +#ifndef _JITARM_H +#define _JITARM_H +#include "../CPUCoreBase.h" +#include "../PPCAnalyst.h" +#include "JitArmCache.h" +#include "JitRegCache.h" +#include "JitFPRCache.h" +#include "JitAsm.h" +#include "../JitCommon/JitBase.h" + +// Use these to control the instruction selection +// #define INSTRUCTION_START Default(inst); return; +// #define INSTRUCTION_START PPCTables::CountInstruction(inst); +#define INSTRUCTION_START +#define JITDISABLE(type) \ + if (Core::g_CoreStartupParameter.bJITOff || \ + Core::g_CoreStartupParameter.bJIT##type##Off) \ + {Default(inst); return;} + +class JitArm : public JitBase, public ArmGen::ARMXCodeBlock +{ +private: + JitArmBlockCache blocks; + + JitArmAsmRoutineManager asm_routines; + + // TODO: Make arm specific versions of these, shouldn't be too hard to + // make it so we allocate some space at the start(?) of code generation + // and keep the registers in a cache. Will burn this bridge when we get to + // it. + ArmRegCache gpr; + ArmFPRCache fpr; + + PPCAnalyst::CodeBuffer code_buffer; + void DoDownCount(); + + void PrintDebug(UGeckoInstruction inst, u32 level); + + void Helper_UpdateCR1(ARMReg value); +public: + JitArm() : code_buffer(32000) {} + ~JitArm() {} + + void Init(); + void Shutdown(); + + // Jit! + + void Jit(u32 em_address); + const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b); + + JitBaseBlockCache *GetBlockCache() { return &blocks; } + + const u8 *BackPatch(u8 *codePtr, int accessType, u32 em_address, void *ctx); + + bool IsInCodeSpace(u8 *ptr) { return IsInSpace(ptr); } + + void Trace(); + + void ClearCache(); + + const u8 *GetDispatcher() { + return asm_routines.dispatcher; + } + CommonAsmRoutinesBase *GetAsmRoutines() { + return &asm_routines; + } + + const char *GetName() { + return "JITARM"; + } + // Run! + + void Run(); + void SingleStep(); + + // Utilities for use by opcodes + + void WriteExit(u32 destination, int exit_num); + void WriteExitDestInR(ARMReg Reg); + void WriteRfiExitDestInR(ARMReg Reg); + void WriteExceptionExit(); + void WriteCallInterpreter(UGeckoInstruction _inst); + void Cleanup(); + + void GenerateRC(int cr = 0); + void ComputeRC(int cr = 0); + + // TODO: This shouldn't be here + void StoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset); + void LoadToReg(ARMReg dest, ARMReg addr, int accessSize, s32 offset); + + // OPCODES + void unknown_instruction(UGeckoInstruction _inst); + void Default(UGeckoInstruction _inst); + void DoNothing(UGeckoInstruction _inst); + void HLEFunction(UGeckoInstruction _inst); + + void DynaRunTable4(UGeckoInstruction _inst); + void DynaRunTable19(UGeckoInstruction _inst); + void DynaRunTable31(UGeckoInstruction _inst); + void DynaRunTable59(UGeckoInstruction _inst); + void DynaRunTable63(UGeckoInstruction _inst); + + // Breakin shit + void Break(UGeckoInstruction _inst); + // Branch + void bx(UGeckoInstruction _inst); + void bcx(UGeckoInstruction _inst); + void bclrx(UGeckoInstruction _inst); + void sc(UGeckoInstruction _inst); + void rfi(UGeckoInstruction _inst); + void bcctrx(UGeckoInstruction _inst); + + // Integer + void addi(UGeckoInstruction _inst); + void addis(UGeckoInstruction _inst); + void addx(UGeckoInstruction _inst); + void cmp (UGeckoInstruction _inst); + void cmpi(UGeckoInstruction _inst); + void cmpli(UGeckoInstruction _inst); + void negx(UGeckoInstruction _inst); + void mulli(UGeckoInstruction _inst); + void ori(UGeckoInstruction _inst); + void oris(UGeckoInstruction _inst); + void orx(UGeckoInstruction _inst); + void rlwimix(UGeckoInstruction _inst); + void rlwinmx(UGeckoInstruction _inst); + void extshx(UGeckoInstruction inst); + void extsbx(UGeckoInstruction inst); + + // System Registers + void mtmsr(UGeckoInstruction _inst); + void mtspr(UGeckoInstruction _inst); + void mfspr(UGeckoInstruction _inst); + + // LoadStore + void icbi(UGeckoInstruction _inst); + void lbz(UGeckoInstruction _inst); + void lhz(UGeckoInstruction _inst); + void lwz(UGeckoInstruction _inst); + void lwzx(UGeckoInstruction _inst); + void stw(UGeckoInstruction _inst); + void stwu(UGeckoInstruction _inst); + + // Floating point + void fabsx(UGeckoInstruction _inst); + void faddx(UGeckoInstruction _inst); + void fmrx(UGeckoInstruction _inst); + + // Floating point loadStore + void lfs(UGeckoInstruction _inst); +}; + +#endif // _JIT64_H diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArmCache.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArmCache.cpp new file mode 100644 index 0000000000..296ca736a9 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArmCache.cpp @@ -0,0 +1,46 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +// Enable define below to enable oprofile integration. For this to work, +// it requires at least oprofile version 0.9.4, and changing the build +// system to link the Dolphin executable against libopagent. Since the +// dependency is a little inconvenient and this is possibly a slight +// performance hit, it's not enabled by default, but it's useful for +// locating performance issues. + +#include "../JitInterface.h" +#include "JitArmCache.h" + + +using namespace ArmGen; + + void JitArmBlockCache::WriteLinkBlock(u8* location, const u8* address) + { + ARMXEmitter emit(location); + emit.B(address); + } + void JitArmBlockCache::WriteDestroyBlock(const u8* location, u32 address) + { + ARMXEmitter emit((u8 *)location); + emit.MOVI2R(R10, (u32)&PC); + emit.MOVI2R(R11, address); + emit.MOVI2R(R12, (u32)jit->GetAsmRoutines()->dispatcher); + emit.STR(R10, R11); + emit.B(R12); + } + + diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArmCache.h b/Source/Core/Core/Src/PowerPC/JitArm32/JitArmCache.h new file mode 100644 index 0000000000..103947d5ad --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArmCache.h @@ -0,0 +1,33 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _JITARMCACHE_H +#define _JITARMCACHE_H + +#include "../JitCommon/JitCache.h" + + +typedef void (*CompiledCode)(); + +class JitArmBlockCache : public JitBaseBlockCache +{ +private: + void WriteLinkBlock(u8* location, const u8* address); + void WriteDestroyBlock(const u8* location, u32 address); +}; + +#endif diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_BackPatch.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_BackPatch.cpp new file mode 100644 index 0000000000..677dc238a8 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_BackPatch.cpp @@ -0,0 +1,164 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include + +#include "Common.h" + +#include "../../HW/Memmap.h" +#include "Jit.h" +#include "../JitCommon/JitBackpatch.h" +#include "StringUtil.h" + +#ifdef _M_X64 +static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) { + u64 code_addr = (u64)codePtr; + disassembler disasm; + char disbuf[256]; + memset(disbuf, 0, 256); +#ifdef _M_IX86 + disasm.disasm32(0, code_addr, codePtr, disbuf); +#else + disasm.disasm64(0, code_addr, codePtr, disbuf); +#endif + PanicAlert("%s\n\n" + "Error encountered accessing emulated address %08x.\n" + "Culprit instruction: \n%s\nat %#llx", + text.c_str(), emAddress, disbuf, code_addr); + return; +} +#endif + +// This generates some fairly heavy trampolines, but: +// 1) It's really necessary. We don't know anything about the context. +// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be +// that many of them in a typical program/game. +bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store) +{ + u8 op = (inst >> 20) & 0xFF; + printf("op: 0x%08x\n", op); + switch (op) + { + case 0x58: // STR + { + rD = (ARMReg)((inst >> 16) & 0xF); + Store = true; + accessSize = 32; + } + break; + case 0x59: // LDR + { + rD = (ARMReg)((inst >> 16) & 0xF); + Store = false; + accessSize = 32; + } + break; + case 0x05: // LDRH + { + rD = (ARMReg)((inst >> 16) & 0xF); + Store = false; + accessSize = 16; + } + break; + case 0x45 + 0x18: // LDRB + { + rD = (ARMReg)((inst >> 16) & 0xF); + Store = false; + accessSize = 8; + } + break; + case 0x44 + 0x18: // STRB + default: + return false; + } + return true; +} +const u8 *JitArm::BackPatch(u8 *codePtr, int accessType, u32 emAddress, void *ctx_void) +{ + // TODO: This ctx needs to be filled with our information + CONTEXT *ctx = (CONTEXT *)ctx_void; + + // We need to get the destination register before we start + u32 Value = *(u32*)codePtr; + ARMReg rD; + u8 accessSize; + bool Store; + + if (!DisamLoadStore(Value, rD, accessSize, Store)) + { + printf("Invalid backpatch at location 0x%08x(0x%08x)\n", ctx->reg_pc, Value); + exit(0); + } + + if (Store) + { + const u32 ARMREGOFFSET = 4 * 7; + ARMXEmitter emitter(codePtr - ARMREGOFFSET); + switch (accessSize) + { + case 8: // 8bit + //emitter.MOVI2R(R14, (u32)&Memory::Write_U8, false); // 1-2 + return 0; + break; + case 16: // 16bit + //emitter.MOVI2R(R14, (u32)&Memory::Write_U16, false); // 1-2 + return 0; + break; + case 32: // 32bit + emitter.MOVI2R(R14, (u32)&Memory::Write_U32, false); // 1-2 + break; + } + emitter.PUSH(4, R0, R1, R2, R3); // 3 + emitter.MOV(R0, rD); // Value - 4 + emitter.MOV(R1, R10); // Addr- 5 + emitter.BL(R14); // 6 + emitter.POP(4, R0, R1, R2, R3); // 7 + emitter.NOP(1); // 8 + u32 newPC = ctx->reg_pc - (ARMREGOFFSET + 4 * 4); + ctx->reg_pc = newPC; + emitter.FlushIcache(); + return codePtr; + } + else + { + const u32 ARMREGOFFSET = 4 * 6; + ARMXEmitter emitter(codePtr - ARMREGOFFSET); + switch (accessSize) + { + case 8: // 8bit + emitter.MOVI2R(R14, (u32)&Memory::Read_U8, false); // 2 + break; + case 16: // 16bit + emitter.MOVI2R(R14, (u32)&Memory::Read_U16, false); // 2 + break; + case 32: // 32bit + emitter.MOVI2R(R14, (u32)&Memory::Read_U32, false); // 2 + break; + } + emitter.PUSH(4, R0, R1, R2, R3); // 3 + emitter.MOV(R0, R10); // 4 + emitter.BL(R14); // 5 + emitter.MOV(R14, R0); // 6 + emitter.POP(4, R0, R1, R2, R3); // 7 + emitter.MOV(rD, R14); // 8 + ctx->reg_pc -= ARMREGOFFSET + (4 * 4); + emitter.FlushIcache(); + return codePtr; + } + return 0; +} + diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp new file mode 100644 index 0000000000..27d5c23141 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp @@ -0,0 +1,347 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ +#include "Common.h" +#include "Thunk.h" + +#include "../../Core.h" +#include "../PowerPC.h" +#include "../../CoreTiming.h" +#include "../PPCTables.h" +#include "ArmEmitter.h" + +#include "Jit.h" +#include "JitRegCache.h" +#include "JitAsm.h" + +// The branches are known good, or at least reasonably good. +// No need for a disable-mechanism. + +// If defined, clears CR0 at blr and bl-s. If the assumption that +// flags never carry over between functions holds, then the task for +// an optimizer becomes much easier. + +// #define ACID_TEST + +// Zelda and many more games seem to pass the Acid Test. + + +using namespace ArmGen; +void JitArm::sc(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Branch) + + gpr.Flush(); + fpr.Flush(); + + ARMABI_MOVI2M((u32)&PC, js.compilerPC + 4); // Destroys R12 and R14 + ARMReg rA = gpr.GetReg(); + LDR(rA, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions)); + ORR(rA, rA, EXCEPTION_SYSCALL); + STR(R9, rA, STRUCT_OFF(PowerPC::ppcState, Exceptions)); + gpr.Unlock(rA); + + WriteExceptionExit(); +} + +void JitArm::rfi(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Branch) + + gpr.Flush(); + fpr.Flush(); + + // See Interpreter rfi for details + const u32 mask = 0x87C0FFFF; + const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] + // MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13; + // R0 = MSR location + // R1 = MSR contents + // R2 = Mask + // R3 = Mask + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + ARMReg rC = gpr.GetReg(); + ARMReg rD = gpr.GetReg(); + MOVI2R(rA, (u32)&MSR); + MOVI2R(rB, (~mask) & clearMSR13); + MOVI2R(rC, mask & clearMSR13); + + LDR(rD, rA); + + AND(rD, rD, rB); // rD = Masked MSR + STR(rA, rD); + + MOVI2R(rB, (u32)&SRR1); + LDR(rB, rB); // rB contains SRR1 here + + AND(rB, rB, rC); // rB contains masked SRR1 here + ORR(rB, rD, rB); // rB = Masked MSR OR masked SRR1 + + STR(rA, rB); // STR rB in to rA + + MOVI2R(rA, (u32)&SRR0); + LDR(rA, rA); + + gpr.Unlock(rB, rC, rD); + WriteRfiExitDestInR(rA); // rA gets unlocked here + //AND(32, M(&MSR), Imm32((~mask) & clearMSR13)); + //MOV(32, R(EAX), M(&SRR1)); + //AND(32, R(EAX), Imm32(mask & clearMSR13)); + //OR(32, M(&MSR), R(EAX)); + // NPC = SRR0; + //MOV(32, R(EAX), M(&SRR0)); + //WriteRfiExitDestInEAX(); +} + +void JitArm::bx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Branch) + // We must always process the following sentence + // even if the blocks are merged by PPCAnalyst::Flatten(). + if (inst.LK) + ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); + + // If this is not the last instruction of a block, + // we will skip the rest process. + // Because PPCAnalyst::Flatten() merged the blocks. + if (!js.isLastInstruction) { + return; + } + + gpr.Flush(); + fpr.Flush(); + + u32 destination; + if (inst.AA) + destination = SignExt26(inst.LI << 2); + else + destination = js.compilerPC + SignExt26(inst.LI << 2); + #ifdef ACID_TEST + // TODO: Not implemented yet. + //if (inst.LK) + //AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000))); + #endif + if (destination == js.compilerPC) + { + //PanicAlert("Idle loop detected at %08x", destination); + // CALL(ProtectFunction(&CoreTiming::Idle, 0)); + // JMP(Asm::testExceptions, true); + // make idle loops go faster + js.downcountAmount += 8; + } + WriteExit(destination, 0); +} + +void JitArm::bcx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Branch) + // USES_CR + _assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block"); + + gpr.Flush(); + fpr.Flush(); + + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + FixupBranch pCTRDontBranch; + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR + { + MOVI2R(rA, (u32)&CTR); + LDR(rB, rA); + SUBS(rB, rB, 1); + STR(rA, rB); + + //SUB(32, M(&CTR), Imm8(1)); + if (inst.BO & BO_BRANCH_IF_CTR_0) + pCTRDontBranch = B_CC(CC_NEQ); + else + pCTRDontBranch = B_CC(CC_EQ); + } + + FixupBranch pConditionDontBranch; + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit + { + LDRB(rA, R9, STRUCT_OFF(PowerPC::ppcState, cr_fast) + (inst.BI >> 2)); + TST(rA, 8 >> (inst.BI & 3)); + + //TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); + if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch + pConditionDontBranch = B_CC(CC_EQ); // Zero + else + pConditionDontBranch = B_CC(CC_NEQ); // Not Zero + } + gpr.Unlock(rA, rB); + if (inst.LK) + ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); // Careful, destroys R14, R12 + + u32 destination; + if(inst.AA) + destination = SignExt16(inst.BD << 2); + else + destination = js.compilerPC + SignExt16(inst.BD << 2); + WriteExit(destination, 0); + + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) + SetJumpTarget( pConditionDontBranch ); + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) + SetJumpTarget( pCTRDontBranch ); + + WriteExit(js.compilerPC + 4, 1); +} +void JitArm::bcctrx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Branch) + + gpr.Flush(); + fpr.Flush(); + + // bcctrx doesn't decrement and/or test CTR + _dbg_assert_msg_(POWERPC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!"); + + if (inst.BO_2 & BO_DONT_CHECK_CONDITION) + { + // BO_2 == 1z1zz -> b always + + //NPC = CTR & 0xfffffffc; + if(inst.LK_3) + ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + MOVI2R(rA, (u32)&CTR); + MVN(rB, 0x3); // 0xFFFFFFFC + LDR(rA, rA); + AND(rA, rA, rB); + gpr.Unlock(rB); + WriteExitDestInR(rA); + } + else + { + // Rare condition seen in (just some versions of?) Nintendo's NES Emulator + + // BO_2 == 001zy -> b if false + // BO_2 == 011zy -> b if true + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + + LDRB(rA, R9, STRUCT_OFF(PowerPC::ppcState, cr_fast) + (inst.BI >> 2)); + TST(rA, 8 >> (inst.BI & 3)); + CCFlags branch; + if (inst.BO_2 & BO_BRANCH_IF_TRUE) + branch = CC_EQ; + else + branch = CC_NEQ; + FixupBranch b = B_CC(branch); + + MOVI2R(rA, (u32)&CTR); + LDR(rA, rA); + MVN(rB, 0x3); // 0xFFFFFFFC + AND(rA, rA, rB); + + if (inst.LK_3){ + ARMReg rC = gpr.GetReg(false); + u32 Jumpto = js.compilerPC + 4; + MOVI2R(rB, (u32)&LR); + MOVI2R(rC, Jumpto); + STR(rB, rC); + //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); + } + gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR + WriteExitDestInR(rA); + + SetJumpTarget(b); + WriteExit(js.compilerPC + 4, 1); + } +} +void JitArm::bclrx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Branch) + if (!js.isLastInstruction && + (inst.BO & (1 << 4)) && (inst.BO & (1 << 2))) { + if (inst.LK) + { + ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); + } + return; + } + gpr.Flush(); + fpr.Flush(); + + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + FixupBranch pCTRDontBranch; + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR + { + MOVI2R(rA, (u32)&CTR); + LDR(rB, rA); + SUBS(rB, rB, 1); + STR(rA, rB); + + //SUB(32, M(&CTR), Imm8(1)); + if (inst.BO & BO_BRANCH_IF_CTR_0) + pCTRDontBranch = B_CC(CC_NEQ); + else + pCTRDontBranch = B_CC(CC_EQ); + } + + FixupBranch pConditionDontBranch; + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit + { + LDRB(rA, R9, STRUCT_OFF(PowerPC::ppcState, cr_fast) + (inst.BI >> 2)); + TST(rA, 8 >> (inst.BI & 3)); + //TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); + if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch + pConditionDontBranch = B_CC(CC_EQ); // Zero + else + pConditionDontBranch = B_CC(CC_NEQ); // Not Zero + } + + // This below line can be used to prove that blr "eats flags" in practice. + // This observation will let us do a lot of fun observations. + #ifdef ACID_TEST + // TODO: Not yet implemented + // AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000))); + #endif + + //MOV(32, R(EAX), M(&LR)); + //AND(32, R(EAX), Imm32(0xFFFFFFFC)); + MOVI2R(rA, (u32)&LR); + MVN(rB, 0x3); // 0xFFFFFFFC + LDR(rA, rA); + AND(rA, rA, rB); + if (inst.LK){ + ARMReg rC = gpr.GetReg(false); + u32 Jumpto = js.compilerPC + 4; + MOVI2R(rB, (u32)&LR); + MOVI2R(rC, Jumpto); + STR(rB, rC); + //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); + } + gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR + WriteExitDestInR(rA); + + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) + SetJumpTarget( pConditionDontBranch ); + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) + SetJumpTarget( pCTRDontBranch ); + WriteExit(js.compilerPC + 4, 1); +} diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp new file mode 100644 index 0000000000..fe4c0e3f22 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp @@ -0,0 +1,80 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "Common.h" +#include "Thunk.h" + +#include "../../Core.h" +#include "../PowerPC.h" +#include "../../ConfigManager.h" +#include "../../CoreTiming.h" +#include "../PPCTables.h" +#include "ArmEmitter.h" +#include "../../HW/Memmap.h" + + +#include "Jit.h" +#include "JitRegCache.h" +#include "JitFPRCache.h" +#include "JitAsm.h" + +void JitArm::Helper_UpdateCR1(ARMReg value) +{ + // Should just update exception flags, not do any compares. + PanicAlert("CR1"); +} + +void JitArm::fabsx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(FloatingPoint) + + ARMReg vD = fpr.R0(inst.FD); + ARMReg vB = fpr.R0(inst.FB); + + VABS(vD, vB); + + if (inst.Rc) Helper_UpdateCR1(vD); +} + +void JitArm::faddx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(FloatingPoint) + + ARMReg vD = fpr.R0(inst.FD); + ARMReg vA = fpr.R0(inst.FA); + ARMReg vB = fpr.R0(inst.FB); + + VADD(vD, vA, vB); + if (inst.Rc) Helper_UpdateCR1(vD); +} + +void JitArm::fmrx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(FloatingPoint) + Default(inst); return; + + ARMReg vD = fpr.R0(inst.FD); + ARMReg vB = fpr.R0(inst.FB); + + VMOV(vD, vB); + + if (inst.Rc) Helper_UpdateCR1(vD); +} + diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp new file mode 100644 index 0000000000..63c96b21fd --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp @@ -0,0 +1,297 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ +#include "Common.h" +#include "Thunk.h" + +#include "../../Core.h" +#include "../PowerPC.h" +#include "../../CoreTiming.h" +#include "../PPCTables.h" +#include "ArmEmitter.h" + +#include "Jit.h" +#include "JitRegCache.h" +#include "JitAsm.h" +extern u32 Helper_Mask(u8 mb, u8 me); +// ADDI and RLWINMX broken for now + +// Assumes that Sign and Zero flags were set by the last operation. Preserves all flags and registers. +// Jit64 ComputerRC is signed +// JIT64 GenerateRC is unsigned +void JitArm::GenerateRC(int cr) { + ARMReg rB = gpr.GetReg(); + + MOV(rB, 0x4); // Result > 0 + SetCC(CC_EQ); MOV(rB, 0x2); // Result == 0 + SetCC(CC_MI); MOV(rB, 0x8); // Result < 0 + SetCC(); + + STRB(R9, rB, STRUCT_OFF(PowerPC::ppcState, cr_fast) + cr); + gpr.Unlock(rB); +} +void JitArm::ComputeRC(int cr) { + ARMReg rB = gpr.GetReg(); + + MOV(rB, 0x2); // Result == 0 + SetCC(CC_LT); MOV(rB, 0x8); // Result < 0 + SetCC(CC_GT); MOV(rB, 0x4); // Result > 0 + SetCC(); + + STRB(R9, rB, STRUCT_OFF(PowerPC::ppcState, cr_fast) + cr); + gpr.Unlock(rB); +} + +void JitArm::addi(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + ARMReg RD = gpr.R(inst.RD); + + if (inst.RA) + { + ARMReg rA = gpr.GetReg(false); + ARMReg RA = gpr.R(inst.RA); + MOVI2R(rA, (u32)inst.SIMM_16); + ADD(RD, RA, rA); + } + else + MOVI2R(RD, inst.SIMM_16); +} +void JitArm::addis(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + ARMReg RD = gpr.R(inst.RD); + if (inst.RA) + { + ARMReg rA = gpr.GetReg(false); + ARMReg RA = gpr.R(inst.RA); + MOVI2R(rA, inst.SIMM_16 << 16); + ADD(RD, RA, rA); + } + else + MOVI2R(RD, inst.SIMM_16 << 16); +} +void JitArm::addx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + ARMReg RA = gpr.R(inst.RA); + ARMReg RB = gpr.R(inst.RB); + ARMReg RD = gpr.R(inst.RD); + ADDS(RD, RA, RB); + if (inst.Rc) ComputeRC(); +} +// Wrong - 28/10/2012 +void JitArm::mulli(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + Default(inst); return; + ARMReg RA = gpr.R(inst.RA); + ARMReg RD = gpr.R(inst.RD); + ARMReg rA = gpr.GetReg(); + MOVI2R(rA, inst.SIMM_16); + MUL(RD, RA, rA); + gpr.Unlock(rA); +} +void JitArm::ori(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + ARMReg RA = gpr.R(inst.RA); + ARMReg RS = gpr.R(inst.RS); + ARMReg rA = gpr.GetReg(); + MOVI2R(rA, inst.UIMM); + ORR(RA, RS, rA); + gpr.Unlock(rA); +} +void JitArm::oris(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + ARMReg RA = gpr.R(inst.RA); + ARMReg RS = gpr.R(inst.RS); + ARMReg rA = gpr.GetReg(); + MOVI2R(rA, inst.UIMM << 16); + ORR(RA, RS, rA); + gpr.Unlock(rA); +} +void JitArm::orx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + ARMReg rA = gpr.R(inst.RA); + ARMReg rS = gpr.R(inst.RS); + ARMReg rB = gpr.R(inst.RB); + ORRS(rA, rS, rB); + if (inst.Rc) + ComputeRC(); +} + +void JitArm::extshx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + ARMReg RA, RS; + RA = gpr.R(inst.RA); + RS = gpr.R(inst.RS); + SXTH(RA, RS); + if (inst.Rc){ + CMP(RA, 0); + ComputeRC(); + } +} +void JitArm::extsbx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + ARMReg RA, RS; + RA = gpr.R(inst.RA); + RS = gpr.R(inst.RS); + SXTB(RA, RS); + if (inst.Rc){ + CMP(RA, 0); + ComputeRC(); + } +} +void JitArm::cmp (UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + ARMReg RA = gpr.R(inst.RA); + ARMReg RB = gpr.R(inst.RB); + int crf = inst.CRFD; + CMP(RA, RB); + ComputeRC(crf); +} +void JitArm::cmpi(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + ARMReg RA = gpr.R(inst.RA); + ARMReg rA = gpr.GetReg(); + int crf = inst.CRFD; + MOVI2R(rA, inst.SIMM_16); + CMP(RA, rA); + gpr.Unlock(rA); + ComputeRC(crf); +} +void JitArm::cmpli(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + ARMReg RA = gpr.R(inst.RA); + ARMReg rA = gpr.GetReg(); + int crf = inst.CRFD; + MOVI2R(rA, (u32)inst.UIMM); + CMP(RA, rA); + + // Unsigned GenerateRC() + + MOV(rA, 0x2); // Result == 0 + SetCC(CC_LO); MOV(rA, 0x8); // Result < 0 + SetCC(CC_HI); MOV(rA, 0x4); // Result > 0 + SetCC(); + + STRB(R9, rA, STRUCT_OFF(PowerPC::ppcState, cr_fast) + crf); + gpr.Unlock(rA); + +} +// Wrong - 27/10/2012 +void JitArm::negx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + Default(inst);return; + ARMReg RA = gpr.R(inst.RA); + ARMReg RD = gpr.R(inst.RD); + + RSBS(RD, RA, 0); + if (inst.Rc) + { + GenerateRC(); + } + if (inst.OE) + { + BKPT(0x333); + //GenerateOverflow(); + } +} +void JitArm::rlwimix(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + u32 mask = Helper_Mask(inst.MB,inst.ME); + ARMReg RA = gpr.R(inst.RA); + ARMReg RS = gpr.R(inst.RS); + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + MOVI2R(rA, mask); + + Operand2 Shift(32 - inst.SH, ST_ROR, RS); // This rotates left, while ARM has only rotate right, so swap it. + if (inst.Rc) + { + BIC (rB, RA, rA); // RA & ~mask + AND (rA, rA, Shift); + ORRS(RA, rB, rA); + GenerateRC(); + } + else + { + BIC (rB, RA, rA); // RA & ~mask + AND (rA, rA, Shift); + ORR(RA, rB, rA); + } + gpr.Unlock(rA, rB); +} +void JitArm::rlwinmx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + + u32 mask = Helper_Mask(inst.MB,inst.ME); + ARMReg RA = gpr.R(inst.RA); + ARMReg RS = gpr.R(inst.RS); + ARMReg rA = gpr.GetReg(); + MOVI2R(rA, mask); + + Operand2 Shift(32 - inst.SH, ST_ROR, RS); // This rotates left, while ARM has only rotate right, so swap it. + if (inst.Rc) + { + ANDS(RA, rA, Shift); + GenerateRC(); + } + else + AND (RA, rA, Shift); + gpr.Unlock(rA); + + //m_GPR[inst.RA] = _rotl(m_GPR[inst.RS],inst.SH) & mask; +} + diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp new file mode 100644 index 0000000000..80c8a96cbb --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp @@ -0,0 +1,414 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "Common.h" +#include "Thunk.h" + +#include "../../Core.h" +#include "../PowerPC.h" +#include "../../ConfigManager.h" +#include "../../CoreTiming.h" +#include "../PPCTables.h" +#include "ArmEmitter.h" +#include "../../HW/Memmap.h" + + +#include "Jit.h" +#include "JitRegCache.h" +#include "JitAsm.h" + +#ifdef ANDROID +#define FASTMEM 0 +#else +#define FASTMEM 0 +#endif +void JitArm::stw(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + + ARMReg RS = gpr.R(inst.RS); +#if FASTMEM + // R10 contains the dest address + ARMReg Value = R11; + ARMReg RA; + if (inst.RA) + RA = gpr.R(inst.RA); + MOV(Value, RS); + if (inst.RA) + { + MOVI2R(R10, inst.SIMM_16, false); + ADD(R10, R10, RA); + } + else + { + MOVI2R(R10, (u32)inst.SIMM_16, false); + NOP(1); + } + StoreFromReg(R10, Value, 32, 0); +#else + ARMReg ValueReg = gpr.GetReg(); + ARMReg Addr = gpr.GetReg(); + ARMReg Function = gpr.GetReg(); + + MOV(ValueReg, RS); + if (inst.RA) + { + MOVI2R(Addr, inst.SIMM_16); + ARMReg RA = gpr.R(inst.RA); + ADD(Addr, Addr, RA); + } + else + MOVI2R(Addr, (u32)inst.SIMM_16); + + MOVI2R(Function, (u32)&Memory::Write_U32); + PUSH(4, R0, R1, R2, R3); + MOV(R0, ValueReg); + MOV(R1, Addr); + BL(Function); + POP(4, R0, R1, R2, R3); + gpr.Unlock(ValueReg, Addr, Function); +#endif +} +void JitArm::stwu(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + + ARMReg RA = gpr.R(inst.RA); + ARMReg RS = gpr.R(inst.RS); + ARMReg ValueReg = gpr.GetReg(); + ARMReg Addr = gpr.GetReg(); + ARMReg Function = gpr.GetReg(); + + MOVI2R(Addr, inst.SIMM_16); + ADD(Addr, Addr, RA); + + // Check and set the update before writing since calling a function can + // mess with the "special registers R11+ which may cause some issues. + LDR(Function, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions)); + CMP(Function, EXCEPTION_DSI); + FixupBranch DoNotWrite = B_CC(CC_EQ); + MOV(RA, Addr); + SetJumpTarget(DoNotWrite); + + MOV(ValueReg, RS); + + MOVI2R(Function, (u32)&Memory::Write_U32); + PUSH(4, R0, R1, R2, R3); + MOV(R0, ValueReg); + MOV(R1, Addr); + BL(Function); + POP(4, R0, R1, R2, R3); + + gpr.Unlock(ValueReg, Addr, Function); +} +void JitArm::StoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset) +{ + ARMReg rA = gpr.GetReg(); + + // All this gets replaced on backpatch + MOVI2R(rA, Memory::MEMVIEW32_MASK, false); // 1-2 + AND(dest, dest, rA); // 3 + MOVI2R(rA, (u32)Memory::base, false); // 4-5 + ADD(dest, dest, rA); // 6 + switch (accessSize) + { + case 32: + REV(value, value); // 7 + break; + case 16: + REV16(value, value); + break; + case 8: + NOP(1); + break; + } + switch (accessSize) + { + case 32: + STR(dest, value); // 8 + break; + case 16: + // Not implemented + break; + case 8: + // Not implemented + break; + } + gpr.Unlock(rA); +} +void JitArm::LoadToReg(ARMReg dest, ARMReg addr, int accessSize, s32 offset) +{ + ARMReg rA = gpr.GetReg(); + MOVI2R(rA, offset, false); // -3 + ADD(addr, addr, rA); // - 1 + + // All this gets replaced on backpatch + MOVI2R(rA, Memory::MEMVIEW32_MASK, false); // 2 + AND(addr, addr, rA); // 3 + MOVI2R(rA, (u32)Memory::base, false); // 5 + ADD(addr, addr, rA); // 6 + switch (accessSize) + { + case 32: + LDR(dest, addr); // 7 + break; + case 16: + LDRH(dest, addr); + break; + case 8: + LDRB(dest, addr); + break; + } + switch (accessSize) + { + case 32: + REV(dest, dest); // 9 + break; + case 16: + REV16(dest, dest); + break; + case 8: + NOP(1); + break; + + } + gpr.Unlock(rA); +} +void JitArm::lbz(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + ARMReg RD = gpr.R(inst.RD); + LDR(rA, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions)); + CMP(rA, EXCEPTION_DSI); + FixupBranch DoNotLoad = B_CC(CC_EQ); +#if FASTMEM + // Backpatch route + // Gets loaded in to RD + // Address is in R10 + gpr.Unlock(rA, rB); + if (inst.RA) + { + ARMReg RA = gpr.R(inst.RA); + MOV(R10, RA); // - 4 + } + else + MOV(R10, 0); // - 4 + LoadToReg(RD, R10, 8, inst.SIMM_16); +#else + + if (inst.RA) + { + MOVI2R(rB, inst.SIMM_16); + ARMReg RA = gpr.R(inst.RA); + ADD(rB, rB, RA); + } + else + MOVI2R(rB, (u32)inst.SIMM_16); + + MOVI2R(rA, (u32)&Memory::Read_U8); + PUSH(4, R0, R1, R2, R3); + MOV(R0, rB); + BL(rA); + MOV(rA, R0); + POP(4, R0, R1, R2, R3); + MOV(RD, rA); + gpr.Unlock(rA, rB); +#endif + SetJumpTarget(DoNotLoad); +} + +void JitArm::lhz(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + ARMReg RD = gpr.R(inst.RD); + LDR(rA, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions)); + CMP(rA, EXCEPTION_DSI); + FixupBranch DoNotLoad = B_CC(CC_EQ); +#if 0 // FASTMEM + // Backpatch route + // Gets loaded in to RD + // Address is in R10 + gpr.Unlock(rA, rB); + if (inst.RA) + { + ARMReg RA = gpr.R(inst.RA); + printf("lhz jump to here: 0x%08x\n", (u32)GetCodePtr()); + MOV(R10, RA); // - 4 + } + else + { + printf("lhz jump to here: 0x%08x\n", (u32)GetCodePtr()); + MOV(R10, 0); // - 4 + } + LoadToReg(RD, R10, 16, (u32)inst.SIMM_16); +#else + + if (inst.RA) + { + MOVI2R(rB, inst.SIMM_16); + ARMReg RA = gpr.R(inst.RA); + ADD(rB, rB, RA); + } + else + MOVI2R(rB, (u32)inst.SIMM_16); + + MOVI2R(rA, (u32)&Memory::Read_U16); + PUSH(4, R0, R1, R2, R3); + MOV(R0, rB); + BL(rA); + MOV(rA, R0); + POP(4, R0, R1, R2, R3); + MOV(RD, rA); + gpr.Unlock(rA, rB); +#endif + SetJumpTarget(DoNotLoad); +} +void JitArm::lwz(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + ARMReg RD = gpr.R(inst.RD); + LDR(rA, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions)); + CMP(rA, EXCEPTION_DSI); + FixupBranch DoNotLoad = B_CC(CC_EQ); + +#if FASTMEM + // Backpatch route + // Gets loaded in to RD + // Address is in R10 + gpr.Unlock(rA, rB); + if (inst.RA) + { + ARMReg RA = gpr.R(inst.RA); + MOV(R10, RA); // - 4 + } + else + MOV(R10, 0); // - 4 + LoadToReg(RD, R10, 32, (u32)inst.SIMM_16); +#else + if (inst.RA) + { + MOVI2R(rB, inst.SIMM_16); + ARMReg RA = gpr.R(inst.RA); + ADD(rB, rB, RA); + } + else + MOVI2R(rB, (u32)inst.SIMM_16); + + MOVI2R(rA, (u32)&Memory::Read_U32); + PUSH(4, R0, R1, R2, R3); + MOV(R0, rB); + BL(rA); + MOV(rA, R0); + POP(4, R0, R1, R2, R3); + MOV(RD, rA); + gpr.Unlock(rA, rB); +#endif + SetJumpTarget(DoNotLoad); + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle && + (inst.hex & 0xFFFF0000) == 0x800D0000 && + (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 || + (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) && + Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8) + { + gpr.Flush(); + fpr.Flush(); + + // if it's still 0, we can wait until the next event + TST(RD, RD); + FixupBranch noIdle = B_CC(CC_NEQ); + rA = gpr.GetReg(); + + MOVI2R(rA, (u32)&PowerPC::OnIdle); + MOVI2R(R0, PowerPC::ppcState.gpr[inst.RA] + (s32)(s16)inst.SIMM_16); + BL(rA); + + gpr.Unlock(rA); + WriteExceptionExit(); + + SetJumpTarget(noIdle); + + //js.compilerPC += 8; + return; + } +} +void JitArm::lwzx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + + ARMReg RB = gpr.R(inst.RB); + ARMReg RD = gpr.R(inst.RD); + LDR(rA, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions)); + CMP(rA, EXCEPTION_DSI); + FixupBranch DoNotLoad = B_CC(CC_EQ); +#if FASTMEM + // Backpatch route + // Gets loaded in to RD + // Address is in R10 + gpr.Unlock(rA, rB); + if (inst.RA) + { + ARMReg RA = gpr.R(inst.RA); + ADD(R10, RA, RB); // - 4 + } + else + MOV(R10, RB); // -4 + LoadToReg(RD, R10, 32, 0); +#else + if (inst.RA) + { + ARMReg RA = gpr.R(inst.RA); + ADD(rB, RA, RB); + } + else + MOV(rB, RB); + + MOVI2R(rA, (u32)&Memory::Read_U32); + PUSH(4, R0, R1, R2, R3); + MOV(R0, rB); + BL(rA); + MOV(rA, R0); + POP(4, R0, R1, R2, R3); + MOV(RD, rA); + gpr.Unlock(rA, rB); +#endif + SetJumpTarget(DoNotLoad); + //// u32 temp = Memory::Read_U32(_inst.RA ? (m_GPR[_inst.RA] + m_GPR[_inst.RB]) : m_GPR[_inst.RB]); +} +void JitArm::icbi(UGeckoInstruction inst) +{ + Default(inst); + WriteExit(js.compilerPC + 4, 0); +} + diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp new file mode 100644 index 0000000000..d9e320ab22 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "Common.h" +#include "Thunk.h" + +#include "../../Core.h" +#include "../PowerPC.h" +#include "../../ConfigManager.h" +#include "../../CoreTiming.h" +#include "../PPCTables.h" +#include "ArmEmitter.h" +#include "../../HW/Memmap.h" + + +#include "Jit.h" +#include "JitRegCache.h" +#include "JitFPRCache.h" +#include "JitAsm.h" + +void JitArm::lfs(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStoreFloating) + Default(inst); return; + + ARMReg rA = gpr.GetReg(); + ARMReg rB = gpr.GetReg(); + LDR(rA, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions)); + CMP(rA, EXCEPTION_DSI); + FixupBranch DoNotLoad = B_CC(CC_EQ); + + if (inst.RA) + { + MOVI2R(rB, inst.SIMM_16); + ARMReg RA = gpr.R(inst.RA); + ADD(rB, rB, RA); + } + else + MOVI2R(rB, (u32)inst.SIMM_16); + + MOVI2R(rA, (u32)&Memory::Read_U32); + PUSH(4, R0, R1, R2, R3); + MOV(R0, rB); + BL(rA); + MOV(rA, R0); + POP(4, R0, R1, R2, R3); + + ARMReg v0 = fpr.R0(inst.FD, false); + ARMReg v1 = fpr.R1(inst.FD, false); + + VMOV(v0, rA, false); + VMOV(v1, rA, false); + + gpr.Unlock(rA, rB); + SetJumpTarget(DoNotLoad); +} + diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp new file mode 100644 index 0000000000..a99114df67 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp @@ -0,0 +1,111 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ +#include "Common.h" +#include "Thunk.h" + +#include "../../Core.h" +#include "../PowerPC.h" +#include "../../CoreTiming.h" +#include "../PPCTables.h" +#include "ArmEmitter.h" + +#include "Jit.h" +#include "JitRegCache.h" +#include "JitAsm.h" + +void JitArm::mtspr(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + + u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); + ARMReg RD = gpr.R(inst.RD); + + switch (iIndex) + { + case SPR_LR: + case SPR_CTR: + case SPR_XER: + // These are safe to do the easy way, see the bottom of this function. + break; + + case SPR_GQR0: + case SPR_GQR0 + 1: + case SPR_GQR0 + 2: + case SPR_GQR0 + 3: + case SPR_GQR0 + 4: + case SPR_GQR0 + 5: + case SPR_GQR0 + 6: + case SPR_GQR0 + 7: + // Prevent recompiler from compiling in old quantizer values. + // If the value changed, destroy all blocks using this quantizer + // This will create a little bit of block churn, but hopefully not too bad. + { + /* + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[iIndex])); // Load old value + CMP(32, R(EAX), gpr.R(inst.RD)); + FixupBranch skip_destroy = J_CC(CC_E, false); + int gqr = iIndex - SPR_GQR0; + ABI_CallFunctionC(ProtectFunction(&Jit64::DestroyBlocksWithFlag, 1), (u32)BLOCK_USE_GQR0 << gqr); + SetJumpTarget(skip_destroy);*/ + } + // TODO - break block if quantizers are written to. + default: + Default(inst); + return; + } + + // OK, this is easy. + ARMReg rA = gpr.GetReg(false); + MOVI2R(rA, (u32)&PowerPC::ppcState.spr); + STR(rA, RD, iIndex * 4); +} + +void JitArm::mfspr(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + + u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); + ARMReg RD = gpr.R(inst.RD); + switch (iIndex) + { + case SPR_WPAR: + case SPR_DEC: + case SPR_TL: + case SPR_TU: + Default(inst); + return; + default: + ARMReg rA = gpr.GetReg(false); + MOVI2R(rA, (u32)&PowerPC::ppcState.spr); + LDR(RD, rA, iIndex * 4); + break; + } +} +void JitArm::mtmsr(UGeckoInstruction inst) +{ + INSTRUCTION_START + // Don't interpret this, if we do we get thrown out + //JITDISABLE(SystemRegisters) + + ARMReg rA = gpr.GetReg(); + MOVI2R(rA, (u32)&MSR); + STR(rA, gpr.R(inst.RS)); + gpr.Unlock(rA); + WriteExit(js.compilerPC + 4, 0); +} diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp new file mode 100644 index 0000000000..1929705df2 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp @@ -0,0 +1,502 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "Jit.h" +#include "../JitInterface.h" +#include "JitArm_Tables.h" + +// Should be moved in to the Jit class +typedef void (JitArm::*_Instruction) (UGeckoInstruction instCode); + +static _Instruction dynaOpTable[64]; +static _Instruction dynaOpTable4[1024]; +static _Instruction dynaOpTable19[1024]; +static _Instruction dynaOpTable31[1024]; +static _Instruction dynaOpTable59[32]; +static _Instruction dynaOpTable63[1024]; + +void JitArm::DynaRunTable4(UGeckoInstruction _inst) {(this->*dynaOpTable4 [_inst.SUBOP10])(_inst);} +void JitArm::DynaRunTable19(UGeckoInstruction _inst) {(this->*dynaOpTable19[_inst.SUBOP10])(_inst);} +void JitArm::DynaRunTable31(UGeckoInstruction _inst) {(this->*dynaOpTable31[_inst.SUBOP10])(_inst);} +void JitArm::DynaRunTable59(UGeckoInstruction _inst) {(this->*dynaOpTable59[_inst.SUBOP5 ])(_inst);} +void JitArm::DynaRunTable63(UGeckoInstruction _inst) {(this->*dynaOpTable63[_inst.SUBOP10])(_inst);} + +struct GekkoOPTemplate +{ + int opcode; + _Instruction Inst; + //GekkoOPInfo opinfo; // Doesn't need opinfo, Interpreter fills it out +}; + +static GekkoOPTemplate primarytable[] = +{ + {4, &JitArm::DynaRunTable4}, //"RunTable4", OPTYPE_SUBTABLE | (4<<24), 0}}, + {19, &JitArm::DynaRunTable19}, //"RunTable19", OPTYPE_SUBTABLE | (19<<24), 0}}, + {31, &JitArm::DynaRunTable31}, //"RunTable31", OPTYPE_SUBTABLE | (31<<24), 0}}, + {59, &JitArm::DynaRunTable59}, //"RunTable59", OPTYPE_SUBTABLE | (59<<24), 0}}, + {63, &JitArm::DynaRunTable63}, //"RunTable63", OPTYPE_SUBTABLE | (63<<24), 0}}, + + {16, &JitArm::bcx}, //"bcx", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + {18, &JitArm::bx}, //"bx", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + + {1, &JitArm::HLEFunction}, //"HLEFunction", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + {2, &JitArm::Default}, //"DynaBlock", OPTYPE_SYSTEM, 0}}, + {3, &JitArm::Break}, //"twi", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + {17, &JitArm::sc}, //"sc", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, + + {7, &JitArm::mulli}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}}, + {8, &JitArm::Default}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, + {10, &JitArm::cmpli}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, + {11, &JitArm::cmpi}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, + {12, &JitArm::Default}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, + {13, &JitArm::Default}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, + {14, &JitArm::addi}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, + {15, &JitArm::addis}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, + + {20, &JitArm::rlwimix}, //"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT}}, + {21, &JitArm::rlwinmx}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {23, &JitArm::Default}, //"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT}}, + + {24, &JitArm::ori}, //"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, + {25, &JitArm::oris}, //"oris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, + {26, &JitArm::Default}, //"xori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, + {27, &JitArm::Default}, //"xoris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, + {28, &JitArm::Default}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, + {29, &JitArm::Default}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, + + {32, &JitArm::lwz}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {33, &JitArm::Default}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {34, &JitArm::lbz}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {35, &JitArm::Default}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {40, &JitArm::lhz}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {41, &JitArm::Default}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {42, &JitArm::Default}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {43, &JitArm::Default}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + + {44, &JitArm::Default}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {45, &JitArm::Default}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + {36, &JitArm::stw}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {37, &JitArm::stwu}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + {38, &JitArm::Default}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {39, &JitArm::Default}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + + {46, &JitArm::Default}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, + {47, &JitArm::Default}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, + + {48, &JitArm::lfs}, //"lfs", OPTYPE_LOADFP, FL_IN_A}}, + {49, &JitArm::Default}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, + {50, &JitArm::Default}, //"lfd", OPTYPE_LOADFP, FL_IN_A}}, + {51, &JitArm::Default}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, + + {52, &JitArm::Default}, //"stfs", OPTYPE_STOREFP, FL_IN_A}}, + {53, &JitArm::Default}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, + {54, &JitArm::Default}, //"stfd", OPTYPE_STOREFP, FL_IN_A}}, + {55, &JitArm::Default}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, + + {56, &JitArm::Default}, //"psq_l", OPTYPE_PS, FL_IN_A}}, + {57, &JitArm::Default}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, + {60, &JitArm::Default}, //"psq_st", OPTYPE_PS, FL_IN_A}}, + {61, &JitArm::Default}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, + + //missing: 0, 5, 6, 9, 22, 30, 62, 58 + {0, &JitArm::Default}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {5, &JitArm::Default}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {6, &JitArm::Default}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {9, &JitArm::Default}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {22, &JitArm::Default}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {30, &JitArm::Default}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {62, &JitArm::Default}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {58, &JitArm::Default}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, +}; + +static GekkoOPTemplate table4[] = +{ //SUBOP10 + {0, &JitArm::Default}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, + {32, &JitArm::Default}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, + {40, &JitArm::Default}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, + {136, &JitArm::Default}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, + {264, &JitArm::Default}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, + {64, &JitArm::Default}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, + {72, &JitArm::Default}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, + {96, &JitArm::Default}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, + {528, &JitArm::Default}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, + {560, &JitArm::Default}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, + {592, &JitArm::Default}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}}, + {624, &JitArm::Default}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}}, + + {1014, &JitArm::Default}, //"dcbz_l", OPTYPE_SYSTEM, 0}}, +}; + +static GekkoOPTemplate table4_2[] = +{ + {10, &JitArm::Default}, //"ps_sum0", OPTYPE_PS, 0}}, + {11, &JitArm::Default}, //"ps_sum1", OPTYPE_PS, 0}}, + {12, &JitArm::Default}, //"ps_muls0", OPTYPE_PS, 0}}, + {13, &JitArm::Default}, //"ps_muls1", OPTYPE_PS, 0}}, + {14, &JitArm::Default}, //"ps_madds0", OPTYPE_PS, 0}}, + {15, &JitArm::Default}, //"ps_madds1", OPTYPE_PS, 0}}, + {18, &JitArm::Default}, //"ps_div", OPTYPE_PS, 0, 16}}, + {20, &JitArm::Default}, //"ps_sub", OPTYPE_PS, 0}}, + {21, &JitArm::Default}, //"ps_add", OPTYPE_PS, 0}}, + {23, &JitArm::Default}, //"ps_sel", OPTYPE_PS, 0}}, + {24, &JitArm::Default}, //"ps_res", OPTYPE_PS, 0}}, + {25, &JitArm::Default}, //"ps_mul", OPTYPE_PS, 0}}, + {26, &JitArm::Default}, //"ps_rsqrte", OPTYPE_PS, 0, 1}}, + {28, &JitArm::Default}, //"ps_msub", OPTYPE_PS, 0}}, + {29, &JitArm::Default}, //"ps_madd", OPTYPE_PS, 0}}, + {30, &JitArm::Default}, //"ps_nmsub", OPTYPE_PS, 0}}, + {31, &JitArm::Default}, //"ps_nmadd", OPTYPE_PS, 0}}, +}; + + +static GekkoOPTemplate table4_3[] = +{ + {6, &JitArm::Default}, //"psq_lx", OPTYPE_PS, 0}}, + {7, &JitArm::Default}, //"psq_stx", OPTYPE_PS, 0}}, + {38, &JitArm::Default}, //"psq_lux", OPTYPE_PS, 0}}, + {39, &JitArm::Default}, //"psq_stux", OPTYPE_PS, 0}}, +}; + +static GekkoOPTemplate table19[] = +{ + {528, &JitArm::bcctrx}, //"bcctrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, + {16, &JitArm::bclrx}, //"bclrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, + {257, &JitArm::Default}, //"crand", OPTYPE_CR, FL_EVIL}}, + {129, &JitArm::Default}, //"crandc", OPTYPE_CR, FL_EVIL}}, + {289, &JitArm::Default}, //"creqv", OPTYPE_CR, FL_EVIL}}, + {225, &JitArm::Default}, //"crnand", OPTYPE_CR, FL_EVIL}}, + {33, &JitArm::Default}, //"crnor", OPTYPE_CR, FL_EVIL}}, + {449, &JitArm::Default}, //"cror", OPTYPE_CR, FL_EVIL}}, + {417, &JitArm::Default}, //"crorc", OPTYPE_CR, FL_EVIL}}, + {193, &JitArm::Default}, //"crxor", OPTYPE_CR, FL_EVIL}}, + + {150, &JitArm::DoNothing}, //"isync", OPTYPE_ICACHE, FL_EVIL}}, + {0, &JitArm::Default}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}}, + + {50, &JitArm::rfi}, //"rfi", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 1}}, + {18, &JitArm::Break}, //"rfid", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS}} +}; + + +static GekkoOPTemplate table31[] = +{ + {28, &JitArm::Default}, //"andx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {60, &JitArm::Default}, //"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {444, &JitArm::orx}, //"orx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {124, &JitArm::Default}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {316, &JitArm::Default}, //"xorx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {412, &JitArm::Default}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {476, &JitArm::Default}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {284, &JitArm::Default}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {0, &JitArm::cmp}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, + {32, &JitArm::Default}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, + {26, &JitArm::Default}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {922, &JitArm::extshx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {954, &JitArm::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {536, &JitArm::Default}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + {792, &JitArm::Default}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + {824, &JitArm::Default}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + {24, &JitArm::Default}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + + {54, &JitArm::Default}, //"dcbst", OPTYPE_DCACHE, 0, 4}}, + {86, &JitArm::Default}, //"dcbf", OPTYPE_DCACHE, 0, 4}}, + {246, &JitArm::Default}, //"dcbtst", OPTYPE_DCACHE, 0, 1}}, + {278, &JitArm::Default}, //"dcbt", OPTYPE_DCACHE, 0, 1}}, + {470, &JitArm::Default}, //"dcbi", OPTYPE_DCACHE, 0, 4}}, + {758, &JitArm::Default}, //"dcba", OPTYPE_DCACHE, 0, 4}}, + {1014, &JitArm::Default}, //"dcbz", OPTYPE_DCACHE, 0, 4}}, + + //load word + {23, &JitArm::lwzx}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {55, &JitArm::Default}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //load halfword + {279, &JitArm::Default}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {311, &JitArm::Default}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //load halfword signextend + {343, &JitArm::Default}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {375, &JitArm::Default}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //load byte + {87, &JitArm::Default}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {119, &JitArm::Default}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //load byte reverse + {534, &JitArm::Default}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {790, &JitArm::Default}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + + // Conditional load/store (Wii SMP) + {150, &JitArm::Default}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}}, + {20, &JitArm::Default}, //"lwarx", OPTYPE_LOAD, FL_EVIL | FL_OUT_D | FL_IN_A0B | FL_SET_CR0}}, + + //load string (interpret these) + {533, &JitArm::Default}, //"lswx", OPTYPE_LOAD, FL_EVIL | FL_IN_A | FL_OUT_D}}, + {597, &JitArm::Default}, //"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}}, + + //store word + {151, &JitArm::Default}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {183, &JitArm::Default}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //store halfword + {407, &JitArm::Default}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {439, &JitArm::Default}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //store byte + {215, &JitArm::Default}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {247, &JitArm::Default}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //store bytereverse + {662, &JitArm::Default}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {918, &JitArm::Default}, //"sthbrx", OPTYPE_STORE, FL_IN_A | FL_IN_B}}, + + {661, &JitArm::Default}, //"stswx", OPTYPE_STORE, FL_EVIL}}, + {725, &JitArm::Default}, //"stswi", OPTYPE_STORE, FL_EVIL}}, + + // fp load/store + {535, &JitArm::Default}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, + {567, &JitArm::Default}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, + {599, &JitArm::Default}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, + {631, &JitArm::Default}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, + + {663, &JitArm::Default}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, + {695, &JitArm::Default}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, + {727, &JitArm::Default}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, + {759, &JitArm::Default}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, + {983, &JitArm::Default}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, + + {19, &JitArm::Default}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}}, + {83, &JitArm::Default}, //"mfmsr", OPTYPE_SYSTEM, FL_OUT_D}}, + {144, &JitArm::Default}, //"mtcrf", OPTYPE_SYSTEM, 0}}, + {146, &JitArm::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + {210, &JitArm::Default}, //"mtsr", OPTYPE_SYSTEM, 0}}, + {242, &JitArm::Default}, //"mtsrin", OPTYPE_SYSTEM, 0}}, + {339, &JitArm::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, + {467, &JitArm::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}}, + {371, &JitArm::Default}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, + {512, &JitArm::Default}, //"mcrxr", OPTYPE_SYSTEM, 0}}, + {595, &JitArm::Default}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, + {659, &JitArm::Default}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, + + {4, &JitArm::Break}, //"tw", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, + {598, &JitArm::Default}, //"sync", OPTYPE_SYSTEM, 0, 2}}, + {982, &JitArm::icbi}, //"icbi", OPTYPE_SYSTEM, FL_ENDBLOCK, 3}}, + + // Unused instructions on GC + {310, &JitArm::Default}, //"eciwx", OPTYPE_INTEGER, FL_RC_BIT}}, + {438, &JitArm::Default}, //"ecowx", OPTYPE_INTEGER, FL_RC_BIT}}, + {854, &JitArm::Default}, //"eieio", OPTYPE_INTEGER, FL_RC_BIT}}, + {306, &JitArm::Default}, //"tlbie", OPTYPE_SYSTEM, 0}}, + {370, &JitArm::Default}, //"tlbia", OPTYPE_SYSTEM, 0}}, + {566, &JitArm::Default}, //"tlbsync", OPTYPE_SYSTEM, 0}}, +}; + +static GekkoOPTemplate table31_2[] = +{ + {266, &JitArm::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {778, &JitArm::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {10, &JitArm::Default}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, + {138, &JitArm::Default}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {234, &JitArm::Default}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {202, &JitArm::Default}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {491, &JitArm::Default}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, + {1003, &JitArm::Default}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, + {459, &JitArm::Default}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, + {971, &JitArm::Default}, //"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, + {75, &JitArm::Default}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, + {11, &JitArm::Default}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, + {235, &JitArm::Default}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, + {747, &JitArm::Default}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, + {104, &JitArm::negx}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {40, &JitArm::Default}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {552, &JitArm::Default}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {8, &JitArm::Default}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, + {136, &JitArm::Default}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {232, &JitArm::Default}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {200, &JitArm::Default}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, +}; + +static GekkoOPTemplate table59[] = +{ + {18, &JitArm::Default}, //{"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}}, + {20, &JitArm::Default}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {21, &JitArm::Default}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}}, +// {22, &JitArm::Default}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}}, // Not implemented on gekko + {24, &JitArm::Default}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}}, + {25, &JitArm::Default}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {28, &JitArm::Default}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {29, &JitArm::Default}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {30, &JitArm::Default}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {31, &JitArm::Default}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, +}; + +static GekkoOPTemplate table63[] = +{ + {264, &JitArm::fabsx}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {32, &JitArm::Default}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, + {0, &JitArm::Default}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, + {14, &JitArm::Default}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, + {15, &JitArm::Default}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, + {72, &JitArm::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}}, + {136, &JitArm::Default}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {40, &JitArm::Default}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}}, + {12, &JitArm::Default}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}}, + + {64, &JitArm::Default}, //"mcrfs", OPTYPE_SYSTEMFP, 0}}, + {583, &JitArm::Default}, //"mffsx", OPTYPE_SYSTEMFP, 0}}, + {70, &JitArm::Default}, //"mtfsb0x", OPTYPE_SYSTEMFP, 0, 2}}, + {38, &JitArm::Default}, //"mtfsb1x", OPTYPE_SYSTEMFP, 0, 2}}, + {134, &JitArm::Default}, //"mtfsfix", OPTYPE_SYSTEMFP, 0, 2}}, + {711, &JitArm::Default}, //"mtfsfx", OPTYPE_SYSTEMFP, 0, 2}}, +}; + +static GekkoOPTemplate table63_2[] = +{ + {18, &JitArm::Default}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}}, + {20, &JitArm::Default}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}}, + {21, &JitArm::faddx}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}}, + {22, &JitArm::Default}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}}, + {23, &JitArm::Default}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, + {25, &JitArm::Default}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, + {26, &JitArm::Default}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, + {28, &JitArm::Default}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, + {29, &JitArm::Default}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, + {30, &JitArm::Default}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, + {31, &JitArm::Default}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, +}; + + +namespace JitArmTables +{ + +void CompileInstruction(PPCAnalyst::CodeOp & op) +{ + JitArm *jitarm = (JitArm *)jit; + (jitarm->*dynaOpTable[op.inst.OPCD])(op.inst); + GekkoOPInfo *info = op.opinfo; + if (info) { +#ifdef OPLOG + if (!strcmp(info->opname, OP_TO_LOG)){ ///"mcrfs" + rsplocations.push_back(jit.js.compilerPC); + } +#endif + info->compileCount++; + info->lastUse = jit->js.compilerPC; + } +} + +void InitTables() +{ + // once initialized, tables are read-only + static bool initialized = false; + if (initialized) + return; + + //clear + for (int i = 0; i < 32; i++) + { + dynaOpTable59[i] = &JitArm::unknown_instruction; + } + + for (int i = 0; i < 1024; i++) + { + dynaOpTable4 [i] = &JitArm::unknown_instruction; + dynaOpTable19[i] = &JitArm::unknown_instruction; + dynaOpTable31[i] = &JitArm::unknown_instruction; + dynaOpTable63[i] = &JitArm::unknown_instruction; + } + + for (int i = 0; i < (int)(sizeof(primarytable) / sizeof(GekkoOPTemplate)); i++) + { + dynaOpTable[primarytable[i].opcode] = primarytable[i].Inst; + } + + for (int i = 0; i < 32; i++) + { + int fill = i << 5; + for (int j = 0; j < (int)(sizeof(table4_2) / sizeof(GekkoOPTemplate)); j++) + { + int op = fill+table4_2[j].opcode; + dynaOpTable4[op] = table4_2[j].Inst; + } + } + + for (int i = 0; i < 16; i++) + { + int fill = i << 6; + for (int j = 0; j < (int)(sizeof(table4_3) / sizeof(GekkoOPTemplate)); j++) + { + int op = fill+table4_3[j].opcode; + dynaOpTable4[op] = table4_3[j].Inst; + } + } + + for (int i = 0; i < (int)(sizeof(table4) / sizeof(GekkoOPTemplate)); i++) + { + int op = table4[i].opcode; + dynaOpTable4[op] = table4[i].Inst; + } + + for (int i = 0; i < (int)(sizeof(table31) / sizeof(GekkoOPTemplate)); i++) + { + int op = table31[i].opcode; + dynaOpTable31[op] = table31[i].Inst; + } + + for (int i = 0; i < 1; i++) + { + int fill = i << 9; + for (int j = 0; j < (int)(sizeof(table31_2) / sizeof(GekkoOPTemplate)); j++) + { + int op = fill + table31_2[j].opcode; + dynaOpTable31[op] = table31_2[j].Inst; + } + } + + for (int i = 0; i < (int)(sizeof(table19) / sizeof(GekkoOPTemplate)); i++) + { + int op = table19[i].opcode; + dynaOpTable19[op] = table19[i].Inst; + } + + for (int i = 0; i < (int)(sizeof(table59) / sizeof(GekkoOPTemplate)); i++) + { + int op = table59[i].opcode; + dynaOpTable59[op] = table59[i].Inst; + } + + for (int i = 0; i < (int)(sizeof(table63) / sizeof(GekkoOPTemplate)); i++) + { + int op = table63[i].opcode; + dynaOpTable63[op] = table63[i].Inst; + } + + for (int i = 0; i < 32; i++) + { + int fill = i << 5; + for (int j = 0; j < (int)(sizeof(table63_2) / sizeof(GekkoOPTemplate)); j++) + { + int op = fill + table63_2[j].opcode; + dynaOpTable63[op] = table63_2[j].Inst; + } + } + + initialized = true; + +} + +} // namespace diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.h b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.h new file mode 100644 index 0000000000..4904ce3984 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.h @@ -0,0 +1,29 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef JITARM_TABLES_H +#define JITARM_TABLES_H + +#include "../Gekko.h" +#include "../PPCTables.h" + +namespace JitArmTables +{ + void CompileInstruction(PPCAnalyst::CodeOp & op); + void InitTables(); +} +#endif diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp new file mode 100644 index 0000000000..e45058b57b --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp @@ -0,0 +1,174 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + + +#include "../../HW/Memmap.h" + +#include "../PowerPC.h" +#include "../../CoreTiming.h" +#include "MemoryUtil.h" + +#include "Jit.h" +#include "../JitCommon/JitCache.h" + +#include "../../HW/GPFifo.h" +#include "../../Core.h" +#include "JitAsm.h" +#include "ArmEmitter.h" + +using namespace ArmGen; + +//TODO - make an option +//#if _DEBUG +// bool enableDebug = false; +//#else +// bool enableDebug = false; +//#endif + +JitArmAsmRoutineManager asm_routines; + +void JitArmAsmRoutineManager::Generate() +{ + enterCode = GetCodePtr(); + PUSH(2, R11, _LR); // R11 is frame pointer in Debug. + + MOVI2R(R0, (u32)&CoreTiming::downcount); + MOVI2R(R9, (u32)&PowerPC::ppcState); + + FixupBranch skipToRealDispatcher = B(); + dispatcher = GetCodePtr(); + printf("Dispatcher is %p\n", dispatcher); + + // Downcount Check + // The result of slice decrementation should be in flags if somebody jumped here + // IMPORTANT - We jump on negative, not carry!!! + FixupBranch bail = B_CC(CC_MI); + + SetJumpTarget(skipToRealDispatcher); + dispatcherNoCheck = GetCodePtr(); + + // This block of code gets the address of the compiled block of code + // It runs though to the compiling portion if it isn't found + LDR(R12, R9, STRUCT_OFF(PowerPC::ppcState, pc));// Load the current PC into R12 + + MOVI2R(R14, JIT_ICACHE_MASK); // Potential for optimization + AND(R12, R12, R14); // R12 contains PC & JIT_ICACHE_MASK here. + // Confirmed good to this point 08-03-12 + + MOVI2R(R14, (u32)jit->GetBlockCache()->GetICache()); + // Confirmed That this loads the base iCache Location correctly 08-04-12 + + LDR(R12, R14, R12, true, true); // R12 contains iCache[PC & JIT_ICACHE_MASK] here + // R12 Confirmed this is the correct iCache Location loaded. + TST(R12, 0xFC); // Test to see if it is a JIT block. + + SetCC(CC_EQ); // Only run next part if R12 is zero + // Success, it is our Jitblock. + MOVI2R(R14, (u32)jit->GetBlockCache()->GetCodePointers()); + // LDR R14 right here to get CodePointers()[0] pointer. + REV(R12, R12); // Reversing this gives us our JITblock. + LSL(R12, R12, 2); // Multiply by four because address locations are u32 in size + LDR(R14, R14, R12, true, true); // Load the block address in to R14 + + B(R14); + + FixupBranch NextBlock = B(); // Jump to end so we can start a new block + SetCC(); // Return to always executing codes + + // If we get to this point, that means that we don't have the block cached to execute + // So call ArmJit to compile the block and then execute it. + MOVI2R(R14, (u32)&Jit); + LDR(R0, R9, STRUCT_OFF(PowerPC::ppcState, pc)); + BL(R14); + + B(dispatcherNoCheck); + + // fpException() + // Floating Point Exception Check, Jumped to if false + fpException = GetCodePtr(); + LDR(R0, R9, STRUCT_OFF(PowerPC::ppcState, Exceptions)); + ORR(R0, R0, EXCEPTION_FPU_UNAVAILABLE); + STR(R9, R0, STRUCT_OFF(PowerPC::ppcState, Exceptions)); + QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions); + LDR(R0, R9, STRUCT_OFF(PowerPC::ppcState, npc)); + STR(R9, R0, STRUCT_OFF(PowerPC::ppcState, pc)); + B(dispatcher); + + SetJumpTarget(bail); + doTiming = GetCodePtr(); + // XXX: In JIT64, Advance() gets called /after/ the exception checking + // once it jumps back to the start of outerLoop + QuickCallFunction(R14, (void*)&CoreTiming::Advance); + + // Does exception checking + testExceptions = GetCodePtr(); + LDR(R0, R9, STRUCT_OFF(PowerPC::ppcState, pc)); + STR(R9, R0, STRUCT_OFF(PowerPC::ppcState, npc)); + QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions); + LDR(R0, R9, STRUCT_OFF(PowerPC::ppcState, npc)); + STR(R9, R0, STRUCT_OFF(PowerPC::ppcState, pc)); + // Check the state pointer to see if we are exiting + // Gets checked on every exception check + MOVI2R(R0, (u32)PowerPC::GetStatePtr()); + MVN(R1, 0); + LDR(R0, R0); + TST(R0, R1); + FixupBranch Exit = B_CC(CC_NEQ); + + SetJumpTarget(NextBlock); + B(dispatcher); + + SetJumpTarget(Exit); + + POP(2, R11, _PC); + FlushIcache(); +} + +void JitArmAsmRoutineManager::GenerateCommon() +{ +/* fifoDirectWrite8 = AlignCode4(); + GenFifoWrite(8); + fifoDirectWrite16 = AlignCode4(); + GenFifoWrite(16); + fifoDirectWrite32 = AlignCode4(); + GenFifoWrite(32); + fifoDirectWriteFloat = AlignCode4(); + GenFifoFloatWrite(); + fifoDirectWriteXmm64 = AlignCode4(); + GenFifoXmm64Write(); + + GenQuantizedLoads(); + GenQuantizedStores(); + GenQuantizedSingleStores(); +*/ + //CMPSD(R(XMM0), M(&zero), + // TODO + + // Fast write routines - special case the most common hardware write + // TODO: use this. + // Even in x86, the param values will be in the right registers. + /* + const u8 *fastMemWrite8 = AlignCode16(); + CMP(32, R(ABI_PARAM2), Imm32(0xCC008000)); + FixupBranch skip_fast_write = J_CC(CC_NE, false); + MOV(32, EAX, M(&m_gatherPipeCount)); + MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1); + ADD(32, 1, M(&m_gatherPipeCount)); + RET(); + SetJumpTarget(skip_fast_write); + CALL((void *)&Memory::Write_U8);*/ +} diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.h b/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.h new file mode 100644 index 0000000000..9a61e9e653 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.h @@ -0,0 +1,43 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _JITARMASM_H +#define _JITARMASM_H +#include "ArmEmitter.h" +#include "../JitCommon/JitAsmCommon.h" +using namespace ArmGen; +class JitArmAsmRoutineManager : public CommonAsmRoutinesBase, public ARMXCodeBlock +{ +private: + void Generate(); + void GenerateCommon(); + +public: + void Init() { + AllocCodeSpace(8192); + Generate(); + WriteProtect(); + } + + void Shutdown() { + FreeCodeSpace(); + } +}; + +extern JitArmAsmRoutineManager asm_routines; + +#endif // _JIT64ASM_H diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitRegCache.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitRegCache.cpp new file mode 100644 index 0000000000..9c6ffdbac4 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitRegCache.cpp @@ -0,0 +1,167 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "JitRegCache.h" + +ArmRegCache::ArmRegCache() +{ + emit = 0; +} + +void ArmRegCache::Init(ARMXEmitter *emitter) +{ + emit = emitter; + ARMReg *PPCRegs = GetPPCAllocationOrder(NUMPPCREG); + ARMReg *Regs = GetAllocationOrder(NUMARMREG); + for(u8 a = 0; a < 32; ++a) + { + // This gives us the memory locations of the gpr registers so we can + // load them. + regs[a].location = (u8*)&PowerPC::ppcState.gpr[a]; + regs[a].UsesLeft = 0; + } + for(u8 a = 0; a < NUMPPCREG; ++a) + { + ArmCRegs[a].PPCReg = 33; + ArmCRegs[a].Reg = PPCRegs[a]; + ArmCRegs[a].LastLoad = 0; + } + for(u8 a = 0; a < NUMARMREG; ++a) + { + ArmRegs[a].Reg = Regs[a]; + ArmRegs[a].free = true; + } +} +void ArmRegCache::Start(PPCAnalyst::BlockRegStats &stats) +{ + for(u8 a = 0; a < NUMPPCREG; ++a) + { + ArmCRegs[a].PPCReg = 33; + ArmCRegs[a].LastLoad = 0; + } + for(u8 a = 0; a < 32; ++a) + regs[a].UsesLeft = stats.GetTotalNumAccesses(a); +} +ARMReg *ArmRegCache::GetPPCAllocationOrder(int &count) +{ + // This will return us the allocation order of the registers we can use on + // the ppc side. + static ARMReg allocationOrder[] = + { + R0, R1, R2, R3, R4, R5, R6, R7, R8 + }; + count = sizeof(allocationOrder) / sizeof(const int); + return allocationOrder; +} +ARMReg *ArmRegCache::GetAllocationOrder(int &count) +{ + // This will return us the allocation order of the registers we can use on + // the host side. + static ARMReg allocationOrder[] = + { + R14, R12, R11, R10 + }; + count = sizeof(allocationOrder) / sizeof(const int); + return allocationOrder; +} + +ARMReg ArmRegCache::GetReg(bool AutoLock) +{ + for(u8 a = 0; a < NUMARMREG; ++a) + if(ArmRegs[a].free) + { + // Alright, this one is free + if (AutoLock) + ArmRegs[a].free = false; + return ArmRegs[a].Reg; + } + // Uh Oh, we have all them locked.... + _assert_msg_(_DYNA_REC_, false, "All available registers are locked dumb dumb"); + return R0; +} +void ArmRegCache::Lock(ARMReg Reg) +{ + for(u8 RegNum = 0; RegNum < NUMARMREG; ++RegNum) + if(ArmRegs[RegNum].Reg == Reg) + { + _assert_msg_(_DYNA_REC, ArmRegs[RegNum].free, "This register is already locked"); + ArmRegs[RegNum].free = false; + } + _assert_msg_(_DYNA_REC, false, "Register %d can't be used with lock", Reg); +} +void ArmRegCache::Unlock(ARMReg R0, ARMReg R1, ARMReg R2, ARMReg R3) +{ + for(u8 RegNum = 0; RegNum < NUMARMREG; ++RegNum) + { + if(ArmRegs[RegNum].Reg == R0) + { + _assert_msg_(_DYNA_REC, !ArmRegs[RegNum].free, "This register is already unlocked"); + ArmRegs[RegNum].free = true; + } + if( R1 != INVALID_REG && ArmRegs[RegNum].Reg == R1) ArmRegs[RegNum].free = true; + if( R2 != INVALID_REG && ArmRegs[RegNum].Reg == R2) ArmRegs[RegNum].free = true; + if( R3 != INVALID_REG && ArmRegs[RegNum].Reg == R3) ArmRegs[RegNum].free = true; + } +} + +ARMReg ArmRegCache::R(u32 preg) +{ + u32 HighestUsed = 0; + u8 Num = 0; + for(u8 a = 0; a < NUMPPCREG; ++a){ + ++ArmCRegs[a].LastLoad; + if (ArmCRegs[a].LastLoad > HighestUsed) + { + HighestUsed = ArmCRegs[a].LastLoad; + Num = a; + } + } + // Check if already Loaded + for(u8 a = 0; a < NUMPPCREG; ++a) + if (ArmCRegs[a].PPCReg == preg) + { + ArmCRegs[a].LastLoad = 0; + return ArmCRegs[a].Reg; + } + // Check if we have a free register + for (u8 a = 0; a < NUMPPCREG; ++a) + if (ArmCRegs[a].PPCReg == 33) + { + emit->LDR(ArmCRegs[a].Reg, R9, STRUCT_OFF(PowerPC::ppcState, gpr) + preg * 4); + ArmCRegs[a].PPCReg = preg; + ArmCRegs[a].LastLoad = 0; + return ArmCRegs[a].Reg; + } + // Alright, we couldn't get a free space, dump that least used register + emit->STR(R9, ArmCRegs[Num].Reg, STRUCT_OFF(PowerPC::ppcState, gpr) + ArmCRegs[Num].PPCReg * 4); + emit->LDR(ArmCRegs[Num].Reg, R9, STRUCT_OFF(PowerPC::ppcState, gpr) + preg * 4); + ArmCRegs[Num].PPCReg = preg; + ArmCRegs[Num].LastLoad = 0; + return ArmCRegs[Num].Reg; +} + +void ArmRegCache::Flush() +{ + for(u8 a = 0; a < NUMPPCREG; ++a) + if (ArmCRegs[a].PPCReg != 33) + { + emit->STR(R9, ArmCRegs[a].Reg, STRUCT_OFF(PowerPC::ppcState, gpr) + ArmCRegs[a].PPCReg * 4); + ArmCRegs[a].PPCReg = 33; + ArmCRegs[a].LastLoad = 0; + } +} + diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitRegCache.h b/Source/Core/Core/Src/PowerPC/JitArm32/JitRegCache.h new file mode 100644 index 0000000000..7292ce1581 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitRegCache.h @@ -0,0 +1,92 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _JITARMREGCACHE_H +#define _JITARMREGCACHE_H + +#include "ArmEmitter.h" +#include "../Gekko.h" +#include "../PPCAnalyst.h" + +using namespace ArmGen; +// This ARM Register cache actually pre loads the most used registers before +// the block to increase speed since every memory load requires two +// instructions to load it. We are going to use R0-RMAX as registers for the +// use of PPC Registers. +// Allocation order as follows +#define ARMREGS 16 +// Allocate R0 to R9 for PPC first. +// For General registers on the host side, start with R14 and go down as we go +// R13 is reserved for our stack pointer, don't ever use that. Unless you save +// it +// So we have R14, R12, R11, R10 to work with instructions + +struct PPCCachedReg +{ + const u8 *location; + u32 UsesLeft; +}; +struct JRCPPC +{ + u32 PPCReg; // Tied to which PPC Register + bool PS1; + ARMReg Reg; // Tied to which ARM Register + u32 LastLoad; +}; +struct JRCReg +{ + ARMReg Reg; // Which reg this is. + bool free; +}; +class ArmRegCache +{ +private: + PPCCachedReg regs[32]; + JRCPPC ArmCRegs[ARMREGS]; + JRCReg ArmRegs[ARMREGS]; // Four registers remaining + + int NUMPPCREG; + int NUMARMREG; + + ARMReg *GetAllocationOrder(int &count); + ARMReg *GetPPCAllocationOrder(int &count); + +protected: + ARMXEmitter *emit; + +public: + ArmRegCache(); + ~ArmRegCache() {} + + void Init(ARMXEmitter *emitter); + void Start(PPCAnalyst::BlockRegStats &stats); + + void SetEmitter(ARMXEmitter *emitter) {emit = emitter;} + + ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use. + void Lock(ARMReg reg); + void Unlock(ARMReg R0, ARMReg R1 = INVALID_REG, ARMReg R2 = INVALID_REG, ARMReg R3 = + INVALID_REG); + void Flush(); + ARMReg R(u32 preg); // Returns a cached register + +}; + + + + +#endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp index 34b9a681e9..7f509dbf0e 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp @@ -15,7 +15,7 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -#include "ABI.h" +#include "x64ABI.h" #include "Thunk.h" #include "CPUDetect.h" #include "x64Emitter.h" @@ -26,7 +26,7 @@ #include "../../CoreTiming.h" #include "MemoryUtil.h" -#include "ABI.h" +#include "x64ABI.h" #include "../JitCommon/JitCache.h" #include "../../HW/GPFifo.h" diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h index cbec27d2aa..a610a03326 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h @@ -21,16 +21,8 @@ #include "../JitCommon/Jit_Util.h" #include "Thunk.h" -class CommonAsmRoutines : public EmuCodeBlock { -protected: - void GenQuantizedLoads(); - void GenQuantizedStores(); - void GenQuantizedSingleStores(); - +class CommonAsmRoutinesBase { public: - void GenFifoWrite(int size); - void GenFifoXmm64Write(); - void GenFifoFloatWrite(); const u8 *fifoDirectWrite8; const u8 *fifoDirectWrite16; @@ -72,8 +64,23 @@ public: // In: XMM0: Bottom 32-bit slot holds the float to be written. const u8 **singleStoreQuantized; +}; + +class CommonAsmRoutines : public CommonAsmRoutinesBase, public EmuCodeBlock +{ +protected: + void GenQuantizedLoads(); + void GenQuantizedStores(); + void GenQuantizedSingleStores(); + +public: + void GenFifoWrite(int size); + void GenFifoXmm64Write(); + void GenFifoFloatWrite(); + private: ThunkManager thunks; + }; #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp index 3042cfedba..76e791a94c 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp @@ -25,7 +25,7 @@ #include "../../HW/Memmap.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "Thunk.h" #include "x64Analyzer.h" @@ -160,7 +160,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info) // 1) It's really necessary. We don't know anything about the context. // 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be // that many of them in a typical program/game. -const u8 *JitBase::BackPatch(u8 *codePtr, int accessType, u32 emAddress, void *ctx_void) +const u8 *Jitx86Base::BackPatch(u8 *codePtr, int accessType, u32 emAddress, void *ctx_void) { #ifdef _M_X64 CONTEXT *ctx = (CONTEXT *)ctx_void; diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h index 4b880eb86d..871a112c6c 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h @@ -34,6 +34,9 @@ // from the real context. struct CONTEXT { + #ifdef _M_ARM + u32 reg_pc; + #else #ifdef _M_X64 u64 Rip; u64 Rax; @@ -41,6 +44,7 @@ u32 Eip; u32 Eax; #endif + #endif }; #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h index 5352c06d04..158171be0f 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h @@ -32,12 +32,9 @@ #define JIT_OPCODE 0 -class JitBase : public CPUCoreBase, public EmuCodeBlock +class JitBase : public CPUCoreBase { protected: - JitBlockCache blocks; - TrampolineCache trampolines; - struct JitOptions { bool optimizeStack; @@ -85,14 +82,29 @@ public: // This should probably be removed from public: JitOptions jo; JitState js; - - JitBlockCache *GetBlockCache() { return &blocks; } + + virtual JitBaseBlockCache *GetBlockCache() = 0; virtual void Jit(u32 em_address) = 0; + virtual const u8 *BackPatch(u8 *codePtr, int accessType, u32 em_address, void *ctx) = 0; + + virtual const CommonAsmRoutinesBase *GetAsmRoutines() = 0; + + virtual bool IsInCodeSpace(u8 *ptr) = 0; +}; + +class Jitx86Base : public JitBase, public EmuCodeBlock +{ +protected: + JitBlockCache blocks; + TrampolineCache trampolines; +public: + JitBlockCache *GetBlockCache() { return &blocks; } + const u8 *BackPatch(u8 *codePtr, int accessType, u32 em_address, void *ctx); - virtual const CommonAsmRoutines *GetAsmRoutines() = 0; + bool IsInCodeSpace(u8 *ptr) { return IsInSpace(ptr); } }; extern JitBase *jit; diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp index d409518bf9..415b37ca61 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp @@ -32,15 +32,13 @@ #include "MemoryUtil.h" #include "../../HW/Memmap.h" +#include "../JitInterface.h" #include "../../CoreTiming.h" #include "../PowerPC.h" #include "../PPCTables.h" #include "../PPCAnalyst.h" -#include "x64Emitter.h" -#include "x64Analyzer.h" - #include "JitCache.h" #include "JitBase.h" @@ -68,12 +66,12 @@ bool JitBlock::ContainsAddress(u32 em_address) return (em_address >= originalAddress && em_address < originalAddress + 4 * originalSize); } - bool JitBlockCache::IsFull() const + bool JitBaseBlockCache::IsFull() const { return GetNumBlocks() >= MAX_NUM_BLOCKS - 1; } - void JitBlockCache::Init() + void JitBaseBlockCache::Init() { MAX_NUM_BLOCKS = 65536*2; @@ -91,11 +89,11 @@ bool JitBlock::ContainsAddress(u32 em_address) } else { - PanicAlert("JitBlockCache::Init() - iCache is already initialized"); + PanicAlert("JitBaseBlockCache::Init() - iCache is already initialized"); } if (iCache == 0 || iCacheEx == 0 || iCacheVMEM == 0) { - PanicAlert("JitBlockCache::Init() - unable to allocate iCache"); + PanicAlert("JitBaseBlockCache::Init() - unable to allocate iCache"); } memset(iCache, JIT_ICACHE_INVALID_BYTE, JIT_ICACHE_SIZE); memset(iCacheEx, JIT_ICACHE_INVALID_BYTE, JIT_ICACHEEX_SIZE); @@ -104,7 +102,7 @@ bool JitBlock::ContainsAddress(u32 em_address) Clear(); } - void JitBlockCache::Shutdown() + void JitBaseBlockCache::Shutdown() { delete[] blocks; delete[] blockCodePointers; @@ -133,7 +131,7 @@ bool JitBlock::ContainsAddress(u32 em_address) // This clears the JIT cache. It's called from JitCache.cpp when the JIT cache // is full and when saving and loading states. - void JitBlockCache::Clear() + void JitBaseBlockCache::Clear() { if (IsFull()) Core::DisplayMessage("Clearing block cache.", 3000); @@ -151,7 +149,7 @@ bool JitBlock::ContainsAddress(u32 em_address) memset(blockCodePointers, 0, sizeof(u8*)*MAX_NUM_BLOCKS); } - void JitBlockCache::ClearSafe() + void JitBaseBlockCache::ClearSafe() { #ifdef JIT_UNLIMITED_ICACHE memset(iCache, JIT_ICACHE_INVALID_BYTE, JIT_ICACHE_SIZE); @@ -160,7 +158,7 @@ bool JitBlock::ContainsAddress(u32 em_address) #endif } - /*void JitBlockCache::DestroyBlocksWithFlag(BlockFlag death_flag) + /*void JitBaseBlockCache::DestroyBlocksWithFlag(BlockFlag death_flag) { for (int i = 0; i < num_blocks; i++) { @@ -171,23 +169,23 @@ bool JitBlock::ContainsAddress(u32 em_address) } }*/ - void JitBlockCache::Reset() + void JitBaseBlockCache::Reset() { Shutdown(); Init(); } - JitBlock *JitBlockCache::GetBlock(int no) + JitBlock *JitBaseBlockCache::GetBlock(int no) { return &blocks[no]; } - int JitBlockCache::GetNumBlocks() const + int JitBaseBlockCache::GetNumBlocks() const { return num_blocks; } - bool JitBlockCache::RangeIntersect(int s1, int e1, int s2, int e2) const + bool JitBaseBlockCache::RangeIntersect(int s1, int e1, int s2, int e2) const { // check if any endpoint is inside the other range if ((s1 >= s2 && s1 <= e2) || @@ -199,7 +197,7 @@ bool JitBlock::ContainsAddress(u32 em_address) return false; } - int JitBlockCache::AllocateBlock(u32 em_address) + int JitBaseBlockCache::AllocateBlock(u32 em_address) { JitBlock &b = blocks[num_blocks]; b.invalid = false; @@ -215,12 +213,12 @@ bool JitBlock::ContainsAddress(u32 em_address) return num_blocks - 1; } - void JitBlockCache::FinalizeBlock(int block_num, bool block_link, const u8 *code_ptr) + void JitBaseBlockCache::FinalizeBlock(int block_num, bool block_link, const u8 *code_ptr) { blockCodePointers[block_num] = code_ptr; JitBlock &b = blocks[block_num]; - b.originalFirstOpcode = Memory::Read_Opcode_JIT(b.originalAddress); - Memory::Write_Opcode_JIT(b.originalAddress, (JIT_OPCODE << 26) | block_num); + b.originalFirstOpcode = JitInterface::Read_Opcode_JIT(b.originalAddress); + JitInterface::Write_Opcode_JIT(b.originalAddress, (JIT_OPCODE << 26) | block_num); // Convert the logical address to a physical address for the block map u32 pAddr = b.originalAddress & 0x1FFFFFFF; @@ -264,29 +262,29 @@ bool JitBlock::ContainsAddress(u32 em_address) #endif } - const u8 **JitBlockCache::GetCodePointers() + const u8 **JitBaseBlockCache::GetCodePointers() { return blockCodePointers; } #ifdef JIT_UNLIMITED_ICACHE - u8* JitBlockCache::GetICache() + u8* JitBaseBlockCache::GetICache() { return iCache; } - u8* JitBlockCache::GetICacheEx() + u8* JitBaseBlockCache::GetICacheEx() { return iCacheEx; } - u8* JitBlockCache::GetICacheVMEM() + u8* JitBaseBlockCache::GetICacheVMEM() { return iCacheVMEM; } #endif - int JitBlockCache::GetBlockNumberFromStartAddress(u32 addr) + int JitBaseBlockCache::GetBlockNumberFromStartAddress(u32 addr) { if (!blocks) return -1; @@ -317,24 +315,24 @@ bool JitBlock::ContainsAddress(u32 em_address) return inst; } - void JitBlockCache::GetBlockNumbersFromAddress(u32 em_address, std::vector *block_numbers) + void JitBaseBlockCache::GetBlockNumbersFromAddress(u32 em_address, std::vector *block_numbers) { for (int i = 0; i < num_blocks; i++) if (blocks[i].ContainsAddress(em_address)) block_numbers->push_back(i); } - u32 JitBlockCache::GetOriginalFirstOp(int block_num) + u32 JitBaseBlockCache::GetOriginalFirstOp(int block_num) { if (block_num >= num_blocks) { - //PanicAlert("JitBlockCache::GetOriginalFirstOp - block_num = %u is out of range", block_num); + //PanicAlert("JitBaseBlockCache::GetOriginalFirstOp - block_num = %u is out of range", block_num); return block_num; } return blocks[block_num].originalFirstOpcode; } - CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int block_num) + CompiledCode JitBaseBlockCache::GetCompiledCodeFromBlock(int block_num) { return (CompiledCode)blockCodePointers[block_num]; } @@ -345,7 +343,7 @@ bool JitBlock::ContainsAddress(u32 em_address) //Can be faster by doing a queue for blocks to link up, and only process those //Should probably be done - void JitBlockCache::LinkBlockExits(int i) + void JitBaseBlockCache::LinkBlockExits(int i) { JitBlock &b = blocks[i]; if (b.invalid) @@ -360,8 +358,7 @@ bool JitBlock::ContainsAddress(u32 em_address) int destinationBlock = GetBlockNumberFromStartAddress(b.exitAddress[e]); if (destinationBlock != -1) { - XEmitter emit(b.exitPtrs[e]); - emit.JMP(blocks[destinationBlock].checkedEntry, true); + WriteLinkBlock(b.exitPtrs[e], blocks[destinationBlock].checkedEntry); b.linkStatus[e] = true; } } @@ -370,7 +367,7 @@ bool JitBlock::ContainsAddress(u32 em_address) using namespace std; - void JitBlockCache::LinkBlock(int i) + void JitBaseBlockCache::LinkBlock(int i) { LinkBlockExits(i); JitBlock &b = blocks[i]; @@ -386,7 +383,7 @@ bool JitBlock::ContainsAddress(u32 em_address) } } - void JitBlockCache::UnlinkBlock(int i) + void JitBaseBlockCache::UnlinkBlock(int i) { JitBlock &b = blocks[i]; pair::iterator, multimap::iterator> ppp; @@ -403,7 +400,7 @@ bool JitBlock::ContainsAddress(u32 em_address) } } - void JitBlockCache::DestroyBlock(int block_num, bool invalidate) + void JitBaseBlockCache::DestroyBlock(int block_num, bool invalidate) { if (block_num < 0 || block_num >= num_blocks) { @@ -419,7 +416,7 @@ bool JitBlock::ContainsAddress(u32 em_address) } b.invalid = true; #ifdef JIT_UNLIMITED_ICACHE - Memory::Write_Opcode_JIT(b.originalAddress, b.originalFirstOpcode?b.originalFirstOpcode:JIT_ICACHE_INVALID_WORD); + JitInterface::Write_Opcode_JIT(b.originalAddress, b.originalFirstOpcode?b.originalFirstOpcode:JIT_ICACHE_INVALID_WORD); #else if (Memory::ReadFast32(b.originalAddress) == block_num) Memory::WriteUnchecked_U32(b.originalFirstOpcode, b.originalAddress); @@ -430,18 +427,10 @@ bool JitBlock::ContainsAddress(u32 em_address) // Send anyone who tries to run this block back to the dispatcher. // Not entirely ideal, but .. pretty good. // Spurious entrances from previously linked blocks can only come through checkedEntry - XEmitter emit((u8 *)b.checkedEntry); - emit.MOV(32, M(&PC), Imm32(b.originalAddress)); - emit.JMP(jit->GetAsmRoutines()->dispatcher, true); - // this is not needed really - /* - emit.SetCodePtr((u8 *)blockCodePointers[blocknum]); - emit.MOV(32, M(&PC), Imm32(b.originalAddress)); - emit.JMP(asm_routines.dispatcher, true); - */ + WriteDestroyBlock(b.checkedEntry, b.originalAddress); } - void JitBlockCache::InvalidateICache(u32 address, const u32 length) + void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length) { // Convert the logical address to a physical address for the block map u32 pAddr = address & 0x1FFFFFFF; @@ -449,7 +438,7 @@ bool JitBlock::ContainsAddress(u32 em_address) // Optimize the common case of length == 32 which is used by Interpreter::dcb* bool destroy_block = true; if (length == 32) - { + { if (!valid_block[pAddr / 32]) destroy_block = false; else @@ -462,7 +451,7 @@ bool JitBlock::ContainsAddress(u32 em_address) { std::map, u32>::iterator it1 = block_map.lower_bound(std::make_pair(pAddr, 0)), it2 = it1; while (it2 != block_map.end() && it2->first.second < pAddr + length) - { + { #ifdef JIT_UNLIMITED_ICACHE JitBlock &b = blocks[it2->second]; if (b.originalAddress & JIT_ICACHE_VMEM_BIT) @@ -516,3 +505,14 @@ bool JitBlock::ContainsAddress(u32 em_address) } #endif } + void JitBlockCache::WriteLinkBlock(u8* location, const u8* address) + { + XEmitter emit(location); + emit.JMP(address, true); + } + void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address) + { + XEmitter emit((u8 *)location); + emit.MOV(32, M(&PC), Imm32(address)); + emit.JMP(jit->GetAsmRoutines()->dispatcher, true); + } diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h index 4214f33875..b148cb8090 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h @@ -78,7 +78,8 @@ struct JitBlock typedef void (*CompiledCode)(); -class JitBlockCache + +class JitBaseBlockCache { const u8 **blockCodePointers; JitBlock *blocks; @@ -97,9 +98,13 @@ class JitBlockCache void LinkBlockExits(int i); void LinkBlock(int i); void UnlinkBlock(int i); + + // Virtual for overloaded + virtual void WriteLinkBlock(u8* location, const u8* address) = 0; + virtual void WriteDestroyBlock(const u8* location, u32 address) = 0; public: - JitBlockCache() : + JitBaseBlockCache() : blockCodePointers(0), blocks(0), num_blocks(0), #ifdef JIT_UNLIMITED_ICACHE iCache(0), iCacheEx(0), iCacheVMEM(0), @@ -146,4 +151,11 @@ public: //void DestroyBlocksWithFlag(BlockFlag death_flag); }; +// x86 BlockCache +class JitBlockCache : public JitBaseBlockCache +{ +private: + void WriteLinkBlock(u8* location, const u8* address); + void WriteDestroyBlock(const u8* location, u32 address); +}; #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp index cc1327c695..f5c4cb022b 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp @@ -25,7 +25,7 @@ #include "../../HW/Memmap.h" #include "../PPCTables.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "JitBase.h" #include "Jit_Util.h" diff --git a/Source/Core/Core/Src/PowerPC/JitInterface.cpp b/Source/Core/Core/Src/PowerPC/JitInterface.cpp new file mode 100644 index 0000000000..8fcdd9837f --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitInterface.cpp @@ -0,0 +1,350 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include + +#ifdef _WIN32 +#include +#endif + +#include "JitInterface.h" +#include "JitCommon/JitBase.h" + +#ifndef _M_GENERIC +#include "Jit64IL/JitIL.h" +#include "Jit64/Jit.h" +#include "Jit64/Jit64_Tables.h" +#include "Jit64IL/JitIL_Tables.h" +#endif + +#ifdef _M_ARM +#include "JitArm32/Jit.h" +#include "JitArm32/JitArm_Tables.h" +#endif + +#include "Profiler.h" +#include "PPCSymbolDB.h" +#include "HW/Memmap.h" +#include "ConfigManager.h" + +bool bFakeVMEM = false; +bool bMMU = false; + +namespace JitInterface +{ + void DoState(PointerWrap &p) + { + if (jit && p.GetMode() == PointerWrap::MODE_READ) + jit->GetBlockCache()->ClearSafe(); + } + CPUCoreBase *InitJitCore(int core) + { + bFakeVMEM = SConfig::GetInstance().m_LocalCoreStartupParameter.iTLBHack == 1; + bMMU = SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU; + + CPUCoreBase *ptr = NULL; + switch(core) + { + #ifndef _M_GENERIC + case 1: + { + ptr = new Jit64(); + break; + } + case 2: + { + ptr = new JitIL(); + break; + } + #endif + #ifdef _M_ARM + case 3: + { + ptr = new JitArm(); + break; + } + #endif + default: + { + PanicAlert("Unrecognizable cpu_core: %d", core); + return NULL; + break; + } + } + jit = static_cast(ptr); + jit->Init(); + return ptr; + } + void InitTables(int core) + { + switch(core) + { + #ifndef _M_GENERIC + case 1: + { + Jit64Tables::InitTables(); + break; + } + case 2: + { + JitILTables::InitTables(); + break; + } + #endif + #ifdef _M_ARM + case 3: + { + JitArmTables::InitTables(); + break; + } + #endif + default: + { + PanicAlert("Unrecognizable cpu_core: %d", core); + break; + } + } + } + CPUCoreBase *GetCore() + { + return jit; + } + + void WriteProfileResults(const char *filename) + { + // Can't really do this with no jit core available + #ifndef _M_GENERIC + + std::vector stats; + stats.reserve(jit->GetBlockCache()->GetNumBlocks()); + u64 cost_sum = 0; + #ifdef _WIN32 + u64 timecost_sum = 0; + u64 countsPerSec; + QueryPerformanceFrequency((LARGE_INTEGER *)&countsPerSec); + #endif + for (int i = 0; i < jit->GetBlockCache()->GetNumBlocks(); i++) + { + const JitBlock *block = jit->GetBlockCache()->GetBlock(i); + // Rough heuristic. Mem instructions should cost more. + u64 cost = block->originalSize * (block->runCount / 4); + #ifdef _WIN32 + u64 timecost = block->ticCounter; + #endif + // Todo: tweak. + if (block->runCount >= 1) + stats.push_back(BlockStat(i, cost)); + cost_sum += cost; + #ifdef _WIN32 + timecost_sum += timecost; + #endif + } + + sort(stats.begin(), stats.end()); + File::IOFile f(filename, "w"); + if (!f) + { + PanicAlert("failed to open %s", filename); + return; + } + fprintf(f.GetHandle(), "origAddr\tblkName\tcost\ttimeCost\tpercent\ttimePercent\tOvAllinBlkTime(ms)\tblkCodeSize\n"); + for (unsigned int i = 0; i < stats.size(); i++) + { + const JitBlock *block = jit->GetBlockCache()->GetBlock(stats[i].blockNum); + if (block) + { + std::string name = g_symbolDB.GetDescription(block->originalAddress); + double percent = 100.0 * (double)stats[i].cost / (double)cost_sum; + #ifdef _WIN32 + double timePercent = 100.0 * (double)block->ticCounter / (double)timecost_sum; + fprintf(f.GetHandle(), "%08x\t%s\t%llu\t%llu\t%.2lf\t%llf\t%lf\t%i\n", + block->originalAddress, name.c_str(), stats[i].cost, + block->ticCounter, percent, timePercent, + (double)block->ticCounter*1000.0/(double)countsPerSec, block->codeSize); + #else + fprintf(f.GetHandle(), "%08x\t%s\t%llu\t???\t%.2lf\t???\t???\t%i\n", + block->originalAddress, name.c_str(), stats[i].cost, percent, block->codeSize); + #endif + } + } + #endif + } + bool IsInCodeSpace(u8 *ptr) + { + return jit->IsInCodeSpace(ptr); + } + const u8 *BackPatch(u8 *codePtr, int accessType, u32 em_address, void *ctx) + { + return jit->BackPatch(codePtr, accessType, em_address, ctx); + } + + void ClearCache() + { + if (jit) + jit->ClearCache(); + } + void ClearSafe() + { + if (jit) + jit->GetBlockCache()->ClearSafe(); + } + + void InvalidateICache(u32 address, u32 size) + { + if (jit) + jit->GetBlockCache()->InvalidateICache(address, size); + } + + + // Memory functions + u32 Read_Opcode_JIT_Uncached(const u32 _Address) + { + u8* iCache; + u32 addr; + if (_Address & JIT_ICACHE_VMEM_BIT) + { + iCache = jit->GetBlockCache()->GetICacheVMEM(); + addr = _Address & JIT_ICACHE_MASK; + } + else if (_Address & JIT_ICACHE_EXRAM_BIT) + { + iCache = jit->GetBlockCache()->GetICacheEx(); + addr = _Address & JIT_ICACHEEX_MASK; + } + else + { + iCache = jit->GetBlockCache()->GetICache(); + addr = _Address & JIT_ICACHE_MASK; + } + u32 inst = *(u32*)(iCache + addr); + if (inst == JIT_ICACHE_INVALID_WORD) + { + u32 cache_block_start = addr & ~0x1f; + u32 mem_block_start = _Address & ~0x1f; + u8 *pMem = Memory::GetPointer(mem_block_start); + memcpy(iCache + cache_block_start, pMem, 32); + inst = *(u32*)(iCache + addr); + } + inst = Common::swap32(inst); + + if ((inst & 0xfc000000) == 0) + { + inst = jit->GetBlockCache()->GetOriginalFirstOp(inst); + } + + return inst; + } + + u32 Read_Opcode_JIT(u32 _Address) + { + #ifdef FAST_ICACHE + if (bMMU && !bFakeVMEM && (_Address & Memory::ADDR_MASK_MEM1)) + { + _Address = Memory::TranslateAddress(_Address, Memory::FLAG_OPCODE); + if (_Address == 0) + { + return 0; + } + } + u32 inst = 0; + + // Bypass the icache for the external interrupt exception handler + if ( (_Address & 0x0FFFFF00) == 0x00000500 ) + inst = Read_Opcode_JIT_Uncached(_Address); + else + inst = PowerPC::ppcState.iCache.ReadInstruction(_Address); + #else + u32 inst = Memory::ReadUnchecked_U32(_Address); + #endif + return inst; + } + + // The following function is deprecated in favour of FAST_ICACHE + u32 Read_Opcode_JIT_LC(const u32 _Address) + { + #ifdef JIT_UNLIMITED_ICACHE + if ((_Address & ~JIT_ICACHE_MASK) != 0x80000000 && (_Address & ~JIT_ICACHE_MASK) != 0x00000000 && + (_Address & ~JIT_ICACHE_MASK) != 0x7e000000 && // TLB area + (_Address & ~JIT_ICACHEEX_MASK) != 0x90000000 && (_Address & ~JIT_ICACHEEX_MASK) != 0x10000000) + { + PanicAlertT("iCacheJIT: Reading Opcode from %x. Please report.", _Address); + ERROR_LOG(MEMMAP, "iCacheJIT: Reading Opcode from %x. Please report.", _Address); + return 0; + } + u8* iCache; + u32 addr; + if (_Address & JIT_ICACHE_VMEM_BIT) + { + iCache = jit->GetBlockCache()->GetICacheVMEM(); + addr = _Address & JIT_ICACHE_MASK; + } + else if (_Address & JIT_ICACHE_EXRAM_BIT) + { + iCache = jit->GetBlockCache()->GetICacheEx(); + addr = _Address & JIT_ICACHEEX_MASK; + } + else + { + iCache = jit->GetBlockCache()->GetICache(); + addr = _Address & JIT_ICACHE_MASK; + } + u32 inst = *(u32*)(iCache + addr); + if (inst == JIT_ICACHE_INVALID_WORD) + inst = Memory::ReadUnchecked_U32(_Address); + else + inst = Common::swap32(inst); + #else + u32 inst = Memory::ReadUnchecked_U32(_Address); + #endif + if ((inst & 0xfc000000) == 0) + { + inst = jit->GetBlockCache()->GetOriginalFirstOp(inst); + } + return inst; + } + + // WARNING! No checks! + // We assume that _Address is cached + void Write_Opcode_JIT(const u32 _Address, const u32 _Value) + { + #ifdef JIT_UNLIMITED_ICACHE + if (_Address & JIT_ICACHE_VMEM_BIT) + { + *(u32*)(jit->GetBlockCache()->GetICacheVMEM() + (_Address & JIT_ICACHE_MASK)) = Common::swap32(_Value); + } + else if (_Address & JIT_ICACHE_EXRAM_BIT) + { + *(u32*)(jit->GetBlockCache()->GetICacheEx() + (_Address & JIT_ICACHEEX_MASK)) = Common::swap32(_Value); + } + else + *(u32*)(jit->GetBlockCache()->GetICache() + (_Address & JIT_ICACHE_MASK)) = Common::swap32(_Value); + #else + Memory::WriteUnchecked_U32(_Value, _Address); + #endif + } + + + void Shutdown() + { + if (jit) + { + jit->Shutdown(); + delete jit; + jit = NULL; + } + } +} diff --git a/Source/Core/Core/Src/PowerPC/JitInterface.h b/Source/Core/Core/Src/PowerPC/JitInterface.h new file mode 100644 index 0000000000..a01c3dfa6e --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitInterface.h @@ -0,0 +1,56 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "ChunkFile.h" +#include "CPUCoreBase.h" + +namespace JitInterface +{ + void DoState(PointerWrap &p); + + CPUCoreBase *InitJitCore(int core); + void InitTables(int core); + CPUCoreBase *GetCore(); + + // Debugging + void WriteProfileResults(const char *filename); + + // Memory Utilities + bool IsInCodeSpace(u8 *ptr); + const u8 *BackPatch(u8 *codePtr, int accessType, u32 em_address, void *ctx); + + // used by JIT to read instructions + u32 Read_Opcode_JIT(const u32 _Address); + // used by JIT. uses iCacheJIT. Reads in the "Locked cache" mode + u32 Read_Opcode_JIT_LC(const u32 _Address); + void Write_Opcode_JIT(const u32 _Address, const u32 _Value); + + // Clearing CodeCache + void ClearCache(); + + void ClearSafe(); + + void InvalidateICache(u32 address, u32 size); + + void Shutdown(); +} +extern bool bFakeVMEM; +extern bool bMMU; + +#ifdef _M_ARM +#include "JitArm32/Jit.h" +#endif diff --git a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp index fa9f278c29..15a85568de 100644 --- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp @@ -21,6 +21,7 @@ #include "StringUtil.h" #include "Interpreter/Interpreter.h" #include "../HW/Memmap.h" +#include "JitInterface.h" #include "PPCTables.h" #include "PPCSymbolDB.h" #include "SignatureDB.h" @@ -368,7 +369,7 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, } else { - inst = Memory::Read_Opcode_JIT(address); + inst = JitInterface::Read_Opcode_JIT(address); } } diff --git a/Source/Core/Core/Src/PowerPC/PPCCache.cpp b/Source/Core/Core/Src/PowerPC/PPCCache.cpp index 6956e9db1c..a0e4c666a4 100644 --- a/Source/Core/Core/Src/PowerPC/PPCCache.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCCache.cpp @@ -20,6 +20,7 @@ #include "PowerPC.h" #include "JitCommon/JitBase.h" #include "JitCommon/JitCache.h" +#include "JitInterface.h" namespace PowerPC { @@ -76,8 +77,7 @@ namespace PowerPC memset(lookup_table_ex, 0xff, sizeof(lookup_table_ex)); memset(lookup_table_vmem, 0xff, sizeof(lookup_table_vmem)); #endif - if (jit) - jit->GetBlockCache()->ClearSafe(); + JitInterface::ClearSafe(); } void InstructionCache::Init() @@ -109,8 +109,7 @@ namespace PowerPC } #endif valid[set] = 0; - if (jit) - jit->GetBlockCache()->InvalidateICache(addr & ~0x1f, 32); + JitInterface::InvalidateICache(addr & ~0x1f, 32); } u32 InstructionCache::ReadInstruction(u32 addr) diff --git a/Source/Core/Core/Src/PowerPC/PPCTables.cpp b/Source/Core/Core/Src/PowerPC/PPCTables.cpp index d96089cebb..a6975f5860 100644 --- a/Source/Core/Core/Src/PowerPC/PPCTables.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCTables.cpp @@ -24,11 +24,7 @@ #include "FileUtil.h" #include "Interpreter/Interpreter.h" #include "Interpreter/Interpreter_Tables.h" -#include "Jit64IL/JitIL_Tables.h" -#include "Jit64/Jit64_Tables.h" - -#include "Jit64IL/JitIL.h" -#include "Jit64/Jit.h" +#include "JitInterface.h" struct op_inf { @@ -165,19 +161,9 @@ void InitTables(int cpu_core) // Interpreter break; } - case 1: - { - Jit64Tables::InitTables(); - break; - } - case 2: - { - JitILTables::InitTables(); - break; - } default: { - PanicAlert("Unrecognizable cpu_core: %d", cpu_core); + JitInterface::InitTables(cpu_core); break; } } diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.cpp b/Source/Core/Core/Src/PowerPC/PowerPC.cpp index 8a7b817212..d4447331e7 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/Src/PowerPC/PowerPC.cpp @@ -29,12 +29,10 @@ #include "../HW/SystemTimers.h" #include "Interpreter/Interpreter.h" -#include "JitCommon/JitBase.h" -#include "Jit64IL/JitIL.h" -#include "Jit64/Jit.h" #include "PowerPC.h" #include "PPCTables.h" #include "CPUCoreBase.h" +#include "JitInterface.h" #include "../Host.h" #include "HW/EXI.h" @@ -87,8 +85,7 @@ void DoState(PointerWrap &p) // SystemTimers::DecrementerSet(); // SystemTimers::TimeBaseSet(); - if (jit && p.GetMode() == PointerWrap::MODE_READ) - jit->GetBlockCache()->ClearSafe(); + JitInterface::DoState(p); } void ResetRegisters() @@ -131,28 +128,18 @@ void ResetRegisters() void Init(int cpu_core) { - enum { - FPU_PREC_24 = 0 << 8, - FPU_PREC_53 = 2 << 8, - FPU_PREC_64 = 3 << 8, - FPU_PREC_MASK = 3 << 8, - }; -#ifdef _M_IX86 - // sets the floating-point lib to 53-bit - // PowerPC has a 53bit floating pipeline only - // eg: sscanf is very sensitive -#ifdef _WIN32 - _control87(_PC_53, MCW_PC); -#else - unsigned short _mode; - asm ("fstcw %0" : : "m" (_mode)); - _mode = (_mode & ~FPU_PREC_MASK) | FPU_PREC_53; - asm ("fldcw %0" : : "m" (_mode)); -#endif -#else - //x64 doesn't need this - fpu is done with SSE - //but still - set any useful sse options here -#endif + FPURoundMode::SetPrecisionMode(FPURoundMode::PREC_53); + + memset(ppcState.mojs, 0, sizeof(ppcState.mojs)); + memset(ppcState.sr, 0, sizeof(ppcState.sr)); + ppcState.DebugCount = 0; + ppcState.dtlb_last = 0; + ppcState.dtlb_last = 0; + memset(ppcState.dtlb_va, 0, sizeof(ppcState.dtlb_va)); + memset(ppcState.dtlb_pa, 0, sizeof(ppcState.dtlb_pa)); + ppcState.itlb_last = 0; + memset(ppcState.itlb_va, 0, sizeof(ppcState.itlb_va)); + memset(ppcState.itlb_pa, 0, sizeof(ppcState.itlb_pa)); memset(ppcState.mojs, 0, sizeof(ppcState.mojs)); memset(ppcState.sr, 0, sizeof(ppcState.sr)); @@ -179,27 +166,13 @@ void Init(int cpu_core) cpu_core_base = interpreter; break; } - case 1: - { - cpu_core_base = new Jit64(); - break; - } - case 2: - { - cpu_core_base = new JitIL(); - break; - } - default: - { - PanicAlert("Unrecognizable cpu_core: %d", cpu_core); - break; - } + default: + cpu_core_base = JitInterface::InitJitCore(cpu_core); + break; } if (cpu_core_base != interpreter) { - jit = static_cast(cpu_core_base); - jit->Init(); mode = MODE_JIT; } else @@ -213,12 +186,7 @@ void Init(int cpu_core) void Shutdown() { - if (jit) - { - jit->Shutdown(); - delete jit; - jit = NULL; - } + JitInterface::Shutdown(); interpreter->Shutdown(); cpu_core_base = NULL; state = CPU_POWERDOWN; @@ -244,7 +212,7 @@ void SetMode(CoreMode new_mode) case MODE_JIT: // Switching from interpreter to JIT. // Don't really need to do much. It'll work, the cache will refill itself. - cpu_core_base = jit; + cpu_core_base = JitInterface::GetCore(); break; } } diff --git a/Source/Core/Core/Src/PowerPC/Profiler.cpp b/Source/Core/Core/Src/PowerPC/Profiler.cpp index 3636b00717..fc8942f09b 100644 --- a/Source/Core/Core/Src/PowerPC/Profiler.cpp +++ b/Source/Core/Core/Src/PowerPC/Profiler.cpp @@ -15,17 +15,7 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -#include "JitCommon/JitBase.h" - -#include -#include - -#ifdef _WIN32 -#include -#endif - -#include "PPCSymbolDB.h" -#include "FileUtil.h" +#include "JitInterface.h" namespace Profiler { @@ -33,73 +23,9 @@ namespace Profiler bool g_ProfileBlocks; bool g_ProfileInstructions; -struct BlockStat -{ - BlockStat(int bn, u64 c) : blockNum(bn), cost(c) {} - int blockNum; - u64 cost; - - bool operator <(const BlockStat &other) const - { return cost > other.cost; } -}; - void WriteProfileResults(const char *filename) { - std::vector stats; - stats.reserve(jit->GetBlockCache()->GetNumBlocks()); - u64 cost_sum = 0; -#ifdef _WIN32 - u64 timecost_sum = 0; - u64 countsPerSec; - QueryPerformanceFrequency((LARGE_INTEGER *)&countsPerSec); -#endif - for (int i = 0; i < jit->GetBlockCache()->GetNumBlocks(); i++) - { - const JitBlock *block = jit->GetBlockCache()->GetBlock(i); - if (block && !block->invalid) - { - // Rough heuristic. Mem instructions should cost more. - u64 cost = block->originalSize * (block->runCount / 4); -#ifdef _WIN32 - u64 timecost = block->ticCounter; -#endif - // Todo: tweak. - if (block->runCount >= 1) - stats.push_back(BlockStat(i, cost)); - cost_sum += cost; -#ifdef _WIN32 - timecost_sum += timecost; -#endif - } - } - - sort(stats.begin(), stats.end()); - File::IOFile f(filename, "w"); - if (!f) - { - PanicAlert("failed to open %s", filename); - return; - } - fprintf(f.GetHandle(), "origAddr\tblkName\tcost\trunCount\ttimeCost\tpercent\ttimePercent\tOvAllinBlkTime(ms)\tblkCodeSize\n"); - for (unsigned int i = 0; i < stats.size(); i++) - { - const JitBlock *block = jit->GetBlockCache()->GetBlock(stats[i].blockNum); - if (block && !block->invalid) - { - std::string name = g_symbolDB.GetDescription(block->originalAddress); - double percent = 100.0 * (double)stats[i].cost / (double)cost_sum; -#ifdef _WIN32 - double timePercent = 100.0 * (double)block->ticCounter / (double)timecost_sum; - fprintf(f.GetHandle(), "%08x\t%s\t%llu\t%llu\t%llu\t%.2lf\t%llf\t%lf\t%i\n", - block->originalAddress, name.c_str(), stats[i].cost, block->runCount, - block->ticCounter, percent, timePercent, - (double)block->ticCounter*1000.0/(double)countsPerSec, block->codeSize); -#else - fprintf(f.GetHandle(), "%08x\t%s\t%llu\t???\t%.2lf\t???\t???\t%i\n", - block->originalAddress, name.c_str(), stats[i].cost, percent, block->codeSize); -#endif - } - } + JitInterface::WriteProfileResults(filename); } } // namespace diff --git a/Source/Core/Core/Src/PowerPC/Profiler.h b/Source/Core/Core/Src/PowerPC/Profiler.h index ada130e90f..b8beeaae72 100644 --- a/Source/Core/Core/Src/PowerPC/Profiler.h +++ b/Source/Core/Core/Src/PowerPC/Profiler.h @@ -57,6 +57,15 @@ #define PROFILER_VPOP #endif +struct BlockStat +{ + BlockStat(int bn, u64 c) : blockNum(bn), cost(c) {} + int blockNum; + u64 cost; + + bool operator <(const BlockStat &other) const + { return cost > other.cost; } +}; namespace Profiler { diff --git a/Source/Core/Core/Src/MemTools.cpp b/Source/Core/Core/Src/x64MemTools.cpp similarity index 96% rename from Source/Core/Core/Src/MemTools.cpp rename to Source/Core/Core/Src/x64MemTools.cpp index bb6fef6439..fff26b3cb3 100644 --- a/Source/Core/Core/Src/MemTools.cpp +++ b/Source/Core/Core/Src/x64MemTools.cpp @@ -52,8 +52,10 @@ #include "MemTools.h" #include "HW/Memmap.h" #include "PowerPC/PowerPC.h" +#include "PowerPC/JitInterface.h" +#ifndef _M_GENERIC #include "PowerPC/JitCommon/JitBase.h" -#include "PowerPC/JitCommon/JitBackpatch.h" +#endif #include "x64Analyzer.h" namespace EMM @@ -77,7 +79,7 @@ LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs) PVOID codeAddr = pPtrs->ExceptionRecord->ExceptionAddress; unsigned char *codePtr = (unsigned char*)codeAddr; - if (!jit->IsInCodeSpace(codePtr)) { + if (!JitInterface::IsInCodeSpace(codePtr)) { // Let's not prevent debugging. return (DWORD)EXCEPTION_CONTINUE_SEARCH; } @@ -107,7 +109,7 @@ LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs) //We could emulate the memory accesses here, but then they would still be around to take up //execution resources. Instead, we backpatch into a generic memory call and retry. - const u8 *new_rip = jit->BackPatch(codePtr, accessType, emAddress, ctx); + const u8 *new_rip = JitInterface::BackPatch(codePtr, accessType, emAddress, ctx); // Rip/Eip needs to be updated. if (new_rip) @@ -182,6 +184,7 @@ void print_trace(const char * msg) void sigsegv_handler(int signal, siginfo_t *info, void *raw_context) { +#ifndef _M_GENERIC if (signal != SIGSEGV) { // We are not interested in other signals - handle it as usual. @@ -203,7 +206,7 @@ void sigsegv_handler(int signal, siginfo_t *info, void *raw_context) #else u8 *fault_instruction_ptr = (u8 *)CREG_EIP(ctx); #endif - if (!jit->IsInCodeSpace(fault_instruction_ptr)) { + if (!JitInterface::IsInCodeSpace(fault_instruction_ptr)) { // Let's not prevent debugging. return; } @@ -240,6 +243,7 @@ void sigsegv_handler(int signal, siginfo_t *info, void *raw_context) CREG_EIP(ctx) = fake_ctx.Eip; #endif } +#endif } void InstallExceptionHandler() diff --git a/Source/Core/DiscIO/Src/BannerLoader.cpp b/Source/Core/DiscIO/Src/BannerLoader.cpp index 6451c73670..2350896c9c 100644 --- a/Source/Core/DiscIO/Src/BannerLoader.cpp +++ b/Source/Core/DiscIO/Src/BannerLoader.cpp @@ -27,7 +27,9 @@ #include #else #include +#ifndef ANDROID #include +#endif #include #endif @@ -131,6 +133,9 @@ bool IBannerLoader::CopyBeUnicodeToString( std::string& _rDestination, const u16 delete[] buffer; } } +#else +#ifdef ANDROID + return false; #else if (_src) { @@ -193,6 +198,7 @@ bool IBannerLoader::CopyBeUnicodeToString( std::string& _rDestination, const u16 delete[] src_buffer_start; iconv_close(conv_desc); } +#endif #endif return returnCode; } diff --git a/Source/Core/DolphinWX/CMakeLists.txt b/Source/Core/DolphinWX/CMakeLists.txt index 858e05bbfd..c3cdccccfc 100644 --- a/Source/Core/DolphinWX/CMakeLists.txt +++ b/Source/Core/DolphinWX/CMakeLists.txt @@ -10,21 +10,21 @@ set(LIBS core ${GTK2_LIBRARIES} ${XRANDR_LIBRARIES} ${X11_LIBRARIES}) - -if(SDL2_FOUND) - # Using shared SDL2 - set(LIBS ${LIBS} ${SDL2_LIBRARY}) -else(SDL2_FOUND) - if(SDL_FOUND) - # Using shared SDL - set(LIBS ${LIBS} ${SDL_LIBRARY}) - else(SDL_FOUND) - # Using static SDL from Externals - set(LIBS ${LIBS} SDL) +if(NOT ANDROID) + if(SDL2_FOUND) + # Using shared SDL2 + set(LIBS ${LIBS} ${SDL2_LIBRARY}) + else(SDL2_FOUND) + if(SDL_FOUND) + # Using shared SDL + set(LIBS ${LIBS} ${SDL_LIBRARY}) + else(SDL_FOUND) + # Using static SDL from Externals + set(LIBS ${LIBS} SDL) + endif() endif() endif() - if(LIBAV_FOUND) set(LIBS ${LIBS} ${LIBAV_LIBRARIES}) endif() @@ -79,13 +79,18 @@ if(wxWidgets_FOUND) set(WXLIBS ${wxWidgets_LIBRARIES}) else() - set(SRCS - Src/MainNoGUI.cpp) + if(ANDROID) + set(SRCS Src/MainAndroid.cpp) + else() + set(SRCS Src/MainNoGUI.cpp) + endif() endif() if(USE_EGL) - set(SRCS ${SRCS} Src/GLInterface/EGL.cpp - Src/GLInterface/X11_Util.cpp) + if(NOT ANDROID) + set(SRCS ${SRCS} Src/GLInterface/EGL_X11.cpp + Src/GLInterface/X11_Util.cpp) + endif() else() if(WIN32) set(SRCS ${SRCS} Src/GLInterface/GLW.cpp) @@ -127,7 +132,9 @@ elseif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") set_source_files_properties(${RESOURCES} PROPERTIES MACOSX_PACKAGE_LOCATION Resources) else() - set(SRCS ${SRCS} Src/X11Utils.cpp) + if(NOT ANDROID) + set(SRCS ${SRCS} Src/X11Utils.cpp) + endif() endif() if(${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR @@ -146,75 +153,85 @@ else() set(DOLPHIN_EXE ${DOLPHIN_EXE_BASE}-nogui) endif() -add_executable(${DOLPHIN_EXE} ${SRCS}) -target_link_libraries(${DOLPHIN_EXE} ${LIBS} ${WXLIBS}) - -if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - include(BundleUtilities) - set(BUNDLE_PATH ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${DOLPHIN_EXE}.app) - - # Ask for an application bundle. - set_target_properties(${DOLPHIN_EXE} PROPERTIES - MACOSX_BUNDLE true - MACOSX_BUNDLE_INFO_PLIST ${CMAKE_CURRENT_SOURCE_DIR}/Info.plist.in - ) - - # Install Cg framework into application bundle. - copy_resolved_framework_into_bundle( - # Our framework in "Externals" does not have "Versions/Current/" in - # its path; work around the missing directory levels using "././". - "${CMAKE_SOURCE_DIR}/Externals/Cg/Cg.framework/././Cg" - "${BUNDLE_PATH}/Contents/Frameworks/Cg.framework/././Cg" - ) - - # Fix up the bundle after it is finished. - # There does not seem to be an easy way to run CMake commands post-build, - # so we invoke CMake again on a generated script. - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/postprocess_bundle.cmake " - include(BundleUtilities) - message(\"Fixing up application bundle: ${BUNDLE_PATH}\") - set(BU_CHMOD_BUNDLE_ITEMS ON) - fixup_bundle(\"${BUNDLE_PATH}\" \"\" \"\") - ") - add_custom_command(TARGET ${DOLPHIN_EXE} POST_BUILD - COMMAND ${CMAKE_COMMAND} -P postprocess_bundle.cmake - ) - - # Copy data files into application bundle. - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/copy_data_into_bundle.cmake " - file(INSTALL ${CMAKE_SOURCE_DIR}/Data/Sys ${CMAKE_SOURCE_DIR}/Data/User - DESTINATION ${BUNDLE_PATH}/Contents/Resources - ) - file(GLOB TRANSLATION_FILES RELATIVE ${CMAKE_BINARY_DIR} - ${CMAKE_BINARY_DIR}/*.gmo - ) - foreach(TRANSLATION_FILE \${TRANSLATION_FILES}) - string(REPLACE \".gmo\" \".lproj\" TRANSLATION_DIR - \${TRANSLATION_FILE} - ) - # It would be better to copy to the new name as a single action, - # but I can't figure out a way to let CMake do that. - file(COPY ${CMAKE_BINARY_DIR}/\${TRANSLATION_FILE} - DESTINATION ${BUNDLE_PATH}/Contents/Resources/\${TRANSLATION_DIR} - NO_SOURCE_PERMISSIONS - ) - file(RENAME - ${BUNDLE_PATH}/Contents/Resources/\${TRANSLATION_DIR}/\${TRANSLATION_FILE} - ${BUNDLE_PATH}/Contents/Resources/\${TRANSLATION_DIR}/dolphin-emu.mo - ) - endforeach(TRANSLATION_FILE) - ") - add_custom_target(CopyDataIntoBundle ALL - COMMAND ${CMAKE_COMMAND} -P copy_data_into_bundle.cmake - VERBATIM - ) - - # Install bundle into systemwide /Applications directory. - install(DIRECTORY ${BUNDLE_PATH} DESTINATION /Applications - USE_SOURCE_PERMISSIONS - ) +if(ANDROID) + add_library(${DOLPHIN_EXE} SHARED ${SRCS}) + target_link_libraries(${DOLPHIN_EXE} + log + android + "-Wl,--whole-archive" + ${LIBS} + "-Wl,--no-whole-archive" + ) else() - install(TARGETS ${DOLPHIN_EXE} RUNTIME DESTINATION ${bindir}) + add_executable(${DOLPHIN_EXE} ${SRCS}) + target_link_libraries(${DOLPHIN_EXE} ${LIBS} ${WXLIBS}) + if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + include(BundleUtilities) + set(BUNDLE_PATH ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${DOLPHIN_EXE}.app) + + # Ask for an application bundle. + set_target_properties(${DOLPHIN_EXE} PROPERTIES + MACOSX_BUNDLE true + MACOSX_BUNDLE_INFO_PLIST ${CMAKE_CURRENT_SOURCE_DIR}/Info.plist.in + ) + + # Install Cg framework into application bundle. + copy_resolved_framework_into_bundle( + # Our framework in "Externals" does not have "Versions/Current/" in + # its path; work around the missing directory levels using "././". + "${CMAKE_SOURCE_DIR}/Externals/Cg/Cg.framework/././Cg" + "${BUNDLE_PATH}/Contents/Frameworks/Cg.framework/././Cg" + ) + + # Fix up the bundle after it is finished. + # There does not seem to be an easy way to run CMake commands post-build, + # so we invoke CMake again on a generated script. + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/postprocess_bundle.cmake " + include(BundleUtilities) + message(\"Fixing up application bundle: ${BUNDLE_PATH}\") + set(BU_CHMOD_BUNDLE_ITEMS ON) + fixup_bundle(\"${BUNDLE_PATH}\" \"\" \"\") + ") + add_custom_command(TARGET ${DOLPHIN_EXE} POST_BUILD + COMMAND ${CMAKE_COMMAND} -P postprocess_bundle.cmake + ) + + # Copy data files into application bundle. + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/copy_data_into_bundle.cmake " + file(INSTALL ${CMAKE_SOURCE_DIR}/Data/Sys ${CMAKE_SOURCE_DIR}/Data/User + DESTINATION ${BUNDLE_PATH}/Contents/Resources + ) + file(GLOB TRANSLATION_FILES RELATIVE ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/*.gmo + ) + foreach(TRANSLATION_FILE \${TRANSLATION_FILES}) + string(REPLACE \".gmo\" \".lproj\" TRANSLATION_DIR + \${TRANSLATION_FILE} + ) + # It would be better to copy to the new name as a single action, + # but I can't figure out a way to let CMake do that. + file(COPY ${CMAKE_BINARY_DIR}/\${TRANSLATION_FILE} + DESTINATION ${BUNDLE_PATH}/Contents/Resources/\${TRANSLATION_DIR} + NO_SOURCE_PERMISSIONS + ) + file(RENAME + ${BUNDLE_PATH}/Contents/Resources/\${TRANSLATION_DIR}/\${TRANSLATION_FILE} + ${BUNDLE_PATH}/Contents/Resources/\${TRANSLATION_DIR}/dolphin-emu.mo + ) + endforeach(TRANSLATION_FILE) + ") + add_custom_target(CopyDataIntoBundle ALL + COMMAND ${CMAKE_COMMAND} -P copy_data_into_bundle.cmake + VERBATIM + ) + + # Install bundle into systemwide /Applications directory. + install(DIRECTORY ${BUNDLE_PATH} DESTINATION /Applications + USE_SOURCE_PERMISSIONS + ) + else() + install(TARGETS ${DOLPHIN_EXE} RUNTIME DESTINATION ${bindir}) + endif() endif() set(CPACK_PACKAGE_EXECUTABLES ${CPACK_PACKAGE_EXECUTABLES} ${DOLPHIN_EXE}) diff --git a/Source/Core/DolphinWX/Src/Debugger/CodeWindow.cpp b/Source/Core/DolphinWX/Src/Debugger/CodeWindow.cpp index 1c0776862c..601faeaec6 100644 --- a/Source/Core/DolphinWX/Src/Debugger/CodeWindow.cpp +++ b/Source/Core/DolphinWX/Src/Debugger/CodeWindow.cpp @@ -38,6 +38,7 @@ #include "LogManager.h" #include "HW/CPU.h" #include "PowerPC/PowerPC.h" +#include "PowerPC/JitInterface.h" #include "Debugger/PPCDebugInterface.h" #include "Debugger/Debugger_SymbolMap.h" #include "PowerPC/PPCAnalyst.h" @@ -45,8 +46,6 @@ #include "PowerPC/PPCSymbolDB.h" #include "PowerPC/SignatureDB.h" #include "PowerPC/PPCTables.h" -#include "PowerPC/JitCommon/JitBase.h" -#include "PowerPC/JitCommon/JitCache.h" // for ClearCache() #include "ConfigManager.h" @@ -260,8 +259,7 @@ void CCodeWindow::SingleStep() { if (CCPU::IsStepping()) { - if (jit) - jit->GetBlockCache()->InvalidateICache(PC, 4); + JitInterface::InvalidateICache(PC, 4); CCPU::StepOpcode(&sync_event); wxThread::Sleep(20); // need a short wait here @@ -492,10 +490,8 @@ void CCodeWindow::OnCPUMode(wxCommandEvent& event) } // Clear the JIT cache to enable these changes - if (jit) - { - jit->ClearCache(); - } + JitInterface::ClearCache(); + // Update UpdateButtonStates(); } @@ -509,7 +505,7 @@ void CCodeWindow::OnJitMenu(wxCommandEvent& event) break; case IDM_CLEARCODECACHE: - jit->ClearCache(); + JitInterface::ClearCache(); break; case IDM_SEARCHINSTRUCTION: diff --git a/Source/Core/DolphinWX/Src/Frame.cpp b/Source/Core/DolphinWX/Src/Frame.cpp index 46b3a1e8e5..b0b53cc5ea 100644 --- a/Source/Core/DolphinWX/Src/Frame.cpp +++ b/Source/Core/DolphinWX/Src/Frame.cpp @@ -704,7 +704,7 @@ bool IsHotkey(wxKeyEvent &event, int Id) { return (event.GetKeyCode() != WXK_NONE && event.GetKeyCode() == SConfig::GetInstance().m_LocalCoreStartupParameter.iHotkey[Id] && - event.GetModifiers() == SConfig::GetInstance().m_LocalCoreStartupParameter.iHotkeyModifier[Id]); + event.GetModifiers() == SConfig::GetInstance().m_LocalCoreStartupParameter.iHotkeyModifier[Id]); } int GetCmdForHotkey(unsigned int key) diff --git a/Source/Core/DolphinWX/Src/GLInterface.h b/Source/Core/DolphinWX/Src/GLInterface.h index a81f17104c..70b4942e94 100644 --- a/Source/Core/DolphinWX/Src/GLInterface.h +++ b/Source/Core/DolphinWX/Src/GLInterface.h @@ -18,9 +18,11 @@ #define _GLINTERFACE_H_ #include "Thread.h" - -#if defined(USE_EGL) && USE_EGL -#include "GLInterface/EGL.h" +#ifdef ANDROID +#include +#include +#elif defined(USE_EGL) && USE_EGL +#include "GLInterface/EGL_X11.h" #elif defined(USE_WX) && USE_WX #include "GLInterface/WX.h" #elif defined(__APPLE__) @@ -29,10 +31,13 @@ #include "GLInterface/WGL.h" #elif defined(HAVE_X11) && HAVE_X11 #include "GLInterface/GLX.h" +#else +#error Platform doesnt have a GLInterface #endif typedef struct { -#if defined(USE_EGL) && USE_EGL // This is currently a X11/EGL implementation for desktop +#ifdef ANDROID +#elif defined(USE_EGL) && USE_EGL // This is currently a X11/EGL implementation for desktop int screen; Display *dpy; Display *evdpy; diff --git a/Source/Core/DolphinWX/Src/GLInterface/EGL.cpp b/Source/Core/DolphinWX/Src/GLInterface/EGL_X11.cpp similarity index 99% rename from Source/Core/DolphinWX/Src/GLInterface/EGL.cpp rename to Source/Core/DolphinWX/Src/GLInterface/EGL_X11.cpp index 304eb92059..c092aced64 100644 --- a/Source/Core/DolphinWX/Src/GLInterface/EGL.cpp +++ b/Source/Core/DolphinWX/Src/GLInterface/EGL_X11.cpp @@ -19,7 +19,7 @@ #include "RenderBase.h" #include "../GLInterface.h" -#include "EGL.h" +#include "EGL_X11.h" // Show the current FPS void cInterfaceEGL::UpdateFPSDisplay(const char *text) diff --git a/Source/Core/DolphinWX/Src/GLInterface/EGL.h b/Source/Core/DolphinWX/Src/GLInterface/EGL_X11.h similarity index 100% rename from Source/Core/DolphinWX/Src/GLInterface/EGL.h rename to Source/Core/DolphinWX/Src/GLInterface/EGL_X11.h diff --git a/Source/Core/DolphinWX/Src/GLInterface/InterfaceBase.h b/Source/Core/DolphinWX/Src/GLInterface/InterfaceBase.h index c2d1ffc54b..1c00a1fd89 100644 --- a/Source/Core/DolphinWX/Src/GLInterface/InterfaceBase.h +++ b/Source/Core/DolphinWX/Src/GLInterface/InterfaceBase.h @@ -23,11 +23,11 @@ protected: u32 s_backbuffer_width; u32 s_backbuffer_height; public: - virtual void Swap() = 0; - virtual void UpdateFPSDisplay(const char *Text) = 0; - virtual bool Create(void *&window_handle) = 0; - virtual bool MakeCurrent() = 0; - virtual void Shutdown() = 0; + virtual void Swap() {} + virtual void UpdateFPSDisplay(const char *Text) {} + virtual bool Create(void *&window_handle) { return true; } + virtual bool MakeCurrent() { return true; } + virtual void Shutdown() {} virtual void SwapInterval(int Interval) { } virtual u32 GetBackBufferWidth() { return s_backbuffer_width; } diff --git a/Source/Core/DolphinWX/Src/MainAndroid.cpp b/Source/Core/DolphinWX/Src/MainAndroid.cpp new file mode 100644 index 0000000000..ca2119921d --- /dev/null +++ b/Source/Core/DolphinWX/Src/MainAndroid.cpp @@ -0,0 +1,149 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ +#include +#include +#include + +#include "Common.h" +#include "FileUtil.h" + +#include "Core.h" +#include "Host.h" +#include "CPUDetect.h" +#include "Thread.h" + +#include "PowerPC/PowerPC.h" +#include "HW/Wiimote.h" + +#include "VideoBackendBase.h" +#include "ConfigManager.h" +#include "LogManager.h" +#include "BootManager.h" + +#include +#include +#define LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, "Dolphinemu", __VA_ARGS__)) + +bool rendererHasFocus = true; +bool running = true; + +void Host_NotifyMapLoaded() {} +void Host_RefreshDSPDebuggerWindow() {} + +void Host_ShowJitResults(unsigned int address){} + +Common::Event updateMainFrameEvent; +void Host_Message(int Id) +{ +} + +void* Host_GetRenderHandle() +{ + return NULL; +} + +void* Host_GetInstance() { return NULL; } + +void Host_UpdateTitle(const char* title){}; + +void Host_UpdateLogDisplay(){} + +void Host_UpdateDisasmDialog(){} + +void Host_UpdateMainFrame() +{ +} + +void Host_UpdateBreakPointView(){} + +bool Host_GetKeyState(int keycode) +{ + return false; +} + +void Host_GetRenderWindowSize(int& x, int& y, int& width, int& height) +{ + x = SConfig::GetInstance().m_LocalCoreStartupParameter.iRenderWindowXPos; + y = SConfig::GetInstance().m_LocalCoreStartupParameter.iRenderWindowYPos; + width = SConfig::GetInstance().m_LocalCoreStartupParameter.iRenderWindowWidth; + height = SConfig::GetInstance().m_LocalCoreStartupParameter.iRenderWindowHeight; +} + +void Host_RequestRenderWindowSize(int width, int height) {} +void Host_SetStartupDebuggingParameters() +{ +} + +bool Host_RendererHasFocus() +{ + return true; +} + +void Host_ConnectWiimote(int wm_idx, bool connect) {} + +void Host_SetWaitCursor(bool enable){} + +void Host_UpdateStatusBar(const char* _pText, int Filed){} + +void Host_SysMessage(const char *fmt, ...) +{ + va_list list; + char msg[512]; + + va_start(list, fmt); + vsprintf(msg, fmt, list); + va_end(list); + + size_t len = strlen(msg); + if (msg[len - 1] != '\n') { + msg[len - 1] = '\n'; + msg[len] = '\0'; + } + LOGI(msg); +} + +void Host_SetWiiMoteConnectionState(int _State) {} + +#ifdef __cplusplus +extern "C" +{ +#endif +JNIEXPORT void JNICALL Java_org_dolphinemu_dolphinemu_dolphinemuactivity_main(JNIEnv *env, jobject obj) +{ + LogManager::Init(); + SConfig::Init(); + VideoBackend::PopulateList(); + VideoBackend::ActivateBackend(SConfig::GetInstance(). + m_LocalCoreStartupParameter.m_strVideoBackend); + WiimoteReal::LoadSettings(); + + // No use running the loop when booting fails + if (BootManager::BootCore("")) + { + while (PowerPC::GetState() != PowerPC::CPU_POWERDOWN) + updateMainFrameEvent.Wait(); + } + + WiimoteReal::Shutdown(); + VideoBackend::ClearList(); + SConfig::Shutdown(); + LogManager::Shutdown(); +} + +#ifdef __cplusplus +} +#endif diff --git a/Source/Core/VideoCommon/CMakeLists.txt b/Source/Core/VideoCommon/CMakeLists.txt index 6ed8e29c6b..cba3666399 100644 --- a/Source/Core/VideoCommon/CMakeLists.txt +++ b/Source/Core/VideoCommon/CMakeLists.txt @@ -3,7 +3,6 @@ set(SRCS Src/BPFunctions.cpp Src/BPStructs.cpp Src/CPMemory.cpp Src/CommandProcessor.cpp - Src/DLCache.cpp Src/Debugger.cpp Src/Fifo.cpp Src/FPSCounter.cpp @@ -24,7 +23,6 @@ set(SRCS Src/BPFunctions.cpp Src/Statistics.cpp Src/TextureCacheBase.cpp Src/TextureConversionShader.cpp - Src/TextureDecoder.cpp Src/VertexLoader.cpp Src/VertexLoaderManager.cpp Src/VertexLoader_Color.cpp @@ -41,6 +39,14 @@ set(SRCS Src/BPFunctions.cpp Src/memcpy_amd.cpp) set(LIBS core) + +if(NOT _M_GENERIC) + set(SRCS ${SRCS} Src/x64TextureDecoder.cpp + Src/x64DLCache.cpp) +else() + set(SRCS ${SRCS} Src/GenericTextureDecoder.cpp + Src/GenericDLCache.cpp) +endif() if(NOT ${CL} STREQUAL CL-NOTFOUND) list(APPEND LIBS ${CL}) endif() diff --git a/Source/Core/VideoCommon/Src/Fifo.cpp b/Source/Core/VideoCommon/Src/Fifo.cpp index 50aa21ebbf..413b163ced 100644 --- a/Source/Core/VideoCommon/Src/Fifo.cpp +++ b/Source/Core/VideoCommon/Src/Fifo.cpp @@ -222,11 +222,11 @@ void RunGpu() { u8 *uData = Memory::GetPointer(fifo.CPReadPointer); - SaveSSEState(); - LoadDefaultSSEState(); + FPURoundMode::SaveSIMDState(); + FPURoundMode::LoadDefaultSIMDState(); ReadDataFromFifo(uData, 32); OpcodeDecoder_Run(g_bSkipCurrentFrame); - LoadSSEState(); + FPURoundMode::LoadSIMDState(); //DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base"); diff --git a/Source/Core/VideoCommon/Src/GenericDLCache.cpp b/Source/Core/VideoCommon/Src/GenericDLCache.cpp new file mode 100644 index 0000000000..5e27deb469 --- /dev/null +++ b/Source/Core/VideoCommon/Src/GenericDLCache.cpp @@ -0,0 +1,52 @@ +// Copyright (C) 2003-2009 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +// TODO: Handle cache-is-full condition :p + + +#include "Common.h" +#include "DLCache.h" + +namespace DLCache +{ + +void Init() +{ +} + +void Shutdown() +{ +} + +void Clear() +{ +} + +void ProgressiveCleanup() +{ +} +} // namespace + +// NOTE - outside the namespace on purpose. +bool HandleDisplayList(u32 address, u32 size) +{ + return false; +} + +void IncrementCheckContextId() +{ +} diff --git a/Source/Core/VideoCommon/Src/GenericTextureDecoder.cpp b/Source/Core/VideoCommon/Src/GenericTextureDecoder.cpp new file mode 100644 index 0000000000..ad7cfeebf9 --- /dev/null +++ b/Source/Core/VideoCommon/Src/GenericTextureDecoder.cpp @@ -0,0 +1,2182 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "Common.h" +//#include "VideoCommon.h" // to get debug logs + +#include "CPUDetect.h" +#include "TextureDecoder.h" +#include "OpenCL.h" +#include "OpenCL/OCLTextureDecoder.h" +#include "VideoConfig.h" + +#include "LookUpTables.h" + +#include + + + +bool TexFmt_Overlay_Enable=false; +bool TexFmt_Overlay_Center=false; + +extern const char* texfmt[]; +extern const unsigned char sfont_map[]; +extern const unsigned char sfont_raw[][9*10]; + +// TRAM +// STATE_TO_SAVE + GC_ALIGNED16(u8 texMem[TMEM_SIZE]); + + +// Gamecube/Wii texture decoder + +// Decodes all known Gamecube/Wii texture formats. +// by ector + +int TexDecoder_GetTexelSizeInNibbles(int format) +{ + switch (format & 0x3f) { + case GX_TF_I4: return 1; + case GX_TF_I8: return 2; + case GX_TF_IA4: return 2; + case GX_TF_IA8: return 4; + case GX_TF_RGB565: return 4; + case GX_TF_RGB5A3: return 4; + case GX_TF_RGBA8: return 8; + case GX_TF_C4: return 1; + case GX_TF_C8: return 2; + case GX_TF_C14X2: return 4; + case GX_TF_CMPR: return 1; + case GX_CTF_R4: return 1; + case GX_CTF_RA4: return 2; + case GX_CTF_RA8: return 4; + case GX_CTF_YUVA8: return 8; + case GX_CTF_A8: return 2; + case GX_CTF_R8: return 2; + case GX_CTF_G8: return 2; + case GX_CTF_B8: return 2; + case GX_CTF_RG8: return 4; + case GX_CTF_GB8: return 4; + + case GX_TF_Z8: return 2; + case GX_TF_Z16: return 4; + case GX_TF_Z24X8: return 8; + + case GX_CTF_Z4: return 1; + case GX_CTF_Z8M: return 2; + case GX_CTF_Z8L: return 2; + case GX_CTF_Z16L: return 4; + default: return 1; + } +} + +int TexDecoder_GetTextureSizeInBytes(int width, int height, int format) +{ + return (width * height * TexDecoder_GetTexelSizeInNibbles(format)) / 2; +} + +int TexDecoder_GetBlockWidthInTexels(u32 format) +{ + switch (format) + { + case GX_TF_I4: return 8; + case GX_TF_I8: return 8; + case GX_TF_IA4: return 8; + case GX_TF_IA8: return 4; + case GX_TF_RGB565: return 4; + case GX_TF_RGB5A3: return 4; + case GX_TF_RGBA8: return 4; + case GX_TF_C4: return 8; + case GX_TF_C8: return 8; + case GX_TF_C14X2: return 4; + case GX_TF_CMPR: return 8; + case GX_CTF_R4: return 8; + case GX_CTF_RA4: return 8; + case GX_CTF_RA8: return 4; + case GX_CTF_A8: return 8; + case GX_CTF_R8: return 8; + case GX_CTF_G8: return 8; + case GX_CTF_B8: return 8; + case GX_CTF_RG8: return 4; + case GX_CTF_GB8: return 4; + case GX_TF_Z8: return 8; + case GX_TF_Z16: return 4; + case GX_TF_Z24X8: return 4; + case GX_CTF_Z4: return 8; + case GX_CTF_Z8M: return 8; + case GX_CTF_Z8L: return 8; + case GX_CTF_Z16L: return 4; + default: + ERROR_LOG(VIDEO, "Unsupported Texture Format (%08x)! (GetBlockWidthInTexels)", format); + return 8; + } +} + +int TexDecoder_GetBlockHeightInTexels(u32 format) +{ + switch (format) + { + case GX_TF_I4: return 8; + case GX_TF_I8: return 4; + case GX_TF_IA4: return 4; + case GX_TF_IA8: return 4; + case GX_TF_RGB565: return 4; + case GX_TF_RGB5A3: return 4; + case GX_TF_RGBA8: return 4; + case GX_TF_C4: return 8; + case GX_TF_C8: return 4; + case GX_TF_C14X2: return 4; + case GX_TF_CMPR: return 8; + case GX_CTF_R4: return 8; + case GX_CTF_RA4: return 4; + case GX_CTF_RA8: return 4; + case GX_CTF_A8: return 4; + case GX_CTF_R8: return 4; + case GX_CTF_G8: return 4; + case GX_CTF_B8: return 4; + case GX_CTF_RG8: return 4; + case GX_CTF_GB8: return 4; + case GX_TF_Z8: return 4; + case GX_TF_Z16: return 4; + case GX_TF_Z24X8: return 4; + case GX_CTF_Z4: return 8; + case GX_CTF_Z8M: return 4; + case GX_CTF_Z8L: return 4; + case GX_CTF_Z16L: return 4; + default: + ERROR_LOG(VIDEO, "Unsupported Texture Format (%08x)! (GetBlockHeightInTexels)", format); + return 4; + } +} + +//returns bytes +int TexDecoder_GetPaletteSize(int format) +{ + switch (format) + { + case GX_TF_C4: return 16 * 2; + case GX_TF_C8: return 256 * 2; + case GX_TF_C14X2: return 16384 * 2; + default: + return 0; + } +} + +static inline u32 decodeIA8(u16 val) +{ + int a = val >> 8; + int i = val & 0xFF; + return (a << 24) | (i << 16) | (i << 8) | i; +} + +static inline u32 decode5A3(u16 val) +{ + int r,g,b,a; + if ((val & 0x8000)) + { + a = 0xFF; + r = Convert5To8((val >> 10) & 0x1F); + g = Convert5To8((val >> 5) & 0x1F); + b = Convert5To8(val & 0x1F); + } + else + { + a = Convert3To8((val >> 12) & 0x7); + r = Convert4To8((val >> 8) & 0xF); + g = Convert4To8((val >> 4) & 0xF); + b = Convert4To8(val & 0xF); + } + return (a << 24) | (r << 16) | (g << 8) | b; +} + +static inline u32 decode5A3RGBA(u16 val) +{ + int r,g,b,a; + if ((val&0x8000)) + { + r=Convert5To8((val>>10) & 0x1f); + g=Convert5To8((val>>5 ) & 0x1f); + b=Convert5To8((val ) & 0x1f); + a=0xFF; + } + else + { + a=Convert3To8((val>>12) & 0x7); + r=Convert4To8((val>>8 ) & 0xf); + g=Convert4To8((val>>4 ) & 0xf); + b=Convert4To8((val ) & 0xf); + } + return r | (g<<8) | (b << 16) | (a << 24); +} + +static inline u32 decode565RGBA(u16 val) +{ + int r,g,b,a; + r=Convert5To8((val>>11) & 0x1f); + g=Convert6To8((val>>5 ) & 0x3f); + b=Convert5To8((val ) & 0x1f); + a=0xFF; + return r | (g<<8) | (b << 16) | (a << 24); +} + +static inline u32 decodeIA8Swapped(u16 val) +{ + int a = val & 0xFF; + int i = val >> 8; + return i | (i<<8) | (i<<16) | (a<<24); +} + + + +struct DXTBlock +{ + u16 color1; + u16 color2; + u8 lines[4]; +}; + +//inline void decodebytesC4(u32 *dst, const u8 *src, int numbytes, int tlutaddr, int tlutfmt) +inline void decodebytesC4_5A3_To_BGRA32(u32 *dst, const u8 *src, int tlutaddr) +{ + u16 *tlut = (u16*)(texMem + tlutaddr); + for (int x = 0; x < 4; x++) + { + u8 val = src[x]; + *dst++ = decode5A3(Common::swap16(tlut[val >> 4])); + *dst++ = decode5A3(Common::swap16(tlut[val & 0xF])); + } +} + +inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlutaddr) +{ + u16 *tlut = (u16*)(texMem + tlutaddr); + for (int x = 0; x < 4; x++) + { + u8 val = src[x]; + *dst++ = decode5A3RGBA(Common::swap16(tlut[val >> 4])); + *dst++ = decode5A3RGBA(Common::swap16(tlut[val & 0xF])); + } +} + +inline void decodebytesC4_To_Raw16(u16* dst, const u8* src, int tlutaddr) +{ + u16* tlut = (u16*)(texMem+tlutaddr); + for (int x = 0; x < 4; x++) + { + u8 val = src[x]; + *dst++ = Common::swap16(tlut[val >> 4]); + *dst++ = Common::swap16(tlut[val & 0xF]); + } +} + +inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr) +{ + u16* tlut = (u16*)(texMem+tlutaddr); + for (int x = 0; x < 4; x++) + { + u8 val = src[x]; + *dst++ = decodeIA8Swapped(tlut[val >> 4]); + *dst++ = decodeIA8Swapped(tlut[val & 0xF]); + } +} + +inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr) +{ + u16* tlut = (u16*)(texMem+tlutaddr); + for (int x = 0; x < 4; x++) + { + u8 val = src[x]; + *dst++ = decode565RGBA(Common::swap16(tlut[val >> 4])); + *dst++ = decode565RGBA(Common::swap16(tlut[val & 0xF])); + } +} + +//inline void decodebytesC8(u32 *dst, const u8 *src, int numbytes, int tlutaddr, int tlutfmt) +inline void decodebytesC8_5A3_To_BGRA32(u32 *dst, const u8 *src, int tlutaddr) +{ + u16 *tlut = (u16*)(texMem + tlutaddr); + for (int x = 0; x < 8; x++) + { + u8 val = src[x]; + *dst++ = decode5A3(Common::swap16(tlut[val])); + } +} + +inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlutaddr) +{ + u16 *tlut = (u16*)(texMem + tlutaddr); + for (int x = 0; x < 8; x++) + { + u8 val = src[x]; + *dst++ = decode5A3RGBA(Common::swap16(tlut[val])); + } +} + +inline void decodebytesC8_To_Raw16(u16* dst, const u8* src, int tlutaddr) +{ + u16* tlut = (u16*)(texMem + tlutaddr); + for (int x = 0; x < 8; x++) + { + u8 val = src[x]; + *dst++ = Common::swap16(tlut[val]); + } +} + +inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr) +{ + u16* tlut = (u16*)(texMem + tlutaddr); + for (int x = 0; x < 8; x++) + { + *dst++ = decodeIA8Swapped(tlut[src[x]]); + } +} + +inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr) +{ + u16* tlut = (u16*)(texMem + tlutaddr); + for (int x = 0; x < 8; x++) + { + *dst++ = decode565RGBA(Common::swap16(tlut[src[x]])); + } +} + +inline void decodebytesC14X2_5A3_To_BGRA32(u32 *dst, const u16 *src, int tlutaddr) +{ + u16 *tlut = (u16*)(texMem + tlutaddr); + for (int x = 0; x < 4; x++) + { + u16 val = Common::swap16(src[x]); + *dst++ = decode5A3(Common::swap16(tlut[(val & 0x3FFF)])); + } +} + +inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tlutaddr) +{ + u16 *tlut = (u16*)(texMem + tlutaddr); + for (int x = 0; x < 4; x++) + { + u16 val = Common::swap16(src[x]); + *dst++ = decode5A3RGBA(Common::swap16(tlut[(val & 0x3FFF)])); + } +} + +inline void decodebytesC14X2_To_Raw16(u16* dst, const u16* src, int tlutaddr) +{ + u16* tlut = (u16*)(texMem + tlutaddr); + for (int x = 0; x < 4; x++) + { + u16 val = Common::swap16(src[x]); + *dst++ = Common::swap16(tlut[(val & 0x3FFF)]); + } +} + +inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlutaddr) +{ + u16* tlut = (u16*)(texMem + tlutaddr); + for (int x = 0; x < 4; x++) + { + u16 val = Common::swap16(src[x]); + *dst++ = decodeIA8Swapped(tlut[(val & 0x3FFF)]); + } +} + +inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, int tlutaddr) +{ + u16* tlut = (u16*)(texMem + tlutaddr); + for (int x = 0; x < 4; x++) + { + u16 val = Common::swap16(src[x]); + *dst++ = decode565RGBA(Common::swap16(tlut[(val & 0x3FFF)])); + } +} + +// Needs more speed. +inline void decodebytesIA4(u16 *dst, const u8 *src) +{ + for (int x = 0; x < 8; x++) + { + const u8 val = src[x]; + u8 a = Convert4To8(val >> 4); + u8 l = Convert4To8(val & 0xF); + dst[x] = (a << 8) | l; + } +} + +inline void decodebytesIA4RGBA(u32 *dst, const u8 *src) +{ + for (int x = 0; x < 8; x++) + { + const u8 val = src[x]; + u8 a = Convert4To8(val >> 4); + u8 l = Convert4To8(val & 0xF); + dst[x] = (a << 24) | l << 16 | l << 8 | l; + } +} + +inline void decodebytesRGB5A3(u32 *dst, const u16 *src) +{ +#if 0 + for (int x = 0; x < 4; x++) + dst[x] = decode5A3(Common::swap16(src[x])); +#else + dst[0] = decode5A3(Common::swap16(src[0])); + dst[1] = decode5A3(Common::swap16(src[1])); + dst[2] = decode5A3(Common::swap16(src[2])); + dst[3] = decode5A3(Common::swap16(src[3])); +#endif +} + +inline void decodebytesRGB5A3rgba(u32 *dst, const u16 *src) +{ +#if 0 + for (int x = 0; x < 4; x++) + dst[x] = decode5A3RGBA(Common::swap16(src[x])); +#else + dst[0] = decode5A3RGBA(Common::swap16(src[0])); + dst[1] = decode5A3RGBA(Common::swap16(src[1])); + dst[2] = decode5A3RGBA(Common::swap16(src[2])); + dst[3] = decode5A3RGBA(Common::swap16(src[3])); +#endif +} + +// This one is used by many video formats. It'd therefore be good if it was fast. +// Needs more speed. +inline void decodebytesARGB8_4(u32 *dst, const u16 *src, const u16 *src2) +{ +#if 0 + for (int x = 0; x < 4; x++) + dst[x] = Common::swap32((src2[x] << 16) | src[x]); +#else + dst[0] = Common::swap32((src2[0] << 16) | src[0]); + dst[1] = Common::swap32((src2[1] << 16) | src[1]); + dst[2] = Common::swap32((src2[2] << 16) | src[2]); + dst[3] = Common::swap32((src2[3] << 16) | src[3]); +#endif + + // This can probably be done in a few SSE pack/unpack instructions + pshufb + // some unpack instruction x2: + // ABABABABABABABAB 1212121212121212 -> + // AB12AB12AB12AB12 AB12AB12AB12AB12 + // 2x pshufb-> + // 21BA21BA21BA21BA 21BA21BA21BA21BA + // and we are done. +} + +inline void decodebytesARGB8_4ToRgba(u32 *dst, const u16 *src, const u16 * src2) +{ +#if 0 + for (int x = 0; x < 4; x++) { + dst[x] = ((src[x] & 0xFF) << 24) | ((src[x] & 0xFF00)>>8) | (src2[x] << 8); + } +#else + dst[0] = ((src[0] & 0xFF) << 24) | ((src[0] & 0xFF00)>>8) | (src2[0] << 8); + dst[1] = ((src[1] & 0xFF) << 24) | ((src[1] & 0xFF00)>>8) | (src2[1] << 8); + dst[2] = ((src[2] & 0xFF) << 24) | ((src[2] & 0xFF00)>>8) | (src2[2] << 8); + dst[3] = ((src[3] & 0xFF) << 24) | ((src[3] & 0xFF00)>>8) | (src2[3] << 8); +#endif +} + +inline u32 makecol(int r, int g, int b, int a) +{ + return (a << 24)|(r << 16)|(g << 8)|b; +} + +inline u32 makeRGBA(int r, int g, int b, int a) +{ + return (a<<24)|(b<<16)|(g<<8)|r; +} + +void decodeDXTBlock(u32 *dst, const DXTBlock *src, int pitch) +{ + // S3TC Decoder (Note: GCN decodes differently from PC so we can't use native support) + // Needs more speed. + u16 c1 = Common::swap16(src->color1); + u16 c2 = Common::swap16(src->color2); + int blue1 = Convert5To8(c1 & 0x1F); + int blue2 = Convert5To8(c2 & 0x1F); + int green1 = Convert6To8((c1 >> 5) & 0x3F); + int green2 = Convert6To8((c2 >> 5) & 0x3F); + int red1 = Convert5To8((c1 >> 11) & 0x1F); + int red2 = Convert5To8((c2 >> 11) & 0x1F); + int colors[4]; + colors[0] = makecol(red1, green1, blue1, 255); + colors[1] = makecol(red2, green2, blue2, 255); + if (c1 > c2) + { + int blue3 = ((blue2 - blue1) >> 1) - ((blue2 - blue1) >> 3); + int green3 = ((green2 - green1) >> 1) - ((green2 - green1) >> 3); + int red3 = ((red2 - red1) >> 1) - ((red2 - red1) >> 3); + colors[2] = makecol(red1 + red3, green1 + green3, blue1 + blue3, 255); + colors[3] = makecol(red2 - red3, green2 - green3, blue2 - blue3, 255); + } + else + { + colors[2] = makecol((red1 + red2 + 1) / 2, // Average + (green1 + green2 + 1) / 2, + (blue1 + blue2 + 1) / 2, 255); + colors[3] = makecol(red2, green2, blue2, 0); // Color2 but transparent + } + + for (int y = 0; y < 4; y++) + { + int val = src->lines[y]; + for (int x = 0; x < 4; x++) + { + dst[x] = colors[(val >> 6) & 3]; + val <<= 2; + } + dst += pitch; + } +} + +void decodeDXTBlockRGBA(u32 *dst, const DXTBlock *src, int pitch) +{ + // S3TC Decoder (Note: GCN decodes differently from PC so we can't use native support) + // Needs more speed. + u16 c1 = Common::swap16(src->color1); + u16 c2 = Common::swap16(src->color2); + int blue1 = Convert5To8(c1 & 0x1F); + int blue2 = Convert5To8(c2 & 0x1F); + int green1 = Convert6To8((c1 >> 5) & 0x3F); + int green2 = Convert6To8((c2 >> 5) & 0x3F); + int red1 = Convert5To8((c1 >> 11) & 0x1F); + int red2 = Convert5To8((c2 >> 11) & 0x1F); + int colors[4]; + colors[0] = makeRGBA(red1, green1, blue1, 255); + colors[1] = makeRGBA(red2, green2, blue2, 255); + if (c1 > c2) + { + int blue3 = ((blue2 - blue1) >> 1) - ((blue2 - blue1) >> 3); + int green3 = ((green2 - green1) >> 1) - ((green2 - green1) >> 3); + int red3 = ((red2 - red1) >> 1) - ((red2 - red1) >> 3); + colors[2] = makeRGBA(red1 + red3, green1 + green3, blue1 + blue3, 255); + colors[3] = makeRGBA(red2 - red3, green2 - green3, blue2 - blue3, 255); + } + else + { + colors[2] = makeRGBA((red1 + red2 + 1) / 2, // Average + (green1 + green2 + 1) / 2, + (blue1 + blue2 + 1) / 2, 255); + colors[3] = makeRGBA(red2, green2, blue2, 0); // Color2 but transparent + } + + for (int y = 0; y < 4; y++) + { + int val = src->lines[y]; + for (int x = 0; x < 4; x++) + { + dst[x] = colors[(val >> 6) & 3]; + val <<= 2; + } + dst += pitch; + } +} + +#if 0 // TODO - currently does not handle transparency correctly and causes problems when texture dimensions are not multiples of 8 +static void copyDXTBlock(u8* dst, const u8* src) +{ + ((u16*)dst)[0] = Common::swap16(((u16*)src)[0]); + ((u16*)dst)[1] = Common::swap16(((u16*)src)[1]); + u32 pixels = ((u32*)src)[1]; + // A bit of trickiness here: the row are in the same order + // between the two formats, but the ordering within the rows + // is reversed. + pixels = ((pixels >> 4) & 0x0F0F0F0F) | ((pixels << 4) & 0xF0F0F0F0); + pixels = ((pixels >> 2) & 0x33333333) | ((pixels << 2) & 0xCCCCCCCC); + ((u32*)dst)[1] = pixels; +} +#endif + +static PC_TexFormat GetPCFormatFromTLUTFormat(int tlutfmt) +{ + switch (tlutfmt) + { + case 0: return PC_TEX_FMT_IA8; // IA8 + case 1: return PC_TEX_FMT_RGB565; // RGB565 + case 2: return PC_TEX_FMT_BGRA32; // RGB5A3: This TLUT format requires + // extra work to decode. + } + return PC_TEX_FMT_NONE; // Error +} + +PC_TexFormat GetPC_TexFormat(int texformat, int tlutfmt) +{ + switch (texformat) + { + case GX_TF_C4: + return GetPCFormatFromTLUTFormat(tlutfmt); + case GX_TF_I4: + return PC_TEX_FMT_IA8; + case GX_TF_I8: // speed critical + return PC_TEX_FMT_IA8; + case GX_TF_C8: + return GetPCFormatFromTLUTFormat(tlutfmt); + case GX_TF_IA4: + return PC_TEX_FMT_IA4_AS_IA8; + case GX_TF_IA8: + return PC_TEX_FMT_IA8; + case GX_TF_C14X2: + return GetPCFormatFromTLUTFormat(tlutfmt); + case GX_TF_RGB565: + return PC_TEX_FMT_RGB565; + case GX_TF_RGB5A3: + return PC_TEX_FMT_BGRA32; + case GX_TF_RGBA8: // speed critical + return PC_TEX_FMT_BGRA32; + case GX_TF_CMPR: // speed critical + // The metroid games use this format almost exclusively. + { + return PC_TEX_FMT_BGRA32; + } + } + + // The "copy" texture formats, too? + return PC_TEX_FMT_NONE; +} + + +//switch endianness, unswizzle +//TODO: to save memory, don't blindly convert everything to argb8888 +//also ARGB order needs to be swapped later, to accommodate modern hardware better +//need to add DXT support too +PC_TexFormat TexDecoder_Decode_real(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt) +{ + const int Wsteps4 = (width + 3) / 4; + const int Wsteps8 = (width + 7) / 8; + + switch (texformat) + { + case GX_TF_C4: + if (tlutfmt == 2) + { + // Special decoding is required for TLUT format 5A3 + for (int y = 0; y < height; y += 8) + for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++) + for (int iy = 0, xStep = yStep * 8; iy < 8; iy++, xStep++) + decodebytesC4_5A3_To_BGRA32((u32*)dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); + } + else + { + for (int y = 0; y < height; y += 8) + for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++) + for (int iy = 0, xStep = yStep * 8; iy < 8; iy++, xStep++) + decodebytesC4_To_Raw16((u16*)dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); + } + return GetPCFormatFromTLUTFormat(tlutfmt); + case GX_TF_I4: + { + for (int y = 0; y < height; y += 8) + for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++) + for (int iy = 0, xStep = yStep * 8 ; iy < 8; iy++,xStep++) + for (int ix = 0; ix < 4; ix++) + { + int val = src[4 * xStep + ix]; + dst[(y + iy) * width + x + ix * 2] = Convert4To8(val >> 4); + dst[(y + iy) * width + x + ix * 2 + 1] = Convert4To8(val & 0xF); + } + } + return PC_TEX_FMT_I4_AS_I8; + case GX_TF_I8: // speed critical + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + { + ((u64*)(dst + (y + iy) * width + x))[0] = ((u64*)(src + 8 * xStep))[0]; + } + } + return PC_TEX_FMT_I8; + case GX_TF_C8: + if (tlutfmt == 2) + { + // Special decoding is required for TLUT format 5A3 + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodebytesC8_5A3_To_BGRA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); + } + else + { + + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodebytesC8_To_Raw16((u16*)dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); + } + } + return GetPCFormatFromTLUTFormat(tlutfmt); + case GX_TF_IA4: + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodebytesIA4((u16*)dst + (y + iy) * width + x, src + 8 * xStep); + } + return PC_TEX_FMT_IA4_AS_IA8; + case GX_TF_IA8: + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) + for (int iy = 0, xStep = yStep * 4; iy < 4; iy++, xStep++) + { + u16 *ptr = (u16 *)dst + (y + iy) * width + x; + u16 *s = (u16 *)(src + 8 * xStep); + for(int j = 0; j < 4; j++) + *ptr++ = Common::swap16(*s++); + } + + } + return PC_TEX_FMT_IA8; + case GX_TF_C14X2: + if (tlutfmt == 2) + { + // Special decoding is required for TLUT format 5A3 + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodebytesC14X2_5A3_To_BGRA32((u32*)dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); + } + else + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodebytesC14X2_To_Raw16((u16*)dst + (y + iy) * width + x,(u16*)(src + 8 * xStep), tlutaddr); + } + return GetPCFormatFromTLUTFormat(tlutfmt); + case GX_TF_RGB565: + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + { + u16 *ptr = (u16 *)dst + (y + iy) * width + x; + u16 *s = (u16 *)(src + 8 * xStep); + for(int j = 0; j < 4; j++) + *ptr++ = Common::swap16(*s++); + } + } + return PC_TEX_FMT_RGB565; + case GX_TF_RGB5A3: + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + //decodebytesRGB5A3((u32*)dst+(y+iy)*width+x, (u16*)src, 4); + decodebytesRGB5A3((u32*)dst+(y+iy)*width+x, (u16*)(src + 8 * xStep)); + } + return PC_TEX_FMT_BGRA32; + case GX_TF_RGBA8: // speed critical + { + + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) + { + const u8* src2 = src + 64 * yStep; + for (int iy = 0; iy < 4; iy++) + decodebytesARGB8_4((u32*)dst + (y+iy)*width + x, (u16*)src2 + 4 * iy, (u16*)src2 + 4 * iy + 16); + } + } + } + return PC_TEX_FMT_BGRA32; + case GX_TF_CMPR: // speed critical + // The metroid games use this format almost exclusively. + { +#if 0 // TODO - currently does not handle transparency correctly and causes problems when texture dimensions are not multiples of 8 + // 11111111 22222222 55555555 66666666 + // 33333333 44444444 77777777 88888888 + for (int y = 0; y < height; y += 8) + { + for (int x = 0; x < width; x += 8) + { + copyDXTBlock(dst+(y/2)*width+x*2, src); + src += 8; + copyDXTBlock(dst+(y/2)*width+x*2+8, src); + src += 8; + copyDXTBlock(dst+(y/2+2)*width+x*2, src); + src += 8; + copyDXTBlock(dst+(y/2+2)*width+x*2+8, src); + src += 8; + } + } + return PC_TEX_FMT_DXT1; +#else + for (int y = 0; y < height; y += 8) + { + for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++) + { + const u8* src2 = src + 4 * sizeof(DXTBlock) * yStep; + decodeDXTBlock((u32*)dst + y * width + x, (DXTBlock*)src2, width); + src2 += sizeof(DXTBlock); + decodeDXTBlock((u32*)dst + y * width + x + 4, (DXTBlock*)src2, width); + src2 += sizeof(DXTBlock); + decodeDXTBlock((u32*)dst + (y + 4) * width + x, (DXTBlock*)src2, width); + src2 += sizeof(DXTBlock); + decodeDXTBlock((u32*)dst + (y + 4) * width + x + 4, (DXTBlock*)src2, width); + } + } +#endif + return PC_TEX_FMT_BGRA32; + } + } + + // The "copy" texture formats, too? + return PC_TEX_FMT_NONE; +} + + + +// JSD 01/06/11: +// TODO: we really should ensure BOTH the source and destination addresses are aligned to 16-byte boundaries to +// squeeze out a little more performance. _mm_loadu_si128/_mm_storeu_si128 is slower than _mm_load_si128/_mm_store_si128 +// because they work on unaligned addresses. The processor is free to make the assumption that addresses are multiples +// of 16 in the aligned case. +// TODO: complete SSE2 optimization of less often used texture formats. +// TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads. + +PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, int tlutfmt) +{ + + const int Wsteps4 = (width + 3) / 4; + const int Wsteps8 = (width + 7) / 8; + + switch (texformat) + { + case GX_TF_C4: + if (tlutfmt == 2) + { + // Special decoding is required for TLUT format 5A3 + for (int y = 0; y < height; y += 8) + for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) + for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) + decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); + } + else if(tlutfmt == 0) + { + for (int y = 0; y < height; y += 8) + for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) + for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) + decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); + + } + else + { + for (int y = 0; y < height; y += 8) + for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) + for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) + decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); + } + break; + case GX_TF_I4: + { + // Reference C implementation: + for (int y = 0; y < height; y += 8) + for (int x = 0; x < width; x += 8) + for (int iy = 0; iy < 8; iy++, src += 4) + for (int ix = 0; ix < 4; ix++) + { + int val = src[ix]; + u8 i1 = Convert4To8(val >> 4); + u8 i2 = Convert4To8(val & 0xF); + memset(dst+(y + iy) * width + x + ix * 2 , i1,4); + memset(dst+(y + iy) * width + x + ix * 2 + 1 , i2,4); + } + } + break; + case GX_TF_I8: // speed critical + { + // Reference C implementation + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 8) + for (int iy = 0; iy < 4; ++iy, src += 8) + { + u32 * newdst = dst + (y + iy)*width+x; + const u8 * newsrc = src; + u8 srcval; + + srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); + srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); + srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); + srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); + srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); + srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); + srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); + srcval = newsrc[0]; newdst[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); + } + } + break; + case GX_TF_C8: + if (tlutfmt == 2) + { + // Special decoding is required for TLUT format 5A3 + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); + } + else if(tlutfmt == 0) + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); + + } + else + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); + + } + break; + case GX_TF_IA4: + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodebytesIA4RGBA(dst + (y + iy) * width + x, src + 8 * xStep); + } + break; + case GX_TF_IA8: + { + // Reference C implementation: + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 4) + for (int iy = 0; iy < 4; iy++, src += 8) + { + u32 *ptr = dst + (y + iy) * width + x; + u16 *s = (u16 *)src; + ptr[0] = decodeIA8Swapped(s[0]); + ptr[1] = decodeIA8Swapped(s[1]); + ptr[2] = decodeIA8Swapped(s[2]); + ptr[3] = decodeIA8Swapped(s[3]); + } + } + break; + case GX_TF_C14X2: + if (tlutfmt == 2) + { + // Special decoding is required for TLUT format 5A3 + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodebytesC14X2_5A3_To_BGRA32(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); + } + else if (tlutfmt == 0) + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); + } + else + { + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); + } + break; + case GX_TF_RGB565: + { + // Reference C implementation. + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 4) + for (int iy = 0; iy < 4; iy++, src += 8) + { + u32 *ptr = dst + (y + iy) * width + x; + u16 *s = (u16 *)src; + for(int j = 0; j < 4; j++) + *ptr++ = decode565RGBA(Common::swap16(*s++)); + } + } + break; + case GX_TF_RGB5A3: + { + // Reference C implementation: + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 4) + for (int iy = 0; iy < 4; iy++, src += 8) + decodebytesRGB5A3rgba(dst+(y+iy)*width+x, (u16*)src); + } + break; + case GX_TF_RGBA8: // speed critical + { + // Reference C implementation. + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 4) + { + for (int iy = 0; iy < 4; iy++) + decodebytesARGB8_4ToRgba(dst + (y+iy)*width + x, (u16*)src + 4 * iy, (u16*)src + 4 * iy + 16); + src += 64; + } + } + break; + case GX_TF_CMPR: // speed critical + // The metroid games use this format almost exclusively. + { + for (int y = 0; y < height; y += 8) + { + for (int x = 0; x < width; x += 8) + { + decodeDXTBlockRGBA((u32*)dst + y * width + x, (DXTBlock*)src, width); + src += sizeof(DXTBlock); + decodeDXTBlockRGBA((u32*)dst + y * width + x + 4, (DXTBlock*)src, width); + src += sizeof(DXTBlock); + decodeDXTBlockRGBA((u32*)dst + (y + 4) * width + x, (DXTBlock*)src, width); + src += sizeof(DXTBlock); + decodeDXTBlockRGBA((u32*)dst + (y + 4) * width + x + 4, (DXTBlock*)src, width); + src += sizeof(DXTBlock); + } + } + break; + } + } + + // The "copy" texture formats, too? + return PC_TEX_FMT_RGBA32; +} + + + + +void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center) +{ + TexFmt_Overlay_Enable = enable; + TexFmt_Overlay_Center = center; +} + +PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt,bool rgbaOnly) +{ + PC_TexFormat retval = TexDecoder_Decode_OpenCL(dst, src, + width, height, texformat, tlutaddr, tlutfmt, rgbaOnly); + if (retval == PC_TEX_FMT_NONE) + retval = rgbaOnly ? TexDecoder_Decode_RGBA((u32*)dst, src, + width, height, texformat, tlutaddr, tlutfmt) + : TexDecoder_Decode_real(dst, src, + width, height, texformat, tlutaddr, tlutfmt); + + if ((!TexFmt_Overlay_Enable)|| (retval == PC_TEX_FMT_NONE)) + return retval; + + int w = min(width, 40); + int h = min(height, 10); + + int xoff = (width - w) >> 1; + int yoff = (height - h) >> 1; + + if (!TexFmt_Overlay_Center) + { + xoff=0; + yoff=0; + } + + const char* fmt = texfmt[texformat&15]; + while (*fmt) + { + int xcnt = 0; + int nchar = sfont_map[(int)*fmt]; + + const unsigned char *ptr = sfont_raw[nchar]; // each char is up to 9x10 + + for (int x = 0; x < 9;x++) + { + if (ptr[x] == 0x78) + break; + xcnt++; + } + + for (int y=0; y < 10; y++) + { + for (int x=0; x < xcnt; x++) + { + switch(retval) + { + case PC_TEX_FMT_I8: + { + // TODO: Is this an acceptable way to draw in I8? + u8 *dtp = (u8*)dst; + dtp[(y + yoff) * width + x + xoff] = ptr[x] ? 0xFF : 0x88; + break; + } + case PC_TEX_FMT_IA8: + case PC_TEX_FMT_IA4_AS_IA8: + { + u16 *dtp = (u16*)dst; + dtp[(y + yoff) * width + x + xoff] = ptr[x] ? 0xFFFF : 0xFF00; + break; + } + case PC_TEX_FMT_RGB565: + { + u16 *dtp = (u16*)dst; + dtp[(y + yoff)*width + x + xoff] = ptr[x] ? 0xFFFF : 0x0000; + break; + } + default: + case PC_TEX_FMT_BGRA32: + { + int *dtp = (int*)dst; + dtp[(y + yoff) * width + x + xoff] = ptr[x] ? 0xFFFFFFFF : 0xFF000000; + break; + } + } + } + ptr += 9; + } + xoff += xcnt; + fmt++; + } + + return retval; +} + + + +void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt) +{ + /* General formula for computing texture offset + // + u16 sBlk = s / blockWidth; + u16 tBlk = t / blockHeight; + u16 widthBlks = (width / blockWidth) + 1; + u32 base = (tBlk * widthBlks + sBlk) * blockWidth * blockHeight; + u16 blkS = s & (blockWidth - 1); + u16 blkT = t & (blockHeight - 1); + u32 blkOff = blkT * blockWidth + blkS; + */ + + switch (texformat) + { + case GX_TF_C4: + { + u16 sBlk = s >> 3; + u16 tBlk = t >> 3; + u16 widthBlks = (imageWidth >> 3) + 1; + u32 base = (tBlk * widthBlks + sBlk) << 5; + u16 blkS = s & 7; + u16 blkT = t & 7; + u32 blkOff = (blkT << 3) + blkS; + + int rs = (blkOff & 1)?0:4; + u32 offset = base + (blkOff >> 1); + + u8 val = (*(src + offset) >> rs) & 0xF; + u16 *tlut = (u16*)(texMem + tlutaddr); + + switch (tlutfmt) + { + case 0: + *((u32*)dst) = decodeIA8Swapped(tlut[val]); + break; + case 1: + *((u32*)dst) = decode565RGBA(Common::swap16(tlut[val])); + break; + case 2: + *((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val])); + break; + } + } + break; + case GX_TF_I4: + { + u16 sBlk = s >> 3; + u16 tBlk = t >> 3; + u16 widthBlks = (imageWidth >> 3) + 1; + u32 base = (tBlk * widthBlks + sBlk) << 5; + u16 blkS = s & 7; + u16 blkT = t & 7; + u32 blkOff = (blkT << 3) + blkS; + + int rs = (blkOff & 1)?0:4; + u32 offset = base + (blkOff >> 1); + + u8 val = (*(src + offset) >> rs) & 0xF; + val = Convert4To8(val); + dst[0] = val; + dst[1] = val; + dst[2] = val; + dst[3] = val; + } + break; + case GX_TF_I8: + { + u16 sBlk = s >> 3; + u16 tBlk = t >> 2; + u16 widthBlks = (imageWidth >> 3) + 1; + u32 base = (tBlk * widthBlks + sBlk) << 5; + u16 blkS = s & 7; + u16 blkT = t & 3; + u32 blkOff = (blkT << 3) + blkS; + + u8 val = *(src + base + blkOff); + dst[0] = val; + dst[1] = val; + dst[2] = val; + dst[3] = val; + } + break; + case GX_TF_C8: + { + u16 sBlk = s >> 3; + u16 tBlk = t >> 2; + u16 widthBlks = (imageWidth >> 3) + 1; + u32 base = (tBlk * widthBlks + sBlk) << 5; + u16 blkS = s & 7; + u16 blkT = t & 3; + u32 blkOff = (blkT << 3) + blkS; + + u8 val = *(src + base + blkOff); + u16 *tlut = (u16*)(texMem + tlutaddr); + + switch (tlutfmt) + { + case 0: + *((u32*)dst) = decodeIA8Swapped(tlut[val]); + break; + case 1: + *((u32*)dst) = decode565RGBA(Common::swap16(tlut[val])); + break; + case 2: + *((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val])); + break; + } + } + break; + case GX_TF_IA4: + { + u16 sBlk = s >> 3; + u16 tBlk = t >> 2; + u16 widthBlks = (imageWidth >> 3) + 1; + u32 base = (tBlk * widthBlks + sBlk) << 5; + u16 blkS = s & 7; + u16 blkT = t & 3; + u32 blkOff = (blkT << 3) + blkS; + + u8 val = *(src + base + blkOff); + const u8 a = Convert4To8(val>>4); + const u8 l = Convert4To8(val&0xF); + dst[0] = l; + dst[1] = l; + dst[2] = l; + dst[3] = a; + } + break; + case GX_TF_IA8: + { + u16 sBlk = s >> 2; + u16 tBlk = t >> 2; + u16 widthBlks = (imageWidth >> 2) + 1; + u32 base = (tBlk * widthBlks + sBlk) << 4; + u16 blkS = s & 3; + u16 blkT = t & 3; + u32 blkOff = (blkT << 2) + blkS; + + u32 offset = (base + blkOff) << 1; + const u16* valAddr = (u16*)(src + offset); + + *((u32*)dst) = decodeIA8Swapped(*valAddr); + } + break; + case GX_TF_C14X2: + { + u16 sBlk = s >> 2; + u16 tBlk = t >> 2; + u16 widthBlks = (imageWidth >> 2) + 1; + u32 base = (tBlk * widthBlks + sBlk) << 4; + u16 blkS = s & 3; + u16 blkT = t & 3; + u32 blkOff = (blkT << 2) + blkS; + + u32 offset = (base + blkOff) << 1; + const u16* valAddr = (u16*)(src + offset); + + u16 val = Common::swap16(*valAddr) & 0x3FFF; + u16 *tlut = (u16*)(texMem + tlutaddr); + + switch (tlutfmt) + { + case 0: + *((u32*)dst) = decodeIA8Swapped(tlut[val]); + break; + case 1: + *((u32*)dst) = decode565RGBA(Common::swap16(tlut[val])); + break; + case 2: + *((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val])); + break; + } + } + break; + case GX_TF_RGB565: + { + u16 sBlk = s >> 2; + u16 tBlk = t >> 2; + u16 widthBlks = (imageWidth >> 2) + 1; + u32 base = (tBlk * widthBlks + sBlk) << 4; + u16 blkS = s & 3; + u16 blkT = t & 3; + u32 blkOff = (blkT << 2) + blkS; + + u32 offset = (base + blkOff) << 1; + const u16* valAddr = (u16*)(src + offset); + + *((u32*)dst) = decode565RGBA(Common::swap16(*valAddr)); + } + break; + case GX_TF_RGB5A3: + { + u16 sBlk = s >> 2; + u16 tBlk = t >> 2; + u16 widthBlks = (imageWidth >> 2) + 1; + u32 base = (tBlk * widthBlks + sBlk) << 4; + u16 blkS = s & 3; + u16 blkT = t & 3; + u32 blkOff = (blkT << 2) + blkS; + + u32 offset = (base + blkOff) << 1; + const u16* valAddr = (u16*)(src + offset); + + *((u32*)dst) = decode5A3RGBA(Common::swap16(*valAddr)); + } + break; + case GX_TF_RGBA8: + { + u16 sBlk = s >> 2; + u16 tBlk = t >> 2; + u16 widthBlks = (imageWidth >> 2) + 1; + u32 base = (tBlk * widthBlks + sBlk) << 5; // shift by 5 is correct + u16 blkS = s & 3; + u16 blkT = t & 3; + u32 blkOff = (blkT << 2) + blkS; + + u32 offset = (base + blkOff) << 1 ; + const u8* valAddr = src + offset; + + dst[3] = valAddr[0]; + dst[0] = valAddr[1]; + dst[1] = valAddr[32]; + dst[2] = valAddr[33]; + } + break; + case GX_TF_CMPR: + { + u16 sDxt = s >> 2; + u16 tDxt = t >> 2; + + u16 sBlk = sDxt >> 1; + u16 tBlk = tDxt >> 1; + u16 widthBlks = (imageWidth >> 3) + 1; + u32 base = (tBlk * widthBlks + sBlk) << 2; + u16 blkS = sDxt & 1; + u16 blkT = tDxt & 1; + u32 blkOff = (blkT << 1) + blkS; + + u32 offset = (base + blkOff) << 3; + + const DXTBlock* dxtBlock = (const DXTBlock*)(src + offset); + + u16 c1 = Common::swap16(dxtBlock->color1); + u16 c2 = Common::swap16(dxtBlock->color2); + int blue1 = Convert5To8(c1 & 0x1F); + int blue2 = Convert5To8(c2 & 0x1F); + int green1 = Convert6To8((c1 >> 5) & 0x3F); + int green2 = Convert6To8((c2 >> 5) & 0x3F); + int red1 = Convert5To8((c1 >> 11) & 0x1F); + int red2 = Convert5To8((c2 >> 11) & 0x1F); + + u16 ss = s & 3; + u16 tt = t & 3; + + int colorSel = dxtBlock->lines[tt]; + int rs = 6 - (ss << 1); + colorSel = (colorSel >> rs) & 3; + colorSel |= c1 > c2?0:4; + + u32 color = 0; + + switch (colorSel) + { + case 0: + case 4: + color = makeRGBA(red1, green1, blue1, 255); + break; + case 1: + case 5: + color = makeRGBA(red2, green2, blue2, 255); + break; + case 2: + color = makeRGBA(red1+(red2-red1)/3, green1+(green2-green1)/3, blue1+(blue2-blue1)/3, 255); + break; + case 3: + color = makeRGBA(red2+(red1-red2)/3, green2+(green1-green2)/3, blue2+(blue1-blue2)/3, 255); + break; + case 6: + color = makeRGBA((int)ceil((float)(red1+red2)/2), (int)ceil((float)(green1+green2)/2), (int)ceil((float)(blue1+blue2)/2), 255); + break; + case 7: + color = makeRGBA(red2, green2, blue2, 0); + break; + } + + *((u32*)dst) = color; + } + break; + } +} + + +const char* texfmt[] = { + // pixel + "I4", "I8", "IA4", "IA8", + "RGB565", "RGB5A3", "RGBA8", "0x07", + "C4", "C8", "C14X2", "0x0B", + "0x0C", "0x0D", "CMPR", "0x0F", + // Z-buffer + "0x10", "Z8", "0x12", "Z16", + "0x14", "0x15", "Z24X8", "0x17", + "0x18", "0x19", "0x1A", "0x1B", + "0x1C", "0x1D", "0x1E", "0x1F", + // pixel + copy + "CR4", "0x21", "CRA4", "CRA8", + "0x24", "0x25", "CYUVA8", "CA8", + "CR8", "CG8", "CB8", "CRG8", + "CGB8", "0x2D", "0x2E", "0x2F", + // Z + copy + "CZ4", "0x31", "0x32", "0x33", + "0x34", "0x35", "0x36", "0x37", + "0x38", "CZ8M", "CZ8L", "0x3B", + "CZ16L", "0x3D", "0x3E", "0x3F", +}; + +const unsigned char sfont_map[] = { + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,10,10,10,10,10, + 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25, + 26,27,28,29,30,31,32,33,34,35,36,10,10,10,10,10, + 10,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51, + 52,53,54,55,56,57,58,59,60,61,62,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, +}; + +const unsigned char sfont_raw[][9*10] = { + { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, 0xff, + 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, 0xff, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, + 0xff, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, + 0xff, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + },{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, + }, +}; diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 5bffab8ee7..526cbe7e49 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -23,7 +23,7 @@ #include "MemoryUtil.h" #include "StringUtil.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "PixelEngine.h" #include "Host.h" @@ -43,8 +43,9 @@ //BBox #include "XFMemory.h" extern float GC_ALIGNED16(g_fProjectionMatrix[16]); - +#ifndef _M_GENERIC #define USE_JIT +#endif #define COMPILED_CODE_SIZE 4096 @@ -82,8 +83,9 @@ static const float fractionTable[32] = { 1.0f / (1U << 24), 1.0f / (1U << 25), 1.0f / (1U << 26), 1.0f / (1U << 27), 1.0f / (1U << 28), 1.0f / (1U << 29), 1.0f / (1U << 30), 1.0f / (1U << 31), }; - +#ifdef USE_JIT using namespace Gen; +#endif void LOADERDECL PosMtx_ReadDirect_UByte() { @@ -182,14 +184,19 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) m_VtxDesc = vtx_desc; SetVAT(vtx_attr.g0.Hex, vtx_attr.g1.Hex, vtx_attr.g2.Hex); + #ifdef USE_JIT AllocCodeSpace(COMPILED_CODE_SIZE); CompileVertexTranslator(); WriteProtect(); + #endif + } VertexLoader::~VertexLoader() { + #ifdef USE_JIT FreeCodeSpace(); + #endif delete m_NativeFmt; } @@ -474,7 +481,8 @@ void VertexLoader::WriteCall(TPipelineFunction func) m_PipelineStages[m_numPipelineStages++] = func; #endif } - +// ARMTODO: This should be done in a better way +#ifndef _M_GENERIC void VertexLoader::WriteGetVariable(int bits, OpArg dest, void *address) { #ifdef USE_JIT @@ -498,7 +506,7 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value) #endif #endif } - +#endif void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) { m_numLoadedVertices += count; diff --git a/Source/Core/VideoCommon/Src/VertexLoader.h b/Source/Core/VideoCommon/Src/VertexLoader.h index 98a57cb9ff..0f321cd14f 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.h +++ b/Source/Core/VideoCommon/Src/VertexLoader.h @@ -76,7 +76,12 @@ private: } }; +// ARMTODO: This should be done in a better way +#ifndef _M_GENERIC class VertexLoader : public Gen::XCodeBlock, NonCopyable +#else +class VertexLoader +#endif { public: VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr); @@ -122,8 +127,10 @@ private: void WriteCall(TPipelineFunction); +#ifndef _M_GENERIC void WriteGetVariable(int bits, Gen::OpArg dest, void *address); void WriteSetVariable(int bits, void *address, Gen::OpArg dest); +#endif }; #endif diff --git a/Source/Core/VideoCommon/Src/DLCache.cpp b/Source/Core/VideoCommon/Src/x64DLCache.cpp similarity index 99% rename from Source/Core/VideoCommon/Src/DLCache.cpp rename to Source/Core/VideoCommon/Src/x64DLCache.cpp index c828ea3a03..559593a08c 100644 --- a/Source/Core/VideoCommon/Src/DLCache.cpp +++ b/Source/Core/VideoCommon/Src/x64DLCache.cpp @@ -35,7 +35,7 @@ #include "VertexLoaderManager.h" #include "VertexManagerBase.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "DLCache.h" #include "VideoConfig.h" diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/x64TextureDecoder.cpp similarity index 97% rename from Source/Core/VideoCommon/Src/TextureDecoder.cpp rename to Source/Core/VideoCommon/Src/x64TextureDecoder.cpp index 30df817902..6c474920cd 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/x64TextureDecoder.cpp @@ -1119,20 +1119,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he _mm_storeu_si128( (__m128i*)( dst+(y + iy+1) * width + x + 4 ), o4 ); } } -#if 0 - // Reference C implementation: - for (int y = 0; y < height; y += 8) - for (int x = 0; x < width; x += 8) - for (int iy = 0; iy < 8; iy++, src += 4) - for (int ix = 0; ix < 4; ix++) - { - int val = src[ix]; - u8 i1 = Convert4To8(val >> 4); - u8 i2 = Convert4To8(val & 0xF); - memset(dst+(y + iy) * width + x + ix * 2 , i1,4); - memset(dst+(y + iy) * width + x + ix * 2 + 1 , i2,4); - } -#endif } break; case GX_TF_I8: // speed critical @@ -1248,26 +1234,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he } } -#if 0 - // Reference C implementation - for (int y = 0; y < height; y += 4) - for (int x = 0; x < width; x += 8) - for (int iy = 0; iy < 4; ++iy, src += 8) - { - u32 * newdst = dst + (y + iy)*width+x; - const u8 * newsrc = src; - u8 srcval; - - srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); - srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); - srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); - srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); - srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); - srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); - srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); - srcval = newsrc[0]; newdst[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24); - } -#endif } break; case GX_TF_C8: @@ -1380,20 +1346,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he _mm_storeu_si128( (__m128i*)(dst + (y + iy) * width + x), r1 ); } } -#if 0 - // Reference C implementation: - for (int y = 0; y < height; y += 4) - for (int x = 0; x < width; x += 4) - for (int iy = 0; iy < 4; iy++, src += 8) - { - u32 *ptr = dst + (y + iy) * width + x; - u16 *s = (u16 *)src; - ptr[0] = decodeIA8Swapped(s[0]); - ptr[1] = decodeIA8Swapped(s[1]); - ptr[2] = decodeIA8Swapped(s[2]); - ptr[3] = decodeIA8Swapped(s[3]); - } -#endif } break; case GX_TF_C14X2: @@ -1493,18 +1445,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he __m128i *ptr = (__m128i *)(dst + (y + iy) * width + x); _mm_storeu_si128(ptr, abgr888x4); } -#if 0 - // Reference C implementation. - for (int y = 0; y < height; y += 4) - for (int x = 0; x < width; x += 4) - for (int iy = 0; iy < 4; iy++, src += 8) - { - u32 *ptr = dst + (y + iy) * width + x; - u16 *s = (u16 *)src; - for(int j = 0; j < 4; j++) - *ptr++ = decode565RGBA(Common::swap16(*s++)); - } -#endif } break; case GX_TF_RGB5A3: @@ -1718,13 +1658,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he } } } -#if 0 - // Reference C implementation: - for (int y = 0; y < height; y += 4) - for (int x = 0; x < width; x += 4) - for (int iy = 0; iy < 4; iy++, src += 8) - decodebytesRGB5A3rgba(dst+(y+iy)*width+x, (u16*)src); -#endif } break; case GX_TF_RGBA8: // speed critical @@ -1860,16 +1793,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he _mm_storeu_si128(dst128, rgba11); } } -#if 0 - // Reference C implementation. - for (int y = 0; y < height; y += 4) - for (int x = 0; x < width; x += 4) - { - for (int iy = 0; iy < 4; iy++) - decodebytesARGB8_4ToRgba(dst + (y+iy)*width + x, (u16*)src + 4 * iy, (u16*)src + 4 * iy + 16); - src += 64; - } -#endif } break; case GX_TF_CMPR: // speed critical @@ -2104,22 +2027,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he } } } -#if 0 - for (int y = 0; y < height; y += 8) - { - for (int x = 0; x < width; x += 8) - { - decodeDXTBlockRGBA((u32*)dst + y * width + x, (DXTBlock*)src, width); - src += sizeof(DXTBlock); - decodeDXTBlockRGBA((u32*)dst + y * width + x + 4, (DXTBlock*)src, width); - src += sizeof(DXTBlock); - decodeDXTBlockRGBA((u32*)dst + (y + 4) * width + x, (DXTBlock*)src, width); - src += sizeof(DXTBlock); - decodeDXTBlockRGBA((u32*)dst + (y + 4) * width + x + 4, (DXTBlock*)src, width); - src += sizeof(DXTBlock); - } - } -#endif break; } } diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index bf9122e6b5..0c1b13f301 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -111,8 +111,8 @@ ..\Common\Src;..\Core\Src;..\..\..\Externals\SOIL;..\..\..\Externals\CLRun\include;%(AdditionalIncludeDirectories) - false - + false + true @@ -143,7 +143,7 @@ ..\Common\Src;..\Core\Src;..\..\..\Externals\SOIL;..\..\..\Externals\CLRun\include;%(AdditionalIncludeDirectories) false - + true true @@ -182,7 +182,6 @@ - @@ -204,7 +203,6 @@ - @@ -216,6 +214,8 @@ + + diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index e7019f17ed..6038d5e9e2 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -5,9 +5,6 @@ - - Vertex Loading - Vertex Loading @@ -92,9 +89,6 @@ Decoding - - Decoding - Base @@ -122,6 +116,12 @@ Util + + Decoding + + + Vertex Loading + diff --git a/Source/Plugins/Plugin_VideoDX9/Src/NativeVertexFormat.cpp b/Source/Plugins/Plugin_VideoDX9/Src/NativeVertexFormat.cpp index bfaf21ec7e..d68228273c 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/NativeVertexFormat.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/NativeVertexFormat.cpp @@ -19,7 +19,7 @@ #include "D3DBase.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "MemoryUtil.h" #include "VertexShaderGen.h" diff --git a/Source/Plugins/Plugin_VideoOGL/Src/GLUtil.cpp b/Source/Plugins/Plugin_VideoOGL/Src/GLUtil.cpp index 29a1b6a662..01ea415263 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/GLUtil.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/GLUtil.cpp @@ -51,7 +51,9 @@ void VideoBackend::UpdateFPSDisplay(const char *text) } void InitInterface() { - #if defined(USE_EGL) && USE_EGL + #ifdef ANDROID + GLInterface = new cInterfaceBase; + #elif defined(USE_EGL) && USE_EGL GLInterface = new cInterfaceEGL; #elif defined(USE_WX) && USE_WX GLInterface = new cInterfaceWX; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp b/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp index ae20649433..0528bd1878 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp @@ -17,7 +17,7 @@ #include "GLUtil.h" #include "x64Emitter.h" -#include "ABI.h" +#include "x64ABI.h" #include "MemoryUtil.h" #include "VertexShaderGen.h" diff --git a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp index 05f21beed1..99e01bfb3d 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp @@ -204,7 +204,9 @@ void VideoBackend::Video_Prepare() GL_REPORT_ERRORD(); VertexLoaderManager::Init(); TextureConverter::Init(); +#ifndef _M_GENERIC DLCache::Init(); +#endif // Notify the core that the video backend is ready Host_Message(WM_USER_CREATE); @@ -219,7 +221,9 @@ void VideoBackend::Shutdown() s_efbAccessRequested = false; s_FifoShuttingDown = false; s_swapRequested = false; +#ifndef _M_GENERIC DLCache::Shutdown(); +#endif Fifo_Shutdown(); PostProcessing::Shutdown(); diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/OpcodeDecoder.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/OpcodeDecoder.cpp index 0091e4eaca..032ff9d74a 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/OpcodeDecoder.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/OpcodeDecoder.cpp @@ -125,7 +125,9 @@ void DecodeStandard(u32 bufferSize) if (Cmd == GX_NOP) return; - + // Causes a SIGBUS error on Android + // XXX: Investigate +#ifndef ANDROID // check if switching in or out of an object // only used for debuggging if (inObjectStream && (Cmd & 0x87) != lastPrimCmd) @@ -139,7 +141,7 @@ void DecodeStandard(u32 bufferSize) lastPrimCmd = Cmd & 0x87; DebugUtil::OnObjectBegin(); } - +#endif switch(Cmd) { case GX_NOP: diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWCommandProcessor.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/SWCommandProcessor.cpp index b9afc1abb5..72b26338b5 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWCommandProcessor.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWCommandProcessor.cpp @@ -115,15 +115,15 @@ void RunGpu() if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread) { // We are going to do FP math on the main thread so have to save the current state - SaveSSEState(); - LoadDefaultSSEState(); + FPURoundMode::SaveSIMDState(); + FPURoundMode::LoadDefaultSIMDState(); // run the opcode decoder do { RunBuffer(); } while (cpreg.ctrl.GPReadEnable && !AtBreakpoint() && cpreg.readptr != cpreg.writeptr); - LoadSSEState(); + FPURoundMode::LoadSIMDState(); } }