Added libucontext for android & replaced x86 intrinsics with sse2neon implementations

This commit is contained in:
SSimco 2023-12-27 16:13:45 +02:00
parent 47091d836b
commit 8e666d32d0
15 changed files with 9278 additions and 59 deletions

3
.gitmodules vendored
View File

@ -16,3 +16,6 @@
[submodule "dependencies/imgui"]
path = dependencies/imgui
url = https://github.com/ocornut/imgui
[submodule "dependencies/libucontext"]
path = dependencies/libucontext
url = https://github.com/SSimco/libucontext

View File

@ -140,6 +140,7 @@ find_package(pugixml REQUIRED)
find_package(RapidJSON REQUIRED)
find_package(Boost COMPONENTS program_options filesystem nowide REQUIRED)
# Android-only dependencies: pull in the libucontext submodule from
# dependencies/libucontext and require Boost's context and iostreams components.
if(ANDROID)
# EXCLUDE_FROM_ALL keeps the subdirectory's targets out of the default "all" build;
# they are built only when another target depends on them.
add_subdirectory(dependencies/libucontext EXCLUDE_FROM_ALL)
find_package(Boost COMPONENTS context iostreams REQUIRED)
endif()
find_package(libzip REQUIRED)

1
dependencies/libucontext vendored Submodule

@ -0,0 +1 @@
Subproject commit be80075e957c4a61a6415c280802fea9001201a2

View File

@ -338,7 +338,6 @@ void LatteShaderCache_Load()
if (g_renderer->GetType() == RendererAPI::Vulkan)
LatteShaderCache_LoadVulkanPipelineCache(cacheTitleId);
#if !__ANDROID__
g_renderer->BeginFrame(true);
if (g_renderer->ImguiBegin(true))
{
@ -351,7 +350,6 @@ void LatteShaderCache_Load()
LatteShaderCache_drawBackgroundImage(g_shaderCacheLoaderState.textureDRCId, 854, 480);
g_renderer->ImguiEnd();
}
#endif // __ANDROID__
g_renderer->SwapBuffers(true, true);
if (g_shaderCacheLoaderState.textureTVId)

View File

@ -2,9 +2,6 @@
#include "Cafe/HW/Latte/LatteAddrLib/LatteAddrLib.h"
#include "Cafe/OS/libs/gx2/GX2_Surface.h"
#include <bit>
#if __ANDROID__
#include <boost/core/bit.hpp>
#endif
/*
Info:
@ -75,11 +72,7 @@ namespace LatteAddrLib
// Rounds dim up to a power of two via bit_ceil: Boost's implementation on
// Android builds, the C++20 standard library one on every other platform.
// NOTE(review): std::bit_ceil has undefined behavior when the result is not
// representable in the type (for a 32-bit uint32: dim > 0x80000000) - confirm
// callers never pass such values.
uint32 NextPow2(uint32 dim)
{
#if __ANDROID__
return boost::core::bit_ceil(dim);
#else
return std::bit_ceil<uint32>(dim);
#endif
}
uint32 GetBitsPerPixel(E_HWSURFFMT format, uint32* pElemMode, uint32* pExpandX, uint32* pExpandY)

View File

@ -1123,9 +1123,7 @@ namespace coreinit
{
OSHostThread* hostThread = (OSHostThread*)_thread;
#if defined(ARCH_X86_64)
_mm_setcsr(_mm_getcsr() | 0x8000); // flush denormals to zero
#endif
PPCInterpreter_t* hCPU = &hostThread->ppcInstance;
__OSLoadThread(hostThread->m_thread, hCPU, hostThread->selectedCore);
@ -1157,9 +1155,7 @@ namespace coreinit
{
SetThreadName(fmt::format("OSSchedulerThread[core={}]", (uintptr_t)_assignedCoreIndex).c_str());
t_assignedCoreIndex = (sint32)(uintptr_t)_assignedCoreIndex;
#if defined(ARCH_X86_64)
_mm_setcsr(_mm_getcsr() | 0x8000); // flush denormals to zero
#endif
t_schedulerFiber = Fiber::PrepareCurrentThread();
// create scheduler idle fiber and switch to it

View File

@ -61,6 +61,17 @@ CPUFeaturesImpl::CPUFeaturesImpl()
memcpy(m_cpuBrandName + 32, cpuInfo, sizeof(cpuInfo));
}
#endif
#if defined(__aarch64__)
// aarch64 builds: unconditionally mark the x86 feature flags below as available.
// NOTE(review): presumably this is meant to route callers onto the intrinsic
// code paths that sse2neon.h translates to NEON. sse2neon does not appear to
// provide AVX/AVX2 intrinsics - confirm that nothing gated on x86.avx / x86.avx2
// relies on real AVX instructions before keeping those two flags set.
x86.ssse3 = true;
x86.sse4_1 = true;
x86.avx = true;
x86.avx2 = true;
x86.lzcnt = true;
x86.movbe = true;
x86.bmi2 = true;
x86.aesni = true;
x86.invariant_tsc = true;
#endif
}
std::string CPUFeaturesImpl::GetCPUName()

View File

@ -41,6 +41,11 @@
#include <immintrin.h>
#endif
#if defined(__aarch64__)
#include "sse2neon.h"
#endif
// c++ includes
#include <string>
#include <string_view>
@ -332,23 +337,6 @@ inline uint64 _udiv128(uint64 highDividend, uint64 lowDividend, uint64 divisor,
// On aarch64 we handle some of the x86 intrinsics by implementing them as wrappers
#if defined(__aarch64__)
// aarch64 wrapper for the x86 _mm_pause intrinsic: emits a single `yield`
// instruction. `volatile` keeps the compiler from dropping the statement.
inline void _mm_pause()
{
asm volatile("yield");
}
// aarch64 wrapper for the x86 __rdtsc intrinsic: returns the current value of
// the cntvct_el0 system register.
// NOTE(review): this counter presumably ticks at the generic timer frequency
// rather than the CPU clock - verify any caller that assumes TSC-like units.
inline uint64 __rdtsc()
{
uint64 t;
// mrs = read system register into a general-purpose register
asm volatile("mrs %0, cntvct_el0" : "=r" (t));
return t;
}
// aarch64 wrapper for the x86 _mm_mfence intrinsic.
// The wrapper used to be empty, which gave callers neither a compiler barrier
// nor a hardware barrier. Issue a sequentially consistent fence so loads and
// stores before the call are ordered against those after it, matching what
// callers of _mm_mfence expect. The compiler builtin is used so no extra
// header is needed.
inline void _mm_mfence()
{
__atomic_thread_fence(__ATOMIC_SEQ_CST);
}
inline unsigned char _addcarry_u64(unsigned char carry, unsigned long long a, unsigned long long b, unsigned long long *result)
{
*result = a + b + (unsigned long long)carry;
@ -516,24 +504,16 @@ inline std::string_view _utf8Wrapper(std::u8string_view input)
// Convert an fs::path to a UTF-8 encoded std::string, using the path's
// generic_* string accessors.
inline std::string _pathToUtf8(const fs::path& path)
{
#if __ANDROID__
// Android: return the narrow generic string unchanged.
// NOTE(review): assumes the narrow path encoding on Android is already UTF-8 - confirm.
return path.generic_string();
#else
// generic_u8string() returns char8_t data; copy the same bytes into a
// char-based std::string without any re-encoding.
std::u8string strU8 = path.generic_u8string();
std::string v((const char*)strU8.data(), strU8.size());
return v;
#endif // __ANDROID__
}
// Convert a UTF-8 encoded string to an fs::path.
inline fs::path _utf8ToPath(std::string_view input)
{
#if __ANDROID__
// Android: construct the path directly from the narrow string.
// NOTE(review): assumes fs::path treats narrow input as UTF-8 on Android - confirm.
return fs::path(input);
#else
// View the same bytes as char8_t so fs::path interprets them as UTF-8.
// No copy is made; the view only lives for the duration of this call.
std::basic_string_view<char8_t> v((char8_t*)input.data(), input.size());
return fs::path(v);
#endif // __ANDROID__
}
// locale-independent variant of tolower() which also matches Wii U behavior

9236
src/Common/sse2neon.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -5,7 +5,7 @@ plugins {
android {
namespace 'info.cemu.Cemu'
compileSdk 34
ndkVersion '25.2.9519653'
ndkVersion '26.1.10909125'
defaultConfig {
applicationId "info.cemu.Cemu"
minSdk 30
@ -47,6 +47,7 @@ android {
'-DBUNDLE_SPEEX=ON',
'-DENABLE_DISCORD_RPC=OFF',
'-DENABLE_NSYSHID_LIBUSB=OFF',
'-DENABLE_HIDAPI=OFF',
'-DENABLE_WAYLAND=OFF',
)
// abiFilters("x86_64", "arm64-v8a")

View File

@ -118,9 +118,7 @@ class EmulationState
int wpadCount = 0;
for (int i = 0; i < InputManager::kMaxController; i++)
{
auto emulatedController = AndroidEmulatedController::getAndroidEmulatedController(
i)
.getEmulatedController();
auto emulatedController = AndroidEmulatedController::getAndroidEmulatedController(i).getEmulatedController();
if (!emulatedController)
continue;
if (emulatedController->type() != EmulatedController::Type::VPAD)

View File

@ -74,11 +74,7 @@ if(WIN32)
target_sources(CemuUtil PRIVATE MemMapper/MemMapperWin.cpp)
target_sources(CemuUtil PRIVATE SystemInfo/SystemInfoWin.cpp)
elseif(UNIX)
if(ANDROID)
target_sources(CemuUtil PRIVATE Fiber/FiberBoost.cpp)
else()
target_sources(CemuUtil PRIVATE Fiber/FiberUnix.cpp)
endif()
target_sources(CemuUtil PRIVATE Fiber/FiberUnix.cpp)
target_sources(CemuUtil PRIVATE MemMapper/MemMapperUnix.cpp)
target_sources(CemuUtil PRIVATE SystemInfo/SystemInfoUnix.cpp)
if(NOT APPLE)
@ -95,7 +91,7 @@ set_property(TARGET CemuUtil PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CON
target_include_directories(CemuUtil PUBLIC "../")
if(ANDROID)
target_link_libraries(CemuUtil PRIVATE Boost::context)
target_link_libraries(CemuUtil PRIVATE libucontext)
endif()
target_link_libraries(CemuUtil PRIVATE

View File

@ -1,12 +1,21 @@
#include "Fiber.h"
// Select the ucontext implementation used by the fiber code in this file.
// Android builds take it from libucontext and alias the libucontext_-prefixed
// type and functions to the names the code below calls; all other platforms
// include <ucontext.h> and use its names directly.
// NOTE(review): _ucontext_t begins with an underscore at global scope, which
// is a reserved identifier form - consider renaming.
#if __ANDROID__
#include <libucontext/libucontext.h>
using _ucontext_t = libucontext_ucontext_t;
// References to the libucontext functions, so that calls such as
// swapcontext(...) and getcontext(...) resolve to them.
constexpr auto& swapcontext = libucontext_swapcontext;
constexpr auto& getcontext = libucontext_getcontext;
constexpr auto& makecontext = libucontext_makecontext;
#else
#include <ucontext.h>
using _ucontext_t = ucontext_t;
#endif
#include <atomic>
thread_local Fiber* sCurrentFiber{};
Fiber::Fiber(void(*FiberEntryPoint)(void* userParam), void* userParam, void* privateData) : m_privateData(privateData)
{
ucontext_t* ctx = (ucontext_t*)malloc(sizeof(ucontext_t));
_ucontext_t* ctx = (_ucontext_t*)malloc(sizeof(_ucontext_t));
const size_t stackSize = 2 * 1024 * 1024;
m_stackPtr = malloc(stackSize);
@ -21,7 +30,7 @@ Fiber::Fiber(void(*FiberEntryPoint)(void* userParam), void* userParam, void* pri
Fiber::Fiber(void* privateData) : m_privateData(privateData)
{
ucontext_t* ctx = (ucontext_t*)malloc(sizeof(ucontext_t));
_ucontext_t* ctx = (_ucontext_t*)malloc(sizeof(_ucontext_t));
getcontext(ctx);
this->m_implData = (void*)ctx;
m_stackPtr = nullptr;
@ -46,7 +55,7 @@ void Fiber::Switch(Fiber& targetFiber)
Fiber* leavingFiber = sCurrentFiber;
sCurrentFiber = &targetFiber;
std::atomic_thread_fence(std::memory_order_seq_cst);
swapcontext((ucontext_t*)(leavingFiber->m_implData), (ucontext_t*)(targetFiber.m_implData));
swapcontext((_ucontext_t*)(leavingFiber->m_implData), (_ucontext_t*)(targetFiber.m_implData));
std::atomic_thread_fence(std::memory_order_seq_cst);
}

View File

@ -600,7 +600,6 @@ void AES128_CBC_decrypt_updateIV(uint8* output, uint8* input, uint32 length, con
memcpy(iv, newIv, KEYLEN);
}
#if defined(ARCH_X86_64)
ATTRIBUTE_AESNI inline __m128i AESNI128_ASSIST(
__m128i temp1,
__m128i temp2)
@ -792,7 +791,6 @@ ATTRIBUTE_AESNI void __aesni__AES128_ECB_encrypt(uint8* input, const uint8* key,
feedback = _mm_aesenclast_si128(feedback, ((__m128i*)expandedKey)[10]);
_mm_storeu_si128(&((__m128i*)output)[0], feedback);
}
#endif
// Entry points for AES-128 ECB encryption and CBC decryption, stored as function
// pointers. AES128_init() assigns them; the __soft__ implementations are used
// when g_CPUFeatures.x86.aesni is not set.
void(*AES128_ECB_encrypt)(uint8* input, const uint8* key, uint8* output);
void (*AES128_CBC_decrypt)(uint8* output, uint8* input, uint32 length, const uint8* key, const uint8* iv) = nullptr;
@ -837,7 +835,6 @@ void AES128_init()
lookupTable_multiply[i] = (vE << 0) | (v9 << 8) | (vD << 16) | (vB << 24);
}
// check if AES-NI is available
#if defined(ARCH_X86_64)
if (g_CPUFeatures.x86.aesni)
{
// AES-NI implementation
@ -850,8 +847,4 @@ void AES128_init()
AES128_CBC_decrypt = __soft__AES128_CBC_decrypt;
AES128_ECB_encrypt = __soft__AES128_ECB_encrypt;
}
#else
AES128_CBC_decrypt = __soft__AES128_CBC_decrypt;
AES128_ECB_encrypt = __soft__AES128_ECB_encrypt;
#endif
}

View File

@ -41,7 +41,10 @@
},
"boost-random",
"fmt",
"hidapi",
{
"name": "hidapi",
"platform": "!android"
},
"libpng",
"glm",
{