mirror of
https://github.com/libretro/ppsspp.git
synced 2024-11-25 01:00:01 +00:00
Merge pull request #7046 from unknownbrackets/perf
Improve load and shutdown performance a bit
This commit is contained in:
commit
f1d43e7e90
@ -28,6 +28,15 @@
|
||||
// - Serialization code for anything complex has to be manually written.
|
||||
|
||||
#include <map>
|
||||
#ifdef IOS
|
||||
#include <tr1/unordered_map>
|
||||
namespace std {
|
||||
using std::tr1::unordered_map;
|
||||
using std::tr1::unordered_multimap;
|
||||
}
|
||||
#else
|
||||
#include <unordered_map>
|
||||
#endif
|
||||
#include <deque>
|
||||
#include <list>
|
||||
#include <set>
|
||||
@ -178,7 +187,29 @@ public:
|
||||
}
|
||||
|
||||
template<class K, class T>
|
||||
void DoMap(std::map<K, T> &x, T &default_val)
|
||||
void Do(std::unordered_map<K, T *> &x)
|
||||
{
|
||||
if (mode == MODE_READ)
|
||||
{
|
||||
for (auto it = x.begin(), end = x.end(); it != end; ++it)
|
||||
{
|
||||
if (it->second != NULL)
|
||||
delete it->second;
|
||||
}
|
||||
}
|
||||
T *dv = NULL;
|
||||
DoMap(x, dv);
|
||||
}
|
||||
|
||||
// Serializes an unordered_map holding plain (non-pointer) values by handing it to
// DoMap together with a value-initialized element used as the starting value for
// each entry that gets read back.
template<class K, class T>
void Do(std::unordered_map<K, T> &x)
{
	// DoMap takes the default by non-const reference, so it has to be a named local.
	T defaultValue = T();
	DoMap(x, defaultValue);
}
|
||||
|
||||
template<class M>
|
||||
void DoMap(M &x, typename M::mapped_type &default_val)
|
||||
{
|
||||
unsigned int number = (unsigned int)x.size();
|
||||
Do(number);
|
||||
@ -188,9 +219,9 @@ public:
|
||||
x.clear();
|
||||
while (number > 0)
|
||||
{
|
||||
K first = K();
|
||||
typename M::key_type first = typename M::key_type();
|
||||
Do(first);
|
||||
T second = default_val;
|
||||
typename M::mapped_type second = default_val;
|
||||
Do(second);
|
||||
x[first] = second;
|
||||
--number;
|
||||
@ -201,10 +232,10 @@ public:
|
||||
case MODE_MEASURE:
|
||||
case MODE_VERIFY:
|
||||
{
|
||||
typename std::map<K, T>::iterator itr = x.begin();
|
||||
typename M::iterator itr = x.begin();
|
||||
while (number > 0)
|
||||
{
|
||||
K first = itr->first;
|
||||
typename M::key_type first = itr->first;
|
||||
Do(first);
|
||||
Do(itr->second);
|
||||
--number;
|
||||
@ -238,7 +269,29 @@ public:
|
||||
}
|
||||
|
||||
template<class K, class T>
|
||||
void DoMultimap(std::multimap<K, T> &x, T &default_val)
|
||||
void Do(std::unordered_multimap<K, T *> &x)
|
||||
{
|
||||
if (mode == MODE_READ)
|
||||
{
|
||||
for (auto it = x.begin(), end = x.end(); it != end; ++it)
|
||||
{
|
||||
if (it->second != NULL)
|
||||
delete it->second;
|
||||
}
|
||||
}
|
||||
T *dv = NULL;
|
||||
DoMultimap(x, dv);
|
||||
}
|
||||
|
||||
// Serializes an unordered_multimap holding plain (non-pointer) values by handing it
// to DoMultimap together with a value-initialized element used as the starting value
// for each entry that gets read back.
template<class K, class T>
void Do(std::unordered_multimap<K, T> &x)
{
	// DoMultimap takes the default by non-const reference, so it has to be a named local.
	T defaultValue = T();
	DoMultimap(x, defaultValue);
}
|
||||
|
||||
template<class M>
|
||||
void DoMultimap(M &x, typename M::mapped_type &default_val)
|
||||
{
|
||||
unsigned int number = (unsigned int)x.size();
|
||||
Do(number);
|
||||
@ -248,9 +301,9 @@ public:
|
||||
x.clear();
|
||||
while (number > 0)
|
||||
{
|
||||
K first = K();
|
||||
typename M::key_type first = typename M::key_type();
|
||||
Do(first);
|
||||
T second = default_val;
|
||||
typename M::mapped_type second = default_val;
|
||||
Do(second);
|
||||
x.insert(std::make_pair(first, second));
|
||||
--number;
|
||||
@ -261,7 +314,7 @@ public:
|
||||
case MODE_MEASURE:
|
||||
case MODE_VERIFY:
|
||||
{
|
||||
typename std::multimap<K, T>::iterator itr = x.begin();
|
||||
typename M::iterator itr = x.begin();
|
||||
while (number > 0)
|
||||
{
|
||||
Do(itr->first);
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "Core/FileSystems/BlockDevices.h"
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
|
||||
extern "C"
|
||||
{
|
||||
@ -99,6 +100,18 @@ bool FileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr)
|
||||
lseek64(fd, (u64)blockNumber * (u64)GetBlockSize(), SEEK_SET);
|
||||
if (read(fd, outPtr, 2048) != 2048) {
|
||||
ERROR_LOG(FILESYS, "Could not read() 2048 bytes from block");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads `count` consecutive blocks starting at `minBlock` into outPtr using one
// seek followed by one read() on the file descriptor.
// Returns false (after logging) if the seek fails or if fewer bytes than requested
// come back; returns true otherwise.
bool FileBlockDevice::ReadBlocks(u32 minBlock, int count, u8 *outPtr)
{
	// A failed seek would otherwise leave the descriptor at an arbitrary offset and
	// the read below could "succeed" with data from the wrong place.
	if (lseek64(fd, (u64)minBlock * (u64)GetBlockSize(), SEEK_SET) < 0) {
		ERROR_LOG(FILESYS, "Could not seek to block %u", minBlock);
		return false;
	}

	const s32 bytes = GetBlockSize() * count;
	if (read(fd, outPtr, bytes) != bytes) {
		ERROR_LOG(FILESYS, "Could not read() %d bytes from block", bytes);
		return false;
	}
	return true;
}
|
||||
@ -121,12 +134,24 @@ FileBlockDevice::~FileBlockDevice()
|
||||
bool FileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr)
|
||||
{
|
||||
fseeko(f, (u64)blockNumber * (u64)GetBlockSize(), SEEK_SET);
|
||||
if (fread(outPtr, 1, 2048, f) != 2048)
|
||||
if (fread(outPtr, 1, 2048, f) != 2048) {
|
||||
DEBUG_LOG(FILESYS, "Could not read 2048 bytes from block");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads `count` consecutive 2048-byte blocks starting at `minBlock` into outPtr
// using one fseeko followed by one fread on the FILE handle.
// Returns false (after logging) if the seek fails or if fread delivers fewer than
// `count` whole blocks; returns true otherwise.
bool FileBlockDevice::ReadBlocks(u32 minBlock, int count, u8 *outPtr)
{
	// A failed seek would otherwise leave the stream at an arbitrary offset and the
	// fread below could "succeed" with data from the wrong place.
	if (fseeko(f, (u64)minBlock * (u64)GetBlockSize(), SEEK_SET) != 0) {
		ERROR_LOG(FILESYS, "Could not seek to block %u", minBlock);
		return false;
	}

	// fread reports the number of complete 2048-byte items it read; the cast makes the
	// signed/unsigned comparison explicit (it is the conversion that happened implicitly).
	if (fread(outPtr, 2048, count, f) != (size_t)count) {
		ERROR_LOG(FILESYS, "Could not read %d bytes from block", 2048 * count);
		return false;
	}
	return true;
}
|
||||
|
||||
#endif
|
||||
|
||||
// .CSO format
|
||||
@ -157,6 +182,8 @@ typedef struct ciso_header
|
||||
|
||||
// TODO: Need much better error handling.
|
||||
|
||||
static const u32 CSO_READ_BUFFER_SIZE = 256 * 1024;
|
||||
|
||||
CISOFileBlockDevice::CISOFileBlockDevice(FILE *file)
|
||||
: f(file)
|
||||
{
|
||||
@ -197,7 +224,10 @@ CISOFileBlockDevice::CISOFileBlockDevice(FILE *file)
|
||||
VERBOSE_LOG(LOADER, "CSO numBlocks=%i numFrames=%i align=%i", numBlocks, numFrames, indexShift);
|
||||
|
||||
// We might read a bit of alignment too, so be prepared.
|
||||
readBuffer = new u8[frameSize + (1 << indexShift)];
|
||||
if (frameSize + (1 << indexShift) < CSO_READ_BUFFER_SIZE)
|
||||
readBuffer = new u8[CSO_READ_BUFFER_SIZE];
|
||||
else
|
||||
readBuffer = new u8[frameSize + (1 << indexShift)];
|
||||
zlibBuffer = new u8[frameSize + (1 << indexShift)];
|
||||
zlibBufferFrame = numFrames;
|
||||
|
||||
@ -279,7 +309,7 @@ bool CISOFileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr)
|
||||
return false;
|
||||
}
|
||||
z.avail_in = readSize;
|
||||
z.next_out = frameSize == GetBlockSize() ? outPtr : zlibBuffer;
|
||||
z.next_out = frameSize == (u32)GetBlockSize() ? outPtr : zlibBuffer;
|
||||
z.avail_out = frameSize;
|
||||
z.next_in = readBuffer;
|
||||
|
||||
@ -300,7 +330,7 @@ bool CISOFileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr)
|
||||
}
|
||||
inflateEnd(&z);
|
||||
|
||||
if (frameSize != GetBlockSize())
|
||||
if (frameSize != (u32)GetBlockSize())
|
||||
{
|
||||
zlibBufferFrame = frameNumber;
|
||||
memcpy(outPtr, zlibBuffer + compressedOffset, GetBlockSize());
|
||||
@ -309,6 +339,96 @@ bool CISOFileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads `count` consecutive blocks starting at `minBlock` into outPtr, decompressing
// CSO frames as needed.  Compressed data is pulled from the file in large chunks (up
// to CSO_READ_BUFFER_SIZE, or one frame if that is bigger) so that several frames can
// be served from a single fread.
//
// - count <= 0 reads nothing and returns true (same as the BlockDevice default loop).
// - count == 1 is forwarded to ReadBlock.
// - If minBlock is past the end of the image, the whole output is zeroed and false is
//   returned.
// - If only the tail of the request is past the end, that tail is zeroed and the
//   available blocks are read normally.
// - A frame that fails to inflate is logged and its output zeroed; the call still
//   returns true in that case.
bool CISOFileBlockDevice::ReadBlocks(u32 minBlock, int count, u8 *outPtr) {
	if (count <= 0) {
		return true;
	}
	if (count == 1) {
		return ReadBlock(minBlock, outPtr);
	}
	if (minBlock >= numBlocks) {
		memset(outPtr, 0, GetBlockSize() * count);
		return false;
	}

	// Clamp the request to the image and zero whatever part of the output lies beyond it.
	const u32 lastBlock = std::min(minBlock + count, numBlocks) - 1;
	const u32 availableBlocks = lastBlock + 1 - minBlock;
	if (availableBlocks < (u32)count) {
		const u32 missingBlocks = (u32)count - availableBlocks;
		memset(outPtr + (size_t)GetBlockSize() * availableBlocks, 0, (size_t)GetBlockSize() * missingBlocks);
	}

	const u32 minFrameNumber = minBlock >> blockShift;
	const u32 lastFrameNumber = lastBlock >> blockShift;
	// The index entry after the last frame marks where the data we need ends in the file.
	const u32 afterLastIndexPos = index[lastFrameNumber + 1] & 0x7FFFFFFF;
	const u64 totalReadEnd = (u64)afterLastIndexPos << indexShift;

	z_stream z;
	z.zalloc = Z_NULL;
	z.zfree = Z_NULL;
	z.opaque = Z_NULL;
	if (inflateInit2(&z, -15) != Z_OK) {
		ERROR_LOG(LOADER, "Unable to initialize inflate: %s\n", (z.msg) ? z.msg : "?");
		return false;
	}

	// File range currently held in readBuffer: [readBufferStart, readBufferEnd).
	u64 readBufferStart = 0;
	u64 readBufferEnd = 0;
	u32 block = minBlock;
	const u32 blocksPerFrame = 1 << blockShift;
	for (u32 frame = minFrameNumber; frame <= lastFrameNumber; ++frame) {
		const u32 idx = index[frame];
		const u32 indexPos = idx & 0x7FFFFFFF;
		const u32 nextIndexPos = index[frame + 1] & 0x7FFFFFFF;

		const u64 frameReadPos = (u64)indexPos << indexShift;
		const u64 frameReadEnd = (u64)nextIndexPos << indexShift;
		const u32 frameReadSize = (u32)(frameReadEnd - frameReadPos);
		// Position of `block` inside this frame, and how many blocks we take from it:
		// limited both by what is left of the frame and by what is left of the request.
		const u32 frameBlockOffset = block & (blocksPerFrame - 1);
		const u32 frameBlocks = std::min(lastBlock - block + 1, blocksPerFrame - frameBlockOffset);

		if (frameReadEnd > readBufferEnd) {
			// This frame is not (fully) buffered: refill starting at this frame.
			const size_t maxNeeded = totalReadEnd - frameReadPos;
			const size_t chunkSize = std::min(maxNeeded, (size_t)std::max(frameReadSize, CSO_READ_BUFFER_SIZE));

			fseeko(f, frameReadPos, SEEK_SET);
			const u32 readSize = (u32)fread(readBuffer, 1, chunkSize, f);
			if (readSize < chunkSize) {
				// Short read: pad with zeros so later code never sees stale bytes.
				memset(readBuffer + readSize, 0, chunkSize - readSize);
			}

			readBufferStart = frameReadPos;
			readBufferEnd = frameReadPos + readSize;
		}

		u8 *rawBuffer = &readBuffer[frameReadPos - readBufferStart];
		// The top bit of the index entry selects the uncompressed (memcpy) path.
		const int plain = idx & 0x80000000;
		if (plain) {
			memcpy(outPtr, rawBuffer + frameBlockOffset * GetBlockSize(), frameBlocks * GetBlockSize());
		} else {
			// Whole frames inflate straight into the output; partial frames go through
			// zlibBuffer and only the wanted blocks are copied out.
			const bool wholeFrame = frameBlocks == blocksPerFrame;
			z.avail_in = frameReadSize;
			z.next_out = wholeFrame ? outPtr : zlibBuffer;
			z.avail_out = frameSize;
			z.next_in = rawBuffer;

			int status = inflate(&z, Z_FINISH);
			const bool inflated = status == Z_STREAM_END && z.total_out == frameSize;
			if (status != Z_STREAM_END) {
				ERROR_LOG(LOADER, "Inflate frame %d: failed - %s[%d]\n", frame, (z.msg) ? z.msg : "error", status);
				memset(outPtr, 0, frameBlocks * GetBlockSize());
			} else if (z.total_out != frameSize) {
				ERROR_LOG(LOADER, "Inflate frame %d: block size error %d != %d\n", frame, (u32)z.total_out, frameSize);
				memset(outPtr, 0, frameBlocks * GetBlockSize());
			} else if (!wholeFrame) {
				memcpy(outPtr, zlibBuffer + frameBlockOffset * GetBlockSize(), frameBlocks * GetBlockSize());
				// In case we end up reusing it in a single read later.
				zlibBufferFrame = frame;
			}
			if (!wholeFrame && !inflated) {
				// zlibBuffer was overwritten with unusable data, so whatever frame it was
				// tagged with is gone.  NOTE(review): numFrames is what the constructor
				// stores initially, presumably meaning "no frame cached" - confirm.
				zlibBufferFrame = numFrames;
			}

			inflateReset(&z);
		}

		block += frameBlocks;
		outPtr += frameBlocks * GetBlockSize();
	}

	// Release the state allocated by inflateInit2.
	inflateEnd(&z);
	return true;
}
|
||||
|
||||
|
||||
NPDRMDemoBlockDevice::NPDRMDemoBlockDevice(FILE *file)
|
||||
: f(file)
|
||||
|
@ -31,6 +31,15 @@ class BlockDevice
|
||||
public:
|
||||
virtual ~BlockDevice() {}
|
||||
virtual bool ReadBlock(int blockNumber, u8 *outPtr) = 0;
|
||||
// Default multi-block read: calls ReadBlock once per block, advancing the output
// pointer by GetBlockSize() each time.  Stops and returns false at the first block
// that fails; returns true if every block was read (including when count <= 0, in
// which case nothing is read).
virtual bool ReadBlocks(u32 minBlock, int count, u8 *outPtr) {
	u8 *dest = outPtr;
	int done = 0;
	while (done < count) {
		if (!ReadBlock(minBlock + done, dest))
			return false;
		dest += GetBlockSize();
		++done;
	}
	return true;
}
|
||||
int GetBlockSize() const { return 2048;} // forced, it cannot be changed by subclasses
|
||||
virtual u32 GetNumBlocks() = 0;
|
||||
};
|
||||
@ -41,7 +50,8 @@ class CISOFileBlockDevice : public BlockDevice
|
||||
public:
|
||||
CISOFileBlockDevice(FILE *file);
|
||||
~CISOFileBlockDevice();
|
||||
bool ReadBlock(int blockNumber, u8 *outPtr);
|
||||
bool ReadBlock(int blockNumber, u8 *outPtr) override;
|
||||
bool ReadBlocks(u32 minBlock, int count, u8 *outPtr) override;
|
||||
u32 GetNumBlocks() { return numBlocks;}
|
||||
|
||||
private:
|
||||
@ -64,6 +74,7 @@ public:
|
||||
FileBlockDevice(FILE *file);
|
||||
~FileBlockDevice();
|
||||
bool ReadBlock(int blockNumber, u8 *outPtr) override;
|
||||
bool ReadBlocks(u32 minBlock, int count, u8 *outPtr) override;
|
||||
u32 GetNumBlocks() override {return (u32)(filesize / GetBlockSize());}
|
||||
|
||||
private:
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "Common/Common.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
@ -494,10 +495,10 @@ int ISOFileSystem::Ioctl(u32 handle, u32 cmd, u32 indataPtr, u32 inlen, u32 outd
|
||||
u32 size = (u32)desc.pathTableLengthLE;
|
||||
u8 *out = Memory::GetPointer(outdataPtr);
|
||||
|
||||
while (size >= 2048) {
|
||||
blockDevice->ReadBlock(block++, out);
|
||||
out += 2048;
|
||||
}
|
||||
int blocks = size / blockDevice->GetBlockSize();
|
||||
blockDevice->ReadBlocks(block, blocks, out);
|
||||
size -= blocks * blockDevice->GetBlockSize();
|
||||
out += blocks * blockDevice->GetBlockSize();
|
||||
|
||||
// The remaining (or, usually, only) partial sector.
|
||||
if (size > 0) {
|
||||
@ -527,12 +528,9 @@ size_t ISOFileSystem::ReadFile(u32 handle, u8 *pointer, s64 size)
|
||||
if (e.isBlockSectorMode)
|
||||
{
|
||||
// Whole sectors! Shortcut to this simple code.
|
||||
for (int i = 0; i < size; i++)
|
||||
{
|
||||
blockDevice->ReadBlock(e.seekPos, pointer + i * 2048);
|
||||
e.seekPos++;
|
||||
}
|
||||
return (size_t)size;
|
||||
blockDevice->ReadBlocks(e.seekPos, (int)size, pointer);
|
||||
e.seekPos += (int)size;
|
||||
return (int)size;
|
||||
}
|
||||
|
||||
u32 positionOnIso;
|
||||
@ -559,29 +557,37 @@ size_t ISOFileSystem::ReadFile(u32 handle, u8 *pointer, s64 size)
|
||||
}
|
||||
//okay, we have size and position, let's rock
|
||||
|
||||
u32 totalRead = 0;
|
||||
const int firstBlockOffset = positionOnIso & 2047;
|
||||
const int firstBlockSize = firstBlockOffset == 0 ? 0 : (int)std::min(size, 2048LL - firstBlockOffset);
|
||||
const int lastBlockSize = (size - firstBlockSize) & 2047;
|
||||
const s64 middleSize = size - firstBlockSize - lastBlockSize;
|
||||
int secNum = positionOnIso / 2048;
|
||||
int posInSector = positionOnIso & 2047;
|
||||
s64 remain = size;
|
||||
|
||||
u8 theSector[2048];
|
||||
|
||||
while (remain > 0)
|
||||
{
|
||||
blockDevice->ReadBlock(secNum, theSector);
|
||||
size_t bytesToCopy = 2048 - posInSector;
|
||||
if ((s64)bytesToCopy > remain)
|
||||
bytesToCopy = (size_t)remain;
|
||||
_dbg_assert_msg_(FILESYS, (middleSize & 2047) == 0, "Remaining size should be aligned");
|
||||
|
||||
memcpy(pointer, theSector + posInSector, bytesToCopy);
|
||||
totalRead += (u32)bytesToCopy;
|
||||
pointer += bytesToCopy;
|
||||
remain -= bytesToCopy;
|
||||
posInSector = 0;
|
||||
secNum++;
|
||||
if (firstBlockSize != 0)
|
||||
{
|
||||
blockDevice->ReadBlock(secNum++, theSector);
|
||||
memcpy(pointer, theSector + firstBlockOffset, firstBlockSize);
|
||||
pointer += firstBlockSize;
|
||||
}
|
||||
if (middleSize != 0)
|
||||
{
|
||||
const u32 sectors = (u32)(middleSize / 2048);
|
||||
blockDevice->ReadBlocks(secNum, sectors, pointer);
|
||||
secNum += sectors;
|
||||
pointer += middleSize;
|
||||
}
|
||||
if (lastBlockSize != 0)
|
||||
{
|
||||
blockDevice->ReadBlock(secNum++, theSector);
|
||||
memcpy(pointer, theSector, lastBlockSize);
|
||||
pointer += lastBlockSize;
|
||||
}
|
||||
|
||||
e.seekPos += (unsigned int)size;
|
||||
return totalRead;
|
||||
return (size_t)size;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -58,9 +58,6 @@ enum
|
||||
HLE_AFTER_SKIP_DEADBEEF = 0x40,
|
||||
};
|
||||
|
||||
typedef std::vector<Syscall> SyscallVector;
|
||||
typedef std::map<std::string, SyscallVector> SyscallVectorByModule;
|
||||
|
||||
static std::vector<HLEModule> moduleDB;
|
||||
static int delayedResultEvent = -1;
|
||||
static int hleAfterSyscall = HLE_AFTER_NOTHING;
|
||||
|
@ -114,7 +114,7 @@ struct VarSymbolImport {
|
||||
|
||||
struct VarSymbolExport {
|
||||
bool Matches(const VarSymbolImport &other) const {
|
||||
return !strncmp(moduleName, other.moduleName, KERNELOBJECT_MAX_NAME_LENGTH) && nid == other.nid;
|
||||
return nid == other.nid && !strncmp(moduleName, other.moduleName, KERNELOBJECT_MAX_NAME_LENGTH);
|
||||
}
|
||||
|
||||
char moduleName[KERNELOBJECT_MAX_NAME_LENGTH + 1];
|
||||
@ -130,7 +130,7 @@ struct FuncSymbolImport {
|
||||
|
||||
struct FuncSymbolExport {
|
||||
bool Matches(const FuncSymbolImport &other) const {
|
||||
return !strncmp(moduleName, other.moduleName, KERNELOBJECT_MAX_NAME_LENGTH) && nid == other.nid;
|
||||
return nid == other.nid && !strncmp(moduleName, other.moduleName, KERNELOBJECT_MAX_NAME_LENGTH);
|
||||
}
|
||||
|
||||
char moduleName[KERNELOBJECT_MAX_NAME_LENGTH + 1];
|
||||
|
@ -17,6 +17,15 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#ifdef IOS
|
||||
#include <tr1/unordered_map>
|
||||
namespace std {
|
||||
using std::tr1::unordered_map;
|
||||
using std::tr1::unordered_multimap;
|
||||
}
|
||||
#else
|
||||
#include <unordered_map>
|
||||
#endif
|
||||
#include "Common/ChunkFile.h"
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/HLE/HLE.h"
|
||||
@ -155,7 +164,7 @@ struct LwMutex : public KernelObject
|
||||
static int mutexWaitTimer = -1;
|
||||
static int lwMutexWaitTimer = -1;
|
||||
// Thread -> Mutex locks for thread end.
|
||||
typedef std::multimap<SceUID, SceUID> MutexMap;
|
||||
typedef std::unordered_multimap<SceUID, SceUID> MutexMap;
|
||||
static MutexMap mutexHeldLocks;
|
||||
|
||||
void __KernelMutexBeginCallback(SceUID threadID, SceUID prevCallbackId);
|
||||
@ -204,7 +213,7 @@ void __KernelMutexShutdown()
|
||||
void __KernelMutexAcquireLock(Mutex *mutex, int count, SceUID thread)
|
||||
{
|
||||
#if defined(_DEBUG)
|
||||
std::pair<MutexMap::iterator, MutexMap::iterator> locked = mutexHeldLocks.equal_range(thread);
|
||||
auto locked = mutexHeldLocks.equal_range(thread);
|
||||
for (MutexMap::iterator iter = locked.first; iter != locked.second; ++iter)
|
||||
_dbg_assert_msg_(SCEKERNEL, (*iter).second != mutex->GetUID(), "Thread %d / mutex %d wasn't removed from mutexHeldLocks properly.", thread, mutex->GetUID());
|
||||
#endif
|
||||
|
@ -222,12 +222,12 @@ void JitBlockCache::RemoveBlockMap(int block_num) {
|
||||
|
||||
const u32 pAddr = b.originalAddress & 0x1FFFFFFF;
|
||||
auto it = block_map_.find(std::make_pair(pAddr + 4 * b.originalSize, pAddr));
|
||||
if (it != block_map_.end() && it->second == block_num) {
|
||||
if (it != block_map_.end() && it->second == (u32)block_num) {
|
||||
block_map_.erase(it);
|
||||
} else {
|
||||
// It wasn't in there, or it has the wrong key. Let's search...
|
||||
for (auto it = block_map_.begin(); it != block_map_.end(); ++it) {
|
||||
if (it->second == block_num) {
|
||||
if (it->second == (u32)block_num) {
|
||||
block_map_.erase(it);
|
||||
break;
|
||||
}
|
||||
@ -253,7 +253,7 @@ void JitBlockCache::FinalizeBlock(int block_num, bool block_link) {
|
||||
if (block_link) {
|
||||
for (int i = 0; i < MAX_JIT_BLOCK_EXITS; i++) {
|
||||
if (b.exitAddress[i] != INVALID_EXIT) {
|
||||
links_to_.insert(std::pair<u32, int>(b.exitAddress[i], block_num));
|
||||
links_to_.insert(std::make_pair(b.exitAddress[i], block_num));
|
||||
latestExit = std::max(latestExit, b.exitAddress[i]);
|
||||
}
|
||||
}
|
||||
@ -439,29 +439,25 @@ void JitBlockCache::LinkBlockExits(int i) {
|
||||
}
|
||||
|
||||
void JitBlockCache::LinkBlock(int i) {
|
||||
using namespace std;
|
||||
LinkBlockExits(i);
|
||||
JitBlock &b = blocks_[i];
|
||||
pair<multimap<u32, int>::iterator, multimap<u32, int>::iterator> ppp;
|
||||
// equal_range(b) returns pair<iterator,iterator> representing the range
|
||||
// of element with key b
|
||||
ppp = links_to_.equal_range(b.originalAddress);
|
||||
auto ppp = links_to_.equal_range(b.originalAddress);
|
||||
if (ppp.first == ppp.second)
|
||||
return;
|
||||
for (multimap<u32, int>::iterator iter = ppp.first; iter != ppp.second; ++iter) {
|
||||
for (auto iter = ppp.first; iter != ppp.second; ++iter) {
|
||||
// PanicAlert("Linking block %i to block %i", iter->second, i);
|
||||
LinkBlockExits(iter->second);
|
||||
}
|
||||
}
|
||||
|
||||
void JitBlockCache::UnlinkBlock(int i) {
|
||||
using namespace std;
|
||||
JitBlock &b = blocks_[i];
|
||||
pair<multimap<u32, int>::iterator, multimap<u32, int>::iterator> ppp;
|
||||
ppp = links_to_.equal_range(b.originalAddress);
|
||||
auto ppp = links_to_.equal_range(b.originalAddress);
|
||||
if (ppp.first == ppp.second)
|
||||
return;
|
||||
for (multimap<u32, int>::iterator iter = ppp.first; iter != ppp.second; ++iter) {
|
||||
for (auto iter = ppp.first; iter != ppp.second; ++iter) {
|
||||
JitBlock &sourceBlock = blocks_[iter->second];
|
||||
for (int e = 0; e < MAX_JIT_BLOCK_EXITS; e++) {
|
||||
if (sourceBlock.exitAddress[e] == b.originalAddress)
|
||||
|
@ -18,6 +18,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#ifdef IOS
|
||||
#include <tr1/unordered_map>
|
||||
namespace std {
|
||||
using std::tr1::unordered_map;
|
||||
using std::tr1::unordered_multimap;
|
||||
}
|
||||
#else
|
||||
#include <unordered_map>
|
||||
#endif
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
@ -160,10 +169,10 @@ private:
|
||||
MIPSState *mips_;
|
||||
CodeBlock *codeBlock_;
|
||||
JitBlock *blocks_;
|
||||
std::multimap<u32, int> proxyBlockMap_;
|
||||
std::unordered_multimap<u32, int> proxyBlockMap_;
|
||||
|
||||
int num_blocks_;
|
||||
std::multimap<u32, int> links_to_;
|
||||
std::unordered_multimap<u32, int> links_to_;
|
||||
std::map<std::pair<u32,u32>, u32> block_map_; // (end_addr, start_addr) -> number
|
||||
|
||||
enum {
|
||||
|
@ -16,6 +16,15 @@
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <map>
|
||||
#ifdef IOS
|
||||
#include <tr1/unordered_map>
|
||||
// Makes the TR1 hash containers from <tr1/unordered_map> usable under their std::
// names, so the code below can spell std::unordered_map / std::unordered_multimap
// regardless of which header provided them.
namespace std {
	using std::tr1::unordered_map;
	using std::tr1::unordered_multimap;
}
|
||||
#else
|
||||
#include <unordered_map>
|
||||
#endif
|
||||
#include <set>
|
||||
#include "base/mutex.h"
|
||||
#include "ext/cityhash/city.h"
|
||||
@ -36,13 +45,18 @@
|
||||
using namespace MIPSCodeUtils;
|
||||
|
||||
// Not in a namespace because MSVC's debugger doesn't like it
|
||||
static std::vector<MIPSAnalyst::AnalyzedFunction> functions;
|
||||
typedef std::vector<MIPSAnalyst::AnalyzedFunction> FunctionsVector;
|
||||
static FunctionsVector functions;
|
||||
recursive_mutex functions_lock;
|
||||
|
||||
// TODO: Try multimap instead
|
||||
// One function can appear in multiple copies in memory, and they will all have
|
||||
// the same hash and should all be replaced if possible.
|
||||
static std::map<u64, std::vector<MIPSAnalyst::AnalyzedFunction*>> hashToFunction;
|
||||
#ifdef __SYMBIAN32__
|
||||
// Symbian does not have a functional unordered_multimap.
|
||||
static std::multimap<u64, MIPSAnalyst::AnalyzedFunction *> hashToFunction;
|
||||
#else
|
||||
static std::unordered_multimap<u64, MIPSAnalyst::AnalyzedFunction *> hashToFunction;
|
||||
#endif
|
||||
|
||||
struct HashMapFunc {
|
||||
char name[64];
|
||||
@ -653,10 +667,13 @@ namespace MIPSAnalyst {
|
||||
void UpdateHashToFunctionMap() {
|
||||
lock_guard guard(functions_lock);
|
||||
hashToFunction.clear();
|
||||
#ifndef __SYMBIAN32__
|
||||
hashToFunction.reserve(functions.size());
|
||||
#endif
|
||||
for (auto iter = functions.begin(); iter != functions.end(); iter++) {
|
||||
AnalyzedFunction &f = *iter;
|
||||
if (f.hasHash && f.size > 16) {
|
||||
hashToFunction[f.hash].push_back(&f);
|
||||
hashToFunction.insert(std::make_pair(f.hash, &f));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1021,19 +1038,30 @@ skip:
|
||||
// the easy way of saving a hashmap by unloading and loading a game. I added
|
||||
// an alternative way.
|
||||
|
||||
// TODO: speedup
|
||||
auto iter = functions.begin();
|
||||
while (iter != functions.end()) {
|
||||
if (iter->start >= startAddr && iter->start <= endAddr) {
|
||||
iter = functions.erase(iter);
|
||||
} else {
|
||||
iter++;
|
||||
// Most of the time, functions from the same module will be contiguous in functions.
|
||||
FunctionsVector::iterator prevMatch = functions.end();
|
||||
for (auto iter = functions.begin(); iter != functions.end(); ++iter) {
|
||||
const bool hadPrevMatch = prevMatch != functions.end();
|
||||
const bool match = iter->start >= startAddr && iter->start <= endAddr;
|
||||
|
||||
if (!hadPrevMatch && match) {
|
||||
// Entering a range.
|
||||
prevMatch = iter;
|
||||
} else if (hadPrevMatch && !match) {
|
||||
// Left a range.
|
||||
iter = functions.erase(prevMatch, iter);
|
||||
prevMatch = functions.end();
|
||||
}
|
||||
}
|
||||
if (prevMatch != functions.end()) {
|
||||
// Cool, this is the fastest way.
|
||||
functions.erase(prevMatch, functions.end());
|
||||
}
|
||||
|
||||
RestoreReplacedInstructions(startAddr, endAddr);
|
||||
|
||||
// TODO: Also wipe them from hash->function map
|
||||
// TODO: Also wipe them from hash->function map.
|
||||
// It should be fine not to though, since a collision is not likely.
|
||||
}
|
||||
|
||||
void ReplaceFunctions() {
|
||||
@ -1112,15 +1140,14 @@ skip:
|
||||
UpdateHashToFunctionMap();
|
||||
|
||||
for (auto mf = hashMap.begin(), end = hashMap.end(); mf != end; ++mf) {
|
||||
auto iter = hashToFunction.find(mf->hash);
|
||||
if (iter == hashToFunction.end()) {
|
||||
auto range = hashToFunction.equal_range(mf->hash);
|
||||
if (range.first == range.second) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Yay, found a function.
|
||||
|
||||
for (unsigned int i = 0; i < iter->second.size(); i++) {
|
||||
AnalyzedFunction &f = *(iter->second[i]);
|
||||
for (auto iter = range.first; iter != range.second; ++iter) {
|
||||
AnalyzedFunction &f = *iter->second;
|
||||
if (f.hash == mf->hash && f.size == mf->size) {
|
||||
strncpy(f.name, mf->name, sizeof(mf->name) - 1);
|
||||
|
||||
|
@ -406,7 +406,7 @@ MemoryInitedLock Lock()
|
||||
return MemoryInitedLock();
|
||||
}
|
||||
|
||||
static Opcode Read_Instruction(u32 address, bool resolveReplacements, Opcode inst)
|
||||
__forceinline static Opcode Read_Instruction(u32 address, bool resolveReplacements, Opcode inst)
|
||||
{
|
||||
if (!MIPS_IS_EMUHACK(inst.encoding)) {
|
||||
return inst;
|
||||
|
@ -189,15 +189,16 @@ void __PPGeInit()
|
||||
palette[i] = (val << 12) | 0xFFF;
|
||||
}
|
||||
|
||||
u16_le *imagePtr = (u16_le *)imageData;
|
||||
const u32_le *imagePtr = (u32_le *)imageData;
|
||||
u8 *ramPtr = (u8 *)Memory::GetPointer(atlasPtr);
|
||||
|
||||
// Palettize to 4-bit, the easy way.
|
||||
for (int i = 0; i < width * height / 2; i++) {
|
||||
u16 c1 = imagePtr[i*2];
|
||||
u16 c2 = imagePtr[i*2+1];
|
||||
int a1 = c1 & 0xF;
|
||||
int a2 = c2 & 0xF;
|
||||
// Each pixel is 16 bits, so this loads two pixels.
|
||||
u32 c = imagePtr[i];
|
||||
// It's white anyway, so we only look at one channel of each pixel.
|
||||
int a1 = (c & 0x0000000F) >> 0;
|
||||
int a2 = (c & 0x000F0000) >> 16;
|
||||
u8 cval = (a2 << 4) | a1;
|
||||
ramPtr[i] = cval;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user