Merge pull request #7046 from unknownbrackets/perf

Improve load and shutdown performance a bit
This commit is contained in:
Henrik Rydgård 2014-11-03 21:50:10 +01:00
commit f1d43e7e90
12 changed files with 313 additions and 84 deletions

View File

@ -28,6 +28,15 @@
// - Serialization code for anything complex has to be manually written.
#include <map>
#ifdef IOS
#include <tr1/unordered_map>
namespace std {
using std::tr1::unordered_map;
using std::tr1::unordered_multimap;
}
#else
#include <unordered_map>
#endif
#include <deque>
#include <list>
#include <set>
@ -178,7 +187,29 @@ public:
}
template<class K, class T>
void DoMap(std::map<K, T> &x, T &default_val)
void Do(std::unordered_map<K, T *> &x)
{
	// In read mode, delete every non-null pointer currently stored in the map
	// before DoMap clears it and fills it with freshly read entries.
	if (mode == MODE_READ)
	{
		auto cur = x.begin();
		const auto last = x.end();
		while (cur != last)
		{
			T *stale = cur->second;
			if (stale != NULL)
				delete stale;
			++cur;
		}
	}

	// Entries read by DoMap start out as a null pointer.
	T *nullDefault = NULL;
	DoMap(x, nullDefault);
}
template<class K, class T>
void Do(std::unordered_map<K, T> &x)
{
	// A value-initialized T is handed to DoMap as the default for each mapped value.
	T initial = T();
	DoMap(x, initial);
}
template<class M>
void DoMap(M &x, typename M::mapped_type &default_val)
{
unsigned int number = (unsigned int)x.size();
Do(number);
@ -188,9 +219,9 @@ public:
x.clear();
while (number > 0)
{
K first = K();
typename M::key_type first = typename M::key_type();
Do(first);
T second = default_val;
typename M::mapped_type second = default_val;
Do(second);
x[first] = second;
--number;
@ -201,10 +232,10 @@ public:
case MODE_MEASURE:
case MODE_VERIFY:
{
typename std::map<K, T>::iterator itr = x.begin();
typename M::iterator itr = x.begin();
while (number > 0)
{
K first = itr->first;
typename M::key_type first = itr->first;
Do(first);
Do(itr->second);
--number;
@ -238,7 +269,29 @@ public:
}
template<class K, class T>
void DoMultimap(std::multimap<K, T> &x, T &default_val)
void Do(std::unordered_multimap<K, T *> &x)
{
	// In read mode, delete every non-null pointer currently stored in the multimap
	// before DoMultimap clears it and fills it with freshly read entries.
	if (mode == MODE_READ)
	{
		auto cur = x.begin();
		const auto last = x.end();
		while (cur != last)
		{
			T *stale = cur->second;
			if (stale != NULL)
				delete stale;
			++cur;
		}
	}

	// Entries read by DoMultimap start out as a null pointer.
	T *nullDefault = NULL;
	DoMultimap(x, nullDefault);
}
template<class K, class T>
void Do(std::unordered_multimap<K, T> &x)
{
	// A value-initialized T is handed to DoMultimap as the default for each mapped value.
	T initial = T();
	DoMultimap(x, initial);
}
template<class M>
void DoMultimap(M &x, typename M::mapped_type &default_val)
{
unsigned int number = (unsigned int)x.size();
Do(number);
@ -248,9 +301,9 @@ public:
x.clear();
while (number > 0)
{
K first = K();
typename M::key_type first = typename M::key_type();
Do(first);
T second = default_val;
typename M::mapped_type second = default_val;
Do(second);
x.insert(std::make_pair(first, second));
--number;
@ -261,7 +314,7 @@ public:
case MODE_MEASURE:
case MODE_VERIFY:
{
typename std::multimap<K, T>::iterator itr = x.begin();
typename M::iterator itr = x.begin();
while (number > 0)
{
Do(itr->first);

View File

@ -20,6 +20,7 @@
#include "Core/FileSystems/BlockDevices.h"
#include <cstdio>
#include <cstring>
#include <algorithm>
extern "C"
{
@ -99,6 +100,18 @@ bool FileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr)
lseek64(fd, (u64)blockNumber * (u64)GetBlockSize(), SEEK_SET);
if (read(fd, outPtr, 2048) != 2048) {
ERROR_LOG(FILESYS, "Could not read() 2048 bytes from block");
return false;
}
return true;
}
// Reads `count` consecutive blocks starting at `minBlock` into outPtr with a
// single seek + read() on the file descriptor. Returns false (and logs) when
// read() does not deliver exactly the requested number of bytes.
bool FileBlockDevice::ReadBlocks(u32 minBlock, int count, u8 *outPtr)
{
	const u64 startOffset = (u64)minBlock * (u64)GetBlockSize();
	lseek64(fd, startOffset, SEEK_SET);

	const s32 bytes = GetBlockSize() * count;
	const bool complete = read(fd, outPtr, bytes) == bytes;
	if (!complete) {
		ERROR_LOG(FILESYS, "Could not read() %d bytes from block", bytes);
	}
	return complete;
}
@ -121,12 +134,24 @@ FileBlockDevice::~FileBlockDevice()
bool FileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr)
{
fseeko(f, (u64)blockNumber * (u64)GetBlockSize(), SEEK_SET);
if (fread(outPtr, 1, 2048, f) != 2048)
if (fread(outPtr, 1, 2048, f) != 2048) {
DEBUG_LOG(FILESYS, "Could not read 2048 bytes from block");
return false;
}
return true;
}
// Reads `count` consecutive blocks starting at `minBlock` into outPtr with a
// single seek + fread of 2048-byte items. Returns false (and logs) when fread
// delivers fewer items than requested.
bool FileBlockDevice::ReadBlocks(u32 minBlock, int count, u8 *outPtr)
{
	const u64 startOffset = (u64)minBlock * (u64)GetBlockSize();
	fseeko(f, startOffset, SEEK_SET);

	const size_t itemsRead = fread(outPtr, 2048, count, f);
	if (itemsRead == (size_t)count) {
		return true;
	}

	ERROR_LOG(FILESYS, "Could not read %d bytes from block", 2048 * count);
	return false;
}
#endif
// .CSO format
@ -157,6 +182,8 @@ typedef struct ciso_header
// TODO: Need much better error handling.
static const u32 CSO_READ_BUFFER_SIZE = 256 * 1024;
CISOFileBlockDevice::CISOFileBlockDevice(FILE *file)
: f(file)
{
@ -197,7 +224,10 @@ CISOFileBlockDevice::CISOFileBlockDevice(FILE *file)
VERBOSE_LOG(LOADER, "CSO numBlocks=%i numFrames=%i align=%i", numBlocks, numFrames, indexShift);
// We might read a bit of alignment too, so be prepared.
readBuffer = new u8[frameSize + (1 << indexShift)];
if (frameSize + (1 << indexShift) < CSO_READ_BUFFER_SIZE)
readBuffer = new u8[CSO_READ_BUFFER_SIZE];
else
readBuffer = new u8[frameSize + (1 << indexShift)];
zlibBuffer = new u8[frameSize + (1 << indexShift)];
zlibBufferFrame = numFrames;
@ -279,7 +309,7 @@ bool CISOFileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr)
return false;
}
z.avail_in = readSize;
z.next_out = frameSize == GetBlockSize() ? outPtr : zlibBuffer;
z.next_out = frameSize == (u32)GetBlockSize() ? outPtr : zlibBuffer;
z.avail_out = frameSize;
z.next_in = readBuffer;
@ -300,7 +330,7 @@ bool CISOFileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr)
}
inflateEnd(&z);
if (frameSize != GetBlockSize())
if (frameSize != (u32)GetBlockSize())
{
zlibBufferFrame = frameNumber;
memcpy(outPtr, zlibBuffer + compressedOffset, GetBlockSize());
@ -309,6 +339,96 @@ bool CISOFileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr)
return true;
}
// Reads `count` consecutive blocks starting at `minBlock` into outPtr, pulling
// the compressed data from the file in large chunks (up to CSO_READ_BUFFER_SIZE)
// instead of one seek + read per block.
//
// - count <= 0 does nothing and returns true.
// - count == 1 is forwarded to ReadBlock().
// - If minBlock is past the end of the image, the output is zeroed and false
//   is returned.
// - If the range runs past the end of the image, the blocks that do not exist
//   are zero-filled and the existing ones are still read.
// - A frame that fails to inflate is logged and its output zero-filled; the
//   remaining frames are still processed and the function still returns true.
//
// Returns false only for an out-of-range start or a failed inflateInit2().
bool CISOFileBlockDevice::ReadBlocks(u32 minBlock, int count, u8 *outPtr) {
	if (count <= 0) {
		// Callers may compute a block count of zero (e.g. size / blockSize for a
		// short table); without this guard lastBlock below would be minBlock - 1.
		return true;
	}
	if (count == 1) {
		return ReadBlock(minBlock, outPtr);
	}
	if (minBlock >= numBlocks) {
		memset(outPtr, 0, GetBlockSize() * count);
		return false;
	}

	// Clamp the range to the image and zero the tail that cannot be read.
	const u32 lastBlock = std::min(minBlock + count, numBlocks) - 1;
	const u32 availableBlocks = lastBlock + 1 - minBlock;
	const u32 missingBlocks = (u32)count - availableBlocks;
	if (missingBlocks != 0) {
		memset(outPtr + (size_t)GetBlockSize() * availableBlocks, 0, (size_t)GetBlockSize() * missingBlocks);
	}

	const u32 minFrameNumber = minBlock >> blockShift;
	const u32 lastFrameNumber = lastBlock >> blockShift;
	// The index entry after the last frame marks where the compressed data we need ends.
	const u32 afterLastIndexPos = index[lastFrameNumber + 1] & 0x7FFFFFFF;
	const u64 totalReadEnd = (u64)afterLastIndexPos << indexShift;

	z_stream z;
	z.zalloc = Z_NULL;
	z.zfree = Z_NULL;
	z.opaque = Z_NULL;
	if (inflateInit2(&z, -15) != Z_OK) {
		ERROR_LOG(LOADER, "Unable to initialize inflate: %s\n", (z.msg) ? z.msg : "?");
		return false;
	}

	// File offsets currently held in readBuffer: [readBufferStart, readBufferEnd).
	u64 readBufferStart = 0;
	u64 readBufferEnd = 0;
	u32 block = minBlock;
	const u32 blocksPerFrame = 1 << blockShift;
	for (u32 frame = minFrameNumber; frame <= lastFrameNumber; ++frame) {
		const u32 idx = index[frame];
		const u32 indexPos = idx & 0x7FFFFFFF;
		const u32 nextIndexPos = index[frame + 1] & 0x7FFFFFFF;
		const u64 frameReadPos = (u64)indexPos << indexShift;
		const u64 frameReadEnd = (u64)nextIndexPos << indexShift;
		const u32 frameReadSize = (u32)(frameReadEnd - frameReadPos);

		// Only the first frame can start mid-frame; never take more blocks than
		// remain in this frame or in the request.
		const u32 frameBlockOffset = block & (blocksPerFrame - 1);
		const u32 frameBlocks = std::min(lastBlock - block + 1, blocksPerFrame - frameBlockOffset);

		if (frameReadEnd > readBufferEnd) {
			// Refill: read this frame plus as many following bytes as fit and are needed.
			const size_t maxNeeded = totalReadEnd - frameReadPos;
			const size_t chunkSize = std::min(maxNeeded, (size_t)std::max(frameReadSize, CSO_READ_BUFFER_SIZE));
			fseeko(f, frameReadPos, SEEK_SET);
			const u32 readSize = (u32)fread(readBuffer, 1, chunkSize, f);
			if (readSize < chunkSize) {
				// Short read: pad with zeros so later code never sees stale data.
				memset(readBuffer + readSize, 0, chunkSize - readSize);
			}

			readBufferStart = frameReadPos;
			readBufferEnd = frameReadPos + readSize;
		}

		u8 *rawBuffer = &readBuffer[frameReadPos - readBufferStart];
		// The top bit of the index entry marks a frame stored uncompressed.
		const int plain = idx & 0x80000000;
		if (plain) {
			memcpy(outPtr, rawBuffer + frameBlockOffset * GetBlockSize(), frameBlocks * GetBlockSize());
		} else {
			// Whole frames inflate straight into the output; partial frames go
			// through zlibBuffer and only the wanted blocks are copied out.
			const bool wholeFrame = frameBlocks == blocksPerFrame;
			z.avail_in = frameReadSize;
			z.next_out = wholeFrame ? outPtr : zlibBuffer;
			z.avail_out = frameSize;
			z.next_in = rawBuffer;

			int status = inflate(&z, Z_FINISH);
			bool inflated = false;
			if (status != Z_STREAM_END) {
				ERROR_LOG(LOADER, "Inflate frame %d: failed - %s[%d]\n", frame, (z.msg) ? z.msg : "error", status);
				memset(outPtr, 0, frameBlocks * GetBlockSize());
			} else if (z.total_out != frameSize) {
				ERROR_LOG(LOADER, "Inflate frame %d: block size error %d != %d\n", frame, (u32)z.total_out, frameSize);
				memset(outPtr, 0, frameBlocks * GetBlockSize());
			} else {
				inflated = true;
			}

			if (!wholeFrame) {
				if (inflated) {
					memcpy(outPtr, zlibBuffer + frameBlockOffset * GetBlockSize(), frameBlocks * GetBlockSize());
					// In case we end up reusing it in a single read later.
					zlibBufferFrame = frame;
				} else {
					// zlibBuffer may have been partly overwritten, so it no longer
					// holds whatever frame it was tagged with. numFrames is the
					// value the constructor starts with.
					// NOTE(review): assumed to mean "no frame cached" - confirm against ReadBlock.
					zlibBufferFrame = numFrames;
				}
			}

			inflateReset(&z);
		}

		block += frameBlocks;
		outPtr += frameBlocks * GetBlockSize();
	}

	// Release the inflate state allocated by inflateInit2().
	inflateEnd(&z);
	return true;
}
NPDRMDemoBlockDevice::NPDRMDemoBlockDevice(FILE *file)
: f(file)

View File

@ -31,6 +31,15 @@ class BlockDevice
public:
virtual ~BlockDevice() {}
virtual bool ReadBlock(int blockNumber, u8 *outPtr) = 0;
// Default multi-block read: calls ReadBlock() once per block, advancing the
// output pointer by one block each time, and stops at the first failure.
virtual bool ReadBlocks(u32 minBlock, int count, u8 *outPtr) {
	u8 *dest = outPtr;
	int done = 0;
	while (done < count) {
		const bool ok = ReadBlock(minBlock + done, dest);
		if (!ok)
			return false;
		dest += GetBlockSize();
		++done;
	}
	return true;
}
int GetBlockSize() const { return 2048;} // forced, it cannot be changed by subclasses
virtual u32 GetNumBlocks() = 0;
};
@ -41,7 +50,8 @@ class CISOFileBlockDevice : public BlockDevice
public:
CISOFileBlockDevice(FILE *file);
~CISOFileBlockDevice();
bool ReadBlock(int blockNumber, u8 *outPtr);
bool ReadBlock(int blockNumber, u8 *outPtr) override;
bool ReadBlocks(u32 minBlock, int count, u8 *outPtr) override;
u32 GetNumBlocks() { return numBlocks;}
private:
@ -64,6 +74,7 @@ public:
FileBlockDevice(FILE *file);
~FileBlockDevice();
bool ReadBlock(int blockNumber, u8 *outPtr) override;
bool ReadBlocks(u32 minBlock, int count, u8 *outPtr) override;
u32 GetNumBlocks() override {return (u32)(filesize / GetBlockSize());}
private:

View File

@ -18,6 +18,7 @@
#include <cstring>
#include <cstdio>
#include <ctype.h>
#include <algorithm>
#include "Common/Common.h"
#include "Common/CommonTypes.h"
@ -494,10 +495,10 @@ int ISOFileSystem::Ioctl(u32 handle, u32 cmd, u32 indataPtr, u32 inlen, u32 outd
u32 size = (u32)desc.pathTableLengthLE;
u8 *out = Memory::GetPointer(outdataPtr);
while (size >= 2048) {
blockDevice->ReadBlock(block++, out);
out += 2048;
}
int blocks = size / blockDevice->GetBlockSize();
blockDevice->ReadBlocks(block, blocks, out);
size -= blocks * blockDevice->GetBlockSize();
out += blocks * blockDevice->GetBlockSize();
// The remaining (or, usually, only) partial sector.
if (size > 0) {
@ -527,12 +528,9 @@ size_t ISOFileSystem::ReadFile(u32 handle, u8 *pointer, s64 size)
if (e.isBlockSectorMode)
{
// Whole sectors! Shortcut to this simple code.
for (int i = 0; i < size; i++)
{
blockDevice->ReadBlock(e.seekPos, pointer + i * 2048);
e.seekPos++;
}
return (size_t)size;
blockDevice->ReadBlocks(e.seekPos, (int)size, pointer);
e.seekPos += (int)size;
return (int)size;
}
u32 positionOnIso;
@ -559,29 +557,37 @@ size_t ISOFileSystem::ReadFile(u32 handle, u8 *pointer, s64 size)
}
//okay, we have size and position, let's rock
u32 totalRead = 0;
const int firstBlockOffset = positionOnIso & 2047;
const int firstBlockSize = firstBlockOffset == 0 ? 0 : (int)std::min(size, 2048LL - firstBlockOffset);
const int lastBlockSize = (size - firstBlockSize) & 2047;
const s64 middleSize = size - firstBlockSize - lastBlockSize;
int secNum = positionOnIso / 2048;
int posInSector = positionOnIso & 2047;
s64 remain = size;
u8 theSector[2048];
while (remain > 0)
{
blockDevice->ReadBlock(secNum, theSector);
size_t bytesToCopy = 2048 - posInSector;
if ((s64)bytesToCopy > remain)
bytesToCopy = (size_t)remain;
_dbg_assert_msg_(FILESYS, (middleSize & 2047) == 0, "Remaining size should be aligned");
memcpy(pointer, theSector + posInSector, bytesToCopy);
totalRead += (u32)bytesToCopy;
pointer += bytesToCopy;
remain -= bytesToCopy;
posInSector = 0;
secNum++;
if (firstBlockSize != 0)
{
blockDevice->ReadBlock(secNum++, theSector);
memcpy(pointer, theSector + firstBlockOffset, firstBlockSize);
pointer += firstBlockSize;
}
if (middleSize != 0)
{
const u32 sectors = (u32)(middleSize / 2048);
blockDevice->ReadBlocks(secNum, sectors, pointer);
secNum += sectors;
pointer += middleSize;
}
if (lastBlockSize != 0)
{
blockDevice->ReadBlock(secNum++, theSector);
memcpy(pointer, theSector, lastBlockSize);
pointer += lastBlockSize;
}
e.seekPos += (unsigned int)size;
return totalRead;
return (size_t)size;
}
else
{

View File

@ -58,9 +58,6 @@ enum
HLE_AFTER_SKIP_DEADBEEF = 0x40,
};
typedef std::vector<Syscall> SyscallVector;
typedef std::map<std::string, SyscallVector> SyscallVectorByModule;
static std::vector<HLEModule> moduleDB;
static int delayedResultEvent = -1;
static int hleAfterSyscall = HLE_AFTER_NOTHING;

View File

@ -114,7 +114,7 @@ struct VarSymbolImport {
struct VarSymbolExport {
bool Matches(const VarSymbolImport &other) const {
return !strncmp(moduleName, other.moduleName, KERNELOBJECT_MAX_NAME_LENGTH) && nid == other.nid;
return nid == other.nid && !strncmp(moduleName, other.moduleName, KERNELOBJECT_MAX_NAME_LENGTH);
}
char moduleName[KERNELOBJECT_MAX_NAME_LENGTH + 1];
@ -130,7 +130,7 @@ struct FuncSymbolImport {
struct FuncSymbolExport {
bool Matches(const FuncSymbolImport &other) const {
return !strncmp(moduleName, other.moduleName, KERNELOBJECT_MAX_NAME_LENGTH) && nid == other.nid;
return nid == other.nid && !strncmp(moduleName, other.moduleName, KERNELOBJECT_MAX_NAME_LENGTH);
}
char moduleName[KERNELOBJECT_MAX_NAME_LENGTH + 1];

View File

@ -17,6 +17,15 @@
#include <algorithm>
#include <map>
#ifdef IOS
#include <tr1/unordered_map>
namespace std {
using std::tr1::unordered_map;
using std::tr1::unordered_multimap;
}
#else
#include <unordered_map>
#endif
#include "Common/ChunkFile.h"
#include "Core/MemMap.h"
#include "Core/HLE/HLE.h"
@ -155,7 +164,7 @@ struct LwMutex : public KernelObject
static int mutexWaitTimer = -1;
static int lwMutexWaitTimer = -1;
// Thread -> Mutex locks for thread end.
typedef std::multimap<SceUID, SceUID> MutexMap;
typedef std::unordered_multimap<SceUID, SceUID> MutexMap;
static MutexMap mutexHeldLocks;
void __KernelMutexBeginCallback(SceUID threadID, SceUID prevCallbackId);
@ -204,7 +213,7 @@ void __KernelMutexShutdown()
void __KernelMutexAcquireLock(Mutex *mutex, int count, SceUID thread)
{
#if defined(_DEBUG)
std::pair<MutexMap::iterator, MutexMap::iterator> locked = mutexHeldLocks.equal_range(thread);
auto locked = mutexHeldLocks.equal_range(thread);
for (MutexMap::iterator iter = locked.first; iter != locked.second; ++iter)
_dbg_assert_msg_(SCEKERNEL, (*iter).second != mutex->GetUID(), "Thread %d / mutex %d wasn't removed from mutexHeldLocks properly.", thread, mutex->GetUID());
#endif

View File

@ -222,12 +222,12 @@ void JitBlockCache::RemoveBlockMap(int block_num) {
const u32 pAddr = b.originalAddress & 0x1FFFFFFF;
auto it = block_map_.find(std::make_pair(pAddr + 4 * b.originalSize, pAddr));
if (it != block_map_.end() && it->second == block_num) {
if (it != block_map_.end() && it->second == (u32)block_num) {
block_map_.erase(it);
} else {
// It wasn't in there, or it has the wrong key. Let's search...
for (auto it = block_map_.begin(); it != block_map_.end(); ++it) {
if (it->second == block_num) {
if (it->second == (u32)block_num) {
block_map_.erase(it);
break;
}
@ -253,7 +253,7 @@ void JitBlockCache::FinalizeBlock(int block_num, bool block_link) {
if (block_link) {
for (int i = 0; i < MAX_JIT_BLOCK_EXITS; i++) {
if (b.exitAddress[i] != INVALID_EXIT) {
links_to_.insert(std::pair<u32, int>(b.exitAddress[i], block_num));
links_to_.insert(std::make_pair(b.exitAddress[i], block_num));
latestExit = std::max(latestExit, b.exitAddress[i]);
}
}
@ -439,29 +439,25 @@ void JitBlockCache::LinkBlockExits(int i) {
}
void JitBlockCache::LinkBlock(int i) {
using namespace std;
LinkBlockExits(i);
JitBlock &b = blocks_[i];
pair<multimap<u32, int>::iterator, multimap<u32, int>::iterator> ppp;
// equal_range(b) returns pair<iterator,iterator> representing the range
// of element with key b
ppp = links_to_.equal_range(b.originalAddress);
auto ppp = links_to_.equal_range(b.originalAddress);
if (ppp.first == ppp.second)
return;
for (multimap<u32, int>::iterator iter = ppp.first; iter != ppp.second; ++iter) {
for (auto iter = ppp.first; iter != ppp.second; ++iter) {
// PanicAlert("Linking block %i to block %i", iter->second, i);
LinkBlockExits(iter->second);
}
}
void JitBlockCache::UnlinkBlock(int i) {
using namespace std;
JitBlock &b = blocks_[i];
pair<multimap<u32, int>::iterator, multimap<u32, int>::iterator> ppp;
ppp = links_to_.equal_range(b.originalAddress);
auto ppp = links_to_.equal_range(b.originalAddress);
if (ppp.first == ppp.second)
return;
for (multimap<u32, int>::iterator iter = ppp.first; iter != ppp.second; ++iter) {
for (auto iter = ppp.first; iter != ppp.second; ++iter) {
JitBlock &sourceBlock = blocks_[iter->second];
for (int e = 0; e < MAX_JIT_BLOCK_EXITS; e++) {
if (sourceBlock.exitAddress[e] == b.originalAddress)

View File

@ -18,6 +18,15 @@
#pragma once
#include <map>
#ifdef IOS
#include <tr1/unordered_map>
namespace std {
using std::tr1::unordered_map;
using std::tr1::unordered_multimap;
}
#else
#include <unordered_map>
#endif
#include <vector>
#include <string>
@ -160,10 +169,10 @@ private:
MIPSState *mips_;
CodeBlock *codeBlock_;
JitBlock *blocks_;
std::multimap<u32, int> proxyBlockMap_;
std::unordered_multimap<u32, int> proxyBlockMap_;
int num_blocks_;
std::multimap<u32, int> links_to_;
std::unordered_multimap<u32, int> links_to_;
std::map<std::pair<u32,u32>, u32> block_map_; // (end_addr, start_addr) -> number
enum {

View File

@ -16,6 +16,15 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <map>
#ifdef IOS
#include <tr1/unordered_map>
// The IOS build includes <tr1/unordered_map>, so make the TR1 containers
// available under the std:: names the rest of this file uses.
namespace std {
	using std::tr1::unordered_map;
	using std::tr1::unordered_multimap;
}
#else
#include <unordered_map>
#endif
#include <set>
#include "base/mutex.h"
#include "ext/cityhash/city.h"
@ -36,13 +45,18 @@
using namespace MIPSCodeUtils;
// Not in a namespace because MSVC's debugger doesn't like it
static std::vector<MIPSAnalyst::AnalyzedFunction> functions;
typedef std::vector<MIPSAnalyst::AnalyzedFunction> FunctionsVector;
static FunctionsVector functions;
recursive_mutex functions_lock;
// TODO: Try multimap instead
// One function can appear in multiple copies in memory, and they will all have
// the same hash and should all be replaced if possible.
static std::map<u64, std::vector<MIPSAnalyst::AnalyzedFunction*>> hashToFunction;
#ifdef __SYMBIAN32__
// Symbian does not have a functional unordered_multimap.
static std::multimap<u64, MIPSAnalyst::AnalyzedFunction *> hashToFunction;
#else
static std::unordered_multimap<u64, MIPSAnalyst::AnalyzedFunction *> hashToFunction;
#endif
struct HashMapFunc {
char name[64];
@ -653,10 +667,13 @@ namespace MIPSAnalyst {
void UpdateHashToFunctionMap() {
lock_guard guard(functions_lock);
hashToFunction.clear();
#ifndef __SYMBIAN32__
hashToFunction.reserve(functions.size());
#endif
for (auto iter = functions.begin(); iter != functions.end(); iter++) {
AnalyzedFunction &f = *iter;
if (f.hasHash && f.size > 16) {
hashToFunction[f.hash].push_back(&f);
hashToFunction.insert(std::make_pair(f.hash, &f));
}
}
}
@ -1021,19 +1038,30 @@ skip:
// the easy way of saving a hashmap by unloading and loading a game. I added
// an alternative way.
// TODO: speedup
auto iter = functions.begin();
while (iter != functions.end()) {
if (iter->start >= startAddr && iter->start <= endAddr) {
iter = functions.erase(iter);
} else {
iter++;
// Most of the time, functions from the same module will be contiguous in functions.
FunctionsVector::iterator prevMatch = functions.end();
for (auto iter = functions.begin(); iter != functions.end(); ++iter) {
const bool hadPrevMatch = prevMatch != functions.end();
const bool match = iter->start >= startAddr && iter->start <= endAddr;
if (!hadPrevMatch && match) {
// Entering a range.
prevMatch = iter;
} else if (hadPrevMatch && !match) {
// Left a range.
iter = functions.erase(prevMatch, iter);
prevMatch = functions.end();
}
}
if (prevMatch != functions.end()) {
// Cool, this is the fastest way.
functions.erase(prevMatch, functions.end());
}
RestoreReplacedInstructions(startAddr, endAddr);
// TODO: Also wipe them from hash->function map
// TODO: Also wipe them from hash->function map.
// It should be fine not to though, since a collision is not likely.
}
void ReplaceFunctions() {
@ -1112,15 +1140,14 @@ skip:
UpdateHashToFunctionMap();
for (auto mf = hashMap.begin(), end = hashMap.end(); mf != end; ++mf) {
auto iter = hashToFunction.find(mf->hash);
if (iter == hashToFunction.end()) {
auto range = hashToFunction.equal_range(mf->hash);
if (range.first == range.second) {
continue;
}
// Yay, found a function.
for (unsigned int i = 0; i < iter->second.size(); i++) {
AnalyzedFunction &f = *(iter->second[i]);
for (auto iter = range.first; iter != range.second; ++iter) {
AnalyzedFunction &f = *iter->second;
if (f.hash == mf->hash && f.size == mf->size) {
strncpy(f.name, mf->name, sizeof(mf->name) - 1);

View File

@ -406,7 +406,7 @@ MemoryInitedLock Lock()
return MemoryInitedLock();
}
static Opcode Read_Instruction(u32 address, bool resolveReplacements, Opcode inst)
__forceinline static Opcode Read_Instruction(u32 address, bool resolveReplacements, Opcode inst)
{
if (!MIPS_IS_EMUHACK(inst.encoding)) {
return inst;

View File

@ -189,15 +189,16 @@ void __PPGeInit()
palette[i] = (val << 12) | 0xFFF;
}
u16_le *imagePtr = (u16_le *)imageData;
const u32_le *imagePtr = (u32_le *)imageData;
u8 *ramPtr = (u8 *)Memory::GetPointer(atlasPtr);
// Palettize to 4-bit, the easy way.
for (int i = 0; i < width * height / 2; i++) {
u16 c1 = imagePtr[i*2];
u16 c2 = imagePtr[i*2+1];
int a1 = c1 & 0xF;
int a2 = c2 & 0xF;
// Each pixel is 16 bits, so this loads two pixels.
u32 c = imagePtr[i];
// It's white anyway, so we only look at one channel of each pixel.
int a1 = (c & 0x0000000F) >> 0;
int a2 = (c & 0x000F0000) >> 16;
u8 cval = (a2 << 4) | a1;
ramPtr[i] = cval;
}