Merge pull request #812 from libretro/dynarec

Dynarec updates, and revert my bad gpu optimization
This commit is contained in:
Autechre 2021-08-24 11:25:56 +02:00 committed by GitHub
commit 92b93bfe11
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 565 additions and 473 deletions

View File

@ -9,7 +9,7 @@ HAVE_LIGHTREC = 1
LINK_STATIC_LIBCPLUSPLUS = 1
THREADED_RECOMPILER = 1
LIGHTREC_DEBUG = 0
LIGHTREC_LOG_LEVEL = 2
LIGHTREC_LOG_LEVEL = 3
CORE_DIR := .
HAVE_GRIFFIN = 0

View File

@ -162,7 +162,7 @@ ifeq ($(HAVE_LIGHTREC), 1)
FLAGS += -DHAVE_LIGHTREC
ifeq ($(LIGHTREC_LOG_LEVEL),)
FLAGS += -DLOG_LEVEL=2 \
FLAGS += -DLOG_LEVEL=3 \
-DENABLE_DISASSEMBLER=0
else
ifeq ($(LIGHTREC_LOG_LEVEL), 4)

View File

@ -6,7 +6,7 @@
[subrepo]
remote = https://github.com/pcercuei/lightrec.git
branch = master
commit = aa2f992ed8c3236d1d952d72e3de8ea2b8d11af0
parent = bd765e2bf9b0f1e5bd788ebd867c9b1830ece001
commit = 0df4ec86ba664dad3b4cc24fd3199131e8e3219f
parent = 364a705dc70b57a734b4e362226a386b34a008fb
method = merge
cmdver = 0.4.3

View File

@ -59,7 +59,7 @@ struct block * lightrec_find_block_from_lut(struct blockcache *cache,
void remove_from_code_lut(struct blockcache *cache, struct block *block)
{
struct lightrec_state *state = block->state;
struct lightrec_state *state = cache->state;
u32 offset = lut_offset(block->pc);
if (block->function) {
@ -110,7 +110,7 @@ void lightrec_free_block_cache(struct blockcache *cache)
for (i = 0; i < LUT_SIZE; i++) {
for (block = cache->lut[i]; block; block = next) {
next = block->next;
lightrec_free_block(block);
lightrec_free_block(cache->state, block);
}
}
@ -132,18 +132,10 @@ struct blockcache * lightrec_blockcache_init(struct lightrec_state *state)
u32 lightrec_calculate_block_hash(const struct block *block)
{
const struct lightrec_mem_map *map = block->map;
u32 pc, hash = 0xffffffff;
const u32 *code;
const u32 *code = block->code;
u32 hash = 0xffffffff;
unsigned int i;
pc = kunseg(block->pc) - map->pc;
while (map->mirror_of)
map = map->mirror_of;
code = map->address + pc;
/* Jenkins one-at-a-time hash algorithm */
for (i = 0; i < block->nb_ops; i++) {
hash += *code++;
@ -158,9 +150,9 @@ u32 lightrec_calculate_block_hash(const struct block *block)
return hash;
}
bool lightrec_block_is_outdated(struct block *block)
bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block)
{
void **lut_entry = &block->state->code_lut[lut_offset(block->pc)];
void **lut_entry = &state->code_lut[lut_offset(block->pc)];
bool outdated;
if (*lut_entry)
@ -173,7 +165,7 @@ bool lightrec_block_is_outdated(struct block *block)
if (block->function)
*lut_entry = block->function;
else
*lut_entry = block->state->get_next_block;
*lut_entry = state->get_next_block;
}
return outdated;

View File

@ -22,6 +22,6 @@ struct blockcache * lightrec_blockcache_init(struct lightrec_state *state);
void lightrec_free_block_cache(struct blockcache *cache);
u32 lightrec_calculate_block_hash(const struct block *block);
_Bool lightrec_block_is_outdated(struct block *block);
_Bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block);
#endif /* __BLOCKCACHE_H__ */

View File

@ -17,6 +17,7 @@ static const char *std_opcodes[] = {
[OP_BEQ] = "beq ",
[OP_BNE] = "bne ",
[OP_BLEZ] = "blez ",
[OP_BGTZ] = "bgtz ",
[OP_ADDI] = "addi ",
[OP_ADDIU] = "addiu ",
[OP_SLTI] = "slti ",

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,9 @@
struct block;
struct opcode;
void lightrec_rec_opcode(const struct block *block, u16 offset);
void lightrec_emit_eob(const struct block *block, u16 offset);
void lightrec_rec_opcode(struct lightrec_state *state, const struct block *block,
u16 offset);
void lightrec_emit_eob(struct lightrec_state *state, const struct block *block,
u16 offset);
#endif /* __EMITTER_H__ */

View File

@ -379,7 +379,7 @@ static u32 int_do_branch(struct interpreter *inter, u32 old_pc, u32 next_pc)
(inter->op->flags & LIGHTREC_LOCAL_BRANCH) &&
(s16)inter->op->c.i.imm >= 0) {
next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2);
next_pc = lightrec_emulate_block(inter->block, next_pc);
next_pc = lightrec_emulate_block(inter->state, inter->block, next_pc);
}
return next_pc;
@ -823,9 +823,6 @@ static u32 int_special_DIV(struct interpreter *inter)
if (rt == 0) {
hi = rs;
lo = (rs < 0) * 2 - 1;
} else if ((rs == 0x80000000) && (rt == 0xFFFFFFFF)) {
lo = rs;
hi = 0;
} else {
lo = rs / rt;
hi = rs % rt;
@ -1117,13 +1114,14 @@ static u32 int_CP2(struct interpreter *inter)
return int_CP(inter);
}
static u32 lightrec_emulate_block_list(struct block *block, u32 offset)
static u32 lightrec_emulate_block_list(struct lightrec_state *state,
struct block *block, u32 offset)
{
struct interpreter inter;
u32 pc;
inter.block = block;
inter.state = block->state;
inter.state = state;
inter.offset = offset;
inter.op = &block->opcode_list[offset];
inter.cycles = 0;
@ -1134,17 +1132,17 @@ static u32 lightrec_emulate_block_list(struct block *block, u32 offset)
/* Add the cycles of the last branch */
inter.cycles += lightrec_cycles_of_opcode(inter.op->c);
block->state->current_cycle += inter.cycles;
state->current_cycle += inter.cycles;
return pc;
}
u32 lightrec_emulate_block(struct block *block, u32 pc)
u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u32 pc)
{
u32 offset = (kunseg(pc) - kunseg(block->pc)) >> 2;
if (offset < block->nb_ops)
return lightrec_emulate_block_list(block, offset);
return lightrec_emulate_block_list(state, block, offset);
pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc);

View File

@ -10,6 +10,6 @@
struct block;
u32 lightrec_emulate_block(struct block *block, u32 pc);
u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u32 pc);
#endif /* __LIGHTREC_INTERPRETER_H__ */

View File

@ -71,19 +71,18 @@ struct reaper;
struct block {
jit_state_t *_jit;
struct lightrec_state *state;
struct opcode *opcode_list;
void (*function)(void);
const u32 *code;
struct block *next;
u32 pc;
u32 hash;
unsigned int code_size;
u16 nb_ops;
u8 flags;
#if ENABLE_THREADED_COMPILER
atomic_flag op_list_freed;
#endif
unsigned int code_size;
u16 flags;
u16 nb_ops;
const struct lightrec_mem_map *map;
struct block *next;
};
struct lightrec_branch {
@ -146,7 +145,7 @@ u32 lightrec_rw(struct lightrec_state *state, union code op,
u32 addr, u32 data, u16 *flags,
struct block *block);
void lightrec_free_block(struct block *block);
void lightrec_free_block(struct lightrec_state *state, struct block *block);
void remove_from_code_lut(struct blockcache *cache, struct block *block);
@ -190,8 +189,8 @@ u32 lightrec_mfc(struct lightrec_state *state, union code op);
union code lightrec_read_opcode(struct lightrec_state *state, u32 pc);
struct block * lightrec_get_block(struct lightrec_state *state, u32 pc);
int lightrec_compile_block(struct block *block);
void lightrec_free_opcode_list(struct block *block);
int lightrec_compile_block(struct lightrec_state *state, struct block *block);
void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block);
unsigned int lightrec_cycles_of_opcode(union code code);

View File

@ -199,18 +199,34 @@ static void lightrec_invalidate_map(struct lightrec_state *state,
}
static const struct lightrec_mem_map *
lightrec_get_map(struct lightrec_state *state, u32 kaddr)
lightrec_get_map(struct lightrec_state *state,
void **host, u32 kaddr)
{
const struct lightrec_mem_map *map;
unsigned int i;
u32 addr;
for (i = 0; i < state->nb_maps; i++) {
const struct lightrec_mem_map *map = &state->maps[i];
const struct lightrec_mem_map *mapi = &state->maps[i];
if (kaddr >= map->pc && kaddr < map->pc + map->length)
return map;
if (kaddr >= mapi->pc && kaddr < mapi->pc + mapi->length) {
map = mapi;
break;
}
}
return NULL;
if (i == state->nb_maps)
return NULL;
addr = kaddr - map->pc;
while (map->mirror_of)
map = map->mirror_of;
if (host)
*host = map->address + addr;
return map;
}
u32 lightrec_rw(struct lightrec_state *state, union code op,
@ -218,25 +234,17 @@ u32 lightrec_rw(struct lightrec_state *state, union code op,
{
const struct lightrec_mem_map *map;
const struct lightrec_mem_map_ops *ops;
u32 kaddr, pc, opcode = op.opcode;
u32 opcode = op.opcode;
void *host;
addr += (s16) op.i.imm;
kaddr = kunseg(addr);
map = lightrec_get_map(state, kaddr);
map = lightrec_get_map(state, &host, kunseg(addr));
if (!map) {
__segfault_cb(state, addr, block);
return 0;
}
pc = map->pc;
while (map->mirror_of)
map = map->mirror_of;
host = (void *)((uintptr_t)map->address + kaddr - pc);
if (unlikely(map->ops)) {
if (flags)
*flags |= LIGHTREC_HW_IO;
@ -439,7 +447,7 @@ struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
{
struct block *block = lightrec_find_block(state->block_cache, pc);
if (block && lightrec_block_is_outdated(block)) {
if (block && lightrec_block_is_outdated(state, block)) {
pr_debug("Block at PC 0x%08x is outdated!\n", block->pc);
/* Make sure the recompiler isn't processing the block we'll
@ -449,7 +457,7 @@ struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
lightrec_unregister_block(state->block_cache, block);
remove_from_code_lut(state->block_cache, block);
lightrec_free_block(block);
lightrec_free_block(state, block);
block = NULL;
}
@ -499,11 +507,11 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
if (ENABLE_THREADED_COMPILER)
lightrec_recompiler_add(state->rec, block);
else
lightrec_compile_block(block);
lightrec_compile_block(state, block);
}
if (ENABLE_THREADED_COMPILER && likely(!should_recompile))
func = lightrec_recompiler_run_first_pass(block, &pc);
func = lightrec_recompiler_run_first_pass(state, block, &pc);
else
func = block->function;
@ -514,14 +522,14 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
if (!ENABLE_THREADED_COMPILER &&
((ENABLE_FIRST_PASS && likely(!should_recompile)) ||
unlikely(block->flags & BLOCK_NEVER_COMPILE)))
pc = lightrec_emulate_block(block, pc);
pc = lightrec_emulate_block(state, block, pc);
if (likely(!(block->flags & BLOCK_NEVER_COMPILE))) {
/* Then compile it using the profiled data */
if (ENABLE_THREADED_COMPILER)
lightrec_recompiler_add(state->rec, block);
else
lightrec_compile_block(block);
lightrec_compile_block(state, block);
}
if (state->exit_flags != LIGHTREC_EXIT_NORMAL ||
@ -608,7 +616,6 @@ static struct block * generate_wrapper(struct lightrec_state *state)
jit_patch_at(jit_jmpi(), to_fn_epilog);
jit_epilog();
block->state = state;
block->_jit = _jit;
block->function = jit_emit();
block->opcode_list = NULL;
@ -637,25 +644,20 @@ err_no_mem:
static u32 lightrec_memset(struct lightrec_state *state)
{
const struct lightrec_mem_map *map;
u32 pc, kunseg_pc = kunseg(state->native_reg_cache[4]);
u32 kunseg_pc = kunseg(state->native_reg_cache[4]);
void *host;
const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg_pc);
u32 length = state->native_reg_cache[5] * 4;
map = lightrec_get_map(state, kunseg_pc);
if (!map) {
pr_err("Unable to find memory map for memset target address "
"0x%x\n", kunseg_pc);
return 0;
}
pc = kunseg_pc - map->pc;
while (map->mirror_of)
map = map->mirror_of;
pr_debug("Calling host memset, PC 0x%x (host address 0x%lx) for %u bytes\n",
kunseg_pc, (uintptr_t)map->address + pc, length);
memset((void *)map->address + pc, 0, length);
kunseg_pc, (uintptr_t)host, length);
memset(host, 0, length);
if (!state->invalidate_from_dma_only)
lightrec_invalidate_map(state, map, kunseg_pc, length);
@ -798,7 +800,6 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
jit_retr(LIGHTREC_REG_CYCLE);
jit_epilog();
block->state = state;
block->_jit = _jit;
block->function = jit_emit();
block->opcode_list = NULL;
@ -833,17 +834,11 @@ err_no_mem:
union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
{
u32 addr, kunseg_pc = kunseg(pc);
const u32 *code;
const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
void *host;
addr = kunseg_pc - map->pc;
while (map->mirror_of)
map = map->mirror_of;
code = map->address + addr;
lightrec_get_map(state, &host, kunseg(pc));
const u32 *code = (u32 *)host;
return (union code) *code;
}
@ -852,9 +847,9 @@ unsigned int lightrec_cycles_of_opcode(union code code)
return 2;
}
void lightrec_free_opcode_list(struct block *block)
void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block)
{
lightrec_free(block->state, MEM_FOR_IR,
lightrec_free(state, MEM_FOR_IR,
sizeof(*block->opcode_list) * block->nb_ops,
block->opcode_list);
}
@ -904,21 +899,14 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
{
struct opcode *list;
struct block *block;
const u32 *code;
u32 addr, kunseg_pc = kunseg(pc);
const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
void *host;
const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc));
const u32 *code = (u32 *) host;
unsigned int length;
if (!map)
return NULL;
addr = kunseg_pc - map->pc;
while (map->mirror_of)
map = map->mirror_of;
code = map->address + addr;
block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
if (!block) {
pr_err("Unable to recompile block: Out of memory\n");
@ -932,11 +920,10 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
}
block->pc = pc;
block->state = state;
block->_jit = NULL;
block->function = NULL;
block->opcode_list = list;
block->map = map;
block->code = code;
block->next = NULL;
block->flags = 0;
block->code_size = 0;
@ -945,7 +932,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
#endif
block->nb_ops = length / sizeof(u32);
lightrec_optimize(block);
lightrec_optimize(state, block);
length = block->nb_ops * sizeof(u32);
@ -1010,22 +997,21 @@ static bool lightrec_block_is_fully_tagged(const struct block *block)
return true;
}
static void lightrec_reap_block(void *data)
static void lightrec_reap_block(struct lightrec_state *state, void *data)
{
struct block *block = data;
pr_debug("Reap dead block at PC 0x%08x\n", block->pc);
lightrec_free_block(block);
lightrec_free_block(state, block);
}
static void lightrec_reap_jit(void *data)
static void lightrec_reap_jit(struct lightrec_state *state, void *data)
{
_jit_destroy_state(data);
}
int lightrec_compile_block(struct block *block)
int lightrec_compile_block(struct lightrec_state *state, struct block *block)
{
struct lightrec_state *state = block->state;
struct lightrec_branch_target *target;
bool op_list_freed = false, fully_tagged = false;
struct block *block2;
@ -1073,10 +1059,10 @@ int lightrec_compile_block(struct block *block)
pr_debug("Branch at offset 0x%x will be emulated\n",
i << 2);
lightrec_emit_eob(block, i);
lightrec_emit_eob(state, block, i);
skip_next = !(elm->flags & LIGHTREC_NO_DS);
} else {
lightrec_rec_opcode(block, i);
lightrec_rec_opcode(state, block, i);
skip_next = has_delay_slot(elm->c) &&
!(elm->flags & LIGHTREC_NO_DS);
#if _WIN32
@ -1165,7 +1151,7 @@ int lightrec_compile_block(struct block *block)
lightrec_reap_block,
block2);
} else {
lightrec_free_block(block2);
lightrec_free_block(state, block2);
}
}
}
@ -1189,7 +1175,7 @@ int lightrec_compile_block(struct block *block)
if (fully_tagged && !op_list_freed) {
pr_debug("Block PC 0x%08x is fully tagged"
" - free opcode list\n", block->pc);
lightrec_free_opcode_list(block);
lightrec_free_opcode_list(state, block);
block->opcode_list = NULL;
}
@ -1267,7 +1253,7 @@ u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
state->exit_flags = LIGHTREC_EXIT_NORMAL;
pc = lightrec_emulate_block(block, pc);
pc = lightrec_emulate_block(state, block, pc);
if (LOG_LEVEL >= INFO_L)
lightrec_print_info(state);
@ -1275,15 +1261,15 @@ u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
return pc;
}
void lightrec_free_block(struct block *block)
void lightrec_free_block(struct lightrec_state *state, struct block *block)
{
lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32));
if (block->opcode_list)
lightrec_free_opcode_list(block);
lightrec_free_opcode_list(state, block);
if (block->_jit)
_jit_destroy_state(block->_jit);
lightrec_unregister(MEM_FOR_CODE, block->code_size);
lightrec_free(block->state, MEM_FOR_IR, sizeof(*block), block);
lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
}
struct lightrec_state * lightrec_init(char *argv0,
@ -1387,7 +1373,7 @@ struct lightrec_state * lightrec_init(char *argv0,
return state;
err_free_dispatcher:
lightrec_free_block(state->dispatcher);
lightrec_free_block(state, state->dispatcher);
err_free_reaper:
if (ENABLE_THREADED_COMPILER)
lightrec_reaper_destroy(state->reaper);
@ -1413,6 +1399,10 @@ err_finish_jit:
void lightrec_destroy(struct lightrec_state *state)
{
/* Force a print info on destroy*/
state->current_cycle = ~state->current_cycle;
lightrec_print_info(state);
if (ENABLE_THREADED_COMPILER) {
lightrec_free_recompiler(state->rec);
lightrec_reaper_destroy(state->reaper);
@ -1420,8 +1410,8 @@ void lightrec_destroy(struct lightrec_state *state)
lightrec_free_regcache(state->reg_cache);
lightrec_free_block_cache(state->block_cache);
lightrec_free_block(state->dispatcher);
lightrec_free_block(state->c_wrapper_block);
lightrec_free_block(state, state->dispatcher);
lightrec_free_block(state, state->c_wrapper_block);
finish_jit();
#if ENABLE_TINYMM
@ -1435,12 +1425,9 @@ void lightrec_destroy(struct lightrec_state *state)
void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len)
{
u32 kaddr = kunseg(addr & ~0x3);
const struct lightrec_mem_map *map = lightrec_get_map(state, kaddr);
const struct lightrec_mem_map *map = lightrec_get_map(state, NULL, kaddr);
if (map) {
while (map->mirror_of)
map = map->mirror_of;
if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM])
return;

View File

@ -570,7 +570,7 @@ static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v)
return known;
}
static int lightrec_transform_ops(struct block *block)
static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{
struct opcode *list;
unsigned int i;
@ -661,7 +661,7 @@ static int lightrec_transform_ops(struct block *block)
return 0;
}
static int lightrec_switch_delay_slots(struct block *block)
static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
{
struct opcode *list, *next = &block->opcode_list[0];
unsigned int i;
@ -751,7 +751,7 @@ static int lightrec_switch_delay_slots(struct block *block)
return 0;
}
static int shrink_opcode_list(struct block *block, u16 new_size)
static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
{
struct opcode *list;
@ -762,7 +762,7 @@ static int shrink_opcode_list(struct block *block, u16 new_size)
}
list = lightrec_malloc(block->state, MEM_FOR_IR,
list = lightrec_malloc(state, MEM_FOR_IR,
sizeof(*list) * new_size);
if (!list) {
pr_err("Unable to allocate memory\n");
@ -771,7 +771,7 @@ static int shrink_opcode_list(struct block *block, u16 new_size)
memcpy(list, block->opcode_list, sizeof(*list) * new_size);
lightrec_free_opcode_list(block);
lightrec_free_opcode_list(state, block);
block->opcode_list = list;
block->nb_ops = new_size;
@ -781,7 +781,8 @@ static int shrink_opcode_list(struct block *block, u16 new_size)
return 0;
}
static int lightrec_detect_impossible_branches(struct block *block)
static int lightrec_detect_impossible_branches(struct lightrec_state *state,
struct block *block)
{
struct opcode *op, *next = &block->opcode_list[0];
unsigned int i;
@ -814,7 +815,7 @@ static int lightrec_detect_impossible_branches(struct block *block)
* only keep the first two opcodes of the block (the
* branch itself + its delay slot) */
if (block->nb_ops > 2)
ret = shrink_opcode_list(block, 2);
ret = shrink_opcode_list(state, block, 2);
break;
}
}
@ -822,7 +823,7 @@ static int lightrec_detect_impossible_branches(struct block *block)
return ret;
}
static int lightrec_local_branches(struct block *block)
static int lightrec_local_branches(struct lightrec_state *state, struct block *block)
{
struct opcode *list;
unsigned int i;
@ -913,7 +914,7 @@ static void lightrec_add_unload(struct opcode *op, u8 reg)
op->flags |= LIGHTREC_UNLOAD_RT;
}
static int lightrec_early_unload(struct block *block)
static int lightrec_early_unload(struct lightrec_state *state, struct block *block)
{
unsigned int i, offset;
struct opcode *op;
@ -952,7 +953,7 @@ static int lightrec_early_unload(struct block *block)
return 0;
}
static int lightrec_flag_stores(struct block *block)
static int lightrec_flag_stores(struct lightrec_state *state, struct block *block)
{
struct opcode *list;
u32 known = BIT(0);
@ -975,7 +976,7 @@ static int lightrec_flag_stores(struct block *block)
* on the heuristic that stores using one of these
* registers as address will never hit a code page. */
if (list->i.rs >= 28 && list->i.rs <= 29 &&
!block->state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
!state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n",
list->opcode);
list->flags |= LIGHTREC_NO_INVALIDATE;
@ -1004,7 +1005,7 @@ static int lightrec_flag_stores(struct block *block)
static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
const struct opcode *last,
u32 mask, bool sync, bool mflo)
u32 mask, bool sync, bool mflo, bool another)
{
const struct opcode *op, *next = &block->opcode_list[offset];
u32 old_mask;
@ -1041,9 +1042,9 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
- !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
mask, sync, mflo);
mask, sync, mflo, false);
reg2 = get_mfhi_mflo_reg(block, offset + 1, next,
mask, sync, mflo);
mask, sync, mflo, false);
if (reg > 0 && reg == reg2)
return reg;
if (!reg && !reg2)
@ -1082,6 +1083,14 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
return reg;
case OP_SPECIAL_MFHI:
if (!mflo) {
if (another)
return op->r.rd;
/* Must use REG_HI if there is another MFHI target*/
reg2 = get_mfhi_mflo_reg(block, i + 1, next,
0, sync, mflo, true);
if (reg2 > 0 && reg2 != REG_HI)
return REG_HI;
if (!sync && !(old_mask & BIT(op->r.rd)))
return op->r.rd;
else
@ -1090,6 +1099,14 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
continue;
case OP_SPECIAL_MFLO:
if (mflo) {
if (another)
return op->r.rd;
/* Must use REG_LO if there is another MFLO target*/
reg2 = get_mfhi_mflo_reg(block, i + 1, next,
0, sync, mflo, true);
if (reg2 > 0 && reg2 != REG_LO)
return REG_LO;
if (!sync && !(old_mask & BIT(op->r.rd)))
return op->r.rd;
else
@ -1160,7 +1177,7 @@ static void lightrec_replace_lo_hi(struct block *block, u16 offset,
}
}
static int lightrec_flag_mults_divs(struct block *block)
static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
{
struct opcode *list;
u8 reg_hi, reg_lo;
@ -1187,14 +1204,14 @@ static int lightrec_flag_mults_divs(struct block *block)
(list->flags & LIGHTREC_NO_DS))
continue;
reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true);
reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
if (reg_lo == 0) {
pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
" not writing LO\n", i << 2);
list->flags |= LIGHTREC_NO_LO;
}
reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false);
reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false, false);
if (reg_hi == 0) {
pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
" not writing HI\n", i << 2);
@ -1311,7 +1328,8 @@ static bool remove_div_sequence(struct block *block, unsigned int offset)
return false;
}
static int lightrec_remove_div_by_zero_check_sequence(struct block *block)
static int lightrec_remove_div_by_zero_check_sequence(struct lightrec_state *state,
struct block *block)
{
struct opcode *op;
unsigned int i;
@ -1340,7 +1358,7 @@ static const u32 memset_code[] = {
0x00000000, // nop
};
static int lightrec_replace_memset(struct block *block)
static int lightrec_replace_memset(struct lightrec_state *state, struct block *block)
{
unsigned int i;
union code c;
@ -1364,7 +1382,7 @@ static int lightrec_replace_memset(struct block *block)
return 0;
}
static int (*lightrec_optimizers[])(struct block *) = {
static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
@ -1376,14 +1394,14 @@ static int (*lightrec_optimizers[])(struct block *) = {
IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
};
int lightrec_optimize(struct block *block)
int lightrec_optimize(struct lightrec_state *state, struct block *block)
{
unsigned int i;
int ret;
for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
if (lightrec_optimizers[i]) {
ret = (*lightrec_optimizers[i])(block);
ret = (*lightrec_optimizers[i])(state, block);
if (ret)
return ret;
}

View File

@ -21,6 +21,6 @@ _Bool is_syscall(union code c);
_Bool should_emulate(const struct opcode *op);
int lightrec_optimize(struct block *block);
int lightrec_optimize(struct lightrec_state *state, struct block *block);
#endif /* __OPTIMIZER_H__ */

View File

@ -102,7 +102,7 @@ void lightrec_reaper_reap(struct reaper *reaper)
reaper_elm = container_of(elm, struct reaper_elm, slist);
(*reaper_elm->func)(reaper_elm->data);
(*reaper_elm->func)(reaper->state, reaper_elm->data);
lightrec_free(reaper->state, MEM_FOR_LIGHTREC,
sizeof(*reaper_elm), reaper_elm);

View File

@ -9,7 +9,7 @@
struct lightrec_state;
struct reaper;
typedef void (*reap_func_t)(void *);
typedef void (*reap_func_t)(struct lightrec_state *state, void *);
struct reaper *lightrec_reaper_init(struct lightrec_state *state);
void lightrec_reaper_destroy(struct reaper *reaper);

View File

@ -44,7 +44,7 @@ static void lightrec_compile_list(struct recompiler *rec)
pthread_mutex_unlock(&rec->mutex);
ret = lightrec_compile_block(block);
ret = lightrec_compile_block(rec->state, block);
if (ret) {
pr_err("Unable to compile block at PC 0x%x: %d\n",
block->pc, ret);
@ -242,7 +242,8 @@ void lightrec_recompiler_remove(struct recompiler *rec, struct block *block)
pthread_mutex_unlock(&rec->mutex);
}
void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
struct block *block, u32 *pc)
{
bool freed;
@ -256,7 +257,7 @@ void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
/* The block was already compiled but the opcode list
* didn't get freed yet - do it now */
lightrec_free_opcode_list(block);
lightrec_free_opcode_list(state, block);
block->opcode_list = NULL;
}
}
@ -269,7 +270,7 @@ void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
freed = atomic_flag_test_and_set(&block->op_list_freed);
/* Block wasn't compiled yet - run the interpreter */
*pc = lightrec_emulate_block(block, *pc);
*pc = lightrec_emulate_block(state, block, *pc);
if (!freed)
atomic_flag_clear(&block->op_list_freed);
@ -281,7 +282,7 @@ void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
pr_debug("Block PC 0x%08x is fully tagged"
" - free opcode list\n", block->pc);
lightrec_free_opcode_list(block);
lightrec_free_opcode_list(state, block);
block->opcode_list = NULL;
}

View File

@ -15,6 +15,7 @@ void lightrec_free_recompiler(struct recompiler *rec);
int lightrec_recompiler_add(struct recompiler *rec, struct block *block);
void lightrec_recompiler_remove(struct recompiler *rec, struct block *block);
void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc);
void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
struct block *block, u32 *pc);
#endif /* __LIGHTREC_RECOMPILER_H__ */

View File

@ -1724,10 +1724,14 @@ int lightrec_init_mmap()
#endif
#ifdef HAVE_SHM
int memfd;
const char *shm_name = "/lightrec_memfd";
const char *shm_name = "/lightrec_memfd_beetle";
memfd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL,
S_IRUSR | S_IWUSR);
memfd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
if (memfd < 0 && errno == EEXIST) {
shm_unlink(shm_name);
memfd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
}
if (memfd < 0) {
log_cb(RETRO_LOG_ERROR, "Failed to create SHM: %s\n", strerror(errno));

View File

@ -16,8 +16,7 @@
#define OPT_LOCAL_BRANCHES 1
#define OPT_SWITCH_DELAY_SLOTS 1
#define OPT_FLAG_STORES 1
/* Disable until ape escape bug is fixed */
#define OPT_FLAG_MULT_DIV 0
#define OPT_FLAG_MULT_DIV 1
#define OPT_EARLY_UNLOAD 1
#endif /* __LIGHTREC_CONFIG_H__ */

View File

@ -85,7 +85,7 @@ static FastFIFO<uint32, 0x20> GPU_BlitterFIFO; // 0x10 on an actual PS1 GPU, 0x2
struct CTEntry
{
void (*func[4][4])(PS_GPU* g, const uint32 *cb, bool MaskEval_TA);
void (*func[4][8])(PS_GPU* g, const uint32 *cb);
uint8_t len;
uint8_t fifo_fb_len;
bool ss_cmd;
@ -141,20 +141,24 @@ static void SetTPage(PS_GPU *gpu, const uint32_t cmdw)
/* C-style function wrappers so our command table isn't so ginormous(in memory usage). */
template<int numvertices, bool shaded, bool textured,
int BlendMode, bool TexMult, uint32 TexMode_TA>
static void G_Command_DrawPolygon(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
int BlendMode, bool TexMult, uint32 TexMode_TA, bool MaskEval_TA>
static void G_Command_DrawPolygon(PS_GPU* g, const uint32 *cb)
{
Command_DrawPolygon<numvertices, shaded, textured,
BlendMode, TexMult, TexMode_TA>(g, cb, PGXP_enabled(), MaskEval_TA);
if (PGXP_enabled())
Command_DrawPolygon<numvertices, shaded, textured,
BlendMode, TexMult, TexMode_TA, MaskEval_TA, true>(g, cb);
else
Command_DrawPolygon<numvertices, shaded, textured,
BlendMode, TexMult, TexMode_TA, MaskEval_TA, false>(g, cb);
}
static void Command_ClearCache(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_ClearCache(PS_GPU* g, const uint32 *cb)
{
InvalidateCache(g);
}
static void Command_IRQ(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_IRQ(PS_GPU* g, const uint32 *cb)
{
g->IRQPending = true;
IRQ_Assert(IRQ_GPU, g->IRQPending);
@ -162,7 +166,7 @@ static void Command_IRQ(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
// Special RAM write mode(16 pixels at a time),
// does *not* appear to use mask drawing environment settings.
static void Command_FBFill(PS_GPU* gpu, const uint32 *cb, bool MaskEval_TA)
static void Command_FBFill(PS_GPU* gpu, const uint32 *cb)
{
unsigned y;
int32_t r = cb[0] & 0xFF;
@ -198,7 +202,7 @@ static void Command_FBFill(PS_GPU* gpu, const uint32 *cb, bool MaskEval_TA)
rsx_intf_fill_rect(cb[0], destX, destY, width, height);
}
static void Command_FBCopy(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_FBCopy(PS_GPU* g, const uint32 *cb)
{
unsigned y;
int32_t sourceX = (cb[1] >> 0) & 0x3FF;
@ -251,7 +255,7 @@ static void Command_FBCopy(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
rsx_intf_copy_rect(sourceX, sourceY, destX, destY, width, height, g->MaskEvalAND, g->MaskSetOR);
}
static void Command_FBWrite(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_FBWrite(PS_GPU* g, const uint32 *cb)
{
//assert(InCmd == INCMD_NONE);
@ -280,7 +284,7 @@ static void Command_FBWrite(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
* raw_height == 0, or raw_height != 0x200 && (raw_height & 0x1FF) == 0
*/
static void Command_FBRead(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_FBRead(PS_GPU* g, const uint32 *cb)
{
//assert(g->InCmd == INCMD_NONE);
@ -318,7 +322,7 @@ static void Command_FBRead(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
}
}
static void Command_DrawMode(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_DrawMode(PS_GPU* g, const uint32 *cb)
{
const uint32 cmdw = *cb;
@ -337,7 +341,7 @@ static void Command_DrawMode(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
//printf("*******************DFE: %d -- scanline=%d\n", dfe, scanline);
}
static void Command_TexWindow(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_TexWindow(PS_GPU* g, const uint32 *cb)
{
g->tww = (*cb & 0x1F);
g->twh = ((*cb >> 5) & 0x1F);
@ -348,7 +352,7 @@ static void Command_TexWindow(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
rsx_intf_set_tex_window(g->tww, g->twh, g->twx, g->twy);
}
static void Command_Clip0(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_Clip0(PS_GPU* g, const uint32 *cb)
{
g->ClipX0 = *cb & 1023;
g->ClipY0 = (*cb >> 10) & 1023;
@ -356,7 +360,7 @@ static void Command_Clip0(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
g->ClipX1, g->ClipY1);
}
static void Command_Clip1(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_Clip1(PS_GPU* g, const uint32 *cb)
{
g->ClipX1 = *cb & 1023;
g->ClipY1 = (*cb >> 10) & 1023;
@ -364,7 +368,7 @@ static void Command_Clip1(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
g->ClipX1, g->ClipY1);
}
static void Command_DrawingOffset(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_DrawingOffset(PS_GPU* g, const uint32 *cb)
{
g->OffsX = sign_x_to_s32(11, (*cb & 2047));
g->OffsY = sign_x_to_s32(11, ((*cb >> 11) & 2047));
@ -372,7 +376,7 @@ static void Command_DrawingOffset(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
//fprintf(stderr, "[GPU] Drawing offset: %d(raw=%d) %d(raw=%d) -- %d\n", OffsX, *cb, OffsY, *cb >> 11, scanline);
}
static void Command_MaskSetting(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
static void Command_MaskSetting(PS_GPU* g, const uint32 *cb)
{
//printf("Mask setting: %08x\n", *cb);
g->MaskSetOR = (*cb & 1) ? 0x8000 : 0x0000;
@ -1085,15 +1089,15 @@ static void ProcessFIFO(uint32_t in_count)
}
if ((cc >= 0x80) && (cc <= 0x9F))
Command_FBCopy(&GPU, CB, GPU.MaskEvalAND);
Command_FBCopy(&GPU, CB);
else if ((cc >= 0xA0) && (cc <= 0xBF))
Command_FBWrite(&GPU, CB, GPU.MaskEvalAND);
Command_FBWrite(&GPU, CB);
else if ((cc >= 0xC0) && (cc <= 0xDF))
Command_FBRead(&GPU, CB, GPU.MaskEvalAND);
Command_FBRead(&GPU, CB);
else
{
if (command->func[GPU.abr][GPU.TexMode])
command->func[GPU.abr][GPU.TexMode](&GPU, CB, GPU.MaskEvalAND);
command->func[GPU.abr][GPU.TexMode | (GPU.MaskEvalAND ? 0x4 : 0x0)](&GPU, CB);
}
}

View File

@ -68,8 +68,8 @@ static INLINE void PlotPixelBlend(uint16_t bg_pix, uint16_t *fore_pix)
}
template<int BlendMode, bool textured>
static INLINE void PlotPixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pix, bool MaskEval_TA)
template<int BlendMode, bool MaskEval_TA, bool textured>
static INLINE void PlotPixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pix)
{
// More Y precision bits than GPU RAM installed in (non-arcade, at least) Playstation hardware.
y &= (512 << gpu->upscale_shift) - 1;
@ -91,8 +91,8 @@ static INLINE void PlotPixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pi
}
/// Copy of PlotPixel without internal upscaling, used to draw lines and sprites
template<int BlendMode, bool textured>
static INLINE void PlotNativePixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pix, bool MaskEval_TA)
template<int BlendMode, bool MaskEval_TA, bool textured>
static INLINE void PlotNativePixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pix)
{
uint16_t output;
y &= 511; // More Y precision bits than GPU RAM installed in (non-arcade, at least) Playstation hardware.
@ -251,15 +251,19 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
//#define BM_HELPER(fg) { fg(0), fg(1), fg(2), fg(3) }
#define POLY_HELPER_SUB(bm, cv, tm) \
G_Command_DrawPolygon<3 + ((cv & 0x8) >> 3), ((cv & 0x10) >> 4), ((cv & 0x4) >> 2), ((cv & 0x2) >> 1) ? bm : -1, ((cv & 1) ^ 1) & ((cv & 0x4) >> 2), tm>
#define POLY_HELPER_SUB(bm, cv, tm, mam) \
G_Command_DrawPolygon<3 + ((cv & 0x8) >> 3), ((cv & 0x10) >> 4), ((cv & 0x4) >> 2), ((cv & 0x2) >> 1) ? bm : -1, ((cv & 1) ^ 1) & ((cv & 0x4) >> 2), tm, mam >
#define POLY_HELPER_FG(bm, cv) \
{ \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0)), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0)), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0)), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0)), \
#define POLY_HELPER_FG(bm, cv) \
{ \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0), 0), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0), 0), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 0), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 0), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0), 1), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0), 1), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 1), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 1), \
}
#define POLY_HELPER(cv) \
@ -270,14 +274,18 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
false \
}
#define SPR_HELPER_SUB(bm, cv, tm) Command_DrawSprite<(cv >> 3) & 0x3, ((cv & 0x4) >> 2), ((cv & 0x2) >> 1) ? bm : -1, ((cv & 1) ^ 1) & ((cv & 0x4) >> 2), tm>
#define SPR_HELPER_SUB(bm, cv, tm, mam) Command_DrawSprite<(cv >> 3) & 0x3, ((cv & 0x4) >> 2), ((cv & 0x2) >> 1) ? bm : -1, ((cv & 1) ^ 1) & ((cv & 0x4) >> 2), tm, mam>
#define SPR_HELPER_FG(bm, cv) \
{ \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0)), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0)), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0)), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0)), \
#define SPR_HELPER_FG(bm, cv) \
{ \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0), 0), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0), 0), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 0), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 0), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0), 1), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0), 1), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 1), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 1), \
}
@ -289,14 +297,18 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
false \
}
#define LINE_HELPER_SUB(bm, cv) Command_DrawLine<((cv & 0x08) >> 3), ((cv & 0x10) >> 4), ((cv & 0x2) >> 1) ? bm : -1>
#define LINE_HELPER_SUB(bm, cv, mam) Command_DrawLine<((cv & 0x08) >> 3), ((cv & 0x10) >> 4), ((cv & 0x2) >> 1) ? bm : -1, mam>
#define LINE_HELPER_FG(bm, cv) \
{ \
LINE_HELPER_SUB(bm, cv), \
LINE_HELPER_SUB(bm, cv), \
LINE_HELPER_SUB(bm, cv), \
LINE_HELPER_SUB(bm, cv), \
LINE_HELPER_SUB(bm, cv, 0), \
LINE_HELPER_SUB(bm, cv, 0), \
LINE_HELPER_SUB(bm, cv, 0), \
LINE_HELPER_SUB(bm, cv, 0), \
LINE_HELPER_SUB(bm, cv, 1), \
LINE_HELPER_SUB(bm, cv, 1), \
LINE_HELPER_SUB(bm, cv, 1), \
LINE_HELPER_SUB(bm, cv, 1) \
}
#define LINE_HELPER(cv) \
@ -307,7 +319,7 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
false \
}
#define OTHER_HELPER_FG(bm, arg_ptr) { arg_ptr, arg_ptr, arg_ptr, arg_ptr }
#define OTHER_HELPER_FG(bm, arg_ptr) { arg_ptr, arg_ptr, arg_ptr, arg_ptr, arg_ptr, arg_ptr, arg_ptr, arg_ptr }
// OTHER_HELPER expands to one brace-initialized table entry: four
// OTHER_HELPER_FG groups (bm = 0..3), each built from the same arg_ptr,
// followed by arg_cs, arg_fbcs and arg_ss in that order.
// NOTE(review): the meaning of arg_cs/arg_fbcs/arg_ss comes from the entry
// type's field order, which is not visible here -- confirm against the struct.
#define OTHER_HELPER(arg_cs, arg_fbcs, arg_ss, arg_ptr) { { OTHER_HELPER_FG(0, arg_ptr), OTHER_HELPER_FG(1, arg_ptr), OTHER_HELPER_FG(2, arg_ptr), OTHER_HELPER_FG(3, arg_ptr) }, arg_cs, arg_fbcs, arg_ss }
// OTHER_HELPER_X2 emits two identical OTHER_HELPER entries, comma-separated.
#define OTHER_HELPER_X2(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER(arg_cs, arg_fbcs, arg_ss, arg_ptr)
// OTHER_HELPER_X4 emits two OTHER_HELPER_X2 expansions, i.e. four identical entries.
#define OTHER_HELPER_X4(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X2(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X2(arg_cs, arg_fbcs, arg_ss, arg_ptr)
@ -315,5 +327,5 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
// OTHER_HELPER_X16 emits two OTHER_HELPER_X8 expansions, comma-separated
// (16 identical entries, assuming OTHER_HELPER_X8 yields 8 -- its definition
// is not visible here).
#define OTHER_HELPER_X16(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X8(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X8(arg_cs, arg_fbcs, arg_ss, arg_ptr)
// OTHER_HELPER_X32 emits two OTHER_HELPER_X16 expansions, doubling the count again.
#define OTHER_HELPER_X32(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X16(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X16(arg_cs, arg_fbcs, arg_ss, arg_ptr)
#define NULLCMD_FG(bm) { NULL, NULL, NULL, NULL }
#define NULLCMD_FG(bm) { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }
// NULLCMD expands to a table entry whose four NULLCMD_FG groups (bm = 0..3)
// hold only NULL pointers, followed by the constants 1, 1, true.
// NOTE(review): the trailing values presumably fill the same fields as
// OTHER_HELPER's arg_cs, arg_fbcs, arg_ss -- confirm against the entry struct.
#define NULLCMD() { { NULLCMD_FG(0), NULLCMD_FG(1), NULLCMD_FG(2), NULLCMD_FG(3) }, 1, 1, true }

View File

@ -101,8 +101,8 @@ static INLINE void AddLineStep(line_fxp_coord *point, const line_fxp_step *step)
}
}
template<bool gouraud, int BlendMode>
static void DrawLine(PS_GPU *gpu, line_point *points, bool MaskEval_TA)
template<bool gouraud, int BlendMode, bool MaskEval_TA>
static void DrawLine(PS_GPU *gpu, line_point *points)
{
line_fxp_coord cur_point;
line_fxp_step step;
@ -155,15 +155,15 @@ static void DrawLine(PS_GPU *gpu, line_point *points, bool MaskEval_TA)
// FIXME: There has to be a faster way than checking for being inside the drawing area for each pixel.
if(x >= gpu->ClipX0 && x <= gpu->ClipX1 && y >= gpu->ClipY0 && y <= gpu->ClipY1)
PlotNativePixel<BlendMode, false>(gpu, x, y, pix, MaskEval_TA);
PlotNativePixel<BlendMode, MaskEval_TA, false>(gpu, x, y, pix);
}
AddLineStep<gouraud>(&cur_point, &step);
}
}
template<bool polyline, bool gouraud, int BlendMode>
static void Command_DrawLine(PS_GPU *gpu, const uint32_t *cb, bool MaskEval_TA)
template<bool polyline, bool gouraud, int BlendMode, bool MaskEval_TA>
static void Command_DrawLine(PS_GPU *gpu, const uint32_t *cb)
{
line_point points[2];
const uint8_t cc = cb[0] >> 24; // For pline handling later.
@ -240,5 +240,5 @@ static void Command_DrawLine(PS_GPU *gpu, const uint32_t *cb, bool MaskEval_TA)
#endif
if (rsx_intf_has_software_renderer())
DrawLine<gouraud, BlendMode>(gpu, points, MaskEval_TA);
DrawLine<gouraud, BlendMode, MaskEval_TA>(gpu, points);
}

View File

@ -115,8 +115,8 @@ static INLINE void AddIDeltas_DY(i_group &ig, const i_deltas &idl, uint32_t coun
}
}
template<bool gouraud, bool textured, int BlendMode, bool TexMult, uint32 TexMode_TA>
static INLINE void DrawSpan(PS_GPU *gpu, int y, const int32 x_start, const int32 x_bound, i_group ig, const i_deltas &idl, bool MaskEval_TA)
template<bool gouraud, bool textured, int BlendMode, bool TexMult, uint32 TexMode_TA, bool MaskEval_TA>
static INLINE void DrawSpan(PS_GPU *gpu, int y, const int32 x_start, const int32 x_bound, i_group ig, const i_deltas &idl)
{
if(LineSkipTest(gpu, y >> gpu->upscale_shift))
return;
@ -190,7 +190,7 @@ static INLINE void DrawSpan(PS_GPU *gpu, int y, const int32 x_start, const int32
uint8_t *dither_offset = gpu->DitherLUT[dither_y][dither_x];
fbw = ModTexel(dither_offset, fbw, r, g, b);
}
PlotPixel<BlendMode, true>(gpu, x, y, fbw, MaskEval_TA);
PlotPixel<BlendMode, MaskEval_TA, true>(gpu, x, y, fbw);
}
}
else
@ -210,7 +210,7 @@ static INLINE void DrawSpan(PS_GPU *gpu, int y, const int32 x_start, const int32
pix |= (b >> 3) << 10;
}
PlotPixel<BlendMode, false>(gpu, x, y, pix, MaskEval_TA);
PlotPixel<BlendMode, MaskEval_TA, false>(gpu, x, y, pix);
}
x++;
@ -218,8 +218,8 @@ static INLINE void DrawSpan(PS_GPU *gpu, int y, const int32 x_start, const int32
} while(MDFN_LIKELY(--w > 0));
}
template<bool gouraud, bool textured, int BlendMode, bool TexMult, uint32_t TexMode_TA>
static INLINE void DrawTriangle(PS_GPU *gpu, tri_vertex *vertices, bool MaskEval_TA)
template<bool gouraud, bool textured, int BlendMode, bool TexMult, uint32_t TexMode_TA, bool MaskEval_TA>
static INLINE void DrawTriangle(PS_GPU *gpu, tri_vertex *vertices)
{
i_deltas idl;
unsigned core_vertex;
@ -452,7 +452,7 @@ if(vertices[1].y == vertices[0].y)
continue;
}
DrawSpan<gouraud, textured, BlendMode, TexMult, TexMode_TA>(gpu, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl, MaskEval_TA);
DrawSpan<gouraud, textured, BlendMode, TexMult, TexMode_TA, MaskEval_TA>(gpu, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
}
}
else
@ -470,7 +470,7 @@ if(vertices[1].y == vertices[0].y)
goto skipit;
}
DrawSpan<gouraud, textured, BlendMode, TexMult, TexMode_TA>(gpu, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl, MaskEval_TA);
DrawSpan<gouraud, textured, BlendMode, TexMult, TexMode_TA, MaskEval_TA>(gpu, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
//
//
//
@ -501,8 +501,8 @@ bool Hack_ForceLine(PS_GPU *gpu, tri_vertex* vertices, tri_vertex* outVertices);
extern int psx_pgxp_2d_tol;
template<int numvertices, bool gouraud, bool textured, int BlendMode, bool TexMult, uint32_t TexMode_TA>
static void Command_DrawPolygon(PS_GPU *gpu, const uint32_t *cb, bool pgxp, bool MaskEval_TA)
template<int numvertices, bool gouraud, bool textured, int BlendMode, bool TexMult, uint32_t TexMode_TA, bool MaskEval_TA, bool pgxp>
static void Command_DrawPolygon(PS_GPU *gpu, const uint32_t *cb)
{
tri_vertex vertices[3];
const uint32_t* baseCB = cb;
@ -884,7 +884,7 @@ static void Command_DrawPolygon(PS_GPU *gpu, const uint32_t *cb, bool pgxp, bool
}
if (rsx_intf_has_software_renderer())
DrawTriangle<gouraud, textured, BlendMode, TexMult, TexMode_TA>(gpu, vertices, MaskEval_TA);
DrawTriangle<gouraud, textured, BlendMode, TexMult, TexMode_TA, MaskEval_TA>(gpu, vertices);
// Line Render: Overwrite vertices with those of the second triangle
if ((lineFound) && (numvertices == 3) && (textured))

View File

@ -1,8 +1,8 @@
template<bool textured, int BlendMode, bool TexMult, uint32_t TexMode_TA,
bool FlipX, bool FlipY>
bool MaskEval_TA, bool FlipX, bool FlipY>
static void DrawSprite(PS_GPU *gpu, int32_t x_arg, int32_t y_arg, int32_t w, int32_t h,
uint8_t u_arg, uint8_t v_arg, uint32_t color, uint32_t clut_offset, bool MaskEval_TA)
uint8_t u_arg, uint8_t v_arg, uint32_t color, uint32_t clut_offset)
{
uint8_t u, v;
const int32_t r = color & 0xFF;
@ -100,11 +100,11 @@ static void DrawSprite(PS_GPU *gpu, int32_t x_arg, int32_t y_arg, int32_t w, int
uint8_t *dither_offset = gpu->DitherLUT[2][3];
fbw = ModTexel(dither_offset, fbw, r, g, b);
}
PlotNativePixel<BlendMode, true>(gpu, x, y, fbw, MaskEval_TA);
PlotNativePixel<BlendMode, MaskEval_TA, true>(gpu, x, y, fbw);
}
}
else
PlotNativePixel<BlendMode, false>(gpu, x, y, fill_color, MaskEval_TA);
PlotNativePixel<BlendMode, MaskEval_TA, false>(gpu, x, y, fill_color);
if(textured)
u_r += u_inc;
@ -116,8 +116,8 @@ static void DrawSprite(PS_GPU *gpu, int32_t x_arg, int32_t y_arg, int32_t w, int
}
template<uint8_t raw_size, bool textured, int BlendMode,
bool TexMult, uint32_t TexMode_TA>
static void Command_DrawSprite(PS_GPU *gpu, const uint32_t *cb, bool MaskEval_TA)
bool TexMult, uint32_t TexMode_TA, bool MaskEval_TA>
static void Command_DrawSprite(PS_GPU *gpu, const uint32_t *cb)
{
int32_t x, y;
int32_t w, h;
@ -230,6 +230,7 @@ static void Command_DrawSprite(PS_GPU *gpu, const uint32_t *cb, bool MaskEval_TA
true);
}
#endif
#if 0
printf("SPRITE: %d %d %d -- %d %d\n", raw_size, x, y, w, h);
#endif
@ -241,30 +242,30 @@ static void Command_DrawSprite(PS_GPU *gpu, const uint32_t *cb, bool MaskEval_TA
{
case 0x0000:
if(!TexMult || color == 0x808080)
DrawSprite<textured, BlendMode, false, TexMode_TA, false, false>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA);
DrawSprite<textured, BlendMode, false, TexMode_TA, MaskEval_TA, false, false>(gpu, x, y, w, h, u, v, color, clut);
else
DrawSprite<textured, BlendMode, true, TexMode_TA, false, false>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA);
DrawSprite<textured, BlendMode, true, TexMode_TA, MaskEval_TA, false, false>(gpu, x, y, w, h, u, v, color, clut);
break;
case 0x1000:
if(!TexMult || color == 0x808080)
DrawSprite<textured, BlendMode, false, TexMode_TA, true, false>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA);
DrawSprite<textured, BlendMode, false, TexMode_TA, MaskEval_TA, true, false>(gpu, x, y, w, h, u, v, color, clut);
else
DrawSprite<textured, BlendMode, true, TexMode_TA, true, false>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA);
DrawSprite<textured, BlendMode, true, TexMode_TA, MaskEval_TA, true, false>(gpu, x, y, w, h, u, v, color, clut);
break;
case 0x2000:
if(!TexMult || color == 0x808080)
DrawSprite<textured, BlendMode, false, TexMode_TA, false, true>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA);
DrawSprite<textured, BlendMode, false, TexMode_TA, MaskEval_TA, false, true>(gpu, x, y, w, h, u, v, color, clut);
else
DrawSprite<textured, BlendMode, true, TexMode_TA, false, true>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA);
DrawSprite<textured, BlendMode, true, TexMode_TA, MaskEval_TA, false, true>(gpu, x, y, w, h, u, v, color, clut);
break;
case 0x3000:
if(!TexMult || color == 0x808080)
DrawSprite<textured, BlendMode, false, TexMode_TA, true, true>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA);
DrawSprite<textured, BlendMode, false, TexMode_TA, MaskEval_TA, true, true>(gpu, x, y, w, h, u, v, color, clut);
else
DrawSprite<textured, BlendMode, true, TexMode_TA, true, true>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA);
DrawSprite<textured, BlendMode, true, TexMode_TA, MaskEval_TA, true, true>(gpu, x, y, w, h, u, v, color, clut);
break;
}
}