Merge pull request #812 from libretro/dynarec

Dynarec updates, and revert my bad gpu optimization
This commit is contained in:
Autechre 2021-08-24 11:25:56 +02:00 committed by GitHub
commit 92b93bfe11
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 565 additions and 473 deletions

View File

@ -9,7 +9,7 @@ HAVE_LIGHTREC = 1
LINK_STATIC_LIBCPLUSPLUS = 1 LINK_STATIC_LIBCPLUSPLUS = 1
THREADED_RECOMPILER = 1 THREADED_RECOMPILER = 1
LIGHTREC_DEBUG = 0 LIGHTREC_DEBUG = 0
LIGHTREC_LOG_LEVEL = 2 LIGHTREC_LOG_LEVEL = 3
CORE_DIR := . CORE_DIR := .
HAVE_GRIFFIN = 0 HAVE_GRIFFIN = 0

View File

@ -162,7 +162,7 @@ ifeq ($(HAVE_LIGHTREC), 1)
FLAGS += -DHAVE_LIGHTREC FLAGS += -DHAVE_LIGHTREC
ifeq ($(LIGHTREC_LOG_LEVEL),) ifeq ($(LIGHTREC_LOG_LEVEL),)
FLAGS += -DLOG_LEVEL=2 \ FLAGS += -DLOG_LEVEL=3 \
-DENABLE_DISASSEMBLER=0 -DENABLE_DISASSEMBLER=0
else else
ifeq ($(LIGHTREC_LOG_LEVEL), 4) ifeq ($(LIGHTREC_LOG_LEVEL), 4)

View File

@ -6,7 +6,7 @@
[subrepo] [subrepo]
remote = https://github.com/pcercuei/lightrec.git remote = https://github.com/pcercuei/lightrec.git
branch = master branch = master
commit = aa2f992ed8c3236d1d952d72e3de8ea2b8d11af0 commit = 0df4ec86ba664dad3b4cc24fd3199131e8e3219f
parent = bd765e2bf9b0f1e5bd788ebd867c9b1830ece001 parent = 364a705dc70b57a734b4e362226a386b34a008fb
method = merge method = merge
cmdver = 0.4.3 cmdver = 0.4.3

View File

@ -59,7 +59,7 @@ struct block * lightrec_find_block_from_lut(struct blockcache *cache,
void remove_from_code_lut(struct blockcache *cache, struct block *block) void remove_from_code_lut(struct blockcache *cache, struct block *block)
{ {
struct lightrec_state *state = block->state; struct lightrec_state *state = cache->state;
u32 offset = lut_offset(block->pc); u32 offset = lut_offset(block->pc);
if (block->function) { if (block->function) {
@ -110,7 +110,7 @@ void lightrec_free_block_cache(struct blockcache *cache)
for (i = 0; i < LUT_SIZE; i++) { for (i = 0; i < LUT_SIZE; i++) {
for (block = cache->lut[i]; block; block = next) { for (block = cache->lut[i]; block; block = next) {
next = block->next; next = block->next;
lightrec_free_block(block); lightrec_free_block(cache->state, block);
} }
} }
@ -132,18 +132,10 @@ struct blockcache * lightrec_blockcache_init(struct lightrec_state *state)
u32 lightrec_calculate_block_hash(const struct block *block) u32 lightrec_calculate_block_hash(const struct block *block)
{ {
const struct lightrec_mem_map *map = block->map; const u32 *code = block->code;
u32 pc, hash = 0xffffffff; u32 hash = 0xffffffff;
const u32 *code;
unsigned int i; unsigned int i;
pc = kunseg(block->pc) - map->pc;
while (map->mirror_of)
map = map->mirror_of;
code = map->address + pc;
/* Jenkins one-at-a-time hash algorithm */ /* Jenkins one-at-a-time hash algorithm */
for (i = 0; i < block->nb_ops; i++) { for (i = 0; i < block->nb_ops; i++) {
hash += *code++; hash += *code++;
@ -158,9 +150,9 @@ u32 lightrec_calculate_block_hash(const struct block *block)
return hash; return hash;
} }
bool lightrec_block_is_outdated(struct block *block) bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block)
{ {
void **lut_entry = &block->state->code_lut[lut_offset(block->pc)]; void **lut_entry = &state->code_lut[lut_offset(block->pc)];
bool outdated; bool outdated;
if (*lut_entry) if (*lut_entry)
@ -173,7 +165,7 @@ bool lightrec_block_is_outdated(struct block *block)
if (block->function) if (block->function)
*lut_entry = block->function; *lut_entry = block->function;
else else
*lut_entry = block->state->get_next_block; *lut_entry = state->get_next_block;
} }
return outdated; return outdated;

View File

@ -22,6 +22,6 @@ struct blockcache * lightrec_blockcache_init(struct lightrec_state *state);
void lightrec_free_block_cache(struct blockcache *cache); void lightrec_free_block_cache(struct blockcache *cache);
u32 lightrec_calculate_block_hash(const struct block *block); u32 lightrec_calculate_block_hash(const struct block *block);
_Bool lightrec_block_is_outdated(struct block *block); _Bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block);
#endif /* __BLOCKCACHE_H__ */ #endif /* __BLOCKCACHE_H__ */

View File

@ -17,6 +17,7 @@ static const char *std_opcodes[] = {
[OP_BEQ] = "beq ", [OP_BEQ] = "beq ",
[OP_BNE] = "bne ", [OP_BNE] = "bne ",
[OP_BLEZ] = "blez ", [OP_BLEZ] = "blez ",
[OP_BGTZ] = "bgtz ",
[OP_ADDI] = "addi ", [OP_ADDI] = "addi ",
[OP_ADDIU] = "addiu ", [OP_ADDIU] = "addiu ",
[OP_SLTI] = "slti ", [OP_SLTI] = "slti ",

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,9 @@
struct block; struct block;
struct opcode; struct opcode;
void lightrec_rec_opcode(const struct block *block, u16 offset); void lightrec_rec_opcode(struct lightrec_state *state, const struct block *block,
void lightrec_emit_eob(const struct block *block, u16 offset); u16 offset);
void lightrec_emit_eob(struct lightrec_state *state, const struct block *block,
u16 offset);
#endif /* __EMITTER_H__ */ #endif /* __EMITTER_H__ */

View File

@ -379,7 +379,7 @@ static u32 int_do_branch(struct interpreter *inter, u32 old_pc, u32 next_pc)
(inter->op->flags & LIGHTREC_LOCAL_BRANCH) && (inter->op->flags & LIGHTREC_LOCAL_BRANCH) &&
(s16)inter->op->c.i.imm >= 0) { (s16)inter->op->c.i.imm >= 0) {
next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2); next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2);
next_pc = lightrec_emulate_block(inter->block, next_pc); next_pc = lightrec_emulate_block(inter->state, inter->block, next_pc);
} }
return next_pc; return next_pc;
@ -823,9 +823,6 @@ static u32 int_special_DIV(struct interpreter *inter)
if (rt == 0) { if (rt == 0) {
hi = rs; hi = rs;
lo = (rs < 0) * 2 - 1; lo = (rs < 0) * 2 - 1;
} else if ((rs == 0x80000000) && (rt == 0xFFFFFFFF)) {
lo = rs;
hi = 0;
} else { } else {
lo = rs / rt; lo = rs / rt;
hi = rs % rt; hi = rs % rt;
@ -1117,13 +1114,14 @@ static u32 int_CP2(struct interpreter *inter)
return int_CP(inter); return int_CP(inter);
} }
static u32 lightrec_emulate_block_list(struct block *block, u32 offset) static u32 lightrec_emulate_block_list(struct lightrec_state *state,
struct block *block, u32 offset)
{ {
struct interpreter inter; struct interpreter inter;
u32 pc; u32 pc;
inter.block = block; inter.block = block;
inter.state = block->state; inter.state = state;
inter.offset = offset; inter.offset = offset;
inter.op = &block->opcode_list[offset]; inter.op = &block->opcode_list[offset];
inter.cycles = 0; inter.cycles = 0;
@ -1134,17 +1132,17 @@ static u32 lightrec_emulate_block_list(struct block *block, u32 offset)
/* Add the cycles of the last branch */ /* Add the cycles of the last branch */
inter.cycles += lightrec_cycles_of_opcode(inter.op->c); inter.cycles += lightrec_cycles_of_opcode(inter.op->c);
block->state->current_cycle += inter.cycles; state->current_cycle += inter.cycles;
return pc; return pc;
} }
u32 lightrec_emulate_block(struct block *block, u32 pc) u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u32 pc)
{ {
u32 offset = (kunseg(pc) - kunseg(block->pc)) >> 2; u32 offset = (kunseg(pc) - kunseg(block->pc)) >> 2;
if (offset < block->nb_ops) if (offset < block->nb_ops)
return lightrec_emulate_block_list(block, offset); return lightrec_emulate_block_list(state, block, offset);
pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc); pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc);

View File

@ -10,6 +10,6 @@
struct block; struct block;
u32 lightrec_emulate_block(struct block *block, u32 pc); u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u32 pc);
#endif /* __LIGHTREC_INTERPRETER_H__ */ #endif /* __LIGHTREC_INTERPRETER_H__ */

View File

@ -71,19 +71,18 @@ struct reaper;
struct block { struct block {
jit_state_t *_jit; jit_state_t *_jit;
struct lightrec_state *state;
struct opcode *opcode_list; struct opcode *opcode_list;
void (*function)(void); void (*function)(void);
const u32 *code;
struct block *next;
u32 pc; u32 pc;
u32 hash; u32 hash;
unsigned int code_size;
u16 nb_ops;
u8 flags;
#if ENABLE_THREADED_COMPILER #if ENABLE_THREADED_COMPILER
atomic_flag op_list_freed; atomic_flag op_list_freed;
#endif #endif
unsigned int code_size;
u16 flags;
u16 nb_ops;
const struct lightrec_mem_map *map;
struct block *next;
}; };
struct lightrec_branch { struct lightrec_branch {
@ -146,7 +145,7 @@ u32 lightrec_rw(struct lightrec_state *state, union code op,
u32 addr, u32 data, u16 *flags, u32 addr, u32 data, u16 *flags,
struct block *block); struct block *block);
void lightrec_free_block(struct block *block); void lightrec_free_block(struct lightrec_state *state, struct block *block);
void remove_from_code_lut(struct blockcache *cache, struct block *block); void remove_from_code_lut(struct blockcache *cache, struct block *block);
@ -190,8 +189,8 @@ u32 lightrec_mfc(struct lightrec_state *state, union code op);
union code lightrec_read_opcode(struct lightrec_state *state, u32 pc); union code lightrec_read_opcode(struct lightrec_state *state, u32 pc);
struct block * lightrec_get_block(struct lightrec_state *state, u32 pc); struct block * lightrec_get_block(struct lightrec_state *state, u32 pc);
int lightrec_compile_block(struct block *block); int lightrec_compile_block(struct lightrec_state *state, struct block *block);
void lightrec_free_opcode_list(struct block *block); void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block);
unsigned int lightrec_cycles_of_opcode(union code code); unsigned int lightrec_cycles_of_opcode(union code code);

View File

@ -199,18 +199,34 @@ static void lightrec_invalidate_map(struct lightrec_state *state,
} }
static const struct lightrec_mem_map * static const struct lightrec_mem_map *
lightrec_get_map(struct lightrec_state *state, u32 kaddr) lightrec_get_map(struct lightrec_state *state,
void **host, u32 kaddr)
{ {
const struct lightrec_mem_map *map;
unsigned int i; unsigned int i;
u32 addr;
for (i = 0; i < state->nb_maps; i++) { for (i = 0; i < state->nb_maps; i++) {
const struct lightrec_mem_map *map = &state->maps[i]; const struct lightrec_mem_map *mapi = &state->maps[i];
if (kaddr >= map->pc && kaddr < map->pc + map->length) if (kaddr >= mapi->pc && kaddr < mapi->pc + mapi->length) {
return map; map = mapi;
break;
}
} }
return NULL; if (i == state->nb_maps)
return NULL;
addr = kaddr - map->pc;
while (map->mirror_of)
map = map->mirror_of;
if (host)
*host = map->address + addr;
return map;
} }
u32 lightrec_rw(struct lightrec_state *state, union code op, u32 lightrec_rw(struct lightrec_state *state, union code op,
@ -218,25 +234,17 @@ u32 lightrec_rw(struct lightrec_state *state, union code op,
{ {
const struct lightrec_mem_map *map; const struct lightrec_mem_map *map;
const struct lightrec_mem_map_ops *ops; const struct lightrec_mem_map_ops *ops;
u32 kaddr, pc, opcode = op.opcode; u32 opcode = op.opcode;
void *host; void *host;
addr += (s16) op.i.imm; addr += (s16) op.i.imm;
kaddr = kunseg(addr);
map = lightrec_get_map(state, kaddr); map = lightrec_get_map(state, &host, kunseg(addr));
if (!map) { if (!map) {
__segfault_cb(state, addr, block); __segfault_cb(state, addr, block);
return 0; return 0;
} }
pc = map->pc;
while (map->mirror_of)
map = map->mirror_of;
host = (void *)((uintptr_t)map->address + kaddr - pc);
if (unlikely(map->ops)) { if (unlikely(map->ops)) {
if (flags) if (flags)
*flags |= LIGHTREC_HW_IO; *flags |= LIGHTREC_HW_IO;
@ -439,7 +447,7 @@ struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
{ {
struct block *block = lightrec_find_block(state->block_cache, pc); struct block *block = lightrec_find_block(state->block_cache, pc);
if (block && lightrec_block_is_outdated(block)) { if (block && lightrec_block_is_outdated(state, block)) {
pr_debug("Block at PC 0x%08x is outdated!\n", block->pc); pr_debug("Block at PC 0x%08x is outdated!\n", block->pc);
/* Make sure the recompiler isn't processing the block we'll /* Make sure the recompiler isn't processing the block we'll
@ -449,7 +457,7 @@ struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
lightrec_unregister_block(state->block_cache, block); lightrec_unregister_block(state->block_cache, block);
remove_from_code_lut(state->block_cache, block); remove_from_code_lut(state->block_cache, block);
lightrec_free_block(block); lightrec_free_block(state, block);
block = NULL; block = NULL;
} }
@ -499,11 +507,11 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
if (ENABLE_THREADED_COMPILER) if (ENABLE_THREADED_COMPILER)
lightrec_recompiler_add(state->rec, block); lightrec_recompiler_add(state->rec, block);
else else
lightrec_compile_block(block); lightrec_compile_block(state, block);
} }
if (ENABLE_THREADED_COMPILER && likely(!should_recompile)) if (ENABLE_THREADED_COMPILER && likely(!should_recompile))
func = lightrec_recompiler_run_first_pass(block, &pc); func = lightrec_recompiler_run_first_pass(state, block, &pc);
else else
func = block->function; func = block->function;
@ -514,14 +522,14 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
if (!ENABLE_THREADED_COMPILER && if (!ENABLE_THREADED_COMPILER &&
((ENABLE_FIRST_PASS && likely(!should_recompile)) || ((ENABLE_FIRST_PASS && likely(!should_recompile)) ||
unlikely(block->flags & BLOCK_NEVER_COMPILE))) unlikely(block->flags & BLOCK_NEVER_COMPILE)))
pc = lightrec_emulate_block(block, pc); pc = lightrec_emulate_block(state, block, pc);
if (likely(!(block->flags & BLOCK_NEVER_COMPILE))) { if (likely(!(block->flags & BLOCK_NEVER_COMPILE))) {
/* Then compile it using the profiled data */ /* Then compile it using the profiled data */
if (ENABLE_THREADED_COMPILER) if (ENABLE_THREADED_COMPILER)
lightrec_recompiler_add(state->rec, block); lightrec_recompiler_add(state->rec, block);
else else
lightrec_compile_block(block); lightrec_compile_block(state, block);
} }
if (state->exit_flags != LIGHTREC_EXIT_NORMAL || if (state->exit_flags != LIGHTREC_EXIT_NORMAL ||
@ -608,7 +616,6 @@ static struct block * generate_wrapper(struct lightrec_state *state)
jit_patch_at(jit_jmpi(), to_fn_epilog); jit_patch_at(jit_jmpi(), to_fn_epilog);
jit_epilog(); jit_epilog();
block->state = state;
block->_jit = _jit; block->_jit = _jit;
block->function = jit_emit(); block->function = jit_emit();
block->opcode_list = NULL; block->opcode_list = NULL;
@ -637,25 +644,20 @@ err_no_mem:
static u32 lightrec_memset(struct lightrec_state *state) static u32 lightrec_memset(struct lightrec_state *state)
{ {
const struct lightrec_mem_map *map; u32 kunseg_pc = kunseg(state->native_reg_cache[4]);
u32 pc, kunseg_pc = kunseg(state->native_reg_cache[4]); void *host;
const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg_pc);
u32 length = state->native_reg_cache[5] * 4; u32 length = state->native_reg_cache[5] * 4;
map = lightrec_get_map(state, kunseg_pc);
if (!map) { if (!map) {
pr_err("Unable to find memory map for memset target address " pr_err("Unable to find memory map for memset target address "
"0x%x\n", kunseg_pc); "0x%x\n", kunseg_pc);
return 0; return 0;
} }
pc = kunseg_pc - map->pc;
while (map->mirror_of)
map = map->mirror_of;
pr_debug("Calling host memset, PC 0x%x (host address 0x%lx) for %u bytes\n", pr_debug("Calling host memset, PC 0x%x (host address 0x%lx) for %u bytes\n",
kunseg_pc, (uintptr_t)map->address + pc, length); kunseg_pc, (uintptr_t)host, length);
memset((void *)map->address + pc, 0, length); memset(host, 0, length);
if (!state->invalidate_from_dma_only) if (!state->invalidate_from_dma_only)
lightrec_invalidate_map(state, map, kunseg_pc, length); lightrec_invalidate_map(state, map, kunseg_pc, length);
@ -798,7 +800,6 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
jit_retr(LIGHTREC_REG_CYCLE); jit_retr(LIGHTREC_REG_CYCLE);
jit_epilog(); jit_epilog();
block->state = state;
block->_jit = _jit; block->_jit = _jit;
block->function = jit_emit(); block->function = jit_emit();
block->opcode_list = NULL; block->opcode_list = NULL;
@ -833,17 +834,11 @@ err_no_mem:
union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
{ {
u32 addr, kunseg_pc = kunseg(pc); void *host;
const u32 *code;
const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
addr = kunseg_pc - map->pc; lightrec_get_map(state, &host, kunseg(pc));
while (map->mirror_of)
map = map->mirror_of;
code = map->address + addr;
const u32 *code = (u32 *)host;
return (union code) *code; return (union code) *code;
} }
@ -852,9 +847,9 @@ unsigned int lightrec_cycles_of_opcode(union code code)
return 2; return 2;
} }
void lightrec_free_opcode_list(struct block *block) void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block)
{ {
lightrec_free(block->state, MEM_FOR_IR, lightrec_free(state, MEM_FOR_IR,
sizeof(*block->opcode_list) * block->nb_ops, sizeof(*block->opcode_list) * block->nb_ops,
block->opcode_list); block->opcode_list);
} }
@ -904,21 +899,14 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
{ {
struct opcode *list; struct opcode *list;
struct block *block; struct block *block;
const u32 *code; void *host;
u32 addr, kunseg_pc = kunseg(pc); const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc));
const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc); const u32 *code = (u32 *) host;
unsigned int length; unsigned int length;
if (!map) if (!map)
return NULL; return NULL;
addr = kunseg_pc - map->pc;
while (map->mirror_of)
map = map->mirror_of;
code = map->address + addr;
block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
if (!block) { if (!block) {
pr_err("Unable to recompile block: Out of memory\n"); pr_err("Unable to recompile block: Out of memory\n");
@ -932,11 +920,10 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
} }
block->pc = pc; block->pc = pc;
block->state = state;
block->_jit = NULL; block->_jit = NULL;
block->function = NULL; block->function = NULL;
block->opcode_list = list; block->opcode_list = list;
block->map = map; block->code = code;
block->next = NULL; block->next = NULL;
block->flags = 0; block->flags = 0;
block->code_size = 0; block->code_size = 0;
@ -945,7 +932,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
#endif #endif
block->nb_ops = length / sizeof(u32); block->nb_ops = length / sizeof(u32);
lightrec_optimize(block); lightrec_optimize(state, block);
length = block->nb_ops * sizeof(u32); length = block->nb_ops * sizeof(u32);
@ -1010,22 +997,21 @@ static bool lightrec_block_is_fully_tagged(const struct block *block)
return true; return true;
} }
static void lightrec_reap_block(void *data) static void lightrec_reap_block(struct lightrec_state *state, void *data)
{ {
struct block *block = data; struct block *block = data;
pr_debug("Reap dead block at PC 0x%08x\n", block->pc); pr_debug("Reap dead block at PC 0x%08x\n", block->pc);
lightrec_free_block(block); lightrec_free_block(state, block);
} }
static void lightrec_reap_jit(void *data) static void lightrec_reap_jit(struct lightrec_state *state, void *data)
{ {
_jit_destroy_state(data); _jit_destroy_state(data);
} }
int lightrec_compile_block(struct block *block) int lightrec_compile_block(struct lightrec_state *state, struct block *block)
{ {
struct lightrec_state *state = block->state;
struct lightrec_branch_target *target; struct lightrec_branch_target *target;
bool op_list_freed = false, fully_tagged = false; bool op_list_freed = false, fully_tagged = false;
struct block *block2; struct block *block2;
@ -1073,10 +1059,10 @@ int lightrec_compile_block(struct block *block)
pr_debug("Branch at offset 0x%x will be emulated\n", pr_debug("Branch at offset 0x%x will be emulated\n",
i << 2); i << 2);
lightrec_emit_eob(block, i); lightrec_emit_eob(state, block, i);
skip_next = !(elm->flags & LIGHTREC_NO_DS); skip_next = !(elm->flags & LIGHTREC_NO_DS);
} else { } else {
lightrec_rec_opcode(block, i); lightrec_rec_opcode(state, block, i);
skip_next = has_delay_slot(elm->c) && skip_next = has_delay_slot(elm->c) &&
!(elm->flags & LIGHTREC_NO_DS); !(elm->flags & LIGHTREC_NO_DS);
#if _WIN32 #if _WIN32
@ -1165,7 +1151,7 @@ int lightrec_compile_block(struct block *block)
lightrec_reap_block, lightrec_reap_block,
block2); block2);
} else { } else {
lightrec_free_block(block2); lightrec_free_block(state, block2);
} }
} }
} }
@ -1189,7 +1175,7 @@ int lightrec_compile_block(struct block *block)
if (fully_tagged && !op_list_freed) { if (fully_tagged && !op_list_freed) {
pr_debug("Block PC 0x%08x is fully tagged" pr_debug("Block PC 0x%08x is fully tagged"
" - free opcode list\n", block->pc); " - free opcode list\n", block->pc);
lightrec_free_opcode_list(block); lightrec_free_opcode_list(state, block);
block->opcode_list = NULL; block->opcode_list = NULL;
} }
@ -1267,7 +1253,7 @@ u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
state->exit_flags = LIGHTREC_EXIT_NORMAL; state->exit_flags = LIGHTREC_EXIT_NORMAL;
pc = lightrec_emulate_block(block, pc); pc = lightrec_emulate_block(state, block, pc);
if (LOG_LEVEL >= INFO_L) if (LOG_LEVEL >= INFO_L)
lightrec_print_info(state); lightrec_print_info(state);
@ -1275,15 +1261,15 @@ u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
return pc; return pc;
} }
void lightrec_free_block(struct block *block) void lightrec_free_block(struct lightrec_state *state, struct block *block)
{ {
lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32)); lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32));
if (block->opcode_list) if (block->opcode_list)
lightrec_free_opcode_list(block); lightrec_free_opcode_list(state, block);
if (block->_jit) if (block->_jit)
_jit_destroy_state(block->_jit); _jit_destroy_state(block->_jit);
lightrec_unregister(MEM_FOR_CODE, block->code_size); lightrec_unregister(MEM_FOR_CODE, block->code_size);
lightrec_free(block->state, MEM_FOR_IR, sizeof(*block), block); lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
} }
struct lightrec_state * lightrec_init(char *argv0, struct lightrec_state * lightrec_init(char *argv0,
@ -1387,7 +1373,7 @@ struct lightrec_state * lightrec_init(char *argv0,
return state; return state;
err_free_dispatcher: err_free_dispatcher:
lightrec_free_block(state->dispatcher); lightrec_free_block(state, state->dispatcher);
err_free_reaper: err_free_reaper:
if (ENABLE_THREADED_COMPILER) if (ENABLE_THREADED_COMPILER)
lightrec_reaper_destroy(state->reaper); lightrec_reaper_destroy(state->reaper);
@ -1413,6 +1399,10 @@ err_finish_jit:
void lightrec_destroy(struct lightrec_state *state) void lightrec_destroy(struct lightrec_state *state)
{ {
/* Force a print info on destroy*/
state->current_cycle = ~state->current_cycle;
lightrec_print_info(state);
if (ENABLE_THREADED_COMPILER) { if (ENABLE_THREADED_COMPILER) {
lightrec_free_recompiler(state->rec); lightrec_free_recompiler(state->rec);
lightrec_reaper_destroy(state->reaper); lightrec_reaper_destroy(state->reaper);
@ -1420,8 +1410,8 @@ void lightrec_destroy(struct lightrec_state *state)
lightrec_free_regcache(state->reg_cache); lightrec_free_regcache(state->reg_cache);
lightrec_free_block_cache(state->block_cache); lightrec_free_block_cache(state->block_cache);
lightrec_free_block(state->dispatcher); lightrec_free_block(state, state->dispatcher);
lightrec_free_block(state->c_wrapper_block); lightrec_free_block(state, state->c_wrapper_block);
finish_jit(); finish_jit();
#if ENABLE_TINYMM #if ENABLE_TINYMM
@ -1435,12 +1425,9 @@ void lightrec_destroy(struct lightrec_state *state)
void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len) void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len)
{ {
u32 kaddr = kunseg(addr & ~0x3); u32 kaddr = kunseg(addr & ~0x3);
const struct lightrec_mem_map *map = lightrec_get_map(state, kaddr); const struct lightrec_mem_map *map = lightrec_get_map(state, NULL, kaddr);
if (map) { if (map) {
while (map->mirror_of)
map = map->mirror_of;
if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM]) if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM])
return; return;

View File

@ -570,7 +570,7 @@ static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v)
return known; return known;
} }
static int lightrec_transform_ops(struct block *block) static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{ {
struct opcode *list; struct opcode *list;
unsigned int i; unsigned int i;
@ -661,7 +661,7 @@ static int lightrec_transform_ops(struct block *block)
return 0; return 0;
} }
static int lightrec_switch_delay_slots(struct block *block) static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
{ {
struct opcode *list, *next = &block->opcode_list[0]; struct opcode *list, *next = &block->opcode_list[0];
unsigned int i; unsigned int i;
@ -751,7 +751,7 @@ static int lightrec_switch_delay_slots(struct block *block)
return 0; return 0;
} }
static int shrink_opcode_list(struct block *block, u16 new_size) static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
{ {
struct opcode *list; struct opcode *list;
@ -762,7 +762,7 @@ static int shrink_opcode_list(struct block *block, u16 new_size)
} }
list = lightrec_malloc(block->state, MEM_FOR_IR, list = lightrec_malloc(state, MEM_FOR_IR,
sizeof(*list) * new_size); sizeof(*list) * new_size);
if (!list) { if (!list) {
pr_err("Unable to allocate memory\n"); pr_err("Unable to allocate memory\n");
@ -771,7 +771,7 @@ static int shrink_opcode_list(struct block *block, u16 new_size)
memcpy(list, block->opcode_list, sizeof(*list) * new_size); memcpy(list, block->opcode_list, sizeof(*list) * new_size);
lightrec_free_opcode_list(block); lightrec_free_opcode_list(state, block);
block->opcode_list = list; block->opcode_list = list;
block->nb_ops = new_size; block->nb_ops = new_size;
@ -781,7 +781,8 @@ static int shrink_opcode_list(struct block *block, u16 new_size)
return 0; return 0;
} }
static int lightrec_detect_impossible_branches(struct block *block) static int lightrec_detect_impossible_branches(struct lightrec_state *state,
struct block *block)
{ {
struct opcode *op, *next = &block->opcode_list[0]; struct opcode *op, *next = &block->opcode_list[0];
unsigned int i; unsigned int i;
@ -814,7 +815,7 @@ static int lightrec_detect_impossible_branches(struct block *block)
* only keep the first two opcodes of the block (the * only keep the first two opcodes of the block (the
* branch itself + its delay slot) */ * branch itself + its delay slot) */
if (block->nb_ops > 2) if (block->nb_ops > 2)
ret = shrink_opcode_list(block, 2); ret = shrink_opcode_list(state, block, 2);
break; break;
} }
} }
@ -822,7 +823,7 @@ static int lightrec_detect_impossible_branches(struct block *block)
return ret; return ret;
} }
static int lightrec_local_branches(struct block *block) static int lightrec_local_branches(struct lightrec_state *state, struct block *block)
{ {
struct opcode *list; struct opcode *list;
unsigned int i; unsigned int i;
@ -913,7 +914,7 @@ static void lightrec_add_unload(struct opcode *op, u8 reg)
op->flags |= LIGHTREC_UNLOAD_RT; op->flags |= LIGHTREC_UNLOAD_RT;
} }
static int lightrec_early_unload(struct block *block) static int lightrec_early_unload(struct lightrec_state *state, struct block *block)
{ {
unsigned int i, offset; unsigned int i, offset;
struct opcode *op; struct opcode *op;
@ -952,7 +953,7 @@ static int lightrec_early_unload(struct block *block)
return 0; return 0;
} }
static int lightrec_flag_stores(struct block *block) static int lightrec_flag_stores(struct lightrec_state *state, struct block *block)
{ {
struct opcode *list; struct opcode *list;
u32 known = BIT(0); u32 known = BIT(0);
@ -975,7 +976,7 @@ static int lightrec_flag_stores(struct block *block)
* on the heuristic that stores using one of these * on the heuristic that stores using one of these
* registers as address will never hit a code page. */ * registers as address will never hit a code page. */
if (list->i.rs >= 28 && list->i.rs <= 29 && if (list->i.rs >= 28 && list->i.rs <= 29 &&
!block->state->maps[PSX_MAP_KERNEL_USER_RAM].ops) { !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n", pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n",
list->opcode); list->opcode);
list->flags |= LIGHTREC_NO_INVALIDATE; list->flags |= LIGHTREC_NO_INVALIDATE;
@ -1004,7 +1005,7 @@ static int lightrec_flag_stores(struct block *block)
static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
const struct opcode *last, const struct opcode *last,
u32 mask, bool sync, bool mflo) u32 mask, bool sync, bool mflo, bool another)
{ {
const struct opcode *op, *next = &block->opcode_list[offset]; const struct opcode *op, *next = &block->opcode_list[offset];
u32 old_mask; u32 old_mask;
@ -1041,9 +1042,9 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
- !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS)); - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
reg = get_mfhi_mflo_reg(block, branch_offset, NULL, reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
mask, sync, mflo); mask, sync, mflo, false);
reg2 = get_mfhi_mflo_reg(block, offset + 1, next, reg2 = get_mfhi_mflo_reg(block, offset + 1, next,
mask, sync, mflo); mask, sync, mflo, false);
if (reg > 0 && reg == reg2) if (reg > 0 && reg == reg2)
return reg; return reg;
if (!reg && !reg2) if (!reg && !reg2)
@ -1082,6 +1083,14 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
return reg; return reg;
case OP_SPECIAL_MFHI: case OP_SPECIAL_MFHI:
if (!mflo) { if (!mflo) {
if (another)
return op->r.rd;
/* Must use REG_HI if there is another MFHI target*/
reg2 = get_mfhi_mflo_reg(block, i + 1, next,
0, sync, mflo, true);
if (reg2 > 0 && reg2 != REG_HI)
return REG_HI;
if (!sync && !(old_mask & BIT(op->r.rd))) if (!sync && !(old_mask & BIT(op->r.rd)))
return op->r.rd; return op->r.rd;
else else
@ -1090,6 +1099,14 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
continue; continue;
case OP_SPECIAL_MFLO: case OP_SPECIAL_MFLO:
if (mflo) { if (mflo) {
if (another)
return op->r.rd;
/* Must use REG_LO if there is another MFLO target*/
reg2 = get_mfhi_mflo_reg(block, i + 1, next,
0, sync, mflo, true);
if (reg2 > 0 && reg2 != REG_LO)
return REG_LO;
if (!sync && !(old_mask & BIT(op->r.rd))) if (!sync && !(old_mask & BIT(op->r.rd)))
return op->r.rd; return op->r.rd;
else else
@ -1160,7 +1177,7 @@ static void lightrec_replace_lo_hi(struct block *block, u16 offset,
} }
} }
static int lightrec_flag_mults_divs(struct block *block) static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
{ {
struct opcode *list; struct opcode *list;
u8 reg_hi, reg_lo; u8 reg_hi, reg_lo;
@ -1187,14 +1204,14 @@ static int lightrec_flag_mults_divs(struct block *block)
(list->flags & LIGHTREC_NO_DS)) (list->flags & LIGHTREC_NO_DS))
continue; continue;
reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true); reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
if (reg_lo == 0) { if (reg_lo == 0) {
pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as" pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
" not writing LO\n", i << 2); " not writing LO\n", i << 2);
list->flags |= LIGHTREC_NO_LO; list->flags |= LIGHTREC_NO_LO;
} }
reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false); reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false, false);
if (reg_hi == 0) { if (reg_hi == 0) {
pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as" pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
" not writing HI\n", i << 2); " not writing HI\n", i << 2);
@ -1311,7 +1328,8 @@ static bool remove_div_sequence(struct block *block, unsigned int offset)
return false; return false;
} }
static int lightrec_remove_div_by_zero_check_sequence(struct block *block) static int lightrec_remove_div_by_zero_check_sequence(struct lightrec_state *state,
struct block *block)
{ {
struct opcode *op; struct opcode *op;
unsigned int i; unsigned int i;
@ -1340,7 +1358,7 @@ static const u32 memset_code[] = {
0x00000000, // nop 0x00000000, // nop
}; };
static int lightrec_replace_memset(struct block *block) static int lightrec_replace_memset(struct lightrec_state *state, struct block *block)
{ {
unsigned int i; unsigned int i;
union code c; union code c;
@ -1364,7 +1382,7 @@ static int lightrec_replace_memset(struct block *block)
return 0; return 0;
} }
static int (*lightrec_optimizers[])(struct block *) = { static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence), IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset), IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches), IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
@ -1376,14 +1394,14 @@ static int (*lightrec_optimizers[])(struct block *) = {
IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload), IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
}; };
int lightrec_optimize(struct block *block) int lightrec_optimize(struct lightrec_state *state, struct block *block)
{ {
unsigned int i; unsigned int i;
int ret; int ret;
for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) { for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
if (lightrec_optimizers[i]) { if (lightrec_optimizers[i]) {
ret = (*lightrec_optimizers[i])(block); ret = (*lightrec_optimizers[i])(state, block);
if (ret) if (ret)
return ret; return ret;
} }

View File

@ -21,6 +21,6 @@ _Bool is_syscall(union code c);
_Bool should_emulate(const struct opcode *op); _Bool should_emulate(const struct opcode *op);
int lightrec_optimize(struct block *block); int lightrec_optimize(struct lightrec_state *state, struct block *block);
#endif /* __OPTIMIZER_H__ */ #endif /* __OPTIMIZER_H__ */

View File

@ -102,7 +102,7 @@ void lightrec_reaper_reap(struct reaper *reaper)
reaper_elm = container_of(elm, struct reaper_elm, slist); reaper_elm = container_of(elm, struct reaper_elm, slist);
(*reaper_elm->func)(reaper_elm->data); (*reaper_elm->func)(reaper->state, reaper_elm->data);
lightrec_free(reaper->state, MEM_FOR_LIGHTREC, lightrec_free(reaper->state, MEM_FOR_LIGHTREC,
sizeof(*reaper_elm), reaper_elm); sizeof(*reaper_elm), reaper_elm);

View File

@ -9,7 +9,7 @@
struct lightrec_state; struct lightrec_state;
struct reaper; struct reaper;
typedef void (*reap_func_t)(void *); typedef void (*reap_func_t)(struct lightrec_state *state, void *);
struct reaper *lightrec_reaper_init(struct lightrec_state *state); struct reaper *lightrec_reaper_init(struct lightrec_state *state);
void lightrec_reaper_destroy(struct reaper *reaper); void lightrec_reaper_destroy(struct reaper *reaper);

View File

@ -44,7 +44,7 @@ static void lightrec_compile_list(struct recompiler *rec)
pthread_mutex_unlock(&rec->mutex); pthread_mutex_unlock(&rec->mutex);
ret = lightrec_compile_block(block); ret = lightrec_compile_block(rec->state, block);
if (ret) { if (ret) {
pr_err("Unable to compile block at PC 0x%x: %d\n", pr_err("Unable to compile block at PC 0x%x: %d\n",
block->pc, ret); block->pc, ret);
@ -242,7 +242,8 @@ void lightrec_recompiler_remove(struct recompiler *rec, struct block *block)
pthread_mutex_unlock(&rec->mutex); pthread_mutex_unlock(&rec->mutex);
} }
void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc) void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
struct block *block, u32 *pc)
{ {
bool freed; bool freed;
@ -256,7 +257,7 @@ void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
/* The block was already compiled but the opcode list /* The block was already compiled but the opcode list
* didn't get freed yet - do it now */ * didn't get freed yet - do it now */
lightrec_free_opcode_list(block); lightrec_free_opcode_list(state, block);
block->opcode_list = NULL; block->opcode_list = NULL;
} }
} }
@ -269,7 +270,7 @@ void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
freed = atomic_flag_test_and_set(&block->op_list_freed); freed = atomic_flag_test_and_set(&block->op_list_freed);
/* Block wasn't compiled yet - run the interpreter */ /* Block wasn't compiled yet - run the interpreter */
*pc = lightrec_emulate_block(block, *pc); *pc = lightrec_emulate_block(state, block, *pc);
if (!freed) if (!freed)
atomic_flag_clear(&block->op_list_freed); atomic_flag_clear(&block->op_list_freed);
@ -281,7 +282,7 @@ void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
pr_debug("Block PC 0x%08x is fully tagged" pr_debug("Block PC 0x%08x is fully tagged"
" - free opcode list\n", block->pc); " - free opcode list\n", block->pc);
lightrec_free_opcode_list(block); lightrec_free_opcode_list(state, block);
block->opcode_list = NULL; block->opcode_list = NULL;
} }

View File

@ -15,6 +15,7 @@ void lightrec_free_recompiler(struct recompiler *rec);
int lightrec_recompiler_add(struct recompiler *rec, struct block *block); int lightrec_recompiler_add(struct recompiler *rec, struct block *block);
void lightrec_recompiler_remove(struct recompiler *rec, struct block *block); void lightrec_recompiler_remove(struct recompiler *rec, struct block *block);
void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc); void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
struct block *block, u32 *pc);
#endif /* __LIGHTREC_RECOMPILER_H__ */ #endif /* __LIGHTREC_RECOMPILER_H__ */

View File

@ -1724,10 +1724,14 @@ int lightrec_init_mmap()
#endif #endif
#ifdef HAVE_SHM #ifdef HAVE_SHM
int memfd; int memfd;
const char *shm_name = "/lightrec_memfd"; const char *shm_name = "/lightrec_memfd_beetle";
memfd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, memfd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
S_IRUSR | S_IWUSR);
if (memfd < 0 && errno == EEXIST) {
shm_unlink(shm_name);
memfd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
}
if (memfd < 0) { if (memfd < 0) {
log_cb(RETRO_LOG_ERROR, "Failed to create SHM: %s\n", strerror(errno)); log_cb(RETRO_LOG_ERROR, "Failed to create SHM: %s\n", strerror(errno));

View File

@ -16,8 +16,7 @@
#define OPT_LOCAL_BRANCHES 1 #define OPT_LOCAL_BRANCHES 1
#define OPT_SWITCH_DELAY_SLOTS 1 #define OPT_SWITCH_DELAY_SLOTS 1
#define OPT_FLAG_STORES 1 #define OPT_FLAG_STORES 1
/* Disable until ape escape bug is fixed */ #define OPT_FLAG_MULT_DIV 1
#define OPT_FLAG_MULT_DIV 0
#define OPT_EARLY_UNLOAD 1 #define OPT_EARLY_UNLOAD 1
#endif /* __LIGHTREC_CONFIG_H__ */ #endif /* __LIGHTREC_CONFIG_H__ */

View File

@ -85,7 +85,7 @@ static FastFIFO<uint32, 0x20> GPU_BlitterFIFO; // 0x10 on an actual PS1 GPU, 0x2
struct CTEntry struct CTEntry
{ {
void (*func[4][4])(PS_GPU* g, const uint32 *cb, bool MaskEval_TA); void (*func[4][8])(PS_GPU* g, const uint32 *cb);
uint8_t len; uint8_t len;
uint8_t fifo_fb_len; uint8_t fifo_fb_len;
bool ss_cmd; bool ss_cmd;
@ -141,20 +141,24 @@ static void SetTPage(PS_GPU *gpu, const uint32_t cmdw)
/* C-style function wrappers so our command table isn't so ginormous(in memory usage). */ /* C-style function wrappers so our command table isn't so ginormous(in memory usage). */
template<int numvertices, bool shaded, bool textured, template<int numvertices, bool shaded, bool textured,
int BlendMode, bool TexMult, uint32 TexMode_TA> int BlendMode, bool TexMult, uint32 TexMode_TA, bool MaskEval_TA>
static void G_Command_DrawPolygon(PS_GPU* g, const uint32 *cb, bool MaskEval_TA) static void G_Command_DrawPolygon(PS_GPU* g, const uint32 *cb)
{ {
Command_DrawPolygon<numvertices, shaded, textured, if (PGXP_enabled())
BlendMode, TexMult, TexMode_TA>(g, cb, PGXP_enabled(), MaskEval_TA); Command_DrawPolygon<numvertices, shaded, textured,
BlendMode, TexMult, TexMode_TA, MaskEval_TA, true>(g, cb);
else
Command_DrawPolygon<numvertices, shaded, textured,
BlendMode, TexMult, TexMode_TA, MaskEval_TA, false>(g, cb);
} }
static void Command_ClearCache(PS_GPU* g, const uint32 *cb, bool MaskEval_TA) static void Command_ClearCache(PS_GPU* g, const uint32 *cb)
{ {
InvalidateCache(g); InvalidateCache(g);
} }
static void Command_IRQ(PS_GPU* g, const uint32 *cb, bool MaskEval_TA) static void Command_IRQ(PS_GPU* g, const uint32 *cb)
{ {
g->IRQPending = true; g->IRQPending = true;
IRQ_Assert(IRQ_GPU, g->IRQPending); IRQ_Assert(IRQ_GPU, g->IRQPending);
@ -162,7 +166,7 @@ static void Command_IRQ(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
// Special RAM write mode(16 pixels at a time), // Special RAM write mode(16 pixels at a time),
// does *not* appear to use mask drawing environment settings. // does *not* appear to use mask drawing environment settings.
static void Command_FBFill(PS_GPU* gpu, const uint32 *cb, bool MaskEval_TA) static void Command_FBFill(PS_GPU* gpu, const uint32 *cb)
{ {
unsigned y; unsigned y;
int32_t r = cb[0] & 0xFF; int32_t r = cb[0] & 0xFF;
@ -198,7 +202,7 @@ static void Command_FBFill(PS_GPU* gpu, const uint32 *cb, bool MaskEval_TA)
rsx_intf_fill_rect(cb[0], destX, destY, width, height); rsx_intf_fill_rect(cb[0], destX, destY, width, height);
} }
static void Command_FBCopy(PS_GPU* g, const uint32 *cb, bool MaskEval_TA) static void Command_FBCopy(PS_GPU* g, const uint32 *cb)
{ {
unsigned y; unsigned y;
int32_t sourceX = (cb[1] >> 0) & 0x3FF; int32_t sourceX = (cb[1] >> 0) & 0x3FF;
@ -251,7 +255,7 @@ static void Command_FBCopy(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
rsx_intf_copy_rect(sourceX, sourceY, destX, destY, width, height, g->MaskEvalAND, g->MaskSetOR); rsx_intf_copy_rect(sourceX, sourceY, destX, destY, width, height, g->MaskEvalAND, g->MaskSetOR);
} }
static void Command_FBWrite(PS_GPU* g, const uint32 *cb, bool MaskEval_TA) static void Command_FBWrite(PS_GPU* g, const uint32 *cb)
{ {
//assert(InCmd == INCMD_NONE); //assert(InCmd == INCMD_NONE);
@ -280,7 +284,7 @@ static void Command_FBWrite(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
* raw_height == 0, or raw_height != 0x200 && (raw_height & 0x1FF) == 0 * raw_height == 0, or raw_height != 0x200 && (raw_height & 0x1FF) == 0
*/ */
static void Command_FBRead(PS_GPU* g, const uint32 *cb, bool MaskEval_TA) static void Command_FBRead(PS_GPU* g, const uint32 *cb)
{ {
//assert(g->InCmd == INCMD_NONE); //assert(g->InCmd == INCMD_NONE);
@ -318,7 +322,7 @@ static void Command_FBRead(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
} }
} }
static void Command_DrawMode(PS_GPU* g, const uint32 *cb, bool MaskEval_TA) static void Command_DrawMode(PS_GPU* g, const uint32 *cb)
{ {
const uint32 cmdw = *cb; const uint32 cmdw = *cb;
@ -337,7 +341,7 @@ static void Command_DrawMode(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
//printf("*******************DFE: %d -- scanline=%d\n", dfe, scanline); //printf("*******************DFE: %d -- scanline=%d\n", dfe, scanline);
} }
static void Command_TexWindow(PS_GPU* g, const uint32 *cb, bool MaskEval_TA) static void Command_TexWindow(PS_GPU* g, const uint32 *cb)
{ {
g->tww = (*cb & 0x1F); g->tww = (*cb & 0x1F);
g->twh = ((*cb >> 5) & 0x1F); g->twh = ((*cb >> 5) & 0x1F);
@ -348,7 +352,7 @@ static void Command_TexWindow(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
rsx_intf_set_tex_window(g->tww, g->twh, g->twx, g->twy); rsx_intf_set_tex_window(g->tww, g->twh, g->twx, g->twy);
} }
static void Command_Clip0(PS_GPU* g, const uint32 *cb, bool MaskEval_TA) static void Command_Clip0(PS_GPU* g, const uint32 *cb)
{ {
g->ClipX0 = *cb & 1023; g->ClipX0 = *cb & 1023;
g->ClipY0 = (*cb >> 10) & 1023; g->ClipY0 = (*cb >> 10) & 1023;
@ -356,7 +360,7 @@ static void Command_Clip0(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
g->ClipX1, g->ClipY1); g->ClipX1, g->ClipY1);
} }
static void Command_Clip1(PS_GPU* g, const uint32 *cb, bool MaskEval_TA) static void Command_Clip1(PS_GPU* g, const uint32 *cb)
{ {
g->ClipX1 = *cb & 1023; g->ClipX1 = *cb & 1023;
g->ClipY1 = (*cb >> 10) & 1023; g->ClipY1 = (*cb >> 10) & 1023;
@ -364,7 +368,7 @@ static void Command_Clip1(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
g->ClipX1, g->ClipY1); g->ClipX1, g->ClipY1);
} }
static void Command_DrawingOffset(PS_GPU* g, const uint32 *cb, bool MaskEval_TA) static void Command_DrawingOffset(PS_GPU* g, const uint32 *cb)
{ {
g->OffsX = sign_x_to_s32(11, (*cb & 2047)); g->OffsX = sign_x_to_s32(11, (*cb & 2047));
g->OffsY = sign_x_to_s32(11, ((*cb >> 11) & 2047)); g->OffsY = sign_x_to_s32(11, ((*cb >> 11) & 2047));
@ -372,7 +376,7 @@ static void Command_DrawingOffset(PS_GPU* g, const uint32 *cb, bool MaskEval_TA)
//fprintf(stderr, "[GPU] Drawing offset: %d(raw=%d) %d(raw=%d) -- %d\n", OffsX, *cb, OffsY, *cb >> 11, scanline); //fprintf(stderr, "[GPU] Drawing offset: %d(raw=%d) %d(raw=%d) -- %d\n", OffsX, *cb, OffsY, *cb >> 11, scanline);
} }
static void Command_MaskSetting(PS_GPU* g, const uint32 *cb, bool MaskEval_TA) static void Command_MaskSetting(PS_GPU* g, const uint32 *cb)
{ {
//printf("Mask setting: %08x\n", *cb); //printf("Mask setting: %08x\n", *cb);
g->MaskSetOR = (*cb & 1) ? 0x8000 : 0x0000; g->MaskSetOR = (*cb & 1) ? 0x8000 : 0x0000;
@ -1085,15 +1089,15 @@ static void ProcessFIFO(uint32_t in_count)
} }
if ((cc >= 0x80) && (cc <= 0x9F)) if ((cc >= 0x80) && (cc <= 0x9F))
Command_FBCopy(&GPU, CB, GPU.MaskEvalAND); Command_FBCopy(&GPU, CB);
else if ((cc >= 0xA0) && (cc <= 0xBF)) else if ((cc >= 0xA0) && (cc <= 0xBF))
Command_FBWrite(&GPU, CB, GPU.MaskEvalAND); Command_FBWrite(&GPU, CB);
else if ((cc >= 0xC0) && (cc <= 0xDF)) else if ((cc >= 0xC0) && (cc <= 0xDF))
Command_FBRead(&GPU, CB, GPU.MaskEvalAND); Command_FBRead(&GPU, CB);
else else
{ {
if (command->func[GPU.abr][GPU.TexMode]) if (command->func[GPU.abr][GPU.TexMode])
command->func[GPU.abr][GPU.TexMode](&GPU, CB, GPU.MaskEvalAND); command->func[GPU.abr][GPU.TexMode | (GPU.MaskEvalAND ? 0x4 : 0x0)](&GPU, CB);
} }
} }

View File

@ -68,8 +68,8 @@ static INLINE void PlotPixelBlend(uint16_t bg_pix, uint16_t *fore_pix)
} }
template<int BlendMode, bool textured> template<int BlendMode, bool MaskEval_TA, bool textured>
static INLINE void PlotPixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pix, bool MaskEval_TA) static INLINE void PlotPixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pix)
{ {
// More Y precision bits than GPU RAM installed in (non-arcade, at least) Playstation hardware. // More Y precision bits than GPU RAM installed in (non-arcade, at least) Playstation hardware.
y &= (512 << gpu->upscale_shift) - 1; y &= (512 << gpu->upscale_shift) - 1;
@ -91,8 +91,8 @@ static INLINE void PlotPixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pi
} }
/// Copy of PlotPixel without internal upscaling, used to draw lines and sprites /// Copy of PlotPixel without internal upscaling, used to draw lines and sprites
template<int BlendMode, bool textured> template<int BlendMode, bool MaskEval_TA, bool textured>
static INLINE void PlotNativePixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pix, bool MaskEval_TA) static INLINE void PlotNativePixel(PS_GPU *gpu, int32_t x, int32_t y, uint16_t fore_pix)
{ {
uint16_t output; uint16_t output;
y &= 511; // More Y precision bits than GPU RAM installed in (non-arcade, at least) Playstation hardware. y &= 511; // More Y precision bits than GPU RAM installed in (non-arcade, at least) Playstation hardware.
@ -251,15 +251,19 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
//#define BM_HELPER(fg) { fg(0), fg(1), fg(2), fg(3) } //#define BM_HELPER(fg) { fg(0), fg(1), fg(2), fg(3) }
#define POLY_HELPER_SUB(bm, cv, tm) \ #define POLY_HELPER_SUB(bm, cv, tm, mam) \
G_Command_DrawPolygon<3 + ((cv & 0x8) >> 3), ((cv & 0x10) >> 4), ((cv & 0x4) >> 2), ((cv & 0x2) >> 1) ? bm : -1, ((cv & 1) ^ 1) & ((cv & 0x4) >> 2), tm> G_Command_DrawPolygon<3 + ((cv & 0x8) >> 3), ((cv & 0x10) >> 4), ((cv & 0x4) >> 2), ((cv & 0x2) >> 1) ? bm : -1, ((cv & 1) ^ 1) & ((cv & 0x4) >> 2), tm, mam >
#define POLY_HELPER_FG(bm, cv) \ #define POLY_HELPER_FG(bm, cv) \
{ \ { \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0)), \ POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0), 0), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0)), \ POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0), 0), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0)), \ POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 0), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0)), \ POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 0), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0), 1), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0), 1), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 1), \
POLY_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 1), \
} }
#define POLY_HELPER(cv) \ #define POLY_HELPER(cv) \
@ -270,14 +274,18 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
false \ false \
} }
#define SPR_HELPER_SUB(bm, cv, tm) Command_DrawSprite<(cv >> 3) & 0x3, ((cv & 0x4) >> 2), ((cv & 0x2) >> 1) ? bm : -1, ((cv & 1) ^ 1) & ((cv & 0x4) >> 2), tm> #define SPR_HELPER_SUB(bm, cv, tm, mam) Command_DrawSprite<(cv >> 3) & 0x3, ((cv & 0x4) >> 2), ((cv & 0x2) >> 1) ? bm : -1, ((cv & 1) ^ 1) & ((cv & 0x4) >> 2), tm, mam>
#define SPR_HELPER_FG(bm, cv) \ #define SPR_HELPER_FG(bm, cv) \
{ \ { \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0)), \ SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0), 0), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0)), \ SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0), 0), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0)), \ SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 0), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0)), \ SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 0), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 0 : 0), 1), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 1 : 0), 1), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 1), \
SPR_HELPER_SUB(bm, cv, ((cv & 0x4) ? 2 : 0), 1), \
} }
@ -289,14 +297,18 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
false \ false \
} }
#define LINE_HELPER_SUB(bm, cv) Command_DrawLine<((cv & 0x08) >> 3), ((cv & 0x10) >> 4), ((cv & 0x2) >> 1) ? bm : -1> #define LINE_HELPER_SUB(bm, cv, mam) Command_DrawLine<((cv & 0x08) >> 3), ((cv & 0x10) >> 4), ((cv & 0x2) >> 1) ? bm : -1, mam>
#define LINE_HELPER_FG(bm, cv) \ #define LINE_HELPER_FG(bm, cv) \
{ \ { \
LINE_HELPER_SUB(bm, cv), \ LINE_HELPER_SUB(bm, cv, 0), \
LINE_HELPER_SUB(bm, cv), \ LINE_HELPER_SUB(bm, cv, 0), \
LINE_HELPER_SUB(bm, cv), \ LINE_HELPER_SUB(bm, cv, 0), \
LINE_HELPER_SUB(bm, cv), \ LINE_HELPER_SUB(bm, cv, 0), \
LINE_HELPER_SUB(bm, cv, 1), \
LINE_HELPER_SUB(bm, cv, 1), \
LINE_HELPER_SUB(bm, cv, 1), \
LINE_HELPER_SUB(bm, cv, 1) \
} }
#define LINE_HELPER(cv) \ #define LINE_HELPER(cv) \
@ -307,7 +319,7 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
false \ false \
} }
#define OTHER_HELPER_FG(bm, arg_ptr) { arg_ptr, arg_ptr, arg_ptr, arg_ptr } #define OTHER_HELPER_FG(bm, arg_ptr) { arg_ptr, arg_ptr, arg_ptr, arg_ptr, arg_ptr, arg_ptr, arg_ptr, arg_ptr }
#define OTHER_HELPER(arg_cs, arg_fbcs, arg_ss, arg_ptr) { { OTHER_HELPER_FG(0, arg_ptr), OTHER_HELPER_FG(1, arg_ptr), OTHER_HELPER_FG(2, arg_ptr), OTHER_HELPER_FG(3, arg_ptr) }, arg_cs, arg_fbcs, arg_ss } #define OTHER_HELPER(arg_cs, arg_fbcs, arg_ss, arg_ptr) { { OTHER_HELPER_FG(0, arg_ptr), OTHER_HELPER_FG(1, arg_ptr), OTHER_HELPER_FG(2, arg_ptr), OTHER_HELPER_FG(3, arg_ptr) }, arg_cs, arg_fbcs, arg_ss }
#define OTHER_HELPER_X2(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER(arg_cs, arg_fbcs, arg_ss, arg_ptr) #define OTHER_HELPER_X2(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER(arg_cs, arg_fbcs, arg_ss, arg_ptr)
#define OTHER_HELPER_X4(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X2(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X2(arg_cs, arg_fbcs, arg_ss, arg_ptr) #define OTHER_HELPER_X4(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X2(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X2(arg_cs, arg_fbcs, arg_ss, arg_ptr)
@ -315,5 +327,5 @@ static INLINE bool LineSkipTest(PS_GPU* g, unsigned y)
#define OTHER_HELPER_X16(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X8(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X8(arg_cs, arg_fbcs, arg_ss, arg_ptr) #define OTHER_HELPER_X16(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X8(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X8(arg_cs, arg_fbcs, arg_ss, arg_ptr)
#define OTHER_HELPER_X32(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X16(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X16(arg_cs, arg_fbcs, arg_ss, arg_ptr) #define OTHER_HELPER_X32(arg_cs, arg_fbcs, arg_ss, arg_ptr) OTHER_HELPER_X16(arg_cs, arg_fbcs, arg_ss, arg_ptr), OTHER_HELPER_X16(arg_cs, arg_fbcs, arg_ss, arg_ptr)
#define NULLCMD_FG(bm) { NULL, NULL, NULL, NULL } #define NULLCMD_FG(bm) { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }
#define NULLCMD() { { NULLCMD_FG(0), NULLCMD_FG(1), NULLCMD_FG(2), NULLCMD_FG(3) }, 1, 1, true } #define NULLCMD() { { NULLCMD_FG(0), NULLCMD_FG(1), NULLCMD_FG(2), NULLCMD_FG(3) }, 1, 1, true }

View File

@ -101,8 +101,8 @@ static INLINE void AddLineStep(line_fxp_coord *point, const line_fxp_step *step)
} }
} }
template<bool gouraud, int BlendMode> template<bool gouraud, int BlendMode, bool MaskEval_TA>
static void DrawLine(PS_GPU *gpu, line_point *points, bool MaskEval_TA) static void DrawLine(PS_GPU *gpu, line_point *points)
{ {
line_fxp_coord cur_point; line_fxp_coord cur_point;
line_fxp_step step; line_fxp_step step;
@ -155,15 +155,15 @@ static void DrawLine(PS_GPU *gpu, line_point *points, bool MaskEval_TA)
// FIXME: There has to be a faster way than checking for being inside the drawing area for each pixel. // FIXME: There has to be a faster way than checking for being inside the drawing area for each pixel.
if(x >= gpu->ClipX0 && x <= gpu->ClipX1 && y >= gpu->ClipY0 && y <= gpu->ClipY1) if(x >= gpu->ClipX0 && x <= gpu->ClipX1 && y >= gpu->ClipY0 && y <= gpu->ClipY1)
PlotNativePixel<BlendMode, false>(gpu, x, y, pix, MaskEval_TA); PlotNativePixel<BlendMode, MaskEval_TA, false>(gpu, x, y, pix);
} }
AddLineStep<gouraud>(&cur_point, &step); AddLineStep<gouraud>(&cur_point, &step);
} }
} }
template<bool polyline, bool gouraud, int BlendMode> template<bool polyline, bool gouraud, int BlendMode, bool MaskEval_TA>
static void Command_DrawLine(PS_GPU *gpu, const uint32_t *cb, bool MaskEval_TA) static void Command_DrawLine(PS_GPU *gpu, const uint32_t *cb)
{ {
line_point points[2]; line_point points[2];
const uint8_t cc = cb[0] >> 24; // For pline handling later. const uint8_t cc = cb[0] >> 24; // For pline handling later.
@ -240,5 +240,5 @@ static void Command_DrawLine(PS_GPU *gpu, const uint32_t *cb, bool MaskEval_TA)
#endif #endif
if (rsx_intf_has_software_renderer()) if (rsx_intf_has_software_renderer())
DrawLine<gouraud, BlendMode>(gpu, points, MaskEval_TA); DrawLine<gouraud, BlendMode, MaskEval_TA>(gpu, points);
} }

View File

@ -115,8 +115,8 @@ static INLINE void AddIDeltas_DY(i_group &ig, const i_deltas &idl, uint32_t coun
} }
} }
template<bool gouraud, bool textured, int BlendMode, bool TexMult, uint32 TexMode_TA> template<bool gouraud, bool textured, int BlendMode, bool TexMult, uint32 TexMode_TA, bool MaskEval_TA>
static INLINE void DrawSpan(PS_GPU *gpu, int y, const int32 x_start, const int32 x_bound, i_group ig, const i_deltas &idl, bool MaskEval_TA) static INLINE void DrawSpan(PS_GPU *gpu, int y, const int32 x_start, const int32 x_bound, i_group ig, const i_deltas &idl)
{ {
if(LineSkipTest(gpu, y >> gpu->upscale_shift)) if(LineSkipTest(gpu, y >> gpu->upscale_shift))
return; return;
@ -190,7 +190,7 @@ static INLINE void DrawSpan(PS_GPU *gpu, int y, const int32 x_start, const int32
uint8_t *dither_offset = gpu->DitherLUT[dither_y][dither_x]; uint8_t *dither_offset = gpu->DitherLUT[dither_y][dither_x];
fbw = ModTexel(dither_offset, fbw, r, g, b); fbw = ModTexel(dither_offset, fbw, r, g, b);
} }
PlotPixel<BlendMode, true>(gpu, x, y, fbw, MaskEval_TA); PlotPixel<BlendMode, MaskEval_TA, true>(gpu, x, y, fbw);
} }
} }
else else
@ -210,7 +210,7 @@ static INLINE void DrawSpan(PS_GPU *gpu, int y, const int32 x_start, const int32
pix |= (b >> 3) << 10; pix |= (b >> 3) << 10;
} }
PlotPixel<BlendMode, false>(gpu, x, y, pix, MaskEval_TA); PlotPixel<BlendMode, MaskEval_TA, false>(gpu, x, y, pix);
} }
x++; x++;
@ -218,8 +218,8 @@ static INLINE void DrawSpan(PS_GPU *gpu, int y, const int32 x_start, const int32
} while(MDFN_LIKELY(--w > 0)); } while(MDFN_LIKELY(--w > 0));
} }
template<bool gouraud, bool textured, int BlendMode, bool TexMult, uint32_t TexMode_TA> template<bool gouraud, bool textured, int BlendMode, bool TexMult, uint32_t TexMode_TA, bool MaskEval_TA>
static INLINE void DrawTriangle(PS_GPU *gpu, tri_vertex *vertices, bool MaskEval_TA) static INLINE void DrawTriangle(PS_GPU *gpu, tri_vertex *vertices)
{ {
i_deltas idl; i_deltas idl;
unsigned core_vertex; unsigned core_vertex;
@ -452,7 +452,7 @@ if(vertices[1].y == vertices[0].y)
continue; continue;
} }
DrawSpan<gouraud, textured, BlendMode, TexMult, TexMode_TA>(gpu, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl, MaskEval_TA); DrawSpan<gouraud, textured, BlendMode, TexMult, TexMode_TA, MaskEval_TA>(gpu, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
} }
} }
else else
@ -470,7 +470,7 @@ if(vertices[1].y == vertices[0].y)
goto skipit; goto skipit;
} }
DrawSpan<gouraud, textured, BlendMode, TexMult, TexMode_TA>(gpu, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl, MaskEval_TA); DrawSpan<gouraud, textured, BlendMode, TexMult, TexMode_TA, MaskEval_TA>(gpu, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
// //
// //
// //
@ -501,8 +501,8 @@ bool Hack_ForceLine(PS_GPU *gpu, tri_vertex* vertices, tri_vertex* outVertices);
extern int psx_pgxp_2d_tol; extern int psx_pgxp_2d_tol;
template<int numvertices, bool gouraud, bool textured, int BlendMode, bool TexMult, uint32_t TexMode_TA> template<int numvertices, bool gouraud, bool textured, int BlendMode, bool TexMult, uint32_t TexMode_TA, bool MaskEval_TA, bool pgxp>
static void Command_DrawPolygon(PS_GPU *gpu, const uint32_t *cb, bool pgxp, bool MaskEval_TA) static void Command_DrawPolygon(PS_GPU *gpu, const uint32_t *cb)
{ {
tri_vertex vertices[3]; tri_vertex vertices[3];
const uint32_t* baseCB = cb; const uint32_t* baseCB = cb;
@ -884,7 +884,7 @@ static void Command_DrawPolygon(PS_GPU *gpu, const uint32_t *cb, bool pgxp, bool
} }
if (rsx_intf_has_software_renderer()) if (rsx_intf_has_software_renderer())
DrawTriangle<gouraud, textured, BlendMode, TexMult, TexMode_TA>(gpu, vertices, MaskEval_TA); DrawTriangle<gouraud, textured, BlendMode, TexMult, TexMode_TA, MaskEval_TA>(gpu, vertices);
// Line Render: Overwrite vertices with those of the second triangle // Line Render: Overwrite vertices with those of the second triangle
if ((lineFound) && (numvertices == 3) && (textured)) if ((lineFound) && (numvertices == 3) && (textured))

View File

@ -1,8 +1,8 @@
template<bool textured, int BlendMode, bool TexMult, uint32_t TexMode_TA, template<bool textured, int BlendMode, bool TexMult, uint32_t TexMode_TA,
bool FlipX, bool FlipY> bool MaskEval_TA, bool FlipX, bool FlipY>
static void DrawSprite(PS_GPU *gpu, int32_t x_arg, int32_t y_arg, int32_t w, int32_t h, static void DrawSprite(PS_GPU *gpu, int32_t x_arg, int32_t y_arg, int32_t w, int32_t h,
uint8_t u_arg, uint8_t v_arg, uint32_t color, uint32_t clut_offset, bool MaskEval_TA) uint8_t u_arg, uint8_t v_arg, uint32_t color, uint32_t clut_offset)
{ {
uint8_t u, v; uint8_t u, v;
const int32_t r = color & 0xFF; const int32_t r = color & 0xFF;
@ -100,11 +100,11 @@ static void DrawSprite(PS_GPU *gpu, int32_t x_arg, int32_t y_arg, int32_t w, int
uint8_t *dither_offset = gpu->DitherLUT[2][3]; uint8_t *dither_offset = gpu->DitherLUT[2][3];
fbw = ModTexel(dither_offset, fbw, r, g, b); fbw = ModTexel(dither_offset, fbw, r, g, b);
} }
PlotNativePixel<BlendMode, true>(gpu, x, y, fbw, MaskEval_TA); PlotNativePixel<BlendMode, MaskEval_TA, true>(gpu, x, y, fbw);
} }
} }
else else
PlotNativePixel<BlendMode, false>(gpu, x, y, fill_color, MaskEval_TA); PlotNativePixel<BlendMode, MaskEval_TA, false>(gpu, x, y, fill_color);
if(textured) if(textured)
u_r += u_inc; u_r += u_inc;
@ -116,8 +116,8 @@ static void DrawSprite(PS_GPU *gpu, int32_t x_arg, int32_t y_arg, int32_t w, int
} }
template<uint8_t raw_size, bool textured, int BlendMode, template<uint8_t raw_size, bool textured, int BlendMode,
bool TexMult, uint32_t TexMode_TA> bool TexMult, uint32_t TexMode_TA, bool MaskEval_TA>
static void Command_DrawSprite(PS_GPU *gpu, const uint32_t *cb, bool MaskEval_TA) static void Command_DrawSprite(PS_GPU *gpu, const uint32_t *cb)
{ {
int32_t x, y; int32_t x, y;
int32_t w, h; int32_t w, h;
@ -230,6 +230,7 @@ static void Command_DrawSprite(PS_GPU *gpu, const uint32_t *cb, bool MaskEval_TA
true); true);
} }
#endif #endif
#if 0 #if 0
printf("SPRITE: %d %d %d -- %d %d\n", raw_size, x, y, w, h); printf("SPRITE: %d %d %d -- %d %d\n", raw_size, x, y, w, h);
#endif #endif
@ -241,30 +242,30 @@ static void Command_DrawSprite(PS_GPU *gpu, const uint32_t *cb, bool MaskEval_TA
{ {
case 0x0000: case 0x0000:
if(!TexMult || color == 0x808080) if(!TexMult || color == 0x808080)
DrawSprite<textured, BlendMode, false, TexMode_TA, false, false>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA); DrawSprite<textured, BlendMode, false, TexMode_TA, MaskEval_TA, false, false>(gpu, x, y, w, h, u, v, color, clut);
else else
DrawSprite<textured, BlendMode, true, TexMode_TA, false, false>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA); DrawSprite<textured, BlendMode, true, TexMode_TA, MaskEval_TA, false, false>(gpu, x, y, w, h, u, v, color, clut);
break; break;
case 0x1000: case 0x1000:
if(!TexMult || color == 0x808080) if(!TexMult || color == 0x808080)
DrawSprite<textured, BlendMode, false, TexMode_TA, true, false>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA); DrawSprite<textured, BlendMode, false, TexMode_TA, MaskEval_TA, true, false>(gpu, x, y, w, h, u, v, color, clut);
else else
DrawSprite<textured, BlendMode, true, TexMode_TA, true, false>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA); DrawSprite<textured, BlendMode, true, TexMode_TA, MaskEval_TA, true, false>(gpu, x, y, w, h, u, v, color, clut);
break; break;
case 0x2000: case 0x2000:
if(!TexMult || color == 0x808080) if(!TexMult || color == 0x808080)
DrawSprite<textured, BlendMode, false, TexMode_TA, false, true>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA); DrawSprite<textured, BlendMode, false, TexMode_TA, MaskEval_TA, false, true>(gpu, x, y, w, h, u, v, color, clut);
else else
DrawSprite<textured, BlendMode, true, TexMode_TA, false, true>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA); DrawSprite<textured, BlendMode, true, TexMode_TA, MaskEval_TA, false, true>(gpu, x, y, w, h, u, v, color, clut);
break; break;
case 0x3000: case 0x3000:
if(!TexMult || color == 0x808080) if(!TexMult || color == 0x808080)
DrawSprite<textured, BlendMode, false, TexMode_TA, true, true>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA); DrawSprite<textured, BlendMode, false, TexMode_TA, MaskEval_TA, true, true>(gpu, x, y, w, h, u, v, color, clut);
else else
DrawSprite<textured, BlendMode, true, TexMode_TA, true, true>(gpu, x, y, w, h, u, v, color, clut, MaskEval_TA); DrawSprite<textured, BlendMode, true, TexMode_TA, MaskEval_TA, true, true>(gpu, x, y, w, h, u, v, color, clut);
break; break;
} }
} }