mirror of
https://github.com/libretro/parallel-n64.git
synced 2025-03-01 05:05:49 +00:00
More intense debugging.
This commit is contained in:
parent
353ca97fba
commit
510ef2c738
@ -93,7 +93,6 @@ static INLINE unsigned SPECIAL(uint32_t inst, uint32_t PC)
|
||||
{
|
||||
uint64_t hash = hash_imem((const uint8_t*)VR, sizeof(VR));
|
||||
fprintf(stderr, "JR (PC: %u): 0, %llu\n", temp_PC & 0xfff, hash);
|
||||
fprintf(stderr, " DMEM HASH: 0x%016llx\n", hash_imem(RSP.DMEM, 0x1000));
|
||||
}
|
||||
|
||||
return 1;
|
||||
@ -486,6 +485,10 @@ EX:
|
||||
const int e = (inst >> 21) & 0xF; /* rs & 0xF */
|
||||
|
||||
COP2_C2[opcode](vd, vs, vt, e);
|
||||
{
|
||||
uint64_t hash = hash_imem((const uint8_t*)VR, sizeof(VR));
|
||||
fprintf(stderr, "CP2 (PC: %u): 0, %llu\n", opcode, hash);
|
||||
}
|
||||
}
|
||||
else if (run_task_opcode(inst, inst >> 26))
|
||||
{
|
||||
|
@ -15,6 +15,18 @@
|
||||
|
||||
INLINE static void do_madn(short* VD, short* VS, short* VT)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < 8; i++)
|
||||
fprintf(stderr, "ACC LO[%u] = %d\n", i, VACC_L[i]);
|
||||
for (i = 0; i < 8; i++)
|
||||
fprintf(stderr, "ACC MD[%u] = %d\n", i, VACC_M[i]);
|
||||
for (i = 0; i < 8; i++)
|
||||
fprintf(stderr, "ACC HI[%u] = %d\n", i, VACC_H[i]);
|
||||
for (i = 0; i < 8; i++)
|
||||
fprintf(stderr, "VS[%u] = %d\n", i, VS[i]);
|
||||
for (i = 0; i < 8; i++)
|
||||
fprintf(stderr, "VT[%u] = %d\n", i, VT[i]);
|
||||
|
||||
#ifdef ARCH_MIN_SSE2
|
||||
__m128i acc_hi, acc_md, acc_lo;
|
||||
__m128i prod_hi, prod_lo;
|
||||
@ -76,6 +88,8 @@ INLINE static void do_madn(short* VD, short* VS, short* VT)
|
||||
vs = _mm_xor_si128(vs, acc_md); /* Stupid unsigned-clamp-ish adjustment. */
|
||||
|
||||
_mm_storeu_si128((__m128i *)VD, vs);
|
||||
for (i = 0; i < 8; i++)
|
||||
fprintf(stderr, "VD[%u] = %d\n", i, VD[i]);
|
||||
#else
|
||||
uint32_t addend[N];
|
||||
register int i;
|
||||
|
82
mupen64plus-rsp-paraLLEl/arch/x86_64/rsp/vmuln.h
Normal file
82
mupen64plus-rsp-paraLLEl/arch/x86_64/rsp/vmuln.h
Normal file
@ -0,0 +1,82 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vmuln.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
template <bool VMADN>
|
||||
static inline __m128i rsp_vmadn_vmudn(__m128i vs, __m128i vt,
|
||||
__m128i zero, __m128i *acc_lo, __m128i *acc_md, __m128i *acc_hi) {
|
||||
__m128i lo, hi, sign, overflow_mask;
|
||||
|
||||
if (VMADN)
|
||||
{
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
fprintf(stderr, "ACC LO[%u] = %d\n", i, reinterpret_cast<int16_t*>(acc_lo)[i]);
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
fprintf(stderr, "ACC MD[%u] = %d\n", i, reinterpret_cast<int16_t*>(acc_md)[i]);
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
fprintf(stderr, "ACC HI[%u] = %d\n", i, reinterpret_cast<int16_t*>(acc_hi)[i]);
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
fprintf(stderr, "VS[%u] = %d\n", i, reinterpret_cast<int16_t*>(&vs)[i]);
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
fprintf(stderr, "VT[%u] = %d\n", i, reinterpret_cast<int16_t*>(&vt)[i]);
|
||||
}
|
||||
|
||||
lo = _mm_mullo_epi16(vs, vt);
|
||||
hi = _mm_mulhi_epu16(vs, vt);
|
||||
|
||||
// What we're really want to do is unsigned vs * signed vt.
|
||||
// However, we have no such instructions to do so.
|
||||
//
|
||||
// There's a trick to "fix" an unsigned product, though:
|
||||
// If vt was negative, take the upper 16-bits of the product
|
||||
// and subtract vs.
|
||||
sign = _mm_srai_epi16(vt, 15);
|
||||
vs = _mm_and_si128(vs, sign);
|
||||
hi = _mm_sub_epi16(hi, vs);
|
||||
|
||||
// VMADN
|
||||
if (VMADN) {
|
||||
// Tricky part: start accumulate everything.
|
||||
// Get/keep the carry as we'll add it in later.
|
||||
overflow_mask = _mm_adds_epu16(*acc_lo, lo);
|
||||
*acc_lo = _mm_add_epi16(*acc_lo, lo);
|
||||
|
||||
overflow_mask = _mm_cmpeq_epi16(*acc_lo, overflow_mask);
|
||||
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
|
||||
|
||||
// This is REALLY clever. Since the product results from
|
||||
// two 16-bit components, one positive and one negative,
|
||||
// we don't have to worry about carrying the 1 (we can
|
||||
// only borrow) past 32-bits. So we can just add it here.
|
||||
hi = _mm_sub_epi16(hi, overflow_mask);
|
||||
|
||||
// Check for overflow of the upper sum.
|
||||
overflow_mask = _mm_adds_epu16(*acc_md, hi);
|
||||
*acc_md = _mm_add_epi16(*acc_md, hi);
|
||||
|
||||
overflow_mask = _mm_cmpeq_epi16(*acc_md, overflow_mask);
|
||||
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
|
||||
|
||||
// Finish up the accumulation of the... accumulator.
|
||||
*acc_hi = _mm_add_epi16(*acc_hi, _mm_srai_epi16(hi, 15));
|
||||
*acc_hi = _mm_sub_epi16(*acc_hi, overflow_mask);
|
||||
//return rsp_uclamp_acc(*acc_lo, *acc_md, *acc_hi, zero);
|
||||
auto ret = rsp_uclamp_acc(*acc_lo, *acc_md, *acc_hi, zero);
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
fprintf(stderr, "VD[%u] = %d\n", i, reinterpret_cast<int16_t*>(&ret)[i]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// VMUDN
|
||||
else {
|
||||
*acc_lo = lo;
|
||||
*acc_md = hi;
|
||||
*acc_hi = _mm_srai_epi16(hi, 15);
|
||||
|
||||
return lo;
|
||||
}
|
||||
}
|
||||
|
@ -18,7 +18,8 @@ void RSP_DEBUG(RSP::CPUState *rsp, const char *tag, unsigned pc, unsigned value)
|
||||
{
|
||||
uint64_t hash = hash_imem((const uint8_t*)rsp->cp2.regs, sizeof(rsp->cp2.regs));
|
||||
fprintf(stderr, "%s (PC: %u): %u, %llu\n", tag, pc, value, hash);
|
||||
fprintf(stderr, " DMEM HASH: 0x%016llx\n", hash_imem((const uint8_t*)rsp->dmem, 0x1000));
|
||||
if (value)
|
||||
fprintf(stderr, " DMEM HASH: 0x%016llx\n", hash_imem((const uint8_t*)rsp->dmem, 0x1000));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -413,6 +414,10 @@ Func CPU::jit_region(uint64_t hash, unsigned pc, unsigned count)
|
||||
DISASM("RSP_RESERVED v%u, v%u, v%u[%u]\n", vd, vs, vt, e);
|
||||
//fprintf(stderr, "Unimplemented COP2 op %u.\n", op);
|
||||
}
|
||||
|
||||
#ifdef INTENSE_DEBUG
|
||||
APPEND("RSP_DEBUG(STATE, \"CP2\", %u, 0);\n", op);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -475,7 +480,9 @@ Func CPU::jit_region(uint64_t hash, unsigned pc, unsigned count)
|
||||
set_pc_indirect(rs);
|
||||
pipe_pending_return = true;
|
||||
DISASM("jr %s\n", NAME(rs));
|
||||
#ifdef INTENSE_DEBUG
|
||||
APPEND("RSP_DEBUG(STATE, \"JR\", pipe_branch_delay * 4, 0);\n");
|
||||
#endif
|
||||
break;
|
||||
|
||||
case 015: // BREAK
|
||||
|
Loading…
x
Reference in New Issue
Block a user