More intense debugging.

This commit is contained in:
Tiny Tiger 2016-08-06 23:56:17 +02:00 committed by twinaphex
parent 353ca97fba
commit 510ef2c738
4 changed files with 108 additions and 2 deletions

View File

@ -93,7 +93,6 @@ static INLINE unsigned SPECIAL(uint32_t inst, uint32_t PC)
{
uint64_t hash = hash_imem((const uint8_t*)VR, sizeof(VR));
fprintf(stderr, "JR (PC: %u): 0, %llu\n", temp_PC & 0xfff, hash);
fprintf(stderr, " DMEM HASH: 0x%016llx\n", hash_imem(RSP.DMEM, 0x1000));
}
return 1;
@ -486,6 +485,10 @@ EX:
const int e = (inst >> 21) & 0xF; /* rs & 0xF */
COP2_C2[opcode](vd, vs, vt, e);
{
uint64_t hash = hash_imem((const uint8_t*)VR, sizeof(VR));
fprintf(stderr, "CP2 (PC: %u): 0, %llu\n", opcode, hash);
}
}
else if (run_task_opcode(inst, inst >> 26))
{

View File

@ -15,6 +15,18 @@
INLINE static void do_madn(short* VD, short* VS, short* VT)
{
unsigned i;
for (i = 0; i < 8; i++)
fprintf(stderr, "ACC LO[%u] = %d\n", i, VACC_L[i]);
for (i = 0; i < 8; i++)
fprintf(stderr, "ACC MD[%u] = %d\n", i, VACC_M[i]);
for (i = 0; i < 8; i++)
fprintf(stderr, "ACC HI[%u] = %d\n", i, VACC_H[i]);
for (i = 0; i < 8; i++)
fprintf(stderr, "VS[%u] = %d\n", i, VS[i]);
for (i = 0; i < 8; i++)
fprintf(stderr, "VT[%u] = %d\n", i, VT[i]);
#ifdef ARCH_MIN_SSE2
__m128i acc_hi, acc_md, acc_lo;
__m128i prod_hi, prod_lo;
@ -76,6 +88,8 @@ INLINE static void do_madn(short* VD, short* VS, short* VT)
vs = _mm_xor_si128(vs, acc_md); /* Stupid unsigned-clamp-ish adjustment. */
_mm_storeu_si128((__m128i *)VD, vs);
for (i = 0; i < 8; i++)
fprintf(stderr, "VD[%u] = %d\n", i, VD[i]);
#else
uint32_t addend[N];
register int i;

View File

@ -0,0 +1,82 @@
//
// arch/x86_64/rsp/vmuln.h
//
// This file is subject to the terms and conditions defined in
// 'LICENSE', which is part of this source code package.
//
/// Shared implementation of the VMADN / VMUDN vector multiplies.
///
/// Each of the eight 16-bit lanes of `vs` is treated as unsigned and each lane
/// of `vt` as signed; the 32-bit product is split into a low half (`lo`) and a
/// sign-corrected high half (`hi`).
///
/// @tparam VMADN  true: add the product into the accumulator (VMADN);
///                false: overwrite the accumulator with the product (VMUDN).
/// @param vs      Left operand lanes (multiplied as unsigned).
/// @param vt      Right operand lanes (multiplied as signed).
/// @param zero    Caller-supplied all-zero register, used for mask inversion
///                and forwarded to rsp_uclamp_acc().
/// @param acc_lo  In/out: low accumulator slice.
/// @param acc_md  In/out: middle accumulator slice.
/// @param acc_hi  In/out: high accumulator slice.
/// @return VMADN: the value produced by rsp_uclamp_acc() on the updated
///         accumulator. VMUDN: the low 16 bits of each product.
///
/// NOTE: the VMADN path unconditionally traces its inputs and its result to
/// stderr so that the output can be compared against other implementations.
template <bool VMADN>
static inline __m128i rsp_vmadn_vmudn(__m128i vs, __m128i vt,
    __m128i zero, __m128i *acc_lo, __m128i *acc_md, __m128i *acc_hi) {
  __m128i lo, hi, sign, overflow_mask;

  // Debug helper: copy the eight 16-bit lanes into a plain array with an
  // unaligned store and print them as signed values, one per line. Going
  // through a real int16_t array avoids reading an __m128i object through a
  // reinterpret_cast'ed int16_t pointer.
  const auto dump_lanes = [](const char *label, __m128i reg) {
    int16_t lanes[8];
    _mm_storeu_si128(reinterpret_cast<__m128i *>(lanes), reg);
    for (unsigned i = 0; i < 8; i++)
      fprintf(stderr, "%s[%u] = %d\n", label, i, lanes[i]);
  };

  if (VMADN) {
    // Trace the state before anything is modified.
    dump_lanes("ACC LO", *acc_lo);
    dump_lanes("ACC MD", *acc_md);
    dump_lanes("ACC HI", *acc_hi);
    dump_lanes("VS", vs);
    dump_lanes("VT", vt);
  }

  lo = _mm_mullo_epi16(vs, vt);
  hi = _mm_mulhi_epu16(vs, vt);

  // What we really want to do is unsigned vs * signed vt.
  // However, we have no such instruction to do so.
  //
  // There's a trick to "fix" an unsigned product, though:
  // If vt was negative, take the upper 16-bits of the product
  // and subtract vs.
  sign = _mm_srai_epi16(vt, 15);   // all-ones in lanes where vt < 0
  vs = _mm_and_si128(vs, sign);    // keep vs only in those lanes
  hi = _mm_sub_epi16(hi, vs);

  // VMADN
  if (VMADN) {
    // Tricky part: start accumulating everything.
    // Get/keep the carry as we'll add it in later.
    //
    // Carry detection: a saturating add differs from a wrapping add exactly
    // when the unsigned sum overflowed. The first compare yields all-ones
    // where they match (no carry); comparing that against zero inverts it,
    // leaving all-ones (-1) in the lanes that carried.
    overflow_mask = _mm_adds_epu16(*acc_lo, lo);
    *acc_lo = _mm_add_epi16(*acc_lo, lo);
    overflow_mask = _mm_cmpeq_epi16(*acc_lo, overflow_mask);
    overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);

    // This is REALLY clever. Since the product results from
    // two 16-bit components, one positive and one negative,
    // we don't have to worry about carrying the 1 (we can
    // only borrow) past 32-bits. So we can just add it here.
    // (Subtracting the -1 mask adds the carry.)
    hi = _mm_sub_epi16(hi, overflow_mask);

    // Check for overflow of the upper sum (same carry trick as above).
    overflow_mask = _mm_adds_epu16(*acc_md, hi);
    *acc_md = _mm_add_epi16(*acc_md, hi);
    overflow_mask = _mm_cmpeq_epi16(*acc_md, overflow_mask);
    overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);

    // Finish up the accumulation of the... accumulator:
    // add the sign extension of hi, then the carry out of the middle slice.
    *acc_hi = _mm_add_epi16(*acc_hi, _mm_srai_epi16(hi, 15));
    *acc_hi = _mm_sub_epi16(*acc_hi, overflow_mask);

    const __m128i ret = rsp_uclamp_acc(*acc_lo, *acc_md, *acc_hi, zero);
    dump_lanes("VD", ret);
    return ret;
  }

  // VMUDN
  else {
    *acc_lo = lo;
    *acc_md = hi;
    *acc_hi = _mm_srai_epi16(hi, 15);
    return lo;
  }
}

View File

@ -18,7 +18,8 @@ void RSP_DEBUG(RSP::CPUState *rsp, const char *tag, unsigned pc, unsigned value)
{
uint64_t hash = hash_imem((const uint8_t*)rsp->cp2.regs, sizeof(rsp->cp2.regs));
fprintf(stderr, "%s (PC: %u): %u, %llu\n", tag, pc, value, hash);
fprintf(stderr, " DMEM HASH: 0x%016llx\n", hash_imem((const uint8_t*)rsp->dmem, 0x1000));
if (value)
fprintf(stderr, " DMEM HASH: 0x%016llx\n", hash_imem((const uint8_t*)rsp->dmem, 0x1000));
}
#endif
}
@ -413,6 +414,10 @@ Func CPU::jit_region(uint64_t hash, unsigned pc, unsigned count)
DISASM("RSP_RESERVED v%u, v%u, v%u[%u]\n", vd, vs, vt, e);
//fprintf(stderr, "Unimplemented COP2 op %u.\n", op);
}
#ifdef INTENSE_DEBUG
APPEND("RSP_DEBUG(STATE, \"CP2\", %u, 0);\n", op);
#endif
}
else
{
@ -475,7 +480,9 @@ Func CPU::jit_region(uint64_t hash, unsigned pc, unsigned count)
set_pc_indirect(rs);
pipe_pending_return = true;
DISASM("jr %s\n", NAME(rs));
#ifdef INTENSE_DEBUG
APPEND("RSP_DEBUG(STATE, \"JR\", pipe_branch_delay * 4, 0);\n");
#endif
break;
case 015: // BREAK