mirror of
https://github.com/mupen64plus-ae/parallel-rsp.git
synced 2025-02-21 13:20:54 +00:00
Initial commit.
This commit is contained in:
commit
4312dcecde
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
*.o
|
||||
*.bin
|
||||
*.elf
|
||||
/cmake-build-*
|
||||
/.idea
|
||||
*.iml
|
61
CMakeLists.txt
Normal file
61
CMakeLists.txt
Normal file
@ -0,0 +1,61 @@
|
||||
cmake_minimum_required(VERSION 3.5)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_C_STANDARD 99)
project(parallel-rsp LANGUAGES CXX C)

# Compiler-specific flags shared by the library and the runner executable.
# CMAKE_CXX_COMPILER_ID is passed by name (no ${}) so that if() dereferences it
# itself; an unquoted ${...} expansion breaks when the value is empty and can be
# dereferenced a second time if it happens to name another variable.
if (CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    set(PARALLEL_RSP_CXX_FLAGS
        -Wall
        -Wextra
        -Wno-missing-field-initializers
        -Wno-empty-body
        -ffast-math
        -Wno-unused-parameter)
elseif (MSVC)
    set(PARALLEL_RSP_CXX_FLAGS
        /D_CRT_SECURE_NO_WARNINGS
        /wd4267
        /wd4244
        /wd4309
        /wd4005
        /MP
        /DNOMINMAX)
endif()

# Static library with the RSP implementation. Headers are listed so IDE
# generators show them; each file appears exactly once (vcr.h used to be
# listed twice). main.cpp is kept here as in the original list; it is also
# compiled directly into rsp-runner below.
add_library(parallel-rsp STATIC
    main.cpp
    rsp/vfunctions.cpp
    rsp.cpp rsp.hpp
    debug_jit.cpp debug_jit.hpp
    rsp/ls.cpp rsp/pipeline.h
    rsp/reciprocal.cpp rsp/reciprocal.h
    rsp_1.1.h
    rsp/cp0.cpp rsp/cp2.cpp
    arch/x86_64/rsp/rsp_core.cpp
    arch/x86_64/rsp/clamp.h
    arch/x86_64/rsp/rsp.h
    arch/x86_64/rsp/rsp_impl.h
    arch/x86_64/rsp/vabs.h
    arch/x86_64/rsp/vadd.h
    arch/x86_64/rsp/vaddc.h
    arch/x86_64/rsp/vand.h
    arch/x86_64/rsp/vch.h
    arch/x86_64/rsp/vcl.h
    arch/x86_64/rsp/vcr.h
    arch/x86_64/rsp/vcmp.h
    arch/x86_64/rsp/vdivh.h
    arch/x86_64/rsp/vmac.h
    arch/x86_64/rsp/vmov.h
    arch/x86_64/rsp/vmrg.h
    arch/x86_64/rsp/vmudh.h
    arch/x86_64/rsp/vmul.h
    arch/x86_64/rsp/vmull.h
    arch/x86_64/rsp/vmulh.h
    arch/x86_64/rsp/vmuln.h
    arch/x86_64/rsp/vor.h
    arch/x86_64/rsp/vrcpsq.h
    arch/x86_64/rsp/vrsq.h
    arch/x86_64/rsp/vsub.h
    arch/x86_64/rsp/vsubc.h
    arch/x86_64/rsp/vxor.h
    arch/x86_64/rsp/vmulm.h)

target_include_directories(parallel-rsp PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/arch/x86_64/rsp)
target_compile_options(parallel-rsp PRIVATE ${PARALLEL_RSP_CXX_FLAGS})
target_compile_definitions(parallel-rsp PUBLIC DEBUG_JIT)
# CMAKE_DL_LIBS names the platform's dynamic-loading library ("dl" where one is
# needed, empty elsewhere). PUBLIC keeps the dependency transitive, which is
# what the keyword-less signature did for this static library.
target_link_libraries(parallel-rsp PUBLIC ${CMAKE_DL_LIBS})

add_executable(rsp-runner main.cpp)
target_link_libraries(rsp-runner PRIVATE parallel-rsp)
target_compile_options(rsp-runner PRIVATE ${PARALLEL_RSP_CXX_FLAGS})
# Export the executable's symbols through the portable property instead of a
# hard-coded "-rdynamic"; CMake emits the right linker flag per toolchain.
set_target_properties(rsp-runner PROPERTIES ENABLE_EXPORTS ON)
|
||||
|
13
CREDITS.txt
Normal file
13
CREDITS.txt
Normal file
@ -0,0 +1,13 @@
|
||||
Written by Themaister.
|
||||
|
||||
The code is heavily reliant on MarathonMan's CEN64 RSP implementation, as well as CXD4's RSP implementation.
|
||||
|
||||
MIPS core: Rewritten from scratch
|
||||
CP0: Near copy-pasta from CEN64
|
||||
CP2: Near copy-pasta from CEN64
|
||||
LS pipe: Near copy-pasta from CXD4
|
||||
Mupen64plus glue code: Reused most of CXD4.
|
||||
Lightning jitter interface: Written from scratch
|
||||
|
||||
The plugin's focus is to support dynamic recompilation for performance,
|
||||
instead of being pure interpreters as CEN64 and CXD4's implementations are.
|
46
arch/x86_64/rsp/clamp.h
Normal file
46
arch/x86_64/rsp/clamp.h
Normal file
@ -0,0 +1,46 @@
|
||||
//
|
||||
// arch/x86_64/rsp/clamp.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Signed clamp of the accumulator's HI:MD pair down to 16 bits per lane.
//
// Each lane's MD word (low half) and HI word (high half) are interleaved into
// a 32-bit value, and the eight 32-bit values are then packed back to 16 bits
// with signed saturation.
static inline __m128i rsp_sclamp_acc_tomd(
  __m128i acc_md, __m128i acc_hi) {
  return _mm_packs_epi32(
    _mm_unpacklo_epi16(acc_md, acc_hi),   // lanes 0..3 as 32-bit HI:MD
    _mm_unpackhi_epi16(acc_md, acc_hi));  // lanes 4..7 as 32-bit HI:MD
}
|
||||
|
||||
// Per-lane select between val and a saturated constant, driven by the
// accumulator's HI and MD words.
//
// A lane keeps val only when HI equals its own sign fill (0x0000 or 0xFFFF)
// and MD has the same sign as HI. Otherwise the lane becomes 0xFFFF when HI is
// non-negative and 0x0000 when HI is negative.
//
// zero is compared against the HI sign fill; callers pass an all-zero vector.
static inline __m128i rsp_uclamp_acc(__m128i val,
  __m128i acc_md, __m128i acc_hi, __m128i zero) {
  // Arithmetic shift by 15 turns every lane into 0xFFFF (negative) or 0x0000.
  const __m128i hi_sign = _mm_srai_epi16(acc_hi, 15);
  const __m128i md_sign = _mm_srai_epi16(acc_md, 15);

  // All-ones in the lanes that do NOT need clamping.
  const __m128i keep = _mm_and_si128(
    _mm_cmpeq_epi16(hi_sign, md_sign),
    _mm_cmpeq_epi16(hi_sign, acc_hi));

  // Replacement value for clamped lanes: 0xFFFF if HI >= 0, else 0x0000.
  const __m128i saturated = _mm_cmpeq_epi16(hi_sign, zero);

#ifdef __SSE4_1__
  return _mm_blendv_epi8(saturated, val, keep);
#else
  return _mm_or_si128(
    _mm_andnot_si128(keep, saturated),
    _mm_and_si128(keep, val));
#endif
}
|
||||
|
147
arch/x86_64/rsp/rsp.h
Normal file
147
arch/x86_64/rsp/rsp.h
Normal file
@ -0,0 +1,147 @@
|
||||
//
|
||||
// arch/x86_64/rsp/rsp.h
|
||||
//
|
||||
// Extern declarations for host RSP functions.
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
#ifndef __arch_rsp_h__
|
||||
#define __arch_rsp_h__
|
||||
|
||||
#ifdef __SSE4_2__
|
||||
#include <nmmintrin.h>
|
||||
#elif defined(__SSE4_1__)
|
||||
#include <smmintrin.h>
|
||||
#elif defined(__SSSE3__)
|
||||
#include <tmmintrin.h>
|
||||
#elif defined(__SSE3__)
|
||||
#include <pmmintrin.h>
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef __m128i rsp_vect_t;
|
||||
|
||||
namespace RSP
|
||||
{
|
||||
struct CPUState;
|
||||
}
|
||||
|
||||
// Loads a 16x8 vector from src and rearranges its lanes as selected by
// element.
//
// With SSSE3 the rearrangement is a single byte shuffle whose control vector
// is row `element` of shuffle_keys (16 rows, so element must be 0..15). src
// and the table rows are read with aligned loads. Without SSSE3 only the
// prototype is declared here and the implementation lives out of line.
#ifdef __SSSE3__
extern const uint16_t shuffle_keys[16][8];

static inline __m128i rsp_vect_load_and_shuffle_operand(
  const uint16_t *src, unsigned element) {
  const __m128i lanes = _mm_load_si128((const __m128i *) src);
  const __m128i control =
    _mm_load_si128((const __m128i *) shuffle_keys[element]);

  return _mm_shuffle_epi8(lanes, control);
}
#else
__m128i rsp_vect_load_and_shuffle_operand(
  const uint16_t *src, unsigned element);
#endif
|
||||
|
||||
// Loads eight 16-bit lanes from src exactly as stored (no lane shuffle).
// Uses an aligned load, so src must be 16-byte aligned.
static inline __m128i rsp_vect_load_unshuffled_operand(const uint16_t *src) {
  const __m128i *aligned_src = (const __m128i *) src;

  return _mm_load_si128(aligned_src);
}
|
||||
|
||||
// Stores the eight 16-bit lanes of src to dest.
// Uses an aligned store, so dest must be 16-byte aligned.
static inline void rsp_vect_write_operand(uint16_t *dest, __m128i src) {
  __m128i *aligned_dest = (__m128i *) dest;

  _mm_store_si128(aligned_dest, src);
}
|
||||
|
||||
// Readers for the accumulator and flag register files.
//
// Each register file is an array of 16-bit lanes made of 8-lane slices; every
// reader performs one aligned 128-bit load of its slice:
//   acc: HI at [0..7], MD at [8..15], LO at [16..23]
//   vcc: HI at [0..7], LO at [8..15]
//   vco: HI at [0..7], LO at [8..15]
//   vce: the slice at [8..15]
static inline __m128i read_acc_lo(const uint16_t *acc) {
  return _mm_load_si128((const __m128i *) &acc[16]);
}

static inline __m128i read_acc_md(const uint16_t *acc) {
  return _mm_load_si128((const __m128i *) &acc[8]);
}

static inline __m128i read_acc_hi(const uint16_t *acc) {
  return _mm_load_si128((const __m128i *) &acc[0]);
}

static inline __m128i read_vcc_lo(const uint16_t *vcc) {
  return _mm_load_si128((const __m128i *) &vcc[8]);
}

static inline __m128i read_vcc_hi(const uint16_t *vcc) {
  return _mm_load_si128((const __m128i *) &vcc[0]);
}

static inline __m128i read_vco_lo(const uint16_t *vco) {
  return _mm_load_si128((const __m128i *) &vco[8]);
}

static inline __m128i read_vco_hi(const uint16_t *vco) {
  return _mm_load_si128((const __m128i *) &vco[0]);
}

static inline __m128i read_vce(const uint16_t *vce) {
  return _mm_load_si128((const __m128i *) &vce[8]);
}
|
||||
// Writers for the accumulator and flag register files.
//
// Each writer performs one aligned 128-bit store into its 8-lane slice:
//   acc: HI at [0..7], MD at [8..15], LO at [16..23]
//   vcc: HI at [0..7], LO at [8..15]
//   vco: HI at [0..7], LO at [8..15]
//   vce: the slice at [8..15]
static inline void write_acc_lo(uint16_t *acc, __m128i acc_lo) {
  _mm_store_si128((__m128i *) &acc[16], acc_lo);
}

static inline void write_acc_md(uint16_t *acc, __m128i acc_md) {
  _mm_store_si128((__m128i *) &acc[8], acc_md);
}

static inline void write_acc_hi(uint16_t *acc, __m128i acc_hi) {
  _mm_store_si128((__m128i *) &acc[0], acc_hi);
}

static inline void write_vcc_lo(uint16_t *vcc, __m128i vcc_lo) {
  _mm_store_si128((__m128i *) &vcc[8], vcc_lo);
}

static inline void write_vcc_hi(uint16_t *vcc, __m128i vcc_hi) {
  _mm_store_si128((__m128i *) &vcc[0], vcc_hi);
}

static inline void write_vco_lo(uint16_t *vco, __m128i vco_lo) {
  _mm_store_si128((__m128i *) &vco[8], vco_lo);
}

static inline void write_vco_hi(uint16_t *vco, __m128i vco_hi) {
  _mm_store_si128((__m128i *) &vco[0], vco_hi);
}

static inline void write_vce(uint16_t *vce, __m128i vce_r) {
  _mm_store_si128((__m128i *) &vce[8], vce_r);
}
|
||||
|
||||
// Collapses a 16-lane flag register (VCO/VCC/VCE) into a 16-bit scalar mask.
//
// The two 8-lane halves are packed to bytes with signed saturation, so each
// lane's sign survives as the top bit of its byte; movemask then gathers those
// top bits. Lanes flags[8..15] land in result bits 0..7 and lanes flags[0..7]
// in bits 8..15. flags must be 16-byte aligned (aligned loads).
static inline int16_t rsp_get_flags(const uint16_t *flags) {
  const __m128i low_bits = _mm_load_si128((const __m128i *) (flags + 8));
  const __m128i high_bits = _mm_load_si128((const __m128i *) (flags + 0));
  const __m128i as_bytes = _mm_packs_epi16(low_bits, high_bits);

  return (int16_t) _mm_movemask_epi8(as_bytes);
}
|
||||
|
||||
void rsp_set_flags(uint16_t *flags, uint16_t rt);
|
||||
|
||||
// Returns a vector with all 128 bits cleared.
static inline __m128i rsp_vzero(void) {
  const __m128i all_zero = _mm_setzero_si128();

  return all_zero;
}
|
||||
|
||||
extern const uint16_t vdiv_mask_table[8][8];
|
||||
|
||||
// Address swizzles: XOR the byte address so the matching byte (BES) or
// halfword (HES) is picked out of memory accessed through host 32-bit words.
// MES flips only the lowest address bit.
#define HES(x) ((x) ^ 2)
#define BES(x) ((x) ^ 3)
#define MES(x) ((x) ^ 1)

// Typed accessors over a raw memory pointer. `addr` is a byte address; the
// U16/U32 forms convert it to an element index by shifting. Every macro
// argument is parenthesised so compound expressions such as `base & mask` or
// `a | b` are evaluated before the shift (previously `addr >> 2` bound tighter
// than `&`, `|` and `^` inside the argument).
#define READ_MEM_U8(mem, addr) \
  (reinterpret_cast<const uint8_t*>(mem)[BES(addr)])
#define READ_MEM_U16(mem, addr) \
  (reinterpret_cast<const uint16_t*>(mem)[HES(addr) >> 1])
#define READ_MEM_U32(mem, addr) \
  (reinterpret_cast<const uint32_t*>(mem)[(addr) >> 2])

#define WRITE_MEM_U8(mem, addr, data) \
  (reinterpret_cast<uint8_t*>(mem)[BES(addr)] = (data))
#define WRITE_MEM_U16(mem, addr, data) \
  (reinterpret_cast<uint16_t*>(mem)[HES(addr) >> 1] = (data))
#define WRITE_MEM_U32(mem, addr, data) \
  (reinterpret_cast<uint32_t*>(mem)[(addr) >> 2] = (data))
|
||||
|
||||
#endif
|
||||
|
600
arch/x86_64/rsp/rsp_core.cpp
Normal file
600
arch/x86_64/rsp/rsp_core.cpp
Normal file
@ -0,0 +1,600 @@
|
||||
//
|
||||
// arch/x86_64/rsp/rsp_core.cpp
|
||||
//
|
||||
// Definitions for host RSP functions.
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
#include "rsp.h"
|
||||
#include <string.h>
|
||||
#include "../../../rsp.hpp"
|
||||
|
||||
#ifdef __SSSE3__
|
||||
//
|
||||
// This table is used to "shuffle" the RSP vector after loading it.
|
||||
//
|
||||
alignas(64) const uint16_t shuffle_keys[16][8] = {
|
||||
/* -- */ {0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0B0A, 0x0D0C, 0x0F0E},
|
||||
/* -- */ {0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0B0A, 0x0D0C, 0x0F0E},
|
||||
|
||||
/* 0q */ {0x0100, 0x0100, 0x0504, 0x0504, 0x0908, 0x0908, 0x0D0C, 0x0D0C},
|
||||
/* 1q */ {0x0302, 0x0302, 0x0706, 0x0706, 0x0B0A, 0x0B0A, 0x0F0E, 0x0F0E},
|
||||
|
||||
/* 0h */ {0x0100, 0x0100, 0x0100, 0x0100, 0x0908, 0x0908, 0x0908, 0x0908},
|
||||
/* 1h */ {0x0302, 0x0302, 0x0302, 0x0302, 0x0B0A, 0x0B0A, 0x0B0A, 0x0B0A},
|
||||
/* 2h */ {0x0504, 0x0504, 0x0504, 0x0504, 0x0D0C, 0x0D0C, 0x0D0C, 0x0D0C},
|
||||
/* 3h */ {0x0706, 0x0706, 0x0706, 0x0706, 0x0F0E, 0x0F0E, 0x0F0E, 0x0F0E},
|
||||
|
||||
/* 0w */ {0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100},
|
||||
/* 1w */ {0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302},
|
||||
/* 2w */ {0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504},
|
||||
/* 3w */ {0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706},
|
||||
/* 4w */ {0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908},
|
||||
/* 5w */ {0x0B0A, 0x0B0A, 0x0B0A, 0x0B0A, 0x0B0A, 0x0B0A, 0x0B0A, 0x0B0A},
|
||||
/* 6w */ {0x0D0C, 0x0D0C, 0x0D0C, 0x0D0C, 0x0D0C, 0x0D0C, 0x0D0C, 0x0D0C},
|
||||
/* 7w */ {0x0F0E, 0x0F0E, 0x0F0E, 0x0F0E, 0x0F0E, 0x0F0E, 0x0F0E, 0x0F0E},
|
||||
};
|
||||
#endif
|
||||
|
||||
//
|
||||
// These tables are used to shift data loaded from DMEM.
|
||||
// In addition to shifting, they also take into account that
|
||||
// DMEM uses big-endian byte ordering, whereas vectors are
|
||||
// 2-byte little-endian.
|
||||
//
|
||||
|
||||
// Shift left LUT; shifts in zeros from the right, one byte at a time.
|
||||
alignas(64) static const uint16_t sll_b2l_keys[16][8] = {
|
||||
{0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F},
|
||||
{0x8000, 0x0102, 0x0304, 0x0506, 0x0708, 0x090A, 0x0B0C, 0x0D0E},
|
||||
{0x8080, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D},
|
||||
{0x8080, 0x8000, 0x0102, 0x0304, 0x0506, 0x0708, 0x090A, 0x0B0C},
|
||||
|
||||
{0x8080, 0x8080, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B},
|
||||
{0x8080, 0x8080, 0x8000, 0x0102, 0x0304, 0x0506, 0x0708, 0x090A},
|
||||
{0x8080, 0x8080, 0x8080, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809},
|
||||
{0x8080, 0x8080, 0x8080, 0x8000, 0x0102, 0x0304, 0x0506, 0x0708},
|
||||
|
||||
{0x8080, 0x8080, 0x8080, 0x8080, 0x0001, 0x0203, 0x0405, 0x0607},
|
||||
{0x8080, 0x8080, 0x8080, 0x8080, 0x8000, 0x0102, 0x0304, 0x0506},
|
||||
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0001, 0x0203, 0x0405},
|
||||
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8000, 0x0102, 0x0304},
|
||||
|
||||
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0001, 0x0203},
|
||||
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8000, 0x0102},
|
||||
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0001},
|
||||
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8000},
|
||||
};
|
||||
|
||||
// Shift left LUT; shifts low order to high order, inserting 0x00s.
//
// Row s is a byte-shuffle control that both swaps the bytes of each 16-bit
// lane and shifts the result up by s bytes: destination byte k takes source
// byte ((k - s) ^ 1) when k >= s, and is zeroed (key 0x80) otherwise. Each
// table entry holds two keys: low byte for destination byte 2i, high byte for
// destination byte 2i + 1.
//
// Row 1's last entry is 0x0F0C (it previously read 0x0E0C, which made
// destination byte 15 duplicate source byte 14 and broke the pattern every
// other row follows).
alignas(64) static const uint16_t sll_l2b_keys[16][8] = {
  {0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F},
  {0x0180, 0x0300, 0x0502, 0x0704, 0x0906, 0x0B08, 0x0D0A, 0x0F0C},
  {0x8080, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D},
  {0x8080, 0x0180, 0x0300, 0x0502, 0x0704, 0x0906, 0x0B08, 0x0D0A},

  {0x8080, 0x8080, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B},
  {0x8080, 0x8080, 0x0180, 0x0300, 0x0502, 0x0704, 0x0906, 0x0B08},
  {0x8080, 0x8080, 0x8080, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809},
  {0x8080, 0x8080, 0x8080, 0x0180, 0x0300, 0x0502, 0x0704, 0x0906},

  {0x8080, 0x8080, 0x8080, 0x8080, 0x0001, 0x0203, 0x0405, 0x0607},
  {0x8080, 0x8080, 0x8080, 0x8080, 0x0180, 0x0300, 0x0502, 0x0704},
  {0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0001, 0x0203, 0x0405},
  {0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0180, 0x0300, 0x0502},

  {0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0001, 0x0203},
  {0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0180, 0x0300},
  {0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0001},
  {0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0180},
};
|
||||
|
||||
// Shift right LUT; shifts in zeros from the left, one byte at a time.
|
||||
alignas(64) static const uint16_t srl_b2l_keys[16][8] = {
|
||||
{0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F},
|
||||
{0x0102, 0x0304, 0x0506, 0x0708, 0x090A, 0x0B0C, 0x0D0E, 0x0F80},
|
||||
{0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x8080},
|
||||
{0x0304, 0x0506, 0x0708, 0x090A, 0x0B0C, 0x0D0E, 0x0F80, 0x8080},
|
||||
|
||||
{0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x8080, 0x8080},
|
||||
{0x0506, 0x0708, 0x090A, 0x0B0C, 0x0D0E, 0x0F80, 0x8080, 0x8080},
|
||||
{0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x8080, 0x8080, 0x8080},
|
||||
{0x0708, 0x090A, 0x0B0C, 0x0D0E, 0x0F80, 0x8080, 0x8080, 0x8080},
|
||||
|
||||
{0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x8080, 0x8080, 0x8080, 0x8080},
|
||||
{0x090A, 0x0B0C, 0x0D0E, 0x0F80, 0x8080, 0x8080, 0x8080, 0x8080},
|
||||
{0x0A0B, 0x0C0D, 0x0E0F, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080},
|
||||
{0x0B0C, 0x0D0E, 0x0F80, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080},
|
||||
|
||||
{0x0C0D, 0x0E0F, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080},
|
||||
{0x0D0E, 0x0F80, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080},
|
||||
{0x0E0F, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080},
|
||||
{0x0F80, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080},
|
||||
};
|
||||
|
||||
alignas(64) static const uint16_t ror_b2l_keys[16][8] = {
|
||||
{0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F},
|
||||
{0x0102, 0x0304, 0x0506, 0x0708, 0x090A, 0x0B0C, 0x0D0E, 0x0F00},
|
||||
{0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x0001},
|
||||
{0x0304, 0x0506, 0x0708, 0x090A, 0x0B0C, 0x0D0E, 0x0F00, 0x0102},
|
||||
|
||||
{0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x0001, 0x0203},
|
||||
{0x0506, 0x0708, 0x090A, 0x0B0C, 0x0D0E, 0x0F00, 0x0102, 0x0304},
|
||||
{0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x0001, 0x0203, 0x0405},
|
||||
{0x0708, 0x090A, 0x0B0C, 0x0D0E, 0x0F00, 0x0102, 0x0304, 0x0506},
|
||||
|
||||
{0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x0001, 0x0203, 0x0405, 0x0607},
|
||||
{0x090A, 0x0B0C, 0x0D0E, 0x0F00, 0x0102, 0x0304, 0x0506, 0x0708},
|
||||
{0x0A0B, 0x0C0D, 0x0E0F, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809},
|
||||
{0x0B0C, 0x0D0E, 0x0F00, 0x0102, 0x0304, 0x0506, 0x0708, 0x090A},
|
||||
|
||||
{0x0C0D, 0x0E0F, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B},
|
||||
{0x0D0E, 0x0F00, 0x0102, 0x0304, 0x0506, 0x0708, 0x090A, 0x0B0C},
|
||||
{0x0E0F, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D},
|
||||
{0x0F00, 0x0102, 0x0304, 0x0506, 0x0708, 0x090A, 0x0B0C, 0x0D0E},
|
||||
};
|
||||
|
||||
// Rotate left LUT; rotates high order bytes back to low order.
|
||||
alignas(64) static const uint16_t rol_l2b_keys[16][8] = {
|
||||
{0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F},
|
||||
{0x010E, 0x0300, 0x0502, 0x0704, 0x0906, 0x0B08, 0x0D0A, 0x0F0C},
|
||||
{0x0E0F, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D},
|
||||
{0x0F0C, 0x010E, 0x0300, 0x0502, 0x0704, 0x0906, 0x0B08, 0x0D0A},
|
||||
|
||||
{0x0C0D, 0x0E0F, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B},
|
||||
{0x0D0A, 0x0F0C, 0x010E, 0x0300, 0x0502, 0x0704, 0x0906, 0x0B08},
|
||||
{0x0A0B, 0x0C0D, 0x0E0F, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809},
|
||||
{0x0B08, 0x0D0A, 0x0F0C, 0x010E, 0x0300, 0x0502, 0x0704, 0x0906},
|
||||
|
||||
{0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x0001, 0x0203, 0x0405, 0x0607},
|
||||
{0x0906, 0x0B08, 0x0D0A, 0x0F0C, 0x010E, 0x0300, 0x0502, 0x0704},
|
||||
{0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x0001, 0x0203, 0x0405},
|
||||
{0x0704, 0x0906, 0x0B08, 0x0D0A, 0x0F0C, 0x010E, 0x0300, 0x0502},
|
||||
|
||||
{0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x0001, 0x0203},
|
||||
{0x0502, 0x0704, 0x0906, 0x0B08, 0x0D0A, 0x0F0C, 0x010E, 0x0300},
|
||||
{0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x0001},
|
||||
{0x0300, 0x0502, 0x0704, 0x0906, 0x0B08, 0x0D0A, 0x0F0C, 0x010E},
|
||||
};
|
||||
|
||||
// Rotate right LUT; rotates high order bytes back to low order.
|
||||
alignas(64) static const uint16_t ror_l2b_keys[16][8] = {
|
||||
{0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F},
|
||||
{0x0300, 0x0502, 0x0704, 0x0906, 0x0B08, 0x0D0A, 0x0F0C, 0x010E},
|
||||
{0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x0001},
|
||||
{0x0502, 0x0704, 0x0906, 0x0B08, 0x0D0A, 0x0F0C, 0x010E, 0x0300},
|
||||
|
||||
{0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x0001, 0x0203},
|
||||
{0x0704, 0x0906, 0x0B08, 0x0D0A, 0x0F0C, 0x010E, 0x0300, 0x0502},
|
||||
{0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x0001, 0x0203, 0x0405},
|
||||
{0x0906, 0x0B08, 0x0D0A, 0x0F0C, 0x010E, 0x0300, 0x0502, 0x0704},
|
||||
|
||||
{0x0809, 0x0A0B, 0x0C0D, 0x0E0F, 0x0001, 0x0203, 0x0405, 0x0607},
|
||||
{0x0B08, 0x0D0A, 0x0F0C, 0x010E, 0x0300, 0x0502, 0x0704, 0x0906},
|
||||
{0x0A0B, 0x0C0D, 0x0E0F, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809},
|
||||
{0x0D0A, 0x0F0C, 0x010E, 0x0300, 0x0502, 0x0704, 0x0906, 0x0B08},
|
||||
|
||||
{0x0C0D, 0x0E0F, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B},
|
||||
{0x0F0C, 0x010E, 0x0300, 0x0502, 0x0704, 0x0906, 0x0B08, 0x0D0A},
|
||||
{0x0E0F, 0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D},
|
||||
{0x010E, 0x0300, 0x0502, 0x0704, 0x0906, 0x0B08, 0x0D0A, 0x0F0C},
|
||||
};
|
||||
|
||||
#ifndef __SSSE3__
|
||||
// Scalar stand-in for the SSSE3 byte shuffle, used when only SSE2 is
// available.
//
// For each of the 16 destination bytes, keys[j] selects the source byte of v:
// a key with its top bit set (0x80..0xFF) yields zero, otherwise the low four
// bits index into v. Only 0x00..0x0F and 0x80 occur in this file's tables;
// the wider handling keeps any other key from indexing outside the scratch
// buffers.
static inline __m128i sse2_pshufb_loop8(__m128i v, const uint8_t *keys) {
  alignas(16) uint8_t src[16];
  alignas(16) uint8_t dst[16];
  unsigned j;

  _mm_store_si128((__m128i *) src, v);

  for (j = 0; j < 16; j++) {
    const uint8_t key = keys[j];

    dst[j] = (key & 0x80) ? 0 : src[key & 0x0F];
  }

  return _mm_load_si128((const __m128i *) dst);
}

// Convenience wrapper taking a row of one of the uint16_t key tables; the row
// is reinterpreted as 16 individual byte keys.
static inline __m128i sse2_pshufb(__m128i v, const uint16_t *keys) {
  return sse2_pshufb_loop8(v, reinterpret_cast<const uint8_t *>(keys));
}
|
||||
#endif
|
||||
|
||||
// Expands the 16-bit scalar rt into a 16-lane flag register.
//
// Each bit of rt becomes one 16-bit lane holding 0xFFFF (bit set) or 0x0000
// (bit clear). Bits 0..7 fill lanes flags[8..15] and bits 8..15 fill lanes
// flags[0..7], i.e. bit b is written to flags[(b + 8) & 15].
void rsp_set_flags(uint16_t *flags, uint16_t rt) {
  unsigned bit;

  for (bit = 0; bit < 16; bit++) {
    const uint16_t lane = ((rt >> bit) & 1) ? 0xFFFF : 0x0000;

    flags[(bit + 8) & 0xF] = lane;
  }
}
|
||||
|
||||
#ifndef __SSSE3__
|
||||
// SSE2-only implementation of the element load-and-shuffle.
//
// element selects how the eight 16-bit lanes of src are replicated:
//   0, 1    whole vector, unchanged
//   2, 3    "quarter": each even (2) or odd (3) lane duplicated into its pair
//   4..7    "half": lane (element - 4) fills lanes 0..3, lane element fills 4..7
//   8..15   "whole": lane (element - 8) broadcast to all eight lanes
// Any other value traps (or is declared unreachable when NDEBUG is defined).
// The full-vector cases use aligned loads, so src must be 16-byte aligned.
__m128i rsp_vect_load_and_shuffle_operand(
  const uint16_t *src, unsigned element) {
  __m128i v;

  if (element < 2)
    return _mm_load_si128((__m128i *) src);

  if (element == 2) {
    // element => 0q
    v = _mm_load_si128((__m128i *) src);
    v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(2,2,0,0));
    return _mm_shufflehi_epi16(v, _MM_SHUFFLE(2,2,0,0));
  }

  if (element == 3) {
    // element => 1q
    v = _mm_load_si128((__m128i *) src);
    v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(3,3,1,1));
    return _mm_shufflehi_epi16(v, _MM_SHUFFLE(3,3,1,1));
  }

  if (element < 8) {
    // element => 0h ... 3h
    // Empty asm that names v as an output; presumably this stops the compiler
    // from treating the insert below as a read of an uninitialised value.
    __asm__("" : "=x"(v)); /* Do not remove. */
    v = _mm_insert_epi16(v, src[element - 4], 0);
    v = _mm_insert_epi16(v, src[element - 0], 1);
    v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(1,1,0,0));
    return _mm_shuffle_epi32(v, _MM_SHUFFLE(1,1,0,0));
  }

  if (element < 16) {
    // element => 0w ... 7w
    __asm__("" : "=x"(v)); /* Do not remove. */
    v = _mm_insert_epi16(v, src[element - 8], 0);
    v = _mm_unpacklo_epi16(v, v);
    return _mm_shuffle_epi32(v, _MM_SHUFFLE(0,0,0,0));
  }

#ifdef NDEBUG
  __builtin_unreachable();
#else
  __builtin_trap();
#endif
}
|
||||
#endif
|
||||
|
||||
//
|
||||
// SSSE3+ accelerated loads for group I. Byteswap big-endian to 2-byte
|
||||
// little-endian vector. Start at vector element offset, discarding any
|
||||
// wraparound as necessary.
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when loads to vector elements must
|
||||
// wraparound. Do we just discard the data, as below, or does the
|
||||
// data effectively get rotated around the edge of the vector?
|
||||
//
|
||||
// Group I vector load.
//
// Reads from rsp->dmem, rotates the bytes with ror_b2l_keys so that they line
// up with the destination element, and merges them into reg under the byte
// mask dqm (itself shifted up by `element` bytes via sll_b2l_keys). The merged
// vector is written to regp with an aligned store.
//
// When addr is not 8-byte aligned, the two 8-byte chunks around it are read;
// the second chunk's address wraps at 0x1000. When it is aligned, a single
// 8-byte chunk is read and the upper half of the data vector is zero.
void rsp_vload_group1(RSP::CPUState *rsp, uint32_t addr, unsigned element,
  uint16_t *regp, rsp_vect_t reg, rsp_vect_t dqm) {
  const unsigned offset = addr & 0x7;
  const unsigned rotate = (offset - element) & 0xF;
  __m128i data;

  // Always load in 8-byte chunks to emulate wraparound.
  if (offset == 0) {
    data = _mm_loadl_epi64((__m128i *) (rsp->dmem + addr));
  }

  else {
    const uint32_t chunk_lo = addr & ~0x7;
    const uint32_t chunk_hi = (chunk_lo + 8) & 0xFFF;
    const __m128i upper = _mm_loadl_epi64((__m128i *) (rsp->dmem + chunk_hi));

    data = _mm_loadl_epi64((__m128i *) (rsp->dmem + chunk_lo));
    data = _mm_unpacklo_epi64(data, upper);
  }

  // Shift the mask up to the target element and rotate the data to match.
#ifdef __SSSE3__
  dqm = _mm_shuffle_epi8(dqm,
    _mm_load_si128((const __m128i *) (sll_b2l_keys[element])));
  data = _mm_shuffle_epi8(data,
    _mm_load_si128((const __m128i *) (ror_b2l_keys[rotate])));
#else
  dqm = sse2_pshufb(dqm, sll_b2l_keys[element]);
  data = sse2_pshufb(data, ror_b2l_keys[rotate]);
#endif

  // Take data where the mask is set, keep the old register elsewhere.
#ifdef __SSE4_1__
  reg = _mm_blendv_epi8(reg, data, dqm);
#else
  reg = _mm_or_si128(_mm_and_si128(dqm, data), _mm_andnot_si128(dqm, reg));
#endif

  _mm_store_si128((__m128i *) regp, reg);
}
|
||||
|
||||
//
|
||||
// SSSE3+ accelerated loads for group II.
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when loads to vector elements must
|
||||
// wraparound. Do we just discard the data, as below, or does the
|
||||
// data effectively get rotated around the edge of the vector?
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when element != 0.
|
||||
//
|
||||
// Group II vector load.
//
// Fetches 8 bytes from rsp->dmem starting at addr, widens each byte into the
// high half of a 16-bit lane, shifts every lane right by one bit and writes
// the result to regp with an aligned store. element, reg and dqm are not used.
//
// When addr is not 8-byte aligned the 8 bytes are assembled from the two
// surrounding 8-byte chunks (the second chunk's address wraps at 0x1000).
void rsp_vload_group2(RSP::CPUState *rsp, uint32_t addr, unsigned element,
  uint16_t *regp, rsp_vect_t reg, rsp_vect_t dqm) {
  const unsigned offset = addr & 0x7;
  __m128i data;

  // Always load in 8-byte chunks to emulate wraparound.
  if (offset == 0) {
    data = _mm_loadl_epi64((__m128i *) (rsp->dmem + addr));
  }

  else {
    const uint32_t chunk_lo = addr & ~0x7;
    const uint32_t chunk_hi = (chunk_lo + 8) & 0xFFF;
    uint64_t head, tail, merged;

    memcpy(&head, rsp->dmem + chunk_lo, sizeof(head));
    memcpy(&tail, rsp->dmem + chunk_hi, sizeof(tail));

    // Byte-swap so shifts operate in memory byte order, splice the two
    // chunks together, then swap back.
    // TODO: Get rid of GNU extensions.
    head = __builtin_bswap64(head) << (offset * 8);
    tail = __builtin_bswap64(tail) >> ((8 - offset) * 8);
    merged = __builtin_bswap64(head | tail);

    data = _mm_loadl_epi64((__m128i *) &merged);
  }

  // "Unpack" the data: each byte becomes the high byte of a 16-bit lane.
  data = _mm_unpacklo_epi8(_mm_setzero_si128(), data);

  // NOTE: the source carried a compiled-out (#if 0) guard that would have
  // applied this shift only for non-PACK requests; it is unconditional here,
  // as before.
  data = _mm_srli_epi16(data, 1);

  _mm_store_si128((__m128i *) regp, data);
}
|
||||
|
||||
//
|
||||
// SSSE3+ accelerated loads for group IV. Byteswap big-endian to 2-byte
|
||||
// little-endian vector. Stop loading at quadword boundaries.
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when loads from vector elements
|
||||
// must wraparound (i.e., the address offset is small, starting
|
||||
// element is large).
|
||||
//
|
||||
// Group IV vector load.
//
// Loads the 16-byte-aligned quadword of rsp->dmem that contains addr (aligned
// load), rotates both the data and the inverted mask by (16 - element +
// offset) mod 16 bytes via ror_b2l_keys, merges the data into reg under that
// mask and writes the result to regp with an aligned store.
void rsp_vload_group4(RSP::CPUState *rsp, uint32_t addr, unsigned element,
  uint16_t *regp, rsp_vect_t reg, rsp_vect_t dqm) {
  const uint32_t quad_addr = addr & 0xFF0;
  const unsigned offset = addr & 0xF;

  // TODO: Use of element is almost certainly wrong...
  const unsigned rotate = (16 - element + offset) & 0xF;

  __m128i data = _mm_load_si128((__m128i *) (rsp->dmem + quad_addr));

  // Invert the mask: bytes that were zero in dqm become 0xFF and vice versa.
  // NOTE: the source carried a compiled-out (#if 0) guard that would have
  // applied this only for non-QUAD requests; it is unconditional here, as
  // before.
  dqm = _mm_cmpeq_epi8(_mm_setzero_si128(), dqm);

#ifdef __SSSE3__
  const __m128i rotate_key =
    _mm_load_si128((const __m128i *) (ror_b2l_keys[rotate]));

  data = _mm_shuffle_epi8(data, rotate_key);
  dqm = _mm_shuffle_epi8(dqm, rotate_key);
#else
  data = sse2_pshufb(data, ror_b2l_keys[rotate]);
  dqm = sse2_pshufb(dqm, ror_b2l_keys[rotate]);
#endif

  // Take data where the mask is set, keep the old register elsewhere.
#ifdef __SSE4_1__
  data = _mm_blendv_epi8(reg, data, dqm);
#else
  data = _mm_or_si128(_mm_and_si128(dqm, data), _mm_andnot_si128(dqm, reg));
#endif

  _mm_store_si128((__m128i *) regp, data);
}
|
||||
|
||||
//
|
||||
// SSE3+ accelerated stores for group I. Byteswap 2-byte little-endian
|
||||
// vector back to big-endian. Start at vector element offset, wrapping
|
||||
// around the edge of the vector as necessary.
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when stores from vector elements
|
||||
// must wraparound. Do we just stop storing the data, or do we
|
||||
// continue storing from the front of the vector, as below?
|
||||
//
|
||||
// Group I vector store.
//
// Shifts the byte mask dqm up by the address offset (sll_l2b_keys), rotates
// reg by (element - offset) mod 16 bytes (ror_l2b_keys), then read-modify-
// writes rsp->dmem: existing memory is loaded, bytes selected by the mask are
// replaced with the rotated register bytes, and the result is stored back.
// regp is not used.
//
// When addr is not 8-byte aligned, the two 8-byte chunks around it are
// updated (the second chunk's address wraps at 0x1000); otherwise only the
// single 8-byte chunk at addr is touched.
void rsp_vstore_group1(RSP::CPUState *rsp, uint32_t addr, unsigned element,
  uint16_t *regp, rsp_vect_t reg, rsp_vect_t dqm) {
  const unsigned offset = addr & 0x7;
  const unsigned rotate = (element - offset) & 0xF;

  // chunk_lo equals addr itself when addr is already 8-byte aligned.
  const uint32_t chunk_lo = addr & ~0x7;
  const uint32_t chunk_hi = (chunk_lo + 8) & 0xFFF;
  __m128i data;

  // Shift the mask up to the store position and rotate the register to match.
#ifdef __SSSE3__
  dqm = _mm_shuffle_epi8(dqm,
    _mm_load_si128((const __m128i *) (sll_l2b_keys[offset])));
  reg = _mm_shuffle_epi8(reg,
    _mm_load_si128((const __m128i *) (ror_l2b_keys[rotate])));
#else
  dqm = sse2_pshufb(dqm, sll_l2b_keys[offset]);
  reg = sse2_pshufb(reg, ror_l2b_keys[rotate]);
#endif

  // Always load in 8-byte chunks to emulate wraparound.
  data = _mm_loadl_epi64((__m128i *) (rsp->dmem + chunk_lo));

  if (offset) {
    const __m128i upper = _mm_loadl_epi64((__m128i *) (rsp->dmem + chunk_hi));

    data = _mm_unpacklo_epi64(data, upper);
  }

  // Take register bytes where the mask is set, keep memory elsewhere.
#ifdef __SSE4_1__
  data = _mm_blendv_epi8(data, reg, dqm);
#else
  data = _mm_or_si128(_mm_andnot_si128(dqm, data), _mm_and_si128(dqm, reg));
#endif

  _mm_storel_epi64((__m128i *) (rsp->dmem + chunk_lo), data);

  if (offset) {
    data = _mm_srli_si128(data, 8);
    _mm_storel_epi64((__m128i *) (rsp->dmem + chunk_hi), data);
  }
}
|
||||
|
||||
//
|
||||
// SSE3+ accelerated stores for group II. Byteswap 2-byte little-endian
|
||||
// vector back to big-endian. Start at vector element offset, wrapping
|
||||
// around the edge of the vector as necessary.
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when stores from vector elements
|
||||
// must wraparound. Do we just stop storing the data, or do we
|
||||
// continue storing from the front of the vector, as below?
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when element != 0.
|
||||
//
|
||||
// Packs each 16-bit lane of `reg` down to one byte and writes the eight
// resulting bytes to DMEM at `addr`.
//
// Each lane is shifted left by one, arithmetically shifted right by eight,
// and then narrowed with signed saturation. `element`, `regp` and `dqm`
// are unused.
void rsp_vstore_group2(RSP::CPUState *rsp, uint32_t addr, unsigned element,
    uint16_t *regp, rsp_vect_t reg, rsp_vect_t dqm) {

  // The request-type check is compiled out, so the shift always happens.
#if 0
  if (rsp->pipeline.exdf_latch.request.type != RSP_MEM_REQUEST_PACK)
#endif
  reg = _mm_slli_epi16(reg, 1);

  const __m128i top_bytes = _mm_srai_epi16(reg, 8);
  const __m128i packed = _mm_packs_epi16(top_bytes, top_bytes);

  // TODO: Always store in 8-byte chunks to emulate wraparound.
  _mm_storel_epi64((__m128i *) (rsp->dmem + addr), packed);
}
|
||||
|
||||
//
|
||||
// SSE3+ accelerated stores for group IV. Byteswap 2-byte little-endian
|
||||
// vector back to big-endian. Stop storing at quadword boundaries.
|
||||
//
|
||||
// Merges bytes of `reg` into the 16-byte DMEM line that contains `addr`.
//
// The line at (addr & 0xFF0) is loaded, `reg` is permuted with
// rol_l2b_keys[addr & 0xF], and the two are combined under a byte mask.
// As currently compiled the mask is always the complement-by-compare of
// `dqm` (bytes where dqm == 0 select the register). `regp` is unused.
void rsp_vstore_group4(RSP::CPUState *rsp, uint32_t addr, unsigned element,
    uint16_t *regp, rsp_vect_t reg, rsp_vect_t dqm) {
  __m128i *const line = (__m128i *) (rsp->dmem + (addr & 0xFF0));
  unsigned rotation = addr & 0xF;

  const __m128i previous = _mm_load_si128(line);

  // The request-type check is compiled out; the `if (0)` stand-in means the
  // else branch below is the one that runs.
#if 0
  if (rsp->pipeline.exdf_latch.request.type == RSP_MEM_REQUEST_QUAD)
#else
  if (0)
#endif
  {
    rotation -= element;
  }

  // TODO: How is this adjusted for SRV when e != 0?
  else {
    dqm = _mm_cmpeq_epi8(_mm_setzero_si128(), dqm);
  }

#ifndef __SSSE3__
  reg = sse2_pshufb(reg, rol_l2b_keys[rotation & 0xF]);
#else
  reg = _mm_shuffle_epi8(reg,
      _mm_load_si128((__m128i *) (rol_l2b_keys[rotation & 0xF])));
#endif

  // Take register bytes where the mask is set, memory bytes elsewhere.
#ifdef __SSE4_1__
  const __m128i merged = _mm_blendv_epi8(previous, reg, dqm);
#else
  const __m128i merged = _mm_or_si128(_mm_andnot_si128(dqm, previous),
      _mm_and_si128(dqm, reg));
#endif

  _mm_store_si128(line, merged);
}
|
||||
|
29
arch/x86_64/rsp/rsp_impl.h
Normal file
29
arch/x86_64/rsp/rsp_impl.h
Normal file
@ -0,0 +1,29 @@
|
||||
#ifndef RSP_IMPL_H
|
||||
#define RSP_IMPL_H
|
||||
|
||||
#include "clamp.h"
|
||||
#include "vabs.h"
|
||||
#include "vadd.h"
|
||||
#include "vaddc.h"
|
||||
#include "vand.h"
|
||||
#include "vch.h"
|
||||
#include "vcmp.h"
|
||||
#include "vcl.h"
|
||||
#include "vcr.h"
|
||||
#include "vmac.h"
|
||||
#include "vmrg.h"
|
||||
#include "vmul.h"
|
||||
#include "vmulh.h"
|
||||
#include "vmull.h"
|
||||
#include "vmulm.h"
|
||||
#include "vmuln.h"
|
||||
#include "vor.h"
|
||||
#include "vsub.h"
|
||||
#include "vsubc.h"
|
||||
#include "vxor.h"
|
||||
#include "vrcpsq.h"
|
||||
#include "vmov.h"
|
||||
#include "vdivh.h"
|
||||
#include "vrsq.h"
|
||||
|
||||
#endif
|
19
arch/x86_64/rsp/vabs.h
Normal file
19
arch/x86_64/rsp/vabs.h
Normal file
@ -0,0 +1,19 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vabs.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Per 16-bit lane: yields 0 where vs == 0, vt where vs > 0 and -vt where
// vs < 0. The wrapping result is written to *acc_lo; the return value is
// the same computation with signed saturation.
static inline __m128i rsp_vabs(__m128i vs, __m128i vt, __m128i *acc_lo) {
  const __m128i vs_is_zero = _mm_cmpeq_epi16(vs, _mm_setzero_si128());
  const __m128i vs_is_negative = _mm_srai_epi16(vs, 15);

  // Clear lanes where vs == 0, then take the one's complement of lanes
  // where vs < 0; subtracting the (all-ones) sign mask finishes the negate.
  const __m128i flipped =
      _mm_xor_si128(_mm_andnot_si128(vs_is_zero, vt), vs_is_negative);

  // Careful: for vt == 0x8000 with vs negative the wrapping subtract gives
  // 0x8000 while the saturating one gives 0x7FFF.
  *acc_lo = _mm_sub_epi16(flipped, vs_is_negative);
  return _mm_subs_epi16(flipped, vs_is_negative);
}
|
||||
|
23
arch/x86_64/rsp/vadd.h
Normal file
23
arch/x86_64/rsp/vadd.h
Normal file
@ -0,0 +1,23 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vadd.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Per 16-bit lane: adds vs, vt and the carry. `carry` is subtracted, so an
// all-ones lane contributes +1. *acc_lo receives the wrapping sum; the
// return value is the signed-saturated sum.
static inline __m128i rsp_vadd(__m128i vs, __m128i vt,
    __m128i carry, __m128i *acc_lo) {
  // Wrapping sum for the accumulator.
  *acc_lo = _mm_sub_epi16(_mm_add_epi16(vs, vt), carry);

  // Saturating sum of three terms: fold the carry into the smaller operand
  // first, then add the larger one.
  const __m128i smaller_plus_carry =
      _mm_subs_epi16(_mm_min_epi16(vs, vt), carry);
  const __m128i larger = _mm_max_epi16(vs, vt);
  return _mm_adds_epi16(smaller_plus_carry, larger);
}
|
||||
|
20
arch/x86_64/rsp/vaddc.h
Normal file
20
arch/x86_64/rsp/vaddc.h
Normal file
@ -0,0 +1,20 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vaddc.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Per 16-bit lane: returns the wrapping sum vs + vt and sets *sn to all
// ones in lanes where the unsigned addition overflowed (i.e. where the
// saturated and wrapping sums differ), zero elsewhere.
static inline __m128i rsp_vaddc(__m128i vs, __m128i vt,
    __m128i zero, __m128i *sn) {
  const __m128i wrapped = _mm_add_epi16(vs, vt);
  const __m128i clamped = _mm_adds_epu16(vs, vt);

  // Equal sums mean no overflow; comparing that mask with zero inverts it.
  const __m128i no_overflow = _mm_cmpeq_epi16(clamped, wrapped);
  *sn = _mm_cmpeq_epi16(no_overflow, zero);

  return wrapped;
}
|
||||
|
16
arch/x86_64/rsp/vand.h
Normal file
16
arch/x86_64/rsp/vand.h
Normal file
@ -0,0 +1,16 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vand.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Bitwise AND of the two 128-bit operands.
static inline __m128i rsp_vand(__m128i vs, __m128i vt) {
  const __m128i both = _mm_and_si128(vs, vt);
  return both;
}
|
||||
|
||||
// Bitwise NAND of the two 128-bit operands: ~(vs & vt).
static inline __m128i rsp_vnand(__m128i vs, __m128i vt) {
  const __m128i vd = _mm_and_si128(vs, vt);

  // XOR with all ones inverts every bit. The constant is spelled -1 so the
  // int argument of _mm_set1_epi32 is in range; 0xffffffffu would rely on an
  // implementation-defined unsigned-to-signed conversion.
  return _mm_xor_si128(vd, _mm_set1_epi32(-1));
}
|
||||
|
68
arch/x86_64/rsp/vch.h
Normal file
68
arch/x86_64/rsp/vch.h
Normal file
@ -0,0 +1,68 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vch.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Per 16-bit lane compare/select of vs against +/-vt.
//
// Outputs (all lane masks, all ones or zero):
//   *sign - vs and vt have opposite signs
//   *ge   - sign ? (vt < 0)      : (vs - vt >= 0)
//   *le   - sign ? (vs + vt <= 0) : (vt < 0)
//   *vce  - sign and vs + vt == -1
//   *eq   - set when neither (diff == 0) nor vce holds
// where diff is vs + vt for opposite signs and vs - vt otherwise.
// Returns (sign ? -vt : vt) in lanes where the chosen flag (le when sign,
// ge otherwise) is set, and vs elsewhere.
static inline __m128i rsp_vch(__m128i vs, __m128i vt, __m128i zero,
    __m128i *ge, __m128i *le, __m128i *eq, __m128i *sign, __m128i *vce) {

  // mask ? if_set : if_clear
  const auto select = [](__m128i mask, __m128i if_set, __m128i if_clear) {
#ifdef __SSE4_1__
    return _mm_blendv_epi8(if_clear, if_set, mask);
#else
    return _mm_or_si128(_mm_and_si128(mask, if_set),
        _mm_andnot_si128(mask, if_clear));
#endif
  };

  // opposite = (vs ^ vt) < 0
  const __m128i opposite = _mm_cmplt_epi16(_mm_xor_si128(vs, vt), zero);
  *sign = opposite;

  // signed_vt = opposite ? -vt : vt
  const __m128i signed_vt =
      _mm_sub_epi16(_mm_xor_si128(vt, opposite), opposite);

  const __m128i diff = _mm_sub_epi16(vs, signed_vt);
  const __m128i diff_is_zero = _mm_cmpeq_epi16(diff, zero);

  const __m128i vt_negative = _mm_cmplt_epi16(vt, zero);
  const __m128i diff_gt_zero = _mm_cmpgt_epi16(diff, zero);
  const __m128i diff_ge_zero = _mm_or_si128(diff_gt_zero, diff_is_zero);
  const __m128i diff_le_zero = _mm_cmpeq_epi16(zero, diff_gt_zero);

  const __m128i ge_mask = select(opposite, vt_negative, diff_ge_zero);
  const __m128i le_mask = select(opposite, diff_le_zero, vt_negative);
  *ge = ge_mask;
  *le = le_mask;

  // vce: opposite signs and diff == -1 (diff equals the all-ones mask).
  const __m128i vce_mask =
      _mm_and_si128(_mm_cmpeq_epi16(diff, opposite), opposite);
  *vce = vce_mask;

  // Stored inverted: set when diff != 0 and vce is clear.
  *eq = _mm_cmpeq_epi16(_mm_or_si128(diff_is_zero, vce_mask), zero);

  const __m128i take_vt = select(opposite, le_mask, ge_mask);
  return select(take_vt, signed_vt, vs);
}
|
||||
|
75
arch/x86_64/rsp/vcl.h
Normal file
75
arch/x86_64/rsp/vcl.h
Normal file
@ -0,0 +1,75 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vcl.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Per 16-bit lane select of vs or +/-vt, updating the *ge / *le lane masks.
//
// `eq`, `sign` and `vce` are input lane masks. *le is recomputed only in
// lanes where sign is set and eq is clear; *ge is recomputed only in lanes
// where both sign and eq are clear; other lanes keep their incoming value.
// Returns (sign ? -vt : vt) in lanes where the chosen flag (le when sign,
// ge otherwise) is set, and vs elsewhere.
static inline __m128i rsp_vcl(__m128i vs, __m128i vt, __m128i zero,
    __m128i *ge, __m128i *le, __m128i eq, __m128i sign, __m128i vce) {

  // mask ? if_set : if_clear
  const auto select = [](__m128i mask, __m128i if_set, __m128i if_clear) {
#ifdef __SSE4_1__
    return _mm_blendv_epi8(if_clear, if_set, mask);
#else
    return _mm_or_si128(_mm_and_si128(mask, if_set),
        _mm_andnot_si128(mask, if_clear));
#endif
  };

  // signed_vt = sign ? -vt : vt
  const __m128i signed_vt = _mm_sub_epi16(_mm_xor_si128(vt, sign), sign);

  // diff = sign ? (vs + vt) : (vs - vt)
  const __m128i diff = _mm_sub_epi16(vs, signed_vt);
  const __m128i diff_is_zero = _mm_cmpeq_epi16(diff, zero);

  // Set when diff equals the unsigned-saturated sum vs + vt.
  const __m128i no_carry = _mm_cmpeq_epi16(diff, _mm_adds_epu16(vs, vt));
  const __m128i not_vce = _mm_cmpeq_epi16(vce, zero);

  // Candidate le: (diff == 0 && no_carry) without vce,
  //               (diff == 0 || no_carry) with vce.
  const __m128i le_without_vce =
      _mm_and_si128(not_vce, _mm_and_si128(diff_is_zero, no_carry));
  const __m128i le_with_vce =
      _mm_and_si128(vce, _mm_or_si128(diff_is_zero, no_carry));
  const __m128i le_candidate = _mm_or_si128(le_without_vce, le_with_vce);

  // Candidate ge: unsigned vs >= vt (saturating vt - vs is zero).
  const __m128i ge_candidate =
      _mm_cmpeq_epi16(_mm_subs_epu16(vt, vs), zero);

  // Only overwrite the flags in the lanes described above.
  const __m128i update_le = _mm_andnot_si128(eq, sign);
  *le = select(update_le, le_candidate, *le);

  const __m128i keep_ge = _mm_or_si128(sign, eq);
  *ge = select(keep_ge, *ge, ge_candidate);

  const __m128i take_vt = select(sign, *le, *ge);
  return select(take_vt, signed_vt, vs);
}
|
||||
|
85
arch/x86_64/rsp/vcmp.h
Normal file
85
arch/x86_64/rsp/vcmp.h
Normal file
@ -0,0 +1,85 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vcmp.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Per 16-bit lane: *le is set where vs == vt and the `eq` input mask is
// clear. Returns vs in lanes where *le is set and vt elsewhere.
// `zero` and `sign` are unused.
static inline __m128i rsp_veq(__m128i vs, __m128i vt,
    __m128i zero, __m128i *le, __m128i eq, __m128i sign) {
  const __m128i pick_vs = _mm_andnot_si128(eq, _mm_cmpeq_epi16(vs, vt));
  *le = pick_vs;

#ifdef __SSE4_1__
  return _mm_blendv_epi8(vt, vs, pick_vs);
#else
  return _mm_or_si128(_mm_and_si128(pick_vs, vs),
      _mm_andnot_si128(pick_vs, vt));
#endif
}
|
||||
|
||||
// Per 16-bit lane (signed): *le is set where vs > vt, or where vs == vt
// unless both the `eq` and `sign` input masks are set. Returns vs in lanes
// where *le is set and vt elsewhere. `zero` is unused.
static inline __m128i rsp_vge(__m128i vs, __m128i vt,
    __m128i zero, __m128i *le, __m128i eq, __m128i sign) {
  const __m128i greater = _mm_cmpgt_epi16(vs, vt);

  // Equality only counts when (eq & sign) is clear.
  const __m128i veto = _mm_and_si128(eq, sign);
  const __m128i equal_ok = _mm_andnot_si128(veto, _mm_cmpeq_epi16(vs, vt));

  const __m128i pick_vs = _mm_or_si128(greater, equal_ok);
  *le = pick_vs;

#ifdef __SSE4_1__
  return _mm_blendv_epi8(vt, vs, pick_vs);
#else
  return _mm_or_si128(_mm_and_si128(pick_vs, vs),
      _mm_andnot_si128(pick_vs, vt));
#endif
}
|
||||
|
||||
// Per 16-bit lane (signed): *le is set where vs < vt, or where vs == vt
// and both the `eq` and `sign` input masks are set. Returns vs in lanes
// where *le is set and vt elsewhere. `zero` is unused.
static inline __m128i rsp_vlt(__m128i vs, __m128i vt,
    __m128i zero, __m128i *le, __m128i eq, __m128i sign) {
  const __m128i less = _mm_cmplt_epi16(vs, vt);

  // Equality only counts when both eq and sign are set.
  const __m128i equal_ok = _mm_and_si128(sign,
      _mm_and_si128(eq, _mm_cmpeq_epi16(vs, vt)));

  const __m128i pick_vs = _mm_or_si128(less, equal_ok);
  *le = pick_vs;

#ifdef __SSE4_1__
  return _mm_blendv_epi8(vt, vs, pick_vs);
#else
  return _mm_or_si128(_mm_and_si128(pick_vs, vs),
      _mm_andnot_si128(pick_vs, vt));
#endif
}
|
||||
|
||||
// Per 16-bit lane: *le is set where vs != vt, or where vs == vt and the
// `eq` input mask is set. Returns vs in lanes where *le is set and vt
// elsewhere. `sign` is unused.
static inline __m128i rsp_vne(__m128i vs, __m128i vt,
    __m128i zero, __m128i *le, __m128i eq, __m128i sign) {
  const __m128i same = _mm_cmpeq_epi16(vs, vt);
  const __m128i differ = _mm_cmpeq_epi16(same, zero);

  const __m128i pick_vs = _mm_or_si128(_mm_and_si128(eq, same), differ);
  *le = pick_vs;

  // Optional operand dump for debugging.
#ifdef INTENSE_DEBUG
  for (unsigned i = 0; i < 8; i++)
    fprintf(stderr, "VS[%d] = %d\n", i,
        reinterpret_cast<int16_t*>(&vs)[i]);
  for (unsigned i = 0; i < 8; i++)
    fprintf(stderr, "VT[%d] = %d\n", i,
        reinterpret_cast<int16_t*>(&vt)[i]);
#endif

#ifdef __SSE4_1__
  return _mm_blendv_epi8(vt, vs, pick_vs);
#else
  return _mm_or_si128(_mm_and_si128(pick_vs, vs),
      _mm_andnot_si128(pick_vs, vt));
#endif
}
|
||||
|
54
arch/x86_64/rsp/vcr.h
Normal file
54
arch/x86_64/rsp/vcr.h
Normal file
@ -0,0 +1,54 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vcr.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Per 16-bit lane compare/select of vs against vt or ~vt.
//
// With sign = (vs ^ vt) < 0:
//   *le = ((vs & sign) + vt) < 0
//   *ge = min(vs | sign, vt) == vt
// Returns (sign ? ~vt : vt) in lanes where the chosen flag (le when sign,
// ge otherwise) is set, and vs elsewhere.
static inline __m128i rsp_vcr(__m128i vs, __m128i vt,
    __m128i zero, __m128i *ge, __m128i *le) {

  // Optional operand dump for debugging.
#ifdef INTENSE_DEBUG
  for (unsigned i = 0; i < 8; i++)
    fprintf(stderr, "VS[%d] = %d\n", i,
        reinterpret_cast<int16_t*>(&vs)[i]);

  for (unsigned i = 0; i < 8; i++)
    fprintf(stderr, "VT[%d] = %d\n", i,
        reinterpret_cast<int16_t*>(&vt)[i]);
#endif

  // opposite = (vs ^ vt) < 0
  const __m128i opposite = _mm_srai_epi16(_mm_xor_si128(vs, vt), 15);

  const __m128i le_mask = _mm_srai_epi16(
      _mm_add_epi16(_mm_and_si128(vs, opposite), vt), 15);
  *le = le_mask;

  const __m128i ge_mask = _mm_cmpeq_epi16(
      _mm_min_epi16(_mm_or_si128(vs, opposite), vt), vt);
  *ge = ge_mask;

  // candidate = opposite ? ~vt : vt
  const __m128i candidate = _mm_xor_si128(vt, opposite);

#ifdef __SSE4_1__
  const __m128i take = _mm_blendv_epi8(ge_mask, le_mask, opposite);
  return _mm_blendv_epi8(vs, candidate, take);
#else
  // Arithmetic form of the two selects: b + ((a - b) & mask).
  const __m128i take = _mm_add_epi16(
      _mm_and_si128(_mm_sub_epi16(le_mask, ge_mask), opposite), ge_mask);
  return _mm_add_epi16(
      _mm_and_si128(_mm_sub_epi16(candidate, vs), take), vs);
#endif
}
|
||||
|
18
arch/x86_64/rsp/vdivh.h
Normal file
18
arch/x86_64/rsp/vdivh.h
Normal file
@ -0,0 +1,18 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vdivh.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Latches element (e & 7) of register `src` into cp2.div_in, writes the
// current cp2.div_out into element (de & 7) of register `dest`, and
// returns the updated `dest` register loaded as a vector.
inline __m128i rsp_vdivh(RSP::CPUState *rsp,
    unsigned src, unsigned e, unsigned dest, unsigned de) {
  auto &cp2 = rsp->cp2;

  // Capture the source element for a later divide step.
  cp2.div_in = cp2.regs[src].e[e & 0x7];

  // Store the pending divide output into the destination element.
  cp2.regs[dest].e[de & 0x7] = cp2.div_out;

  return rsp_vect_load_unshuffled_operand(cp2.regs[dest].e);
}
|
||||
|
66
arch/x86_64/rsp/vmac.h
Normal file
66
arch/x86_64/rsp/vmac.h
Normal file
@ -0,0 +1,66 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vmac.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
template <bool VMACU>
|
||||
static inline __m128i rsp_vmacf_vmacu(__m128i vs, __m128i vt,
|
||||
__m128i zero, __m128i *acc_lo, __m128i *acc_md, __m128i *acc_hi) {
|
||||
__m128i overflow_hi_mask, overflow_md_mask;
|
||||
__m128i lo, md, hi, carry, overflow_mask;
|
||||
|
||||
// Get the product and shift it over
|
||||
// being sure to save the carries.
|
||||
lo = _mm_mullo_epi16(vs, vt);
|
||||
hi = _mm_mulhi_epi16(vs, vt);
|
||||
|
||||
md = _mm_slli_epi16(hi, 1);
|
||||
carry = _mm_srli_epi16(lo, 15);
|
||||
hi = _mm_srai_epi16(hi, 15);
|
||||
md = _mm_or_si128(md, carry);
|
||||
lo = _mm_slli_epi16(lo, 1);
|
||||
|
||||
// Tricky part: start accumulating everything.
|
||||
// Get/keep the carry as we'll add it in later.
|
||||
overflow_mask = _mm_adds_epu16(*acc_lo, lo);
|
||||
*acc_lo = _mm_add_epi16(*acc_lo, lo);
|
||||
|
||||
overflow_mask = _mm_cmpeq_epi16(*acc_lo, overflow_mask);
|
||||
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
|
||||
|
||||
// Add in the carry. If the middle portion is
|
||||
// already 0xFFFF and we have a carry, we have
|
||||
// to carry the all the way up to hi.
|
||||
md = _mm_sub_epi16(md, overflow_mask);
|
||||
carry = _mm_cmpeq_epi16(md, zero);
|
||||
carry = _mm_and_si128(carry, overflow_mask);
|
||||
hi = _mm_sub_epi16(hi, carry);
|
||||
|
||||
// Accumulate the middle portion.
|
||||
overflow_mask = _mm_adds_epu16(*acc_md, md);
|
||||
*acc_md = _mm_add_epi16(*acc_md, md);
|
||||
|
||||
overflow_mask = _mm_cmpeq_epi16(*acc_md, overflow_mask);
|
||||
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
|
||||
|
||||
// Finish up the accumulation of the... accumulator.
|
||||
*acc_hi = _mm_add_epi16(*acc_hi, hi);
|
||||
*acc_hi = _mm_sub_epi16(*acc_hi, overflow_mask);
|
||||
|
||||
// VMACU
|
||||
if (VMACU) {
|
||||
overflow_hi_mask = _mm_srai_epi16(*acc_hi, 15);
|
||||
overflow_md_mask = _mm_srai_epi16(*acc_md, 15);
|
||||
md = _mm_or_si128(overflow_md_mask, *acc_md);
|
||||
overflow_mask = _mm_cmpgt_epi16(*acc_hi, zero);
|
||||
md = _mm_andnot_si128(overflow_hi_mask, md);
|
||||
return _mm_or_si128(overflow_mask, md);
|
||||
}
|
||||
|
||||
// VMACF
|
||||
else
|
||||
return rsp_sclamp_acc_tomd(*acc_md, *acc_hi);
|
||||
}
|
||||
|
19
arch/x86_64/rsp/vmov.h
Normal file
19
arch/x86_64/rsp/vmov.h
Normal file
@ -0,0 +1,19 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vmov.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Copies element (e & 7) of register `src` into element (de & 7) of
// register `dest` and returns the updated `dest` register loaded as a
// vector.
inline __m128i rsp_vmov(RSP::CPUState *rsp,
    unsigned src, unsigned e, unsigned dest, unsigned de) {
  auto &regs = rsp->cp2.regs;

  // Read first, then write: src and dest may name the same register.
  const uint16_t moved = regs[src].e[e & 0x7];
  regs[dest].e[de & 0x7] = moved;

  return rsp_vect_load_unshuffled_operand(regs[dest].e);
}
|
||||
|
17
arch/x86_64/rsp/vmrg.h
Normal file
17
arch/x86_64/rsp/vmrg.h
Normal file
@ -0,0 +1,17 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vmrg.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Merge: returns vs where the mask `le` is set and vt where it is clear.
static inline __m128i rsp_vmrg(__m128i vs, __m128i vt, __m128i le) {
#ifdef __SSE4_1__
  return _mm_blendv_epi8(vt, vs, le);
#else
  const __m128i from_vs = _mm_and_si128(le, vs);
  const __m128i from_vt = _mm_andnot_si128(le, vt);
  return _mm_or_si128(from_vs, from_vt);
#endif
}
|
||||
|
15
arch/x86_64/rsp/vmudh.h
Normal file
15
arch/x86_64/rsp/vmudh.h
Normal file
@ -0,0 +1,15 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vmudh.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
static inline __m128i rsp_vmudh(__m128i vs, __m128i vt,
|
||||
__m128i *acc_md, __m128i *acc_hi) {
|
||||
*acc_md = _mm_mullo_epi16(vs, vt);
|
||||
*acc_hi = _mm_mulhi_epi16(vs, vt);
|
||||
|
||||
return rsp_sclamp_acc_tomd(*acc_md, *acc_hi);
|
||||
}
|
||||
|
47
arch/x86_64/rsp/vmul.h
Normal file
47
arch/x86_64/rsp/vmul.h
Normal file
@ -0,0 +1,47 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vmul.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
//
|
||||
// TODO: CHECK ME.
|
||||
//
|
||||
|
||||
// Per 16-bit lane: computes 2 * (signed vs * signed vt) + 0x8000 and
// stores it as three 16-bit slices in *acc_lo / *acc_md / *acc_hi.
//
// *acc_hi is set to the sign of acc_md, except in lanes where vs == vt,
// where it is cleared.
// VMULU == true : returns 0 where *acc_hi is set, otherwise acc_md with
//                 all bits forced on when its top bit is set.
// VMULU == false: returns acc_md, minus one in lanes where vs == vt and
//                 acc_md is negative.
template <bool VMULU>
static inline __m128i rsp_vmulf_vmulu(__m128i vs, __m128i vt,
    __m128i zero, __m128i *acc_lo, __m128i *acc_md, __m128i *acc_hi) {
  const __m128i prod_lo = _mm_mullo_epi16(vs, vt);
  const __m128i prod_hi = _mm_mulhi_epi16(vs, vt);

  // 0x8000 in every lane.
  const __m128i rounding = _mm_slli_epi16(_mm_cmpeq_epi16(zero, zero), 15);

  // Double the low half. What moves up into the middle slice is the bit
  // shifted out by the doubling plus the top bit of the doubled value.
  const __m128i doubled_lo = _mm_add_epi16(prod_lo, prod_lo);
  const __m128i into_md = _mm_add_epi16(_mm_srli_epi16(prod_lo, 15),
      _mm_srli_epi16(doubled_lo, 15));

  *acc_lo = _mm_add_epi16(rounding, doubled_lo);
  *acc_md = _mm_add_epi16(_mm_slli_epi16(prod_hi, 1), into_md);

  const __m128i same_inputs = _mm_cmpeq_epi16(vs, vt);
  const __m128i md_negative = _mm_srai_epi16(*acc_md, 15);

  // Identical in both variants.
  *acc_hi = _mm_andnot_si128(same_inputs, md_negative);

  // VMULU
  if (VMULU) {
    const __m128i widened = _mm_or_si128(*acc_md, md_negative);
    return _mm_andnot_si128(*acc_hi, widened);
  }

  // VMULF
  else {
    // Adding the all-ones mask subtracts one in the affected lanes.
    const __m128i adjust = _mm_and_si128(same_inputs, md_negative);
    return _mm_add_epi16(*acc_md, adjust);
  }
}
|
||||
|
39
arch/x86_64/rsp/vmulh.h
Normal file
39
arch/x86_64/rsp/vmulh.h
Normal file
@ -0,0 +1,39 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vmulh.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
template <bool VMADH>
|
||||
static inline __m128i rsp_vmadh_vmudh(__m128i vs, __m128i vt,
|
||||
__m128i zero, __m128i *acc_lo, __m128i *acc_md, __m128i *acc_hi) {
|
||||
__m128i lo, hi, overflow_mask;
|
||||
|
||||
lo = _mm_mullo_epi16(vs, vt);
|
||||
hi = _mm_mulhi_epi16(vs, vt);
|
||||
|
||||
// VMADH
|
||||
if (VMADH) {
|
||||
// Tricky part: start accumulate everything.
|
||||
// Get/keep the carry as we'll add it in later.
|
||||
overflow_mask = _mm_adds_epu16(*acc_md, lo);
|
||||
*acc_md = _mm_add_epi16(*acc_md, lo);
|
||||
|
||||
overflow_mask = _mm_cmpeq_epi16(*acc_md, overflow_mask);
|
||||
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
|
||||
|
||||
hi = _mm_sub_epi16(hi, overflow_mask);
|
||||
*acc_hi = _mm_add_epi16(*acc_hi, hi);
|
||||
}
|
||||
|
||||
// VMUDH
|
||||
else {
|
||||
*acc_lo = zero;
|
||||
*acc_md = lo;
|
||||
*acc_hi = hi;
|
||||
}
|
||||
|
||||
return rsp_sclamp_acc_tomd(*acc_md, *acc_hi);
|
||||
}
|
||||
|
54
arch/x86_64/rsp/vmull.h
Normal file
54
arch/x86_64/rsp/vmull.h
Normal file
@ -0,0 +1,54 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vmull.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
template <bool VMADL>
|
||||
static inline __m128i rsp_vmadl_vmudl(__m128i vs, __m128i vt,
|
||||
__m128i zero, __m128i *acc_lo, __m128i *acc_md, __m128i *acc_hi) {
|
||||
__m128i hi, overflow_mask;
|
||||
|
||||
hi = _mm_mulhi_epu16(vs, vt);
|
||||
|
||||
// VMADL
|
||||
if (VMADL) {
|
||||
|
||||
// Tricky part: start accumulate everything.
|
||||
// Get/keep the carry as we'll add it in later.
|
||||
overflow_mask = _mm_adds_epu16(*acc_lo, hi);
|
||||
*acc_lo = _mm_add_epi16(*acc_lo, hi);
|
||||
|
||||
overflow_mask = _mm_cmpeq_epi16(*acc_lo, overflow_mask);
|
||||
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
|
||||
hi = _mm_sub_epi16(zero, overflow_mask);
|
||||
|
||||
// Check for overflow of the upper sum.
|
||||
//
|
||||
// TODO: Since hi can only be {0,1}, we should
|
||||
// be able to generalize this for performance.
|
||||
overflow_mask = _mm_adds_epu16(*acc_md, hi);
|
||||
*acc_md = _mm_add_epi16(*acc_md, hi);
|
||||
|
||||
overflow_mask = _mm_cmpeq_epi16(*acc_md, overflow_mask);
|
||||
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
|
||||
|
||||
// Finish up the accumulation of the... accumulator.
|
||||
// Since the product was unsigned, only worry about
|
||||
// positive overflow (i.e.: borrowing not possible).
|
||||
*acc_hi = _mm_sub_epi16(*acc_hi, overflow_mask);
|
||||
|
||||
return rsp_uclamp_acc(*acc_lo, *acc_md, *acc_hi, zero);
|
||||
}
|
||||
|
||||
// VMUDL
|
||||
else {
|
||||
*acc_lo = hi;
|
||||
*acc_md = zero;
|
||||
*acc_hi = zero;
|
||||
|
||||
return hi;
|
||||
}
|
||||
}
|
||||
|
65
arch/x86_64/rsp/vmulm.h
Normal file
65
arch/x86_64/rsp/vmulm.h
Normal file
@ -0,0 +1,65 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vmulm.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
template <bool VMADM>
|
||||
static inline __m128i rsp_vmadm_vmudm(__m128i vs, __m128i vt,
|
||||
__m128i zero, __m128i *acc_lo, __m128i *acc_md, __m128i *acc_hi) {
|
||||
__m128i lo, hi, sign, overflow_mask;
|
||||
|
||||
lo = _mm_mullo_epi16(vs, vt);
|
||||
hi = _mm_mulhi_epu16(vs, vt);
|
||||
|
||||
// What we're really want to do is unsigned vs * signed vt.
|
||||
// However, we have no such instructions to do so.
|
||||
//
|
||||
// There's a trick to "fix" an unsigned product, though:
|
||||
// If vt was negative, take the upper 16-bits of the product
|
||||
// and subtract vs.
|
||||
sign = _mm_srai_epi16(vs, 15);
|
||||
vt = _mm_and_si128(vt, sign);
|
||||
hi = _mm_sub_epi16(hi, vt);
|
||||
|
||||
// VMADM
|
||||
if (VMADM) {
|
||||
// Tricky part: start accumulate everything.
|
||||
// Get/keep the carry as we'll add it in later.
|
||||
overflow_mask = _mm_adds_epu16(*acc_lo, lo);
|
||||
*acc_lo = _mm_add_epi16(*acc_lo, lo);
|
||||
|
||||
overflow_mask = _mm_cmpeq_epi16(*acc_lo, overflow_mask);
|
||||
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
|
||||
|
||||
// This is REALLY clever. Since the product results from
|
||||
// two 16-bit components, one positive and one negative,
|
||||
// we don't have to worry about carrying the 1 (we can
|
||||
// only borrow) past 32-bits. So we can just add it here.
|
||||
hi = _mm_sub_epi16(hi, overflow_mask);
|
||||
|
||||
// Check for overflow of the upper sum.
|
||||
overflow_mask = _mm_adds_epu16(*acc_md, hi);
|
||||
*acc_md = _mm_add_epi16(*acc_md, hi);
|
||||
|
||||
overflow_mask = _mm_cmpeq_epi16(*acc_md, overflow_mask);
|
||||
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
|
||||
|
||||
// Finish up the accumulation of the... accumulator.
|
||||
*acc_hi = _mm_add_epi16(*acc_hi, _mm_srai_epi16(hi, 15));
|
||||
*acc_hi = _mm_sub_epi16(*acc_hi, overflow_mask);
|
||||
|
||||
return rsp_sclamp_acc_tomd(*acc_md, *acc_hi);
|
||||
}
|
||||
|
||||
// VMUDM
|
||||
else {
|
||||
*acc_lo = lo;
|
||||
*acc_md = hi;
|
||||
*acc_hi = _mm_srai_epi16(hi, 15);
|
||||
|
||||
return hi;
|
||||
}
|
||||
}
|
||||
|
87
arch/x86_64/rsp/vmuln.h
Normal file
87
arch/x86_64/rsp/vmuln.h
Normal file
@ -0,0 +1,87 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vmuln.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
template <bool VMADN>
|
||||
static inline __m128i rsp_vmadn_vmudn(__m128i vs, __m128i vt,
|
||||
__m128i zero, __m128i *acc_lo, __m128i *acc_md, __m128i *acc_hi) {
|
||||
__m128i lo, hi, sign, overflow_mask;
|
||||
|
||||
#ifdef INTENSE_DEBUG
|
||||
if (VMADN)
|
||||
{
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
fprintf(stderr, "ACC LO[%u] = %d\n", i, reinterpret_cast<int16_t*>(acc_lo)[i]);
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
fprintf(stderr, "ACC MD[%u] = %d\n", i, reinterpret_cast<int16_t*>(acc_md)[i]);
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
fprintf(stderr, "ACC HI[%u] = %d\n", i, reinterpret_cast<int16_t*>(acc_hi)[i]);
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
fprintf(stderr, "VS[%u] = %d\n", i, reinterpret_cast<int16_t*>(&vs)[i]);
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
fprintf(stderr, "VT[%u] = %d\n", i, reinterpret_cast<int16_t*>(&vt)[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
lo = _mm_mullo_epi16(vs, vt);
|
||||
hi = _mm_mulhi_epu16(vs, vt);
|
||||
|
||||
// What we're really want to do is unsigned vs * signed vt.
|
||||
// However, we have no such instructions to do so.
|
||||
//
|
||||
// There's a trick to "fix" an unsigned product, though:
|
||||
// If vt was negative, take the upper 16-bits of the product
|
||||
// and subtract vs.
|
||||
sign = _mm_srai_epi16(vt, 15);
|
||||
vs = _mm_and_si128(vs, sign);
|
||||
hi = _mm_sub_epi16(hi, vs);
|
||||
|
||||
// VMADN
|
||||
if (VMADN) {
|
||||
// Tricky part: start accumulate everything.
|
||||
// Get/keep the carry as we'll add it in later.
|
||||
overflow_mask = _mm_adds_epu16(*acc_lo, lo);
|
||||
*acc_lo = _mm_add_epi16(*acc_lo, lo);
|
||||
|
||||
overflow_mask = _mm_cmpeq_epi16(*acc_lo, overflow_mask);
|
||||
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
|
||||
|
||||
// This is REALLY clever. Since the product results from
|
||||
// two 16-bit components, one positive and one negative,
|
||||
// we don't have to worry about carrying the 1 (we can
|
||||
// only borrow) past 32-bits. So we can just add it here.
|
||||
hi = _mm_sub_epi16(hi, overflow_mask);
|
||||
|
||||
// Check for overflow of the upper sum.
|
||||
overflow_mask = _mm_adds_epu16(*acc_md, hi);
|
||||
*acc_md = _mm_add_epi16(*acc_md, hi);
|
||||
|
||||
overflow_mask = _mm_cmpeq_epi16(*acc_md, overflow_mask);
|
||||
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
|
||||
|
||||
// Finish up the accumulation of the... accumulator.
|
||||
*acc_hi = _mm_add_epi16(*acc_hi, _mm_srai_epi16(hi, 15));
|
||||
*acc_hi = _mm_sub_epi16(*acc_hi, overflow_mask);
|
||||
#ifdef INTENSE_DEBUG
|
||||
auto ret = rsp_uclamp_acc(*acc_lo, *acc_md, *acc_hi, zero);
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
fprintf(stderr, "VD[%u] = %d\n", i, reinterpret_cast<int16_t*>(&ret)[i]);
|
||||
return ret;
|
||||
#else
|
||||
return rsp_uclamp_acc(*acc_lo, *acc_md, *acc_hi, zero);
|
||||
#endif
|
||||
}
|
||||
|
||||
// VMUDN
|
||||
else {
|
||||
*acc_lo = lo;
|
||||
*acc_md = hi;
|
||||
*acc_hi = _mm_srai_epi16(hi, 15);
|
||||
|
||||
return lo;
|
||||
}
|
||||
}
|
||||
|
16
arch/x86_64/rsp/vor.h
Normal file
16
arch/x86_64/rsp/vor.h
Normal file
@ -0,0 +1,16 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vor.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Bitwise OR of the two 128-bit operands: vd = vs | vt.
static inline __m128i rsp_vor(__m128i vs, __m128i vt) {
  const __m128i vd = _mm_or_si128(vs, vt);
  return vd;
}
|
||||
|
||||
// Bitwise NOR of the two 128-bit operands: vd = ~(vs | vt).
static inline __m128i rsp_vnor(__m128i vs, __m128i vt) {
  const __m128i all_ones = _mm_set1_epi32(-1);

  // andnot computes (~a) & b, so with b all-ones this is the complement of a.
  return _mm_andnot_si128(_mm_or_si128(vs, vt), all_ones);
}
|
||||
|
79
arch/x86_64/rsp/vrcpsq.h
Normal file
79
arch/x86_64/rsp/vrcpsq.h
Normal file
@ -0,0 +1,79 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vrcpsq.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
#include "../../../rsp/reciprocal.h"
|
||||
|
||||
template <bool VRSQ>
|
||||
inline __m128i rsp_vrcp_vrsq(RSP::CPUState *rsp, int dp,
|
||||
unsigned src, unsigned e, unsigned dest, unsigned de) {
|
||||
uint32_t dp_input, sp_input;
|
||||
int32_t input, result;
|
||||
|
||||
int16_t vt;
|
||||
|
||||
int32_t input_mask, data;
|
||||
unsigned shift, idx;
|
||||
|
||||
// Get the element from VT.
|
||||
vt = rsp->cp2.regs[src].e[e & 0x7];
|
||||
|
||||
dp_input = ((uint32_t) rsp->cp2.div_in << 16) | (uint16_t) vt;
|
||||
sp_input = vt;
|
||||
|
||||
input = (dp) ? dp_input : sp_input;
|
||||
input_mask = input >> 31;
|
||||
data = input ^ input_mask;
|
||||
|
||||
if (input > -32768)
|
||||
data -= input_mask;
|
||||
|
||||
// Handle edge cases.
|
||||
if (data == 0)
|
||||
result = 0x7fffFFFFU;
|
||||
|
||||
else if (input == -32768)
|
||||
result = 0xffff0000U;
|
||||
|
||||
// Main case: compute the reciprocal.
|
||||
else {
|
||||
|
||||
// TODO: Clean this up.
|
||||
#ifdef _MSC_VER
|
||||
unsigned long bsf_index;
|
||||
_BitScanReverse(&bsf_index, data);
|
||||
shift = 31 - bsf_index;
|
||||
#else
|
||||
shift = __builtin_clz(data);
|
||||
#endif
|
||||
|
||||
// VRSQ
|
||||
if (VRSQ) {
|
||||
idx = (((unsigned long long) data << shift) & 0x7FC00000U) >> 22;
|
||||
idx = ((idx | 0x200) & 0x3FE) | (shift % 2);
|
||||
result = rsp_reciprocal_rom[idx];
|
||||
|
||||
result = ((0x10000 | result) << 14) >> ((31 - shift) >> 1);
|
||||
}
|
||||
|
||||
// VRCP
|
||||
else {
|
||||
idx = (((unsigned long long) data << shift) & 0x7FC00000U) >> 22;
|
||||
result = rsp_reciprocal_rom[idx];
|
||||
|
||||
result = ((0x10000 | result) << 14) >> (31 - shift);
|
||||
}
|
||||
|
||||
result = result ^ input_mask;
|
||||
}
|
||||
|
||||
// Write out the results.
|
||||
rsp->cp2.div_out = result >> 16;
|
||||
rsp->cp2.regs[dest].e[de & 0x7] = result;
|
||||
|
||||
return rsp_vect_load_unshuffled_operand(rsp->cp2.regs[dest].e);
|
||||
}
|
||||
|
97
arch/x86_64/rsp/vrsq.h
Normal file
97
arch/x86_64/rsp/vrsq.h
Normal file
@ -0,0 +1,97 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vrsq.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
#include "../../../rsp/reciprocal.h"
|
||||
|
||||
// Mask table for vrsq(LH) functions.
|
||||
// Row i selects lane i only: 0xffff in column i, zero everywhere else.
// Each row is 16 bytes, so it can be loaded as one aligned __m128i.
alignas(16) static const uint16_t vrsq_mask_table[8][8] = {
  {0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
  {0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
  {0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000},
  {0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000},
  {0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000},
  {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000},
  {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000},
  {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff}
};
|
||||
|
||||
// Reciprocal square root lookup on one element of a vector register.
//
//   rsp   CPU state; reads cp2.regs[src] and cp2.div_in, writes cp2.div_out
//         and one element of cp2.regs[dest].
//   dp    non-zero: the 32-bit input is (div_in << 16) | element;
//         zero: the input is the sign-extended 16-bit element.
//   src/e     source register index and element (element is masked to 0..7).
//   dest/de   destination register index and element (masked to 0..7).
//
// Returns cp2.regs[dest] reloaded through rsp_vect_load_unshuffled_operand().
inline __m128i rsp_vrsq(RSP::CPUState *rsp, int dp,
                        unsigned src, unsigned e, unsigned dest, unsigned de) {
  // Selected element of VT.
  const int16_t element = rsp->cp2.regs[src].e[e & 0x7];

  const uint32_t wide_input = ((uint32_t) rsp->cp2.div_in << 16) | (uint16_t) element;
  const uint32_t narrow_input = element;

  const int32_t input = dp ? wide_input : narrow_input;

  // All ones for a negative input, zero otherwise.
  const int32_t sign_mask = input >> 31;

  // One's complement for negative inputs; completed to a two's complement
  // negation only when the input is above -32768.
  int32_t magnitude = input ^ sign_mask;
  if (input > -32768)
    magnitude -= sign_mask;

  int32_t result;

  if (magnitude == 0) {
    // Edge case: zero magnitude.
    result = 0x7fffFFFFU;
  } else if (input == -32768) {
    // Edge case: input is exactly -32768.
    result = 0xffff0000U;
  } else {
    unsigned shift;

    //TODO: Clean this up.
#ifdef _MSC_VER
    unsigned long msb_index;
    _BitScanReverse(&msb_index, magnitude);
    shift = 31 - msb_index;
#else
    shift = __builtin_clz(magnitude);
#endif

    // Nine bits after the leading one pick the ROM entry; the square root
    // entries live in the upper half, with the low bit chosen by shift parity.
    unsigned idx = (((unsigned long long) magnitude << shift) & 0x7FC00000U) >> 22;
    idx = ((idx | 0x200) & 0x3FE) | (shift % 2);

    result = rsp_reciprocal_rom[idx];
    result = ((0x10000 | result) << 14) >> ((31 - shift) >> 1);

    // Restore the sign of the input (one's complement).
    result ^= sign_mask;
  }

  // Publish the upper half in div_out and the lower half in VD[de].
  rsp->cp2.div_out = result >> 16;
  rsp->cp2.regs[dest].e[de & 0x7] = result;

  return rsp_vect_load_unshuffled_operand(rsp->cp2.regs[dest].e);
}
|
||||
|
||||
// Stores element `e` of register `src` in cp2.div_in and returns register
// `dest` with element `de` replaced by the current cp2.div_out.
//
//   rsp       CPU state; reads cp2.regs[src], cp2.regs[dest] and cp2.div_out,
//             writes cp2.div_in.
//   src/e     source register index and element.
//   dest/de   destination register index and element.
//
// Both element indices are masked to 0..7, matching rsp_vrsq(); previously an
// out-of-range value indexed past the 8-entry local array / mask table.
// The destination register itself is not written here; only the merged
// vector is returned.
inline __m128i rsp_vrsqh(RSP::CPUState *rsp,
                         unsigned src, unsigned e, unsigned dest, unsigned de) {
  int16_t elements[8];

  // Get the element from VT.
  memcpy(elements, rsp->cp2.regs + src, sizeof(elements));
  rsp->cp2.div_in = elements[e & 0x7];

  // Clear lane `de` of the destination and drop div_out into it.
  const __m128i vd_mask = _mm_load_si128((const __m128i *) vrsq_mask_table[de & 0x7]);
  __m128i vd = _mm_load_si128((__m128i *) (rsp->cp2.regs + dest));
  vd = _mm_andnot_si128(vd_mask, vd);

  __m128i b_result = _mm_set1_epi16(rsp->cp2.div_out);
  b_result = _mm_and_si128(vd_mask, b_result);
  return _mm_or_si128(b_result, vd);
}
|
||||
|
24
arch/x86_64/rsp/vsub.h
Normal file
24
arch/x86_64/rsp/vsub.h
Normal file
@ -0,0 +1,24 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vsub.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Per-lane signed 16-bit vs - vt - carry.
//
//   carry    subtracted from vt as-is, so a lane holding 0xffff (-1) takes one
//            more off the difference and a lane holding 0 takes nothing.
//   acc_lo   receives the wrapping (unsaturated) difference.
//
// Returns the difference saturated to the signed 16-bit range.
static inline __m128i rsp_vsub(__m128i vs, __m128i vt,
                               __m128i carry, __m128i *acc_lo) {
  // vt - carry, once wrapping and once saturating.
  const __m128i wrapped = _mm_sub_epi16(vt, carry);
  const __m128i clamped = _mm_subs_epi16(vt, carry);

  // The accumulator keeps the plain modular result.
  *acc_lo = _mm_sub_epi16(vs, wrapped);

  // Where the inner subtraction saturated (clamped > wrapped) the clamped
  // value is one short, so the outer result needs an extra -1. The compare
  // mask is exactly that -1.
  const __m128i correction = _mm_cmpgt_epi16(clamped, wrapped);
  const __m128i vd = _mm_subs_epi16(vs, clamped);

  return _mm_adds_epi16(vd, correction);
}
|
||||
|
21
arch/x86_64/rsp/vsubc.h
Normal file
21
arch/x86_64/rsp/vsubc.h
Normal file
@ -0,0 +1,21 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vsubc.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Per-lane wrapping 16-bit vs - vt, plus two comparison masks.
//
//   zero   comparand for the mask computations; the masks described below
//          assume the caller passes an all-zero vector.
//   eq     receives 0xffff in lanes where vs != vt, 0 where they are equal.
//   sn     receives 0xffff in lanes where vs < vt as unsigned values.
//
// Returns vs - vt (modular).
static inline __m128i rsp_vsubc(__m128i vs, __m128i vt,
                                __m128i zero, __m128i *eq, __m128i *sn) {
  const __m128i same = _mm_cmpeq_epi16(vs, vt);

  // Unsigned saturating vs - vt is zero exactly when vs <= vt (unsigned).
  const __m128i not_above = _mm_cmpeq_epi16(_mm_subs_epu16(vs, vt), zero);

  // Invert the equality mask by comparing it against zero.
  *eq = _mm_cmpeq_epi16(same, zero);

  // "vs <= vt" with the equal lanes removed leaves "vs < vt".
  *sn = _mm_andnot_si128(same, not_above);

  return _mm_sub_epi16(vs, vt);
}
|
||||
|
16
arch/x86_64/rsp/vxor.h
Normal file
16
arch/x86_64/rsp/vxor.h
Normal file
@ -0,0 +1,16 @@
|
||||
//
|
||||
// arch/x86_64/rsp/vxor.h
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// Bitwise XOR of the two 128-bit operands: vd = vs ^ vt.
static inline __m128i rsp_vxor(__m128i vs, __m128i vt) {
  const __m128i vd = _mm_xor_si128(vs, vt);
  return vd;
}
|
||||
|
||||
// Bitwise XNOR of the two 128-bit operands: vd = ~(vs ^ vt).
static inline __m128i rsp_vnxor(__m128i vs, __m128i vt) {
  const __m128i all_ones = _mm_set1_epi32(-1);

  // andnot computes (~a) & b, so with b all-ones this is the complement of a.
  return _mm_andnot_si128(_mm_xor_si128(vs, vt), all_ones);
}
|
||||
|
4
debug-toolchain/Makefile
Normal file
4
debug-toolchain/Makefile
Normal file
@ -0,0 +1,4 @@
|
||||
PROGRAM = test
|
||||
MIPS_OBJ = main.o
|
||||
|
||||
include Makefile.mips
|
38
debug-toolchain/Makefile.mips
Normal file
38
debug-toolchain/Makefile.mips
Normal file
@ -0,0 +1,38 @@
|
||||
####
## Basic Makefile for RSP-MIPS
##
## Expects the including Makefile to define:
##   PROGRAM   base name of the outputs
##   MIPS_OBJ  program object files
##
## Produces $(PROGRAM).bin from the .text section and
## $(PROGRAM).global.bin from the .data section of $(PROGRAM).elf.
######

TARGET_ELF = $(PROGRAM).elf
TARGET_BIN = $(PROGRAM).bin
TARGET_GLOBAL_BIN = $(PROGRAM).global.bin
MIPS_LD_SCRIPT = rsp-mips.ld

MIPS_OBJCOPY = mipsel-linux-gnu-objcopy
MIPS_CC = mipsel-linux-gnu-gcc
MIPS_AS = mipsel-linux-gnu-as
MIPS_LD = mipsel-linux-gnu-ld

# Startup and runtime support objects linked ahead of the program objects.
CRT_OBJ = start.o rsp-mips.o

all: $(TARGET_BIN) $(TARGET_GLOBAL_BIN)

$(TARGET_BIN): $(TARGET_ELF)
	$(MIPS_OBJCOPY) -j .text $< $(TARGET_BIN) -O binary

$(TARGET_GLOBAL_BIN): $(TARGET_ELF)
	$(MIPS_OBJCOPY) -j .data $< $(TARGET_GLOBAL_BIN) -O binary

$(TARGET_ELF): $(MIPS_OBJ) $(CRT_OBJ)
	$(MIPS_LD) -T $(MIPS_LD_SCRIPT) -o $@ $(CRT_OBJ) $(MIPS_OBJ) -EB

%.o: %.s
	$(MIPS_AS) -o $@ $< -EB -mabi=eabi -march=mips1

%.o: %.c rsp-mips.h
	$(MIPS_CC) -c -o $@ $< -Os -EB -march=mips1 -mabi=eabi -mno-abicalls -std=gnu99 -nostdlib

# CRT_OBJ is built by the rules above, so it has to be cleaned here as well.
clean:
	rm -f $(MIPS_OBJ) $(CRT_OBJ) $(TARGET_ELF) $(TARGET_HEX) $(TARGET_GLOBAL_HEX) $(TARGET_BIN) $(TARGET_GLOBAL_BIN)

.PHONY: all clean tools clean-tools
|
||||
|
8
debug-toolchain/main.c
Normal file
8
debug-toolchain/main.c
Normal file
@ -0,0 +1,8 @@
|
||||
#include "rsp-mips.h"
|
||||
|
||||
u32 data[4] = { 0x10, 0x20, 0x30, 0x40 };
|
||||
|
||||
int main(void)
|
||||
{
|
||||
rsp_debug_break(data[0], data[1], data[2], data[3]);
|
||||
}
|
17
debug-toolchain/rsp-mips.h
Normal file
17
debug-toolchain/rsp-mips.h
Normal file
@ -0,0 +1,17 @@
|
||||
#ifndef __RSP_MIPS_H
|
||||
#define __RSP_MIPS_H
|
||||
|
||||
typedef signed char s8;
|
||||
typedef unsigned char u8;
|
||||
typedef signed short s16;
|
||||
typedef unsigned short u16;
|
||||
typedef signed int s32;
|
||||
typedef unsigned int u32;
|
||||
typedef signed long long s64;
|
||||
typedef unsigned long long u64;
|
||||
typedef u32 size_t;
|
||||
|
||||
void rsp_break(void);
|
||||
void rsp_debug_break(u32 a, u32 b, u32 c, u32 d);
|
||||
|
||||
#endif
|
9
debug-toolchain/rsp-mips.ld
Normal file
9
debug-toolchain/rsp-mips.ld
Normal file
@ -0,0 +1,9 @@
|
||||
SECTIONS {
|
||||
ENTRY(rsp_mips_start)
|
||||
. = 0x00400000;
|
||||
.text : { start.o (.text); * (.text*); }
|
||||
|
||||
. = 0x00700000;
|
||||
.data : { * (.data); * (.rodata*); * (.sbss); * (.scommon); }
|
||||
.bss : { * (.bss); }
|
||||
}
|
20
debug-toolchain/rsp-mips.s
Normal file
20
debug-toolchain/rsp-mips.s
Normal file
@ -0,0 +1,20 @@
|
||||
.text
|
||||
.section .text
|
||||
|
||||
.global rsp_break
|
||||
.ent rsp_break
|
||||
.type rsp_break, @function
|
||||
rsp_break:
|
||||
break
|
||||
jr $ra
|
||||
.end rsp_break
|
||||
.size rsp_break, .-rsp_break
|
||||
|
||||
.global rsp_debug_break
|
||||
.ent rsp_debug_break
|
||||
.type rsp_debug_break, @function
|
||||
rsp_debug_break:
|
||||
break
|
||||
jr $ra
|
||||
.end rsp_debug_break
|
||||
.size rsp_debug_break, .-rsp_debug_break
|
28
debug-toolchain/start.s
Normal file
28
debug-toolchain/start.s
Normal file
@ -0,0 +1,28 @@
|
||||
###
|
||||
##
|
||||
# Entry point and setup for our RSP
|
||||
##
|
||||
|
||||
.equ RAM_SIZE, 4096
|
||||
|
||||
.text
|
||||
.align 2
|
||||
.section .text
|
||||
.global rsp_mips_start
|
||||
.extern main
|
||||
.ent rsp_mips_start
|
||||
.type rsp_mips_start, @function
|
||||
|
||||
rsp_mips_start:
|
||||
nop
|
||||
redo:
|
||||
li $a0, 0 # argc = 0, it will never be used anyways on this stuff. :D
|
||||
li $a1, 0 # argv = 0
|
||||
li $sp, (0x00700000 + RAM_SIZE - 4) # Set up stack.
|
||||
|
||||
jal main
|
||||
j redo
|
||||
|
||||
.end rsp_mips_start
|
||||
.size rsp_mips_start, .-rsp_mips_start
|
||||
|
81
debug_jit.cpp
Normal file
81
debug_jit.cpp
Normal file
@ -0,0 +1,81 @@
|
||||
#include "debug_jit.hpp"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <dlfcn.h>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace JIT
|
||||
{
|
||||
|
||||
struct DebugBlock::Impl
|
||||
{
|
||||
Impl() = default;
|
||||
Impl(Impl&&) = delete;
|
||||
void operator=(Impl&&) = delete;
|
||||
~Impl();
|
||||
|
||||
void *dylib = nullptr;
|
||||
Func block = nullptr;
|
||||
string name, soname;
|
||||
|
||||
bool compile(uint64_t hash, const std::string &source);
|
||||
};
|
||||
|
||||
DebugBlock::DebugBlock(const unordered_map<string, uint64_t> &)
|
||||
{}
|
||||
|
||||
DebugBlock::~DebugBlock()
|
||||
{}
|
||||
|
||||
DebugBlock::Impl::~Impl()
|
||||
{
|
||||
if (dylib)
|
||||
dlclose(dylib);
|
||||
|
||||
remove(soname.c_str());
|
||||
//remove(name.c_str());
|
||||
}
|
||||
|
||||
bool DebugBlock::compile(uint64_t hash, const std::string &source)
|
||||
{
|
||||
impl = unique_ptr<Impl>(new Impl);
|
||||
bool ret = impl->compile(hash, source);
|
||||
if (ret)
|
||||
block = impl->block;
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool DebugBlock::Impl::compile(uint64_t hash, const std::string &source)
|
||||
{
|
||||
name = "/tmp/";
|
||||
name += to_string(hash);
|
||||
soname = name;
|
||||
name += ".c";
|
||||
soname += ".so";
|
||||
|
||||
FILE *file = fopen(name.c_str(), "w");
|
||||
if (!file)
|
||||
return false;
|
||||
|
||||
fputs(source.c_str(), file);
|
||||
fclose(file);
|
||||
|
||||
char command[256];
|
||||
sprintf(command, "gcc -o %s %s -shared -fpic -O0 -g -std=c99 -Wl,--unresolved-symbols=ignore-all", soname.c_str(), name.c_str());
|
||||
int ret = system(command);
|
||||
if (ret != 0)
|
||||
return false;
|
||||
|
||||
dylib = dlopen(soname.c_str(), RTLD_LOCAL | RTLD_LAZY);
|
||||
if (!dylib)
|
||||
return false;
|
||||
|
||||
block = reinterpret_cast<Func>(dlsym(dylib, "block_entry"));
|
||||
if (!dylib)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
28
debug_jit.hpp
Normal file
28
debug_jit.hpp
Normal file
@ -0,0 +1,28 @@
|
||||
#ifndef DEBUG_JIT_HPP__
|
||||
#define DEBUG_JIT_HPP__
|
||||
|
||||
#include <memory>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace JIT
|
||||
{
|
||||
using Func = void (*)(void *, void *);
|
||||
class DebugBlock
|
||||
{
|
||||
public:
|
||||
DebugBlock(const std::unordered_map<std::string, uint64_t> &symbol_table);
|
||||
~DebugBlock();
|
||||
|
||||
bool compile(uint64_t hash, const std::string &source);
|
||||
Func get_func() const { return block; }
|
||||
|
||||
private:
|
||||
struct Impl;
|
||||
std::unique_ptr<Impl> impl;
|
||||
Func block = nullptr;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
214
llvm_jit.cpp
Normal file
214
llvm_jit.cpp
Normal file
@ -0,0 +1,214 @@
|
||||
#include "llvm_jit.hpp"
|
||||
|
||||
#include <clang/CodeGen/CodeGenAction.h>
|
||||
#include <clang/Driver/Compilation.h>
|
||||
#include <clang/Driver/Driver.h>
|
||||
#include <clang/Driver/Tool.h>
|
||||
#include <clang/Frontend/CompilerInstance.h>
|
||||
#include <clang/Frontend/CompilerInvocation.h>
|
||||
#include <clang/Frontend/TextDiagnosticPrinter.h>
|
||||
#include <clang/Lex/PreprocessorOptions.h>
|
||||
#include <llvm/ADT/SmallString.h>
|
||||
#include <llvm/ExecutionEngine/ExecutionEngine.h>
|
||||
#include <llvm/ExecutionEngine/MCJIT.h>
|
||||
#include <llvm/ExecutionEngine/JITSymbol.h>
|
||||
#include <llvm/ExecutionEngine/ObjectCache.h>
|
||||
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
|
||||
#include <llvm/ExecutionEngine/RuntimeDyld.h>
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/IR/LLVMContext.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/IRReader/IRReader.h>
|
||||
#include <llvm/Support/FileSystem.h>
|
||||
#include <llvm/Support/Host.h>
|
||||
#include <llvm/Support/ManagedStatic.h>
|
||||
#include <llvm/Support/Path.h>
|
||||
#include <llvm/Support/SourceMgr.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
|
||||
#include <llvm/ExecutionEngine/Orc/CompileUtils.h>
|
||||
#include <llvm/ExecutionEngine/Orc/Core.h>
|
||||
#include <llvm/ExecutionEngine/Orc/ExecutionUtils.h>
|
||||
#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
|
||||
#include <llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h>
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
using namespace clang;
|
||||
using namespace std;
|
||||
|
||||
namespace JIT
|
||||
{
|
||||
struct Block::Impl
|
||||
{
|
||||
Impl(LLVMEngine &engine_)
|
||||
: engine(engine_)
|
||||
{}
|
||||
|
||||
LLVMEngine &engine;
|
||||
Func block = nullptr;
|
||||
size_t block_size = 0;
|
||||
bool compile(const std::string &source);
|
||||
};
|
||||
|
||||
Block::Block(LLVMEngine &engine)
|
||||
{
|
||||
impl = std::unique_ptr<Impl>(new Impl(engine));
|
||||
}
|
||||
|
||||
Block::~Block()
|
||||
{
|
||||
}
|
||||
|
||||
struct LLVMHolder
|
||||
{
|
||||
LLVMHolder()
|
||||
{
|
||||
llvm::InitializeNativeTarget();
|
||||
llvm::InitializeNativeTargetAsmPrinter();
|
||||
llvm::InitializeNativeTargetAsmParser();
|
||||
}
|
||||
|
||||
~LLVMHolder()
|
||||
{
|
||||
llvm::llvm_shutdown();
|
||||
}
|
||||
};
|
||||
|
||||
struct LLVMEngine::Impl
|
||||
{
|
||||
Impl(const std::unordered_map<std::string, uint64_t> &symbol_table_)
|
||||
: symbol_table(symbol_table_)
|
||||
{
|
||||
static LLVMHolder llvm_holder;
|
||||
|
||||
execution_session = llvm::make_unique<llvm::orc::ExecutionSession>();
|
||||
execution_session->setErrorReporter([](llvm::Error error) {
|
||||
if (error)
|
||||
llvm::errs() << "Error: " << error << "\n";
|
||||
});
|
||||
|
||||
llvm::orc::LegacyRTDyldObjectLinkingLayer::Resources resources;
|
||||
resources.MemMgr = llvm::make_unique<llvm::SectionMemoryManager>();
|
||||
resources.Resolver = llvm::orc::createLegacyLookupResolver(
|
||||
*execution_session,
|
||||
[this](const std::string &name) -> llvm::JITSymbol {
|
||||
return findSymbol(name);
|
||||
},
|
||||
[](llvm::Error) {});
|
||||
|
||||
object_layer = llvm::make_unique<llvm::orc::LegacyRTDyldObjectLinkingLayer>(*execution_session,
|
||||
[=](llvm::orc::VModuleKey) { return resources; });
|
||||
|
||||
auto host = llvm::orc::JITTargetMachineBuilder::detectHost();
|
||||
target_machine = llvm::cantFail(host->createTargetMachine());
|
||||
target_machine->setOptLevel(llvm::CodeGenOpt::Level::Default);
|
||||
data_layout = llvm::make_unique<llvm::DataLayout>(std::move(*host->getDefaultDataLayoutForTarget()));
|
||||
compile_layer = llvm::make_unique<llvm::orc::LegacyIRCompileLayer<
|
||||
llvm::orc::LegacyRTDyldObjectLinkingLayer, llvm::orc::SimpleCompiler>>(*object_layer, llvm::orc::SimpleCompiler(*target_machine));
|
||||
}
|
||||
|
||||
std::unique_ptr<EmitLLVMOnlyAction> compile_c(const std::string &source)
|
||||
{
|
||||
llvm::SmallVector<const char *, 4> args;
|
||||
args.push_back("__block.c");
|
||||
args.push_back("-std=c99");
|
||||
args.push_back("-O2");
|
||||
|
||||
std::string string_buffer;
|
||||
llvm::raw_string_ostream ss(string_buffer);
|
||||
|
||||
IntrusiveRefCntPtr<DiagnosticOptions> diag_opts = new DiagnosticOptions();
|
||||
TextDiagnosticPrinter *diag_client = new TextDiagnosticPrinter(ss, &*diag_opts);
|
||||
IntrusiveRefCntPtr<DiagnosticIDs> diag_id(new DiagnosticIDs());
|
||||
DiagnosticsEngine diags(diag_id, &*diag_opts, diag_client);
|
||||
|
||||
auto CI = llvm::make_unique<CompilerInvocation>();
|
||||
auto *invocation = CI.get();
|
||||
CompilerInvocation::CreateFromArgs(*CI, args.data(), args.data() + args.size(), diags);
|
||||
|
||||
auto clang = llvm::make_unique<CompilerInstance>();
|
||||
clang->setInvocation(std::move(CI));
|
||||
clang->createDiagnostics();
|
||||
|
||||
auto act = llvm::make_unique<EmitLLVMOnlyAction>();
|
||||
|
||||
StringRef code_data(source);
|
||||
auto buffer = llvm::MemoryBuffer::getMemBufferCopy(code_data);
|
||||
invocation->getPreprocessorOpts().clearRemappedFiles();
|
||||
invocation->getPreprocessorOpts().addRemappedFile("__block.c", buffer.release());
|
||||
|
||||
if (!clang->ExecuteAction(*act))
|
||||
{
|
||||
llvm::errs() << "ExecuteAction failed.\n";
|
||||
return {};
|
||||
}
|
||||
return act;
|
||||
}
|
||||
|
||||
// Compiles C `source` to LLVM IR, adds the module to the compile layer and
// returns the address of its "block_entry" symbol, or nullptr on any failure.
//
// Every llvm::Error / Expected failure is logged and consumed here: dropping
// a failed Error unhandled, or running a failure through cantFail(), aborts
// the process instead of reporting a failed compile to the caller.
Func compile(const std::string &source)
{
	auto act = compile_c(source);
	if (!act)
		return nullptr;

	auto K = execution_session->allocateVModule();

	if (auto error = compile_layer->addModule(K, act->takeModule()))
	{
		llvm::logAllUnhandledErrors(std::move(error), llvm::errs(), "Error: ");
		return nullptr;
	}

	auto entry_point = compile_layer->findSymbolIn(K, "block_entry", true);
	if (!entry_point)
	{
		// Either the lookup failed or the symbol does not exist; takeError()
		// yields success in the latter case and nothing is printed.
		llvm::logAllUnhandledErrors(entry_point.takeError(), llvm::errs(), "Error: ");
		return nullptr;
	}

	auto address = entry_point.getAddress();
	if (!address)
	{
		llvm::logAllUnhandledErrors(address.takeError(), llvm::errs(), "Error: ");
		return nullptr;
	}

	return reinterpret_cast<Func>(*address);
}
|
||||
|
||||
llvm::JITSymbol findSymbol(const std::string &name)
|
||||
{
|
||||
auto itr = symbol_table.find(name);
|
||||
if (itr != symbol_table.end())
|
||||
return llvm::JITSymbol(itr->second, llvm::JITSymbolFlags::None);
|
||||
else
|
||||
return llvm::JITSymbol(nullptr);
|
||||
}
|
||||
|
||||
const std::unordered_map<std::string, uint64_t> &symbol_table;
|
||||
|
||||
llvm::LLVMContext context;
|
||||
std::unique_ptr<llvm::orc::ExecutionSession> execution_session;
|
||||
std::unique_ptr<llvm::orc::LegacyRTDyldObjectLinkingLayer> object_layer;
|
||||
std::unique_ptr<llvm::orc::LegacyIRCompileLayer<
|
||||
llvm::orc::LegacyRTDyldObjectLinkingLayer,
|
||||
llvm::orc::SimpleCompiler>> compile_layer;
|
||||
std::unique_ptr<llvm::TargetMachine> target_machine;
|
||||
std::unique_ptr<llvm::orc::MangleAndInterner> mangler;
|
||||
std::unique_ptr<llvm::DataLayout> data_layout;
|
||||
};
|
||||
|
||||
LLVMEngine::LLVMEngine(const std::unordered_map<std::string, uint64_t> &symbol_table)
|
||||
{
|
||||
impl.reset(new Impl(symbol_table));
|
||||
}
|
||||
|
||||
LLVMEngine::~LLVMEngine()
|
||||
{
|
||||
}
|
||||
|
||||
bool Block::compile(uint64_t, const std::string &source)
|
||||
{
|
||||
bool ret = impl->compile(source);
|
||||
if (ret)
|
||||
{
|
||||
block = impl->block;
|
||||
block_size = impl->block_size;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool Block::Impl::compile(const std::string &source)
|
||||
{
|
||||
block = engine.impl->compile(source);
|
||||
return block != nullptr;
|
||||
}
|
||||
}
|
39
llvm_jit.hpp
Normal file
39
llvm_jit.hpp
Normal file
@ -0,0 +1,39 @@
|
||||
#ifndef JIT_HPP
|
||||
#define JIT_HPP
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
|
||||
namespace JIT
|
||||
{
|
||||
class LLVMEngine
|
||||
{
|
||||
public:
|
||||
LLVMEngine(const std::unordered_map<std::string, uint64_t> &symbol_table);
|
||||
~LLVMEngine();
|
||||
|
||||
private:
|
||||
friend class Block;
|
||||
struct Impl;
|
||||
std::unique_ptr<Impl> impl;
|
||||
};
|
||||
|
||||
using Func = void (*)(void *, void *);
|
||||
class Block
|
||||
{
|
||||
public:
|
||||
Block(LLVMEngine &engine);
|
||||
~Block();
|
||||
bool compile(uint64_t hash, const std::string &source);
|
||||
Func get_func() const { return block; }
|
||||
|
||||
private:
|
||||
struct Impl;
|
||||
std::unique_ptr<Impl> impl;
|
||||
Func block = nullptr;
|
||||
size_t block_size = 0;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
330
main.cpp
Normal file
330
main.cpp
Normal file
@ -0,0 +1,330 @@
|
||||
#include "rsp.hpp"
|
||||
#include <vector>
|
||||
#include <stdio.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Reverses the byte order of a 32-bit word.
static inline uint32_t flip_endian(uint32_t v)
{
	const uint32_t byte0_to_3 = v << 24;
	const uint32_t byte1_to_2 = (v << 8) & 0x00ff0000;
	const uint32_t byte2_to_1 = (v >> 8) & 0x0000ff00;
	const uint32_t byte3_to_0 = v >> 24;
	return byte0_to_3 | byte1_to_2 | byte2_to_1 | byte3_to_0;
}
|
||||
|
||||
// Loads a file as an array of 32-bit words.
//
//   path   file to read.
//   flip   when true, every word is byte-swapped after loading.
//
// Returns the words that were actually read. The result is empty when the
// file cannot be opened, its size cannot be determined, or nothing could be
// read. Trailing bytes that do not fill a whole word are ignored.
static vector<uint32_t> read_binary(const char *path, bool flip)
{
	FILE *f = fopen(path, "rb");
	if (!f)
		return {};

	// ftell() reports -1 on failure; never let that size the vector.
	long len = -1;
	if (fseek(f, 0, SEEK_END) == 0)
		len = ftell(f);
	if (len < 0)
	{
		fclose(f);
		return {};
	}
	rewind(f);

	vector<uint32_t> v(static_cast<size_t>(len) / 4);
	const size_t words_read = fread(v.data(), sizeof(uint32_t), v.size(), f);
	fclose(f);

	// On a short read keep only real data rather than zero padding.
	v.resize(words_read);

	if (flip)
		for (auto &value : v)
			value = flip_endian(value);
	return v;
}
|
||||
|
||||
// Reads the next 8-byte tag from `file` and checks it against `tag`.
//
// Returns true when the tag matches and false when the end-of-file tag was
// read instead. Throws std::runtime_error when 8 bytes cannot be read or the
// tag is neither the expected one nor the end-of-file tag.
//
// NOTE(review): exactly 8 bytes are read and compared with strcmp(), so a tag
// literal shorter than 8 characters can only match if the file pads the field
// with NUL bytes -- confirm the literals against the trace writer.
static bool read_tag_validate(FILE *file, const char *tag)
{
	// One spare byte keeps the buffer NUL-terminated for strcmp().
	char header[9] = {};
	const size_t got = fread(header, 1, 8, file);
	if (got != 8)
		throw std::runtime_error("Failed to read tag.");

	const bool end_of_trace = strcmp(header, "EOF ") == 0;
	if (end_of_trace)
		return false;

	const bool matches = strcmp(header, tag) == 0;
	if (!matches)
		throw std::runtime_error("Unexpected tag.");

	return true;
}
|
||||
|
||||
static bool read_block(FILE *file, const char *tag, void *buffer, size_t size)
|
||||
{
|
||||
if (!read_tag_validate(file, tag))
|
||||
return false;
|
||||
|
||||
uint32_t block_size;
|
||||
if (fread(&block_size, sizeof(block_size), 1, file) != 1)
|
||||
throw runtime_error("EOF");
|
||||
|
||||
if (size != block_size)
|
||||
throw runtime_error("Unexpected size");
|
||||
|
||||
if (fread(buffer, size, 1, file) != 1)
|
||||
throw runtime_error("EOF");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool read_poke(FILE *file, RSP::CPU &cpu)
|
||||
{
|
||||
char tmp[9] = {};
|
||||
if (fread(tmp, 1, 8, file) != 8)
|
||||
throw runtime_error("Failed to read tag.");
|
||||
|
||||
if (strcmp(tmp, "ENDDMA ") == 0)
|
||||
return false;
|
||||
|
||||
if (strcmp(tmp, "POKE "))
|
||||
throw runtime_error("Unexpected tag.");
|
||||
|
||||
uint32_t offset;
|
||||
uint32_t len;
|
||||
|
||||
if (fread(&offset, sizeof(offset), 1, file) != 1)
|
||||
throw runtime_error("Wrong EOF");
|
||||
if (fread(&len, sizeof(len), 1, file) != 1)
|
||||
throw runtime_error("Wrong EOF");
|
||||
|
||||
if (offset >= 0x1000)
|
||||
{
|
||||
if (fread(reinterpret_cast<uint8_t *>(cpu.get_state().imem) + offset - 0x1000, len, 1, file) != 1)
|
||||
throw runtime_error("Wrong EOF");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fread(reinterpret_cast<uint8_t *>(cpu.get_state().dmem) + offset, len, 1, file) != 1)
|
||||
throw runtime_error("Wrong EOF");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Replays every trace recorded in the dump file at `path` and compares the
// resulting CPU state with the state recorded in the dump.
//
// For each trace the initial DMEM, IMEM, scalar registers, vector registers,
// accumulator, PC and flags are loaded, the CPU is run until it reports
// MODE_BREAK (applying recorded DMA pokes whenever it reports MODE_DMA_READ),
// and the final state is checked field by field. Mismatches are printed to
// stderr together with a per-trace summary.
//
// Throws std::runtime_error if the file cannot be opened. Errors raised while
// parsing are caught here, printed, and end the replay.
//
// NOTE(review): the CPU is handed pointers to local arrays; presumably
// set_dmem()/set_imem() store them, in which case `cpu` must not be run again
// after this function returns -- confirm.
static void validate_trace(RSP::CPU &cpu, const char *path)
{
	auto &state = cpu.get_state();
	uint32_t dmem[1024];
	uint32_t imem[1024];
	cpu.set_dmem(dmem);
	cpu.set_imem(imem);

	FILE *file = fopen(path, "rb");
	if (!file)
		throw std::runtime_error("Failed to load trace.");

	try
	{
		read_tag_validate(file, "RSPDUMP1");

		unsigned index = 0;

		while (read_tag_validate(file, "BEGIN "))
		{
			// Initial state.
			read_block(file, "DMEM ", state.dmem, 0x1000);
			read_block(file, "IMEM ", state.imem, 0x1000);
			read_block(file, "SR32 ", state.sr, sizeof(state.sr));
			read_block(file, "VR32 ", state.cp2.regs, sizeof(state.cp2.regs));
			read_block(file, "VLO ", state.cp2.acc.e + RSP::RSP_ACC_LO, sizeof(uint16_t) * 8);
			read_block(file, "VMD ", state.cp2.acc.e + RSP::RSP_ACC_MD, sizeof(uint16_t) * 8);
			read_block(file, "VHI ", state.cp2.acc.e + RSP::RSP_ACC_HI, sizeof(uint16_t) * 8);
			read_block(file, "PC ", &state.pc, sizeof(state.pc));

			int16_t VCO, VCC, VCE;
			read_block(file, "VCO ", &VCO, sizeof(VCO));
			read_block(file, "VCC ", &VCC, sizeof(VCC));
			read_block(file, "VCE ", &VCE, sizeof(VCE));

			rsp_set_flags(state.cp2.flags[RSP::RSP_VCO].e, VCO);
			rsp_set_flags(state.cp2.flags[RSP::RSP_VCC].e, VCC);
			rsp_set_flags(state.cp2.flags[RSP::RSP_VCE].e, VCE);

			RSP::ReturnMode mode = RSP::MODE_CONTINUE;
			do
			{
				*state.cp0.cr[RSP::CP0_REGISTER_SP_STATUS] = 0;
				cpu.invalidate_imem();

				// Run till break.
				mode = cpu.run();
				if (mode == RSP::MODE_DMA_READ)
				{
					if (!read_tag_validate(file, "BEGINDMA"))
						throw std::runtime_error("Expected BEGINDMA.");
					while (read_poke(file, cpu));
				}
			} while (mode != RSP::MODE_BREAK);

			// Expected final state. Named so they do not shadow the live
			// dmem/imem arrays above.
			uint32_t expected_dmem[0x1000 >> 2];
			uint32_t expected_imem[0x1000 >> 2];
			uint32_t sr[32];
			uint16_t vr[32 * 8];
			uint16_t vlo[8];
			uint16_t vmd[8];
			uint16_t vhi[8];

			read_block(file, "DMEM END", expected_dmem, sizeof(expected_dmem));
			read_block(file, "IMEM END", expected_imem, sizeof(expected_imem));
			read_block(file, "SR32 END", sr, sizeof(sr));
			read_block(file, "VR32 END", vr, sizeof(vr));
			read_block(file, "VLO END", vlo, sizeof(vlo));
			read_block(file, "VMD END", vmd, sizeof(vmd));
			read_block(file, "VHI END", vhi, sizeof(vhi));
			read_block(file, "VCO END", &VCO, sizeof(VCO));
			read_block(file, "VCC END", &VCC, sizeof(VCC));
			read_block(file, "VCE END", &VCE, sizeof(VCE));

			unsigned errors = 0;

			fprintf(stderr, "==== Trace #%u ====\n", index);

			// Validate DMEM
			for (unsigned i = 0; i < (0x1000 >> 2); i++)
			{
				if (state.dmem[i] != expected_dmem[i])
				{
					fprintf(stderr, "DMEM32[0x%03x] fault. Expected 0x%08x, got 0x%08x!\n",
					        i, expected_dmem[i], state.dmem[i]);
					errors++;
				}
			}

			// Validate IMEM (in case of DMA). Report the IMEM words, not the
			// DMEM words at the same index.
			for (unsigned i = 0; i < (0x1000 >> 2); i++)
			{
				if (state.imem[i] != expected_imem[i])
				{
					fprintf(stderr, "IMEM32[0x%03x] fault. Expected 0x%08x, got 0x%08x!\n",
					        i, expected_imem[i], state.imem[i]);
					errors++;
				}
			}

			// Validate SR
			for (unsigned i = 0; i < 32; i++)
			{
				if (sr[i] != state.sr[i])
				{
					fprintf(stderr, "SR[%02u] fault. Expected 0x%08x, got 0x%08x!\n",
					        i, sr[i], state.sr[i]);
					errors++;
				}
			}

			// Validate VR: all 32 registers of 8 lanes that were read above.
			for (unsigned i = 0; i < 32 * 8; i++)
			{
				if (vr[i] != state.cp2.regs[i >> 3].e[i & 7])
				{
					fprintf(stderr, "VR[%02u][%u] fault. Expected 0x%04x, got 0x%04x!\n",
					        i >> 3, i & 7, vr[i], state.cp2.regs[i >> 3].e[i & 7]);
					errors++;
				}
			}

			// Validate VLO
			for (unsigned i = 0; i < 8; i++)
			{
				if (vlo[i] != state.cp2.acc.e[RSP::RSP_ACC_LO + i])
				{
					fprintf(stderr, "VLO[%u] fault. Expected 0x%04x, got 0x%04x!\n",
					        i, vlo[i], state.cp2.acc.e[RSP::RSP_ACC_LO + i]);
					errors++;
				}
			}

			// Validate VMD
			for (unsigned i = 0; i < 8; i++)
			{
				if (vmd[i] != state.cp2.acc.e[RSP::RSP_ACC_MD + i])
				{
					fprintf(stderr, "VMD[%u] fault. Expected 0x%04x, got 0x%04x!\n",
					        i, vmd[i], state.cp2.acc.e[RSP::RSP_ACC_MD + i]);
					errors++;
				}
			}

			// Validate VHI
			for (unsigned i = 0; i < 8; i++)
			{
				if (vhi[i] != state.cp2.acc.e[RSP::RSP_ACC_HI + i])
				{
					fprintf(stderr, "VHI[%u] fault. Expected 0x%04x, got 0x%04x!\n",
					        i, vhi[i], state.cp2.acc.e[RSP::RSP_ACC_HI + i]);
					errors++;
				}
			}

			// Validate flags
			if (VCO != rsp_get_flags(state.cp2.flags[RSP::RSP_VCO].e))
			{
				fprintf(stderr, "VCO fault. Expected 0x%04x, got 0x%04x!\n",
				        VCO, rsp_get_flags(state.cp2.flags[RSP::RSP_VCO].e));
				errors++;
			}

			if (VCC != rsp_get_flags(state.cp2.flags[RSP::RSP_VCC].e))
			{
				fprintf(stderr, "VCC fault. Expected 0x%04x, got 0x%04x!\n",
				        VCC, rsp_get_flags(state.cp2.flags[RSP::RSP_VCC].e));
				errors++;
			}

			if (VCE != rsp_get_flags(state.cp2.flags[RSP::RSP_VCE].e))
			{
				fprintf(stderr, "VCE fault. Expected 0x%04x, got 0x%04x!\n",
				        VCE, rsp_get_flags(state.cp2.flags[RSP::RSP_VCE].e));
				errors++;
			}

			read_tag_validate(file, "END ");

			if (errors == 0)
				fprintf(stderr, "SUCCESS! :D\n");
			else
				fprintf(stderr, "%u ERRORS! :{\n", errors);
			fprintf(stderr, "======================\n\n");

			index++;
		}
	}
	catch (const std::exception &e)
	{
		fprintf(stderr, "Exception: %s\n", e.what());
	}

	fclose(file);
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
RSP::CPU cpu;
|
||||
auto &state = cpu.get_state();
|
||||
|
||||
uint32_t cr[16] = {};
|
||||
for (unsigned i = 0; i < 16; i++)
|
||||
state.cp0.cr[i] = &cr[i];
|
||||
|
||||
if (argc == 3)
|
||||
{
|
||||
auto dmem = read_binary(argv[1], true);
|
||||
auto imem = read_binary(argv[2], true);
|
||||
if (imem.empty())
|
||||
return 1;
|
||||
|
||||
dmem.resize(0x1000);
|
||||
imem.resize(0x1000);
|
||||
cpu.set_dmem(dmem.data());
|
||||
cpu.set_imem(imem.data());
|
||||
|
||||
for (unsigned i = 0; i < 1; i++)
|
||||
{
|
||||
cpu.invalidate_imem();
|
||||
cr[RSP::CP0_REGISTER_SP_STATUS] = 0;
|
||||
cpu.run();
|
||||
}
|
||||
}
|
||||
else if (argc == 2)
|
||||
validate_trace(cpu, argv[1]);
|
||||
else
|
||||
return 1;
|
||||
}
|
146
parallel.cpp
Normal file
146
parallel.cpp
Normal file
@ -0,0 +1,146 @@
|
||||
#include <stdint.h>
|
||||
#include "rsp.hpp"
|
||||
|
||||
#include "rsp_1.1.h"
|
||||
#include "m64p_plugin.h"
|
||||
|
||||
#define RSP_PARALLEL_VERSION 0x0101
|
||||
#define RSP_PLUGIN_API_VERSION 0x020000
|
||||
|
||||
namespace RSP
|
||||
{
|
||||
RSP_INFO rsp;
|
||||
CPU cpu;
|
||||
short MFC0_count[32];
|
||||
int SP_STATUS_TIMEOUT;
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
#ifdef INTENSE_DEBUG
|
||||
// Need super-fast hash here.
// 64-bit FNV-1: start from the offset basis, then for every byte
// multiply by the FNV prime and XOR the byte in.
static uint64_t hash_imem(const uint8_t *data, size_t size)
{
	const uint64_t prime = 0x100000001b3ull;
	uint64_t hash = 0xcbf29ce484222325ull;

	const uint8_t *const end = data + size;
	for (const uint8_t *cursor = data; cursor != end; ++cursor)
		hash = (hash * prime) ^ *cursor;

	return hash;
}
|
||||
|
||||
void log_rsp_mem_parallel(void)
|
||||
{
|
||||
fprintf(stderr, "IMEM HASH: 0x%016llx\n", hash_imem(RSP::rsp.IMEM, 0x1000));
|
||||
fprintf(stderr, "DMEM HASH: 0x%016llx\n", hash_imem(RSP::rsp.DMEM, 0x1000));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Plugin entry point: run the RSP until it halts or an interrupt must be delivered.
// Returns 0 without running when SP_STATUS already has HALT or BROKE set,
// otherwise returns `cycles` unchanged.
EXPORT unsigned int CALL parallelRSPDoRspCycles(unsigned int cycles)
{
	if ((*RSP::rsp.SP_STATUS_REG & (SP_STATUS_HALT | SP_STATUS_BROKE)) != 0)
		return 0;

	// We don't know if Mupen from the outside invalidated our IMEM.
	RSP::cpu.invalidate_imem();

	// Resume from the host-visible program counter (IMEM offset only).
	auto &state = RSP::cpu.get_state();
	state.pc = *RSP::rsp.SP_PC_REG & 0xfff;

#ifdef INTENSE_DEBUG
	fprintf(stderr, "RUN TASK: %u\n", state.pc);
	log_rsp_mem_parallel();
#endif

	// Restart the SP_STATUS read counters for this task.
	const unsigned counters = sizeof(RSP::MFC0_count) / sizeof(RSP::MFC0_count[0]);
	for (unsigned reg = 0; reg < counters; reg++)
		RSP::MFC0_count[reg] = 0;

	// Run CPU until we either break or we need to fire an IRQ.
	for (;;)
	{
		if (*RSP::rsp.SP_STATUS_REG & SP_STATUS_HALT)
			break;

		if (RSP::cpu.run() != RSP::MODE_CHECK_FLAGS)
			continue;

		if (*state.cp0.irq & 1)
			break;
	}

	*RSP::rsp.SP_PC_REG = 0x04001000 | (state.pc & 0xffc);

	// From CXD4.
	if (*RSP::rsp.SP_STATUS_REG & SP_STATUS_BROKE)
		return cycles;

	if (*state.cp0.irq & 1)
		RSP::rsp.CheckInterrupts();
	else if (*RSP::rsp.SP_SEMAPHORE_REG == 0) // Semaphore lock fixes: leave the timeout alone while the semaphore is held.
		RSP::SP_STATUS_TIMEOUT = 16; // From now on, wait 16 times, not 0x7fff

	// CPU restarts with the correct SIGs.
	*RSP::rsp.SP_STATUS_REG &= ~SP_STATUS_HALT;

	return cycles;
}
|
||||
|
||||
// Plugin entry point: report plugin type, version, API version, display name
// and capabilities. Every output pointer is optional; NULL ones are skipped.
// Always returns M64ERR_SUCCESS.
EXPORT m64p_error CALL parallelRSPPluginGetVersion(m64p_plugin_type *PluginType, int *PluginVersion, int *APIVersion, const char **PluginNamePtr, int *Capabilities)
{
	/* set version info */
	if (PluginType != NULL)
		*PluginType = M64PLUGIN_RSP;

	if (PluginVersion != NULL)
		*PluginVersion = RSP_PARALLEL_VERSION;

	if (APIVersion != NULL)
		*APIVersion = RSP_PLUGIN_API_VERSION;

	// Previously left unwritten, so a caller that asked for the name would
	// read an indeterminate pointer.
	if (PluginNamePtr != NULL)
		*PluginNamePtr = "ParaLLEl RSP";

	if (Capabilities != NULL)
		*Capabilities = 0;

	return M64ERR_SUCCESS;
}
|
||||
|
||||
// Plugin entry point: reset the host-visible RSP program counter when a ROM closes.
EXPORT void CALL parallelRSPRomClosed(void)
{
	// parallelRSPInitiateRSP() returns before filling RSP::rsp when it is handed
	// dummy data (DMEM == IMEM), leaving this pointer null; do not dereference it then.
	if (RSP::rsp.SP_PC_REG == NULL)
		return;

	*RSP::rsp.SP_PC_REG = 0x00000000;
}
|
||||
|
||||
// Plugin entry point: remember the host's RSP_INFO, wire the sixteen CP0
// register slots and the interrupt register to the host's storage, mark the
// core halted and attach DMEM/IMEM/RDRAM to the CPU.
// *CycleCount (when non-null) is always reset to 0.
EXPORT void CALL parallelRSPInitiateRSP(RSP_INFO Rsp_Info, unsigned int *CycleCount)
{
	if (CycleCount != NULL)
		*CycleCount = 0;

	/* usually dummy RSP data for testing */
	if (Rsp_Info.DMEM == Rsp_Info.IMEM)
		return; /* DMA is not executed just because plugin initiates. */

	RSP::rsp = Rsp_Info;
	*RSP::rsp.SP_PC_REG = 0x04001000 & 0x00000FFF; /* task init bug on Mupen64 */

	// Host registers in CP0 slot order: 0x0-0x7 are the SP registers,
	// 0x8-0xF the DPC registers.
	decltype(RSP::rsp.SP_STATUS_REG) const host_regs[16] = {
		RSP::rsp.SP_MEM_ADDR_REG,
		RSP::rsp.SP_DRAM_ADDR_REG,
		RSP::rsp.SP_RD_LEN_REG,
		RSP::rsp.SP_WR_LEN_REG,
		RSP::rsp.SP_STATUS_REG,
		RSP::rsp.SP_DMA_FULL_REG,
		RSP::rsp.SP_DMA_BUSY_REG,
		RSP::rsp.SP_SEMAPHORE_REG,
		RSP::rsp.DPC_START_REG,
		RSP::rsp.DPC_END_REG,
		RSP::rsp.DPC_CURRENT_REG,
		RSP::rsp.DPC_STATUS_REG,
		RSP::rsp.DPC_CLOCK_REG,
		RSP::rsp.DPC_BUFBUSY_REG,
		RSP::rsp.DPC_PIPEBUSY_REG,
		RSP::rsp.DPC_TMEM_REG,
	};

	auto &state = RSP::cpu.get_state();
	for (unsigned slot = 0; slot < 16; slot++)
		state.cp0.cr[slot] = host_regs[slot];

	// Start out halted; interrupts are reported through the host's MI_INTR register.
	*state.cp0.cr[RSP::CP0_REGISTER_SP_STATUS] = SP_STATUS_HALT;
	state.cp0.irq = RSP::rsp.MI_INTR_REG;

	// From CXD4.
	RSP::SP_STATUS_TIMEOUT = 0x7fff;

	RSP::cpu.set_dmem(reinterpret_cast<uint32_t *>(Rsp_Info.DMEM));
	RSP::cpu.set_imem(reinterpret_cast<uint32_t *>(Rsp_Info.IMEM));
	RSP::cpu.set_rdram(reinterpret_cast<uint32_t *>(Rsp_Info.RDRAM));
}
|
||||
|
||||
}
|
||||
|
113
rsp.hpp
Normal file
113
rsp.hpp
Normal file
@ -0,0 +1,113 @@
|
||||
#ifndef RSP_HPP__
|
||||
#define RSP_HPP__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "state.hpp"
|
||||
#include "llvm_jit.hpp"
|
||||
#include "debug_jit.hpp"
|
||||
#include "rsp_op.hpp"
|
||||
|
||||
#include <setjmp.h>
|
||||
|
||||
namespace RSP
|
||||
{
|
||||
#ifdef DEBUG_JIT
|
||||
using Block = JIT::DebugBlock;
|
||||
#else
|
||||
using Block = JIT::Block;
|
||||
#endif
|
||||
using Func = JIT::Func;
|
||||
|
||||
enum ReturnMode
|
||||
{
|
||||
MODE_ENTER = 0,
|
||||
MODE_CONTINUE = 1,
|
||||
MODE_BREAK = 2,
|
||||
MODE_DMA_READ = 3,
|
||||
MODE_CHECK_FLAGS = 4
|
||||
};
|
||||
|
||||
class alignas(64) CPU
|
||||
{
|
||||
public:
|
||||
CPU();
|
||||
~CPU();
|
||||
|
||||
CPU(CPU&&) = delete;
|
||||
void operator=(CPU&&) = delete;
|
||||
|
||||
void set_dmem(uint32_t *dmem)
|
||||
{
|
||||
state.dmem = dmem;
|
||||
}
|
||||
|
||||
void set_imem(uint32_t *imem)
|
||||
{
|
||||
state.imem = imem;
|
||||
}
|
||||
|
||||
void set_rdram(uint32_t *rdram)
|
||||
{
|
||||
state.rdram = rdram;
|
||||
}
|
||||
|
||||
void invalidate_imem();
|
||||
|
||||
CPUState &get_state()
|
||||
{
|
||||
return state;
|
||||
}
|
||||
|
||||
ReturnMode run();
|
||||
|
||||
void enter(uint32_t pc);
|
||||
void call(uint32_t target, uint32_t ret);
|
||||
int ret(uint32_t pc);
|
||||
void exit(ReturnMode mode);
|
||||
|
||||
void print_registers();
|
||||
|
||||
private:
|
||||
CPUState state;
|
||||
Func blocks[IMEM_WORDS] = {};
|
||||
std::unordered_map<std::string, uint64_t> symbol_table;
|
||||
#ifndef DEBUG_JIT
|
||||
JIT::LLVMEngine jit_engine;
|
||||
#endif
|
||||
std::unordered_map<uint64_t, std::unique_ptr<Block>> cached_blocks[IMEM_WORDS];
|
||||
|
||||
void invalidate_code();
|
||||
uint64_t hash_imem(unsigned pc, unsigned count) const;
|
||||
Func jit_region(uint64_t hash, unsigned pc, unsigned count);
|
||||
|
||||
std::string full_code;
|
||||
std::string body;
|
||||
|
||||
|
||||
void init_symbol_table();
|
||||
|
||||
alignas(64) uint32_t cached_imem[IMEM_WORDS] = {};
|
||||
|
||||
// Platform specific.
|
||||
#ifdef __GNUC__
|
||||
intptr_t env[64];
|
||||
// We're reading this after setjmp returns so need to make sure the read happens when we expect it to.
|
||||
volatile ReturnMode return_mode;
|
||||
#else
|
||||
#error "Need __builtin_setjmp/longjmp support alternative for other compilers ..."
|
||||
#endif
|
||||
|
||||
#define CALL_STACK_SIZE 32
|
||||
uint32_t call_stack[CALL_STACK_SIZE] = {};
|
||||
unsigned call_stack_ptr = 0;
|
||||
|
||||
unsigned analyze_static_end(unsigned pc, unsigned end);
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
331
rsp/cp0.cpp
Normal file
331
rsp/cp0.cpp
Normal file
@ -0,0 +1,331 @@
|
||||
#include "../rsp.hpp"
|
||||
#include "../state.hpp"
|
||||
|
||||
#ifdef PARALLEL_INTEGRATION
// Use the same header name as parallel.cpp and the CMake source list
// ("rsp_1.1.h"); the previous "../Rsp_#1.1.h" spelling names a different file.
#include "../rsp_1.1.h"
#include "m64p_plugin.h"
namespace RSP
{
// Plugin globals; the matching definitions live in parallel.cpp.
extern RSP_INFO rsp;
extern short MFC0_count[32];
extern int SP_STATUS_TIMEOUT;
}
#endif
|
||||
|
||||
using namespace RSP;
|
||||
|
||||
extern "C" {
|
||||
|
||||
#ifdef INTENSE_DEBUG
|
||||
void log_rsp_mem_parallel(void);
|
||||
#endif
|
||||
|
||||
// MFC0: copy CP0 register (rd & 15) into scalar register rt (writes to r0 are dropped).
// With PARALLEL_INTEGRATION, reads of SP_STATUS are counted per rt; once the
// count reaches SP_STATUS_TIMEOUT the core is halted and MODE_CHECK_FLAGS is
// returned. Otherwise returns MODE_CONTINUE.
int RSP_MFC0(RSP::CPUState *rsp, unsigned rt, unsigned rd)
{
	const unsigned reg = rd & 15;
	const uint32_t value = *rsp->cp0.cr[reg];
	if (rt != 0)
		rsp->sr[rt] = value;

	// CFG_MEND_SEMAPHORE_LOCK == 0 by default,
	// so don't bother implementing semaphores.
	// It makes Mario Golf run terribly for some reason.

#ifdef PARALLEL_INTEGRATION
	// WAIT_FOR_CPU_HOST. From CXD4.
	if (reg != CP0_REGISTER_SP_STATUS)
		return MODE_CONTINUE;

	if (++RSP::MFC0_count[rt] >= RSP::SP_STATUS_TIMEOUT)
	{
		*RSP::rsp.SP_STATUS_REG |= SP_STATUS_HALT;
		return MODE_CHECK_FLAGS;
	}
#endif

	return MODE_CONTINUE;
}
|
||||
|
||||
// Apply a write of `rt` to SP_STATUS. The written word is a set of
// clear/set command bits; for each pair "clear" wins when both are present.
// BROKE can only be cleared. The INTR pair acts on bit 0 of *cp0.irq rather
// than on the status word. Returns MODE_CHECK_FLAGS when an interrupt is
// pending or HALT is set afterwards, MODE_CONTINUE otherwise.
static inline int rsp_status_write(RSP::CPUState *rsp, uint32_t rt)
{
	uint32_t status = *rsp->cp0.cr[CP0_REGISTER_SP_STATUS];

	// Clear-or-set one status flag according to its command-bit pair.
	const auto update = [&status, rt](uint32_t clr_cmd, uint32_t set_cmd, uint32_t flag) {
		if (rt & clr_cmd)
			status &= ~flag;
		else if (rt & set_cmd)
			status |= flag;
	};

	update(SP_CLR_HALT, SP_SET_HALT, SP_STATUS_HALT);

	if (rt & SP_CLR_BROKE)
		status &= ~SP_STATUS_BROKE;

	if (rt & SP_CLR_INTR)
		*rsp->cp0.irq &= ~1;
	else if (rt & SP_SET_INTR)
		*rsp->cp0.irq |= 1;

	update(SP_CLR_SSTEP, SP_SET_SSTEP, SP_STATUS_SSTEP);
	update(SP_CLR_INTR_BREAK, SP_SET_INTR_BREAK, SP_STATUS_INTR_BREAK);
	update(SP_CLR_SIG0, SP_SET_SIG0, SP_STATUS_SIG0);
	update(SP_CLR_SIG1, SP_SET_SIG1, SP_STATUS_SIG1);
	update(SP_CLR_SIG2, SP_SET_SIG2, SP_STATUS_SIG2);
	update(SP_CLR_SIG3, SP_SET_SIG3, SP_STATUS_SIG3);
	update(SP_CLR_SIG4, SP_SET_SIG4, SP_STATUS_SIG4);
	update(SP_CLR_SIG5, SP_SET_SIG5, SP_STATUS_SIG5);
	update(SP_CLR_SIG6, SP_SET_SIG6, SP_STATUS_SIG6);
	update(SP_CLR_SIG7, SP_SET_SIG7, SP_STATUS_SIG7);

	*rsp->cp0.cr[CP0_REGISTER_SP_STATUS] = status;

	if (*rsp->cp0.irq & 1)
		return MODE_CHECK_FLAGS;
	if (status & SP_STATUS_HALT)
		return MODE_CHECK_FLAGS;
	return MODE_CONTINUE;
}
|
||||
|
||||
#ifdef PARALLEL_INTEGRATION
|
||||
// DMA from RDRAM into DMEM/IMEM, driven by the DMA_READ_LENGTH register:
// bits 0-11 = length - 1, bits 12-19 = extra row count, bits 20-31 = skip
// added to the RDRAM address after each row.
// Words written to IMEM mark the containing code block and the one before it dirty.
// The DMA_DRAM / DMA_CACHE registers are left pointing past the transfer.
// Returns MODE_CHECK_FLAGS if any block is dirty afterwards, else MODE_CONTINUE.
static int rsp_dma_read(RSP::CPUState *rsp)
{
	auto &cache_reg = *rsp->cp0.cr[CP0_REGISTER_DMA_CACHE];
	auto &dram_reg = *rsp->cp0.cr[CP0_REGISTER_DMA_DRAM];
	const uint32_t length_reg = *rsp->cp0.cr[CP0_REGISTER_DMA_READ_LENGTH];

	const uint32_t skip = (length_reg >> 20) & 0xFFF;
	const unsigned count = (length_reg >> 12) & 0xFF;

	// Force alignment.
	uint32_t length = ((length_reg & 0xFFF) + 1 + 0x7) & ~0x7u;
	cache_reg &= ~0x3;
	dram_reg &= ~0x7;

	// Check length: never run past the end of the 4 KiB bank.
	const uint32_t bank_offset = cache_reg & 0xFFF;
	if (bank_offset + length > 0x1000)
		length = 0x1000 - bank_offset;

	uint32_t source = dram_reg;
	uint32_t dest = cache_reg;

#ifdef INTENSE_DEBUG
	fprintf(stderr, "DMA READ: (0x%x <- 0x%x) len %u, count %u, skip %u\n",
	        dest & 0x1ffc, source & 0x7ffffc,
	        length, count + 1, skip);
#endif

	// count + 1 rows; length is always >= 4 here, so each row copies at least one word.
	for (unsigned row = 0; row <= count; row++)
	{
		for (unsigned offset = 0; offset < length; offset += 4)
		{
			const uint32_t source_addr = (source + offset) & 0x7FFFFC;
			const uint32_t dest_addr = (dest + offset) & 0x1FFC;
			const uint32_t word = rsp->rdram[source_addr >> 2];

			if ((dest_addr & 0x1000) == 0)
			{
				rsp->dmem[dest_addr >> 2] = word;
				continue;
			}

			// Invalidate IMEM.
			const unsigned block = (dest_addr & 0xfff) / CODE_BLOCK_SIZE;
			rsp->dirty_blocks |= (0x3 << block) >> 1;
			rsp->imem[(dest_addr & 0xfff) >> 2] = word;
		}

		source += length + skip;
		dest += length;
	}

	dram_reg = source;
	cache_reg = dest;

#ifdef INTENSE_DEBUG
	log_rsp_mem_parallel();
#endif

	if (rsp->dirty_blocks)
		return MODE_CHECK_FLAGS;
	return MODE_CONTINUE;
}
|
||||
|
||||
// DMA from DMEM/IMEM out to RDRAM, driven by the DMA_WRITE_LENGTH register
// (same field layout as the read length: length - 1, row count, skip).
// The skip is applied to the RDRAM (destination) address after each row.
// The DMA_CACHE / DMA_DRAM registers are left pointing past the transfer.
static void rsp_dma_write(RSP::CPUState *rsp)
{
	auto &cache_reg = *rsp->cp0.cr[CP0_REGISTER_DMA_CACHE];
	auto &dram_reg = *rsp->cp0.cr[CP0_REGISTER_DMA_DRAM];
	const uint32_t length_reg = *rsp->cp0.cr[CP0_REGISTER_DMA_WRITE_LENGTH];

	const uint32_t skip = (length_reg >> 20) & 0xFFF;
	const unsigned count = (length_reg >> 12) & 0xFF;

	// Force alignment.
	uint32_t length = ((length_reg & 0xFFF) + 1 + 0x7) & ~0x7u;
	cache_reg &= ~0x3;
	dram_reg &= ~0x7;

	// Check length: never run past the end of the 4 KiB bank.
	const uint32_t bank_offset = cache_reg & 0xFFF;
	if (bank_offset + length > 0x1000)
		length = 0x1000 - bank_offset;

	uint32_t dest = dram_reg;
	uint32_t source = cache_reg;

#ifdef INTENSE_DEBUG
	fprintf(stderr, "DMA WRITE: (0x%x <- 0x%x) len %u, count %u, skip %u\n",
	        dest & 0x7ffffc, source & 0x1ffc,
	        length, count + 1, skip);
#endif

	// count + 1 rows; length is always >= 4 here, so each row copies at least one word.
	for (unsigned row = 0; row <= count; row++)
	{
		for (unsigned offset = 0; offset < length; offset += 4)
		{
			const uint32_t source_addr = (source + offset) & 0x1FFC;
			const uint32_t dest_addr = (dest + offset) & 0x7FFFFC;

			// Bit 12 of the local address selects IMEM over DMEM.
			uint32_t word;
			if (source_addr & 0x1000)
				word = rsp->imem[(source_addr & 0xfff) >> 2];
			else
				word = rsp->dmem[source_addr >> 2];

			rsp->rdram[dest_addr >> 2] = word;
		}

		source += length;
		dest += length + skip;
	}

	cache_reg = source;
	dram_reg = dest;

#ifdef INTENSE_DEBUG
	log_rsp_mem_parallel();
#endif
}
|
||||
#endif
|
||||
|
||||
// MTC0: write scalar register rt to CP0 register (rd & 15).
// Most registers are stored (some masked); length registers kick off a DMA,
// SP_STATUS goes through rsp_status_write(), and the read-only CMD registers
// ignore the write. Returns a ReturnMode value (MODE_CONTINUE unless a
// sub-handler says otherwise).
int RSP_MTC0(RSP::CPUState *rsp, unsigned rd, unsigned rt)
{
	const uint32_t val = rsp->sr[rt];
	const unsigned reg = rd & 15;
	auto &cr = rsp->cp0.cr;

	switch (static_cast<CP0Registers>(reg))
	{
	case CP0_REGISTER_DMA_CACHE:
		*cr[CP0_REGISTER_DMA_CACHE] = val & 0x1fff;
		return MODE_CONTINUE;

	case CP0_REGISTER_DMA_DRAM:
		*cr[CP0_REGISTER_DMA_DRAM] = val & 0xffffff;
		return MODE_CONTINUE;

	case CP0_REGISTER_DMA_READ_LENGTH:
		*cr[CP0_REGISTER_DMA_READ_LENGTH] = val;
#ifdef PARALLEL_INTEGRATION
		return rsp_dma_read(rsp);
#else
		return MODE_DMA_READ;
#endif

	case CP0_REGISTER_DMA_WRITE_LENGTH:
		*cr[CP0_REGISTER_DMA_WRITE_LENGTH] = val;
#ifdef PARALLEL_INTEGRATION
		rsp_dma_write(rsp);
#endif
		return MODE_CONTINUE;

	case CP0_REGISTER_SP_STATUS:
		return rsp_status_write(rsp, val);

	case CP0_REGISTER_SP_RESERVED:
		// CXD4 forces this to 0.
		*cr[CP0_REGISTER_SP_RESERVED] = 0;
		return MODE_CONTINUE;

	case CP0_REGISTER_CMD_START:
	{
		const uint32_t aligned = val & 0xfffffff8u;
#ifdef INTENSE_DEBUG
		fprintf(stderr, "CMD_START 0x%x\n", val & 0xfffffff8u);
#endif
		// START, CURRENT and END all restart from the same 8-byte aligned address.
		*cr[CP0_REGISTER_CMD_END] = aligned;
		*cr[CP0_REGISTER_CMD_CURRENT] = aligned;
		*cr[CP0_REGISTER_CMD_START] = aligned;
		return MODE_CONTINUE;
	}

	case CP0_REGISTER_CMD_END:
#ifdef INTENSE_DEBUG
		fprintf(stderr, "CMD_END 0x%x\n", val & 0xfffffff8u);
#endif
		*cr[CP0_REGISTER_CMD_END] = val & 0xfffffff8u;

#ifdef PARALLEL_INTEGRATION
		RSP::rsp.ProcessRdpList();
#endif
		return MODE_CONTINUE;

	case CP0_REGISTER_CMD_CLOCK:
		fprintf(stderr, "CMD_CLOCK");
		*cr[CP0_REGISTER_CMD_CLOCK] = val;
		return MODE_CONTINUE;

	case CP0_REGISTER_CMD_STATUS:
	{
		// Command bits come in clear/set pairs for status bits 0, 1 and 2;
		// within a pair the clear is applied first, then the set.
		auto &cmd_status = *cr[CP0_REGISTER_CMD_STATUS];
		if (val & 0x1)
			cmd_status &= ~0x1u;
		if (val & 0x2)
			cmd_status |= 0x1u;
		if (val & 0x4)
			cmd_status &= ~0x2u;
		if (val & 0x8)
			cmd_status |= 0x2u;
		if (val & 0x10)
			cmd_status &= ~0x4u;
		if (val & 0x20)
			cmd_status |= 0x4u;

		// Bits 0x40 / 0x200 zero the TMEM-busy and clock counters.
		if (val & 0x40)
			*cr[CP0_REGISTER_CMD_TMEM_BUSY] = 0;
		if (val & 0x200)
			*cr[CP0_REGISTER_CMD_CLOCK] = 0;
		return MODE_CONTINUE;
	}

	case CP0_REGISTER_CMD_CURRENT:
	case CP0_REGISTER_CMD_BUSY:
	case CP0_REGISTER_CMD_PIPE_BUSY:
	case CP0_REGISTER_CMD_TMEM_BUSY:
		// Writes are ignored.
		return MODE_CONTINUE;

	default:
		*cr[reg] = val;
		return MODE_CONTINUE;
	}
}
|
||||
|
||||
}
|
71
rsp/cp2.cpp
Normal file
71
rsp/cp2.cpp
Normal file
@ -0,0 +1,71 @@
|
||||
#include "../rsp.hpp"
|
||||
|
||||
extern "C" {
|
||||
|
||||
// CFC2: read CP2 flag register (rd & 3, with 3 aliasing 2) as a packed
// 16-bit value and sign-extend it into scalar register rt (r0 is never written).
void RSP_CFC2(RSP::CPUState *rsp, unsigned rt, unsigned rd)
{
	const unsigned selector = rd & 3;
	const unsigned src = (selector == 3) ? 2 : selector;

	const int16_t packed = rsp_get_flags(rsp->cp2.flags[src].e);
	if (rt != 0)
		rsp->sr[rt] = packed;
}
|
||||
|
||||
// CTC2: write the low 16 bits of scalar register rt into CP2 flag register
// (rd & 3). Selectors 2 and 3 both target flag register 2 and keep only the
// low 8 bits.
void RSP_CTC2(RSP::CPUState *rsp, unsigned rt, unsigned rd)
{
	unsigned value = rsp->sr[rt] & 0xffff;
	unsigned dst = rd & 3;

	if (dst >= 2)
	{
		dst = 2;
		value &= 0xff;
	}

	rsp_set_flags(rsp->cp2.flags[dst].e, value);
}
|
||||
|
||||
// MTC2: store the low 16 bits of scalar register rt into vector register rd
// at byte position `element`.
//  - even element: the halfword replaces lane element/2.
//  - odd element:  the halfword straddles two lanes: its high byte becomes
//    the low byte of lane `lo`, its low byte becomes the high byte of lane `lo + 1`.
void RSP_MTC2(RSP::CPUState *rsp, unsigned rt, unsigned rd, unsigned element)
{
	uint16_t *e = rsp->cp2.regs[rd].e;

#ifdef INTENSE_DEBUG
	fprintf(stderr, "MTC2, rt = %u, [rt] = 0x%x, rd = %u, e = %u\n",
	        rt, rsp->sr[rt], rd, element);
#endif

	const unsigned lo = element >> 1;
	const unsigned value = rsp->sr[rt];

	if ((element & 1) == 0)
	{
		e[lo] = value;
		return;
	}

	e[lo] = (e[lo] & 0xff00) | ((value >> 8) & 0xff);

	// For odd elements (element + 1) >> 1 == lo + 1.
	const unsigned hi = lo + 1;

	// Fix 1: element 15 used to write e[8], one lane past the register.
	// NOTE(review): the trailing byte is dropped here rather than wrapped to
	// lane 0 (RSP_MFC2 wraps its read) — confirm against hardware behaviour.
	if (hi < 8)
	{
		// Fix 2: keep the low byte of lane `hi` itself; the old code took it
		// from e[lo], clobbering lane `hi` with the byte just written to `lo`.
		e[hi] = (e[hi] & 0x00ff) | ((value & 0xff) << 8);
	}
}
|
||||
|
||||
// MFC2: read a 16-bit value from vector register rd at byte position
// `element` and sign-extend it into scalar register rt. Odd elements straddle
// two lanes (the second lane index wraps modulo 8). Writes to r0 are skipped.
void RSP_MFC2(RSP::CPUState *rsp, unsigned rt, unsigned rd, unsigned element)
{
	if (rt == 0)
		return;

	const uint16_t *e = rsp->cp2.regs[rd].e;
	const unsigned lo = element >> 1;

	uint16_t value = e[lo];
	if (element & 1)
	{
		const unsigned hi = ((element + 1) >> 1) & 7;
		const uint16_t upper = e[lo] << 8; // low byte of lane lo -> high byte
		const uint8_t lower = e[hi] >> 8;  // high byte of lane hi -> low byte
		value = uint16_t(upper | lower);
	}

	rsp->sr[rt] = int16_t(value);
}
|
||||
|
||||
}
|
||||
|
380
rsp/ls.cpp
Normal file
380
rsp/ls.cpp
Normal file
@ -0,0 +1,380 @@
|
||||
#include "../rsp.hpp"
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Using mostly CXD4 implementation as a base here since it's easier to follow.
|
||||
// CEN64's implementation seems much better, but takes more effort to port for now.
|
||||
// Reading wide words together with SSE4 blend, SSSE3 pshufb, etc should make this much faster.
|
||||
|
||||
// Load 8-bit
|
||||
void RSP_LBV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
unsigned addr = (rsp->sr[base] + offset * 1) & 0xfff;
|
||||
reinterpret_cast<uint8_t*>(rsp->cp2.regs[rt].e)[MES(e)] = READ_MEM_U8(rsp->dmem, addr);
|
||||
}
|
||||
|
||||
// Store 8-bit
|
||||
void RSP_SBV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
unsigned addr = (rsp->sr[base] + offset * 1) & 0xfff;
|
||||
uint8_t v = reinterpret_cast<uint8_t*>(rsp->cp2.regs[rt].e)[MES(e)];
|
||||
|
||||
#ifdef INTENSE_DEBUG
|
||||
fprintf(stderr, "SBV: 0x%x (0x%x)\n", addr, v);
|
||||
#endif
|
||||
|
||||
WRITE_MEM_U8(rsp->dmem, addr, v);
|
||||
}
|
||||
|
||||
// Load 16-bit
|
||||
void RSP_LSV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
if (e & 1)
|
||||
return;
|
||||
|
||||
unsigned addr = (rsp->sr[base] + offset * 2) & 0xfff;
|
||||
unsigned correction = addr & 3;
|
||||
if (correction == 3)
|
||||
return;
|
||||
|
||||
uint16_t result;
|
||||
if (correction == 1)
|
||||
result = (READ_MEM_U8(rsp->dmem, addr + 0) << 8) | (READ_MEM_U8(rsp->dmem, addr + 1) << 0);
|
||||
else
|
||||
result = READ_MEM_U16(rsp->dmem, addr);
|
||||
|
||||
rsp->cp2.regs[rt].e[e >> 1] = result;
|
||||
}
|
||||
|
||||
// Store 16-bit
|
||||
void RSP_SSV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
unsigned addr = (rsp->sr[base] + offset * 2) & 0xfff;
|
||||
uint8_t v0 = reinterpret_cast<uint8_t*>(rsp->cp2.regs[rt].e)[MES(e)];
|
||||
uint8_t v1 = reinterpret_cast<uint8_t*>(rsp->cp2.regs[rt].e)[MES((e + 1) & 0xf)];
|
||||
|
||||
#ifdef INTENSE_DEBUG
|
||||
fprintf(stderr, "SSV: 0x%x (0x%x, 0x%x)\n", addr, v0, v1);
|
||||
#endif
|
||||
|
||||
WRITE_MEM_U8(rsp->dmem, addr, v0);
|
||||
WRITE_MEM_U8(rsp->dmem, (addr + 1) & 0xfff, v1);
|
||||
}
|
||||
|
||||
// Load 32-bit
|
||||
void RSP_LLV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
unsigned addr = (rsp->sr[base] + offset * 4) & 0xfff;
|
||||
if (e & 1)
|
||||
return;
|
||||
if (addr & 1)
|
||||
return;
|
||||
e >>= 1;
|
||||
|
||||
rsp->cp2.regs[rt].e[e] = READ_MEM_U16(rsp->dmem, addr);
|
||||
rsp->cp2.regs[rt].e[(e + 1) & 7] = READ_MEM_U16(rsp->dmem, (addr + 2) & 0xfff);
|
||||
}
|
||||
|
||||
// Store 32-bit
|
||||
void RSP_SLV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
if ((e & 1) || (e > 0xc))
|
||||
return;
|
||||
unsigned addr = (rsp->sr[base] + offset * 4) & 0xfff;
|
||||
|
||||
#ifdef INTENSE_DEBUG
|
||||
fprintf(stderr, "SLV 0x%x, e = %u\n", addr, e);
|
||||
#endif
|
||||
|
||||
if (addr & 1)
|
||||
return;
|
||||
e >>= 1;
|
||||
|
||||
uint16_t v0 = rsp->cp2.regs[rt].e[e];
|
||||
uint16_t v1 = rsp->cp2.regs[rt].e[e + 1];
|
||||
WRITE_MEM_U16(rsp->dmem, addr, v0);
|
||||
WRITE_MEM_U16(rsp->dmem, (addr + 2) & 0xfff, v1);
|
||||
}
|
||||
|
||||
// Load 64-bit
|
||||
void RSP_LDV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
if (e & 1)
|
||||
return;
|
||||
unsigned addr = (rsp->sr[base] + offset * 8) & 0xfff;
|
||||
auto *reg = rsp->cp2.regs[rt].e;
|
||||
e >>= 1;
|
||||
|
||||
if (addr & 1)
|
||||
{
|
||||
reg[e + 0] = (READ_MEM_U8(rsp->dmem, addr + 0) << 8) | READ_MEM_U8(rsp->dmem, addr + 1);
|
||||
reg[e + 1] = (READ_MEM_U8(rsp->dmem, addr + 2) << 8) | READ_MEM_U8(rsp->dmem, addr + 3);
|
||||
reg[e + 2] = (READ_MEM_U8(rsp->dmem, addr + 4) << 8) | READ_MEM_U8(rsp->dmem, addr + 5);
|
||||
reg[e + 3] = (READ_MEM_U8(rsp->dmem, addr + 6) << 8) | READ_MEM_U8(rsp->dmem, addr + 7);
|
||||
}
|
||||
else
|
||||
{
|
||||
reg[e + 0] = READ_MEM_U16(rsp->dmem, addr);
|
||||
reg[e + 1] = READ_MEM_U16(rsp->dmem, (addr + 2) & 0xfff);
|
||||
reg[e + 2] = READ_MEM_U16(rsp->dmem, (addr + 4) & 0xfff);
|
||||
reg[e + 3] = READ_MEM_U16(rsp->dmem, (addr + 6) & 0xfff);
|
||||
}
|
||||
}
|
||||
|
||||
// Store 64-bit
|
||||
void RSP_SDV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
unsigned addr = (rsp->sr[base] + offset * 8) & 0xfff;
|
||||
|
||||
#ifdef INTENSE_DEBUG
|
||||
fprintf(stderr, "SDV 0x%x, e = %u\n", addr, e);
|
||||
#endif
|
||||
|
||||
// Handle illegal scenario.
|
||||
if ((e > 8) || (e & 1) || (addr & 1))
|
||||
{
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
{
|
||||
WRITE_MEM_U8(rsp->dmem, (addr + i) & 0xfff,
|
||||
reinterpret_cast<const uint8_t*>(rsp->cp2.regs[rt].e)[MES((e + i) & 0xf)]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
e >>= 1;
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
{
|
||||
WRITE_MEM_U16(rsp->dmem, (addr + 2 * i) & 0xfff,
|
||||
rsp->cp2.regs[rt].e[e + i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Load 8x8-bit into high bits.
|
||||
void RSP_LPV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
if (e != 0)
|
||||
return;
|
||||
|
||||
unsigned addr = (rsp->sr[base] + offset * 8) & 0xfff;
|
||||
auto *reg = rsp->cp2.regs[rt].e;
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
reg[i] = READ_MEM_U8(rsp->dmem, (addr + i) & 0xfff) << 8;
|
||||
}
|
||||
|
||||
// SPV: store the upper 8 bits of each of the 8 lanes to consecutive DMEM
// bytes (signed lane >> 8). Only element 0 is handled.
void RSP_SPV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
{
	if (e == 0)
	{
		const unsigned addr = (rsp->sr[base] + offset * 8) & 0xfff;
		auto *reg = rsp->cp2.regs[rt].e;
		for (unsigned lane = 0; lane < 8; lane++)
			WRITE_MEM_U8(rsp->dmem, (addr + lane) & 0xfff, int16_t(reg[lane]) >> 8);
	}
}
|
||||
|
||||
// Load 8x8-bit into high bits, but shift by 7 instead of 8.
|
||||
// Was probably used for certain fixed point algorithms to get more headroom without
|
||||
// saturation, but weird nonetheless.
|
||||
void RSP_LUV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
unsigned addr = (rsp->sr[base] + offset * 8) & 0xfff;
|
||||
auto *reg = rsp->cp2.regs[rt].e;
|
||||
|
||||
if (e != 0)
|
||||
{
|
||||
// Special path for Mia Hamm soccer.
|
||||
addr += -e & 0xf;
|
||||
for (unsigned b = 0; b < 8; b++)
|
||||
{
|
||||
reg[b] = READ_MEM_U8(rsp->dmem, addr) << 7;
|
||||
--e;
|
||||
addr -= e ? 0 : 16;
|
||||
++addr;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
reg[i] = READ_MEM_U8(rsp->dmem, (addr + i) & 0xfff) << 7;
|
||||
}
|
||||
}
|
||||
|
||||
// SUV: store each of the 8 lanes, shifted right (signed) by 7, to consecutive
// DMEM bytes. Only element 0 is handled.
void RSP_SUV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
{
	if (e == 0)
	{
		const unsigned addr = (rsp->sr[base] + offset * 8) & 0xfff;
		auto *reg = rsp->cp2.regs[rt].e;
		for (unsigned lane = 0; lane < 8; lane++)
			WRITE_MEM_U8(rsp->dmem, (addr + lane) & 0xfff, int16_t(reg[lane]) >> 7);
	}
}
|
||||
|
||||
// Load 8x8-bits into high bits, but shift by 7 instead of 8.
|
||||
// Seems to differ from LUV in that it loads every other byte instead of packed bytes.
|
||||
void RSP_LHV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
if (e != 0)
|
||||
return;
|
||||
unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
|
||||
if (addr & 0xe)
|
||||
return;
|
||||
|
||||
auto *reg = rsp->cp2.regs[rt].e;
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
reg[i] = READ_MEM_U8(rsp->dmem, addr + 2 * i) << 7;
|
||||
}
|
||||
|
||||
// SHV: store each of the 8 lanes, shifted right (signed) by 7, to every other
// DMEM byte starting at addr (addresses wrap to 12 bits). Only element 0 is handled.
void RSP_SHV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
{
	if (e == 0)
	{
		const unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
		auto *reg = rsp->cp2.regs[rt].e;
		for (unsigned lane = 0; lane < 8; lane++)
			WRITE_MEM_U8(rsp->dmem, (addr + 2 * lane) & 0xfff, int16_t(reg[lane]) >> 7);
	}
}
|
||||
|
||||
// No idea what the purpose of this is.
|
||||
void RSP_SFV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
unsigned addr = (rsp->sr[base] + offset * 16) & 0xff3;
|
||||
auto *reg = rsp->cp2.regs[rt].e;
|
||||
switch (e)
|
||||
{
|
||||
case 0:
|
||||
WRITE_MEM_U8(rsp->dmem, (addr + 0) & 0xfff, int16_t(reg[0]) >> 7);
|
||||
WRITE_MEM_U8(rsp->dmem, (addr + 4) & 0xfff, int16_t(reg[1]) >> 7);
|
||||
WRITE_MEM_U8(rsp->dmem, (addr + 8) & 0xfff, int16_t(reg[2]) >> 7);
|
||||
WRITE_MEM_U8(rsp->dmem, (addr + 12) & 0xfff, int16_t(reg[3]) >> 7);
|
||||
break;
|
||||
|
||||
case 8:
|
||||
WRITE_MEM_U8(rsp->dmem, (addr + 0) & 0xfff, int16_t(reg[4]) >> 7);
|
||||
WRITE_MEM_U8(rsp->dmem, (addr + 4) & 0xfff, int16_t(reg[5]) >> 7);
|
||||
WRITE_MEM_U8(rsp->dmem, (addr + 8) & 0xfff, int16_t(reg[6]) >> 7);
|
||||
WRITE_MEM_U8(rsp->dmem, (addr + 12) & 0xfff, int16_t(reg[7]) >> 7);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Loads full 128-bit register, however, it seems to handle unaligned addresses in a very
|
||||
// strange way.
|
||||
void RSP_LQV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
if (e & 1)
|
||||
return;
|
||||
unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
|
||||
|
||||
#ifdef INTENSE_DEBUG
|
||||
fprintf(stderr, "LQV: 0x%x, e = %u, vt = %u, base = %u\n", addr, e, rt, base);
|
||||
#endif
|
||||
|
||||
if (addr & 1)
|
||||
return;
|
||||
|
||||
unsigned b = (addr & 0xf) >> 1;
|
||||
e >>= 1;
|
||||
|
||||
auto *reg = rsp->cp2.regs[rt].e;
|
||||
for (unsigned i = b; i < 8; i++, e++, addr += 2)
|
||||
reg[e] = READ_MEM_U16(rsp->dmem, addr & 0xfff);
|
||||
}
|
||||
|
||||
// SQV: store vector register rt to DMEM from addr up to the end of its
// 16-byte line. Element 0 writes halfwords from lane 0; a non-zero element
// writes bytes starting at register byte e (wrapping modulo 16).
// Odd addresses are ignored.
void RSP_SQV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
{
	const unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
	if (addr & 1)
		return;

	// Byte offset of addr within its 16-byte line.
	const unsigned b = addr & 0xf;
	auto *reg = rsp->cp2.regs[rt].e;

	if (e == 0)
	{
		const unsigned lanes = 8 - (b >> 1);
		for (unsigned lane = 0; lane < lanes; lane++)
			WRITE_MEM_U16(rsp->dmem, (addr + 2 * lane) & 0xfff, reg[lane]);
		return;
	}

	// Mia Hamm Soccer
	const auto *bytes = reinterpret_cast<const uint8_t *>(reg);
	const unsigned remaining = 16 - b;
	for (unsigned i = 0; i < remaining; i++)
		WRITE_MEM_U8(rsp->dmem, (addr + i) & 0xfff, bytes[MES((e + i) & 0xf)]);
}
|
||||
|
||||
// Complements LQV?
|
||||
void RSP_LRV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
if (e != 0)
|
||||
return;
|
||||
unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
|
||||
if (addr & 1)
|
||||
return;
|
||||
|
||||
unsigned b = (addr & 0xf) >> 1;
|
||||
addr &= ~0xf;
|
||||
|
||||
auto *reg = rsp->cp2.regs[rt].e;
|
||||
for (e = 8 - b; e < 8; e++, addr += 2)
|
||||
reg[e] = READ_MEM_U16(rsp->dmem, addr & 0xfff);
|
||||
}
|
||||
|
||||
// SRV: store the last b lanes of vector register rt (lanes 8 - b .. 7) to the
// start of addr's 16-byte line, where b is addr's halfword index in the line.
// Ignored unless e == 0 and the address is even.
void RSP_SRV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
{
	if (e != 0)
		return;

	const unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
	if (addr & 1)
		return;

	const unsigned b = (addr & 0xf) >> 1;
	const unsigned line = addr & ~0xfu;

	auto *reg = rsp->cp2.regs[rt].e;
	for (unsigned n = 0; n < b; n++)
		WRITE_MEM_U16(rsp->dmem, (line + 2 * n) & 0xfff, reg[8 - b + n]);
}
|
||||
|
||||
// Transposed stuff?
|
||||
void RSP_LTV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
|
||||
{
|
||||
if (e & 1)
|
||||
return;
|
||||
if (rt & 7)
|
||||
return;
|
||||
unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
|
||||
if (addr & 0xf)
|
||||
return;
|
||||
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
rsp->cp2.regs[rt + i].e[(-e / 2 + i) & 7] = READ_MEM_U16(rsp->dmem, addr + 2 * i);
|
||||
}
|
||||
|
||||
// Stores eight 16-bit values to DMEM, one lane from each register of the
// group rt .. rt + 7: output slot i takes lane i of register
// rt + ((e / 2 + i) & 7).
//
// The instruction is ignored unless e is even, rt is a multiple of 8 and the
// effective address is 16-byte aligned.
void RSP_STV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
{
  if ((e & 1) || (rt & 7))
    return;

  unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
  if (addr & 0xf)
    return;

  unsigned first = e / 2;
  for (unsigned lane = 0; lane < 8; lane++)
  {
    unsigned src = rt + ((first + lane) & 7);
    WRITE_MEM_U16(rsp->dmem, addr + 2 * lane, rsp->cp2.regs[src].e[lane]);
  }
}
|
||||
|
||||
|
||||
}
|
||||
|
103
rsp/pipeline.h
Normal file
103
rsp/pipeline.h
Normal file
@ -0,0 +1,103 @@
|
||||
//
|
||||
// rsp/pipeline.h: RSP processor pipeline.
|
||||
//
|
||||
// CEN64: Cycle-Accurate Nintendo 64 Emulator.
|
||||
// Copyright (C) 2015, Tyler J. Stachecki.
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
#ifndef __rsp_pipeline_h__
|
||||
#define __rsp_pipeline_h__
|
||||
#include "rsp/decoder.h"
|
||||
#include "rsp/cp2.h"
|
||||
#include "rsp/rsp.h"
|
||||
|
||||
struct rsp;
|
||||
|
||||
enum rsp_mem_request_type {
|
||||
RSP_MEM_REQUEST_NONE,
|
||||
RSP_MEM_REQUEST_INT_MEM,
|
||||
RSP_MEM_REQUEST_VECTOR,
|
||||
RSP_MEM_REQUEST_FOURTH,
|
||||
RSP_MEM_REQUEST_HALF,
|
||||
RSP_MEM_REQUEST_PACK,
|
||||
RSP_MEM_REQUEST_QUAD,
|
||||
RSP_MEM_REQUEST_REST,
|
||||
RSP_MEM_REQUEST_UPACK
|
||||
};
|
||||
|
||||
struct rsp_int_mem_packet {
|
||||
uint32_t data;
|
||||
uint32_t rdqm;
|
||||
uint32_t wdqm;
|
||||
|
||||
unsigned rshift;
|
||||
};
|
||||
|
||||
struct rsp_vect_mem_packet {
|
||||
union aligned_rsp_1vect_t vdqm;
|
||||
|
||||
void (*vldst_func)(struct rsp *rsp, uint32_t addr, unsigned element,
|
||||
uint16_t *regp, rsp_vect_t reg, rsp_vect_t dqm);
|
||||
|
||||
unsigned element;
|
||||
unsigned dest;
|
||||
};
|
||||
|
||||
union rsp_mem_packet {
|
||||
struct rsp_int_mem_packet p_int;
|
||||
struct rsp_vect_mem_packet p_vect;
|
||||
};
|
||||
|
||||
struct rsp_mem_request {
|
||||
uint32_t addr;
|
||||
enum rsp_mem_request_type type;
|
||||
union rsp_mem_packet packet;
|
||||
};
|
||||
|
||||
struct rsp_latch {
|
||||
uint32_t pc;
|
||||
};
|
||||
|
||||
struct rsp_result {
|
||||
uint32_t result;
|
||||
unsigned dest;
|
||||
};
|
||||
|
||||
struct rsp_ifrd_latch {
|
||||
struct rsp_latch common;
|
||||
struct rsp_opcode opcode;
|
||||
uint32_t pc, iw;
|
||||
};
|
||||
|
||||
struct rsp_rdex_latch {
|
||||
struct rsp_latch common;
|
||||
struct rsp_opcode opcode;
|
||||
uint32_t iw;
|
||||
};
|
||||
|
||||
struct rsp_exdf_latch {
|
||||
struct rsp_latch common;
|
||||
struct rsp_result result;
|
||||
|
||||
struct rsp_mem_request request;
|
||||
};
|
||||
|
||||
struct rsp_dfwb_latch {
|
||||
struct rsp_latch common;
|
||||
struct rsp_result result;
|
||||
};
|
||||
|
||||
struct rsp_pipeline {
|
||||
struct rsp_dfwb_latch dfwb_latch;
|
||||
struct rsp_exdf_latch exdf_latch;
|
||||
struct rsp_rdex_latch rdex_latch;
|
||||
struct rsp_ifrd_latch ifrd_latch;
|
||||
};
|
||||
|
||||
cen64_cold void rsp_pipeline_init(struct rsp_pipeline *pipeline);
|
||||
|
||||
#endif
|
||||
|
143
rsp/reciprocal.cpp
Normal file
143
rsp/reciprocal.cpp
Normal file
@ -0,0 +1,143 @@
|
||||
//
|
||||
// rsp/reciprocal.cpp: RSP reciprocal ROM contents.
|
||||
//
|
||||
// CEN64: Cycle-Accurate Nintendo 64 Emulator.
|
||||
// Copyright (C) 2015, Tyler J. Stachecki.
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
#include "reciprocal.h"
|
||||
|
||||
alignas(64) const uint16_t rsp_reciprocal_rom[1024] = {
|
||||
0xFFFF, 0xFF00, 0xFE01, 0xFD04, 0xFC07, 0xFB0C, 0xFA11, 0xF918,
|
||||
0xF81F, 0xF727, 0xF631, 0xF53B, 0xF446, 0xF352, 0xF25F, 0xF16D,
|
||||
0xF07C, 0xEF8B, 0xEE9C, 0xEDAE, 0xECC0, 0xEBD3, 0xEAE8, 0xE9FD,
|
||||
0xE913, 0xE829, 0xE741, 0xE65A, 0xE573, 0xE48D, 0xE3A9, 0xE2C5,
|
||||
0xE1E1, 0xE0FF, 0xE01E, 0xDF3D, 0xDE5D, 0xDD7E, 0xDCA0, 0xDBC2,
|
||||
0xDAE6, 0xDA0A, 0xD92F, 0xD854, 0xD77B, 0xD6A2, 0xD5CA, 0xD4F3,
|
||||
0xD41D, 0xD347, 0xD272, 0xD19E, 0xD0CB, 0xCFF8, 0xCF26, 0xCE55,
|
||||
0xCD85, 0xCCB5, 0xCBE6, 0xCB18, 0xCA4B, 0xC97E, 0xC8B2, 0xC7E7,
|
||||
0xC71C, 0xC652, 0xC589, 0xC4C0, 0xC3F8, 0xC331, 0xC26B, 0xC1A5,
|
||||
0xC0E0, 0xC01C, 0xBF58, 0xBE95, 0xBDD2, 0xBD10, 0xBC4F, 0xBB8F,
|
||||
0xBACF, 0xBA10, 0xB951, 0xB894, 0xB7D6, 0xB71A, 0xB65E, 0xB5A2,
|
||||
0xB4E8, 0xB42E, 0xB374, 0xB2BB, 0xB203, 0xB14B, 0xB094, 0xAFDE,
|
||||
0xAF28, 0xAE73, 0xADBE, 0xAD0A, 0xAC57, 0xABA4, 0xAAF1, 0xAA40,
|
||||
0xA98E, 0xA8DE, 0xA82E, 0xA77E, 0xA6D0, 0xA621, 0xA574, 0xA4C6,
|
||||
0xA41A, 0xA36E, 0xA2C2, 0xA217, 0xA16D, 0xA0C3, 0xA01A, 0x9F71,
|
||||
0x9EC8, 0x9E21, 0x9D79, 0x9CD3, 0x9C2D, 0x9B87, 0x9AE2, 0x9A3D,
|
||||
0x9999, 0x98F6, 0x9852, 0x97B0, 0x970E, 0x966C, 0x95CB, 0x952B,
|
||||
0x948B, 0x93EB, 0x934C, 0x92AD, 0x920F, 0x9172, 0x90D4, 0x9038,
|
||||
0x8F9C, 0x8F00, 0x8E65, 0x8DCA, 0x8D30, 0x8C96, 0x8BFC, 0x8B64,
|
||||
0x8ACB, 0x8A33, 0x899C, 0x8904, 0x886E, 0x87D8, 0x8742, 0x86AD,
|
||||
0x8618, 0x8583, 0x84F0, 0x845C, 0x83C9, 0x8336, 0x82A4, 0x8212,
|
||||
0x8181, 0x80F0, 0x8060, 0x7FD0, 0x7F40, 0x7EB1, 0x7E22, 0x7D93,
|
||||
0x7D05, 0x7C78, 0x7BEB, 0x7B5E, 0x7AD2, 0x7A46, 0x79BA, 0x792F,
|
||||
0x78A4, 0x781A, 0x7790, 0x7706, 0x767D, 0x75F5, 0x756C, 0x74E4,
|
||||
0x745D, 0x73D5, 0x734F, 0x72C8, 0x7242, 0x71BC, 0x7137, 0x70B2,
|
||||
0x702E, 0x6FA9, 0x6F26, 0x6EA2, 0x6E1F, 0x6D9C, 0x6D1A, 0x6C98,
|
||||
0x6C16, 0x6B95, 0x6B14, 0x6A94, 0x6A13, 0x6993, 0x6914, 0x6895,
|
||||
0x6816, 0x6798, 0x6719, 0x669C, 0x661E, 0x65A1, 0x6524, 0x64A8,
|
||||
0x642C, 0x63B0, 0x6335, 0x62BA, 0x623F, 0x61C5, 0x614B, 0x60D1,
|
||||
0x6058, 0x5FDF, 0x5F66, 0x5EED, 0x5E75, 0x5DFD, 0x5D86, 0x5D0F,
|
||||
0x5C98, 0x5C22, 0x5BAB, 0x5B35, 0x5AC0, 0x5A4B, 0x59D6, 0x5961,
|
||||
0x58ED, 0x5879, 0x5805, 0x5791, 0x571E, 0x56AC, 0x5639, 0x55C7,
|
||||
0x5555, 0x54E3, 0x5472, 0x5401, 0x5390, 0x5320, 0x52AF, 0x5240,
|
||||
0x51D0, 0x5161, 0x50F2, 0x5083, 0x5015, 0x4FA6, 0x4F38, 0x4ECB,
|
||||
0x4E5E, 0x4DF1, 0x4D84, 0x4D17, 0x4CAB, 0x4C3F, 0x4BD3, 0x4B68,
|
||||
0x4AFD, 0x4A92, 0x4A27, 0x49BD, 0x4953, 0x48E9, 0x4880, 0x4817,
|
||||
0x47AE, 0x4745, 0x46DC, 0x4674, 0x460C, 0x45A5, 0x453D, 0x44D6,
|
||||
0x446F, 0x4408, 0x43A2, 0x433C, 0x42D6, 0x4270, 0x420B, 0x41A6,
|
||||
0x4141, 0x40DC, 0x4078, 0x4014, 0x3FB0, 0x3F4C, 0x3EE8, 0x3E85,
|
||||
0x3E22, 0x3DC0, 0x3D5D, 0x3CFB, 0x3C99, 0x3C37, 0x3BD6, 0x3B74,
|
||||
0x3B13, 0x3AB2, 0x3A52, 0x39F1, 0x3991, 0x3931, 0x38D2, 0x3872,
|
||||
0x3813, 0x37B4, 0x3755, 0x36F7, 0x3698, 0x363A, 0x35DC, 0x357F,
|
||||
0x3521, 0x34C4, 0x3467, 0x340A, 0x33AE, 0x3351, 0x32F5, 0x3299,
|
||||
0x323E, 0x31E2, 0x3187, 0x312C, 0x30D1, 0x3076, 0x301C, 0x2FC2,
|
||||
0x2F68, 0x2F0E, 0x2EB4, 0x2E5B, 0x2E02, 0x2DA9, 0x2D50, 0x2CF8,
|
||||
0x2C9F, 0x2C47, 0x2BEF, 0x2B97, 0x2B40, 0x2AE8, 0x2A91, 0x2A3A,
|
||||
0x29E4, 0x298D, 0x2937, 0x28E0, 0x288B, 0x2835, 0x27DF, 0x278A,
|
||||
0x2735, 0x26E0, 0x268B, 0x2636, 0x25E2, 0x258D, 0x2539, 0x24E5,
|
||||
0x2492, 0x243E, 0x23EB, 0x2398, 0x2345, 0x22F2, 0x22A0, 0x224D,
|
||||
0x21FB, 0x21A9, 0x2157, 0x2105, 0x20B4, 0x2063, 0x2012, 0x1FC1,
|
||||
0x1F70, 0x1F1F, 0x1ECF, 0x1E7F, 0x1E2E, 0x1DDF, 0x1D8F, 0x1D3F,
|
||||
0x1CF0, 0x1CA1, 0x1C52, 0x1C03, 0x1BB4, 0x1B66, 0x1B17, 0x1AC9,
|
||||
0x1A7B, 0x1A2D, 0x19E0, 0x1992, 0x1945, 0x18F8, 0x18AB, 0x185E,
|
||||
0x1811, 0x17C4, 0x1778, 0x172C, 0x16E0, 0x1694, 0x1648, 0x15FD,
|
||||
0x15B1, 0x1566, 0x151B, 0x14D0, 0x1485, 0x143B, 0x13F0, 0x13A6,
|
||||
0x135C, 0x1312, 0x12C8, 0x127F, 0x1235, 0x11EC, 0x11A3, 0x1159,
|
||||
0x1111, 0x10C8, 0x107F, 0x1037, 0x0FEF, 0x0FA6, 0x0F5E, 0x0F17,
|
||||
0x0ECF, 0x0E87, 0x0E40, 0x0DF9, 0x0DB2, 0x0D6B, 0x0D24, 0x0CDD,
|
||||
0x0C97, 0x0C50, 0x0C0A, 0x0BC4, 0x0B7E, 0x0B38, 0x0AF2, 0x0AAD,
|
||||
0x0A68, 0x0A22, 0x09DD, 0x0998, 0x0953, 0x090F, 0x08CA, 0x0886,
|
||||
0x0842, 0x07FD, 0x07B9, 0x0776, 0x0732, 0x06EE, 0x06AB, 0x0668,
|
||||
0x0624, 0x05E1, 0x059E, 0x055C, 0x0519, 0x04D6, 0x0494, 0x0452,
|
||||
0x0410, 0x03CE, 0x038C, 0x034A, 0x0309, 0x02C7, 0x0286, 0x0245,
|
||||
0x0204, 0x01C3, 0x0182, 0x0141, 0x0101, 0x00C0, 0x0080, 0x0040,
|
||||
0x6A09, 0xFFFF, 0x6955, 0xFF00, 0x68A1, 0xFE02, 0x67EF, 0xFD06,
|
||||
0x673E, 0xFC0B, 0x668D, 0xFB12, 0x65DE, 0xFA1A, 0x6530, 0xF923,
|
||||
0x6482, 0xF82E, 0x63D6, 0xF73B, 0x632B, 0xF648, 0x6280, 0xF557,
|
||||
0x61D7, 0xF467, 0x612E, 0xF379, 0x6087, 0xF28C, 0x5FE0, 0xF1A0,
|
||||
0x5F3A, 0xF0B6, 0x5E95, 0xEFCD, 0x5DF1, 0xEEE5, 0x5D4E, 0xEDFF,
|
||||
0x5CAC, 0xED19, 0x5C0B, 0xEC35, 0x5B6B, 0xEB52, 0x5ACB, 0xEA71,
|
||||
0x5A2C, 0xE990, 0x598F, 0xE8B1, 0x58F2, 0xE7D3, 0x5855, 0xE6F6,
|
||||
0x57BA, 0xE61B, 0x5720, 0xE540, 0x5686, 0xE467, 0x55ED, 0xE38E,
|
||||
0x5555, 0xE2B7, 0x54BE, 0xE1E1, 0x5427, 0xE10D, 0x5391, 0xE039,
|
||||
0x52FC, 0xDF66, 0x5268, 0xDE94, 0x51D5, 0xDDC4, 0x5142, 0xDCF4,
|
||||
0x50B0, 0xDC26, 0x501F, 0xDB59, 0x4F8E, 0xDA8C, 0x4EFE, 0xD9C1,
|
||||
0x4E6F, 0xD8F7, 0x4DE1, 0xD82D, 0x4D53, 0xD765, 0x4CC6, 0xD69E,
|
||||
0x4C3A, 0xD5D7, 0x4BAF, 0xD512, 0x4B24, 0xD44E, 0x4A9A, 0xD38A,
|
||||
0x4A10, 0xD2C8, 0x4987, 0xD206, 0x48FF, 0xD146, 0x4878, 0xD086,
|
||||
0x47F1, 0xCFC7, 0x476B, 0xCF0A, 0x46E5, 0xCE4D, 0x4660, 0xCD91,
|
||||
0x45DC, 0xCCD6, 0x4558, 0xCC1B, 0x44D5, 0xCB62, 0x4453, 0xCAA9,
|
||||
0x43D1, 0xC9F2, 0x434F, 0xC93B, 0x42CF, 0xC885, 0x424F, 0xC7D0,
|
||||
0x41CF, 0xC71C, 0x4151, 0xC669, 0x40D2, 0xC5B6, 0x4055, 0xC504,
|
||||
0x3FD8, 0xC453, 0x3F5B, 0xC3A3, 0x3EDF, 0xC2F4, 0x3E64, 0xC245,
|
||||
0x3DE9, 0xC198, 0x3D6E, 0xC0EB, 0x3CF5, 0xC03F, 0x3C7C, 0xBF93,
|
||||
0x3C03, 0xBEE9, 0x3B8B, 0xBE3F, 0x3B13, 0xBD96, 0x3A9C, 0xBCED,
|
||||
0x3A26, 0xBC46, 0x39B0, 0xBB9F, 0x393A, 0xBAF8, 0x38C5, 0xBA53,
|
||||
0x3851, 0xB9AE, 0x37DD, 0xB90A, 0x3769, 0xB867, 0x36F6, 0xB7C5,
|
||||
0x3684, 0xB723, 0x3612, 0xB681, 0x35A0, 0xB5E1, 0x352F, 0xB541,
|
||||
0x34BF, 0xB4A2, 0x344F, 0xB404, 0x33DF, 0xB366, 0x3370, 0xB2C9,
|
||||
0x3302, 0xB22C, 0x3293, 0xB191, 0x3226, 0xB0F5, 0x31B9, 0xB05B,
|
||||
0x314C, 0xAFC1, 0x30DF, 0xAF28, 0x3074, 0xAE8F, 0x3008, 0xADF7,
|
||||
0x2F9D, 0xAD60, 0x2F33, 0xACC9, 0x2EC8, 0xAC33, 0x2E5F, 0xAB9E,
|
||||
0x2DF6, 0xAB09, 0x2D8D, 0xAA75, 0x2D24, 0xA9E1, 0x2CBC, 0xA94E,
|
||||
0x2C55, 0xA8BC, 0x2BEE, 0xA82A, 0x2B87, 0xA799, 0x2B21, 0xA708,
|
||||
0x2ABB, 0xA678, 0x2A55, 0xA5E8, 0x29F0, 0xA559, 0x298B, 0xA4CB,
|
||||
0x2927, 0xA43D, 0x28C3, 0xA3B0, 0x2860, 0xA323, 0x27FD, 0xA297,
|
||||
0x279A, 0xA20B, 0x2738, 0xA180, 0x26D6, 0xA0F6, 0x2674, 0xA06C,
|
||||
0x2613, 0x9FE2, 0x25B2, 0x9F59, 0x2552, 0x9ED1, 0x24F2, 0x9E49,
|
||||
0x2492, 0x9DC2, 0x2432, 0x9D3B, 0x23D3, 0x9CB4, 0x2375, 0x9C2F,
|
||||
0x2317, 0x9BA9, 0x22B9, 0x9B25, 0x225B, 0x9AA0, 0x21FE, 0x9A1C,
|
||||
0x21A1, 0x9999, 0x2145, 0x9916, 0x20E8, 0x9894, 0x208D, 0x9812,
|
||||
0x2031, 0x9791, 0x1FD6, 0x9710, 0x1F7B, 0x968F, 0x1F21, 0x960F,
|
||||
0x1EC7, 0x9590, 0x1E6D, 0x9511, 0x1E13, 0x9492, 0x1DBA, 0x9414,
|
||||
0x1D61, 0x9397, 0x1D09, 0x931A, 0x1CB1, 0x929D, 0x1C59, 0x9221,
|
||||
0x1C01, 0x91A5, 0x1BAA, 0x9129, 0x1B53, 0x90AF, 0x1AFC, 0x9034,
|
||||
0x1AA6, 0x8FBA, 0x1A50, 0x8F40, 0x19FA, 0x8EC7, 0x19A5, 0x8E4F,
|
||||
0x1950, 0x8DD6, 0x18FB, 0x8D5E, 0x18A7, 0x8CE7, 0x1853, 0x8C70,
|
||||
0x17FF, 0x8BF9, 0x17AB, 0x8B83, 0x1758, 0x8B0D, 0x1705, 0x8A98,
|
||||
0x16B2, 0x8A23, 0x1660, 0x89AE, 0x160D, 0x893A, 0x15BC, 0x88C6,
|
||||
0x156A, 0x8853, 0x1519, 0x87E0, 0x14C8, 0x876D, 0x1477, 0x86FB,
|
||||
0x1426, 0x8689, 0x13D6, 0x8618, 0x1386, 0x85A7, 0x1337, 0x8536,
|
||||
0x12E7, 0x84C6, 0x1298, 0x8456, 0x1249, 0x83E7, 0x11FB, 0x8377,
|
||||
0x11AC, 0x8309, 0x115E, 0x829A, 0x1111, 0x822C, 0x10C3, 0x81BF,
|
||||
0x1076, 0x8151, 0x1029, 0x80E4, 0x0FDC, 0x8078, 0x0F8F, 0x800C,
|
||||
0x0F43, 0x7FA0, 0x0EF7, 0x7F34, 0x0EAB, 0x7EC9, 0x0E60, 0x7E5E,
|
||||
0x0E15, 0x7DF4, 0x0DCA, 0x7D8A, 0x0D7F, 0x7D20, 0x0D34, 0x7CB6,
|
||||
0x0CEA, 0x7C4D, 0x0CA0, 0x7BE5, 0x0C56, 0x7B7C, 0x0C0C, 0x7B14,
|
||||
0x0BC3, 0x7AAC, 0x0B7A, 0x7A45, 0x0B31, 0x79DE, 0x0AE8, 0x7977,
|
||||
0x0AA0, 0x7911, 0x0A58, 0x78AB, 0x0A10, 0x7845, 0x09C8, 0x77DF,
|
||||
0x0981, 0x777A, 0x0939, 0x7715, 0x08F2, 0x76B1, 0x08AB, 0x764D,
|
||||
0x0865, 0x75E9, 0x081E, 0x7585, 0x07D8, 0x7522, 0x0792, 0x74BF,
|
||||
0x074D, 0x745D, 0x0707, 0x73FA, 0x06C2, 0x7398, 0x067D, 0x7337,
|
||||
0x0638, 0x72D5, 0x05F3, 0x7274, 0x05AF, 0x7213, 0x056A, 0x71B3,
|
||||
0x0526, 0x7152, 0x04E2, 0x70F2, 0x049F, 0x7093, 0x045B, 0x7033,
|
||||
0x0418, 0x6FD4, 0x03D5, 0x6F76, 0x0392, 0x6F17, 0x0350, 0x6EB9,
|
||||
0x030D, 0x6E5B, 0x02CB, 0x6DFD, 0x0289, 0x6DA0, 0x0247, 0x6D43,
|
||||
0x0206, 0x6CE6, 0x01C4, 0x6C8A, 0x0183, 0x6C2D, 0x0142, 0x6BD1,
|
||||
0x0101, 0x6B76, 0x00C0, 0x6B1A, 0x0080, 0x6ABF, 0x0040, 0x6A64
|
||||
};
|
||||
|
18
rsp/reciprocal.h
Normal file
18
rsp/reciprocal.h
Normal file
@ -0,0 +1,18 @@
|
||||
//
|
||||
// rsp/reciprocal.h: RSP reciprocal ROM contents.
|
||||
//
|
||||
// CEN64: Cycle-Accurate Nintendo 64 Emulator.
|
||||
// Copyright (C) 2015, Tyler J. Stachecki.
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
#ifndef __common_reciprocal_h__
|
||||
#define __common_reciprocal_h__
|
||||
#include <stdint.h>
|
||||
|
||||
extern const uint16_t rsp_reciprocal_rom[1024];
|
||||
|
||||
#endif
|
||||
|
34
rsp/registers.md
Normal file
34
rsp/registers.md
Normal file
@ -0,0 +1,34 @@
|
||||
//
|
||||
// rsp/registers.md: RSP register enumerations.
|
||||
//
|
||||
// CEN64: Cycle-Accurate Nintendo 64 Emulator.
|
||||
// Copyright (C) 2015, Tyler J. Stachecki.
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
// X-macro list of register names. The includer defines X(name) before
// pulling this file in; the list itself is defined only once and is expanded
// by the bare SP_REGISTER_LIST token that follows this block.
// The order of the entries must not change: it determines whatever X()
// generates from them.
#ifndef SP_REGISTER_LIST
#define SP_REGISTER_LIST \
  X(SP_MEM_ADDR_REG) \
  X(SP_DRAM_ADDR_REG) \
  X(SP_RD_LEN_REG) \
  X(SP_WR_LEN_REG) \
  X(SP_STATUS_REG) \
  X(SP_DMA_FULL_REG) \
  X(SP_DMA_BUSY_REG) \
  X(SP_SEMAPHORE_REG) \
  X(CMD_START) \
  X(CMD_END) \
  X(CMD_CURRENT) \
  X(CMD_STATUS) \
  X(CMD_CLOCK) \
  X(CMD_BUSY) \
  X(CMD_PIPE_BUSY) \
  X(CMD_TMEM_BUSY) \
  X(SP_PC_REG) \
  X(SP_IBIST_REG)
#endif
|
||||
|
||||
SP_REGISTER_LIST
|
||||
|
703
rsp/vfunctions.cpp
Normal file
703
rsp/vfunctions.cpp
Normal file
@ -0,0 +1,703 @@
|
||||
//
|
||||
// rsp/vfunctions.cpp: RSP vector execution functions.
|
||||
//
|
||||
// CEN64: Cycle-Accurate Nintendo 64 Emulator.
|
||||
// Copyright (C) 2015, Tyler J. Stachecki.
|
||||
//
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// 'LICENSE', which is part of this source code package.
|
||||
//
|
||||
|
||||
#include "../rsp.hpp"
|
||||
#include "rsp_impl.h"
|
||||
#include "../rsp_op.hpp"
|
||||
|
||||
#define LOAD_VS() rsp_vect_load_unshuffled_operand(rsp->cp2.regs[vs].e)
|
||||
#define LOAD_VT() rsp_vect_load_and_shuffle_operand(rsp->cp2.regs[vt].e, e)
|
||||
#define STORE_RESULT() rsp_vect_write_operand(rsp->cp2.regs[vd].e, result)
|
||||
|
||||
extern "C" {
|
||||
|
||||
//
|
||||
// VABS
|
||||
//
|
||||
// Runs rsp_vabs on register vs and the element-shuffled register vt, writes
// the value returned through the out-parameter to the low accumulator, and
// stores the result in register vd.
void RSP_VABS(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *acc = rsp->cp2.acc.e;
  rsp_vect_t acc_lo;
  rsp_vect_t result = rsp_vabs(LOAD_VS(), LOAD_VT(), &acc_lo);

  write_acc_lo(acc, acc_lo);
  // Same expansion as the explicit rsp_vect_write_operand() call; uses the
  // macro every other handler in this file uses.
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VADD
|
||||
//
|
||||
// Runs rsp_vadd on vs and the shuffled vt, passing the low VCO half as the
// `carry` argument. Afterwards both VCO halves are cleared, the low
// accumulator receives the helper's out-parameter and vd the result.
void RSP_VADD(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  auto *const vco_flags = rsp->cp2.flags[RSP::RSP_VCO].e;
  rsp_vect_t acc_lo;

  rsp_vect_t carry = read_vco_lo(vco_flags);
  rsp_vect_t result = rsp_vadd(LOAD_VS(), LOAD_VT(), carry, &acc_lo);

  write_vco_hi(vco_flags, rsp_vzero());
  write_vco_lo(vco_flags, rsp_vzero());
  write_acc_lo(rsp->cp2.acc.e, acc_lo);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VADDC
|
||||
//
|
||||
// Runs rsp_vaddc on vs and the shuffled vt. The helper's out-parameter is
// written to the low VCO half, the high VCO half is cleared, and the result
// goes to both the low accumulator and vd.
void RSP_VADDC(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  auto *const vco_flags = rsp->cp2.flags[RSP::RSP_VCO].e;
  rsp_vect_t sn;

  rsp_vect_t result = rsp_vaddc(LOAD_VS(), LOAD_VT(), rsp_vzero(), &sn);

  write_vco_hi(vco_flags, rsp_vzero()); // TODO: Confirm.
  write_vco_lo(vco_flags, sn);
  write_acc_lo(rsp->cp2.acc.e, result);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VAND
|
||||
// VNAND
|
||||
//
|
||||
// Stores rsp_vand(vs, shuffled vt) in both the low accumulator and vd.
void RSP_VAND(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  rsp_vect_t result = rsp_vand(LOAD_VS(), LOAD_VT());

  write_acc_lo(rsp->cp2.acc.e, result);
  STORE_RESULT();
}
|
||||
|
||||
// Stores rsp_vnand(vs, shuffled vt) in both the low accumulator and vd.
void RSP_VNAND(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  rsp_vect_t result = rsp_vnand(LOAD_VS(), LOAD_VT());

  write_acc_lo(rsp->cp2.acc.e, result);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VCH
|
||||
//
|
||||
// Runs rsp_vch on vs and the shuffled vt. The five flag vectors the helper
// produces are written to VCC (ge -> high, le -> low), VCO (eq -> high,
// sign -> low) and VCE; the result goes to the low accumulator and vd.
void RSP_VCH(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  auto *const vcc_flags = rsp->cp2.flags[RSP::RSP_VCC].e;
  auto *const vco_flags = rsp->cp2.flags[RSP::RSP_VCO].e;
  auto *const vce_flags = rsp->cp2.flags[RSP::RSP_VCE].e;
  rsp_vect_t ge, le, sign, eq, vce;

  rsp_vect_t result = rsp_vch(LOAD_VS(), LOAD_VT(), rsp_vzero(), &ge, &le, &eq, &sign, &vce);

  write_vcc_hi(vcc_flags, ge);
  write_vcc_lo(vcc_flags, le);
  write_vco_hi(vco_flags, eq);
  write_vco_lo(vco_flags, sign);
  write_vce(vce_flags, vce);
  write_acc_lo(rsp->cp2.acc.e, result);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VCL
|
||||
//
|
||||
// Runs rsp_vcl on vs and the shuffled vt, seeded with the current VCC, VCO
// and VCE contents. ge/le are updated by the helper and written back to VCC;
// VCO and VCE are cleared; the result goes to the low accumulator and vd.
void RSP_VCL(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  auto *const vcc_flags = rsp->cp2.flags[RSP::RSP_VCC].e;
  auto *const vco_flags = rsp->cp2.flags[RSP::RSP_VCO].e;
  auto *const vce_flags = rsp->cp2.flags[RSP::RSP_VCE].e;

  rsp_vect_t ge = read_vcc_hi(vcc_flags);
  rsp_vect_t le = read_vcc_lo(vcc_flags);
  rsp_vect_t eq = read_vco_hi(vco_flags);
  rsp_vect_t sign = read_vco_lo(vco_flags);
  rsp_vect_t vce = read_vce(vce_flags);

  rsp_vect_t result = rsp_vcl(LOAD_VS(), LOAD_VT(), rsp_vzero(), &ge, &le, eq, sign, vce);

  write_vcc_hi(vcc_flags, ge);
  write_vcc_lo(vcc_flags, le);
  write_vco_hi(vco_flags, rsp_vzero());
  write_vco_lo(vco_flags, rsp_vzero());
  write_vce(vce_flags, rsp_vzero());
  write_acc_lo(rsp->cp2.acc.e, result);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VCR
|
||||
//
|
||||
// Runs rsp_vcr on vs and the shuffled vt. ge/le produced by the helper are
// written to the high/low VCC halves, VCO and VCE are cleared, and the
// result goes to the low accumulator and vd.
void RSP_VCR(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *acc = rsp->cp2.acc.e;
  rsp_vect_t ge, le;

  rsp_vect_t result = rsp_vcr(LOAD_VS(), LOAD_VT(), rsp_vzero(), &ge, &le);

#ifdef INTENSE_DEBUG
  // Dump the eight result lanes as signed 16-bit values. The lane index is
  // unsigned, so it is printed with %u.
  for (unsigned i = 0; i < 8; i++)
    fprintf(stderr, "VD[%u] = %d\n", i,
            reinterpret_cast<int16_t*>(&result)[i]);
#endif

  write_vcc_hi(rsp->cp2.flags[RSP::RSP_VCC].e, ge);
  write_vcc_lo(rsp->cp2.flags[RSP::RSP_VCC].e, le);
  write_vco_hi(rsp->cp2.flags[RSP::RSP_VCO].e, rsp_vzero());
  write_vco_lo(rsp->cp2.flags[RSP::RSP_VCO].e, rsp_vzero());
  write_vce(rsp->cp2.flags[RSP::RSP_VCE].e, rsp_vzero());
  write_acc_lo(acc, result);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VEQ
|
||||
// VGE
|
||||
// VLT
|
||||
// VNE
|
||||
//
|
||||
// Runs rsp_veq on vs and the shuffled vt, seeded with both VCO halves.
// The helper's `le` output is written to the low VCC half; the high VCC half
// and both VCO halves are cleared; the result goes to the low accumulator
// and vd.
void RSP_VEQ(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  auto *const vcc_flags = rsp->cp2.flags[RSP::RSP_VCC].e;
  auto *const vco_flags = rsp->cp2.flags[RSP::RSP_VCO].e;
  rsp_vect_t le;

  rsp_vect_t eq = read_vco_hi(vco_flags);
  rsp_vect_t sign = read_vco_lo(vco_flags);

  rsp_vect_t result = rsp_veq(LOAD_VS(), LOAD_VT(), rsp_vzero(), &le, eq, sign);

  write_vcc_hi(vcc_flags, rsp_vzero());
  write_vcc_lo(vcc_flags, le);
  write_vco_hi(vco_flags, rsp_vzero());
  write_vco_lo(vco_flags, rsp_vzero());
  write_acc_lo(rsp->cp2.acc.e, result);
  STORE_RESULT();
}
|
||||
|
||||
// Runs rsp_vge on vs and the shuffled vt, seeded with both VCO halves.
// The helper's `le` output is written to the low VCC half; the high VCC half
// and both VCO halves are cleared; the result goes to the low accumulator
// and vd.
void RSP_VGE(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  auto *const vcc_flags = rsp->cp2.flags[RSP::RSP_VCC].e;
  auto *const vco_flags = rsp->cp2.flags[RSP::RSP_VCO].e;
  rsp_vect_t le;

  rsp_vect_t eq = read_vco_hi(vco_flags);
  rsp_vect_t sign = read_vco_lo(vco_flags);

  rsp_vect_t result = rsp_vge(LOAD_VS(), LOAD_VT(), rsp_vzero(), &le, eq, sign);

  write_vcc_hi(vcc_flags, rsp_vzero());
  write_vcc_lo(vcc_flags, le);
  write_vco_hi(vco_flags, rsp_vzero());
  write_vco_lo(vco_flags, rsp_vzero());
  write_acc_lo(rsp->cp2.acc.e, result);
  STORE_RESULT();
}
|
||||
|
||||
// Runs rsp_vlt on vs and the shuffled vt, seeded with both VCO halves.
// The helper's `le` output is written to the low VCC half; the high VCC half
// and both VCO halves are cleared; the result goes to the low accumulator
// and vd.
void RSP_VLT(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  auto *const vcc_flags = rsp->cp2.flags[RSP::RSP_VCC].e;
  auto *const vco_flags = rsp->cp2.flags[RSP::RSP_VCO].e;
  rsp_vect_t le;

  rsp_vect_t eq = read_vco_hi(vco_flags);
  rsp_vect_t sign = read_vco_lo(vco_flags);

  rsp_vect_t result = rsp_vlt(LOAD_VS(), LOAD_VT(), rsp_vzero(), &le, eq, sign);

  write_vcc_hi(vcc_flags, rsp_vzero());
  write_vcc_lo(vcc_flags, le);
  write_vco_hi(vco_flags, rsp_vzero());
  write_vco_lo(vco_flags, rsp_vzero());
  write_acc_lo(rsp->cp2.acc.e, result);
  STORE_RESULT();
}
|
||||
|
||||
// Runs rsp_vne on vs and the shuffled vt, seeded with both VCO halves.
// The helper's `le` output is written to the low VCC half; the high VCC half
// and both VCO halves are cleared; the result goes to the low accumulator
// and vd.
void RSP_VNE(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *acc = rsp->cp2.acc.e;
  rsp_vect_t le, eq, sign;

  eq = read_vco_hi(rsp->cp2.flags[RSP::RSP_VCO].e);
  sign = read_vco_lo(rsp->cp2.flags[RSP::RSP_VCO].e);

  rsp_vect_t result = rsp_vne(LOAD_VS(), LOAD_VT(), rsp_vzero(), &le, eq, sign);

#ifdef INTENSE_DEBUG
  // Dump the eight result lanes as signed 16-bit values. The lane index is
  // unsigned, so it is printed with %u.
  for (unsigned i = 0; i < 8; i++)
    fprintf(stderr, "VD[%u] = %d\n", i,
            reinterpret_cast<int16_t*>(&result)[i]);
#endif

  write_vcc_hi(rsp->cp2.flags[RSP::RSP_VCC].e, rsp_vzero());
  write_vcc_lo(rsp->cp2.flags[RSP::RSP_VCC].e, le);
  write_vco_hi(rsp->cp2.flags[RSP::RSP_VCO].e, rsp_vzero());
  write_vco_lo(rsp->cp2.flags[RSP::RSP_VCO].e, rsp_vzero());
  write_acc_lo(acc, result);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VINVALID
|
||||
//
|
||||
// Handler for vector opcodes without an implementation: prints a fixed
// message to stderr and leaves all state untouched.
void RSP_VINVALID(RSP::CPUState *, unsigned, unsigned, unsigned, unsigned)
{
  fputs("Unimplemented ...\n", stderr);
}
|
||||
|
||||
//
|
||||
// VMACF
|
||||
// VMACU
|
||||
//
|
||||
// Reads the three accumulator slices, runs rsp_vmacf_vmacu<false> on vs and
// the shuffled vt (the helper updates the slices in place), then writes the
// slices back and stores the result in vd.
void RSP_VMACF(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *const accum = rsp->cp2.acc.e;
  rsp_vect_t lo = read_acc_lo(accum);
  rsp_vect_t md = read_acc_md(accum);
  rsp_vect_t hi = read_acc_hi(accum);

  rsp_vect_t result = rsp_vmacf_vmacu<false>(LOAD_VS(), LOAD_VT(), rsp_vzero(), &lo, &md, &hi);

  write_acc_lo(accum, lo);
  write_acc_md(accum, md);
  write_acc_hi(accum, hi);
  STORE_RESULT();
}
|
||||
|
||||
// Reads the three accumulator slices, runs rsp_vmacf_vmacu<true> on vs and
// the shuffled vt (the helper updates the slices in place), then writes the
// slices back and stores the result in vd.
void RSP_VMACU(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *const accum = rsp->cp2.acc.e;
  rsp_vect_t lo = read_acc_lo(accum);
  rsp_vect_t md = read_acc_md(accum);
  rsp_vect_t hi = read_acc_hi(accum);

  rsp_vect_t result = rsp_vmacf_vmacu<true>(LOAD_VS(), LOAD_VT(), rsp_vzero(), &lo, &md, &hi);

  write_acc_lo(accum, lo);
  write_acc_md(accum, md);
  write_acc_hi(accum, hi);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VMADH
|
||||
// VMUDH
|
||||
//
|
||||
// Reads the three accumulator slices, runs rsp_vmadh_vmudh<true> on vs and
// the shuffled vt (the helper updates the slices in place), then writes the
// slices back and stores the result in vd.
void RSP_VMADH(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *const accum = rsp->cp2.acc.e;
  rsp_vect_t lo = read_acc_lo(accum);
  rsp_vect_t md = read_acc_md(accum);
  rsp_vect_t hi = read_acc_hi(accum);

  rsp_vect_t result = rsp_vmadh_vmudh<true>(LOAD_VS(), LOAD_VT(), rsp_vzero(), &lo, &md, &hi);

  write_acc_lo(accum, lo);
  write_acc_md(accum, md);
  write_acc_hi(accum, hi);
  STORE_RESULT();
}
|
||||
|
||||
// Reads the three accumulator slices, runs rsp_vmadh_vmudh<false> on vs and
// the shuffled vt (the helper updates the slices in place), then writes the
// slices back and stores the result in vd.
void RSP_VMUDH(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *const accum = rsp->cp2.acc.e;
  rsp_vect_t lo = read_acc_lo(accum);
  rsp_vect_t md = read_acc_md(accum);
  rsp_vect_t hi = read_acc_hi(accum);

  rsp_vect_t result = rsp_vmadh_vmudh<false>(LOAD_VS(), LOAD_VT(), rsp_vzero(), &lo, &md, &hi);

  write_acc_lo(accum, lo);
  write_acc_md(accum, md);
  write_acc_hi(accum, hi);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VMADL
|
||||
// VMUDL
|
||||
//
|
||||
// Reads the three accumulator slices, runs rsp_vmadl_vmudl<true> on vs and
// the shuffled vt (the helper updates the slices in place), then writes the
// slices back and stores the result in vd.
void RSP_VMADL(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *const accum = rsp->cp2.acc.e;
  rsp_vect_t lo = read_acc_lo(accum);
  rsp_vect_t md = read_acc_md(accum);
  rsp_vect_t hi = read_acc_hi(accum);

  rsp_vect_t result = rsp_vmadl_vmudl<true>(LOAD_VS(), LOAD_VT(), rsp_vzero(), &lo, &md, &hi);

  write_acc_lo(accum, lo);
  write_acc_md(accum, md);
  write_acc_hi(accum, hi);
  STORE_RESULT();
}
|
||||
|
||||
// Reads the three accumulator slices, runs rsp_vmadl_vmudl<false> on vs and
// the shuffled vt (the helper updates the slices in place), then writes the
// slices back and stores the result in vd.
void RSP_VMUDL(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *const accum = rsp->cp2.acc.e;
  rsp_vect_t lo = read_acc_lo(accum);
  rsp_vect_t md = read_acc_md(accum);
  rsp_vect_t hi = read_acc_hi(accum);

  rsp_vect_t result = rsp_vmadl_vmudl<false>(LOAD_VS(), LOAD_VT(), rsp_vzero(), &lo, &md, &hi);

  write_acc_lo(accum, lo);
  write_acc_md(accum, md);
  write_acc_hi(accum, hi);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VMADM
|
||||
// VMUDM
|
||||
//
|
||||
// Reads the three accumulator slices, runs rsp_vmadm_vmudm<true> on vs and
// the shuffled vt (the helper updates the slices in place), then writes the
// slices back and stores the result in vd.
void RSP_VMADM(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *const accum = rsp->cp2.acc.e;
  rsp_vect_t lo = read_acc_lo(accum);
  rsp_vect_t md = read_acc_md(accum);
  rsp_vect_t hi = read_acc_hi(accum);

  rsp_vect_t result = rsp_vmadm_vmudm<true>(LOAD_VS(), LOAD_VT(), rsp_vzero(), &lo, &md, &hi);

  write_acc_lo(accum, lo);
  write_acc_md(accum, md);
  write_acc_hi(accum, hi);
  STORE_RESULT();
}
|
||||
|
||||
// Reads the three accumulator slices, runs rsp_vmadm_vmudm<false> on vs and
// the shuffled vt (the helper updates the slices in place), then writes the
// slices back and stores the result in vd.
void RSP_VMUDM(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *const accum = rsp->cp2.acc.e;
  rsp_vect_t lo = read_acc_lo(accum);
  rsp_vect_t md = read_acc_md(accum);
  rsp_vect_t hi = read_acc_hi(accum);

  rsp_vect_t result = rsp_vmadm_vmudm<false>(LOAD_VS(), LOAD_VT(), rsp_vzero(), &lo, &md, &hi);

  write_acc_lo(accum, lo);
  write_acc_md(accum, md);
  write_acc_hi(accum, hi);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VMADN
|
||||
// VMUDN
|
||||
//
|
||||
// Reads the three accumulator slices, runs rsp_vmadn_vmudn<true> on vs and
// the shuffled vt (the helper updates the slices in place), then writes the
// slices back and stores the result in vd.
void RSP_VMADN(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *const accum = rsp->cp2.acc.e;
  rsp_vect_t lo = read_acc_lo(accum);
  rsp_vect_t md = read_acc_md(accum);
  rsp_vect_t hi = read_acc_hi(accum);

  rsp_vect_t result = rsp_vmadn_vmudn<true>(LOAD_VS(), LOAD_VT(), rsp_vzero(), &lo, &md, &hi);

  write_acc_lo(accum, lo);
  write_acc_md(accum, md);
  write_acc_hi(accum, hi);
  STORE_RESULT();
}
|
||||
|
||||
// Reads the three accumulator slices, runs rsp_vmadn_vmudn<false> on vs and
// the shuffled vt (the helper updates the slices in place), then writes the
// slices back and stores the result in vd.
void RSP_VMUDN(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *const accum = rsp->cp2.acc.e;
  rsp_vect_t lo = read_acc_lo(accum);
  rsp_vect_t md = read_acc_md(accum);
  rsp_vect_t hi = read_acc_hi(accum);

  rsp_vect_t result = rsp_vmadn_vmudn<false>(LOAD_VS(), LOAD_VT(), rsp_vzero(), &lo, &md, &hi);

  write_acc_lo(accum, lo);
  write_acc_md(accum, md);
  write_acc_hi(accum, hi);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VMOV
|
||||
//
|
||||
// Copies the shuffled vt into the low accumulator, then stores the value
// returned by rsp_vmov(rsp, vt, e, vd, de) in vd. The destination element
// index `de` is the low three bits of the vs field.
void RSP_VMOV(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  const unsigned de = vs & 0x7;

  write_acc_lo(rsp->cp2.acc.e, LOAD_VT());

  __m128i result = rsp_vmov(rsp, vt, e, vd, de);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VMRG
|
||||
//
|
||||
// Runs rsp_vmrg on vs and the shuffled vt, passing the low VCC half as its
// third argument. Both VCO halves are cleared; the result goes to the low
// accumulator and vd.
void RSP_VMRG(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  auto *const vco_flags = rsp->cp2.flags[RSP::RSP_VCO].e;

  rsp_vect_t le = read_vcc_lo(rsp->cp2.flags[RSP::RSP_VCC].e);
  rsp_vect_t result = rsp_vmrg(LOAD_VS(), LOAD_VT(), le);

  write_vco_hi(vco_flags, rsp_vzero());
  write_vco_lo(vco_flags, rsp_vzero());
  write_acc_lo(rsp->cp2.acc.e, result);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VMULF
|
||||
// VMULU
|
||||
//
|
||||
// Runs rsp_vmulf_vmulu<false> on vs and the shuffled vt. The accumulator is
// not read first: the three slices come purely from the helper's
// out-parameters and are then written to the accumulator; the result goes
// to vd.
void RSP_VMULF(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *const accum = rsp->cp2.acc.e;
  rsp_vect_t lo, md, hi;

  rsp_vect_t result = rsp_vmulf_vmulu<false>(LOAD_VS(), LOAD_VT(), rsp_vzero(), &lo, &md, &hi);

  write_acc_lo(accum, lo);
  write_acc_md(accum, md);
  write_acc_hi(accum, hi);
  STORE_RESULT();
}
|
||||
|
||||
// Runs rsp_vmulf_vmulu<true> on vs and the shuffled vt. The accumulator is
// not read first: the three slices come purely from the helper's
// out-parameters and are then written to the accumulator; the result goes
// to vd.
void RSP_VMULU(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  uint16_t *const accum = rsp->cp2.acc.e;
  rsp_vect_t lo, md, hi;

  rsp_vect_t result = rsp_vmulf_vmulu<true>(LOAD_VS(), LOAD_VT(), rsp_vzero(), &lo, &md, &hi);

  write_acc_lo(accum, lo);
  write_acc_md(accum, md);
  write_acc_hi(accum, hi);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VNOP
|
||||
//
|
||||
// Vector no-op: ignores every argument and changes no state.
void RSP_VNOP(RSP::CPUState *, unsigned, unsigned, unsigned, unsigned)
{
  // Intentionally empty.
}
|
||||
|
||||
//
|
||||
// VOR
|
||||
// VNOR
|
||||
//
|
||||
// Stores rsp_vor(vs, shuffled vt) in both the low accumulator and vd.
void RSP_VOR(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  rsp_vect_t result = rsp_vor(LOAD_VS(), LOAD_VT());

  write_acc_lo(rsp->cp2.acc.e, result);
  STORE_RESULT();
}
|
||||
|
||||
// Stores rsp_vnor(vs, shuffled vt) in both the low accumulator and vd.
void RSP_VNOR(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  rsp_vect_t result = rsp_vnor(LOAD_VS(), LOAD_VT());

  write_acc_lo(rsp->cp2.acc.e, result);
  STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VRCP
|
||||
// VRCPL
|
||||
// VRSQ
|
||||
// VRSQL
|
||||
//
|
||||
// Masks e to three bits, copies the shuffled vt into the low accumulator,
// clears dp_flag, and stores rsp_vrcp_vrsq<false>(rsp, 0, vt, e, vd, de) in
// vd. `de` is the low three bits of the vs field.
void RSP_VRCP(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
  const unsigned de = vs & 0x7;
  e &= 0x7; // must precede LOAD_VT(), which reads e

  write_acc_lo(rsp->cp2.acc.e, LOAD_VT());

  rsp->cp2.dp_flag = 0;
  rsp_vect_t result = rsp_vrcp_vrsq<false>(rsp, 0, vt, e, vd, de);
  STORE_RESULT();
}
|
||||
|
||||
// VRCPL handler: copies the VT operand into the low accumulator slice, then
// consumes cp2.dp_flag -- its low bit is handed to rsp_vrcp_vrsq<false> and
// the flag is cleared before the call. The returned vector is stored as the
// result.
// NOTE(review): `e` is masked before LOAD_VT() is expanded -- confirm the
// macro is meant to see the masked value.
void RSP_VRCPL(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
    uint16_t *acc = rsp->cp2.acc.e;
    unsigned de = vs & 0x7;
    e = e & 0x7;

    write_acc_lo(acc, LOAD_VT());

    // Latch the flag first: it must be reset regardless of its old value.
    int double_precision = rsp->cp2.dp_flag & 1;
    rsp->cp2.dp_flag = 0;

    rsp_vect_t result = rsp_vrcp_vrsq<false>(rsp, double_precision, vt, e, vd, de);
    STORE_RESULT();
}
|
||||
|
||||
|
||||
// VRSQ handler: copies the VT operand into the low accumulator slice, clears
// cp2.dp_flag and stores the value returned by rsp_vrcp_vrsq<true>, called
// with a zero second argument.
// NOTE(review): `e` is masked before LOAD_VT() is expanded -- confirm the
// macro is meant to see the masked value.
void RSP_VRSQ(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
    uint16_t *acc = rsp->cp2.acc.e;

    // Both selectors are reduced to the range 0..7.
    unsigned de = vs & 0x7;
    e = e & 0x7;

    write_acc_lo(acc, LOAD_VT());

    rsp->cp2.dp_flag = 0;
    rsp_vect_t result = rsp_vrcp_vrsq<true>(rsp, 0, vt, e, vd, de);
    STORE_RESULT();
}
|
||||
|
||||
// VRSQL handler: copies the VT operand into the low accumulator slice, then
// consumes cp2.dp_flag -- its low bit is handed to rsp_vrcp_vrsq<true> and
// the flag is cleared before the call. The returned vector is stored as the
// result.
// NOTE(review): `e` is masked before LOAD_VT() is expanded -- confirm the
// macro is meant to see the masked value.
void RSP_VRSQL(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
    uint16_t *acc = rsp->cp2.acc.e;
    unsigned de = vs & 0x7;
    e = e & 0x7;

    write_acc_lo(acc, LOAD_VT());

    // Latch the flag first: it must be reset regardless of its old value.
    int double_precision = rsp->cp2.dp_flag & 1;
    rsp->cp2.dp_flag = 0;

    rsp_vect_t result = rsp_vrcp_vrsq<true>(rsp, double_precision, vt, e, vd, de);
    STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VRCPH
|
||||
// VRSQH
|
||||
//
|
||||
// VRCPH handler: copies the VT operand into the low accumulator slice, raises
// cp2.dp_flag and stores the value returned by rsp_vdivh().
// NOTE(review): `e` is masked before LOAD_VT() is expanded -- confirm the
// macro is meant to see the masked value.
void RSP_VRCPH(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
    uint16_t *acc = rsp->cp2.acc.e;
    unsigned de = vs & 0x7;
    e = e & 0x7;

    write_acc_lo(acc, LOAD_VT());

    // Request double precision from the next VRCPL, which reads this flag.
    rsp->cp2.dp_flag = 1;

    rsp_vect_t result = rsp_vdivh(rsp, vt, e, vd, de);
    STORE_RESULT();
}
|
||||
|
||||
// VRSQH handler: copies the VT operand into the low accumulator slice, raises
// cp2.dp_flag and stores the value returned by rsp_vdivh().
// NOTE(review): `e` is masked before LOAD_VT() is expanded -- confirm the
// macro is meant to see the masked value.
void RSP_VRSQH(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
    uint16_t *acc = rsp->cp2.acc.e;
    unsigned de = vs & 0x7;
    e = e & 0x7;

    write_acc_lo(acc, LOAD_VT());

    // Request double precision from the next low-half op: both RSP_VRSQL and
    // RSP_VRCPL read (and then clear) this flag.
    rsp->cp2.dp_flag = 1;

    rsp_vect_t result = rsp_vdivh(rsp, vt, e, vd, de);
    STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VSAR
|
||||
//
|
||||
// VSAR handler: stores one accumulator slice as the result, selected by `e`:
//   8 -> high slice, 9 -> middle slice, 10 -> low slice,
//   anything else -> a zero vector.
// The accumulator itself is only read, never written.
void RSP_VSAR(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
    uint16_t *acc = rsp->cp2.acc.e;
    rsp_vect_t result;

    if (e == 8)
        result = read_acc_hi(acc);
    else if (e == 9)
        result = read_acc_md(acc);
    else if (e == 10)
        result = read_acc_lo(acc);
    else
        result = rsp_vzero();

    STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VSUB
|
||||
//
|
||||
// VSUB handler: feeds the low half of the VCO flags into rsp_vsub() together
// with the VS and VT operands, then clears both halves of VCO, writes the
// low accumulator slice produced by rsp_vsub() and stores the result.
// NOTE(review): LOAD_VS/LOAD_VT/STORE_RESULT are macros defined elsewhere;
// presumably they use vs/vt/e/vd and `result` -- confirm.
void RSP_VSUB(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
    uint16_t *acc = rsp->cp2.acc.e;
    rsp_vect_t acc_lo_out;

    // The incoming flag value has to be read before VCO is cleared below.
    rsp_vect_t carry_in = read_vco_lo(rsp->cp2.flags[RSP::RSP_VCO].e);

    rsp_vect_t result = rsp_vsub(LOAD_VS(), LOAD_VT(), carry_in, &acc_lo_out);

    write_vco_hi(rsp->cp2.flags[RSP::RSP_VCO].e, rsp_vzero());
    write_vco_lo(rsp->cp2.flags[RSP::RSP_VCO].e, rsp_vzero());
    write_acc_lo(acc, acc_lo_out);
    STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VSUBC
|
||||
//
|
||||
// VSUBC handler: runs rsp_vsubc() on the VS/VT operands with a zero third
// operand. Its two flag outputs become the high and low halves of VCO, and
// the returned vector is written to both the low accumulator slice and the
// result.
// NOTE(review): LOAD_VS/LOAD_VT/STORE_RESULT are macros defined elsewhere;
// presumably they use vs/vt/e/vd and `result` -- confirm.
void RSP_VSUBC(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
    uint16_t *acc = rsp->cp2.acc.e;
    rsp_vect_t vco_hi_bits, vco_lo_bits;

    rsp_vect_t result = rsp_vsubc(LOAD_VS(), LOAD_VT(), rsp_vzero(), &vco_hi_bits, &vco_lo_bits);

    write_vco_hi(rsp->cp2.flags[RSP::RSP_VCO].e, vco_hi_bits);
    write_vco_lo(rsp->cp2.flags[RSP::RSP_VCO].e, vco_lo_bits);
    write_acc_lo(acc, result);
    STORE_RESULT();
}
|
||||
|
||||
//
|
||||
// VXOR
|
||||
// VNXOR
|
||||
//
|
||||
// VXOR handler: combines the VS and VT operands with rsp_vxor(), mirrors the
// value into the low accumulator slice and stores it as the result.
// NOTE(review): LOAD_VS/LOAD_VT/STORE_RESULT are macros defined elsewhere;
// presumably they use vs/vt/e/vd and `result` -- confirm.
void RSP_VXOR(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
    uint16_t *acc = rsp->cp2.acc.e;
    rsp_vect_t result = rsp_vxor(LOAD_VS(), LOAD_VT());

    write_acc_lo(acc, result);
    STORE_RESULT();
}
|
||||
|
||||
// VNXOR handler: combines the VS and VT operands with rsp_vnxor(), mirrors
// the value into the low accumulator slice and stores it as the result.
// NOTE(review): LOAD_VS/LOAD_VT/STORE_RESULT are macros defined elsewhere;
// presumably they use vs/vt/e/vd and `result` -- confirm.
void RSP_VNXOR(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)
{
    uint16_t *acc = rsp->cp2.acc.e;
    rsp_vect_t result = rsp_vnxor(LOAD_VS(), LOAD_VT());

    write_acc_lo(acc, result);
    STORE_RESULT();
}
|
||||
|
||||
// RESERVED
|
||||
// Handler for reserved vector opcodes: stores a zero vector as the result.
// The accumulator and flags are not touched here; the last three operands
// are ignored.
void RSP_RESERVED(RSP::CPUState *rsp, unsigned vd, unsigned, unsigned, unsigned)
{
    // `result` is picked up by the STORE_RESULT() macro (defined elsewhere).
    rsp_vect_t result = rsp_vzero();
    STORE_RESULT();
}
|
||||
|
||||
}
|
||||
|
226
rsp_1.1.h
Normal file
226
rsp_1.1.h
Normal file
@ -0,0 +1,226 @@
|
||||
#ifndef __RSP_1_1_H__
|
||||
#define __RSP_1_1_H__
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define PLUGIN_TYPE_RSP 1
|
||||
#define PLUGIN_TYPE_GFX 2
|
||||
#define PLUGIN_TYPE_AUDIO 3
|
||||
#define PLUGIN_TYPE_CONTROLLER 4
|
||||
|
||||
#if !defined(M64P_PLUGIN_API)
/*
 * Slight changes to zilmar's spec file for portability.
 *
 * The raw plugin spec headers by zilmar required WIN32 definitions.  The
 * ANSI approximations below are sufficient for this header to be used on
 * its own: each handle is a pointer to its own dummy struct, so the handle
 * types remain distinct from one another.
 */
typedef struct HWND__ { int unused; } *HWND;
typedef struct HINSTANCE__ { int unused; } *HINSTANCE;
typedef struct HMENU__ { int unused; } *HMENU;
typedef struct HDC__ { int unused; } *HDC;
#endif
|
||||
|
||||
/*
 * RCPREG: the integer type behind every RCP register pointer in RSP_INFO.
 * uint32_t is used when <stdint.h> is known to be present (detected through
 * the _STDINT_H guard macro) or when building against the Mupen64Plus API;
 * otherwise plain `unsigned int` stands in for a 32-bit type.
 */
#if defined(_STDINT_H) || defined(M64P_PLUGIN_API)
typedef uint32_t RCPREG;
#elif (0)
/* Disabled alternative: necessary for 16-bit targets. */
typedef unsigned long RCPREG;
#else
/* ANSI approximation of 32-bit size. */
typedef unsigned int RCPREG;
#endif
|
||||
|
||||
/* Description of the plugin; filled in by GetDllInfo(). */
typedef struct {
    unsigned short Version; /* Should be set to 0x0101 */
    unsigned short Type;    /* Set to PLUGIN_TYPE_RSP */
    char Name[100];         /* Name of the DLL */

    /*
     * Memory layout options: set each one to TRUE if the DLL supports it,
     * or to FALSE if it does not.
     */
    int NormalMemory;  /* a normal BYTE array */
    int MemoryBswaped; /* a normal BYTE array where the memory has been
                          pre-byte-swapped on a DWORD (32 bits) boundary */
} PLUGIN_INFO;
|
||||
|
||||
#if !defined(M64P_PLUGIN_API)
|
||||
typedef struct {
|
||||
HINSTANCE hInst;
|
||||
int MemoryBswaped; /* If this is set to TRUE, then the memory has been
|
||||
pre-byte-swapped on a DWORD (32 bits) boundary */
|
||||
unsigned char *RDRAM;
|
||||
unsigned char *DMEM;
|
||||
unsigned char *IMEM;
|
||||
|
||||
RCPREG *MI_INTR_REG;
|
||||
|
||||
RCPREG *SP_MEM_ADDR_REG;
|
||||
RCPREG *SP_DRAM_ADDR_REG;
|
||||
RCPREG *SP_RD_LEN_REG;
|
||||
RCPREG *SP_WR_LEN_REG;
|
||||
RCPREG *SP_STATUS_REG;
|
||||
RCPREG *SP_DMA_FULL_REG;
|
||||
RCPREG *SP_DMA_BUSY_REG;
|
||||
RCPREG *SP_PC_REG; /* This was SUPPOSED to be defined after the next. */
|
||||
RCPREG *SP_SEMAPHORE_REG;
|
||||
|
||||
/** RCPREG *SP_PC_REG; // CPU-mapped between SP and DP command buffer regs **/
|
||||
|
||||
RCPREG *DPC_START_REG;
|
||||
RCPREG *DPC_END_REG;
|
||||
RCPREG *DPC_CURRENT_REG;
|
||||
RCPREG *DPC_STATUS_REG;
|
||||
RCPREG *DPC_CLOCK_REG;
|
||||
RCPREG *DPC_BUFBUSY_REG;
|
||||
RCPREG *DPC_PIPEBUSY_REG;
|
||||
RCPREG *DPC_TMEM_REG;
|
||||
|
||||
void (*CheckInterrupts)(void);
|
||||
void (*ProcessDList)(void);
|
||||
void (*ProcessAList)(void);
|
||||
void (*ProcessRdpList)(void);
|
||||
void (*ShowCFB)(void);
|
||||
} RSP_INFO;
|
||||
#endif
|
||||
|
||||
/*
 * Table of debugger callbacks.  Every member is a pointer to a function
 * that takes no arguments and returns nothing; the member order must stay
 * as it is.
 */
typedef struct {
    /* Refresh notifications. */
    void (*UpdateBreakPoints)(void);
    void (*UpdateMemory)(void);
    void (*UpdateR4300iRegisters)(void);

    /* Requests to open a debugger window. */
    void (*Enter_BPoint_Window)(void);
    void (*Enter_R4300i_Commands_Window)(void);
    void (*Enter_R4300i_Register_Window)(void);
    void (*Enter_RSP_Commands_Window)(void);
    void (*Enter_Memory_Window)(void);
} DEBUG_INFO;
|
||||
|
||||
#if defined(M64P_PLUGIN_API)
/*
 * Mupen64Plus build: pull in the m64p API headers with plugin prototypes
 * enabled.  EXPORT and CALL are expected to come from those headers (they
 * are not defined in this branch).
 */
#define M64P_PLUGIN_PROTOTYPES 1
#include "m64p_types.h"
#include "m64p_common.h"
#include "m64p_plugin.h"
#include "m64p_config.h"
#else
/*
 * Standalone build: define the export attribute and calling convention used
 * by the prototypes below.
 *
 * _WIN32 is the macro Windows compilers predefine themselves; plain WIN32
 * only exists when <windows.h> or the build system defines it.  Either one
 * selects the Windows branch, so a Windows build without WIN32 no longer
 * falls through to the GCC-style attribute.
 */
#if defined(_WIN32) || defined(WIN32)
#define EXPORT __declspec(dllexport)
#define CALL __cdecl
#else
#define EXPORT __attribute__((visibility("default")))
#define CALL
#endif
#endif
|
||||
|
||||
#if !defined(M64P_PLUGIN_API)
|
||||
/******************************************************************
|
||||
Function: CloseDLL
|
||||
Purpose: This function is called when the emulator is closing
|
||||
down allowing the DLL to de-initialise.
|
||||
input: none
|
||||
output: none
|
||||
*******************************************************************/
|
||||
EXPORT void CALL CloseDLL(void);
|
||||
|
||||
/******************************************************************
|
||||
Function: DllAbout
|
||||
Purpose: This is an optional function that is provided
|
||||
to give further information about the DLL.
|
||||
input: a handle to the window that calls this function
|
||||
output: none
|
||||
*******************************************************************/
|
||||
EXPORT void CALL DllAbout(HWND hParent);
|
||||
|
||||
/******************************************************************
|
||||
Function: DllConfig
|
||||
Purpose: This is an optional function that is provided
|
||||
to allow the user to configure the DLL
|
||||
input: a handle to the window that calls this function
|
||||
output: none
|
||||
*******************************************************************/
|
||||
EXPORT void CALL DllConfig(HWND hParent);
|
||||
|
||||
/******************************************************************
|
||||
Function: DllTest
|
||||
Purpose: This is an optional function that is provided
|
||||
to allow the user to test the DLL
|
||||
input: a handle to the window that calls this function
|
||||
output: none
|
||||
*******************************************************************/
|
||||
EXPORT void CALL DllTest(HWND hParent);
|
||||
#endif
|
||||
|
||||
/******************************************************************
|
||||
Function: DoRspCycles
|
||||
Purpose: This function is to allow the RSP to run in parallel
|
||||
with the r4300 switching control back to the r4300 once
|
||||
the function ends.
|
||||
input: The number of cycles that is meant to be executed
|
||||
output: The number of cycles that was executed. This value can
|
||||
be greater than the number of cycles that the RSP
|
||||
should have performed.
|
||||
(this value is ignored if the RSP is stopped)
|
||||
*******************************************************************/
|
||||
EXPORT unsigned int CALL DoRspCycles(unsigned int Cycles);
|
||||
|
||||
/******************************************************************
|
||||
Function: GetDllInfo
|
||||
Purpose: This function allows the emulator to gather information
|
||||
about the DLL by filling in the PluginInfo structure.
|
||||
input: a pointer to a PLUGIN_INFO structure that needs to be
|
||||
filled by the function. (see def above)
|
||||
output: none
|
||||
*******************************************************************/
|
||||
EXPORT void CALL GetDllInfo(PLUGIN_INFO *PluginInfo);
|
||||
|
||||
/*
|
||||
* `GetRspDebugInfo` -- customarily deprecated by cxd4
|
||||
*
|
||||
* It was extraordinarily easy to re-invent debug facilities without
|
||||
* depending on the Microsoft-Windows-themed debug functions from this spec.
|
||||
*
|
||||
* What's more? No emulators supporting RSP plugins require this function.
|
||||
* It can be safely ignored as a non-portable custom extension to the spec.
|
||||
*/
|
||||
|
||||
/******************************************************************
|
||||
Function: InitiateRSP
|
||||
Purpose: This function is called when the DLL is started to give
|
||||
information from the emulator that the n64 RSP
|
||||
interface needs
|
||||
input: Rsp_Info is passed to this function which is defined
|
||||
above.
|
||||
CycleCount is the number of cycles between switching
|
||||
control between the RSP and r4300i core.
|
||||
output: none
|
||||
*******************************************************************/
|
||||
EXPORT void CALL InitiateRSP(RSP_INFO Rsp_Info, unsigned int *CycleCount);
|
||||
|
||||
/*
|
||||
* `InitiateRSPDebugger` -- customarily deprecated by cxd4
|
||||
*
|
||||
* Here, again, nothing about the full features of debugging this RSP
|
||||
* emulator needed to depend on any WIN32 fixations in this plugin spec.
|
||||
*
|
||||
* Also, again, as with the case of `GetRspDebugInfo`, the test of time has
|
||||
* passed the conclusion that no emulators require the RSP plugin to export
|
||||
* this procedure's symbol to be considered a valid RSP plugin.
|
||||
*/
|
||||
|
||||
/******************************************************************
|
||||
Function: RomClosed
|
||||
Purpose: This function is called when a rom is closed.
|
||||
input: none
|
||||
output: none
|
||||
*******************************************************************/
|
||||
EXPORT void CALL RomClosed(void);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
91
rsp_op.hpp
Normal file
91
rsp_op.hpp
Normal file
@ -0,0 +1,91 @@
|
||||
#ifndef RSP_OP_HPP__
|
||||
#define RSP_OP_HPP__
|
||||
|
||||
// Forward declaration so that this header can be included on its own: every
// prototype below only needs a pointer to the CPU state, never its layout.
namespace RSP
{
struct CPUState;
}

// C-linkage prototypes for the RSP operation handlers.
extern "C" {

// Coprocessor 0 moves; these are the only handlers that return a value.
int RSP_MFC0(RSP::CPUState *rsp, unsigned rt, unsigned rd);
int RSP_MTC0(RSP::CPUState *rsp, unsigned rd, unsigned rt);

// Coprocessor 2 register and control-register moves.
void RSP_MTC2(RSP::CPUState *rsp, unsigned rt, unsigned vd, unsigned e);
void RSP_MFC2(RSP::CPUState *rsp, unsigned rt, unsigned vs, unsigned e);
void RSP_CFC2(RSP::CPUState *rsp, unsigned rt, unsigned rd);
void RSP_CTC2(RSP::CPUState *rsp, unsigned rt, unsigned rd);

// Control-flow helpers; `opaque` is an untyped context pointer.
void RSP_CALL(void *opaque, unsigned target, unsigned ret);
void RSP_RETURN(void *opaque, unsigned pc);
void RSP_EXIT(void *opaque, int mode);

// Declares RSP_<op> with the signature shared by all vector load/store ops.
#define DECL_LS(op) \
    void RSP_##op(RSP::CPUState *rsp, unsigned rt, unsigned element, int offset, unsigned base)

// Vector loads.
DECL_LS(LBV);
DECL_LS(LSV);
DECL_LS(LLV);
DECL_LS(LDV);
DECL_LS(LQV);
DECL_LS(LRV);
DECL_LS(LPV);
DECL_LS(LUV);
DECL_LS(LHV);
DECL_LS(LFV);
DECL_LS(LTV);

// Vector stores.
DECL_LS(SBV);
DECL_LS(SSV);
DECL_LS(SLV);
DECL_LS(SDV);
DECL_LS(SQV);
DECL_LS(SRV);
DECL_LS(SPV);
DECL_LS(SUV);
DECL_LS(SHV);
DECL_LS(SFV);
DECL_LS(STV);

// Declares RSP_<op> with the signature shared by all vector compute ops.
#define DECL_COP2(op) \
    void RSP_##op(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e)

DECL_COP2(VMULF);
DECL_COP2(VMULU);
DECL_COP2(VMUDL);
DECL_COP2(VMUDM);
DECL_COP2(VMUDN);
DECL_COP2(VMUDH);
DECL_COP2(VMACF);
DECL_COP2(VMACU);
DECL_COP2(VMACQ);
DECL_COP2(VMADL);
DECL_COP2(VMADM);
DECL_COP2(VMADN);
DECL_COP2(VMADH);
DECL_COP2(VADD);
DECL_COP2(VSUB);
DECL_COP2(VABS);
DECL_COP2(VADDC);
DECL_COP2(VSUBC);
DECL_COP2(VSAR);
DECL_COP2(VLT);
DECL_COP2(VEQ);
DECL_COP2(VNE);
DECL_COP2(VGE);
DECL_COP2(VCL);
DECL_COP2(VCH);
DECL_COP2(VCR);
DECL_COP2(VMRG);
DECL_COP2(VAND);
DECL_COP2(VNAND);
DECL_COP2(VOR);
DECL_COP2(VNOR);
DECL_COP2(VXOR);
DECL_COP2(VNXOR);
DECL_COP2(VRCP);
DECL_COP2(VRCPL);
DECL_COP2(VRCPH);
DECL_COP2(VMOV);
DECL_COP2(VRSQ);
DECL_COP2(VRSQL);
DECL_COP2(VRSQH);
DECL_COP2(VNOP);
DECL_COP2(RESERVED);

}
|
||||
|
||||
#endif
|
136
state.hpp
Normal file
136
state.hpp
Normal file
@ -0,0 +1,136 @@
|
||||
#ifndef STATE_HPP__
|
||||
#define STATE_HPP__
|
||||
|
||||
#include "rsp.h"
|
||||
|
||||
// Sizes of the two RSP memories, in bytes and in 32-bit words.
#define DMEM_SIZE (4 * 1024)
#define IMEM_SIZE (4 * 1024)
#define DMEM_WORDS (DMEM_SIZE / 4)
// Derived from IMEM_SIZE (it used to be computed from DMEM_SIZE, which only
// gave the right answer because the two sizes happen to be equal).
#define IMEM_WORDS (IMEM_SIZE / 4)

// IMEM is divided into fixed-size code blocks.  CODE_BLOCK_SIZE_LOG2 must be
// kept equal to log2(CODE_BLOCK_SIZE).
#define CODE_BLOCK_SIZE (256)
#define CODE_BLOCK_WORDS (CODE_BLOCK_SIZE / 4)
#define CODE_BLOCK_SIZE_LOG2 (8)
#define CODE_BLOCKS (IMEM_SIZE / CODE_BLOCK_SIZE)
|
||||
|
||||
namespace RSP
|
||||
{
|
||||
// Indices into CP2::flags, one per vector flag register.
enum RSPFlags
{
    RSP_VCO = 0, // index used by the VSUB/VSUBC handlers for their flag I/O
    RSP_VCC = 1,
    RSP_VCE = 2
};
|
||||
|
||||
// Accumulator slice constants, listed here in ascending value.
// NOTE(review): presumably element offsets into CP2::acc.e, which holds
// 3 * 8 uint16_t elements -- confirm against the read_acc_*/write_acc_*
// helpers.
enum RSPAccumulator
{
    RSP_ACC_HI = 0,
    RSP_ACC_MD = 8,
    RSP_ACC_LO = 16
};
|
||||
|
||||
// Coprocessor 0 register numbers (0..15).
// NOTE(review): presumably these index CP0::cr, which has 16 entries --
// confirm against the MFC0/MTC0 handlers.
enum CP0Registers
{
    // DMA and SP status registers.
    CP0_REGISTER_DMA_CACHE = 0,
    CP0_REGISTER_DMA_DRAM = 1,
    CP0_REGISTER_DMA_READ_LENGTH = 2,
    CP0_REGISTER_DMA_WRITE_LENGTH = 3,
    CP0_REGISTER_SP_STATUS = 4,
    CP0_REGISTER_DMA_FULL = 5,
    CP0_REGISTER_DMA_BUSY = 6,
    CP0_REGISTER_SP_RESERVED = 7,

    // Command (CMD_*) registers.
    CP0_REGISTER_CMD_START = 8,
    CP0_REGISTER_CMD_END = 9,
    CP0_REGISTER_CMD_CURRENT = 10,
    CP0_REGISTER_CMD_STATUS = 11,
    CP0_REGISTER_CMD_CLOCK = 12,
    CP0_REGISTER_CMD_BUSY = 13,
    CP0_REGISTER_CMD_PIPE_BUSY = 14,
    CP0_REGISTER_CMD_TMEM_BUSY = 15,
};
|
||||
|
||||
// SP_STATUS read bits: one bit per condition, as seen when reading the
// status register.  These are preprocessor macros, so they are visible
// globally even though they are written inside namespace RSP.
#define SP_STATUS_HALT       0x0001
#define SP_STATUS_BROKE      0x0002
#define SP_STATUS_DMA_BUSY   0x0004
#define SP_STATUS_DMA_FULL   0x0008
#define SP_STATUS_IO_FULL    0x0010
#define SP_STATUS_SSTEP      0x0020
#define SP_STATUS_INTR_BREAK 0x0040
#define SP_STATUS_SIG0       0x0080
#define SP_STATUS_SIG1       0x0100
#define SP_STATUS_SIG2       0x0200
#define SP_STATUS_SIG3       0x0400
#define SP_STATUS_SIG4       0x0800
#define SP_STATUS_SIG5       0x1000
#define SP_STATUS_SIG6       0x2000
#define SP_STATUS_SIG7       0x4000

// SP_STATUS write bits: most conditions have a separate CLR and SET bit.
#define SP_CLR_HALT       0x00000001
#define SP_SET_HALT       0x00000002
#define SP_CLR_BROKE      0x00000004
#define SP_CLR_INTR       0x00000008
#define SP_SET_INTR       0x00000010
#define SP_CLR_SSTEP      0x00000020
#define SP_SET_SSTEP      0x00000040
#define SP_CLR_INTR_BREAK 0x00000080
#define SP_SET_INTR_BREAK 0x00000100
#define SP_CLR_SIG0       0x00000200
#define SP_SET_SIG0       0x00000400
#define SP_CLR_SIG1       0x00000800
#define SP_SET_SIG1       0x00001000
#define SP_CLR_SIG2       0x00002000
#define SP_SET_SIG2       0x00004000
#define SP_CLR_SIG3       0x00008000
#define SP_SET_SIG3       0x00010000
#define SP_CLR_SIG4       0x00020000
#define SP_SET_SIG4       0x00040000
#define SP_CLR_SIG5       0x00080000
#define SP_SET_SIG5       0x00100000
#define SP_CLR_SIG6       0x00200000
#define SP_SET_SIG6       0x00400000
#define SP_CLR_SIG7       0x00800000
#define SP_SET_SIG7       0x01000000
|
||||
|
||||
template<int N>
|
||||
struct alignas(rsp_vect_t) AlignedRSPVector
|
||||
{
|
||||
uint16_t e[8 * N];
|
||||
};
|
||||
|
||||
// Coprocessor 0 state: pointers only, all null until assigned.
// NOTE(review): presumably the pointers refer to registers owned by the
// host emulator (cf. the RCPREG pointers in RSP_INFO) -- confirm.
struct CP0
{
    uint32_t *cr[16] = {};   // sixteen register pointers
    uint32_t *irq = nullptr; // interrupt register pointer
};
|
||||
|
||||
struct alignas(64) CP2
|
||||
{
|
||||
AlignedRSPVector<1> regs[32];
|
||||
AlignedRSPVector<2> flags[3];
|
||||
AlignedRSPVector<3> acc;
|
||||
int16_t div_out;
|
||||
int16_t div_in;
|
||||
int8_t dp_flag;
|
||||
};
|
||||
|
||||
struct CPUState
|
||||
{
|
||||
uint32_t pc = 0;
|
||||
uint32_t dirty_blocks = 0;
|
||||
static_assert(CODE_BLOCKS <= 32, "Code blocks must fit in 32-bit register.");
|
||||
|
||||
uint32_t has_delay_slot = 0;
|
||||
uint32_t branch_target = 0;
|
||||
|
||||
uint32_t sr[32] = {};
|
||||
uint32_t *dmem = nullptr;
|
||||
uint32_t *imem = nullptr;
|
||||
uint32_t *rdram = nullptr;
|
||||
|
||||
CP2 cp2 = {};
|
||||
CP0 cp0;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user