Merge pull request #791 from libretro/dynarec

Dynarec updates
This commit is contained in:
Autechre 2021-05-29 08:15:48 +02:00 committed by GitHub
commit b8a3982f6c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
64 changed files with 3954 additions and 1692 deletions

View File

@ -9,6 +9,7 @@ HAVE_LIGHTREC = 1
LINK_STATIC_LIBCPLUSPLUS = 1
THREADED_RECOMPILER = 1
LIGHTREC_DEBUG = 0
LIGHTREC_LOG_LEVEL = 2
CORE_DIR := .
HAVE_GRIFFIN = 0

View File

@ -161,6 +161,19 @@ endif
ifeq ($(HAVE_LIGHTREC), 1)
FLAGS += -DHAVE_LIGHTREC
ifeq ($(LIGHTREC_LOG_LEVEL),)
FLAGS += -DLOG_LEVEL=2 \
-DENABLE_DISASSEMBLER=0
else
ifeq ($(LIGHTREC_LOG_LEVEL), 4)
FLAGS += -DLOG_LEVEL=4 \
-DENABLE_DISASSEMBLER=1
else
FLAGS += -DLOG_LEVEL=$(LIGHTREC_LOG_LEVEL) \
-DENABLE_DISASSEMBLER=0
endif
endif
ifeq ($(NO_GCC), 1)
FLAGS += -DHAS_DEFAULT_ELM=0
else
@ -183,7 +196,8 @@ ifeq ($(HAVE_LIGHTREC), 1)
endif
INCFLAGS += -I$(DEPS_DIR)/lightning/include \
-I$(DEPS_DIR)/lightrec
-I$(DEPS_DIR)/lightrec \
-I$(CORE_DIR)/lightning-lightrec-include
endif
ifneq ($(HAVE_GRIFFIN), 1)
@ -356,7 +370,6 @@ ifeq ($(HAVE_LIGHTREC), 1)
$(DEPS_DIR)/lightning/lib/jit_size.c \
$(DEPS_DIR)/lightning/lib/lightning.c \
$(DEPS_DIR)/lightrec/blockcache.c \
$(DEPS_DIR)/lightrec/disassembler.c \
$(DEPS_DIR)/lightrec/emitter.c \
$(DEPS_DIR)/lightrec/interpreter.c \
$(DEPS_DIR)/lightrec/lightrec.c \
@ -371,8 +384,12 @@ ifeq ($(HAVE_LIGHTREC), 1)
endif
endif
ifeq ($(LIGHTREC_LOG_LEVEL), 4)
SOURCES_C += $(DEPS_DIR)/lightrec/disassembler.c
endif
ifneq (,$(findstring win,$(platform)))
SOURCES_C += $(DEPS_DIR)/mman/sys/mman.c
SOURCES_C += $(DEPS_DIR)/mman/sys/mman.c
endif
endif ##ifeq ($(HAVE_LIGHTREC), 1)

3
deps/lightning/.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "gnulib"]
path = gnulib
url = git://git.sv.gnu.org/gnulib.git

View File

@ -6,7 +6,7 @@
[subrepo]
remote = https://git.savannah.gnu.org/git/lightning.git
branch = master
commit = b0b8eb5e856c0d29053dc842e1919a2eb58c8cda
commit = 876c1043bec5bfd594482b40700c84693e40d0eb
parent = 9f797430963d9cf0fcef7d963466f9cac7026de2
method = merge
cmdver = 0.4.1
cmdver = 0.4.3

View File

@ -1,3 +1,38 @@
2021-04-03 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
* check/Makefile.am: Add test for the live instruction.
* check/live.ok: New file.
* check/live.tst: New file.
* doc/body.texi: Add documentation for the live instruction and
for jit_get_reg/jit_unget_reg. Fix menu entries.
* include/lightning.h.in (jit_get_reg, jit_unget_reg): Expose the
macros in the public header file.
* include/lightning/jit_private.h (jit_get_reg, jit_unget_reg):
Remove the macros from the private header file.
2021-04-03 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
* Makefile.am, check/Makefile.am, doc/Makefile.am,
lib/Makefile.am: Include $(top_builddir)/include in include paths
for the autoconf-generated header file lightning.h.
2021-04-03 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
* doc/.gitignore: Add version.texi to list of ignored files.
* doc/version.texi: Remove file from version control.
2020-04-18 Paulo Andrade <pcpa@gnu.org>
* lib/jit_x86-cpu.c, lib/jit_x86.c: Implement %rip relative
calls and jumps on x86_64. Currently very conservative, assuming
a jit block can be larger than 2G, so, if a jump or call is in
the same jit generation, but target is unknown, use an indirect
branch (could have an option to assume a jit code block is
never larger than 2G). One deoptimization is that jmpi is no
longer always generated as an ip relative jump; previously it was
implicitly assumed that a jmpi displacement would never exceed 2G.
Overall this is still an optimization.
2020-01-23 Paulo Andrade <pcpa@gnu.org>
* lib/lightning.c: Add a proper fix to the condition of considering
@ -26,7 +61,7 @@
to the set of registers to scan for live range, what might
consume a lot of cpu time, doing nothing.
2019-09-16 Marc Nieper-WiÃkirchen <marc@nieper-wisskirchen.de>
2019-09-16 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
* include/lightning/jit_x86.h, lib/jit_x86.c: Correct x86_64
backend, made %r12 a callee-save register as dictated by the
@ -65,7 +100,7 @@
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=925129
* THANKS: update.
2019-08-29 Marc Nieper-WiÃkirchen <marc@nieper-wisskirchen.de>
2019-08-29 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
* include/lightning/jit_private.h: Move definition of offsetof
from the public header file here.
@ -1415,7 +1450,7 @@
* include/lightning/jit_arm.h: Correct wrong jit_f macro
definition.
* include/lightning/jit_ia64.h, include/lightning/jit_ppc.h:
* include/lightning/jit_ia64.h, include/lightning/jit_ppc.h:
Correct wrong jit_r macro definition.
* lib/jit_x86-x87.c, lib/jit_x86.c: Actually use the
@ -1884,7 +1919,7 @@
divr_f and divrd_d implementation.
* check/lightning.c: Add __ia64__ preprocessor define
on Itanium.
on Itanium.
* check/alu.inc, check/clobber.tst, check/float.tst: Define
several macros conditionally to __ia64__. This is required
@ -2991,7 +3026,7 @@
* include/lightning/jit_x86.h: Correct typo in macro name.
* lib/jit_arm.c, lib/jit_arm-cpu.c, lib/jit_mips.c,
lib/jit_mips-cpu.c, lib/jit_ppc.c, lib/jit_ppc-cpu.c,
lib/jit_mips-cpu.c, lib/jit_ppc.c, lib/jit_ppc-cpu.c,
lib/jit_x86.c, lib/jit_x86-cpu.c: Correct wrong code to get
current jit function pointer.
@ -3225,7 +3260,7 @@
* lightning/i386/core.h (jit_ldr_c, jit_ldxr_c, jit_ldr_s,
jit_ldxr_s): Move...
* lightning/i386/core-32.h: ... here.
* lightning/i386/core-64.h (jit_ldr_c, jit_ldxr_c, jit_ldr_s,
* lightning/i386/core-64.h (jit_ldr_c, jit_ldxr_c, jit_ldr_s):
Use movsbq and movswq.
2010-08-10 Paulo César Pereira de Andrade <pcpa@mandriva.com.br>
@ -3306,11 +3341,11 @@
* lightning/ppc/funcs.h (jit_flush_code): modified the computation
of start/end. The pointer arithmetic was done without casting. It
prevented compilation with recent gcc versions.
prevented compilation with recent gcc versions.
* lightning/ppc/core.h (jit_pushr_i): The offset for the store was
incorrect. Should have been 4 bytes below SP (not above).
* lightning/ppc/core.h (jit_popr_i): The offset for the load was
incorrect. Should have been 0 (not +8).
* lightning/ppc/core.h (jit_popr_i): The offset for the load was
incorrect. Should have been 0 (not +8).
2008-06-17 Paolo Bonzini <bonzini@gnu.org>
@ -3589,7 +3624,7 @@
* lightning/i386/fp-32.h: ... here.
* lightning/i386/fp-64.h: Write the code.
* lightning/sparc/fp.h: Fix jit_extr_{f_d,d_f} register order.
2006-11-22 Paolo Bonzini <bonzini@gnu.org>
* lightning/i386/asm-i386.h: Move x86-64 instructions...
@ -3803,7 +3838,7 @@
2006-01-23 Paolo Bonzini <bonzini@gnu.org>
* configure.ac: Fix comments in config.h.in.
2005-11-25 Paolo Bonzini <bonzini@gnu.org>
* lightning/sparc/fp.h: Fix header comment.
@ -4123,7 +4158,7 @@
* lightning/ppc/funcs.h: correctly align stack pointer
No changelogs for the assemblers (lightning directory) until 1.0
2003-03-27 Paolo Bonzini <bonzini@gnu.org>
* tests/printf2.c: new test
@ -4135,7 +4170,7 @@ No changelogs for the assemblers (lightning directory) until 1.0
2001-01-19 Paolo Bonzini <bonzini@gnu.org>
* configure.in: support cross-assembling
* disass/bfd.h, disass/dis-asm.h, disass/dis-buf.c,
disass/i386-dis.c, disass/i386.h, disass/ppc-dis.c,
disass/ppc.h, disass/ppc-opc.c, disass/sparc-dis.c,

View File

@ -17,18 +17,22 @@
ACLOCAL_AMFLAGS = -I m4
SUBDIRS = \
gnulib-lib \
check \
doc \
include \
lib
EXTRA_DIST = m4/gnulib-cache.m4
pkgconfiglibdir = $(libdir)/pkgconfig
pkgconfiglib_DATA = lightning.pc
if get_jit_size
JIT_SIZE_PATH = "$(top_builddir)/jit_$(cpu)-sz.c"
AM_CPPFLAGS=-DGET_JIT_SIZE=1 -DJIT_SIZE_PATH='$(JIT_SIZE_PATH)'
AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
-D_GNU_SOURCE $(LIGHTNING_CFLAGS)
noinst_PROGRAMS = size
size_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)

96
deps/lightning/README-hacking vendored Normal file
View File

@ -0,0 +1,96 @@
This README-hacking file describes the development environment.
Everything related to the development of GNU lightning is on Savannah:
https://savannah.gnu.org/projects/lightning/.
* Working from the Repository
** Autotools
This distribution uses the latest stable versions of Automake and Autoconf.
If you are getting the sources from git (or change configure.ac), you'll
need to have these tools installed to (re)build. Both of these programs are
available from ftp://ftp.gnu.org/gnu.
If you're using a GNU/Linux distribution, the easiest way to install these
packages depends on your system. The following shell command should work
for Debian-based systems such as Ubuntu:
$ sudo apt-get install autoconf automake
** Building
After getting the git sources, and installing the tools above, you can run
$ ./bootstrap
$ ./configure
$ make
$ make check
to do a fresh build. At this point, there should be no difference between
your local copy, and the master copy:
$ git diff
should output no difference.
After that first time, running make should suffice.
** Gnulib
This distribution also uses Gnulib (https://www.gnu.org/software/gnulib) to
share common files, stored as a submodule in git.
** Updating
$ git pull
$ git submodule update
** Updating a submodule
To update a submodule, say gnulib, do as follows:
Get the most recent version of the master branch from git.
$ cd gnulib
$ git pull
Make sure GNU lightning can live with that version of gnulib.
$ cd ..
$ ./bootstrap
$ make distcheck
Register your changes.
$ git commit ...
----
Copyright 2021 Free Software Foundation, Inc.
This file is part of GNU lightning.
GNU lightning is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation; either version 3, or (at
your option) any later version.
GNU lightning is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this program. If not, see
<http://www.gnu.org/licenses/>.
Local Variables:
mode: outline
fill-column: 76
End:

1090
deps/lightning/bootstrap vendored Executable file

File diff suppressed because it is too large Load Diff

76
deps/lightning/bootstrap.conf vendored Normal file
View File

@ -0,0 +1,76 @@
# Bootstrap configuration.
# Copyright (C) 2006-2020 Free Software Foundation, Inc.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# gnulib modules used by this package.
gnulib_modules="
"
# gnulib library name.
gnulib_name=libgnu
# directories.
source_base=gnulib-lib
doc_base=gnulib-doc
# Additional xgettext options to use. Use "\\\newline" to break lines.
XGETTEXT_OPTIONS=$XGETTEXT_OPTIONS'\\\
--from-code=UTF-8\\\
--flag=asprintf:2:c-format --flag=vasprintf:2:c-format\\\
--flag=asnprintf:3:c-format --flag=vasnprintf:3:c-format\\\
--flag=wrapf:1:c-format\\\
'
# If "AM_GNU_GETTEXT(external" or "AM_GNU_GETTEXT([external]"
# appears in configure.ac, exclude some unnecessary files.
# Without grep's -E option (not portable enough, pre-configure),
# the following test is ugly. Also, this depends on the existence
# of configure.ac, not the obsolescent-named configure.in. But if
# you're using this infrastructure, you should care about such things.
gettext_external=0
grep '^[ ]*AM_GNU_GETTEXT(external\>' configure.ac > /dev/null &&
gettext_external=1
grep '^[ ]*AM_GNU_GETTEXT(\[external\]' configure.ac > /dev/null &&
gettext_external=1
if test $gettext_external = 1; then
# Gettext supplies these files, but we don't need them since
# we don't have an intl subdirectory.
excluded_files='
m4/glibc2.m4
m4/intdiv0.m4
m4/lcmessage.m4
m4/lock.m4
m4/printf-posix.m4
m4/size_max.m4
m4/uintmax_t.m4
m4/ulonglong.m4
m4/visibility.m4
m4/xsize.m4
'
fi
# Build prerequisites
buildreq="\
autoconf 2.59
automake 1.9.6
git 1.5.5
tar -
"
bootstrap_sync=true

View File

@ -14,7 +14,7 @@
# License for more details.
#
AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE
AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE
check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list
@ -91,6 +91,7 @@ EXTRA_DIST = \
call.tst call.ok \
float.tst float.ok \
jmpr.tst jmpr.ok \
live.tst live.ok \
put.tst put.ok \
qalu.inc \
qalu_mul.tst qalu_mul.ok \
@ -125,7 +126,7 @@ base_TESTS = \
fop_abs fop_sqrt \
varargs stack \
clobber carry call \
float jmpr put \
float jmpr live put \
qalu_mul qalu_div \
range ranger ret tramp \
va_list
@ -315,4 +316,3 @@ CLEANFILES = $(TESTS)
debug: lightning
$(LIBTOOL) --mode=execute gdb lightning

View File

@ -3791,11 +3791,11 @@ execute(int argc, char *argv[])
function = jit_emit();
if (flag_verbose > 1 || flag_disasm) {
jit_print();
fprintf(stdout, " - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
fprintf(stderr, " - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
}
if (flag_verbose > 0 || flag_disasm) {
jit_disassemble();
fprintf(stdout, " - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
fprintf(stderr, " - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
}
jit_clear_state();

1
deps/lightning/check/live.ok vendored Normal file
View File

@ -0,0 +1 @@
ok

33
deps/lightning/check/live.tst vendored Normal file
View File

@ -0,0 +1,33 @@
.data 16
ok:
.c "ok"
.code
jmpi main
check_r0:
prolog
movi %v0 exit_r0
movi %r0 1
movi %r2 10
// on x86 this changes %rax; on other arches it could use %r0 as a temporary
divi %r1 %r2 3
live %r0
// %r0 must still be 1
jmpr %v0
exit_r0:
retr %r0
epilog
main:
prolog
calli check_r0
retval %r1
beqi r0_ok %r1 1
calli @abort
r0_ok:
prepare
pushargi ok
finishi @puts
ret
epilog

View File

@ -7,26 +7,29 @@ dnl GNU lightning is free software; you can redistribute it and/or modify it
dnl under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3, or (at your option)
dnl any later version.
dnl
dnl
dnl GNU lightning is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl
AC_PREREQ(2.57)
AC_PREREQ(2.64)
AC_INIT([GNU lightning], 2.1.3, pcpa@gnu.org, lightning)
AC_CONFIG_AUX_DIR([build-aux])
AC_CANONICAL_TARGET
AC_CONFIG_SRCDIR([Makefile.am])
AM_INIT_AUTOMAKE([dist-bzip2])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AC_CONFIG_MACRO_DIR(m4)
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_HEADERS(config.h)
AC_CONFIG_HEADERS([config.h])
AC_PROG_CC
AC_PROG_INSTALL
AC_PROG_LIBTOOL
gl_EARLY
AM_PROG_AR
LT_INIT
gl_INIT
case "$target_cpu" in
ia64)
@ -291,6 +294,7 @@ fi
AC_OUTPUT([Makefile
lightning.pc
gnulib-lib/Makefile
doc/Makefile
include/Makefile
include/lightning/Makefile

View File

@ -1,2 +1,3 @@
*.info*
stamp-*
/version.texi

View File

@ -14,7 +14,7 @@
# License for more details.
#
AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE
AM_CFLAGS = -I $(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE
info_TEXINFOS = lightning.texi
MOSTLYCLEANFILES = lightning.tmp

View File

@ -30,6 +30,7 @@ dynamic code generation.
* The instruction set:: The RISC instruction set used in GNU lightning
* GNU lightning examples:: GNU lightning's examples
* Reentrancy:: Re-entrant usage of GNU lightning
* Registers:: Accessing the whole register file
* Customizations:: Advanced code generation customizations
* Acknowledgements:: Acknowledgements for GNU lightning
@end menu
@ -43,8 +44,8 @@ This document describes @value{TOPIC} the @lightning{} library for
dynamic code generation.
@end iftex
Dynamic code generation is the generation of machine code
at runtime. It is typically used to strip a layer of interpretation
Dynamic code generation is the generation of machine code
at runtime. It is typically used to strip a layer of interpretation
by allowing compilation to occur at runtime. One of the most
well-known applications of dynamic code generation is perhaps that
of interpreters that compile source code to an intermediate bytecode
@ -53,7 +54,7 @@ approach effectively combines the portability of bytecode
representations with the speed of machine code. Another common
application of dynamic code generation is in the field of hardware
simulators and binary emulators, which can use the same techniques
to translate simulated instructions to the instructions of the
to translate simulated instructions to the instructions of the
underlying machine.
Yet other applications come to mind: for example, windowing
@ -68,7 +69,7 @@ retargeted for each machine; in addition, coding a run-time code
generator is a tedious and error-prone task more than a difficult one.
@lightning{} provides a portable, fast and easily retargetable dynamic
code generation system.
code generation system.
To be portable, @lightning{} abstracts over current architectures'
quirks and unorthogonalities. The interface that it exposes to is that
@ -695,6 +696,51 @@ in = arg @rem{! Same as above}
ret @rem{! Return to caller}
@end example
@item Register liveness
During code generation, @lightning{} occasionally needs scratch registers
or needs to use architecture-defined registers. For that, @lightning{}
internally maintains register liveness information.
In the following example, @code{qdivr} will need special registers like
@code{R0} on some architectures. As @lightning{} understands that
@code{R0} is used in the subsequent instruction, it will create
save/restore code for @code{R0} if necessary.
@example
...
qdivr V0, V1, V2, V3
movr V3, R0
...
@end example
The same is not true in the example that follows. Here, @code{R0} is
not live after the division operation because @code{R0} is neither an
argument register nor a callee-save register. Thus, no save/restore
code for @code{R0} will be created.
@example
...
qdivr V0, V1, V2, V3
jmpr R1
...
@end example
The @code{live} instruction can be used to mark a register as live from
that point on, as in the following example. Here, @code{R0} will be
preserved across the division.
@example
...
qdivr V0, V1, V2, V3
live R0
jmpr R1
...
@end example
The @code{live} instruction is useful at code entry and exit points,
like after and before a @code{callr} instruction.
@item Trampolines, continuations and tail call optimization
Frequently it is required to generate jit code that must jump to
@ -1005,9 +1051,9 @@ programmer would write):
mov %i0, %g2 retl
inc %g2 inc %o0
mov %g2, %i0
restore
retl
nop
restore
retl
nop
@end example
In this case, @lightning{} introduces overhead to create a register
window (not knowing that the procedure is a leaf procedure) and to
@ -1480,7 +1526,7 @@ implementation and to avoid needing the user to keep adding an extra
argument to every call, as multiple jit states generating code in
parallel should be very uncommon.
@section Registers
@node Registers
@chapter Accessing the whole register file
As mentioned earlier in this chapter, all @lightning{} back-ends are
@ -1496,6 +1542,33 @@ constant. Of course, expressions like @code{JIT_R0} and
@code{JIT_R(0)} denote the same register, and likewise for
integer callee-saved, or floating-point, registers.
@section Scratch registers
For operations that @lightning{} does not support directly, such as
storing a literal in memory, @code{jit_get_reg} and @code{jit_unget_reg}
can be used to acquire and release a scratch register, as in the
following pattern:
@example
jit_int32_t reg = jit_get_reg (jit_class_gpr);
jit_movi (reg, immediate);
jit_stxi (offsetof (some_struct, some_field), JIT_V0, reg);
jit_unget_reg (reg);
@end example
As @code{jit_get_reg} and @code{jit_unget_reg} may generate spills and
reloads but don't follow branches, the code between both must be in
the same basic block and must not contain any branches as in the
following (bad) example.
@example
jit_int32_t reg = jit_get_reg (jit_class_gpr);
jit_ldxi (reg, JIT_V0, offset);
jump = jit_bnei (reg, V0);
jit_movr (JIT_V1, reg);
jit_patch (jump);
jit_unget_reg (reg);
@end example
@node Customizations
@chapter Customizations

View File

@ -1,4 +0,0 @@
@set UPDATED 3 October 2017
@set UPDATED-MONTH October 2017
@set EDITION 2.1.3
@set VERSION 2.1.3

1
deps/lightning/gnulib vendored Submodule

@ -0,0 +1 @@
Subproject commit e54b645fc6b8422562327443bda575c65d931fbd

2
deps/lightning/gnulib-lib/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/Makefile.am
/dummy.c

View File

@ -1011,6 +1011,12 @@ extern void _jit_retr_d(jit_state_t*, jit_fpr_t);
extern void _jit_reti_d(jit_state_t*, jit_float64_t);
extern void _jit_retval_d(jit_state_t*, jit_fpr_t);
#define jit_get_reg(s) _jit_get_reg(_jit,s)
extern jit_int32_t _jit_get_reg(jit_state_t*, jit_int32_t);
#define jit_unget_reg(r) _jit_unget_reg(_jit,r)
extern void _jit_unget_reg(jit_state_t*, jit_int32_t);
#define jit_new_node(c) _jit_new_node(_jit,c)
extern jit_node_t *_jit_new_node(jit_state_t*, jit_code_t);
#define jit_new_node_w(c,u) _jit_new_node_w(_jit,c,u)

View File

@ -34,9 +34,9 @@
typedef enum {
#define jit_r(i) (_V0 + (i))
#if NEW_ABI
# define jit_r_num() 7
# define jit_r_num() 8
#else
# define jit_r_num() 11
# define jit_r_num() 12
#endif
#define jit_v(i) (_S0 + (i))
#define jit_v_num() 8
@ -55,6 +55,7 @@ typedef enum {
# define JIT_R4 _T6
# define JIT_R5 _T7
# define JIT_R6 _T8
# define JIT_R7 _T9
#else
# define JIT_R2 _T0
# define JIT_R3 _T1
@ -65,6 +66,7 @@ typedef enum {
# define JIT_R8 _T6
# define JIT_R9 _T7
# define JIT_R10 _T8
# define JIT_R11 _T9
#endif
_V0, _V1,
#if !NEW_ABI

View File

@ -672,14 +672,6 @@ _jit_regarg_set(jit_state_t*, jit_node_t*, jit_int32_t);
extern void
_jit_regarg_clr(jit_state_t*, jit_node_t*, jit_int32_t);
#define jit_get_reg(s) _jit_get_reg(_jit,s)
extern jit_int32_t
_jit_get_reg(jit_state_t*, jit_int32_t);
#define jit_unget_reg(r) _jit_unget_reg(_jit,r)
extern void
_jit_unget_reg(jit_state_t*, jit_int32_t);
#define jit_save(reg) _jit_save(_jit, reg)
extern void
_jit_save(jit_state_t*, jit_int32_t);

View File

@ -14,7 +14,8 @@
# License for more details.
#
AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
-D_GNU_SOURCE $(LIGHTNING_CFLAGS)
liblightning_LTLIBRARIES = liblightning.la
liblightning_la_LDFLAGS = -version-info 1:0:0

View File

@ -50,7 +50,7 @@ static asymbol *disasm_synthetic;
static long disasm_num_symbols;
static long disasm_num_synthetic;
static jit_state_t *disasm_jit;
#define disasm_stream stdout
static FILE *disasm_stream;
#endif
/*
@ -73,6 +73,8 @@ jit_init_debug(const char *progname)
}
bfd_check_format(disasm_bfd, bfd_object);
bfd_check_format(disasm_bfd, bfd_archive);
if (!disasm_stream)
disasm_stream = stderr;
INIT_DISASSEMBLE_INFO(disasm_info, disasm_stream, fprintf);
# if defined(__i386__) || defined(__x86_64__)
disasm_info.arch = bfd_arch_i386;

View File

@ -107,6 +107,10 @@ typedef union {
# endif
/* Non-zero when im fits in a signed 16-bit immediate field, i.e. lies in
 * the closed range [-32768, 32767].  The lower bound was previously
 * written as -32678, which rejected part of the representable range. */
# define can_sign_extend_short_p(im) ((im) >= -32768 && (im) <= 32767)
# define can_zero_extend_short_p(im) ((im) >= 0 && (im) <= 65535)
# define is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((im) + 1) == 1) : 0)
/* Non-zero when im is a contiguous run of set bits that reaches the most
 * significant bit (e.g. 0xffff0000): adding the lowest set bit to such a
 * value wraps around to zero.  The arithmetic is done in unsigned long so
 * that both the shift and the wrap-around stay well defined when the
 * lowest set bit is at position 31 or above (a plain int 1 cannot be
 * shifted that far). */
# define is_high_mask(im) ((im) ? (__builtin_popcountl((unsigned long)(im) + (1UL << __builtin_ctzl(im))) == 0) : 0)
# define masked_bits_count(im) __builtin_popcountl(im)
# define unmasked_bits_count(im) (__WORDSIZE - masked_bits_count(im))
# if __WORDSIZE == 32
# define can_sign_extend_int_p(im) 1
# define can_zero_extend_int_p(im) 1
@ -340,8 +344,10 @@ static void _nop(jit_state_t*,jit_int32_t);
# define DSRLV(rd,rt,rs) rrr_t(rs,rt,rd,MIPS_DSRLV)
# define DSRL(rd,rt,sa) rrit(rt,rd,sa,MIPS_DSRL)
# define DSRL32(rd,rt,sa) rrit(rt,rd,sa,MIPS_DSRL32)
# define INS(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos,pos+size-1,MIPS_INS)
# define DINS(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos,pos+size-1,MIPS_DINS)
# define INS(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-1,pos,MIPS_INS)
# define DINS(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-1,pos,MIPS_DINS)
# define EXT(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos,MIPS_EXT)
# define DEXT(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos,MIPS_DEXT)
# define ROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_SRL)
# define DROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL)
# define MFHI(rd) rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFHI)
@ -494,7 +500,8 @@ static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define xorr(r0,r1,r2) XOR(r0,r1,r2)
# define xori(r0,r1,i0) _xori(_jit,r0,r1,i0)
static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define movr(r0,r1) orr(r0,r1,_ZERO_REGNO)
# define movr(r0,r1) _movr(_jit,r0,r1)
static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
# define movi(r0,i0) _movi(_jit,r0,i0)
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
# define movi_p(r0,i0) _movi_p(_jit,r0,i0)
@ -1160,7 +1167,20 @@ _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
jit_int32_t reg;
if (can_zero_extend_short_p(i0))
ANDI(r0, r1, i0);
else {
else if (is_low_mask(i0)) {
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
if (masked_bits_count(i0) <= 32)
EXT(r0, r1, 0, masked_bits_count(i0));
else
#endif
{
lshi(r0, r1, unmasked_bits_count(i0));
rshi_u(r0, r0, unmasked_bits_count(i0));
}
} else if (is_high_mask(i0)) {
rshi(r0, r1, unmasked_bits_count(i0));
lshi(r0, r0, unmasked_bits_count(i0));
} else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
AND(r0, r1, rn(reg));
@ -1196,6 +1216,13 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
}
static void
_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (r0 != r1)
orr(r0, r1, _ZERO_REGNO);
}
static void
_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
@ -2869,10 +2896,11 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
static void
_callr(jit_state_t *_jit, jit_int32_t r0)
{
JALR(r0);
if (r0 != _T9_REGNO)
movr(_T9_REGNO, r0);
JALR(r0);
NOP(1);
else
NOP(1);
}
static void

View File

@ -1160,7 +1160,11 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
ROTLWI(rn(reg), r1, 8);
RLWIMI(rn(reg), r1, 24, 0, 7);
RLWIMI(rn(reg), r1, 24, 16, 23);
# if __WORDSIZE == 64
CLRLDI(r0, rn(reg), 32);
# else
MR(r0,rn(reg));
# endif
jit_unget_reg(reg);
}

View File

@ -20,12 +20,12 @@
#include <lightning.h>
#include <lightning/jit_private.h>
#define print_chr(value) fputc(value, stdout)
#define print_hex(value) fprintf(stdout, "0x%lx", value)
#define print_dec(value) fprintf(stdout, "%ld", value)
#define print_flt(value) fprintf(stdout, "%g", value)
#define print_str(value) fprintf(stdout, "%s", value)
#define print_ptr(value) fprintf(stdout, "%p", value)
#define print_chr(value) fputc(value, print_stream)
#define print_hex(value) fprintf(print_stream, "0x%lx", value)
#define print_dec(value) fprintf(print_stream, "%ld", value)
#define print_flt(value) fprintf(print_stream, "%g", value)
#define print_str(value) fprintf(print_stream, "%s", value)
#define print_ptr(value) fprintf(print_stream, "%p", value)
#define print_reg(value) \
do { \
if ((value) & jit_regno_patch) \
@ -45,6 +45,11 @@
* Initialization
*/
#include "jit_names.c"
/*
* Initialization
*/
static FILE *print_stream;
/*
* Implementation
@ -54,6 +59,9 @@ _jit_print(jit_state_t *_jit)
{
jit_node_t *node;
if (!print_stream)
print_stream = stderr;
if ((node = _jitc->head)) {
jit_print_node(node);
for (node = node->next; node; node = node->next) {

View File

@ -661,10 +661,22 @@ static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
static void _callr(jit_state_t*, jit_int32_t);
# define calli(i0) _calli(_jit, i0)
static jit_word_t _calli(jit_state_t*, jit_word_t);
# if __X64
# define calli_p(i0) _calli_p(_jit, i0)
static jit_word_t _calli_p(jit_state_t*, jit_word_t);
# else
# define calli_p(i0) calli(i0)
# endif
# define jmpr(r0) _jmpr(_jit, r0)
static void _jmpr(jit_state_t*, jit_int32_t);
# define jmpi(i0) _jmpi(_jit, i0)
static jit_word_t _jmpi(jit_state_t*, jit_word_t);
# if __X64
# define jmpi_p(i0) _jmpi_p(_jit, i0)
static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
# else
# define jmpi_p(i0) jmpi(i0)
# endif
# define jmpsi(i0) _jmpsi(_jit, i0)
static void _jmpsi(jit_state_t*, jit_uint8_t);
# define prolog(node) _prolog(_jit, node)
@ -3411,27 +3423,41 @@ static jit_word_t
_calli(jit_state_t *_jit, jit_word_t i0)
{
jit_word_t word;
#if __X64
jit_int32_t reg;
reg = jit_get_reg(jit_class_gpr);
word = movi_p(rn(reg), i0);
callr(rn(reg));
jit_unget_reg(reg);
#else
jit_word_t w;
ic(0xe8);
w = i0 - (_jit->pc.w + 4);
ii(w);
word = _jit->pc.w;
#if __X64
w = i0 - (_jit->pc.w + 5);
if ((jit_int32_t)w == w) {
#endif
ic(0xe8);
w = i0 - (_jit->pc.w + 4);
ii(w);
word = _jit->pc.w;
#if __X64
}
else
word = calli_p(i0);
#endif
return (word);
}
#if __X64
static jit_word_t
_calli_p(jit_state_t *_jit, jit_word_t i0)
{
jit_word_t word;
jit_int32_t reg;
reg = jit_get_reg(jit_class_gpr);
word = movi_p(rn(reg), i0);
callr(rn(reg));
jit_unget_reg(reg);
return (word);
}
#endif
static void
_jmpr(jit_state_t *_jit, jit_int32_t r0)
{
rex(0, WIDE, _NOREG, _NOREG, r0);
rex(0, 0, _NOREG, _NOREG, r0);
ic(0xff);
mrm(0x03, 0x04, r7(r0));
}
@ -3439,13 +3465,38 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0)
static jit_word_t
_jmpi(jit_state_t *_jit, jit_word_t i0)
{
jit_word_t word;
jit_word_t w;
ic(0xe9);
w = i0 - (_jit->pc.w + 4);
ii(w);
return (_jit->pc.w);
#if __X64
w = i0 - (_jit->pc.w + 5);
if ((jit_int32_t)w == w) {
#endif
ic(0xe9);
w = i0 - (_jit->pc.w + 4);
ii(w);
word = _jit->pc.w;
#if __X64
}
else
word = jmpi_p(i0);
#endif
return (word);
}
#if __X64
static jit_word_t
_jmpi_p(jit_state_t *_jit, jit_word_t i0)
{
jit_word_t word;
jit_int32_t reg;
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
word = movi_p(rn(reg), i0);
jmpr(rn(reg));
jit_unget_reg(reg);
return (word);
}
#endif
static void
_jmpsi(jit_state_t *_jit, jit_uint8_t i0)
{
@ -3830,6 +3881,7 @@ _patch_at(jit_state_t *_jit, jit_node_t *node,
switch (node->code) {
# if __X64
case jit_code_calli:
case jit_code_jmpi:
# endif
case jit_code_movi:
patch_abs(instr, label);

View File

@ -2012,7 +2012,7 @@ _emit_code(jit_state_t *_jit)
if (temp->flag & jit_flag_patch)
jmpi(temp->u.w);
else {
word = jmpi(_jit->pc.w);
word = jmpi_p(_jit->pc.w);
patch(word, node);
}
}
@ -2027,9 +2027,12 @@ _emit_code(jit_state_t *_jit)
temp = node->u.n;
assert(temp->code == jit_code_label ||
temp->code == jit_code_epilog);
word = calli(temp->u.w);
if (!(temp->flag & jit_flag_patch))
if (temp->flag & jit_flag_patch)
calli(temp->u.w);
else {
word = calli_p(_jit->pc.w);
patch(word, node);
}
}
else
calli(node->u.w);

10
deps/lightning/m4/.gitignore vendored Normal file
View File

@ -0,0 +1,10 @@
/lt~obsolete.m4
/ltversion.m4
/ltsugar.m4
/ltoptions.m4
/libtool.m4
/00gnulib.m4
/gnulib-common.m4
/gnulib-comp.m4
/gnulib-tool.m4
/zzgnulib.m4

57
deps/lightning/m4/gnulib-cache.m4 vendored Normal file
View File

@ -0,0 +1,57 @@
# Copyright (C) 2002-2021 Free Software Foundation, Inc.
#
# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this file. If not, see <https://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License,
# this file may be distributed as part of a program that
# contains a configuration script generated by Autoconf, under
# the same distribution terms as the rest of that program.
#
# Generated by gnulib-tool.
#
# This file represents the specification of how gnulib-tool is used.
# It acts as a cache: It is written and read by gnulib-tool.
# In projects that use version control, this file is meant to be put under
# version control, like the configure.ac and various Makefile.am files.
# Specification in the form of a command-line invocation:
# gnulib-tool --import --local-dir=gl \
# --lib=libgnu \
# --source-base=gnulib-lib \
# --m4-base=m4 \
# --doc-base=gnulib-doc \
# --tests-base=tests \
# --aux-dir=build-aux \
# --no-conditional-dependencies \
# --libtool \
# --macro-prefix=gl
# Specification in the form of a few gnulib-tool.m4 macro invocations:
gl_LOCAL_DIR([gl])
gl_MODULES([
])
gl_AVOID([])
gl_SOURCE_BASE([gnulib-lib])
gl_M4_BASE([m4])
gl_PO_BASE([])
gl_DOC_BASE([gnulib-doc])
gl_TESTS_BASE([tests])
gl_LIB([libgnu])
gl_MAKEFILE_NAME([])
gl_LIBTOOL
gl_MACRO_PREFIX([gl])
gl_PO_DOMAIN([])
gl_WITNESS_C_MACRO([])

View File

@ -6,7 +6,7 @@
[subrepo]
remote = https://github.com/pcercuei/lightrec.git
branch = master
commit = 16a375e4532106128a8d486276963801a0e03189
parent = 9f797430963d9cf0fcef7d963466f9cac7026de2
commit = aa2f992ed8c3236d1d952d72e3de8ea2b8d11af0
parent = bd765e2bf9b0f1e5bd788ebd867c9b1830ece001
method = merge
cmdver = 0.4.3

View File

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.0)
project(lightrec LANGUAGES C VERSION 0.3)
project(lightrec LANGUAGES C VERSION 0.4)
set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries")
if (NOT BUILD_SHARED_LIBS)
@ -29,7 +29,6 @@ set(HAS_DEFAULT_ELM ${CMAKE_COMPILER_IS_GNUCC})
list(APPEND LIGHTREC_SOURCES
blockcache.c
disassembler.c
emitter.c
interpreter.c
lightrec.c
@ -62,6 +61,16 @@ if (ENABLE_THREADED_COMPILER)
endif (NOT ENABLE_FIRST_PASS)
endif (ENABLE_THREADED_COMPILER)
option(OPT_REMOVE_DIV_BY_ZERO_SEQ "(optimization) Remove div-by-zero check sequence" ON)
option(OPT_REPLACE_MEMSET "(optimization) Detect and replace memset with host variant" ON)
option(OPT_DETECT_IMPOSSIBLE_BRANCHES "(optimization) Detect impossible branches" ON)
option(OPT_TRANSFORM_OPS "(optimization) Transform opcodes" ON)
option(OPT_LOCAL_BRANCHES "(optimization) Detect local branches" ON)
option(OPT_SWITCH_DELAY_SLOTS "(optimization) Switch delay slots" ON)
option(OPT_FLAG_STORES "(optimization) Flag stores that don't require invalidation" ON)
option(OPT_FLAG_MULT_DIV "(optimization) Flag MULT/DIV that only use one of HI/LO" ON)
option(OPT_EARLY_UNLOAD "(optimization) Unload registers early" ON)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
add_library(${PROJECT_NAME} ${LIGHTREC_SOURCES} ${LIGHTREC_HEADERS})
@ -74,6 +83,13 @@ set_target_properties(${PROJECT_NAME} PROPERTIES
C_EXTENSIONS OFF
)
if (CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
target_compile_options(${PROJECT_NAME} PRIVATE -Wall -Wno-parentheses)
endif()
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
target_compile_options(${PROJECT_NAME} PRIVATE -Wno-initializer-overrides)
endif()
option(ENABLE_TINYMM "Enable optional libtinymm dependency" OFF)
if (ENABLE_TINYMM)
find_library(TINYMM_LIBRARIES tinymm REQUIRED)
@ -98,16 +114,8 @@ include_directories(${LIBLIGHTNING_INCLUDE_DIR})
target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBLIGHTNING})
if (LOG_LEVEL STREQUAL Debug)
find_library(LIBOPCODES NAMES opcodes-multiarch opcodes)
find_path(LIBOPCODES_INCLUDE_DIR dis-asm.h)
if (NOT LIBOPCODES OR NOT LIBOPCODES_INCLUDE_DIR)
message(SEND_ERROR "Debug log level requires libopcodes (from binutils) to be installed.")
endif ()
set(ENABLE_DISASSEMBLER ON)
include_directories(${LIBOPCODES_INCLUDE_DIR})
target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBOPCODES})
target_sources(${PROJECT_NAME} PRIVATE disassembler.c)
endif()
configure_file(config.h.cmakein config.h @ONLY)

View File

@ -1,15 +1,6 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
/*
* Copyright (C) 2015-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2015-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "blockcache.h"
@ -19,6 +10,7 @@
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
/* Must be power of two */
#define LUT_SIZE 0x4000
@ -28,6 +20,11 @@ struct blockcache {
struct block * lut[LUT_SIZE];
};
u16 lightrec_get_lut_entry(const struct block *block)
{
return (kunseg(block->pc) >> 2) & (LUT_SIZE - 1);
}
struct block * lightrec_find_block(struct blockcache *cache, u32 pc)
{
struct block *block;
@ -42,22 +39,33 @@ struct block * lightrec_find_block(struct blockcache *cache, u32 pc)
return NULL;
}
/*
 * Walk the chain stored in LUT bucket 'lut_entry' and return the first
 * block whose opcode range [pc, pc + nb_ops * 4) contains
 * 'addr_in_block'.  Both addresses are compared after kunseg().
 * Returns NULL when no block in the bucket covers the address.
 */
struct block * lightrec_find_block_from_lut(struct blockcache *cache,
					    u16 lut_entry, u32 addr_in_block)
{
	const u32 target = kunseg(addr_in_block);
	struct block *candidate = cache->lut[lut_entry];

	while (candidate) {
		const u32 start = kunseg(candidate->pc);

		if (target >= start &&
		    target < start + (candidate->nb_ops << 2))
			return candidate;

		candidate = candidate->next;
	}

	return NULL;
}
void remove_from_code_lut(struct blockcache *cache, struct block *block)
{
struct lightrec_state *state = block->state;
const struct opcode *op;
u32 offset = lut_offset(block->pc);
/* Use state->get_next_block in the code LUT, which basically
* calls back get_next_block_func(), until the compiler
* overrides this. This is required, as a NULL value in the code
* LUT means an outdated block. */
state->code_lut[offset] = state->get_next_block;
for (op = block->opcode_list; op; op = op->next)
if (op->c.i.op == OP_META_SYNC)
state->code_lut[offset + op->offset] = NULL;
if (block->function) {
memset(&state->code_lut[offset], 0,
block->nb_ops * sizeof(*state->code_lut));
}
}
void lightrec_register_block(struct blockcache *cache, struct block *block)

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __BLOCKCACHE_H__
@ -20,6 +11,10 @@
struct blockcache;
struct block * lightrec_find_block(struct blockcache *cache, u32 pc);
struct block * lightrec_find_block_from_lut(struct blockcache *cache,
u16 lut_entry, u32 addr_in_block);
u16 lightrec_get_lut_entry(const struct block *block);
void lightrec_register_block(struct blockcache *cache, struct block *block);
void lightrec_unregister_block(struct blockcache *cache, struct block *block);

View File

@ -1,22 +0,0 @@
/*
* Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*/
#ifndef __LIGHTREC_CONFIG_H__
#define __LIGHTREC_CONFIG_H__
#define ENABLE_DISASSEMBLER 0
#define ENABLE_FIRST_PASS 1
#endif /* __LIGHTREC_CONFIG_H__ */

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_CONFIG_H__
@ -22,5 +13,15 @@
#cmakedefine01 HAS_DEFAULT_ELM
#cmakedefine01 OPT_REMOVE_DIV_BY_ZERO_SEQ
#cmakedefine01 OPT_REPLACE_MEMSET
#cmakedefine01 OPT_DETECT_IMPOSSIBLE_BRANCHES
#cmakedefine01 OPT_TRANSFORM_OPS
#cmakedefine01 OPT_LOCAL_BRANCHES
#cmakedefine01 OPT_SWITCH_DELAY_SLOTS
#cmakedefine01 OPT_FLAG_STORES
#cmakedefine01 OPT_FLAG_MULT_DIV
#cmakedefine01 OPT_EARLY_UNLOAD
#endif /* __LIGHTREC_CONFIG_H__ */

13
deps/lightrec/debug.h vendored
View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef DEBUG_H

View File

@ -1,142 +1,391 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
/*
* Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "config.h"
#if ENABLE_DISASSEMBLER
#include <dis-asm.h>
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "debug.h"
#include "disassembler.h"
#include "lightrec-private.h"
#include "memmanager.h"
#include "regcache.h"
static bool is_unconditional_jump(const struct opcode *op)
static const char *std_opcodes[] = {
[OP_J] = "j ",
[OP_JAL] = "jal ",
[OP_BEQ] = "beq ",
[OP_BNE] = "bne ",
[OP_BLEZ] = "blez ",
[OP_ADDI] = "addi ",
[OP_ADDIU] = "addiu ",
[OP_SLTI] = "slti ",
[OP_SLTIU] = "sltiu ",
[OP_ANDI] = "andi ",
[OP_ORI] = "ori ",
[OP_XORI] = "xori ",
[OP_LUI] = "lui ",
[OP_LB] = "lb ",
[OP_LH] = "lh ",
[OP_LWL] = "lwl ",
[OP_LW] = "lw ",
[OP_LBU] = "lbu ",
[OP_LHU] = "lhu ",
[OP_LWR] = "lwr ",
[OP_SB] = "sb ",
[OP_SH] = "sh ",
[OP_SWL] = "swl ",
[OP_SW] = "sw ",
[OP_SWR] = "swr ",
[OP_LWC2] = "lwc2 ",
[OP_SWC2] = "swc2 ",
[OP_META_BEQZ] = "beqz ",
[OP_META_BNEZ] = "bnez ",
};
/*
 * Mnemonics of the OP_SPECIAL instructions; print_op_special() indexes
 * this table with c.r.op.
 */
static const char *special_opcodes[] = {
[OP_SPECIAL_SLL] = "sll ",
[OP_SPECIAL_SRL] = "srl ",
[OP_SPECIAL_SRA] = "sra ",
[OP_SPECIAL_SLLV] = "sllv ",
[OP_SPECIAL_SRLV] = "srlv ",
[OP_SPECIAL_SRAV] = "srav ",
[OP_SPECIAL_JR] = "jr ",
[OP_SPECIAL_JALR] = "jalr ",
[OP_SPECIAL_SYSCALL] = "syscall ",
[OP_SPECIAL_BREAK] = "break ",
[OP_SPECIAL_MFHI] = "mfhi ",
[OP_SPECIAL_MTHI] = "mthi ",
[OP_SPECIAL_MFLO] = "mflo ",
[OP_SPECIAL_MTLO] = "mtlo ",
[OP_SPECIAL_MULT] = "mult ",
[OP_SPECIAL_MULTU] = "multu ",
[OP_SPECIAL_DIV] = "div ",
[OP_SPECIAL_DIVU] = "divu ",
[OP_SPECIAL_ADD] = "add ",
[OP_SPECIAL_ADDU] = "addu ",
[OP_SPECIAL_SUB] = "sub ",
[OP_SPECIAL_SUBU] = "subu ",
[OP_SPECIAL_AND] = "and ",
[OP_SPECIAL_OR] = "or ",
[OP_SPECIAL_XOR] = "xor ",
[OP_SPECIAL_NOR] = "nor ",
[OP_SPECIAL_SLT] = "slt ",
[OP_SPECIAL_SLTU] = "sltu ",
};
/*
 * Mnemonics of the OP_REGIMM branches; print_op() indexes this table
 * with c.i.rt.
 */
static const char *regimm_opcodes[] = {
[OP_REGIMM_BLTZ] = "bltz ",
[OP_REGIMM_BGEZ] = "bgez ",
[OP_REGIMM_BLTZAL] = "bltzal ",
[OP_REGIMM_BGEZAL] = "bgezal ",
};
/*
 * Mnemonics of the coprocessor 0 instructions; print_op_cp() indexes
 * this table with c.i.rs.  Note that print_op_cp() prints RFE from its
 * own string literal rather than from the entry below.
 */
static const char *cp0_opcodes[] = {
[OP_CP0_MFC0] = "mfc0 ",
[OP_CP0_CFC0] = "cfc0 ",
[OP_CP0_MTC0] = "mtc0 ",
[OP_CP0_CTC0] = "ctc0 ",
[OP_CP0_RFE] = "rfe",
};
/*
 * Mnemonics of the coprocessor 2 register-transfer instructions;
 * print_op_cp() indexes this table with c.i.rs.
 */
static const char *cp2_opcodes[] = {
[OP_CP2_BASIC_MFC2] = "mfc2 ",
[OP_CP2_BASIC_CFC2] = "cfc2 ",
[OP_CP2_BASIC_MTC2] = "mtc2 ",
[OP_CP2_BASIC_CTC2] = "ctc2 ",
};
/*
 * Names of the flag bits shared by all opcodes.  print_flags() uses entry
 * i as the name of bit i of an opcode's flags word, so the order here
 * must follow the bit numbering of the generic LIGHTREC_* flags.
 */
static const char *opcode_flags[] = {
"switched branch/DS",
"unload Rs",
"unload Rt",
"unload Rd",
"sync point",
};
/*
 * The three tables below name the opcode-type-specific flag bits.
 * print_flags() maps entry i of the selected table to bit
 * ARRAY_SIZE(opcode_flags) + i, i.e. to the bits that follow the generic
 * flags above.
 */
/* Type-specific flags of load/store opcodes (selected by print_op()). */
static const char *opcode_io_flags[] = {
"memory I/O",
"hardware I/O",
"self-modifying code",
"no invalidation",
};
/* Type-specific flags of branch opcodes (selected by print_op()). */
static const char *opcode_branch_flags[] = {
"emulate branch",
"local branch",
};
/* Type-specific flags of MULT/DIV opcodes (selected by print_op_special()). */
static const char *opcode_multdiv_flags[] = {
"No LO",
"No HI",
"No div check",
};
/*
 * Write the names of the bits set in 'flags' into 'buf' as a
 * parenthesised, comma-separated list such as "(unload Rs, sync point)".
 *
 * Bits 0 .. ARRAY_SIZE(opcode_flags)-1 are named by opcode_flags[]; the
 * following 'array_size' bits are named by 'array' (which may be NULL
 * when 'array_size' is 0).  When no listed bit is set, 'buf' receives an
 * empty string.
 *
 * buf:  destination buffer
 * len:  size of 'buf' in bytes; the output is truncated to fit and is
 *       always NUL-terminated when len > 0
 *
 * Returns the number of characters the full list needs, excluding the
 * terminating NUL (the snprintf() convention).
 */
static int print_flags(char *buf, size_t len, u16 flags,
		       const char **array, size_t array_size)
{
	const char *flag_name;
	unsigned int i;
	size_t count = 0, bytes;
	bool first = true;
	int ret;

	for (i = 0; i < array_size + ARRAY_SIZE(opcode_flags); i++) {
		if (!(flags & BIT(i)))
			continue;

		if (i < ARRAY_SIZE(opcode_flags))
			flag_name = opcode_flags[i];
		else
			flag_name = array[i - ARRAY_SIZE(opcode_flags)];

		if (first)
			ret = snprintf(buf, len, "(%s", flag_name);
		else
			ret = snprintf(buf, len, ", %s", flag_name);

		first = false;

		if (ret < 0)
			continue;

		bytes = (size_t)ret;
		count += bytes;

		/* snprintf() returns the untruncated length.  Clamp the
		 * step so that 'buf' never moves past the terminating NUL
		 * and 'len' never wraps around. */
		if (bytes >= len)
			bytes = len ? len - 1 : 0;

		buf += bytes;
		len -= bytes;
	}

	if (!first) {
		ret = snprintf(buf, len, ")");
		if (ret > 0)
			count += (size_t)ret;
	} else if (len) {
		/* Nothing printed: only terminate if there is room. */
		*buf = '\0';
	}

	return count;
}
/*
 * Disassemble an OP_SPECIAL instruction into 'buf' (at most 'len' bytes,
 * snprintf() semantics).
 *
 * c:          the instruction word
 * flags_ptr:  for MULT/MULTU/DIV/DIVU, set to the table naming the
 *             type-specific flag bits; left untouched otherwise
 * nb_flags:   set to the number of entries of *flags_ptr in the same case
 *
 * Returns the value returned by snprintf().
 */
static int print_op_special(union code c, char *buf, size_t len,
			    const char ***flags_ptr, size_t *nb_flags)
{
	const char *mnemonic;

	switch (c.r.op) {
	case OP_SPECIAL_SLL:
	case OP_SPECIAL_SRL:
	case OP_SPECIAL_SRA:
		/* Shift by immediate: rd, rt, shift amount */
		mnemonic = special_opcodes[c.r.op];
		return snprintf(buf, len, "%s%s,%s,%u",
				mnemonic,
				lightrec_reg_name(c.r.rd),
				lightrec_reg_name(c.r.rt),
				c.r.imm);
	case OP_SPECIAL_SLLV:
	case OP_SPECIAL_SRLV:
	case OP_SPECIAL_SRAV:
	case OP_SPECIAL_ADD:
	case OP_SPECIAL_ADDU:
	case OP_SPECIAL_SUB:
	case OP_SPECIAL_SUBU:
	case OP_SPECIAL_AND:
	case OP_SPECIAL_OR:
	case OP_SPECIAL_XOR:
	case OP_SPECIAL_NOR:
	case OP_SPECIAL_SLT:
	case OP_SPECIAL_SLTU:
		/* NOTE(review): operands are printed as rd,rt,rs for the
		 * ALU ops too, whereas MIPS assembly writes them rd,rs,rt;
		 * left as-is, confirm whether this is intended. */
		mnemonic = special_opcodes[c.r.op];
		return snprintf(buf, len, "%s%s,%s,%s",
				mnemonic,
				lightrec_reg_name(c.r.rd),
				lightrec_reg_name(c.r.rt),
				lightrec_reg_name(c.r.rs));
	case OP_SPECIAL_JR:
	case OP_SPECIAL_MTHI:
	case OP_SPECIAL_MTLO:
		mnemonic = special_opcodes[c.r.op];
		return snprintf(buf, len, "%s%s",
				mnemonic,
				lightrec_reg_name(c.r.rs));
	case OP_SPECIAL_JALR:
		/* JALR links into rd and jumps to the address held in rs
		 * (the interpreter reads the target from r.rs), so the
		 * second operand is rs, not rt. */
		mnemonic = special_opcodes[c.r.op];
		return snprintf(buf, len, "%s%s,%s",
				mnemonic,
				lightrec_reg_name(c.r.rd),
				lightrec_reg_name(c.r.rs));
	case OP_SPECIAL_SYSCALL:
	case OP_SPECIAL_BREAK:
		return snprintf(buf, len, "%s", special_opcodes[c.r.op]);
	case OP_SPECIAL_MFHI:
	case OP_SPECIAL_MFLO:
		mnemonic = special_opcodes[c.r.op];
		return snprintf(buf, len, "%s%s",
				mnemonic,
				lightrec_reg_name(c.r.rd));
	case OP_SPECIAL_MULT:
	case OP_SPECIAL_MULTU:
	case OP_SPECIAL_DIV:
	case OP_SPECIAL_DIVU:
		/* MULT/DIV carry their own flag bits; report the table
		 * that names them.  Operands: HI reg, LO reg, rs, rt. */
		*flags_ptr = opcode_multdiv_flags;
		*nb_flags = ARRAY_SIZE(opcode_multdiv_flags);
		mnemonic = special_opcodes[c.r.op];
		return snprintf(buf, len, "%s%s,%s,%s,%s",
				mnemonic,
				lightrec_reg_name(get_mult_div_hi(c)),
				lightrec_reg_name(get_mult_div_lo(c)),
				lightrec_reg_name(c.r.rs),
				lightrec_reg_name(c.r.rt));
	default:
		return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
	}
}
static int print_op_cp(union code c, char *buf, size_t len, unsigned int cp)
{
if (cp == 2) {
switch (c.i.rs) {
case OP_CP0_MFC0:
case OP_CP0_CFC0:
case OP_CP0_MTC0:
case OP_CP0_CTC0:
return snprintf(buf, len, "%s%s,%u",
cp2_opcodes[c.i.rs],
lightrec_reg_name(c.i.rt),
c.r.rd);
default:
return snprintf(buf, len, "cp2 (0x%08x)", c.opcode);
}
} else {
switch (c.i.rs) {
case OP_CP0_MFC0:
case OP_CP0_CFC0:
case OP_CP0_MTC0:
case OP_CP0_CTC0:
return snprintf(buf, len, "%s%s,%u",
cp0_opcodes[c.i.rs],
lightrec_reg_name(c.i.rt),
c.r.rd);
case OP_CP0_RFE:
return snprintf(buf, len, "rfe ");
default:
return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
}
}
}
static int print_op(union code c, u32 pc, char *buf, size_t len,
const char ***flags_ptr, size_t *nb_flags)
{
if (c.opcode == 0)
return snprintf(buf, len, "nop ");
switch (c.i.op) {
case OP_SPECIAL:
return op->r.op == OP_SPECIAL_JR || op->r.op == OP_SPECIAL_JALR;
return print_op_special(c, buf, len, flags_ptr, nb_flags);
case OP_REGIMM:
*flags_ptr = opcode_branch_flags;
*nb_flags = ARRAY_SIZE(opcode_branch_flags);
return snprintf(buf, len, "%s%s,0x%x",
regimm_opcodes[c.i.rt],
lightrec_reg_name(c.i.rs),
pc + 4 + ((s16)c.i.imm << 2));
case OP_J:
case OP_JAL:
return true;
return snprintf(buf, len, "%s0x%x",
std_opcodes[c.i.op],
(pc & 0xf0000000) | (c.j.imm << 2));
case OP_BEQ:
case OP_BNE:
case OP_BLEZ:
return op->i.rs == op->i.rt;
case OP_REGIMM:
return (op->r.rt == OP_REGIMM_BGEZ ||
op->r.rt == OP_REGIMM_BGEZAL) && op->i.rs == 0;
case OP_BGTZ:
*flags_ptr = opcode_branch_flags;
*nb_flags = ARRAY_SIZE(opcode_branch_flags);
return snprintf(buf, len, "%s%s,%s,0x%x",
std_opcodes[c.i.op],
lightrec_reg_name(c.i.rs),
lightrec_reg_name(c.i.rt),
pc + 4 + ((s16)c.i.imm << 2));
case OP_ADDI:
case OP_ADDIU:
case OP_SLTI:
case OP_SLTIU:
case OP_ANDI:
case OP_ORI:
case OP_XORI:
return snprintf(buf, len, "%s%s,%s,0x%04hx",
std_opcodes[c.i.op],
lightrec_reg_name(c.i.rt),
lightrec_reg_name(c.i.rs),
(u16)c.i.imm);
case OP_LUI:
return snprintf(buf, len, "%s%s,0x%04hx",
std_opcodes[c.i.op],
lightrec_reg_name(c.i.rt),
(u16)c.i.imm);
case OP_CP0:
return print_op_cp(c, buf, len, 0);
case OP_CP2:
return print_op_cp(c, buf, len, 2);
case OP_LB:
case OP_LH:
case OP_LWL:
case OP_LW:
case OP_LBU:
case OP_LHU:
case OP_LWR:
case OP_SB:
case OP_SH:
case OP_SWL:
case OP_SW:
case OP_SWR:
*flags_ptr = opcode_io_flags;
*nb_flags = ARRAY_SIZE(opcode_io_flags);
return snprintf(buf, len, "%s%s,%hd(%s)",
std_opcodes[c.i.op],
lightrec_reg_name(c.i.rt),
(s16)c.i.imm,
lightrec_reg_name(c.i.rs));
case OP_LWC2:
case OP_SWC2:
*flags_ptr = opcode_io_flags;
*nb_flags = ARRAY_SIZE(opcode_io_flags);
return snprintf(buf, len, "%s%s,%hd(%s)",
std_opcodes[c.i.op],
lightrec_reg_name(c.i.rt),
(s16)c.i.imm,
lightrec_reg_name(c.i.rs));
case OP_META_BEQZ:
case OP_META_BNEZ:
*flags_ptr = opcode_branch_flags;
*nb_flags = ARRAY_SIZE(opcode_branch_flags);
return snprintf(buf, len, "%s%s,0x%x",
std_opcodes[c.i.op],
lightrec_reg_name(c.i.rs),
pc + 4 + ((s16)c.i.imm << 2));
case OP_META_MOV:
return snprintf(buf, len, "move %s,%s",
lightrec_reg_name(c.r.rd),
lightrec_reg_name(c.r.rs));
default:
return false;
return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
}
}
static bool is_syscall(const struct opcode *op)
void lightrec_print_disassembly(const struct block *block, const u32 *code)
{
return (op->i.op == OP_SPECIAL && op->r.op == OP_SPECIAL_SYSCALL) ||
(op->i.op == OP_CP0 && (op->r.rs == OP_CP0_MTC0 ||
op->r.rs == OP_CP0_CTC0) &&
(op->r.rd == 12 || op->r.rd == 13));
}
void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *list)
{
struct opcode *next;
while (list) {
next = list->next;
lightrec_free(state, MEM_FOR_IR, sizeof(*list), list);
list = next;
}
}
struct opcode * lightrec_disassemble(struct lightrec_state *state,
const u32 *src, unsigned int *len)
{
struct opcode *head = NULL;
bool stop_next = false;
struct opcode *curr, *last;
const struct opcode *op;
const char **flags_ptr;
size_t nb_flags, count, count2;
char buf[256], buf2[256], buf3[256];
unsigned int i;
u32 pc, branch_pc;
for (i = 0, last = NULL; ; i++, last = curr) {
curr = lightrec_calloc(state, MEM_FOR_IR, sizeof(*curr));
if (!curr) {
pr_err("Unable to allocate memory\n");
lightrec_free_opcode_list(state, head);
return NULL;
for (i = 0; i < block->nb_ops; i++) {
op = &block->opcode_list[i];
branch_pc = get_branch_pc(block, i, 0);
pc = block->pc + (i << 2);
count = print_op((union code)code[i], pc, buf, sizeof(buf),
&flags_ptr, &nb_flags);
flags_ptr = NULL;
nb_flags = 0;
count2 = print_op(op->c, branch_pc, buf2, sizeof(buf2),
&flags_ptr, &nb_flags);
if (code[i] == op->c.opcode) {
*buf2 = '\0';
count2 = 0;
}
if (!last)
head = curr;
else
last->next = curr;
print_flags(buf3, sizeof(buf3), op->flags, flags_ptr, nb_flags);
/* TODO: Take care of endianness */
curr->opcode = LE32TOH(*src++);
curr->offset = i;
/* NOTE: The block disassembly ends after the opcode that
* follows an unconditional jump (delay slot) */
if (stop_next || is_syscall(curr))
break;
else if (is_unconditional_jump(curr))
stop_next = true;
}
if (len)
*len = (i + 1) * sizeof(u32);
return head;
}
unsigned int lightrec_cycles_of_opcode(union code code)
{
switch (code.i.op) {
case OP_META_REG_UNLOAD:
case OP_META_SYNC:
return 0;
default:
return 2;
printf("0x%08x (0x%x)\t%s%*c%s%*c%s\n", pc, i << 2,
buf, 30 - (int)count, ' ', buf2, 30 - (int)count2, ' ', buf3);
}
}
#if ENABLE_DISASSEMBLER
void lightrec_print_disassembly(const struct block *block,
const u32 *code, unsigned int length)
{
struct disassemble_info info;
unsigned int i;
memset(&info, 0, sizeof(info));
init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf);
info.buffer = (bfd_byte *) code;
info.buffer_vma = (bfd_vma)(uintptr_t) code;
info.buffer_length = length;
info.flavour = bfd_target_unknown_flavour;
info.arch = bfd_arch_mips;
info.mach = bfd_mach_mips3000;
disassemble_init_for_target(&info);
for (i = 0; i < length; i += 4) {
void print_insn_little_mips(bfd_vma, struct disassemble_info *);
putc('\t', stdout);
print_insn_little_mips((bfd_vma)(uintptr_t) code++, &info);
putc('\n', stdout);
}
}
#endif

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __DISASSEMBLER_H__
@ -22,24 +13,29 @@
#define __packed __attribute__((packed))
#endif
#define BIT(x) (1 << (x))
#define BIT(x) (1ULL << (x))
/* Flags for all opcodes */
#define LIGHTREC_NO_DS BIT(0)
#define LIGHTREC_UNLOAD_RS BIT(1)
#define LIGHTREC_UNLOAD_RT BIT(2)
#define LIGHTREC_UNLOAD_RD BIT(3)
#define LIGHTREC_SYNC BIT(4)
/* Flags for load/store opcodes */
#define LIGHTREC_DIRECT_IO BIT(1)
#define LIGHTREC_HW_IO BIT(2)
#define LIGHTREC_SMC BIT(3)
#define LIGHTREC_NO_INVALIDATE BIT(4)
#define LIGHTREC_DIRECT_IO BIT(5)
#define LIGHTREC_HW_IO BIT(6)
#define LIGHTREC_SMC BIT(7)
#define LIGHTREC_NO_INVALIDATE BIT(8)
/* Flags for branches */
#define LIGHTREC_EMULATE_BRANCH BIT(1)
#define LIGHTREC_LOCAL_BRANCH BIT(2)
#define LIGHTREC_EMULATE_BRANCH BIT(5)
#define LIGHTREC_LOCAL_BRANCH BIT(6)
/* Flags for div/mult opcodes */
#define LIGHTREC_NO_HI BIT(1)
#define LIGHTREC_NO_DIV_CHECK BIT(2)
#define LIGHTREC_NO_LO BIT(5)
#define LIGHTREC_NO_HI BIT(6)
#define LIGHTREC_NO_DIV_CHECK BIT(7)
struct block;
@ -77,13 +73,10 @@ enum standard_opcodes {
OP_LWC2 = 0x32,
OP_SWC2 = 0x3a,
OP_META_REG_UNLOAD = 0x11,
OP_META_BEQZ = 0x14,
OP_META_BNEZ = 0x15,
OP_META_MOV = 0x16,
OP_META_SYNC = 0x17,
};
enum special_opcodes {
@ -205,18 +198,8 @@ struct opcode {
struct opcode_j j;
};
u16 flags;
u16 offset;
struct opcode *next;
};
struct opcode * lightrec_disassemble(struct lightrec_state *state,
const u32 *src, unsigned int *len);
void lightrec_free_opcode_list(struct lightrec_state *state,
struct opcode *list);
unsigned int lightrec_cycles_of_opcode(union code code);
void lightrec_print_disassembly(const struct block *block,
const u32 *code, unsigned int length);
void lightrec_print_disassembly(const struct block *block, const u32 *code);
#endif /* __DISASSEMBLER_H__ */

1454
deps/lightrec/emitter.c vendored

File diff suppressed because it is too large Load Diff

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __EMITTER_H__
@ -20,9 +11,7 @@
struct block;
struct opcode;
void lightrec_rec_opcode(const struct block *block,
const struct opcode *op, u32 pc);
void lightrec_emit_eob(const struct block *block,
const struct opcode *op, u32 pc);
void lightrec_rec_opcode(const struct block *block, u16 offset);
void lightrec_emit_eob(const struct block *block, u16 offset);
#endif /* __EMITTER_H__ */

View File

@ -1,15 +1,6 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
/*
* Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "disassembler.h"
@ -39,18 +30,45 @@ struct interpreter {
struct opcode *op;
u32 cycles;
bool delay_slot;
u16 offset;
};
static u32 int_get_branch_pc(const struct interpreter *inter)
{
return get_branch_pc(inter->block, inter->offset, 0);
}
/*
 * Wrapper around get_ds_pc() for the opcode currently being interpreted;
 * 'imm' is forwarded unchanged as the displacement argument.
 */
static inline u32 int_get_ds_pc(const struct interpreter *inter, s16 imm)
{
	const u16 offset = inter->offset;

	return get_ds_pc(inter->block, offset, imm);
}
/*
 * Return a pointer to the opcode that follows the current one in the
 * block's opcode array.  No bounds check is performed.
 */
static inline struct opcode *next_op(const struct interpreter *inter)
{
	const int following = inter->offset + 1;

	return &inter->block->opcode_list[following];
}
/* Invoke the interpreter handler 'func' on 'inter' and return its result. */
static inline u32 execute(lightrec_int_func_t func, struct interpreter *inter)
{
	return func(inter);
}
static inline u32 lightrec_int_op(struct interpreter *inter)
{
return execute(int_standard[inter->op->i.op], inter);
}
static inline u32 jump_skip(struct interpreter *inter)
{
inter->op = inter->op->next;
inter->op = next_op(inter);
inter->offset++;
return execute(int_standard[inter->op->i.op], inter);
if (inter->op->flags & LIGHTREC_SYNC) {
inter->state->current_cycle += inter->cycles;
inter->cycles = 0;
}
return lightrec_int_op(inter);
}
static inline u32 jump_next(struct interpreter *inter)
@ -70,16 +88,12 @@ static inline u32 jump_after_branch(struct interpreter *inter)
if (unlikely(inter->delay_slot))
return 0;
inter->op = inter->op->next;
inter->op = next_op(inter);
inter->offset++;
return jump_skip(inter);
}
static inline u32 lightrec_int_op(struct interpreter *inter)
{
return execute(int_standard[inter->op->i.op], inter);
}
static void update_cycles_before_branch(struct interpreter *inter)
{
u32 cycles;
@ -89,7 +103,7 @@ static void update_cycles_before_branch(struct interpreter *inter)
if (has_delay_slot(inter->op->c) &&
!(inter->op->flags & LIGHTREC_NO_DS))
cycles += lightrec_cycles_of_opcode(inter->op->next->c);
cycles += lightrec_cycles_of_opcode(next_op(inter)->c);
inter->cycles += cycles;
inter->state->current_cycle += inter->cycles;
@ -131,7 +145,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
{
struct lightrec_state *state = inter->state;
u32 *reg_cache = state->native_reg_cache;
struct opcode new_op, *op = inter->op->next;
struct opcode new_op, *op = next_op(inter);
union code op_next;
struct interpreter inter2 = {
.state = state,
@ -228,8 +242,6 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
} else {
new_op.c = op_next;
new_op.flags = 0;
new_op.offset = 0;
new_op.next = NULL;
inter2.op = &new_op;
/* Execute the first opcode of the next block */
@ -243,8 +255,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
inter->cycles += lightrec_cycles_of_opcode(op_next);
}
} else {
next_pc = inter->block->pc
+ (inter->op->offset + 2) * sizeof(u32);
next_pc = int_get_ds_pc(inter, 2);
}
inter2.block = inter->block;
@ -291,8 +302,6 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
new_op.c = op_next;
new_op.flags = 0;
new_op.offset = sizeof(u32);
new_op.next = NULL;
inter2.op = &new_op;
inter2.block = NULL;
@ -316,7 +325,7 @@ static u32 int_unimplemented(struct interpreter *inter)
static u32 int_jump(struct interpreter *inter, bool link)
{
struct lightrec_state *state = inter->state;
u32 old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
u32 old_pc = int_get_branch_pc(inter);
u32 pc = (old_pc & 0xf0000000) | (inter->op->j.imm << 2);
if (link)
@ -344,7 +353,7 @@ static u32 int_jumpr(struct interpreter *inter, u8 link_reg)
u32 old_pc, next_pc = state->native_reg_cache[inter->op->r.rs];
if (link_reg) {
old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
old_pc = int_get_branch_pc(inter);
state->native_reg_cache[link_reg] = old_pc + 8;
}
@ -404,7 +413,7 @@ static u32 int_branch(struct interpreter *inter, u32 pc,
static u32 int_beq(struct interpreter *inter, bool bne)
{
u32 rs, rt, old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
u32 rs, rt, old_pc = int_get_branch_pc(inter);
rs = inter->state->native_reg_cache[inter->op->i.rs];
rt = inter->state->native_reg_cache[inter->op->i.rt];
@ -424,7 +433,7 @@ static u32 int_BNE(struct interpreter *inter)
static u32 int_bgez(struct interpreter *inter, bool link, bool lt, bool regimm)
{
u32 old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
u32 old_pc = int_get_branch_pc(inter);
s32 rs;
if (link)
@ -492,7 +501,7 @@ static u32 int_ctc(struct interpreter *inter)
* interrupt status. */
if (!(inter->op->flags & LIGHTREC_NO_DS) &&
op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13))
return inter->block->pc + (op->offset + 1) * sizeof(u32);
return int_get_ds_pc(inter, 1);
else
return jump_next(inter);
}
@ -520,7 +529,7 @@ static u32 int_CP(struct interpreter *inter)
const struct lightrec_cop_ops *ops;
const struct opcode *op = inter->op;
if ((op->j.imm >> 25) & 1)
if (op->i.op == OP_CP2)
ops = &state->ops.cop2_ops;
else
ops = &state->ops.cop0_ops;
@ -613,7 +622,7 @@ static u32 int_io(struct interpreter *inter, bool is_load)
val = lightrec_rw(inter->state, inter->op->c,
reg_cache[op->rs], reg_cache[op->rt],
&inter->op->flags);
&inter->op->flags, inter->block);
if (is_load && op->rt)
reg_cache[op->rt] = val;
@ -636,9 +645,9 @@ static u32 int_store(struct interpreter *inter)
lightrec_rw(inter->state, inter->op->c,
inter->state->native_reg_cache[inter->op->i.rs],
inter->state->native_reg_cache[inter->op->i.rt],
&inter->op->flags);
&inter->op->flags, inter->block);
next_pc = inter->block->pc + (inter->op->offset + 1) * 4;
next_pc = int_get_ds_pc(inter, 1);
/* Invalidate next PC, to force the rest of the block to be rebuilt */
lightrec_invalidate(inter->state, next_pc, 4);
@ -725,7 +734,7 @@ static u32 int_syscall_break(struct interpreter *inter)
else
inter->state->exit_flags |= LIGHTREC_EXIT_SYSCALL;
return inter->block->pc + inter->op->offset * sizeof(u32);
return int_get_ds_pc(inter, 0);
}
static u32 int_special_MFHI(struct interpreter *inter)
@ -773,11 +782,14 @@ static u32 int_special_MULT(struct interpreter *inter)
u32 *reg_cache = inter->state->native_reg_cache;
s32 rs = reg_cache[inter->op->r.rs];
s32 rt = reg_cache[inter->op->r.rt];
u8 reg_lo = get_mult_div_lo(inter->op->c);
u8 reg_hi = get_mult_div_hi(inter->op->c);
u64 res = (s64)rs * (s64)rt;
if (!(inter->op->flags & LIGHTREC_NO_HI))
reg_cache[REG_HI] = res >> 32;
reg_cache[REG_LO] = res;
reg_cache[reg_hi] = res >> 32;
if (!(inter->op->flags & LIGHTREC_NO_LO))
reg_cache[reg_lo] = res;
return jump_next(inter);
}
@ -787,11 +799,14 @@ static u32 int_special_MULTU(struct interpreter *inter)
u32 *reg_cache = inter->state->native_reg_cache;
u32 rs = reg_cache[inter->op->r.rs];
u32 rt = reg_cache[inter->op->r.rt];
u8 reg_lo = get_mult_div_lo(inter->op->c);
u8 reg_hi = get_mult_div_hi(inter->op->c);
u64 res = (u64)rs * (u64)rt;
if (!(inter->op->flags & LIGHTREC_NO_HI))
reg_cache[REG_HI] = res >> 32;
reg_cache[REG_LO] = res;
reg_cache[reg_hi] = res >> 32;
if (!(inter->op->flags & LIGHTREC_NO_LO))
reg_cache[reg_lo] = res;
return jump_next(inter);
}
@ -801,6 +816,8 @@ static u32 int_special_DIV(struct interpreter *inter)
u32 *reg_cache = inter->state->native_reg_cache;
s32 rs = reg_cache[inter->op->r.rs];
s32 rt = reg_cache[inter->op->r.rt];
u8 reg_lo = get_mult_div_lo(inter->op->c);
u8 reg_hi = get_mult_div_hi(inter->op->c);
u32 lo, hi;
if (rt == 0) {
@ -811,8 +828,10 @@ static u32 int_special_DIV(struct interpreter *inter)
hi = rs % rt;
}
reg_cache[REG_HI] = hi;
reg_cache[REG_LO] = lo;
if (!(inter->op->flags & LIGHTREC_NO_HI))
reg_cache[reg_hi] = hi;
if (!(inter->op->flags & LIGHTREC_NO_LO))
reg_cache[reg_lo] = lo;
return jump_next(inter);
}
@ -822,6 +841,8 @@ static u32 int_special_DIVU(struct interpreter *inter)
u32 *reg_cache = inter->state->native_reg_cache;
u32 rs = reg_cache[inter->op->r.rs];
u32 rt = reg_cache[inter->op->r.rt];
u8 reg_lo = get_mult_div_lo(inter->op->c);
u8 reg_hi = get_mult_div_hi(inter->op->c);
u32 lo, hi;
if (rt == 0) {
@ -832,8 +853,10 @@ static u32 int_special_DIVU(struct interpreter *inter)
hi = rs % rt;
}
reg_cache[REG_HI] = hi;
reg_cache[REG_LO] = lo;
if (!(inter->op->flags & LIGHTREC_NO_HI))
reg_cache[reg_hi] = hi;
if (!(inter->op->flags & LIGHTREC_NO_LO))
reg_cache[reg_lo] = lo;
return jump_next(inter);
}
@ -942,11 +965,6 @@ static u32 int_special_SLTU(struct interpreter *inter)
return jump_next(inter);
}
static u32 int_META_SKIP(struct interpreter *inter)
{
return jump_skip(inter);
}
static u32 int_META_MOV(struct interpreter *inter)
{
u32 *reg_cache = inter->state->native_reg_cache;
@ -958,14 +976,6 @@ static u32 int_META_MOV(struct interpreter *inter)
return jump_next(inter);
}
static u32 int_META_SYNC(struct interpreter *inter)
{
inter->state->current_cycle += inter->cycles;
inter->cycles = 0;
return jump_skip(inter);
}
static const lightrec_int_func_t int_standard[64] = {
SET_DEFAULT_ELM(int_standard, int_unimplemented),
[OP_SPECIAL] = int_SPECIAL,
@ -1001,11 +1011,9 @@ static const lightrec_int_func_t int_standard[64] = {
[OP_LWC2] = int_LWC2,
[OP_SWC2] = int_store,
[OP_META_REG_UNLOAD] = int_META_SKIP,
[OP_META_BEQZ] = int_BEQ,
[OP_META_BNEZ] = int_BNE,
[OP_META_MOV] = int_META_MOV,
[OP_META_SYNC] = int_META_SYNC,
};
static const lightrec_int_func_t int_special[64] = {
@ -1106,14 +1114,15 @@ static u32 int_CP2(struct interpreter *inter)
return int_CP(inter);
}
static u32 lightrec_emulate_block_list(struct block *block, struct opcode *op)
static u32 lightrec_emulate_block_list(struct block *block, u32 offset)
{
struct interpreter inter;
u32 pc;
inter.block = block;
inter.state = block->state;
inter.op = op;
inter.offset = offset;
inter.op = &block->opcode_list[offset];
inter.cycles = 0;
inter.delay_slot = false;
@ -1130,12 +1139,9 @@ static u32 lightrec_emulate_block_list(struct block *block, struct opcode *op)
u32 lightrec_emulate_block(struct block *block, u32 pc)
{
u32 offset = (kunseg(pc) - kunseg(block->pc)) >> 2;
struct opcode *op;
for (op = block->opcode_list;
op && (op->offset < offset); op = op->next);
if (op)
return lightrec_emulate_block_list(block, op);
if (offset < block->nb_ops)
return lightrec_emulate_block_list(block, offset);
pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc);

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_INTERPRETER_H__

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2016-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2016-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_PRIVATE_H__
@ -56,12 +47,16 @@
#define BLOCK_SHOULD_RECOMPILE BIT(1)
#define BLOCK_FULLY_TAGGED BIT(2)
#define BLOCK_IS_DEAD BIT(3)
#define BLOCK_IS_MEMSET BIT(4)
#define RAM_SIZE 0x200000
#define BIOS_SIZE 0x80000
#define CODE_LUT_SIZE ((RAM_SIZE + BIOS_SIZE) >> 2)
#define REG_LO 32
#define REG_HI 33
/* Definition of jit_state_t (avoids inclusion of <lightning.h>) */
struct jit_node;
struct jit_state;
@ -101,17 +96,27 @@ struct lightrec_branch_target {
u32 offset;
};
/*
 * Indices into lightrec_state.c_wrappers[], one per C callback reachable
 * from the shared C wrapper. C_WRAPPERS_COUNT is not a callback: it is the
 * number of entries and is used to size the array.
 */
enum c_wrappers {
	C_WRAPPER_RW = 0,	/* lightrec_rw_cb */
	C_WRAPPER_RW_GENERIC,	/* lightrec_rw_generic_cb */
	C_WRAPPER_MFC,		/* lightrec_mfc_cb */
	C_WRAPPER_MTC,		/* lightrec_mtc_cb */
	C_WRAPPER_RFE,		/* lightrec_rfe_cb */
	C_WRAPPER_CP,		/* lightrec_cp_cb */
	C_WRAPPER_SYSCALL,	/* lightrec_syscall_cb */
	C_WRAPPER_BREAK,	/* lightrec_break_cb */

	C_WRAPPERS_COUNT,	/* keep last */
};
struct lightrec_state {
u32 native_reg_cache[34];
u32 next_pc;
u32 current_cycle;
u32 target_cycle;
u32 exit_flags;
struct block *dispatcher, *rw_wrapper, *rw_generic_wrapper,
*mfc_wrapper, *mtc_wrapper, *rfe_wrapper, *cp_wrapper,
*syscall_wrapper, *break_wrapper;
void *rw_func, *rw_generic_func, *mfc_func, *mtc_func, *rfe_func,
*cp_func, *syscall_func, *break_func;
u32 old_cycle_counter;
struct block *dispatcher, *c_wrapper_block;
void *c_wrapper, *c_wrappers[C_WRAPPERS_COUNT];
struct jit_node *branches[512];
struct lightrec_branch local_branches[512];
struct lightrec_branch_target targets[512];
@ -124,6 +129,7 @@ struct lightrec_state {
struct recompiler *rec;
struct reaper *reaper;
void (*eob_wrapper_func)(void);
void (*memset_func)(void);
void (*get_next_block)(void);
struct lightrec_ops ops;
unsigned int nb_precompile;
@ -137,7 +143,8 @@ struct lightrec_state {
};
u32 lightrec_rw(struct lightrec_state *state, union code op,
u32 addr, u32 data, u16 *flags);
u32 addr, u32 data, u16 *flags,
struct block *block);
void lightrec_free_block(struct block *block);
@ -159,6 +166,24 @@ static inline u32 lut_offset(u32 pc)
return (pc & (RAM_SIZE - 1)) >> 2; // RAM
}
/*
 * Return the guest PC located 'imm' opcodes away from the opcode at index
 * 'offset' of 'block'.
 *
 * When OPT_SWITCH_DELAY_SLOTS is enabled and the opcode carries the
 * LIGHTREC_NO_DS flag, the index is first advanced by one (presumably
 * because the optimizer swapped the branch with its delay slot - confirm
 * against the optimizer).
 *
 * 'imm' may be negative (backward displacement), so the sum is converted to
 * u32 before shifting: left-shifting a negative signed value is undefined
 * behaviour in C, whereas the unsigned shift wraps modulo 2^32 and yields
 * the expected address once added to block->pc.
 */
static inline u32 get_ds_pc(const struct block *block, u16 offset, s16 imm)
{
	u16 flags = block->opcode_list[offset].flags;

	offset += !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS));

	return block->pc + ((u32)(offset + imm) << 2);
}
/*
 * Return the guest PC located 'imm' opcodes away from the opcode at index
 * 'offset' of 'block'.
 *
 * When OPT_SWITCH_DELAY_SLOTS is enabled and the opcode carries the
 * LIGHTREC_NO_DS flag, the index is first moved back by one (presumably
 * because the optimizer swapped the branch with its delay slot - confirm
 * against the optimizer).
 *
 * 'imm' may be negative, so the sum is converted to u32 before shifting:
 * left-shifting a negative signed value is undefined behaviour in C, whereas
 * the unsigned shift wraps modulo 2^32 and yields the expected address once
 * added to block->pc.
 */
static inline u32 get_branch_pc(const struct block *block, u16 offset, s16 imm)
{
	u16 flags = block->opcode_list[offset].flags;

	offset -= !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS));

	return block->pc + ((u32)(offset + imm) << 2);
}
void lightrec_mtc(struct lightrec_state *state, union code op, u32 data);
u32 lightrec_mfc(struct lightrec_state *state, union code op);
@ -166,5 +191,18 @@ union code lightrec_read_opcode(struct lightrec_state *state, u32 pc);
struct block * lightrec_get_block(struct lightrec_state *state, u32 pc);
int lightrec_compile_block(struct block *block);
void lightrec_free_opcode_list(struct block *block);
unsigned int lightrec_cycles_of_opcode(union code code);
/*
 * Index of the register-cache slot that receives the LO half of a
 * MULT/MULTU/DIV/DIVU result. With OPT_FLAG_MULT_DIV enabled, a non-zero
 * 'rd' field of the opcode overrides the default REG_LO slot.
 */
static inline u8 get_mult_div_lo(union code c)
{
	if (OPT_FLAG_MULT_DIV && c.r.rd)
		return c.r.rd;

	return REG_LO;
}
/*
 * Index of the register-cache slot that receives the HI half of a
 * MULT/MULTU/DIV/DIVU result. With OPT_FLAG_MULT_DIV enabled, a non-zero
 * 'imm' field of the opcode overrides the default REG_HI slot.
 */
static inline u8 get_mult_div_hi(union code c)
{
	if (OPT_FLAG_MULT_DIV && c.r.imm)
		return c.r.imm;

	return REG_HI;
}
#endif /* __LIGHTREC_PRIVATE_H__ */

View File

@ -1,15 +1,6 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
/*
* Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "blockcache.h"
@ -98,11 +89,14 @@ static const struct lightrec_mem_map_ops lightrec_default_ops = {
.lw = lightrec_default_lw,
};
static void __segfault_cb(struct lightrec_state *state, u32 addr)
static void __segfault_cb(struct lightrec_state *state, u32 addr,
const struct block *block)
{
lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
pr_err("Segmentation fault in recompiled code: invalid "
"load/store at address 0x%08x\n", addr);
if (block)
pr_err("Was executing block PC 0x%08x\n", block->pc);
}
static void lightrec_swl(struct lightrec_state *state,
@ -196,10 +190,12 @@ static void lightrec_lwc2(struct lightrec_state *state, union code op,
}
static void lightrec_invalidate_map(struct lightrec_state *state,
const struct lightrec_mem_map *map, u32 addr)
const struct lightrec_mem_map *map, u32 addr, u32 len)
{
if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM])
state->code_lut[lut_offset(addr)] = NULL;
if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) {
memset(&state->code_lut[lut_offset(addr)], 0,
((len + 3) / 4) * sizeof(void *));
}
}
static const struct lightrec_mem_map *
@ -218,7 +214,7 @@ lightrec_get_map(struct lightrec_state *state, u32 kaddr)
}
u32 lightrec_rw(struct lightrec_state *state, union code op,
u32 addr, u32 data, u16 *flags)
u32 addr, u32 data, u16 *flags, struct block *block)
{
const struct lightrec_mem_map *map;
const struct lightrec_mem_map_ops *ops;
@ -230,7 +226,7 @@ u32 lightrec_rw(struct lightrec_state *state, union code op,
map = lightrec_get_map(state, kaddr);
if (!map) {
__segfault_cb(state, addr);
__segfault_cb(state, addr, block);
return 0;
}
@ -294,11 +290,13 @@ u32 lightrec_rw(struct lightrec_state *state, union code op,
}
static void lightrec_rw_helper(struct lightrec_state *state,
union code op, u16 *flags)
union code op, u16 *flags,
struct block *block)
{
u32 ret = lightrec_rw(state, op,
state->native_reg_cache[op.i.rs],
state->native_reg_cache[op.i.rt], flags);
state->native_reg_cache[op.i.rt], flags,
block);
switch (op.i.op) {
case OP_LB:
@ -317,20 +315,32 @@ static void lightrec_rw_helper(struct lightrec_state *state,
static void lightrec_rw_cb(struct lightrec_state *state, union code op)
{
lightrec_rw_helper(state, op, NULL);
lightrec_rw_helper(state, op, NULL, NULL);
}
static void lightrec_rw_generic_cb(struct lightrec_state *state,
struct opcode *op, struct block *block)
static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg)
{
bool was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
struct block *block;
struct opcode *op;
bool was_tagged;
u16 offset = (u16)arg;
lightrec_rw_helper(state, op->c, &op->flags);
block = lightrec_find_block_from_lut(state->block_cache,
arg >> 16, state->next_pc);
if (unlikely(!block)) {
pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n",
state->next_pc, offset);
return;
}
op = &block->opcode_list[offset];
was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
lightrec_rw_helper(state, op->c, &op->flags, block);
if (!was_tagged) {
pr_debug("Opcode of block at PC 0x%08x offset 0x%x has been "
"tagged - flag for recompilation\n",
block->pc, op->offset << 2);
pr_debug("Opcode of block at PC 0x%08x has been tagged - flag "
"for recompilation\n", block->pc);
block->flags |= BLOCK_SHOULD_RECOMPILE;
}
@ -407,7 +417,7 @@ static void lightrec_cp_cb(struct lightrec_state *state, union code op)
{
void (*func)(struct lightrec_state *, u32);
if ((op.opcode >> 25) & 1)
if (op.i.op == OP_CP2)
func = state->ops.cop2_ops.op;
else
func = state->ops.cop0_ops.op;
@ -466,12 +476,17 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
for (;;) {
func = state->code_lut[lut_offset(pc)];
if (func && func != state->get_next_block)
return func;
break;
block = lightrec_get_block(state, pc);
if (unlikely(!block))
return NULL;
break;
if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) {
func = state->memset_func;
break;
}
should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE &&
!(block->flags & BLOCK_IS_DEAD);
@ -493,7 +508,7 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
func = block->function;
if (likely(func))
return func;
break;
/* Block wasn't compiled yet - run the interpreter */
if (!ENABLE_THREADED_COMPILER &&
@ -510,40 +525,26 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
}
if (state->exit_flags != LIGHTREC_EXIT_NORMAL ||
state->current_cycle >= state->target_cycle) {
state->next_pc = pc;
return NULL;
}
state->current_cycle >= state->target_cycle)
break;
}
}
static s32 c_generic_function_wrapper(struct lightrec_state *state,
s32 cycles_delta,
void (*f)(struct lightrec_state *,
struct opcode *,
struct block *),
struct opcode *op, struct block *block)
{
state->current_cycle = state->target_cycle - cycles_delta;
(*f)(state, op, block);
return state->target_cycle - state->current_cycle;
state->next_pc = pc;
return func;
}
static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta,
void (*f)(struct lightrec_state *, union code),
union code op)
void (*f)(struct lightrec_state *, u32 d),
u32 d)
{
state->current_cycle = state->target_cycle - cycles_delta;
(*f)(state, op);
(*f)(state, d);
return state->target_cycle - state->current_cycle;
}
static struct block * generate_wrapper(struct lightrec_state *state,
void *f, bool generic)
static struct block * generate_wrapper(struct lightrec_state *state)
{
struct block *block;
jit_state_t *_jit;
@ -594,14 +595,9 @@ static struct block * generate_wrapper(struct lightrec_state *state,
jit_prepare();
jit_pushargr(LIGHTREC_REG_STATE);
jit_pushargr(LIGHTREC_REG_CYCLE);
jit_pushargi((uintptr_t)f);
jit_pushargr(JIT_R0);
if (generic) {
jit_pushargr(JIT_R1);
jit_finishi(c_generic_function_wrapper);
} else {
jit_finishi(c_function_wrapper);
}
jit_pushargr(JIT_R1);
jit_finishi(c_function_wrapper);
#if __WORDSIZE == 64
jit_retval_i(LIGHTREC_REG_CYCLE);
@ -639,11 +635,40 @@ err_no_mem:
return NULL;
}
/*
 * Zero a guest memory range using the host memset().
 *
 * The target guest address is read from guest register 4 and the length, in
 * 32-bit words, from guest register 5. The address is un-segmented, resolved
 * to a memory map (following mirrors down to the backing map), and the
 * matching host range is cleared. Unless invalidation is restricted to DMA,
 * the code LUT entries covering the range are invalidated as well.
 *
 * Returns a rough estimate of the guest cycles consumed (the dispatcher
 * subtracts it from the remaining cycle count), or 0 if no memory map
 * covers the target address.
 */
static u32 lightrec_memset(struct lightrec_state *state)
{
	const struct lightrec_mem_map *map;
	u32 pc, kunseg_pc = kunseg(state->native_reg_cache[4]);
	u32 length = state->native_reg_cache[5] * 4;

	map = lightrec_get_map(state, kunseg_pc);
	if (!map) {
		pr_err("Unable to find memory map for memset target address "
		       "0x%x\n", kunseg_pc);
		return 0;
	}

	/* Offset of the target inside the map it was found in */
	pc = kunseg_pc - map->pc;

	/* Mirrors have no storage of their own: use the backing map */
	while (map->mirror_of)
		map = map->mirror_of;

	pr_debug("Calling host memset, PC 0x%x (host address 0x%lx) for %u bytes\n",
		 kunseg_pc, (uintptr_t)map->address + pc, length);
	memset((void *)map->address + pc, 0, length);

	if (!state->invalidate_from_dma_only)
		lightrec_invalidate_map(state, map, kunseg_pc, length);

	/* Rough estimation of the number of cycles consumed: a fixed
	 * overhead plus a per-word cost. The division must apply to
	 * (length + 3); "length + 3 / 4" evaluates to plain "length" and
	 * would charge per byte instead of per word. */
	return 8 + 5 * ((length + 3) / 4);
}
static struct block * generate_dispatcher(struct lightrec_state *state)
{
struct block *block;
jit_state_t *_jit;
jit_node_t *to_end, *to_end2, *to_c, *loop, *addr, *addr2;
jit_node_t *to_end, *to_c, *loop, *addr, *addr2, *addr3;
unsigned int i;
u32 offset, ram_len;
jit_word_t code_size;
@ -682,10 +707,35 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
/* Call the block's code */
jit_jmpr(JIT_R0);
if (OPT_REPLACE_MEMSET) {
/* Blocks will jump here when they need to call
* lightrec_memset() */
addr3 = jit_indirect();
jit_prepare();
jit_pushargr(LIGHTREC_REG_STATE);
jit_finishi(lightrec_memset);
#if __WORDSIZE == 64
jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
offsetof(struct lightrec_state, native_reg_cache[31]));
#else
jit_ldxi_i(JIT_V0, LIGHTREC_REG_STATE,
offsetof(struct lightrec_state, native_reg_cache[31]));
#endif
jit_retval(JIT_R0);
jit_subr(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, JIT_R0);
}
/* The block will jump here, with the number of cycles remaining in
* LIGHTREC_REG_CYCLE */
addr2 = jit_indirect();
/* Store back the next_pc to the lightrec_state structure */
offset = offsetof(struct lightrec_state, next_pc);
jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0);
/* Jump to end if state->target_cycle < state->current_cycle */
to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
@ -707,7 +757,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
/* Slow path: call C function get_next_block_func() */
jit_patch(to_c);
if (ENABLE_FIRST_PASS) {
if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
/* We may call the interpreter - update state->current_cycle */
jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
offsetof(struct lightrec_state, target_cycle));
@ -728,7 +778,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
jit_finishi(&get_next_block_func);
jit_retval(JIT_R0);
if (ENABLE_FIRST_PASS) {
if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
/* The interpreter may have updated state->current_cycle and
* state->target_cycle - recalc the delta */
jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE,
@ -741,18 +791,10 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
/* If we get non-NULL, loop */
jit_patch_at(jit_bnei(JIT_R0, 0), loop);
to_end2 = jit_jmpi();
/* When exiting, the recompiled code will jump to that address */
jit_note(__FILE__, __LINE__);
jit_patch(to_end);
/* Store back the next_pc to the lightrec_state structure */
offset = offsetof(struct lightrec_state, next_pc);
jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0);
jit_patch(to_end2);
jit_retr(LIGHTREC_REG_CYCLE);
jit_epilog();
@ -769,6 +811,8 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
block->code_size = code_size;
state->eob_wrapper_func = jit_address(addr2);
if (OPT_REPLACE_MEMSET)
state->memset_func = jit_address(addr3);
state->get_next_block = jit_address(addr);
if (ENABLE_DISASSEMBLER) {
@ -803,6 +847,58 @@ union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
return (union code) *code;
}
/*
 * Number of cycles charged for executing one opcode. The cost is currently
 * the same constant for every opcode; 'code' is not inspected.
 */
unsigned int lightrec_cycles_of_opcode(union code code)
{
	const unsigned int cycles_per_opcode = 2;

	(void)code;

	return cycles_per_opcode;
}
void lightrec_free_opcode_list(struct block *block)
{
lightrec_free(block->state, MEM_FOR_IR,
sizeof(*block->opcode_list) * block->nb_ops,
block->opcode_list);
}
/*
 * Count the opcodes that make up the block starting at 'src'.
 *
 * The scan stops at the first syscall (included in the count) or at the
 * first unconditional jump, in which case one more opcode is counted after
 * the jump (presumably its delay slot). There is no upper bound: the scan
 * continues until one of these terminators is found.
 */
static unsigned int lightrec_get_mips_block_len(const u32 *src)
{
	unsigned int count = 0;
	union code c;

	for (;;) {
		c.opcode = LE32TOH(src[count]);
		count++;

		if (is_syscall(c))
			return count;

		if (is_unconditional_jump(c))
			return count + 1;
	}
}
static struct opcode * lightrec_disassemble(struct lightrec_state *state,
const u32 *src, unsigned int *len)
{
struct opcode *list;
unsigned int i, length;
length = lightrec_get_mips_block_len(src);
list = lightrec_malloc(state, MEM_FOR_IR, sizeof(*list) * length);
if (!list) {
pr_err("Unable to allocate memory\n");
return NULL;
}
for (i = 0; i < length; i++) {
list[i].opcode = LE32TOH(src[i]);
list[i].flags = 0;
}
*len = length * sizeof(u32);
return list;
}
static struct block * lightrec_precompile_block(struct lightrec_state *state,
u32 pc)
{
@ -857,16 +953,19 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
if (ENABLE_DISASSEMBLER) {
pr_debug("Disassembled block at PC: 0x%x\n", block->pc);
lightrec_print_disassembly(block, code, length);
lightrec_print_disassembly(block, code);
}
pr_debug("Block size: %lu opcodes\n", block->nb_ops);
pr_debug("Block size: %hu opcodes\n", block->nb_ops);
/* If the first opcode is an 'impossible' branch, never compile the
* block */
if (should_emulate(list))
if (should_emulate(block->opcode_list))
block->flags |= BLOCK_NEVER_COMPILE;
if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET))
state->code_lut[lut_offset(pc)] = state->memset_func;
block->hash = lightrec_calculate_block_hash(block);
pr_debug("Recompile count: %u\n", state->nb_precompile++);
@ -874,11 +973,14 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
return block;
}
static bool lightrec_block_is_fully_tagged(struct block *block)
static bool lightrec_block_is_fully_tagged(const struct block *block)
{
struct opcode *op;
const struct opcode *op;
unsigned int i;
for (i = 0; i < block->nb_ops; i++) {
op = &block->opcode_list[i];
for (op = block->opcode_list; op; op = op->next) {
/* Check all loads/stores of the opcode list and mark the
* block as fully compiled if they all have been tagged. */
@ -933,7 +1035,7 @@ int lightrec_compile_block(struct block *block)
bool skip_next = false;
jit_word_t code_size;
unsigned int i, j;
u32 next_pc, offset;
u32 offset;
fully_tagged = lightrec_block_is_fully_tagged(block);
if (fully_tagged)
@ -957,8 +1059,8 @@ int lightrec_compile_block(struct block *block)
start_of_block = jit_label();
for (elm = block->opcode_list; elm; elm = elm->next) {
next_pc = block->pc + elm->offset * sizeof(u32);
for (i = 0; i < block->nb_ops; i++) {
elm = &block->opcode_list[i];
if (skip_next) {
skip_next = false;
@ -969,11 +1071,12 @@ int lightrec_compile_block(struct block *block)
if (should_emulate(elm)) {
pr_debug("Branch at offset 0x%x will be emulated\n",
elm->offset << 2);
lightrec_emit_eob(block, elm, next_pc);
i << 2);
lightrec_emit_eob(block, i);
skip_next = !(elm->flags & LIGHTREC_NO_DS);
} else if (elm->opcode) {
lightrec_rec_opcode(block, elm, next_pc);
} else {
lightrec_rec_opcode(block, i);
skip_next = has_delay_slot(elm->c) &&
!(elm->flags & LIGHTREC_NO_DS);
#if _WIN32
@ -1086,7 +1189,7 @@ int lightrec_compile_block(struct block *block)
if (fully_tagged && !op_list_freed) {
pr_debug("Block PC 0x%08x is fully tagged"
" - free opcode list\n", block->pc);
lightrec_free_opcode_list(state, block->opcode_list);
lightrec_free_opcode_list(block);
block->opcode_list = NULL;
}
@ -1104,6 +1207,20 @@ int lightrec_compile_block(struct block *block)
return 0;
}
static void lightrec_print_info(struct lightrec_state *state)
{
if ((state->current_cycle & ~0xfffffff) != state->old_cycle_counter) {
pr_info("Lightrec RAM usage: IR %u KiB, CODE %u KiB, "
"MIPS %u KiB, TOTAL %u KiB, avg. IPI %f\n",
lightrec_get_mem_usage(MEM_FOR_IR) / 1024,
lightrec_get_mem_usage(MEM_FOR_CODE) / 1024,
lightrec_get_mem_usage(MEM_FOR_MIPS_CODE) / 1024,
lightrec_get_total_mem_usage() / 1024,
lightrec_get_average_ipi());
state->old_cycle_counter = state->current_cycle & ~0xfffffff;
}
}
u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
{
s32 (*func)(void *, s32) = (void *)state->dispatcher->function;
@ -1117,6 +1234,7 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
target_cycle = UINT_MAX;
state->target_cycle = target_cycle;
state->next_pc = pc;
block_trace = get_next_block_func(state, pc);
if (block_trace) {
@ -1130,6 +1248,9 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
if (ENABLE_THREADED_COMPILER)
lightrec_reaper_reap(state->reaper);
if (LOG_LEVEL >= INFO_L)
lightrec_print_info(state);
return state->next_pc;
}
@ -1146,14 +1267,19 @@ u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
state->exit_flags = LIGHTREC_EXIT_NORMAL;
return lightrec_emulate_block(block, pc);
pc = lightrec_emulate_block(block, pc);
if (LOG_LEVEL >= INFO_L)
lightrec_print_info(state);
return pc;
}
void lightrec_free_block(struct block *block)
{
lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32));
if (block->opcode_list)
lightrec_free_opcode_list(block->state, block->opcode_list);
lightrec_free_opcode_list(block);
if (block->_jit)
_jit_destroy_state(block->_jit);
lightrec_unregister(MEM_FOR_CODE, block->code_size);
@ -1220,50 +1346,20 @@ struct lightrec_state * lightrec_init(char *argv0,
if (!state->dispatcher)
goto err_free_reaper;
state->rw_generic_wrapper = generate_wrapper(state,
lightrec_rw_generic_cb,
true);
if (!state->rw_generic_wrapper)
state->c_wrapper_block = generate_wrapper(state);
if (!state->c_wrapper_block)
goto err_free_dispatcher;
state->rw_wrapper = generate_wrapper(state, lightrec_rw_cb, false);
if (!state->rw_wrapper)
goto err_free_generic_rw_wrapper;
state->c_wrapper = state->c_wrapper_block->function;
state->mfc_wrapper = generate_wrapper(state, lightrec_mfc_cb, false);
if (!state->mfc_wrapper)
goto err_free_rw_wrapper;
state->mtc_wrapper = generate_wrapper(state, lightrec_mtc_cb, false);
if (!state->mtc_wrapper)
goto err_free_mfc_wrapper;
state->rfe_wrapper = generate_wrapper(state, lightrec_rfe_cb, false);
if (!state->rfe_wrapper)
goto err_free_mtc_wrapper;
state->cp_wrapper = generate_wrapper(state, lightrec_cp_cb, false);
if (!state->cp_wrapper)
goto err_free_rfe_wrapper;
state->syscall_wrapper = generate_wrapper(state, lightrec_syscall_cb,
false);
if (!state->syscall_wrapper)
goto err_free_cp_wrapper;
state->break_wrapper = generate_wrapper(state, lightrec_break_cb,
false);
if (!state->break_wrapper)
goto err_free_syscall_wrapper;
state->rw_generic_func = state->rw_generic_wrapper->function;
state->rw_func = state->rw_wrapper->function;
state->mfc_func = state->mfc_wrapper->function;
state->mtc_func = state->mtc_wrapper->function;
state->rfe_func = state->rfe_wrapper->function;
state->cp_func = state->cp_wrapper->function;
state->syscall_func = state->syscall_wrapper->function;
state->break_func = state->break_wrapper->function;
state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb;
state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb;
state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb;
state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb;
state->c_wrappers[C_WRAPPER_RFE] = lightrec_rfe_cb;
state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb;
state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb;
state->c_wrappers[C_WRAPPER_BREAK] = lightrec_break_cb;
map = &state->maps[PSX_MAP_BIOS];
state->offset_bios = (uintptr_t)map->address - map->pc;
@ -1279,22 +1375,17 @@ struct lightrec_state * lightrec_init(char *argv0,
state->maps[PSX_MAP_MIRROR3].address == map->address + 0x600000)
state->mirrors_mapped = true;
if (state->offset_bios == 0 &&
state->offset_scratch == 0 &&
state->offset_ram == 0 &&
state->mirrors_mapped) {
pr_info("Memory map is perfect. Emitted code will be best.\n");
} else {
pr_info("Memory map is sub-par. Emitted code will be slow.\n");
}
return state;
err_free_syscall_wrapper:
lightrec_free_block(state->syscall_wrapper);
err_free_cp_wrapper:
lightrec_free_block(state->cp_wrapper);
err_free_rfe_wrapper:
lightrec_free_block(state->rfe_wrapper);
err_free_mtc_wrapper:
lightrec_free_block(state->mtc_wrapper);
err_free_mfc_wrapper:
lightrec_free_block(state->mfc_wrapper);
err_free_rw_wrapper:
lightrec_free_block(state->rw_wrapper);
err_free_generic_rw_wrapper:
lightrec_free_block(state->rw_generic_wrapper);
err_free_dispatcher:
lightrec_free_block(state->dispatcher);
err_free_reaper:
@ -1330,14 +1421,7 @@ void lightrec_destroy(struct lightrec_state *state)
lightrec_free_regcache(state->reg_cache);
lightrec_free_block_cache(state->block_cache);
lightrec_free_block(state->dispatcher);
lightrec_free_block(state->rw_generic_wrapper);
lightrec_free_block(state->rw_wrapper);
lightrec_free_block(state->mfc_wrapper);
lightrec_free_block(state->mtc_wrapper);
lightrec_free_block(state->rfe_wrapper);
lightrec_free_block(state->cp_wrapper);
lightrec_free_block(state->syscall_wrapper);
lightrec_free_block(state->break_wrapper);
lightrec_free_block(state->c_wrapper_block);
finish_jit();
#if ENABLE_TINYMM
@ -1363,10 +1447,7 @@ void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len)
/* Handle mirrors */
kaddr &= (state->maps[PSX_MAP_KERNEL_USER_RAM].length - 1);
for (; len > 4; len -= 4, kaddr += 4)
lightrec_invalidate_map(state, map, kaddr);
lightrec_invalidate_map(state, map, kaddr);
lightrec_invalidate_map(state, map, kaddr, len);
}
}

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2016-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2016-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_H__
@ -69,14 +60,6 @@ enum psx_map {
PSX_MAP_MIRROR3,
};
enum mem_type {
MEM_FOR_CODE,
MEM_FOR_MIPS_CODE,
MEM_FOR_IR,
MEM_FOR_LIGHTREC,
MEM_TYPE_END,
};
struct lightrec_mem_map_ops {
void (*sb)(struct lightrec_state *, u32 opcode,
void *host, u32 addr, u8 data);
@ -139,10 +122,6 @@ __api void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles);
__api void lightrec_set_target_cycle_count(struct lightrec_state *state,
u32 cycles);
__api unsigned int lightrec_get_mem_usage(enum mem_type type);
__api unsigned int lightrec_get_total_mem_usage(void);
__api float lightrec_get_average_ipi(void);
#ifdef __cplusplus
};
#endif

View File

@ -1,15 +1,6 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
/*
* Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "config.h"

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __MEMMANAGER_H__
@ -17,6 +8,14 @@
#include "lightrec.h"
/*
 * Memory categories passed as the 'type' argument of lightrec_malloc(),
 * lightrec_calloc(), lightrec_register(), lightrec_unregister() and
 * lightrec_get_mem_usage().
 * NOTE(review): the per-category meanings are inferred from the names only;
 * MEM_TYPE_END looks like an end marker rather than a real category - confirm.
 */
enum mem_type {
MEM_FOR_CODE,
MEM_FOR_MIPS_CODE,
MEM_FOR_IR,
MEM_FOR_LIGHTREC,
MEM_TYPE_END,
};
void * lightrec_malloc(struct lightrec_state *state,
enum mem_type type, unsigned int len);
void * lightrec_calloc(struct lightrec_state *state,
@ -27,4 +26,8 @@ void lightrec_free(struct lightrec_state *state,
void lightrec_register(enum mem_type type, unsigned int len);
void lightrec_unregister(enum mem_type type, unsigned int len);
unsigned int lightrec_get_mem_usage(enum mem_type type);
unsigned int lightrec_get_total_mem_usage(void);
float lightrec_get_average_ipi(void);
#endif /* __MEMMANAGER_H__ */

View File

@ -1,17 +1,9 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
/*
* Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "config.h"
#include "disassembler.h"
#include "lightrec.h"
#include "memmanager.h"
@ -21,60 +13,89 @@
#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#define IF_OPT(opt, ptr) ((opt) ? (ptr) : NULL)
struct optimizer_list {
void (**optimizers)(struct opcode *);
unsigned int nb_optimizers;
};
bool opcode_reads_register(union code op, u8 reg)
/*
 * Tell whether the opcode 'c' transfers control unconditionally.
 *
 * Returns true for J/JAL, JR/JALR, and for the conditional branch encodings
 * whose condition is trivially true: BEQ comparing a register with itself,
 * and BLEZ/BGEZ/BGEZAL testing register $zero. Returns false otherwise.
 */
bool is_unconditional_jump(union code c)
{
	switch (c.i.op) {
	case OP_SPECIAL:
		return c.r.op == OP_SPECIAL_JR || c.r.op == OP_SPECIAL_JALR;
	case OP_J:
	case OP_JAL:
		return true;
	case OP_BEQ:
		/* A register always compares equal to itself. */
		return c.i.rs == c.i.rt;
	case OP_BLEZ:
		/* BLEZ compares rs against zero and does not use the rt field,
		 * so it is always taken only when rs is $zero. Testing
		 * rs == rt here would wrongly flag "BLEZ rN" as unconditional
		 * when a non-canonical encoding has rt == rs != 0. */
		return c.i.rs == 0;
	case OP_REGIMM:
		/* $zero >= 0 always holds. */
		return (c.r.rt == OP_REGIMM_BGEZ ||
			c.r.rt == OP_REGIMM_BGEZAL) && c.i.rs == 0;
	default:
		return false;
	}
}
/*
 * Return true when 'c' is a SYSCALL opcode, or an MTC0/CTC0 opcode whose
 * destination is coprocessor-0 register 12 or 13.
 * NOTE(review): registers 12/13 are presumably Status and Cause - confirm.
 */
bool is_syscall(union code c)
{
	if (c.i.op == OP_SPECIAL)
		return c.r.op == OP_SPECIAL_SYSCALL;

	if (c.i.op != OP_CP0)
		return false;

	if (c.r.rs != OP_CP0_MTC0 && c.r.rs != OP_CP0_CTC0)
		return false;

	return c.r.rd == 12 || c.r.rd == 13;
}
static u64 opcode_read_mask(union code op)
{
switch (op.i.op) {
case OP_SPECIAL:
switch (op.r.op) {
case OP_SPECIAL_SYSCALL:
case OP_SPECIAL_BREAK:
return false;
return 0;
case OP_SPECIAL_JR:
case OP_SPECIAL_JALR:
case OP_SPECIAL_MTHI:
case OP_SPECIAL_MTLO:
return op.r.rs == reg;
return BIT(op.r.rs);
case OP_SPECIAL_MFHI:
return reg == REG_HI;
return BIT(REG_HI);
case OP_SPECIAL_MFLO:
return reg == REG_LO;
return BIT(REG_LO);
case OP_SPECIAL_SLL:
case OP_SPECIAL_SRL:
case OP_SPECIAL_SRA:
return op.r.rt == reg;
return BIT(op.r.rt);
default:
return op.r.rs == reg || op.r.rt == reg;
return BIT(op.r.rs) | BIT(op.r.rt);
}
case OP_CP0:
switch (op.r.rs) {
case OP_CP0_MTC0:
case OP_CP0_CTC0:
return op.r.rt == reg;
return BIT(op.r.rt);
default:
return false;
return 0;
}
case OP_CP2:
if (op.r.op == OP_CP2_BASIC) {
switch (op.r.rs) {
case OP_CP2_BASIC_MTC2:
case OP_CP2_BASIC_CTC2:
return op.r.rt == reg;
return BIT(op.r.rt);
default:
return false;
break;
}
} else {
return false;
}
return 0;
case OP_J:
case OP_JAL:
case OP_LUI:
return false;
return 0;
case OP_BEQ:
case OP_BNE:
case OP_LWL:
@ -84,33 +105,45 @@ bool opcode_reads_register(union code op, u8 reg)
case OP_SWL:
case OP_SW:
case OP_SWR:
return op.i.rs == reg || op.i.rt == reg;
return BIT(op.i.rs) | BIT(op.i.rt);
default:
return op.i.rs == reg;
return BIT(op.i.rs);
}
}
bool opcode_writes_register(union code op, u8 reg)
static u64 opcode_write_mask(union code op)
{
u64 flags;
switch (op.i.op) {
case OP_SPECIAL:
switch (op.r.op) {
case OP_SPECIAL_JR:
case OP_SPECIAL_JALR:
case OP_SPECIAL_SYSCALL:
case OP_SPECIAL_BREAK:
return false;
return 0;
case OP_SPECIAL_MULT:
case OP_SPECIAL_MULTU:
case OP_SPECIAL_DIV:
case OP_SPECIAL_DIVU:
return reg == REG_LO || reg == REG_HI;
if (!OPT_FLAG_MULT_DIV)
return BIT(REG_LO) | BIT(REG_HI);
if (op.r.rd)
flags = BIT(op.r.rd);
else
flags = BIT(REG_LO);
if (op.r.imm)
flags |= BIT(op.r.imm);
else
flags |= BIT(REG_HI);
return flags;
case OP_SPECIAL_MTHI:
return reg == REG_HI;
return BIT(REG_HI);
case OP_SPECIAL_MTLO:
return reg == REG_LO;
return BIT(REG_LO);
default:
return op.r.rd == reg;
return BIT(op.r.rd);
}
case OP_ADDI:
case OP_ADDIU:
@ -127,34 +160,90 @@ bool opcode_writes_register(union code op, u8 reg)
case OP_LBU:
case OP_LHU:
case OP_LWR:
return op.i.rt == reg;
return BIT(op.i.rt);
case OP_JAL:
return BIT(31);
case OP_CP0:
switch (op.r.rs) {
case OP_CP0_MFC0:
case OP_CP0_CFC0:
return op.i.rt == reg;
return BIT(op.i.rt);
default:
return false;
return 0;
}
case OP_CP2:
if (op.r.op == OP_CP2_BASIC) {
switch (op.r.rs) {
case OP_CP2_BASIC_MFC2:
case OP_CP2_BASIC_CFC2:
return op.i.rt == reg;
return BIT(op.i.rt);
default:
return false;
break;
}
} else {
return false;
}
return 0;
case OP_REGIMM:
switch (op.r.rt) {
case OP_REGIMM_BLTZAL:
case OP_REGIMM_BGEZAL:
return BIT(31);
default:
return 0;
}
case OP_META_MOV:
return op.r.rd == reg;
return BIT(op.r.rd);
default:
return 0;
}
}
/* Return true when opcode 'op' reads register 'reg', i.e. when the bit for
 * 'reg' is set in the opcode's read mask. */
bool opcode_reads_register(union code op, u8 reg)
{
	const u64 read_mask = opcode_read_mask(op);

	return (read_mask & BIT(reg)) != 0;
}
/* Return true when opcode 'op' writes register 'reg', i.e. when the bit for
 * 'reg' is set in the opcode's write mask. */
bool opcode_writes_register(union code op, u8 reg)
{
	const u64 write_mask = opcode_write_mask(op);

	return (write_mask & BIT(reg)) != 0;
}
/* Return true when the primary opcode of 'op' is one of the load
 * instructions: LB, LH, LWL, LW, LBU, LHU, LWR or LWC2. */
static bool opcode_is_load(union code op)
{
	const unsigned int primary = op.i.op;

	return primary == OP_LB || primary == OP_LH ||
	       primary == OP_LWL || primary == OP_LW ||
	       primary == OP_LBU || primary == OP_LHU ||
	       primary == OP_LWR || primary == OP_LWC2;
}
/* Return true when the primary opcode of 'op' is one of the store
 * instructions: SB, SH, SW, SWL, SWR or SWC2. */
static bool opcode_is_store(union code op)
{
	const unsigned int primary = op.i.op;

	return primary == OP_SB || primary == OP_SH ||
	       primary == OP_SW || primary == OP_SWL ||
	       primary == OP_SWR || primary == OP_SWC2;
}
/* Return true when 'op' is a memory access, i.e. either a load or a store. */
bool opcode_is_io(union code op)
{
	if (opcode_is_load(op))
		return true;

	return opcode_is_store(op);
}
/* TODO: Complete */
static bool is_nop(union code op)
{
@ -481,43 +570,13 @@ static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v)
return known;
}
static int lightrec_add_meta(struct block *block,
struct opcode *op, union code code)
{
struct opcode *meta;
meta = lightrec_malloc(block->state, MEM_FOR_IR, sizeof(*meta));
if (!meta)
return -ENOMEM;
meta->c = code;
meta->flags = 0;
if (op) {
meta->offset = op->offset;
meta->next = op->next;
op->next = meta;
} else {
meta->offset = 0;
meta->next = block->opcode_list;
block->opcode_list = meta;
}
return 0;
}
static int lightrec_add_sync(struct block *block, struct opcode *prev)
{
return lightrec_add_meta(block, prev, (union code){
.j.op = OP_META_SYNC,
});
}
static int lightrec_transform_ops(struct block *block)
{
struct opcode *list = block->opcode_list;
struct opcode *list;
unsigned int i;
for (; list; list = list->next) {
for (i = 0; i < block->nb_ops; i++) {
list = &block->opcode_list[i];
/* Transform all opcodes detected as useless to real NOPs
* (0x0: SLL r0, r0, #0) */
@ -604,20 +663,28 @@ static int lightrec_transform_ops(struct block *block)
static int lightrec_switch_delay_slots(struct block *block)
{
struct opcode *list, *prev;
struct opcode *list, *next = &block->opcode_list[0];
unsigned int i;
union code op, next_op;
u8 flags;
for (list = block->opcode_list, prev = NULL; list->next;
prev = list, list = list->next) {
union code op = list->c;
union code next_op = list->next->c;
for (i = 0; i < block->nb_ops - 1; i++) {
list = next;
next = &block->opcode_list[i + 1];
next_op = next->c;
op = list->c;
if (!has_delay_slot(op) ||
list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) ||
op.opcode == 0)
op.opcode == 0 || next_op.opcode == 0)
continue;
if (prev && has_delay_slot(prev->c))
if (i && has_delay_slot(block->opcode_list[i - 1].c) &&
!(block->opcode_list[i - 1].flags & LIGHTREC_NO_DS))
continue;
if ((list->flags & LIGHTREC_SYNC) ||
(next->flags & LIGHTREC_SYNC))
continue;
switch (list->i.op) {
@ -671,27 +738,59 @@ static int lightrec_switch_delay_slots(struct block *block)
}
pr_debug("Swap branch and delay slot opcodes "
"at offsets 0x%x / 0x%x\n", list->offset << 2,
list->next->offset << 2);
"at offsets 0x%x / 0x%x\n",
i << 2, (i + 1) << 2);
flags = list->next->flags;
flags = next->flags;
list->c = next_op;
list->next->c = op;
list->next->flags = list->flags | LIGHTREC_NO_DS;
next->c = op;
next->flags = list->flags | LIGHTREC_NO_DS;
list->flags = flags | LIGHTREC_NO_DS;
list->offset++;
list->next->offset--;
}
return 0;
}
/*
 * Replace the opcode list of 'block' by a freshly allocated copy holding only
 * its first 'new_size' opcodes.
 *
 * Returns 0 on success, -EINVAL when 'new_size' is not strictly smaller than
 * the current opcode count, or -ENOMEM when the new list cannot be allocated.
 * On failure the block is left untouched.
 */
static int shrink_opcode_list(struct block *block, u16 new_size)
{
	struct opcode *shrunk;

	if (block->nb_ops <= new_size) {
		pr_err("Invalid shrink size (%u vs %u)\n",
		       new_size, block->nb_ops);
		return -EINVAL;
	}

	shrunk = lightrec_malloc(block->state, MEM_FOR_IR,
				 sizeof(*shrunk) * new_size);
	if (shrunk == NULL) {
		pr_err("Unable to allocate memory\n");
		return -ENOMEM;
	}

	/* Keep the leading opcodes, then release the old list while the
	 * block still describes it, before updating the fields. */
	memcpy(shrunk, block->opcode_list, sizeof(*shrunk) * new_size);
	lightrec_free_opcode_list(block);

	block->nb_ops = new_size;
	block->opcode_list = shrunk;

	pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n",
		 block->pc, new_size);

	return 0;
}
static int lightrec_detect_impossible_branches(struct block *block)
{
struct opcode *op, *next;
struct opcode *op, *next = &block->opcode_list[0];
unsigned int i;
int ret = 0;
for (i = 0; i < block->nb_ops - 1; i++) {
op = next;
next = &block->opcode_list[i + 1];
for (op = block->opcode_list, next = op->next; next;
op = next, next = op->next) {
if (!has_delay_slot(op->c) ||
(!load_in_delay_slot(next->c) &&
!has_delay_slot(next->c) &&
@ -705,28 +804,33 @@ static int lightrec_detect_impossible_branches(struct block *block)
continue;
}
op->flags |= LIGHTREC_EMULATE_BRANCH;
if (op == block->opcode_list) {
pr_debug("First opcode of block PC 0x%08x is an impossible branch\n",
block->pc);
/* If the first opcode is an 'impossible' branch, we
* only keep the first two opcodes of the block (the
* branch itself + its delay slot) */
lightrec_free_opcode_list(block->state, next->next);
next->next = NULL;
block->nb_ops = 2;
if (block->nb_ops > 2)
ret = shrink_opcode_list(block, 2);
break;
}
op->flags |= LIGHTREC_EMULATE_BRANCH;
}
return 0;
return ret;
}
static int lightrec_local_branches(struct block *block)
{
struct opcode *list, *target, *prev;
struct opcode *list;
unsigned int i;
s32 offset;
int ret;
for (list = block->opcode_list; list; list = list->next) {
for (i = 0; i < block->nb_ops; i++) {
list = &block->opcode_list[i];
if (should_emulate(list))
continue;
@ -738,7 +842,7 @@ static int lightrec_local_branches(struct block *block)
case OP_REGIMM:
case OP_META_BEQZ:
case OP_META_BNEZ:
offset = list->offset + 1 + (s16)list->i.imm;
offset = i + 1 + (s16)list->i.imm;
if (offset >= 0 && offset < block->nb_ops)
break;
default: /* fall-through */
@ -747,37 +851,20 @@ static int lightrec_local_branches(struct block *block)
pr_debug("Found local branch to offset 0x%x\n", offset << 2);
for (target = block->opcode_list, prev = NULL;
target; prev = target, target = target->next) {
if (target->offset != offset ||
target->j.op == OP_META_SYNC)
continue;
if (should_emulate(target)) {
pr_debug("Branch target must be emulated"
" - skip\n");
break;
}
if (prev && has_delay_slot(prev->c)) {
pr_debug("Branch target is a delay slot"
" - skip\n");
break;
}
if (prev && prev->j.op != OP_META_SYNC) {
pr_debug("Adding sync before offset "
"0x%x\n", offset << 2);
ret = lightrec_add_sync(block, prev);
if (ret)
return ret;
prev->next->offset = target->offset;
}
list->flags |= LIGHTREC_LOCAL_BRANCH;
break;
if (should_emulate(&block->opcode_list[offset])) {
pr_debug("Branch target must be emulated - skip\n");
continue;
}
if (offset && has_delay_slot(block->opcode_list[offset - 1].c)) {
pr_debug("Branch target is a delay slot - skip\n");
continue;
}
pr_debug("Adding sync at offset 0x%x\n", offset << 2);
block->opcode_list[offset].flags |= LIGHTREC_SYNC;
list->flags |= LIGHTREC_LOCAL_BRANCH;
}
return 0;
@ -809,63 +896,57 @@ bool has_delay_slot(union code op)
}
}
bool should_emulate(struct opcode *list)
bool should_emulate(const struct opcode *list)
{
return has_delay_slot(list->c) &&
(list->flags & LIGHTREC_EMULATE_BRANCH);
}
static int lightrec_add_unload(struct block *block, struct opcode *op, u8 reg)
static void lightrec_add_unload(struct opcode *op, u8 reg)
{
return lightrec_add_meta(block, op, (union code){
.i.op = OP_META_REG_UNLOAD,
.i.rs = reg,
});
if (op->i.op == OP_SPECIAL && reg == op->r.rd)
op->flags |= LIGHTREC_UNLOAD_RD;
if (op->i.rs == reg)
op->flags |= LIGHTREC_UNLOAD_RS;
if (op->i.rt == reg)
op->flags |= LIGHTREC_UNLOAD_RT;
}
static int lightrec_early_unload(struct block *block)
{
struct opcode *list = block->opcode_list;
u8 i;
unsigned int i, offset;
struct opcode *op;
u8 reg;
for (i = 1; i < 34; i++) {
struct opcode *op, *last_r = NULL, *last_w = NULL;
unsigned int last_r_id = 0, last_w_id = 0, id = 0;
int ret;
for (reg = 1; reg < 34; reg++) {
int last_r_id = -1, last_w_id = -1;
for (op = list; op->next; op = op->next, id++) {
if (opcode_reads_register(op->c, i)) {
last_r = op;
last_r_id = id;
}
for (i = 0; i < block->nb_ops; i++) {
union code c = block->opcode_list[i].c;
if (opcode_writes_register(op->c, i)) {
last_w = op;
last_w_id = id;
}
if (opcode_reads_register(c, reg))
last_r_id = i;
if (opcode_writes_register(c, reg))
last_w_id = i;
}
if (last_w_id > last_r_id) {
if (has_delay_slot(last_w->c) &&
!(last_w->flags & LIGHTREC_NO_DS))
last_w = last_w->next;
if (last_w_id > last_r_id)
offset = (unsigned int)last_w_id;
else if (last_r_id >= 0)
offset = (unsigned int)last_r_id;
else
continue;
if (last_w->next) {
ret = lightrec_add_unload(block, last_w, i);
if (ret)
return ret;
}
} else if (last_r) {
if (has_delay_slot(last_r->c) &&
!(last_r->flags & LIGHTREC_NO_DS))
last_r = last_r->next;
op = &block->opcode_list[offset];
if (last_r->next) {
ret = lightrec_add_unload(block, last_r, i);
if (ret)
return ret;
}
}
if (has_delay_slot(op->c) && (op->flags & LIGHTREC_NO_DS))
offset++;
if (offset == block->nb_ops)
continue;
lightrec_add_unload(&block->opcode_list[offset], reg);
}
return 0;
@ -876,8 +957,11 @@ static int lightrec_flag_stores(struct block *block)
struct opcode *list;
u32 known = BIT(0);
u32 values[32] = { 0 };
unsigned int i;
for (i = 0; i < block->nb_ops; i++) {
list = &block->opcode_list[i];
for (list = block->opcode_list; list; list = list->next) {
/* Register $zero is always, well, zero */
known |= BIT(0);
values[0] = 0;
@ -918,13 +1002,29 @@ static int lightrec_flag_stores(struct block *block)
return 0;
}
static u8 get_mfhi_mflo_reg(const struct opcode *op, bool mflo)
static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
const struct opcode *last,
u32 mask, bool sync, bool mflo)
{
const struct opcode *next;
u32 offset;
const struct opcode *op, *next = &block->opcode_list[offset];
u32 old_mask;
u8 reg2, reg = mflo ? REG_LO : REG_HI;
u16 branch_offset;
unsigned int i;
for (i = offset; i < block->nb_ops; i++) {
op = next;
next = &block->opcode_list[i + 1];
old_mask = mask;
/* If any other opcode writes or reads to the register
* we'd use, then we cannot use it anymore. */
mask |= opcode_read_mask(op->c);
mask |= opcode_write_mask(op->c);
if (op->flags & LIGHTREC_SYNC)
sync = true;
for (; op; op = op->next) {
switch (op->i.op) {
case OP_BEQ:
case OP_BNE:
@ -934,15 +1034,16 @@ static u8 get_mfhi_mflo_reg(const struct opcode *op, bool mflo)
case OP_META_BEQZ:
case OP_META_BNEZ:
/* TODO: handle backwards branches too */
if ((op->flags & LIGHTREC_LOCAL_BRANCH) &&
if (!last &&
(op->flags & LIGHTREC_LOCAL_BRANCH) &&
(s16)op->c.i.imm >= 0) {
offset = op->offset + 1 + (s16)op->c.i.imm;
branch_offset = i + 1 + (s16)op->c.i.imm
- !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
for (next = op; next->offset != offset;
next = next->next);
reg = get_mfhi_mflo_reg(next, mflo);
reg2 = get_mfhi_mflo_reg(op->next, mflo);
reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
mask, sync, mflo);
reg2 = get_mfhi_mflo_reg(block, offset + 1, next,
mask, sync, mflo);
if (reg > 0 && reg == reg2)
return reg;
if (!reg && !reg2)
@ -969,26 +1070,37 @@ static u8 get_mfhi_mflo_reg(const struct opcode *op, bool mflo)
if (op->r.rs != 31)
return reg;
if (!(op->flags & LIGHTREC_NO_DS) &&
(op->next->i.op == OP_SPECIAL) &&
(!mflo && op->next->r.op == OP_SPECIAL_MFHI) ||
(mflo && op->next->r.op == OP_SPECIAL_MFLO))
return op->next->r.rd;
if (!sync &&
!(op->flags & LIGHTREC_NO_DS) &&
(next->i.op == OP_SPECIAL) &&
((!mflo && next->r.op == OP_SPECIAL_MFHI) ||
(mflo && next->r.op == OP_SPECIAL_MFLO)))
return next->r.rd;
return 0;
case OP_SPECIAL_JALR:
return reg;
case OP_SPECIAL_MFHI:
if (!mflo)
return op->r.rd;
if (!mflo) {
if (!sync && !(old_mask & BIT(op->r.rd)))
return op->r.rd;
else
return REG_HI;
}
continue;
case OP_SPECIAL_MFLO:
if (mflo)
return op->r.rd;
if (mflo) {
if (!sync && !(old_mask & BIT(op->r.rd)))
return op->r.rd;
else
return REG_LO;
}
continue;
default:
continue;
break;
}
/* fall-through */
default:
continue;
}
@ -997,13 +1109,66 @@ static u8 get_mfhi_mflo_reg(const struct opcode *op, bool mflo)
return reg;
}
/*
 * Turn the next MFLO (when 'lo' is true) or MFHI (when 'lo' is false) found
 * in the opcode range [offset, last) of 'block' into a NOP (opcode 0).
 *
 * The scan stops at the first matching opcode. When a forward local branch
 * is met, both the range starting at the branch target and the fall-through
 * range up to the branch target are processed recursively, after which the
 * linear scan resumes.
 */
static void lightrec_replace_lo_hi(struct block *block, u16 offset,
u16 last, bool lo)
{
unsigned int i;
u32 branch_offset;
/* This function will remove the following MFLO/MFHI. It must be called
* only if get_mfhi_mflo_reg() returned a non-zero value. */
for (i = offset; i < last; i++) {
struct opcode *op = &block->opcode_list[i];
switch (op->i.op) {
case OP_BEQ:
case OP_BNE:
case OP_BLEZ:
case OP_BGTZ:
case OP_REGIMM:
case OP_META_BEQZ:
case OP_META_BNEZ:
/* TODO: handle backwards branches too */
if ((op->flags & LIGHTREC_LOCAL_BRANCH) &&
(s16)op->c.i.imm >= 0) {
/* Index of the branch target inside the block; one less when the
 * opcode carries LIGHTREC_NO_DS and delay-slot switching is enabled. */
branch_offset = i + 1 + (s16)op->c.i.imm
- !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
/* Taken path first, then the fall-through path up to the target. */
lightrec_replace_lo_hi(block, branch_offset, last, lo);
lightrec_replace_lo_hi(block, i + 1, branch_offset, lo);
}
break;
case OP_SPECIAL:
if (lo && op->r.op == OP_SPECIAL_MFLO) {
pr_debug("Removing MFLO opcode at offset 0x%x\n",
i << 2);
/* Opcode 0 is the real NOP encoding. */
op->opcode = 0;
return;
} else if (!lo && op->r.op == OP_SPECIAL_MFHI) {
pr_debug("Removing MFHI opcode at offset 0x%x\n",
i << 2);
op->opcode = 0;
return;
}
/* fall-through */
default:
break;
}
}
}
static int lightrec_flag_mults_divs(struct block *block)
{
struct opcode *list, *prev;
u8 reg_hi;
struct opcode *list;
u8 reg_hi, reg_lo;
unsigned int i;
for (i = 0; i < block->nb_ops - 1; i++) {
list = &block->opcode_list[i];
for (list = block->opcode_list, prev = NULL; list;
prev = list, list = list->next) {
if (list->i.op != OP_SPECIAL)
continue;
@ -1018,21 +1183,56 @@ static int lightrec_flag_mults_divs(struct block *block)
}
/* Don't support opcodes in delay slots */
if (prev && has_delay_slot(prev->c))
if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
(list->flags & LIGHTREC_NO_DS))
continue;
reg_hi = get_mfhi_mflo_reg(list->next, false);
reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true);
if (reg_lo == 0) {
pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
" not writing LO\n", i << 2);
list->flags |= LIGHTREC_NO_LO;
}
reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false);
if (reg_hi == 0) {
pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
" 32-bit\n", list->offset << 2);
" not writing HI\n", i << 2);
list->flags |= LIGHTREC_NO_HI;
}
if (!reg_lo && !reg_hi) {
pr_debug("Both LO/HI unused in this block, they will "
"probably be used in parent block - removing "
"flags.\n");
list->flags &= ~(LIGHTREC_NO_LO | LIGHTREC_NO_HI);
}
if (reg_lo > 0 && reg_lo != REG_LO) {
pr_debug("Found register %s to hold LO (rs = %u, rt = %u)\n",
lightrec_reg_name(reg_lo), list->r.rs, list->r.rt);
lightrec_replace_lo_hi(block, i + 1, block->nb_ops, true);
list->r.rd = reg_lo;
} else {
list->r.rd = 0;
}
if (reg_hi > 0 && reg_hi != REG_HI) {
pr_debug("Found register %s to hold HI (rs = %u, rt = %u)\n",
lightrec_reg_name(reg_hi), list->r.rs, list->r.rt);
lightrec_replace_lo_hi(block, i + 1, block->nb_ops, false);
list->r.imm = reg_hi;
} else {
list->r.imm = 0;
}
}
return 0;
}
static bool remove_div_sequence(struct opcode *list)
static bool remove_div_sequence(struct block *block, unsigned int offset)
{
struct opcode *op;
unsigned int i, found = 0;
@ -1051,7 +1251,9 @@ static bool remove_div_sequence(struct opcode *list)
* and these sequences can be removed.
*/
for (op = list; op; op = op->next) {
for (i = offset; i < block->nb_ops; i++) {
op = &block->opcode_list[i];
if (!found) {
if (op->i.op == OP_SPECIAL &&
(op->r.op == OP_SPECIAL_DIV || op->r.op == OP_SPECIAL_DIVU))
@ -1061,7 +1263,7 @@ static bool remove_div_sequence(struct opcode *list)
/* BNE ???, zero, +8 */
found++;
} else {
list = list->next;
offset++;
}
} else if (found == 1 && !op->opcode) {
/* NOP */
@ -1098,13 +1300,10 @@ static bool remove_div_sequence(struct opcode *list)
found = 3;
pr_debug("Removing DIV%s sequence at offset 0x%x\n",
found == 9 ? "" : "U",
list->offset << 2);
found == 9 ? "" : "U", offset << 2);
for (i = 0; list && i < found; i++) {
list->opcode = 0;
list = list->next;
}
for (i = 0; i < found; i++)
block->opcode_list[offset + i].opcode = 0;
return true;
}
@ -1115,37 +1314,79 @@ static bool remove_div_sequence(struct opcode *list)
static int lightrec_remove_div_by_zero_check_sequence(struct block *block)
{
struct opcode *op;
unsigned int i;
for (i = 0; i < block->nb_ops; i++) {
op = &block->opcode_list[i];
for (op = block->opcode_list; op; op = op->next) {
if (op->i.op == OP_SPECIAL &&
(op->r.op == OP_SPECIAL_DIVU || op->r.op == OP_SPECIAL_DIV) &&
remove_div_sequence(op->next))
remove_div_sequence(block, i + 1))
op->flags |= LIGHTREC_NO_DIV_CHECK;
}
return 0;
}
/*
 * Raw MIPS opcodes of the routine recognised by lightrec_replace_memset().
 * A block is compared against this table opcode by opcode; the per-entry
 * comments give the corresponding assembly.
 */
static const u32 memset_code[] = {
0x10a00006, // beqz a1, 2f
0x24a2ffff, // addiu v0,a1,-1
0x2403ffff, // li v1,-1
0xac800000, // 1: sw zero,0(a0)
0x2442ffff, // addiu v0,v0,-1
0x1443fffd, // bne v0,v1, 1b
0x24840004, // addiu a0,a0,4
0x03e00008, // 2: jr ra
0x00000000, // nop
};
/*
 * Detect whether 'block' starts with the exact opcode sequence stored in
 * memset_code[]. On a full match the block is flagged BLOCK_IS_MEMSET and
 * BLOCK_NEVER_COMPILE and 1 is returned, which makes the caller skip the
 * remaining optimizers. Returns 0 on any mismatch or when the block is
 * shorter than the reference sequence.
 */
static int lightrec_replace_memset(struct block *block)
{
	unsigned int idx;

	for (idx = 0; idx < block->nb_ops; idx++) {
		if (block->opcode_list[idx].c.opcode != memset_code[idx])
			return 0;

		/* Keep comparing until the last reference opcode matched. */
		if (idx != ARRAY_SIZE(memset_code) - 1)
			continue;

		/* success! */
		pr_debug("Block at PC 0x%x is a memset\n", block->pc);
		block->flags |= BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE;

		/* Return non-zero to skip other optimizers. */
		return 1;
	}

	return 0;
}
static int (*lightrec_optimizers[])(struct block *) = {
&lightrec_remove_div_by_zero_check_sequence,
&lightrec_detect_impossible_branches,
&lightrec_transform_ops,
&lightrec_local_branches,
&lightrec_switch_delay_slots,
&lightrec_flag_stores,
&lightrec_flag_mults_divs,
&lightrec_early_unload,
IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops),
IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches),
IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots),
IF_OPT(OPT_FLAG_STORES, &lightrec_flag_stores),
IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
};
int lightrec_optimize(struct block *block)
{
unsigned int i;
int ret;
for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
int ret = lightrec_optimizers[i](block);
if (ret)
return ret;
if (lightrec_optimizers[i]) {
ret = (*lightrec_optimizers[i])(block);
if (ret)
return ret;
}
}
return 0;

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __OPTIMIZER_H__
@ -24,8 +15,11 @@ _Bool opcode_reads_register(union code op, u8 reg);
_Bool opcode_writes_register(union code op, u8 reg);
_Bool has_delay_slot(union code op);
_Bool load_in_delay_slot(union code op);
_Bool opcode_is_io(union code op);
_Bool is_unconditional_jump(union code c);
_Bool is_syscall(union code c);
_Bool should_emulate(struct opcode *op);
_Bool should_emulate(const struct opcode *op);
int lightrec_optimize(struct block *block);

View File

@ -1,15 +1,6 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
/*
* Copyright (C) 2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2020-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "blockcache.h"

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2020-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_REAPER_H__

View File

@ -1,15 +1,6 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
/*
* Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "debug.h"
@ -127,6 +118,8 @@ struct recompiler *lightrec_recompiler_init(struct lightrec_state *state)
goto err_mtx_destroy;
}
pr_info("Threaded recompiler started\n");
return rec;
err_mtx_destroy:
@ -263,8 +256,7 @@ void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
/* The block was already compiled but the opcode list
* didn't get freed yet - do it now */
lightrec_free_opcode_list(block->state,
block->opcode_list);
lightrec_free_opcode_list(block);
block->opcode_list = NULL;
}
}
@ -289,7 +281,7 @@ void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
pr_debug("Block PC 0x%08x is fully tagged"
" - free opcode list\n", block->pc);
lightrec_free_opcode_list(block->state, block->opcode_list);
lightrec_free_opcode_list(block);
block->opcode_list = NULL;
}

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_RECOMPILER_H__

View File

@ -1,15 +1,6 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
/*
* Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "debug.h"
@ -21,7 +12,8 @@
#include <stddef.h>
struct native_register {
bool used, loaded, dirty, output, extend, extended, locked;
bool used, loaded, dirty, output, extend, extended,
zero_extend, zero_extended, locked;
s8 emulated_register;
};
@ -79,6 +71,27 @@ static inline struct native_register * lightning_reg_to_lightrec(
}
}
/*
 * Report the current extension state of the native register 'jit_reg' as a
 * combination of REG_EXT (set when the register is marked 'extended') and
 * REG_ZEXT (set when it is marked 'zero_extended').
 */
u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg)
{
	const struct native_register *nreg =
		lightning_reg_to_lightrec(cache, jit_reg);
	u8 state = 0;

	if (nreg->zero_extended)
		state |= REG_ZEXT;

	if (nreg->extended)
		state |= REG_EXT;

	return state;
}
/*
 * Set the 'extend' and 'zero_extend' markers of the native register
 * 'jit_reg' from the REG_EXT and REG_ZEXT bits of 'flags'; a cleared bit
 * resets the corresponding marker.
 */
void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags)
{
	struct native_register *nreg;

	nreg = lightning_reg_to_lightrec(cache, jit_reg);

	nreg->zero_extend = flags & REG_ZEXT;
	nreg->extend = flags & REG_EXT;
}
static struct native_register * alloc_temp(struct regcache *cache)
{
unsigned int i;
@ -157,6 +170,7 @@ static struct native_register * alloc_in_out(struct regcache *cache,
static void lightrec_discard_nreg(struct native_register *nreg)
{
nreg->extended = false;
nreg->zero_extended = false;
nreg->loaded = false;
nreg->output = false;
nreg->dirty = false;
@ -223,7 +237,8 @@ u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit)
return jit_reg;
}
u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg)
u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit,
u8 reg, u8 flags)
{
u8 jit_reg;
struct native_register *nreg = alloc_in_out(cache, reg, true);
@ -240,14 +255,16 @@ u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg)
if (nreg->emulated_register != reg)
lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
nreg->extend = false;
nreg->used = true;
nreg->output = true;
nreg->emulated_register = reg;
nreg->extend = flags & REG_EXT;
nreg->zero_extend = flags & REG_ZEXT;
return jit_reg;
}
u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg)
u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit,
u8 reg, u8 flags)
{
u8 jit_reg;
bool reg_changed;
@ -270,52 +287,49 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg)
s16 offset = offsetof(struct lightrec_state, native_reg_cache)
+ (reg << 2);
nreg->zero_extended = flags & REG_ZEXT;
nreg->extended = !nreg->zero_extended;
/* Load previous value from register cache */
#if __WORDSIZE == 64
if (nreg->zero_extended)
jit_ldxi_ui(jit_reg, LIGHTREC_REG_STATE, offset);
else
jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
#else
jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
#endif
nreg->loaded = true;
nreg->extended = true;
}
/* Clear register r0 before use */
if (reg == 0 && (!nreg->loaded || nreg->dirty)) {
jit_movi(jit_reg, 0);
nreg->extended = true;
nreg->zero_extended = true;
nreg->loaded = true;
}
nreg->used = true;
nreg->output = false;
nreg->emulated_register = reg;
return jit_reg;
}
u8 lightrec_alloc_reg_out_ext(struct regcache *cache, jit_state_t *_jit, u8 reg)
{
struct native_register *nreg;
u8 jit_reg;
jit_reg = lightrec_alloc_reg_out(cache, _jit, reg);
nreg = lightning_reg_to_lightrec(cache, jit_reg);
nreg->extend = true;
return jit_reg;
}
u8 lightrec_alloc_reg_in_ext(struct regcache *cache, jit_state_t *_jit, u8 reg)
{
struct native_register *nreg;
u8 jit_reg;
jit_reg = lightrec_alloc_reg_in(cache, _jit, reg);
nreg = lightning_reg_to_lightrec(cache, jit_reg);
#if __WORDSIZE == 64
if (!nreg->extended) {
if ((flags & REG_EXT) && !nreg->extended &&
(!nreg->zero_extended || !(flags & REG_ZEXT))) {
nreg->extended = true;
nreg->zero_extended = false;
#if __WORDSIZE == 64
jit_extr_i(jit_reg, jit_reg);
}
#endif
} else if (!(flags & REG_EXT) && (flags & REG_ZEXT) &&
!nreg->zero_extended) {
nreg->zero_extended = true;
nreg->extended = false;
#if __WORDSIZE == 64
jit_extr_ui(jit_reg, jit_reg);
#endif
}
return jit_reg;
}
@ -341,6 +355,7 @@ u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit,
jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
nreg->extended = true;
nreg->zero_extended = false;
nreg->used = true;
nreg->loaded = true;
nreg->emulated_register = reg;
@ -353,8 +368,10 @@ static void free_reg(struct native_register *nreg)
/* Set output registers as dirty */
if (nreg->used && nreg->output && nreg->emulated_register > 0)
nreg->dirty = true;
if (nreg->output)
if (nreg->output) {
nreg->extended = nreg->extend;
nreg->zero_extended = nreg->zero_extend;
}
nreg->used = false;
}

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __REGCACHE_H__
@ -22,8 +13,9 @@
#define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1))
#define LIGHTREC_REG_CYCLE (JIT_V(JIT_V_NUM - 2))
#define REG_LO 32
#define REG_HI 33
/* Flags for lightrec_alloc_reg_in / lightrec_alloc_reg_out. */
#define REG_EXT BIT(0) /* register is sign-extended */
#define REG_ZEXT BIT(1) /* register is zero-extended */
struct register_value {
_Bool known;
@ -35,15 +27,17 @@ struct regcache;
u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit);
u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg);
u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg);
u8 lightrec_alloc_reg_out_ext(struct regcache *cache,
jit_state_t *_jit, u8 reg);
u8 lightrec_alloc_reg_in_ext(struct regcache *cache, jit_state_t *_jit, u8 reg);
u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit,
u8 reg, u8 flags);
u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit,
u8 reg, u8 flags);
u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit,
u8 reg, u8 jit_reg);
u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg);
void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags);
void lightrec_regcache_reset(struct regcache *cache);
void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);

13
deps/lightrec/slist.h vendored
View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2020 Paul Cercueil <paul@crapouillou.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* Copyright (C) 2020-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_SLIST_H__

View File

@ -69,6 +69,6 @@ LOCAL_SRC_FILES := $(SOURCES_CXX) $(SOURCES_C)
LOCAL_CFLAGS := $(COREFLAGS)
LOCAL_CXXFLAGS := $(COREFLAGS) -std=c++11
LOCAL_LDFLAGS := -Wl,-version-script=$(CORE_DIR)/link.T -ldl
LOCAL_LDLIBS := -llog
LOCAL_LDLIBS := -llog -landroid
LOCAL_CPP_FEATURES := exceptions rtti
include $(BUILD_SHARED_LIBRARY)

View File

@ -39,6 +39,7 @@ retro_input_state_t dbg_input_state_cb = 0;
#ifdef HAVE_ASHMEM
#include <sys/ioctl.h>
#include <linux/ashmem.h>
#include <dlfcn.h>
#endif
#if defined(HAVE_SHM) || defined(HAVE_ASHMEM)
@ -1641,6 +1642,7 @@ static const uintptr_t supported_io_bases[] = {
/* Sizes, in bytes, of the individual regions placed in the shared mapping. */
#define RAM_SIZE 0x200000
#define BIOS_SIZE 0x80000
#define SCRATCH_SIZE 0x400
/* Total size of the shared-memory object. Parenthesised so the sum stays a
 * single operand wherever the macro is expanded (e.g. SHM_SIZE * 2). */
#define SHM_SIZE (RAM_SIZE + BIOS_SIZE + SCRATCH_SIZE)
#ifdef HAVE_WIN_SHM
#define MAP(addr, size, fd, offset) \
@ -1675,12 +1677,49 @@ int lightrec_init_mmap()
memfd = open("/dev/ashmem", O_RDWR);
if (memfd < 0) {
log_cb(RETRO_LOG_ERROR, "Failed to create ASHMEM: %s\n", strerror(errno));
return 0;
}
/* Android 10+ / API 29+ gives EACCES (permission denied) opening /dev/ashmem
* fallback to ASharedMemory_create available since Android 8 / API 26 */
if(errno == EACCES) {
void *lib;
int (*create)(const char*, size_t);
int (*setProt)(int, int);
char *error1, *error2;
ioctl(memfd, ASHMEM_SET_NAME, "lightrec_memfd");
ioctl(memfd, ASHMEM_SET_SIZE, RAM_SIZE+BIOS_SIZE+SCRATCH_SIZE);
dlerror(); /* Clear any existing error */
lib = dlopen("libandroid.so", RTLD_NOW);
if (lib == NULL) {
log_cb(RETRO_LOG_ERROR, "Failed to dlopen: %s\n", dlerror());
return 0;
}
*(void **)(&create) = dlsym(lib, "ASharedMemory_create");
error1 = dlerror();
*(void **)(&setProt) = dlsym(lib, "ASharedMemory_setProt");
error2 = dlerror();
if (error1 == NULL)
memfd = (*create)("lightrec_memfd",SHM_SIZE);
if (memfd < 0) {
log_cb(RETRO_LOG_ERROR, "Failed to ASharedMemory_create: %s\n",
(error1 != NULL) ? error1 : strerror(errno));
dlclose(lib);
return 0;
}
if (error2 != NULL || (((*setProt)(memfd, PROT_READ|PROT_WRITE)) < 0))
log_cb(RETRO_LOG_ERROR, "Failed to ASharedMemory_setProt: %s\n",
(error2 != NULL) ? error2 : strerror(errno));
dlclose(lib);
} else {
log_cb(RETRO_LOG_ERROR, "Failed to create ASHMEM: %s\n", strerror(errno));
return 0;
}
} else {
ioctl(memfd, ASHMEM_SET_NAME, "lightrec_memfd");
ioctl(memfd, ASHMEM_SET_SIZE, SHM_SIZE);
}
#endif
#ifdef HAVE_SHM
int memfd;
@ -1697,7 +1736,7 @@ int lightrec_init_mmap()
/* unlink ASAP to prevent leaving a file in shared memory if we crash */
shm_unlink(shm_name);
if (ftruncate(memfd, RAM_SIZE+BIOS_SIZE+SCRATCH_SIZE) < 0) {
if (ftruncate(memfd, SHM_SIZE) < 0) {
log_cb(RETRO_LOG_ERROR, "Could not truncate SHM size: %s\n", strerror(errno));
goto close_return;
}
@ -1705,8 +1744,7 @@ int lightrec_init_mmap()
#ifdef HAVE_WIN_SHM
HANDLE memfd;
memfd = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0,
RAM_SIZE+BIOS_SIZE+SCRATCH_SIZE, NULL);
memfd = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, SHM_SIZE, NULL);
if (memfd == NULL) {
log_cb(RETRO_LOG_ERROR, "Failed to create WIN_SHM: %s (%d)\n", strerror(errno), GetLastError());
@ -1805,7 +1843,7 @@ void lightrec_free_mmap()
UNMAP(psx_scratch, SCRATCH_SIZE);
#ifdef HAVE_ASHMEM
/* ashmem shared memory is not pinned by mmap, it dies on close */
/* android shared memory is not pinned by mmap, it dies on close */
close(memfd);
#endif
}

View File

@ -0,0 +1,22 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
/*
 * Build-time configuration for Lightrec. Every switch below is defined to
 * either 0 or 1.
 */
#ifndef __LIGHTREC_CONFIG_H__
#define __LIGHTREC_CONFIG_H__
/* General feature switches. */
#define ENABLE_FIRST_PASS 1
#define ENABLE_TINYMM 0
/* NOTE(review): the OPT_* names suggest each one toggles a single optimizer
 * pass - confirm against the optimizer sources before relying on this. */
#define OPT_REMOVE_DIV_BY_ZERO_SEQ 1
#define OPT_REPLACE_MEMSET 1
#define OPT_DETECT_IMPOSSIBLE_BRANCHES 1
#define OPT_TRANSFORM_OPS 1
#define OPT_LOCAL_BRANCHES 1
#define OPT_SWITCH_DELAY_SLOTS 1
#define OPT_FLAG_STORES 1
#define OPT_FLAG_MULT_DIV 1
#define OPT_EARLY_UNLOAD 1
#endif /* __LIGHTREC_CONFIG_H__ */

View File

@ -1011,6 +1011,12 @@ extern void _jit_retr_d(jit_state_t*, jit_fpr_t);
extern void _jit_reti_d(jit_state_t*, jit_float64_t);
extern void _jit_retval_d(jit_state_t*, jit_fpr_t);
#define jit_get_reg(s) _jit_get_reg(_jit,s)
extern jit_int32_t _jit_get_reg(jit_state_t*, jit_int32_t);
#define jit_unget_reg(r) _jit_unget_reg(_jit,r)
extern void _jit_unget_reg(jit_state_t*, jit_int32_t);
#define jit_new_node(c) _jit_new_node(_jit,c)
extern jit_node_t *_jit_new_node(jit_state_t*, jit_code_t);
#define jit_new_node_w(c,u) _jit_new_node_w(_jit,c,u)

View File

@ -45,6 +45,7 @@ bool prev_invalidate;
extern bool psx_dynarec_invalidate;
extern uint8 psx_mmap;
static struct lightrec_state *lightrec_state;
uint8 next_interpreter;
#endif
extern bool psx_gte_overclock;
@ -193,6 +194,7 @@ void PS_CPU::Power(void)
PGXP_Init();
#ifdef HAVE_LIGHTREC
next_interpreter = 0;
prev_dynarec = psx_dynarec;
prev_invalidate = psx_dynarec_invalidate;
pgxpMode = PGXP_GetModes();
@ -256,9 +258,22 @@ int PS_CPU::StateAction(StateMem *sm, const unsigned load, const bool data_only)
{
#ifdef HAVE_LIGHTREC
if(psx_dynarec != DYNAREC_DISABLED) {
if(lightrec_state)
lightrec_invalidate_all(lightrec_state);
else
if(lightrec_state) {
/* Hack to prevent Dynarec + Runahead from causing a crash by
* switching to lightrec interpreter after load state in bios */
if(psx_dynarec != DYNAREC_RUN_INTERPRETER &&
BACKED_PC >= 0xBFC00000 && BACKED_PC <= 0xBFC80000) {
if(next_interpreter == 0) {
log_cb(RETRO_LOG_INFO, "PC 0x%08x Dynarec using interpreter for a few "
"frames, avoid crash due to Runahead\n",BACKED_PC);
lightrec_plugin_init();
}
/* run lightrec's interpreter for a few frames
* 76 for NTSC, 93 for PAL seems to prevent crash at max runahead */
next_interpreter = 93;
} else
lightrec_invalidate_all(lightrec_state);
}else
lightrec_plugin_init();
}
#endif
@ -2719,6 +2734,9 @@ pscpu_timestamp_t PS_CPU::Run(pscpu_timestamp_t timestamp_in, bool BIOSPrintMode
prev_invalidate = psx_dynarec_invalidate;
}
if(next_interpreter > 0)
next_interpreter--;
if(psx_dynarec != DYNAREC_DISABLED)
return(lightrec_plugin_execute(timestamp_in));
#endif
@ -3895,12 +3913,12 @@ int32_t PS_CPU::lightrec_plugin_execute(int32_t timestamp)
lightrec_restore_registers(lightrec_state, GPRL);
lightrec_reset_cycle_count(lightrec_state, timestamp);
if (psx_dynarec == DYNAREC_EXECUTE)
if (next_interpreter > 0 || psx_dynarec == DYNAREC_RUN_INTERPRETER)
PC = lightrec_run_interpreter(lightrec_state,PC);
else if (psx_dynarec == DYNAREC_EXECUTE)
PC = lightrec_execute(lightrec_state, PC, next_event_ts);
else if (psx_dynarec == DYNAREC_EXECUTE_ONE)
PC = lightrec_execute_one(lightrec_state,PC);
else if (psx_dynarec == DYNAREC_RUN_INTERPRETER)
PC = lightrec_run_interpreter(lightrec_state,PC);
timestamp = lightrec_current_cycle_count(
lightrec_state);
@ -3945,9 +3963,6 @@ void PS_CPU::lightrec_plugin_clear(u32 addr, u32 size)
void PS_CPU::lightrec_plugin_shutdown(void)
{
log_cb(RETRO_LOG_INFO,"Lightrec memory usage: %u KiB, average IPI: %.2f\n",
lightrec_get_total_mem_usage()/1024,
lightrec_get_average_ipi());
lightrec_destroy(lightrec_state);
}