Convert llcvt.c to assembly

This commit is contained in:
Ryan Dwyer 2024-09-05 20:48:45 +10:00
parent 28f678bfee
commit a617fd62b1
3 changed files with 217 additions and 290 deletions

View File

@ -340,8 +340,7 @@ ifeq ($(COMPILER), ido)
MIPS3_C_FILES := \
src/lib/rng_c.c \
src/lib/ultra/libc/ll.c \
src/lib/ultra/libc/llcvt.c
src/lib/ultra/libc/ll.c
G_C_FILES := \
$(shell find src/lib/ultra/audio -name '*.c') \
@ -378,7 +377,6 @@ ifeq ($(COMPILER), ido)
src/lib/ultra/io/visetyscale.c \
src/lib/ultra/io/viswapbuf.c \
src/lib/ultra/libc/ll.c \
src/lib/ultra/libc/llcvt.c \
src/lib/ultra/os/atomic.c \
src/lib/ultra/os/createmesgqueue.c \
src/lib/ultra/os/destroythread.c \
@ -772,9 +770,9 @@ $(B_DIR)/lib/rng_c.o: src/lib/rng_c.c $(ASSETMGR_O_FILES) $(RECOMP_FILES)
$(CC) -c $(CFLAGS) $(OPT_LVL) -o $@ $<
tools/patchmips3 $@ || rm $@
$(B_DIR)/lib/ultra/libc/llcvt.o: src/lib/ultra/libc/llcvt.c $(ASSETMGR_O_FILES) $(RECOMP_FILES)
$(B_DIR)/lib/ultra/libc/llcvt.o: src/lib/ultra/libc/llcvt.s
@mkdir -p $(dir $@)
$(CC) -c $(CFLAGS) $(OPT_LVL) -o $@ $<
cpp -P -Wno-trigraphs -I include -I include/PR -I src/include $(C_DEFINES) -D_LANGUAGE_ASSEMBLY -D_MIPSEB $< | $(AS) $(ASFLAGS) -o $@
tools/patchmips3 $@ || rm $@
$(B_DIR)/lib/ultra/libc/ll.o: src/lib/ultra/libc/ll.c $(ASSETMGR_O_FILES) $(RECOMP_FILES)

View File

@ -1,285 +0,0 @@
#include "constants.h"
#ifdef __sgi
#if VERSION < VERSION_NTSC_1_0
/* Converts a double to a signed 64-bit integer using the C conversion rules. */
long long __d_to_ll(double f)
{
	long long converted = (long long) f;

	return converted;
}
#endif
/* Converts a float to a signed 64-bit integer using the C conversion rules. */
long long __f_to_ll(float f)
{
	long long converted = (long long) f;

	return converted;
}
#if VERSION < VERSION_NTSC_1_0
#if MATCHING
/**
 * Matching build of __d_to_ull (only compiled when VERSION < VERSION_NTSC_1_0).
 *
 * Input is the double in $f12. The 64-bit result is returned split across
 * $v0 (upper 32 bits) and $v1 (lower 32 bits), each sign-extended.
 *
 * Outline of the listing below:
 * - The FPU control register $31 is saved in $t6 and restored at the exit
 *   label; while converting it is set to 1.
 * - After each cvt.l.d the control register is read back and masked with
 *   0x78. The "andi $at, $v0, 0x4" results are never read afterwards.
 * - First conversion clean (mask is zero): the value is returned unless it is
 *   negative, in which case -1 is returned.
 * - First conversion not clean: the constant set up by lui 0x43e0 / mtc1 is
 *   subtracted from the input and the conversion is retried. A clean retry
 *   has 0x8000000000000000 (lui 0x8000 shifted left by 32) ORed in; otherwise
 *   -1 is returned.
 */
GLOBAL_ASM(
glabel __d_to_ull
/* 5ad8: 444ef800 */ cfc1 $t6,$31
/* 5adc: 24020001 */ addiu $v0,$zero,0x1
/* 5ae0: 44c2f800 */ ctc1 $v0,$31
/* 5ae4: 00000000 */ sll $zero,$zero,0x0
/* 5ae8: 46206125 */ cvt.l.d $f4,$f12
/* 5aec: 4442f800 */ cfc1 $v0,$31
/* 5af0: 00000000 */ sll $zero,$zero,0x0
/* 5af4: 30410004 */ andi $at,$v0,0x4
/* 5af8: 30420078 */ andi $v0,$v0,0x78
/* 5afc: 10400014 */ beqz $v0,.NB00005b50
/* 5b00: 3c0143e0 */ lui $at,0x43e0
/* 5b04: 44812000 */ mtc1 $at,$f4
/* 5b08: 24020001 */ addiu $v0,$zero,0x1
/* 5b0c: 46246101 */ sub.d $f4,$f12,$f4
/* 5b10: 44c2f800 */ ctc1 $v0,$31
/* 5b14: 00000000 */ sll $zero,$zero,0x0
/* 5b18: 46202125 */ cvt.l.d $f4,$f4
/* 5b1c: 4442f800 */ cfc1 $v0,$31
/* 5b20: 00000000 */ sll $zero,$zero,0x0
/* 5b24: 30410004 */ andi $at,$v0,0x4
/* 5b28: 30420078 */ andi $v0,$v0,0x78
/* 5b2c: 14400006 */ bnez $v0,.NB00005b48
/* 5b30: 00000000 */ sll $zero,$zero,0x0
/* 5b34: 3c0f8000 */ lui $t7,0x8000
/* 5b38: 000f783c */ dsll32 $t7,$t7,0x0
/* 5b3c: 44222000 */ dmfc1 $v0,$f4
/* 5b40: 10000007 */ beqz $zero,.NB00005b60
/* 5b44: 004f1025 */ or $v0,$v0,$t7
.NB00005b48:
/* 5b48: 10000005 */ beqz $zero,.NB00005b60
/* 5b4c: 2402ffff */ addiu $v0,$zero,-1
.NB00005b50:
/* 5b50: 44222000 */ dmfc1 $v0,$f4
/* 5b54: 00000000 */ sll $zero,$zero,0x0
/* 5b58: 0440fffb */ bltz $v0,.NB00005b48
/* 5b5c: 00000000 */ sll $zero,$zero,0x0
.NB00005b60:
/* 5b60: 44cef800 */ ctc1 $t6,$31
/* 5b64: 0002183c */ dsll32 $v1,$v0,0x0
/* 5b68: 0003183f */ dsra32 $v1,$v1,0x0
/* 5b6c: 03e00008 */ jr $ra
/* 5b70: 0002103f */ dsra32 $v0,$v0,0x0
);
#else
/* Non-matching fallback: converts a double to an unsigned 64-bit integer using the C conversion rules. */
unsigned long long __d_to_ull(double d)
{
	unsigned long long converted = (unsigned long long) d;

	return converted;
}
#endif
#endif
#if MATCHING
/**
 * Matching build of __f_to_ull.
 *
 * Input is the float in $f12. The 64-bit result is returned split across
 * $v0 (upper 32 bits) and $v1 (lower 32 bits), each sign-extended.
 *
 * Outline of the listing below:
 * - The FPU control register $31 is saved in $t6 and restored at the exit
 *   label; while converting it is set to 1.
 * - After each cvt.l.s the control register is read back and masked with
 *   0x78. The "andi $at, $v0, 0x4" results are never read afterwards.
 * - First conversion clean (mask is zero): the value is returned unless it is
 *   negative, in which case -1 is returned.
 * - First conversion not clean: the single-precision constant 0x5f000000 is
 *   subtracted from the input and the conversion is retried. A clean retry
 *   has 0x8000000000000000 (lui 0x8000 shifted left by 32) ORed in; otherwise
 *   -1 is returned.
 */
GLOBAL_ASM(
glabel __f_to_ull
/* 5a7c: 444ef800 */ cfc1 $t6,$31
/* 5a80: 24020001 */ addiu $v0,$zero,0x1
/* 5a84: 44c2f800 */ ctc1 $v0,$31
/* 5a88: 00000000 */ nop
/* 5a8c: 46006125 */ cvt.l.s $f4,$f12
/* 5a90: 4442f800 */ cfc1 $v0,$31
/* 5a94: 00000000 */ nop
/* 5a98: 30410004 */ andi $at,$v0,0x4
/* 5a9c: 30420078 */ andi $v0,$v0,0x78
/* 5aa0: 10400014 */ beqz $v0,.L00005af4
/* 5aa4: 3c015f00 */ lui $at,0x5f00
/* 5aa8: 44812000 */ mtc1 $at,$f4
/* 5aac: 24020001 */ addiu $v0,$zero,0x1
/* 5ab0: 46046101 */ sub.s $f4,$f12,$f4
/* 5ab4: 44c2f800 */ ctc1 $v0,$31
/* 5ab8: 00000000 */ nop
/* 5abc: 46002125 */ cvt.l.s $f4,$f4
/* 5ac0: 4442f800 */ cfc1 $v0,$31
/* 5ac4: 00000000 */ nop
/* 5ac8: 30410004 */ andi $at,$v0,0x4
/* 5acc: 30420078 */ andi $v0,$v0,0x78
/* 5ad0: 14400006 */ bnez $v0,.L00005aec
/* 5ad4: 00000000 */ nop
/* 5ad8: 3c0f8000 */ lui $t7,0x8000
/* 5adc: 000f783c */ dsll32 $t7,$t7,0x0
/* 5ae0: 44222000 */ dmfc1 $v0,$f4
/* 5ae4: 10000007 */ b .L00005b04
/* 5ae8: 004f1025 */ or $v0,$v0,$t7
.L00005aec:
/* 5aec: 10000005 */ b .L00005b04
/* 5af0: 2402ffff */ addiu $v0,$zero,-1
.L00005af4:
/* 5af4: 44222000 */ dmfc1 $v0,$f4
/* 5af8: 00000000 */ nop
/* 5afc: 0440fffb */ bltz $v0,.L00005aec
/* 5b00: 00000000 */ nop
.L00005b04:
/* 5b04: 44cef800 */ ctc1 $t6,$31
/* 5b08: 0002183c */ dsll32 $v1,$v0,0x0
/* 5b0c: 0003183f */ dsra32 $v1,$v1,0x0
/* 5b10: 03e00008 */ jr $ra
/* 5b14: 0002103f */ dsra32 $v0,$v0,0x0
);
#else
// Non-matching fallback.
// Mismatch: the target builds 0x8000000000000000 by loading 0x80000000 and
// shifting it left by 32, whereas compiling this C places the constant in
// .rodata and loads it from there.
unsigned long long __f_to_ull(float f)
{
	unsigned long long converted = (unsigned long long) f;

	return converted;
}
#endif
/* Converts a signed 64-bit integer to a double. */
double __ll_to_d(long long s)
{
	double converted = (double) s;

	return converted;
}
#if VERSION < VERSION_NTSC_1_0
/* Converts a signed 64-bit integer to a float. */
float __ll_to_f(long long s)
{
	float converted = (float) s;

	return converted;
}
#endif
#if VERSION < VERSION_NTSC_1_0
#if MATCHING
/**
 * Matching build of __ull_to_d (only compiled when VERSION < VERSION_NTSC_1_0).
 *
 * $a0 and $a1 are stored to 0($sp) and 4($sp) and reloaded as one 64-bit
 * value, which is moved to $f4 and converted with cvt.d.l into $f0.
 * The cvt.d.l sits in the branch delay slot, so it runs on both paths.
 * When the 64-bit value is negative (bgez not taken), the value set up by
 * lui 0x41f0 / mtc1 $at,$f6 is added to $f0 before returning.
 */
GLOBAL_ASM(
glabel __ull_to_d
/* 5c40: afa40000 */ sw $a0,0x0($sp)
/* 5c44: afa50004 */ sw $a1,0x4($sp)
/* 5c48: dfae0000 */ ld $t6,0x0($sp)
/* 5c4c: 44ae2000 */ dmtc1 $t6,$f4
/* 5c50: 05c10005 */ bgez $t6,.NB00005c68
/* 5c54: 46a02021 */ cvt.d.l $f0,$f4
/* 5c58: 3c0141f0 */ lui $at,0x41f0
/* 5c5c: 44813000 */ mtc1 $at,$f6
/* 5c60: 00000000 */ sll $zero,$zero,0x0
/* 5c64: 46260000 */ add.d $f0,$f0,$f6
.NB00005c68:
/* 5c68: 03e00008 */ jr $ra
/* 5c6c: 00000000 */ sll $zero,$zero,0x0
);
#else
/* Non-matching fallback: converts an unsigned 64-bit integer to a double. */
double __ull_to_d(unsigned long long u)
{
	double converted = (double) u;

	return converted;
}
#endif
#endif
/* Converts an unsigned 64-bit integer to a float. */
float __ull_to_f(unsigned long long u)
{
	float converted = (float) u;

	return converted;
}
#else
// gcc
/**
 * IDO's __f_to_ll is gcc's __fixsfdi
 * IDO's __f_to_ull is gcc's __fixunssfdi
 * IDO's __ll_to_d is gcc's __floatdidf
 * IDO's __ull_to_f is gcc's __floatundisf
 *
 * The four routines are emitted as one file-scope asm statement so that gcc
 * builds get the same instruction sequences under gcc's helper names.
 * The string is wrapped in .set push / .set pop, so the noat, noreorder and
 * gp=64 settings it selects do not leak into the compiler-generated assembly.
 * Numeric local labels (1:, 2:, 3:) are used with the f/b suffixes for
 * forward/backward references.
 *
 * All routines return 64-bit integers split across $v0 (upper 32 bits) and
 * $v1 (lower 32 bits); 64-bit integer arguments arrive in $a0/$a1 and are
 * recombined by storing both to the stack and reloading them with ld.
 */
__asm__(" \n\
.set push \n\
.set noat \n\
.set noreorder \n\
.set gp=64 \n\
\n\
.global __fixsfdi \n\
__fixsfdi: \n\
.type __fixsfdi, @function \n\
.ent __fixsfdi \n\
trunc.l.s $f4, $f12 \n\
dmfc1 $v0, $f4 \n\
nop \n\
dsll32 $v1, $v0, 0 \n\
dsra32 $v1, $v1, 0 \n\
jr $ra \n\
dsra32 $v0, $v0, 0 \n\
.end __fixsfdi \n\
.size __fixsfdi, . - __fixsfdi \n\
\n\
.global __fixunssfdi \n\
__fixunssfdi: \n\
.type __fixunssfdi, @function \n\
.ent __fixunssfdi \n\
cfc1 $t6, $31 \n\
addiu $v0, $zero, 1 \n\
ctc1 $v0, $31 \n\
nop \n\
cvt.l.s $f4, $f12 \n\
cfc1 $v0, $31 \n\
nop \n\
andi $at, $v0, 4 \n\
andi $v0, $v0, 0x78 \n\
beqz $v0, 2f \n\
lui $at, 0x5f00 \n\
mtc1 $at, $f4 \n\
addiu $v0, $zero, 1 \n\
sub.s $f4, $f12, $f4 \n\
ctc1 $v0, $31 \n\
nop \n\
cvt.l.s $f4, $f4 \n\
cfc1 $v0, $31 \n\
nop \n\
andi $at, $v0, 4 \n\
andi $v0, $v0, 0x78 \n\
bnez $v0, 1f \n\
nop \n\
lui $t7, 0x8000 \n\
dsll32 $t7, $t7, 0 \n\
dmfc1 $v0, $f4 \n\
b 3f \n\
or $v0, $v0, $t7 \n\
1: \n\
b 3f \n\
addiu $v0, $zero, -1 \n\
2: \n\
dmfc1 $v0, $f4 \n\
nop \n\
bltz $v0, 1b \n\
nop \n\
3: \n\
ctc1 $t6, $31 \n\
dsll32 $v1, $v0, 0 \n\
dsra32 $v1, $v1, 0 \n\
jr $ra \n\
dsra32 $v0, $v0, 0 \n\
.end __fixunssfdi \n\
.size __fixunssfdi, . - __fixunssfdi \n\
\n\
.global __floatdidf \n\
__floatdidf: \n\
.type __floatdidf, @function \n\
.ent __floatdidf \n\
sw $a0, 0($sp) \n\
sw $a1, 4($sp) \n\
ld $t6, 0($sp) \n\
dmtc1 $t6, $f4 \n\
jr $ra \n\
cvt.d.l $f0, $f4 \n\
.end __floatdidf \n\
.size __floatdidf, . - __floatdidf \n\
\n\
.global __floatundisf \n\
__floatundisf: \n\
.type __floatundisf, @function \n\
.ent __floatundisf \n\
sw $a0, 0($sp) \n\
sw $a1, 4($sp) \n\
ld $t6, 0($sp) \n\
dmtc1 $t6, $f4 \n\
bgez $t6, 1f \n\
cvt.s.l $f0, $f4 \n\
lui $at, 0x4f80 \n\
mtc1 $at, $f6 \n\
nop \n\
add.s $f0, $f0, $f6 \n\
1: \n\
jr $ra \n\
nop \n\
.end __floatundisf \n\
.size __floatundisf, . - __floatundisf \n\
\n\
.set pop \n\
\n");
#endif

214
src/lib/ultra/libc/llcvt.s Normal file
View File

@ -0,0 +1,214 @@
#include "asm_helper.h"
#include "macros.inc"
#include "versions.h"
/**
* Rare almost certainly compiled a C version of this file
* then patched the compiled assembly.
*
* __f_to_ull:
*
* The C version loads 0x8000000000000000 from rodata while Rare's does
* lui 0x8000 and shifts left by 32. This is an optimisation that Rare made.
*
* __ull_to_d:
*
* Rare had to ASM patch some instructions due to Rare's use of the FPU's
* native register mode (FR=1), which the assembler assumes is off.
* However, Rare patched these wrong, and as a result calling __ull_to_d with
* very large numbers can cause a crash. This function is never called though
* and only exists in the beta version.
*
* __d_to_ull:
*
* This function contains both of the above. Like __ull_to_d, it is also
* patched incorrectly and can cause a crash. It was also not used and only
* exists in the beta version.
*/
/* noat: the routines below use $at explicitly as a scratch register. */
.set noat
/* noreorder: branch delay slots below are filled by hand and must be kept as written. */
.set noreorder
/* gp=64: the routines use 64-bit GPR instructions (ld, dmfc1, dmtc1, dsll32, dsra32). */
.set gp=64
.text
#if VERSION < VERSION_NTSC_1_0
/*
 * __d_to_ll (beta versions only)
 *
 * In:  $f12 = double to convert
 * Out: $v0 = upper 32 bits, $v1 = lower 32 bits of the truncated 64-bit
 *      result, each sign-extended
 * Uses: $f4
 */
glabel __d_to_ll
trunc.l.d $f4, $f12
dmfc1 $v0, $f4
nop
/* $v1 = low word of the 64-bit result */
dsll32 $v1, $v0, 0
dsra32 $v1, $v1, 0
jr $ra
/* delay slot: $v0 = high word of the 64-bit result */
dsra32 $v0, $v0, 0
#endif
/*
 * __f_to_ll, also exported as __fixsfdi (both labels mark the same code)
 *
 * In:  $f12 = float to convert
 * Out: $v0 = upper 32 bits, $v1 = lower 32 bits of the truncated 64-bit
 *      result, each sign-extended
 * Uses: $f4
 */
glabel __f_to_ll
glabel __fixsfdi
trunc.l.s $f4, $f12
dmfc1 $v0, $f4
nop
/* $v1 = low word of the 64-bit result */
dsll32 $v1, $v0, 0
dsra32 $v1, $v1, 0
jr $ra
/* delay slot: $v0 = high word of the 64-bit result */
dsra32 $v0, $v0, 0
#if VERSION < VERSION_NTSC_1_0
/*
 * __d_to_ull (beta versions only)
 *
 * In:  $f12 = double to convert
 * Out: $v0 = upper 32 bits, $v1 = lower 32 bits of the 64-bit result,
 *      each sign-extended
 * Uses: $at, $t6, $t7, $f4. The FPU control register $31 is saved in $t6 on
 *       entry and restored before returning.
 *
 * The first cvt.l.d runs with the control register set to 1. If none of the
 * bits in 0x78 are raised afterwards, the result is returned, or -1 if it is
 * negative. Otherwise the input is rebased by the constant loaded below,
 * converted again and bit 63 is ORed back in; if the retry also raises a
 * 0x78 bit, -1 is returned.
 *
 * Comments in this routine are C-style on purpose: the file is piped through
 * cpp, which treats a line starting with a hash as a preprocessing directive.
 */
glabel __d_to_ull
cfc1 $t6, $31 /* save the caller's control register */
li $v0, 1
ctc1 $v0, $31
nop
cvt.l.d $f4, $f12
cfc1 $v0, $31
nop
andi $at, $v0, 4 /* result in $at is never read */
andi $v0, $v0, 0x78
beqz $v0, .NB00005b50
/*
 * Same @bug as in __ull_to_d: the mtc1 below writes only the lower half of
 * the 64-bit $f4, although 0x43e00000 is the upper word of the double
 * constant. The instructions are kept as they are to match the original.
 */
lui $at, 0x43e0 /* delay slot: runs on both paths */
mtc1 $at, $f4
li $v0, 1
sub.d $f4, $f12, $f4
ctc1 $v0, $31
nop
cvt.l.d $f4, $f4
cfc1 $v0, $31
nop
andi $at, $v0, 4 /* result in $at is never read */
andi $v0, $v0, 0x78
bnez $v0, .NB00005b48
nop
lui $t7, 0x8000
dsll32 $t7, $t7, 0 /* $t7 = 0x8000000000000000 */
dmfc1 $v0, $f4
beqz $zero, .NB00005b60
or $v0, $v0, $t7 /* delay slot: put bit 63 back */
.NB00005b48:
/* failure path: return -1 */
beqz $zero, .NB00005b60
li $v0, -1
.NB00005b50:
/* first conversion was clean: reject negative results */
dmfc1 $v0, $f4
nop
bltz $v0, .NB00005b48
nop
.NB00005b60:
ctc1 $t6, $31 /* restore the caller's control register */
dsll32 $v1, $v0, 0
dsra32 $v1, $v1, 0 /* $v1 = low word */
jr $ra
dsra32 $v0, $v0, 0 /* delay slot: $v0 = high word */
#endif
/*
 * __f_to_ull, also exported as __fixunssfdi (both labels mark the same code)
 *
 * In:  $f12 = float to convert
 * Out: $v0 = upper 32 bits, $v1 = lower 32 bits of the 64-bit result,
 *      each sign-extended
 * Uses: $at, $t6, $t7, $f4. The FPU control register $31 is saved in $t6 on
 *       entry and restored before returning.
 *
 * The first cvt.l.s runs with the control register set to 1. If none of the
 * bits in 0x78 are raised afterwards, the result is returned, or -1 if it is
 * negative. Otherwise the single-precision constant 0x5f000000 is subtracted
 * from the input, the conversion is retried and bit 63 is ORed back in; if
 * the retry also raises a 0x78 bit, -1 is returned.
 */
glabel __f_to_ull
glabel __fixunssfdi
cfc1 $t6, $31
li $v0, 1
ctc1 $v0, $31
nop
cvt.l.s $f4, $f12
cfc1 $v0, $31
nop
/* the value written to $at here is never read */
andi $at, $v0, 4
andi $v0, $v0, 0x78
beqz $v0, .L00005af4
/* delay slot: runs on both paths */
lui $at, 0x5f00
mtc1 $at, $f4
li $v0, 1
sub.s $f4, $f12, $f4
ctc1 $v0, $31
nop
cvt.l.s $f4, $f4
cfc1 $v0, $31
nop
/* the value written to $at here is never read */
andi $at, $v0, 4
andi $v0, $v0, 0x78
bnez $v0, .L00005aec
nop
/* $t7 = 0x8000000000000000, built without a .rodata load */
lui $t7, 0x8000
dsll32 $t7, $t7, 0
dmfc1 $v0, $f4
b .L00005b04
/* delay slot: put bit 63 back */
or $v0, $v0, $t7
.L00005aec:
/* failure path: return -1 */
b .L00005b04
li $v0, -1
.L00005af4:
/* first conversion was clean: reject negative results */
dmfc1 $v0, $f4
nop
bltz $v0, .L00005aec
nop
.L00005b04:
/* restore the caller's control register, then split the result */
ctc1 $t6, $31
dsll32 $v1, $v0, 0
dsra32 $v1, $v1, 0
jr $ra
dsra32 $v0, $v0, 0
/*
 * __ll_to_d, also exported as __floatdidf (both labels mark the same code)
 *
 * In:  $a0, $a1 = the two 32-bit words of the signed 64-bit integer
 * Out: $f0 = the value converted to double
 * Uses: $t6, $f4; writes 0($sp) and 4($sp)
 */
glabel __ll_to_d
glabel __floatdidf
/* recombine the two argument words into one 64-bit register via the stack */
sw $a0, 0($sp)
sw $a1, 4($sp)
ld $t6, 0($sp)
dmtc1 $t6, $f4
jr $ra
/* delay slot */
cvt.d.l $f0, $f4
#if VERSION < VERSION_NTSC_1_0
/*
 * __ll_to_f (beta versions only)
 *
 * In:  $a0, $a1 = the two 32-bit words of the signed 64-bit integer
 * Out: $f0 = the value converted to float
 * Uses: $t6, $f4; writes 0($sp) and 4($sp)
 */
glabel __ll_to_f
/* recombine the two argument words into one 64-bit register via the stack */
sw $a0, 0($sp)
sw $a1, 4($sp)
ld $t6, 0($sp)
dmtc1 $t6, $f4
jr $ra
/* delay slot */
cvt.s.l $f0, $f4
#endif
#if VERSION < VERSION_NTSC_1_0
/*
 * __ull_to_d (beta versions only)
 *
 * In:  $a0, $a1 = the two 32-bit words of the unsigned 64-bit integer
 * Out: $f0 = the value converted to double
 * Uses: $at, $t6, $f4, $f6; writes 0($sp) and 4($sp)
 *
 * The value is converted as a signed 64-bit integer first. If it was
 * negative when read as signed, a correction constant is added afterwards.
 *
 * Comments in this routine are C-style on purpose: the file is piped through
 * cpp, which treats a line starting with a hash as a preprocessing directive.
 */
glabel __ull_to_d
sw $a0, 0($sp)
sw $a1, 4($sp)
ld $t6, 0($sp) /* $t6 = the full 64-bit argument */
dmtc1 $t6, $f4
bgez $t6, .NB00005c68
cvt.d.l $f0, $f4 /* delay slot: runs on both paths */
/*
 * IDO emits:
 *
 *     lui $at, 0x41f0
 *     mtc1 $at, $f7
 *     mtc1 $zero, $f6
 *
 * ...which is correct when FR=0. $f7 is the upper half of the 64-bit register.
 *
 * @bug: Rare patched this to account for FR=1, but did it incorrectly and
 * loaded the immediate into the lower half of the register.
 * They should have used dmtc1 to set the value for the entire $f6 register.
 * The instructions are kept as they are to match the original.
 */
lui $at, 0x41f0
mtc1 $at, $f6
nop
add.d $f0, $f0, $f6
.NB00005c68:
jr $ra
nop
#endif
/*
 * __ull_to_f, also exported as __floatundisf (both labels mark the same code)
 *
 * In:  $a0, $a1 = the two 32-bit words of the unsigned 64-bit integer
 * Out: $f0 = the value converted to float
 * Uses: $at, $t6, $f4, $f6; writes 0($sp) and 4($sp)
 *
 * The value is converted as a signed 64-bit integer first. If it was
 * negative when read as signed, the single-precision constant 0x4f800000 is
 * added afterwards.
 *
 * The return label carries the .L prefix so it stays a local symbol and does
 * not share its name with the assembler's .end directive.
 */
glabel __ull_to_f
glabel __floatundisf
sw $a0, 0($sp)
sw $a1, 4($sp)
ld $t6, 0($sp) /* $t6 = the full 64-bit argument */
dmtc1 $t6, $f4
bgez $t6, .L__ull_to_f_ret
cvt.s.l $f0, $f4 /* delay slot: runs on both paths */
lui $at, 0x4f80
mtc1 $at, $f6
nop
add.s $f0, $f0, $f6
.L__ull_to_f_ret:
jr $ra
nop