mirror of
https://github.com/open-goal/jak-project.git
synced 2025-02-22 15:10:48 +00:00
[Compiler] Improve spills and register backups (#175)
* xmm spill * improve getting stack variables * improve symbol getting * update changelog
This commit is contained in:
parent
a80b331c27
commit
c8d382b35c
@ -98,4 +98,9 @@
|
||||
- The listener now uses message IDs to more robustly handle the situation where a response message arrives extremely late, or where some sent messages are skipped.
|
||||
- Fixed bug where references to the debug segment using RIP-relative links were not set to zero by the linker when the debug segment isn't loaded.
|
||||
- The `rlet` form now supports 128-bit vector float registers with the `vf` register class.
|
||||
- Added support for "vector float" assembly operations, including `lvf`, `svf`, `xor`, `sub`, `add`, and `blend`.
|
||||
- Added support for "vector float" assembly operations, including `lvf`, `svf`, `xor`, `sub`, `add`, and `blend`.
|
||||
- Added the ability to spill floating point variables to the stack if there aren't enough registers.
|
||||
- Improved backup and restore of xmm registers.
|
||||
- Fixed an off-by-one in the move eliminator (the previous version was correct, but did not generate code that was as good). Complicated functions are 2 to 10% smaller.
|
||||
- Improved getting a stack address.
|
||||
- Improved getting the value of `#f`, `#t`, and `()`.
|
@ -9,6 +9,7 @@
|
||||
#include <thread>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/util/Timer.h"
|
||||
#include "game/sce/libscf.h"
|
||||
#include "kboot.h"
|
||||
#include "kmachine.h"
|
||||
@ -146,6 +147,7 @@ void KernelCheckAndDispatch() {
|
||||
// dispatch the kernel
|
||||
//(**kernel_dispatcher)();
|
||||
|
||||
Timer kernel_dispatch_timer;
|
||||
if (MasterUseKernel) {
|
||||
// use the GOAL kernel.
|
||||
call_goal_on_stack(Ptr<Function>(kernel_dispatcher->value), goal_stack, s7.offset,
|
||||
@ -164,6 +166,11 @@ void KernelCheckAndDispatch() {
|
||||
}
|
||||
}
|
||||
|
||||
auto time_ms = kernel_dispatch_timer.getMs();
|
||||
if (time_ms > 3) {
|
||||
printf("Kernel dispatch time: %.3f ms\n", time_ms);
|
||||
}
|
||||
|
||||
ClearPending();
|
||||
|
||||
// if the listener function changed, it means the kernel ran it, so we should notify compiler.
|
||||
|
@ -5,3 +5,4 @@
|
||||
;; name in dgo: gravity-h
|
||||
;; dgos: GAME, ENGINE
|
||||
|
||||
;; This file generates no code.
|
@ -318,8 +318,6 @@
|
||||
len)
|
||||
)
|
||||
|
||||
|
||||
|
||||
(defmethod asize-of pair ((obj pair))
|
||||
"Get the asize of a pair"
|
||||
(the-as int (-> pair size))
|
||||
|
@ -62,6 +62,8 @@ void CodeGenerator::do_function(FunctionEnv* env, int f_idx) {
|
||||
* Generates prologues / epilogues.
|
||||
*/
|
||||
void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
|
||||
bool use_new_xmms = true;
|
||||
|
||||
auto f_rec = m_gen.get_existing_function_record(f_idx);
|
||||
// todo, extra alignment settings
|
||||
|
||||
@ -71,13 +73,44 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
|
||||
// compute how much stack we will use
|
||||
int stack_offset = 0;
|
||||
|
||||
// back up xmms (currently not aligned)
|
||||
// count how many xmm's we have to backup
|
||||
int n_xmm_backups = 0;
|
||||
for (auto& saved_reg : allocs.used_saved_regs) {
|
||||
if (saved_reg.is_xmm()) {
|
||||
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm8s(RSP, XMM_SIZE), InstructionInfo::PROLOGUE);
|
||||
m_gen.add_instr_no_ir(f_rec, IGen::store128_gpr64_xmm128(RSP, saved_reg),
|
||||
n_xmm_backups++;
|
||||
}
|
||||
}
|
||||
|
||||
// only for new xmms. if n == 0, we don't use this at all.
|
||||
int xmm_backup_stack_offset = 8 + XMM_SIZE * n_xmm_backups;
|
||||
|
||||
if (use_new_xmms) {
|
||||
if (n_xmm_backups > 0) {
|
||||
// offset the stack
|
||||
stack_offset += xmm_backup_stack_offset;
|
||||
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm(RSP, xmm_backup_stack_offset),
|
||||
InstructionInfo::PROLOGUE);
|
||||
stack_offset += XMM_SIZE;
|
||||
// back up xmms
|
||||
int i = 0;
|
||||
for (auto& saved_reg : allocs.used_saved_regs) {
|
||||
if (saved_reg.is_xmm()) {
|
||||
int offset = i * XMM_SIZE;
|
||||
m_gen.add_instr_no_ir(f_rec, IGen::store128_xmm128_reg_offset(RSP, saved_reg, offset),
|
||||
InstructionInfo::PROLOGUE);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// back up xmms (currently not aligned)
|
||||
for (auto& saved_reg : allocs.used_saved_regs) {
|
||||
if (saved_reg.is_xmm()) {
|
||||
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm8s(RSP, XMM_SIZE),
|
||||
InstructionInfo::PROLOGUE);
|
||||
m_gen.add_instr_no_ir(f_rec, IGen::store128_gpr64_xmm128(RSP, saved_reg),
|
||||
InstructionInfo::PROLOGUE);
|
||||
stack_offset += XMM_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -132,10 +165,16 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
|
||||
auto& bonus = allocs.stack_ops.at(ir_idx);
|
||||
for (auto& op : bonus.ops) {
|
||||
if (op.load) {
|
||||
if (op.reg.is_gpr()) {
|
||||
if (op.reg.is_gpr() && op.reg_class == RegClass::GPR_64) {
|
||||
// todo, s8 or 0 offset if possible?
|
||||
m_gen.add_instr(IGen::load64_gpr64_plus_s32(
|
||||
op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, RSP),
|
||||
i_rec);
|
||||
} else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) {
|
||||
// load xmm32 off of the stack
|
||||
m_gen.add_instr(IGen::load_reg_offset_xmm32(
|
||||
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
|
||||
i_rec);
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
@ -148,10 +187,16 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
|
||||
// store things back on the stack if needed.
|
||||
for (auto& op : bonus.ops) {
|
||||
if (op.store) {
|
||||
if (op.reg.is_gpr()) {
|
||||
if (op.reg.is_gpr() && op.reg_class == RegClass::GPR_64) {
|
||||
// todo, s8 or 0 offset if possible?
|
||||
m_gen.add_instr(IGen::store64_gpr64_plus_s32(
|
||||
RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, op.reg),
|
||||
i_rec);
|
||||
} else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) {
|
||||
// store xmm32 on the stack
|
||||
m_gen.add_instr(IGen::store_reg_offset_xmm32(
|
||||
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
|
||||
i_rec);
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
@ -180,12 +225,31 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) {
|
||||
auto& saved_reg = allocs.used_saved_regs.at(i);
|
||||
if (saved_reg.is_xmm()) {
|
||||
m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_gpr64(saved_reg, RSP),
|
||||
if (use_new_xmms) {
|
||||
if (n_xmm_backups > 0) {
|
||||
int j = n_xmm_backups;
|
||||
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) {
|
||||
auto& saved_reg = allocs.used_saved_regs.at(i);
|
||||
if (saved_reg.is_xmm()) {
|
||||
j--;
|
||||
int offset = j * XMM_SIZE;
|
||||
m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_reg_offset(saved_reg, RSP, offset),
|
||||
InstructionInfo::EPILOGUE);
|
||||
}
|
||||
}
|
||||
assert(j == 0);
|
||||
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm(RSP, xmm_backup_stack_offset),
|
||||
InstructionInfo::EPILOGUE);
|
||||
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm8s(RSP, XMM_SIZE), InstructionInfo::EPILOGUE);
|
||||
}
|
||||
} else {
|
||||
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) {
|
||||
auto& saved_reg = allocs.used_saved_regs.at(i);
|
||||
if (saved_reg.is_xmm()) {
|
||||
m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_gpr64(saved_reg, RSP),
|
||||
InstructionInfo::EPILOGUE);
|
||||
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm8s(RSP, XMM_SIZE),
|
||||
InstructionInfo::EPILOGUE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "IR.h"
|
||||
#include "goalc/emitter/IGen.h"
|
||||
#include "third-party/fmt/core.h"
|
||||
#include "common/symbols.h"
|
||||
|
||||
using namespace emitter;
|
||||
|
||||
@ -202,10 +203,19 @@ void IR_LoadSymbolPointer::do_codegen(emitter::ObjectGenerator* gen,
|
||||
const AllocationResult& allocs,
|
||||
emitter::IR_Record irec) {
|
||||
auto dest_reg = get_reg(m_dest, allocs, irec);
|
||||
// todo, could be single lea opcode
|
||||
gen->add_instr(IGen::mov_gpr64_gpr64(dest_reg, gRegInfo.get_st_reg()), irec);
|
||||
auto add = gen->add_instr(IGen::add_gpr64_imm32s(dest_reg, 0x0afecafe), irec);
|
||||
gen->link_instruction_symbol_ptr(add, m_name);
|
||||
if (m_name == "#f") {
|
||||
static_assert(FIX_SYM_FALSE == 0, "false symbol location");
|
||||
gen->add_instr(IGen::mov_gpr64_gpr64(dest_reg, gRegInfo.get_st_reg()), irec);
|
||||
} else if (m_name == "#t") {
|
||||
gen->add_instr(IGen::lea_reg_plus_off8(dest_reg, gRegInfo.get_st_reg(), FIX_SYM_TRUE), irec);
|
||||
} else if (m_name == "_empty_") {
|
||||
gen->add_instr(IGen::lea_reg_plus_off8(dest_reg, gRegInfo.get_st_reg(), FIX_SYM_EMPTY_PAIR),
|
||||
irec);
|
||||
} else {
|
||||
auto instr =
|
||||
gen->add_instr(IGen::lea_reg_plus_off32(dest_reg, gRegInfo.get_st_reg(), 0x0afecafe), irec);
|
||||
gen->link_instruction_symbol_ptr(instr, m_name);
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////
|
||||
@ -1002,12 +1012,15 @@ void IR_GetStackAddr::do_codegen(emitter::ObjectGenerator* gen,
|
||||
auto dest_reg = get_reg(m_dest, allocs, irec);
|
||||
int offset = GPR_SIZE * allocs.get_slot_for_var(m_slot);
|
||||
|
||||
// dest = offset
|
||||
load_constant(offset, gen, irec, dest_reg);
|
||||
// dest = offset + RSP
|
||||
gen->add_instr(IGen::add_gpr64_gpr64(dest_reg, RSP), irec);
|
||||
// dest = offset + RSP - offset
|
||||
gen->add_instr(IGen::sub_gpr64_gpr64(dest_reg, gRegInfo.get_offset_reg()), irec);
|
||||
if (offset == 0) {
|
||||
gen->add_instr(IGen::mov_gpr64_gpr64(dest_reg, RSP), irec);
|
||||
gen->add_instr(IGen::sub_gpr64_gpr64(dest_reg, gRegInfo.get_offset_reg()), irec);
|
||||
} else {
|
||||
// dest = offset + RSP
|
||||
gen->add_instr(IGen::lea_reg_plus_off(dest_reg, RSP, offset), irec);
|
||||
// dest = offset + RSP - offset
|
||||
gen->add_instr(IGen::sub_gpr64_gpr64(dest_reg, gRegInfo.get_offset_reg()), irec);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////
|
||||
|
@ -966,6 +966,62 @@ class IGen {
|
||||
return instr;
|
||||
}
|
||||
|
||||
/*!
 * Build "lea dest, [base + offset]" using the 32-bit offset form (opcode 0x8d, REX.W set).
 * dest and base must both be GPRs and offset must fit in a signed 32-bit value.
 */
static Instruction lea_reg_plus_off32(Register dest, Register base, s64 offset) {
  assert(dest.is_gpr());
  assert(base.is_gpr());
  assert(offset >= INT32_MIN && offset <= INT32_MAX);

  constexpr uint8_t kLeaOpcode = 0x8d;
  constexpr uint8_t kModDisp32 = 2;  // ModRM.mod = 2: base register plus 32-bit displacement

  Instruction lea(kLeaOpcode);
  lea.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), kModDisp32, base.hw_id(),
                                         /*rex_w=*/true);
  lea.set(Imm(4, offset));  // 4-byte offset
  return lea;
}
|
||||
|
||||
/*!
 * Build "lea dest, [base + offset]" using the 8-bit offset form (opcode 0x8d, REX.W set).
 * dest and base must both be GPRs and offset must fit in a signed byte.
 */
static Instruction lea_reg_plus_off8(Register dest, Register base, s64 offset) {
  assert(dest.is_gpr());
  assert(base.is_gpr());
  assert(offset >= INT8_MIN && offset <= INT8_MAX);

  constexpr uint8_t kLeaOpcode = 0x8d;
  constexpr uint8_t kModDisp8 = 1;  // ModRM.mod = 1: base register plus 8-bit displacement

  Instruction lea(kLeaOpcode);
  lea.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), kModDisp8, base.hw_id(),
                                         /*rex_w=*/true);
  lea.set(Imm(1, offset));  // 1-byte offset
  return lea;
}
|
||||
|
||||
/*!
 * Build "lea dest, [base + offset]", picking the shortest encoding that can hold offset:
 * the 8-bit form when offset fits in a signed byte, otherwise the 32-bit form.
 * offset must fit in a signed 32-bit value.
 */
static Instruction lea_reg_plus_off(Register dest, Register base, s64 offset) {
  if (offset >= INT8_MIN && offset <= INT8_MAX) {
    return lea_reg_plus_off8(dest, base, offset);
  }
  // Everything else has to be encodable in 32 bits. Asserting and then returning unconditionally
  // means every path returns a value; the old trailing "else { assert(false); }" branch ran off
  // the end of a non-void function (undefined behavior) when asserts are compiled out.
  assert(offset >= INT32_MIN && offset <= INT32_MAX);
  return lea_reg_plus_off32(dest, base, offset);
}
|
||||
|
||||
/*!
 * Build a 32-bit float store "movss [base + offset], xmm_value" (f3 0f 11) with a 32-bit offset.
 * xmm_value must be an xmm register, base a GPR, and offset must fit in a signed 32-bit value.
 */
static Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset) {
  assert(xmm_value.is_xmm());
  assert(base.is_gpr());
  assert(offset >= INT32_MIN && offset <= INT32_MAX);

  constexpr uint8_t kModDisp32 = 2;  // ModRM.mod = 2: base register plus 32-bit displacement

  Instruction movss(0xf3);
  movss.set_op2(0x0f);
  movss.set_op3(0x11);
  movss.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), kModDisp32, base.hw_id(),
                                           /*rex_w=*/false);
  movss.set(Imm(4, offset));
  // NOTE(review): presumably reorders the leading f3 byte and the REX byte so the prefix is
  // emitted first - confirm against Instruction::swap_op0_rex.
  movss.swap_op0_rex();
  return movss;
}
|
||||
|
||||
/*!
 * Build a 32-bit float store "movss [base + offset], xmm_value" (f3 0f 11) with an 8-bit offset.
 * xmm_value must be an xmm register, base a GPR, and offset must fit in a signed byte.
 */
static Instruction store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset) {
  assert(xmm_value.is_xmm());
  assert(base.is_gpr());
  assert(offset >= INT8_MIN && offset <= INT8_MAX);

  constexpr uint8_t kModDisp8 = 1;  // ModRM.mod = 1: base register plus 8-bit displacement

  Instruction movss(0xf3);
  movss.set_op2(0x0f);
  movss.set_op3(0x11);
  movss.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), kModDisp8, base.hw_id(),
                                           /*rex_w=*/false);
  movss.set(Imm(1, offset));
  // NOTE(review): presumably reorders the leading f3 byte and the REX byte so the prefix is
  // emitted first - confirm against Instruction::swap_op0_rex.
  movss.swap_op0_rex();
  return movss;
}
|
||||
|
||||
static Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest,
|
||||
Register addr1,
|
||||
Register addr2,
|
||||
@ -985,6 +1041,32 @@ class IGen {
|
||||
return instr;
|
||||
}
|
||||
|
||||
/*!
 * Build a 32-bit float load "movss xmm_dest, [base + offset]" (f3 0f 10) with a 32-bit offset.
 * xmm_dest must be an xmm register, base a GPR, and offset must fit in a signed 32-bit value.
 */
static Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 offset) {
  assert(xmm_dest.is_xmm());
  assert(base.is_gpr());
  assert(offset >= INT32_MIN && offset <= INT32_MAX);

  constexpr uint8_t kModDisp32 = 2;  // ModRM.mod = 2: base register plus 32-bit displacement

  Instruction movss(0xf3);
  movss.set_op2(0x0f);
  movss.set_op3(0x10);
  movss.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), kModDisp32, base.hw_id(),
                                           /*rex_w=*/false);
  movss.set(Imm(4, offset));
  // NOTE(review): presumably reorders the leading f3 byte and the REX byte so the prefix is
  // emitted first - confirm against Instruction::swap_op0_rex.
  movss.swap_op0_rex();
  return movss;
}
|
||||
|
||||
/*!
 * Build a 32-bit float load "movss xmm_dest, [base + offset]" (f3 0f 10) with an 8-bit offset.
 * xmm_dest must be an xmm register, base a GPR, and offset must fit in a signed byte.
 */
static Instruction load32_xmm32_gpr64_plus_s8(Register xmm_dest, Register base, s64 offset) {
  assert(xmm_dest.is_xmm());
  assert(base.is_gpr());
  assert(offset >= INT8_MIN && offset <= INT8_MAX);

  constexpr uint8_t kModDisp8 = 1;  // ModRM.mod = 1: base register plus 8-bit displacement

  Instruction movss(0xf3);
  movss.set_op2(0x0f);
  movss.set_op3(0x10);
  movss.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), kModDisp8, base.hw_id(),
                                           /*rex_w=*/false);
  movss.set(Imm(1, offset));
  // NOTE(review): presumably reorders the leading f3 byte and the REX byte so the prefix is
  // emitted first - confirm against Instruction::swap_op0_rex.
  movss.swap_op0_rex();
  return movss;
}
|
||||
|
||||
static Instruction load_goal_xmm32(Register xmm_dest, Register addr, Register off, s64 offset) {
|
||||
if (offset == 0) {
|
||||
return load32_xmm32_gpr64_plus_gpr64(xmm_dest, addr, off);
|
||||
@ -1009,6 +1091,30 @@ class IGen {
|
||||
}
|
||||
}
|
||||
|
||||
/*!
 * Store the 32-bit float in xmm_value to [base + offset], picking the shortest encoding:
 * the 8-bit offset form when offset fits in a signed byte, otherwise the 32-bit form.
 * base must be a GPR, xmm_value an xmm register, and offset must fit in a signed 32-bit value.
 */
static Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset) {
  assert(base.is_gpr());
  assert(xmm_value.is_xmm());
  if (offset >= INT8_MIN && offset <= INT8_MAX) {
    return store32_xmm32_gpr64_plus_s8(base, xmm_value, offset);
  }
  // Everything else has to be encodable in 32 bits. Asserting and then returning unconditionally
  // means every path returns a value; the old trailing "else { assert(false); }" branch ran off
  // the end of a non-void function (undefined behavior) when asserts are compiled out.
  assert(offset >= INT32_MIN && offset <= INT32_MAX);
  return store32_xmm32_gpr64_plus_s32(base, xmm_value, offset);
}
|
||||
|
||||
/*!
 * Load a 32-bit float from [base + offset] into xmm_dest, picking the shortest encoding:
 * the 8-bit offset form when offset fits in a signed byte, otherwise the 32-bit form.
 * base must be a GPR, xmm_dest an xmm register, and offset must fit in a signed 32-bit value.
 */
static Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) {
  assert(base.is_gpr());
  assert(xmm_dest.is_xmm());
  if (offset >= INT8_MIN && offset <= INT8_MAX) {
    return load32_xmm32_gpr64_plus_s8(xmm_dest, base, offset);
  }
  // Everything else has to be encodable in 32 bits. Asserting and then returning unconditionally
  // means every path returns a value; the old trailing "else { assert(false); }" branch ran off
  // the end of a non-void function (undefined behavior) when asserts are compiled out.
  assert(offset >= INT32_MIN && offset <= INT32_MAX);
  return load32_xmm32_gpr64_plus_s32(xmm_dest, base, offset);
}
|
||||
|
||||
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
// LOADS n' STORES - XMM128
|
||||
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
@ -1019,8 +1125,8 @@ class IGen {
|
||||
static Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) {
|
||||
assert(gpr_addr.is_gpr());
|
||||
assert(xmm_value.is_xmm());
|
||||
// Instruction instr(0x66);
|
||||
Instruction instr(0xf3);
|
||||
Instruction instr(0x66);
|
||||
// Instruction instr(0xf3);
|
||||
instr.set_op2(0x0f);
|
||||
instr.set_op3(0x7f);
|
||||
instr.set_modrm_and_rex_for_reg_addr(xmm_value.hw_id(), gpr_addr.hw_id(), false);
|
||||
@ -1028,11 +1134,39 @@ class IGen {
|
||||
return instr;
|
||||
}
|
||||
|
||||
/*!
 * Build a 128-bit store "movdqa [gpr_addr + offset], xmm_value" (66 0f 7f) with a 32-bit offset.
 * The 0x66 prefix selects the aligned form; 0xf3 would encode the unaligned movdqu instead.
 * gpr_addr must be a GPR, xmm_value an xmm register, and offset must fit in a signed 32-bit value.
 */
static Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 offset) {
  assert(gpr_addr.is_gpr());
  assert(xmm_value.is_xmm());
  assert(offset >= INT32_MIN && offset <= INT32_MAX);

  constexpr uint8_t kModDisp32 = 2;  // ModRM.mod = 2: base register plus 32-bit displacement

  Instruction movdqa(0x66);
  movdqa.set_op2(0x0f);
  movdqa.set_op3(0x7f);
  movdqa.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), kModDisp32, gpr_addr.hw_id(),
                                            /*rex_w=*/false);
  movdqa.set(Imm(4, offset));
  // NOTE(review): presumably reorders the leading 66 byte and the REX byte so the prefix is
  // emitted first - confirm against Instruction::swap_op0_rex.
  movdqa.swap_op0_rex();
  return movdqa;
}
|
||||
|
||||
/*!
 * Build a 128-bit store "movdqa [gpr_addr + offset], xmm_value" (66 0f 7f) with an 8-bit offset.
 * The 0x66 prefix selects the aligned form; 0xf3 would encode the unaligned movdqu instead.
 * gpr_addr must be a GPR, xmm_value an xmm register, and offset must fit in a signed byte.
 */
static Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 offset) {
  assert(gpr_addr.is_gpr());
  assert(xmm_value.is_xmm());
  assert(offset >= INT8_MIN && offset <= INT8_MAX);

  constexpr uint8_t kModDisp8 = 1;  // ModRM.mod = 1: base register plus 8-bit displacement

  Instruction movdqa(0x66);
  movdqa.set_op2(0x0f);
  movdqa.set_op3(0x7f);
  movdqa.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), kModDisp8, gpr_addr.hw_id(),
                                            /*rex_w=*/false);
  movdqa.set(Imm(1, offset));
  // NOTE(review): presumably reorders the leading 66 byte and the REX byte so the prefix is
  // emitted first - confirm against Instruction::swap_op0_rex.
  movdqa.swap_op0_rex();
  return movdqa;
}
|
||||
|
||||
static Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr) {
|
||||
assert(gpr_addr.is_gpr());
|
||||
assert(xmm_dest.is_xmm());
|
||||
// Instruction instr(0x66);
|
||||
Instruction instr(0xf3);
|
||||
Instruction instr(0x66);
|
||||
// Instruction instr(0xf3);
|
||||
instr.set_op2(0x0f);
|
||||
instr.set_op3(0x6f);
|
||||
instr.set_modrm_and_rex_for_reg_addr(xmm_dest.hw_id(), gpr_addr.hw_id(), false);
|
||||
@ -1040,6 +1174,58 @@ class IGen {
|
||||
return instr;
|
||||
}
|
||||
|
||||
/*!
 * Build a 128-bit load "movdqa xmm_dest, [gpr_addr + offset]" (66 0f 6f) with a 32-bit offset.
 * The 0x66 prefix selects the aligned form; 0xf3 would encode the unaligned movdqu instead.
 * gpr_addr must be a GPR, xmm_dest an xmm register, and offset must fit in a signed 32-bit value.
 */
static Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 offset) {
  assert(gpr_addr.is_gpr());
  assert(xmm_dest.is_xmm());
  assert(offset >= INT32_MIN && offset <= INT32_MAX);

  constexpr uint8_t kModDisp32 = 2;  // ModRM.mod = 2: base register plus 32-bit displacement

  Instruction movdqa(0x66);
  movdqa.set_op2(0x0f);
  movdqa.set_op3(0x6f);
  movdqa.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), kModDisp32, gpr_addr.hw_id(),
                                            /*rex_w=*/false);
  movdqa.set(Imm(4, offset));
  // NOTE(review): presumably reorders the leading 66 byte and the REX byte so the prefix is
  // emitted first - confirm against Instruction::swap_op0_rex.
  movdqa.swap_op0_rex();
  return movdqa;
}
|
||||
|
||||
/*!
 * Build a 128-bit load "movdqa xmm_dest, [gpr_addr + offset]" (66 0f 6f) with an 8-bit offset.
 * The 0x66 prefix selects the aligned form; 0xf3 would encode the unaligned movdqu instead.
 * gpr_addr must be a GPR, xmm_dest an xmm register, and offset must fit in a signed byte.
 */
static Instruction load128_xmm128_gpr64_s8(Register xmm_dest, Register gpr_addr, s64 offset) {
  assert(gpr_addr.is_gpr());
  assert(xmm_dest.is_xmm());
  assert(offset >= INT8_MIN && offset <= INT8_MAX);

  constexpr uint8_t kModDisp8 = 1;  // ModRM.mod = 1: base register plus 8-bit displacement

  Instruction movdqa(0x66);
  movdqa.set_op2(0x0f);
  movdqa.set_op3(0x6f);
  movdqa.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), kModDisp8, gpr_addr.hw_id(),
                                            /*rex_w=*/false);
  movdqa.set(Imm(1, offset));
  // NOTE(review): presumably reorders the leading 66 byte and the REX byte so the prefix is
  // emitted first - confirm against Instruction::swap_op0_rex.
  movdqa.swap_op0_rex();
  return movdqa;
}
|
||||
|
||||
/*!
 * Load 128 bits from [base + offset] into xmm_dest, picking the shortest encoding:
 * no offset when offset is 0, the 8-bit offset form when it fits in a signed byte,
 * otherwise the 32-bit form. offset must fit in a signed 32-bit value.
 */
static Instruction load128_xmm128_reg_offset(Register xmm_dest, Register base, s64 offset) {
  if (offset == 0) {
    return load128_xmm128_gpr64(xmm_dest, base);
  }
  if (offset >= INT8_MIN && offset <= INT8_MAX) {
    return load128_xmm128_gpr64_s8(xmm_dest, base, offset);
  }
  // Everything else has to be encodable in 32 bits. Asserting and then returning unconditionally
  // means every path returns a value; the old trailing "else { assert(false); }" branch ran off
  // the end of a non-void function (undefined behavior) when asserts are compiled out.
  assert(offset >= INT32_MIN && offset <= INT32_MAX);
  return load128_xmm128_gpr64_s32(xmm_dest, base, offset);
}
|
||||
|
||||
/*!
 * Store the 128 bits in xmm_val to [base + offset], picking the shortest encoding:
 * no offset when offset is 0, the 8-bit offset form when it fits in a signed byte,
 * otherwise the 32-bit form. offset must fit in a signed 32-bit value.
 */
static Instruction store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offset) {
  if (offset == 0) {
    return store128_gpr64_xmm128(base, xmm_val);
  }
  if (offset >= INT8_MIN && offset <= INT8_MAX) {
    return store128_gpr64_xmm128_s8(base, xmm_val, offset);
  }
  // Everything else has to be encodable in 32 bits. Asserting and then returning unconditionally
  // means every path returns a value; the old trailing "else { assert(false); }" branch ran off
  // the end of a non-void function (undefined behavior) when asserts are compiled out.
  assert(offset >= INT32_MIN && offset <= INT32_MAX);
  return store128_gpr64_xmm128_s32(base, xmm_val, offset);
}
|
||||
|
||||
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
// RIP loads and stores
|
||||
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
@ -1231,7 +1417,7 @@ class IGen {
|
||||
assert(dst_reg.is_gpr());
|
||||
assert(src_reg.is_gpr());
|
||||
Instruction instr(0x8b);
|
||||
instr.set_modrm_rex_sib_for_reg_reg_disp32(dst_reg.hw_id(), 2, src_reg.hw_id(), true);
|
||||
instr.set_modrm_rex_sib_for_reg_reg_disp(dst_reg.hw_id(), 2, src_reg.hw_id(), true);
|
||||
instr.set_disp(Imm(4, offset));
|
||||
return instr;
|
||||
}
|
||||
@ -1243,7 +1429,7 @@ class IGen {
|
||||
assert(addr.is_gpr());
|
||||
assert(value.is_gpr());
|
||||
Instruction instr(0x89);
|
||||
instr.set_modrm_rex_sib_for_reg_reg_disp32(value.hw_id(), 2, addr.hw_id(), true);
|
||||
instr.set_modrm_rex_sib_for_reg_reg_disp(value.hw_id(), 2, addr.hw_id(), true);
|
||||
instr.set_disp(Imm(4, offset));
|
||||
return instr;
|
||||
}
|
||||
|
@ -839,9 +839,9 @@ struct Instruction {
|
||||
}
|
||||
|
||||
/*!
|
||||
* Set up modrm and rex for the commonly used 32-bit immediate displacement indexing mode.
|
||||
* Set up modrm and rex for the commonly used immediate displacement indexing mode.
|
||||
*/
|
||||
void set_modrm_rex_sib_for_reg_reg_disp32(uint8_t reg, uint8_t mod, uint8_t rm, bool rex_w) {
|
||||
void set_modrm_rex_sib_for_reg_reg_disp(uint8_t reg, uint8_t mod, uint8_t rm, bool rex_w) {
|
||||
ModRM modrm;
|
||||
|
||||
bool rex_r = false;
|
||||
|
@ -435,12 +435,12 @@ bool can_var_be_assigned(int var,
|
||||
if (move_eliminator) {
|
||||
if (enable_fancy_coloring) {
|
||||
if (lr.dies_next_at_instr(instr) && other_lr.becomes_live_at_instr(instr) &&
|
||||
in.instructions.at(instr).is_move) {
|
||||
(allow_read_write_same_reg || in.instructions.at(instr).is_move)) {
|
||||
allowed_by_move_eliminator = true;
|
||||
}
|
||||
|
||||
if (lr.becomes_live_at_instr(instr) && other_lr.dies_next_at_instr(instr) &&
|
||||
in.instructions.at(instr).is_move) {
|
||||
(allow_read_write_same_reg || in.instructions.at(instr).is_move)) {
|
||||
allowed_by_move_eliminator = true;
|
||||
}
|
||||
} else {
|
||||
@ -456,7 +456,7 @@ bool can_var_be_assigned(int var,
|
||||
}
|
||||
|
||||
if (!allowed_by_move_eliminator) {
|
||||
if (debug_trace >= 2) {
|
||||
if (debug_trace >= 1) {
|
||||
printf("at idx %d, %s conflicts\n", instr, other_lr.print_assignment().c_str());
|
||||
}
|
||||
|
||||
@ -470,7 +470,7 @@ bool can_var_be_assigned(int var,
|
||||
for (int instr = lr.min + 1; instr <= lr.max - 1; instr++) {
|
||||
for (auto clobber : in.instructions.at(instr).clobber) {
|
||||
if (ass.occupies_reg(clobber)) {
|
||||
if (debug_trace >= 2) {
|
||||
if (debug_trace >= 1) {
|
||||
printf("at idx %d clobber\n", instr);
|
||||
}
|
||||
|
||||
@ -482,7 +482,7 @@ bool can_var_be_assigned(int var,
|
||||
for (int instr = lr.min; instr <= lr.max; instr++) {
|
||||
for (auto exclusive : in.instructions.at(instr).exclude) {
|
||||
if (ass.occupies_reg(exclusive)) {
|
||||
if (debug_trace >= 2) {
|
||||
if (debug_trace >= 1) {
|
||||
printf("at idx %d exclusive conflict\n", instr);
|
||||
}
|
||||
|
||||
@ -495,7 +495,7 @@ bool can_var_be_assigned(int var,
|
||||
for (int instr = lr.min; instr <= lr.max; instr++) {
|
||||
if (lr.has_constraint && lr.assignment.at(instr - lr.min).is_assigned()) {
|
||||
if (!(ass.occupies_same_reg(lr.assignment.at(instr - lr.min)))) {
|
||||
if (debug_trace >= 2) {
|
||||
if (debug_trace >= 1) {
|
||||
printf("at idx %d self bad (%s) (%s)\n", instr,
|
||||
lr.assignment.at(instr - lr.min).to_string().c_str(), ass.to_string().c_str());
|
||||
}
|
||||
@ -526,12 +526,12 @@ bool assignment_ok_at(int var,
|
||||
if (move_eliminator) {
|
||||
if (enable_fancy_coloring) {
|
||||
if (lr.dies_next_at_instr(idx) && other_lr.becomes_live_at_instr(idx) &&
|
||||
in.instructions.at(idx).is_move) {
|
||||
(allow_read_write_same_reg || in.instructions.at(idx).is_move)) {
|
||||
allowed_by_move_eliminator = true;
|
||||
}
|
||||
|
||||
if (lr.becomes_live_at_instr(idx) && other_lr.dies_next_at_instr(idx) &&
|
||||
in.instructions.at(idx).is_move) {
|
||||
(allow_read_write_same_reg || in.instructions.at(idx).is_move)) {
|
||||
allowed_by_move_eliminator = true;
|
||||
}
|
||||
} else {
|
||||
@ -668,6 +668,7 @@ bool try_spill_coloring(int var, RegAllocCache* cache, const AllocationInput& in
|
||||
for (int instr = lr.min; instr <= lr.max; instr++) {
|
||||
// bonus_instructions.at(instr).clear();
|
||||
StackOp::Op bonus;
|
||||
bonus.reg_class = cache->iregs.at(var).reg_class;
|
||||
|
||||
// we may have a constraint in here
|
||||
auto& current_assignment = lr.assignment.at(instr - lr.min);
|
||||
@ -821,15 +822,15 @@ bool do_allocation_for_var(int var,
|
||||
auto& first_instr = in.instructions.at(lr.min);
|
||||
auto& last_instr = in.instructions.at(lr.max);
|
||||
|
||||
if (first_instr.is_move) {
|
||||
auto& possible_coloring = cache->live_ranges.at(first_instr.read.front().id).get(lr.min);
|
||||
if (!colored && last_instr.is_move) {
|
||||
auto& possible_coloring = cache->live_ranges.at(last_instr.write.front().id).get(lr.max);
|
||||
if (possible_coloring.is_assigned() && in_vec(all_reg_order, possible_coloring.reg)) {
|
||||
colored = try_assignment_for_var(var, possible_coloring, cache, in, debug_trace);
|
||||
}
|
||||
}
|
||||
|
||||
if (!colored && last_instr.is_move) {
|
||||
auto& possible_coloring = cache->live_ranges.at(last_instr.write.front().id).get(lr.max);
|
||||
if (!colored && first_instr.is_move) {
|
||||
auto& possible_coloring = cache->live_ranges.at(first_instr.read.front().id).get(lr.min);
|
||||
if (possible_coloring.is_assigned() && in_vec(all_reg_order, possible_coloring.reg)) {
|
||||
colored = try_assignment_for_var(var, possible_coloring, cache, in, debug_trace);
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ struct StackOp {
|
||||
// A single spill-related stack operation attached to one instruction: which register is
// involved, which stack slot it maps to, and whether it is loaded before and/or stored after.
struct Op {
|
||||
// Spill slot index; code generation passes it to get_slot_for_spill() to compute the stack
// offset. Defaults to -1 (presumably "not assigned yet" - confirm with the allocator).
int slot = -1;
|
||||
// Register that is loaded from / stored to the slot.
emitter::Register reg;
|
||||
// Register class of the spilled variable. Code generation checks it together with reg to pick
// between the 64-bit GPR and the 32-bit float load/store instructions.
RegClass reg_class = RegClass::INVALID;
|
||||
bool load = false; // load from reg before instruction?
|
||||
bool store = false; // store into reg after instruction?
|
||||
};
|
||||
@ -47,6 +48,8 @@ constexpr bool enable_fancy_coloring = true;
|
||||
// will attempt to allocate in a way to reduce the number of moves.
|
||||
constexpr bool move_eliminator = true;
|
||||
|
||||
constexpr bool allow_read_write_same_reg = true;
|
||||
|
||||
// Indication of where a variable is live and what assignment it has at each point in the range.
|
||||
struct LiveInfo {
|
||||
public:
|
||||
|
52
test/goalc/source_templates/with_game/test-xmm-spill.gc
Normal file
52
test/goalc/source_templates/with_game/test-xmm-spill.gc
Normal file
@ -0,0 +1,52 @@
|
||||
;; Test for spilling floating point variables to the stack.
;; force-xmm-spill binds 22 float variables (1.0 through 22.0) and then adds all of them in a
;; single expression, so every variable is still needed when the sum is computed.
;; The expected sum is 1 + 2 + ... + 22 = 253.
(defun force-xmm-spill ()
|
||||
(let ((v00 1.0)
|
||||
(v01 2.0)
|
||||
(v02 3.0)
|
||||
(v03 4.0)
|
||||
(v04 5.0)
|
||||
(v05 6.0)
|
||||
(v06 7.0)
|
||||
(v07 8.0)
|
||||
(v08 9.0)
|
||||
(v09 10.0)
|
||||
(v10 11.0)
|
||||
(v11 12.0)
|
||||
(v12 13.0)
|
||||
(v13 14.0)
|
||||
(v14 15.0)
|
||||
(v15 16.0)
|
||||
(v16 17.0)
|
||||
(v17 18.0)
|
||||
(v18 19.0)
|
||||
(v19 20.0)
|
||||
(v20 21.0)
|
||||
(v21 22.0)
|
||||
)
|
||||
(+ v00
|
||||
v01
|
||||
v02
|
||||
v03
|
||||
v04
|
||||
v05
|
||||
v06
|
||||
v07
|
||||
v08
|
||||
v09
|
||||
v10
|
||||
v11
|
||||
v12
|
||||
v13
|
||||
v14
|
||||
v15
|
||||
v16
|
||||
v17
|
||||
v18
|
||||
v19
|
||||
v20
|
||||
v21
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
;; Print the sum as a float followed by a newline, then evaluate to 0 as the script's result.
(format #t "~f~%" (force-xmm-spill))
|
||||
0
|
@ -357,6 +357,10 @@ TEST_F(WithGameTests, VFLoadStatic) {
|
||||
runner.run_static_test(env, testCategory, "test-load-static-vector.gc", {"5.3000\n0\n"});
|
||||
}
|
||||
|
||||
// Runs test-xmm-spill.gc, which adds up 22 float variables holding 1.0 through 22.0, and
// expects the printed sum "253.0000" followed by the script result "0".
TEST_F(WithGameTests, XMMSpill) {
|
||||
runner.run_static_test(env, testCategory, "test-xmm-spill.gc", {"253.0000\n0\n"});
|
||||
}
|
||||
|
||||
TEST(TypeConsistency, TypeConsistency) {
|
||||
Compiler compiler;
|
||||
compiler.enable_throw_on_redefines();
|
||||
|
@ -57,68 +57,68 @@ TEST(CodeTester, xmm_store_128) {
|
||||
// movdqa [r14], xmm3
|
||||
// movdqa [rbx], xmm14
|
||||
// movdqa [r14], xmm13
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM3));
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(R14, XMM3));
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM14));
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(R14, XMM13));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(),
|
||||
// "66 0f 7f 1b 66 41 0f 7f 1e 66 44 0f 7f 33 66 45 0f 7f 2e");
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(RSP, XMM1));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 0c 24"); // requires SIB byte.
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(R12, XMM13));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 2c 24"); // requires SIB byte and REX
|
||||
// byte
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM1));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 4d 00");
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM11));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 7f 5d 00");
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(R13, XMM2));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 7f 55 00");
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(R13, XMM12));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 65 00");
|
||||
|
||||
tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM3));
|
||||
tester.emit(IGen::store128_gpr64_xmm128(R14, XMM3));
|
||||
tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM14));
|
||||
tester.emit(IGen::store128_gpr64_xmm128(R14, XMM13));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(),
|
||||
"f3 0f 7f 1b f3 41 0f 7f 1e f3 44 0f 7f 33 f3 45 0f 7f 2e");
|
||||
"66 0f 7f 1b 66 41 0f 7f 1e 66 44 0f 7f 33 66 45 0f 7f 2e");
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::store128_gpr64_xmm128(RSP, XMM1));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 7f 0c 24"); // requires SIB byte.
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 0c 24"); // requires SIB byte.
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::store128_gpr64_xmm128(R12, XMM13));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 7f 2c 24"); // requires SIB byte and REX byte
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 2c 24"); // requires SIB byte and REX byte
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM1));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 7f 4d 00");
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 4d 00");
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM11));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "f3 44 0f 7f 5d 00");
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 7f 5d 00");
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::store128_gpr64_xmm128(R13, XMM2));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "f3 41 0f 7f 55 00");
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 7f 55 00");
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::store128_gpr64_xmm128(R13, XMM12));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 7f 65 00");
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 65 00");
|
||||
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM3));
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(R14, XMM3));
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM14));
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(R14, XMM13));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(),
|
||||
// "f3 0f 7f 1b f3 41 0f 7f 1e f3 44 0f 7f 33 f3 45 0f 7f 2e");
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(RSP, XMM1));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 7f 0c 24"); // requires SIB byte.
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(R12, XMM13));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 7f 2c 24"); // requires SIB byte and REX
|
||||
// byte
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM1));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 7f 4d 00");
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM11));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 44 0f 7f 5d 00");
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(R13, XMM2));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 41 0f 7f 55 00");
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::store128_gpr64_xmm128(R13, XMM12));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 7f 65 00");
|
||||
}
|
||||
|
||||
TEST(CodeTester, sub_gpr64_imm8) {
|
||||
@ -147,67 +147,67 @@ TEST(CodeTester, xmm_load_128) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(256);
|
||||
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM3, RBX));
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM3, R14));
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM14, RBX));
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM13, R14));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(),
|
||||
"f3 0f 6f 1b f3 41 0f 6f 1e f3 44 0f 6f 33 f3 45 0f 6f 2e");
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM1, RSP));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 6f 0c 24"); // requires SIB byte.
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM13, R12));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 6f 2c 24"); // requires SIB byte and REX byte
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM1, RBP));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 6f 4d 00");
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM11, RBP));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "f3 44 0f 6f 5d 00");
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM2, R13));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "f3 41 0f 6f 55 00");
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM12, R13));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 6f 65 00");
|
||||
// tester.emit(IGen::load128_xmm128_gpr64(XMM3, RBX));
|
||||
// tester.emit(IGen::load128_xmm128_gpr64(XMM3, R14));
|
||||
// tester.emit(IGen::load128_xmm128_gpr64(XMM14, RBX));
|
||||
// tester.emit(IGen::load128_xmm128_gpr64(XMM13, R14));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(),
|
||||
// "66 0f 6f 1b 66 41 0f 6f 1e 66 44 0f 6f 33 66 45 0f 6f 2e");
|
||||
// "f3 0f 6f 1b f3 41 0f 6f 1e f3 44 0f 6f 33 f3 45 0f 6f 2e");
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::load128_xmm128_gpr64(XMM1, RSP));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 0c 24"); // requires SIB byte.
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 6f 0c 24"); // requires SIB byte.
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::load128_xmm128_gpr64(XMM13, R12));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 2c 24"); // requires SIB byte and REX
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 6f 2c 24"); // requires SIB byte and REX
|
||||
// byte
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::load128_xmm128_gpr64(XMM1, RBP));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 4d 00");
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 6f 4d 00");
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::load128_xmm128_gpr64(XMM11, RBP));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 6f 5d 00");
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 44 0f 6f 5d 00");
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::load128_xmm128_gpr64(XMM2, R13));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 6f 55 00");
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 41 0f 6f 55 00");
|
||||
//
|
||||
// tester.clear();
|
||||
// tester.emit(IGen::load128_xmm128_gpr64(XMM12, R13));
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 65 00");
|
||||
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 6f 65 00");
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM3, RBX));
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM3, R14));
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM14, RBX));
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM13, R14));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(),
|
||||
"66 0f 6f 1b 66 41 0f 6f 1e 66 44 0f 6f 33 66 45 0f 6f 2e");
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM1, RSP));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 0c 24"); // requires SIB byte.
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM13, R12));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 2c 24"); // requires SIB byte and REX byte
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM1, RBP));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 4d 00");
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM11, RBP));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 6f 5d 00");
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM2, R13));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 6f 55 00");
|
||||
|
||||
tester.clear();
|
||||
tester.emit(IGen::load128_xmm128_gpr64(XMM12, R13));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 65 00");
|
||||
}
|
||||
|
||||
TEST(CodeTester, push_pop_xmms) {
|
||||
|
@ -3802,3 +3802,83 @@ TEST(EmitterSlow, xmm32_move) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks the bytes emitted by IGen::lea_reg_plus_off for each pairing of
// destination {RDI, R13} with base {RSP, R12}: first four times with offset -3,
// then the same four pairings with offset -300.
TEST(Emitter, LEA) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
// Offset -3: shows up in the expected bytes as the single byte FD.
tester.emit(IGen::lea_reg_plus_off(RDI, RSP, -3));
|
||||
tester.emit(IGen::lea_reg_plus_off(RDI, R12, -3));
|
||||
tester.emit(IGen::lea_reg_plus_off(R13, RSP, -3));
|
||||
tester.emit(IGen::lea_reg_plus_off(R13, R12, -3));
|
||||
// Offset -300: shows up in the expected bytes as the four bytes D4 FE FF FF.
tester.emit(IGen::lea_reg_plus_off(RDI, RSP, -300));
|
||||
tester.emit(IGen::lea_reg_plus_off(RDI, R12, -300));
|
||||
tester.emit(IGen::lea_reg_plus_off(R13, RSP, -300));
|
||||
tester.emit(IGen::lea_reg_plus_off(R13, R12, -300));
|
||||
// All eight instructions are compared at once, in emission order, against one
// hex string (uppercase, no separators) split across two literals.
EXPECT_EQ(tester.dump_to_hex_string(true),
|
||||
"488D7C24FD498D7C24FD4C8D6C24FD4D8D6C24FD488DBC24D4FEFFFF498DBC24D4FEFFFF4C8DAC24D4FEFF"
|
||||
"FF4D8DAC24D4FEFFFF");
|
||||
}
|
||||
|
||||
// Checks the bytes emitted by IGen::load32_xmm32_gpr64_plus_s32 with RSP as the
// base and offset -1234, once for XMM0 + 3 and once for XMM0 + 13.
TEST(EmitterXMM, StackLoad32) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
tester.emit(IGen::load32_xmm32_gpr64_plus_s32(XMM0 + 3, RSP, -1234));
|
||||
tester.emit(IGen::load32_xmm32_gpr64_plus_s32(XMM0 + 13, RSP, -1234));
|
||||
// Both instructions end in 2E FB FF FF, i.e. -1234 as a little-endian 32-bit value.
// The second encoding has an extra 44 byte after F3 (presumably the REX prefix
// needed to reach the higher-numbered XMM register - confirm against IGen).
EXPECT_EQ(tester.dump_to_hex_string(true), "F30F109C242EFBFFFFF3440F10AC242EFBFFFF");
|
||||
}
|
||||
|
||||
// Checks the bytes emitted by IGen::load32_xmm32_gpr64_plus_s8 with RSP as the
// base and offset -12, once for XMM0 + 3 and once for XMM0 + 13.
TEST(EmitterXMM, StackLoad8) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
tester.emit(IGen::load32_xmm32_gpr64_plus_s8(XMM0 + 3, RSP, -12));
|
||||
tester.emit(IGen::load32_xmm32_gpr64_plus_s8(XMM0 + 13, RSP, -12));
|
||||
// Both instructions end in the single byte F4, i.e. -12 as a signed 8-bit value.
// The second encoding has an extra 44 byte after F3 (presumably the REX prefix
// needed to reach the higher-numbered XMM register - confirm against IGen).
EXPECT_EQ(tester.dump_to_hex_string(true), "F30F105C24F4F3440F106C24F4");
|
||||
}
|
||||
|
||||
// Checks the bytes emitted by IGen::load128_xmm128_gpr64_s32 with RSP as the
// base and offset -1234, once for XMM0 + 3 and once for XMM0 + 13.
TEST(EmitterXMM, StackLoadFull32) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
tester.emit(IGen::load128_xmm128_gpr64_s32(XMM0 + 3, RSP, -1234));
|
||||
tester.emit(IGen::load128_xmm128_gpr64_s32(XMM0 + 13, RSP, -1234));
|
||||
// Both instructions start with the 66 prefix byte and end in 2E FB FF FF
// (-1234 as a little-endian 32-bit value). The second encoding has an extra 44
// byte after 66 (presumably a REX prefix for the higher XMM register - confirm).
EXPECT_EQ(tester.dump_to_hex_string(true), "660F6F9C242EFBFFFF66440F6FAC242EFBFFFF");
|
||||
}
|
||||
|
||||
// Checks the bytes emitted by IGen::load128_xmm128_gpr64_s8 with RSP as the
// base and offset -12, once for XMM0 + 3 and once for XMM0 + 13.
TEST(EmitterXMM, StackLoadFull8) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
tester.emit(IGen::load128_xmm128_gpr64_s8(XMM0 + 3, RSP, -12));
|
||||
tester.emit(IGen::load128_xmm128_gpr64_s8(XMM0 + 13, RSP, -12));
|
||||
// Both instructions start with the 66 prefix byte and end in the single byte F4
// (-12 as a signed 8-bit value). The second encoding has an extra 44 byte after
// 66 (presumably a REX prefix for the higher XMM register - confirm).
EXPECT_EQ(tester.dump_to_hex_string(true), "660F6F5C24F466440F6F6C24F4");
|
||||
}
|
||||
|
||||
// Checks the bytes emitted by IGen::store32_xmm32_gpr64_plus_s32 with RSP as the
// base and offset -1234, once for XMM0 + 3 and once for XMM0 + 13.
// Note the argument order: base register first, then the XMM register.
TEST(EmitterXMM, StackStore32) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
tester.emit(IGen::store32_xmm32_gpr64_plus_s32(RSP, XMM0 + 3, -1234));
|
||||
tester.emit(IGen::store32_xmm32_gpr64_plus_s32(RSP, XMM0 + 13, -1234));
|
||||
// Both instructions end in 2E FB FF FF, i.e. -1234 as a little-endian 32-bit value.
// The second encoding has an extra 44 byte after F3 (presumably the REX prefix
// needed to reach the higher-numbered XMM register - confirm against IGen).
EXPECT_EQ(tester.dump_to_hex_string(true), "F30F119C242EFBFFFFF3440F11AC242EFBFFFF");
|
||||
}
|
||||
|
||||
// Checks the bytes emitted by IGen::store32_xmm32_gpr64_plus_s8 with RSP as the
// base and offset -12, once for XMM0 + 3 and once for XMM0 + 13.
// Note the argument order: base register first, then the XMM register.
TEST(EmitterXMM, StackStore8) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
tester.emit(IGen::store32_xmm32_gpr64_plus_s8(RSP, XMM0 + 3, -12));
|
||||
tester.emit(IGen::store32_xmm32_gpr64_plus_s8(RSP, XMM0 + 13, -12));
|
||||
// Both instructions end in the single byte F4, i.e. -12 as a signed 8-bit value.
// The second encoding has an extra 44 byte after F3 (presumably the REX prefix
// needed to reach the higher-numbered XMM register - confirm against IGen).
EXPECT_EQ(tester.dump_to_hex_string(true), "F30F115C24F4F3440F116C24F4");
|
||||
}
|
||||
|
||||
// Checks the bytes emitted by IGen::store128_gpr64_xmm128_s32 with RSP as the
// base and offset -1234, once for XMM0 + 3 and once for XMM0 + 13.
TEST(EmitterXMM, StackStoreFull32) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
tester.emit(IGen::store128_gpr64_xmm128_s32(RSP, XMM0 + 3, -1234));
|
||||
tester.emit(IGen::store128_gpr64_xmm128_s32(RSP, XMM0 + 13, -1234));
|
||||
// Both instructions start with the 66 prefix byte and end in 2E FB FF FF
// (-1234 as a little-endian 32-bit value). The second encoding has an extra 44
// byte after 66 (presumably a REX prefix for the higher XMM register - confirm).
EXPECT_EQ(tester.dump_to_hex_string(true), "660F7F9C242EFBFFFF66440F7FAC242EFBFFFF");
|
||||
}
|
||||
|
||||
// Checks the bytes emitted by IGen::store128_gpr64_xmm128_s8 with RSP as the
// base and offset -12, once for XMM0 + 3 and once for XMM0 + 13.
TEST(EmitterXMM, StackStoreFull8) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
tester.emit(IGen::store128_gpr64_xmm128_s8(RSP, XMM0 + 3, -12));
|
||||
tester.emit(IGen::store128_gpr64_xmm128_s8(RSP, XMM0 + 13, -12));
|
||||
// Both instructions start with the 66 prefix byte and end in the single byte F4
// (-12 as a signed 8-bit value). The second encoding has an extra 44 byte after
// 66 (presumably a REX prefix for the higher XMM register - confirm).
EXPECT_EQ(tester.dump_to_hex_string(true), "660F7F5C24F466440F7F6C24F4");
|
||||
}
|
||||
|
@ -208,4 +208,4 @@ TEST(EmitterAVX, RIP) {
|
||||
tester.emit(IGen::loadvf_rip_plus_s32(XMM0 + 3, -123));
|
||||
tester.emit(IGen::loadvf_rip_plus_s32(XMM0 + 13, -123));
|
||||
EXPECT_EQ(tester.dump_to_hex_string(true), "C5F8281D85FFFFFFC578282D85FFFFFF");
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user