update before frontend rewrite.

Mphatso Raymond Mataka
2025-03-26 20:34:06 -07:00
parent 24d5f92b76
commit 66aa17ee9e
35 changed files with 12014 additions and 10577 deletions

View File

@@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.14)
set(PROJECT_SHARED rem)
set(CMAKE_C_COMPILER gcc)
set(CMAKE_CXX_COMPILER g++)
set(SOURCE_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/src/)
set(LIBRARY_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/libraries/)
@@ -9,10 +12,12 @@ if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CXX_FLAGS "-g -Ofast -Wno-c++11-narrowing")
set(CMAKE_CXX_FLAGS "-g")
project(${PROJECT_SHARED})
add_definitions(-Ofast)
add_definitions(-Wno-c++11-narrowing)
add_definitions(-std=c++17)
add_compile_definitions(XBYAK64)

View File

@@ -32,4 +32,9 @@ abi get_abi()
result.os = get_running_os();
return result;
}
bool get_is_apple_silicon(abi abi_context)
{
return abi_context.cpu == cpu_information::arm_64 && abi_context.os == os_information::_macos;
}
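A hedged usage sketch of the new predicate: on Apple Silicon, MAP_JIT pages must be toggled between writable and executable per thread, which is the kind of decision this check feeds. pthread_jit_write_protect_np is the real macOS call, but wiring it up like this is my illustration, not part of this commit:

#include <pthread.h> // macOS: pthread_jit_write_protect_np

void prepare_jit_memory_for_write(abi current)
{
    if (get_is_apple_silicon(current))
    {
        pthread_jit_write_protect_np(0); // allow writes to MAP_JIT pages on this thread
    }
}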

View File

@@ -26,4 +26,6 @@ os_information get_running_os();
cpu_information get_running_cpu();
abi get_abi();
bool get_is_apple_silicon(abi abi_context);
#endif

View File

@@ -1,24 +1,23 @@
#include "aarch64_assembler.h"
#include "jit/jit_context.h"
#include <string.h>
#include <sys/mman.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#define ONE_MB 1 * 1024 * 1024
#define X(reg) rarma_context::X(reg)
#define SP rarma_context::SP()
#define WSP rarma_context::SP()
void assemble_aarch64_abi_caller_code(void* result_code, uint64_t* result_code_size, abi abi_information)
void create_aarch64_caller(jit_context* result, abi abi_information)
{
rarma_context c;
rarma_context::create(&c, ONE_MB);
rarma_context::sub_imm12(&c, SP, SP, 8);
//rarma_context::adds(&c, XZR(), SP(), X(1));
rarma_context::add_imm12(&c, SP, SP, 8);
*result_code_size = c.memory_location;
memcpy(result_code, c.memory_block, *result_code_size);
growing_jit_cache::append_code(&result->jit_cache, c.memory_block, rarma_context::get_code_size(&c));
rarma_context::destroy(&c);
}
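The shape of this change: assemble_aarch64_abi_caller_code wrote the finished bytes into a caller-supplied buffer and reported the size through an out-parameter; create_aarch64_caller instead appends them to the jit_context's growing_jit_cache, which owns sizing. A minimal call sketch, assuming jit_context starts out with an empty cache (its construction is not shown in this diff):

jit_context ctx; // assumption: default state holds an empty growing_jit_cache named jit_cache
create_aarch64_caller(&ctx, get_abi());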

View File

@@ -6,6 +6,8 @@
#include "rarma.h"
void assemble_aarch64_abi_caller_code(void* result_code, uint64_t* result_code_size, abi abi_information);
struct jit_context;
void create_aarch64_caller(jit_context* result, abi abi_information);
#endif

View File

@@ -1,7 +1,7 @@
#include "aarch64_pipeline.h"
#include "aarch64_pre_allocator.h"
void assemble_aarch64_pipeline(void** result_code, uint64_t* result_code_size, ir_operation_block* source_ir, bool optimize, abi working_abi, compiler_flags flags)
void assemble_aarch64_pipeline(void** result_code, uint64_t* result_code_size, ir_operation_block* source_ir, abi working_abi, compiler_flags flags)
{
arena_allocator* allocator = source_ir->allocator;

View File

@@ -5,6 +5,6 @@
#include "abi_information.h"
#include "assembly/universal_flags.h"
void assemble_aarch64_pipeline(void** result_code, uint64_t* result_code_size, ir_operation_block* source_ir, bool optimize, abi working_abi, compiler_flags flags);
void assemble_aarch64_pipeline(void** result_code, uint64_t* result_code_size, ir_operation_block* source_ir, abi working_abi, compiler_flags flags);
#endif

View File

@@ -5,6 +5,26 @@
#include <inttypes.h>
#include <iostream>
#include <assert.h>
#include <vector>
#include <string.h>
#include <sstream>
#include <iomanip>
#define create_mask(bits) ((1ULL << bits) - 1)
#define assert_in_mask(value, bits) (assert((value & ~create_mask(bits)) == 0))
#define assert_is_register(value) (assert_in_mask(value, 5))
#define assert_is_bit(value) (assert_in_mask(value, 1))
#define assert_masked_zero(value,bits) assert((value & create_mask(bits)) == 0)
#define outside_of_bits(value, bits) (value & ~create_mask(bits))
#define error() (throw 0)
#define X(index) rarma_context::reg(index, rarma_context::operand_size::size_int64)
#define W(index) rarma_context::reg(index, rarma_context::operand_size::size_int32)
#define SP() rarma_context::sp(rarma_context::operand_size::size_int64)
#define WSP() rarma_context::sp(rarma_context::operand_size::size_int32)
#define XZR() rarma_context::zr(rarma_context::operand_size::size_int64)
#define WZR() rarma_context::zr(rarma_context::operand_size::size_int32)
#define LR() X(30)
struct rarma_context
{
@@ -15,13 +35,14 @@ struct rarma_context
enum operand_type
{
zr,
sp,
gp,
vec
_zr,
_sp,
_gp,
vec,
immediate
};
enum shift_type
enum shift_option
{
lsl,
lsr,
@@ -29,121 +50,244 @@ struct rarma_context
asr
};
struct operand
enum extend_type
{
int operand_size;
int operand_register_index;
operand_type type;
uxtb,
uxth,
uxtw,
uxtx,
sxtb,
sxth,
sxtw,
sxtx
};
static uint64_t get_code_size(rarma_context* context)
enum operand_size
{
return context->memory_location;
}
size_int8,
size_int16,
size_int32,
size_int64,
size_int128,
size_null = -1
};
static operand gp_size(int index, int size, operand_type type)
struct base_operand
{
operand result;
int data;
operand_size size;
operand_type type;
result.operand_size = size;
result.operand_register_index = index;
result.type = type;
bool is_active;
};
assert_in_mask(index, 5);
struct operand
{
#define base_count 3
base_operand bases[base_count];
switch (type)
int _extend_type;
int _shift_type;
void init()
{
memset(this, 0, sizeof(operand));
_extend_type = -1;
_shift_type = -1;
}
void init_base(int index, int data, operand_size size, operand_type type)
{
case sp:
case zr:
bases[index].data = data;
bases[index].size = size;
bases[index].type = type;
bases[index].is_active = true;
}
operand()
{
init();
}
operand(int immediate)
{
init();
init_base(0, immediate, operand_size::size_null, operand_type::immediate);
}
bool is_deactivated_from(int index)
{
for (; index < base_count; ++index)
{
assert(index == 31);
assert(size >= 2 && size <= 3);
}; break;
if (!bases[index].is_active)
continue;
return false;
}
return true;
}
return result;
}
static operand X(int index)
{
return gp_size(index, 3, operand_type::gp);
}
static operand SP()
{
return gp_size(31, 3, operand_type::sp);
}
static operand W(int index)
{
return gp_size(index, 2, operand_type::gp);
}
static operand WSP()
{
return gp_size(31, 2, operand_type::sp);
}
static operand H(int index)
{
return gp_size(index, 1, operand_type::gp);
}
static operand B(int index)
{
return gp_size(index, 0, operand_type::gp);
}
static void assert_is_gp_sp(operand test)
{
if (test.operand_register_index == 31)
bool is_basic()
{
assert(test.type == sp);
if (
_extend_type != -1 ||
_shift_type != -1
)
return false;
if (!is_deactivated_from(1))
return false;
return true;
}
else
bool is_immediate(base_operand operand)
{
assert(test.type == gp);
return operand.type == rarma_context::immediate;
}
}
static void assert_is_gp_zr(operand test)
{
if (test.operand_register_index == 31)
bool is_gp_or_zr(base_operand operand)
{
assert(test.type == zr);
return operand.type == rarma_context::_gp || operand.type == rarma_context::_zr;
}
else
bool is_gp_or_sp(base_operand operand)
{
assert(test.type == gp);
return operand.type == rarma_context::_gp || operand.type == rarma_context::_sp;
}
}
static void assert_same_size(operand left, operand right)
{
assert(left.operand_size == right.operand_size);
}
bool is_sp(base_operand operand)
{
return operand.type == rarma_context::_sp;
}
static bool is_vector(operand test)
{
return test.operand_size == 4;
}
bool is_extended_shifted_immediate()
{
if (_shift_type != lsl)
return false;
static void assert_not_vector(operand test)
{
assert(!is_vector(test));
}
if (_extend_type == -1)
return false;
static void assert_w_or_x(operand test)
{
assert(test.operand_size == 2 || test.operand_size == 3);
}
base_operand base = bases[0];
base_operand shift = bases[1];
static void assert_in_mask(int imm, int bit_count)
{
assert((imm & ~create_mask(bit_count)) == 0);
}
if (!is_deactivated_from(2))
return false;
if (!is_gp_or_zr(base))
return false;
if (!is_immediate(shift))
return false;
return true;
}
bool is_shifted_immediate()
{
if (_shift_type == -1)
return false;
if (_extend_type != -1)
return false;
base_operand base = bases[0];
base_operand shift = bases[1];
if (!is_deactivated_from(2))
return false;
if (!is_gp_or_zr(base))
return false;
if (!is_immediate(shift))
return false;
return true;
}
bool is_basic_register()
{
if (!is_basic())
return false;
switch (bases[0].type)
{
case _zr:
case _sp:
case _gp:
case vec:
{
return true;
};
}
return false;
}
bool is_base_immediate()
{
if (!is_basic())
return false;
return bases[0].type == operand_type::immediate;
}
static operand shifted_operand(operand base, operand shift, shift_option type)
{
assert(base.is_basic());
assert(shift.is_basic());
operand result;
result.bases[0] = base.bases[0];
result.bases[1] = shift.bases[0];
result._shift_type = type;
return result;
}
static operand extended_operand(operand base, extend_type extend, operand shift)
{
assert_is_basic_register(base);
assert_is_basic_immediate(shift);
operand result;
result.bases[0] = base.bases[0];
result.bases[1] = shift.bases[0];
result._shift_type = lsl;
result._extend_type = extend;
return result;
}
operand operator << (operand other)
{
return shifted_operand(*this, other, shift_option::lsl);
}
operand operator >> (operand other)
{
return shifted_operand(*this, other, shift_option::lsr);
}
static operand asr(operand base, operand other)
{
return shifted_operand(base, other, shift_option::asr);
}
static operand ror(operand base, operand other)
{
return shifted_operand(base, other, shift_option::ror);
}
};
static void create(rarma_context* result, uint64_t memory_block_size = 1024)
{
@@ -158,164 +302,420 @@ struct rarma_context
free(to_destroy->memory_block);
}
static uint32_t write_instruction(rarma_context* context,uint32_t instruction)
static uint64_t get_code_size(rarma_context* context)
{
if (context->memory_location >= context->memory_block_size)
{
std::cout << "RARM OUT OF SPACE" << std::endl;
throw 0;
}
return context->memory_location;
}
static uint32_t write_instruction(rarma_context* context, uint32_t instruction)
{
*(uint32_t*)((uint64_t)context->memory_block + context->memory_location) = instruction;
context->memory_location += 4;
return instruction;
}
static int create_mask(int size)
static operand reg(int index, operand_size size)
{
return (1 << size) - 1;
operand result;
assert(index >= 0 && index < 31);
result.init_base(0, index, size, operand_type::_gp);
return result;
}
static uint32_t add_subtract_shifted(rarma_context* context, bool is_add, bool set_flags, operand d, operand n, operand m, shift_type shift, int imm6)
static operand sp(operand_size size)
{
int sf = d.operand_size == 3;
int op = !is_add;
int S = set_flags;
operand result;
result.init_base(0, 31, size, operand_type::_sp);
return result;
}
static operand zr(operand_size size)
{
operand result;
result.init_base(0, 31, size, operand_type::_zr);
return result;
}
static bool operand_is_sp(operand test)
{
return test.bases[0].type == _sp;
}
static void assert_is_gp_or_sp(operand test)
{
int register_index = test.bases[0].data;
int type = test.bases[0].type;
if (register_index != 31)
{
assert(type == _gp);
}
else
{
assert(type == _sp);
}
}
static void assert_is_basic_register(operand test, int size = -1)
{
assert(test.is_basic_register());
if (size != -1)
{
assert(test.bases[0].size == size);
}
}
static void assert_is_basic_immediate(operand test)
{
assert(test.is_base_immediate());
}
static void assert_is_gp_or_zr(operand test)
{
int register_index = test.bases[0].data;
int type = test.bases[0].type;
if (register_index != 31)
{
assert(type == _gp);
}
else
{
assert(type == _zr);
}
}
static void assert_is_gp_or_zr(std::vector<operand> test)
{
for (int i = 0; i < test.size(); ++i)
{
assert_is_gp_or_zr(test[i]);
}
}
static void assert_x_or_w(operand test)
{
base_operand base = test.bases[0];
assert(base.size == operand_size::size_int32 || base.size == operand_size::size_int64);
}
static void assert_x(operand test)
{
base_operand base = test.bases[0];
assert(base.size == operand_size::size_int64);
}
static void assert_w(operand test)
{
base_operand base = test.bases[0];
assert(base.size == operand_size::size_int32);
}
static bool operand_same_size(operand left, operand right)
{
return left.bases[0].size == right.bases[0].size;
}
static void assert_same_size(operand left, operand right)
{
assert(operand_same_size(left, right));
}
static void assert_same_size(std::vector<operand> operands)
{
operand first = operands[0];
for (int i = 1; i < operands.size(); ++i)
{
assert_same_size(first, operands[i]);
}
}
static uint32_t add_subtract_shifted(rarma_context* context, int sf, int op, int S, int rd, int rn, int rm, shift_option shift, int imm6)
{
uint32_t result = 0b01011 << 24;
assert_is_register(rd);
assert_is_register(rn);
assert_is_register(rm);
assert_is_bit(S);
assert_is_bit(op);
assert_is_bit(sf);
assert_in_mask(imm6, 6);
assert_in_mask(shift, 2);
assert_is_gp_zr(d);
assert_is_gp_zr(n);
assert_is_gp_zr(m);
uint32_t result = 0b01011 << 24;
result |= sf << 31;
result |= op << 30;
result |= S << 28;
result |= rd << 0;
result |= rn << 5;
result |= rm << 16;
result |= imm6 << 10;
result |= shift << 22;
result |= m.operand_register_index << 16;
result |= imm6 << 10;
result |= n.operand_register_index << 5;
result |= d.operand_register_index;
result |= S << 29;
result |= op << 30;
result |= sf << 31;
write_instruction(context, result);
return result;
return write_instruction(context, result);
}
static uint32_t add_subtract_imm12(rarma_context* context, bool is_add, bool set_flags, operand d, operand n, int imm12)
static uint32_t add_subtract_extended(rarma_context* context, int sf, int op, int S, int rd, int rn, int rm, extend_type option, int imm3)
{
int sf = d.operand_size == 3;
int op = !is_add;
int S = set_flags;
int sh;
if (imm12 == 0)
{
sh = 0;
}
else if (imm12 & create_mask(12))
{
assert_in_mask(imm12, 12);
uint32_t result = 0b01011001 << 21;
sh = 0;
}
else if (imm12 >> 12)
{
imm12 >>= 12;
assert_is_register(rd);
assert_is_register(rn);
assert_is_register(rm);
assert_is_bit(S);
assert_is_bit(op);
assert_is_bit(sf);
assert((imm12 & ~create_mask(12)) == 0);
assert_in_mask(option, 3);
assert_in_mask(imm3, 3);
sh = 1;
}
result |= rd << 0;
result |= rn << 5;
result |= rm << 16;
result |= option << 13;
result |= imm3 << 10;
result |= S << 29;
result |= op << 30;
result |= sf << 31;
return write_instruction(context, result);
}
static int32_t add_subtract_imm12(rarma_context* context, int sf, int op, int S, int sh, int rd, int rn, int imm12)
{
uint32_t result = 0b100010 << 23;
assert_same_size(d, n);
assert_not_vector(d);
assert_w_or_x(d);
assert_is_gp_sp(n);
if (set_flags)
if (imm12 == 0 && sh == 1)
{
assert_is_gp_zr(d);
sh = 0;
}
else
{
assert_is_gp_sp(d);
}
result |= (sf << 31);
result |= (op << 30);
result |= (S << 29);
result |= (sh << 22);
result |= d.operand_register_index;
result |= n.operand_register_index << 5;
assert_in_mask(imm12, 12);
assert_is_register(rd);
assert_is_register(rn);
assert_is_bit(sh);
assert_is_bit(S);
assert_is_bit(op);
assert_is_bit(sf);
result |= rd << 0;
result |= rn << 5;
result |= imm12 << 10;
result |= sh << 22;
result |= S << 29;
result |= op << 30;
result |= sf << 31;
write_instruction(context, result);
return write_instruction(context, result);
}
static uint32_t add_subtract(rarma_context* context, bool is_add, bool set_flags, operand d, operand n, operand m)
{
if (!(d.is_basic_register() && n.is_basic_register()))
error();
return result;
int sf = d.bases[0].size == size_int64;
int op = !is_add;
int S = set_flags;
int rd = d.bases[0].data;
int rn = n.bases[0].data;
if (m.is_base_immediate())
{
if (set_flags)
{
assert_is_gp_or_zr(d);
}
else
{
assert_is_gp_or_sp(d);
}
assert_is_gp_or_sp(n);
assert_x_or_w(d);
assert_same_size(d, n);
int imm12 = m.bases[0].data;
int sh = 0;
if (outside_of_bits(imm12, 12))
{
sh = 1;
assert_masked_zero(imm12, 12);
imm12 >>= 12;
}
if (outside_of_bits(imm12, 12))
{
error();
}
return add_subtract_imm12(context, sf, op, S, sh, rd, rn, imm12);
}
assert_is_gp_or_zr(m);
int rm = m.bases[0].data;
bool use_extended = false;
if (
m.is_extended_shifted_immediate() ||
!operand_same_size(n, m) ||
operand_is_sp(d) ||
operand_is_sp(n)
)
{
use_extended = true;
}
if (use_extended)
{
if (set_flags)
{
assert_is_gp_or_zr(d);
}
else
{
assert_is_gp_or_sp(d);
}
assert_is_gp_or_sp(n);
assert_x_or_w(d);
assert_x_or_w(m);
assert_same_size({d, n});
int option;
int shift_ammount = 0; // initialized: the plain-register branch below sets only 'option'
if (m.is_basic_register())
{
assert_same_size(n, m);
option = uxtw + sf;
}
else if (m.is_shifted_immediate())
{
assert(m._shift_type == lsl);
shift_ammount = m.bases[1].data;
option = uxtw + sf;
}
else if (m.is_extended_shifted_immediate())
{
assert(m._shift_type == lsl);
shift_ammount = m.bases[1].data;
option = m._extend_type;
switch (option)
{
case uxtx:
case sxtx:
{
assert_x(m);
}; break;
default:
{
assert_w(m);
}; break;
}
}
else
{
error();
}
assert_in_mask(shift_ammount, 3);
return add_subtract_extended(context, sf, op, S, rd, rn, rm, (extend_type)option, shift_ammount);
}
assert_same_size({d, n, m});
assert_is_gp_or_zr({d, n, m});
shift_option shift = (shift_option)m._shift_type;
int imm6 = m.bases[1].data;
if ((int)shift == -1)
{
shift = shift_option::lsl;
assert(imm6 == 0);
}
return add_subtract_shifted(context, sf, op, S, rd, rn, rm, shift, imm6);
}
static uint32_t add_imm12(rarma_context* context, operand d, operand n, int imm12)
static uint32_t add(rarma_context* context, operand d, operand n, operand m)
{
return add_subtract_imm12(context, true, false, d, n, imm12);
return add_subtract(context, true, false, d, n, m);
}
static uint32_t sub_imm12(rarma_context* context, operand d, operand n, int imm12)
static uint32_t sub(rarma_context* context, operand d, operand n, operand m)
{
return add_subtract_imm12(context, false, false, d, n, imm12);
return add_subtract(context, false, false, d, n, m);
}
static uint32_t adds_imm12(rarma_context* context, operand d, operand n, int imm12)
static uint32_t adds(rarma_context* context, operand d, operand n, operand m)
{
return add_subtract_imm12(context, true, true, d, n, imm12);
return add_subtract(context, true, true, d, n, m);
}
static uint32_t subs_imm12(rarma_context* context, operand d, operand n, int imm12)
static uint32_t subs(rarma_context* context, operand d, operand n, operand m)
{
return add_subtract_imm12(context, false, true, d, n, imm12);
}
static uint32_t add_shifted(rarma_context* context, operand d, operand n, operand m, shift_type shift = shift_type::lsl, int imm6 = 0)
{
return add_subtract_shifted(context, true, false, d, n, m, shift, imm6);
}
static uint32_t sub_shifted(rarma_context* context, operand d, operand n, operand m, shift_type shift = shift_type::lsl, int imm6 = 0)
{
return add_subtract_shifted(context, false, false, d, n, m, shift, imm6);
}
static uint32_t adds_shifted(rarma_context* context, operand d, operand n, operand m, shift_type shift = shift_type::lsl, int imm6 = 0)
{
return add_subtract_shifted(context, true, true, d, n, m, shift, imm6);
}
static uint32_t subs_shifted(rarma_context* context, operand d, operand n, operand m, shift_type shift = shift_type::lsl, int imm6 = 0)
{
return add_subtract_shifted(context, false, true, d, n, m, shift, imm6);
return add_subtract(context, false, true, d, n, m);
}
static uint32_t ret(rarma_context* context, operand n)
{
uint32_t result = 0b1101011001011111000000 << 10;
assert_is_basic_register(n, size_int64);
assert_is_gp_zr(n);
uint32_t result = (0b1101011001011111000000 << 10) | (n.bases[0].data << 5);
result |= n.operand_register_index << 5;
return write_instruction(context, result);
}
write_instruction(context, result);
static std::string get_debug_code(rarma_context* context)
{
int instruction_count = get_code_size(context) / 4;
void* code = context->memory_block;
return result;
std::stringstream stream;
for (int i = 0; i < instruction_count; ++i)
{
int instruction = *((int*)code + i);
for (int b = 0; b < 4; ++b)
{
stream << std::hex << std::setw(2) << std::setfill('0') << ((instruction >> (b * 8)) & 255) << " ";
}
stream << std::endl;
}
return stream.str();
}
};
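For orientation, a minimal usage sketch of the new operand-based emitter. It uses only calls and macros introduced above (X, SP, LR, operand's immediate constructor, operator<<); the per-line comments are my reading of which add_subtract encoding path each call selects:

rarma_context c;
rarma_context::create(&c, 1024);
rarma_context::add(&c, X(0), X(1), X(2));                                // register form: add x0, x1, x2
rarma_context::add(&c, X(0), X(1), rarma_context::operand(16));          // imm12 form: add x0, x1, #16
rarma_context::adds(&c, X(0), X(1), X(2) << rarma_context::operand(4));  // shifted form: adds x0, x1, x2, lsl #4
rarma_context::add(&c, X(0), SP(), X(1));                                // sp as a source selects the extended-register form
rarma_context::ret(&c, LR());
std::cout << rarma_context::get_debug_code(&c);                          // hex dump of the emitted words
rarma_context::destroy(&c);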

View File

@@ -3,11 +3,9 @@
enum compiler_flags
{
check_undefined_behavior = 1ULL << 0,
optimize_ssa = 1ULL << 1,
mathmatical_fold = optimize_ssa | 1ULL << 2,
compiler_flags_all = check_undefined_behavior | mathmatical_fold
check_undefined_behavior = 1ULL << 0,
optimize_basic_ssa = 1ULL << 1,
optimize_group_pool_ssa = 1ULL << 2
};
#endif
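The flag bits compose with bitwise-or. A hedged calling sketch against the assemble_aarch64_pipeline signature shown earlier (block stands in for whatever ir_operation_block* the caller holds):

void* code;
uint64_t code_size;
compiler_flags flags = (compiler_flags)(check_undefined_behavior | optimize_basic_ssa);
assemble_aarch64_pipeline(&code, &code_size, block, get_abi(), flags);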

View File

@@ -114,7 +114,7 @@ void assemble_x86_64_code(void** result_code, uint64_t* result_code_size, ir_ope
{
arena_allocator* allocator = source_ir->allocator;
int buffer_size = ONE_MB * 10;
int buffer_size = ONE_MB * 20;
*result_code = arena_allocator::allocate_recursive(allocator, buffer_size);
Xbyak::CodeGenerator c(buffer_size, *result_code);

View File

@@ -8,8 +8,11 @@
#include "ir/undefined_behavior_check.h"
#include <iostream>
#include <fstream>
void assemble_x86_64_pipeline(void** result_code, uint64_t* result_code_size, ir_operation_block* source_ir, bool optimize, abi working_abi, compiler_flags flags)
int id = 0;
void assemble_x86_64_pipeline(void** result_code, uint64_t* result_code_size, ir_operation_block* source_ir, abi working_abi, compiler_flags flags)
{
arena_allocator* allocator = source_ir->allocator;
@@ -32,9 +35,13 @@ void assemble_x86_64_pipeline(void** result_code, uint64_t* result_code_size, ir
source_ir = undefined_behavior_checked_code;
}
if (flags & optimize_ssa)
bool use_lrsa_hints = false;
if ((flags & optimize_basic_ssa) || (flags & optimize_group_pool_ssa))
{
convert_to_ssa(source_ir, flags & mathmatical_fold);
convert_to_ssa(source_ir, flags);
use_lrsa_hints = true;
}
x86_pre_allocator_context::run_pass(&pre_allocation_data, pre_allocated_code, source_ir, working_abi.cpu,working_abi.os);
@@ -61,8 +68,25 @@ void assemble_x86_64_pipeline(void** result_code, uint64_t* result_code_size, ir
(uint32_t)pre_allocation_data.opernad_counts[0]
},
RSP(ir_operand_meta::int64)
RSP(ir_operand_meta::int64),
use_lrsa_hints
);
/*
if (flags & compiler_flags::optimize_group_pool_ssa)
{
std::ofstream str;
id++;
str.open("/media/linvirt/partish/tmp/" + std::to_string(id));
str << ir_operation_block::get_block_log(register_allocated_code);
str.close();
}
*/
assemble_x86_64_code(result_code, result_code_size, register_allocated_code);
}

View File

@@ -5,6 +5,6 @@
#include "abi_information.h"
#include "assembly/universal_flags.h"
void assemble_x86_64_pipeline(void** result_code, uint64_t* result_code_size, ir_operation_block* source_ir, bool optimize, abi working_abi, compiler_flags flags);
void assemble_x86_64_pipeline(void** result_code, uint64_t* result_code_size, ir_operation_block* source_ir, abi working_abi, compiler_flags flags);
#endif

View File

@@ -1290,6 +1290,7 @@ static void emit_pre_allocation_instruction(x86_pre_allocator_context* pre_alloc
case x86_sqrtpd:
case x86_sqrtss:
case x86_sqrtsd:
case ir_register_allocator_hint_global:
{
emit_as_is(pre_allocator_context, operation);
}; break;

View File

@@ -11,7 +11,9 @@ void aarch64_emit_context::create(guest_process* process, aarch64_emit_context*
result->translate_functions = false;
result->optimization_flags = flags;
guest_register_store::create(&result->registers, ssa, process->guest_context_offset_data.context_size);
aarch64_context_offsets offsets = *(aarch64_context_offsets*)process->guest_context_data;
guest_register_store::create(&result->registers, ssa, offsets.context_size);
}
void aarch64_emit_context::init_context(aarch64_emit_context* ctx)
@@ -23,7 +25,7 @@ void aarch64_emit_context::init_context(aarch64_emit_context* ctx)
ir_operation_block::emitds(ir, ir_get_argument, ctx->context_pointer,ir_operand::create_con(0));
aarch64_context_offsets offsets = process->guest_context_offset_data;
aarch64_context_offsets offsets = *(aarch64_context_offsets*)process->guest_context_data;
for (int i = 0; i < 32; ++i)
{
@@ -173,14 +175,18 @@ void aarch64_emit_context::emit_context_movement(aarch64_emit_context* ctx)
static int get_x_location(aarch64_emit_context* ctx, int index)
{
int x_location = ctx->process->guest_context_offset_data.x_offset + (index * 8);
aarch64_context_offsets offsets = *(aarch64_context_offsets*)ctx->process->guest_context_data;
int x_location = offsets.x_offset + (index * 8);
return x_location;
}
static int get_v_location(aarch64_emit_context* ctx, int index)
{
int v_location = ctx->process->guest_context_offset_data.q_offset + (index * 16);
aarch64_context_offsets offsets = *(aarch64_context_offsets*)ctx->process->guest_context_data;
int v_location = offsets.q_offset + (index * 16);
return v_location;
}
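The same cast recurs in every accessor in this file and in the interpreter/jit files below; a small helper would centralize the assumption that guest_context_data points at an aarch64_context_offsets. A hypothetical refactor, not part of this commit:

static aarch64_context_offsets get_context_offsets(guest_process* process)
{
    // assumption from this diff: guest_context_data stores an aarch64_context_offsets
    return *(aarch64_context_offsets*)process->guest_context_data;
}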

View File

@@ -19,7 +19,7 @@ uint64_t translate_address_interpreter(interpreter_data* ctx, uint64_t address)
uint64_t _x_interpreter(interpreter_data* ctx, uint64_t reg_id)
{
aarch64_context_offsets* offsets = &ctx->process_context->guest_context_offset_data;
aarch64_context_offsets* offsets = (aarch64_context_offsets*)ctx->process_context->guest_context_data;
assert(reg_id >= 0 && reg_id <= 32);
@@ -28,7 +28,7 @@ uint64_t _x_interpreter(interpreter_data* ctx, uint64_t reg_id)
void _x_interpreter(interpreter_data* ctx, uint64_t reg_id, uint64_t value)
{
aarch64_context_offsets* offsets = &ctx->process_context->guest_context_offset_data;
aarch64_context_offsets* offsets = (aarch64_context_offsets*)ctx->process_context->guest_context_data;
assert(reg_id >= 0 && reg_id <= 32);
@@ -37,7 +37,7 @@ void _x_interpreter(interpreter_data* ctx, uint64_t reg_id, uint64_t value)
uint64_t _sys_interpreter(interpreter_data* ctx, uint64_t reg_id)
{
aarch64_context_offsets* offsets = &ctx->process_context->guest_context_offset_data;
aarch64_context_offsets* offsets = (aarch64_context_offsets*)ctx->process_context->guest_context_data;
switch (reg_id)
{
@@ -57,7 +57,7 @@ uint64_t _sys_interpreter(interpreter_data* ctx, uint64_t reg_id)
void _sys_interpreter(interpreter_data* ctx, uint64_t reg_id, uint64_t value)
{
aarch64_context_offsets* offsets = &ctx->process_context->guest_context_offset_data;
aarch64_context_offsets* offsets = (aarch64_context_offsets*)ctx->process_context->guest_context_data;
switch (reg_id)
{
@@ -77,7 +77,7 @@ void _sys_interpreter(interpreter_data* ctx, uint64_t reg_id, uint64_t value)
uint128_t V_interpreter(interpreter_data* ctx, uint64_t reg_id)
{
aarch64_context_offsets* offsets = &ctx->process_context->guest_context_offset_data;
aarch64_context_offsets* offsets = (aarch64_context_offsets*)ctx->process_context->guest_context_data;
assert(reg_id >= 0 && reg_id <= 32);
@@ -88,7 +88,7 @@ uint128_t V_interpreter(interpreter_data* ctx, uint64_t reg_id)
void V_interpreter(interpreter_data* ctx, uint64_t reg_id, uint128_t value)
{
aarch64_context_offsets* offsets = &ctx->process_context->guest_context_offset_data;
aarch64_context_offsets* offsets = (aarch64_context_offsets*)ctx->process_context->guest_context_data;
assert(reg_id >= 0 && reg_id <= 32);
@@ -139,7 +139,7 @@ void _return_from_call_interpreter(interpreter_data* ctx, uint64_t location)
uint64_t get_vector_context_interpreter(interpreter_data* ctx)
{
aarch64_context_offsets* offsets = &ctx->process_context->guest_context_offset_data;
aarch64_context_offsets* offsets = (aarch64_context_offsets*)ctx->process_context->guest_context_data;
void* data_pointer = (char*)ctx->register_data + offsets->q_offset;

View File

@@ -35,7 +35,7 @@ void _x_jit(ssa_emit_context* ctx, uint64_t reg_id, ir_operand value)
ir_operand _sys_jit(ssa_emit_context* ctx, uint64_t reg_id)
{
aarch64_emit_context* actx = (aarch64_emit_context*)ctx->context_data;
aarch64_context_offsets offsets = actx->process->guest_context_offset_data;
aarch64_context_offsets offsets = *(aarch64_context_offsets*)actx->process->guest_context_data;
switch (reg_id)
{
@@ -56,7 +56,7 @@ ir_operand _sys_jit(ssa_emit_context* ctx, uint64_t reg_id)
void _sys_jit(ssa_emit_context* ctx, uint64_t reg_id, ir_operand value)
{
aarch64_emit_context* actx = (aarch64_emit_context*)ctx->context_data;
aarch64_context_offsets offsets = actx->process->guest_context_offset_data;
aarch64_context_offsets offsets = *(aarch64_context_offsets*)actx->process->guest_context_data;
switch (reg_id)
{
@@ -154,7 +154,9 @@ ir_operand get_vector_context_jit(ssa_emit_context* ctx)
aarch64_emit_context* actx = (aarch64_emit_context*)ctx->context_data;
guest_process* process = actx->process;
return ssa_emit_context::emit_ssa(ctx, ir_add, actx->context_pointer, ir_operand::create_con(process->guest_context_offset_data.q_offset));
aarch64_context_offsets offsets = *(aarch64_context_offsets*)actx->process->guest_context_data;
return ssa_emit_context::emit_ssa(ctx, ir_add, actx->context_pointer, ir_operand::create_con(offsets.q_offset));
}
void store_context_jit(ssa_emit_context* ctx)
@@ -267,7 +269,7 @@ uint64_t use_x86_lzcnt_jit(ssa_emit_context* ctx)
static ir_operand x86_add_subtract_set_flags_jit(ssa_emit_context* ctx,uint64_t O, ir_operand n, ir_operand m, bool is_add)
{
aarch64_emit_context* actx = (aarch64_emit_context*)ctx->context_data;
aarch64_context_offsets offsets = actx->process->guest_context_offset_data;
aarch64_context_offsets offsets = *(aarch64_context_offsets*)actx->process->guest_context_data;
ir_operand result = ssa_emit_context::create_local(ctx, O);

File diff suppressed because it is too large

View File

@@ -93,54 +93,8 @@ static uint64_t undefined_value()
}
//INTERPRETER
void add_subtract_imm12_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t sh, uint64_t imm12, uint64_t Rn, uint64_t Rd);
void add_subtract_shifted_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t shift, uint64_t Rm, uint64_t imm6, uint64_t Rn, uint64_t Rd);
void add_subtract_extended_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t Rm, uint64_t option, uint64_t imm3, uint64_t Rn, uint64_t Rd);
void add_subtract_carry_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void shift_variable_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t Rm, uint64_t op2, uint64_t Rn, uint64_t Rd);
void multiply_with_32_interpreter(interpreter_data* ctx, uint64_t U, uint64_t Rm, uint64_t o0, uint64_t Ra, uint64_t Rn, uint64_t Rd);
void multiply_hi_interpreter(interpreter_data* ctx, uint64_t U, uint64_t Rm, uint64_t o0, uint64_t Rn, uint64_t Rd);
void multiply_additive_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t Rm, uint64_t o0, uint64_t Ra, uint64_t Rn, uint64_t Rd);
void divide_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t Rm, uint64_t o1, uint64_t Rn, uint64_t Rd);
uint64_t create_rbit_mask_interpreter(interpreter_data* ctx, uint64_t index);
void rbit_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t Rn, uint64_t Rd);
void rev16_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t Rn, uint64_t Rd);
void reverse_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t opc, uint64_t Rn, uint64_t Rd);
void count_leading_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t s, uint64_t Rn, uint64_t Rd);
void extr_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t N, uint64_t Rm, uint64_t imms, uint64_t Rn, uint64_t Rd);
void bitfield_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t opc, uint64_t N, uint64_t immr, uint64_t imms, uint64_t Rn, uint64_t Rd);
void logical_immediate_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t opc, uint64_t N, uint64_t immr, uint64_t imms, uint64_t Rn, uint64_t Rd);
void logical_shifted_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t opc, uint64_t shift, uint64_t N, uint64_t Rm, uint64_t imm6, uint64_t Rn, uint64_t Rd);
void conditional_select_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t Rm, uint64_t cond, uint64_t op2, uint64_t Rn, uint64_t Rd);
void conditional_compare_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t Rm, uint64_t cond, uint64_t mode, uint64_t Rn, uint64_t nzcv);
void move_wide_immediate_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t opc, uint64_t hw, uint64_t imm16, uint64_t Rd);
void pc_rel_addressing_interpreter(interpreter_data* ctx, uint64_t op, uint64_t immlo, uint64_t immhi, uint64_t Rd);
void branch_register_interpreter(interpreter_data* ctx, uint64_t l, uint64_t Rn);
void return_register_interpreter(interpreter_data* ctx, uint64_t Rn);
void test_bit_branch_interpreter(interpreter_data* ctx, uint64_t b5, uint64_t op, uint64_t b40, uint64_t imm14, uint64_t Rt);
void compare_and_branch_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t imm19, uint64_t Rt);
void b_unconditional_interpreter(interpreter_data* ctx, uint64_t op, uint64_t imm26);
void b_conditional_interpreter(interpreter_data* ctx, uint64_t imm19, uint64_t cond);
void svc_interpreter(interpreter_data* ctx, uint64_t imm16);
void msr_register_interpreter(interpreter_data* ctx, uint64_t imm15, uint64_t Rt);
void mrs_register_interpreter(interpreter_data* ctx, uint64_t imm15, uint64_t Rt);
void hints_interpreter(interpreter_data* ctx, uint64_t imm7);
void sys_interpreter(interpreter_data* ctx, uint64_t L, uint64_t imm19);
void barriers_interpreter(interpreter_data* ctx, uint64_t CRm, uint64_t op2, uint64_t Rt);
void load_store_register_post_interpreter(interpreter_data* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t Rn, uint64_t Rt);
void load_store_register_pre_interpreter(interpreter_data* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t Rn, uint64_t Rt);
void load_store_register_unscaled_interpreter(interpreter_data* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_offset_interpreter(interpreter_data* ctx, uint64_t opc, uint64_t VR, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_post_interpreter(interpreter_data* ctx, uint64_t opc, uint64_t VR, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_pre_interpreter(interpreter_data* ctx, uint64_t opc, uint64_t VR, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_interpreter(interpreter_data* ctx, uint64_t opc, uint64_t VR, uint64_t wb, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_imm_unsigned_interpreter(interpreter_data* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm12, uint64_t Rn, uint64_t Rt);
void load_store_register_imm_unscaled_interpreter(interpreter_data* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t wb, uint64_t Rn, uint64_t Rt);
void load_store_register_offset_interpreter(interpreter_data* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t Rm, uint64_t option, uint64_t S, uint64_t Rn, uint64_t Rt);
void load_store_exclusive_ordered_interpreter(interpreter_data* ctx, uint64_t size, uint64_t ordered, uint64_t L, uint64_t Rs, uint64_t o0, uint64_t Rn, uint64_t Rt);
uint64_t exclusive_address_mask_interpreter(interpreter_data* ctx);
void load_exclusive_interpreter(interpreter_data* ctx, uint64_t is_exclusive, uint64_t size, uint64_t Rn, uint64_t Rt);
void store_exclusive_interpreter(interpreter_data* ctx, uint64_t is_exclusive, uint64_t size, uint64_t Rn, uint64_t Rt, uint64_t Rs);
void memory_single_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t p, uint64_t R, uint64_t Rm, uint64_t b, uint64_t size, uint64_t Rn, uint64_t Rt, uint64_t is_load, uint64_t opcode, uint64_t S);
void memory_multiple_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt, uint64_t rpt, uint64_t selem, uint64_t wback, uint64_t is_load);
uint64_t sign_extend_interpreter(interpreter_data* ctx, uint64_t source, uint64_t count);
template <typename O>
O a_shift_reg_interpreter(interpreter_data* ctx, uint64_t m, uint64_t shift_type, uint64_t ammount);
@@ -164,56 +118,6 @@ O add_subtract_carry_impl_interpreter(interpreter_data* ctx, O n, O m, uint64_t
uint32_t condition_holds_interpreter(interpreter_data* ctx, uint64_t cond);
void branch_long_universal_interpreter(interpreter_data* ctx, uint64_t Rn, uint64_t link);
uint64_t select_interpreter(interpreter_data* ctx, uint64_t condition, uint64_t yes, uint64_t no);
void dup_general_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void dup_element_scalar_interpreter(interpreter_data* ctx, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void dup_element_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void move_to_gp_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t imm5, uint64_t U, uint64_t Rn, uint64_t Rd);
void ins_general_interpreter(interpreter_data* ctx, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void ins_element_interpreter(interpreter_data* ctx, uint64_t imm5, uint64_t imm4, uint64_t Rn, uint64_t Rd);
void movi_immediate_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t op, uint64_t immhi, uint64_t cmode, uint64_t immlo, uint64_t Rd);
void fmov_general_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t ftype, uint64_t rmode, uint64_t opcode, uint64_t Rn, uint64_t Rd);
void convert_to_float_gp_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void convert_to_float_vector_scalar_interpreter(interpreter_data* ctx, uint64_t U, uint64_t sz, uint64_t Rn, uint64_t Rd);
void convert_to_float_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t U, uint64_t sz, uint64_t Rn, uint64_t Rd);
void shl_immedaite_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void sshr_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void shll_shll2_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t U, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void shrn_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void rev64_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void neg_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void not_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rn, uint64_t Rd);
void abs_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void mul_vector_index_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t L, uint64_t M, uint64_t Rm, uint64_t H, uint64_t Rn, uint64_t Rd);
void mul_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void ext_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t imm4, uint64_t Rn, uint64_t Rd);
void compare_above_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t U, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void shl_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t U, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void add_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void addlv_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t U, uint64_t size, uint64_t Rn, uint64_t Rd);
void cnt_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void orr_orn_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t invert, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void bsl_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void and_bic_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t invert, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void eor_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void xnt_xnt2_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void zip_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t op, uint64_t Rn, uint64_t Rd);
void trn_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t op, uint64_t Rn, uint64_t Rd);
void tbl_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t len, uint64_t Rn, uint64_t Rd);
void ld4_st4_multiple_structures_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t L, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld4_st4_multiple_structures_post_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t L, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld1r_no_offset_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld1r_post_index_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld1_single_structure_no_offset_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t opcode, uint64_t S, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld1_single_structure_post_index_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t opcode, uint64_t S, uint64_t size, uint64_t Rn, uint64_t Rt);
void st1_multiple_structures_no_offset_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t ophi, uint64_t oplo, uint64_t size, uint64_t Rn, uint64_t Rt);
void st1_multiple_structures_post_index_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t ophi, uint64_t oplo, uint64_t size, uint64_t Rn, uint64_t Rt);
void st2_multiple_structures_no_offset_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rt);
void st2_multiple_structures_post_index_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt);
void st1_single_structure_no_offset_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t opcode, uint64_t S, uint64_t size, uint64_t Rn, uint64_t Rt);
void st1_single_structure_post_index_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t opcode, uint64_t S, uint64_t size, uint64_t Rn, uint64_t Rt);
void floating_point_conditional_select_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t cond, uint64_t Rn, uint64_t Rd);
void fcmp_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t opc);
void fccmp_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t cond, uint64_t Rn, uint64_t nzcv);
uint64_t create_mask_interpreter(interpreter_data* ctx, uint64_t bits);
uint64_t shift_left_check_interpreter(interpreter_data* ctx, uint64_t to_shift, uint64_t shift, uint64_t size);
uint64_t get_x86_rounding_mode_interpreter(interpreter_data* ctx, uint64_t rounding);
@@ -232,10 +136,6 @@ uint64_t call_float_binary_interpreter(interpreter_data* ctx, uint64_t operand1,
uint64_t call_float_unary_interpreter(interpreter_data* ctx, uint64_t operand, uint64_t fpcr, uint64_t N, uint64_t function);
void convert_to_float_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd, uint64_t from_vector);
uint128_t replicate_vector_interpreter(interpreter_data* ctx, uint128_t source, uint64_t v_size, uint64_t count);
void st1_interpreter(interpreter_data* ctx, uint64_t wback, uint64_t Q, uint64_t L, uint64_t opcode, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rt);
void st_interpreter(interpreter_data* ctx, uint64_t wback, uint64_t Q, uint64_t L, uint64_t opcode, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rt);
void memory_4_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t L, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rt);
void memory_1_interpreter(interpreter_data* ctx, uint64_t wback, uint64_t Q, uint64_t L, uint64_t R, uint64_t Rm, uint64_t o2, uint64_t opcode, uint64_t S, uint64_t size, uint64_t Rn, uint64_t Rt, uint64_t is_load);
uint64_t bits_r_interpreter(interpreter_data* ctx, uint64_t operand, uint64_t top, uint64_t bottom);
uint64_t infinity_interpreter(interpreter_data* ctx, uint64_t sign, uint64_t N);
uint64_t float_is_nan_interpreter(interpreter_data* ctx, uint64_t operand, uint64_t N);
@@ -284,6 +184,12 @@ void floating_point_multiply_accumulate_scalar_element_interpreter(interpreter_d
void floating_point_multiply_accumulate_vector_element_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rd, uint64_t Rn, uint64_t Rm, uint64_t neg, uint64_t sz, uint64_t index);
void fcm_vector_interpreter(interpreter_data* ctx, uint64_t Rd, uint64_t Rn, uint64_t Rm, uint64_t mode, uint64_t Q, uint64_t sz);
uint128_t clear_vector_scalar_interpreter(interpreter_data* ctx, uint128_t working, uint64_t fltsize);
uint64_t create_rbit_mask_interpreter(interpreter_data* ctx, uint64_t index);
void load_store_register_pair_imm_interpreter(interpreter_data* ctx, uint64_t opc, uint64_t VR, uint64_t wb, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_imm_unscaled_interpreter(interpreter_data* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t wb, uint64_t Rn, uint64_t Rt);
uint64_t exclusive_address_mask_interpreter(interpreter_data* ctx);
void load_exclusive_interpreter(interpreter_data* ctx, uint64_t is_exclusive, uint64_t size, uint64_t Rn, uint64_t Rt);
void store_exclusive_interpreter(interpreter_data* ctx, uint64_t is_exclusive, uint64_t size, uint64_t Rn, uint64_t Rt, uint64_t Rs);
uint64_t _compare_and_swap_interpreter(interpreter_data* ctx, uint64_t physical_address, uint64_t expecting, uint64_t to_swap, uint64_t size);
uint64_t compare_and_swap_interpreter(interpreter_data* ctx, uint64_t address, uint64_t expecting, uint64_t to_swap, uint64_t size);
template <typename O>
@@ -294,32 +200,64 @@ uint64_t XSP_interpreter(interpreter_data* ctx, uint64_t reg_id);
void XSP_interpreter(interpreter_data* ctx, uint64_t reg_id, uint64_t value);
uint64_t X_interpreter(interpreter_data* ctx, uint64_t reg_id);
void X_interpreter(interpreter_data* ctx, uint64_t reg_id, uint64_t value);
void ld_st_1_multiple_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t p, uint64_t L, uint64_t Rm, uint64_t opchi, uint64_t opclo, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld_st_2_multiple_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t p, uint64_t L, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld_st_3_multiple_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t p, uint64_t L, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld_st_4_multiple_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t p, uint64_t L, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt);
void ldXr_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t p, uint64_t R, uint64_t Rm, uint64_t b, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld_st_single_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t p, uint64_t L, uint64_t R, uint64_t Rm, uint64_t opcode, uint64_t b, uint64_t S, uint64_t size, uint64_t Rn, uint64_t Rt);
void add_subtract_imm12_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t sh, uint64_t imm12, uint64_t Rn, uint64_t Rd);
void add_subtract_shifted_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t shift, uint64_t Rm, uint64_t imm6, uint64_t Rn, uint64_t Rd);
void add_subtract_extended_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t Rm, uint64_t option, uint64_t imm3, uint64_t Rn, uint64_t Rd);
void add_subtract_carry_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void shift_variable_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t Rm, uint64_t op2, uint64_t Rn, uint64_t Rd);
void multiply_with_32_interpreter(interpreter_data* ctx, uint64_t U, uint64_t Rm, uint64_t o0, uint64_t Ra, uint64_t Rn, uint64_t Rd);
void multiply_hi_interpreter(interpreter_data* ctx, uint64_t U, uint64_t Rm, uint64_t o0, uint64_t Rn, uint64_t Rd);
void multiply_additive_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t Rm, uint64_t o0, uint64_t Ra, uint64_t Rn, uint64_t Rd);
void divide_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t Rm, uint64_t o1, uint64_t Rn, uint64_t Rd);
void rbit_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t Rn, uint64_t Rd);
void rev16_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t Rn, uint64_t Rd);
void reverse_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t opc, uint64_t Rn, uint64_t Rd);
void count_leading_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t s, uint64_t Rn, uint64_t Rd);
void extr_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t N, uint64_t Rm, uint64_t imms, uint64_t Rn, uint64_t Rd);
void bitfield_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t opc, uint64_t N, uint64_t immr, uint64_t imms, uint64_t Rn, uint64_t Rd);
void logical_immediate_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t opc, uint64_t N, uint64_t immr, uint64_t imms, uint64_t Rn, uint64_t Rd);
void logical_shifted_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t opc, uint64_t shift, uint64_t N, uint64_t Rm, uint64_t imm6, uint64_t Rn, uint64_t Rd);
void conditional_select_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t Rm, uint64_t cond, uint64_t op2, uint64_t Rn, uint64_t Rd);
void conditional_compare_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t Rm, uint64_t cond, uint64_t mode, uint64_t Rn, uint64_t nzcv);
void move_wide_immediate_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t opc, uint64_t hw, uint64_t imm16, uint64_t Rd);
void pc_rel_addressing_interpreter(interpreter_data* ctx, uint64_t op, uint64_t immlo, uint64_t immhi, uint64_t Rd);
void branch_register_interpreter(interpreter_data* ctx, uint64_t l, uint64_t Rn);
void return_register_interpreter(interpreter_data* ctx, uint64_t Rn);
void test_bit_branch_interpreter(interpreter_data* ctx, uint64_t b5, uint64_t op, uint64_t b40, uint64_t imm14, uint64_t Rt);
void compare_and_branch_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t op, uint64_t imm19, uint64_t Rt);
void b_unconditional_interpreter(interpreter_data* ctx, uint64_t op, uint64_t imm26);
void b_conditional_interpreter(interpreter_data* ctx, uint64_t imm19, uint64_t cond);
void svc_interpreter(interpreter_data* ctx, uint64_t imm16);
void msr_register_interpreter(interpreter_data* ctx, uint64_t imm15, uint64_t Rt);
void mrs_register_interpreter(interpreter_data* ctx, uint64_t imm15, uint64_t Rt);
void hints_interpreter(interpreter_data* ctx, uint64_t imm7);
void sys_interpreter(interpreter_data* ctx, uint64_t L, uint64_t imm19);
void barriers_interpreter(interpreter_data* ctx, uint64_t CRm, uint64_t op2, uint64_t Rt);
void load_store_register_post_interpreter(interpreter_data* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t Rn, uint64_t Rt);
void load_store_register_pre_interpreter(interpreter_data* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t Rn, uint64_t Rt);
void load_store_register_unscaled_interpreter(interpreter_data* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_offset_interpreter(interpreter_data* ctx, uint64_t opc, uint64_t VR, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_post_interpreter(interpreter_data* ctx, uint64_t opc, uint64_t VR, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_pre_interpreter(interpreter_data* ctx, uint64_t opc, uint64_t VR, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_imm_unsigned_interpreter(interpreter_data* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm12, uint64_t Rn, uint64_t Rt);
void load_store_register_offset_interpreter(interpreter_data* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t Rm, uint64_t option, uint64_t S, uint64_t Rn, uint64_t Rt);
void load_store_exclusive_ordered_interpreter(interpreter_data* ctx, uint64_t size, uint64_t ordered, uint64_t L, uint64_t Rs, uint64_t o0, uint64_t Rn, uint64_t Rt);
void conversion_between_floating_point_and_fixed_point_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t S, uint64_t ftype, uint64_t rmode, uint64_t opcode, uint64_t scale, uint64_t Rn, uint64_t Rd);
void fcvt_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t opc, uint64_t Rn, uint64_t Rd);
void fcvtz_scalar_integer_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void fcvtz_vector_integer_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t U, uint64_t sz, uint64_t Rn, uint64_t Rd);
void fcvtn_scalar_integer_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void fcvta_scalar_integer_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void fcvtm_scalar_integer_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void frintp_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void frintm_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fcvtp_scalar_integer_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void fadd_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fsub_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmul_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fdiv_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmax_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmin_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmaxnm_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fminnm_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fnmul_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fabs_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fneg_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fneg_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void fsqrt_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fsqrt_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void frecpe_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void frsqrte_scalar_interpreter(interpreter_data* ctx, uint64_t sz, uint64_t Rn, uint64_t Rd);
void fmov_scalar_immediate_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t imm8, uint64_t Rd);
void fadd_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t sz, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmul_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t sz, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fsub_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t sz, uint64_t Rm, uint64_t Rn, uint64_t Rd);
@@ -343,6 +281,62 @@ void fcmeq_vector_register_interpreter(interpreter_data* ctx, uint64_t Q, uint64
void fcmgt_vector_register_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t sz, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fcmge_vector_register_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t sz, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fcmle_zero_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void fadd_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fsub_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmul_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fdiv_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmax_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmin_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmaxnm_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fminnm_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fnmul_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fabs_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fneg_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fneg_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void fsqrt_scalar_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fsqrt_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void frecpe_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void frsqrte_scalar_interpreter(interpreter_data* ctx, uint64_t sz, uint64_t Rn, uint64_t Rd);
void fmov_scalar_immediate_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t imm8, uint64_t Rd);
void dup_general_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void dup_element_scalar_interpreter(interpreter_data* ctx, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void dup_element_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void move_to_gp_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t imm5, uint64_t U, uint64_t Rn, uint64_t Rd);
void ins_general_interpreter(interpreter_data* ctx, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void ins_element_interpreter(interpreter_data* ctx, uint64_t imm5, uint64_t imm4, uint64_t Rn, uint64_t Rd);
void movi_immediate_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t op, uint64_t immhi, uint64_t cmode, uint64_t immlo, uint64_t Rd);
void fmov_general_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t ftype, uint64_t rmode, uint64_t opcode, uint64_t Rn, uint64_t Rd);
void convert_to_float_gp_interpreter(interpreter_data* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void convert_to_float_vector_scalar_interpreter(interpreter_data* ctx, uint64_t U, uint64_t sz, uint64_t Rn, uint64_t Rd);
void convert_to_float_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t U, uint64_t sz, uint64_t Rn, uint64_t Rd);
void shl_immedaite_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void shr_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t U, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void shll_shll2_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t U, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void shrn_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void rev64_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void neg_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void not_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rn, uint64_t Rd);
void abs_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void mul_vector_index_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t L, uint64_t M, uint64_t Rm, uint64_t H, uint64_t Rn, uint64_t Rd);
void mul_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void ext_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t imm4, uint64_t Rn, uint64_t Rd);
void compare_above_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t U, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void shl_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t U, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void add_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void addlv_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t U, uint64_t size, uint64_t Rn, uint64_t Rd);
void cnt_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void orr_orn_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t invert, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void bsl_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void and_bic_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t invert, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void eor_vector_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void xnt_xnt2_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void zip_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t op, uint64_t Rn, uint64_t Rd);
void uzp_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t op, uint64_t Rn, uint64_t Rd);
void trn_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t op, uint64_t Rn, uint64_t Rd);
void tbl_interpreter(interpreter_data* ctx, uint64_t Q, uint64_t Rm, uint64_t len, uint64_t Rn, uint64_t Rd);
void floating_point_conditional_select_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t cond, uint64_t Rn, uint64_t Rd);
void fcmp_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t opc);
void fccmp_interpreter(interpreter_data* ctx, uint64_t ftype, uint64_t Rm, uint64_t cond, uint64_t Rn, uint64_t nzcv);
uint64_t _x_interpreter(interpreter_data* ctx, uint64_t reg_id);//THIS FUNCTION IS USER DEFINED
void _x_interpreter(interpreter_data* ctx, uint64_t reg_id, uint64_t value);//THIS FUNCTION IS USER DEFINED
uint64_t _sys_interpreter(interpreter_data* ctx, uint64_t reg_id);//THIS FUNCTION IS USER DEFINED
@@ -386,54 +380,8 @@ template <typename R>
R intrinsic_ternary_imm_interpreter(interpreter_data* ctx, uint64_t instruction, R source_0, R source_1, uint64_t source_2);//THIS FUNCTION IS USER DEFINED
//JIT
void add_subtract_imm12_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t sh, uint64_t imm12, uint64_t Rn, uint64_t Rd);
void add_subtract_shifted_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t shift, uint64_t Rm, uint64_t imm6, uint64_t Rn, uint64_t Rd);
void add_subtract_extended_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t Rm, uint64_t option, uint64_t imm3, uint64_t Rn, uint64_t Rd);
void add_subtract_carry_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void shift_variable_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t Rm, uint64_t op2, uint64_t Rn, uint64_t Rd);
void multiply_with_32_jit(ssa_emit_context* ctx, uint64_t U, uint64_t Rm, uint64_t o0, uint64_t Ra, uint64_t Rn, uint64_t Rd);
void multiply_hi_jit(ssa_emit_context* ctx, uint64_t U, uint64_t Rm, uint64_t o0, uint64_t Rn, uint64_t Rd);
void multiply_additive_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t Rm, uint64_t o0, uint64_t Ra, uint64_t Rn, uint64_t Rd);
void divide_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t Rm, uint64_t o1, uint64_t Rn, uint64_t Rd);
uint64_t create_rbit_mask_jit(ssa_emit_context* ctx, uint64_t index);
void rbit_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t Rn, uint64_t Rd);
void rev16_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t Rn, uint64_t Rd);
void reverse_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t opc, uint64_t Rn, uint64_t Rd);
void count_leading_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t s, uint64_t Rn, uint64_t Rd);
void extr_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t N, uint64_t Rm, uint64_t imms, uint64_t Rn, uint64_t Rd);
void bitfield_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t opc, uint64_t N, uint64_t immr, uint64_t imms, uint64_t Rn, uint64_t Rd);
void logical_immediate_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t opc, uint64_t N, uint64_t immr, uint64_t imms, uint64_t Rn, uint64_t Rd);
void logical_shifted_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t opc, uint64_t shift, uint64_t N, uint64_t Rm, uint64_t imm6, uint64_t Rn, uint64_t Rd);
void conditional_select_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t Rm, uint64_t cond, uint64_t op2, uint64_t Rn, uint64_t Rd);
void conditional_compare_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t Rm, uint64_t cond, uint64_t mode, uint64_t Rn, uint64_t nzcv);
void move_wide_immediate_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t opc, uint64_t hw, uint64_t imm16, uint64_t Rd);
void pc_rel_addressing_jit(ssa_emit_context* ctx, uint64_t op, uint64_t immlo, uint64_t immhi, uint64_t Rd);
void branch_register_jit(ssa_emit_context* ctx, uint64_t l, uint64_t Rn);
void return_register_jit(ssa_emit_context* ctx, uint64_t Rn);
void test_bit_branch_jit(ssa_emit_context* ctx, uint64_t b5, uint64_t op, uint64_t b40, uint64_t imm14, uint64_t Rt);
void compare_and_branch_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t imm19, uint64_t Rt);
void b_unconditional_jit(ssa_emit_context* ctx, uint64_t op, uint64_t imm26);
void b_conditional_jit(ssa_emit_context* ctx, uint64_t imm19, uint64_t cond);
void svc_jit(ssa_emit_context* ctx, uint64_t imm16);
void msr_register_jit(ssa_emit_context* ctx, uint64_t imm15, uint64_t Rt);
void mrs_register_jit(ssa_emit_context* ctx, uint64_t imm15, uint64_t Rt);
void hints_jit(ssa_emit_context* ctx, uint64_t imm7);
void sys_jit(ssa_emit_context* ctx, uint64_t L, uint64_t imm19);
void barriers_jit(ssa_emit_context* ctx, uint64_t CRm, uint64_t op2, uint64_t Rt);
void load_store_register_post_jit(ssa_emit_context* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t Rn, uint64_t Rt);
void load_store_register_pre_jit(ssa_emit_context* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t Rn, uint64_t Rt);
void load_store_register_unscaled_jit(ssa_emit_context* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_offset_jit(ssa_emit_context* ctx, uint64_t opc, uint64_t VR, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_post_jit(ssa_emit_context* ctx, uint64_t opc, uint64_t VR, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_pre_jit(ssa_emit_context* ctx, uint64_t opc, uint64_t VR, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_jit(ssa_emit_context* ctx, uint64_t opc, uint64_t VR, uint64_t wb, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_imm_unsigned_jit(ssa_emit_context* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm12, uint64_t Rn, uint64_t Rt);
void load_store_register_imm_unscaled_jit(ssa_emit_context* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t wb, uint64_t Rn, uint64_t Rt);
void load_store_register_offset_jit(ssa_emit_context* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t Rm, uint64_t option, uint64_t S, uint64_t Rn, uint64_t Rt);
void load_store_exclusive_ordered_jit(ssa_emit_context* ctx, uint64_t size, uint64_t ordered, uint64_t L, uint64_t Rs, uint64_t o0, uint64_t Rn, uint64_t Rt);
ir_operand exclusive_address_mask_jit(ssa_emit_context* ctx);
void load_exclusive_jit(ssa_emit_context* ctx, uint64_t is_exclusive, uint64_t size, uint64_t Rn, uint64_t Rt);
void store_exclusive_jit(ssa_emit_context* ctx, uint64_t is_exclusive, uint64_t size, uint64_t Rn, uint64_t Rt, uint64_t Rs);
void memory_single_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t p, uint64_t R, uint64_t Rm, uint64_t b, uint64_t size, uint64_t Rn, uint64_t Rt, uint64_t is_load, uint64_t opcode, uint64_t S);
void memory_multiple_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt, uint64_t rpt, uint64_t selem, uint64_t wback, uint64_t is_load);
uint64_t sign_extend_jit(ssa_emit_context* ctx, uint64_t source, uint64_t count);
ir_operand a_shift_reg_jit(ssa_emit_context* ctx,uint64_t O, uint64_t m, uint64_t shift_type, uint64_t ammount);
ir_operand a_extend_reg_jit(ssa_emit_context* ctx,uint64_t O, uint64_t m, uint64_t extend_type, uint64_t shift);
@@ -452,56 +400,6 @@ ir_operand add_subtract_carry_impl_jit(ssa_emit_context* ctx,uint64_t O, ir_oper
ir_operand condition_holds_jit(ssa_emit_context* ctx, uint64_t cond);
void branch_long_universal_jit(ssa_emit_context* ctx, uint64_t Rn, uint64_t link);
uint64_t select_jit(ssa_emit_context* ctx, uint64_t condition, uint64_t yes, uint64_t no);
void dup_general_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void dup_element_scalar_jit(ssa_emit_context* ctx, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void dup_element_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void move_to_gp_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t imm5, uint64_t U, uint64_t Rn, uint64_t Rd);
void ins_general_jit(ssa_emit_context* ctx, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void ins_element_jit(ssa_emit_context* ctx, uint64_t imm5, uint64_t imm4, uint64_t Rn, uint64_t Rd);
void movi_immediate_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t op, uint64_t immhi, uint64_t cmode, uint64_t immlo, uint64_t Rd);
void fmov_general_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t ftype, uint64_t rmode, uint64_t opcode, uint64_t Rn, uint64_t Rd);
void convert_to_float_gp_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void convert_to_float_vector_scalar_jit(ssa_emit_context* ctx, uint64_t U, uint64_t sz, uint64_t Rn, uint64_t Rd);
void convert_to_float_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t U, uint64_t sz, uint64_t Rn, uint64_t Rd);
void shl_immedaite_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void sshr_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void shll_shll2_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t U, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void shrn_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void rev64_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void neg_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void not_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rn, uint64_t Rd);
void abs_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void mul_vector_index_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t L, uint64_t M, uint64_t Rm, uint64_t H, uint64_t Rn, uint64_t Rd);
void mul_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void ext_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t imm4, uint64_t Rn, uint64_t Rd);
void compare_above_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t U, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void shl_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t U, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void add_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void addlv_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t U, uint64_t size, uint64_t Rn, uint64_t Rd);
void cnt_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void orr_orn_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t invert, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void bsl_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void and_bic_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t invert, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void eor_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void xnt_xnt2_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void zip_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t op, uint64_t Rn, uint64_t Rd);
void trn_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t op, uint64_t Rn, uint64_t Rd);
void tbl_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t len, uint64_t Rn, uint64_t Rd);
void ld4_st4_multiple_structures_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t L, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld4_st4_multiple_structures_post_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t L, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld1r_no_offset_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld1r_post_index_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld1_single_structure_no_offset_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t opcode, uint64_t S, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld1_single_structure_post_index_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t opcode, uint64_t S, uint64_t size, uint64_t Rn, uint64_t Rt);
void st1_multiple_structures_no_offset_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t ophi, uint64_t oplo, uint64_t size, uint64_t Rn, uint64_t Rt);
void st1_multiple_structures_post_index_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t ophi, uint64_t oplo, uint64_t size, uint64_t Rn, uint64_t Rt);
void st2_multiple_structures_no_offset_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rt);
void st2_multiple_structures_post_index_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt);
void st1_single_structure_no_offset_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t opcode, uint64_t S, uint64_t size, uint64_t Rn, uint64_t Rt);
void st1_single_structure_post_index_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t opcode, uint64_t S, uint64_t size, uint64_t Rn, uint64_t Rt);
void floating_point_conditional_select_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t cond, uint64_t Rn, uint64_t Rd);
void fcmp_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t opc);
void fccmp_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t cond, uint64_t Rn, uint64_t nzcv);
ir_operand create_mask_jit(ssa_emit_context* ctx, uint64_t bits);
ir_operand shift_left_check_jit(ssa_emit_context* ctx, ir_operand to_shift, ir_operand shift, uint64_t size);
uint64_t get_x86_rounding_mode_jit(ssa_emit_context* ctx, uint64_t rounding);
@@ -520,10 +418,6 @@ ir_operand call_float_binary_jit(ssa_emit_context* ctx, ir_operand operand1, ir_
ir_operand call_float_unary_jit(ssa_emit_context* ctx, ir_operand operand, ir_operand fpcr, uint64_t N, uint64_t function);
void convert_to_float_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd, uint64_t from_vector);
ir_operand replicate_vector_jit(ssa_emit_context* ctx, ir_operand source, uint64_t v_size, uint64_t count);
void st1_jit(ssa_emit_context* ctx, uint64_t wback, uint64_t Q, uint64_t L, uint64_t opcode, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rt);
void st_jit(ssa_emit_context* ctx, uint64_t wback, uint64_t Q, uint64_t L, uint64_t opcode, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rt);
void memory_4_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t L, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rt);
void memory_1_jit(ssa_emit_context* ctx, uint64_t wback, uint64_t Q, uint64_t L, uint64_t R, uint64_t Rm, uint64_t o2, uint64_t opcode, uint64_t S, uint64_t size, uint64_t Rn, uint64_t Rt, uint64_t is_load);
ir_operand bits_r_jit(ssa_emit_context* ctx, ir_operand operand, uint64_t top, uint64_t bottom);
ir_operand infinity_jit(ssa_emit_context* ctx, uint64_t sign, uint64_t N);
ir_operand float_is_nan_jit(ssa_emit_context* ctx, ir_operand operand, uint64_t N);
@@ -571,6 +465,12 @@ void floating_point_multiply_accumulate_scalar_element_jit(ssa_emit_context* ctx
void floating_point_multiply_accumulate_vector_element_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rd, uint64_t Rn, uint64_t Rm, uint64_t neg, uint64_t sz, uint64_t index);
void fcm_vector_jit(ssa_emit_context* ctx, uint64_t Rd, uint64_t Rn, uint64_t Rm, uint64_t mode, uint64_t Q, uint64_t sz);
ir_operand clear_vector_scalar_jit(ssa_emit_context* ctx, ir_operand working, uint64_t fltsize);
uint64_t create_rbit_mask_jit(ssa_emit_context* ctx, uint64_t index);
void load_store_register_pair_imm_jit(ssa_emit_context* ctx, uint64_t opc, uint64_t VR, uint64_t wb, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_imm_unscaled_jit(ssa_emit_context* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t wb, uint64_t Rn, uint64_t Rt);
ir_operand exclusive_address_mask_jit(ssa_emit_context* ctx);
void load_exclusive_jit(ssa_emit_context* ctx, uint64_t is_exclusive, uint64_t size, uint64_t Rn, uint64_t Rt);
void store_exclusive_jit(ssa_emit_context* ctx, uint64_t is_exclusive, uint64_t size, uint64_t Rn, uint64_t Rt, uint64_t Rs);
ir_operand _compare_and_swap_jit(ssa_emit_context* ctx, ir_operand physical_address, ir_operand expecting, ir_operand to_swap, uint64_t size);
ir_operand compare_and_swap_jit(ssa_emit_context* ctx, ir_operand address, ir_operand expecting, ir_operand to_swap, uint64_t size);
void mem_jit(ssa_emit_context* ctx,uint64_t O, ir_operand address, ir_operand value);
@@ -579,32 +479,64 @@ ir_operand XSP_jit(ssa_emit_context* ctx, uint64_t reg_id);
void XSP_jit(ssa_emit_context* ctx, uint64_t reg_id, ir_operand value);
ir_operand X_jit(ssa_emit_context* ctx, uint64_t reg_id);
void X_jit(ssa_emit_context* ctx, uint64_t reg_id, ir_operand value);
void ld_st_1_multiple_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t p, uint64_t L, uint64_t Rm, uint64_t opchi, uint64_t opclo, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld_st_2_multiple_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t p, uint64_t L, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld_st_3_multiple_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t p, uint64_t L, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld_st_4_multiple_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t p, uint64_t L, uint64_t Rm, uint64_t size, uint64_t Rn, uint64_t Rt);
void ldXr_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t p, uint64_t R, uint64_t Rm, uint64_t b, uint64_t size, uint64_t Rn, uint64_t Rt);
void ld_st_single_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t p, uint64_t L, uint64_t R, uint64_t Rm, uint64_t opcode, uint64_t b, uint64_t S, uint64_t size, uint64_t Rn, uint64_t Rt);
void add_subtract_imm12_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t sh, uint64_t imm12, uint64_t Rn, uint64_t Rd);
void add_subtract_shifted_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t shift, uint64_t Rm, uint64_t imm6, uint64_t Rn, uint64_t Rd);
void add_subtract_extended_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t Rm, uint64_t option, uint64_t imm3, uint64_t Rn, uint64_t Rd);
void add_subtract_carry_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void shift_variable_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t Rm, uint64_t op2, uint64_t Rn, uint64_t Rd);
void multiply_with_32_jit(ssa_emit_context* ctx, uint64_t U, uint64_t Rm, uint64_t o0, uint64_t Ra, uint64_t Rn, uint64_t Rd);
void multiply_hi_jit(ssa_emit_context* ctx, uint64_t U, uint64_t Rm, uint64_t o0, uint64_t Rn, uint64_t Rd);
void multiply_additive_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t Rm, uint64_t o0, uint64_t Ra, uint64_t Rn, uint64_t Rd);
void divide_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t Rm, uint64_t o1, uint64_t Rn, uint64_t Rd);
void rbit_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t Rn, uint64_t Rd);
void rev16_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t Rn, uint64_t Rd);
void reverse_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t opc, uint64_t Rn, uint64_t Rd);
void count_leading_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t s, uint64_t Rn, uint64_t Rd);
void extr_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t N, uint64_t Rm, uint64_t imms, uint64_t Rn, uint64_t Rd);
void bitfield_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t opc, uint64_t N, uint64_t immr, uint64_t imms, uint64_t Rn, uint64_t Rd);
void logical_immediate_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t opc, uint64_t N, uint64_t immr, uint64_t imms, uint64_t Rn, uint64_t Rd);
void logical_shifted_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t opc, uint64_t shift, uint64_t N, uint64_t Rm, uint64_t imm6, uint64_t Rn, uint64_t Rd);
void conditional_select_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t S, uint64_t Rm, uint64_t cond, uint64_t op2, uint64_t Rn, uint64_t Rd);
void conditional_compare_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t Rm, uint64_t cond, uint64_t mode, uint64_t Rn, uint64_t nzcv);
void move_wide_immediate_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t opc, uint64_t hw, uint64_t imm16, uint64_t Rd);
void pc_rel_addressing_jit(ssa_emit_context* ctx, uint64_t op, uint64_t immlo, uint64_t immhi, uint64_t Rd);
void branch_register_jit(ssa_emit_context* ctx, uint64_t l, uint64_t Rn);
void return_register_jit(ssa_emit_context* ctx, uint64_t Rn);
void test_bit_branch_jit(ssa_emit_context* ctx, uint64_t b5, uint64_t op, uint64_t b40, uint64_t imm14, uint64_t Rt);
void compare_and_branch_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t op, uint64_t imm19, uint64_t Rt);
void b_unconditional_jit(ssa_emit_context* ctx, uint64_t op, uint64_t imm26);
void b_conditional_jit(ssa_emit_context* ctx, uint64_t imm19, uint64_t cond);
void svc_jit(ssa_emit_context* ctx, uint64_t imm16);
void msr_register_jit(ssa_emit_context* ctx, uint64_t imm15, uint64_t Rt);
void mrs_register_jit(ssa_emit_context* ctx, uint64_t imm15, uint64_t Rt);
void hints_jit(ssa_emit_context* ctx, uint64_t imm7);
void sys_jit(ssa_emit_context* ctx, uint64_t L, uint64_t imm19);
void barriers_jit(ssa_emit_context* ctx, uint64_t CRm, uint64_t op2, uint64_t Rt);
void load_store_register_post_jit(ssa_emit_context* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t Rn, uint64_t Rt);
void load_store_register_pre_jit(ssa_emit_context* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t Rn, uint64_t Rt);
void load_store_register_unscaled_jit(ssa_emit_context* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm9, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_offset_jit(ssa_emit_context* ctx, uint64_t opc, uint64_t VR, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_post_jit(ssa_emit_context* ctx, uint64_t opc, uint64_t VR, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_pair_imm_pre_jit(ssa_emit_context* ctx, uint64_t opc, uint64_t VR, uint64_t L, uint64_t imm7, uint64_t Rt2, uint64_t Rn, uint64_t Rt);
void load_store_register_imm_unsigned_jit(ssa_emit_context* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t imm12, uint64_t Rn, uint64_t Rt);
void load_store_register_offset_jit(ssa_emit_context* ctx, uint64_t size, uint64_t VR, uint64_t opc, uint64_t Rm, uint64_t option, uint64_t S, uint64_t Rn, uint64_t Rt);
void load_store_exclusive_ordered_jit(ssa_emit_context* ctx, uint64_t size, uint64_t ordered, uint64_t L, uint64_t Rs, uint64_t o0, uint64_t Rn, uint64_t Rt);
void conversion_between_floating_point_and_fixed_point_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t S, uint64_t ftype, uint64_t rmode, uint64_t opcode, uint64_t scale, uint64_t Rn, uint64_t Rd);
void fcvt_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t opc, uint64_t Rn, uint64_t Rd);
void fcvtz_scalar_integer_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void fcvtz_vector_integer_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t U, uint64_t sz, uint64_t Rn, uint64_t Rd);
void fcvtn_scalar_integer_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void fcvta_scalar_integer_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void fcvtm_scalar_integer_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void frintp_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void frintm_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fcvtp_scalar_integer_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void fadd_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fsub_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmul_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fdiv_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmax_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmin_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmaxnm_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fminnm_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fnmul_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fabs_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fneg_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fneg_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void fsqrt_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fsqrt_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void frecpe_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void frsqrte_scalar_jit(ssa_emit_context* ctx, uint64_t sz, uint64_t Rn, uint64_t Rd);
void fmov_scalar_immediate_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t imm8, uint64_t Rd);
void fadd_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmul_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fsub_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, uint64_t Rm, uint64_t Rn, uint64_t Rd);
@@ -628,6 +560,62 @@ void fcmeq_vector_register_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, u
void fcmgt_vector_register_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fcmge_vector_register_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fcmle_zero_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void fadd_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fsub_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmul_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fdiv_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmax_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmin_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fmaxnm_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fminnm_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fnmul_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void fabs_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fneg_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fneg_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void fsqrt_scalar_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rn, uint64_t Rd);
void fsqrt_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void frecpe_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t sz, uint64_t Rn, uint64_t Rd);
void frsqrte_scalar_jit(ssa_emit_context* ctx, uint64_t sz, uint64_t Rn, uint64_t Rd);
void fmov_scalar_immediate_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t imm8, uint64_t Rd);
void dup_general_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void dup_element_scalar_jit(ssa_emit_context* ctx, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void dup_element_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void move_to_gp_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t imm5, uint64_t U, uint64_t Rn, uint64_t Rd);
void ins_general_jit(ssa_emit_context* ctx, uint64_t imm5, uint64_t Rn, uint64_t Rd);
void ins_element_jit(ssa_emit_context* ctx, uint64_t imm5, uint64_t imm4, uint64_t Rn, uint64_t Rd);
void movi_immediate_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t op, uint64_t immhi, uint64_t cmode, uint64_t immlo, uint64_t Rd);
void fmov_general_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t ftype, uint64_t rmode, uint64_t opcode, uint64_t Rn, uint64_t Rd);
void convert_to_float_gp_jit(ssa_emit_context* ctx, uint64_t sf, uint64_t ftype, uint64_t U, uint64_t Rn, uint64_t Rd);
void convert_to_float_vector_scalar_jit(ssa_emit_context* ctx, uint64_t U, uint64_t sz, uint64_t Rn, uint64_t Rd);
void convert_to_float_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t U, uint64_t sz, uint64_t Rn, uint64_t Rd);
void shl_immedaite_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void shr_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t U, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void shll_shll2_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t U, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void shrn_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t immh, uint64_t immb, uint64_t Rn, uint64_t Rd);
void rev64_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void neg_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void not_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rn, uint64_t Rd);
void abs_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void mul_vector_index_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t L, uint64_t M, uint64_t Rm, uint64_t H, uint64_t Rn, uint64_t Rd);
void mul_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void ext_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t imm4, uint64_t Rn, uint64_t Rd);
void compare_above_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t U, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void shl_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t U, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void add_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void addlv_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t U, uint64_t size, uint64_t Rn, uint64_t Rd);
void cnt_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void orr_orn_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t invert, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void bsl_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void and_bic_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t invert, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void eor_vector_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t Rn, uint64_t Rd);
void xnt_xnt2_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rn, uint64_t Rd);
void zip_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t op, uint64_t Rn, uint64_t Rd);
void uzp_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t op, uint64_t Rn, uint64_t Rd);
void trn_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t size, uint64_t Rm, uint64_t op, uint64_t Rn, uint64_t Rd);
void tbl_jit(ssa_emit_context* ctx, uint64_t Q, uint64_t Rm, uint64_t len, uint64_t Rn, uint64_t Rd);
void floating_point_conditional_select_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t cond, uint64_t Rn, uint64_t Rd);
void fcmp_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t Rn, uint64_t opc);
void fccmp_jit(ssa_emit_context* ctx, uint64_t ftype, uint64_t Rm, uint64_t cond, uint64_t Rn, uint64_t nzcv);
ir_operand _x_jit(ssa_emit_context* ctx, uint64_t reg_id);//THIS FUNCTION IS USER DEFINED
void _x_jit(ssa_emit_context* ctx, uint64_t reg_id, ir_operand value);//THIS FUNCTION IS USER DEFINED
ir_operand _sys_jit(ssa_emit_context* ctx, uint64_t reg_id);//THIS FUNCTION IS USER DEFINED

View File

@@ -3,17 +3,16 @@
enum guest_compiler_optimization_flags
{
function_wide_translation = 1 << 0,
guest_optimize_ssa = 1 << 1,
guest_optimize_mathmatical_fold =(1 << 2) | guest_optimize_ssa,
guest_function_wide_translation = 1 << 0,
guest_optimize_basic_ssa = 1 << 1,
guest_optimize_group_ssa = 1 << 2,
use_flt = 1 << 3,
interpreted = 1 << 4,
level_zero = 0,
level_one = function_wide_translation,
level_two = function_wide_translation | guest_optimize_ssa,
level_three = function_wide_translation | guest_optimize_mathmatical_fold | use_flt
level_one = guest_function_wide_translation,
level_two = guest_function_wide_translation | guest_optimize_basic_ssa,
level_three = guest_function_wide_translation | guest_optimize_group_ssa | use_flt
};
#endif
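// Illustrative only, not part of the commit: the levels above are plain
// bitmasks, so callers test individual capability bits rather than whole
// levels, e.g.:
//
//   guest_compiler_optimization_flags flags = level_three;
//   bool wide  = flags & guest_function_wide_translation; // set
//   bool basic = flags & guest_optimize_basic_ssa;        // clear: level_three uses group SSA
//   bool group = flags & guest_optimize_group_ssa;        // set
//   bool flt   = flags & use_flt;                         // set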

View File

@@ -3,6 +3,111 @@
#include "guest_process.h"
#include "jit/jit_memory.h"
static void retranslate_functions_worker(guest_function_store* context, std::vector<retranslate_request>* to_retranslate, int index)
{
for (auto i : *to_retranslate)
{
translate_request_data process_context = i.process_context;
guest_function result = process_context.translate_function(&process_context,i.flags);
result.optimizations = i.flags;
if (i.flags & guest_compiler_optimization_flags::use_flt)
{
fast_function_table::insert_function(&context->native_function_table, i.address, result.jit_offset);
}
context->main_translate_lock.lock();
context->functions[i.address] = result;
context->main_translate_lock.unlock();
}
if (index == -1)
return;
context->retranslator_workers[index] = false;
delete to_retranslate;
}
static void retranslate_functions_master(guest_function_store* context)
{
context->retranslator_is_running = true;
context->retranslate_lock.lock();
auto to_retranslate = context->retranslate_requests;
context->retranslate_requests = std::vector<retranslate_request>();
context->retranslate_lock.unlock();
if (to_retranslate.size() == 0)
{
context->retranslator_is_running = false; // otherwise the flag stays stuck true and no master thread can ever be respawned
return;
}
int function_pool_count = to_retranslate.size() / THREAD_COUNT;
if (function_pool_count == 0)
{
retranslate_functions_worker(context, &to_retranslate, -1);
}
else
{
int current_thread = 0;
int global_place = 0;
while (true)
{
std::vector<retranslate_request>* current_pool = new std::vector<retranslate_request>();
for (; global_place < to_retranslate.size(); ++global_place)
{
current_pool->push_back(to_retranslate[global_place]);
if (global_place % function_pool_count == 0 && global_place != 0)
{
global_place++;
break;
}
}
context->retranslator_workers[current_thread] = true;
std::thread(retranslate_functions_worker, context, current_pool, current_thread).detach();
if (current_thread < THREAD_COUNT - 1) // clamp so retranslator_workers[current_thread] stays in bounds when the batch splits into more pools than threads
{
current_thread++;
}
if (global_place >= to_retranslate.size())
{
break;
}
}
}
context->retranslator_is_running = false;
}
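// Worked example of the partitioning above (numbers illustrative): with
// THREAD_COUNT == 10 and 25 queued requests, function_pool_count == 25 / 10 == 2,
// so the loop carves the batch into pools of roughly function_pool_count
// requests and detaches one retranslate_functions_worker per pool. With fewer
// than THREAD_COUNT requests, function_pool_count == 0 and the whole batch is
// handled synchronously on this thread (index == -1: no worker slot to clear,
// and the vector is stack-owned, so the worker must not delete it).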
static bool no_worker_retranslating(guest_function_store* context)
{
for (int i = 0; i < THREAD_COUNT; ++i)
{
if (context->retranslator_workers[i])
{
return false; // a worker thread is still busy retranslating
}
}
return true;
}
void guest_function_store::request_retranslate_function(guest_function_store* context, uint64_t address, guest_compiler_optimization_flags flags, translate_request_data process_context)
{
context->retranslate_lock.lock();
@@ -17,58 +122,14 @@ void guest_function_store::request_retranslate_function(guest_function_store* co
context->retranslate_lock.unlock();
if (!context->retranslator_is_running)
if (!context->retranslator_is_running && no_worker_retranslating(context))
{
context->retranslator_is_running = true;
std::thread(guest_function_store::retranslate_functions, context).detach();
std::thread(retranslate_functions_master, context).detach();
}
}
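// Call-site sketch, mirroring guest_process::jit_function later in this
// commit: once a level_one function has executed 50 times it is queued for a
// level_two rebuild, and the detached master thread is only spawned while the
// retranslator is idle:
//
//   guest_function_store::request_retranslate_function(
//       &process->guest_functions, guest_function_address,
//       level_two, translator_request);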
void guest_function_store::retranslate_functions(guest_function_store* context)
{
context->retranslator_is_running = true;
int rest = 0;
while (1)
{
context->retranslate_lock.lock();
auto to_retranslate = context->retranslate_requests;
context->retranslate_requests = std::vector<retranslate_request>();
context->retranslate_lock.unlock();
if (to_retranslate.size() == 0)
{
break;
}
for (auto i : to_retranslate)
{
translate_request_data process_context = i.process_context;
guest_function result = process_context.translate_function(&process_context,i.flags);
result.optimizations = i.flags;
if (i.flags & guest_compiler_optimization_flags::use_flt)
{
fast_function_table::insert_function(&context->native_function_table, i.address, result.jit_offset);
}
context->main_translate_lock.lock();
context->functions[i.address] = result;
context->main_translate_lock.unlock();
}
}
context->retranslator_is_running = false;
}
guest_function guest_function_store::get_or_translate_function(guest_function_store* context, uint64_t address, translate_request_data* process_context, bool incrament_usage_counter)
{
auto function_table = &context->native_function_table;

View File

@@ -5,12 +5,15 @@
#include <inttypes.h>
#include <mutex>
#include <vector>
#include <thread>
#include <atomic>
#include "translate_request_data.h"
#include "fast_function_table.h"
#include "guest_compiler_optimization_flags.h"
#include "translate_request_data.h"
#include <thread>
#define THREAD_COUNT 10
struct guest_function_store;
struct translate_request_data;
@@ -33,11 +36,12 @@ struct guest_function_store
std::vector<retranslate_request> retranslate_requests;
std::thread retranslator_thread;
bool retranslator_is_running;
bool retranslator_workers[THREAD_COUNT];
static void request_retranslate_function(guest_function_store* context, uint64_t address, guest_compiler_optimization_flags flags, translate_request_data process_context);
static void retranslate_functions(guest_function_store* context);
static guest_function get_or_translate_function(guest_function_store* context, uint64_t address, translate_request_data* process_context, bool incrament_usgae_counter = false);
static void destroy(guest_function_store* to_destory);
};

View File

@@ -11,23 +11,6 @@
#include <iostream>
#include <iomanip>
void guest_process::create(guest_process* result, guest_memory guest_memory_context, jit_context* host_jit_context, aarch64_context_offsets arm_guest_data)
{
result->guest_memory_context = guest_memory_context;
result->host_jit_context = host_jit_context;
result->guest_context_offset_data = arm_guest_data;
result->svc_function = nullptr;
result->undefined_instruction = nullptr;
result->debug_mode = false;
result->guest_functions.use_flt = true;
result->guest_functions.retranslator_is_running = false;
result->log_native = nullptr;
init_aarch64_decoder(result);
}
uint64_t guest_process::jit_function(guest_process* process, uint64_t guest_function_address, void* arm_context)
{
translate_request_data translator_request =
@@ -55,9 +38,9 @@ uint64_t guest_process::jit_function(guest_process* process, uint64_t guest_func
case level_one:
{
if (function_to_execute.times_executed == 10 && !process->debug_mode)
if (function_to_execute.times_executed == 50 && !process->debug_mode)
{
guest_function_store::request_retranslate_function(&process->guest_functions,guest_function_address, level_three, translator_request);
guest_function_store::request_retranslate_function(&process->guest_functions,guest_function_address, level_two, translator_request);
}
} break;
@@ -118,9 +101,9 @@ guest_function guest_process::translate_function(translate_request_data* data, g
aarch64_emit.translate_functions = true;
int instruction_limit = 10000;
int instruction_limit = INT32_MAX;
if ((flags & guest_compiler_optimization_flags::function_wide_translation) == 0)
if (!(flags & guest_compiler_optimization_flags::guest_function_wide_translation))
{
instruction_limit = 50;
}
@@ -131,6 +114,18 @@ guest_function guest_process::translate_function(translate_request_data* data, g
ir_operation_block::emitds(raw_ir, ir_move, ssa_emit.memory_base, ir_operand::create_con((uint64_t)process->guest_memory_context.base));
int backend_compiler_flags = (compiler_flags)0;
if (flags & guest_compiler_optimization_flags::guest_optimize_basic_ssa)
{
backend_compiler_flags = compiler_flags::optimize_basic_ssa;
}
if (flags & guest_compiler_optimization_flags::guest_optimize_group_ssa)
{
backend_compiler_flags = compiler_flags::optimize_group_pool_ssa;
}
while (true)
{
std::unordered_set<uint64_t> to_compile_que = aarch64_emit.basic_block_translate_que;
@@ -164,9 +159,12 @@ guest_function guest_process::translate_function(translate_request_data* data, g
uint32_t raw_instruction = *(uint32_t*)instruction_address;
auto instruction_table = fixed_length_decoder<uint32_t>::decode_fast(&process->decoder, raw_instruction);
auto instruction_table = fixed_length_decoder<uint32_t>::decode_fast(&process->fixed_length_decoder_context, raw_instruction);
ssa_emit_context::reset_local(&ssa_emit);
if (!(backend_compiler_flags & guest_compiler_optimization_flags::guest_optimize_basic_ssa))
{
ssa_emit_context::reset_local(&ssa_emit);
}
if (instruction_table == nullptr)
{
@@ -214,25 +212,13 @@ guest_function guest_process::translate_function(translate_request_data* data, g
aarch64_emit_context::emit_context_movement(&aarch64_emit);
int backend_compiler_flags = (compiler_flags)0;
if (flags & guest_compiler_optimization_flags::guest_optimize_ssa)
{
backend_compiler_flags = compiler_flags::optimize_ssa;
}
if (flags & guest_compiler_optimization_flags::guest_optimize_mathmatical_fold)
{
backend_compiler_flags = compiler_flags::optimize_ssa | compiler_flags::mathmatical_fold;
}
uint64_t code_size;
void* code = jit_context::compile_code(process->host_jit_context, raw_ir,(compiler_flags)backend_compiler_flags, &code_size);
if (((guest_process*)data->process)->log_native != nullptr && flags == guest_compiler_optimization_flags::level_three)
{
//((void(*)(void*, int))((guest_process*)data->process)->log_native)(code, code_size);
((void(*)(void*, int))((guest_process*)data->process)->log_native)(code, code_size);
}
guest_function result;
@@ -269,7 +255,7 @@ uint64_t guest_process::interperate_function(guest_process* process, uint64_t gu
interpreter.current_instruction = instruction;
auto table = fixed_length_decoder<uint32_t>::decode_fast(&process->decoder, instruction);
auto table = fixed_length_decoder<uint32_t>::decode_fast(&process->fixed_length_decoder_context, instruction);
if (table == nullptr)
{
@@ -318,6 +304,52 @@ void guest_process::create(guest_process* result, guest_memory memory, jit_conte
}
}
void guest_process::create_guest_process(guest_process* result, guest_memory guest_memory_context, jit_context* host_jit_context, void* context_data, int context_data_size, cpu_type cpu, cpu_size size, memory_order order)
{
result->guest_memory_context = guest_memory_context;
result->host_jit_context = host_jit_context;
if (context_data != nullptr)
{
assert(context_data_size <= (int)sizeof(result->guest_context_data)); // guest_context_data is a fixed 1024-byte buffer
memcpy(result->guest_context_data, context_data, context_data_size);
}
result->svc_function = nullptr;
result->undefined_instruction = nullptr;
result->debug_mode = false;
result->guest_functions.use_flt = true;
memset(result->guest_functions.retranslator_workers, 0, sizeof(guest_function_store::retranslator_workers));
result->guest_functions.retranslator_is_running = false;
result->log_native = nullptr;
switch (cpu)
{
case arm:
{
switch (size)
{
case _64_bit:
{
init_aarch64_decoder(result);
}; break;
default:
{
throw_error();
}; break;
}
}; break;
default:
{
throw_error();
}; break;
}
}
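// Usage sketch matching the call this commit adds to the external API further
// down; 64-bit ARM is the only combination wired up so far, anything else
// lands in throw_error():
//
//   guest_memory guest_memory_context = {memory, base_plus_va, base_plus_va_jit};
//   guest_process::create_guest_process(&result->process, guest_memory_context,
//       &result->memory, context_offsets, sizeof(aarch64_context_offsets),
//       cpu_type::arm, cpu_size::_64_bit, memory_order::little_endian);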
void guest_process::destroy(guest_process* process)
{
guest_function_store::destroy(&process->guest_functions);

View File

@@ -17,23 +17,26 @@ struct guest_process
guest_memory guest_memory_context;
jit_context* host_jit_context;
guest_function_store guest_functions;
fixed_length_decoder<uint32_t> decoder;
fixed_length_decoder<uint32_t> fixed_length_decoder_context;
aarch64_context_offsets guest_context_offset_data;
uint8_t guest_context_data[1024];
void* svc_function;
void* counter_function;
void* undefined_instruction;
bool debug_mode;
void* log_native;
bool debug_mode;
void* interperate_function_reference;
void* jit_function_reference;
cpu_type process_type;
cpu_size process_size;
memory_order process_memory_order;
void* process_data;
static void create(guest_process* result, guest_memory guest_memory_context, jit_context* host_jit_context, aarch64_context_offsets arm_guest_data);
static uint64_t jit_function(guest_process* process, uint64_t guest_function, void* arm_context);
static uint64_t interperate_function(guest_process* process, uint64_t guest_function, void* arm_context, bool* is_running, bool exit_on_long_branch = false);
@@ -41,6 +44,8 @@ struct guest_process
static void create(guest_process* result,guest_memory memory,jit_context* jit, cpu_type process_type, cpu_size process_size, memory_order process_memory_order);
static void destroy(guest_process* process);
static void create_guest_process(guest_process* result, guest_memory guest_memory_context, jit_context* host_jit_context, void* context_data, int context_data_size, cpu_type cpu, cpu_size size, memory_order order);
};
#endif

View File

@@ -34,7 +34,10 @@ extern "C"
result->process.log_native = nullptr;
jit_context::create(&result->memory, 5ULL * 1024 * 1024 * 1024, get_abi());
guest_process::create(&result->process, {memory, base_plus_va, base_plus_va_jit}, &result->memory, *context_offsets);
guest_memory guest_memory_context = {memory, base_plus_va, base_plus_va_jit};
guest_process::create_guest_process(&result->process, guest_memory_context, &result->memory, context_offsets, sizeof(aarch64_context_offsets), cpu_type::arm, cpu_size::_64_bit, memory_order::little_endian);
result->process.undefined_instruction = undefined_instruction;
result->process.svc_function = svc;
@@ -42,6 +45,11 @@ extern "C"
return result;
}
EXPORT void set_log_native(external_context* context, void* log_native)
{
context->process.log_native = log_native;
}
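// The pointer registered here is invoked from guest_process::translate_function
// as ((void(*)(void*, int))log_native)(code, code_size) after a level_three
// compile, so a host-side hook receives the code buffer and its size. Sketch
// (the logger body is up to the embedder):
//
//   static void log_native_code(void* code, int size)
//   {
//       fwrite(code, 1, size, stderr); // e.g. pipe the raw bytes to a disassembler
//   }
//
//   set_log_native(context, (void*)log_native_code);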
EXPORT void destroy_rem_context(external_context* context)
{
@@ -69,6 +77,8 @@ extern "C"
EXPORT void invalidate_jit_region(external_context* context, uint64_t address, uint64_t size)
{
context->process.guest_functions.main_translate_lock.lock();
for (uint64_t i = 0; i < size; i += 4)
{
uint64_t working_address = address + i;
@@ -76,5 +86,7 @@ extern "C"
fast_function_table::insert_function(&context->process.guest_functions.native_function_table, working_address, -1);
context->process.guest_functions.functions.erase(working_address);
}
context->process.guest_functions.main_translate_lock.unlock();
}
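// (The main_translate_lock taken above pairs with the one in
// retranslate_functions_worker, which publishes freshly compiled functions
// into guest_functions.functions from worker threads; without it, region
// invalidation could race those map inserts.)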
}

View File

@@ -1,5 +1,6 @@
#include "basic_register_allocator.h"
#include "debugging.h"
#include "tools/misc_tools.h"
struct save_state_group
{
@@ -17,6 +18,12 @@ struct save_state_group
}
};
enum known_global_data
{
base_register = 1 << 0,
vector_register = 1 << 1,
};
static void create_register_save_state(ir_control_flow_node* node, basic_register_allocator_context* context, std::unordered_map<ir_control_flow_node*, save_state_group>* groups)
{
save_state_group group = save_state_group::create(context);
@@ -236,11 +243,11 @@ static void unlock_all_basic(register_allocator_module* context)
}
}
static void unload_all(register_allocator_module* context, bool is_quet)
static void unload_all(register_allocator_module* context, bool is_quet, bool is_branch)
{
for (int i = 0; i < context->host_register_count; ++i)
{
register_allocator_module::emit_host_unload(context, i, is_quet);
register_allocator_module::emit_host_unload(context, i, is_quet, is_branch);
}
}
@@ -250,10 +257,10 @@ static void unlock_all_basic(basic_register_allocator_context* context)
unlock_all_basic(context->vec_allocator);
}
static void unload_all(basic_register_allocator_context* context, bool is_quet = false)
static void unload_all(basic_register_allocator_context* context, bool is_quet = false, bool is_branch = false)
{
unload_all(context->gp_allocator, is_quet);
unload_all(context->vec_allocator, is_quet);
unload_all(context->gp_allocator, is_quet, is_branch);
unload_all(context->vec_allocator, is_quet, is_branch);
}
void host_register::set_lock_bit(host_register* guest, lock_mode mode)
@@ -302,7 +309,12 @@ void register_allocator_module::emit_host_load(register_allocator_module* module
}
}
void register_allocator_module::emit_host_unload(register_allocator_module* module, int host_index, bool is_quet)
static bool is_vector_module(register_allocator_module* module)
{
return module->guest_type >= int128;
}
void register_allocator_module::emit_host_unload(register_allocator_module* module, int host_index, bool is_quet, bool is_branch)
{
ir_operation_block* result_ir = module->allocator_unit->result_ir;
host_register* working_host = &module->host_registers[host_index];
@@ -318,8 +330,15 @@ void register_allocator_module::emit_host_unload(register_allocator_module* modu
}
int type_byte_count = 8 << module->guest_type;
bool ignore = false;
if (is_branch && module->allocator_unit->use_lrsa_hints)
{
ignore = !in_set(&module->allocator_unit->current_lrsa_known_globals, working_host->guest_offset);
}
if (working_host->working_mode & register_mode::write && !is_quet)
if (working_host->working_mode & register_mode::write && !is_quet && !ignore)
{
ir_operand offset = ir_operand::create_con(working_host->guest_offset);
ir_operand to_store = ir_operand::create_reg(working_host->host_index, module->guest_type);
@@ -333,11 +352,11 @@ void register_allocator_module::emit_host_unload(register_allocator_module* modu
working_host->hits = 0;
}
static void unload_basic(basic_register_allocator_context* result_register_allocator, bool is_quiet = false)
static void unload_basic(basic_register_allocator_context* result_register_allocator, bool is_quiet = false, bool is_branch = false)
{
unlock_all_basic(result_register_allocator);
unload_all(result_register_allocator, is_quiet);
unload_all(result_register_allocator, is_quiet, is_branch);
}
static void emit_basic_block(basic_register_allocator_context* result_register_allocator, ir_operation_block* result_ir, ir_control_flow_node* node, std::unordered_map<ir_control_flow_node*, save_state_group>* save_state_groups)
@@ -366,6 +385,34 @@ static void emit_basic_block(basic_register_allocator_context* result_register_a
init_p_unlock(result_register_allocator, working_operation.sources[0]);
}; break;
case ir_instructions::ir_register_allocator_hint_global:
{
if (!result_register_allocator->use_lrsa_hints)
{
throw_error();
}
ir_operation* working_operation = &i->data;
result_register_allocator->current_lrsa_known_globals.clear();
for (int o = 0; o < working_operation->sources.count; ++o)
{
ir_operand operand = working_operation->sources[o];
register_allocator_module* module = get_module(result_register_allocator, operand);
int offset = get_guest_offset(module, operand.value);
if (in_set(&result_register_allocator->current_lrsa_known_globals, offset))
{
continue;
}
result_register_allocator->current_lrsa_known_globals.insert(offset);
}
}; break;
default:
{
const int stack_max = 10;
@@ -382,8 +429,6 @@ static void emit_basic_block(basic_register_allocator_context* result_register_a
{
unload_basic(result_register_allocator);
}
bool skip = false;
if (i == node->final_instruction)
{
@@ -402,7 +447,7 @@ static void emit_basic_block(basic_register_allocator_context* result_register_a
is_quiet = true;
}
unload_basic(result_register_allocator, is_quiet);
unload_basic(result_register_allocator, is_quiet, true);
}; break;
case 2:
@@ -420,7 +465,7 @@ static void emit_basic_block(basic_register_allocator_context* result_register_a
is_quiet = true;
}
unload_basic(result_register_allocator, is_quiet);
unload_basic(result_register_allocator, is_quiet, true);
}; break;
case 0:
@@ -435,19 +480,18 @@ static void emit_basic_block(basic_register_allocator_context* result_register_a
}
}
if (!skip)
{
ir_operation_block::emit_with(result_ir, instruction, new_destinations, working_operation.destinations.count, new_sources, working_operation.sources.count);
}
ir_operation_block::emit_with(result_ir, instruction, new_destinations, working_operation.destinations.count, new_sources, working_operation.sources.count);
}; break;
}
}
}
void basic_register_allocator_context::run_pass(basic_register_allocator_context* result_register_allocator, ir_operation_block* result_ir, ir_operation_block* pre_allocated_code, int gp_count, guest_data gp_data, int vec_count, guest_data vec_data, ir_operand context_register)
void basic_register_allocator_context::run_pass(basic_register_allocator_context* result_register_allocator, ir_operation_block* result_ir, ir_operation_block* pre_allocated_code, int gp_count, guest_data gp_data, int vec_count, guest_data vec_data, ir_operand context_register, bool use_lrsa_hints)
{
arena_allocator* allocator = result_ir->allocator;
result_register_allocator->use_lrsa_hints = use_lrsa_hints;
result_register_allocator->gp_allocator = create_allocator_module(result_register_allocator, allocator, gp_count, gp_data.guest_count, gp_data.guest_type);
result_register_allocator->vec_allocator = create_allocator_module(result_register_allocator, allocator, vec_count, vec_data.guest_count, vec_data.guest_type);
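
A minimal standalone sketch of what the new is_branch/use_lrsa_hints path above buys: at a branch, a dirty host register only needs a write-back if its guest offset was hinted as live across the branch. All names below are hypothetical stand-ins, not the repo's types.

#include <cstdio>
#include <unordered_set>

// Toy stand-ins for host_register / current_lrsa_known_globals above.
struct toy_host_register { int guest_offset; bool dirty; };

// At a branch, a dirty register only needs a store back to the guest
// context if its offset is known to live past the branch; otherwise
// the store is dead and can be elided.
static bool needs_store_at_branch(const toy_host_register& reg,
                                  const std::unordered_set<int>& live_across_branch,
                                  bool use_hints)
{
    if (!reg.dirty) return false;
    if (!use_hints) return true; // no hint info: stay conservative
    return live_across_branch.count(reg.guest_offset) != 0;
}

int main()
{
    std::unordered_set<int> hinted = {0, 16}; // offsets hinted as global
    toy_host_register a{0, true}, b{8, true};
    std::printf("%d %d\n",
                needs_store_at_branch(a, hinted, true),  // 1: offset 0 survives
                needs_store_at_branch(b, hinted, true)); // 0: store elided
}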

View File

@@ -2,6 +2,7 @@
#define BASIC_REGISTER_ALLOCATOR_H
#include "ir.h"
#include <unordered_set>
struct basic_register_allocator_context;
struct register_allocator_module;
@@ -60,7 +61,7 @@ struct register_allocator_module
uint64_t stack_offset;
static void emit_host_load(register_allocator_module* module, int host_register, int guest_offset, register_mode mode);
static void emit_host_unload(register_allocator_module* module, int host_register, bool is_quet = false);
static void emit_host_unload(register_allocator_module* module, int host_register, bool is_quet = false, bool is_branch = false);
};
struct module_save_state
@@ -92,12 +93,14 @@ struct basic_register_allocator_context
{
register_allocator_module* gp_allocator;
register_allocator_module* vec_allocator;
std::unordered_set<int> current_lrsa_known_globals;
ir_operation_block* result_ir;
ir_operand context_register;
bool use_lrsa_hints;
static void run_pass(basic_register_allocator_context* result_register_allocator, ir_operation_block* result_ir, ir_operation_block* pre_allocated_code, int gp_count, guest_data gp_data, int vec_count, guest_data vec_data, ir_operand context_register);
static void run_pass(basic_register_allocator_context* result_register_allocator, ir_operation_block* result_ir, ir_operation_block* pre_allocated_code, int gp_count, guest_data gp_data, int vec_count, guest_data vec_data, ir_operand context_register, bool use_lrsa_hints);
};
#endif

View File

@@ -76,6 +76,20 @@ enum ir_instructions : uint64_t
ir_ternary_begin,
ir_conditional_select,
/*
ir_conditional_select_equal,
ir_conditional_select_not_equal,
ir_conditional_select_less_signed,
ir_conditional_select_less_unsigned,
ir_conditional_select_greater_signed,
ir_conditional_select_greater_unsigned,
ir_conditional_select_less_equal_signed,
ir_conditional_select_less_equal_unsigned,
ir_conditional_select_greater_equal_signed,
ir_conditional_select_greater_equal_unsigned,
*/
ir_double_shift_right,
ir_ternary_end,
@@ -110,6 +124,7 @@ enum ir_instructions : uint64_t
ir_open_context,
ir_register_allocator_p_lock,
ir_register_allocator_p_unlock,
ir_register_allocator_hint_global,
ir_ssa_phi,
//Vectors
@@ -301,6 +316,7 @@ static std::string instruction_names[] = {
"ir_open_context",
"ir_register_allocator_p_lock",
"ir_register_allocator_p_unlock",
"ir_register_allocator_hint_global",
"ir_ssa_phi",
//Vectors
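
Since the hunk above adds a new opcode and a matching entry to instruction_names by hand, here is a hedged sketch (toy enum and table, not the real ones in ir.h) of one way to keep the two in lockstep at compile time:

#include <cstdint>
#include <string>

// Toy opcode enum with a trailing count sentinel.
enum toy_instructions : uint64_t { toy_move, toy_hint_global, toy_count };

// One name per opcode, in declaration order.
static const std::string toy_names[] = { "toy_move", "toy_hint_global" };

static_assert(sizeof(toy_names) / sizeof(toy_names[0]) == toy_count,
              "every opcode needs a matching name entry");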

View File

@@ -7,6 +7,7 @@
#include <unordered_map>
struct register_lifetime;
struct lrsa_node;
struct register_lifetime
{
@@ -17,8 +18,11 @@ struct register_lifetime
int first_use_after_birth_location;
register_lifetime* birth_lifetime;
ir_operation* birth_instruction;
bool is_vector;
ir_operation* birth_instruction;
lrsa_node* birth_node;
lrsa_node* death_node;
static bool in_interval(register_lifetime* lifetime, int time)
{
@@ -48,7 +52,7 @@ struct register_lifetime
context->birth_instruction = birth_instruction;
}
static register_lifetime create(uint64_t source_register, int time, ir_operation* birth_instruction)
static register_lifetime create(uint64_t source_register, int time, ir_operation* birth_instruction, lrsa_node* birth_node)
{
register_lifetime result;
@@ -56,6 +60,7 @@ struct register_lifetime
result.birth = time;
result.death = time;
result.first_use_after_birth_location = result.birth;
result.birth_node = birth_node;
set_birth_instruction(&result, birth_instruction);
@@ -65,19 +70,33 @@ struct register_lifetime
struct loop_data
{
int check_location;
int loop_end;
int check_location;
lrsa_node* loop_end_node;
};
struct known_global
{
uint64_t reg;
bool is_vector;
known_global()
{
//
}
};
struct lrsa_node
{
ir_control_flow_node* raw_node;
int start_time;
int end_time;
ir_control_flow_node* raw_node;
intrusive_linked_list_element<ir_control_flow_node*>* raw_node_element;
int start_time;
int end_time;
std::vector<known_global> known_globals;
};
static void find_lifetimes_first_last(std::unordered_map<uint64_t, register_lifetime>* lifetimes,ir_operand* operands, ir_operation* working_operation, int operand_count, int time, int is_source)
static void find_lifetimes_first_last(std::unordered_map<uint64_t, register_lifetime>* lifetimes,ir_operand* operands, ir_operation* working_operation, int operand_count, int time, int is_source, lrsa_node* working_node)
{
for (int i = 0; i < operand_count; ++i)
{
@@ -94,11 +113,14 @@ static void find_lifetimes_first_last(std::unordered_map<uint64_t, register_life
if (!in_map(lifetimes, working_register))
{
(*lifetimes)[working_register] = register_lifetime::create(working_register, time, working_operation);
(*lifetimes)[working_register] = register_lifetime::create(working_register, time, working_operation, working_node);
}
lifetime_reference = &(*lifetimes)[working_register];
lifetime_reference->is_vector = ir_operand::is_vector(&working);
lifetime_reference->death_node = working_node;
lifetime_reference->death = time;
if (is_source && lifetime_reference->death > lifetime_reference->birth)
@@ -139,11 +161,10 @@ int find_and_use_slot(bool* slots, int max)
void linier_scan_register_allocator_pass(ir_control_flow_graph* cfg)
{
std::unordered_map<uint64_t, register_lifetime> all_intervals;
std::unordered_map<ir_control_flow_node*, lrsa_node*> node_map;
int time = 0;
std::unordered_map<ir_control_flow_node*, lrsa_node*> node_map;
int node_count = 0;
for (auto i = cfg->linier_nodes->first; i != nullptr; i = i->next)
@@ -163,12 +184,13 @@ void linier_scan_register_allocator_pass(ir_control_flow_graph* cfg)
if (i->data == nullptr)
continue;
lrsa_node* temp_node = &working_nodes[node_count];
lrsa_node* working_node = &working_nodes[node_count];
temp_node->start_time = time;
temp_node->raw_node = i->data;
working_node->start_time = time;
working_node->raw_node = i->data;
working_node->raw_node_element = i;
node_map[i->data] = temp_node;
node_map[i->data] = working_node;
auto raw_node = i->data;
@@ -176,13 +198,13 @@ void linier_scan_register_allocator_pass(ir_control_flow_graph* cfg)
{
ir_operation* working_operation = &ins->data;
find_lifetimes_first_last(&all_intervals, working_operation->destinations.data, working_operation,working_operation->destinations.count, time, false);
find_lifetimes_first_last(&all_intervals, working_operation->sources.data, working_operation,working_operation->sources.count, time, true);
find_lifetimes_first_last(&all_intervals, working_operation->destinations.data, working_operation,working_operation->destinations.count, time, false, working_node);
find_lifetimes_first_last(&all_intervals, working_operation->sources.data, working_operation,working_operation->sources.count, time, true, working_node);
time++;
}
temp_node->end_time = time;
working_node->end_time = time;
node_count++;
}
@@ -209,7 +231,8 @@ void linier_scan_register_allocator_pass(ir_control_flow_graph* cfg)
loop_data this_loop;
this_loop.check_location = working_jump->start_time;
this_loop.loop_end = working_node->end_time;
this_loop.loop_end_node = working_node;
loops.push_back(this_loop);
}
@@ -226,9 +249,12 @@ void linier_scan_register_allocator_pass(ir_control_flow_graph* cfg)
if (!register_lifetime::in_interval(working_lifetime,loop->check_location))
continue;
if (loop->loop_end > working_lifetime->death)
int loop_end_time = loop->loop_end_node->end_time;
if (loop_end_time > working_lifetime->death)
{
working_lifetime->death = loop->loop_end;
working_lifetime->death = loop_end_time;
working_lifetime->death_node = loop->loop_end_node;
}
}
}
@@ -244,10 +270,51 @@ void linier_scan_register_allocator_pass(ir_control_flow_graph* cfg)
for (auto i : all_intervals)
{
register_lifetime lifetime = i.second;
register_lifetime* lifetime = &all_intervals[i.first];
births[lifetime.birth].push_back(lifetime.source_register);
deaths[lifetime.death].push_back(lifetime.source_register);
births[lifetime->birth].push_back(lifetime->source_register);
deaths[lifetime->death].push_back(lifetime->source_register);
if (lifetime->birth_node == lifetime->death_node)
continue;
lrsa_node* birth = lifetime->birth_node;
lrsa_node* death = lifetime->death_node;
for (auto element = birth->raw_node_element; element != death->raw_node_element->next; element = element->next)
{
lrsa_node* working_node = node_map[element->data];
known_global data;
data.is_vector = i.second.is_vector;
data.reg = i.first;
working_node->known_globals.push_back(data);
}
}
for (int i = 0; i < node_count; ++i)
{
lrsa_node* working_node = &working_nodes[i];
int source_count = working_node->known_globals.size();
if (source_count == 0)
continue;
ir_operand sources[source_count];
for (int o = 0; o < source_count; ++o)
{
known_global data = working_node->known_globals[o];
ir_operand source = ir_operand::create_reg(data.reg, int64 + data.is_vector);
sources[o] = source;
}
ir_operation_block::emit_with(cfg->source_ir, ir_register_allocator_hint_global, nullptr, 0, sources, source_count, working_node->raw_node->entry_instruction);
}
std::unordered_map<uint64_t, uint64_t> working_remap;
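
A self-contained sketch of the interval fan-out the hunk above performs: a value whose lifetime spans several nodes gets recorded as a known global in every node it crosses, so each block can later be prefixed with a hint instruction. The shapes here are simplified stand-ins for register_lifetime and lrsa_node.

#include <cstdint>
#include <vector>

struct toy_lifetime { uint64_t reg; int birth_node; int death_node; };

int main()
{
    std::vector<toy_lifetime> lifetimes = {{5, 0, 2}, {7, 1, 1}};
    std::vector<std::vector<uint64_t>> known_globals(3); // one list per node

    for (const auto& lt : lifetimes)
    {
        if (lt.birth_node == lt.death_node)
            continue; // value never leaves its block: no hint needed
        for (int n = lt.birth_node; n <= lt.death_node; ++n)
            known_globals[n].push_back(lt.reg); // reg is live in node n
    }
    // known_globals is now {{5}, {5}, {5}}; register 7 stays block-local,
    // matching the birth_node == death_node early-out in the real pass.
}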

View File

@@ -47,6 +47,38 @@ struct ssa_context
std::vector<global_usage_location*> global_usage_pool;
};
static bool unpredictable_96_bits(ir_instructions instruction)
{
switch (instruction)
{
case x86_addss:
case x86_divss:
case x86_maxss:
case x86_minss:
case x86_mulss:
case x86_subss:
return true;
}
return false;
}
static bool unpredictable_64_bits(ir_instructions instruction)
{
switch (instruction)
{
case x86_addsd:
case x86_divsd:
case x86_maxsd:
case x86_minsd:
case x86_mulsd:
case x86_subsd:
return true;
}
return false;
}
static void connect_global_usages(global_usage_location* a, global_usage_location* b)
{
intrusive_linked_list<global_usage_location*>::insert_element(a->connections, b);
@@ -119,29 +151,36 @@ static void destroy_ssa_context(ssa_context* to_destroy)
}
}
static bool look_for_and_connect_global_usage(ssa_node* look_at_node,global_usage_location* to_connect, uint64_t look_for_register, int time, bool is_global)
static bool look_for_and_connect_global_usage(ssa_node* node,global_usage_location* to_connect, uint64_t look_for_register, int time, bool is_global)
{
if (!in_set(&look_at_node->declared_in_block, look_for_register))
if (!in_set(&node->declared_in_block, look_for_register))
{
if (in_map(&node->cached_global_declarations, look_for_register))
{
connect_global_usages(to_connect,node->cached_global_declarations[look_for_register]);
return true;
}
return false;
}
if (is_global)
{
if (!in_map(&look_at_node->last_declared, look_for_register))
if (!in_map(&node->last_declared, look_for_register))
{
throw_error();
}
time = look_at_node->last_declared[look_for_register];
time = node->last_declared[look_for_register];
}
for (; time != -1; -- time)
{
if (!in_map(&look_at_node->declarations_global_info[time], look_for_register))
if (!in_map(&node->declarations_global_info[time], look_for_register))
continue;
global_usage_location* working_connection = look_at_node->declarations_global_info[time][look_for_register];
global_usage_location* working_connection = node->declarations_global_info[time][look_for_register];
connect_global_usages(working_connection, to_connect);
@@ -169,6 +208,8 @@ static void find_register_in_parents(ssa_node* look_at_node, uint64_t look_for_r
{
find_register_in_parents(i, look_for_register, to_connect, visited);
}
look_at_node->cached_global_declarations[look_for_register] = to_connect;
}
static void find_same_register_pools(ssa_node* node)
@@ -197,13 +238,6 @@ static void find_same_register_pools(ssa_node* node)
continue;
}
if (in_map(&node->cached_global_declarations, current_source->value))
{
connect_global_usages(this_global_usage,node->cached_global_declarations[current_source->value]);
continue;
}
this_global_usage->is_global = true;
for (auto inlet : node->inlets)
@@ -284,6 +318,57 @@ static void remap_global_usage(global_usage_location* to_remap, std::unordered_s
}
}
static uint64_t find_and_remap_basic_block_globals(ssa_context* context)
{
std::unordered_map<uint64_t, std::unordered_set<ssa_node*>> global_register_usage;
for (auto node : context->ssa_nodes)
{
auto raw_node = node->raw_node;
for (auto ins = raw_node->entry_instruction; ins != raw_node->final_instruction->next; ins = ins->next)
{
ir_operation* working_instruction = &ins->data;
for (int o = 0; o < working_instruction->destinations.count; ++o)
{
ir_operand working_operand = working_instruction->destinations[o];
global_register_usage[working_operand.value].insert(node);
}
for (int o = 0; o < working_instruction->sources.count; ++o)
{
ir_operand working_operand = working_instruction->sources[o];
if (ir_operand::is_constant(&working_operand))
{
continue;
}
global_register_usage[working_operand.value].insert(node);
}
}
}
std::unordered_map<uint64_t, uint64_t> global_remap;
int count = 0;
for (auto o : global_register_usage)
{
if (o.second.size() <= 1)
continue;
global_remap[o.first] = count;
count++;
}
ir_operation_block::ssa_remap(context->ir, &global_remap);
return count;
}
static uint64_t find_and_remap_true_globals(ssa_context* context)
{
for (auto i : context->ssa_nodes)
@@ -343,6 +428,19 @@ static bool is_global(ssa_context* context, ir_operand working_register)
return is_global(context, working_register.value);
}
static bool any_global(ssa_context* context, ir_operand* registers, int count)
{
for (int i = 0; i < count; ++i)
{
if (is_global(context, registers[i]))
{
return true;
}
}
return false;
}
uint64_t look_for_local_at_time(ssa_node* node, int time, uint64_t to_look_for)
{
for (; time != -1; time--)
@@ -515,6 +613,11 @@ static bool optimize_math(ssa_node* working_node)
ir_operand* des = working_operation->destinations.data;
ir_operand* src = working_operation->sources.data;
if (instruction_is_commutative(working_operation->instruction) && working_operation->sources.count == 2 && ir_operand::is_constant(&src[0]) && ir_operand::is_register(&src[1]))
{
swap_operands(&working_operation->sources[0], &working_operation->sources[1]);
}
switch (working_operation->instruction)
{
case ir_bitwise_and:
@@ -539,6 +642,53 @@ static bool optimize_math(ssa_node* working_node)
is_done = false;
}
else if (check_constant(src[1]))
{
switch (src[1].value)
{
case UINT8_MAX:
{
working_operation->instruction = ir_move;
working_operation->sources.count = 1;
des[0] = ir_operand::copy_new_raw_size(des[0], int8);
src[0] = ir_operand::copy_new_raw_size(src[0], int8);
is_done = false;
}; break;
case UINT16_MAX:
{
working_operation->instruction = ir_move;
working_operation->sources.count = 1;
des[0] = ir_operand::copy_new_raw_size(des[0], int16);
src[0] = ir_operand::copy_new_raw_size(src[0], int16);
is_done = false;
}; break;
case UINT32_MAX:
{
working_operation->instruction = ir_move;
working_operation->sources.count = 1;
des[0] = ir_operand::copy_new_raw_size(des[0], int32);
src[0] = ir_operand::copy_new_raw_size(src[0], int32);
is_done = false;
}; break;
case UINT64_MAX:
{
convert_to_move(working_operation, src[0]);
is_done = false;
}; break;
default:
break;
}
}
}; break;
case ir_multiply:
@@ -549,6 +699,26 @@ static bool optimize_math(ssa_node* working_node)
convert_to_move(working_operation, ir_operand::create_con(value, des[0].meta_data));
is_done = false;
}
else if (check_constant(src[1], 0))
{
convert_to_move(working_operation, ir_operand::create_con(0));
is_done = false;
}
else if (check_constant(src[1], 1))
{
convert_to_move(working_operation, src[0]);
is_done = false;
}
else if (check_constant(src[1], 2))
{
working_operation->instruction = ir_shift_left;
src[1] = ir_operand::create_con(1, src[1].meta_data);
is_done = false;
}
}; break;
@@ -700,7 +870,6 @@ static bool optimize_math(ssa_node* working_node)
is_done = false;
}
}; break;
case ir_decrament:
@@ -953,11 +1122,6 @@ static bool optimize_multiple_instructions(ssa_node* working_node)
ir_operand* src = working_operation->sources.data;
ir_operand* des = working_operation->destinations.data;
if (instruction_is_commutative(working_operation->instruction) && working_operation->sources.count == 2 && ir_operand::is_constant(&src[0]) && ir_operand::is_register(&src[1]))
{
swap_operands(&working_operation->sources[0], &working_operation->sources[1]);
}
switch (working_operation->instruction)
{
case ir_zero_extend:
@@ -992,6 +1156,129 @@ static bool optimize_multiple_instructions(ssa_node* working_node)
is_done = false;
}; break;
case x86_sub_flags:
{
ir_operand result = des[0];
ir_operand n = des[1];
ir_operand z = des[2];
ir_operand c = des[3];
ir_operand v = des[4];
if (any_global(context, des, 5))
{
continue;
}
if (usage_count[result.value] > 0)
{
continue;
}
if (
usage_count[n.value] == 0 &&
usage_count[z.value] == 1 &&
usage_count[c.value] == 0 &&
usage_count[v.value] == 0
)
{
ir_operation* z_usage_instruction = nullptr;
for (auto look = i->next; look != raw_node->final_instruction->next; look = look->next)
{
ir_operation* check_instruction = &look->data;
if (check_for_register_in_instruction(check_instruction, z.value))
{
z_usage_instruction = check_instruction;
break;
}
}
if (z_usage_instruction == nullptr)
{
throw_error();
}
if (z_usage_instruction->instruction != ir_compare_equal && z_usage_instruction->instruction != ir_compare_not_equal)
{
continue;
}
if (!check_constant(src[1], 1))
{
continue;
}
ir_operand destination = z_usage_instruction->destinations.data[0];
working_operation->destinations.count = 1;
des[0] = ir_operand::copy_new_raw_size(destination, des[0].meta_data);
working_operation->instruction = z_usage_instruction->instruction;
nop_operation(z_usage_instruction);
is_done = false;
}
else if (
usage_count[n.value] == 0 &&
usage_count[z.value] == 0 &&
usage_count[c.value] == 1 &&
usage_count[v.value] == 0
)
{
ir_operation* c_usage_instruction = nullptr;
for (auto check = i->next; check != raw_node->final_instruction->next; check = check->next)
{
ir_operation* check_instruction = &check->data;
if (check_for_register_in_instruction(check_instruction, c.value))
{
c_usage_instruction = check_instruction;
break;
}
}
if (c_usage_instruction == nullptr)
{
throw_error();
}
ir_instructions replacement_instruction;
switch (c_usage_instruction->instruction)
{
case ir_compare_equal: replacement_instruction = ir_compare_greater_equal_unsigned; break;
case ir_compare_not_equal: replacement_instruction = ir_compare_less_unsigned; break;
default:
{
continue;
}; break;
}
if (!check_constant(src[1], 1))
{
continue;
}
ir_operand destination = c_usage_instruction->destinations.data[0];
working_operation->destinations.count = 1;
des[0] = ir_operand::copy_new_raw_size(destination, des[0].meta_data);
working_operation->instruction = replacement_instruction;
nop_operation(c_usage_instruction);
is_done = false;
}
}; break;
case ir_bitwise_exclusive_or:
{
if (ir_operand::is_register(&src[0]) && check_constant(src[1], 1))
@@ -1192,14 +1479,16 @@ static bool optimize_multiple_instructions(ssa_node* working_node)
if (replace_candidate->value == source_operand.value)
{
if (replace_candidate->meta_data != source_operand.meta_data)
{
//is_valid = false;
if (ir_operand::get_raw_size(replace_candidate) > ir_operand::get_raw_size(&source_operand))
{
//IN A LOT OF CASES, THIS CAN BE IGNORED
//TODO, FIND THOSE CASES
//break;
is_valid = false;
working_operation->instruction = ir_zero_extend;
break;
}
replace_candidate->value = destination_operand.value;
@@ -1293,6 +1582,18 @@ static bool optimize_multiple_instructions(ssa_node* working_node)
}; break;
default:
if (unpredictable_96_bits(working_operation->instruction))
{
ir_operand destination_operand = des[0];
if (is_global(context,destination_operand))
{
break;
}
}
break;
}
}
@@ -1333,13 +1634,16 @@ static bool optimize_local_moves(ssa_node* working_node)
bool is_zero_extend_check = false;
for (auto r = i->next; r != raw_node->final_instruction->next; r = r->next)
if (!ir_operand::is_constant(&to_replace))
{
if (check_if_zero_extend(&r->data, destination))
for (auto r = i->next; r != raw_node->final_instruction->next; r = r->next)
{
is_zero_extend_check = true;
if (check_if_zero_extend(&r->data, destination))
{
is_zero_extend_check = true;
break;
break;
}
}
}
@@ -1363,7 +1667,7 @@ static bool optimize_local_moves(ssa_node* working_node)
return is_done;
}
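
The x86_sub_flags rewrite earlier in this file folds a flag-producing subtract into a single compare when only one flag is consumed. A toy demonstration, in plain C++ rather than the IR, that the Z-flag variant of that fusion is behavior-preserving:

#include <cstdint>
#include <cstdio>

static bool sub_then_test_z(uint64_t a, uint64_t b)
{
    uint64_t result = a - b;  // the subtract the flags describe
    bool z = (result == 0);   // the Z flag
    return z;                 // consumed by an ir_compare_equal-style test
}

static bool fused_compare(uint64_t a, uint64_t b)
{
    return a == b;            // the single compare the pass rewrites to
}

int main()
{
    for (uint64_t a = 0; a < 8; ++a)
        for (uint64_t b = 0; b < 8; ++b)
            if (sub_then_test_z(a, b) != fused_compare(a, b))
                return 1;     // would indicate the fusion changed behavior
    std::printf("Z-flag fusion preserved behavior on the tested range\n");
}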
void convert_to_ssa(ir_operation_block* ir, bool optimize)
void convert_to_ssa(ir_operation_block* ir, compiler_flags flags)
{
ir_operation_block::clamp_operands(ir, true);
@@ -1373,13 +1677,24 @@ void convert_to_ssa(ir_operation_block* ir, bool optimize)
create_time_stamped_declaration_info(&ssa);
ssa.global_top = find_and_remap_true_globals(&ssa);
if (flags & compiler_flags::optimize_group_pool_ssa)
{
ssa.global_top = find_and_remap_true_globals(&ssa);
}
else if (flags & compiler_flags::optimize_basic_ssa)
{
ssa.global_top = find_and_remap_basic_block_globals(&ssa);
}
else
{
throw_error();
}
redifine_destinations(&ssa);
redifine_sources(&ssa);
while (optimize)
while (true)
{
bool is_done = true;
@@ -1399,11 +1714,9 @@ void convert_to_ssa(ir_operation_block* ir, bool optimize)
}
}
//ir_operation_block::log(ssa.ir);
//std::cin.get();
linier_scan_register_allocator_pass(ssa.cfg);
destroy_ssa_context(&ssa);
//ir_operation_block::log(ir);
}
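
A small sketch of the flag dispatch convert_to_ssa now performs. Only the flag and function names come from the diff; the bit values below are assumed.

#include <cstdint>
#include <cstdio>

enum compiler_flags : uint32_t
{
    optimize_basic_ssa      = 1 << 0, // hypothetical bit assignments
    optimize_group_pool_ssa = 1 << 1,
};

static const char* pick_globalization(uint32_t flags)
{
    if (flags & optimize_group_pool_ssa) return "find_and_remap_true_globals";
    if (flags & optimize_basic_ssa)      return "find_and_remap_basic_block_globals";
    return "error: no ssa strategy requested"; // the real code throws here
}

int main()
{
    std::printf("%s\n", pick_globalization(optimize_basic_ssa));
}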

View File

@@ -2,10 +2,11 @@
#define SSA_H
#include "ir.h"
#include "assembly/universal_flags.h"
#include <unordered_set>
#include <vector>
void convert_to_ssa(ir_operation_block* ir, bool optimize);
void convert_to_ssa(ir_operation_block* ir, compiler_flags flags);
#endif

View File

@@ -11,9 +11,9 @@ void growing_jit_cache::create(growing_jit_cache* result, jit_memory* memory)
result->memory = memory;
}
void* growing_jit_cache::allocate(growing_jit_cache* jit_cache, uint64_t size)
uint64_t growing_jit_cache::allocate(growing_jit_cache* jit_cache, uint64_t size)
{
void* result = (char*)jit_cache->memory->raw_memory_block + jit_cache->top;
void* result = (void*)jit_cache->top;
jit_cache->top += size;
@@ -22,18 +22,18 @@ void* growing_jit_cache::allocate(growing_jit_cache* jit_cache, uint64_t size)
throw_error();
}
return result;
return (uint64_t)result;
}
void* growing_jit_cache::append_code(growing_jit_cache* jit_cache, void* code, uint64_t size)
{
jit_cache->lock.lock();
void* result = allocate(jit_cache, size);
uint64_t result = allocate(jit_cache, size);
memcpy(result, code, size);
jit_memory::coppy_over(jit_cache->memory, (uint64_t)result, code, size);
jit_cache->lock.unlock();
return result;
return (void*)(result + (uint64_t)jit_cache->memory->raw_memory_block);
}
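
The allocate/append_code split above now deals in offsets and only forms a pointer at the very end. A simplified sketch of that pattern, with a plain byte buffer standing in for jit_memory:

#include <cstdint>
#include <cstring>
#include <mutex>

struct toy_cache
{
    uint8_t block[1024];
    uint64_t top = 0;
    std::mutex lock;
};

// Hand out an offset rather than a pointer, so the base mapping can be
// re-protected without invalidating what allocate returned.
static uint64_t toy_allocate(toy_cache* cache, uint64_t size)
{
    uint64_t offset = cache->top; // the real code range-checks here
    cache->top += size;
    return offset;
}

static void* toy_append(toy_cache* cache, const void* code, uint64_t size)
{
    std::lock_guard<std::mutex> guard(cache->lock);
    uint64_t offset = toy_allocate(cache, size);
    std::memcpy(cache->block + offset, code, size);
    return cache->block + offset; // pointer only formed at the end
}

int main()
{
    toy_cache cache;
    const uint8_t stub[] = {0xC3}; // x86 'ret'
    return toy_append(&cache, stub, sizeof(stub)) == nullptr;
}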

View File

@@ -16,7 +16,7 @@ struct growing_jit_cache
static void create(growing_jit_cache* result, jit_memory* memory);
static void* append_code(growing_jit_cache* jit_cache, void* code, uint64_t size);
static void* allocate(growing_jit_cache* jit_cache, uint64_t size);
static uint64_t allocate(growing_jit_cache* jit_cache, uint64_t size);
};
#endif

View File

@@ -6,6 +6,8 @@
#include "assembly/x86/x86_pipeline.h"
#include "assembly/x86/x86_assembler.h"
#include "assembly/aarch64/aarch64_assembler.h"
#include "assembly/aarch64/aarch64_pipeline.h"
typedef uint64_t (*abi_caller_function)(void*, uint64_t*);
@@ -13,10 +15,12 @@ static void create_x86_caller(jit_context* result)
{
uint64_t max_space = 400;
void* caller_code_space = growing_jit_cache::allocate(&result->jit_cache, max_space);
char caller_buffer[max_space];
uint64_t code_size;
assemble_x86_abi_caller_code(caller_code_space, &code_size, result->jit_cache.memory->host_abi);
assemble_x86_abi_caller_code(caller_buffer, &code_size, result->jit_cache.memory->host_abi);
growing_jit_cache::append_code(&result->jit_cache, caller_buffer, max_space);
if (code_size > max_space)
{
@@ -38,6 +42,11 @@ void jit_context::create(jit_context* result,uint64_t allocation_size, abi abi_i
create_x86_caller(result);
}; break;
case arm_64:
{
create_aarch64_caller(result, abi_information);
}; break;
default:
{
throw_error();
@@ -78,7 +87,12 @@ void* jit_context::compile_code(jit_context* context, ir_operation_block* ir_ope
{
case x86_64:
{
assemble_x86_64_pipeline(&code_buffer, &code_size, ir_operation_block_context, false, working_abi, flags);
assemble_x86_64_pipeline(&code_buffer, &code_size, ir_operation_block_context, working_abi, flags);
}; break;
case arm_64:
{
assemble_aarch64_pipeline(&code_buffer, &code_size, ir_operation_block_context, working_abi, flags);
}; break;
default: throw_error();
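
Both callers above now assemble into scratch storage before handing the bytes to the cache in a single append. A toy version of that assemble-then-copy pattern (the one-byte "assembler" is obviously hypothetical):

#include <cstdint>
#include <cstdio>

// Toy stand-in for the real assemblers: emits a single x86 'ret'.
static uint64_t toy_assemble(uint8_t* out)
{
    out[0] = 0xC3;
    return 1; // bytes emitted
}

int main()
{
    uint8_t scratch[400];             // assemble into local scratch first
    uint64_t size = toy_assemble(scratch);
    if (size > sizeof(scratch))       // overflow check before the bytes are used
        return 1;
    std::printf("emitted %llu byte(s)\n", (unsigned long long)size);
}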

View File

@@ -11,7 +11,16 @@
static bool allocate_executable_memory(void** memory, uint64_t size)
{
void* result = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
uint64_t map_info = MAP_PRIVATE | MAP_ANONYMOUS;
uint64_t map_proc = PROT_READ | PROT_WRITE;
#if defined(__APPLE__)
map_info |= MAP_JIT;
#else
map_proc |= PROT_EXEC;
#endif
void* result = mmap(NULL, size, map_proc, map_info, -1, 0);
*memory = result;
@@ -47,20 +56,38 @@ static void unmark_memory_executable(void* memory, uint64_t size)
#endif
static uint64_t align_64_kb(uint64_t source)
#define DEFAULT_KB_SIZE 4
static uint64_t align_page_size(uint64_t source, int kb_size = DEFAULT_KB_SIZE)
{
uint64_t page_size = 64 * 1024;
uint64_t page_size = kb_size * 1024;
uint64_t mask = page_size - 1;
return (source & ~mask) + page_size;
uint64_t working_result = (source & ~mask);
if (source > working_result)
{
working_result += page_size;
}
return working_result;
}
static uint64_t align_page(uint64_t source, int kb_size = DEFAULT_KB_SIZE)
{
uint64_t page_size = kb_size * 1024;
uint64_t mask = page_size - 1;
return source & ~mask;
}
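
A quick worked check of the two alignment helpers with a 4 KB page: align_page rounds down, align_page_size rounds up, and, unlike the old align_64_kb, an already-aligned size no longer gains an extra page.

int main()
{
    const unsigned long long page = 4 * 1024;
    const unsigned long long mask = page - 1;

    unsigned long long down = 5000ULL & ~mask;          // align_page:      4096
    unsigned long long up   = (5000ULL & ~mask) + page; // align_page_size: 8192
    unsigned long long kept = 8192ULL & ~mask;          // already aligned: 8192

    return !(down == 4096 && up == 8192 && kept == 8192); // exits 0 on success
}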
bool jit_memory::create(jit_memory** result, uint64_t allocation_size, abi host_abi)
{
jit_memory* working_result = new jit_memory();
allocation_size = align_64_kb(allocation_size);
allocation_size = align_page_size(allocation_size, 4);
working_result->host_abi = host_abi;
working_result->memory_block_size = allocation_size;
@@ -77,11 +104,43 @@ void jit_memory::destroy(jit_memory* to_destroy)
delete to_destroy;
}
void* jit_memory::coppy_over(jit_memory* jit_memory_context,uint64_t result_offset, void* source, uint64_t size)
static void align_page_info(jit_memory* jit_memory_context, uint64_t* offset, uint64_t* size)
{
*offset = align_page(*offset);
*size = align_page_size(*size);
*offset = (uint64_t)jit_memory_context->raw_memory_block + *offset;
}
static void ready_page_for_write(jit_memory* jit_memory_context, uint64_t result_offset, uint64_t size)
{
align_page_info(jit_memory_context, &result_offset, &size);
mprotect((void*)result_offset, size, PROT_READ | PROT_WRITE);
}
static void ready_page_for_execution(jit_memory* jit_memory_context, uint64_t result_offset, uint64_t size)
{
align_page_info(jit_memory_context, &result_offset, &size);
mprotect((void*)result_offset, size, PROT_READ | PROT_EXEC);
}
void* jit_memory::coppy_over(jit_memory* jit_memory_context, uint64_t result_offset, void* source, uint64_t size)
{
char* result_location = (char*)jit_memory_context->raw_memory_block + result_offset;
if (get_is_apple_silicon(jit_memory_context->host_abi))
{
ready_page_for_write(jit_memory_context, result_offset, size);
}
memcpy(result_location, source, size);
if (get_is_apple_silicon(jit_memory_context->host_abi))
{
ready_page_for_execution(jit_memory_context, result_offset, size);
}
return result_location;
}
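
On Apple Silicon, MAP_JIT mappings are often toggled with the per-thread pthread_jit_write_protect_np instead of the mprotect round-trip used above; a hedged sketch of that alternative (not what this commit does):

#if defined(__APPLE__)
#include <pthread.h>
#include <libkern/OSCacheControl.h>
#include <cstring>

// With MAP_JIT mappings, macOS lets each thread flip the whole region
// between writable and executable instead of calling mprotect per page.
static void jit_write(void* dst, const void* src, size_t size)
{
    pthread_jit_write_protect_np(0);  // region writable on this thread
    std::memcpy(dst, src, size);
    pthread_jit_write_protect_np(1);  // back to executable
    sys_icache_invalidate(dst, size); // drop stale instruction-cache lines
}
#endif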

View File

@@ -1,7 +1,7 @@
{
"folders": [
{
"path": "../../../rem_tester_arm"
"path": "../../../../../Users/raymondmataka/Desktop/rem_tester_arm"
},
{
"path": "../.."