[Compiler] In-progress support for vector float (AVX) (#171)

* begin work on vf support

* split reg kind into reg hw kind and class, use class for ireg

* try test

* clang format

* add some more ops and some example functions

* better lvf on statics

* add documentation
This commit is contained in:
water111 2020-12-30 15:33:51 -05:00 committed by GitHub
parent 4d713d5c8c
commit a80b331c27
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
39 changed files with 1714 additions and 313 deletions

View File

@ -15,7 +15,7 @@ constexpr int ARRAY_DATA_OFFSET = 12; // not including type tag
constexpr s32 GOAL_MAX_SYMBOLS = 0x2000; constexpr s32 GOAL_MAX_SYMBOLS = 0x2000;
constexpr s32 SYM_INFO_OFFSET = 0xff34; constexpr s32 SYM_INFO_OFFSET = 0xff34;
enum class RegKind { GPR_64, FLOAT, INT_128, FLOAT_4X, INVALID }; enum class RegClass { GPR_64, FLOAT, INT_128, VECTOR_FLOAT, INVALID };
constexpr u32 GOAL_NEW_METHOD = 0; // method ID of GOAL new constexpr u32 GOAL_NEW_METHOD = 0; // method ID of GOAL new
constexpr u32 GOAL_DEL_METHOD = 1; // method ID of GOAL delete constexpr u32 GOAL_DEL_METHOD = 1; // method ID of GOAL delete

View File

@ -9,18 +9,18 @@
#include "Type.h" #include "Type.h"
namespace { namespace {
std::string reg_kind_to_string(RegKind kind) { std::string reg_kind_to_string(RegClass kind) {
switch (kind) { switch (kind) {
case RegKind::GPR_64: case RegClass::GPR_64:
return "gpr64"; return "gpr64";
case RegKind::INT_128: case RegClass::INT_128:
return "int128"; return "int128";
case RegKind::FLOAT: case RegClass::FLOAT:
return "float"; return "float";
case RegKind::FLOAT_4X: case RegClass::VECTOR_FLOAT:
return "float-4x"; return "float-4x";
default: default:
throw std::runtime_error("Unsupported RegKind"); throw std::runtime_error("Unsupported HWRegKind");
} }
} }
@ -268,8 +268,8 @@ int NullType::get_size_in_memory() const {
throw std::runtime_error("get_size_in_memory called on NullType"); throw std::runtime_error("get_size_in_memory called on NullType");
} }
RegKind NullType::get_preferred_reg_kind() const { RegClass NullType::get_preferred_reg_class() const {
throw std::runtime_error("get_preferred_reg_kind called on NullType"); throw std::runtime_error("get_preferred_reg_class called on NullType");
} }
int NullType::get_offset() const { int NullType::get_offset() const {
@ -306,7 +306,7 @@ ValueType::ValueType(std::string parent,
bool is_boxed, bool is_boxed,
int size, int size,
bool sign_extend, bool sign_extend,
RegKind reg) RegClass reg)
: Type(std::move(parent), std::move(name), is_boxed), : Type(std::move(parent), std::move(name), is_boxed),
m_size(size), m_size(size),
m_sign_extend(sign_extend), m_sign_extend(sign_extend),
@ -339,7 +339,7 @@ int ValueType::get_size_in_memory() const {
/*! /*!
* The type of register that this value likes to be loaded into. * The type of register that this value likes to be loaded into.
*/ */
RegKind ValueType::get_preferred_reg_kind() const { RegClass ValueType::get_preferred_reg_class() const {
return m_reg_kind; return m_reg_kind;
} }
@ -447,8 +447,8 @@ int ReferenceType::get_load_size() const {
/*! /*!
* Pointers go in GPRs * Pointers go in GPRs
*/ */
RegKind ReferenceType::get_preferred_reg_kind() const { RegClass ReferenceType::get_preferred_reg_class() const {
return RegKind::GPR_64; return RegClass::GPR_64;
} }
std::string ReferenceType::print() const { std::string ReferenceType::print() const {
@ -592,7 +592,7 @@ bool BitField::operator==(const BitField& other) const {
} }
BitFieldType::BitFieldType(std::string parent, std::string name, int size, bool sign_extend) BitFieldType::BitFieldType(std::string parent, std::string name, int size, bool sign_extend)
: ValueType(std::move(parent), std::move(name), false, size, sign_extend, RegKind::GPR_64) {} : ValueType(std::move(parent), std::move(name), false, size, sign_extend, RegClass::GPR_64) {}
bool BitFieldType::lookup_field(const std::string& name, BitField* out) const { bool BitFieldType::lookup_field(const std::string& name, BitField* out) const {
for (auto& field : m_fields) { for (auto& field : m_fields) {

View File

@ -44,7 +44,7 @@ class Type {
virtual int get_size_in_memory() const = 0; virtual int get_size_in_memory() const = 0;
// if we have no other information, what kind of register should we load into? // if we have no other information, what kind of register should we load into?
virtual RegKind get_preferred_reg_kind() const = 0; virtual RegClass get_preferred_reg_class() const = 0;
// get the "offset" applied to boxed objects // get the "offset" applied to boxed objects
virtual int get_offset() const = 0; virtual int get_offset() const = 0;
@ -106,7 +106,7 @@ class NullType : public Type {
bool get_load_signed() const override; bool get_load_signed() const override;
int get_size_in_memory() const override; int get_size_in_memory() const override;
int get_inline_array_alignment() const override; int get_inline_array_alignment() const override;
RegKind get_preferred_reg_kind() const override; RegClass get_preferred_reg_class() const override;
int get_offset() const override; int get_offset() const override;
int get_in_memory_alignment() const override; int get_in_memory_alignment() const override;
std::string print() const override; std::string print() const override;
@ -125,12 +125,12 @@ class ValueType : public Type {
bool is_boxed, bool is_boxed,
int size, int size,
bool sign_extend, bool sign_extend,
RegKind reg); RegClass reg);
bool is_reference() const override; bool is_reference() const override;
int get_load_size() const override; int get_load_size() const override;
bool get_load_signed() const override; bool get_load_signed() const override;
int get_size_in_memory() const override; int get_size_in_memory() const override;
RegKind get_preferred_reg_kind() const override; RegClass get_preferred_reg_class() const override;
int get_offset() const override; int get_offset() const override;
int get_in_memory_alignment() const override; int get_in_memory_alignment() const override;
int get_inline_array_alignment() const override; int get_inline_array_alignment() const override;
@ -146,7 +146,7 @@ class ValueType : public Type {
int m_size = -1; int m_size = -1;
int m_offset = 0; int m_offset = 0;
bool m_sign_extend = false; bool m_sign_extend = false;
RegKind m_reg_kind = RegKind::INVALID; RegClass m_reg_kind = RegClass::INVALID;
}; };
/*! /*!
@ -159,7 +159,7 @@ class ReferenceType : public Type {
bool is_reference() const override; bool is_reference() const override;
int get_load_size() const override; int get_load_size() const override;
bool get_load_signed() const override; bool get_load_signed() const override;
RegKind get_preferred_reg_kind() const override; RegClass get_preferred_reg_class() const override;
std::string print() const override; std::string print() const override;
~ReferenceType() = default; ~ReferenceType() = default;
}; };

View File

@ -123,11 +123,11 @@ DerefInfo TypeSystem::get_deref_info(const TypeSpec& ts) const {
} }
// default to GPR // default to GPR
info.reg = RegKind::GPR_64; info.reg = RegClass::GPR_64;
info.mem_deref = true; info.mem_deref = true;
if (typecheck(TypeSpec("float"), ts, "", false, false)) { if (typecheck(TypeSpec("float"), ts, "", false, false)) {
info.reg = RegKind::FLOAT; info.reg = RegClass::FLOAT;
} }
if (ts.base_type() == "inline-array") { if (ts.base_type() == "inline-array") {
@ -165,7 +165,7 @@ DerefInfo TypeSystem::get_deref_info(const TypeSpec& ts) const {
// an array of values, which should be loaded in the correct way to the correct register // an array of values, which should be loaded in the correct way to the correct register
info.stride = result_type->get_size_in_memory(); info.stride = result_type->get_size_in_memory();
info.sign_extend = result_type->get_load_signed(); info.sign_extend = result_type->get_load_signed();
info.reg = result_type->get_preferred_reg_kind(); info.reg = result_type->get_preferred_reg_class();
info.load_size = result_type->get_load_size(); info.load_size = result_type->get_load_size();
assert(result_type->get_size_in_memory() == result_type->get_load_size()); assert(result_type->get_size_in_memory() == result_type->get_load_size());
} }
@ -640,7 +640,7 @@ void TypeSystem::add_builtin_types() {
// OBJECT // OBJECT
auto obj_type = add_type( auto obj_type = add_type(
"object", std::make_unique<ValueType>("object", "object", false, 4, false, RegKind::GPR_64)); "object", std::make_unique<ValueType>("object", "object", false, 4, false, RegClass::GPR_64));
auto structure_type = add_builtin_structure("object", "structure"); auto structure_type = add_builtin_structure("object", "structure");
auto basic_type = add_builtin_basic("structure", "basic"); auto basic_type = add_builtin_basic("structure", "basic");
@ -660,7 +660,7 @@ void TypeSystem::add_builtin_types() {
inline_array_type->set_runtime_type("pointer"); inline_array_type->set_runtime_type("pointer");
add_builtin_value_type("object", "number", 8); // sign extend? add_builtin_value_type("object", "number", 8); // sign extend?
add_builtin_value_type("number", "float", 4, false, false, RegKind::FLOAT); add_builtin_value_type("number", "float", 4, false, false, RegClass::FLOAT);
add_builtin_value_type("number", "integer", 8, false, false); // sign extend? add_builtin_value_type("number", "integer", 8, false, false); // sign extend?
add_builtin_value_type("integer", "binteger", 8, true, false); // sign extend? add_builtin_value_type("integer", "binteger", 8, true, false); // sign extend?
add_builtin_value_type("integer", "sinteger", 8, false, true); add_builtin_value_type("integer", "sinteger", 8, false, true);
@ -668,13 +668,13 @@ void TypeSystem::add_builtin_types() {
add_builtin_value_type("sinteger", "int16", 2, false, true); add_builtin_value_type("sinteger", "int16", 2, false, true);
add_builtin_value_type("sinteger", "int32", 4, false, true); add_builtin_value_type("sinteger", "int32", 4, false, true);
add_builtin_value_type("sinteger", "int64", 8, false, true); add_builtin_value_type("sinteger", "int64", 8, false, true);
add_builtin_value_type("sinteger", "int128", 16, false, true, RegKind::INT_128); add_builtin_value_type("sinteger", "int128", 16, false, true, RegClass::INT_128);
add_builtin_value_type("integer", "uinteger", 8); add_builtin_value_type("integer", "uinteger", 8);
add_builtin_value_type("uinteger", "uint8", 1); add_builtin_value_type("uinteger", "uint8", 1);
add_builtin_value_type("uinteger", "uint16", 2); add_builtin_value_type("uinteger", "uint16", 2);
add_builtin_value_type("uinteger", "uint32", 4); add_builtin_value_type("uinteger", "uint32", 4);
add_builtin_value_type("uinteger", "uint64", 8); add_builtin_value_type("uinteger", "uint64", 8);
add_builtin_value_type("uinteger", "uint128", 16, false, false, RegKind::INT_128); add_builtin_value_type("uinteger", "uint128", 16, false, false, RegClass::INT_128);
auto int_type = add_builtin_value_type("integer", "int", 8, false, true); auto int_type = add_builtin_value_type("integer", "int", 8, false, true);
int_type->disallow_in_runtime(); int_type->disallow_in_runtime();
@ -948,7 +948,7 @@ ValueType* TypeSystem::add_builtin_value_type(const std::string& parent,
int size, int size,
bool boxed, bool boxed,
bool sign_extend, bool sign_extend,
RegKind reg) { RegClass reg) {
add_type(type_name, add_type(type_name,
std::make_unique<ValueType>(parent, type_name, boxed, size, sign_extend, reg)); std::make_unique<ValueType>(parent, type_name, boxed, size, sign_extend, reg));
return get_type_of_type<ValueType>(type_name); return get_type_of_type<ValueType>(type_name);

View File

@ -36,7 +36,7 @@ struct DerefInfo {
bool can_deref = false; bool can_deref = false;
bool mem_deref = false; bool mem_deref = false;
bool sign_extend = false; bool sign_extend = false;
RegKind reg = RegKind::INVALID; RegClass reg = RegClass::INVALID;
int stride = -1; int stride = -1;
int load_size = -1; int load_size = -1;
TypeSpec result_type; TypeSpec result_type;
@ -68,7 +68,7 @@ struct ReverseDerefInfo {
struct ReverseDerefInputInfo { struct ReverseDerefInputInfo {
int offset = -1; int offset = -1;
bool mem_deref = false; bool mem_deref = false;
RegKind reg = RegKind::INVALID; RegClass reg = RegClass::INVALID;
int load_size = -1; int load_size = -1;
bool sign_extend = false; bool sign_extend = false;
TypeSpec input_type; TypeSpec input_type;
@ -81,7 +81,7 @@ struct DerefKind {
bool is_store = false; // when true, the sign extension shouldn't matter bool is_store = false; // when true, the sign extension shouldn't matter
int size = -1; // how many bytes int size = -1; // how many bytes
bool sign_extend = false; // for loads only (4 bytes and under), do we sign extend? bool sign_extend = false; // for loads only (4 bytes and under), do we sign extend?
RegKind reg_kind = RegKind::INVALID; RegClass reg_kind = RegClass::INVALID;
}; };
struct FieldReverseLookupInput { struct FieldReverseLookupInput {
@ -239,7 +239,7 @@ class TypeSystem {
int size, int size,
bool boxed = false, bool boxed = false,
bool sign_extend = false, bool sign_extend = false,
RegKind reg = RegKind::GPR_64); RegClass reg = RegClass::GPR_64);
void builtin_structure_inherit(StructureType* st); void builtin_structure_inherit(StructureType* st);
enum ForwardDeclareKind { TYPE, STRUCTURE, BASIC }; enum ForwardDeclareKind { TYPE, STRUCTURE, BASIC };

View File

@ -13,7 +13,7 @@
namespace versions { namespace versions {
// language version (OpenGOAL) // language version (OpenGOAL)
constexpr s32 GOAL_VERSION_MAJOR = 0; constexpr s32 GOAL_VERSION_MAJOR = 0;
constexpr s32 GOAL_VERSION_MINOR = 4; constexpr s32 GOAL_VERSION_MINOR = 5;
// these versions are from the game // these versions are from the game
constexpr u32 ART_FILE_VERSION = 6; constexpr u32 ART_FILE_VERSION = 6;

View File

@ -65,12 +65,12 @@ bool get_as_reg_offset(const IR* ir, RegOffset* out) {
return false; return false;
} }
RegKind get_reg_kind(const Register& r) { RegClass get_reg_kind(const Register& r) {
switch (r.get_kind()) { switch (r.get_kind()) {
case Reg::GPR: case Reg::GPR:
return RegKind::GPR_64; return RegClass::GPR_64;
case Reg::FPR: case Reg::FPR:
return RegKind::FLOAT; return RegClass::FLOAT;
default: default:
assert(false); assert(false);
} }

View File

@ -733,21 +733,13 @@
; ;;(define-extern rgba object) ;; unknown type ; ;;(define-extern rgba object) ;; unknown type
; (define-extern seekl function) ; (define-extern seekl function)
; ;; vector-h
(deftype vector (structure)
((data float 4 :offset-assert 0)
(x float :offset 0)
(y float :offset 4)
(z float :offset 8)
(w float :offset 12)
(quad uint128 :offset 0)
)
:method-count-assert 9
:size-assert #x10
:flag-assert #x900000010
)
;; vector-h ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; VECTOR-H ;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~;
(deftype bit-array (basic) (deftype bit-array (basic)
((length int32 :offset-assert 4) ((length int32 :offset-assert 4)
(allocated-length int32 :offset-assert 8) (allocated-length int32 :offset-assert 8)
@ -921,11 +913,30 @@
:flag-assert #x900000010 :flag-assert #x900000010
) )
;; what's the s here? (deftype vector (structure)
((data float 4 :offset-assert 0)
(x float :offset 0)
(y float :offset 4)
(z float :offset 8)
(w float :offset 12)
(quad uint128 :offset 0)
)
:method-count-assert 9
:size-assert #x10
:flag-assert #x900000010
)
(define-extern *null-vector* vector)
(define-extern *identity-vector* vector)
(define-extern *x-vector* vector)
(define-extern *y-vector* vector)
(define-extern *z-vector* vector)
(define-extern *up-vector* vector)
(deftype vector4s-3 (structure) (deftype vector4s-3 (structure)
((data float 12 :offset-assert 0) ;; guess ((data float 12 :offset-assert 0) ;; guess
(quad uint128 3 :offset 0) (quad uint128 3 :offset 0)
(vector vector4w 3 :offset 0) ;; guess (vector vector 3 :inline :offset 0) ;; guess
) )
:method-count-assert 9 :method-count-assert 9
:size-assert #x30 :size-assert #x30
@ -970,6 +981,8 @@
:flag-assert #x900000010 :flag-assert #x900000010
) )
;; todo isphere
(deftype box8s (structure) (deftype box8s (structure)
((data float 8 :offset-assert 0) ((data float 8 :offset-assert 0)
(quad uint128 2 :offset 0) (quad uint128 2 :offset 0)
@ -1019,7 +1032,6 @@
) )
) )
;; vector-h
(deftype vertical-planes (structure) (deftype vertical-planes (structure)
((data uint128 4 :offset-assert 0) ;; probably wrong ((data uint128 4 :offset-assert 0) ;; probably wrong
) )
@ -1064,6 +1076,18 @@
:flag-assert #x90000000c :flag-assert #x90000000c
) )
(define-extern vector-dot (function vector vector float))
(define-extern vector-dot-vu (function vector vector float))
(define-extern vector4-dot (function vector vector float))
(define-extern vector4-dot-vu (function vector vector float))
(define-extern vector+! (function vector vector vector vector))
(define-extern vector-! (function vector vector vector vector))
(define-extern vector-zero! (function vector vector))
(define-extern vector-reset! (function vector vector))
(define-extern vector-copy! (function vector vector vector))
(define-extern *zero-vector* vector)
;; bounding-box-h ;; bounding-box-h
(deftype bounding-box (structure) (deftype bounding-box (structure)
@ -31307,52 +31331,7 @@
;;(define-extern time-frame object) ;; unknown type ;;(define-extern time-frame object) ;; unknown type
;;(define-extern part-id object) ;; unknown type ;;(define-extern part-id object) ;; unknown type
;;(define-extern vector2h object) ;; unknown type
;;(define-extern vector4s-3 object) ;; unknown type
;;(define-extern *identity-vector* object) ;; unknown type
;;(define-extern vector4ub object) ;; unknown type
;;(define-extern vector2w object) ;; unknown type
;;(define-extern isphere object) ;; unknown type
(define-extern vector-dot-vu (function vector vector float))
;;(define-extern vertical-planes object) ;; unknown type
;;(define-extern cylinder object) ;; unknown type
;;(define-extern *x-vector* object) ;; unknown type
;;(define-extern *y-vector* object) ;; unknown type
;;(define-extern *z-vector* object) ;; unknown type
(define-extern vector-zero! function)
;;(define-extern bit-array object) ;; unknown type
(define-extern vector-! function)
;;(define-extern vector16b object) ;; unknown type
;;(define-extern vector-array object) ;; unknown type
;;(define-extern rgbaf object) ;; unknown type
;;(define-extern cylinder-flat object) ;; unknown type
;;(define-extern plane object) ;; unknown type
;;(define-extern vector3s object) ;; unknown type
(define-extern vector-dot (function vector vector float))
;;(define-extern *zero-vector* object) ;; unknown type
;;(define-extern vector4h object) ;; unknown type
;;(define-extern vector4w object) ;; unknown type
(define-extern box8s-array type)
;;(define-extern *null-vector* object) ;; unknown type
;;(define-extern qword object) ;; unknown type
;;(define-extern *up-vector* object) ;; unknown type
;;(define-extern vector4b object) ;; unknown type
;;(define-extern vector4w-4 object) ;; unknown type
;;(define-extern vertical-planes-array object) ;; unknown type
(define-extern vector4-dot (function vector vector float))
;;(define-extern vector2uh object) ;; unknown type
;;(define-extern sphere object) ;; unknown type
;;(define-extern vector3h object) ;; unknown type
;;(define-extern vector4w-3 object) ;; unknown type
(define-extern vector+! function)
(define-extern vector4-dot-vu (function vector vector float))
;;(define-extern box8s object) ;; unknown type
;;(define-extern vector3w object) ;; unknown type
(define-extern vector-reset! function)
;;(define-extern vector4w-2 object) ;; unknown type
;;(define-extern vector8h object) ;; unknown type
(define-extern vector-copy! function)
(define-extern vector type)
;;(define-extern bounding-box object) ;; unknown type ;;(define-extern bounding-box object) ;; unknown type
;;(define-extern bounding-box4w object) ;; unknown type ;;(define-extern bounding-box4w object) ;; unknown type
;;(define-extern bounding-box-both object) ;; unknown type ;;(define-extern bounding-box-both object) ;; unknown type

View File

@ -90,3 +90,12 @@
- Made string/float constants go in the main segment when they are declared in the top-level segment, instead of the top-level segment. This is what GOAL seems to do (not 100% sure yet) and avoids issues where you set something to a string constant in the top-level. This avoids the possibility of memory bugs at the cost of more memory usage (likely very little additional memory). - Made string/float constants go in the main segment when they are declared in the top-level segment, instead of the top-level segment. This is what GOAL seems to do (not 100% sure yet) and avoids issues where you set something to a string constant in the top-level. This avoids the possibility of memory bugs at the cost of more memory usage (likely very little additional memory).
- Added support for boxed arrays. They can be created with `new` and indexed with `->`. The compound type `(array <elt-type>)` is used to describe an array with a given content type. - Added support for boxed arrays. They can be created with `new` and indexed with `->`. The compound type `(array <elt-type>)` is used to describe an array with a given content type.
- Added `reset-here` option for `rlet`. - Added `reset-here` option for `rlet`.
## V0.5
- Breaking change: the register class `xmm` for a single float was renamed to `fpr` to distinguish it from other uses of `xmm` registers.
- Breaking change: the message format for reset and shutdown messages sent between the listener and runtime has changed.
- Improved code-generation quality when accessing a field or similar with an offset of zero from a base register.
- The listener now uses message IDs to more robustly handle the situation where a response message arrives extremely late, or where some sent messages are skipped.
- Fixed bug where references to the debug segment using RIP-relative links were not set to zero by the linker when the debug segment isn't loaded.
- The `rlet` form now supports 128-bit vector float registers with the `vf` register class.
- Added support for "vector float" assembly operations, including `lvf`, `svf`, `xor`, `sub`, `add`, and `blend`.

View File

@ -1169,7 +1169,7 @@ Not implemented well yet.
body... body...
) )
``` ```
Create register variables. You can optionally specify a register with the `:reg` option and a register name like `rax` or `xmm3`. The initial value of the register is not set. If you don't specify a register, a GPR will be chosen for you by the coloring system and it will behave like a `let`. If you don't specify a register, you can specify a register class (`gpr` or `xmm`) and the compiler will pick a GPR or XMM for you. Create register variables. You can optionally specify a register with the `:reg` option and a register name like `rax` or `xmm3`. The initial value of the register is not set. If you don't specify a register, a GPR will be chosen for you by the coloring system and it will behave like a `let`. If you don't specify a register, you can specify a register class (`gpr`, a normal 64-bit integer register; `fpr`, a 32-bit single precision float; or `vf`, a 128-bit floating point vector register) and the compiler will pick a GPR or XMM for you.
If you pick a callee-saved register and use it within the coloring system, the compiler will back it up for you in the prologue and restore it in the epilogue. If you pick a callee-saved register and use it within the coloring system, the compiler will back it up for you in the prologue and restore it in the epilogue.
If you pick a special register like `rsp`, it won't be backed up. If you pick a special register like `rsp`, it won't be backed up.
@ -1188,17 +1188,17 @@ Here is an example of using an `rlet` to access registers:
``` ```
## General assembly forms ## General assembly forms
In general, assembly forms have a name that begins with a `.`. They all evaluate to `none` and copy the form of an x86-64 instruction. For example `(.sub dst src)`. A destination must be a settable register (ok if it's spilled). So you can't do something like `(.sub (-> obj field) x)`. Instead, do `(set! temp (-> obj field))`, `(.sub temp x)`, `(set! (-> obj field) temp)`. The sources can be any expression. In general, assembly forms have a name that begins with a `.`. They all evaluate to `none` and copy the form of an x86-64 instruction. For example `(.sub dst src)`. A destination must be a settable register (ok if it's spilled). So you can't do something like `(.sub (-> obj field) x)`. Instead, do `(set! temp (-> obj field))`, `(.sub temp x)`, `(set! (-> obj field) temp)`. The sources can be any expression, or a register. This allows you to mix high-level code with assembly easily, like `(.mov rax (-> obj field))` or `(.push (+ 1 (-> obj field)))`.
By default, assembly forms work with the coloring system. This means that assembly and high level expression can be mixed together without clobbering each other. It also means use of callee-saved registers will cause them to be backed up/restored in the function prologue and epilogue. Use of weird registers like `r15`, `r14`, and `rsp` works as you would expect with the coloring system. By default, assembly forms work with the coloring system. This means that assembly and high level expression can be mixed together without clobbering each other. It also means use of callee-saved registers will cause them to be backed up/restored in the function prologue and epilogue. Use of weird registers like `r15`, `r14`, and `rsp` works as you would expect with the coloring system.
But you can also request to skip this with `:color #f` option, like `(.push my-reg-var :color #f)`. Be very careful with this. The `:color #f` option will only work with register variables from `rlet` which have a manually specified register. It will entirely bypass the coloring system and use this register. Use of this with other GOAL code is extremely dangerous and should be done very carefully or avoided. But you can also request to skip this with `:color #f` option, like `(.push my-reg-var :color #f)`. Be very careful with this. The `:color #f` option will only work with register variables from `rlet` which have a manually specified register. It will entirely bypass the coloring system and use this register. Use of this near high level GOAL variables is extremely dangerous and should be done very carefully or avoided, as the GOAL compiler will not know that you could be modifying its registers. In a form with `:color #f`, you cannot use higher level code or variables - all variables must be defined in `rlet`s. This is because higher level expressions and variables cannot be used without the coloring system.
## `.sub` ## `.sub`
```lisp ```lisp
(.sub dest src [:color #t|#f]) (.sub dest src [:color #t|#f])
``` ```
x86-64 subtraction. If coloring is on (the default), the `dest` must be a settable register (`rlet` var, `let` var, function argument, ...). It can't be a place like a symbol, field, stack variable, etc. If coloring is off, both `src` and `dest` must be registers defined and constrained in an enclosing `rlet`. x86-64 subtraction (64-bit). If coloring is on (the default), the `dest` must be a settable register (`rlet` var, `let` var, function argument, ...). It can't be a place like a symbol, field, stack variable, etc. If coloring is off, both `src` and `dest` must be registers defined and constrained in an enclosing `rlet`.
Example: Example:
``` ```
@ -1219,27 +1219,88 @@ Example:
) )
``` ```
## `.add`
```lisp
(.add dest src [:color #t|#f])
```
Addition (64-bit). Similar to subtraction.
## `.jr`
```lisp
(.jr address-reg [:color #t|#f])
```
Jump-register. Jumps to the address given. The address is treated as a 64-bit pointer, not a GOAL pointer.
## `.load-sym`
```lisp
(.load-sym dest symbol-name [:sext #t|#f] [:color #t|#f])
```
Load the value of a symbol into a register. By default, it will look at the type of the symbol to determine if it should be sign extended or not. You can override this with the `:sext` option if needed. The symbol must be known to the type system.
## `.push` ## `.push`
```lisp ```lisp
(.push src [:color #t|#f]) (.push src [:color #t|#f])
``` ```
The x86-64 push instruction. Does a 64-bit GPR. The `src` can be any expression if color is on. Otherwise it must be a register defined and constrained in an enclosing `rlet`. The x86-64 push instruction. Does a 64-bit GPR. The `src` can be any expression that can be put in a gpr if color is on. Otherwise it must be a register defined and constrained in an enclosing `rlet`.
## `.pop` ## `.pop`
```lisp ```lisp
(.pop dst [:color #t|#f]) (.pop dst [:color #t|#f])
``` ```
The x86-64 pop instruction. Does a 64-bit GPR. The `dst` can be any settable register if color is on. Otherwise it must be a register defined and constrained in an enclosing `rlet`. The x86-64 pop instruction. Does a 64-bit GPR. The `dst` can be any expression which evaluates to a settable register if color is on. Otherwise it must be a register defined and constrained in an enclosing `rlet`.
## `.ret` ## `.ret`
```lisp ```lisp
(.ret [:color #t|#f]) (.ret [:color #t|#f])
``` ```
The x86-64 ret instruction. The color option does nothing. This is not recognized as a control flow instruction by the coloring system. The x86-64 ret instruction. The color option does nothing. This is not recognized as a control flow instruction by the coloring system. It does not touch the return register `rax`.
## `.mov`
```lisp
(.mov dst src [:color #t|#f])
```
Move between two registers. The `dst` should be a register (either `rlet` or `let` variable), and the `src` can be a register or any expression. The following moves are supported:
- `gpr` to `gpr`
- `fpr` to `fpr` (only moves lower 32-bits of the xmms, uses `movss`)
- `vf` to `vf` (moves all 128-bits of the xmms, uses `vmovaps`)
- `gpr` to `fpr` (only moves 32-bits, uses `movd`)
- `fpr` to `gpr` (only moves 32-bits, upper 32-bits are zero, uses `movd`)
This code generation is identical to using a `(set! dst src)` form.
## `.lvf`
```lisp
(.lvf dst-reg src-loc [:color #t|#f])
```
Load a vector float register from `src-loc`. The `dst-reg` must be a vector float register. The `src-loc` can be a gpr containing a GOAL pointer or expression which gives a GOAL pointer. There is no type checking on the `src-loc` so be careful. The load uses `vmovaps`, so the source must be 16-byte aligned.
If the source is in the form `base-reg + constant-offset`, like from a `(&-> my-object my-inline-vector-field)`, the constant offset will be folded into the load instruction like `vmovaps xmm1, [r15 + rax + 12]`.
If the source is an immediate `(new 'static ...)` form that results in a statically allocated variable, it will use `RIP` relative addressing (32-bit immediate) form. This means that the code:
```lisp
(.lvf vf1 (new 'static 'vector :x 1.2 :y 2.3 :z 3.4 :w 5.6))
```
will be just a single instruction to do a `vmovaps xmm1, [rip + XXX]`.
## `.svf`
```lisp
(.svf dst-loc src-reg [:color #t|#f])
```
Store a vector float. Works similarly to the `lvf` form, but there is no optimized case for storing into a static because this isn't allowed in GOAL.
## Three operand vector float operations.
```lisp
(.<op-name>.vf dst src0 src1 [:color #t|#f])
```
All the three operand forms work similarly. You can do something like `(.add.vf vf1 vf2 vf3)`. All operations use the similarly named `v<op-name>ps` instruction, xmm128 VEX encoding. We support `xor`, `sub`, and `add` so far.
## `.blend.vf`
```lisp
(.blend.vf dst src0 src1 mask [:color #t|#f])
```
Wrapper around `vblendps` (VEX xmm128 version) instruction. The `mask` must evaluate to a constant integer at compile time. The integer must be in the range of 0-15.
# Compiler Forms - Unsorted # Compiler Forms - Unsorted

View File

@ -268,7 +268,7 @@
(deftype vector4s-3 (structure) (deftype vector4s-3 (structure)
((data float 12 :offset-assert 0) ;; guess ((data float 12 :offset-assert 0) ;; guess
(quad uint128 3 :offset 0) (quad uint128 3 :offset 0)
(vector vector4w 3 :offset 0) ;; guess (vector vector 3 :inline :offset 0) ;; guess
) )
:method-count-assert 9 :method-count-assert 9
:size-assert #x30 :size-assert #x30
@ -315,6 +315,7 @@
:flag-assert #x900000010 :flag-assert #x900000010
) )
; todo
; (deftype isphere (vec4s) ; (deftype isphere (vec4s)
; () ; ()
; :method-count-assert 9 ; :method-count-assert 9
@ -417,8 +418,6 @@
:flag-assert #x90000000c :flag-assert #x90000000c
) )
;; todo dot, dot-vu, 4-dot, 4-dot-vu, +!, -!, zero!, reset!, copy!
(defun vector-dot ((a vector) (b vector)) (defun vector-dot ((a vector) (b vector))
"Take the dot product of two vectors. "Take the dot product of two vectors.
Only does the x, y, z compoments. Only does the x, y, z compoments.
@ -461,4 +460,79 @@
(vector4-dot a b) (vector4-dot a b)
) )
(defun vector+! ((dst vector) (a vector) (b vector))
"Set dst = a + b. The w component of dst is set to 0."
(declare (inline))
; register roles: vf0 = zero, vf1 = result, vf2 = a, vf3 = b
(rlet ((vf0 :class vf :reset-here #t)
(vf1 :class vf :reset-here #t)
(vf2 :class vf :reset-here #t)
(vf3 :class vf :reset-here #t))
; load vectors
(.lvf vf2 a)
(.lvf vf3 b)
; set vf0 to zero
(.xor.vf vf0 vf0 vf0)
; add
(.add.vf vf1 vf2 vf3)
; set w = 0
; (mask #b1000 takes only the w element from the zeroed vf0)
(.blend.vf vf1 vf1 vf0 #b1000)
; store
(.svf dst vf1)
)
; return dst
dst
)
(defun vector-! ((dst vector) (a vector) (b vector))
"Set dst = a - b. The w component of dst is set to 0."
(declare (inline))
; register roles: vf0 = zero, vf1 = result, vf2 = a, vf3 = b
(rlet ((vf0 :class vf :reset-here #t)
(vf1 :class vf :reset-here #t)
(vf2 :class vf :reset-here #t)
(vf3 :class vf :reset-here #t))
; load vectors
(.lvf vf2 a)
(.lvf vf3 b)
; set vf0 to zero
(.xor.vf vf0 vf0 vf0)
; subtract
(.sub.vf vf1 vf2 vf3)
; set w = 0
; (mask #b1000 takes only the w element from the zeroed vf0)
(.blend.vf vf1 vf1 vf0 #b1000)
; store
(.svf dst vf1)
)
; return dst
dst
)
(defun vector-zero! ((dest vector))
"Set xyzw to 0."
(declare (inline))
(rlet ((vf1 :class vf :reset-here #t))
; set vf1 = 0
; (xor of a register with itself gives all zero bits)
(.xor.vf vf1 vf1 vf1)
; store the 0
(.svf dest vf1)
)
; return dest
dest
)
(defun vector-reset! ((dst vector))
"Set vector to 0,0,0,1."
(declare (inline))
; clear all four components first
(vector-zero! dst)
; then overwrite only w with 1.0
(set! (-> dst w) 1.0)
; return dst
dst
)
(defun vector-copy! ((dst vector) (src vector))
"Copy vector src to dst. Copies the entire quadword (xyzw).
The vectors must be aligned."
(declare (inline))
(rlet ((vf1 :class vf :reset-here #t))
; load all of src into a vector float register
(.lvf vf1 src)
; and store it to dst
(.svf dst vf1)
)
; return dst
dst
)
(define *zero-vector* (new 'static 'vector :x 0. :y 0. :z 0. :w 0.)) (define *zero-vector* (new 'static 'vector :x 0. :y 0. :z 0. :w 0.))

View File

@ -65,6 +65,11 @@ class Compiler {
Val* compile_get_symbol_value(const goos::Object& form, const std::string& name, Env* env); Val* compile_get_symbol_value(const goos::Object& form, const std::string& name, Env* env);
Val* compile_function_or_method_call(const goos::Object& form, Env* env); Val* compile_function_or_method_call(const goos::Object& form, Env* env);
// Compile a three-operand vector float math form. The operation is chosen by kind.
// NOTE(review): presumably the shared implementation behind compile_asm_xor_vf,
// compile_asm_sub_vf and compile_asm_add_vf - confirm in the definition.
Val* compile_asm_vf_math3(const goos::Object& form,
const goos::Object& rest,
IR_VFMath3Asm::Kind kind,
Env* env);
Val* get_field_of_structure(const StructureType* type, Val* get_field_of_structure(const StructureType* type,
Val* object, Val* object,
const std::string& field_name, const std::string& field_name,
@ -112,7 +117,7 @@ class Compiler {
TypeSpec parse_typespec(const goos::Object& src); TypeSpec parse_typespec(const goos::Object& src);
bool is_local_symbol(const goos::Object& obj, Env* env); bool is_local_symbol(const goos::Object& obj, Env* env);
emitter::RegKind get_preferred_reg_kind(const TypeSpec& ts); emitter::HWRegKind get_preferred_reg_kind(const TypeSpec& ts);
Val* compile_real_function_call(const goos::Object& form, Val* compile_real_function_call(const goos::Object& form,
RegVal* function, RegVal* function,
const std::vector<RegVal*>& args, const std::vector<RegVal*>& args,
@ -281,6 +286,13 @@ class Compiler {
Val* compile_asm_load_sym(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_load_sym(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_jr(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_jr(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_mov(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_mov(const goos::Object& form, const goos::Object& rest, Env* env);
// Vector float (vf) assembly forms: load, store, three-operand math (xor/sub/add), and blend.
Val* compile_asm_lvf(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_svf(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_xor_vf(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_sub_vf(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_add_vf(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_blend_vf(const goos::Object& form, const goos::Object& rest, Env* env);
// Atoms // Atoms
// Block // Block

View File

@ -18,8 +18,8 @@ void Env::emit(std::unique_ptr<IR> ir) {
/*! /*!
* Allocate an IRegister with the given type. * Allocate an IRegister with the given type.
*/ */
RegVal* Env::make_ireg(TypeSpec ts, emitter::RegKind kind) { RegVal* Env::make_ireg(TypeSpec ts, RegClass reg_class) {
return m_parent->make_ireg(std::move(ts), kind); return m_parent->make_ireg(std::move(ts), reg_class);
} }
/*! /*!
@ -41,11 +41,11 @@ BlockEnv* Env::find_block(const std::string& name) {
} }
RegVal* Env::make_gpr(const TypeSpec& ts) { RegVal* Env::make_gpr(const TypeSpec& ts) {
return make_ireg(coerce_to_reg_type(ts), emitter::RegKind::GPR); return make_ireg(coerce_to_reg_type(ts), RegClass::GPR_64);
} }
RegVal* Env::make_xmm(const TypeSpec& ts) { RegVal* Env::make_fpr(const TypeSpec& ts) {
return make_ireg(coerce_to_reg_type(ts), emitter::RegKind::XMM); return make_ireg(coerce_to_reg_type(ts), RegClass::FLOAT);
} }
std::unordered_map<std::string, Label>& Env::get_label_map() { std::unordered_map<std::string, Label>& Env::get_label_map() {
@ -77,9 +77,9 @@ void GlobalEnv::emit(std::unique_ptr<IR> ir) {
/*! /*!
* Allocate an IRegister with the given type. * Allocate an IRegister with the given type.
*/ */
RegVal* GlobalEnv::make_ireg(TypeSpec ts, emitter::RegKind kind) { RegVal* GlobalEnv::make_ireg(TypeSpec ts, RegClass reg_class) {
(void)ts; (void)ts;
(void)kind; (void)reg_class;
throw std::runtime_error("cannot alloc reg in GlobalEnv"); throw std::runtime_error("cannot alloc reg in GlobalEnv");
} }
@ -231,13 +231,13 @@ void FunctionEnv::resolve_gotos() {
} }
} }
RegVal* FunctionEnv::make_ireg(TypeSpec ts, emitter::RegKind kind) { RegVal* FunctionEnv::make_ireg(TypeSpec ts, RegClass reg_class) {
IRegister ireg; IRegister ireg;
ireg.kind = kind; ireg.reg_class = reg_class;
ireg.id = m_iregs.size(); ireg.id = m_iregs.size();
auto rv = std::make_unique<RegVal>(ireg, coerce_to_reg_type(ts)); auto rv = std::make_unique<RegVal>(ireg, coerce_to_reg_type(ts));
m_iregs.push_back(std::move(rv)); m_iregs.push_back(std::move(rv));
assert(kind != emitter::RegKind::INVALID); assert(reg_class != RegClass::INVALID);
return m_iregs.back().get(); return m_iregs.back().get();
} }

View File

@ -31,13 +31,13 @@ class Env {
explicit Env(Env* parent) : m_parent(parent) {} explicit Env(Env* parent) : m_parent(parent) {}
virtual std::string print() = 0; virtual std::string print() = 0;
virtual void emit(std::unique_ptr<IR> ir); virtual void emit(std::unique_ptr<IR> ir);
virtual RegVal* make_ireg(TypeSpec ts, emitter::RegKind kind); virtual RegVal* make_ireg(TypeSpec ts, RegClass reg_class);
virtual void constrain_reg(IRegConstraint constraint); // todo, remove! virtual void constrain_reg(IRegConstraint constraint); // todo, remove!
virtual RegVal* lexical_lookup(goos::Object sym); virtual RegVal* lexical_lookup(goos::Object sym);
virtual BlockEnv* find_block(const std::string& name); virtual BlockEnv* find_block(const std::string& name);
virtual std::unordered_map<std::string, Label>& get_label_map(); virtual std::unordered_map<std::string, Label>& get_label_map();
RegVal* make_gpr(const TypeSpec& ts); RegVal* make_gpr(const TypeSpec& ts);
RegVal* make_xmm(const TypeSpec& ts); RegVal* make_fpr(const TypeSpec& ts);
virtual ~Env() = default; virtual ~Env() = default;
Env* parent() { return m_parent; } Env* parent() { return m_parent; }
@ -58,7 +58,7 @@ class GlobalEnv : public Env {
GlobalEnv(); GlobalEnv();
std::string print() override; std::string print() override;
void emit(std::unique_ptr<IR> ir) override; void emit(std::unique_ptr<IR> ir) override;
RegVal* make_ireg(TypeSpec ts, emitter::RegKind kind) override; RegVal* make_ireg(TypeSpec ts, RegClass reg_class) override;
void constrain_reg(IRegConstraint constraint) override; void constrain_reg(IRegConstraint constraint) override;
RegVal* lexical_lookup(goos::Object sym) override; RegVal* lexical_lookup(goos::Object sym) override;
BlockEnv* find_block(const std::string& name) override; BlockEnv* find_block(const std::string& name) override;
@ -159,7 +159,7 @@ class FunctionEnv : public DeclareEnv {
void set_segment(int seg) { segment = seg; } void set_segment(int seg) { segment = seg; }
void emit(std::unique_ptr<IR> ir) override; void emit(std::unique_ptr<IR> ir) override;
void finish(); void finish();
RegVal* make_ireg(TypeSpec ts, emitter::RegKind kind) override; RegVal* make_ireg(TypeSpec ts, RegClass reg_class) override;
const std::vector<std::unique_ptr<IR>>& code() const { return m_code; } const std::vector<std::unique_ptr<IR>>& code() const { return m_code; }
int max_vars() const { return m_iregs.size(); } int max_vars() const { return m_iregs.size(); }
const std::vector<IRegConstraint>& constraints() { return m_constraints; } const std::vector<IRegConstraint>& constraints() { return m_constraints; }

View File

@ -41,6 +41,12 @@ Register get_no_color_reg(const RegVal* rv) {
return rv->rlet_constraint().value(); return rv->rlet_constraint().value();
} }
/*!
 * Get the register to use for an operand of an assembly-style IR.
 * With coloring enabled, the register is looked up with get_reg using the allocation result
 * for this IR record. With coloring disabled, get_no_color_reg is used instead.
 */
Register get_reg_asm(const RegVal* rv,
                     const AllocationResult& allocs,
                     emitter::IR_Record irec,
                     bool use_coloring) {
  if (use_coloring) {
    return get_reg(rv, allocs, irec);
  }
  return get_no_color_reg(rv);
}
void load_constant(u64 value, void load_constant(u64 value,
emitter::ObjectGenerator* gen, emitter::ObjectGenerator* gen,
emitter::IR_Record irec, emitter::IR_Record irec,
@ -64,6 +70,49 @@ void load_constant(u64 value,
} }
} }
} }
/*!
 * Emit the instruction that sets register dst to the value of register src.
 *
 * Supported moves:
 *  - GPR_64 -> GPR_64, FLOAT -> FLOAT, VECTOR_FLOAT -> VECTOR_FLOAT
 *    (a null instruction is emitted when both map to the same hardware register)
 *  - FLOAT -> GPR_64 and GPR_64 -> FLOAT using the 32-bit movd forms
 * Any other combination of register classes hits an assert.
 *
 * If use_coloring is false, the registers come from get_no_color_reg instead of the
 * register allocation result.
 */
void regset_common(emitter::ObjectGenerator* gen,
                   const AllocationResult& allocs,
                   emitter::IR_Record irec,
                   const RegVal* dst,
                   const RegVal* src,
                   bool use_coloring) {
  auto from = use_coloring ? get_reg(src, allocs, irec) : get_no_color_reg(src);
  auto to = use_coloring ? get_reg(dst, allocs, irec) : get_no_color_reg(dst);
  const auto from_class = src->ireg().reg_class;
  const auto to_class = dst->ireg().reg_class;

  if (from_class == to_class) {
    // moves that stay within a single register class
    const bool supported = from_class == RegClass::GPR_64 || from_class == RegClass::FLOAT ||
                           from_class == RegClass::VECTOR_FLOAT;
    if (!supported) {
      assert(false);  // unhandled move.
      return;
    }

    if (from == to) {
      // source and destination are the same hardware register: eliminate move
      gen->add_instr(IGen::null(), irec);
      return;
    }

    if (from_class == RegClass::GPR_64) {
      gen->add_instr(IGen::mov_gpr64_gpr64(to, from), irec);
    } else if (from_class == RegClass::FLOAT) {
      gen->add_instr(IGen::mov_xmm32_xmm32(to, from), irec);
    } else {
      gen->add_instr(IGen::mov_vf_vf(to, from), irec);
    }
    return;
  }

  // moves between different register classes
  if (from_class == RegClass::FLOAT && to_class == RegClass::GPR_64) {
    // xmm 1x -> gpr
    gen->add_instr(IGen::movd_gpr32_xmm32(to, from), irec);
  } else if (from_class == RegClass::GPR_64 && to_class == RegClass::FLOAT) {
    // gpr -> xmm 1x
    gen->add_instr(IGen::movd_xmm32_gpr32(to, from), irec);
  } else {
    assert(false);  // unhandled move.
  }
}
} // namespace } // namespace
/////////// ///////////
@ -79,7 +128,7 @@ RegAllocInstr IR_Return::to_rai() {
RegAllocInstr rai; RegAllocInstr rai;
rai.write.push_back(m_return_reg->ireg()); rai.write.push_back(m_return_reg->ireg());
rai.read.push_back(m_value->ireg()); rai.read.push_back(m_value->ireg());
if (m_value->ireg().kind == m_return_reg->ireg().kind) { if (m_value->ireg().reg_class == m_return_reg->ireg().reg_class) {
rai.is_move = true; // only true if we aren't moving from register kind to register kind rai.is_move = true; // only true if we aren't moving from register kind to register kind
} }
return rai; return rai;
@ -233,7 +282,7 @@ RegAllocInstr IR_RegSet::to_rai() {
RegAllocInstr rai; RegAllocInstr rai;
rai.write.push_back(m_dest->ireg()); rai.write.push_back(m_dest->ireg());
rai.read.push_back(m_src->ireg()); rai.read.push_back(m_src->ireg());
if (m_dest->ireg().kind == m_src->ireg().kind) { if (m_dest->ireg().reg_class == m_src->ireg().reg_class) {
rai.is_move = true; // only true if we aren't moving from register kind to register kind rai.is_move = true; // only true if we aren't moving from register kind to register kind
} }
return rai; return rai;
@ -242,22 +291,7 @@ RegAllocInstr IR_RegSet::to_rai() {
void IR_RegSet::do_codegen(emitter::ObjectGenerator* gen, void IR_RegSet::do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs, const AllocationResult& allocs,
emitter::IR_Record irec) { emitter::IR_Record irec) {
auto val_reg = get_reg(m_src, allocs, irec); regset_common(gen, allocs, irec, m_dest, m_src, true);
auto dest_reg = get_reg(m_dest, allocs, irec);
if (val_reg == dest_reg) {
gen->add_instr(IGen::null(), irec);
} else if (val_reg.is_gpr() && dest_reg.is_gpr()) {
gen->add_instr(IGen::mov_gpr64_gpr64(dest_reg, val_reg), irec);
} else if (val_reg.is_xmm() && dest_reg.is_gpr()) {
gen->add_instr(IGen::movd_gpr32_xmm32(dest_reg, val_reg), irec);
} else if (val_reg.is_gpr() && dest_reg.is_xmm()) {
gen->add_instr(IGen::movd_xmm32_gpr32(dest_reg, val_reg), irec);
} else if (val_reg.is_xmm() && dest_reg.is_xmm()) {
gen->add_instr(IGen::mov_xmm32_xmm32(dest_reg, val_reg), irec);
} else {
assert(false);
}
} }
std::string IR_RegSet::print() { std::string IR_RegSet::print() {
@ -643,13 +677,18 @@ void IR_StaticVarLoad::do_codegen(emitter::ObjectGenerator* gen,
auto load_info = m_src->get_load_info(); auto load_info = m_src->get_load_info();
assert(m_src->get_addr_offset() == 0); assert(m_src->get_addr_offset() == 0);
if (m_dest->ireg().kind == emitter::RegKind::XMM) { if (m_dest->ireg().reg_class == RegClass::FLOAT) {
assert(load_info.load_signed == false); assert(load_info.load_signed == false);
assert(load_info.load_size == 4); assert(load_info.load_size == 4);
assert(load_info.requires_load == true); assert(load_info.requires_load == true);
auto instr = gen->add_instr(IGen::static_load_xmm32(get_reg(m_dest, allocs, irec), 0), irec); auto instr = gen->add_instr(IGen::static_load_xmm32(get_reg(m_dest, allocs, irec), 0), irec);
gen->link_instruction_static(instr, m_src->rec, 0); gen->link_instruction_static(instr, m_src->rec, 0);
} else if (m_dest->ireg().reg_class == RegClass::VECTOR_FLOAT) {
// we don't check the load info intentionally because we want to allow loading an entire
// vector structure.
auto instr = gen->add_instr(IGen::loadvf_rip_plus_s32(get_reg(m_dest, allocs, irec), 0), irec);
gen->link_instruction_static(instr, m_src->rec, 0);
} else { } else {
assert(false); assert(false);
} }
@ -766,8 +805,9 @@ void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen,
IR_LoadConstOffset::IR_LoadConstOffset(const RegVal* dest, IR_LoadConstOffset::IR_LoadConstOffset(const RegVal* dest,
int offset, int offset,
const RegVal* base, const RegVal* base,
MemLoadInfo info) MemLoadInfo info,
: m_dest(dest), m_offset(offset), m_base(base), m_info(info) {} bool use_coloring)
: IR_Asm(use_coloring), m_dest(dest), m_offset(offset), m_base(base), m_info(info) {}
std::string IR_LoadConstOffset::print() { std::string IR_LoadConstOffset::print() {
return fmt::format("mov {}, [{} + {}]", m_dest->print(), m_base->print(), m_offset); return fmt::format("mov {}, [{} + {}]", m_dest->print(), m_base->print(), m_offset);
@ -783,17 +823,22 @@ RegAllocInstr IR_LoadConstOffset::to_rai() {
void IR_LoadConstOffset::do_codegen(emitter::ObjectGenerator* gen, void IR_LoadConstOffset::do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs, const AllocationResult& allocs,
emitter::IR_Record irec) { emitter::IR_Record irec) {
if (m_dest->ireg().kind == emitter::RegKind::GPR) { auto dest_reg = m_use_coloring ? get_reg(m_dest, allocs, irec) : get_no_color_reg(m_dest);
gen->add_instr(IGen::load_goal_gpr(get_reg(m_dest, allocs, irec), get_reg(m_base, allocs, irec), auto base_reg = m_use_coloring ? get_reg(m_base, allocs, irec) : get_no_color_reg(m_base);
emitter::gRegInfo.get_offset_reg(), m_offset, m_info.size,
m_info.sign_extend), if (m_dest->ireg().reg_class == RegClass::GPR_64) {
gen->add_instr(IGen::load_goal_gpr(dest_reg, base_reg, emitter::gRegInfo.get_offset_reg(),
m_offset, m_info.size, m_info.sign_extend),
irec); irec);
} else if (m_dest->ireg().kind == emitter::RegKind::XMM && m_info.size == 4 && } else if (m_dest->ireg().reg_class == RegClass::FLOAT && m_info.size == 4 &&
m_info.sign_extend == false && m_info.reg == ::RegKind::FLOAT) { m_info.sign_extend == false && m_info.reg == RegClass::FLOAT) {
gen->add_instr( gen->add_instr(
IGen::load_goal_xmm32(get_reg(m_dest, allocs, irec), get_reg(m_base, allocs, irec), IGen::load_goal_xmm32(dest_reg, base_reg, emitter::gRegInfo.get_offset_reg(), m_offset),
emitter::gRegInfo.get_offset_reg(), m_offset),
irec); irec);
} else if (m_dest->ireg().reg_class == RegClass::VECTOR_FLOAT && m_info.size == 16 &&
m_info.sign_extend == false && m_info.reg == RegClass::VECTOR_FLOAT) {
gen->add_instr(
IGen::load_goal_vf(dest_reg, base_reg, emitter::gRegInfo.get_offset_reg(), m_offset), irec);
} else { } else {
throw std::runtime_error("IR_LoadConstOffset::do_codegen not supported"); throw std::runtime_error("IR_LoadConstOffset::do_codegen not supported");
} }
@ -805,8 +850,9 @@ void IR_LoadConstOffset::do_codegen(emitter::ObjectGenerator* gen,
IR_StoreConstOffset::IR_StoreConstOffset(const RegVal* value, IR_StoreConstOffset::IR_StoreConstOffset(const RegVal* value,
int offset, int offset,
const RegVal* base, const RegVal* base,
int size) int size,
: m_value(value), m_offset(offset), m_base(base), m_size(size) {} bool use_coloring)
: IR_Asm(use_coloring), m_value(value), m_offset(offset), m_base(base), m_size(size) {}
std::string IR_StoreConstOffset::print() { std::string IR_StoreConstOffset::print() {
return fmt::format("move [{} + {}], {}", m_base->print(), m_offset, m_value->print()); return fmt::format("move [{} + {}], {}", m_base->print(), m_offset, m_value->print());
@ -822,15 +868,20 @@ RegAllocInstr IR_StoreConstOffset::to_rai() {
void IR_StoreConstOffset::do_codegen(emitter::ObjectGenerator* gen, void IR_StoreConstOffset::do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs, const AllocationResult& allocs,
emitter::IR_Record irec) { emitter::IR_Record irec) {
if (m_value->ireg().kind == emitter::RegKind::GPR) { auto base_reg = m_use_coloring ? get_reg(m_base, allocs, irec) : get_no_color_reg(m_base);
gen->add_instr( auto value_reg = m_use_coloring ? get_reg(m_value, allocs, irec) : get_no_color_reg(m_value);
IGen::store_goal_gpr(get_reg(m_base, allocs, irec), get_reg(m_value, allocs, irec),
emitter::gRegInfo.get_offset_reg(), m_offset, m_size), if (m_value->ireg().reg_class == RegClass::GPR_64) {
gen->add_instr(IGen::store_goal_gpr(base_reg, value_reg, emitter::gRegInfo.get_offset_reg(),
m_offset, m_size),
irec); irec);
} else if (m_value->ireg().kind == emitter::RegKind::XMM && m_size == 4) { } else if (m_value->ireg().reg_class == RegClass::FLOAT && m_size == 4) {
gen->add_instr( gen->add_instr(
IGen::store_goal_xmm32(get_reg(m_base, allocs, irec), get_reg(m_value, allocs, irec), IGen::store_goal_xmm32(base_reg, value_reg, emitter::gRegInfo.get_offset_reg(), m_offset),
emitter::gRegInfo.get_offset_reg(), m_offset), irec);
} else if (m_value->ireg().reg_class == RegClass::VECTOR_FLOAT && m_size == 16) {
gen->add_instr(
IGen::store_goal_vf(base_reg, value_reg, emitter::gRegInfo.get_offset_reg(), m_offset),
irec); irec);
} else { } else {
throw std::runtime_error("IR_StoreConstOffset::do_codegen can't handle this"); throw std::runtime_error("IR_StoreConstOffset::do_codegen can't handle this");
@ -1203,20 +1254,95 @@ RegAllocInstr IR_RegSetAsm::to_rai() {
void IR_RegSetAsm::do_codegen(emitter::ObjectGenerator* gen, void IR_RegSetAsm::do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs, const AllocationResult& allocs,
emitter::IR_Record irec) { emitter::IR_Record irec) {
auto val_reg = m_use_coloring ? get_reg(m_src, allocs, irec) : get_no_color_reg(m_src); regset_common(gen, allocs, irec, m_dst, m_src, m_use_coloring);
auto dest_reg = m_use_coloring ? get_reg(m_dst, allocs, irec) : get_no_color_reg(m_dst); }
if (val_reg == dest_reg) { ///////////////////////
gen->add_instr(IGen::null(), irec); // AsmVF3
} else if (val_reg.is_gpr() && dest_reg.is_gpr()) { ///////////////////////
gen->add_instr(IGen::mov_gpr64_gpr64(dest_reg, val_reg), irec);
} else if (val_reg.is_xmm() && dest_reg.is_gpr()) { IR_VFMath3Asm::IR_VFMath3Asm(bool use_color,
gen->add_instr(IGen::movd_gpr32_xmm32(dest_reg, val_reg), irec); const RegVal* dst,
} else if (val_reg.is_gpr() && dest_reg.is_xmm()) { const RegVal* src1,
gen->add_instr(IGen::movd_xmm32_gpr32(dest_reg, val_reg), irec); const RegVal* src2,
} else if (val_reg.is_xmm() && dest_reg.is_xmm()) { Kind kind)
gen->add_instr(IGen::mov_xmm32_xmm32(dest_reg, val_reg), irec); : IR_Asm(use_color), m_dst(dst), m_src1(src1), m_src2(src2), m_kind(kind) {}
} else {
std::string IR_VFMath3Asm::print() {
switch (m_kind) {
case Kind::XOR:
return fmt::format(".xor.vf{} {}, {}, {}", get_color_suffix_string(), m_dst->print(),
m_src1->print(), m_src2->print());
case Kind::SUB:
return fmt::format(".sub.vf{} {}, {}, {}", get_color_suffix_string(), m_dst->print(),
m_src1->print(), m_src2->print());
case Kind::ADD:
return fmt::format(".add.vf{} {}, {}, {}", get_color_suffix_string(), m_dst->print(),
m_src1->print(), m_src2->print());
default:
assert(false); assert(false);
} }
} }
/*!
 * Describe the register usage of this instruction: writes dst, reads src1 and src2.
 * When coloring is disabled, no register usage is reported.
 */
RegAllocInstr IR_VFMath3Asm::to_rai() {
  RegAllocInstr usage;
  if (!m_use_coloring) {
    return usage;
  }

  usage.write.push_back(m_dst->ireg());
  usage.read.push_back(m_src1->ireg());
  usage.read.push_back(m_src2->ireg());
  return usage;
}
/*!
 * Emit the single vector float instruction for this operation (xor, sub, or add).
 * An unknown kind hits an assert.
 */
void IR_VFMath3Asm::do_codegen(emitter::ObjectGenerator* gen,
                               const AllocationResult& allocs,
                               emitter::IR_Record irec) {
  auto dest_reg = get_reg_asm(m_dst, allocs, irec, m_use_coloring);
  auto lhs_reg = get_reg_asm(m_src1, allocs, irec, m_use_coloring);
  auto rhs_reg = get_reg_asm(m_src2, allocs, irec, m_use_coloring);

  if (m_kind == Kind::XOR) {
    gen->add_instr(IGen::xor_vf(dest_reg, lhs_reg, rhs_reg), irec);
  } else if (m_kind == Kind::SUB) {
    gen->add_instr(IGen::sub_vf(dest_reg, lhs_reg, rhs_reg), irec);
  } else if (m_kind == Kind::ADD) {
    gen->add_instr(IGen::add_vf(dest_reg, lhs_reg, rhs_reg), irec);
  } else {
    assert(false);
  }
}
/*!
 * Create a vector float blend instruction.
 * use_color is forwarded to IR_Asm. dst is the destination register, src1 and src2 are the
 * two source registers, and mask is the constant blend mask passed to the instruction.
 */
IR_BlendVF::IR_BlendVF(bool use_color,
const RegVal* dst,
const RegVal* src1,
const RegVal* src2,
u8 mask)
: IR_Asm(use_color), m_dst(dst), m_src1(src1), m_src2(src2), m_mask(mask) {}
/*!
 * Text form of this instruction: the op name with its color suffix, followed by
 * dst, src1, src2, and the mask.
 */
std::string IR_BlendVF::print() {
  const auto color_suffix = get_color_suffix_string();
  const auto dst_text = m_dst->print();
  const auto src1_text = m_src1->print();
  const auto src2_text = m_src2->print();
  return fmt::format(".blend.vf{} {}, {}, {}, {}", color_suffix, dst_text, src1_text, src2_text,
                     m_mask);
}
/*!
 * Describe the register usage of this instruction: writes dst, reads src1 and src2.
 * When coloring is disabled, no register usage is reported.
 */
RegAllocInstr IR_BlendVF::to_rai() {
  RegAllocInstr usage;
  if (!m_use_coloring) {
    return usage;
  }

  usage.write.push_back(m_dst->ireg());
  usage.read.push_back(m_src1->ireg());
  usage.read.push_back(m_src2->ireg());
  return usage;
}
/*!
 * Emit the blend instruction using the stored constant mask.
 */
void IR_BlendVF::do_codegen(emitter::ObjectGenerator* gen,
                            const AllocationResult& allocs,
                            emitter::IR_Record irec) {
  auto dest_reg = get_reg_asm(m_dst, allocs, irec, m_use_coloring);
  auto first_reg = get_reg_asm(m_src1, allocs, irec, m_use_coloring);
  auto second_reg = get_reg_asm(m_src2, allocs, irec, m_use_coloring);

  auto blend = IGen::blend_vf(dest_reg, first_reg, second_reg, m_mask);
  gen->add_instr(blend, irec);
}

View File

@ -274,38 +274,6 @@ class IR_ConditionalBranch : public IR {
bool m_resolved = false; bool m_resolved = false;
}; };
// IR for loading a value from memory at a base register plus a constant offset.
class IR_LoadConstOffset : public IR {
public:
IR_LoadConstOffset(const RegVal* dest, int offset, const RegVal* base, MemLoadInfo info);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_dest = nullptr;  // register receiving the loaded value
int m_offset = 0;                // constant offset added to the base
const RegVal* m_base = nullptr;  // register holding the base address
MemLoadInfo m_info;              // load description (size, sign extension, register class)
};
// IR for storing a register to memory at a base register plus a constant offset.
class IR_StoreConstOffset : public IR {
public:
IR_StoreConstOffset(const RegVal* value, int offset, const RegVal* base, int size);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_value = nullptr;  // register holding the value to store
int m_offset = 0;                 // constant offset added to the base
const RegVal* m_base = nullptr;   // register holding the base address
int m_size = 0;                   // size of the store
};
class IR_Null : public IR { class IR_Null : public IR {
public: public:
IR_Null() = default; IR_Null() = default;
@ -380,6 +348,46 @@ class IR_Asm : public IR {
bool m_use_coloring; bool m_use_coloring;
}; };
// IR for loading a value from memory at a base register plus a constant offset.
// Derives from IR_Asm so that register coloring can be turned off with use_coloring
// (which defaults to true).
class IR_LoadConstOffset : public IR_Asm {
public:
IR_LoadConstOffset(const RegVal* dest,
int offset,
const RegVal* base,
MemLoadInfo info,
bool use_coloring = true);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_dest = nullptr;  // register receiving the loaded value
int m_offset = 0;                // constant offset added to the base
const RegVal* m_base = nullptr;  // register holding the base address
MemLoadInfo m_info;              // load description (size, sign extension, register class)
};
// IR for storing a register to memory at a base register plus a constant offset.
// Derives from IR_Asm so that register coloring can be turned off with use_coloring
// (which defaults to true).
class IR_StoreConstOffset : public IR_Asm {
public:
IR_StoreConstOffset(const RegVal* value,
int offset,
const RegVal* base,
int size,
bool use_coloring = true);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
private:
const RegVal* m_value = nullptr;  // register holding the value to store
int m_offset = 0;                 // constant offset added to the base
const RegVal* m_base = nullptr;   // register holding the base address
int m_size = 0;                   // size of the store (codegen accepts 4 for float, 16 for vf)
};
class IR_AsmRet : public IR_Asm { class IR_AsmRet : public IR_Asm {
public: public:
IR_AsmRet(bool use_coloring); IR_AsmRet(bool use_coloring);
@ -486,4 +494,40 @@ class IR_RegSetAsm : public IR_Asm {
const RegVal* m_src = nullptr; const RegVal* m_src = nullptr;
}; };
// IR for a three-operand vector float math instruction: dst = src1 <op> src2.
class IR_VFMath3Asm : public IR_Asm {
public:
// The supported operations.
enum class Kind { XOR, SUB, ADD };
IR_VFMath3Asm(bool use_color,
const RegVal* dst,
const RegVal* src1,
const RegVal* src2,
Kind kind);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dst = nullptr;   // destination register
const RegVal* m_src1 = nullptr;  // first source register
const RegVal* m_src2 = nullptr;  // second source register
Kind m_kind;                     // which operation to emit
};
// IR for a vector float blend of src1 and src2 into dst, controlled by a constant mask.
class IR_BlendVF : public IR_Asm {
public:
IR_BlendVF(bool use_color, const RegVal* dst, const RegVal* src1, const RegVal* src2, u8 mask);
std::string print() override;
RegAllocInstr to_rai() override;
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
const RegVal* m_dst = nullptr;   // destination register
const RegVal* m_src1 = nullptr;  // first source register
const RegVal* m_src2 = nullptr;  // second source register
u8 m_mask = 0xff;                // blend mask, always overwritten by the constructor
};
#endif // JAK_IR_H #endif // JAK_IR_H

View File

@ -136,12 +136,12 @@ bool Compiler::is_local_symbol(const goos::Object& obj, Env* env) {
return false; return false;
} }
emitter::RegKind Compiler::get_preferred_reg_kind(const TypeSpec& ts) { emitter::HWRegKind Compiler::get_preferred_reg_kind(const TypeSpec& ts) {
switch (m_ts.lookup_type(ts)->get_preferred_reg_kind()) { switch (m_ts.lookup_type(ts)->get_preferred_reg_class()) {
case RegKind::GPR_64: case RegClass::GPR_64:
return emitter::RegKind::GPR; return emitter::HWRegKind::GPR;
case RegKind::FLOAT: case RegClass::FLOAT:
return emitter::RegKind::XMM; return emitter::HWRegKind::XMM;
default: default:
throw std::runtime_error("Unknown preferred register kind"); throw std::runtime_error("Unknown preferred register kind");
} }

View File

@ -9,7 +9,7 @@
RegVal* Val::to_gpr(Env* fe) { RegVal* Val::to_gpr(Env* fe) {
// TODO - handle 128-bit stuff here! // TODO - handle 128-bit stuff here!
auto rv = to_reg(fe); auto rv = to_reg(fe);
if (rv->ireg().kind == emitter::RegKind::GPR) { if (rv->ireg().reg_class == RegClass::GPR_64) {
return rv; return rv;
} else { } else {
auto re = fe->make_gpr(coerce_to_reg_type(m_ts)); auto re = fe->make_gpr(coerce_to_reg_type(m_ts));
@ -19,14 +19,14 @@ RegVal* Val::to_gpr(Env* fe) {
} }
/*! /*!
* Fallback to_xmm if a more optimized one is not provided. * Fallback to_fpr if a more optimized one is not provided.
*/ */
RegVal* Val::to_xmm(Env* fe) { RegVal* Val::to_fpr(Env* fe) {
auto rv = to_reg(fe); auto rv = to_reg(fe);
if (rv->ireg().kind == emitter::RegKind::XMM) { if (rv->ireg().reg_class == RegClass::FLOAT) {
return rv; return rv;
} else { } else {
auto re = fe->make_xmm(coerce_to_reg_type(m_ts)); auto re = fe->make_fpr(coerce_to_reg_type(m_ts));
fe->emit(std::make_unique<IR_RegSet>(re, rv)); fe->emit(std::make_unique<IR_RegSet>(re, rv));
return re; return re;
} }
@ -39,7 +39,7 @@ RegVal* RegVal::to_reg(Env* fe) {
RegVal* RegVal::to_gpr(Env* fe) { RegVal* RegVal::to_gpr(Env* fe) {
(void)fe; (void)fe;
if (m_ireg.kind == emitter::RegKind::GPR) { if (m_ireg.reg_class == RegClass::GPR_64) {
return this; return this;
} else { } else {
auto re = fe->make_gpr(coerce_to_reg_type(m_ts)); auto re = fe->make_gpr(coerce_to_reg_type(m_ts));
@ -48,12 +48,12 @@ RegVal* RegVal::to_gpr(Env* fe) {
} }
} }
RegVal* RegVal::to_xmm(Env* fe) { RegVal* RegVal::to_fpr(Env* fe) {
(void)fe; (void)fe;
if (m_ireg.kind == emitter::RegKind::XMM) { if (m_ireg.reg_class == RegClass::FLOAT) {
return this; return this;
} else { } else {
auto re = fe->make_xmm(coerce_to_reg_type(m_ts)); auto re = fe->make_fpr(coerce_to_reg_type(m_ts));
fe->emit(std::make_unique<IR_RegSet>(re, this)); fe->emit(std::make_unique<IR_RegSet>(re, this));
return re; return re;
} }
@ -104,15 +104,20 @@ RegVal* InlinedLambdaVal::to_reg(Env* fe) {
} }
RegVal* FloatConstantVal::to_reg(Env* fe) { RegVal* FloatConstantVal::to_reg(Env* fe) {
auto re = fe->make_xmm(coerce_to_reg_type(m_ts)); auto re = fe->make_fpr(coerce_to_reg_type(m_ts));
fe->emit(std::make_unique<IR_StaticVarLoad>(re, m_value)); fe->emit(std::make_unique<IR_StaticVarLoad>(re, m_value));
return re; return re;
} }
RegVal* MemoryOffsetConstantVal::to_reg(Env* fe) { RegVal* MemoryOffsetConstantVal::to_reg(Env* fe) {
auto re = fe->make_gpr(coerce_to_reg_type(m_ts)); auto re = fe->make_gpr(coerce_to_reg_type(m_ts));
if (offset == 0) {
fe->emit_ir<IR_RegSet>(re, base->to_gpr(fe));
} else {
fe->emit(std::make_unique<IR_LoadConstant64>(re, int64_t(offset))); fe->emit(std::make_unique<IR_LoadConstant64>(re, int64_t(offset)));
fe->emit(std::make_unique<IR_IntegerMath>(IntegerMathKind::ADD_64, re, base->to_gpr(fe))); fe->emit(std::make_unique<IR_IntegerMath>(IntegerMathKind::ADD_64, re, base->to_gpr(fe)));
}
return re; return re;
} }
@ -139,16 +144,16 @@ RegVal* MemoryDerefVal::to_reg(Env* fe) {
} }
} }
RegVal* MemoryDerefVal::to_xmm(Env* fe) { RegVal* MemoryDerefVal::to_fpr(Env* fe) {
// todo, support better loads/stores from the stack // todo, support better loads/stores from the stack
auto base_as_co = dynamic_cast<MemoryOffsetConstantVal*>(base); auto base_as_co = dynamic_cast<MemoryOffsetConstantVal*>(base);
if (base_as_co) { if (base_as_co) {
auto re = fe->make_xmm(coerce_to_reg_type(m_ts)); auto re = fe->make_fpr(coerce_to_reg_type(m_ts));
fe->emit(std::make_unique<IR_LoadConstOffset>(re, base_as_co->offset, fe->emit(std::make_unique<IR_LoadConstOffset>(re, base_as_co->offset,
base_as_co->base->to_gpr(fe), info)); base_as_co->base->to_gpr(fe), info));
return re; return re;
} else { } else {
auto re = fe->make_xmm(coerce_to_reg_type(m_ts)); auto re = fe->make_fpr(coerce_to_reg_type(m_ts));
auto addr = base->to_gpr(fe); auto addr = base->to_gpr(fe);
fe->emit(std::make_unique<IR_LoadConstOffset>(re, 0, addr, info)); fe->emit(std::make_unique<IR_LoadConstOffset>(re, 0, addr, info));
return re; return re;
@ -157,7 +162,7 @@ RegVal* MemoryDerefVal::to_xmm(Env* fe) {
RegVal* AliasVal::to_reg(Env* fe) { RegVal* AliasVal::to_reg(Env* fe) {
auto as_old_type = base->to_reg(fe); auto as_old_type = base->to_reg(fe);
auto result = fe->make_ireg(m_ts, as_old_type->ireg().kind); auto result = fe->make_ireg(m_ts, as_old_type->ireg().reg_class);
fe->emit(std::make_unique<IR_RegSet>(result, as_old_type)); fe->emit(std::make_unique<IR_RegSet>(result, as_old_type));
return result; return result;
} }
@ -174,7 +179,7 @@ RegVal* PairEntryVal::to_reg(Env* fe) {
int offset = is_car ? -2 : 2; int offset = is_car ? -2 : 2;
auto re = fe->make_gpr(coerce_to_reg_type(m_ts)); auto re = fe->make_gpr(coerce_to_reg_type(m_ts));
MemLoadInfo info; MemLoadInfo info;
info.reg = RegKind::GPR_64; info.reg = RegClass::GPR_64;
info.sign_extend = true; info.sign_extend = true;
info.size = 4; info.size = 4;
fe->emit(std::make_unique<IR_LoadConstOffset>(re, offset, base->to_gpr(fe), info)); fe->emit(std::make_unique<IR_LoadConstOffset>(re, offset, base->to_gpr(fe), info));
@ -197,7 +202,7 @@ RegVal* BitFieldVal::to_reg(Env* env) {
auto parent_reg = m_parent->to_gpr(env); auto parent_reg = m_parent->to_gpr(env);
auto fe = get_parent_env_of_type<FunctionEnv>(env); auto fe = get_parent_env_of_type<FunctionEnv>(env);
auto result = fe->make_ireg(coerce_to_reg_type(m_ts), emitter::RegKind::GPR); auto result = fe->make_ireg(coerce_to_reg_type(m_ts), RegClass::GPR_64);
env->emit(std::make_unique<IR_RegSet>(result, parent_reg)); env->emit(std::make_unique<IR_RegSet>(result, parent_reg));
int start_bit = m_offset; int start_bit = m_offset;

View File

@ -40,7 +40,7 @@ class Val {
throw std::runtime_error("to_reg called on invalid Val: " + print()); throw std::runtime_error("to_reg called on invalid Val: " + print());
} }
virtual RegVal* to_gpr(Env* fe); virtual RegVal* to_gpr(Env* fe);
virtual RegVal* to_xmm(Env* fe); virtual RegVal* to_fpr(Env* fe);
const TypeSpec& type() const { return m_ts; } const TypeSpec& type() const { return m_ts; }
void set_type(TypeSpec ts) { m_ts = std::move(ts); } void set_type(TypeSpec ts) { m_ts = std::move(ts); }
@ -74,7 +74,7 @@ class RegVal : public Val {
std::string print() const override { return m_ireg.to_string(); }; std::string print() const override { return m_ireg.to_string(); };
RegVal* to_reg(Env* fe) override; RegVal* to_reg(Env* fe) override;
RegVal* to_gpr(Env* fe) override; RegVal* to_gpr(Env* fe) override;
RegVal* to_xmm(Env* fe) override; RegVal* to_fpr(Env* fe) override;
void set_rlet_constraint(emitter::Register reg); void set_rlet_constraint(emitter::Register reg);
const std::optional<emitter::Register>& rlet_constraint() const; const std::optional<emitter::Register>& rlet_constraint() const;
@ -157,7 +157,7 @@ struct MemLoadInfo {
reg = di.reg; reg = di.reg;
} }
RegKind reg = RegKind::INVALID; RegClass reg = RegClass::INVALID;
bool sign_extend = false; bool sign_extend = false;
int size = -1; int size = -1;
}; };
@ -207,7 +207,7 @@ class MemoryDerefVal : public Val {
: Val(std::move(ts)), base(_base), info(_info) {} : Val(std::move(ts)), base(_base), info(_info) {}
std::string print() const override { return "[" + base->print() + "]"; } std::string print() const override { return "[" + base->print() + "]"; }
RegVal* to_reg(Env* fe) override; RegVal* to_reg(Env* fe) override;
RegVal* to_xmm(Env* fe) override; RegVal* to_fpr(Env* fe) override;
Val* base = nullptr; Val* base = nullptr;
MemLoadInfo info; MemLoadInfo info;
}; };

View File

@ -58,20 +58,22 @@ Val* Compiler::compile_rlet(const goos::Object& form, const goos::Object& rest,
} }
// figure out the class // figure out the class
emitter::RegKind register_kind = emitter::RegKind::GPR; RegClass register_class = RegClass::GPR_64;
if (def_args.has_named("class")) { if (def_args.has_named("class")) {
auto& class_name = def_args.named.at("class").as_symbol()->name; auto& class_name = def_args.named.at("class").as_symbol()->name;
if (class_name == "gpr") { if (class_name == "gpr") {
register_kind = emitter::RegKind::GPR; register_class = RegClass::GPR_64;
} else if (class_name == "xmm") { } else if (class_name == "fpr") {
register_kind = emitter::RegKind::XMM; register_class = RegClass::FLOAT;
} else if (class_name == "vf") {
register_class = RegClass::VECTOR_FLOAT;
} else { } else {
throw_compiler_error(o, "Register class {} is unknown.", class_name); throw_compiler_error(o, "Register class {} is unknown.", class_name);
} }
} }
// alloc a register: // alloc a register:
auto new_place_reg = env->make_ireg(ts, register_kind); auto new_place_reg = env->make_ireg(ts, register_class);
new_place_reg->mark_as_settable(); new_place_reg->mark_as_settable();
if (def_args.has_named("reg")) { if (def_args.has_named("reg")) {
@ -231,11 +233,174 @@ Val* Compiler::compile_asm_mov(const goos::Object& form, const goos::Object& res
if (args.has_named("color")) { if (args.has_named("color")) {
color = get_true_or_false(form, args.named.at("color")); color = get_true_or_false(form, args.named.at("color"));
} }
auto dest = compile_error_guard(args.unnamed.at(0), env)->to_gpr(env); auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env);
if (!dest->settable()) { if (!dest->settable()) {
throw_compiler_error(form, "Cannot .mov this. Got a {}.", dest->print()); throw_compiler_error(form, "Cannot .mov this. Got a {}.", dest->print());
} }
auto src = compile_error_guard(args.unnamed.at(1), env)->to_gpr(env); auto src = compile_error_guard(args.unnamed.at(1), env)->to_reg(env);
env->emit_ir<IR_RegSetAsm>(color, dest, src); env->emit_ir<IR_RegSetAsm>(color, dest, src);
return get_none(); return get_none();
} }
/*!
 * Compile the .lvf form: load a vector float from memory. Does an aligned load.
 *
 * Takes two unnamed arguments (destination, source) and an optional named "color" argument
 * which defaults to true and is forwarded to the emitted load IR.
 * The destination must be a settable register of class VECTOR_FLOAT.
 * The source may be a constant-offset memory location, a static value, or anything that can be
 * converted to a GPR holding the address.
 * Always returns none.
 */
Val* Compiler::compile_asm_lvf(const goos::Object& form, const goos::Object& rest, Env* env) {
  auto args = get_va(form, rest);
  va_check(form, args, {{}, {}}, {{"color", {false, goos::ObjectType::SYMBOL}}});

  const bool color =
      args.has_named("color") ? get_true_or_false(form, args.named.at("color")) : true;

  // destination: has to be a vector float register that we are allowed to write.
  auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env);
  const bool dest_ok = dest->settable() && dest->ireg().reg_class == RegClass::VECTOR_FLOAT;
  if (!dest_ok) {
    throw_compiler_error(form, "Cannot .lvf into this. Got a {}.", dest->print());
  }

  auto src = compile_error_guard(args.unnamed.at(1), env);

  // describes a full 16-byte load into a vector float register.
  MemLoadInfo info;
  info.reg = RegClass::VECTOR_FLOAT;
  info.size = 16;
  info.sign_extend = false;

  if (auto const_offset_src = dynamic_cast<MemoryOffsetConstantVal*>(src)) {
    // can do a clever offset here
    // NOTE(review): this path is guarded by assert(false), so it aborts in builds with asserts
    // enabled - presumably because it is not finished/tested yet. Confirm before relying on it.
    assert(false);
    env->emit_ir<IR_LoadConstOffset>(dest, const_offset_src->offset,
                                     const_offset_src->base->to_gpr(env), info, color);
    return get_none();
  }

  if (auto static_src = dynamic_cast<StaticVal*>(src)) {
    // static loads have no "color = false" variant yet.
    if (!color) {
      throw std::runtime_error("no color nyi for static loads");
    }
    env->emit_ir<IR_StaticVarLoad>(dest, static_src->obj);
    return get_none();
  }

  // general case: the source value is the address, loaded with a zero offset.
  env->emit_ir<IR_LoadConstOffset>(dest, 0, src->to_gpr(env), info, color);
  return get_none();
}
/*!
 * Compile the .svf form: store a vector float into memory. Does an aligned store.
 *
 * Takes two unnamed arguments (destination, source) and an optional named "color" argument
 * which defaults to true and is forwarded to the emitted store IR.
 * The source must be a register of class VECTOR_FLOAT. The destination is either a
 * constant-offset memory location or anything that can be converted to a GPR holding the address.
 * Always stores 16 bytes and returns none.
 */
Val* Compiler::compile_asm_svf(const goos::Object& form, const goos::Object& rest, Env* env) {
  auto args = get_va(form, rest);
  va_check(form, args, {{}, {}}, {{"color", {false, goos::ObjectType::SYMBOL}}});
  bool color = true;
  if (args.has_named("color")) {
    color = get_true_or_false(form, args.named.at("color"));
  }

  auto dest = compile_error_guard(args.unnamed.at(0), env);
  auto src = compile_error_guard(args.unnamed.at(1), env)->to_reg(env);

  // NOTE(review): requiring the *source* to be settable looks stricter than a store needs;
  // the check is kept as-is to avoid changing which programs are accepted.
  if (!src->settable() || src->ireg().reg_class != RegClass::VECTOR_FLOAT) {
    // report the value that actually failed the check (the source, not the destination).
    throw_compiler_error(form, "Cannot .svf from this. Got a {}.", src->print());
  }

  auto as_co = dynamic_cast<MemoryOffsetConstantVal*>(dest);
  if (as_co) {
    // can do a clever offset here
    // NOTE(review): this path is guarded by assert(false), so it aborts in builds with asserts
    // enabled - presumably because it is not finished/tested yet. Confirm before relying on it.
    assert(false);
    env->emit_ir<IR_StoreConstOffset>(src, as_co->offset, as_co->base->to_gpr(env), 16, color);
  } else {
    // general case: the destination value is the address, stored to with a zero offset.
    env->emit_ir<IR_StoreConstOffset>(src, 0, dest->to_gpr(env), 16, color);
  }
  return get_none();
}
/*!
 * Compile a vector float XOR form. Forwards to the shared 3-argument vector float math
 * compiler with the XOR kind.
 */
Val* Compiler::compile_asm_xor_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto math_kind = IR_VFMath3Asm::Kind::XOR;
  return compile_asm_vf_math3(form, rest, math_kind, env);
}
/*!
 * Compile a vector float subtract form. Forwards to the shared 3-argument vector float math
 * compiler with the SUB kind.
 */
Val* Compiler::compile_asm_sub_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto math_kind = IR_VFMath3Asm::Kind::SUB;
  return compile_asm_vf_math3(form, rest, math_kind, env);
}
/*!
 * Compile a vector float add form. Forwards to the shared 3-argument vector float math
 * compiler with the ADD kind.
 */
Val* Compiler::compile_asm_add_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto math_kind = IR_VFMath3Asm::Kind::ADD;
  return compile_asm_vf_math3(form, rest, math_kind, env);
}
/*!
 * Compile a vector float blend form.
 *
 * Takes four unnamed arguments (destination, first source, second source, mask) and an optional
 * named "color" argument which defaults to true and is forwarded to the emitted IR.
 * All three registers must be of class VECTOR_FLOAT and the destination must be settable.
 * The mask must evaluate to a constant integer in the range [0, 15].
 * Always returns none.
 */
Val* Compiler::compile_asm_blend_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  auto args = get_va(form, rest);
  va_check(form, args, {{}, {}, {}, {}}, {{"color", {false, goos::ObjectType::SYMBOL}}});

  const bool color =
      args.has_named("color") ? get_true_or_false(form, args.named.at("color")) : true;

  // destination register: writable vector float.
  auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env);
  if (!(dest->settable() && dest->ireg().reg_class == RegClass::VECTOR_FLOAT)) {
    throw_compiler_error(
        form, "Invalid destination register for a vector float 3-arg math form. Got a {}.",
        dest->print());
  }

  // each source is compiled and validated before the next argument is compiled.
  auto first_src = compile_error_guard(args.unnamed.at(1), env)->to_reg(env);
  if (!(first_src->ireg().reg_class == RegClass::VECTOR_FLOAT)) {
    throw_compiler_error(
        form, "Invalid first source register for a vector float 3-arg math form. Got a {}.",
        first_src->print());
  }

  auto second_src = compile_error_guard(args.unnamed.at(2), env)->to_reg(env);
  if (!(second_src->ireg().reg_class == RegClass::VECTOR_FLOAT)) {
    throw_compiler_error(
        form, "Invalid second source register for a vector float 3-arg math form. Got a {}.",
        second_src->print());
  }

  // the blend mask has to be known at compile time.
  int64_t mask = 0;
  const bool got_mask = try_getting_constant_integer(args.unnamed.at(3), &mask, env);
  if (!got_mask) {
    throw_compiler_error(form,
                         "The value {} is invalid for a blend mask, it could not be evaluated as a "
                         "constant integer.",
                         args.unnamed.at(3).print());
  }

  // only 4 bits of mask are accepted (presumably one per vector element - confirm in IR_BlendVF).
  const bool mask_in_range = mask >= 0 && mask <= 15;
  if (!mask_in_range) {
    throw_compiler_error(form, "The value {} is out of range for a blend mask.", mask);
  }

  env->emit_ir<IR_BlendVF>(color, dest, first_src, second_src, mask);
  return get_none();
}
/*!
 * Shared implementation of the 3-argument vector float math forms.
 *
 * Takes three unnamed arguments (destination, first source, second source) and an optional
 * named "color" argument which defaults to true and is forwarded to the emitted IR.
 * All three registers must be of class VECTOR_FLOAT and the destination must be settable.
 * The operation performed is selected by kind. Always returns none.
 */
Val* Compiler::compile_asm_vf_math3(const goos::Object& form,
                                    const goos::Object& rest,
                                    IR_VFMath3Asm::Kind kind,
                                    Env* env) {
  auto args = get_va(form, rest);
  va_check(form, args, {{}, {}, {}}, {{"color", {false, goos::ObjectType::SYMBOL}}});

  const bool color =
      args.has_named("color") ? get_true_or_false(form, args.named.at("color")) : true;

  // destination register: writable vector float.
  auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env);
  if (!(dest->settable() && dest->ireg().reg_class == RegClass::VECTOR_FLOAT)) {
    throw_compiler_error(
        form, "Invalid destination register for a vector float 3-arg math form. Got a {}.",
        dest->print());
  }

  // each source is compiled and validated before the next argument is compiled.
  auto first_src = compile_error_guard(args.unnamed.at(1), env)->to_reg(env);
  if (!(first_src->ireg().reg_class == RegClass::VECTOR_FLOAT)) {
    throw_compiler_error(
        form, "Invalid first source register for a vector float 3-arg math form. Got a {}.",
        first_src->print());
  }

  auto second_src = compile_error_guard(args.unnamed.at(2), env)->to_reg(env);
  if (!(second_src->ireg().reg_class == RegClass::VECTOR_FLOAT)) {
    throw_compiler_error(
        form, "Invalid second source register for a vector float 3-arg math form. Got a {}.",
        second_src->print());
  }

  env->emit_ir<IR_VFMath3Asm>(color, dest, first_src, second_src, kind);
  return get_none();
}

View File

@ -23,6 +23,12 @@ static const std::unordered_map<
{".add", &Compiler::compile_asm_add}, {".add", &Compiler::compile_asm_add},
{".load-sym", &Compiler::compile_asm_load_sym}, {".load-sym", &Compiler::compile_asm_load_sym},
{".mov", &Compiler::compile_asm_mov}, {".mov", &Compiler::compile_asm_mov},
{".lvf", &Compiler::compile_asm_lvf},
{".svf", &Compiler::compile_asm_svf},
{".xor.vf", &Compiler::compile_asm_xor_vf},
{".sub.vf", &Compiler::compile_asm_sub_vf},
{".add.vf", &Compiler::compile_asm_add_vf},
{".blend.vf", &Compiler::compile_asm_blend_vf},
// BLOCK FORMS // BLOCK FORMS
{"top-level", &Compiler::compile_top_level}, {"top-level", &Compiler::compile_top_level},

View File

@ -90,8 +90,8 @@ Condition Compiler::compile_condition(const goos::Object& condition, Env* env, b
// pick between a floating point and an integer comparison. // pick between a floating point and an integer comparison.
if (is_float(first_arg->type())) { if (is_float(first_arg->type())) {
gc.a = first_arg->to_xmm(env); gc.a = first_arg->to_fpr(env);
gc.b = second_arg->to_xmm(env); gc.b = second_arg->to_fpr(env);
gc.is_float = true; gc.is_float = true;
} else { } else {
gc.a = first_arg->to_gpr(env); gc.a = first_arg->to_gpr(env);

View File

@ -153,7 +153,7 @@ Val* Compiler::compile_lambda(const goos::Object& form, const goos::Object& rest
for (u32 i = 0; i < lambda.params.size(); i++) { for (u32 i = 0; i < lambda.params.size(); i++) {
IRegConstraint constr; IRegConstraint constr;
constr.instr_idx = 0; // constraint at function start constr.instr_idx = 0; // constraint at function start
auto ireg = new_func_env->make_ireg(lambda.params.at(i).type, emitter::RegKind::GPR); auto ireg = new_func_env->make_gpr(lambda.params.at(i).type);
ireg->mark_as_settable(); ireg->mark_as_settable();
constr.ireg = ireg->ireg(); constr.ireg = ireg->ireg();
constr.desired_register = emitter::gRegInfo.get_arg_reg(i); constr.desired_register = emitter::gRegInfo.get_arg_reg(i);
@ -165,7 +165,7 @@ Val* Compiler::compile_lambda(const goos::Object& form, const goos::Object& rest
place->func = new_func_env.get(); place->func = new_func_env.get();
// nasty function block env setup // nasty function block env setup
auto return_reg = new_func_env->make_ireg(get_none()->type(), emitter::RegKind::GPR); auto return_reg = new_func_env->make_gpr(get_none()->type());
auto func_block_env = new_func_env->alloc_env<BlockEnv>(new_func_env.get(), "#f"); auto func_block_env = new_func_env->alloc_env<BlockEnv>(new_func_env.get(), "#f");
func_block_env->return_value = return_reg; func_block_env->return_value = return_reg;
func_block_env->end_label = Label(new_func_env.get()); func_block_env->end_label = Label(new_func_env.get());
@ -357,7 +357,7 @@ Val* Compiler::compile_function_or_method_call(const goos::Object& form, Env* en
// note, inlined functions will get a more specific type if possible // note, inlined functions will get a more specific type if possible
// todo, is this right? // todo, is this right?
auto type = eval_args.at(i)->type(); auto type = eval_args.at(i)->type();
auto copy = env->make_ireg(type, get_preferred_reg_kind(type)); auto copy = env->make_ireg(type, m_ts.lookup_type(type)->get_preferred_reg_class());
env->emit(std::make_unique<IR_RegSet>(copy, eval_args.at(i))); env->emit(std::make_unique<IR_RegSet>(copy, eval_args.at(i)));
copy->mark_as_settable(); copy->mark_as_settable();
lexical_env->vars[head_as_lambda->lambda.params.at(i).name] = copy; lexical_env->vars[head_as_lambda->lambda.params.at(i).name] = copy;
@ -368,8 +368,7 @@ Val* Compiler::compile_function_or_method_call(const goos::Object& form, Env* en
RegVal* result_reg_if_return_from = nullptr; RegVal* result_reg_if_return_from = nullptr;
if (auto_inline || got_inlined_lambda) { if (auto_inline || got_inlined_lambda) {
inlined_block_env = fe->alloc_env<BlockEnv>(inlined_compile_env, "#f"); inlined_block_env = fe->alloc_env<BlockEnv>(inlined_compile_env, "#f");
result_reg_if_return_from = result_reg_if_return_from = inlined_compile_env->make_gpr(get_none()->type());
inlined_compile_env->make_ireg(get_none()->type(), emitter::RegKind::GPR);
inlined_block_env->return_value = result_reg_if_return_from; inlined_block_env->return_value = result_reg_if_return_from;
inlined_block_env->end_label = Label(fe); inlined_block_env->end_label = Label(fe);
inlined_compile_env = inlined_block_env; inlined_compile_env = inlined_block_env;
@ -474,7 +473,7 @@ Val* Compiler::compile_real_function_call(const goos::Object& form,
return_ts = function->type().last_arg(); return_ts = function->type().last_arg();
} }
auto return_reg = env->make_ireg(return_ts, emitter::RegKind::GPR); auto return_reg = env->make_gpr(return_ts);
// check arg count: // check arg count:
if (function->type().arg_count() && !is_varargs_function(function->type())) { if (function->type().arg_count() && !is_varargs_function(function->type())) {
@ -501,7 +500,7 @@ Val* Compiler::compile_real_function_call(const goos::Object& form,
// set args (introducing a move here makes coloring more likely to be possible) // set args (introducing a move here makes coloring more likely to be possible)
std::vector<RegVal*> arg_outs; std::vector<RegVal*> arg_outs;
for (auto& arg : args) { for (auto& arg : args) {
arg_outs.push_back(env->make_ireg(arg->type(), emitter::RegKind::GPR)); arg_outs.push_back(env->make_gpr(arg->type()));
arg_outs.back()->mark_as_settable(); arg_outs.back()->mark_as_settable();
env->emit(std::make_unique<IR_RegSet>(arg_outs.back(), arg)); env->emit(std::make_unique<IR_RegSet>(arg_outs.back(), arg));
} }

View File

@ -46,7 +46,7 @@ Val* Compiler::number_to_integer(const goos::Object& form, Val* in, Env* env) {
} else if (is_float(ts)) { } else if (is_float(ts)) {
auto fe = get_parent_env_of_type<FunctionEnv>(env); auto fe = get_parent_env_of_type<FunctionEnv>(env);
auto result = fe->make_gpr(m_ts.make_typespec("int")); auto result = fe->make_gpr(m_ts.make_typespec("int"));
env->emit(std::make_unique<IR_FloatToInt>(result, in->to_xmm(env))); env->emit(std::make_unique<IR_FloatToInt>(result, in->to_fpr(env)));
return result; return result;
} else if (is_integer(ts)) { } else if (is_integer(ts)) {
return in; return in;
@ -84,7 +84,7 @@ Val* Compiler::number_to_float(const goos::Object& form, Val* in, Env* env) {
return in; return in;
} else if (is_integer(ts)) { } else if (is_integer(ts)) {
auto fe = get_parent_env_of_type<FunctionEnv>(env); auto fe = get_parent_env_of_type<FunctionEnv>(env);
auto result = fe->make_xmm(m_ts.make_typespec("float")); auto result = fe->make_fpr(m_ts.make_typespec("float"));
env->emit(std::make_unique<IR_IntToFloat>(result, in->to_gpr(env))); env->emit(std::make_unique<IR_IntToFloat>(result, in->to_gpr(env)));
return result; return result;
} }
@ -132,14 +132,14 @@ Val* Compiler::compile_add(const goos::Object& form, const goos::Object& rest, E
} }
case MATH_FLOAT: { case MATH_FLOAT: {
auto result = env->make_xmm(first_type); auto result = env->make_fpr(first_type);
env->emit(std::make_unique<IR_RegSet>(result, first_val->to_xmm(env))); env->emit(std::make_unique<IR_RegSet>(result, first_val->to_fpr(env)));
for (size_t i = 1; i < args.unnamed.size(); i++) { for (size_t i = 1; i < args.unnamed.size(); i++) {
env->emit(std::make_unique<IR_FloatMath>( env->emit(std::make_unique<IR_FloatMath>(
FloatMathKind::ADD_SS, result, FloatMathKind::ADD_SS, result,
to_math_type(form, compile_error_guard(args.unnamed.at(i), env), math_type, env) to_math_type(form, compile_error_guard(args.unnamed.at(i), env), math_type, env)
->to_xmm(env))); ->to_fpr(env)));
} }
return result; return result;
} }
@ -178,14 +178,14 @@ Val* Compiler::compile_mul(const goos::Object& form, const goos::Object& rest, E
return result; return result;
} }
case MATH_FLOAT: { case MATH_FLOAT: {
auto result = env->make_xmm(first_type); auto result = env->make_fpr(first_type);
env->emit(std::make_unique<IR_RegSet>(result, first_val->to_xmm(env))); env->emit(std::make_unique<IR_RegSet>(result, first_val->to_fpr(env)));
for (size_t i = 1; i < args.unnamed.size(); i++) { for (size_t i = 1; i < args.unnamed.size(); i++) {
env->emit(std::make_unique<IR_FloatMath>( env->emit(std::make_unique<IR_FloatMath>(
FloatMathKind::MUL_SS, result, FloatMathKind::MUL_SS, result,
to_math_type(form, compile_error_guard(args.unnamed.at(i), env), math_type, env) to_math_type(form, compile_error_guard(args.unnamed.at(i), env), math_type, env)
->to_xmm(env))); ->to_fpr(env)));
} }
return result; return result;
} }
@ -210,14 +210,14 @@ Val* Compiler::compile_fmin(const goos::Object& form, const goos::Object& rest,
if (get_math_mode(first_val->type()) != MATH_FLOAT) { if (get_math_mode(first_val->type()) != MATH_FLOAT) {
throw_compiler_error(form, "Must use floats in fmin"); throw_compiler_error(form, "Must use floats in fmin");
} }
auto result = env->make_xmm(first_val->type()); auto result = env->make_fpr(first_val->type());
env->emit(std::make_unique<IR_RegSet>(result, first_val->to_xmm(env))); env->emit(std::make_unique<IR_RegSet>(result, first_val->to_fpr(env)));
for (size_t i = 1; i < args.unnamed.size(); i++) { for (size_t i = 1; i < args.unnamed.size(); i++) {
auto val = compile_error_guard(args.unnamed.at(i), env); auto val = compile_error_guard(args.unnamed.at(i), env);
if (get_math_mode(val->type()) != MATH_FLOAT) { if (get_math_mode(val->type()) != MATH_FLOAT) {
throw_compiler_error(form, "Must use floats in fmin"); throw_compiler_error(form, "Must use floats in fmin");
} }
env->emit(std::make_unique<IR_FloatMath>(FloatMathKind::MIN_SS, result, val->to_xmm(env))); env->emit(std::make_unique<IR_FloatMath>(FloatMathKind::MIN_SS, result, val->to_fpr(env)));
} }
return result; return result;
} }
@ -233,14 +233,14 @@ Val* Compiler::compile_fmax(const goos::Object& form, const goos::Object& rest,
if (get_math_mode(first_val->type()) != MATH_FLOAT) { if (get_math_mode(first_val->type()) != MATH_FLOAT) {
throw_compiler_error(form, "Must use floats in fmax"); throw_compiler_error(form, "Must use floats in fmax");
} }
auto result = env->make_xmm(first_val->type()); auto result = env->make_fpr(first_val->type());
env->emit(std::make_unique<IR_RegSet>(result, first_val->to_xmm(env))); env->emit(std::make_unique<IR_RegSet>(result, first_val->to_fpr(env)));
for (size_t i = 1; i < args.unnamed.size(); i++) { for (size_t i = 1; i < args.unnamed.size(); i++) {
auto val = compile_error_guard(args.unnamed.at(i), env); auto val = compile_error_guard(args.unnamed.at(i), env);
if (get_math_mode(val->type()) != MATH_FLOAT) { if (get_math_mode(val->type()) != MATH_FLOAT) {
throw_compiler_error(form, "Must use floats in fmax"); throw_compiler_error(form, "Must use floats in fmax");
} }
env->emit(std::make_unique<IR_FloatMath>(FloatMathKind::MAX_SS, result, val->to_xmm(env))); env->emit(std::make_unique<IR_FloatMath>(FloatMathKind::MAX_SS, result, val->to_fpr(env)));
} }
return result; return result;
} }
@ -316,23 +316,23 @@ Val* Compiler::compile_sub(const goos::Object& form, const goos::Object& rest, E
case MATH_FLOAT: case MATH_FLOAT:
if (args.unnamed.size() == 1) { if (args.unnamed.size() == 1) {
auto result = auto result =
compile_float(0, env, get_parent_env_of_type<FunctionEnv>(env)->segment)->to_xmm(env); compile_float(0, env, get_parent_env_of_type<FunctionEnv>(env)->segment)->to_fpr(env);
env->emit(std::make_unique<IR_FloatMath>( env->emit(std::make_unique<IR_FloatMath>(
FloatMathKind::SUB_SS, result, FloatMathKind::SUB_SS, result,
to_math_type(form, compile_error_guard(args.unnamed.at(0), env), math_type, env) to_math_type(form, compile_error_guard(args.unnamed.at(0), env), math_type, env)
->to_xmm(env))); ->to_fpr(env)));
return result; return result;
} else { } else {
auto result = env->make_xmm(first_type); auto result = env->make_fpr(first_type);
env->emit(std::make_unique<IR_RegSet>( env->emit(std::make_unique<IR_RegSet>(
result, to_math_type(form, compile_error_guard(args.unnamed.at(0), env), math_type, env) result, to_math_type(form, compile_error_guard(args.unnamed.at(0), env), math_type, env)
->to_xmm(env))); ->to_fpr(env)));
for (size_t i = 1; i < args.unnamed.size(); i++) { for (size_t i = 1; i < args.unnamed.size(); i++) {
env->emit(std::make_unique<IR_FloatMath>( env->emit(std::make_unique<IR_FloatMath>(
FloatMathKind::SUB_SS, result, FloatMathKind::SUB_SS, result,
to_math_type(form, compile_error_guard(args.unnamed.at(i), env), math_type, env) to_math_type(form, compile_error_guard(args.unnamed.at(i), env), math_type, env)
->to_xmm(env))); ->to_fpr(env)));
} }
return result; return result;
} }
@ -360,7 +360,7 @@ Val* Compiler::compile_div(const goos::Object& form, const goos::Object& rest, E
case MATH_INT: { case MATH_INT: {
auto fe = get_parent_env_of_type<FunctionEnv>(env); auto fe = get_parent_env_of_type<FunctionEnv>(env);
auto first_thing = first_val->to_gpr(env); auto first_thing = first_val->to_gpr(env);
auto result = env->make_ireg(first_type, emitter::RegKind::GPR); auto result = env->make_gpr(first_type);
env->emit(std::make_unique<IR_RegSet>(result, first_thing)); env->emit(std::make_unique<IR_RegSet>(result, first_thing));
IRegConstraint result_rax_constraint; IRegConstraint result_rax_constraint;
@ -377,12 +377,12 @@ Val* Compiler::compile_div(const goos::Object& form, const goos::Object& rest, E
} }
case MATH_FLOAT: { case MATH_FLOAT: {
auto result = env->make_xmm(first_type); auto result = env->make_fpr(first_type);
env->emit(std::make_unique<IR_RegSet>(result, first_val->to_xmm(env))); env->emit(std::make_unique<IR_RegSet>(result, first_val->to_fpr(env)));
env->emit(std::make_unique<IR_FloatMath>( env->emit(std::make_unique<IR_FloatMath>(
FloatMathKind::DIV_SS, result, FloatMathKind::DIV_SS, result,
to_math_type(form, compile_error_guard(args.unnamed.at(1), env), math_type, env) to_math_type(form, compile_error_guard(args.unnamed.at(1), env), math_type, env)
->to_xmm(env))); ->to_fpr(env)));
return result; return result;
} }

View File

@ -65,7 +65,7 @@ RegVal* Compiler::compile_get_method_of_object(const goos::Object& form,
MemLoadInfo info; MemLoadInfo info;
info.size = 4; info.size = 4;
info.sign_extend = false; info.sign_extend = false;
info.reg = RegKind::GPR_64; info.reg = RegClass::GPR_64;
env->emit(std::make_unique<IR_LoadConstOffset>(runtime_type, -4, object, info)); env->emit(std::make_unique<IR_LoadConstOffset>(runtime_type, -4, object, info));
} else { } else {
// can't look up at runtime // can't look up at runtime
@ -174,8 +174,8 @@ Val* Compiler::generate_inspector_for_type(const goos::Object& form, Env* env, T
method_env->set_segment(DEBUG_SEGMENT); method_env->set_segment(DEBUG_SEGMENT);
// Create a register which will hold the input to the inspect method // Create a register which will hold the input to the inspect method
auto input = method_env->make_ireg(structured_type->get_name(), emitter::RegKind::GPR); auto input = method_env->make_gpr(structured_type->get_name());
// "Constraint" this register to be the register that the function argument is passed in // "Constrain" this register to be the register that the function argument is passed in
IRegConstraint constraint; IRegConstraint constraint;
constraint.instr_idx = 0; // constraint at the start of the function constraint.instr_idx = 0; // constraint at the start of the function
constraint.ireg = input->ireg(); // constrain this register constraint.ireg = input->ireg(); // constrain this register
@ -326,7 +326,7 @@ Val* Compiler::compile_defmethod(const goos::Object& form, const goos::Object& _
for (u32 i = 0; i < lambda.params.size(); i++) { for (u32 i = 0; i < lambda.params.size(); i++) {
IRegConstraint constr; IRegConstraint constr;
constr.instr_idx = 0; // constraint at function start constr.instr_idx = 0; // constraint at function start
auto ireg = new_func_env->make_ireg(lambda.params.at(i).type, emitter::RegKind::GPR); auto ireg = new_func_env->make_gpr(lambda.params.at(i).type);
ireg->mark_as_settable(); ireg->mark_as_settable();
constr.ireg = ireg->ireg(); constr.ireg = ireg->ireg();
constr.desired_register = emitter::gRegInfo.get_arg_reg(i); constr.desired_register = emitter::gRegInfo.get_arg_reg(i);
@ -338,7 +338,7 @@ Val* Compiler::compile_defmethod(const goos::Object& form, const goos::Object& _
place->func = new_func_env.get(); place->func = new_func_env.get();
// nasty function block env setup // nasty function block env setup
auto return_reg = new_func_env->make_ireg(get_none()->type(), emitter::RegKind::GPR); auto return_reg = new_func_env->make_gpr(get_none()->type());
auto func_block_env = new_func_env->alloc_env<BlockEnv>(new_func_env.get(), "#f"); auto func_block_env = new_func_env->alloc_env<BlockEnv>(new_func_env.get(), "#f");
func_block_env->return_value = return_reg; func_block_env->return_value = return_reg;
func_block_env->end_label = Label(new_func_env.get()); func_block_env->end_label = Label(new_func_env.get());

View File

@ -121,7 +121,6 @@ class IGen {
// todo - GPR64 -> XMM64 (zext) // todo - GPR64 -> XMM64 (zext)
// todo - XMM -> GPR64 // todo - XMM -> GPR64
// todo - XMM128 - XMM128
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// GOAL Loads and Stores // GOAL Loads and Stores
@ -706,6 +705,17 @@ class IGen {
return instr; return instr;
} }
/*!
 * Store a vector float register to addr + off + offset, picking the shortest encoding that
 * can hold the constant offset (none, signed 8-bit, or signed 32-bit).
 * The offset must fit in a signed 32-bit value.
 */
static Instruction store_goal_vf(Register addr, Register value, Register off, s64 offset) {
  if (offset == 0) {
    return storevf_gpr64_plus_gpr64(value, addr, off);
  }
  if (offset >= INT8_MIN && offset <= INT8_MAX) {
    return storevf_gpr64_plus_gpr64_plus_s8(value, addr, off, offset);
  }
  // Every remaining case goes through the s32 encoder, which asserts that the offset fits in
  // 32 bits. This keeps the out-of-range check while guaranteeing that every path returns a
  // value: previously, with asserts compiled out, control could run off the end of this
  // non-void function, which is undefined behavior.
  return storevf_gpr64_plus_gpr64_plus_s32(value, addr, off, offset);
}
static Instruction store_goal_gpr(Register addr, static Instruction store_goal_gpr(Register addr,
Register value, Register value,
Register off, Register off,
@ -757,6 +767,18 @@ class IGen {
} }
} }
/*!
 * Load a vector float register from addr + off + offset, picking the shortest encoding that
 * can hold the constant offset (none, signed 8-bit, or signed 32-bit).
 */
static Instruction load_goal_vf(Register dst, Register addr, Register off, int offset) {
  if (offset == 0) {
    return loadvf_gpr64_plus_gpr64(dst, addr, off);
  }
  if (offset >= INT8_MIN && offset <= INT8_MAX) {
    return loadvf_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset);
  }
  // Every remaining case goes through the s32 encoder, which asserts the 32-bit range itself.
  // The old explicit range test on an int was always true where int is 32 bits, and its else
  // branch ended in assert(false) with no return statement, so this also guarantees that every
  // path returns a value.
  return loadvf_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset);
}
/*! /*!
* Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register. * Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register.
* This will pick the appropriate fancy addressing mode instruction. * This will pick the appropriate fancy addressing mode instruction.
@ -1820,6 +1842,203 @@ class IGen {
i.is_null = true; i.is_null = true;
return i; return i;
} }
/////////////////////////////
// AVX (VF - Vector Float) //
/////////////////////////////
/*!
 * Move a full vector float register: dst = src. Both registers must be xmm registers.
 * Chooses between opcode 0x28 and opcode 0x29 (same move, operands swapped) to get the
 * shorter encoding.
 */
static Instruction mov_vf_vf(Register dst, Register src) {
  assert(dst.is_xmm());
  assert(src.is_xmm());

  const bool swap_saves_a_byte = src.hw_id() >= 8 && dst.hw_id() < 8;
  if (!swap_saves_a_byte) {
    // normal case: the 0x28 encoding, which compilers/assemblers seem to prefer.
    Instruction mov_28(0x28);
    mov_28.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, VEX3::LeadingBytes::P_0F, false);
    return mov_28;
  }

  // in this case, we can use the 0x29 encoding, which swaps src and dst, in order to use the
  // 2 byte VEX prefix, where the 0x28 encoding would require an extra byte.
  Instruction mov_29(0x29);
  mov_29.set_vex_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, VEX3::LeadingBytes::P_0F, false);
  return mov_29;
}
/*!
 * Load a vector float register from the address addr1 + addr2 (opcode 0x28, VEX encoded).
 * dst must be an xmm register. The address registers must be two different GPRs and neither
 * may be RSP.
 */
static Instruction loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) {
  // operand kinds
  assert(dst.is_xmm());
  assert(addr1.is_gpr());
  assert(addr2.is_gpr());
  // addressing restrictions
  assert(addr1 != addr2);
  assert(addr1 != RSP);
  assert(addr2 != RSP);

  const auto dst_id = dst.hw_id();
  const auto base_id = addr1.hw_id();
  const auto index_id = addr2.hw_id();

  Instruction load(0x28);
  load.set_vex_modrm_and_rex_for_reg_plus_reg_addr(dst_id, base_id, index_id,
                                                   VEX3::LeadingBytes::P_0F, false);
  return load;
}
/*!
 * Load a vector float register from the address addr1 + addr2 + offset (opcode 0x28, VEX
 * encoded), where offset must fit in a signed 8-bit value.
 * dst must be an xmm register. The address registers must be two different GPRs and neither
 * may be RSP.
 */
static Instruction loadvf_gpr64_plus_gpr64_plus_s8(Register dst,
                                                   Register addr1,
                                                   Register addr2,
                                                   s64 offset) {
  // operand kinds
  assert(dst.is_xmm());
  assert(addr1.is_gpr());
  assert(addr2.is_gpr());
  // addressing restrictions
  assert(addr1 != addr2);
  assert(addr1 != RSP);
  assert(addr2 != RSP);
  // displacement range
  assert(offset >= INT8_MIN && offset <= INT8_MAX);

  const auto dst_id = dst.hw_id();
  const auto base_id = addr1.hw_id();
  const auto index_id = addr2.hw_id();

  Instruction load(0x28);
  load.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(dst_id, base_id, index_id, offset,
                                                      VEX3::LeadingBytes::P_0F, false);
  return load;
}
/*!
 * Load a vector float register from the address addr1 + addr2 + offset (opcode 0x28, VEX
 * encoded), where offset must fit in a signed 32-bit value.
 * dst must be an xmm register. The address registers must be two different GPRs and neither
 * may be RSP.
 */
static Instruction loadvf_gpr64_plus_gpr64_plus_s32(Register dst,
                                                    Register addr1,
                                                    Register addr2,
                                                    s64 offset) {
  // operand kinds
  assert(dst.is_xmm());
  assert(addr1.is_gpr());
  assert(addr2.is_gpr());
  // addressing restrictions
  assert(addr1 != addr2);
  assert(addr1 != RSP);
  assert(addr2 != RSP);
  // displacement range
  assert(offset >= INT32_MIN && offset <= INT32_MAX);

  const auto dst_id = dst.hw_id();
  const auto base_id = addr1.hw_id();
  const auto index_id = addr2.hw_id();

  Instruction load(0x28);
  load.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(dst_id, base_id, index_id, offset,
                                                       VEX3::LeadingBytes::P_0F, false);
  return load;
}
/*!
 * Store an xmm register to memory at [addr1 + addr2] (VEX opcode 0x29).
 * The two address registers must be distinct gprs, and neither may be RSP.
 */
static Instruction storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2) {
  assert(value.is_xmm());
  assert(addr1.is_gpr());
  assert(addr2.is_gpr());
  assert(addr1 != addr2);
  assert(addr1 != RSP);
  assert(addr2 != RSP);

  const auto leading = VEX3::LeadingBytes::P_0F;
  Instruction store(0x29);
  store.set_vex_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(),
                                                    leading, false);
  return store;
}
/*!
 * Store an xmm register to memory at [addr1 + addr2 + offset] (VEX opcode 0x29), where the
 * offset is encoded as a signed 8-bit displacement.
 * The two address registers must be distinct gprs, and neither may be RSP.
 */
static Instruction storevf_gpr64_plus_gpr64_plus_s8(Register value,
                                                    Register addr1,
                                                    Register addr2,
                                                    s64 offset) {
  assert(value.is_xmm());
  assert(addr1.is_gpr());
  assert(addr2.is_gpr());
  assert(addr1 != addr2);
  assert(addr1 != RSP);
  assert(addr2 != RSP);
  assert(offset >= INT8_MIN && offset <= INT8_MAX);  // must fit in the one-byte displacement

  const auto leading = VEX3::LeadingBytes::P_0F;
  Instruction store(0x29);
  store.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(),
                                                       addr2.hw_id(), offset, leading, false);
  return store;
}
/*!
 * Store an xmm register to memory at [addr1 + addr2 + offset] (VEX opcode 0x29), where the
 * offset is encoded as a signed 32-bit displacement.
 * The two address registers must be distinct gprs, and neither may be RSP.
 */
static Instruction storevf_gpr64_plus_gpr64_plus_s32(Register value,
                                                     Register addr1,
                                                     Register addr2,
                                                     s64 offset) {
  assert(value.is_xmm());
  assert(addr1.is_gpr());
  assert(addr2.is_gpr());
  assert(addr1 != addr2);
  assert(addr1 != RSP);
  assert(addr2 != RSP);
  assert(offset >= INT32_MIN && offset <= INT32_MAX);  // must fit in the four-byte displacement

  const auto leading = VEX3::LeadingBytes::P_0F;
  Instruction store(0x29);
  store.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(),
                                                        addr2.hw_id(), offset, leading, false);
  return store;
}
/*!
 * Load an xmm register from memory at [rip + offset] (VEX opcode 0x28).
 * The offset must fit in a signed 32-bit displacement.
 */
static Instruction loadvf_rip_plus_s32(Register dest, s64 offset) {
  assert(dest.is_xmm());
  assert(offset >= INT32_MIN);
  assert(offset <= INT32_MAX);

  Instruction rip_load(0x28);
  rip_load.set_vex_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset);
  return rip_load;
}
// todo: the remaining rip-relative forms (e.g. stores); only the rip-relative load above exists.
/*!
 * Three-operand multiply on xmm registers (VEX opcode 0x59): dst gets src1 * src2.
 * dst goes in ModRM.reg, src2 in ModRM.rm, and src1 in the VEX register field.
 */
static Instruction mul_vf(Register dst, Register src1, Register src2) {
  assert(dst.is_xmm());
  assert(src1.is_xmm());
  assert(src2.is_xmm());

  Instruction multiply(0x59);
  multiply.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F,
                                 src1.hw_id());
  return multiply;
}
/*!
 * Shuffle the four lanes of src into dst (VEX opcode 0xc6 with an 8-bit selector immediate).
 * dx/dy/dz/dw each pick a source lane (0-3) and occupy two bits of the immediate, dx lowest.
 *
 * This uses the three-operand AVX "VEX" encoding with both source operands set to src. The
 * original author noted this looks one byte longer than the SSE encoding
 * (0x0f 0xc6 via set_op2 + set_modrm_and_rex) but may be faster.
 */
static Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) {
  assert(dst.is_xmm());
  assert(src.is_xmm());
  assert(dx < 4);
  assert(dy < 4);
  assert(dz < 4);
  assert(dw < 4);

  // pack the four 2-bit lane selectors, w in the top bits down to x in the bottom bits
  const u8 lane_select = (dw << 6) + (dz << 4) + (dy << 2) + dx;

  Instruction shuffle(0xc6);
  shuffle.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, src.hw_id());
  shuffle.set(Imm(1, lane_select));
  return shuffle;
}
/*!
 * Three-operand bitwise xor on xmm registers (VEX opcode 0x57): dst gets src1 ^ src2.
 * dst goes in ModRM.reg, src2 in ModRM.rm, and src1 in the VEX register field.
 */
static Instruction xor_vf(Register dst, Register src1, Register src2) {
  assert(dst.is_xmm());
  assert(src1.is_xmm());
  assert(src2.is_xmm());

  Instruction exclusive_or(0x57);
  exclusive_or.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F,
                                     src1.hw_id());
  return exclusive_or;
}
/*!
 * Three-operand subtract on xmm registers (VEX opcode 0x5c): dst gets src1 - src2.
 * dst goes in ModRM.reg, src2 in ModRM.rm, and src1 in the VEX register field.
 */
static Instruction sub_vf(Register dst, Register src1, Register src2) {
  assert(dst.is_xmm());
  assert(src1.is_xmm());
  assert(src2.is_xmm());

  Instruction subtract(0x5c);
  subtract.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F,
                                 src1.hw_id());
  return subtract;
}
/*!
 * Three-operand add on xmm registers (VEX opcode 0x58): dst gets src1 + src2.
 * dst goes in ModRM.reg, src2 in ModRM.rm, and src1 in the VEX register field.
 */
static Instruction add_vf(Register dst, Register src1, Register src2) {
  assert(dst.is_xmm());
  assert(src1.is_xmm());
  assert(src2.is_xmm());

  Instruction addition(0x58);
  addition.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F,
                                 src1.hw_id());
  return addition;
}
/*!
 * Blend two xmm registers into dst under a 4-bit immediate mask
 * (VEX opcode 0x0c in the 0F 3A map, with the 0x66 prefix selected).
 * Only the low four bits of mask may be set.
 */
static Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask) {
  assert(!(mask & 0b11110000));
  assert(dst.is_xmm());
  assert(src1.is_xmm());
  assert(src2.is_xmm());

  Instruction blend(0x0c);
  // a leading-byte map other than 0F always forces the three-byte VEX prefix
  blend.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F_3A,
                              src1.hw_id(), false, VexPrefix::P_66);
  blend.set(Imm(1, mask));
  return blend;
}
}; };
} // namespace emitter } // namespace emitter

View File

@ -54,6 +54,83 @@ struct REX {
uint8_t operator()() const { return (1 << 6) | (W << 3) | (R << 2) | (X << 1) | (B << 0); } uint8_t operator()() const { return (1 << 6) | (W << 3) | (R << 2) | (X << 1) | (B << 0); }
}; };
// Value of the 2-bit prefix field that VEX2/VEX3 place in the low two bits of their last byte.
// The names indicate which legacy prefix byte (none, 0x66, 0xF3, 0xF2) each value stands for.
enum class VexPrefix : u8 { P_NONE = 0, P_66 = 1, P_F3 = 2, P_F2 = 3 };
/*!
 * The three-byte "VEX" prefix format for AVX instructions.
 *
 * Bytes produced by emit():
 *   byte 0: the fixed 0xC4 escape
 *   byte 1: R, X, B stored inverted in bits 7..5, leading-bytes selector in bits 4..0
 *   byte 2: W in bit 7 (stored as-is), reg_id stored inverted in bits 6..3, L in bit 2,
 *           prefix selector in bits 1..0
 */
struct VEX3 {
  bool W, R, X, B;
  enum class LeadingBytes : u8 { P_INVALID = 0, P_0F = 1, P_0F_38 = 2, P_0F_3A = 3 } leading_bytes;
  u8 reg_id;  // the extra register operand carried by the prefix itself (0 when unused)
  VexPrefix prefix;
  bool L;

  /*!
   * Get byte number "byte" (0, 1 or 2) of the encoded prefix.
   * Any other index is a programming error: it asserts, and returns 0 if asserts are disabled.
   */
  u8 emit(u8 byte) const {
    switch (byte) {
      case 0:
        return 0b11000100;
      case 1: {
        u8 result = 0;
        result |= ((!R) << 7);
        result |= ((!X) << 6);
        result |= ((!B) << 5);
        result |= (0b11111 & u8(leading_bytes));
        return result;
      }
      case 2: {
        u8 result = 0;
        result |= (W << 7);  // W is the one flag that is NOT stored inverted
        result |= ((~reg_id) & 0b1111) << 3;
        result |= (L << 2);
        result |= (u8(prefix) & 0b11);
        return result;
      }
      default:
        assert(false);
        // keep a defined return value so that builds with asserts compiled out do not
        // flow off the end of a non-void function (undefined behavior).
        return 0;
    }
  }

  VEX3(bool w,
       bool r,
       bool x,
       bool b,
       LeadingBytes _leading_bytes,
       u8 _reg_id = 0,
       VexPrefix _prefix = VexPrefix::P_NONE,
       bool l = false)
      : W(w),
        R(r),
        X(x),
        B(b),
        leading_bytes(_leading_bytes),
        reg_id(_reg_id),
        prefix(_prefix),
        L(l) {}
};
/*!
 * The two-byte "VEX" prefix format for AVX instructions.
 * It only carries R, the extra register, L and the prefix selector; the helpers that build
 * prefixes use it only when X, B and W are clear and the leading-bytes map is 0F.
 *
 * Bytes produced by emit():
 *   byte 0: the fixed 0xC5 escape
 *   byte 1: R stored inverted in bit 7, reg_id stored inverted in bits 6..3, L in bit 2,
 *           prefix selector in bits 1..0
 */
struct VEX2 {
  bool R;
  u8 reg_id;  // the extra register operand carried by the prefix itself (0 when unused)
  VexPrefix prefix;
  bool L;

  /*!
   * Get byte number "byte" (0 or 1) of the encoded prefix.
   * Any other index is a programming error: it asserts, and returns 0 if asserts are disabled.
   */
  u8 emit(u8 byte) const {
    switch (byte) {
      case 0:
        return 0b11000101;
      case 1: {
        u8 result = 0;
        result |= ((!R) << 7);
        result |= ((~reg_id) & 0b1111) << 3;
        result |= (L << 2);
        result |= (u8(prefix) & 0b11);
        return result;
      }
      default:
        assert(false);
        // keep a defined return value so that builds with asserts compiled out do not
        // flow off the end of a non-void function (undefined behavior).
        return 0;
    }
  }

  VEX2(bool r, u8 _reg_id = 0, VexPrefix _prefix = VexPrefix::P_NONE, bool l = false)
      : R(r), reg_id(_reg_id), prefix(_prefix), L(l) {}
};
/*! /*!
* A high-level description of an x86-64 opcode. It can emit itself. * A high-level description of an x86-64 opcode. It can emit itself.
*/ */
@ -73,6 +150,9 @@ struct Instruction {
// flag to indicate it's the first instruction of a function and needs align and type tag // flag to indicate it's the first instruction of a function and needs align and type tag
bool is_function_start = false; bool is_function_start = false;
int n_vex = 0;
uint8_t vex[3] = {0, 0, 0};
// the rex byte // the rex byte
bool set_rex = false; bool set_rex = false;
uint8_t m_rex = 0; uint8_t m_rex = 0;
@ -93,10 +173,6 @@ struct Instruction {
bool set_imm = false; bool set_imm = false;
Imm imm; Imm imm;
// which IR instruction does this go with?
// this is only set for the first instruction generated from an IR.
int ir_index = -1;
/*! /*!
* Move opcode byte 0 to before the rex prefix. * Move opcode byte 0 to before the rex prefix.
*/ */
@ -123,6 +199,20 @@ struct Instruction {
set_sib = true; set_sib = true;
} }
/*!
 * Attach a three-byte VEX prefix: record its length and cache each encoded byte.
 */
void set(VEX3 vex3) {
  n_vex = 3;
  int byte_idx = 0;
  while (byte_idx < n_vex) {
    vex[byte_idx] = vex3.emit(byte_idx);
    ++byte_idx;
  }
}
/*!
 * Attach a two-byte VEX prefix: record its length and cache each encoded byte.
 */
void set(VEX2 vex2) {
  n_vex = 2;
  int byte_idx = 0;
  while (byte_idx < n_vex) {
    vex[byte_idx] = vex2.emit(byte_idx);
    ++byte_idx;
  }
}
void set_disp(Imm i) { void set_disp(Imm i) {
disp = i; disp = i;
set_disp_imm = true; set_disp_imm = true;
@ -187,6 +277,78 @@ struct Instruction {
} }
} }
/*!
 * Set ModRM (register-direct, mod = 3) and a VEX prefix for an instruction with registers in
 * ModRM.reg and ModRM.rm plus an extra register (vex_reg) carried in the prefix.
 * Register ids of 8 or more are reduced by 8 and flagged through the prefix's R / B bits.
 * The two-byte prefix is chosen whenever B, W and a non-0F leading-bytes map are all not needed.
 */
void set_vex_modrm_and_rex(uint8_t reg,
                           uint8_t rm,
                           VEX3::LeadingBytes lb,
                           uint8_t vex_reg = 0,
                           bool rex_w = false,
                           VexPrefix prefix = VexPrefix::P_NONE) {
  const bool rex_b = rm >= 8;
  const bool rex_r = reg >= 8;
  if (rex_b) {
    rm -= 8;
  }
  if (rex_r) {
    reg -= 8;
  }

  ModRM modrm;
  modrm.mod = 3;
  modrm.reg_op = reg;
  modrm.rm = rm;
  set(modrm);

  // the two-byte form has no B or W bit and always implies the 0F map
  const bool two_byte_ok = !rex_b && !rex_w && lb == VEX3::LeadingBytes::P_0F;
  if (two_byte_ok) {
    set(VEX2(rex_r, vex_reg, prefix));
    return;
  }
  set(VEX3(rex_w, rex_r, false, rex_b, lb, vex_reg, prefix));
}
/*!
 * Set ModRM (with a caller-chosen mod) and a VEX prefix for an instruction that only uses the
 * ModRM.reg and ModRM.rm registers; no extra register is put in the prefix.
 * Register ids of 8 or more are reduced by 8 and flagged through the prefix's R / B bits.
 * The two-byte prefix is chosen whenever B, W and a non-0F leading-bytes map are all not needed.
 */
void set_vex_modrm_and_rex(uint8_t reg,
                           uint8_t rm,
                           uint8_t mod,
                           VEX3::LeadingBytes lb,
                           bool rex_w = false) {
  const bool rex_b = rm >= 8;
  const bool rex_r = reg >= 8;
  if (rex_b) {
    rm -= 8;
  }
  if (rex_r) {
    reg -= 8;
  }

  ModRM modrm;
  modrm.mod = mod;
  modrm.reg_op = reg;
  modrm.rm = rm;
  set(modrm);

  // the two-byte form has no B or W bit and always implies the 0F map
  const bool two_byte_ok = !rex_b && !rex_w && lb == VEX3::LeadingBytes::P_0F;
  if (two_byte_ok) {
    set(VEX2(rex_r));
    return;
  }
  set(VEX3(rex_w, rex_r, false, rex_b, lb));
}
void set_modrm_and_rex_for_reg_plus_reg_plus_s8(uint8_t reg, void set_modrm_and_rex_for_reg_plus_reg_plus_s8(uint8_t reg,
uint8_t addr1, uint8_t addr1,
uint8_t addr2, uint8_t addr2,
@ -245,6 +407,72 @@ struct Instruction {
set_disp(imm2); set_disp(imm2);
} }
void set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(uint8_t reg,
uint8_t addr1,
uint8_t addr2,
s8 offset,
VEX3::LeadingBytes lb,
bool rex_w) {
bool rex_b = false, rex_r = false, rex_x = false;
bool addr1_ext = false;
bool addr2_ext = false;
if (addr1 >= 8) {
addr1 -= 8;
addr1_ext = true;
}
if (addr2 >= 8) {
addr2 -= 8;
addr2_ext = true;
}
if (reg >= 8) {
reg -= 8;
rex_r = true;
}
ModRM modrm;
modrm.mod = 1; // no disp
modrm.rm = 4; // sib!
modrm.reg_op = reg;
SIB sib;
sib.scale = 0;
Imm imm2(1, offset);
// default addr1 in index
if (addr1 == 4) {
sib.index = addr2;
sib.base = addr1;
rex_x = addr2_ext;
rex_b = addr1_ext;
} else {
// addr1 in index
sib.index = addr1;
sib.base = addr2;
rex_x = addr1_ext;
rex_b = addr2_ext;
}
assert(sib.index != 4);
if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) {
// need three byte version
set(VEX3(rex_w, rex_r, rex_x, rex_b, lb));
} else {
assert(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f
assert(!rex_b);
assert(!rex_w);
assert(!rex_x);
set(VEX2(rex_r));
}
set(modrm);
set(sib);
set_disp(imm2);
}
void set_modrm_and_rex_for_reg_plus_reg_plus_s32(uint8_t reg, void set_modrm_and_rex_for_reg_plus_reg_plus_s32(uint8_t reg,
uint8_t addr1, uint8_t addr1,
uint8_t addr2, uint8_t addr2,
@ -303,6 +531,72 @@ struct Instruction {
set_disp(imm2); set_disp(imm2);
} }
void set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(uint8_t reg,
uint8_t addr1,
uint8_t addr2,
s32 offset,
VEX3::LeadingBytes lb,
bool rex_w) {
bool rex_b = false, rex_r = false, rex_x = false;
bool addr1_ext = false;
bool addr2_ext = false;
if (addr1 >= 8) {
addr1 -= 8;
addr1_ext = true;
}
if (addr2 >= 8) {
addr2 -= 8;
addr2_ext = true;
}
if (reg >= 8) {
reg -= 8;
rex_r = true;
}
ModRM modrm;
modrm.mod = 2; // no disp
modrm.rm = 4; // sib!
modrm.reg_op = reg;
SIB sib;
sib.scale = 0;
Imm imm2(4, offset);
// default addr1 in index
if (addr1 == 4) {
sib.index = addr2;
sib.base = addr1;
rex_x = addr2_ext;
rex_b = addr1_ext;
} else {
// addr1 in index
sib.index = addr1;
sib.base = addr2;
rex_x = addr1_ext;
rex_b = addr2_ext;
}
assert(sib.index != 4);
if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) {
// need three byte version
set(VEX3(rex_w, rex_r, rex_x, rex_b, lb));
} else {
assert(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f
assert(!rex_b);
assert(!rex_w);
assert(!rex_x);
set(VEX2(rex_r));
}
set(modrm);
set(sib);
set_disp(imm2);
}
void set_modrm_and_rex_for_reg_plus_reg_addr(uint8_t reg, void set_modrm_and_rex_for_reg_plus_reg_addr(uint8_t reg,
uint8_t addr1, uint8_t addr1,
uint8_t addr2, uint8_t addr2,
@ -371,6 +665,81 @@ struct Instruction {
set(sib); set(sib);
} }
void set_vex_modrm_and_rex_for_reg_plus_reg_addr(uint8_t reg,
uint8_t addr1,
uint8_t addr2,
VEX3::LeadingBytes lb,
bool rex_w = false) {
bool rex_b = false, rex_r = false, rex_x = false;
bool addr1_ext = false;
bool addr2_ext = false;
if (addr1 >= 8) {
addr1 -= 8;
addr1_ext = true;
}
if (addr2 >= 8) {
addr2 -= 8;
addr2_ext = true;
}
if (reg >= 8) {
reg -= 8;
rex_r = true;
}
ModRM modrm;
modrm.mod = 0; // no disp
modrm.rm = 4; // sib!
modrm.reg_op = reg;
SIB sib;
sib.scale = 0;
if (addr1 == 5 && addr2 == 5) {
sib.index = addr1;
sib.base = addr2;
rex_x = addr1_ext;
rex_b = addr2_ext;
modrm.mod = 1;
set_disp(Imm(1, 0));
} else {
// default addr1 in index
bool flipped = (addr1 == 4) || (addr2 == 5);
if (flipped) {
sib.index = addr2;
sib.base = addr1;
rex_x = addr2_ext;
rex_b = addr1_ext;
} else {
// addr1 in index
sib.index = addr1;
sib.base = addr2;
rex_x = addr1_ext;
rex_b = addr2_ext;
}
assert(sib.base != 5);
assert(sib.index != 4);
}
if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) {
// need three byte version
set(VEX3(rex_w, rex_r, rex_x, rex_b, lb));
} else {
assert(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f
assert(!rex_b);
assert(!rex_w);
assert(!rex_x);
set(VEX2(rex_r));
}
set(modrm);
set(sib);
}
/*! /*!
* Set modrm and rex as needed for two regs for an addressing mode. * Set modrm and rex as needed for two regs for an addressing mode.
* Will set SIB if R12 or RSP indexing is used. * Will set SIB if R12 or RSP indexing is used.
@ -440,6 +809,35 @@ struct Instruction {
} }
} }
/*!
 * Set VEX prefix, ModRM and a four-byte displacement for the [rip + s32] addressing mode with
 * "reg" as the register operand. No SIB byte is used, so only R (and W / the leading-bytes map)
 * can influence the choice between the two- and three-byte prefix.
 */
void set_vex_modrm_and_rex_for_rip_plus_s32(uint8_t reg,
                                            s32 offset,
                                            VEX3::LeadingBytes lb = VEX3::LeadingBytes::P_0F,
                                            bool rex_w = false) {
  const bool rex_r = reg >= 8;
  if (rex_r) {
    reg -= 8;
  }

  ModRM modrm;
  modrm.mod = 0;
  modrm.reg_op = reg;
  modrm.rm = 5;  // use the RIP addressing mode
  set(modrm);

  // the two-byte form has no W bit and always implies the 0F map
  const bool two_byte_ok = !rex_w && lb == VEX3::LeadingBytes::P_0F;
  if (two_byte_ok) {
    set(VEX2(rex_r));
  } else {
    set(VEX3(rex_w, rex_r, false, false, lb));
  }

  set_disp(Imm(4, offset));
}
/*! /*!
* Set up modrm and rex for the commonly used 32-bit immediate displacement indexing mode. * Set up modrm and rex for the commonly used 32-bit immediate displacement indexing mode.
*/ */
@ -484,6 +882,7 @@ struct Instruction {
return 0; return 0;
assert(set_disp_imm); assert(set_disp_imm);
int offset = 0; int offset = 0;
offset += n_vex;
if (set_rex) if (set_rex)
offset++; offset++;
offset++; // opcode offset++; // opcode
@ -506,6 +905,7 @@ struct Instruction {
return 0; return 0;
assert(set_imm); assert(set_imm);
int offset = 0; int offset = 0;
offset += n_vex;
if (set_rex) if (set_rex)
offset++; offset++;
offset++; // opcode offset++; // opcode
@ -529,6 +929,11 @@ struct Instruction {
if (is_null) if (is_null)
return 0; return 0;
uint8_t count = 0; uint8_t count = 0;
for (int i = 0; i < n_vex; i++) {
buffer[count++] = vex[i];
}
if (set_rex) { if (set_rex) {
buffer[count++] = m_rex; buffer[count++] = m_rex;
} }
@ -569,6 +974,9 @@ struct Instruction {
if (is_null) if (is_null)
return 0; return 0;
uint8_t count = 0; uint8_t count = 0;
count += n_vex;
if (set_rex) { if (set_rex) {
count++; count++;
} }

View File

@ -70,14 +70,27 @@ RegisterInfo RegisterInfo::make_register_info() {
RegisterInfo gRegInfo = RegisterInfo::make_register_info(); RegisterInfo gRegInfo = RegisterInfo::make_register_info();
std::string to_string(RegKind kind) { std::string to_string(HWRegKind kind) {
switch (kind) { switch (kind) {
case RegKind::GPR: case HWRegKind::GPR:
return "gpr"; return "gpr";
case RegKind::XMM: case HWRegKind::XMM:
return "xmm"; return "xmm";
default: default:
throw std::runtime_error("Unsupported RegKind"); throw std::runtime_error("Unsupported HWRegKind");
}
}
/*!
 * Map a compiler-level register class to the kind of hardware register that holds it:
 * the float, vector-float and 128-bit integer classes use xmm registers, 64-bit gprs use gprs.
 * Any other class is a programming error: it asserts, and yields HWRegKind::INVALID if asserts
 * are disabled.
 */
HWRegKind reg_class_to_hw(RegClass reg_class) {
  switch (reg_class) {
    case RegClass::VECTOR_FLOAT:
    case RegClass::FLOAT:
    case RegClass::INT_128:
      return HWRegKind::XMM;
    case RegClass::GPR_64:
      return HWRegKind::GPR;
    default:
      assert(false);
      // keep a defined return value so that builds with asserts compiled out do not
      // flow off the end of a non-void function (undefined behavior).
      return HWRegKind::INVALID;
  }
}

View File

@ -13,12 +13,13 @@
#include <vector> #include <vector>
#include <string> #include <string>
#include "common/common_types.h" #include "common/common_types.h"
#include "common/goal_constants.h"
namespace emitter { namespace emitter {
enum class RegKind : u8 { GPR, XMM, INVALID }; enum class HWRegKind : u8 { GPR, XMM, INVALID };
HWRegKind reg_class_to_hw(RegClass reg_class);
std::string to_string(RegKind kind); std::string to_string(HWRegKind kind);
constexpr int GPR_SIZE = 8; constexpr int GPR_SIZE = 8;
constexpr int XMM_SIZE = 16; constexpr int XMM_SIZE = 16;

View File

@ -618,13 +618,14 @@ int get_stack_slot_for_var(int var, RegAllocCache* cache) {
const std::vector<emitter::Register>& get_default_alloc_order_for_var_spill(int v, const std::vector<emitter::Register>& get_default_alloc_order_for_var_spill(int v,
RegAllocCache* cache) { RegAllocCache* cache) {
auto& info = cache->iregs.at(v); auto& info = cache->iregs.at(v);
assert(info.kind != emitter::RegKind::INVALID); assert(info.reg_class != RegClass::INVALID);
if (info.kind == emitter::RegKind::GPR) { auto hw_kind = emitter::reg_class_to_hw(info.reg_class);
if (hw_kind == emitter::HWRegKind::GPR) {
return emitter::gRegInfo.get_gpr_spill_alloc_order(); return emitter::gRegInfo.get_gpr_spill_alloc_order();
} else if (info.kind == emitter::RegKind::XMM) { } else if (hw_kind == emitter::HWRegKind::XMM) {
return emitter::gRegInfo.get_xmm_spill_alloc_order(); return emitter::gRegInfo.get_xmm_spill_alloc_order();
} else { } else {
throw std::runtime_error("Unsupported RegKind"); throw std::runtime_error("Unsupported HWRegKind");
} }
} }
@ -632,22 +633,22 @@ const std::vector<emitter::Register>& get_default_alloc_order_for_var(int v,
RegAllocCache* cache, RegAllocCache* cache,
bool get_all) { bool get_all) {
auto& info = cache->iregs.at(v); auto& info = cache->iregs.at(v);
// todo fix this. assert(info.reg_class != RegClass::INVALID);
// assert(info.kind != emitter::RegKind::INVALID); auto hw_kind = emitter::reg_class_to_hw(info.reg_class);
if (info.kind == emitter::RegKind::GPR || info.kind == emitter::RegKind::INVALID) { if (hw_kind == emitter::HWRegKind::GPR || hw_kind == emitter::HWRegKind::INVALID) {
if (!get_all && cache->is_asm_func) { if (!get_all && cache->is_asm_func) {
return emitter::gRegInfo.get_gpr_temp_alloc_order(); return emitter::gRegInfo.get_gpr_temp_alloc_order();
} else { } else {
return emitter::gRegInfo.get_gpr_alloc_order(); return emitter::gRegInfo.get_gpr_alloc_order();
} }
} else if (info.kind == emitter::RegKind::XMM) { } else if (hw_kind == emitter::HWRegKind::XMM) {
if (!get_all && cache->is_asm_func) { if (!get_all && cache->is_asm_func) {
return emitter::gRegInfo.get_xmm_temp_alloc_order(); return emitter::gRegInfo.get_xmm_temp_alloc_order();
} else { } else {
return emitter::gRegInfo.get_xmm_alloc_order(); return emitter::gRegInfo.get_xmm_alloc_order();
} }
} else { } else {
throw std::runtime_error("Unsupported RegKind"); throw std::runtime_error("Unsupported HWRegKind");
} }
} }

View File

@ -1,3 +1,4 @@
#include <cassert>
#include "third-party/fmt/core.h" #include "third-party/fmt/core.h"
#include "IRegister.h" #include "IRegister.h"
@ -10,8 +11,18 @@ std::string IRegister::to_string() const {
// } // }
// return result; // return result;
// } else { // } else {
return fmt::format("i{}-{}", emitter::to_string(kind), id); switch (reg_class) {
// } case RegClass::GPR_64:
return fmt::format("igpr-{}", id);
case RegClass::FLOAT:
return fmt::format("ifpr-{}", id);
case RegClass::INT_128:
return fmt::format("ii128-{}", id);
case RegClass::VECTOR_FLOAT:
return fmt::format("ivf-{}", id);
default:
assert(false);
}
} }
std::string IRegConstraint::to_string() const { std::string IRegConstraint::to_string() const {

View File

@ -4,15 +4,12 @@
* IRegister is the Register for the Intermediate Representation. * IRegister is the Register for the Intermediate Representation.
*/ */
#ifndef JAK_IREGISTER_H
#define JAK_IREGISTER_H
#include <string> #include <string>
#include <vector> #include <vector>
#include "goalc/emitter/Register.h" #include "goalc/emitter/Register.h"
struct IRegister { struct IRegister {
emitter::RegKind kind = emitter::RegKind::INVALID; RegClass reg_class = RegClass::INVALID;
int id = -1; int id = -1;
std::string to_string() const; std::string to_string() const;
struct hash { struct hash {
@ -27,5 +24,3 @@ struct IRegConstraint {
emitter::Register desired_register; emitter::Register desired_register;
std::string to_string() const; std::string to_string() const;
}; };
#endif // JAK_IREGISTER_H

View File

@ -13,6 +13,7 @@ add_executable(goalc-test
test_type_system.cpp test_type_system.cpp
test_CodeTester.cpp test_CodeTester.cpp
test_emitter.cpp test_emitter.cpp
test_emitter_avx.cpp
test_common_util.cpp test_common_util.cpp
test_pretty_print.cpp test_pretty_print.cpp
test_zydis.cpp test_zydis.cpp

View File

@ -0,0 +1,21 @@
;; Fill two stack vectors with known values, compute vector-1 - vector-0 into vector-2 with
;; vector-!, and print the sum of vector-2's four components.
(defun test-basic-vector-math ()
(let ((vector-0 (new 'stack 'vector))
(vector-1 (new 'stack 'vector))
(vector-2 (new 'stack 'vector)))
(set! (-> vector-0 x) 1.0)
(set! (-> vector-0 y) 2.0)
(set! (-> vector-0 z) 3.0)
(set! (-> vector-0 w) 4.0)
(set! (-> vector-1 x) 10.0)
(set! (-> vector-1 y) 20.0)
(set! (-> vector-1 z) 30.0)
(set! (-> vector-1 w) 40.1)
(vector-! vector-2 vector-1 vector-0)
; x, y and z differences: 9 + 18 + 27 = 54.0000
; NOTE(review): w is also added below, so this total presumably relies on vector-! leaving
; vector-2's w at 0 - confirm against vector-!'s definition.
(format #t "~f~%" (+ (-> vector-2 x) (-> vector-2 y) (-> vector-2 z) (-> vector-2 w)))
)
)
(test-basic-vector-math)

View File

@ -0,0 +1,7 @@
;; Load a static vector into a vf register with .lvf, store it to a global vector with .svf,
;; then print the sum of the stored components (1.0 + 1.2 + 1.5 + 1.6).
(define my-vector (new 'global 'vector))
(rlet ((vf1 :class vf :reset-here #t))
(.lvf vf1 (new 'static 'vector :x 1.0 :y 1.2 :z 1.5 :w 1.6))
(.svf my-vector vf1)
)
(format #t "~f~%" (+ (-> my-vector x) (-> my-vector y) (-> my-vector z) (-> my-vector w)))

View File

@ -0,0 +1,21 @@
;; Round-trip a stack vector through two vf registers: load vector-0 into vf1, copy vf1 to vf2
;; (vf2 is pinned to xmm1), store vf2 into the first vector of vector-1, and return that
;; vector's y component (vector-0's y was set to 2.0).
(defun vf-test-load-and-store ()
(let ((vector-0 (new 'stack 'vector))
(vector-1 (new 'stack 'vector4s-3)))
(set! (-> vector-0 x) 1.0)
(set! (-> vector-0 y) 2.0)
(set! (-> vector-0 z) 3.0)
(set! (-> vector-0 w) 4.0)
(rlet ((vf1 :class vf :reset-here #t)
(vf2 :class vf :reg xmm1 :reset-here #t))
(.lvf vf1 vector-0)
(.mov vf2 vf1)
(.svf (-> vector-1 vector 0) vf2)
)
(-> vector-1 vector 0 y)
)
)
(format #t "~f~%" (vf-test-load-and-store))
0

View File

@ -345,6 +345,18 @@ TEST_F(WithGameTests, StaticBoxedArray) {
{"4 asdf \"test\" (a b) 0 object 12 12\n0\n"}); {"4 asdf \"test\" (a b) 0 object 12 12\n0\n"});
} }
// Runs test-vf-load-and-store.gc and expects it to print 2.0000 followed by 0.
TEST_F(WithGameTests, VFLoadAndStore) {
runner.run_static_test(env, testCategory, "test-vf-load-and-store.gc", {"2.0000\n0\n"});
}
// Runs test-basic-vector-math.gc and expects it to print 54.0000 followed by 0.
TEST_F(WithGameTests, VFSimpleMath) {
runner.run_static_test(env, testCategory, "test-basic-vector-math.gc", {"54.0000\n0\n"});
}
// Runs test-load-static-vector.gc and expects it to print 5.3000 followed by 0.
TEST_F(WithGameTests, VFLoadStatic) {
runner.run_static_test(env, testCategory, "test-load-static-vector.gc", {"5.3000\n0\n"});
}
TEST(TypeConsistency, TypeConsistency) { TEST(TypeConsistency, TypeConsistency) {
Compiler compiler; Compiler compiler;
compiler.enable_throw_on_redefines(); compiler.enable_throw_on_redefines();

211
test/test_emitter_avx.cpp Normal file
View File

@ -0,0 +1,211 @@
#include "gtest/gtest.h"
#include "goalc/emitter/CodeTester.h"
#include "goalc/emitter/IGen.h"
using namespace emitter;
// Emits mov_vf_vf for every (dst, src) pair of xmm0..xmm15 (dst in the outer loop) and compares
// the concatenated machine code against golden bytes. The golden bytes show three encodings:
// two-byte VEX with opcode 0x28, two-byte VEX with opcode 0x29 (dst < 8 and src >= 8), and
// three-byte VEX with opcode 0x28 (dst >= 8 and src >= 8).
TEST(EmitterAVX, MOV_VF) {
CodeTester tester;
tester.init_code_buffer(10000);
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
tester.emit(IGen::mov_vf_vf(XMM0 + i, XMM0 + j));
}
}
EXPECT_EQ(
tester.dump_to_hex_string(true),
"C5F828C0C5F828C1C5F828C2C5F828C3C5F828C4C5F828C5C5F828C6C5F828C7C57829C0C57829C8C57829D0C578"
"29D8C57829E0C57829E8C57829F0C57829F8C5F828C8C5F828C9C5F828CAC5F828CBC5F828CCC5F828CDC5F828CE"
"C5F828CFC57829C1C57829C9C57829D1C57829D9C57829E1C57829E9C57829F1C57829F9C5F828D0C5F828D1C5F8"
"28D2C5F828D3C5F828D4C5F828D5C5F828D6C5F828D7C57829C2C57829CAC57829D2C57829DAC57829E2C57829EA"
"C57829F2C57829FAC5F828D8C5F828D9C5F828DAC5F828DBC5F828DCC5F828DDC5F828DEC5F828DFC57829C3C578"
"29CBC57829D3C57829DBC57829E3C57829EBC57829F3C57829FBC5F828E0C5F828E1C5F828E2C5F828E3C5F828E4"
"C5F828E5C5F828E6C5F828E7C57829C4C57829CCC57829D4C57829DCC57829E4C57829ECC57829F4C57829FCC5F8"
"28E8C5F828E9C5F828EAC5F828EBC5F828ECC5F828EDC5F828EEC5F828EFC57829C5C57829CDC57829D5C57829DD"
"C57829E5C57829EDC57829F5C57829FDC5F828F0C5F828F1C5F828F2C5F828F3C5F828F4C5F828F5C5F828F6C5F8"
"28F7C57829C6C57829CEC57829D6C57829DEC57829E6C57829EEC57829F6C57829FEC5F828F8C5F828F9C5F828FA"
"C5F828FBC5F828FCC5F828FDC5F828FEC5F828FFC57829C7C57829CFC57829D7C57829DFC57829E7C57829EFC578"
"29F7C57829FFC57828C0C57828C1C57828C2C57828C3C57828C4C57828C5C57828C6C57828C7C4417828C0C44178"
"28C1C4417828C2C4417828C3C4417828C4C4417828C5C4417828C6C4417828C7C57828C8C57828C9C57828CAC578"
"28CBC57828CCC57828CDC57828CEC57828CFC4417828C8C4417828C9C4417828CAC4417828CBC4417828CCC44178"
"28CDC4417828CEC4417828CFC57828D0C57828D1C57828D2C57828D3C57828D4C57828D5C57828D6C57828D7C441"
"7828D0C4417828D1C4417828D2C4417828D3C4417828D4C4417828D5C4417828D6C4417828D7C57828D8C57828D9"
"C57828DAC57828DBC57828DCC57828DDC57828DEC57828DFC4417828D8C4417828D9C4417828DAC4417828DBC441"
"7828DCC4417828DDC4417828DEC4417828DFC57828E0C57828E1C57828E2C57828E3C57828E4C57828E5C57828E6"
"C57828E7C4417828E0C4417828E1C4417828E2C4417828E3C4417828E4C4417828E5C4417828E6C4417828E7C578"
"28E8C57828E9C57828EAC57828EBC57828ECC57828EDC57828EEC57828EFC4417828E8C4417828E9C4417828EAC4"
"417828EBC4417828ECC4417828EDC4417828EEC4417828EFC57828F0C57828F1C57828F2C57828F3C57828F4C578"
"28F5C57828F6C57828F7C4417828F0C4417828F1C4417828F2C4417828F3C4417828F4C4417828F5C4417828F6C4"
"417828F7C57828F8C57828F9C57828FAC57828FBC57828FCC57828FDC57828FEC57828FFC4417828F8C4417828F9"
"C4417828FAC4417828FBC4417828FCC4417828FDC4417828FEC4417828FF");
}
// Checks the [reg + reg] vector load against golden bytes, covering a low (xmm3) and a high
// (xmm13) destination, each with RSI + R15 and R12 + R15 as the address pair.
TEST(EmitterAVX, LoadVF_Reg) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::loadvf_gpr64_plus_gpr64(XMM0 + 3, RSI, R15));
tester.emit(IGen::loadvf_gpr64_plus_gpr64(XMM0 + 3, R12, R15));
tester.emit(IGen::loadvf_gpr64_plus_gpr64(XMM0 + 13, RSI, R15));
tester.emit(IGen::loadvf_gpr64_plus_gpr64(XMM0 + 13, R12, R15));
EXPECT_EQ(tester.dump_to_hex_string(true), "C4C178281C37C48178281C3CC44178282C37C40178282C3C");
}
// Checks the [reg + reg + s8] vector load against golden bytes with an offset of -3, covering a
// low (xmm3) and a high (xmm13) destination, each with RSI + R15 and R12 + R15 as the address pair.
TEST(EmitterAVX, LoadVF_RegS8) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::loadvf_gpr64_plus_gpr64_plus_s8(XMM0 + 3, RSI, R15, -3));
tester.emit(IGen::loadvf_gpr64_plus_gpr64_plus_s8(XMM0 + 3, R12, R15, -3));
tester.emit(IGen::loadvf_gpr64_plus_gpr64_plus_s8(XMM0 + 13, RSI, R15, -3));
tester.emit(IGen::loadvf_gpr64_plus_gpr64_plus_s8(XMM0 + 13, R12, R15, -3));
EXPECT_EQ(tester.dump_to_hex_string(true),
"C4C178285C37FDC48178285C3CFDC44178286C37FDC40178286C3CFD");
}
// Checks the [reg + reg + s32] vector load against golden bytes with an offset of -0x100,
// covering a low (xmm3) and a high (xmm13) destination, each with RSI + R15 and R12 + R15 as the
// address pair.
TEST(EmitterAVX, LoadVF_RegS32) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::loadvf_gpr64_plus_gpr64_plus_s32(XMM0 + 3, RSI, R15, -0x100));
tester.emit(IGen::loadvf_gpr64_plus_gpr64_plus_s32(XMM0 + 3, R12, R15, -0x100));
tester.emit(IGen::loadvf_gpr64_plus_gpr64_plus_s32(XMM0 + 13, RSI, R15, -0x100));
tester.emit(IGen::loadvf_gpr64_plus_gpr64_plus_s32(XMM0 + 13, R12, R15, -0x100));
EXPECT_EQ(tester.dump_to_hex_string(true),
"C4C178289C3700FFFFFFC48178289C3C00FFFFFFC4417828AC3700FFFFFFC4017828AC3C00FFFFFF");
}
// Checks the [reg + reg] vector store against golden bytes, covering a low (xmm3) and a high
// (xmm13) source, each with RSI + R15 and R12 + R15 as the address pair.
TEST(EmitterAVX, StoreVF_Reg) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::storevf_gpr64_plus_gpr64(XMM0 + 3, RSI, R15));
tester.emit(IGen::storevf_gpr64_plus_gpr64(XMM0 + 3, R12, R15));
tester.emit(IGen::storevf_gpr64_plus_gpr64(XMM0 + 13, RSI, R15));
tester.emit(IGen::storevf_gpr64_plus_gpr64(XMM0 + 13, R12, R15));
EXPECT_EQ(tester.dump_to_hex_string(true), "C4C178291C37C48178291C3CC44178292C37C40178292C3C");
}
// Checks the [reg + reg + s8] vector store against golden bytes with an offset of -3, covering a
// low (xmm3) and a high (xmm13) source, each with RSI + R15 and R12 + R15 as the address pair.
TEST(EmitterAVX, StoreVF_RegS8) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::storevf_gpr64_plus_gpr64_plus_s8(XMM0 + 3, RSI, R15, -3));
tester.emit(IGen::storevf_gpr64_plus_gpr64_plus_s8(XMM0 + 3, R12, R15, -3));
tester.emit(IGen::storevf_gpr64_plus_gpr64_plus_s8(XMM0 + 13, RSI, R15, -3));
tester.emit(IGen::storevf_gpr64_plus_gpr64_plus_s8(XMM0 + 13, R12, R15, -3));
EXPECT_EQ(tester.dump_to_hex_string(true),
"C4C178295C37FDC48178295C3CFDC44178296C37FDC40178296C3CFD");
}
// Checks the [reg + reg + s32] vector store against golden bytes with an offset of -0x100,
// covering a low (xmm3) and a high (xmm13) source, each with RSI + R15 and R12 + R15 as the
// address pair.
TEST(EmitterAVX, StoreVF_RegS32) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::storevf_gpr64_plus_gpr64_plus_s32(XMM0 + 3, RSI, R15, -0x100));
tester.emit(IGen::storevf_gpr64_plus_gpr64_plus_s32(XMM0 + 3, R12, R15, -0x100));
tester.emit(IGen::storevf_gpr64_plus_gpr64_plus_s32(XMM0 + 13, RSI, R15, -0x100));
tester.emit(IGen::storevf_gpr64_plus_gpr64_plus_s32(XMM0 + 13, R12, R15, -0x100));
EXPECT_EQ(tester.dump_to_hex_string(true),
"C4C178299C3700FFFFFFC48178299C3C00FFFFFFC4417829AC3700FFFFFFC4017829AC3C00FFFFFF");
}
// Checks mul_vf against golden bytes for all eight combinations of a low (xmm3) or high (xmm13)
// register in each of the dst, src1 and src2 positions.
TEST(EmitterAVX, MulVF) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::mul_vf(XMM0 + 3, XMM0 + 3, XMM0 + 3));
tester.emit(IGen::mul_vf(XMM0 + 3, XMM0 + 3, XMM0 + 13));
tester.emit(IGen::mul_vf(XMM0 + 3, XMM0 + 13, XMM0 + 3));
tester.emit(IGen::mul_vf(XMM0 + 3, XMM0 + 13, XMM0 + 13));
tester.emit(IGen::mul_vf(XMM0 + 13, XMM0 + 3, XMM0 + 3));
tester.emit(IGen::mul_vf(XMM0 + 13, XMM0 + 3, XMM0 + 13));
tester.emit(IGen::mul_vf(XMM0 + 13, XMM0 + 13, XMM0 + 3));
tester.emit(IGen::mul_vf(XMM0 + 13, XMM0 + 13, XMM0 + 13));
EXPECT_EQ(tester.dump_to_hex_string(true),
"C5E059DBC4C16059DDC59059DBC4C11059DDC56059EBC4416059EDC51059EBC4411059ED");
}
// Checks shuffle_vf against golden bytes for low/high dst (xmm3, xmm13) and low/high src
// (xmm4, xmm14). The lane selectors (1, 3, 2, 1) pack to the immediate 0x6D seen in the output.
TEST(EmitterAVX, ShuffleVF) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::shuffle_vf(XMM0 + 3, XMM0 + 4, 1, 3, 2, 1));
tester.emit(IGen::shuffle_vf(XMM0 + 3, XMM0 + 14, 1, 3, 2, 1));
tester.emit(IGen::shuffle_vf(XMM0 + 13, XMM0 + 4, 1, 3, 2, 1));
tester.emit(IGen::shuffle_vf(XMM0 + 13, XMM0 + 14, 1, 3, 2, 1));
EXPECT_EQ(tester.dump_to_hex_string(true), "C5D8C6DC6DC4C108C6DE6DC558C6EC6DC44108C6EE6D");
}
// Checks xor_vf against golden bytes for all eight combinations of a low (xmm3) or high (xmm13)
// register in each of the dst, src1 and src2 positions.
TEST(EmitterAVX, XorVF) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::xor_vf(XMM0 + 3, XMM0 + 3, XMM0 + 3));
tester.emit(IGen::xor_vf(XMM0 + 3, XMM0 + 3, XMM0 + 13));
tester.emit(IGen::xor_vf(XMM0 + 3, XMM0 + 13, XMM0 + 3));
tester.emit(IGen::xor_vf(XMM0 + 3, XMM0 + 13, XMM0 + 13));
tester.emit(IGen::xor_vf(XMM0 + 13, XMM0 + 3, XMM0 + 3));
tester.emit(IGen::xor_vf(XMM0 + 13, XMM0 + 3, XMM0 + 13));
tester.emit(IGen::xor_vf(XMM0 + 13, XMM0 + 13, XMM0 + 3));
tester.emit(IGen::xor_vf(XMM0 + 13, XMM0 + 13, XMM0 + 13));
EXPECT_EQ(tester.dump_to_hex_string(true),
"C5E057DBC4C16057DDC59057DBC4C11057DDC56057EBC4416057EDC51057EBC4411057ED");
}
// Checks sub_vf against golden bytes for all eight combinations of a low (xmm3) or high (xmm13)
// register in each of the dst, src1 and src2 positions.
TEST(EmitterAVX, SubVF) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::sub_vf(XMM0 + 3, XMM0 + 3, XMM0 + 3));
tester.emit(IGen::sub_vf(XMM0 + 3, XMM0 + 3, XMM0 + 13));
tester.emit(IGen::sub_vf(XMM0 + 3, XMM0 + 13, XMM0 + 3));
tester.emit(IGen::sub_vf(XMM0 + 3, XMM0 + 13, XMM0 + 13));
tester.emit(IGen::sub_vf(XMM0 + 13, XMM0 + 3, XMM0 + 3));
tester.emit(IGen::sub_vf(XMM0 + 13, XMM0 + 3, XMM0 + 13));
tester.emit(IGen::sub_vf(XMM0 + 13, XMM0 + 13, XMM0 + 3));
tester.emit(IGen::sub_vf(XMM0 + 13, XMM0 + 13, XMM0 + 13));
EXPECT_EQ(tester.dump_to_hex_string(true),
"C5E05CDBC4C1605CDDC5905CDBC4C1105CDDC5605CEBC441605CEDC5105CEBC441105CED");
}
// Checks add_vf against golden bytes for all eight combinations of a low (xmm3) or high (xmm13)
// register in each of the dst, src1 and src2 positions.
TEST(EmitterAVX, AddVF) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::add_vf(XMM0 + 3, XMM0 + 3, XMM0 + 3));
tester.emit(IGen::add_vf(XMM0 + 3, XMM0 + 3, XMM0 + 13));
tester.emit(IGen::add_vf(XMM0 + 3, XMM0 + 13, XMM0 + 3));
tester.emit(IGen::add_vf(XMM0 + 3, XMM0 + 13, XMM0 + 13));
tester.emit(IGen::add_vf(XMM0 + 13, XMM0 + 3, XMM0 + 3));
tester.emit(IGen::add_vf(XMM0 + 13, XMM0 + 3, XMM0 + 13));
tester.emit(IGen::add_vf(XMM0 + 13, XMM0 + 13, XMM0 + 3));
tester.emit(IGen::add_vf(XMM0 + 13, XMM0 + 13, XMM0 + 13));
EXPECT_EQ(tester.dump_to_hex_string(true),
"C5E058DBC4C16058DDC59058DBC4C11058DDC56058EBC4416058EDC51058EBC4411058ED");
}
// Checks blend_vf (mask = 3) against golden bytes for all eight combinations of a low (xmm3) or
// high (xmm13) register in each of the dst, src1 and src2 positions. Every expected encoding
// starts with 0xC4, i.e. the three-byte VEX prefix.
TEST(EmitterAVX, BlendVF) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::blend_vf(XMM0 + 3, XMM0 + 3, XMM0 + 3, 3));
tester.emit(IGen::blend_vf(XMM0 + 3, XMM0 + 3, XMM0 + 13, 3));
tester.emit(IGen::blend_vf(XMM0 + 3, XMM0 + 13, XMM0 + 3, 3));
tester.emit(IGen::blend_vf(XMM0 + 3, XMM0 + 13, XMM0 + 13, 3));
tester.emit(IGen::blend_vf(XMM0 + 13, XMM0 + 3, XMM0 + 3, 3));
tester.emit(IGen::blend_vf(XMM0 + 13, XMM0 + 3, XMM0 + 13, 3));
tester.emit(IGen::blend_vf(XMM0 + 13, XMM0 + 13, XMM0 + 3, 3));
tester.emit(IGen::blend_vf(XMM0 + 13, XMM0 + 13, XMM0 + 13, 3));
EXPECT_EQ(tester.dump_to_hex_string(true),
"C4E3610CDB03C4C3610CDD03C4E3110CDB03C4C3110CDD03C463610CEB03C443610CED03C463110CEB03C4"
"43110CED03");
}
// Checks the rip-relative vector load against golden bytes for a low (xmm3) and a high (xmm13)
// destination with an offset of -123 (little-endian 85 FF FF FF in the output).
TEST(EmitterAVX, RIP) {
CodeTester tester;
tester.init_code_buffer(1024);
tester.emit(IGen::loadvf_rip_plus_s32(XMM0 + 3, -123));
tester.emit(IGen::loadvf_rip_plus_s32(XMM0 + 13, -123));
EXPECT_EQ(tester.dump_to_hex_string(true), "C5F8281D85FFFFFFC578282D85FFFFFF");
}

View File

@ -128,7 +128,7 @@ TEST(TypeSystem, DerefInfoNoLoadInfoOrStride) {
EXPECT_TRUE(info.can_deref); EXPECT_TRUE(info.can_deref);
EXPECT_TRUE(info.mem_deref); EXPECT_TRUE(info.mem_deref);
EXPECT_FALSE(info.sign_extend); // it's a memory address being loaded EXPECT_FALSE(info.sign_extend); // it's a memory address being loaded
EXPECT_EQ(info.reg, RegKind::GPR_64); EXPECT_EQ(info.reg, RegClass::GPR_64);
EXPECT_EQ(info.stride, 4); EXPECT_EQ(info.stride, 4);
EXPECT_EQ(info.result_type.print(), "(function string symbol int32)"); EXPECT_EQ(info.result_type.print(), "(function string symbol int32)");
EXPECT_EQ(info.load_size, 4); EXPECT_EQ(info.load_size, 4);
@ -141,7 +141,7 @@ TEST(TypeSystem, DerefInfoNoLoadInfoOrStride) {
EXPECT_EQ(info.load_size, 8); EXPECT_EQ(info.load_size, 8);
EXPECT_EQ(info.stride, 8); EXPECT_EQ(info.stride, 8);
EXPECT_EQ(info.sign_extend, true); EXPECT_EQ(info.sign_extend, true);
EXPECT_EQ(info.reg, RegKind::GPR_64); EXPECT_EQ(info.reg, RegClass::GPR_64);
EXPECT_EQ(info.result_type.print(), "int64"); EXPECT_EQ(info.result_type.print(), "int64");
// test inline-array (won't work because type is dynamically sized) // test inline-array (won't work because type is dynamically sized)
@ -331,7 +331,7 @@ TEST(TypeSystem, DecompLookupsTypeOfBasic) {
dk.size = 4; dk.size = 4;
dk.sign_extend = false; dk.sign_extend = false;
dk.is_store = false; dk.is_store = false;
dk.reg_kind = RegKind::GPR_64; dk.reg_kind = RegClass::GPR_64;
input.deref = dk; input.deref = dk;
auto result = ts.reverse_field_lookup(input); auto result = ts.reverse_field_lookup(input);
@ -356,7 +356,7 @@ TEST(TypeSystem, DecompLookupsMethod) {
dk.size = 4; dk.size = 4;
dk.sign_extend = false; dk.sign_extend = false;
dk.is_store = false; dk.is_store = false;
dk.reg_kind = RegKind::GPR_64; dk.reg_kind = RegClass::GPR_64;
input.deref = dk; input.deref = dk;
auto result = ts.reverse_field_lookup(input); auto result = ts.reverse_field_lookup(input);
@ -373,7 +373,7 @@ TEST(TypeSystem, DecompLookupsMethod) {
dk.size = 4; dk.size = 4;
dk.sign_extend = false; dk.sign_extend = false;
dk.is_store = false; dk.is_store = false;
dk.reg_kind = RegKind::GPR_64; dk.reg_kind = RegClass::GPR_64;
input.deref = dk; input.deref = dk;
result = ts.reverse_field_lookup(input); result = ts.reverse_field_lookup(input);