mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-17 23:35:34 +00:00
Bug 1660976 - Output of mach vendor rust; r=bbouvier.
Depends on D88132 Differential Revision: https://phabricator.services.mozilla.com/D88133
This commit is contained in:
parent
4586eba01b
commit
d994de3030
@ -60,7 +60,7 @@ rev = "3224e2dee65c0726c448484d4c3c43956b9330ec"
|
||||
[source."https://github.com/bytecodealliance/wasmtime"]
|
||||
git = "https://github.com/bytecodealliance/wasmtime"
|
||||
replace-with = "vendored-sources"
|
||||
rev = "924782191b1cdd85438b707d20a82fbcc8ad40e1"
|
||||
rev = "7c856542854bc8c5da9d5fb1a0b41f3c660d8484"
|
||||
|
||||
[source."https://github.com/badboy/failure"]
|
||||
git = "https://github.com/badboy/failure"
|
||||
|
14
Cargo.lock
generated
14
Cargo.lock
generated
@ -769,7 +769,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "cranelift-bforest"
|
||||
version = "0.66.0"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=924782191b1cdd85438b707d20a82fbcc8ad40e1#924782191b1cdd85438b707d20a82fbcc8ad40e1"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=7c856542854bc8c5da9d5fb1a0b41f3c660d8484#7c856542854bc8c5da9d5fb1a0b41f3c660d8484"
|
||||
dependencies = [
|
||||
"cranelift-entity 0.66.0",
|
||||
]
|
||||
@ -777,7 +777,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "cranelift-codegen"
|
||||
version = "0.66.0"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=924782191b1cdd85438b707d20a82fbcc8ad40e1#924782191b1cdd85438b707d20a82fbcc8ad40e1"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=7c856542854bc8c5da9d5fb1a0b41f3c660d8484#7c856542854bc8c5da9d5fb1a0b41f3c660d8484"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"cranelift-bforest",
|
||||
@ -794,7 +794,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "cranelift-codegen-meta"
|
||||
version = "0.66.0"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=924782191b1cdd85438b707d20a82fbcc8ad40e1#924782191b1cdd85438b707d20a82fbcc8ad40e1"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=7c856542854bc8c5da9d5fb1a0b41f3c660d8484#7c856542854bc8c5da9d5fb1a0b41f3c660d8484"
|
||||
dependencies = [
|
||||
"cranelift-codegen-shared",
|
||||
"cranelift-entity 0.66.0",
|
||||
@ -803,7 +803,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "cranelift-codegen-shared"
|
||||
version = "0.66.0"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=924782191b1cdd85438b707d20a82fbcc8ad40e1#924782191b1cdd85438b707d20a82fbcc8ad40e1"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=7c856542854bc8c5da9d5fb1a0b41f3c660d8484#7c856542854bc8c5da9d5fb1a0b41f3c660d8484"
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-entity"
|
||||
@ -813,12 +813,12 @@ source = "git+https://github.com/PLSysSec/lucet_sandbox_compiler?rev=477d8fc53a6
|
||||
[[package]]
|
||||
name = "cranelift-entity"
|
||||
version = "0.66.0"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=924782191b1cdd85438b707d20a82fbcc8ad40e1#924782191b1cdd85438b707d20a82fbcc8ad40e1"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=7c856542854bc8c5da9d5fb1a0b41f3c660d8484#7c856542854bc8c5da9d5fb1a0b41f3c660d8484"
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-frontend"
|
||||
version = "0.66.0"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=924782191b1cdd85438b707d20a82fbcc8ad40e1#924782191b1cdd85438b707d20a82fbcc8ad40e1"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=7c856542854bc8c5da9d5fb1a0b41f3c660d8484#7c856542854bc8c5da9d5fb1a0b41f3c660d8484"
|
||||
dependencies = [
|
||||
"cranelift-codegen",
|
||||
"log",
|
||||
@ -829,7 +829,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "cranelift-wasm"
|
||||
version = "0.66.0"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=924782191b1cdd85438b707d20a82fbcc8ad40e1#924782191b1cdd85438b707d20a82fbcc8ad40e1"
|
||||
source = "git+https://github.com/bytecodealliance/wasmtime?rev=7c856542854bc8c5da9d5fb1a0b41f3c660d8484#7c856542854bc8c5da9d5fb1a0b41f3c660d8484"
|
||||
dependencies = [
|
||||
"cranelift-codegen",
|
||||
"cranelift-entity 0.66.0",
|
||||
|
File diff suppressed because one or more lines are too long
@ -41,9 +41,11 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
|
||||
trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data)
|
||||
}
|
||||
|
||||
/// Does the given instruction have any side-effect as per [has_side_effect], or else is a load?
|
||||
pub fn has_side_effect_or_load(func: &Function, inst: Inst) -> bool {
|
||||
has_side_effect(func, inst) || func.dfg[inst].opcode().can_load()
|
||||
/// Does the given instruction have any side-effect as per [has_side_effect], or else is a load,
|
||||
/// but not the get_pinned_reg opcode?
|
||||
pub fn has_lowering_side_effect(func: &Function, inst: Inst) -> bool {
|
||||
let op = func.dfg[inst].opcode();
|
||||
op != Opcode::GetPinnedReg && (has_side_effect(func, inst) || op.can_load())
|
||||
}
|
||||
|
||||
/// Is the given instruction a constant value (`iconst`, `fconst`, `bconst`) that can be
|
||||
|
@ -3,7 +3,6 @@
|
||||
// Some variants are never constructed, but we still want them as options in the future.
|
||||
#![allow(dead_code)]
|
||||
|
||||
use crate::ir;
|
||||
use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
|
||||
use crate::ir::Type;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
@ -647,31 +646,37 @@ impl VectorSize {
|
||||
VectorSize::Size64x2 => ScalarSize::Size64,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_128bits(&self) -> bool {
|
||||
match self {
|
||||
VectorSize::Size8x8 => false,
|
||||
VectorSize::Size8x16 => true,
|
||||
VectorSize::Size16x4 => false,
|
||||
VectorSize::Size16x8 => true,
|
||||
VectorSize::Size32x2 => false,
|
||||
VectorSize::Size32x4 => true,
|
||||
VectorSize::Size64x2 => true,
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Instruction sub-components: atomic memory update operations
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
pub enum AtomicRMWOp {
|
||||
Add,
|
||||
Sub,
|
||||
And,
|
||||
Or,
|
||||
Xor,
|
||||
Xchg,
|
||||
pub fn widen(&self) -> VectorSize {
|
||||
match self {
|
||||
VectorSize::Size8x8 => VectorSize::Size16x8,
|
||||
VectorSize::Size8x16 => VectorSize::Size16x8,
|
||||
VectorSize::Size16x4 => VectorSize::Size32x4,
|
||||
VectorSize::Size16x8 => VectorSize::Size32x4,
|
||||
VectorSize::Size32x2 => VectorSize::Size64x2,
|
||||
VectorSize::Size32x4 => VectorSize::Size64x2,
|
||||
VectorSize::Size64x2 => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
impl AtomicRMWOp {
|
||||
pub fn from(ir_op: ir::AtomicRmwOp) -> Self {
|
||||
match ir_op {
|
||||
ir::AtomicRmwOp::Add => AtomicRMWOp::Add,
|
||||
ir::AtomicRmwOp::Sub => AtomicRMWOp::Sub,
|
||||
ir::AtomicRmwOp::And => AtomicRMWOp::And,
|
||||
ir::AtomicRmwOp::Or => AtomicRMWOp::Or,
|
||||
ir::AtomicRmwOp::Xor => AtomicRMWOp::Xor,
|
||||
ir::AtomicRmwOp::Xchg => AtomicRMWOp::Xchg,
|
||||
pub fn halve(&self) -> VectorSize {
|
||||
match self {
|
||||
VectorSize::Size8x16 => VectorSize::Size8x8,
|
||||
VectorSize::Size16x8 => VectorSize::Size16x4,
|
||||
VectorSize::Size32x4 => VectorSize::Size32x2,
|
||||
_ => *self,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -352,12 +352,12 @@ fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
|
||||
}
|
||||
|
||||
fn enc_vec_rr_misc(u: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||
debug_assert_eq!(u & 0b1, u);
|
||||
fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||
debug_assert_eq!(qu & 0b11, qu);
|
||||
debug_assert_eq!(size & 0b11, size);
|
||||
debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
|
||||
let bits = 0b0_1_0_01110_00_10000_00000_10_00000_00000;
|
||||
bits | u << 29
|
||||
let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
|
||||
bits | qu << 29
|
||||
| size << 22
|
||||
| bits_12_16 << 12
|
||||
| machreg_to_vec(rn) << 5
|
||||
@ -1090,18 +1090,18 @@ impl MachInstEmit for Inst {
|
||||
}
|
||||
sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
|
||||
|
||||
if op == AtomicRMWOp::Xchg {
|
||||
if op == inst_common::AtomicRmwOp::Xchg {
|
||||
// mov x28, x26
|
||||
sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x28wr, xzr, x26))
|
||||
} else {
|
||||
// add/sub/and/orr/eor x28, x27, x26
|
||||
let bits_31_21 = match op {
|
||||
AtomicRMWOp::Add => 0b100_01011_00_0,
|
||||
AtomicRMWOp::Sub => 0b110_01011_00_0,
|
||||
AtomicRMWOp::And => 0b100_01010_00_0,
|
||||
AtomicRMWOp::Or => 0b101_01010_00_0,
|
||||
AtomicRMWOp::Xor => 0b110_01010_00_0,
|
||||
AtomicRMWOp::Xchg => unreachable!(),
|
||||
inst_common::AtomicRmwOp::Add => 0b100_01011_00_0,
|
||||
inst_common::AtomicRmwOp::Sub => 0b110_01011_00_0,
|
||||
inst_common::AtomicRmwOp::And => 0b100_01010_00_0,
|
||||
inst_common::AtomicRmwOp::Or => 0b101_01010_00_0,
|
||||
inst_common::AtomicRmwOp::Xor => 0b110_01010_00_0,
|
||||
inst_common::AtomicRmwOp::Xchg => unreachable!(),
|
||||
};
|
||||
sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26));
|
||||
}
|
||||
@ -1367,13 +1367,14 @@ impl MachInstEmit for Inst {
|
||||
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
|
||||
}
|
||||
&Inst::VecMisc { op, rd, rn, size } => {
|
||||
let enc_size = match size {
|
||||
VectorSize::Size8x16 => 0b00,
|
||||
VectorSize::Size16x8 => 0b01,
|
||||
VectorSize::Size32x4 => 0b10,
|
||||
VectorSize::Size64x2 => 0b11,
|
||||
_ => unimplemented!(),
|
||||
let enc_size = match size.lane_size() {
|
||||
ScalarSize::Size8 => 0b00,
|
||||
ScalarSize::Size16 => 0b01,
|
||||
ScalarSize::Size32 => 0b10,
|
||||
ScalarSize::Size64 => 0b11,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let q = if size.is_128bits() { 1 } else { 0 };
|
||||
let (u, bits_12_16, size) = match op {
|
||||
VecMisc2::Not => (0b1, 0b00101, 0b00),
|
||||
VecMisc2::Neg => (0b1, 0b01011, enc_size),
|
||||
@ -1390,8 +1391,33 @@ impl MachInstEmit for Inst {
|
||||
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
|
||||
(0b1, 0b11111, enc_size)
|
||||
}
|
||||
VecMisc2::Rev64 => {
|
||||
debug_assert_ne!(VectorSize::Size64x2, size);
|
||||
(0b0, 0b00000, enc_size)
|
||||
}
|
||||
VecMisc2::Shll => {
|
||||
debug_assert_ne!(VectorSize::Size64x2, size);
|
||||
debug_assert!(!size.is_128bits());
|
||||
(0b1, 0b10011, enc_size)
|
||||
}
|
||||
VecMisc2::Fcvtzs => {
|
||||
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
|
||||
(0b0, 0b11011, enc_size)
|
||||
}
|
||||
VecMisc2::Fcvtzu => {
|
||||
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
|
||||
(0b1, 0b11011, enc_size)
|
||||
}
|
||||
VecMisc2::Scvtf => {
|
||||
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
|
||||
(0b0, 0b11101, enc_size & 0b1)
|
||||
}
|
||||
VecMisc2::Ucvtf => {
|
||||
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
|
||||
(0b1, 0b11101, enc_size & 0b1)
|
||||
}
|
||||
};
|
||||
sink.put4(enc_vec_rr_misc(u, size, bits_12_16, rd, rn));
|
||||
sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
|
||||
}
|
||||
&Inst::VecLanes { op, rd, rn, size } => {
|
||||
let (q, size) = match size {
|
||||
@ -1634,7 +1660,12 @@ impl MachInstEmit for Inst {
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecExtend { t, rd, rn } => {
|
||||
&Inst::VecExtend {
|
||||
t,
|
||||
rd,
|
||||
rn,
|
||||
high_half,
|
||||
} => {
|
||||
let (u, immh) = match t {
|
||||
VecExtendOp::Sxtl8 => (0b0, 0b001),
|
||||
VecExtendOp::Sxtl16 => (0b0, 0b010),
|
||||
@ -1645,12 +1676,39 @@ impl MachInstEmit for Inst {
|
||||
};
|
||||
sink.put4(
|
||||
0b000_011110_0000_000_101001_00000_00000
|
||||
| ((high_half as u32) << 30)
|
||||
| (u << 29)
|
||||
| (immh << 19)
|
||||
| (machreg_to_vec(rn) << 5)
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecMiscNarrow {
|
||||
op,
|
||||
rd,
|
||||
rn,
|
||||
size,
|
||||
high_half,
|
||||
} => {
|
||||
let size = match size.lane_size() {
|
||||
ScalarSize::Size8 => 0b00,
|
||||
ScalarSize::Size16 => 0b01,
|
||||
ScalarSize::Size32 => 0b10,
|
||||
_ => panic!("Unexpected vector operand lane size!"),
|
||||
};
|
||||
let (u, bits_12_16) = match op {
|
||||
VecMiscNarrowOp::Xtn => (0b0, 0b10010),
|
||||
VecMiscNarrowOp::Sqxtn => (0b0, 0b10100),
|
||||
VecMiscNarrowOp::Sqxtun => (0b1, 0b10010),
|
||||
};
|
||||
sink.put4(enc_vec_rr_misc(
|
||||
((high_half as u32) << 1) | u,
|
||||
size,
|
||||
bits_12_16,
|
||||
rd,
|
||||
rn,
|
||||
));
|
||||
}
|
||||
&Inst::VecMovElement {
|
||||
rd,
|
||||
rn,
|
||||
@ -1685,12 +1743,12 @@ impl MachInstEmit for Inst {
|
||||
alu_op,
|
||||
size,
|
||||
} => {
|
||||
let enc_size = match size {
|
||||
VectorSize::Size8x16 => 0b00,
|
||||
VectorSize::Size16x8 => 0b01,
|
||||
VectorSize::Size32x4 => 0b10,
|
||||
VectorSize::Size64x2 => 0b11,
|
||||
_ => 0,
|
||||
let enc_size = match size.lane_size() {
|
||||
ScalarSize::Size8 => 0b00,
|
||||
ScalarSize::Size16 => 0b01,
|
||||
ScalarSize::Size32 => 0b10,
|
||||
ScalarSize::Size64 => 0b11,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let is_float = match alu_op {
|
||||
VecALUOp::Fcmeq
|
||||
@ -1751,6 +1809,11 @@ impl MachInstEmit for Inst {
|
||||
VecALUOp::Fmax => (0b010_01110_00_1, 0b111101),
|
||||
VecALUOp::Fmin => (0b010_01110_10_1, 0b111101),
|
||||
VecALUOp::Fmul => (0b011_01110_00_1, 0b110111),
|
||||
VecALUOp::Addp => (0b010_01110_00_1 | enc_size << 1, 0b101111),
|
||||
VecALUOp::Umlal => {
|
||||
debug_assert!(!size.is_128bits());
|
||||
(0b001_01110_00_1 | enc_size << 1, 0b100000)
|
||||
}
|
||||
};
|
||||
let top11 = if is_float {
|
||||
top11 | enc_float_size << 1
|
||||
|
@ -2008,6 +2008,7 @@ fn test_aarch64_binemit() {
|
||||
t: VecExtendOp::Sxtl8,
|
||||
rd: writable_vreg(4),
|
||||
rn: vreg(27),
|
||||
high_half: false,
|
||||
},
|
||||
"64A7080F",
|
||||
"sxtl v4.8h, v27.8b",
|
||||
@ -2017,15 +2018,17 @@ fn test_aarch64_binemit() {
|
||||
t: VecExtendOp::Sxtl16,
|
||||
rd: writable_vreg(17),
|
||||
rn: vreg(19),
|
||||
high_half: true,
|
||||
},
|
||||
"71A6100F",
|
||||
"sxtl v17.4s, v19.4h",
|
||||
"71A6104F",
|
||||
"sxtl2 v17.4s, v19.8h",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecExtend {
|
||||
t: VecExtendOp::Sxtl32,
|
||||
rd: writable_vreg(30),
|
||||
rn: vreg(6),
|
||||
high_half: false,
|
||||
},
|
||||
"DEA4200F",
|
||||
"sxtl v30.2d, v6.2s",
|
||||
@ -2035,15 +2038,17 @@ fn test_aarch64_binemit() {
|
||||
t: VecExtendOp::Uxtl8,
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(29),
|
||||
high_half: true,
|
||||
},
|
||||
"A3A7082F",
|
||||
"uxtl v3.8h, v29.8b",
|
||||
"A3A7086F",
|
||||
"uxtl2 v3.8h, v29.16b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecExtend {
|
||||
t: VecExtendOp::Uxtl16,
|
||||
rd: writable_vreg(15),
|
||||
rn: vreg(12),
|
||||
high_half: false,
|
||||
},
|
||||
"8FA5102F",
|
||||
"uxtl v15.4s, v12.4h",
|
||||
@ -2053,9 +2058,10 @@ fn test_aarch64_binemit() {
|
||||
t: VecExtendOp::Uxtl32,
|
||||
rd: writable_vreg(28),
|
||||
rn: vreg(2),
|
||||
high_half: true,
|
||||
},
|
||||
"5CA4202F",
|
||||
"uxtl v28.2d, v2.2s",
|
||||
"5CA4206F",
|
||||
"uxtl2 v28.2d, v2.4s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
@ -2082,6 +2088,42 @@ fn test_aarch64_binemit() {
|
||||
"mov v31.s[1], v16.s[0]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMiscNarrow {
|
||||
op: VecMiscNarrowOp::Xtn,
|
||||
rd: writable_vreg(22),
|
||||
rn: vreg(8),
|
||||
size: VectorSize::Size32x2,
|
||||
high_half: false,
|
||||
},
|
||||
"1629A10E",
|
||||
"xtn v22.2s, v8.2d",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMiscNarrow {
|
||||
op: VecMiscNarrowOp::Sqxtn,
|
||||
rd: writable_vreg(31),
|
||||
rn: vreg(0),
|
||||
size: VectorSize::Size16x8,
|
||||
high_half: true,
|
||||
},
|
||||
"1F48614E",
|
||||
"sqxtn2 v31.8h, v0.4s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMiscNarrow {
|
||||
op: VecMiscNarrowOp::Sqxtun,
|
||||
rd: writable_vreg(16),
|
||||
rn: vreg(23),
|
||||
size: VectorSize::Size8x16,
|
||||
high_half: false,
|
||||
},
|
||||
"F02A212E",
|
||||
"sqxtun v16.8b, v23.8h",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Sqadd,
|
||||
@ -3066,6 +3108,53 @@ fn test_aarch64_binemit() {
|
||||
"fmul v2.2d, v0.2d, v5.2d",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Addp,
|
||||
rd: writable_vreg(16),
|
||||
rn: vreg(12),
|
||||
rm: vreg(1),
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"90BD214E",
|
||||
"addp v16.16b, v12.16b, v1.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Addp,
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(12),
|
||||
rm: vreg(14),
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"88BDAE4E",
|
||||
"addp v8.4s, v12.4s, v14.4s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Umlal,
|
||||
rd: writable_vreg(9),
|
||||
rn: vreg(20),
|
||||
rm: vreg(17),
|
||||
size: VectorSize::Size32x2,
|
||||
},
|
||||
"8982B12E",
|
||||
"umlal v9.2d, v20.2s, v17.2s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(17),
|
||||
size: VectorSize::Size8x8,
|
||||
},
|
||||
"345A202E",
|
||||
"mvn v20.8b, v17.8b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
@ -3077,6 +3166,17 @@ fn test_aarch64_binemit() {
|
||||
"mvn v2.16b, v1.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Neg,
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(7),
|
||||
size: VectorSize::Size8x8,
|
||||
},
|
||||
"E3B8202E",
|
||||
"neg v3.8b, v7.8b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Neg,
|
||||
@ -3121,6 +3221,17 @@ fn test_aarch64_binemit() {
|
||||
"neg v10.2d, v8.2d",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Abs,
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(1),
|
||||
size: VectorSize::Size8x8,
|
||||
},
|
||||
"23B8200E",
|
||||
"abs v3.8b, v1.8b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Abs,
|
||||
@ -3198,6 +3309,94 @@ fn test_aarch64_binemit() {
|
||||
"fsqrt v7.2d, v18.2d",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Rev64,
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(10),
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"4109A04E",
|
||||
"rev64 v1.4s, v10.4s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Shll,
|
||||
rd: writable_vreg(12),
|
||||
rn: vreg(5),
|
||||
size: VectorSize::Size8x8,
|
||||
},
|
||||
"AC38212E",
|
||||
"shll v12.8h, v5.8b, #8",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Shll,
|
||||
rd: writable_vreg(9),
|
||||
rn: vreg(1),
|
||||
size: VectorSize::Size16x4,
|
||||
},
|
||||
"2938612E",
|
||||
"shll v9.4s, v1.4h, #16",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Shll,
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(10),
|
||||
size: VectorSize::Size32x2,
|
||||
},
|
||||
"4139A12E",
|
||||
"shll v1.2d, v10.2s, #32",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Fcvtzs,
|
||||
rd: writable_vreg(4),
|
||||
rn: vreg(22),
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"C4BAA14E",
|
||||
"fcvtzs v4.4s, v22.4s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Fcvtzu,
|
||||
rd: writable_vreg(29),
|
||||
rn: vreg(15),
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"FDB9E16E",
|
||||
"fcvtzu v29.2d, v15.2d",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Scvtf,
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(8),
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"14D9214E",
|
||||
"scvtf v20.4s, v8.4s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Ucvtf,
|
||||
rd: writable_vreg(10),
|
||||
rn: vreg(19),
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"6ADA616E",
|
||||
"ucvtf v10.2d, v19.2d",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecLanes {
|
||||
op: VecLanesOp::Uminv,
|
||||
@ -4352,7 +4551,7 @@ fn test_aarch64_binemit() {
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I16,
|
||||
op: AtomicRMWOp::Xor,
|
||||
op: inst_common::AtomicRmwOp::Xor,
|
||||
srcloc: None,
|
||||
},
|
||||
"BF3B03D53B7F5F487C031ACA3C7F1848B8FFFFB5BF3B03D5",
|
||||
@ -4362,7 +4561,7 @@ fn test_aarch64_binemit() {
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I32,
|
||||
op: AtomicRMWOp::Xchg,
|
||||
op: inst_common::AtomicRmwOp::Xchg,
|
||||
srcloc: None,
|
||||
},
|
||||
"BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5",
|
||||
|
@ -283,6 +283,10 @@ pub enum VecALUOp {
|
||||
Fmin,
|
||||
/// Floating-point multiply
|
||||
Fmul,
|
||||
/// Add pairwise
|
||||
Addp,
|
||||
/// Unsigned multiply add long
|
||||
Umlal,
|
||||
}
|
||||
|
||||
/// A Vector miscellaneous operation with two registers.
|
||||
@ -300,6 +304,29 @@ pub enum VecMisc2 {
|
||||
Fneg,
|
||||
/// Floating-point square root
|
||||
Fsqrt,
|
||||
/// Reverse elements in 64-bit doublewords
|
||||
Rev64,
|
||||
/// Shift left long (by element size)
|
||||
Shll,
|
||||
/// Floating-point convert to signed integer, rounding toward zero
|
||||
Fcvtzs,
|
||||
/// Floating-point convert to unsigned integer, rounding toward zero
|
||||
Fcvtzu,
|
||||
/// Signed integer convert to floating-point
|
||||
Scvtf,
|
||||
/// Unsigned integer convert to floating-point
|
||||
Ucvtf,
|
||||
}
|
||||
|
||||
/// A Vector narrowing operation with two registers.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum VecMiscNarrowOp {
|
||||
/// Extract Narrow
|
||||
Xtn,
|
||||
/// Signed saturating extract narrow
|
||||
Sqxtn,
|
||||
/// Signed saturating extract unsigned narrow
|
||||
Sqxtun,
|
||||
}
|
||||
|
||||
/// An operation across the lanes of vectors.
|
||||
@ -622,7 +649,7 @@ pub enum Inst {
|
||||
/// x28 (wr) scratch reg; value afterwards has no meaning
|
||||
AtomicRMW {
|
||||
ty: Type, // I8, I16, I32 or I64
|
||||
op: AtomicRMWOp,
|
||||
op: inst_common::AtomicRmwOp,
|
||||
srcloc: Option<SourceLoc>,
|
||||
},
|
||||
|
||||
@ -869,6 +896,7 @@ pub enum Inst {
|
||||
t: VecExtendOp,
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
high_half: bool,
|
||||
},
|
||||
|
||||
/// Move vector element to another vector element.
|
||||
@ -880,6 +908,15 @@ pub enum Inst {
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Vector narrowing operation.
|
||||
VecMiscNarrow {
|
||||
op: VecMiscNarrowOp,
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
size: VectorSize,
|
||||
high_half: bool,
|
||||
},
|
||||
|
||||
/// A vector ALU op.
|
||||
VecRRR {
|
||||
alu_op: VecALUOp,
|
||||
@ -1605,10 +1642,21 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_mod(rd);
|
||||
collector.add_use(rn);
|
||||
}
|
||||
&Inst::VecMiscNarrow {
|
||||
rd, rn, high_half, ..
|
||||
} => {
|
||||
collector.add_use(rn);
|
||||
|
||||
if high_half {
|
||||
collector.add_mod(rd);
|
||||
} else {
|
||||
collector.add_def(rd);
|
||||
}
|
||||
}
|
||||
&Inst::VecRRR {
|
||||
alu_op, rd, rn, rm, ..
|
||||
} => {
|
||||
if alu_op == VecALUOp::Bsl {
|
||||
if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal {
|
||||
collector.add_mod(rd);
|
||||
} else {
|
||||
collector.add_def(rd);
|
||||
@ -2270,6 +2318,20 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_mod(mapper, rd);
|
||||
map_use(mapper, rn);
|
||||
}
|
||||
&mut Inst::VecMiscNarrow {
|
||||
ref mut rd,
|
||||
ref mut rn,
|
||||
high_half,
|
||||
..
|
||||
} => {
|
||||
map_use(mapper, rn);
|
||||
|
||||
if high_half {
|
||||
map_mod(mapper, rd);
|
||||
} else {
|
||||
map_def(mapper, rd);
|
||||
}
|
||||
}
|
||||
&mut Inst::VecRRR {
|
||||
alu_op,
|
||||
ref mut rd,
|
||||
@ -2277,7 +2339,7 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
ref mut rm,
|
||||
..
|
||||
} => {
|
||||
if alu_op == VecALUOp::Bsl {
|
||||
if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal {
|
||||
map_mod(mapper, rd);
|
||||
} else {
|
||||
map_def(mapper, rd);
|
||||
@ -3120,14 +3182,20 @@ impl Inst {
|
||||
let rn = show_vreg_element(rn, mb_rru, 0, size);
|
||||
format!("dup {}, {}", rd, rn)
|
||||
}
|
||||
&Inst::VecExtend { t, rd, rn } => {
|
||||
let (op, dest, src) = match t {
|
||||
VecExtendOp::Sxtl8 => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
VecExtendOp::Sxtl16 => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4),
|
||||
VecExtendOp::Sxtl32 => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2),
|
||||
VecExtendOp::Uxtl8 => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
VecExtendOp::Uxtl16 => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4),
|
||||
VecExtendOp::Uxtl32 => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2),
|
||||
&Inst::VecExtend { t, rd, rn, high_half } => {
|
||||
let (op, dest, src) = match (t, high_half) {
|
||||
(VecExtendOp::Sxtl8, false) => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
(VecExtendOp::Sxtl8, true) => ("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
|
||||
(VecExtendOp::Sxtl16, false) => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4),
|
||||
(VecExtendOp::Sxtl16, true) => ("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
|
||||
(VecExtendOp::Sxtl32, false) => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2),
|
||||
(VecExtendOp::Sxtl32, true) => ("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
|
||||
(VecExtendOp::Uxtl8, false) => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
(VecExtendOp::Uxtl8, true) => ("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
|
||||
(VecExtendOp::Uxtl16, false) => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4),
|
||||
(VecExtendOp::Uxtl16, true) => ("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
|
||||
(VecExtendOp::Uxtl32, false) => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2),
|
||||
(VecExtendOp::Uxtl32, true) => ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
|
||||
let rn = show_vreg_vector(rn, mb_rru, src);
|
||||
@ -3144,6 +3212,25 @@ impl Inst {
|
||||
let rn = show_vreg_element(rn, mb_rru, idx2, size);
|
||||
format!("mov {}, {}", rd, rn)
|
||||
}
|
||||
&Inst::VecMiscNarrow { op, rd, rn, size, high_half } => {
|
||||
let dest_size = if high_half {
|
||||
assert!(size.is_128bits());
|
||||
size
|
||||
} else {
|
||||
size.halve()
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
|
||||
let rn = show_vreg_vector(rn, mb_rru, size.widen());
|
||||
let op = match (op, high_half) {
|
||||
(VecMiscNarrowOp::Xtn, false) => "xtn",
|
||||
(VecMiscNarrowOp::Xtn, true) => "xtn2",
|
||||
(VecMiscNarrowOp::Sqxtn, false) => "sqxtn",
|
||||
(VecMiscNarrowOp::Sqxtn, true) => "sqxtn2",
|
||||
(VecMiscNarrowOp::Sqxtun, false) => "sqxtun",
|
||||
(VecMiscNarrowOp::Sqxtun, true) => "sqxtun2",
|
||||
};
|
||||
format!("{} {}, {}", op, rd, rn)
|
||||
}
|
||||
&Inst::VecRRR {
|
||||
rd,
|
||||
rn,
|
||||
@ -3186,25 +3273,55 @@ impl Inst {
|
||||
VecALUOp::Fmax => ("fmax", size),
|
||||
VecALUOp::Fmin => ("fmin", size),
|
||||
VecALUOp::Fmul => ("fmul", size),
|
||||
VecALUOp::Addp => ("addp", size),
|
||||
VecALUOp::Umlal => ("umlal", size),
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||
let rd_size = if alu_op == VecALUOp::Umlal {
|
||||
size.widen()
|
||||
} else {
|
||||
size
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
|
||||
let rn = show_vreg_vector(rn, mb_rru, size);
|
||||
let rm = show_vreg_vector(rm, mb_rru, size);
|
||||
format!("{} {}, {}, {}", op, rd, rn, rm)
|
||||
}
|
||||
&Inst::VecMisc { op, rd, rn, size } => {
|
||||
let is_shll = op == VecMisc2::Shll;
|
||||
let suffix = match (is_shll, size) {
|
||||
(true, VectorSize::Size8x8) => ", #8",
|
||||
(true, VectorSize::Size16x4) => ", #16",
|
||||
(true, VectorSize::Size32x2) => ", #32",
|
||||
_ => "",
|
||||
};
|
||||
|
||||
let (op, size) = match op {
|
||||
VecMisc2::Not => ("mvn", VectorSize::Size8x16),
|
||||
VecMisc2::Not => (
|
||||
"mvn",
|
||||
if size.is_128bits() {
|
||||
VectorSize::Size8x16
|
||||
} else {
|
||||
VectorSize::Size8x8
|
||||
},
|
||||
),
|
||||
VecMisc2::Neg => ("neg", size),
|
||||
VecMisc2::Abs => ("abs", size),
|
||||
VecMisc2::Fabs => ("fabs", size),
|
||||
VecMisc2::Fneg => ("fneg", size),
|
||||
VecMisc2::Fsqrt => ("fsqrt", size),
|
||||
VecMisc2::Rev64 => ("rev64", size),
|
||||
VecMisc2::Shll => ("shll", size),
|
||||
VecMisc2::Fcvtzs => ("fcvtzs", size),
|
||||
VecMisc2::Fcvtzu => ("fcvtzu", size),
|
||||
VecMisc2::Scvtf => ("scvtf", size),
|
||||
VecMisc2::Ucvtf => ("ucvtf", size),
|
||||
};
|
||||
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||
let rd_size = if is_shll { size.widen() } else { size };
|
||||
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
|
||||
let rn = show_vreg_vector(rn, mb_rru, size);
|
||||
format!("{} {}, {}", op, rd, rn)
|
||||
format!("{} {}, {}{}", op, rd, rn, suffix)
|
||||
}
|
||||
&Inst::VecLanes { op, rd, rn, size } => {
|
||||
let op = match op {
|
||||
|
@ -7,10 +7,11 @@
|
||||
//!
|
||||
//! - Floating-point immediates (FIMM instruction).
|
||||
|
||||
use crate::ir;
|
||||
use crate::ir::condcodes::{FloatCC, IntCC};
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::Inst as IRInst;
|
||||
use crate::ir::{AtomicRmwOp, InstructionData, Opcode, TrapCode, Type};
|
||||
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::CodegenResult;
|
||||
@ -348,6 +349,45 @@ fn put_input_in_rse<C: LowerCtx<I = Inst>>(
|
||||
let out_ty = ctx.output_ty(insn, 0);
|
||||
let out_bits = ty_bits(out_ty);
|
||||
|
||||
// Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
|
||||
if op == Opcode::Uextend || op == Opcode::Sextend {
|
||||
let sign_extend = op == Opcode::Sextend;
|
||||
let inner_ty = ctx.input_ty(insn, 0);
|
||||
let inner_bits = ty_bits(inner_ty);
|
||||
assert!(inner_bits < out_bits);
|
||||
if match (sign_extend, narrow_mode) {
|
||||
// A single zero-extend or sign-extend is equal to itself.
|
||||
(_, NarrowValueMode::None) => true,
|
||||
// Two zero-extends or sign-extends in a row is equal to a single zero-extend or sign-extend.
|
||||
(false, NarrowValueMode::ZeroExtend32) | (false, NarrowValueMode::ZeroExtend64) => {
|
||||
true
|
||||
}
|
||||
(true, NarrowValueMode::SignExtend32) | (true, NarrowValueMode::SignExtend64) => {
|
||||
true
|
||||
}
|
||||
// A zero-extend and a sign-extend in a row is not equal to a single zero-extend or sign-extend
|
||||
(false, NarrowValueMode::SignExtend32) | (false, NarrowValueMode::SignExtend64) => {
|
||||
false
|
||||
}
|
||||
(true, NarrowValueMode::ZeroExtend32) | (true, NarrowValueMode::ZeroExtend64) => {
|
||||
false
|
||||
}
|
||||
} {
|
||||
let extendop = match (sign_extend, inner_bits) {
|
||||
(true, 8) => ExtendOp::SXTB,
|
||||
(false, 8) => ExtendOp::UXTB,
|
||||
(true, 16) => ExtendOp::SXTH,
|
||||
(false, 16) => ExtendOp::UXTH,
|
||||
(true, 32) => ExtendOp::SXTW,
|
||||
(false, 32) => ExtendOp::UXTW,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let reg =
|
||||
put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
|
||||
return ResultRSE::RegExtend(reg, extendop);
|
||||
}
|
||||
}
|
||||
|
||||
// If `out_ty` is smaller than 32 bits and we need to zero- or sign-extend,
|
||||
// then get the result into a register and return an Extend-mode operand on
|
||||
// that register.
|
||||
@ -355,7 +395,7 @@ fn put_input_in_rse<C: LowerCtx<I = Inst>>(
|
||||
&& ((narrow_mode.is_32bit() && out_bits < 32)
|
||||
|| (!narrow_mode.is_32bit() && out_bits < 64))
|
||||
{
|
||||
let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
|
||||
let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
|
||||
let extendop = match (narrow_mode, out_bits) {
|
||||
(NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
|
||||
ExtendOp::SXTB
|
||||
@ -381,28 +421,6 @@ fn put_input_in_rse<C: LowerCtx<I = Inst>>(
|
||||
};
|
||||
return ResultRSE::RegExtend(reg, extendop);
|
||||
}
|
||||
|
||||
// Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
|
||||
if op == Opcode::Uextend || op == Opcode::Sextend {
|
||||
assert!(out_bits == 32 || out_bits == 64);
|
||||
let sign_extend = op == Opcode::Sextend;
|
||||
let inner_ty = ctx.input_ty(insn, 0);
|
||||
let inner_bits = ty_bits(inner_ty);
|
||||
assert!(inner_bits < out_bits);
|
||||
let extendop = match (sign_extend, inner_bits) {
|
||||
(true, 1) => ExtendOp::SXTB,
|
||||
(false, 1) => ExtendOp::UXTB,
|
||||
(true, 8) => ExtendOp::SXTB,
|
||||
(false, 8) => ExtendOp::UXTB,
|
||||
(true, 16) => ExtendOp::SXTH,
|
||||
(false, 16) => ExtendOp::UXTH,
|
||||
(true, 32) => ExtendOp::SXTW,
|
||||
(false, 32) => ExtendOp::UXTW,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
|
||||
return ResultRSE::RegExtend(reg, extendop);
|
||||
}
|
||||
}
|
||||
|
||||
ResultRSE::from_rs(put_input_in_rs(ctx, input, narrow_mode))
|
||||
@ -1050,7 +1068,7 @@ pub(crate) fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn inst_atomic_rmw_op(data: &InstructionData) -> Option<AtomicRmwOp> {
|
||||
pub(crate) fn inst_atomic_rmw_op(data: &InstructionData) -> Option<ir::AtomicRmwOp> {
|
||||
match data {
|
||||
&InstructionData::AtomicRmw { op, .. } => Some(op),
|
||||
_ => None,
|
||||
|
@ -7,7 +7,7 @@ use crate::ir::Inst as IRInst;
|
||||
use crate::ir::{InstructionData, Opcode, TrapCode};
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::CodegenResult;
|
||||
use crate::{CodegenError, CodegenResult};
|
||||
|
||||
use crate::isa::aarch64::abi::*;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
@ -21,7 +21,8 @@ use smallvec::SmallVec;
|
||||
|
||||
use super::lower::*;
|
||||
|
||||
fn is_single_word_int_ty(ty: Type) -> bool {
|
||||
/// This is target-word-size dependent. And it excludes booleans and reftypes.
|
||||
fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
|
||||
match ty {
|
||||
I8 | I16 | I32 | I64 => true,
|
||||
_ => false,
|
||||
@ -66,7 +67,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
if ty_bits(ty) < 128 {
|
||||
if !ty.is_vector() {
|
||||
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
|
||||
ctx,
|
||||
inputs[1],
|
||||
@ -94,7 +95,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
if ty_bits(ty) < 128 {
|
||||
if !ty.is_vector() {
|
||||
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
|
||||
ctx,
|
||||
inputs[1],
|
||||
@ -124,7 +125,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
|
||||
let ty = ty.unwrap();
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
if ty_bits(ty) < 128 {
|
||||
if !ty.is_vector() {
|
||||
let narrow_mode = if is_signed {
|
||||
NarrowValueMode::SignExtend64
|
||||
} else {
|
||||
@ -180,7 +181,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
Opcode::Ineg => {
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let ty = ty.unwrap();
|
||||
if ty_bits(ty) < 128 {
|
||||
if !ty.is_vector() {
|
||||
let rn = zero_reg();
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
|
||||
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
|
||||
@ -201,7 +202,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
if ty_bits(ty) < 128 {
|
||||
if !ty.is_vector() {
|
||||
let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64);
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op,
|
||||
@ -210,6 +211,112 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rm,
|
||||
ra: zero_reg(),
|
||||
});
|
||||
} else {
|
||||
if ty == I64X2 {
|
||||
let tmp1 = ctx.alloc_tmp(RegClass::V128, I64X2);
|
||||
let tmp2 = ctx.alloc_tmp(RegClass::V128, I64X2);
|
||||
|
||||
// This I64X2 multiplication is performed with several 32-bit
|
||||
// operations.
|
||||
|
||||
// 64-bit numbers x and y, can be represented as:
|
||||
// x = a + 2^32(b)
|
||||
// y = c + 2^32(d)
|
||||
|
||||
// A 64-bit multiplication is:
|
||||
// x * y = ac + 2^32(ad + bc) + 2^64(bd)
|
||||
// note: `2^64(bd)` can be ignored, the value is too large to fit in
|
||||
// 64 bits.
|
||||
|
||||
// This sequence implements a I64X2 multiply, where the registers
|
||||
// `rn` and `rm` are split up into 32-bit components:
|
||||
// rn = |d|c|b|a|
|
||||
// rm = |h|g|f|e|
|
||||
//
|
||||
// rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
|
||||
//
|
||||
// The sequence is:
|
||||
// rev64 rd.4s, rm.4s
|
||||
// mul rd.4s, rd.4s, rn.4s
|
||||
// xtn tmp1.2s, rn.2d
|
||||
// addp rd.4s, rd.4s, rd.4s
|
||||
// xtn tmp2.2s, rm.2d
|
||||
// shll rd.2d, rd.2s, #32
|
||||
// umlal rd.2d, tmp2.2s, tmp1.2s
|
||||
|
||||
// Reverse the 32-bit elements in the 64-bit words.
|
||||
// rd = |g|h|e|f|
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Rev64,
|
||||
rd,
|
||||
rn: rm,
|
||||
size: VectorSize::Size32x4,
|
||||
});
|
||||
|
||||
// Calculate the high half components.
|
||||
// rd = |dg|ch|be|af|
|
||||
//
|
||||
// Note that this 32-bit multiply of the high half
|
||||
// discards the bits that would overflow, same as
|
||||
// if 64-bit operations were used. Also the Shll
|
||||
// below would shift out the overflow bits anyway.
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Mul,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
rm: rn,
|
||||
size: VectorSize::Size32x4,
|
||||
});
|
||||
|
||||
// Extract the low half components of rn.
|
||||
// tmp1 = |c|a|
|
||||
ctx.emit(Inst::VecMiscNarrow {
|
||||
op: VecMiscNarrowOp::Xtn,
|
||||
rd: tmp1,
|
||||
rn,
|
||||
size: VectorSize::Size32x2,
|
||||
high_half: false,
|
||||
});
|
||||
|
||||
// Sum the respective high half components.
|
||||
// rd = |dg+ch|be+af||dg+ch|be+af|
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Addp,
|
||||
rd: rd,
|
||||
rn: rd.to_reg(),
|
||||
rm: rd.to_reg(),
|
||||
size: VectorSize::Size32x4,
|
||||
});
|
||||
|
||||
// Extract the low half components of rm.
|
||||
// tmp2 = |g|e|
|
||||
ctx.emit(Inst::VecMiscNarrow {
|
||||
op: VecMiscNarrowOp::Xtn,
|
||||
rd: tmp2,
|
||||
rn: rm,
|
||||
size: VectorSize::Size32x2,
|
||||
high_half: false,
|
||||
});
|
||||
|
||||
// Shift the high half components, into the high half.
|
||||
// rd = |dg+ch << 32|be+af << 32|
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Shll,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
size: VectorSize::Size32x2,
|
||||
});
|
||||
|
||||
// Multiply the low components together, and accumulate with the high
|
||||
// half.
|
||||
// rd = |rd[1] + cg|rd[0] + ae|
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Umlal,
|
||||
rd,
|
||||
rn: tmp2.to_reg(),
|
||||
rm: tmp1.to_reg(),
|
||||
size: VectorSize::Size32x2,
|
||||
});
|
||||
} else {
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Mul,
|
||||
@ -220,6 +327,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Umulhi | Opcode::Smulhi => {
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
@ -465,7 +573,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
Opcode::Bnot => {
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let ty = ty.unwrap();
|
||||
if ty_bits(ty) < 128 {
|
||||
if !ty.is_vector() {
|
||||
let rm = put_input_in_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
|
||||
let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
|
||||
// NOT rd, rm ==> ORR_NOT rd, zero, rm
|
||||
@ -489,7 +597,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::BxorNot => {
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let ty = ty.unwrap();
|
||||
if ty_bits(ty) < 128 {
|
||||
if !ty.is_vector() {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
|
||||
let alu_op = match op {
|
||||
@ -528,7 +636,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
|
||||
let ty = ty.unwrap();
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
if ty_bits(ty) < 128 {
|
||||
if !ty.is_vector() {
|
||||
let size = OperandSize::from_bits(ty_bits(ty));
|
||||
let narrow_mode = match (op, size) {
|
||||
(Opcode::Ishl, _) => NarrowValueMode::None,
|
||||
@ -1054,6 +1162,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
t,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
high_half: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -1120,7 +1229,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let mut r_arg2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty_access = ty.unwrap();
|
||||
assert!(is_single_word_int_ty(ty_access));
|
||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
||||
let memflags = ctx.memflags(insn).expect("memory flags");
|
||||
let srcloc = if !memflags.notrap() {
|
||||
Some(ctx.srcloc(insn))
|
||||
@ -1136,7 +1245,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
|
||||
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64));
|
||||
// Now the AtomicRMW insn itself
|
||||
let op = AtomicRMWOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap());
|
||||
let op = inst_common::AtomicRmwOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap());
|
||||
ctx.emit(Inst::AtomicRMW {
|
||||
ty: ty_access,
|
||||
op,
|
||||
@ -1156,7 +1265,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let mut r_expected = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let mut r_replacement = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
|
||||
let ty_access = ty.unwrap();
|
||||
assert!(is_single_word_int_ty(ty_access));
|
||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
||||
let memflags = ctx.memflags(insn).expect("memory flags");
|
||||
let srcloc = if !memflags.notrap() {
|
||||
Some(ctx.srcloc(insn))
|
||||
@ -1194,7 +1303,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let r_data = get_output_reg(ctx, outputs[0]);
|
||||
let r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let ty_access = ty.unwrap();
|
||||
assert!(is_single_word_int_ty(ty_access));
|
||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
||||
let memflags = ctx.memflags(insn).expect("memory flags");
|
||||
let srcloc = if !memflags.notrap() {
|
||||
Some(ctx.srcloc(insn))
|
||||
@ -1213,7 +1322,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let r_data = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let r_addr = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty_access = ctx.input_ty(insn, 0);
|
||||
assert!(is_single_word_int_ty(ty_access));
|
||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
||||
let memflags = ctx.memflags(insn).expect("memory flags");
|
||||
let srcloc = if !memflags.notrap() {
|
||||
Some(ctx.srcloc(insn))
|
||||
@ -1328,7 +1437,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
Opcode::Bitselect | Opcode::Vselect => {
|
||||
let ty = ty.unwrap();
|
||||
if ty_bits(ty) < 128 {
|
||||
if !ty.is_vector() {
|
||||
debug_assert_ne!(Opcode::Vselect, op);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
@ -1591,7 +1700,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
};
|
||||
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||
|
||||
if ty_bits(ty) < 128 {
|
||||
if !ty.is_vector() {
|
||||
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
|
||||
@ -1611,7 +1720,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
|
||||
if ty_bits(ty) < 128 {
|
||||
if !ty.is_vector() {
|
||||
match ty_bits(ty) {
|
||||
32 => {
|
||||
ctx.emit(Inst::FpuCmp32 { rn, rm });
|
||||
@ -2001,7 +2110,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
if bits < 128 {
|
||||
if !ty.is_vector() {
|
||||
let fpu_op = match (op, bits) {
|
||||
(Opcode::Fadd, 32) => FPUOp2::Add32,
|
||||
(Opcode::Fadd, 64) => FPUOp2::Add64,
|
||||
@ -2044,7 +2153,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let bits = ty_bits(ty);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
if bits < 128 {
|
||||
if !ty.is_vector() {
|
||||
let fpu_op = match (op, bits) {
|
||||
(Opcode::Sqrt, 32) => FPUOp1::Sqrt32,
|
||||
(Opcode::Sqrt, 64) => FPUOp1::Sqrt64,
|
||||
@ -2157,12 +2266,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let out_bits = ty_bits(ctx.output_ty(insn, 0));
|
||||
let signed = op == Opcode::FcvtToSint;
|
||||
let op = match (signed, in_bits, out_bits) {
|
||||
(false, 32, 32) => FpuToIntOp::F32ToU32,
|
||||
(true, 32, 32) => FpuToIntOp::F32ToI32,
|
||||
(false, 32, 8) | (false, 32, 16) | (false, 32, 32) => FpuToIntOp::F32ToU32,
|
||||
(true, 32, 8) | (true, 32, 16) | (true, 32, 32) => FpuToIntOp::F32ToI32,
|
||||
(false, 32, 64) => FpuToIntOp::F32ToU64,
|
||||
(true, 32, 64) => FpuToIntOp::F32ToI64,
|
||||
(false, 64, 32) => FpuToIntOp::F64ToU32,
|
||||
(true, 64, 32) => FpuToIntOp::F64ToI32,
|
||||
(false, 64, 8) | (false, 64, 16) | (false, 64, 32) => FpuToIntOp::F64ToU32,
|
||||
(true, 64, 8) | (true, 64, 16) | (true, 64, 32) => FpuToIntOp::F64ToI32,
|
||||
(false, 64, 64) => FpuToIntOp::F64ToU64,
|
||||
(true, 64, 64) => FpuToIntOp::F64ToI64,
|
||||
_ => panic!("Unknown input/output-bits combination"),
|
||||
@ -2199,6 +2308,16 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
if in_bits == 32 {
|
||||
// From float32.
|
||||
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
|
||||
(true, 8) => (
|
||||
i8::min_value() as f32 - 1.,
|
||||
FloatCC::GreaterThan,
|
||||
i8::max_value() as f32 + 1.,
|
||||
),
|
||||
(true, 16) => (
|
||||
i16::min_value() as f32 - 1.,
|
||||
FloatCC::GreaterThan,
|
||||
i16::max_value() as f32 + 1.,
|
||||
),
|
||||
(true, 32) => (
|
||||
i32::min_value() as f32, // I32_MIN - 1 isn't precisely representable as a f32.
|
||||
FloatCC::GreaterThanOrEqual,
|
||||
@ -2209,6 +2328,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
FloatCC::GreaterThanOrEqual,
|
||||
i64::max_value() as f32 + 1.,
|
||||
),
|
||||
(false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f32 + 1.),
|
||||
(false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f32 + 1.),
|
||||
(false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f32 + 1.),
|
||||
(false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f32 + 1.),
|
||||
_ => panic!("Unknown input/output-bits combination"),
|
||||
@ -2240,6 +2361,16 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
} else {
|
||||
// From float64.
|
||||
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
|
||||
(true, 8) => (
|
||||
i8::min_value() as f64 - 1.,
|
||||
FloatCC::GreaterThan,
|
||||
i8::max_value() as f64 + 1.,
|
||||
),
|
||||
(true, 16) => (
|
||||
i16::min_value() as f64 - 1.,
|
||||
FloatCC::GreaterThan,
|
||||
i16::max_value() as f64 + 1.,
|
||||
),
|
||||
(true, 32) => (
|
||||
i32::min_value() as f64 - 1.,
|
||||
FloatCC::GreaterThan,
|
||||
@ -2250,6 +2381,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
FloatCC::GreaterThanOrEqual,
|
||||
i64::max_value() as f64 + 1.,
|
||||
),
|
||||
(false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f64 + 1.),
|
||||
(false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f64 + 1.),
|
||||
(false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f64 + 1.),
|
||||
(false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f64 + 1.),
|
||||
_ => panic!("Unknown input/output-bits combination"),
|
||||
@ -2285,14 +2418,32 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
|
||||
Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
|
||||
let in_bits = ty_bits(ctx.input_ty(insn, 0));
|
||||
let out_bits = ty_bits(ctx.output_ty(insn, 0));
|
||||
let ty = ty.unwrap();
|
||||
let signed = op == Opcode::FcvtFromSint;
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
|
||||
if ty.is_vector() {
|
||||
let op = if signed {
|
||||
VecMisc2::Scvtf
|
||||
} else {
|
||||
VecMisc2::Ucvtf
|
||||
};
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op,
|
||||
rd,
|
||||
rn,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
} else {
|
||||
let in_bits = ty_bits(ctx.input_ty(insn, 0));
|
||||
let out_bits = ty_bits(ty);
|
||||
let op = match (signed, in_bits, out_bits) {
|
||||
(false, 32, 32) => IntToFpuOp::U32ToF32,
|
||||
(true, 32, 32) => IntToFpuOp::I32ToF32,
|
||||
(false, 32, 64) => IntToFpuOp::U32ToF64,
|
||||
(true, 32, 64) => IntToFpuOp::I32ToF64,
|
||||
(false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
|
||||
(true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
|
||||
(false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
|
||||
(true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
|
||||
(false, 64, 32) => IntToFpuOp::U64ToF32,
|
||||
(true, 64, 32) => IntToFpuOp::I64ToF32,
|
||||
(false, 64, 64) => IntToFpuOp::U64ToF64,
|
||||
@ -2300,26 +2451,40 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
_ => panic!("Unknown input/output-bits combination"),
|
||||
};
|
||||
let narrow_mode = match (signed, in_bits) {
|
||||
(false, 32) => NarrowValueMode::ZeroExtend32,
|
||||
(true, 32) => NarrowValueMode::SignExtend32,
|
||||
(false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
|
||||
(true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
|
||||
(false, 64) => NarrowValueMode::ZeroExtend64,
|
||||
(true, 64) => NarrowValueMode::SignExtend64,
|
||||
_ => panic!("Unknown input size"),
|
||||
};
|
||||
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
ctx.emit(Inst::IntToFpu { op, rd, rn });
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
|
||||
let in_ty = ctx.input_ty(insn, 0);
|
||||
let in_bits = ty_bits(in_ty);
|
||||
let out_ty = ctx.output_ty(insn, 0);
|
||||
let out_bits = ty_bits(out_ty);
|
||||
let ty = ty.unwrap();
|
||||
let out_signed = op == Opcode::FcvtToSintSat;
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
|
||||
if ty.is_vector() {
|
||||
let op = if out_signed {
|
||||
VecMisc2::Fcvtzs
|
||||
} else {
|
||||
VecMisc2::Fcvtzu
|
||||
};
|
||||
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op,
|
||||
rd,
|
||||
rn,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
} else {
|
||||
let in_ty = ctx.input_ty(insn, 0);
|
||||
let in_bits = ty_bits(in_ty);
|
||||
let out_bits = ty_bits(ty);
|
||||
// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
|
||||
// FMIN Vtmp2, Vin, Vtmp1
|
||||
// FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
|
||||
@ -2433,6 +2598,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rn: rtmp2.to_reg(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::IaddIfcout => {
|
||||
// This is a two-output instruction that is needed for the
|
||||
@ -2560,12 +2726,62 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
});
|
||||
}
|
||||
|
||||
Opcode::Snarrow
|
||||
| Opcode::Unarrow
|
||||
| Opcode::SwidenLow
|
||||
| Opcode::SwidenHigh
|
||||
| Opcode::UwidenLow
|
||||
| Opcode::UwidenHigh => unimplemented!(),
|
||||
Opcode::Snarrow | Opcode::Unarrow => {
|
||||
let op = if op == Opcode::Snarrow {
|
||||
VecMiscNarrowOp::Sqxtn
|
||||
} else {
|
||||
VecMiscNarrowOp::Sqxtun
|
||||
};
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
|
||||
ctx.emit(Inst::VecMiscNarrow {
|
||||
op,
|
||||
rd,
|
||||
rn,
|
||||
size: VectorSize::from_ty(ty),
|
||||
high_half: false,
|
||||
});
|
||||
ctx.emit(Inst::VecMiscNarrow {
|
||||
op,
|
||||
rd,
|
||||
rn: rn2,
|
||||
size: VectorSize::from_ty(ty),
|
||||
high_half: true,
|
||||
});
|
||||
}
|
||||
|
||||
Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
|
||||
let lane_type = ty.unwrap().lane_type();
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let (t, high_half) = match (lane_type, op) {
|
||||
(I16, Opcode::SwidenLow) => (VecExtendOp::Sxtl8, false),
|
||||
(I16, Opcode::SwidenHigh) => (VecExtendOp::Sxtl8, true),
|
||||
(I16, Opcode::UwidenLow) => (VecExtendOp::Uxtl8, false),
|
||||
(I16, Opcode::UwidenHigh) => (VecExtendOp::Uxtl8, true),
|
||||
(I32, Opcode::SwidenLow) => (VecExtendOp::Sxtl16, false),
|
||||
(I32, Opcode::SwidenHigh) => (VecExtendOp::Sxtl16, true),
|
||||
(I32, Opcode::UwidenLow) => (VecExtendOp::Uxtl16, false),
|
||||
(I32, Opcode::UwidenHigh) => (VecExtendOp::Uxtl16, true),
|
||||
_ => {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"Unsupported SIMD vector lane type: {:?}",
|
||||
lane_type
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
ctx.emit(Inst::VecExtend {
|
||||
t,
|
||||
rd,
|
||||
rn,
|
||||
high_half,
|
||||
});
|
||||
}
|
||||
|
||||
Opcode::TlsValue => unimplemented!(),
|
||||
}
|
||||
|
||||
|
@ -325,10 +325,13 @@ impl ABIBody for X64ABIBody {
|
||||
self.fp_to_arg_offset() + off <= u32::max_value() as i64,
|
||||
"large offset nyi"
|
||||
);
|
||||
load_stack(
|
||||
Amode::imm_reg((self.fp_to_arg_offset() + off) as u32, regs::rbp()),
|
||||
to_reg,
|
||||
let from_addr = Amode::imm_reg((self.fp_to_arg_offset() + off) as u32, regs::rbp());
|
||||
Inst::load(
|
||||
ty,
|
||||
from_addr,
|
||||
to_reg,
|
||||
ExtKind::ZeroExtend,
|
||||
/* infallible load */ None,
|
||||
)
|
||||
}
|
||||
}
|
||||
@ -420,8 +423,10 @@ impl ABIBody for X64ABIBody {
|
||||
"large stack return offset nyi"
|
||||
);
|
||||
|
||||
let mem = Amode::imm_reg(off as u32, self.ret_area_ptr.unwrap().to_reg());
|
||||
ret.push(store_stack(mem, from_reg.to_reg(), ty))
|
||||
let from_reg = from_reg.to_reg();
|
||||
let to_mem = Amode::imm_reg(off as u32, self.ret_area_ptr.unwrap().to_reg());
|
||||
let store = Inst::store(ty, from_reg, to_mem, /* infallible store */ None);
|
||||
ret.push(store)
|
||||
}
|
||||
}
|
||||
|
||||
@ -464,17 +469,20 @@ impl ABIBody for X64ABIBody {
|
||||
unimplemented!("store_stackslot")
|
||||
}
|
||||
|
||||
fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Inst {
|
||||
fn load_spillslot(&self, slot: SpillSlot, ty: Type, to_reg: Writable<Reg>) -> Inst {
|
||||
// Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size.
|
||||
let islot = slot.get() as i64;
|
||||
let spill_off = islot * 8;
|
||||
let sp_off = self.stack_slots_size as i64 + spill_off;
|
||||
debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI");
|
||||
trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
|
||||
load_stack(
|
||||
SyntheticAmode::nominal_sp_offset(sp_off as u32),
|
||||
into_reg,
|
||||
let from_addr = SyntheticAmode::nominal_sp_offset(sp_off as u32);
|
||||
Inst::load(
|
||||
ty,
|
||||
from_addr,
|
||||
to_reg,
|
||||
ExtKind::ZeroExtend,
|
||||
/* infallible load */ None,
|
||||
)
|
||||
}
|
||||
|
||||
@ -485,11 +493,8 @@ impl ABIBody for X64ABIBody {
|
||||
let sp_off = self.stack_slots_size as i64 + spill_off;
|
||||
debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI");
|
||||
trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
|
||||
store_stack(
|
||||
SyntheticAmode::nominal_sp_offset(sp_off as u32),
|
||||
from_reg,
|
||||
ty,
|
||||
)
|
||||
let to_mem = SyntheticAmode::nominal_sp_offset(sp_off as u32);
|
||||
Inst::store(ty, from_reg, to_mem, /* infallible store */ None)
|
||||
}
|
||||
|
||||
fn spillslots_to_stack_map(&self, slots: &[SpillSlot], state: &EmitState) -> StackMap {
|
||||
@ -1003,66 +1008,6 @@ fn adjust_stack<C: LowerCtx<I = Inst>>(ctx: &mut C, amount: u64, is_sub: bool) {
|
||||
}
|
||||
}
|
||||
|
||||
fn load_stack(mem: impl Into<SyntheticAmode>, into_reg: Writable<Reg>, ty: Type) -> Inst {
|
||||
let (is_int, ext_mode) = match ty {
|
||||
types::B1 | types::B8 | types::I8 => (true, Some(ExtMode::BQ)),
|
||||
types::B16 | types::I16 => (true, Some(ExtMode::WQ)),
|
||||
types::B32 | types::I32 => (true, Some(ExtMode::LQ)),
|
||||
types::B64 | types::I64 | types::R64 => (true, None),
|
||||
types::F32 | types::F64 => (false, None),
|
||||
_ => panic!("load_stack({})", ty),
|
||||
};
|
||||
|
||||
let mem = mem.into();
|
||||
|
||||
if is_int {
|
||||
match ext_mode {
|
||||
Some(ext_mode) => Inst::movsx_rm_r(
|
||||
ext_mode,
|
||||
RegMem::mem(mem),
|
||||
into_reg,
|
||||
/* infallible load */ None,
|
||||
),
|
||||
None => Inst::mov64_m_r(mem, into_reg, None /* infallible */),
|
||||
}
|
||||
} else {
|
||||
let sse_op = match ty {
|
||||
types::F32 => SseOpcode::Movss,
|
||||
types::F64 => SseOpcode::Movsd,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
Inst::xmm_mov(
|
||||
sse_op,
|
||||
RegMem::mem(mem),
|
||||
into_reg,
|
||||
None, /* infallible */
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn store_stack(mem: impl Into<SyntheticAmode>, from_reg: Reg, ty: Type) -> Inst {
|
||||
let (is_int, size) = match ty {
|
||||
types::B1 | types::B8 | types::I8 => (true, 1),
|
||||
types::B16 | types::I16 => (true, 2),
|
||||
types::B32 | types::I32 => (true, 4),
|
||||
types::B64 | types::I64 | types::R64 => (true, 8),
|
||||
types::F32 => (false, 4),
|
||||
types::F64 => (false, 8),
|
||||
_ => unimplemented!("store_stack({})", ty),
|
||||
};
|
||||
let mem = mem.into();
|
||||
if is_int {
|
||||
Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None)
|
||||
} else {
|
||||
let sse_op = match size {
|
||||
4 => SseOpcode::Movss,
|
||||
8 => SseOpcode::Movsd,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
Inst::xmm_mov_r_m(sse_op, from_reg, mem, /* infallible store */ None)
|
||||
}
|
||||
}
|
||||
|
||||
/// X64 ABI object for a function call.
|
||||
pub struct X64ABICall {
|
||||
sig: ABISig,
|
||||
@ -1212,11 +1157,9 @@ impl ABICall for X64ABICall {
|
||||
|
||||
debug_assert!(off <= u32::max_value() as i64);
|
||||
debug_assert!(off >= 0);
|
||||
ctx.emit(store_stack(
|
||||
Amode::imm_reg(off as u32, regs::rsp()),
|
||||
from_reg,
|
||||
ty,
|
||||
))
|
||||
let to_mem = Amode::imm_reg(off as u32, regs::rsp());
|
||||
let store = Inst::store(ty, from_reg, to_mem, /* infallible store */ None);
|
||||
ctx.emit(store)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1225,21 +1168,25 @@ impl ABICall for X64ABICall {
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
idx: usize,
|
||||
into_reg: Writable<Reg>,
|
||||
to_reg: Writable<Reg>,
|
||||
) {
|
||||
match &self.sig.rets[idx] {
|
||||
&ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move(into_reg, reg.to_reg(), ty)),
|
||||
&ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move(to_reg, reg.to_reg(), ty)),
|
||||
&ABIArg::Stack(off, ty, _) => {
|
||||
let ret_area_base = self.sig.stack_arg_space;
|
||||
let sp_offset = off + ret_area_base;
|
||||
// TODO handle offsets bigger than u32::max
|
||||
debug_assert!(sp_offset >= 0);
|
||||
debug_assert!(sp_offset <= u32::max_value() as i64);
|
||||
ctx.emit(load_stack(
|
||||
Amode::imm_reg(sp_offset as u32, regs::rsp()),
|
||||
into_reg,
|
||||
let from_addr = Amode::imm_reg(sp_offset as u32, regs::rsp());
|
||||
let load = Inst::load(
|
||||
ty,
|
||||
));
|
||||
from_addr,
|
||||
to_reg,
|
||||
ExtKind::ZeroExtend,
|
||||
/* infallible load */ None,
|
||||
);
|
||||
ctx.emit(load);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -380,6 +380,8 @@ pub enum SseOpcode {
|
||||
Movaps,
|
||||
Movapd,
|
||||
Movd,
|
||||
Movdqa,
|
||||
Movdqu,
|
||||
Movq,
|
||||
Movss,
|
||||
Movsd,
|
||||
@ -395,6 +397,9 @@ pub enum SseOpcode {
|
||||
Paddd,
|
||||
Paddq,
|
||||
Paddw,
|
||||
Pmulld,
|
||||
Pmullw,
|
||||
Pmuludq,
|
||||
Psllw,
|
||||
Pslld,
|
||||
Psllq,
|
||||
@ -484,6 +489,8 @@ impl SseOpcode {
|
||||
| SseOpcode::Movq
|
||||
| SseOpcode::Movsd
|
||||
| SseOpcode::Movupd
|
||||
| SseOpcode::Movdqa
|
||||
| SseOpcode::Movdqu
|
||||
| SseOpcode::Mulpd
|
||||
| SseOpcode::Mulsd
|
||||
| SseOpcode::Orpd
|
||||
@ -491,6 +498,8 @@ impl SseOpcode {
|
||||
| SseOpcode::Paddd
|
||||
| SseOpcode::Paddq
|
||||
| SseOpcode::Paddw
|
||||
| SseOpcode::Pmullw
|
||||
| SseOpcode::Pmuludq
|
||||
| SseOpcode::Psllw
|
||||
| SseOpcode::Pslld
|
||||
| SseOpcode::Psllq
|
||||
@ -510,7 +519,9 @@ impl SseOpcode {
|
||||
| SseOpcode::Ucomisd
|
||||
| SseOpcode::Xorpd => SSE2,
|
||||
|
||||
SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41,
|
||||
SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => {
|
||||
SSE41
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -564,6 +575,8 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Movaps => "movaps",
|
||||
SseOpcode::Movapd => "movapd",
|
||||
SseOpcode::Movd => "movd",
|
||||
SseOpcode::Movdqa => "movdqa",
|
||||
SseOpcode::Movdqu => "movdqu",
|
||||
SseOpcode::Movq => "movq",
|
||||
SseOpcode::Movss => "movss",
|
||||
SseOpcode::Movsd => "movsd",
|
||||
@ -579,6 +592,9 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Paddd => "paddd",
|
||||
SseOpcode::Paddq => "paddq",
|
||||
SseOpcode::Paddw => "paddw",
|
||||
SseOpcode::Pmulld => "pmulld",
|
||||
SseOpcode::Pmullw => "pmullw",
|
||||
SseOpcode::Pmuludq => "pmuludq",
|
||||
SseOpcode::Psllw => "psllw",
|
||||
SseOpcode::Pslld => "pslld",
|
||||
SseOpcode::Psllq => "psllq",
|
||||
@ -618,6 +634,16 @@ impl fmt::Display for SseOpcode {
|
||||
}
|
||||
}
|
||||
|
||||
/// This defines the ways a value can be extended: either signed- or zero-extension, or none for
|
||||
/// types that are not extended. Contrast with [ExtMode], which defines the widths from and to which
|
||||
/// values can be extended.
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum ExtKind {
|
||||
/// The value is not extended.
None,
|
||||
/// The value is sign-extended.
SignExtend,
|
||||
/// The value is zero-extended.
ZeroExtend,
|
||||
}
|
||||
|
||||
/// These indicate ways of extending (widening) a value, using the Intel
|
||||
/// naming: B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64
|
||||
#[derive(Clone, PartialEq)]
|
||||
@ -823,7 +849,7 @@ impl CC {
|
||||
FloatCC::Ordered => CC::NP,
|
||||
FloatCC::Unordered => CC::P,
|
||||
// Alias for NE
|
||||
FloatCC::NotEqual | FloatCC::OrderedNotEqual => CC::NZ,
|
||||
FloatCC::OrderedNotEqual => CC::NZ,
|
||||
// Alias for E
|
||||
FloatCC::UnorderedOrEqual => CC::Z,
|
||||
// Alias for A
|
||||
@ -833,12 +859,14 @@ impl CC {
|
||||
FloatCC::UnorderedOrLessThan => CC::B,
|
||||
FloatCC::UnorderedOrLessThanOrEqual => CC::BE,
|
||||
FloatCC::Equal
|
||||
| FloatCC::NotEqual
|
||||
| FloatCC::LessThan
|
||||
| FloatCC::LessThanOrEqual
|
||||
| FloatCC::UnorderedOrGreaterThan
|
||||
| FloatCC::UnorderedOrGreaterThanOrEqual => {
|
||||
panic!("No single condition code to guarantee ordered. Treat as special case.")
|
||||
}
|
||||
| FloatCC::UnorderedOrGreaterThanOrEqual => panic!(
|
||||
"{:?} can't be lowered to a CC code; treat as special case.",
|
||||
floatcc
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@ -984,3 +1012,14 @@ impl OperandSize {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An x64 memory fence kind.
///
/// Each variant names one fence instruction. The emitter encodes a fence as the two bytes
/// `0F AE` followed by a single variant-specific byte, noted on each variant below.
|
||||
#[derive(Clone)]
|
||||
pub enum FenceKind {
|
||||
/// `mfence` instruction ("Memory Fence"); emitted as `0F AE F0`.
|
||||
MFence,
|
||||
/// `lfence` instruction ("Load Fence"); emitted as `0F AE E8`.
|
||||
LFence,
|
||||
/// `sfence` instruction ("Store Fence"); emitted as `0F AE F8`.
|
||||
SFence,
|
||||
}
|
||||
|
@ -1,9 +1,9 @@
|
||||
use crate::binemit::Reloc;
|
||||
use crate::ir::immediates::{Ieee32, Ieee64};
|
||||
use crate::ir::{types, TrapCode};
|
||||
use crate::ir::TrapCode;
|
||||
use crate::isa::x64::inst::args::*;
|
||||
use crate::isa::x64::inst::*;
|
||||
use crate::machinst::{MachBuffer, MachInstEmit, MachLabel};
|
||||
use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel};
|
||||
use core::convert::TryInto;
|
||||
use log::debug;
|
||||
use regalloc::{Reg, RegClass, Writable};
|
||||
@ -118,25 +118,38 @@ impl RexFlags {
|
||||
}
|
||||
}
|
||||
|
||||
/// For specifying the legacy prefixes (or `None` if no prefix required) to
|
||||
/// be used at the start an instruction. A given prefix may be required for
|
||||
/// various operations, including instructions that operate on GPR, SSE, and Vex
|
||||
/// registers.
|
||||
enum LegacyPrefix {
|
||||
/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
|
||||
/// covers only the small set of possibilities that we actually need.
|
||||
enum LegacyPrefixes {
|
||||
/// No prefix bytes
|
||||
None,
|
||||
/// Operand Size Override -- here, denoting "16-bit operation"
|
||||
_66,
|
||||
/// The Lock prefix
|
||||
_F0,
|
||||
/// Operand size override and Lock
|
||||
_66F0,
|
||||
/// REPNE, but no specific meaning here -- is just an opcode extension
|
||||
_F2,
|
||||
/// REP/REPE, but no specific meaning here -- is just an opcode extension
|
||||
_F3,
|
||||
}
|
||||
|
||||
impl LegacyPrefix {
|
||||
impl LegacyPrefixes {
|
||||
#[inline(always)]
|
||||
fn emit(&self, sink: &mut MachBuffer<Inst>) {
|
||||
match self {
|
||||
LegacyPrefix::_66 => sink.put1(0x66),
|
||||
LegacyPrefix::_F2 => sink.put1(0xF2),
|
||||
LegacyPrefix::_F3 => sink.put1(0xF3),
|
||||
LegacyPrefix::None => (),
|
||||
LegacyPrefixes::_66 => sink.put1(0x66),
|
||||
LegacyPrefixes::_F0 => sink.put1(0xF0),
|
||||
LegacyPrefixes::_66F0 => {
|
||||
// I don't think the order matters, but in any case, this is the same order that
|
||||
// the GNU assembler uses.
|
||||
sink.put1(0x66);
|
||||
sink.put1(0xF0);
|
||||
}
|
||||
LegacyPrefixes::_F2 => sink.put1(0xF2),
|
||||
LegacyPrefixes::_F3 => sink.put1(0xF3),
|
||||
LegacyPrefixes::None => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -145,15 +158,16 @@ impl LegacyPrefix {
|
||||
///
|
||||
/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
|
||||
/// create and emit:
|
||||
/// - first the REX prefix,
|
||||
/// - first the legacy prefixes, if any
|
||||
/// - then the REX prefix, if needed
|
||||
/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
|
||||
/// - then the MOD/RM byte,
|
||||
/// - then optionally, a SIB byte,
|
||||
/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
|
||||
///
|
||||
/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
|
||||
/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX instructions
|
||||
/// will require their own emitter functions.
|
||||
/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
|
||||
/// instructions will require their own emitter functions.
|
||||
///
|
||||
/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided.
|
||||
///
|
||||
@ -168,7 +182,7 @@ impl LegacyPrefix {
|
||||
/// indicate a 64-bit operation.
|
||||
fn emit_std_enc_mem(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
prefix: LegacyPrefix,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
mut num_opcodes: usize,
|
||||
enc_g: u8,
|
||||
@ -179,7 +193,7 @@ fn emit_std_enc_mem(
|
||||
// 64-bit integer registers, because they are part of an address
|
||||
// expression. But `enc_g` can be derived from a register of any class.
|
||||
|
||||
prefix.emit(sink);
|
||||
prefixes.emit(sink);
|
||||
|
||||
match mem_e {
|
||||
Amode::ImmReg { simm32, base } => {
|
||||
@ -304,7 +318,7 @@ fn emit_std_enc_mem(
|
||||
/// operand is a register rather than memory. Hence it is much simpler.
|
||||
fn emit_std_enc_enc(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
prefix: LegacyPrefix,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
mut num_opcodes: usize,
|
||||
enc_g: u8,
|
||||
@ -316,8 +330,8 @@ fn emit_std_enc_enc(
|
||||
// integer-to-FP conversion insn, one might be RegClass::I64 and the other
|
||||
// RegClass::V128.
|
||||
|
||||
// The operand-size override.
|
||||
prefix.emit(sink);
|
||||
// The legacy prefixes.
|
||||
prefixes.emit(sink);
|
||||
|
||||
// The rex byte.
|
||||
rex.emit_two_op(sink, enc_g, enc_e);
|
||||
@ -338,7 +352,7 @@ fn emit_std_enc_enc(
|
||||
|
||||
fn emit_std_reg_mem(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
prefix: LegacyPrefix,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
num_opcodes: usize,
|
||||
reg_g: Reg,
|
||||
@ -346,12 +360,12 @@ fn emit_std_reg_mem(
|
||||
rex: RexFlags,
|
||||
) {
|
||||
let enc_g = reg_enc(reg_g);
|
||||
emit_std_enc_mem(sink, prefix, opcodes, num_opcodes, enc_g, mem_e, rex);
|
||||
emit_std_enc_mem(sink, prefixes, opcodes, num_opcodes, enc_g, mem_e, rex);
|
||||
}
|
||||
|
||||
fn emit_std_reg_reg(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
prefix: LegacyPrefix,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
num_opcodes: usize,
|
||||
reg_g: Reg,
|
||||
@ -360,7 +374,7 @@ fn emit_std_reg_reg(
|
||||
) {
|
||||
let enc_g = reg_enc(reg_g);
|
||||
let enc_e = reg_enc(reg_e);
|
||||
emit_std_enc_enc(sink, prefix, opcodes, num_opcodes, enc_g, enc_e, rex);
|
||||
emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
|
||||
}
|
||||
|
||||
/// Write a suitable number of bits from an imm64 to the sink.
|
||||
@ -481,7 +495,7 @@ pub(crate) fn emit(
|
||||
RegMemImm::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
0x0FAF,
|
||||
2,
|
||||
reg_g.to_reg(),
|
||||
@ -493,7 +507,7 @@ pub(crate) fn emit(
|
||||
RegMemImm::Mem { addr } => {
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
0x0FAF,
|
||||
2,
|
||||
reg_g.to_reg(),
|
||||
@ -508,7 +522,7 @@ pub(crate) fn emit(
|
||||
// Yes, really, reg_g twice.
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
opcode,
|
||||
1,
|
||||
reg_g.to_reg(),
|
||||
@ -535,7 +549,7 @@ pub(crate) fn emit(
|
||||
// code easily.
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
opcode_r,
|
||||
1,
|
||||
*reg_e,
|
||||
@ -550,7 +564,7 @@ pub(crate) fn emit(
|
||||
// Here we revert to the "normal" G-E ordering.
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
opcode_m,
|
||||
1,
|
||||
reg_g.to_reg(),
|
||||
@ -566,7 +580,7 @@ pub(crate) fn emit(
|
||||
let enc_g = int_reg_enc(reg_g.to_reg());
|
||||
emit_std_enc_enc(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
opcode,
|
||||
1,
|
||||
subopcode_i,
|
||||
@ -581,9 +595,9 @@ pub(crate) fn emit(
|
||||
|
||||
Inst::UnaryRmR { size, op, src, dst } => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefixes::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefixes::None, RexFlags::set_w()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
@ -621,9 +635,9 @@ pub(crate) fn emit(
|
||||
loc,
|
||||
} => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefixes::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefixes::None, RexFlags::set_w()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
@ -649,9 +663,9 @@ pub(crate) fn emit(
|
||||
|
||||
Inst::MulHi { size, signed, rhs } => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefixes::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefixes::None, RexFlags::set_w()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
@ -826,7 +840,7 @@ pub(crate) fn emit(
|
||||
} else {
|
||||
RexFlags::clear_w()
|
||||
};
|
||||
emit_std_reg_reg(sink, LegacyPrefix::None, 0x89, 1, *src, dst.to_reg(), rex);
|
||||
emit_std_reg_reg(sink, LegacyPrefixes::None, 0x89, 1, *src, dst.to_reg(), rex);
|
||||
}
|
||||
|
||||
Inst::MovZX_RM_R {
|
||||
@ -880,7 +894,7 @@ pub(crate) fn emit(
|
||||
}
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
@ -899,7 +913,7 @@ pub(crate) fn emit(
|
||||
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
@ -920,7 +934,7 @@ pub(crate) fn emit(
|
||||
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
0x8B,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
@ -931,7 +945,7 @@ pub(crate) fn emit(
|
||||
|
||||
Inst::LoadEffectiveAddress { addr, dst } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
0x8D,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
@ -982,7 +996,7 @@ pub(crate) fn emit(
|
||||
}
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
@ -1001,7 +1015,7 @@ pub(crate) fn emit(
|
||||
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
@ -1038,14 +1052,14 @@ pub(crate) fn emit(
|
||||
};
|
||||
|
||||
// MOV r8, r/m8 is (REX.W==0) 88 /r
|
||||
emit_std_reg_mem(sink, LegacyPrefix::None, 0x88, 1, *src, dst, rex)
|
||||
emit_std_reg_mem(sink, LegacyPrefixes::None, 0x88, 1, *src, dst, rex)
|
||||
}
|
||||
|
||||
2 => {
|
||||
// MOV r16, r/m16 is 66 (REX.W==0) 89 /r
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::_66,
|
||||
LegacyPrefixes::_66,
|
||||
0x89,
|
||||
1,
|
||||
*src,
|
||||
@ -1058,7 +1072,7 @@ pub(crate) fn emit(
|
||||
// MOV r32, r/m32 is (REX.W==0) 89 /r
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
0x89,
|
||||
1,
|
||||
*src,
|
||||
@ -1071,7 +1085,7 @@ pub(crate) fn emit(
|
||||
// MOV r64, r/m64 is (REX.W==1) 89 /r
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
0x89,
|
||||
1,
|
||||
*src,
|
||||
@ -1109,7 +1123,7 @@ pub(crate) fn emit(
|
||||
None => {
|
||||
// SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode
|
||||
// SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
|
||||
emit_std_enc_enc(sink, LegacyPrefix::None, 0xD3, 1, subopcode, enc_dst, rex);
|
||||
emit_std_enc_enc(sink, LegacyPrefixes::None, 0xD3, 1, subopcode, enc_dst, rex);
|
||||
}
|
||||
|
||||
Some(num_bits) => {
|
||||
@ -1117,7 +1131,7 @@ pub(crate) fn emit(
|
||||
// SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib
|
||||
// When the shift amount is 1, there's an even shorter encoding, but we don't
|
||||
// bother with that nicety here.
|
||||
emit_std_enc_enc(sink, LegacyPrefix::None, 0xC1, 1, subopcode, enc_dst, rex);
|
||||
emit_std_enc_enc(sink, LegacyPrefixes::None, 0xC1, 1, subopcode, enc_dst, rex);
|
||||
sink.put1(*num_bits);
|
||||
}
|
||||
}
|
||||
@ -1125,7 +1139,7 @@ pub(crate) fn emit(
|
||||
|
||||
Inst::XmmRmiReg { opcode, src, dst } => {
|
||||
let rex = RexFlags::clear_w();
|
||||
let prefix = LegacyPrefix::_66;
|
||||
let prefix = LegacyPrefixes::_66;
|
||||
if let RegMemImm::Imm { simm32 } = src {
|
||||
let (opcode_bytes, reg_digit) = match opcode {
|
||||
SseOpcode::Psllw => (0x0F71, 6),
|
||||
@ -1175,9 +1189,9 @@ pub(crate) fn emit(
|
||||
src: src_e,
|
||||
dst: reg_g,
|
||||
} => {
|
||||
let mut prefix = LegacyPrefix::None;
|
||||
let mut prefix = LegacyPrefixes::None;
|
||||
if *size == 2 {
|
||||
prefix = LegacyPrefix::_66;
|
||||
prefix = LegacyPrefixes::_66;
|
||||
}
|
||||
|
||||
let mut rex = match size {
|
||||
@ -1245,7 +1259,7 @@ pub(crate) fn emit(
|
||||
rex_flags.always_emit();
|
||||
emit_std_enc_enc(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
opcode,
|
||||
2,
|
||||
0,
|
||||
@ -1261,9 +1275,9 @@ pub(crate) fn emit(
|
||||
dst: reg_g,
|
||||
} => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefixes::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefixes::None, RexFlags::set_w()),
|
||||
_ => unreachable!("invalid size spec for cmove"),
|
||||
};
|
||||
let opcode = 0x0F40 + cc.get_enc() as u32;
|
||||
@ -1284,6 +1298,8 @@ pub(crate) fn emit(
|
||||
src,
|
||||
dst,
|
||||
} => {
|
||||
// Lowering of the Select IR opcode when the input is an fcmp relies on the fact that
|
||||
// this doesn't clobber flags. Make sure to not do so here.
|
||||
let next = sink.get_label();
|
||||
|
||||
// Jump if cc is *not* set.
|
||||
@ -1315,7 +1331,7 @@ pub(crate) fn emit(
|
||||
let addr = &addr.finalize(state);
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
0xFF,
|
||||
1,
|
||||
6, /*subopcode*/
|
||||
@ -1371,7 +1387,7 @@ pub(crate) fn emit(
|
||||
let reg_enc = int_reg_enc(*reg);
|
||||
emit_std_enc_enc(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
0xFF,
|
||||
1,
|
||||
2, /*subopcode*/
|
||||
@ -1384,7 +1400,7 @@ pub(crate) fn emit(
|
||||
let addr = &addr.finalize(state);
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
0xFF,
|
||||
1,
|
||||
2, /*subopcode*/
|
||||
@ -1418,6 +1434,21 @@ pub(crate) fn emit(
|
||||
sink.put4(disp);
|
||||
}
|
||||
|
||||
Inst::JmpIf { cc, taken } => {
|
||||
let cond_start = sink.cur_offset();
|
||||
let cond_disp_off = cond_start + 2;
|
||||
if let Some(l) = taken.as_label() {
|
||||
sink.use_label_at_offset(cond_disp_off, l, LabelUse::JmpRel32);
|
||||
// Since this is not a terminator, don't enroll in the branch inversion mechanism.
|
||||
}
|
||||
|
||||
let taken_disp = taken.as_offset32_or_zero();
|
||||
let taken_disp = taken_disp as u32;
|
||||
sink.put1(0x0F);
|
||||
sink.put1(0x80 + cc.get_enc());
|
||||
sink.put4(taken_disp);
|
||||
}
|
||||
|
||||
Inst::JmpCond {
|
||||
cc,
|
||||
taken,
|
||||
@ -1461,7 +1492,7 @@ pub(crate) fn emit(
|
||||
let reg_enc = int_reg_enc(*reg);
|
||||
emit_std_enc_enc(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
0xFF,
|
||||
1,
|
||||
4, /*subopcode*/
|
||||
@ -1474,7 +1505,7 @@ pub(crate) fn emit(
|
||||
let addr = &addr.finalize(state);
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
LegacyPrefixes::None,
|
||||
0xFF,
|
||||
1,
|
||||
4, /*subopcode*/
|
||||
@ -1596,18 +1627,20 @@ pub(crate) fn emit(
|
||||
let rex = RexFlags::clear_w();
|
||||
|
||||
let (prefix, opcode) = match op {
|
||||
SseOpcode::Movaps => (LegacyPrefix::None, 0x0F28),
|
||||
SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F28),
|
||||
SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10),
|
||||
SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10),
|
||||
SseOpcode::Movups => (LegacyPrefix::None, 0x0F10),
|
||||
SseOpcode::Movupd => (LegacyPrefix::_66, 0x0F10),
|
||||
SseOpcode::Sqrtps => (LegacyPrefix::None, 0x0F51),
|
||||
SseOpcode::Sqrtpd => (LegacyPrefix::_66, 0x0F51),
|
||||
SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51),
|
||||
SseOpcode::Sqrtsd => (LegacyPrefix::_F2, 0x0F51),
|
||||
SseOpcode::Cvtss2sd => (LegacyPrefix::_F3, 0x0F5A),
|
||||
SseOpcode::Cvtsd2ss => (LegacyPrefix::_F2, 0x0F5A),
|
||||
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A),
|
||||
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A),
|
||||
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28),
|
||||
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28),
|
||||
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F),
|
||||
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F),
|
||||
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10),
|
||||
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10),
|
||||
SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10),
|
||||
SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10),
|
||||
SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51),
|
||||
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51),
|
||||
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51),
|
||||
SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51),
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
|
||||
@ -1632,57 +1665,60 @@ pub(crate) fn emit(
|
||||
dst: reg_g,
|
||||
} => {
|
||||
let rex = RexFlags::clear_w();
|
||||
let (prefix, opcode) = match op {
|
||||
SseOpcode::Addps => (LegacyPrefix::None, 0x0F58),
|
||||
SseOpcode::Addpd => (LegacyPrefix::_66, 0x0F58),
|
||||
SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58),
|
||||
SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58),
|
||||
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54),
|
||||
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54),
|
||||
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55),
|
||||
SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55),
|
||||
SseOpcode::Divps => (LegacyPrefix::None, 0x0F5E),
|
||||
SseOpcode::Divpd => (LegacyPrefix::_66, 0x0F5E),
|
||||
SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E),
|
||||
SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E),
|
||||
SseOpcode::Minps => (LegacyPrefix::None, 0x0F5D),
|
||||
SseOpcode::Minpd => (LegacyPrefix::_66, 0x0F5D),
|
||||
SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D),
|
||||
SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D),
|
||||
SseOpcode::Maxps => (LegacyPrefix::None, 0x0F5F),
|
||||
SseOpcode::Maxpd => (LegacyPrefix::_66, 0x0F5F),
|
||||
SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F),
|
||||
SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F),
|
||||
SseOpcode::Mulps => (LegacyPrefix::None, 0x0F59),
|
||||
SseOpcode::Mulpd => (LegacyPrefix::_66, 0x0F59),
|
||||
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59),
|
||||
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59),
|
||||
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56),
|
||||
SseOpcode::Orps => (LegacyPrefix::None, 0x0F56),
|
||||
SseOpcode::Paddb => (LegacyPrefix::_66, 0x0FFC),
|
||||
SseOpcode::Paddd => (LegacyPrefix::_66, 0x0FFE),
|
||||
SseOpcode::Paddq => (LegacyPrefix::_66, 0x0FD4),
|
||||
SseOpcode::Paddw => (LegacyPrefix::_66, 0x0FFD),
|
||||
SseOpcode::Psubb => (LegacyPrefix::_66, 0x0FF8),
|
||||
SseOpcode::Psubd => (LegacyPrefix::_66, 0x0FFA),
|
||||
SseOpcode::Psubq => (LegacyPrefix::_66, 0x0FFB),
|
||||
SseOpcode::Psubw => (LegacyPrefix::_66, 0x0FF9),
|
||||
SseOpcode::Subps => (LegacyPrefix::None, 0x0F5C),
|
||||
SseOpcode::Subpd => (LegacyPrefix::_66, 0x0F5C),
|
||||
SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C),
|
||||
SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C),
|
||||
SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57),
|
||||
SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57),
|
||||
let (prefix, opcode, length) = match op {
|
||||
SseOpcode::Addps => (LegacyPrefixes::None, 0x0F58, 2),
|
||||
SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2),
|
||||
SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2),
|
||||
SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2),
|
||||
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
||||
SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2),
|
||||
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
|
||||
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
|
||||
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
|
||||
SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
|
||||
SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2),
|
||||
SseOpcode::Divsd => (LegacyPrefixes::_F2, 0x0F5E, 2),
|
||||
SseOpcode::Minps => (LegacyPrefixes::None, 0x0F5D, 2),
|
||||
SseOpcode::Minpd => (LegacyPrefixes::_66, 0x0F5D, 2),
|
||||
SseOpcode::Minss => (LegacyPrefixes::_F3, 0x0F5D, 2),
|
||||
SseOpcode::Minsd => (LegacyPrefixes::_F2, 0x0F5D, 2),
|
||||
SseOpcode::Maxps => (LegacyPrefixes::None, 0x0F5F, 2),
|
||||
SseOpcode::Maxpd => (LegacyPrefixes::_66, 0x0F5F, 2),
|
||||
SseOpcode::Maxss => (LegacyPrefixes::_F3, 0x0F5F, 2),
|
||||
SseOpcode::Maxsd => (LegacyPrefixes::_F2, 0x0F5F, 2),
|
||||
SseOpcode::Mulps => (LegacyPrefixes::None, 0x0F59, 2),
|
||||
SseOpcode::Mulpd => (LegacyPrefixes::_66, 0x0F59, 2),
|
||||
SseOpcode::Mulss => (LegacyPrefixes::_F3, 0x0F59, 2),
|
||||
SseOpcode::Mulsd => (LegacyPrefixes::_F2, 0x0F59, 2),
|
||||
SseOpcode::Orpd => (LegacyPrefixes::_66, 0x0F56, 2),
|
||||
SseOpcode::Orps => (LegacyPrefixes::None, 0x0F56, 2),
|
||||
SseOpcode::Paddb => (LegacyPrefixes::_66, 0x0FFC, 2),
|
||||
SseOpcode::Paddd => (LegacyPrefixes::_66, 0x0FFE, 2),
|
||||
SseOpcode::Paddq => (LegacyPrefixes::_66, 0x0FD4, 2),
|
||||
SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2),
|
||||
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
|
||||
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
|
||||
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
|
||||
SseOpcode::Psubb => (LegacyPrefixes::_66, 0x0FF8, 2),
|
||||
SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2),
|
||||
SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2),
|
||||
SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2),
|
||||
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
|
||||
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
|
||||
SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
|
||||
SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2),
|
||||
SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
|
||||
SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
|
||||
match src_e {
|
||||
RegMem::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
|
||||
emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex);
|
||||
}
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state);
|
||||
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
|
||||
emit_std_reg_mem(sink, prefix, opcode, length, reg_g.to_reg(), addr, rex);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1775,10 +1811,10 @@ pub(crate) fn emit(
|
||||
|
||||
Inst::XmmRmRImm { op, src, dst, imm } => {
|
||||
let prefix = match op {
|
||||
SseOpcode::Cmpps => LegacyPrefix::None,
|
||||
SseOpcode::Cmppd => LegacyPrefix::_66,
|
||||
SseOpcode::Cmpss => LegacyPrefix::_F3,
|
||||
SseOpcode::Cmpsd => LegacyPrefix::_F2,
|
||||
SseOpcode::Cmpps => LegacyPrefixes::None,
|
||||
SseOpcode::Cmppd => LegacyPrefixes::_66,
|
||||
SseOpcode::Cmpss => LegacyPrefixes::_F3,
|
||||
SseOpcode::Cmpsd => LegacyPrefixes::_F2,
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
let opcode = 0x0FC2;
|
||||
@ -1802,17 +1838,9 @@ pub(crate) fn emit(
|
||||
// "constant inline" code should be replaced by constant pool integration.
|
||||
|
||||
// Load the inline constant.
|
||||
let opcode = match *ty {
|
||||
types::F32X4 => SseOpcode::Movups,
|
||||
types::F64X2 => SseOpcode::Movupd,
|
||||
types::I8X16 => SseOpcode::Movupd, // TODO replace with MOVDQU
|
||||
_ => unimplemented!("cannot yet load constants for type: {}", ty),
|
||||
};
|
||||
let constant_start_label = sink.get_label();
|
||||
let load_offset = RegMem::mem(Amode::rip_relative(BranchTarget::Label(
|
||||
constant_start_label,
|
||||
)));
|
||||
let load = Inst::xmm_unary_rm_r(opcode, load_offset, *dst);
|
||||
let load_offset = Amode::rip_relative(BranchTarget::Label(constant_start_label));
|
||||
let load = Inst::load(*ty, load_offset, *dst, ExtKind::None, None);
|
||||
load.emit(sink, flags, state);
|
||||
|
||||
// Jump over the constant.
|
||||
@ -1836,10 +1864,14 @@ pub(crate) fn emit(
|
||||
srcloc,
|
||||
} => {
|
||||
let (prefix, opcode) = match op {
|
||||
SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F11),
|
||||
SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F11),
|
||||
SseOpcode::Movaps => (LegacyPrefix::None, 0x0F29),
|
||||
SseOpcode::Movups => (LegacyPrefix::None, 0x0F11),
|
||||
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F29),
|
||||
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F29),
|
||||
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F7F),
|
||||
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F7F),
|
||||
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F11),
|
||||
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F11),
|
||||
SseOpcode::Movups => (LegacyPrefixes::None, 0x0F11),
|
||||
SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F11),
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
let dst = &dst.finalize(state);
|
||||
@ -1859,9 +1891,9 @@ pub(crate) fn emit(
|
||||
let (prefix, opcode, dst_first) = match op {
|
||||
// Movd and movq use the same opcode; the presence of the REX prefix (set below)
|
||||
// actually determines which is used.
|
||||
SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefix::_66, 0x0F7E, false),
|
||||
SseOpcode::Cvttss2si => (LegacyPrefix::_F3, 0x0F2C, true),
|
||||
SseOpcode::Cvttsd2si => (LegacyPrefix::_F2, 0x0F2C, true),
|
||||
SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F7E, false),
|
||||
SseOpcode::Cvttss2si => (LegacyPrefixes::_F3, 0x0F2C, true),
|
||||
SseOpcode::Cvttsd2si => (LegacyPrefixes::_F2, 0x0F2C, true),
|
||||
_ => panic!("unexpected opcode {:?}", op),
|
||||
};
|
||||
let rex = match dst_size {
|
||||
@ -1887,9 +1919,9 @@ pub(crate) fn emit(
|
||||
let (prefix, opcode) = match op {
|
||||
// Movd and movq use the same opcode; the presence of the REX prefix (set below)
|
||||
// actually determines which is used.
|
||||
SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefix::_66, 0x0F6E),
|
||||
SseOpcode::Cvtsi2ss => (LegacyPrefix::_F3, 0x0F2A),
|
||||
SseOpcode::Cvtsi2sd => (LegacyPrefix::_F2, 0x0F2A),
|
||||
SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F6E),
|
||||
SseOpcode::Cvtsi2ss => (LegacyPrefixes::_F3, 0x0F2A),
|
||||
SseOpcode::Cvtsi2sd => (LegacyPrefixes::_F2, 0x0F2A),
|
||||
_ => panic!("unexpected opcode {:?}", op),
|
||||
};
|
||||
let rex = match *src_size {
|
||||
@ -1910,8 +1942,8 @@ pub(crate) fn emit(
|
||||
Inst::XMM_Cmp_RM_R { op, src, dst } => {
|
||||
let rex = RexFlags::clear_w();
|
||||
let (prefix, opcode) = match op {
|
||||
SseOpcode::Ucomisd => (LegacyPrefix::_66, 0x0F2E),
|
||||
SseOpcode::Ucomiss => (LegacyPrefix::None, 0x0F2E),
|
||||
SseOpcode::Ucomisd => (LegacyPrefixes::_66, 0x0F2E),
|
||||
SseOpcode::Ucomiss => (LegacyPrefixes::None, 0x0F2E),
|
||||
_ => unimplemented!("Emit xmm cmp rm r"),
|
||||
};
|
||||
|
||||
@ -2430,6 +2462,113 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::LockCmpxchg {
|
||||
ty,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
} => {
|
||||
if let Some(srcloc) = srcloc {
|
||||
sink.add_trap(*srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
// lock cmpxchg{b,w,l,q} %src, (dst)
|
||||
// Note that 0xF0 is the Lock prefix.
|
||||
let (prefix, rex, opcodes) = match *ty {
|
||||
types::I8 => {
|
||||
let mut rex_flags = RexFlags::clear_w();
|
||||
let enc_src = int_reg_enc(*src);
|
||||
if enc_src >= 4 && enc_src <= 7 {
|
||||
rex_flags.always_emit();
|
||||
};
|
||||
(LegacyPrefixes::_F0, rex_flags, 0x0FB0)
|
||||
}
|
||||
types::I16 => (LegacyPrefixes::_66F0, RexFlags::clear_w(), 0x0FB1),
|
||||
types::I32 => (LegacyPrefixes::_F0, RexFlags::clear_w(), 0x0FB1),
|
||||
types::I64 => (LegacyPrefixes::_F0, RexFlags::set_w(), 0x0FB1),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
emit_std_reg_mem(sink, prefix, opcodes, 2, *src, &dst.finalize(state), rex);
|
||||
}
|
||||
|
||||
Inst::AtomicRmwSeq { ty, op, srcloc } => {
|
||||
// Emit this:
|
||||
//
|
||||
// mov{zbq,zwq,zlq,q} (%r9), %rax // rax = old value
|
||||
// again:
|
||||
// movq %rax, %r11 // rax = old value, r11 = old value
|
||||
// `op`q %r10, %r11 // rax = old value, r11 = new value
|
||||
// lock cmpxchg{b,w,l,q} %r11, (%r9) // try to store new value
|
||||
// jnz again // If this is taken, rax will have a "revised" old value
|
||||
//
|
||||
// Operand conventions:
|
||||
// IN: %r9 (addr), %r10 (2nd arg for `op`)
|
||||
// OUT: %rax (old value), %r11 (trashed), %rflags (trashed)
|
||||
//
|
||||
// In the case where the operation is 'xchg', the "`op`q" instruction is instead
|
||||
// movq %r10, %r11
|
||||
// so that we simply write in the destination, the "2nd arg for `op`".
|
||||
let rax = regs::rax();
|
||||
let r9 = regs::r9();
|
||||
let r10 = regs::r10();
|
||||
let r11 = regs::r11();
|
||||
let rax_w = Writable::from_reg(rax);
|
||||
let r11_w = Writable::from_reg(r11);
|
||||
let amode = Amode::imm_reg(0, r9);
|
||||
let again_label = sink.get_label();
|
||||
|
||||
// mov{zbq,zwq,zlq,q} (%r9), %rax
|
||||
// No need to call `add_trap` here, since the `i1` emit will do that.
|
||||
let i1 = Inst::load(*ty, amode.clone(), rax_w, ExtKind::ZeroExtend, *srcloc);
|
||||
i1.emit(sink, flags, state);
|
||||
|
||||
// again:
|
||||
sink.bind_label(again_label);
|
||||
|
||||
// movq %rax, %r11
|
||||
let i2 = Inst::mov_r_r(true, rax, r11_w);
|
||||
i2.emit(sink, flags, state);
|
||||
|
||||
// opq %r10, %r11
|
||||
let r10_rmi = RegMemImm::reg(r10);
|
||||
let i3 = if *op == inst_common::AtomicRmwOp::Xchg {
|
||||
Inst::mov_r_r(true, r10, r11_w)
|
||||
} else {
|
||||
let alu_op = match op {
|
||||
inst_common::AtomicRmwOp::Add => AluRmiROpcode::Add,
|
||||
inst_common::AtomicRmwOp::Sub => AluRmiROpcode::Sub,
|
||||
inst_common::AtomicRmwOp::And => AluRmiROpcode::And,
|
||||
inst_common::AtomicRmwOp::Or => AluRmiROpcode::Or,
|
||||
inst_common::AtomicRmwOp::Xor => AluRmiROpcode::Xor,
|
||||
inst_common::AtomicRmwOp::Xchg => unreachable!(),
|
||||
};
|
||||
Inst::alu_rmi_r(true, alu_op, r10_rmi, r11_w)
|
||||
};
|
||||
i3.emit(sink, flags, state);
|
||||
|
||||
// lock cmpxchg{b,w,l,q} %r11, (%r9)
|
||||
// No need to call `add_trap` here, since the `i4` emit will do that.
|
||||
let i4 = Inst::LockCmpxchg {
|
||||
ty: *ty,
|
||||
src: r11,
|
||||
dst: amode.into(),
|
||||
srcloc: *srcloc,
|
||||
};
|
||||
i4.emit(sink, flags, state);
|
||||
|
||||
// jnz again
|
||||
one_way_jmp(sink, CC::NZ, again_label);
|
||||
}
|
||||
|
||||
Inst::Fence { kind } => {
|
||||
sink.put1(0x0F);
|
||||
sink.put1(0xAE);
|
||||
match kind {
|
||||
FenceKind::MFence => sink.put1(0xF0), // mfence = 0F AE F0
|
||||
FenceKind::LFence => sink.put1(0xE8), // lfence = 0F AE E8
|
||||
FenceKind::SFence => sink.put1(0xF8), // sfence = 0F AE F8
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Hlt => {
|
||||
sink.put1(0xcc);
|
||||
}
|
||||
|
@ -4,10 +4,13 @@
|
||||
//!
|
||||
//! to see stdout: cargo test -- --nocapture
|
||||
//!
|
||||
//! for this specific case:
|
||||
//! for this specific case, as of 24 Aug 2020:
|
||||
//!
|
||||
//! (cd cranelift/codegen && \
|
||||
//! RUST_BACKTRACE=1 cargo test isa::x64::inst::test_x64_insn_encoding_and_printing -- --nocapture)
|
||||
//! cd to the top of your wasmtime tree, then:
|
||||
//! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \
|
||||
//! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \
|
||||
//! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \
|
||||
//! --exclude peepmatic-macro -- isa::x64::inst::emit_tests::test_x64_emit
|
||||
|
||||
use super::*;
|
||||
use crate::isa::test_utils;
|
||||
@ -3062,6 +3065,24 @@ fn test_x64_emit() {
|
||||
"psubq %xmm8, %xmm1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6),
|
||||
"66410F3840F7",
|
||||
"pmulld %xmm15, %xmm6",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1),
|
||||
"66410FD5CE",
|
||||
"pmullw %xmm14, %xmm1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9),
|
||||
"66450FF4C8",
|
||||
"pmuludq %xmm8, %xmm9",
|
||||
));
|
||||
|
||||
// XMM_Mov_R_M: float stores
|
||||
insns.push((
|
||||
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),
|
||||
@ -3254,6 +3275,174 @@ fn test_x64_emit() {
|
||||
"cmpps $0, %xmm15, %xmm7",
|
||||
));
|
||||
|
||||
// ========================================================
|
||||
// Pertaining to atomics.
|
||||
let am1: SyntheticAmode = Amode::imm_reg_reg_shift(321, r10, rdx, 2).into();
|
||||
// `am2` doesn't contribute any 1 bits to the rex prefix, so we must use it when testing
|
||||
// for retention of the apparently-redundant rex prefix in the 8-bit case.
|
||||
let am2: SyntheticAmode = Amode::imm_reg_reg_shift(-12345i32 as u32, rcx, rsi, 3).into();
|
||||
|
||||
// A general 8-bit case.
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: rbx,
|
||||
dst: am1,
|
||||
srcloc: None,
|
||||
},
|
||||
"F0410FB09C9241010000",
|
||||
"lock cmpxchgb %bl, 321(%r10,%rdx,4)",
|
||||
));
|
||||
// Check redundant rex retention in 8-bit cases.
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: rdx,
|
||||
dst: am2.clone(),
|
||||
srcloc: None,
|
||||
},
|
||||
"F00FB094F1C7CFFFFF",
|
||||
"lock cmpxchgb %dl, -12345(%rcx,%rsi,8)",
|
||||
));
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: rsi,
|
||||
dst: am2.clone(),
|
||||
srcloc: None,
|
||||
},
|
||||
"F0400FB0B4F1C7CFFFFF",
|
||||
"lock cmpxchgb %sil, -12345(%rcx,%rsi,8)",
|
||||
));
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: r10,
|
||||
dst: am2.clone(),
|
||||
srcloc: None,
|
||||
},
|
||||
"F0440FB094F1C7CFFFFF",
|
||||
"lock cmpxchgb %r10b, -12345(%rcx,%rsi,8)",
|
||||
));
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: r15,
|
||||
dst: am2.clone(),
|
||||
srcloc: None,
|
||||
},
|
||||
"F0440FB0BCF1C7CFFFFF",
|
||||
"lock cmpxchgb %r15b, -12345(%rcx,%rsi,8)",
|
||||
));
|
||||
// 16 bit cases
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I16,
|
||||
src: rsi,
|
||||
dst: am2.clone(),
|
||||
srcloc: None,
|
||||
},
|
||||
"66F00FB1B4F1C7CFFFFF",
|
||||
"lock cmpxchgw %si, -12345(%rcx,%rsi,8)",
|
||||
));
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I16,
|
||||
src: r10,
|
||||
dst: am2.clone(),
|
||||
srcloc: None,
|
||||
},
|
||||
"66F0440FB194F1C7CFFFFF",
|
||||
"lock cmpxchgw %r10w, -12345(%rcx,%rsi,8)",
|
||||
));
|
||||
// 32 bit cases
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I32,
|
||||
src: rsi,
|
||||
dst: am2.clone(),
|
||||
srcloc: None,
|
||||
},
|
||||
"F00FB1B4F1C7CFFFFF",
|
||||
"lock cmpxchgl %esi, -12345(%rcx,%rsi,8)",
|
||||
));
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I32,
|
||||
src: r10,
|
||||
dst: am2.clone(),
|
||||
srcloc: None,
|
||||
},
|
||||
"F0440FB194F1C7CFFFFF",
|
||||
"lock cmpxchgl %r10d, -12345(%rcx,%rsi,8)",
|
||||
));
|
||||
// 64 bit cases
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I64,
|
||||
src: rsi,
|
||||
dst: am2.clone(),
|
||||
srcloc: None,
|
||||
},
|
||||
"F0480FB1B4F1C7CFFFFF",
|
||||
"lock cmpxchgq %rsi, -12345(%rcx,%rsi,8)",
|
||||
));
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I64,
|
||||
src: r10,
|
||||
dst: am2.clone(),
|
||||
srcloc: None,
|
||||
},
|
||||
"F04C0FB194F1C7CFFFFF",
|
||||
"lock cmpxchgq %r10, -12345(%rcx,%rsi,8)",
|
||||
));
|
||||
|
||||
// AtomicRmwSeq
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I8, op: inst_common::AtomicRmwOp::Or, srcloc: None },
|
||||
"490FB6014989C34D09D3F0450FB0190F85EFFFFFFF",
|
||||
"atomically { 8_bits_at_[%r9]) Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I16, op: inst_common::AtomicRmwOp::And, srcloc: None },
|
||||
"490FB7014989C34D21D366F0450FB1190F85EEFFFFFF",
|
||||
"atomically { 16_bits_at_[%r9]) And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I32, op: inst_common::AtomicRmwOp::Xchg, srcloc: None },
|
||||
"418B014989C34D89D3F0450FB1190F85EFFFFFFF",
|
||||
"atomically { 32_bits_at_[%r9]) Xchg= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I64, op: inst_common::AtomicRmwOp::Add, srcloc: None },
|
||||
"498B014989C34D01D3F04D0FB1190F85EFFFFFFF",
|
||||
"atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
|
||||
// Fence
|
||||
insns.push((
|
||||
Inst::Fence {
|
||||
kind: FenceKind::MFence,
|
||||
},
|
||||
"0FAEF0",
|
||||
"mfence",
|
||||
));
|
||||
insns.push((
|
||||
Inst::Fence {
|
||||
kind: FenceKind::LFence,
|
||||
},
|
||||
"0FAEE8",
|
||||
"lfence",
|
||||
));
|
||||
insns.push((
|
||||
Inst::Fence {
|
||||
kind: FenceKind::SFence,
|
||||
},
|
||||
"0FAEF8",
|
||||
"sfence",
|
||||
));
|
||||
|
||||
// ========================================================
|
||||
// Misc instructions.
|
||||
|
||||
|
@ -359,6 +359,16 @@ pub enum Inst {
|
||||
/// Jump to a known target: jmp simm32.
|
||||
JmpKnown { dst: BranchTarget },
|
||||
|
||||
/// One-way conditional branch: jcond cond target.
|
||||
///
|
||||
/// This instruction is useful when we have conditional jumps depending on more than two
|
||||
/// conditions, see for instance the lowering of Brz/brnz with Fcmp inputs.
|
||||
///
|
||||
/// A note of caution: in contexts where the branch target is another block, this has to be the
|
||||
/// same successor as the one specified in the terminator branch of the current block.
|
||||
/// Otherwise, this might confuse register allocation by creating new invisible edges.
|
||||
JmpIf { cc: CC, taken: BranchTarget },
|
||||
|
||||
/// Two-way conditional branch: jcond cond target target.
|
||||
/// Emitted as a compound sequence; the MachBuffer will shrink it as appropriate.
|
||||
JmpCond {
|
||||
@ -404,6 +414,56 @@ pub enum Inst {
|
||||
offset: i64,
|
||||
},
|
||||
|
||||
// =====================================
|
||||
// Instructions pertaining to atomic memory accesses.
|
||||
/// A standard (native) `lock cmpxchg src, (amode)`, with register conventions:
|
||||
///
|
||||
/// `dst` (read) address
|
||||
/// `src` (read) replacement value
|
||||
/// %rax (modified) in: expected value, out: value that was actually at `dst`
|
||||
/// %rflags is written. Do not assume anything about it after the instruction.
|
||||
///
|
||||
/// The instruction "succeeded" iff the lowest `ty` bits of %rax afterwards are the same as
|
||||
/// they were before.
|
||||
LockCmpxchg {
|
||||
ty: Type, // I8, I16, I32 or I64
|
||||
src: Reg,
|
||||
dst: SyntheticAmode,
|
||||
srcloc: Option<SourceLoc>,
|
||||
},
|
||||
|
||||
/// A synthetic instruction, based on a loop around a native `lock cmpxchg` instruction.
|
||||
/// This atomically modifies a value in memory and returns the old value. The sequence
|
||||
/// consists of an initial "normal" load from `dst`, followed by a loop which computes the
|
||||
/// new value and tries to compare-and-swap ("CAS") it into `dst`, using the native
|
||||
/// instruction `lock cmpxchg{b,w,l,q}` . The loop iterates until the CAS is successful.
|
||||
/// If there is no contention, there will be only one pass through the loop body. The
|
||||
/// sequence does *not* perform any explicit memory fence instructions
|
||||
/// (mfence/sfence/lfence).
|
||||
///
|
||||
/// Note that the transaction is atomic in the sense that, as observed by some other thread,
|
||||
/// `dst` either has the initial or final value, but no other. It isn't atomic in the sense
|
||||
/// of guaranteeing that no other thread writes to `dst` in between the initial load and the
|
||||
/// CAS -- but that would cause the CAS to fail unless the other thread's last write before
|
||||
/// the CAS wrote the same value that was already there. In other words, this
|
||||
/// implementation suffers (unavoidably) from the A-B-A problem.
|
||||
///
|
||||
/// This instruction sequence has fixed register uses as follows:
|
||||
///
|
||||
/// %r9 (read) address
|
||||
/// %r10 (read) second operand for `op`
|
||||
/// %r11 (written) scratch reg; value afterwards has no meaning
|
||||
/// %rax (written) the old value at %r9
|
||||
/// %rflags is written. Do not assume anything about it after the instruction.
|
||||
AtomicRmwSeq {
|
||||
ty: Type, // I8, I16, I32 or I64
|
||||
op: inst_common::AtomicRmwOp,
|
||||
srcloc: Option<SourceLoc>,
|
||||
},
|
||||
|
||||
/// A memory fence (mfence, lfence or sfence).
|
||||
Fence { kind: FenceKind },
|
||||
|
||||
// =====================================
|
||||
// Meta-instructions generating no code.
|
||||
/// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
|
||||
@ -526,6 +586,7 @@ impl Inst {
|
||||
Inst::Mov_R_R { is_64, src, dst }
|
||||
}
|
||||
|
||||
// TODO Can be replaced by `Inst::move` (high-level) and `Inst::unary_rm_r` (low-level)
|
||||
pub(crate) fn xmm_mov(
|
||||
op: SseOpcode,
|
||||
src: RegMem,
|
||||
@ -915,6 +976,10 @@ impl Inst {
|
||||
Inst::JmpKnown { dst }
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_if(cc: CC, taken: BranchTarget) -> Inst {
|
||||
Inst::JmpIf { cc, taken }
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_cond(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
|
||||
Inst::JmpCond {
|
||||
cc,
|
||||
@ -935,6 +1000,85 @@ impl Inst {
|
||||
srcloc,
|
||||
}
|
||||
}
|
||||
|
||||
/// Choose which instruction to use for loading a register value from memory. For loads smaller
|
||||
/// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
|
||||
/// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
|
||||
pub(crate) fn load(
|
||||
ty: Type,
|
||||
from_addr: impl Into<SyntheticAmode>,
|
||||
to_reg: Writable<Reg>,
|
||||
ext_kind: ExtKind,
|
||||
srcloc: Option<SourceLoc>,
|
||||
) -> Inst {
|
||||
let rc = to_reg.to_reg().get_class();
|
||||
match rc {
|
||||
RegClass::I64 => {
|
||||
let ext_mode = match ty.bytes() {
|
||||
1 => Some(ExtMode::BQ),
|
||||
2 => Some(ExtMode::WQ),
|
||||
4 => Some(ExtMode::LQ),
|
||||
8 => None,
|
||||
_ => unreachable!("the type should never use a scalar load: {}", ty),
|
||||
};
|
||||
if let Some(ext_mode) = ext_mode {
|
||||
// Values smaller than 64 bits must be extended in some way.
|
||||
match ext_kind {
|
||||
ExtKind::SignExtend => {
|
||||
Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg, srcloc)
|
||||
}
|
||||
ExtKind::ZeroExtend => {
|
||||
Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg, srcloc)
|
||||
}
|
||||
ExtKind::None => panic!(
|
||||
"expected an extension kind for extension mode: {:?}",
|
||||
ext_mode
|
||||
),
|
||||
}
|
||||
} else {
|
||||
// 64-bit values can be moved directly.
|
||||
Inst::mov64_m_r(from_addr, to_reg, srcloc)
|
||||
}
|
||||
}
|
||||
RegClass::V128 => {
|
||||
let opcode = match ty {
|
||||
types::F32 => SseOpcode::Movss,
|
||||
types::F64 => SseOpcode::Movsd,
|
||||
types::F32X4 => SseOpcode::Movups,
|
||||
types::F64X2 => SseOpcode::Movupd,
|
||||
_ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqu,
|
||||
_ => unimplemented!("unable to load type: {}", ty),
|
||||
};
|
||||
Inst::xmm_unary_rm_r(opcode, RegMem::mem(from_addr), to_reg)
|
||||
}
|
||||
_ => panic!("unable to generate load for register class: {:?}", rc),
|
||||
}
|
||||
}
|
||||
|
||||
/// Choose which instruction to use for storing a register value to memory.
|
||||
pub(crate) fn store(
|
||||
ty: Type,
|
||||
from_reg: Reg,
|
||||
to_addr: impl Into<SyntheticAmode>,
|
||||
srcloc: Option<SourceLoc>,
|
||||
) -> Inst {
|
||||
let rc = from_reg.get_class();
|
||||
match rc {
|
||||
RegClass::I64 => Inst::mov_r_m(ty.bytes() as u8, from_reg, to_addr, srcloc),
|
||||
RegClass::V128 => {
|
||||
let opcode = match ty {
|
||||
types::F32 => SseOpcode::Movss,
|
||||
types::F64 => SseOpcode::Movsd,
|
||||
types::F32X4 => SseOpcode::Movups,
|
||||
types::F64X2 => SseOpcode::Movupd,
|
||||
_ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqu,
|
||||
_ => unimplemented!("unable to store type: {}", ty),
|
||||
};
|
||||
Inst::xmm_mov_r_m(opcode, from_reg, to_addr, srcloc)
|
||||
}
|
||||
_ => panic!("unable to generate store for register class: {:?}", rc),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Inst helpers.
|
||||
@ -1406,12 +1550,18 @@ impl ShowWithRRU for Inst {
|
||||
format!("{} {}", ljustify("jmp".to_string()), dst.show_rru(mb_rru))
|
||||
}
|
||||
|
||||
Inst::JmpIf { cc, taken } => format!(
|
||||
"{} {}",
|
||||
ljustify2("j".to_string(), cc.to_string()),
|
||||
taken.show_rru(mb_rru),
|
||||
),
|
||||
|
||||
Inst::JmpCond {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
} => format!(
|
||||
"{} taken={} not_taken={}",
|
||||
"{} {}; j {}",
|
||||
ljustify2("j".to_string(), cc.to_string()),
|
||||
taken.show_rru(mb_rru),
|
||||
not_taken.show_rru(mb_rru)
|
||||
@ -1441,6 +1591,26 @@ impl ShowWithRRU for Inst {
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 8),
|
||||
),
|
||||
|
||||
Inst::LockCmpxchg { ty, src, dst, .. } => {
|
||||
let size = ty.bytes() as u8;
|
||||
format!("lock cmpxchg{} {}, {}",
|
||||
suffixBWLQ(size), show_ireg_sized(*src, mb_rru, size), dst.show_rru(mb_rru))
|
||||
}
|
||||
|
||||
Inst::AtomicRmwSeq { ty, op, .. } => {
|
||||
format!(
|
||||
"atomically {{ {}_bits_at_[%r9]) {:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}",
|
||||
ty.bits(), op)
|
||||
},
|
||||
|
||||
Inst::Fence { kind } => {
|
||||
match kind {
|
||||
FenceKind::MFence => "mfence".to_string(),
|
||||
FenceKind::LFence => "lfence".to_string(),
|
||||
FenceKind::SFence => "sfence".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset),
|
||||
|
||||
Inst::Hlt => "hlt".into(),
|
||||
@ -1657,15 +1827,30 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
|
||||
Inst::LockCmpxchg { src, dst, .. } => {
|
||||
dst.get_regs_as_uses(collector);
|
||||
collector.add_use(*src);
|
||||
collector.add_mod(Writable::from_reg(regs::rax()));
|
||||
}
|
||||
|
||||
Inst::AtomicRmwSeq { .. } => {
|
||||
collector.add_use(regs::r9());
|
||||
collector.add_use(regs::r10());
|
||||
collector.add_def(Writable::from_reg(regs::r11()));
|
||||
collector.add_def(Writable::from_reg(regs::rax()));
|
||||
}
|
||||
|
||||
Inst::Ret
|
||||
| Inst::EpiloguePlaceholder
|
||||
| Inst::JmpKnown { .. }
|
||||
| Inst::JmpIf { .. }
|
||||
| Inst::JmpCond { .. }
|
||||
| Inst::Nop { .. }
|
||||
| Inst::TrapIf { .. }
|
||||
| Inst::VirtualSPOffsetAdj { .. }
|
||||
| Inst::Hlt
|
||||
| Inst::Ud2 { .. } => {
|
||||
| Inst::Ud2 { .. }
|
||||
| Inst::Fence { .. } => {
|
||||
// No registers are used.
|
||||
}
|
||||
}
|
||||
@ -2011,16 +2196,29 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
|
||||
Inst::LoadExtName { ref mut dst, .. } => map_def(mapper, dst),
|
||||
|
||||
Inst::LockCmpxchg {
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
..
|
||||
} => {
|
||||
map_use(mapper, src);
|
||||
dst.map_uses(mapper);
|
||||
}
|
||||
|
||||
Inst::Ret
|
||||
| Inst::EpiloguePlaceholder
|
||||
| Inst::JmpKnown { .. }
|
||||
| Inst::JmpCond { .. }
|
||||
| Inst::JmpIf { .. }
|
||||
| Inst::Nop { .. }
|
||||
| Inst::TrapIf { .. }
|
||||
| Inst::VirtualSPOffsetAdj { .. }
|
||||
| Inst::Ud2 { .. }
|
||||
| Inst::Hlt => {
|
||||
// No registers are used.
|
||||
| Inst::Hlt
|
||||
| Inst::AtomicRmwSeq { .. }
|
||||
| Inst::Fence { .. } => {
|
||||
// Instruction doesn't explicitly mention any regs, so it can't have any virtual
|
||||
// regs that we'd need to remap. Hence no action required.
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2093,16 +2291,18 @@ impl MachInst for Inst {
|
||||
debug_assert!(rc_dst == rc_src);
|
||||
match rc_dst {
|
||||
RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
|
||||
RegClass::V128 => match ty {
|
||||
types::F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None),
|
||||
types::F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None),
|
||||
_ if ty.is_vector() && ty.bits() == 128 => {
|
||||
// TODO Specialize this move for different types: MOVUPD, MOVDQU, etc.
|
||||
Inst::xmm_mov(SseOpcode::Movups, RegMem::reg(src_reg), dst_reg, None)
|
||||
RegClass::V128 => {
|
||||
let opcode = match ty {
|
||||
types::F32 => SseOpcode::Movss,
|
||||
types::F64 => SseOpcode::Movsd,
|
||||
types::F32X4 => SseOpcode::Movaps,
|
||||
types::F64X2 => SseOpcode::Movapd,
|
||||
_ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqa,
|
||||
_ => unimplemented!("unable to move type: {}", ty),
|
||||
};
|
||||
Inst::xmm_unary_rm_r(opcode, RegMem::reg(src_reg), dst_reg)
|
||||
}
|
||||
_ => panic!("unexpected type {:?} in gen_move of regclass V128", ty),
|
||||
},
|
||||
_ => panic!("gen_move(x64): unhandled regclass"),
|
||||
_ => panic!("gen_move(x64): unhandled regclass {:?}", rc_dst),
|
||||
}
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
36
third_party/rust/cranelift-codegen/src/machinst/inst_common.rs
vendored
Normal file
36
third_party/rust/cranelift-codegen/src/machinst/inst_common.rs
vendored
Normal file
@ -0,0 +1,36 @@
|
||||
//! A place to park MachInst::Inst fragments which are common across multiple architectures.
|
||||
|
||||
use crate::ir;
|
||||
|
||||
/// The operation applied by an atomic read-modify-write memory update. As of 21 Aug 2020
/// these are used for the aarch64 and x64 targets.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
#[repr(u8)]
pub enum AtomicRmwOp {
    /// Addition
    Add,
    /// Subtraction
    Sub,
    /// Bitwise and
    And,
    /// Bitwise or
    Or,
    /// Bitwise exclusive or
    Xor,
    /// Exchange (swap operands)
    Xchg,
}
|
||||
|
||||
impl AtomicRmwOp {
|
||||
/// Converts an `ir::AtomicRmwOp` to the corresponding `inst_common::AtomicRmwOp`.
|
||||
pub fn from(ir_op: ir::AtomicRmwOp) -> Self {
|
||||
match ir_op {
|
||||
ir::AtomicRmwOp::Add => AtomicRmwOp::Add,
|
||||
ir::AtomicRmwOp::Sub => AtomicRmwOp::Sub,
|
||||
ir::AtomicRmwOp::And => AtomicRmwOp::And,
|
||||
ir::AtomicRmwOp::Or => AtomicRmwOp::Or,
|
||||
ir::AtomicRmwOp::Xor => AtomicRmwOp::Xor,
|
||||
ir::AtomicRmwOp::Xchg => AtomicRmwOp::Xchg,
|
||||
}
|
||||
}
|
||||
}
|
@ -4,7 +4,7 @@
|
||||
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::fx::{FxHashMap, FxHashSet};
|
||||
use crate::inst_predicates::{has_side_effect_or_load, is_constant_64bit};
|
||||
use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit};
|
||||
use crate::ir::instructions::BranchInfo;
|
||||
use crate::ir::types::I64;
|
||||
use crate::ir::{
|
||||
@ -372,7 +372,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
for bb in f.layout.blocks() {
|
||||
cur_color += 1;
|
||||
for inst in f.layout.block_insts(bb) {
|
||||
let side_effect = has_side_effect_or_load(f, inst);
|
||||
let side_effect = has_lowering_side_effect(f, inst);
|
||||
|
||||
// Assign colors. A new color is chosen *after* any side-effecting instruction.
|
||||
inst_colors[inst] = InstColor::new(cur_color);
|
||||
@ -799,15 +799,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
ValueDef::Result(src_inst, result_idx) => {
|
||||
debug!(" -> src inst {}", src_inst);
|
||||
debug!(
|
||||
" -> has side effect: {}",
|
||||
has_side_effect_or_load(self.f, src_inst)
|
||||
" -> has lowering side effect: {}",
|
||||
has_lowering_side_effect(self.f, src_inst)
|
||||
);
|
||||
debug!(
|
||||
" -> our color is {:?}, src inst is {:?}",
|
||||
self.inst_color(at_inst),
|
||||
self.inst_color(src_inst)
|
||||
);
|
||||
if !has_side_effect_or_load(self.f, src_inst)
|
||||
if !has_lowering_side_effect(self.f, src_inst)
|
||||
|| self.inst_color(at_inst) == self.inst_color(src_inst)
|
||||
{
|
||||
Some((src_inst, result_idx))
|
||||
@ -989,8 +989,12 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
|
||||
|
||||
fn use_input_reg(&mut self, input: LowerInput) {
|
||||
debug!("use_input_reg: vreg {:?} is needed", input.reg);
|
||||
// We may directly return a real (machine) register when we know that register holds the
|
||||
// result of an opcode (e.g. GetPinnedReg).
|
||||
if input.reg.is_virtual() {
|
||||
self.vreg_needed[input.reg.get_index()] = true;
|
||||
}
|
||||
}
|
||||
|
||||
fn is_reg_needed(&self, ir_inst: Inst, reg: Reg) -> bool {
|
||||
self.inst_needed[ir_inst] || self.vreg_needed[reg.get_index()]
|
||||
|
@ -133,6 +133,8 @@ pub mod adapter;
|
||||
pub use adapter::*;
|
||||
pub mod helpers;
|
||||
pub use helpers::*;
|
||||
pub mod inst_common;
|
||||
pub use inst_common::*;
|
||||
|
||||
/// A machine instruction.
|
||||
pub trait MachInst: Clone + Debug {
|
||||
|
Loading…
Reference in New Issue
Block a user