Adding VECTOR_SUB for vsub*.

This commit is contained in:
Ben Vanik 2014-08-05 14:03:49 -07:00
parent f149a23367
commit 75eb87f33d
7 changed files with 148 additions and 37 deletions

View File

@ -2735,6 +2735,12 @@ int Translate_SUB(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, fns[i->dest->type]);
}
// Translator entry for OPCODE_VECTOR_SUB in the IVM backend.
// Not implemented yet: it unconditionally trips assert_always() and then
// returns 1.
// NOTE(review): 1 is presumably the "translation failed" code — confirm
// against the other Translate_* functions.
int Translate_VECTOR_SUB(TranslationContext& ctx, Instr* i) {
// TODO(benvanik): VECTOR_SUB in IVM.
assert_always();
return 1;
}
uint32_t IntCode_MUL_I8_I8(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].i8 * ics.rf[i->src2_reg].i8;
return IA_NEXT;
@ -4200,23 +4206,23 @@ static const TranslateFn dispatch_table[] = {
Translate_VECTOR_COMPARE_UGT, Translate_VECTOR_COMPARE_UGE,
Translate_ADD, Translate_ADD_CARRY,
Translate_VECTOR_ADD, Translate_SUB,
Translate_MUL, Translate_MUL_HI,
Translate_DIV, Translate_MUL_ADD,
Translate_MUL_SUB, Translate_NEG,
Translate_ABS, Translate_SQRT,
Translate_RSQRT, Translate_POW2,
Translate_LOG2, Translate_DOT_PRODUCT_3,
Translate_DOT_PRODUCT_4, Translate_AND,
Translate_OR, Translate_XOR,
Translate_NOT, Translate_SHL,
Translate_VECTOR_SHL, Translate_SHR,
Translate_VECTOR_SHR, Translate_SHA,
Translate_VECTOR_SHA, Translate_ROTATE_LEFT,
Translate_BYTE_SWAP, Translate_CNTLZ,
Translate_INSERT, Translate_EXTRACT,
Translate_SPLAT, Translate_PERMUTE,
Translate_SWIZZLE, Translate_PACK,
Translate_UNPACK,
Translate_VECTOR_SUB, Translate_MUL,
Translate_MUL_HI, Translate_DIV,
Translate_MUL_ADD, Translate_MUL_SUB,
Translate_NEG, Translate_ABS,
Translate_SQRT, Translate_RSQRT,
Translate_POW2, Translate_LOG2,
Translate_DOT_PRODUCT_3, Translate_DOT_PRODUCT_4,
Translate_AND, Translate_OR,
Translate_XOR, Translate_NOT,
Translate_SHL, Translate_VECTOR_SHL,
Translate_SHR, Translate_VECTOR_SHR,
Translate_SHA, Translate_VECTOR_SHA,
Translate_ROTATE_LEFT, Translate_BYTE_SWAP,
Translate_CNTLZ, Translate_INSERT,
Translate_EXTRACT, Translate_SPLAT,
Translate_PERMUTE, Translate_SWIZZLE,
Translate_PACK, Translate_UNPACK,
TranslateInvalid, // Translate_COMPARE_EXCHANGE,
Translate_ATOMIC_EXCHANGE,
TranslateInvalid, // Translate_ATOMIC_ADD,

View File

@ -2854,6 +2854,66 @@ EMITTER_OPCODE_TABLE(
SUB_V128);
// ============================================================================
// OPCODE_VECTOR_SUB
// ============================================================================
// Emits x64 code for OPCODE_VECTOR_SUB on 128-bit vector operands.
//
// The instruction flags carry two fields: the low byte is the lane type
// (TypeName) and the bits above it are ARITHMETIC_* flags. Supported forms:
//   INT8 / INT16 : wrapping, signed-saturating and unsigned-saturating.
//   INT32        : wrapping only; any saturating form asserts.
//   FLOAT32      : vsubps (arithmetic flags are not consulted).
// Any other lane type hits assert_unhandled_case.
//
// NOTE(review): subtraction is not commutative, yet this goes through
// EmitCommutativeBinaryXmmOp — confirm that helper never swaps src1/src2.
EMITTER(VECTOR_SUB, MATCH(I<OPCODE_VECTOR_SUB, V128<>, V128<>, V128<>>)) {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    EmitCommutativeBinaryXmmOp(e, i,
        [&i](X64Emitter& e, const Xmm& dest, const Xmm& src1, const Xmm& src2) {
          // Split the packed flags back into lane type + arithmetic flags.
          const uint32_t arithmetic_flags = i.instr->flags >> 8;
          const bool saturate = (arithmetic_flags & ARITHMETIC_SATURATE) != 0;
          const bool is_unsigned =
              (arithmetic_flags & ARITHMETIC_UNSIGNED) != 0;
          const TypeName part_type =
              static_cast<TypeName>(i.instr->flags & 0xFF);
          switch (part_type) {
            case INT8_TYPE:
              // TODO(benvanik): trace DID_SATURATE (saturating forms only)
              if (!saturate) {
                e.vpsubb(dest, src1, src2);
              } else if (is_unsigned) {
                e.vpsubusb(dest, src1, src2);
              } else {
                e.vpsubsb(dest, src1, src2);
              }
              break;
            case INT16_TYPE:
              // TODO(benvanik): trace DID_SATURATE (saturating forms only)
              if (!saturate) {
                e.vpsubw(dest, src1, src2);
              } else if (is_unsigned) {
                e.vpsubusw(dest, src1, src2);
              } else {
                e.vpsubsw(dest, src1, src2);
              }
              break;
            case INT32_TYPE:
              if (!saturate) {
                e.vpsubd(dest, src1, src2);
              } else {
                // Neither the signed nor the unsigned saturating 32-bit
                // subtract is implemented yet.
                assert_always();
              }
              break;
            case FLOAT32_TYPE:
              e.vsubps(dest, src1, src2);
              break;
            default:
              assert_unhandled_case(part_type);
              break;
          }
        });
  }
};
// Opcode table entry listing VECTOR_SUB as the emitter for OPCODE_VECTOR_SUB.
// NOTE(review): presumably this is what
// REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SUB) installs in
// RegisterSequences() — confirm against the macro definitions.
EMITTER_OPCODE_TABLE(
OPCODE_VECTOR_SUB,
VECTOR_SUB);
// ============================================================================
// OPCODE_MUL
// ============================================================================
@ -5202,6 +5262,7 @@ void RegisterSequences() {
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ADD_CARRY);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ADD);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SUB);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SUB);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MUL);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MUL_HI);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DIV);

View File

@ -1572,18 +1572,27 @@ XEEMITTER(vsubfp128, VX128(5, 80), VX128)(PPCHIRBuilder& f, InstrData& i) {
}
// vsubsbs: per-byte vector subtract with signed saturation.
// Loads VA and VB, builds a VectorSub over INT8 lanes with
// ARITHMETIC_SATURATE, and stores the result to VD. Returns 0.
// (The stale XEINSTRNOTIMPLEMENTED()/return 1 stub that used to precede this
// body made the implementation unreachable and has been removed.)
XEEMITTER(vsubsbs, 0x10000700, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- clamp(EXTS(VA) + ¬EXTS(VB) + 1, -128, 127)
  Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE,
                         ARITHMETIC_SATURATE);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vsubshs: per-halfword vector subtract with signed saturation.
// Loads VA and VB, builds a VectorSub over INT16 lanes with
// ARITHMETIC_SATURATE, and stores the result to VD. Returns 0.
// (The stale XEINSTRNOTIMPLEMENTED()/return 1 stub that used to precede this
// body made the implementation unreachable and has been removed.)
XEEMITTER(vsubshs, 0x10000740, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- clamp(EXTS(VA) + ¬EXTS(VB) + 1, -2^15, 2^15-1)
  Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE,
                         ARITHMETIC_SATURATE);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vsubsws: per-word vector subtract with signed saturation.
// Loads VA and VB, builds a VectorSub over INT32 lanes with
// ARITHMETIC_SATURATE, and stores the result to VD. Returns 0.
// (The stale XEINSTRNOTIMPLEMENTED()/return 1 stub that used to precede this
// body made the implementation unreachable and has been removed.)
// NOTE(review): the x64 VECTOR_SUB emitter currently hits assert_always() for
// saturating INT32, so this HIR will assert when lowered by that backend.
XEEMITTER(vsubsws, 0x10000780, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- clamp(EXTS(VA) + ¬EXTS(VB) + 1, -2^31, 2^31-1)
  Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE,
                         ARITHMETIC_SATURATE);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
XEEMITTER(vsububm, 0x10000400, VX)(PPCHIRBuilder& f, InstrData& i) {
@ -1591,29 +1600,38 @@ XEEMITTER(vsububm, 0x10000400, VX)(PPCHIRBuilder& f, InstrData& i) {
return 1;
}
// vsubuhm: still unimplemented; reports the instruction and returns 1.
XEEMITTER(vsubuhm, 0x10000440, VX)(PPCHIRBuilder& f, InstrData& i) {
  XEINSTRNOTIMPLEMENTED();
  return 1;
}

// vsubuwm: still unimplemented; reports the instruction and returns 1.
XEEMITTER(vsubuwm, 0x10000480, VX)(PPCHIRBuilder& f, InstrData& i) {
  XEINSTRNOTIMPLEMENTED();
  return 1;
}

// vsububs: per-byte vector subtract with unsigned saturation.
// Loads VA and VB, builds a VectorSub over INT8 lanes with
// ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED, and stores the result to VD.
// Returns 0. The earlier not-implemented stub with the same name was a
// duplicate definition and has been removed.
XEEMITTER(vsububs, 0x10000600, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- clamp(EXTZ(VA) + ¬EXTZ(VB) + 1, 0, 2^8-1)
  Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE,
                         ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED);
  f.StoreVR(i.VX.VD, v);
  return 0;
}

// vsubuhs: per-halfword vector subtract with unsigned saturation.
// Loads VA and VB, builds a VectorSub over INT16 lanes with
// ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED, and stores the result to VD.
// Returns 0. The earlier not-implemented stub with the same name was a
// duplicate definition and has been removed.
XEEMITTER(vsubuhs, 0x10000640, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- clamp(EXTZ(VA) + ¬EXTZ(VB) + 1, 0, 2^16-1)
  Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE,
                         ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vsubuws: per-word vector subtract with unsigned saturation.
// Loads VA and VB, builds a VectorSub over INT32 lanes with
// ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED, and stores the result to VD.
// Returns 0.
// (The stale XEINSTRNOTIMPLEMENTED()/return 1 stub that used to precede this
// body made the implementation unreachable and has been removed.)
// NOTE(review): the x64 VECTOR_SUB emitter currently hits assert_always() for
// saturating INT32, so this HIR will assert when lowered by that backend.
XEEMITTER(vsubuws, 0x10000680, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- clamp(EXTZ(VA) + ¬EXTZ(VB) + 1, 0, 2^32-1)
  Value* v = f.VectorSub(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE,
                         ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
XEEMITTER(vsumsws, 0x10000788, VX)(PPCHIRBuilder& f, InstrData& i) {
@ -2132,10 +2150,10 @@ void RegisterEmitCategoryAltivec() {
XEREGISTERINSTR(vsubshs, 0x10000740);
XEREGISTERINSTR(vsubsws, 0x10000780);
XEREGISTERINSTR(vsububm, 0x10000400);
XEREGISTERINSTR(vsububs, 0x10000600);
XEREGISTERINSTR(vsubuhm, 0x10000440);
XEREGISTERINSTR(vsubuhs, 0x10000640);
XEREGISTERINSTR(vsubuwm, 0x10000480);
XEREGISTERINSTR(vsububs, 0x10000600);
XEREGISTERINSTR(vsubuhs, 0x10000640);
XEREGISTERINSTR(vsubuws, 0x10000680);
XEREGISTERINSTR(vsumsws, 0x10000788);
XEREGISTERINSTR(vsum2sws, 0x10000688);

View File

@ -1299,6 +1299,23 @@ Value* HIRBuilder::Sub(Value* value1, Value* value2,
return i->dest;
}
// Appends an OPCODE_VECTOR_SUB instruction computing value1 - value2 per
// element and returns its destination value.
//
//   value1, value2   : operands; both must satisfy ASSERT_VECTOR_TYPE.
//   part_type        : element type the vector is treated as.
//   arithmetic_flags : ARITHMETIC_* modifiers (e.g. saturate / unsigned).
//
// The destination is allocated with value1's type.
Value* HIRBuilder::VectorSub(Value* value1, Value* value2, TypeName part_type,
                             uint32_t arithmetic_flags) {
  ASSERT_VECTOR_TYPE(value1);
  ASSERT_VECTOR_TYPE(value2);

  // The instruction has no dedicated slot for the element type, so it is
  // smuggled through the flags: element type in the low byte, arithmetic
  // flags shifted above it. The packed value must fit in 16 bits.
  uint32_t flags = (arithmetic_flags << 8) | part_type;
  assert_zero(flags >> 16);

  Value* result = AllocValue(value1->type);
  Instr* instr = AppendInstr(OPCODE_VECTOR_SUB_info,
                             static_cast<uint16_t>(flags), result);
  instr->set_src1(value1);
  instr->set_src2(value2);
  instr->src3.value = NULL;  // Binary op: third source is unused.
  return instr->dest;
}
Value* HIRBuilder::Mul(Value* value1, Value* value2,
uint32_t arithmetic_flags) {
ASSERT_TYPES_EQUAL(value1, value2);

View File

@ -169,6 +169,8 @@ class HIRBuilder {
Value* VectorAdd(Value* value1, Value* value2, TypeName part_type,
uint32_t arithmetic_flags = 0);
Value* Sub(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
// Per-element vector subtraction (value1 - value2). part_type selects the
// element type the vectors are treated as; arithmetic_flags takes
// ARITHMETIC_* modifiers and defaults to 0 (none).
Value* VectorSub(Value* value1, Value* value2, TypeName part_type,
uint32_t arithmetic_flags = 0);
Value* Mul(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
Value* MulHi(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
Value* Div(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);

View File

@ -140,6 +140,7 @@ enum Opcode {
OPCODE_ADD_CARRY,
OPCODE_VECTOR_ADD,
OPCODE_SUB,
OPCODE_VECTOR_SUB,
OPCODE_MUL,
OPCODE_MUL_HI, // TODO(benvanik): remove this and add INT128 type.
OPCODE_DIV,

View File

@ -389,6 +389,12 @@ DEFINE_OPCODE(
OPCODE_SIG_V_V_V,
0)
// vector_sub: per-element vector subtraction opcode.
// NOTE(review): OPCODE_SIG_V_V_V presumably means a value destination with
// two value sources, and the trailing 0 presumably means no opcode flags —
// confirm against the DEFINE_OPCODE macro and the signature definitions.
DEFINE_OPCODE(
OPCODE_VECTOR_SUB,
"vector_sub",
OPCODE_SIG_V_V_V,
0)
DEFINE_OPCODE(
OPCODE_MUL,
"mul",