mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-25 03:05:34 +00:00
Bug 1136226 - Implement MSimdExtractElement for small integer types. r=bbouvier
Move visitSimdExtractElement into x86-specific code in order to set proper register allocation requirements.
This commit is contained in:
parent
5ceb9125e1
commit
53b831540f
@ -4385,44 +4385,6 @@ LIRGenerator::visitSimdReinterpretCast(MSimdReinterpretCast* ins)
|
||||
define(new(alloc()) LSimdReinterpretCast(use), ins);
|
||||
}
|
||||
|
||||
void
|
||||
LIRGenerator::visitSimdExtractElement(MSimdExtractElement* ins)
|
||||
{
|
||||
MOZ_ASSERT(IsSimdType(ins->input()->type()));
|
||||
MOZ_ASSERT(!IsSimdType(ins->type()));
|
||||
|
||||
switch (ins->input()->type()) {
|
||||
case MIRType::Int32x4: {
|
||||
MOZ_ASSERT(ins->signedness() != SimdSign::NotApplicable);
|
||||
// Note: there could be int16x8 in the future, which doesn't use the
|
||||
// same instruction. We either need to pass the arity or create new LIns.
|
||||
LUse use = useRegisterAtStart(ins->input());
|
||||
if (ins->type() == MIRType::Double) {
|
||||
// Extract an Uint32 lane into a double.
|
||||
MOZ_ASSERT(ins->signedness() == SimdSign::Unsigned);
|
||||
define(new (alloc()) LSimdExtractElementU2D(use, temp()), ins);
|
||||
} else {
|
||||
define(new (alloc()) LSimdExtractElementI(use), ins);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case MIRType::Float32x4: {
|
||||
MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
|
||||
LUse use = useRegisterAtStart(ins->input());
|
||||
define(new(alloc()) LSimdExtractElementF(use), ins);
|
||||
break;
|
||||
}
|
||||
case MIRType::Bool32x4: {
|
||||
MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
|
||||
LUse use = useRegisterAtStart(ins->input());
|
||||
define(new(alloc()) LSimdExtractElementB(use), ins);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
MOZ_CRASH("Unknown SIMD kind when extracting element");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
LIRGenerator::visitSimdInsertElement(MSimdInsertElement* ins)
|
||||
{
|
||||
|
@ -294,7 +294,6 @@ class LIRGenerator : public LIRGeneratorSpecific
|
||||
void visitRecompileCheck(MRecompileCheck* ins);
|
||||
void visitSimdBox(MSimdBox* ins);
|
||||
void visitSimdUnbox(MSimdUnbox* ins);
|
||||
void visitSimdExtractElement(MSimdExtractElement* ins);
|
||||
void visitSimdInsertElement(MSimdInsertElement* ins);
|
||||
void visitSimdSwizzle(MSimdSwizzle* ins);
|
||||
void visitSimdGeneralShuffle(MSimdGeneralShuffle* ins);
|
||||
|
@ -236,8 +236,8 @@ class LSimdExtractElementBase : public LInstructionHelper<1, 1, 0>
|
||||
const LAllocation* getBase() {
|
||||
return getOperand(0);
|
||||
}
|
||||
unsigned lane() const {
|
||||
return mir_->toSimdExtractElement()->lane();
|
||||
MSimdExtractElement* mir() const {
|
||||
return mir_->toSimdExtractElement();
|
||||
}
|
||||
};
|
||||
|
||||
@ -280,8 +280,8 @@ class LSimdExtractElementU2D : public LInstructionHelper<1, 1, 1>
|
||||
setOperand(0, base);
|
||||
setTemp(0, temp);
|
||||
}
|
||||
unsigned lane() const {
|
||||
return mir_->toSimdExtractElement()->lane();
|
||||
MSimdExtractElement* mir() const {
|
||||
return mir_->toSimdExtractElement();
|
||||
}
|
||||
const LDefinition* temp() {
|
||||
return getTemp(0);
|
||||
|
@ -2123,23 +2123,17 @@ class AssemblerX86Shared : public AssemblerShared
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
// Extract byte lane |lane| of |src| into the general-purpose register |dest|.
// Asserts that SSE4.1 is available.
void vpextrb(unsigned lane, FloatRegister src, Register dest) {
    MOZ_ASSERT(HasSSE41());
    const auto srcEnc = src.encoding();
    const auto destEnc = dest.encoding();
    masm.vpextrb_irr(lane, srcEnc, destEnc);
}
|
||||
// Extract 16-bit lane |lane| of |src| into the general-purpose register
// |dest|. No SSE4.1 assertion is made here, unlike vpextrb/vpextrd.
void vpextrw(unsigned lane, FloatRegister src, Register dest) {
    const auto srcEnc = src.encoding();
    const auto destEnc = dest.encoding();
    masm.vpextrw_irr(lane, srcEnc, destEnc);
}
|
||||
// Extract 32-bit lane |lane| of |src| into the general-purpose register
// |dest|. Asserts that SSE4.1 is available.
void vpextrd(unsigned lane, FloatRegister src, Register dest) {
    MOZ_ASSERT(HasSSE41());
    const auto srcEnc = src.encoding();
    const auto destEnc = dest.encoding();
    masm.vpextrd_irr(lane, srcEnc, destEnc);
}
|
||||
void vpextrd(unsigned lane, FloatRegister src, const Operand& dest) {
|
||||
MOZ_ASSERT(HasSSE41());
|
||||
switch (dest.kind()) {
|
||||
case Operand::REG:
|
||||
masm.vpextrd_irr(lane, src.encoding(), dest.reg());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.vpextrd_irm(lane, src.encoding(), dest.disp(), dest.base());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
void vpsrldq(Imm32 shift, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.vpsrldq_ir(shift.value, src0.encoding(), dest.encoding());
|
||||
|
@ -3262,21 +3262,18 @@ public:
|
||||
threeByteOpImmInt32Simd("vpinsrd", VEX_PD, OP3_PINSRD_VdqEdIb, ESCAPE_3A, lane, offset, base, src0, dst);
|
||||
}
|
||||
|
||||
void vpextrb_irr(unsigned lane, XMMRegisterID src, RegisterID dst)
|
||||
{
|
||||
MOZ_ASSERT(lane < 16);
|
||||
threeByteOpImmSimdInt32("vpextrb", VEX_PD, OP3_PEXTRB_EdVdqIb, ESCAPE_3A, lane, (XMMRegisterID)dst, (RegisterID)src);
|
||||
}
|
||||
|
||||
void vpextrd_irr(unsigned lane, XMMRegisterID src, RegisterID dst)
|
||||
{
|
||||
MOZ_ASSERT(lane < 4);
|
||||
threeByteOpImmSimdInt32("vpextrd", VEX_PD, OP3_PEXTRD_EdVdqIb, ESCAPE_3A, lane, (XMMRegisterID)dst, (RegisterID)src);
|
||||
}
|
||||
|
||||
void vpextrd_irm(unsigned lane, XMMRegisterID src, int32_t offset, RegisterID base)
|
||||
{
|
||||
MOZ_ASSERT(lane < 4);
|
||||
spew("pextrd $0x%x, %s, " MEM_ob, lane, XMMRegName(src), ADDR_ob(offset, base));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.threeByteOp(OP3_PEXTRD_EdVdqIb, ESCAPE_3A, offset, base, (RegisterID)src);
|
||||
m_formatter.immediate8u(lane);
|
||||
}
|
||||
|
||||
void vblendps_irr(unsigned imm, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
MOZ_ASSERT(imm < 16);
|
||||
|
@ -2647,9 +2647,10 @@ CodeGeneratorX86Shared::visitSimdReinterpretCast(LSimdReinterpretCast* ins)
|
||||
}
|
||||
}
|
||||
|
||||
// Extract an integer lane from the vector register |input| and place it in |output|.
|
||||
// Extract an integer lane from the 32x4 vector register |input| and place it in
|
||||
// |output|.
|
||||
void
|
||||
CodeGeneratorX86Shared::emitSimdExtractLane(FloatRegister input, Register output, unsigned lane)
|
||||
CodeGeneratorX86Shared::emitSimdExtractLane32x4(FloatRegister input, Register output, unsigned lane)
|
||||
{
|
||||
if (lane == 0) {
|
||||
// The value we want to extract is in the low double-word
|
||||
@ -2663,15 +2664,81 @@ CodeGeneratorX86Shared::emitSimdExtractLane(FloatRegister input, Register output
|
||||
}
|
||||
}
|
||||
|
||||
// Extract an integer lane from the 16x8 vector register |input|, sign- or
|
||||
// zero-extend to 32 bits and place the result in |output|.
|
||||
void
|
||||
CodeGeneratorX86Shared::emitSimdExtractLane16x8(FloatRegister input, Register output,
|
||||
unsigned lane, SimdSign signedness)
|
||||
{
|
||||
// Unlike pextrd and pextrb, this is available in SSE2.
|
||||
masm.vpextrw(lane, input, output);
|
||||
|
||||
if (signedness == SimdSign::Signed)
|
||||
masm.movswl(output, output);
|
||||
}
|
||||
|
||||
// Extract an integer lane from the 8x16 vector register |input|, sign- or
|
||||
// zero-extend to 32 bits and place the result in |output|.
|
||||
void
|
||||
CodeGeneratorX86Shared::emitSimdExtractLane8x16(FloatRegister input, Register output,
|
||||
unsigned lane, SimdSign signedness)
|
||||
{
|
||||
if (AssemblerX86Shared::HasSSE41()) {
|
||||
masm.vpextrb(lane, input, output);
|
||||
// vpextrb clears the high bits, so no further extension required.
|
||||
if (signedness == SimdSign::Unsigned)
|
||||
signedness = SimdSign::NotApplicable;
|
||||
} else {
|
||||
// Extract the relevant 16 bits containing our lane, then shift the
|
||||
// right 8 bits into place.
|
||||
emitSimdExtractLane16x8(input, output, lane / 2, SimdSign::Unsigned);
|
||||
if (lane % 2) {
|
||||
masm.shrl(Imm32(8), output);
|
||||
// The shrl handles the zero-extension. Don't repeat it.
|
||||
if (signedness == SimdSign::Unsigned)
|
||||
signedness = SimdSign::NotApplicable;
|
||||
}
|
||||
}
|
||||
|
||||
// We have the right low 8 bits in |output|, but we may need to fix the high
|
||||
// bits.
|
||||
switch (signedness) {
|
||||
case SimdSign::Signed:
|
||||
masm.movsbl(output, output);
|
||||
break;
|
||||
case SimdSign::Unsigned:
|
||||
masm.movzbl(output, output);
|
||||
break;
|
||||
case SimdSign::NotApplicable:
|
||||
// No adjustment needed.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
CodeGeneratorX86Shared::visitSimdExtractElementB(LSimdExtractElementB* ins)
|
||||
{
|
||||
FloatRegister input = ToFloatRegister(ins->input());
|
||||
Register output = ToRegister(ins->output());
|
||||
MSimdExtractElement* mir = ins->mir();
|
||||
unsigned length = SimdTypeToLength(mir->specialization());
|
||||
|
||||
emitSimdExtractLane(input, output, ins->lane());
|
||||
switch (length) {
|
||||
case 4:
|
||||
emitSimdExtractLane32x4(input, output, mir->lane());
|
||||
break;
|
||||
case 8:
|
||||
// Get a lane, don't bother fixing the high bits since we'll mask below.
|
||||
emitSimdExtractLane16x8(input, output, mir->lane(), SimdSign::NotApplicable);
|
||||
break;
|
||||
case 16:
|
||||
emitSimdExtractLane8x16(input, output, mir->lane(), SimdSign::NotApplicable);
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("Unhandled SIMD length");
|
||||
}
|
||||
|
||||
// We need to generate a 0/1 value. We have 0/-1.
|
||||
// We need to generate a 0/1 value. We have 0/-1 and possibly dirty high bits.
|
||||
masm.and32(Imm32(1), output);
|
||||
}
|
||||
|
||||
@ -2680,8 +2747,22 @@ CodeGeneratorX86Shared::visitSimdExtractElementI(LSimdExtractElementI* ins)
|
||||
{
|
||||
FloatRegister input = ToFloatRegister(ins->input());
|
||||
Register output = ToRegister(ins->output());
|
||||
MSimdExtractElement* mir = ins->mir();
|
||||
unsigned length = SimdTypeToLength(mir->specialization());
|
||||
|
||||
emitSimdExtractLane(input, output, ins->lane());
|
||||
switch (length) {
|
||||
case 4:
|
||||
emitSimdExtractLane32x4(input, output, mir->lane());
|
||||
break;
|
||||
case 8:
|
||||
emitSimdExtractLane16x8(input, output, mir->lane(), mir->signedness());
|
||||
break;
|
||||
case 16:
|
||||
emitSimdExtractLane8x16(input, output, mir->lane(), mir->signedness());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("Unhandled SIMD length");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -2690,8 +2771,9 @@ CodeGeneratorX86Shared::visitSimdExtractElementU2D(LSimdExtractElementU2D* ins)
|
||||
FloatRegister input = ToFloatRegister(ins->input());
|
||||
FloatRegister output = ToFloatRegister(ins->output());
|
||||
Register temp = ToRegister(ins->temp());
|
||||
|
||||
emitSimdExtractLane(input, temp, ins->lane());
|
||||
MSimdExtractElement* mir = ins->mir();
|
||||
MOZ_ASSERT(mir->specialization() == MIRType::Int32x4);
|
||||
emitSimdExtractLane32x4(input, temp, mir->lane());
|
||||
masm.convertUInt32ToDouble(temp, output);
|
||||
}
|
||||
|
||||
@ -2701,7 +2783,7 @@ CodeGeneratorX86Shared::visitSimdExtractElementF(LSimdExtractElementF* ins)
|
||||
FloatRegister input = ToFloatRegister(ins->input());
|
||||
FloatRegister output = ToFloatRegister(ins->output());
|
||||
|
||||
unsigned lane = ins->lane();
|
||||
unsigned lane = ins->mir()->lane();
|
||||
if (lane == 0) {
|
||||
// The value we want to extract is in the low double-word
|
||||
if (input != output)
|
||||
|
@ -208,7 +208,11 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared
|
||||
|
||||
void emitTableSwitchDispatch(MTableSwitch* mir, Register index, Register base);
|
||||
|
||||
void emitSimdExtractLane(FloatRegister input, Register output, unsigned lane);
|
||||
void emitSimdExtractLane8x16(FloatRegister input, Register output, unsigned lane,
|
||||
SimdSign signedness);
|
||||
void emitSimdExtractLane16x8(FloatRegister input, Register output, unsigned lane,
|
||||
SimdSign signedness);
|
||||
void emitSimdExtractLane32x4(FloatRegister input, Register output, unsigned lane);
|
||||
|
||||
public:
|
||||
CodeGeneratorX86Shared(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm);
|
||||
|
@ -266,6 +266,7 @@ enum ThreeByteOpcodeID {
|
||||
OP3_ROUNDSS_VsdWsd = 0x0A,
|
||||
OP3_ROUNDSD_VsdWsd = 0x0B,
|
||||
OP3_BLENDVPS_VdqWdq = 0x14,
|
||||
OP3_PEXTRB_EdVdqIb = 0x14,
|
||||
OP3_PEXTRD_EdVdqIb = 0x16,
|
||||
OP3_BLENDPS_VpsWpsIb = 0x0C,
|
||||
OP3_PTEST_VdVd = 0x17,
|
||||
|
@ -634,6 +634,58 @@ LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
|
||||
define(lir, ins);
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86Shared::visitSimdExtractElement(MSimdExtractElement* ins)
|
||||
{
|
||||
MOZ_ASSERT(IsSimdType(ins->input()->type()));
|
||||
MOZ_ASSERT(!IsSimdType(ins->type()));
|
||||
|
||||
switch (ins->input()->type()) {
|
||||
case MIRType::Int8x16:
|
||||
case MIRType::Int16x8:
|
||||
case MIRType::Int32x4: {
|
||||
MOZ_ASSERT(ins->signedness() != SimdSign::NotApplicable);
|
||||
LUse use = useRegisterAtStart(ins->input());
|
||||
if (ins->type() == MIRType::Double) {
|
||||
// Extract an Uint32 lane into a double.
|
||||
MOZ_ASSERT(ins->signedness() == SimdSign::Unsigned);
|
||||
define(new (alloc()) LSimdExtractElementU2D(use, temp()), ins);
|
||||
} else {
|
||||
auto* lir = new (alloc()) LSimdExtractElementI(use);
|
||||
#if defined(JS_CODEGEN_X86)
|
||||
// On x86 (32-bit), we may need to use movsbl or movzbl instructions
|
||||
// to sign or zero extend the extracted lane to 32 bits. The 8-bit
|
||||
// version of these instructions require a source register that is
|
||||
// %al, %bl, %cl, or %dl.
|
||||
// Fix it to %ebx since we can't express that constraint better.
|
||||
if (ins->input()->type() == MIRType::Int8x16) {
|
||||
defineFixed(lir, ins, LAllocation(AnyRegister(ebx)));
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
define(lir, ins);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case MIRType::Float32x4: {
|
||||
MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
|
||||
LUse use = useRegisterAtStart(ins->input());
|
||||
define(new(alloc()) LSimdExtractElementF(use), ins);
|
||||
break;
|
||||
}
|
||||
case MIRType::Bool8x16:
|
||||
case MIRType::Bool16x8:
|
||||
case MIRType::Bool32x4: {
|
||||
MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
|
||||
LUse use = useRegisterAtStart(ins->input());
|
||||
define(new(alloc()) LSimdExtractElementB(use), ins);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
MOZ_CRASH("Unknown SIMD kind when extracting element");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith* ins)
|
||||
{
|
||||
|
@ -56,6 +56,7 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared
|
||||
void lowerUrshD(MUrsh* mir);
|
||||
void lowerTruncateDToInt32(MTruncateToInt32* ins);
|
||||
void lowerTruncateFToInt32(MTruncateToInt32* ins);
|
||||
void visitSimdExtractElement(MSimdExtractElement* ins);
|
||||
void visitSimdBinaryArith(MSimdBinaryArith* ins);
|
||||
void visitSimdSelect(MSimdSelect* ins);
|
||||
void visitSimdSplat(MSimdSplat* ins);
|
||||
|
Loading…
Reference in New Issue
Block a user