Bug 1136226 - Implement MSimdExtractElement for small integer types. r=bbouvier

Move visitSimdExtractElement into x86-specific code in order to set proper
register allocation requirements.
This commit is contained in:
Jakob Olesen 2016-05-31 09:00:17 -07:00
parent 5ceb9125e1
commit 53b831540f
10 changed files with 166 additions and 74 deletions

View File

@ -4385,44 +4385,6 @@ LIRGenerator::visitSimdReinterpretCast(MSimdReinterpretCast* ins)
define(new(alloc()) LSimdReinterpretCast(use), ins);
}
// Lower an MSimdExtractElement into the LIR instruction matching the SIMD
// type of its input. Only the 32x4 vector types are handled here; any other
// input type crashes.
void
LIRGenerator::visitSimdExtractElement(MSimdExtractElement* ins)
{
    MOZ_ASSERT(IsSimdType(ins->input()->type()));
    MOZ_ASSERT(!IsSimdType(ins->type()));

    switch (ins->input()->type()) {
      case MIRType::Int32x4: {
        MOZ_ASSERT(ins->signedness() != SimdSign::NotApplicable);
        // NOTE: a future int16x8 type would not use the same instruction, so
        // it would need either the arity passed along or a dedicated LIns.
        LUse vector = useRegisterAtStart(ins->input());
        if (ins->type() != MIRType::Double) {
            define(new (alloc()) LSimdExtractElementI(vector), ins);
            break;
        }
        // A Double result means a Uint32 lane is being extracted; that path
        // takes an additional temp.
        MOZ_ASSERT(ins->signedness() == SimdSign::Unsigned);
        define(new (alloc()) LSimdExtractElementU2D(vector, temp()), ins);
        break;
      }

      case MIRType::Float32x4: {
        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
        LUse vector = useRegisterAtStart(ins->input());
        define(new(alloc()) LSimdExtractElementF(vector), ins);
        break;
      }

      case MIRType::Bool32x4: {
        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
        LUse vector = useRegisterAtStart(ins->input());
        define(new(alloc()) LSimdExtractElementB(vector), ins);
        break;
      }

      default:
        MOZ_CRASH("Unknown SIMD kind when extracting element");
    }
}
void
LIRGenerator::visitSimdInsertElement(MSimdInsertElement* ins)
{

View File

@ -294,7 +294,6 @@ class LIRGenerator : public LIRGeneratorSpecific
void visitRecompileCheck(MRecompileCheck* ins);
void visitSimdBox(MSimdBox* ins);
void visitSimdUnbox(MSimdUnbox* ins);
void visitSimdExtractElement(MSimdExtractElement* ins);
void visitSimdInsertElement(MSimdInsertElement* ins);
void visitSimdSwizzle(MSimdSwizzle* ins);
void visitSimdGeneralShuffle(MSimdGeneralShuffle* ins);

View File

@ -236,8 +236,8 @@ class LSimdExtractElementBase : public LInstructionHelper<1, 1, 0>
const LAllocation* getBase() {
return getOperand(0);
}
unsigned lane() const {
return mir_->toSimdExtractElement()->lane();
MSimdExtractElement* mir() const {
return mir_->toSimdExtractElement();
}
};
@ -280,8 +280,8 @@ class LSimdExtractElementU2D : public LInstructionHelper<1, 1, 1>
setOperand(0, base);
setTemp(0, temp);
}
unsigned lane() const {
return mir_->toSimdExtractElement()->lane();
MSimdExtractElement* mir() const {
return mir_->toSimdExtractElement();
}
const LDefinition* temp() {
return getTemp(0);

View File

@ -2123,23 +2123,17 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_CRASH("unexpected operand kind");
}
}
// Forward a vpextrb of |lane| from |src| into |dest| to the base assembler.
// Asserts that SSE4.1 is available.
void vpextrb(unsigned lane, FloatRegister src, Register dest) {
    MOZ_ASSERT(HasSSE41());
    const auto srcEncoding = src.encoding();
    const auto destEncoding = dest.encoding();
    masm.vpextrb_irr(lane, srcEncoding, destEncoding);
}
// Forward a vpextrw of |lane| from |src| into |dest| to the base assembler.
// No CPU-feature assertion is made here, unlike vpextrb/vpextrd.
void vpextrw(unsigned lane, FloatRegister src, Register dest) {
    const auto srcEncoding = src.encoding();
    const auto destEncoding = dest.encoding();
    masm.vpextrw_irr(lane, srcEncoding, destEncoding);
}
// Forward a vpextrd of |lane| from |src| into the register |dest| to the base
// assembler. Asserts that SSE4.1 is available.
void vpextrd(unsigned lane, FloatRegister src, Register dest) {
    MOZ_ASSERT(HasSSE41());
    const auto srcEncoding = src.encoding();
    const auto destEncoding = dest.encoding();
    masm.vpextrd_irr(lane, srcEncoding, destEncoding);
}
// Forward a vpextrd of |lane| from |src| to the base assembler, where |dest|
// is a generic operand. Only REG and MEM_REG_DISP operands are supported; any
// other kind crashes. Asserts that SSE4.1 is available.
void vpextrd(unsigned lane, FloatRegister src, const Operand& dest) {
    MOZ_ASSERT(HasSSE41());

    const auto destKind = dest.kind();
    if (destKind == Operand::REG) {
        masm.vpextrd_irr(lane, src.encoding(), dest.reg());
        return;
    }
    if (destKind == Operand::MEM_REG_DISP) {
        masm.vpextrd_irm(lane, src.encoding(), dest.disp(), dest.base());
        return;
    }
    MOZ_CRASH("unexpected operand kind");
}
void vpsrldq(Imm32 shift, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vpsrldq_ir(shift.value, src0.encoding(), dest.encoding());

View File

@ -3262,21 +3262,18 @@ public:
threeByteOpImmInt32Simd("vpinsrd", VEX_PD, OP3_PINSRD_VdqEdIb, ESCAPE_3A, lane, offset, base, src0, dst);
}
void vpextrb_irr(unsigned lane, XMMRegisterID src, RegisterID dst)
{
MOZ_ASSERT(lane < 16);
threeByteOpImmSimdInt32("vpextrb", VEX_PD, OP3_PEXTRB_EdVdqIb, ESCAPE_3A, lane, (XMMRegisterID)dst, (RegisterID)src);
}
void vpextrd_irr(unsigned lane, XMMRegisterID src, RegisterID dst)
{
MOZ_ASSERT(lane < 4);
threeByteOpImmSimdInt32("vpextrd", VEX_PD, OP3_PEXTRD_EdVdqIb, ESCAPE_3A, lane, (XMMRegisterID)dst, (RegisterID)src);
}
// Emit pextrd with immediate |lane| and XMM register |src|, addressing the
// other operand as |offset|(|base|). |lane| must be below 4.
// Unlike the _irr variants, this writes the 0x66-prefixed encoding directly
// through the formatter instead of going through threeByteOpImmSimdInt32.
void vpextrd_irm(unsigned lane, XMMRegisterID src, int32_t offset, RegisterID base)
{
MOZ_ASSERT(lane < 4);
spew("pextrd $0x%x, %s, " MEM_ob, lane, XMMRegName(src), ADDR_ob(offset, base));
// Operand-size prefix first, then the three-byte opcode with its memory
// operand; |src| is passed cast to RegisterID.
m_formatter.prefix(PRE_SSE_66);
m_formatter.threeByteOp(OP3_PEXTRD_EdVdqIb, ESCAPE_3A, offset, base, (RegisterID)src);
// The lane selector is the trailing 8-bit immediate.
m_formatter.immediate8u(lane);
}
void vblendps_irr(unsigned imm, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
MOZ_ASSERT(imm < 16);

View File

@ -2647,9 +2647,10 @@ CodeGeneratorX86Shared::visitSimdReinterpretCast(LSimdReinterpretCast* ins)
}
}
// Extract an integer lane from the vector register |input| and place it in |output|.
// Extract an integer lane from the 32x4 vector register |input| and place it in
// |output|.
void
CodeGeneratorX86Shared::emitSimdExtractLane(FloatRegister input, Register output, unsigned lane)
CodeGeneratorX86Shared::emitSimdExtractLane32x4(FloatRegister input, Register output, unsigned lane)
{
if (lane == 0) {
// The value we want to extract is in the low double-word
@ -2663,15 +2664,81 @@ CodeGeneratorX86Shared::emitSimdExtractLane(FloatRegister input, Register output
}
}
// Read lane |lane| of the 16x8 vector register |input| into |output|. When
// |signedness| is Signed the value is additionally sign-extended from 16 to
// 32 bits; for any other signedness the vpextrw result is left untouched.
void
CodeGeneratorX86Shared::emitSimdExtractLane16x8(FloatRegister input, Register output,
                                                unsigned lane, SimdSign signedness)
{
    // vpextrw is already available in SSE2, which is not the case for pextrd
    // and pextrb.
    masm.vpextrw(lane, input, output);

    if (signedness != SimdSign::Signed)
        return;

    masm.movswl(output, output);
}
// Read lane |lane| of the 8x16 vector register |input| into |output|, then
// sign- or zero-extend the byte to 32 bits as requested by |signedness|.
// With SimdSign::NotApplicable the high bits of |output| are left as they are.
void
CodeGeneratorX86Shared::emitSimdExtractLane8x16(FloatRegister input, Register output,
                                                unsigned lane, SimdSign signedness)
{
    // Set when the extraction sequence itself already zeroed everything above
    // the low byte, making an explicit zero-extension redundant.
    bool alreadyZeroExtended = false;

    if (AssemblerX86Shared::HasSSE41()) {
        // vpextrb clears the high bits of the destination.
        masm.vpextrb(lane, input, output);
        alreadyZeroExtended = true;
    } else {
        // Without SSE4.1, fetch the 16-bit lane holding our byte. An odd byte
        // lane sits in the upper half and is shifted down, and that shift
        // doubles as the zero-extension.
        emitSimdExtractLane16x8(input, output, lane / 2, SimdSign::Unsigned);
        if (lane % 2) {
            masm.shrl(Imm32(8), output);
            alreadyZeroExtended = true;
        }
    }

    // The low 8 bits of |output| are correct now; fix up the rest on demand.
    if (signedness == SimdSign::Signed)
        masm.movsbl(output, output);
    else if (signedness == SimdSign::Unsigned && !alreadyZeroExtended)
        masm.movzbl(output, output);
}
void
CodeGeneratorX86Shared::visitSimdExtractElementB(LSimdExtractElementB* ins)
{
FloatRegister input = ToFloatRegister(ins->input());
Register output = ToRegister(ins->output());
MSimdExtractElement* mir = ins->mir();
unsigned length = SimdTypeToLength(mir->specialization());
emitSimdExtractLane(input, output, ins->lane());
switch (length) {
case 4:
emitSimdExtractLane32x4(input, output, mir->lane());
break;
case 8:
// Get a lane, don't bother fixing the high bits since we'll mask below.
emitSimdExtractLane16x8(input, output, mir->lane(), SimdSign::NotApplicable);
break;
case 16:
emitSimdExtractLane8x16(input, output, mir->lane(), SimdSign::NotApplicable);
break;
default:
MOZ_CRASH("Unhandled SIMD length");
}
// We need to generate a 0/1 value. We have 0/-1.
// We need to generate a 0/1 value. We have 0/-1 and possibly dirty high bits.
masm.and32(Imm32(1), output);
}
@ -2680,8 +2747,22 @@ CodeGeneratorX86Shared::visitSimdExtractElementI(LSimdExtractElementI* ins)
{
FloatRegister input = ToFloatRegister(ins->input());
Register output = ToRegister(ins->output());
MSimdExtractElement* mir = ins->mir();
unsigned length = SimdTypeToLength(mir->specialization());
emitSimdExtractLane(input, output, ins->lane());
switch (length) {
case 4:
emitSimdExtractLane32x4(input, output, mir->lane());
break;
case 8:
emitSimdExtractLane16x8(input, output, mir->lane(), mir->signedness());
break;
case 16:
emitSimdExtractLane8x16(input, output, mir->lane(), mir->signedness());
break;
default:
MOZ_CRASH("Unhandled SIMD length");
}
}
void
@ -2690,8 +2771,9 @@ CodeGeneratorX86Shared::visitSimdExtractElementU2D(LSimdExtractElementU2D* ins)
FloatRegister input = ToFloatRegister(ins->input());
FloatRegister output = ToFloatRegister(ins->output());
Register temp = ToRegister(ins->temp());
emitSimdExtractLane(input, temp, ins->lane());
MSimdExtractElement* mir = ins->mir();
MOZ_ASSERT(mir->specialization() == MIRType::Int32x4);
emitSimdExtractLane32x4(input, temp, mir->lane());
masm.convertUInt32ToDouble(temp, output);
}
@ -2701,7 +2783,7 @@ CodeGeneratorX86Shared::visitSimdExtractElementF(LSimdExtractElementF* ins)
FloatRegister input = ToFloatRegister(ins->input());
FloatRegister output = ToFloatRegister(ins->output());
unsigned lane = ins->lane();
unsigned lane = ins->mir()->lane();
if (lane == 0) {
// The value we want to extract is in the low double-word
if (input != output)

View File

@ -208,7 +208,11 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared
void emitTableSwitchDispatch(MTableSwitch* mir, Register index, Register base);
void emitSimdExtractLane(FloatRegister input, Register output, unsigned lane);
void emitSimdExtractLane8x16(FloatRegister input, Register output, unsigned lane,
SimdSign signedness);
void emitSimdExtractLane16x8(FloatRegister input, Register output, unsigned lane,
SimdSign signedness);
void emitSimdExtractLane32x4(FloatRegister input, Register output, unsigned lane);
public:
CodeGeneratorX86Shared(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm);

View File

@ -266,6 +266,7 @@ enum ThreeByteOpcodeID {
OP3_ROUNDSS_VsdWsd = 0x0A,
OP3_ROUNDSD_VsdWsd = 0x0B,
OP3_BLENDVPS_VdqWdq = 0x14,
OP3_PEXTRB_EdVdqIb = 0x14,
OP3_PEXTRD_EdVdqIb = 0x16,
OP3_BLENDPS_VpsWpsIb = 0x0C,
OP3_PTEST_VdVd = 0x17,

View File

@ -634,6 +634,58 @@ LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElemen
define(lir, ins);
}
// Lower an MSimdExtractElement into the LIR instruction matching the SIMD
// type of its input: integer vectors become LSimdExtractElementI (or
// LSimdExtractElementU2D when the result type is Double), Float32x4 becomes
// LSimdExtractElementF and boolean vectors become LSimdExtractElementB.
// Any other input type crashes.
void
LIRGeneratorX86Shared::visitSimdExtractElement(MSimdExtractElement* ins)
{
    MOZ_ASSERT(IsSimdType(ins->input()->type()));
    MOZ_ASSERT(!IsSimdType(ins->type()));

    switch (ins->input()->type()) {
      case MIRType::Int8x16:
      case MIRType::Int16x8:
      case MIRType::Int32x4: {
        MOZ_ASSERT(ins->signedness() != SimdSign::NotApplicable);
        LUse vector = useRegisterAtStart(ins->input());

        if (ins->type() == MIRType::Double) {
            // A Double result means a Uint32 lane is being extracted; that
            // path takes an additional temp.
            MOZ_ASSERT(ins->signedness() == SimdSign::Unsigned);
            define(new (alloc()) LSimdExtractElementU2D(vector, temp()), ins);
            break;
        }

        auto* extract = new (alloc()) LSimdExtractElementI(vector);
#if defined(JS_CODEGEN_X86)
        // 32-bit x86 only: extending an extracted byte lane to 32 bits may
        // use movsbl or movzbl, whose 8-bit forms require the source register
        // to be %al, %bl, %cl, or %dl. That constraint cannot be expressed
        // more precisely, so the output is pinned to %ebx.
        if (ins->input()->type() == MIRType::Int8x16) {
            defineFixed(extract, ins, LAllocation(AnyRegister(ebx)));
            break;
        }
#endif
        define(extract, ins);
        break;
      }

      case MIRType::Float32x4: {
        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
        LUse vector = useRegisterAtStart(ins->input());
        define(new(alloc()) LSimdExtractElementF(vector), ins);
        break;
      }

      case MIRType::Bool8x16:
      case MIRType::Bool16x8:
      case MIRType::Bool32x4: {
        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
        LUse vector = useRegisterAtStart(ins->input());
        define(new(alloc()) LSimdExtractElementB(vector), ins);
        break;
      }

      default:
        MOZ_CRASH("Unknown SIMD kind when extracting element");
    }
}
void
LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith* ins)
{

View File

@ -56,6 +56,7 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared
void lowerUrshD(MUrsh* mir);
void lowerTruncateDToInt32(MTruncateToInt32* ins);
void lowerTruncateFToInt32(MTruncateToInt32* ins);
void visitSimdExtractElement(MSimdExtractElement* ins);
void visitSimdBinaryArith(MSimdBinaryArith* ins);
void visitSimdSelect(MSimdSelect* ins);
void visitSimdSplat(MSimdSplat* ins);