From 607774c960dbaf32773e2ddae72190f6e48d9d60 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Mon, 11 Mar 2019 22:18:01 +0000 Subject: [PATCH] Recommit "[GlobalISel][AArch64] Add selection support for G_EXTRACT_VECTOR_ELT" After r355865, we should be able to safely select G_EXTRACT_VECTOR_ELT without running into any problematic intrinsics. Also add a fix for lane copies, which don't support index 0. llvm-svn: 355871 --- .../AArch64/AArch64InstructionSelector.cpp | 153 ++++++++++++++++-- .../Target/AArch64/AArch64LegalizerInfo.cpp | 3 +- .../AArch64/AArch64RegisterBankInfo.cpp | 8 + .../GlobalISel/regbank-extract-vector-elt.mir | 103 ++++++++++++ .../GlobalISel/select-extract-vector-elt.mir | 117 ++++++++++++++ 5 files changed, 366 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index 41f4eb9563af..869cc143496c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -78,6 +78,7 @@ private: void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI, SmallVectorImpl &Idxs) const; bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const; unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const; MachineInstr *emitLoadFromConstantPool(Constant *CPVal, @@ -1709,6 +1710,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return selectUnmergeValues(I, MRI); case TargetOpcode::G_SHUFFLE_VECTOR: return selectShuffleVector(I, MRI); + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + return selectExtractElt(I, MRI); } return false; @@ -1787,6 +1790,138 @@ bool 
AArch64InstructionSelector::selectMergeValues( return true; } +static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, + const unsigned EltSize) { + // Choose a lane copy opcode and subregister based off of the size of the + // vector's elements. Only 16-, 32- and 64-bit elements are handled; for any + // other size this returns false and leaves both outputs untouched. + switch (EltSize) { + case 16: + CopyOpc = AArch64::CPYi16; + ExtractSubReg = AArch64::hsub; + break; + case 32: + CopyOpc = AArch64::CPYi32; + ExtractSubReg = AArch64::ssub; + break; + case 64: + CopyOpc = AArch64::CPYi64; + ExtractSubReg = AArch64::dsub; + break; + default: + // Unknown size, bail out. + LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n"); + return false; + } + return true; +} + +/// Select a G_EXTRACT_VECTOR_ELT. Only a destination on the FPR bank and a lane +/// index defined by a G_CONSTANT (looking through copies) are handled. Lane 0 +/// becomes a subregister COPY; any other lane becomes a CPYi16/CPYi32/CPYi64 +/// lane copy, with a source that is not 128 bits wide widened first. On success +/// \p I is erased and true is returned; every bail-out returns false. +bool AArch64InstructionSelector::selectExtractElt( + MachineInstr &I, MachineRegisterInfo &MRI) const { + assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && + "unexpected opcode!"); + unsigned DstReg = I.getOperand(0).getReg(); + const LLT NarrowTy = MRI.getType(DstReg); + const unsigned SrcReg = I.getOperand(1).getReg(); + const LLT WideTy = MRI.getType(SrcReg); + + assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && + "source register size too small!"); + assert(NarrowTy.isScalar() && "cannot extract vector into vector!"); + + // The lane index decides between a subregister COPY (lane 0) and a lane + // copy (any other lane). + MachineOperand &LaneIdxOp = I.getOperand(2); + assert(LaneIdxOp.isReg() && "Lane index operand was not a register?"); + + if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { + LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n"); + return false; + } + + // Find the instruction that defines the constant lane index. There could + // be any number of copies between the index operand and that definition. + // Skip them. 
+ MachineInstr *LaneDefInst = nullptr; + for (LaneDefInst = MRI.getVRegDef(LaneIdxOp.getReg()); + LaneDefInst && LaneDefInst->isCopy(); + LaneDefInst = MRI.getVRegDef(LaneDefInst->getOperand(1).getReg())) { + } + + // Did we find a def in the first place? If not, bail. + if (!LaneDefInst) { + LLVM_DEBUG(dbgs() << "Did not find VReg definition for " << LaneIdxOp + << "\n"); + return false; + } + + // TODO: Handle extracts that don't use G_CONSTANT. + if (LaneDefInst->getOpcode() != TargetOpcode::G_CONSTANT) { + LLVM_DEBUG(dbgs() << "VRegs defined by anything other than G_CONSTANT " + "currently unsupported.\n"); + return false; + } + + // NOTE(review): the index is used as-is; nothing in this function checks it + // against the number of elements in the source vector. + unsigned LaneIdx = LaneDefInst->getOperand(1).getCImm()->getLimitedValue(); + unsigned CopyOpc = 0; + unsigned ExtractSubReg = 0; + if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) { + LLVM_DEBUG( + dbgs() << "Couldn't determine lane copy opcode for instruction.\n"); + return false; + } + + const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); + const TargetRegisterClass *DstRC = + getRegClassForTypeOnBank(NarrowTy, DstRB, RBI, true); + if (!DstRC) { + LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n"); + return false; + } + + // NOTE(review): SrcRC is only null-checked; it is never used to constrain + // SrcReg in this function. + const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); + const TargetRegisterClass *SrcRC = + getRegClassForTypeOnBank(WideTy, SrcRB, RBI, true); + if (!SrcRC) { + LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); + return false; + } + + // The register that the lane copy reads from. It starts out as the source + // vector and is replaced by a widened register below when the source is not + // 128 bits wide. + unsigned InsertReg = SrcReg; + MachineIRBuilder MIRBuilder(I); + + // If the lane index is 0, we just use a subregister COPY. 
+ if (LaneIdx == 0) { + unsigned CopyTo = I.getOperand(0).getReg(); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), + CopyTo) + .addUse(SrcReg, 0, ExtractSubReg); + RBI.constrainGenericRegister(CopyTo, *DstRC, MRI); + I.eraseFromParent(); + return true; + } + + // Lane copies require 128-bit wide registers. If we're dealing with an + // unpacked vector, then we need to move up to that width. Insert an implicit + // def and a subregister insert to get us there. + if (WideTy.getSizeInBits() != 128) { + MachineInstr *ScalarToVector = emitScalarToVector( + WideTy.getSizeInBits(), &AArch64::FPR128RegClass, SrcReg, MIRBuilder); + if (!ScalarToVector) + return false; + InsertReg = ScalarToVector->getOperand(0).getReg(); + } + + MachineInstr *LaneCopyMI = + MIRBuilder.buildInstr(CopyOpc, {DstReg}, {InsertReg}).addImm(LaneIdx); + constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); + + // Make sure that the destination of the lane copy is actually constrained to + // its register class. + RBI.constrainGenericRegister(DstReg, *DstRC, MRI); + + I.eraseFromParent(); + return true; +} + bool AArch64InstructionSelector::selectUnmergeValues( MachineInstr &I, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && @@ -1823,24 +1958,8 @@ bool AArch64InstructionSelector::selectUnmergeValues( // vector's elements. unsigned CopyOpc = 0; unsigned ExtractSubReg = 0; - switch (NarrowTy.getSizeInBits()) { - case 16: - CopyOpc = AArch64::CPYi16; - ExtractSubReg = AArch64::hsub; - break; - case 32: - CopyOpc = AArch64::CPYi32; - ExtractSubReg = AArch64::ssub; - break; - case 64: - CopyOpc = AArch64::CPYi64; - ExtractSubReg = AArch64::dsub; - break; - default: - // Unknown size, bail out. - LLVM_DEBUG(dbgs() << "NarrowTy had unsupported size.\n"); + if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) return false; - } // Set up for the lane copies. 
MachineBasicBlock &MBB = *I.getParent(); diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp index c473dc490c53..4a2f28117ec6 100644 --- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -446,7 +446,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { .minScalar(2, s64) .legalIf([=](const LegalityQuery &Query) { const LLT &VecTy = Query.Types[1]; - return VecTy == v4s32 || VecTy == v2s64; + return VecTy == v2s16 || VecTy == v4s16 || VecTy == v4s32 || + VecTy == v2s64 || VecTy == v2s32; }); getActionDefinitionsBuilder(G_BUILD_VECTOR) diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index 08cf6859f05d..e4aebf4dbfe2 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -689,6 +689,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } break; } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + // Destination and source need to be FPRs. + OpRegBankIdx[0] = PMI_FirstFPR; + OpRegBankIdx[1] = PMI_FirstFPR; + + // Index needs to be a GPR. 
+ OpRegBankIdx[2] = PMI_FirstGPR; + break; case TargetOpcode::G_BUILD_VECTOR: // If the first source operand belongs to a FPR register bank, then make diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir new file mode 100644 index 000000000000..496f19edb8a7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir @@ -0,0 +1,103 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=regbankselect %s -o - | FileCheck %s + +name: v2s32_fpr +alignment: 2 +legalized: true +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1.entry: + liveins: $d0 + + %0:_(<2 x s32>) = COPY $d0 + %2:_(s64) = G_CONSTANT i64 1 + %1:_(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %2(s64) + $s0 = COPY %1(s32) + RET_ReallyLR implicit $s0 + +... +--- +name: v4s32_gpr +alignment: 2 +legalized: true +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1.entry: + liveins: $q0 + + ; CHECK-LABEL: name: v4s32_gpr + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0 + ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0 + ; CHECK: [[EVEC:%[0-9]+]]:fpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) + ; CHECK: $s0 = COPY [[EVEC]](s32) + ; CHECK: RET_ReallyLR implicit $s0 + %0:_(<4 x s32>) = COPY $q0 + %2:_(s64) = G_CONSTANT i64 0 + %1:_(s32) = G_EXTRACT_VECTOR_ELT %0(<4 x s32>), %2(s64) + $s0 = COPY %1(s32) + RET_ReallyLR implicit $s0 + +... 
+--- +name: v2s64_fpr +alignment: 2 +legalized: true +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1.entry: + liveins: $q0 + + ; CHECK-LABEL: name: v2s64_fpr + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s64>) = COPY $q0 + ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 2 + ; CHECK: [[EVEC:%[0-9]+]]:fpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64) + ; CHECK: $d0 = COPY [[EVEC]](s64) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<2 x s64>) = COPY $q0 + %2:_(s64) = G_CONSTANT i64 2 + %1:_(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %2(s64) + $d0 = COPY %1(s64) + RET_ReallyLR implicit $d0 + +... +--- +name: v4s16_fpr +alignment: 2 +legalized: true +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1.entry: + liveins: $d0 + + ; CHECK-LABEL: name: v4s16_fpr + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s16>) = COPY $d0 + ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 1 + ; CHECK: [[EVEC:%[0-9]+]]:fpr(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s16>), [[C]](s64) + ; CHECK: $h0 = COPY [[EVEC]](s16) + ; CHECK: RET_ReallyLR implicit $h0 + %0:_(<4 x s16>) = COPY $d0 + %2:_(s64) = G_CONSTANT i64 1 + %1:_(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %2(s64) + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir new file mode 100644 index 000000000000..4a25670bc122 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir @@ -0,0 +1,117 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=instruction-select %s -o - | FileCheck %s +... 
+--- +name: v2s32_fpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: gpr } + - { id: 3, class: fpr } +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: v2s32_fpr + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub + ; CHECK: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[INSERT_SUBREG]], 1 + ; CHECK: $s0 = COPY [[CPYi32_]] + ; CHECK: RET_ReallyLR implicit $s0 + %0:fpr(<2 x s32>) = COPY $d0 + %2:gpr(s64) = G_CONSTANT i64 1 + %3:fpr(s64) = COPY %2(s64) + %1:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %3(s64) + $s0 = COPY %1(s32) + RET_ReallyLR implicit $s0 + +... +--- +name: v2s32_fpr_idx0 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0 + ; CHECK-LABEL: name: v2s32_fpr_idx0 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub + ; CHECK: $s0 = COPY [[COPY1]] + ; CHECK: RET_ReallyLR implicit $s0 + %0:fpr(<2 x s32>) = COPY $d0 + %2:gpr(s64) = G_CONSTANT i64 0 + %3:fpr(s64) = COPY %2(s64) + %1:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %3(s64) + $s0 = COPY %1(s32) + RET_ReallyLR implicit $s0 + +... 
+--- +name: v2s64_fpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: gpr } + - { id: 3, class: fpr } +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: v2s64_fpr + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 2 + ; CHECK: $d0 = COPY [[CPYi64_]] + ; CHECK: RET_ReallyLR implicit $d0 + %0:fpr(<2 x s64>) = COPY $q0 + %2:gpr(s64) = G_CONSTANT i64 2 + %3:fpr(s64) = COPY %2(s64) + %1:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %3(s64) + $d0 = COPY %1(s64) + RET_ReallyLR implicit $d0 + +... +--- +name: v4s16_fpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: gpr } + - { id: 3, class: fpr } +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: v4s16_fpr + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub + ; CHECK: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG]], 1 + ; CHECK: $h0 = COPY [[CPYi16_]] + ; CHECK: RET_ReallyLR implicit $h0 + %0:fpr(<4 x s16>) = COPY $d0 + %2:gpr(s64) = G_CONSTANT i64 1 + %3:fpr(s64) = COPY %2(s64) + %1:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %3(s64) + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 + +...