From 36d3cac4d5cbd02f466436d9bc1fabb7646b6078 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Fri, 19 Aug 2016 18:32:14 +0000
Subject: [PATCH] GlobalISel: improve representation of G_SEQUENCE and G_EXTRACT

First, make sure all types involved are represented, rather than being
implicit from the register width.

Second, canonicalize all types to scalar. These operations just act in bits
and don't care about vectors.

Also standardize spelling of Indices in the MachineIRBuilder (NFC here).

llvm-svn: 279294
---
 .../CodeGen/GlobalISel/MachineIRBuilder.h    | 34 ++++++++++-------
 lib/CodeGen/GlobalISel/IRTranslator.cpp      |  8 ++--
 lib/CodeGen/GlobalISel/MachineIRBuilder.cpp  | 37 +++++++++++++------
 .../GlobalISel/MachineLegalizeHelper.cpp     | 17 ++++++---
 .../AArch64/GlobalISel/arm64-irtranslator.ll | 16 ++++----
 .../AArch64/GlobalISel/legalize-add.mir      | 24 ++++++------
 6 files changed, 83 insertions(+), 53 deletions(-)

diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 30c4967d6e8..1b43d92489b 100644
--- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -216,48 +216,56 @@ public:
   MachineInstrBuilder buildStore(LLT VTy, LLT PTy, unsigned Val, unsigned Addr,
                                  MachineMemOperand &MMO);
 
-  /// Build and insert `Res0, ... = G_EXTRACT Ty Src, Idx0, ...`.
+  /// Build and insert `Res0, ... = G_EXTRACT { ResTys, SrcTy } Src, Idx0,
+  /// ...`.
   ///
-  /// If \p Ty has size N bits, G_EXTRACT sets \p Res[0] to bits `[Idxs[0],
+  /// If \p SrcTy has size N bits, G_EXTRACT sets \p Res[0] to bits `[Idxs[0],
   /// Idxs[0] + N)` of \p Src and similarly for subsequent bit-indexes.
   ///
   /// \pre setBasicBlock or setMI must have been called.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildExtract(LLT Ty, ArrayRef<unsigned> Results,
-                                   unsigned Src, ArrayRef<uint64_t> Indexes);
+  MachineInstrBuilder buildExtract(ArrayRef<LLT> ResTys,
+                                   ArrayRef<unsigned> Results,
+                                   ArrayRef<uint64_t> Indices, LLT SrcTy,
+                                   unsigned Src);
 
-  /// Build and insert \p Res = G_SEQUENCE \p Ty \p Op0, \p Idx0...
+  /// Build and insert \p Res = G_SEQUENCE { \p ResTy, \p Op0Ty, ... }
+  /// \p Op0, \p Idx0...
   ///
   /// G_SEQUENCE inserts each element of Ops into an IMPLICIT_DEF register,
-  /// where each entry starts at the bit-index specified by \p Indexes.
+  /// where each entry starts at the bit-index specified by \p Indices.
   ///
   /// \pre setBasicBlock or setMI must have been called.
   /// \pre The final element of the sequence must not extend past the end of the
   ///      destination register.
   /// \pre The bits defined by each Op (derived from index and scalar size) must
   ///      not overlap.
+  /// \pre Each source operand must have a
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildSequence(LLT Ty, unsigned Res,
+  MachineInstrBuilder buildSequence(LLT ResTy, unsigned Res,
+                                    ArrayRef<LLT> OpTys,
                                     ArrayRef<unsigned> Ops,
-                                    ArrayRef<uint64_t> Indexes);
+                                    ArrayRef<uint64_t> Indices);
 
-  void addUsesWithIndexes(MachineInstrBuilder MIB) {}
+  void addUsesWithIndices(MachineInstrBuilder MIB) {}
 
   template <typename... ArgTys>
-  void addUsesWithIndexes(MachineInstrBuilder MIB, unsigned Reg,
+  void addUsesWithIndices(MachineInstrBuilder MIB, LLT Ty, unsigned Reg,
                           unsigned BitIndex, ArgTys... Args) {
     MIB.addUse(Reg).addImm(BitIndex);
-    addUsesWithIndexes(MIB, Args...);
+    MIB->setType(Ty, MIB->getNumTypes());
+
+    addUsesWithIndices(MIB, Args...);
   }
 
   template <typename... ArgTys>
-  MachineInstrBuilder buildSequence(LLT Ty, unsigned Res, unsigned Op,
+  MachineInstrBuilder buildSequence(LLT Ty, unsigned Res, LLT OpTy, unsigned Op,
                                     unsigned Index, ArgTys... Args) {
     MachineInstrBuilder MIB =
        buildInstr(TargetOpcode::G_SEQUENCE, Ty).addDef(Res);
-    addUsesWithIndexes(MIB, Op, Index, Args...);
+    addUsesWithIndices(MIB, OpTy, Op, Index, Args...);
    return MIB;
   }
 
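Note (illustrative sketch, not part of the patch): with the new signatures every
operand's type is spelled out at the call site. The snippet below shows roughly
what callers look like; the builder object MIRBuilder and the virtual registers
ResReg, ValReg, OverflowReg, LoReg, HiReg and SrcReg are made up for the example.

    // Pack a 32-bit value and a 1-bit overflow flag into a 64-bit register.
    MIRBuilder.buildSequence(LLT::scalar(64), ResReg,
                             LLT::scalar(32), ValReg, 0,
                             LLT::scalar(1), OverflowReg, 32);

    // Split a 128-bit register into two 64-bit halves at bit offsets 0 and 64.
    MIRBuilder.buildExtract({LLT::scalar(64), LLT::scalar(64)}, {LoReg, HiReg},
                            {0, 64}, LLT::scalar(128), SrcReg);
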
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 8703915f919..545b85f1c21 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -197,8 +197,8 @@ bool IRTranslator::translateExtractValue(const User &U) {
   uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
 
   unsigned Res = getOrCreateVReg(EVI);
-  MIRBuilder.buildExtract(LLT{*EVI.getType(), DL}, Res, getOrCreateVReg(*Src),
-                          Offset);
+  MIRBuilder.buildExtract(LLT{*EVI.getType(), DL}, Res, Offset,
+                          LLT{*Src->getType(), DL}, getOrCreateVReg(*Src));
 
   return true;
 }
@@ -255,8 +255,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI,
     MIB.addUse(Zero);
   }
 
-  MIRBuilder.buildSequence(LLT{*CI.getType(), DL}, getOrCreateVReg(CI), Res, 0,
-                           Overflow, Width);
+  MIRBuilder.buildSequence(LLT{*CI.getType(), DL}, getOrCreateVReg(CI), Ty, Res,
+                           0, s1, Overflow, Width);
   return true;
 }
 
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 0ee70999b08..bd66a28bdc2 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -141,33 +141,48 @@ MachineInstrBuilder MachineIRBuilder::buildAnyExtend(LLT Ty, unsigned Res,
   return buildInstr(TargetOpcode::G_ANYEXTEND, Ty).addDef(Res).addUse(Op);
 }
 
-MachineInstrBuilder
-MachineIRBuilder::buildExtract(LLT Ty, ArrayRef<unsigned> Results, unsigned Src,
-                               ArrayRef<uint64_t> Indexes) {
-  assert(Results.size() == Indexes.size() && "inconsistent number of regs");
+MachineInstrBuilder MachineIRBuilder::buildExtract(ArrayRef<LLT> ResTys,
+                                                   ArrayRef<unsigned> Results,
+                                                   ArrayRef<uint64_t> Indices,
+                                                   LLT SrcTy, unsigned Src) {
+  assert(ResTys.size() == Results.size() && Results.size() == Indices.size() &&
+         "inconsistent number of regs");
+  assert(!Results.empty() && "invalid trivial extract");
+
+  auto MIB = BuildMI(getMF(), DL, getTII().get(TargetOpcode::G_EXTRACT));
+  for (unsigned i = 0; i < ResTys.size(); ++i)
+    MIB->setType(LLT::scalar(ResTys[i].getSizeInBits()), i);
+  MIB->setType(LLT::scalar(SrcTy.getSizeInBits()), ResTys.size());
 
-  MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_EXTRACT, Ty);
   for (auto Res : Results)
     MIB.addDef(Res);
 
   MIB.addUse(Src);
 
-  for (auto Idx : Indexes)
+  for (auto Idx : Indices)
     MIB.addImm(Idx);
+
+  getMBB().insert(getInsertPt(), MIB);
+
   return MIB;
 }
 
 MachineInstrBuilder
-MachineIRBuilder::buildSequence(LLT Ty, unsigned Res,
+MachineIRBuilder::buildSequence(LLT ResTy, unsigned Res,
+                                ArrayRef<LLT> OpTys,
                                 ArrayRef<unsigned> Ops,
-                                ArrayRef<uint64_t> Indexes) {
-  assert(Ops.size() == Indexes.size() && "incompatible args");
+                                ArrayRef<uint64_t> Indices) {
+  assert(OpTys.size() == Ops.size() && Ops.size() == Indices.size() &&
+         "incompatible args");
+  assert(!Ops.empty() && "invalid trivial sequence");
 
-  MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE, Ty);
+  MachineInstrBuilder MIB =
+      buildInstr(TargetOpcode::G_SEQUENCE, LLT::scalar(ResTy.getSizeInBits()));
   MIB.addDef(Res);
   for (unsigned i = 0; i < Ops.size(); ++i) {
     MIB.addUse(Ops[i]);
-    MIB.addImm(Indexes[i]);
+    MIB.addImm(Indices[i]);
+    MIB->setType(LLT::scalar(OpTys[i].getSizeInBits()), MIB->getNumTypes());
   }
   return MIB;
 }
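Note (illustrative, not part of the patch): because the builder canonicalizes every
recorded type to a scalar of the same width, vector and scalar values of equal size
end up with identical G_SEQUENCE/G_EXTRACT type lists. A rough sketch, assuming the
existing LLT helpers:

    LLT VecTy = LLT::vector(2, 64);                    // a <2 x s64> value
    LLT Recorded = LLT::scalar(VecTy.getSizeInBits()); // recorded on the instruction as s128
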
diff --git a/lib/CodeGen/GlobalISel/MachineLegalizeHelper.cpp b/lib/CodeGen/GlobalISel/MachineLegalizeHelper.cpp
index 6d2d1057a6b..29fdee8633f 100644
--- a/lib/CodeGen/GlobalISel/MachineLegalizeHelper.cpp
+++ b/lib/CodeGen/GlobalISel/MachineLegalizeHelper.cpp
@@ -52,11 +52,14 @@ void MachineLegalizeHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
                                          SmallVectorImpl<unsigned> &VRegs) {
   unsigned Size = Ty.getSizeInBits();
   SmallVector<uint64_t, 4> Indexes;
+  SmallVector<LLT, 4> ResTys;
   for (int i = 0; i < NumParts; ++i) {
     VRegs.push_back(MRI.createGenericVirtualRegister(Size));
     Indexes.push_back(i * Size);
+    ResTys.push_back(Ty);
   }
-  MIRBuilder.buildExtract(Ty, VRegs, Reg, Indexes);
+  MIRBuilder.buildExtract(ResTys, VRegs, Indexes,
+                          LLT::scalar(Ty.getSizeInBits() * NumParts), Reg);
 }
 
 MachineLegalizeHelper::LegalizeResult
@@ -78,6 +81,7 @@ MachineLegalizeHelper::narrowScalar(MachineInstr &MI, LLT NarrowTy) {
     unsigned CarryIn = MRI.createGenericVirtualRegister(1);
     MIRBuilder.buildConstant(LLT::scalar(1), CarryIn, 0);
 
+    SmallVector<LLT, 2> DstTys;
     for (int i = 0; i < NumParts; ++i) {
       unsigned DstReg = MRI.createGenericVirtualRegister(NarrowSize);
       unsigned CarryOut = MRI.createGenericVirtualRegister(1);
@@ -85,12 +89,13 @@ MachineLegalizeHelper::narrowScalar(MachineInstr &MI, LLT NarrowTy) {
       MIRBuilder.buildUAdde(NarrowTy, DstReg, CarryOut, Src1Regs[i],
                             Src2Regs[i], CarryIn);
 
+      DstTys.push_back(NarrowTy);
       DstRegs.push_back(DstReg);
       Indexes.push_back(i * NarrowSize);
       CarryIn = CarryOut;
     }
-    MIRBuilder.buildSequence(MI.getType(), MI.getOperand(0).getReg(), DstRegs,
-                             Indexes);
+    MIRBuilder.buildSequence(MI.getType(), MI.getOperand(0).getReg(), DstTys,
+                             DstRegs, Indexes);
     MI.eraseFromParent();
     return Legalized;
   }
@@ -146,15 +151,17 @@ MachineLegalizeHelper::fewerElementsVector(MachineInstr &MI, LLT NarrowTy) {
     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
 
+    SmallVector<LLT, 2> DstTys;
     for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowSize);
       MIRBuilder.buildAdd(NarrowTy, DstReg, Src1Regs[i], Src2Regs[i]);
+      DstTys.push_back(NarrowTy);
       DstRegs.push_back(DstReg);
       Indexes.push_back(i * NarrowSize);
     }
 
-    MIRBuilder.buildSequence(MI.getType(), MI.getOperand(0).getReg(), DstRegs,
-                             Indexes);
+    MIRBuilder.buildSequence(MI.getType(), MI.getOperand(0).getReg(), DstTys,
+                             DstRegs, Indexes);
     MI.eraseFromParent();
     return Legalized;
   }
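Note (illustrative, not part of the patch): the bit-index operands in the tests below
follow the layout described in the MachineIRBuilder comments. For the overflow
intrinsics, the packed 64-bit result is laid out roughly like this:

    // G_SEQUENCE { s64, s32, s1 } [[VAL]], 0, [[OVERFLOW]], 32
    //   bits [0, 32)  : VAL       (s32, inserted at bit-index 0)
    //   bits [32, 33) : OVERFLOW  (s1,  inserted at bit-index 32)
    //   bits [33, 64) : untouched bits of the underlying IMPLICIT_DEF
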
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index a336df92af3..d618d7be0ca 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -602,7 +602,7 @@ define float @test_frem(float %arg1, float %arg2) {
 ; CHECK: [[RHS:%[0-9]+]](32) = COPY %w1
 ; CHECK: [[ADDR:%[0-9]+]](64) = COPY %x2
 ; CHECK: [[VAL:%[0-9]+]](32), [[OVERFLOW:%[0-9]+]](1) = G_SADDO { s32, s1 } [[LHS]], [[RHS]]
-; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE s64 [[VAL]], 0, [[OVERFLOW]], 32
+; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE { s64, s32, s1 } [[VAL]], 0, [[OVERFLOW]], 32
 ; CHECK: G_STORE { s64, p0 } [[RES]], [[ADDR]]
 declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
 define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
@@ -617,7 +617,7 @@ define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
 ; CHECK: [[ADDR:%[0-9]+]](64) = COPY %x2
 ; CHECK: [[ZERO:%[0-9]+]](1) = G_CONSTANT s1 0
 ; CHECK: [[VAL:%[0-9]+]](32), [[OVERFLOW:%[0-9]+]](1) = G_UADDE { s32, s1 } [[LHS]], [[RHS]], [[ZERO]]
-; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE s64 [[VAL]], 0, [[OVERFLOW]], 32
+; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE { s64, s32, s1 } [[VAL]], 0, [[OVERFLOW]], 32
 ; CHECK: G_STORE { s64, p0 } [[RES]], [[ADDR]]
 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
 define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
@@ -631,7 +631,7 @@ define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
 ; CHECK: [[RHS:%[0-9]+]](32) = COPY %w1
 ; CHECK: [[SUBR:%[0-9]+]](64) = COPY %x2
 ; CHECK: [[VAL:%[0-9]+]](32), [[OVERFLOW:%[0-9]+]](1) = G_SSUBO { s32, s1 } [[LHS]], [[RHS]]
-; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE s64 [[VAL]], 0, [[OVERFLOW]], 32
+; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE { s64, s32, s1 } [[VAL]], 0, [[OVERFLOW]], 32
 ; CHECK: G_STORE { s64, p0 } [[RES]], [[SUBR]]
 declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32)
 define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) {
@@ -646,7 +646,7 @@ define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) {
 ; CHECK: [[SUBR:%[0-9]+]](64) = COPY %x2
 ; CHECK: [[ZERO:%[0-9]+]](1) = G_CONSTANT s1 0
 ; CHECK: [[VAL:%[0-9]+]](32), [[OVERFLOW:%[0-9]+]](1) = G_USUBE { s32, s1 } [[LHS]], [[RHS]], [[ZERO]]
-; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE s64 [[VAL]], 0, [[OVERFLOW]], 32
+; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE { s64, s32, s1 } [[VAL]], 0, [[OVERFLOW]], 32
 ; CHECK: G_STORE { s64, p0 } [[RES]], [[SUBR]]
 declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)
 define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) {
@@ -660,7 +660,7 @@ define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) {
 ; CHECK: [[RHS:%[0-9]+]](32) = COPY %w1
 ; CHECK: [[ADDR:%[0-9]+]](64) = COPY %x2
 ; CHECK: [[VAL:%[0-9]+]](32), [[OVERFLOW:%[0-9]+]](1) = G_SMULO { s32, s1 } [[LHS]], [[RHS]]
-; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE s64 [[VAL]], 0, [[OVERFLOW]], 32
+; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE { s64, s32, s1 } [[VAL]], 0, [[OVERFLOW]], 32
 ; CHECK: G_STORE { s64, p0 } [[RES]], [[ADDR]]
 declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)
 define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
@@ -674,7 +674,7 @@ define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
 ; CHECK: [[RHS:%[0-9]+]](32) = COPY %w1
 ; CHECK: [[ADDR:%[0-9]+]](64) = COPY %x2
 ; CHECK: [[VAL:%[0-9]+]](32), [[OVERFLOW:%[0-9]+]](1) = G_UMULO { s32, s1 } [[LHS]], [[RHS]]
-; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE s64 [[VAL]], 0, [[OVERFLOW]], 32
+; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE { s64, s32, s1 } [[VAL]], 0, [[OVERFLOW]], 32
 ; CHECK: G_STORE { s64, p0 } [[RES]], [[ADDR]]
 declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
 define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
@@ -685,7 +685,7 @@ define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
 
 ; CHECK-LABEL: name: test_extractvalue
 ; CHECK: [[STRUCT:%[0-9]+]](128) = G_LOAD { s128, p0 }
-; CHECK: [[RES:%[0-9]+]](32) = G_EXTRACT s32 [[STRUCT]], 64
+; CHECK: [[RES:%[0-9]+]](32) = G_EXTRACT { s32, s128 } [[STRUCT]], 64
 ; CHECK: %w0 = COPY [[RES]]
 %struct.nested = type {i8, { i8, i32 }, i32}
 define i32 @test_extractvalue(%struct.nested* %addr) {
@@ -696,7 +696,7 @@ define i32 @test_extractvalue(%struct.nested* %addr) {
 
 ; CHECK-LABEL: name: test_extractvalue_agg
 ; CHECK: [[STRUCT:%[0-9]+]](128) = G_LOAD { s128, p0 }
-; CHECK: [[RES:%[0-9]+]](64) = G_EXTRACT s64 [[STRUCT]], 32
+; CHECK: [[RES:%[0-9]+]](64) = G_EXTRACT { s64, s128 } [[STRUCT]], 32
 ; CHECK: G_STORE { s64, p0 } [[RES]]
 define void @test_extractvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) {
   %struct = load %struct.nested, %struct.nested* %addr
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
index afc5ffbce67..0ae03cf67ef 100644
--- a/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
@@ -28,17 +28,17 @@ body: |
   bb.0.entry:
     liveins: %x0, %x1, %x2, %x3
     ; CHECK-LABEL: name: test_scalar_add_big
-    ; CHECK-DAG: [[LHS_LO:%.*]](64), [[LHS_HI:%.*]](64) = G_EXTRACT s64 %0, 0, 64
-    ; CHECK-DAG: [[RHS_LO:%.*]](64), [[RHS_HI:%.*]](64) = G_EXTRACT s64 %1, 0, 64
+    ; CHECK-DAG: [[LHS_LO:%.*]](64), [[LHS_HI:%.*]](64) = G_EXTRACT { s64, s64, s128 } %0, 0, 64
+    ; CHECK-DAG: [[RHS_LO:%.*]](64), [[RHS_HI:%.*]](64) = G_EXTRACT { s64, s64, s128 } %1, 0, 64
     ; CHECK-DAG: [[CARRY0:%.*]](1) = G_CONSTANT s1 0
     ; CHECK: [[RES_LO:%.*]](64), [[CARRY:%.*]](1) = G_UADDE s64 [[LHS_LO]], [[RHS_LO]], [[CARRY0]]
     ; CHECK: [[RES_HI:%.*]](64), {{%.*}}(1) = G_UADDE s64 [[LHS_HI]], [[RHS_HI]], [[CARRY]]
-    ; CHECK: %2(128) = G_SEQUENCE s128 [[RES_LO]], 0, [[RES_HI]], 64
+    ; CHECK: %2(128) = G_SEQUENCE { s128, s64, s64 } [[RES_LO]], 0, [[RES_HI]], 64
 
-    %0(128) = G_SEQUENCE s128 %x0, 0, %x1, 64
-    %1(128) = G_SEQUENCE s128 %x2, 0, %x3, 64
+    %0(128) = G_SEQUENCE { s128, s64, s64 } %x0, 0, %x1, 64
+    %1(128) = G_SEQUENCE { s128, s64, s64 } %x2, 0, %x3, 64
     %2(128) = G_ADD s128 %0, %1
-    %x0, %x1 = G_EXTRACT s64 %2, 0, 64
+    %x0, %x1 = G_EXTRACT { s64, s64, s128 } %2, 0, 64
 ...
 ---
@@ -74,14 +74,14 @@ body: |
   bb.0.entry:
     liveins: %q0, %q1, %q2, %q3
     ; CHECK-LABEL: name: test_vector_add
-    ; CHECK-DAG: [[LHS_LO:%.*]](128), [[LHS_HI:%.*]](128) = G_EXTRACT <2 x s64> %0, 0, 128
-    ; CHECK-DAG: [[RHS_LO:%.*]](128), [[RHS_HI:%.*]](128) = G_EXTRACT <2 x s64> %1, 0, 128
+    ; CHECK-DAG: [[LHS_LO:%.*]](128), [[LHS_HI:%.*]](128) = G_EXTRACT { s128, s128, s256 } %0, 0, 128
+    ; CHECK-DAG: [[RHS_LO:%.*]](128), [[RHS_HI:%.*]](128) = G_EXTRACT { s128, s128, s256 } %1, 0, 128
     ; CHECK: [[RES_LO:%.*]](128) = G_ADD <2 x s64> [[LHS_LO]], [[RHS_LO]]
     ; CHECK: [[RES_HI:%.*]](128) = G_ADD <2 x s64> [[LHS_HI]], [[RHS_HI]]
-    ; CHECK: %2(256) = G_SEQUENCE <4 x s64> [[RES_LO]], 0, [[RES_HI]], 128
+    ; CHECK: %2(256) = G_SEQUENCE { s256, s128, s128 } [[RES_LO]], 0, [[RES_HI]], 128
 
-    %0(256) = G_SEQUENCE <4 x s64> %q0, 0, %q1, 128
-    %1(256) = G_SEQUENCE <4 x s64> %q2, 0, %q3, 128
+    %0(256) = G_SEQUENCE { s256, s128, s128 } %q0, 0, %q1, 128
+    %1(256) = G_SEQUENCE { s256, s128, s128 } %q2, 0, %q3, 128
     %2(256) = G_ADD <4 x s64> %0, %1
-    %q0, %q1 = G_EXTRACT <2 x s64> %2, 0, 128
+    %q0, %q1 = G_EXTRACT { s128, s128, s256 } %2, 0, 128
 ...