mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-01 01:31:26 +00:00
[GlobalISel][AArch64] Combine unmerge(G_EXT v, undef) to unmerge(v).
Given <N x t> d1, unused = unmerge(G_EXT <2*N x t> v1, undef, N), the same result can be expressed simply as unused, d1 = unmerge v1. This helps address the regressions in arm64-vcvt_f.ll that were introduced by https://reviews.llvm.org/D144670.
This commit is contained in:
parent
7eeeeb0cc9
commit
13b7629a58
@ -206,6 +206,14 @@ def vector_sext_inreg_to_shift : GICombineRule<
|
||||
(apply [{ applyVectorSextInReg(*${d}, MRI, B, Observer); }])
|
||||
>;
|
||||
|
||||
// Match data for the unmerge_ext_to_unmerge rule: a single Register that the
// match step fills in and the apply step consumes.
def unmerge_ext_to_unmerge_matchdata : GIDefMatchData<"Register">;
|
||||
// Combine rule rooted at a G_UNMERGE_VALUES instruction ($d).
// matchUnmergeExtToUnmerge decides whether the rule fires and records a
// register in $matchinfo; applyUnmergeExtToUnmerge then rewrites $d using it.
def unmerge_ext_to_unmerge : GICombineRule<
|
||||
(defs root:$d, unmerge_ext_to_unmerge_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_UNMERGE_VALUES):$d,
|
||||
[{ return matchUnmergeExtToUnmerge(*${d}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyUnmergeExtToUnmerge(*${d}, MRI, B, Observer, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
// Post-legalization combines which should happen at all optimization levels.
|
||||
// (E.g. ones that facilitate matching for the selector) For example, matching
|
||||
// pseudos.
|
||||
@ -214,7 +222,8 @@ def AArch64PostLegalizerLowering
|
||||
[shuffle_vector_lowering, vashr_vlshr_imm,
|
||||
icmp_lowering, build_vector_lowering,
|
||||
lower_vector_fcmp, form_truncstore,
|
||||
vector_sext_inreg_to_shift]> {
|
||||
vector_sext_inreg_to_shift,
|
||||
unmerge_ext_to_unmerge]> {
|
||||
}
|
||||
|
||||
// Post-legalization combines which are primarily optimizations.
|
||||
|
@ -1066,6 +1066,50 @@ void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
Helper.lower(MI, 0, /* Unused hint type */ LLT());
|
||||
}
|
||||
|
||||
/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
|
||||
/// => unused, <N x t> = unmerge v
|
||||
bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
Register &MatchInfo) {
|
||||
auto &Unmerge = cast<GUnmerge>(MI);
|
||||
if (Unmerge.getNumDefs() != 2)
|
||||
return false;
|
||||
if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
|
||||
return false;
|
||||
|
||||
LLT DstTy = MRI.getType(Unmerge.getReg(0));
|
||||
if (!DstTy.isVector())
|
||||
return false;
|
||||
|
||||
MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
|
||||
if (!Ext)
|
||||
return false;
|
||||
|
||||
Register ExtSrc1 = Ext->getOperand(1).getReg();
|
||||
Register ExtSrc2 = Ext->getOperand(2).getReg();
|
||||
auto LowestVal =
|
||||
getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
|
||||
if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
|
||||
return false;
|
||||
|
||||
if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
|
||||
return false;
|
||||
|
||||
MatchInfo = ExtSrc1;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Rewrite a matched G_UNMERGE_VALUES in place: its two result registers trade
/// places and its source operand is redirected to \p SrcReg, the register
/// recorded by matchUnmergeExtToUnmerge. The observer is notified before and
/// after the instruction is modified.
void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B,
                              GISelChangeObserver &Observer, Register &SrcReg) {
  Observer.changingInstr(MI);

  // Exchange the two destination registers.
  auto &FirstDef = MI.getOperand(0);
  auto &SecondDef = MI.getOperand(1);
  const Register OldFirst = FirstDef.getReg();
  FirstDef.setReg(SecondDef.getReg());
  SecondDef.setReg(OldFirst);

  // Read from the register the matcher recorded instead of the G_EXT result.
  MI.getOperand(2).setReg(SrcReg);

  Observer.changedInstr(MI);
}
|
||||
|
||||
class AArch64PostLegalizerLoweringImpl : public Combiner {
|
||||
protected:
|
||||
// TODO: Make CombinerHelper methods const.
|
||||
|
@ -0,0 +1,154 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -global-isel -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: v4s32
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $q0
|
||||
; CHECK-LABEL: name: v4s32
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %v1:_(<4 x s32>) = COPY $q0
|
||||
; CHECK-NEXT: %unused:_(<2 x s32>), %unmerge:_(<2 x s32>) = G_UNMERGE_VALUES %v1(<4 x s32>)
|
||||
; CHECK-NEXT: %fpext:_(<2 x s64>) = G_FPEXT %unmerge(<2 x s32>)
|
||||
; CHECK-NEXT: $q0 = COPY %fpext(<2 x s64>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
%v1:_(<4 x s32>) = COPY $q0
|
||||
%implicit:_(<4 x s32>) = G_IMPLICIT_DEF
|
||||
%C:_(s32) = G_CONSTANT i32 8
|
||||
%ext:_(<4 x s32>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
|
||||
%unmerge:_(<2 x s32>), %unused:_(<2 x s32>) = G_UNMERGE_VALUES %ext:_(<4 x s32>)
|
||||
%fpext:_(<2 x s64>) = G_FPEXT %unmerge:_(<2 x s32>)
|
||||
$q0 = COPY %fpext
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
||||
---
|
||||
name: v8s16
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $q0
|
||||
; CHECK-LABEL: name: v8s16
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %v1:_(<8 x s16>) = COPY $q0
|
||||
; CHECK-NEXT: %unused:_(<4 x s16>), %unmerge:_(<4 x s16>) = G_UNMERGE_VALUES %v1(<8 x s16>)
|
||||
; CHECK-NEXT: %fpext:_(<4 x s32>) = G_FPEXT %unmerge(<4 x s16>)
|
||||
; CHECK-NEXT: $q0 = COPY %fpext(<4 x s32>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
%v1:_(<8 x s16>) = COPY $q0
|
||||
%implicit:_(<8 x s16>) = G_IMPLICIT_DEF
|
||||
%C:_(s32) = G_CONSTANT i32 8
|
||||
%ext:_(<8 x s16>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
|
||||
%unmerge:_(<4 x s16>), %unused:_(<4 x s16>) = G_UNMERGE_VALUES %ext:_(<8 x s16>)
|
||||
%fpext:_(<4 x s32>) = G_FPEXT %unmerge:_(<4 x s16>)
|
||||
$q0 = COPY %fpext
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
||||
---
|
||||
name: v16s8
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $q0
|
||||
; CHECK-LABEL: name: v16s8
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %v1:_(<16 x s8>) = COPY $q0
|
||||
; CHECK-NEXT: %unused:_(<8 x s8>), %unmerge:_(<8 x s8>) = G_UNMERGE_VALUES %v1(<16 x s8>)
|
||||
; CHECK-NEXT: %fpext:_(<8 x s16>) = G_FPEXT %unmerge(<8 x s8>)
|
||||
; CHECK-NEXT: $q0 = COPY %fpext(<8 x s16>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
%v1:_(<16 x s8>) = COPY $q0
|
||||
%implicit:_(<16 x s8>) = G_IMPLICIT_DEF
|
||||
%C:_(s32) = G_CONSTANT i32 8
|
||||
%ext:_(<16 x s8>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
|
||||
%unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext:_(<16 x s8>)
|
||||
%fpext:_(<8 x s16>) = G_FPEXT %unmerge:_(<8 x s8>)
|
||||
$q0 = COPY %fpext
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
||||
---
|
||||
name: skip_not_const
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $q0, $w0
|
||||
; CHECK-LABEL: name: skip_not_const
|
||||
; CHECK: liveins: $q0, $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %v1:_(<16 x s8>) = COPY $q0
|
||||
; CHECK-NEXT: %implicit:_(<16 x s8>) = G_IMPLICIT_DEF
|
||||
; CHECK-NEXT: %C:_(s32) = COPY $w0
|
||||
; CHECK-NEXT: %ext:_(<16 x s8>) = G_EXT %v1, %implicit, %C(s32)
|
||||
; CHECK-NEXT: %unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext(<16 x s8>)
|
||||
; CHECK-NEXT: %fpext:_(<8 x s16>) = G_FPEXT %unmerge(<8 x s8>)
|
||||
; CHECK-NEXT: $q0 = COPY %fpext(<8 x s16>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
%v1:_(<16 x s8>) = COPY $q0
|
||||
%implicit:_(<16 x s8>) = G_IMPLICIT_DEF
|
||||
%C:_(s32) = COPY $w0
|
||||
%ext:_(<16 x s8>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
|
||||
%unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext:_(<16 x s8>)
|
||||
%fpext:_(<8 x s16>) = G_FPEXT %unmerge:_(<8 x s8>)
|
||||
$q0 = COPY %fpext
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
||||
---
|
||||
name: skip_not_unused
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $q0
|
||||
; CHECK-LABEL: name: skip_not_unused
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %v1:_(<16 x s8>) = COPY $q0
|
||||
; CHECK-NEXT: %implicit:_(<16 x s8>) = G_IMPLICIT_DEF
|
||||
; CHECK-NEXT: %C:_(s32) = G_CONSTANT i32 8
|
||||
; CHECK-NEXT: %ext:_(<16 x s8>) = G_EXT %v1, %implicit, %C(s32)
|
||||
; CHECK-NEXT: %unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext(<16 x s8>)
|
||||
; CHECK-NEXT: %fpext:_(<8 x s16>) = G_FPEXT %unmerge(<8 x s8>)
|
||||
; CHECK-NEXT: %fpext2:_(<8 x s16>) = G_FPEXT %unused(<8 x s8>)
|
||||
; CHECK-NEXT: $q0 = COPY %fpext(<8 x s16>)
|
||||
; CHECK-NEXT: $q1 = COPY %fpext2(<8 x s16>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1
|
||||
%v1:_(<16 x s8>) = COPY $q0
|
||||
%implicit:_(<16 x s8>) = G_IMPLICIT_DEF
|
||||
%C:_(s32) = G_CONSTANT i32 8
|
||||
%ext:_(<16 x s8>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
|
||||
%unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext:_(<16 x s8>)
|
||||
%fpext:_(<8 x s16>) = G_FPEXT %unmerge:_(<8 x s8>)
|
||||
%fpext2:_(<8 x s16>) = G_FPEXT %unused:_(<8 x s8>)
|
||||
$q0 = COPY %fpext
|
||||
$q1 = COPY %fpext2
|
||||
RET_ReallyLR implicit $q0, implicit $q1
|
||||
...
|
||||
---
|
||||
name: skip_borders
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $q0
|
||||
; CHECK-LABEL: name: skip_borders
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %v1:_(<4 x s32>) = COPY $q0
|
||||
; CHECK-NEXT: %implicit:_(<4 x s32>) = G_IMPLICIT_DEF
|
||||
; CHECK-NEXT: %C:_(s32) = G_CONSTANT i32 9
|
||||
; CHECK-NEXT: %ext:_(<4 x s32>) = G_EXT %v1, %implicit, %C(s32)
|
||||
; CHECK-NEXT: %unmerge:_(<2 x s32>), %unused:_(<2 x s32>) = G_UNMERGE_VALUES %ext(<4 x s32>)
|
||||
; CHECK-NEXT: %fpext:_(<2 x s64>) = G_FPEXT %unmerge(<2 x s32>)
|
||||
; CHECK-NEXT: $q0 = COPY %fpext(<2 x s64>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
%v1:_(<4 x s32>) = COPY $q0
|
||||
%implicit:_(<4 x s32>) = G_IMPLICIT_DEF
|
||||
%C:_(s32) = G_CONSTANT i32 9
|
||||
%ext:_(<4 x s32>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
|
||||
%unmerge:_(<2 x s32>), %unused:_(<2 x s32>) = G_UNMERGE_VALUES %ext:_(<4 x s32>)
|
||||
%fpext:_(<2 x s64>) = G_FPEXT %unmerge:_(<2 x s32>)
|
||||
$q0 = COPY %fpext
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
@ -137,7 +137,7 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-GI-LABEL: addp_v4i32:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
|
||||
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-GI-NEXT: mov d1, v0.d[1]
|
||||
; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s
|
||||
; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
|
||||
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
|
||||
@ -164,7 +164,7 @@ define <4 x i16> @addp_v8i16(<8 x i16> %a, <8 x i16> %b) {
|
||||
; CHECK-GI-LABEL: addp_v8i16:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
|
||||
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-GI-NEXT: mov d1, v0.d[1]
|
||||
; CHECK-GI-NEXT: addp v0.4h, v0.4h, v1.4h
|
||||
; CHECK-GI-NEXT: ret
|
||||
%1 = add <8 x i16> %a, %b
|
||||
@ -185,7 +185,7 @@ define <8 x i8> @addp_v16i8(<16 x i8> %a, <16 x i8> %b) {
|
||||
; CHECK-GI-LABEL: addp_v16i8:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
|
||||
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-GI-NEXT: mov d1, v0.d[1]
|
||||
; CHECK-GI-NEXT: addp v0.8b, v0.8b, v1.8b
|
||||
; CHECK-GI-NEXT: ret
|
||||
%1 = add <16 x i8> %a, %b
|
||||
|
@ -71,9 +71,7 @@ define <8 x i16> @sabdl2_8h(ptr %A, ptr %B) nounwind {
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: ldr q0, [x0]
|
||||
; CHECK-GI-NEXT: ldr q1, [x1]
|
||||
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
|
||||
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
|
||||
; CHECK-GI-NEXT: sabdl.8h v0, v0, v1
|
||||
; CHECK-GI-NEXT: sabdl2.8h v0, v0, v1
|
||||
; CHECK-GI-NEXT: ret
|
||||
%load1 = load <16 x i8>, ptr %A
|
||||
%load2 = load <16 x i8>, ptr %B
|
||||
@ -96,9 +94,7 @@ define <4 x i32> @sabdl2_4s(ptr %A, ptr %B) nounwind {
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: ldr q0, [x0]
|
||||
; CHECK-GI-NEXT: ldr q1, [x1]
|
||||
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
|
||||
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
|
||||
; CHECK-GI-NEXT: sabdl.4s v0, v0, v1
|
||||
; CHECK-GI-NEXT: sabdl2.4s v0, v0, v1
|
||||
; CHECK-GI-NEXT: ret
|
||||
%load1 = load <8 x i16>, ptr %A
|
||||
%load2 = load <8 x i16>, ptr %B
|
||||
@ -121,9 +117,7 @@ define <2 x i64> @sabdl2_2d(ptr %A, ptr %B) nounwind {
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: ldr q0, [x0]
|
||||
; CHECK-GI-NEXT: ldr q1, [x1]
|
||||
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
|
||||
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
|
||||
; CHECK-GI-NEXT: sabdl.2d v0, v0, v1
|
||||
; CHECK-GI-NEXT: sabdl2.2d v0, v0, v1
|
||||
; CHECK-GI-NEXT: ret
|
||||
%load1 = load <4 x i32>, ptr %A
|
||||
%load2 = load <4 x i32>, ptr %B
|
||||
@ -188,9 +182,7 @@ define <8 x i16> @uabdl2_8h(ptr %A, ptr %B) nounwind {
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: ldr q0, [x0]
|
||||
; CHECK-GI-NEXT: ldr q1, [x1]
|
||||
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
|
||||
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
|
||||
; CHECK-GI-NEXT: uabdl.8h v0, v0, v1
|
||||
; CHECK-GI-NEXT: uabdl2.8h v0, v0, v1
|
||||
; CHECK-GI-NEXT: ret
|
||||
%load1 = load <16 x i8>, ptr %A
|
||||
%load2 = load <16 x i8>, ptr %B
|
||||
@ -214,9 +206,7 @@ define <4 x i32> @uabdl2_4s(ptr %A, ptr %B) nounwind {
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: ldr q0, [x0]
|
||||
; CHECK-GI-NEXT: ldr q1, [x1]
|
||||
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
|
||||
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
|
||||
; CHECK-GI-NEXT: uabdl.4s v0, v0, v1
|
||||
; CHECK-GI-NEXT: uabdl2.4s v0, v0, v1
|
||||
; CHECK-GI-NEXT: ret
|
||||
%load1 = load <8 x i16>, ptr %A
|
||||
%load2 = load <8 x i16>, ptr %B
|
||||
@ -239,9 +229,7 @@ define <2 x i64> @uabdl2_2d(ptr %A, ptr %B) nounwind {
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: ldr q0, [x0]
|
||||
; CHECK-GI-NEXT: ldr q1, [x1]
|
||||
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
|
||||
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
|
||||
; CHECK-GI-NEXT: uabdl.2d v0, v0, v1
|
||||
; CHECK-GI-NEXT: uabdl2.2d v0, v0, v1
|
||||
; CHECK-GI-NEXT: ret
|
||||
%load1 = load <4 x i32>, ptr %A
|
||||
%load2 = load <4 x i32>, ptr %B
|
||||
@ -1132,12 +1120,10 @@ define <8 x i16> @sabal2_8h(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
;
|
||||
; CHECK-GI-LABEL: sabal2_8h:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: ldr q0, [x0]
|
||||
; CHECK-GI-NEXT: ldr q1, [x1]
|
||||
; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
|
||||
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
|
||||
; CHECK-GI-NEXT: ldr q1, [x0]
|
||||
; CHECK-GI-NEXT: ldr q2, [x1]
|
||||
; CHECK-GI-NEXT: ldr q0, [x2]
|
||||
; CHECK-GI-NEXT: sabal.8h v0, v2, v1
|
||||
; CHECK-GI-NEXT: sabal2.8h v0, v1, v2
|
||||
; CHECK-GI-NEXT: ret
|
||||
%load1 = load <16 x i8>, ptr %A
|
||||
%load2 = load <16 x i8>, ptr %B
|
||||
@ -1161,12 +1147,10 @@ define <4 x i32> @sabal2_4s(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
;
|
||||
; CHECK-GI-LABEL: sabal2_4s:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: ldr q0, [x0]
|
||||
; CHECK-GI-NEXT: ldr q1, [x1]
|
||||
; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
|
||||
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
|
||||
; CHECK-GI-NEXT: ldr q1, [x0]
|
||||
; CHECK-GI-NEXT: ldr q2, [x1]
|
||||
; CHECK-GI-NEXT: ldr q0, [x2]
|
||||
; CHECK-GI-NEXT: sabal.4s v0, v2, v1
|
||||
; CHECK-GI-NEXT: sabal2.4s v0, v1, v2
|
||||
; CHECK-GI-NEXT: ret
|
||||
%load1 = load <8 x i16>, ptr %A
|
||||
%load2 = load <8 x i16>, ptr %B
|
||||
@ -1190,12 +1174,10 @@ define <2 x i64> @sabal2_2d(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
;
|
||||
; CHECK-GI-LABEL: sabal2_2d:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: ldr q0, [x0]
|
||||
; CHECK-GI-NEXT: ldr q1, [x1]
|
||||
; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
|
||||
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
|
||||
; CHECK-GI-NEXT: ldr q1, [x0]
|
||||
; CHECK-GI-NEXT: ldr q2, [x1]
|
||||
; CHECK-GI-NEXT: ldr q0, [x2]
|
||||
; CHECK-GI-NEXT: sabal.2d v0, v2, v1
|
||||
; CHECK-GI-NEXT: sabal2.2d v0, v1, v2
|
||||
; CHECK-GI-NEXT: ret
|
||||
%load1 = load <4 x i32>, ptr %A
|
||||
%load2 = load <4 x i32>, ptr %B
|
||||
@ -1270,12 +1252,10 @@ define <8 x i16> @uabal2_8h(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
;
|
||||
; CHECK-GI-LABEL: uabal2_8h:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: ldr q0, [x0]
|
||||
; CHECK-GI-NEXT: ldr q1, [x1]
|
||||
; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
|
||||
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
|
||||
; CHECK-GI-NEXT: ldr q1, [x0]
|
||||
; CHECK-GI-NEXT: ldr q2, [x1]
|
||||
; CHECK-GI-NEXT: ldr q0, [x2]
|
||||
; CHECK-GI-NEXT: uabal.8h v0, v2, v1
|
||||
; CHECK-GI-NEXT: uabal2.8h v0, v1, v2
|
||||
; CHECK-GI-NEXT: ret
|
||||
%load1 = load <16 x i8>, ptr %A
|
||||
%load2 = load <16 x i8>, ptr %B
|
||||
@ -1299,12 +1279,10 @@ define <4 x i32> @uabal2_4s(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
;
|
||||
; CHECK-GI-LABEL: uabal2_4s:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: ldr q0, [x0]
|
||||
; CHECK-GI-NEXT: ldr q1, [x1]
|
||||
; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
|
||||
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
|
||||
; CHECK-GI-NEXT: ldr q1, [x0]
|
||||
; CHECK-GI-NEXT: ldr q2, [x1]
|
||||
; CHECK-GI-NEXT: ldr q0, [x2]
|
||||
; CHECK-GI-NEXT: uabal.4s v0, v2, v1
|
||||
; CHECK-GI-NEXT: uabal2.4s v0, v1, v2
|
||||
; CHECK-GI-NEXT: ret
|
||||
%load1 = load <8 x i16>, ptr %A
|
||||
%load2 = load <8 x i16>, ptr %B
|
||||
@ -1328,12 +1306,10 @@ define <2 x i64> @uabal2_2d(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
;
|
||||
; CHECK-GI-LABEL: uabal2_2d:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: ldr q0, [x0]
|
||||
; CHECK-GI-NEXT: ldr q1, [x1]
|
||||
; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
|
||||
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
|
||||
; CHECK-GI-NEXT: ldr q1, [x0]
|
||||
; CHECK-GI-NEXT: ldr q2, [x1]
|
||||
; CHECK-GI-NEXT: ldr q0, [x2]
|
||||
; CHECK-GI-NEXT: uabal.2d v0, v2, v1
|
||||
; CHECK-GI-NEXT: uabal2.2d v0, v1, v2
|
||||
; CHECK-GI-NEXT: ret
|
||||
%load1 = load <4 x i32>, ptr %A
|
||||
%load2 = load <4 x i32>, ptr %B
|
||||
@ -1607,7 +1583,7 @@ define <2 x i64> @uabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-GI-LABEL: uabdl2_from_extract_dup:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: dup.2s v1, w0
|
||||
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
|
||||
; CHECK-GI-NEXT: mov d0, v0[1]
|
||||
; CHECK-GI-NEXT: uabdl.2d v0, v0, v1
|
||||
; CHECK-GI-NEXT: ret
|
||||
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
|
||||
@ -1642,7 +1618,7 @@ define <2 x i64> @sabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-GI-LABEL: sabdl2_from_extract_dup:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: dup.2s v1, w0
|
||||
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
|
||||
; CHECK-GI-NEXT: mov d0, v0[1]
|
||||
; CHECK-GI-NEXT: sabdl.2d v0, v0, v1
|
||||
; CHECK-GI-NEXT: ret
|
||||
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
|
||||
|
@ -31,8 +31,7 @@ define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ss
|
||||
;
|
||||
; GISEL-LABEL: test_vcvt_high_f64_f32:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: ext.16b v0, v0, v0, #8
|
||||
; GISEL-NEXT: fcvtl v0.2d, v0.2s
|
||||
; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
|
||||
; GISEL-NEXT: ret
|
||||
%cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3>
|
||||
%vcvt1.i = fpext <2 x float> %cvt_in to <2 x double>
|
||||
@ -80,8 +79,7 @@ define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind rea
|
||||
;
|
||||
; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: ext.16b v0, v0, v0, #8
|
||||
; GISEL-NEXT: fcvtl v0.2d, v0.2s
|
||||
; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
|
||||
; GISEL-NEXT: ret
|
||||
%ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
%bc2 = bitcast <2 x i32> %ext to <2 x float>
|
||||
@ -97,7 +95,7 @@ define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind rea
|
||||
;
|
||||
; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: ext.16b v0, v0, v0, #8
|
||||
; GISEL-NEXT: mov d0, v0[1]
|
||||
; GISEL-NEXT: fcvtl v0.2d, v0.2s
|
||||
; GISEL-NEXT: ret
|
||||
%ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
@ -114,7 +112,7 @@ define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind read
|
||||
;
|
||||
; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: ext.16b v0, v0, v0, #8
|
||||
; GISEL-NEXT: mov d0, v0[1]
|
||||
; GISEL-NEXT: fcvtl v0.2d, v0.2s
|
||||
; GISEL-NEXT: ret
|
||||
%ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
@ -147,7 +145,7 @@ define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind read
|
||||
;
|
||||
; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: ext.16b v0, v0, v0, #8
|
||||
; GISEL-NEXT: mov d0, v0[1]
|
||||
; GISEL-NEXT: fcvtl v0.4s, v0.4h
|
||||
; GISEL-NEXT: ret
|
||||
%ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
@ -164,8 +162,7 @@ define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind read
|
||||
;
|
||||
; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: ext.16b v0, v0, v0, #8
|
||||
; GISEL-NEXT: fcvtl v0.4s, v0.4h
|
||||
; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
|
||||
; GISEL-NEXT: ret
|
||||
%ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%bc2 = bitcast <4 x i16> %ext to <4 x half>
|
||||
@ -181,7 +178,7 @@ define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readn
|
||||
;
|
||||
; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: ext.16b v0, v0, v0, #8
|
||||
; GISEL-NEXT: mov d0, v0[1]
|
||||
; GISEL-NEXT: fcvtl v0.4s, v0.4h
|
||||
; GISEL-NEXT: ret
|
||||
%ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
|
Loading…
Reference in New Issue
Block a user