[PowerPC][NFC] Add tests for vector fp <-> int conversions

This NFC patch just adds test cases for conversions that currently
require scalarization of vectors. An upcoming patch will change
the legalization for these, and it is more suitable for the review
to show the differences in code gen rather than just the new code gen.

llvm-svn: 347090
This commit is contained in:
Nemanja Ivanovic 2018-11-16 20:24:10 +00:00
parent a752a87637
commit e274dcbc98
16 changed files with 14768 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,846 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE
; test2elt: reinterprets the i64 argument as <2 x float> and converts both
; lanes to unsigned 64-bit integers (fptoui), returning the <2 x i64> result.
; The autogenerated assertions below pin the llc output for the three run
; configurations declared at the top of this file (pwr8 LE, pwr9 LE, pwr9 BE).
define <2 x i64> @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xscvspdpn f0, vs0
; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3
; CHECK-P8-NEXT: xscvspdpn f1, vs1
; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3
; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P9-NEXT: xvcvdpuxds v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1
; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-BE-NEXT: xvcvdpuxds v2, vs0
; CHECK-BE-NEXT: blr
entry:
; The two float lanes arrive packed in a single 64-bit scalar argument.
%0 = bitcast i64 %a.coerce to <2 x float>
; Unsigned float -> i64 conversion of both lanes; this is the operation under test.
%1 = fptoui <2 x float> %0 to <2 x i64>
ret <2 x i64> %1
}
; test4elt: converts the <4 x float> argument %a to <4 x i64> with fptoui and
; stores the result through the sret pointer %agg.result (32-byte aligned
; store). The autogenerated assertions below pin the llc output for the three
; run configurations declared at the top of this file.
define void @test4elt(<4 x i64>* noalias nocapture sret %agg.result, <4 x float> %a) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1
; CHECK-P8-NEXT: xscvspdpn f2, v2
; CHECK-P8-NEXT: xscvspdpn f0, vs0
; CHECK-P8-NEXT: xscvspdpn f1, vs1
; CHECK-P8-NEXT: xscvspdpn f3, vs3
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
; CHECK-P8-NEXT: xvcvdpuxds v3, vs1
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-P9-NEXT: xxswapd vs1, v2
; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1
; CHECK-P9-NEXT: xscvspdpn f3, v2
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xscvspdpn f2, vs2
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P9-NEXT: xxmrghd vs1, vs3, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1
; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3
; CHECK-BE-NEXT: xxswapd vs2, v2
; CHECK-BE-NEXT: xscvspdpn f3, v2
; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: xxmrghd vs0, vs3, vs0
; CHECK-BE-NEXT: xxmrghd vs1, vs2, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; Unsigned float -> i64 conversion of all four lanes; the operation under test.
%0 = fptoui <4 x float> %a to <4 x i64>
; The 256-bit result is returned indirectly through the sret pointer.
store <4 x i64> %0, <4 x i64>* %agg.result, align 32
ret void
}
; test8elt: loads an <8 x float> vector through the unnamed second parameter
; (%0), converts it to <8 x i64> with fptoui, and stores the result through
; the sret pointer %agg.result. The autogenerated assertions below pin the llc
; output for the three run configurations declared at the top of this file.
define void @test8elt(<8 x i64>* noalias nocapture sret %agg.result, <8 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3
; CHECK-P8-NEXT: xxswapd vs6, v3
; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1
; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1
; CHECK-P8-NEXT: xscvspdpn f2, v2
; CHECK-P8-NEXT: xscvspdpn f4, v3
; CHECK-P8-NEXT: xscvspdpn f0, vs0
; CHECK-P8-NEXT: xscvspdpn f1, vs1
; CHECK-P8-NEXT: xscvspdpn f3, vs3
; CHECK-P8-NEXT: xscvspdpn f5, vs5
; CHECK-P8-NEXT: xscvspdpn f6, vs6
; CHECK-P8-NEXT: xscvspdpn f7, vs7
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
; CHECK-P8-NEXT: xxmrghd vs2, vs6, vs5
; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs7
; CHECK-P8-NEXT: xvcvdpuxds v3, vs1
; CHECK-P8-NEXT: xvcvdpuxds v4, vs2
; CHECK-P8-NEXT: xvcvdpuxds v5, vs3
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: xxswapd vs3, v4
; CHECK-P8-NEXT: xxswapd vs2, v5
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3
; CHECK-P9-NEXT: xxswapd vs3, vs1
; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 1
; CHECK-P9-NEXT: xxsldwi vs5, vs0, vs0, 3
; CHECK-P9-NEXT: xxswapd vs6, vs0
; CHECK-P9-NEXT: xxsldwi vs7, vs0, vs0, 1
; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f2, vs2
; CHECK-P9-NEXT: xscvspdpn f3, vs3
; CHECK-P9-NEXT: xscvspdpn f4, vs4
; CHECK-P9-NEXT: xscvspdpn f5, vs5
; CHECK-P9-NEXT: xscvspdpn f6, vs6
; CHECK-P9-NEXT: xscvspdpn f7, vs7
; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs4
; CHECK-P9-NEXT: xxmrghd vs3, vs6, vs5
; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs7
; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs3, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1
; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3
; CHECK-BE-NEXT: xxswapd vs4, vs1
; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 1
; CHECK-BE-NEXT: xxsldwi vs6, vs0, vs0, 3
; CHECK-BE-NEXT: xxswapd vs7, vs0
; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: xscvspdpn f3, vs3
; CHECK-BE-NEXT: xscvspdpn f4, vs4
; CHECK-BE-NEXT: xscvspdpn f5, vs5
; CHECK-BE-NEXT: xscvspdpn f6, vs6
; CHECK-BE-NEXT: xscvspdpn f7, vs7
; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2
; CHECK-BE-NEXT: xxmrghd vs2, vs4, vs3
; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs5
; CHECK-BE-NEXT: xxmrghd vs3, vs7, vs6
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs0, 32(r3)
; CHECK-BE-NEXT: stxv vs2, 16(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; %0 is the unnamed <8 x float>* parameter; the source vector is read from memory.
%a = load <8 x float>, <8 x float>* %0, align 32
; Unsigned float -> i64 conversion of all eight lanes; the operation under test.
%1 = fptoui <8 x float> %a to <8 x i64>
store <8 x i64> %1, <8 x i64>* %agg.result, align 64
ret void
}
; test16elt: loads a <16 x float> vector through the unnamed second parameter
; (%0), converts it to <16 x i64> with fptoui, and stores the result through
; the sret pointer %agg.result. The autogenerated assertions below pin the llc
; output for the three run configurations declared at the top of this file;
; the pwr9 outputs additionally contain spills/reloads of f30 and f31.
define void @test16elt(<16 x i64>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r7, 48
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: lvx v4, 0, r4
; CHECK-P8-NEXT: li r8, 64
; CHECK-P8-NEXT: lvx v5, r4, r5
; CHECK-P8-NEXT: lvx v3, r4, r7
; CHECK-P8-NEXT: lvx v2, r4, r6
; CHECK-P8-NEXT: li r4, 112
; CHECK-P8-NEXT: xxsldwi vs13, v4, v4, 3
; CHECK-P8-NEXT: xscvspdpn f6, v4
; CHECK-P8-NEXT: xxsldwi vs1, v5, v5, 3
; CHECK-P8-NEXT: xxswapd vs3, v5
; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1
; CHECK-P8-NEXT: xscvspdpn f4, v3
; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1
; CHECK-P8-NEXT: xxsldwi vs10, v3, v3, 3
; CHECK-P8-NEXT: xscvspdpn f1, vs1
; CHECK-P8-NEXT: xxswapd vs11, v3
; CHECK-P8-NEXT: xscvspdpn f3, vs3
; CHECK-P8-NEXT: xxsldwi vs7, v2, v2, 3
; CHECK-P8-NEXT: xscvspdpn f9, vs9
; CHECK-P8-NEXT: xxswapd vs8, v2
; CHECK-P8-NEXT: xscvspdpn f0, v5
; CHECK-P8-NEXT: xxsldwi vs12, v2, v2, 1
; CHECK-P8-NEXT: xscvspdpn f2, v2
; CHECK-P8-NEXT: xxswapd v2, v4
; CHECK-P8-NEXT: xscvspdpn f5, vs5
; CHECK-P8-NEXT: xxsldwi v3, v4, v4, 1
; CHECK-P8-NEXT: xscvspdpn f10, vs10
; CHECK-P8-NEXT: xscvspdpn f11, vs11
; CHECK-P8-NEXT: xxmrghd vs1, vs3, vs1
; CHECK-P8-NEXT: xscvspdpn f7, vs7
; CHECK-P8-NEXT: xxmrghd vs4, vs4, vs9
; CHECK-P8-NEXT: xscvspdpn f8, vs8
; CHECK-P8-NEXT: xscvspdpn f12, vs12
; CHECK-P8-NEXT: xscvspdpn f13, vs13
; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs5
; CHECK-P8-NEXT: xscvspdpn f3, v2
; CHECK-P8-NEXT: xscvspdpn f9, v3
; CHECK-P8-NEXT: xxmrghd vs5, vs11, vs10
; CHECK-P8-NEXT: xvcvdpuxds v3, vs4
; CHECK-P8-NEXT: xvcvdpuxds v2, vs1
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs12
; CHECK-P8-NEXT: xxmrghd vs2, vs8, vs7
; CHECK-P8-NEXT: xvcvdpuxds v4, vs0
; CHECK-P8-NEXT: xxmrghd vs0, vs3, vs13
; CHECK-P8-NEXT: xvcvdpuxds v5, vs5
; CHECK-P8-NEXT: xxmrghd vs3, vs6, vs9
; CHECK-P8-NEXT: xvcvdpuxds v0, vs1
; CHECK-P8-NEXT: xvcvdpuxds v1, vs2
; CHECK-P8-NEXT: xvcvdpuxds v6, vs0
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: xvcvdpuxds v7, vs3
; CHECK-P8-NEXT: xxswapd vs4, v2
; CHECK-P8-NEXT: xxswapd vs3, v4
; CHECK-P8-NEXT: xxswapd vs1, v5
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: li r4, 96
; CHECK-P8-NEXT: xxswapd vs2, v0
; CHECK-P8-NEXT: xxswapd vs0, v1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: xxswapd vs5, v6
; CHECK-P8-NEXT: li r4, 80
; CHECK-P8-NEXT: xxswapd vs1, v7
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, r3, r8
; CHECK-P8-NEXT: stxvd2x vs3, r3, r7
; CHECK-P8-NEXT: stxvd2x vs4, r3, r6
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: lxv vs2, 48(r4)
; CHECK-P9-NEXT: lxv vs3, 32(r4)
; CHECK-P9-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; CHECK-P9-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 3
; CHECK-P9-NEXT: xxswapd vs5, vs1
; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 1
; CHECK-P9-NEXT: xxsldwi vs7, vs0, vs0, 3
; CHECK-P9-NEXT: xxswapd vs8, vs0
; CHECK-P9-NEXT: xxsldwi vs9, vs0, vs0, 1
; CHECK-P9-NEXT: xxsldwi vs10, vs3, vs3, 3
; CHECK-P9-NEXT: xxswapd vs11, vs3
; CHECK-P9-NEXT: xxsldwi vs12, vs3, vs3, 1
; CHECK-P9-NEXT: xxsldwi vs13, vs2, vs2, 3
; CHECK-P9-NEXT: xxswapd v2, vs2
; CHECK-P9-NEXT: xxsldwi v3, vs2, vs2, 1
; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f3, vs3
; CHECK-P9-NEXT: xscvspdpn f2, vs2
; CHECK-P9-NEXT: xscvspdpn f4, vs4
; CHECK-P9-NEXT: xscvspdpn f5, vs5
; CHECK-P9-NEXT: xscvspdpn f6, vs6
; CHECK-P9-NEXT: xscvspdpn f7, vs7
; CHECK-P9-NEXT: xscvspdpn f8, vs8
; CHECK-P9-NEXT: xscvspdpn f9, vs9
; CHECK-P9-NEXT: xscvspdpn f10, vs10
; CHECK-P9-NEXT: xscvspdpn f11, vs11
; CHECK-P9-NEXT: xscvspdpn f12, vs12
; CHECK-P9-NEXT: xscvspdpn f13, vs13
; CHECK-P9-NEXT: xscvspdpn f31, v2
; CHECK-P9-NEXT: xscvspdpn f30, v3
; CHECK-P9-NEXT: xxmrghd vs4, vs5, vs4
; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs6
; CHECK-P9-NEXT: xxmrghd vs5, vs8, vs7
; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs9
; CHECK-P9-NEXT: xxmrghd vs6, vs11, vs10
; CHECK-P9-NEXT: xxmrghd vs3, vs3, vs12
; CHECK-P9-NEXT: xxmrghd vs7, vs31, vs13
; CHECK-P9-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; CHECK-P9-NEXT: xxmrghd vs2, vs2, vs30
; CHECK-P9-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7
; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs5, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs4, 0(r3)
; CHECK-P9-NEXT: stxv vs2, 112(r3)
; CHECK-P9-NEXT: stxv vs7, 96(r3)
; CHECK-P9-NEXT: stxv vs3, 80(r3)
; CHECK-P9-NEXT: stxv vs6, 64(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: lxv vs2, 48(r4)
; CHECK-BE-NEXT: lxv vs3, 32(r4)
; CHECK-BE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: xxsldwi vs4, vs1, vs1, 1
; CHECK-BE-NEXT: xxsldwi vs5, vs1, vs1, 3
; CHECK-BE-NEXT: xxswapd vs6, vs1
; CHECK-BE-NEXT: xxsldwi vs7, vs0, vs0, 1
; CHECK-BE-NEXT: xxsldwi vs8, vs0, vs0, 3
; CHECK-BE-NEXT: xxswapd vs9, vs0
; CHECK-BE-NEXT: xxsldwi vs10, vs3, vs3, 1
; CHECK-BE-NEXT: xxsldwi vs11, vs3, vs3, 3
; CHECK-BE-NEXT: xxswapd vs12, vs3
; CHECK-BE-NEXT: xxsldwi vs13, vs2, vs2, 1
; CHECK-BE-NEXT: xxsldwi v2, vs2, vs2, 3
; CHECK-BE-NEXT: xxswapd v3, vs2
; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xscvspdpn f3, vs3
; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: xscvspdpn f4, vs4
; CHECK-BE-NEXT: xscvspdpn f5, vs5
; CHECK-BE-NEXT: xscvspdpn f6, vs6
; CHECK-BE-NEXT: xscvspdpn f7, vs7
; CHECK-BE-NEXT: xscvspdpn f8, vs8
; CHECK-BE-NEXT: xscvspdpn f9, vs9
; CHECK-BE-NEXT: xscvspdpn f10, vs10
; CHECK-BE-NEXT: xscvspdpn f11, vs11
; CHECK-BE-NEXT: xscvspdpn f12, vs12
; CHECK-BE-NEXT: xscvspdpn f13, vs13
; CHECK-BE-NEXT: xscvspdpn f31, v2
; CHECK-BE-NEXT: xscvspdpn f30, v3
; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs4
; CHECK-BE-NEXT: xxmrghd vs4, vs6, vs5
; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs7
; CHECK-BE-NEXT: xxmrghd vs5, vs9, vs8
; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs10
; CHECK-BE-NEXT: xxmrghd vs6, vs12, vs11
; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs13
; CHECK-BE-NEXT: xxmrghd vs7, vs30, vs31
; CHECK-BE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; CHECK-BE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6
; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7
; CHECK-BE-NEXT: stxv vs5, 48(r3)
; CHECK-BE-NEXT: stxv vs0, 32(r3)
; CHECK-BE-NEXT: stxv vs4, 16(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: stxv vs7, 112(r3)
; CHECK-BE-NEXT: stxv vs2, 96(r3)
; CHECK-BE-NEXT: stxv vs6, 80(r3)
; CHECK-BE-NEXT: stxv vs3, 64(r3)
; CHECK-BE-NEXT: blr
entry:
; %0 is the unnamed <16 x float>* parameter; the source vector is read from memory.
%a = load <16 x float>, <16 x float>* %0, align 64
; Unsigned float -> i64 conversion of all sixteen lanes; the operation under test.
%1 = fptoui <16 x float> %a to <16 x i64>
store <16 x i64> %1, <16 x i64>* %agg.result, align 128
ret void
}
; test2elt_signed: signed counterpart of test2elt. Reinterprets the i64
; argument as <2 x float> and converts both lanes to signed 64-bit integers
; (fptosi), returning the <2 x i64> result.
; NOTE(review): the assertions below expect the signed conversion instruction
; (xvcvdpsxds) in place of the unsigned one; they were adjusted by hand and
; should be regenerated with utils/update_llc_test_checks.py to confirm.
define <2 x i64> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xscvspdpn f0, vs0
; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3
; CHECK-P8-NEXT: xscvspdpn f1, vs1
; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P8-NEXT: xvcvdpsxds v2, vs0
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3
; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P9-NEXT: xvcvdpsxds v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1
; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-BE-NEXT: xvcvdpsxds v2, vs0
; CHECK-BE-NEXT: blr
entry:
; The two float lanes arrive packed in a single 64-bit scalar argument.
%0 = bitcast i64 %a.coerce to <2 x float>
; Signed float -> i64 conversion of both lanes; this is the operation under test.
%1 = fptosi <2 x float> %0 to <2 x i64>
ret <2 x i64> %1
}
; test4elt_signed: signed counterpart of test4elt. Converts the <4 x float>
; argument %a to <4 x i64> with fptosi and stores the result through the sret
; pointer %agg.result (32-byte aligned store).
; NOTE(review): the assertions below expect the signed conversion instruction
; (xvcvdpsxds) in place of the unsigned one; they were adjusted by hand and
; should be regenerated with utils/update_llc_test_checks.py to confirm.
define void @test4elt_signed(<4 x i64>* noalias nocapture sret %agg.result, <4 x float> %a) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1
; CHECK-P8-NEXT: xscvspdpn f2, v2
; CHECK-P8-NEXT: xscvspdpn f0, vs0
; CHECK-P8-NEXT: xscvspdpn f1, vs1
; CHECK-P8-NEXT: xscvspdpn f3, vs3
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
; CHECK-P8-NEXT: xvcvdpsxds v2, vs0
; CHECK-P8-NEXT: xvcvdpsxds v3, vs1
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-P9-NEXT: xxswapd vs1, v2
; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1
; CHECK-P9-NEXT: xscvspdpn f3, v2
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xscvspdpn f2, vs2
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P9-NEXT: xxmrghd vs1, vs3, vs2
; CHECK-P9-NEXT: xvcvdpsxds vs0, vs0
; CHECK-P9-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1
; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3
; CHECK-BE-NEXT: xxswapd vs2, v2
; CHECK-BE-NEXT: xscvspdpn f3, v2
; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: xxmrghd vs0, vs3, vs0
; CHECK-BE-NEXT: xxmrghd vs1, vs2, vs1
; CHECK-BE-NEXT: xvcvdpsxds vs0, vs0
; CHECK-BE-NEXT: xvcvdpsxds vs1, vs1
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; Signed float -> i64 conversion of all four lanes; the operation under test.
%0 = fptosi <4 x float> %a to <4 x i64>
; The 256-bit result is returned indirectly through the sret pointer.
store <4 x i64> %0, <4 x i64>* %agg.result, align 32
ret void
}
; test8elt_signed: signed counterpart of test8elt. Loads an <8 x float> vector
; through the unnamed second parameter (%0), converts it to <8 x i64> with
; fptosi, and stores the result through the sret pointer %agg.result.
; NOTE(review): the assertions below expect the signed conversion instruction
; (xvcvdpsxds) in place of the unsigned one; they were adjusted by hand and
; should be regenerated with utils/update_llc_test_checks.py to confirm.
define void @test8elt_signed(<8 x i64>* noalias nocapture sret %agg.result, <8 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3
; CHECK-P8-NEXT: xxswapd vs6, v3
; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1
; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1
; CHECK-P8-NEXT: xscvspdpn f2, v2
; CHECK-P8-NEXT: xscvspdpn f4, v3
; CHECK-P8-NEXT: xscvspdpn f0, vs0
; CHECK-P8-NEXT: xscvspdpn f1, vs1
; CHECK-P8-NEXT: xscvspdpn f3, vs3
; CHECK-P8-NEXT: xscvspdpn f5, vs5
; CHECK-P8-NEXT: xscvspdpn f6, vs6
; CHECK-P8-NEXT: xscvspdpn f7, vs7
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
; CHECK-P8-NEXT: xxmrghd vs2, vs6, vs5
; CHECK-P8-NEXT: xvcvdpsxds v2, vs0
; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs7
; CHECK-P8-NEXT: xvcvdpsxds v3, vs1
; CHECK-P8-NEXT: xvcvdpsxds v4, vs2
; CHECK-P8-NEXT: xvcvdpsxds v5, vs3
; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: xxswapd vs3, v4
; CHECK-P8-NEXT: xxswapd vs2, v5
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3
; CHECK-P9-NEXT: xxswapd vs3, vs1
; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 1
; CHECK-P9-NEXT: xxsldwi vs5, vs0, vs0, 3
; CHECK-P9-NEXT: xxswapd vs6, vs0
; CHECK-P9-NEXT: xxsldwi vs7, vs0, vs0, 1
; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f2, vs2
; CHECK-P9-NEXT: xscvspdpn f3, vs3
; CHECK-P9-NEXT: xscvspdpn f4, vs4
; CHECK-P9-NEXT: xscvspdpn f5, vs5
; CHECK-P9-NEXT: xscvspdpn f6, vs6
; CHECK-P9-NEXT: xscvspdpn f7, vs7
; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs4
; CHECK-P9-NEXT: xxmrghd vs3, vs6, vs5
; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs7
; CHECK-P9-NEXT: xvcvdpsxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpsxds vs3, vs3
; CHECK-P9-NEXT: xvcvdpsxds vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs3, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1
; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3
; CHECK-BE-NEXT: xxswapd vs4, vs1
; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 1
; CHECK-BE-NEXT: xxsldwi vs6, vs0, vs0, 3
; CHECK-BE-NEXT: xxswapd vs7, vs0
; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: xscvspdpn f3, vs3
; CHECK-BE-NEXT: xscvspdpn f4, vs4
; CHECK-BE-NEXT: xscvspdpn f5, vs5
; CHECK-BE-NEXT: xscvspdpn f6, vs6
; CHECK-BE-NEXT: xscvspdpn f7, vs7
; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2
; CHECK-BE-NEXT: xxmrghd vs2, vs4, vs3
; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs5
; CHECK-BE-NEXT: xxmrghd vs3, vs7, vs6
; CHECK-BE-NEXT: xvcvdpsxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpsxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpsxds vs0, vs0
; CHECK-BE-NEXT: xvcvdpsxds vs3, vs3
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs0, 32(r3)
; CHECK-BE-NEXT: stxv vs2, 16(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; %0 is the unnamed <8 x float>* parameter; the source vector is read from memory.
%a = load <8 x float>, <8 x float>* %0, align 32
; Signed float -> i64 conversion of all eight lanes; the operation under test.
%1 = fptosi <8 x float> %a to <8 x i64>
store <8 x i64> %1, <8 x i64>* %agg.result, align 64
ret void
}
; test16elt_signed: signed counterpart of test16elt. Loads a <16 x float>
; vector through the unnamed second parameter (%0), converts it to <16 x i64>
; with fptosi, and stores the result through the sret pointer %agg.result.
; NOTE(review): the assertions below expect the signed conversion instruction
; (xvcvdpsxds) in place of the unsigned one; they were adjusted by hand and
; should be regenerated with utils/update_llc_test_checks.py to confirm
; (instruction order and register assignment in a function this large are
; the most likely things to differ).
define void @test16elt_signed(<16 x i64>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r7, 48
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: lvx v4, 0, r4
; CHECK-P8-NEXT: li r8, 64
; CHECK-P8-NEXT: lvx v5, r4, r5
; CHECK-P8-NEXT: lvx v3, r4, r7
; CHECK-P8-NEXT: lvx v2, r4, r6
; CHECK-P8-NEXT: li r4, 112
; CHECK-P8-NEXT: xxsldwi vs13, v4, v4, 3
; CHECK-P8-NEXT: xscvspdpn f6, v4
; CHECK-P8-NEXT: xxsldwi vs1, v5, v5, 3
; CHECK-P8-NEXT: xxswapd vs3, v5
; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1
; CHECK-P8-NEXT: xscvspdpn f4, v3
; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1
; CHECK-P8-NEXT: xxsldwi vs10, v3, v3, 3
; CHECK-P8-NEXT: xscvspdpn f1, vs1
; CHECK-P8-NEXT: xxswapd vs11, v3
; CHECK-P8-NEXT: xscvspdpn f3, vs3
; CHECK-P8-NEXT: xxsldwi vs7, v2, v2, 3
; CHECK-P8-NEXT: xscvspdpn f9, vs9
; CHECK-P8-NEXT: xxswapd vs8, v2
; CHECK-P8-NEXT: xscvspdpn f0, v5
; CHECK-P8-NEXT: xxsldwi vs12, v2, v2, 1
; CHECK-P8-NEXT: xscvspdpn f2, v2
; CHECK-P8-NEXT: xxswapd v2, v4
; CHECK-P8-NEXT: xscvspdpn f5, vs5
; CHECK-P8-NEXT: xxsldwi v3, v4, v4, 1
; CHECK-P8-NEXT: xscvspdpn f10, vs10
; CHECK-P8-NEXT: xscvspdpn f11, vs11
; CHECK-P8-NEXT: xxmrghd vs1, vs3, vs1
; CHECK-P8-NEXT: xscvspdpn f7, vs7
; CHECK-P8-NEXT: xxmrghd vs4, vs4, vs9
; CHECK-P8-NEXT: xscvspdpn f8, vs8
; CHECK-P8-NEXT: xscvspdpn f12, vs12
; CHECK-P8-NEXT: xscvspdpn f13, vs13
; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs5
; CHECK-P8-NEXT: xscvspdpn f3, v2
; CHECK-P8-NEXT: xscvspdpn f9, v3
; CHECK-P8-NEXT: xxmrghd vs5, vs11, vs10
; CHECK-P8-NEXT: xvcvdpsxds v3, vs4
; CHECK-P8-NEXT: xvcvdpsxds v2, vs1
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs12
; CHECK-P8-NEXT: xxmrghd vs2, vs8, vs7
; CHECK-P8-NEXT: xvcvdpsxds v4, vs0
; CHECK-P8-NEXT: xxmrghd vs0, vs3, vs13
; CHECK-P8-NEXT: xvcvdpsxds v5, vs5
; CHECK-P8-NEXT: xxmrghd vs3, vs6, vs9
; CHECK-P8-NEXT: xvcvdpsxds v0, vs1
; CHECK-P8-NEXT: xvcvdpsxds v1, vs2
; CHECK-P8-NEXT: xvcvdpsxds v6, vs0
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: xvcvdpsxds v7, vs3
; CHECK-P8-NEXT: xxswapd vs4, v2
; CHECK-P8-NEXT: xxswapd vs3, v4
; CHECK-P8-NEXT: xxswapd vs1, v5
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: li r4, 96
; CHECK-P8-NEXT: xxswapd vs2, v0
; CHECK-P8-NEXT: xxswapd vs0, v1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: xxswapd vs5, v6
; CHECK-P8-NEXT: li r4, 80
; CHECK-P8-NEXT: xxswapd vs1, v7
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, r3, r8
; CHECK-P8-NEXT: stxvd2x vs3, r3, r7
; CHECK-P8-NEXT: stxvd2x vs4, r3, r6
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: lxv vs2, 48(r4)
; CHECK-P9-NEXT: lxv vs3, 32(r4)
; CHECK-P9-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; CHECK-P9-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 3
; CHECK-P9-NEXT: xxswapd vs5, vs1
; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 1
; CHECK-P9-NEXT: xxsldwi vs7, vs0, vs0, 3
; CHECK-P9-NEXT: xxswapd vs8, vs0
; CHECK-P9-NEXT: xxsldwi vs9, vs0, vs0, 1
; CHECK-P9-NEXT: xxsldwi vs10, vs3, vs3, 3
; CHECK-P9-NEXT: xxswapd vs11, vs3
; CHECK-P9-NEXT: xxsldwi vs12, vs3, vs3, 1
; CHECK-P9-NEXT: xxsldwi vs13, vs2, vs2, 3
; CHECK-P9-NEXT: xxswapd v2, vs2
; CHECK-P9-NEXT: xxsldwi v3, vs2, vs2, 1
; CHECK-P9-NEXT: xscvspdpn f1, vs1
; CHECK-P9-NEXT: xscvspdpn f0, vs0
; CHECK-P9-NEXT: xscvspdpn f3, vs3
; CHECK-P9-NEXT: xscvspdpn f2, vs2
; CHECK-P9-NEXT: xscvspdpn f4, vs4
; CHECK-P9-NEXT: xscvspdpn f5, vs5
; CHECK-P9-NEXT: xscvspdpn f6, vs6
; CHECK-P9-NEXT: xscvspdpn f7, vs7
; CHECK-P9-NEXT: xscvspdpn f8, vs8
; CHECK-P9-NEXT: xscvspdpn f9, vs9
; CHECK-P9-NEXT: xscvspdpn f10, vs10
; CHECK-P9-NEXT: xscvspdpn f11, vs11
; CHECK-P9-NEXT: xscvspdpn f12, vs12
; CHECK-P9-NEXT: xscvspdpn f13, vs13
; CHECK-P9-NEXT: xscvspdpn f31, v2
; CHECK-P9-NEXT: xscvspdpn f30, v3
; CHECK-P9-NEXT: xxmrghd vs4, vs5, vs4
; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs6
; CHECK-P9-NEXT: xxmrghd vs5, vs8, vs7
; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs9
; CHECK-P9-NEXT: xxmrghd vs6, vs11, vs10
; CHECK-P9-NEXT: xxmrghd vs3, vs3, vs12
; CHECK-P9-NEXT: xxmrghd vs7, vs31, vs13
; CHECK-P9-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; CHECK-P9-NEXT: xxmrghd vs2, vs2, vs30
; CHECK-P9-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; CHECK-P9-NEXT: xvcvdpsxds vs4, vs4
; CHECK-P9-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpsxds vs5, vs5
; CHECK-P9-NEXT: xvcvdpsxds vs0, vs0
; CHECK-P9-NEXT: xvcvdpsxds vs6, vs6
; CHECK-P9-NEXT: xvcvdpsxds vs3, vs3
; CHECK-P9-NEXT: xvcvdpsxds vs7, vs7
; CHECK-P9-NEXT: xvcvdpsxds vs2, vs2
; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs5, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs4, 0(r3)
; CHECK-P9-NEXT: stxv vs2, 112(r3)
; CHECK-P9-NEXT: stxv vs7, 96(r3)
; CHECK-P9-NEXT: stxv vs3, 80(r3)
; CHECK-P9-NEXT: stxv vs6, 64(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: lxv vs2, 48(r4)
; CHECK-BE-NEXT: lxv vs3, 32(r4)
; CHECK-BE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: xxsldwi vs4, vs1, vs1, 1
; CHECK-BE-NEXT: xxsldwi vs5, vs1, vs1, 3
; CHECK-BE-NEXT: xxswapd vs6, vs1
; CHECK-BE-NEXT: xxsldwi vs7, vs0, vs0, 1
; CHECK-BE-NEXT: xxsldwi vs8, vs0, vs0, 3
; CHECK-BE-NEXT: xxswapd vs9, vs0
; CHECK-BE-NEXT: xxsldwi vs10, vs3, vs3, 1
; CHECK-BE-NEXT: xxsldwi vs11, vs3, vs3, 3
; CHECK-BE-NEXT: xxswapd vs12, vs3
; CHECK-BE-NEXT: xxsldwi vs13, vs2, vs2, 1
; CHECK-BE-NEXT: xxsldwi v2, vs2, vs2, 3
; CHECK-BE-NEXT: xxswapd v3, vs2
; CHECK-BE-NEXT: xscvspdpn f1, vs1
; CHECK-BE-NEXT: xscvspdpn f0, vs0
; CHECK-BE-NEXT: xscvspdpn f3, vs3
; CHECK-BE-NEXT: xscvspdpn f2, vs2
; CHECK-BE-NEXT: xscvspdpn f4, vs4
; CHECK-BE-NEXT: xscvspdpn f5, vs5
; CHECK-BE-NEXT: xscvspdpn f6, vs6
; CHECK-BE-NEXT: xscvspdpn f7, vs7
; CHECK-BE-NEXT: xscvspdpn f8, vs8
; CHECK-BE-NEXT: xscvspdpn f9, vs9
; CHECK-BE-NEXT: xscvspdpn f10, vs10
; CHECK-BE-NEXT: xscvspdpn f11, vs11
; CHECK-BE-NEXT: xscvspdpn f12, vs12
; CHECK-BE-NEXT: xscvspdpn f13, vs13
; CHECK-BE-NEXT: xscvspdpn f31, v2
; CHECK-BE-NEXT: xscvspdpn f30, v3
; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs4
; CHECK-BE-NEXT: xxmrghd vs4, vs6, vs5
; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs7
; CHECK-BE-NEXT: xxmrghd vs5, vs9, vs8
; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs10
; CHECK-BE-NEXT: xxmrghd vs6, vs12, vs11
; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs13
; CHECK-BE-NEXT: xxmrghd vs7, vs30, vs31
; CHECK-BE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; CHECK-BE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; CHECK-BE-NEXT: xvcvdpsxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpsxds vs4, vs4
; CHECK-BE-NEXT: xvcvdpsxds vs0, vs0
; CHECK-BE-NEXT: xvcvdpsxds vs5, vs5
; CHECK-BE-NEXT: xvcvdpsxds vs3, vs3
; CHECK-BE-NEXT: xvcvdpsxds vs6, vs6
; CHECK-BE-NEXT: xvcvdpsxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpsxds vs7, vs7
; CHECK-BE-NEXT: stxv vs5, 48(r3)
; CHECK-BE-NEXT: stxv vs0, 32(r3)
; CHECK-BE-NEXT: stxv vs4, 16(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: stxv vs7, 112(r3)
; CHECK-BE-NEXT: stxv vs2, 96(r3)
; CHECK-BE-NEXT: stxv vs6, 80(r3)
; CHECK-BE-NEXT: stxv vs3, 64(r3)
; CHECK-BE-NEXT: blr
entry:
; %0 is the unnamed <16 x float>* parameter; the source vector is read from memory.
%a = load <16 x float>, <16 x float>* %0, align 64
; Signed float -> i64 conversion of all sixteen lanes; the operation under test.
%1 = fptosi <16 x float> %a to <16 x i64>
store <16 x i64> %1, <16 x i64>* %agg.result, align 128
ret void
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,598 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE
; fptoui <2 x double> -> <2 x i32>; the result is bitcast to i64 and returned.
; For every prefix the checked sequence contains two xscvdpuxws instructions
; and a word merge (vmrglw for the P8/P9 prefixes, vmrghw for the BE prefix).
define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: xscvdpuxws f1, v2
; CHECK-P8-NEXT: xscvdpuxws f0, f0
; CHECK-P8-NEXT: mfvsrwz r3, f1
; CHECK-P8-NEXT: mfvsrwz r4, f0
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: mtvsrd f1, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: vmrglw v2, v2, v3
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mfvsrd r3, f0
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxswapd vs0, v2
; CHECK-P9-NEXT: xscvdpuxws f1, v2
; CHECK-P9-NEXT: xscvdpuxws f0, f0
; CHECK-P9-NEXT: mfvsrwz r3, f1
; CHECK-P9-NEXT: mtvsrws v2, r3
; CHECK-P9-NEXT: mfvsrwz r4, f0
; CHECK-P9-NEXT: mtvsrws v3, r4
; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxswapd vs0, v2
; CHECK-BE-NEXT: xscvdpuxws f1, v2
; CHECK-BE-NEXT: xscvdpuxws f0, f0
; CHECK-BE-NEXT: mfvsrwz r3, f1
; CHECK-BE-NEXT: mtvsrws v2, r3
; CHECK-BE-NEXT: mfvsrwz r4, f0
; CHECK-BE-NEXT: mtvsrws v3, r4
; CHECK-BE-NEXT: vmrghw v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
%0 = fptoui <2 x double> %a to <2 x i32>
%1 = bitcast <2 x i32> %0 to i64
ret i64 %1
}
; fptoui <4 x double> -> <4 x i32>; the source vector is loaded through the
; unnamed pointer argument (%0) and the result is returned by value.
; Every prefix expects two xvcvdpuxws conversions combined with one vmrgew.
define <4 x i32> @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: lxvd2x vs1, 0, r3
; CHECK-P8-NEXT: lxvd2x vs0, r3, r4
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxmrgld vs2, vs0, vs1
; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P8-NEXT: xvcvdpuxws v2, vs2
; CHECK-P8-NEXT: xvcvdpuxws v3, vs0
; CHECK-P8-NEXT: vmrgew v2, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 0(r3)
; CHECK-P9-NEXT: lxv vs1, 16(r3)
; CHECK-P9-NEXT: xxmrgld vs2, vs1, vs0
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P9-NEXT: xvcvdpuxws v2, vs2
; CHECK-P9-NEXT: xvcvdpuxws v3, vs0
; CHECK-P9-NEXT: vmrgew v2, v3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r3)
; CHECK-BE-NEXT: lxv vs1, 0(r3)
; CHECK-BE-NEXT: xxmrgld vs2, vs1, vs0
; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-BE-NEXT: xvcvdpuxws v2, vs2
; CHECK-BE-NEXT: xvcvdpuxws v3, vs0
; CHECK-BE-NEXT: vmrgew v2, v3, v2
; CHECK-BE-NEXT: blr
entry:
%a = load <4 x double>, <4 x double>* %0, align 32
%1 = fptoui <4 x double> %a to <4 x i32>
ret <4 x i32> %1
}
; fptoui <8 x double> -> <8 x i32>; the source is loaded through the unnamed
; pointer argument (%0) and the result is stored to the sret pointer
; %agg.result. Every prefix expects four xvcvdpuxws and two vmrgew.
define void @test8elt(<8 x i32>* noalias nocapture sret %agg.result, <8 x double>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r5
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxmrgld vs4, vs1, vs0
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xxmrgld vs1, vs2, vs3
; CHECK-P8-NEXT: xxmrghd vs2, vs2, vs3
; CHECK-P8-NEXT: xvcvdpuxws v2, vs4
; CHECK-P8-NEXT: xvcvdpuxws v3, vs0
; CHECK-P8-NEXT: xvcvdpuxws v4, vs1
; CHECK-P8-NEXT: xvcvdpuxws v5, vs2
; CHECK-P8-NEXT: vmrgew v2, v3, v2
; CHECK-P8-NEXT: vmrgew v3, v5, v4
; CHECK-P8-NEXT: stvx v2, r3, r5
; CHECK-P8-NEXT: stvx v3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 32(r4)
; CHECK-P9-NEXT: lxv vs1, 48(r4)
; CHECK-P9-NEXT: lxv vs2, 0(r4)
; CHECK-P9-NEXT: lxv vs3, 16(r4)
; CHECK-P9-NEXT: xxmrgld vs4, vs3, vs2
; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
; CHECK-P9-NEXT: xxmrgld vs3, vs1, vs0
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P9-NEXT: xvcvdpuxws v2, vs4
; CHECK-P9-NEXT: xvcvdpuxws v3, vs2
; CHECK-P9-NEXT: xvcvdpuxws v4, vs3
; CHECK-P9-NEXT: xvcvdpuxws v5, vs0
; CHECK-P9-NEXT: vmrgew v2, v3, v2
; CHECK-P9-NEXT: vmrgew v3, v5, v4
; CHECK-P9-NEXT: stxv v3, 16(r3)
; CHECK-P9-NEXT: stxv v2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 48(r4)
; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: xxmrgld vs4, vs3, vs2
; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2
; CHECK-BE-NEXT: xxmrgld vs3, vs1, vs0
; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-BE-NEXT: xvcvdpuxws v2, vs4
; CHECK-BE-NEXT: xvcvdpuxws v3, vs2
; CHECK-BE-NEXT: xvcvdpuxws v4, vs3
; CHECK-BE-NEXT: xvcvdpuxws v5, vs0
; CHECK-BE-NEXT: vmrgew v2, v3, v2
; CHECK-BE-NEXT: vmrgew v3, v5, v4
; CHECK-BE-NEXT: stxv v3, 16(r3)
; CHECK-BE-NEXT: stxv v2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x double>, <8 x double>* %0, align 64
%1 = fptoui <8 x double> %a to <8 x i32>
store <8 x i32> %1, <8 x i32>* %agg.result, align 32
ret void
}
; fptoui <16 x double> -> <16 x i32>; the source is loaded through the unnamed
; pointer argument (%0) and the result is stored to the sret pointer
; %agg.result. Every prefix expects eight xvcvdpuxws and four vmrgew.
define void @test16elt(<16 x i32>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: li r8, 64
; CHECK-P8-NEXT: li r7, 16
; CHECK-P8-NEXT: li r9, 80
; CHECK-P8-NEXT: lxvd2x vs7, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs3, r4, r8
; CHECK-P8-NEXT: li r8, 96
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
; CHECK-P8-NEXT: lxvd2x vs5, r4, r8
; CHECK-P8-NEXT: li r8, 112
; CHECK-P8-NEXT: lxvd2x vs4, r4, r9
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: lxvd2x vs6, r4, r8
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xxswapd vs4, vs4
; CHECK-P8-NEXT: xxswapd vs5, vs5
; CHECK-P8-NEXT: xxmrgld vs8, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs6, vs6
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs7
; CHECK-P8-NEXT: xxmrgld vs7, vs4, vs3
; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs3
; CHECK-P8-NEXT: xxmrgld vs4, vs6, vs5
; CHECK-P8-NEXT: xvcvdpuxws v2, vs8
; CHECK-P8-NEXT: xvcvdpuxws v3, vs0
; CHECK-P8-NEXT: xxmrghd vs0, vs6, vs5
; CHECK-P8-NEXT: xxmrgld vs5, vs2, vs1
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs1
; CHECK-P8-NEXT: xvcvdpuxws v4, vs7
; CHECK-P8-NEXT: xvcvdpuxws v5, vs3
; CHECK-P8-NEXT: xvcvdpuxws v0, vs4
; CHECK-P8-NEXT: xvcvdpuxws v1, vs0
; CHECK-P8-NEXT: xvcvdpuxws v6, vs5
; CHECK-P8-NEXT: xvcvdpuxws v7, vs1
; CHECK-P8-NEXT: vmrgew v2, v3, v2
; CHECK-P8-NEXT: vmrgew v3, v5, v4
; CHECK-P8-NEXT: vmrgew v4, v1, v0
; CHECK-P8-NEXT: vmrgew v5, v7, v6
; CHECK-P8-NEXT: stvx v2, r3, r7
; CHECK-P8-NEXT: stvx v3, r3, r5
; CHECK-P8-NEXT: stvx v4, r3, r6
; CHECK-P8-NEXT: stvx v5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 32(r4)
; CHECK-P9-NEXT: lxv vs1, 48(r4)
; CHECK-P9-NEXT: lxv vs2, 0(r4)
; CHECK-P9-NEXT: lxv vs3, 16(r4)
; CHECK-P9-NEXT: lxv vs4, 96(r4)
; CHECK-P9-NEXT: lxv vs5, 112(r4)
; CHECK-P9-NEXT: lxv vs6, 64(r4)
; CHECK-P9-NEXT: lxv vs7, 80(r4)
; CHECK-P9-NEXT: xxmrgld vs8, vs3, vs2
; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
; CHECK-P9-NEXT: xxmrgld vs3, vs1, vs0
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P9-NEXT: xxmrgld vs1, vs7, vs6
; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6
; CHECK-P9-NEXT: xxmrgld vs7, vs5, vs4
; CHECK-P9-NEXT: xxmrghd vs4, vs5, vs4
; CHECK-P9-NEXT: xvcvdpuxws v2, vs8
; CHECK-P9-NEXT: xvcvdpuxws v3, vs2
; CHECK-P9-NEXT: xvcvdpuxws v4, vs3
; CHECK-P9-NEXT: xvcvdpuxws v5, vs0
; CHECK-P9-NEXT: xvcvdpuxws v0, vs1
; CHECK-P9-NEXT: xvcvdpuxws v1, vs6
; CHECK-P9-NEXT: xvcvdpuxws v6, vs7
; CHECK-P9-NEXT: xvcvdpuxws v7, vs4
; CHECK-P9-NEXT: vmrgew v2, v3, v2
; CHECK-P9-NEXT: vmrgew v3, v5, v4
; CHECK-P9-NEXT: vmrgew v4, v1, v0
; CHECK-P9-NEXT: vmrgew v5, v7, v6
; CHECK-P9-NEXT: stxv v3, 16(r3)
; CHECK-P9-NEXT: stxv v2, 0(r3)
; CHECK-P9-NEXT: stxv v5, 48(r3)
; CHECK-P9-NEXT: stxv v4, 32(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 48(r4)
; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: lxv vs4, 112(r4)
; CHECK-BE-NEXT: lxv vs5, 96(r4)
; CHECK-BE-NEXT: lxv vs6, 80(r4)
; CHECK-BE-NEXT: lxv vs7, 64(r4)
; CHECK-BE-NEXT: xxmrgld vs8, vs3, vs2
; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2
; CHECK-BE-NEXT: xxmrgld vs3, vs1, vs0
; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-BE-NEXT: xxmrgld vs1, vs7, vs6
; CHECK-BE-NEXT: xxmrghd vs6, vs7, vs6
; CHECK-BE-NEXT: xxmrgld vs7, vs5, vs4
; CHECK-BE-NEXT: xxmrghd vs4, vs5, vs4
; CHECK-BE-NEXT: xvcvdpuxws v2, vs8
; CHECK-BE-NEXT: xvcvdpuxws v3, vs2
; CHECK-BE-NEXT: xvcvdpuxws v4, vs3
; CHECK-BE-NEXT: xvcvdpuxws v5, vs0
; CHECK-BE-NEXT: xvcvdpuxws v0, vs1
; CHECK-BE-NEXT: xvcvdpuxws v1, vs6
; CHECK-BE-NEXT: xvcvdpuxws v6, vs7
; CHECK-BE-NEXT: xvcvdpuxws v7, vs4
; CHECK-BE-NEXT: vmrgew v2, v3, v2
; CHECK-BE-NEXT: vmrgew v3, v5, v4
; CHECK-BE-NEXT: vmrgew v4, v1, v0
; CHECK-BE-NEXT: vmrgew v5, v7, v6
; CHECK-BE-NEXT: stxv v3, 16(r3)
; CHECK-BE-NEXT: stxv v2, 0(r3)
; CHECK-BE-NEXT: stxv v5, 48(r3)
; CHECK-BE-NEXT: stxv v4, 32(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x double>, <16 x double>* %0, align 128
%1 = fptoui <16 x double> %a to <16 x i32>
store <16 x i32> %1, <16 x i32>* %agg.result, align 64
ret void
}
; fptosi <2 x double> -> <2 x i32>; the result is bitcast to i64 and returned.
; For every prefix the checked sequence contains two xscvdpsxws instructions
; and a word merge (vmrglw for the P8/P9 prefixes, vmrghw for the BE prefix).
define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: xscvdpsxws f1, v2
; CHECK-P8-NEXT: xscvdpsxws f0, f0
; CHECK-P8-NEXT: mfvsrwz r3, f1
; CHECK-P8-NEXT: mfvsrwz r4, f0
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: mtvsrd f1, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: vmrglw v2, v2, v3
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mfvsrd r3, f0
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxswapd vs0, v2
; CHECK-P9-NEXT: xscvdpsxws f1, v2
; CHECK-P9-NEXT: xscvdpsxws f0, f0
; CHECK-P9-NEXT: mfvsrwz r3, f1
; CHECK-P9-NEXT: mtvsrws v2, r3
; CHECK-P9-NEXT: mfvsrwz r4, f0
; CHECK-P9-NEXT: mtvsrws v3, r4
; CHECK-P9-NEXT: vmrglw v2, v2, v3
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxswapd vs0, v2
; CHECK-BE-NEXT: xscvdpsxws f1, v2
; CHECK-BE-NEXT: xscvdpsxws f0, f0
; CHECK-BE-NEXT: mfvsrwz r3, f1
; CHECK-BE-NEXT: mtvsrws v2, r3
; CHECK-BE-NEXT: mfvsrwz r4, f0
; CHECK-BE-NEXT: mtvsrws v3, r4
; CHECK-BE-NEXT: vmrghw v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
%0 = fptosi <2 x double> %a to <2 x i32>
%1 = bitcast <2 x i32> %0 to i64
ret i64 %1
}
; fptosi <4 x double> -> <4 x i32>; the source vector is loaded through the
; unnamed pointer argument (%0) and the result is returned by value.
; Every prefix expects two xvcvdpsxws conversions combined with one vmrgew.
define <4 x i32> @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: lxvd2x vs1, 0, r3
; CHECK-P8-NEXT: lxvd2x vs0, r3, r4
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxmrgld vs2, vs0, vs1
; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P8-NEXT: xvcvdpsxws v2, vs2
; CHECK-P8-NEXT: xvcvdpsxws v3, vs0
; CHECK-P8-NEXT: vmrgew v2, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 0(r3)
; CHECK-P9-NEXT: lxv vs1, 16(r3)
; CHECK-P9-NEXT: xxmrgld vs2, vs1, vs0
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P9-NEXT: xvcvdpsxws v2, vs2
; CHECK-P9-NEXT: xvcvdpsxws v3, vs0
; CHECK-P9-NEXT: vmrgew v2, v3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r3)
; CHECK-BE-NEXT: lxv vs1, 0(r3)
; CHECK-BE-NEXT: xxmrgld vs2, vs1, vs0
; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-BE-NEXT: xvcvdpsxws v2, vs2
; CHECK-BE-NEXT: xvcvdpsxws v3, vs0
; CHECK-BE-NEXT: vmrgew v2, v3, v2
; CHECK-BE-NEXT: blr
entry:
%a = load <4 x double>, <4 x double>* %0, align 32
%1 = fptosi <4 x double> %a to <4 x i32>
ret <4 x i32> %1
}
; fptosi <8 x double> -> <8 x i32>; the source is loaded through the unnamed
; pointer argument (%0) and the result is stored to the sret pointer
; %agg.result. Every prefix expects four xvcvdpsxws and two vmrgew.
define void @test8elt_signed(<8 x i32>* noalias nocapture sret %agg.result, <8 x double>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r5
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxmrgld vs4, vs1, vs0
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xxmrgld vs1, vs2, vs3
; CHECK-P8-NEXT: xxmrghd vs2, vs2, vs3
; CHECK-P8-NEXT: xvcvdpsxws v2, vs4
; CHECK-P8-NEXT: xvcvdpsxws v3, vs0
; CHECK-P8-NEXT: xvcvdpsxws v4, vs1
; CHECK-P8-NEXT: xvcvdpsxws v5, vs2
; CHECK-P8-NEXT: vmrgew v2, v3, v2
; CHECK-P8-NEXT: vmrgew v3, v5, v4
; CHECK-P8-NEXT: stvx v2, r3, r5
; CHECK-P8-NEXT: stvx v3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 32(r4)
; CHECK-P9-NEXT: lxv vs1, 48(r4)
; CHECK-P9-NEXT: lxv vs2, 0(r4)
; CHECK-P9-NEXT: lxv vs3, 16(r4)
; CHECK-P9-NEXT: xxmrgld vs4, vs3, vs2
; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
; CHECK-P9-NEXT: xxmrgld vs3, vs1, vs0
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P9-NEXT: xvcvdpsxws v2, vs4
; CHECK-P9-NEXT: xvcvdpsxws v3, vs2
; CHECK-P9-NEXT: xvcvdpsxws v4, vs3
; CHECK-P9-NEXT: xvcvdpsxws v5, vs0
; CHECK-P9-NEXT: vmrgew v2, v3, v2
; CHECK-P9-NEXT: vmrgew v3, v5, v4
; CHECK-P9-NEXT: stxv v3, 16(r3)
; CHECK-P9-NEXT: stxv v2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 48(r4)
; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: xxmrgld vs4, vs3, vs2
; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2
; CHECK-BE-NEXT: xxmrgld vs3, vs1, vs0
; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-BE-NEXT: xvcvdpsxws v2, vs4
; CHECK-BE-NEXT: xvcvdpsxws v3, vs2
; CHECK-BE-NEXT: xvcvdpsxws v4, vs3
; CHECK-BE-NEXT: xvcvdpsxws v5, vs0
; CHECK-BE-NEXT: vmrgew v2, v3, v2
; CHECK-BE-NEXT: vmrgew v3, v5, v4
; CHECK-BE-NEXT: stxv v3, 16(r3)
; CHECK-BE-NEXT: stxv v2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x double>, <8 x double>* %0, align 64
%1 = fptosi <8 x double> %a to <8 x i32>
store <8 x i32> %1, <8 x i32>* %agg.result, align 32
ret void
}
; fptosi <16 x double> -> <16 x i32>; the source is loaded through the unnamed
; pointer argument (%0) and the result is stored to the sret pointer
; %agg.result. Every prefix expects eight xvcvdpsxws and four vmrgew.
define void @test16elt_signed(<16 x i32>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: li r8, 64
; CHECK-P8-NEXT: li r7, 16
; CHECK-P8-NEXT: li r9, 80
; CHECK-P8-NEXT: lxvd2x vs7, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs3, r4, r8
; CHECK-P8-NEXT: li r8, 96
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
; CHECK-P8-NEXT: lxvd2x vs5, r4, r8
; CHECK-P8-NEXT: li r8, 112
; CHECK-P8-NEXT: lxvd2x vs4, r4, r9
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: lxvd2x vs6, r4, r8
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xxswapd vs4, vs4
; CHECK-P8-NEXT: xxswapd vs5, vs5
; CHECK-P8-NEXT: xxmrgld vs8, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs6, vs6
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs7
; CHECK-P8-NEXT: xxmrgld vs7, vs4, vs3
; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs3
; CHECK-P8-NEXT: xxmrgld vs4, vs6, vs5
; CHECK-P8-NEXT: xvcvdpsxws v2, vs8
; CHECK-P8-NEXT: xvcvdpsxws v3, vs0
; CHECK-P8-NEXT: xxmrghd vs0, vs6, vs5
; CHECK-P8-NEXT: xxmrgld vs5, vs2, vs1
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs1
; CHECK-P8-NEXT: xvcvdpsxws v4, vs7
; CHECK-P8-NEXT: xvcvdpsxws v5, vs3
; CHECK-P8-NEXT: xvcvdpsxws v0, vs4
; CHECK-P8-NEXT: xvcvdpsxws v1, vs0
; CHECK-P8-NEXT: xvcvdpsxws v6, vs5
; CHECK-P8-NEXT: xvcvdpsxws v7, vs1
; CHECK-P8-NEXT: vmrgew v2, v3, v2
; CHECK-P8-NEXT: vmrgew v3, v5, v4
; CHECK-P8-NEXT: vmrgew v4, v1, v0
; CHECK-P8-NEXT: vmrgew v5, v7, v6
; CHECK-P8-NEXT: stvx v2, r3, r7
; CHECK-P8-NEXT: stvx v3, r3, r5
; CHECK-P8-NEXT: stvx v4, r3, r6
; CHECK-P8-NEXT: stvx v5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 32(r4)
; CHECK-P9-NEXT: lxv vs1, 48(r4)
; CHECK-P9-NEXT: lxv vs2, 0(r4)
; CHECK-P9-NEXT: lxv vs3, 16(r4)
; CHECK-P9-NEXT: lxv vs4, 96(r4)
; CHECK-P9-NEXT: lxv vs5, 112(r4)
; CHECK-P9-NEXT: lxv vs6, 64(r4)
; CHECK-P9-NEXT: lxv vs7, 80(r4)
; CHECK-P9-NEXT: xxmrgld vs8, vs3, vs2
; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
; CHECK-P9-NEXT: xxmrgld vs3, vs1, vs0
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P9-NEXT: xxmrgld vs1, vs7, vs6
; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6
; CHECK-P9-NEXT: xxmrgld vs7, vs5, vs4
; CHECK-P9-NEXT: xxmrghd vs4, vs5, vs4
; CHECK-P9-NEXT: xvcvdpsxws v2, vs8
; CHECK-P9-NEXT: xvcvdpsxws v3, vs2
; CHECK-P9-NEXT: xvcvdpsxws v4, vs3
; CHECK-P9-NEXT: xvcvdpsxws v5, vs0
; CHECK-P9-NEXT: xvcvdpsxws v0, vs1
; CHECK-P9-NEXT: xvcvdpsxws v1, vs6
; CHECK-P9-NEXT: xvcvdpsxws v6, vs7
; CHECK-P9-NEXT: xvcvdpsxws v7, vs4
; CHECK-P9-NEXT: vmrgew v2, v3, v2
; CHECK-P9-NEXT: vmrgew v3, v5, v4
; CHECK-P9-NEXT: vmrgew v4, v1, v0
; CHECK-P9-NEXT: vmrgew v5, v7, v6
; CHECK-P9-NEXT: stxv v3, 16(r3)
; CHECK-P9-NEXT: stxv v2, 0(r3)
; CHECK-P9-NEXT: stxv v5, 48(r3)
; CHECK-P9-NEXT: stxv v4, 32(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 48(r4)
; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: lxv vs4, 112(r4)
; CHECK-BE-NEXT: lxv vs5, 96(r4)
; CHECK-BE-NEXT: lxv vs6, 80(r4)
; CHECK-BE-NEXT: lxv vs7, 64(r4)
; CHECK-BE-NEXT: xxmrgld vs8, vs3, vs2
; CHECK-BE-NEXT: xxmrghd vs2, vs3, vs2
; CHECK-BE-NEXT: xxmrgld vs3, vs1, vs0
; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-BE-NEXT: xxmrgld vs1, vs7, vs6
; CHECK-BE-NEXT: xxmrghd vs6, vs7, vs6
; CHECK-BE-NEXT: xxmrgld vs7, vs5, vs4
; CHECK-BE-NEXT: xxmrghd vs4, vs5, vs4
; CHECK-BE-NEXT: xvcvdpsxws v2, vs8
; CHECK-BE-NEXT: xvcvdpsxws v3, vs2
; CHECK-BE-NEXT: xvcvdpsxws v4, vs3
; CHECK-BE-NEXT: xvcvdpsxws v5, vs0
; CHECK-BE-NEXT: xvcvdpsxws v0, vs1
; CHECK-BE-NEXT: xvcvdpsxws v1, vs6
; CHECK-BE-NEXT: xvcvdpsxws v6, vs7
; CHECK-BE-NEXT: xvcvdpsxws v7, vs4
; CHECK-BE-NEXT: vmrgew v2, v3, v2
; CHECK-BE-NEXT: vmrgew v3, v5, v4
; CHECK-BE-NEXT: vmrgew v4, v1, v0
; CHECK-BE-NEXT: vmrgew v5, v7, v6
; CHECK-BE-NEXT: stxv v3, 16(r3)
; CHECK-BE-NEXT: stxv v2, 0(r3)
; CHECK-BE-NEXT: stxv v5, 48(r3)
; CHECK-BE-NEXT: stxv v4, 32(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x double>, <16 x double>* %0, align 128
%1 = fptosi <16 x double> %a to <16 x i32>
store <16 x i32> %1, <16 x i32>* %agg.result, align 64
ret void
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,304 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE
; The i64 argument is bitcast to <2 x float>, converted with fptoui to
; <2 x i32>, and the result is bitcast back to i64 and returned.
; Every prefix expects a single xvcvspuxws between the GPR<->VSR moves.
define i64 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xvcvspuxws vs0, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: mfvsrd r3, f0
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xvcvspuxws vs0, v2
; CHECK-P9-NEXT: mfvsrld r3, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
; CHECK-BE-NEXT: xvcvspuxws vs0, vs0
; CHECK-BE-NEXT: mfvsrd r3, f0
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i64 %a.coerce to <2 x float>
%1 = fptoui <2 x float> %0 to <2 x i32>
%2 = bitcast <2 x i32> %1 to i64
ret i64 %2
}
; fptoui <4 x float> -> <4 x i32>, argument and result both passed by value.
; Every prefix expects exactly one xvcvspuxws v2, v2 followed by blr.
define <4 x i32> @test4elt(<4 x float> %a) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xvcvspuxws v2, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xvcvspuxws v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xvcvspuxws v2, v2
; CHECK-BE-NEXT: blr
entry:
%0 = fptoui <4 x float> %a to <4 x i32>
ret <4 x i32> %0
}
; fptoui <8 x float> -> <8 x i32>; the source is loaded through the unnamed
; pointer argument (%0) and the result is stored to the sret pointer
; %agg.result. Every prefix expects two loads, two xvcvspuxws and two stores.
define void @test8elt(<8 x i32>* noalias nocapture sret %agg.result, <8 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: xvcvspuxws v3, v3
; CHECK-P8-NEXT: xvcvspuxws v2, v2
; CHECK-P8-NEXT: stvx v3, 0, r3
; CHECK-P8-NEXT: stvx v2, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xvcvspuxws vs1, vs1
; CHECK-P9-NEXT: xvcvspuxws vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 16(r3)
; CHECK-P9-NEXT: stxv vs1, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: xvcvspuxws vs1, vs1
; CHECK-BE-NEXT: xvcvspuxws vs0, vs0
; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x float>, <8 x float>* %0, align 32
%1 = fptoui <8 x float> %a to <8 x i32>
store <8 x i32> %1, <8 x i32>* %agg.result, align 32
ret void
}
; fptoui <16 x float> -> <16 x i32>; the source is loaded through the unnamed
; pointer argument (%0) and the result is stored to the sret pointer
; %agg.result. Every prefix expects four loads, four xvcvspuxws, four stores.
define void @test16elt(<16 x i32>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 48
; CHECK-P8-NEXT: lvx v5, 0, r4
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: lvx v3, r4, r6
; CHECK-P8-NEXT: lvx v4, r4, r7
; CHECK-P8-NEXT: xvcvspuxws v5, v5
; CHECK-P8-NEXT: xvcvspuxws v2, v2
; CHECK-P8-NEXT: xvcvspuxws v3, v3
; CHECK-P8-NEXT: xvcvspuxws v4, v4
; CHECK-P8-NEXT: stvx v5, 0, r3
; CHECK-P8-NEXT: stvx v2, r3, r5
; CHECK-P8-NEXT: stvx v3, r3, r6
; CHECK-P8-NEXT: stvx v4, r3, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 48(r4)
; CHECK-P9-NEXT: lxv vs1, 32(r4)
; CHECK-P9-NEXT: lxv vs2, 16(r4)
; CHECK-P9-NEXT: lxv vs3, 0(r4)
; CHECK-P9-NEXT: xvcvspuxws vs3, vs3
; CHECK-P9-NEXT: xvcvspuxws vs2, vs2
; CHECK-P9-NEXT: xvcvspuxws vs1, vs1
; CHECK-P9-NEXT: xvcvspuxws vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs1, 32(r3)
; CHECK-P9-NEXT: stxv vs2, 16(r3)
; CHECK-P9-NEXT: stxv vs3, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 48(r4)
; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: xvcvspuxws vs3, vs3
; CHECK-BE-NEXT: xvcvspuxws vs2, vs2
; CHECK-BE-NEXT: xvcvspuxws vs1, vs1
; CHECK-BE-NEXT: xvcvspuxws vs0, vs0
; CHECK-BE-NEXT: stxv vs0, 48(r3)
; CHECK-BE-NEXT: stxv vs1, 32(r3)
; CHECK-BE-NEXT: stxv vs2, 16(r3)
; CHECK-BE-NEXT: stxv vs3, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x float>, <16 x float>* %0, align 64
%1 = fptoui <16 x float> %a to <16 x i32>
store <16 x i32> %1, <16 x i32>* %agg.result, align 64
ret void
}
; The i64 argument is bitcast to <2 x float>, converted with fptosi to
; <2 x i32>, and the result is bitcast back to i64 and returned.
; Every prefix expects a single xvcvspsxws between the GPR<->VSR moves.
define i64 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xvcvspsxws vs0, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: mfvsrd r3, f0
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xvcvspsxws vs0, v2
; CHECK-P9-NEXT: mfvsrld r3, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
; CHECK-BE-NEXT: xvcvspsxws vs0, vs0
; CHECK-BE-NEXT: mfvsrd r3, f0
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i64 %a.coerce to <2 x float>
%1 = fptosi <2 x float> %0 to <2 x i32>
%2 = bitcast <2 x i32> %1 to i64
ret i64 %2
}
; fptosi <4 x float> -> <4 x i32>, argument and result both passed by value.
; Every prefix expects exactly one xvcvspsxws v2, v2 followed by blr.
define <4 x i32> @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xvcvspsxws v2, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xvcvspsxws v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xvcvspsxws v2, v2
; CHECK-BE-NEXT: blr
entry:
%0 = fptosi <4 x float> %a to <4 x i32>
ret <4 x i32> %0
}
; fptosi <8 x float> -> <8 x i32>; the source is loaded through the unnamed
; pointer argument (%0) and the result is stored to the sret pointer
; %agg.result. Every prefix expects two loads, two xvcvspsxws and two stores.
define void @test8elt_signed(<8 x i32>* noalias nocapture sret %agg.result, <8 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: xvcvspsxws v3, v3
; CHECK-P8-NEXT: xvcvspsxws v2, v2
; CHECK-P8-NEXT: stvx v3, 0, r3
; CHECK-P8-NEXT: stvx v2, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xvcvspsxws vs1, vs1
; CHECK-P9-NEXT: xvcvspsxws vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 16(r3)
; CHECK-P9-NEXT: stxv vs1, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: xvcvspsxws vs1, vs1
; CHECK-BE-NEXT: xvcvspsxws vs0, vs0
; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x float>, <8 x float>* %0, align 32
%1 = fptosi <8 x float> %a to <8 x i32>
store <8 x i32> %1, <8 x i32>* %agg.result, align 32
ret void
}
; fptosi <16 x float> -> <16 x i32>; the source is loaded through the unnamed
; pointer argument (%0) and the result is stored to the sret pointer
; %agg.result. Every prefix expects four loads, four xvcvspsxws, four stores.
define void @test16elt_signed(<16 x i32>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 48
; CHECK-P8-NEXT: lvx v5, 0, r4
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: lvx v3, r4, r6
; CHECK-P8-NEXT: lvx v4, r4, r7
; CHECK-P8-NEXT: xvcvspsxws v5, v5
; CHECK-P8-NEXT: xvcvspsxws v2, v2
; CHECK-P8-NEXT: xvcvspsxws v3, v3
; CHECK-P8-NEXT: xvcvspsxws v4, v4
; CHECK-P8-NEXT: stvx v5, 0, r3
; CHECK-P8-NEXT: stvx v2, r3, r5
; CHECK-P8-NEXT: stvx v3, r3, r6
; CHECK-P8-NEXT: stvx v4, r3, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 48(r4)
; CHECK-P9-NEXT: lxv vs1, 32(r4)
; CHECK-P9-NEXT: lxv vs2, 16(r4)
; CHECK-P9-NEXT: lxv vs3, 0(r4)
; CHECK-P9-NEXT: xvcvspsxws vs3, vs3
; CHECK-P9-NEXT: xvcvspsxws vs2, vs2
; CHECK-P9-NEXT: xvcvspsxws vs1, vs1
; CHECK-P9-NEXT: xvcvspsxws vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs1, 32(r3)
; CHECK-P9-NEXT: stxv vs2, 16(r3)
; CHECK-P9-NEXT: stxv vs3, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 48(r4)
; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: xvcvspsxws vs3, vs3
; CHECK-BE-NEXT: xvcvspsxws vs2, vs2
; CHECK-BE-NEXT: xvcvspsxws vs1, vs1
; CHECK-BE-NEXT: xvcvspsxws vs0, vs0
; CHECK-BE-NEXT: stxv vs0, 48(r3)
; CHECK-BE-NEXT: stxv vs1, 32(r3)
; CHECK-BE-NEXT: stxv vs2, 16(r3)
; CHECK-BE-NEXT: stxv vs3, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x float>, <16 x float>* %0, align 64
%1 = fptosi <16 x float> %a to <16 x i32>
store <16 x i32> %1, <16 x i32>* %agg.result, align 64
ret void
}

View File

@ -0,0 +1,438 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE
; Unsigned conversion (fptoui) of a <2 x double> argument to a <2 x i64>
; return value. The assertions for pwr8 LE, pwr9 LE and pwr9 BE each expect
; exactly one xvcvdpuxds operating in place on v2, followed by blr.
define <2 x i64> @test2elt(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xvcvdpuxds v2, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xvcvdpuxds v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xvcvdpuxds v2, v2
; CHECK-BE-NEXT: blr
entry:
%0 = fptoui <2 x double> %a to <2 x i64>
ret <2 x i64> %0
}
; Unsigned conversion (fptoui) of a <4 x double> loaded from the second
; argument into a <4 x i64> stored through the sret pointer %agg.result.
; Every configuration expects two vector loads, two xvcvdpuxds conversions and
; two vector stores; pwr8 uses the indexed lxvd2x/stxvd2x forms with an offset
; register, pwr9 (LE and BE) uses lxv/stxv with immediate displacements.
define void @test4elt(<4 x i64>* noalias nocapture sret %agg.result, <4 x double>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P8-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 16(r3)
; CHECK-P9-NEXT: stxv vs1, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <4 x double>, <4 x double>* %0, align 32
%1 = fptoui <4 x double> %a to <4 x i64>
store <4 x i64> %1, <4 x i64>* %agg.result, align 32
ret void
}
; Unsigned conversion (fptoui) of a <8 x double> loaded from the second
; argument into a <8 x i64> stored through the sret pointer %agg.result.
; Every configuration expects four vector loads, four xvcvdpuxds conversions
; and four vector stores covering byte offsets 0, 16, 32 and 48.
define void @test8elt(<8 x i64>* noalias nocapture sret %agg.result, <8 x double>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 48
; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
; CHECK-P8-NEXT: xvcvdpuxds vs3, vs3
; CHECK-P8-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P8-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P8-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 48(r4)
; CHECK-P9-NEXT: lxv vs1, 32(r4)
; CHECK-P9-NEXT: lxv vs2, 16(r4)
; CHECK-P9-NEXT: lxv vs3, 0(r4)
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs1, 32(r3)
; CHECK-P9-NEXT: stxv vs2, 16(r3)
; CHECK-P9-NEXT: stxv vs3, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 48(r4)
; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
; CHECK-BE-NEXT: stxv vs0, 48(r3)
; CHECK-BE-NEXT: stxv vs1, 32(r3)
; CHECK-BE-NEXT: stxv vs2, 16(r3)
; CHECK-BE-NEXT: stxv vs3, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x double>, <8 x double>* %0, align 64
%1 = fptoui <8 x double> %a to <8 x i64>
store <8 x i64> %1, <8 x i64>* %agg.result, align 64
ret void
}
; Unsigned conversion (fptoui) of a <16 x double> loaded from the second
; argument into a <16 x i64> stored through the sret pointer %agg.result.
; Every configuration expects eight vector loads, eight xvcvdpuxds conversions
; and eight vector stores covering byte offsets 0 through 112 in steps of 16.
; On pwr8 the seven non-zero offsets are first materialised into r5-r11 with li.
define void @test16elt(<16 x i64>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 64
; CHECK-P8-NEXT: li r8, 96
; CHECK-P8-NEXT: li r9, 112
; CHECK-P8-NEXT: li r10, 80
; CHECK-P8-NEXT: li r11, 48
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
; CHECK-P8-NEXT: lxvd2x vs3, r4, r8
; CHECK-P8-NEXT: lxvd2x vs4, r4, r9
; CHECK-P8-NEXT: lxvd2x vs5, r4, r10
; CHECK-P8-NEXT: lxvd2x vs6, r4, r11
; CHECK-P8-NEXT: lxvd2x vs7, 0, r4
; CHECK-P8-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P8-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P8-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P8-NEXT: xvcvdpuxds vs3, vs3
; CHECK-P8-NEXT: xvcvdpuxds vs4, vs4
; CHECK-P8-NEXT: xvcvdpuxds vs5, vs5
; CHECK-P8-NEXT: xvcvdpuxds vs6, vs6
; CHECK-P8-NEXT: xvcvdpuxds vs7, vs7
; CHECK-P8-NEXT: stxvd2x vs4, r3, r9
; CHECK-P8-NEXT: stxvd2x vs3, r3, r8
; CHECK-P8-NEXT: stxvd2x vs5, r3, r10
; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
; CHECK-P8-NEXT: stxvd2x vs6, r3, r11
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs7, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 48(r4)
; CHECK-P9-NEXT: lxv vs1, 32(r4)
; CHECK-P9-NEXT: lxv vs2, 16(r4)
; CHECK-P9-NEXT: lxv vs3, 0(r4)
; CHECK-P9-NEXT: lxv vs4, 112(r4)
; CHECK-P9-NEXT: lxv vs5, 96(r4)
; CHECK-P9-NEXT: lxv vs6, 80(r4)
; CHECK-P9-NEXT: lxv vs7, 64(r4)
; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7
; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6
; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs1, 32(r3)
; CHECK-P9-NEXT: stxv vs2, 16(r3)
; CHECK-P9-NEXT: stxv vs3, 0(r3)
; CHECK-P9-NEXT: stxv vs4, 112(r3)
; CHECK-P9-NEXT: stxv vs5, 96(r3)
; CHECK-P9-NEXT: stxv vs6, 80(r3)
; CHECK-P9-NEXT: stxv vs7, 64(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 48(r4)
; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: lxv vs4, 112(r4)
; CHECK-BE-NEXT: lxv vs5, 96(r4)
; CHECK-BE-NEXT: lxv vs6, 80(r4)
; CHECK-BE-NEXT: lxv vs7, 64(r4)
; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7
; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6
; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
; CHECK-BE-NEXT: stxv vs0, 48(r3)
; CHECK-BE-NEXT: stxv vs1, 32(r3)
; CHECK-BE-NEXT: stxv vs2, 16(r3)
; CHECK-BE-NEXT: stxv vs3, 0(r3)
; CHECK-BE-NEXT: stxv vs4, 112(r3)
; CHECK-BE-NEXT: stxv vs5, 96(r3)
; CHECK-BE-NEXT: stxv vs6, 80(r3)
; CHECK-BE-NEXT: stxv vs7, 64(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x double>, <16 x double>* %0, align 128
%1 = fptoui <16 x double> %a to <16 x i64>
store <16 x i64> %1, <16 x i64>* %agg.result, align 128
ret void
}
; Signed conversion (fptosi) of a <2 x double> argument to a <2 x i64> return
; value. The assertions for pwr8 LE, pwr9 LE and pwr9 BE each expect exactly
; one xvcvdpsxds operating in place on v2, followed by blr.
define <2 x i64> @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xvcvdpsxds v2, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xvcvdpsxds v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xvcvdpsxds v2, v2
; CHECK-BE-NEXT: blr
entry:
%0 = fptosi <2 x double> %a to <2 x i64>
ret <2 x i64> %0
}
; Signed conversion (fptosi) of a <4 x double> loaded from the second argument
; into a <4 x i64> stored through the sret pointer %agg.result.
; Every configuration expects two vector loads, two xvcvdpsxds conversions and
; two vector stores; the instruction sequence mirrors the unsigned test4elt
; in this file with xvcvdpsxds in place of xvcvdpuxds.
define void @test4elt_signed(<4 x i64>* noalias nocapture sret %agg.result, <4 x double>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P8-NEXT: xvcvdpsxds vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpsxds vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 16(r3)
; CHECK-P9-NEXT: stxv vs1, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: xvcvdpsxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpsxds vs0, vs0
; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <4 x double>, <4 x double>* %0, align 32
%1 = fptosi <4 x double> %a to <4 x i64>
store <4 x i64> %1, <4 x i64>* %agg.result, align 32
ret void
}
; Signed conversion (fptosi) of a <8 x double> loaded from the second argument
; into a <8 x i64> stored through the sret pointer %agg.result.
; Every configuration expects four vector loads, four xvcvdpsxds conversions
; and four vector stores covering byte offsets 0, 16, 32 and 48.
define void @test8elt_signed(<8 x i64>* noalias nocapture sret %agg.result, <8 x double>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 48
; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
; CHECK-P8-NEXT: xvcvdpsxds vs3, vs3
; CHECK-P8-NEXT: xvcvdpsxds vs0, vs0
; CHECK-P8-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P8-NEXT: xvcvdpsxds vs2, vs2
; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 48(r4)
; CHECK-P9-NEXT: lxv vs1, 32(r4)
; CHECK-P9-NEXT: lxv vs2, 16(r4)
; CHECK-P9-NEXT: lxv vs3, 0(r4)
; CHECK-P9-NEXT: xvcvdpsxds vs3, vs3
; CHECK-P9-NEXT: xvcvdpsxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpsxds vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs1, 32(r3)
; CHECK-P9-NEXT: stxv vs2, 16(r3)
; CHECK-P9-NEXT: stxv vs3, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 48(r4)
; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: xvcvdpsxds vs3, vs3
; CHECK-BE-NEXT: xvcvdpsxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpsxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpsxds vs0, vs0
; CHECK-BE-NEXT: stxv vs0, 48(r3)
; CHECK-BE-NEXT: stxv vs1, 32(r3)
; CHECK-BE-NEXT: stxv vs2, 16(r3)
; CHECK-BE-NEXT: stxv vs3, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x double>, <8 x double>* %0, align 64
%1 = fptosi <8 x double> %a to <8 x i64>
store <8 x i64> %1, <8 x i64>* %agg.result, align 64
ret void
}
; Signed conversion (fptosi) of a <16 x double> loaded from the second
; argument into a <16 x i64> stored through the sret pointer %agg.result.
; Every configuration expects eight vector loads, eight xvcvdpsxds conversions
; and eight vector stores covering byte offsets 0 through 112 in steps of 16.
; On pwr8 the seven non-zero offsets are first materialised into r5-r11 with li.
define void @test16elt_signed(<16 x i64>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 64
; CHECK-P8-NEXT: li r8, 96
; CHECK-P8-NEXT: li r9, 112
; CHECK-P8-NEXT: li r10, 80
; CHECK-P8-NEXT: li r11, 48
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
; CHECK-P8-NEXT: lxvd2x vs3, r4, r8
; CHECK-P8-NEXT: lxvd2x vs4, r4, r9
; CHECK-P8-NEXT: lxvd2x vs5, r4, r10
; CHECK-P8-NEXT: lxvd2x vs6, r4, r11
; CHECK-P8-NEXT: lxvd2x vs7, 0, r4
; CHECK-P8-NEXT: xvcvdpsxds vs0, vs0
; CHECK-P8-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P8-NEXT: xvcvdpsxds vs2, vs2
; CHECK-P8-NEXT: xvcvdpsxds vs3, vs3
; CHECK-P8-NEXT: xvcvdpsxds vs4, vs4
; CHECK-P8-NEXT: xvcvdpsxds vs5, vs5
; CHECK-P8-NEXT: xvcvdpsxds vs6, vs6
; CHECK-P8-NEXT: xvcvdpsxds vs7, vs7
; CHECK-P8-NEXT: stxvd2x vs4, r3, r9
; CHECK-P8-NEXT: stxvd2x vs3, r3, r8
; CHECK-P8-NEXT: stxvd2x vs5, r3, r10
; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
; CHECK-P8-NEXT: stxvd2x vs6, r3, r11
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs7, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 48(r4)
; CHECK-P9-NEXT: lxv vs1, 32(r4)
; CHECK-P9-NEXT: lxv vs2, 16(r4)
; CHECK-P9-NEXT: lxv vs3, 0(r4)
; CHECK-P9-NEXT: lxv vs4, 112(r4)
; CHECK-P9-NEXT: lxv vs5, 96(r4)
; CHECK-P9-NEXT: lxv vs6, 80(r4)
; CHECK-P9-NEXT: lxv vs7, 64(r4)
; CHECK-P9-NEXT: xvcvdpsxds vs3, vs3
; CHECK-P9-NEXT: xvcvdpsxds vs2, vs2
; CHECK-P9-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P9-NEXT: xvcvdpsxds vs0, vs0
; CHECK-P9-NEXT: xvcvdpsxds vs7, vs7
; CHECK-P9-NEXT: xvcvdpsxds vs6, vs6
; CHECK-P9-NEXT: xvcvdpsxds vs5, vs5
; CHECK-P9-NEXT: xvcvdpsxds vs4, vs4
; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs1, 32(r3)
; CHECK-P9-NEXT: stxv vs2, 16(r3)
; CHECK-P9-NEXT: stxv vs3, 0(r3)
; CHECK-P9-NEXT: stxv vs4, 112(r3)
; CHECK-P9-NEXT: stxv vs5, 96(r3)
; CHECK-P9-NEXT: stxv vs6, 80(r3)
; CHECK-P9-NEXT: stxv vs7, 64(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 48(r4)
; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: lxv vs4, 112(r4)
; CHECK-BE-NEXT: lxv vs5, 96(r4)
; CHECK-BE-NEXT: lxv vs6, 80(r4)
; CHECK-BE-NEXT: lxv vs7, 64(r4)
; CHECK-BE-NEXT: xvcvdpsxds vs3, vs3
; CHECK-BE-NEXT: xvcvdpsxds vs2, vs2
; CHECK-BE-NEXT: xvcvdpsxds vs1, vs1
; CHECK-BE-NEXT: xvcvdpsxds vs0, vs0
; CHECK-BE-NEXT: xvcvdpsxds vs7, vs7
; CHECK-BE-NEXT: xvcvdpsxds vs6, vs6
; CHECK-BE-NEXT: xvcvdpsxds vs5, vs5
; CHECK-BE-NEXT: xvcvdpsxds vs4, vs4
; CHECK-BE-NEXT: stxv vs0, 48(r3)
; CHECK-BE-NEXT: stxv vs1, 32(r3)
; CHECK-BE-NEXT: stxv vs2, 16(r3)
; CHECK-BE-NEXT: stxv vs3, 0(r3)
; CHECK-BE-NEXT: stxv vs4, 112(r3)
; CHECK-BE-NEXT: stxv vs5, 96(r3)
; CHECK-BE-NEXT: stxv vs6, 80(r3)
; CHECK-BE-NEXT: stxv vs7, 64(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x double>, <16 x double>* %0, align 128
%1 = fptosi <16 x double> %a to <16 x i64>
store <16 x i64> %1, <16 x i64>* %agg.result, align 128
ret void
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,828 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE
; Unsigned conversion (uitofp) of a <2 x i16>, passed bit-cast inside an i32
; argument, to a <2 x double> return value.
; Every configuration expects the GPR to be moved into a vector register, a
; vperm that combines it with a zeroed register (xxlxor) under a control vector
; loaded from .LCPI0_0, and then a single xvcvuxddp. pwr8 moves the argument
; with mtvsrd + xxswapd; pwr9 (LE and BE) uses mtvsrws. The BE assertions list
; the two vperm data operands in the opposite order from the LE ones.
define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: addi r3, r4, .LCPI0_0@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-P8-NEXT: xvcvuxddp v2, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r4, r2, .LCPI0_0@toc@ha
; CHECK-P9-NEXT: mtvsrws v3, r3
; CHECK-P9-NEXT: xxlxor v4, v4, v4
; CHECK-P9-NEXT: addi r4, r4, .LCPI0_0@toc@l
; CHECK-P9-NEXT: lxvx v2, 0, r4
; CHECK-P9-NEXT: vperm v2, v4, v3, v2
; CHECK-P9-NEXT: xvcvuxddp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
; CHECK-BE-NEXT: mtvsrws v3, r3
; CHECK-BE-NEXT: xxlxor v4, v4, v4
; CHECK-BE-NEXT: addi r4, r4, .LCPI0_0@toc@l
; CHECK-BE-NEXT: lxvx v2, 0, r4
; CHECK-BE-NEXT: vperm v2, v3, v4, v2
; CHECK-BE-NEXT: xvcvuxddp v2, v2
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i32 %a.coerce to <2 x i16>
%1 = uitofp <2 x i16> %0 to <2 x double>
ret <2 x double> %1
}
; Unsigned conversion (uitofp) of a <4 x i16>, passed bit-cast inside an i64
; argument, to a <4 x double> stored through the sret pointer %agg.result.
; Every configuration expects two vperm instructions against a zeroed register
; (xxlxor), using control vectors loaded from .LCPI1_0 and .LCPI1_1, followed by
; two xvcvuxddp conversions and two vector stores. The pwr8 assertions also
; contain an xxswapd of each converted vector ahead of the stxvd2x stores.
define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT: mtvsrd f0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: lvx v5, 0, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: vperm v2, v4, v3, v2
; CHECK-P8-NEXT: vperm v3, v4, v3, v5
; CHECK-P8-NEXT: xvcvuxddp vs0, v2
; CHECK-P8-NEXT: xvcvuxddp vs1, v3
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r5, r2, .LCPI1_0@toc@ha
; CHECK-P9-NEXT: addis r6, r2, .LCPI1_1@toc@ha
; CHECK-P9-NEXT: mtvsrd f0, r4
; CHECK-P9-NEXT: xxlxor v5, v5, v5
; CHECK-P9-NEXT: addi r5, r5, .LCPI1_0@toc@l
; CHECK-P9-NEXT: addi r6, r6, .LCPI1_1@toc@l
; CHECK-P9-NEXT: xxswapd v4, vs0
; CHECK-P9-NEXT: lxvx v2, 0, r5
; CHECK-P9-NEXT: lxvx v3, 0, r6
; CHECK-P9-NEXT: vperm v2, v5, v4, v2
; CHECK-P9-NEXT: vperm v3, v5, v4, v3
; CHECK-P9-NEXT: xvcvuxddp vs0, v2
; CHECK-P9-NEXT: xvcvuxddp vs1, v3
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r5, r2, .LCPI1_0@toc@ha
; CHECK-BE-NEXT: addis r6, r2, .LCPI1_1@toc@ha
; CHECK-BE-NEXT: mtvsrd v4, r4
; CHECK-BE-NEXT: xxlxor v5, v5, v5
; CHECK-BE-NEXT: addi r5, r5, .LCPI1_0@toc@l
; CHECK-BE-NEXT: addi r6, r6, .LCPI1_1@toc@l
; CHECK-BE-NEXT: lxvx v2, 0, r5
; CHECK-BE-NEXT: lxvx v3, 0, r6
; CHECK-BE-NEXT: vperm v2, v4, v5, v2
; CHECK-BE-NEXT: vperm v3, v5, v4, v3
; CHECK-BE-NEXT: xvcvuxddp vs0, v2
; CHECK-BE-NEXT: xvcvuxddp vs1, v3
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i64 %a.coerce to <4 x i16>
%1 = uitofp <4 x i16> %0 to <4 x double>
store <4 x double> %1, <4 x double>* %agg.result, align 32
ret void
}
; Unsigned conversion (uitofp) of a <8 x i16> vector argument to a
; <8 x double> stored through the sret pointer %agg.result.
; Every configuration expects four vperm instructions that combine the input
; (v2) with a zeroed register (xxlxor) under control vectors loaded from
; .LCPI2_0 through .LCPI2_3, then four xvcvuxddp conversions and four vector
; stores. The pwr8 assertions also contain an xxswapd of each converted vector
; ahead of the stxvd2x stores.
define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT: addis r5, r2, .LCPI2_2@toc@ha
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI2_2@toc@l
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI2_3@toc@ha
; CHECK-P8-NEXT: lvx v5, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI2_1@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI2_3@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI2_1@toc@l
; CHECK-P8-NEXT: lvx v0, 0, r4
; CHECK-P8-NEXT: lvx v1, 0, r5
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: vperm v3, v4, v2, v3
; CHECK-P8-NEXT: vperm v5, v4, v2, v5
; CHECK-P8-NEXT: vperm v0, v4, v2, v0
; CHECK-P8-NEXT: vperm v2, v4, v2, v1
; CHECK-P8-NEXT: xvcvuxddp vs0, v3
; CHECK-P8-NEXT: xvcvuxddp vs1, v5
; CHECK-P8-NEXT: xvcvuxddp vs2, v0
; CHECK-P8-NEXT: xvcvuxddp vs3, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P9-NEXT: addis r5, r2, .LCPI2_1@toc@ha
; CHECK-P9-NEXT: addis r6, r2, .LCPI2_2@toc@ha
; CHECK-P9-NEXT: addis r7, r2, .LCPI2_3@toc@ha
; CHECK-P9-NEXT: xxlxor v1, v1, v1
; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-P9-NEXT: addi r5, r5, .LCPI2_1@toc@l
; CHECK-P9-NEXT: addi r6, r6, .LCPI2_2@toc@l
; CHECK-P9-NEXT: addi r7, r7, .LCPI2_3@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
; CHECK-P9-NEXT: lxvx v4, 0, r5
; CHECK-P9-NEXT: lxvx v5, 0, r6
; CHECK-P9-NEXT: lxvx v0, 0, r7
; CHECK-P9-NEXT: vperm v3, v1, v2, v3
; CHECK-P9-NEXT: vperm v4, v1, v2, v4
; CHECK-P9-NEXT: vperm v5, v1, v2, v5
; CHECK-P9-NEXT: vperm v2, v1, v2, v0
; CHECK-P9-NEXT: xvcvuxddp vs0, v3
; CHECK-P9-NEXT: xvcvuxddp vs1, v4
; CHECK-P9-NEXT: xvcvuxddp vs2, v5
; CHECK-P9-NEXT: xvcvuxddp vs3, v2
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-BE-NEXT: addis r5, r2, .LCPI2_1@toc@ha
; CHECK-BE-NEXT: addis r6, r2, .LCPI2_2@toc@ha
; CHECK-BE-NEXT: addis r7, r2, .LCPI2_3@toc@ha
; CHECK-BE-NEXT: xxlxor v1, v1, v1
; CHECK-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-BE-NEXT: addi r5, r5, .LCPI2_1@toc@l
; CHECK-BE-NEXT: addi r6, r6, .LCPI2_2@toc@l
; CHECK-BE-NEXT: addi r7, r7, .LCPI2_3@toc@l
; CHECK-BE-NEXT: lxvx v3, 0, r4
; CHECK-BE-NEXT: lxvx v4, 0, r5
; CHECK-BE-NEXT: lxvx v5, 0, r6
; CHECK-BE-NEXT: lxvx v0, 0, r7
; CHECK-BE-NEXT: vperm v3, v2, v1, v3
; CHECK-BE-NEXT: vperm v4, v1, v2, v4
; CHECK-BE-NEXT: vperm v5, v1, v2, v5
; CHECK-BE-NEXT: vperm v2, v1, v2, v0
; CHECK-BE-NEXT: xvcvuxddp vs0, v3
; CHECK-BE-NEXT: xvcvuxddp vs1, v4
; CHECK-BE-NEXT: xvcvuxddp vs2, v5
; CHECK-BE-NEXT: xvcvuxddp vs3, v2
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = uitofp <8 x i16> %a to <8 x double>
store <8 x double> %0, <8 x double>* %agg.result, align 64
ret void
}
; Unsigned conversion (uitofp) of a <16 x i16> loaded from the second argument
; to a <16 x double> stored through the sret pointer %agg.result.
; Every configuration expects the input to be loaded as two 16-byte vectors,
; eight vperm instructions against a zeroed register (xxlxor) that reuse the
; four control vectors loaded from .LCPI3_0 through .LCPI3_3, then eight
; xvcvuxddp conversions and eight vector stores covering byte offsets 0 to 112.
; The pwr8 assertions also contain an xxswapd of each converted vector ahead
; of the stxvd2x stores.
define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r6, r2, .LCPI3_2@toc@ha
; CHECK-P8-NEXT: addis r5, r2, .LCPI3_0@toc@ha
; CHECK-P8-NEXT: lvx v4, 0, r4
; CHECK-P8-NEXT: xxlxor v3, v3, v3
; CHECK-P8-NEXT: addi r6, r6, .LCPI3_2@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI3_0@toc@l
; CHECK-P8-NEXT: lvx v5, 0, r6
; CHECK-P8-NEXT: li r6, 16
; CHECK-P8-NEXT: lvx v2, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI3_1@toc@ha
; CHECK-P8-NEXT: lvx v0, r4, r6
; CHECK-P8-NEXT: addis r4, r2, .LCPI3_3@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI3_1@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI3_3@toc@l
; CHECK-P8-NEXT: lvx v1, 0, r5
; CHECK-P8-NEXT: li r5, 96
; CHECK-P8-NEXT: lvx v8, 0, r4
; CHECK-P8-NEXT: vperm v6, v3, v4, v2
; CHECK-P8-NEXT: li r4, 112
; CHECK-P8-NEXT: vperm v7, v3, v4, v5
; CHECK-P8-NEXT: vperm v2, v3, v0, v2
; CHECK-P8-NEXT: vperm v9, v3, v0, v1
; CHECK-P8-NEXT: vperm v5, v3, v0, v5
; CHECK-P8-NEXT: vperm v0, v3, v0, v8
; CHECK-P8-NEXT: vperm v1, v3, v4, v1
; CHECK-P8-NEXT: vperm v3, v3, v4, v8
; CHECK-P8-NEXT: xvcvuxddp vs1, v2
; CHECK-P8-NEXT: xvcvuxddp vs4, v9
; CHECK-P8-NEXT: xvcvuxddp vs2, v5
; CHECK-P8-NEXT: xvcvuxddp vs3, v0
; CHECK-P8-NEXT: xvcvuxddp vs0, v7
; CHECK-P8-NEXT: xvcvuxddp vs5, v3
; CHECK-P8-NEXT: xvcvuxddp vs6, v6
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xvcvuxddp vs7, v1
; CHECK-P8-NEXT: xxswapd vs4, vs4
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs5, vs5
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: li r4, 80
; CHECK-P8-NEXT: li r5, 64
; CHECK-P8-NEXT: xxswapd vs2, vs7
; CHECK-P8-NEXT: xxswapd vs3, vs6
; CHECK-P8-NEXT: stxvd2x vs4, r3, r4
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: stxvd2x vs5, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs2, r3, r6
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r5, r2, .LCPI3_0@toc@ha
; CHECK-P9-NEXT: addis r6, r2, .LCPI3_1@toc@ha
; CHECK-P9-NEXT: addis r7, r2, .LCPI3_2@toc@ha
; CHECK-P9-NEXT: addis r8, r2, .LCPI3_3@toc@ha
; CHECK-P9-NEXT: lxv v0, 0(r4)
; CHECK-P9-NEXT: lxv v1, 16(r4)
; CHECK-P9-NEXT: xxlxor v6, v6, v6
; CHECK-P9-NEXT: addi r5, r5, .LCPI3_0@toc@l
; CHECK-P9-NEXT: addi r6, r6, .LCPI3_1@toc@l
; CHECK-P9-NEXT: addi r7, r7, .LCPI3_2@toc@l
; CHECK-P9-NEXT: addi r8, r8, .LCPI3_3@toc@l
; CHECK-P9-NEXT: lxvx v2, 0, r5
; CHECK-P9-NEXT: lxvx v3, 0, r6
; CHECK-P9-NEXT: lxvx v4, 0, r7
; CHECK-P9-NEXT: lxvx v5, 0, r8
; CHECK-P9-NEXT: vperm v7, v6, v0, v2
; CHECK-P9-NEXT: vperm v8, v6, v0, v3
; CHECK-P9-NEXT: vperm v9, v6, v0, v4
; CHECK-P9-NEXT: vperm v0, v6, v0, v5
; CHECK-P9-NEXT: vperm v2, v6, v1, v2
; CHECK-P9-NEXT: vperm v3, v6, v1, v3
; CHECK-P9-NEXT: vperm v4, v6, v1, v4
; CHECK-P9-NEXT: vperm v5, v6, v1, v5
; CHECK-P9-NEXT: xvcvuxddp vs0, v7
; CHECK-P9-NEXT: xvcvuxddp vs1, v8
; CHECK-P9-NEXT: xvcvuxddp vs2, v9
; CHECK-P9-NEXT: xvcvuxddp vs3, v0
; CHECK-P9-NEXT: xvcvuxddp vs4, v2
; CHECK-P9-NEXT: xvcvuxddp vs5, v3
; CHECK-P9-NEXT: xvcvuxddp vs6, v4
; CHECK-P9-NEXT: xvcvuxddp vs7, v5
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: stxv vs7, 112(r3)
; CHECK-P9-NEXT: stxv vs6, 96(r3)
; CHECK-P9-NEXT: stxv vs5, 80(r3)
; CHECK-P9-NEXT: stxv vs4, 64(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r5, r2, .LCPI3_0@toc@ha
; CHECK-BE-NEXT: addis r6, r2, .LCPI3_1@toc@ha
; CHECK-BE-NEXT: addis r7, r2, .LCPI3_2@toc@ha
; CHECK-BE-NEXT: addis r8, r2, .LCPI3_3@toc@ha
; CHECK-BE-NEXT: lxv v0, 0(r4)
; CHECK-BE-NEXT: lxv v1, 16(r4)
; CHECK-BE-NEXT: xxlxor v6, v6, v6
; CHECK-BE-NEXT: addi r5, r5, .LCPI3_0@toc@l
; CHECK-BE-NEXT: addi r6, r6, .LCPI3_1@toc@l
; CHECK-BE-NEXT: addi r7, r7, .LCPI3_2@toc@l
; CHECK-BE-NEXT: addi r8, r8, .LCPI3_3@toc@l
; CHECK-BE-NEXT: lxvx v2, 0, r5
; CHECK-BE-NEXT: lxvx v3, 0, r6
; CHECK-BE-NEXT: lxvx v4, 0, r7
; CHECK-BE-NEXT: lxvx v5, 0, r8
; CHECK-BE-NEXT: vperm v7, v0, v6, v2
; CHECK-BE-NEXT: vperm v8, v6, v0, v3
; CHECK-BE-NEXT: vperm v9, v6, v0, v4
; CHECK-BE-NEXT: vperm v0, v6, v0, v5
; CHECK-BE-NEXT: vperm v2, v1, v6, v2
; CHECK-BE-NEXT: vperm v3, v6, v1, v3
; CHECK-BE-NEXT: vperm v4, v6, v1, v4
; CHECK-BE-NEXT: vperm v5, v6, v1, v5
; CHECK-BE-NEXT: xvcvuxddp vs0, v7
; CHECK-BE-NEXT: xvcvuxddp vs1, v8
; CHECK-BE-NEXT: xvcvuxddp vs2, v9
; CHECK-BE-NEXT: xvcvuxddp vs3, v0
; CHECK-BE-NEXT: xvcvuxddp vs4, v2
; CHECK-BE-NEXT: xvcvuxddp vs5, v3
; CHECK-BE-NEXT: xvcvuxddp vs6, v4
; CHECK-BE-NEXT: xvcvuxddp vs7, v5
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs7, 112(r3)
; CHECK-BE-NEXT: stxv vs6, 96(r3)
; CHECK-BE-NEXT: stxv vs5, 80(r3)
; CHECK-BE-NEXT: stxv vs4, 64(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i16>, <16 x i16>* %0, align 32
%1 = uitofp <16 x i16> %a to <16 x double>
store <16 x double> %1, <16 x double>* %agg.result, align 128
ret void
}
; Signed conversion (sitofp) of a <2 x i16>, passed bit-cast inside an i32
; argument, to a <2 x double> return value.
; The pwr8 assertions show element-by-element code: each halfword is isolated
; in a GPR (clrldi / rldicl), sign-extended with extsh, moved to a VSX register
; with mtvsrwa and converted with the scalar xscvsxddp; the two results are
; then merged with xxmrghd. The pwr9 assertions (LE and BE) instead expect a
; vperm under a control vector loaded from .LCPI4_0, a vextsh2d and a single
; vector xvcvsxddp.
define <2 x double> @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: mfvsrd r3, f0
; CHECK-P8-NEXT: clrldi r4, r3, 48
; CHECK-P8-NEXT: rldicl r3, r3, 48, 48
; CHECK-P8-NEXT: extsh r4, r4
; CHECK-P8-NEXT: extsh r3, r3
; CHECK-P8-NEXT: mtvsrwa f0, r4
; CHECK-P8-NEXT: mtvsrwa f1, r3
; CHECK-P8-NEXT: xscvsxddp f0, f0
; CHECK-P8-NEXT: xscvsxddp f1, f1
; CHECK-P8-NEXT: xxmrghd v2, vs1, vs0
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r4, r2, .LCPI4_0@toc@ha
; CHECK-P9-NEXT: mtvsrws v3, r3
; CHECK-P9-NEXT: addi r4, r4, .LCPI4_0@toc@l
; CHECK-P9-NEXT: lxvx v2, 0, r4
; CHECK-P9-NEXT: vperm v2, v3, v3, v2
; CHECK-P9-NEXT: vextsh2d v2, v2
; CHECK-P9-NEXT: xvcvsxddp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r4, r2, .LCPI4_0@toc@ha
; CHECK-BE-NEXT: mtvsrws v3, r3
; CHECK-BE-NEXT: addi r4, r4, .LCPI4_0@toc@l
; CHECK-BE-NEXT: lxvx v2, 0, r4
; CHECK-BE-NEXT: vperm v2, v3, v3, v2
; CHECK-BE-NEXT: vextsh2d v2, v2
; CHECK-BE-NEXT: xvcvsxddp v2, v2
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i32 %a.coerce to <2 x i16>
%1 = sitofp <2 x i16> %0 to <2 x double>
ret <2 x double> %1
}
; Signed conversion (sitofp) of a <4 x i16>, passed bit-cast inside an i64
; argument, to a <4 x double> stored through the sret pointer %agg.result.
; The pwr8 assertions show element-by-element code: four halfwords are
; isolated in GPRs, sign-extended with extsh, moved with mtvsrwa, converted
; with the scalar xscvsxddp, paired with xxmrghd and swapped with xxswapd ahead
; of the stxvd2x stores. The pwr9 assertions (LE and BE) expect two vperm,
; two vextsh2d and two vector xvcvsxddp instructions.
; NOTE(review): the BE assertions differ from the pwr9 LE ones in two ways: a
; zeroed register (xxlxor v5) is used as the first operand of the first vperm,
; and vs0/vs1 are stored at offsets 16/0 rather than 0/16. The assertions are
; autogenerated, so this presumably reflects actual llc output - confirm
; before treating either form as the intended code generation.
define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtvsrd f0, r4
; CHECK-P8-NEXT: mfvsrd r4, f0
; CHECK-P8-NEXT: clrldi r5, r4, 48
; CHECK-P8-NEXT: rldicl r6, r4, 48, 48
; CHECK-P8-NEXT: extsh r5, r5
; CHECK-P8-NEXT: extsh r6, r6
; CHECK-P8-NEXT: mtvsrwa f0, r5
; CHECK-P8-NEXT: rldicl r5, r4, 32, 48
; CHECK-P8-NEXT: rldicl r4, r4, 16, 48
; CHECK-P8-NEXT: extsh r5, r5
; CHECK-P8-NEXT: extsh r4, r4
; CHECK-P8-NEXT: mtvsrwa f1, r6
; CHECK-P8-NEXT: mtvsrwa f2, r5
; CHECK-P8-NEXT: mtvsrwa f3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: xscvsxddp f0, f0
; CHECK-P8-NEXT: xscvsxddp f1, f1
; CHECK-P8-NEXT: xscvsxddp f2, f2
; CHECK-P8-NEXT: xscvsxddp f3, f3
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xxmrghd vs1, vs3, vs2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r5, r2, .LCPI5_0@toc@ha
; CHECK-P9-NEXT: addis r6, r2, .LCPI5_1@toc@ha
; CHECK-P9-NEXT: mtvsrd f0, r4
; CHECK-P9-NEXT: addi r5, r5, .LCPI5_0@toc@l
; CHECK-P9-NEXT: addi r6, r6, .LCPI5_1@toc@l
; CHECK-P9-NEXT: xxswapd v4, vs0
; CHECK-P9-NEXT: lxvx v2, 0, r5
; CHECK-P9-NEXT: lxvx v3, 0, r6
; CHECK-P9-NEXT: vperm v2, v4, v4, v2
; CHECK-P9-NEXT: vperm v3, v4, v4, v3
; CHECK-P9-NEXT: vextsh2d v2, v2
; CHECK-P9-NEXT: vextsh2d v3, v3
; CHECK-P9-NEXT: xvcvsxddp vs0, v2
; CHECK-P9-NEXT: xvcvsxddp vs1, v3
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r5, r2, .LCPI5_0@toc@ha
; CHECK-BE-NEXT: addis r6, r2, .LCPI5_1@toc@ha
; CHECK-BE-NEXT: mtvsrd v4, r4
; CHECK-BE-NEXT: xxlxor v5, v5, v5
; CHECK-BE-NEXT: addi r5, r5, .LCPI5_0@toc@l
; CHECK-BE-NEXT: addi r6, r6, .LCPI5_1@toc@l
; CHECK-BE-NEXT: lxvx v2, 0, r5
; CHECK-BE-NEXT: lxvx v3, 0, r6
; CHECK-BE-NEXT: vperm v2, v5, v4, v2
; CHECK-BE-NEXT: vperm v3, v4, v4, v3
; CHECK-BE-NEXT: vextsh2d v2, v2
; CHECK-BE-NEXT: vextsh2d v3, v3
; CHECK-BE-NEXT: xvcvsxddp vs0, v2
; CHECK-BE-NEXT: xvcvsxddp vs1, v3
; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i64 %a.coerce to <4 x i16>
%1 = sitofp <4 x i16> %0 to <4 x double>
store <4 x double> %1, <4 x double>* %agg.result, align 32
ret void
}
; Signed conversion of <8 x i16> to <8 x double> (sitofp). The input vector is
; passed by value in %a and the eight doubles are stored through the sret
; pointer %agg.result.
; The P8 checks expect each halfword to be extracted into a GPR, sign-extended
; (extsh), moved to a VSR (mtvsrwa), converted individually (xscvsxddp) and
; re-paired with xxmrghd. The P9 and BE checks expect whole-vector code:
; vperm with constant-pool masks, vextsh2d, then xvcvsxddp.
define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mfvsrd r4, v2
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: clrldi r5, r4, 48
; CHECK-P8-NEXT: rldicl r6, r4, 48, 48
; CHECK-P8-NEXT: extsh r5, r5
; CHECK-P8-NEXT: mfvsrd r7, f0
; CHECK-P8-NEXT: extsh r6, r6
; CHECK-P8-NEXT: mtvsrwa f1, r5
; CHECK-P8-NEXT: rldicl r5, r4, 32, 48
; CHECK-P8-NEXT: rldicl r4, r4, 16, 48
; CHECK-P8-NEXT: extsh r5, r5
; CHECK-P8-NEXT: extsh r4, r4
; CHECK-P8-NEXT: mtvsrwa f0, r6
; CHECK-P8-NEXT: mtvsrwa f2, r5
; CHECK-P8-NEXT: clrldi r5, r7, 48
; CHECK-P8-NEXT: mtvsrwa f3, r4
; CHECK-P8-NEXT: extsh r4, r5
; CHECK-P8-NEXT: rldicl r5, r7, 16, 48
; CHECK-P8-NEXT: mtvsrwa f4, r4
; CHECK-P8-NEXT: rldicl r4, r7, 48, 48
; CHECK-P8-NEXT: extsh r5, r5
; CHECK-P8-NEXT: extsh r4, r4
; CHECK-P8-NEXT: mtvsrwa f7, r5
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: mtvsrwa f5, r4
; CHECK-P8-NEXT: rldicl r4, r7, 32, 48
; CHECK-P8-NEXT: extsh r4, r4
; CHECK-P8-NEXT: xscvsxddp f1, f1
; CHECK-P8-NEXT: mtvsrwa f6, r4
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: xscvsxddp f0, f0
; CHECK-P8-NEXT: xscvsxddp f2, f2
; CHECK-P8-NEXT: xscvsxddp f3, f3
; CHECK-P8-NEXT: xscvsxddp f4, f4
; CHECK-P8-NEXT: xscvsxddp f5, f5
; CHECK-P8-NEXT: xscvsxddp f6, f6
; CHECK-P8-NEXT: xscvsxddp f7, f7
; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P8-NEXT: xxmrghd vs1, vs3, vs2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxmrghd vs2, vs5, vs4
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxmrghd vs3, vs7, vs6
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: stxvd2x vs2, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r4, r2, .LCPI6_0@toc@ha
; CHECK-P9-NEXT: addis r5, r2, .LCPI6_1@toc@ha
; CHECK-P9-NEXT: addis r6, r2, .LCPI6_2@toc@ha
; CHECK-P9-NEXT: addis r7, r2, .LCPI6_3@toc@ha
; CHECK-P9-NEXT: addi r4, r4, .LCPI6_0@toc@l
; CHECK-P9-NEXT: addi r5, r5, .LCPI6_1@toc@l
; CHECK-P9-NEXT: addi r6, r6, .LCPI6_2@toc@l
; CHECK-P9-NEXT: addi r7, r7, .LCPI6_3@toc@l
; CHECK-P9-NEXT: lxvx v3, 0, r4
; CHECK-P9-NEXT: lxvx v4, 0, r5
; CHECK-P9-NEXT: lxvx v5, 0, r6
; CHECK-P9-NEXT: lxvx v0, 0, r7
; CHECK-P9-NEXT: vperm v3, v2, v2, v3
; CHECK-P9-NEXT: vperm v4, v2, v2, v4
; CHECK-P9-NEXT: vperm v5, v2, v2, v5
; CHECK-P9-NEXT: vperm v2, v2, v2, v0
; CHECK-P9-NEXT: vextsh2d v3, v3
; CHECK-P9-NEXT: vextsh2d v4, v4
; CHECK-P9-NEXT: vextsh2d v5, v5
; CHECK-P9-NEXT: vextsh2d v2, v2
; CHECK-P9-NEXT: xvcvsxddp vs0, v3
; CHECK-P9-NEXT: xvcvsxddp vs1, v4
; CHECK-P9-NEXT: xvcvsxddp vs2, v5
; CHECK-P9-NEXT: xvcvsxddp vs3, v2
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha
; CHECK-BE-NEXT: addis r5, r2, .LCPI6_1@toc@ha
; CHECK-BE-NEXT: addis r6, r2, .LCPI6_2@toc@ha
; CHECK-BE-NEXT: addis r7, r2, .LCPI6_3@toc@ha
; CHECK-BE-NEXT: xxlxor v1, v1, v1
; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l
; CHECK-BE-NEXT: addi r5, r5, .LCPI6_1@toc@l
; CHECK-BE-NEXT: addi r6, r6, .LCPI6_2@toc@l
; CHECK-BE-NEXT: addi r7, r7, .LCPI6_3@toc@l
; CHECK-BE-NEXT: lxvx v3, 0, r4
; CHECK-BE-NEXT: lxvx v4, 0, r5
; CHECK-BE-NEXT: lxvx v5, 0, r6
; CHECK-BE-NEXT: lxvx v0, 0, r7
; CHECK-BE-NEXT: vperm v3, v1, v2, v3
; CHECK-BE-NEXT: vperm v4, v1, v2, v4
; CHECK-BE-NEXT: vperm v5, v2, v2, v5
; CHECK-BE-NEXT: vperm v2, v2, v2, v0
; CHECK-BE-NEXT: vextsh2d v3, v3
; CHECK-BE-NEXT: vextsh2d v4, v4
; CHECK-BE-NEXT: vextsh2d v5, v5
; CHECK-BE-NEXT: vextsh2d v2, v2
; CHECK-BE-NEXT: xvcvsxddp vs0, v3
; CHECK-BE-NEXT: xvcvsxddp vs1, v4
; CHECK-BE-NEXT: xvcvsxddp vs2, v5
; CHECK-BE-NEXT: xvcvsxddp vs3, v2
; CHECK-BE-NEXT: stxv vs1, 48(r3)
; CHECK-BE-NEXT: stxv vs3, 32(r3)
; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: stxv vs2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; Convert all eight signed halfwords, then write the 64-byte result in one store.
%0 = sitofp <8 x i16> %a to <8 x double>
store <8 x double> %0, <8 x double>* %agg.result, align 64
ret void
}
; Signed conversion of <16 x i16> to <16 x double> (sitofp). The input vector is
; loaded from the unnamed second pointer argument (%0) and the sixteen doubles
; are stored through the sret pointer %agg.result.
; The P8 checks expect element-by-element conversion (extsh, mtvsrwa,
; xscvsxddp, xxmrghd) and include a spill and reload of f31 around the body.
; The P9 and BE checks expect vperm with constant-pool masks, vextsh2d and
; xvcvsxddp on whole vectors.
define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: mfvsrd r7, v3
; CHECK-P8-NEXT: xxswapd vs8, v3
; CHECK-P8-NEXT: mfvsrd r6, v2
; CHECK-P8-NEXT: xxswapd vs2, v2
; CHECK-P8-NEXT: clrldi r4, r6, 48
; CHECK-P8-NEXT: rldicl r8, r6, 48, 48
; CHECK-P8-NEXT: extsh r4, r4
; CHECK-P8-NEXT: extsh r8, r8
; CHECK-P8-NEXT: mtvsrwa f0, r4
; CHECK-P8-NEXT: rldicl r4, r6, 32, 48
; CHECK-P8-NEXT: rldicl r6, r6, 16, 48
; CHECK-P8-NEXT: mtvsrwa f1, r8
; CHECK-P8-NEXT: extsh r4, r4
; CHECK-P8-NEXT: clrldi r8, r7, 48
; CHECK-P8-NEXT: extsh r6, r6
; CHECK-P8-NEXT: mtvsrwa f3, r4
; CHECK-P8-NEXT: extsh r4, r8
; CHECK-P8-NEXT: mtvsrwa f4, r6
; CHECK-P8-NEXT: rldicl r6, r7, 48, 48
; CHECK-P8-NEXT: mtvsrwa f5, r4
; CHECK-P8-NEXT: rldicl r4, r7, 32, 48
; CHECK-P8-NEXT: extsh r6, r6
; CHECK-P8-NEXT: mfvsrd r8, f2
; CHECK-P8-NEXT: extsh r4, r4
; CHECK-P8-NEXT: mtvsrwa f2, r6
; CHECK-P8-NEXT: rldicl r6, r7, 16, 48
; CHECK-P8-NEXT: mtvsrwa f6, r4
; CHECK-P8-NEXT: clrldi r4, r8, 48
; CHECK-P8-NEXT: extsh r6, r6
; CHECK-P8-NEXT: extsh r4, r4
; CHECK-P8-NEXT: mtvsrwa f7, r6
; CHECK-P8-NEXT: rldicl r6, r8, 48, 48
; CHECK-P8-NEXT: mtvsrwa f9, r4
; CHECK-P8-NEXT: rldicl r4, r8, 32, 48
; CHECK-P8-NEXT: extsh r6, r6
; CHECK-P8-NEXT: extsh r4, r4
; CHECK-P8-NEXT: mtvsrwa f10, r6
; CHECK-P8-NEXT: rldicl r6, r8, 16, 48
; CHECK-P8-NEXT: mtvsrwa f11, r4
; CHECK-P8-NEXT: extsh r6, r6
; CHECK-P8-NEXT: mfvsrd r4, f8
; CHECK-P8-NEXT: mtvsrwa f8, r6
; CHECK-P8-NEXT: clrldi r6, r4, 48
; CHECK-P8-NEXT: xscvsxddp f3, f3
; CHECK-P8-NEXT: extsh r6, r6
; CHECK-P8-NEXT: xscvsxddp f4, f4
; CHECK-P8-NEXT: mtvsrwa f12, r6
; CHECK-P8-NEXT: rldicl r6, r4, 48, 48
; CHECK-P8-NEXT: extsh r6, r6
; CHECK-P8-NEXT: xscvsxddp f0, f0
; CHECK-P8-NEXT: mtvsrwa f13, r6
; CHECK-P8-NEXT: rldicl r6, r4, 32, 48
; CHECK-P8-NEXT: rldicl r4, r4, 16, 48
; CHECK-P8-NEXT: xscvsxddp f1, f1
; CHECK-P8-NEXT: extsh r6, r6
; CHECK-P8-NEXT: extsh r4, r4
; CHECK-P8-NEXT: xscvsxddp f5, f5
; CHECK-P8-NEXT: xscvsxddp f2, f2
; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs3
; CHECK-P8-NEXT: mtvsrwa v2, r6
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: mtvsrwa v3, r4
; CHECK-P8-NEXT: li r4, 112
; CHECK-P8-NEXT: xscvsxddp f6, f6
; CHECK-P8-NEXT: xscvsxddp f7, f7
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xscvsxddp f9, f9
; CHECK-P8-NEXT: xscvsxddp f10, f10
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs5
; CHECK-P8-NEXT: xscvsxddp f11, f11
; CHECK-P8-NEXT: xxswapd vs2, vs3
; CHECK-P8-NEXT: xscvsxddp f8, f8
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xscvsxddp f12, f12
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xscvsxddp f13, f13
; CHECK-P8-NEXT: xxmrghd vs3, vs7, vs6
; CHECK-P8-NEXT: xscvsxddp f4, v2
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 96
; CHECK-P8-NEXT: xscvsxddp f31, v3
; CHECK-P8-NEXT: xxmrghd vs5, vs10, vs9
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: xxmrghd vs6, vs8, vs11
; CHECK-P8-NEXT: xxmrghd vs7, vs13, vs12
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 80
; CHECK-P8-NEXT: xxswapd vs0, vs6
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
; CHECK-P8-NEXT: li r6, 64
; CHECK-P8-NEXT: xxmrghd vs2, vs31, vs4
; CHECK-P8-NEXT: xxswapd vs4, vs5
; CHECK-P8-NEXT: xxswapd vs5, vs7
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: stxvd2x vs4, r3, r6
; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r5, r2, .LCPI7_0@toc@ha
; CHECK-P9-NEXT: addis r6, r2, .LCPI7_1@toc@ha
; CHECK-P9-NEXT: addis r7, r2, .LCPI7_2@toc@ha
; CHECK-P9-NEXT: addis r8, r2, .LCPI7_3@toc@ha
; CHECK-P9-NEXT: lxv v0, 0(r4)
; CHECK-P9-NEXT: lxv v1, 16(r4)
; CHECK-P9-NEXT: addi r5, r5, .LCPI7_0@toc@l
; CHECK-P9-NEXT: addi r6, r6, .LCPI7_1@toc@l
; CHECK-P9-NEXT: addi r7, r7, .LCPI7_2@toc@l
; CHECK-P9-NEXT: addi r8, r8, .LCPI7_3@toc@l
; CHECK-P9-NEXT: lxvx v2, 0, r5
; CHECK-P9-NEXT: lxvx v3, 0, r6
; CHECK-P9-NEXT: lxvx v4, 0, r7
; CHECK-P9-NEXT: lxvx v5, 0, r8
; CHECK-P9-NEXT: vperm v6, v0, v0, v2
; CHECK-P9-NEXT: vperm v7, v0, v0, v3
; CHECK-P9-NEXT: vperm v8, v0, v0, v4
; CHECK-P9-NEXT: vperm v0, v0, v0, v5
; CHECK-P9-NEXT: vperm v2, v1, v1, v2
; CHECK-P9-NEXT: vperm v3, v1, v1, v3
; CHECK-P9-NEXT: vperm v4, v1, v1, v4
; CHECK-P9-NEXT: vperm v5, v1, v1, v5
; CHECK-P9-NEXT: vextsh2d v1, v6
; CHECK-P9-NEXT: vextsh2d v6, v7
; CHECK-P9-NEXT: vextsh2d v7, v8
; CHECK-P9-NEXT: vextsh2d v0, v0
; CHECK-P9-NEXT: vextsh2d v2, v2
; CHECK-P9-NEXT: vextsh2d v3, v3
; CHECK-P9-NEXT: vextsh2d v4, v4
; CHECK-P9-NEXT: vextsh2d v5, v5
; CHECK-P9-NEXT: xvcvsxddp vs0, v1
; CHECK-P9-NEXT: xvcvsxddp vs1, v6
; CHECK-P9-NEXT: xvcvsxddp vs2, v7
; CHECK-P9-NEXT: xvcvsxddp vs3, v0
; CHECK-P9-NEXT: xvcvsxddp vs4, v2
; CHECK-P9-NEXT: xvcvsxddp vs5, v3
; CHECK-P9-NEXT: xvcvsxddp vs6, v4
; CHECK-P9-NEXT: xvcvsxddp vs7, v5
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: stxv vs7, 112(r3)
; CHECK-P9-NEXT: stxv vs6, 96(r3)
; CHECK-P9-NEXT: stxv vs5, 80(r3)
; CHECK-P9-NEXT: stxv vs4, 64(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r5, r2, .LCPI7_0@toc@ha
; CHECK-BE-NEXT: addis r6, r2, .LCPI7_1@toc@ha
; CHECK-BE-NEXT: addis r7, r2, .LCPI7_2@toc@ha
; CHECK-BE-NEXT: addis r8, r2, .LCPI7_3@toc@ha
; CHECK-BE-NEXT: lxv v2, 16(r4)
; CHECK-BE-NEXT: lxv v3, 0(r4)
; CHECK-BE-NEXT: xxlxor v6, v6, v6
; CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l
; CHECK-BE-NEXT: addi r6, r6, .LCPI7_1@toc@l
; CHECK-BE-NEXT: addi r7, r7, .LCPI7_2@toc@l
; CHECK-BE-NEXT: addi r8, r8, .LCPI7_3@toc@l
; CHECK-BE-NEXT: lxvx v4, 0, r5
; CHECK-BE-NEXT: lxvx v5, 0, r6
; CHECK-BE-NEXT: lxvx v0, 0, r7
; CHECK-BE-NEXT: lxvx v1, 0, r8
; CHECK-BE-NEXT: vperm v7, v6, v3, v4
; CHECK-BE-NEXT: vperm v8, v6, v3, v5
; CHECK-BE-NEXT: vperm v4, v6, v2, v4
; CHECK-BE-NEXT: vperm v5, v6, v2, v5
; CHECK-BE-NEXT: vperm v6, v3, v3, v0
; CHECK-BE-NEXT: vperm v3, v3, v3, v1
; CHECK-BE-NEXT: vperm v0, v2, v2, v0
; CHECK-BE-NEXT: vperm v2, v2, v2, v1
; CHECK-BE-NEXT: vextsh2d v1, v7
; CHECK-BE-NEXT: vextsh2d v7, v8
; CHECK-BE-NEXT: vextsh2d v4, v4
; CHECK-BE-NEXT: vextsh2d v5, v5
; CHECK-BE-NEXT: vextsh2d v6, v6
; CHECK-BE-NEXT: vextsh2d v3, v3
; CHECK-BE-NEXT: vextsh2d v0, v0
; CHECK-BE-NEXT: vextsh2d v2, v2
; CHECK-BE-NEXT: xvcvsxddp vs0, v1
; CHECK-BE-NEXT: xvcvsxddp vs1, v7
; CHECK-BE-NEXT: xvcvsxddp vs2, v4
; CHECK-BE-NEXT: xvcvsxddp vs3, v5
; CHECK-BE-NEXT: xvcvsxddp vs4, v6
; CHECK-BE-NEXT: xvcvsxddp vs5, v3
; CHECK-BE-NEXT: xvcvsxddp vs6, v0
; CHECK-BE-NEXT: xvcvsxddp vs7, v2
; CHECK-BE-NEXT: stxv vs3, 112(r3)
; CHECK-BE-NEXT: stxv vs2, 80(r3)
; CHECK-BE-NEXT: stxv vs1, 48(r3)
; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: stxv vs7, 96(r3)
; CHECK-BE-NEXT: stxv vs6, 64(r3)
; CHECK-BE-NEXT: stxv vs5, 32(r3)
; CHECK-BE-NEXT: stxv vs4, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; %0 is the unnamed second parameter: pointer to the <16 x i16> source vector.
%a = load <16 x i16>, <16 x i16>* %0, align 32
%1 = sitofp <16 x i16> %a to <16 x double>
store <16 x double> %1, <16 x double>* %agg.result, align 128
ret void
}

View File

@ -0,0 +1,518 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE
; Unsigned conversion of <2 x i32> to <2 x double> (uitofp). The two i32 lanes
; arrive packed in the i64 argument %a.coerce and the result is returned by
; value.
; All three check sequences expect a word merge followed by a single
; xvcvuxwdp; the two little-endian sequences also expect an xxswapd after the
; mtvsrd, and use xxmrglw where the BE sequence uses xxmrghw.
define <2 x double> @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxmrglw v2, v2, v2
; CHECK-P8-NEXT: xvcvuxwdp v2, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xxmrglw v2, v2, v2
; CHECK-P9-NEXT: xvcvuxwdp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
; CHECK-BE-NEXT: xxmrghw v2, vs0, vs0
; CHECK-BE-NEXT: xvcvuxwdp v2, v2
; CHECK-BE-NEXT: blr
entry:
; Reinterpret the 64-bit scalar argument as two i32 lanes before converting.
%0 = bitcast i64 %a.coerce to <2 x i32>
%1 = uitofp <2 x i32> %0 to <2 x double>
ret <2 x double> %1
}
; Unsigned conversion of <4 x i32> to <4 x double> (uitofp). The input vector is
; passed by value in %a and the four doubles are stored through the sret
; pointer %agg.result.
; Every check sequence expects the input to be split with xxmrglw/xxmrghw and
; each half converted with xvcvuxwdp. The P8 sequence additionally expects
; xxswapd before its stxvd2x stores; the P9 and BE sequences store with stxv.
define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, <4 x i32> %a) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xxmrglw v3, v2, v2
; CHECK-P8-NEXT: xxmrghw v2, v2, v2
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: xvcvuxwdp vs0, v3
; CHECK-P8-NEXT: xvcvuxwdp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxmrglw v3, v2, v2
; CHECK-P9-NEXT: xxmrghw v2, v2, v2
; CHECK-P9-NEXT: xvcvuxwdp vs0, v3
; CHECK-P9-NEXT: xvcvuxwdp vs1, v2
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxmrghw v3, v2, v2
; CHECK-BE-NEXT: xxmrglw v2, v2, v2
; CHECK-BE-NEXT: xvcvuxwdp vs0, v3
; CHECK-BE-NEXT: xvcvuxwdp vs1, v2
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; Convert all four unsigned words, then write the 32-byte result in one store.
%0 = uitofp <4 x i32> %a to <4 x double>
store <4 x double> %0, <4 x double>* %agg.result, align 32
ret void
}
; Unsigned conversion of <8 x i32> to <8 x double> (uitofp). The input vector is
; loaded from the unnamed second pointer argument (%0) and the eight doubles
; are stored through the sret pointer %agg.result.
; Every check sequence expects two 16-byte loads, xxmrglw/xxmrghw splits and
; four xvcvuxwdp conversions. The P8 sequence loads with lvx and expects
; xxswapd before its stxvd2x stores; the P9 and BE sequences use lxv/stxv.
define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, <8 x i32>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: xxmrglw v5, v3, v3
; CHECK-P8-NEXT: xxmrghw v3, v3, v3
; CHECK-P8-NEXT: xxmrglw v4, v2, v2
; CHECK-P8-NEXT: xxmrghw v2, v2, v2
; CHECK-P8-NEXT: xvcvuxwdp vs2, v5
; CHECK-P8-NEXT: xvcvuxwdp vs0, v4
; CHECK-P8-NEXT: xvcvuxwdp vs1, v2
; CHECK-P8-NEXT: xvcvuxwdp vs3, v3
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
; CHECK-P8-NEXT: stxvd2x vs3, r3, r5
; CHECK-P8-NEXT: stxvd2x vs2, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xxmrglw v2, vs1, vs1
; CHECK-P9-NEXT: xxmrghw v3, vs1, vs1
; CHECK-P9-NEXT: xxmrglw v4, vs0, vs0
; CHECK-P9-NEXT: xxmrghw v5, vs0, vs0
; CHECK-P9-NEXT: xvcvuxwdp vs0, v2
; CHECK-P9-NEXT: xvcvuxwdp vs1, v3
; CHECK-P9-NEXT: xvcvuxwdp vs2, v4
; CHECK-P9-NEXT: xvcvuxwdp vs3, v5
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: xxmrghw v2, vs1, vs1
; CHECK-BE-NEXT: xxmrglw v3, vs1, vs1
; CHECK-BE-NEXT: xxmrghw v4, vs0, vs0
; CHECK-BE-NEXT: xxmrglw v5, vs0, vs0
; CHECK-BE-NEXT: xvcvuxwdp vs0, v2
; CHECK-BE-NEXT: xvcvuxwdp vs1, v3
; CHECK-BE-NEXT: xvcvuxwdp vs2, v4
; CHECK-BE-NEXT: xvcvuxwdp vs3, v5
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; %0 is the unnamed second parameter: pointer to the <8 x i32> source vector.
%a = load <8 x i32>, <8 x i32>* %0, align 32
%1 = uitofp <8 x i32> %a to <8 x double>
store <8 x double> %1, <8 x double>* %agg.result, align 64
ret void
}
; Unsigned conversion of <16 x i32> to <16 x double> (uitofp). The input vector
; is loaded from the unnamed second pointer argument (%0) and the sixteen
; doubles are stored through the sret pointer %agg.result.
; Every check sequence expects four 16-byte loads, xxmrglw/xxmrghw splits and
; eight xvcvuxwdp conversions. The P8 sequence loads with lvx and expects
; xxswapd before its stxvd2x stores; the P9 and BE sequences use lxv/stxv.
define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x i32>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: li r7, 32
; CHECK-P8-NEXT: li r8, 64
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: lvx v3, r4, r6
; CHECK-P8-NEXT: lvx v0, r4, r7
; CHECK-P8-NEXT: xxmrglw v4, v2, v2
; CHECK-P8-NEXT: xxmrghw v5, v3, v3
; CHECK-P8-NEXT: xxmrghw v2, v2, v2
; CHECK-P8-NEXT: xxmrglw v3, v3, v3
; CHECK-P8-NEXT: xvcvuxwdp vs0, v4
; CHECK-P8-NEXT: lvx v4, 0, r4
; CHECK-P8-NEXT: li r4, 112
; CHECK-P8-NEXT: xvcvuxwdp vs1, v5
; CHECK-P8-NEXT: xxmrghw v5, v0, v0
; CHECK-P8-NEXT: xxmrglw v0, v0, v0
; CHECK-P8-NEXT: xvcvuxwdp vs2, v2
; CHECK-P8-NEXT: xxmrglw v2, v4, v4
; CHECK-P8-NEXT: xvcvuxwdp vs3, v3
; CHECK-P8-NEXT: xxmrghw v3, v4, v4
; CHECK-P8-NEXT: xvcvuxwdp vs4, v5
; CHECK-P8-NEXT: xvcvuxwdp vs5, v0
; CHECK-P8-NEXT: xvcvuxwdp vs6, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xvcvuxwdp vs7, v3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: li r4, 96
; CHECK-P8-NEXT: xxswapd vs4, vs4
; CHECK-P8-NEXT: xxswapd vs1, vs5
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: xxswapd vs5, vs6
; CHECK-P8-NEXT: li r4, 80
; CHECK-P8-NEXT: xxswapd vs3, vs7
; CHECK-P8-NEXT: stxvd2x vs4, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r8
; CHECK-P8-NEXT: stxvd2x vs2, r3, r6
; CHECK-P8-NEXT: stxvd2x vs0, r3, r7
; CHECK-P8-NEXT: stxvd2x vs3, r3, r5
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: lxv vs2, 48(r4)
; CHECK-P9-NEXT: lxv vs3, 32(r4)
; CHECK-P9-NEXT: xxmrglw v2, vs1, vs1
; CHECK-P9-NEXT: xxmrghw v3, vs1, vs1
; CHECK-P9-NEXT: xxmrglw v4, vs0, vs0
; CHECK-P9-NEXT: xxmrghw v5, vs0, vs0
; CHECK-P9-NEXT: xxmrglw v0, vs3, vs3
; CHECK-P9-NEXT: xxmrghw v1, vs3, vs3
; CHECK-P9-NEXT: xxmrglw v6, vs2, vs2
; CHECK-P9-NEXT: xxmrghw v7, vs2, vs2
; CHECK-P9-NEXT: xvcvuxwdp vs0, v2
; CHECK-P9-NEXT: xvcvuxwdp vs1, v3
; CHECK-P9-NEXT: xvcvuxwdp vs2, v4
; CHECK-P9-NEXT: xvcvuxwdp vs3, v5
; CHECK-P9-NEXT: xvcvuxwdp vs4, v0
; CHECK-P9-NEXT: xvcvuxwdp vs5, v1
; CHECK-P9-NEXT: xvcvuxwdp vs6, v6
; CHECK-P9-NEXT: xvcvuxwdp vs7, v7
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: stxv vs7, 112(r3)
; CHECK-P9-NEXT: stxv vs6, 96(r3)
; CHECK-P9-NEXT: stxv vs5, 80(r3)
; CHECK-P9-NEXT: stxv vs4, 64(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: lxv vs2, 48(r4)
; CHECK-BE-NEXT: lxv vs3, 32(r4)
; CHECK-BE-NEXT: xxmrghw v2, vs1, vs1
; CHECK-BE-NEXT: xxmrglw v3, vs1, vs1
; CHECK-BE-NEXT: xxmrghw v4, vs0, vs0
; CHECK-BE-NEXT: xxmrglw v5, vs0, vs0
; CHECK-BE-NEXT: xxmrghw v0, vs3, vs3
; CHECK-BE-NEXT: xxmrglw v1, vs3, vs3
; CHECK-BE-NEXT: xxmrghw v6, vs2, vs2
; CHECK-BE-NEXT: xxmrglw v7, vs2, vs2
; CHECK-BE-NEXT: xvcvuxwdp vs0, v2
; CHECK-BE-NEXT: xvcvuxwdp vs1, v3
; CHECK-BE-NEXT: xvcvuxwdp vs2, v4
; CHECK-BE-NEXT: xvcvuxwdp vs3, v5
; CHECK-BE-NEXT: xvcvuxwdp vs4, v0
; CHECK-BE-NEXT: xvcvuxwdp vs5, v1
; CHECK-BE-NEXT: xvcvuxwdp vs6, v6
; CHECK-BE-NEXT: xvcvuxwdp vs7, v7
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs7, 112(r3)
; CHECK-BE-NEXT: stxv vs6, 96(r3)
; CHECK-BE-NEXT: stxv vs5, 80(r3)
; CHECK-BE-NEXT: stxv vs4, 64(r3)
; CHECK-BE-NEXT: blr
entry:
; %0 is the unnamed second parameter: pointer to the <16 x i32> source vector.
%a = load <16 x i32>, <16 x i32>* %0, align 64
%1 = uitofp <16 x i32> %a to <16 x double>
store <16 x double> %1, <16 x double>* %agg.result, align 128
ret void
}
; Signed conversion of <2 x i32> to <2 x double> (sitofp). The two i32 lanes
; arrive packed in the i64 argument %a.coerce and the result is returned by
; value.
; All three check sequences expect a word merge followed by a single
; xvcvsxwdp; the two little-endian sequences also expect an xxswapd after the
; mtvsrd, and use xxmrglw where the BE sequence uses xxmrghw.
define <2 x double> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxmrglw v2, v2, v2
; CHECK-P8-NEXT: xvcvsxwdp v2, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xxmrglw v2, v2, v2
; CHECK-P9-NEXT: xvcvsxwdp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
; CHECK-BE-NEXT: xxmrghw v2, vs0, vs0
; CHECK-BE-NEXT: xvcvsxwdp v2, v2
; CHECK-BE-NEXT: blr
entry:
; Reinterpret the 64-bit scalar argument as two i32 lanes before converting.
%0 = bitcast i64 %a.coerce to <2 x i32>
%1 = sitofp <2 x i32> %0 to <2 x double>
ret <2 x double> %1
}
; Signed conversion of <4 x i32> to <4 x double> (sitofp). The input vector is
; passed by value in %a and the four doubles are stored through the sret
; pointer %agg.result.
; Every check sequence expects the input to be split with xxmrglw/xxmrghw and
; each half converted with xvcvsxwdp. The P8 sequence additionally expects
; xxswapd before its stxvd2x stores; the P9 and BE sequences store with stxv.
define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, <4 x i32> %a) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xxmrglw v3, v2, v2
; CHECK-P8-NEXT: xxmrghw v2, v2, v2
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: xvcvsxwdp vs0, v3
; CHECK-P8-NEXT: xvcvsxwdp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxmrglw v3, v2, v2
; CHECK-P9-NEXT: xxmrghw v2, v2, v2
; CHECK-P9-NEXT: xvcvsxwdp vs0, v3
; CHECK-P9-NEXT: xvcvsxwdp vs1, v2
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxmrghw v3, v2, v2
; CHECK-BE-NEXT: xxmrglw v2, v2, v2
; CHECK-BE-NEXT: xvcvsxwdp vs0, v3
; CHECK-BE-NEXT: xvcvsxwdp vs1, v2
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; Convert all four signed words, then write the 32-byte result in one store.
%0 = sitofp <4 x i32> %a to <4 x double>
store <4 x double> %0, <4 x double>* %agg.result, align 32
ret void
}
; Signed conversion of <8 x i32> to <8 x double> (sitofp). The input vector is
; loaded from the unnamed second pointer argument (%0) and the eight doubles
; are stored through the sret pointer %agg.result.
; Every check sequence expects two 16-byte loads, xxmrglw/xxmrghw splits and
; four xvcvsxwdp conversions. The P8 sequence loads with lvx and expects
; xxswapd before its stxvd2x stores; the P9 and BE sequences use lxv/stxv.
define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, <8 x i32>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: xxmrglw v5, v3, v3
; CHECK-P8-NEXT: xxmrghw v3, v3, v3
; CHECK-P8-NEXT: xxmrglw v4, v2, v2
; CHECK-P8-NEXT: xxmrghw v2, v2, v2
; CHECK-P8-NEXT: xvcvsxwdp vs2, v5
; CHECK-P8-NEXT: xvcvsxwdp vs0, v4
; CHECK-P8-NEXT: xvcvsxwdp vs1, v2
; CHECK-P8-NEXT: xvcvsxwdp vs3, v3
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
; CHECK-P8-NEXT: stxvd2x vs3, r3, r5
; CHECK-P8-NEXT: stxvd2x vs2, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xxmrglw v2, vs1, vs1
; CHECK-P9-NEXT: xxmrghw v3, vs1, vs1
; CHECK-P9-NEXT: xxmrglw v4, vs0, vs0
; CHECK-P9-NEXT: xxmrghw v5, vs0, vs0
; CHECK-P9-NEXT: xvcvsxwdp vs0, v2
; CHECK-P9-NEXT: xvcvsxwdp vs1, v3
; CHECK-P9-NEXT: xvcvsxwdp vs2, v4
; CHECK-P9-NEXT: xvcvsxwdp vs3, v5
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: xxmrghw v2, vs1, vs1
; CHECK-BE-NEXT: xxmrglw v3, vs1, vs1
; CHECK-BE-NEXT: xxmrghw v4, vs0, vs0
; CHECK-BE-NEXT: xxmrglw v5, vs0, vs0
; CHECK-BE-NEXT: xvcvsxwdp vs0, v2
; CHECK-BE-NEXT: xvcvsxwdp vs1, v3
; CHECK-BE-NEXT: xvcvsxwdp vs2, v4
; CHECK-BE-NEXT: xvcvsxwdp vs3, v5
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
; %0 is the unnamed second parameter: pointer to the <8 x i32> source vector.
%a = load <8 x i32>, <8 x i32>* %0, align 32
%1 = sitofp <8 x i32> %a to <8 x double>
store <8 x double> %1, <8 x double>* %agg.result, align 64
ret void
}
; Signed conversion of <16 x i32> to <16 x double> (sitofp). The input vector is
; loaded from the unnamed second pointer argument (%0) and the sixteen doubles
; are stored through the sret pointer %agg.result.
; Every check sequence expects four 16-byte loads, xxmrglw/xxmrghw splits and
; eight xvcvsxwdp conversions. The P8 sequence loads with lvx and expects
; xxswapd before its stxvd2x stores; the P9 and BE sequences use lxv/stxv.
define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, <16 x i32>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: li r7, 32
; CHECK-P8-NEXT: li r8, 64
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: lvx v3, r4, r6
; CHECK-P8-NEXT: lvx v0, r4, r7
; CHECK-P8-NEXT: xxmrglw v4, v2, v2
; CHECK-P8-NEXT: xxmrghw v5, v3, v3
; CHECK-P8-NEXT: xxmrghw v2, v2, v2
; CHECK-P8-NEXT: xxmrglw v3, v3, v3
; CHECK-P8-NEXT: xvcvsxwdp vs0, v4
; CHECK-P8-NEXT: lvx v4, 0, r4
; CHECK-P8-NEXT: li r4, 112
; CHECK-P8-NEXT: xvcvsxwdp vs1, v5
; CHECK-P8-NEXT: xxmrghw v5, v0, v0
; CHECK-P8-NEXT: xxmrglw v0, v0, v0
; CHECK-P8-NEXT: xvcvsxwdp vs2, v2
; CHECK-P8-NEXT: xxmrglw v2, v4, v4
; CHECK-P8-NEXT: xvcvsxwdp vs3, v3
; CHECK-P8-NEXT: xxmrghw v3, v4, v4
; CHECK-P8-NEXT: xvcvsxwdp vs4, v5
; CHECK-P8-NEXT: xvcvsxwdp vs5, v0
; CHECK-P8-NEXT: xvcvsxwdp vs6, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xvcvsxwdp vs7, v3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: li r4, 96
; CHECK-P8-NEXT: xxswapd vs4, vs4
; CHECK-P8-NEXT: xxswapd vs1, vs5
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: xxswapd vs5, vs6
; CHECK-P8-NEXT: li r4, 80
; CHECK-P8-NEXT: xxswapd vs3, vs7
; CHECK-P8-NEXT: stxvd2x vs4, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r8
; CHECK-P8-NEXT: stxvd2x vs2, r3, r6
; CHECK-P8-NEXT: stxvd2x vs0, r3, r7
; CHECK-P8-NEXT: stxvd2x vs3, r3, r5
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: lxv vs2, 48(r4)
; CHECK-P9-NEXT: lxv vs3, 32(r4)
; CHECK-P9-NEXT: xxmrglw v2, vs1, vs1
; CHECK-P9-NEXT: xxmrghw v3, vs1, vs1
; CHECK-P9-NEXT: xxmrglw v4, vs0, vs0
; CHECK-P9-NEXT: xxmrghw v5, vs0, vs0
; CHECK-P9-NEXT: xxmrglw v0, vs3, vs3
; CHECK-P9-NEXT: xxmrghw v1, vs3, vs3
; CHECK-P9-NEXT: xxmrglw v6, vs2, vs2
; CHECK-P9-NEXT: xxmrghw v7, vs2, vs2
; CHECK-P9-NEXT: xvcvsxwdp vs0, v2
; CHECK-P9-NEXT: xvcvsxwdp vs1, v3
; CHECK-P9-NEXT: xvcvsxwdp vs2, v4
; CHECK-P9-NEXT: xvcvsxwdp vs3, v5
; CHECK-P9-NEXT: xvcvsxwdp vs4, v0
; CHECK-P9-NEXT: xvcvsxwdp vs5, v1
; CHECK-P9-NEXT: xvcvsxwdp vs6, v6
; CHECK-P9-NEXT: xvcvsxwdp vs7, v7
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: stxv vs7, 112(r3)
; CHECK-P9-NEXT: stxv vs6, 96(r3)
; CHECK-P9-NEXT: stxv vs5, 80(r3)
; CHECK-P9-NEXT: stxv vs4, 64(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: lxv vs2, 48(r4)
; CHECK-BE-NEXT: lxv vs3, 32(r4)
; CHECK-BE-NEXT: xxmrghw v2, vs1, vs1
; CHECK-BE-NEXT: xxmrglw v3, vs1, vs1
; CHECK-BE-NEXT: xxmrghw v4, vs0, vs0
; CHECK-BE-NEXT: xxmrglw v5, vs0, vs0
; CHECK-BE-NEXT: xxmrghw v0, vs3, vs3
; CHECK-BE-NEXT: xxmrglw v1, vs3, vs3
; CHECK-BE-NEXT: xxmrghw v6, vs2, vs2
; CHECK-BE-NEXT: xxmrglw v7, vs2, vs2
; CHECK-BE-NEXT: xvcvsxwdp vs0, v2
; CHECK-BE-NEXT: xvcvsxwdp vs1, v3
; CHECK-BE-NEXT: xvcvsxwdp vs2, v4
; CHECK-BE-NEXT: xvcvsxwdp vs3, v5
; CHECK-BE-NEXT: xvcvsxwdp vs4, v0
; CHECK-BE-NEXT: xvcvsxwdp vs5, v1
; CHECK-BE-NEXT: xvcvsxwdp vs6, v6
; CHECK-BE-NEXT: xvcvsxwdp vs7, v7
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs7, 112(r3)
; CHECK-BE-NEXT: stxv vs6, 96(r3)
; CHECK-BE-NEXT: stxv vs5, 80(r3)
; CHECK-BE-NEXT: stxv vs4, 64(r3)
; CHECK-BE-NEXT: blr
entry:
; %0 is the unnamed second parameter: pointer to the <16 x i32> source vector.
%a = load <16 x i32>, <16 x i32>* %0, align 64
%1 = sitofp <16 x i32> %a to <16 x double>
store <16 x double> %1, <16 x double>* %agg.result, align 128
ret void
}

View File

@ -0,0 +1,844 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE
; test2elt: unsigned <2 x i64> -> <2 x float> conversion (uitofp). The
; two-float result is bitcast to a single i64 and returned.
; In each of the three checked configurations the expected code contains one
; xscvuxdsp per element (two in total); the results are then merged
; (vmrglw / vmrghw) and moved to r3.
define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: xxlor vs1, v2, v2
; CHECK-P8-NEXT: xscvuxdsp f1, f1
; CHECK-P8-NEXT: xscvuxdsp f0, f0
; CHECK-P8-NEXT: xscvdpspn vs1, f1
; CHECK-P8-NEXT: xscvdpspn vs0, f0
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P8-NEXT: vmrglw v2, v3, v2
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mfvsrd r3, f0
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxswapd vs0, v2
; CHECK-P9-NEXT: xxlor vs1, v2, v2
; CHECK-P9-NEXT: xscvuxdsp f1, f1
; CHECK-P9-NEXT: xscvuxdsp f0, f0
; CHECK-P9-NEXT: xscvdpspn vs1, f1
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 1
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxswapd vs0, v2
; CHECK-BE-NEXT: xxlor vs1, v2, v2
; CHECK-BE-NEXT: xscvuxdsp f1, f1
; CHECK-BE-NEXT: xscvuxdsp f0, f0
; CHECK-BE-NEXT: xscvdpspn v2, f1
; CHECK-BE-NEXT: xscvdpspn v3, f0
; CHECK-BE-NEXT: vmrghw v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
%0 = uitofp <2 x i64> %a to <2 x float>
%1 = bitcast <2 x float> %0 to i64
ret i64 %1
}
; test4elt: loads a <4 x i64> through the unnamed pointer argument (%0,
; align 32), converts it to <4 x float> with uitofp and returns the vector.
; In each of the three checked configurations the expected code contains four
; xscvuxdsp instructions (one per element), two xxmrghd, two xvcvdpsp and a
; final vmrgew that produces the result in v2.
define <4 x float> @test4elt(<4 x i64>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: lxvd2x vs1, 0, r3
; CHECK-P8-NEXT: lxvd2x vs0, r3, r4
; CHECK-P8-NEXT: xxswapd vs3, vs1
; CHECK-P8-NEXT: xscvuxdsp f1, f1
; CHECK-P8-NEXT: xxswapd vs2, vs0
; CHECK-P8-NEXT: xscvuxdsp f0, f0
; CHECK-P8-NEXT: xscvuxdsp f3, f3
; CHECK-P8-NEXT: xscvuxdsp f2, f2
; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
; CHECK-P8-NEXT: xvcvdpsp v2, vs0
; CHECK-P8-NEXT: xvcvdpsp v3, vs1
; CHECK-P8-NEXT: vmrgew v2, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: lxv vs1, 0(r3)
; CHECK-P9-NEXT: xxswapd vs2, vs1
; CHECK-P9-NEXT: xxswapd vs3, vs0
; CHECK-P9-NEXT: xscvuxdsp f1, f1
; CHECK-P9-NEXT: xscvuxdsp f0, f0
; CHECK-P9-NEXT: xscvuxdsp f2, f2
; CHECK-P9-NEXT: xscvuxdsp f3, f3
; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
; CHECK-P9-NEXT: xvcvdpsp v3, vs0
; CHECK-P9-NEXT: xvcvdpsp v2, vs2
; CHECK-P9-NEXT: vmrgew v2, v3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: xxswapd vs2, vs1
; CHECK-BE-NEXT: xxswapd vs3, vs0
; CHECK-BE-NEXT: xscvuxdsp f1, f1
; CHECK-BE-NEXT: xscvuxdsp f0, f0
; CHECK-BE-NEXT: xscvuxdsp f2, f2
; CHECK-BE-NEXT: xscvuxdsp f3, f3
; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-BE-NEXT: xxmrghd vs1, vs3, vs2
; CHECK-BE-NEXT: xvcvdpsp v2, vs0
; CHECK-BE-NEXT: xvcvdpsp v3, vs1
; CHECK-BE-NEXT: vmrgew v2, v2, v3
; CHECK-BE-NEXT: blr
entry:
%a = load <4 x i64>, <4 x i64>* %0, align 32
%1 = uitofp <4 x i64> %a to <4 x float>
ret <4 x float> %1
}
; test8elt: loads an <8 x i64> through the unnamed second pointer argument
; (%0, align 64), converts it to <8 x float> with uitofp and stores the result
; through the sret pointer %agg.result (align 32). Returns void.
; In each of the three checked configurations the expected code contains
; eight xscvuxdsp instructions (one per element) and writes the result with
; two vector stores (stvx for pwr8, stxv for pwr9).
define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i64>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r5
; CHECK-P8-NEXT: xxswapd vs7, vs3
; CHECK-P8-NEXT: xscvuxdsp f3, f3
; CHECK-P8-NEXT: xxswapd vs4, vs0
; CHECK-P8-NEXT: xscvuxdsp f0, f0
; CHECK-P8-NEXT: xxswapd vs5, vs1
; CHECK-P8-NEXT: xscvuxdsp f1, f1
; CHECK-P8-NEXT: xxswapd vs6, vs2
; CHECK-P8-NEXT: xscvuxdsp f2, f2
; CHECK-P8-NEXT: xscvuxdsp f4, f4
; CHECK-P8-NEXT: xscvuxdsp f5, f5
; CHECK-P8-NEXT: xscvuxdsp f6, f6
; CHECK-P8-NEXT: xscvuxdsp f7, f7
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
; CHECK-P8-NEXT: xxmrghd vs2, vs5, vs4
; CHECK-P8-NEXT: xvcvdpsp v2, vs0
; CHECK-P8-NEXT: xvcvdpsp v3, vs1
; CHECK-P8-NEXT: xxmrghd vs0, vs6, vs7
; CHECK-P8-NEXT: xvcvdpsp v4, vs2
; CHECK-P8-NEXT: xvcvdpsp v5, vs0
; CHECK-P8-NEXT: vmrgew v2, v4, v2
; CHECK-P8-NEXT: vmrgew v3, v5, v3
; CHECK-P8-NEXT: stvx v2, r3, r5
; CHECK-P8-NEXT: stvx v3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 48(r4)
; CHECK-P9-NEXT: lxv vs1, 32(r4)
; CHECK-P9-NEXT: lxv vs2, 16(r4)
; CHECK-P9-NEXT: lxv vs3, 0(r4)
; CHECK-P9-NEXT: xxswapd vs4, vs3
; CHECK-P9-NEXT: xxswapd vs5, vs2
; CHECK-P9-NEXT: xxswapd vs6, vs1
; CHECK-P9-NEXT: xxswapd vs7, vs0
; CHECK-P9-NEXT: xscvuxdsp f3, f3
; CHECK-P9-NEXT: xscvuxdsp f2, f2
; CHECK-P9-NEXT: xscvuxdsp f1, f1
; CHECK-P9-NEXT: xscvuxdsp f0, f0
; CHECK-P9-NEXT: xscvuxdsp f4, f4
; CHECK-P9-NEXT: xscvuxdsp f5, f5
; CHECK-P9-NEXT: xscvuxdsp f6, f6
; CHECK-P9-NEXT: xscvuxdsp f7, f7
; CHECK-P9-NEXT: xxmrghd vs2, vs2, vs3
; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P9-NEXT: xxmrghd vs4, vs5, vs4
; CHECK-P9-NEXT: xxmrghd vs3, vs7, vs6
; CHECK-P9-NEXT: xvcvdpsp v3, vs2
; CHECK-P9-NEXT: xvcvdpsp v5, vs0
; CHECK-P9-NEXT: xvcvdpsp v2, vs4
; CHECK-P9-NEXT: xvcvdpsp v4, vs3
; CHECK-P9-NEXT: vmrgew v2, v3, v2
; CHECK-P9-NEXT: vmrgew v3, v5, v4
; CHECK-P9-NEXT: stxv v3, 16(r3)
; CHECK-P9-NEXT: stxv v2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 32(r4)
; CHECK-BE-NEXT: lxv vs1, 48(r4)
; CHECK-BE-NEXT: lxv vs2, 0(r4)
; CHECK-BE-NEXT: lxv vs3, 16(r4)
; CHECK-BE-NEXT: xxswapd vs4, vs3
; CHECK-BE-NEXT: xxswapd vs5, vs2
; CHECK-BE-NEXT: xxswapd vs6, vs1
; CHECK-BE-NEXT: xxswapd vs7, vs0
; CHECK-BE-NEXT: xscvuxdsp f3, f3
; CHECK-BE-NEXT: xscvuxdsp f2, f2
; CHECK-BE-NEXT: xscvuxdsp f1, f1
; CHECK-BE-NEXT: xscvuxdsp f0, f0
; CHECK-BE-NEXT: xscvuxdsp f4, f4
; CHECK-BE-NEXT: xscvuxdsp f5, f5
; CHECK-BE-NEXT: xscvuxdsp f6, f6
; CHECK-BE-NEXT: xscvuxdsp f7, f7
; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs3
; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-BE-NEXT: xxmrghd vs3, vs5, vs4
; CHECK-BE-NEXT: xxmrghd vs1, vs7, vs6
; CHECK-BE-NEXT: xvcvdpsp v2, vs2
; CHECK-BE-NEXT: xvcvdpsp v4, vs0
; CHECK-BE-NEXT: xvcvdpsp v3, vs3
; CHECK-BE-NEXT: xvcvdpsp v5, vs1
; CHECK-BE-NEXT: vmrgew v2, v2, v3
; CHECK-BE-NEXT: vmrgew v3, v4, v5
; CHECK-BE-NEXT: stxv v3, 16(r3)
; CHECK-BE-NEXT: stxv v2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x i64>, <8 x i64>* %0, align 64
%1 = uitofp <8 x i64> %a to <8 x float>
store <8 x float> %1, <8 x float>* %agg.result, align 32
ret void
}
; test16elt: loads a <16 x i64> through the unnamed second pointer argument
; (%0, align 128), converts it to <16 x float> with uitofp and stores the
; result through the sret pointer %agg.result (align 64). Returns void.
; In each of the three checked configurations the expected code contains
; sixteen xscvuxdsp instructions (one per element) and four vector stores.
; It also spills f30 and f31 to -16(r1) and -8(r1) and reloads them before
; returning.
define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i64>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r7, 64
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: lxvd2x vs11, 0, r4
; CHECK-P8-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: lxvd2x vs8, r4, r7
; CHECK-P8-NEXT: li r7, 80
; CHECK-P8-NEXT: lxvd2x vs6, r4, r5
; CHECK-P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: lxvd2x vs7, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
; CHECK-P8-NEXT: li r7, 96
; CHECK-P8-NEXT: lxvd2x vs3, r4, r7
; CHECK-P8-NEXT: li r7, 112
; CHECK-P8-NEXT: xscvuxdsp f30, f11
; CHECK-P8-NEXT: xxswapd vs11, vs11
; CHECK-P8-NEXT: lxvd2x vs4, r4, r7
; CHECK-P8-NEXT: li r7, 16
; CHECK-P8-NEXT: xscvuxdsp f0, f6
; CHECK-P8-NEXT: xxswapd vs6, vs6
; CHECK-P8-NEXT: xscvuxdsp f1, f7
; CHECK-P8-NEXT: lxvd2x vs9, r4, r7
; CHECK-P8-NEXT: xxswapd vs7, vs7
; CHECK-P8-NEXT: xscvuxdsp f5, f8
; CHECK-P8-NEXT: xxswapd vs8, vs8
; CHECK-P8-NEXT: xscvuxdsp f10, f2
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xscvuxdsp f12, f3
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xscvuxdsp f13, f4
; CHECK-P8-NEXT: xxswapd vs4, vs4
; CHECK-P8-NEXT: xscvuxdsp f31, f9
; CHECK-P8-NEXT: xxswapd vs9, vs9
; CHECK-P8-NEXT: xscvuxdsp f6, f6
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xscvuxdsp f7, f7
; CHECK-P8-NEXT: xscvuxdsp f8, f8
; CHECK-P8-NEXT: xxmrghd vs5, vs10, vs5
; CHECK-P8-NEXT: xscvuxdsp f2, f2
; CHECK-P8-NEXT: xscvuxdsp f3, f3
; CHECK-P8-NEXT: xxmrghd vs10, vs13, vs12
; CHECK-P8-NEXT: xscvuxdsp f4, f4
; CHECK-P8-NEXT: xscvuxdsp f1, f9
; CHECK-P8-NEXT: xscvuxdsp f9, f11
; CHECK-P8-NEXT: xxmrghd vs11, vs31, vs30
; CHECK-P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: xvcvdpsp v2, vs0
; CHECK-P8-NEXT: xxmrghd vs0, vs7, vs6
; CHECK-P8-NEXT: xxmrghd vs2, vs2, vs8
; CHECK-P8-NEXT: xvcvdpsp v3, vs5
; CHECK-P8-NEXT: xvcvdpsp v4, vs10
; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs3
; CHECK-P8-NEXT: xvcvdpsp v5, vs11
; CHECK-P8-NEXT: xvcvdpsp v0, vs0
; CHECK-P8-NEXT: xxmrghd vs1, vs1, vs9
; CHECK-P8-NEXT: xvcvdpsp v1, vs2
; CHECK-P8-NEXT: xvcvdpsp v6, vs3
; CHECK-P8-NEXT: xvcvdpsp v7, vs1
; CHECK-P8-NEXT: vmrgew v2, v0, v2
; CHECK-P8-NEXT: vmrgew v3, v1, v3
; CHECK-P8-NEXT: vmrgew v4, v6, v4
; CHECK-P8-NEXT: vmrgew v5, v7, v5
; CHECK-P8-NEXT: stvx v2, r3, r7
; CHECK-P8-NEXT: stvx v3, r3, r5
; CHECK-P8-NEXT: stvx v4, r3, r6
; CHECK-P8-NEXT: stvx v5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs4, 48(r4)
; CHECK-P9-NEXT: lxv vs5, 32(r4)
; CHECK-P9-NEXT: lxv vs6, 16(r4)
; CHECK-P9-NEXT: lxv vs7, 0(r4)
; CHECK-P9-NEXT: lxv vs8, 112(r4)
; CHECK-P9-NEXT: lxv vs9, 96(r4)
; CHECK-P9-NEXT: lxv vs10, 80(r4)
; CHECK-P9-NEXT: lxv vs11, 64(r4)
; CHECK-P9-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; CHECK-P9-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-P9-NEXT: xxswapd vs0, vs7
; CHECK-P9-NEXT: xxswapd vs1, vs6
; CHECK-P9-NEXT: xxswapd vs2, vs5
; CHECK-P9-NEXT: xxswapd vs3, vs4
; CHECK-P9-NEXT: xxswapd vs12, vs11
; CHECK-P9-NEXT: xxswapd vs13, vs10
; CHECK-P9-NEXT: xxswapd vs31, vs9
; CHECK-P9-NEXT: xxswapd vs30, vs8
; CHECK-P9-NEXT: xscvuxdsp f7, f7
; CHECK-P9-NEXT: xscvuxdsp f6, f6
; CHECK-P9-NEXT: xscvuxdsp f5, f5
; CHECK-P9-NEXT: xscvuxdsp f4, f4
; CHECK-P9-NEXT: xscvuxdsp f11, f11
; CHECK-P9-NEXT: xscvuxdsp f10, f10
; CHECK-P9-NEXT: xscvuxdsp f9, f9
; CHECK-P9-NEXT: xscvuxdsp f8, f8
; CHECK-P9-NEXT: xscvuxdsp f0, f0
; CHECK-P9-NEXT: xscvuxdsp f1, f1
; CHECK-P9-NEXT: xscvuxdsp f2, f2
; CHECK-P9-NEXT: xscvuxdsp f3, f3
; CHECK-P9-NEXT: xscvuxdsp f12, f12
; CHECK-P9-NEXT: xscvuxdsp f13, f13
; CHECK-P9-NEXT: xscvuxdsp f31, f31
; CHECK-P9-NEXT: xscvuxdsp f30, f30
; CHECK-P9-NEXT: xxmrghd vs6, vs6, vs7
; CHECK-P9-NEXT: xxmrghd vs4, vs4, vs5
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P9-NEXT: xxmrghd vs1, vs3, vs2
; CHECK-P9-NEXT: xxmrghd vs2, vs10, vs11
; CHECK-P9-NEXT: xxmrghd vs3, vs8, vs9
; CHECK-P9-NEXT: xxmrghd vs5, vs13, vs12
; CHECK-P9-NEXT: xxmrghd vs7, vs30, vs31
; CHECK-P9-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; CHECK-P9-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; CHECK-P9-NEXT: xvcvdpsp v2, vs6
; CHECK-P9-NEXT: xvcvdpsp v3, vs4
; CHECK-P9-NEXT: xvcvdpsp v4, vs0
; CHECK-P9-NEXT: xvcvdpsp v5, vs1
; CHECK-P9-NEXT: xvcvdpsp v0, vs5
; CHECK-P9-NEXT: xvcvdpsp v1, vs2
; CHECK-P9-NEXT: xvcvdpsp v6, vs7
; CHECK-P9-NEXT: xvcvdpsp v7, vs3
; CHECK-P9-NEXT: vmrgew v2, v2, v4
; CHECK-P9-NEXT: vmrgew v3, v3, v5
; CHECK-P9-NEXT: vmrgew v4, v1, v0
; CHECK-P9-NEXT: vmrgew v5, v7, v6
; CHECK-P9-NEXT: stxv v4, 32(r3)
; CHECK-P9-NEXT: stxv v3, 16(r3)
; CHECK-P9-NEXT: stxv v2, 0(r3)
; CHECK-P9-NEXT: stxv v5, 48(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs2, 32(r4)
; CHECK-BE-NEXT: lxv vs3, 48(r4)
; CHECK-BE-NEXT: lxv vs4, 0(r4)
; CHECK-BE-NEXT: lxv vs5, 16(r4)
; CHECK-BE-NEXT: lxv vs6, 96(r4)
; CHECK-BE-NEXT: lxv vs7, 112(r4)
; CHECK-BE-NEXT: lxv vs8, 64(r4)
; CHECK-BE-NEXT: lxv vs9, 80(r4)
; CHECK-BE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: xxswapd vs0, vs5
; CHECK-BE-NEXT: xxswapd vs1, vs4
; CHECK-BE-NEXT: xxswapd vs10, vs3
; CHECK-BE-NEXT: xxswapd vs11, vs2
; CHECK-BE-NEXT: xxswapd vs12, vs9
; CHECK-BE-NEXT: xxswapd vs13, vs8
; CHECK-BE-NEXT: xxswapd vs31, vs7
; CHECK-BE-NEXT: xxswapd vs30, vs6
; CHECK-BE-NEXT: xscvuxdsp f5, f5
; CHECK-BE-NEXT: xscvuxdsp f4, f4
; CHECK-BE-NEXT: xscvuxdsp f3, f3
; CHECK-BE-NEXT: xscvuxdsp f2, f2
; CHECK-BE-NEXT: xscvuxdsp f9, f9
; CHECK-BE-NEXT: xscvuxdsp f8, f8
; CHECK-BE-NEXT: xscvuxdsp f7, f7
; CHECK-BE-NEXT: xscvuxdsp f6, f6
; CHECK-BE-NEXT: xscvuxdsp f0, f0
; CHECK-BE-NEXT: xscvuxdsp f1, f1
; CHECK-BE-NEXT: xscvuxdsp f10, f10
; CHECK-BE-NEXT: xscvuxdsp f11, f11
; CHECK-BE-NEXT: xscvuxdsp f12, f12
; CHECK-BE-NEXT: xscvuxdsp f13, f13
; CHECK-BE-NEXT: xscvuxdsp f31, f31
; CHECK-BE-NEXT: xscvuxdsp f30, f30
; CHECK-BE-NEXT: xxmrghd vs4, vs4, vs5
; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs3
; CHECK-BE-NEXT: xxmrghd vs3, vs8, vs9
; CHECK-BE-NEXT: xxmrghd vs5, vs6, vs7
; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-BE-NEXT: xxmrghd vs1, vs11, vs10
; CHECK-BE-NEXT: xxmrghd vs6, vs13, vs12
; CHECK-BE-NEXT: xxmrghd vs7, vs30, vs31
; CHECK-BE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; CHECK-BE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; CHECK-BE-NEXT: xvcvdpsp v2, vs4
; CHECK-BE-NEXT: xvcvdpsp v3, vs2
; CHECK-BE-NEXT: xvcvdpsp v0, vs3
; CHECK-BE-NEXT: xvcvdpsp v6, vs5
; CHECK-BE-NEXT: xvcvdpsp v4, vs0
; CHECK-BE-NEXT: xvcvdpsp v5, vs1
; CHECK-BE-NEXT: xvcvdpsp v1, vs6
; CHECK-BE-NEXT: xvcvdpsp v7, vs7
; CHECK-BE-NEXT: vmrgew v2, v2, v4
; CHECK-BE-NEXT: vmrgew v3, v3, v5
; CHECK-BE-NEXT: vmrgew v4, v0, v1
; CHECK-BE-NEXT: vmrgew v5, v6, v7
; CHECK-BE-NEXT: stxv v5, 48(r3)
; CHECK-BE-NEXT: stxv v4, 32(r3)
; CHECK-BE-NEXT: stxv v3, 16(r3)
; CHECK-BE-NEXT: stxv v2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i64>, <16 x i64>* %0, align 128
%1 = uitofp <16 x i64> %a to <16 x float>
store <16 x float> %1, <16 x float>* %agg.result, align 64
ret void
}
; test2elt_signed: signed <2 x i64> -> <2 x float> conversion (sitofp). The
; two-float result is bitcast to a single i64 and returned.
; In each of the three checked configurations the expected code contains one
; xscvsxdsp per element (two in total); the results are then merged
; (vmrglw / vmrghw) and moved to r3.
define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: xxlor vs1, v2, v2
; CHECK-P8-NEXT: xscvsxdsp f1, f1
; CHECK-P8-NEXT: xscvsxdsp f0, f0
; CHECK-P8-NEXT: xscvdpspn vs1, f1
; CHECK-P8-NEXT: xscvdpspn vs0, f0
; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1
; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P8-NEXT: vmrglw v2, v3, v2
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mfvsrd r3, f0
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xxswapd vs0, v2
; CHECK-P9-NEXT: xxlor vs1, v2, v2
; CHECK-P9-NEXT: xscvsxdsp f1, f1
; CHECK-P9-NEXT: xscvsxdsp f0, f0
; CHECK-P9-NEXT: xscvdpspn vs1, f1
; CHECK-P9-NEXT: xscvdpspn vs0, f0
; CHECK-P9-NEXT: xxsldwi v3, vs1, vs1, 1
; CHECK-P9-NEXT: xxsldwi v2, vs0, vs0, 1
; CHECK-P9-NEXT: vmrglw v2, v3, v2
; CHECK-P9-NEXT: mfvsrld r3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxswapd vs0, v2
; CHECK-BE-NEXT: xxlor vs1, v2, v2
; CHECK-BE-NEXT: xscvsxdsp f1, f1
; CHECK-BE-NEXT: xscvsxdsp f0, f0
; CHECK-BE-NEXT: xscvdpspn v2, f1
; CHECK-BE-NEXT: xscvdpspn v3, f0
; CHECK-BE-NEXT: vmrghw v2, v2, v3
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
%0 = sitofp <2 x i64> %a to <2 x float>
%1 = bitcast <2 x float> %0 to i64
ret i64 %1
}
; test4elt_signed: loads a <4 x i64> through the unnamed pointer argument
; (%0, align 32), converts it to <4 x float> with sitofp and returns the
; vector.
; In each of the three checked configurations the expected code contains four
; xscvsxdsp instructions (one per element), two xxmrghd, two xvcvdpsp and a
; final vmrgew that produces the result in v2.
define <4 x float> @test4elt_signed(<4 x i64>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: lxvd2x vs1, 0, r3
; CHECK-P8-NEXT: lxvd2x vs0, r3, r4
; CHECK-P8-NEXT: xxswapd vs3, vs1
; CHECK-P8-NEXT: xscvsxdsp f1, f1
; CHECK-P8-NEXT: xxswapd vs2, vs0
; CHECK-P8-NEXT: xscvsxdsp f0, f0
; CHECK-P8-NEXT: xscvsxdsp f3, f3
; CHECK-P8-NEXT: xscvsxdsp f2, f2
; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
; CHECK-P8-NEXT: xvcvdpsp v2, vs0
; CHECK-P8-NEXT: xvcvdpsp v3, vs1
; CHECK-P8-NEXT: vmrgew v2, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r3)
; CHECK-P9-NEXT: lxv vs1, 0(r3)
; CHECK-P9-NEXT: xxswapd vs2, vs1
; CHECK-P9-NEXT: xxswapd vs3, vs0
; CHECK-P9-NEXT: xscvsxdsp f1, f1
; CHECK-P9-NEXT: xscvsxdsp f0, f0
; CHECK-P9-NEXT: xscvsxdsp f2, f2
; CHECK-P9-NEXT: xscvsxdsp f3, f3
; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P9-NEXT: xxmrghd vs2, vs3, vs2
; CHECK-P9-NEXT: xvcvdpsp v3, vs0
; CHECK-P9-NEXT: xvcvdpsp v2, vs2
; CHECK-P9-NEXT: vmrgew v2, v3, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs1, 16(r3)
; CHECK-BE-NEXT: xxswapd vs2, vs1
; CHECK-BE-NEXT: xxswapd vs3, vs0
; CHECK-BE-NEXT: xscvsxdsp f1, f1
; CHECK-BE-NEXT: xscvsxdsp f0, f0
; CHECK-BE-NEXT: xscvsxdsp f2, f2
; CHECK-BE-NEXT: xscvsxdsp f3, f3
; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-BE-NEXT: xxmrghd vs1, vs3, vs2
; CHECK-BE-NEXT: xvcvdpsp v2, vs0
; CHECK-BE-NEXT: xvcvdpsp v3, vs1
; CHECK-BE-NEXT: vmrgew v2, v2, v3
; CHECK-BE-NEXT: blr
entry:
%a = load <4 x i64>, <4 x i64>* %0, align 32
%1 = sitofp <4 x i64> %a to <4 x float>
ret <4 x float> %1
}
; test8elt_signed: loads an <8 x i64> through the unnamed second pointer
; argument (%0, align 64), converts it to <8 x float> with sitofp and stores
; the result through the sret pointer %agg.result (align 32). Returns void.
; In each of the three checked configurations the expected code contains
; eight xscvsxdsp instructions (one per element) and writes the result with
; two vector stores (stvx for pwr8, stxv for pwr9).
define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, <8 x i64>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r5
; CHECK-P8-NEXT: xxswapd vs7, vs3
; CHECK-P8-NEXT: xscvsxdsp f3, f3
; CHECK-P8-NEXT: xxswapd vs4, vs0
; CHECK-P8-NEXT: xscvsxdsp f0, f0
; CHECK-P8-NEXT: xxswapd vs5, vs1
; CHECK-P8-NEXT: xscvsxdsp f1, f1
; CHECK-P8-NEXT: xxswapd vs6, vs2
; CHECK-P8-NEXT: xscvsxdsp f2, f2
; CHECK-P8-NEXT: xscvsxdsp f4, f4
; CHECK-P8-NEXT: xscvsxdsp f5, f5
; CHECK-P8-NEXT: xscvsxdsp f6, f6
; CHECK-P8-NEXT: xscvsxdsp f7, f7
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
; CHECK-P8-NEXT: xxmrghd vs2, vs5, vs4
; CHECK-P8-NEXT: xvcvdpsp v2, vs0
; CHECK-P8-NEXT: xvcvdpsp v3, vs1
; CHECK-P8-NEXT: xxmrghd vs0, vs6, vs7
; CHECK-P8-NEXT: xvcvdpsp v4, vs2
; CHECK-P8-NEXT: xvcvdpsp v5, vs0
; CHECK-P8-NEXT: vmrgew v2, v4, v2
; CHECK-P8-NEXT: vmrgew v3, v5, v3
; CHECK-P8-NEXT: stvx v2, r3, r5
; CHECK-P8-NEXT: stvx v3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 48(r4)
; CHECK-P9-NEXT: lxv vs1, 32(r4)
; CHECK-P9-NEXT: lxv vs2, 16(r4)
; CHECK-P9-NEXT: lxv vs3, 0(r4)
; CHECK-P9-NEXT: xxswapd vs4, vs3
; CHECK-P9-NEXT: xxswapd vs5, vs2
; CHECK-P9-NEXT: xxswapd vs6, vs1
; CHECK-P9-NEXT: xxswapd vs7, vs0
; CHECK-P9-NEXT: xscvsxdsp f3, f3
; CHECK-P9-NEXT: xscvsxdsp f2, f2
; CHECK-P9-NEXT: xscvsxdsp f1, f1
; CHECK-P9-NEXT: xscvsxdsp f0, f0
; CHECK-P9-NEXT: xscvsxdsp f4, f4
; CHECK-P9-NEXT: xscvsxdsp f5, f5
; CHECK-P9-NEXT: xscvsxdsp f6, f6
; CHECK-P9-NEXT: xscvsxdsp f7, f7
; CHECK-P9-NEXT: xxmrghd vs2, vs2, vs3
; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P9-NEXT: xxmrghd vs4, vs5, vs4
; CHECK-P9-NEXT: xxmrghd vs3, vs7, vs6
; CHECK-P9-NEXT: xvcvdpsp v3, vs2
; CHECK-P9-NEXT: xvcvdpsp v5, vs0
; CHECK-P9-NEXT: xvcvdpsp v2, vs4
; CHECK-P9-NEXT: xvcvdpsp v4, vs3
; CHECK-P9-NEXT: vmrgew v2, v3, v2
; CHECK-P9-NEXT: vmrgew v3, v5, v4
; CHECK-P9-NEXT: stxv v3, 16(r3)
; CHECK-P9-NEXT: stxv v2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 32(r4)
; CHECK-BE-NEXT: lxv vs1, 48(r4)
; CHECK-BE-NEXT: lxv vs2, 0(r4)
; CHECK-BE-NEXT: lxv vs3, 16(r4)
; CHECK-BE-NEXT: xxswapd vs4, vs3
; CHECK-BE-NEXT: xxswapd vs5, vs2
; CHECK-BE-NEXT: xxswapd vs6, vs1
; CHECK-BE-NEXT: xxswapd vs7, vs0
; CHECK-BE-NEXT: xscvsxdsp f3, f3
; CHECK-BE-NEXT: xscvsxdsp f2, f2
; CHECK-BE-NEXT: xscvsxdsp f1, f1
; CHECK-BE-NEXT: xscvsxdsp f0, f0
; CHECK-BE-NEXT: xscvsxdsp f4, f4
; CHECK-BE-NEXT: xscvsxdsp f5, f5
; CHECK-BE-NEXT: xscvsxdsp f6, f6
; CHECK-BE-NEXT: xscvsxdsp f7, f7
; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs3
; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-BE-NEXT: xxmrghd vs3, vs5, vs4
; CHECK-BE-NEXT: xxmrghd vs1, vs7, vs6
; CHECK-BE-NEXT: xvcvdpsp v2, vs2
; CHECK-BE-NEXT: xvcvdpsp v4, vs0
; CHECK-BE-NEXT: xvcvdpsp v3, vs3
; CHECK-BE-NEXT: xvcvdpsp v5, vs1
; CHECK-BE-NEXT: vmrgew v2, v2, v3
; CHECK-BE-NEXT: vmrgew v3, v4, v5
; CHECK-BE-NEXT: stxv v3, 16(r3)
; CHECK-BE-NEXT: stxv v2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x i64>, <8 x i64>* %0, align 64
%1 = sitofp <8 x i64> %a to <8 x float>
store <8 x float> %1, <8 x float>* %agg.result, align 32
ret void
}
; test16elt_signed: loads a <16 x i64> through the unnamed second pointer
; argument (%0, align 128), converts it to <16 x float> with sitofp and stores
; the result through the sret pointer %agg.result (align 64). Returns void.
; In each of the three checked configurations the expected code contains
; sixteen xscvsxdsp instructions (one per element) and four vector stores.
; It also spills f30 and f31 to -16(r1) and -8(r1) and reloads them before
; returning.
define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, <16 x i64>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r7, 64
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: lxvd2x vs11, 0, r4
; CHECK-P8-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: lxvd2x vs8, r4, r7
; CHECK-P8-NEXT: li r7, 80
; CHECK-P8-NEXT: lxvd2x vs6, r4, r5
; CHECK-P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: lxvd2x vs7, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
; CHECK-P8-NEXT: li r7, 96
; CHECK-P8-NEXT: lxvd2x vs3, r4, r7
; CHECK-P8-NEXT: li r7, 112
; CHECK-P8-NEXT: xscvsxdsp f30, f11
; CHECK-P8-NEXT: xxswapd vs11, vs11
; CHECK-P8-NEXT: lxvd2x vs4, r4, r7
; CHECK-P8-NEXT: li r7, 16
; CHECK-P8-NEXT: xscvsxdsp f0, f6
; CHECK-P8-NEXT: xxswapd vs6, vs6
; CHECK-P8-NEXT: xscvsxdsp f1, f7
; CHECK-P8-NEXT: lxvd2x vs9, r4, r7
; CHECK-P8-NEXT: xxswapd vs7, vs7
; CHECK-P8-NEXT: xscvsxdsp f5, f8
; CHECK-P8-NEXT: xxswapd vs8, vs8
; CHECK-P8-NEXT: xscvsxdsp f10, f2
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xscvsxdsp f12, f3
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xscvsxdsp f13, f4
; CHECK-P8-NEXT: xxswapd vs4, vs4
; CHECK-P8-NEXT: xscvsxdsp f31, f9
; CHECK-P8-NEXT: xxswapd vs9, vs9
; CHECK-P8-NEXT: xscvsxdsp f6, f6
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xscvsxdsp f7, f7
; CHECK-P8-NEXT: xscvsxdsp f8, f8
; CHECK-P8-NEXT: xxmrghd vs5, vs10, vs5
; CHECK-P8-NEXT: xscvsxdsp f2, f2
; CHECK-P8-NEXT: xscvsxdsp f3, f3
; CHECK-P8-NEXT: xxmrghd vs10, vs13, vs12
; CHECK-P8-NEXT: xscvsxdsp f4, f4
; CHECK-P8-NEXT: xscvsxdsp f1, f9
; CHECK-P8-NEXT: xscvsxdsp f9, f11
; CHECK-P8-NEXT: xxmrghd vs11, vs31, vs30
; CHECK-P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: xvcvdpsp v2, vs0
; CHECK-P8-NEXT: xxmrghd vs0, vs7, vs6
; CHECK-P8-NEXT: xxmrghd vs2, vs2, vs8
; CHECK-P8-NEXT: xvcvdpsp v3, vs5
; CHECK-P8-NEXT: xvcvdpsp v4, vs10
; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs3
; CHECK-P8-NEXT: xvcvdpsp v5, vs11
; CHECK-P8-NEXT: xvcvdpsp v0, vs0
; CHECK-P8-NEXT: xxmrghd vs1, vs1, vs9
; CHECK-P8-NEXT: xvcvdpsp v1, vs2
; CHECK-P8-NEXT: xvcvdpsp v6, vs3
; CHECK-P8-NEXT: xvcvdpsp v7, vs1
; CHECK-P8-NEXT: vmrgew v2, v0, v2
; CHECK-P8-NEXT: vmrgew v3, v1, v3
; CHECK-P8-NEXT: vmrgew v4, v6, v4
; CHECK-P8-NEXT: vmrgew v5, v7, v5
; CHECK-P8-NEXT: stvx v2, r3, r7
; CHECK-P8-NEXT: stvx v3, r3, r5
; CHECK-P8-NEXT: stvx v4, r3, r6
; CHECK-P8-NEXT: stvx v5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs4, 48(r4)
; CHECK-P9-NEXT: lxv vs5, 32(r4)
; CHECK-P9-NEXT: lxv vs6, 16(r4)
; CHECK-P9-NEXT: lxv vs7, 0(r4)
; CHECK-P9-NEXT: lxv vs8, 112(r4)
; CHECK-P9-NEXT: lxv vs9, 96(r4)
; CHECK-P9-NEXT: lxv vs10, 80(r4)
; CHECK-P9-NEXT: lxv vs11, 64(r4)
; CHECK-P9-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; CHECK-P9-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-P9-NEXT: xxswapd vs0, vs7
; CHECK-P9-NEXT: xxswapd vs1, vs6
; CHECK-P9-NEXT: xxswapd vs2, vs5
; CHECK-P9-NEXT: xxswapd vs3, vs4
; CHECK-P9-NEXT: xxswapd vs12, vs11
; CHECK-P9-NEXT: xxswapd vs13, vs10
; CHECK-P9-NEXT: xxswapd vs31, vs9
; CHECK-P9-NEXT: xxswapd vs30, vs8
; CHECK-P9-NEXT: xscvsxdsp f7, f7
; CHECK-P9-NEXT: xscvsxdsp f6, f6
; CHECK-P9-NEXT: xscvsxdsp f5, f5
; CHECK-P9-NEXT: xscvsxdsp f4, f4
; CHECK-P9-NEXT: xscvsxdsp f11, f11
; CHECK-P9-NEXT: xscvsxdsp f10, f10
; CHECK-P9-NEXT: xscvsxdsp f9, f9
; CHECK-P9-NEXT: xscvsxdsp f8, f8
; CHECK-P9-NEXT: xscvsxdsp f0, f0
; CHECK-P9-NEXT: xscvsxdsp f1, f1
; CHECK-P9-NEXT: xscvsxdsp f2, f2
; CHECK-P9-NEXT: xscvsxdsp f3, f3
; CHECK-P9-NEXT: xscvsxdsp f12, f12
; CHECK-P9-NEXT: xscvsxdsp f13, f13
; CHECK-P9-NEXT: xscvsxdsp f31, f31
; CHECK-P9-NEXT: xscvsxdsp f30, f30
; CHECK-P9-NEXT: xxmrghd vs6, vs6, vs7
; CHECK-P9-NEXT: xxmrghd vs4, vs4, vs5
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P9-NEXT: xxmrghd vs1, vs3, vs2
; CHECK-P9-NEXT: xxmrghd vs2, vs10, vs11
; CHECK-P9-NEXT: xxmrghd vs3, vs8, vs9
; CHECK-P9-NEXT: xxmrghd vs5, vs13, vs12
; CHECK-P9-NEXT: xxmrghd vs7, vs30, vs31
; CHECK-P9-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; CHECK-P9-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; CHECK-P9-NEXT: xvcvdpsp v2, vs6
; CHECK-P9-NEXT: xvcvdpsp v3, vs4
; CHECK-P9-NEXT: xvcvdpsp v4, vs0
; CHECK-P9-NEXT: xvcvdpsp v5, vs1
; CHECK-P9-NEXT: xvcvdpsp v0, vs5
; CHECK-P9-NEXT: xvcvdpsp v1, vs2
; CHECK-P9-NEXT: xvcvdpsp v6, vs7
; CHECK-P9-NEXT: xvcvdpsp v7, vs3
; CHECK-P9-NEXT: vmrgew v2, v2, v4
; CHECK-P9-NEXT: vmrgew v3, v3, v5
; CHECK-P9-NEXT: vmrgew v4, v1, v0
; CHECK-P9-NEXT: vmrgew v5, v7, v6
; CHECK-P9-NEXT: stxv v4, 32(r3)
; CHECK-P9-NEXT: stxv v3, 16(r3)
; CHECK-P9-NEXT: stxv v2, 0(r3)
; CHECK-P9-NEXT: stxv v5, 48(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs2, 32(r4)
; CHECK-BE-NEXT: lxv vs3, 48(r4)
; CHECK-BE-NEXT: lxv vs4, 0(r4)
; CHECK-BE-NEXT: lxv vs5, 16(r4)
; CHECK-BE-NEXT: lxv vs6, 96(r4)
; CHECK-BE-NEXT: lxv vs7, 112(r4)
; CHECK-BE-NEXT: lxv vs8, 64(r4)
; CHECK-BE-NEXT: lxv vs9, 80(r4)
; CHECK-BE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: xxswapd vs0, vs5
; CHECK-BE-NEXT: xxswapd vs1, vs4
; CHECK-BE-NEXT: xxswapd vs10, vs3
; CHECK-BE-NEXT: xxswapd vs11, vs2
; CHECK-BE-NEXT: xxswapd vs12, vs9
; CHECK-BE-NEXT: xxswapd vs13, vs8
; CHECK-BE-NEXT: xxswapd vs31, vs7
; CHECK-BE-NEXT: xxswapd vs30, vs6
; CHECK-BE-NEXT: xscvsxdsp f5, f5
; CHECK-BE-NEXT: xscvsxdsp f4, f4
; CHECK-BE-NEXT: xscvsxdsp f3, f3
; CHECK-BE-NEXT: xscvsxdsp f2, f2
; CHECK-BE-NEXT: xscvsxdsp f9, f9
; CHECK-BE-NEXT: xscvsxdsp f8, f8
; CHECK-BE-NEXT: xscvsxdsp f7, f7
; CHECK-BE-NEXT: xscvsxdsp f6, f6
; CHECK-BE-NEXT: xscvsxdsp f0, f0
; CHECK-BE-NEXT: xscvsxdsp f1, f1
; CHECK-BE-NEXT: xscvsxdsp f10, f10
; CHECK-BE-NEXT: xscvsxdsp f11, f11
; CHECK-BE-NEXT: xscvsxdsp f12, f12
; CHECK-BE-NEXT: xscvsxdsp f13, f13
; CHECK-BE-NEXT: xscvsxdsp f31, f31
; CHECK-BE-NEXT: xscvsxdsp f30, f30
; CHECK-BE-NEXT: xxmrghd vs4, vs4, vs5
; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs3
; CHECK-BE-NEXT: xxmrghd vs3, vs8, vs9
; CHECK-BE-NEXT: xxmrghd vs5, vs6, vs7
; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-BE-NEXT: xxmrghd vs1, vs11, vs10
; CHECK-BE-NEXT: xxmrghd vs6, vs13, vs12
; CHECK-BE-NEXT: xxmrghd vs7, vs30, vs31
; CHECK-BE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; CHECK-BE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; CHECK-BE-NEXT: xvcvdpsp v2, vs4
; CHECK-BE-NEXT: xvcvdpsp v3, vs2
; CHECK-BE-NEXT: xvcvdpsp v0, vs3
; CHECK-BE-NEXT: xvcvdpsp v6, vs5
; CHECK-BE-NEXT: xvcvdpsp v4, vs0
; CHECK-BE-NEXT: xvcvdpsp v5, vs1
; CHECK-BE-NEXT: xvcvdpsp v1, vs6
; CHECK-BE-NEXT: xvcvdpsp v7, vs7
; CHECK-BE-NEXT: vmrgew v2, v2, v4
; CHECK-BE-NEXT: vmrgew v3, v3, v5
; CHECK-BE-NEXT: vmrgew v4, v0, v1
; CHECK-BE-NEXT: vmrgew v5, v6, v7
; CHECK-BE-NEXT: stxv v5, 48(r3)
; CHECK-BE-NEXT: stxv v4, 32(r3)
; CHECK-BE-NEXT: stxv v3, 16(r3)
; CHECK-BE-NEXT: stxv v2, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i64>, <16 x i64>* %0, align 128
%1 = sitofp <16 x i64> %a to <16 x float>
store <16 x float> %1, <16 x float>* %agg.result, align 64
ret void
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,304 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE
; Unsigned <2 x i32> -> <2 x float> conversion where both the input and the
; result travel as a plain i64: the body bitcasts %a.coerce to <2 x i32>,
; applies uitofp, and bitcasts the <2 x float> back to i64 for the return.
; The assertions pin the llc output of the three invocations at the top of
; this file (pwr8 little-endian, pwr9 little-endian, pwr9 big-endian); each
; one expects exactly one xvcvuxwsp between the GPR<->VSR moves.
define i64 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xvcvuxwsp vs0, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: mfvsrd r3, f0
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xvcvuxwsp vs0, v2
; CHECK-P9-NEXT: mfvsrld r3, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
; CHECK-BE-NEXT: xvcvuxwsp vs0, vs0
; CHECK-BE-NEXT: mfvsrd r3, f0
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i64 %a.coerce to <2 x i32>
%1 = uitofp <2 x i32> %0 to <2 x float>
%2 = bitcast <2 x float> %1 to i64
ret i64 %2
}
; Unsigned <4 x i32> -> <4 x float> conversion of the argument %a, returned
; by value. All three configurations checked below expect the whole function
; to be a single in-place xvcvuxwsp on v2 followed by blr.
define <4 x float> @test4elt(<4 x i32> %a) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xvcvuxwsp v2, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xvcvuxwsp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xvcvuxwsp v2, v2
; CHECK-BE-NEXT: blr
entry:
%0 = uitofp <4 x i32> %a to <4 x float>
ret <4 x float> %0
}
; Unsigned <8 x i32> -> <8 x float> conversion through memory: the input is
; loaded from the unnamed second pointer parameter (%0), converted with
; uitofp, and stored through the sret pointer %agg.result.
; Each configuration checked below expects two loads, two xvcvuxwsp
; conversions and two stores (offsets 0 and 16 from r4 / r3).
define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i32>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: xvcvuxwsp v3, v3
; CHECK-P8-NEXT: xvcvuxwsp v2, v2
; CHECK-P8-NEXT: stvx v3, 0, r3
; CHECK-P8-NEXT: stvx v2, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xvcvuxwsp vs1, vs1
; CHECK-P9-NEXT: xvcvuxwsp vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 16(r3)
; CHECK-P9-NEXT: stxv vs1, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: xvcvuxwsp vs1, vs1
; CHECK-BE-NEXT: xvcvuxwsp vs0, vs0
; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x i32>, <8 x i32>* %0, align 32
%1 = uitofp <8 x i32> %a to <8 x float>
store <8 x float> %1, <8 x float>* %agg.result, align 32
ret void
}
; Unsigned <16 x i32> -> <16 x float> conversion through memory: the input
; is loaded from the unnamed second pointer parameter (%0), converted with
; uitofp, and stored through the sret pointer %agg.result.
; Each configuration checked below expects four loads, four xvcvuxwsp
; conversions and four stores (offsets 0, 16, 32 and 48 from r4 / r3).
define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i32>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 48
; CHECK-P8-NEXT: lvx v5, 0, r4
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: lvx v3, r4, r6
; CHECK-P8-NEXT: lvx v4, r4, r7
; CHECK-P8-NEXT: xvcvuxwsp v5, v5
; CHECK-P8-NEXT: xvcvuxwsp v2, v2
; CHECK-P8-NEXT: xvcvuxwsp v3, v3
; CHECK-P8-NEXT: xvcvuxwsp v4, v4
; CHECK-P8-NEXT: stvx v5, 0, r3
; CHECK-P8-NEXT: stvx v2, r3, r5
; CHECK-P8-NEXT: stvx v3, r3, r6
; CHECK-P8-NEXT: stvx v4, r3, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 48(r4)
; CHECK-P9-NEXT: lxv vs1, 32(r4)
; CHECK-P9-NEXT: lxv vs2, 16(r4)
; CHECK-P9-NEXT: lxv vs3, 0(r4)
; CHECK-P9-NEXT: xvcvuxwsp vs3, vs3
; CHECK-P9-NEXT: xvcvuxwsp vs2, vs2
; CHECK-P9-NEXT: xvcvuxwsp vs1, vs1
; CHECK-P9-NEXT: xvcvuxwsp vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs1, 32(r3)
; CHECK-P9-NEXT: stxv vs2, 16(r3)
; CHECK-P9-NEXT: stxv vs3, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 48(r4)
; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: xvcvuxwsp vs3, vs3
; CHECK-BE-NEXT: xvcvuxwsp vs2, vs2
; CHECK-BE-NEXT: xvcvuxwsp vs1, vs1
; CHECK-BE-NEXT: xvcvuxwsp vs0, vs0
; CHECK-BE-NEXT: stxv vs0, 48(r3)
; CHECK-BE-NEXT: stxv vs1, 32(r3)
; CHECK-BE-NEXT: stxv vs2, 16(r3)
; CHECK-BE-NEXT: stxv vs3, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i32>, <16 x i32>* %0, align 64
%1 = uitofp <16 x i32> %a to <16 x float>
store <16 x float> %1, <16 x float>* %agg.result, align 64
ret void
}
; Signed <2 x i32> -> <2 x float> conversion where both the input and the
; result travel as a plain i64: the body bitcasts %a.coerce to <2 x i32>,
; applies sitofp, and bitcasts the <2 x float> back to i64 for the return.
; The assertions pin the llc output of the three invocations at the top of
; this file (pwr8 little-endian, pwr9 little-endian, pwr9 big-endian); each
; one expects exactly one xvcvsxwsp between the GPR<->VSR moves.
define i64 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mtvsrd f0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xvcvsxwsp vs0, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: mfvsrd r3, f0
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: mtvsrd f0, r3
; CHECK-P9-NEXT: xxswapd v2, vs0
; CHECK-P9-NEXT: xvcvsxwsp vs0, v2
; CHECK-P9-NEXT: mfvsrld r3, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrd f0, r3
; CHECK-BE-NEXT: xvcvsxwsp vs0, vs0
; CHECK-BE-NEXT: mfvsrd r3, f0
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i64 %a.coerce to <2 x i32>
%1 = sitofp <2 x i32> %0 to <2 x float>
%2 = bitcast <2 x float> %1 to i64
ret i64 %2
}
; Signed <4 x i32> -> <4 x float> conversion of the argument %a, returned by
; value. All three configurations checked below expect the whole function to
; be a single in-place xvcvsxwsp on v2 followed by blr.
define <4 x float> @test4elt_signed(<4 x i32> %a) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xvcvsxwsp v2, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xvcvsxwsp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xvcvsxwsp v2, v2
; CHECK-BE-NEXT: blr
entry:
%0 = sitofp <4 x i32> %a to <4 x float>
ret <4 x float> %0
}
; Signed <8 x i32> -> <8 x float> conversion through memory: the input is
; loaded from the unnamed second pointer parameter (%0), converted with
; sitofp, and stored through the sret pointer %agg.result.
; Each configuration checked below expects two loads, two xvcvsxwsp
; conversions and two stores (offsets 0 and 16 from r4 / r3).
define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, <8 x i32>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: xvcvsxwsp v3, v3
; CHECK-P8-NEXT: xvcvsxwsp v2, v2
; CHECK-P8-NEXT: stvx v3, 0, r3
; CHECK-P8-NEXT: stvx v2, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 16(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r4)
; CHECK-P9-NEXT: xvcvsxwsp vs1, vs1
; CHECK-P9-NEXT: xvcvsxwsp vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 16(r3)
; CHECK-P9-NEXT: stxv vs1, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 16(r4)
; CHECK-BE-NEXT: lxv vs1, 0(r4)
; CHECK-BE-NEXT: xvcvsxwsp vs1, vs1
; CHECK-BE-NEXT: xvcvsxwsp vs0, vs0
; CHECK-BE-NEXT: stxv vs0, 16(r3)
; CHECK-BE-NEXT: stxv vs1, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x i32>, <8 x i32>* %0, align 32
%1 = sitofp <8 x i32> %a to <8 x float>
store <8 x float> %1, <8 x float>* %agg.result, align 32
ret void
}
; Signed <16 x i32> -> <16 x float> conversion through memory: the input is
; loaded from the unnamed second pointer parameter (%0), converted with
; sitofp, and stored through the sret pointer %agg.result.
; Each configuration checked below expects four loads, four xvcvsxwsp
; conversions and four stores (offsets 0, 16, 32 and 48 from r4 / r3).
define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, <16 x i32>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 48
; CHECK-P8-NEXT: lvx v5, 0, r4
; CHECK-P8-NEXT: lvx v2, r4, r5
; CHECK-P8-NEXT: lvx v3, r4, r6
; CHECK-P8-NEXT: lvx v4, r4, r7
; CHECK-P8-NEXT: xvcvsxwsp v5, v5
; CHECK-P8-NEXT: xvcvsxwsp v2, v2
; CHECK-P8-NEXT: xvcvsxwsp v3, v3
; CHECK-P8-NEXT: xvcvsxwsp v4, v4
; CHECK-P8-NEXT: stvx v5, 0, r3
; CHECK-P8-NEXT: stvx v2, r3, r5
; CHECK-P8-NEXT: stvx v3, r3, r6
; CHECK-P8-NEXT: stvx v4, r3, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv vs0, 48(r4)
; CHECK-P9-NEXT: lxv vs1, 32(r4)
; CHECK-P9-NEXT: lxv vs2, 16(r4)
; CHECK-P9-NEXT: lxv vs3, 0(r4)
; CHECK-P9-NEXT: xvcvsxwsp vs3, vs3
; CHECK-P9-NEXT: xvcvsxwsp vs2, vs2
; CHECK-P9-NEXT: xvcvsxwsp vs1, vs1
; CHECK-P9-NEXT: xvcvsxwsp vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 48(r3)
; CHECK-P9-NEXT: stxv vs1, 32(r3)
; CHECK-P9-NEXT: stxv vs2, 16(r3)
; CHECK-P9-NEXT: stxv vs3, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv vs0, 48(r4)
; CHECK-BE-NEXT: lxv vs1, 32(r4)
; CHECK-BE-NEXT: lxv vs2, 16(r4)
; CHECK-BE-NEXT: lxv vs3, 0(r4)
; CHECK-BE-NEXT: xvcvsxwsp vs3, vs3
; CHECK-BE-NEXT: xvcvsxwsp vs2, vs2
; CHECK-BE-NEXT: xvcvsxwsp vs1, vs1
; CHECK-BE-NEXT: xvcvsxwsp vs0, vs0
; CHECK-BE-NEXT: stxv vs0, 48(r3)
; CHECK-BE-NEXT: stxv vs1, 32(r3)
; CHECK-BE-NEXT: stxv vs2, 16(r3)
; CHECK-BE-NEXT: stxv vs3, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i32>, <16 x i32>* %0, align 64
%1 = sitofp <16 x i32> %a to <16 x float>
store <16 x float> %1, <16 x float>* %agg.result, align 64
ret void
}

View File

@ -0,0 +1,438 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE
; Unsigned <2 x i64> -> <2 x double> conversion of the argument %a, returned
; by value. The assertions pin the llc output of the three invocations at the
; top of this file (pwr8 little-endian, pwr9 little-endian, pwr9 big-endian);
; each expects a single in-place xvcvuxddp on v2 followed by blr.
define <2 x double> @test2elt(<2 x i64> %a) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xvcvuxddp v2, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xvcvuxddp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xvcvuxddp v2, v2
; CHECK-BE-NEXT: blr
entry:
%0 = uitofp <2 x i64> %a to <2 x double>
ret <2 x double> %0
}
; Unsigned <4 x i64> -> <4 x double> conversion through memory: the input is
; loaded from the unnamed second pointer parameter (%0), converted with
; uitofp, and stored through the sret pointer %agg.result.
; Each configuration checked below expects two loads, two xvcvuxddp
; conversions and two stores (offsets 0 and 16 from r4 / r3).
define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, <4 x i64>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvuxddp vs1, vs1
; CHECK-P8-NEXT: xvcvuxddp vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv v2, 16(r4)
; CHECK-P9-NEXT: lxv v3, 0(r4)
; CHECK-P9-NEXT: xvcvuxddp vs0, v3
; CHECK-P9-NEXT: xvcvuxddp vs1, v2
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv v2, 16(r4)
; CHECK-BE-NEXT: lxv v3, 0(r4)
; CHECK-BE-NEXT: xvcvuxddp vs0, v3
; CHECK-BE-NEXT: xvcvuxddp vs1, v2
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <4 x i64>, <4 x i64>* %0, align 32
%1 = uitofp <4 x i64> %a to <4 x double>
store <4 x double> %1, <4 x double>* %agg.result, align 32
ret void
}
; Unsigned <8 x i64> -> <8 x double> conversion through memory: the input is
; loaded from the unnamed second pointer parameter (%0), converted with
; uitofp, and stored through the sret pointer %agg.result.
; Each configuration checked below expects four loads, four xvcvuxddp
; conversions and four stores (offsets 0, 16, 32 and 48 from r4 / r3).
define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, <8 x i64>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 48
; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
; CHECK-P8-NEXT: xvcvuxddp vs3, vs3
; CHECK-P8-NEXT: xvcvuxddp vs0, vs0
; CHECK-P8-NEXT: xvcvuxddp vs1, vs1
; CHECK-P8-NEXT: xvcvuxddp vs2, vs2
; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv v2, 48(r4)
; CHECK-P9-NEXT: lxv v3, 32(r4)
; CHECK-P9-NEXT: lxv v4, 16(r4)
; CHECK-P9-NEXT: lxv v5, 0(r4)
; CHECK-P9-NEXT: xvcvuxddp vs0, v5
; CHECK-P9-NEXT: xvcvuxddp vs1, v4
; CHECK-P9-NEXT: xvcvuxddp vs2, v3
; CHECK-P9-NEXT: xvcvuxddp vs3, v2
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv v2, 48(r4)
; CHECK-BE-NEXT: lxv v3, 32(r4)
; CHECK-BE-NEXT: lxv v4, 16(r4)
; CHECK-BE-NEXT: lxv v5, 0(r4)
; CHECK-BE-NEXT: xvcvuxddp vs0, v5
; CHECK-BE-NEXT: xvcvuxddp vs1, v4
; CHECK-BE-NEXT: xvcvuxddp vs2, v3
; CHECK-BE-NEXT: xvcvuxddp vs3, v2
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x i64>, <8 x i64>* %0, align 64
%1 = uitofp <8 x i64> %a to <8 x double>
store <8 x double> %1, <8 x double>* %agg.result, align 64
ret void
}
; Unsigned <16 x i64> -> <16 x double> conversion through memory: the input
; is loaded from the unnamed second pointer parameter (%0), converted with
; uitofp, and stored through the sret pointer %agg.result.
; Each configuration checked below expects eight loads, eight xvcvuxddp
; conversions and eight stores covering offsets 0 through 112 in steps of 16.
define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x i64>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 64
; CHECK-P8-NEXT: li r8, 96
; CHECK-P8-NEXT: li r9, 112
; CHECK-P8-NEXT: li r10, 80
; CHECK-P8-NEXT: li r11, 48
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
; CHECK-P8-NEXT: lxvd2x vs3, r4, r8
; CHECK-P8-NEXT: lxvd2x vs4, r4, r9
; CHECK-P8-NEXT: lxvd2x vs5, r4, r10
; CHECK-P8-NEXT: lxvd2x vs6, r4, r11
; CHECK-P8-NEXT: lxvd2x vs7, 0, r4
; CHECK-P8-NEXT: xvcvuxddp vs0, vs0
; CHECK-P8-NEXT: xvcvuxddp vs1, vs1
; CHECK-P8-NEXT: xvcvuxddp vs2, vs2
; CHECK-P8-NEXT: xvcvuxddp vs3, vs3
; CHECK-P8-NEXT: xvcvuxddp vs4, vs4
; CHECK-P8-NEXT: xvcvuxddp vs5, vs5
; CHECK-P8-NEXT: xvcvuxddp vs6, vs6
; CHECK-P8-NEXT: xvcvuxddp vs7, vs7
; CHECK-P8-NEXT: stxvd2x vs4, r3, r9
; CHECK-P8-NEXT: stxvd2x vs3, r3, r8
; CHECK-P8-NEXT: stxvd2x vs5, r3, r10
; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
; CHECK-P8-NEXT: stxvd2x vs6, r3, r11
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs7, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv v2, 48(r4)
; CHECK-P9-NEXT: lxv v3, 32(r4)
; CHECK-P9-NEXT: lxv v4, 16(r4)
; CHECK-P9-NEXT: lxv v5, 0(r4)
; CHECK-P9-NEXT: lxv v0, 112(r4)
; CHECK-P9-NEXT: lxv v1, 96(r4)
; CHECK-P9-NEXT: lxv v6, 80(r4)
; CHECK-P9-NEXT: lxv v7, 64(r4)
; CHECK-P9-NEXT: xvcvuxddp vs0, v5
; CHECK-P9-NEXT: xvcvuxddp vs1, v4
; CHECK-P9-NEXT: xvcvuxddp vs2, v3
; CHECK-P9-NEXT: xvcvuxddp vs3, v2
; CHECK-P9-NEXT: xvcvuxddp vs4, v7
; CHECK-P9-NEXT: xvcvuxddp vs5, v6
; CHECK-P9-NEXT: xvcvuxddp vs6, v1
; CHECK-P9-NEXT: xvcvuxddp vs7, v0
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: stxv vs7, 112(r3)
; CHECK-P9-NEXT: stxv vs6, 96(r3)
; CHECK-P9-NEXT: stxv vs5, 80(r3)
; CHECK-P9-NEXT: stxv vs4, 64(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv v2, 48(r4)
; CHECK-BE-NEXT: lxv v3, 32(r4)
; CHECK-BE-NEXT: lxv v4, 16(r4)
; CHECK-BE-NEXT: lxv v5, 0(r4)
; CHECK-BE-NEXT: lxv v0, 112(r4)
; CHECK-BE-NEXT: lxv v1, 96(r4)
; CHECK-BE-NEXT: lxv v6, 80(r4)
; CHECK-BE-NEXT: lxv v7, 64(r4)
; CHECK-BE-NEXT: xvcvuxddp vs0, v5
; CHECK-BE-NEXT: xvcvuxddp vs1, v4
; CHECK-BE-NEXT: xvcvuxddp vs2, v3
; CHECK-BE-NEXT: xvcvuxddp vs3, v2
; CHECK-BE-NEXT: xvcvuxddp vs4, v7
; CHECK-BE-NEXT: xvcvuxddp vs5, v6
; CHECK-BE-NEXT: xvcvuxddp vs6, v1
; CHECK-BE-NEXT: xvcvuxddp vs7, v0
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs7, 112(r3)
; CHECK-BE-NEXT: stxv vs6, 96(r3)
; CHECK-BE-NEXT: stxv vs5, 80(r3)
; CHECK-BE-NEXT: stxv vs4, 64(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i64>, <16 x i64>* %0, align 128
%1 = uitofp <16 x i64> %a to <16 x double>
store <16 x double> %1, <16 x double>* %agg.result, align 128
ret void
}
; Signed <2 x i64> -> <2 x double> conversion of the argument %a, returned by
; value. The assertions pin the llc output of the three invocations at the
; top of this file (pwr8 little-endian, pwr9 little-endian, pwr9 big-endian);
; each expects a single in-place xvcvsxddp on v2 followed by blr.
define <2 x double> @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xvcvsxddp v2, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xvcvsxddp v2, v2
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xvcvsxddp v2, v2
; CHECK-BE-NEXT: blr
entry:
%0 = sitofp <2 x i64> %a to <2 x double>
ret <2 x double> %0
}
; Signed <4 x i64> -> <4 x double> conversion through memory: the input is
; loaded from the unnamed second pointer parameter (%0), converted with
; sitofp, and stored through the sret pointer %agg.result.
; Each configuration checked below expects two loads, two xvcvsxddp
; conversions and two stores (offsets 0 and 16 from r4 / r3).
define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, <4 x i64>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvsxddp vs1, vs1
; CHECK-P8-NEXT: xvcvsxddp vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv v2, 16(r4)
; CHECK-P9-NEXT: lxv v3, 0(r4)
; CHECK-P9-NEXT: xvcvsxddp vs0, v3
; CHECK-P9-NEXT: xvcvsxddp vs1, v2
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv v2, 16(r4)
; CHECK-BE-NEXT: lxv v3, 0(r4)
; CHECK-BE-NEXT: xvcvsxddp vs0, v3
; CHECK-BE-NEXT: xvcvsxddp vs1, v2
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <4 x i64>, <4 x i64>* %0, align 32
%1 = sitofp <4 x i64> %a to <4 x double>
store <4 x double> %1, <4 x double>* %agg.result, align 32
ret void
}
; Signed <8 x i64> -> <8 x double> conversion through memory: the input is
; loaded from the unnamed second pointer parameter (%0), converted with
; sitofp, and stored through the sret pointer %agg.result.
; Each configuration checked below expects four loads, four xvcvsxddp
; conversions and four stores (offsets 0, 16, 32 and 48 from r4 / r3).
define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, <8 x i64>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 48
; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
; CHECK-P8-NEXT: xvcvsxddp vs3, vs3
; CHECK-P8-NEXT: xvcvsxddp vs0, vs0
; CHECK-P8-NEXT: xvcvsxddp vs1, vs1
; CHECK-P8-NEXT: xvcvsxddp vs2, vs2
; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv v2, 48(r4)
; CHECK-P9-NEXT: lxv v3, 32(r4)
; CHECK-P9-NEXT: lxv v4, 16(r4)
; CHECK-P9-NEXT: lxv v5, 0(r4)
; CHECK-P9-NEXT: xvcvsxddp vs0, v5
; CHECK-P9-NEXT: xvcvsxddp vs1, v4
; CHECK-P9-NEXT: xvcvsxddp vs2, v3
; CHECK-P9-NEXT: xvcvsxddp vs3, v2
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv v2, 48(r4)
; CHECK-BE-NEXT: lxv v3, 32(r4)
; CHECK-BE-NEXT: lxv v4, 16(r4)
; CHECK-BE-NEXT: lxv v5, 0(r4)
; CHECK-BE-NEXT: xvcvsxddp vs0, v5
; CHECK-BE-NEXT: xvcvsxddp vs1, v4
; CHECK-BE-NEXT: xvcvsxddp vs2, v3
; CHECK-BE-NEXT: xvcvsxddp vs3, v2
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <8 x i64>, <8 x i64>* %0, align 64
%1 = sitofp <8 x i64> %a to <8 x double>
store <8 x double> %1, <8 x double>* %agg.result, align 64
ret void
}
; Signed <16 x i64> -> <16 x double> conversion through memory: the input is
; loaded from the unnamed second pointer parameter (%0), converted with
; sitofp, and stored through the sret pointer %agg.result.
; Each configuration checked below expects eight loads, eight xvcvsxddp
; conversions and eight stores covering offsets 0 through 112 in steps of 16.
define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, <16 x i64>* nocapture readonly) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 64
; CHECK-P8-NEXT: li r8, 96
; CHECK-P8-NEXT: li r9, 112
; CHECK-P8-NEXT: li r10, 80
; CHECK-P8-NEXT: li r11, 48
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
; CHECK-P8-NEXT: lxvd2x vs3, r4, r8
; CHECK-P8-NEXT: lxvd2x vs4, r4, r9
; CHECK-P8-NEXT: lxvd2x vs5, r4, r10
; CHECK-P8-NEXT: lxvd2x vs6, r4, r11
; CHECK-P8-NEXT: lxvd2x vs7, 0, r4
; CHECK-P8-NEXT: xvcvsxddp vs0, vs0
; CHECK-P8-NEXT: xvcvsxddp vs1, vs1
; CHECK-P8-NEXT: xvcvsxddp vs2, vs2
; CHECK-P8-NEXT: xvcvsxddp vs3, vs3
; CHECK-P8-NEXT: xvcvsxddp vs4, vs4
; CHECK-P8-NEXT: xvcvsxddp vs5, vs5
; CHECK-P8-NEXT: xvcvsxddp vs6, vs6
; CHECK-P8-NEXT: xvcvsxddp vs7, vs7
; CHECK-P8-NEXT: stxvd2x vs4, r3, r9
; CHECK-P8-NEXT: stxvd2x vs3, r3, r8
; CHECK-P8-NEXT: stxvd2x vs5, r3, r10
; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
; CHECK-P8-NEXT: stxvd2x vs6, r3, r11
; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs7, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lxv v2, 48(r4)
; CHECK-P9-NEXT: lxv v3, 32(r4)
; CHECK-P9-NEXT: lxv v4, 16(r4)
; CHECK-P9-NEXT: lxv v5, 0(r4)
; CHECK-P9-NEXT: lxv v0, 112(r4)
; CHECK-P9-NEXT: lxv v1, 96(r4)
; CHECK-P9-NEXT: lxv v6, 80(r4)
; CHECK-P9-NEXT: lxv v7, 64(r4)
; CHECK-P9-NEXT: xvcvsxddp vs0, v5
; CHECK-P9-NEXT: xvcvsxddp vs1, v4
; CHECK-P9-NEXT: xvcvsxddp vs2, v3
; CHECK-P9-NEXT: xvcvsxddp vs3, v2
; CHECK-P9-NEXT: xvcvsxddp vs4, v7
; CHECK-P9-NEXT: xvcvsxddp vs5, v6
; CHECK-P9-NEXT: xvcvsxddp vs6, v1
; CHECK-P9-NEXT: xvcvsxddp vs7, v0
; CHECK-P9-NEXT: stxv vs3, 48(r3)
; CHECK-P9-NEXT: stxv vs2, 32(r3)
; CHECK-P9-NEXT: stxv vs1, 16(r3)
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: stxv vs7, 112(r3)
; CHECK-P9-NEXT: stxv vs6, 96(r3)
; CHECK-P9-NEXT: stxv vs5, 80(r3)
; CHECK-P9-NEXT: stxv vs4, 64(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv v2, 48(r4)
; CHECK-BE-NEXT: lxv v3, 32(r4)
; CHECK-BE-NEXT: lxv v4, 16(r4)
; CHECK-BE-NEXT: lxv v5, 0(r4)
; CHECK-BE-NEXT: lxv v0, 112(r4)
; CHECK-BE-NEXT: lxv v1, 96(r4)
; CHECK-BE-NEXT: lxv v6, 80(r4)
; CHECK-BE-NEXT: lxv v7, 64(r4)
; CHECK-BE-NEXT: xvcvsxddp vs0, v5
; CHECK-BE-NEXT: xvcvsxddp vs1, v4
; CHECK-BE-NEXT: xvcvsxddp vs2, v3
; CHECK-BE-NEXT: xvcvsxddp vs3, v2
; CHECK-BE-NEXT: xvcvsxddp vs4, v7
; CHECK-BE-NEXT: xvcvsxddp vs5, v6
; CHECK-BE-NEXT: xvcvsxddp vs6, v1
; CHECK-BE-NEXT: xvcvsxddp vs7, v0
; CHECK-BE-NEXT: stxv vs3, 48(r3)
; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs7, 112(r3)
; CHECK-BE-NEXT: stxv vs6, 96(r3)
; CHECK-BE-NEXT: stxv vs5, 80(r3)
; CHECK-BE-NEXT: stxv vs4, 64(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i64>, <16 x i64>* %0, align 128
%1 = sitofp <16 x i64> %a to <16 x double>
store <16 x double> %1, <16 x double>* %agg.result, align 128
ret void
}