[GlobalISel][X86] G_LOAD/G_STORE vec256/512 support

Summary: Mark G_LOAD/G_STORE of 256-bit vectors as legal for AVX and of 512-bit vectors as legal for AVX512, and implement instruction selection for them.

Reviewers: zvi, guyblank

Reviewed By: zvi

Subscribers: rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D33268

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303617 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Igor Breger 2017-05-23 08:23:51 +00:00
parent 5e9f2d0f5d
commit 2bcb4e0921
8 changed files with 568 additions and 40 deletions

View File

@ -302,6 +302,26 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB,
: HasAVX512
? X86::VMOVUPSZ128mr_NOVLX
: HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
} else if (Ty.isVector() && Ty.getSizeInBits() == 256) {
if (Alignment >= 32)
return Isload ? (HasVLX ? X86::VMOVAPSZ256rm
: HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
: X86::VMOVAPSYrm)
: (HasVLX ? X86::VMOVAPSZ256mr
: HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
: X86::VMOVAPSYmr);
else
return Isload ? (HasVLX ? X86::VMOVUPSZ256rm
: HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
: X86::VMOVUPSYrm)
: (HasVLX ? X86::VMOVUPSZ256mr
: HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
: X86::VMOVUPSYmr);
} else if (Ty.isVector() && Ty.getSizeInBits() == 512) {
if (Alignment >= 64)
return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
else
return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
}
return Opc;
}

View File

@ -35,6 +35,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
setLegalizerInfoSSE1();
setLegalizerInfoSSE2();
setLegalizerInfoSSE41();
setLegalizerInfoAVX();
setLegalizerInfoAVX2();
setLegalizerInfoAVX512();
setLegalizerInfoAVX512DQ();
@ -209,6 +210,18 @@ void X86LegalizerInfo::setLegalizerInfoSSE41() {
setAction({G_MUL, v4s32}, Legal);
}
void X86LegalizerInfo::setLegalizerInfoAVX() {
if (!Subtarget.hasAVX())
return;
const LLT v8s32 = LLT::vector(8, 32);
const LLT v4s64 = LLT::vector(4, 64);
for (unsigned MemOp : {G_LOAD, G_STORE})
for (auto Ty : {v8s32, v4s64})
setAction({MemOp, Ty}, Legal);
}
void X86LegalizerInfo::setLegalizerInfoAVX2() {
if (!Subtarget.hasAVX2())
return;
@ -239,6 +252,10 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() {
setAction({G_MUL, v16s32}, Legal);
for (unsigned MemOp : {G_LOAD, G_STORE})
for (auto Ty : {v16s32, v8s64})
setAction({MemOp, Ty}, Legal);
/************ VLX *******************/
if (!Subtarget.hasVLX())
return;

View File

@ -39,6 +39,7 @@ private:
void setLegalizerInfoSSE1();
void setLegalizerInfoSSE2();
void setLegalizerInfoSSE41();
void setLegalizerInfoAVX();
void setLegalizerInfoAVX2();
void setLegalizerInfoAVX512();
void setLegalizerInfoAVX512DQ();

View File

@ -1,39 +1,116 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SKX
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -regbankselect-greedy -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SKX
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=SKX
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -regbankselect-greedy -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=SKX
; Unaligned (align 1) <4 x i32> load; the expected code is an unaligned
; vmovups from (%rdi) into %xmm0.
define <4 x i32> @test_load_v4i32_noalign(<4 x i32> * %p1) {
; ALL-LABEL: test_load_v4i32_noalign:
; ALL: # BB#0:
; ALL-NEXT: vmovups (%rdi), %xmm0
; ALL-NEXT: retq
; SKX-LABEL: test_load_v4i32_noalign:
; SKX: # BB#0:
; SKX-NEXT: vmovups (%rdi), %xmm0
; SKX-NEXT: retq
%r = load <4 x i32>, <4 x i32>* %p1, align 1
ret <4 x i32> %r
}
; 16-byte aligned <4 x i32> load; the expected code is an aligned vmovaps
; from (%rdi) into %xmm0.
define <4 x i32> @test_load_v4i32_align(<4 x i32> * %p1) {
; ALL-LABEL: test_load_v4i32_align:
; ALL: # BB#0:
; ALL-NEXT: vmovaps (%rdi), %xmm0
; ALL-NEXT: retq
; SKX-LABEL: test_load_v4i32_align:
; SKX: # BB#0:
; SKX-NEXT: vmovaps (%rdi), %xmm0
; SKX-NEXT: retq
%r = load <4 x i32>, <4 x i32>* %p1, align 16
ret <4 x i32> %r
}
; Unaligned (align 1) <8 x i32> load; the expected code is an unaligned
; vmovups from (%rdi) into %ymm0.
define <8 x i32> @test_load_v8i32_noalign(<8 x i32> * %p1) {
; SKX-LABEL: test_load_v8i32_noalign:
; SKX: # BB#0:
; SKX-NEXT: vmovups (%rdi), %ymm0
; SKX-NEXT: retq
%r = load <8 x i32>, <8 x i32>* %p1, align 1
ret <8 x i32> %r
}
; 32-byte aligned <8 x i32> load; the expected code is an aligned vmovaps
; from (%rdi) into %ymm0.
define <8 x i32> @test_load_v8i32_align(<8 x i32> * %p1) {
; SKX-LABEL: test_load_v8i32_align:
; SKX: # BB#0:
; SKX-NEXT: vmovaps (%rdi), %ymm0
; SKX-NEXT: retq
%r = load <8 x i32>, <8 x i32>* %p1, align 32
ret <8 x i32> %r
}
; Unaligned (align 1) <16 x i32> load; the expected code is an unaligned
; vmovups from (%rdi) into %zmm0.
define <16 x i32> @test_load_v16i32_noalign(<16 x i32> * %p1) {
; SKX-LABEL: test_load_v16i32_noalign:
; SKX: # BB#0:
; SKX-NEXT: vmovups (%rdi), %zmm0
; SKX-NEXT: retq
%r = load <16 x i32>, <16 x i32>* %p1, align 1
ret <16 x i32> %r
}
; 64-byte aligned <16 x i32> load. A 512-bit access only takes the aligned
; move when its alignment is at least 64 bytes, so this uses align 64 and
; expects vmovaps (an align 32 load would select the same vmovups as the
; noalign test and leave the aligned load path untested).
define <16 x i32> @test_load_v16i32_align(<16 x i32> * %p1) {
; SKX-LABEL: test_load_v16i32_align:
; SKX: # BB#0:
; SKX-NEXT: vmovaps (%rdi), %zmm0
; SKX-NEXT: retq
%r = load <16 x i32>, <16 x i32>* %p1, align 64
ret <16 x i32> %r
}
; Unaligned (align 1) <4 x i32> store; the expected code is an unaligned
; vmovups of %xmm0 to (%rdi).
define void @test_store_v4i32_noalign(<4 x i32> %val, <4 x i32>* %p1) {
; ALL-LABEL: test_store_v4i32_noalign:
; ALL: # BB#0:
; ALL-NEXT: vmovups %xmm0, (%rdi)
; ALL-NEXT: retq
; SKX-LABEL: test_store_v4i32_noalign:
; SKX: # BB#0:
; SKX-NEXT: vmovups %xmm0, (%rdi)
; SKX-NEXT: retq
store <4 x i32> %val, <4 x i32>* %p1, align 1
ret void
}
; 16-byte aligned <4 x i32> store; the expected code is an aligned vmovaps
; of %xmm0 to (%rdi).
define void @test_store_v4i32_align(<4 x i32> %val, <4 x i32>* %p1) {
; ALL-LABEL: test_store_v4i32_align:
; ALL: # BB#0:
; ALL-NEXT: vmovaps %xmm0, (%rdi)
; ALL-NEXT: retq
; SKX-LABEL: test_store_v4i32_align:
; SKX: # BB#0:
; SKX-NEXT: vmovaps %xmm0, (%rdi)
; SKX-NEXT: retq
store <4 x i32> %val, <4 x i32>* %p1, align 16
ret void
}
; Unaligned (align 1) <8 x i32> store; the expected code is an unaligned
; vmovups of %ymm0 to (%rdi), followed by vzeroupper before the return.
define void @test_store_v8i32_noalign(<8 x i32> %val, <8 x i32>* %p1) {
; SKX-LABEL: test_store_v8i32_noalign:
; SKX: # BB#0:
; SKX-NEXT: vmovups %ymm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
store <8 x i32> %val, <8 x i32>* %p1, align 1
ret void
}
; 32-byte aligned <8 x i32> store; the expected code is an aligned vmovaps
; of %ymm0 to (%rdi), followed by vzeroupper before the return.
define void @test_store_v8i32_align(<8 x i32> %val, <8 x i32>* %p1) {
; SKX-LABEL: test_store_v8i32_align:
; SKX: # BB#0:
; SKX-NEXT: vmovaps %ymm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
store <8 x i32> %val, <8 x i32>* %p1, align 32
ret void
}
; Unaligned (align 1) <16 x i32> store; the expected code is an unaligned
; vmovups of %zmm0 to (%rdi), followed by vzeroupper before the return.
define void @test_store_v16i32_noalign(<16 x i32> %val, <16 x i32>* %p1) {
; SKX-LABEL: test_store_v16i32_noalign:
; SKX: # BB#0:
; SKX-NEXT: vmovups %zmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
store <16 x i32> %val, <16 x i32>* %p1, align 1
ret void
}
; 64-byte aligned <16 x i32> store; the expected code is an aligned vmovaps
; of %zmm0 to (%rdi), followed by vzeroupper before the return.
define void @test_store_v16i32_align(<16 x i32> %val, <16 x i32>* %p1) {
; SKX-LABEL: test_store_v16i32_align:
; SKX: # BB#0:
; SKX-NEXT: vmovaps %zmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
store <16 x i32> %val, <16 x i32>* %p1, align 64
ret void
}

View File

@ -14,7 +14,16 @@
ret void
}
...
; IR counterpart of the MIR function with the same name; its %p1 argument is
; what the MIR memory operand refers to. Unaligned (align 1) <8 x i32> load.
define <8 x i32> @test_load_v8i32_noalign(<8 x i32>* %p1) {
%r = load <8 x i32>, <8 x i32>* %p1, align 1
ret <8 x i32> %r
}
; IR counterpart of the MIR function with the same name; its %p1 argument is
; what the MIR memory operand refers to. Unaligned (align 1) <8 x i32> store.
define void @test_store_v8i32_noalign(<8 x i32> %val, <8 x i32>* %p1) {
store <8 x i32> %val, <8 x i32>* %p1, align 1
ret void
}
---
name: test_mul_vec256
alignment: 4
@ -84,3 +93,47 @@ body: |
RET 0
...
---
# Unaligned 32-byte <8 x s32> G_LOAD through the pointer copied from %rdi.
# Both vregs start without a class; the checks expect the pointer (%0) to be
# given class gpr and the loaded vector (%1) class vecr.
name: test_load_v8i32_noalign
# CHECK-LABEL: name: test_load_v8i32_noalign
alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
# CHECK-NEXT: - { id: 0, class: gpr }
# CHECK-NEXT: - { id: 1, class: vecr }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.1 (%ir-block.0):
liveins: %rdi
%0(p0) = COPY %rdi
%1(<8 x s32>) = G_LOAD %0(p0) :: (load 32 from %ir.p1, align 1)
%ymm0 = COPY %1(<8 x s32>)
RET 0, implicit %ymm0
...
---
# Unaligned 32-byte <8 x s32> G_STORE of the value copied from %ymm0 to the
# pointer copied from %rdi. Both vregs start without a class; the checks
# expect the stored vector (%0) in class vecr and the pointer (%1) in gpr.
name: test_store_v8i32_noalign
# CHECK-LABEL: name: test_store_v8i32_noalign
alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
# CHECK-NEXT: - { id: 0, class: vecr }
# CHECK-NEXT: - { id: 1, class: gpr }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.1 (%ir-block.0):
liveins: %rdi, %ymm0
%0(<8 x s32>) = COPY %ymm0
%1(p0) = COPY %rdi
G_STORE %0(<8 x s32>), %1(p0) :: (store 32 into %ir.p1, align 1)
RET 0
...

View File

@ -15,22 +15,29 @@
ret void
}
; IR counterpart of the MIR function with the same name; its %p1 argument is
; what the MIR memory operand refers to. Unaligned (align 1) <16 x i32> load.
define <16 x i32> @test_load_v16i32_noalign(<16 x i32>* %p1) {
%r = load <16 x i32>, <16 x i32>* %p1, align 1
ret <16 x i32> %r
}
; IR counterpart of the MIR function with the same name; its %p1 argument is
; what the MIR memory operand refers to. Unaligned (align 1) <16 x i32> store.
define void @test_store_v16i32_noalign(<16 x i32> %val, <16 x i32>* %p1) {
store <16 x i32> %val, <16 x i32>* %p1, align 1
ret void
}
...
---
name: test_mul_vec512
# CHECK-LABEL: name: test_mul_vec512
alignment: 4
legalized: true
regBankSelected: false
selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_mul_vec512
# CHECK: registers:
# CHECK: - { id: 0, class: vecr }
# CHECK: - { id: 1, class: vecr }
# CHECK: registers:
# CHECK-NEXT: - { id: 0, class: vecr }
# CHECK-NEXT: - { id: 1, class: vecr }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
body: |
bb.1 (%ir-block.0):
@ -41,19 +48,16 @@ body: |
...
---
name: test_add_vec512
# CHECK-LABEL: name: test_add_vec512
alignment: 4
legalized: true
regBankSelected: false
selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_vec512
# CHECK: registers:
# CHECK: - { id: 0, class: vecr }
# CHECK: - { id: 1, class: vecr }
# CHECK: registers:
# CHECK-NEXT: - { id: 0, class: vecr }
# CHECK-NEXT: - { id: 1, class: vecr }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
body: |
bb.1 (%ir-block.0):
@ -64,24 +68,65 @@ body: |
...
---
name: test_sub_vec512
# CHECK-LABEL: name: test_sub_vec512
alignment: 4
legalized: true
regBankSelected: false
selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_sub_vec512
# CHECK: registers:
# CHECK: - { id: 0, class: vecr }
# CHECK: - { id: 1, class: vecr }
# CHECK: registers:
# CHECK-NEXT: - { id: 0, class: vecr }
# CHECK-NEXT: - { id: 1, class: vecr }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
body: |
bb.1 (%ir-block.0):
%0(<16 x s32>) = IMPLICIT_DEF
%1(<16 x s32>) = G_SUB %0, %0
RET 0
...
---
# Unaligned 64-byte <16 x s32> G_LOAD through the pointer copied from %rdi.
# Both vregs start without a class; the checks expect the pointer (%0) to be
# given class gpr and the loaded vector (%1) class vecr.
name: test_load_v16i32_noalign
# CHECK-LABEL: name: test_load_v16i32_noalign
alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
# CHECK-NEXT: - { id: 0, class: gpr }
# CHECK-NEXT: - { id: 1, class: vecr }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.1 (%ir-block.0):
liveins: %rdi
%0(p0) = COPY %rdi
%1(<16 x s32>) = G_LOAD %0(p0) :: (load 64 from %ir.p1, align 1)
%zmm0 = COPY %1(<16 x s32>)
RET 0, implicit %zmm0
...
---
# Unaligned 64-byte <16 x s32> G_STORE of the value copied from %zmm0 to the
# pointer copied from %rdi. Both vregs start without a class; the checks
# expect the stored vector (%0) in class vecr and the pointer (%1) in gpr.
name: test_store_v16i32_noalign
# CHECK-LABEL: name: test_store_v16i32_noalign
alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
# CHECK-NEXT: - { id: 0, class: vecr }
# CHECK-NEXT: - { id: 1, class: gpr }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.1 (%ir-block.0):
liveins: %rdi, %zmm0
%0(<16 x s32>) = COPY %zmm0
%1(p0) = COPY %rdi
G_STORE %0(<16 x s32>), %1(p0) :: (store 64 into %ir.p1, align 1)
RET 0
...

View File

@ -0,0 +1,188 @@
# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=AVX
# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=AVX512ALL --check-prefix=AVX512F
# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512ALL --check-prefix=AVX512VL
--- |
; IR counterpart of the MIR function with the same name; its %p1 argument is
; what the MIR memory operand refers to. Unaligned (align 1) <8 x i32> load.
define <8 x i32> @test_load_v8i32_noalign(<8 x i32>* %p1) {
%r = load <8 x i32>, <8 x i32>* %p1, align 1
ret <8 x i32> %r
}
; IR counterpart of the MIR function with the same name; its %p1 argument is
; what the MIR memory operand refers to. 32-byte aligned <8 x i32> load.
define <8 x i32> @test_load_v8i32_align(<8 x i32>* %p1) {
%r = load <8 x i32>, <8 x i32>* %p1, align 32
ret <8 x i32> %r
}
; IR counterpart of the MIR function with the same name; its %p1 argument is
; what the MIR memory operand refers to. Unaligned (align 1) <8 x i32> store.
define void @test_store_v8i32_noalign(<8 x i32> %val, <8 x i32>* %p1) {
store <8 x i32> %val, <8 x i32>* %p1, align 1
ret void
}
; IR counterpart of the MIR function with the same name; its %p1 argument is
; what the MIR memory operand refers to. 32-byte aligned <8 x i32> store.
define void @test_store_v8i32_align(<8 x i32> %val, <8 x i32>* %p1) {
store <8 x i32> %val, <8 x i32>* %p1, align 32
ret void
}
...
---
# Instruction selection of an unaligned (align 1) 32-byte <8 x s32> G_LOAD.
# Expected opcode per run - VMOVUPSYrm for the AVX-only run,
# VMOVUPSZ256rm_NOVLX for the +avx512f run and VMOVUPSZ256rm for the
# +avx512f,+avx512vl run. The loaded vreg is expected in vr256 for the
# AVX-only run and in vr256x for both AVX-512 runs.
name: test_load_v8i32_noalign
# ALL-LABEL: name: test_load_v8i32_noalign
alignment: 4
legalized: true
regBankSelected: true
# NO_AVX512F: registers:
# NO_AVX512F-NEXT: - { id: 0, class: gr64 }
# NO_AVX512F-NEXT: - { id: 1, class: vr256 }
#
# AVX512ALL: registers:
# AVX512ALL-NEXT: - { id: 0, class: gr64 }
# AVX512ALL-NEXT: - { id: 1, class: vr256x }
registers:
- { id: 0, class: gpr }
- { id: 1, class: vecr }
# NO_AVX512F: %0 = COPY %rdi
# NO_AVX512F-NEXT: %1 = VMOVUPSYrm %0, 1, _, 0, _ :: (load 32 from %ir.p1, align 1)
# NO_AVX512F-NEXT: %ymm0 = COPY %1
# NO_AVX512F-NEXT: RET 0, implicit %ymm0
#
# AVX512F: %0 = COPY %rdi
# AVX512F-NEXT: %1 = VMOVUPSZ256rm_NOVLX %0, 1, _, 0, _ :: (load 32 from %ir.p1, align 1)
# AVX512F-NEXT: %ymm0 = COPY %1
# AVX512F-NEXT: RET 0, implicit %ymm0
#
# AVX512VL: %0 = COPY %rdi
# AVX512VL-NEXT: %1 = VMOVUPSZ256rm %0, 1, _, 0, _ :: (load 32 from %ir.p1, align 1)
# AVX512VL-NEXT: %ymm0 = COPY %1
# AVX512VL-NEXT: RET 0, implicit %ymm0
body: |
bb.1 (%ir-block.0):
liveins: %rdi
%0(p0) = COPY %rdi
%1(<8 x s32>) = G_LOAD %0(p0) :: (load 32 from %ir.p1, align 1)
%ymm0 = COPY %1(<8 x s32>)
RET 0, implicit %ymm0
...
---
# Instruction selection of a 32-byte <8 x s32> G_LOAD whose memory operand
# carries no explicit alignment (presumably natural 32-byte alignment,
# matching the align 32 IR function of the same name - confirm).
# Expected opcode per run - VMOVAPSYrm for the AVX-only run,
# VMOVAPSZ256rm_NOVLX for the +avx512f run and VMOVAPSZ256rm for the
# +avx512f,+avx512vl run. The loaded vreg is expected in vr256 for the
# AVX-only run and in vr256x for both AVX-512 runs.
name: test_load_v8i32_align
# ALL-LABEL: name: test_load_v8i32_align
alignment: 4
legalized: true
regBankSelected: true
# NO_AVX512F: registers:
# NO_AVX512F-NEXT: - { id: 0, class: gr64 }
# NO_AVX512F-NEXT: - { id: 1, class: vr256 }
#
# AVX512ALL: registers:
# AVX512ALL-NEXT: - { id: 0, class: gr64 }
# AVX512ALL-NEXT: - { id: 1, class: vr256x }
registers:
- { id: 0, class: gpr }
- { id: 1, class: vecr }
# NO_AVX512F: %0 = COPY %rdi
# NO_AVX512F-NEXT: %1 = VMOVAPSYrm %0, 1, _, 0, _ :: (load 32 from %ir.p1)
# NO_AVX512F-NEXT: %ymm0 = COPY %1
# NO_AVX512F-NEXT: RET 0, implicit %ymm0
#
# AVX512F: %0 = COPY %rdi
# AVX512F-NEXT: %1 = VMOVAPSZ256rm_NOVLX %0, 1, _, 0, _ :: (load 32 from %ir.p1)
# AVX512F-NEXT: %ymm0 = COPY %1
# AVX512F-NEXT: RET 0, implicit %ymm0
#
# AVX512VL: %0 = COPY %rdi
# AVX512VL-NEXT: %1 = VMOVAPSZ256rm %0, 1, _, 0, _ :: (load 32 from %ir.p1)
# AVX512VL-NEXT: %ymm0 = COPY %1
# AVX512VL-NEXT: RET 0, implicit %ymm0
body: |
bb.1 (%ir-block.0):
liveins: %rdi
%0(p0) = COPY %rdi
%1(<8 x s32>) = G_LOAD %0(p0) :: (load 32 from %ir.p1)
%ymm0 = COPY %1(<8 x s32>)
RET 0, implicit %ymm0
...
---
# Instruction selection of an unaligned (align 1) 32-byte <8 x s32> G_STORE.
# Expected opcode per run - VMOVUPSYmr for the AVX-only run,
# VMOVUPSZ256mr_NOVLX for the +avx512f run and VMOVUPSZ256mr for the
# +avx512f,+avx512vl run. The stored vreg is expected in vr256 for the
# AVX-only run and in vr256x for both AVX-512 runs.
name: test_store_v8i32_noalign
# ALL-LABEL: name: test_store_v8i32_noalign
alignment: 4
legalized: true
regBankSelected: true
# NO_AVX512F: registers:
# NO_AVX512F-NEXT: - { id: 0, class: vr256 }
# NO_AVX512F-NEXT: - { id: 1, class: gr64 }
#
# AVX512ALL: registers:
# AVX512ALL-NEXT: - { id: 0, class: vr256x }
# AVX512ALL-NEXT: - { id: 1, class: gr64 }
registers:
- { id: 0, class: vecr }
- { id: 1, class: gpr }
# NO_AVX512F: %0 = COPY %ymm0
# NO_AVX512F-NEXT: %1 = COPY %rdi
# NO_AVX512F-NEXT: VMOVUPSYmr %1, 1, _, 0, _, %0 :: (store 32 into %ir.p1, align 1)
# NO_AVX512F-NEXT: RET 0
#
# AVX512F: %0 = COPY %ymm0
# AVX512F-NEXT: %1 = COPY %rdi
# AVX512F-NEXT: VMOVUPSZ256mr_NOVLX %1, 1, _, 0, _, %0 :: (store 32 into %ir.p1, align 1)
# AVX512F-NEXT: RET 0
#
# AVX512VL: %0 = COPY %ymm0
# AVX512VL-NEXT: %1 = COPY %rdi
# AVX512VL-NEXT: VMOVUPSZ256mr %1, 1, _, 0, _, %0 :: (store 32 into %ir.p1, align 1)
# AVX512VL-NEXT: RET 0
body: |
bb.1 (%ir-block.0):
liveins: %rdi, %ymm0
%0(<8 x s32>) = COPY %ymm0
%1(p0) = COPY %rdi
G_STORE %0(<8 x s32>), %1(p0) :: (store 32 into %ir.p1, align 1)
RET 0
...
---
# Instruction selection of a 32-byte <8 x s32> G_STORE whose memory operand
# carries no explicit alignment (presumably natural 32-byte alignment,
# matching the align 32 IR function of the same name - confirm).
# Expected opcode per run - VMOVAPSYmr for the AVX-only run,
# VMOVAPSZ256mr_NOVLX for the +avx512f run and VMOVAPSZ256mr for the
# +avx512f,+avx512vl run. The stored vreg is expected in vr256 for the
# AVX-only run and in vr256x for both AVX-512 runs.
name: test_store_v8i32_align
# ALL-LABEL: name: test_store_v8i32_align
alignment: 4
legalized: true
regBankSelected: true
# NO_AVX512F: registers:
# NO_AVX512F-NEXT: - { id: 0, class: vr256 }
# NO_AVX512F-NEXT: - { id: 1, class: gr64 }
#
# AVX512ALL: registers:
# AVX512ALL-NEXT: - { id: 0, class: vr256x }
# AVX512ALL-NEXT: - { id: 1, class: gr64 }
registers:
- { id: 0, class: vecr }
- { id: 1, class: gpr }
# NO_AVX512F: %0 = COPY %ymm0
# NO_AVX512F-NEXT: %1 = COPY %rdi
# NO_AVX512F-NEXT: VMOVAPSYmr %1, 1, _, 0, _, %0 :: (store 32 into %ir.p1)
# NO_AVX512F-NEXT: RET 0
#
# AVX512F: %0 = COPY %ymm0
# AVX512F-NEXT: %1 = COPY %rdi
# AVX512F-NEXT: VMOVAPSZ256mr_NOVLX %1, 1, _, 0, _, %0 :: (store 32 into %ir.p1)
# AVX512F-NEXT: RET 0
#
# AVX512VL: %0 = COPY %ymm0
# AVX512VL-NEXT: %1 = COPY %rdi
# AVX512VL-NEXT: VMOVAPSZ256mr %1, 1, _, 0, _, %0 :: (store 32 into %ir.p1)
# AVX512VL-NEXT: RET 0
body: |
bb.1 (%ir-block.0):
liveins: %rdi, %ymm0
%0(<8 x s32>) = COPY %ymm0
%1(p0) = COPY %rdi
G_STORE %0(<8 x s32>), %1(p0) :: (store 32 into %ir.p1)
RET 0
...

View File

@ -0,0 +1,127 @@
# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=AVX512F
--- |
; IR counterpart of the MIR function with the same name; its %p1 argument is
; what the MIR memory operand refers to. Unaligned (align 1) <16 x i32> load.
define <16 x i32> @test_load_v16i32_noalign(<16 x i32>* %p1) {
%r = load <16 x i32>, <16 x i32>* %p1, align 1
ret <16 x i32> %r
}
; IR counterpart of the MIR function with the same name; its %p1 argument is
; what the MIR memory operand refers to. <16 x i32> load with align 32, which
; is less than the 64-byte size of the vector.
define <16 x i32> @test_load_v16i32_align(<16 x i32>* %p1) {
%r = load <16 x i32>, <16 x i32>* %p1, align 32
ret <16 x i32> %r
}
; IR counterpart of the MIR function with the same name; its %p1 argument is
; what the MIR memory operand refers to. Unaligned (align 1) <16 x i32> store.
define void @test_store_v16i32_noalign(<16 x i32> %val, <16 x i32>* %p1) {
store <16 x i32> %val, <16 x i32>* %p1, align 1
ret void
}
; IR counterpart of the MIR function with the same name; its %p1 argument is
; what the MIR memory operand refers to. <16 x i32> store with align 32, which
; is less than the 64-byte size of the vector.
define void @test_store_v16i32_align(<16 x i32> %val, <16 x i32>* %p1) {
store <16 x i32> %val, <16 x i32>* %p1, align 32
ret void
}
...
---
# Instruction selection of an unaligned (align 1) 64-byte <16 x s32> G_LOAD.
# The expected result is the unaligned VMOVUPSZrm with the pointer in gr64
# and the loaded vector in vr512.
name: test_load_v16i32_noalign
# AVX512F-LABEL: name: test_load_v16i32_noalign
alignment: 4
legalized: true
regBankSelected: true
# AVX512F: registers:
# AVX512F-NEXT: - { id: 0, class: gr64 }
# AVX512F-NEXT: - { id: 1, class: vr512 }
registers:
- { id: 0, class: gpr }
- { id: 1, class: vecr }
# AVX512F: %0 = COPY %rdi
# AVX512F-NEXT: %1 = VMOVUPSZrm %0, 1, _, 0, _ :: (load 64 from %ir.p1, align 1)
# AVX512F-NEXT: %zmm0 = COPY %1
# AVX512F-NEXT: RET 0, implicit %zmm0
body: |
bb.1 (%ir-block.0):
liveins: %rdi
%0(p0) = COPY %rdi
%1(<16 x s32>) = G_LOAD %0(p0) :: (load 64 from %ir.p1, align 1)
%zmm0 = COPY %1(<16 x s32>)
RET 0, implicit %zmm0
...
---
# Instruction selection of a 64-byte <16 x s32> G_LOAD with align 32. The
# expected result is still the unaligned VMOVUPSZrm, with the pointer in gr64
# and the loaded vector in vr512.
# NOTE(review): align 32 is smaller than the 64-byte access, so despite the
# "align" name this does not exercise an aligned 512-bit load opcode; an
# align 64 case would be needed for that - confirm whether this is intended.
name: test_load_v16i32_align
# AVX512F-LABEL: name: test_load_v16i32_align
alignment: 4
legalized: true
regBankSelected: true
# AVX512F: registers:
# AVX512F-NEXT: - { id: 0, class: gr64 }
# AVX512F-NEXT: - { id: 1, class: vr512 }
registers:
- { id: 0, class: gpr }
- { id: 1, class: vecr }
# AVX512F: %0 = COPY %rdi
# AVX512F-NEXT: %1 = VMOVUPSZrm %0, 1, _, 0, _ :: (load 64 from %ir.p1, align 32)
# AVX512F-NEXT: %zmm0 = COPY %1
# AVX512F-NEXT: RET 0, implicit %zmm0
body: |
bb.1 (%ir-block.0):
liveins: %rdi
%0(p0) = COPY %rdi
%1(<16 x s32>) = G_LOAD %0(p0) :: (load 64 from %ir.p1, align 32)
%zmm0 = COPY %1(<16 x s32>)
RET 0, implicit %zmm0
...
---
# Instruction selection of an unaligned (align 1) 64-byte <16 x s32> G_STORE.
# The expected result is the unaligned VMOVUPSZmr with the stored vector in
# vr512 and the pointer in gr64.
name: test_store_v16i32_noalign
# AVX512F-LABEL: name: test_store_v16i32_noalign
alignment: 4
legalized: true
regBankSelected: true
# AVX512F: registers:
# AVX512F-NEXT: - { id: 0, class: vr512 }
# AVX512F-NEXT: - { id: 1, class: gr64 }
registers:
- { id: 0, class: vecr }
- { id: 1, class: gpr }
# AVX512F: %0 = COPY %zmm0
# AVX512F-NEXT: %1 = COPY %rdi
# AVX512F-NEXT: VMOVUPSZmr %1, 1, _, 0, _, %0 :: (store 64 into %ir.p1, align 1)
# AVX512F-NEXT: RET 0
body: |
bb.1 (%ir-block.0):
liveins: %rdi, %zmm0
%0(<16 x s32>) = COPY %zmm0
%1(p0) = COPY %rdi
G_STORE %0(<16 x s32>), %1(p0) :: (store 64 into %ir.p1, align 1)
RET 0
...
---
# Instruction selection of a 64-byte <16 x s32> G_STORE with align 32. The
# expected result is still the unaligned VMOVUPSZmr, with the stored vector
# in vr512 and the pointer in gr64.
# NOTE(review): align 32 is smaller than the 64-byte access, so despite the
# "align" name this does not exercise an aligned 512-bit store opcode; an
# align 64 case would be needed for that - confirm whether this is intended.
name: test_store_v16i32_align
# AVX512F-LABEL: name: test_store_v16i32_align
alignment: 4
legalized: true
regBankSelected: true
# AVX512F: registers:
# AVX512F-NEXT: - { id: 0, class: vr512 }
# AVX512F-NEXT: - { id: 1, class: gr64 }
registers:
- { id: 0, class: vecr }
- { id: 1, class: gpr }
# AVX512F: %0 = COPY %zmm0
# AVX512F-NEXT: %1 = COPY %rdi
# AVX512F-NEXT: VMOVUPSZmr %1, 1, _, 0, _, %0 :: (store 64 into %ir.p1, align 32)
# AVX512F-NEXT: RET 0
body: |
bb.1 (%ir-block.0):
liveins: %rdi, %zmm0
%0(<16 x s32>) = COPY %zmm0
%1(p0) = COPY %rdi
G_STORE %0(<16 x s32>), %1(p0) :: (store 64 into %ir.p1, align 32)
RET 0
...