diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp index 61956f74182..77dead8d241 100644 --- a/lib/Target/X86/X86InstructionSelector.cpp +++ b/lib/Target/X86/X86InstructionSelector.cpp @@ -302,6 +302,26 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB, : HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); + } else if (Ty.isVector() && Ty.getSizeInBits() == 256) { + if (Alignment >= 32) + return Isload ? (HasVLX ? X86::VMOVAPSZ256rm + : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX + : X86::VMOVAPSYrm) + : (HasVLX ? X86::VMOVAPSZ256mr + : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX + : X86::VMOVAPSYmr); + else + return Isload ? (HasVLX ? X86::VMOVUPSZ256rm + : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX + : X86::VMOVUPSYrm) + : (HasVLX ? X86::VMOVUPSZ256mr + : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX + : X86::VMOVUPSYmr); + } else if (Ty.isVector() && Ty.getSizeInBits() == 512) { + if (Alignment >= 64) + return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr; + else + return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; } return Opc; } diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp index da724f5d898..979aaee110a 100644 --- a/lib/Target/X86/X86LegalizerInfo.cpp +++ b/lib/Target/X86/X86LegalizerInfo.cpp @@ -35,6 +35,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, setLegalizerInfoSSE1(); setLegalizerInfoSSE2(); setLegalizerInfoSSE41(); + setLegalizerInfoAVX(); setLegalizerInfoAVX2(); setLegalizerInfoAVX512(); setLegalizerInfoAVX512DQ(); @@ -209,6 +210,18 @@ void X86LegalizerInfo::setLegalizerInfoSSE41() { setAction({G_MUL, v4s32}, Legal); } +void X86LegalizerInfo::setLegalizerInfoAVX() { + if (!Subtarget.hasAVX()) + return; + + const LLT v8s32 = LLT::vector(8, 32); + const LLT v4s64 = LLT::vector(4, 64); + + for (unsigned MemOp : {G_LOAD, G_STORE}) + for (auto Ty : {v8s32, v4s64}) + setAction({MemOp, Ty}, Legal); +} + void X86LegalizerInfo::setLegalizerInfoAVX2() { if (!Subtarget.hasAVX2()) return; @@ -239,6 +252,10 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() { setAction({G_MUL, v16s32}, Legal); + for (unsigned MemOp : {G_LOAD, G_STORE}) + for (auto Ty : {v16s32, v8s64}) + setAction({MemOp, Ty}, Legal); + /************ VLX *******************/ if (!Subtarget.hasVLX()) return; diff --git a/lib/Target/X86/X86LegalizerInfo.h b/lib/Target/X86/X86LegalizerInfo.h index ab5405a7042..135950a95f8 100644 --- a/lib/Target/X86/X86LegalizerInfo.h +++ b/lib/Target/X86/X86LegalizerInfo.h @@ -39,6 +39,7 @@ private: void setLegalizerInfoSSE1(); void setLegalizerInfoSSE2(); void setLegalizerInfoSSE41(); + void setLegalizerInfoAVX(); void setLegalizerInfoAVX2(); void setLegalizerInfoAVX512(); void setLegalizerInfoAVX512DQ(); diff --git a/test/CodeGen/X86/GlobalISel/memop-vec.ll b/test/CodeGen/X86/GlobalISel/memop-vec.ll index f1ffc15f4d0..870e812bbb6 100644 --- a/test/CodeGen/X86/GlobalISel/memop-vec.ll +++ b/test/CodeGen/X86/GlobalISel/memop-vec.ll @@ -1,39 +1,116 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SKX -; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -regbankselect-greedy -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SKX +; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=SKX +; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -regbankselect-greedy -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=SKX define <4 x i32> @test_load_v4i32_noalign(<4 x i32> * %p1) { -; ALL-LABEL: test_load_v4i32_noalign: -; ALL: # BB#0: -; ALL-NEXT: vmovups (%rdi), %xmm0 -; ALL-NEXT: retq +; SKX-LABEL: test_load_v4i32_noalign: +; SKX: # BB#0: +; SKX-NEXT: vmovups (%rdi), %xmm0 +; SKX-NEXT: retq %r = load <4 x i32>, <4 x i32>* %p1, align 1 ret <4 x i32> %r } define <4 x i32> @test_load_v4i32_align(<4 x i32> * %p1) { -; ALL-LABEL: test_load_v4i32_align: -; ALL: # BB#0: -; ALL-NEXT: vmovaps (%rdi), %xmm0 -; ALL-NEXT: retq +; SKX-LABEL: test_load_v4i32_align: +; SKX: # BB#0: +; SKX-NEXT: vmovaps (%rdi), %xmm0 +; SKX-NEXT: retq %r = load <4 x i32>, <4 x i32>* %p1, align 16 ret <4 x i32> %r } +define <8 x i32> @test_load_v8i32_noalign(<8 x i32> * %p1) { +; SKX-LABEL: test_load_v8i32_noalign: +; SKX: # BB#0: +; SKX-NEXT: vmovups (%rdi), %ymm0 +; SKX-NEXT: retq + %r = load <8 x i32>, <8 x i32>* %p1, align 1 + ret <8 x i32> %r +} + +define <8 x i32> @test_load_v8i32_align(<8 x i32> * %p1) { +; SKX-LABEL: test_load_v8i32_align: +; SKX: # BB#0: +; SKX-NEXT: vmovaps (%rdi), %ymm0 +; SKX-NEXT: retq + %r = load <8 x i32>, <8 x i32>* %p1, align 32 + ret <8 x i32> %r +} + +define <16 x i32> @test_load_v16i32_noalign(<16 x i32> * %p1) { +; SKX-LABEL: test_load_v16i32_noalign: +; SKX: # BB#0: +; SKX-NEXT: vmovups (%rdi), %zmm0 +; SKX-NEXT: retq + %r = load <16 x i32>, <16 x i32>* %p1, align 1 + ret <16 x i32> %r +} + +define <16 x i32> @test_load_v16i32_align(<16 x i32> * %p1) { +; SKX-LABEL: test_load_v16i32_align: +; SKX: # BB#0: +; SKX-NEXT: vmovups (%rdi), %zmm0 +; SKX-NEXT: retq + %r = load <16 x i32>, <16 x i32>* %p1, align 32 + ret <16 x i32> %r +} + define void @test_store_v4i32_noalign(<4 x i32> %val, <4 x i32>* %p1) { -; ALL-LABEL: test_store_v4i32_noalign: -; ALL: # BB#0: -; ALL-NEXT: vmovups %xmm0, (%rdi) -; ALL-NEXT: retq +; SKX-LABEL: test_store_v4i32_noalign: +; SKX: # BB#0: +; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: retq store <4 x i32> %val, <4 x i32>* %p1, align 1 ret void } define void @test_store_v4i32_align(<4 x i32> %val, <4 x i32>* %p1) { -; ALL-LABEL: test_store_v4i32_align: -; ALL: # BB#0: -; ALL-NEXT: vmovaps %xmm0, (%rdi) -; ALL-NEXT: retq +; SKX-LABEL: test_store_v4i32_align: +; SKX: # BB#0: +; SKX-NEXT: vmovaps %xmm0, (%rdi) +; SKX-NEXT: retq store <4 x i32> %val, <4 x i32>* %p1, align 16 ret void } + +define void @test_store_v8i32_noalign(<8 x i32> %val, <8 x i32>* %p1) { +; SKX-LABEL: test_store_v8i32_noalign: +; SKX: # BB#0: +; SKX-NEXT: vmovups %ymm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq + store <8 x i32> %val, <8 x i32>* %p1, align 1 + ret void +} + +define void @test_store_v8i32_align(<8 x i32> %val, <8 x i32>* %p1) { +; SKX-LABEL: test_store_v8i32_align: +; SKX: # BB#0: +; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq + store <8 x i32> %val, <8 x i32>* %p1, align 32 + ret void +} + +define void @test_store_v16i32_noalign(<16 x i32> %val, <16 x i32>* %p1) { +; SKX-LABEL: test_store_v16i32_noalign: +; SKX: # BB#0: +; SKX-NEXT: vmovups %zmm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq + store <16 x i32> %val, <16 x i32>* %p1, align 1 + ret void +} + +define void @test_store_v16i32_align(<16 x i32> %val, <16 x i32>* %p1) { +; SKX-LABEL: test_store_v16i32_align: +; SKX: # BB#0: +; SKX-NEXT: vmovaps %zmm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq + store <16 x i32> %val, <16 x i32>* %p1, align 64 + ret void +} + diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir b/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir index f925c836f3d..cc03f3a57f0 100644 --- a/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir +++ b/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir @@ -14,7 +14,16 @@ ret void } -... + define <8 x i32> @test_load_v8i32_noalign(<8 x i32>* %p1) { + %r = load <8 x i32>, <8 x i32>* %p1, align 1 + ret <8 x i32> %r + } + + define void @test_store_v8i32_noalign(<8 x i32> %val, <8 x i32>* %p1) { + store <8 x i32> %val, <8 x i32>* %p1, align 1 + ret void + } + --- name: test_mul_vec256 alignment: 4 @@ -84,3 +93,47 @@ body: | RET 0 ... +--- +name: test_load_v8i32_noalign +# CHECK-LABEL: name: test_load_v8i32_noalign +alignment: 4 +legalized: true +regBankSelected: false +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: gpr } +# CHECK-NEXT: - { id: 1, class: vecr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(<8 x s32>) = G_LOAD %0(p0) :: (load 32 from %ir.p1, align 1) + %ymm0 = COPY %1(<8 x s32>) + RET 0, implicit %ymm0 + +... +--- +name: test_store_v8i32_noalign +# CHECK-LABEL: name: test_store_v8i32_noalign +alignment: 4 +legalized: true +regBankSelected: false +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: vecr } +# CHECK-NEXT: - { id: 1, class: gpr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %ymm0 + + %0(<8 x s32>) = COPY %ymm0 + %1(p0) = COPY %rdi + G_STORE %0(<8 x s32>), %1(p0) :: (store 32 into %ir.p1, align 1) + RET 0 + +... diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir b/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir index e0c12ff44a2..278413ad38e 100644 --- a/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir +++ b/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir @@ -15,22 +15,29 @@ ret void } + define <16 x i32> @test_load_v16i32_noalign(<16 x i32>* %p1) { + %r = load <16 x i32>, <16 x i32>* %p1, align 1 + ret <16 x i32> %r + } + + define void @test_store_v16i32_noalign(<16 x i32> %val, <16 x i32>* %p1) { + store <16 x i32> %val, <16 x i32>* %p1, align 1 + ret void + } + ... --- name: test_mul_vec512 +# CHECK-LABEL: name: test_mul_vec512 alignment: 4 legalized: true regBankSelected: false -selected: false -tracksRegLiveness: true -# CHECK-LABEL: name: test_mul_vec512 -# CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: vecr } +# CHECK-NEXT: - { id: 1, class: vecr } registers: - { id: 0, class: _ } - { id: 1, class: _ } - - { id: 2, class: _ } body: | bb.1 (%ir-block.0): @@ -41,19 +48,16 @@ body: | ... --- name: test_add_vec512 +# CHECK-LABEL: name: test_add_vec512 alignment: 4 legalized: true regBankSelected: false -selected: false -tracksRegLiveness: true -# CHECK-LABEL: name: test_add_vec512 -# CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: vecr } +# CHECK-NEXT: - { id: 1, class: vecr } registers: - { id: 0, class: _ } - { id: 1, class: _ } - - { id: 2, class: _ } body: | bb.1 (%ir-block.0): @@ -64,24 +68,65 @@ body: | ... --- name: test_sub_vec512 +# CHECK-LABEL: name: test_sub_vec512 alignment: 4 legalized: true regBankSelected: false -selected: false -tracksRegLiveness: true -# CHECK-LABEL: name: test_sub_vec512 -# CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: vecr } +# CHECK-NEXT: - { id: 1, class: vecr } registers: - { id: 0, class: _ } - { id: 1, class: _ } - - { id: 2, class: _ } body: | bb.1 (%ir-block.0): %0(<16 x s32>) = IMPLICIT_DEF %1(<16 x s32>) = G_SUB %0, %0 RET 0 +... +--- + +name: test_load_v16i32_noalign +# CHECK-LABEL: name: test_load_v16i32_noalign +alignment: 4 +legalized: true +regBankSelected: false +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: gpr } +# CHECK-NEXT: - { id: 1, class: vecr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(<16 x s32>) = G_LOAD %0(p0) :: (load 64 from %ir.p1, align 1) + %zmm0 = COPY %1(<16 x s32>) + RET 0, implicit %zmm0 + +... +--- +name: test_store_v16i32_noalign +# CHECK-LABEL: name: test_store_v16i32_noalign +alignment: 4 +legalized: true +regBankSelected: false +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: vecr } +# CHECK-NEXT: - { id: 1, class: gpr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %zmm0 + + %0(<16 x s32>) = COPY %zmm0 + %1(p0) = COPY %rdi + G_STORE %0(<16 x s32>), %1(p0) :: (store 64 into %ir.p1, align 1) + RET 0 ... diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v256.mir b/test/CodeGen/X86/GlobalISel/select-memop-v256.mir new file mode 100644 index 00000000000..b9a7e4a8cc4 --- /dev/null +++ b/test/CodeGen/X86/GlobalISel/select-memop-v256.mir @@ -0,0 +1,188 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=AVX +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=AVX512ALL --check-prefix=AVX512F +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512ALL --check-prefix=AVX512VL + + +--- | + define <8 x i32> @test_load_v8i32_noalign(<8 x i32>* %p1) { + %r = load <8 x i32>, <8 x i32>* %p1, align 1 + ret <8 x i32> %r + } + + define <8 x i32> @test_load_v8i32_align(<8 x i32>* %p1) { + %r = load <8 x i32>, <8 x i32>* %p1, align 32 + ret <8 x i32> %r + } + + define void @test_store_v8i32_noalign(<8 x i32> %val, <8 x i32>* %p1) { + store <8 x i32> %val, <8 x i32>* %p1, align 1 + ret void + } + + define void @test_store_v8i32_align(<8 x i32> %val, <8 x i32>* %p1) { + store <8 x i32> %val, <8 x i32>* %p1, align 32 + ret void + } + + +... +--- +name: test_load_v8i32_noalign +# ALL-LABEL: name: test_load_v8i32_noalign +alignment: 4 +legalized: true +regBankSelected: true +# NO_AVX512F: registers: +# NO_AVX512F-NEXT: - { id: 0, class: gr64 } +# NO_AVX512F-NEXT: - { id: 1, class: vr256 } +# +# AVX512ALL: registers: +# AVX512ALL-NEXT: - { id: 0, class: gr64 } +# AVX512ALL-NEXT: - { id: 1, class: vr256x } +registers: + - { id: 0, class: gpr } + - { id: 1, class: vecr } +# NO_AVX512F: %0 = COPY %rdi +# NO_AVX512F-NEXT: %1 = VMOVUPSYrm %0, 1, _, 0, _ :: (load 32 from %ir.p1, align 1) +# NO_AVX512F-NEXT: %ymm0 = COPY %1 +# NO_AVX512F-NEXT: RET 0, implicit %ymm0 +# +# AVX512F: %0 = COPY %rdi +# AVX512F-NEXT: %1 = VMOVUPSZ256rm_NOVLX %0, 1, _, 0, _ :: (load 32 from %ir.p1, align 1) +# AVX512F-NEXT: %ymm0 = COPY %1 +# AVX512F-NEXT: RET 0, implicit %ymm0 +# +# AVX512VL: %0 = COPY %rdi +# AVX512VL-NEXT: %1 = VMOVUPSZ256rm %0, 1, _, 0, _ :: (load 32 from %ir.p1, align 1) +# AVX512VL-NEXT: %ymm0 = COPY %1 +# AVX512VL-NEXT: RET 0, implicit %ymm0 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(<8 x s32>) = G_LOAD %0(p0) :: (load 32 from %ir.p1, align 1) + %ymm0 = COPY %1(<8 x s32>) + RET 0, implicit %ymm0 + +... +--- +name: test_load_v8i32_align +# ALL-LABEL: name: test_load_v8i32_align +alignment: 4 +legalized: true +regBankSelected: true +# NO_AVX512F: registers: +# NO_AVX512F-NEXT: - { id: 0, class: gr64 } +# NO_AVX512F-NEXT: - { id: 1, class: vr256 } +# +# AVX512ALL: registers: +# AVX512ALL-NEXT: - { id: 0, class: gr64 } +# AVX512ALL-NEXT: - { id: 1, class: vr256x } +registers: + - { id: 0, class: gpr } + - { id: 1, class: vecr } +# NO_AVX512F: %0 = COPY %rdi +# NO_AVX512F-NEXT: %1 = VMOVAPSYrm %0, 1, _, 0, _ :: (load 32 from %ir.p1) +# NO_AVX512F-NEXT: %ymm0 = COPY %1 +# NO_AVX512F-NEXT: RET 0, implicit %ymm0 +# +# AVX512F: %0 = COPY %rdi +# AVX512F-NEXT: %1 = VMOVAPSZ256rm_NOVLX %0, 1, _, 0, _ :: (load 32 from %ir.p1) +# AVX512F-NEXT: %ymm0 = COPY %1 +# AVX512F-NEXT: RET 0, implicit %ymm0 +# +# AVX512VL: %0 = COPY %rdi +# AVX512VL-NEXT: %1 = VMOVAPSZ256rm %0, 1, _, 0, _ :: (load 32 from %ir.p1) +# AVX512VL-NEXT: %ymm0 = COPY %1 +# AVX512VL-NEXT: RET 0, implicit %ymm0 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(<8 x s32>) = G_LOAD %0(p0) :: (load 32 from %ir.p1) + %ymm0 = COPY %1(<8 x s32>) + RET 0, implicit %ymm0 + +... +--- +name: test_store_v8i32_noalign +# ALL-LABEL: name: test_store_v8i32_noalign +alignment: 4 +legalized: true +regBankSelected: true +# NO_AVX512F: registers: +# NO_AVX512F-NEXT: - { id: 0, class: vr256 } +# NO_AVX512F-NEXT: - { id: 1, class: gr64 } +# +# AVX512ALL: registers: +# AVX512ALL-NEXT: - { id: 0, class: vr256x } +# AVX512ALL-NEXT: - { id: 1, class: gr64 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: gpr } +# NO_AVX512F: %0 = COPY %ymm0 +# NO_AVX512F-NEXT: %1 = COPY %rdi +# NO_AVX512F-NEXT: VMOVUPSYmr %1, 1, _, 0, _, %0 :: (store 32 into %ir.p1, align 1) +# NO_AVX512F-NEXT: RET 0 +# +# AVX512F: %0 = COPY %ymm0 +# AVX512F-NEXT: %1 = COPY %rdi +# AVX512F-NEXT: VMOVUPSZ256mr_NOVLX %1, 1, _, 0, _, %0 :: (store 32 into %ir.p1, align 1) +# AVX512F-NEXT: RET 0 +# +# AVX512VL: %0 = COPY %ymm0 +# AVX512VL-NEXT: %1 = COPY %rdi +# AVX512VL-NEXT: VMOVUPSZ256mr %1, 1, _, 0, _, %0 :: (store 32 into %ir.p1, align 1) +# AVX512VL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %ymm0 + + %0(<8 x s32>) = COPY %ymm0 + %1(p0) = COPY %rdi + G_STORE %0(<8 x s32>), %1(p0) :: (store 32 into %ir.p1, align 1) + RET 0 + +... +--- +name: test_store_v8i32_align +# ALL-LABEL: name: test_store_v8i32_align +alignment: 4 +legalized: true +regBankSelected: true +# NO_AVX512F: registers: +# NO_AVX512F-NEXT: - { id: 0, class: vr256 } +# NO_AVX512F-NEXT: - { id: 1, class: gr64 } +# +# AVX512ALL: registers: +# AVX512ALL-NEXT: - { id: 0, class: vr256x } +# AVX512ALL-NEXT: - { id: 1, class: gr64 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: gpr } +# NO_AVX512F: %0 = COPY %ymm0 +# NO_AVX512F-NEXT: %1 = COPY %rdi +# NO_AVX512F-NEXT: VMOVAPSYmr %1, 1, _, 0, _, %0 :: (store 32 into %ir.p1) +# NO_AVX512F-NEXT: RET 0 +# +# AVX512F: %0 = COPY %ymm0 +# AVX512F-NEXT: %1 = COPY %rdi +# AVX512F-NEXT: VMOVAPSZ256mr_NOVLX %1, 1, _, 0, _, %0 :: (store 32 into %ir.p1) +# AVX512F-NEXT: RET 0 +# +# AVX512VL: %0 = COPY %ymm0 +# AVX512VL-NEXT: %1 = COPY %rdi +# AVX512VL-NEXT: VMOVAPSZ256mr %1, 1, _, 0, _, %0 :: (store 32 into %ir.p1) +# AVX512VL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %ymm0 + + %0(<8 x s32>) = COPY %ymm0 + %1(p0) = COPY %rdi + G_STORE %0(<8 x s32>), %1(p0) :: (store 32 into %ir.p1) + RET 0 + +... diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v512.mir b/test/CodeGen/X86/GlobalISel/select-memop-v512.mir new file mode 100644 index 00000000000..87978a684d4 --- /dev/null +++ b/test/CodeGen/X86/GlobalISel/select-memop-v512.mir @@ -0,0 +1,127 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=AVX512F +--- | + define <16 x i32> @test_load_v16i32_noalign(<16 x i32>* %p1) { + %r = load <16 x i32>, <16 x i32>* %p1, align 1 + ret <16 x i32> %r + } + + define <16 x i32> @test_load_v16i32_align(<16 x i32>* %p1) { + %r = load <16 x i32>, <16 x i32>* %p1, align 32 + ret <16 x i32> %r + } + + define void @test_store_v16i32_noalign(<16 x i32> %val, <16 x i32>* %p1) { + store <16 x i32> %val, <16 x i32>* %p1, align 1 + ret void + } + + define void @test_store_v16i32_align(<16 x i32> %val, <16 x i32>* %p1) { + store <16 x i32> %val, <16 x i32>* %p1, align 32 + ret void + } + +... +--- +name: test_load_v16i32_noalign +# AVX512F-LABEL: name: test_load_v16i32_noalign +alignment: 4 +legalized: true +regBankSelected: true +# AVX512F: registers: +# AVX512F-NEXT: - { id: 0, class: gr64 } +# AVX512F-NEXT: - { id: 1, class: vr512 } +registers: + - { id: 0, class: gpr } + - { id: 1, class: vecr } +# AVX512F: %0 = COPY %rdi +# AVX512F-NEXT: %1 = VMOVUPSZrm %0, 1, _, 0, _ :: (load 64 from %ir.p1, align 1) +# AVX512F-NEXT: %zmm0 = COPY %1 +# AVX512F-NEXT: RET 0, implicit %zmm0 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(<16 x s32>) = G_LOAD %0(p0) :: (load 64 from %ir.p1, align 1) + %zmm0 = COPY %1(<16 x s32>) + RET 0, implicit %zmm0 + +... +--- +name: test_load_v16i32_align +# AVX512F-LABEL: name: test_load_v16i32_align +alignment: 4 +legalized: true +regBankSelected: true +# AVX512F: registers: +# AVX512F-NEXT: - { id: 0, class: gr64 } +# AVX512F-NEXT: - { id: 1, class: vr512 } +registers: + - { id: 0, class: gpr } + - { id: 1, class: vecr } +# AVX512F: %0 = COPY %rdi +# AVX512F-NEXT: %1 = VMOVUPSZrm %0, 1, _, 0, _ :: (load 64 from %ir.p1, align 32) +# AVX512F-NEXT: %zmm0 = COPY %1 +# AVX512F-NEXT: RET 0, implicit %zmm0 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(<16 x s32>) = G_LOAD %0(p0) :: (load 64 from %ir.p1, align 32) + %zmm0 = COPY %1(<16 x s32>) + RET 0, implicit %zmm0 + +... +--- +name: test_store_v16i32_noalign +# AVX512F-LABEL: name: test_store_v16i32_noalign +alignment: 4 +legalized: true +regBankSelected: true +# AVX512F: registers: +# AVX512F-NEXT: - { id: 0, class: vr512 } +# AVX512F-NEXT: - { id: 1, class: gr64 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: gpr } +# AVX512F: %0 = COPY %zmm0 +# AVX512F-NEXT: %1 = COPY %rdi +# AVX512F-NEXT: VMOVUPSZmr %1, 1, _, 0, _, %0 :: (store 64 into %ir.p1, align 1) +# AVX512F-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %zmm0 + + %0(<16 x s32>) = COPY %zmm0 + %1(p0) = COPY %rdi + G_STORE %0(<16 x s32>), %1(p0) :: (store 64 into %ir.p1, align 1) + RET 0 + +... +--- +name: test_store_v16i32_align +# AVX512F-LABEL: name: test_store_v16i32_align +alignment: 4 +legalized: true +regBankSelected: true +# AVX512F: registers: +# AVX512F-NEXT: - { id: 0, class: vr512 } +# AVX512F-NEXT: - { id: 1, class: gr64 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: gpr } +# AVX512F: %0 = COPY %zmm0 +# AVX512F-NEXT: %1 = COPY %rdi +# AVX512F-NEXT: VMOVUPSZmr %1, 1, _, 0, _, %0 :: (store 64 into %ir.p1, align 32) +# AVX512F-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %zmm0 + + %0(<16 x s32>) = COPY %zmm0 + %1(p0) = COPY %rdi + G_STORE %0(<16 x s32>), %1(p0) :: (store 64 into %ir.p1, align 32) + RET 0 + +...