[IR] Enable load/store/alloca for arrays of scalable vectors.

Differential Revision: https://reviews.llvm.org/D158517
Paul Walker 2023-08-11 14:39:17 +01:00
parent 02d27eac0f
commit c7d65e4466
22 changed files with 324 additions and 56 deletions

View File

@ -742,16 +742,16 @@ an optional list of attached :ref:`metadata <metadata>`.
Variables and aliases can have a
:ref:`Thread Local Storage Model <tls_model>`.
:ref:`Scalable vectors <t_vector>` cannot be global variables or members of
arrays because their size is unknown at compile time. They are allowed in
structs to facilitate intrinsics returning multiple values. Generally, structs
containing scalable vectors are not considered "sized" and cannot be used in
loads, stores, allocas, or GEPs. The only exception to this rule is for structs
that contain scalable vectors of the same type (e.g. ``{<vscale x 2 x i32>,
<vscale x 2 x i32>}`` contains the same type while ``{<vscale x 2 x i32>,
<vscale x 2 x i64>}`` doesn't). These kinds of structs (we may call them
homogeneous scalable vector structs) are considered sized and can be used in
loads, stores, allocas, but not GEPs.
Globals cannot be or contain :ref:`Scalable vectors <t_vector>` because their
size is unknown at compile time. They are allowed in structs to facilitate
intrinsics returning multiple values. Generally, structs containing scalable
vectors are not considered "sized" and cannot be used in loads, stores, allocas,
or GEPs. The only exception to this rule is for structs that contain scalable
vectors of the same type (e.g. ``{<vscale x 2 x i32>, <vscale x 2 x i32>}``
contains the same type while ``{<vscale x 2 x i32>, <vscale x 2 x i64>}``
doesn't). These kinds of structs (we may call them homogeneous scalable vector
structs) are considered sized and can be used in loads, stores, allocas, but
not GEPs.
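As a minimal sketch of the rules above (type, value, and function names are illustrative; the accepted operations mirror the tests added later in this commit):

%hsvs = type { <vscale x 2 x i32>, <vscale x 2 x i32> }   ; homogeneous scalable vector struct
%arr = type [3 x <vscale x 2 x double>]                    ; array of scalable vectors

define void @example(ptr %src) {
  %dst = alloca %arr, align 8        ; allowed: alloca of an array of scalable vectors
  %v = load %arr, ptr %src           ; allowed: load
  store %arr %v, ptr %dst, align 8   ; allowed: store
  %s = load %hsvs, ptr %src          ; allowed: homogeneous structs are sized too, but not GEP-able
  ret void
}

; Still invalid: globals cannot be or contain scalable vectors.
; @g = global %arr zeroinitializer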
Syntax::

View File

@ -209,8 +209,7 @@ public:
/// Return true if this is a target extension type with a scalable layout.
bool isScalableTargetExtTy() const;
/// Return true if this is a scalable vector type or a target extension type
/// with a scalable layout.
/// Return true if this is a type whose size is a known multiple of vscale.
bool isScalableTy() const;
/// Return true if this is a FP type or a vector of FP.

View File

@ -4934,7 +4934,7 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
return UndefValue::get(GEPTy);
bool IsScalableVec =
isa<ScalableVectorType>(SrcTy) || any_of(Indices, [](const Value *V) {
SrcTy->isScalableTy() || any_of(Indices, [](const Value *V) {
return isa<ScalableVectorType>(V->getType());
});

View File

@ -127,9 +127,7 @@ bool GEPOperator::accumulateConstantOffset(
auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
// Scalable vectors are multiplied by a runtime constant.
bool ScalableType = false;
if (isa<ScalableVectorType>(GTI.getIndexedType()))
ScalableType = true;
bool ScalableType = GTI.getIndexedType()->isScalableTy();
Value *V = GTI.getOperand();
StructType *STy = GTI.getStructTypeOrNull();
@ -189,7 +187,7 @@ bool GEPOperator::collectOffset(
for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
GTI != GTE; ++GTI) {
// Scalable vectors are multiplied by a runtime constant.
bool ScalableType = isa<ScalableVectorType>(GTI.getIndexedType());
bool ScalableType = GTI.getIndexedType()->isScalableTy();
Value *V = GTI.getOperand();
StructType *STy = GTI.getStructTypeOrNull();
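For context, the broader isScalableTy() check matters here because a constant index over an array of scalable vectors still yields a vscale-scaled byte offset, so it cannot be folded into a fixed constant. A small sketch (function name is hypothetical):

define ptr @offset_is_scalable(ptr %p) {
  ; The byte offset of %q is vscale * 64 (4 elements, each vscale * 16 bytes):
  ; a runtime multiple of vscale, not a compile-time constant, even though
  ; the GEP index itself is the constant 1.
  %q = getelementptr [4 x <vscale x 4 x i32>], ptr %p, i64 1
  ret ptr %q
}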

View File

@ -58,6 +58,8 @@ bool Type::isIntegerTy(unsigned Bitwidth) const {
}
bool Type::isScalableTy() const {
if (const auto *ATy = dyn_cast<ArrayType>(this))
return ATy->getElementType()->isScalableTy();
if (const auto *STy = dyn_cast<StructType>(this)) {
SmallPtrSet<Type *, 4> Visited;
return STy->containsScalableVectorType(&Visited);
@ -658,8 +660,7 @@ ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) {
bool ArrayType::isValidElementType(Type *ElemTy) {
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
!ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
!ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy() &&
!isa<ScalableVectorType>(ElemTy);
!ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy();
}
//===----------------------------------------------------------------------===//

View File

@ -850,17 +850,9 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
}
// Scalable vectors cannot be global variables, since we don't know
// the runtime size. If the global is an array containing scalable vectors,
// that will be caught by the isValidElementType methods in StructType or
// ArrayType instead.
Check(!isa<ScalableVectorType>(GV.getValueType()),
"Globals cannot contain scalable vectors", &GV);
if (auto *STy = dyn_cast<StructType>(GV.getValueType())) {
SmallPtrSet<Type *, 4> Visited;
Check(!STy->containsScalableVectorType(&Visited),
"Globals cannot contain scalable vectors", &GV);
}
// the runtime size.
Check(!GV.getValueType()->isScalableTy(),
"Globals cannot contain scalable types", &GV);
// Check if it's a target extension type that disallows being used as a
// global.
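With the single isScalableTy() check, the direct, array, and struct cases are all rejected with the same message. Roughly (global names are illustrative; compare the verifier test updated at the end of this diff):

; All three are rejected with "Globals cannot contain scalable types".
@direct = global <vscale x 4 x i32> zeroinitializer
@in_array = global [8 x <vscale x 4 x i32>] zeroinitializer
@in_struct = global { i32, <vscale x 4 x i32> } zeroinitializer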

View File

@ -390,7 +390,7 @@ static bool collectSRATypes(DenseMap<uint64_t, GlobalPart> &Parts,
}
// Scalable types not currently supported.
if (isa<ScalableVectorType>(Ty))
if (Ty->isScalableTy())
return false;
auto IsStored = [](Value *V, Constant *Initializer) {

View File

@ -804,7 +804,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
return nullptr;
const DataLayout &DL = IC.getDataLayout();
auto EltSize = DL.getTypeAllocSize(ET);
TypeSize EltSize = DL.getTypeAllocSize(ET);
const auto Align = LI.getAlign();
auto *Addr = LI.getPointerOperand();
@ -812,7 +812,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
auto *Zero = ConstantInt::get(IdxType, 0);
Value *V = PoisonValue::get(T);
uint64_t Offset = 0;
TypeSize Offset = TypeSize::get(0, ET->isScalableTy());
for (uint64_t i = 0; i < NumElements; i++) {
Value *Indices[2] = {
Zero,
@ -820,9 +820,9 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
};
auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
Name + ".elt");
auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
commonAlignment(Align, Offset),
Name + ".unpack");
EltAlign, Name + ".unpack");
L->setAAMetadata(LI.getAAMetadata());
V = IC.Builder.CreateInsertValue(V, L, i);
Offset += EltSize;
@ -957,7 +957,7 @@ static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
Type *SourceElementType = GEPI->getSourceElementType();
// Size information about scalable vectors is not available, so we cannot
// deduce whether indexing at n is undefined behaviour or not. Bail out.
if (isa<ScalableVectorType>(SourceElementType))
if (SourceElementType->isScalableTy())
return false;
Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
@ -1323,7 +1323,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
return false;
const DataLayout &DL = IC.getDataLayout();
auto EltSize = DL.getTypeAllocSize(AT->getElementType());
TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
const auto Align = SI.getAlign();
SmallString<16> EltName = V->getName();
@ -1335,7 +1335,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
auto *IdxType = Type::getInt64Ty(T->getContext());
auto *Zero = ConstantInt::get(IdxType, 0);
uint64_t Offset = 0;
TypeSize Offset = TypeSize::get(0, AT->getElementType()->isScalableTy());
for (uint64_t i = 0; i < NumElements; i++) {
Value *Indices[2] = {
Zero,
@ -1344,7 +1344,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
auto *Ptr =
IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
auto EltAlign = commonAlignment(Align, Offset);
auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
NS->setAAMetadata(SI.getAAMetadata());
Offset += EltSize;
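Tracking Offset as a TypeSize lets these unpacking loops handle scalable element types; the per-element alignment is derived from the offset's known-minimum byte value, which is safe because the actual offset here is that minimum scaled by vscale. A sketch of the expected rewrite for element 1 of a [2 x <vscale x 4 x i32>] load (mirroring the new InstCombine test later in this diff):

define <vscale x 4 x i32> @unpacked(ptr %x) {
  ; Element 1 lives at byte offset vscale x 16; its known-minimum value (16)
  ; is what the element load's alignment is computed from.
  %elt1 = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr %x, i64 0, i64 1
  %val = load <vscale x 4 x i32>, ptr %elt1, align 16
  ret <vscale x 4 x i32> %val
}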

View File

@ -2005,7 +2005,7 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
if (NumVarIndices != Src->getNumIndices()) {
// FIXME: getIndexedOffsetInType() does not handle scalable vectors.
if (isa<ScalableVectorType>(BaseType))
if (BaseType->isScalableTy())
return nullptr;
SmallVector<Value *> ConstantIndices;
@ -2118,7 +2118,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value *, 8> Indices(GEP.indices());
Type *GEPType = GEP.getType();
Type *GEPEltType = GEP.getSourceElementType();
bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
SQ.getWithInstruction(&GEP)))
return replaceInstUsesWith(GEP, V);

View File

@ -830,7 +830,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
if (GTI.isSequential()) {
// Constant offsets of scalable types are not really constant.
if (isa<ScalableVectorType>(GTI.getIndexedType()))
if (GTI.getIndexedType()->isScalableTy())
continue;
// Tries to extract a constant offset from this GEP index.
@ -1019,7 +1019,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
if (GTI.isSequential()) {
// Constant offsets of scalable types are not really constant.
if (isa<ScalableVectorType>(GTI.getIndexedType()))
if (GTI.getIndexedType()->isScalableTy())
continue;
// Splits this GEP index into a variadic part and a constant offset, and

View File

@ -0,0 +1,106 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
%my_subtype = type <vscale x 2 x double>
%my_type = type [3 x %my_subtype]
define void @array_1D(ptr %addr) #0 {
; CHECK-LABEL: array_1D:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z2.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%ret = alloca %my_type, align 8
%val = load %my_type, ptr %addr
store %my_type %val, ptr %ret, align 8
ret void
}
define %my_subtype @array_1D_extract(ptr %addr) #0 {
; CHECK-LABEL: array_1D_extract:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%ret = alloca %my_type, align 8
%val = load %my_type, ptr %addr
%elt = extractvalue %my_type %val, 1
ret %my_subtype %elt
}
define void @array_1D_insert(ptr %addr, %my_subtype %elt) #0 {
; CHECK-LABEL: array_1D_insert:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp]
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%ret = alloca %my_type, align 8
%val = load %my_type, ptr %addr
%ins = insertvalue %my_type %val, %my_subtype %elt, 1
store %my_type %ins, ptr %ret, align 8
ret void
}
define void @array_2D(ptr %addr) #0 {
; CHECK-LABEL: array_2D:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-6
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #5, mul vl]
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z5.d }, p0, [sp]
; CHECK-NEXT: st1d { z4.d }, p0, [sp, #5, mul vl]
; CHECK-NEXT: st1d { z3.d }, p0, [sp, #4, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #6
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%ret = alloca [2 x %my_type], align 8
%val = load [2 x %my_type], ptr %addr
store [2 x %my_type] %val, ptr %ret, align 8
ret void
}
attributes #0 = { "target-features"="+sve" }

View File

@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs --riscv-no-aliases < %s | FileCheck %s
target triple = "riscv64-unknown-unknown-elf"
%my_type = type [3 x <vscale x 1 x double>]
define void @test(ptr %addr) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrrs a1, vlenb, zero
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; CHECK-NEXT: csrrs a1, vlenb, zero
; CHECK-NEXT: add a2, a0, a1
; CHECK-NEXT: vl1re64.v v8, (a2)
; CHECK-NEXT: slli a2, a1, 1
; CHECK-NEXT: vl1re64.v v9, (a0)
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: vl1re64.v v10, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v9, (a0)
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vs1r.v v10, (a2)
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs1r.v v8, (a0)
; CHECK-NEXT: csrrs a0, vlenb, zero
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%ret = alloca %my_type, align 8
%val = load %my_type, ptr %addr
store %my_type %val, ptr %ret, align 8
ret void
}

View File

@ -1,8 +0,0 @@
; RUN: not opt -S -passes=verify < %s 2>&1 | FileCheck %s
;; Arrays cannot contain scalable vectors; make sure we detect them even
;; when nested inside other aggregates.
%ty = type { i64, [4 x <vscale x 256 x i1>] }
; CHECK: error: invalid array element type
; CHECK: %ty = type { i64, [4 x <vscale x 256 x i1>] }

View File

@ -52,6 +52,12 @@ define void @gep_cse_offset_canonicalization(ptr %p, i64 %idx, i64 %idx2) {
; CHECK-NEXT: call void @use(ptr [[GEP5]])
; CHECK-NEXT: call void @use(ptr [[GEP5_SAME]])
; CHECK-NEXT: call void @use(ptr [[GEP5_DIFFERENT]])
; CHECK-NEXT: [[GEP6:%.*]] = getelementptr [4 x <vscale x 4 x i32>], ptr [[P]], i64 [[IDX]], i64 1
; CHECK-NEXT: [[GEP6_SAME:%.*]] = getelementptr [4 x <vscale x 4 x float>], ptr [[P]], i64 [[IDX]], i64 1
; CHECK-NEXT: [[GEP6_DIFFERENT:%.*]] = getelementptr [4 x <vscale x 4 x float>], ptr [[P]], i64 [[IDX2]], i64 1
; CHECK-NEXT: call void @use(ptr [[GEP6]])
; CHECK-NEXT: call void @use(ptr [[GEP6_SAME]])
; CHECK-NEXT: call void @use(ptr [[GEP6_DIFFERENT]])
; CHECK-NEXT: ret void
;
%gep1 = getelementptr i64, ptr %p, i64 1
@ -89,6 +95,12 @@ define void @gep_cse_offset_canonicalization(ptr %p, i64 %idx, i64 %idx2) {
call void @use(ptr %gep5)
call void @use(ptr %gep5.same)
call void @use(ptr %gep5.different)
%gep6 = getelementptr [4 x <vscale x 4 x i32>], ptr %p, i64 %idx, i64 1
%gep6.same = getelementptr [4 x <vscale x 4 x float>], ptr %p, i64 %idx, i64 1
%gep6.different = getelementptr [4 x <vscale x 4 x float>], ptr %p, i64 %idx2, i64 1
call void @use(ptr %gep6)
call void @use(ptr %gep6.same)
call void @use(ptr %gep6.different)
ret void
}

View File

@ -0,0 +1,15 @@
; RUN: opt -passes=globalopt < %s
; Ensure we don't ICE by trying to optimize a scalable vector load of a global
; variable.
%struct.xxx = type <{ [96 x i8] }>
@.bss = internal unnamed_addr global %struct.xxx zeroinitializer, align 32
define dso_local void @foo() local_unnamed_addr align 16 {
L.entry:
store [4 x <vscale x 2 x double>] zeroinitializer, ptr @.bss, align 1
%0 = load [4 x <vscale x 2 x double>], ptr @.bss, align 8
unreachable
}

View File

@ -18,3 +18,10 @@ define void @can_replace_gep_idx_with_zero_typesize(i64 %n, ptr %a, i64 %b) {
call void @do_something(<vscale x 4 x i32> %tmp)
ret void
}
define void @can_replace_gep_idx_with_zero_typesize_2(i64 %n, ptr %a, i64 %b) {
%idx = getelementptr [2 x <vscale x 4 x i32>], ptr %a, i64 %b, i64 0
%tmp = load <vscale x 4 x i32>, ptr %idx
call void @do_something(<vscale x 4 x i32> %tmp)
ret void
}

View File

@ -298,6 +298,17 @@ define ptr @geps_combinable_scalable(ptr %a, i64 %idx) {
ret ptr %a3
}
define ptr @geps_combinable_scalable_vector_array(ptr %a, i64 %idx) {
; CHECK-LABEL: @geps_combinable_scalable_vector_array(
; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr [[A:%.*]], i64 1
; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
; CHECK-NEXT: ret ptr [[A3]]
;
%a2 = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr %a, i64 1
%a3 = getelementptr inbounds i8, ptr %a2, i32 4
ret ptr %a3
}
define i1 @compare_geps_same_indices(ptr %a, ptr %b, i64 %idx) {
; CHECK-LABEL: @compare_geps_same_indices(
; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[A:%.*]], [[B:%.*]]

View File

@ -0,0 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
define <vscale x 4 x i32> @load(ptr %x) {
; CHECK-LABEL: define <vscale x 4 x i32> @load
; CHECK-SAME: (ptr [[X:%.*]]) {
; CHECK-NEXT: [[A_ELT1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
; CHECK-NEXT: [[A_UNPACK2:%.*]] = load <vscale x 4 x i32>, ptr [[A_ELT1]], align 16
; CHECK-NEXT: ret <vscale x 4 x i32> [[A_UNPACK2]]
;
%a = load [2 x <vscale x 4 x i32>], ptr %x
%b = extractvalue [2 x <vscale x 4 x i32>] %a, 1
ret <vscale x 4 x i32> %b
}
define void @store(ptr %x, <vscale x 4 x i32> %y, <vscale x 4 x i32> %z) {
; CHECK-LABEL: define void @store
; CHECK-SAME: (ptr [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]], <vscale x 4 x i32> [[Z:%.*]]) {
; CHECK-NEXT: store <vscale x 4 x i32> [[Y]], ptr [[X]], align 16
; CHECK-NEXT: [[X_REPACK1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
; CHECK-NEXT: store <vscale x 4 x i32> [[Z]], ptr [[X_REPACK1]], align 16
; CHECK-NEXT: ret void
;
%a = insertvalue [2 x <vscale x 4 x i32>] poison, <vscale x 4 x i32> %y, 0
%b = insertvalue [2 x <vscale x 4 x i32>] %a, <vscale x 4 x i32> %z, 1
store [2 x <vscale x 4 x i32>] %b, ptr %x
ret void
}

View File

@ -358,3 +358,12 @@ define <8 x ptr> @gep_vector_index_op3_poison_constant_index_afterwards(ptr %ptr
%res = getelementptr inbounds %t.3, ptr %ptr, i64 0, i32 1, <8 x i64> poison, i32 1
ret <8 x ptr> %res
}
define i64 @gep_array_of_scalable_vectors_ptrdiff(ptr %ptr) {
%c1 = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr %ptr, i64 4
%c2 = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr %ptr, i64 6
%c1.int = ptrtoint ptr %c1 to i64
%c2.int = ptrtoint ptr %c2 to i64
%diff = sub i64 %c2.int, %c1.int
ret i64 %diff
}

View File

@ -0,0 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s
; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s
; This test checks that SROA runs mem2reg on arrays of scalable vectors.
define [ 2 x <vscale x 4 x i32> ] @alloca(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: define [2 x <vscale x 4 x i32>] @alloca
; CHECK-SAME: (<vscale x 4 x i32> [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]]) {
; CHECK-NEXT: [[AGG0:%.*]] = insertvalue [2 x <vscale x 4 x i32>] poison, <vscale x 4 x i32> [[X]], 0
; CHECK-NEXT: [[AGG1:%.*]] = insertvalue [2 x <vscale x 4 x i32>] [[AGG0]], <vscale x 4 x i32> [[Y]], 1
; CHECK-NEXT: ret [2 x <vscale x 4 x i32>] [[AGG1]]
;
%addr = alloca [ 2 x <vscale x 4 x i32> ], align 4
%agg0 = insertvalue [ 2 x <vscale x 4 x i32> ] poison, <vscale x 4 x i32> %x, 0
%agg1 = insertvalue [ 2 x <vscale x 4 x i32> ] %agg0, <vscale x 4 x i32> %y, 1
store [ 2 x <vscale x 4 x i32> ] %agg1, ptr %addr, align 4
%val = load [ 2 x <vscale x 4 x i32> ], ptr %addr, align 4
ret [ 2 x <vscale x 4 x i32> ] %val
}

View File

@ -28,4 +28,41 @@ define ptr @test2(ptr %base, i64 %idx) {
ret ptr %gep
}
; Index is implicitly multiplied by vscale and so not really constant.
define ptr @test3(ptr %base, i64 %idx) #0 {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[IDX_NEXT:%.*]] = add nuw nsw i64 [[IDX:%.*]], 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 [[IDX_NEXT]]
; CHECK-NEXT: ret ptr [[GEP]]
;
%idx.next = add nuw nsw i64 %idx, 1
%gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 %idx.next
ret ptr %gep
}
; Indices are implicitly multiplied by vscale and so not really constant.
define ptr @test4(ptr %base, i64 %idx) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[IDX_NEXT:%.*]] = add nuw nsw i64 [[IDX:%.*]], 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 3, i64 [[IDX_NEXT]]
; CHECK-NEXT: ret ptr [[GEP]]
;
%idx.next = add nuw nsw i64 %idx, 1
%gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 3, i64 %idx.next
ret ptr %gep
}
; Whilst the first two indices are constant, their offsets are scaled by vscale
; and cannot be extracted; the calculation of the third index does contain a
; constant that can be extracted.
define ptr @test5(ptr %base, i64 %idx) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 1, i64 3, i64 [[IDX:%.*]]
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[TMP1]], i64 1
; CHECK-NEXT: ret ptr [[GEP2]]
;
%idx.next = add nuw nsw i64 %idx, 1
%gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 1, i64 3, i64 %idx.next
ret ptr %gep
}
attributes #0 = { "target-features"="+sve" }

View File

@ -3,14 +3,15 @@
;; Global variables cannot be scalable vectors, since we don't
;; know the size at compile time.
; CHECK: Globals cannot contain scalable vectors
; CHECK: Globals cannot contain scalable types
; CHECK-NEXT: ptr @ScalableVecGlobal
@ScalableVecGlobal = global <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: Globals cannot contain scalable vectors
; CHECK-NEXT: Globals cannot contain scalable types
; CHECK-NEXT: ptr @ScalableVecArrayGlobal
@ScalableVecArrayGlobal = global [ 8 x <vscale x 4 x i32> ] zeroinitializer
; CHECK-NEXT: Globals cannot contain scalable types
; CHECK-NEXT: ptr @ScalableVecStructGlobal
@ScalableVecStructGlobal = global { i32, <vscale x 4 x i32> } zeroinitializer
;; Global _pointers_ to scalable vectors are fine
; CHECK-NOT: Globals cannot contain scalable vectors
@ScalableVecPtr = global ptr zeroinitializer