[IR] Enable load/store/alloca for arrays of scalable vectors.

Differential Revision: https://reviews.llvm.org/D158517

commit c7d65e4466 (parent 02d27eac0f)
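
For orientation, here is a minimal sketch of the kind of IR this change makes legal. It mirrors the AArch64 and RISC-V tests added further down (%my_type is taken from those tests; the @copy function name is illustrative). Globals of such types remain invalid, as the Verifier change and its test below show.

  %my_type = type [3 x <vscale x 2 x double>]

  define void @copy(ptr %src) {
  entry:
    ; alloca, load and store of an array of scalable vectors are now accepted
    %tmp = alloca %my_type, align 8
    %val = load %my_type, ptr %src, align 8
    store %my_type %val, ptr %tmp, align 8
    ret void
  }

  ; Still rejected by the verifier ("Globals cannot contain scalable types"):
  ; @g = global %my_type zeroinitializer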
@@ -742,16 +742,16 @@ an optional list of attached :ref:`metadata <metadata>`.

 Variables and aliases can have a
 :ref:`Thread Local Storage Model <tls_model>`.

-:ref:`Scalable vectors <t_vector>` cannot be global variables or members of
-arrays because their size is unknown at compile time. They are allowed in
-structs to facilitate intrinsics returning multiple values. Generally, structs
-containing scalable vectors are not considered "sized" and cannot be used in
-loads, stores, allocas, or GEPs. The only exception to this rule is for structs
-that contain scalable vectors of the same type (e.g. ``{<vscale x 2 x i32>,
-<vscale x 2 x i32>}`` contains the same type while ``{<vscale x 2 x i32>,
-<vscale x 2 x i64>}`` doesn't). These kinds of structs (we may call them
-homogeneous scalable vector structs) are considered sized and can be used in
-loads, stores, allocas, but not GEPs.
+Globals cannot be or contain :ref:`Scalable vectors <t_vector>` because their
+size is unknown at compile time. They are allowed in structs to facilitate
+intrinsics returning multiple values. Generally, structs containing scalable
+vectors are not considered "sized" and cannot be used in loads, stores, allocas,
+or GEPs. The only exception to this rule is for structs that contain scalable
+vectors of the same type (e.g. ``{<vscale x 2 x i32>, <vscale x 2 x i32>}``
+contains the same type while ``{<vscale x 2 x i32>, <vscale x 2 x i64>}``
+doesn't). These kinds of structs (we may call them homogeneous scalable vector
+structs) are considered sized and can be used in loads, stores, allocas, but
+not GEPs.

 Syntax::

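To make the documented rule concrete, a small IR sketch (the %homog, %mixed and @roundtrip names are illustrative; the behaviour described follows the paragraph above): a homogeneous scalable vector struct is sized and may be used with alloca, load and store, but not with getelementptr, and it still cannot appear in a global.

  %homog = type { <vscale x 2 x i32>, <vscale x 2 x i32> }   ; same element type: sized
  %mixed = type { <vscale x 2 x i32>, <vscale x 2 x i64> }   ; mixed: not considered sized

  define void @roundtrip(ptr %p) {
    %tmp = alloca %homog           ; allowed
    %v = load %homog, ptr %p       ; allowed
    store %homog %v, ptr %tmp      ; allowed
    ret void                       ; a GEP with %homog as source element type would not be
  }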
@@ -209,8 +209,7 @@ public:
   /// Return true if this is a target extension type with a scalable layout.
   bool isScalableTargetExtTy() const;

-  /// Return true if this is a scalable vector type or a target extension type
-  /// with a scalable layout.
+  /// Return true if this is a type whose size is a known multiple of vscale.
   bool isScalableTy() const;

   /// Return true if this is a FP type or a vector of FP.
@@ -4934,7 +4934,7 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
     return UndefValue::get(GEPTy);

   bool IsScalableVec =
-      isa<ScalableVectorType>(SrcTy) || any_of(Indices, [](const Value *V) {
+      SrcTy->isScalableTy() || any_of(Indices, [](const Value *V) {
         return isa<ScalableVectorType>(V->getType());
       });
@@ -127,9 +127,7 @@ bool GEPOperator::accumulateConstantOffset(
   auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
   for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
     // Scalable vectors are multiplied by a runtime constant.
-    bool ScalableType = false;
-    if (isa<ScalableVectorType>(GTI.getIndexedType()))
-      ScalableType = true;
+    bool ScalableType = GTI.getIndexedType()->isScalableTy();

     Value *V = GTI.getOperand();
     StructType *STy = GTI.getStructTypeOrNull();
@@ -189,7 +187,7 @@ bool GEPOperator::collectOffset(
   for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
        GTI != GTE; ++GTI) {
     // Scalable vectors are multiplied by a runtime constant.
-    bool ScalableType = isa<ScalableVectorType>(GTI.getIndexedType());
+    bool ScalableType = GTI.getIndexedType()->isScalableTy();

     Value *V = GTI.getOperand();
     StructType *STy = GTI.getStructTypeOrNull();
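The "multiplied by a runtime constant" comment is easiest to see in IR terms (a sketch; the @advance name is illustrative): indexing over an array of scalable vectors advances the pointer by a byte count that is only known as a multiple of vscale, so it cannot be folded into a constant offset.

  ; Each [4 x <vscale x 4 x i32>] element occupies 4 * (vscale * 16) bytes,
  ; so this GEP adds %idx * vscale * 64 bytes -- not a compile-time constant.
  define ptr @advance(ptr %base, i64 %idx) {
    %p = getelementptr [4 x <vscale x 4 x i32>], ptr %base, i64 %idx
    ret ptr %p
  }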
@@ -58,6 +58,8 @@ bool Type::isIntegerTy(unsigned Bitwidth) const {
 }

 bool Type::isScalableTy() const {
+  if (const auto *ATy = dyn_cast<ArrayType>(this))
+    return ATy->getElementType()->isScalableTy();
   if (const auto *STy = dyn_cast<StructType>(this)) {
     SmallPtrSet<Type *, 4> Visited;
     return STy->containsScalableVectorType(&Visited);
@@ -658,8 +660,7 @@ ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) {
 bool ArrayType::isValidElementType(Type *ElemTy) {
   return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
          !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
-         !ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy() &&
-         !isa<ScalableVectorType>(ElemTy);
+         !ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy();
 }

 //===----------------------------------------------------------------------===//
@@ -850,17 +850,9 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
   }

   // Scalable vectors cannot be global variables, since we don't know
-  // the runtime size. If the global is an array containing scalable vectors,
-  // that will be caught by the isValidElementType methods in StructType or
-  // ArrayType instead.
-  Check(!isa<ScalableVectorType>(GV.getValueType()),
-        "Globals cannot contain scalable vectors", &GV);
-
-  if (auto *STy = dyn_cast<StructType>(GV.getValueType())) {
-    SmallPtrSet<Type *, 4> Visited;
-    Check(!STy->containsScalableVectorType(&Visited),
-          "Globals cannot contain scalable vectors", &GV);
-  }
+  // the runtime size.
+  Check(!GV.getValueType()->isScalableTy(),
+        "Globals cannot contain scalable types", &GV);

   // Check if it's a target extension type that disallows being used as a
   // global.
@@ -390,7 +390,7 @@ static bool collectSRATypes(DenseMap<uint64_t, GlobalPart> &Parts,
   }

   // Scalable types not currently supported.
-  if (isa<ScalableVectorType>(Ty))
+  if (Ty->isScalableTy())
     return false;

   auto IsStored = [](Value *V, Constant *Initializer) {
@@ -804,7 +804,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
       return nullptr;

     const DataLayout &DL = IC.getDataLayout();
-    auto EltSize = DL.getTypeAllocSize(ET);
+    TypeSize EltSize = DL.getTypeAllocSize(ET);
     const auto Align = LI.getAlign();

     auto *Addr = LI.getPointerOperand();
@@ -812,7 +812,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
     auto *Zero = ConstantInt::get(IdxType, 0);

     Value *V = PoisonValue::get(T);
-    uint64_t Offset = 0;
+    TypeSize Offset = TypeSize::get(0, ET->isScalableTy());
     for (uint64_t i = 0; i < NumElements; i++) {
       Value *Indices[2] = {
         Zero,
@@ -820,9 +820,9 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
       };
       auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
                                                Name + ".elt");
+      auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
       auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
-                                             commonAlignment(Align, Offset),
-                                             Name + ".unpack");
+                                             EltAlign, Name + ".unpack");
       L->setAAMetadata(LI.getAAMetadata());
       V = IC.Builder.CreateInsertValue(V, L, i);
       Offset += EltSize;
@@ -957,7 +957,7 @@ static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
   Type *SourceElementType = GEPI->getSourceElementType();
   // Size information about scalable vectors is not available, so we cannot
   // deduce whether indexing at n is undefined behaviour or not. Bail out.
-  if (isa<ScalableVectorType>(SourceElementType))
+  if (SourceElementType->isScalableTy())
     return false;

   Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
@@ -1323,7 +1323,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
       return false;

     const DataLayout &DL = IC.getDataLayout();
-    auto EltSize = DL.getTypeAllocSize(AT->getElementType());
+    TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
     const auto Align = SI.getAlign();

     SmallString<16> EltName = V->getName();
@@ -1335,7 +1335,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
     auto *IdxType = Type::getInt64Ty(T->getContext());
     auto *Zero = ConstantInt::get(IdxType, 0);

-    uint64_t Offset = 0;
+    TypeSize Offset = TypeSize::get(0, AT->getElementType()->isScalableTy());
     for (uint64_t i = 0; i < NumElements; i++) {
       Value *Indices[2] = {
         Zero,
@@ -1344,7 +1344,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
       auto *Ptr =
           IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
       auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
-      auto EltAlign = commonAlignment(Align, Offset);
+      auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
       Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
       NS->setAAMetadata(SI.getAAMetadata());
       Offset += EltSize;
@@ -2005,7 +2005,7 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
   APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
   if (NumVarIndices != Src->getNumIndices()) {
     // FIXME: getIndexedOffsetInType() does not handled scalable vectors.
-    if (isa<ScalableVectorType>(BaseType))
+    if (BaseType->isScalableTy())
       return nullptr;

     SmallVector<Value *> ConstantIndices;
@@ -2118,7 +2118,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   SmallVector<Value *, 8> Indices(GEP.indices());
   Type *GEPType = GEP.getType();
   Type *GEPEltType = GEP.getSourceElementType();
-  bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
+  bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
   if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
                                  SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
@@ -830,7 +830,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
     if (GTI.isSequential()) {
       // Constant offsets of scalable types are not really constant.
-      if (isa<ScalableVectorType>(GTI.getIndexedType()))
+      if (GTI.getIndexedType()->isScalableTy())
         continue;

       // Tries to extract a constant offset from this GEP index.
@@ -1019,7 +1019,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
     if (GTI.isSequential()) {
       // Constant offsets of scalable types are not really constant.
-      if (isa<ScalableVectorType>(GTI.getIndexedType()))
+      if (GTI.getIndexedType()->isScalableTy())
         continue;

       // Splits this GEP index into a variadic part and a constant offset, and
llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll (new file, 106 lines)
@@ -0,0 +1,106 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

%my_subtype = type <vscale x 2 x double>
%my_type = type [3 x %my_subtype]

define void @array_1D(ptr %addr) #0 {
; CHECK-LABEL: array_1D:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z2.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
  %ret = alloca %my_type, align 8
  %val = load %my_type, ptr %addr
  store %my_type %val, ptr %ret, align 8
  ret void
}

define %my_subtype @array_1D_extract(ptr %addr) #0 {
; CHECK-LABEL: array_1D_extract:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
  %ret = alloca %my_type, align 8
  %val = load %my_type, ptr %addr
  %elt = extractvalue %my_type %val, 1
  ret %my_subtype %elt
}

define void @array_1D_insert(ptr %addr, %my_subtype %elt) #0 {
; CHECK-LABEL: array_1D_insert:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp]
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
  %ret = alloca %my_type, align 8
  %val = load %my_type, ptr %addr
  %ins = insertvalue %my_type %val, %my_subtype %elt, 1
  store %my_type %ins, ptr %ret, align 8
  ret void
}

define void @array_2D(ptr %addr) #0 {
; CHECK-LABEL: array_2D:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-6
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #5, mul vl]
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z5.d }, p0, [sp]
; CHECK-NEXT: st1d { z4.d }, p0, [sp, #5, mul vl]
; CHECK-NEXT: st1d { z3.d }, p0, [sp, #4, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #6
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
  %ret = alloca [2 x %my_type], align 8
  %val = load [2 x %my_type], ptr %addr
  store [2 x %my_type] %val, ptr %ret, align 8
  ret void
}

attributes #0 = { "target-features"="+sve" }
@@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs --riscv-no-aliases < %s | FileCheck %s

target triple = "riscv64-unknown-unknown-elf"

%my_type = type [3 x <vscale x 1 x double>]

define void @test(ptr %addr) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrrs a1, vlenb, zero
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; CHECK-NEXT: csrrs a1, vlenb, zero
; CHECK-NEXT: add a2, a0, a1
; CHECK-NEXT: vl1re64.v v8, (a2)
; CHECK-NEXT: slli a2, a1, 1
; CHECK-NEXT: vl1re64.v v9, (a0)
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: vl1re64.v v10, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v9, (a0)
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vs1r.v v10, (a2)
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs1r.v v8, (a0)
; CHECK-NEXT: csrrs a0, vlenb, zero
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
  %ret = alloca %my_type, align 8
  %val = load %my_type, ptr %addr
  store %my_type %val, ptr %ret, align 8
  ret void
}
@@ -1,8 +0,0 @@
-; RUN: not opt -S -passes=verify < %s 2>&1 | FileCheck %s
-
-;; Arrays cannot contain scalable vectors; make sure we detect them even
-;; when nested inside other aggregates.
-
-%ty = type { i64, [4 x <vscale x 256 x i1>] }
-; CHECK: error: invalid array element type
-; CHECK: %ty = type { i64, [4 x <vscale x 256 x i1>] }
@@ -52,6 +52,12 @@ define void @gep_cse_offset_canonicalization(ptr %p, i64 %idx, i64 %idx2) {
 ; CHECK-NEXT: call void @use(ptr [[GEP5]])
 ; CHECK-NEXT: call void @use(ptr [[GEP5_SAME]])
 ; CHECK-NEXT: call void @use(ptr [[GEP5_DIFFERENT]])
+; CHECK-NEXT: [[GEP6:%.*]] = getelementptr [4 x <vscale x 4 x i32>], ptr [[P]], i64 [[IDX]], i64 1
+; CHECK-NEXT: [[GEP6_SAME:%.*]] = getelementptr [4 x <vscale x 4 x float>], ptr [[P]], i64 [[IDX]], i64 1
+; CHECK-NEXT: [[GEP6_DIFFERENT:%.*]] = getelementptr [4 x <vscale x 4 x float>], ptr [[P]], i64 [[IDX2]], i64 1
+; CHECK-NEXT: call void @use(ptr [[GEP6]])
+; CHECK-NEXT: call void @use(ptr [[GEP6_SAME]])
+; CHECK-NEXT: call void @use(ptr [[GEP6_DIFFERENT]])
 ; CHECK-NEXT: ret void
 ;
 %gep1 = getelementptr i64, ptr %p, i64 1
@@ -89,6 +95,12 @@ define void @gep_cse_offset_canonicalization(ptr %p, i64 %idx, i64 %idx2) {
   call void @use(ptr %gep5)
   call void @use(ptr %gep5.same)
   call void @use(ptr %gep5.different)
+  %gep6 = getelementptr [4 x <vscale x 4 x i32>], ptr %p, i64 %idx, i64 1
+  %gep6.same = getelementptr [4 x <vscale x 4 x float>], ptr %p, i64 %idx, i64 1
+  %gep6.different = getelementptr [4 x <vscale x 4 x float>], ptr %p, i64 %idx2, i64 1
+  call void @use(ptr %gep6)
+  call void @use(ptr %gep6.same)
+  call void @use(ptr %gep6.different)
   ret void
 }

@@ -0,0 +1,15 @@
; RUN: opt -passes=globalopt < %s

; Ensure we don't ICE by trying to optimize a scalable vector load of a global
; variable.

%struct.xxx = type <{ [96 x i8] }>

@.bss = internal unnamed_addr global %struct.xxx zeroinitializer, align 32

define dso_local void @foo() local_unnamed_addr align 16 {
L.entry:
  store [4 x <vscale x 2 x double>] zeroinitializer, ptr @.bss, align 1
  %0 = load [4 x <vscale x 2 x double>], ptr @.bss, align 8
  unreachable
}
@@ -18,3 +18,10 @@ define void @can_replace_gep_idx_with_zero_typesize(i64 %n, ptr %a, i64 %b) {
   call void @do_something(<vscale x 4 x i32> %tmp)
   ret void
 }
+
+define void @can_replace_gep_idx_with_zero_typesize_2(i64 %n, ptr %a, i64 %b) {
+  %idx = getelementptr [2 x <vscale x 4 x i32>], ptr %a, i64 %b, i64 0
+  %tmp = load <vscale x 4 x i32>, ptr %idx
+  call void @do_something(<vscale x 4 x i32> %tmp)
+  ret void
+}
@@ -298,6 +298,17 @@ define ptr @geps_combinable_scalable(ptr %a, i64 %idx) {
   ret ptr %a3
 }

+define ptr @geps_combinable_scalable_vector_array(ptr %a, i64 %idx) {
+; CHECK-LABEL: @geps_combinable_scalable_vector_array(
+; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr [[A:%.*]], i64 1
+; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
+; CHECK-NEXT: ret ptr [[A3]]
+;
+  %a2 = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr %a, i64 1
+  %a3 = getelementptr inbounds i8, ptr %a2, i32 4
+  ret ptr %a3
+}
+
 define i1 @compare_geps_same_indices(ptr %a, ptr %b, i64 %idx) {
 ; CHECK-LABEL: @compare_geps_same_indices(
 ; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[A:%.*]], [[B:%.*]]
llvm/test/Transforms/InstCombine/scalable-vector-array.ll (new file, 28 lines)
@@ -0,0 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=instcombine -S < %s | FileCheck %s

define <vscale x 4 x i32> @load(ptr %x) {
; CHECK-LABEL: define <vscale x 4 x i32> @load
; CHECK-SAME: (ptr [[X:%.*]]) {
; CHECK-NEXT: [[A_ELT1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
; CHECK-NEXT: [[A_UNPACK2:%.*]] = load <vscale x 4 x i32>, ptr [[A_ELT1]], align 16
; CHECK-NEXT: ret <vscale x 4 x i32> [[A_UNPACK2]]
;
  %a = load [2 x <vscale x 4 x i32>], ptr %x
  %b = extractvalue [2 x <vscale x 4 x i32>] %a, 1
  ret <vscale x 4 x i32> %b
}

define void @store(ptr %x, <vscale x 4 x i32> %y, <vscale x 4 x i32> %z) {
; CHECK-LABEL: define void @store
; CHECK-SAME: (ptr [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]], <vscale x 4 x i32> [[Z:%.*]]) {
; CHECK-NEXT: store <vscale x 4 x i32> [[Y]], ptr [[X]], align 16
; CHECK-NEXT: [[X_REPACK1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
; CHECK-NEXT: store <vscale x 4 x i32> [[Z]], ptr [[X_REPACK1]], align 16
; CHECK-NEXT: ret void
;
  %a = insertvalue [2 x <vscale x 4 x i32>] poison, <vscale x 4 x i32> %y, 0
  %b = insertvalue [2 x <vscale x 4 x i32>] %a, <vscale x 4 x i32> %z, 1
  store [2 x <vscale x 4 x i32>] %b, ptr %x
  ret void
}
@@ -358,3 +358,12 @@ define <8 x ptr> @gep_vector_index_op3_poison_constant_index_afterwards(ptr %ptr
   %res = getelementptr inbounds %t.3, ptr %ptr, i64 0, i32 1, <8 x i64> poison, i32 1
   ret <8 x ptr> %res
 }
+
+define i64 @gep_array_of_scalable_vectors_ptrdiff(ptr %ptr) {
+  %c1 = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr %ptr, i64 4
+  %c2 = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr %ptr, i64 6
+  %c1.int = ptrtoint ptr %c1 to i64
+  %c2.int = ptrtoint ptr %c2 to i64
+  %diff = sub i64 %c2.int, %c1.int
+  ret i64 %diff
+}
llvm/test/Transforms/SROA/scalable-vector-array.ll (new file, 20 lines)
@@ -0,0 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s
; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s

; This test checks that SROA runs mem2reg on arrays of scalable vectors.

define [ 2 x <vscale x 4 x i32> ] @alloca(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: define [2 x <vscale x 4 x i32>] @alloca
; CHECK-SAME: (<vscale x 4 x i32> [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]]) {
; CHECK-NEXT: [[AGG0:%.*]] = insertvalue [2 x <vscale x 4 x i32>] poison, <vscale x 4 x i32> [[X]], 0
; CHECK-NEXT: [[AGG1:%.*]] = insertvalue [2 x <vscale x 4 x i32>] [[AGG0]], <vscale x 4 x i32> [[Y]], 1
; CHECK-NEXT: ret [2 x <vscale x 4 x i32>] [[AGG1]]
;
  %addr = alloca [ 2 x <vscale x 4 x i32> ], align 4
  %agg0 = insertvalue [ 2 x <vscale x 4 x i32> ] poison, <vscale x 4 x i32> %x, 0
  %agg1 = insertvalue [ 2 x <vscale x 4 x i32> ] %agg0, <vscale x 4 x i32> %y, 1
  store [ 2 x <vscale x 4 x i32> ] %agg1, ptr %addr, align 4
  %val = load [ 2 x <vscale x 4 x i32> ], ptr %addr, align 4
  ret [ 2 x <vscale x 4 x i32> ] %val
}
@@ -28,4 +28,41 @@ define ptr @test2(ptr %base, i64 %idx) {
   ret ptr %gep
 }

+; Index is implicitly multiplied by vscale and so not really constant.
+define ptr @test3(ptr %base, i64 %idx) #0 {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[IDX_NEXT:%.*]] = add nuw nsw i64 [[IDX:%.*]], 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 [[IDX_NEXT]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+  %idx.next = add nuw nsw i64 %idx, 1
+  %gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 %idx.next
+  ret ptr %gep
+}
+
+; Indices are implicitly multiplied by vscale and so not really constant.
+define ptr @test4(ptr %base, i64 %idx) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[IDX_NEXT:%.*]] = add nuw nsw i64 [[IDX:%.*]], 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 3, i64 [[IDX_NEXT]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+  %idx.next = add nuw nsw i64 %idx, 1
+  %gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 3, i64 %idx.next
+  ret ptr %gep
+}
+
+; Whilst the first two indices are not constant, the calculation of the third
+; index does contain a constant that can be extracted.
+define ptr @test5(ptr %base, i64 %idx) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 1, i64 3, i64 [[IDX:%.*]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[TMP1]], i64 1
+; CHECK-NEXT: ret ptr [[GEP2]]
+;
+  %idx.next = add nuw nsw i64 %idx, 1
+  %gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 1, i64 3, i64 %idx.next
+  ret ptr %gep
+}
+
+attributes #0 = { "target-features"="+sve" }
@@ -3,14 +3,15 @@
 ;; Global variables cannot be scalable vectors, since we don't
 ;; know the size at compile time.

-; CHECK: Globals cannot contain scalable vectors
+; CHECK: Globals cannot contain scalable types
 ; CHECK-NEXT: ptr @ScalableVecGlobal
 @ScalableVecGlobal = global <vscale x 4 x i32> zeroinitializer

-; CHECK-NEXT: Globals cannot contain scalable vectors
+; CHECK-NEXT: Globals cannot contain scalable types
+; CHECK-NEXT: ptr @ScalableVecArrayGlobal
+@ScalableVecArrayGlobal = global [ 8 x <vscale x 4 x i32> ] zeroinitializer
+
+; CHECK-NEXT: Globals cannot contain scalable types
 ; CHECK-NEXT: ptr @ScalableVecStructGlobal
 @ScalableVecStructGlobal = global { i32, <vscale x 4 x i32> } zeroinitializer

 ;; Global _pointers_ to scalable vectors are fine
 ; CHECK-NOT: Globals cannot contain scalable vectors
 @ScalableVecPtr = global ptr zeroinitializer