From 5212b768f5a1d4ca84cc7d06a3753224bc75ba36 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 28 Mar 2019 20:44:50 +0000 Subject: [PATCH] [InterleavedAccessPass] Don't increase the number of bytes loaded. Even if the interleaving transform would otherwise be legal, we shouldn't introduce an interleaved load that is wider than the original load: it might have undefined behavior. It might be possible to perform some sort of mask-narrowing transform in some cases (using a narrower interleaved load, then extending the results using shufflevectors). But I haven't tried to implement that, at least for now. Fixes https://bugs.llvm.org/show_bug.cgi?id=41245 . Differential Revision: https://reviews.llvm.org/D59954 llvm-svn: 357212 --- lib/CodeGen/InterleavedAccessPass.cpp | 12 ++++++--- .../ARM/interleaved-accesses.ll | 25 ++++++++++++++----- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp index 2f8012aeeef..14bc560a561 100644 --- a/lib/CodeGen/InterleavedAccessPass.cpp +++ b/lib/CodeGen/InterleavedAccessPass.cpp @@ -163,14 +163,19 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor, /// <0, 2, 4, 6> (mask of index 0 to extract even elements) /// <1, 3, 5, 7> (mask of index 1 to extract odd elements) static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, - unsigned &Index, unsigned MaxFactor) { + unsigned &Index, unsigned MaxFactor, + unsigned NumLoadElements) { if (Mask.size() < 2) return false; // Check potential Factors. - for (Factor = 2; Factor <= MaxFactor; Factor++) + for (Factor = 2; Factor <= MaxFactor; Factor++) { + // Make sure we don't produce a load wider than the input load. 
+ if (Mask.size() * Factor > NumLoadElements) + return false; if (isDeInterleaveMaskOfFactor(Mask, Factor, Index)) return true; + } return false; } @@ -302,9 +307,10 @@ bool InterleavedAccess::lowerInterleavedLoad( unsigned Factor, Index; + unsigned NumLoadElements = LI->getType()->getVectorNumElements(); // Check if the first shufflevector is DE-interleave shuffle. if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index, - MaxFactor)) + MaxFactor, NumLoadElements)) return false; // Holds the corresponding index for each DE-interleave shuffle. diff --git a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll index 715c9413a81..fdf6e1feda2 100644 --- a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll +++ b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll @@ -352,9 +352,9 @@ define void @store_undef_mask_factor4(<16 x i32>* %ptr, <4 x i32> %v0, <4 x i32> ret void } -define void @load_address_space(<4 x i32> addrspace(1)* %ptr) { +define void @load_address_space(<8 x i32> addrspace(1)* %ptr) { ; NEON-LABEL: @load_address_space( -; NEON-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> addrspace(1)* %ptr to i8 addrspace(1)* +; NEON-NEXT: [[TMP1:%.*]] = bitcast <8 x i32> addrspace(1)* %ptr to i8 addrspace(1)* ; NEON-NEXT: [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p1i8(i8 addrspace(1)* [[TMP1]], i32 0) ; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2 ; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1 @@ -364,10 +364,10 @@ define void @load_address_space(<4 x i32> addrspace(1)* %ptr) { ; NO_NEON-NOT: @llvm.arm.neon ; NO_NEON: ret void ; - %interleaved.vec = load <4 x i32>, <4 x i32> addrspace(1)* %ptr - %v0 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <2 x i32> <i32 0, i32 3> - %v1 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <2 x i32> <i32 1, i32 4> - 
%v2 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <2 x i32> <i32 2, i32 5> + %interleaved.vec = load <8 x i32>, <8 x i32> addrspace(1)* %ptr + %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <2 x i32> <i32 0, i32 3> + %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <2 x i32> <i32 1, i32 4> + %v2 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <2 x i32> <i32 2, i32 5> ret void } @@ -883,3 +883,16 @@ define void @load_factor2_wide_pointer(<16 x i32*>* %ptr) { %v1 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ret void } + +; This would be a candidate for interleaving, except that load doesn't +; actually load enough elements to satisfy the shuffle masks. (It would be +; possible to produce a vld2.v2i32, but that currently isn't implemented.) +define void @load_out_of_range(<4 x i32>* %ptr) { +; ALL-LABEL: @load_out_of_range( +; ALL-NOT: @llvm.arm.neon +; ALL: ret void + %interleaved.vec = load <4 x i32>, <4 x i32>* %ptr, align 4 + %v0 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %v1 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + ret void +}