mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 09:13:34 +00:00
[Loop Vectorizer] Fixed memory confilict checks.
Fixed a bug in run-time checks for possible memory conflicts inside loop. The bug is in Low <-> High boundaries calculation. The High boundary should be calculated as "last memory access pointer + element size". Differential revision: https://reviews.llvm.org/D23176 llvm-svn: 279930
This commit is contained in:
parent
0f68f0b2ab
commit
eed0bb3d71
@ -335,9 +335,11 @@ public:
|
||||
struct PointerInfo {
|
||||
/// Holds the pointer value that we need to check.
|
||||
TrackingVH<Value> PointerValue;
|
||||
/// Holds the pointer value at the beginning of the loop.
|
||||
/// Holds the smallest byte address accessed by the pointer throughout all
|
||||
/// iterations of the loop.
|
||||
const SCEV *Start;
|
||||
/// Holds the pointer value at the end of the loop.
|
||||
/// Holds the largest byte address accessed by the pointer throughout all
|
||||
/// iterations of the loop, plus 1.
|
||||
const SCEV *End;
|
||||
/// Holds the information if this pointer is used for writing to memory.
|
||||
bool IsWritePtr;
|
||||
|
@ -149,6 +149,19 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
|
||||
return OrigSCEV;
|
||||
}
|
||||
|
||||
/// Calculate Start and End points of memory access.
|
||||
/// Let's assume A is the first access and B is a memory access on N-th loop
|
||||
/// iteration. Then B is calculated as:
|
||||
/// B = A + Step*N .
|
||||
/// Step value may be positive or negative.
|
||||
/// N is a calculated back-edge taken count:
|
||||
/// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
|
||||
/// Start and End points are calculated in the following way:
|
||||
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
|
||||
/// where SizeOfElt is the size of single memory access in bytes.
|
||||
///
|
||||
/// There is no conflict when the intervals are disjoint:
|
||||
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
|
||||
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
|
||||
unsigned DepSetId, unsigned ASId,
|
||||
const ValueToValueMap &Strides,
|
||||
@ -177,12 +190,17 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
|
||||
if (CStep->getValue()->isNegative())
|
||||
std::swap(ScStart, ScEnd);
|
||||
} else {
|
||||
// Fallback case: the step is not constant, but the we can still
|
||||
// Fallback case: the step is not constant, but we can still
|
||||
// get the upper and lower bounds of the interval by using min/max
|
||||
// expressions.
|
||||
ScStart = SE->getUMinExpr(ScStart, ScEnd);
|
||||
ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
|
||||
}
|
||||
// Add the size of the pointed element to ScEnd.
|
||||
unsigned EltSize =
|
||||
Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
|
||||
const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
|
||||
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
|
||||
}
|
||||
|
||||
Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
|
||||
@ -1870,9 +1888,17 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
|
||||
Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
|
||||
Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
|
||||
|
||||
Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
|
||||
// [A|B].Start points to the first accessed byte under base [A|B].
|
||||
// [A|B].End points to the last accessed byte, plus one.
|
||||
// There is no conflict when the intervals are disjoint:
|
||||
// NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
|
||||
//
|
||||
// bound0 = (B.Start < A.End)
|
||||
// bound1 = (A.Start < B.End)
|
||||
// IsConflict = bound0 & bound1
|
||||
Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
|
||||
FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
|
||||
Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
|
||||
Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
|
||||
FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
|
||||
Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
|
||||
FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
|
||||
|
@ -0,0 +1,51 @@
|
||||
; RUN: opt -analyze --loop-accesses %s | FileCheck %s
|
||||
|
||||
; This test verifies run-time boundary check of memory accesses.
|
||||
; The original loop:
|
||||
; void fastCopy(const char* src, char* op) {
|
||||
; int len = 32;
|
||||
; while (len > 0) {
|
||||
; *(reinterpret_cast<long long*>(op)) = *(reinterpret_cast<const long long*>(src));
|
||||
; src += 8;
|
||||
; op += 8;
|
||||
; len -= 8;
|
||||
; }
|
||||
; }
|
||||
; Boundaries calculations before this patch:
|
||||
; (Low: %src High: (24 + %src))
|
||||
; and the actual distance between two pointers was 31, (%op - %src = 31)
|
||||
; IsConflict = (24 > 31) = false -> execution is directed to the vectorized loop.
|
||||
; The loop was vectorized to 4, 32 byte memory access ( <4 x i64> ),
|
||||
; store a value at *%op touched memory under *%src.
|
||||
|
||||
;CHECK: Printing analysis 'Loop Access Analysis' for function 'fastCopy'
|
||||
;CHECK: (Low: %op High: (32 + %op))
|
||||
;CHECK: (Low: %src High: (32 + %src))
|
||||
|
||||
define void @fastCopy(i8* nocapture readonly %src, i8* nocapture %op) {
|
||||
entry:
|
||||
br label %while.body.preheader
|
||||
|
||||
while.body.preheader: ; preds = %entry
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.preheader, %while.body
|
||||
%len.addr.07 = phi i32 [ %sub, %while.body ], [ 32, %while.body.preheader ]
|
||||
%op.addr.06 = phi i8* [ %add.ptr1, %while.body ], [ %op, %while.body.preheader ]
|
||||
%src.addr.05 = phi i8* [ %add.ptr, %while.body ], [ %src, %while.body.preheader ]
|
||||
%0 = bitcast i8* %src.addr.05 to i64*
|
||||
%1 = load i64, i64* %0, align 8
|
||||
%2 = bitcast i8* %op.addr.06 to i64*
|
||||
store i64 %1, i64* %2, align 8
|
||||
%add.ptr = getelementptr inbounds i8, i8* %src.addr.05, i64 8
|
||||
%add.ptr1 = getelementptr inbounds i8, i8* %op.addr.06, i64 8
|
||||
%sub = add nsw i32 %len.addr.07, -8
|
||||
%cmp = icmp sgt i32 %len.addr.07, 8
|
||||
br i1 %cmp, label %while.body, label %while.end.loopexit
|
||||
|
||||
while.end.loopexit: ; preds = %while.body
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
@ -96,15 +96,15 @@ for.end: ; preds = %for.body
|
||||
; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
|
||||
; CHECK-NEXT: (Low: %c High: (78 + %c))
|
||||
; CHECK-NEXT: (Low: %c High: (80 + %c))
|
||||
; CHECK-NEXT: Member: {(2 + %c)<nsw>,+,4}
|
||||
; CHECK-NEXT: Member: {%c,+,4}
|
||||
; CHECK-NEXT: Group {{.*}}[[ONE]]:
|
||||
; CHECK-NEXT: (Low: %a High: (40 + %a))
|
||||
; CHECK-NEXT: (Low: %a High: (42 + %a))
|
||||
; CHECK-NEXT: Member: {(2 + %a)<nsw>,+,2}
|
||||
; CHECK-NEXT: Member: {%a,+,2}
|
||||
; CHECK-NEXT: Group {{.*}}[[TWO]]:
|
||||
; CHECK-NEXT: (Low: %b High: (38 + %b))
|
||||
; CHECK-NEXT: (Low: %b High: (40 + %b))
|
||||
; CHECK-NEXT: Member: {%b,+,2}
|
||||
|
||||
define void @testg(i16* %a,
|
||||
@ -168,15 +168,15 @@ for.end: ; preds = %for.body
|
||||
; CHECK-NEXT: %arrayidxB = getelementptr i16, i16* %b, i64 %ind
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
|
||||
; CHECK-NEXT: (Low: %c High: (78 + %c))
|
||||
; CHECK-NEXT: (Low: %c High: (80 + %c))
|
||||
; CHECK-NEXT: Member: {(2 + %c)<nsw>,+,4}
|
||||
; CHECK-NEXT: Member: {%c,+,4}
|
||||
; CHECK-NEXT: Group {{.*}}[[ONE]]:
|
||||
; CHECK-NEXT: (Low: %a High: (40 + %a))
|
||||
; CHECK-NEXT: (Low: %a High: (42 + %a))
|
||||
; CHECK-NEXT: Member: {(2 + %a),+,2}
|
||||
; CHECK-NEXT: Member: {%a,+,2}
|
||||
; CHECK-NEXT: Group {{.*}}[[TWO]]:
|
||||
; CHECK-NEXT: (Low: %b High: (38 + %b))
|
||||
; CHECK-NEXT: (Low: %b High: (40 + %b))
|
||||
; CHECK-NEXT: Member: {%b,+,2}
|
||||
|
||||
define void @testh(i16* %a,
|
||||
@ -247,13 +247,13 @@ for.end: ; preds = %for.body
|
||||
; CHECK-NEXT: %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
|
||||
; CHECK-NEXT: (Low: ((2 * %offset) + %a)<nsw> High: (9998 + (2 * %offset) + %a))
|
||||
; CHECK-NEXT: (Low: ((2 * %offset) + %a)<nsw> High: (10000 + (2 * %offset) + %a))
|
||||
; CHECK-NEXT: Member: {((2 * %offset) + %a)<nsw>,+,2}<nsw><%for.body>
|
||||
; CHECK-NEXT: Group {{.*}}[[ONE]]:
|
||||
; CHECK-NEXT: (Low: %a High: (9998 + %a))
|
||||
; CHECK-NEXT: (Low: %a High: (10000 + %a))
|
||||
; CHECK-NEXT: Member: {%a,+,2}<%for.body>
|
||||
; CHECK-NEXT: Group {{.*}}[[TWO]]:
|
||||
; CHECK-NEXT: (Low: (20000 + %a) High: (29998 + %a))
|
||||
; CHECK-NEXT: (Low: (20000 + %a) High: (30000 + %a))
|
||||
; CHECK-NEXT: Member: {(20000 + %a),+,2}<%for.body>
|
||||
|
||||
define void @testi(i16* %a,
|
||||
|
@ -16,7 +16,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64--linux-gnueabi"
|
||||
|
||||
; CHECK: function 'f':
|
||||
; CHECK: (Low: (20000 + %a) High: (60000 + %a)<nsw>)
|
||||
; CHECK: (Low: (20000 + %a) High: (60004 + %a))
|
||||
|
||||
@B = common global i32* null, align 8
|
||||
@A = common global i32* null, align 8
|
||||
@ -59,7 +59,7 @@ for.end: ; preds = %for.body
|
||||
; Here it is not obvious what the limits are, since 'step' could be negative.
|
||||
|
||||
; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a)))))
|
||||
; CHECK: High: ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a))
|
||||
; CHECK: High: (4 + ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a)))
|
||||
|
||||
define void @g(i64 %step) {
|
||||
entry:
|
||||
|
@ -8,10 +8,10 @@ target triple = "x86_64-apple-macosx10.8.0"
|
||||
;CHECK: br
|
||||
;CHECK: getelementptr
|
||||
;CHECK-DAG: getelementptr
|
||||
;CHECK-DAG: icmp uge
|
||||
;CHECK-DAG: icmp uge
|
||||
;CHECK-DAG: icmp uge
|
||||
;CHECK-DAG: icmp uge
|
||||
;CHECK-DAG: icmp ugt
|
||||
;CHECK-DAG: icmp ugt
|
||||
;CHECK-DAG: icmp ugt
|
||||
;CHECK-DAG: icmp ugt
|
||||
;CHECK-DAG: and
|
||||
;CHECK-DAG: and
|
||||
;CHECK: br
|
||||
|
@ -36,7 +36,7 @@ for.end: ; preds = %for.body
|
||||
; CHECK: ret i32 0
|
||||
|
||||
; CHECK-NOTBAA-LABEL: @test1
|
||||
; CHECK-NOTBAA: icmp uge i32*
|
||||
; CHECK-NOTBAA: icmp ugt i32*
|
||||
|
||||
; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
|
||||
; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
|
||||
@ -70,8 +70,8 @@ for.end: ; preds = %for.body
|
||||
; required. Without TBAA, however, two checks are required.
|
||||
|
||||
; CHECK-LABEL: @test2
|
||||
; CHECK: icmp uge float*
|
||||
; CHECK: icmp uge float*
|
||||
; CHECK: icmp ugt float*
|
||||
; CHECK: icmp ugt float*
|
||||
; CHECK-NOT: icmp uge i32*
|
||||
|
||||
; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
|
||||
@ -80,10 +80,10 @@ for.end: ; preds = %for.body
|
||||
; CHECK: ret i32 0
|
||||
|
||||
; CHECK-NOTBAA-LABEL: @test2
|
||||
; CHECK-NOTBAA: icmp uge float*
|
||||
; CHECK-NOTBAA: icmp uge float*
|
||||
; CHECK-NOTBAA-DAG: icmp uge float*
|
||||
; CHECK-NOTBAA-DAG: icmp uge i32*
|
||||
; CHECK-NOTBAA: icmp ugt float*
|
||||
; CHECK-NOTBAA: icmp ugt float*
|
||||
; CHECK-NOTBAA-DAG: icmp ugt float*
|
||||
; CHECK-NOTBAA-DAG: icmp ugt i32*
|
||||
|
||||
; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
|
||||
; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
|
||||
|
@ -8,15 +8,15 @@
|
||||
; CHECK-NEXT: Loop Versioning found to be beneficial
|
||||
;
|
||||
; CHECK: for.body3:
|
||||
; CHECK-NEXT: %add86 = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ]
|
||||
; CHECK-NEXT: %[[induction:.*]] = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ]
|
||||
; CHECK-NEXT: %j.113 = phi i32 [ %j.016, %for.body3.ph ], [ %inc, %for.body3 ]
|
||||
; CHECK-NEXT: %idxprom = zext i32 %j.113 to i64
|
||||
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
|
||||
; CHECK-NEXT: store i32 %add, i32* %arrayidx, align 4, !alias.scope !2, !noalias !2
|
||||
; CHECK-NEXT: %add8 = add nsw i32 %add86, %add
|
||||
; CHECK-NEXT: %add8 = add nsw i32 %[[induction]], %add
|
||||
; CHECK-NEXT: %inc = add nuw i32 %j.113, 1
|
||||
; CHECK-NEXT: %cmp2 = icmp ult i32 %inc, %itr
|
||||
; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit5, !llvm.loop !5
|
||||
; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit6, !llvm.loop !5
|
||||
define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 {
|
||||
entry:
|
||||
%cmp14 = icmp eq i32 %itr, 0
|
||||
|
Loading…
x
Reference in New Issue
Block a user