mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-16 16:02:19 +00:00
Improve vectorization diagnostic messages and extend vectorize(enable) pragma.
This patch changes the analysis diagnostics produced when loops with floating-point recurrences or memory operations are identified. The new messages say "cannot prove it is safe to reorder * operations; allow reordering by specifying #pragma clang loop vectorize(enable)". Depending on the type of diagnostic, the message will include additional options such as -ffast-math or __restrict__. This patch also allows the vectorize(enable) pragma to override the low pointer memory check threshold: when the hint is given, a higher threshold is used. See the clang patch for the options produced for each diagnostic. llvm-svn: 246187
This commit is contained in:
parent
05845d31c9
commit
5eaa5a9d26
@ -214,6 +214,11 @@ static cl::opt<unsigned> MaxNestedScalarReductionIC(
|
||||
cl::desc("The maximum interleave count to use when interleaving a scalar "
|
||||
"reduction in a nested loop."));
|
||||
|
||||
// Hidden command-line option (default 128) bounding the number of runtime
// memory checks. Per its description it applies when a vectorize(enable)
// pragma is present.
static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
|
||||
"pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
|
||||
cl::desc("The maximum allowed number of runtime memory checks with a "
|
||||
"vectorize(enable) pragma."));
|
||||
|
||||
namespace {
|
||||
|
||||
// Forward declarations.
|
||||
@ -929,6 +934,15 @@ public:
|
||||
return DiagnosticInfo::AlwaysPrint;
|
||||
}
|
||||
|
||||
/// Returns true when the loop hints permit the vectorizer to reorder
/// operations: either getForce() is FK_Enabled or getWidth() is greater
/// than 1.
bool allowReordering() const {
|
||||
// When enabling loop hints are provided we allow the vectorizer to change
|
||||
// the order of operations that is given by the scalar loop. This is not
|
||||
// enabled by default because it can be unsafe or inefficient. For example,
|
||||
// reordering floating-point operations will change the way round-off
|
||||
// error accumulates in the loop.
|
||||
return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Find hints specified in the loop metadata and update local values.
|
||||
void getHintsFromMetadata() {
|
||||
@ -1427,29 +1441,25 @@ public:
|
||||
bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints) {
|
||||
const char *Name = Hints.vectorizeAnalysisPassName();
|
||||
bool Failed = false;
|
||||
if (UnsafeAlgebraInst &&
|
||||
Hints.getForce() == LoopVectorizeHints::FK_Undefined &&
|
||||
Hints.getWidth() == 0) {
|
||||
if (UnsafeAlgebraInst && !Hints.allowReordering()) {
|
||||
emitOptimizationRemarkAnalysisFPCommute(
|
||||
F->getContext(), Name, *F, UnsafeAlgebraInst->getDebugLoc(),
|
||||
VectorizationReport() << "vectorization requires changes in the "
|
||||
"order of operations, however IEEE 754 "
|
||||
"floating-point operations are not "
|
||||
"commutative");
|
||||
VectorizationReport() << "cannot prove it is safe to reorder "
|
||||
"floating-point operations");
|
||||
Failed = true;
|
||||
}
|
||||
|
||||
if (NumRuntimePointerChecks >
|
||||
VectorizerParams::RuntimeMemoryCheckThreshold) {
|
||||
// Test if runtime memcheck thresholds are exceeded.
|
||||
bool PragmaThresholdReached =
|
||||
NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
|
||||
bool ThresholdReached =
|
||||
NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
|
||||
if ((ThresholdReached && !Hints.allowReordering()) ||
|
||||
PragmaThresholdReached) {
|
||||
emitOptimizationRemarkAnalysisAliasing(
|
||||
F->getContext(), Name, *F, L->getStartLoc(),
|
||||
VectorizationReport()
|
||||
<< "cannot prove pointers refer to independent arrays in memory. "
|
||||
"The loop requires "
|
||||
<< NumRuntimePointerChecks
|
||||
<< " runtime independence checks to vectorize the loop, but that "
|
||||
"would exceed the limit of "
|
||||
<< VectorizerParams::RuntimeMemoryCheckThreshold << " checks");
|
||||
<< "cannot prove it is safe to reorder memory operations");
|
||||
DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
|
||||
Failed = true;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: remark: no_fpmath.c:6:11: loop not vectorized: vectorization requires changes in the order of operations, however IEEE 754 floating-point operations are not commutative
|
||||
; CHECK: remark: no_fpmath.c:6:11: loop not vectorized: cannot prove it is safe to reorder floating-point operations
|
||||
; CHECK: remark: no_fpmath.c:6:14: loop not vectorized:
|
||||
; CHECK: remark: no_fpmath.c:17:14: vectorized loop (vectorization width: 2, interleaved count: 2)
|
||||
|
||||
|
@ -1,17 +1,27 @@
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s -check-prefix=OVERRIDE
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -pragma-vectorize-memory-check-threshold=6 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; First loop produced diagnostic pass remark.
|
||||
;CHECK: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 1)
|
||||
;CHECK: remark: {{.*}}:0:0: vectorized loop (vectorization width: {{[0-9]}}, interleaved count: 1)
|
||||
; Second loop produces diagnostic analysis remark.
|
||||
;CHECK: remark: {{.*}}:0:0: loop not vectorized: cannot prove pointers refer to independent arrays in memory. The loop requires 11 runtime independence checks to vectorize the loop, but that would exceed the limit of 8 checks
|
||||
;CHECK: remark: {{.*}}:0:0: loop not vectorized: cannot prove it is safe to reorder memory operations
|
||||
|
||||
; First loop produced diagnostic pass remark.
|
||||
;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: {{[0-9]}}, interleaved count: 1)
|
||||
; Second loop produces diagnostic pass remark.
|
||||
;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: {{[0-9]}}, interleaved count: 1)
|
||||
|
||||
; We are vectorizing with 6 runtime checks.
|
||||
;CHECK-LABEL: func1x6(
|
||||
;CHECK: <4 x i32>
|
||||
;CHECK: <{{[0-9]}} x i32>
|
||||
;CHECK: ret
|
||||
;OVERRIDE-LABEL: func1x6(
|
||||
;OVERRIDE: <4 x i32>
|
||||
;OVERRIDE: ret
|
||||
define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
|
||||
entry:
|
||||
br label %for.body
|
||||
@ -44,8 +54,12 @@ for.end: ; preds = %for.body
|
||||
|
||||
; We are not vectorizing with 12 runtime checks.
|
||||
;CHECK-LABEL: func2x6(
|
||||
;CHECK-NOT: <4 x i32>
|
||||
;CHECK-NOT: <{{[0-9]}} x i32>
|
||||
;CHECK: ret
|
||||
; We vectorize with 12 checks if a vectorization hint is provided.
|
||||
;OVERRIDE-LABEL: func2x6(
|
||||
;OVERRIDE: <4 x i32>
|
||||
;OVERRIDE: ret
|
||||
define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
Loading…
x
Reference in New Issue
Block a user