[ARM] ParallelDSP: added statistics, NFC.

Added statistics for the number of SMLAD instructions created, and
als renamed the pass name to -arm-parallel-dsp.

Differential Revision: https://reviews.llvm.org/D48971

llvm-svn: 336441
This commit is contained in:
Sjoerd Meijer 2018-07-06 14:47:09 +00:00
parent 56f28af5cd
commit 5aeee587fd
14 changed files with 25 additions and 20 deletions

@ -14,6 +14,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
@ -36,7 +37,9 @@
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "parallel-dsp"
#define DEBUG_TYPE "arm-parallel-dsp"
STATISTIC(NumSMLAD , "Number of smlad instructions generated");
namespace {
struct ParallelMAC;
@ -604,6 +607,7 @@ Instruction *ARMParallelDSP::CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
Value* Args[] = { VecLd0, VecLd1, Acc };
Function *SMLAD = Intrinsic::getDeclaration(M, Intrinsic::arm_smlad);
CallInst *Call = Builder.CreateCall(SMLAD, Args);
NumSMLAD++;
return Call;
}
@ -613,7 +617,7 @@ Pass *llvm::createARMParallelDSPPass() {
char ARMParallelDSP::ID = 0;
INITIALIZE_PASS_BEGIN(ARMParallelDSP, "parallel-dsp",
INITIALIZE_PASS_BEGIN(ARMParallelDSP, "arm-parallel-dsp",
"Transform loops to use DSP intrinsics", false, false)
INITIALIZE_PASS_END(ARMParallelDSP, "parallel-dsp",
INITIALIZE_PASS_END(ARMParallelDSP, "arm-parallel-dsp",
"Transform loops to use DSP intrinsics", false, false)

@ -1,10 +1,10 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
;
; The Cortex-M0 does not support unaligned accesses:
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
;
; Check DSP extension:
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
;
; CHECK: %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ]
; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32*

@ -1,4 +1,4 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
; CHECK-LABEL: @test1
; CHECK: %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ]

@ -1,4 +1,4 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
;
; Reduction statement is an i64 type: we only support i32 so check that the
; rewrite isn't triggered.

@ -1,4 +1,5 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; REQUIRES: asserts
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S -stats 2>&1 | FileCheck %s
;
; A more complicated chain: 4 mul operations, so we expect 2 smlad calls.
;
@ -15,9 +16,10 @@
; CHECK: [[V17:%[0-9]+]] = call i32 @llvm.arm.smlad(i32 [[V14]], i32 [[V16]], i32 [[V12]])
;
; And we don't want to see a 3rd smlad:
;
; CHECK-NOT: call i32 @llvm.arm.smlad
;
; CHECK: 2 arm-parallel-dsp - Number of smlad instructions generated
;
define dso_local i32 @test(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
entry:
%cmp52 = icmp sgt i32 %arg, 0

@ -1,4 +1,4 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
;
; The loop header is not the loop latch.
;

@ -1,4 +1,4 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
;
; Operands of both muls are not symmetrical (see also comments inlined below), check
; that the rewrite isn't triggered.
@ -49,4 +49,3 @@ for.body:
%exitcond = icmp ne i32 %add, %arg
br i1 %exitcond, label %for.body, label %for.cond.cleanup
}

@ -1,4 +1,4 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
;
; The loads are not consecutive: check that the rewrite isn't triggered.
;

@ -1,4 +1,4 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
;
; The loads are not narrow loads: check that the rewrite isn't triggered.
;

@ -1,4 +1,4 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
;
; The loads are volatile loads: check that the rewrite isn't triggered.
;

@ -1,4 +1,4 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
;
; Alias check: check that the rewrite isn't triggered when there's a store
; instruction possibly aliasing any mul load operands; arguments are passed

@ -1,4 +1,4 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
;
; Alias check: check that the rewrite isn't triggered when there's a store
; aliasing one of the mul load operands. Arguments are now annotated with

@ -1,4 +1,4 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
;
; Mul with operands that are not simple load and sext/zext chains: this is not
; yet supported so the rewrite shouldn't trigger (but we do want to support this

@ -1,4 +1,4 @@
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
;
; Muls with operands that are constants: not yet supported, so the rewrite
; should not trigger (but we do want to add this soon).