mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-04-04 16:54:12 +00:00
[ARM] ParallelDSP: added statistics, NFC.
Added statistics for the number of SMLAD instructions created, and also renamed the pass to -arm-parallel-dsp. Differential Revision: https://reviews.llvm.org/D48971 llvm-svn: 336441
This commit is contained in:
parent
56f28af5cd
commit
5aeee587fd
@ -14,6 +14,7 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/LoopAccessAnalysis.h"
|
||||
@ -36,7 +37,9 @@
|
||||
using namespace llvm;
|
||||
using namespace PatternMatch;
|
||||
|
||||
#define DEBUG_TYPE "parallel-dsp"
|
||||
#define DEBUG_TYPE "arm-parallel-dsp"
|
||||
|
||||
STATISTIC(NumSMLAD , "Number of smlad instructions generated");
|
||||
|
||||
namespace {
|
||||
struct ParallelMAC;
|
||||
@ -604,6 +607,7 @@ Instruction *ARMParallelDSP::CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
|
||||
Value* Args[] = { VecLd0, VecLd1, Acc };
|
||||
Function *SMLAD = Intrinsic::getDeclaration(M, Intrinsic::arm_smlad);
|
||||
CallInst *Call = Builder.CreateCall(SMLAD, Args);
|
||||
NumSMLAD++;
|
||||
return Call;
|
||||
}
|
||||
|
||||
@ -613,7 +617,7 @@ Pass *llvm::createARMParallelDSPPass() {
|
||||
|
||||
char ARMParallelDSP::ID = 0;
|
||||
|
||||
INITIALIZE_PASS_BEGIN(ARMParallelDSP, "parallel-dsp",
|
||||
INITIALIZE_PASS_BEGIN(ARMParallelDSP, "arm-parallel-dsp",
|
||||
"Transform loops to use DSP intrinsics", false, false)
|
||||
INITIALIZE_PASS_END(ARMParallelDSP, "parallel-dsp",
|
||||
INITIALIZE_PASS_END(ARMParallelDSP, "arm-parallel-dsp",
|
||||
"Transform loops to use DSP intrinsics", false, false)
|
||||
|
@ -1,10 +1,10 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
|
||||
;
|
||||
; The Cortex-M0 does not support unaligned accesses:
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
|
||||
;
|
||||
; Check DSP extension:
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
|
||||
;
|
||||
; CHECK: %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ]
|
||||
; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32*
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK: %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ]
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
|
||||
;
|
||||
; Reduction statement is an i64 type: we only support i32 so check that the
|
||||
; rewrite isn't triggered.
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S -stats 2>&1 | FileCheck %s
|
||||
;
|
||||
; A more complicated chain: 4 mul operations, so we expect 2 smlad calls.
|
||||
;
|
||||
@ -15,9 +16,10 @@
|
||||
; CHECK: [[V17:%[0-9]+]] = call i32 @llvm.arm.smlad(i32 [[V14]], i32 [[V16]], i32 [[V12]])
|
||||
;
|
||||
; And we don't want to see a 3rd smlad:
|
||||
;
|
||||
; CHECK-NOT: call i32 @llvm.arm.smlad
|
||||
;
|
||||
; CHECK: 2 arm-parallel-dsp - Number of smlad instructions generated
|
||||
;
|
||||
define dso_local i32 @test(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
|
||||
entry:
|
||||
%cmp52 = icmp sgt i32 %arg, 0
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
|
||||
;
|
||||
; The loop header is not the loop latch.
|
||||
;
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
|
||||
;
|
||||
; Operands of both muls are not symmetrical (see also comments inlined below), check
|
||||
; that the rewrite isn't triggered.
|
||||
@ -49,4 +49,3 @@ for.body:
|
||||
%exitcond = icmp ne i32 %add, %arg
|
||||
br i1 %exitcond, label %for.body, label %for.cond.cleanup
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
|
||||
;
|
||||
; The loads are not consecutive: check that the rewrite isn't triggered.
|
||||
;
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
|
||||
;
|
||||
; The loads are not narrow loads: check that the rewrite isn't triggered.
|
||||
;
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
|
||||
;
|
||||
; The loads are volatile loads: check that the rewrite isn't triggered.
|
||||
;
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
|
||||
;
|
||||
; Alias check: check that the rewrite isn't triggered when there's a store
|
||||
; instruction possibly aliasing any mul load operands; arguments are passed
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
|
||||
;
|
||||
; Alias check: check that the rewrite isn't triggered when there's a store
|
||||
; aliasing one of the mul load operands. Arguments are now annotated with
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
|
||||
;
|
||||
; Mul with operands that are not simple load and sext/zext chains: this is not
|
||||
; yet supported so the rewrite shouldn't trigger (but we do want to support this
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
|
||||
;
|
||||
; Muls with operands that are constants: not yet supported, so the rewrite
|
||||
; should not trigger (but we do want to add this soon).
|
||||
|
Loading…
x
Reference in New Issue
Block a user