mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-07 03:40:35 +00:00
e16dab59f1
This patch unifies the data structures we use for mapping instructions from the original loop to their corresponding instructions in the new loop. Previously, we maintained two distinct maps for this purpose: WidenMap and ScalarIVMap. WidenMap maintained the vector values each instruction from the old loop was represented with, and ScalarIVMap maintained the scalar values each scalarized induction variable was represented with. With this patch, all values created for the new loop are maintained in VectorLoopValueMap. The change allows for several simplifications. Previously, when an instruction was scalarized, we had to insert the scalar values into vectors in order to maintain the mapping in WidenMap. Then, if a user of the scalarized value was also scalar, we had to extract the scalar values from the temporary vector we created. We now avoid these unnecessary scalar-to-vector-to-scalar conversions. If a scalarized value is used by a scalar instruction, the scalar value is used directly. However, if the scalarized value is needed by a vector instruction, we generate the needed insertelement instructions on-demand. A common idiom in several locations in the code (including the scalarization code), is to first get the vector values an instruction from the original loop maps to, and then extract a particular scalar value. This patch adds getScalarValue for this purpose alongside getVectorValue as an interface into VectorLoopValueMap. These functions work together to return the requested values if they're available or to produce them if they're not. The mapping has also been made less permissive. Entries can be added to VectorLoopValueMap with the new initVector and initScalar functions. getVectorValue has been modified to return a constant reference to the mapped entries. There's no real functional change with this patch; however, in some cases we will generate slightly different code. 
For example, instead of an insertelement sequence following the definition of an instruction, it will now precede the first use of that instruction. This can be seen in the test case changes. Differential Revision: https://reviews.llvm.org/D23169 llvm-svn: 279649
156 lines
7.2 KiB
LLVM
156 lines
7.2 KiB
LLVM
; RUN: opt -S -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -verify-loop-info -simplifycfg < %s | FileCheck %s

; The command above forces a vector width of 2 with an interleave count of 1,
; so the expected output in this file is written in terms of <2 x ...> values.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Test predication of non-void instructions, specifically (i) that these
; instructions permit vectorization and (ii) the creation of an insertelement
; and a Phi node. We check the full 2-element sequence for the first
; instruction; for the rest we'll just make sure they get predicated based
; on the code generated for the first element.
;
; Shape of the input loop:
;   * 128 iterations (%indvars.iv runs from 0 and the loop exits once the
;     incremented value equals 128).
;   * Each iteration loads one i32 from each of %asd, %aud, %asr and %aur and
;     adds a constant (23, 24, 25, 26 respectively).
;   * Only when the %asd element is < 100 does it execute sdiv, udiv, srem and
;     urem, each dividing "loaded value + constant" by the loaded value.
;   * The result (or the plain sum when the branch is not taken) is stored
;     back to the same element.
;
; Expected output, as spelled out by the check lines below: for sdiv both
; lanes get their own guarded block (extract the mask bit, branch, extract
; the operands, divide, insertelement, merge through a <2 x i32> phi). For
; udiv, srem and urem only lane 0 is matched.
define void @test(i32* nocapture %asd, i32* nocapture %aud,
                  i32* nocapture %asr, i32* nocapture %aur) {
entry:
  br label %for.body

for.cond.cleanup: ; preds = %if.end
  ret void

; The label below is anchored on "@test(" so it cannot also match the
; longer function name @test_scalar2scalar.
; CHECK-LABEL: @test(
; CHECK: vector.body:
; CHECK: %[[SDEE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: %[[SDCC:[a-zA-Z0-9]+]] = icmp eq i1 %[[SDEE]], true
; CHECK: br i1 %[[SDCC]], label %[[CSD:[a-zA-Z0-9.]+]], label %[[ESD:[a-zA-Z0-9.]+]]
; CHECK: [[CSD]]:
; CHECK: %[[SDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SD0:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0]], %[[SDA1]]
; CHECK: %[[SD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[SD0]], i32 0
; CHECK: br label %[[ESD]]
; CHECK: [[ESD]]:
; CHECK: %[[SDR:[a-zA-Z0-9]+]] = phi <2 x i32> [ undef, %vector.body ], [ %[[SD1]], %[[CSD]] ]
; CHECK: %[[SDEEH:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 1
; CHECK: %[[SDCCH:[a-zA-Z0-9]+]] = icmp eq i1 %[[SDEEH]], true
; CHECK: br i1 %[[SDCCH]], label %[[CSDH:[a-zA-Z0-9.]+]], label %[[ESDH:[a-zA-Z0-9.]+]]
; CHECK: [[CSDH]]:
; CHECK: %[[SDA0H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[SDA1H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[SD0H:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0H]], %[[SDA1H]]
; CHECK: %[[SD1H:[a-zA-Z0-9]+]] = insertelement <2 x i32> %[[SDR]], i32 %[[SD0H]], i32 1
; CHECK: br label %[[ESDH]]
; CHECK: [[ESDH]]:
; CHECK: %{{.*}} = phi <2 x i32> [ %[[SDR]], %[[ESD]] ], [ %[[SD1H]], %[[CSDH]] ]

; CHECK: %[[UDEE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: %[[UDCC:[a-zA-Z0-9]+]] = icmp eq i1 %[[UDEE]], true
; CHECK: br i1 %[[UDCC]], label %[[CUD:[a-zA-Z0-9.]+]], label %[[EUD:[a-zA-Z0-9.]+]]
; CHECK: [[CUD]]:
; CHECK: %[[UDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[UDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[UD0:[a-zA-Z0-9]+]] = udiv i32 %[[UDA0]], %[[UDA1]]
; CHECK: %[[UD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[UD0]], i32 0
; CHECK: br label %[[EUD]]
; CHECK: [[EUD]]:
; CHECK: %{{.*}} = phi <2 x i32> [ undef, %{{.*}} ], [ %[[UD1]], %[[CUD]] ]

; CHECK: %[[SREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: %[[SRCC:[a-zA-Z0-9]+]] = icmp eq i1 %[[SREE]], true
; CHECK: br i1 %[[SRCC]], label %[[CSR:[a-zA-Z0-9.]+]], label %[[ESR:[a-zA-Z0-9.]+]]
; CHECK: [[CSR]]:
; CHECK: %[[SRA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SRA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SR0:[a-zA-Z0-9]+]] = srem i32 %[[SRA0]], %[[SRA1]]
; CHECK: %[[SR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[SR0]], i32 0
; CHECK: br label %[[ESR]]
; CHECK: [[ESR]]:
; CHECK: %{{.*}} = phi <2 x i32> [ undef, %{{.*}} ], [ %[[SR1]], %[[CSR]] ]

; CHECK: %[[UREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: %[[URCC:[a-zA-Z0-9]+]] = icmp eq i1 %[[UREE]], true
; CHECK: br i1 %[[URCC]], label %[[CUR:[a-zA-Z0-9.]+]], label %[[EUR:[a-zA-Z0-9.]+]]
; CHECK: [[CUR]]:
; CHECK: %[[URA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[URA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[UR0:[a-zA-Z0-9]+]] = urem i32 %[[URA0]], %[[URA1]]
; CHECK: %[[UR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[UR0]], i32 0
; CHECK: br label %[[EUR]]
; CHECK: [[EUR]]:
; CHECK: %{{.*}} = phi <2 x i32> [ undef, %{{.*}} ], [ %[[UR1]], %[[CUR]] ]

for.body: ; preds = %if.end, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
  ; Element addresses for the current iteration, one per array.
  %isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv
  %iud = getelementptr inbounds i32, i32* %aud, i64 %indvars.iv
  %isr = getelementptr inbounds i32, i32* %asr, i64 %indvars.iv
  %iur = getelementptr inbounds i32, i32* %aur, i64 %indvars.iv
  %lsd = load i32, i32* %isd, align 4
  %lud = load i32, i32* %iud, align 4
  %lsr = load i32, i32* %isr, align 4
  %lur = load i32, i32* %iur, align 4
  ; Unconditional sums: dividends for if.then, and the stored value when the
  ; branch is not taken.
  %psd = add nsw i32 %lsd, 23
  %pud = add nsw i32 %lud, 24
  %psr = add nsw i32 %lsr, 25
  %pur = add nsw i32 %lur, 26
  ; A single condition, computed from the %asd element only, guards all four
  ; divisions.
  %cmp1 = icmp slt i32 %lsd, 100
  br i1 %cmp1, label %if.then, label %if.end

if.then: ; preds = %for.body
  ; The four non-void instructions under test; each divides the sum by the
  ; value that was loaded.
  %rsd = sdiv i32 %psd, %lsd
  %rud = udiv i32 %pud, %lud
  %rsr = srem i32 %psr, %lsr
  %rur = urem i32 %pur, %lur
  br label %if.end

if.end: ; preds = %if.then, %for.body
  ; Pick the division result when if.then ran, otherwise the plain sum.
  %ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %for.body ]
  %yud.0 = phi i32 [ %rud, %if.then ], [ %pud, %for.body ]
  %ysr.0 = phi i32 [ %rsr, %if.then ], [ %psr, %for.body ]
  %yur.0 = phi i32 [ %rur, %if.then ], [ %pur, %for.body ]
  store i32 %ysd.0, i32* %isd, align 4
  store i32 %yud.0, i32* %iud, align 4
  store i32 %ysr.0, i32* %isr, align 4
  store i32 %yur.0, i32* %iur, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 128
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Predicated instruction whose only user is another predicated instruction.
;
; In if.then, %sd1 (an sdiv) is consumed solely by %rsd (a second sdiv), so
; the first result never has to leave the guarded block as a vector. The
; check lines below expect the guarded block to contain an sdiv whose result
; is merged through a scalar i32 phi (undef on the path from vector.body)
; rather than through a <2 x i32> phi.
;
; The loop runs 128 iterations: %indvars.iv starts at 0 and the loop exits
; once the incremented value equals 128.
define void @test_scalar2scalar(i32* nocapture %asd, i32* nocapture %bsd) {
entry:
  br label %for.body

for.cond.cleanup: ; preds = %if.end
  ret void

; The label below is anchored on the full "@name(" form so it only matches
; this function's definition line.
; CHECK-LABEL: @test_scalar2scalar(
; CHECK: vector.body:
; CHECK: br i1 %{{.*}}, label %[[THEN:[a-zA-Z0-9.]+]], label %[[FI:[a-zA-Z0-9.]+]]
; CHECK: [[THEN]]:
; CHECK: %[[PD:[a-zA-Z0-9]+]] = sdiv i32 %{{.*}}, %{{.*}}
; CHECK: br label %[[FI]]
; CHECK: [[FI]]:
; CHECK: %{{.*}} = phi i32 [ undef, %vector.body ], [ %[[PD]], %[[THEN]] ]

for.body: ; preds = %if.end, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
  %isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv
  %lsd = load i32, i32* %isd, align 4
  %isd.b = getelementptr inbounds i32, i32* %bsd, i64 %indvars.iv
  %lsd.b = load i32, i32* %isd.b, align 4
  ; Dividend for the first sdiv, and the stored value when the branch is not
  ; taken.
  %psd = add nsw i32 %lsd, 23
  %cmp1 = icmp slt i32 %lsd, 100
  br i1 %cmp1, label %if.then, label %if.end

if.then: ; preds = %for.body
  ; %sd1 feeds only %rsd: a scalar result consumed by a scalar user.
  %sd1 = sdiv i32 %psd, %lsd
  %rsd = sdiv i32 %lsd.b, %sd1
  br label %if.end

if.end: ; preds = %if.then, %for.body
  %ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %for.body ]
  store i32 %ysd.0, i32* %isd, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 128
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}