Files
archived-llvm/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll
Alexey Bataev 559e93a4df [SLP] Fix for PR32164: Improve vectorization of reverse order of extract operations.
Summary: Sometimes vectorization of insertelement instructions with extractelement operands may produce an extra shuffle operation if these operands are in reverse order. This patch tries to improve the situation by reordering the operands to remove the extra shuffle operation.

Reviewers: mkuper, hfinkel, RKSimon, spatel

Subscribers: mzolotukhin, llvm-commits

Differential Revision: https://reviews.llvm.org/D33954

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@322579 91177308-0d34-0410-b5e6-96231b3b80d8
2018-01-16 18:17:01 +00:00

135 lines
7.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
; dotf: scalar 4-lane dot product of two <4 x float> values that are passed
; directly as vector arguments. Every lane pair is extracted, multiplied with
; fast-math flags, and folded into a chain of fast fadds (lane 0 first, lane 3
; last). The expected output below is a single vector fmul of the two
; arguments, two shufflevector/fadd reduction steps, and one extractelement of
; lane 0 -- no per-lane extracts remain.
; NOTE(review): presumably this guards the PR32164 fix (no extra shuffle when
; the extracts are collected in reverse order) -- confirm against the commit.
define float @dotf(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @dotf(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = fmul fast <4 x float> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP0]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: ret float [[TMP1]]
;
entry:
; Lane 0: x[0] * y[0]; this product seeds the accumulation chain.
%vecext = extractelement <4 x float> %x, i32 0
%vecext1 = extractelement <4 x float> %y, i32 0
%mul = fmul fast float %vecext, %vecext1
; Lane 1: x[1] * y[1], added to the lane 0 product.
%vecext.1 = extractelement <4 x float> %x, i32 1
%vecext1.1 = extractelement <4 x float> %y, i32 1
%mul.1 = fmul fast float %vecext.1, %vecext1.1
%add.1 = fadd fast float %mul.1, %mul
; Lane 2: x[2] * y[2], added to the running sum.
%vecext.2 = extractelement <4 x float> %x, i32 2
%vecext1.2 = extractelement <4 x float> %y, i32 2
%mul.2 = fmul fast float %vecext.2, %vecext1.2
%add.2 = fadd fast float %mul.2, %add.1
; Lane 3: x[3] * y[3], added last; %add.3 is the full dot product.
%vecext.3 = extractelement <4 x float> %x, i32 3
%vecext1.3 = extractelement <4 x float> %y, i32 3
%mul.3 = fmul fast float %vecext.3, %vecext1.3
%add.3 = fadd fast float %mul.3, %add.2
ret float %add.3
}
; dotd: scalar 4-lane dot product of two <4 x double> vectors. Both arguments
; are unnamed pointers (%0 and %1) marked byval, nocapture, readonly and
; align 32; the vectors are loaded from them at the top of the entry block.
; Each lane pair is extracted, multiplied with fast-math flags, and folded
; into a chain of fast fadds (lane 0 first, lane 3 last). The expected output
; below keeps the two loads and replaces the scalar code with one vector fmul,
; two shufflevector/fadd reduction steps, and one extractelement of lane 0.
define double @dotd(<4 x double>* byval nocapture readonly align 32, <4 x double>* byval nocapture readonly align 32) {
; CHECK-LABEL: @dotd(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[TMP0:%.*]], align 32
; CHECK-NEXT: [[Y:%.*]] = load <4 x double>, <4 x double>* [[TMP1:%.*]], align 32
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x double> [[X]], [[Y]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x double> [[TMP2]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x double> [[BIN_RDX]], <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x double> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[BIN_RDX2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
entry:
; Load both operand vectors through the unnamed pointer arguments.
%x = load <4 x double>, <4 x double>* %0, align 32
%y = load <4 x double>, <4 x double>* %1, align 32
; Lane 0: x[0] * y[0]; this product seeds the accumulation chain.
%vecext = extractelement <4 x double> %x, i32 0
%vecext1 = extractelement <4 x double> %y, i32 0
%mul = fmul fast double %vecext, %vecext1
; Lane 1: x[1] * y[1], added to the lane 0 product.
%vecext.1 = extractelement <4 x double> %x, i32 1
%vecext1.1 = extractelement <4 x double> %y, i32 1
%mul.1 = fmul fast double %vecext.1, %vecext1.1
%add.1 = fadd fast double %mul.1, %mul
; Lane 2: x[2] * y[2], added to the running sum.
%vecext.2 = extractelement <4 x double> %x, i32 2
%vecext1.2 = extractelement <4 x double> %y, i32 2
%mul.2 = fmul fast double %vecext.2, %vecext1.2
%add.2 = fadd fast double %mul.2, %add.1
; Lane 3: x[3] * y[3], added last; %add.3 is the full dot product.
%vecext.3 = extractelement <4 x double> %x, i32 3
%vecext1.3 = extractelement <4 x double> %y, i32 3
%mul.3 = fmul fast double %vecext.3, %vecext1.3
%add.3 = fadd fast double %mul.3, %add.2
ret double %add.3
}
; dotfq: scalar 4-lane dot product of two <4 x float> vectors loaded from the
; nocapture/readonly pointer arguments %x and %y (align 16). Unlike a plain
; x*y formulation, every scalar fmul here takes the %y lane as its first
; operand and the %x lane as its second; the expected vector fmul below
; mirrors that order (second load times first load). The products are folded
; into a chain of fast fadds (lane 0 first, lane 3 last), and the expected
; output reduces them with two shufflevector/fadd steps plus one
; extractelement of lane 0.
define float @dotfq(<4 x float>* nocapture readonly %x, <4 x float>* nocapture readonly %y) {
; CHECK-LABEL: @dotfq(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[Y:%.*]], align 16
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP2]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
entry:
; Load both operand vectors: %0 comes from %x, %1 comes from %y.
%0 = load <4 x float>, <4 x float>* %x, align 16
%1 = load <4 x float>, <4 x float>* %y, align 16
; Lane 0: y[0] * x[0] (note the swapped operand order); seeds the chain.
%vecext = extractelement <4 x float> %0, i32 0
%vecext1 = extractelement <4 x float> %1, i32 0
%mul = fmul fast float %vecext1, %vecext
; Lane 1: y[1] * x[1], added to the lane 0 product.
%vecext.1 = extractelement <4 x float> %0, i32 1
%vecext1.1 = extractelement <4 x float> %1, i32 1
%mul.1 = fmul fast float %vecext1.1, %vecext.1
%add.1 = fadd fast float %mul.1, %mul
; Lane 2: y[2] * x[2], added to the running sum.
%vecext.2 = extractelement <4 x float> %0, i32 2
%vecext1.2 = extractelement <4 x float> %1, i32 2
%mul.2 = fmul fast float %vecext1.2, %vecext.2
%add.2 = fadd fast float %mul.2, %add.1
; Lane 3: y[3] * x[3], added last; %add.3 is the full dot product.
%vecext.3 = extractelement <4 x float> %0, i32 3
%vecext1.3 = extractelement <4 x float> %1, i32 3
%mul.3 = fmul fast float %vecext1.3, %vecext.3
%add.3 = fadd fast float %mul.3, %add.2
ret float %add.3
}
; dotdq: scalar 4-lane dot product of two <4 x double> vectors loaded from the
; nocapture/readonly pointer arguments %x and %y (align 32). Every scalar fmul
; takes the %y lane as its first operand and the %x lane as its second; the
; expected vector fmul below mirrors that order (second load times first
; load). The products are folded into a chain of fast fadds (lane 0 first,
; lane 3 last), and the expected output reduces them with two
; shufflevector/fadd steps plus one extractelement of lane 0.
define double @dotdq(<4 x double>* nocapture readonly %x, <4 x double>* nocapture readonly %y) {
; CHECK-LABEL: @dotdq(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, <4 x double>* [[X:%.*]], align 32
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[Y:%.*]], align 32
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x double> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x double> [[TMP2]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x double> [[BIN_RDX]], <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x double> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[BIN_RDX2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
entry:
; Load both operand vectors: %0 comes from %x, %1 comes from %y.
%0 = load <4 x double>, <4 x double>* %x, align 32
%1 = load <4 x double>, <4 x double>* %y, align 32
; Lane 0: y[0] * x[0] (note the swapped operand order); seeds the chain.
%vecext = extractelement <4 x double> %0, i32 0
%vecext1 = extractelement <4 x double> %1, i32 0
%mul = fmul fast double %vecext1, %vecext
; Lane 1: y[1] * x[1], added to the lane 0 product.
%vecext.1 = extractelement <4 x double> %0, i32 1
%vecext1.1 = extractelement <4 x double> %1, i32 1
%mul.1 = fmul fast double %vecext1.1, %vecext.1
%add.1 = fadd fast double %mul.1, %mul
; Lane 2: y[2] * x[2], added to the running sum.
%vecext.2 = extractelement <4 x double> %0, i32 2
%vecext1.2 = extractelement <4 x double> %1, i32 2
%mul.2 = fmul fast double %vecext1.2, %vecext.2
%add.2 = fadd fast double %mul.2, %add.1
; Lane 3: y[3] * x[3], added last; %add.3 is the full dot product.
%vecext.3 = extractelement <4 x double> %0, i32 3
%vecext1.3 = extractelement <4 x double> %1, i32 3
%mul.3 = fmul fast double %vecext1.3, %vecext.3
%add.3 = fadd fast double %mul.3, %add.2
ret double %add.3
}