mirror of
https://github.com/RPCS3/llvm.git
synced 2026-01-31 01:25:19 +01:00
Summary: Vectorizing insertelement instructions that have extractelement operands may sometimes produce an extra shuffle operation when those operands are in reverse order. This patch tries to improve the situation by reordering the operands so that the extra shuffle is removed. Reviewers: mkuper, hfinkel, RKSimon, spatel Subscribers: mzolotukhin, llvm-commits Differential Revision: https://reviews.llvm.org/D33954 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@322579 91177308-0d34-0410-b5e6-96231b3b80d8
135 lines
7.0 KiB
LLVM
135 lines
7.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s

; dotf: four-lane float dot product of two vectors passed by value.
; The scalar input extracts each lane of %x and %y, multiplies the pair with
; `fmul fast`, and accumulates the products with a serial `fadd fast` chain.
; The FileCheck assertions expect a single vector fmul, then a two-step
; shuffle/fadd reduction (lanes 2,3 folded onto lanes 0,1, then lane 1 folded
; onto lane 0), and finally an extract of lane 0 as the returned value.
define float @dotf(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @dotf(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = fmul fast <4 x float> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP0]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: ret float [[TMP1]]
;
entry:
  ; Lane 0: x[0] * y[0] seeds the accumulation chain.
  %vecext = extractelement <4 x float> %x, i32 0
  %vecext1 = extractelement <4 x float> %y, i32 0
  %mul = fmul fast float %vecext, %vecext1
  ; Lane 1: x[1] * y[1], added to the lane-0 product.
  %vecext.1 = extractelement <4 x float> %x, i32 1
  %vecext1.1 = extractelement <4 x float> %y, i32 1
  %mul.1 = fmul fast float %vecext.1, %vecext1.1
  %add.1 = fadd fast float %mul.1, %mul
  ; Lane 2: x[2] * y[2], added to the running sum.
  %vecext.2 = extractelement <4 x float> %x, i32 2
  %vecext1.2 = extractelement <4 x float> %y, i32 2
  %mul.2 = fmul fast float %vecext.2, %vecext1.2
  %add.2 = fadd fast float %mul.2, %add.1
  ; Lane 3: x[3] * y[3], added to the running sum; the result is returned.
  %vecext.3 = extractelement <4 x float> %x, i32 3
  %vecext1.3 = extractelement <4 x float> %y, i32 3
  %mul.3 = fmul fast float %vecext.3, %vecext1.3
  %add.3 = fadd fast float %mul.3, %add.2
  ret float %add.3
}
|
|
|
|
; dotd: four-lane double dot product of two vectors passed through byval
; pointers. Both parameters are unnamed, so the body refers to them by their
; implicit numbers %0 and %1; each is loaded once with 32-byte alignment.
; The scalar input multiplies matching lanes with `fmul fast` and accumulates
; them with a serial `fadd fast` chain.
; The FileCheck assertions expect the two vector loads, a single vector fmul,
; a two-step shuffle/fadd reduction, and an extract of lane 0 as the result.
define double @dotd(<4 x double>* byval nocapture readonly align 32, <4 x double>* byval nocapture readonly align 32) {
; CHECK-LABEL: @dotd(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[TMP0:%.*]], align 32
; CHECK-NEXT: [[Y:%.*]] = load <4 x double>, <4 x double>* [[TMP1:%.*]], align 32
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x double> [[X]], [[Y]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x double> [[TMP2]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x double> [[BIN_RDX]], <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x double> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[BIN_RDX2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
entry:
  ; Load both operand vectors from the unnamed pointer arguments.
  %x = load <4 x double>, <4 x double>* %0, align 32
  %y = load <4 x double>, <4 x double>* %1, align 32
  ; Lane 0: x[0] * y[0] seeds the accumulation chain.
  %vecext = extractelement <4 x double> %x, i32 0
  %vecext1 = extractelement <4 x double> %y, i32 0
  %mul = fmul fast double %vecext, %vecext1
  ; Lane 1: x[1] * y[1], added to the lane-0 product.
  %vecext.1 = extractelement <4 x double> %x, i32 1
  %vecext1.1 = extractelement <4 x double> %y, i32 1
  %mul.1 = fmul fast double %vecext.1, %vecext1.1
  %add.1 = fadd fast double %mul.1, %mul
  ; Lane 2: x[2] * y[2], added to the running sum.
  %vecext.2 = extractelement <4 x double> %x, i32 2
  %vecext1.2 = extractelement <4 x double> %y, i32 2
  %mul.2 = fmul fast double %vecext.2, %vecext1.2
  %add.2 = fadd fast double %mul.2, %add.1
  ; Lane 3: x[3] * y[3], added to the running sum; the result is returned.
  %vecext.3 = extractelement <4 x double> %x, i32 3
  %vecext1.3 = extractelement <4 x double> %y, i32 3
  %mul.3 = fmul fast double %vecext.3, %vecext1.3
  %add.3 = fadd fast double %mul.3, %add.2
  ret double %add.3
}
|
|
|
|
; dotfq: four-lane float dot product of two vectors loaded from the pointer
; arguments %x and %y (16-byte aligned loads).
; Unlike a plain x*y formulation, every scalar multiply here takes the %y lane
; as its first operand and the %x lane as its second. The FileCheck assertions
; expect the vector fmul to keep that order (load of Y first, load of X
; second), followed by a two-step shuffle/fadd reduction and an extract of
; lane 0 as the returned value.
define float @dotfq(<4 x float>* nocapture readonly %x, <4 x float>* nocapture readonly %y) {
; CHECK-LABEL: @dotfq(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[Y:%.*]], align 16
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP2]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
entry:
  ; %0 holds the vector behind %x, %1 the vector behind %y.
  %0 = load <4 x float>, <4 x float>* %x, align 16
  %1 = load <4 x float>, <4 x float>* %y, align 16
  ; Lane 0: y[0] * x[0] seeds the accumulation chain.
  %vecext = extractelement <4 x float> %0, i32 0
  %vecext1 = extractelement <4 x float> %1, i32 0
  %mul = fmul fast float %vecext1, %vecext
  ; Lane 1: y[1] * x[1], added to the lane-0 product.
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %vecext1.1 = extractelement <4 x float> %1, i32 1
  %mul.1 = fmul fast float %vecext1.1, %vecext.1
  %add.1 = fadd fast float %mul.1, %mul
  ; Lane 2: y[2] * x[2], added to the running sum.
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %vecext1.2 = extractelement <4 x float> %1, i32 2
  %mul.2 = fmul fast float %vecext1.2, %vecext.2
  %add.2 = fadd fast float %mul.2, %add.1
  ; Lane 3: y[3] * x[3], added to the running sum; the result is returned.
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %vecext1.3 = extractelement <4 x float> %1, i32 3
  %mul.3 = fmul fast float %vecext1.3, %vecext.3
  %add.3 = fadd fast float %mul.3, %add.2
  ret float %add.3
}
|
|
|
|
; dotdq: four-lane double dot product of two vectors loaded from the pointer
; arguments %x and %y (32-byte aligned loads).
; Every scalar multiply takes the %y lane as its first operand and the %x lane
; as its second. The FileCheck assertions expect the vector fmul to keep that
; order (load of Y first, load of X second), followed by a two-step
; shuffle/fadd reduction and an extract of lane 0 as the returned value.
define double @dotdq(<4 x double>* nocapture readonly %x, <4 x double>* nocapture readonly %y) {
; CHECK-LABEL: @dotdq(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, <4 x double>* [[X:%.*]], align 32
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[Y:%.*]], align 32
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x double> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x double> [[TMP2]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x double> [[BIN_RDX]], <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x double> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[BIN_RDX2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
entry:
  ; %0 holds the vector behind %x, %1 the vector behind %y.
  %0 = load <4 x double>, <4 x double>* %x, align 32
  %1 = load <4 x double>, <4 x double>* %y, align 32
  ; Lane 0: y[0] * x[0] seeds the accumulation chain.
  %vecext = extractelement <4 x double> %0, i32 0
  %vecext1 = extractelement <4 x double> %1, i32 0
  %mul = fmul fast double %vecext1, %vecext
  ; Lane 1: y[1] * x[1], added to the lane-0 product.
  %vecext.1 = extractelement <4 x double> %0, i32 1
  %vecext1.1 = extractelement <4 x double> %1, i32 1
  %mul.1 = fmul fast double %vecext1.1, %vecext.1
  %add.1 = fadd fast double %mul.1, %mul
  ; Lane 2: y[2] * x[2], added to the running sum.
  %vecext.2 = extractelement <4 x double> %0, i32 2
  %vecext1.2 = extractelement <4 x double> %1, i32 2
  %mul.2 = fmul fast double %vecext1.2, %vecext.2
  %add.2 = fadd fast double %mul.2, %add.1
  ; Lane 3: y[3] * x[3], added to the running sum; the result is returned.
  %vecext.3 = extractelement <4 x double> %0, i32 3
  %vecext1.3 = extractelement <4 x double> %1, i32 3
  %mul.3 = fmul fast double %vecext1.3, %vecext.3
  %add.3 = fadd fast double %mul.3, %add.2
  ret double %add.3
}
|