mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-03 01:12:59 +00:00
[Power9] Exploit vector integer extend instructions
This patch adds build vector patterns to exploit the vector integer extend instructions:
- vextsb2w - Vector Extend Sign Byte To Word
- vextsb2d - Vector Extend Sign Byte To Doubleword
- vextsh2w - Vector Extend Sign Halfword To Word
- vextsh2d - Vector Extend Sign Halfword To Doubleword
- vextsw2d - Vector Extend Sign Word To Doubleword
Differential Revision: https://reviews.llvm.org/D33510
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304992 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
06abfee96e
commit
32a3852f3c
@ -2717,6 +2717,40 @@ def DblToFlt {
|
||||
dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
|
||||
dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
|
||||
}
|
||||
|
||||
// Operand dags for building a v4i32 from sign-extended bytes of a v16i8.
// Each Ai extracts byte element 4*i (0, 4, 8, 12) of $A as an i32 and
// sign-extends it in place from i8 with sext_inreg.
// NOTE(review): these indices look like little-endian element numbers for the
// low-order byte of each word - confirm before using on big-endian targets.
def ByteToWord {
|
||||
dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
|
||||
dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
|
||||
dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
|
||||
dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
|
||||
}
|
||||
|
||||
// Operand dags for building a v2i64 from sign-extended bytes of a v16i8.
// A0 uses byte element 0 and A1 uses byte element 8 of $A. The byte is
// extracted as an i32, any-extended to i64, then sign-extended in place from
// i8 with sext_inreg.
// NOTE(review): indices 0 and 8 look like little-endian element numbers for
// the low-order byte of each doubleword - confirm for big-endian targets.
def ByteToDWord {
|
||||
dag A0 = (i64 (sext_inreg
|
||||
(i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
|
||||
dag A1 = (i64 (sext_inreg
|
||||
(i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
|
||||
}
|
||||
|
||||
// Operand dags for building a v4i32 from sign-extended halfwords of a v8i16.
// Each Ai extracts halfword element 2*i (0, 2, 4, 6) of $A as an i32 and
// sign-extends it in place from i16 with sext_inreg.
// NOTE(review): these indices look like little-endian element numbers for the
// low-order halfword of each word - confirm for big-endian targets.
def HWordToWord {
|
||||
dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
|
||||
dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
|
||||
dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
|
||||
dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
|
||||
}
|
||||
|
||||
// Operand dags for building a v2i64 from sign-extended halfwords of a v8i16.
// A0 uses halfword element 0 and A1 uses halfword element 4 of $A. The
// halfword is extracted as an i32, any-extended to i64, then sign-extended in
// place from i16 with sext_inreg.
// NOTE(review): indices 0 and 4 look like little-endian element numbers for
// the low-order halfword of each doubleword - confirm for big-endian targets.
def HWordToDWord {
|
||||
dag A0 = (i64 (sext_inreg
|
||||
(i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
|
||||
dag A1 = (i64 (sext_inreg
|
||||
(i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
|
||||
}
|
||||
|
||||
// Operand dags for building a v2i64 from sign-extended words of a v4i32.
// A0 sign-extends word element 0 and A1 sign-extends word element 2 of $A
// to i64.
// NOTE(review): indices 0 and 2 look like little-endian element numbers for
// the low-order word of each doubleword - confirm for big-endian targets.
def WordToDWord {
|
||||
dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
|
||||
dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
|
||||
}
|
||||
|
||||
// Operand dag for an i32 produced from a loaded float: an f32 is loaded from
// the xoaddr address $A with an extending load to f64, passed through
// PPCfctiwz, and the result is moved out as an i32 with PPCmfvsr.
// NOTE(review): PPCfctiwz is presumably the FP-to-signed-word conversion with
// truncation - confirm against the node definition.
def FltToIntLoad {
|
||||
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A)))));
|
||||
}
|
||||
@ -2969,4 +3003,21 @@ let AddedComplexity = 400 in {
|
||||
(VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>;
|
||||
}
|
||||
// P9 Altivec instructions that can be used to build vectors.
// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
// with complexities of existing build vector patterns in this file.
//
// Each vexts* instruction sign-extends the low-order (rightmost) byte,
// halfword or word of every destination element. The source element indices
// used by the operand dags (bytes 0/4/8/12, halfwords 0/2/4/6, words 0/2, ...)
// name those low-order sub-elements only under little-endian element
// numbering. On a big-endian subtarget the same indices refer to the
// high-order sub-elements, so matching there would select an instruction that
// extends the wrong lanes. The patterns are therefore restricted to
// little-endian subtargets.
let Predicates = [HasP9Altivec, IsLittleEndian] in {
  // Words 0 and 2 of a v4i32, sign-extended to doublewords.
  def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)),
            (v2i64 (VEXTSW2D $A))>;
  // Halfwords 0 and 4 of a v8i16, sign-extended to doublewords.
  def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)),
            (v2i64 (VEXTSH2D $A))>;
  // Halfwords 0, 2, 4 and 6 of a v8i16, sign-extended to words.
  def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1,
                                 HWordToWord.A2, HWordToWord.A3)),
            (v4i32 (VEXTSH2W $A))>;
  // Bytes 0, 4, 8 and 12 of a v16i8, sign-extended to words.
  def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1,
                                 ByteToWord.A2, ByteToWord.A3)),
            (v4i32 (VEXTSB2W $A))>;
  // Bytes 0 and 8 of a v16i8, sign-extended to doublewords.
  def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)),
            (v2i64 (VEXTSB2D $A))>;
}
|
||||
}
|
||||
|
90
test/CodeGen/PowerPC/vec_int_ext.ll
Normal file
90
test/CodeGen/PowerPC/vec_int_ext.ll
Normal file
@ -0,0 +1,90 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s -check-prefix=PWR9
|
||||
target triple = "powerpc64le-unknown-linux-gnu"
|
||||
|
||||
; Builds a <4 x i32> from bytes 0, 4, 8 and 12 of %a, each sign-extended to
; i32. The checks below expect the whole sequence to be selected as a single
; vextsb2w instruction followed by the return.
define <4 x i32> @vextsb2w(<16 x i8> %a) {
|
||||
; PWR9-LABEL: vextsb2w:
|
||||
; PWR9: # BB#0: # %entry
|
||||
; PWR9-NEXT: vextsb2w 2, 2
|
||||
; PWR9-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <16 x i8> %a, i32 0
|
||||
%conv = sext i8 %vecext to i32
|
||||
%vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
|
||||
%vecext1 = extractelement <16 x i8> %a, i32 4
|
||||
%conv2 = sext i8 %vecext1 to i32
|
||||
%vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
|
||||
%vecext4 = extractelement <16 x i8> %a, i32 8
|
||||
%conv5 = sext i8 %vecext4 to i32
|
||||
%vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
|
||||
%vecext7 = extractelement <16 x i8> %a, i32 12
|
||||
%conv8 = sext i8 %vecext7 to i32
|
||||
%vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
|
||||
ret <4 x i32> %vecinit9
|
||||
}
|
||||
|
||||
; Builds a <2 x i64> from bytes 0 and 8 of %a, each sign-extended to i64.
; The checks below expect the whole sequence to be selected as a single
; vextsb2d instruction followed by the return.
define <2 x i64> @vextsb2d(<16 x i8> %a) {
|
||||
; PWR9-LABEL: vextsb2d:
|
||||
; PWR9: # BB#0: # %entry
|
||||
; PWR9-NEXT: vextsb2d 2, 2
|
||||
; PWR9-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <16 x i8> %a, i32 0
|
||||
%conv = sext i8 %vecext to i64
|
||||
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
|
||||
%vecext1 = extractelement <16 x i8> %a, i32 8
|
||||
%conv2 = sext i8 %vecext1 to i64
|
||||
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
|
||||
ret <2 x i64> %vecinit3
|
||||
}
|
||||
|
||||
; Builds a <4 x i32> from halfwords 0, 2, 4 and 6 of %a, each sign-extended
; to i32. The checks below expect the whole sequence to be selected as a
; single vextsh2w instruction followed by the return.
define <4 x i32> @vextsh2w(<8 x i16> %a) {
|
||||
; PWR9-LABEL: vextsh2w:
|
||||
; PWR9: # BB#0: # %entry
|
||||
; PWR9-NEXT: vextsh2w 2, 2
|
||||
; PWR9-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <8 x i16> %a, i32 0
|
||||
%conv = sext i16 %vecext to i32
|
||||
%vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
|
||||
%vecext1 = extractelement <8 x i16> %a, i32 2
|
||||
%conv2 = sext i16 %vecext1 to i32
|
||||
%vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
|
||||
%vecext4 = extractelement <8 x i16> %a, i32 4
|
||||
%conv5 = sext i16 %vecext4 to i32
|
||||
%vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
|
||||
%vecext7 = extractelement <8 x i16> %a, i32 6
|
||||
%conv8 = sext i16 %vecext7 to i32
|
||||
%vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
|
||||
ret <4 x i32> %vecinit9
|
||||
}
|
||||
|
||||
; Builds a <2 x i64> from halfwords 0 and 4 of %a, each sign-extended to i64.
; The checks below expect the whole sequence to be selected as a single
; vextsh2d instruction followed by the return.
define <2 x i64> @vextsh2d(<8 x i16> %a) {
|
||||
; PWR9-LABEL: vextsh2d:
|
||||
; PWR9: # BB#0: # %entry
|
||||
; PWR9-NEXT: vextsh2d 2, 2
|
||||
; PWR9-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <8 x i16> %a, i32 0
|
||||
%conv = sext i16 %vecext to i64
|
||||
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
|
||||
%vecext1 = extractelement <8 x i16> %a, i32 4
|
||||
%conv2 = sext i16 %vecext1 to i64
|
||||
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
|
||||
ret <2 x i64> %vecinit3
|
||||
}
|
||||
|
||||
; Builds a <2 x i64> from words 0 and 2 of %a, each sign-extended to i64.
; The checks below expect the whole sequence to be selected as a single
; vextsw2d instruction followed by the return.
define <2 x i64> @vextsw2d(<4 x i32> %a) {
|
||||
; PWR9-LABEL: vextsw2d:
|
||||
; PWR9: # BB#0: # %entry
|
||||
; PWR9-NEXT: vextsw2d 2, 2
|
||||
; PWR9-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %a, i32 0
|
||||
%conv = sext i32 %vecext to i64
|
||||
%vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
|
||||
%vecext1 = extractelement <4 x i32> %a, i32 2
|
||||
%conv2 = sext i32 %vecext1 to i64
|
||||
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
|
||||
ret <2 x i64> %vecinit3
|
||||
}
|
Loading…
Reference in New Issue
Block a user