CodeGen: Power: Add lowering for shifts of v1i128.

When vector operations on vNi128 are legalized, they are split into v1i128
operations because v1i128 is a legal type on ppc64. The compiler then crashes
in SelectionDAG because it fails to select these operations. This patch fixes
the shift operations: logical shift right and shift left can be performed in
the vector unit, but algebraic (arithmetic) shift right must be split.

Differential Revision: https://reviews.llvm.org/D32774

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303307 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Kyle Butt 2017-05-17 21:54:41 +00:00
parent 7e11c73f63
commit 011a826e4f
3 changed files with 111 additions and 4 deletions

View File

@ -689,6 +689,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SRA, MVT::v2i64, Legal); setOperationAction(ISD::SRA, MVT::v2i64, Legal);
setOperationAction(ISD::SRL, MVT::v2i64, Legal); setOperationAction(ISD::SRL, MVT::v2i64, Legal);
// 128 bit shifts can be accomplished via 3 instructions for SHL and
// SRL, but not for SRA because of the instructions available:
// VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
// doing here, so all three shifts are expanded.
setOperationAction(ISD::SHL, MVT::v1i128, Expand);
setOperationAction(ISD::SRL, MVT::v1i128, Expand);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Legal); setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
} }
else { else {
@ -742,6 +750,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasP9Vector()) { if (Subtarget.hasP9Vector()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
// 128 bit shifts can be accomplished via 3 instructions for SHL and
// SRL, but not for SRA because of the instructions available:
// VS{RL} and VS{RL}O.
setOperationAction(ISD::SHL, MVT::v1i128, Legal);
setOperationAction(ISD::SRL, MVT::v1i128, Legal);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
} }
} }

View File

@ -987,12 +987,16 @@ def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
(v8i16 (VSLH $vA, $vB))>; (v8i16 (VSLH $vA, $vB))>;
def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)), def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
(v4i32 (VSLW $vA, $vB))>; (v4i32 (VSLW $vA, $vB))>;
// Select the generic 128-bit vector left shift (shl on v1i128) as a
// three-instruction sequence: VSLO on ($vA, $vB), VSPLTB 15 on $vB, and VSL
// combining the two results.
// NOTE(review): presumably VSLO performs the whole-byte part of the shift and
// VSL the remaining 0-7 bits, with VSPLTB replicating the shift-amount byte
// into every byte as VSL requires -- confirm against the Power ISA.
def : Pat<(v1i128 (shl v1i128:$vA, v1i128:$vB)),
(v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>;
def : Pat<(v16i8 (PPCshl v16i8:$vA, v16i8:$vB)), def : Pat<(v16i8 (PPCshl v16i8:$vA, v16i8:$vB)),
(v16i8 (VSLB $vA, $vB))>; (v16i8 (VSLB $vA, $vB))>;
def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)), def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)),
(v8i16 (VSLH $vA, $vB))>; (v8i16 (VSLH $vA, $vB))>;
def : Pat<(v4i32 (PPCshl v4i32:$vA, v4i32:$vB)), def : Pat<(v4i32 (PPCshl v4i32:$vA, v4i32:$vB)),
(v4i32 (VSLW $vA, $vB))>; (v4i32 (VSLW $vA, $vB))>;
// Select the PPCshl node on v1i128 as VSL applied to (VSLO $vA, $vB) and
// (VSPLTB 15, $vB): three instructions in total.
// NOTE(review): presumably VSLO shifts by whole bytes and VSL by the remaining
// bits, using the shift-amount byte replicated by VSPLTB -- confirm against
// the Power ISA.
def : Pat<(v1i128 (PPCshl v1i128:$vA, v1i128:$vB)),
(v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>;
def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)), def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
(v16i8 (VSRB $vA, $vB))>; (v16i8 (VSRB $vA, $vB))>;
@ -1000,12 +1004,16 @@ def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
(v8i16 (VSRH $vA, $vB))>; (v8i16 (VSRH $vA, $vB))>;
def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)), def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
(v4i32 (VSRW $vA, $vB))>; (v4i32 (VSRW $vA, $vB))>;
// Select the generic 128-bit vector logical right shift (srl on v1i128) as a
// three-instruction sequence: VSRO on ($vA, $vB), VSPLTB 15 on $vB, and VSR
// combining the two results.
// NOTE(review): presumably VSRO performs the whole-byte part of the shift and
// VSR the remaining 0-7 bits, with VSPLTB replicating the shift-amount byte
// into every byte as VSR requires -- confirm against the Power ISA.
def : Pat<(v1i128 (srl v1i128:$vA, v1i128:$vB)),
(v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>;
def : Pat<(v16i8 (PPCsrl v16i8:$vA, v16i8:$vB)), def : Pat<(v16i8 (PPCsrl v16i8:$vA, v16i8:$vB)),
(v16i8 (VSRB $vA, $vB))>; (v16i8 (VSRB $vA, $vB))>;
def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)), def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)),
(v8i16 (VSRH $vA, $vB))>; (v8i16 (VSRH $vA, $vB))>;
def : Pat<(v4i32 (PPCsrl v4i32:$vA, v4i32:$vB)), def : Pat<(v4i32 (PPCsrl v4i32:$vA, v4i32:$vB)),
(v4i32 (VSRW $vA, $vB))>; (v4i32 (VSRW $vA, $vB))>;
// Select the PPCsrl node on v1i128 as VSR applied to (VSRO $vA, $vB) and
// (VSPLTB 15, $vB): three instructions in total.
// NOTE(review): presumably VSRO shifts by whole bytes and VSR by the remaining
// bits, using the shift-amount byte replicated by VSPLTB -- confirm against
// the Power ISA.
def : Pat<(v1i128 (PPCsrl v1i128:$vA, v1i128:$vB)),
(v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>;
def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)), def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
(v16i8 (VSRAB $vA, $vB))>; (v16i8 (VSRAB $vA, $vB))>;

View File

@ -1,14 +1,98 @@
; RUN: llc -verify-machineinstrs < %s -march=ppc64 | grep sld | count 5 ; RUN: llc -verify-machineinstrs < %s | FileCheck --check-prefix=P8 --check-prefix=CHECK %s
; RUN: llc -mcpu=pwr9 -verify-machineinstrs < %s | FileCheck --check-prefix=P9 --check-prefix=CHECK %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
define i128 @foo_lshr(i128 %x, i128 %y) { ; CHECK-LABEL: lshr:
; CHECK-DAG: subfic [[R0:[0-9]+]], 5, 64
; CHECK-DAG: addi [[R1:[0-9]+]], 5, -64
; CHECK-DAG: srd [[R2:[0-9]+]], 3, 5
; CHECK-DAG: sld [[R3:[0-9]+]], 4, [[R0]]
; CHECK-DAG: srd [[R4:[0-9]+]], 4, [[R1]]
; CHECK-DAG: or [[R5:[0-9]+]], [[R2]], [[R3]]
; CHECK-DAG: or 3, [[R5]], [[R4]]
; CHECK-DAG: srd 4, 4, 5
; CHECK: blr
define i128 @lshr(i128 %x, i128 %y) {
%r = lshr i128 %x, %y %r = lshr i128 %x, %y
ret i128 %r ret i128 %r
} }
define i128 @foo_ashr(i128 %x, i128 %y) { ; CHECK-LABEL: ashr:
; CHECK-DAG: subfic [[R0:[0-9]+]], 5, 64
; CHECK-DAG: addi [[R1:[0-9]+]], 5, -64
; CHECK-DAG: srd [[R2:[0-9]+]], 3, 5
; CHECK-DAG: sld [[R3:[0-9]+]], 4, [[R0]]
; CHECK-DAG: srad [[R4:[0-9]+]], 4, [[R1]]
; CHECK-DAG: or [[R5:[0-9]+]], [[R2]], [[R3]]
; CHECK-DAG: cmpwi [[R1]], 1
; CHECK-DAG: srad 4, 4, 5
; CHECK: isel 3, [[R5]], [[R4]], 0
; CHECK: blr
define i128 @ashr(i128 %x, i128 %y) {
%r = ashr i128 %x, %y %r = ashr i128 %x, %y
ret i128 %r ret i128 %r
} }
define i128 @foo_shl(i128 %x, i128 %y) { ; CHECK-LABEL: shl:
; CHECK-DAG: subfic [[R0:[0-9]+]], 5, 64
; CHECK-DAG: addi [[R1:[0-9]+]], 5, -64
; CHECK-DAG: sld [[R2:[0-9]+]], 4, 5
; CHECK-DAG: srd [[R3:[0-9]+]], 3, [[R0]]
; CHECK-DAG: sld [[R4:[0-9]+]], 3, [[R1]]
; CHECK-DAG: or [[R5:[0-9]+]], [[R2]], [[R3]]
; CHECK-DAG: or 4, [[R5]], [[R4]]
; CHECK-DAG: sld 3, 3, 5
; CHECK: blr
define i128 @shl(i128 %x, i128 %y) {
%r = shl i128 %x, %y %r = shl i128 %x, %y
ret i128 %r ret i128 %r
} }
; CHECK-LABEL: shl_v1i128:
; P8-NOT: {{\b}}vslo
; P8-NOT: {{\b}}vsl
; P9-DAG: vslo
; P9-DAG: vspltb
; P9: vsl
; P9-NOT: {{\b}}sld
; P9-NOT: {{\b}}srd
; CHECK: blr
; Left-shifts %arg by %amt using the <1 x i128> vector type: both scalars are
; inserted into single-element vectors, shifted as vectors, and element 0 of
; the result is returned as a scalar i128.
define i128 @shl_v1i128(i128 %arg, i128 %amt) local_unnamed_addr #0 {
entry:
; Wrap the value and the shift amount as <1 x i128> vectors.
%0 = insertelement <1 x i128> undef, i128 %arg, i32 0
%1 = insertelement <1 x i128> undef, i128 %amt, i32 0
; The vector shift is the operation under test.
%2 = shl <1 x i128> %0, %1
%retval = extractelement <1 x i128> %2, i32 0
ret i128 %retval
}
; CHECK-LABEL: lshr_v1i128:
; P8-NOT: {{\b}}vsro
; P8-NOT: {{\b}}vsr
; P9-DAG: vsro
; P9-DAG: vspltb
; P9: vsr
; P9-NOT: {{\b}}srd
; P9-NOT: {{\b}}sld
; CHECK: blr
; Logically right-shifts %arg by %amt using the <1 x i128> vector type: both
; scalars are inserted into single-element vectors, shifted as vectors, and
; element 0 of the result is returned as a scalar i128.
define i128 @lshr_v1i128(i128 %arg, i128 %amt) local_unnamed_addr #0 {
entry:
; Wrap the value and the shift amount as <1 x i128> vectors.
%0 = insertelement <1 x i128> undef, i128 %arg, i32 0
%1 = insertelement <1 x i128> undef, i128 %amt, i32 0
; The vector shift is the operation under test.
%2 = lshr <1 x i128> %0, %1
%retval = extractelement <1 x i128> %2, i32 0
ret i128 %retval
}
; Arithmetic shift right is not available as an operation on the vector registers.
; CHECK-LABEL: ashr_v1i128:
; CHECK-NOT: {{\b}}vsro
; CHECK-NOT: {{\b}}vsr
; CHECK: blr
; Arithmetically right-shifts %arg by %amt using the <1 x i128> vector type:
; both scalars are inserted into single-element vectors, shifted as vectors,
; and element 0 of the result is returned as a scalar i128.
define i128 @ashr_v1i128(i128 %arg, i128 %amt) local_unnamed_addr #0 {
entry:
; Wrap the value and the shift amount as <1 x i128> vectors.
%0 = insertelement <1 x i128> undef, i128 %arg, i32 0
%1 = insertelement <1 x i128> undef, i128 %amt, i32 0
; The vector shift is the operation under test.
%2 = ashr <1 x i128> %0, %1
%retval = extractelement <1 x i128> %2, i32 0
ret i128 %retval
}