mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-05 10:17:36 +00:00
[DAGCombiner] reduce insert+bitcast+extract vector ops to truncate (PR39016)
This is a late backend subset of the IR transform added with D52439. We can confirm that the conversion to a 'trunc' is correct by running: $ opt -instcombine -data-layout="e" (assuming the IR transforms are correct; change "e" to "E" for big-endian). As discussed in PR39016: https://bugs.llvm.org/show_bug.cgi?id=39016 ...the pattern may emerge during legalization, so that's why we are waiting for an insertelement to become a scalar_to_vector in the pattern matching here. The DAG allows for fun variations that are not possible in IR. Result types for extracts and scalar_to_vector don't necessarily match input types, so that means we have to be a bit more careful in the transform (see code comments). The tests show that we don't handle cases that require a shift (as we did in the IR version). I've left that as a potential follow-up because I'm not sure if that's a real concern at this late stage. Differential Revision: https://reviews.llvm.org/D53201 llvm-svn: 344872
This commit is contained in:
parent
8249a7a474
commit
be7f6a09b0
@ -15503,16 +15503,41 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
|
||||
// converts.
|
||||
}
|
||||
|
||||
if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST) {
|
||||
// TODO: These transforms should not require the 'hasOneUse' restriction, but
|
||||
// there are regressions on multiple targets without it. We can end up with a
|
||||
// mess of scalar and vector code if we reduce only part of the DAG to scalar.
|
||||
if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && VT.isInteger() &&
|
||||
InVec.hasOneUse()) {
|
||||
// The vector index of the LSBs of the source depends on the endianness.
|
||||
bool IsLE = DAG.getDataLayout().isLittleEndian();
|
||||
|
||||
unsigned ExtractIndex = ConstEltNo->getZExtValue();
|
||||
// extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
|
||||
unsigned BCTruncElt = IsLE ? 0 : VT.getVectorNumElements() - 1;
|
||||
SDValue BCSrc = InVec.getOperand(0);
|
||||
if (InVec.hasOneUse() && ConstEltNo->getZExtValue() == BCTruncElt &&
|
||||
VT.isInteger() && BCSrc.getValueType().isScalarInteger())
|
||||
if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
|
||||
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
|
||||
|
||||
if (LegalTypes && BCSrc.getValueType().isInteger() &&
|
||||
BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
|
||||
// ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
|
||||
// trunc i64 X to i32
|
||||
SDValue X = BCSrc.getOperand(0);
|
||||
assert(X.getValueType().isScalarInteger() && NVT.isScalarInteger() &&
|
||||
"Extract element and scalar to vector can't change element type "
|
||||
"from FP to integer.");
|
||||
unsigned XBitWidth = X.getValueSizeInBits();
|
||||
unsigned VecEltBitWidth = VT.getScalarSizeInBits();
|
||||
BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
|
||||
|
||||
// An extract element return value type can be wider than its vector
|
||||
// operand element type. In that case, the high bits are undefined, so
|
||||
// it's possible that we may need to extend rather than truncate.
|
||||
if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
|
||||
assert(XBitWidth % VecEltBitWidth == 0 &&
|
||||
"Scalar bitwidth must be a multiple of vector element bitwidth");
|
||||
return DAG.getAnyExtOrTrunc(X, SDLoc(N), NVT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
|
||||
|
@ -12,8 +12,7 @@ define i32 @trunc_i64_to_i32_le(i64 %x) {
|
||||
;
|
||||
; LE-LABEL: trunc_i64_to_i32_le:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: fmov d0, x0
|
||||
; LE-NEXT: fmov w0, s0
|
||||
; LE-NEXT: // kill: def $w0 killed $w0 killed $x0
|
||||
; LE-NEXT: ret
|
||||
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
|
||||
%bc = bitcast <2 x i64> %ins to <4 x i32>
|
||||
@ -24,9 +23,7 @@ define i32 @trunc_i64_to_i32_le(i64 %x) {
|
||||
define i32 @trunc_i64_to_i32_be(i64 %x) {
|
||||
; BE-LABEL: trunc_i64_to_i32_be:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: fmov d0, x0
|
||||
; BE-NEXT: rev64 v0.4s, v0.4s
|
||||
; BE-NEXT: mov w0, v0.s[1]
|
||||
; BE-NEXT: // kill: def $w0 killed $w0 killed $x0
|
||||
; BE-NEXT: ret
|
||||
;
|
||||
; LE-LABEL: trunc_i64_to_i32_be:
|
||||
@ -50,8 +47,7 @@ define i16 @trunc_i64_to_i16_le(i64 %x) {
|
||||
;
|
||||
; LE-LABEL: trunc_i64_to_i16_le:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: fmov d0, x0
|
||||
; LE-NEXT: umov w0, v0.h[0]
|
||||
; LE-NEXT: // kill: def $w0 killed $w0 killed $x0
|
||||
; LE-NEXT: ret
|
||||
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
|
||||
%bc = bitcast <2 x i64> %ins to <8 x i16>
|
||||
@ -62,9 +58,7 @@ define i16 @trunc_i64_to_i16_le(i64 %x) {
|
||||
define i16 @trunc_i64_to_i16_be(i64 %x) {
|
||||
; BE-LABEL: trunc_i64_to_i16_be:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: fmov d0, x0
|
||||
; BE-NEXT: rev64 v0.8h, v0.8h
|
||||
; BE-NEXT: umov w0, v0.h[3]
|
||||
; BE-NEXT: // kill: def $w0 killed $w0 killed $x0
|
||||
; BE-NEXT: ret
|
||||
;
|
||||
; LE-LABEL: trunc_i64_to_i16_be:
|
||||
@ -88,8 +82,6 @@ define i8 @trunc_i32_to_i8_le(i32 %x) {
|
||||
;
|
||||
; LE-LABEL: trunc_i32_to_i8_le:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: fmov s0, w0
|
||||
; LE-NEXT: umov w0, v0.b[0]
|
||||
; LE-NEXT: ret
|
||||
%ins = insertelement <4 x i32> undef, i32 %x, i32 0
|
||||
%bc = bitcast <4 x i32> %ins to <16 x i8>
|
||||
@ -100,9 +92,6 @@ define i8 @trunc_i32_to_i8_le(i32 %x) {
|
||||
define i8 @trunc_i32_to_i8_be(i32 %x) {
|
||||
; BE-LABEL: trunc_i32_to_i8_be:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: fmov s0, w0
|
||||
; BE-NEXT: rev32 v0.16b, v0.16b
|
||||
; BE-NEXT: umov w0, v0.b[3]
|
||||
; BE-NEXT: ret
|
||||
;
|
||||
; LE-LABEL: trunc_i32_to_i8_be:
|
||||
@ -116,3 +105,22 @@ define i8 @trunc_i32_to_i8_be(i32 %x) {
|
||||
ret i8 %ext
|
||||
}
|
||||
|
||||
; Weird type (non-power-of-2 vector) is ok.
|
||||
|
||||
define i8 @trunc_i64_to_i8_be(i64 %x) {
|
||||
; BE-LABEL: trunc_i64_to_i8_be:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: // kill: def $w0 killed $w0 killed $x0
|
||||
; BE-NEXT: ret
|
||||
;
|
||||
; LE-LABEL: trunc_i64_to_i8_be:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: fmov d0, x0
|
||||
; LE-NEXT: umov w0, v0.b[7]
|
||||
; LE-NEXT: ret
|
||||
%ins = insertelement <3 x i64> undef, i64 %x, i32 0
|
||||
%bc = bitcast <3 x i64> %ins to <24 x i8>
|
||||
%ext = extractelement <24 x i8> %bc, i32 7
|
||||
ret i8 %ext
|
||||
}
|
||||
|
||||
|
@ -68,8 +68,8 @@ define i32 @trunc_i64_to_i32_le(i64 %x) {
|
||||
;
|
||||
; X64-LABEL: trunc_i64_to_i32_le:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rdi, %xmm0
|
||||
; X64-NEXT: movd %xmm0, %eax
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: # kill: def $eax killed $eax killed $rax
|
||||
; X64-NEXT: retq
|
||||
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
|
||||
%bc = bitcast <2 x i64> %ins to <4 x i32>
|
||||
@ -86,9 +86,8 @@ define i16 @trunc_i64_to_i16_le(i64 %x) {
|
||||
;
|
||||
; X64-LABEL: trunc_i64_to_i16_le:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rdi, %xmm0
|
||||
; X64-NEXT: movd %xmm0, %eax
|
||||
; X64-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: # kill: def $ax killed $ax killed $rax
|
||||
; X64-NEXT: retq
|
||||
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
|
||||
%bc = bitcast <2 x i64> %ins to <8 x i16>
|
||||
|
@ -16,16 +16,17 @@ define i32 @test(%SA* %pSA, i16* %A, i32 %B, i32 %C, i32 %D, i8* %E) {
|
||||
; CHECK-NEXT: # %bb.2: # %if.B
|
||||
; CHECK-NEXT: pshufw $238, %mm0, %mm0 # mm0 = mm0[2,3,2,3]
|
||||
; CHECK-NEXT: movq %mm0, %rax
|
||||
; CHECK-NEXT: jmp .LBB0_3
|
||||
; CHECK-NEXT: testl %eax, %eax
|
||||
; CHECK-NEXT: jne .LBB0_4
|
||||
; CHECK-NEXT: .LBB0_1: # %if.A
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movd %edx, %mm1
|
||||
; CHECK-NEXT: psllq %mm1, %mm0
|
||||
; CHECK-NEXT: movq %mm0, %rax
|
||||
; CHECK-NEXT: testq %rax, %rax
|
||||
; CHECK-NEXT: jne .LBB0_4
|
||||
; CHECK-NEXT: .LBB0_3: # %if.C
|
||||
; CHECK-NEXT: movq %rax, %xmm0
|
||||
; CHECK-NEXT: movd %xmm0, %eax
|
||||
; CHECK-NEXT: # %bb.3: # %if.C
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: testl %eax, %eax
|
||||
; CHECK-NEXT: je .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_4: # %merge
|
||||
|
Loading…
Reference in New Issue
Block a user