mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-04 01:26:41 +00:00
[X86] Improve 64-bit shifts on 32-bit targets (PR14593)
As discussed on PR14593, this patch adds support for lowering to SHLD/SHRD from the patterns generated by DAGTypeLegalizer::ExpandShiftWithKnownAmountBit.

Differential Revision: https://reviews.llvm.org/D23000

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@277299 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
081d3f18a8
commit
bc139df3fe
@ -28631,18 +28631,23 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
|
||||
unsigned Opc = X86ISD::SHLD;
|
||||
SDValue Op0 = N0.getOperand(0);
|
||||
SDValue Op1 = N1.getOperand(0);
|
||||
if (ShAmt0.getOpcode() == ISD::SUB) {
|
||||
if (ShAmt0.getOpcode() == ISD::SUB ||
|
||||
ShAmt0.getOpcode() == ISD::XOR) {
|
||||
Opc = X86ISD::SHRD;
|
||||
std::swap(Op0, Op1);
|
||||
std::swap(ShAmt0, ShAmt1);
|
||||
}
|
||||
|
||||
// OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C )
|
||||
// OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C )
|
||||
// OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C )
|
||||
// OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C )
|
||||
unsigned Bits = VT.getSizeInBits();
|
||||
if (ShAmt1.getOpcode() == ISD::SUB) {
|
||||
SDValue Sum = ShAmt1.getOperand(0);
|
||||
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
|
||||
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
|
||||
if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
|
||||
if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE)
|
||||
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
|
||||
if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
|
||||
return DAG.getNode(Opc, DL, VT,
|
||||
@ -28652,12 +28657,33 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
|
||||
}
|
||||
} else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
|
||||
ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
|
||||
if (ShAmt0C &&
|
||||
ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits)
|
||||
if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits)
|
||||
return DAG.getNode(Opc, DL, VT,
|
||||
N0.getOperand(0), N1.getOperand(0),
|
||||
DAG.getNode(ISD::TRUNCATE, DL,
|
||||
MVT::i8, ShAmt0));
|
||||
} else if (ShAmt1.getOpcode() == ISD::XOR) {
|
||||
SDValue Mask = ShAmt1.getOperand(1);
|
||||
if (ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
|
||||
unsigned InnerShift = (X86ISD::SHLD == Opc ? ISD::SRL : ISD::SHL);
|
||||
SDValue ShAmt1Op0 = ShAmt1.getOperand(0);
|
||||
if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE)
|
||||
ShAmt1Op0 = ShAmt1Op0.getOperand(0);
|
||||
if (MaskC->getSExtValue() == (Bits - 1) && ShAmt1Op0 == ShAmt0) {
|
||||
if (Op1.getOpcode() == InnerShift &&
|
||||
isa<ConstantSDNode>(Op1.getOperand(1)) &&
|
||||
Op1.getConstantOperandVal(1) == 1) {
|
||||
return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
|
||||
DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
|
||||
}
|
||||
// Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ).
|
||||
if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD &&
|
||||
Op1.getOperand(0) == Op1.getOperand(1)) {
|
||||
return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
|
||||
DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
@ -24,19 +24,13 @@ define i64 @test2(i64 %xx, i32 %test) nounwind {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch
|
||||
; CHECK-NEXT: andb $7, %ch
|
||||
; CHECK-NEXT: movb %ch, %cl
|
||||
; CHECK-NEXT: shll %cl, %esi
|
||||
; CHECK-NEXT: movl %eax, %edx
|
||||
; CHECK-NEXT: shrl %edx
|
||||
; CHECK-NEXT: xorb $31, %cl
|
||||
; CHECK-NEXT: shrl %cl, %edx
|
||||
; CHECK-NEXT: orl %esi, %edx
|
||||
; CHECK-NEXT: movb %ch, %cl
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; CHECK-NEXT: andb $7, %cl
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: shll %cl, %eax
|
||||
; CHECK-NEXT: shldl %cl, %esi, %edx
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: retl
|
||||
%and = and i32 %test, 7
|
||||
@ -48,20 +42,12 @@ define i64 @test2(i64 %xx, i32 %test) nounwind {
|
||||
define i64 @test3(i64 %xx, i32 %test) nounwind {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch
|
||||
; CHECK-NEXT: andb $7, %ch
|
||||
; CHECK-NEXT: movb %ch, %cl
|
||||
; CHECK-NEXT: shrl %cl, %esi
|
||||
; CHECK-NEXT: leal (%edx,%edx), %eax
|
||||
; CHECK-NEXT: xorb $31, %cl
|
||||
; CHECK-NEXT: shll %cl, %eax
|
||||
; CHECK-NEXT: orl %esi, %eax
|
||||
; CHECK-NEXT: movb %ch, %cl
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; CHECK-NEXT: andb $7, %cl
|
||||
; CHECK-NEXT: shrdl %cl, %edx, %eax
|
||||
; CHECK-NEXT: shrl %cl, %edx
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: retl
|
||||
%and = and i32 %test, 7
|
||||
%sh_prom = zext i32 %and to i64
|
||||
@ -72,20 +58,12 @@ define i64 @test3(i64 %xx, i32 %test) nounwind {
|
||||
define i64 @test4(i64 %xx, i32 %test) nounwind {
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch
|
||||
; CHECK-NEXT: andb $7, %ch
|
||||
; CHECK-NEXT: movb %ch, %cl
|
||||
; CHECK-NEXT: shrl %cl, %esi
|
||||
; CHECK-NEXT: leal (%edx,%edx), %eax
|
||||
; CHECK-NEXT: xorb $31, %cl
|
||||
; CHECK-NEXT: shll %cl, %eax
|
||||
; CHECK-NEXT: orl %esi, %eax
|
||||
; CHECK-NEXT: movb %ch, %cl
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; CHECK-NEXT: andb $7, %cl
|
||||
; CHECK-NEXT: shrdl %cl, %edx, %eax
|
||||
; CHECK-NEXT: sarl %cl, %edx
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: retl
|
||||
%and = and i32 %test, 7
|
||||
%sh_prom = zext i32 %and to i64
|
||||
|
@ -67,13 +67,7 @@ define i64 @test5(i64 %hi, i64 %lo, i64 %bits) nounwind {
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: xorl $63, %ecx
|
||||
; CHECK-NEXT: shrq %rsi
|
||||
; CHECK-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
|
||||
; CHECK-NEXT: shrq %cl, %rsi
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: shlq %cl, %rdi
|
||||
; CHECK-NEXT: orq %rsi, %rdi
|
||||
; CHECK-NEXT: shldq %cl, %rsi, %rdi
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%bits64 = xor i64 %bits, 63
|
||||
@ -88,13 +82,8 @@ define i64 @test6(i64 %hi, i64 %lo, i64 %bits) nounwind {
|
||||
; CHECK-LABEL: test6:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: xorl $63, %ecx
|
||||
; CHECK-NEXT: leaq (%rsi,%rsi), %rax
|
||||
; CHECK-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
|
||||
; CHECK-NEXT: shlq %cl, %rax
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: shrq %cl, %rdi
|
||||
; CHECK-NEXT: orq %rdi, %rax
|
||||
; CHECK-NEXT: shrdq %cl, %rsi, %rdi
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%bits64 = xor i64 %bits, 63
|
||||
%lo2 = shl i64 %lo, 1
|
||||
@ -108,13 +97,8 @@ define i64 @test7(i64 %hi, i64 %lo, i64 %bits) nounwind {
|
||||
; CHECK-LABEL: test7:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: xorl $63, %ecx
|
||||
; CHECK-NEXT: leaq (%rsi,%rsi), %rax
|
||||
; CHECK-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
|
||||
; CHECK-NEXT: shlq %cl, %rax
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: shrq %cl, %rdi
|
||||
; CHECK-NEXT: orq %rdi, %rax
|
||||
; CHECK-NEXT: shrdq %cl, %rsi, %rdi
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%bits64 = xor i64 %bits, 63
|
||||
%lo2 = add i64 %lo, %lo
|
||||
|
@ -151,19 +151,13 @@ define i64 @test8(i64 %val, i32 %bits) nounwind {
|
||||
; CHECK-LABEL: test8:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; CHECK-NEXT: movb %ch, %cl
|
||||
; CHECK-NEXT: shll %cl, %esi
|
||||
; CHECK-NEXT: movl %eax, %edx
|
||||
; CHECK-NEXT: shrl %edx
|
||||
; CHECK-NEXT: andb $31, %cl
|
||||
; CHECK-NEXT: xorb $31, %cl
|
||||
; CHECK-NEXT: shrl %cl, %edx
|
||||
; CHECK-NEXT: orl %esi, %edx
|
||||
; CHECK-NEXT: movb %ch, %cl
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: shll %cl, %eax
|
||||
; CHECK-NEXT: andb $31, %cl
|
||||
; CHECK-NEXT: shldl %cl, %esi, %edx
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: retl
|
||||
%and = and i32 %bits, 31
|
||||
@ -176,18 +170,13 @@ define i64 @test9(i64 %val, i32 %bits) nounwind {
|
||||
; CHECK-LABEL: test9:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movb %ch, %cl
|
||||
; CHECK-NEXT: shrl %cl, %esi
|
||||
; CHECK-NEXT: leal (%edx,%edx), %eax
|
||||
; CHECK-NEXT: andb $31, %cl
|
||||
; CHECK-NEXT: xorb $31, %cl
|
||||
; CHECK-NEXT: shll %cl, %eax
|
||||
; CHECK-NEXT: orl %esi, %eax
|
||||
; CHECK-NEXT: movb %ch, %cl
|
||||
; CHECK-NEXT: movl %esi, %edx
|
||||
; CHECK-NEXT: sarl %cl, %edx
|
||||
; CHECK-NEXT: andb $31, %cl
|
||||
; CHECK-NEXT: shrdl %cl, %esi, %eax
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: retl
|
||||
%and = and i32 %bits, 31
|
||||
@ -200,18 +189,13 @@ define i64 @test10(i64 %val, i32 %bits) nounwind {
|
||||
; CHECK-LABEL: test10:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movb %ch, %cl
|
||||
; CHECK-NEXT: shrl %cl, %esi
|
||||
; CHECK-NEXT: leal (%edx,%edx), %eax
|
||||
; CHECK-NEXT: andb $31, %cl
|
||||
; CHECK-NEXT: xorb $31, %cl
|
||||
; CHECK-NEXT: shll %cl, %eax
|
||||
; CHECK-NEXT: orl %esi, %eax
|
||||
; CHECK-NEXT: movb %ch, %cl
|
||||
; CHECK-NEXT: movl %esi, %edx
|
||||
; CHECK-NEXT: shrl %cl, %edx
|
||||
; CHECK-NEXT: andb $31, %cl
|
||||
; CHECK-NEXT: shrdl %cl, %esi, %eax
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: retl
|
||||
%and = and i32 %bits, 31
|
||||
@ -291,19 +275,10 @@ define i32 @test14(i32 %hi, i32 %lo, i32 %bits) nounwind {
|
||||
define i32 @test15(i32 %hi, i32 %lo, i32 %bits) nounwind {
|
||||
; CHECK-LABEL: test15:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: xorl $31, %ecx
|
||||
; CHECK-NEXT: shrl %esi
|
||||
; CHECK-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
|
||||
; CHECK-NEXT: shrl %cl, %esi
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: shll %cl, %eax
|
||||
; CHECK-NEXT: orl %esi, %eax
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: shldl %cl, %edx, %eax
|
||||
; CHECK-NEXT: retl
|
||||
%bits32 = xor i32 %bits, 31
|
||||
%lo2 = lshr i32 %lo, 1
|
||||
@ -316,19 +291,10 @@ define i32 @test15(i32 %hi, i32 %lo, i32 %bits) nounwind {
|
||||
define i32 @test16(i32 %hi, i32 %lo, i32 %bits) nounwind {
|
||||
; CHECK-LABEL: test16:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: xorl $31, %ecx
|
||||
; CHECK-NEXT: addl %esi, %esi
|
||||
; CHECK-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
|
||||
; CHECK-NEXT: shll %cl, %esi
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: shrl %cl, %eax
|
||||
; CHECK-NEXT: orl %esi, %eax
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: shrdl %cl, %edx, %eax
|
||||
; CHECK-NEXT: retl
|
||||
%bits32 = xor i32 %bits, 31
|
||||
%lo2 = shl i32 %lo, 1
|
||||
@ -341,19 +307,10 @@ define i32 @test16(i32 %hi, i32 %lo, i32 %bits) nounwind {
|
||||
define i32 @test17(i32 %hi, i32 %lo, i32 %bits) nounwind {
|
||||
; CHECK-LABEL: test17:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: xorl $31, %ecx
|
||||
; CHECK-NEXT: addl %esi, %esi
|
||||
; CHECK-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
|
||||
; CHECK-NEXT: shll %cl, %esi
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: shrl %cl, %eax
|
||||
; CHECK-NEXT: orl %esi, %eax
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: shrdl %cl, %edx, %eax
|
||||
; CHECK-NEXT: retl
|
||||
%bits32 = xor i32 %bits, 31
|
||||
%lo2 = add i32 %lo, %lo
|
||||
|
Loading…
x
Reference in New Issue
Block a user