Re-apply r267206 with a fix for the encoding problem: when the immediate,
log2(Mask), is smaller than 32, we must use the 32-bit variant because the 64-bit
variant cannot encode it. Therefore, set the subreg part accordingly.

[AArch64] Fix optimizeCondBranch logic.

The opcode for the optimized branch does not depend on the size
of the active bits in the AND mask, but on the AND opcode itself.
Indeed, we need to use an X or W variant based on the AND variant,
not based on whether the mask fits into the related variant.
Otherwise, we may end up using the W variant of the optimized branch
for 64-bit register inputs!

This fixes the last make check verifier issues for AArch64: PR27479.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@267465 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Quentin Colombet 2016-04-25 20:54:08 +00:00
parent 68aaa747f9
commit 3f7fbf7831
2 changed files with 66 additions and 8 deletions

View File

@ -3863,9 +3863,9 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
if (!MRI->hasOneNonDBGUse(VReg))
return false;
bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
DefMI->getOperand(2).getImm(),
(DefMI->getOpcode() == AArch64::ANDWri) ? 32 : 64);
DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
if (!isPowerOf2_64(Mask))
return false;
@ -3883,7 +3883,18 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
unsigned Opc = (Imm < 32)
? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
: (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
BuildMI(RefToMBB, MI, DL, get(Opc)).addReg(NewReg).addImm(Imm).addMBB(TBB);
MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
.addReg(NewReg)
.addImm(Imm)
.addMBB(TBB);
// For immediate smaller than 32, we need to use the 32-bit
// variant (W) in all cases. Indeed the 64-bit variant does not
// allow to encode them.
// Therefore, if the input register is 64-bit, we need to take the
// 32-bit sub-part.
if (!Is32Bit && Imm < 32)
NewMI->getOperand(0).setSubReg(AArch64::sub_32);
MI->eraseFromParent();
return true;
}

View File

@ -1,9 +1,9 @@
; RUN: llc -mtriple=aarch64-linux-gnueabi < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnueabi < %s | FileCheck %s
; CHECK-LABEL: test1
; CHECK: tbz {{w[0-9]}}, #3, {{.LBB0_3}}
; CHECK: tbz [[REG1:x[0-9]+]], #2, {{.LBB0_3}}
; CHECK-NOT: and [[REG2:x[0-9]+]], [[REG1]], #0x4
; CHECK: tbz w[[REG1:[0-9]+]], #2, {{.LBB0_3}}
; CHECK-NOT: and [[REG2:x[0-9]+]], x[[REG1]], #0x4
; CHECK-NOT: cbz [[REG2]], {{.LBB0_3}}
; CHECK: b
@ -26,8 +26,8 @@ if.end3: ; preds = %if.then2, %entry
; CHECK-LABEL: test2
; CHECK: cbz {{x[0-9]}}, {{.LBB1_3}}
; CHECK: tbz [[REG1:x[0-9]+]], #3, {{.LBB1_3}}
; CHECK-NOT: and [REG2:x[0-9]+], [[REG1]], #0x08
; CHECK: tbz w[[REG1:[0-9]+]], #3, {{.LBB1_3}}
; CHECK-NOT: and [REG2:x[0-9]+], x[[REG1]], #0x08
; CHECK-NOT: cbz [[REG2]], {{.LBB1_3}}
define void @test2(i64 %A, i64* readonly %B) #0 {
@ -47,5 +47,52 @@ if.end3: ; preds = %entry, %if.then2
ret void
}
; Make sure we use the W variant when log2(mask) is < 32.
; Both inputs are i64. %A is tested against bit 28 (1 << 28) and %B against
; bit 3 (mask 8); both bit indices are below 32, so the expected output tests
; each bit with tbz on a w register and keeps no and/cbz pair for the %A test.
; CHECK-LABEL: test3
; CHECK: tbz {{w[0-9]}}, #3, {{.LBB2_3}}
; CHECK: tbz w[[REG1:[0-9]+]], #28, {{.LBB2_3}}
; CHECK-NOT: and [[REG2:x[0-9]+]], x[[REG1]]
; CHECK-NOT: cbz [[REG2]], {{.LBB2_3}}
define void @test3(i64 %A, i64 %B) {
entry:
; Single-bit mask with only bit 28 set.
%shift = shl i64 1, 28
%and = and i64 %A, %shift
%notlhs = icmp eq i64 %and, 0
; Single-bit mask 8, i.e. bit 3.
%and.1 = and i64 %B, 8
%0 = icmp eq i64 %and.1, 0
; Take if.then2 when either tested bit is clear.
%1 = or i1 %0, %notlhs
br i1 %1, label %if.then2, label %if.end3
if.then2: ; preds = %entry
tail call void @foo(i64 %A, i64 %B)
br label %if.end3
if.end3: ; preds = %if.then2, %entry
ret void
}
; Make sure we keep the X variant when log2(mask) is >= 32.
; %A is tested against bit 35 (1 << 35), so the expected output tests it with
; tbz on an x register; %B is tested against bit 3 (mask 8), which is still
; expected on a w register.
; The negative checks reuse REG1 as captured (it already includes the "x"
; prefix here) and refer to this function's own .LBB3_3 label.
; CHECK-LABEL: test4
; CHECK: tbz {{w[0-9]}}, #3, {{.LBB3_3}}
; CHECK: tbz [[REG1:x[0-9]+]], #35, {{.LBB3_3}}
; CHECK-NOT: and [[REG2:x[0-9]+]], [[REG1]]
; CHECK-NOT: cbz [[REG2]], {{.LBB3_3}}
define void @test4(i64 %A, i64 %B) {
entry:
; Single-bit mask with only bit 35 set.
%shift = shl i64 1, 35
%and = and i64 %A, %shift
%notlhs = icmp eq i64 %and, 0
; Single-bit mask 8, i.e. bit 3.
%and.1 = and i64 %B, 8
%0 = icmp eq i64 %and.1, 0
; Take if.then2 when either tested bit is clear.
%1 = or i1 %0, %notlhs
br i1 %1, label %if.then2, label %if.end3
if.then2: ; preds = %entry
tail call void @foo(i64 %A, i64 %B)
br label %if.end3
if.end3: ; preds = %if.then2, %entry
ret void
}
declare void @foo(i64, i64)