Re-apply r267206 with a fix for the encoding problem: when the immediate, log2(Mask), is smaller than 32, we must use the 32-bit variant because the 64-bit variant cannot encode it. Therefore, set the subreg part accordingly.

[AArch64] Fix optimizeCondBranch logic.

The opcode for the optimized branch does not depend on the size of the active bits in the AND mask, but on the AND opcode itself. Indeed, we need to use the X or W variant based on the AND variant, not based on whether the mask fits into the related variant. Otherwise, we may end up using the W variant of the optimized branch for 64-bit register inputs!

This fixes the last make check verifier issues for AArch64: PR27479.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@267465 91177308-0d34-0410-b5e6-96231b3b80d8
2025-01-31 01:15:36 +00:00 · 2016-04-25 20:54:08 +00:00 · 2016-04-25 20:54:08 +00:00 · 3f7fbf7831
commit 3f7fbf7831
parent 68aaa747f9
2 changed files with 66 additions and 8 deletions
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@ -3863,9 +3863,9 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

+    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
-        DefMI->getOperand(2).getImm(),
-        (DefMI->getOpcode() == AArch64::ANDWri) ? 32 : 64);
+        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

@ -3883,7 +3883,18 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
-    BuildMI(RefToMBB, MI, DL, get(Opc)).addReg(NewReg).addImm(Imm).addMBB(TBB);
+    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
+                              .addReg(NewReg)
+                              .addImm(Imm)
+                              .addMBB(TBB);
+
+    // For immediate smaller than 32, we need to use the 32-bit
+    // variant (W) in all cases. Indeed the 64-bit variant does not
+    // allow to encode them.
+    // Therefore, if the input register is 64-bit, we need to take the
+    // 32-bit sub-part.
+    if (!Is32Bit && Imm < 32)
+      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI->eraseFromParent();
    return true;
  }
--- a/test/CodeGen/AArch64/aarch64-tbz.ll
+++ b/test/CodeGen/AArch64/aarch64-tbz.ll
@ -1,9 +1,9 @@
-; RUN: llc -mtriple=aarch64-linux-gnueabi < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnueabi < %s | FileCheck %s

 ; CHECK-LABEL: test1
 ; CHECK: tbz {{w[0-9]}}, #3, {{.LBB0_3}}
-; CHECK: tbz [[REG1:x[0-9]+]], #2, {{.LBB0_3}}
-; CHECK-NOT: and [[REG2:x[0-9]+]], [[REG1]], #0x4
+; CHECK: tbz w[[REG1:[0-9]+]], #2, {{.LBB0_3}}
+; CHECK-NOT: and [[REG2:x[0-9]+]], x[[REG1]], #0x4
 ; CHECK-NOT: cbz [[REG2]], {{.LBB0_3}}

 ; CHECK: b
@ -26,8 +26,8 @@ if.end3:                                          ; preds = %if.then2, %entry

 ; CHECK-LABEL: test2
 ; CHECK: cbz {{x[0-9]}}, {{.LBB1_3}}
-; CHECK: tbz [[REG1:x[0-9]+]], #3, {{.LBB1_3}}
-; CHECK-NOT: and [REG2:x[0-9]+], [[REG1]], #0x08
+; CHECK: tbz w[[REG1:[0-9]+]], #3, {{.LBB1_3}}
+; CHECK-NOT: and [REG2:x[0-9]+], x[[REG1]], #0x08
 ; CHECK-NOT: cbz [[REG2]], {{.LBB1_3}}

 define void @test2(i64 %A, i64* readonly %B) #0 {
@ -47,5 +47,52 @@ if.end3:                                          ; preds = %entry, %if.then2
  ret void
 }

+; Make sure we use the W variant when log2(mask) is < 32.
+; CHECK-LABEL: test3
+; CHECK: tbz {{w[0-9]}}, #3, {{.LBB2_3}}
+; CHECK: tbz w[[REG1:[0-9]+]], #28, {{.LBB2_3}}
+; CHECK-NOT: and [[REG2:x[0-9]+]], x[[REG1]]
+; CHECK-NOT: cbz [[REG2]], {{.LBB2_3}}
+define void @test3(i64 %A, i64 %B) {
+entry:
+  %shift = shl i64 1, 28
+  %and = and i64 %A, %shift
+  %notlhs = icmp eq i64 %and, 0
+  %and.1 = and i64 %B, 8
+  %0 = icmp eq i64 %and.1, 0
+  %1 = or i1 %0, %notlhs
+  br i1 %1, label %if.then2, label %if.end3
+
+if.then2:                                         ; preds = %entry
+  tail call void @foo(i64 %A, i64 %B)
+  br label %if.end3
+
+if.end3:                                          ; preds = %if.then2, %entry
+  ret void
+}
+
+; CHECK-LABEL: test4
+; CHECK: tbz {{w[0-9]}}, #3, {{.LBB3_3}}
+; CHECK: tbz [[REG1:x[0-9]+]], #35, {{.LBB3_3}}
+; CHECK-NOT: and [[REG2:x[0-9]+]], x[[REG1]]
+; CHECK-NOT: cbz [[REG2]], {{.LBB2_3}}
+define void @test4(i64 %A, i64 %B) {
+entry:
+  %shift = shl i64 1, 35
+  %and = and i64 %A, %shift
+  %notlhs = icmp eq i64 %and, 0
+  %and.1 = and i64 %B, 8
+  %0 = icmp eq i64 %and.1, 0
+  %1 = or i1 %0, %notlhs
+  br i1 %1, label %if.then2, label %if.end3
+
+if.then2:                                         ; preds = %entry
+  tail call void @foo(i64 %A, i64 %B)
+  br label %if.end3
+
+if.end3:                                          ; preds = %if.then2, %entry
+  ret void
+}
+

 declare void @foo(i64, i64)