From 4828912a6aded6c1a71afa4b3518cdd3bdafcf72 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Mon, 20 Jul 2015 22:34:47 +0000 Subject: [PATCH] AArch64: Add additional Cyclone macroop fusion opportunities Related to rdar://19205407 Differential Revision: http://reviews.llvm.org/D10746 llvm-svn: 242724 --- lib/Target/AArch64/AArch64InstrInfo.cpp | 50 +++++++++++++++++-------- test/CodeGen/AArch64/misched-fusion.ll | 34 +++++++++++++++++ 2 files changed, 68 insertions(+), 16 deletions(-) create mode 100644 test/CodeGen/AArch64/misched-fusion.ll diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index c0b3f2c6091..429f6939db9 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1445,23 +1445,41 @@ bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, MachineInstr *Second) const { - // Cyclone can fuse CMN, CMP followed by Bcc. - - // FIXME: B0 can also fuse: - // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ. - if (Second->getOpcode() != AArch64::Bcc) - return false; - switch (First->getOpcode()) { - default: - return false; - case AArch64::SUBSWri: - case AArch64::ADDSWri: - case AArch64::ANDSWri: - case AArch64::SUBSXri: - case AArch64::ADDSXri: - case AArch64::ANDSXri: - return true; + // Cyclone can fuse CMN, CMP, TST followed by Bcc. + unsigned SecondOpcode = Second->getOpcode(); + if (SecondOpcode == AArch64::Bcc) { + switch (First->getOpcode()) { + default: + return false; + case AArch64::SUBSWri: + case AArch64::ADDSWri: + case AArch64::ANDSWri: + case AArch64::SUBSXri: + case AArch64::ADDSXri: + case AArch64::ANDSXri: + return true; + } } + // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ. 
+ if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX || + SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) { + switch (First->getOpcode()) { + default: + return false; + case AArch64::ADDWri: + case AArch64::ADDXri: + case AArch64::ANDWri: + case AArch64::ANDXri: + case AArch64::EORWri: + case AArch64::EORXri: + case AArch64::ORRWri: + case AArch64::ORRXri: + case AArch64::SUBWri: + case AArch64::SUBXri: + return true; + } + } + return false; } MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue( diff --git a/test/CodeGen/AArch64/misched-fusion.ll b/test/CodeGen/AArch64/misched-fusion.ll new file mode 100644 index 00000000000..d3886932903 --- /dev/null +++ b/test/CodeGen/AArch64/misched-fusion.ll @@ -0,0 +1,34 @@ +; RUN: llc -o - %s -mcpu=cyclone | FileCheck %s +target triple = "arm64-apple-ios" + +declare void @foobar(i32 %v0, i32 %v1) + +; Make sure sub is scheduled in front of cbnz +; CHECK-LABEL: test_sub_cbz: +; CHECK: add w[[ADDRES:[0-9]+]], w1, #7 +; CHECK: sub w[[SUBRES:[0-9]+]], w0, #13 +; CHECK-NEXT: cbnz w[[SUBRES]], [[SKIPBLOCK:LBB[0-9_]+]] +; CHECK: mov x0, x[[ADDRES]] +; CHECK: mov x1, x[[SUBRES]] +; CHECK: bl _foobar +; CHECK: [[SKIPBLOCK]]: +; CHECK: mov x0, x[[SUBRES]] +; CHECK: mov x1, x[[ADDRES]] +; CHECK: bl _foobar +define void @test_sub_cbz(i32 %a0, i32 %a1) { +entry: + ; except for the fusion opportunity the sub/add should be equal so the + ; scheduler would leave them in source order if it weren't for the scheduling + %v0 = sub i32 %a0, 13 + %cond = icmp eq i32 %v0, 0 + %v1 = add i32 %a1, 7 + br i1 %cond, label %if, label %exit + +if: + call void @foobar(i32 %v1, i32 %v0) + br label %exit + +exit: + call void @foobar(i32 %v0, i32 %v1) + ret void +}