From b41703bc2fc6276f5826767ec47ebb6c4adead1e Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 3 Nov 2010 05:14:24 +0000 Subject: [PATCH] Add support to match @llvm.prefetch to pld / pldw / pli. rdar://8601536. llvm-svn: 118152 --- lib/Target/ARM/ARMISelLowering.cpp | 5 ++ lib/Target/ARM/ARMInstrInfo.td | 22 ++++--- lib/Target/ARM/ARMInstrThumb2.td | 65 +++++++++---------- .../ARM/Disassembler/ARMDisassembler.cpp | 12 ++-- .../ARM/Disassembler/ThumbDisassemblerCore.h | 6 +- test/CodeGen/ARM/prefetch.ll | 64 ++++++++++++++++++ 6 files changed, 119 insertions(+), 55 deletions(-) create mode 100644 test/CodeGen/ARM/prefetch.ll diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 6503960de9f..b816e66f7d3 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -598,6 +598,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand); + // ARM v5TE+ and Thumb2 has preload instructions. + if (Subtarget->isThumb2() || + (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())) + setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. if (!Subtarget->hasV6Ops()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b3301656d2a..71557c9864e 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -991,17 +991,18 @@ def CPS : AXI<(outs), (ins cps_opt:$opt), MiscFrm, NoItinerary, "cps$opt", // // A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0. // The neg_zero operand translates -0 to -1, -1 to -2, ..., etc. -multiclass APreLoad { +multiclass APreLoad data_read, string opc> { def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, NoItinerary, - !strconcat(opc, "\t$addr"), []> { + !strconcat(opc, "\t$addr"), + [(prefetch addrmode_imm12:$addr, imm, (i32 data_read))]> { bits<4> Rt; bits<17> addr; let Inst{31-26} = 0b111101; let Inst{25} = 0; // 0 for immediate form - let Inst{24} = data; + let Inst{24} = data_read{1}; let Inst{23} = addr{12}; // U (add = ('U' == 1)) - let Inst{22} = read; + let Inst{22} = data_read{0}; let Inst{21-20} = 0b01; let Inst{19-16} = addr{16-13}; // Rn let Inst{15-12} = Rt; @@ -1009,23 +1010,24 @@ multiclass APreLoad { } def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, NoItinerary, - !strconcat(opc, "\t$shift"), []> { + !strconcat(opc, "\t$shift"), + [(prefetch ldst_so_reg:$shift, imm, (i32 data_read))]> { bits<4> Rt; bits<17> shift; let Inst{31-26} = 0b111101; let Inst{25} = 1; // 1 for register form - let Inst{24} = data; + let Inst{24} = data_read{1}; let Inst{23} = shift{12}; // U (add = ('U' == 1)) - let Inst{22} = read; + let Inst{22} = data_read{0}; let Inst{21-20} = 0b01; let Inst{19-16} = shift{16-13}; // Rn let Inst{11-0} = shift{11-0}; } } -defm PLD : APreLoad<1, 1, "pld">; -defm PLDW : APreLoad<1, 0, "pldw">; -defm PLI : APreLoad<0, 1, "pli">; +defm PLD : APreLoad<3, "pld">; +defm PLDW : APreLoad<2, "pldw">; +defm PLI : APreLoad<1, "pli">; def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary, "setend\t$end", diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 824d935cf10..44724111bb3 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -610,6 +610,8 @@ multiclass T2I_ld opcod, string opc, let Inst{20} = 1; // load let Inst{11-6} = 0b000000; } + + // FIXME: Is the pci variant actually needed? def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), iii, opc, ".w\t$dst, $addr", [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]> { @@ -1172,10 +1174,11 @@ def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs), // // A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0. // The neg_zero operand translates -0 to -1, -1 to -2, ..., etc. -multiclass T2Ipl { +multiclass T2Ipl data_read, string opc> { - def i12 : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoad_i, opc, - "\t[$base, $imm]", []> { + def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoad_i, opc, + "\t$addr", + [(prefetch t2addrmode_imm12:$addr, imm, (i32 data_read))]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; let Inst{23} = 1; // U = 1 @@ -1185,8 +1188,9 @@ multiclass T2Ipl { let Inst{15-12} = 0b1111; } - def i8 : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoad_i, opc, - "\t[$base, $imm]", []> { + def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoad_i, opc, + "\t$addr", + [(prefetch t2addrmode_imm8:$addr, imm, (i32 data_read))]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; let Inst{23} = 0; // U = 0 @@ -1197,9 +1201,23 @@ multiclass T2Ipl { let Inst{11-8} = 0b1100; } - let isCodeGenOnly = 1 in // $base doesn't exist in asmstring? - def pci : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoad_i, opc, - "\t[pc, $imm]", []> { + def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_iLoad_i, opc, + "\t$addr", + [(prefetch t2addrmode_so_reg:$addr, imm, (i32 data_read))]> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // add = TRUE for T1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-6} = 0000000; + } + + let isCodeGenOnly = 1 in + def pci : T2Ipc<(outs), (ins i32imm:$addr), IIC_iLoad_i, opc, + "\t$addr", + []> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; let Inst{23} = ?; // add = (U == 1) @@ -1209,36 +1227,11 @@ multiclass T2Ipl { let Inst{19-16} = 0b1111; // Rn = 0b1111 let Inst{15-12} = 0b1111; } - - def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoad_i, opc, - "\t[$base, $a]", []> { - let Inst{31-25} = 0b1111100; - let Inst{24} = instr; - let Inst{23} = 0; // add = TRUE for T1 - let Inst{22} = 0; - let Inst{21} = write; - let Inst{20} = 1; - let Inst{15-12} = 0b1111; - let Inst{11-6} = 0000000; - let Inst{5-4} = 0b00; // no shift is applied - } - - def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoad_i, opc, - "\t[$base, $a, lsl $shamt]", []> { - let Inst{31-25} = 0b1111100; - let Inst{24} = instr; - let Inst{23} = 0; // add = TRUE for T1 - let Inst{22} = 0; - let Inst{21} = write; - let Inst{20} = 1; - let Inst{15-12} = 0b1111; - let Inst{11-6} = 0000000; - } } -defm t2PLD : T2Ipl<0, 0, "pld">; -defm t2PLDW : T2Ipl<0, 1, "pldw">; -defm t2PLI : T2Ipl<1, 0, "pli">; +defm t2PLD : T2Ipl<0, 0, 3, "pld">; +defm t2PLDW : T2Ipl<0, 1, 2, "pldw">; +defm t2PLI : T2Ipl<1, 0, 1, "pli">; //===----------------------------------------------------------------------===// // Load / store multiple Instructions. diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 6d37bc70a1f..abdcf8161a3 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -355,11 +355,11 @@ static inline bool Thumb2PreloadOpcodeNoPCI(unsigned Opcode) { default: return false; case ARM::t2PLDi12: case ARM::t2PLDi8: - case ARM::t2PLDr: case ARM::t2PLDs: + case ARM::t2PLDs: case ARM::t2PLDWi12: case ARM::t2PLDWi8: - case ARM::t2PLDWr: case ARM::t2PLDWs: + case ARM::t2PLDWs: case ARM::t2PLIi12: case ARM::t2PLIi8: - case ARM::t2PLIr: case ARM::t2PLIs: + case ARM::t2PLIs: return true; } } @@ -369,13 +369,13 @@ static inline unsigned T2Morph2Preload2PCI(unsigned Opcode) { default: return 0; case ARM::t2PLDi12: case ARM::t2PLDi8: - case ARM::t2PLDr: case ARM::t2PLDs: + case ARM::t2PLDs: return ARM::t2PLDpci; case ARM::t2PLDWi12: case ARM::t2PLDWi8: - case ARM::t2PLDWr: case ARM::t2PLDWs: + case ARM::t2PLDWs: return ARM::t2PLDWpci; case ARM::t2PLIi12: case ARM::t2PLIi8: - case ARM::t2PLIr: case ARM::t2PLIs: + case ARM::t2PLIs: return ARM::t2PLIpci; } } diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 4f605e68331..6ed29494216 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -1731,11 +1731,11 @@ static inline bool Thumb2PreloadOpcode(unsigned Opcode) { default: return false; case ARM::t2PLDi12: case ARM::t2PLDi8: case ARM::t2PLDpci: - case ARM::t2PLDr: case ARM::t2PLDs: + case ARM::t2PLDs: case ARM::t2PLDWi12: case ARM::t2PLDWi8: case ARM::t2PLDWpci: - case ARM::t2PLDWr: case ARM::t2PLDWs: + case ARM::t2PLDWs: case ARM::t2PLIi12: case ARM::t2PLIi8: case ARM::t2PLIpci: - case ARM::t2PLIr: case ARM::t2PLIs: + case ARM::t2PLIs: return true; } } diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll new file mode 100644 index 00000000000..dbc1002e508 --- /dev/null +++ b/test/CodeGen/ARM/prefetch.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s -check-prefix=THUMB2 +; RUN: llc < %s -march=arm -mattr=+v5te | FileCheck %s -check-prefix=ARM +; rdar://8601536 + +define void @t1(i8* %ptr) nounwind { +entry: +; ARM: t1: +; ARM: pli [r0] +; ARM: pldw [r0] +; ARM: pld [r0] + +; THUMB2: t1: +; THUMB2: pli [r0] +; THUMB2: pldw [r0] +; THUMB2: pld [r0] + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 ) + ret void +} + +define void @t2(i8* %ptr) nounwind { +entry: +; ARM: t2: +; ARM: pld [r0, #1023] + +; THUMB2: t2: +; THUMB2: pld [r0, #1023] + %tmp = getelementptr i8* %ptr, i32 1023 + tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3 ) + ret void +} + +define void @t3(i32 %base, i32 %offset) nounwind { +entry: +; ARM: t3: +; ARM: pld [r0, r1, lsr #2] + +; THUMB2: t3: +; THUMB2: lsrs r1, r1, #2 +; THUMB2: pld [r0, r1] + %tmp1 = lshr i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i8* + tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 ) + ret void +} + +define void @t4(i32 %base, i32 %offset) nounwind { +entry: +; ARM: t4: +; ARM: pld [r0, r1, lsl #2] + +; THUMB2: t4: +; THUMB2: pld [r0, r1, lsl #2] + %tmp1 = shl i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i8* + tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 ) + ret void +} + +declare void @llvm.prefetch(i8*, i32, i32) nounwind