mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-23 03:59:12 +00:00
Add support to match @llvm.prefetch to pld / pldw / pli. rdar://8601536.
llvm-svn: 118152
This commit is contained in:
parent
3894287c96
commit
b41703bc2f
@ -598,6 +598,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
|
||||
|
||||
// ARM v5TE+ and Thumb2 have preload instructions.
|
||||
if (Subtarget->isThumb2() ||
|
||||
(!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))
|
||||
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
|
||||
|
||||
// Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
|
||||
if (!Subtarget->hasV6Ops()) {
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
|
||||
|
@ -991,17 +991,18 @@ def CPS : AXI<(outs), (ins cps_opt:$opt), MiscFrm, NoItinerary, "cps$opt",
|
||||
//
|
||||
// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0.
|
||||
// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
|
||||
multiclass APreLoad<bit data, bit read, string opc> {
|
||||
multiclass APreLoad<bits<2> data_read, string opc> {
|
||||
|
||||
def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, NoItinerary,
|
||||
!strconcat(opc, "\t$addr"), []> {
|
||||
!strconcat(opc, "\t$addr"),
|
||||
[(prefetch addrmode_imm12:$addr, imm, (i32 data_read))]> {
|
||||
bits<4> Rt;
|
||||
bits<17> addr;
|
||||
let Inst{31-26} = 0b111101;
|
||||
let Inst{25} = 0; // 0 for immediate form
|
||||
let Inst{24} = data;
|
||||
let Inst{24} = data_read{1};
|
||||
let Inst{23} = addr{12}; // U (add = ('U' == 1))
|
||||
let Inst{22} = read;
|
||||
let Inst{22} = data_read{0};
|
||||
let Inst{21-20} = 0b01;
|
||||
let Inst{19-16} = addr{16-13}; // Rn
|
||||
let Inst{15-12} = Rt;
|
||||
@ -1009,23 +1010,24 @@ multiclass APreLoad<bit data, bit read, string opc> {
|
||||
}
|
||||
|
||||
def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, NoItinerary,
|
||||
!strconcat(opc, "\t$shift"), []> {
|
||||
!strconcat(opc, "\t$shift"),
|
||||
[(prefetch ldst_so_reg:$shift, imm, (i32 data_read))]> {
|
||||
bits<4> Rt;
|
||||
bits<17> shift;
|
||||
let Inst{31-26} = 0b111101;
|
||||
let Inst{25} = 1; // 1 for register form
|
||||
let Inst{24} = data;
|
||||
let Inst{24} = data_read{1};
|
||||
let Inst{23} = shift{12}; // U (add = ('U' == 1))
|
||||
let Inst{22} = read;
|
||||
let Inst{22} = data_read{0};
|
||||
let Inst{21-20} = 0b01;
|
||||
let Inst{19-16} = shift{16-13}; // Rn
|
||||
let Inst{11-0} = shift{11-0};
|
||||
}
|
||||
}
|
||||
|
||||
defm PLD : APreLoad<1, 1, "pld">;
|
||||
defm PLDW : APreLoad<1, 0, "pldw">;
|
||||
defm PLI : APreLoad<0, 1, "pli">;
|
||||
defm PLD : APreLoad<3, "pld">;
|
||||
defm PLDW : APreLoad<2, "pldw">;
|
||||
defm PLI : APreLoad<1, "pli">;
|
||||
|
||||
def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
|
||||
"setend\t$end",
|
||||
|
@ -610,6 +610,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
|
||||
let Inst{20} = 1; // load
|
||||
let Inst{11-6} = 0b000000;
|
||||
}
|
||||
|
||||
// FIXME: Is the pci variant actually needed?
|
||||
def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), iii,
|
||||
opc, ".w\t$dst, $addr",
|
||||
[(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]> {
|
||||
@ -1172,10 +1174,11 @@ def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
|
||||
//
|
||||
// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0.
|
||||
// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
|
||||
multiclass T2Ipl<bit instr, bit write, string opc> {
|
||||
multiclass T2Ipl<bit instr, bit write, bits<2> data_read, string opc> {
|
||||
|
||||
def i12 : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoad_i, opc,
|
||||
"\t[$base, $imm]", []> {
|
||||
def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoad_i, opc,
|
||||
"\t$addr",
|
||||
[(prefetch t2addrmode_imm12:$addr, imm, (i32 data_read))]> {
|
||||
let Inst{31-25} = 0b1111100;
|
||||
let Inst{24} = instr;
|
||||
let Inst{23} = 1; // U = 1
|
||||
@ -1185,8 +1188,9 @@ multiclass T2Ipl<bit instr, bit write, string opc> {
|
||||
let Inst{15-12} = 0b1111;
|
||||
}
|
||||
|
||||
def i8 : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoad_i, opc,
|
||||
"\t[$base, $imm]", []> {
|
||||
def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoad_i, opc,
|
||||
"\t$addr",
|
||||
[(prefetch t2addrmode_imm8:$addr, imm, (i32 data_read))]> {
|
||||
let Inst{31-25} = 0b1111100;
|
||||
let Inst{24} = instr;
|
||||
let Inst{23} = 0; // U = 0
|
||||
@ -1197,9 +1201,23 @@ multiclass T2Ipl<bit instr, bit write, string opc> {
|
||||
let Inst{11-8} = 0b1100;
|
||||
}
|
||||
|
||||
let isCodeGenOnly = 1 in // $base doesn't exist in asmstring?
|
||||
def pci : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoad_i, opc,
|
||||
"\t[pc, $imm]", []> {
|
||||
def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_iLoad_i, opc,
|
||||
"\t$addr",
|
||||
[(prefetch t2addrmode_so_reg:$addr, imm, (i32 data_read))]> {
|
||||
let Inst{31-25} = 0b1111100;
|
||||
let Inst{24} = instr;
|
||||
let Inst{23} = 0; // add = TRUE for T1
|
||||
let Inst{22} = 0;
|
||||
let Inst{21} = write;
|
||||
let Inst{20} = 1;
|
||||
let Inst{15-12} = 0b1111;
|
||||
let Inst{11-6} = 0000000;
|
||||
}
|
||||
|
||||
let isCodeGenOnly = 1 in
|
||||
def pci : T2Ipc<(outs), (ins i32imm:$addr), IIC_iLoad_i, opc,
|
||||
"\t$addr",
|
||||
[]> {
|
||||
let Inst{31-25} = 0b1111100;
|
||||
let Inst{24} = instr;
|
||||
let Inst{23} = ?; // add = (U == 1)
|
||||
@ -1209,36 +1227,11 @@ multiclass T2Ipl<bit instr, bit write, string opc> {
|
||||
let Inst{19-16} = 0b1111; // Rn = 0b1111
|
||||
let Inst{15-12} = 0b1111;
|
||||
}
|
||||
|
||||
def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoad_i, opc,
|
||||
"\t[$base, $a]", []> {
|
||||
let Inst{31-25} = 0b1111100;
|
||||
let Inst{24} = instr;
|
||||
let Inst{23} = 0; // add = TRUE for T1
|
||||
let Inst{22} = 0;
|
||||
let Inst{21} = write;
|
||||
let Inst{20} = 1;
|
||||
let Inst{15-12} = 0b1111;
|
||||
let Inst{11-6} = 0000000;
|
||||
let Inst{5-4} = 0b00; // no shift is applied
|
||||
}
|
||||
|
||||
def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoad_i, opc,
|
||||
"\t[$base, $a, lsl $shamt]", []> {
|
||||
let Inst{31-25} = 0b1111100;
|
||||
let Inst{24} = instr;
|
||||
let Inst{23} = 0; // add = TRUE for T1
|
||||
let Inst{22} = 0;
|
||||
let Inst{21} = write;
|
||||
let Inst{20} = 1;
|
||||
let Inst{15-12} = 0b1111;
|
||||
let Inst{11-6} = 0000000;
|
||||
}
|
||||
}
|
||||
|
||||
defm t2PLD : T2Ipl<0, 0, "pld">;
|
||||
defm t2PLDW : T2Ipl<0, 1, "pldw">;
|
||||
defm t2PLI : T2Ipl<1, 0, "pli">;
|
||||
defm t2PLD : T2Ipl<0, 0, 3, "pld">;
|
||||
defm t2PLDW : T2Ipl<0, 1, 2, "pldw">;
|
||||
defm t2PLI : T2Ipl<1, 0, 1, "pli">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Load / store multiple Instructions.
|
||||
|
@ -355,11 +355,11 @@ static inline bool Thumb2PreloadOpcodeNoPCI(unsigned Opcode) {
|
||||
default:
|
||||
return false;
|
||||
case ARM::t2PLDi12: case ARM::t2PLDi8:
|
||||
case ARM::t2PLDr: case ARM::t2PLDs:
|
||||
case ARM::t2PLDs:
|
||||
case ARM::t2PLDWi12: case ARM::t2PLDWi8:
|
||||
case ARM::t2PLDWr: case ARM::t2PLDWs:
|
||||
case ARM::t2PLDWs:
|
||||
case ARM::t2PLIi12: case ARM::t2PLIi8:
|
||||
case ARM::t2PLIr: case ARM::t2PLIs:
|
||||
case ARM::t2PLIs:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -369,13 +369,13 @@ static inline unsigned T2Morph2Preload2PCI(unsigned Opcode) {
|
||||
default:
|
||||
return 0;
|
||||
case ARM::t2PLDi12: case ARM::t2PLDi8:
|
||||
case ARM::t2PLDr: case ARM::t2PLDs:
|
||||
case ARM::t2PLDs:
|
||||
return ARM::t2PLDpci;
|
||||
case ARM::t2PLDWi12: case ARM::t2PLDWi8:
|
||||
case ARM::t2PLDWr: case ARM::t2PLDWs:
|
||||
case ARM::t2PLDWs:
|
||||
return ARM::t2PLDWpci;
|
||||
case ARM::t2PLIi12: case ARM::t2PLIi8:
|
||||
case ARM::t2PLIr: case ARM::t2PLIs:
|
||||
case ARM::t2PLIs:
|
||||
return ARM::t2PLIpci;
|
||||
}
|
||||
}
|
||||
|
@ -1731,11 +1731,11 @@ static inline bool Thumb2PreloadOpcode(unsigned Opcode) {
|
||||
default:
|
||||
return false;
|
||||
case ARM::t2PLDi12: case ARM::t2PLDi8: case ARM::t2PLDpci:
|
||||
case ARM::t2PLDr: case ARM::t2PLDs:
|
||||
case ARM::t2PLDs:
|
||||
case ARM::t2PLDWi12: case ARM::t2PLDWi8: case ARM::t2PLDWpci:
|
||||
case ARM::t2PLDWr: case ARM::t2PLDWs:
|
||||
case ARM::t2PLDWs:
|
||||
case ARM::t2PLIi12: case ARM::t2PLIi8: case ARM::t2PLIpci:
|
||||
case ARM::t2PLIr: case ARM::t2PLIs:
|
||||
case ARM::t2PLIs:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
64
test/CodeGen/ARM/prefetch.ll
Normal file
64
test/CodeGen/ARM/prefetch.ll
Normal file
@ -0,0 +1,64 @@
|
||||
; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld
|
||||
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s -check-prefix=THUMB2
|
||||
; RUN: llc < %s -march=arm -mattr=+v5te | FileCheck %s -check-prefix=ARM
|
||||
; rdar://8601536
|
||||
|
||||
; Test t1 issues three llvm.prefetch calls on %ptr whose last argument is
; 1, 2 and 3. The check lines for both prefixes list pli, pldw and pld on
; [r0], one per call.
define void @t1(i8* %ptr) nounwind {
|
||||
entry:
|
||||
; ARM: t1:
|
||||
; ARM: pli [r0]
|
||||
; ARM: pldw [r0]
|
||||
; ARM: pld [r0]
|
||||
|
||||
; THUMB2: t1:
|
||||
; THUMB2: pli [r0]
|
||||
; THUMB2: pldw [r0]
|
||||
; THUMB2: pld [r0]
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1 )
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2 )
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test t2 prefetches the address %ptr + 1023 (formed with getelementptr).
; Both prefixes expect the constant to appear as an immediate offset in the
; pld operand, i.e. [r0, #1023].
define void @t2(i8* %ptr) nounwind {
|
||||
entry:
|
||||
; ARM: t2:
|
||||
; ARM: pld [r0, #1023]
|
||||
|
||||
; THUMB2: t2:
|
||||
; THUMB2: pld [r0, #1023]
|
||||
%tmp = getelementptr i8* %ptr, i32 1023
|
||||
tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3 )
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test t3 prefetches the address %base + (%offset lshr 2). The ARM checks
; expect the shift inside the pld operand ([r0, r1, lsr #2]); the THUMB2
; checks expect a separate lsrs instruction followed by pld [r0, r1].
define void @t3(i32 %base, i32 %offset) nounwind {
|
||||
entry:
|
||||
; ARM: t3:
|
||||
; ARM: pld [r0, r1, lsr #2]
|
||||
|
||||
; THUMB2: t3:
|
||||
; THUMB2: lsrs r1, r1, #2
|
||||
; THUMB2: pld [r0, r1]
|
||||
%tmp1 = lshr i32 %offset, 2
|
||||
%tmp2 = add i32 %base, %tmp1
|
||||
%tmp3 = inttoptr i32 %tmp2 to i8*
|
||||
tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test t4 prefetches the address %base + (%offset shl 2). Both prefixes
; expect the left shift inside the pld operand, i.e. [r0, r1, lsl #2].
define void @t4(i32 %base, i32 %offset) nounwind {
|
||||
entry:
|
||||
; ARM: t4:
|
||||
; ARM: pld [r0, r1, lsl #2]
|
||||
|
||||
; THUMB2: t4:
|
||||
; THUMB2: pld [r0, r1, lsl #2]
|
||||
%tmp1 = shl i32 %offset, 2
|
||||
%tmp2 = add i32 %base, %tmp1
|
||||
%tmp3 = inttoptr i32 %tmp2 to i8*
|
||||
tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.prefetch(i8*, i32, i32) nounwind
|
Loading…
Reference in New Issue
Block a user