diff --git a/docs/LangRef.html b/docs/LangRef.html
index a024efb0993..74678c7be8e 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -6064,7 +6064,7 @@ LLVM.
Syntax:
- declare void @llvm.prefetch(i8* <address>, i32 <rw>, i32 <locality>)
+ declare void @llvm.prefetch(i8* <address>, i32 <rw>, i32 <locality>, i32 <cache type>)
Overview:
@@ -6077,8 +6077,10 @@ LLVM.
address is the address to be prefetched, rw is the
specifier determining if the fetch should be for a read (0) or write (1),
and locality is a temporal locality specifier ranging from (0) - no
- locality, to (3) - extremely local keep in cache. The rw
- and locality arguments must be constant integers.
+ locality, to (3) - extremely local keep in cache. The cache type
+ specifies whether the prefetch is performed on the data (1) or instruction (0)
+ cache. The rw, locality and cache type arguments
+ must be constant integers.
Semantics:
This intrinsic does not modify the behavior of the program. In particular,
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index e765cadf4dc..498614e7fd5 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -580,7 +580,8 @@ namespace ISD {
// PREFETCH - This corresponds to a prefetch intrinsic. It takes chains are
// their first operand. The other operands are the address to prefetch,
- // read / write specifier, and locality specifier.
+ // read / write specifier, locality specifier and instruction / data cache
+ // specifier.
PREFETCH,
// OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load,
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index d8f249a2c0b..c1fbce4e054 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -211,7 +211,8 @@ def int_stackrestore : Intrinsic<[], [llvm_ptr_ty]>,
// however it does conveniently prevent the prefetch from being reordered
// with respect to nearby accesses to the same memory.
def int_prefetch : Intrinsic<[],
- [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
[IntrReadWriteArgMem, NoCapture<0>]>;
def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>;
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 672117f1447..285b8b1abda 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -197,8 +197,8 @@ def SDTSubVecInsert : SDTypeProfile<1, 3, [ // subvector insert
SDTCisSubVecOfVec<2, 1>, SDTCisSameAs<0,1>, SDTCisInt<3>
]>;
-def SDTPrefetch : SDTypeProfile<0, 3, [ // prefetch
- SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>
+def SDTPrefetch : SDTypeProfile<0, 4, [ // prefetch
+ SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, SDTCisInt<1>
]>;
def SDTMemBarrier : SDTypeProfile<0, 5, [ // memory barier
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3aff0ad5195..8a08fd7e300 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4788,15 +4788,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return implVisitAluOverflow(I, ISD::SMULO);
case Intrinsic::prefetch: {
- SDValue Ops[4];
+ SDValue Ops[5];
unsigned rw = cast(I.getArgOperand(1))->getZExtValue();
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = getValue(I.getArgOperand(3));
DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
DAG.getVTList(MVT::Other),
- &Ops[0], 4,
+ &Ops[0], 5,
EVT::getIntegerVT(*Context, 8),
MachinePointerInfo(I.getArgOperand(0)),
0, /* align */
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 339c85886ae..b012b501f92 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2281,12 +2281,13 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
// ARMv7 with MP extension has PLDW.
return Op.getOperand(0);
- if (Subtarget->isThumb())
+ unsigned isData = cast(Op.getOperand(4))->getZExtValue();
+ if (Subtarget->isThumb()) {
// Invert the bits.
isRead = ~isRead & 1;
- unsigned isData = Subtarget->isThumb() ? 0 : 1;
+ isData = ~isData & 1;
+ }
- // Currently there is no intrinsic that matches pli.
return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
DAG.getConstant(isData, MVT::i32));
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 3b1f8464a7a..fbddd126482 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -62,6 +62,9 @@ def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
+ SDTCisInt<1>]>;
+
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
@@ -130,7 +133,7 @@ def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
-def ARMPreload : SDNode<"ARMISD::PRELOAD", SDTPrefetch,
+def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b64c03a9b59..a38e3721f35 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2006,13 +2006,13 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
// Prefetch intrinsic.
def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
- "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
- "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
- "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
- "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index f8f15caec91..9d4543ded48 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -284,6 +284,30 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
break;
}
+ // This upgrades the llvm.prefetch intrinsic to accept one more parameter,
+ // which is a instruction / data cache identifier. The old version only
+ // implicitly accepted the data version.
+ if (Name.compare(5,8,"prefetch",8) == 0) {
+ // Don't do anything if it has the correct number of arguments already
+ if (FTy->getNumParams() == 4)
+ break;
+
+ assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!");
+ // We first need to change the name of the old (bad) intrinsic, because
+ // its type is incorrect, but we cannot overload that name. We
+ // arbitrarily unique it here allowing us to construct a correctly named
+ // and typed function below.
+ F->setName("");
+ NewFn = cast(M->getOrInsertFunction(Name,
+ FTy->getReturnType(),
+ FTy->getParamType(0),
+ FTy->getParamType(1),
+ FTy->getParamType(2),
+ FTy->getParamType(2),
+ (Type*)0));
+ return true;
+ }
+
break;
case 'x':
// This fixes the poorly named crc32 intrinsics
@@ -1344,6 +1368,29 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
break;
}
+ case Intrinsic::prefetch: {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+ const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
+
+ // Add the extra "data cache" argument
+ Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2),
+ llvm::ConstantInt::get(I32Ty, 1) };
+ CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+4,
+ CI->getName(), CI);
+ NewCI->setTailCall(CI->isTailCall());
+ NewCI->setCallingConv(CI->getCallingConv());
+ // Handle any uses of the old CallInst.
+ if (!CI->use_empty())
+ // Replace all uses of the old call with the new cast which has the
+ // correct type.
+ CI->replaceAllUsesWith(NewCI);
+
+ // Clean up the old call now that it has been completely upgraded.
+ CI->eraseFromParent();
+ break;
+ }
}
}
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll
index 417493f7168..20beb495667 100644
--- a/test/Assembler/AutoUpgradeIntrinsics.ll
+++ b/test/Assembler/AutoUpgradeIntrinsics.ll
@@ -109,3 +109,11 @@ define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D) {
call void @llvm.x86.sse2.movnt.i(i8* %B, i32 %D)
ret void
}
+
+declare void @llvm.prefetch(i8*, i32, i32) nounwind
+
+define void @p(i8* %ptr) {
+; CHECK: llvm.prefetch(i8* %ptr, i32 0, i32 1, i32 1)
+ tail call void @llvm.prefetch(i8* %ptr, i32 0, i32 1)
+ ret void
+}
diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll
index 95f082aa938..03a6947699a 100644
--- a/test/CodeGen/ARM/prefetch.ll
+++ b/test/CodeGen/ARM/prefetch.ll
@@ -17,8 +17,8 @@ entry:
; THUMB2: t1:
; THUMB2-NOT: pldw [r0]
; THUMB2: pld [r0]
- tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3 )
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
ret void
}
@@ -30,7 +30,7 @@ entry:
; THUMB2: t2:
; THUMB2: pld [r0, #1023]
%tmp = getelementptr i8* %ptr, i32 1023
- tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3 )
+ tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3, i32 1 )
ret void
}
@@ -45,7 +45,7 @@ entry:
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
- tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
+ tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3, i32 1 )
ret void
}
@@ -59,8 +59,8 @@ entry:
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
- tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
+ tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3, i32 1 )
ret void
}
-declare void @llvm.prefetch(i8*, i32, i32) nounwind
+declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index 48d2673e488..ebe11a5e8e4 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -6,11 +6,11 @@ entry:
; CHECK: prefetcht1
; CHECK: prefetcht0
; CHECK: prefetchnta
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1 )
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2 )
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 )
ret void
}
-declare void @llvm.prefetch(i8*, i32, i32) nounwind
+declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind