mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-10 05:41:40 +00:00
[X86] Modify the rdtscp intrinsic to return values instead of taking a pointer argument
Similar to what was recently done for addcarry/subborrow and has been done for rdrand/rdseed for a while. It's better to use two results and an explicit store in IR when the store isn't part of the semantics of the instruction. This allows store->load forwarding to happen in the middle end, or the store to be removed if it's never loaded. Differential Revision: https://reviews.llvm.org/D51803 llvm-svn: 341698
This commit is contained in:
parent
4658a5ccff
commit
b825f20a30
@ -53,8 +53,8 @@ let TargetPrefix = "x86" in {
|
||||
let TargetPrefix = "x86" in {
|
||||
def int_x86_rdtsc : GCCBuiltin<"__builtin_ia32_rdtsc">,
|
||||
Intrinsic<[llvm_i64_ty], [], []>;
|
||||
def int_x86_rdtscp : GCCBuiltin<"__builtin_ia32_rdtscp">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrArgMemOnly]>;
|
||||
def int_x86_rdtscp :
|
||||
Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
|
||||
}
|
||||
|
||||
// Read Performance-Monitoring Counter.
|
||||
|
@ -395,6 +395,17 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
|
||||
if (Name == "subborrow.u64")
|
||||
return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_subborrow_u64, NewFn);
|
||||
|
||||
if (Name == "rdtscp") {
|
||||
// If this intrinsic has 0 operands, it's the new version.
|
||||
if (F->getFunctionType()->getNumParams() == 0)
|
||||
return false;
|
||||
|
||||
rename(F);
|
||||
NewFn = Intrinsic::getDeclaration(F->getParent(),
|
||||
Intrinsic::x86_rdtscp);
|
||||
return true;
|
||||
}
|
||||
|
||||
// SSE4.1 ptest functions may have an old signature.
|
||||
if (Name.startswith("sse41.ptest")) { // Added in 3.2
|
||||
if (Name.substr(11) == "c")
|
||||
@ -3441,6 +3452,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::x86_rdtscp: {
|
||||
// This used to take 1 arguments. If we have no arguments, it is already
|
||||
// upgraded.
|
||||
if (CI->getNumOperands() == 0)
|
||||
return;
|
||||
|
||||
NewCall = Builder.CreateCall(NewFn);
|
||||
// Extract the second result and store it.
|
||||
Value *Data = Builder.CreateExtractValue(NewCall, 1);
|
||||
// Cast the pointer to the right type.
|
||||
Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
|
||||
llvm::PointerType::getUnqual(Data->getType()));
|
||||
Builder.CreateAlignedStore(Data, Ptr, 1);
|
||||
// Replace the original call result with the first result of the new call.
|
||||
Value *TSC = Builder.CreateExtractValue(NewCall, 0);
|
||||
|
||||
std::string Name = CI->getName();
|
||||
if (!Name.empty()) {
|
||||
CI->setName(Name + ".old");
|
||||
NewCall->setName(Name);
|
||||
}
|
||||
CI->replaceAllUsesWith(TSC);
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
|
||||
case Intrinsic::x86_addcarryx_u32:
|
||||
case Intrinsic::x86_addcarryx_u64:
|
||||
case Intrinsic::x86_addcarry_u32:
|
||||
|
@ -21746,39 +21746,39 @@ static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
|
||||
}
|
||||
SDValue Chain = HI.getValue(1);
|
||||
|
||||
SDValue TSC;
|
||||
if (Subtarget.is64Bit()) {
|
||||
// The EDX register is loaded with the high-order 32 bits of the MSR, and
|
||||
// the EAX register is loaded with the low-order 32 bits.
|
||||
TSC = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
|
||||
DAG.getConstant(32, DL, MVT::i8));
|
||||
TSC = DAG.getNode(ISD::OR, DL, MVT::i64, LO, TSC);
|
||||
} else {
|
||||
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
|
||||
TSC = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, { LO, HI });
|
||||
}
|
||||
|
||||
if (Opcode == X86ISD::RDTSCP_DAG) {
|
||||
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
|
||||
assert(N->getNumOperands() == 2 && "Unexpected number of operands!");
|
||||
|
||||
// Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
|
||||
// the ECX register. Add 'ecx' explicitly to the chain.
|
||||
SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
|
||||
HI.getValue(2));
|
||||
// Explicitly store the content of ECX at the location passed in input
|
||||
// to the 'rdtscp' intrinsic.
|
||||
Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2),
|
||||
MachinePointerInfo());
|
||||
}
|
||||
|
||||
if (Subtarget.is64Bit()) {
|
||||
// The EDX register is loaded with the high-order 32 bits of the MSR, and
|
||||
// the EAX register is loaded with the low-order 32 bits.
|
||||
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
|
||||
DAG.getConstant(32, DL, MVT::i8));
|
||||
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
|
||||
Results.push_back(Chain);
|
||||
Results.push_back(TSC);
|
||||
Results.push_back(ecx);
|
||||
Results.push_back(ecx.getValue(1));
|
||||
return;
|
||||
}
|
||||
|
||||
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
|
||||
SDValue Ops[] = { LO, HI };
|
||||
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
|
||||
Results.push_back(Pair);
|
||||
Results.push_back(TSC);
|
||||
Results.push_back(Chain);
|
||||
}
|
||||
|
||||
static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SmallVector<SDValue, 2> Results;
|
||||
SmallVector<SDValue, 3> Results;
|
||||
SDLoc DL(Op);
|
||||
getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
|
||||
Results);
|
||||
|
31
test/CodeGen/X86/rdtsc-upgrade.ll
Normal file
31
test/CodeGen/X86/rdtsc-upgrade.ll
Normal file
@ -0,0 +1,31 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=generic | FileCheck %s --check-prefix=X86
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=generic | FileCheck %s --check-prefix=X64
|
||||
|
||||
; Verify upgrading of the old form of the rdtscp intrinsic.
|
||||
|
||||
define i64 @test_builtin_rdtscp(i8* %A) {
|
||||
; X86-LABEL: test_builtin_rdtscp:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: .cfi_offset %esi, -8
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: rdtscp
|
||||
; X86-NEXT: movl %ecx, (%esi)
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_builtin_rdtscp:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: rdtscp
|
||||
; X64-NEXT: shlq $32, %rdx
|
||||
; X64-NEXT: orq %rdx, %rax
|
||||
; X64-NEXT: movl %ecx, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
%1 = tail call i64 @llvm.x86.rdtscp(i8* %A)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
declare i64 @llvm.x86.rdtscp(i8*)
|
@ -56,15 +56,19 @@ define i64 @test_builtin_rdtscp(i8* %A) {
|
||||
; X64-LABEL: test_builtin_rdtscp:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: rdtscp
|
||||
; X64-NEXT: movl %ecx, (%rdi)
|
||||
; X64-NEXT: shlq $32, %rdx
|
||||
; X64-NEXT: orq %rdx, %rax
|
||||
; X64-NEXT: movl %ecx, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
%1 = tail call i64 @llvm.x86.rdtscp(i8* %A)
|
||||
ret i64 %1
|
||||
%1 = call { i64, i32 } @llvm.x86.rdtscp()
|
||||
%2 = extractvalue { i64, i32 } %1, 1
|
||||
%3 = bitcast i8* %A to i32*
|
||||
store i32 %2, i32* %3, align 1
|
||||
%4 = extractvalue { i64, i32 } %1, 0
|
||||
ret i64 %4
|
||||
}
|
||||
|
||||
declare i64 @llvm.readcyclecounter()
|
||||
declare i64 @llvm.x86.rdtscp(i8*)
|
||||
declare { i64, i32 } @llvm.x86.rdtscp()
|
||||
declare i64 @llvm.x86.rdtsc()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user