mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-03 02:53:06 +00:00
[X86][SSE4A] Autoupgrade and remove MOVNTSD/MOVNTSS intrinsics
Removing the builtin intrinsic patterns required better annotation of the instruction defs.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273077 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e56626f946
commit
e2e7d46a44
@ -941,11 +941,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
llvm_i8_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_sse4a_insertq : GCCBuiltin<"__builtin_ia32_insertq">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_sse4a_movnt_ss :
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty], []>;
|
||||
def int_x86_sse4a_movnt_sd :
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty], []>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -209,6 +209,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
Name == "x86.avx2.vinserti128" ||
|
||||
Name.startswith("x86.avx.vextractf128.") ||
|
||||
Name == "x86.avx2.vextracti128" ||
|
||||
Name.startswith("x86.sse4a.movnt.") ||
|
||||
Name.startswith("x86.avx.movnt.") ||
|
||||
Name == "x86.sse2.storel.dq" ||
|
||||
Name.startswith("x86.sse.storeu.") ||
|
||||
@ -616,6 +617,30 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
Value *Src = CI->getArgOperand(0);
|
||||
VectorType *DstTy = cast<VectorType>(CI->getType());
|
||||
Rep = Builder.CreateFPToSI(Src, DstTy, "cvtt");
|
||||
} else if (Name.startswith("llvm.x86.sse4a.movnt.")) {
|
||||
Module *M = F->getParent();
|
||||
SmallVector<Metadata *, 1> Elts;
|
||||
Elts.push_back(
|
||||
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
|
||||
MDNode *Node = MDNode::get(C, Elts);
|
||||
|
||||
Value *Arg0 = CI->getArgOperand(0);
|
||||
Value *Arg1 = CI->getArgOperand(1);
|
||||
|
||||
// Nontemporal (unaligned) store of the 0'th element of the float/double
|
||||
// vector.
|
||||
Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
|
||||
PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
|
||||
Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
|
||||
Value *Extract =
|
||||
Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
|
||||
|
||||
StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
|
||||
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
|
||||
|
||||
// Remove intrinsic.
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
} else if (Name.startswith("llvm.x86.avx.movnt.")) {
|
||||
Module *M = F->getParent();
|
||||
SmallVector<Metadata *, 1> Elts;
|
||||
|
@ -7776,13 +7776,13 @@ def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
|
||||
|
||||
// Non-temporal (unaligned) scalar stores.
|
||||
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
||||
let mayStore = 1, SchedRW = [WriteStore] in {
|
||||
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
|
||||
"movntss\t{$src, $dst|$dst, $src}",
|
||||
[(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS;
|
||||
"movntss\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVNT>, XS;
|
||||
|
||||
def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
||||
"movntsd\t{$src, $dst|$dst, $src}",
|
||||
[(int_x86_sse4a_movnt_sd addr:$dst, VR128:$src)]>, XD;
|
||||
"movntsd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVNT>, XD;
|
||||
} // SchedRW
|
||||
|
||||
def : Pat<(nontemporalstore FR32:$src, addr:$dst),
|
||||
(MOVNTSS addr:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
|
||||
|
39
test/CodeGen/X86/sse4a-upgrade.ll
Normal file
39
test/CodeGen/X86/sse4a-upgrade.ll
Normal file
@ -0,0 +1,39 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse4a,+avx | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a,+avx | FileCheck %s --check-prefix=X64
|
||||
|
||||
define void @test_movntss(i8* %p, <4 x float> %a) nounwind optsize ssp {
|
||||
; X32-LABEL: test_movntss:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movntss %xmm0, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_movntss:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movntss %xmm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
tail call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a) nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>)
|
||||
|
||||
define void @test_movntsd(i8* %p, <2 x double> %a) nounwind optsize ssp {
|
||||
; X32-LABEL: test_movntsd:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movntsd %xmm0, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_movntsd:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movntsd %xmm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
tail call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a) nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>)
|
@ -4,40 +4,6 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a,+avx | FileCheck %s --check-prefix=X64
|
||||
|
||||
define void @test_movntss(i8* %p, <4 x float> %a) nounwind optsize ssp {
|
||||
; X32-LABEL: test_movntss:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movntss %xmm0, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_movntss:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movntss %xmm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
tail call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a) nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>)
|
||||
|
||||
define void @test_movntsd(i8* %p, <2 x double> %a) nounwind optsize ssp {
|
||||
; X32-LABEL: test_movntsd:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movntsd %xmm0, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_movntsd:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movntsd %xmm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
tail call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a) nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>)
|
||||
|
||||
define <2 x i64> @test_extrqi(<2 x i64> %x) nounwind uwtable ssp {
|
||||
; X32-LABEL: test_extrqi:
|
||||
; X32: # BB#0:
|
||||
|
Loading…
x
Reference in New Issue
Block a user