mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-11 21:45:16 +00:00
[X86] Limit store merge size when noimplicitfloat is set (PR34421)
As suggested by @niravd : https://bugs.llvm.org/show_bug.cgi?id=34421#c2 Differential Revision: https://reviews.llvm.org/D37464 llvm-svn: 312534
This commit is contained in:
parent
cf9a358350
commit
9957f716de
@ -4611,6 +4611,20 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
|
||||
return Subtarget.hasLZCNT();
|
||||
}
|
||||
|
||||
bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
|
||||
const SelectionDAG &DAG) const {
|
||||
// Do not merge to float value size (128 bytes) if no implicit
|
||||
// float attribute is set.
|
||||
bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute(
|
||||
Attribute::NoImplicitFloat);
|
||||
|
||||
if (NoFloat) {
|
||||
unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
|
||||
return (MemVT.getSizeInBits() <= MaxIntSize);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Returns the subtarget's hasFastLZCNT() flag unchanged.
bool X86TargetLowering::isCtlzFast() const {
|
||||
return Subtarget.hasFastLZCNT();
|
||||
}
|
||||
|
@ -814,6 +814,9 @@ namespace llvm {
|
||||
|
||||
/// Overrides the base-class hook; this target unconditionally returns true.
bool mergeStoresAfterLegalization() const override { return true; }
|
||||
|
||||
/// Presumably reports whether stores may be merged into one store of type
/// MemVT. The X86TargetLowering definition returns false only when the
/// function has the noimplicitfloat attribute and MemVT is wider than 64 bits
/// (64-bit subtarget) or 32 bits (otherwise); it does not read AddressSpace.
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
|
||||
const SelectionDAG &DAG) const override;
|
||||
|
||||
bool isCheapToSpeculateCttz() const override;
|
||||
|
||||
bool isCheapToSpeculateCtlz() const override;
|
||||
|
40
test/CodeGen/X86/pr34421.ll
Normal file
40
test/CodeGen/X86/pr34421.ll
Normal file
@ -0,0 +1,40 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-macosx10.13.0 | FileCheck %s --check-prefix=X86
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s --check-prefix=X64
|
||||
|
||||
; PR34421 regression test: a noimplicitfloat function copies two adjacent i64
; values from %thread_counts into %counts. The expected assembly below uses
; only movl (i686 run) or movq (x86_64 run) for the copies.
define void @thread_selfcounts() noimplicitfloat noredzone nounwind {
|
||||
; X86-LABEL: thread_selfcounts:
|
||||
; X86: ## BB#0: ## %entry
|
||||
; X86-NEXT: subl $44, %esp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %eax, (%esp)
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: ## -- End function
|
||||
;
|
||||
; X64-LABEL: thread_selfcounts:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64-NEXT: subq $40, %rsp
|
||||
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; X64-NEXT: movq %rax, (%rsp)
|
||||
; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: ## -- End function
|
||||
entry:
|
||||
%counts = alloca [2 x i64], align 16
|
||||
%thread_counts = alloca [3 x i64], align 16
|
||||
%arraydecay = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 0
|
||||
%0 = load i64, i64* %arraydecay, align 16
|
||||
%arrayidx3 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 0
|
||||
store i64 %0, i64* %arrayidx3, align 16
|
||||
%arrayidx6 = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 1
|
||||
%1 = load i64, i64* %arrayidx6, align 8
|
||||
%arrayidx10 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 1
|
||||
store i64 %1, i64* %arrayidx10, align 8
|
||||
unreachable
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user