[X86] Limit store merge size when noimplicitfloat is set (PR34421)

As suggested by @niravd : https://bugs.llvm.org/show_bug.cgi?id=34421#c2

Differential Revision: https://reviews.llvm.org/D37464

llvm-svn: 312534
This commit is contained in:
Simon Pilgrim 2017-09-05 13:40:29 +00:00
parent cf9a358350
commit 9957f716de
3 changed files with 57 additions and 0 deletions

View File

@ -4611,6 +4611,20 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasLZCNT();
}
/// Reports whether stores may be merged into a single store of type \p MemVT.
///
/// If the current function carries the NoImplicitFloat attribute, the merged
/// store is limited to the largest integer size: 64 bits on 64-bit subtargets
/// and 32 bits otherwise. This rules out wider (e.g. 128-bit) merged stores.
/// Without the attribute every size is accepted.
///
/// \p AddressSpace is not consulted by this implementation.
bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                                         const SelectionDAG &DAG) const {
  // No size restriction unless the function is marked noimplicitfloat.
  if (!DAG.getMachineFunction().getFunction()->hasFnAttribute(
          Attribute::NoImplicitFloat))
    return true;

  // The limit is expressed in bits, matching MemVT.getSizeInBits().
  const unsigned IntLimitBits = Subtarget.is64Bit() ? 64 : 32;
  return MemVT.getSizeInBits() <= IntLimitBits;
}
/// Returns the subtarget's hasFastLZCNT() result unchanged.
bool X86TargetLowering::isCtlzFast() const {
return Subtarget.hasFastLZCNT();
}

View File

@ -814,6 +814,9 @@ namespace llvm {
/// Always returns true.
/// NOTE(review): presumably this opts X86 into store merging after
/// legalization; confirm against the base-class documentation.
bool mergeStoresAfterLegalization() const override { return true; }
/// Returns true if stores may be merged into a single store of type
/// \p MemVT. The X86 implementation limits the merged size to 64 bits
/// (64-bit subtargets) or 32 bits (otherwise) when the function has the
/// noimplicitfloat attribute, and accepts any size when it does not.
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
const SelectionDAG &DAG) const override;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;

View File

@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-macosx10.13.0 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s --check-prefix=X64
; PR34421: the function is marked noimplicitfloat, so the two adjacent i64
; stores into %counts must be emitted with integer moves (movl on i686,
; movq on x86_64) and not merged into one wider store.
define void @thread_selfcounts() noimplicitfloat noredzone nounwind {
; X86-LABEL: thread_selfcounts:
; X86: ## BB#0: ## %entry
; X86-NEXT: subl $44, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: ## -- End function
;
; X64-LABEL: thread_selfcounts:
; X64: ## BB#0: ## %entry
; X64-NEXT: subq $40, %rsp
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT: movq %rax, (%rsp)
; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; X64-NEXT: ## -- End function
entry:
%counts = alloca [2 x i64], align 16
%thread_counts = alloca [3 x i64], align 16
%arraydecay = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 0
%0 = load i64, i64* %arraydecay, align 16
%arrayidx3 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 0
store i64 %0, i64* %arrayidx3, align 16
%arrayidx6 = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 1
%1 = load i64, i64* %arrayidx6, align 8
%arrayidx10 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 1
store i64 %1, i64* %arrayidx10, align 8
unreachable
}