diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 023e539250c..81eeead911d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4611,6 +4611,20 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
   return Subtarget.hasLZCNT();
 }
 
+bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+                                         const SelectionDAG &DAG) const {
+  // Only functions marked noimplicitfloat restrict the merged store width.
+  const bool NoImplicitFloat =
+      DAG.getMachineFunction().getFunction()->hasFnAttribute(
+          Attribute::NoImplicitFloat);
+  if (!NoImplicitFloat)
+    return true;
+
+  // Do not merge to float value size (128 bits); stay within integer width.
+  const unsigned IntWidthInBits = Subtarget.is64Bit() ? 64 : 32;
+  return MemVT.getSizeInBits() <= IntWidthInBits;
+}
+
 bool X86TargetLowering::isCtlzFast() const {
   return Subtarget.hasFastLZCNT();
 }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 53cd8ca5361..663b9532338 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -814,6 +814,9 @@ namespace llvm {
 
     bool mergeStoresAfterLegalization() const override { return true; }
 
+    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+                          const SelectionDAG &DAG) const override;
+
     bool isCheapToSpeculateCttz() const override;
 
     bool isCheapToSpeculateCtlz() const override;
diff --git a/test/CodeGen/X86/pr34421.ll b/test/CodeGen/X86/pr34421.ll
new file mode 100644
index 00000000000..5db8b4c601e
--- /dev/null
+++ b/test/CodeGen/X86/pr34421.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-apple-macosx10.13.0 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s --check-prefix=X64
+
+define void @thread_selfcounts() noimplicitfloat noredzone nounwind {
+; X86-LABEL: thread_selfcounts:
+; X86: ## BB#0: ## %entry
+; X86-NEXT: subl $44, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: ## -- End function
+;
+; X64-LABEL: thread_selfcounts:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: subq $40, %rsp
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT: movq %rax, (%rsp)
+; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT: ## -- End function
+entry:
+  %counts = alloca [2 x i64], align 16
+  %thread_counts = alloca [3 x i64], align 16
+  %arraydecay = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 0
+  %0 = load i64, i64* %arraydecay, align 16
+  %arrayidx3 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 0
+  store i64 %0, i64* %arrayidx3, align 16
+  %arrayidx6 = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 1
+  %1 = load i64, i64* %arrayidx6, align 8
+  %arrayidx10 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 1
+  store i64 %1, i64* %arrayidx10, align 8
+  unreachable
+}
+