mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-14 23:48:56 +00:00
[X86][DAG] Switch X86 Target to post-legalized store merge
Move store merge to happen after intrinsic lowering to allow lowered stores to be merged. Some regressions in MergeConsecutiveStores, due to missing insert_subvector, are addressed in a follow-up patch. Reviewers: craig.topper, efriedma, RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D34559 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310710 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0fdbc978d7
commit
b872fbbb3d
@ -2723,6 +2723,9 @@ public:
|
||||
bool foldBooleans, DAGCombinerInfo &DCI,
|
||||
const SDLoc &dl) const;
|
||||
|
||||
// For targets which wrap addresses, unwrap the address for analysis.
|
||||
virtual SDValue unwrapAddress(SDValue N) const { return N; }
|
||||
|
||||
/// Returns true (and the GlobalValue and the offset) if the node is a
|
||||
/// GlobalAddress + offset.
|
||||
virtual bool
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CodeGen/SelectionDAGNodes.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
@ -55,7 +56,7 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
|
||||
/// Parses tree in Ptr for base, index, offset addresses.
|
||||
BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
|
||||
// (((B + I*M) + c)) + c ...
|
||||
SDValue Base = Ptr;
|
||||
SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
|
||||
SDValue Index = SDValue();
|
||||
int64_t Offset = 0;
|
||||
bool IsIndexSignExt = false;
|
||||
|
@ -27034,6 +27034,12 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// If N is an X86ISD::Wrapper or X86ISD::WrapperRIP node, return its first
/// operand (the node being wrapped); otherwise return N unchanged.
SDValue X86TargetLowering::unwrapAddress(SDValue N) const {
|
||||
if (N->getOpcode() == X86ISD::Wrapper || N->getOpcode() == X86ISD::WrapperRIP)
|
||||
return N->getOperand(0);
|
||||
return N;
|
||||
}
|
||||
|
||||
/// Returns true (and the GlobalValue and the offset) if the node is a
|
||||
/// GlobalAddress + offset.
|
||||
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
|
||||
|
@ -812,6 +812,8 @@ namespace llvm {
|
||||
/// This method returns the name of a target specific DAG node.
|
||||
const char *getTargetNodeName(unsigned Opcode) const override;
|
||||
|
||||
bool mergeStoresAfterLegalization() const override { return true; }
|
||||
|
||||
bool isCheapToSpeculateCttz() const override;
|
||||
|
||||
bool isCheapToSpeculateCtlz() const override;
|
||||
@ -867,6 +869,8 @@ namespace llvm {
|
||||
const SelectionDAG &DAG,
|
||||
unsigned Depth) const override;
|
||||
|
||||
SDValue unwrapAddress(SDValue N) const override;
|
||||
|
||||
bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
|
||||
int64_t &Offset) const override;
|
||||
|
||||
|
@ -492,10 +492,15 @@ define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
|
||||
store float %vecext7, float* %arrayidx7, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: merge_vec_element_store
|
||||
; CHECK: vmovups
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK: vextractf128 $1, %ymm0, %xmm1
|
||||
; CHECK: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; CHECK: retq
|
||||
|
||||
; This is what should be generated:
|
||||
; FIXME-LABEL: merge_vec_element_store
|
||||
; FIXME: vmovups
|
||||
; FIXME-NEXT: vzeroupper
|
||||
; FIXME-NEXT: retq
|
||||
}
|
||||
|
||||
; PR21711 - Merge vector stores into wider vector stores.
|
||||
@ -515,11 +520,18 @@ define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x flo
|
||||
store <4 x float> %shuffle3, <4 x float>* %idx3, align 16
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: merge_vec_extract_stores
|
||||
; CHECK: vmovups %ymm0, 48(%rdi)
|
||||
; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
; These vblendpd are obviously redundant.
|
||||
; CHECK: vblendpd $12, %ymm0, %ymm0, %ymm0 # ymm0 = ymm0[0,1,2,3]
|
||||
; CHECK: vmovupd %ymm0, 48(%rdi)
|
||||
; CHECK: vblendpd $12, %ymm1, %ymm1, %ymm0 # ymm0 = ymm1[0,1,2,3]
|
||||
; CHECK: vmovupd %ymm0, 80(%rdi)
|
||||
|
||||
; This is what should be generated:
|
||||
; FIXME-LABEL: merge_vec_extract_stores
|
||||
; FIXME: vmovups %ymm0, 48(%rdi)
|
||||
; FIXME-NEXT: vmovups %ymm1, 80(%rdi)
|
||||
; FIXME-NEXT: vzeroupper
|
||||
; FIXME-NEXT: retq
|
||||
}
|
||||
|
||||
; Merging vector stores when sourced from vector loads.
|
||||
@ -557,8 +569,7 @@ define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
|
||||
}
|
||||
|
||||
; This is a minimized test based on real code that was failing.
|
||||
; We could merge stores (and loads) like this...
|
||||
|
||||
; This should now be merged.
|
||||
define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
|
||||
%idx0 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 0
|
||||
%idx1 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 1
|
||||
@ -575,10 +586,8 @@ define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: merge_vec_element_and_scalar_load
|
||||
; CHECK: movq (%rdi), %rax
|
||||
; CHECK-NEXT: movq 8(%rdi), %rcx
|
||||
; CHECK-NEXT: movq %rax, 32(%rdi)
|
||||
; CHECK-NEXT: movq %rcx, 40(%rdi)
|
||||
; CHECK: vmovups (%rdi), %xmm0
|
||||
; CHECK-NEXT: vmovups %xmm0, 32(%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
}
|
||||
|
||||
|
@ -31,13 +31,13 @@ entry:
|
||||
ret %0 %3
|
||||
}
|
||||
|
||||
|
||||
define fastcc %1 @ReturnBigStruct2() nounwind readnone {
|
||||
; X86-LABEL: ReturnBigStruct2:
|
||||
; X86: # BB#0: # %entry
|
||||
; X86-NEXT: movl $48, 4(%ecx)
|
||||
; X86-NEXT: movb $1, 2(%ecx)
|
||||
; X86-NEXT: movb $1, 1(%ecx)
|
||||
; X86-NEXT: movb $0, (%ecx)
|
||||
; X86-NEXT: movw $256, (%ecx) # imm = 0x100
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
@ -45,8 +45,7 @@ define fastcc %1 @ReturnBigStruct2() nounwind readnone {
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movl $48, 4(%rdi)
|
||||
; X64-NEXT: movb $1, 2(%rdi)
|
||||
; X64-NEXT: movb $1, 1(%rdi)
|
||||
; X64-NEXT: movb $0, (%rdi)
|
||||
; X64-NEXT: movw $256, (%rdi) # imm = 0x100
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -12,11 +12,8 @@ define i256 @foo(<8 x i32> %a) {
|
||||
;
|
||||
; SLOW-LABEL: foo:
|
||||
; SLOW: # BB#0:
|
||||
; SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; SLOW-NEXT: vpextrq $1, %xmm1, 24(%rdi)
|
||||
; SLOW-NEXT: vmovq %xmm1, 16(%rdi)
|
||||
; SLOW-NEXT: vpextrq $1, %xmm0, 8(%rdi)
|
||||
; SLOW-NEXT: vmovq %xmm0, (%rdi)
|
||||
; SLOW-NEXT: vextractf128 $1, %ymm0, 16(%rdi)
|
||||
; SLOW-NEXT: vmovups %xmm0, (%rdi)
|
||||
; SLOW-NEXT: movq %rdi, %rax
|
||||
; SLOW-NEXT: vzeroupper
|
||||
; SLOW-NEXT: retq
|
||||
|
@ -15,12 +15,11 @@ define void @PR22524({ float, float }* %arg) {
|
||||
;
|
||||
; CHECK-LABEL: PR22524:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: movl $0, 4(%rdi)
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm0
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: mulss %xmm0, %xmm1
|
||||
; CHECK-NEXT: movl $0, (%rdi)
|
||||
; CHECK-NEXT: movq $0, (%rdi)
|
||||
; CHECK-NEXT: movss %xmm1, 4(%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
|
@ -510,22 +510,22 @@ define void @extract_f64_1(double* nocapture %dst, <2 x double> %foo) nounwind {
|
||||
}
|
||||
|
||||
define void @extract_f128_0(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
|
||||
; X32-LABEL: extract_f128_0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: pushl %edi
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X32-NEXT: movl %esi, 12(%edi)
|
||||
; X32-NEXT: movl %edx, 8(%edi)
|
||||
; X32-NEXT: movl %ecx, 4(%edi)
|
||||
; X32-NEXT: movl %eax, (%edi)
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: popl %edi
|
||||
; X32-NEXT: retl
|
||||
; SSE-X32-LABEL: extract_f128_0:
|
||||
; SSE-X32: # BB#0:
|
||||
; SSE-X32-NEXT: pushl %edi
|
||||
; SSE-X32-NEXT: pushl %esi
|
||||
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; SSE-X32-NEXT: movl %esi, 12(%edi)
|
||||
; SSE-X32-NEXT: movl %edx, 8(%edi)
|
||||
; SSE-X32-NEXT: movl %ecx, 4(%edi)
|
||||
; SSE-X32-NEXT: movl %eax, (%edi)
|
||||
; SSE-X32-NEXT: popl %esi
|
||||
; SSE-X32-NEXT: popl %edi
|
||||
; SSE-X32-NEXT: retl
|
||||
;
|
||||
; SSE2-X64-LABEL: extract_f128_0:
|
||||
; SSE2-X64: # BB#0:
|
||||
@ -539,6 +539,13 @@ define void @extract_f128_0(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
|
||||
; SSE41-X64-NEXT: movq %rsi, (%rdi)
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X32-LABEL: extract_f128_0:
|
||||
; AVX-X32: # BB#0:
|
||||
; AVX-X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
|
||||
; AVX-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; AVX-X32-NEXT: vmovups %xmm0, (%eax)
|
||||
; AVX-X32-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: extract_f128_0:
|
||||
; AVX-X64: # BB#0:
|
||||
; AVX-X64-NEXT: movq %rdx, 8(%rdi)
|
||||
@ -555,22 +562,22 @@ define void @extract_f128_0(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
|
||||
}
|
||||
|
||||
define void @extract_f128_1(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
|
||||
; X32-LABEL: extract_f128_1:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: pushl %edi
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X32-NEXT: movl %esi, 12(%edi)
|
||||
; X32-NEXT: movl %edx, 8(%edi)
|
||||
; X32-NEXT: movl %ecx, 4(%edi)
|
||||
; X32-NEXT: movl %eax, (%edi)
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: popl %edi
|
||||
; X32-NEXT: retl
|
||||
; SSE-X32-LABEL: extract_f128_1:
|
||||
; SSE-X32: # BB#0:
|
||||
; SSE-X32-NEXT: pushl %edi
|
||||
; SSE-X32-NEXT: pushl %esi
|
||||
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; SSE-X32-NEXT: movl %esi, 12(%edi)
|
||||
; SSE-X32-NEXT: movl %edx, 8(%edi)
|
||||
; SSE-X32-NEXT: movl %ecx, 4(%edi)
|
||||
; SSE-X32-NEXT: movl %eax, (%edi)
|
||||
; SSE-X32-NEXT: popl %esi
|
||||
; SSE-X32-NEXT: popl %edi
|
||||
; SSE-X32-NEXT: retl
|
||||
;
|
||||
; SSE2-X64-LABEL: extract_f128_1:
|
||||
; SSE2-X64: # BB#0:
|
||||
@ -584,6 +591,13 @@ define void @extract_f128_1(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
|
||||
; SSE41-X64-NEXT: movq %rcx, (%rdi)
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X32-LABEL: extract_f128_1:
|
||||
; AVX-X32: # BB#0:
|
||||
; AVX-X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
|
||||
; AVX-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; AVX-X32-NEXT: vmovups %xmm0, (%eax)
|
||||
; AVX-X32-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: extract_f128_1:
|
||||
; AVX-X64: # BB#0:
|
||||
; AVX-X64-NEXT: movq %r8, 8(%rdi)
|
||||
|
@ -53,8 +53,10 @@ define <2 x i256> @test_zext1() {
|
||||
ret <2 x i256> %Shuff
|
||||
|
||||
; X64-LABEL: test_zext1
|
||||
; X64: movq $0
|
||||
; X64-NEXT: movq $0
|
||||
; X64: xorps %xmm0, %xmm0
|
||||
; X64: movaps %xmm0
|
||||
; X64: movaps %xmm0
|
||||
; X64: movaps %xmm0
|
||||
; X64-NEXT: movq $0
|
||||
; X64-NEXT: movq $254
|
||||
|
||||
@ -75,8 +77,10 @@ define <2 x i256> @test_zext2() {
|
||||
ret <2 x i256> %Shuff
|
||||
|
||||
; X64-LABEL: test_zext2
|
||||
; X64: movq $0
|
||||
; X64-NEXT: movq $0
|
||||
; X64: xorps %xmm0, %xmm0
|
||||
; X64-NEXT: movaps %xmm0
|
||||
; X64-NEXT: movaps %xmm0
|
||||
; X64-NEXT: movaps %xmm0
|
||||
; X64-NEXT: movq $-1
|
||||
; X64-NEXT: movq $-2
|
||||
|
||||
|
@ -26,14 +26,11 @@ define <2 x i256> @test_shl(<2 x i256> %In) {
|
||||
;
|
||||
; X64-LABEL: test_shl:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movq $0, 56(%rdi)
|
||||
; X64-NEXT: movq $0, 48(%rdi)
|
||||
; X64-NEXT: movq $0, 40(%rdi)
|
||||
; X64-NEXT: movq $0, 32(%rdi)
|
||||
; X64-NEXT: movq $0, 24(%rdi)
|
||||
; X64-NEXT: movq $0, 16(%rdi)
|
||||
; X64-NEXT: movq $0, 8(%rdi)
|
||||
; X64-NEXT: movq $0, (%rdi)
|
||||
; X64-NEXT: xorps %xmm0, %xmm0
|
||||
; X64-NEXT: movaps %xmm0, 48(%rdi)
|
||||
; X64-NEXT: movaps %xmm0, 32(%rdi)
|
||||
; X64-NEXT: movaps %xmm0, 16(%rdi)
|
||||
; X64-NEXT: movaps %xmm0, (%rdi)
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: retq
|
||||
%Amt = insertelement <2 x i256> undef, i256 -1, i32 0
|
||||
@ -65,14 +62,11 @@ define <2 x i256> @test_srl(<2 x i256> %In) {
|
||||
;
|
||||
; X64-LABEL: test_srl:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movq $0, 56(%rdi)
|
||||
; X64-NEXT: movq $0, 48(%rdi)
|
||||
; X64-NEXT: movq $0, 40(%rdi)
|
||||
; X64-NEXT: movq $0, 32(%rdi)
|
||||
; X64-NEXT: movq $0, 24(%rdi)
|
||||
; X64-NEXT: movq $0, 16(%rdi)
|
||||
; X64-NEXT: movq $0, 8(%rdi)
|
||||
; X64-NEXT: movq $0, (%rdi)
|
||||
; X64-NEXT: xorps %xmm0, %xmm0
|
||||
; X64-NEXT: movaps %xmm0, 48(%rdi)
|
||||
; X64-NEXT: movaps %xmm0, 32(%rdi)
|
||||
; X64-NEXT: movaps %xmm0, 16(%rdi)
|
||||
; X64-NEXT: movaps %xmm0, (%rdi)
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: retq
|
||||
%Amt = insertelement <2 x i256> undef, i256 -1, i32 0
|
||||
|
@ -526,44 +526,28 @@ define <8 x i16> @merge_8i16_i16_23u567u9(i16* %ptr) nounwind uwtable noinline s
|
||||
;
|
||||
; X32-SSE1-LABEL: merge_8i16_i16_23u567u9:
|
||||
; X32-SSE1: # BB#0:
|
||||
; X32-SSE1-NEXT: pushl %ebp
|
||||
; X32-SSE1-NEXT: pushl %edi
|
||||
; X32-SSE1-NEXT: .Lcfi6:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-SSE1-NEXT: pushl %ebx
|
||||
; X32-SSE1-NEXT: pushl %esi
|
||||
; X32-SSE1-NEXT: .Lcfi7:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
|
||||
; X32-SSE1-NEXT: pushl %edi
|
||||
; X32-SSE1-NEXT: .Lcfi8:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 16
|
||||
; X32-SSE1-NEXT: pushl %esi
|
||||
; X32-SSE1-NEXT: .cfi_offset %esi, -12
|
||||
; X32-SSE1-NEXT: .Lcfi9:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 20
|
||||
; X32-SSE1-NEXT: .Lcfi10:
|
||||
; X32-SSE1-NEXT: .cfi_offset %esi, -20
|
||||
; X32-SSE1-NEXT: .Lcfi11:
|
||||
; X32-SSE1-NEXT: .cfi_offset %edi, -16
|
||||
; X32-SSE1-NEXT: .Lcfi12:
|
||||
; X32-SSE1-NEXT: .cfi_offset %ebx, -12
|
||||
; X32-SSE1-NEXT: .Lcfi13:
|
||||
; X32-SSE1-NEXT: .cfi_offset %ebp, -8
|
||||
; X32-SSE1-NEXT: .cfi_offset %edi, -8
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-SSE1-NEXT: movzwl 4(%ecx), %edx
|
||||
; X32-SSE1-NEXT: movzwl 6(%ecx), %esi
|
||||
; X32-SSE1-NEXT: movzwl 10(%ecx), %edi
|
||||
; X32-SSE1-NEXT: movzwl 12(%ecx), %ebx
|
||||
; X32-SSE1-NEXT: movzwl 14(%ecx), %ebp
|
||||
; X32-SSE1-NEXT: movl 4(%ecx), %edx
|
||||
; X32-SSE1-NEXT: movl 10(%ecx), %esi
|
||||
; X32-SSE1-NEXT: movzwl 14(%ecx), %edi
|
||||
; X32-SSE1-NEXT: movzwl 18(%ecx), %ecx
|
||||
; X32-SSE1-NEXT: movw %bp, 10(%eax)
|
||||
; X32-SSE1-NEXT: movw %bx, 8(%eax)
|
||||
; X32-SSE1-NEXT: movw %di, 10(%eax)
|
||||
; X32-SSE1-NEXT: movw %cx, 14(%eax)
|
||||
; X32-SSE1-NEXT: movw %si, 2(%eax)
|
||||
; X32-SSE1-NEXT: movw %dx, (%eax)
|
||||
; X32-SSE1-NEXT: movw %di, 6(%eax)
|
||||
; X32-SSE1-NEXT: movl %edx, (%eax)
|
||||
; X32-SSE1-NEXT: movl %esi, 6(%eax)
|
||||
; X32-SSE1-NEXT: popl %esi
|
||||
; X32-SSE1-NEXT: popl %edi
|
||||
; X32-SSE1-NEXT: popl %ebx
|
||||
; X32-SSE1-NEXT: popl %ebp
|
||||
; X32-SSE1-NEXT: retl $4
|
||||
;
|
||||
; X32-SSE41-LABEL: merge_8i16_i16_23u567u9:
|
||||
@ -607,10 +591,8 @@ define <8 x i16> @merge_8i16_i16_34uuuuuu(i16* %ptr) nounwind uwtable noinline s
|
||||
; X32-SSE1: # BB#0:
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-SSE1-NEXT: movzwl 6(%ecx), %edx
|
||||
; X32-SSE1-NEXT: movzwl 8(%ecx), %ecx
|
||||
; X32-SSE1-NEXT: movw %cx, 2(%eax)
|
||||
; X32-SSE1-NEXT: movw %dx, (%eax)
|
||||
; X32-SSE1-NEXT: movl 6(%ecx), %ecx
|
||||
; X32-SSE1-NEXT: movl %ecx, (%eax)
|
||||
; X32-SSE1-NEXT: retl $4
|
||||
;
|
||||
; X32-SSE41-LABEL: merge_8i16_i16_34uuuuuu:
|
||||
@ -640,24 +622,14 @@ define <8 x i16> @merge_8i16_i16_45u7zzzz(i16* %ptr) nounwind uwtable noinline s
|
||||
;
|
||||
; X32-SSE1-LABEL: merge_8i16_i16_45u7zzzz:
|
||||
; X32-SSE1: # BB#0:
|
||||
; X32-SSE1-NEXT: pushl %esi
|
||||
; X32-SSE1-NEXT: .Lcfi14:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-SSE1-NEXT: .Lcfi15:
|
||||
; X32-SSE1-NEXT: .cfi_offset %esi, -8
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-SSE1-NEXT: movzwl 8(%ecx), %edx
|
||||
; X32-SSE1-NEXT: movzwl 10(%ecx), %esi
|
||||
; X32-SSE1-NEXT: movl 8(%ecx), %edx
|
||||
; X32-SSE1-NEXT: movzwl 14(%ecx), %ecx
|
||||
; X32-SSE1-NEXT: movw %si, 2(%eax)
|
||||
; X32-SSE1-NEXT: movw %dx, (%eax)
|
||||
; X32-SSE1-NEXT: movl %edx, (%eax)
|
||||
; X32-SSE1-NEXT: movw %cx, 6(%eax)
|
||||
; X32-SSE1-NEXT: movw $0, 14(%eax)
|
||||
; X32-SSE1-NEXT: movw $0, 12(%eax)
|
||||
; X32-SSE1-NEXT: movw $0, 10(%eax)
|
||||
; X32-SSE1-NEXT: movw $0, 8(%eax)
|
||||
; X32-SSE1-NEXT: popl %esi
|
||||
; X32-SSE1-NEXT: movl $0, 12(%eax)
|
||||
; X32-SSE1-NEXT: movl $0, 8(%eax)
|
||||
; X32-SSE1-NEXT: retl $4
|
||||
;
|
||||
; X32-SSE41-LABEL: merge_8i16_i16_45u7zzzz:
|
||||
@ -694,64 +666,44 @@ define <16 x i8> @merge_16i8_i8_01u3456789ABCDuF(i8* %ptr) nounwind uwtable noin
|
||||
;
|
||||
; X32-SSE1-LABEL: merge_16i8_i8_01u3456789ABCDuF:
|
||||
; X32-SSE1: # BB#0:
|
||||
; X32-SSE1-NEXT: pushl %ebx
|
||||
; X32-SSE1-NEXT: .Lcfi16:
|
||||
; X32-SSE1-NEXT: pushl %ebp
|
||||
; X32-SSE1-NEXT: .Lcfi10:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-SSE1-NEXT: subl $12, %esp
|
||||
; X32-SSE1-NEXT: .Lcfi17:
|
||||
; X32-SSE1-NEXT: pushl %ebx
|
||||
; X32-SSE1-NEXT: .Lcfi11:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
|
||||
; X32-SSE1-NEXT: pushl %edi
|
||||
; X32-SSE1-NEXT: .Lcfi12:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 16
|
||||
; X32-SSE1-NEXT: pushl %esi
|
||||
; X32-SSE1-NEXT: .Lcfi13:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 20
|
||||
; X32-SSE1-NEXT: .Lcfi18:
|
||||
; X32-SSE1-NEXT: .cfi_offset %ebx, -8
|
||||
; X32-SSE1-NEXT: .Lcfi14:
|
||||
; X32-SSE1-NEXT: .cfi_offset %esi, -20
|
||||
; X32-SSE1-NEXT: .Lcfi15:
|
||||
; X32-SSE1-NEXT: .cfi_offset %edi, -16
|
||||
; X32-SSE1-NEXT: .Lcfi16:
|
||||
; X32-SSE1-NEXT: .cfi_offset %ebx, -12
|
||||
; X32-SSE1-NEXT: .Lcfi17:
|
||||
; X32-SSE1-NEXT: .cfi_offset %ebp, -8
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-SSE1-NEXT: movb (%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb %dl, {{[0-9]+}}(%esp) # 1-byte Spill
|
||||
; X32-SSE1-NEXT: movb 1(%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb %dl, {{[0-9]+}}(%esp) # 1-byte Spill
|
||||
; X32-SSE1-NEXT: movb 3(%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb %dl, {{[0-9]+}}(%esp) # 1-byte Spill
|
||||
; X32-SSE1-NEXT: movb 4(%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb %dl, {{[0-9]+}}(%esp) # 1-byte Spill
|
||||
; X32-SSE1-NEXT: movb 5(%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb %dl, {{[0-9]+}}(%esp) # 1-byte Spill
|
||||
; X32-SSE1-NEXT: movb 6(%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb %dl, {{[0-9]+}}(%esp) # 1-byte Spill
|
||||
; X32-SSE1-NEXT: movb 7(%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb %dl, {{[0-9]+}}(%esp) # 1-byte Spill
|
||||
; X32-SSE1-NEXT: movb 8(%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb %dl, {{[0-9]+}}(%esp) # 1-byte Spill
|
||||
; X32-SSE1-NEXT: movb 9(%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb %dl, {{[0-9]+}}(%esp) # 1-byte Spill
|
||||
; X32-SSE1-NEXT: movb 10(%ecx), %bh
|
||||
; X32-SSE1-NEXT: movb 11(%ecx), %bl
|
||||
; X32-SSE1-NEXT: movb 12(%ecx), %dh
|
||||
; X32-SSE1-NEXT: movzwl (%ecx), %ebp
|
||||
; X32-SSE1-NEXT: movl 3(%ecx), %esi
|
||||
; X32-SSE1-NEXT: movl 7(%ecx), %edi
|
||||
; X32-SSE1-NEXT: movzwl 11(%ecx), %ebx
|
||||
; X32-SSE1-NEXT: movb 13(%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb 15(%ecx), %cl
|
||||
; X32-SSE1-NEXT: movb %dl, 13(%eax)
|
||||
; X32-SSE1-NEXT: movb %dh, 12(%eax)
|
||||
; X32-SSE1-NEXT: movb %cl, 15(%eax)
|
||||
; X32-SSE1-NEXT: movb %bl, 11(%eax)
|
||||
; X32-SSE1-NEXT: movb %bh, 10(%eax)
|
||||
; X32-SSE1-NEXT: movb {{[0-9]+}}(%esp), %cl # 1-byte Reload
|
||||
; X32-SSE1-NEXT: movb %cl, 9(%eax)
|
||||
; X32-SSE1-NEXT: movb {{[0-9]+}}(%esp), %cl # 1-byte Reload
|
||||
; X32-SSE1-NEXT: movb %cl, 8(%eax)
|
||||
; X32-SSE1-NEXT: movb {{[0-9]+}}(%esp), %cl # 1-byte Reload
|
||||
; X32-SSE1-NEXT: movb %cl, 7(%eax)
|
||||
; X32-SSE1-NEXT: movb {{[0-9]+}}(%esp), %cl # 1-byte Reload
|
||||
; X32-SSE1-NEXT: movb %cl, 6(%eax)
|
||||
; X32-SSE1-NEXT: movb {{[0-9]+}}(%esp), %cl # 1-byte Reload
|
||||
; X32-SSE1-NEXT: movb %cl, 5(%eax)
|
||||
; X32-SSE1-NEXT: movb {{[0-9]+}}(%esp), %cl # 1-byte Reload
|
||||
; X32-SSE1-NEXT: movb %cl, 4(%eax)
|
||||
; X32-SSE1-NEXT: movb {{[0-9]+}}(%esp), %cl # 1-byte Reload
|
||||
; X32-SSE1-NEXT: movb %cl, 1(%eax)
|
||||
; X32-SSE1-NEXT: movb {{[0-9]+}}(%esp), %cl # 1-byte Reload
|
||||
; X32-SSE1-NEXT: movb %cl, (%eax)
|
||||
; X32-SSE1-NEXT: movb {{[0-9]+}}(%esp), %cl # 1-byte Reload
|
||||
; X32-SSE1-NEXT: movb %cl, 3(%eax)
|
||||
; X32-SSE1-NEXT: addl $12, %esp
|
||||
; X32-SSE1-NEXT: movw %bx, 11(%eax)
|
||||
; X32-SSE1-NEXT: movl %edi, 7(%eax)
|
||||
; X32-SSE1-NEXT: movw %bp, (%eax)
|
||||
; X32-SSE1-NEXT: movl %esi, 3(%eax)
|
||||
; X32-SSE1-NEXT: popl %esi
|
||||
; X32-SSE1-NEXT: popl %edi
|
||||
; X32-SSE1-NEXT: popl %ebx
|
||||
; X32-SSE1-NEXT: popl %ebp
|
||||
; X32-SSE1-NEXT: retl $4
|
||||
;
|
||||
; X32-SSE41-LABEL: merge_16i8_i8_01u3456789ABCDuF:
|
||||
@ -819,17 +771,13 @@ define <16 x i8> @merge_16i8_i8_01u3uuzzuuuuuzzz(i8* %ptr) nounwind uwtable noin
|
||||
; X32-SSE1: # BB#0:
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-SSE1-NEXT: movb (%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb 1(%ecx), %dh
|
||||
; X32-SSE1-NEXT: movzwl (%ecx), %edx
|
||||
; X32-SSE1-NEXT: movb 3(%ecx), %cl
|
||||
; X32-SSE1-NEXT: movb %dh, 1(%eax)
|
||||
; X32-SSE1-NEXT: movb %dl, (%eax)
|
||||
; X32-SSE1-NEXT: movw %dx, (%eax)
|
||||
; X32-SSE1-NEXT: movb %cl, 3(%eax)
|
||||
; X32-SSE1-NEXT: movb $0, 15(%eax)
|
||||
; X32-SSE1-NEXT: movb $0, 14(%eax)
|
||||
; X32-SSE1-NEXT: movb $0, 13(%eax)
|
||||
; X32-SSE1-NEXT: movb $0, 7(%eax)
|
||||
; X32-SSE1-NEXT: movb $0, 6(%eax)
|
||||
; X32-SSE1-NEXT: movw $0, 13(%eax)
|
||||
; X32-SSE1-NEXT: movw $0, 6(%eax)
|
||||
; X32-SSE1-NEXT: retl $4
|
||||
;
|
||||
; X32-SSE41-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
|
||||
@ -867,35 +815,14 @@ define <16 x i8> @merge_16i8_i8_0123uu67uuuuuzzz(i8* %ptr) nounwind uwtable noin
|
||||
;
|
||||
; X32-SSE1-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
|
||||
; X32-SSE1: # BB#0:
|
||||
; X32-SSE1-NEXT: pushl %ebx
|
||||
; X32-SSE1-NEXT: .Lcfi19:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-SSE1-NEXT: pushl %eax
|
||||
; X32-SSE1-NEXT: .Lcfi20:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
|
||||
; X32-SSE1-NEXT: .Lcfi21:
|
||||
; X32-SSE1-NEXT: .cfi_offset %ebx, -8
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-SSE1-NEXT: movb (%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb %dl, {{[0-9]+}}(%esp) # 1-byte Spill
|
||||
; X32-SSE1-NEXT: movb 1(%ecx), %dh
|
||||
; X32-SSE1-NEXT: movb 2(%ecx), %bl
|
||||
; X32-SSE1-NEXT: movb 3(%ecx), %bh
|
||||
; X32-SSE1-NEXT: movb 6(%ecx), %dl
|
||||
; X32-SSE1-NEXT: movb 7(%ecx), %cl
|
||||
; X32-SSE1-NEXT: movb %cl, 7(%eax)
|
||||
; X32-SSE1-NEXT: movb %dl, 6(%eax)
|
||||
; X32-SSE1-NEXT: movb %bh, 3(%eax)
|
||||
; X32-SSE1-NEXT: movb %bl, 2(%eax)
|
||||
; X32-SSE1-NEXT: movb %dh, 1(%eax)
|
||||
; X32-SSE1-NEXT: movb {{[0-9]+}}(%esp), %cl # 1-byte Reload
|
||||
; X32-SSE1-NEXT: movb %cl, (%eax)
|
||||
; X32-SSE1-NEXT: movl (%ecx), %edx
|
||||
; X32-SSE1-NEXT: movzwl 6(%ecx), %ecx
|
||||
; X32-SSE1-NEXT: movw %cx, 6(%eax)
|
||||
; X32-SSE1-NEXT: movl %edx, (%eax)
|
||||
; X32-SSE1-NEXT: movb $0, 15(%eax)
|
||||
; X32-SSE1-NEXT: movb $0, 14(%eax)
|
||||
; X32-SSE1-NEXT: movb $0, 13(%eax)
|
||||
; X32-SSE1-NEXT: addl $4, %esp
|
||||
; X32-SSE1-NEXT: popl %ebx
|
||||
; X32-SSE1-NEXT: movw $0, 13(%eax)
|
||||
; X32-SSE1-NEXT: retl $4
|
||||
;
|
||||
; X32-SSE41-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
|
||||
@ -990,14 +917,14 @@ define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinlin
|
||||
; X32-SSE1-LABEL: merge_2i64_i64_12_volatile:
|
||||
; X32-SSE1: # BB#0:
|
||||
; X32-SSE1-NEXT: pushl %edi
|
||||
; X32-SSE1-NEXT: .Lcfi22:
|
||||
; X32-SSE1-NEXT: .Lcfi18:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-SSE1-NEXT: pushl %esi
|
||||
; X32-SSE1-NEXT: .Lcfi23:
|
||||
; X32-SSE1-NEXT: .Lcfi19:
|
||||
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
|
||||
; X32-SSE1-NEXT: .Lcfi24:
|
||||
; X32-SSE1-NEXT: .Lcfi20:
|
||||
; X32-SSE1-NEXT: .cfi_offset %esi, -12
|
||||
; X32-SSE1-NEXT: .Lcfi25:
|
||||
; X32-SSE1-NEXT: .Lcfi21:
|
||||
; X32-SSE1-NEXT: .cfi_offset %edi, -8
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
|
@ -13,7 +13,7 @@
|
||||
; X86-NEXT: movb [[HI1]], 3([[BASEREG]])
|
||||
; X86-NEXT: retq
|
||||
|
||||
; DBGDAG-LABEL: Optimized lowered selection DAG: BB#0 'merge_store_partial_overlap_load:'
|
||||
; DBGDAG-LABEL: Optimized legalized selection DAG: BB#0 'merge_store_partial_overlap_load:'
|
||||
; DBGDAG: [[ENTRYTOKEN:t[0-9]+]]: ch = EntryToken
|
||||
; DBGDAG-DAG: [[BASEPTR:t[0-9]+]]: i64,ch = CopyFromReg [[ENTRYTOKEN]],
|
||||
; DBGDAG-DAG: [[ADDPTR:t[0-9]+]]: i64 = add [[BASEPTR]], Constant:i64<2>
|
||||
@ -27,7 +27,7 @@
|
||||
|
||||
; DBGDAG: X86ISD::RET_FLAG t{{[0-9]+}},
|
||||
|
||||
; DBGDAG: Type-legalized selection DAG: BB#0 'merge_store_partial_overlap_load:'
|
||||
; DBGDAG-LABEL: Instruction selection begins
|
||||
define void @merge_store_partial_overlap_load([4 x i8]* %tmp) {
|
||||
%tmp8 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 0
|
||||
%tmp10 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 1
|
||||
|
@ -5,22 +5,8 @@
|
||||
define <16 x i8> @PR27973() {
|
||||
; CHECK-LABEL: PR27973:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movb $0, 15(%rdi)
|
||||
; CHECK-NEXT: movb $0, 14(%rdi)
|
||||
; CHECK-NEXT: movb $0, 13(%rdi)
|
||||
; CHECK-NEXT: movb $0, 12(%rdi)
|
||||
; CHECK-NEXT: movb $0, 11(%rdi)
|
||||
; CHECK-NEXT: movb $0, 10(%rdi)
|
||||
; CHECK-NEXT: movb $0, 9(%rdi)
|
||||
; CHECK-NEXT: movb $0, 8(%rdi)
|
||||
; CHECK-NEXT: movb $0, 7(%rdi)
|
||||
; CHECK-NEXT: movb $0, 6(%rdi)
|
||||
; CHECK-NEXT: movb $0, 5(%rdi)
|
||||
; CHECK-NEXT: movb $0, 4(%rdi)
|
||||
; CHECK-NEXT: movb $0, 3(%rdi)
|
||||
; CHECK-NEXT: movb $0, 2(%rdi)
|
||||
; CHECK-NEXT: movb $0, 1(%rdi)
|
||||
; CHECK-NEXT: movb $0, (%rdi)
|
||||
; CHECK-NEXT: movq $0, 8(%rdi)
|
||||
; CHECK-NEXT: movq $0, (%rdi)
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = zext <16 x i8> zeroinitializer to <16 x i32>
|
||||
|
@ -14,8 +14,7 @@ entry:
|
||||
;
|
||||
; CHECK-DAG: movq {{.*}}, 192(%rsp)
|
||||
; CHECK-DAG: movq {{.*}}, 184(%rsp)
|
||||
; CHECK-DAG: movl {{.*}}, 180(%rsp)
|
||||
; CHECK-DAG: movl {{.*}}, 176(%rsp)
|
||||
; CHECK-DAG: movq {{.*}}, 176(%rsp)
|
||||
%ap3 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1]
|
||||
call void @bar(%struct.__va_list_tag* %ap3) nounwind
|
||||
call void @llvm.va_end(i8* %ap12)
|
||||
|
@ -13,9 +13,8 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
;; the same result in memory in the end.
|
||||
|
||||
; CHECK-LABEL: redundant_stores_merging:
|
||||
; CHECK: movabsq $528280977409, %rax
|
||||
; CHECK: movq %rax, e+4(%rip)
|
||||
; CHECK: movl $456, e+8(%rip)
|
||||
; CHECK: movabsq $1958505086977, %rax
|
||||
; CHECK: movq %rax, e+4(%rip)
|
||||
define void @redundant_stores_merging() {
|
||||
entry:
|
||||
store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
|
||||
@ -26,9 +25,8 @@ entry:
|
||||
|
||||
;; This variant tests PR25154.
|
||||
; CHECK-LABEL: redundant_stores_merging_reverse:
|
||||
; CHECK: movabsq $528280977409, %rax
|
||||
; CHECK: movq %rax, e+4(%rip)
|
||||
; CHECK: movl $456, e+8(%rip)
|
||||
; CHECK: movabsq $1958505086977, %rax
|
||||
; CHECK: movq %rax, e+4(%rip)
|
||||
define void @redundant_stores_merging_reverse() {
|
||||
entry:
|
||||
store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
|
||||
|
Loading…
Reference in New Issue
Block a user