Elide stores which are overwritten without being observed.

Summary:
In SelectionDAG, when a store is immediately chained to another store
to the same address, elide the first store as it has no observable
effects. This yields small improvements when dealing with intrinsics
lowered to stores.
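
For illustration, a minimal IR sketch of the pattern this targets; the
function and values below are hypothetical, not taken from the test suite:

  define void @overwritten(i32* %p) {
    store i32 0, i32* %p, align 4   ; fully overwritten by the next store; now elided
    store i32 7, i32* %p, align 4   ; only this store has an observable effect
    ret void
  }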

Test notes:

* Many test cases overwrite store addresses multiple times and needed
  minor changes, mainly making stores volatile to prevent the
  optimization from eliding the stores under test (see the sketch after
  these notes).

* Many X86 test cases had instructions associated with va_start
  optimized out.

* Note that test_splat in CodeGen/AArch64/misched-stp.ll no longer has
  dependencies to check and can probably be removed or replaced with
  another test.
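
A minimal sketch of that adjustment (the function @keep_both below is
hypothetical, not one of the changed tests): marking the stores volatile
keeps both in the output even though the first is overwritten, so the
existing FileCheck lines still match.

  define void @keep_both(i32 %a, i32 %b, i32* %p) {
    store volatile i32 %a, i32* %p, align 4   ; volatile stores are never elided
    store volatile i32 %b, i32* %p, align 4
    ret void
  }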

Reviewers: rnk, john.brawn

Subscribers: aemerson, rengolin, qcolombet, jyknight, nemanjai, nhaehnle, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D33206

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303198 91177308-0d34-0410-b5e6-96231b3b80d8
Nirav Dave 2017-05-16 19:43:56 +00:00
parent 34eb467434
commit acc2c1d71d
23 changed files with 135 additions and 150 deletions

View File

@@ -13087,14 +13087,28 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
// If this is a store followed by a store with the same value to the same
// location, then the store is dead/noop.
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
ST1->isUnindexed() && !ST1->isVolatile()) {
// The store is dead, remove it.
return Chain;
if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
!ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
ST->getMemoryVT() == ST1->getMemoryVT()) {
// If this is a store followed by a store with the same value to the same
// location, then the store is dead/noop.
if (ST1->getValue() == Value) {
// The store is dead, remove it.
return Chain;
}
// If this store fully overwrites the preceding store to the same location,
// and no other node is chained to that store, we can effectively drop it.
// Do not remove stores to undef as they may be used as data sinks.
if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
!ST1->getBasePtr().isUndef()) {
// ST1 is fully overwritten and can be elided. Combine with its chain
// value.
CombineTo(ST1, ST1->getChain());
return SDValue();
}
}
}

View File

@@ -9,9 +9,9 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
; Original test case which exhibited the bug
define void @test1(%struct.tree_common* %t, i32 %code, i8* %type) {
; CHECK-LABEL: test1:
; CHECK: stp xzr, xzr, [x0, #8]
; CHECK: stp xzr, x2, [x0]
; CHECK: str w1, [x0, #16]
; CHECK-DAG: stp x2, xzr, [x0, #8]
; CHECK-DAG: str w1, [x0, #16]
; CHECK-DAG: str xzr, [x0]
entry:
%0 = bitcast %struct.tree_common* %t to i8*
tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false)
@@ -25,10 +25,8 @@ entry:
; Store to each struct element instead of using memset
define void @test2(%struct.tree_common* %t, i32 %code, i8* %type) {
; CHECK-LABEL: test2:
; CHECK: stp xzr, xzr, [x0]
; CHECK: str wzr, [x0, #16]
; CHECK: str w1, [x0, #16]
; CHECK: str x2, [x0, #8]
; CHECK-DAG: str w1, [x0, #16]
; CHECK-DAG: stp xzr, x2, [x0]
entry:
%0 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 0
%1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
@@ -44,9 +42,9 @@ entry:
; Vector store instead of memset
define void @test3(%struct.tree_common* %t, i32 %code, i8* %type) {
; CHECK-LABEL: test3:
; CHECK: stp xzr, xzr, [x0, #8]
; CHECK: stp xzr, x2, [x0]
; CHECK: str w1, [x0, #16]
; CHECK-DAG: stp x2, xzr, [x0, #8]
; CHECK-DAG: str w1, [x0, #16]
; CHECK-DAG: str xzr, [x0]
entry:
%0 = bitcast %struct.tree_common* %t to <3 x i64>*
store <3 x i64> zeroinitializer, <3 x i64>* %0, align 8
@@ -60,9 +58,8 @@ entry:
; Vector store, then store to vector elements
define void @test4(<3 x i64>* %p, i64 %x, i64 %y) {
; CHECK-LABEL: test4:
; CHECK: stp xzr, xzr, [x0, #8]
; CHECK: stp xzr, x2, [x0]
; CHECK: str x1, [x0, #16]
; CHECK-DAG: stp x2, x1, [x0, #8]
; CHECK-DAG: str xzr, [x0]
entry:
store <3 x i64> zeroinitializer, <3 x i64>* %p, align 8
%0 = bitcast <3 x i64>* %p to i64*

View File

@@ -1,20 +1,18 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -o - | FileCheck %s
; Tests to check that the scheduler dependencies derived from alias analysis are
; correct when we have loads that have been split up so that they can later be
; merged into STP.
; CHECK: ********** MI Scheduling **********
; CHECK: test_splat:BB#0 entry
; CHECK: SU({{[0-9]+}}): STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%3+8]
; CHECK: Successors:
; CHECK-NEXT: ord [[SU1:SU\([0-9]+\)]]
; CHECK: SU({{[0-9]+}}): STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%3+4]
; CHECK: Successors:
; CHECK-NEXT: ord [[SU2:SU\([0-9]+\)]]
; CHECK: [[SU1]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%2]
; CHECK: [[SU2]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%1]
; Now that overwritten stores are elided in SelectionDAG, dependencies
; are resolved and removed before MISCHED. Check that we emit an
; equivalent pair of stp instructions as a baseline.
; CHECK-LABEL: test_splat
; CHECK: ldr [[REG:w[0-9]+]], [x2]
; CHECK-DAG: stp w0, [[REG]], [x2, #12]
; CHECK-DAG: stp [[REG]], w1, [x2, #4]
define void @test_splat(i32 %x, i32 %y, i32* %p) {
entry:
%val = load i32, i32* %p, align 4
@@ -35,16 +33,11 @@ entry:
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
%struct.tree_common = type { i8*, i8*, i32 }
; CHECK: ********** MI Scheduling **********
; CHECK: test_zero:BB#0 entry
; CHECK: SU({{[0-9]+}}): STRXui %XZR, %vreg{{[0-9]+}}, 2; mem:ST8[%0+16]
; CHECK: Successors:
; CHECK-NEXT: ord [[SU3:SU\([0-9]+\)]]
; CHECK: SU({{[0-9]+}}): STRXui %XZR, %vreg{{[0-9]+}}, 1; mem:ST8[%0+8]
; CHECK: Successors:
; CHECK-NEXT: ord [[SU4:SU\([0-9]+\)]]
; CHECK: [[SU3]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 4; mem:ST4[%code1]
; CHECK: [[SU4]]: STRXui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 1; mem:ST8[%type2]
; CHECK-LABEL: test_zero
; CHECK-DAG: stp x2, xzr, [x0, #8]
; CHECK-DAG: str w1, [x0, #16]
; CHECK-DAG: str xzr, [x0]
define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) {
entry:
%0 = bitcast %struct.tree_common* %t to i8*

View File

@@ -29,10 +29,10 @@
define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {
%ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @private1, i32 0, i32 %index
%val = load float, float addrspace(2)* %ptr
store float %val, float addrspace(1)* %out
store volatile float %val, float addrspace(1)* %out
%ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @private2, i32 0, i32 %index
%val2 = load float, float addrspace(2)* %ptr2
store float %val2, float addrspace(1)* %out
store volatile float %val2, float addrspace(1)* %out
ret void
}

View File

@@ -20,7 +20,7 @@ bb3: ; preds = %bb, %entry
bb8: ; preds = %bb3
%1 = getelementptr inbounds i8, i8* %0, i32 0
store i8 0, i8* %1, align 1
store volatile i8 0, i8* %1, align 1
%2 = call i32 @ptou() nounwind
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
@@ -35,7 +35,7 @@ bb8: ; preds = %bb3
%7 = or i8 %6, 48
%8 = add i8 %6, 87
%iftmp.5.0.1 = select i1 %5, i8 %7, i8 %8
store i8 %iftmp.5.0.1, i8* %p8, align 1
store volatile i8 %iftmp.5.0.1, i8* %p8, align 1
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -49,7 +49,7 @@ bb8: ; preds = %bb3
%13 = or i8 %12, 48
%14 = add i8 %12, 87
%iftmp.5.0.2 = select i1 %11, i8 %13, i8 %14
store i8 %iftmp.5.0.2, i8* %p8, align 1
store volatile i8 %iftmp.5.0.2, i8* %p8, align 1
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -63,7 +63,7 @@ bb8: ; preds = %bb3
%19 = or i8 %18, 48
%20 = add i8 %18, 87
%iftmp.5.0.4 = select i1 %17, i8 %19, i8 %20
store i8 %iftmp.5.0.4, i8* null, align 1
store volatile i8 %iftmp.5.0.4, i8* null, align 1
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -74,7 +74,7 @@ bb8: ; preds = %bb3
%22 = urem i32 %21, 10
%23 = icmp ult i32 %22, 10
%iftmp.5.0.5 = select i1 %23, i8 0, i8 %val8
store i8 %iftmp.5.0.5, i8* %p8, align 1
store volatile i8 %iftmp.5.0.5, i8* %p8, align 1
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -88,7 +88,7 @@ bb8: ; preds = %bb3
%28 = or i8 %27, 48
%29 = add i8 %27, 87
%iftmp.5.0.6 = select i1 %26, i8 %28, i8 %29
store i8 %iftmp.5.0.6, i8* %p8, align 1
store volatile i8 %iftmp.5.0.6, i8* %p8, align 1
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -102,7 +102,7 @@ bb8: ; preds = %bb3
%34 = or i8 %33, 48
%35 = add i8 %33, 87
%iftmp.5.0.7 = select i1 %32, i8 %34, i8 %35
store i8 %iftmp.5.0.7, i8* %p8, align 1
store volatile i8 %iftmp.5.0.7, i8* %p8, align 1
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -116,7 +116,7 @@ bb8: ; preds = %bb3
%40 = or i8 %39, 48
%41 = add i8 %39, 87
%iftmp.5.0.8 = select i1 %38, i8 %40, i8 %41
store i8 %iftmp.5.0.8, i8* null, align 1
store volatile i8 %iftmp.5.0.8, i8* null, align 1
br label %bb46
bb46: ; preds = %bb3

View File

@@ -13,7 +13,7 @@ entry:
; CHECK: sub sp, sp, #12
; CHECK: sub sp, sp, #4
; CHECK: add r0, sp, #4
; CHECK: stm sp, {r0, r1, r2, r3}
; CHECK: stmib sp, {r1, r2, r3}
%g = alloca i8*
%g1 = bitcast i8** %g to i8*
call void @llvm.va_start(i8* %g1)

View File

@@ -8,7 +8,7 @@
; CHECK-LABEL: {{^}}main
; CHECK: mov [[TMP:r[0-9]+]], #0
; CHECK-NEXT: str [[TMP]], [sp, #4]
; CHECK-NEXT: str [[TMP]], [sp]
; CHECK_O0: str [[TMP]], [sp]
; CHECK_O0: ldr [[TMP:r[0-9]+]], [sp]
; CHECK_O0-NEXT: add [[TMP]], [[TMP]], #2
; CHECK_O1-NOT: ldr [[TMP:r[0-9]+]], [sp]

View File

@@ -25,7 +25,6 @@ define i16 @va_arg(i8* %vl) nounwind {
entry:
; CHECK-LABEL: va_arg:
%vl.addr = alloca i8*, align 2
; CHECK: mov.w r12, 0(r1)
store i8* %vl, i8** %vl.addr, align 2
; CHECK: mov.w r12, [[REG:r[0-9]+]]
; CHECK-NEXT: add.w #2, [[REG]]

View File

@@ -9,9 +9,9 @@ entry:
%0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG1
%1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 240)
store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
store volatile <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
%3 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 15)
store <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
store volatile <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
%4 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 170)
store <16 x i8> %4, <16 x i8>* @llvm_mips_bmnzi_b_RES
ret void
@@ -32,9 +32,9 @@ entry:
%0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG1
%1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 240)
store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
store volatile <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
%3 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 15)
store <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
store volatile <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
%4 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 170)
store <16 x i8> %4, <16 x i8>* @llvm_mips_bmnzi_b_RES
ret void

View File

@@ -14,19 +14,19 @@ entry:
%0 = load ppc_fp128, ppc_fp128* @ld, align 16
%1 = load ppc_fp128, ppc_fp128* @ld2, align 16
%add = fadd ppc_fp128 %0, %1
store ppc_fp128 %add, ppc_fp128* %c, align 16
store volatile ppc_fp128 %add, ppc_fp128* %c, align 16
%2 = load ppc_fp128, ppc_fp128* @ld, align 16
%3 = load ppc_fp128, ppc_fp128* @ld2, align 16
%sub = fsub ppc_fp128 %2, %3
store ppc_fp128 %sub, ppc_fp128* %c, align 16
store volatile ppc_fp128 %sub, ppc_fp128* %c, align 16
%4 = load ppc_fp128, ppc_fp128* @ld, align 16
%5 = load ppc_fp128, ppc_fp128* @ld2, align 16
%mul = fmul ppc_fp128 %4, %5
store ppc_fp128 %mul, ppc_fp128* %c, align 16
store volatile ppc_fp128 %mul, ppc_fp128* %c, align 16
%6 = load ppc_fp128, ppc_fp128* @ld, align 16
%7 = load ppc_fp128, ppc_fp128* @ld2, align 16
%div = fdiv ppc_fp128 %6, %7
store ppc_fp128 %div, ppc_fp128* %c, align 16
store volatile ppc_fp128 %div, ppc_fp128* %c, align 16
ret void
; CHECK-LABEL: __gcc_qadd

View File

@@ -25,17 +25,17 @@ define void @intarg(i8 %a0, ; %i0
i32 %a5, ; %i5
i32 signext %a6, ; [%fp+92]
i8* %a7) { ; [%fp+96]
store i8 %a0, i8* %a4
store i8 %a1, i8* %a4
store volatile i8 %a0, i8* %a4
store volatile i8 %a1, i8* %a4
%p16 = bitcast i8* %a4 to i16*
store i16 %a2, i16* %p16
store volatile i16 %a2, i16* %p16
%p32 = bitcast i8* %a4 to i32*
store i32 %a3, i32* %p32
store volatile i32 %a3, i32* %p32
%pp = bitcast i8* %a4 to i8**
store i8* %a4, i8** %pp
store i32 %a5, i32* %p32
store i32 %a6, i32* %p32
store i8* %a7, i8** %pp
store volatile i8* %a4, i8** %pp
store volatile i32 %a5, i32* %p32
store volatile i32 %a6, i32* %p32
store volatile i8* %a7, i8** %pp
ret void
}

View File

@@ -24,17 +24,17 @@ define void @intarg(i8 %a0, ; %i0
i32 %a5, ; %i5
i32 signext %a6, ; [%fp+BIAS+176]
i8* %a7) { ; [%fp+BIAS+184]
store i8 %a0, i8* %a4
store i8 %a1, i8* %a4
store volatile i8 %a0, i8* %a4
store volatile i8 %a1, i8* %a4
%p16 = bitcast i8* %a4 to i16*
store i16 %a2, i16* %p16
store volatile i16 %a2, i16* %p16
%p32 = bitcast i8* %a4 to i32*
store i32 %a3, i32* %p32
store volatile i32 %a3, i32* %p32
%pp = bitcast i8* %a4 to i8**
store i8* %a4, i8** %pp
store i32 %a5, i32* %p32
store i32 %a6, i32* %p32
store i8* %a7, i8** %pp
store volatile i8* %a4, i8** %pp
store volatile i32 %a5, i32* %p32
store volatile i32 %a6, i32* %p32
store volatile i8* %a7, i8** %pp
ret void
}
@@ -316,7 +316,7 @@ define void @call_ret_i64_pair(i64* %i0) {
%rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef,
i64* undef, i64* undef)
%e0 = extractvalue { i64, i64 } %rv, 0
store i64 %e0, i64* %i0
store volatile i64 %e0, i64* %i0
%e1 = extractvalue { i64, i64 } %rv, 1
store i64 %e1, i64* %i0
ret void

View File

@@ -189,11 +189,11 @@ define void @consume_i1_ret() {
%v6 = extractvalue { i1, i1, i1, i1 } %call, 2
%v7 = extractvalue { i1, i1, i1, i1 } %call, 3
%val = zext i1 %v3 to i32
store i32 %val, i32* @var
store volatile i32 %val, i32* @var
%val2 = zext i1 %v5 to i32
store i32 %val2, i32* @var
store volatile i32 %val2, i32* @var
%val3 = zext i1 %v6 to i32
store i32 %val3, i32* @var
store volatile i32 %val3, i32* @var
%val4 = zext i1 %v7 to i32
store i32 %val4, i32* @var
ret void

View File

@@ -7,13 +7,13 @@ define void @test1(i8** %p) {
%z = alloca i8, align 1
; CHECK: add r1, sp, #8
; CHECK: str r1, [r0]
store i8* %x, i8** %p, align 4
store volatile i8* %x, i8** %p, align 4
; CHECK: add r1, sp, #4
; CHECK: str r1, [r0]
store i8* %y, i8** %p, align 4
store volatile i8* %y, i8** %p, align 4
; CHECK: mov r1, sp
; CHECK: str r1, [r0]
store i8* %z, i8** %p, align 4
store volatile i8* %z, i8** %p, align 4
ret void
}
@@ -24,10 +24,10 @@ define void @test2([1024 x i8]** %p) {
; CHECK: add r1, sp, #1020
; CHECK: adds r1, #4
; CHECK: str r1, [r0]
store [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
store volatile [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
; CHECK: mov r1, sp
; CHECK: str r1, [r0]
store [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
store volatile [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
ret void
}

View File

@@ -50,9 +50,9 @@ bb420: ; preds = %bb20, %bb20
; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
store %union.rec* null, %union.rec** @zz_hold, align 4
store volatile %union.rec* null, %union.rec** @zz_hold, align 4
store %union.rec* null, %union.rec** @zz_res, align 4
store %union.rec* %x, %union.rec** @zz_hold, align 4
store volatile %union.rec* %x, %union.rec** @zz_hold, align 4
%0 = call %union.rec* @Manifest(%union.rec* undef, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind ; <%union.rec*> [#uses=0]
unreachable

View File

@@ -253,9 +253,7 @@ entry:
; CHECK: calll _addrof_i32
; CHECK: retl
; Don't elide the copy when the alloca is escaped with a store.
define void @escape_with_store(i32 %x) {
%x1 = alloca i32
%x2 = alloca i32*
@@ -268,9 +266,8 @@ define void @escape_with_store(i32 %x) {
}
; CHECK-LABEL: _escape_with_store:
; CHECK-DAG: movl {{.*}}(%esp), %[[reg:[^ ]*]]
; CHECK-DAG: movl $0, [[offs:[0-9]*]](%esp)
; CHECK: movl %[[reg]], [[offs]](%esp)
; CHECK: movl {{.*}}(%esp), %[[reg:[^ ]*]]
; CHECK: movl %[[reg]], [[offs:[0-9]*]](%esp)
; CHECK: calll _addrof_i32

View File

@@ -9,33 +9,29 @@ define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E, <4
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pushl %ebp
; X32-SSE-NEXT: movl %esp, %ebp
; X32-SSE-NEXT: pushl %esi
; X32-SSE-NEXT: andl $-16, %esp
; X32-SSE-NEXT: subl $16, %esp
; X32-SSE-NEXT: movl 72(%ebp), %eax
; X32-SSE-NEXT: movl 76(%ebp), %ecx
; X32-SSE-NEXT: movl 12(%ebp), %edx
; X32-SSE-NEXT: movdqa 56(%ebp), %xmm3
; X32-SSE-NEXT: movdqa 40(%ebp), %xmm4
; X32-SSE-NEXT: movdqa 24(%ebp), %xmm5
; X32-SSE-NEXT: movl 8(%ebp), %esi
; X32-SSE-NEXT: addps .LCPI0_0, %xmm0
; X32-SSE-NEXT: movntps %xmm0, (%esi)
; X32-SSE-NEXT: paddq .LCPI0_1, %xmm2
; X32-SSE-NEXT: movntdq %xmm2, (%esi)
; X32-SSE-NEXT: addpd .LCPI0_2, %xmm1
; X32-SSE-NEXT: movntpd %xmm1, (%esi)
; X32-SSE-NEXT: paddd .LCPI0_3, %xmm5
; X32-SSE-NEXT: movntdq %xmm5, (%esi)
; X32-SSE-NEXT: paddw .LCPI0_4, %xmm4
; X32-SSE-NEXT: movntdq %xmm4, (%esi)
; X32-SSE-NEXT: paddb .LCPI0_5, %xmm3
; X32-SSE-NEXT: movntdq %xmm3, (%esi)
; X32-SSE-NEXT: movntil %edx, (%esi)
; X32-SSE-NEXT: movntil %ecx, 4(%esi)
; X32-SSE-NEXT: movntil %eax, (%esi)
; X32-SSE-NEXT: leal -4(%ebp), %esp
; X32-SSE-NEXT: popl %esi
; X32-SSE-NEXT: movl 8(%ebp), %edx
; X32-SSE-NEXT: addps {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT: movntps %xmm0, (%edx)
; X32-SSE-NEXT: paddq {{\.LCPI.*}}, %xmm2
; X32-SSE-NEXT: movntdq %xmm2, (%edx)
; X32-SSE-NEXT: addpd {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT: movntpd %xmm1, (%edx)
; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm5
; X32-SSE-NEXT: movntdq %xmm5, (%edx)
; X32-SSE-NEXT: paddw {{\.LCPI.*}}, %xmm4
; X32-SSE-NEXT: movntdq %xmm4, (%edx)
; X32-SSE-NEXT: paddb {{\.LCPI.*}}, %xmm3
; X32-SSE-NEXT: movntdq %xmm3, (%edx)
; X32-SSE-NEXT: movntil %ecx, 4(%edx)
; X32-SSE-NEXT: movntil %eax, (%edx)
; X32-SSE-NEXT: movl %ebp, %esp
; X32-SSE-NEXT: popl %ebp
; X32-SSE-NEXT: retl
;
@@ -43,33 +39,29 @@ define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E, <4
; X32-AVX: # BB#0:
; X32-AVX-NEXT: pushl %ebp
; X32-AVX-NEXT: movl %esp, %ebp
; X32-AVX-NEXT: pushl %esi
; X32-AVX-NEXT: andl $-16, %esp
; X32-AVX-NEXT: subl $16, %esp
; X32-AVX-NEXT: movl 72(%ebp), %eax
; X32-AVX-NEXT: movl 76(%ebp), %ecx
; X32-AVX-NEXT: movl 12(%ebp), %edx
; X32-AVX-NEXT: vmovdqa 56(%ebp), %xmm3
; X32-AVX-NEXT: vmovdqa 40(%ebp), %xmm4
; X32-AVX-NEXT: vmovdqa 24(%ebp), %xmm5
; X32-AVX-NEXT: movl 8(%ebp), %esi
; X32-AVX-NEXT: vaddps .LCPI0_0, %xmm0, %xmm0
; X32-AVX-NEXT: vmovntps %xmm0, (%esi)
; X32-AVX-NEXT: vpaddq .LCPI0_1, %xmm2, %xmm0
; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
; X32-AVX-NEXT: vaddpd .LCPI0_2, %xmm1, %xmm0
; X32-AVX-NEXT: vmovntpd %xmm0, (%esi)
; X32-AVX-NEXT: vpaddd .LCPI0_3, %xmm5, %xmm0
; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
; X32-AVX-NEXT: vpaddw .LCPI0_4, %xmm4, %xmm0
; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
; X32-AVX-NEXT: vpaddb .LCPI0_5, %xmm3, %xmm0
; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
; X32-AVX-NEXT: movntil %edx, (%esi)
; X32-AVX-NEXT: movntil %ecx, 4(%esi)
; X32-AVX-NEXT: movntil %eax, (%esi)
; X32-AVX-NEXT: leal -4(%ebp), %esp
; X32-AVX-NEXT: popl %esi
; X32-AVX-NEXT: movl 8(%ebp), %edx
; X32-AVX-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-AVX-NEXT: vmovntps %xmm0, (%edx)
; X32-AVX-NEXT: vpaddq {{\.LCPI.*}}, %xmm2, %xmm0
; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
; X32-AVX-NEXT: vaddpd {{\.LCPI.*}}, %xmm1, %xmm0
; X32-AVX-NEXT: vmovntpd %xmm0, (%edx)
; X32-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm5, %xmm0
; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
; X32-AVX-NEXT: vpaddw {{\.LCPI.*}}, %xmm4, %xmm0
; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
; X32-AVX-NEXT: vpaddb {{\.LCPI.*}}, %xmm3, %xmm0
; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
; X32-AVX-NEXT: movntil %ecx, 4(%edx)
; X32-AVX-NEXT: movntil %eax, (%edx)
; X32-AVX-NEXT: movl %ebp, %esp
; X32-AVX-NEXT: popl %ebp
; X32-AVX-NEXT: retl
;

View File

@@ -134,10 +134,7 @@ entry:
@g_16 = internal global i32 -1
; X64-LABEL: test8:
; X64-NEXT: movl _g_16(%rip), %eax
; X64-NEXT: movl $0, _g_16(%rip)
; X64-NEXT: orl $1, %eax
; X64-NEXT: movl %eax, _g_16(%rip)
; X64-NEXT: orb $1, _g_16(%rip)
; X64-NEXT: ret
define void @test8() nounwind {
%tmp = load i32, i32* @g_16

View File

@@ -184,11 +184,11 @@ define void @consume_i1_ret() {
%v6 = extractvalue { i1, i1, i1, i1 } %call, 2
%v7 = extractvalue { i1, i1, i1, i1 } %call, 3
%val = zext i1 %v3 to i32
store i32 %val, i32* @var
store volatile i32 %val, i32* @var
%val2 = zext i1 %v5 to i32
store i32 %val2, i32* @var
store volatile i32 %val2, i32* @var
%val3 = zext i1 %v6 to i32
store i32 %val3, i32* @var
store volatile i32 %val3, i32* @var
%val4 = zext i1 %v7 to i32
store i32 %val4, i32* @var
ret void

View File

@@ -20,7 +20,7 @@ declare void @bar(<16 x float> %a, i32 %b)
; Check that proper alignment of spilled vector does not affect vargs
; CHECK-LABEL: vargs_not_affected
; CHECK: leal 28(%ebp), %eax
; CHECK: movl 28(%ebp), %eax
define i32 @vargs_not_affected(<4 x float> %v, i8* %f, ...) {
entry:
%ap = alloca i8*, align 4

View File

@@ -12,8 +12,8 @@ entry:
; LINUX: movq $0, -8(%rsp)
%this = alloca %Object addrspace(1)*
store %Object addrspace(1)* null, %Object addrspace(1)** %this
store %Object addrspace(1)* %param0, %Object addrspace(1)** %this
store volatile %Object addrspace(1)* null, %Object addrspace(1)** %this
store volatile %Object addrspace(1)* %param0, %Object addrspace(1)** %this
br label %0
; <label>:0 ; preds = %entry

View File

@@ -94,9 +94,7 @@ entry:
; CHECK-LABEL: arg4:
; CHECK: pushq
; va_start:
; CHECK: leaq 48(%rsp), [[REG_arg4_1:%[a-z]+]]
; CHECK: movq [[REG_arg4_1]], (%rsp)
; va_start (optimized away as overwritten by va_arg)
; va_arg:
; CHECK: leaq 52(%rsp), [[REG_arg4_2:%[a-z]+]]
; CHECK: movq [[REG_arg4_2]], (%rsp)

View File

@@ -90,9 +90,7 @@ entry:
}
; CHECK-LABEL: arg4:
; va_start:
; CHECK: leaq 48(%rsp), [[REG_arg4_1:%[a-z]+]]
; CHECK: movq [[REG_arg4_1]], (%rsp)
; va_start (optimized away as overwritten by va_arg)
; va_arg:
; CHECK: leaq 52(%rsp), [[REG_arg4_2:%[a-z]+]]
; CHECK: movq [[REG_arg4_2]], (%rsp)