mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-05 02:07:56 +00:00
81d794ab6d
Remove FeatureSlowMisaligned128Store from cyclone flags. This flag causes splitting of 16-byte-wide stores into 2 stores of 8 bytes. This was useful on older Apple CPUs, which were slow for 16-byte stores that were not aligned on 16 bytes. As the compiler often cannot predict the actual alignment, the splitting was chosen. This has been a topic for a lot of debate, as the splitting also decreases performance for some benchmarks. Measuring the effects on newer Apple chips (rdar://35525421) shows that it harms more cases than it helps. So it is time to retire this workaround. llvm-svn: 323289
51 lines
2.0 KiB
LLVM
51 lines
2.0 KiB
LLVM
; REQUIRES: asserts
|
|
; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa,+slow-misaligned-128store -enable-misched -verify-misched -o - | FileCheck %s
|
|
|
|
; Tests to check that the scheduler dependencies derived from alias analysis are
|
|
; correct when we have stores that have been split up so that they can later be
|
|
; merged into STP.
|
|
|
|
; Now that overwritten stores are elided in SelectionDAG, dependencies
|
|
; are resolved and removed before MISCHED. Check that we have
|
|
; an equivalent pair of stp instructions as a baseline.
|
|
|
|
; CHECK-LABEL: test_splat
|
|
; CHECK: ldr [[REG:w[0-9]+]], [x2]
|
|
; CHECK-DAG: stp w0, [[REG]], [x2, #12]
|
|
; CHECK-DAG: stp [[REG]], w1, [x2, #4]
|
|
; test_splat(x, y, p): loads p[0], writes that value to the four i32 slots
; p[1]..p[4] with a single 16-byte vector store, then overwrites p[3] with x
; and p[2] with y using scalar stores. The CHECK lines preceding this function
; expect the result to be emitted as two stp instructions (at [x2, #4] and
; [x2, #12]).
define void @test_splat(i32 %x, i32 %y, i32* %p) {
|
|
entry:
|
|
; Value that is splatted into every lane of the vector below.
%val = load i32, i32* %p, align 4
|
|
; %0, %1, %2 are the addresses of p[1], p[2] and p[3] respectively.
%0 = getelementptr inbounds i32, i32* %p, i64 1
|
|
%1 = getelementptr inbounds i32, i32* %p, i64 2
|
|
%2 = getelementptr inbounds i32, i32* %p, i64 3
|
|
; Build <val, val, val, val> one lane at a time.
%vec0 = insertelement <4 x i32> undef, i32 %val, i32 0
|
|
%vec1 = insertelement <4 x i32> %vec0, i32 %val, i32 1
|
|
%vec2 = insertelement <4 x i32> %vec1, i32 %val, i32 2
|
|
%vec3 = insertelement <4 x i32> %vec2, i32 %val, i32 3
|
|
; Reinterpret &p[1] as a vector pointer so the 16-byte store starts at byte offset 4.
%3 = bitcast i32* %0 to <4 x i32>*
|
|
; 16-byte store with only 4-byte alignment declared; presumably this is the
; store affected by +slow-misaligned-128store on the RUN line.
store <4 x i32> %vec3, <4 x i32>* %3, align 4
|
|
; The two scalar stores below overwrite lanes already written by the vector
; store (p[3] and p[2]).
store i32 %x, i32* %2, align 4
|
|
store i32 %y, i32* %1, align 4
|
|
ret void
|
|
}
|
|
|
|
; Memset intrinsic called by @test_zero; operands are (destination pointer,
; fill byte, length in bytes, i1 flag).
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
|
|
; Aggregate that @test_zero receives a pointer to: two i8* fields followed by
; one i32 field.
%struct.tree_common = type { i8*, i8*, i32 }
|
|
|
|
; CHECK-LABEL: test_zero
|
|
; CHECK-DAG: stp x2, xzr, [x0, #8]
|
|
; CHECK-DAG: str w1, [x0, #16]
|
|
; CHECK-DAG: str xzr, [x0]
|
|
|
|
; test_zero(t, code, type): zero-fills the first 24 bytes of *t with memset,
; then stores code into field 2 and type into field 1 of the struct. The CHECK
; lines preceding this function expect one stp (at [x0, #8]) and two str
; instructions (at [x0, #16] and [x0]).
define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) {
|
|
entry:
|
|
; View the struct pointer as i8* so it can be passed to the memset intrinsic.
%0 = bitcast %struct.tree_common* %t to i8*
|
|
; Fill 24 bytes starting at %t with zero; the destination is marked align 8.
tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 24, i1 false)
|
|
; Address of field 2 (the i32 member).
%code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2
|
|
; Overwrites part of the region just zeroed by the memset.
store i32 %code, i32* %code1, align 8
|
|
; Address of field 1 (the second i8* member).
%type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
|
|
; Overwrites another part of the zeroed region.
store i8* %type, i8** %type2, align 8
|
|
ret void
|
|
}
|