[Power] Use lwsync for non-seq_cst fences

Summary:
hwsync is only required for seq_cst fences, acquire and release one can use
the cheaper lwsync.

Test Plan: Added some cases to atomics.ll + make check-all

Reviewers: jfb, wschmidt

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D5317

llvm-svn: 218995
This commit is contained in:
Robin Morisset 2014-10-03 18:04:36 +00:00
parent 56b634e6ac
commit 95772cea0c
2 changed files with 37 additions and 1 deletions

View File

@ -2549,7 +2549,14 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),
def : Pat<(f64 (fextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;
def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
// Only seq_cst fences require the heavyweight sync (SYNC 0).
// All others can use the lightweight sync (SYNC 1).
// source: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
// The rule for seq_cst is duplicated to work with both 64 bits and 32 bits
// versions of Power.
def : Pat<(atomic_fence (i64 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
def : Pat<(atomic_fence (i32 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
def : Pat<(atomic_fence (imm), (imm)), (SYNC 1)>, Requires<[HasSYNC]>;
def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
// Additional FNMSUB patterns: -a*c + b == -(a*c - b)

View File

@ -0,0 +1,29 @@
; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc64 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=440 | FileCheck %s --check-prefix=PPC440
; Fences
define void @fence_acquire() {
; CHECK-LABEL: fence_acquire
; CHECK: sync 1
; PPC440-NOT: sync 1
; PPC440: msync
fence acquire
ret void
}
define void @fence_release() {
; CHECK-LABEL: fence_release
; CHECK: sync 1
; PPC440-NOT: sync 1
; PPC440: msync
fence release
ret void
}
define void @fence_seq_cst() {
; CHECK-LABEL: fence_seq_cst
; CHECK: sync 0
; PPC440-NOT: sync 0
; PPC440: msync
fence seq_cst
ret void
}