From 30efd87f6ee7f208d3fe44cf495ab93e5e243134 Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Fri, 31 May 2013 13:25:22 +0000
Subject: [PATCH] [SystemZ] Don't use LOAD and STORE REVERSED for volatile
 accesses

Unlike most -- hopefully "all other", but I'm still checking -- memory
instructions we support, LOAD REVERSED and STORE REVERSED may access
the memory location several times.  This means that they are not suitable
for volatile loads and stores.

This patch is a prerequisite for better atomic load and store support.
The same principle applies there: almost all memory instructions we
support are inherently atomic ("block concurrent"), but LOAD REVERSED
and STORE REVERSED are exceptions.

Other instructions continue to allow volatile operands.  I will add
positive "allows volatile" tests at the same time as the "allows atomic
load or store" tests.

llvm-svn: 183002
---
 llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 13 ++++----
 llvm/lib/Target/SystemZ/SystemZOperators.td | 21 ++++--
 llvm/test/CodeGen/SystemZ/bswap-02.ll       | 12 +++++++
 llvm/test/CodeGen/SystemZ/bswap-03.ll       | 12 +++++++
 llvm/test/CodeGen/SystemZ/bswap-04.ll       | 36 ++++++++++++++-------
 llvm/test/CodeGen/SystemZ/bswap-05.ll       | 36 ++++++++++++++-------
 6 files changed, 98 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 062266bb485e..c9ec6bc1f991 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -382,13 +382,14 @@ let neverHasSideEffects = 1 in {
   def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>;
 }
 
-// Byte-swapping loads.
-def LRV  : UnaryRXY<"lrv",  0xE31E, loadu<bswap>, GR32>;
-def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap>, GR64>;
+// Byte-swapping loads.  Unlike normal loads, these instructions are
+// allowed to access storage more than once.
+def LRV  : UnaryRXY<"lrv",  0xE31E, loadu<bswap, nonvolatile_load>, GR32>;
+def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap, nonvolatile_load>, GR64>;
 
-// Byte-swapping stores.
-def STRV  : StoreRXY<"strv",  0xE33E, storeu<bswap>, GR32>;
-def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap>, GR64>;
+// Likewise byte-swapping stores.
+def STRV  : StoreRXY<"strv",  0xE33E, storeu<bswap, nonvolatile_store>, GR32>;
+def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>, GR64>;
 
 //===----------------------------------------------------------------------===//
 // Load address instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 8c4df56b4615..ab01b2527a83 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -142,6 +142,23 @@ def aligned_store         : AlignedStore<store>;
 def aligned_truncstorei16 : AlignedStore<truncstorei16>;
 def aligned_truncstorei32 : AlignedStore<truncstorei32>;
 
+// Non-volatile loads.  Used for instructions that might access the storage
+// location multiple times.
+class NonvolatileLoad<SDPatternOperator load>
+  : PatFrag<(ops node:$addr), (load node:$addr), [{
+  LoadSDNode *Load = cast<LoadSDNode>(N);
+  return !Load->isVolatile();
+}]>;
+def nonvolatile_load : NonvolatileLoad<load>;
+
+// Non-volatile stores.
+class NonvolatileStore<SDPatternOperator store>
+  : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
+  StoreSDNode *Store = cast<StoreSDNode>(N);
+  return !Store->isVolatile();
+}]>;
+def nonvolatile_store : NonvolatileStore<store>;
+
 // Insertions.
 def inserti8 : PatFrag<(ops node:$src1, node:$src2),
                        (or (and node:$src1, -256), node:$src2)>;
@@ -186,11 +203,11 @@ def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
 
 // Create a unary operator that loads from memory and then performs
 // the given operation on it.
-class loadu<SDPatternOperator operator>
+class loadu<SDPatternOperator operator, SDPatternOperator load = load>
   : PatFrag<(ops node:$addr), (operator (load node:$addr))>;
 
 // Create a store operator that performs the given unary operation
 // on the value before storing it.
-class storeu<SDPatternOperator operator>
+class storeu<SDPatternOperator operator, SDPatternOperator store = store>
   : PatFrag<(ops node:$value, node:$addr),
             (store (operator node:$value), node:$addr)>;
diff --git a/llvm/test/CodeGen/SystemZ/bswap-02.ll b/llvm/test/CodeGen/SystemZ/bswap-02.ll
index e9b7eb5f055b..8b99077f4c13 100644
--- a/llvm/test/CodeGen/SystemZ/bswap-02.ll
+++ b/llvm/test/CodeGen/SystemZ/bswap-02.ll
@@ -85,3 +85,15 @@ define i32 @f7(i64 %src, i64 %index) {
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %swapped
 }
+
+; Check that volatile accesses do not use LRV, which might access the
+; storage multiple times.
+define i32 @f8(i32 *%src) {
+; CHECK: f8:
+; CHECK: l [[REG:%r[0-5]]], 0(%r2)
+; CHECK: lrvr %r2, [[REG]]
+; CHECK: br %r14
+  %a = load volatile i32 *%src
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %swapped
+}
diff --git a/llvm/test/CodeGen/SystemZ/bswap-03.ll b/llvm/test/CodeGen/SystemZ/bswap-03.ll
index 2e6bcdce2651..df6624e5b46c 100644
--- a/llvm/test/CodeGen/SystemZ/bswap-03.ll
+++ b/llvm/test/CodeGen/SystemZ/bswap-03.ll
@@ -85,3 +85,15 @@ define i64 @f7(i64 %src, i64 %index) {
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %swapped
 }
+
+; Check that volatile accesses do not use LRVG, which might access the
+; storage multiple times.
+define i64 @f8(i64 *%src) {
+; CHECK: f8:
+; CHECK: lg [[REG:%r[0-5]]], 0(%r2)
+; CHECK: lrvgr %r2, [[REG]]
+; CHECK: br %r14
+  %a = load volatile i64 *%src
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %swapped
+}
diff --git a/llvm/test/CodeGen/SystemZ/bswap-04.ll b/llvm/test/CodeGen/SystemZ/bswap-04.ll
index 192327bd256c..63b202044e48 100644
--- a/llvm/test/CodeGen/SystemZ/bswap-04.ll
+++ b/llvm/test/CodeGen/SystemZ/bswap-04.ll
@@ -5,21 +5,21 @@
 declare i32 @llvm.bswap.i32(i32 %a)
 
 ; Check STRV with no displacement.
-define void @f1(i32 *%src, i32 %a) {
+define void @f1(i32 *%dst, i32 %a) {
 ; CHECK: f1:
 ; CHECK: strv %r3, 0(%r2)
 ; CHECK: br %r14
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
-  store i32 %swapped, i32 *%src
+  store i32 %swapped, i32 *%dst
   ret void
 }
 
 ; Check the high end of the aligned STRV range.
-define void @f2(i32 *%src, i32 %a) {
+define void @f2(i32 *%dst, i32 %a) {
 ; CHECK: f2:
 ; CHECK: strv %r3, 524284(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i32 *%src, i64 131071
+  %ptr = getelementptr i32 *%dst, i64 131071
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   store i32 %swapped, i32 *%ptr
   ret void
@@ -27,34 +27,34 @@ define void @f2(i32 *%src, i32 %a) {
 
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
-define void @f3(i32 *%src, i32 %a) {
+define void @f3(i32 *%dst, i32 %a) {
 ; CHECK: f3:
 ; CHECK: agfi %r2, 524288
 ; CHECK: strv %r3, 0(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i32 *%src, i64 131072
+  %ptr = getelementptr i32 *%dst, i64 131072
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   store i32 %swapped, i32 *%ptr
   ret void
 }
 
 ; Check the high end of the negative aligned STRV range.
-define void @f4(i32 *%src, i32 %a) {
+define void @f4(i32 *%dst, i32 %a) {
 ; CHECK: f4:
 ; CHECK: strv %r3, -4(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i32 *%src, i64 -1
+  %ptr = getelementptr i32 *%dst, i64 -1
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   store i32 %swapped, i32 *%ptr
   ret void
 }
 
 ; Check the low end of the STRV range.
-define void @f5(i32 *%src, i32 %a) {
+define void @f5(i32 *%dst, i32 %a) {
 ; CHECK: f5:
 ; CHECK: strv %r3, -524288(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i32 *%src, i64 -131072
+  %ptr = getelementptr i32 *%dst, i64 -131072
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   store i32 %swapped, i32 *%ptr
   ret void
@@ -62,12 +62,12 @@ define void @f5(i32 *%src, i32 %a) {
 
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
-define void @f6(i32 *%src, i32 %a) {
+define void @f6(i32 *%dst, i32 %a) {
 ; CHECK: f6:
 ; CHECK: agfi %r2, -524292
 ; CHECK: strv %r3, 0(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i32 *%src, i64 -131073
+  %ptr = getelementptr i32 *%dst, i64 -131073
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   store i32 %swapped, i32 *%ptr
   ret void
@@ -85,3 +85,15 @@ define void @f7(i64 %src, i64 %index, i32 %a) {
   store i32 %swapped, i32 *%ptr
   ret void
 }
+
+; Check that volatile stores do not use STRV, which might access the
+; storage multiple times.
+define void @f8(i32 *%dst, i32 %a) {
+; CHECK: f8:
+; CHECK: lrvr [[REG:%r[0-5]]], %r3
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  store volatile i32 %swapped, i32 *%dst
+  ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/bswap-05.ll b/llvm/test/CodeGen/SystemZ/bswap-05.ll
index e58cb80c3981..6f25d3c10ab6 100644
--- a/llvm/test/CodeGen/SystemZ/bswap-05.ll
+++ b/llvm/test/CodeGen/SystemZ/bswap-05.ll
@@ -5,21 +5,21 @@
 declare i64 @llvm.bswap.i64(i64 %a)
 
 ; Check STRVG with no displacement.
-define void @f1(i64 *%src, i64 %a) {
+define void @f1(i64 *%dst, i64 %a) {
 ; CHECK: f1:
 ; CHECK: strvg %r3, 0(%r2)
 ; CHECK: br %r14
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
-  store i64 %swapped, i64 *%src
+  store i64 %swapped, i64 *%dst
   ret void
 }
 
 ; Check the high end of the aligned STRVG range.
-define void @f2(i64 *%src, i64 %a) {
+define void @f2(i64 *%dst, i64 %a) {
 ; CHECK: f2:
 ; CHECK: strvg %r3, 524280(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i64 *%src, i64 65535
+  %ptr = getelementptr i64 *%dst, i64 65535
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   store i64 %swapped, i64 *%ptr
   ret void
@@ -27,34 +27,34 @@ define void @f2(i64 *%src, i64 %a) {
 
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
-define void @f3(i64 *%src, i64 %a) {
+define void @f3(i64 *%dst, i64 %a) {
 ; CHECK: f3:
 ; CHECK: agfi %r2, 524288
 ; CHECK: strvg %r3, 0(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i64 *%src, i64 65536
+  %ptr = getelementptr i64 *%dst, i64 65536
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   store i64 %swapped, i64 *%ptr
   ret void
 }
 
 ; Check the high end of the negative aligned STRVG range.
-define void @f4(i64 *%src, i64 %a) {
+define void @f4(i64 *%dst, i64 %a) {
 ; CHECK: f4:
 ; CHECK: strvg %r3, -8(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i64 *%src, i64 -1
+  %ptr = getelementptr i64 *%dst, i64 -1
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   store i64 %swapped, i64 *%ptr
   ret void
 }
 
 ; Check the low end of the STRVG range.
-define void @f5(i64 *%src, i64 %a) {
+define void @f5(i64 *%dst, i64 %a) {
 ; CHECK: f5:
 ; CHECK: strvg %r3, -524288(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i64 *%src, i64 -65536
+  %ptr = getelementptr i64 *%dst, i64 -65536
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   store i64 %swapped, i64 *%ptr
   ret void
@@ -62,12 +62,12 @@ define void @f5(i64 *%src, i64 %a) {
 
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
-define void @f6(i64 *%src, i64 %a) {
+define void @f6(i64 *%dst, i64 %a) {
 ; CHECK: f6:
 ; CHECK: agfi %r2, -524296
 ; CHECK: strvg %r3, 0(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i64 *%src, i64 -65537
+  %ptr = getelementptr i64 *%dst, i64 -65537
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   store i64 %swapped, i64 *%ptr
   ret void
@@ -85,3 +85,15 @@ define void @f7(i64 %src, i64 %index, i64 %a) {
   store i64 %swapped, i64 *%ptr
   ret void
 }
+
+; Check that volatile stores do not use STRVG, which might access the
+; storage multiple times.
+define void @f8(i64 *%dst, i64 %a) {
+; CHECK: f8:
+; CHECK: lrvgr [[REG:%r[0-5]]], %r3
+; CHECK: stg [[REG]], 0(%r2)
+; CHECK: br %r14
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  store volatile i64 %swapped, i64 *%dst
+  ret void
+}
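
A standalone C++ sketch of the hazard the patch guards against follows.  It
is not part of the commit, and the ChangingRegister type and the register
values in it are invented for the illustration.  The point it demonstrates:
a volatile access promises exactly one access to the location, and an
instruction that may access storage several times, as LOAD REVERSED and
STORE REVERSED may, can observe a mix of values whenever each access has a
side effect.

    // C++ sketch (illustrative only): why a multi-access byte-reversed
    // load is unsafe for volatile accesses.
    #include <cstdint>
    #include <cstdio>

    // Invented stand-in for a device register whose value changes on
    // every read, e.g. a free-running counter or a FIFO data port.
    struct ChangingRegister {
      uint32_t value = 0x11223344;
      uint32_t load32() { return value++; }      // exactly one access
      uint8_t load8(unsigned i) {                // one access per byte
        return static_cast<uint8_t>(value++ >> (8 * i));
      }
    };

    int main() {
      ChangingRegister reg;

      // Safe lowering for "load volatile" + bswap: one access (L), then
      // the swap in registers (LRVR).  The register-to-register swap is
      // modelled here with the GCC/Clang builtin.  Prints 0x44332211.
      uint32_t once = __builtin_bswap32(reg.load32());
      std::printf("single access:   %#010x\n", (unsigned)once);

      // What a multi-access reversed load could do: four accesses, each
      // observing a different value of the register, so the reassembled
      // word mixes bytes from four snapshots (0x45332211, not 0x44332211).
      uint32_t pieced = 0;
      for (unsigned i = 0; i < 4; ++i)
        pieced |= static_cast<uint32_t>(reg.load8(i)) << (8 * (3 - i));
      std::printf("multiple access: %#010x\n", (unsigned)pieced);
    }

This is why the new nonvolatile_load and nonvolatile_store fragments keep
LRV, LRVG, STRV and STRVG away from volatile accesses, while the
single-access sequences checked in the f8 tests above remain correct.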