[SPARC64]: Avoid membar instructions in delay slots.

In particular, avoid membar instructions in the delay
slot of a jmpl instruction.

UltraSPARC-I, II, IIi, and IIe have a bug, documented in
the UltraSPARC-IIi User's Manual, Appendix K, Erratum 51

The long and short of it is that if the IMU unit misses
on a branch or jmpl, and there is a store buffer synchronizing
membar in the delay slot, the chip can stop fetching instructions.

If interrupts are enabled or some other trap is enabled, the
chip will unwedge itself, but performance will suffer.

We already had a workaround for this bug in a few spots, but
it's better to have the entire tree sanitized for this rule.

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2005-06-27 15:42:04 -07:00
parent 020f46a39e
commit b445e26cbf
15 changed files with 172 additions and 111 deletions

View File

@ -271,8 +271,9 @@ cplus_fptrap_insn_1:
fmuld %f0, %f2, %f26 fmuld %f0, %f2, %f26
faddd %f0, %f2, %f28 faddd %f0, %f2, %f28
fmuld %f0, %f2, %f30 fmuld %f0, %f2, %f30
membar #Sync
b,pt %xcc, fpdis_exit b,pt %xcc, fpdis_exit
membar #Sync nop
2: andcc %g5, FPRS_DU, %g0 2: andcc %g5, FPRS_DU, %g0
bne,pt %icc, 3f bne,pt %icc, 3f
fzero %f32 fzero %f32
@ -301,8 +302,9 @@ cplus_fptrap_insn_2:
fmuld %f32, %f34, %f58 fmuld %f32, %f34, %f58
faddd %f32, %f34, %f60 faddd %f32, %f34, %f60
fmuld %f32, %f34, %f62 fmuld %f32, %f34, %f62
membar #Sync
ba,pt %xcc, fpdis_exit ba,pt %xcc, fpdis_exit
membar #Sync nop
3: mov SECONDARY_CONTEXT, %g3 3: mov SECONDARY_CONTEXT, %g3
add %g6, TI_FPREGS, %g1 add %g6, TI_FPREGS, %g1
ldxa [%g3] ASI_DMMU, %g5 ldxa [%g3] ASI_DMMU, %g5

View File

@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr)
" add %1, %4, %1\n" " add %1, %4, %1\n"
" cas [%3], %0, %1\n" " cas [%3], %0, %1\n"
" cmp %0, %1\n" " cmp %0, %1\n"
" membar #StoreLoad | #StoreStore\n"
" bne,pn %%icc, 1b\n" " bne,pn %%icc, 1b\n"
" membar #StoreLoad | #StoreStore\n" " nop\n"
: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
: "r" (&sem->count), "r" (incr), "m" (sem->count) : "r" (&sem->count), "r" (incr), "m" (sem->count)
: "cc"); : "cc");
@ -71,8 +72,9 @@ void up(struct semaphore *sem)
" cmp %%g1, %%g7\n" " cmp %%g1, %%g7\n"
" bne,pn %%icc, 1b\n" " bne,pn %%icc, 1b\n"
" addcc %%g7, 1, %%g0\n" " addcc %%g7, 1, %%g0\n"
" membar #StoreLoad | #StoreStore\n"
" ble,pn %%icc, 3f\n" " ble,pn %%icc, 3f\n"
" membar #StoreLoad | #StoreStore\n" " nop\n"
"2:\n" "2:\n"
" .subsection 2\n" " .subsection 2\n"
"3: mov %0, %%g1\n" "3: mov %0, %%g1\n"
@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem)
" cmp %%g1, %%g7\n" " cmp %%g1, %%g7\n"
" bne,pn %%icc, 1b\n" " bne,pn %%icc, 1b\n"
" cmp %%g7, 1\n" " cmp %%g7, 1\n"
" membar #StoreLoad | #StoreStore\n"
" bl,pn %%icc, 3f\n" " bl,pn %%icc, 3f\n"
" membar #StoreLoad | #StoreStore\n" " nop\n"
"2:\n" "2:\n"
" .subsection 2\n" " .subsection 2\n"
"3: mov %0, %%g1\n" "3: mov %0, %%g1\n"
@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem)
" cmp %%g1, %%g7\n" " cmp %%g1, %%g7\n"
" bne,pn %%icc, 1b\n" " bne,pn %%icc, 1b\n"
" cmp %%g7, 1\n" " cmp %%g7, 1\n"
" membar #StoreLoad | #StoreStore\n"
" bl,pn %%icc, 3f\n" " bl,pn %%icc, 3f\n"
" membar #StoreLoad | #StoreStore\n" " nop\n"
"2:\n" "2:\n"
" .subsection 2\n" " .subsection 2\n"
"3: mov %2, %%g1\n" "3: mov %2, %%g1\n"

View File

@ -98,8 +98,9 @@ startup_continue:
sethi %hi(prom_entry_lock), %g2 sethi %hi(prom_entry_lock), %g2
1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1
membar #StoreLoad | #StoreStore
brnz,pn %g1, 1b brnz,pn %g1, 1b
membar #StoreLoad | #StoreStore nop
sethi %hi(p1275buf), %g2 sethi %hi(p1275buf), %g2
or %g2, %lo(p1275buf), %g2 or %g2, %lo(p1275buf), %g2

View File

@ -87,14 +87,17 @@
#define LOOP_CHUNK3(src, dest, len, branch_dest) \ #define LOOP_CHUNK3(src, dest, len, branch_dest) \
MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
#define DO_SYNC membar #Sync;
#define STORE_SYNC(dest, fsrc) \ #define STORE_SYNC(dest, fsrc) \
EX_ST(STORE_BLK(%fsrc, %dest)); \ EX_ST(STORE_BLK(%fsrc, %dest)); \
add %dest, 0x40, %dest; add %dest, 0x40, %dest; \
DO_SYNC
#define STORE_JUMP(dest, fsrc, target) \ #define STORE_JUMP(dest, fsrc, target) \
EX_ST(STORE_BLK(%fsrc, %dest)); \ EX_ST(STORE_BLK(%fsrc, %dest)); \
add %dest, 0x40, %dest; \ add %dest, 0x40, %dest; \
ba,pt %xcc, target; ba,pt %xcc, target; \
nop;
#define FINISH_VISCHUNK(dest, f0, f1, left) \ #define FINISH_VISCHUNK(dest, f0, f1, left) \
subcc %left, 8, %left;\ subcc %left, 8, %left;\
@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f0, %f2, %f48 faligndata %f0, %f2, %f48
1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
STORE_JUMP(o0, f48, 40f) membar #Sync STORE_JUMP(o0, f48, 40f)
2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
STORE_JUMP(o0, f48, 48f) membar #Sync STORE_JUMP(o0, f48, 48f)
3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
STORE_JUMP(o0, f48, 56f) membar #Sync STORE_JUMP(o0, f48, 56f)
1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f2, %f4, %f48 faligndata %f2, %f4, %f48
1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
STORE_JUMP(o0, f48, 41f) membar #Sync STORE_JUMP(o0, f48, 41f)
2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
STORE_JUMP(o0, f48, 49f) membar #Sync STORE_JUMP(o0, f48, 49f)
3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
STORE_JUMP(o0, f48, 57f) membar #Sync STORE_JUMP(o0, f48, 57f)
1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f4, %f6, %f48 faligndata %f4, %f6, %f48
1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
STORE_JUMP(o0, f48, 42f) membar #Sync STORE_JUMP(o0, f48, 42f)
2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
STORE_JUMP(o0, f48, 50f) membar #Sync STORE_JUMP(o0, f48, 50f)
3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
STORE_JUMP(o0, f48, 58f) membar #Sync STORE_JUMP(o0, f48, 58f)
1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f6, %f8, %f48 faligndata %f6, %f8, %f48
1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
STORE_JUMP(o0, f48, 43f) membar #Sync STORE_JUMP(o0, f48, 43f)
2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
STORE_JUMP(o0, f48, 51f) membar #Sync STORE_JUMP(o0, f48, 51f)
3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
STORE_JUMP(o0, f48, 59f) membar #Sync STORE_JUMP(o0, f48, 59f)
1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f8, %f10, %f48 faligndata %f8, %f10, %f48
1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
STORE_JUMP(o0, f48, 44f) membar #Sync STORE_JUMP(o0, f48, 44f)
2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
STORE_JUMP(o0, f48, 52f) membar #Sync STORE_JUMP(o0, f48, 52f)
3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
STORE_JUMP(o0, f48, 60f) membar #Sync STORE_JUMP(o0, f48, 60f)
1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f10, %f12, %f48 faligndata %f10, %f12, %f48
1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
STORE_JUMP(o0, f48, 45f) membar #Sync STORE_JUMP(o0, f48, 45f)
2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
STORE_JUMP(o0, f48, 53f) membar #Sync STORE_JUMP(o0, f48, 53f)
3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
STORE_JUMP(o0, f48, 61f) membar #Sync STORE_JUMP(o0, f48, 61f)
1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f12, %f14, %f48 faligndata %f12, %f14, %f48
1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
STORE_JUMP(o0, f48, 46f) membar #Sync STORE_JUMP(o0, f48, 46f)
2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
STORE_JUMP(o0, f48, 54f) membar #Sync STORE_JUMP(o0, f48, 54f)
3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
STORE_JUMP(o0, f48, 62f) membar #Sync STORE_JUMP(o0, f48, 62f)
1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f14, %f16, %f48 faligndata %f14, %f16, %f48
1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
STORE_JUMP(o0, f48, 47f) membar #Sync STORE_JUMP(o0, f48, 47f)
2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
STORE_JUMP(o0, f48, 55f) membar #Sync STORE_JUMP(o0, f48, 55f)
3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
STORE_SYNC(o0, f48) membar #Sync STORE_SYNC(o0, f48)
FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
STORE_JUMP(o0, f48, 63f) membar #Sync STORE_JUMP(o0, f48, 63f)
40: FINISH_VISCHUNK(o0, f0, f2, g3) 40: FINISH_VISCHUNK(o0, f0, f2, g3)
41: FINISH_VISCHUNK(o0, f2, f4, g3) 41: FINISH_VISCHUNK(o0, f2, f4, g3)

View File

@ -72,7 +72,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
stda %f48, [%g3 + %g1] ASI_BLK_P stda %f48, [%g3 + %g1] ASI_BLK_P
5: membar #Sync 5: membar #Sync
jmpl %g7 + %g0, %g0 ba,pt %xcc, 80f
nop
.align 32
80: jmpl %g7 + %g0, %g0
nop nop
6: ldub [%g3 + TI_FPSAVED], %o5 6: ldub [%g3 + TI_FPSAVED], %o5
@ -87,8 +91,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
stda %f32, [%g2 + %g1] ASI_BLK_P stda %f32, [%g2 + %g1] ASI_BLK_P
stda %f48, [%g3 + %g1] ASI_BLK_P stda %f48, [%g3 + %g1] ASI_BLK_P
membar #Sync membar #Sync
jmpl %g7 + %g0, %g0 ba,pt %xcc, 80f
nop
.align 32
80: jmpl %g7 + %g0, %g0
nop nop
.align 32 .align 32
@ -126,6 +133,10 @@ VISenterhalf:
stda %f0, [%g2 + %g1] ASI_BLK_P stda %f0, [%g2 + %g1] ASI_BLK_P
stda %f16, [%g3 + %g1] ASI_BLK_P stda %f16, [%g3 + %g1] ASI_BLK_P
membar #Sync membar #Sync
ba,pt %xcc, 4f
nop
.align 32
4: and %o5, FPRS_DU, %o5 4: and %o5, FPRS_DU, %o5
jmpl %g7 + %g0, %g0 jmpl %g7 + %g0, %g0
wr %o5, FPRS_FEF, %fprs wr %o5, FPRS_FEF, %fprs

View File

@ -7,18 +7,6 @@
#include <linux/config.h> #include <linux/config.h>
#include <asm/asi.h> #include <asm/asi.h>
/* On SMP we need to use memory barriers to ensure
* correct memory operation ordering, nop these out
* for uniprocessor.
*/
#ifdef CONFIG_SMP
#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad
#define ATOMIC_POST_BARRIER membar #StoreLoad | #StoreStore
#else
#define ATOMIC_PRE_BARRIER nop
#define ATOMIC_POST_BARRIER nop
#endif
.text .text
/* Two versions of the atomic routines, one that /* Two versions of the atomic routines, one that
@ -52,6 +40,24 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */
nop nop
.size atomic_sub, .-atomic_sub .size atomic_sub, .-atomic_sub
/* On SMP we need to use memory barriers to ensure
* correct memory operation ordering, nop these out
* for uniprocessor.
*/
#ifdef CONFIG_SMP
#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad;
#define ATOMIC_POST_BARRIER \
ba,pt %xcc, 80b; \
membar #StoreLoad | #StoreStore
80: retl
nop
#else
#define ATOMIC_PRE_BARRIER
#define ATOMIC_POST_BARRIER
#endif
.globl atomic_add_ret .globl atomic_add_ret
.type atomic_add_ret,#function .type atomic_add_ret,#function
atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
@ -62,9 +68,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
cmp %g1, %g7 cmp %g1, %g7
bne,pn %icc, 1b bne,pn %icc, 1b
add %g7, %o0, %g7 add %g7, %o0, %g7
sra %g7, 0, %o0
ATOMIC_POST_BARRIER ATOMIC_POST_BARRIER
retl retl
sra %g7, 0, %o0 nop
.size atomic_add_ret, .-atomic_add_ret .size atomic_add_ret, .-atomic_add_ret
.globl atomic_sub_ret .globl atomic_sub_ret
@ -77,9 +84,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
cmp %g1, %g7 cmp %g1, %g7
bne,pn %icc, 1b bne,pn %icc, 1b
sub %g7, %o0, %g7 sub %g7, %o0, %g7
sra %g7, 0, %o0
ATOMIC_POST_BARRIER ATOMIC_POST_BARRIER
retl retl
sra %g7, 0, %o0 nop
.size atomic_sub_ret, .-atomic_sub_ret .size atomic_sub_ret, .-atomic_sub_ret
.globl atomic64_add .globl atomic64_add
@ -118,9 +126,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
cmp %g1, %g7 cmp %g1, %g7
bne,pn %xcc, 1b bne,pn %xcc, 1b
add %g7, %o0, %g7 add %g7, %o0, %g7
mov %g7, %o0
ATOMIC_POST_BARRIER ATOMIC_POST_BARRIER
retl retl
mov %g7, %o0 nop
.size atomic64_add_ret, .-atomic64_add_ret .size atomic64_add_ret, .-atomic64_add_ret
.globl atomic64_sub_ret .globl atomic64_sub_ret
@ -133,7 +142,8 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
cmp %g1, %g7 cmp %g1, %g7
bne,pn %xcc, 1b bne,pn %xcc, 1b
sub %g7, %o0, %g7 sub %g7, %o0, %g7
mov %g7, %o0
ATOMIC_POST_BARRIER ATOMIC_POST_BARRIER
retl retl
mov %g7, %o0 nop
.size atomic64_sub_ret, .-atomic64_sub_ret .size atomic64_sub_ret, .-atomic64_sub_ret

View File

@ -7,19 +7,25 @@
#include <linux/config.h> #include <linux/config.h>
#include <asm/asi.h> #include <asm/asi.h>
.text
/* On SMP we need to use memory barriers to ensure /* On SMP we need to use memory barriers to ensure
* correct memory operation ordering, nop these out * correct memory operation ordering, nop these out
* for uniprocessor. * for uniprocessor.
*/ */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad #define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad
#define BITOP_POST_BARRIER membar #StoreLoad | #StoreStore #define BITOP_POST_BARRIER \
#else ba,pt %xcc, 80b; \
#define BITOP_PRE_BARRIER nop membar #StoreLoad | #StoreStore
#define BITOP_POST_BARRIER nop
#endif
.text 80: retl
nop
#else
#define BITOP_PRE_BARRIER
#define BITOP_POST_BARRIER
#endif
.globl test_and_set_bit .globl test_and_set_bit
.type test_and_set_bit,#function .type test_and_set_bit,#function
@ -37,10 +43,11 @@ test_and_set_bit: /* %o0=nr, %o1=addr */
cmp %g7, %g1 cmp %g7, %g1
bne,pn %xcc, 1b bne,pn %xcc, 1b
and %g7, %o2, %g2 and %g7, %o2, %g2
BITOP_POST_BARRIER
clr %o0 clr %o0
movrne %g2, 1, %o0
BITOP_POST_BARRIER
retl retl
movrne %g2, 1, %o0 nop
.size test_and_set_bit, .-test_and_set_bit .size test_and_set_bit, .-test_and_set_bit
.globl test_and_clear_bit .globl test_and_clear_bit
@ -59,10 +66,11 @@ test_and_clear_bit: /* %o0=nr, %o1=addr */
cmp %g7, %g1 cmp %g7, %g1
bne,pn %xcc, 1b bne,pn %xcc, 1b
and %g7, %o2, %g2 and %g7, %o2, %g2
BITOP_POST_BARRIER
clr %o0 clr %o0
movrne %g2, 1, %o0
BITOP_POST_BARRIER
retl retl
movrne %g2, 1, %o0 nop
.size test_and_clear_bit, .-test_and_clear_bit .size test_and_clear_bit, .-test_and_clear_bit
.globl test_and_change_bit .globl test_and_change_bit
@ -81,10 +89,11 @@ test_and_change_bit: /* %o0=nr, %o1=addr */
cmp %g7, %g1 cmp %g7, %g1
bne,pn %xcc, 1b bne,pn %xcc, 1b
and %g7, %o2, %g2 and %g7, %o2, %g2
BITOP_POST_BARRIER
clr %o0 clr %o0
movrne %g2, 1, %o0
BITOP_POST_BARRIER
retl retl
movrne %g2, 1, %o0 nop
.size test_and_change_bit, .-test_and_change_bit .size test_and_change_bit, .-test_and_change_bit
.globl set_bit .globl set_bit

View File

@ -252,8 +252,9 @@ wlock_again:
" andn %%g1, %%g3, %%g7\n" " andn %%g1, %%g3, %%g7\n"
" casx [%0], %%g1, %%g7\n" " casx [%0], %%g1, %%g7\n"
" cmp %%g1, %%g7\n" " cmp %%g1, %%g7\n"
" membar #StoreLoad | #StoreStore\n"
" bne,pn %%xcc, 1b\n" " bne,pn %%xcc, 1b\n"
" membar #StoreLoad | #StoreStore" " nop"
: /* no outputs */ : /* no outputs */
: "r" (&(rw->lock)) : "r" (&(rw->lock))
: "g3", "g1", "g7", "cc", "memory"); : "g3", "g1", "g7", "cc", "memory");
@ -351,8 +352,9 @@ int _do_write_trylock (rwlock_t *rw, char *str)
" andn %%g1, %%g3, %%g7\n" " andn %%g1, %%g3, %%g7\n"
" casx [%0], %%g1, %%g7\n" " casx [%0], %%g1, %%g7\n"
" cmp %%g1, %%g7\n" " cmp %%g1, %%g7\n"
" membar #StoreLoad | #StoreStore\n"
" bne,pn %%xcc, 1b\n" " bne,pn %%xcc, 1b\n"
" membar #StoreLoad | #StoreStore" " nop"
: /* no outputs */ : /* no outputs */
: "r" (&(rw->lock)) : "r" (&(rw->lock))
: "g3", "g1", "g7", "cc", "memory"); : "g3", "g1", "g7", "cc", "memory");

View File

@ -48,8 +48,9 @@ start_to_zero:
#endif #endif
to_zero: to_zero:
ldstub [%o1], %g3 ldstub [%o1], %g3
membar #StoreLoad | #StoreStore
brnz,pn %g3, spin_on_lock brnz,pn %g3, spin_on_lock
membar #StoreLoad | #StoreStore nop
loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */
cmp %g2, %g7 cmp %g2, %g7
@ -71,8 +72,9 @@ loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */
nop nop
spin_on_lock: spin_on_lock:
ldub [%o1], %g3 ldub [%o1], %g3
membar #LoadLoad
brnz,pt %g3, spin_on_lock brnz,pt %g3, spin_on_lock
membar #LoadLoad nop
ba,pt %xcc, to_zero ba,pt %xcc, to_zero
nop nop
nop nop

View File

@ -17,8 +17,9 @@ __down_read:
bne,pn %icc, 1b bne,pn %icc, 1b
add %g7, 1, %g7 add %g7, 1, %g7
cmp %g7, 0 cmp %g7, 0
membar #StoreLoad | #StoreStore
bl,pn %icc, 3f bl,pn %icc, 3f
membar #StoreLoad | #StoreStore nop
2: 2:
retl retl
nop nop
@ -57,8 +58,9 @@ __down_write:
cmp %g3, %g7 cmp %g3, %g7
bne,pn %icc, 1b bne,pn %icc, 1b
cmp %g7, 0 cmp %g7, 0
membar #StoreLoad | #StoreStore
bne,pn %icc, 3f bne,pn %icc, 3f
membar #StoreLoad | #StoreStore nop
2: retl 2: retl
nop nop
3: 3:
@ -97,8 +99,9 @@ __up_read:
cmp %g1, %g7 cmp %g1, %g7
bne,pn %icc, 1b bne,pn %icc, 1b
cmp %g7, 0 cmp %g7, 0
membar #StoreLoad | #StoreStore
bl,pn %icc, 3f bl,pn %icc, 3f
membar #StoreLoad | #StoreStore nop
2: retl 2: retl
nop nop
3: sethi %hi(RWSEM_ACTIVE_MASK), %g1 3: sethi %hi(RWSEM_ACTIVE_MASK), %g1
@ -126,8 +129,9 @@ __up_write:
bne,pn %icc, 1b bne,pn %icc, 1b
sub %g7, %g1, %g7 sub %g7, %g1, %g7
cmp %g7, 0 cmp %g7, 0
membar #StoreLoad | #StoreStore
bl,pn %icc, 3f bl,pn %icc, 3f
membar #StoreLoad | #StoreStore nop
2: 2:
retl retl
nop nop
@ -151,8 +155,9 @@ __downgrade_write:
bne,pn %icc, 1b bne,pn %icc, 1b
sub %g7, %g1, %g7 sub %g7, %g1, %g7
cmp %g7, 0 cmp %g7, 0
membar #StoreLoad | #StoreStore
bl,pn %icc, 3f bl,pn %icc, 3f
membar #StoreLoad | #StoreStore nop
2: 2:
retl retl
nop nop

View File

@ -136,8 +136,9 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu)
"or %%g1, %0, %%g1\n\t" "or %%g1, %0, %%g1\n\t"
"casx [%2], %%g7, %%g1\n\t" "casx [%2], %%g7, %%g1\n\t"
"cmp %%g7, %%g1\n\t" "cmp %%g7, %%g1\n\t"
"membar #StoreLoad | #StoreStore\n\t"
"bne,pn %%xcc, 1b\n\t" "bne,pn %%xcc, 1b\n\t"
" membar #StoreLoad | #StoreStore" " nop"
: /* no outputs */ : /* no outputs */
: "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
: "g1", "g7"); : "g1", "g7");
@ -157,8 +158,9 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
" andn %%g7, %1, %%g1\n\t" " andn %%g7, %1, %%g1\n\t"
"casx [%2], %%g7, %%g1\n\t" "casx [%2], %%g7, %%g1\n\t"
"cmp %%g7, %%g1\n\t" "cmp %%g7, %%g1\n\t"
"membar #StoreLoad | #StoreStore\n\t"
"bne,pn %%xcc, 1b\n\t" "bne,pn %%xcc, 1b\n\t"
" membar #StoreLoad | #StoreStore\n" " nop\n"
"2:" "2:"
: /* no outputs */ : /* no outputs */
: "r" (cpu), "r" (mask), "r" (&page->flags), : "r" (cpu), "r" (mask), "r" (&page->flags),

View File

@ -266,8 +266,9 @@ __cheetah_flush_tlb_pending: /* 22 insns */
andn %o3, 1, %o3 andn %o3, 1, %o3
stxa %g0, [%o3] ASI_IMMU_DEMAP stxa %g0, [%o3] ASI_IMMU_DEMAP
2: stxa %g0, [%o3] ASI_DMMU_DEMAP 2: stxa %g0, [%o3] ASI_DMMU_DEMAP
membar #Sync
brnz,pt %o1, 1b brnz,pt %o1, 1b
membar #Sync nop
stxa %g2, [%o4] ASI_DMMU stxa %g2, [%o4] ASI_DMMU
flush %g6 flush %g6
wrpr %g0, 0, %tl wrpr %g0, 0, %tl

View File

@ -55,8 +55,9 @@ static __inline__ int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
"add %%g1, %1, %%g7\n\t" "add %%g1, %1, %%g7\n\t"
"cas [%2], %%g1, %%g7\n\t" "cas [%2], %%g1, %%g7\n\t"
"cmp %%g1, %%g7\n\t" "cmp %%g1, %%g7\n\t"
"membar #StoreLoad | #StoreStore\n\t"
"bne,pn %%icc, 1b\n\t" "bne,pn %%icc, 1b\n\t"
" membar #StoreLoad | #StoreStore\n\t" " nop\n\t"
"mov %%g7, %0\n\t" "mov %%g7, %0\n\t"
: "=&r" (tmp) : "=&r" (tmp)
: "0" (tmp), "r" (sem) : "0" (tmp), "r" (sem)

View File

@ -52,12 +52,14 @@ static inline void _raw_spin_lock(spinlock_t *lock)
__asm__ __volatile__( __asm__ __volatile__(
"1: ldstub [%1], %0\n" "1: ldstub [%1], %0\n"
" membar #StoreLoad | #StoreStore\n"
" brnz,pn %0, 2f\n" " brnz,pn %0, 2f\n"
" membar #StoreLoad | #StoreStore\n" " nop\n"
" .subsection 2\n" " .subsection 2\n"
"2: ldub [%1], %0\n" "2: ldub [%1], %0\n"
" membar #LoadLoad\n"
" brnz,pt %0, 2b\n" " brnz,pt %0, 2b\n"
" membar #LoadLoad\n" " nop\n"
" ba,a,pt %%xcc, 1b\n" " ba,a,pt %%xcc, 1b\n"
" .previous" " .previous"
: "=&r" (tmp) : "=&r" (tmp)
@ -95,16 +97,18 @@ static inline void _raw_spin_lock_flags(spinlock_t *lock, unsigned long flags)
__asm__ __volatile__( __asm__ __volatile__(
"1: ldstub [%2], %0\n" "1: ldstub [%2], %0\n"
" brnz,pn %0, 2f\n"
" membar #StoreLoad | #StoreStore\n" " membar #StoreLoad | #StoreStore\n"
" brnz,pn %0, 2f\n"
" nop\n"
" .subsection 2\n" " .subsection 2\n"
"2: rdpr %%pil, %1\n" "2: rdpr %%pil, %1\n"
" wrpr %3, %%pil\n" " wrpr %3, %%pil\n"
"3: ldub [%2], %0\n" "3: ldub [%2], %0\n"
" brnz,pt %0, 3b\n"
" membar #LoadLoad\n" " membar #LoadLoad\n"
" brnz,pt %0, 3b\n"
" nop\n"
" ba,pt %%xcc, 1b\n" " ba,pt %%xcc, 1b\n"
" wrpr %1, %%pil\n" " wrpr %1, %%pil\n"
" .previous" " .previous"
: "=&r" (tmp1), "=&r" (tmp2) : "=&r" (tmp1), "=&r" (tmp2)
: "r"(lock), "r"(flags) : "r"(lock), "r"(flags)
@ -162,12 +166,14 @@ static void inline __read_lock(rwlock_t *lock)
"4: add %0, 1, %1\n" "4: add %0, 1, %1\n"
" cas [%2], %0, %1\n" " cas [%2], %0, %1\n"
" cmp %0, %1\n" " cmp %0, %1\n"
" membar #StoreLoad | #StoreStore\n"
" bne,pn %%icc, 1b\n" " bne,pn %%icc, 1b\n"
" membar #StoreLoad | #StoreStore\n" " nop\n"
" .subsection 2\n" " .subsection 2\n"
"2: ldsw [%2], %0\n" "2: ldsw [%2], %0\n"
" membar #LoadLoad\n"
" brlz,pt %0, 2b\n" " brlz,pt %0, 2b\n"
" membar #LoadLoad\n" " nop\n"
" ba,a,pt %%xcc, 4b\n" " ba,a,pt %%xcc, 4b\n"
" .previous" " .previous"
: "=&r" (tmp1), "=&r" (tmp2) : "=&r" (tmp1), "=&r" (tmp2)
@ -204,12 +210,14 @@ static void inline __write_lock(rwlock_t *lock)
"4: or %0, %3, %1\n" "4: or %0, %3, %1\n"
" cas [%2], %0, %1\n" " cas [%2], %0, %1\n"
" cmp %0, %1\n" " cmp %0, %1\n"
" membar #StoreLoad | #StoreStore\n"
" bne,pn %%icc, 1b\n" " bne,pn %%icc, 1b\n"
" membar #StoreLoad | #StoreStore\n" " nop\n"
" .subsection 2\n" " .subsection 2\n"
"2: lduw [%2], %0\n" "2: lduw [%2], %0\n"
" membar #LoadLoad\n"
" brnz,pt %0, 2b\n" " brnz,pt %0, 2b\n"
" membar #LoadLoad\n" " nop\n"
" ba,a,pt %%xcc, 4b\n" " ba,a,pt %%xcc, 4b\n"
" .previous" " .previous"
: "=&r" (tmp1), "=&r" (tmp2) : "=&r" (tmp1), "=&r" (tmp2)
@ -240,8 +248,9 @@ static int inline __write_trylock(rwlock_t *lock)
" or %0, %4, %1\n" " or %0, %4, %1\n"
" cas [%3], %0, %1\n" " cas [%3], %0, %1\n"
" cmp %0, %1\n" " cmp %0, %1\n"
" membar #StoreLoad | #StoreStore\n"
" bne,pn %%icc, 1b\n" " bne,pn %%icc, 1b\n"
" membar #StoreLoad | #StoreStore\n" " nop\n"
" mov 1, %2\n" " mov 1, %2\n"
"2:" "2:"
: "=&r" (tmp1), "=&r" (tmp2), "=&r" (result) : "=&r" (tmp1), "=&r" (tmp2), "=&r" (result)

View File

@ -111,7 +111,6 @@ static __inline__ void spitfire_put_dcache_tag(unsigned long addr, unsigned long
"membar #Sync" "membar #Sync"
: /* No outputs */ : /* No outputs */
: "r" (tag), "r" (addr), "i" (ASI_DCACHE_TAG)); : "r" (tag), "r" (addr), "i" (ASI_DCACHE_TAG));
__asm__ __volatile__ ("membar #Sync" : : : "memory");
} }
/* The instruction cache lines are flushed with this, but note that /* The instruction cache lines are flushed with this, but note that