Commit 4d803fcdcd97dd346d4b39c3b76e5879cead8a31

Authored by David S. Miller
1 parent 1b11d78cf8

[SPARC64]: Inline membar()'s again.

Since GCC has to emit a call and a delay slot to the
out-of-line "membar" routines in arch/sparc64/lib/mb.S
it is much better to just do the necessary predicted
branch inline instead as:

	ba,pt	%xcc, 1f
	 membar	#whatever
1:

instead of the current:

	call	membar_foo
	 dslot

because this way GCC is not required to allocate a stack
frame if the function can be a leaf function.

This also makes this bug fix easier to backport to 2.4.x.

Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 4 changed files with 43 additions and 90 deletions Side-by-side Diff

arch/sparc64/kernel/sparc64_ksyms.c
... ... @@ -403,13 +403,4 @@
403 403 EXPORT_SYMBOL(xor_vis_5);
404 404  
405 405 EXPORT_SYMBOL(prom_palette);
406   -
407   -/* memory barriers */
408   -EXPORT_SYMBOL(mb);
409   -EXPORT_SYMBOL(rmb);
410   -EXPORT_SYMBOL(wmb);
411   -EXPORT_SYMBOL(membar_storeload);
412   -EXPORT_SYMBOL(membar_storeload_storestore);
413   -EXPORT_SYMBOL(membar_storeload_loadload);
414   -EXPORT_SYMBOL(membar_storestore_loadstore);
arch/sparc64/lib/Makefile
... ... @@ -12,7 +12,7 @@
12 12 U1memcpy.o U1copy_from_user.o U1copy_to_user.o \
13 13 U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
14 14 copy_in_user.o user_fixup.o memmove.o \
15   - mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o mb.o
  15 + mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o
16 16  
17 17 lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o
18 18 lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
arch/sparc64/lib/mb.S
1   -/* mb.S: Out of line memory barriers.
2   - *
3   - * Copyright (C) 2005 David S. Miller (davem@davemloft.net)
4   - */
5   -
6   - /* These are here in an effort to more fully work around
7   - * Spitfire Errata #51. Essentially, if a memory barrier
8   - * occurs soon after a mispredicted branch, the chip can stop
9   - * executing instructions until a trap occurs. Therefore, if
10   - * interrupts are disabled, the chip can hang forever.
11   - *
12   - * It used to be believed that the memory barrier had to be
13   - * right in the delay slot, but a case has been traced
14   - * recently wherein the memory barrier was one instruction
15   - * after the branch delay slot and the chip still hung. The
16   - * offending sequence was the following in sym_wakeup_done()
17   - * of the sym53c8xx_2 driver:
18   - *
19   - * call sym_ccb_from_dsa, 0
20   - * movge %icc, 0, %l0
21   - * brz,pn %o0, .LL1303
22   - * mov %o0, %l2
23   - * membar #LoadLoad
24   - *
25   - * The branch has to be mispredicted for the bug to occur.
26   - * Therefore, we put the memory barrier explicitly into a
27   - * "branch always, predicted taken" delay slot to avoid the
28   - * problem case.
29   - */
30   -
31   - .text
32   -
33   -99: retl
34   - nop
35   -
36   - .globl mb
37   -mb: ba,pt %xcc, 99b
38   - membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad
39   - .size mb, .-mb
40   -
41   - .globl rmb
42   -rmb: ba,pt %xcc, 99b
43   - membar #LoadLoad
44   - .size rmb, .-rmb
45   -
46   - .globl wmb
47   -wmb: ba,pt %xcc, 99b
48   - membar #StoreStore
49   - .size wmb, .-wmb
50   -
51   - .globl membar_storeload
52   -membar_storeload:
53   - ba,pt %xcc, 99b
54   - membar #StoreLoad
55   - .size membar_storeload, .-membar_storeload
56   -
57   - .globl membar_storeload_storestore
58   -membar_storeload_storestore:
59   - ba,pt %xcc, 99b
60   - membar #StoreLoad | #StoreStore
61   - .size membar_storeload_storestore, .-membar_storeload_storestore
62   -
63   - .globl membar_storeload_loadload
64   -membar_storeload_loadload:
65   - ba,pt %xcc, 99b
66   - membar #StoreLoad | #LoadLoad
67   - .size membar_storeload_loadload, .-membar_storeload_loadload
68   -
69   - .globl membar_storestore_loadstore
70   -membar_storestore_loadstore:
71   - ba,pt %xcc, 99b
72   - membar #StoreStore | #LoadStore
73   - .size membar_storestore_loadstore, .-membar_storestore_loadstore
include/asm-sparc64/system.h
... ... @@ -28,13 +28,48 @@
28 28 #define ARCH_SUN4C_SUN4 0
29 29 #define ARCH_SUN4 0
30 30  
31   -extern void mb(void);
32   -extern void rmb(void);
33   -extern void wmb(void);
34   -extern void membar_storeload(void);
35   -extern void membar_storeload_storestore(void);
36   -extern void membar_storeload_loadload(void);
37   -extern void membar_storestore_loadstore(void);
  31 +/* These are here in an effort to more fully work around Spitfire Errata
  32 + * #51. Essentially, if a memory barrier occurs soon after a mispredicted
  33 + * branch, the chip can stop executing instructions until a trap occurs.
  34 + * Therefore, if interrupts are disabled, the chip can hang forever.
  35 + *
  36 + * It used to be believed that the memory barrier had to be right in the
  37 + * delay slot, but a case has been traced recently wherein the memory barrier
  38 + * was one instruction after the branch delay slot and the chip still hung.
  39 + * The offending sequence was the following in sym_wakeup_done() of the
  40 + * sym53c8xx_2 driver:
  41 + *
  42 + * call sym_ccb_from_dsa, 0
  43 + * movge %icc, 0, %l0
  44 + * brz,pn %o0, .LL1303
  45 + * mov %o0, %l2
  46 + * membar #LoadLoad
  47 + *
  48 + * The branch has to be mispredicted for the bug to occur. Therefore, we put
  49 + * the memory barrier explicitly into a "branch always, predicted taken"
  50 + * delay slot to avoid the problem case.
  51 + */
  52 +#define membar_safe(type) \
  53 +do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
  54 + " membar " type "\n" \
  55 + "1:\n" \
  56 + : : : "memory"); \
  57 +} while (0)
  58 +
  59 +#define mb() \
  60 + membar_safe("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
  61 +#define rmb() \
  62 + membar_safe("#LoadLoad")
  63 +#define wmb() \
  64 + membar_safe("#StoreStore")
  65 +#define membar_storeload() \
  66 + membar_safe("#StoreLoad")
  67 +#define membar_storeload_storestore() \
  68 + membar_safe("#StoreLoad | #StoreStore")
  69 +#define membar_storeload_loadload() \
  70 + membar_safe("#StoreLoad | #LoadLoad")
  71 +#define membar_storestore_loadstore() \
  72 + membar_safe("#StoreStore | #LoadStore")
38 73  
39 74 #endif
40 75