Commit 4d803fcdcd97dd346d4b39c3b76e5879cead8a31
1 parent
1b11d78cf8
Exists in
master
and in
4 other branches
[SPARC64]: Inline membar()'s again.
Since GCC has to emit a call and a delay slot to the out-of-line "membar" routines in arch/sparc64/lib/mb.S, it is much better to just do the necessary predicted branch inline instead, as: `ba,pt %xcc, 1f; membar #whatever; 1:` instead of the current: `call membar_foo; dslot`, because this way GCC is not required to allocate a stack frame if the function can be a leaf function. This also makes this bug fix easier to backport to 2.4.x. Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 4 changed files with 43 additions and 90 deletions Side-by-side Diff
arch/sparc64/kernel/sparc64_ksyms.c
| ... | ... | @@ -403,13 +403,4 @@ |
| 403 | 403 | EXPORT_SYMBOL(xor_vis_5); |
| 404 | 404 | |
| 405 | 405 | EXPORT_SYMBOL(prom_palette); |
| 406 | - | |
| 407 | -/* memory barriers */ | |
| 408 | -EXPORT_SYMBOL(mb); | |
| 409 | -EXPORT_SYMBOL(rmb); | |
| 410 | -EXPORT_SYMBOL(wmb); | |
| 411 | -EXPORT_SYMBOL(membar_storeload); | |
| 412 | -EXPORT_SYMBOL(membar_storeload_storestore); | |
| 413 | -EXPORT_SYMBOL(membar_storeload_loadload); | |
| 414 | -EXPORT_SYMBOL(membar_storestore_loadstore); |
arch/sparc64/lib/Makefile
| ... | ... | @@ -12,7 +12,7 @@ |
| 12 | 12 | U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ |
| 13 | 13 | U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ |
| 14 | 14 | copy_in_user.o user_fixup.o memmove.o \ |
| 15 | - mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o mb.o | |
| 15 | + mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o | |
| 16 | 16 | |
| 17 | 17 | lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o |
| 18 | 18 | lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o |
arch/sparc64/lib/mb.S
| 1 | -/* mb.S: Out of line memory barriers. | |
| 2 | - * | |
| 3 | - * Copyright (C) 2005 David S. Miller (davem@davemloft.net) | |
| 4 | - */ | |
| 5 | - | |
| 6 | - /* These are here in an effort to more fully work around | |
| 7 | - * Spitfire Errata #51. Essentially, if a memory barrier | |
| 8 | - * occurs soon after a mispredicted branch, the chip can stop | |
| 9 | - * executing instructions until a trap occurs. Therefore, if | |
| 10 | - * interrupts are disabled, the chip can hang forever. | |
| 11 | - * | |
| 12 | - * It used to be believed that the memory barrier had to be | |
| 13 | - * right in the delay slot, but a case has been traced | |
| 14 | - * recently wherein the memory barrier was one instruction | |
| 15 | - * after the branch delay slot and the chip still hung. The | |
| 16 | - * offending sequence was the following in sym_wakeup_done() | |
| 17 | - * of the sym53c8xx_2 driver: | |
| 18 | - * | |
| 19 | - * call sym_ccb_from_dsa, 0 | |
| 20 | - * movge %icc, 0, %l0 | |
| 21 | - * brz,pn %o0, .LL1303 | |
| 22 | - * mov %o0, %l2 | |
| 23 | - * membar #LoadLoad | |
| 24 | - * | |
| 25 | - * The branch has to be mispredicted for the bug to occur. | |
| 26 | - * Therefore, we put the memory barrier explicitly into a | |
| 27 | - * "branch always, predicted taken" delay slot to avoid the | |
| 28 | - * problem case. | |
| 29 | - */ | |
| 30 | - | |
| 31 | - .text | |
| 32 | - | |
| 33 | -99: retl | |
| 34 | - nop | |
| 35 | - | |
| 36 | - .globl mb | |
| 37 | -mb: ba,pt %xcc, 99b | |
| 38 | - membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad | |
| 39 | - .size mb, .-mb | |
| 40 | - | |
| 41 | - .globl rmb | |
| 42 | -rmb: ba,pt %xcc, 99b | |
| 43 | - membar #LoadLoad | |
| 44 | - .size rmb, .-rmb | |
| 45 | - | |
| 46 | - .globl wmb | |
| 47 | -wmb: ba,pt %xcc, 99b | |
| 48 | - membar #StoreStore | |
| 49 | - .size wmb, .-wmb | |
| 50 | - | |
| 51 | - .globl membar_storeload | |
| 52 | -membar_storeload: | |
| 53 | - ba,pt %xcc, 99b | |
| 54 | - membar #StoreLoad | |
| 55 | - .size membar_storeload, .-membar_storeload | |
| 56 | - | |
| 57 | - .globl membar_storeload_storestore | |
| 58 | -membar_storeload_storestore: | |
| 59 | - ba,pt %xcc, 99b | |
| 60 | - membar #StoreLoad | #StoreStore | |
| 61 | - .size membar_storeload_storestore, .-membar_storeload_storestore | |
| 62 | - | |
| 63 | - .globl membar_storeload_loadload | |
| 64 | -membar_storeload_loadload: | |
| 65 | - ba,pt %xcc, 99b | |
| 66 | - membar #StoreLoad | #LoadLoad | |
| 67 | - .size membar_storeload_loadload, .-membar_storeload_loadload | |
| 68 | - | |
| 69 | - .globl membar_storestore_loadstore | |
| 70 | -membar_storestore_loadstore: | |
| 71 | - ba,pt %xcc, 99b | |
| 72 | - membar #StoreStore | #LoadStore | |
| 73 | - .size membar_storestore_loadstore, .-membar_storestore_loadstore |
include/asm-sparc64/system.h
| ... | ... | @@ -28,13 +28,48 @@ |
| 28 | 28 | #define ARCH_SUN4C_SUN4 0 |
| 29 | 29 | #define ARCH_SUN4 0 |
| 30 | 30 | |
| 31 | -extern void mb(void); | |
| 32 | -extern void rmb(void); | |
| 33 | -extern void wmb(void); | |
| 34 | -extern void membar_storeload(void); | |
| 35 | -extern void membar_storeload_storestore(void); | |
| 36 | -extern void membar_storeload_loadload(void); | |
| 37 | -extern void membar_storestore_loadstore(void); | |
| 31 | +/* These are here in an effort to more fully work around Spitfire Errata | |
| 32 | + * #51. Essentially, if a memory barrier occurs soon after a mispredicted | |
| 33 | + * branch, the chip can stop executing instructions until a trap occurs. | |
| 34 | + * Therefore, if interrupts are disabled, the chip can hang forever. | |
| 35 | + * | |
| 36 | + * It used to be believed that the memory barrier had to be right in the | |
| 37 | + * delay slot, but a case has been traced recently wherein the memory barrier | |
| 38 | + * was one instruction after the branch delay slot and the chip still hung. | |
| 39 | + * The offending sequence was the following in sym_wakeup_done() of the | |
| 40 | + * sym53c8xx_2 driver: | |
| 41 | + * | |
| 42 | + * call sym_ccb_from_dsa, 0 | |
| 43 | + * movge %icc, 0, %l0 | |
| 44 | + * brz,pn %o0, .LL1303 | |
| 45 | + * mov %o0, %l2 | |
| 46 | + * membar #LoadLoad | |
| 47 | + * | |
| 48 | + * The branch has to be mispredicted for the bug to occur. Therefore, we put | |
| 49 | + * the memory barrier explicitly into a "branch always, predicted taken" | |
| 50 | + * delay slot to avoid the problem case. | |
| 51 | + */ | |
| 52 | +#define membar_safe(type) \ | |
| 53 | +do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ | |
| 54 | + " membar " type "\n" \ | |
| 55 | + "1:\n" \ | |
| 56 | + : : : "memory"); \ | |
| 57 | +} while (0) | |
| 58 | + | |
| 59 | +#define mb() \ | |
| 60 | + membar_safe("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad") | |
| 61 | +#define rmb() \ | |
| 62 | + membar_safe("#LoadLoad") | |
| 63 | +#define wmb() \ | |
| 64 | + membar_safe("#StoreStore") | |
| 65 | +#define membar_storeload() \ | |
| 66 | + membar_safe("#StoreLoad") | |
| 67 | +#define membar_storeload_storestore() \ | |
| 68 | + membar_safe("#StoreLoad | #StoreStore") | |
| 69 | +#define membar_storeload_loadload() \ | |
| 70 | + membar_safe("#StoreLoad | #LoadLoad") | |
| 71 | +#define membar_storestore_loadstore() \ | |
| 72 | + membar_safe("#StoreStore | #LoadStore") | |
| 38 | 73 | |
| 39 | 74 | #endif |
| 40 | 75 |