Commit 7837314d141c661c70bc13c5050694413ecfe14a

Authored by Ralf Baechle
1 parent 18cb657ca1

MIPS: Get rid of branches to .subsections.

It was a nice optimization - on paper at least.  In practice it results in
branches that may exceed the maximum legal range for a branch.  We can
fight that problem with -ffunction-sections, but -ffunction-sections in
turn is incompatible with -pg as used by the function tracer.

By rewriting the retry loop around all simple LL/SC blocks in C we reduce
the amount of inline assembler and at the same time allow GCC to fill the
branch delay slots with something sensible, or to apply whatever other
clever optimization it may have up its sleeve.

With this optimization gone we also no longer need -ffunction-sections,
so drop it.
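
As a condensed sketch of the new pattern, here is roughly what the
atomic_add() hunk below boils down to (simplified: the branch-likely
variant and the spinlock fallback path are omitted):

	static inline void atomic_add(int i, atomic_t *v)
	{
		int temp;

		do {
			__asm__ __volatile__(
			"	.set	mips3			\n"
			"	ll	%0, %1	# atomic_add	\n"
			"	addu	%0, %2			\n"
			"	sc	%0, %1			\n"
			"	.set	mips0			\n"
			: "=&r" (temp), "=m" (v->counter)
			: "Ir" (i), "m" (v->counter));
		} while (unlikely(!temp));	/* sc leaves 0 in temp on failure */
	}

The retry branch is now an ordinary C loop branch placed by the compiler,
so it can never go out of range and needs no .subsection trampoline.  The
blocks that keep their branch in assembler (atomic_sub_if_positive and the
cmpxchg macro below) now simply branch straight back to the 1: label
instead of bouncing through .subsection 2.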

This optimization was originally introduced in 2.6.21, as commit
5999eca25c1fd4b9b9aca7833b04d10fe4bc877d on linux-mips.org and commit
f65e4fa8e0c6022ad58dc88d1b11b12589ed7f9f on kernel.org.

The original fix for the issues that caused me to pull this optimization
is by Paul Gortmaker <paul.gortmaker@windriver.com>.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

Showing 5 changed files with 243 additions and 297 deletions

arch/mips/Makefile
... ... @@ -48,9 +48,6 @@
48 48 endif
49 49 endif
50 50  
51   -ifndef CONFIG_FUNCTION_TRACER
52   -cflags-y := -ffunction-sections
53   -endif
54 51 ifdef CONFIG_FUNCTION_GRAPH_TRACER
55 52 ifndef KBUILD_MCOUNT_RA_ADDRESS
56 53 ifeq ($(call cc-option-yn,-mmcount-ra-address), y)
arch/mips/include/asm/atomic.h
... ... @@ -64,18 +64,16 @@
64 64 } else if (kernel_uses_llsc) {
65 65 int temp;
66 66  
67   - __asm__ __volatile__(
68   - " .set mips3 \n"
69   - "1: ll %0, %1 # atomic_add \n"
70   - " addu %0, %2 \n"
71   - " sc %0, %1 \n"
72   - " beqz %0, 2f \n"
73   - " .subsection 2 \n"
74   - "2: b 1b \n"
75   - " .previous \n"
76   - " .set mips0 \n"
77   - : "=&r" (temp), "=m" (v->counter)
78   - : "Ir" (i), "m" (v->counter));
  67 + do {
  68 + __asm__ __volatile__(
  69 + " .set mips3 \n"
  70 + " ll %0, %1 # atomic_add \n"
  71 + " addu %0, %2 \n"
  72 + " sc %0, %1 \n"
  73 + " .set mips0 \n"
  74 + : "=&r" (temp), "=m" (v->counter)
  75 + : "Ir" (i), "m" (v->counter));
  76 + } while (unlikely(!temp));
79 77 } else {
80 78 unsigned long flags;
81 79  
... ... @@ -109,18 +107,16 @@
109 107 } else if (kernel_uses_llsc) {
110 108 int temp;
111 109  
112   - __asm__ __volatile__(
113   - " .set mips3 \n"
114   - "1: ll %0, %1 # atomic_sub \n"
115   - " subu %0, %2 \n"
116   - " sc %0, %1 \n"
117   - " beqz %0, 2f \n"
118   - " .subsection 2 \n"
119   - "2: b 1b \n"
120   - " .previous \n"
121   - " .set mips0 \n"
122   - : "=&r" (temp), "=m" (v->counter)
123   - : "Ir" (i), "m" (v->counter));
  110 + do {
  111 + __asm__ __volatile__(
  112 + " .set mips3 \n"
  113 + " ll %0, %1 # atomic_sub \n"
  114 + " subu %0, %2 \n"
  115 + " sc %0, %1 \n"
  116 + " .set mips0 \n"
  117 + : "=&r" (temp), "=m" (v->counter)
  118 + : "Ir" (i), "m" (v->counter));
  119 + } while (unlikely(!temp));
124 120 } else {
125 121 unsigned long flags;
126 122  
... ... @@ -156,20 +152,19 @@
156 152 } else if (kernel_uses_llsc) {
157 153 int temp;
158 154  
159   - __asm__ __volatile__(
160   - " .set mips3 \n"
161   - "1: ll %1, %2 # atomic_add_return \n"
162   - " addu %0, %1, %3 \n"
163   - " sc %0, %2 \n"
164   - " beqz %0, 2f \n"
165   - " addu %0, %1, %3 \n"
166   - " .subsection 2 \n"
167   - "2: b 1b \n"
168   - " .previous \n"
169   - " .set mips0 \n"
170   - : "=&r" (result), "=&r" (temp), "=m" (v->counter)
171   - : "Ir" (i), "m" (v->counter)
172   - : "memory");
  155 + do {
  156 + __asm__ __volatile__(
  157 + " .set mips3 \n"
  158 + " ll %1, %2 # atomic_add_return \n"
  159 + " addu %0, %1, %3 \n"
  160 + " sc %0, %2 \n"
  161 + " .set mips0 \n"
  162 + : "=&r" (result), "=&r" (temp), "=m" (v->counter)
  163 + : "Ir" (i), "m" (v->counter)
  164 + : "memory");
  165 + } while (unlikely(!result));
  166 +
  167 + result = temp + i;
173 168 } else {
174 169 unsigned long flags;
175 170  
176 171  
... ... @@ -205,23 +200,24 @@
205 200 : "=&r" (result), "=&r" (temp), "=m" (v->counter)
206 201 : "Ir" (i), "m" (v->counter)
207 202 : "memory");
  203 +
  204 + result = temp - i;
208 205 } else if (kernel_uses_llsc) {
209 206 int temp;
210 207  
211   - __asm__ __volatile__(
212   - " .set mips3 \n"
213   - "1: ll %1, %2 # atomic_sub_return \n"
214   - " subu %0, %1, %3 \n"
215   - " sc %0, %2 \n"
216   - " beqz %0, 2f \n"
217   - " subu %0, %1, %3 \n"
218   - " .subsection 2 \n"
219   - "2: b 1b \n"
220   - " .previous \n"
221   - " .set mips0 \n"
222   - : "=&r" (result), "=&r" (temp), "=m" (v->counter)
223   - : "Ir" (i), "m" (v->counter)
224   - : "memory");
  208 + do {
  209 + __asm__ __volatile__(
  210 + " .set mips3 \n"
  211 + " ll %1, %2 # atomic_sub_return \n"
  212 + " subu %0, %1, %3 \n"
  213 + " sc %0, %2 \n"
  214 + " .set mips0 \n"
  215 + : "=&r" (result), "=&r" (temp), "=m" (v->counter)
  216 + : "Ir" (i), "m" (v->counter)
  217 + : "memory");
  218 + } while (unlikely(!result));
  219 +
  220 + result = temp - i;
225 221 } else {
226 222 unsigned long flags;
227 223  
228 224  
... ... @@ -279,12 +275,9 @@
279 275 " bltz %0, 1f \n"
280 276 " sc %0, %2 \n"
281 277 " .set noreorder \n"
282   - " beqz %0, 2f \n"
  278 + " beqz %0, 1b \n"
283 279 " subu %0, %1, %3 \n"
284 280 " .set reorder \n"
285   - " .subsection 2 \n"
286   - "2: b 1b \n"
287   - " .previous \n"
288 281 "1: \n"
289 282 " .set mips0 \n"
290 283 : "=&r" (result), "=&r" (temp), "=m" (v->counter)
... ... @@ -443,18 +436,16 @@
443 436 } else if (kernel_uses_llsc) {
444 437 long temp;
445 438  
446   - __asm__ __volatile__(
447   - " .set mips3 \n"
448   - "1: lld %0, %1 # atomic64_add \n"
449   - " daddu %0, %2 \n"
450   - " scd %0, %1 \n"
451   - " beqz %0, 2f \n"
452   - " .subsection 2 \n"
453   - "2: b 1b \n"
454   - " .previous \n"
455   - " .set mips0 \n"
456   - : "=&r" (temp), "=m" (v->counter)
457   - : "Ir" (i), "m" (v->counter));
  439 + do {
  440 + __asm__ __volatile__(
  441 + " .set mips3 \n"
  442 + " lld %0, %1 # atomic64_add \n"
  443 + " daddu %0, %2 \n"
  444 + " scd %0, %1 \n"
  445 + " .set mips0 \n"
  446 + : "=&r" (temp), "=m" (v->counter)
  447 + : "Ir" (i), "m" (v->counter));
  448 + } while (unlikely(!temp));
458 449 } else {
459 450 unsigned long flags;
460 451  
... ... @@ -488,18 +479,16 @@
488 479 } else if (kernel_uses_llsc) {
489 480 long temp;
490 481  
491   - __asm__ __volatile__(
492   - " .set mips3 \n"
493   - "1: lld %0, %1 # atomic64_sub \n"
494   - " dsubu %0, %2 \n"
495   - " scd %0, %1 \n"
496   - " beqz %0, 2f \n"
497   - " .subsection 2 \n"
498   - "2: b 1b \n"
499   - " .previous \n"
500   - " .set mips0 \n"
501   - : "=&r" (temp), "=m" (v->counter)
502   - : "Ir" (i), "m" (v->counter));
  482 + do {
  483 + __asm__ __volatile__(
  484 + " .set mips3 \n"
  485 + " lld %0, %1 # atomic64_sub \n"
  486 + " dsubu %0, %2 \n"
  487 + " scd %0, %1 \n"
  488 + " .set mips0 \n"
  489 + : "=&r" (temp), "=m" (v->counter)
  490 + : "Ir" (i), "m" (v->counter));
  491 + } while (unlikely(!temp));
503 492 } else {
504 493 unsigned long flags;
505 494  
... ... @@ -535,20 +524,19 @@
535 524 } else if (kernel_uses_llsc) {
536 525 long temp;
537 526  
538   - __asm__ __volatile__(
539   - " .set mips3 \n"
540   - "1: lld %1, %2 # atomic64_add_return \n"
541   - " daddu %0, %1, %3 \n"
542   - " scd %0, %2 \n"
543   - " beqz %0, 2f \n"
544   - " daddu %0, %1, %3 \n"
545   - " .subsection 2 \n"
546   - "2: b 1b \n"
547   - " .previous \n"
548   - " .set mips0 \n"
549   - : "=&r" (result), "=&r" (temp), "=m" (v->counter)
550   - : "Ir" (i), "m" (v->counter)
551   - : "memory");
  527 + do {
  528 + __asm__ __volatile__(
  529 + " .set mips3 \n"
  530 + " lld %1, %2 # atomic64_add_return \n"
  531 + " daddu %0, %1, %3 \n"
  532 + " scd %0, %2 \n"
  533 + " .set mips0 \n"
  534 + : "=&r" (result), "=&r" (temp), "=m" (v->counter)
  535 + : "Ir" (i), "m" (v->counter)
  536 + : "memory");
  537 + } while (unlikely(!result));
  538 +
  539 + result = temp + i;
552 540 } else {
553 541 unsigned long flags;
554 542  
... ... @@ -587,20 +575,19 @@
587 575 } else if (kernel_uses_llsc) {
588 576 long temp;
589 577  
590   - __asm__ __volatile__(
591   - " .set mips3 \n"
592   - "1: lld %1, %2 # atomic64_sub_return \n"
593   - " dsubu %0, %1, %3 \n"
594   - " scd %0, %2 \n"
595   - " beqz %0, 2f \n"
596   - " dsubu %0, %1, %3 \n"
597   - " .subsection 2 \n"
598   - "2: b 1b \n"
599   - " .previous \n"
600   - " .set mips0 \n"
601   - : "=&r" (result), "=&r" (temp), "=m" (v->counter)
602   - : "Ir" (i), "m" (v->counter)
603   - : "memory");
  578 + do {
  579 + __asm__ __volatile__(
  580 + " .set mips3 \n"
  581 + " lld %1, %2 # atomic64_sub_return \n"
  582 + " dsubu %0, %1, %3 \n"
  583 + " scd %0, %2 \n"
  584 + " .set mips0 \n"
  585 + : "=&r" (result), "=&r" (temp), "=m" (v->counter)
  586 + : "Ir" (i), "m" (v->counter)
  587 + : "memory");
  588 + } while (unlikely(!result));
  589 +
  590 + result = temp - i;
604 591 } else {
605 592 unsigned long flags;
606 593  
607 594  
... ... @@ -658,12 +645,9 @@
658 645 " bltz %0, 1f \n"
659 646 " scd %0, %2 \n"
660 647 " .set noreorder \n"
661   - " beqz %0, 2f \n"
  648 + " beqz %0, 1b \n"
662 649 " dsubu %0, %1, %3 \n"
663 650 " .set reorder \n"
664   - " .subsection 2 \n"
665   - "2: b 1b \n"
666   - " .previous \n"
667 651 "1: \n"
668 652 " .set mips0 \n"
669 653 : "=&r" (result), "=&r" (temp), "=m" (v->counter)
arch/mips/include/asm/bitops.h
... ... @@ -73,30 +73,26 @@
73 73 : "ir" (1UL << bit), "m" (*m));
74 74 #ifdef CONFIG_CPU_MIPSR2
75 75 } else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
76   - __asm__ __volatile__(
77   - "1: " __LL "%0, %1 # set_bit \n"
78   - " " __INS "%0, %4, %2, 1 \n"
79   - " " __SC "%0, %1 \n"
80   - " beqz %0, 2f \n"
81   - " .subsection 2 \n"
82   - "2: b 1b \n"
83   - " .previous \n"
84   - : "=&r" (temp), "=m" (*m)
85   - : "ir" (bit), "m" (*m), "r" (~0));
  76 + do {
  77 + __asm__ __volatile__(
  78 + " " __LL "%0, %1 # set_bit \n"
  79 + " " __INS "%0, %3, %2, 1 \n"
  80 + " " __SC "%0, %1 \n"
  81 + : "=&r" (temp), "+m" (*m)
  82 + : "ir" (bit), "r" (~0));
  83 + } while (unlikely(!temp));
86 84 #endif /* CONFIG_CPU_MIPSR2 */
87 85 } else if (kernel_uses_llsc) {
88   - __asm__ __volatile__(
89   - " .set mips3 \n"
90   - "1: " __LL "%0, %1 # set_bit \n"
91   - " or %0, %2 \n"
92   - " " __SC "%0, %1 \n"
93   - " beqz %0, 2f \n"
94   - " .subsection 2 \n"
95   - "2: b 1b \n"
96   - " .previous \n"
97   - " .set mips0 \n"
98   - : "=&r" (temp), "=m" (*m)
99   - : "ir" (1UL << bit), "m" (*m));
  86 + do {
  87 + __asm__ __volatile__(
  88 + " .set mips3 \n"
  89 + " " __LL "%0, %1 # set_bit \n"
  90 + " or %0, %2 \n"
  91 + " " __SC "%0, %1 \n"
  92 + " .set mips0 \n"
  93 + : "=&r" (temp), "+m" (*m)
  94 + : "ir" (1UL << bit));
  95 + } while (unlikely(!temp));
100 96 } else {
101 97 volatile unsigned long *a = addr;
102 98 unsigned long mask;
103 99  
104 100  
... ... @@ -134,34 +130,30 @@
134 130 " " __SC "%0, %1 \n"
135 131 " beqzl %0, 1b \n"
136 132 " .set mips0 \n"
137   - : "=&r" (temp), "=m" (*m)
138   - : "ir" (~(1UL << bit)), "m" (*m));
  133 + : "=&r" (temp), "+m" (*m)
  134 + : "ir" (~(1UL << bit)));
139 135 #ifdef CONFIG_CPU_MIPSR2
140 136 } else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
141   - __asm__ __volatile__(
142   - "1: " __LL "%0, %1 # clear_bit \n"
143   - " " __INS "%0, $0, %2, 1 \n"
144   - " " __SC "%0, %1 \n"
145   - " beqz %0, 2f \n"
146   - " .subsection 2 \n"
147   - "2: b 1b \n"
148   - " .previous \n"
149   - : "=&r" (temp), "=m" (*m)
150   - : "ir" (bit), "m" (*m));
  137 + do {
  138 + __asm__ __volatile__(
  139 + " " __LL "%0, %1 # clear_bit \n"
  140 + " " __INS "%0, $0, %2, 1 \n"
  141 + " " __SC "%0, %1 \n"
  142 + : "=&r" (temp), "+m" (*m)
  143 + : "ir" (bit));
  144 + } while (unlikely(!temp));
151 145 #endif /* CONFIG_CPU_MIPSR2 */
152 146 } else if (kernel_uses_llsc) {
153   - __asm__ __volatile__(
154   - " .set mips3 \n"
155   - "1: " __LL "%0, %1 # clear_bit \n"
156   - " and %0, %2 \n"
157   - " " __SC "%0, %1 \n"
158   - " beqz %0, 2f \n"
159   - " .subsection 2 \n"
160   - "2: b 1b \n"
161   - " .previous \n"
162   - " .set mips0 \n"
163   - : "=&r" (temp), "=m" (*m)
164   - : "ir" (~(1UL << bit)), "m" (*m));
  147 + do {
  148 + __asm__ __volatile__(
  149 + " .set mips3 \n"
  150 + " " __LL "%0, %1 # clear_bit \n"
  151 + " and %0, %2 \n"
  152 + " " __SC "%0, %1 \n"
  153 + " .set mips0 \n"
  154 + : "=&r" (temp), "+m" (*m)
  155 + : "ir" (~(1UL << bit)));
  156 + } while (unlikely(!temp));
165 157 } else {
166 158 volatile unsigned long *a = addr;
167 159 unsigned long mask;
168 160  
... ... @@ -213,24 +205,22 @@
213 205 " " __SC "%0, %1 \n"
214 206 " beqzl %0, 1b \n"
215 207 " .set mips0 \n"
216   - : "=&r" (temp), "=m" (*m)
217   - : "ir" (1UL << bit), "m" (*m));
  208 + : "=&r" (temp), "+m" (*m)
  209 + : "ir" (1UL << bit));
218 210 } else if (kernel_uses_llsc) {
219 211 unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
220 212 unsigned long temp;
221 213  
222   - __asm__ __volatile__(
223   - " .set mips3 \n"
224   - "1: " __LL "%0, %1 # change_bit \n"
225   - " xor %0, %2 \n"
226   - " " __SC "%0, %1 \n"
227   - " beqz %0, 2f \n"
228   - " .subsection 2 \n"
229   - "2: b 1b \n"
230   - " .previous \n"
231   - " .set mips0 \n"
232   - : "=&r" (temp), "=m" (*m)
233   - : "ir" (1UL << bit), "m" (*m));
  214 + do {
  215 + __asm__ __volatile__(
  216 + " .set mips3 \n"
  217 + " " __LL "%0, %1 # change_bit \n"
  218 + " xor %0, %2 \n"
  219 + " " __SC "%0, %1 \n"
  220 + " .set mips0 \n"
  221 + : "=&r" (temp), "+m" (*m)
  222 + : "ir" (1UL << bit));
  223 + } while (unlikely(!temp));
234 224 } else {
235 225 volatile unsigned long *a = addr;
236 226 unsigned long mask;
237 227  
... ... @@ -272,30 +262,26 @@
272 262 " beqzl %2, 1b \n"
273 263 " and %2, %0, %3 \n"
274 264 " .set mips0 \n"
275   - : "=&r" (temp), "=m" (*m), "=&r" (res)
276   - : "r" (1UL << bit), "m" (*m)
  265 + : "=&r" (temp), "+m" (*m), "=&r" (res)
  266 + : "r" (1UL << bit)
277 267 : "memory");
278 268 } else if (kernel_uses_llsc) {
279 269 unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
280 270 unsigned long temp;
281 271  
282   - __asm__ __volatile__(
283   - " .set push \n"
284   - " .set noreorder \n"
285   - " .set mips3 \n"
286   - "1: " __LL "%0, %1 # test_and_set_bit \n"
287   - " or %2, %0, %3 \n"
288   - " " __SC "%2, %1 \n"
289   - " beqz %2, 2f \n"
290   - " and %2, %0, %3 \n"
291   - " .subsection 2 \n"
292   - "2: b 1b \n"
293   - " nop \n"
294   - " .previous \n"
295   - " .set pop \n"
296   - : "=&r" (temp), "=m" (*m), "=&r" (res)
297   - : "r" (1UL << bit), "m" (*m)
298   - : "memory");
  272 + do {
  273 + __asm__ __volatile__(
  274 + " .set mips3 \n"
  275 + " " __LL "%0, %1 # test_and_set_bit \n"
  276 + " or %2, %0, %3 \n"
  277 + " " __SC "%2, %1 \n"
  278 + " .set mips0 \n"
  279 + : "=&r" (temp), "+m" (*m), "=&r" (res)
  280 + : "r" (1UL << bit)
  281 + : "memory");
  282 + } while (unlikely(!res));
  283 +
  284 + res = temp & (1UL << bit);
299 285 } else {
300 286 volatile unsigned long *a = addr;
301 287 unsigned long mask;
302 288  
... ... @@ -340,30 +326,26 @@
340 326 " beqzl %2, 1b \n"
341 327 " and %2, %0, %3 \n"
342 328 " .set mips0 \n"
343   - : "=&r" (temp), "=m" (*m), "=&r" (res)
344   - : "r" (1UL << bit), "m" (*m)
  329 + : "=&r" (temp), "+m" (*m), "=&r" (res)
  330 + : "r" (1UL << bit)
345 331 : "memory");
346 332 } else if (kernel_uses_llsc) {
347 333 unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
348 334 unsigned long temp;
349 335  
350   - __asm__ __volatile__(
351   - " .set push \n"
352   - " .set noreorder \n"
353   - " .set mips3 \n"
354   - "1: " __LL "%0, %1 # test_and_set_bit \n"
355   - " or %2, %0, %3 \n"
356   - " " __SC "%2, %1 \n"
357   - " beqz %2, 2f \n"
358   - " and %2, %0, %3 \n"
359   - " .subsection 2 \n"
360   - "2: b 1b \n"
361   - " nop \n"
362   - " .previous \n"
363   - " .set pop \n"
364   - : "=&r" (temp), "=m" (*m), "=&r" (res)
365   - : "r" (1UL << bit), "m" (*m)
366   - : "memory");
  336 + do {
  337 + __asm__ __volatile__(
  338 + " .set mips3 \n"
  339 + " " __LL "%0, %1 # test_and_set_bit \n"
  340 + " or %2, %0, %3 \n"
  341 + " " __SC "%2, %1 \n"
  342 + " .set mips0 \n"
  343 + : "=&r" (temp), "+m" (*m), "=&r" (res)
  344 + : "r" (1UL << bit)
  345 + : "memory");
  346 + } while (unlikely(!res));
  347 +
  348 + res = temp & (1UL << bit);
367 349 } else {
368 350 volatile unsigned long *a = addr;
369 351 unsigned long mask;
370 352  
371 353  
... ... @@ -410,49 +392,43 @@
410 392 " beqzl %2, 1b \n"
411 393 " and %2, %0, %3 \n"
412 394 " .set mips0 \n"
413   - : "=&r" (temp), "=m" (*m), "=&r" (res)
414   - : "r" (1UL << bit), "m" (*m)
  395 + : "=&r" (temp), "+m" (*m), "=&r" (res)
  396 + : "r" (1UL << bit)
415 397 : "memory");
416 398 #ifdef CONFIG_CPU_MIPSR2
417 399 } else if (kernel_uses_llsc && __builtin_constant_p(nr)) {
418 400 unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
419 401 unsigned long temp;
420 402  
421   - __asm__ __volatile__(
422   - "1: " __LL "%0, %1 # test_and_clear_bit \n"
423   - " " __EXT "%2, %0, %3, 1 \n"
424   - " " __INS "%0, $0, %3, 1 \n"
425   - " " __SC "%0, %1 \n"
426   - " beqz %0, 2f \n"
427   - " .subsection 2 \n"
428   - "2: b 1b \n"
429   - " .previous \n"
430   - : "=&r" (temp), "=m" (*m), "=&r" (res)
431   - : "ir" (bit), "m" (*m)
432   - : "memory");
  403 + do {
  404 + __asm__ __volatile__(
  405 + " " __LL "%0, %1 # test_and_clear_bit \n"
  406 + " " __EXT "%2, %0, %3, 1 \n"
  407 + " " __INS "%0, $0, %3, 1 \n"
  408 + " " __SC "%0, %1 \n"
  409 + : "=&r" (temp), "+m" (*m), "=&r" (res)
  410 + : "ir" (bit)
  411 + : "memory");
  412 + } while (unlikely(!temp));
433 413 #endif
434 414 } else if (kernel_uses_llsc) {
435 415 unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
436 416 unsigned long temp;
437 417  
438   - __asm__ __volatile__(
439   - " .set push \n"
440   - " .set noreorder \n"
441   - " .set mips3 \n"
442   - "1: " __LL "%0, %1 # test_and_clear_bit \n"
443   - " or %2, %0, %3 \n"
444   - " xor %2, %3 \n"
445   - " " __SC "%2, %1 \n"
446   - " beqz %2, 2f \n"
447   - " and %2, %0, %3 \n"
448   - " .subsection 2 \n"
449   - "2: b 1b \n"
450   - " nop \n"
451   - " .previous \n"
452   - " .set pop \n"
453   - : "=&r" (temp), "=m" (*m), "=&r" (res)
454   - : "r" (1UL << bit), "m" (*m)
455   - : "memory");
  418 + do {
  419 + __asm__ __volatile__(
  420 + " .set mips3 \n"
  421 + " " __LL "%0, %1 # test_and_clear_bit \n"
  422 + " or %2, %0, %3 \n"
  423 + " xor %2, %3 \n"
  424 + " " __SC "%2, %1 \n"
  425 + " .set mips0 \n"
  426 + : "=&r" (temp), "+m" (*m), "=&r" (res)
  427 + : "r" (1UL << bit)
  428 + : "memory");
  429 + } while (unlikely(!res));
  430 +
  431 + res = temp & (1UL << bit);
456 432 } else {
457 433 volatile unsigned long *a = addr;
458 434 unsigned long mask;
459 435  
... ... @@ -499,30 +475,26 @@
499 475 " beqzl %2, 1b \n"
500 476 " and %2, %0, %3 \n"
501 477 " .set mips0 \n"
502   - : "=&r" (temp), "=m" (*m), "=&r" (res)
503   - : "r" (1UL << bit), "m" (*m)
  478 + : "=&r" (temp), "+m" (*m), "=&r" (res)
  479 + : "r" (1UL << bit)
504 480 : "memory");
505 481 } else if (kernel_uses_llsc) {
506 482 unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
507 483 unsigned long temp;
508 484  
509   - __asm__ __volatile__(
510   - " .set push \n"
511   - " .set noreorder \n"
512   - " .set mips3 \n"
513   - "1: " __LL "%0, %1 # test_and_change_bit \n"
514   - " xor %2, %0, %3 \n"
515   - " " __SC "\t%2, %1 \n"
516   - " beqz %2, 2f \n"
517   - " and %2, %0, %3 \n"
518   - " .subsection 2 \n"
519   - "2: b 1b \n"
520   - " nop \n"
521   - " .previous \n"
522   - " .set pop \n"
523   - : "=&r" (temp), "=m" (*m), "=&r" (res)
524   - : "r" (1UL << bit), "m" (*m)
525   - : "memory");
  485 + do {
  486 + __asm__ __volatile__(
  487 + " .set mips3 \n"
  488 + " " __LL "%0, %1 # test_and_change_bit \n"
  489 + " xor %2, %0, %3 \n"
  490 + " " __SC "\t%2, %1 \n"
  491 + " .set mips0 \n"
  492 + : "=&r" (temp), "+m" (*m), "=&r" (res)
  493 + : "r" (1UL << bit)
  494 + : "memory");
  495 + } while (unlikely(!res));
  496 +
  497 + res = temp & (1UL << bit);
526 498 } else {
527 499 volatile unsigned long *a = addr;
528 500 unsigned long mask;
arch/mips/include/asm/cmpxchg.h
... ... @@ -44,12 +44,9 @@
44 44 " move $1, %z4 \n" \
45 45 " .set mips3 \n" \
46 46 " " st " $1, %1 \n" \
47   - " beqz $1, 3f \n" \
48   - "2: \n" \
49   - " .subsection 2 \n" \
50   - "3: b 1b \n" \
51   - " .previous \n" \
  47 + " beqz $1, 1b \n" \
52 48 " .set pop \n" \
  49 + "2: \n" \
53 50 : "=&r" (__ret), "=R" (*m) \
54 51 : "R" (*m), "Jr" (old), "Jr" (new) \
55 52 : "memory"); \
arch/mips/include/asm/system.h
... ... @@ -115,21 +115,19 @@
115 115 } else if (kernel_uses_llsc) {
116 116 unsigned long dummy;
117 117  
118   - __asm__ __volatile__(
119   - " .set mips3 \n"
120   - "1: ll %0, %3 # xchg_u32 \n"
121   - " .set mips0 \n"
122   - " move %2, %z4 \n"
123   - " .set mips3 \n"
124   - " sc %2, %1 \n"
125   - " beqz %2, 2f \n"
126   - " .subsection 2 \n"
127   - "2: b 1b \n"
128   - " .previous \n"
129   - " .set mips0 \n"
130   - : "=&r" (retval), "=m" (*m), "=&r" (dummy)
131   - : "R" (*m), "Jr" (val)
132   - : "memory");
  118 + do {
  119 + __asm__ __volatile__(
  120 + " .set mips3 \n"
  121 + " ll %0, %3 # xchg_u32 \n"
  122 + " .set mips0 \n"
  123 + " move %2, %z4 \n"
  124 + " .set mips3 \n"
  125 + " sc %2, %1 \n"
  126 + " .set mips0 \n"
  127 + : "=&r" (retval), "=m" (*m), "=&r" (dummy)
  128 + : "R" (*m), "Jr" (val)
  129 + : "memory");
  130 + } while (unlikely(!dummy));
133 131 } else {
134 132 unsigned long flags;
135 133  
... ... @@ -167,19 +165,17 @@
167 165 } else if (kernel_uses_llsc) {
168 166 unsigned long dummy;
169 167  
170   - __asm__ __volatile__(
171   - " .set mips3 \n"
172   - "1: lld %0, %3 # xchg_u64 \n"
173   - " move %2, %z4 \n"
174   - " scd %2, %1 \n"
175   - " beqz %2, 2f \n"
176   - " .subsection 2 \n"
177   - "2: b 1b \n"
178   - " .previous \n"
179   - " .set mips0 \n"
180   - : "=&r" (retval), "=m" (*m), "=&r" (dummy)
181   - : "R" (*m), "Jr" (val)
182   - : "memory");
  168 + do {
  169 + __asm__ __volatile__(
  170 + " .set mips3 \n"
  171 + " lld %0, %3 # xchg_u64 \n"
  172 + " move %2, %z4 \n"
  173 + " scd %2, %1 \n"
  174 + " .set mips0 \n"
  175 + : "=&r" (retval), "=m" (*m), "=&r" (dummy)
  176 + : "R" (*m), "Jr" (val)
  177 + : "memory");
  178 + } while (unlikely(!dummy));
183 179 } else {
184 180 unsigned long flags;
185 181