Commit 7837314d141c661c70bc13c5050694413ecfe14a
1 parent 18cb657ca1
MIPS: Get rid of branches to .subsections.
It was a nice optimization - on paper at least. In practice it results in branches that may exceed the maximum legal range for a branch. We can fight that problem with -ffunction-sections, but -ffunction-sections in turn is incompatible with the -pg option used by the function tracer.

By rewriting the loop around all simple LL/SC blocks in C we reduce the amount of inline assembler and at the same time often allow GCC to fill the branch delay slots with something sensible, or to apply whatever other clever optimization it may have up its sleeve.

With this optimization gone we also no longer need -ffunction-sections, so drop it.

This optimization was originally introduced in 2.6.21 by commit 5999eca25c1fd4b9b9aca7833b04d10fe4bc877d (linux-mips.org), resp. f65e4fa8e0c6022ad58dc88d1b11b12589ed7f9f (kernel.org).

The original fix for the issues which caused me to pull this optimization is by Paul Gortmaker <paul.gortmaker@windriver.com>.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
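The conversion leans on the LL/SC semantics themselves: ll loads a word and establishes a link, and sc writes the word back only if nothing else has touched the location since, leaving 1 in its source register on success and 0 on failure. That flag is what the old code tested with a beqz into a .subsection 2 trampoline - and a MIPS conditional branch encodes a 16-bit word offset, so it only reaches about +/-128 KB, which a large enough section can exceed. With the retry test in C, every branch GCC emits is an ordinary short-range one. A minimal sketch of the resulting idiom (the function name is made up; the kernel's barriers, R10000 workaround and non-LL/SC fallback are omitted, and a single "+m" operand stands in for the "=m"/"m" pair the atomic.h code still uses):

#define unlikely(x)	__builtin_expect(!!(x), 0)

/* Atomically add i to *counter; retry in C until sc reports success. */
static inline void llsc_add(int i, int *counter)
{
	int temp;

	do {
		__asm__ __volatile__(
		"	.set	mips3				\n"
		"	ll	%0, %1	# load-linked		\n"
		"	addu	%0, %2	# modify		\n"
		"	sc	%0, %1	# %0 := 1 ok, 0 failed	\n"
		"	.set	mips0				\n"
		: "=&r" (temp), "+m" (*counter)
		: "Ir" (i));
	} while (unlikely(!temp));	/* 0 means another CPU intervened */
}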
Showing 5 changed files with 243 additions and 297 deletions
arch/mips/Makefile
arch/mips/include/asm/atomic.h
... | ... | @@ -64,18 +64,16 @@ |
64 | 64 | } else if (kernel_uses_llsc) { |
65 | 65 | int temp; |
66 | 66 | |
67 | - __asm__ __volatile__( | |
68 | - " .set mips3 \n" | |
69 | - "1: ll %0, %1 # atomic_add \n" | |
70 | - " addu %0, %2 \n" | |
71 | - " sc %0, %1 \n" | |
72 | - " beqz %0, 2f \n" | |
73 | - " .subsection 2 \n" | |
74 | - "2: b 1b \n" | |
75 | - " .previous \n" | |
76 | - " .set mips0 \n" | |
77 | - : "=&r" (temp), "=m" (v->counter) | |
78 | - : "Ir" (i), "m" (v->counter)); | |
67 | + do { | |
68 | + __asm__ __volatile__( | |
69 | + " .set mips3 \n" | |
70 | + " ll %0, %1 # atomic_add \n" | |
71 | + " addu %0, %2 \n" | |
72 | + " sc %0, %1 \n" | |
73 | + " .set mips0 \n" | |
74 | + : "=&r" (temp), "=m" (v->counter) | |
75 | + : "Ir" (i), "m" (v->counter)); | |
76 | + } while (unlikely(!temp)); | |
79 | 77 | } else { |
80 | 78 | unsigned long flags; |
81 | 79 | |
... | ... | @@ -109,18 +107,16 @@ |
109 | 107 | } else if (kernel_uses_llsc) { |
110 | 108 | int temp; |
111 | 109 | |
112 | - __asm__ __volatile__( | |
113 | - " .set mips3 \n" | |
114 | - "1: ll %0, %1 # atomic_sub \n" | |
115 | - " subu %0, %2 \n" | |
116 | - " sc %0, %1 \n" | |
117 | - " beqz %0, 2f \n" | |
118 | - " .subsection 2 \n" | |
119 | - "2: b 1b \n" | |
120 | - " .previous \n" | |
121 | - " .set mips0 \n" | |
122 | - : "=&r" (temp), "=m" (v->counter) | |
123 | - : "Ir" (i), "m" (v->counter)); | |
110 | + do { | |
111 | + __asm__ __volatile__( | |
112 | + " .set mips3 \n" | |
113 | + " ll %0, %1 # atomic_sub \n" | |
114 | + " subu %0, %2 \n" | |
115 | + " sc %0, %1 \n" | |
116 | + " .set mips0 \n" | |
117 | + : "=&r" (temp), "=m" (v->counter) | |
118 | + : "Ir" (i), "m" (v->counter)); | |
119 | + } while (unlikely(!temp)); | |
124 | 120 | } else { |
125 | 121 | unsigned long flags; |
126 | 122 | |
... | ... | @@ -156,20 +152,19 @@ |
156 | 152 | } else if (kernel_uses_llsc) { |
157 | 153 | int temp; |
158 | 154 | |
159 | - __asm__ __volatile__( | |
160 | - " .set mips3 \n" | |
161 | - "1: ll %1, %2 # atomic_add_return \n" | |
162 | - " addu %0, %1, %3 \n" | |
163 | - " sc %0, %2 \n" | |
164 | - " beqz %0, 2f \n" | |
165 | - " addu %0, %1, %3 \n" | |
166 | - " .subsection 2 \n" | |
167 | - "2: b 1b \n" | |
168 | - " .previous \n" | |
169 | - " .set mips0 \n" | |
170 | - : "=&r" (result), "=&r" (temp), "=m" (v->counter) | |
171 | - : "Ir" (i), "m" (v->counter) | |
172 | - : "memory"); | |
155 | + do { | |
156 | + __asm__ __volatile__( | |
157 | + " .set mips3 \n" | |
158 | + " ll %1, %2 # atomic_add_return \n" | |
159 | + " addu %0, %1, %3 \n" | |
160 | + " sc %0, %2 \n" | |
161 | + " .set mips0 \n" | |
162 | + : "=&r" (result), "=&r" (temp), "=m" (v->counter) | |
163 | + : "Ir" (i), "m" (v->counter) | |
164 | + : "memory"); | |
165 | + } while (unlikely(!result)); | |
166 | + | |
167 | + result = temp + i; | |
173 | 168 | } else { |
174 | 169 | unsigned long flags; |
175 | 170 | |
176 | 171 | |
... | ... | @@ -205,23 +200,24 @@ |
205 | 200 | : "=&r" (result), "=&r" (temp), "=m" (v->counter) |
206 | 201 | : "Ir" (i), "m" (v->counter) |
207 | 202 | : "memory"); |
203 | + | |
204 | + result = temp - i; | |
208 | 205 | } else if (kernel_uses_llsc) { |
209 | 206 | int temp; |
210 | 207 | |
211 | - __asm__ __volatile__( | |
212 | - " .set mips3 \n" | |
213 | - "1: ll %1, %2 # atomic_sub_return \n" | |
214 | - " subu %0, %1, %3 \n" | |
215 | - " sc %0, %2 \n" | |
216 | - " beqz %0, 2f \n" | |
217 | - " subu %0, %1, %3 \n" | |
218 | - " .subsection 2 \n" | |
219 | - "2: b 1b \n" | |
220 | - " .previous \n" | |
221 | - " .set mips0 \n" | |
222 | - : "=&r" (result), "=&r" (temp), "=m" (v->counter) | |
223 | - : "Ir" (i), "m" (v->counter) | |
224 | - : "memory"); | |
208 | + do { | |
209 | + __asm__ __volatile__( | |
210 | + " .set mips3 \n" | |
211 | + " ll %1, %2 # atomic_sub_return \n" | |
212 | + " subu %0, %1, %3 \n" | |
213 | + " sc %0, %2 \n" | |
214 | + " .set mips0 \n" | |
215 | + : "=&r" (result), "=&r" (temp), "=m" (v->counter) | |
216 | + : "Ir" (i), "m" (v->counter) | |
217 | + : "memory"); | |
218 | + } while (unlikely(!result)); | |
219 | + | |
220 | + result = temp - i; | |
225 | 221 | } else { |
226 | 222 | unsigned long flags; |
227 | 223 | |
228 | 224 | |
... | ... | @@ -279,12 +275,9 @@ |
279 | 275 | " bltz %0, 1f \n" |
280 | 276 | " sc %0, %2 \n" |
281 | 277 | " .set noreorder \n" |
282 | - " beqz %0, 2f \n" | |
278 | + " beqz %0, 1b \n" | |
283 | 279 | " subu %0, %1, %3 \n" |
284 | 280 | " .set reorder \n" |
285 | - " .subsection 2 \n" | |
286 | - "2: b 1b \n" | |
287 | - " .previous \n" | |
288 | 281 | "1: \n" |
289 | 282 | " .set mips0 \n" |
290 | 283 | : "=&r" (result), "=&r" (temp), "=m" (v->counter) |
... | ... | @@ -443,18 +436,16 @@ |
443 | 436 | } else if (kernel_uses_llsc) { |
444 | 437 | long temp; |
445 | 438 | |
446 | - __asm__ __volatile__( | |
447 | - " .set mips3 \n" | |
448 | - "1: lld %0, %1 # atomic64_add \n" | |
449 | - " daddu %0, %2 \n" | |
450 | - " scd %0, %1 \n" | |
451 | - " beqz %0, 2f \n" | |
452 | - " .subsection 2 \n" | |
453 | - "2: b 1b \n" | |
454 | - " .previous \n" | |
455 | - " .set mips0 \n" | |
456 | - : "=&r" (temp), "=m" (v->counter) | |
457 | - : "Ir" (i), "m" (v->counter)); | |
439 | + do { | |
440 | + __asm__ __volatile__( | |
441 | + " .set mips3 \n" | |
442 | + " lld %0, %1 # atomic64_add \n" | |
443 | + " daddu %0, %2 \n" | |
444 | + " scd %0, %1 \n" | |
445 | + " .set mips0 \n" | |
446 | + : "=&r" (temp), "=m" (v->counter) | |
447 | + : "Ir" (i), "m" (v->counter)); | |
448 | + } while (unlikely(!temp)); | |
458 | 449 | } else { |
459 | 450 | unsigned long flags; |
460 | 451 | |
... | ... | @@ -488,18 +479,16 @@ |
488 | 479 | } else if (kernel_uses_llsc) { |
489 | 480 | long temp; |
490 | 481 | |
491 | - __asm__ __volatile__( | |
492 | - " .set mips3 \n" | |
493 | - "1: lld %0, %1 # atomic64_sub \n" | |
494 | - " dsubu %0, %2 \n" | |
495 | - " scd %0, %1 \n" | |
496 | - " beqz %0, 2f \n" | |
497 | - " .subsection 2 \n" | |
498 | - "2: b 1b \n" | |
499 | - " .previous \n" | |
500 | - " .set mips0 \n" | |
501 | - : "=&r" (temp), "=m" (v->counter) | |
502 | - : "Ir" (i), "m" (v->counter)); | |
482 | + do { | |
483 | + __asm__ __volatile__( | |
484 | + " .set mips3 \n" | |
485 | + " lld %0, %1 # atomic64_sub \n" | |
486 | + " dsubu %0, %2 \n" | |
487 | + " scd %0, %1 \n" | |
488 | + " .set mips0 \n" | |
489 | + : "=&r" (temp), "=m" (v->counter) | |
490 | + : "Ir" (i), "m" (v->counter)); | |
491 | + } while (unlikely(!temp)); | |
503 | 492 | } else { |
504 | 493 | unsigned long flags; |
505 | 494 | |
... | ... | @@ -535,20 +524,19 @@ |
535 | 524 | } else if (kernel_uses_llsc) { |
536 | 525 | long temp; |
537 | 526 | |
538 | - __asm__ __volatile__( | |
539 | - " .set mips3 \n" | |
540 | - "1: lld %1, %2 # atomic64_add_return \n" | |
541 | - " daddu %0, %1, %3 \n" | |
542 | - " scd %0, %2 \n" | |
543 | - " beqz %0, 2f \n" | |
544 | - " daddu %0, %1, %3 \n" | |
545 | - " .subsection 2 \n" | |
546 | - "2: b 1b \n" | |
547 | - " .previous \n" | |
548 | - " .set mips0 \n" | |
549 | - : "=&r" (result), "=&r" (temp), "=m" (v->counter) | |
550 | - : "Ir" (i), "m" (v->counter) | |
551 | - : "memory"); | |
527 | + do { | |
528 | + __asm__ __volatile__( | |
529 | + " .set mips3 \n" | |
530 | + " lld %1, %2 # atomic64_add_return \n" | |
531 | + " daddu %0, %1, %3 \n" | |
532 | + " scd %0, %2 \n" | |
533 | + " .set mips0 \n" | |
534 | + : "=&r" (result), "=&r" (temp), "=m" (v->counter) | |
535 | + : "Ir" (i), "m" (v->counter) | |
536 | + : "memory"); | |
537 | + } while (unlikely(!result)); | |
538 | + | |
539 | + result = temp + i; | |
552 | 540 | } else { |
553 | 541 | unsigned long flags; |
554 | 542 | |
... | ... | @@ -587,20 +575,19 @@ |
587 | 575 | } else if (kernel_uses_llsc) { |
588 | 576 | long temp; |
589 | 577 | |
590 | - __asm__ __volatile__( | |
591 | - " .set mips3 \n" | |
592 | - "1: lld %1, %2 # atomic64_sub_return \n" | |
593 | - " dsubu %0, %1, %3 \n" | |
594 | - " scd %0, %2 \n" | |
595 | - " beqz %0, 2f \n" | |
596 | - " dsubu %0, %1, %3 \n" | |
597 | - " .subsection 2 \n" | |
598 | - "2: b 1b \n" | |
599 | - " .previous \n" | |
600 | - " .set mips0 \n" | |
601 | - : "=&r" (result), "=&r" (temp), "=m" (v->counter) | |
602 | - : "Ir" (i), "m" (v->counter) | |
603 | - : "memory"); | |
578 | + do { | |
579 | + __asm__ __volatile__( | |
580 | + " .set mips3 \n" | |
581 | + " lld %1, %2 # atomic64_sub_return \n" | |
582 | + " dsubu %0, %1, %3 \n" | |
583 | + " scd %0, %2 \n" | |
584 | + " .set mips0 \n" | |
585 | + : "=&r" (result), "=&r" (temp), "=m" (v->counter) | |
586 | + : "Ir" (i), "m" (v->counter) | |
587 | + : "memory"); | |
588 | + } while (unlikely(!result)); | |
589 | + | |
590 | + result = temp - i; | |
604 | 591 | } else { |
605 | 592 | unsigned long flags; |
606 | 593 | |
607 | 594 | |
... | ... | @@ -658,12 +645,9 @@ |
658 | 645 | " bltz %0, 1f \n" |
659 | 646 | " scd %0, %2 \n" |
660 | 647 | " .set noreorder \n" |
661 | - " beqz %0, 2f \n" | |
648 | + " beqz %0, 1b \n" | |
662 | 649 | " dsubu %0, %1, %3 \n" |
663 | 650 | " .set reorder \n" |
664 | - " .subsection 2 \n" | |
665 | - "2: b 1b \n" | |
666 | - " .previous \n" | |
667 | 651 | "1: \n" |
668 | 652 | " .set mips0 \n" |
669 | 653 | : "=&r" (result), "=&r" (temp), "=m" (v->counter) |
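The _return variants need one extra step. The old asm recomputed the result in the branch delay slot, but once the loop moves to C the sc has already overwritten %0 with its 0/1 status, so the return value is rebuilt after the loop from temp, the value ll loaded - that is what the added "result = temp + i;" / "result = temp - i;" lines do. A sketch under the same assumptions as above:

static inline int llsc_add_return(int i, int *counter)
{
	int result, temp;

	do {
		__asm__ __volatile__(
		"	.set	mips3			\n"
		"	ll	%1, %2			\n"
		"	addu	%0, %1, %3		\n"
		"	sc	%0, %2			\n"
		"	.set	mips0			\n"
		: "=&r" (result), "=&r" (temp), "+m" (*counter)
		: "Ir" (i)
		: "memory");
	} while (unlikely(!result));

	/* sc clobbered result with its status bit; recompute from temp. */
	return temp + i;
}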
arch/mips/include/asm/bitops.h
... | ... | @@ -73,30 +73,26 @@ |
73 | 73 | : "ir" (1UL << bit), "m" (*m)); |
74 | 74 | #ifdef CONFIG_CPU_MIPSR2 |
75 | 75 | } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { |
76 | - __asm__ __volatile__( | |
77 | - "1: " __LL "%0, %1 # set_bit \n" | |
78 | - " " __INS "%0, %4, %2, 1 \n" | |
79 | - " " __SC "%0, %1 \n" | |
80 | - " beqz %0, 2f \n" | |
81 | - " .subsection 2 \n" | |
82 | - "2: b 1b \n" | |
83 | - " .previous \n" | |
84 | - : "=&r" (temp), "=m" (*m) | |
85 | - : "ir" (bit), "m" (*m), "r" (~0)); | |
76 | + do { | |
77 | + __asm__ __volatile__( | |
78 | + " " __LL "%0, %1 # set_bit \n" | |
79 | + " " __INS "%0, %3, %2, 1 \n" | |
80 | + " " __SC "%0, %1 \n" | |
81 | + : "=&r" (temp), "+m" (*m) | |
82 | + : "ir" (bit), "r" (~0)); | |
83 | + } while (unlikely(!temp)); | |
86 | 84 | #endif /* CONFIG_CPU_MIPSR2 */ |
87 | 85 | } else if (kernel_uses_llsc) { |
88 | - __asm__ __volatile__( | |
89 | - " .set mips3 \n" | |
90 | - "1: " __LL "%0, %1 # set_bit \n" | |
91 | - " or %0, %2 \n" | |
92 | - " " __SC "%0, %1 \n" | |
93 | - " beqz %0, 2f \n" | |
94 | - " .subsection 2 \n" | |
95 | - "2: b 1b \n" | |
96 | - " .previous \n" | |
97 | - " .set mips0 \n" | |
98 | - : "=&r" (temp), "=m" (*m) | |
99 | - : "ir" (1UL << bit), "m" (*m)); | |
86 | + do { | |
87 | + __asm__ __volatile__( | |
88 | + " .set mips3 \n" | |
89 | + " " __LL "%0, %1 # set_bit \n" | |
90 | + " or %0, %2 \n" | |
91 | + " " __SC "%0, %1 \n" | |
92 | + " .set mips0 \n" | |
93 | + : "=&r" (temp), "+m" (*m) | |
94 | + : "ir" (1UL << bit)); | |
95 | + } while (unlikely(!temp)); | |
100 | 96 | } else { |
101 | 97 | volatile unsigned long *a = addr; |
102 | 98 | unsigned long mask; |
103 | 99 | |
104 | 100 | |
... | ... | @@ -134,34 +130,30 @@ |
134 | 130 | " " __SC "%0, %1 \n" |
135 | 131 | " beqzl %0, 1b \n" |
136 | 132 | " .set mips0 \n" |
137 | - : "=&r" (temp), "=m" (*m) | |
138 | - : "ir" (~(1UL << bit)), "m" (*m)); | |
133 | + : "=&r" (temp), "+m" (*m) | |
134 | + : "ir" (~(1UL << bit))); | |
139 | 135 | #ifdef CONFIG_CPU_MIPSR2 |
140 | 136 | } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { |
141 | - __asm__ __volatile__( | |
142 | - "1: " __LL "%0, %1 # clear_bit \n" | |
143 | - " " __INS "%0, $0, %2, 1 \n" | |
144 | - " " __SC "%0, %1 \n" | |
145 | - " beqz %0, 2f \n" | |
146 | - " .subsection 2 \n" | |
147 | - "2: b 1b \n" | |
148 | - " .previous \n" | |
149 | - : "=&r" (temp), "=m" (*m) | |
150 | - : "ir" (bit), "m" (*m)); | |
137 | + do { | |
138 | + __asm__ __volatile__( | |
139 | + " " __LL "%0, %1 # clear_bit \n" | |
140 | + " " __INS "%0, $0, %2, 1 \n" | |
141 | + " " __SC "%0, %1 \n" | |
142 | + : "=&r" (temp), "+m" (*m) | |
143 | + : "ir" (bit)); | |
144 | + } while (unlikely(!temp)); | |
151 | 145 | #endif /* CONFIG_CPU_MIPSR2 */ |
152 | 146 | } else if (kernel_uses_llsc) { |
153 | - __asm__ __volatile__( | |
154 | - " .set mips3 \n" | |
155 | - "1: " __LL "%0, %1 # clear_bit \n" | |
156 | - " and %0, %2 \n" | |
157 | - " " __SC "%0, %1 \n" | |
158 | - " beqz %0, 2f \n" | |
159 | - " .subsection 2 \n" | |
160 | - "2: b 1b \n" | |
161 | - " .previous \n" | |
162 | - " .set mips0 \n" | |
163 | - : "=&r" (temp), "=m" (*m) | |
164 | - : "ir" (~(1UL << bit)), "m" (*m)); | |
147 | + do { | |
148 | + __asm__ __volatile__( | |
149 | + " .set mips3 \n" | |
150 | + " " __LL "%0, %1 # clear_bit \n" | |
151 | + " and %0, %2 \n" | |
152 | + " " __SC "%0, %1 \n" | |
153 | + " .set mips0 \n" | |
154 | + : "=&r" (temp), "+m" (*m) | |
155 | + : "ir" (~(1UL << bit))); | |
156 | + } while (unlikely(!temp)); | |
165 | 157 | } else { |
166 | 158 | volatile unsigned long *a = addr; |
167 | 159 | unsigned long mask; |
168 | 160 | |
... | ... | @@ -213,24 +205,22 @@ |
213 | 205 | " " __SC "%0, %1 \n" |
214 | 206 | " beqzl %0, 1b \n" |
215 | 207 | " .set mips0 \n" |
216 | - : "=&r" (temp), "=m" (*m) | |
217 | - : "ir" (1UL << bit), "m" (*m)); | |
208 | + : "=&r" (temp), "+m" (*m) | |
209 | + : "ir" (1UL << bit)); | |
218 | 210 | } else if (kernel_uses_llsc) { |
219 | 211 | unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); |
220 | 212 | unsigned long temp; |
221 | 213 | |
222 | - __asm__ __volatile__( | |
223 | - " .set mips3 \n" | |
224 | - "1: " __LL "%0, %1 # change_bit \n" | |
225 | - " xor %0, %2 \n" | |
226 | - " " __SC "%0, %1 \n" | |
227 | - " beqz %0, 2f \n" | |
228 | - " .subsection 2 \n" | |
229 | - "2: b 1b \n" | |
230 | - " .previous \n" | |
231 | - " .set mips0 \n" | |
232 | - : "=&r" (temp), "=m" (*m) | |
233 | - : "ir" (1UL << bit), "m" (*m)); | |
214 | + do { | |
215 | + __asm__ __volatile__( | |
216 | + " .set mips3 \n" | |
217 | + " " __LL "%0, %1 # change_bit \n" | |
218 | + " xor %0, %2 \n" | |
219 | + " " __SC "%0, %1 \n" | |
220 | + " .set mips0 \n" | |
221 | + : "=&r" (temp), "+m" (*m) | |
222 | + : "ir" (1UL << bit)); | |
223 | + } while (unlikely(!temp)); | |
234 | 224 | } else { |
235 | 225 | volatile unsigned long *a = addr; |
236 | 226 | unsigned long mask; |
237 | 227 | |
... | ... | @@ -272,30 +262,26 @@ |
272 | 262 | " beqzl %2, 1b \n" |
273 | 263 | " and %2, %0, %3 \n" |
274 | 264 | " .set mips0 \n" |
275 | - : "=&r" (temp), "=m" (*m), "=&r" (res) | |
276 | - : "r" (1UL << bit), "m" (*m) | |
265 | + : "=&r" (temp), "+m" (*m), "=&r" (res) | |
266 | + : "r" (1UL << bit) | |
277 | 267 | : "memory"); |
278 | 268 | } else if (kernel_uses_llsc) { |
279 | 269 | unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); |
280 | 270 | unsigned long temp; |
281 | 271 | |
282 | - __asm__ __volatile__( | |
283 | - " .set push \n" | |
284 | - " .set noreorder \n" | |
285 | - " .set mips3 \n" | |
286 | - "1: " __LL "%0, %1 # test_and_set_bit \n" | |
287 | - " or %2, %0, %3 \n" | |
288 | - " " __SC "%2, %1 \n" | |
289 | - " beqz %2, 2f \n" | |
290 | - " and %2, %0, %3 \n" | |
291 | - " .subsection 2 \n" | |
292 | - "2: b 1b \n" | |
293 | - " nop \n" | |
294 | - " .previous \n" | |
295 | - " .set pop \n" | |
296 | - : "=&r" (temp), "=m" (*m), "=&r" (res) | |
297 | - : "r" (1UL << bit), "m" (*m) | |
298 | - : "memory"); | |
272 | + do { | |
273 | + __asm__ __volatile__( | |
274 | + " .set mips3 \n" | |
275 | + " " __LL "%0, %1 # test_and_set_bit \n" | |
276 | + " or %2, %0, %3 \n" | |
277 | + " " __SC "%2, %1 \n" | |
278 | + " .set mips0 \n" | |
279 | + : "=&r" (temp), "+m" (*m), "=&r" (res) | |
280 | + : "r" (1UL << bit) | |
281 | + : "memory"); | |
282 | + } while (unlikely(!res)); | |
283 | + | |
284 | + res = temp & (1UL << bit); | |
299 | 285 | } else { |
300 | 286 | volatile unsigned long *a = addr; |
301 | 287 | unsigned long mask; |
302 | 288 | |
... | ... | @@ -340,30 +326,26 @@ |
340 | 326 | " beqzl %2, 1b \n" |
341 | 327 | " and %2, %0, %3 \n" |
342 | 328 | " .set mips0 \n" |
343 | - : "=&r" (temp), "=m" (*m), "=&r" (res) | |
344 | - : "r" (1UL << bit), "m" (*m) | |
329 | + : "=&r" (temp), "+m" (*m), "=&r" (res) | |
330 | + : "r" (1UL << bit) | |
345 | 331 | : "memory"); |
346 | 332 | } else if (kernel_uses_llsc) { |
347 | 333 | unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); |
348 | 334 | unsigned long temp; |
349 | 335 | |
350 | - __asm__ __volatile__( | |
351 | - " .set push \n" | |
352 | - " .set noreorder \n" | |
353 | - " .set mips3 \n" | |
354 | - "1: " __LL "%0, %1 # test_and_set_bit \n" | |
355 | - " or %2, %0, %3 \n" | |
356 | - " " __SC "%2, %1 \n" | |
357 | - " beqz %2, 2f \n" | |
358 | - " and %2, %0, %3 \n" | |
359 | - " .subsection 2 \n" | |
360 | - "2: b 1b \n" | |
361 | - " nop \n" | |
362 | - " .previous \n" | |
363 | - " .set pop \n" | |
364 | - : "=&r" (temp), "=m" (*m), "=&r" (res) | |
365 | - : "r" (1UL << bit), "m" (*m) | |
366 | - : "memory"); | |
336 | + do { | |
337 | + __asm__ __volatile__( | |
338 | + " .set mips3 \n" | |
339 | + " " __LL "%0, %1 # test_and_set_bit \n" | |
340 | + " or %2, %0, %3 \n" | |
341 | + " " __SC "%2, %1 \n" | |
342 | + " .set mips0 \n" | |
343 | + : "=&r" (temp), "+m" (*m), "=&r" (res) | |
344 | + : "r" (1UL << bit) | |
345 | + : "memory"); | |
346 | + } while (unlikely(!res)); | |
347 | + | |
348 | + res = temp & (1UL << bit); | |
367 | 349 | } else { |
368 | 350 | volatile unsigned long *a = addr; |
369 | 351 | unsigned long mask; |
370 | 352 | |
371 | 353 | |
... | ... | @@ -410,49 +392,43 @@ |
410 | 392 | " beqzl %2, 1b \n" |
411 | 393 | " and %2, %0, %3 \n" |
412 | 394 | " .set mips0 \n" |
413 | - : "=&r" (temp), "=m" (*m), "=&r" (res) | |
414 | - : "r" (1UL << bit), "m" (*m) | |
395 | + : "=&r" (temp), "+m" (*m), "=&r" (res) | |
396 | + : "r" (1UL << bit) | |
415 | 397 | : "memory"); |
416 | 398 | #ifdef CONFIG_CPU_MIPSR2 |
417 | 399 | } else if (kernel_uses_llsc && __builtin_constant_p(nr)) { |
418 | 400 | unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); |
419 | 401 | unsigned long temp; |
420 | 402 | |
421 | - __asm__ __volatile__( | |
422 | - "1: " __LL "%0, %1 # test_and_clear_bit \n" | |
423 | - " " __EXT "%2, %0, %3, 1 \n" | |
424 | - " " __INS "%0, $0, %3, 1 \n" | |
425 | - " " __SC "%0, %1 \n" | |
426 | - " beqz %0, 2f \n" | |
427 | - " .subsection 2 \n" | |
428 | - "2: b 1b \n" | |
429 | - " .previous \n" | |
430 | - : "=&r" (temp), "=m" (*m), "=&r" (res) | |
431 | - : "ir" (bit), "m" (*m) | |
432 | - : "memory"); | |
403 | + do { | |
404 | + __asm__ __volatile__( | |
405 | + " " __LL "%0, %1 # test_and_clear_bit \n" | |
406 | + " " __EXT "%2, %0, %3, 1 \n" | |
407 | + " " __INS "%0, $0, %3, 1 \n" | |
408 | + " " __SC "%0, %1 \n" | |
409 | + : "=&r" (temp), "+m" (*m), "=&r" (res) | |
410 | + : "ir" (bit) | |
411 | + : "memory"); | |
412 | + } while (unlikely(!temp)); | |
433 | 413 | #endif |
434 | 414 | } else if (kernel_uses_llsc) { |
435 | 415 | unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); |
436 | 416 | unsigned long temp; |
437 | 417 | |
438 | - __asm__ __volatile__( | |
439 | - " .set push \n" | |
440 | - " .set noreorder \n" | |
441 | - " .set mips3 \n" | |
442 | - "1: " __LL "%0, %1 # test_and_clear_bit \n" | |
443 | - " or %2, %0, %3 \n" | |
444 | - " xor %2, %3 \n" | |
445 | - " " __SC "%2, %1 \n" | |
446 | - " beqz %2, 2f \n" | |
447 | - " and %2, %0, %3 \n" | |
448 | - " .subsection 2 \n" | |
449 | - "2: b 1b \n" | |
450 | - " nop \n" | |
451 | - " .previous \n" | |
452 | - " .set pop \n" | |
453 | - : "=&r" (temp), "=m" (*m), "=&r" (res) | |
454 | - : "r" (1UL << bit), "m" (*m) | |
455 | - : "memory"); | |
418 | + do { | |
419 | + __asm__ __volatile__( | |
420 | + " .set mips3 \n" | |
421 | + " " __LL "%0, %1 # test_and_clear_bit \n" | |
422 | + " or %2, %0, %3 \n" | |
423 | + " xor %2, %3 \n" | |
424 | + " " __SC "%2, %1 \n" | |
425 | + " .set mips0 \n" | |
426 | + : "=&r" (temp), "+m" (*m), "=&r" (res) | |
427 | + : "r" (1UL << bit) | |
428 | + : "memory"); | |
429 | + } while (unlikely(!res)); | |
430 | + | |
431 | + res = temp & (1UL << bit); | |
456 | 432 | } else { |
457 | 433 | volatile unsigned long *a = addr; |
458 | 434 | unsigned long mask; |
459 | 435 | |
... | ... | @@ -499,30 +475,26 @@ |
499 | 475 | " beqzl %2, 1b \n" |
500 | 476 | " and %2, %0, %3 \n" |
501 | 477 | " .set mips0 \n" |
502 | - : "=&r" (temp), "=m" (*m), "=&r" (res) | |
503 | - : "r" (1UL << bit), "m" (*m) | |
478 | + : "=&r" (temp), "+m" (*m), "=&r" (res) | |
479 | + : "r" (1UL << bit) | |
504 | 480 | : "memory"); |
505 | 481 | } else if (kernel_uses_llsc) { |
506 | 482 | unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); |
507 | 483 | unsigned long temp; |
508 | 484 | |
509 | - __asm__ __volatile__( | |
510 | - " .set push \n" | |
511 | - " .set noreorder \n" | |
512 | - " .set mips3 \n" | |
513 | - "1: " __LL "%0, %1 # test_and_change_bit \n" | |
514 | - " xor %2, %0, %3 \n" | |
515 | - " " __SC "\t%2, %1 \n" | |
516 | - " beqz %2, 2f \n" | |
517 | - " and %2, %0, %3 \n" | |
518 | - " .subsection 2 \n" | |
519 | - "2: b 1b \n" | |
520 | - " nop \n" | |
521 | - " .previous \n" | |
522 | - " .set pop \n" | |
523 | - : "=&r" (temp), "=m" (*m), "=&r" (res) | |
524 | - : "r" (1UL << bit), "m" (*m) | |
525 | - : "memory"); | |
485 | + do { | |
486 | + __asm__ __volatile__( | |
487 | + " .set mips3 \n" | |
488 | + " " __LL "%0, %1 # test_and_change_bit \n" | |
489 | + " xor %2, %0, %3 \n" | |
490 | + " " __SC "\t%2, %1 \n" | |
491 | + " .set mips0 \n" | |
492 | + : "=&r" (temp), "+m" (*m), "=&r" (res) | |
493 | + : "r" (1UL << bit) | |
494 | + : "memory"); | |
495 | + } while (unlikely(!res)); | |
496 | + | |
497 | + res = temp & (1UL << bit); | |
526 | 498 | } else { |
527 | 499 | volatile unsigned long *a = addr; |
528 | 500 | unsigned long mask; |
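bitops.h gets the same loop treatment plus a constraint cleanup: the separate "=m" (*m) output and "m" (*m) input collapse into one read-write "+m" (*m) operand. In the test_and_* functions the C loop retries on res, the sc status, and the tested bit is then extracted from temp, the unmodified value ll returned. A sketch with the same caveats, additionally assuming a 32-bit build (where the kernel's __LL/__SC macros expand to plain ll/sc) and bit already reduced modulo the word size:

static inline int llsc_test_and_set_bit(unsigned long bit, unsigned long *m)
{
	unsigned long temp, res;

	do {
		__asm__ __volatile__(
		"	.set	mips3			\n"
		"	ll	%0, %1			\n"
		"	or	%2, %0, %3		\n"
		"	sc	%2, %1			\n"
		"	.set	mips0			\n"
		: "=&r" (temp), "+m" (*m), "=&r" (res)
		: "r" (1UL << bit)
		: "memory");
	} while (unlikely(!res));

	return (temp & (1UL << bit)) != 0;	/* old value of the bit */
}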
arch/mips/include/asm/cmpxchg.h
... | ... | @@ -44,12 +44,9 @@ |
44 | 44 | " move $1, %z4 \n" \ |
45 | 45 | " .set mips3 \n" \ |
46 | 46 | " " st " $1, %1 \n" \ |
47 | - " beqz $1, 3f \n" \ | |
48 | - "2: \n" \ | |
49 | - " .subsection 2 \n" \ | |
50 | - "3: b 1b \n" \ | |
51 | - " .previous \n" \ | |
47 | + " beqz $1, 1b \n" \ | |
52 | 48 | " .set pop \n" \ |
49 | + "2: \n" \ | |
53 | 50 | : "=&r" (__ret), "=R" (*m) \ |
54 | 51 | : "R" (*m), "Jr" (old), "Jr" (new) \ |
55 | 52 | : "memory"); \ |
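cmpxchg is the one case where the retry loop cannot move out to C: a status test after the asm could not tell a failed sc (retry) from a failed comparison (return without retrying). So __cmpxchg_asm keeps its branch in assembler and simply retargets it - "beqz $1, 1b" straight back to the ll instead of bouncing through a .subsection 2 trampoline - and since label and target now sit a few instructions apart, the range problem disappears. Fleshed out into a self-contained sketch (name made up; the real macro parameterizes the ld/st instructions and is wrapped by barrier-issuing callers):

static inline unsigned int llsc_cmpxchg_u32(volatile unsigned int *m,
					    unsigned int old, unsigned int new_val)
{
	unsigned int ret;

	__asm__ __volatile__(
	"	.set	push				\n"
	"	.set	noat		# $1 is ours	\n"
	"	.set	mips3				\n"
	"1:	ll	%0, %2		# cmpxchg	\n"
	"	bne	%0, %z3, 2f	# mismatch: done\n"
	"	move	$1, %z4				\n"
	"	sc	$1, %1		# try the store	\n"
	"	beqz	$1, 1b		# failed: retry	\n"
	"	.set	pop				\n"
	"2:						\n"
	: "=&r" (ret), "=R" (*m)
	: "R" (*m), "Jr" (old), "Jr" (new_val)
	: "memory");

	return ret;	/* value found at *m; equals old iff the store happened */
}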
arch/mips/include/asm/system.h
... | ... | @@ -115,21 +115,19 @@ |
115 | 115 | } else if (kernel_uses_llsc) { |
116 | 116 | unsigned long dummy; |
117 | 117 | |
118 | - __asm__ __volatile__( | |
119 | - " .set mips3 \n" | |
120 | - "1: ll %0, %3 # xchg_u32 \n" | |
121 | - " .set mips0 \n" | |
122 | - " move %2, %z4 \n" | |
123 | - " .set mips3 \n" | |
124 | - " sc %2, %1 \n" | |
125 | - " beqz %2, 2f \n" | |
126 | - " .subsection 2 \n" | |
127 | - "2: b 1b \n" | |
128 | - " .previous \n" | |
129 | - " .set mips0 \n" | |
130 | - : "=&r" (retval), "=m" (*m), "=&r" (dummy) | |
131 | - : "R" (*m), "Jr" (val) | |
132 | - : "memory"); | |
118 | + do { | |
119 | + __asm__ __volatile__( | |
120 | + " .set mips3 \n" | |
121 | + " ll %0, %3 # xchg_u32 \n" | |
122 | + " .set mips0 \n" | |
123 | + " move %2, %z4 \n" | |
124 | + " .set mips3 \n" | |
125 | + " sc %2, %1 \n" | |
126 | + " .set mips0 \n" | |
127 | + : "=&r" (retval), "=m" (*m), "=&r" (dummy) | |
128 | + : "R" (*m), "Jr" (val) | |
129 | + : "memory"); | |
130 | + } while (unlikely(!dummy)); | |
133 | 131 | } else { |
134 | 132 | unsigned long flags; |
135 | 133 | |
... | ... | @@ -167,19 +165,17 @@ |
167 | 165 | } else if (kernel_uses_llsc) { |
168 | 166 | unsigned long dummy; |
169 | 167 | |
170 | - __asm__ __volatile__( | |
171 | - " .set mips3 \n" | |
172 | - "1: lld %0, %3 # xchg_u64 \n" | |
173 | - " move %2, %z4 \n" | |
174 | - " scd %2, %1 \n" | |
175 | - " beqz %2, 2f \n" | |
176 | - " .subsection 2 \n" | |
177 | - "2: b 1b \n" | |
178 | - " .previous \n" | |
179 | - " .set mips0 \n" | |
180 | - : "=&r" (retval), "=m" (*m), "=&r" (dummy) | |
181 | - : "R" (*m), "Jr" (val) | |
182 | - : "memory"); | |
168 | + do { | |
169 | + __asm__ __volatile__( | |
170 | + " .set mips3 \n" | |
171 | + " lld %0, %3 # xchg_u64 \n" | |
172 | + " move %2, %z4 \n" | |
173 | + " scd %2, %1 \n" | |
174 | + " .set mips0 \n" | |
175 | + : "=&r" (retval), "=m" (*m), "=&r" (dummy) | |
176 | + : "R" (*m), "Jr" (val) | |
177 | + : "memory"); | |
178 | + } while (unlikely(!dummy)); | |
183 | 179 | } else { |
184 | 180 | unsigned long flags; |
185 | 181 |
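system.h's xchg_u32/xchg_u64 follow the same recipe; the only wrinkle is that the loaded value and the sc status live in different registers (retval in %0, the write-back scratch in %2), so the C loop tests the scratch register, dummy, rather than the result. A final sketch, again with a made-up name and no barriers:

static inline unsigned int llsc_xchg_u32(volatile unsigned int *m, unsigned int val)
{
	unsigned int retval, dummy;

	do {
		__asm__ __volatile__(
		"	.set	mips3			\n"
		"	ll	%0, %3	# xchg_u32	\n"
		"	move	%2, %z4			\n"
		"	sc	%2, %1			\n"
		"	.set	mips0			\n"
		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
		: "R" (*m), "Jr" (val)
		: "memory");
	} while (unlikely(!dummy));

	return retval;	/* the value *m held before the exchange */
}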