Commit 60815cf2e05057db5b78e398d9734c493560b11e

Authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux

Pull ACCESS_ONCE cleanup preparation from Christian Borntraeger:
 "kernel: Provide READ_ONCE and ASSIGN_ONCE

  As discussed on LKML (http://marc.info/?i=54611D86.4040306%40de.ibm.com),
  ACCESS_ONCE might fail with specific compilers for non-scalar
  accesses.

  Here is a set of patches to tackle that problem.

  The first patch introduces READ_ONCE and ASSIGN_ONCE.  If the data
  structure is larger than the machine word size, memcpy is used and a
  warning is emitted.  The next patches fix up several in-tree users of
  ACCESS_ONCE on non-scalar types.

  This does not yet contain a patch that forces ACCESS_ONCE to work only
  on scalar types.  This is targeted for the next merge window, as
  linux-next already contains new offenders regarding ACCESS_ONCE vs.
  non-scalar types"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux:
  s390/kvm: REPLACE barrier fixup with READ_ONCE
  arm/spinlock: Replace ACCESS_ONCE with READ_ONCE
  arm64/spinlock: Replace ACCESS_ONCE READ_ONCE
  mips/gup: Replace ACCESS_ONCE with READ_ONCE
  x86/gup: Replace ACCESS_ONCE with READ_ONCE
  x86/spinlock: Replace ACCESS_ONCE with READ_ONCE
  mm: replace ACCESS_ONCE with READ_ONCE or barriers
  kernel: Provide READ_ONCE and ASSIGN_ONCE
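
For readers following along, the failure mode being addressed can be sketched as
follows.  This is an illustrative, self-contained example, not code from this
merge; the arch_spinlock_t layout and the function below are simplified
stand-ins, and only the ACCESS_ONCE() definition matches the historical one in
include/linux/compiler.h:

    /* Historical definition from include/linux/compiler.h */
    #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

    /* Simplified stand-in for a two-halfword ticket lock */
    typedef struct { unsigned short owner, next; } arch_spinlock_t;

    static int arch_spin_is_contended(arch_spinlock_t *lock)
    {
            /*
             * Because *lock is not a scalar, affected GCC releases may drop
             * the volatile qualifier from the cast above, so this load is no
             * longer guaranteed to happen exactly once (it may be refetched
             * or torn).  READ_ONCE(*lock) avoids this by copying the value
             * out through a volatile access of the matching scalar width.
             */
            arch_spinlock_t val = ACCESS_ONCE(*lock);

            return (val.next - val.owner) > 1;
    }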

Showing 10 changed files

arch/arm/include/asm/spinlock.h
... ... @@ -120,12 +120,12 @@
120 120  
121 121 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
122 122 {
123   - return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
  123 + return !arch_spin_value_unlocked(READ_ONCE(*lock));
124 124 }
125 125  
126 126 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
127 127 {
128   - struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
  128 + struct __raw_tickets tickets = READ_ONCE(lock->tickets);
129 129 return (tickets.next - tickets.owner) > 1;
130 130 }
131 131 #define arch_spin_is_contended arch_spin_is_contended
arch/arm64/include/asm/spinlock.h
... ... @@ -99,12 +99,12 @@
99 99  
100 100 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
101 101 {
102   - return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
  102 + return !arch_spin_value_unlocked(READ_ONCE(*lock));
103 103 }
104 104  
105 105 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
106 106 {
107   - arch_spinlock_t lockval = ACCESS_ONCE(*lock);
  107 + arch_spinlock_t lockval = READ_ONCE(*lock);
108 108 return (lockval.next - lockval.owner) > 1;
109 109 }
110 110 #define arch_spin_is_contended arch_spin_is_contended
arch/mips/mm/gup.c
... ... @@ -30,7 +30,7 @@
30 30  
31 31 return pte;
32 32 #else
33   - return ACCESS_ONCE(*ptep);
  33 + return READ_ONCE(*ptep);
34 34 #endif
35 35 }
36 36  
arch/s390/kvm/gaccess.c
... ... @@ -227,12 +227,10 @@
227 227 goto out;
228 228 ic = &vcpu->kvm->arch.sca->ipte_control;
229 229 do {
230   - old = *ic;
231   - barrier();
  230 + old = READ_ONCE(*ic);
232 231 while (old.k) {
233 232 cond_resched();
234   - old = *ic;
235   - barrier();
  233 + old = READ_ONCE(*ic);
236 234 }
237 235 new = old;
238 236 new.k = 1;
... ... @@ -251,8 +249,7 @@
251 249 goto out;
252 250 ic = &vcpu->kvm->arch.sca->ipte_control;
253 251 do {
254   - old = *ic;
255   - barrier();
  252 + old = READ_ONCE(*ic);
256 253 new = old;
257 254 new.k = 0;
258 255 } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
259 256  
... ... @@ -267,12 +264,10 @@
267 264  
268 265 ic = &vcpu->kvm->arch.sca->ipte_control;
269 266 do {
270   - old = *ic;
271   - barrier();
  267 + old = READ_ONCE(*ic);
272 268 while (old.kg) {
273 269 cond_resched();
274   - old = *ic;
275   - barrier();
  270 + old = READ_ONCE(*ic);
276 271 }
277 272 new = old;
278 273 new.k = 1;
... ... @@ -286,8 +281,7 @@
286 281  
287 282 ic = &vcpu->kvm->arch.sca->ipte_control;
288 283 do {
289   - old = *ic;
290   - barrier();
  284 + old = READ_ONCE(*ic);
291 285 new = old;
292 286 new.kh--;
293 287 if (!new.kh)
arch/x86/include/asm/spinlock.h
... ... @@ -92,7 +92,7 @@
92 92 unsigned count = SPIN_THRESHOLD;
93 93  
94 94 do {
95   - if (ACCESS_ONCE(lock->tickets.head) == inc.tail)
  95 + if (READ_ONCE(lock->tickets.head) == inc.tail)
96 96 goto out;
97 97 cpu_relax();
98 98 } while (--count);
... ... @@ -105,7 +105,7 @@
105 105 {
106 106 arch_spinlock_t old, new;
107 107  
108   - old.tickets = ACCESS_ONCE(lock->tickets);
  108 + old.tickets = READ_ONCE(lock->tickets);
109 109 if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
110 110 return 0;
111 111  
112 112  
... ... @@ -162,14 +162,14 @@
162 162  
163 163 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
164 164 {
165   - struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
  165 + struct __raw_tickets tmp = READ_ONCE(lock->tickets);
166 166  
167 167 return tmp.tail != tmp.head;
168 168 }
169 169  
170 170 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
171 171 {
172   - struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
  172 + struct __raw_tickets tmp = READ_ONCE(lock->tickets);
173 173  
174 174 return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
175 175 }
arch/x86/mm/gup.c
... ... @@ -15,7 +15,7 @@
15 15 static inline pte_t gup_get_pte(pte_t *ptep)
16 16 {
17 17 #ifndef CONFIG_X86_PAE
18   - return ACCESS_ONCE(*ptep);
  18 + return READ_ONCE(*ptep);
19 19 #else
20 20 /*
21 21 * With get_user_pages_fast, we walk down the pagetables without taking
include/linux/compiler.h
... ... @@ -186,6 +186,80 @@
186 186 # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
187 187 #endif
188 188  
  189 +#include <uapi/linux/types.h>
  190 +
  191 +static __always_inline void data_access_exceeds_word_size(void)
  192 +#ifdef __compiletime_warning
  193 +__compiletime_warning("data access exceeds word size and won't be atomic")
  194 +#endif
  195 +;
  196 +
  197 +static __always_inline void data_access_exceeds_word_size(void)
  198 +{
  199 +}
  200 +
  201 +static __always_inline void __read_once_size(volatile void *p, void *res, int size)
  202 +{
  203 + switch (size) {
  204 + case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
  205 + case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
  206 + case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
  207 +#ifdef CONFIG_64BIT
  208 + case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
  209 +#endif
  210 + default:
  211 + barrier();
  212 + __builtin_memcpy((void *)res, (const void *)p, size);
  213 + data_access_exceeds_word_size();
  214 + barrier();
  215 + }
  216 +}
  217 +
  218 +static __always_inline void __assign_once_size(volatile void *p, void *res, int size)
  219 +{
  220 + switch (size) {
  221 + case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
  222 + case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
  223 + case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
  224 +#ifdef CONFIG_64BIT
  225 + case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
  226 +#endif
  227 + default:
  228 + barrier();
  229 + __builtin_memcpy((void *)p, (const void *)res, size);
  230 + data_access_exceeds_word_size();
  231 + barrier();
  232 + }
  233 +}
  234 +
  235 +/*
  236 + * Prevent the compiler from merging or refetching reads or writes. The
  237 + * compiler is also forbidden from reordering successive instances of
  238 + * READ_ONCE, ASSIGN_ONCE and ACCESS_ONCE (see below), but only when the
  239 + * compiler is aware of some particular ordering. One way to make the
  240 + * compiler aware of ordering is to put the two invocations of READ_ONCE,
  241 + * ASSIGN_ONCE or ACCESS_ONCE() in different C statements.
  242 + *
  243 + * In contrast to ACCESS_ONCE these two macros will also work on aggregate
  244 + * data types like structs or unions. If the size of the accessed data
  245 + * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
  246 + * READ_ONCE() and ASSIGN_ONCE() will fall back to memcpy and print a
  247 + * compile-time warning.
  248 + *
  249 + * Their two major use cases are: (1) Mediating communication between
  250 + * process-level code and irq/NMI handlers, all running on the same CPU,
  251 + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
  252 + * mutilate accesses that either do not require ordering or that interact
  253 + * with an explicit memory barrier or atomic instruction that provides the
  254 + * required ordering.
  255 + */
  256 +
  257 +#define READ_ONCE(x) \
  258 + ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; })
  259 +
  260 +#define ASSIGN_ONCE(val, x) \
  261 + ({ typeof(x) __val; __val = val; __assign_once_size(&x, &__val, sizeof(__val)); __val; })
  262 +
189 263 #endif /* __KERNEL__ */
190 264  
191 265 #endif /* __ASSEMBLY__ */
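
To make the intended usage of the two new macros concrete, here is a small,
hedged sketch on an aggregate type.  The union below is an invented stand-in
(loosely modelled on the s390 ipte_control usage above); only READ_ONCE() and
ASSIGN_ONCE() themselves come from this patch:

    /* Invented example type: fits in one machine word on 64-bit */
    union ctl {
            unsigned long val;
            struct { unsigned char k; } f;
    };

    static union ctl shared_ctl;

    static void set_k_flag(void)
    {
            union ctl old, new;

            /* One volatile load of the whole union; no tearing, no refetch */
            old = READ_ONCE(shared_ctl);

            new = old;
            new.f.k = 1;

            /*
             * One volatile store of the new value (ASSIGN_ONCE(val, x) writes
             * val into x); types larger than a machine word would instead go
             * through memcpy and trigger the compile-time warning above.
             */
            ASSIGN_ONCE(new, shared_ctl);
    }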
mm/gup.c
... ... @@ -968,7 +968,7 @@
968 968  
969 969 pudp = pud_offset(&pgd, addr);
970 970 do {
971   - pud_t pud = ACCESS_ONCE(*pudp);
  971 + pud_t pud = READ_ONCE(*pudp);
972 972  
973 973 next = pud_addr_end(addr, end);
974 974 if (pud_none(pud))
mm/memory.c
... ... @@ -3195,7 +3195,16 @@
3195 3195 pte_t entry;
3196 3196 spinlock_t *ptl;
3197 3197  
3198   - entry = ACCESS_ONCE(*pte);
  3198 + /*
  3199 + * some architectures can have larger ptes than wordsize,
  3200 + * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and CONFIG_32BIT=y,
  3201 + * so READ_ONCE or ACCESS_ONCE cannot guarantee atomic accesses.
  3202 + * The code below just needs a consistent view for the ifs and
  3203 + * we later double check anyway with the ptl lock held. So here
  3204 + * a barrier will do.
  3205 + */
  3206 + entry = *pte;
  3207 + barrier();
3199 3208 if (!pte_present(entry)) {
3200 3209 if (pte_none(entry)) {
3201 3210 if (vma->vm_ops) {
mm/rmap.c
... ... @@ -583,7 +583,8 @@
583 583 * without holding anon_vma lock for write. So when looking for a
584 584 * genuine pmde (in which to find pte), test present and !THP together.
585 585 */
586   - pmde = ACCESS_ONCE(*pmd);
  586 + pmde = *pmd;
  587 + barrier();
587 588 if (!pmd_present(pmde) || pmd_trans_huge(pmde))
588 589 pmd = NULL;
589 590 out: