Commit a6b9b4d50f492630443b38404d1f436b3b748c14
Exists in master and in 7 other branches
Merge branch 'rcu/next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu into core/rcu
Showing 54 changed files (side-by-side diff)
- Documentation/DocBook/kernel-locking.tmpl
- Documentation/RCU/checklist.txt
- drivers/input/evdev.c
- drivers/vhost/net.c
- drivers/vhost/vhost.c
- drivers/vhost/vhost.h
- include/linux/cgroup.h
- include/linux/compiler.h
- include/linux/cred.h
- include/linux/fdtable.h
- include/linux/fs.h
- include/linux/genhd.h
- include/linux/hardirq.h
- include/linux/idr.h
- include/linux/init_task.h
- include/linux/input.h
- include/linux/iocontext.h
- include/linux/key.h
- include/linux/kvm_host.h
- include/linux/mm_types.h
- include/linux/nfs_fs.h
- include/linux/notifier.h
- include/linux/radix-tree.h
- include/linux/rculist.h
- include/linux/rculist_nulls.h
- include/linux/rcupdate.h
- include/linux/rcutiny.h
- include/linux/rcutree.h
- include/linux/sched.h
- include/linux/srcu.h
- include/linux/sunrpc/auth_gss.h
- include/net/cls_cgroup.h
- include/net/netfilter/nf_conntrack.h
- init/Kconfig
- kernel/Makefile
- kernel/cgroup.c
- kernel/pid.c
- kernel/rcupdate.c
- kernel/rcutiny.c
- kernel/rcutiny_plugin.h
- kernel/rcutorture.c
- kernel/rcutree.c
- kernel/rcutree.h
- kernel/rcutree_plugin.h
- kernel/rcutree_trace.c
- lib/Kconfig.debug
- lib/radix-tree.c
- net/ipv4/netfilter/nf_nat_core.c
- net/netfilter/core.c
- net/netfilter/nf_conntrack_ecache.c
- net/netfilter/nf_conntrack_extend.c
- net/netfilter/nf_conntrack_proto.c
- net/netfilter/nf_log.c
- net/netfilter/nf_queue.c
Documentation/DocBook/kernel-locking.tmpl
... | ... | @@ -1645,7 +1645,9 @@ |
1645 | 1645 | all the readers who were traversing the list when we deleted the |
1646 | 1646 | element are finished. We use <function>call_rcu()</function> to |
1647 | 1647 | register a callback which will actually destroy the object once |
1648 | - the readers are finished. | |
1648 | + all pre-existing readers are finished. Alternatively, | |
1649 | + <function>synchronize_rcu()</function> may be used to block until | |
1650 | + all pre-existing readers are finished. | |
1649 | 1651 | </para> |
1650 | 1652 | <para> |
1651 | 1653 | But how does Read Copy Update know when the readers are |
... | ... | @@ -1714,7 +1716,7 @@ |
1714 | 1716 | - object_put(obj); |
1715 | 1717 | + list_del_rcu(&obj->list); |
1716 | 1718 | cache_num--; |
1717 | -+ call_rcu(&obj->rcu, cache_delete_rcu, obj); | |
1719 | ++ call_rcu(&obj->rcu, cache_delete_rcu); | |
1718 | 1720 | } |
1719 | 1721 | |
1720 | 1722 | /* Must be holding cache_lock */ |
... | ... | @@ -1725,14 +1727,6 @@ |
1725 | 1727 | if (++cache_num > MAX_CACHE_SIZE) { |
1726 | 1728 | struct object *i, *outcast = NULL; |
1727 | 1729 | list_for_each_entry(i, &cache, list) { |
1728 | -@@ -85,6 +94,7 @@ | |
1729 | - obj->popularity = 0; | |
1730 | - atomic_set(&obj->refcnt, 1); /* The cache holds a reference */ | |
1731 | - spin_lock_init(&obj->lock); | |
1732 | -+ INIT_RCU_HEAD(&obj->rcu); | |
1733 | - | |
1734 | - spin_lock_irqsave(&cache_lock, flags); | |
1735 | - __cache_add(obj); | |
1736 | 1730 | @@ -104,12 +114,11 @@ |
1737 | 1731 | struct object *cache_find(int id) |
1738 | 1732 | { |
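
The change above contrasts the two ways the documentation now describes for deferring destruction: call_rcu(), which registers a callback to run after a grace period, and synchronize_rcu(), which blocks until all pre-existing readers have finished. A minimal sketch of both styles, using a hypothetical struct obj rather than the book's cache example (the updater is assumed to hold the update-side mutex):

    struct obj {
        struct list_head list;
        struct rcu_head rcu;
        int key;
    };

    static void obj_free_rcu(struct rcu_head *head)
    {
        kfree(container_of(head, struct obj, rcu)); /* runs after a grace period */
    }

    static void obj_del_async(struct obj *p)   /* non-blocking: defer the free */
    {
        list_del_rcu(&p->list);
        call_rcu(&p->rcu, obj_free_rcu);
    }

    static void obj_del_sync(struct obj *p)    /* blocking: wait, then free directly */
    {
        list_del_rcu(&p->list);
        synchronize_rcu();                     /* all pre-existing readers are done */
        kfree(p);
    }
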
Documentation/RCU/checklist.txt
... | ... | @@ -218,13 +218,22 @@ |
218 | 218 | include: |
219 | 219 | |
220 | 220 | a. Keeping a count of the number of data-structure elements |
221 | - used by the RCU-protected data structure, including those | |
222 | - waiting for a grace period to elapse. Enforce a limit | |
223 | - on this number, stalling updates as needed to allow | |
224 | - previously deferred frees to complete. | |
221 | + used by the RCU-protected data structure, including | |
222 | + those waiting for a grace period to elapse. Enforce a | |
223 | + limit on this number, stalling updates as needed to allow | |
224 | + previously deferred frees to complete. Alternatively, | |
225 | + limit only the number awaiting deferred free rather than | |
226 | + the total number of elements. | |
225 | 227 | |
226 | - Alternatively, limit only the number awaiting deferred | |
227 | - free rather than the total number of elements. | |
228 | + One way to stall the updates is to acquire the update-side | |
229 | + mutex. (Don't try this with a spinlock -- other CPUs | |
230 | + spinning on the lock could prevent the grace period | |
231 | + from ever ending.) Another way to stall the updates | |
232 | + is for the updates to use a wrapper function around | |
233 | + the memory allocator, so that this wrapper function | |
234 | + simulates OOM when there is too much memory awaiting an | |
235 | + RCU grace period. There are of course many other | |
236 | + variations on this theme. | |
228 | 237 | |
229 | 238 | b. Limiting update rate. For example, if updates occur only |
230 | 239 | once per hour, then no explicit rate limiting is required, |
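
One rough reading of the "wrapper function around the memory allocator" suggestion added above is sketched below; struct obj (assumed to embed a struct rcu_head named rcu), the counter, and the limit are all invented for illustration and are not part of this patch:

    static atomic_t objs_pending_gp;     /* elements still waiting for a grace period */
    #define MAX_PENDING_GP 1000          /* arbitrary illustrative limit */

    static void *obj_alloc(size_t size, gfp_t gfp)
    {
        if (atomic_read(&objs_pending_gp) > MAX_PENDING_GP)
            return NULL;                 /* simulate OOM until deferred frees catch up */
        return kmalloc(size, gfp);
    }

    static void obj_free_rcu(struct rcu_head *head)
    {
        kfree(container_of(head, struct obj, rcu));
        atomic_dec(&objs_pending_gp);
    }

    static void obj_defer_free(struct obj *p)
    {
        atomic_inc(&objs_pending_gp);
        call_rcu(&p->rcu, obj_free_rcu);
    }
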
... | ... | @@ -365,4 +374,27 @@ |
365 | 374 | and the compiler to freely reorder code into and out of RCU |
366 | 375 | read-side critical sections. It is the responsibility of the |
367 | 376 | RCU update-side primitives to deal with this. |
377 | + | |
378 | +17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and | |
379 | + the __rcu sparse checks to validate your RCU code. These | |
380 | + can help find problems as follows: | |
381 | + | |
382 | + CONFIG_PROVE_RCU: check that accesses to RCU-protected data | |
383 | + structures are carried out under the proper RCU | |
384 | + read-side critical section, while holding the right | |
385 | + combination of locks, or whatever other conditions | |
386 | + are appropriate. | |
387 | + | |
388 | + CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the | |
389 | + same object to call_rcu() (or friends) before an RCU | |
390 | + grace period has elapsed since the last time that you | |
391 | + passed that same object to call_rcu() (or friends). | |
392 | + | |
393 | + __rcu sparse checks: tag the pointer to the RCU-protected data | |
394 | + structure with __rcu, and sparse will warn you if you | |
395 | + access that pointer without the services of one of the | |
396 | + variants of rcu_dereference(). | |
397 | + | |
398 | + These debugging aids can help you find problems that are | |
399 | + otherwise extremely difficult to spot. |
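
As a concrete (though hypothetical) illustration of item 17's __rcu sparse check: tag the protected pointer with __rcu and always read it through an rcu_dereference() variant; running sparse (for example, make C=2) with CONFIG_SPARSE_RCU_POINTER=y will then warn about any direct dereference.

    struct foo {
        int a;
    };

    struct bar {
        struct foo __rcu *foo;       /* sparse now tracks this as RCU-protected */
        spinlock_t lock;             /* protects updates to ->foo */
    };

    static int bar_read_a(struct bar *b)
    {
        struct foo *f;
        int a = -1;

        rcu_read_lock();
        f = rcu_dereference(b->foo); /* a plain b->foo->a here would draw a sparse warning */
        if (f)
            a = f->a;
        rcu_read_unlock();
        return a;
    }
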
drivers/input/evdev.c
drivers/vhost/net.c
... | ... | @@ -127,7 +127,10 @@ |
127 | 127 | size_t len, total_len = 0; |
128 | 128 | int err, wmem; |
129 | 129 | size_t hdr_size; |
130 | - struct socket *sock = rcu_dereference(vq->private_data); | |
130 | + struct socket *sock; | |
131 | + | |
132 | + sock = rcu_dereference_check(vq->private_data, | |
133 | + lockdep_is_held(&vq->mutex)); | |
131 | 134 | if (!sock) |
132 | 135 | return; |
133 | 136 | |
... | ... | @@ -582,7 +585,10 @@ |
582 | 585 | static void vhost_net_enable_vq(struct vhost_net *n, |
583 | 586 | struct vhost_virtqueue *vq) |
584 | 587 | { |
585 | - struct socket *sock = vq->private_data; | |
588 | + struct socket *sock; | |
589 | + | |
590 | + sock = rcu_dereference_protected(vq->private_data, | |
591 | + lockdep_is_held(&vq->mutex)); | |
586 | 592 | if (!sock) |
587 | 593 | return; |
588 | 594 | if (vq == n->vqs + VHOST_NET_VQ_TX) { |
... | ... | @@ -598,7 +604,8 @@ |
598 | 604 | struct socket *sock; |
599 | 605 | |
600 | 606 | mutex_lock(&vq->mutex); |
601 | - sock = vq->private_data; | |
607 | + sock = rcu_dereference_protected(vq->private_data, | |
608 | + lockdep_is_held(&vq->mutex)); | |
602 | 609 | vhost_net_disable_vq(n, vq); |
603 | 610 | rcu_assign_pointer(vq->private_data, NULL); |
604 | 611 | mutex_unlock(&vq->mutex); |
... | ... | @@ -736,7 +743,8 @@ |
736 | 743 | } |
737 | 744 | |
738 | 745 | /* start polling new socket */ |
739 | - oldsock = vq->private_data; | |
746 | + oldsock = rcu_dereference_protected(vq->private_data, | |
747 | + lockdep_is_held(&vq->mutex)); | |
740 | 748 | if (sock != oldsock) { |
741 | 749 | vhost_net_disable_vq(n, vq); |
742 | 750 | rcu_assign_pointer(vq->private_data, sock); |
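
The pattern used throughout these vhost changes: code that may run either under RCU-style protection (here, vhost work execution) or under vq->mutex uses rcu_dereference_check() with lockdep_is_held() as the extra condition, while update-only paths use rcu_dereference_protected(), which omits the memory barrier entirely. A condensed, hypothetical version of the same shape:

    struct ctx {
        void __rcu *priv;
        struct mutex mutex;          /* writers hold this */
    };

    /* Reader path: in an RCU read-side section, or holding ctx->mutex. */
    static void *ctx_priv(struct ctx *c)
    {
        return rcu_dereference_check(c->priv, lockdep_is_held(&c->mutex));
    }

    /* Update path only: ctx->mutex must be held; no barrier is emitted. */
    static void *ctx_priv_locked(struct ctx *c)
    {
        return rcu_dereference_protected(c->priv, lockdep_is_held(&c->mutex));
    }
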
drivers/vhost/vhost.c
... | ... | @@ -284,7 +284,7 @@ |
284 | 284 | vhost_dev_cleanup(dev); |
285 | 285 | |
286 | 286 | memory->nregions = 0; |
287 | - dev->memory = memory; | |
287 | + RCU_INIT_POINTER(dev->memory, memory); | |
288 | 288 | return 0; |
289 | 289 | } |
290 | 290 | |
... | ... | @@ -316,8 +316,9 @@ |
316 | 316 | fput(dev->log_file); |
317 | 317 | dev->log_file = NULL; |
318 | 318 | /* No one will access memory at this point */ |
319 | - kfree(dev->memory); | |
320 | - dev->memory = NULL; | |
319 | + kfree(rcu_dereference_protected(dev->memory, | |
320 | + lockdep_is_held(&dev->mutex))); | |
321 | + RCU_INIT_POINTER(dev->memory, NULL); | |
321 | 322 | if (dev->mm) |
322 | 323 | mmput(dev->mm); |
323 | 324 | dev->mm = NULL; |
324 | 325 | |
... | ... | @@ -401,14 +402,22 @@ |
401 | 402 | /* Caller should have device mutex but not vq mutex */ |
402 | 403 | int vhost_log_access_ok(struct vhost_dev *dev) |
403 | 404 | { |
404 | - return memory_access_ok(dev, dev->memory, 1); | |
405 | + struct vhost_memory *mp; | |
406 | + | |
407 | + mp = rcu_dereference_protected(dev->memory, | |
408 | + lockdep_is_held(&dev->mutex)); | |
409 | + return memory_access_ok(dev, mp, 1); | |
405 | 410 | } |
406 | 411 | |
407 | 412 | /* Verify access for write logging. */ |
408 | 413 | /* Caller should have vq mutex and device mutex */ |
409 | 414 | static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) |
410 | 415 | { |
411 | - return vq_memory_access_ok(log_base, vq->dev->memory, | |
416 | + struct vhost_memory *mp; | |
417 | + | |
418 | + mp = rcu_dereference_protected(vq->dev->memory, | |
419 | + lockdep_is_held(&vq->mutex)); | |
420 | + return vq_memory_access_ok(log_base, mp, | |
412 | 421 | vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && |
413 | 422 | (!vq->log_used || log_access_ok(log_base, vq->log_addr, |
414 | 423 | sizeof *vq->used + |
... | ... | @@ -448,7 +457,8 @@ |
448 | 457 | kfree(newmem); |
449 | 458 | return -EFAULT; |
450 | 459 | } |
451 | - oldmem = d->memory; | |
460 | + oldmem = rcu_dereference_protected(d->memory, | |
461 | + lockdep_is_held(&d->mutex)); | |
452 | 462 | rcu_assign_pointer(d->memory, newmem); |
453 | 463 | synchronize_rcu(); |
454 | 464 | kfree(oldmem); |
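
The SET_MEM_TABLE path above is the canonical update sequence made explicit by the new primitives: fetch the old pointer under the update-side mutex with rcu_dereference_protected(), publish the replacement with rcu_assign_pointer(), wait out pre-existing readers, then free. Stripped of the vhost details (the function name is invented; d->mutex is assumed held):

    static void replace_mem(struct vhost_dev *d, struct vhost_memory *newmem)
    {
        struct vhost_memory *oldmem;

        oldmem = rcu_dereference_protected(d->memory,
                                           lockdep_is_held(&d->mutex));
        rcu_assign_pointer(d->memory, newmem);  /* readers now see newmem */
        synchronize_rcu();                      /* wait for readers of oldmem */
        kfree(oldmem);
    }
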
drivers/vhost/vhost.h
... | ... | @@ -106,7 +106,7 @@ |
106 | 106 | * vhost_work execution acts instead of rcu_read_lock() and the end of |
107 | 107 | * vhost_work execution acts instead of rcu_read_unlock(). |
108 | 108 | * Writers use virtqueue mutex. */ |
109 | - void *private_data; | |
109 | + void __rcu *private_data; | |
110 | 110 | /* Log write descriptors */ |
111 | 111 | void __user *log_base; |
112 | 112 | struct vhost_log log[VHOST_NET_MAX_SG]; |
... | ... | @@ -116,7 +116,7 @@ |
116 | 116 | /* Readers use RCU to access memory table pointer |
117 | 117 | * log base pointer and features. |
118 | 118 | * Writers use mutex below.*/ |
119 | - struct vhost_memory *memory; | |
119 | + struct vhost_memory __rcu *memory; | |
120 | 120 | struct mm_struct *mm; |
121 | 121 | struct mutex mutex; |
122 | 122 | unsigned acked_features; |
... | ... | @@ -173,7 +173,11 @@ |
173 | 173 | |
174 | 174 | static inline int vhost_has_feature(struct vhost_dev *dev, int bit) |
175 | 175 | { |
176 | - unsigned acked_features = rcu_dereference(dev->acked_features); | |
176 | + unsigned acked_features; | |
177 | + | |
178 | + acked_features = | |
179 | + rcu_dereference_index_check(dev->acked_features, | |
180 | + lockdep_is_held(&dev->mutex)); | |
177 | 181 | return acked_features & (1 << bit); |
178 | 182 | } |
179 | 183 |
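
Note that dev->acked_features is an integer rather than a pointer, which is why vhost_has_feature() switches to rcu_dereference_index_check(): the __rcu sparse machinery works by dereferencing the pointer, so the ordinary rcu_dereference_check() cannot be applied to an integer or array index. A hypothetical stand-alone example of the same primitive:

    struct slot { int data; };

    struct table {
        int current_idx;             /* index published by updaters */
        struct mutex mutex;          /* guards updates to current_idx */
        struct slot slots[8];
    };

    static struct slot *table_current_slot(struct table *t)
    {
        int idx;

        /* Caller is in an RCU read-side section or holds t->mutex. */
        idx = rcu_dereference_index_check(t->current_idx,
                                          rcu_read_lock_held() ||
                                          lockdep_is_held(&t->mutex));
        return &t->slots[idx];
    }
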
include/linux/cgroup.h
... | ... | @@ -75,7 +75,7 @@ |
75 | 75 | |
76 | 76 | unsigned long flags; |
77 | 77 | /* ID for this css, if possible */ |
78 | - struct css_id *id; | |
78 | + struct css_id __rcu *id; | |
79 | 79 | }; |
80 | 80 | |
81 | 81 | /* bits in struct cgroup_subsys_state flags field */ |
... | ... | @@ -205,7 +205,7 @@ |
205 | 205 | struct list_head children; /* my children */ |
206 | 206 | |
207 | 207 | struct cgroup *parent; /* my parent */ |
208 | - struct dentry *dentry; /* cgroup fs entry, RCU protected */ | |
208 | + struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ | |
209 | 209 | |
210 | 210 | /* Private pointers for each registered subsystem */ |
211 | 211 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; |
include/linux/compiler.h
... | ... | @@ -16,7 +16,11 @@ |
16 | 16 | # define __release(x) __context__(x,-1) |
17 | 17 | # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) |
18 | 18 | # define __percpu __attribute__((noderef, address_space(3))) |
19 | +#ifdef CONFIG_SPARSE_RCU_POINTER | |
20 | +# define __rcu __attribute__((noderef, address_space(4))) | |
21 | +#else | |
19 | 22 | # define __rcu |
23 | +#endif | |
20 | 24 | extern void __chk_user_ptr(const volatile void __user *); |
21 | 25 | extern void __chk_io_ptr(const volatile void __iomem *); |
22 | 26 | #else |
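
With CONFIG_SPARSE_RCU_POINTER=y, __rcu becomes a real sparse attribute (noderef, address space 4), so directly dereferencing an __rcu pointer now produces a sparse warning; the __force casts added elsewhere in this series exist to cross that address space deliberately inside the RCU macros. A hypothetical snippet showing what sparse would and would not accept:

    struct foo { int a; };
    static struct foo __rcu *gp;

    static int broken_read(void)
    {
        return gp->a;                /* sparse: dereference of noderef expression */
    }

    static int good_read(void)
    {
        struct foo *p;
        int v = 0;

        rcu_read_lock();
        p = rcu_dereference(gp);     /* the __force cast and barrier live in the macro */
        if (p)
            v = p->a;
        rcu_read_unlock();
        return v;
    }
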
include/linux/cred.h
... | ... | @@ -84,7 +84,7 @@ |
84 | 84 | atomic_t usage; |
85 | 85 | pid_t tgid; /* thread group process ID */ |
86 | 86 | spinlock_t lock; |
87 | - struct key *session_keyring; /* keyring inherited over fork */ | |
87 | + struct key __rcu *session_keyring; /* keyring inherited over fork */ | |
88 | 88 | struct key *process_keyring; /* keyring private to this process */ |
89 | 89 | struct rcu_head rcu; /* RCU deletion hook */ |
90 | 90 | }; |
include/linux/fdtable.h
... | ... | @@ -31,7 +31,7 @@ |
31 | 31 | |
32 | 32 | struct fdtable { |
33 | 33 | unsigned int max_fds; |
34 | - struct file ** fd; /* current fd array */ | |
34 | + struct file __rcu **fd; /* current fd array */ | |
35 | 35 | fd_set *close_on_exec; |
36 | 36 | fd_set *open_fds; |
37 | 37 | struct rcu_head rcu; |
... | ... | @@ -46,7 +46,7 @@ |
46 | 46 | * read mostly part |
47 | 47 | */ |
48 | 48 | atomic_t count; |
49 | - struct fdtable *fdt; | |
49 | + struct fdtable __rcu *fdt; | |
50 | 50 | struct fdtable fdtab; |
51 | 51 | /* |
52 | 52 | * written part on a separate cache line in SMP |
... | ... | @@ -55,7 +55,7 @@ |
55 | 55 | int next_fd; |
56 | 56 | struct embedded_fd_set close_on_exec_init; |
57 | 57 | struct embedded_fd_set open_fds_init; |
58 | - struct file * fd_array[NR_OPEN_DEFAULT]; | |
58 | + struct file __rcu * fd_array[NR_OPEN_DEFAULT]; | |
59 | 59 | }; |
60 | 60 | |
61 | 61 | #define rcu_dereference_check_fdtable(files, fdtfd) \ |
include/linux/fs.h
include/linux/genhd.h
... | ... | @@ -129,8 +129,8 @@ |
129 | 129 | struct disk_part_tbl { |
130 | 130 | struct rcu_head rcu_head; |
131 | 131 | int len; |
132 | - struct hd_struct *last_lookup; | |
133 | - struct hd_struct *part[]; | |
132 | + struct hd_struct __rcu *last_lookup; | |
133 | + struct hd_struct __rcu *part[]; | |
134 | 134 | }; |
135 | 135 | |
136 | 136 | struct gendisk { |
... | ... | @@ -149,7 +149,7 @@ |
149 | 149 | * non-critical accesses use RCU. Always access through |
150 | 150 | * helpers. |
151 | 151 | */ |
152 | - struct disk_part_tbl *part_tbl; | |
152 | + struct disk_part_tbl __rcu *part_tbl; | |
153 | 153 | struct hd_struct part0; |
154 | 154 | |
155 | 155 | const struct block_device_operations *fops; |
include/linux/hardirq.h
include/linux/idr.h
... | ... | @@ -50,14 +50,14 @@ |
50 | 50 | |
51 | 51 | struct idr_layer { |
52 | 52 | unsigned long bitmap; /* A zero bit means "space here" */ |
53 | - struct idr_layer *ary[1<<IDR_BITS]; | |
53 | + struct idr_layer __rcu *ary[1<<IDR_BITS]; | |
54 | 54 | int count; /* When zero, we can release it */ |
55 | 55 | int layer; /* distance from leaf */ |
56 | 56 | struct rcu_head rcu_head; |
57 | 57 | }; |
58 | 58 | |
59 | 59 | struct idr { |
60 | - struct idr_layer *top; | |
60 | + struct idr_layer __rcu *top; | |
61 | 61 | struct idr_layer *id_free; |
62 | 62 | int layers; /* only valid without concurrent changes */ |
63 | 63 | int id_free_cnt; |
include/linux/init_task.h
... | ... | @@ -82,11 +82,17 @@ |
82 | 82 | # define CAP_INIT_BSET CAP_FULL_SET |
83 | 83 | |
84 | 84 | #ifdef CONFIG_TREE_PREEMPT_RCU |
85 | +#define INIT_TASK_RCU_TREE_PREEMPT() \ | |
86 | + .rcu_blocked_node = NULL, | |
87 | +#else | |
88 | +#define INIT_TASK_RCU_TREE_PREEMPT(tsk) | |
89 | +#endif | |
90 | +#ifdef CONFIG_PREEMPT_RCU | |
85 | 91 | #define INIT_TASK_RCU_PREEMPT(tsk) \ |
86 | 92 | .rcu_read_lock_nesting = 0, \ |
87 | 93 | .rcu_read_unlock_special = 0, \ |
88 | - .rcu_blocked_node = NULL, \ | |
89 | - .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), | |
94 | + .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ | |
95 | + INIT_TASK_RCU_TREE_PREEMPT() | |
90 | 96 | #else |
91 | 97 | #define INIT_TASK_RCU_PREEMPT(tsk) |
92 | 98 | #endif |
... | ... | @@ -137,8 +143,8 @@ |
137 | 143 | .children = LIST_HEAD_INIT(tsk.children), \ |
138 | 144 | .sibling = LIST_HEAD_INIT(tsk.sibling), \ |
139 | 145 | .group_leader = &tsk, \ |
140 | - .real_cred = &init_cred, \ | |
141 | - .cred = &init_cred, \ | |
146 | + RCU_INIT_POINTER(.real_cred, &init_cred), \ | |
147 | + RCU_INIT_POINTER(.cred, &init_cred), \ | |
142 | 148 | .cred_guard_mutex = \ |
143 | 149 | __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ |
144 | 150 | .comm = "swapper", \ |
include/linux/input.h
... | ... | @@ -1196,7 +1196,7 @@ |
1196 | 1196 | int (*flush)(struct input_dev *dev, struct file *file); |
1197 | 1197 | int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); |
1198 | 1198 | |
1199 | - struct input_handle *grab; | |
1199 | + struct input_handle __rcu *grab; | |
1200 | 1200 | |
1201 | 1201 | spinlock_t event_lock; |
1202 | 1202 | struct mutex mutex; |
include/linux/iocontext.h
include/linux/key.h
include/linux/kvm_host.h
... | ... | @@ -205,7 +205,7 @@ |
205 | 205 | |
206 | 206 | struct mutex irq_lock; |
207 | 207 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
208 | - struct kvm_irq_routing_table *irq_routing; | |
208 | + struct kvm_irq_routing_table __rcu *irq_routing; | |
209 | 209 | struct hlist_head mask_notifier_list; |
210 | 210 | struct hlist_head irq_ack_notifier_list; |
211 | 211 | #endif |
include/linux/mm_types.h
include/linux/nfs_fs.h
... | ... | @@ -185,7 +185,7 @@ |
185 | 185 | struct nfs4_cached_acl *nfs4_acl; |
186 | 186 | /* NFSv4 state */ |
187 | 187 | struct list_head open_states; |
188 | - struct nfs_delegation *delegation; | |
188 | + struct nfs_delegation __rcu *delegation; | |
189 | 189 | fmode_t delegation_state; |
190 | 190 | struct rw_semaphore rwsem; |
191 | 191 | #endif /* CONFIG_NFS_V4*/ |
include/linux/notifier.h
... | ... | @@ -49,28 +49,28 @@ |
49 | 49 | |
50 | 50 | struct notifier_block { |
51 | 51 | int (*notifier_call)(struct notifier_block *, unsigned long, void *); |
52 | - struct notifier_block *next; | |
52 | + struct notifier_block __rcu *next; | |
53 | 53 | int priority; |
54 | 54 | }; |
55 | 55 | |
56 | 56 | struct atomic_notifier_head { |
57 | 57 | spinlock_t lock; |
58 | - struct notifier_block *head; | |
58 | + struct notifier_block __rcu *head; | |
59 | 59 | }; |
60 | 60 | |
61 | 61 | struct blocking_notifier_head { |
62 | 62 | struct rw_semaphore rwsem; |
63 | - struct notifier_block *head; | |
63 | + struct notifier_block __rcu *head; | |
64 | 64 | }; |
65 | 65 | |
66 | 66 | struct raw_notifier_head { |
67 | - struct notifier_block *head; | |
67 | + struct notifier_block __rcu *head; | |
68 | 68 | }; |
69 | 69 | |
70 | 70 | struct srcu_notifier_head { |
71 | 71 | struct mutex mutex; |
72 | 72 | struct srcu_struct srcu; |
73 | - struct notifier_block *head; | |
73 | + struct notifier_block __rcu *head; | |
74 | 74 | }; |
75 | 75 | |
76 | 76 | #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ |
include/linux/radix-tree.h
... | ... | @@ -47,6 +47,8 @@ |
47 | 47 | { |
48 | 48 | return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); |
49 | 49 | } |
50 | +#define radix_tree_indirect_to_ptr(ptr) \ | |
51 | + radix_tree_indirect_to_ptr((void __force *)(ptr)) | |
50 | 52 | |
51 | 53 | static inline int radix_tree_is_indirect_ptr(void *ptr) |
52 | 54 | { |
... | ... | @@ -61,7 +63,7 @@ |
61 | 63 | struct radix_tree_root { |
62 | 64 | unsigned int height; |
63 | 65 | gfp_t gfp_mask; |
64 | - struct radix_tree_node *rnode; | |
66 | + struct radix_tree_node __rcu *rnode; | |
65 | 67 | }; |
66 | 68 | |
67 | 69 | #define RADIX_TREE_INIT(mask) { \ |
include/linux/rculist.h
... | ... | @@ -10,6 +10,21 @@ |
10 | 10 | #include <linux/rcupdate.h> |
11 | 11 | |
12 | 12 | /* |
13 | + * Why is there no list_empty_rcu()? Because list_empty() serves this | |
14 | + * purpose. The list_empty() function fetches the RCU-protected pointer | |
15 | + * and compares it to the address of the list head, but neither dereferences | |
16 | + * this pointer itself nor provides this pointer to the caller. Therefore, | |
17 | + * it is not necessary to use rcu_dereference(), so that list_empty() can | |
18 | + * be used anywhere you would want to use a list_empty_rcu(). | |
19 | + */ | |
20 | + | |
21 | +/* | |
22 | + * return the ->next pointer of a list_head in an rcu safe | |
23 | + * way, we must not access it directly | |
24 | + */ | |
25 | +#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) | |
26 | + | |
27 | +/* | |
13 | 28 | * Insert a new entry between two known consecutive entries. |
14 | 29 | * |
15 | 30 | * This is only for internal list manipulation where we know |
... | ... | @@ -20,7 +35,7 @@ |
20 | 35 | { |
21 | 36 | new->next = next; |
22 | 37 | new->prev = prev; |
23 | - rcu_assign_pointer(prev->next, new); | |
38 | + rcu_assign_pointer(list_next_rcu(prev), new); | |
24 | 39 | next->prev = new; |
25 | 40 | } |
26 | 41 | |
... | ... | @@ -138,7 +153,7 @@ |
138 | 153 | { |
139 | 154 | new->next = old->next; |
140 | 155 | new->prev = old->prev; |
141 | - rcu_assign_pointer(new->prev->next, new); | |
156 | + rcu_assign_pointer(list_next_rcu(new->prev), new); | |
142 | 157 | new->next->prev = new; |
143 | 158 | old->prev = LIST_POISON2; |
144 | 159 | } |
... | ... | @@ -193,7 +208,7 @@ |
193 | 208 | */ |
194 | 209 | |
195 | 210 | last->next = at; |
196 | - rcu_assign_pointer(head->next, first); | |
211 | + rcu_assign_pointer(list_next_rcu(head), first); | |
197 | 212 | first->prev = head; |
198 | 213 | at->prev = last; |
199 | 214 | } |
... | ... | @@ -208,7 +223,9 @@ |
208 | 223 | * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). |
209 | 224 | */ |
210 | 225 | #define list_entry_rcu(ptr, type, member) \ |
211 | - container_of(rcu_dereference_raw(ptr), type, member) | |
226 | + ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \ | |
227 | + container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ | |
228 | + }) | |
212 | 229 | |
213 | 230 | /** |
214 | 231 | * list_first_entry_rcu - get the first element from a list |
215 | 232 | |
... | ... | @@ -225,9 +242,9 @@ |
225 | 242 | list_entry_rcu((ptr)->next, type, member) |
226 | 243 | |
227 | 244 | #define __list_for_each_rcu(pos, head) \ |
228 | - for (pos = rcu_dereference_raw((head)->next); \ | |
245 | + for (pos = rcu_dereference_raw(list_next_rcu(head)); \ | |
229 | 246 | pos != (head); \ |
230 | - pos = rcu_dereference_raw(pos->next)) | |
247 | + pos = rcu_dereference_raw(list_next_rcu((pos))) | |
231 | 248 | |
232 | 249 | /** |
233 | 250 | * list_for_each_entry_rcu - iterate over rcu list of given type |
234 | 251 | |
... | ... | @@ -257,9 +274,9 @@ |
257 | 274 | * as long as the traversal is guarded by rcu_read_lock(). |
258 | 275 | */ |
259 | 276 | #define list_for_each_continue_rcu(pos, head) \ |
260 | - for ((pos) = rcu_dereference_raw((pos)->next); \ | |
277 | + for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ | |
261 | 278 | prefetch((pos)->next), (pos) != (head); \ |
262 | - (pos) = rcu_dereference_raw((pos)->next)) | |
279 | + (pos) = rcu_dereference_raw(list_next_rcu(pos))) | |
263 | 280 | |
264 | 281 | /** |
265 | 282 | * list_for_each_entry_continue_rcu - continue iteration over list of given type |
266 | 283 | |
... | ... | @@ -314,12 +331,19 @@ |
314 | 331 | |
315 | 332 | new->next = next; |
316 | 333 | new->pprev = old->pprev; |
317 | - rcu_assign_pointer(*new->pprev, new); | |
334 | + rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); | |
318 | 335 | if (next) |
319 | 336 | new->next->pprev = &new->next; |
320 | 337 | old->pprev = LIST_POISON2; |
321 | 338 | } |
322 | 339 | |
340 | +/* | |
341 | + * return the first or the next element in an RCU protected hlist | |
342 | + */ | |
343 | +#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) | |
344 | +#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) | |
345 | +#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) | |
346 | + | |
323 | 347 | /** |
324 | 348 | * hlist_add_head_rcu |
325 | 349 | * @n: the element to add to the hash list. |
... | ... | @@ -346,7 +370,7 @@ |
346 | 370 | |
347 | 371 | n->next = first; |
348 | 372 | n->pprev = &h->first; |
349 | - rcu_assign_pointer(h->first, n); | |
373 | + rcu_assign_pointer(hlist_first_rcu(h), n); | |
350 | 374 | if (first) |
351 | 375 | first->pprev = &n->next; |
352 | 376 | } |
... | ... | @@ -374,7 +398,7 @@ |
374 | 398 | { |
375 | 399 | n->pprev = next->pprev; |
376 | 400 | n->next = next; |
377 | - rcu_assign_pointer(*(n->pprev), n); | |
401 | + rcu_assign_pointer(hlist_pprev_rcu(n), n); | |
378 | 402 | next->pprev = &n->next; |
379 | 403 | } |
380 | 404 | |
381 | 405 | |
... | ... | @@ -401,15 +425,15 @@ |
401 | 425 | { |
402 | 426 | n->next = prev->next; |
403 | 427 | n->pprev = &prev->next; |
404 | - rcu_assign_pointer(prev->next, n); | |
428 | + rcu_assign_pointer(hlist_next_rcu(prev), n); | |
405 | 429 | if (n->next) |
406 | 430 | n->next->pprev = &n->next; |
407 | 431 | } |
408 | 432 | |
409 | -#define __hlist_for_each_rcu(pos, head) \ | |
410 | - for (pos = rcu_dereference((head)->first); \ | |
411 | - pos && ({ prefetch(pos->next); 1; }); \ | |
412 | - pos = rcu_dereference(pos->next)) | |
433 | +#define __hlist_for_each_rcu(pos, head) \ | |
434 | + for (pos = rcu_dereference(hlist_first_rcu(head)); \ | |
435 | + pos && ({ prefetch(pos->next); 1; }); \ | |
436 | + pos = rcu_dereference(hlist_next_rcu(pos))) | |
413 | 437 | |
414 | 438 | /** |
415 | 439 | * hlist_for_each_entry_rcu - iterate over rcu list of given type |
416 | 440 | |
... | ... | @@ -422,11 +446,11 @@ |
422 | 446 | * the _rcu list-mutation primitives such as hlist_add_head_rcu() |
423 | 447 | * as long as the traversal is guarded by rcu_read_lock(). |
424 | 448 | */ |
425 | -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ | |
426 | - for (pos = rcu_dereference_raw((head)->first); \ | |
449 | +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ | |
450 | + for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ | |
427 | 451 | pos && ({ prefetch(pos->next); 1; }) && \ |
428 | 452 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ |
429 | - pos = rcu_dereference_raw(pos->next)) | |
453 | + pos = rcu_dereference_raw(hlist_next_rcu(pos))) | |
430 | 454 | |
431 | 455 | /** |
432 | 456 | * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type |
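
The new list_next_rcu(), hlist_first_rcu(), hlist_next_rcu() and hlist_pprev_rcu() wrappers exist only to hand rcu_assign_pointer() and rcu_dereference_raw() an __rcu-typed lvalue, since struct list_head is shared with non-RCU code and cannot carry the annotation itself. Callers keep using the usual iterators; a small hypothetical reader:

    struct item {
        struct list_head link;
        int value;
    };

    static LIST_HEAD(items);         /* additions/removals under some update-side mutex */

    static int items_sum(void)
    {
        struct item *it;
        int sum = 0;

        rcu_read_lock();
        list_for_each_entry_rcu(it, &items, link)
            sum += it->value;        /* ->next fetched via rcu_dereference_raw() internally */
        rcu_read_unlock();
        return sum;
    }
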
include/linux/rculist_nulls.h
... | ... | @@ -37,6 +37,12 @@ |
37 | 37 | } |
38 | 38 | } |
39 | 39 | |
40 | +#define hlist_nulls_first_rcu(head) \ | |
41 | + (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) | |
42 | + | |
43 | +#define hlist_nulls_next_rcu(node) \ | |
44 | + (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) | |
45 | + | |
40 | 46 | /** |
41 | 47 | * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization |
42 | 48 | * @n: the element to delete from the hash list. |
... | ... | @@ -88,7 +94,7 @@ |
88 | 94 | |
89 | 95 | n->next = first; |
90 | 96 | n->pprev = &h->first; |
91 | - rcu_assign_pointer(h->first, n); | |
97 | + rcu_assign_pointer(hlist_nulls_first_rcu(h), n); | |
92 | 98 | if (!is_a_nulls(first)) |
93 | 99 | first->pprev = &n->next; |
94 | 100 | } |
95 | 101 | |
... | ... | @@ -100,11 +106,11 @@ |
100 | 106 | * @member: the name of the hlist_nulls_node within the struct. |
101 | 107 | * |
102 | 108 | */ |
103 | -#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ | |
104 | - for (pos = rcu_dereference_raw((head)->first); \ | |
105 | - (!is_a_nulls(pos)) && \ | |
109 | +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ | |
110 | + for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ | |
111 | + (!is_a_nulls(pos)) && \ | |
106 | 112 | ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ |
107 | - pos = rcu_dereference_raw(pos->next)) | |
113 | + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) | |
108 | 114 | |
109 | 115 | #endif |
110 | 116 | #endif |
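
hlist_nulls chains get the same __rcu plumbing via hlist_nulls_first_rcu()/hlist_nulls_next_rcu(). A typical lockless lookup over such a chain looks roughly like the following; the structure, key handling, and the omitted restart-on-nulls-mismatch logic are assumptions for illustration, not part of this patch:

    struct entry {
        struct hlist_nulls_node node;
        u32 key;
    };

    static struct entry *chain_lookup(struct hlist_nulls_head *head, u32 key)
    {
        struct entry *e;
        struct hlist_nulls_node *pos;

        rcu_read_lock();
        hlist_nulls_for_each_entry_rcu(e, pos, head, node) {
            if (e->key == key) {
                rcu_read_unlock();
                return e;            /* real users take a reference before unlocking */
            }
        }
        /* real users also check get_nulls_value(pos) and restart if the chain changed */
        rcu_read_unlock();
        return NULL;
    }
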
include/linux/rcupdate.h
... | ... | @@ -41,11 +41,15 @@ |
41 | 41 | #include <linux/lockdep.h> |
42 | 42 | #include <linux/completion.h> |
43 | 43 | #include <linux/debugobjects.h> |
44 | +#include <linux/compiler.h> | |
44 | 45 | |
45 | 46 | #ifdef CONFIG_RCU_TORTURE_TEST |
46 | 47 | extern int rcutorture_runnable; /* for sysctl */ |
47 | 48 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ |
48 | 49 | |
50 | +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) | |
51 | +#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) | |
52 | + | |
49 | 53 | /** |
50 | 54 | * struct rcu_head - callback structure for use with RCU |
51 | 55 | * @next: next update requests in a list |
52 | 56 | |
53 | 57 | |
54 | 58 | |
55 | 59 | |
56 | 60 | |
... | ... | @@ -57,29 +61,94 @@ |
57 | 61 | }; |
58 | 62 | |
59 | 63 | /* Exported common interfaces */ |
60 | -extern void rcu_barrier(void); | |
64 | +extern void call_rcu_sched(struct rcu_head *head, | |
65 | + void (*func)(struct rcu_head *rcu)); | |
66 | +extern void synchronize_sched(void); | |
61 | 67 | extern void rcu_barrier_bh(void); |
62 | 68 | extern void rcu_barrier_sched(void); |
63 | 69 | extern void synchronize_sched_expedited(void); |
64 | 70 | extern int sched_expedited_torture_stats(char *page); |
65 | 71 | |
72 | +static inline void __rcu_read_lock_bh(void) | |
73 | +{ | |
74 | + local_bh_disable(); | |
75 | +} | |
76 | + | |
77 | +static inline void __rcu_read_unlock_bh(void) | |
78 | +{ | |
79 | + local_bh_enable(); | |
80 | +} | |
81 | + | |
82 | +#ifdef CONFIG_PREEMPT_RCU | |
83 | + | |
84 | +extern void __rcu_read_lock(void); | |
85 | +extern void __rcu_read_unlock(void); | |
86 | +void synchronize_rcu(void); | |
87 | + | |
88 | +/* | |
89 | + * Defined as a macro as it is a very low level header included from | |
90 | + * areas that don't even know about current. This gives the rcu_read_lock() | |
91 | + * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other | |
92 | + * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. | |
93 | + */ | |
94 | +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) | |
95 | + | |
96 | +#else /* #ifdef CONFIG_PREEMPT_RCU */ | |
97 | + | |
98 | +static inline void __rcu_read_lock(void) | |
99 | +{ | |
100 | + preempt_disable(); | |
101 | +} | |
102 | + | |
103 | +static inline void __rcu_read_unlock(void) | |
104 | +{ | |
105 | + preempt_enable(); | |
106 | +} | |
107 | + | |
108 | +static inline void synchronize_rcu(void) | |
109 | +{ | |
110 | + synchronize_sched(); | |
111 | +} | |
112 | + | |
113 | +static inline int rcu_preempt_depth(void) | |
114 | +{ | |
115 | + return 0; | |
116 | +} | |
117 | + | |
118 | +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
119 | + | |
66 | 120 | /* Internal to kernel */ |
67 | 121 | extern void rcu_init(void); |
122 | +extern void rcu_sched_qs(int cpu); | |
123 | +extern void rcu_bh_qs(int cpu); | |
124 | +extern void rcu_check_callbacks(int cpu, int user); | |
125 | +struct notifier_block; | |
68 | 126 | |
127 | +#ifdef CONFIG_NO_HZ | |
128 | + | |
129 | +extern void rcu_enter_nohz(void); | |
130 | +extern void rcu_exit_nohz(void); | |
131 | + | |
132 | +#else /* #ifdef CONFIG_NO_HZ */ | |
133 | + | |
134 | +static inline void rcu_enter_nohz(void) | |
135 | +{ | |
136 | +} | |
137 | + | |
138 | +static inline void rcu_exit_nohz(void) | |
139 | +{ | |
140 | +} | |
141 | + | |
142 | +#endif /* #else #ifdef CONFIG_NO_HZ */ | |
143 | + | |
69 | 144 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) |
70 | 145 | #include <linux/rcutree.h> |
71 | -#elif defined(CONFIG_TINY_RCU) | |
146 | +#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) | |
72 | 147 | #include <linux/rcutiny.h> |
73 | 148 | #else |
74 | 149 | #error "Unknown RCU implementation specified to kernel configuration" |
75 | 150 | #endif |
76 | 151 | |
77 | -#define RCU_HEAD_INIT { .next = NULL, .func = NULL } | |
78 | -#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT | |
79 | -#define INIT_RCU_HEAD(ptr) do { \ | |
80 | - (ptr)->next = NULL; (ptr)->func = NULL; \ | |
81 | -} while (0) | |
82 | - | |
83 | 152 | /* |
84 | 153 | * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic |
85 | 154 | * initialization and destruction of rcu_head on the stack. rcu_head structures |
86 | 155 | |
87 | 156 | |
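
The hunk above moves the CONFIG_PREEMPT_RCU split into rcupdate.h itself: with preemptible RCU the read-side primitives are real functions that track current->rcu_read_lock_nesting, while without it they collapse to preempt_disable()/preempt_enable() and synchronize_rcu() becomes synchronize_sched(). Read-side nesting behaves identically either way; a hedged sketch (struct foo and the slot parameter are invented):

    struct foo { int a; };

    static int read_nested(struct foo __rcu **slot)
    {
        struct foo *p;
        int v = 0;

        rcu_read_lock();             /* outermost read-side section */
        p = rcu_dereference(*slot);
        if (p) {
            rcu_read_lock();         /* nesting is legal; only the outermost matters */
            v = p->a;
            rcu_read_unlock();
        }
        rcu_read_unlock();           /* this task no longer blocks grace periods */
        return v;
    }
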
... | ... | @@ -120,14 +189,15 @@ |
120 | 189 | extern int debug_lockdep_rcu_enabled(void); |
121 | 190 | |
122 | 191 | /** |
123 | - * rcu_read_lock_held - might we be in RCU read-side critical section? | |
192 | + * rcu_read_lock_held() - might we be in RCU read-side critical section? | |
124 | 193 | * |
125 | 194 | * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU |
126 | 195 | * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, |
127 | 196 | * this assumes we are in an RCU read-side critical section unless it can |
128 | - * prove otherwise. | |
197 | + * prove otherwise. This is useful for debug checks in functions that | |
198 | + * require that they be called within an RCU read-side critical section. | |
129 | 199 | * |
130 | - * Check debug_lockdep_rcu_enabled() to prevent false positives during boot | |
200 | + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot | |
131 | 201 | * and while lockdep is disabled. |
132 | 202 | */ |
133 | 203 | static inline int rcu_read_lock_held(void) |
134 | 204 | |
... | ... | @@ -144,14 +214,16 @@ |
144 | 214 | extern int rcu_read_lock_bh_held(void); |
145 | 215 | |
146 | 216 | /** |
147 | - * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? | |
217 | + * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? | |
148 | 218 | * |
149 | 219 | * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an |
150 | 220 | * RCU-sched read-side critical section. In absence of |
151 | 221 | * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side |
152 | 222 | * critical section unless it can prove otherwise. Note that disabling |
153 | 223 | * of preemption (including disabling irqs) counts as an RCU-sched |
154 | - * read-side critical section. | |
224 | + * read-side critical section. This is useful for debug checks in functions | |
225 | + * that require that they be called within an RCU-sched read-side | |
226 | + * critical section. | |
155 | 227 | * |
156 | 228 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot |
157 | 229 | * and while lockdep is disabled. |
... | ... | @@ -211,7 +283,11 @@ |
211 | 283 | |
212 | 284 | extern int rcu_my_thread_group_empty(void); |
213 | 285 | |
214 | -#define __do_rcu_dereference_check(c) \ | |
286 | +/** | |
287 | + * rcu_lockdep_assert - emit lockdep splat if specified condition not met | |
288 | + * @c: condition to check | |
289 | + */ | |
290 | +#define rcu_lockdep_assert(c) \ | |
215 | 291 | do { \ |
216 | 292 | static bool __warned; \ |
217 | 293 | if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ |
218 | 294 | |
219 | 295 | |
220 | 296 | |
221 | 297 | |
222 | 298 | |
223 | 299 | |
224 | 300 | |
225 | 301 | |
226 | 302 | |
... | ... | @@ -220,42 +296,156 @@ |
220 | 296 | } \ |
221 | 297 | } while (0) |
222 | 298 | |
299 | +#else /* #ifdef CONFIG_PROVE_RCU */ | |
300 | + | |
301 | +#define rcu_lockdep_assert(c) do { } while (0) | |
302 | + | |
303 | +#endif /* #else #ifdef CONFIG_PROVE_RCU */ | |
304 | + | |
305 | +/* | |
306 | + * Helper functions for rcu_dereference_check(), rcu_dereference_protected() | |
307 | + * and rcu_assign_pointer(). Some of these could be folded into their | |
308 | + * callers, but they are left separate in order to ease introduction of | |
309 | + * multiple flavors of pointers to match the multiple flavors of RCU | |
310 | + * (e.g., __rcu_bh, __rcu_sched, and __srcu), should this make sense in | |
311 | + * the future. | |
312 | + */ | |
313 | +#define __rcu_access_pointer(p, space) \ | |
314 | + ({ \ | |
315 | + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ | |
316 | + (void) (((typeof (*p) space *)p) == p); \ | |
317 | + ((typeof(*p) __force __kernel *)(_________p1)); \ | |
318 | + }) | |
319 | +#define __rcu_dereference_check(p, c, space) \ | |
320 | + ({ \ | |
321 | + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ | |
322 | + rcu_lockdep_assert(c); \ | |
323 | + (void) (((typeof (*p) space *)p) == p); \ | |
324 | + smp_read_barrier_depends(); \ | |
325 | + ((typeof(*p) __force __kernel *)(_________p1)); \ | |
326 | + }) | |
327 | +#define __rcu_dereference_protected(p, c, space) \ | |
328 | + ({ \ | |
329 | + rcu_lockdep_assert(c); \ | |
330 | + (void) (((typeof (*p) space *)p) == p); \ | |
331 | + ((typeof(*p) __force __kernel *)(p)); \ | |
332 | + }) | |
333 | + | |
334 | +#define __rcu_dereference_index_check(p, c) \ | |
335 | + ({ \ | |
336 | + typeof(p) _________p1 = ACCESS_ONCE(p); \ | |
337 | + rcu_lockdep_assert(c); \ | |
338 | + smp_read_barrier_depends(); \ | |
339 | + (_________p1); \ | |
340 | + }) | |
341 | +#define __rcu_assign_pointer(p, v, space) \ | |
342 | + ({ \ | |
343 | + if (!__builtin_constant_p(v) || \ | |
344 | + ((v) != NULL)) \ | |
345 | + smp_wmb(); \ | |
346 | + (p) = (typeof(*v) __force space *)(v); \ | |
347 | + }) | |
348 | + | |
349 | + | |
223 | 350 | /** |
224 | - * rcu_dereference_check - rcu_dereference with debug checking | |
351 | + * rcu_access_pointer() - fetch RCU pointer with no dereferencing | |
352 | + * @p: The pointer to read | |
353 | + * | |
354 | + * Return the value of the specified RCU-protected pointer, but omit the | |
355 | + * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful | |
356 | + * when the value of this pointer is accessed, but the pointer is not | |
357 | + * dereferenced, for example, when testing an RCU-protected pointer against | |
358 | + * NULL. Although rcu_access_pointer() may also be used in cases where | |
359 | + * update-side locks prevent the value of the pointer from changing, you | |
360 | + * should instead use rcu_dereference_protected() for this use case. | |
361 | + */ | |
362 | +#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) | |
363 | + | |
364 | +/** | |
365 | + * rcu_dereference_check() - rcu_dereference with debug checking | |
225 | 366 | * @p: The pointer to read, prior to dereferencing |
226 | 367 | * @c: The conditions under which the dereference will take place |
227 | 368 | * |
228 | 369 | * Do an rcu_dereference(), but check that the conditions under which the |
229 | - * dereference will take place are correct. Typically the conditions indicate | |
230 | - * the various locking conditions that should be held at that point. The check | |
231 | - * should return true if the conditions are satisfied. | |
370 | + * dereference will take place are correct. Typically the conditions | |
371 | + * indicate the various locking conditions that should be held at that | |
372 | + * point. The check should return true if the conditions are satisfied. | |
373 | + * An implicit check for being in an RCU read-side critical section | |
374 | + * (rcu_read_lock()) is included. | |
232 | 375 | * |
233 | 376 | * For example: |
234 | 377 | * |
235 | - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || | |
236 | - * lockdep_is_held(&foo->lock)); | |
378 | + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); | |
237 | 379 | * |
238 | 380 | * could be used to indicate to lockdep that foo->bar may only be dereferenced |
239 | - * if either the RCU read lock is held, or that the lock required to replace | |
381 | + * if either rcu_read_lock() is held, or that the lock required to replace | |
240 | 382 | * the bar struct at foo->bar is held. |
241 | 383 | * |
242 | 384 | * Note that the list of conditions may also include indications of when a lock |
243 | 385 | * need not be held, for example during initialisation or destruction of the |
244 | 386 | * target struct: |
245 | 387 | * |
246 | - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || | |
247 | - * lockdep_is_held(&foo->lock) || | |
388 | + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || | |
248 | 389 | * atomic_read(&foo->usage) == 0); |
390 | + * | |
391 | + * Inserts memory barriers on architectures that require them | |
392 | + * (currently only the Alpha), prevents the compiler from refetching | |
393 | + * (and from merging fetches), and, more importantly, documents exactly | |
394 | + * which pointers are protected by RCU and checks that the pointer is | |
395 | + * annotated as __rcu. | |
249 | 396 | */ |
250 | 397 | #define rcu_dereference_check(p, c) \ |
251 | - ({ \ | |
252 | - __do_rcu_dereference_check(c); \ | |
253 | - rcu_dereference_raw(p); \ | |
254 | - }) | |
398 | + __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) | |
255 | 399 | |
256 | 400 | /** |
257 | - * rcu_dereference_protected - fetch RCU pointer when updates prevented | |
401 | + * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking | |
402 | + * @p: The pointer to read, prior to dereferencing | |
403 | + * @c: The conditions under which the dereference will take place | |
258 | 404 | * |
405 | + * This is the RCU-bh counterpart to rcu_dereference_check(). | |
406 | + */ | |
407 | +#define rcu_dereference_bh_check(p, c) \ | |
408 | + __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) | |
409 | + | |
410 | +/** | |
411 | + * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking | |
412 | + * @p: The pointer to read, prior to dereferencing | |
413 | + * @c: The conditions under which the dereference will take place | |
414 | + * | |
415 | + * This is the RCU-sched counterpart to rcu_dereference_check(). | |
416 | + */ | |
417 | +#define rcu_dereference_sched_check(p, c) \ | |
418 | + __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ | |
419 | + __rcu) | |
420 | + | |
421 | +#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ | |
422 | + | |
423 | +/** | |
424 | + * rcu_dereference_index_check() - rcu_dereference for indices with debug checking | |
425 | + * @p: The pointer to read, prior to dereferencing | |
426 | + * @c: The conditions under which the dereference will take place | |
427 | + * | |
428 | + * Similar to rcu_dereference_check(), but omits the sparse checking. | |
429 | + * This allows rcu_dereference_index_check() to be used on integers, | |
430 | + * which can then be used as array indices. Attempting to use | |
431 | + * rcu_dereference_check() on an integer will give compiler warnings | |
432 | + * because the sparse address-space mechanism relies on dereferencing | |
433 | + * the RCU-protected pointer. Dereferencing integers is not something | |
434 | + * that even gcc will put up with. | |
435 | + * | |
436 | + * Note that this function does not implicitly check for RCU read-side | |
437 | + * critical sections. If this function gains lots of uses, it might | |
438 | + * make sense to provide versions for each flavor of RCU, but it does | |
439 | + * not make sense as of early 2010. | |
440 | + */ | |
441 | +#define rcu_dereference_index_check(p, c) \ | |
442 | + __rcu_dereference_index_check((p), (c)) | |
443 | + | |
444 | +/** | |
445 | + * rcu_dereference_protected() - fetch RCU pointer when updates prevented | |
446 | + * @p: The pointer to read, prior to dereferencing | |
447 | + * @c: The conditions under which the dereference will take place | |
448 | + * | |
259 | 449 | * Return the value of the specified RCU-protected pointer, but omit |
260 | 450 | * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This |
261 | 451 | * is useful in cases where update-side locks prevent the value of the |
262 | 452 | |
263 | 453 | |
264 | 454 | |
265 | 455 | |
266 | 456 | |
267 | 457 | |
268 | 458 | |
269 | 459 | |
270 | 460 | |
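
Of the new accessors documented above, rcu_access_pointer() is the easiest to overlook: it is for code that only tests an RCU-protected pointer's value (typically against NULL) and never dereferences it, so the smp_read_barrier_depends() can be omitted. A hypothetical example:

    struct cfg { int n; };

    struct cfg_holder {
        struct cfg __rcu *cfg;
        struct mutex mutex;          /* writers hold this */
    };

    /* May be called from any context; no dereference, so no barrier needed. */
    static bool cfg_is_published(struct cfg_holder *h)
    {
        return rcu_access_pointer(h->cfg) != NULL;
    }
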
... | ... | @@ -263,36 +453,62 @@ |
263 | 453 | * prevent the compiler from repeating this reference or combining it |
264 | 454 | * with other references, so it should not be used without protection |
265 | 455 | * of appropriate locks. |
456 | + * | |
457 | + * This function is only for update-side use. Using this function | |
458 | + * when protected only by rcu_read_lock() will result in infrequent | |
459 | + * but very ugly failures. | |
266 | 460 | */ |
267 | 461 | #define rcu_dereference_protected(p, c) \ |
268 | - ({ \ | |
269 | - __do_rcu_dereference_check(c); \ | |
270 | - (p); \ | |
271 | - }) | |
462 | + __rcu_dereference_protected((p), (c), __rcu) | |
272 | 463 | |
273 | -#else /* #ifdef CONFIG_PROVE_RCU */ | |
464 | +/** | |
465 | + * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented | |
466 | + * @p: The pointer to read, prior to dereferencing | |
467 | + * @c: The conditions under which the dereference will take place | |
468 | + * | |
469 | + * This is the RCU-bh counterpart to rcu_dereference_protected(). | |
470 | + */ | |
471 | +#define rcu_dereference_bh_protected(p, c) \ | |
472 | + __rcu_dereference_protected((p), (c), __rcu) | |
274 | 473 | |
275 | -#define rcu_dereference_check(p, c) rcu_dereference_raw(p) | |
276 | -#define rcu_dereference_protected(p, c) (p) | |
474 | +/** | |
475 | + * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented | |
476 | + * @p: The pointer to read, prior to dereferencing | |
477 | + * @c: The conditions under which the dereference will take place | |
478 | + * | |
479 | + * This is the RCU-sched counterpart to rcu_dereference_protected(). | |
480 | + */ | |
481 | +#define rcu_dereference_sched_protected(p, c) \ | |
482 | + __rcu_dereference_protected((p), (c), __rcu) | |
277 | 483 | |
278 | -#endif /* #else #ifdef CONFIG_PROVE_RCU */ | |
279 | 484 | |
280 | 485 | /** |
281 | - * rcu_access_pointer - fetch RCU pointer with no dereferencing | |
486 | + * rcu_dereference() - fetch RCU-protected pointer for dereferencing | |
487 | + * @p: The pointer to read, prior to dereferencing | |
282 | 488 | * |
283 | - * Return the value of the specified RCU-protected pointer, but omit the | |
284 | - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful | |
285 | - * when the value of this pointer is accessed, but the pointer is not | |
286 | - * dereferenced, for example, when testing an RCU-protected pointer against | |
287 | - * NULL. This may also be used in cases where update-side locks prevent | |
288 | - * the value of the pointer from changing, but rcu_dereference_protected() | |
289 | - * is a lighter-weight primitive for this use case. | |
489 | + * This is a simple wrapper around rcu_dereference_check(). | |
290 | 490 | */ |
291 | -#define rcu_access_pointer(p) ACCESS_ONCE(p) | |
491 | +#define rcu_dereference(p) rcu_dereference_check(p, 0) | |
292 | 492 | |
293 | 493 | /** |
294 | - * rcu_read_lock - mark the beginning of an RCU read-side critical section. | |
494 | + * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing | |
495 | + * @p: The pointer to read, prior to dereferencing | |
295 | 496 | * |
497 | + * Makes rcu_dereference_check() do the dirty work. | |
498 | + */ | |
499 | +#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) | |
500 | + | |
501 | +/** | |
502 | + * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing | |
503 | + * @p: The pointer to read, prior to dereferencing | |
504 | + * | |
505 | + * Makes rcu_dereference_check() do the dirty work. | |
506 | + */ | |
507 | +#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) | |
508 | + | |
509 | +/** | |
510 | + * rcu_read_lock() - mark the beginning of an RCU read-side critical section | |
511 | + * | |
296 | 512 | * When synchronize_rcu() is invoked on one CPU while other CPUs |
297 | 513 | * are within RCU read-side critical sections, then the |
298 | 514 | * synchronize_rcu() is guaranteed to block until after all the other |
... | ... | @@ -302,7 +518,7 @@ |
302 | 518 | * until after the all the other CPUs exit their critical sections. |
303 | 519 | * |
304 | 520 | * Note, however, that RCU callbacks are permitted to run concurrently |
305 | - * with RCU read-side critical sections. One way that this can happen | |
521 | + * with new RCU read-side critical sections. One way that this can happen | |
306 | 522 | * is via the following sequence of events: (1) CPU 0 enters an RCU |
307 | 523 | * read-side critical section, (2) CPU 1 invokes call_rcu() to register |
308 | 524 | * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, |
... | ... | @@ -317,7 +533,20 @@ |
317 | 533 | * will be deferred until the outermost RCU read-side critical section |
318 | 534 | * completes. |
319 | 535 | * |
320 | - * It is illegal to block while in an RCU read-side critical section. | |
536 | + * You can avoid reading and understanding the next paragraph by | |
537 | + * following this rule: don't put anything in an rcu_read_lock() RCU | |
538 | + * read-side critical section that would block in a !PREEMPT kernel. | |
539 | + * But if you want the full story, read on! | |
540 | + * | |
541 | + * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it | |
542 | + * is illegal to block while in an RCU read-side critical section. In | |
543 | + * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) | |
544 | + * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may | |
545 | + * be preempted, but explicit blocking is illegal. Finally, in preemptible | |
546 | + * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, | |
547 | + * RCU read-side critical sections may be preempted and they may also | |
548 | + * block, but only when acquiring spinlocks that are subject to priority | |
549 | + * inheritance. | |
321 | 550 | */ |
322 | 551 | static inline void rcu_read_lock(void) |
323 | 552 | { |
... | ... | @@ -337,7 +566,7 @@ |
337 | 566 | */ |
338 | 567 | |
339 | 568 | /** |
340 | - * rcu_read_unlock - marks the end of an RCU read-side critical section. | |
569 | + * rcu_read_unlock() - marks the end of an RCU read-side critical section. | |
341 | 570 | * |
342 | 571 | * See rcu_read_lock() for more information. |
343 | 572 | */ |
344 | 573 | |
... | ... | @@ -349,15 +578,16 @@ |
349 | 578 | } |
350 | 579 | |
351 | 580 | /** |
352 | - * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section | |
581 | + * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section | |
353 | 582 | * |
354 | 583 | * This is equivalent of rcu_read_lock(), but to be used when updates |
355 | - * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks | |
356 | - * consider completion of a softirq handler to be a quiescent state, | |
357 | - * a process in RCU read-side critical section must be protected by | |
358 | - * disabling softirqs. Read-side critical sections in interrupt context | |
359 | - * can use just rcu_read_lock(). | |
360 | - * | |
584 | + * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since | |
585 | + * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a | |
586 | + * softirq handler to be a quiescent state, a process in RCU read-side | |
587 | + * critical section must be protected by disabling softirqs. Read-side | |
588 | + * critical sections in interrupt context can use just rcu_read_lock(), | |
589 | + * though this should at least be commented to avoid confusing people | |
590 | + * reading the code. | |
361 | 591 | */ |
362 | 592 | static inline void rcu_read_lock_bh(void) |
363 | 593 | { |
364 | 594 | |
... | ... | @@ -379,13 +609,12 @@ |
379 | 609 | } |
380 | 610 | |
381 | 611 | /** |
382 | - * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section | |
612 | + * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section | |
383 | 613 | * |
384 | - * Should be used with either | |
385 | - * - synchronize_sched() | |
386 | - * or | |
387 | - * - call_rcu_sched() and rcu_barrier_sched() | |
388 | - * on the write-side to insure proper synchronization. | |
614 | + * This is equivalent of rcu_read_lock(), but to be used when updates | |
615 | + * are being done using call_rcu_sched() or synchronize_rcu_sched(). | |
616 | + * Read-side critical sections can also be introduced by anything that | |
617 | + * disables preemption, including local_irq_disable() and friends. | |
389 | 618 | */ |
390 | 619 | static inline void rcu_read_lock_sched(void) |
391 | 620 | { |
392 | 621 | |
393 | 622 | |
394 | 623 | |
395 | 624 | |
396 | 625 | |
397 | 626 | |
... | ... | @@ -420,71 +649,34 @@ |
420 | 649 | preempt_enable_notrace(); |
421 | 650 | } |
422 | 651 | |
423 | - | |
424 | 652 | /** |
425 | - * rcu_dereference_raw - fetch an RCU-protected pointer | |
653 | + * rcu_assign_pointer() - assign to RCU-protected pointer | |
654 | + * @p: pointer to assign to | |
655 | + * @v: value to assign (publish) | |
426 | 656 | * |
427 | - * The caller must be within some flavor of RCU read-side critical | |
428 | - * section, or must be otherwise preventing the pointer from changing, | |
429 | - * for example, by holding an appropriate lock. This pointer may later | |
430 | - * be safely dereferenced. It is the caller's responsibility to have | |
431 | - * done the right thing, as this primitive does no checking of any kind. | |
657 | + * Assigns the specified value to the specified RCU-protected | |
658 | + * pointer, ensuring that any concurrent RCU readers will see | |
659 | + * any prior initialization. Returns the value assigned. | |
432 | 660 | * |
433 | 661 | * Inserts memory barriers on architectures that require them |
434 | - * (currently only the Alpha), and, more importantly, documents | |
435 | - * exactly which pointers are protected by RCU. | |
436 | - */ | |
437 | -#define rcu_dereference_raw(p) ({ \ | |
438 | - typeof(p) _________p1 = ACCESS_ONCE(p); \ | |
439 | - smp_read_barrier_depends(); \ | |
440 | - (_________p1); \ | |
441 | - }) | |
442 | - | |
443 | -/** | |
444 | - * rcu_dereference - fetch an RCU-protected pointer, checking for RCU | |
445 | - * | |
446 | - * Makes rcu_dereference_check() do the dirty work. | |
447 | - */ | |
448 | -#define rcu_dereference(p) \ | |
449 | - rcu_dereference_check(p, rcu_read_lock_held()) | |
450 | - | |
451 | -/** | |
452 | - * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh | |
453 | - * | |
454 | - * Makes rcu_dereference_check() do the dirty work. | |
455 | - */ | |
456 | -#define rcu_dereference_bh(p) \ | |
457 | - rcu_dereference_check(p, rcu_read_lock_bh_held()) | |
458 | - | |
459 | -/** | |
460 | - * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched | |
461 | - * | |
462 | - * Makes rcu_dereference_check() do the dirty work. | |
463 | - */ | |
464 | -#define rcu_dereference_sched(p) \ | |
465 | - rcu_dereference_check(p, rcu_read_lock_sched_held()) | |
466 | - | |
467 | -/** | |
468 | - * rcu_assign_pointer - assign (publicize) a pointer to a newly | |
469 | - * initialized structure that will be dereferenced by RCU read-side | |
470 | - * critical sections. Returns the value assigned. | |
471 | - * | |
472 | - * Inserts memory barriers on architectures that require them | |
473 | 662 | * (pretty much all of them other than x86), and also prevents |
474 | 663 | * the compiler from reordering the code that initializes the |
475 | 664 | * structure after the pointer assignment. More importantly, this |
476 | 665 | * call documents which pointers will be dereferenced by RCU read-side |
477 | 666 | * code. |
478 | 667 | */ |
479 | - | |
480 | 668 | #define rcu_assign_pointer(p, v) \ |
481 | - ({ \ | |
482 | - if (!__builtin_constant_p(v) || \ | |
483 | - ((v) != NULL)) \ | |
484 | - smp_wmb(); \ | |
485 | - (p) = (v); \ | |
486 | - }) | |
669 | + __rcu_assign_pointer((p), (v), __rcu) | |
487 | 670 | |
671 | +/** | |
672 | + * RCU_INIT_POINTER() - initialize an RCU-protected pointer |
673 | + * |
674 | + * Initialize an RCU-protected pointer in such a way as to avoid RCU-lockdep |
675 | + * splats. | |
676 | + */ | |
677 | +#define RCU_INIT_POINTER(p, v) \ | |
678 | + p = (typeof(*v) __force __rcu *)(v) | |
679 | + | |
488 | 680 | /* Infrastructure to implement the synchronize_() primitives. */ |
489 | 681 | |
490 | 682 | struct rcu_synchronize { |
491 | 683 | |
492 | 684 | |
493 | 685 | |
494 | 686 | |
495 | 687 | |
496 | 688 | |
497 | 689 | |
... | ... | @@ -494,26 +686,37 @@ |
494 | 686 | |
495 | 687 | extern void wakeme_after_rcu(struct rcu_head *head); |
496 | 688 | |
689 | +#ifdef CONFIG_PREEMPT_RCU | |
690 | + | |
497 | 691 | /** |
498 | - * call_rcu - Queue an RCU callback for invocation after a grace period. | |
692 | + * call_rcu() - Queue an RCU callback for invocation after a grace period. | |
499 | 693 | * @head: structure to be used for queueing the RCU updates. |
500 | - * @func: actual update function to be invoked after the grace period | |
694 | + * @func: actual callback function to be invoked after the grace period | |
501 | 695 | * |
502 | - * The update function will be invoked some time after a full grace | |
503 | - * period elapses, in other words after all currently executing RCU | |
504 | - * read-side critical sections have completed. RCU read-side critical | |
696 | + * The callback function will be invoked some time after a full grace | |
697 | + * period elapses, in other words after all pre-existing RCU read-side | |
698 | + * critical sections have completed. However, the callback function | |
699 | + * might well execute concurrently with RCU read-side critical sections | |
700 | + * that started after call_rcu() was invoked. RCU read-side critical | |
505 | 701 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), |
506 | 702 | * and may be nested. |
507 | 703 | */ |
508 | 704 | extern void call_rcu(struct rcu_head *head, |
509 | 705 | void (*func)(struct rcu_head *head)); |
510 | 706 | |
707 | +#else /* #ifdef CONFIG_PREEMPT_RCU */ | |
708 | + | |
709 | +/* In classic RCU, call_rcu() is just call_rcu_sched(). */ | |
710 | +#define call_rcu call_rcu_sched | |
711 | + | |
712 | +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
713 | + | |
511 | 714 | /** |
512 | - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. | |
715 | + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. | |
513 | 716 | * @head: structure to be used for queueing the RCU updates. |
514 | - * @func: actual update function to be invoked after the grace period | |
717 | + * @func: actual callback function to be invoked after the grace period | |
515 | 718 | * |
516 | - * The update function will be invoked some time after a full grace | |
719 | + * The callback function will be invoked some time after a full grace | |
517 | 720 | * period elapses, in other words after all currently executing RCU |
518 | 721 | * read-side critical sections have completed. call_rcu_bh() assumes |
519 | 722 | * that the read-side critical sections end on completion of a softirq |
... | ... | @@ -565,39 +768,6 @@ |
565 | 768 | { |
566 | 769 | } |
567 | 770 | #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ |
568 | - | |
569 | -#ifndef CONFIG_PROVE_RCU | |
570 | -#define __do_rcu_dereference_check(c) do { } while (0) | |
571 | -#endif /* #ifdef CONFIG_PROVE_RCU */ | |
572 | - | |
573 | -#define __rcu_dereference_index_check(p, c) \ | |
574 | - ({ \ | |
575 | - typeof(p) _________p1 = ACCESS_ONCE(p); \ | |
576 | - __do_rcu_dereference_check(c); \ | |
577 | - smp_read_barrier_depends(); \ | |
578 | - (_________p1); \ | |
579 | - }) | |
580 | - | |
581 | -/** | |
582 | - * rcu_dereference_index_check() - rcu_dereference for indices with debug checking | |
583 | - * @p: The pointer to read, prior to dereferencing | |
584 | - * @c: The conditions under which the dereference will take place | |
585 | - * | |
586 | - * Similar to rcu_dereference_check(), but omits the sparse checking. | |
587 | - * This allows rcu_dereference_index_check() to be used on integers, | |
588 | - * which can then be used as array indices. Attempting to use | |
589 | - * rcu_dereference_check() on an integer will give compiler warnings | |
590 | - * because the sparse address-space mechanism relies on dereferencing | |
591 | - * the RCU-protected pointer. Dereferencing integers is not something | |
592 | - * that even gcc will put up with. | |
593 | - * | |
594 | - * Note that this function does not implicitly check for RCU read-side | |
595 | - * critical sections. If this function gains lots of uses, it might | |
596 | - * make sense to provide versions for each flavor of RCU, but it does | |
597 | - * not make sense as of early 2010. | |
598 | - */ | |
599 | -#define rcu_dereference_index_check(p, c) \ | |
600 | - __rcu_dereference_index_check((p), (c)) | |
601 | 771 | |
602 | 772 | #endif /* __LINUX_RCUPDATE_H */ |
include/linux/rcutiny.h
... | ... | @@ -27,103 +27,101 @@ |
27 | 27 | |
28 | 28 | #include <linux/cache.h> |
29 | 29 | |
30 | -void rcu_sched_qs(int cpu); | |
31 | -void rcu_bh_qs(int cpu); | |
32 | -static inline void rcu_note_context_switch(int cpu) | |
30 | +#define rcu_init_sched() do { } while (0) | |
31 | + | |
32 | +#ifdef CONFIG_TINY_RCU | |
33 | + | |
34 | +static inline void synchronize_rcu_expedited(void) | |
33 | 35 | { |
34 | - rcu_sched_qs(cpu); | |
36 | + synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ | |
35 | 37 | } |
36 | 38 | |
37 | -#define __rcu_read_lock() preempt_disable() | |
38 | -#define __rcu_read_unlock() preempt_enable() | |
39 | -#define __rcu_read_lock_bh() local_bh_disable() | |
40 | -#define __rcu_read_unlock_bh() local_bh_enable() | |
41 | -#define call_rcu_sched call_rcu | |
42 | - | |
43 | -#define rcu_init_sched() do { } while (0) | |
44 | -extern void rcu_check_callbacks(int cpu, int user); | |
45 | - | |
46 | -static inline int rcu_needs_cpu(int cpu) | |
39 | +static inline void rcu_barrier(void) | |
47 | 40 | { |
48 | - return 0; | |
41 | + rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */ | |
49 | 42 | } |
50 | 43 | |
51 | -/* | |
52 | - * Return the number of grace periods. | |
53 | - */ | |
54 | -static inline long rcu_batches_completed(void) | |
44 | +#else /* #ifdef CONFIG_TINY_RCU */ | |
45 | + | |
46 | +void rcu_barrier(void); | |
47 | +void synchronize_rcu_expedited(void); | |
48 | + | |
49 | +#endif /* #else #ifdef CONFIG_TINY_RCU */ | |
50 | + | |
51 | +static inline void synchronize_rcu_bh(void) | |
55 | 52 | { |
56 | - return 0; | |
53 | + synchronize_sched(); | |
57 | 54 | } |
58 | 55 | |
59 | -/* | |
60 | - * Return the number of bottom-half grace periods. | |
61 | - */ | |
62 | -static inline long rcu_batches_completed_bh(void) | |
56 | +static inline void synchronize_rcu_bh_expedited(void) | |
63 | 57 | { |
64 | - return 0; | |
58 | + synchronize_sched(); | |
65 | 59 | } |
66 | 60 | |
67 | -static inline void rcu_force_quiescent_state(void) | |
61 | +#ifdef CONFIG_TINY_RCU | |
62 | + | |
63 | +static inline void rcu_preempt_note_context_switch(void) | |
68 | 64 | { |
69 | 65 | } |
70 | 66 | |
71 | -static inline void rcu_bh_force_quiescent_state(void) | |
67 | +static inline void exit_rcu(void) | |
72 | 68 | { |
73 | 69 | } |
74 | 70 | |
75 | -static inline void rcu_sched_force_quiescent_state(void) | |
71 | +static inline int rcu_needs_cpu(int cpu) | |
76 | 72 | { |
73 | + return 0; | |
77 | 74 | } |
78 | 75 | |
79 | -extern void synchronize_sched(void); | |
76 | +#else /* #ifdef CONFIG_TINY_RCU */ | |
80 | 77 | |
81 | -static inline void synchronize_rcu(void) | |
78 | +void rcu_preempt_note_context_switch(void); | |
79 | +extern void exit_rcu(void); | |
80 | +int rcu_preempt_needs_cpu(void); | |
81 | + | |
82 | +static inline int rcu_needs_cpu(int cpu) | |
82 | 83 | { |
83 | - synchronize_sched(); | |
84 | + return rcu_preempt_needs_cpu(); | |
84 | 85 | } |
85 | 86 | |
86 | -static inline void synchronize_rcu_bh(void) | |
87 | +#endif /* #else #ifdef CONFIG_TINY_RCU */ | |
88 | + | |
89 | +static inline void rcu_note_context_switch(int cpu) | |
87 | 90 | { |
88 | - synchronize_sched(); | |
91 | + rcu_sched_qs(cpu); | |
92 | + rcu_preempt_note_context_switch(); | |
89 | 93 | } |
90 | 94 | |
91 | -static inline void synchronize_rcu_expedited(void) | |
95 | +/* | |
96 | + * Return the number of grace periods. | |
97 | + */ | |
98 | +static inline long rcu_batches_completed(void) | |
92 | 99 | { |
93 | - synchronize_sched(); | |
100 | + return 0; | |
94 | 101 | } |
95 | 102 | |
96 | -static inline void synchronize_rcu_bh_expedited(void) | |
103 | +/* | |
104 | + * Return the number of bottom-half grace periods. | |
105 | + */ | |
106 | +static inline long rcu_batches_completed_bh(void) | |
97 | 107 | { |
98 | - synchronize_sched(); | |
108 | + return 0; | |
99 | 109 | } |
100 | 110 | |
101 | -struct notifier_block; | |
102 | - | |
103 | -#ifdef CONFIG_NO_HZ | |
104 | - | |
105 | -extern void rcu_enter_nohz(void); | |
106 | -extern void rcu_exit_nohz(void); | |
107 | - | |
108 | -#else /* #ifdef CONFIG_NO_HZ */ | |
109 | - | |
110 | -static inline void rcu_enter_nohz(void) | |
111 | +static inline void rcu_force_quiescent_state(void) | |
111 | 112 | { |
112 | 113 | } |
113 | 114 | |
114 | -static inline void rcu_exit_nohz(void) | |
115 | +static inline void rcu_bh_force_quiescent_state(void) | |
115 | 116 | { |
116 | 117 | } |
117 | 118 | |
118 | -#endif /* #else #ifdef CONFIG_NO_HZ */ | |
119 | - | |
120 | -static inline void exit_rcu(void) | |
119 | +static inline void rcu_sched_force_quiescent_state(void) | |
121 | 120 | { |
122 | 121 | } |
123 | 122 | |
124 | -static inline int rcu_preempt_depth(void) | |
123 | +static inline void rcu_cpu_stall_reset(void) | |
125 | 124 | { |
126 | - return 0; | |
127 | 125 | } |
128 | 126 | |
129 | 127 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
include/linux/rcutree.h
... | ... | @@ -30,64 +30,23 @@ |
30 | 30 | #ifndef __LINUX_RCUTREE_H |
31 | 31 | #define __LINUX_RCUTREE_H |
32 | 32 | |
33 | -struct notifier_block; | |
34 | - | |
35 | -extern void rcu_sched_qs(int cpu); | |
36 | -extern void rcu_bh_qs(int cpu); | |
37 | 33 | extern void rcu_note_context_switch(int cpu); |
38 | 34 | extern int rcu_needs_cpu(int cpu); |
35 | +extern void rcu_cpu_stall_reset(void); | |
39 | 36 | |
40 | 37 | #ifdef CONFIG_TREE_PREEMPT_RCU |
41 | 38 | |
42 | -extern void __rcu_read_lock(void); | |
43 | -extern void __rcu_read_unlock(void); | |
44 | -extern void synchronize_rcu(void); | |
45 | 39 | extern void exit_rcu(void); |
46 | 40 | |
47 | -/* | |
48 | - * Defined as macro as it is a very low level header | |
49 | - * included from areas that don't even know about current | |
50 | - */ | |
51 | -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) | |
52 | - | |
53 | 41 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
54 | 42 | |
55 | -static inline void __rcu_read_lock(void) | |
56 | -{ | |
57 | - preempt_disable(); | |
58 | -} | |
59 | - | |
60 | -static inline void __rcu_read_unlock(void) | |
61 | -{ | |
62 | - preempt_enable(); | |
63 | -} | |
64 | - | |
65 | -#define synchronize_rcu synchronize_sched | |
66 | - | |
67 | 43 | static inline void exit_rcu(void) |
68 | 44 | { |
69 | 45 | } |
70 | 46 | |
71 | -static inline int rcu_preempt_depth(void) | |
72 | -{ | |
73 | - return 0; | |
74 | -} | |
75 | - | |
76 | 47 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ |
77 | 48 | |
78 | -static inline void __rcu_read_lock_bh(void) | |
79 | -{ | |
80 | - local_bh_disable(); | |
81 | -} | |
82 | -static inline void __rcu_read_unlock_bh(void) | |
83 | -{ | |
84 | - local_bh_enable(); | |
85 | -} | |
86 | - | |
87 | -extern void call_rcu_sched(struct rcu_head *head, | |
88 | - void (*func)(struct rcu_head *rcu)); | |
89 | 49 | extern void synchronize_rcu_bh(void); |
90 | -extern void synchronize_sched(void); | |
91 | 50 | extern void synchronize_rcu_expedited(void); |
92 | 51 | |
93 | 52 | static inline void synchronize_rcu_bh_expedited(void) |
... | ... | @@ -95,7 +54,7 @@ |
95 | 54 | synchronize_sched_expedited(); |
96 | 55 | } |
97 | 56 | |
98 | -extern void rcu_check_callbacks(int cpu, int user); | |
57 | +extern void rcu_barrier(void); | |
99 | 58 | |
100 | 59 | extern long rcu_batches_completed(void); |
101 | 60 | extern long rcu_batches_completed_bh(void); |
... | ... | @@ -103,18 +62,6 @@ |
103 | 62 | extern void rcu_force_quiescent_state(void); |
104 | 63 | extern void rcu_bh_force_quiescent_state(void); |
105 | 64 | extern void rcu_sched_force_quiescent_state(void); |
106 | - | |
107 | -#ifdef CONFIG_NO_HZ | |
108 | -void rcu_enter_nohz(void); | |
109 | -void rcu_exit_nohz(void); | |
110 | -#else /* CONFIG_NO_HZ */ | |
111 | -static inline void rcu_enter_nohz(void) | |
112 | -{ | |
113 | -} | |
114 | -static inline void rcu_exit_nohz(void) | |
115 | -{ | |
116 | -} | |
117 | -#endif /* CONFIG_NO_HZ */ | |
118 | 65 | |
119 | 66 | /* A context switch is a grace period for RCU-sched and RCU-bh. */ |
120 | 67 | static inline int rcu_blocking_is_gp(void) |
include/linux/sched.h
... | ... | @@ -1202,11 +1202,13 @@ |
1202 | 1202 | unsigned int policy; |
1203 | 1203 | cpumask_t cpus_allowed; |
1204 | 1204 | |
1205 | -#ifdef CONFIG_TREE_PREEMPT_RCU | |
1205 | +#ifdef CONFIG_PREEMPT_RCU | |
1206 | 1206 | int rcu_read_lock_nesting; |
1207 | 1207 | char rcu_read_unlock_special; |
1208 | - struct rcu_node *rcu_blocked_node; | |
1209 | 1208 | struct list_head rcu_node_entry; |
1209 | +#endif /* #ifdef CONFIG_PREEMPT_RCU */ | |
1210 | +#ifdef CONFIG_TREE_PREEMPT_RCU | |
1211 | + struct rcu_node *rcu_blocked_node; | |
1210 | 1212 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
1211 | 1213 | |
1212 | 1214 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
1213 | 1215 | |
... | ... | @@ -1288,9 +1290,9 @@ |
1288 | 1290 | struct list_head cpu_timers[3]; |
1289 | 1291 | |
1290 | 1292 | /* process credentials */ |
1291 | - const struct cred *real_cred; /* objective and real subjective task | |
1293 | + const struct cred __rcu *real_cred; /* objective and real subjective task | |
1292 | 1294 | * credentials (COW) */ |
1293 | - const struct cred *cred; /* effective (overridable) subjective task | |
1295 | + const struct cred __rcu *cred; /* effective (overridable) subjective task | |
1294 | 1296 | * credentials (COW) */ |
1295 | 1297 | struct mutex cred_guard_mutex; /* guard against foreign influences on |
1296 | 1298 | * credential calculations |
... | ... | @@ -1418,7 +1420,7 @@ |
1418 | 1420 | #endif |
1419 | 1421 | #ifdef CONFIG_CGROUPS |
1420 | 1422 | /* Control Group info protected by css_set_lock */ |
1421 | - struct css_set *cgroups; | |
1423 | + struct css_set __rcu *cgroups; | |
1422 | 1424 | /* cg_list protected by css_set_lock and tsk->alloc_lock */ |
1423 | 1425 | struct list_head cg_list; |
1424 | 1426 | #endif |
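The __rcu annotations added above are checked by sparse and RCU-lockdep rather than changing generated code. A rough illustration of how such a field is read follows; the helper name is invented, and the kernel's real accessors such as __task_cred() should be preferred in practice.

    static uid_t task_real_uid_sketch(struct task_struct *p)   /* illustrative helper */
    {
            const struct cred *cred;
            uid_t uid;

            rcu_read_lock();
            cred = rcu_dereference(p->real_cred);   /* strips the __rcu annotation */
            uid = cred->uid;
            rcu_read_unlock();
            return uid;
    }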
... | ... | @@ -1740,7 +1742,7 @@ |
1740 | 1742 | #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) |
1741 | 1743 | #define used_math() tsk_used_math(current) |
1742 | 1744 | |
1743 | -#ifdef CONFIG_TREE_PREEMPT_RCU | |
1745 | +#ifdef CONFIG_PREEMPT_RCU | |
1744 | 1746 | |
1745 | 1747 | #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ |
1746 | 1748 | #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ |
1747 | 1749 | |
... | ... | @@ -1749,7 +1751,9 @@ |
1749 | 1751 | { |
1750 | 1752 | p->rcu_read_lock_nesting = 0; |
1751 | 1753 | p->rcu_read_unlock_special = 0; |
1754 | +#ifdef CONFIG_TREE_PREEMPT_RCU | |
1752 | 1755 | p->rcu_blocked_node = NULL; |
1756 | +#endif | |
1753 | 1757 | INIT_LIST_HEAD(&p->rcu_node_entry); |
1754 | 1758 | } |
1755 | 1759 |
include/linux/srcu.h
... | ... | @@ -108,19 +108,43 @@ |
108 | 108 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
109 | 109 | |
110 | 110 | /** |
111 | - * srcu_dereference - fetch SRCU-protected pointer with checking | |
111 | + * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing | |
112 | + * @p: the pointer to fetch and protect for later dereferencing | |
113 | + * @sp: pointer to the srcu_struct, which is used to check that we | |
114 | + * really are in an SRCU read-side critical section. | |
115 | + * @c: condition to check for update-side use | |
112 | 116 | * |
113 | - * Makes rcu_dereference_check() do the dirty work. | |
117 | + * If PROVE_RCU is enabled, invoking this outside of an RCU read-side | |
118 | + * critical section will result in an RCU-lockdep splat, unless @c evaluates | |
119 | + * to 1. The @c argument will normally be a logical expression containing | |
120 | + * lockdep_is_held() calls. | |
114 | 121 | */ |
115 | -#define srcu_dereference(p, sp) \ | |
116 | - rcu_dereference_check(p, srcu_read_lock_held(sp)) | |
122 | +#define srcu_dereference_check(p, sp, c) \ | |
123 | + __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) | |
117 | 124 | |
118 | 125 | /** |
126 | + * srcu_dereference - fetch SRCU-protected pointer for later dereferencing | |
127 | + * @p: the pointer to fetch and protect for later dereferencing | |
128 | + * @sp: pointer to the srcu_struct, which is used to check that we | |
129 | + * really are in an SRCU read-side critical section. | |
130 | + * | |
131 | + * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU | |
132 | + * is enabled, invoking this outside of an RCU read-side critical | |
133 | + * section will result in an RCU-lockdep splat. | |
134 | + */ | |
135 | +#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) | |
136 | + | |
137 | +/** | |
119 | 138 | * srcu_read_lock - register a new reader for an SRCU-protected structure. |
120 | 139 | * @sp: srcu_struct in which to register the new reader. |
121 | 140 | * |
122 | 141 | * Enter an SRCU read-side critical section. Note that SRCU read-side |
123 | - * critical sections may be nested. | |
142 | + * critical sections may be nested. However, it is illegal to | |
143 | + * call anything that waits on an SRCU grace period for the same | |
144 | + * srcu_struct, whether directly or indirectly. Please note that | |
145 | + * one way to indirectly wait on an SRCU grace period is to acquire | |
146 | + * a mutex that is held elsewhere while calling synchronize_srcu() or | |
147 | + * synchronize_srcu_expedited(). | |
124 | 148 | */ |
125 | 149 | static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) |
126 | 150 | { |
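A hedged sketch of how the SRCU primitives above fit together on the read side; `foo_srcu`, `foo_ptr`, and `foo_read_a()` are assumed names, and `foo_srcu` is assumed to have been set up with init_srcu_struct() elsewhere:

    static struct srcu_struct foo_srcu;     /* assumed: init_srcu_struct(&foo_srcu) */
    static struct foo __rcu *foo_ptr;       /* hypothetical SRCU-protected pointer */

    static int foo_read_a(void)
    {
            int idx, a;
            struct foo *p;

            idx = srcu_read_lock(&foo_srcu);
            p = srcu_dereference(foo_ptr, &foo_srcu);
            a = p ? p->a : -1;
            srcu_read_unlock(&foo_srcu, idx);
            return a;
    }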
include/linux/sunrpc/auth_gss.h
... | ... | @@ -69,7 +69,7 @@ |
69 | 69 | enum rpc_gss_proc gc_proc; |
70 | 70 | u32 gc_seq; |
71 | 71 | spinlock_t gc_seq_lock; |
72 | - struct gss_ctx *gc_gss_ctx; | |
72 | + struct gss_ctx __rcu *gc_gss_ctx; | |
73 | 73 | struct xdr_netobj gc_wire_ctx; |
74 | 74 | u32 gc_win; |
75 | 75 | unsigned long gc_expiry; |
... | ... | @@ -80,7 +80,7 @@ |
80 | 80 | struct gss_cred { |
81 | 81 | struct rpc_cred gc_base; |
82 | 82 | enum rpc_gss_svc gc_service; |
83 | - struct gss_cl_ctx *gc_ctx; | |
83 | + struct gss_cl_ctx __rcu *gc_ctx; | |
84 | 84 | struct gss_upcall_msg *gc_upcall; |
85 | 85 | unsigned long gc_upcall_timestamp; |
86 | 86 | unsigned char gc_machine_cred : 1; |
include/net/cls_cgroup.h
... | ... | @@ -45,7 +45,8 @@ |
45 | 45 | return 0; |
46 | 46 | |
47 | 47 | rcu_read_lock(); |
48 | - id = rcu_dereference(net_cls_subsys_id); | |
48 | + id = rcu_dereference_index_check(net_cls_subsys_id, | |
49 | + rcu_read_lock_held()); | |
49 | 50 | if (id >= 0) |
50 | 51 | classid = container_of(task_subsys_state(p, id), |
51 | 52 | struct cgroup_cls_state, css)->classid; |
include/net/netfilter/nf_conntrack.h
init/Kconfig
... | ... | @@ -340,6 +340,7 @@ |
340 | 340 | |
341 | 341 | config TREE_RCU |
342 | 342 | bool "Tree-based hierarchical RCU" |
343 | + depends on !PREEMPT && SMP | |
343 | 344 | help |
344 | 345 | This option selects the RCU implementation that is |
345 | 346 | designed for very large SMP system with hundreds or |
... | ... | @@ -347,7 +348,7 @@ |
347 | 348 | smaller systems. |
348 | 349 | |
349 | 350 | config TREE_PREEMPT_RCU |
350 | - bool "Preemptable tree-based hierarchical RCU" | |
351 | + bool "Preemptible tree-based hierarchical RCU" | |
351 | 352 | depends on PREEMPT |
352 | 353 | help |
353 | 354 | This option selects the RCU implementation that is |
354 | 355 | |
... | ... | @@ -365,8 +366,22 @@ |
365 | 366 | is not required. This option greatly reduces the |
366 | 367 | memory footprint of RCU. |
367 | 368 | |
369 | +config TINY_PREEMPT_RCU | |
370 | + bool "Preemptible UP-only small-memory-footprint RCU" | |
371 | + depends on !SMP && PREEMPT | |
372 | + help | |
373 | + This option selects the RCU implementation that is designed | |
374 | + for real-time UP systems. This option greatly reduces the | |
375 | + memory footprint of RCU. | |
376 | + | |
368 | 377 | endchoice |
369 | 378 | |
379 | +config PREEMPT_RCU | |
380 | + def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU ) | |
381 | + help | |
382 | + This option enables preemptible-RCU code that is common between | |
383 | + the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. | |
384 | + | |
370 | 385 | config RCU_TRACE |
371 | 386 | bool "Enable tracing for RCU" |
372 | 387 | depends on TREE_RCU || TREE_PREEMPT_RCU |
... | ... | @@ -387,9 +402,12 @@ |
387 | 402 | help |
388 | 403 | This option controls the fanout of hierarchical implementations |
389 | 404 | of RCU, allowing RCU to work efficiently on machines with |
390 | - large numbers of CPUs. This value must be at least the cube | |
391 | - root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit | |
392 | - systems and up to 262,144 for 64-bit systems. | |
405 | + large numbers of CPUs. This value must be at least the fourth | |
406 | + root of NR_CPUS, which allows NR_CPUS to be insanely large. | |
407 | + The default value of RCU_FANOUT should be used for production | |
408 | + systems, but if you are stress-testing the RCU implementation | |
409 | + itself, small RCU_FANOUT values allow you to test large-system | |
410 | + code paths on small(er) systems. | |
393 | 411 | |
394 | 412 | Select a specific number if testing RCU itself. |
395 | 413 | Take the default if unsure. |
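To make the fourth-root constraint concrete (figures illustrative): with a four-level tree, RCU_FANOUT = 16 already covers 16^4 = 65,536 CPUs, and the usual 64-bit default of 64 covers 64^4 = 16,777,216, so only deliberately tiny fanout values chosen for stress-testing come anywhere near the limit.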
kernel/Makefile
... | ... | @@ -86,6 +86,7 @@ |
86 | 86 | obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o |
87 | 87 | obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o |
88 | 88 | obj-$(CONFIG_TINY_RCU) += rcutiny.o |
89 | +obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o | |
89 | 90 | obj-$(CONFIG_RELAY) += relay.o |
90 | 91 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o |
91 | 92 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
kernel/cgroup.c
... | ... | @@ -138,7 +138,7 @@ |
138 | 138 | * is called after synchronize_rcu(). But for safe use, css_is_removed() |
139 | 139 | * css_tryget() should be used for avoiding race. |
140 | 140 | */ |
141 | - struct cgroup_subsys_state *css; | |
141 | + struct cgroup_subsys_state __rcu *css; | |
142 | 142 | /* |
143 | 143 | * ID of this css. |
144 | 144 | */ |
kernel/pid.c
... | ... | @@ -401,7 +401,7 @@ |
401 | 401 | struct task_struct *result = NULL; |
402 | 402 | if (pid) { |
403 | 403 | struct hlist_node *first; |
404 | - first = rcu_dereference_check(pid->tasks[type].first, | |
404 | + first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), | |
405 | 405 | rcu_read_lock_held() || |
406 | 406 | lockdep_tasklist_lock_is_held()); |
407 | 407 | if (first) |
... | ... | @@ -416,6 +416,7 @@ |
416 | 416 | */ |
417 | 417 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) |
418 | 418 | { |
419 | + rcu_lockdep_assert(rcu_read_lock_held()); | |
419 | 420 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); |
420 | 421 | } |
421 | 422 |
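The rcu_lockdep_assert() added above documents (and, under PROVE_RCU, enforces) the calling convention; a caller would look roughly like the following sketch, with the wrapper name invented for illustration:

    static struct task_struct *get_task_by_pid_sketch(pid_t nr,
                                                      struct pid_namespace *ns)
    {
            struct task_struct *tsk;       /* illustrative wrapper only */

            rcu_read_lock();
            tsk = find_task_by_pid_ns(nr, ns);  /* satisfies the assertion */
            if (tsk)
                    get_task_struct(tsk);       /* pin before leaving the section */
            rcu_read_unlock();
            return tsk;
    }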
kernel/rcupdate.c
... | ... | @@ -73,12 +73,14 @@ |
73 | 73 | EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); |
74 | 74 | |
75 | 75 | /** |
76 | - * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? | |
76 | + * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? | |
77 | 77 | * |
78 | 78 | * Check for bottom half being disabled, which covers both the |
79 | 79 | * CONFIG_PROVE_RCU and not cases. Note that if someone uses |
80 | 80 | * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) |
81 | - * will show the situation. | |
81 | + * will show the situation. This is useful for debug checks in functions | |
82 | + * that require that they be called within an RCU read-side critical | |
83 | + * section. | |
82 | 84 | * |
83 | 85 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. |
84 | 86 | */ |
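As the expanded comment says, rcu_read_lock_bh_held() is meant for debug assertions in functions that require RCU-bh protection from their callers; a minimal hypothetical example:

    static void foo_update_stats(struct foo_stats *st)   /* hypothetical function */
    {
            /* Under PROVE_RCU, complain if the caller forgot rcu_read_lock_bh(). */
            WARN_ON_ONCE(!rcu_read_lock_bh_held());
            st->packets++;
    }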
kernel/rcutiny.c
... | ... | @@ -59,6 +59,14 @@ |
59 | 59 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); |
60 | 60 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
61 | 61 | |
62 | +/* Forward declarations for rcutiny_plugin.h. */ | |
63 | +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); | |
64 | +static void __call_rcu(struct rcu_head *head, | |
65 | + void (*func)(struct rcu_head *rcu), | |
66 | + struct rcu_ctrlblk *rcp); | |
67 | + | |
68 | +#include "rcutiny_plugin.h" | |
69 | + | |
62 | 70 | #ifdef CONFIG_NO_HZ |
63 | 71 | |
64 | 72 | static long rcu_dynticks_nesting = 1; |
... | ... | @@ -140,6 +148,7 @@ |
140 | 148 | rcu_sched_qs(cpu); |
141 | 149 | else if (!in_softirq()) |
142 | 150 | rcu_bh_qs(cpu); |
151 | + rcu_preempt_check_callbacks(); | |
143 | 152 | } |
144 | 153 | |
145 | 154 | /* |
... | ... | @@ -162,6 +171,7 @@ |
162 | 171 | *rcp->donetail = NULL; |
163 | 172 | if (rcp->curtail == rcp->donetail) |
164 | 173 | rcp->curtail = &rcp->rcucblist; |
174 | + rcu_preempt_remove_callbacks(rcp); | |
165 | 175 | rcp->donetail = &rcp->rcucblist; |
166 | 176 | local_irq_restore(flags); |
167 | 177 | |
... | ... | @@ -182,6 +192,7 @@ |
182 | 192 | { |
183 | 193 | __rcu_process_callbacks(&rcu_sched_ctrlblk); |
184 | 194 | __rcu_process_callbacks(&rcu_bh_ctrlblk); |
195 | + rcu_preempt_process_callbacks(); | |
185 | 196 | } |
186 | 197 | |
187 | 198 | /* |
188 | 199 | |
189 | 200 | |
... | ... | @@ -223,15 +234,15 @@ |
223 | 234 | } |
224 | 235 | |
225 | 236 | /* |
226 | - * Post an RCU callback to be invoked after the end of an RCU grace | |
237 | + * Post an RCU callback to be invoked after the end of an RCU-sched grace | |
227 | 238 | * period. But since we have but one CPU, that would be after any |
228 | 239 | * quiescent state. |
229 | 240 | */ |
230 | -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |
241 | +void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |
231 | 242 | { |
232 | 243 | __call_rcu(head, func, &rcu_sched_ctrlblk); |
233 | 244 | } |
234 | -EXPORT_SYMBOL_GPL(call_rcu); | |
245 | +EXPORT_SYMBOL_GPL(call_rcu_sched); | |
235 | 246 | |
236 | 247 | /* |
237 | 248 | * Post an RCU bottom-half callback to be invoked after any subsequent |
... | ... | @@ -243,20 +254,6 @@ |
243 | 254 | } |
244 | 255 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
245 | 256 | |
246 | -void rcu_barrier(void) | |
247 | -{ | |
248 | - struct rcu_synchronize rcu; | |
249 | - | |
250 | - init_rcu_head_on_stack(&rcu.head); | |
251 | - init_completion(&rcu.completion); | |
252 | - /* Will wake me after RCU finished. */ | |
253 | - call_rcu(&rcu.head, wakeme_after_rcu); | |
254 | - /* Wait for it. */ | |
255 | - wait_for_completion(&rcu.completion); | |
256 | - destroy_rcu_head_on_stack(&rcu.head); | |
257 | -} | |
258 | -EXPORT_SYMBOL_GPL(rcu_barrier); | |
259 | - | |
260 | 257 | void rcu_barrier_bh(void) |
261 | 258 | { |
262 | 259 | struct rcu_synchronize rcu; |
... | ... | @@ -289,6 +286,4 @@ |
289 | 286 | { |
290 | 287 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
291 | 288 | } |
292 | - | |
293 | -#include "rcutiny_plugin.h" |
kernel/rcutiny_plugin.h
1 | 1 | /* |
2 | - * Read-Copy Update mechanism for mutual exclusion (tree-based version) | |
2 | + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition | |
3 | 3 | * Internal non-public definitions that provide either classic |
4 | - * or preemptable semantics. | |
4 | + * or preemptible semantics. | |
5 | 5 | * |
6 | 6 | * This program is free software; you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU General Public License as published by |
8 | 8 | |
... | ... | @@ -17,10 +17,582 @@ |
17 | 17 | * along with this program; if not, write to the Free Software |
18 | 18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
19 | 19 | * |
20 | - * Copyright IBM Corporation, 2009 | |
20 | + * Copyright (c) 2010 Linaro | |
21 | 21 | * |
22 | 22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
23 | 23 | */ |
24 | + | |
25 | +#ifdef CONFIG_TINY_PREEMPT_RCU | |
26 | + | |
27 | +#include <linux/delay.h> | |
28 | + | |
29 | +/* Global control variables for preemptible RCU. */ | |
30 | +struct rcu_preempt_ctrlblk { | |
31 | + struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */ | |
32 | + struct rcu_head **nexttail; | |
33 | + /* Tasks blocked in a preemptible RCU */ | |
34 | + /* read-side critical section while a */ |
35 | + /* preemptible-RCU grace period is in */ | |
36 | + /* progress must wait for a later grace */ | |
37 | + /* period. This pointer points to the */ | |
38 | + /* ->next pointer of the last task that */ | |
39 | + /* must wait for a later grace period, or */ | |
40 | + /* to &->rcb.rcucblist if there is no */ | |
41 | + /* such task. */ | |
42 | + struct list_head blkd_tasks; | |
43 | + /* Tasks blocked in RCU read-side critical */ | |
44 | + /* section. Tasks are placed at the head */ | |
45 | + /* of this list and age towards the tail. */ | |
46 | + struct list_head *gp_tasks; | |
47 | + /* Pointer to the first task blocking the */ | |
48 | + /* current grace period, or NULL if there */ | |
49 | + /* is no such task. */ |
50 | + struct list_head *exp_tasks; | |
51 | + /* Pointer to first task blocking the */ | |
52 | + /* current expedited grace period, or NULL */ | |
53 | + /* if there is no such task. If there */ | |
54 | + /* is no current expedited grace period, */ | |
55 | + /* then there cannot be any such task. */ | |
56 | + u8 gpnum; /* Current grace period. */ | |
57 | + u8 gpcpu; /* Last grace period blocked by the CPU. */ | |
58 | + u8 completed; /* Last grace period completed. */ | |
59 | + /* If all three are equal, RCU is idle. */ | |
60 | +}; | |
61 | + | |
62 | +static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { | |
63 | + .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist, | |
64 | + .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, | |
65 | + .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, | |
66 | + .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), | |
67 | +}; | |
68 | + | |
69 | +static int rcu_preempted_readers_exp(void); | |
70 | +static void rcu_report_exp_done(void); | |
71 | + | |
72 | +/* | |
73 | + * Return true if the CPU has not yet responded to the current grace period. | |
74 | + */ | |
75 | +static int rcu_cpu_cur_gp(void) | |
76 | +{ | |
77 | + return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum; | |
78 | +} | |
79 | + | |
80 | +/* | |
81 | + * Check for a running RCU reader. Because there is only one CPU, | |
82 | + * there can be but one running RCU reader at a time. ;-) | |
83 | + */ | |
84 | +static int rcu_preempt_running_reader(void) | |
85 | +{ | |
86 | + return current->rcu_read_lock_nesting; | |
87 | +} | |
88 | + | |
89 | +/* | |
90 | + * Check for preempted RCU readers blocking any grace period. | |
91 | + * If the caller needs a reliable answer, it must disable hard irqs. | |
92 | + */ | |
93 | +static int rcu_preempt_blocked_readers_any(void) | |
94 | +{ | |
95 | + return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks); | |
96 | +} | |
97 | + | |
98 | +/* | |
99 | + * Check for preempted RCU readers blocking the current grace period. | |
100 | + * If the caller needs a reliable answer, it must disable hard irqs. | |
101 | + */ | |
102 | +static int rcu_preempt_blocked_readers_cgp(void) | |
103 | +{ | |
104 | + return rcu_preempt_ctrlblk.gp_tasks != NULL; | |
105 | +} | |
106 | + | |
107 | +/* | |
108 | + * Return true if another preemptible-RCU grace period is needed. | |
109 | + */ | |
110 | +static int rcu_preempt_needs_another_gp(void) | |
111 | +{ | |
112 | + return *rcu_preempt_ctrlblk.rcb.curtail != NULL; | |
113 | +} | |
114 | + | |
115 | +/* | |
116 | + * Return true if a preemptible-RCU grace period is in progress. | |
117 | + * The caller must disable hardirqs. | |
118 | + */ | |
119 | +static int rcu_preempt_gp_in_progress(void) | |
120 | +{ | |
121 | + return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; | |
122 | +} | |
123 | + | |
124 | +/* | |
125 | + * Record a preemptible-RCU quiescent state for the specified CPU. Note | |
126 | + * that this just means that the task currently running on the CPU is | |
127 | + * in a quiescent state. There might be any number of tasks blocked | |
128 | + * while in an RCU read-side critical section. | |
129 | + * | |
130 | + * Unlike the other rcu_*_qs() functions, callers to this function | |
131 | + * must disable irqs in order to protect the assignment to | |
132 | + * ->rcu_read_unlock_special. | |
133 | + * | |
134 | + * Because this is a single-CPU implementation, the only way a grace | |
135 | + * period can end is if the CPU is in a quiescent state. The reason is | |
136 | + * that a blocked preemptible-RCU reader can exit its critical section | |
137 | + * only if the CPU is running it at the time. Therefore, when the | |
138 | + * last task blocking the current grace period exits its RCU read-side | |
139 | + * critical section, neither the CPU nor blocked tasks will be stopping | |
140 | + * the current grace period. (In contrast, SMP implementations | |
141 | + * might have CPUs running in RCU read-side critical sections that | |
142 | + * block later grace periods -- but this is not possible given only | |
143 | + * one CPU.) | |
144 | + */ | |
145 | +static void rcu_preempt_cpu_qs(void) | |
146 | +{ | |
147 | + /* Record both CPU and task as having responded to current GP. */ | |
148 | + rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; | |
149 | + current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | |
150 | + | |
151 | + /* | |
152 | + * If there is no GP, or if blocked readers are still blocking GP, | |
153 | + * then there is nothing more to do. | |
154 | + */ | |
155 | + if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) | |
156 | + return; | |
157 | + | |
158 | + /* Advance callbacks. */ | |
159 | + rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; | |
160 | + rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail; | |
161 | + rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail; | |
162 | + | |
163 | + /* If there are no blocked readers, next GP is done instantly. */ | |
164 | + if (!rcu_preempt_blocked_readers_any()) | |
165 | + rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; | |
166 | + | |
167 | + /* If there are done callbacks, make RCU_SOFTIRQ process them. */ | |
168 | + if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) | |
169 | + raise_softirq(RCU_SOFTIRQ); | |
170 | +} | |
171 | + | |
172 | +/* | |
173 | + * Start a new RCU grace period if warranted. Hard irqs must be disabled. | |
174 | + */ | |
175 | +static void rcu_preempt_start_gp(void) | |
176 | +{ | |
177 | + if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) { | |
178 | + | |
179 | + /* Official start of GP. */ | |
180 | + rcu_preempt_ctrlblk.gpnum++; | |
181 | + | |
182 | + /* Any blocked RCU readers block new GP. */ | |
183 | + if (rcu_preempt_blocked_readers_any()) | |
184 | + rcu_preempt_ctrlblk.gp_tasks = | |
185 | + rcu_preempt_ctrlblk.blkd_tasks.next; | |
186 | + | |
187 | + /* If there is no running reader, CPU is done with GP. */ | |
188 | + if (!rcu_preempt_running_reader()) | |
189 | + rcu_preempt_cpu_qs(); | |
190 | + } | |
191 | +} | |
192 | + | |
193 | +/* | |
194 | + * We have entered the scheduler, and the current task might soon be | |
195 | + * context-switched away from. If this task is in an RCU read-side | |
196 | + * critical section, we will no longer be able to rely on the CPU to | |
197 | + * record that fact, so we enqueue the task on the blkd_tasks list. | |
198 | + * If the task started after the current grace period began, as recorded | |
199 | + * by ->gpcpu, we enqueue at the beginning of the list. Otherwise we |
200 | + * enqueue it before the element referenced by ->gp_tasks (or at the tail if |
201 | + * ->gp_tasks is NULL) and point ->gp_tasks at the newly added element. | |
202 | + * The task will dequeue itself when it exits the outermost enclosing | |
203 | + * RCU read-side critical section. Therefore, the current grace period | |
204 | + * cannot be permitted to complete until the ->gp_tasks pointer becomes | |
205 | + * NULL. | |
206 | + * | |
207 | + * Caller must disable preemption. | |
208 | + */ | |
209 | +void rcu_preempt_note_context_switch(void) | |
210 | +{ | |
211 | + struct task_struct *t = current; | |
212 | + unsigned long flags; | |
213 | + | |
214 | + local_irq_save(flags); /* must exclude scheduler_tick(). */ | |
215 | + if (rcu_preempt_running_reader() && | |
216 | + (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { | |
217 | + | |
218 | + /* Possibly blocking in an RCU read-side critical section. */ | |
219 | + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; | |
220 | + | |
221 | + /* | |
222 | + * If this CPU has already checked in, then this task | |
223 | + * will hold up the next grace period rather than the | |
224 | + * current grace period. Queue the task accordingly. | |
225 | + * If the task is queued for the current grace period | |
226 | + * (i.e., this CPU has not yet passed through a quiescent | |
227 | + * state for the current grace period), then as long | |
228 | + * as that task remains queued, the current grace period | |
229 | + * cannot end. | |
230 | + */ | |
231 | + list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks); | |
232 | + if (rcu_cpu_cur_gp()) | |
233 | + rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry; | |
234 | + } | |
235 | + | |
236 | + /* | |
237 | + * Either we were not in an RCU read-side critical section to | |
238 | + * begin with, or we have now recorded that critical section | |
239 | + * globally. Either way, we can now note a quiescent state | |
240 | + * for this CPU. Again, if we were in an RCU read-side critical | |
241 | + * section, and if that critical section was blocking the current | |
242 | + * grace period, then the fact that the task has been enqueued | |
243 | + * means that current grace period continues to be blocked. | |
244 | + */ | |
245 | + rcu_preempt_cpu_qs(); | |
246 | + local_irq_restore(flags); | |
247 | +} | |
248 | + | |
249 | +/* | |
250 | + * Tiny-preemptible RCU implementation for rcu_read_lock(). | |
251 | + * Just increment ->rcu_read_lock_nesting, shared state will be updated | |
252 | + * if we block. | |
253 | + */ | |
254 | +void __rcu_read_lock(void) | |
255 | +{ | |
256 | + current->rcu_read_lock_nesting++; | |
257 | + barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */ | |
258 | +} | |
259 | +EXPORT_SYMBOL_GPL(__rcu_read_lock); | |
260 | + | |
261 | +/* | |
262 | + * Handle special cases during rcu_read_unlock(), such as needing to | |
263 | + * notify RCU core processing or task having blocked during the RCU | |
264 | + * read-side critical section. | |
265 | + */ | |
266 | +static void rcu_read_unlock_special(struct task_struct *t) | |
267 | +{ | |
268 | + int empty; | |
269 | + int empty_exp; | |
270 | + unsigned long flags; | |
271 | + struct list_head *np; | |
272 | + int special; | |
273 | + | |
274 | + /* | |
275 | + * NMI handlers cannot block and cannot safely manipulate state. | |
276 | + * They therefore cannot possibly be special, so just leave. | |
277 | + */ | |
278 | + if (in_nmi()) | |
279 | + return; | |
280 | + | |
281 | + local_irq_save(flags); | |
282 | + | |
283 | + /* | |
284 | + * If RCU core is waiting for this CPU to exit critical section, | |
285 | + * let it know that we have done so. | |
286 | + */ | |
287 | + special = t->rcu_read_unlock_special; | |
288 | + if (special & RCU_READ_UNLOCK_NEED_QS) | |
289 | + rcu_preempt_cpu_qs(); | |
290 | + | |
291 | + /* Hardware IRQ handlers cannot block. */ | |
292 | + if (in_irq()) { | |
293 | + local_irq_restore(flags); | |
294 | + return; | |
295 | + } | |
296 | + | |
297 | + /* Clean up if blocked during RCU read-side critical section. */ | |
298 | + if (special & RCU_READ_UNLOCK_BLOCKED) { | |
299 | + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; | |
300 | + | |
301 | + /* | |
302 | + * Remove this task from the ->blkd_tasks list and adjust | |
303 | + * any pointers that might have been referencing it. | |
304 | + */ | |
305 | + empty = !rcu_preempt_blocked_readers_cgp(); | |
306 | + empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; | |
307 | + np = t->rcu_node_entry.next; | |
308 | + if (np == &rcu_preempt_ctrlblk.blkd_tasks) | |
309 | + np = NULL; | |
310 | + list_del(&t->rcu_node_entry); | |
311 | + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) | |
312 | + rcu_preempt_ctrlblk.gp_tasks = np; | |
313 | + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) | |
314 | + rcu_preempt_ctrlblk.exp_tasks = np; | |
315 | + INIT_LIST_HEAD(&t->rcu_node_entry); | |
316 | + | |
317 | + /* | |
318 | + * If this was the last task on the current list, and if | |
319 | + * we aren't waiting on the CPU, report the quiescent state | |
320 | + * and start a new grace period if needed. | |
321 | + */ | |
322 | + if (!empty && !rcu_preempt_blocked_readers_cgp()) { | |
323 | + rcu_preempt_cpu_qs(); | |
324 | + rcu_preempt_start_gp(); | |
325 | + } | |
326 | + | |
327 | + /* | |
328 | + * If this was the last task on the expedited lists, | |
329 | + * then we need to wake up the waiting task. |
330 | + */ | |
331 | + if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) | |
332 | + rcu_report_exp_done(); | |
333 | + } | |
334 | + local_irq_restore(flags); | |
335 | +} | |
336 | + | |
337 | +/* | |
338 | + * Tiny-preemptible RCU implementation for rcu_read_unlock(). | |
339 | + * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost | |
340 | + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then | |
341 | + * invoke rcu_read_unlock_special() to clean up after a context switch | |
342 | + * in an RCU read-side critical section and other special cases. | |
343 | + */ | |
344 | +void __rcu_read_unlock(void) | |
345 | +{ | |
346 | + struct task_struct *t = current; | |
347 | + | |
348 | + barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ | |
349 | + --t->rcu_read_lock_nesting; | |
350 | + barrier(); /* decrement before load of ->rcu_read_unlock_special */ | |
351 | + if (t->rcu_read_lock_nesting == 0 && | |
352 | + unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | |
353 | + rcu_read_unlock_special(t); | |
354 | +#ifdef CONFIG_PROVE_LOCKING | |
355 | + WARN_ON_ONCE(t->rcu_read_lock_nesting < 0); | |
356 | +#endif /* #ifdef CONFIG_PROVE_LOCKING */ | |
357 | +} | |
358 | +EXPORT_SYMBOL_GPL(__rcu_read_unlock); | |
359 | + | |
360 | +/* | |
361 | + * Check for a quiescent state from the current CPU. When a task blocks, | |
362 | + * the task is recorded in the rcu_preempt_ctrlblk structure, which is | |
363 | + * checked elsewhere. This is called from the scheduling-clock interrupt. | |
364 | + * | |
365 | + * Caller must disable hard irqs. | |
366 | + */ | |
367 | +static void rcu_preempt_check_callbacks(void) | |
368 | +{ | |
369 | + struct task_struct *t = current; | |
370 | + | |
371 | + if (!rcu_preempt_running_reader() && rcu_preempt_gp_in_progress()) | |
372 | + rcu_preempt_cpu_qs(); | |
373 | + if (&rcu_preempt_ctrlblk.rcb.rcucblist != | |
374 | + rcu_preempt_ctrlblk.rcb.donetail) | |
375 | + raise_softirq(RCU_SOFTIRQ); | |
376 | + if (rcu_preempt_gp_in_progress() && rcu_preempt_running_reader()) | |
377 | + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; | |
378 | +} | |
379 | + | |
380 | +/* | |
381 | + * TINY_PREEMPT_RCU has an extra callback-list tail pointer to | |
382 | + * update, so this is invoked from __rcu_process_callbacks() to | |
383 | + * handle that case. Of course, it is invoked for all flavors of | |
384 | + * RCU, but RCU callbacks can appear only on one of the lists, and | |
385 | + * neither ->nexttail nor ->donetail can possibly be NULL, so there | |
386 | + * is no need for an explicit check. | |
387 | + */ | |
388 | +static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | |
389 | +{ | |
390 | + if (rcu_preempt_ctrlblk.nexttail == rcp->donetail) | |
391 | + rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist; | |
392 | +} | |
393 | + | |
394 | +/* | |
395 | + * Process callbacks for preemptible RCU. | |
396 | + */ | |
397 | +static void rcu_preempt_process_callbacks(void) | |
398 | +{ | |
399 | + __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); | |
400 | +} | |
401 | + | |
402 | +/* | |
403 | + * Queue a preemptible-RCU callback for invocation after a grace period. |
404 | + */ | |
405 | +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |
406 | +{ | |
407 | + unsigned long flags; | |
408 | + | |
409 | + debug_rcu_head_queue(head); | |
410 | + head->func = func; | |
411 | + head->next = NULL; | |
412 | + | |
413 | + local_irq_save(flags); | |
414 | + *rcu_preempt_ctrlblk.nexttail = head; | |
415 | + rcu_preempt_ctrlblk.nexttail = &head->next; | |
416 | + rcu_preempt_start_gp(); /* checks to see if GP needed. */ | |
417 | + local_irq_restore(flags); | |
418 | +} | |
419 | +EXPORT_SYMBOL_GPL(call_rcu); | |
420 | + | |
421 | +void rcu_barrier(void) | |
422 | +{ | |
423 | + struct rcu_synchronize rcu; | |
424 | + | |
425 | + init_rcu_head_on_stack(&rcu.head); | |
426 | + init_completion(&rcu.completion); | |
427 | + /* Will wake me after RCU finished. */ | |
428 | + call_rcu(&rcu.head, wakeme_after_rcu); | |
429 | + /* Wait for it. */ | |
430 | + wait_for_completion(&rcu.completion); | |
431 | + destroy_rcu_head_on_stack(&rcu.head); | |
432 | +} | |
433 | +EXPORT_SYMBOL_GPL(rcu_barrier); | |
434 | + | |
435 | +/* | |
436 | + * synchronize_rcu - wait until a grace period has elapsed. | |
437 | + * | |
438 | + * Control will return to the caller some time after a full grace | |
439 | + * period has elapsed, in other words after all currently executing RCU | |
440 | + * read-side critical sections have completed. RCU read-side critical | |
441 | + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | |
442 | + * and may be nested. | |
443 | + */ | |
444 | +void synchronize_rcu(void) | |
445 | +{ | |
446 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
447 | + if (!rcu_scheduler_active) | |
448 | + return; | |
449 | +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | |
450 | + | |
451 | + WARN_ON_ONCE(rcu_preempt_running_reader()); | |
452 | + if (!rcu_preempt_blocked_readers_any()) | |
453 | + return; | |
454 | + | |
455 | + /* Once we get past the fastpath checks, same code as rcu_barrier(). */ | |
456 | + rcu_barrier(); | |
457 | +} | |
458 | +EXPORT_SYMBOL_GPL(synchronize_rcu); | |
459 | + | |
460 | +static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); | |
461 | +static unsigned long sync_rcu_preempt_exp_count; | |
462 | +static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); | |
463 | + | |
464 | +/* | |
465 | + * Return non-zero if there are any tasks in RCU read-side critical | |
466 | + * sections blocking the current preemptible-RCU expedited grace period. | |
467 | + * If there is no preemptible-RCU expedited grace period currently in | |
468 | + * progress, returns zero unconditionally. | |
469 | + */ | |
470 | +static int rcu_preempted_readers_exp(void) | |
471 | +{ | |
472 | + return rcu_preempt_ctrlblk.exp_tasks != NULL; | |
473 | +} | |
474 | + | |
475 | +/* | |
476 | + * Report the exit from RCU read-side critical section for the last task | |
477 | + * that queued itself during or before the current expedited preemptible-RCU | |
478 | + * grace period. | |
479 | + */ | |
480 | +static void rcu_report_exp_done(void) | |
481 | +{ | |
482 | + wake_up(&sync_rcu_preempt_exp_wq); | |
483 | +} | |
484 | + | |
485 | +/* | |
486 | + * Wait for an rcu-preempt grace period, but expedite it. The basic idea | |
487 | + * is to rely on the fact that there is but one CPU, and that it is |
488 | + * illegal for a task to invoke synchronize_rcu_expedited() while in a | |
489 | + * preemptible-RCU read-side critical section. Therefore, any such | |
490 | + * critical sections must correspond to blocked tasks, which must therefore | |
491 | + * be on the ->blkd_tasks list. So just record the current head of the | |
492 | + * list in the ->exp_tasks pointer, and wait for all tasks including and | |
493 | + * after the task pointed to by ->exp_tasks to drain. | |
494 | + */ | |
495 | +void synchronize_rcu_expedited(void) | |
496 | +{ | |
497 | + unsigned long flags; | |
498 | + struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk; | |
499 | + unsigned long snap; | |
500 | + | |
501 | + barrier(); /* ensure prior action seen before grace period. */ | |
502 | + | |
503 | + WARN_ON_ONCE(rcu_preempt_running_reader()); | |
504 | + | |
505 | + /* | |
506 | + * Acquire lock so that there is only one preemptible RCU grace | |
507 | + * period in flight. Of course, if someone does the expedited | |
508 | + * grace period for us while we are acquiring the lock, just leave. | |
509 | + */ | |
510 | + snap = sync_rcu_preempt_exp_count + 1; | |
511 | + mutex_lock(&sync_rcu_preempt_exp_mutex); | |
512 | + if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count)) | |
513 | + goto unlock_mb_ret; /* Others did our work for us. */ | |
514 | + | |
515 | + local_irq_save(flags); | |
516 | + | |
517 | + /* | |
518 | + * All RCU readers have to already be on blkd_tasks because | |
519 | + * we cannot legally be executing in an RCU read-side critical | |
520 | + * section. | |
521 | + */ | |
522 | + | |
523 | + /* Snapshot current head of ->blkd_tasks list. */ | |
524 | + rpcp->exp_tasks = rpcp->blkd_tasks.next; | |
525 | + if (rpcp->exp_tasks == &rpcp->blkd_tasks) | |
526 | + rpcp->exp_tasks = NULL; | |
527 | + local_irq_restore(flags); | |
528 | + | |
529 | + /* Wait for tail of ->blkd_tasks list to drain. */ | |
530 | + if (rcu_preempted_readers_exp()) | |
531 | + wait_event(sync_rcu_preempt_exp_wq, | |
532 | + !rcu_preempted_readers_exp()); | |
533 | + | |
534 | + /* Clean up and exit. */ | |
535 | + barrier(); /* ensure expedited GP seen before counter increment. */ | |
536 | + sync_rcu_preempt_exp_count++; | |
537 | +unlock_mb_ret: | |
538 | + mutex_unlock(&sync_rcu_preempt_exp_mutex); | |
539 | + barrier(); /* ensure subsequent action seen after grace period. */ | |
540 | +} | |
541 | +EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | |
542 | + | |
543 | +/* | |
544 | + * Does preemptible RCU need the CPU to stay out of dynticks mode? | |
545 | + */ | |
546 | +int rcu_preempt_needs_cpu(void) | |
547 | +{ | |
548 | + if (!rcu_preempt_running_reader()) | |
549 | + rcu_preempt_cpu_qs(); | |
550 | + return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; | |
551 | +} | |
552 | + | |
553 | +/* | |
554 | + * Check for a task exiting while in a preemptible-RCU read-side |
555 | + * critical section, clean up if so. No need to issue warnings, | |
556 | + * as debug_check_no_locks_held() already does this if lockdep | |
557 | + * is enabled. | |
558 | + */ | |
559 | +void exit_rcu(void) | |
560 | +{ | |
561 | + struct task_struct *t = current; | |
562 | + | |
563 | + if (t->rcu_read_lock_nesting == 0) | |
564 | + return; | |
565 | + t->rcu_read_lock_nesting = 1; | |
566 | + rcu_read_unlock(); | |
567 | +} | |
568 | + | |
569 | +#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ | |
570 | + | |
571 | +/* | |
572 | + * Because preemptible RCU does not exist, it never has any callbacks | |
573 | + * to check. | |
574 | + */ | |
575 | +static void rcu_preempt_check_callbacks(void) | |
576 | +{ | |
577 | +} | |
578 | + | |
579 | +/* | |
580 | + * Because preemptible RCU does not exist, it never has any callbacks | |
581 | + * to remove. | |
582 | + */ | |
583 | +static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | |
584 | +{ | |
585 | +} | |
586 | + | |
587 | +/* | |
588 | + * Because preemptible RCU does not exist, it never has any callbacks | |
589 | + * to process. | |
590 | + */ | |
591 | +static void rcu_preempt_process_callbacks(void) | |
592 | +{ | |
593 | +} | |
594 | + | |
595 | +#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ | |
24 | 596 | |
25 | 597 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
26 | 598 |
kernel/rcutorture.c
... | ... | @@ -303,6 +303,10 @@ |
303 | 303 | mdelay(longdelay_ms); |
304 | 304 | if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) |
305 | 305 | udelay(shortdelay_us); |
306 | +#ifdef CONFIG_PREEMPT | |
307 | + if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000))) | |
308 | + preempt_schedule(); /* No QS if preempt_disable() in effect */ | |
309 | +#endif | |
306 | 310 | } |
307 | 311 | |
308 | 312 | static void rcu_torture_read_unlock(int idx) __releases(RCU) |
... | ... | @@ -536,6 +540,8 @@ |
536 | 540 | delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); |
537 | 541 | if (!delay) |
538 | 542 | schedule_timeout_interruptible(longdelay); |
543 | + else | |
544 | + rcu_read_delay(rrsp); | |
539 | 545 | } |
540 | 546 | |
541 | 547 | static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) |
kernel/rcutree.c
... | ... | @@ -143,6 +143,11 @@ |
143 | 143 | module_param(qhimark, int, 0); |
144 | 144 | module_param(qlowmark, int, 0); |
145 | 145 | |
146 | +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR | |
147 | +int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT; | |
148 | +module_param(rcu_cpu_stall_suppress, int, 0644); | |
149 | +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | |
150 | + | |
146 | 151 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); |
147 | 152 | static int rcu_pending(int cpu); |
148 | 153 | |
... | ... | @@ -450,7 +455,7 @@ |
450 | 455 | |
451 | 456 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR |
452 | 457 | |
453 | -int rcu_cpu_stall_panicking __read_mostly; | |
458 | +int rcu_cpu_stall_suppress __read_mostly; | |
454 | 459 | |
455 | 460 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
456 | 461 | { |
... | ... | @@ -482,8 +487,11 @@ |
482 | 487 | rcu_print_task_stall(rnp); |
483 | 488 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
484 | 489 | |
485 | - /* OK, time to rat on our buddy... */ | |
486 | - | |
490 | + /* | |
491 | + * OK, time to rat on our buddy... | |
492 | + * See Documentation/RCU/stallwarn.txt for info on how to debug | |
493 | + * RCU CPU stall warnings. | |
494 | + */ | |
487 | 495 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", |
488 | 496 | rsp->name); |
489 | 497 | rcu_for_each_leaf_node(rsp, rnp) { |
... | ... | @@ -512,6 +520,11 @@ |
512 | 520 | unsigned long flags; |
513 | 521 | struct rcu_node *rnp = rcu_get_root(rsp); |
514 | 522 | |
523 | + /* | |
524 | + * OK, time to rat on ourselves... | |
525 | + * See Documentation/RCU/stallwarn.txt for info on how to debug | |
526 | + * RCU CPU stall warnings. | |
527 | + */ | |
515 | 528 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", |
516 | 529 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); |
517 | 530 | trigger_all_cpu_backtrace(); |
... | ... | @@ -530,7 +543,7 @@ |
530 | 543 | long delta; |
531 | 544 | struct rcu_node *rnp; |
532 | 545 | |
533 | - if (rcu_cpu_stall_panicking) | |
546 | + if (rcu_cpu_stall_suppress) | |
534 | 547 | return; |
535 | 548 | delta = jiffies - rsp->jiffies_stall; |
536 | 549 | rnp = rdp->mynode; |
537 | 550 | |
... | ... | @@ -548,10 +561,26 @@ |
548 | 561 | |
549 | 562 | static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) |
550 | 563 | { |
551 | - rcu_cpu_stall_panicking = 1; | |
564 | + rcu_cpu_stall_suppress = 1; | |
552 | 565 | return NOTIFY_DONE; |
553 | 566 | } |
554 | 567 | |
568 | +/** | |
569 | + * rcu_cpu_stall_reset - prevent further stall warnings in current grace period | |
570 | + * | |
571 | + * Set the stall-warning timeout way off into the future, thus preventing | |
572 | + * any RCU CPU stall-warning messages from appearing in the current set of | |
573 | + * RCU grace periods. | |
574 | + * | |
575 | + * The caller must disable hard irqs. | |
576 | + */ | |
577 | +void rcu_cpu_stall_reset(void) | |
578 | +{ | |
579 | + rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; | |
580 | + rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; | |
581 | + rcu_preempt_stall_reset(); | |
582 | +} | |
583 | + | |
555 | 584 | static struct notifier_block rcu_panic_block = { |
556 | 585 | .notifier_call = rcu_panic, |
557 | 586 | }; |
... | ... | @@ -571,6 +600,10 @@ |
571 | 600 | { |
572 | 601 | } |
573 | 602 | |
603 | +void rcu_cpu_stall_reset(void) | |
604 | +{ | |
605 | +} | |
606 | + | |
574 | 607 | static void __init check_cpu_stall_init(void) |
575 | 608 | { |
576 | 609 | } |
... | ... | @@ -712,7 +745,7 @@ |
712 | 745 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) |
713 | 746 | __releases(rcu_get_root(rsp)->lock) |
714 | 747 | { |
715 | - struct rcu_data *rdp = rsp->rda[smp_processor_id()]; | |
748 | + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | |
716 | 749 | struct rcu_node *rnp = rcu_get_root(rsp); |
717 | 750 | |
718 | 751 | if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { |
... | ... | @@ -960,7 +993,7 @@ |
960 | 993 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) |
961 | 994 | { |
962 | 995 | int i; |
963 | - struct rcu_data *rdp = rsp->rda[smp_processor_id()]; | |
996 | + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | |
964 | 997 | |
965 | 998 | if (rdp->nxtlist == NULL) |
966 | 999 | return; /* irqs disabled, so comparison is stable. */ |
... | ... | @@ -984,7 +1017,7 @@ |
984 | 1017 | struct rcu_data *rdp; |
985 | 1018 | |
986 | 1019 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
987 | - rdp = rsp->rda[smp_processor_id()]; | |
1020 | + rdp = this_cpu_ptr(rsp->rda); | |
988 | 1021 | if (rsp->orphan_cbs_list == NULL) { |
989 | 1022 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
990 | 1023 | return; |
... | ... | @@ -1007,7 +1040,7 @@ |
1007 | 1040 | unsigned long flags; |
1008 | 1041 | unsigned long mask; |
1009 | 1042 | int need_report = 0; |
1010 | - struct rcu_data *rdp = rsp->rda[cpu]; | |
1043 | + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | |
1011 | 1044 | struct rcu_node *rnp; |
1012 | 1045 | |
1013 | 1046 | /* Exclude any attempts to start a new grace period. */ |
... | ... | @@ -1226,7 +1259,8 @@ |
1226 | 1259 | cpu = rnp->grplo; |
1227 | 1260 | bit = 1; |
1228 | 1261 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { |
1229 | - if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) | |
1262 | + if ((rnp->qsmask & bit) != 0 && | |
1263 | + f(per_cpu_ptr(rsp->rda, cpu))) | |
1230 | 1264 | mask |= bit; |
1231 | 1265 | } |
1232 | 1266 | if (mask != 0) { |
... | ... | @@ -1402,7 +1436,7 @@ |
1402 | 1436 | * a quiescent state betweentimes. |
1403 | 1437 | */ |
1404 | 1438 | local_irq_save(flags); |
1405 | - rdp = rsp->rda[smp_processor_id()]; | |
1439 | + rdp = this_cpu_ptr(rsp->rda); | |
1406 | 1440 | rcu_process_gp_end(rsp, rdp); |
1407 | 1441 | check_for_new_grace_period(rsp, rdp); |
1408 | 1442 | |
... | ... | @@ -1701,7 +1735,7 @@ |
1701 | 1735 | { |
1702 | 1736 | unsigned long flags; |
1703 | 1737 | int i; |
1704 | - struct rcu_data *rdp = rsp->rda[cpu]; | |
1738 | + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | |
1705 | 1739 | struct rcu_node *rnp = rcu_get_root(rsp); |
1706 | 1740 | |
1707 | 1741 | /* Set up local state, ensuring consistent view of global state. */ |
... | ... | @@ -1729,7 +1763,7 @@ |
1729 | 1763 | { |
1730 | 1764 | unsigned long flags; |
1731 | 1765 | unsigned long mask; |
1732 | - struct rcu_data *rdp = rsp->rda[cpu]; | |
1766 | + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | |
1733 | 1767 | struct rcu_node *rnp = rcu_get_root(rsp); |
1734 | 1768 | |
1735 | 1769 | /* Set up local state, ensuring consistent view of global state. */ |
... | ... | @@ -1865,7 +1899,8 @@ |
1865 | 1899 | /* |
1866 | 1900 | * Helper function for rcu_init() that initializes one rcu_state structure. |
1867 | 1901 | */ |
1868 | -static void __init rcu_init_one(struct rcu_state *rsp) | |
1902 | +static void __init rcu_init_one(struct rcu_state *rsp, | |
1903 | + struct rcu_data __percpu *rda) | |
1869 | 1904 | { |
1870 | 1905 | static char *buf[] = { "rcu_node_level_0", |
1871 | 1906 | "rcu_node_level_1", |
1872 | 1907 | |
1873 | 1908 | |
1874 | 1909 | |
... | ... | @@ -1918,37 +1953,23 @@ |
1918 | 1953 | } |
1919 | 1954 | } |
1920 | 1955 | |
1956 | + rsp->rda = rda; | |
1921 | 1957 | rnp = rsp->level[NUM_RCU_LVLS - 1]; |
1922 | 1958 | for_each_possible_cpu(i) { |
1923 | 1959 | while (i > rnp->grphi) |
1924 | 1960 | rnp++; |
1925 | - rsp->rda[i]->mynode = rnp; | |
1961 | + per_cpu_ptr(rsp->rda, i)->mynode = rnp; | |
1926 | 1962 | rcu_boot_init_percpu_data(i, rsp); |
1927 | 1963 | } |
1928 | 1964 | } |
1929 | 1965 | |
1930 | -/* | |
1931 | - * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used | |
1932 | - * nowhere else! Assigns leaf node pointers into each CPU's rcu_data | |
1933 | - * structure. | |
1934 | - */ | |
1935 | -#define RCU_INIT_FLAVOR(rsp, rcu_data) \ | |
1936 | -do { \ | |
1937 | - int i; \ | |
1938 | - \ | |
1939 | - for_each_possible_cpu(i) { \ | |
1940 | - (rsp)->rda[i] = &per_cpu(rcu_data, i); \ | |
1941 | - } \ | |
1942 | - rcu_init_one(rsp); \ | |
1943 | -} while (0) | |
1944 | - | |
1945 | 1966 | void __init rcu_init(void) |
1946 | 1967 | { |
1947 | 1968 | int cpu; |
1948 | 1969 | |
1949 | 1970 | rcu_bootup_announce(); |
1950 | - RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); | |
1951 | - RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); | |
1971 | + rcu_init_one(&rcu_sched_state, &rcu_sched_data); | |
1972 | + rcu_init_one(&rcu_bh_state, &rcu_bh_data); | |
1952 | 1973 | __rcu_init_preempt(); |
1953 | 1974 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
1954 | 1975 |
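The rsp->rda conversion in the hunks above replaces an NR_CPUS-sized array of rcu_data pointers with a single __percpu pointer, so lookups go through this_cpu_ptr()/per_cpu_ptr() instead of array indexing. A minimal sketch of the same pattern, using illustrative example_* names rather than the tree's own symbols:

	#include <linux/percpu.h>

	struct example_data {
		long qlen;
	};

	static DEFINE_PER_CPU(struct example_data, example_data);

	struct example_state {
		struct example_data __percpu *rda;	/* was: struct example_data *rda[NR_CPUS] */
	};

	static struct example_state example_state = { .rda = &example_data };

	static void example_update(int cpu)
	{
		/* current CPU's instance; caller has preemption disabled */
		struct example_data *mine = this_cpu_ptr(example_state.rda);
		/* some other CPU's instance */
		struct example_data *other = per_cpu_ptr(example_state.rda, cpu);

		mine->qlen++;
		other->qlen = 0;
	}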
kernel/rcutree.h
... | ... | @@ -254,20 +254,24 @@ |
254 | 254 | #define RCU_STALL_DELAY_DELTA 0 |
255 | 255 | #endif |
256 | 256 | |
257 | -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA) | |
257 | +#define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \ | |
258 | + RCU_STALL_DELAY_DELTA) | |
258 | 259 | /* for rsp->jiffies_stall */ |
259 | -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA) | |
260 | +#define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30) | |
260 | 261 | /* for rsp->jiffies_stall */ |
261 | 262 | #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ |
262 | 263 | /* to take at least one */ |
263 | 264 | /* scheduling clock irq */ |
264 | 265 | /* before ratting on them. */ |
265 | 266 | |
267 | +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE | |
268 | +#define RCU_CPU_STALL_SUPPRESS_INIT 0 | |
269 | +#else | |
270 | +#define RCU_CPU_STALL_SUPPRESS_INIT 1 | |
271 | +#endif | |
272 | + | |
266 | 273 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
267 | 274 | |
268 | -#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) | |
269 | -#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) | |
270 | - | |
271 | 275 | /* |
272 | 276 | * RCU global state, including node hierarchy. This hierarchy is |
273 | 277 | * represented in "heap" form in a dense array. The root (first level) |
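For concreteness, with the default CONFIG_RCU_CPU_STALL_TIMEOUT=60 from lib/Kconfig.debug below, HZ=1000, and RCU_STALL_DELAY_DELTA equal to 0 (CONFIG_PROVE_RCU not set), the new macros work out roughly as follows (illustrative arithmetic, not part of the patch):

	RCU_SECONDS_TILL_STALL_CHECK   = 60 * 1000 + 0  =  60000 jiffies  (60 s)
	RCU_SECONDS_TILL_STALL_RECHECK = 3 * 60000 + 30 = 180030 jiffies  (~180 s)

So the first stall warning fires after the configured timeout, and follow-up warnings for the same grace period are spaced about three timeouts apart.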
... | ... | @@ -283,7 +287,7 @@ |
283 | 287 | struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ |
284 | 288 | u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ |
285 | 289 | u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ |
286 | - struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ | |
290 | + struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */ | 
287 | 291 | |
288 | 292 | /* The following fields are guarded by the root rcu_node's lock. */ |
289 | 293 | |
... | ... | @@ -365,6 +369,7 @@ |
365 | 369 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR |
366 | 370 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); |
367 | 371 | static void rcu_print_task_stall(struct rcu_node *rnp); |
372 | +static void rcu_preempt_stall_reset(void); | |
368 | 373 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
369 | 374 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); |
370 | 375 | #ifdef CONFIG_HOTPLUG_CPU |
kernel/rcutree_plugin.h
... | ... | @@ -154,7 +154,7 @@ |
154 | 154 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { |
155 | 155 | |
156 | 156 | /* Possibly blocking in an RCU read-side critical section. */ |
157 | - rdp = rcu_preempt_state.rda[cpu]; | |
157 | + rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); | |
158 | 158 | rnp = rdp->mynode; |
159 | 159 | raw_spin_lock_irqsave(&rnp->lock, flags); |
160 | 160 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; |
... | ... | @@ -201,7 +201,7 @@ |
201 | 201 | */ |
202 | 202 | void __rcu_read_lock(void) |
203 | 203 | { |
204 | - ACCESS_ONCE(current->rcu_read_lock_nesting)++; | |
204 | + current->rcu_read_lock_nesting++; | |
205 | 205 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ |
206 | 206 | } |
207 | 207 | EXPORT_SYMBOL_GPL(__rcu_read_lock); |
... | ... | @@ -344,7 +344,9 @@ |
344 | 344 | struct task_struct *t = current; |
345 | 345 | |
346 | 346 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ |
347 | - if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 && | |
347 | + --t->rcu_read_lock_nesting; | |
348 | + barrier(); /* decrement before load of ->rcu_read_unlock_special */ | |
349 | + if (t->rcu_read_lock_nesting == 0 && | |
348 | 350 | unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) |
349 | 351 | rcu_read_unlock_special(t); |
350 | 352 | #ifdef CONFIG_PROVE_LOCKING |
... | ... | @@ -417,6 +419,16 @@ |
417 | 419 | } |
418 | 420 | } |
419 | 421 | |
422 | +/* | |
423 | + * Suppress preemptible RCU's CPU stall warnings by pushing the | |
424 | + * time of the next stall-warning message comfortably far into the | |
425 | + * future. | |
426 | + */ | |
427 | +static void rcu_preempt_stall_reset(void) | |
428 | +{ | |
429 | + rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; | |
430 | +} | |
431 | + | |
420 | 432 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
421 | 433 | |
422 | 434 | /* |
... | ... | @@ -546,9 +558,11 @@ |
546 | 558 | * |
547 | 559 | * Control will return to the caller some time after a full grace |
548 | 560 | * period has elapsed, in other words after all currently executing RCU |
549 | - * read-side critical sections have completed. RCU read-side critical | |
550 | - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | |
551 | - * and may be nested. | |
561 | + * read-side critical sections have completed. Note, however, that | |
562 | + * upon return from synchronize_rcu(), the caller might well be executing | |
563 | + * concurrently with new RCU read-side critical sections that began while | |
564 | + * synchronize_rcu() was waiting. RCU read-side critical sections are | |
565 | + * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. | |
552 | 566 | */ |
553 | 567 | void synchronize_rcu(void) |
554 | 568 | { |
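The expanded comment above stresses that synchronize_rcu() waits only for pre-existing readers; readers that start after the grace period begins may still be running when it returns. The canonical update-side pattern this enables is sketched below (a generic example, not code from this commit; assumes the usual <linux/list.h>, <linux/rcupdate.h>, <linux/slab.h> headers):

	struct foo {
		struct list_head list;
		int data;
	};

	static LIST_HEAD(foo_list);		/* readers traverse under rcu_read_lock() */
	static DEFINE_SPINLOCK(foo_lock);	/* serializes updaters */

	static void foo_del(struct foo *p)
	{
		spin_lock(&foo_lock);
		list_del_rcu(&p->list);		/* new readers can no longer find p */
		spin_unlock(&foo_lock);

		synchronize_rcu();		/* wait only for pre-existing readers */
		kfree(p);			/* no reader can still hold a reference */
	}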
... | ... | @@ -771,7 +785,7 @@ |
771 | 785 | */ |
772 | 786 | static void __init __rcu_init_preempt(void) |
773 | 787 | { |
774 | - RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data); | |
788 | + rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); | |
775 | 789 | } |
776 | 790 | |
777 | 791 | /* |
... | ... | @@ -865,6 +879,14 @@ |
865 | 879 | { |
866 | 880 | } |
867 | 881 | |
882 | +/* | |
883 | + * Because preemptible RCU does not exist, there is no need to suppress | |
884 | + * its CPU stall warnings. | |
885 | + */ | |
886 | +static void rcu_preempt_stall_reset(void) | |
887 | +{ | |
888 | +} | |
889 | + | |
868 | 890 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
869 | 891 | |
870 | 892 | /* |
... | ... | @@ -917,15 +939,6 @@ |
917 | 939 | static void rcu_preempt_process_callbacks(void) |
918 | 940 | { |
919 | 941 | } |
920 | - | |
921 | -/* | |
922 | - * In classic RCU, call_rcu() is just call_rcu_sched(). | |
923 | - */ | |
924 | -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |
925 | -{ | |
926 | - call_rcu_sched(head, func); | |
927 | -} | |
928 | -EXPORT_SYMBOL_GPL(call_rcu); | |
929 | 942 | |
930 | 943 | /* |
931 | 944 | * Wait for an rcu-preempt grace period, but make it happen quickly. |
kernel/rcutree_trace.c
lib/Kconfig.debug
... | ... | @@ -539,6 +539,19 @@ |
539 | 539 | disabling, allowing multiple RCU-lockdep warnings to be printed |
540 | 540 | on a single reboot. |
541 | 541 | |
542 | +config SPARSE_RCU_POINTER | |
543 | + bool "RCU debugging: sparse-based checks for pointer usage" | |
544 | + default n | |
545 | + help | |
546 | + This feature enables the __rcu sparse annotation for | |
547 | + RCU-protected pointers. This annotation will cause sparse | |
548 | + to flag any non-RCU used of annotated pointers. This can be | |
549 | + helpful when debugging RCU usage. Please note that this feature | |
550 | + is not intended to enforce code cleanliness; it is instead merely | |
551 | + a debugging aid. | |
552 | + | |
553 | + Say Y to make sparse flag questionable use of RCU-protected pointers. | 
554 | + | |
542 | 555 | Say N if you are unsure. |
543 | 556 | |
544 | 557 | config LOCKDEP |
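For illustration, the kind of usage that SPARSE_RCU_POINTER lets sparse check looks roughly like the sketch below (generic example with example_* names, not code from this commit): accesses that go through rcu_assign_pointer()/rcu_dereference() are accepted, while a bare dereference of an __rcu-annotated pointer draws an "incompatible address spaces" warning.

	struct example_foo {
		int a;
	};

	static struct example_foo __rcu *example_gp;

	static void example_publish(struct example_foo *newp)
	{
		rcu_assign_pointer(example_gp, newp);	/* updater side: accepted by sparse */
	}

	static int example_read(void)
	{
		struct example_foo *p;
		int val = -1;

		rcu_read_lock();
		p = rcu_dereference(example_gp);	/* reader side: strips the __rcu annotation */
		if (p)
			val = p->a;
		rcu_read_unlock();
		return val;
	}

	/* By contrast, a bare "example_gp->a" would be flagged by sparse
	 * once this option is enabled. */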
... | ... | @@ -831,6 +844,30 @@ |
831 | 844 | Say N if you want to disable such checks. |
832 | 845 | |
833 | 846 | Say Y if you are unsure. |
847 | + | |
848 | +config RCU_CPU_STALL_TIMEOUT | |
849 | + int "RCU CPU stall timeout in seconds" | |
850 | + depends on RCU_CPU_STALL_DETECTOR | |
851 | + range 3 300 | |
852 | + default 60 | |
853 | + help | |
854 | + If a given RCU grace period extends more than the specified | |
855 | + number of seconds, a CPU stall warning is printed. If the | |
856 | + RCU grace period persists, additional CPU stall warnings are | |
857 | + printed at more widely spaced intervals. | |
858 | + | |
859 | +config RCU_CPU_STALL_DETECTOR_RUNNABLE | |
860 | + bool "RCU CPU stall checking starts automatically at boot" | |
861 | + depends on RCU_CPU_STALL_DETECTOR | |
862 | + default y | |
863 | + help | |
864 | + If set, start checking for RCU CPU stalls immediately on | |
865 | + boot. Otherwise, RCU CPU stall checking must be manually | |
866 | + enabled. | |
867 | + | |
868 | + Say Y if you are unsure. | |
869 | + | |
870 | + Say N if you wish to suppress RCU CPU stall checking during boot. | |
834 | 871 | |
835 | 872 | config RCU_CPU_STALL_VERBOSE |
836 | 873 | bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" |
lib/radix-tree.c
... | ... | @@ -49,7 +49,7 @@ |
49 | 49 | unsigned int height; /* Height from the bottom */ |
50 | 50 | unsigned int count; |
51 | 51 | struct rcu_head rcu_head; |
52 | - void *slots[RADIX_TREE_MAP_SIZE]; | |
52 | + void __rcu *slots[RADIX_TREE_MAP_SIZE]; | |
53 | 53 | unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; |
54 | 54 | }; |
55 | 55 |
net/ipv4/netfilter/nf_nat_core.c
... | ... | @@ -38,7 +38,7 @@ |
38 | 38 | static struct nf_conntrack_l3proto *l3proto __read_mostly; |
39 | 39 | |
40 | 40 | #define MAX_IP_NAT_PROTO 256 |
41 | -static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] | |
41 | +static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO] | |
42 | 42 | __read_mostly; |
43 | 43 | |
44 | 44 | static inline const struct nf_nat_protocol * |
net/netfilter/core.c
... | ... | @@ -27,7 +27,7 @@ |
27 | 27 | |
28 | 28 | static DEFINE_MUTEX(afinfo_mutex); |
29 | 29 | |
30 | -const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; | |
30 | +const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; | |
31 | 31 | EXPORT_SYMBOL(nf_afinfo); |
32 | 32 | |
33 | 33 | int nf_register_afinfo(const struct nf_afinfo *afinfo) |
net/netfilter/nf_conntrack_ecache.c
... | ... | @@ -26,10 +26,10 @@ |
26 | 26 | |
27 | 27 | static DEFINE_MUTEX(nf_ct_ecache_mutex); |
28 | 28 | |
29 | -struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; | |
29 | +struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly; | |
30 | 30 | EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); |
31 | 31 | |
32 | -struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; | |
32 | +struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly; | |
33 | 33 | EXPORT_SYMBOL_GPL(nf_expect_event_cb); |
34 | 34 | |
35 | 35 | /* deliver cached events and clear cache entry - must be called with locally |
net/netfilter/nf_conntrack_extend.c
... | ... | @@ -16,7 +16,7 @@ |
16 | 16 | #include <linux/skbuff.h> |
17 | 17 | #include <net/netfilter/nf_conntrack_extend.h> |
18 | 18 | |
19 | -static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM]; | |
19 | +static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM]; | |
20 | 20 | static DEFINE_MUTEX(nf_ct_ext_type_mutex); |
21 | 21 | |
22 | 22 | void __nf_ct_ext_destroy(struct nf_conn *ct) |
net/netfilter/nf_conntrack_proto.c
... | ... | @@ -28,8 +28,8 @@ |
28 | 28 | #include <net/netfilter/nf_conntrack_l4proto.h> |
29 | 29 | #include <net/netfilter/nf_conntrack_core.h> |
30 | 30 | |
31 | -static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly; | |
32 | -struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly; | |
31 | +static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly; | |
32 | +struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly; | |
33 | 33 | EXPORT_SYMBOL_GPL(nf_ct_l3protos); |
34 | 34 | |
35 | 35 | static DEFINE_MUTEX(nf_ct_proto_mutex); |
net/netfilter/nf_log.c
... | ... | @@ -16,7 +16,7 @@ |
16 | 16 | #define NF_LOG_PREFIXLEN 128 |
17 | 17 | #define NFLOGGER_NAME_LEN 64 |
18 | 18 | |
19 | -static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; | |
19 | +static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; | |
20 | 20 | static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; |
21 | 21 | static DEFINE_MUTEX(nf_log_mutex); |
22 | 22 |
net/netfilter/nf_queue.c
... | ... | @@ -18,7 +18,7 @@ |
18 | 18 | * long term mutex. The handler must provide an outfn() to accept packets |
19 | 19 | * for queueing and must reinject all packets it receives, no matter what. |
20 | 20 | */ |
21 | -static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly; | |
21 | +static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly; | |
22 | 22 | |
23 | 23 | static DEFINE_MUTEX(queue_handler_mutex); |
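The netfilter hunks above annotate pointer slots that are published and read under RCU. A hedged sketch of the register/lookup pattern behind such an __rcu-annotated array (names with the example_ prefix are illustrative and not copied from net/netfilter):

	static const struct nf_logger __rcu *example_loggers[NFPROTO_NUMPROTO] __read_mostly;
	static DEFINE_MUTEX(example_log_mutex);

	int example_log_bind(u_int8_t pf, const struct nf_logger *logger)
	{
		if (pf >= NFPROTO_NUMPROTO)
			return -EINVAL;
		mutex_lock(&example_log_mutex);
		rcu_assign_pointer(example_loggers[pf], logger);	/* updater side */
		mutex_unlock(&example_log_mutex);
		return 0;
	}

	const struct nf_logger *example_log_lookup(u_int8_t pf)
	{
		/* reader side: caller holds rcu_read_lock() */
		return rcu_dereference(example_loggers[pf]);
	}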
24 | 24 |