Commit a6b9b4d50f492630443b38404d1f436b3b748c14

Authored by Ingo Molnar

Merge branch 'rcu/next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu into core/rcu

Showing 54 changed files

Documentation/DocBook/kernel-locking.tmpl
... ... @@ -1645,7 +1645,9 @@
1645 1645 all the readers who were traversing the list when we deleted the
1646 1646 element are finished. We use <function>call_rcu()</function> to
1647 1647 register a callback which will actually destroy the object once
1648   - the readers are finished.
  1648 + all pre-existing readers are finished. Alternatively,
  1649 + <function>synchronize_rcu()</function> may be used to block until
  1650 + all pre-existing readers are finished.
1649 1651 </para>
1650 1652 <para>
1651 1653 But how does Read Copy Update know when the readers are
... ... @@ -1714,7 +1716,7 @@
1714 1716 - object_put(obj);
1715 1717 + list_del_rcu(&obj->list);
1716 1718 cache_num--;
1717   -+ call_rcu(&obj->rcu, cache_delete_rcu, obj);
  1719 ++ call_rcu(&obj->rcu, cache_delete_rcu);
1718 1720 }
1719 1721  
1720 1722 /* Must be holding cache_lock */
... ... @@ -1725,14 +1727,6 @@
1725 1727 if (++cache_num > MAX_CACHE_SIZE) {
1726 1728 struct object *i, *outcast = NULL;
1727 1729 list_for_each_entry(i, &cache, list) {
1728   -@@ -85,6 +94,7 @@
1729   - obj->popularity = 0;
1730   - atomic_set(&obj->refcnt, 1); /* The cache holds a reference */
1731   - spin_lock_init(&obj->lock);
1732   -+ INIT_RCU_HEAD(&obj->rcu);
1733   -
1734   - spin_lock_irqsave(&cache_lock, flags);
1735   - __cache_add(obj);
1736 1730 @@ -104,12 +114,11 @@
1737 1731 struct object *cache_find(int id)
1738 1732 {
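For context, the kernel-locking.tmpl change above contrasts call_rcu(), which registers a callback to free the object after a grace period, with synchronize_rcu(), which blocks until all pre-existing readers are done. A minimal sketch of the two styles follows; struct object, cache, and cache_lock are illustrative stand-ins for the document's example, not code from this commit.

#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct object {				/* stand-in for the document's example */
	struct list_head list;
	struct rcu_head rcu;
	int id;
};

static LIST_HEAD(cache);
static DEFINE_SPINLOCK(cache_lock);

static void cache_delete_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct object, rcu));
}

/* Asynchronous: unlink now, free after a grace period, no blocking. */
static void cache_delete(struct object *obj)
{
	spin_lock(&cache_lock);
	list_del_rcu(&obj->list);
	spin_unlock(&cache_lock);
	call_rcu(&obj->rcu, cache_delete_rcu);
}

/* Synchronous alternative: block until all pre-existing readers finish. */
static void cache_delete_sync(struct object *obj)
{
	spin_lock(&cache_lock);
	list_del_rcu(&obj->list);
	spin_unlock(&cache_lock);
	synchronize_rcu();
	kfree(obj);
}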
Documentation/RCU/checklist.txt
... ... @@ -218,13 +218,22 @@
218 218 include:
219 219  
220 220 a. Keeping a count of the number of data-structure elements
221   - used by the RCU-protected data structure, including those
222   - waiting for a grace period to elapse. Enforce a limit
223   - on this number, stalling updates as needed to allow
224   - previously deferred frees to complete.
  221 + used by the RCU-protected data structure, including
  222 + those waiting for a grace period to elapse. Enforce a
  223 + limit on this number, stalling updates as needed to allow
  224 + previously deferred frees to complete. Alternatively,
  225 + limit only the number awaiting deferred free rather than
  226 + the total number of elements.
225 227  
226   - Alternatively, limit only the number awaiting deferred
227   - free rather than the total number of elements.
  228 + One way to stall the updates is to acquire the update-side
  229 + mutex. (Don't try this with a spinlock -- other CPUs
  230 + spinning on the lock could prevent the grace period
  231 + from ever ending.) Another way to stall the updates
  232 + is for the updates to use a wrapper function around
  233 + the memory allocator, so that this wrapper function
  234 + simulates OOM when there is too much memory awaiting an
  235 + RCU grace period. There are of course many other
  236 + variations on this theme.
228 237  
229 238 b. Limiting update rate. For example, if updates occur only
230 239 once per hour, then no explicit rate limiting is required,
... ... @@ -365,4 +374,27 @@
365 374 and the compiler to freely reorder code into and out of RCU
366 375 read-side critical sections. It is the responsibility of the
367 376 RCU update-side primitives to deal with this.
  377 +
  378 +17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and
  379 + the __rcu sparse checks to validate your RCU code. These
  380 + can help find problems as follows:
  381 +
  382 + CONFIG_PROVE_RCU: check that accesses to RCU-protected data
  383 + structures are carried out under the proper RCU
  384 + read-side critical section, while holding the right
  385 + combination of locks, or whatever other conditions
  386 + are appropriate.
  387 +
  388 + CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the
  389 + same object to call_rcu() (or friends) before an RCU
  390 + grace period has elapsed since the last time that you
  391 + passed that same object to call_rcu() (or friends).
  392 +
  393 + __rcu sparse checks: tag the pointer to the RCU-protected data
  394 + structure with __rcu, and sparse will warn you if you
  395 + access that pointer without the services of one of the
  396 + variants of rcu_dereference().
  397 +
  398 + These debugging aids can help you find problems that are
  399 + otherwise extremely difficult to spot.
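Checklist item (a) above suggests, as one way to stall updates, a wrapper around the memory allocator that simulates OOM while too much memory is still waiting for a grace period. A rough sketch of that idea, with hypothetical names (MAX_PENDING_FREES, struct elem) that are not part of this commit:

#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#define MAX_PENDING_FREES	1000	/* arbitrary illustrative limit */

static atomic_t pending_frees;

struct elem {
	struct rcu_head rcu;
	/* ... payload ... */
};

/* Allocator wrapper: pretend to be out of memory while too many
 * elements are still awaiting their grace period. */
static struct elem *elem_alloc(gfp_t gfp)
{
	if (atomic_read(&pending_frees) > MAX_PENDING_FREES)
		return NULL;
	return kzalloc(sizeof(struct elem), gfp);
}

static void elem_free_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct elem, rcu));
	atomic_dec(&pending_frees);
}

static void elem_free(struct elem *e)
{
	atomic_inc(&pending_frees);
	call_rcu(&e->rcu, elem_free_rcu);
}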
drivers/input/evdev.c
... ... @@ -28,7 +28,7 @@
28 28 int minor;
29 29 struct input_handle handle;
30 30 wait_queue_head_t wait;
31   - struct evdev_client *grab;
  31 + struct evdev_client __rcu *grab;
32 32 struct list_head client_list;
33 33 spinlock_t client_lock; /* protects client_list */
34 34 struct mutex mutex;
drivers/vhost/net.c
... ... @@ -127,7 +127,10 @@
127 127 size_t len, total_len = 0;
128 128 int err, wmem;
129 129 size_t hdr_size;
130   - struct socket *sock = rcu_dereference(vq->private_data);
  130 + struct socket *sock;
  131 +
  132 + sock = rcu_dereference_check(vq->private_data,
  133 + lockdep_is_held(&vq->mutex));
131 134 if (!sock)
132 135 return;
133 136  
... ... @@ -582,7 +585,10 @@
582 585 static void vhost_net_enable_vq(struct vhost_net *n,
583 586 struct vhost_virtqueue *vq)
584 587 {
585   - struct socket *sock = vq->private_data;
  588 + struct socket *sock;
  589 +
  590 + sock = rcu_dereference_protected(vq->private_data,
  591 + lockdep_is_held(&vq->mutex));
586 592 if (!sock)
587 593 return;
588 594 if (vq == n->vqs + VHOST_NET_VQ_TX) {
... ... @@ -598,7 +604,8 @@
598 604 struct socket *sock;
599 605  
600 606 mutex_lock(&vq->mutex);
601   - sock = vq->private_data;
  607 + sock = rcu_dereference_protected(vq->private_data,
  608 + lockdep_is_held(&vq->mutex));
602 609 vhost_net_disable_vq(n, vq);
603 610 rcu_assign_pointer(vq->private_data, NULL);
604 611 mutex_unlock(&vq->mutex);
... ... @@ -736,7 +743,8 @@
736 743 }
737 744  
738 745 /* start polling new socket */
739   - oldsock = vq->private_data;
  746 + oldsock = rcu_dereference_protected(vq->private_data,
  747 + lockdep_is_held(&vq->mutex));
740 748 if (sock != oldsock) {
741 749 vhost_net_disable_vq(n, vq);
742 750 rcu_assign_pointer(vq->private_data, sock);
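The vhost-net hunks above replace plain loads of vq->private_data with rcu_dereference_check() and rcu_dereference_protected(), each carrying a lockdep expression. The difference: the _check variant is satisfied either by an RCU read-side critical section or by the stated condition, while the _protected variant is for update-side code where the lock alone prevents the pointer from changing, so the volatile load and memory barrier can be omitted. A hedged sketch with a hypothetical structure (struct ctx is not from this commit):

#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct ctx {
	struct mutex lock;
	void __rcu *priv;
};

/* Reader or lock holder: lockdep splats unless one of the two holds. */
static void *ctx_get_priv(struct ctx *c)
{
	return rcu_dereference_check(c->priv, lockdep_is_held(&c->lock));
}

/* Update side only: the mutex must be held; no read-side protection. */
static void *ctx_get_priv_locked(struct ctx *c)
{
	return rcu_dereference_protected(c->priv, lockdep_is_held(&c->lock));
}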
drivers/vhost/vhost.c
... ... @@ -284,7 +284,7 @@
284 284 vhost_dev_cleanup(dev);
285 285  
286 286 memory->nregions = 0;
287   - dev->memory = memory;
  287 + RCU_INIT_POINTER(dev->memory, memory);
288 288 return 0;
289 289 }
290 290  
... ... @@ -316,8 +316,9 @@
316 316 fput(dev->log_file);
317 317 dev->log_file = NULL;
318 318 /* No one will access memory at this point */
319   - kfree(dev->memory);
320   - dev->memory = NULL;
  319 + kfree(rcu_dereference_protected(dev->memory,
  320 + lockdep_is_held(&dev->mutex)));
  321 + RCU_INIT_POINTER(dev->memory, NULL);
321 322 if (dev->mm)
322 323 mmput(dev->mm);
323 324 dev->mm = NULL;
324 325  
... ... @@ -401,14 +402,22 @@
401 402 /* Caller should have device mutex but not vq mutex */
402 403 int vhost_log_access_ok(struct vhost_dev *dev)
403 404 {
404   - return memory_access_ok(dev, dev->memory, 1);
  405 + struct vhost_memory *mp;
  406 +
  407 + mp = rcu_dereference_protected(dev->memory,
  408 + lockdep_is_held(&dev->mutex));
  409 + return memory_access_ok(dev, mp, 1);
405 410 }
406 411  
407 412 /* Verify access for write logging. */
408 413 /* Caller should have vq mutex and device mutex */
409 414 static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
410 415 {
411   - return vq_memory_access_ok(log_base, vq->dev->memory,
  416 + struct vhost_memory *mp;
  417 +
  418 + mp = rcu_dereference_protected(vq->dev->memory,
  419 + lockdep_is_held(&vq->mutex));
  420 + return vq_memory_access_ok(log_base, mp,
412 421 vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
413 422 (!vq->log_used || log_access_ok(log_base, vq->log_addr,
414 423 sizeof *vq->used +
... ... @@ -448,7 +457,8 @@
448 457 kfree(newmem);
449 458 return -EFAULT;
450 459 }
451   - oldmem = d->memory;
  460 + oldmem = rcu_dereference_protected(d->memory,
  461 + lockdep_is_held(&d->mutex));
452 462 rcu_assign_pointer(d->memory, newmem);
453 463 synchronize_rcu();
454 464 kfree(oldmem);
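The vhost_set_memory() hunk above spells out the standard pointer-replacement sequence with the new primitives: fetch the old pointer with rcu_dereference_protected() under the mutex, publish the new one with rcu_assign_pointer(), wait for pre-existing readers with synchronize_rcu(), then free the old copy. A generic hedged sketch of the same pattern (struct table_holder and struct table are illustrative, not from this commit):

#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct table {
	int nentries;
	/* ... */
};

struct table_holder {
	struct mutex mutex;
	struct table __rcu *tbl;	/* readers use rcu_dereference() */
};

static void holder_replace_table(struct table_holder *h, struct table *newtbl)
{
	struct table *oldtbl;

	mutex_lock(&h->mutex);
	oldtbl = rcu_dereference_protected(h->tbl,
					   lockdep_is_held(&h->mutex));
	rcu_assign_pointer(h->tbl, newtbl);	/* publish to readers */
	mutex_unlock(&h->mutex);

	synchronize_rcu();			/* wait out pre-existing readers */
	kfree(oldtbl);
}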
drivers/vhost/vhost.h
... ... @@ -106,7 +106,7 @@
106 106 * vhost_work execution acts instead of rcu_read_lock() and the end of
107 107 * vhost_work execution acts instead of rcu_read_unlock().
108 108 * Writers use virtqueue mutex. */
109   - void *private_data;
  109 + void __rcu *private_data;
110 110 /* Log write descriptors */
111 111 void __user *log_base;
112 112 struct vhost_log log[VHOST_NET_MAX_SG];
... ... @@ -116,7 +116,7 @@
116 116 /* Readers use RCU to access memory table pointer
117 117 * log base pointer and features.
118 118 * Writers use mutex below.*/
119   - struct vhost_memory *memory;
  119 + struct vhost_memory __rcu *memory;
120 120 struct mm_struct *mm;
121 121 struct mutex mutex;
122 122 unsigned acked_features;
... ... @@ -173,7 +173,11 @@
173 173  
174 174 static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
175 175 {
176   - unsigned acked_features = rcu_dereference(dev->acked_features);
  176 + unsigned acked_features;
  177 +
  178 + acked_features =
  179 + rcu_dereference_index_check(dev->acked_features,
  180 + lockdep_is_held(&dev->mutex));
177 181 return acked_features & (1 << bit);
178 182 }
179 183  
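vhost_has_feature() above switches to rcu_dereference_index_check() because acked_features is an integer, which cannot carry the __rcu address-space tag. Unlike rcu_dereference_check(), the index variant does not implicitly add the read-side check, so the condition should list it explicitly where readers rely on RCU. A hedged sketch with a hypothetical structure (struct dummy_dev is not from this commit):

#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct dummy_dev {
	struct mutex mutex;
	unsigned features;	/* written under mutex, read under RCU */
};

static inline int dummy_has_feature(struct dummy_dev *d, int bit)
{
	unsigned f;

	/* Integer, so no sparse check; lockdep and the dependency
	 * barrier are still provided. */
	f = rcu_dereference_index_check(d->features,
					rcu_read_lock_held() ||
					lockdep_is_held(&d->mutex));
	return f & (1U << bit);
}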
include/linux/cgroup.h
... ... @@ -75,7 +75,7 @@
75 75  
76 76 unsigned long flags;
77 77 /* ID for this css, if possible */
78   - struct css_id *id;
  78 + struct css_id __rcu *id;
79 79 };
80 80  
81 81 /* bits in struct cgroup_subsys_state flags field */
... ... @@ -205,7 +205,7 @@
205 205 struct list_head children; /* my children */
206 206  
207 207 struct cgroup *parent; /* my parent */
208   - struct dentry *dentry; /* cgroup fs entry, RCU protected */
  208 + struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */
209 209  
210 210 /* Private pointers for each registered subsystem */
211 211 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
include/linux/compiler.h
... ... @@ -16,7 +16,11 @@
16 16 # define __release(x) __context__(x,-1)
17 17 # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
18 18 # define __percpu __attribute__((noderef, address_space(3)))
  19 +#ifdef CONFIG_SPARSE_RCU_POINTER
  20 +# define __rcu __attribute__((noderef, address_space(4)))
  21 +#else
19 22 # define __rcu
  23 +#endif
20 24 extern void __chk_user_ptr(const volatile void __user *);
21 25 extern void __chk_io_ptr(const volatile void __iomem *);
22 26 #else
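With CONFIG_SPARSE_RCU_POINTER=y, __rcu becomes a noderef attribute in sparse address space 4, so dereferencing an annotated pointer directly (or assigning it to a plain pointer) draws a warning from sparse ("make C=1"); the rcu_dereference() family casts the address space away with __force. A small hypothetical illustration (struct foo and gp are not from this commit):

#include <linux/rcupdate.h>

struct foo {
	int val;
};

static struct foo __rcu *gp;	/* annotated RCU-protected pointer */

static int read_val(void)
{
	int v;

	/* v = gp->val; */	/* sparse: dereference of noderef expression */

	rcu_read_lock();
	v = rcu_dereference(gp)->val;	/* accessor strips the __rcu tag */
	rcu_read_unlock();
	return v;
}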
include/linux/cred.h
... ... @@ -84,7 +84,7 @@
84 84 atomic_t usage;
85 85 pid_t tgid; /* thread group process ID */
86 86 spinlock_t lock;
87   - struct key *session_keyring; /* keyring inherited over fork */
  87 + struct key __rcu *session_keyring; /* keyring inherited over fork */
88 88 struct key *process_keyring; /* keyring private to this process */
89 89 struct rcu_head rcu; /* RCU deletion hook */
90 90 };
include/linux/fdtable.h
... ... @@ -31,7 +31,7 @@
31 31  
32 32 struct fdtable {
33 33 unsigned int max_fds;
34   - struct file ** fd; /* current fd array */
  34 + struct file __rcu **fd; /* current fd array */
35 35 fd_set *close_on_exec;
36 36 fd_set *open_fds;
37 37 struct rcu_head rcu;
... ... @@ -46,7 +46,7 @@
46 46 * read mostly part
47 47 */
48 48 atomic_t count;
49   - struct fdtable *fdt;
  49 + struct fdtable __rcu *fdt;
50 50 struct fdtable fdtab;
51 51 /*
52 52 * written part on a separate cache line in SMP
... ... @@ -55,7 +55,7 @@
55 55 int next_fd;
56 56 struct embedded_fd_set close_on_exec_init;
57 57 struct embedded_fd_set open_fds_init;
58   - struct file * fd_array[NR_OPEN_DEFAULT];
  58 + struct file __rcu * fd_array[NR_OPEN_DEFAULT];
59 59 };
60 60  
61 61 #define rcu_dereference_check_fdtable(files, fdtfd) \
include/linux/fs.h
... ... @@ -1380,7 +1380,7 @@
1380 1380 * Saved mount options for lazy filesystems using
1381 1381 * generic_show_options()
1382 1382 */
1383   - char *s_options;
  1383 + char __rcu *s_options;
1384 1384 };
1385 1385  
1386 1386 extern struct timespec current_fs_time(struct super_block *sb);
include/linux/genhd.h
... ... @@ -129,8 +129,8 @@
129 129 struct disk_part_tbl {
130 130 struct rcu_head rcu_head;
131 131 int len;
132   - struct hd_struct *last_lookup;
133   - struct hd_struct *part[];
  132 + struct hd_struct __rcu *last_lookup;
  133 + struct hd_struct __rcu *part[];
134 134 };
135 135  
136 136 struct gendisk {
... ... @@ -149,7 +149,7 @@
149 149 * non-critical accesses use RCU. Always access through
150 150 * helpers.
151 151 */
152   - struct disk_part_tbl *part_tbl;
  152 + struct disk_part_tbl __rcu *part_tbl;
153 153 struct hd_struct part0;
154 154  
155 155 const struct block_device_operations *fops;
include/linux/hardirq.h
... ... @@ -139,7 +139,7 @@
139 139 #endif
140 140  
141 141 #if defined(CONFIG_NO_HZ)
142   -#if defined(CONFIG_TINY_RCU)
  142 +#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
143 143 extern void rcu_enter_nohz(void);
144 144 extern void rcu_exit_nohz(void);
145 145  
include/linux/idr.h
... ... @@ -50,14 +50,14 @@
50 50  
51 51 struct idr_layer {
52 52 unsigned long bitmap; /* A zero bit means "space here" */
53   - struct idr_layer *ary[1<<IDR_BITS];
  53 + struct idr_layer __rcu *ary[1<<IDR_BITS];
54 54 int count; /* When zero, we can release it */
55 55 int layer; /* distance from leaf */
56 56 struct rcu_head rcu_head;
57 57 };
58 58  
59 59 struct idr {
60   - struct idr_layer *top;
  60 + struct idr_layer __rcu *top;
61 61 struct idr_layer *id_free;
62 62 int layers; /* only valid without concurrent changes */
63 63 int id_free_cnt;
include/linux/init_task.h
... ... @@ -82,11 +82,17 @@
82 82 # define CAP_INIT_BSET CAP_FULL_SET
83 83  
84 84 #ifdef CONFIG_TREE_PREEMPT_RCU
  85 +#define INIT_TASK_RCU_TREE_PREEMPT() \
  86 + .rcu_blocked_node = NULL,
  87 +#else
  88 +#define INIT_TASK_RCU_TREE_PREEMPT(tsk)
  89 +#endif
  90 +#ifdef CONFIG_PREEMPT_RCU
85 91 #define INIT_TASK_RCU_PREEMPT(tsk) \
86 92 .rcu_read_lock_nesting = 0, \
87 93 .rcu_read_unlock_special = 0, \
88   - .rcu_blocked_node = NULL, \
89   - .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry),
  94 + .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \
  95 + INIT_TASK_RCU_TREE_PREEMPT()
90 96 #else
91 97 #define INIT_TASK_RCU_PREEMPT(tsk)
92 98 #endif
... ... @@ -137,8 +143,8 @@
137 143 .children = LIST_HEAD_INIT(tsk.children), \
138 144 .sibling = LIST_HEAD_INIT(tsk.sibling), \
139 145 .group_leader = &tsk, \
140   - .real_cred = &init_cred, \
141   - .cred = &init_cred, \
  146 + RCU_INIT_POINTER(.real_cred, &init_cred), \
  147 + RCU_INIT_POINTER(.cred, &init_cred), \
142 148 .cred_guard_mutex = \
143 149 __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \
144 150 .comm = "swapper", \
include/linux/input.h
... ... @@ -1196,7 +1196,7 @@
1196 1196 int (*flush)(struct input_dev *dev, struct file *file);
1197 1197 int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value);
1198 1198  
1199   - struct input_handle *grab;
  1199 + struct input_handle __rcu *grab;
1200 1200  
1201 1201 spinlock_t event_lock;
1202 1202 struct mutex mutex;
include/linux/iocontext.h
... ... @@ -53,7 +53,7 @@
53 53  
54 54 struct radix_tree_root radix_root;
55 55 struct hlist_head cic_list;
56   - void *ioc_data;
  56 + void __rcu *ioc_data;
57 57 };
58 58  
59 59 static inline struct io_context *ioc_task_link(struct io_context *ioc)
include/linux/key.h
... ... @@ -178,8 +178,9 @@
178 178 */
179 179 union {
180 180 unsigned long value;
  181 + void __rcu *rcudata;
181 182 void *data;
182   - struct keyring_list *subscriptions;
  183 + struct keyring_list __rcu *subscriptions;
183 184 } payload;
184 185 };
185 186  
include/linux/kvm_host.h
... ... @@ -205,7 +205,7 @@
205 205  
206 206 struct mutex irq_lock;
207 207 #ifdef CONFIG_HAVE_KVM_IRQCHIP
208   - struct kvm_irq_routing_table *irq_routing;
  208 + struct kvm_irq_routing_table __rcu *irq_routing;
209 209 struct hlist_head mask_notifier_list;
210 210 struct hlist_head irq_ack_notifier_list;
211 211 #endif
include/linux/mm_types.h
... ... @@ -299,7 +299,7 @@
299 299 * new_owner->mm == mm
300 300 * new_owner->alloc_lock is held
301 301 */
302   - struct task_struct *owner;
  302 + struct task_struct __rcu *owner;
303 303 #endif
304 304  
305 305 #ifdef CONFIG_PROC_FS
include/linux/nfs_fs.h
... ... @@ -185,7 +185,7 @@
185 185 struct nfs4_cached_acl *nfs4_acl;
186 186 /* NFSv4 state */
187 187 struct list_head open_states;
188   - struct nfs_delegation *delegation;
  188 + struct nfs_delegation __rcu *delegation;
189 189 fmode_t delegation_state;
190 190 struct rw_semaphore rwsem;
191 191 #endif /* CONFIG_NFS_V4*/
include/linux/notifier.h
... ... @@ -49,28 +49,28 @@
49 49  
50 50 struct notifier_block {
51 51 int (*notifier_call)(struct notifier_block *, unsigned long, void *);
52   - struct notifier_block *next;
  52 + struct notifier_block __rcu *next;
53 53 int priority;
54 54 };
55 55  
56 56 struct atomic_notifier_head {
57 57 spinlock_t lock;
58   - struct notifier_block *head;
  58 + struct notifier_block __rcu *head;
59 59 };
60 60  
61 61 struct blocking_notifier_head {
62 62 struct rw_semaphore rwsem;
63   - struct notifier_block *head;
  63 + struct notifier_block __rcu *head;
64 64 };
65 65  
66 66 struct raw_notifier_head {
67   - struct notifier_block *head;
  67 + struct notifier_block __rcu *head;
68 68 };
69 69  
70 70 struct srcu_notifier_head {
71 71 struct mutex mutex;
72 72 struct srcu_struct srcu;
73   - struct notifier_block *head;
  73 + struct notifier_block __rcu *head;
74 74 };
75 75  
76 76 #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \
include/linux/radix-tree.h
... ... @@ -47,6 +47,8 @@
47 47 {
48 48 return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
49 49 }
  50 +#define radix_tree_indirect_to_ptr(ptr) \
  51 + radix_tree_indirect_to_ptr((void __force *)(ptr))
50 52  
51 53 static inline int radix_tree_is_indirect_ptr(void *ptr)
52 54 {
... ... @@ -61,7 +63,7 @@
61 63 struct radix_tree_root {
62 64 unsigned int height;
63 65 gfp_t gfp_mask;
64   - struct radix_tree_node *rnode;
  66 + struct radix_tree_node __rcu *rnode;
65 67 };
66 68  
67 69 #define RADIX_TREE_INIT(mask) { \
include/linux/rculist.h
... ... @@ -10,6 +10,21 @@
10 10 #include <linux/rcupdate.h>
11 11  
12 12 /*
  13 + * Why is there no list_empty_rcu()? Because list_empty() serves this
  14 + * purpose. The list_empty() function fetches the RCU-protected pointer
  15 + * and compares it to the address of the list head, but neither dereferences
  16 + * this pointer itself nor provides this pointer to the caller. Therefore,
  17 + * it is not necessary to use rcu_dereference(), so that list_empty() can
  18 + * be used anywhere you would want to use a list_empty_rcu().
  19 + */
  20 +
  21 +/*
  22 + * return the ->next pointer of a list_head in an rcu safe
  23 + * way, we must not access it directly
  24 + */
  25 +#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next)))
  26 +
  27 +/*
13 28 * Insert a new entry between two known consecutive entries.
14 29 *
15 30 * This is only for internal list manipulation where we know
... ... @@ -20,7 +35,7 @@
20 35 {
21 36 new->next = next;
22 37 new->prev = prev;
23   - rcu_assign_pointer(prev->next, new);
  38 + rcu_assign_pointer(list_next_rcu(prev), new);
24 39 next->prev = new;
25 40 }
26 41  
... ... @@ -138,7 +153,7 @@
138 153 {
139 154 new->next = old->next;
140 155 new->prev = old->prev;
141   - rcu_assign_pointer(new->prev->next, new);
  156 + rcu_assign_pointer(list_next_rcu(new->prev), new);
142 157 new->next->prev = new;
143 158 old->prev = LIST_POISON2;
144 159 }
... ... @@ -193,7 +208,7 @@
193 208 */
194 209  
195 210 last->next = at;
196   - rcu_assign_pointer(head->next, first);
  211 + rcu_assign_pointer(list_next_rcu(head), first);
197 212 first->prev = head;
198 213 at->prev = last;
199 214 }
... ... @@ -208,7 +223,9 @@
208 223 * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
209 224 */
210 225 #define list_entry_rcu(ptr, type, member) \
211   - container_of(rcu_dereference_raw(ptr), type, member)
  226 + ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \
  227 + container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \
  228 + })
212 229  
213 230 /**
214 231 * list_first_entry_rcu - get the first element from a list
215 232  
... ... @@ -225,9 +242,9 @@
225 242 list_entry_rcu((ptr)->next, type, member)
226 243  
227 244 #define __list_for_each_rcu(pos, head) \
228   - for (pos = rcu_dereference_raw((head)->next); \
  245 + for (pos = rcu_dereference_raw(list_next_rcu(head)); \
229 246 pos != (head); \
230   - pos = rcu_dereference_raw(pos->next))
  247 + pos = rcu_dereference_raw(list_next_rcu((pos))))
231 248  
232 249 /**
233 250 * list_for_each_entry_rcu - iterate over rcu list of given type
234 251  
... ... @@ -257,9 +274,9 @@
257 274 * as long as the traversal is guarded by rcu_read_lock().
258 275 */
259 276 #define list_for_each_continue_rcu(pos, head) \
260   - for ((pos) = rcu_dereference_raw((pos)->next); \
  277 + for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \
261 278 prefetch((pos)->next), (pos) != (head); \
262   - (pos) = rcu_dereference_raw((pos)->next))
  279 + (pos) = rcu_dereference_raw(list_next_rcu(pos)))
263 280  
264 281 /**
265 282 * list_for_each_entry_continue_rcu - continue iteration over list of given type
266 283  
... ... @@ -314,12 +331,19 @@
314 331  
315 332 new->next = next;
316 333 new->pprev = old->pprev;
317   - rcu_assign_pointer(*new->pprev, new);
  334 + rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new);
318 335 if (next)
319 336 new->next->pprev = &new->next;
320 337 old->pprev = LIST_POISON2;
321 338 }
322 339  
  340 +/*
  341 + * return the first or the next element in an RCU protected hlist
  342 + */
  343 +#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first)))
  344 +#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next)))
  345 +#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev)))
  346 +
323 347 /**
324 348 * hlist_add_head_rcu
325 349 * @n: the element to add to the hash list.
... ... @@ -346,7 +370,7 @@
346 370  
347 371 n->next = first;
348 372 n->pprev = &h->first;
349   - rcu_assign_pointer(h->first, n);
  373 + rcu_assign_pointer(hlist_first_rcu(h), n);
350 374 if (first)
351 375 first->pprev = &n->next;
352 376 }
... ... @@ -374,7 +398,7 @@
374 398 {
375 399 n->pprev = next->pprev;
376 400 n->next = next;
377   - rcu_assign_pointer(*(n->pprev), n);
  401 + rcu_assign_pointer(hlist_pprev_rcu(n), n);
378 402 next->pprev = &n->next;
379 403 }
380 404  
381 405  
... ... @@ -401,15 +425,15 @@
401 425 {
402 426 n->next = prev->next;
403 427 n->pprev = &prev->next;
404   - rcu_assign_pointer(prev->next, n);
  428 + rcu_assign_pointer(hlist_next_rcu(prev), n);
405 429 if (n->next)
406 430 n->next->pprev = &n->next;
407 431 }
408 432  
409   -#define __hlist_for_each_rcu(pos, head) \
410   - for (pos = rcu_dereference((head)->first); \
411   - pos && ({ prefetch(pos->next); 1; }); \
412   - pos = rcu_dereference(pos->next))
  433 +#define __hlist_for_each_rcu(pos, head) \
  434 + for (pos = rcu_dereference(hlist_first_rcu(head)); \
  435 + pos && ({ prefetch(pos->next); 1; }); \
  436 + pos = rcu_dereference(hlist_next_rcu(pos)))
413 437  
414 438 /**
415 439 * hlist_for_each_entry_rcu - iterate over rcu list of given type
416 440  
... ... @@ -422,11 +446,11 @@
422 446 * the _rcu list-mutation primitives such as hlist_add_head_rcu()
423 447 * as long as the traversal is guarded by rcu_read_lock().
424 448 */
425   -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \
426   - for (pos = rcu_dereference_raw((head)->first); \
  449 +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \
  450 + for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \
427 451 pos && ({ prefetch(pos->next); 1; }) && \
428 452 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
429   - pos = rcu_dereference_raw(pos->next))
  453 + pos = rcu_dereference_raw(hlist_next_rcu(pos)))
430 454  
431 455 /**
432 456 * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type
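The list_next_rcu(), hlist_first_rcu(), and related helpers above let the _rcu list primitives hand properly __rcu-annotated lvalues to rcu_assign_pointer() and rcu_dereference_raw(); callers keep using the ordinary wrappers unchanged. For reference, a minimal hedged usage sketch (struct item and its fields are illustrative, not from this commit):

#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	int key;
	struct list_head link;
	struct rcu_head rcu;
};

static LIST_HEAD(items);
static DEFINE_SPINLOCK(items_lock);

static void item_add(struct item *it)
{
	spin_lock(&items_lock);
	list_add_rcu(&it->link, &items);	/* ends up in rcu_assign_pointer() */
	spin_unlock(&items_lock);
}

static int item_present(int key)
{
	struct item *it;
	int found = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(it, &items, link) {
		if (it->key == key) {
			found = 1;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}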
include/linux/rculist_nulls.h
... ... @@ -37,6 +37,12 @@
37 37 }
38 38 }
39 39  
  40 +#define hlist_nulls_first_rcu(head) \
  41 + (*((struct hlist_nulls_node __rcu __force **)&(head)->first))
  42 +
  43 +#define hlist_nulls_next_rcu(node) \
  44 + (*((struct hlist_nulls_node __rcu __force **)&(node)->next))
  45 +
40 46 /**
41 47 * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization
42 48 * @n: the element to delete from the hash list.
... ... @@ -88,7 +94,7 @@
88 94  
89 95 n->next = first;
90 96 n->pprev = &h->first;
91   - rcu_assign_pointer(h->first, n);
  97 + rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
92 98 if (!is_a_nulls(first))
93 99 first->pprev = &n->next;
94 100 }
95 101  
... ... @@ -100,11 +106,11 @@
100 106 * @member: the name of the hlist_nulls_node within the struct.
101 107 *
102 108 */
103   -#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
104   - for (pos = rcu_dereference_raw((head)->first); \
105   - (!is_a_nulls(pos)) && \
  109 +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
  110 + for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \
  111 + (!is_a_nulls(pos)) && \
106 112 ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
107   - pos = rcu_dereference_raw(pos->next))
  113 + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
108 114  
109 115 #endif
110 116 #endif
include/linux/rcupdate.h
... ... @@ -41,11 +41,15 @@
41 41 #include <linux/lockdep.h>
42 42 #include <linux/completion.h>
43 43 #include <linux/debugobjects.h>
  44 +#include <linux/compiler.h>
44 45  
45 46 #ifdef CONFIG_RCU_TORTURE_TEST
46 47 extern int rcutorture_runnable; /* for sysctl */
47 48 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
48 49  
  50 +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
  51 +#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
  52 +
49 53 /**
50 54 * struct rcu_head - callback structure for use with RCU
51 55 * @next: next update requests in a list
... ... @@ -57,29 +61,94 @@
57 61 };
58 62  
59 63 /* Exported common interfaces */
60   -extern void rcu_barrier(void);
  64 +extern void call_rcu_sched(struct rcu_head *head,
  65 + void (*func)(struct rcu_head *rcu));
  66 +extern void synchronize_sched(void);
61 67 extern void rcu_barrier_bh(void);
62 68 extern void rcu_barrier_sched(void);
63 69 extern void synchronize_sched_expedited(void);
64 70 extern int sched_expedited_torture_stats(char *page);
65 71  
  72 +static inline void __rcu_read_lock_bh(void)
  73 +{
  74 + local_bh_disable();
  75 +}
  76 +
  77 +static inline void __rcu_read_unlock_bh(void)
  78 +{
  79 + local_bh_enable();
  80 +}
  81 +
  82 +#ifdef CONFIG_PREEMPT_RCU
  83 +
  84 +extern void __rcu_read_lock(void);
  85 +extern void __rcu_read_unlock(void);
  86 +void synchronize_rcu(void);
  87 +
  88 +/*
  89 + * Defined as a macro as it is a very low level header included from
  90 + * areas that don't even know about current. This gives the rcu_read_lock()
  91 + * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other
  92 + * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
  93 + */
  94 +#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
  95 +
  96 +#else /* #ifdef CONFIG_PREEMPT_RCU */
  97 +
  98 +static inline void __rcu_read_lock(void)
  99 +{
  100 + preempt_disable();
  101 +}
  102 +
  103 +static inline void __rcu_read_unlock(void)
  104 +{
  105 + preempt_enable();
  106 +}
  107 +
  108 +static inline void synchronize_rcu(void)
  109 +{
  110 + synchronize_sched();
  111 +}
  112 +
  113 +static inline int rcu_preempt_depth(void)
  114 +{
  115 + return 0;
  116 +}
  117 +
  118 +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  119 +
66 120 /* Internal to kernel */
67 121 extern void rcu_init(void);
  122 +extern void rcu_sched_qs(int cpu);
  123 +extern void rcu_bh_qs(int cpu);
  124 +extern void rcu_check_callbacks(int cpu, int user);
  125 +struct notifier_block;
68 126  
  127 +#ifdef CONFIG_NO_HZ
  128 +
  129 +extern void rcu_enter_nohz(void);
  130 +extern void rcu_exit_nohz(void);
  131 +
  132 +#else /* #ifdef CONFIG_NO_HZ */
  133 +
  134 +static inline void rcu_enter_nohz(void)
  135 +{
  136 +}
  137 +
  138 +static inline void rcu_exit_nohz(void)
  139 +{
  140 +}
  141 +
  142 +#endif /* #else #ifdef CONFIG_NO_HZ */
  143 +
69 144 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
70 145 #include <linux/rcutree.h>
71   -#elif defined(CONFIG_TINY_RCU)
  146 +#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
72 147 #include <linux/rcutiny.h>
73 148 #else
74 149 #error "Unknown RCU implementation specified to kernel configuration"
75 150 #endif
76 151  
77   -#define RCU_HEAD_INIT { .next = NULL, .func = NULL }
78   -#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
79   -#define INIT_RCU_HEAD(ptr) do { \
80   - (ptr)->next = NULL; (ptr)->func = NULL; \
81   -} while (0)
82   -
83 152 /*
84 153 * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
85 154 * initialization and destruction of rcu_head on the stack. rcu_head structures
... ... @@ -120,14 +189,15 @@
120 189 extern int debug_lockdep_rcu_enabled(void);
121 190  
122 191 /**
123   - * rcu_read_lock_held - might we be in RCU read-side critical section?
  192 + * rcu_read_lock_held() - might we be in RCU read-side critical section?
124 193 *
125 194 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
126 195 * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
127 196 * this assumes we are in an RCU read-side critical section unless it can
128   - * prove otherwise.
  197 + * prove otherwise. This is useful for debug checks in functions that
  198 + * require that they be called within an RCU read-side critical section.
129 199 *
130   - * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
  200 + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
131 201 * and while lockdep is disabled.
132 202 */
133 203 static inline int rcu_read_lock_held(void)
134 204  
... ... @@ -144,14 +214,16 @@
144 214 extern int rcu_read_lock_bh_held(void);
145 215  
146 216 /**
147   - * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section?
  217 + * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
148 218 *
149 219 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
150 220 * RCU-sched read-side critical section. In absence of
151 221 * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
152 222 * critical section unless it can prove otherwise. Note that disabling
153 223 * of preemption (including disabling irqs) counts as an RCU-sched
154   - * read-side critical section.
  224 + * read-side critical section. This is useful for debug checks in functions
  225 + * that require that they be called within an RCU-sched read-side
  226 + * critical section.
155 227 *
156 228 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
157 229 * and while lockdep is disabled.
... ... @@ -211,7 +283,11 @@
211 283  
212 284 extern int rcu_my_thread_group_empty(void);
213 285  
214   -#define __do_rcu_dereference_check(c) \
  286 +/**
  287 + * rcu_lockdep_assert - emit lockdep splat if specified condition not met
  288 + * @c: condition to check
  289 + */
  290 +#define rcu_lockdep_assert(c) \
215 291 do { \
216 292 static bool __warned; \
217 293 if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
... ... @@ -220,42 +296,156 @@
220 296 } \
221 297 } while (0)
222 298  
  299 +#else /* #ifdef CONFIG_PROVE_RCU */
  300 +
  301 +#define rcu_lockdep_assert(c) do { } while (0)
  302 +
  303 +#endif /* #else #ifdef CONFIG_PROVE_RCU */
  304 +
  305 +/*
  306 + * Helper functions for rcu_dereference_check(), rcu_dereference_protected()
  307 + * and rcu_assign_pointer(). Some of these could be folded into their
  308 + * callers, but they are left separate in order to ease introduction of
  309 + * multiple flavors of pointers to match the multiple flavors of RCU
  310 + * (e.g., __rcu_bh, __rcu_sched, and __srcu), should this make sense in \
  311 + * the future.
  312 + */
  313 +#define __rcu_access_pointer(p, space) \
  314 + ({ \
  315 + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
  316 + (void) (((typeof (*p) space *)p) == p); \
  317 + ((typeof(*p) __force __kernel *)(_________p1)); \
  318 + })
  319 +#define __rcu_dereference_check(p, c, space) \
  320 + ({ \
  321 + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
  322 + rcu_lockdep_assert(c); \
  323 + (void) (((typeof (*p) space *)p) == p); \
  324 + smp_read_barrier_depends(); \
  325 + ((typeof(*p) __force __kernel *)(_________p1)); \
  326 + })
  327 +#define __rcu_dereference_protected(p, c, space) \
  328 + ({ \
  329 + rcu_lockdep_assert(c); \
  330 + (void) (((typeof (*p) space *)p) == p); \
  331 + ((typeof(*p) __force __kernel *)(p)); \
  332 + })
  333 +
  334 +#define __rcu_dereference_index_check(p, c) \
  335 + ({ \
  336 + typeof(p) _________p1 = ACCESS_ONCE(p); \
  337 + rcu_lockdep_assert(c); \
  338 + smp_read_barrier_depends(); \
  339 + (_________p1); \
  340 + })
  341 +#define __rcu_assign_pointer(p, v, space) \
  342 + ({ \
  343 + if (!__builtin_constant_p(v) || \
  344 + ((v) != NULL)) \
  345 + smp_wmb(); \
  346 + (p) = (typeof(*v) __force space *)(v); \
  347 + })
  348 +
  349 +
223 350 /**
224   - * rcu_dereference_check - rcu_dereference with debug checking
  351 + * rcu_access_pointer() - fetch RCU pointer with no dereferencing
  352 + * @p: The pointer to read
  353 + *
  354 + * Return the value of the specified RCU-protected pointer, but omit the
  355 + * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
  356 + * when the value of this pointer is accessed, but the pointer is not
  357 + * dereferenced, for example, when testing an RCU-protected pointer against
  358 + * NULL. Although rcu_access_pointer() may also be used in cases where
  359 + * update-side locks prevent the value of the pointer from changing, you
  360 + * should instead use rcu_dereference_protected() for this use case.
  361 + */
  362 +#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu)
  363 +
  364 +/**
  365 + * rcu_dereference_check() - rcu_dereference with debug checking
225 366 * @p: The pointer to read, prior to dereferencing
226 367 * @c: The conditions under which the dereference will take place
227 368 *
228 369 * Do an rcu_dereference(), but check that the conditions under which the
229   - * dereference will take place are correct. Typically the conditions indicate
230   - * the various locking conditions that should be held at that point. The check
231   - * should return true if the conditions are satisfied.
  370 + * dereference will take place are correct. Typically the conditions
  371 + * indicate the various locking conditions that should be held at that
  372 + * point. The check should return true if the conditions are satisfied.
  373 + * An implicit check for being in an RCU read-side critical section
  374 + * (rcu_read_lock()) is included.
232 375 *
233 376 * For example:
234 377 *
235   - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() ||
236   - * lockdep_is_held(&foo->lock));
  378 + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock));
237 379 *
238 380 * could be used to indicate to lockdep that foo->bar may only be dereferenced
239   - * if either the RCU read lock is held, or that the lock required to replace
  381 + * if either rcu_read_lock() is held, or that the lock required to replace
240 382 * the bar struct at foo->bar is held.
241 383 *
242 384 * Note that the list of conditions may also include indications of when a lock
243 385 * need not be held, for example during initialisation or destruction of the
244 386 * target struct:
245 387 *
246   - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() ||
247   - * lockdep_is_held(&foo->lock) ||
  388 + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) ||
248 389 * atomic_read(&foo->usage) == 0);
  390 + *
  391 + * Inserts memory barriers on architectures that require them
  392 + * (currently only the Alpha), prevents the compiler from refetching
  393 + * (and from merging fetches), and, more importantly, documents exactly
  394 + * which pointers are protected by RCU and checks that the pointer is
  395 + * annotated as __rcu.
249 396 */
250 397 #define rcu_dereference_check(p, c) \
251   - ({ \
252   - __do_rcu_dereference_check(c); \
253   - rcu_dereference_raw(p); \
254   - })
  398 + __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu)
255 399  
256 400 /**
257   - * rcu_dereference_protected - fetch RCU pointer when updates prevented
  401 + * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking
  402 + * @p: The pointer to read, prior to dereferencing
  403 + * @c: The conditions under which the dereference will take place
258 404 *
  405 + * This is the RCU-bh counterpart to rcu_dereference_check().
  406 + */
  407 +#define rcu_dereference_bh_check(p, c) \
  408 + __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu)
  409 +
  410 +/**
  411 + * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking
  412 + * @p: The pointer to read, prior to dereferencing
  413 + * @c: The conditions under which the dereference will take place
  414 + *
  415 + * This is the RCU-sched counterpart to rcu_dereference_check().
  416 + */
  417 +#define rcu_dereference_sched_check(p, c) \
  418 + __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \
  419 + __rcu)
  420 +
  421 +#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/
  422 +
  423 +/**
  424 + * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
  425 + * @p: The pointer to read, prior to dereferencing
  426 + * @c: The conditions under which the dereference will take place
  427 + *
  428 + * Similar to rcu_dereference_check(), but omits the sparse checking.
  429 + * This allows rcu_dereference_index_check() to be used on integers,
  430 + * which can then be used as array indices. Attempting to use
  431 + * rcu_dereference_check() on an integer will give compiler warnings
  432 + * because the sparse address-space mechanism relies on dereferencing
  433 + * the RCU-protected pointer. Dereferencing integers is not something
  434 + * that even gcc will put up with.
  435 + *
  436 + * Note that this function does not implicitly check for RCU read-side
  437 + * critical sections. If this function gains lots of uses, it might
  438 + * make sense to provide versions for each flavor of RCU, but it does
  439 + * not make sense as of early 2010.
  440 + */
  441 +#define rcu_dereference_index_check(p, c) \
  442 + __rcu_dereference_index_check((p), (c))
  443 +
  444 +/**
  445 + * rcu_dereference_protected() - fetch RCU pointer when updates prevented
  446 + * @p: The pointer to read, prior to dereferencing
  447 + * @c: The conditions under which the dereference will take place
  448 + *
259 449 * Return the value of the specified RCU-protected pointer, but omit
260 450 * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This
261 451 * is useful in cases where update-side locks prevent the value of the
... ... @@ -263,36 +453,62 @@
263 453 * prevent the compiler from repeating this reference or combining it
264 454 * with other references, so it should not be used without protection
265 455 * of appropriate locks.
  456 + *
  457 + * This function is only for update-side use. Using this function
  458 + * when protected only by rcu_read_lock() will result in infrequent
  459 + * but very ugly failures.
266 460 */
267 461 #define rcu_dereference_protected(p, c) \
268   - ({ \
269   - __do_rcu_dereference_check(c); \
270   - (p); \
271   - })
  462 + __rcu_dereference_protected((p), (c), __rcu)
272 463  
273   -#else /* #ifdef CONFIG_PROVE_RCU */
  464 +/**
  465 + * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented
  466 + * @p: The pointer to read, prior to dereferencing
  467 + * @c: The conditions under which the dereference will take place
  468 + *
  469 + * This is the RCU-bh counterpart to rcu_dereference_protected().
  470 + */
  471 +#define rcu_dereference_bh_protected(p, c) \
  472 + __rcu_dereference_protected((p), (c), __rcu)
274 473  
275   -#define rcu_dereference_check(p, c) rcu_dereference_raw(p)
276   -#define rcu_dereference_protected(p, c) (p)
  474 +/**
  475 + * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented
  476 + * @p: The pointer to read, prior to dereferencing
  477 + * @c: The conditions under which the dereference will take place
  478 + *
  479 + * This is the RCU-sched counterpart to rcu_dereference_protected().
  480 + */
  481 +#define rcu_dereference_sched_protected(p, c) \
  482 + __rcu_dereference_protected((p), (c), __rcu)
277 483  
278   -#endif /* #else #ifdef CONFIG_PROVE_RCU */
279 484  
280 485 /**
281   - * rcu_access_pointer - fetch RCU pointer with no dereferencing
  486 + * rcu_dereference() - fetch RCU-protected pointer for dereferencing
  487 + * @p: The pointer to read, prior to dereferencing
282 488 *
283   - * Return the value of the specified RCU-protected pointer, but omit the
284   - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
285   - * when the value of this pointer is accessed, but the pointer is not
286   - * dereferenced, for example, when testing an RCU-protected pointer against
287   - * NULL. This may also be used in cases where update-side locks prevent
288   - * the value of the pointer from changing, but rcu_dereference_protected()
289   - * is a lighter-weight primitive for this use case.
  489 + * This is a simple wrapper around rcu_dereference_check().
290 490 */
291   -#define rcu_access_pointer(p) ACCESS_ONCE(p)
  491 +#define rcu_dereference(p) rcu_dereference_check(p, 0)
292 492  
293 493 /**
294   - * rcu_read_lock - mark the beginning of an RCU read-side critical section.
  494 + * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing
  495 + * @p: The pointer to read, prior to dereferencing
295 496 *
  497 + * Makes rcu_dereference_check() do the dirty work.
  498 + */
  499 +#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0)
  500 +
  501 +/**
  502 + * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing
  503 + * @p: The pointer to read, prior to dereferencing
  504 + *
  505 + * Makes rcu_dereference_check() do the dirty work.
  506 + */
  507 +#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0)
  508 +
  509 +/**
  510 + * rcu_read_lock() - mark the beginning of an RCU read-side critical section
  511 + *
296 512 * When synchronize_rcu() is invoked on one CPU while other CPUs
297 513 * are within RCU read-side critical sections, then the
298 514 * synchronize_rcu() is guaranteed to block until after all the other
... ... @@ -302,7 +518,7 @@
302 518 * until after the all the other CPUs exit their critical sections.
303 519 *
304 520 * Note, however, that RCU callbacks are permitted to run concurrently
305   - * with RCU read-side critical sections. One way that this can happen
  521 + * with new RCU read-side critical sections. One way that this can happen
306 522 * is via the following sequence of events: (1) CPU 0 enters an RCU
307 523 * read-side critical section, (2) CPU 1 invokes call_rcu() to register
308 524 * an RCU callback, (3) CPU 0 exits the RCU read-side critical section,
... ... @@ -317,7 +533,20 @@
317 533 * will be deferred until the outermost RCU read-side critical section
318 534 * completes.
319 535 *
320   - * It is illegal to block while in an RCU read-side critical section.
  536 + * You can avoid reading and understanding the next paragraph by
  537 + * following this rule: don't put anything in an rcu_read_lock() RCU
  538 + * read-side critical section that would block in a !PREEMPT kernel.
  539 + * But if you want the full story, read on!
  540 + *
  541 + * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it
  542 + * is illegal to block while in an RCU read-side critical section. In
  543 + * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
  544 + * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
  545 + * be preempted, but explicit blocking is illegal. Finally, in preemptible
  546 + * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds,
  547 + * RCU read-side critical sections may be preempted and they may also
  548 + * block, but only when acquiring spinlocks that are subject to priority
  549 + * inheritance.
321 550 */
322 551 static inline void rcu_read_lock(void)
323 552 {
... ... @@ -337,7 +566,7 @@
337 566 */
338 567  
339 568 /**
340   - * rcu_read_unlock - marks the end of an RCU read-side critical section.
  569 + * rcu_read_unlock() - marks the end of an RCU read-side critical section.
341 570 *
342 571 * See rcu_read_lock() for more information.
343 572 */
344 573  
... ... @@ -349,15 +578,16 @@
349 578 }
350 579  
351 580 /**
352   - * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section
  581 + * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
353 582 *
354 583 * This is equivalent of rcu_read_lock(), but to be used when updates
355   - * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks
356   - * consider completion of a softirq handler to be a quiescent state,
357   - * a process in RCU read-side critical section must be protected by
358   - * disabling softirqs. Read-side critical sections in interrupt context
359   - * can use just rcu_read_lock().
360   - *
  584 + * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since
  585 + * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a
  586 + * softirq handler to be a quiescent state, a process in RCU read-side
  587 + * critical section must be protected by disabling softirqs. Read-side
  588 + * critical sections in interrupt context can use just rcu_read_lock(),
  589 + * though this should at least be commented to avoid confusing people
  590 + * reading the code.
361 591 */
362 592 static inline void rcu_read_lock_bh(void)
363 593 {
364 594  
... ... @@ -379,13 +609,12 @@
379 609 }
380 610  
381 611 /**
382   - * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section
  612 + * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section
383 613 *
384   - * Should be used with either
385   - * - synchronize_sched()
386   - * or
387   - * - call_rcu_sched() and rcu_barrier_sched()
388   - * on the write-side to insure proper synchronization.
  614 + * This is equivalent of rcu_read_lock(), but to be used when updates
  615 + * are being done using call_rcu_sched() or synchronize_rcu_sched().
  616 + * Read-side critical sections can also be introduced by anything that
  617 + * disables preemption, including local_irq_disable() and friends.
389 618 */
390 619 static inline void rcu_read_lock_sched(void)
391 620 {
... ... @@ -420,71 +649,34 @@
420 649 preempt_enable_notrace();
421 650 }
422 651  
423   -
424 652 /**
425   - * rcu_dereference_raw - fetch an RCU-protected pointer
  653 + * rcu_assign_pointer() - assign to RCU-protected pointer
  654 + * @p: pointer to assign to
  655 + * @v: value to assign (publish)
426 656 *
427   - * The caller must be within some flavor of RCU read-side critical
428   - * section, or must be otherwise preventing the pointer from changing,
429   - * for example, by holding an appropriate lock. This pointer may later
430   - * be safely dereferenced. It is the caller's responsibility to have
431   - * done the right thing, as this primitive does no checking of any kind.
  657 + * Assigns the specified value to the specified RCU-protected
  658 + * pointer, ensuring that any concurrent RCU readers will see
  659 + * any prior initialization. Returns the value assigned.
432 660 *
433 661 * Inserts memory barriers on architectures that require them
434   - * (currently only the Alpha), and, more importantly, documents
435   - * exactly which pointers are protected by RCU.
436   - */
437   -#define rcu_dereference_raw(p) ({ \
438   - typeof(p) _________p1 = ACCESS_ONCE(p); \
439   - smp_read_barrier_depends(); \
440   - (_________p1); \
441   - })
442   -
443   -/**
444   - * rcu_dereference - fetch an RCU-protected pointer, checking for RCU
445   - *
446   - * Makes rcu_dereference_check() do the dirty work.
447   - */
448   -#define rcu_dereference(p) \
449   - rcu_dereference_check(p, rcu_read_lock_held())
450   -
451   -/**
452   - * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh
453   - *
454   - * Makes rcu_dereference_check() do the dirty work.
455   - */
456   -#define rcu_dereference_bh(p) \
457   - rcu_dereference_check(p, rcu_read_lock_bh_held())
458   -
459   -/**
460   - * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched
461   - *
462   - * Makes rcu_dereference_check() do the dirty work.
463   - */
464   -#define rcu_dereference_sched(p) \
465   - rcu_dereference_check(p, rcu_read_lock_sched_held())
466   -
467   -/**
468   - * rcu_assign_pointer - assign (publicize) a pointer to a newly
469   - * initialized structure that will be dereferenced by RCU read-side
470   - * critical sections. Returns the value assigned.
471   - *
472   - * Inserts memory barriers on architectures that require them
473 662 * (pretty much all of them other than x86), and also prevents
474 663 * the compiler from reordering the code that initializes the
475 664 * structure after the pointer assignment. More importantly, this
476 665 * call documents which pointers will be dereferenced by RCU read-side
477 666 * code.
478 667 */
479   -
480 668 #define rcu_assign_pointer(p, v) \
481   - ({ \
482   - if (!__builtin_constant_p(v) || \
483   - ((v) != NULL)) \
484   - smp_wmb(); \
485   - (p) = (v); \
486   - })
  669 + __rcu_assign_pointer((p), (v), __rcu)
487 670  
  671 +/**
  672 + * RCU_INIT_POINTER() - initialize an RCU protected pointer
  673 + *
  674 + * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep
  675 + * splats.
  676 + */
  677 +#define RCU_INIT_POINTER(p, v) \
  678 + p = (typeof(*v) __force __rcu *)(v)
  679 +
488 680 /* Infrastructure to implement the synchronize_() primitives. */
489 681  
490 682 struct rcu_synchronize {
... ... @@ -494,26 +686,37 @@
494 686  
495 687 extern void wakeme_after_rcu(struct rcu_head *head);
496 688  
  689 +#ifdef CONFIG_PREEMPT_RCU
  690 +
497 691 /**
498   - * call_rcu - Queue an RCU callback for invocation after a grace period.
  692 + * call_rcu() - Queue an RCU callback for invocation after a grace period.
499 693 * @head: structure to be used for queueing the RCU updates.
500   - * @func: actual update function to be invoked after the grace period
  694 + * @func: actual callback function to be invoked after the grace period
501 695 *
502   - * The update function will be invoked some time after a full grace
503   - * period elapses, in other words after all currently executing RCU
504   - * read-side critical sections have completed. RCU read-side critical
  696 + * The callback function will be invoked some time after a full grace
  697 + * period elapses, in other words after all pre-existing RCU read-side
  698 + * critical sections have completed. However, the callback function
  699 + * might well execute concurrently with RCU read-side critical sections
  700 + * that started after call_rcu() was invoked. RCU read-side critical
505 701 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
506 702 * and may be nested.
507 703 */
508 704 extern void call_rcu(struct rcu_head *head,
509 705 void (*func)(struct rcu_head *head));
510 706  
  707 +#else /* #ifdef CONFIG_PREEMPT_RCU */
  708 +
  709 +/* In classic RCU, call_rcu() is just call_rcu_sched(). */
  710 +#define call_rcu call_rcu_sched
  711 +
  712 +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  713 +
511 714 /**
512   - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
  715 + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
513 716 * @head: structure to be used for queueing the RCU updates.
514   - * @func: actual update function to be invoked after the grace period
  717 + * @func: actual callback function to be invoked after the grace period
515 718 *
516   - * The update function will be invoked some time after a full grace
  719 + * The callback function will be invoked some time after a full grace
517 720 * period elapses, in other words after all currently executing RCU
518 721 * read-side critical sections have completed. call_rcu_bh() assumes
519 722 * that the read-side critical sections end on completion of a softirq
... ... @@ -565,39 +768,6 @@
565 768 {
566 769 }
567 770 #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
568   -
569   -#ifndef CONFIG_PROVE_RCU
570   -#define __do_rcu_dereference_check(c) do { } while (0)
571   -#endif /* #ifdef CONFIG_PROVE_RCU */
572   -
573   -#define __rcu_dereference_index_check(p, c) \
574   - ({ \
575   - typeof(p) _________p1 = ACCESS_ONCE(p); \
576   - __do_rcu_dereference_check(c); \
577   - smp_read_barrier_depends(); \
578   - (_________p1); \
579   - })
580   -
581   -/**
582   - * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
583   - * @p: The pointer to read, prior to dereferencing
584   - * @c: The conditions under which the dereference will take place
585   - *
586   - * Similar to rcu_dereference_check(), but omits the sparse checking.
587   - * This allows rcu_dereference_index_check() to be used on integers,
588   - * which can then be used as array indices. Attempting to use
589   - * rcu_dereference_check() on an integer will give compiler warnings
590   - * because the sparse address-space mechanism relies on dereferencing
591   - * the RCU-protected pointer. Dereferencing integers is not something
592   - * that even gcc will put up with.
593   - *
594   - * Note that this function does not implicitly check for RCU read-side
595   - * critical sections. If this function gains lots of uses, it might
596   - * make sense to provide versions for each flavor of RCU, but it does
597   - * not make sense as of early 2010.
598   - */
599   -#define rcu_dereference_index_check(p, c) \
600   - __rcu_dereference_index_check((p), (c))
601 771  
602 772 #endif /* __LINUX_RCUPDATE_H */
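Taken together, the rcupdate.h changes above group the accessors by intent: rcu_assign_pointer() and RCU_INIT_POINTER() on the publish side, rcu_dereference()/rcu_dereference_bh()/rcu_dereference_sched() for readers, rcu_dereference_protected() for update-side code holding the right lock (as in the vhost hunks earlier), and rcu_access_pointer() for tests that never dereference the pointer. A short hedged summary sketch (cur_cfg and struct cfg are hypothetical):

#include <linux/rcupdate.h>

struct cfg {
	int value;
};

static struct cfg __rcu *cur_cfg;

static void cfg_init(struct cfg *c)
{
	/* No readers can see the pointer yet: plain initialization,
	 * no memory barrier, no lockdep splat. */
	RCU_INIT_POINTER(cur_cfg, c);
}

static int cfg_installed(void)
{
	/* Pointer is only compared against NULL, never dereferenced. */
	return rcu_access_pointer(cur_cfg) != NULL;
}

static int cfg_read_value(void)
{
	struct cfg *c;
	int v = -1;

	rcu_read_lock();
	c = rcu_dereference(cur_cfg);	/* barrier + implicit read-side check */
	if (c)
		v = c->value;
	rcu_read_unlock();
	return v;
}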
include/linux/rcutiny.h
... ... @@ -27,103 +27,101 @@
27 27  
28 28 #include <linux/cache.h>
29 29  
30   -void rcu_sched_qs(int cpu);
31   -void rcu_bh_qs(int cpu);
32   -static inline void rcu_note_context_switch(int cpu)
  30 +#define rcu_init_sched() do { } while (0)
  31 +
  32 +#ifdef CONFIG_TINY_RCU
  33 +
  34 +static inline void synchronize_rcu_expedited(void)
33 35 {
34   - rcu_sched_qs(cpu);
  36 + synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */
35 37 }
36 38  
37   -#define __rcu_read_lock() preempt_disable()
38   -#define __rcu_read_unlock() preempt_enable()
39   -#define __rcu_read_lock_bh() local_bh_disable()
40   -#define __rcu_read_unlock_bh() local_bh_enable()
41   -#define call_rcu_sched call_rcu
42   -
43   -#define rcu_init_sched() do { } while (0)
44   -extern void rcu_check_callbacks(int cpu, int user);
45   -
46   -static inline int rcu_needs_cpu(int cpu)
  39 +static inline void rcu_barrier(void)
47 40 {
48   - return 0;
  41 + rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */
49 42 }
50 43  
51   -/*
52   - * Return the number of grace periods.
53   - */
54   -static inline long rcu_batches_completed(void)
  44 +#else /* #ifdef CONFIG_TINY_RCU */
  45 +
  46 +void rcu_barrier(void);
  47 +void synchronize_rcu_expedited(void);
  48 +
  49 +#endif /* #else #ifdef CONFIG_TINY_RCU */
  50 +
  51 +static inline void synchronize_rcu_bh(void)
55 52 {
56   - return 0;
  53 + synchronize_sched();
57 54 }
58 55  
59   -/*
60   - * Return the number of bottom-half grace periods.
61   - */
62   -static inline long rcu_batches_completed_bh(void)
  56 +static inline void synchronize_rcu_bh_expedited(void)
63 57 {
64   - return 0;
  58 + synchronize_sched();
65 59 }
66 60  
67   -static inline void rcu_force_quiescent_state(void)
  61 +#ifdef CONFIG_TINY_RCU
  62 +
  63 +static inline void rcu_preempt_note_context_switch(void)
68 64 {
69 65 }
70 66  
71   -static inline void rcu_bh_force_quiescent_state(void)
  67 +static inline void exit_rcu(void)
72 68 {
73 69 }
74 70  
75   -static inline void rcu_sched_force_quiescent_state(void)
  71 +static inline int rcu_needs_cpu(int cpu)
76 72 {
  73 + return 0;
77 74 }
78 75  
79   -extern void synchronize_sched(void);
  76 +#else /* #ifdef CONFIG_TINY_RCU */
80 77  
81   -static inline void synchronize_rcu(void)
  78 +void rcu_preempt_note_context_switch(void);
  79 +extern void exit_rcu(void);
  80 +int rcu_preempt_needs_cpu(void);
  81 +
  82 +static inline int rcu_needs_cpu(int cpu)
82 83 {
83   - synchronize_sched();
  84 + return rcu_preempt_needs_cpu();
84 85 }
85 86  
86   -static inline void synchronize_rcu_bh(void)
  87 +#endif /* #else #ifdef CONFIG_TINY_RCU */
  88 +
  89 +static inline void rcu_note_context_switch(int cpu)
87 90 {
88   - synchronize_sched();
  91 + rcu_sched_qs(cpu);
  92 + rcu_preempt_note_context_switch();
89 93 }
90 94  
91   -static inline void synchronize_rcu_expedited(void)
  95 +/*
  96 + * Return the number of grace periods.
  97 + */
  98 +static inline long rcu_batches_completed(void)
92 99 {
93   - synchronize_sched();
  100 + return 0;
94 101 }
95 102  
96   -static inline void synchronize_rcu_bh_expedited(void)
  103 +/*
  104 + * Return the number of bottom-half grace periods.
  105 + */
  106 +static inline long rcu_batches_completed_bh(void)
97 107 {
98   - synchronize_sched();
  108 + return 0;
99 109 }
100 110  
101   -struct notifier_block;
102   -
103   -#ifdef CONFIG_NO_HZ
104   -
105   -extern void rcu_enter_nohz(void);
106   -extern void rcu_exit_nohz(void);
107   -
108   -#else /* #ifdef CONFIG_NO_HZ */
109   -
110   -static inline void rcu_enter_nohz(void)
  111 +static inline void rcu_force_quiescent_state(void)
111 112 {
112 113 }
113 114  
114   -static inline void rcu_exit_nohz(void)
  115 +static inline void rcu_bh_force_quiescent_state(void)
115 116 {
116 117 }
117 118  
118   -#endif /* #else #ifdef CONFIG_NO_HZ */
119   -
120   -static inline void exit_rcu(void)
  119 +static inline void rcu_sched_force_quiescent_state(void)
121 120 {
122 121 }
123 122  
124   -static inline int rcu_preempt_depth(void)
  123 +static inline void rcu_cpu_stall_reset(void)
125 124 {
126   - return 0;
127 125 }
128 126  
129 127 #ifdef CONFIG_DEBUG_LOCK_ALLOC
include/linux/rcutree.h
... ... @@ -30,64 +30,23 @@
30 30 #ifndef __LINUX_RCUTREE_H
31 31 #define __LINUX_RCUTREE_H
32 32  
33   -struct notifier_block;
34   -
35   -extern void rcu_sched_qs(int cpu);
36   -extern void rcu_bh_qs(int cpu);
37 33 extern void rcu_note_context_switch(int cpu);
38 34 extern int rcu_needs_cpu(int cpu);
  35 +extern void rcu_cpu_stall_reset(void);
39 36  
40 37 #ifdef CONFIG_TREE_PREEMPT_RCU
41 38  
42   -extern void __rcu_read_lock(void);
43   -extern void __rcu_read_unlock(void);
44   -extern void synchronize_rcu(void);
45 39 extern void exit_rcu(void);
46 40  
47   -/*
48   - * Defined as macro as it is a very low level header
49   - * included from areas that don't even know about current
50   - */
51   -#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
52   -
53 41 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
54 42  
55   -static inline void __rcu_read_lock(void)
56   -{
57   - preempt_disable();
58   -}
59   -
60   -static inline void __rcu_read_unlock(void)
61   -{
62   - preempt_enable();
63   -}
64   -
65   -#define synchronize_rcu synchronize_sched
66   -
67 43 static inline void exit_rcu(void)
68 44 {
69 45 }
70 46  
71   -static inline int rcu_preempt_depth(void)
72   -{
73   - return 0;
74   -}
75   -
76 47 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
77 48  
78   -static inline void __rcu_read_lock_bh(void)
79   -{
80   - local_bh_disable();
81   -}
82   -static inline void __rcu_read_unlock_bh(void)
83   -{
84   - local_bh_enable();
85   -}
86   -
87   -extern void call_rcu_sched(struct rcu_head *head,
88   - void (*func)(struct rcu_head *rcu));
89 49 extern void synchronize_rcu_bh(void);
90   -extern void synchronize_sched(void);
91 50 extern void synchronize_rcu_expedited(void);
92 51  
93 52 static inline void synchronize_rcu_bh_expedited(void)
... ... @@ -95,7 +54,7 @@
95 54 synchronize_sched_expedited();
96 55 }
97 56  
98   -extern void rcu_check_callbacks(int cpu, int user);
  57 +extern void rcu_barrier(void);
99 58  
100 59 extern long rcu_batches_completed(void);
101 60 extern long rcu_batches_completed_bh(void);
... ... @@ -103,18 +62,6 @@
103 62 extern void rcu_force_quiescent_state(void);
104 63 extern void rcu_bh_force_quiescent_state(void);
105 64 extern void rcu_sched_force_quiescent_state(void);
106   -
107   -#ifdef CONFIG_NO_HZ
108   -void rcu_enter_nohz(void);
109   -void rcu_exit_nohz(void);
110   -#else /* CONFIG_NO_HZ */
111   -static inline void rcu_enter_nohz(void)
112   -{
113   -}
114   -static inline void rcu_exit_nohz(void)
115   -{
116   -}
117   -#endif /* CONFIG_NO_HZ */
118 65  
119 66 /* A context switch is a grace period for RCU-sched and RCU-bh. */
120 67 static inline int rcu_blocking_is_gp(void)
include/linux/sched.h
... ... @@ -1202,11 +1202,13 @@
1202 1202 unsigned int policy;
1203 1203 cpumask_t cpus_allowed;
1204 1204  
1205   -#ifdef CONFIG_TREE_PREEMPT_RCU
  1205 +#ifdef CONFIG_PREEMPT_RCU
1206 1206 int rcu_read_lock_nesting;
1207 1207 char rcu_read_unlock_special;
1208   - struct rcu_node *rcu_blocked_node;
1209 1208 struct list_head rcu_node_entry;
  1209 +#endif /* #ifdef CONFIG_PREEMPT_RCU */
  1210 +#ifdef CONFIG_TREE_PREEMPT_RCU
  1211 + struct rcu_node *rcu_blocked_node;
1210 1212 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1211 1213  
1212 1214 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
1213 1215  
... ... @@ -1288,9 +1290,9 @@
1288 1290 struct list_head cpu_timers[3];
1289 1291  
1290 1292 /* process credentials */
1291   - const struct cred *real_cred; /* objective and real subjective task
  1293 + const struct cred __rcu *real_cred; /* objective and real subjective task
1292 1294 * credentials (COW) */
1293   - const struct cred *cred; /* effective (overridable) subjective task
  1295 + const struct cred __rcu *cred; /* effective (overridable) subjective task
1294 1296 * credentials (COW) */
1295 1297 struct mutex cred_guard_mutex; /* guard against foreign influences on
1296 1298 * credential calculations
... ... @@ -1418,7 +1420,7 @@
1418 1420 #endif
1419 1421 #ifdef CONFIG_CGROUPS
1420 1422 /* Control Group info protected by css_set_lock */
1421   - struct css_set *cgroups;
  1423 + struct css_set __rcu *cgroups;
1422 1424 /* cg_list protected by css_set_lock and tsk->alloc_lock */
1423 1425 struct list_head cg_list;
1424 1426 #endif
... ... @@ -1740,7 +1742,7 @@
1740 1742 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
1741 1743 #define used_math() tsk_used_math(current)
1742 1744  
1743   -#ifdef CONFIG_TREE_PREEMPT_RCU
  1745 +#ifdef CONFIG_PREEMPT_RCU
1744 1746  
1745 1747 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
1746 1748 #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
1747 1749  
... ... @@ -1749,7 +1751,9 @@
1749 1751 {
1750 1752 p->rcu_read_lock_nesting = 0;
1751 1753 p->rcu_read_unlock_special = 0;
  1754 +#ifdef CONFIG_TREE_PREEMPT_RCU
1752 1755 p->rcu_blocked_node = NULL;
  1756 +#endif
1753 1757 INIT_LIST_HEAD(&p->rcu_node_entry);
1754 1758 }
1755 1759  
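
The __rcu markings added above are sparse annotations only (see the SPARSE_RCU_POINTER option later in this diff); they have no effect on generated code, but they ask that such pointers be read through the RCU accessors. A hypothetical reader of ->cred might look like the sketch below; the in-tree __task_cred()/task_uid() helpers already encapsulate this pattern:

	static uid_t example_task_uid(struct task_struct *tsk)
	{
		const struct cred *cred;
		uid_t uid;

		rcu_read_lock();
		cred = rcu_dereference(tsk->cred);	/* __rcu pointer: use an accessor */
		uid = cred->uid;
		rcu_read_unlock();
		return uid;
	}
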
include/linux/srcu.h
... ... @@ -108,19 +108,43 @@
108 108 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
109 109  
110 110 /**
111   - * srcu_dereference - fetch SRCU-protected pointer with checking
  111 + * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing
  112 + * @p: the pointer to fetch and protect for later dereferencing
  113 + * @sp: pointer to the srcu_struct, which is used to check that we
  114 + * really are in an SRCU read-side critical section.
  115 + * @c: condition to check for update-side use
112 116 *
113   - * Makes rcu_dereference_check() do the dirty work.
  117 + * If PROVE_RCU is enabled, invoking this outside of an RCU read-side
  118 + * critical section will result in an RCU-lockdep splat, unless @c evaluates
  119 + * to 1. The @c argument will normally be a logical expression containing
  120 + * lockdep_is_held() calls.
114 121 */
115   -#define srcu_dereference(p, sp) \
116   - rcu_dereference_check(p, srcu_read_lock_held(sp))
  122 +#define srcu_dereference_check(p, sp, c) \
  123 + __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu)
117 124  
118 125 /**
  126 + * srcu_dereference - fetch SRCU-protected pointer for later dereferencing
  127 + * @p: the pointer to fetch and protect for later dereferencing
  128 + * @sp: pointer to the srcu_struct, which is used to check that we
  129 + * really are in an SRCU read-side critical section.
  130 + *
  131 + * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU
  132 + * is enabled, invoking this outside of an RCU read-side critical
  133 + * section will result in an RCU-lockdep splat.
  134 + */
  135 +#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0)
  136 +
  137 +/**
119 138 * srcu_read_lock - register a new reader for an SRCU-protected structure.
120 139 * @sp: srcu_struct in which to register the new reader.
121 140 *
122 141 * Enter an SRCU read-side critical section. Note that SRCU read-side
123   - * critical sections may be nested.
  142 + * critical sections may be nested. However, it is illegal to
  143 + * call anything that waits on an SRCU grace period for the same
  144 + * srcu_struct, whether directly or indirectly. Please note that
  145 + * one way to indirectly wait on an SRCU grace period is to acquire
  146 + * a mutex that is held elsewhere while calling synchronize_srcu() or
  147 + * synchronize_srcu_expedited().
124 148 */
125 149 static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
126 150 {
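
To make the @c argument above concrete, a sketch with invented names (gp, my_srcu, my_mutex): the fetch is legal either inside srcu_read_lock(&my_srcu) or with my_mutex held on the update side, so lockdep complains only when neither protection is in place.

	struct foo {
		int a;
	};
	static struct foo __rcu *gp;
	static struct srcu_struct my_srcu;
	static DEFINE_MUTEX(my_mutex);

	/* Caller must be in srcu_read_lock(&my_srcu) or hold my_mutex. */
	static int example_read_a(void)
	{
		struct foo *p;

		p = srcu_dereference_check(gp, &my_srcu,
					   lockdep_is_held(&my_mutex));
		return p ? p->a : -1;
	}
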
include/linux/sunrpc/auth_gss.h
... ... @@ -69,7 +69,7 @@
69 69 enum rpc_gss_proc gc_proc;
70 70 u32 gc_seq;
71 71 spinlock_t gc_seq_lock;
72   - struct gss_ctx *gc_gss_ctx;
  72 + struct gss_ctx __rcu *gc_gss_ctx;
73 73 struct xdr_netobj gc_wire_ctx;
74 74 u32 gc_win;
75 75 unsigned long gc_expiry;
... ... @@ -80,7 +80,7 @@
80 80 struct gss_cred {
81 81 struct rpc_cred gc_base;
82 82 enum rpc_gss_svc gc_service;
83   - struct gss_cl_ctx *gc_ctx;
  83 + struct gss_cl_ctx __rcu *gc_ctx;
84 84 struct gss_upcall_msg *gc_upcall;
85 85 unsigned long gc_upcall_timestamp;
86 86 unsigned char gc_machine_cred : 1;
include/net/cls_cgroup.h
... ... @@ -45,7 +45,8 @@
45 45 return 0;
46 46  
47 47 rcu_read_lock();
48   - id = rcu_dereference(net_cls_subsys_id);
  48 + id = rcu_dereference_index_check(net_cls_subsys_id,
  49 + rcu_read_lock_held());
49 50 if (id >= 0)
50 51 classid = container_of(task_subsys_state(p, id),
51 52 struct cgroup_cls_state, css)->classid;
include/net/netfilter/nf_conntrack.h
... ... @@ -75,7 +75,7 @@
75 75 /* nf_conn feature for connections that have a helper */
76 76 struct nf_conn_help {
77 77 /* Helper. if any */
78   - struct nf_conntrack_helper *helper;
  78 + struct nf_conntrack_helper __rcu *helper;
79 79  
80 80 union nf_conntrack_help help;
81 81  
... ... @@ -340,6 +340,7 @@
340 340  
341 341 config TREE_RCU
342 342 bool "Tree-based hierarchical RCU"
  343 + depends on !PREEMPT && SMP
343 344 help
344 345 This option selects the RCU implementation that is
345 346 designed for very large SMP system with hundreds or
... ... @@ -347,7 +348,7 @@
347 348 smaller systems.
348 349  
349 350 config TREE_PREEMPT_RCU
350   - bool "Preemptable tree-based hierarchical RCU"
  351 + bool "Preemptible tree-based hierarchical RCU"
351 352 depends on PREEMPT
352 353 help
353 354 This option selects the RCU implementation that is
354 355  
... ... @@ -365,8 +366,22 @@
365 366 is not required. This option greatly reduces the
366 367 memory footprint of RCU.
367 368  
  369 +config TINY_PREEMPT_RCU
  370 + bool "Preemptible UP-only small-memory-footprint RCU"
  371 + depends on !SMP && PREEMPT
  372 + help
  373 + This option selects the RCU implementation that is designed
  374 + for real-time UP systems. This option greatly reduces the
  375 + memory footprint of RCU.
  376 +
368 377 endchoice
369 378  
  379 +config PREEMPT_RCU
  380 + def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU )
  381 + help
  382 + This option enables preemptible-RCU code that is common between
  383 + the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
  384 +
370 385 config RCU_TRACE
371 386 bool "Enable tracing for RCU"
372 387 depends on TREE_RCU || TREE_PREEMPT_RCU
... ... @@ -387,9 +402,12 @@
387 402 help
388 403 This option controls the fanout of hierarchical implementations
389 404 of RCU, allowing RCU to work efficiently on machines with
390   - large numbers of CPUs. This value must be at least the cube
391   - root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
392   - systems and up to 262,144 for 64-bit systems.
  405 + large numbers of CPUs. This value must be at least the fourth
  406 + root of NR_CPUS, which allows NR_CPUS to be insanely large.
  407 + The default value of RCU_FANOUT should be used for production
  408 + systems, but if you are stress-testing the RCU implementation
  409 + itself, small RCU_FANOUT values allow you to test large-system
  410 + code paths on small(er) systems.
393 411  
394 412 Select a specific number if testing RCU itself.
395 413 Take the default if unsure.
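
To make the fourth-root requirement concrete (the wording implies at most four levels of rcu_node hierarchy), a fanout of F covers up to F^4 CPUs:

	RCU_FANOUT = 16:  16^4 =     65,536 CPUs
	RCU_FANOUT = 32:  32^4 =  1,048,576 CPUs
	RCU_FANOUT = 64:  64^4 = 16,777,216 CPUs

Conversely, an artificially small value such as RCU_FANOUT=2 forces a multi-level tree on a machine with only a handful of CPUs, which is the stress-testing use mentioned above.
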
... ... @@ -86,6 +86,7 @@
86 86 obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
87 87 obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
88 88 obj-$(CONFIG_TINY_RCU) += rcutiny.o
  89 +obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o
89 90 obj-$(CONFIG_RELAY) += relay.o
90 91 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
91 92 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
... ... @@ -138,7 +138,7 @@
138 138 * is called after synchronize_rcu(). But for safe use, css_is_removed()
139 139 * css_tryget() should be used for avoiding race.
140 140 */
141   - struct cgroup_subsys_state *css;
  141 + struct cgroup_subsys_state __rcu *css;
142 142 /*
143 143 * ID of this css.
144 144 */
... ... @@ -401,7 +401,7 @@
401 401 struct task_struct *result = NULL;
402 402 if (pid) {
403 403 struct hlist_node *first;
404   - first = rcu_dereference_check(pid->tasks[type].first,
  404 + first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
405 405 rcu_read_lock_held() ||
406 406 lockdep_tasklist_lock_is_held());
407 407 if (first)
... ... @@ -416,6 +416,7 @@
416 416 */
417 417 struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
418 418 {
  419 + rcu_lockdep_assert(rcu_read_lock_held());
419 420 return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
420 421 }
421 422  
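
The new assertion documents the existing rule that find_task_by_pid_ns() may only be called from within an RCU read-side critical section. A typical (hypothetical) caller pins the task before dropping the lock:

	static struct task_struct *example_get_task(pid_t nr)
	{
		struct task_struct *p;

		rcu_read_lock();			/* required by the assertion above */
		p = find_task_by_pid_ns(nr, &init_pid_ns);
		if (p)
			get_task_struct(p);		/* pin before rcu_read_unlock() */
		rcu_read_unlock();
		return p;				/* caller drops with put_task_struct() */
	}
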
... ... @@ -73,12 +73,14 @@
73 73 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
74 74  
75 75 /**
76   - * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section?
  76 + * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
77 77 *
78 78 * Check for bottom half being disabled, which covers both the
79 79 * CONFIG_PROVE_RCU and not cases. Note that if someone uses
80 80 * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
81   - * will show the situation.
  81 + * will show the situation. This is useful for debug checks in functions
  82 + * that require that they be called within an RCU read-side critical
  83 + * section.
82 84 *
83 85 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
84 86 */
... ... @@ -59,6 +59,14 @@
59 59 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
60 60 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
61 61  
  62 +/* Forward declarations for rcutiny_plugin.h. */
  63 +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
  64 +static void __call_rcu(struct rcu_head *head,
  65 + void (*func)(struct rcu_head *rcu),
  66 + struct rcu_ctrlblk *rcp);
  67 +
  68 +#include "rcutiny_plugin.h"
  69 +
62 70 #ifdef CONFIG_NO_HZ
63 71  
64 72 static long rcu_dynticks_nesting = 1;
... ... @@ -140,6 +148,7 @@
140 148 rcu_sched_qs(cpu);
141 149 else if (!in_softirq())
142 150 rcu_bh_qs(cpu);
  151 + rcu_preempt_check_callbacks();
143 152 }
144 153  
145 154 /*
... ... @@ -162,6 +171,7 @@
162 171 *rcp->donetail = NULL;
163 172 if (rcp->curtail == rcp->donetail)
164 173 rcp->curtail = &rcp->rcucblist;
  174 + rcu_preempt_remove_callbacks(rcp);
165 175 rcp->donetail = &rcp->rcucblist;
166 176 local_irq_restore(flags);
167 177  
... ... @@ -182,6 +192,7 @@
182 192 {
183 193 __rcu_process_callbacks(&rcu_sched_ctrlblk);
184 194 __rcu_process_callbacks(&rcu_bh_ctrlblk);
  195 + rcu_preempt_process_callbacks();
185 196 }
186 197  
187 198 /*
188 199  
189 200  
... ... @@ -223,15 +234,15 @@
223 234 }
224 235  
225 236 /*
226   - * Post an RCU callback to be invoked after the end of an RCU grace
  237 + * Post an RCU callback to be invoked after the end of an RCU-sched grace
227 238 * period. But since we have but one CPU, that would be after any
228 239 * quiescent state.
229 240 */
230   -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
  241 +void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
231 242 {
232 243 __call_rcu(head, func, &rcu_sched_ctrlblk);
233 244 }
234   -EXPORT_SYMBOL_GPL(call_rcu);
  245 +EXPORT_SYMBOL_GPL(call_rcu_sched);
235 246  
236 247 /*
237 248 * Post an RCU bottom-half callback to be invoked after any subsequent
... ... @@ -243,20 +254,6 @@
243 254 }
244 255 EXPORT_SYMBOL_GPL(call_rcu_bh);
245 256  
246   -void rcu_barrier(void)
247   -{
248   - struct rcu_synchronize rcu;
249   -
250   - init_rcu_head_on_stack(&rcu.head);
251   - init_completion(&rcu.completion);
252   - /* Will wake me after RCU finished. */
253   - call_rcu(&rcu.head, wakeme_after_rcu);
254   - /* Wait for it. */
255   - wait_for_completion(&rcu.completion);
256   - destroy_rcu_head_on_stack(&rcu.head);
257   -}
258   -EXPORT_SYMBOL_GPL(rcu_barrier);
259   -
260 257 void rcu_barrier_bh(void)
261 258 {
262 259 struct rcu_synchronize rcu;
... ... @@ -289,6 +286,4 @@
289 286 {
290 287 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
291 288 }
292   -
293   -#include "rcutiny_plugin.h"
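
rcu_barrier() moves into rcutiny_plugin.h (below) so that, under TINY_PREEMPT_RCU, it waits for call_rcu() callbacks rather than call_rcu_sched() ones. The classic reason to call it at all, sketched here with invented names, is module unload: every callback posted by the module must run before the module's code and caches disappear.

	static void __exit example_exit(void)
	{
		example_del_all_entries();	/* posts call_rcu() callbacks */
		rcu_barrier();			/* wait for all of them to run */
		kmem_cache_destroy(example_cache);
	}
	module_exit(example_exit);
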
kernel/rcutiny_plugin.h
1 1 /*
2   - * Read-Copy Update mechanism for mutual exclusion (tree-based version)
  2 + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
3 3 * Internal non-public definitions that provide either classic
4   - * or preemptable semantics.
  4 + * or preemptible semantics.
5 5 *
6 6 * This program is free software; you can redistribute it and/or modify
7 7 * it under the terms of the GNU General Public License as published by
8 8  
... ... @@ -17,10 +17,582 @@
17 17 * along with this program; if not, write to the Free Software
18 18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 19 *
20   - * Copyright IBM Corporation, 2009
  20 + * Copyright (c) 2010 Linaro
21 21 *
22 22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
23 23 */
  24 +
  25 +#ifdef CONFIG_TINY_PREEMPT_RCU
  26 +
  27 +#include <linux/delay.h>
  28 +
  29 +/* Global control variables for preemptible RCU. */
  30 +struct rcu_preempt_ctrlblk {
  31 + struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */
  32 + struct rcu_head **nexttail;
  33 + /* Tasks blocked in a preemptible RCU */
  34 + /* read-side critical section while a */
  35 + /* preemptible-RCU grace period is in */
  36 + /* progress must wait for a later grace */
  37 + /* period. This pointer points to the */
  38 + /* ->next pointer of the last task that */
  39 + /* must wait for a later grace period, or */
  40 + /* to &->rcb.rcucblist if there is no */
  41 + /* such task. */
  42 + struct list_head blkd_tasks;
  43 + /* Tasks blocked in RCU read-side critical */
  44 + /* section. Tasks are placed at the head */
  45 + /* of this list and age towards the tail. */
  46 + struct list_head *gp_tasks;
  47 + /* Pointer to the first task blocking the */
  48 + /* current grace period, or NULL if there */
  49 + /* is no such task. */
  50 + struct list_head *exp_tasks;
  51 + /* Pointer to first task blocking the */
  52 + /* current expedited grace period, or NULL */
  53 + /* if there is no such task. If there */
  54 + /* is no current expedited grace period, */
  55 + /* then there cannot be any such task. */
  56 + u8 gpnum; /* Current grace period. */
  57 + u8 gpcpu; /* Last grace period blocked by the CPU. */
  58 + u8 completed; /* Last grace period completed. */
  59 + /* If all three are equal, RCU is idle. */
  60 +};
  61 +
  62 +static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
  63 + .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist,
  64 + .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
  65 + .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
  66 + .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
  67 +};
  68 +
  69 +static int rcu_preempted_readers_exp(void);
  70 +static void rcu_report_exp_done(void);
  71 +
  72 +/*
  73 + * Return true if the CPU has not yet responded to the current grace period.
  74 + */
  75 +static int rcu_cpu_cur_gp(void)
  76 +{
  77 + return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum;
  78 +}
  79 +
  80 +/*
  81 + * Check for a running RCU reader. Because there is only one CPU,
  82 + * there can be but one running RCU reader at a time. ;-)
  83 + */
  84 +static int rcu_preempt_running_reader(void)
  85 +{
  86 + return current->rcu_read_lock_nesting;
  87 +}
  88 +
  89 +/*
  90 + * Check for preempted RCU readers blocking any grace period.
  91 + * If the caller needs a reliable answer, it must disable hard irqs.
  92 + */
  93 +static int rcu_preempt_blocked_readers_any(void)
  94 +{
  95 + return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks);
  96 +}
  97 +
  98 +/*
  99 + * Check for preempted RCU readers blocking the current grace period.
  100 + * If the caller needs a reliable answer, it must disable hard irqs.
  101 + */
  102 +static int rcu_preempt_blocked_readers_cgp(void)
  103 +{
  104 + return rcu_preempt_ctrlblk.gp_tasks != NULL;
  105 +}
  106 +
  107 +/*
  108 + * Return true if another preemptible-RCU grace period is needed.
  109 + */
  110 +static int rcu_preempt_needs_another_gp(void)
  111 +{
  112 + return *rcu_preempt_ctrlblk.rcb.curtail != NULL;
  113 +}
  114 +
  115 +/*
  116 + * Return true if a preemptible-RCU grace period is in progress.
  117 + * The caller must disable hardirqs.
  118 + */
  119 +static int rcu_preempt_gp_in_progress(void)
  120 +{
  121 + return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
  122 +}
  123 +
  124 +/*
  125 + * Record a preemptible-RCU quiescent state for the specified CPU. Note
  126 + * that this just means that the task currently running on the CPU is
  127 + * in a quiescent state. There might be any number of tasks blocked
  128 + * while in an RCU read-side critical section.
  129 + *
  130 + * Unlike the other rcu_*_qs() functions, callers to this function
  131 + * must disable irqs in order to protect the assignment to
  132 + * ->rcu_read_unlock_special.
  133 + *
  134 + * Because this is a single-CPU implementation, the only way a grace
  135 + * period can end is if the CPU is in a quiescent state. The reason is
  136 + * that a blocked preemptible-RCU reader can exit its critical section
  137 + * only if the CPU is running it at the time. Therefore, when the
  138 + * last task blocking the current grace period exits its RCU read-side
  139 + * critical section, neither the CPU nor blocked tasks will be stopping
  140 + * the current grace period. (In contrast, SMP implementations
  141 + * might have CPUs running in RCU read-side critical sections that
  142 + * block later grace periods -- but this is not possible given only
  143 + * one CPU.)
  144 + */
  145 +static void rcu_preempt_cpu_qs(void)
  146 +{
  147 + /* Record both CPU and task as having responded to current GP. */
  148 + rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
  149 + current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
  150 +
  151 + /*
  152 + * If there is no GP, or if blocked readers are still blocking GP,
  153 + * then there is nothing more to do.
  154 + */
  155 + if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
  156 + return;
  157 +
  158 + /* Advance callbacks. */
  159 + rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
  160 + rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail;
  161 + rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail;
  162 +
  163 + /* If there are no blocked readers, next GP is done instantly. */
  164 + if (!rcu_preempt_blocked_readers_any())
  165 + rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
  166 +
  167 + /* If there are done callbacks, make RCU_SOFTIRQ process them. */
  168 + if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
  169 + raise_softirq(RCU_SOFTIRQ);
  170 +}
  171 +
  172 +/*
  173 + * Start a new RCU grace period if warranted. Hard irqs must be disabled.
  174 + */
  175 +static void rcu_preempt_start_gp(void)
  176 +{
  177 + if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) {
  178 +
  179 + /* Official start of GP. */
  180 + rcu_preempt_ctrlblk.gpnum++;
  181 +
  182 + /* Any blocked RCU readers block new GP. */
  183 + if (rcu_preempt_blocked_readers_any())
  184 + rcu_preempt_ctrlblk.gp_tasks =
  185 + rcu_preempt_ctrlblk.blkd_tasks.next;
  186 +
  187 + /* If there is no running reader, CPU is done with GP. */
  188 + if (!rcu_preempt_running_reader())
  189 + rcu_preempt_cpu_qs();
  190 + }
  191 +}
  192 +
  193 +/*
  194 + * We have entered the scheduler, and the current task might soon be
  195 + * context-switched away from. If this task is in an RCU read-side
  196 + * critical section, we will no longer be able to rely on the CPU to
  197 + * record that fact, so we enqueue the task on the blkd_tasks list.
  198 + * If the task started after the current grace period began, as recorded
  199 + * by ->gpcpu, we enqueue at the beginning of the list. Otherwise, we
  200 + * enqueue it before the element referenced by ->gp_tasks (or at the tail
  201 + * if ->gp_tasks is NULL) and point ->gp_tasks at the newly added element.
  202 + * The task will dequeue itself when it exits the outermost enclosing
  203 + * RCU read-side critical section. Therefore, the current grace period
  204 + * cannot be permitted to complete until the ->gp_tasks pointer becomes
  205 + * NULL.
  206 + *
  207 + * Caller must disable preemption.
  208 + */
  209 +void rcu_preempt_note_context_switch(void)
  210 +{
  211 + struct task_struct *t = current;
  212 + unsigned long flags;
  213 +
  214 + local_irq_save(flags); /* must exclude scheduler_tick(). */
  215 + if (rcu_preempt_running_reader() &&
  216 + (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
  217 +
  218 + /* Possibly blocking in an RCU read-side critical section. */
  219 + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
  220 +
  221 + /*
  222 + * If this CPU has already checked in, then this task
  223 + * will hold up the next grace period rather than the
  224 + * current grace period. Queue the task accordingly.
  225 + * If the task is queued for the current grace period
  226 + * (i.e., this CPU has not yet passed through a quiescent
  227 + * state for the current grace period), then as long
  228 + * as that task remains queued, the current grace period
  229 + * cannot end.
  230 + */
  231 + list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
  232 + if (rcu_cpu_cur_gp())
  233 + rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
  234 + }
  235 +
  236 + /*
  237 + * Either we were not in an RCU read-side critical section to
  238 + * begin with, or we have now recorded that critical section
  239 + * globally. Either way, we can now note a quiescent state
  240 + * for this CPU. Again, if we were in an RCU read-side critical
  241 + * section, and if that critical section was blocking the current
  242 + * grace period, then the fact that the task has been enqueued
  243 + * means that current grace period continues to be blocked.
  244 + */
  245 + rcu_preempt_cpu_qs();
  246 + local_irq_restore(flags);
  247 +}
  248 +
  249 +/*
  250 + * Tiny-preemptible RCU implementation for rcu_read_lock().
  251 + * Just increment ->rcu_read_lock_nesting, shared state will be updated
  252 + * if we block.
  253 + */
  254 +void __rcu_read_lock(void)
  255 +{
  256 + current->rcu_read_lock_nesting++;
  257 + barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */
  258 +}
  259 +EXPORT_SYMBOL_GPL(__rcu_read_lock);
  260 +
  261 +/*
  262 + * Handle special cases during rcu_read_unlock(), such as needing to
  263 + * notify RCU core processing or task having blocked during the RCU
  264 + * read-side critical section.
  265 + */
  266 +static void rcu_read_unlock_special(struct task_struct *t)
  267 +{
  268 + int empty;
  269 + int empty_exp;
  270 + unsigned long flags;
  271 + struct list_head *np;
  272 + int special;
  273 +
  274 + /*
  275 + * NMI handlers cannot block and cannot safely manipulate state.
  276 + * They therefore cannot possibly be special, so just leave.
  277 + */
  278 + if (in_nmi())
  279 + return;
  280 +
  281 + local_irq_save(flags);
  282 +
  283 + /*
  284 + * If RCU core is waiting for this CPU to exit critical section,
  285 + * let it know that we have done so.
  286 + */
  287 + special = t->rcu_read_unlock_special;
  288 + if (special & RCU_READ_UNLOCK_NEED_QS)
  289 + rcu_preempt_cpu_qs();
  290 +
  291 + /* Hardware IRQ handlers cannot block. */
  292 + if (in_irq()) {
  293 + local_irq_restore(flags);
  294 + return;
  295 + }
  296 +
  297 + /* Clean up if blocked during RCU read-side critical section. */
  298 + if (special & RCU_READ_UNLOCK_BLOCKED) {
  299 + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
  300 +
  301 + /*
  302 + * Remove this task from the ->blkd_tasks list and adjust
  303 + * any pointers that might have been referencing it.
  304 + */
  305 + empty = !rcu_preempt_blocked_readers_cgp();
  306 + empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
  307 + np = t->rcu_node_entry.next;
  308 + if (np == &rcu_preempt_ctrlblk.blkd_tasks)
  309 + np = NULL;
  310 + list_del(&t->rcu_node_entry);
  311 + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
  312 + rcu_preempt_ctrlblk.gp_tasks = np;
  313 + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
  314 + rcu_preempt_ctrlblk.exp_tasks = np;
  315 + INIT_LIST_HEAD(&t->rcu_node_entry);
  316 +
  317 + /*
  318 + * If this was the last task on the current list, and if
  319 + * we aren't waiting on the CPU, report the quiescent state
  320 + * and start a new grace period if needed.
  321 + */
  322 + if (!empty && !rcu_preempt_blocked_readers_cgp()) {
  323 + rcu_preempt_cpu_qs();
  324 + rcu_preempt_start_gp();
  325 + }
  326 +
  327 + /*
  328 + * If this was the last task on the expedited lists,
  329 + * then we need to wake up the waiting task.
  330 + */
  331 + if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
  332 + rcu_report_exp_done();
  333 + }
  334 + local_irq_restore(flags);
  335 +}
  336 +
  337 +/*
  338 + * Tiny-preemptible RCU implementation for rcu_read_unlock().
  339 + * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
  340 + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
  341 + * invoke rcu_read_unlock_special() to clean up after a context switch
  342 + * in an RCU read-side critical section and other special cases.
  343 + */
  344 +void __rcu_read_unlock(void)
  345 +{
  346 + struct task_struct *t = current;
  347 +
  348 + barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
  349 + --t->rcu_read_lock_nesting;
  350 + barrier(); /* decrement before load of ->rcu_read_unlock_special */
  351 + if (t->rcu_read_lock_nesting == 0 &&
  352 + unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
  353 + rcu_read_unlock_special(t);
  354 +#ifdef CONFIG_PROVE_LOCKING
  355 + WARN_ON_ONCE(t->rcu_read_lock_nesting < 0);
  356 +#endif /* #ifdef CONFIG_PROVE_LOCKING */
  357 +}
  358 +EXPORT_SYMBOL_GPL(__rcu_read_unlock);
  359 +
  360 +/*
  361 + * Check for a quiescent state from the current CPU. When a task blocks,
  362 + * the task is recorded in the rcu_preempt_ctrlblk structure, which is
  363 + * checked elsewhere. This is called from the scheduling-clock interrupt.
  364 + *
  365 + * Caller must disable hard irqs.
  366 + */
  367 +static void rcu_preempt_check_callbacks(void)
  368 +{
  369 + struct task_struct *t = current;
  370 +
  371 + if (!rcu_preempt_running_reader() && rcu_preempt_gp_in_progress())
  372 + rcu_preempt_cpu_qs();
  373 + if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
  374 + rcu_preempt_ctrlblk.rcb.donetail)
  375 + raise_softirq(RCU_SOFTIRQ);
  376 + if (rcu_preempt_gp_in_progress() && rcu_preempt_running_reader())
  377 + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
  378 +}
  379 +
  380 +/*
  381 + * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
  382 + * update, so this is invoked from __rcu_process_callbacks() to
  383 + * handle that case. Of course, it is invoked for all flavors of
  384 + * RCU, but RCU callbacks can appear only on one of the lists, and
  385 + * neither ->nexttail nor ->donetail can possibly be NULL, so there
  386 + * is no need for an explicit check.
  387 + */
  388 +static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
  389 +{
  390 + if (rcu_preempt_ctrlblk.nexttail == rcp->donetail)
  391 + rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist;
  392 +}
  393 +
  394 +/*
  395 + * Process callbacks for preemptible RCU.
  396 + */
  397 +static void rcu_preempt_process_callbacks(void)
  398 +{
  399 + __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
  400 +}
  401 +
  402 +/*
  403 + * Queue a preemptible-RCU callback for invocation after a grace period.
  404 + */
  405 +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
  406 +{
  407 + unsigned long flags;
  408 +
  409 + debug_rcu_head_queue(head);
  410 + head->func = func;
  411 + head->next = NULL;
  412 +
  413 + local_irq_save(flags);
  414 + *rcu_preempt_ctrlblk.nexttail = head;
  415 + rcu_preempt_ctrlblk.nexttail = &head->next;
  416 + rcu_preempt_start_gp(); /* checks to see if GP needed. */
  417 + local_irq_restore(flags);
  418 +}
  419 +EXPORT_SYMBOL_GPL(call_rcu);
  420 +
  421 +void rcu_barrier(void)
  422 +{
  423 + struct rcu_synchronize rcu;
  424 +
  425 + init_rcu_head_on_stack(&rcu.head);
  426 + init_completion(&rcu.completion);
  427 + /* Will wake me after RCU finished. */
  428 + call_rcu(&rcu.head, wakeme_after_rcu);
  429 + /* Wait for it. */
  430 + wait_for_completion(&rcu.completion);
  431 + destroy_rcu_head_on_stack(&rcu.head);
  432 +}
  433 +EXPORT_SYMBOL_GPL(rcu_barrier);
  434 +
  435 +/*
  436 + * synchronize_rcu - wait until a grace period has elapsed.
  437 + *
  438 + * Control will return to the caller some time after a full grace
  439 + * period has elapsed, in other words after all currently executing RCU
  440 + * read-side critical sections have completed. RCU read-side critical
  441 + * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
  442 + * and may be nested.
  443 + */
  444 +void synchronize_rcu(void)
  445 +{
  446 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  447 + if (!rcu_scheduler_active)
  448 + return;
  449 +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
  450 +
  451 + WARN_ON_ONCE(rcu_preempt_running_reader());
  452 + if (!rcu_preempt_blocked_readers_any())
  453 + return;
  454 +
  455 + /* Once we get past the fastpath checks, same code as rcu_barrier(). */
  456 + rcu_barrier();
  457 +}
  458 +EXPORT_SYMBOL_GPL(synchronize_rcu);
  459 +
  460 +static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
  461 +static unsigned long sync_rcu_preempt_exp_count;
  462 +static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
  463 +
  464 +/*
  465 + * Return non-zero if there are any tasks in RCU read-side critical
  466 + * sections blocking the current preemptible-RCU expedited grace period.
  467 + * If there is no preemptible-RCU expedited grace period currently in
  468 + * progress, returns zero unconditionally.
  469 + */
  470 +static int rcu_preempted_readers_exp(void)
  471 +{
  472 + return rcu_preempt_ctrlblk.exp_tasks != NULL;
  473 +}
  474 +
  475 +/*
  476 + * Report the exit from RCU read-side critical section for the last task
  477 + * that queued itself during or before the current expedited preemptible-RCU
  478 + * grace period.
  479 + */
  480 +static void rcu_report_exp_done(void)
  481 +{
  482 + wake_up(&sync_rcu_preempt_exp_wq);
  483 +}
  484 +
  485 +/*
  486 + * Wait for an rcu-preempt grace period, but expedite it. The basic idea
  487 + * is to rely on the fact that there is but one CPU, and that it is
  488 + * illegal for a task to invoke synchronize_rcu_expedited() while in a
  489 + * preemptible-RCU read-side critical section. Therefore, any such
  490 + * critical sections must correspond to blocked tasks, which must therefore
  491 + * be on the ->blkd_tasks list. So just record the current head of the
  492 + * list in the ->exp_tasks pointer, and wait for all tasks including and
  493 + * after the task pointed to by ->exp_tasks to drain.
  494 + */
  495 +void synchronize_rcu_expedited(void)
  496 +{
  497 + unsigned long flags;
  498 + struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk;
  499 + unsigned long snap;
  500 +
  501 + barrier(); /* ensure prior action seen before grace period. */
  502 +
  503 + WARN_ON_ONCE(rcu_preempt_running_reader());
  504 +
  505 + /*
  506 + * Acquire lock so that there is only one preemptible RCU grace
  507 + * period in flight. Of course, if someone does the expedited
  508 + * grace period for us while we are acquiring the lock, just leave.
  509 + */
  510 + snap = sync_rcu_preempt_exp_count + 1;
  511 + mutex_lock(&sync_rcu_preempt_exp_mutex);
  512 + if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count))
  513 + goto unlock_mb_ret; /* Others did our work for us. */
  514 +
  515 + local_irq_save(flags);
  516 +
  517 + /*
  518 + * All RCU readers have to already be on blkd_tasks because
  519 + * we cannot legally be executing in an RCU read-side critical
  520 + * section.
  521 + */
  522 +
  523 + /* Snapshot current head of ->blkd_tasks list. */
  524 + rpcp->exp_tasks = rpcp->blkd_tasks.next;
  525 + if (rpcp->exp_tasks == &rpcp->blkd_tasks)
  526 + rpcp->exp_tasks = NULL;
  527 + local_irq_restore(flags);
  528 +
  529 + /* Wait for tail of ->blkd_tasks list to drain. */
  530 + if (rcu_preempted_readers_exp())
  531 + wait_event(sync_rcu_preempt_exp_wq,
  532 + !rcu_preempted_readers_exp());
  533 +
  534 + /* Clean up and exit. */
  535 + barrier(); /* ensure expedited GP seen before counter increment. */
  536 + sync_rcu_preempt_exp_count++;
  537 +unlock_mb_ret:
  538 + mutex_unlock(&sync_rcu_preempt_exp_mutex);
  539 + barrier(); /* ensure subsequent action seen after grace period. */
  540 +}
  541 +EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
  542 +
  543 +/*
  544 + * Does preemptible RCU need the CPU to stay out of dynticks mode?
  545 + */
  546 +int rcu_preempt_needs_cpu(void)
  547 +{
  548 + if (!rcu_preempt_running_reader())
  549 + rcu_preempt_cpu_qs();
  550 + return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
  551 +}
  552 +
  553 +/*
  554 + * Check for a task exiting while in a preemptible-RCU read-side
  555 + * critical section, clean up if so. No need to issue warnings,
  556 + * as debug_check_no_locks_held() already does this if lockdep
  557 + * is enabled.
  558 + */
  559 +void exit_rcu(void)
  560 +{
  561 + struct task_struct *t = current;
  562 +
  563 + if (t->rcu_read_lock_nesting == 0)
  564 + return;
  565 + t->rcu_read_lock_nesting = 1;
  566 + rcu_read_unlock();
  567 +}
  568 +
  569 +#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
  570 +
  571 +/*
  572 + * Because preemptible RCU does not exist, it never has any callbacks
  573 + * to check.
  574 + */
  575 +static void rcu_preempt_check_callbacks(void)
  576 +{
  577 +}
  578 +
  579 +/*
  580 + * Because preemptible RCU does not exist, it never has any callbacks
  581 + * to remove.
  582 + */
  583 +static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
  584 +{
  585 +}
  586 +
  587 +/*
  588 + * Because preemptible RCU does not exist, it never has any callbacks
  589 + * to process.
  590 + */
  591 +static void rcu_preempt_process_callbacks(void)
  592 +{
  593 +}
  594 +
  595 +#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
24 596  
25 597 #ifdef CONFIG_DEBUG_LOCK_ALLOC
26 598  
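
A note on ULONG_CMP_LT(), used by synchronize_rcu_expedited() above; its definition is removed from the RCU-tree private header later in this diff, presumably on its way to a shared header. It is a wrap-safe "less than" for unsigned long counters:

	#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

	/*
	 * Worked example, assuming a 32-bit unsigned long:
	 * ULONG_CMP_LT(0xfffffffe, 1) computes 0xfffffffe - 1 = 0xfffffffd,
	 * which exceeds ULONG_MAX / 2, so the result is true -- the value 1
	 * is correctly treated as coming after 0xfffffffe across the wrap.
	 */
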
... ... @@ -303,6 +303,10 @@
303 303 mdelay(longdelay_ms);
304 304 if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us)))
305 305 udelay(shortdelay_us);
  306 +#ifdef CONFIG_PREEMPT
  307 + if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000)))
  308 + preempt_schedule(); /* No QS if preempt_disable() in effect */
  309 +#endif
306 310 }
307 311  
308 312 static void rcu_torture_read_unlock(int idx) __releases(RCU)
... ... @@ -536,6 +540,8 @@
536 540 delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick);
537 541 if (!delay)
538 542 schedule_timeout_interruptible(longdelay);
  543 + else
  544 + rcu_read_delay(rrsp);
539 545 }
540 546  
541 547 static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl)
... ... @@ -143,6 +143,11 @@
143 143 module_param(qhimark, int, 0);
144 144 module_param(qlowmark, int, 0);
145 145  
  146 +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
  147 +int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT;
  148 +module_param(rcu_cpu_stall_suppress, int, 0644);
  149 +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
  150 +
146 151 static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
147 152 static int rcu_pending(int cpu);
148 153  
... ... @@ -450,7 +455,7 @@
450 455  
451 456 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
452 457  
453   -int rcu_cpu_stall_panicking __read_mostly;
  458 +int rcu_cpu_stall_suppress __read_mostly;
454 459  
455 460 static void record_gp_stall_check_time(struct rcu_state *rsp)
456 461 {
... ... @@ -482,8 +487,11 @@
482 487 rcu_print_task_stall(rnp);
483 488 raw_spin_unlock_irqrestore(&rnp->lock, flags);
484 489  
485   - /* OK, time to rat on our buddy... */
486   -
  490 + /*
  491 + * OK, time to rat on our buddy...
  492 + * See Documentation/RCU/stallwarn.txt for info on how to debug
  493 + * RCU CPU stall warnings.
  494 + */
487 495 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {",
488 496 rsp->name);
489 497 rcu_for_each_leaf_node(rsp, rnp) {
... ... @@ -512,6 +520,11 @@
512 520 unsigned long flags;
513 521 struct rcu_node *rnp = rcu_get_root(rsp);
514 522  
  523 + /*
  524 + * OK, time to rat on ourselves...
  525 + * See Documentation/RCU/stallwarn.txt for info on how to debug
  526 + * RCU CPU stall warnings.
  527 + */
515 528 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
516 529 rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
517 530 trigger_all_cpu_backtrace();
... ... @@ -530,7 +543,7 @@
530 543 long delta;
531 544 struct rcu_node *rnp;
532 545  
533   - if (rcu_cpu_stall_panicking)
  546 + if (rcu_cpu_stall_suppress)
534 547 return;
535 548 delta = jiffies - rsp->jiffies_stall;
536 549 rnp = rdp->mynode;
537 550  
... ... @@ -548,10 +561,26 @@
548 561  
549 562 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
550 563 {
551   - rcu_cpu_stall_panicking = 1;
  564 + rcu_cpu_stall_suppress = 1;
552 565 return NOTIFY_DONE;
553 566 }
554 567  
  568 +/**
  569 + * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
  570 + *
  571 + * Set the stall-warning timeout way off into the future, thus preventing
  572 + * any RCU CPU stall-warning messages from appearing in the current set of
  573 + * RCU grace periods.
  574 + *
  575 + * The caller must disable hard irqs.
  576 + */
  577 +void rcu_cpu_stall_reset(void)
  578 +{
  579 + rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
  580 + rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
  581 + rcu_preempt_stall_reset();
  582 +}
  583 +
555 584 static struct notifier_block rcu_panic_block = {
556 585 .notifier_call = rcu_panic,
557 586 };
... ... @@ -571,6 +600,10 @@
571 600 {
572 601 }
573 602  
  603 +void rcu_cpu_stall_reset(void)
  604 +{
  605 +}
  606 +
574 607 static void __init check_cpu_stall_init(void)
575 608 {
576 609 }
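
Because rcu_cpu_stall_suppress is declared above with module_param(..., 0644), stall checking should also be adjustable at run time, not only through the new Kconfig options. The sysfs path below is an assumption based on the usual convention for parameters of the built-in rcutree.o object:

	# suppress RCU CPU stall warnings (assumed path)
	echo 1 > /sys/module/rcutree/parameters/rcu_cpu_stall_suppress
	# re-enable them
	echo 0 > /sys/module/rcutree/parameters/rcu_cpu_stall_suppress
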
... ... @@ -712,7 +745,7 @@
712 745 rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
713 746 __releases(rcu_get_root(rsp)->lock)
714 747 {
715   - struct rcu_data *rdp = rsp->rda[smp_processor_id()];
  748 + struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
716 749 struct rcu_node *rnp = rcu_get_root(rsp);
717 750  
718 751 if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) {
... ... @@ -960,7 +993,7 @@
960 993 static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
961 994 {
962 995 int i;
963   - struct rcu_data *rdp = rsp->rda[smp_processor_id()];
  996 + struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
964 997  
965 998 if (rdp->nxtlist == NULL)
966 999 return; /* irqs disabled, so comparison is stable. */
... ... @@ -984,7 +1017,7 @@
984 1017 struct rcu_data *rdp;
985 1018  
986 1019 raw_spin_lock_irqsave(&rsp->onofflock, flags);
987   - rdp = rsp->rda[smp_processor_id()];
  1020 + rdp = this_cpu_ptr(rsp->rda);
988 1021 if (rsp->orphan_cbs_list == NULL) {
989 1022 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
990 1023 return;
... ... @@ -1007,7 +1040,7 @@
1007 1040 unsigned long flags;
1008 1041 unsigned long mask;
1009 1042 int need_report = 0;
1010   - struct rcu_data *rdp = rsp->rda[cpu];
  1043 + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1011 1044 struct rcu_node *rnp;
1012 1045  
1013 1046 /* Exclude any attempts to start a new grace period. */
... ... @@ -1226,7 +1259,8 @@
1226 1259 cpu = rnp->grplo;
1227 1260 bit = 1;
1228 1261 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
1229   - if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu]))
  1262 + if ((rnp->qsmask & bit) != 0 &&
  1263 + f(per_cpu_ptr(rsp->rda, cpu)))
1230 1264 mask |= bit;
1231 1265 }
1232 1266 if (mask != 0) {
... ... @@ -1402,7 +1436,7 @@
1402 1436 * a quiescent state betweentimes.
1403 1437 */
1404 1438 local_irq_save(flags);
1405   - rdp = rsp->rda[smp_processor_id()];
  1439 + rdp = this_cpu_ptr(rsp->rda);
1406 1440 rcu_process_gp_end(rsp, rdp);
1407 1441 check_for_new_grace_period(rsp, rdp);
1408 1442  
... ... @@ -1701,7 +1735,7 @@
1701 1735 {
1702 1736 unsigned long flags;
1703 1737 int i;
1704   - struct rcu_data *rdp = rsp->rda[cpu];
  1738 + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1705 1739 struct rcu_node *rnp = rcu_get_root(rsp);
1706 1740  
1707 1741 /* Set up local state, ensuring consistent view of global state. */
... ... @@ -1729,7 +1763,7 @@
1729 1763 {
1730 1764 unsigned long flags;
1731 1765 unsigned long mask;
1732   - struct rcu_data *rdp = rsp->rda[cpu];
  1766 + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1733 1767 struct rcu_node *rnp = rcu_get_root(rsp);
1734 1768  
1735 1769 /* Set up local state, ensuring consistent view of global state. */
... ... @@ -1865,7 +1899,8 @@
1865 1899 /*
1866 1900 * Helper function for rcu_init() that initializes one rcu_state structure.
1867 1901 */
1868   -static void __init rcu_init_one(struct rcu_state *rsp)
  1902 +static void __init rcu_init_one(struct rcu_state *rsp,
  1903 + struct rcu_data __percpu *rda)
1869 1904 {
1870 1905 static char *buf[] = { "rcu_node_level_0",
1871 1906 "rcu_node_level_1",
1872 1907  
1873 1908  
1874 1909  
... ... @@ -1918,37 +1953,23 @@
1918 1953 }
1919 1954 }
1920 1955  
  1956 + rsp->rda = rda;
1921 1957 rnp = rsp->level[NUM_RCU_LVLS - 1];
1922 1958 for_each_possible_cpu(i) {
1923 1959 while (i > rnp->grphi)
1924 1960 rnp++;
1925   - rsp->rda[i]->mynode = rnp;
  1961 + per_cpu_ptr(rsp->rda, i)->mynode = rnp;
1926 1962 rcu_boot_init_percpu_data(i, rsp);
1927 1963 }
1928 1964 }
1929 1965  
1930   -/*
1931   - * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used
1932   - * nowhere else! Assigns leaf node pointers into each CPU's rcu_data
1933   - * structure.
1934   - */
1935   -#define RCU_INIT_FLAVOR(rsp, rcu_data) \
1936   -do { \
1937   - int i; \
1938   - \
1939   - for_each_possible_cpu(i) { \
1940   - (rsp)->rda[i] = &per_cpu(rcu_data, i); \
1941   - } \
1942   - rcu_init_one(rsp); \
1943   -} while (0)
1944   -
1945 1966 void __init rcu_init(void)
1946 1967 {
1947 1968 int cpu;
1948 1969  
1949 1970 rcu_bootup_announce();
1950   - RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data);
1951   - RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
  1971 + rcu_init_one(&rcu_sched_state, &rcu_sched_data);
  1972 + rcu_init_one(&rcu_bh_state, &rcu_bh_data);
1952 1973 __rcu_init_preempt();
1953 1974 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
1954 1975  
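
The rda conversion repeated throughout these hunks follows a single pattern, sketched here with a stripped-down hypothetical structure: one __percpu pointer replaces the NR_CPUS-sized array of rcu_data pointers, and the array indexing becomes per_cpu_ptr()/this_cpu_ptr() lookups.

	static DEFINE_PER_CPU(struct rcu_data, example_rcu_data);

	struct example_rcu_state {
		struct rcu_data __percpu *rda;	/* was: struct rcu_data *rda[NR_CPUS]; */
	};

	/* rsp->rda is assigned once at init time, e.g. rsp->rda = &example_rcu_data; */

	static struct rcu_data *example_rdp(struct example_rcu_state *rsp, int cpu)
	{
		return per_cpu_ptr(rsp->rda, cpu);	/* was: rsp->rda[cpu] */
	}

	static struct rcu_data *example_this_rdp(struct example_rcu_state *rsp)
	{
		return this_cpu_ptr(rsp->rda);	/* was: rsp->rda[smp_processor_id()] */
	}
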
... ... @@ -254,20 +254,24 @@
254 254 #define RCU_STALL_DELAY_DELTA 0
255 255 #endif
256 256  
257   -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA)
  257 +#define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \
  258 + RCU_STALL_DELAY_DELTA)
258 259 /* for rsp->jiffies_stall */
259   -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA)
  260 +#define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30)
260 261 /* for rsp->jiffies_stall */
261 262 #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
262 263 /* to take at least one */
263 264 /* scheduling clock irq */
264 265 /* before ratting on them. */
265 266  
  267 +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE
  268 +#define RCU_CPU_STALL_SUPPRESS_INIT 0
  269 +#else
  270 +#define RCU_CPU_STALL_SUPPRESS_INIT 1
  271 +#endif
  272 +
266 273 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
267 274  
268   -#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
269   -#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
270   -
271 275 /*
272 276 * RCU global state, including node hierarchy. This hierarchy is
273 277 * represented in "heap" form in a dense array. The root (first level)
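
Plugging in the new Kconfig default of 60 seconds (CONFIG_RCU_CPU_STALL_TIMEOUT, added later in this diff) and assuming no lockdep-testing delta:

	RCU_SECONDS_TILL_STALL_CHECK   = 60 * HZ              /* first warning after ~60 s */
	RCU_SECONDS_TILL_STALL_RECHECK = 3 * 60 * HZ + 30     /* repeats roughly every 3 min */
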
... ... @@ -283,7 +287,7 @@
283 287 struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */
284 288 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
285 289 u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */
286   - struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */
  290 + struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */
287 291  
288 292 /* The following fields are guarded by the root rcu_node's lock. */
289 293  
... ... @@ -365,6 +369,7 @@
365 369 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
366 370 static void rcu_print_detail_task_stall(struct rcu_state *rsp);
367 371 static void rcu_print_task_stall(struct rcu_node *rnp);
  372 +static void rcu_preempt_stall_reset(void);
368 373 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
369 374 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
370 375 #ifdef CONFIG_HOTPLUG_CPU
kernel/rcutree_plugin.h
... ... @@ -154,7 +154,7 @@
154 154 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
155 155  
156 156 /* Possibly blocking in an RCU read-side critical section. */
157   - rdp = rcu_preempt_state.rda[cpu];
  157 + rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
158 158 rnp = rdp->mynode;
159 159 raw_spin_lock_irqsave(&rnp->lock, flags);
160 160 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
... ... @@ -201,7 +201,7 @@
201 201 */
202 202 void __rcu_read_lock(void)
203 203 {
204   - ACCESS_ONCE(current->rcu_read_lock_nesting)++;
  204 + current->rcu_read_lock_nesting++;
205 205 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
206 206 }
207 207 EXPORT_SYMBOL_GPL(__rcu_read_lock);
... ... @@ -344,7 +344,9 @@
344 344 struct task_struct *t = current;
345 345  
346 346 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
347   - if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
  347 + --t->rcu_read_lock_nesting;
  348 + barrier(); /* decrement before load of ->rcu_read_unlock_special */
  349 + if (t->rcu_read_lock_nesting == 0 &&
348 350 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
349 351 rcu_read_unlock_special(t);
350 352 #ifdef CONFIG_PROVE_LOCKING
... ... @@ -417,6 +419,16 @@
417 419 }
418 420 }
419 421  
  422 +/*
  423 + * Suppress preemptible RCU's CPU stall warnings by pushing the
  424 + * time of the next stall-warning message comfortably far into the
  425 + * future.
  426 + */
  427 +static void rcu_preempt_stall_reset(void)
  428 +{
  429 + rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
  430 +}
  431 +
420 432 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
421 433  
422 434 /*
... ... @@ -546,9 +558,11 @@
546 558 *
547 559 * Control will return to the caller some time after a full grace
548 560 * period has elapsed, in other words after all currently executing RCU
549   - * read-side critical sections have completed. RCU read-side critical
550   - * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
551   - * and may be nested.
  561 + * read-side critical sections have completed. Note, however, that
  562 + * upon return from synchronize_rcu(), the caller might well be executing
  563 + * concurrently with new RCU read-side critical sections that began while
  564 + * synchronize_rcu() was waiting. RCU read-side critical sections are
  565 + * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
552 566 */
553 567 void synchronize_rcu(void)
554 568 {
... ... @@ -771,7 +785,7 @@
771 785 */
772 786 static void __init __rcu_init_preempt(void)
773 787 {
774   - RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data);
  788 + rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
775 789 }
776 790  
777 791 /*
... ... @@ -865,6 +879,14 @@
865 879 {
866 880 }
867 881  
  882 +/*
  883 + * Because preemptible RCU does not exist, there is no need to suppress
  884 + * its CPU stall warnings.
  885 + */
  886 +static void rcu_preempt_stall_reset(void)
  887 +{
  888 +}
  889 +
868 890 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
869 891  
870 892 /*
... ... @@ -917,15 +939,6 @@
917 939 static void rcu_preempt_process_callbacks(void)
918 940 {
919 941 }
920   -
921   -/*
922   - * In classic RCU, call_rcu() is just call_rcu_sched().
923   - */
924   -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
925   -{
926   - call_rcu_sched(head, func);
927   -}
928   -EXPORT_SYMBOL_GPL(call_rcu);
929 942  
930 943 /*
931 944 * Wait for an rcu-preempt grace period, but make it happen quickly.
kernel/rcutree_trace.c
... ... @@ -262,7 +262,7 @@
262 262 struct rcu_data *rdp;
263 263  
264 264 for_each_possible_cpu(cpu) {
265   - rdp = rsp->rda[cpu];
  265 + rdp = per_cpu_ptr(rsp->rda, cpu);
266 266 if (rdp->beenonline)
267 267 print_one_rcu_pending(m, rdp);
268 268 }
... ... @@ -539,6 +539,19 @@
539 539 disabling, allowing multiple RCU-lockdep warnings to be printed
540 540 on a single reboot.
541 541  
  542 +config SPARSE_RCU_POINTER
  543 + bool "RCU debugging: sparse-based checks for pointer usage"
  544 + default n
  545 + help
  546 + This feature enables the __rcu sparse annotation for
  547 + RCU-protected pointers. This annotation will cause sparse
  548 + to flag any non-RCU use of annotated pointers. This can be
  549 + helpful when debugging RCU usage. Please note that this feature
  550 + is not intended to enforce code cleanliness; it is instead merely
  551 + a debugging aid.
  552 +
  553 + Say Y to make sparse flag questionable use of RCU-protected pointers
  554 +
542 555 Say N if you are unsure.
543 556  
544 557 config LOCKDEP
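
To show what this option catches, a minimal hypothetical update-side sketch: with gp annotated __rcu, sparse accepts the rcu_assign_pointer() publication but would flag the commented-out plain assignment as an address-space mismatch.

	struct foo {
		int a;
	};
	static struct foo __rcu *gp;

	static void example_publish(struct foo *newp)
	{
		rcu_assign_pointer(gp, newp);	/* OK: RCU publication primitive */
		/* gp = newp; */		/* sparse: incompatible address spaces */
	}
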
... ... @@ -831,6 +844,30 @@
831 844 Say N if you want to disable such checks.
832 845  
833 846 Say Y if you are unsure.
  847 +
  848 +config RCU_CPU_STALL_TIMEOUT
  849 + int "RCU CPU stall timeout in seconds"
  850 + depends on RCU_CPU_STALL_DETECTOR
  851 + range 3 300
  852 + default 60
  853 + help
  854 + If a given RCU grace period extends for more than the specified
  855 + number of seconds, a CPU stall warning is printed. If the
  856 + RCU grace period persists, additional CPU stall warnings are
  857 + printed at more widely spaced intervals.
  858 +
  859 +config RCU_CPU_STALL_DETECTOR_RUNNABLE
  860 + bool "RCU CPU stall checking starts automatically at boot"
  861 + depends on RCU_CPU_STALL_DETECTOR
  862 + default y
  863 + help
  864 + If set, start checking for RCU CPU stalls immediately on
  865 + boot. Otherwise, RCU CPU stall checking must be manually
  866 + enabled.
  867 +
  868 + Say Y if you are unsure.
  869 +
  870 + Say N if you wish to suppress RCU CPU stall checking during boot.
834 871  
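Taken together, an illustrative .config fragment that enables stall checking with the defaults shown above might read as follows (purely an example; adjust the timeout as needed):

CONFIG_RCU_CPU_STALL_DETECTOR=y
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE=y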
835 872 config RCU_CPU_STALL_VERBOSE
836 873 bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
... ... @@ -49,7 +49,7 @@
49 49 unsigned int height; /* Height from the bottom */
50 50 unsigned int count;
51 51 struct rcu_head rcu_head;
52   - void *slots[RADIX_TREE_MAP_SIZE];
  52 + void __rcu *slots[RADIX_TREE_MAP_SIZE];
53 53 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
54 54 };
55 55  
net/ipv4/netfilter/nf_nat_core.c
... ... @@ -38,7 +38,7 @@
38 38 static struct nf_conntrack_l3proto *l3proto __read_mostly;
39 39  
40 40 #define MAX_IP_NAT_PROTO 256
41   -static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
  41 +static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO]
42 42 __read_mostly;
43 43  
44 44 static inline const struct nf_nat_protocol *
net/netfilter/core.c
... ... @@ -27,7 +27,7 @@
27 27  
28 28 static DEFINE_MUTEX(afinfo_mutex);
29 29  
30   -const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
  30 +const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
31 31 EXPORT_SYMBOL(nf_afinfo);
32 32  
33 33 int nf_register_afinfo(const struct nf_afinfo *afinfo)
net/netfilter/nf_conntrack_ecache.c
... ... @@ -26,10 +26,10 @@
26 26  
27 27 static DEFINE_MUTEX(nf_ct_ecache_mutex);
28 28  
29   -struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly;
  29 +struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly;
30 30 EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
31 31  
32   -struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly;
  32 +struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly;
33 33 EXPORT_SYMBOL_GPL(nf_expect_event_cb);
34 34  
35 35 /* deliver cached events and clear cache entry - must be called with locally
net/netfilter/nf_conntrack_extend.c
... ... @@ -16,7 +16,7 @@
16 16 #include <linux/skbuff.h>
17 17 #include <net/netfilter/nf_conntrack_extend.h>
18 18  
19   -static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM];
  19 +static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM];
20 20 static DEFINE_MUTEX(nf_ct_ext_type_mutex);
21 21  
22 22 void __nf_ct_ext_destroy(struct nf_conn *ct)
net/netfilter/nf_conntrack_proto.c
... ... @@ -28,8 +28,8 @@
28 28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 29 #include <net/netfilter/nf_conntrack_core.h>
30 30  
31   -static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
32   -struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly;
  31 +static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly;
  32 +struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly;
33 33 EXPORT_SYMBOL_GPL(nf_ct_l3protos);
34 34  
35 35 static DEFINE_MUTEX(nf_ct_proto_mutex);
net/netfilter/nf_log.c
... ... @@ -16,7 +16,7 @@
16 16 #define NF_LOG_PREFIXLEN 128
17 17 #define NFLOGGER_NAME_LEN 64
18 18  
19   -static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
  19 +static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
20 20 static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly;
21 21 static DEFINE_MUTEX(nf_log_mutex);
22 22  
net/netfilter/nf_queue.c
... ... @@ -18,7 +18,7 @@
18 18 * long term mutex. The handler must provide an outfn() to accept packets
19 19 * for queueing and must reinject all packets it receives, no matter what.
20 20 */
21   -static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
  21 +static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
22 22  
23 23 static DEFINE_MUTEX(queue_handler_mutex);
24 24
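For orientation, a heavily hedged sketch of a minimal queue handler against this interface; the struct layout and function signatures are assumptions based on this era's headers rather than anything shown in the diff, and the handler simply re-accepts every packet, honoring the rule in the comment above that all queued packets must be reinjected:

/*
 * Sketch only: "toy_outfn" and "toy_handler" are invented names, and
 * the nf_queue_entry/nf_queue_handler layouts are assumed.
 */
static int toy_outfn(struct nf_queue_entry *entry, unsigned int queuenum)
{
	nf_reinject(entry, NF_ACCEPT);	/* hand the packet straight back */
	return 0;
}

static const struct nf_queue_handler toy_handler = {
	.name	= "toy_queue",
	.outfn	= toy_outfn,
};

static int __init toy_queue_init(void)
{
	return nf_register_queue_handler(NFPROTO_IPV4, &toy_handler);
}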