Commit 4eacdf18374e5d7d21a728b46dfec269ac8ef55c
Committed by
Paul E. McKenney
1 parent
90f45e4e72
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
context_tracking: Add comments on interface and internals
This subsystem lacks many explanations on its purpose and design. Add these missing comments. v4: Document function parameters to be more kernel-doc friendly, as per Namhyung's suggestion. Reported-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Alessio Igor Bogani <abogani@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Christoph Lameter <cl@linux.com> Cc: Geoff Levand <geoff@infradead.org> Cc: Gilad Ben Yossef <gilad@benyossef.com> Cc: Hakan Akkan <hakanakkan@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Li Zhong <zhong@linux.vnet.ibm.com> Cc: Namhyung Kim <namhyung.kim@lge.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Showing 1 changed file with 65 additions and 10 deletions Side-by-side Diff
kernel/context_tracking.c
1 | +/* | |
2 | + * Context tracking: Probe on high level context boundaries such as kernel | |
3 | + * and userspace. This includes syscalls and exceptions entry/exit. | |
4 | + * | |
5 | + * This is used by RCU to remove its dependency on the timer tick while a CPU | |
6 | + * runs in userspace. | |
7 | + * | |
8 | + * Started by Frederic Weisbecker: | |
9 | + * | |
10 | + * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com> | |
11 | + * | |
12 | + * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton, | |
13 | + * Steven Rostedt, Peter Zijlstra for suggestions and improvements. | |
14 | + * | |
15 | + */ | |
16 | + | |
1 | 17 | #include <linux/context_tracking.h> |
2 | 18 | #include <linux/rcupdate.h> |
3 | 19 | #include <linux/sched.h> |
... | ... | @@ -6,8 +22,8 @@ |
6 | 22 | |
7 | 23 | struct context_tracking { |
8 | 24 | /* |
9 | - * When active is false, hooks are not set to | |
10 | - * minimize overhead: TIF flags are cleared | |
25 | + * When active is false, probes are unset in order | |
26 | + * to minimize overhead: TIF flags are cleared | |
11 | 27 | * and calls to user_enter/exit are ignored. This |
12 | 28 | * may be further optimized using static keys. |
13 | 29 | */ |
... | ... | @@ -24,6 +40,15 @@ |
24 | 40 | #endif |
25 | 41 | }; |
26 | 42 | |
43 | +/** | |
44 | + * user_enter - Inform the context tracking that the CPU is going to | |
45 | + * enter userspace mode. | |
46 | + * | |
47 | + * This function must be called right before we switch from the kernel | |
48 | + * to userspace, when it's guaranteed the remaining kernel instructions | |
49 | + * to execute won't use any RCU read side critical section because this | |
50 | + * function puts RCU into an extended quiescent state. | |
51 | + */ | |
27 | 52 | void user_enter(void) |
28 | 53 | { |
29 | 54 | unsigned long flags; |
30 | 55 | |
31 | 56 | |
32 | 57 | |
33 | 58 | |
34 | 59 | |
... | ... | @@ -39,40 +64,70 @@ |
39 | 64 | if (in_interrupt()) |
40 | 65 | return; |
41 | 66 | |
67 | + /* Kernel threads aren't supposed to go to userspace */ | |
42 | 68 | WARN_ON_ONCE(!current->mm); |
43 | 69 | |
44 | 70 | local_irq_save(flags); |
45 | 71 | if (__this_cpu_read(context_tracking.active) && |
46 | 72 | __this_cpu_read(context_tracking.state) != IN_USER) { |
47 | 73 | __this_cpu_write(context_tracking.state, IN_USER); |
74 | + /* | |
75 | + * At this stage, only low level arch entry code remains and | |
76 | + * then we'll run in userspace. We can assume there won't be | |
77 | + * any RCU read-side critical section until the next call to | |
78 | + * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency | |
79 | + * on the tick. | |
80 | + */ | |
48 | 81 | rcu_user_enter(); |
49 | 82 | } |
50 | 83 | local_irq_restore(flags); |
51 | 84 | } |
52 | 85 | |
86 | + | |
87 | +/** | |
88 | + * user_exit - Inform the context tracking that the CPU is | |
89 | + * exiting userspace mode and entering the kernel. | |
90 | + * | |
91 | + * This function must be called after we entered the kernel from userspace | |
92 | + * before any use of RCU read side critical section. This potentially includes | |
93 | + * any high level kernel code like syscalls, exceptions, signal handling, etc... | |
94 | + * | |
95 | + * This call supports re-entrancy. This way it can be called from any exception | |
96 | + * handler without needing to know if we came from userspace or not. | |
97 | + */ | |
53 | 98 | void user_exit(void) |
54 | 99 | { |
55 | 100 | unsigned long flags; |
56 | 101 | |
57 | - /* | |
58 | - * Some contexts may involve an exception occuring in an irq, | |
59 | - * leading to that nesting: | |
60 | - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() | |
61 | - * This would mess up the dyntick_nesting count though. And rcu_irq_*() | |
62 | - * helpers are enough to protect RCU uses inside the exception. So | |
63 | - * just return immediately if we detect we are in an IRQ. | |
64 | - */ | |
65 | 102 | if (in_interrupt()) |
66 | 103 | return; |
67 | 104 | |
68 | 105 | local_irq_save(flags); |
69 | 106 | if (__this_cpu_read(context_tracking.state) == IN_USER) { |
70 | 107 | __this_cpu_write(context_tracking.state, IN_KERNEL); |
108 | + /* | |
109 | + * We are going to run code that may use RCU. Inform | |
110 | + * RCU core about that (i.e. we may need the tick again). | |
111 | + */ | |
71 | 112 | rcu_user_exit(); |
72 | 113 | } |
73 | 114 | local_irq_restore(flags); |
74 | 115 | } |
75 | 116 | |
117 | + | |
118 | +/** | |
119 | + * context_tracking_task_switch - context switch the syscall callbacks | |
120 | + * @prev: the task that is being switched out | |
121 | + * @next: the task that is being switched in | |
122 | + * | |
123 | + * The context tracking uses the syscall slow path to implement its user-kernel | |
124 | + * boundaries probes on syscalls. This way it doesn't impact the syscall fast | |
125 | + * path on CPUs that don't do context tracking. | |
126 | + * | |
127 | + * But we need to clear the flag on the previous task because it may later | |
128 | + * migrate to some CPU that doesn't do the context tracking. As such the TIF | |
129 | + * flag may not be desired there. | |
130 | + */ | |
76 | 131 | void context_tracking_task_switch(struct task_struct *prev, |
77 | 132 | struct task_struct *next) |
78 | 133 | { |