generic-ipi: use per cpu data for single cpu ipi calls

The smp_call_function can be passed a wait parameter telling it either to wait for all the functions running on other CPUs to complete before returning, or to return without waiting. Unfortunately, this is currently just a suggestion and not mandatory. That is, the smp_call_function can decide not to return immediately and wait instead. The reason is that it uses kmalloc to allocate storage to send to the called CPU, and that CPU will free it when it is done. But if we fail to allocate the storage, the stack is used instead. This means we must wait for the called CPU to finish before continuing. Unfortunately, some callers do not abide by this hint and act as if the non-wait option is mandatory. The MTRR code, for instance, will deadlock if the smp_call_function is set to wait. This is because the smp_call_function will wait for the other CPUs to finish their called functions, but those functions are waiting on the caller to continue. This patch changes the generic smp_call_function code to use per cpu variables if the allocation of the data fails for a single CPU call. The smp_call_function_many will fall back to the smp_call_function_single if it fails its alloc. The smp_call_function_single is modified to not force the wait state. Since we are now using a single data structure per cpu, we must synchronize the callers to prevent a second caller from modifying the data before the first called IPI function completes. To do so, I added a flag to the call_single_data called CSD_FLAG_LOCK. When the single CPU is called (which can happen when a many call fails an alloc), we set the LOCK bit on this per cpu data. When the called function finishes, the IPI handler clears the LOCK bit. The next caller must wait until the LOCK bit is cleared before setting it. When it is cleared, there is no IPI function using the data.
Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Jens Axboe <jens.axboe@oracle.com> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>

generic-ipi: use per cpu data for single cpu ipi calls
The smp_call_function can be passed a wait parameter telling it either to wait for all the functions running on other CPUs to complete before returning, or to return without waiting. Unfortunately, this is currently just a suggestion and not mandatory. That is, the smp_call_function can decide not to return immediately and wait instead. The reason is that it uses kmalloc to allocate storage to send to the called CPU, and that CPU will free it when it is done. But if we fail to allocate the storage, the stack is used instead. This means we must wait for the called CPU to finish before continuing. Unfortunately, some callers do not abide by this hint and act as if the non-wait option is mandatory. The MTRR code, for instance, will deadlock if the smp_call_function is set to wait. This is because the smp_call_function will wait for the other CPUs to finish their called functions, but those functions are waiting on the caller to continue. This patch changes the generic smp_call_function code to use per cpu variables if the allocation of the data fails for a single CPU call. The smp_call_function_many will fall back to the smp_call_function_single if it fails its alloc. The smp_call_function_single is modified to not force the wait state. Since we are now using a single data structure per cpu, we must synchronize the callers to prevent a second caller from modifying the data before the first called IPI function completes. To do so, I added a flag to the call_single_data called CSD_FLAG_LOCK. When the single CPU is called (which can happen when a many call fails an alloc), we set the LOCK bit on this per cpu data. When the called function finishes, the IPI handler clears the LOCK bit. The next caller must wait until the LOCK bit is cleared before setting it. When it is cleared, there is no IPI function using the data.
Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Jens Axboe <jens.axboe@oracle.com> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Steven Rostedt · Ingo Molnar
1 parent 4ab0a9409a
Showing 1 changed file with 33 additions and 3 deletions Side-by-side Diff
kernel/smp.c
@@ -18,6 +18,7 @@
 enum {
 	CSD_FLAG_WAIT		= 0x01,
 	CSD_FLAG_ALLOC		= 0x02,
+	CSD_FLAG_LOCK		= 0x04,
 };
  
 struct call_function_data {
@@ -186,6 +187,9 @@
 			if (data_flags & CSD_FLAG_WAIT) {
 				smp_wmb();
 				data->flags &= ~CSD_FLAG_WAIT;
+			} else if (data_flags & CSD_FLAG_LOCK) {
+				smp_wmb();
+				data->flags &= ~CSD_FLAG_LOCK;
 			} else if (data_flags & CSD_FLAG_ALLOC)
 				kfree(data);
 		}
@@ -196,6 +200,8 @@
 	}
 }
  
+static DEFINE_PER_CPU(struct call_single_data, csd_data);
+
 /*
  * smp_call_function_single - Run a function on a specific CPU
  * @func: The function to run. This must be fast and non-blocking.
  
  
@@ -224,14 +230,38 @@
 		func(info);
 		local_irq_restore(flags);
 	} else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
-		struct call_single_data *data = NULL;
+		struct call_single_data *data;
  
 		if (!wait) {
+			/*
+			 * We are calling a function on a single CPU
+			 * and we are not going to wait for it to finish.
+			 * We first try to allocate the data, but if we
+			 * fail, we fall back to use a per cpu data to pass
+			 * the information to that CPU. Since all callers
+			 * of this code will use the same data, we must
+			 * synchronize the callers to prevent a new caller
+			 * from corrupting the data before the callee
+			 * can access it.
+			 *
+			 * The CSD_FLAG_LOCK is used to let us know when
+			 * the IPI handler is done with the data.
+			 * The first caller will set it, and the callee
+			 * will clear it. The next caller must wait for
+			 * it to clear before we set it again. This
+			 * will make sure the callee is done with the
+			 * data before a new caller will use it.
+			 */
 			data = kmalloc(sizeof(*data), GFP_ATOMIC);
 			if (data)
 				data->flags = CSD_FLAG_ALLOC;
-		}
-		if (!data) {
+			else {
+				data = &per_cpu(csd_data, me);
+				while (data->flags & CSD_FLAG_LOCK)
+					cpu_relax();
+				data->flags = CSD_FLAG_LOCK;
+			}
+		} else {
 			data = &d;
 			data->flags = CSD_FLAG_WAIT;
 		}
...	...	@@ -18,6 +18,7 @@
18	18	enum {
19	19	CSD_FLAG_WAIT = 0x01,
20	20	CSD_FLAG_ALLOC = 0x02,
	21	+ CSD_FLAG_LOCK = 0x04,
21	22	};
22	23
23	24	struct call_function_data {
...	...	@@ -186,6 +187,9 @@
186	187	if (data_flags & CSD_FLAG_WAIT) {
187	188	smp_wmb();
188	189	data->flags &= ~CSD_FLAG_WAIT;
	190	+ } else if (data_flags & CSD_FLAG_LOCK) {
	191	+ smp_wmb();
	192	+ data->flags &= ~CSD_FLAG_LOCK;
189	193	} else if (data_flags & CSD_FLAG_ALLOC)
190	194	kfree(data);
191	195	}
...	...	@@ -196,6 +200,8 @@
196	200	}
197	201	}
198	202
	203	+static DEFINE_PER_CPU(struct call_single_data, csd_data);
	204	+
199	205	/*
200	206	* smp_call_function_single - Run a function on a specific CPU
201	207	* @func: The function to run. This must be fast and non-blocking.
202	208
203	209
...	...	@@ -224,14 +230,38 @@
224	230	func(info);
225	231	local_irq_restore(flags);
226	232	} else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
227		- struct call_single_data *data = NULL;
	233	+ struct call_single_data *data;
228	234
229	235	if (!wait) {
	236	+ /*
	237	+ * We are calling a function on a single CPU
	238	+ * and we are not going to wait for it to finish.
	239	+ * We first try to allocate the data, but if we
	240	+ * fail, we fall back to use a per cpu data to pass
	241	+ * the information to that CPU. Since all callers
	242	+ * of this code will use the same data, we must
	243	+ * synchronize the callers to prevent a new caller
	244	+ * from corrupting the data before the callee
	245	+ * can access it.
	246	+ *
	247	+ * The CSD_FLAG_LOCK is used to let us know when
	248	+ * the IPI handler is done with the data.
	249	+ * The first caller will set it, and the callee
	250	+ * will clear it. The next caller must wait for
	251	+ * it to clear before we set it again. This
	252	+ * will make sure the callee is done with the
	253	+ * data before a new caller will use it.
	254	+ */
230	255	data = kmalloc(sizeof(*data), GFP_ATOMIC);
231	256	if (data)
232	257	data->flags = CSD_FLAG_ALLOC;
233		- }
234		- if (!data) {
	258	+ else {
	259	+ data = &per_cpu(csd_data, me);
	260	+ while (data->flags & CSD_FLAG_LOCK)
	261	+ cpu_relax();
	262	+ data->flags = CSD_FLAG_LOCK;
	263	+ }
	264	+ } else {
235	265	data = &d;
236	266	data->flags = CSD_FLAG_WAIT;
237	267	}