Commit 89ada67917f516212452443a56b9fd3b65b74dc7

Authored by Christoph Lameter
Committed by Linus Torvalds
1 parent d61780c0d3

[PATCH] Use alloc_percpu to allocate workqueues locally

This patch makes the workqueues use alloc_percpu instead of a static NR_CPUS-sized
array.  The per-CPU workqueue data is thereby placed in memory local to each
processor's node.

The workqueue structure can grow to a significant size on a system with many
processors if this patch is not applied.  A 64-bit architecture configured for
512 processors with all debugging features enabled will not be able to boot
without it.
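
For readers less familiar with the per-CPU allocator, the pattern the patch
switches to is: allocate one instance of the structure per CPU with
alloc_percpu(), reach a particular CPU's copy with per_cpu_ptr(), and release
the whole set with free_percpu().  Below is a minimal sketch of that pattern;
the names (example_stats, example_init, example_exit) are illustrative only
and are not part of this patch.

	#include <linux/percpu.h>
	#include <linux/spinlock.h>
	#include <linux/cpumask.h>
	#include <linux/errno.h>

	/* Illustrative per-CPU payload; stands in for a real per-CPU struct. */
	struct example_stats {
		spinlock_t lock;
		unsigned long count;
	};

	static struct example_stats *stats;	/* one copy per possible CPU */

	static int example_init(void)
	{
		int cpu;

		/*
		 * Replaces a static "struct example_stats stats[NR_CPUS]"
		 * array; each CPU's copy is allocated separately, in memory
		 * local to that CPU's node.
		 */
		stats = alloc_percpu(struct example_stats);
		if (!stats)
			return -ENOMEM;

		for_each_possible_cpu(cpu) {
			struct example_stats *s = per_cpu_ptr(stats, cpu);

			spin_lock_init(&s->lock);
			s->count = 0;
		}
		return 0;
	}

	static void example_exit(void)
	{
		free_percpu(stats);
	}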

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 1 changed file with 20 additions and 13 deletions

@@ -12,6 +12,8 @@
  * Andrew Morton <andrewm@uow.edu.au>
  * Kai Petzke <wpp@marie.physik.tu-berlin.de>
  * Theodore Ts'o <tytso@mit.edu>
+ *
+ * Made to use alloc_percpu by Christoph Lameter <clameter@sgi.com>.
  */
 
 #include <linux/module.h>
@@ -57,7 +59,7 @@
  * per-CPU workqueues:
  */
 struct workqueue_struct {
-	struct cpu_workqueue_struct cpu_wq[NR_CPUS];
+	struct cpu_workqueue_struct *cpu_wq;
 	const char *name;
 	struct list_head list;	/* Empty if single thread */
 };
@@ -102,7 +104,7 @@
 		if (unlikely(is_single_threaded(wq)))
 			cpu = 0;
 		BUG_ON(!list_empty(&work->entry));
-		__queue_work(wq->cpu_wq + cpu, work);
+		__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
 		ret = 1;
 	}
 	put_cpu();
@@ -118,7 +120,7 @@
 	if (unlikely(is_single_threaded(wq)))
 		cpu = 0;
 
-	__queue_work(wq->cpu_wq + cpu, work);
+	__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
 }
 
 int fastcall queue_delayed_work(struct workqueue_struct *wq,
@@ -265,13 +267,13 @@
 
 	if (is_single_threaded(wq)) {
 		/* Always use cpu 0's area. */
-		flush_cpu_workqueue(wq->cpu_wq + 0);
+		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, 0));
 	} else {
 		int cpu;
 
 		lock_cpu_hotplug();
 		for_each_online_cpu(cpu)
-			flush_cpu_workqueue(wq->cpu_wq + cpu);
+			flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
 		unlock_cpu_hotplug();
 	}
 }
@@ -279,7 +281,7 @@
 static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
 						   int cpu)
 {
-	struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu;
+	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
 	struct task_struct *p;
 
 	spin_lock_init(&cwq->lock);
@@ -312,6 +314,7 @@
 	if (!wq)
 		return NULL;
 
+	wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
 	wq->name = name;
 	/* We don't need the distraction of CPUs appearing and vanishing. */
 	lock_cpu_hotplug();
@@ -353,7 +356,7 @@
 	unsigned long flags;
 	struct task_struct *p;
 
-	cwq = wq->cpu_wq + cpu;
+	cwq = per_cpu_ptr(wq->cpu_wq, cpu);
 	spin_lock_irqsave(&cwq->lock, flags);
 	p = cwq->thread;
 	cwq->thread = NULL;
@@ -380,6 +383,7 @@
 		spin_unlock(&workqueue_lock);
 	}
 	unlock_cpu_hotplug();
+	free_percpu(wq->cpu_wq);
 	kfree(wq);
 }
 
@@ -458,7 +462,7 @@
 
 	BUG_ON(!keventd_wq);
 
-	cwq = keventd_wq->cpu_wq + cpu;
+	cwq = per_cpu_ptr(keventd_wq->cpu_wq, cpu);
 	if (current == cwq->thread)
 		ret = 1;
 
@@ -470,7 +474,7 @@
 /* Take the work from this (downed) CPU. */
 static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
 {
-	struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu;
+	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
 	LIST_HEAD(list);
 	struct work_struct *work;
 
@@ -481,7 +485,7 @@
 		printk("Taking work for %s\n", wq->name);
 		work = list_entry(list.next,struct work_struct,entry);
 		list_del(&work->entry);
-		__queue_work(wq->cpu_wq + smp_processor_id(), work);
+		__queue_work(per_cpu_ptr(wq->cpu_wq, smp_processor_id()), work);
 	}
 	spin_unlock_irq(&cwq->lock);
 }
@@ -508,15 +512,18 @@
 	case CPU_ONLINE:
 		/* Kick off worker threads. */
 		list_for_each_entry(wq, &workqueues, list) {
-			kthread_bind(wq->cpu_wq[hotcpu].thread, hotcpu);
-			wake_up_process(wq->cpu_wq[hotcpu].thread);
+			struct cpu_workqueue_struct *cwq;
+
+			cwq = per_cpu_ptr(wq->cpu_wq, hotcpu);
+			kthread_bind(cwq->thread, hotcpu);
+			wake_up_process(cwq->thread);
 		}
 		break;
 
 	case CPU_UP_CANCELED:
 		list_for_each_entry(wq, &workqueues, list) {
 			/* Unbind so it can run. */
-			kthread_bind(wq->cpu_wq[hotcpu].thread,
+			kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread,
 				     smp_processor_id());
 			cleanup_workqueue_thread(wq, hotcpu);
 		}