Commit bf52fa4a26567bfbf5b1d30f84cf0226e61d26cd

Authored by Doug Thompson
Committed by Linus Torvalds
1 parent fb3fb20687

drivers/edac: fix workq reset deadlock

Fix a mutex deadlock on the device controller linked list: the code took
the lock and then called a function that could acquire the same lock.  Moved
the workq cancel call outside the lock.
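
For illustration only (not part of the patch), a condensed sketch of the bug
and the fix, using hypothetical names.  The old reset path held the list
mutex while tearing down a work item whose handler takes that same mutex, so
the flush could wait forever on a worker that was itself waiting on the lock:

        /* before (deadlock-prone): teardown can flush a running worker
         * that is blocked trying to take ctls_mutex, which we hold */
        void reset_delay(struct ctl *dev, unsigned long msec)
        {
                mutex_lock(&ctls_mutex);
                workq_teardown(dev);
                workq_setup(dev, msec);
                mutex_unlock(&ctls_mutex);
        }

        /* after: cancel/flush without the lock, then lock for setup only */
        void reset_delay(struct ctl *dev, unsigned long msec)
        {
                workq_teardown(dev);
                mutex_lock(&ctls_mutex);
                workq_setup(dev, msec);
                mutex_unlock(&ctls_mutex);
        }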

Added short-circuit logic to the workq code

Added descriptive comments

Code tidying

Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Cc: Greg KH <greg@kroah.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 100 additions and 28 deletions

drivers/edac/edac_device.c
... ... @@ -32,7 +32,9 @@
32 32 #include "edac_core.h"
33 33 #include "edac_module.h"
34 34  
35   -/* lock to memory controller's control array 'edac_device_list' */
  35 +/* lock for the list 'edac_device_list': any manipulation of this
  36 + * list must be done while holding the 'device_ctls_mutex' lock
  37 + */
36 38 static DEFINE_MUTEX(device_ctls_mutex);
37 39 static struct list_head edac_device_list = LIST_HEAD_INIT(edac_device_list);
38 40  
... ... @@ -386,6 +388,14 @@
386 388 /*
387 389 * edac_device_workq_function
388 390 * performs the operation scheduled by a workq request
  391 + *
  392 + * this workq is embedded within an edac_device_ctl_info
  393 + * structure that needs to be polled for possible error events.
  394 + *
  395 + * This operation is to acquire the list mutex lock
  396 + * (thus preventing insertion or deletion)
  397 + * and then call the device's poll function IFF this device is
  398 + * running polled and there is a poll function defined.
389 399 */
390 400 static void edac_device_workq_function(struct work_struct *work_req)
391 401 {
... ... @@ -403,8 +413,17 @@
403 413  
404 414 mutex_unlock(&device_ctls_mutex);
405 415  
406   - /* Reschedule */
407   - queue_delayed_work(edac_workqueue, &edac_dev->work, edac_dev->delay);
  416 + /* Reschedule the workq for the next time period to start again.
  417 + * If the period is one second, adjust to the next whole second
  418 + * to keep timers from firing at scattered points across the
  419 + * period between integral seconds.
  420 + */
  421 + if (edac_dev->poll_msec == 1000)
  422 + queue_delayed_work(edac_workqueue, &edac_dev->work,
  423 + round_jiffies(edac_dev->delay));
  424 + else
  425 + queue_delayed_work(edac_workqueue, &edac_dev->work,
  426 + edac_dev->delay);
408 427 }
409 428  
410 429 /*
411 430  
412 431  
... ... @@ -417,11 +436,26 @@
417 436 {
418 437 debugf0("%s()\n", __func__);
419 438  
  439 + /* take the arg 'msec' and store it in the control structure
  440 + * to be used in the time period calculation,
  441 + * then calc the number of jiffies that period represents
  442 + */
420 443 edac_dev->poll_msec = msec;
421   - edac_dev->delay = msecs_to_jiffies(msec); /* Calc delay jiffies */
  444 + edac_dev->delay = msecs_to_jiffies(msec);
422 445  
423 446 INIT_DELAYED_WORK(&edac_dev->work, edac_device_workq_function);
424   - queue_delayed_work(edac_workqueue, &edac_dev->work, edac_dev->delay);
  447 +
  448 + /* optimize here for the 1 second case, which will be the normal
  449 + * value, to fire ON the 1 second time event. This helps reduce
  450 + * all sorts of timers firing on a sub-second basis, while they
  451 + * are happy to fire together exactly on the 1 second boundary
  452 + */
  453 + if (edac_dev->poll_msec == 1000)
  454 + queue_delayed_work(edac_workqueue, &edac_dev->work,
  455 + round_jiffies(edac_dev->delay));
  456 + else
  457 + queue_delayed_work(edac_workqueue, &edac_dev->work,
  458 + edac_dev->delay);
425 459 }
426 460  
427 461 /*
428 462  
429 463  
430 464  
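
round_jiffies() (declared in <linux/timer.h>) rounds a jiffies value to a
whole-second boundary so timers from many devices can fire together.  A
minimal sketch of the rescheduling logic above, with hypothetical names; note
that round_jiffies() treats its argument as an absolute jiffies value, so for
a relative delay like this one round_jiffies_relative() is arguably the
closer fit, but the sketch follows the patch:

        static void requeue_poll(struct my_dev *dev)
        {
                unsigned long delay = dev->delay;     /* period in jiffies */

                /* for the common 1000 msec period, land on a whole second
                 * so many devices' timers coalesce instead of scattering
                 * wakeups across each second */
                if (dev->poll_msec == 1000)
                        delay = round_jiffies(delay);

                queue_delayed_work(my_wq, &dev->work, delay);
        }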
... ... @@ -441,15 +475,19 @@
441 475  
442 476 /*
443 477 * edac_device_reset_delay_period
  478 + *
  479 + * need to stop any outstanding workq entries at this time
  480 + * because we will be resetting the sleep time.
  481 + * Then restart the workq with the new delay
444 482 */
445   -
446 483 void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev,
447 484 unsigned long value)
448 485 {
449   - mutex_lock(&device_ctls_mutex);
450   -
451   - /* cancel the current workq request */
  486 + /* cancel the current workq request, without the mutex lock */
452 487 edac_device_workq_teardown(edac_dev);
  488 +
  489 + /* acquire the mutex before doing the workq setup */
  490 + mutex_lock(&device_ctls_mutex);
453 491  
454 492 /* restart the workq request, with new delay value */
455 493 edac_device_workq_setup(edac_dev, value);
drivers/edac/edac_mc.c
... ... @@ -258,6 +258,12 @@
258 258  
259 259 mutex_lock(&mem_ctls_mutex);
260 260  
  261 + /* if this control struct has moved to the offline state, we are done */
  262 + if (mci->op_state == OP_OFFLINE) {
  263 + mutex_unlock(&mem_ctls_mutex);
  264 + return;
  265 + }
  266 +
261 267 /* Only poll controllers that are running polled and have a check */
262 268 if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
263 269 mci->edac_check(mci);
264 270  
265 271  
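
The new guard is the usual "recheck state under the lock" idiom: the removal
path sets OP_OFFLINE while holding mem_ctls_mutex, so any worker that takes
the mutex afterwards sees the flag and returns without rescheduling.  A
condensed sketch with hypothetical names (to_delayed_work() is a later
kernel helper, used here only to keep the sketch short):

        static void poll_worker(struct work_struct *w)
        {
                struct my_mci *mci =
                        container_of(to_delayed_work(w), struct my_mci, work);

                mutex_lock(&ctls_mutex);
                if (mci->op_state == OP_OFFLINE) {
                        /* removal marked us offline under this same lock;
                         * drop it and, crucially, do not reschedule */
                        mutex_unlock(&ctls_mutex);
                        return;
                }
                /* ... poll the hardware ... */
                mutex_unlock(&ctls_mutex);
                /* ... reschedule the delayed work ... */
        }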
... ... @@ -279,11 +285,19 @@
279 285 * edac_mc_workq_setup
280 286 * initialize a workq item for this mci
281 287 * passing in the new delay period in msec
  288 + *
  289 + * locking model:
  290 + *
  291 + * called with the mem_ctls_mutex held
282 292 */
283   -void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
  293 +static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
284 294 {
285 295 debugf0("%s()\n", __func__);
286 296  
  297 + /* if this instance is not in the POLL state, then simply return */
  298 + if (mci->op_state != OP_RUNNING_POLL)
  299 + return;
  300 +
287 301 INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
288 302 queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
289 303 }
290 304  
291 305  
292 306  
293 307  
294 308  
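
When a function's contract is "called with the lock held", later kernels can
enforce the documented locking model at runtime via lockdep.  A hedged
sketch with hypothetical names (lockdep_assert_held() postdates this commit):

        static void workq_setup(struct my_mci *mci, unsigned msec)
        {
                lockdep_assert_held(&ctls_mutex);  /* enforce the contract */

                /* short circuit: only poll-mode instances get a work item */
                if (mci->op_state != OP_RUNNING_POLL)
                        return;

                INIT_DELAYED_WORK(&mci->work, poll_worker);
                queue_delayed_work(my_wq, &mci->work, msecs_to_jiffies(msec));
        }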
... ... @@ -291,29 +305,39 @@
291 305 /*
292 306 * edac_mc_workq_teardown
293 307 * stop the workq processing on this mci
  308 + *
  309 + * locking model:
  310 + *
  311 + * called WITHOUT lock held
294 312 */
295   -void edac_mc_workq_teardown(struct mem_ctl_info *mci)
  313 +static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
296 314 {
297 315 int status;
298 316  
299   - status = cancel_delayed_work(&mci->work);
300   - if (status == 0) {
301   - /* workq instance might be running, wait for it */
302   - flush_workqueue(edac_workqueue);
  317 + /* if not running POLL, leave now */
  318 + if (mci->op_state == OP_RUNNING_POLL) {
  319 + status = cancel_delayed_work(&mci->work);
  320 + if (status == 0) {
  321 + debugf0("%s() not canceled, flush the queue\n",
  322 + __func__);
  323 +
  324 + /* workq instance might be running, wait for it */
  325 + flush_workqueue(edac_workqueue);
  326 + }
303 327 }
304 328 }
305 329  
306 330 /*
307 331 * edac_reset_delay_period
308 332 */
309   -
310   -void edac_reset_delay_period(struct mem_ctl_info *mci, unsigned long value)
  333 +static void edac_reset_delay_period(struct mem_ctl_info *mci, unsigned long value)
311 334 {
312   - mutex_lock(&mem_ctls_mutex);
313   -
314 335 /* cancel the current workq request */
315 336 edac_mc_workq_teardown(mci);
316 337  
  338 + /* lock the list of devices for the new setup */
  339 + mutex_lock(&mem_ctls_mutex);
  340 +
317 341 /* restart the workq request, with new delay value */
318 342 edac_mc_workq_setup(mci, value);
319 343  
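
cancel_delayed_work() only removes a still-pending timer; it returns 0 when
the work was not pending (typically because the handler is already running),
which is why the flush_workqueue() fallback above is needed to wait the
handler out.  On kernels that provide it, cancel_delayed_work_sync()
collapses the pair into a single call; a hedged sketch:

        static void workq_teardown(struct my_mci *mci)
        {
                /* cancels a pending timer and waits for a running
                 * handler to finish before returning */
                if (mci->op_state == OP_RUNNING_POLL)
                        cancel_delayed_work_sync(&mci->work);
        }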
... ... @@ -323,6 +347,10 @@
323 347 /* Return 0 on success, 1 on failure.
324 348 * Before calling this function, caller must
325 349 * assign a unique value to mci->mc_idx.
  350 + *
  351 + * locking model:
  352 + *
  353 + * called with the mem_ctls_mutex lock held
326 354 */
327 355 static int add_mc_to_global_list(struct mem_ctl_info *mci)
328 356 {
... ... @@ -331,7 +359,8 @@
331 359  
332 360 insert_before = &mc_devices;
333 361  
334   - if (unlikely((p = find_mci_by_dev(mci->dev)) != NULL))
  362 + p = find_mci_by_dev(mci->dev);
  363 + if (unlikely(p != NULL))
335 364 goto fail0;
336 365  
337 366 list_for_each(item, &mc_devices) {
... ... @@ -467,8 +496,8 @@
467 496 }
468 497  
469 498 /* Report action taken */
470   - edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: DEV %s\n",
471   - mci->mod_name, mci->ctl_name, dev_name(mci));
  499 + edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
  500 + " DEV %s\n", mci->mod_name, mci->ctl_name, dev_name(mci));
472 501  
473 502 mutex_unlock(&mem_ctls_mutex);
474 503 return 0;
475 504  
... ... @@ -493,10 +522,13 @@
493 522 {
494 523 struct mem_ctl_info *mci;
495 524  
496   - debugf0("MC: %s()\n", __func__);
  525 + debugf0("%s()\n", __func__);
  526 +
497 527 mutex_lock(&mem_ctls_mutex);
498 528  
499   - if ((mci = find_mci_by_dev(dev)) == NULL) {
  529 + /* find the requested mci struct in the global list */
  530 + mci = find_mci_by_dev(dev);
  531 + if (mci == NULL) {
500 532 mutex_unlock(&mem_ctls_mutex);
501 533 return NULL;
502 534 }
503 535  
504 536  
... ... @@ -504,15 +536,17 @@
504 536 /* marking MCI offline */
505 537 mci->op_state = OP_OFFLINE;
506 538  
507   - /* flush workq processes */
508   - edac_mc_workq_teardown(mci);
509   -
510   - edac_remove_sysfs_mci_device(mci);
511 539 del_mc_from_global_list(mci);
512 540 mutex_unlock(&mem_ctls_mutex);
  541 +
  542 + /* flush workq processes and remove sysfs */
  543 + edac_mc_workq_teardown(mci);
  544 + edac_remove_sysfs_mci_device(mci);
  545 +
513 546 edac_printk(KERN_INFO, EDAC_MC,
514 547 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
515 548 mci->mod_name, mci->ctl_name, dev_name(mci));
  549 +
516 550 return mci;
517 551 }
518 552 EXPORT_SYMBOL_GPL(edac_mc_del_mc);
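
Taken together, the reworked removal path follows a common teardown
ordering: mark the instance dead and unlink it under the lock, then do the
blocking cleanup with the lock dropped.  A condensed sketch with
hypothetical names:

        struct my_mci *del_mc(struct device *dev)
        {
                struct my_mci *mci;

                mutex_lock(&ctls_mutex);
                mci = find_by_dev(dev);
                if (mci == NULL) {
                        mutex_unlock(&ctls_mutex);
                        return NULL;
                }
                mci->op_state = OP_OFFLINE;  /* workers will now bail out */
                list_del(&mci->link);        /* no new lookups can find us */
                mutex_unlock(&ctls_mutex);

                /* blocking cleanup happens outside the lock */
                workq_teardown(mci);         /* may flush a running worker */
                remove_sysfs(mci);
                return mci;
        }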