Commit 89986496de141213206d49450ffdd36098d41209

Authored by Corey Minyard
Committed by Linus Torvalds
1 parent 0dfe6e7ed4

ipmi: Turn off all activity on an idle ipmi interface

The IPMI driver would wake up periodically to look for events and
watchdog pretimeouts.  If nothing is waiting for these, checking for
them is pointless.  So modify the driver so the message handler can
tell the lower layer whether it still needs to watch for them.
Modify the system interface lower layer to turn off all timer and
thread activity when the upper layer needs nothing and it is not
currently handling messages.  And modify the message handler to not
restart its own timer when the timer is not needed.

The timers and kthread will still be enabled if:
 - the SI interface is handling a message.
 - a user has enabled watching for events.
 - the IPMI watchdog timer is in use (since it uses pretimeouts).
 - the message handler is waiting on a remote response.
 - a user has registered to receive commands.

This mostly affects interfaces without interrupts.  Interfaces with
interrupts already don't use CPU in the system interface when the
interface is idle.
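
In outline, the reworked periodic timer now walks the interfaces, polls
for events only when someone is waiting for them, tells each lower layer
whether it still needs to watch, and lets itself expire when nothing is
pending.  A condensed sketch of that path, adapted from the
ipmi_msghandler.c hunks below (the stop_operation check and the
maintenance-mode handling are omitted, and the local variables are
renamed for readability):

static void ipmi_timeout(unsigned long data)
{
	ipmi_smi_t intf;
	int needs_timer = 0;	/* does any interface still need us? */

	rcu_read_lock();
	list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
		int busy = 0;

		/* Only ask the BMC for events if a user wants them. */
		if (atomic_read(&intf->event_waiters)) {
			if (--intf->ticks_to_req_ev == 0) {
				ipmi_request_event(intf);
				intf->ticks_to_req_ev = IPMI_REQUEST_EV_TIME;
			}
			busy = 1;
		}

		/* Messages still waiting for responses also count. */
		busy = !!(busy + ipmi_timeout_handler(intf, IPMI_TIMEOUT_TIME));

		/* Tell the lower layer whether it must keep watching. */
		if (busy != intf->last_needs_timer &&
					intf->handlers->set_need_watch)
			intf->handlers->set_need_watch(intf->send_info, busy);
		intf->last_needs_timer = busy;

		needs_timer += busy;
	}
	rcu_read_unlock();

	/* Everything idle: let the timer die.  need_waiter() restarts it. */
	if (needs_timer)
		mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES);
}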

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 182 additions and 97 deletions

drivers/char/ipmi/ipmi_msghandler.c
... ... @@ -55,6 +55,7 @@
55 55 static int ipmi_init_msghandler(void);
56 56 static void smi_recv_tasklet(unsigned long);
57 57 static void handle_new_recv_msgs(ipmi_smi_t intf);
  58 +static void need_waiter(ipmi_smi_t intf);
58 59  
59 60 static int initialized;
60 61  
61 62  
... ... @@ -73,7 +74,21 @@
73 74 */
74 75 #define MAX_MSG_TIMEOUT 60000
75 76  
  77 +/* Call every ~1000 ms. */
  78 +#define IPMI_TIMEOUT_TIME 1000
  79 +
  80 +/* How many jiffies does it take to get to the timeout time. */
  81 +#define IPMI_TIMEOUT_JIFFIES ((IPMI_TIMEOUT_TIME * HZ) / 1000)
  82 +
76 83 /*
  84 + * Request events from the queue every second (this is the number of
  85 + * IPMI_TIMEOUT_TIMES between event requests). Hopefully, in the
  86 + * future, IPMI will add a way to know immediately if an event is in
  87 + * the queue and this silliness can go away.
  88 + */
  89 +#define IPMI_REQUEST_EV_TIME (1000 / (IPMI_TIMEOUT_TIME))
  90 +
  91 +/*
77 92 * The main "user" data structure.
78 93 */
79 94 struct ipmi_user {
... ... @@ -92,7 +107,7 @@
92 107 ipmi_smi_t intf;
93 108  
94 109 /* Does this interface receive IPMI events? */
95   - int gets_events;
  110 + bool gets_events;
96 111 };
97 112  
98 113 struct cmd_rcvr {
... ... @@ -383,6 +398,9 @@
383 398 unsigned int waiting_events_count; /* How many events in queue? */
384 399 char delivering_events;
385 400 char event_msg_printed;
  401 + atomic_t event_waiters;
  402 + unsigned int ticks_to_req_ev;
  403 + int last_needs_timer;
386 404  
387 405 /*
388 406 * The event receiver for my BMC, only really used at panic
... ... @@ -451,7 +469,6 @@
451 469 static LIST_HEAD(smi_watchers);
452 470 static DEFINE_MUTEX(smi_watchers_mutex);
453 471  
454   -
455 472 #define ipmi_inc_stat(intf, stat) \
456 473 atomic_inc(&(intf)->stats[IPMI_STAT_ ## stat])
457 474 #define ipmi_get_stat(intf, stat) \
... ... @@ -772,6 +789,7 @@
772 789 *seq = i;
773 790 *seqid = intf->seq_table[i].seqid;
774 791 intf->curr_seq = (i+1)%IPMI_IPMB_NUM_SEQ;
  792 + need_waiter(intf);
775 793 } else {
776 794 rv = -EAGAIN;
777 795 }
... ... @@ -941,7 +959,7 @@
941 959 new_user->handler = handler;
942 960 new_user->handler_data = handler_data;
943 961 new_user->intf = intf;
944   - new_user->gets_events = 0;
  962 + new_user->gets_events = false;
945 963  
946 964 if (!try_module_get(intf->handlers->owner)) {
947 965 rv = -ENODEV;
... ... @@ -966,6 +984,11 @@
966 984 spin_lock_irqsave(&intf->seq_lock, flags);
967 985 list_add_rcu(&new_user->link, &intf->users);
968 986 spin_unlock_irqrestore(&intf->seq_lock, flags);
  987 + if (handler->ipmi_watchdog_pretimeout) {
  988 + /* User wants pretimeouts, so make sure to watch for them. */
  989 + if (atomic_inc_return(&intf->event_waiters) == 1)
  990 + need_waiter(intf);
  991 + }
969 992 *user = new_user;
970 993 return 0;
971 994  
... ... @@ -1021,6 +1044,12 @@
1021 1044  
1022 1045 user->valid = 0;
1023 1046  
  1047 + if (user->handler->ipmi_watchdog_pretimeout)
  1048 + atomic_dec(&intf->event_waiters);
  1049 +
  1050 + if (user->gets_events)
  1051 + atomic_dec(&intf->event_waiters);
  1052 +
1024 1053 /* Remove the user from the interface's sequence table. */
1025 1054 spin_lock_irqsave(&intf->seq_lock, flags);
1026 1055 list_del_rcu(&user->link);
... ... @@ -1184,7 +1213,7 @@
1184 1213 }
1185 1214 EXPORT_SYMBOL(ipmi_set_maintenance_mode);
1186 1215  
1187   -int ipmi_set_gets_events(ipmi_user_t user, int val)
  1216 +int ipmi_set_gets_events(ipmi_user_t user, bool val)
1188 1217 {
1189 1218 unsigned long flags;
1190 1219 ipmi_smi_t intf = user->intf;
1191 1220  
... ... @@ -1194,8 +1223,18 @@
1194 1223 INIT_LIST_HEAD(&msgs);
1195 1224  
1196 1225 spin_lock_irqsave(&intf->events_lock, flags);
  1226 + if (user->gets_events == val)
  1227 + goto out;
  1228 +
1197 1229 user->gets_events = val;
1198 1230  
  1231 + if (val) {
  1232 + if (atomic_inc_return(&intf->event_waiters) == 1)
  1233 + need_waiter(intf);
  1234 + } else {
  1235 + atomic_dec(&intf->event_waiters);
  1236 + }
  1237 +
1199 1238 if (intf->delivering_events)
1200 1239 /*
1201 1240 * Another thread is delivering events for this, so
... ... @@ -1289,6 +1328,9 @@
1289 1328 goto out_unlock;
1290 1329 }
1291 1330  
  1331 + if (atomic_inc_return(&intf->event_waiters) == 1)
  1332 + need_waiter(intf);
  1333 +
1292 1334 list_add_rcu(&rcvr->link, &intf->cmd_rcvrs);
1293 1335  
1294 1336 out_unlock:
... ... @@ -1330,6 +1372,7 @@
1330 1372 mutex_unlock(&intf->cmd_rcvrs_mutex);
1331 1373 synchronize_rcu();
1332 1374 while (rcvrs) {
  1375 + atomic_dec(&intf->event_waiters);
1333 1376 rcvr = rcvrs;
1334 1377 rcvrs = rcvr->next;
1335 1378 kfree(rcvr);
... ... @@ -2876,6 +2919,8 @@
2876 2919 (unsigned long) intf);
2877 2920 atomic_set(&intf->watchdog_pretimeouts_to_deliver, 0);
2878 2921 spin_lock_init(&intf->events_lock);
  2922 + atomic_set(&intf->event_waiters, 0);
  2923 + intf->ticks_to_req_ev = IPMI_REQUEST_EV_TIME;
2879 2924 INIT_LIST_HEAD(&intf->waiting_events);
2880 2925 intf->waiting_events_count = 0;
2881 2926 mutex_init(&intf->cmd_rcvrs_mutex);
... ... @@ -3965,7 +4010,8 @@
3965 4010  
3966 4011 static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent,
3967 4012 struct list_head *timeouts, long timeout_period,
3968   - int slot, unsigned long *flags)
  4013 + int slot, unsigned long *flags,
  4014 + unsigned int *waiting_msgs)
3969 4015 {
3970 4016 struct ipmi_recv_msg *msg;
3971 4017 struct ipmi_smi_handlers *handlers;
3972 4018  
... ... @@ -3977,8 +4023,10 @@
3977 4023 return;
3978 4024  
3979 4025 ent->timeout -= timeout_period;
3980   - if (ent->timeout > 0)
  4026 + if (ent->timeout > 0) {
  4027 + (*waiting_msgs)++;
3981 4028 return;
  4029 + }
3982 4030  
3983 4031 if (ent->retries_left == 0) {
3984 4032 /* The message has used all its retries. */
... ... @@ -3995,6 +4043,8 @@
3995 4043 struct ipmi_smi_msg *smi_msg;
3996 4044 /* More retries, send again. */
3997 4045  
  4046 + (*waiting_msgs)++;
  4047 +
3998 4048 /*
3999 4049 * Start with the max timer, set to normal timer after
4000 4050 * the message is sent.
... ... @@ -4040,117 +4090,118 @@
4040 4090 }
4041 4091 }
4042 4092  
4043   -static void ipmi_timeout_handler(long timeout_period)
  4093 +static unsigned int ipmi_timeout_handler(ipmi_smi_t intf, long timeout_period)
4044 4094 {
4045   - ipmi_smi_t intf;
4046 4095 struct list_head timeouts;
4047 4096 struct ipmi_recv_msg *msg, *msg2;
4048 4097 unsigned long flags;
4049 4098 int i;
  4099 + unsigned int waiting_msgs = 0;
4050 4100  
4051   - rcu_read_lock();
4052   - list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
4053   - tasklet_schedule(&intf->recv_tasklet);
  4101 + /*
  4102 + * Go through the seq table and find any messages that
  4103 + * have timed out, putting them in the timeouts
  4104 + * list.
  4105 + */
  4106 + INIT_LIST_HEAD(&timeouts);
  4107 + spin_lock_irqsave(&intf->seq_lock, flags);
  4108 + for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++)
  4109 + check_msg_timeout(intf, &(intf->seq_table[i]),
  4110 + &timeouts, timeout_period, i,
  4111 + &flags, &waiting_msgs);
  4112 + spin_unlock_irqrestore(&intf->seq_lock, flags);
4054 4113  
4055   - /*
4056   - * Go through the seq table and find any messages that
4057   - * have timed out, putting them in the timeouts
4058   - * list.
4059   - */
4060   - INIT_LIST_HEAD(&timeouts);
4061   - spin_lock_irqsave(&intf->seq_lock, flags);
4062   - for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++)
4063   - check_msg_timeout(intf, &(intf->seq_table[i]),
4064   - &timeouts, timeout_period, i,
4065   - &flags);
4066   - spin_unlock_irqrestore(&intf->seq_lock, flags);
  4114 + list_for_each_entry_safe(msg, msg2, &timeouts, link)
  4115 + deliver_err_response(msg, IPMI_TIMEOUT_COMPLETION_CODE);
4067 4116  
4068   - list_for_each_entry_safe(msg, msg2, &timeouts, link)
4069   - deliver_err_response(msg, IPMI_TIMEOUT_COMPLETION_CODE);
4070   -
4071   - /*
4072   - * Maintenance mode handling. Check the timeout
4073   - * optimistically before we claim the lock. It may
4074   - * mean a timeout gets missed occasionally, but that
4075   - * only means the timeout gets extended by one period
4076   - * in that case. No big deal, and it avoids the lock
4077   - * most of the time.
4078   - */
  4117 + /*
  4118 + * Maintenance mode handling. Check the timeout
  4119 + * optimistically before we claim the lock. It may
  4120 + * mean a timeout gets missed occasionally, but that
  4121 + * only means the timeout gets extended by one period
  4122 + * in that case. No big deal, and it avoids the lock
  4123 + * most of the time.
  4124 + */
  4125 + if (intf->auto_maintenance_timeout > 0) {
  4126 + spin_lock_irqsave(&intf->maintenance_mode_lock, flags);
4079 4127 if (intf->auto_maintenance_timeout > 0) {
4080   - spin_lock_irqsave(&intf->maintenance_mode_lock, flags);
4081   - if (intf->auto_maintenance_timeout > 0) {
4082   - intf->auto_maintenance_timeout
4083   - -= timeout_period;
4084   - if (!intf->maintenance_mode
4085   - && (intf->auto_maintenance_timeout <= 0)) {
4086   - intf->maintenance_mode_enable = 0;
4087   - maintenance_mode_update(intf);
4088   - }
  4128 + intf->auto_maintenance_timeout
  4129 + -= timeout_period;
  4130 + if (!intf->maintenance_mode
  4131 + && (intf->auto_maintenance_timeout <= 0)) {
  4132 + intf->maintenance_mode_enable = 0;
  4133 + maintenance_mode_update(intf);
4089 4134 }
4090   - spin_unlock_irqrestore(&intf->maintenance_mode_lock,
4091   - flags);
4092 4135 }
  4136 + spin_unlock_irqrestore(&intf->maintenance_mode_lock,
  4137 + flags);
4093 4138 }
4094   - rcu_read_unlock();
  4139 +
  4140 + tasklet_schedule(&intf->recv_tasklet);
  4141 +
  4142 + return waiting_msgs;
4095 4143 }
4096 4144  
4097   -static void ipmi_request_event(void)
  4145 +static void ipmi_request_event(ipmi_smi_t intf)
4098 4146 {
4099   - ipmi_smi_t intf;
4100 4147 struct ipmi_smi_handlers *handlers;
4101 4148  
4102   - rcu_read_lock();
4103   - /*
4104   - * Called from the timer, no need to check if handlers is
4105   - * valid.
4106   - */
4107   - list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
4108   - /* No event requests when in maintenance mode. */
4109   - if (intf->maintenance_mode_enable)
4110   - continue;
  4149 + /* No event requests when in maintenance mode. */
  4150 + if (intf->maintenance_mode_enable)
  4151 + return;
4111 4152  
4112   - handlers = intf->handlers;
4113   - if (handlers)
4114   - handlers->request_events(intf->send_info);
4115   - }
4116   - rcu_read_unlock();
  4153 + handlers = intf->handlers;
  4154 + if (handlers)
  4155 + handlers->request_events(intf->send_info);
4117 4156 }
4118 4157  
4119 4158 static struct timer_list ipmi_timer;
4120 4159  
4121   -/* Call every ~1000 ms. */
4122   -#define IPMI_TIMEOUT_TIME 1000
4123   -
4124   -/* How many jiffies does it take to get to the timeout time. */
4125   -#define IPMI_TIMEOUT_JIFFIES ((IPMI_TIMEOUT_TIME * HZ) / 1000)
4126   -
4127   -/*
4128   - * Request events from the queue every second (this is the number of
4129   - * IPMI_TIMEOUT_TIMES between event requests). Hopefully, in the
4130   - * future, IPMI will add a way to know immediately if an event is in
4131   - * the queue and this silliness can go away.
4132   - */
4133   -#define IPMI_REQUEST_EV_TIME (1000 / (IPMI_TIMEOUT_TIME))
4134   -
4135 4160 static atomic_t stop_operation;
4136   -static unsigned int ticks_to_req_ev = IPMI_REQUEST_EV_TIME;
4137 4161  
4138 4162 static void ipmi_timeout(unsigned long data)
4139 4163 {
  4164 + ipmi_smi_t intf;
  4165 + int nt = 0;
  4166 +
4140 4167 if (atomic_read(&stop_operation))
4141 4168 return;
4142 4169  
4143   - ticks_to_req_ev--;
4144   - if (ticks_to_req_ev == 0) {
4145   - ipmi_request_event();
4146   - ticks_to_req_ev = IPMI_REQUEST_EV_TIME;
4147   - }
  4170 + rcu_read_lock();
  4171 + list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
  4172 + int lnt = 0;
4148 4173  
4149   - ipmi_timeout_handler(IPMI_TIMEOUT_TIME);
  4174 + if (atomic_read(&intf->event_waiters)) {
  4175 + intf->ticks_to_req_ev--;
  4176 + if (intf->ticks_to_req_ev == 0) {
  4177 + ipmi_request_event(intf);
  4178 + intf->ticks_to_req_ev = IPMI_REQUEST_EV_TIME;
  4179 + }
  4180 + lnt++;
  4181 + }
4150 4182  
4151   - mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES);
  4183 + lnt += ipmi_timeout_handler(intf, IPMI_TIMEOUT_TIME);
  4184 +
  4185 + lnt = !!lnt;
  4186 + if (lnt != intf->last_needs_timer &&
  4187 + intf->handlers->set_need_watch)
  4188 + intf->handlers->set_need_watch(intf->send_info, lnt);
  4189 + intf->last_needs_timer = lnt;
  4190 +
  4191 + nt += lnt;
  4192 + }
  4193 + rcu_read_unlock();
  4194 +
  4195 + if (nt)
  4196 + mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES);
4152 4197 }
4153 4198  
  4199 +static void need_waiter(ipmi_smi_t intf)
  4200 +{
  4201 + /* Racy, but worst case we start the timer twice. */
  4202 + if (!timer_pending(&ipmi_timer))
  4203 + mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES);
  4204 +}
4154 4205  
4155 4206 static atomic_t smi_msg_inuse_count = ATOMIC_INIT(0);
4156 4207 static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0);
drivers/char/ipmi/ipmi_si_intf.c
... ... @@ -257,6 +257,9 @@
257 257 /* Used to gracefully stop the timer without race conditions. */
258 258 atomic_t stop_operation;
259 259  
  260 + /* Are we waiting for the events, pretimeouts, received msgs? */
  261 + atomic_t need_watch;
  262 +
260 263 /*
261 264 * The driver will disable interrupts when it gets into a
262 265 * situation where it cannot handle messages due to lack of
... ... @@ -862,6 +865,19 @@
862 865 return si_sm_result;
863 866 }
864 867  
  868 +static void check_start_timer_thread(struct smi_info *smi_info)
  869 +{
  870 + if (smi_info->si_state == SI_NORMAL && smi_info->curr_msg == NULL) {
  871 + smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES);
  872 +
  873 + if (smi_info->thread)
  874 + wake_up_process(smi_info->thread);
  875 +
  876 + start_next_msg(smi_info);
  877 + smi_event_handler(smi_info, 0);
  878 + }
  879 +}
  880 +
865 881 static void sender(void *send_info,
866 882 struct ipmi_smi_msg *msg,
867 883 int priority)
... ... @@ -915,15 +931,7 @@
915 931 else
916 932 list_add_tail(&msg->link, &smi_info->xmit_msgs);
917 933  
918   - if (smi_info->si_state == SI_NORMAL && smi_info->curr_msg == NULL) {
919   - smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES);
920   -
921   - if (smi_info->thread)
922   - wake_up_process(smi_info->thread);
923   -
924   - start_next_msg(smi_info);
925   - smi_event_handler(smi_info, 0);
926   - }
  934 + check_start_timer_thread(smi_info);
927 935 spin_unlock_irqrestore(&smi_info->si_lock, flags);
928 936 }
929 937  
... ... @@ -1023,9 +1031,15 @@
1023 1031 ; /* do nothing */
1024 1032 else if (smi_result == SI_SM_CALL_WITH_DELAY && busy_wait)
1025 1033 schedule();
1026   - else if (smi_result == SI_SM_IDLE)
1027   - schedule_timeout_interruptible(100);
1028   - else
  1034 + else if (smi_result == SI_SM_IDLE) {
  1035 + if (atomic_read(&smi_info->need_watch)) {
  1036 + schedule_timeout_interruptible(100);
  1037 + } else {
  1038 + /* Wait to be woken up when we are needed. */
  1039 + __set_current_state(TASK_INTERRUPTIBLE);
  1040 + schedule();
  1041 + }
  1042 + } else
1029 1043 schedule_timeout_interruptible(1);
1030 1044 }
1031 1045 return 0;
... ... @@ -1061,6 +1075,17 @@
1061 1075 atomic_set(&smi_info->req_events, 1);
1062 1076 }
1063 1077  
  1078 +static void set_need_watch(void *send_info, int enable)
  1079 +{
  1080 + struct smi_info *smi_info = send_info;
  1081 + unsigned long flags;
  1082 +
  1083 + atomic_set(&smi_info->need_watch, enable);
  1084 + spin_lock_irqsave(&smi_info->si_lock, flags);
  1085 + check_start_timer_thread(smi_info);
  1086 + spin_unlock_irqrestore(&smi_info->si_lock, flags);
  1087 +}
  1088 +
1064 1089 static int initialized;
1065 1090  
1066 1091 static void smi_timeout(unsigned long data)
... ... @@ -1212,6 +1237,7 @@
1212 1237 .get_smi_info = get_smi_info,
1213 1238 .sender = sender,
1214 1239 .request_events = request_events,
  1240 + .set_need_watch = set_need_watch,
1215 1241 .set_maintenance_mode = set_maintenance_mode,
1216 1242 .set_run_to_completion = set_run_to_completion,
1217 1243 .poll = poll,
... ... @@ -3352,6 +3378,7 @@
3352 3378  
3353 3379 new_smi->interrupt_disabled = 1;
3354 3380 atomic_set(&new_smi->stop_operation, 0);
  3381 + atomic_set(&new_smi->need_watch, 0);
3355 3382 new_smi->intf_num = smi_num;
3356 3383 smi_num++;
3357 3384  
include/linux/ipmi.h
... ... @@ -237,7 +237,7 @@
237 237 * The first user that sets this to TRUE will receive all events that
238 238 * have been queued while no one was waiting for events.
239 239 */
240   -int ipmi_set_gets_events(ipmi_user_t user, int val);
  240 +int ipmi_set_gets_events(ipmi_user_t user, bool val);
241 241  
242 242 /*
243 243 * Called when a new SMI is registered. This will also be called on
include/linux/ipmi_smi.h
... ... @@ -109,6 +109,13 @@
109 109 events from the BMC we are attached to. */
110 110 void (*request_events)(void *send_info);
111 111  
  112 + /* Called by the upper layer when some user requires that the
  113 + interface watch for events, received messages, watchdog
  114 + pretimeouts, or not. Used by the SMI to know if it should
  115 + watch for these. This may be NULL if the SMI does not
  116 + implement it. */
  117 + void (*set_need_watch)(void *send_info, int enable);
  118 +
112 119 /* Called when the interface should go into "run to
113 120 completion" mode. If this call sets the value to true, the
114 121 interface should make sure that all messages are flushed
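
As the new comment above notes, set_need_watch may be NULL.  A lower
layer that does implement it typically just records the flag and, when
watching becomes needed again, restarts its own timer and wakes its
kthread, which is what the ipmi_si changes in this patch do.  A minimal
sketch for a hypothetical driver (my_smi_info, my_check_start_timer_thread
and the lock/flag fields are illustrative names, not part of this patch):

static void my_set_need_watch(void *send_info, int enable)
{
	struct my_smi_info *smi = send_info;
	unsigned long flags;

	/* Remember whether the upper layer wants us watching at all. */
	atomic_set(&smi->need_watch, enable);

	/* Restart the timer and kthread in case they went fully idle. */
	spin_lock_irqsave(&smi->lock, flags);
	my_check_start_timer_thread(smi);
	spin_unlock_irqrestore(&smi->lock, flags);
}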