Commit aae4518b3124b29f8dc81c829c704fd2df72e98b

Authored by Rafael J. Wysocki
1 parent f6514be5fe

PM / sleep: Mechanism to avoid resuming runtime-suspended devices unnecessarily

Currently, some subsystems (e.g. PCI and the ACPI PM domain) have to
resume all runtime-suspended devices during system suspend, mostly
because those devices may need to be reprogrammed due to different
wakeup settings for system sleep and for runtime PM.

For some devices, though, it's OK to remain in runtime suspend
throughout a complete system suspend/resume cycle (if the device was in
runtime suspend at the start of the cycle).  We would like to do this
whenever possible, to avoid the overhead of extra power-up and power-down
events.

However, problems may arise because the device's descendants may require
it to be at full power at various points during the cycle.  Therefore, the
most straightforward way to do this safely is to allow it only if the
device and all of its descendants can remain runtime-suspended until the
"complete" stage of system resume.

To this end, introduce a new device PM flag, power.direct_complete
and modify the PM core to use that flag as follows.

If the ->prepare() callback of a device returns a positive number,
the PM core will regard that as an indication that it may leave the
device runtime-suspended.  It will then check if the system power
transition in progress is a suspend (and not hibernation in particular)
and if the device is, indeed, runtime-suspended.  In that case, the PM
core will set the device's power.direct_complete flag.  Otherwise it
will clear power.direct_complete for the device and it also will later
clear it for the device's parent (if there's one).

Next, the PM core will not invoke the ->suspend(), ->suspend_late(),
->suspend_noirq(), ->resume_noirq(), ->resume_early(), or ->resume()
callbacks for all devices having power.direct_complete set.  It
will invoke their ->complete() callbacks, however, and those
callbacks are then responsible for resuming the devices as
appropriate, if necessary.  For example, in some cases they may
need to queue up runtime resume requests for the devices using
pm_request_resume().

Changelog partly based on Alan Stern's description of the idea
(http://marc.info/?l=linux-pm&m=139940466625569&w=2).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>

Showing 3 changed files with 85 additions and 23 deletions Side-by-side Diff

drivers/base/power/main.c
... ... @@ -479,7 +479,7 @@
479 479 TRACE_DEVICE(dev);
480 480 TRACE_RESUME(0);
481 481  
482   - if (dev->power.syscore)
  482 + if (dev->power.syscore || dev->power.direct_complete)
483 483 goto Out;
484 484  
485 485 if (!dev->power.is_noirq_suspended)
... ... @@ -605,7 +605,7 @@
605 605 TRACE_DEVICE(dev);
606 606 TRACE_RESUME(0);
607 607  
608   - if (dev->power.syscore)
  608 + if (dev->power.syscore || dev->power.direct_complete)
609 609 goto Out;
610 610  
611 611 if (!dev->power.is_late_suspended)
... ... @@ -735,6 +735,12 @@
735 735 if (dev->power.syscore)
736 736 goto Complete;
737 737  
  738 + if (dev->power.direct_complete) {
  739 + /* Match the pm_runtime_disable() in __device_suspend(). */
  740 + pm_runtime_enable(dev);
  741 + goto Complete;
  742 + }
  743 +
738 744 dpm_wait(dev->parent, async);
739 745 dpm_watchdog_set(&wd, dev);
740 746 device_lock(dev);
... ... @@ -1007,7 +1013,7 @@
1007 1013 goto Complete;
1008 1014 }
1009 1015  
1010   - if (dev->power.syscore)
  1016 + if (dev->power.syscore || dev->power.direct_complete)
1011 1017 goto Complete;
1012 1018  
1013 1019 dpm_wait_for_children(dev, async);
... ... @@ -1146,7 +1152,7 @@
1146 1152 goto Complete;
1147 1153 }
1148 1154  
1149   - if (dev->power.syscore)
  1155 + if (dev->power.syscore || dev->power.direct_complete)
1150 1156 goto Complete;
1151 1157  
1152 1158 dpm_wait_for_children(dev, async);
... ... @@ -1332,6 +1338,17 @@
1332 1338 if (dev->power.syscore)
1333 1339 goto Complete;
1334 1340  
  1341 + if (dev->power.direct_complete) {
  1342 + if (pm_runtime_status_suspended(dev)) {
  1343 + pm_runtime_disable(dev);
  1344 + if (pm_runtime_suspended_if_enabled(dev))
  1345 + goto Complete;
  1346 +
  1347 + pm_runtime_enable(dev);
  1348 + }
  1349 + dev->power.direct_complete = false;
  1350 + }
  1351 +
1335 1352 dpm_watchdog_set(&wd, dev);
1336 1353 device_lock(dev);
1337 1354  
1338 1355  
... ... @@ -1382,10 +1399,19 @@
1382 1399  
1383 1400 End:
1384 1401 if (!error) {
  1402 + struct device *parent = dev->parent;
  1403 +
1385 1404 dev->power.is_suspended = true;
1386   - if (dev->power.wakeup_path
1387   - && dev->parent && !dev->parent->power.ignore_children)
1388   - dev->parent->power.wakeup_path = true;
  1405 + if (parent) {
  1406 + spin_lock_irq(&parent->power.lock);
  1407 +
  1408 + dev->parent->power.direct_complete = false;
  1409 + if (dev->power.wakeup_path
  1410 + && !dev->parent->power.ignore_children)
  1411 + dev->parent->power.wakeup_path = true;
  1412 +
  1413 + spin_unlock_irq(&parent->power.lock);
  1414 + }
1389 1415 }
1390 1416  
1391 1417 device_unlock(dev);
... ... @@ -1487,7 +1513,7 @@
1487 1513 {
1488 1514 int (*callback)(struct device *) = NULL;
1489 1515 char *info = NULL;
1490   - int error = 0;
  1516 + int ret = 0;
1491 1517  
1492 1518 if (dev->power.syscore)
1493 1519 return 0;
1494 1520  
1495 1521  
... ... @@ -1523,17 +1549,27 @@
1523 1549 callback = dev->driver->pm->prepare;
1524 1550 }
1525 1551  
1526   - if (callback) {
1527   - error = callback(dev);
1528   - suspend_report_result(callback, error);
1529   - }
  1552 + if (callback)
  1553 + ret = callback(dev);
1530 1554  
1531 1555 device_unlock(dev);
1532 1556  
1533   - if (error)
  1557 + if (ret < 0) {
  1558 + suspend_report_result(callback, ret);
1534 1559 pm_runtime_put(dev);
1535   -
1536   - return error;
  1560 + return ret;
  1561 + }
  1562 + /*
  1563 + * A positive return value from ->prepare() means "this device appears
  1564 + * to be runtime-suspended and its state is fine, so if it really is
  1565 + * runtime-suspended, you can leave it in that state provided that you
  1566 + * will do the same thing with all of its descendants". This only
  1567 + * applies to suspend transitions, however.
  1568 + */
  1569 + spin_lock_irq(&dev->power.lock);
  1570 + dev->power.direct_complete = ret > 0 && state.event == PM_EVENT_SUSPEND;
  1571 + spin_unlock_irq(&dev->power.lock);
  1572 + return 0;
1537 1573 }
1538 1574  
1539 1575 /**
... ... @@ -93,13 +93,23 @@
93 93 * been registered) to recover from the race condition.
94 94 * This method is executed for all kinds of suspend transitions and is
95 95 * followed by one of the suspend callbacks: @suspend(), @freeze(), or
96   - * @poweroff(). The PM core executes subsystem-level @prepare() for all
97   - * devices before starting to invoke suspend callbacks for any of them, so
98   - * generally devices may be assumed to be functional or to respond to
99   - * runtime resume requests while @prepare() is being executed. However,
100   - * device drivers may NOT assume anything about the availability of user
101   - * space at that time and it is NOT valid to request firmware from within
102   - * @prepare() (it's too late to do that). It also is NOT valid to allocate
  96 + * @poweroff(). If the transition is a suspend to memory or standby (that
  97 + * is, not related to hibernation), the return value of @prepare() may be
  98 + * used to indicate to the PM core to leave the device in runtime suspend
  99 + * if applicable. Namely, if @prepare() returns a positive number, the PM
  100 + * core will understand that as a declaration that the device appears to be
  101 + * runtime-suspended and it may be left in that state during the entire
  102 + * transition and during the subsequent resume if all of its descendants
  103 + * are left in runtime suspend too. If that happens, @complete() will be
  104 + * executed directly after @prepare() and it must ensure the proper
  105 + * functioning of the device after the system resume.
  106 + * The PM core executes subsystem-level @prepare() for all devices before
  107 + * starting to invoke suspend callbacks for any of them, so generally
  108 + * devices may be assumed to be functional or to respond to runtime resume
  109 + * requests while @prepare() is being executed. However, device drivers
  110 + * may NOT assume anything about the availability of user space at that
  111 + * time and it is NOT valid to request firmware from within @prepare()
  112 + * (it's too late to do that). It also is NOT valid to allocate
103 113 * substantial amounts of memory from @prepare() in the GFP_KERNEL mode.
104 114 * [To work around these limitations, drivers may register suspend and
105 115 * hibernation notifiers to be executed before the freezing of tasks.]
... ... @@ -112,7 +122,16 @@
112 122 * of the other devices that the PM core has unsuccessfully attempted to
113 123 * suspend earlier).
114 124 * The PM core executes subsystem-level @complete() after it has executed
115   - * the appropriate resume callbacks for all devices.
  125 + * the appropriate resume callbacks for all devices. If the corresponding
  126 + * @prepare() at the beginning of the suspend transition returned a
  127 + * positive number and the device was left in runtime suspend (without
  128 + * executing any suspend and resume callbacks for it), @complete() will be
  129 + * the only callback executed for the device during resume. In that case,
  130 + * @complete() must be prepared to do whatever is necessary to ensure the
  131 + * proper functioning of the device after the system resume. To this end,
  132 + * @complete() can check the power.direct_complete flag of the device to
  133 + * learn whether (unset) or not (set) the previous suspend and resume
  134 + * callbacks have been executed for it.
116 135 *
117 136 * @suspend: Executed before putting the system into a sleep state in which the
118 137 * contents of main memory are preserved. The exact action to perform
... ... @@ -546,6 +565,7 @@
546 565 bool is_late_suspended:1;
547 566 bool ignore_children:1;
548 567 bool early_init:1; /* Owned by the PM core */
  568 + bool direct_complete:1; /* Owned by the PM core */
549 569 spinlock_t lock;
550 570 #ifdef CONFIG_PM_SLEEP
551 571 struct list_head entry;
include/linux/pm_runtime.h
... ... @@ -101,6 +101,11 @@
101 101 return dev->power.runtime_status == RPM_SUSPENDED;
102 102 }
103 103  
  104 +static inline bool pm_runtime_suspended_if_enabled(struct device *dev)
  105 +{
  106 + return pm_runtime_status_suspended(dev) && dev->power.disable_depth == 1;
  107 +}
  108 +
104 109 static inline bool pm_runtime_enabled(struct device *dev)
105 110 {
106 111 return !dev->power.disable_depth;
... ... @@ -150,6 +155,7 @@
150 155 static inline bool pm_runtime_suspended(struct device *dev) { return false; }
151 156 static inline bool pm_runtime_active(struct device *dev) { return true; }
152 157 static inline bool pm_runtime_status_suspended(struct device *dev) { return false; }
  158 +static inline bool pm_runtime_suspended_if_enabled(struct device *dev) { return false; }
153 159 static inline bool pm_runtime_enabled(struct device *dev) { return false; }
154 160  
155 161 static inline void pm_runtime_no_callbacks(struct device *dev) {}