Commit 73c101011926c5832e6e141682180c4debe2cf45

Authored by Jens Axboe
1 parent a488e74976

block: initial patch for on-stack per-task plugging

This patch adds support for creating a queuing context outside
of the queue itself. This enables us to batch up pieces of IO
before grabbing the block device queue lock and submitting them to
the IO scheduler.

The context is created on the stack of the process and assigned in
the task structure, so that we can auto-unplug it if we hit a schedule
event.

The current queue plugging happens implicitly if IO is submitted to
an empty device, yet callers have to remember to unplug that IO when
they are going to wait for it. This is an ugly API and has caused bugs
in the past. Additionally, it requires hacks in the vm (->sync_page()
callback) to handle that logic. By switching to an explicit plugging
scheme we make the API a lot nicer and can get rid of the ->sync_page()
hack in the vm.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
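
A quick illustration of the resulting API (not part of the commit itself): a caller declares a struct blk_plug on its stack, plugs before issuing a batch of bios, and unplugs when done. The submit_batch() wrapper below is hypothetical; only struct blk_plug, blk_start_plug() and blk_finish_plug() come from this patch, while submit_bio() is the existing submission path.

    #include <linux/bio.h>
    #include <linux/blkdev.h>

    /* Hypothetical caller, sketching how the new on-stack plugging is meant to be used. */
    static void submit_batch(struct bio **bios, int nr)
    {
    	struct blk_plug plug;
    	int i;

    	blk_start_plug(&plug);		/* plug is stashed in current->plug */

    	for (i = 0; i < nr; i++)
    		/* non-sync requests are collected on the on-stack plug list */
    		submit_bio(bios[i]->bi_rw, bios[i]);

    	blk_finish_plug(&plug);		/* sort by queue, hand the list to the elevator, run the queue(s) */
    }

If the task blocks before blk_finish_plug() runs, the hook added to schedule() flushes the pending list automatically, so plugged IO cannot be forgotten the way implicit unplugging could be.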

Showing 10 changed files with 344 additions and 101 deletions

block/blk-core.c
... ... @@ -27,6 +27,7 @@
27 27 #include <linux/writeback.h>
28 28 #include <linux/task_io_accounting_ops.h>
29 29 #include <linux/fault-inject.h>
  30 +#include <linux/list_sort.h>
30 31  
31 32 #define CREATE_TRACE_POINTS
32 33 #include <trace/events/block.h>
... ... @@ -203,7 +204,7 @@
203 204  
204 205 q = container_of(work, struct request_queue, delay_work.work);
205 206 spin_lock_irq(q->queue_lock);
206   - q->request_fn(q);
  207 + __blk_run_queue(q);
207 208 spin_unlock_irq(q->queue_lock);
208 209 }
209 210  
... ... @@ -686,6 +687,8 @@
686 687  
687 688 static inline void blk_free_request(struct request_queue *q, struct request *rq)
688 689 {
  690 + BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
  691 +
689 692 if (rq->cmd_flags & REQ_ELVPRIV)
690 693 elv_put_request(q, rq);
691 694 mempool_free(rq, q->rq.rq_pool);
... ... @@ -1051,6 +1054,13 @@
1051 1054 }
1052 1055 EXPORT_SYMBOL(blk_requeue_request);
1053 1056  
  1057 +static void add_acct_request(struct request_queue *q, struct request *rq,
  1058 + int where)
  1059 +{
  1060 + drive_stat_acct(rq, 1);
  1061 + __elv_add_request(q, rq, where, 0);
  1062 +}
  1063 +
1054 1064 /**
1055 1065 * blk_insert_request - insert a special request into a request queue
1056 1066 * @q: request queue where request should be inserted
... ... @@ -1093,8 +1103,7 @@
1093 1103 if (blk_rq_tagged(rq))
1094 1104 blk_queue_end_tag(q, rq);
1095 1105  
1096   - drive_stat_acct(rq, 1);
1097   - __elv_add_request(q, rq, where, 0);
  1106 + add_acct_request(q, rq, where);
1098 1107 __blk_run_queue(q);
1099 1108 spin_unlock_irqrestore(q->queue_lock, flags);
1100 1109 }
... ... @@ -1215,6 +1224,113 @@
1215 1224 }
1216 1225 EXPORT_SYMBOL_GPL(blk_add_request_payload);
1217 1226  
  1227 +static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
  1228 + struct bio *bio)
  1229 +{
  1230 + const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
  1231 +
  1232 + /*
  1233 + * Debug stuff, kill later
  1234 + */
  1235 + if (!rq_mergeable(req)) {
  1236 + blk_dump_rq_flags(req, "back");
  1237 + return false;
  1238 + }
  1239 +
  1240 + if (!ll_back_merge_fn(q, req, bio))
  1241 + return false;
  1242 +
  1243 + trace_block_bio_backmerge(q, bio);
  1244 +
  1245 + if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
  1246 + blk_rq_set_mixed_merge(req);
  1247 +
  1248 + req->biotail->bi_next = bio;
  1249 + req->biotail = bio;
  1250 + req->__data_len += bio->bi_size;
  1251 + req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
  1252 +
  1253 + drive_stat_acct(req, 0);
  1254 + return true;
  1255 +}
  1256 +
  1257 +static bool bio_attempt_front_merge(struct request_queue *q,
  1258 + struct request *req, struct bio *bio)
  1259 +{
  1260 + const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
  1261 + sector_t sector;
  1262 +
  1263 + /*
  1264 + * Debug stuff, kill later
  1265 + */
  1266 + if (!rq_mergeable(req)) {
  1267 + blk_dump_rq_flags(req, "front");
  1268 + return false;
  1269 + }
  1270 +
  1271 + if (!ll_front_merge_fn(q, req, bio))
  1272 + return false;
  1273 +
  1274 + trace_block_bio_frontmerge(q, bio);
  1275 +
  1276 + if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
  1277 + blk_rq_set_mixed_merge(req);
  1278 +
  1279 + sector = bio->bi_sector;
  1280 +
  1281 + bio->bi_next = req->bio;
  1282 + req->bio = bio;
  1283 +
  1284 + /*
  1285 + * may not be valid. if the low level driver said
  1286 + * it didn't need a bounce buffer then it better
  1287 + * not touch req->buffer either...
  1288 + */
  1289 + req->buffer = bio_data(bio);
  1290 + req->__sector = bio->bi_sector;
  1291 + req->__data_len += bio->bi_size;
  1292 + req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
  1293 +
  1294 + drive_stat_acct(req, 0);
  1295 + return true;
  1296 +}
  1297 +
  1298 +/*
  1299 + * Attempts to merge with the plugged list in the current process. Returns
  1300 + * true if merge was successful, otherwise false.
  1301 + */
  1302 +static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
  1303 + struct bio *bio)
  1304 +{
  1305 + struct blk_plug *plug;
  1306 + struct request *rq;
  1307 + bool ret = false;
  1308 +
  1309 + plug = tsk->plug;
  1310 + if (!plug)
  1311 + goto out;
  1312 +
  1313 + list_for_each_entry_reverse(rq, &plug->list, queuelist) {
  1314 + int el_ret;
  1315 +
  1316 + if (rq->q != q)
  1317 + continue;
  1318 +
  1319 + el_ret = elv_try_merge(rq, bio);
  1320 + if (el_ret == ELEVATOR_BACK_MERGE) {
  1321 + ret = bio_attempt_back_merge(q, rq, bio);
  1322 + if (ret)
  1323 + break;
  1324 + } else if (el_ret == ELEVATOR_FRONT_MERGE) {
  1325 + ret = bio_attempt_front_merge(q, rq, bio);
  1326 + if (ret)
  1327 + break;
  1328 + }
  1329 + }
  1330 +out:
  1331 + return ret;
  1332 +}
  1333 +
1218 1334 void init_request_from_bio(struct request *req, struct bio *bio)
1219 1335 {
1220 1336 req->cpu = bio->bi_comp_cpu;
... ... @@ -1230,26 +1346,12 @@
1230 1346 blk_rq_bio_prep(req->q, req, bio);
1231 1347 }
1232 1348  
1233   -/*
1234   - * Only disabling plugging for non-rotational devices if it does tagging
1235   - * as well, otherwise we do need the proper merging
1236   - */
1237   -static inline bool queue_should_plug(struct request_queue *q)
1238   -{
1239   - return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
1240   -}
1241   -
1242 1349 static int __make_request(struct request_queue *q, struct bio *bio)
1243 1350 {
1244   - struct request *req;
1245   - int el_ret;
1246   - unsigned int bytes = bio->bi_size;
1247   - const unsigned short prio = bio_prio(bio);
1248 1351 const bool sync = !!(bio->bi_rw & REQ_SYNC);
1249   - const bool unplug = !!(bio->bi_rw & REQ_UNPLUG);
1250   - const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK;
1251   - int where = ELEVATOR_INSERT_SORT;
1252   - int rw_flags;
  1352 + struct blk_plug *plug;
  1353 + int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
  1354 + struct request *req;
1253 1355  
1254 1356 /*
1255 1357 * low level driver can indicate that it wants pages above a
... ... @@ -1258,78 +1360,36 @@
1258 1360 */
1259 1361 blk_queue_bounce(q, &bio);
1260 1362  
1261   - spin_lock_irq(q->queue_lock);
1262   -
1263 1363 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
  1364 + spin_lock_irq(q->queue_lock);
1264 1365 where = ELEVATOR_INSERT_FLUSH;
1265 1366 goto get_rq;
1266 1367 }
1267 1368  
1268   - if (elv_queue_empty(q))
1269   - goto get_rq;
1270   -
1271   - el_ret = elv_merge(q, &req, bio);
1272   - switch (el_ret) {
1273   - case ELEVATOR_BACK_MERGE:
1274   - BUG_ON(!rq_mergeable(req));
1275   -
1276   - if (!ll_back_merge_fn(q, req, bio))
1277   - break;
1278   -
1279   - trace_block_bio_backmerge(q, bio);
1280   -
1281   - if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1282   - blk_rq_set_mixed_merge(req);
1283   -
1284   - req->biotail->bi_next = bio;
1285   - req->biotail = bio;
1286   - req->__data_len += bytes;
1287   - req->ioprio = ioprio_best(req->ioprio, prio);
1288   - if (!blk_rq_cpu_valid(req))
1289   - req->cpu = bio->bi_comp_cpu;
1290   - drive_stat_acct(req, 0);
1291   - elv_bio_merged(q, req, bio);
1292   - if (!attempt_back_merge(q, req))
1293   - elv_merged_request(q, req, el_ret);
  1369 + /*
  1370 + * Check if we can merge with the plugged list before grabbing
  1371 + * any locks.
  1372 + */
  1373 + if (attempt_plug_merge(current, q, bio))
1294 1374 goto out;
1295 1375  
1296   - case ELEVATOR_FRONT_MERGE:
1297   - BUG_ON(!rq_mergeable(req));
  1376 + spin_lock_irq(q->queue_lock);
1298 1377  
1299   - if (!ll_front_merge_fn(q, req, bio))
1300   - break;
1301   -
1302   - trace_block_bio_frontmerge(q, bio);
1303   -
1304   - if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
1305   - blk_rq_set_mixed_merge(req);
1306   - req->cmd_flags &= ~REQ_FAILFAST_MASK;
1307   - req->cmd_flags |= ff;
  1378 + el_ret = elv_merge(q, &req, bio);
  1379 + if (el_ret == ELEVATOR_BACK_MERGE) {
  1380 + BUG_ON(req->cmd_flags & REQ_ON_PLUG);
  1381 + if (bio_attempt_back_merge(q, req, bio)) {
  1382 + if (!attempt_back_merge(q, req))
  1383 + elv_merged_request(q, req, el_ret);
  1384 + goto out_unlock;
1308 1385 }
1309   -
1310   - bio->bi_next = req->bio;
1311   - req->bio = bio;
1312   -
1313   - /*
1314   - * may not be valid. if the low level driver said
1315   - * it didn't need a bounce buffer then it better
1316   - * not touch req->buffer either...
1317   - */
1318   - req->buffer = bio_data(bio);
1319   - req->__sector = bio->bi_sector;
1320   - req->__data_len += bytes;
1321   - req->ioprio = ioprio_best(req->ioprio, prio);
1322   - if (!blk_rq_cpu_valid(req))
1323   - req->cpu = bio->bi_comp_cpu;
1324   - drive_stat_acct(req, 0);
1325   - elv_bio_merged(q, req, bio);
1326   - if (!attempt_front_merge(q, req))
1327   - elv_merged_request(q, req, el_ret);
1328   - goto out;
1329   -
1330   - /* ELV_NO_MERGE: elevator says don't/can't merge. */
1331   - default:
1332   - ;
  1386 + } else if (el_ret == ELEVATOR_FRONT_MERGE) {
  1387 + BUG_ON(req->cmd_flags & REQ_ON_PLUG);
  1388 + if (bio_attempt_front_merge(q, req, bio)) {
  1389 + if (!attempt_front_merge(q, req))
  1390 + elv_merged_request(q, req, el_ret);
  1391 + goto out_unlock;
  1392 + }
1333 1393 }
1334 1394  
1335 1395 get_rq:
... ... @@ -1356,20 +1416,35 @@
1356 1416 */
1357 1417 init_request_from_bio(req, bio);
1358 1418  
1359   - spin_lock_irq(q->queue_lock);
1360 1419 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1361   - bio_flagged(bio, BIO_CPU_AFFINE))
1362   - req->cpu = blk_cpu_to_group(smp_processor_id());
1363   - if (queue_should_plug(q) && elv_queue_empty(q))
1364   - blk_plug_device(q);
  1420 + bio_flagged(bio, BIO_CPU_AFFINE)) {
  1421 + req->cpu = blk_cpu_to_group(get_cpu());
  1422 + put_cpu();
  1423 + }
1365 1424  
1366   - /* insert the request into the elevator */
1367   - drive_stat_acct(req, 1);
1368   - __elv_add_request(q, req, where, 0);
  1425 + plug = current->plug;
  1426 + if (plug && !sync) {
  1427 + if (!plug->should_sort && !list_empty(&plug->list)) {
  1428 + struct request *__rq;
  1429 +
  1430 + __rq = list_entry_rq(plug->list.prev);
  1431 + if (__rq->q != q)
  1432 + plug->should_sort = 1;
  1433 + }
  1434 + /*
  1435 + * Debug flag, kill later
  1436 + */
  1437 + req->cmd_flags |= REQ_ON_PLUG;
  1438 + list_add_tail(&req->queuelist, &plug->list);
  1439 + drive_stat_acct(req, 1);
  1440 + } else {
  1441 + spin_lock_irq(q->queue_lock);
  1442 + add_acct_request(q, req, where);
  1443 + __blk_run_queue(q);
  1444 +out_unlock:
  1445 + spin_unlock_irq(q->queue_lock);
  1446 + }
1369 1447 out:
1370   - if (unplug || !queue_should_plug(q))
1371   - __generic_unplug_device(q);
1372   - spin_unlock_irq(q->queue_lock);
1373 1448 return 0;
1374 1449 }
1375 1450  
... ... @@ -1772,9 +1847,7 @@
1772 1847 */
1773 1848 BUG_ON(blk_queued_rq(rq));
1774 1849  
1775   - drive_stat_acct(rq, 1);
1776   - __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1777   -
  1850 + add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
1778 1851 spin_unlock_irqrestore(q->queue_lock, flags);
1779 1852  
1780 1853 return 0;
... ... @@ -2658,6 +2731,106 @@
2658 2731 return queue_delayed_work(kblockd_workqueue, dwork, delay);
2659 2732 }
2660 2733 EXPORT_SYMBOL(kblockd_schedule_delayed_work);
  2734 +
  2735 +#define PLUG_MAGIC 0x91827364
  2736 +
  2737 +void blk_start_plug(struct blk_plug *plug)
  2738 +{
  2739 + struct task_struct *tsk = current;
  2740 +
  2741 + plug->magic = PLUG_MAGIC;
  2742 + INIT_LIST_HEAD(&plug->list);
  2743 + plug->should_sort = 0;
  2744 +
  2745 + /*
  2746 + * If this is a nested plug, don't actually assign it. It will be
  2747 + * flushed on its own.
  2748 + */
  2749 + if (!tsk->plug) {
  2750 + /*
  2751 + * Store ordering should not be needed here, since a potential
  2752 + * preempt will imply a full memory barrier
  2753 + */
  2754 + tsk->plug = plug;
  2755 + }
  2756 +}
  2757 +EXPORT_SYMBOL(blk_start_plug);
  2758 +
  2759 +static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
  2760 +{
  2761 + struct request *rqa = container_of(a, struct request, queuelist);
  2762 + struct request *rqb = container_of(b, struct request, queuelist);
  2763 +
  2764 + return !(rqa->q == rqb->q);
  2765 +}
  2766 +
  2767 +static void flush_plug_list(struct blk_plug *plug)
  2768 +{
  2769 + struct request_queue *q;
  2770 + unsigned long flags;
  2771 + struct request *rq;
  2772 +
  2773 + BUG_ON(plug->magic != PLUG_MAGIC);
  2774 +
  2775 + if (list_empty(&plug->list))
  2776 + return;
  2777 +
  2778 + if (plug->should_sort)
  2779 + list_sort(NULL, &plug->list, plug_rq_cmp);
  2780 +
  2781 + q = NULL;
  2782 + local_irq_save(flags);
  2783 + while (!list_empty(&plug->list)) {
  2784 + rq = list_entry_rq(plug->list.next);
  2785 + list_del_init(&rq->queuelist);
  2786 + BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
  2787 + BUG_ON(!rq->q);
  2788 + if (rq->q != q) {
  2789 + if (q) {
  2790 + __blk_run_queue(q);
  2791 + spin_unlock(q->queue_lock);
  2792 + }
  2793 + q = rq->q;
  2794 + spin_lock(q->queue_lock);
  2795 + }
  2796 + rq->cmd_flags &= ~REQ_ON_PLUG;
  2797 +
  2798 + /*
  2799 + * rq is already accounted, so use raw insert
  2800 + */
  2801 + __elv_add_request(q, rq, ELEVATOR_INSERT_SORT, 0);
  2802 + }
  2803 +
  2804 + if (q) {
  2805 + __blk_run_queue(q);
  2806 + spin_unlock(q->queue_lock);
  2807 + }
  2808 +
  2809 + BUG_ON(!list_empty(&plug->list));
  2810 + local_irq_restore(flags);
  2811 +}
  2812 +
  2813 +static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)
  2814 +{
  2815 + flush_plug_list(plug);
  2816 +
  2817 + if (plug == tsk->plug)
  2818 + tsk->plug = NULL;
  2819 +}
  2820 +
  2821 +void blk_finish_plug(struct blk_plug *plug)
  2822 +{
  2823 + if (plug)
  2824 + __blk_finish_plug(current, plug);
  2825 +}
  2826 +EXPORT_SYMBOL(blk_finish_plug);
  2827 +
  2828 +void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)
  2829 +{
  2830 + __blk_finish_plug(tsk, plug);
  2831 + tsk->plug = plug;
  2832 +}
  2833 +EXPORT_SYMBOL(__blk_flush_plug);
2661 2834  
2662 2835 int __init blk_dev_init(void)
2663 2836 {
block/blk-flush.c
... ... @@ -264,10 +264,9 @@
264 264 static void flush_data_end_io(struct request *rq, int error)
265 265 {
266 266 struct request_queue *q = rq->q;
267   - bool was_empty = elv_queue_empty(q);
268 267  
269 268 /* after populating an empty queue, kick it to avoid stall */
270   - if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error) && was_empty)
  269 + if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
271 270 __blk_run_queue(q);
272 271 }
273 272  
block/elevator.c
... ... @@ -113,7 +113,7 @@
113 113 }
114 114 EXPORT_SYMBOL(elv_rq_merge_ok);
115 115  
116   -static inline int elv_try_merge(struct request *__rq, struct bio *bio)
  116 +int elv_try_merge(struct request *__rq, struct bio *bio)
117 117 {
118 118 int ret = ELEVATOR_NO_MERGE;
119 119  
... ... @@ -421,6 +421,8 @@
421 421 struct list_head *entry;
422 422 int stop_flags;
423 423  
  424 + BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
  425 +
424 426 if (q->last_merge == rq)
425 427 q->last_merge = NULL;
426 428  
... ... @@ -696,6 +698,8 @@
696 698 void __elv_add_request(struct request_queue *q, struct request *rq, int where,
697 699 int plug)
698 700 {
  701 + BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
  702 +
699 703 if (rq->cmd_flags & REQ_SOFTBARRIER) {
700 704 /* barriers are scheduling boundary, update end_sector */
701 705 if (rq->cmd_type == REQ_TYPE_FS ||
include/linux/blk_types.h
... ... @@ -152,6 +152,7 @@
152 152 __REQ_IO_STAT, /* account I/O stat */
153 153 __REQ_MIXED_MERGE, /* merge of different types, fail separately */
154 154 __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
  155 + __REQ_ON_PLUG, /* on plug list */
155 156 __REQ_NR_BITS, /* stops here */
156 157 };
157 158  
... ... @@ -193,6 +194,7 @@
193 194 #define REQ_IO_STAT (1 << __REQ_IO_STAT)
194 195 #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
195 196 #define REQ_SECURE (1 << __REQ_SECURE)
  197 +#define REQ_ON_PLUG (1 << __REQ_ON_PLUG)
196 198  
197 199 #endif /* __LINUX_BLK_TYPES_H */
include/linux/blkdev.h
... ... @@ -871,6 +871,31 @@
871 871 struct request_queue *blk_alloc_queue_node(gfp_t, int);
872 872 extern void blk_put_queue(struct request_queue *);
873 873  
  874 +struct blk_plug {
  875 + unsigned long magic;
  876 + struct list_head list;
  877 + unsigned int should_sort;
  878 +};
  879 +
  880 +extern void blk_start_plug(struct blk_plug *);
  881 +extern void blk_finish_plug(struct blk_plug *);
  882 +extern void __blk_flush_plug(struct task_struct *, struct blk_plug *);
  883 +
  884 +static inline void blk_flush_plug(struct task_struct *tsk)
  885 +{
  886 + struct blk_plug *plug = tsk->plug;
  887 +
  888 + if (unlikely(plug))
  889 + __blk_flush_plug(tsk, plug);
  890 +}
  891 +
  892 +static inline bool blk_needs_flush_plug(struct task_struct *tsk)
  893 +{
  894 + struct blk_plug *plug = tsk->plug;
  895 +
  896 + return plug && !list_empty(&plug->list);
  897 +}
  898 +
874 899 /*
875 900 * tag stuff
876 901 */
... ... @@ -1292,6 +1317,23 @@
1292 1317 static inline long nr_blockdev_pages(void)
1293 1318 {
1294 1319 return 0;
  1320 +}
  1321 +
  1322 +static inline void blk_start_plug(struct list_head *list)
  1323 +{
  1324 +}
  1325 +
  1326 +static inline void blk_finish_plug(struct list_head *list)
  1327 +{
  1328 +}
  1329 +
  1330 +static inline void blk_flush_plug(struct task_struct *tsk)
  1331 +{
  1332 +}
  1333 +
  1334 +static inline bool blk_needs_flush_plug(struct task_struct *tsk)
  1335 +{
  1336 + return false;
1295 1337 }
1296 1338  
1297 1339 #endif /* CONFIG_BLOCK */
include/linux/elevator.h
... ... @@ -105,6 +105,7 @@
105 105 extern void __elv_add_request(struct request_queue *, struct request *, int, int);
106 106 extern void elv_insert(struct request_queue *, struct request *, int);
107 107 extern int elv_merge(struct request_queue *, struct request **, struct bio *);
  108 +extern int elv_try_merge(struct request *, struct bio *);
108 109 extern void elv_merge_requests(struct request_queue *, struct request *,
109 110 struct request *);
110 111 extern void elv_merged_request(struct request_queue *, struct request *, int);
include/linux/sched.h
... ... @@ -99,6 +99,7 @@
99 99 struct bio_list;
100 100 struct fs_struct;
101 101 struct perf_event_context;
  102 +struct blk_plug;
102 103  
103 104 /*
104 105 * List of flags we want to share for kernel threads,
... ... @@ -1428,6 +1429,11 @@
1428 1429  
1429 1430 /* stacked block device info */
1430 1431 struct bio_list *bio_list;
  1432 +
  1433 +#ifdef CONFIG_BLOCK
  1434 +/* stack plugging */
  1435 + struct blk_plug *plug;
  1436 +#endif
1431 1437  
1432 1438 /* VM state */
1433 1439 struct reclaim_state *reclaim_state;
kernel/exit.c
... ... @@ -908,6 +908,7 @@
908 908 profile_task_exit(tsk);
909 909  
910 910 WARN_ON(atomic_read(&tsk->fs_excl));
  911 + WARN_ON(blk_needs_flush_plug(tsk));
911 912  
912 913 if (unlikely(in_interrupt()))
913 914 panic("Aiee, killing interrupt handler!");
kernel/fork.c
... ... @@ -1204,6 +1204,9 @@
1204 1204 * Clear TID on mm_release()?
1205 1205 */
1206 1206 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
  1207 +#ifdef CONFIG_BLOCK
  1208 + p->plug = NULL;
  1209 +#endif
1207 1210 #ifdef CONFIG_FUTEX
1208 1211 p->robust_list = NULL;
1209 1212 #ifdef CONFIG_COMPAT
kernel/sched.c
... ... @@ -3978,6 +3978,16 @@
3978 3978 switch_count = &prev->nvcsw;
3979 3979 }
3980 3980  
  3981 + /*
  3982 + * If we are going to sleep and we have plugged IO queued, make
  3983 + * sure to submit it to avoid deadlocks.
  3984 + */
  3985 + if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
  3986 + raw_spin_unlock(&rq->lock);
  3987 + blk_flush_plug(prev);
  3988 + raw_spin_lock(&rq->lock);
  3989 + }
  3990 +
3981 3991 pre_schedule(rq, prev);
3982 3992  
3983 3993 if (unlikely(!rq->nr_running))
... ... @@ -5333,6 +5343,7 @@
5333 5343  
5334 5344 delayacct_blkio_start();
5335 5345 atomic_inc(&rq->nr_iowait);
  5346 + blk_flush_plug(current);
5336 5347 current->in_iowait = 1;
5337 5348 schedule();
5338 5349 current->in_iowait = 0;
... ... @@ -5348,6 +5359,7 @@
5348 5359  
5349 5360 delayacct_blkio_start();
5350 5361 atomic_inc(&rq->nr_iowait);
  5362 + blk_flush_plug(current);
5351 5363 current->in_iowait = 1;
5352 5364 ret = schedule_timeout(timeout);
5353 5365 current->in_iowait = 0;