Commit b3c9dd182ed3bdcdaf0e42625a35924b0497afdc
Exists in master and in 38 other branches
Merge branch 'for-3.3/core' of git://git.kernel.dk/linux-block

* 'for-3.3/core' of git://git.kernel.dk/linux-block: (37 commits)
  Revert "block: recursive merge requests"
  block: Stop using macro stubs for the bio data integrity calls
  blockdev: convert some macros to static inlines
  fs: remove unneeded plug in mpage_readpages()
  block: Add BLKROTATIONAL ioctl
  block: Introduce blk_set_stacking_limits function
  block: remove WARN_ON_ONCE() in exit_io_context()
  block: an exiting task should be allowed to create io_context
  block: ioc_cgroup_changed() needs to be exported
  block: recursive merge requests
  block, cfq: fix empty queue crash caused by request merge
  block, cfq: move icq creation and rq->elv.icq association to block core
  block, cfq: restructure io_cq creation path for io_context interface cleanup
  block, cfq: move io_cq exit/release to blk-ioc.c
  block, cfq: move icq cache management to block core
  block, cfq: move io_cq lookup to blk-ioc.c
  block, cfq: move cfqd->icq_list to request_queue and add request->elv.icq
  block, cfq: reorganize cfq_io_context into generic and cfq specific parts
  block: remove elevator_queue->ops
  block: reorder elevator switch sequence
  ...

Fix up conflicts in:
 - block/blk-cgroup.c
   Switch from can_attach_task to can_attach
 - block/cfq-iosched.c
   conflict with now removed cic index changes (we now use q->id instead)
Showing 28 changed files
- block/blk-cgroup.c
- block/blk-core.c
- block/blk-exec.c
- block/blk-ioc.c
- block/blk-settings.c
- block/blk-sysfs.c
- block/blk-throttle.c
- block/blk.h
- block/bsg.c
- block/cfq-iosched.c
- block/compat_ioctl.c
- block/deadline-iosched.c
- block/elevator.c
- block/genhd.c
- block/ioctl.c
- block/noop-iosched.c
- drivers/block/sx8.c
- drivers/md/dm-table.c
- drivers/md/md.c
- drivers/scsi/scsi_scan.c
- fs/ioprio.c
- fs/mpage.c
- include/linux/bio.h
- include/linux/blkdev.h
- include/linux/elevator.h
- include/linux/fs.h
- include/linux/iocontext.h
- kernel/fork.c
block/blk-cgroup.c
... | ... | @@ -1655,11 +1655,12 @@ |
1655 | 1655 | struct io_context *ioc; |
1656 | 1656 | |
1657 | 1657 | cgroup_taskset_for_each(task, cgrp, tset) { |
1658 | - task_lock(task); | |
1659 | - ioc = task->io_context; | |
1660 | - if (ioc) | |
1661 | - ioc->cgroup_changed = 1; | |
1662 | - task_unlock(task); | |
1658 | + /* we don't lose anything even if ioc allocation fails */ | |
1659 | + ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); | |
1660 | + if (ioc) { | |
1661 | + ioc_cgroup_changed(ioc); | |
1662 | + put_io_context(ioc, NULL); | |
1663 | + } | |
1663 | 1664 | } |
1664 | 1665 | } |
1665 | 1666 |
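The new hunk drops the direct task_lock()/ioc->cgroup_changed manipulation in favour of the reference-counted interface introduced later in this series. A minimal sketch of that pattern follows; the wrapper name notify_task_ioc_change() is hypothetical, while get_task_io_context(), ioc_cgroup_changed() and put_io_context() are the interfaces added by the patch:

/* Hypothetical wrapper illustrating the new notification pattern. */
static void notify_task_ioc_change(struct task_struct *task)
{
        struct io_context *ioc;

        /* GFP_ATOMIC: may be called where sleeping is not allowed */
        ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
        if (!ioc)
                return;  /* losing the hint on allocation failure is harmless */

        ioc_cgroup_changed(ioc);        /* sets ICQ_CGROUP_CHANGED on each icq */
        put_io_context(ioc, NULL);      /* NULL: no queue_lock is held here */
}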
block/blk-core.c
... | ... | @@ -39,6 +39,8 @@ |
39 | 39 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); |
40 | 40 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); |
41 | 41 | |
42 | +DEFINE_IDA(blk_queue_ida); | |
43 | + | |
42 | 44 | /* |
43 | 45 | * For the allocated request tables |
44 | 46 | */ |
... | ... | @@ -358,7 +360,8 @@ |
358 | 360 | void blk_drain_queue(struct request_queue *q, bool drain_all) |
359 | 361 | { |
360 | 362 | while (true) { |
361 | - int nr_rqs; | |
363 | + bool drain = false; | |
364 | + int i; | |
362 | 365 | |
363 | 366 | spin_lock_irq(q->queue_lock); |
364 | 367 | |
365 | 368 | |
366 | 369 | |
... | ... | @@ -375,14 +378,25 @@ |
375 | 378 | if (!list_empty(&q->queue_head)) |
376 | 379 | __blk_run_queue(q); |
377 | 380 | |
378 | - if (drain_all) | |
379 | - nr_rqs = q->rq.count[0] + q->rq.count[1]; | |
380 | - else | |
381 | - nr_rqs = q->rq.elvpriv; | |
381 | + drain |= q->rq.elvpriv; | |
382 | 382 | |
383 | + /* | |
384 | + * Unfortunately, requests are queued at and tracked from | |
385 | + * multiple places and there's no single counter which can | |
386 | + * be drained. Check all the queues and counters. | |
387 | + */ | |
388 | + if (drain_all) { | |
389 | + drain |= !list_empty(&q->queue_head); | |
390 | + for (i = 0; i < 2; i++) { | |
391 | + drain |= q->rq.count[i]; | |
392 | + drain |= q->in_flight[i]; | |
393 | + drain |= !list_empty(&q->flush_queue[i]); | |
394 | + } | |
395 | + } | |
396 | + | |
383 | 397 | spin_unlock_irq(q->queue_lock); |
384 | 398 | |
385 | - if (!nr_rqs) | |
399 | + if (!drain) | |
386 | 400 | break; |
387 | 401 | msleep(10); |
388 | 402 | } |
... | ... | @@ -469,6 +483,10 @@ |
469 | 483 | if (!q) |
470 | 484 | return NULL; |
471 | 485 | |
486 | + q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL); | |
487 | + if (q->id < 0) | |
488 | + goto fail_q; | |
489 | + | |
472 | 490 | q->backing_dev_info.ra_pages = |
473 | 491 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
474 | 492 | q->backing_dev_info.state = 0; |
475 | 493 | |
476 | 494 | |
... | ... | @@ -477,20 +495,17 @@ |
477 | 495 | q->node = node_id; |
478 | 496 | |
479 | 497 | err = bdi_init(&q->backing_dev_info); |
480 | - if (err) { | |
481 | - kmem_cache_free(blk_requestq_cachep, q); | |
482 | - return NULL; | |
483 | - } | |
498 | + if (err) | |
499 | + goto fail_id; | |
484 | 500 | |
485 | - if (blk_throtl_init(q)) { | |
486 | - kmem_cache_free(blk_requestq_cachep, q); | |
487 | - return NULL; | |
488 | - } | |
501 | + if (blk_throtl_init(q)) | |
502 | + goto fail_id; | |
489 | 503 | |
490 | 504 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, |
491 | 505 | laptop_mode_timer_fn, (unsigned long) q); |
492 | 506 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); |
493 | 507 | INIT_LIST_HEAD(&q->timeout_list); |
508 | + INIT_LIST_HEAD(&q->icq_list); | |
494 | 509 | INIT_LIST_HEAD(&q->flush_queue[0]); |
495 | 510 | INIT_LIST_HEAD(&q->flush_queue[1]); |
496 | 511 | INIT_LIST_HEAD(&q->flush_data_in_flight); |
... | ... | @@ -508,6 +523,12 @@ |
508 | 523 | q->queue_lock = &q->__queue_lock; |
509 | 524 | |
510 | 525 | return q; |
526 | + | |
527 | +fail_id: | |
528 | + ida_simple_remove(&blk_queue_ida, q->id); | |
529 | +fail_q: | |
530 | + kmem_cache_free(blk_requestq_cachep, q); | |
531 | + return NULL; | |
511 | 532 | } |
512 | 533 | EXPORT_SYMBOL(blk_alloc_queue_node); |
513 | 534 | |
514 | 535 | |
515 | 536 | |
516 | 537 | |
517 | 538 | |
518 | 539 | |
... | ... | @@ -605,26 +626,31 @@ |
605 | 626 | } |
606 | 627 | EXPORT_SYMBOL(blk_init_allocated_queue); |
607 | 628 | |
608 | -int blk_get_queue(struct request_queue *q) | |
629 | +bool blk_get_queue(struct request_queue *q) | |
609 | 630 | { |
610 | - if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | |
611 | - kobject_get(&q->kobj); | |
612 | - return 0; | |
631 | + if (likely(!blk_queue_dead(q))) { | |
632 | + __blk_get_queue(q); | |
633 | + return true; | |
613 | 634 | } |
614 | 635 | |
615 | - return 1; | |
636 | + return false; | |
616 | 637 | } |
617 | 638 | EXPORT_SYMBOL(blk_get_queue); |
618 | 639 | |
619 | 640 | static inline void blk_free_request(struct request_queue *q, struct request *rq) |
620 | 641 | { |
621 | - if (rq->cmd_flags & REQ_ELVPRIV) | |
642 | + if (rq->cmd_flags & REQ_ELVPRIV) { | |
622 | 643 | elv_put_request(q, rq); |
644 | + if (rq->elv.icq) | |
645 | + put_io_context(rq->elv.icq->ioc, q); | |
646 | + } | |
647 | + | |
623 | 648 | mempool_free(rq, q->rq.rq_pool); |
624 | 649 | } |
625 | 650 | |
626 | 651 | static struct request * |
627 | -blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask) | |
652 | +blk_alloc_request(struct request_queue *q, struct io_cq *icq, | |
653 | + unsigned int flags, gfp_t gfp_mask) | |
628 | 654 | { |
629 | 655 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); |
630 | 656 | |
... | ... | @@ -635,10 +661,15 @@ |
635 | 661 | |
636 | 662 | rq->cmd_flags = flags | REQ_ALLOCED; |
637 | 663 | |
638 | - if ((flags & REQ_ELVPRIV) && | |
639 | - unlikely(elv_set_request(q, rq, gfp_mask))) { | |
640 | - mempool_free(rq, q->rq.rq_pool); | |
641 | - return NULL; | |
664 | + if (flags & REQ_ELVPRIV) { | |
665 | + rq->elv.icq = icq; | |
666 | + if (unlikely(elv_set_request(q, rq, gfp_mask))) { | |
667 | + mempool_free(rq, q->rq.rq_pool); | |
668 | + return NULL; | |
669 | + } | |
670 | + /* @rq->elv.icq holds on to io_context until @rq is freed */ | |
671 | + if (icq) | |
672 | + get_io_context(icq->ioc); | |
642 | 673 | } |
643 | 674 | |
644 | 675 | return rq; |
645 | 676 | |
646 | 677 | |
647 | 678 | |
... | ... | @@ -750,11 +781,17 @@ |
750 | 781 | { |
751 | 782 | struct request *rq = NULL; |
752 | 783 | struct request_list *rl = &q->rq; |
753 | - struct io_context *ioc = NULL; | |
784 | + struct elevator_type *et; | |
785 | + struct io_context *ioc; | |
786 | + struct io_cq *icq = NULL; | |
754 | 787 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
788 | + bool retried = false; | |
755 | 789 | int may_queue; |
790 | +retry: | |
791 | + et = q->elevator->type; | |
792 | + ioc = current->io_context; | |
756 | 793 | |
757 | - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | |
794 | + if (unlikely(blk_queue_dead(q))) | |
758 | 795 | return NULL; |
759 | 796 | |
760 | 797 | may_queue = elv_may_queue(q, rw_flags); |
761 | 798 | |
... | ... | @@ -763,8 +800,21 @@ |
763 | 800 | |
764 | 801 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { |
765 | 802 | if (rl->count[is_sync]+1 >= q->nr_requests) { |
766 | - ioc = current_io_context(GFP_ATOMIC, q->node); | |
767 | 803 | /* |
804 | + * We want ioc to record batching state. If it's | |
805 | + * not already there, creating a new one requires | |
806 | + * dropping queue_lock, which in turn requires | |
807 | + * retesting conditions to avoid queue hang. | |
808 | + */ | |
809 | + if (!ioc && !retried) { | |
810 | + spin_unlock_irq(q->queue_lock); | |
811 | + create_io_context(current, gfp_mask, q->node); | |
812 | + spin_lock_irq(q->queue_lock); | |
813 | + retried = true; | |
814 | + goto retry; | |
815 | + } | |
816 | + | |
817 | + /* | |
768 | 818 | * The queue will fill after this allocation, so set |
769 | 819 | * it as full, and mark this process as "batching". |
770 | 820 | * This process will be allowed to complete a batch of |
771 | 821 | |
772 | 822 | |
... | ... | @@ -799,17 +849,36 @@ |
799 | 849 | rl->count[is_sync]++; |
800 | 850 | rl->starved[is_sync] = 0; |
801 | 851 | |
852 | + /* | |
853 | + * Decide whether the new request will be managed by elevator. If | |
854 | + * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will | |
855 | + * prevent the current elevator from being destroyed until the new | |
856 | + * request is freed. This guarantees icq's won't be destroyed and | |
857 | + * makes creating new ones safe. | |
858 | + * | |
859 | + * Also, lookup icq while holding queue_lock. If it doesn't exist, | |
860 | + * it will be created after releasing queue_lock. | |
861 | + */ | |
802 | 862 | if (blk_rq_should_init_elevator(bio) && |
803 | 863 | !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) { |
804 | 864 | rw_flags |= REQ_ELVPRIV; |
805 | 865 | rl->elvpriv++; |
866 | + if (et->icq_cache && ioc) | |
867 | + icq = ioc_lookup_icq(ioc, q); | |
806 | 868 | } |
807 | 869 | |
808 | 870 | if (blk_queue_io_stat(q)) |
809 | 871 | rw_flags |= REQ_IO_STAT; |
810 | 872 | spin_unlock_irq(q->queue_lock); |
811 | 873 | |
812 | - rq = blk_alloc_request(q, rw_flags, gfp_mask); | |
874 | + /* create icq if missing */ | |
875 | + if (unlikely(et->icq_cache && !icq)) | |
876 | + icq = ioc_create_icq(q, gfp_mask); | |
877 | + | |
878 | + /* rqs are guaranteed to have icq on elv_set_request() if requested */ | |
879 | + if (likely(!et->icq_cache || icq)) | |
880 | + rq = blk_alloc_request(q, icq, rw_flags, gfp_mask); | |
881 | + | |
813 | 882 | if (unlikely(!rq)) { |
814 | 883 | /* |
815 | 884 | * Allocation failed presumably due to memory. Undo anything |
816 | 885 | |
... | ... | @@ -871,10 +940,9 @@ |
871 | 940 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
872 | 941 | while (!rq) { |
873 | 942 | DEFINE_WAIT(wait); |
874 | - struct io_context *ioc; | |
875 | 943 | struct request_list *rl = &q->rq; |
876 | 944 | |
877 | - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | |
945 | + if (unlikely(blk_queue_dead(q))) | |
878 | 946 | return NULL; |
879 | 947 | |
880 | 948 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, |
... | ... | @@ -891,8 +959,8 @@ |
891 | 959 | * up to a big batch of them for a small period time. |
892 | 960 | * See ioc_batching, ioc_set_batching |
893 | 961 | */ |
894 | - ioc = current_io_context(GFP_NOIO, q->node); | |
895 | - ioc_set_batching(q, ioc); | |
962 | + create_io_context(current, GFP_NOIO, q->node); | |
963 | + ioc_set_batching(q, current->io_context); | |
896 | 964 | |
897 | 965 | spin_lock_irq(q->queue_lock); |
898 | 966 | finish_wait(&rl->wait[is_sync], &wait); |
... | ... | @@ -1009,54 +1077,6 @@ |
1009 | 1077 | __elv_add_request(q, rq, where); |
1010 | 1078 | } |
1011 | 1079 | |
1012 | -/** | |
1013 | - * blk_insert_request - insert a special request into a request queue | |
1014 | - * @q: request queue where request should be inserted | |
1015 | - * @rq: request to be inserted | |
1016 | - * @at_head: insert request at head or tail of queue | |
1017 | - * @data: private data | |
1018 | - * | |
1019 | - * Description: | |
1020 | - * Many block devices need to execute commands asynchronously, so they don't | |
1021 | - * block the whole kernel from preemption during request execution. This is | |
1022 | - * accomplished normally by inserting aritficial requests tagged as | |
1023 | - * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them | |
1024 | - * be scheduled for actual execution by the request queue. | |
1025 | - * | |
1026 | - * We have the option of inserting the head or the tail of the queue. | |
1027 | - * Typically we use the tail for new ioctls and so forth. We use the head | |
1028 | - * of the queue for things like a QUEUE_FULL message from a device, or a | |
1029 | - * host that is unable to accept a particular command. | |
1030 | - */ | |
1031 | -void blk_insert_request(struct request_queue *q, struct request *rq, | |
1032 | - int at_head, void *data) | |
1033 | -{ | |
1034 | - int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | |
1035 | - unsigned long flags; | |
1036 | - | |
1037 | - /* | |
1038 | - * tell I/O scheduler that this isn't a regular read/write (ie it | |
1039 | - * must not attempt merges on this) and that it acts as a soft | |
1040 | - * barrier | |
1041 | - */ | |
1042 | - rq->cmd_type = REQ_TYPE_SPECIAL; | |
1043 | - | |
1044 | - rq->special = data; | |
1045 | - | |
1046 | - spin_lock_irqsave(q->queue_lock, flags); | |
1047 | - | |
1048 | - /* | |
1049 | - * If command is tagged, release the tag | |
1050 | - */ | |
1051 | - if (blk_rq_tagged(rq)) | |
1052 | - blk_queue_end_tag(q, rq); | |
1053 | - | |
1054 | - add_acct_request(q, rq, where); | |
1055 | - __blk_run_queue(q); | |
1056 | - spin_unlock_irqrestore(q->queue_lock, flags); | |
1057 | -} | |
1058 | -EXPORT_SYMBOL(blk_insert_request); | |
1059 | - | |
1060 | 1080 | static void part_round_stats_single(int cpu, struct hd_struct *part, |
1061 | 1081 | unsigned long now) |
1062 | 1082 | { |
... | ... | @@ -1766,6 +1786,10 @@ |
1766 | 1786 | return -EIO; |
1767 | 1787 | |
1768 | 1788 | spin_lock_irqsave(q->queue_lock, flags); |
1789 | + if (unlikely(blk_queue_dead(q))) { | |
1790 | + spin_unlock_irqrestore(q->queue_lock, flags); | |
1791 | + return -ENODEV; | |
1792 | + } | |
1769 | 1793 | |
1770 | 1794 | /* |
1771 | 1795 | * Submitting request must be dequeued before calling this function |
... | ... | @@ -2740,6 +2764,14 @@ |
2740 | 2764 | trace_block_unplug(q, depth, !from_schedule); |
2741 | 2765 | |
2742 | 2766 | /* |
2767 | + * Don't mess with dead queue. | |
2768 | + */ | |
2769 | + if (unlikely(blk_queue_dead(q))) { | |
2770 | + spin_unlock(q->queue_lock); | |
2771 | + return; | |
2772 | + } | |
2773 | + | |
2774 | + /* | |
2743 | 2775 | * If we are punting this to kblockd, then we can safely drop |
2744 | 2776 | * the queue_lock before waking kblockd (which needs to take |
2745 | 2777 | * this lock). |
... | ... | @@ -2815,6 +2847,15 @@ |
2815 | 2847 | depth = 0; |
2816 | 2848 | spin_lock(q->queue_lock); |
2817 | 2849 | } |
2850 | + | |
2851 | + /* | |
2852 | + * Short-circuit if @q is dead | |
2853 | + */ | |
2854 | + if (unlikely(blk_queue_dead(q))) { | |
2855 | + __blk_end_request_all(rq, -ENODEV); | |
2856 | + continue; | |
2857 | + } | |
2858 | + | |
2818 | 2859 | /* |
2819 | 2860 | * rq is already accounted, so use raw insert |
2820 | 2861 | */ |
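blk_alloc_queue_node() now tags every queue with an integer q->id taken from the global blk_queue_ida; blk-ioc.c below uses that id to index io_cq's in each io_context's radix tree, and the id is released both on the new fail_id error path and in blk_release_queue() (see blk-sysfs.c). A minimal sketch of the ida allocate/release lifecycle; the example_* names are placeholders, while ida_simple_get()/ida_simple_remove() are the existing kernel API used by the hunk:

#include <linux/idr.h>

struct example_obj {
        int id;
};

static DEFINE_IDA(example_ida);         /* plays the role of blk_queue_ida */

/* Allocate the smallest unused id >= 0; may sleep under GFP_KERNEL. */
static int example_obj_init(struct example_obj *obj)
{
        obj->id = ida_simple_get(&example_ida, 0, 0, GFP_KERNEL);
        return obj->id < 0 ? obj->id : 0;
}

/* Release the id on every teardown path so it can be reused. */
static void example_obj_release(struct example_obj *obj)
{
        ida_simple_remove(&example_ida, obj->id);
}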
block/blk-exec.c
... | ... | @@ -50,7 +50,11 @@ |
50 | 50 | { |
51 | 51 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
52 | 52 | |
53 | - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | |
53 | + WARN_ON(irqs_disabled()); | |
54 | + spin_lock_irq(q->queue_lock); | |
55 | + | |
56 | + if (unlikely(blk_queue_dead(q))) { | |
57 | + spin_unlock_irq(q->queue_lock); | |
54 | 58 | rq->errors = -ENXIO; |
55 | 59 | if (rq->end_io) |
56 | 60 | rq->end_io(rq, rq->errors); |
... | ... | @@ -59,8 +63,6 @@ |
59 | 63 | |
60 | 64 | rq->rq_disk = bd_disk; |
61 | 65 | rq->end_io = done; |
62 | - WARN_ON(irqs_disabled()); | |
63 | - spin_lock_irq(q->queue_lock); | |
64 | 66 | __elv_add_request(q, rq, where); |
65 | 67 | __blk_run_queue(q); |
66 | 68 | /* the queue is stopped so it won't be run */ |
Note that moving the dead-queue test under queue_lock is the point of this hunk: checking QUEUE_FLAG_DEAD before taking the lock left a window in which the queue could die between the check and the insertion.
block/blk-ioc.c
... | ... | @@ -16,53 +16,214 @@ |
16 | 16 | */ |
17 | 17 | static struct kmem_cache *iocontext_cachep; |
18 | 18 | |
19 | -static void cfq_dtor(struct io_context *ioc) | |
19 | +/** | |
20 | + * get_io_context - increment reference count to io_context | |
21 | + * @ioc: io_context to get | |
22 | + * | |
23 | + * Increment reference count to @ioc. | |
24 | + */ | |
25 | +void get_io_context(struct io_context *ioc) | |
20 | 26 | { |
21 | - if (!hlist_empty(&ioc->cic_list)) { | |
22 | - struct cfq_io_context *cic; | |
27 | + BUG_ON(atomic_long_read(&ioc->refcount) <= 0); | |
28 | + atomic_long_inc(&ioc->refcount); | |
29 | +} | |
30 | +EXPORT_SYMBOL(get_io_context); | |
23 | 31 | |
24 | - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, | |
25 | - cic_list); | |
26 | - cic->dtor(ioc); | |
32 | +/* | |
33 | + * Releasing ioc may nest into another put_io_context() leading to nested | |
34 | + * fast path release. As the ioc's can't be the same, this is okay but | |
35 | + * makes lockdep whine. Keep track of nesting and use it as subclass. | |
36 | + */ | |
37 | +#ifdef CONFIG_LOCKDEP | |
38 | +#define ioc_release_depth(q) ((q) ? (q)->ioc_release_depth : 0) | |
39 | +#define ioc_release_depth_inc(q) (q)->ioc_release_depth++ | |
40 | +#define ioc_release_depth_dec(q) (q)->ioc_release_depth-- | |
41 | +#else | |
42 | +#define ioc_release_depth(q) 0 | |
43 | +#define ioc_release_depth_inc(q) do { } while (0) | |
44 | +#define ioc_release_depth_dec(q) do { } while (0) | |
45 | +#endif | |
46 | + | |
47 | +static void icq_free_icq_rcu(struct rcu_head *head) | |
48 | +{ | |
49 | + struct io_cq *icq = container_of(head, struct io_cq, __rcu_head); | |
50 | + | |
51 | + kmem_cache_free(icq->__rcu_icq_cache, icq); | |
52 | +} | |
53 | + | |
54 | +/* | |
55 | + * Exit and free an icq. Called with both ioc and q locked. | |
56 | + */ | |
57 | +static void ioc_exit_icq(struct io_cq *icq) | |
58 | +{ | |
59 | + struct io_context *ioc = icq->ioc; | |
60 | + struct request_queue *q = icq->q; | |
61 | + struct elevator_type *et = q->elevator->type; | |
62 | + | |
63 | + lockdep_assert_held(&ioc->lock); | |
64 | + lockdep_assert_held(q->queue_lock); | |
65 | + | |
66 | + radix_tree_delete(&ioc->icq_tree, icq->q->id); | |
67 | + hlist_del_init(&icq->ioc_node); | |
68 | + list_del_init(&icq->q_node); | |
69 | + | |
70 | + /* | |
71 | + * Both setting lookup hint to and clearing it from @icq are done | |
72 | + * under queue_lock. If it's not pointing to @icq now, it never | |
73 | + * will. Hint assignment itself can race safely. | |
74 | + */ | |
75 | + if (rcu_dereference_raw(ioc->icq_hint) == icq) | |
76 | + rcu_assign_pointer(ioc->icq_hint, NULL); | |
77 | + | |
78 | + if (et->ops.elevator_exit_icq_fn) { | |
79 | + ioc_release_depth_inc(q); | |
80 | + et->ops.elevator_exit_icq_fn(icq); | |
81 | + ioc_release_depth_dec(q); | |
27 | 82 | } |
83 | + | |
84 | + /* | |
85 | + * @icq->q might have gone away by the time RCU callback runs | |
86 | + * making it impossible to determine icq_cache. Record it in @icq. | |
87 | + */ | |
88 | + icq->__rcu_icq_cache = et->icq_cache; | |
89 | + call_rcu(&icq->__rcu_head, icq_free_icq_rcu); | |
28 | 90 | } |
29 | 91 | |
30 | 92 | /* |
31 | - * IO Context helper functions. put_io_context() returns 1 if there are no | |
32 | - * more users of this io context, 0 otherwise. | |
93 | + * Slow path for ioc release in put_io_context(). Performs double-lock | |
94 | + * dancing to unlink all icq's and then frees ioc. | |
33 | 95 | */ |
34 | -int put_io_context(struct io_context *ioc) | |
96 | +static void ioc_release_fn(struct work_struct *work) | |
35 | 97 | { |
36 | - if (ioc == NULL) | |
37 | - return 1; | |
98 | + struct io_context *ioc = container_of(work, struct io_context, | |
99 | + release_work); | |
100 | + struct request_queue *last_q = NULL; | |
38 | 101 | |
39 | - BUG_ON(atomic_long_read(&ioc->refcount) == 0); | |
102 | + spin_lock_irq(&ioc->lock); | |
40 | 103 | |
41 | - if (atomic_long_dec_and_test(&ioc->refcount)) { | |
42 | - rcu_read_lock(); | |
43 | - cfq_dtor(ioc); | |
44 | - rcu_read_unlock(); | |
104 | + while (!hlist_empty(&ioc->icq_list)) { | |
105 | + struct io_cq *icq = hlist_entry(ioc->icq_list.first, | |
106 | + struct io_cq, ioc_node); | |
107 | + struct request_queue *this_q = icq->q; | |
45 | 108 | |
46 | - kmem_cache_free(iocontext_cachep, ioc); | |
47 | - return 1; | |
109 | + if (this_q != last_q) { | |
110 | + /* | |
111 | + * Need to switch to @this_q. Once we release | |
112 | + * @ioc->lock, it can go away along with @cic. | |
113 | + * Hold on to it. | |
114 | + */ | |
115 | + __blk_get_queue(this_q); | |
116 | + | |
117 | + /* | |
118 | + * blk_put_queue() might sleep thanks to kobject | |
119 | + * idiocy. Always release both locks, put and | |
120 | + * restart. | |
121 | + */ | |
122 | + if (last_q) { | |
123 | + spin_unlock(last_q->queue_lock); | |
124 | + spin_unlock_irq(&ioc->lock); | |
125 | + blk_put_queue(last_q); | |
126 | + } else { | |
127 | + spin_unlock_irq(&ioc->lock); | |
128 | + } | |
129 | + | |
130 | + last_q = this_q; | |
131 | + spin_lock_irq(this_q->queue_lock); | |
132 | + spin_lock(&ioc->lock); | |
133 | + continue; | |
134 | + } | |
135 | + ioc_exit_icq(icq); | |
48 | 136 | } |
49 | - return 0; | |
137 | + | |
138 | + if (last_q) { | |
139 | + spin_unlock(last_q->queue_lock); | |
140 | + spin_unlock_irq(&ioc->lock); | |
141 | + blk_put_queue(last_q); | |
142 | + } else { | |
143 | + spin_unlock_irq(&ioc->lock); | |
144 | + } | |
145 | + | |
146 | + kmem_cache_free(iocontext_cachep, ioc); | |
50 | 147 | } |
51 | -EXPORT_SYMBOL(put_io_context); | |
52 | 148 | |
53 | -static void cfq_exit(struct io_context *ioc) | |
149 | +/** | |
150 | + * put_io_context - put a reference of io_context | |
151 | + * @ioc: io_context to put | |
152 | + * @locked_q: request_queue the caller is holding queue_lock of (hint) | |
153 | + * | |
154 | + * Decrement reference count of @ioc and release it if the count reaches | |
155 | + * zero. If the caller is holding queue_lock of a queue, it can indicate | |
156 | + * that with @locked_q. This is an optimization hint and the caller is | |
157 | + * allowed to pass in %NULL even when it's holding a queue_lock. | |
158 | + */ | |
159 | +void put_io_context(struct io_context *ioc, struct request_queue *locked_q) | |
54 | 160 | { |
55 | - rcu_read_lock(); | |
161 | + struct request_queue *last_q = locked_q; | |
162 | + unsigned long flags; | |
56 | 163 | |
57 | - if (!hlist_empty(&ioc->cic_list)) { | |
58 | - struct cfq_io_context *cic; | |
164 | + if (ioc == NULL) | |
165 | + return; | |
59 | 166 | |
60 | - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, | |
61 | - cic_list); | |
62 | - cic->exit(ioc); | |
167 | + BUG_ON(atomic_long_read(&ioc->refcount) <= 0); | |
168 | + if (locked_q) | |
169 | + lockdep_assert_held(locked_q->queue_lock); | |
170 | + | |
171 | + if (!atomic_long_dec_and_test(&ioc->refcount)) | |
172 | + return; | |
173 | + | |
174 | + /* | |
175 | + * Destroy @ioc. This is a bit messy because icq's are chained | |
176 | + * from both ioc and queue, and ioc->lock nests inside queue_lock. | |
177 | + * The inner ioc->lock should be held to walk our icq_list and then | |
178 | + * for each icq the outer matching queue_lock should be grabbed. | |
179 | + * ie. We need to do reverse-order double lock dancing. | |
180 | + * | |
181 | + * Another twist is that we are often called with one of the | |
182 | + * matching queue_locks held as indicated by @locked_q, which | |
183 | + * prevents performing double-lock dance for other queues. | |
184 | + * | |
185 | + * So, we do it in two stages. The fast path uses the queue_lock | |
186 | + * the caller is holding and, if other queues need to be accessed, | |
187 | + * uses trylock to avoid introducing locking dependency. This can | |
188 | + * handle most cases, especially if @ioc was performing IO on only | |
189 | + * single device. | |
190 | + * | |
191 | + * If trylock doesn't cut it, we defer to @ioc->release_work which | |
192 | + * can do all the double-locking dancing. | |
193 | + */ | |
194 | + spin_lock_irqsave_nested(&ioc->lock, flags, | |
195 | + ioc_release_depth(locked_q)); | |
196 | + | |
197 | + while (!hlist_empty(&ioc->icq_list)) { | |
198 | + struct io_cq *icq = hlist_entry(ioc->icq_list.first, | |
199 | + struct io_cq, ioc_node); | |
200 | + struct request_queue *this_q = icq->q; | |
201 | + | |
202 | + if (this_q != last_q) { | |
203 | + if (last_q && last_q != locked_q) | |
204 | + spin_unlock(last_q->queue_lock); | |
205 | + last_q = NULL; | |
206 | + | |
207 | + if (!spin_trylock(this_q->queue_lock)) | |
208 | + break; | |
209 | + last_q = this_q; | |
210 | + continue; | |
211 | + } | |
212 | + ioc_exit_icq(icq); | |
63 | 213 | } |
64 | - rcu_read_unlock(); | |
214 | + | |
215 | + if (last_q && last_q != locked_q) | |
216 | + spin_unlock(last_q->queue_lock); | |
217 | + | |
218 | + spin_unlock_irqrestore(&ioc->lock, flags); | |
219 | + | |
220 | + /* if no icq is left, we're done; otherwise, kick release_work */ | |
221 | + if (hlist_empty(&ioc->icq_list)) | |
222 | + kmem_cache_free(iocontext_cachep, ioc); | |
223 | + else | |
224 | + schedule_work(&ioc->release_work); | |
65 | 225 | } |
226 | +EXPORT_SYMBOL(put_io_context); | |
66 | 227 | |
67 | 228 | /* Called by the exiting task */ |
68 | 229 | void exit_io_context(struct task_struct *task) |
69 | 230 | |
70 | 231 | |
71 | 232 | |
72 | 233 | |
73 | 234 | |
74 | 235 | |
75 | 236 | |
76 | 237 | |
77 | 238 | |
78 | 239 | |
79 | 240 | |
80 | 241 | |
81 | 242 | |
82 | 243 | |
83 | 244 | |
84 | 245 | |
85 | 246 | |
86 | 247 | |
87 | 248 | |
88 | 249 | |
89 | 250 | |
... | ... | @@ -74,86 +235,240 @@ |
74 | 235 | task->io_context = NULL; |
75 | 236 | task_unlock(task); |
76 | 237 | |
77 | - if (atomic_dec_and_test(&ioc->nr_tasks)) | |
78 | - cfq_exit(ioc); | |
238 | + atomic_dec(&ioc->nr_tasks); | |
239 | + put_io_context(ioc, NULL); | |
240 | +} | |
79 | 241 | |
80 | - put_io_context(ioc); | |
242 | +/** | |
243 | + * ioc_clear_queue - break any ioc association with the specified queue | |
244 | + * @q: request_queue being cleared | |
245 | + * | |
246 | + * Walk @q->icq_list and exit all io_cq's. Must be called with @q locked. | |
247 | + */ | |
248 | +void ioc_clear_queue(struct request_queue *q) | |
249 | +{ | |
250 | + lockdep_assert_held(q->queue_lock); | |
251 | + | |
252 | + while (!list_empty(&q->icq_list)) { | |
253 | + struct io_cq *icq = list_entry(q->icq_list.next, | |
254 | + struct io_cq, q_node); | |
255 | + struct io_context *ioc = icq->ioc; | |
256 | + | |
257 | + spin_lock(&ioc->lock); | |
258 | + ioc_exit_icq(icq); | |
259 | + spin_unlock(&ioc->lock); | |
260 | + } | |
81 | 261 | } |
82 | 262 | |
83 | -struct io_context *alloc_io_context(gfp_t gfp_flags, int node) | |
263 | +void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags, | |
264 | + int node) | |
84 | 265 | { |
85 | 266 | struct io_context *ioc; |
86 | 267 | |
87 | - ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); | |
88 | - if (ioc) { | |
89 | - atomic_long_set(&ioc->refcount, 1); | |
90 | - atomic_set(&ioc->nr_tasks, 1); | |
91 | - spin_lock_init(&ioc->lock); | |
92 | - ioc->ioprio_changed = 0; | |
93 | - ioc->ioprio = 0; | |
94 | - ioc->last_waited = 0; /* doesn't matter... */ | |
95 | - ioc->nr_batch_requests = 0; /* because this is 0 */ | |
96 | - INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); | |
97 | - INIT_HLIST_HEAD(&ioc->cic_list); | |
98 | - ioc->ioc_data = NULL; | |
99 | -#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) | |
100 | - ioc->cgroup_changed = 0; | |
101 | -#endif | |
102 | - } | |
268 | + ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO, | |
269 | + node); | |
270 | + if (unlikely(!ioc)) | |
271 | + return; | |
103 | 272 | |
104 | - return ioc; | |
273 | + /* initialize */ | |
274 | + atomic_long_set(&ioc->refcount, 1); | |
275 | + atomic_set(&ioc->nr_tasks, 1); | |
276 | + spin_lock_init(&ioc->lock); | |
277 | + INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH); | |
278 | + INIT_HLIST_HEAD(&ioc->icq_list); | |
279 | + INIT_WORK(&ioc->release_work, ioc_release_fn); | |
280 | + | |
281 | + /* | |
282 | + * Try to install. ioc shouldn't be installed if someone else | |
283 | + * already did or @task, which isn't %current, is exiting. Note | |
284 | + * that we need to allow ioc creation on exiting %current as exit | |
285 | + * path may issue IOs from e.g. exit_files(). The exit path is | |
286 | + * responsible for not issuing IO after exit_io_context(). | |
287 | + */ | |
288 | + task_lock(task); | |
289 | + if (!task->io_context && | |
290 | + (task == current || !(task->flags & PF_EXITING))) | |
291 | + task->io_context = ioc; | |
292 | + else | |
293 | + kmem_cache_free(iocontext_cachep, ioc); | |
294 | + task_unlock(task); | |
105 | 295 | } |
106 | 296 | |
107 | -/* | |
108 | - * If the current task has no IO context then create one and initialise it. | |
109 | - * Otherwise, return its existing IO context. | |
297 | +/** | |
298 | + * get_task_io_context - get io_context of a task | |
299 | + * @task: task of interest | |
300 | + * @gfp_flags: allocation flags, used if allocation is necessary | |
301 | + * @node: allocation node, used if allocation is necessary | |
110 | 302 | * |
111 | - * This returned IO context doesn't have a specifically elevated refcount, | |
112 | - * but since the current task itself holds a reference, the context can be | |
113 | - * used in general code, so long as it stays within `current` context. | |
303 | + * Return io_context of @task. If it doesn't exist, it is created with | |
304 | + * @gfp_flags and @node. The returned io_context has its reference count | |
305 | + * incremented. | |
306 | + * | |
307 | + * This function always goes through task_lock() and it's better to use | |
308 | + * %current->io_context + get_io_context() for %current. | |
114 | 309 | */ |
115 | -struct io_context *current_io_context(gfp_t gfp_flags, int node) | |
310 | +struct io_context *get_task_io_context(struct task_struct *task, | |
311 | + gfp_t gfp_flags, int node) | |
116 | 312 | { |
117 | - struct task_struct *tsk = current; | |
118 | - struct io_context *ret; | |
313 | + struct io_context *ioc; | |
119 | 314 | |
120 | - ret = tsk->io_context; | |
121 | - if (likely(ret)) | |
122 | - return ret; | |
315 | + might_sleep_if(gfp_flags & __GFP_WAIT); | |
123 | 316 | |
124 | - ret = alloc_io_context(gfp_flags, node); | |
125 | - if (ret) { | |
126 | - /* make sure set_task_ioprio() sees the settings above */ | |
127 | - smp_wmb(); | |
128 | - tsk->io_context = ret; | |
129 | - } | |
317 | + do { | |
318 | + task_lock(task); | |
319 | + ioc = task->io_context; | |
320 | + if (likely(ioc)) { | |
321 | + get_io_context(ioc); | |
322 | + task_unlock(task); | |
323 | + return ioc; | |
324 | + } | |
325 | + task_unlock(task); | |
326 | + } while (create_io_context(task, gfp_flags, node)); | |
130 | 327 | |
131 | - return ret; | |
328 | + return NULL; | |
132 | 329 | } |
330 | +EXPORT_SYMBOL(get_task_io_context); | |
133 | 331 | |
134 | -/* | |
135 | - * If the current task has no IO context then create one and initialise it. | |
136 | - * If it does have a context, take a ref on it. | |
332 | +/** | |
333 | + * ioc_lookup_icq - lookup io_cq from ioc | |
334 | + * @ioc: the associated io_context | |
335 | + * @q: the associated request_queue | |
137 | 336 | * |
138 | - * This is always called in the context of the task which submitted the I/O. | |
337 | + * Look up io_cq associated with @ioc - @q pair from @ioc. Must be called | |
338 | + * with @q->queue_lock held. | |
139 | 339 | */ |
140 | -struct io_context *get_io_context(gfp_t gfp_flags, int node) | |
340 | +struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q) | |
141 | 341 | { |
142 | - struct io_context *ioc = NULL; | |
342 | + struct io_cq *icq; | |
143 | 343 | |
344 | + lockdep_assert_held(q->queue_lock); | |
345 | + | |
144 | 346 | /* |
145 | - * Check for unlikely race with exiting task. ioc ref count is | |
146 | - * zero when ioc is being detached. | |
347 | + * icq's are indexed from @ioc using radix tree and hint pointer, | |
348 | + * both of which are protected with RCU. All removals are done | |
349 | + * holding both q and ioc locks, and we're holding q lock - if we | |
350 | + * find a icq which points to us, it's guaranteed to be valid. | |
147 | 351 | */ |
148 | - do { | |
149 | - ioc = current_io_context(gfp_flags, node); | |
150 | - if (unlikely(!ioc)) | |
151 | - break; | |
152 | - } while (!atomic_long_inc_not_zero(&ioc->refcount)); | |
352 | + rcu_read_lock(); | |
353 | + icq = rcu_dereference(ioc->icq_hint); | |
354 | + if (icq && icq->q == q) | |
355 | + goto out; | |
153 | 356 | |
154 | - return ioc; | |
357 | + icq = radix_tree_lookup(&ioc->icq_tree, q->id); | |
358 | + if (icq && icq->q == q) | |
359 | + rcu_assign_pointer(ioc->icq_hint, icq); /* allowed to race */ | |
360 | + else | |
361 | + icq = NULL; | |
362 | +out: | |
363 | + rcu_read_unlock(); | |
364 | + return icq; | |
155 | 365 | } |
156 | -EXPORT_SYMBOL(get_io_context); | |
366 | +EXPORT_SYMBOL(ioc_lookup_icq); | |
367 | + | |
368 | +/** | |
369 | + * ioc_create_icq - create and link io_cq | |
370 | + * @q: request_queue of interest | |
371 | + * @gfp_mask: allocation mask | |
372 | + * | |
373 | + * Make sure io_cq linking %current->io_context and @q exists. If either | |
374 | + * io_context and/or icq don't exist, they will be created using @gfp_mask. | |
375 | + * | |
376 | + * The caller is responsible for ensuring @ioc won't go away and @q is | |
377 | + * alive and will stay alive until this function returns. | |
378 | + */ | |
379 | +struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask) | |
380 | +{ | |
381 | + struct elevator_type *et = q->elevator->type; | |
382 | + struct io_context *ioc; | |
383 | + struct io_cq *icq; | |
384 | + | |
385 | + /* allocate stuff */ | |
386 | + ioc = create_io_context(current, gfp_mask, q->node); | |
387 | + if (!ioc) | |
388 | + return NULL; | |
389 | + | |
390 | + icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO, | |
391 | + q->node); | |
392 | + if (!icq) | |
393 | + return NULL; | |
394 | + | |
395 | + if (radix_tree_preload(gfp_mask) < 0) { | |
396 | + kmem_cache_free(et->icq_cache, icq); | |
397 | + return NULL; | |
398 | + } | |
399 | + | |
400 | + icq->ioc = ioc; | |
401 | + icq->q = q; | |
402 | + INIT_LIST_HEAD(&icq->q_node); | |
403 | + INIT_HLIST_NODE(&icq->ioc_node); | |
404 | + | |
405 | + /* lock both q and ioc and try to link @icq */ | |
406 | + spin_lock_irq(q->queue_lock); | |
407 | + spin_lock(&ioc->lock); | |
408 | + | |
409 | + if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { | |
410 | + hlist_add_head(&icq->ioc_node, &ioc->icq_list); | |
411 | + list_add(&icq->q_node, &q->icq_list); | |
412 | + if (et->ops.elevator_init_icq_fn) | |
413 | + et->ops.elevator_init_icq_fn(icq); | |
414 | + } else { | |
415 | + kmem_cache_free(et->icq_cache, icq); | |
416 | + icq = ioc_lookup_icq(ioc, q); | |
417 | + if (!icq) | |
418 | + printk(KERN_ERR "cfq: icq link failed!\n"); | |
419 | + } | |
420 | + | |
421 | + spin_unlock(&ioc->lock); | |
422 | + spin_unlock_irq(q->queue_lock); | |
423 | + radix_tree_preload_end(); | |
424 | + return icq; | |
425 | +} | |
426 | + | |
427 | +void ioc_set_changed(struct io_context *ioc, int which) | |
428 | +{ | |
429 | + struct io_cq *icq; | |
430 | + struct hlist_node *n; | |
431 | + | |
432 | + hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node) | |
433 | + set_bit(which, &icq->changed); | |
434 | +} | |
435 | + | |
436 | +/** | |
437 | + * ioc_ioprio_changed - notify ioprio change | |
438 | + * @ioc: io_context of interest | |
439 | + * @ioprio: new ioprio | |
440 | + * | |
441 | + * @ioc's ioprio has changed to @ioprio. Set %ICQ_IOPRIO_CHANGED for all | |
442 | + * icq's. iosched is responsible for checking the bit and applying it on | |
443 | + * request issue path. | |
444 | + */ | |
445 | +void ioc_ioprio_changed(struct io_context *ioc, int ioprio) | |
446 | +{ | |
447 | + unsigned long flags; | |
448 | + | |
449 | + spin_lock_irqsave(&ioc->lock, flags); | |
450 | + ioc->ioprio = ioprio; | |
451 | + ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED); | |
452 | + spin_unlock_irqrestore(&ioc->lock, flags); | |
453 | +} | |
454 | + | |
455 | +/** | |
456 | + * ioc_cgroup_changed - notify cgroup change | |
457 | + * @ioc: io_context of interest | |
458 | + * | |
459 | + * @ioc's cgroup has changed. Set %ICQ_CGROUP_CHANGED for all icq's. | |
460 | + * iosched is responsible for checking the bit and applying it on request | |
461 | + * issue path. | |
462 | + */ | |
463 | +void ioc_cgroup_changed(struct io_context *ioc) | |
464 | +{ | |
465 | + unsigned long flags; | |
466 | + | |
467 | + spin_lock_irqsave(&ioc->lock, flags); | |
468 | + ioc_set_changed(ioc, ICQ_CGROUP_CHANGED); | |
469 | + spin_unlock_irqrestore(&ioc->lock, flags); | |
470 | +} | |
471 | +EXPORT_SYMBOL(ioc_cgroup_changed); | |
157 | 472 | |
158 | 473 | static int __init blk_ioc_init(void) |
159 | 474 | { |
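Taken together with the get_request() changes above, the intended calling convention is: look up the icq cheaply under queue_lock via ioc_lookup_icq(), and only fall back to ioc_create_icq() (which may sleep, and takes both locks itself) on a miss. A sketch of that flow under an invented helper name, example_get_icq(); everything it calls comes from this patch:

#include "blk.h"        /* ioc_lookup_icq(), ioc_create_icq(), create_io_context() */

static struct io_cq *example_get_icq(struct request_queue *q, gfp_t gfp_mask)
{
        struct elevator_type *et = q->elevator->type;
        struct io_cq *icq = NULL;

        if (!et->icq_cache)
                return NULL;            /* this elevator doesn't use icq's */

        spin_lock_irq(q->queue_lock);
        if (current->io_context)
                icq = ioc_lookup_icq(current->io_context, q);
        spin_unlock_irq(q->queue_lock);

        if (!icq)
                icq = ioc_create_icq(q, gfp_mask);      /* allocates the ioc too */
        return icq;                     /* NULL only if allocation failed */
}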
block/blk-settings.c
... | ... | @@ -104,9 +104,7 @@ |
104 | 104 | * @lim: the queue_limits structure to reset |
105 | 105 | * |
106 | 106 | * Description: |
107 | - * Returns a queue_limit struct to its default state. Can be used by | |
108 | - * stacking drivers like DM that stage table swaps and reuse an | |
109 | - * existing device queue. | |
107 | + * Returns a queue_limit struct to its default state. | |
110 | 108 | */ |
111 | 109 | void blk_set_default_limits(struct queue_limits *lim) |
112 | 110 | { |
113 | 111 | |
... | ... | @@ -114,13 +112,12 @@ |
114 | 112 | lim->max_integrity_segments = 0; |
115 | 113 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; |
116 | 114 | lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; |
117 | - lim->max_sectors = BLK_DEF_MAX_SECTORS; | |
118 | - lim->max_hw_sectors = INT_MAX; | |
115 | + lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; | |
119 | 116 | lim->max_discard_sectors = 0; |
120 | 117 | lim->discard_granularity = 0; |
121 | 118 | lim->discard_alignment = 0; |
122 | 119 | lim->discard_misaligned = 0; |
123 | - lim->discard_zeroes_data = 1; | |
120 | + lim->discard_zeroes_data = 0; | |
124 | 121 | lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; |
125 | 122 | lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); |
126 | 123 | lim->alignment_offset = 0; |
... | ... | @@ -131,6 +128,27 @@ |
131 | 128 | EXPORT_SYMBOL(blk_set_default_limits); |
132 | 129 | |
133 | 130 | /** |
131 | + * blk_set_stacking_limits - set default limits for stacking devices | |
132 | + * @lim: the queue_limits structure to reset | |
133 | + * | |
134 | + * Description: | |
135 | + * Returns a queue_limit struct to its default state. Should be used | |
136 | + * by stacking drivers like DM that have no internal limits. | |
137 | + */ | |
138 | +void blk_set_stacking_limits(struct queue_limits *lim) | |
139 | +{ | |
140 | + blk_set_default_limits(lim); | |
141 | + | |
142 | + /* Inherit limits from component devices */ | |
143 | + lim->discard_zeroes_data = 1; | |
144 | + lim->max_segments = USHRT_MAX; | |
145 | + lim->max_hw_sectors = UINT_MAX; | |
146 | + | |
147 | + lim->max_sectors = BLK_DEF_MAX_SECTORS; | |
148 | +} | |
149 | +EXPORT_SYMBOL(blk_set_stacking_limits); | |
150 | + | |
151 | +/** | |
134 | 152 | * blk_queue_make_request - define an alternate make_request function for a device |
135 | 153 | * @q: the request queue for the device to be affected |
136 | 154 | * @mfn: the alternate make_request function |
... | ... | @@ -165,8 +183,6 @@ |
165 | 183 | q->nr_batching = BLK_BATCH_REQ; |
166 | 184 | |
167 | 185 | blk_set_default_limits(&q->limits); |
168 | - blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); | |
169 | - q->limits.discard_zeroes_data = 0; | |
170 | 186 | |
171 | 187 | /* |
172 | 188 | * by default assume old behaviour and bounce for any highmem page |
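With this split, blk_queue_make_request() leaves a queue at the conservative blk_set_default_limits() values, while a bio-based stacking driver opts into the permissive blk_set_stacking_limits() so that the limits can only be tightened by what the component devices report. A rough sketch of how a DM/MD-style driver might combine this with blk_stack_limits(); the example_* name is made up, and blk_stack_limits()/bdev_get_queue() are pre-existing kernel interfaces:

#include <linux/blkdev.h>

/* Hypothetical fragment of a bio-based stacking driver building its limits. */
static void example_compute_limits(struct request_queue *q,
                                   struct block_device *component)
{
        struct queue_limits lim;

        blk_set_stacking_limits(&lim);  /* start from "no internal limits" */

        /* tighten @lim to what the underlying device can actually do */
        if (blk_stack_limits(&lim, &bdev_get_queue(component)->limits, 0))
                pr_warn("example: component device limits are misaligned\n");

        q->limits = lim;                /* publish the stacked limits */
}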
block/blk-sysfs.c
... | ... | @@ -425,7 +425,7 @@ |
425 | 425 | if (!entry->show) |
426 | 426 | return -EIO; |
427 | 427 | mutex_lock(&q->sysfs_lock); |
428 | - if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | |
428 | + if (blk_queue_dead(q)) { | |
429 | 429 | mutex_unlock(&q->sysfs_lock); |
430 | 430 | return -ENOENT; |
431 | 431 | } |
... | ... | @@ -447,7 +447,7 @@ |
447 | 447 | |
448 | 448 | q = container_of(kobj, struct request_queue, kobj); |
449 | 449 | mutex_lock(&q->sysfs_lock); |
450 | - if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | |
450 | + if (blk_queue_dead(q)) { | |
451 | 451 | mutex_unlock(&q->sysfs_lock); |
452 | 452 | return -ENOENT; |
453 | 453 | } |
454 | 454 | |
... | ... | @@ -479,8 +479,12 @@ |
479 | 479 | |
480 | 480 | blk_sync_queue(q); |
481 | 481 | |
482 | - if (q->elevator) | |
482 | + if (q->elevator) { | |
483 | + spin_lock_irq(q->queue_lock); | |
484 | + ioc_clear_queue(q); | |
485 | + spin_unlock_irq(q->queue_lock); | |
483 | 486 | elevator_exit(q->elevator); |
487 | + } | |
484 | 488 | |
485 | 489 | blk_throtl_exit(q); |
486 | 490 | |
... | ... | @@ -494,6 +498,8 @@ |
494 | 498 | blk_trace_shutdown(q); |
495 | 499 | |
496 | 500 | bdi_destroy(&q->backing_dev_info); |
501 | + | |
502 | + ida_simple_remove(&blk_queue_ida, q->id); | |
497 | 503 | kmem_cache_free(blk_requestq_cachep, q); |
498 | 504 | } |
499 | 505 |
block/blk-throttle.c
... | ... | @@ -310,7 +310,7 @@ |
310 | 310 | struct request_queue *q = td->queue; |
311 | 311 | |
312 | 312 | /* no throttling for dead queue */ |
313 | - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | |
313 | + if (unlikely(blk_queue_dead(q))) | |
314 | 314 | return NULL; |
315 | 315 | |
316 | 316 | rcu_read_lock(); |
... | ... | @@ -335,7 +335,7 @@ |
335 | 335 | spin_lock_irq(q->queue_lock); |
336 | 336 | |
337 | 337 | /* Make sure @q is still alive */ |
338 | - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | |
338 | + if (unlikely(blk_queue_dead(q))) { | |
339 | 339 | kfree(tg); |
340 | 340 | return NULL; |
341 | 341 | } |
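All of these hunks replace the open-coded test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) with blk_queue_dead(q). The helper itself lives in include/linux/blkdev.h, which is in the changed-file list but not shown here; presumably it follows the pattern of the existing blk_queue_* flag macros, roughly:

/* Assumed shape of the helper, mirroring the other blk_queue_* flag tests. */
#define blk_queue_dead(q)       test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)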
block/blk.h
1 | 1 | #ifndef BLK_INTERNAL_H |
2 | 2 | #define BLK_INTERNAL_H |
3 | 3 | |
4 | +#include <linux/idr.h> | |
5 | + | |
4 | 6 | /* Amount of time in which a process may batch requests */ |
5 | 7 | #define BLK_BATCH_TIME (HZ/50UL) |
6 | 8 | |
7 | 9 | |
... | ... | @@ -9,7 +11,13 @@ |
9 | 11 | |
10 | 12 | extern struct kmem_cache *blk_requestq_cachep; |
11 | 13 | extern struct kobj_type blk_queue_ktype; |
14 | +extern struct ida blk_queue_ida; | |
12 | 15 | |
16 | +static inline void __blk_get_queue(struct request_queue *q) | |
17 | +{ | |
18 | + kobject_get(&q->kobj); | |
19 | +} | |
20 | + | |
13 | 21 | void init_request_from_bio(struct request *req, struct bio *bio); |
14 | 22 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
15 | 23 | struct bio *bio); |
... | ... | @@ -85,8 +93,8 @@ |
85 | 93 | q->flush_queue_delayed = 1; |
86 | 94 | return NULL; |
87 | 95 | } |
88 | - if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) || | |
89 | - !q->elevator->ops->elevator_dispatch_fn(q, 0)) | |
96 | + if (unlikely(blk_queue_dead(q)) || | |
97 | + !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) | |
90 | 98 | return NULL; |
91 | 99 | } |
92 | 100 | } |
93 | 101 | |
... | ... | @@ -95,16 +103,16 @@ |
95 | 103 | { |
96 | 104 | struct elevator_queue *e = q->elevator; |
97 | 105 | |
98 | - if (e->ops->elevator_activate_req_fn) | |
99 | - e->ops->elevator_activate_req_fn(q, rq); | |
106 | + if (e->type->ops.elevator_activate_req_fn) | |
107 | + e->type->ops.elevator_activate_req_fn(q, rq); | |
100 | 108 | } |
101 | 109 | |
102 | 110 | static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) |
103 | 111 | { |
104 | 112 | struct elevator_queue *e = q->elevator; |
105 | 113 | |
106 | - if (e->ops->elevator_deactivate_req_fn) | |
107 | - e->ops->elevator_deactivate_req_fn(q, rq); | |
114 | + if (e->type->ops.elevator_deactivate_req_fn) | |
115 | + e->type->ops.elevator_deactivate_req_fn(q, rq); | |
108 | 116 | } |
109 | 117 | |
110 | 118 | #ifdef CONFIG_FAIL_IO_TIMEOUT |
... | ... | @@ -119,8 +127,6 @@ |
119 | 127 | } |
120 | 128 | #endif |
121 | 129 | |
122 | -struct io_context *current_io_context(gfp_t gfp_flags, int node); | |
123 | - | |
124 | 130 | int ll_back_merge_fn(struct request_queue *q, struct request *req, |
125 | 131 | struct bio *bio); |
126 | 132 | int ll_front_merge_fn(struct request_queue *q, struct request *req, |
... | ... | @@ -189,6 +195,42 @@ |
189 | 195 | (rq->cmd_flags & REQ_DISCARD)); |
190 | 196 | } |
191 | 197 | |
198 | +/* | |
199 | + * Internal io_context interface | |
200 | + */ | |
201 | +void get_io_context(struct io_context *ioc); | |
202 | +struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q); | |
203 | +struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask); | |
204 | +void ioc_clear_queue(struct request_queue *q); | |
205 | + | |
206 | +void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask, | |
207 | + int node); | |
208 | + | |
209 | +/** | |
210 | + * create_io_context - try to create task->io_context | |
211 | + * @task: target task | |
212 | + * @gfp_mask: allocation mask | |
213 | + * @node: allocation node | |
214 | + * | |
215 | + * If @task->io_context is %NULL, allocate a new io_context and install it. | |
216 | + * Returns the current @task->io_context which may be %NULL if allocation | |
217 | + * failed. | |
218 | + * | |
219 | + * Note that this function can't be called with IRQ disabled because | |
220 | + * task_lock which protects @task->io_context is IRQ-unsafe. | |
221 | + */ | |
222 | +static inline struct io_context *create_io_context(struct task_struct *task, | |
223 | + gfp_t gfp_mask, int node) | |
224 | +{ | |
225 | + WARN_ON_ONCE(irqs_disabled()); | |
226 | + if (unlikely(!task->io_context)) | |
227 | + create_io_context_slowpath(task, gfp_mask, node); | |
228 | + return task->io_context; | |
229 | +} | |
230 | + | |
231 | +/* | |
232 | + * Internal throttling interface | |
233 | + */ | |
192 | 234 | #ifdef CONFIG_BLK_DEV_THROTTLING |
193 | 235 | extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio); |
194 | 236 | extern void blk_throtl_drain(struct request_queue *q); |
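Because create_io_context() ends up in task_lock(), which is not IRQ-safe, a caller holding queue_lock with interrupts disabled has to drop the lock first, exactly as get_request() does above. A hedged sketch of that calling convention; example_prepare_ioc() is a made-up name:

/*
 * Hypothetical caller: create_io_context() takes task_lock(), which is not
 * IRQ-safe, so a path holding queue_lock with IRQs off has to drop the
 * lock, create the ioc, then reacquire and revalidate queue state.
 */
static struct io_context *example_prepare_ioc(struct request_queue *q)
{
        struct io_context *ioc;

        spin_unlock_irq(q->queue_lock);
        ioc = create_io_context(current, GFP_NOIO, q->node);
        spin_lock_irq(q->queue_lock);

        /* the queue may have died while unlocked -- caller must re-check */
        return ioc;                     /* NULL if allocation failed */
}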
block/bsg.c
... | ... | @@ -769,12 +769,10 @@ |
769 | 769 | struct file *file) |
770 | 770 | { |
771 | 771 | struct bsg_device *bd; |
772 | - int ret; | |
773 | 772 | #ifdef BSG_DEBUG |
774 | 773 | unsigned char buf[32]; |
775 | 774 | #endif |
776 | - ret = blk_get_queue(rq); | |
777 | - if (ret) | |
775 | + if (!blk_get_queue(rq)) | |
778 | 776 | return ERR_PTR(-ENXIO); |
779 | 777 | |
780 | 778 | bd = bsg_alloc_device(); |
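blk_get_queue() now returns a bool (true when a reference was taken, false when the queue is already dead) instead of the old 0-on-success convention, which is why the bsg call site flips to if (!blk_get_queue(rq)). A sketch of the updated caller pattern; struct example_dev and both functions are invented for illustration, while blk_get_queue()/blk_put_queue() are the real interfaces:

struct example_dev {
        struct request_queue *queue;
};

static int example_bind_queue(struct example_dev *dev, struct request_queue *q)
{
        if (!blk_get_queue(q))          /* false: @q is already marked dead */
                return -ENXIO;

        dev->queue = q;
        return 0;
}

static void example_unbind_queue(struct example_dev *dev)
{
        blk_put_queue(dev->queue);      /* drop the reference taken above */
        dev->queue = NULL;
}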
block/cfq-iosched.c
Changes suppressed.
... | ... | @@ -14,6 +14,7 @@ |
14 | 14 | #include <linux/rbtree.h> |
15 | 15 | #include <linux/ioprio.h> |
16 | 16 | #include <linux/blktrace_api.h> |
17 | +#include "blk.h" | |
17 | 18 | #include "cfq.h" |
18 | 19 | |
19 | 20 | /* |
20 | 21 | |
21 | 22 | |
... | ... | @@ -53,21 +54,12 @@ |
53 | 54 | #define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32) |
54 | 55 | #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) |
55 | 56 | |
56 | -#define RQ_CIC(rq) \ | |
57 | - ((struct cfq_io_context *) (rq)->elevator_private[0]) | |
58 | -#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private[1]) | |
59 | -#define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private[2]) | |
57 | +#define RQ_CIC(rq) icq_to_cic((rq)->elv.icq) | |
58 | +#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elv.priv[0]) | |
59 | +#define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elv.priv[1]) | |
60 | 60 | |
61 | 61 | static struct kmem_cache *cfq_pool; |
62 | -static struct kmem_cache *cfq_ioc_pool; | |
63 | 62 | |
64 | -static DEFINE_PER_CPU(unsigned long, cfq_ioc_count); | |
65 | -static struct completion *ioc_gone; | |
66 | -static DEFINE_SPINLOCK(ioc_gone_lock); | |
67 | - | |
68 | -static DEFINE_SPINLOCK(cic_index_lock); | |
69 | -static DEFINE_IDA(cic_index_ida); | |
70 | - | |
71 | 63 | #define CFQ_PRIO_LISTS IOPRIO_BE_NR |
72 | 64 | #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) |
73 | 65 | #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) |
... | ... | @@ -75,6 +67,14 @@ |
75 | 67 | #define sample_valid(samples) ((samples) > 80) |
76 | 68 | #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node) |
77 | 69 | |
70 | +struct cfq_ttime { | |
71 | + unsigned long last_end_request; | |
72 | + | |
73 | + unsigned long ttime_total; | |
74 | + unsigned long ttime_samples; | |
75 | + unsigned long ttime_mean; | |
76 | +}; | |
77 | + | |
78 | 78 | /* |
79 | 79 | * Most of our rbtree usage is for sorting with min extraction, so |
80 | 80 | * if we cache the leftmost node we don't have to walk down the tree |
... | ... | @@ -216,6 +216,12 @@ |
216 | 216 | struct cfq_ttime ttime; |
217 | 217 | }; |
218 | 218 | |
219 | +struct cfq_io_cq { | |
220 | + struct io_cq icq; /* must be the first member */ | |
221 | + struct cfq_queue *cfqq[2]; | |
222 | + struct cfq_ttime ttime; | |
223 | +}; | |
224 | + | |
219 | 225 | /* |
220 | 226 | * Per block device queue structure |
221 | 227 | */ |
... | ... | @@ -267,7 +273,7 @@ |
267 | 273 | struct work_struct unplug_work; |
268 | 274 | |
269 | 275 | struct cfq_queue *active_queue; |
270 | - struct cfq_io_context *active_cic; | |
276 | + struct cfq_io_cq *active_cic; | |
271 | 277 | |
272 | 278 | /* |
273 | 279 | * async queue for each priority case |
... | ... | @@ -290,9 +296,6 @@ |
290 | 296 | unsigned int cfq_group_idle; |
291 | 297 | unsigned int cfq_latency; |
292 | 298 | |
293 | - unsigned int cic_index; | |
294 | - struct list_head cic_list; | |
295 | - | |
296 | 299 | /* |
297 | 300 | * Fallback dummy cfqq for extreme OOM conditions |
298 | 301 | */ |
299 | 302 | |
300 | 303 | |
301 | 304 | |
302 | 305 | |
303 | 306 | |
304 | 307 | |
305 | 308 | |
306 | 309 | |
307 | 310 | |
... | ... | @@ -464,37 +467,35 @@ |
464 | 467 | static void cfq_dispatch_insert(struct request_queue *, struct request *); |
465 | 468 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool, |
466 | 469 | struct io_context *, gfp_t); |
467 | -static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, | |
468 | - struct io_context *); | |
469 | 470 | |
470 | -static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, | |
471 | - bool is_sync) | |
471 | +static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq) | |
472 | 472 | { |
473 | - return cic->cfqq[is_sync]; | |
473 | + /* cic->icq is the first member, %NULL will convert to %NULL */ | |
474 | + return container_of(icq, struct cfq_io_cq, icq); | |
474 | 475 | } |
475 | 476 | |
476 | -static inline void cic_set_cfqq(struct cfq_io_context *cic, | |
477 | - struct cfq_queue *cfqq, bool is_sync) | |
477 | +static inline struct cfq_io_cq *cfq_cic_lookup(struct cfq_data *cfqd, | |
478 | + struct io_context *ioc) | |
478 | 479 | { |
479 | - cic->cfqq[is_sync] = cfqq; | |
480 | + if (ioc) | |
481 | + return icq_to_cic(ioc_lookup_icq(ioc, cfqd->queue)); | |
482 | + return NULL; | |
480 | 483 | } |
481 | 484 | |
482 | -#define CIC_DEAD_KEY 1ul | |
483 | -#define CIC_DEAD_INDEX_SHIFT 1 | |
484 | - | |
485 | -static inline void *cfqd_dead_key(struct cfq_data *cfqd) | |
485 | +static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_cq *cic, bool is_sync) | |
486 | 486 | { |
487 | - return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY); | |
487 | + return cic->cfqq[is_sync]; | |
488 | 488 | } |
489 | 489 | |
490 | -static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic) | |
490 | +static inline void cic_set_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq, | |
491 | + bool is_sync) | |
491 | 492 | { |
492 | - struct cfq_data *cfqd = cic->key; | |
493 | + cic->cfqq[is_sync] = cfqq; | |
494 | +} | |
493 | 495 | |
494 | - if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY)) | |
495 | - return NULL; | |
496 | - | |
497 | - return cfqd; | |
496 | +static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic) | |
497 | +{ | |
498 | + return cic->icq.q->elevator->elevator_data; | |
498 | 499 | } |
499 | 500 | |
500 | 501 | /* |
... | ... | @@ -1561,7 +1562,7 @@ |
1561 | 1562 | cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) |
1562 | 1563 | { |
1563 | 1564 | struct task_struct *tsk = current; |
1564 | - struct cfq_io_context *cic; | |
1565 | + struct cfq_io_cq *cic; | |
1565 | 1566 | struct cfq_queue *cfqq; |
1566 | 1567 | |
1567 | 1568 | cic = cfq_cic_lookup(cfqd, tsk->io_context); |
... | ... | @@ -1687,7 +1688,7 @@ |
1687 | 1688 | struct bio *bio) |
1688 | 1689 | { |
1689 | 1690 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1690 | - struct cfq_io_context *cic; | |
1691 | + struct cfq_io_cq *cic; | |
1691 | 1692 | struct cfq_queue *cfqq; |
1692 | 1693 | |
1693 | 1694 | /* |
1694 | 1695 | |
... | ... | @@ -1697,12 +1698,19 @@ |
1697 | 1698 | return false; |
1698 | 1699 | |
1699 | 1700 | /* |
1700 | - * Lookup the cfqq that this bio will be queued with. Allow | |
1701 | - * merge only if rq is queued there. | |
1701 | + * Lookup the cfqq that this bio will be queued with and allow | |
1702 | + * merge only if rq is queued there. This function can be called | |
1703 | + * from plug merge without queue_lock. In such cases, ioc of @rq | |
1704 | + * and %current are guaranteed to be equal. Avoid lookup which | |
1705 | + * requires queue_lock by using @rq's cic. | |
1702 | 1706 | */ |
1703 | - cic = cfq_cic_lookup(cfqd, current->io_context); | |
1704 | - if (!cic) | |
1705 | - return false; | |
1707 | + if (current->io_context == RQ_CIC(rq)->icq.ioc) { | |
1708 | + cic = RQ_CIC(rq); | |
1709 | + } else { | |
1710 | + cic = cfq_cic_lookup(cfqd, current->io_context); | |
1711 | + if (!cic) | |
1712 | + return false; | |
1713 | + } | |
1706 | 1714 | |
1707 | 1715 | cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); |
1708 | 1716 | return cfqq == RQ_CFQQ(rq); |
... | ... | @@ -1786,7 +1794,7 @@ |
1786 | 1794 | cfqd->active_queue = NULL; |
1787 | 1795 | |
1788 | 1796 | if (cfqd->active_cic) { |
1789 | - put_io_context(cfqd->active_cic->ioc); | |
1797 | + put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue); | |
1790 | 1798 | cfqd->active_cic = NULL; |
1791 | 1799 | } |
1792 | 1800 | } |
... | ... | @@ -2006,7 +2014,7 @@ |
2006 | 2014 | static void cfq_arm_slice_timer(struct cfq_data *cfqd) |
2007 | 2015 | { |
2008 | 2016 | struct cfq_queue *cfqq = cfqd->active_queue; |
2009 | - struct cfq_io_context *cic; | |
2017 | + struct cfq_io_cq *cic; | |
2010 | 2018 | unsigned long sl, group_idle = 0; |
2011 | 2019 | |
2012 | 2020 | /* |
... | ... | @@ -2041,7 +2049,7 @@ |
2041 | 2049 | * task has exited, don't wait |
2042 | 2050 | */ |
2043 | 2051 | cic = cfqd->active_cic; |
2044 | - if (!cic || !atomic_read(&cic->ioc->nr_tasks)) | |
2052 | + if (!cic || !atomic_read(&cic->icq.ioc->nr_tasks)) | |
2045 | 2053 | return; |
2046 | 2054 | |
2047 | 2055 | /* |
2048 | 2056 | |
... | ... | @@ -2592,9 +2600,9 @@ |
2592 | 2600 | cfq_dispatch_insert(cfqd->queue, rq); |
2593 | 2601 | |
2594 | 2602 | if (!cfqd->active_cic) { |
2595 | - struct cfq_io_context *cic = RQ_CIC(rq); | |
2603 | + struct cfq_io_cq *cic = RQ_CIC(rq); | |
2596 | 2604 | |
2597 | - atomic_long_inc(&cic->ioc->refcount); | |
2605 | + atomic_long_inc(&cic->icq.ioc->refcount); | |
2598 | 2606 | cfqd->active_cic = cic; |
2599 | 2607 | } |
2600 | 2608 | |
... | ... | @@ -2677,84 +2685,6 @@ |
2677 | 2685 | cfq_put_cfqg(cfqg); |
2678 | 2686 | } |
2679 | 2687 | |
2680 | -/* | |
2681 | - * Call func for each cic attached to this ioc. | |
2682 | - */ | |
2683 | -static void | |
2684 | -call_for_each_cic(struct io_context *ioc, | |
2685 | - void (*func)(struct io_context *, struct cfq_io_context *)) | |
2686 | -{ | |
2687 | - struct cfq_io_context *cic; | |
2688 | - struct hlist_node *n; | |
2689 | - | |
2690 | - rcu_read_lock(); | |
2691 | - | |
2692 | - hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) | |
2693 | - func(ioc, cic); | |
2694 | - | |
2695 | - rcu_read_unlock(); | |
2696 | -} | |
2697 | - | |
2698 | -static void cfq_cic_free_rcu(struct rcu_head *head) | |
2699 | -{ | |
2700 | - struct cfq_io_context *cic; | |
2701 | - | |
2702 | - cic = container_of(head, struct cfq_io_context, rcu_head); | |
2703 | - | |
2704 | - kmem_cache_free(cfq_ioc_pool, cic); | |
2705 | - elv_ioc_count_dec(cfq_ioc_count); | |
2706 | - | |
2707 | - if (ioc_gone) { | |
2708 | - /* | |
2709 | - * CFQ scheduler is exiting, grab exit lock and check | |
2710 | - * the pending io context count. If it hits zero, | |
2711 | - * complete ioc_gone and set it back to NULL | |
2712 | - */ | |
2713 | - spin_lock(&ioc_gone_lock); | |
2714 | - if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) { | |
2715 | - complete(ioc_gone); | |
2716 | - ioc_gone = NULL; | |
2717 | - } | |
2718 | - spin_unlock(&ioc_gone_lock); | |
2719 | - } | |
2720 | -} | |
2721 | - | |
2722 | -static void cfq_cic_free(struct cfq_io_context *cic) | |
2723 | -{ | |
2724 | - call_rcu(&cic->rcu_head, cfq_cic_free_rcu); | |
2725 | -} | |
2726 | - | |
2727 | -static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) | |
2728 | -{ | |
2729 | - unsigned long flags; | |
2730 | - unsigned long dead_key = (unsigned long) cic->key; | |
2731 | - | |
2732 | - BUG_ON(!(dead_key & CIC_DEAD_KEY)); | |
2733 | - | |
2734 | - spin_lock_irqsave(&ioc->lock, flags); | |
2735 | - radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT); | |
2736 | - hlist_del_rcu(&cic->cic_list); | |
2737 | - spin_unlock_irqrestore(&ioc->lock, flags); | |
2738 | - | |
2739 | - cfq_cic_free(cic); | |
2740 | -} | |
2741 | - | |
2742 | -/* | |
2743 | - * Must be called with rcu_read_lock() held or preemption otherwise disabled. | |
2744 | - * Only two callers of this - ->dtor() which is called with the rcu_read_lock(), | |
2745 | - * and ->trim() which is called with the task lock held | |
2746 | - */ | |
2747 | -static void cfq_free_io_context(struct io_context *ioc) | |
2748 | -{ | |
2749 | - /* | |
2750 | - * ioc->refcount is zero here, or we are called from elv_unregister(), | |
2751 | - * so no more cic's are allowed to be linked into this ioc. So it | |
2752 | - * should be ok to iterate over the known list, we will see all cic's | |
2753 | - * since no new ones are added. | |
2754 | - */ | |
2755 | - call_for_each_cic(ioc, cic_free_func); | |
2756 | -} | |
2757 | - | |
2758 | 2688 | static void cfq_put_cooperator(struct cfq_queue *cfqq) |
2759 | 2689 | { |
2760 | 2690 | struct cfq_queue *__cfqq, *next; |
2761 | 2691 | |
2762 | 2692 | |
2763 | 2693 | |
2764 | 2694 | |
... | ... | @@ -2788,28 +2718,18 @@ |
2788 | 2718 | cfq_put_queue(cfqq); |
2789 | 2719 | } |
2790 | 2720 | |
2791 | -static void __cfq_exit_single_io_context(struct cfq_data *cfqd, | |
2792 | - struct cfq_io_context *cic) | |
2721 | +static void cfq_init_icq(struct io_cq *icq) | |
2793 | 2722 | { |
2794 | - struct io_context *ioc = cic->ioc; | |
2723 | + struct cfq_io_cq *cic = icq_to_cic(icq); | |
2795 | 2724 | |
2796 | - list_del_init(&cic->queue_list); | |
2725 | + cic->ttime.last_end_request = jiffies; | |
2726 | +} | |
2797 | 2727 | |
2798 | - /* | |
2799 | - * Make sure dead mark is seen for dead queues | |
2800 | - */ | |
2801 | - smp_wmb(); | |
2802 | - cic->key = cfqd_dead_key(cfqd); | |
2728 | +static void cfq_exit_icq(struct io_cq *icq) | |
2729 | +{ | |
2730 | + struct cfq_io_cq *cic = icq_to_cic(icq); | |
2731 | + struct cfq_data *cfqd = cic_to_cfqd(cic); | |
2803 | 2732 | |
2804 | - rcu_read_lock(); | |
2805 | - if (rcu_dereference(ioc->ioc_data) == cic) { | |
2806 | - rcu_read_unlock(); | |
2807 | - spin_lock(&ioc->lock); | |
2808 | - rcu_assign_pointer(ioc->ioc_data, NULL); | |
2809 | - spin_unlock(&ioc->lock); | |
2810 | - } else | |
2811 | - rcu_read_unlock(); | |
2812 | - | |
2813 | 2733 | if (cic->cfqq[BLK_RW_ASYNC]) { |
2814 | 2734 | cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); |
2815 | 2735 | cic->cfqq[BLK_RW_ASYNC] = NULL; |
... | ... | @@ -2821,57 +2741,6 @@ |
2821 | 2741 | } |
2822 | 2742 | } |
2823 | 2743 | |
2824 | -static void cfq_exit_single_io_context(struct io_context *ioc, | |
2825 | - struct cfq_io_context *cic) | |
2826 | -{ | |
2827 | - struct cfq_data *cfqd = cic_to_cfqd(cic); | |
2828 | - | |
2829 | - if (cfqd) { | |
2830 | - struct request_queue *q = cfqd->queue; | |
2831 | - unsigned long flags; | |
2832 | - | |
2833 | - spin_lock_irqsave(q->queue_lock, flags); | |
2834 | - | |
2835 | - /* | |
2836 | - * Ensure we get a fresh copy of the ->key to prevent | |
2837 | - * race between exiting task and queue | |
2838 | - */ | |
2839 | - smp_read_barrier_depends(); | |
2840 | - if (cic->key == cfqd) | |
2841 | - __cfq_exit_single_io_context(cfqd, cic); | |
2842 | - | |
2843 | - spin_unlock_irqrestore(q->queue_lock, flags); | |
2844 | - } | |
2845 | -} | |
2846 | - | |
2847 | -/* | |
2848 | - * The process that ioc belongs to has exited, we need to clean up | |
2849 | - * and put the internal structures we have that belongs to that process. | |
2850 | - */ | |
2851 | -static void cfq_exit_io_context(struct io_context *ioc) | |
2852 | -{ | |
2853 | - call_for_each_cic(ioc, cfq_exit_single_io_context); | |
2854 | -} | |
2855 | - | |
2856 | -static struct cfq_io_context * | |
2857 | -cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |
2858 | -{ | |
2859 | - struct cfq_io_context *cic; | |
2860 | - | |
2861 | - cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO, | |
2862 | - cfqd->queue->node); | |
2863 | - if (cic) { | |
2864 | - cic->ttime.last_end_request = jiffies; | |
2865 | - INIT_LIST_HEAD(&cic->queue_list); | |
2866 | - INIT_HLIST_NODE(&cic->cic_list); | |
2867 | - cic->dtor = cfq_free_io_context; | |
2868 | - cic->exit = cfq_exit_io_context; | |
2869 | - elv_ioc_count_inc(cfq_ioc_count); | |
2870 | - } | |
2871 | - | |
2872 | - return cic; | |
2873 | -} | |
2874 | - | |
2875 | 2744 | static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) |
2876 | 2745 | { |
2877 | 2746 | struct task_struct *tsk = current; |
2878 | 2747 | |
2879 | 2748 | |
2880 | 2749 | |
... | ... | @@ -2914,21 +2783,18 @@ |
2914 | 2783 | cfq_clear_cfqq_prio_changed(cfqq); |
2915 | 2784 | } |
2916 | 2785 | |
2917 | -static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) | |
2786 | +static void changed_ioprio(struct cfq_io_cq *cic) | |
2918 | 2787 | { |
2919 | 2788 | struct cfq_data *cfqd = cic_to_cfqd(cic); |
2920 | 2789 | struct cfq_queue *cfqq; |
2921 | - unsigned long flags; | |
2922 | 2790 | |
2923 | 2791 | if (unlikely(!cfqd)) |
2924 | 2792 | return; |
2925 | 2793 | |
2926 | - spin_lock_irqsave(cfqd->queue->queue_lock, flags); | |
2927 | - | |
2928 | 2794 | cfqq = cic->cfqq[BLK_RW_ASYNC]; |
2929 | 2795 | if (cfqq) { |
2930 | 2796 | struct cfq_queue *new_cfqq; |
2931 | - new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc, | |
2797 | + new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->icq.ioc, | |
2932 | 2798 | GFP_ATOMIC); |
2933 | 2799 | if (new_cfqq) { |
2934 | 2800 | cic->cfqq[BLK_RW_ASYNC] = new_cfqq; |
2935 | 2801 | |
... | ... | @@ -2939,16 +2805,8 @@ |
2939 | 2805 | cfqq = cic->cfqq[BLK_RW_SYNC]; |
2940 | 2806 | if (cfqq) |
2941 | 2807 | cfq_mark_cfqq_prio_changed(cfqq); |
2942 | - | |
2943 | - spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | |
2944 | 2808 | } |
2945 | 2809 | |
2946 | -static void cfq_ioc_set_ioprio(struct io_context *ioc) | |
2947 | -{ | |
2948 | - call_for_each_cic(ioc, changed_ioprio); | |
2949 | - ioc->ioprio_changed = 0; | |
2950 | -} | |
2951 | - | |
2952 | 2810 | static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
2953 | 2811 | pid_t pid, bool is_sync) |
2954 | 2812 | { |
2955 | 2813 | |
... | ... | @@ -2970,11 +2828,10 @@ |
2970 | 2828 | } |
2971 | 2829 | |
2972 | 2830 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
2973 | -static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic) | |
2831 | +static void changed_cgroup(struct cfq_io_cq *cic) | |
2974 | 2832 | { |
2975 | 2833 | struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1); |
2976 | 2834 | struct cfq_data *cfqd = cic_to_cfqd(cic); |
2977 | - unsigned long flags; | |
2978 | 2835 | struct request_queue *q; |
2979 | 2836 | |
2980 | 2837 | if (unlikely(!cfqd)) |
... | ... | @@ -2982,8 +2839,6 @@ |
2982 | 2839 | |
2983 | 2840 | q = cfqd->queue; |
2984 | 2841 | |
2985 | - spin_lock_irqsave(q->queue_lock, flags); | |
2986 | - | |
2987 | 2842 | if (sync_cfqq) { |
2988 | 2843 | /* |
2989 | 2844 | * Drop reference to sync queue. A new sync queue will be |
2990 | 2845 | |
... | ... | @@ -2993,15 +2848,7 @@ |
2993 | 2848 | cic_set_cfqq(cic, NULL, 1); |
2994 | 2849 | cfq_put_queue(sync_cfqq); |
2995 | 2850 | } |
2996 | - | |
2997 | - spin_unlock_irqrestore(q->queue_lock, flags); | |
2998 | 2851 | } |
2999 | - | |
3000 | -static void cfq_ioc_set_cgroup(struct io_context *ioc) | |
3001 | -{ | |
3002 | - call_for_each_cic(ioc, changed_cgroup); | |
3003 | - ioc->cgroup_changed = 0; | |
3004 | -} | |
3005 | 2852 | #endif /* CONFIG_CFQ_GROUP_IOSCHED */ |
3006 | 2853 | |
3007 | 2854 | static struct cfq_queue * |
... | ... | @@ -3009,7 +2856,7 @@ |
3009 | 2856 | struct io_context *ioc, gfp_t gfp_mask) |
3010 | 2857 | { |
3011 | 2858 | struct cfq_queue *cfqq, *new_cfqq = NULL; |
3012 | - struct cfq_io_context *cic; | |
2859 | + struct cfq_io_cq *cic; | |
3013 | 2860 | struct cfq_group *cfqg; |
3014 | 2861 | |
3015 | 2862 | retry: |
3016 | 2863 | |
... | ... | @@ -3100,161 +2947,7 @@ |
3100 | 2947 | return cfqq; |
3101 | 2948 | } |
3102 | 2949 | |
3103 | -/* | |
3104 | - * We drop cfq io contexts lazily, so we may find a dead one. | |
3105 | - */ | |
3106 | 2950 | static void |
3107 | -cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, | |
3108 | - struct cfq_io_context *cic) | |
3109 | -{ | |
3110 | - unsigned long flags; | |
3111 | - | |
3112 | - WARN_ON(!list_empty(&cic->queue_list)); | |
3113 | - BUG_ON(cic->key != cfqd_dead_key(cfqd)); | |
3114 | - | |
3115 | - spin_lock_irqsave(&ioc->lock, flags); | |
3116 | - | |
3117 | - BUG_ON(rcu_dereference_check(ioc->ioc_data, | |
3118 | - lockdep_is_held(&ioc->lock)) == cic); | |
3119 | - | |
3120 | - radix_tree_delete(&ioc->radix_root, cfqd->cic_index); | |
3121 | - hlist_del_rcu(&cic->cic_list); | |
3122 | - spin_unlock_irqrestore(&ioc->lock, flags); | |
3123 | - | |
3124 | - cfq_cic_free(cic); | |
3125 | -} | |
3126 | - | |
3127 | -static struct cfq_io_context * | |
3128 | -cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) | |
3129 | -{ | |
3130 | - struct cfq_io_context *cic; | |
3131 | - unsigned long flags; | |
3132 | - | |
3133 | - if (unlikely(!ioc)) | |
3134 | - return NULL; | |
3135 | - | |
3136 | - rcu_read_lock(); | |
3137 | - | |
3138 | - /* | |
3139 | - * we maintain a last-hit cache, to avoid browsing over the tree | |
3140 | - */ | |
3141 | - cic = rcu_dereference(ioc->ioc_data); | |
3142 | - if (cic && cic->key == cfqd) { | |
3143 | - rcu_read_unlock(); | |
3144 | - return cic; | |
3145 | - } | |
3146 | - | |
3147 | - do { | |
3148 | - cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index); | |
3149 | - rcu_read_unlock(); | |
3150 | - if (!cic) | |
3151 | - break; | |
3152 | - if (unlikely(cic->key != cfqd)) { | |
3153 | - cfq_drop_dead_cic(cfqd, ioc, cic); | |
3154 | - rcu_read_lock(); | |
3155 | - continue; | |
3156 | - } | |
3157 | - | |
3158 | - spin_lock_irqsave(&ioc->lock, flags); | |
3159 | - rcu_assign_pointer(ioc->ioc_data, cic); | |
3160 | - spin_unlock_irqrestore(&ioc->lock, flags); | |
3161 | - break; | |
3162 | - } while (1); | |
3163 | - | |
3164 | - return cic; | |
3165 | -} | |
3166 | - | |
3167 | -/* | |
3168 | - * Add cic into ioc, using cfqd as the search key. This enables us to lookup | |
3169 | - * the process specific cfq io context when entered from the block layer. | |
3170 | - * Also adds the cic to a per-cfqd list, used when this queue is removed. | |
3171 | - */ | |
3172 | -static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, | |
3173 | - struct cfq_io_context *cic, gfp_t gfp_mask) | |
3174 | -{ | |
3175 | - unsigned long flags; | |
3176 | - int ret; | |
3177 | - | |
3178 | - ret = radix_tree_preload(gfp_mask); | |
3179 | - if (!ret) { | |
3180 | - cic->ioc = ioc; | |
3181 | - cic->key = cfqd; | |
3182 | - | |
3183 | - spin_lock_irqsave(&ioc->lock, flags); | |
3184 | - ret = radix_tree_insert(&ioc->radix_root, | |
3185 | - cfqd->cic_index, cic); | |
3186 | - if (!ret) | |
3187 | - hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list); | |
3188 | - spin_unlock_irqrestore(&ioc->lock, flags); | |
3189 | - | |
3190 | - radix_tree_preload_end(); | |
3191 | - | |
3192 | - if (!ret) { | |
3193 | - spin_lock_irqsave(cfqd->queue->queue_lock, flags); | |
3194 | - list_add(&cic->queue_list, &cfqd->cic_list); | |
3195 | - spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | |
3196 | - } | |
3197 | - } | |
3198 | - | |
3199 | - if (ret && ret != -EEXIST) | |
3200 | - printk(KERN_ERR "cfq: cic link failed!\n"); | |
3201 | - | |
3202 | - return ret; | |
3203 | -} | |
3204 | - | |
3205 | -/* | |
3206 | - * Setup general io context and cfq io context. There can be several cfq | |
3207 | - * io contexts per general io context, if this process is doing io to more | |
3208 | - * than one device managed by cfq. | |
3209 | - */ | |
3210 | -static struct cfq_io_context * | |
3211 | -cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |
3212 | -{ | |
3213 | - struct io_context *ioc = NULL; | |
3214 | - struct cfq_io_context *cic; | |
3215 | - int ret; | |
3216 | - | |
3217 | - might_sleep_if(gfp_mask & __GFP_WAIT); | |
3218 | - | |
3219 | - ioc = get_io_context(gfp_mask, cfqd->queue->node); | |
3220 | - if (!ioc) | |
3221 | - return NULL; | |
3222 | - | |
3223 | -retry: | |
3224 | - cic = cfq_cic_lookup(cfqd, ioc); | |
3225 | - if (cic) | |
3226 | - goto out; | |
3227 | - | |
3228 | - cic = cfq_alloc_io_context(cfqd, gfp_mask); | |
3229 | - if (cic == NULL) | |
3230 | - goto err; | |
3231 | - | |
3232 | - ret = cfq_cic_link(cfqd, ioc, cic, gfp_mask); | |
3233 | - if (ret == -EEXIST) { | |
3234 | - /* someone has linked cic to ioc already */ | |
3235 | - cfq_cic_free(cic); | |
3236 | - goto retry; | |
3237 | - } else if (ret) | |
3238 | - goto err_free; | |
3239 | - | |
3240 | -out: | |
3241 | - smp_read_barrier_depends(); | |
3242 | - if (unlikely(ioc->ioprio_changed)) | |
3243 | - cfq_ioc_set_ioprio(ioc); | |
3244 | - | |
3245 | -#ifdef CONFIG_CFQ_GROUP_IOSCHED | |
3246 | - if (unlikely(ioc->cgroup_changed)) | |
3247 | - cfq_ioc_set_cgroup(ioc); | |
3248 | -#endif | |
3249 | - return cic; | |
3250 | -err_free: | |
3251 | - cfq_cic_free(cic); | |
3252 | -err: | |
3253 | - put_io_context(ioc); | |
3254 | - return NULL; | |
3255 | -} | |
3256 | - | |
3257 | -static void | |
3258 | 2951 | __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) |
3259 | 2952 | { |
3260 | 2953 | unsigned long elapsed = jiffies - ttime->last_end_request; |
... | ... | @@ -3267,7 +2960,7 @@ |
3267 | 2960 | |
3268 | 2961 | static void |
3269 | 2962 | cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
3270 | - struct cfq_io_context *cic) | |
2963 | + struct cfq_io_cq *cic) | |
3271 | 2964 | { |
3272 | 2965 | if (cfq_cfqq_sync(cfqq)) { |
3273 | 2966 | __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle); |
... | ... | @@ -3305,7 +2998,7 @@ |
3305 | 2998 | */ |
3306 | 2999 | static void |
3307 | 3000 | cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
3308 | - struct cfq_io_context *cic) | |
3001 | + struct cfq_io_cq *cic) | |
3309 | 3002 | { |
3310 | 3003 | int old_idle, enable_idle; |
3311 | 3004 | |
... | ... | @@ -3322,8 +3015,9 @@ |
3322 | 3015 | |
3323 | 3016 | if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE)) |
3324 | 3017 | enable_idle = 0; |
3325 | - else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || | |
3326 | - (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) | |
3018 | + else if (!atomic_read(&cic->icq.ioc->nr_tasks) || | |
3019 | + !cfqd->cfq_slice_idle || | |
3020 | + (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) | |
3327 | 3021 | enable_idle = 0; |
3328 | 3022 | else if (sample_valid(cic->ttime.ttime_samples)) { |
3329 | 3023 | if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle) |
... | ... | @@ -3455,7 +3149,7 @@ |
3455 | 3149 | cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
3456 | 3150 | struct request *rq) |
3457 | 3151 | { |
3458 | - struct cfq_io_context *cic = RQ_CIC(rq); | |
3152 | + struct cfq_io_cq *cic = RQ_CIC(rq); | |
3459 | 3153 | |
3460 | 3154 | cfqd->rq_queued++; |
3461 | 3155 | if (rq->cmd_flags & REQ_PRIO) |
... | ... | @@ -3508,7 +3202,7 @@ |
3508 | 3202 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
3509 | 3203 | |
3510 | 3204 | cfq_log_cfqq(cfqd, cfqq, "insert_request"); |
3511 | - cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); | |
3205 | + cfq_init_prio_data(cfqq, RQ_CIC(rq)->icq.ioc); | |
3512 | 3206 | |
3513 | 3207 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); |
3514 | 3208 | list_add_tail(&rq->queuelist, &cfqq->fifo); |
... | ... | @@ -3558,7 +3252,7 @@ |
3558 | 3252 | |
3559 | 3253 | static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
3560 | 3254 | { |
3561 | - struct cfq_io_context *cic = cfqd->active_cic; | |
3255 | + struct cfq_io_cq *cic = cfqd->active_cic; | |
3562 | 3256 | |
3563 | 3257 | /* If the queue already has requests, don't wait */ |
3564 | 3258 | if (!RB_EMPTY_ROOT(&cfqq->sort_list)) |
... | ... | @@ -3695,7 +3389,7 @@ |
3695 | 3389 | { |
3696 | 3390 | struct cfq_data *cfqd = q->elevator->elevator_data; |
3697 | 3391 | struct task_struct *tsk = current; |
3698 | - struct cfq_io_context *cic; | |
3392 | + struct cfq_io_cq *cic; | |
3699 | 3393 | struct cfq_queue *cfqq; |
3700 | 3394 | |
3701 | 3395 | /* |
... | ... | @@ -3710,7 +3404,7 @@ |
3710 | 3404 | |
3711 | 3405 | cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); |
3712 | 3406 | if (cfqq) { |
3713 | - cfq_init_prio_data(cfqq, cic->ioc); | |
3407 | + cfq_init_prio_data(cfqq, cic->icq.ioc); | |
3714 | 3408 | |
3715 | 3409 | return __cfq_may_queue(cfqq); |
3716 | 3410 | } |
3717 | 3411 | |
3718 | 3412 | |
... | ... | @@ -3731,21 +3425,17 @@ |
3731 | 3425 | BUG_ON(!cfqq->allocated[rw]); |
3732 | 3426 | cfqq->allocated[rw]--; |
3733 | 3427 | |
3734 | - put_io_context(RQ_CIC(rq)->ioc); | |
3735 | - | |
3736 | - rq->elevator_private[0] = NULL; | |
3737 | - rq->elevator_private[1] = NULL; | |
3738 | - | |
3739 | 3428 | /* Put down rq reference on cfqg */ |
3740 | 3429 | cfq_put_cfqg(RQ_CFQG(rq)); |
3741 | - rq->elevator_private[2] = NULL; | |
3430 | + rq->elv.priv[0] = NULL; | |
3431 | + rq->elv.priv[1] = NULL; | |
3742 | 3432 | |
3743 | 3433 | cfq_put_queue(cfqq); |
3744 | 3434 | } |
3745 | 3435 | } |
3746 | 3436 | |
3747 | 3437 | static struct cfq_queue * |
3748 | -cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic, | |
3438 | +cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_cq *cic, | |
3749 | 3439 | struct cfq_queue *cfqq) |
3750 | 3440 | { |
3751 | 3441 | cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq); |
... | ... | @@ -3760,7 +3450,7 @@ |
3760 | 3450 | * was the last process referring to said cfqq. |
3761 | 3451 | */ |
3762 | 3452 | static struct cfq_queue * |
3763 | -split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq) | |
3453 | +split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq) | |
3764 | 3454 | { |
3765 | 3455 | if (cfqq_process_refs(cfqq) == 1) { |
3766 | 3456 | cfqq->pid = current->pid; |
3767 | 3457 | |
3768 | 3458 | |
3769 | 3459 | |
3770 | 3460 | |
3771 | 3461 | |
... | ... | @@ -3783,25 +3473,29 @@ |
3783 | 3473 | cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) |
3784 | 3474 | { |
3785 | 3475 | struct cfq_data *cfqd = q->elevator->elevator_data; |
3786 | - struct cfq_io_context *cic; | |
3476 | + struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq); | |
3787 | 3477 | const int rw = rq_data_dir(rq); |
3788 | 3478 | const bool is_sync = rq_is_sync(rq); |
3789 | 3479 | struct cfq_queue *cfqq; |
3790 | - unsigned long flags; | |
3791 | 3480 | |
3792 | 3481 | might_sleep_if(gfp_mask & __GFP_WAIT); |
3793 | 3482 | |
3794 | - cic = cfq_get_io_context(cfqd, gfp_mask); | |
3483 | + spin_lock_irq(q->queue_lock); | |
3795 | 3484 | |
3796 | - spin_lock_irqsave(q->queue_lock, flags); | |
3485 | + /* handle changed notifications */ | |
3486 | + if (unlikely(cic->icq.changed)) { | |
3487 | + if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed)) | |
3488 | + changed_ioprio(cic); | |
3489 | +#ifdef CONFIG_CFQ_GROUP_IOSCHED | |
3490 | + if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed)) | |
3491 | + changed_cgroup(cic); | |
3492 | +#endif | |
3493 | + } | |
3797 | 3494 | |
3798 | - if (!cic) | |
3799 | - goto queue_fail; | |
3800 | - | |
3801 | 3495 | new_queue: |
3802 | 3496 | cfqq = cic_to_cfqq(cic, is_sync); |
3803 | 3497 | if (!cfqq || cfqq == &cfqd->oom_cfqq) { |
3804 | - cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); | |
3498 | + cfqq = cfq_get_queue(cfqd, is_sync, cic->icq.ioc, gfp_mask); | |
3805 | 3499 | cic_set_cfqq(cic, cfqq, is_sync); |
3806 | 3500 | } else { |
3807 | 3501 | /* |
3808 | 3502 | |
... | ... | @@ -3827,17 +3521,10 @@ |
3827 | 3521 | cfqq->allocated[rw]++; |
3828 | 3522 | |
3829 | 3523 | cfqq->ref++; |
3830 | - rq->elevator_private[0] = cic; | |
3831 | - rq->elevator_private[1] = cfqq; | |
3832 | - rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg); | |
3833 | - spin_unlock_irqrestore(q->queue_lock, flags); | |
3524 | + rq->elv.priv[0] = cfqq; | |
3525 | + rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg); | |
3526 | + spin_unlock_irq(q->queue_lock); | |
3834 | 3527 | return 0; |
3835 | - | |
3836 | -queue_fail: | |
3837 | - cfq_schedule_dispatch(cfqd); | |
3838 | - spin_unlock_irqrestore(q->queue_lock, flags); | |
3839 | - cfq_log(cfqd, "set_request fail"); | |
3840 | - return 1; | |
3841 | 3528 | } |
3842 | 3529 | |
3843 | 3530 | static void cfq_kick_queue(struct work_struct *work) |
... | ... | @@ -3941,14 +3628,6 @@ |
3941 | 3628 | if (cfqd->active_queue) |
3942 | 3629 | __cfq_slice_expired(cfqd, cfqd->active_queue, 0); |
3943 | 3630 | |
3944 | - while (!list_empty(&cfqd->cic_list)) { | |
3945 | - struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, | |
3946 | - struct cfq_io_context, | |
3947 | - queue_list); | |
3948 | - | |
3949 | - __cfq_exit_single_io_context(cfqd, cic); | |
3950 | - } | |
3951 | - | |
3952 | 3631 | cfq_put_async_queues(cfqd); |
3953 | 3632 | cfq_release_cfq_groups(cfqd); |
3954 | 3633 | |
... | ... | @@ -3963,10 +3642,6 @@ |
3963 | 3642 | |
3964 | 3643 | cfq_shutdown_timer_wq(cfqd); |
3965 | 3644 | |
3966 | - spin_lock(&cic_index_lock); | |
3967 | - ida_remove(&cic_index_ida, cfqd->cic_index); | |
3968 | - spin_unlock(&cic_index_lock); | |
3969 | - | |
3970 | 3645 | /* |
3971 | 3646 | * Wait for cfqg->blkg->key accessors to exit their grace periods. |
3972 | 3647 | * Do this wait only if there are other unlinked groups out |
... | ... | @@ -3988,24 +3663,6 @@ |
3988 | 3663 | kfree(cfqd); |
3989 | 3664 | } |
3990 | 3665 | |
3991 | -static int cfq_alloc_cic_index(void) | |
3992 | -{ | |
3993 | - int index, error; | |
3994 | - | |
3995 | - do { | |
3996 | - if (!ida_pre_get(&cic_index_ida, GFP_KERNEL)) | |
3997 | - return -ENOMEM; | |
3998 | - | |
3999 | - spin_lock(&cic_index_lock); | |
4000 | - error = ida_get_new(&cic_index_ida, &index); | |
4001 | - spin_unlock(&cic_index_lock); | |
4002 | - if (error && error != -EAGAIN) | |
4003 | - return error; | |
4004 | - } while (error); | |
4005 | - | |
4006 | - return index; | |
4007 | -} | |
4008 | - | |
4009 | 3666 | static void *cfq_init_queue(struct request_queue *q) |
4010 | 3667 | { |
4011 | 3668 | struct cfq_data *cfqd; |
4012 | 3669 | |
4013 | 3670 | |
4014 | 3671 | |
... | ... | @@ -4013,24 +3670,10 @@ |
4013 | 3670 | struct cfq_group *cfqg; |
4014 | 3671 | struct cfq_rb_root *st; |
4015 | 3672 | |
4016 | - i = cfq_alloc_cic_index(); | |
4017 | - if (i < 0) | |
4018 | - return NULL; | |
4019 | - | |
4020 | 3673 | cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); |
4021 | - if (!cfqd) { | |
4022 | - spin_lock(&cic_index_lock); | |
4023 | - ida_remove(&cic_index_ida, i); | |
4024 | - spin_unlock(&cic_index_lock); | |
3674 | + if (!cfqd) | |
4025 | 3675 | return NULL; |
4026 | - } | |
4027 | 3676 | |
4028 | - /* | |
4029 | - * Don't need take queue_lock in the routine, since we are | |
4030 | - * initializing the ioscheduler, and nobody is using cfqd | |
4031 | - */ | |
4032 | - cfqd->cic_index = i; | |
4033 | - | |
4034 | 3677 | /* Init root service tree */ |
4035 | 3678 | cfqd->grp_service_tree = CFQ_RB_ROOT; |
4036 | 3679 | |
... | ... | @@ -4055,11 +3698,6 @@ |
4055 | 3698 | |
4056 | 3699 | if (blkio_alloc_blkg_stats(&cfqg->blkg)) { |
4057 | 3700 | kfree(cfqg); |
4058 | - | |
4059 | - spin_lock(&cic_index_lock); | |
4060 | - ida_remove(&cic_index_ida, cfqd->cic_index); | |
4061 | - spin_unlock(&cic_index_lock); | |
4062 | - | |
4063 | 3701 | kfree(cfqd); |
4064 | 3702 | return NULL; |
4065 | 3703 | } |
... | ... | @@ -4091,8 +3729,6 @@ |
4091 | 3729 | cfqd->oom_cfqq.ref++; |
4092 | 3730 | cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); |
4093 | 3731 | |
4094 | - INIT_LIST_HEAD(&cfqd->cic_list); | |
4095 | - | |
4096 | 3732 | cfqd->queue = q; |
4097 | 3733 | |
4098 | 3734 | init_timer(&cfqd->idle_slice_timer); |
... | ... | @@ -4121,34 +3757,6 @@ |
4121 | 3757 | return cfqd; |
4122 | 3758 | } |
4123 | 3759 | |
4124 | -static void cfq_slab_kill(void) | |
4125 | -{ | |
4126 | - /* | |
4127 | - * Caller already ensured that pending RCU callbacks are completed, | |
4128 | - * so we should have no busy allocations at this point. | |
4129 | - */ | |
4130 | - if (cfq_pool) | |
4131 | - kmem_cache_destroy(cfq_pool); | |
4132 | - if (cfq_ioc_pool) | |
4133 | - kmem_cache_destroy(cfq_ioc_pool); | |
4134 | -} | |
4135 | - | |
4136 | -static int __init cfq_slab_setup(void) | |
4137 | -{ | |
4138 | - cfq_pool = KMEM_CACHE(cfq_queue, 0); | |
4139 | - if (!cfq_pool) | |
4140 | - goto fail; | |
4141 | - | |
4142 | - cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0); | |
4143 | - if (!cfq_ioc_pool) | |
4144 | - goto fail; | |
4145 | - | |
4146 | - return 0; | |
4147 | -fail: | |
4148 | - cfq_slab_kill(); | |
4149 | - return -ENOMEM; | |
4150 | -} | |
4151 | - | |
4152 | 3760 | /* |
4153 | 3761 | * sysfs parts below --> |
4154 | 3762 | */ |
4155 | 3763 | |
4156 | 3764 | |
4157 | 3765 | |
... | ... | @@ -4254,15 +3862,18 @@ |
4254 | 3862 | .elevator_completed_req_fn = cfq_completed_request, |
4255 | 3863 | .elevator_former_req_fn = elv_rb_former_request, |
4256 | 3864 | .elevator_latter_req_fn = elv_rb_latter_request, |
3865 | + .elevator_init_icq_fn = cfq_init_icq, | |
3866 | + .elevator_exit_icq_fn = cfq_exit_icq, | |
4257 | 3867 | .elevator_set_req_fn = cfq_set_request, |
4258 | 3868 | .elevator_put_req_fn = cfq_put_request, |
4259 | 3869 | .elevator_may_queue_fn = cfq_may_queue, |
4260 | 3870 | .elevator_init_fn = cfq_init_queue, |
4261 | 3871 | .elevator_exit_fn = cfq_exit_queue, |
4262 | - .trim = cfq_free_io_context, | |
4263 | 3872 | }, |
3873 | + .icq_size = sizeof(struct cfq_io_cq), | |
3874 | + .icq_align = __alignof__(struct cfq_io_cq), | |
4264 | 3875 | .elevator_attrs = cfq_attrs, |
4265 | - .elevator_name = "cfq", | |
3876 | + .elevator_name = "cfq", | |
4266 | 3877 | .elevator_owner = THIS_MODULE, |
4267 | 3878 | }; |
4268 | 3879 | |
... | ... | @@ -4280,6 +3891,8 @@ |
4280 | 3891 | |
4281 | 3892 | static int __init cfq_init(void) |
4282 | 3893 | { |
3894 | + int ret; | |
3895 | + | |
4283 | 3896 | /* |
4284 | 3897 | * could be 0 on HZ < 1000 setups |
4285 | 3898 | */ |
4286 | 3899 | |
... | ... | @@ -4294,10 +3907,16 @@ |
4294 | 3907 | #else |
4295 | 3908 | cfq_group_idle = 0; |
4296 | 3909 | #endif |
4297 | - if (cfq_slab_setup()) | |
3910 | + cfq_pool = KMEM_CACHE(cfq_queue, 0); | |
3911 | + if (!cfq_pool) | |
4298 | 3912 | return -ENOMEM; |
4299 | 3913 | |
4300 | - elv_register(&iosched_cfq); | |
3914 | + ret = elv_register(&iosched_cfq); | |
3915 | + if (ret) { | |
3916 | + kmem_cache_destroy(cfq_pool); | |
3917 | + return ret; | |
3918 | + } | |
3919 | + | |
4301 | 3920 | blkio_policy_register(&blkio_policy_cfq); |
4302 | 3921 | |
4303 | 3922 | return 0; |
4304 | 3923 | |
... | ... | @@ -4305,21 +3924,9 @@ |
4305 | 3924 | |
4306 | 3925 | static void __exit cfq_exit(void) |
4307 | 3926 | { |
4308 | - DECLARE_COMPLETION_ONSTACK(all_gone); | |
4309 | 3927 | blkio_policy_unregister(&blkio_policy_cfq); |
4310 | 3928 | elv_unregister(&iosched_cfq); |
4311 | - ioc_gone = &all_gone; | |
4312 | - /* ioc_gone's update must be visible before reading ioc_count */ | |
4313 | - smp_wmb(); | |
4314 | - | |
4315 | - /* | |
4316 | - * this also protects us from entering cfq_slab_kill() with | |
4317 | - * pending RCU callbacks | |
4318 | - */ | |
4319 | - if (elv_ioc_count_read(cfq_ioc_count)) | |
4320 | - wait_for_completion(&all_gone); | |
4321 | - ida_destroy(&cic_index_ida); | |
4322 | - cfq_slab_kill(); | |
3929 | + kmem_cache_destroy(cfq_pool); | |
4323 | 3930 | } |
4324 | 3931 | |
4325 | 3932 | module_init(cfq_init); |
block/compat_ioctl.c
... | ... | @@ -719,6 +719,9 @@ |
719 | 719 | case BLKSECTGET: |
720 | 720 | return compat_put_ushort(arg, |
721 | 721 | queue_max_sectors(bdev_get_queue(bdev))); |
722 | + case BLKROTATIONAL: | |
723 | + return compat_put_ushort(arg, | |
724 | + !blk_queue_nonrot(bdev_get_queue(bdev))); | |
722 | 725 | case BLKRASET: /* compatible, but no compat_ptr (!) */ |
723 | 726 | case BLKFRASET: |
724 | 727 | if (!capable(CAP_SYS_ADMIN)) |
block/deadline-iosched.c
block/elevator.c
... | ... | @@ -61,8 +61,8 @@ |
61 | 61 | struct request_queue *q = rq->q; |
62 | 62 | struct elevator_queue *e = q->elevator; |
63 | 63 | |
64 | - if (e->ops->elevator_allow_merge_fn) | |
65 | - return e->ops->elevator_allow_merge_fn(q, rq, bio); | |
64 | + if (e->type->ops.elevator_allow_merge_fn) | |
65 | + return e->type->ops.elevator_allow_merge_fn(q, rq, bio); | |
66 | 66 | |
67 | 67 | return 1; |
68 | 68 | } |
69 | 69 | |
70 | 70 | |
... | ... | @@ -168,19 +168,15 @@ |
168 | 168 | return e; |
169 | 169 | } |
170 | 170 | |
171 | -static void *elevator_init_queue(struct request_queue *q, | |
172 | - struct elevator_queue *eq) | |
171 | +static int elevator_init_queue(struct request_queue *q, | |
172 | + struct elevator_queue *eq) | |
173 | 173 | { |
174 | - return eq->ops->elevator_init_fn(q); | |
174 | + eq->elevator_data = eq->type->ops.elevator_init_fn(q); | |
175 | + if (eq->elevator_data) | |
176 | + return 0; | |
177 | + return -ENOMEM; | |
175 | 178 | } |
176 | 179 | |
177 | -static void elevator_attach(struct request_queue *q, struct elevator_queue *eq, | |
178 | - void *data) | |
179 | -{ | |
180 | - q->elevator = eq; | |
181 | - eq->elevator_data = data; | |
182 | -} | |
183 | - | |
184 | 180 | static char chosen_elevator[ELV_NAME_MAX]; |
185 | 181 | |
186 | 182 | static int __init elevator_setup(char *str) |
... | ... | @@ -207,8 +203,7 @@ |
207 | 203 | if (unlikely(!eq)) |
208 | 204 | goto err; |
209 | 205 | |
210 | - eq->ops = &e->ops; | |
211 | - eq->elevator_type = e; | |
206 | + eq->type = e; | |
212 | 207 | kobject_init(&eq->kobj, &elv_ktype); |
213 | 208 | mutex_init(&eq->sysfs_lock); |
214 | 209 | |
... | ... | @@ -232,7 +227,7 @@ |
232 | 227 | struct elevator_queue *e; |
233 | 228 | |
234 | 229 | e = container_of(kobj, struct elevator_queue, kobj); |
235 | - elevator_put(e->elevator_type); | |
230 | + elevator_put(e->type); | |
236 | 231 | kfree(e->hash); |
237 | 232 | kfree(e); |
238 | 233 | } |
... | ... | @@ -241,7 +236,7 @@ |
241 | 236 | { |
242 | 237 | struct elevator_type *e = NULL; |
243 | 238 | struct elevator_queue *eq; |
244 | - void *data; | |
239 | + int err; | |
245 | 240 | |
246 | 241 | if (unlikely(q->elevator)) |
247 | 242 | return 0; |
248 | 243 | |
249 | 244 | |
... | ... | @@ -278,13 +273,13 @@ |
278 | 273 | if (!eq) |
279 | 274 | return -ENOMEM; |
280 | 275 | |
281 | - data = elevator_init_queue(q, eq); | |
282 | - if (!data) { | |
276 | + err = elevator_init_queue(q, eq); | |
277 | + if (err) { | |
283 | 278 | kobject_put(&eq->kobj); |
284 | - return -ENOMEM; | |
279 | + return err; | |
285 | 280 | } |
286 | 281 | |
287 | - elevator_attach(q, eq, data); | |
282 | + q->elevator = eq; | |
288 | 283 | return 0; |
289 | 284 | } |
290 | 285 | EXPORT_SYMBOL(elevator_init); |
... | ... | @@ -292,9 +287,8 @@ |
292 | 287 | void elevator_exit(struct elevator_queue *e) |
293 | 288 | { |
294 | 289 | mutex_lock(&e->sysfs_lock); |
295 | - if (e->ops->elevator_exit_fn) | |
296 | - e->ops->elevator_exit_fn(e); | |
297 | - e->ops = NULL; | |
290 | + if (e->type->ops.elevator_exit_fn) | |
291 | + e->type->ops.elevator_exit_fn(e); | |
298 | 292 | mutex_unlock(&e->sysfs_lock); |
299 | 293 | |
300 | 294 | kobject_put(&e->kobj); |
... | ... | @@ -504,8 +498,8 @@ |
504 | 498 | return ELEVATOR_BACK_MERGE; |
505 | 499 | } |
506 | 500 | |
507 | - if (e->ops->elevator_merge_fn) | |
508 | - return e->ops->elevator_merge_fn(q, req, bio); | |
501 | + if (e->type->ops.elevator_merge_fn) | |
502 | + return e->type->ops.elevator_merge_fn(q, req, bio); | |
509 | 503 | |
510 | 504 | return ELEVATOR_NO_MERGE; |
511 | 505 | } |
... | ... | @@ -548,8 +542,8 @@ |
548 | 542 | { |
549 | 543 | struct elevator_queue *e = q->elevator; |
550 | 544 | |
551 | - if (e->ops->elevator_merged_fn) | |
552 | - e->ops->elevator_merged_fn(q, rq, type); | |
545 | + if (e->type->ops.elevator_merged_fn) | |
546 | + e->type->ops.elevator_merged_fn(q, rq, type); | |
553 | 547 | |
554 | 548 | if (type == ELEVATOR_BACK_MERGE) |
555 | 549 | elv_rqhash_reposition(q, rq); |
... | ... | @@ -563,8 +557,8 @@ |
563 | 557 | struct elevator_queue *e = q->elevator; |
564 | 558 | const int next_sorted = next->cmd_flags & REQ_SORTED; |
565 | 559 | |
566 | - if (next_sorted && e->ops->elevator_merge_req_fn) | |
567 | - e->ops->elevator_merge_req_fn(q, rq, next); | |
560 | + if (next_sorted && e->type->ops.elevator_merge_req_fn) | |
561 | + e->type->ops.elevator_merge_req_fn(q, rq, next); | |
568 | 562 | |
569 | 563 | elv_rqhash_reposition(q, rq); |
570 | 564 | |
... | ... | @@ -581,8 +575,8 @@ |
581 | 575 | { |
582 | 576 | struct elevator_queue *e = q->elevator; |
583 | 577 | |
584 | - if (e->ops->elevator_bio_merged_fn) | |
585 | - e->ops->elevator_bio_merged_fn(q, rq, bio); | |
578 | + if (e->type->ops.elevator_bio_merged_fn) | |
579 | + e->type->ops.elevator_bio_merged_fn(q, rq, bio); | |
586 | 580 | } |
587 | 581 | |
588 | 582 | void elv_requeue_request(struct request_queue *q, struct request *rq) |
589 | 583 | |
... | ... | @@ -608,12 +602,12 @@ |
608 | 602 | |
609 | 603 | lockdep_assert_held(q->queue_lock); |
610 | 604 | |
611 | - while (q->elevator->ops->elevator_dispatch_fn(q, 1)) | |
605 | + while (q->elevator->type->ops.elevator_dispatch_fn(q, 1)) | |
612 | 606 | ; |
613 | 607 | if (q->nr_sorted && printed++ < 10) { |
614 | 608 | printk(KERN_ERR "%s: forced dispatching is broken " |
615 | 609 | "(nr_sorted=%u), please report this\n", |
616 | - q->elevator->elevator_type->elevator_name, q->nr_sorted); | |
610 | + q->elevator->type->elevator_name, q->nr_sorted); | |
617 | 611 | } |
618 | 612 | } |
619 | 613 | |
... | ... | @@ -702,7 +696,7 @@ |
702 | 696 | * rq cannot be accessed after calling |
703 | 697 | * elevator_add_req_fn. |
704 | 698 | */ |
705 | - q->elevator->ops->elevator_add_req_fn(q, rq); | |
699 | + q->elevator->type->ops.elevator_add_req_fn(q, rq); | |
706 | 700 | break; |
707 | 701 | |
708 | 702 | case ELEVATOR_INSERT_FLUSH: |
... | ... | @@ -731,8 +725,8 @@ |
731 | 725 | { |
732 | 726 | struct elevator_queue *e = q->elevator; |
733 | 727 | |
734 | - if (e->ops->elevator_latter_req_fn) | |
735 | - return e->ops->elevator_latter_req_fn(q, rq); | |
728 | + if (e->type->ops.elevator_latter_req_fn) | |
729 | + return e->type->ops.elevator_latter_req_fn(q, rq); | |
736 | 730 | return NULL; |
737 | 731 | } |
738 | 732 | |
... | ... | @@ -740,8 +734,8 @@ |
740 | 734 | { |
741 | 735 | struct elevator_queue *e = q->elevator; |
742 | 736 | |
743 | - if (e->ops->elevator_former_req_fn) | |
744 | - return e->ops->elevator_former_req_fn(q, rq); | |
737 | + if (e->type->ops.elevator_former_req_fn) | |
738 | + return e->type->ops.elevator_former_req_fn(q, rq); | |
745 | 739 | return NULL; |
746 | 740 | } |
747 | 741 | |
... | ... | @@ -749,10 +743,8 @@ |
749 | 743 | { |
750 | 744 | struct elevator_queue *e = q->elevator; |
751 | 745 | |
752 | - if (e->ops->elevator_set_req_fn) | |
753 | - return e->ops->elevator_set_req_fn(q, rq, gfp_mask); | |
754 | - | |
755 | - rq->elevator_private[0] = NULL; | |
746 | + if (e->type->ops.elevator_set_req_fn) | |
747 | + return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask); | |
756 | 748 | return 0; |
757 | 749 | } |
758 | 750 | |
759 | 751 | |
... | ... | @@ -760,16 +752,16 @@ |
760 | 752 | { |
761 | 753 | struct elevator_queue *e = q->elevator; |
762 | 754 | |
763 | - if (e->ops->elevator_put_req_fn) | |
764 | - e->ops->elevator_put_req_fn(rq); | |
755 | + if (e->type->ops.elevator_put_req_fn) | |
756 | + e->type->ops.elevator_put_req_fn(rq); | |
765 | 757 | } |
766 | 758 | |
767 | 759 | int elv_may_queue(struct request_queue *q, int rw) |
768 | 760 | { |
769 | 761 | struct elevator_queue *e = q->elevator; |
770 | 762 | |
771 | - if (e->ops->elevator_may_queue_fn) | |
772 | - return e->ops->elevator_may_queue_fn(q, rw); | |
763 | + if (e->type->ops.elevator_may_queue_fn) | |
764 | + return e->type->ops.elevator_may_queue_fn(q, rw); | |
773 | 765 | |
774 | 766 | return ELV_MQUEUE_MAY; |
775 | 767 | } |
... | ... | @@ -804,8 +796,8 @@ |
804 | 796 | if (blk_account_rq(rq)) { |
805 | 797 | q->in_flight[rq_is_sync(rq)]--; |
806 | 798 | if ((rq->cmd_flags & REQ_SORTED) && |
807 | - e->ops->elevator_completed_req_fn) | |
808 | - e->ops->elevator_completed_req_fn(q, rq); | |
799 | + e->type->ops.elevator_completed_req_fn) | |
800 | + e->type->ops.elevator_completed_req_fn(q, rq); | |
809 | 801 | } |
810 | 802 | } |
811 | 803 | |
... | ... | @@ -823,7 +815,7 @@ |
823 | 815 | |
824 | 816 | e = container_of(kobj, struct elevator_queue, kobj); |
825 | 817 | mutex_lock(&e->sysfs_lock); |
826 | - error = e->ops ? entry->show(e, page) : -ENOENT; | |
818 | + error = e->type ? entry->show(e, page) : -ENOENT; | |
827 | 819 | mutex_unlock(&e->sysfs_lock); |
828 | 820 | return error; |
829 | 821 | } |
... | ... | @@ -841,7 +833,7 @@ |
841 | 833 | |
842 | 834 | e = container_of(kobj, struct elevator_queue, kobj); |
843 | 835 | mutex_lock(&e->sysfs_lock); |
844 | - error = e->ops ? entry->store(e, page, length) : -ENOENT; | |
836 | + error = e->type ? entry->store(e, page, length) : -ENOENT; | |
845 | 837 | mutex_unlock(&e->sysfs_lock); |
846 | 838 | return error; |
847 | 839 | } |
848 | 840 | |
849 | 841 | |
... | ... | @@ -856,14 +848,13 @@ |
856 | 848 | .release = elevator_release, |
857 | 849 | }; |
858 | 850 | |
859 | -int elv_register_queue(struct request_queue *q) | |
851 | +int __elv_register_queue(struct request_queue *q, struct elevator_queue *e) | |
860 | 852 | { |
861 | - struct elevator_queue *e = q->elevator; | |
862 | 853 | int error; |
863 | 854 | |
864 | 855 | error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); |
865 | 856 | if (!error) { |
866 | - struct elv_fs_entry *attr = e->elevator_type->elevator_attrs; | |
857 | + struct elv_fs_entry *attr = e->type->elevator_attrs; | |
867 | 858 | if (attr) { |
868 | 859 | while (attr->attr.name) { |
869 | 860 | if (sysfs_create_file(&e->kobj, &attr->attr)) |
870 | 861 | |
871 | 862 | |
872 | 863 | |
873 | 864 | |
874 | 865 | |
875 | 866 | |
876 | 867 | |
877 | 868 | |
... | ... | @@ -876,31 +867,55 @@ |
876 | 867 | } |
877 | 868 | return error; |
878 | 869 | } |
879 | -EXPORT_SYMBOL(elv_register_queue); | |
880 | 870 | |
881 | -static void __elv_unregister_queue(struct elevator_queue *e) | |
871 | +int elv_register_queue(struct request_queue *q) | |
882 | 872 | { |
883 | - kobject_uevent(&e->kobj, KOBJ_REMOVE); | |
884 | - kobject_del(&e->kobj); | |
885 | - e->registered = 0; | |
873 | + return __elv_register_queue(q, q->elevator); | |
886 | 874 | } |
875 | +EXPORT_SYMBOL(elv_register_queue); | |
887 | 876 | |
888 | 877 | void elv_unregister_queue(struct request_queue *q) |
889 | 878 | { |
890 | - if (q) | |
891 | - __elv_unregister_queue(q->elevator); | |
879 | + if (q) { | |
880 | + struct elevator_queue *e = q->elevator; | |
881 | + | |
882 | + kobject_uevent(&e->kobj, KOBJ_REMOVE); | |
883 | + kobject_del(&e->kobj); | |
884 | + e->registered = 0; | |
885 | + } | |
892 | 886 | } |
893 | 887 | EXPORT_SYMBOL(elv_unregister_queue); |
894 | 888 | |
895 | -void elv_register(struct elevator_type *e) | |
889 | +int elv_register(struct elevator_type *e) | |
896 | 890 | { |
897 | 891 | char *def = ""; |
898 | 892 | |
893 | + /* create icq_cache if requested */ | |
894 | + if (e->icq_size) { | |
895 | + if (WARN_ON(e->icq_size < sizeof(struct io_cq)) || | |
896 | + WARN_ON(e->icq_align < __alignof__(struct io_cq))) | |
897 | + return -EINVAL; | |
898 | + | |
899 | + snprintf(e->icq_cache_name, sizeof(e->icq_cache_name), | |
900 | + "%s_io_cq", e->elevator_name); | |
901 | + e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size, | |
902 | + e->icq_align, 0, NULL); | |
903 | + if (!e->icq_cache) | |
904 | + return -ENOMEM; | |
905 | + } | |
906 | + | |
907 | + /* register, don't allow duplicate names */ | |
899 | 908 | spin_lock(&elv_list_lock); |
900 | - BUG_ON(elevator_find(e->elevator_name)); | |
909 | + if (elevator_find(e->elevator_name)) { | |
910 | + spin_unlock(&elv_list_lock); | |
911 | + if (e->icq_cache) | |
912 | + kmem_cache_destroy(e->icq_cache); | |
913 | + return -EBUSY; | |
914 | + } | |
901 | 915 | list_add_tail(&e->list, &elv_list); |
902 | 916 | spin_unlock(&elv_list_lock); |
903 | 917 | |
918 | + /* print pretty message */ | |
904 | 919 | if (!strcmp(e->elevator_name, chosen_elevator) || |
905 | 920 | (!*chosen_elevator && |
906 | 921 | !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) |
907 | 922 | |
908 | 923 | |
909 | 924 | |
910 | 925 | |
... | ... | @@ -908,30 +923,26 @@ |
908 | 923 | |
909 | 924 | printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, |
910 | 925 | def); |
926 | + return 0; | |
911 | 927 | } |
912 | 928 | EXPORT_SYMBOL_GPL(elv_register); |
913 | 929 | |
914 | 930 | void elv_unregister(struct elevator_type *e) |
915 | 931 | { |
916 | - struct task_struct *g, *p; | |
932 | + /* unregister */ | |
933 | + spin_lock(&elv_list_lock); | |
934 | + list_del_init(&e->list); | |
935 | + spin_unlock(&elv_list_lock); | |
917 | 936 | |
918 | 937 | /* |
919 | - * Iterate every thread in the process to remove the io contexts. | |
938 | + * Destroy icq_cache if it exists. icq's are RCU managed. Make | |
939 | + * sure all RCU operations are complete before proceeding. | |
920 | 940 | */ |
921 | - if (e->ops.trim) { | |
922 | - read_lock(&tasklist_lock); | |
923 | - do_each_thread(g, p) { | |
924 | - task_lock(p); | |
925 | - if (p->io_context) | |
926 | - e->ops.trim(p->io_context); | |
927 | - task_unlock(p); | |
928 | - } while_each_thread(g, p); | |
929 | - read_unlock(&tasklist_lock); | |
941 | + if (e->icq_cache) { | |
942 | + rcu_barrier(); | |
943 | + kmem_cache_destroy(e->icq_cache); | |
944 | + e->icq_cache = NULL; | |
930 | 945 | } |
931 | - | |
932 | - spin_lock(&elv_list_lock); | |
933 | - list_del_init(&e->list); | |
934 | - spin_unlock(&elv_list_lock); | |
935 | 946 | } |
936 | 947 | EXPORT_SYMBOL_GPL(elv_unregister); |
937 | 948 | |
938 | 949 | |
939 | 950 | |
940 | 951 | |
941 | 952 | |
942 | 953 | |
943 | 954 | |
944 | 955 | |
... | ... | @@ -944,54 +955,41 @@ |
944 | 955 | static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) |
945 | 956 | { |
946 | 957 | struct elevator_queue *old_elevator, *e; |
947 | - void *data; | |
948 | 958 | int err; |
949 | 959 | |
950 | - /* | |
951 | - * Allocate new elevator | |
952 | - */ | |
960 | + /* allocate new elevator */ | |
953 | 961 | e = elevator_alloc(q, new_e); |
954 | 962 | if (!e) |
955 | 963 | return -ENOMEM; |
956 | 964 | |
957 | - data = elevator_init_queue(q, e); | |
958 | - if (!data) { | |
965 | + err = elevator_init_queue(q, e); | |
966 | + if (err) { | |
959 | 967 | kobject_put(&e->kobj); |
960 | - return -ENOMEM; | |
968 | + return err; | |
961 | 969 | } |
962 | 970 | |
963 | - /* | |
964 | - * Turn on BYPASS and drain all requests w/ elevator private data | |
965 | - */ | |
971 | + /* turn on BYPASS and drain all requests w/ elevator private data */ | |
966 | 972 | elv_quiesce_start(q); |
967 | 973 | |
968 | - /* | |
969 | - * Remember old elevator. | |
970 | - */ | |
971 | - old_elevator = q->elevator; | |
972 | - | |
973 | - /* | |
974 | - * attach and start new elevator | |
975 | - */ | |
976 | - spin_lock_irq(q->queue_lock); | |
977 | - elevator_attach(q, e, data); | |
978 | - spin_unlock_irq(q->queue_lock); | |
979 | - | |
980 | - if (old_elevator->registered) { | |
981 | - __elv_unregister_queue(old_elevator); | |
982 | - | |
983 | - err = elv_register_queue(q); | |
974 | + /* unregister old queue, register new one and kill old elevator */ | |
975 | + if (q->elevator->registered) { | |
976 | + elv_unregister_queue(q); | |
977 | + err = __elv_register_queue(q, e); | |
984 | 978 | if (err) |
985 | 979 | goto fail_register; |
986 | 980 | } |
987 | 981 | |
988 | - /* | |
989 | - * finally exit old elevator and turn off BYPASS. | |
990 | - */ | |
982 | + /* done, clear io_cq's, switch elevators and turn off BYPASS */ | |
983 | + spin_lock_irq(q->queue_lock); | |
984 | + ioc_clear_queue(q); | |
985 | + old_elevator = q->elevator; | |
986 | + q->elevator = e; | |
987 | + spin_unlock_irq(q->queue_lock); | |
988 | + | |
991 | 989 | elevator_exit(old_elevator); |
992 | 990 | elv_quiesce_end(q); |
993 | 991 | |
994 | - blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); | |
992 | + blk_add_trace_msg(q, "elv switch: %s", e->type->elevator_name); | |
995 | 993 | |
996 | 994 | return 0; |
997 | 995 | |
... | ... | @@ -1001,7 +999,6 @@ |
1001 | 999 | * one again (along with re-adding the sysfs dir) |
1002 | 1000 | */ |
1003 | 1001 | elevator_exit(e); |
1004 | - q->elevator = old_elevator; | |
1005 | 1002 | elv_register_queue(q); |
1006 | 1003 | elv_quiesce_end(q); |
1007 | 1004 | |
... | ... | @@ -1026,7 +1023,7 @@ |
1026 | 1023 | return -EINVAL; |
1027 | 1024 | } |
1028 | 1025 | |
1029 | - if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { | |
1026 | + if (!strcmp(elevator_name, q->elevator->type->elevator_name)) { | |
1030 | 1027 | elevator_put(e); |
1031 | 1028 | return 0; |
1032 | 1029 | } |
... | ... | @@ -1061,7 +1058,7 @@ |
1061 | 1058 | if (!q->elevator || !blk_queue_stackable(q)) |
1062 | 1059 | return sprintf(name, "none\n"); |
1063 | 1060 | |
1064 | - elv = e->elevator_type; | |
1061 | + elv = e->type; | |
1065 | 1062 | |
1066 | 1063 | spin_lock(&elv_list_lock); |
1067 | 1064 | list_for_each_entry(__e, &elv_list, list) { |
block/genhd.c
... | ... | @@ -614,7 +614,7 @@ |
614 | 614 | * Take an extra ref on queue which will be put on disk_release() |
615 | 615 | * so that it sticks around as long as @disk is there. |
616 | 616 | */ |
617 | - WARN_ON_ONCE(blk_get_queue(disk->queue)); | |
617 | + WARN_ON_ONCE(!blk_get_queue(disk->queue)); | |
618 | 618 | |
619 | 619 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, |
620 | 620 | "bdi"); |
block/ioctl.c
... | ... | @@ -296,6 +296,8 @@ |
296 | 296 | return put_uint(arg, bdev_discard_zeroes_data(bdev)); |
297 | 297 | case BLKSECTGET: |
298 | 298 | return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); |
299 | + case BLKROTATIONAL: | |
300 | + return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev))); | |
299 | 301 | case BLKRASET: |
300 | 302 | case BLKFRASET: |
301 | 303 | if(!capable(CAP_SYS_ADMIN)) |
block/noop-iosched.c
drivers/block/sx8.c
... | ... | @@ -619,8 +619,10 @@ |
619 | 619 | host->state == HST_DEV_SCAN); |
620 | 620 | spin_unlock_irq(&host->lock); |
621 | 621 | |
622 | - DPRINTK("blk_insert_request, tag == %u\n", idx); | |
623 | - blk_insert_request(host->oob_q, crq->rq, 1, crq); | |
622 | + DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); | |
623 | + crq->rq->cmd_type = REQ_TYPE_SPECIAL; | |
624 | + crq->rq->special = crq; | |
625 | + blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); | |
624 | 626 | |
625 | 627 | return 0; |
626 | 628 | |
... | ... | @@ -658,8 +660,10 @@ |
658 | 660 | BUG_ON(rc < 0); |
659 | 661 | crq->msg_bucket = (u32) rc; |
660 | 662 | |
661 | - DPRINTK("blk_insert_request, tag == %u\n", idx); | |
662 | - blk_insert_request(host->oob_q, crq->rq, 1, crq); | |
663 | + DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); | |
664 | + crq->rq->cmd_type = REQ_TYPE_SPECIAL; | |
665 | + crq->rq->special = crq; | |
666 | + blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); | |
663 | 667 | |
664 | 668 | return 0; |
665 | 669 | } |
drivers/md/dm-table.c
... | ... | @@ -699,7 +699,7 @@ |
699 | 699 | while (i < dm_table_get_num_targets(table)) { |
700 | 700 | ti = dm_table_get_target(table, i++); |
701 | 701 | |
702 | - blk_set_default_limits(&ti_limits); | |
702 | + blk_set_stacking_limits(&ti_limits); | |
703 | 703 | |
704 | 704 | /* combine all target devices' limits */ |
705 | 705 | if (ti->type->iterate_devices) |
706 | 706 | |
... | ... | @@ -1221,10 +1221,10 @@ |
1221 | 1221 | struct queue_limits ti_limits; |
1222 | 1222 | unsigned i = 0; |
1223 | 1223 | |
1224 | - blk_set_default_limits(limits); | |
1224 | + blk_set_stacking_limits(limits); | |
1225 | 1225 | |
1226 | 1226 | while (i < dm_table_get_num_targets(table)) { |
1227 | - blk_set_default_limits(&ti_limits); | |
1227 | + blk_set_stacking_limits(&ti_limits); | |
1228 | 1228 | |
1229 | 1229 | ti = dm_table_get_target(table, i++); |
1230 | 1230 |
drivers/md/md.c
drivers/scsi/scsi_scan.c
fs/ioprio.c
... | ... | @@ -48,28 +48,12 @@ |
48 | 48 | if (err) |
49 | 49 | return err; |
50 | 50 | |
51 | - task_lock(task); | |
52 | - do { | |
53 | - ioc = task->io_context; | |
54 | - /* see wmb() in current_io_context() */ | |
55 | - smp_read_barrier_depends(); | |
56 | - if (ioc) | |
57 | - break; | |
58 | - | |
59 | - ioc = alloc_io_context(GFP_ATOMIC, -1); | |
60 | - if (!ioc) { | |
61 | - err = -ENOMEM; | |
62 | - break; | |
63 | - } | |
64 | - task->io_context = ioc; | |
65 | - } while (1); | |
66 | - | |
67 | - if (!err) { | |
68 | - ioc->ioprio = ioprio; | |
69 | - ioc->ioprio_changed = 1; | |
51 | + ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); | |
52 | + if (ioc) { | |
53 | + ioc_ioprio_changed(ioc, ioprio); | |
54 | + put_io_context(ioc, NULL); | |
70 | 55 | } |
71 | 56 | |
72 | - task_unlock(task); | |
73 | 57 | return err; |
74 | 58 | } |
75 | 59 | EXPORT_SYMBOL_GPL(set_task_ioprio); |
fs/mpage.c
... | ... | @@ -371,10 +371,7 @@ |
371 | 371 | sector_t last_block_in_bio = 0; |
372 | 372 | struct buffer_head map_bh; |
373 | 373 | unsigned long first_logical_block = 0; |
374 | - struct blk_plug plug; | |
375 | 374 | |
376 | - blk_start_plug(&plug); | |
377 | - | |
378 | 375 | map_bh.b_state = 0; |
379 | 376 | map_bh.b_size = 0; |
380 | 377 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
... | ... | @@ -395,7 +392,6 @@ |
395 | 392 | BUG_ON(!list_empty(pages)); |
396 | 393 | if (bio) |
397 | 394 | mpage_bio_submit(READ, bio); |
398 | - blk_finish_plug(&plug); | |
399 | 395 | return 0; |
400 | 396 | } |
401 | 397 | EXPORT_SYMBOL(mpage_readpages); |
include/linux/bio.h
... | ... | @@ -515,24 +515,64 @@ |
515 | 515 | |
516 | 516 | #else /* CONFIG_BLK_DEV_INTEGRITY */ |
517 | 517 | |
518 | -#define bio_integrity(a) (0) | |
519 | -#define bioset_integrity_create(a, b) (0) | |
520 | -#define bio_integrity_prep(a) (0) | |
521 | -#define bio_integrity_enabled(a) (0) | |
518 | +static inline int bio_integrity(struct bio *bio) | |
519 | +{ | |
520 | + return 0; | |
521 | +} | |
522 | + | |
523 | +static inline int bio_integrity_enabled(struct bio *bio) | |
524 | +{ | |
525 | + return 0; | |
526 | +} | |
527 | + | |
528 | +static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) | |
529 | +{ | |
530 | + return 0; | |
531 | +} | |
532 | + | |
533 | +static inline void bioset_integrity_free (struct bio_set *bs) | |
534 | +{ | |
535 | + return; | |
536 | +} | |
537 | + | |
538 | +static inline int bio_integrity_prep(struct bio *bio) | |
539 | +{ | |
540 | + return 0; | |
541 | +} | |
542 | + | |
543 | +static inline void bio_integrity_free(struct bio *bio, struct bio_set *bs) | |
544 | +{ | |
545 | + return; | |
546 | +} | |
547 | + | |
522 | 548 | static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, |
523 | 549 | gfp_t gfp_mask, struct bio_set *bs) |
524 | 550 | { |
525 | 551 | return 0; |
526 | 552 | } |
527 | -#define bioset_integrity_free(a) do { } while (0) | |
528 | -#define bio_integrity_free(a, b) do { } while (0) | |
529 | -#define bio_integrity_endio(a, b) do { } while (0) | |
530 | -#define bio_integrity_advance(a, b) do { } while (0) | |
531 | -#define bio_integrity_trim(a, b, c) do { } while (0) | |
532 | -#define bio_integrity_split(a, b, c) do { } while (0) | |
533 | -#define bio_integrity_set_tag(a, b, c) do { } while (0) | |
534 | -#define bio_integrity_get_tag(a, b, c) do { } while (0) | |
535 | -#define bio_integrity_init(a) do { } while (0) | |
553 | + | |
554 | +static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp, | |
555 | + int sectors) | |
556 | +{ | |
557 | + return; | |
558 | +} | |
559 | + | |
560 | +static inline void bio_integrity_advance(struct bio *bio, | |
561 | + unsigned int bytes_done) | |
562 | +{ | |
563 | + return; | |
564 | +} | |
565 | + | |
566 | +static inline void bio_integrity_trim(struct bio *bio, unsigned int offset, | |
567 | + unsigned int sectors) | |
568 | +{ | |
569 | + return; | |
570 | +} | |
571 | + | |
572 | +static inline void bio_integrity_init(void) | |
573 | +{ | |
574 | + return; | |
575 | +} | |
536 | 576 | |
537 | 577 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ |
538 | 578 |
include/linux/blkdev.h
... | ... | @@ -111,11 +111,15 @@ |
111 | 111 | * Three pointers are available for the IO schedulers, if they need |
112 | 112 | * more they have to dynamically allocate it. Flush requests are |
113 | 113 | * never put on the IO scheduler. So let the flush fields share |
114 | - * space with the three elevator_private pointers. | |
114 | + * space with the elevator data. | |
115 | 115 | */ |
116 | 116 | union { |
117 | - void *elevator_private[3]; | |
118 | 117 | struct { |
118 | + struct io_cq *icq; | |
119 | + void *priv[2]; | |
120 | + } elv; | |
121 | + | |
122 | + struct { | |
119 | 123 | unsigned int seq; |
120 | 124 | struct list_head list; |
121 | 125 | rq_end_io_fn *saved_end_io; |
... | ... | @@ -311,6 +315,12 @@ |
311 | 315 | unsigned long queue_flags; |
312 | 316 | |
313 | 317 | /* |
318 | + * ida allocated id for this queue. Used to index queues from | |
319 | + * ioctx. | |
320 | + */ | |
321 | + int id; | |
322 | + | |
323 | + /* | |
314 | 324 | * queue needs bounce pages for pages above this limit |
315 | 325 | */ |
316 | 326 | gfp_t bounce_gfp; |
... | ... | @@ -351,6 +361,8 @@ |
351 | 361 | struct timer_list timeout; |
352 | 362 | struct list_head timeout_list; |
353 | 363 | |
364 | + struct list_head icq_list; | |
365 | + | |
354 | 366 | struct queue_limits limits; |
355 | 367 | |
356 | 368 | /* |
... | ... | @@ -387,6 +399,9 @@ |
387 | 399 | /* Throttle data */ |
388 | 400 | struct throtl_data *td; |
389 | 401 | #endif |
402 | +#ifdef CONFIG_LOCKDEP | |
403 | + int ioc_release_depth; | |
404 | +#endif | |
390 | 405 | }; |
391 | 406 | |
392 | 407 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ |
... | ... | @@ -481,6 +496,7 @@ |
481 | 496 | |
482 | 497 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) |
483 | 498 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) |
499 | +#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) | |
484 | 500 | #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) |
485 | 501 | #define blk_queue_noxmerges(q) \ |
486 | 502 | test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) |
... | ... | @@ -660,7 +676,6 @@ |
660 | 676 | extern struct request *blk_get_request(struct request_queue *, int, gfp_t); |
661 | 677 | extern struct request *blk_make_request(struct request_queue *, struct bio *, |
662 | 678 | gfp_t); |
663 | -extern void blk_insert_request(struct request_queue *, struct request *, int, void *); | |
664 | 679 | extern void blk_requeue_request(struct request_queue *, struct request *); |
665 | 680 | extern void blk_add_request_payload(struct request *rq, struct page *page, |
666 | 681 | unsigned int len); |
... | ... | @@ -829,6 +844,7 @@ |
829 | 844 | extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); |
830 | 845 | extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); |
831 | 846 | extern void blk_set_default_limits(struct queue_limits *lim); |
847 | +extern void blk_set_stacking_limits(struct queue_limits *lim); | |
832 | 848 | extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, |
833 | 849 | sector_t offset); |
834 | 850 | extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, |
... | ... | @@ -859,7 +875,7 @@ |
859 | 875 | extern void blk_dump_rq_flags(struct request *, char *); |
860 | 876 | extern long nr_blockdev_pages(void); |
861 | 877 | |
862 | -int blk_get_queue(struct request_queue *); | |
878 | +bool __must_check blk_get_queue(struct request_queue *); | |
863 | 879 | struct request_queue *blk_alloc_queue(gfp_t); |
864 | 880 | struct request_queue *blk_alloc_queue_node(gfp_t, int); |
865 | 881 | extern void blk_put_queue(struct request_queue *); |
... | ... | @@ -1282,19 +1298,70 @@ |
1282 | 1298 | |
1283 | 1299 | #else /* CONFIG_BLK_DEV_INTEGRITY */ |
1284 | 1300 | |
1285 | -#define blk_integrity_rq(rq) (0) | |
1286 | -#define blk_rq_count_integrity_sg(a, b) (0) | |
1287 | -#define blk_rq_map_integrity_sg(a, b, c) (0) | |
1288 | -#define bdev_get_integrity(a) (0) | |
1289 | -#define blk_get_integrity(a) (0) | |
1290 | -#define blk_integrity_compare(a, b) (0) | |
1291 | -#define blk_integrity_register(a, b) (0) | |
1292 | -#define blk_integrity_unregister(a) do { } while (0) | |
1293 | -#define blk_queue_max_integrity_segments(a, b) do { } while (0) | |
1294 | -#define queue_max_integrity_segments(a) (0) | |
1295 | -#define blk_integrity_merge_rq(a, b, c) (0) | |
1296 | -#define blk_integrity_merge_bio(a, b, c) (0) | |
1297 | -#define blk_integrity_is_initialized(a) (0) | |
1301 | +struct bio; | |
1302 | +struct block_device; | |
1303 | +struct gendisk; | |
1304 | +struct blk_integrity; | |
1305 | + | |
1306 | +static inline int blk_integrity_rq(struct request *rq) | |
1307 | +{ | |
1308 | + return 0; | |
1309 | +} | |
1310 | +static inline int blk_rq_count_integrity_sg(struct request_queue *q, | |
1311 | + struct bio *b) | |
1312 | +{ | |
1313 | + return 0; | |
1314 | +} | |
1315 | +static inline int blk_rq_map_integrity_sg(struct request_queue *q, | |
1316 | + struct bio *b, | |
1317 | + struct scatterlist *s) | |
1318 | +{ | |
1319 | + return 0; | |
1320 | +} | |
1321 | +static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) | |
1322 | +{ | |
1323 | + return 0; | |
1324 | +} | |
1325 | +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) | |
1326 | +{ | |
1327 | + return NULL; | |
1328 | +} | |
1329 | +static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) | |
1330 | +{ | |
1331 | + return 0; | |
1332 | +} | |
1333 | +static inline int blk_integrity_register(struct gendisk *d, | |
1334 | + struct blk_integrity *b) | |
1335 | +{ | |
1336 | + return 0; | |
1337 | +} | |
1338 | +static inline void blk_integrity_unregister(struct gendisk *d) | |
1339 | +{ | |
1340 | +} | |
1341 | +static inline void blk_queue_max_integrity_segments(struct request_queue *q, | |
1342 | + unsigned int segs) | |
1343 | +{ | |
1344 | +} | |
1345 | +static inline unsigned short queue_max_integrity_segments(struct request_queue *q) | |
1346 | +{ | |
1347 | + return 0; | |
1348 | +} | |
1349 | +static inline int blk_integrity_merge_rq(struct request_queue *rq, | |
1350 | + struct request *r1, | |
1351 | + struct request *r2) | |
1352 | +{ | |
1353 | + return 0; | |
1354 | +} | |
1355 | +static inline int blk_integrity_merge_bio(struct request_queue *rq, | |
1356 | + struct request *r, | |
1357 | + struct bio *b) | |
1358 | +{ | |
1359 | + return 0; | |
1360 | +} | |
1361 | +static inline bool blk_integrity_is_initialized(struct gendisk *g) | |
1362 | +{ | |
1363 | + return 0; | |
1364 | +} | |
1298 | 1365 | |
1299 | 1366 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ |
1300 | 1367 |
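blk_set_stacking_limits(), declared above, gives stacking drivers a permissive starting point instead of the restrictive defaults of blk_set_default_limits(). A hedged sketch of how a stacking driver might combine it with the existing bdev_stack_limits(); the function and variable names are illustrative, not code from this merge:

/* Illustrative only: the "stacked" driver names are assumptions. */
static void stacked_dev_apply_limits(struct request_queue *q,
				     struct block_device *lower_bdev)
{
	struct queue_limits lim;

	blk_set_stacking_limits(&lim);			/* start fully permissive */
	if (bdev_stack_limits(&lim, lower_bdev, 0))	/* tighten to the lower device */
		pr_warn("stacked: inconsistent limits from lower device\n");
	q->limits = lim;				/* publish the combined limits */
}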
include/linux/elevator.h
... | ... | @@ -5,6 +5,8 @@ |
5 | 5 | |
6 | 6 | #ifdef CONFIG_BLOCK |
7 | 7 | |
8 | +struct io_cq; | |
9 | + | |
8 | 10 | typedef int (elevator_merge_fn) (struct request_queue *, struct request **, |
9 | 11 | struct bio *); |
10 | 12 | |
... | ... | @@ -24,6 +26,8 @@ |
24 | 26 | typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); |
25 | 27 | typedef int (elevator_may_queue_fn) (struct request_queue *, int); |
26 | 28 | |
29 | +typedef void (elevator_init_icq_fn) (struct io_cq *); | |
30 | +typedef void (elevator_exit_icq_fn) (struct io_cq *); | |
27 | 31 | typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t); |
28 | 32 | typedef void (elevator_put_req_fn) (struct request *); |
29 | 33 | typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *); |
... | ... | @@ -56,6 +60,9 @@ |
56 | 60 | elevator_request_list_fn *elevator_former_req_fn; |
57 | 61 | elevator_request_list_fn *elevator_latter_req_fn; |
58 | 62 | |
63 | + elevator_init_icq_fn *elevator_init_icq_fn; /* see iocontext.h */ | |
64 | + elevator_exit_icq_fn *elevator_exit_icq_fn; /* ditto */ | |
65 | + | |
59 | 66 | elevator_set_req_fn *elevator_set_req_fn; |
60 | 67 | elevator_put_req_fn *elevator_put_req_fn; |
61 | 68 | |
... | ... | @@ -63,7 +70,6 @@ |
63 | 70 | |
64 | 71 | elevator_init_fn *elevator_init_fn; |
65 | 72 | elevator_exit_fn *elevator_exit_fn; |
66 | - void (*trim)(struct io_context *); | |
67 | 73 | }; |
68 | 74 | |
69 | 75 | #define ELV_NAME_MAX (16) |
70 | 76 | |
71 | 77 | |
... | ... | @@ -79,11 +85,20 @@ |
79 | 85 | */ |
80 | 86 | struct elevator_type |
81 | 87 | { |
82 | - struct list_head list; | |
88 | + /* managed by elevator core */ | |
89 | + struct kmem_cache *icq_cache; | |
90 | + | |
91 | + /* fields provided by elevator implementation */ | |
83 | 92 | struct elevator_ops ops; |
93 | + size_t icq_size; /* see iocontext.h */ | |
94 | + size_t icq_align; /* ditto */ | |
84 | 95 | struct elv_fs_entry *elevator_attrs; |
85 | 96 | char elevator_name[ELV_NAME_MAX]; |
86 | 97 | struct module *elevator_owner; |
98 | + | |
99 | + /* managed by elevator core */ | |
100 | + char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ | |
101 | + struct list_head list; | |
87 | 102 | }; |
88 | 103 | |
89 | 104 | /* |
90 | 105 | |
... | ... | @@ -91,10 +106,9 @@ |
91 | 106 | */ |
92 | 107 | struct elevator_queue |
93 | 108 | { |
94 | - struct elevator_ops *ops; | |
109 | + struct elevator_type *type; | |
95 | 110 | void *elevator_data; |
96 | 111 | struct kobject kobj; |
97 | - struct elevator_type *elevator_type; | |
98 | 112 | struct mutex sysfs_lock; |
99 | 113 | struct hlist_head *hash; |
100 | 114 | unsigned int registered:1; |
... | ... | @@ -129,7 +143,7 @@ |
129 | 143 | /* |
130 | 144 | * io scheduler registration |
131 | 145 | */ |
132 | -extern void elv_register(struct elevator_type *); | |
146 | +extern int elv_register(struct elevator_type *); | |
133 | 147 | extern void elv_unregister(struct elevator_type *); |
134 | 148 | |
135 | 149 | /* |
... | ... | @@ -196,23 +210,6 @@ |
196 | 210 | list_del_init(&(rq)->queuelist); \ |
197 | 211 | INIT_LIST_HEAD(&(rq)->csd.list); \ |
198 | 212 | } while (0) |
199 | - | |
200 | -/* | |
201 | - * io context count accounting | |
202 | - */ | |
203 | -#define elv_ioc_count_mod(name, __val) this_cpu_add(name, __val) | |
204 | -#define elv_ioc_count_inc(name) this_cpu_inc(name) | |
205 | -#define elv_ioc_count_dec(name) this_cpu_dec(name) | |
206 | - | |
207 | -#define elv_ioc_count_read(name) \ | |
208 | -({ \ | |
209 | - unsigned long __val = 0; \ | |
210 | - int __cpu; \ | |
211 | - smp_wmb(); \ | |
212 | - for_each_possible_cpu(__cpu) \ | |
213 | - __val += per_cpu(name, __cpu); \ | |
214 | - __val; \ | |
215 | -}) | |
216 | 213 | |
217 | 214 | #endif /* CONFIG_BLOCK */ |
218 | 215 | #endif |
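With the elevator.h changes above, an I/O scheduler opts into block-core managed icq's by filling icq_size and icq_align, and elv_register() can now fail, so its return value must be checked. A hedged sketch of a module init for the hypothetical "snail" scheduler reused from the iocontext.h comment further down; only one ops callback is shown and the rest are placeholders:

#include <linux/elevator.h>
#include <linux/iocontext.h>
#include <linux/module.h>

/* "snail" mirrors the example in the iocontext.h comment; it is not a real scheduler. */
struct snail_io_cq {
	struct io_cq	icq;		/* must be the first member */
	int		poke_snail;
	int		feed_snail;
};

static void snail_init_icq(struct io_cq *icq)
{
	struct snail_io_cq *sic = container_of(icq, struct snail_io_cq, icq);

	sic->poke_snail = 0;
	sic->feed_snail = 0;
}

static struct elevator_type iosched_snail = {
	.ops = {
		.elevator_init_icq_fn	= snail_init_icq,
		/* remaining elevator_*_fn callbacks omitted for brevity */
	},
	.icq_size	= sizeof(struct snail_io_cq),
	.icq_align	= __alignof__(struct snail_io_cq),
	.elevator_name	= "snail",
	.elevator_owner	= THIS_MODULE,
};

static int __init snail_init(void)
{
	/* elv_register() now returns an error code, e.g. on icq cache creation failure. */
	return elv_register(&iosched_snail);
}
module_init(snail_init);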
include/linux/fs.h
... | ... | @@ -319,6 +319,7 @@ |
319 | 319 | #define BLKPBSZGET _IO(0x12,123) |
320 | 320 | #define BLKDISCARDZEROES _IO(0x12,124) |
321 | 321 | #define BLKSECDISCARD _IO(0x12,125) |
322 | +#define BLKROTATIONAL _IO(0x12,126) | |
322 | 323 | |
323 | 324 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ |
324 | 325 | #define FIBMAP _IO(0x00,1) /* bmap access */ |
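BLKROTATIONAL, added above, exposes a block device's rotational flag through the ioctl interface. A userspace sketch, assuming the kernel reports the flag as an unsigned short (1 for rotational, 0 for non-rotational); /dev/sda is just an example device:

/* Userspace sketch; the unsigned short result format and device path are assumptions. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

int main(void)
{
	unsigned short rotational = 0;
	int fd = open("/dev/sda", O_RDONLY);

	if (fd < 0 || ioctl(fd, BLKROTATIONAL, &rotational) < 0) {
		perror("BLKROTATIONAL");
		return 1;
	}
	printf("rotational: %hu\n", rotational);
	close(fd);
	return 0;
}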
include/linux/iocontext.h
... | ... | @@ -3,32 +3,92 @@ |
3 | 3 | |
4 | 4 | #include <linux/radix-tree.h> |
5 | 5 | #include <linux/rcupdate.h> |
6 | +#include <linux/workqueue.h> | |
6 | 7 | |
7 | -struct cfq_queue; | |
8 | -struct cfq_ttime { | |
9 | - unsigned long last_end_request; | |
10 | - | |
11 | - unsigned long ttime_total; | |
12 | - unsigned long ttime_samples; | |
13 | - unsigned long ttime_mean; | |
8 | +enum { | |
9 | + ICQ_IOPRIO_CHANGED, | |
10 | + ICQ_CGROUP_CHANGED, | |
14 | 11 | }; |
15 | 12 | |
16 | -struct cfq_io_context { | |
17 | - void *key; | |
13 | +/* | |
14 | + * An io_cq (icq) is an association between an io_context (ioc) and a | 
15 | + * request_queue (q). This is used by elevators which need to track | |
16 | + * information per ioc - q pair. | |
17 | + * | |
18 | + * An elevator can request use of icq by setting elevator_type->icq_size and | 
19 | + * ->icq_align. Both size and align must be larger than that of struct | |
20 | + * io_cq and elevator can use the tail area for private information. The | |
21 | + * recommended way to do this is to define a struct which contains io_cq as | 
22 | + * the first member, followed by private members, and to use its size and | 
23 | + * align. For example, | |
24 | + * | |
25 | + * struct snail_io_cq { | |
26 | + * struct io_cq icq; | |
27 | + * int poke_snail; | |
28 | + * int feed_snail; | |
29 | + * }; | |
30 | + * | |
31 | + * struct elevator_type snail_elv_type { | |
32 | + * .ops = { ... }, | |
33 | + * .icq_size = sizeof(struct snail_io_cq), | |
34 | + * .icq_align = __alignof__(struct snail_io_cq), | |
35 | + * ... | |
36 | + * }; | |
37 | + * | |
38 | + * If icq_size is set, block core will manage icq's. All requests will | |
39 | + * have their ->elv.icq field set before elevator_ops->elevator_set_req_fn() | 
40 | + * is called and will hold a reference to the associated io_context. | 
41 | + * | |
42 | + * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is | |
43 | + * called and, on destruction, ->elevator_exit_icq_fn(). Both functions | |
44 | + * are called with both the associated io_context and queue locks held. | |
45 | + * | |
46 | + * Elevator is allowed to lookup icq using ioc_lookup_icq() while holding | |
47 | + * queue lock but the returned icq is valid only until the queue lock is | |
48 | + * released. Elevators cannot and should not try to create or destroy | 
49 | + * icq's. | |
50 | + * | |
51 | + * As icq's are linked from both ioc and q, the locking rules are a bit | |
52 | + * complex. | |
53 | + * | |
54 | + * - ioc lock nests inside q lock. | |
55 | + * | |
56 | + * - ioc->icq_list and icq->ioc_node are protected by ioc lock. | |
57 | + * q->icq_list and icq->q_node by q lock. | |
58 | + * | |
59 | + * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq | |
60 | + * itself is protected by q lock. However, both the indexes and icq | |
61 | + * itself are also RCU managed and lookup can be performed holding only | |
62 | + * the q lock. | |
63 | + * | |
64 | + * - icq's are not reference counted. They are destroyed when either the | |
65 | + * ioc or q goes away. Each request with icq set holds an extra | |
66 | + * reference to ioc to ensure it stays until the request is completed. | |
67 | + * | |
68 | + * - Linking and unlinking icq's are performed while holding both ioc and q | |
69 | + * locks. Due to the lock ordering, q exit is simple but ioc exit | |
70 | + * requires reverse-order double lock dance. | |
71 | + */ | |
72 | +struct io_cq { | |
73 | + struct request_queue *q; | |
74 | + struct io_context *ioc; | |
18 | 75 | |
19 | - struct cfq_queue *cfqq[2]; | |
76 | + /* | |
77 | + * q_node and ioc_node link io_cq through icq_list of q and ioc | |
78 | + * respectively. Both fields are unused once ioc_exit_icq() is | |
79 | + * called and shared with __rcu_icq_cache and __rcu_head which are | |
80 | + * used for RCU free of io_cq. | |
81 | + */ | |
82 | + union { | |
83 | + struct list_head q_node; | |
84 | + struct kmem_cache *__rcu_icq_cache; | |
85 | + }; | |
86 | + union { | |
87 | + struct hlist_node ioc_node; | |
88 | + struct rcu_head __rcu_head; | |
89 | + }; | |
20 | 90 | |
21 | - struct io_context *ioc; | |
22 | - | |
23 | - struct cfq_ttime ttime; | |
24 | - | |
25 | - struct list_head queue_list; | |
26 | - struct hlist_node cic_list; | |
27 | - | |
28 | - void (*dtor)(struct io_context *); /* destructor */ | |
29 | - void (*exit)(struct io_context *); /* called on task exit */ | |
30 | - | |
31 | - struct rcu_head rcu_head; | |
91 | + unsigned long changed; | |
32 | 92 | }; |
33 | 93 | |
34 | 94 | /* |
35 | 95 | |
36 | 96 | |
... | ... | @@ -43,21 +103,18 @@ |
43 | 103 | spinlock_t lock; |
44 | 104 | |
45 | 105 | unsigned short ioprio; |
46 | - unsigned short ioprio_changed; | |
47 | 106 | |
48 | -#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) | |
49 | - unsigned short cgroup_changed; | |
50 | -#endif | |
51 | - | |
52 | 107 | /* |
53 | 108 | * For request batching |
54 | 109 | */ |
55 | 110 | int nr_batch_requests; /* Number of requests left in the batch */ |
56 | 111 | unsigned long last_waited; /* Time last woken after wait for request */ |
57 | 112 | |
58 | - struct radix_tree_root radix_root; | |
59 | - struct hlist_head cic_list; | |
60 | - void __rcu *ioc_data; | |
113 | + struct radix_tree_root icq_tree; | |
114 | + struct io_cq __rcu *icq_hint; | |
115 | + struct hlist_head icq_list; | |
116 | + | |
117 | + struct work_struct release_work; | |
61 | 118 | }; |
62 | 119 | |
63 | 120 | static inline struct io_context *ioc_task_link(struct io_context *ioc) |
64 | 121 | |
65 | 122 | |
66 | 123 | |
... | ... | @@ -76,20 +133,17 @@ |
76 | 133 | |
77 | 134 | struct task_struct; |
78 | 135 | #ifdef CONFIG_BLOCK |
79 | -int put_io_context(struct io_context *ioc); | |
136 | +void put_io_context(struct io_context *ioc, struct request_queue *locked_q); | |
80 | 137 | void exit_io_context(struct task_struct *task); |
81 | -struct io_context *get_io_context(gfp_t gfp_flags, int node); | |
82 | -struct io_context *alloc_io_context(gfp_t gfp_flags, int node); | |
138 | +struct io_context *get_task_io_context(struct task_struct *task, | |
139 | + gfp_t gfp_flags, int node); | |
140 | +void ioc_ioprio_changed(struct io_context *ioc, int ioprio); | |
141 | +void ioc_cgroup_changed(struct io_context *ioc); | |
83 | 142 | #else |
84 | -static inline void exit_io_context(struct task_struct *task) | |
85 | -{ | |
86 | -} | |
87 | - | |
88 | 143 | struct io_context; |
89 | -static inline int put_io_context(struct io_context *ioc) | |
90 | -{ | |
91 | - return 1; | |
92 | -} | |
144 | +static inline void put_io_context(struct io_context *ioc, | |
145 | + struct request_queue *locked_q) { } | |
146 | +static inline void exit_io_context(struct task_struct *task) { } | |
93 | 147 | #endif |
94 | 148 | |
95 | 149 | #endif |
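The iocontext.h hunk above replaces the old per-io_context ioprio_changed/cgroup_changed fields with a per-icq changed bitmap. A hedged sketch of how an elevator might consume those flags the next time it touches an icq; the pr_debug placeholders stand in for scheduler-specific handling:

#include <linux/bitops.h>
#include <linux/iocontext.h>
#include <linux/printk.h>

/* Sketch only: a real scheduler would re-read ioprio / drop cached cgroup state here. */
static void example_icq_check_changed(struct io_cq *icq)
{
	if (!icq->changed)
		return;

	if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &icq->changed))
		pr_debug("icq %p: ioprio changed, refresh scheduling class\n", icq);

	if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &icq->changed))
		pr_debug("icq %p: cgroup changed, drop cached group state\n", icq);
}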
kernel/fork.c
... | ... | @@ -873,6 +873,7 @@ |
873 | 873 | { |
874 | 874 | #ifdef CONFIG_BLOCK |
875 | 875 | struct io_context *ioc = current->io_context; |
876 | + struct io_context *new_ioc; | |
876 | 877 | |
877 | 878 | if (!ioc) |
878 | 879 | return 0; |
879 | 880 | |
... | ... | @@ -884,11 +885,12 @@ |
884 | 885 | if (unlikely(!tsk->io_context)) |
885 | 886 | return -ENOMEM; |
886 | 887 | } else if (ioprio_valid(ioc->ioprio)) { |
887 | - tsk->io_context = alloc_io_context(GFP_KERNEL, -1); | |
888 | - if (unlikely(!tsk->io_context)) | |
888 | + new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE); | |
889 | + if (unlikely(!new_ioc)) | |
889 | 890 | return -ENOMEM; |
890 | 891 | |
891 | - tsk->io_context->ioprio = ioc->ioprio; | |
892 | + new_ioc->ioprio = ioc->ioprio; | |
893 | + put_io_context(new_ioc, NULL); | |
892 | 894 | } |
893 | 895 | #endif |
894 | 896 | return 0; |
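The copy_io() change above shows the new reference pattern: get_task_io_context() hands back a referenced io_context and put_io_context() drops it, with the second argument naming a request_queue whose lock the caller already holds (NULL otherwise). A hedged sketch of the same pattern for a caller that updates a task's I/O priority via the new ioc_ioprio_changed() helper; this is illustrative, not the exact fs/ioprio.c code in this merge:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/iocontext.h>
#include <linux/sched.h>

/* Illustrative sketch of the get/put pairing; names are not from fs/ioprio.c. */
static int example_update_ioprio(struct task_struct *task, int ioprio)
{
	struct io_context *ioc;

	ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
	if (!ioc)
		return -ENOMEM;

	ioc_ioprio_changed(ioc, ioprio);	/* flags the change on the task's icq's */
	put_io_context(ioc, NULL);		/* NULL: no queue lock held here */
	return 0;
}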