Commit b3c9dd182ed3bdcdaf0e42625a35924b0497afdc
Exists in master and in 34 other branches
Merge branch 'for-3.3/core' of git://git.kernel.dk/linux-block
* 'for-3.3/core' of git://git.kernel.dk/linux-block: (37 commits)
  Revert "block: recursive merge requests"
  block: Stop using macro stubs for the bio data integrity calls
  blockdev: convert some macros to static inlines
  fs: remove unneeded plug in mpage_readpages()
  block: Add BLKROTATIONAL ioctl
  block: Introduce blk_set_stacking_limits function
  block: remove WARN_ON_ONCE() in exit_io_context()
  block: an exiting task should be allowed to create io_context
  block: ioc_cgroup_changed() needs to be exported
  block: recursive merge requests
  block, cfq: fix empty queue crash caused by request merge
  block, cfq: move icq creation and rq->elv.icq association to block core
  block, cfq: restructure io_cq creation path for io_context interface cleanup
  block, cfq: move io_cq exit/release to blk-ioc.c
  block, cfq: move icq cache management to block core
  block, cfq: move io_cq lookup to blk-ioc.c
  block, cfq: move cfqd->icq_list to request_queue and add request->elv.icq
  block, cfq: reorganize cfq_io_context into generic and cfq specific parts
  block: remove elevator_queue->ops
  block: reorder elevator switch sequence
  ...

Fix up conflicts in:
  - block/blk-cgroup.c
      Switch from can_attach_task to can_attach
  - block/cfq-iosched.c
      conflict with now removed cic index changes (we now use q->id instead)
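The common thread of the series is the new io_context/io_cq interface: instead of poking at task->io_context directly, callers take a counted reference with get_task_io_context() and drop it with put_io_context(). The blk-cgroup.c hunk below switches to exactly this pattern; the following is a minimal sketch of the same calling convention (the wrapper function is hypothetical, only the API calls come from this merge):

	#include <linux/iocontext.h>
	#include <linux/sched.h>

	/* Hypothetical helper: flag a task's icqs after it moved to another cgroup. */
	static void example_notify_cgroup_change(struct task_struct *task)
	{
		struct io_context *ioc;

		/* Creates task->io_context if it doesn't exist yet and returns it
		 * with its reference count elevated; may fail under GFP_ATOMIC. */
		ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
		if (ioc) {
			ioc_cgroup_changed(ioc);   /* sets ICQ_CGROUP_CHANGED on every icq */
			put_io_context(ioc, NULL); /* NULL: caller holds no queue_lock */
		}
	}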
Showing 28 changed files:
- block/blk-cgroup.c
- block/blk-core.c
- block/blk-exec.c
- block/blk-ioc.c
- block/blk-settings.c
- block/blk-sysfs.c
- block/blk-throttle.c
- block/blk.h
- block/bsg.c
- block/cfq-iosched.c
- block/compat_ioctl.c
- block/deadline-iosched.c
- block/elevator.c
- block/genhd.c
- block/ioctl.c
- block/noop-iosched.c
- drivers/block/sx8.c
- drivers/md/dm-table.c
- drivers/md/md.c
- drivers/scsi/scsi_scan.c
- fs/ioprio.c
- fs/mpage.c
- include/linux/bio.h
- include/linux/blkdev.h
- include/linux/elevator.h
- include/linux/fs.h
- include/linux/iocontext.h
- kernel/fork.c
block/blk-cgroup.c
... | ... | @@ -1655,11 +1655,12 @@ |
1655 | 1655 | struct io_context *ioc; |
1656 | 1656 | |
1657 | 1657 | cgroup_taskset_for_each(task, cgrp, tset) { |
1658 | - task_lock(task); | |
1659 | - ioc = task->io_context; | |
1660 | - if (ioc) | |
1661 | - ioc->cgroup_changed = 1; | |
1662 | - task_unlock(task); | |
1658 | + /* we don't lose anything even if ioc allocation fails */ | |
1659 | + ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); | |
1660 | + if (ioc) { | |
1661 | + ioc_cgroup_changed(ioc); | |
1662 | + put_io_context(ioc, NULL); | |
1663 | + } | |
1663 | 1664 | } |
1664 | 1665 | } |
1665 | 1666 |
block/blk-core.c
... | ... | @@ -39,6 +39,8 @@ |
39 | 39 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); |
40 | 40 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); |
41 | 41 | |
42 | +DEFINE_IDA(blk_queue_ida); | |
43 | + | |
42 | 44 | /* |
43 | 45 | * For the allocated request tables |
44 | 46 | */ |
... | ... | @@ -358,7 +360,8 @@ |
358 | 360 | void blk_drain_queue(struct request_queue *q, bool drain_all) |
359 | 361 | { |
360 | 362 | while (true) { |
361 | - int nr_rqs; | |
363 | + bool drain = false; | |
364 | + int i; | |
362 | 365 | |
363 | 366 | spin_lock_irq(q->queue_lock); |
... | ... | @@ -375,14 +378,25 @@ |
375 | 378 | if (!list_empty(&q->queue_head)) |
376 | 379 | __blk_run_queue(q); |
377 | 380 | |
378 | - if (drain_all) | |
379 | - nr_rqs = q->rq.count[0] + q->rq.count[1]; | |
380 | - else | |
381 | - nr_rqs = q->rq.elvpriv; | |
381 | + drain |= q->rq.elvpriv; | |
382 | 382 | |
383 | + /* | |
384 | + * Unfortunately, requests are queued at and tracked from | |
385 | + * multiple places and there's no single counter which can | |
386 | + * be drained. Check all the queues and counters. | |
387 | + */ | |
388 | + if (drain_all) { | |
389 | + drain |= !list_empty(&q->queue_head); | |
390 | + for (i = 0; i < 2; i++) { | |
391 | + drain |= q->rq.count[i]; | |
392 | + drain |= q->in_flight[i]; | |
393 | + drain |= !list_empty(&q->flush_queue[i]); | |
394 | + } | |
395 | + } | |
396 | + | |
383 | 397 | spin_unlock_irq(q->queue_lock); |
384 | 398 | |
385 | - if (!nr_rqs) | |
399 | + if (!drain) | |
386 | 400 | break; |
387 | 401 | msleep(10); |
388 | 402 | } |
... | ... | @@ -469,6 +483,10 @@ |
469 | 483 | if (!q) |
470 | 484 | return NULL; |
471 | 485 | |
486 | + q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL); | |
487 | + if (q->id < 0) | |
488 | + goto fail_q; | |
489 | + | |
472 | 490 | q->backing_dev_info.ra_pages = |
473 | 491 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
474 | 492 | q->backing_dev_info.state = 0; |
... | ... | @@ -477,20 +495,17 @@ |
477 | 495 | q->node = node_id; |
478 | 496 | |
479 | 497 | err = bdi_init(&q->backing_dev_info); |
480 | - if (err) { | |
481 | - kmem_cache_free(blk_requestq_cachep, q); | |
482 | - return NULL; | |
483 | - } | |
498 | + if (err) | |
499 | + goto fail_id; | |
484 | 500 | |
485 | - if (blk_throtl_init(q)) { | |
486 | - kmem_cache_free(blk_requestq_cachep, q); | |
487 | - return NULL; | |
488 | - } | |
501 | + if (blk_throtl_init(q)) | |
502 | + goto fail_id; | |
489 | 503 | |
490 | 504 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, |
491 | 505 | laptop_mode_timer_fn, (unsigned long) q); |
492 | 506 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); |
493 | 507 | INIT_LIST_HEAD(&q->timeout_list); |
508 | + INIT_LIST_HEAD(&q->icq_list); | |
494 | 509 | INIT_LIST_HEAD(&q->flush_queue[0]); |
495 | 510 | INIT_LIST_HEAD(&q->flush_queue[1]); |
496 | 511 | INIT_LIST_HEAD(&q->flush_data_in_flight); |
... | ... | @@ -508,6 +523,12 @@ |
508 | 523 | q->queue_lock = &q->__queue_lock; |
509 | 524 | |
510 | 525 | return q; |
526 | + | |
527 | +fail_id: | |
528 | + ida_simple_remove(&blk_queue_ida, q->id); | |
529 | +fail_q: | |
530 | + kmem_cache_free(blk_requestq_cachep, q); | |
531 | + return NULL; | |
511 | 532 | } |
512 | 533 | EXPORT_SYMBOL(blk_alloc_queue_node); |
513 | 534 | |
... | ... | @@ -605,26 +626,31 @@ |
605 | 626 | } |
606 | 627 | EXPORT_SYMBOL(blk_init_allocated_queue); |
607 | 628 | |
608 | -int blk_get_queue(struct request_queue *q) | |
629 | +bool blk_get_queue(struct request_queue *q) | |
609 | 630 | { |
610 | - if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | |
611 | - kobject_get(&q->kobj); | |
612 | - return 0; | |
631 | + if (likely(!blk_queue_dead(q))) { | |
632 | + __blk_get_queue(q); | |
633 | + return true; | |
613 | 634 | } |
614 | 635 | |
615 | - return 1; | |
636 | + return false; | |
616 | 637 | } |
617 | 638 | EXPORT_SYMBOL(blk_get_queue); |
618 | 639 | |
619 | 640 | static inline void blk_free_request(struct request_queue *q, struct request *rq) |
620 | 641 | { |
621 | - if (rq->cmd_flags & REQ_ELVPRIV) | |
642 | + if (rq->cmd_flags & REQ_ELVPRIV) { | |
622 | 643 | elv_put_request(q, rq); |
644 | + if (rq->elv.icq) | |
645 | + put_io_context(rq->elv.icq->ioc, q); | |
646 | + } | |
647 | + | |
623 | 648 | mempool_free(rq, q->rq.rq_pool); |
624 | 649 | } |
625 | 650 | |
626 | 651 | static struct request * |
627 | -blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask) | |
652 | +blk_alloc_request(struct request_queue *q, struct io_cq *icq, | |
653 | + unsigned int flags, gfp_t gfp_mask) | |
628 | 654 | { |
629 | 655 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); |
630 | 656 | |
... | ... | @@ -635,10 +661,15 @@ |
635 | 661 | |
636 | 662 | rq->cmd_flags = flags | REQ_ALLOCED; |
637 | 663 | |
638 | - if ((flags & REQ_ELVPRIV) && | |
639 | - unlikely(elv_set_request(q, rq, gfp_mask))) { | |
640 | - mempool_free(rq, q->rq.rq_pool); | |
641 | - return NULL; | |
664 | + if (flags & REQ_ELVPRIV) { | |
665 | + rq->elv.icq = icq; | |
666 | + if (unlikely(elv_set_request(q, rq, gfp_mask))) { | |
667 | + mempool_free(rq, q->rq.rq_pool); | |
668 | + return NULL; | |
669 | + } | |
670 | + /* @rq->elv.icq holds on to io_context until @rq is freed */ | |
671 | + if (icq) | |
672 | + get_io_context(icq->ioc); | |
642 | 673 | } |
643 | 674 | |
644 | 675 | return rq; |
... | ... | @@ -750,11 +781,17 @@ |
750 | 781 | { |
751 | 782 | struct request *rq = NULL; |
752 | 783 | struct request_list *rl = &q->rq; |
753 | - struct io_context *ioc = NULL; | |
784 | + struct elevator_type *et; | |
785 | + struct io_context *ioc; | |
786 | + struct io_cq *icq = NULL; | |
754 | 787 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
788 | + bool retried = false; | |
755 | 789 | int may_queue; |
790 | +retry: | |
791 | + et = q->elevator->type; | |
792 | + ioc = current->io_context; | |
756 | 793 | |
757 | - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | |
794 | + if (unlikely(blk_queue_dead(q))) | |
758 | 795 | return NULL; |
759 | 796 | |
760 | 797 | may_queue = elv_may_queue(q, rw_flags); |
761 | 798 | |
... | ... | @@ -763,8 +800,21 @@ |
763 | 800 | |
764 | 801 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { |
765 | 802 | if (rl->count[is_sync]+1 >= q->nr_requests) { |
766 | - ioc = current_io_context(GFP_ATOMIC, q->node); | |
767 | 803 | /* |
804 | + * We want ioc to record batching state. If it's | |
805 | + * not already there, creating a new one requires | |
806 | + * dropping queue_lock, which in turn requires | |
807 | + * retesting conditions to avoid queue hang. | |
808 | + */ | |
809 | + if (!ioc && !retried) { | |
810 | + spin_unlock_irq(q->queue_lock); | |
811 | + create_io_context(current, gfp_mask, q->node); | |
812 | + spin_lock_irq(q->queue_lock); | |
813 | + retried = true; | |
814 | + goto retry; | |
815 | + } | |
816 | + | |
817 | + /* | |
768 | 818 | * The queue will fill after this allocation, so set |
769 | 819 | * it as full, and mark this process as "batching". |
770 | 820 | * This process will be allowed to complete a batch of |
... | ... | @@ -799,17 +849,36 @@ |
799 | 849 | rl->count[is_sync]++; |
800 | 850 | rl->starved[is_sync] = 0; |
801 | 851 | |
852 | + /* | |
853 | + * Decide whether the new request will be managed by elevator. If | |
854 | + * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will | |
855 | + * prevent the current elevator from being destroyed until the new | |
856 | + * request is freed. This guarantees icq's won't be destroyed and | |
857 | + * makes creating new ones safe. | |
858 | + * | |
859 | + * Also, lookup icq while holding queue_lock. If it doesn't exist, | |
860 | + * it will be created after releasing queue_lock. | |
861 | + */ | |
802 | 862 | if (blk_rq_should_init_elevator(bio) && |
803 | 863 | !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) { |
804 | 864 | rw_flags |= REQ_ELVPRIV; |
805 | 865 | rl->elvpriv++; |
866 | + if (et->icq_cache && ioc) | |
867 | + icq = ioc_lookup_icq(ioc, q); | |
806 | 868 | } |
807 | 869 | |
808 | 870 | if (blk_queue_io_stat(q)) |
809 | 871 | rw_flags |= REQ_IO_STAT; |
810 | 872 | spin_unlock_irq(q->queue_lock); |
811 | 873 | |
812 | - rq = blk_alloc_request(q, rw_flags, gfp_mask); | |
874 | + /* create icq if missing */ | |
875 | + if (unlikely(et->icq_cache && !icq)) | |
876 | + icq = ioc_create_icq(q, gfp_mask); | |
877 | + | |
878 | + /* rqs are guaranteed to have icq on elv_set_request() if requested */ | |
879 | + if (likely(!et->icq_cache || icq)) | |
880 | + rq = blk_alloc_request(q, icq, rw_flags, gfp_mask); | |
881 | + | |
813 | 882 | if (unlikely(!rq)) { |
814 | 883 | /* |
815 | 884 | * Allocation failed presumably due to memory. Undo anything |
... | ... | @@ -871,10 +940,9 @@ |
871 | 940 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
872 | 941 | while (!rq) { |
873 | 942 | DEFINE_WAIT(wait); |
874 | - struct io_context *ioc; | |
875 | 943 | struct request_list *rl = &q->rq; |
876 | 944 | |
877 | - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | |
945 | + if (unlikely(blk_queue_dead(q))) | |
878 | 946 | return NULL; |
879 | 947 | |
880 | 948 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, |
... | ... | @@ -891,8 +959,8 @@ |
891 | 959 | * up to a big batch of them for a small period time. |
892 | 960 | * See ioc_batching, ioc_set_batching |
893 | 961 | */ |
894 | - ioc = current_io_context(GFP_NOIO, q->node); | |
895 | - ioc_set_batching(q, ioc); | |
962 | + create_io_context(current, GFP_NOIO, q->node); | |
963 | + ioc_set_batching(q, current->io_context); | |
896 | 964 | |
897 | 965 | spin_lock_irq(q->queue_lock); |
898 | 966 | finish_wait(&rl->wait[is_sync], &wait); |
... | ... | @@ -1009,54 +1077,6 @@ |
1009 | 1077 | __elv_add_request(q, rq, where); |
1010 | 1078 | } |
1011 | 1079 | |
1012 | -/** | |
1013 | - * blk_insert_request - insert a special request into a request queue | |
1014 | - * @q: request queue where request should be inserted | |
1015 | - * @rq: request to be inserted | |
1016 | - * @at_head: insert request at head or tail of queue | |
1017 | - * @data: private data | |
1018 | - * | |
1019 | - * Description: | |
1020 | - * Many block devices need to execute commands asynchronously, so they don't | |
1021 | - * block the whole kernel from preemption during request execution. This is | |
1022 | - * accomplished normally by inserting aritficial requests tagged as | |
1023 | - * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them | |
1024 | - * be scheduled for actual execution by the request queue. | |
1025 | - * | |
1026 | - * We have the option of inserting the head or the tail of the queue. | |
1027 | - * Typically we use the tail for new ioctls and so forth. We use the head | |
1028 | - * of the queue for things like a QUEUE_FULL message from a device, or a | |
1029 | - * host that is unable to accept a particular command. | |
1030 | - */ | |
1031 | -void blk_insert_request(struct request_queue *q, struct request *rq, | |
1032 | - int at_head, void *data) | |
1033 | -{ | |
1034 | - int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | |
1035 | - unsigned long flags; | |
1036 | - | |
1037 | - /* | |
1038 | - * tell I/O scheduler that this isn't a regular read/write (ie it | |
1039 | - * must not attempt merges on this) and that it acts as a soft | |
1040 | - * barrier | |
1041 | - */ | |
1042 | - rq->cmd_type = REQ_TYPE_SPECIAL; | |
1043 | - | |
1044 | - rq->special = data; | |
1045 | - | |
1046 | - spin_lock_irqsave(q->queue_lock, flags); | |
1047 | - | |
1048 | - /* | |
1049 | - * If command is tagged, release the tag | |
1050 | - */ | |
1051 | - if (blk_rq_tagged(rq)) | |
1052 | - blk_queue_end_tag(q, rq); | |
1053 | - | |
1054 | - add_acct_request(q, rq, where); | |
1055 | - __blk_run_queue(q); | |
1056 | - spin_unlock_irqrestore(q->queue_lock, flags); | |
1057 | -} | |
1058 | -EXPORT_SYMBOL(blk_insert_request); | |
1059 | - | |
1060 | 1080 | static void part_round_stats_single(int cpu, struct hd_struct *part, |
1061 | 1081 | unsigned long now) |
1062 | 1082 | { |
... | ... | @@ -1766,6 +1786,10 @@ |
1766 | 1786 | return -EIO; |
1767 | 1787 | |
1768 | 1788 | spin_lock_irqsave(q->queue_lock, flags); |
1789 | + if (unlikely(blk_queue_dead(q))) { | |
1790 | + spin_unlock_irqrestore(q->queue_lock, flags); | |
1791 | + return -ENODEV; | |
1792 | + } | |
1769 | 1793 | |
1770 | 1794 | /* |
1771 | 1795 | * Submitting request must be dequeued before calling this function |
... | ... | @@ -2740,6 +2764,14 @@ |
2740 | 2764 | trace_block_unplug(q, depth, !from_schedule); |
2741 | 2765 | |
2742 | 2766 | /* |
2767 | + * Don't mess with dead queue. | |
2768 | + */ | |
2769 | + if (unlikely(blk_queue_dead(q))) { | |
2770 | + spin_unlock(q->queue_lock); | |
2771 | + return; | |
2772 | + } | |
2773 | + | |
2774 | + /* | |
2743 | 2775 | * If we are punting this to kblockd, then we can safely drop |
2744 | 2776 | * the queue_lock before waking kblockd (which needs to take |
2745 | 2777 | * this lock). |
... | ... | @@ -2815,6 +2847,15 @@ |
2815 | 2847 | depth = 0; |
2816 | 2848 | spin_lock(q->queue_lock); |
2817 | 2849 | } |
2850 | + | |
2851 | + /* | |
2852 | + * Short-circuit if @q is dead | |
2853 | + */ | |
2854 | + if (unlikely(blk_queue_dead(q))) { | |
2855 | + __blk_end_request_all(rq, -ENODEV); | |
2856 | + continue; | |
2857 | + } | |
2858 | + | |
2818 | 2859 | /* |
2819 | 2860 | * rq is already accounted, so use raw insert |
2820 | 2861 | */ |
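Several of the hunks above replace open-coded test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) checks with blk_queue_dead(), and blk_get_queue() now uses __blk_get_queue(). Both helpers are defined elsewhere in this merge (include/linux/blkdev.h and block/blk.h are among the changed files); presumably they are thin wrappers along these lines (a sketch, not the verbatim headers):

	#define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)

	static inline void __blk_get_queue(struct request_queue *q)
	{
		kobject_get(&q->kobj);
	}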
block/blk-exec.c
... | ... | @@ -50,7 +50,11 @@ |
50 | 50 | { |
51 | 51 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
52 | 52 | |
53 | - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | |
53 | + WARN_ON(irqs_disabled()); | |
54 | + spin_lock_irq(q->queue_lock); | |
55 | + | |
56 | + if (unlikely(blk_queue_dead(q))) { | |
57 | + spin_unlock_irq(q->queue_lock); | |
54 | 58 | rq->errors = -ENXIO; |
55 | 59 | if (rq->end_io) |
56 | 60 | rq->end_io(rq, rq->errors); |
... | ... | @@ -59,8 +63,6 @@ |
59 | 63 | |
60 | 64 | rq->rq_disk = bd_disk; |
61 | 65 | rq->end_io = done; |
62 | - WARN_ON(irqs_disabled()); | |
63 | - spin_lock_irq(q->queue_lock); | |
64 | 66 | __elv_add_request(q, rq, where); |
65 | 67 | __blk_run_queue(q); |
66 | 68 | /* the queue is stopped so it won't be run */ |
block/blk-ioc.c
... | ... | @@ -16,53 +16,214 @@ |
16 | 16 | */ |
17 | 17 | static struct kmem_cache *iocontext_cachep; |
18 | 18 | |
19 | -static void cfq_dtor(struct io_context *ioc) | |
19 | +/** | |
20 | + * get_io_context - increment reference count to io_context | |
21 | + * @ioc: io_context to get | |
22 | + * | |
23 | + * Increment reference count to @ioc. | |
24 | + */ | |
25 | +void get_io_context(struct io_context *ioc) | |
20 | 26 | { |
21 | - if (!hlist_empty(&ioc->cic_list)) { | |
22 | - struct cfq_io_context *cic; | |
27 | + BUG_ON(atomic_long_read(&ioc->refcount) <= 0); | |
28 | + atomic_long_inc(&ioc->refcount); | |
29 | +} | |
30 | +EXPORT_SYMBOL(get_io_context); | |
23 | 31 | |
24 | - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, | |
25 | - cic_list); | |
26 | - cic->dtor(ioc); | |
32 | +/* | |
33 | + * Releasing ioc may nest into another put_io_context() leading to nested | |
34 | + * fast path release. As the ioc's can't be the same, this is okay but | |
35 | + * makes lockdep whine. Keep track of nesting and use it as subclass. | |
36 | + */ | |
37 | +#ifdef CONFIG_LOCKDEP | |
38 | +#define ioc_release_depth(q) ((q) ? (q)->ioc_release_depth : 0) | |
39 | +#define ioc_release_depth_inc(q) (q)->ioc_release_depth++ | |
40 | +#define ioc_release_depth_dec(q) (q)->ioc_release_depth-- | |
41 | +#else | |
42 | +#define ioc_release_depth(q) 0 | |
43 | +#define ioc_release_depth_inc(q) do { } while (0) | |
44 | +#define ioc_release_depth_dec(q) do { } while (0) | |
45 | +#endif | |
46 | + | |
47 | +static void icq_free_icq_rcu(struct rcu_head *head) | |
48 | +{ | |
49 | + struct io_cq *icq = container_of(head, struct io_cq, __rcu_head); | |
50 | + | |
51 | + kmem_cache_free(icq->__rcu_icq_cache, icq); | |
52 | +} | |
53 | + | |
54 | +/* | |
55 | + * Exit and free an icq. Called with both ioc and q locked. | |
56 | + */ | |
57 | +static void ioc_exit_icq(struct io_cq *icq) | |
58 | +{ | |
59 | + struct io_context *ioc = icq->ioc; | |
60 | + struct request_queue *q = icq->q; | |
61 | + struct elevator_type *et = q->elevator->type; | |
62 | + | |
63 | + lockdep_assert_held(&ioc->lock); | |
64 | + lockdep_assert_held(q->queue_lock); | |
65 | + | |
66 | + radix_tree_delete(&ioc->icq_tree, icq->q->id); | |
67 | + hlist_del_init(&icq->ioc_node); | |
68 | + list_del_init(&icq->q_node); | |
69 | + | |
70 | + /* | |
71 | + * Both setting lookup hint to and clearing it from @icq are done | |
72 | + * under queue_lock. If it's not pointing to @icq now, it never | |
73 | + * will. Hint assignment itself can race safely. | |
74 | + */ | |
75 | + if (rcu_dereference_raw(ioc->icq_hint) == icq) | |
76 | + rcu_assign_pointer(ioc->icq_hint, NULL); | |
77 | + | |
78 | + if (et->ops.elevator_exit_icq_fn) { | |
79 | + ioc_release_depth_inc(q); | |
80 | + et->ops.elevator_exit_icq_fn(icq); | |
81 | + ioc_release_depth_dec(q); | |
27 | 82 | } |
83 | + | |
84 | + /* | |
85 | + * @icq->q might have gone away by the time RCU callback runs | |
86 | + * making it impossible to determine icq_cache. Record it in @icq. | |
87 | + */ | |
88 | + icq->__rcu_icq_cache = et->icq_cache; | |
89 | + call_rcu(&icq->__rcu_head, icq_free_icq_rcu); | |
28 | 90 | } |
29 | 91 | |
30 | 92 | /* |
31 | - * IO Context helper functions. put_io_context() returns 1 if there are no | |
32 | - * more users of this io context, 0 otherwise. | |
93 | + * Slow path for ioc release in put_io_context(). Performs double-lock | |
94 | + * dancing to unlink all icq's and then frees ioc. | |
33 | 95 | */ |
34 | -int put_io_context(struct io_context *ioc) | |
96 | +static void ioc_release_fn(struct work_struct *work) | |
35 | 97 | { |
36 | - if (ioc == NULL) | |
37 | - return 1; | |
98 | + struct io_context *ioc = container_of(work, struct io_context, | |
99 | + release_work); | |
100 | + struct request_queue *last_q = NULL; | |
38 | 101 | |
39 | - BUG_ON(atomic_long_read(&ioc->refcount) == 0); | |
102 | + spin_lock_irq(&ioc->lock); | |
40 | 103 | |
41 | - if (atomic_long_dec_and_test(&ioc->refcount)) { | |
42 | - rcu_read_lock(); | |
43 | - cfq_dtor(ioc); | |
44 | - rcu_read_unlock(); | |
104 | + while (!hlist_empty(&ioc->icq_list)) { | |
105 | + struct io_cq *icq = hlist_entry(ioc->icq_list.first, | |
106 | + struct io_cq, ioc_node); | |
107 | + struct request_queue *this_q = icq->q; | |
45 | 108 | |
46 | - kmem_cache_free(iocontext_cachep, ioc); | |
47 | - return 1; | |
109 | + if (this_q != last_q) { | |
110 | + /* | |
111 | + * Need to switch to @this_q. Once we release | |
112 | + * @ioc->lock, it can go away along with @cic. | |
113 | + * Hold on to it. | |
114 | + */ | |
115 | + __blk_get_queue(this_q); | |
116 | + | |
117 | + /* | |
118 | + * blk_put_queue() might sleep thanks to kobject | |
119 | + * idiocy. Always release both locks, put and | |
120 | + * restart. | |
121 | + */ | |
122 | + if (last_q) { | |
123 | + spin_unlock(last_q->queue_lock); | |
124 | + spin_unlock_irq(&ioc->lock); | |
125 | + blk_put_queue(last_q); | |
126 | + } else { | |
127 | + spin_unlock_irq(&ioc->lock); | |
128 | + } | |
129 | + | |
130 | + last_q = this_q; | |
131 | + spin_lock_irq(this_q->queue_lock); | |
132 | + spin_lock(&ioc->lock); | |
133 | + continue; | |
134 | + } | |
135 | + ioc_exit_icq(icq); | |
48 | 136 | } |
49 | - return 0; | |
137 | + | |
138 | + if (last_q) { | |
139 | + spin_unlock(last_q->queue_lock); | |
140 | + spin_unlock_irq(&ioc->lock); | |
141 | + blk_put_queue(last_q); | |
142 | + } else { | |
143 | + spin_unlock_irq(&ioc->lock); | |
144 | + } | |
145 | + | |
146 | + kmem_cache_free(iocontext_cachep, ioc); | |
50 | 147 | } |
51 | -EXPORT_SYMBOL(put_io_context); | |
52 | 148 | |
53 | -static void cfq_exit(struct io_context *ioc) | |
149 | +/** | |
150 | + * put_io_context - put a reference of io_context | |
151 | + * @ioc: io_context to put | |
152 | + * @locked_q: request_queue the caller is holding queue_lock of (hint) | |
153 | + * | |
154 | + * Decrement reference count of @ioc and release it if the count reaches | |
155 | + * zero. If the caller is holding queue_lock of a queue, it can indicate | |
156 | + * that with @locked_q. This is an optimization hint and the caller is | |
157 | + * allowed to pass in %NULL even when it's holding a queue_lock. | |
158 | + */ | |
159 | +void put_io_context(struct io_context *ioc, struct request_queue *locked_q) | |
54 | 160 | { |
55 | - rcu_read_lock(); | |
161 | + struct request_queue *last_q = locked_q; | |
162 | + unsigned long flags; | |
56 | 163 | |
57 | - if (!hlist_empty(&ioc->cic_list)) { | |
58 | - struct cfq_io_context *cic; | |
164 | + if (ioc == NULL) | |
165 | + return; | |
59 | 166 | |
60 | - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, | |
61 | - cic_list); | |
62 | - cic->exit(ioc); | |
167 | + BUG_ON(atomic_long_read(&ioc->refcount) <= 0); | |
168 | + if (locked_q) | |
169 | + lockdep_assert_held(locked_q->queue_lock); | |
170 | + | |
171 | + if (!atomic_long_dec_and_test(&ioc->refcount)) | |
172 | + return; | |
173 | + | |
174 | + /* | |
175 | + * Destroy @ioc. This is a bit messy because icq's are chained | |
176 | + * from both ioc and queue, and ioc->lock nests inside queue_lock. | |
177 | + * The inner ioc->lock should be held to walk our icq_list and then | |
178 | + * for each icq the outer matching queue_lock should be grabbed. | |
179 | + * ie. We need to do reverse-order double lock dancing. | |
180 | + * | |
181 | + * Another twist is that we are often called with one of the | |
182 | + * matching queue_locks held as indicated by @locked_q, which | |
183 | + * prevents performing double-lock dance for other queues. | |
184 | + * | |
185 | + * So, we do it in two stages. The fast path uses the queue_lock | |
186 | + * the caller is holding and, if other queues need to be accessed, | |
187 | + * uses trylock to avoid introducing locking dependency. This can | |
188 | + * handle most cases, especially if @ioc was performing IO on only | |
189 | + * single device. | |
190 | + * | |
191 | + * If trylock doesn't cut it, we defer to @ioc->release_work which | |
192 | + * can do all the double-locking dancing. | |
193 | + */ | |
194 | + spin_lock_irqsave_nested(&ioc->lock, flags, | |
195 | + ioc_release_depth(locked_q)); | |
196 | + | |
197 | + while (!hlist_empty(&ioc->icq_list)) { | |
198 | + struct io_cq *icq = hlist_entry(ioc->icq_list.first, | |
199 | + struct io_cq, ioc_node); | |
200 | + struct request_queue *this_q = icq->q; | |
201 | + | |
202 | + if (this_q != last_q) { | |
203 | + if (last_q && last_q != locked_q) | |
204 | + spin_unlock(last_q->queue_lock); | |
205 | + last_q = NULL; | |
206 | + | |
207 | + if (!spin_trylock(this_q->queue_lock)) | |
208 | + break; | |
209 | + last_q = this_q; | |
210 | + continue; | |
211 | + } | |
212 | + ioc_exit_icq(icq); | |
63 | 213 | } |
64 | - rcu_read_unlock(); | |
214 | + | |
215 | + if (last_q && last_q != locked_q) | |
216 | + spin_unlock(last_q->queue_lock); | |
217 | + | |
218 | + spin_unlock_irqrestore(&ioc->lock, flags); | |
219 | + | |
220 | + /* if no icq is left, we're done; otherwise, kick release_work */ | |
221 | + if (hlist_empty(&ioc->icq_list)) | |
222 | + kmem_cache_free(iocontext_cachep, ioc); | |
223 | + else | |
224 | + schedule_work(&ioc->release_work); | |
65 | 225 | } |
226 | +EXPORT_SYMBOL(put_io_context); | |
66 | 227 | |
67 | 228 | /* Called by the exiting task */ |
68 | 229 | void exit_io_context(struct task_struct *task) |
74 | 235 | task->io_context = NULL; |
75 | 236 | task_unlock(task); |
76 | 237 | |
77 | - if (atomic_dec_and_test(&ioc->nr_tasks)) | |
78 | - cfq_exit(ioc); | |
238 | + atomic_dec(&ioc->nr_tasks); | |
239 | + put_io_context(ioc, NULL); | |
240 | +} | |
79 | 241 | |
80 | - put_io_context(ioc); | |
242 | +/** | |
243 | + * ioc_clear_queue - break any ioc association with the specified queue | |
244 | + * @q: request_queue being cleared | |
245 | + * | |
246 | + * Walk @q->icq_list and exit all io_cq's. Must be called with @q locked. | |
247 | + */ | |
248 | +void ioc_clear_queue(struct request_queue *q) | |
249 | +{ | |
250 | + lockdep_assert_held(q->queue_lock); | |
251 | + | |
252 | + while (!list_empty(&q->icq_list)) { | |
253 | + struct io_cq *icq = list_entry(q->icq_list.next, | |
254 | + struct io_cq, q_node); | |
255 | + struct io_context *ioc = icq->ioc; | |
256 | + | |
257 | + spin_lock(&ioc->lock); | |
258 | + ioc_exit_icq(icq); | |
259 | + spin_unlock(&ioc->lock); | |
260 | + } | |
81 | 261 | } |
82 | 262 | |
83 | -struct io_context *alloc_io_context(gfp_t gfp_flags, int node) | |
263 | +void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags, | |
264 | + int node) | |
84 | 265 | { |
85 | 266 | struct io_context *ioc; |
86 | 267 | |
87 | - ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); | |
88 | - if (ioc) { | |
89 | - atomic_long_set(&ioc->refcount, 1); | |
90 | - atomic_set(&ioc->nr_tasks, 1); | |
91 | - spin_lock_init(&ioc->lock); | |
92 | - ioc->ioprio_changed = 0; | |
93 | - ioc->ioprio = 0; | |
94 | - ioc->last_waited = 0; /* doesn't matter... */ | |
95 | - ioc->nr_batch_requests = 0; /* because this is 0 */ | |
96 | - INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); | |
97 | - INIT_HLIST_HEAD(&ioc->cic_list); | |
98 | - ioc->ioc_data = NULL; | |
99 | -#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) | |
100 | - ioc->cgroup_changed = 0; | |
101 | -#endif | |
102 | - } | |
268 | + ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO, | |
269 | + node); | |
270 | + if (unlikely(!ioc)) | |
271 | + return; | |
103 | 272 | |
104 | - return ioc; | |
273 | + /* initialize */ | |
274 | + atomic_long_set(&ioc->refcount, 1); | |
275 | + atomic_set(&ioc->nr_tasks, 1); | |
276 | + spin_lock_init(&ioc->lock); | |
277 | + INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH); | |
278 | + INIT_HLIST_HEAD(&ioc->icq_list); | |
279 | + INIT_WORK(&ioc->release_work, ioc_release_fn); | |
280 | + | |
281 | + /* | |
282 | + * Try to install. ioc shouldn't be installed if someone else | |
283 | + * already did or @task, which isn't %current, is exiting. Note | |
284 | + * that we need to allow ioc creation on exiting %current as exit | |
285 | + * path may issue IOs from e.g. exit_files(). The exit path is | |
286 | + * responsible for not issuing IO after exit_io_context(). | |
287 | + */ | |
288 | + task_lock(task); | |
289 | + if (!task->io_context && | |
290 | + (task == current || !(task->flags & PF_EXITING))) | |
291 | + task->io_context = ioc; | |
292 | + else | |
293 | + kmem_cache_free(iocontext_cachep, ioc); | |
294 | + task_unlock(task); | |
105 | 295 | } |
106 | 296 | |
107 | -/* | |
108 | - * If the current task has no IO context then create one and initialise it. | |
109 | - * Otherwise, return its existing IO context. | |
297 | +/** | |
298 | + * get_task_io_context - get io_context of a task | |
299 | + * @task: task of interest | |
300 | + * @gfp_flags: allocation flags, used if allocation is necessary | |
301 | + * @node: allocation node, used if allocation is necessary | |
110 | 302 | * |
111 | - * This returned IO context doesn't have a specifically elevated refcount, | |
112 | - * but since the current task itself holds a reference, the context can be | |
113 | - * used in general code, so long as it stays within `current` context. | |
303 | + * Return io_context of @task. If it doesn't exist, it is created with | |
304 | + * @gfp_flags and @node. The returned io_context has its reference count | |
305 | + * incremented. | |
306 | + * | |
307 | + * This function always goes through task_lock() and it's better to use | |
308 | + * %current->io_context + get_io_context() for %current. | |
114 | 309 | */ |
115 | -struct io_context *current_io_context(gfp_t gfp_flags, int node) | |
310 | +struct io_context *get_task_io_context(struct task_struct *task, | |
311 | + gfp_t gfp_flags, int node) | |
116 | 312 | { |
117 | - struct task_struct *tsk = current; | |
118 | - struct io_context *ret; | |
313 | + struct io_context *ioc; | |
119 | 314 | |
120 | - ret = tsk->io_context; | |
121 | - if (likely(ret)) | |
122 | - return ret; | |
315 | + might_sleep_if(gfp_flags & __GFP_WAIT); | |
123 | 316 | |
124 | - ret = alloc_io_context(gfp_flags, node); | |
125 | - if (ret) { | |
126 | - /* make sure set_task_ioprio() sees the settings above */ | |
127 | - smp_wmb(); | |
128 | - tsk->io_context = ret; | |
129 | - } | |
317 | + do { | |
318 | + task_lock(task); | |
319 | + ioc = task->io_context; | |
320 | + if (likely(ioc)) { | |
321 | + get_io_context(ioc); | |
322 | + task_unlock(task); | |
323 | + return ioc; | |
324 | + } | |
325 | + task_unlock(task); | |
326 | + } while (create_io_context(task, gfp_flags, node)); | |
130 | 327 | |
131 | - return ret; | |
328 | + return NULL; | |
132 | 329 | } |
330 | +EXPORT_SYMBOL(get_task_io_context); | |
133 | 331 | |
134 | -/* | |
135 | - * If the current task has no IO context then create one and initialise it. | |
136 | - * If it does have a context, take a ref on it. | |
332 | +/** | |
333 | + * ioc_lookup_icq - lookup io_cq from ioc | |
334 | + * @ioc: the associated io_context | |
335 | + * @q: the associated request_queue | |
137 | 336 | * |
138 | - * This is always called in the context of the task which submitted the I/O. | |
337 | + * Look up io_cq associated with @ioc - @q pair from @ioc. Must be called | |
338 | + * with @q->queue_lock held. | |
139 | 339 | */ |
140 | -struct io_context *get_io_context(gfp_t gfp_flags, int node) | |
340 | +struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q) | |
141 | 341 | { |
142 | - struct io_context *ioc = NULL; | |
342 | + struct io_cq *icq; | |
143 | 343 | |
344 | + lockdep_assert_held(q->queue_lock); | |
345 | + | |
144 | 346 | /* |
145 | - * Check for unlikely race with exiting task. ioc ref count is | |
146 | - * zero when ioc is being detached. | |
347 | + * icq's are indexed from @ioc using radix tree and hint pointer, | |
348 | + * both of which are protected with RCU. All removals are done | |
349 | + * holding both q and ioc locks, and we're holding q lock - if we | |
350 | + * find a icq which points to us, it's guaranteed to be valid. | |
147 | 351 | */ |
148 | - do { | |
149 | - ioc = current_io_context(gfp_flags, node); | |
150 | - if (unlikely(!ioc)) | |
151 | - break; | |
152 | - } while (!atomic_long_inc_not_zero(&ioc->refcount)); | |
352 | + rcu_read_lock(); | |
353 | + icq = rcu_dereference(ioc->icq_hint); | |
354 | + if (icq && icq->q == q) | |
355 | + goto out; | |
153 | 356 | |
154 | - return ioc; | |
357 | + icq = radix_tree_lookup(&ioc->icq_tree, q->id); | |
358 | + if (icq && icq->q == q) | |
359 | + rcu_assign_pointer(ioc->icq_hint, icq); /* allowed to race */ | |
360 | + else | |
361 | + icq = NULL; | |
362 | +out: | |
363 | + rcu_read_unlock(); | |
364 | + return icq; | |
155 | 365 | } |
156 | -EXPORT_SYMBOL(get_io_context); | |
366 | +EXPORT_SYMBOL(ioc_lookup_icq); | |
367 | + | |
368 | +/** | |
369 | + * ioc_create_icq - create and link io_cq | |
370 | + * @q: request_queue of interest | |
371 | + * @gfp_mask: allocation mask | |
372 | + * | |
373 | + * Make sure io_cq linking %current->io_context and @q exists. If either | |
374 | + * io_context and/or icq don't exist, they will be created using @gfp_mask. | |
375 | + * | |
376 | + * The caller is responsible for ensuring @ioc won't go away and @q is | |
377 | + * alive and will stay alive until this function returns. | |
378 | + */ | |
379 | +struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask) | |
380 | +{ | |
381 | + struct elevator_type *et = q->elevator->type; | |
382 | + struct io_context *ioc; | |
383 | + struct io_cq *icq; | |
384 | + | |
385 | + /* allocate stuff */ | |
386 | + ioc = create_io_context(current, gfp_mask, q->node); | |
387 | + if (!ioc) | |
388 | + return NULL; | |
389 | + | |
390 | + icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO, | |
391 | + q->node); | |
392 | + if (!icq) | |
393 | + return NULL; | |
394 | + | |
395 | + if (radix_tree_preload(gfp_mask) < 0) { | |
396 | + kmem_cache_free(et->icq_cache, icq); | |
397 | + return NULL; | |
398 | + } | |
399 | + | |
400 | + icq->ioc = ioc; | |
401 | + icq->q = q; | |
402 | + INIT_LIST_HEAD(&icq->q_node); | |
403 | + INIT_HLIST_NODE(&icq->ioc_node); | |
404 | + | |
405 | + /* lock both q and ioc and try to link @icq */ | |
406 | + spin_lock_irq(q->queue_lock); | |
407 | + spin_lock(&ioc->lock); | |
408 | + | |
409 | + if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { | |
410 | + hlist_add_head(&icq->ioc_node, &ioc->icq_list); | |
411 | + list_add(&icq->q_node, &q->icq_list); | |
412 | + if (et->ops.elevator_init_icq_fn) | |
413 | + et->ops.elevator_init_icq_fn(icq); | |
414 | + } else { | |
415 | + kmem_cache_free(et->icq_cache, icq); | |
416 | + icq = ioc_lookup_icq(ioc, q); | |
417 | + if (!icq) | |
418 | + printk(KERN_ERR "cfq: icq link failed!\n"); | |
419 | + } | |
420 | + | |
421 | + spin_unlock(&ioc->lock); | |
422 | + spin_unlock_irq(q->queue_lock); | |
423 | + radix_tree_preload_end(); | |
424 | + return icq; | |
425 | +} | |
426 | + | |
427 | +void ioc_set_changed(struct io_context *ioc, int which) | |
428 | +{ | |
429 | + struct io_cq *icq; | |
430 | + struct hlist_node *n; | |
431 | + | |
432 | + hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node) | |
433 | + set_bit(which, &icq->changed); | |
434 | +} | |
435 | + | |
436 | +/** | |
437 | + * ioc_ioprio_changed - notify ioprio change | |
438 | + * @ioc: io_context of interest | |
439 | + * @ioprio: new ioprio | |
440 | + * | |
441 | + * @ioc's ioprio has changed to @ioprio. Set %ICQ_IOPRIO_CHANGED for all | |
442 | + * icq's. iosched is responsible for checking the bit and applying it on | |
443 | + * request issue path. | |
444 | + */ | |
445 | +void ioc_ioprio_changed(struct io_context *ioc, int ioprio) | |
446 | +{ | |
447 | + unsigned long flags; | |
448 | + | |
449 | + spin_lock_irqsave(&ioc->lock, flags); | |
450 | + ioc->ioprio = ioprio; | |
451 | + ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED); | |
452 | + spin_unlock_irqrestore(&ioc->lock, flags); | |
453 | +} | |
454 | + | |
455 | +/** | |
456 | + * ioc_cgroup_changed - notify cgroup change | |
457 | + * @ioc: io_context of interest | |
458 | + * | |
459 | + * @ioc's cgroup has changed. Set %ICQ_CGROUP_CHANGED for all icq's. | |
460 | + * iosched is responsible for checking the bit and applying it on request | |
461 | + * issue path. | |
462 | + */ | |
463 | +void ioc_cgroup_changed(struct io_context *ioc) | |
464 | +{ | |
465 | + unsigned long flags; | |
466 | + | |
467 | + spin_lock_irqsave(&ioc->lock, flags); | |
468 | + ioc_set_changed(ioc, ICQ_CGROUP_CHANGED); | |
469 | + spin_unlock_irqrestore(&ioc->lock, flags); | |
470 | +} | |
471 | +EXPORT_SYMBOL(ioc_cgroup_changed); | |
157 | 472 | |
158 | 473 | static int __init blk_ioc_init(void) |
159 | 474 | { |
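The new ioc_ioprio_changed() and ioc_cgroup_changed() only set per-icq flag bits; per the kernel-doc above, applying them is left to the I/O scheduler on the request issue path. A sketch of what that consuming side might look like (the function is illustrative; only the ICQ_* flags and icq->changed come from this patch):

	#include <linux/bitops.h>
	#include <linux/iocontext.h>
	#include <linux/printk.h>

	static void example_apply_icq_changes(struct io_cq *icq)
	{
		/* ioprio moved: re-read icq->ioc->ioprio before classifying requests */
		if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &icq->changed))
			pr_debug("icq %p: ioprio is now %d\n", icq, icq->ioc->ioprio);

		/* task changed blkio cgroup: any cached group association is stale */
		if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &icq->changed))
			pr_debug("icq %p: cgroup changed\n", icq);
	}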
block/blk-settings.c
... | ... | @@ -104,9 +104,7 @@ |
104 | 104 | * @lim: the queue_limits structure to reset |
105 | 105 | * |
106 | 106 | * Description: |
107 | - * Returns a queue_limit struct to its default state. Can be used by | |
108 | - * stacking drivers like DM that stage table swaps and reuse an | |
109 | - * existing device queue. | |
107 | + * Returns a queue_limit struct to its default state. | |
110 | 108 | */ |
111 | 109 | void blk_set_default_limits(struct queue_limits *lim) |
112 | 110 | { |
... | ... | @@ -114,13 +112,12 @@ |
114 | 112 | lim->max_integrity_segments = 0; |
115 | 113 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; |
116 | 114 | lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; |
117 | - lim->max_sectors = BLK_DEF_MAX_SECTORS; | |
118 | - lim->max_hw_sectors = INT_MAX; | |
115 | + lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; | |
119 | 116 | lim->max_discard_sectors = 0; |
120 | 117 | lim->discard_granularity = 0; |
121 | 118 | lim->discard_alignment = 0; |
122 | 119 | lim->discard_misaligned = 0; |
123 | - lim->discard_zeroes_data = 1; | |
120 | + lim->discard_zeroes_data = 0; | |
124 | 121 | lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; |
125 | 122 | lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); |
126 | 123 | lim->alignment_offset = 0; |
... | ... | @@ -131,6 +128,27 @@ |
131 | 128 | EXPORT_SYMBOL(blk_set_default_limits); |
132 | 129 | |
133 | 130 | /** |
131 | + * blk_set_stacking_limits - set default limits for stacking devices | |
132 | + * @lim: the queue_limits structure to reset | |
133 | + * | |
134 | + * Description: | |
135 | + * Returns a queue_limit struct to its default state. Should be used | |
136 | + * by stacking drivers like DM that have no internal limits. | |
137 | + */ | |
138 | +void blk_set_stacking_limits(struct queue_limits *lim) | |
139 | +{ | |
140 | + blk_set_default_limits(lim); | |
141 | + | |
142 | + /* Inherit limits from component devices */ | |
143 | + lim->discard_zeroes_data = 1; | |
144 | + lim->max_segments = USHRT_MAX; | |
145 | + lim->max_hw_sectors = UINT_MAX; | |
146 | + | |
147 | + lim->max_sectors = BLK_DEF_MAX_SECTORS; | |
148 | +} | |
149 | +EXPORT_SYMBOL(blk_set_stacking_limits); | |
150 | + | |
151 | +/** | |
134 | 152 | * blk_queue_make_request - define an alternate make_request function for a device |
135 | 153 | * @q: the request queue for the device to be affected |
136 | 154 | * @mfn: the alternate make_request function |
... | ... | @@ -165,8 +183,6 @@ |
165 | 183 | q->nr_batching = BLK_BATCH_REQ; |
166 | 184 | |
167 | 185 | blk_set_default_limits(&q->limits); |
168 | - blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); | |
169 | - q->limits.discard_zeroes_data = 0; | |
170 | 186 | |
171 | 187 | /* |
172 | 188 | * by default assume old behaviour and bounce for any highmem page |
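blk_set_stacking_limits() is aimed at stacking drivers (dm-table.c and md.c are among the files touched by this merge): they start from "no limits" and then tighten to what the component devices support. A sketch of the intended calling pattern, assuming the pre-existing bdev_stack_limits() helper; the function and its arguments are illustrative:

	#include <linux/blkdev.h>

	static void example_stack_limits(struct request_queue *q,
					 struct block_device **parts, int nr)
	{
		struct queue_limits lim;
		int i;

		/* start from "no limits" rather than the conservative defaults */
		blk_set_stacking_limits(&lim);

		/* then tighten to what each component device can actually do
		 * (offset 0 here: whole devices, no partition alignment) */
		for (i = 0; i < nr; i++)
			bdev_stack_limits(&lim, parts[i], 0);

		q->limits = lim;	/* simplified; real drivers apply limits via their own paths */
	}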
block/blk-sysfs.c
... | ... | @@ -425,7 +425,7 @@ |
425 | 425 | if (!entry->show) |
426 | 426 | return -EIO; |
427 | 427 | mutex_lock(&q->sysfs_lock); |
428 | - if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | |
428 | + if (blk_queue_dead(q)) { | |
429 | 429 | mutex_unlock(&q->sysfs_lock); |
430 | 430 | return -ENOENT; |
431 | 431 | } |
... | ... | @@ -447,7 +447,7 @@ |
447 | 447 | |
448 | 448 | q = container_of(kobj, struct request_queue, kobj); |
449 | 449 | mutex_lock(&q->sysfs_lock); |
450 | - if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | |
450 | + if (blk_queue_dead(q)) { | |
451 | 451 | mutex_unlock(&q->sysfs_lock); |
452 | 452 | return -ENOENT; |
453 | 453 | } |
454 | 454 | |
... | ... | @@ -479,8 +479,12 @@ |
479 | 479 | |
480 | 480 | blk_sync_queue(q); |
481 | 481 | |
482 | - if (q->elevator) | |
482 | + if (q->elevator) { | |
483 | + spin_lock_irq(q->queue_lock); | |
484 | + ioc_clear_queue(q); | |