Commit 275220f0fcff1adf28a717076e00f575edf05fda
Merge branch 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block: (43 commits)
  block: ensure that completion error gets properly traced
  blktrace: add missing probe argument to block_bio_complete
  block cfq: don't use atomic_t for cfq_group
  block cfq: don't use atomic_t for cfq_queue
  block: trace event block fix unassigned field
  block: add internal hd part table references
  block: fix accounting bug on cross partition merges
  kref: add kref_test_and_get
  bio-integrity: mark kintegrityd_wq highpri and CPU intensive
  block: make kblockd_workqueue smarter
  Revert "sd: implement sd_check_events()"
  block: Clean up exit_io_context() source code.
  Fix compile warnings due to missing removal of a 'ret' variable
  fs/block: type signature of major_to_index(int) to major_to_index(unsigned)
  block: convert !IS_ERR(p) && p to !IS_ERR_OR_NULL(p)
  cfq-iosched: don't check cfqg in choose_service_tree()
  fs/splice: Pull buf->ops->confirm() from splice_from_pipe actors
  cdrom: export cdrom_check_events()
  sd: implement sd_check_events()
  sr: implement sr_check_events()
  ...
Showing 53 changed files
- Documentation/cgroups/blkio-controller.txt
- block/blk-cgroup.c
- block/blk-core.c
- block/blk-ioc.c
- block/blk-merge.c
- block/cfq-iosched.c
- block/genhd.c
- block/ioctl.c
- drivers/block/drbd/drbd_int.h
- drivers/block/drbd/drbd_main.c
- drivers/block/drbd/drbd_nl.c
- drivers/block/loop.c
- drivers/block/pktcdvd.c
- drivers/cdrom/cdrom.c
- drivers/char/raw.c
- drivers/md/dm-table.c
- drivers/md/dm.c
- drivers/md/md.c
- drivers/mtd/devices/block2mtd.c
- drivers/s390/block/dasd_genhd.c
- drivers/scsi/scsi_lib.c
- drivers/scsi/sd.c
- drivers/scsi/sr.c
- drivers/scsi/sr.h
- drivers/scsi/sr_ioctl.c
- drivers/usb/gadget/storage_common.c
- fs/bio-integrity.c
- fs/block_dev.c
- fs/btrfs/volumes.c
- fs/btrfs/volumes.h
- fs/char_dev.c
- fs/ext3/super.c
- fs/ext4/super.c
- fs/gfs2/ops_fstype.c
- fs/jfs/jfs_logmgr.c
- fs/logfs/dev_bdev.c
- fs/nfsd/vfs.c
- fs/nilfs2/super.c
- fs/ocfs2/cluster/heartbeat.c
- fs/partitions/check.c
- fs/reiserfs/journal.c
- fs/splice.c
- fs/super.c
- fs/xfs/linux-2.6/xfs_super.c
- include/linux/blkdev.h
- include/linux/cdrom.h
- include/linux/fs.h
- include/linux/genhd.h
- include/scsi/scsi.h
- include/trace/events/block.h
- kernel/power/swap.c
- kernel/trace/blktrace.c
- mm/swapfile.c
Documentation/cgroups/blkio-controller.txt
... | ... | @@ -89,6 +89,33 @@ |
89 | 89 | |
90 | 90 | Limits for writes can be put using blkio.write_bps_device file. |
91 | 91 | |
92 | +Hierarchical Cgroups | |
93 | +==================== | |
94 | +- Currently none of the IO control policies supports hierarchical groups. |
95 | +  But the cgroup interface does allow creation of hierarchical cgroups and |
96 | +  internally the IO policies treat them as a flat hierarchy. |
97 | + | |
98 | +  So this patch will allow creation of a cgroup hierarchy, but at the |
99 | +  backend everything will be treated as flat. So if somebody creates a |
100 | +  hierarchy like the following: |
101 | + | |
102 | + root | |
103 | + / \ | |
104 | + test1 test2 | |
105 | + | | |
106 | + test3 | |
107 | + | |
108 | +  CFQ and throttling will practically treat all groups at the same level. |
109 | + | |
110 | + pivot | |
111 | + / | \ \ | |
112 | + root test1 test2 test3 | |
113 | + | |
114 | + Down the line we can implement hierarchical accounting/control support | |
115 | + and also introduce a new cgroup file "use_hierarchy" which will control | |
116 | +  whether the cgroup hierarchy is viewed as flat or hierarchical by the |
117 | +  policy. This is how the memory controller has implemented it as well. |
118 | + | |
92 | 119 | Various user visible config options |
93 | 120 | =================================== |
94 | 121 | CONFIG_BLK_CGROUP |
block/blk-cgroup.c
... | ... | @@ -1452,10 +1452,6 @@ |
1452 | 1452 | goto done; |
1453 | 1453 | } |
1454 | 1454 | |
1455 | - /* Currently we do not support hierarchy deeper than two level (0,1) */ | |
1456 | - if (parent != cgroup->top_cgroup) | |
1457 | - return ERR_PTR(-EPERM); | |
1458 | - | |
1459 | 1455 | blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); |
1460 | 1456 | if (!blkcg) |
1461 | 1457 | return ERR_PTR(-ENOMEM); |
block/blk-core.c
... | ... | @@ -33,7 +33,7 @@ |
33 | 33 | |
34 | 34 | #include "blk.h" |
35 | 35 | |
36 | -EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); | |
36 | +EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); | |
37 | 37 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); |
38 | 38 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); |
39 | 39 | |
40 | 40 | |
41 | 41 | |
42 | 42 | |
... | ... | @@ -64,13 +64,27 @@ |
64 | 64 | return; |
65 | 65 | |
66 | 66 | cpu = part_stat_lock(); |
67 | - part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); | |
68 | 67 | |
69 | - if (!new_io) | |
68 | + if (!new_io) { | |
69 | + part = rq->part; | |
70 | 70 | part_stat_inc(cpu, part, merges[rw]); |
71 | - else { | |
71 | + } else { | |
72 | + part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); | |
73 | + if (!hd_struct_try_get(part)) { | |
74 | + /* | |
75 | + * The partition is already being removed, | |
76 | + * the request will be accounted on the disk only | |
77 | + * | |
78 | + * We take a reference on disk->part0 although that | |
79 | + * partition will never be deleted, so we can treat | |
80 | + * it as any other partition. | |
81 | + */ | |
82 | + part = &rq->rq_disk->part0; | |
83 | + hd_struct_get(part); | |
84 | + } | |
72 | 85 | part_round_stats(cpu, part); |
73 | 86 | part_inc_in_flight(part, rw); |
87 | + rq->part = part; | |
74 | 88 | } |
75 | 89 | |
76 | 90 | part_stat_unlock(); |
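The hunk above leans on hd_struct_try_get()/hd_struct_get()/hd_struct_put(), which come from the "block: add internal hd part table references" and "kref: add kref_test_and_get" commits in this pull. A minimal sketch of how such helpers could fit together — assuming struct hd_struct carries a kref named ref and that kref_test_and_get() takes a reference only while the count is still nonzero; the release callback name is illustrative:

static inline void hd_struct_get(struct hd_struct *part)
{
	kref_get(&part->ref);
}

/* returns nonzero iff a reference was taken, i.e. the partition is live */
static inline int hd_struct_try_get(struct hd_struct *part)
{
	return kref_test_and_get(&part->ref);
}

static inline void hd_struct_put(struct hd_struct *part)
{
	kref_put(&part->ref, __delete_partition);	/* assumed release fn */
}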
... | ... | @@ -128,6 +142,7 @@ |
128 | 142 | rq->ref_count = 1; |
129 | 143 | rq->start_time = jiffies; |
130 | 144 | set_start_time_ns(rq); |
145 | + rq->part = NULL; | |
131 | 146 | } |
132 | 147 | EXPORT_SYMBOL(blk_rq_init); |
133 | 148 | |
... | ... | @@ -1329,9 +1344,9 @@ |
1329 | 1344 | bio->bi_sector += p->start_sect; |
1330 | 1345 | bio->bi_bdev = bdev->bd_contains; |
1331 | 1346 | |
1332 | - trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, | |
1333 | - bdev->bd_dev, | |
1334 | - bio->bi_sector - p->start_sect); | |
1347 | + trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, | |
1348 | + bdev->bd_dev, | |
1349 | + bio->bi_sector - p->start_sect); | |
1335 | 1350 | } |
1336 | 1351 | } |
1337 | 1352 | |
... | ... | @@ -1500,7 +1515,7 @@ |
1500 | 1515 | goto end_io; |
1501 | 1516 | |
1502 | 1517 | if (old_sector != -1) |
1503 | - trace_block_remap(q, bio, old_dev, old_sector); | |
1518 | + trace_block_bio_remap(q, bio, old_dev, old_sector); | |
1504 | 1519 | |
1505 | 1520 | old_sector = bio->bi_sector; |
1506 | 1521 | old_dev = bio->bi_bdev->bd_dev; |
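With block_remap renamed to block_bio_remap, any external probe has to register against the new tracepoint name. A hedged sketch of a consumer using this era's data-carrying tracepoint API, inside a module that includes <trace/events/block.h>; my_bio_remap_probe is an invented name, and the prototype mirrors the tracepoint's TP_PROTO:

static void my_bio_remap_probe(void *ignore, struct request_queue *q,
			       struct bio *bio, dev_t dev, sector_t from)
{
	/* fires for every bio remapped across a partition boundary */
	pr_debug("remap to %u:%u from sector %llu\n",
		 MAJOR(dev), MINOR(dev), (unsigned long long)from);
}

/* in module init / exit: */
register_trace_block_bio_remap(my_bio_remap_probe, NULL);
unregister_trace_block_bio_remap(my_bio_remap_probe, NULL);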
... | ... | @@ -1776,7 +1791,7 @@ |
1776 | 1791 | int cpu; |
1777 | 1792 | |
1778 | 1793 | cpu = part_stat_lock(); |
1779 | - part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); | |
1794 | + part = req->part; | |
1780 | 1795 | part_stat_add(cpu, part, sectors[rw], bytes >> 9); |
1781 | 1796 | part_stat_unlock(); |
1782 | 1797 | } |
1783 | 1798 | |
... | ... | @@ -1796,13 +1811,14 @@ |
1796 | 1811 | int cpu; |
1797 | 1812 | |
1798 | 1813 | cpu = part_stat_lock(); |
1799 | - part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); | |
1814 | + part = req->part; | |
1800 | 1815 | |
1801 | 1816 | part_stat_inc(cpu, part, ios[rw]); |
1802 | 1817 | part_stat_add(cpu, part, ticks[rw], duration); |
1803 | 1818 | part_round_stats(cpu, part); |
1804 | 1819 | part_dec_in_flight(part, rw); |
1805 | 1820 | |
1821 | + hd_struct_put(part); | |
1806 | 1822 | part_stat_unlock(); |
1807 | 1823 | } |
1808 | 1824 | } |
... | ... | @@ -2606,7 +2622,9 @@ |
2606 | 2622 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * |
2607 | 2623 | sizeof(((struct request *)0)->cmd_flags)); |
2608 | 2624 | |
2609 | - kblockd_workqueue = create_workqueue("kblockd"); | |
2625 | + /* used for unplugging and affects IO latency/throughput - HIGHPRI */ | |
2626 | + kblockd_workqueue = alloc_workqueue("kblockd", | |
2627 | + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); | |
2610 | 2628 | if (!kblockd_workqueue) |
2611 | 2629 | panic("Failed to create kblockd\n"); |
2612 | 2630 |
block/blk-ioc.c
... | ... | @@ -64,7 +64,7 @@ |
64 | 64 | rcu_read_unlock(); |
65 | 65 | } |
66 | 66 | |
67 | -/* Called by the exitting task */ | |
67 | +/* Called by the exiting task */ | |
68 | 68 | void exit_io_context(struct task_struct *task) |
69 | 69 | { |
70 | 70 | struct io_context *ioc; |
71 | 71 | |
... | ... | @@ -74,10 +74,9 @@ |
74 | 74 | task->io_context = NULL; |
75 | 75 | task_unlock(task); |
76 | 76 | |
77 | - if (atomic_dec_and_test(&ioc->nr_tasks)) { | |
77 | + if (atomic_dec_and_test(&ioc->nr_tasks)) | |
78 | 78 | cfq_exit(ioc); |
79 | 79 | |
80 | - } | |
81 | 80 | put_io_context(ioc); |
82 | 81 | } |
83 | 82 |
block/blk-merge.c
... | ... | @@ -351,11 +351,12 @@ |
351 | 351 | int cpu; |
352 | 352 | |
353 | 353 | cpu = part_stat_lock(); |
354 | - part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); | |
354 | + part = req->part; | |
355 | 355 | |
356 | 356 | part_round_stats(cpu, part); |
357 | 357 | part_dec_in_flight(part, rq_data_dir(req)); |
358 | 358 | |
359 | + hd_struct_put(part); | |
359 | 360 | part_stat_unlock(); |
360 | 361 | } |
361 | 362 | } |
block/cfq-iosched.c
... | ... | @@ -87,7 +87,6 @@ |
87 | 87 | unsigned count; |
88 | 88 | unsigned total_weight; |
89 | 89 | u64 min_vdisktime; |
90 | - struct rb_node *active; | |
91 | 90 | }; |
92 | 91 | #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ |
93 | 92 | .count = 0, .min_vdisktime = 0, } |
... | ... | @@ -97,7 +96,7 @@ |
97 | 96 | */ |
98 | 97 | struct cfq_queue { |
99 | 98 | /* reference count */ |
100 | - atomic_t ref; | |
99 | + int ref; | |
101 | 100 | /* various state flags, see below */ |
102 | 101 | unsigned int flags; |
103 | 102 | /* parent cfq_data */ |
... | ... | @@ -180,7 +179,6 @@ |
180 | 179 | /* group service_tree key */ |
181 | 180 | u64 vdisktime; |
182 | 181 | unsigned int weight; |
183 | - bool on_st; | |
184 | 182 | |
185 | 183 | /* number of cfqq currently on this group */ |
186 | 184 | int nr_cfqq; |
... | ... | @@ -209,7 +207,7 @@ |
209 | 207 | struct blkio_group blkg; |
210 | 208 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
211 | 209 | struct hlist_node cfqd_node; |
212 | - atomic_t ref; | |
210 | + int ref; | |
213 | 211 | #endif |
214 | 212 | /* number of requests that are on the dispatch list or inside driver */ |
215 | 213 | int dispatched; |
... | ... | @@ -563,11 +561,6 @@ |
563 | 561 | u64 vdisktime = st->min_vdisktime; |
564 | 562 | struct cfq_group *cfqg; |
565 | 563 | |
566 | - if (st->active) { | |
567 | - cfqg = rb_entry_cfqg(st->active); | |
568 | - vdisktime = cfqg->vdisktime; | |
569 | - } | |
570 | - | |
571 | 564 | if (st->left) { |
572 | 565 | cfqg = rb_entry_cfqg(st->left); |
573 | 566 | vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime); |
574 | 567 | |
575 | 568 | |
... | ... | @@ -646,11 +639,11 @@ |
646 | 639 | static inline bool cfq_slice_used(struct cfq_queue *cfqq) |
647 | 640 | { |
648 | 641 | if (cfq_cfqq_slice_new(cfqq)) |
649 | - return 0; | |
642 | + return false; | |
650 | 643 | if (time_before(jiffies, cfqq->slice_end)) |
651 | - return 0; | |
644 | + return false; | |
652 | 645 | |
653 | - return 1; | |
646 | + return true; | |
654 | 647 | } |
655 | 648 | |
656 | 649 | /* |
... | ... | @@ -869,7 +862,7 @@ |
869 | 862 | struct rb_node *n; |
870 | 863 | |
871 | 864 | cfqg->nr_cfqq++; |
872 | - if (cfqg->on_st) | |
865 | + if (!RB_EMPTY_NODE(&cfqg->rb_node)) | |
873 | 866 | return; |
874 | 867 | |
875 | 868 | /* |
... | ... | @@ -885,7 +878,6 @@ |
885 | 878 | cfqg->vdisktime = st->min_vdisktime; |
886 | 879 | |
887 | 880 | __cfq_group_service_tree_add(st, cfqg); |
888 | - cfqg->on_st = true; | |
889 | 881 | st->total_weight += cfqg->weight; |
890 | 882 | } |
891 | 883 | |
... | ... | @@ -894,9 +886,6 @@ |
894 | 886 | { |
895 | 887 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
896 | 888 | |
897 | - if (st->active == &cfqg->rb_node) | |
898 | - st->active = NULL; | |
899 | - | |
900 | 889 | BUG_ON(cfqg->nr_cfqq < 1); |
901 | 890 | cfqg->nr_cfqq--; |
902 | 891 | |
... | ... | @@ -905,7 +894,6 @@ |
905 | 894 | return; |
906 | 895 | |
907 | 896 | cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); |
908 | - cfqg->on_st = false; | |
909 | 897 | st->total_weight -= cfqg->weight; |
910 | 898 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) |
911 | 899 | cfq_rb_erase(&cfqg->rb_node, st); |
... | ... | @@ -1026,7 +1014,7 @@ |
1026 | 1014 | * elevator which will be dropped by either elevator exit |
1027 | 1015 | * or cgroup deletion path depending on who is exiting first. |
1028 | 1016 | */ |
1029 | - atomic_set(&cfqg->ref, 1); | |
1017 | + cfqg->ref = 1; | |
1030 | 1018 | |
1031 | 1019 | /* |
1032 | 1020 | * Add group onto cgroup list. It might happen that bdi->dev is |
... | ... | @@ -1071,7 +1059,7 @@ |
1071 | 1059 | |
1072 | 1060 | static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) |
1073 | 1061 | { |
1074 | - atomic_inc(&cfqg->ref); | |
1062 | + cfqg->ref++; | |
1075 | 1063 | return cfqg; |
1076 | 1064 | } |
1077 | 1065 | |
... | ... | @@ -1083,7 +1071,7 @@ |
1083 | 1071 | |
1084 | 1072 | cfqq->cfqg = cfqg; |
1085 | 1073 | /* cfqq reference on cfqg */ |
1086 | - atomic_inc(&cfqq->cfqg->ref); | |
1074 | + cfqq->cfqg->ref++; | |
1087 | 1075 | } |
1088 | 1076 | |
1089 | 1077 | static void cfq_put_cfqg(struct cfq_group *cfqg) |
1090 | 1078 | |
... | ... | @@ -1091,11 +1079,12 @@ |
1091 | 1079 | struct cfq_rb_root *st; |
1092 | 1080 | int i, j; |
1093 | 1081 | |
1094 | - BUG_ON(atomic_read(&cfqg->ref) <= 0); | |
1095 | - if (!atomic_dec_and_test(&cfqg->ref)) | |
1082 | + BUG_ON(cfqg->ref <= 0); | |
1083 | + cfqg->ref--; | |
1084 | + if (cfqg->ref) | |
1096 | 1085 | return; |
1097 | 1086 | for_each_cfqg_st(cfqg, i, j, st) |
1098 | - BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL); | |
1087 | + BUG_ON(!RB_EMPTY_ROOT(&st->rb)); | |
1099 | 1088 | kfree(cfqg); |
1100 | 1089 | } |
1101 | 1090 | |
... | ... | @@ -1200,7 +1189,7 @@ |
1200 | 1189 | cfq_group_service_tree_del(cfqd, cfqq->cfqg); |
1201 | 1190 | cfqq->orig_cfqg = cfqq->cfqg; |
1202 | 1191 | cfqq->cfqg = &cfqd->root_group; |
1203 | - atomic_inc(&cfqd->root_group.ref); | |
1192 | + cfqd->root_group.ref++; | |
1204 | 1193 | group_changed = 1; |
1205 | 1194 | } else if (!cfqd->cfq_group_isolation |
1206 | 1195 | && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) { |
... | ... | @@ -1687,9 +1676,6 @@ |
1687 | 1676 | if (cfqq == cfqd->active_queue) |
1688 | 1677 | cfqd->active_queue = NULL; |
1689 | 1678 | |
1690 | - if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active) | |
1691 | - cfqd->grp_service_tree.active = NULL; | |
1692 | - | |
1693 | 1679 | if (cfqd->active_cic) { |
1694 | 1680 | put_io_context(cfqd->active_cic->ioc); |
1695 | 1681 | cfqd->active_cic = NULL; |
1696 | 1682 | |
... | ... | @@ -1901,10 +1887,10 @@ |
1901 | 1887 | * in their service tree. |
1902 | 1888 | */ |
1903 | 1889 | if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) |
1904 | - return 1; | |
1890 | + return true; | |
1905 | 1891 | cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", |
1906 | 1892 | service_tree->count); |
1907 | - return 0; | |
1893 | + return false; | |
1908 | 1894 | } |
1909 | 1895 | |
1910 | 1896 | static void cfq_arm_slice_timer(struct cfq_data *cfqd) |
... | ... | @@ -2040,7 +2026,7 @@ |
2040 | 2026 | int process_refs, io_refs; |
2041 | 2027 | |
2042 | 2028 | io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE]; |
2043 | - process_refs = atomic_read(&cfqq->ref) - io_refs; | |
2029 | + process_refs = cfqq->ref - io_refs; | |
2044 | 2030 | BUG_ON(process_refs < 0); |
2045 | 2031 | return process_refs; |
2046 | 2032 | } |
2047 | 2033 | |
... | ... | @@ -2080,10 +2066,10 @@ |
2080 | 2066 | */ |
2081 | 2067 | if (new_process_refs >= process_refs) { |
2082 | 2068 | cfqq->new_cfqq = new_cfqq; |
2083 | - atomic_add(process_refs, &new_cfqq->ref); | |
2069 | + new_cfqq->ref += process_refs; | |
2084 | 2070 | } else { |
2085 | 2071 | new_cfqq->new_cfqq = cfqq; |
2086 | - atomic_add(new_process_refs, &cfqq->ref); | |
2072 | + cfqq->ref += new_process_refs; | |
2087 | 2073 | } |
2088 | 2074 | } |
2089 | 2075 | |
2090 | 2076 | |
... | ... | @@ -2116,13 +2102,8 @@ |
2116 | 2102 | unsigned count; |
2117 | 2103 | struct cfq_rb_root *st; |
2118 | 2104 | unsigned group_slice; |
2105 | + enum wl_prio_t original_prio = cfqd->serving_prio; | |
2119 | 2106 | |
2120 | - if (!cfqg) { | |
2121 | - cfqd->serving_prio = IDLE_WORKLOAD; | |
2122 | - cfqd->workload_expires = jiffies + 1; | |
2123 | - return; | |
2124 | - } | |
2125 | - | |
2126 | 2107 | /* Choose next priority. RT > BE > IDLE */ |
2127 | 2108 | if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) |
2128 | 2109 | cfqd->serving_prio = RT_WORKLOAD; |
... | ... | @@ -2134,6 +2115,9 @@ |
2134 | 2115 | return; |
2135 | 2116 | } |
2136 | 2117 | |
2118 | + if (original_prio != cfqd->serving_prio) | |
2119 | + goto new_workload; | |
2120 | + | |
2137 | 2121 | /* |
2138 | 2122 | * For RT and BE, we have to choose also the type |
2139 | 2123 | * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload |
... | ... | @@ -2148,6 +2132,7 @@ |
2148 | 2132 | if (count && !time_after(jiffies, cfqd->workload_expires)) |
2149 | 2133 | return; |
2150 | 2134 | |
2135 | +new_workload: | |
2151 | 2136 | /* otherwise select new workload type */ |
2152 | 2137 | cfqd->serving_type = |
2153 | 2138 | cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); |
... | ... | @@ -2199,7 +2184,6 @@ |
2199 | 2184 | if (RB_EMPTY_ROOT(&st->rb)) |
2200 | 2185 | return NULL; |
2201 | 2186 | cfqg = cfq_rb_first_group(st); |
2202 | - st->active = &cfqg->rb_node; | |
2203 | 2187 | update_min_vdisktime(st); |
2204 | 2188 | return cfqg; |
2205 | 2189 | } |
... | ... | @@ -2293,6 +2277,17 @@ |
2293 | 2277 | goto keep_queue; |
2294 | 2278 | } |
2295 | 2279 | |
2280 | + /* | |
2281 | + * This is a deep seek queue, but the device is much faster than | |
2282 | +	 * the queue can deliver; don't idle. |
2283 | +	 */ |
2284 | + if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && | |
2285 | + (cfq_cfqq_slice_new(cfqq) || | |
2286 | + (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { | |
2287 | + cfq_clear_cfqq_deep(cfqq); | |
2288 | + cfq_clear_cfqq_idle_window(cfqq); | |
2289 | + } | |
2290 | + | |
2296 | 2291 | if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { |
2297 | 2292 | cfqq = NULL; |
2298 | 2293 | goto keep_queue; |
2299 | 2294 | |
2300 | 2295 | |
... | ... | @@ -2367,12 +2362,12 @@ |
2367 | 2362 | { |
2368 | 2363 | /* the queue hasn't finished any request, can't estimate */ |
2369 | 2364 | if (cfq_cfqq_slice_new(cfqq)) |
2370 | - return 1; | |
2365 | + return true; | |
2371 | 2366 | if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, |
2372 | 2367 | cfqq->slice_end)) |
2373 | - return 1; | |
2368 | + return true; | |
2374 | 2369 | |
2375 | - return 0; | |
2370 | + return false; | |
2376 | 2371 | } |
2377 | 2372 | |
2378 | 2373 | static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
2379 | 2374 | |
... | ... | @@ -2538,9 +2533,10 @@ |
2538 | 2533 | struct cfq_data *cfqd = cfqq->cfqd; |
2539 | 2534 | struct cfq_group *cfqg, *orig_cfqg; |
2540 | 2535 | |
2541 | - BUG_ON(atomic_read(&cfqq->ref) <= 0); | |
2536 | + BUG_ON(cfqq->ref <= 0); | |
2542 | 2537 | |
2543 | - if (!atomic_dec_and_test(&cfqq->ref)) | |
2538 | + cfqq->ref--; | |
2539 | + if (cfqq->ref) | |
2544 | 2540 | return; |
2545 | 2541 | |
2546 | 2542 | cfq_log_cfqq(cfqd, cfqq, "put_queue"); |
... | ... | @@ -2843,7 +2839,7 @@ |
2843 | 2839 | RB_CLEAR_NODE(&cfqq->p_node); |
2844 | 2840 | INIT_LIST_HEAD(&cfqq->fifo); |
2845 | 2841 | |
2846 | - atomic_set(&cfqq->ref, 0); | |
2842 | + cfqq->ref = 0; | |
2847 | 2843 | cfqq->cfqd = cfqd; |
2848 | 2844 | |
2849 | 2845 | cfq_mark_cfqq_prio_changed(cfqq); |
2850 | 2846 | |
... | ... | @@ -2979,11 +2975,11 @@ |
2979 | 2975 | * pin the queue now that it's allocated, scheduler exit will prune it |
2980 | 2976 | */ |
2981 | 2977 | if (!is_sync && !(*async_cfqq)) { |
2982 | - atomic_inc(&cfqq->ref); | |
2978 | + cfqq->ref++; | |
2983 | 2979 | *async_cfqq = cfqq; |
2984 | 2980 | } |
2985 | 2981 | |
2986 | - atomic_inc(&cfqq->ref); | |
2982 | + cfqq->ref++; | |
2987 | 2983 | return cfqq; |
2988 | 2984 | } |
2989 | 2985 | |
... | ... | @@ -3265,6 +3261,10 @@ |
3265 | 3261 | if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) |
3266 | 3262 | return true; |
3267 | 3263 | |
3264 | + /* An idle queue should not be idle now for some reason */ | |
3265 | + if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq)) | |
3266 | + return true; | |
3267 | + | |
3268 | 3268 | if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) |
3269 | 3269 | return false; |
3270 | 3270 | |
3271 | 3271 | |
... | ... | @@ -3681,13 +3681,13 @@ |
3681 | 3681 | } |
3682 | 3682 | |
3683 | 3683 | cfqq->allocated[rw]++; |
3684 | - atomic_inc(&cfqq->ref); | |
3685 | - | |
3686 | - spin_unlock_irqrestore(q->queue_lock, flags); | |
3687 | - | |
3684 | + cfqq->ref++; | |
3688 | 3685 | rq->elevator_private = cic; |
3689 | 3686 | rq->elevator_private2 = cfqq; |
3690 | 3687 | rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); |
3688 | + | |
3689 | + spin_unlock_irqrestore(q->queue_lock, flags); | |
3690 | + | |
3691 | 3691 | return 0; |
3692 | 3692 | |
3693 | 3693 | queue_fail: |
... | ... | @@ -3862,6 +3862,10 @@ |
3862 | 3862 | if (!cfqd) |
3863 | 3863 | return NULL; |
3864 | 3864 | |
3865 | + /* | |
3866 | +	 * No need to take queue_lock in this routine, since we are |
3867 | +	 * initializing the ioscheduler and nobody is using cfqd yet |
3868 | + */ | |
3865 | 3869 | cfqd->cic_index = i; |
3866 | 3870 | |
3867 | 3871 | /* Init root service tree */ |
... | ... | @@ -3881,7 +3885,7 @@ |
3881 | 3885 | * Take a reference to root group which we never drop. This is just |
3882 | 3886 | * to make sure that cfq_put_cfqg() does not try to kfree root group |
3883 | 3887 | */ |
3884 | - atomic_set(&cfqg->ref, 1); | |
3888 | + cfqg->ref = 1; | |
3885 | 3889 | rcu_read_lock(); |
3886 | 3890 | cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, |
3887 | 3891 | (void *)cfqd, 0); |
... | ... | @@ -3901,7 +3905,7 @@ |
3901 | 3905 | * will not attempt to free it. |
3902 | 3906 | */ |
3903 | 3907 | cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); |
3904 | - atomic_inc(&cfqd->oom_cfqq.ref); | |
3908 | + cfqd->oom_cfqq.ref++; | |
3905 | 3909 | cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); |
3906 | 3910 | |
3907 | 3911 | INIT_LIST_HEAD(&cfqd->cic_list); |
block/genhd.c
... | ... | @@ -18,6 +18,7 @@ |
18 | 18 | #include <linux/buffer_head.h> |
19 | 19 | #include <linux/mutex.h> |
20 | 20 | #include <linux/idr.h> |
21 | +#include <linux/log2.h> | |
21 | 22 | |
22 | 23 | #include "blk.h" |
23 | 24 | |
... | ... | @@ -35,6 +36,10 @@ |
35 | 36 | |
36 | 37 | static struct device_type disk_type; |
37 | 38 | |
39 | +static void disk_add_events(struct gendisk *disk); | |
40 | +static void disk_del_events(struct gendisk *disk); | |
41 | +static void disk_release_events(struct gendisk *disk); | |
42 | + | |
38 | 43 | /** |
39 | 44 | * disk_get_part - get partition |
40 | 45 | * @disk: disk to look partition from |
... | ... | @@ -239,7 +244,7 @@ |
239 | 244 | } *major_names[BLKDEV_MAJOR_HASH_SIZE]; |
240 | 245 | |
241 | 246 | /* index in the above - for now: assume no multimajor ranges */ |
242 | -static inline int major_to_index(int major) | |
247 | +static inline int major_to_index(unsigned major) | |
243 | 248 | { |
244 | 249 | return major % BLKDEV_MAJOR_HASH_SIZE; |
245 | 250 | } |
... | ... | @@ -502,6 +507,64 @@ |
502 | 507 | return 0; |
503 | 508 | } |
504 | 509 | |
510 | +void register_disk(struct gendisk *disk) | |
511 | +{ | |
512 | + struct device *ddev = disk_to_dev(disk); | |
513 | + struct block_device *bdev; | |
514 | + struct disk_part_iter piter; | |
515 | + struct hd_struct *part; | |
516 | + int err; | |
517 | + | |
518 | + ddev->parent = disk->driverfs_dev; | |
519 | + | |
520 | + dev_set_name(ddev, disk->disk_name); | |
521 | + | |
522 | +	/* delay uevents until we have scanned the partition table */ |
523 | + dev_set_uevent_suppress(ddev, 1); | |
524 | + | |
525 | + if (device_add(ddev)) | |
526 | + return; | |
527 | + if (!sysfs_deprecated) { | |
528 | + err = sysfs_create_link(block_depr, &ddev->kobj, | |
529 | + kobject_name(&ddev->kobj)); | |
530 | + if (err) { | |
531 | + device_del(ddev); | |
532 | + return; | |
533 | + } | |
534 | + } | |
535 | + disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); | |
536 | + disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); | |
537 | + | |
538 | + /* No minors to use for partitions */ | |
539 | + if (!disk_partitionable(disk)) | |
540 | + goto exit; | |
541 | + | |
542 | + /* No such device (e.g., media were just removed) */ | |
543 | + if (!get_capacity(disk)) | |
544 | + goto exit; | |
545 | + | |
546 | + bdev = bdget_disk(disk, 0); | |
547 | + if (!bdev) | |
548 | + goto exit; | |
549 | + | |
550 | + bdev->bd_invalidated = 1; | |
551 | + err = blkdev_get(bdev, FMODE_READ, NULL); | |
552 | + if (err < 0) | |
553 | + goto exit; | |
554 | + blkdev_put(bdev, FMODE_READ); | |
555 | + | |
556 | +exit: | |
557 | + /* announce disk after possible partitions are created */ | |
558 | + dev_set_uevent_suppress(ddev, 0); | |
559 | + kobject_uevent(&ddev->kobj, KOBJ_ADD); | |
560 | + | |
561 | + /* announce possible partitions */ | |
562 | + disk_part_iter_init(&piter, disk, 0); | |
563 | + while ((part = disk_part_iter_next(&piter))) | |
564 | + kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); | |
565 | + disk_part_iter_exit(&piter); | |
566 | +} | |
567 | + | |
505 | 568 | /** |
506 | 569 | * add_disk - add partitioning information to kernel list |
507 | 570 | * @disk: per-device partitioning information |
508 | 571 | |
509 | 572 | |
510 | 573 | |
511 | 574 | |
512 | 575 | |
513 | 576 | |
... | ... | @@ -551,18 +614,48 @@ |
551 | 614 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, |
552 | 615 | "bdi"); |
553 | 616 | WARN_ON(retval); |
554 | -} | |
555 | 617 | |
618 | + disk_add_events(disk); | |
619 | +} | |
556 | 620 | EXPORT_SYMBOL(add_disk); |
557 | -EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ | |
558 | 621 | |
559 | -void unlink_gendisk(struct gendisk *disk) | |
622 | +void del_gendisk(struct gendisk *disk) | |
560 | 623 | { |
624 | + struct disk_part_iter piter; | |
625 | + struct hd_struct *part; | |
626 | + | |
627 | + disk_del_events(disk); | |
628 | + | |
629 | + /* invalidate stuff */ | |
630 | + disk_part_iter_init(&piter, disk, | |
631 | + DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); | |
632 | + while ((part = disk_part_iter_next(&piter))) { | |
633 | + invalidate_partition(disk, part->partno); | |
634 | + delete_partition(disk, part->partno); | |
635 | + } | |
636 | + disk_part_iter_exit(&piter); | |
637 | + | |
638 | + invalidate_partition(disk, 0); | |
639 | + blk_free_devt(disk_to_dev(disk)->devt); | |
640 | + set_capacity(disk, 0); | |
641 | + disk->flags &= ~GENHD_FL_UP; | |
642 | + | |
561 | 643 | sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); |
562 | 644 | bdi_unregister(&disk->queue->backing_dev_info); |
563 | 645 | blk_unregister_queue(disk); |
564 | 646 | blk_unregister_region(disk_devt(disk), disk->minors); |
647 | + | |
648 | + part_stat_set_all(&disk->part0, 0); | |
649 | + disk->part0.stamp = 0; | |
650 | + | |
651 | + kobject_put(disk->part0.holder_dir); | |
652 | + kobject_put(disk->slave_dir); | |
653 | + disk->driverfs_dev = NULL; | |
654 | + if (!sysfs_deprecated) | |
655 | + sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); | |
656 | + device_del(disk_to_dev(disk)); | |
565 | 657 | } |
658 | +EXPORT_SYMBOL(del_gendisk); | |
566 | 659 | |
567 | 660 | /** |
568 | 661 | * get_gendisk - get partitioning information for a given device |
... | ... | @@ -735,7 +828,7 @@ |
735 | 828 | static void *p; |
736 | 829 | |
737 | 830 | p = disk_seqf_start(seqf, pos); |
738 | - if (!IS_ERR(p) && p && !*pos) | |
831 | + if (!IS_ERR_OR_NULL(p) && !*pos) | |
739 | 832 | seq_puts(seqf, "major minor #blocks name\n\n"); |
740 | 833 | return p; |
741 | 834 | } |
... | ... | @@ -1005,6 +1098,7 @@ |
1005 | 1098 | { |
1006 | 1099 | struct gendisk *disk = dev_to_disk(dev); |
1007 | 1100 | |
1101 | + disk_release_events(disk); | |
1008 | 1102 | kfree(disk->random); |
1009 | 1103 | disk_replace_part_tbl(disk, NULL); |
1010 | 1104 | free_part_stats(&disk->part0); |
... | ... | @@ -1110,29 +1204,6 @@ |
1110 | 1204 | module_init(proc_genhd_init); |
1111 | 1205 | #endif /* CONFIG_PROC_FS */ |
1112 | 1206 | |
1113 | -static void media_change_notify_thread(struct work_struct *work) | |
1114 | -{ | |
1115 | - struct gendisk *gd = container_of(work, struct gendisk, async_notify); | |
1116 | - char event[] = "MEDIA_CHANGE=1"; | |
1117 | - char *envp[] = { event, NULL }; | |
1118 | - | |
1119 | - /* | |
1120 | - * set enviroment vars to indicate which event this is for | |
1121 | - * so that user space will know to go check the media status. | |
1122 | - */ | |
1123 | - kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); | |
1124 | - put_device(gd->driverfs_dev); | |
1125 | -} | |
1126 | - | |
1127 | -#if 0 | |
1128 | -void genhd_media_change_notify(struct gendisk *disk) | |
1129 | -{ | |
1130 | - get_device(disk->driverfs_dev); | |
1131 | - schedule_work(&disk->async_notify); | |
1132 | -} | |
1133 | -EXPORT_SYMBOL_GPL(genhd_media_change_notify); | |
1134 | -#endif /* 0 */ | |
1135 | - | |
1136 | 1207 | dev_t blk_lookup_devt(const char *name, int partno) |
1137 | 1208 | { |
1138 | 1209 | dev_t devt = MKDEV(0, 0); |
1139 | 1210 | |
... | ... | @@ -1193,13 +1264,13 @@ |
1193 | 1264 | } |
1194 | 1265 | disk->part_tbl->part[0] = &disk->part0; |
1195 | 1266 | |
1267 | + hd_ref_init(&disk->part0); | |
1268 | + | |
1196 | 1269 | disk->minors = minors; |
1197 | 1270 | rand_initialize_disk(disk); |
1198 | 1271 | disk_to_dev(disk)->class = &block_class; |
1199 | 1272 | disk_to_dev(disk)->type = &disk_type; |
1200 | 1273 | device_initialize(disk_to_dev(disk)); |
1201 | - INIT_WORK(&disk->async_notify, | |
1202 | - media_change_notify_thread); | |
1203 | 1274 | } |
1204 | 1275 | return disk; |
1205 | 1276 | } |
... | ... | @@ -1291,4 +1362,423 @@ |
1291 | 1362 | } |
1292 | 1363 | |
1293 | 1364 | EXPORT_SYMBOL(invalidate_partition); |
1365 | + | |
1366 | +/* | |
1367 | + * Disk events - monitor disk events like media change and eject request. | |
1368 | + */ | |
1369 | +struct disk_events { | |
1370 | + struct list_head node; /* all disk_event's */ | |
1371 | + struct gendisk *disk; /* the associated disk */ | |
1372 | + spinlock_t lock; | |
1373 | + | |
1374 | + int block; /* event blocking depth */ | |
1375 | + unsigned int pending; /* events already sent out */ | |
1376 | + unsigned int clearing; /* events being cleared */ | |
1377 | + | |
1378 | + long poll_msecs; /* interval, -1 for default */ | |
1379 | + struct delayed_work dwork; | |
1380 | +}; | |
1381 | + | |
1382 | +static const char *disk_events_strs[] = { | |
1383 | + [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change", | |
1384 | + [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request", | |
1385 | +}; | |
1386 | + | |
1387 | +static char *disk_uevents[] = { | |
1388 | + [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1", | |
1389 | + [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1", | |
1390 | +}; | |
1391 | + | |
1392 | +/* list of all disk_events */ | |
1393 | +static DEFINE_MUTEX(disk_events_mutex); | |
1394 | +static LIST_HEAD(disk_events); | |
1395 | + | |
1396 | +/* disable in-kernel polling by default */ | |
1397 | +static unsigned long disk_events_dfl_poll_msecs = 0; | |
1398 | + | |
1399 | +static unsigned long disk_events_poll_jiffies(struct gendisk *disk) | |
1400 | +{ | |
1401 | + struct disk_events *ev = disk->ev; | |
1402 | + long intv_msecs = 0; | |
1403 | + | |
1404 | + /* | |
1405 | + * If device-specific poll interval is set, always use it. If | |
1406 | + * the default is being used, poll iff there are events which | |
1407 | + * can't be monitored asynchronously. | |
1408 | + */ | |
1409 | + if (ev->poll_msecs >= 0) | |
1410 | + intv_msecs = ev->poll_msecs; | |
1411 | + else if (disk->events & ~disk->async_events) | |
1412 | + intv_msecs = disk_events_dfl_poll_msecs; | |
1413 | + | |
1414 | + return msecs_to_jiffies(intv_msecs); | |
1415 | +} | |
1416 | + | |
1417 | +static void __disk_block_events(struct gendisk *disk, bool sync) | |
1418 | +{ | |
1419 | + struct disk_events *ev = disk->ev; | |
1420 | + unsigned long flags; | |
1421 | + bool cancel; | |
1422 | + | |
1423 | + spin_lock_irqsave(&ev->lock, flags); | |
1424 | + cancel = !ev->block++; | |
1425 | + spin_unlock_irqrestore(&ev->lock, flags); | |
1426 | + | |
1427 | + if (cancel) { | |
1428 | + if (sync) | |
1429 | + cancel_delayed_work_sync(&disk->ev->dwork); | |
1430 | + else | |
1431 | + cancel_delayed_work(&disk->ev->dwork); | |
1432 | + } | |
1433 | +} | |
1434 | + | |
1435 | +static void __disk_unblock_events(struct gendisk *disk, bool check_now) | |
1436 | +{ | |
1437 | + struct disk_events *ev = disk->ev; | |
1438 | + unsigned long intv; | |
1439 | + unsigned long flags; | |
1440 | + | |
1441 | + spin_lock_irqsave(&ev->lock, flags); | |
1442 | + | |
1443 | + if (WARN_ON_ONCE(ev->block <= 0)) | |
1444 | + goto out_unlock; | |
1445 | + | |
1446 | + if (--ev->block) | |
1447 | + goto out_unlock; | |
1448 | + | |
1449 | + /* | |
1450 | + * Not exactly a latency critical operation, set poll timer | |
1451 | + * slack to 25% and kick event check. | |
1452 | + */ | |
1453 | + intv = disk_events_poll_jiffies(disk); | |
1454 | + set_timer_slack(&ev->dwork.timer, intv / 4); | |
1455 | + if (check_now) | |
1456 | + queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | |
1457 | + else if (intv) | |
1458 | + queue_delayed_work(system_nrt_wq, &ev->dwork, intv); | |
1459 | +out_unlock: | |
1460 | + spin_unlock_irqrestore(&ev->lock, flags); | |
1461 | +} | |
1462 | + | |
1463 | +/** | |
1464 | + * disk_block_events - block and flush disk event checking | |
1465 | + * @disk: disk to block events for | |
1466 | + * | |
1467 | + * On return from this function, it is guaranteed that event checking | |
1468 | + * isn't in progress and won't happen until unblocked by | |
1469 | + * disk_unblock_events(). Events blocking is counted and the actual | |
1470 | + * unblocking happens after the matching number of unblocks are done. | |
1471 | + * | |
1472 | + * Note that this intentionally does not block event checking from | |
1473 | + * disk_clear_events(). | |
1474 | + * | |
1475 | + * CONTEXT: | |
1476 | + * Might sleep. | |
1477 | + */ | |
1478 | +void disk_block_events(struct gendisk *disk) | |
1479 | +{ | |
1480 | + if (disk->ev) | |
1481 | + __disk_block_events(disk, true); | |
1482 | +} | |
1483 | + | |
1484 | +/** | |
1485 | + * disk_unblock_events - unblock disk event checking | |
1486 | + * @disk: disk to unblock events for | |
1487 | + * | |
1488 | + * Undo disk_block_events(). When the block count reaches zero, it | |
1489 | + * starts events polling if configured. | |
1490 | + * | |
1491 | + * CONTEXT: | |
1492 | + * Don't care. Safe to call from irq context. | |
1493 | + */ | |
1494 | +void disk_unblock_events(struct gendisk *disk) | |
1495 | +{ | |
1496 | + if (disk->ev) | |
1497 | + __disk_unblock_events(disk, true); | |
1498 | +} | |
1499 | + | |
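A short usage sketch of the pair above from a driver's perspective; blocking nests, so each disk_block_events() must be matched by exactly one disk_unblock_events():

	disk_block_events(disk);	/* event checking quiesced from here on */
	/* ... reconfigure device state without racing disk_events_workfn() ... */
	disk_unblock_events(disk);	/* count reaches zero -> polling resumes */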
1500 | +/** | |
1501 | + * disk_check_events - schedule immediate event checking | |
1502 | + * @disk: disk to check events for | |
1503 | + * | |
1504 | + * Schedule immediate event checking on @disk if not blocked. | |
1505 | + * | |
1506 | + * CONTEXT: | |
1507 | + * Don't care. Safe to call from irq context. | |
1508 | + */ | |
1509 | +void disk_check_events(struct gendisk *disk) | |
1510 | +{ | |
1511 | + if (disk->ev) { | |
1512 | + __disk_block_events(disk, false); | |
1513 | + __disk_unblock_events(disk, true); | |
1514 | + } | |
1515 | +} | |
1516 | +EXPORT_SYMBOL_GPL(disk_check_events); | |
1517 | + | |
1518 | +/** | |
1519 | + * disk_clear_events - synchronously check, clear and return pending events | |
1520 | + * @disk: disk to fetch and clear events from | |
1521 | + * @mask: mask of events to be fetched and cleared |
1522 | + * | |
1523 | + * Disk events are synchronously checked and pending events in @mask | |
1524 | + * are cleared and returned. This ignores the block count. | |
1525 | + * | |
1526 | + * CONTEXT: | |
1527 | + * Might sleep. | |
1528 | + */ | |
1529 | +unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) | |
1530 | +{ | |
1531 | + const struct block_device_operations *bdops = disk->fops; | |
1532 | + struct disk_events *ev = disk->ev; | |
1533 | + unsigned int pending; | |
1534 | + | |
1535 | + if (!ev) { | |
1536 | + /* for drivers still using the old ->media_changed method */ | |
1537 | + if ((mask & DISK_EVENT_MEDIA_CHANGE) && | |
1538 | + bdops->media_changed && bdops->media_changed(disk)) | |
1539 | + return DISK_EVENT_MEDIA_CHANGE; | |
1540 | + return 0; | |
1541 | + } | |
1542 | + | |
1543 | + /* tell the workfn about the events being cleared */ | |
1544 | + spin_lock_irq(&ev->lock); | |
1545 | + ev->clearing |= mask; | |
1546 | + spin_unlock_irq(&ev->lock); | |
1547 | + | |
1548 | +	/* unconditionally schedule event check and wait for it to finish */ |
1549 | + __disk_block_events(disk, true); | |
1550 | + queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | |
1551 | + flush_delayed_work(&ev->dwork); | |
1552 | + __disk_unblock_events(disk, false); | |
1553 | + | |
1554 | + /* then, fetch and clear pending events */ | |
1555 | + spin_lock_irq(&ev->lock); | |
1556 | + WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */ | |
1557 | + pending = ev->pending & mask; | |
1558 | + ev->pending &= ~mask; | |
1559 | + spin_unlock_irq(&ev->lock); | |
1560 | + | |
1561 | + return pending; | |
1562 | +} | |
1563 | + | |
1564 | +static void disk_events_workfn(struct work_struct *work) | |
1565 | +{ | |
1566 | + struct delayed_work *dwork = to_delayed_work(work); | |
1567 | + struct disk_events *ev = container_of(dwork, struct disk_events, dwork); | |
1568 | + struct gendisk *disk = ev->disk; | |
1569 | + char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; | |
1570 | + unsigned int clearing = ev->clearing; | |
1571 | + unsigned int events; | |
1572 | + unsigned long intv; | |
1573 | + int nr_events = 0, i; | |
1574 | + | |
1575 | + /* check events */ | |
1576 | + events = disk->fops->check_events(disk, clearing); | |
1577 | + | |
1578 | + /* accumulate pending events and schedule next poll if necessary */ | |
1579 | + spin_lock_irq(&ev->lock); | |
1580 | + | |
1581 | + events &= ~ev->pending; | |
1582 | + ev->pending |= events; | |
1583 | + ev->clearing &= ~clearing; | |
1584 | + | |
1585 | + intv = disk_events_poll_jiffies(disk); | |
1586 | + if (!ev->block && intv) | |
1587 | + queue_delayed_work(system_nrt_wq, &ev->dwork, intv); | |
1588 | + | |
1589 | + spin_unlock_irq(&ev->lock); | |
1590 | + | |
1591 | + /* tell userland about new events */ | |
1592 | + for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) | |
1593 | + if (events & (1 << i)) | |
1594 | + envp[nr_events++] = disk_uevents[i]; | |
1595 | + | |
1596 | + if (nr_events) | |
1597 | + kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); | |
1598 | +} | |
1599 | + | |
1600 | +/* | |
1601 | + * A disk events enabled device has the following sysfs nodes under | |
1602 | + * its /sys/block/X/ directory. | |
1603 | + * | |
1604 | + * events : list of all supported events | |
1605 | + * events_async : list of events which can be detected w/o polling | |
1606 | + * events_poll_msecs : polling interval, 0: disable, -1: system default | |
1607 | + */ | |
1608 | +static ssize_t __disk_events_show(unsigned int events, char *buf) | |
1609 | +{ | |
1610 | + const char *delim = ""; | |
1611 | + ssize_t pos = 0; | |
1612 | + int i; | |
1613 | + | |
1614 | + for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++) | |
1615 | + if (events & (1 << i)) { | |
1616 | + pos += sprintf(buf + pos, "%s%s", | |
1617 | + delim, disk_events_strs[i]); | |
1618 | + delim = " "; | |
1619 | + } | |
1620 | + if (pos) | |
1621 | + pos += sprintf(buf + pos, "\n"); | |
1622 | + return pos; | |
1623 | +} | |
1624 | + | |
1625 | +static ssize_t disk_events_show(struct device *dev, | |
1626 | + struct device_attribute *attr, char *buf) | |
1627 | +{ | |
1628 | + struct gendisk *disk = dev_to_disk(dev); | |
1629 | + | |
1630 | + return __disk_events_show(disk->events, buf); | |
1631 | +} | |
1632 | + | |
1633 | +static ssize_t disk_events_async_show(struct device *dev, | |
1634 | + struct device_attribute *attr, char *buf) | |
1635 | +{ | |
1636 | + struct gendisk *disk = dev_to_disk(dev); | |
1637 | + | |
1638 | + return __disk_events_show(disk->async_events, buf); | |
1639 | +} | |
1640 | + | |
1641 | +static ssize_t disk_events_poll_msecs_show(struct device *dev, | |
1642 | + struct device_attribute *attr, | |
1643 | + char *buf) | |
1644 | +{ | |
1645 | + struct gendisk *disk = dev_to_disk(dev); | |
1646 | + | |
1647 | + return sprintf(buf, "%ld\n", disk->ev->poll_msecs); | |
1648 | +} | |
1649 | + | |
1650 | +static ssize_t disk_events_poll_msecs_store(struct device *dev, | |
1651 | + struct device_attribute *attr, | |
1652 | + const char *buf, size_t count) | |
1653 | +{ | |
1654 | + struct gendisk *disk = dev_to_disk(dev); | |
1655 | + long intv; | |
1656 | + | |
1657 | + if (!count || !sscanf(buf, "%ld", &intv)) | |
1658 | + return -EINVAL; | |
1659 | + | |
1660 | + if (intv < 0 && intv != -1) | |
1661 | + return -EINVAL; | |
1662 | + | |
1663 | + __disk_block_events(disk, true); | |
1664 | + disk->ev->poll_msecs = intv; | |
1665 | + __disk_unblock_events(disk, true); | |
1666 | + | |
1667 | + return count; | |
1668 | +} | |
1669 | + | |
1670 | +static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL); | |
1671 | +static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL); | |
1672 | +static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR, | |
1673 | + disk_events_poll_msecs_show, | |
1674 | + disk_events_poll_msecs_store); | |
1675 | + | |
1676 | +static const struct attribute *disk_events_attrs[] = { | |
1677 | + &dev_attr_events.attr, | |
1678 | + &dev_attr_events_async.attr, | |
1679 | + &dev_attr_events_poll_msecs.attr, | |
1680 | + NULL, | |
1681 | +}; | |
1682 | + | |
1683 | +/* | |
1684 | + * The default polling interval can be specified by the kernel | |
1685 | + * parameter block.events_dfl_poll_msecs which defaults to 0 | |
1686 | + * (disable). This can also be modified runtime by writing to | |
1687 | + * /sys/module/block/events_dfl_poll_msecs. | |
1688 | + */ | |
1689 | +static int disk_events_set_dfl_poll_msecs(const char *val, | |
1690 | + const struct kernel_param *kp) | |
1691 | +{ | |
1692 | + struct disk_events *ev; | |
1693 | + int ret; | |
1694 | + | |
1695 | + ret = param_set_ulong(val, kp); | |
1696 | + if (ret < 0) | |
1697 | + return ret; | |
1698 | + | |
1699 | + mutex_lock(&disk_events_mutex); | |
1700 | + | |
1701 | + list_for_each_entry(ev, &disk_events, node) | |
1702 | + disk_check_events(ev->disk); | |
1703 | + | |
1704 | + mutex_unlock(&disk_events_mutex); | |
1705 | + | |
1706 | + return 0; | |
1707 | +} | |
1708 | + | |
1709 | +static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = { | |
1710 | + .set = disk_events_set_dfl_poll_msecs, | |
1711 | + .get = param_get_ulong, | |
1712 | +}; | |
1713 | + | |
1714 | +#undef MODULE_PARAM_PREFIX | |
1715 | +#define MODULE_PARAM_PREFIX "block." | |
1716 | + | |
1717 | +module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops, | |
1718 | + &disk_events_dfl_poll_msecs, 0644); | |
1719 | + | |
1720 | +/* | |
1721 | + * disk_{add|del|release}_events - initialize and destroy disk_events. | |
1722 | + */ | |
1723 | +static void disk_add_events(struct gendisk *disk) | |
1724 | +{ | |
1725 | + struct disk_events *ev; | |
1726 | + | |
1727 | + if (!disk->fops->check_events || !(disk->events | disk->async_events)) | |
1728 | + return; | |
1729 | + | |
1730 | + ev = kzalloc(sizeof(*ev), GFP_KERNEL); | |
1731 | + if (!ev) { | |
1732 | + pr_warn("%s: failed to initialize events\n", disk->disk_name); | |
1733 | + return; | |
1734 | + } | |
1735 | + | |
1736 | + if (sysfs_create_files(&disk_to_dev(disk)->kobj, | |
1737 | + disk_events_attrs) < 0) { | |
1738 | + pr_warn("%s: failed to create sysfs files for events\n", | |
1739 | + disk->disk_name); | |
1740 | + kfree(ev); | |
1741 | + return; | |
1742 | + } | |
1743 | + | |
1744 | + disk->ev = ev; | |
1745 | + | |
1746 | + INIT_LIST_HEAD(&ev->node); | |
1747 | + ev->disk = disk; | |
1748 | + spin_lock_init(&ev->lock); | |
1749 | + ev->block = 1; | |
1750 | + ev->poll_msecs = -1; | |
1751 | + INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); | |
1752 | + | |
1753 | + mutex_lock(&disk_events_mutex); | |
1754 | + list_add_tail(&ev->node, &disk_events); | |
1755 | + mutex_unlock(&disk_events_mutex); | |
1756 | + | |
1757 | + /* | |
1758 | + * Block count is initialized to 1 and the following initial | |
1759 | + * unblock kicks it into action. | |
1760 | + */ | |
1761 | + __disk_unblock_events(disk, true); | |
1762 | +} | |
1763 | + | |
1764 | +static void disk_del_events(struct gendisk *disk) | |
1765 | +{ | |
1766 | + if (!disk->ev) | |
1767 | + return; | |
1768 | + | |
1769 | + __disk_block_events(disk, true); | |
1770 | + | |
1771 | + mutex_lock(&disk_events_mutex); | |
1772 | + list_del_init(&disk->ev->node); | |
1773 | + mutex_unlock(&disk_events_mutex); | |
1774 | + | |
1775 | + sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs); | |
1776 | +} | |
1777 | + | |
1778 | +static void disk_release_events(struct gendisk *disk) | |
1779 | +{ | |
1780 | + /* the block count should be 1 from disk_del_events() */ | |
1781 | + WARN_ON_ONCE(disk->ev && disk->ev->block != 1); | |
1782 | + kfree(disk->ev); | |
1783 | +} |
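For a driver to participate in this machinery, it advertises its supported events on the gendisk before add_disk() and supplies a ->check_events() hook in its block_device_operations; the sd/sr conversions elsewhere in this pull follow this shape. A hedged sketch with invented my_* names:

static unsigned int my_check_events(struct gendisk *disk,
				    unsigned int clearing)
{
	unsigned int events = 0;

	/* query hardware and report only currently pending conditions */
	if (my_media_changed(disk))		/* hypothetical helper */
		events |= DISK_EVENT_MEDIA_CHANGE;
	return events;
}

static const struct block_device_operations my_fops = {
	.owner		= THIS_MODULE,
	.check_events	= my_check_events,
};

/* before add_disk(), so disk_add_events() above picks it up: */
disk->fops = &my_fops;
disk->events = DISK_EVENT_MEDIA_CHANGE;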
block/ioctl.c
... | ... | @@ -294,11 +294,12 @@ |
294 | 294 | return -EINVAL; |
295 | 295 | if (get_user(n, (int __user *) arg)) |
296 | 296 | return -EFAULT; |
297 | - if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0) | |
297 | + if (!(mode & FMODE_EXCL) && | |
298 | + blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0) | |
298 | 299 | return -EBUSY; |
299 | 300 | ret = set_blocksize(bdev, n); |
300 | 301 | if (!(mode & FMODE_EXCL)) |
301 | - bd_release(bdev); | |
302 | + blkdev_put(bdev, mode | FMODE_EXCL); | |
302 | 303 | return ret; |
303 | 304 | case BLKPG: |
304 | 305 | ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); |
drivers/block/drbd/drbd_int.h
... | ... | @@ -911,8 +911,6 @@ |
911 | 911 | struct drbd_backing_dev { |
912 | 912 | struct block_device *backing_bdev; |
913 | 913 | struct block_device *md_bdev; |
914 | - struct file *lo_file; | |
915 | - struct file *md_file; | |
916 | 914 | struct drbd_md md; |
917 | 915 | struct disk_conf dc; /* The user provided config... */ |
918 | 916 | sector_t known_size; /* last known size of that backing device */ |
drivers/block/drbd/drbd_main.c
... | ... | @@ -3372,11 +3372,8 @@ |
3372 | 3372 | if (ldev == NULL) |
3373 | 3373 | return; |
3374 | 3374 | |
3375 | - bd_release(ldev->backing_bdev); | |
3376 | - bd_release(ldev->md_bdev); | |
3377 | - | |
3378 | - fput(ldev->lo_file); | |
3379 | - fput(ldev->md_file); | |
3375 | + blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); | |
3376 | + blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); | |
3380 | 3377 | |
3381 | 3378 | kfree(ldev); |
3382 | 3379 | } |
drivers/block/drbd/drbd_nl.c
... | ... | @@ -855,7 +855,7 @@ |
855 | 855 | sector_t max_possible_sectors; |
856 | 856 | sector_t min_md_device_sectors; |
857 | 857 | struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ |
858 | - struct inode *inode, *inode2; | |
858 | + struct block_device *bdev; | |
859 | 859 | struct lru_cache *resync_lru = NULL; |
860 | 860 | union drbd_state ns, os; |
861 | 861 | unsigned int max_seg_s; |
862 | 862 | |
863 | 863 | |
864 | 864 | |
865 | 865 | |
866 | 866 | |
867 | 867 | |
868 | 868 | |
869 | 869 | |
870 | 870 | |
... | ... | @@ -907,78 +907,51 @@ |
907 | 907 | } |
908 | 908 | } |
909 | 909 | |
910 | - nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0); | |
911 | - if (IS_ERR(nbc->lo_file)) { | |
910 | + bdev = blkdev_get_by_path(nbc->dc.backing_dev, | |
911 | + FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev); | |
912 | + if (IS_ERR(bdev)) { | |
912 | 913 | dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, |
913 | - PTR_ERR(nbc->lo_file)); | |
914 | - nbc->lo_file = NULL; | |
914 | + PTR_ERR(bdev)); | |
915 | 915 | retcode = ERR_OPEN_DISK; |
916 | 916 | goto fail; |
917 | 917 | } |
918 | + nbc->backing_bdev = bdev; | |
918 | 919 | |
919 | - inode = nbc->lo_file->f_dentry->d_inode; | |
920 | - | |
921 | - if (!S_ISBLK(inode->i_mode)) { | |
922 | - retcode = ERR_DISK_NOT_BDEV; | |
923 | - goto fail; | |
924 | - } | |
925 | - | |
926 | - nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0); | |
927 | - if (IS_ERR(nbc->md_file)) { | |
920 | + /* | |
921 | + * meta_dev_idx >= 0: external fixed size, possibly multiple | |
922 | + * drbd sharing one meta device. TODO in that case, paranoia | |
923 | + * check that [md_bdev, meta_dev_idx] is not yet used by some | |
924 | + * other drbd minor! (if you use drbd.conf + drbdadm, that | |
925 | + * should check it for you already; but if you don't, or | |
926 | + * someone fooled it, we need to double check here) | |
927 | + */ | |
928 | + bdev = blkdev_get_by_path(nbc->dc.meta_dev, | |
929 | + FMODE_READ | FMODE_WRITE | FMODE_EXCL, | |
930 | + (nbc->dc.meta_dev_idx < 0) ? | |
931 | + (void *)mdev : (void *)drbd_m_holder); | |
932 | + if (IS_ERR(bdev)) { | |
928 | 933 | dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, |
929 | - PTR_ERR(nbc->md_file)); | |
930 | - nbc->md_file = NULL; | |
934 | + PTR_ERR(bdev)); | |
931 | 935 | retcode = ERR_OPEN_MD_DISK; |
932 | 936 | goto fail; |
933 | 937 | } |
938 | + nbc->md_bdev = bdev; | |
934 | 939 | |
935 | - inode2 = nbc->md_file->f_dentry->d_inode; | |
936 | - | |
937 | - if (!S_ISBLK(inode2->i_mode)) { | |
938 | - retcode = ERR_MD_NOT_BDEV; | |
940 | + if ((nbc->backing_bdev == nbc->md_bdev) != | |
941 | + (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || | |
942 | + nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { | |
943 | + retcode = ERR_MD_IDX_INVALID; | |
939 | 944 | goto fail; |
940 | 945 | } |
941 | 946 | |
942 | - nbc->backing_bdev = inode->i_bdev; | |
943 | - if (bd_claim(nbc->backing_bdev, mdev)) { | |
944 | - printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n", | |
945 | - nbc->backing_bdev, mdev, | |
946 | - nbc->backing_bdev->bd_holder, | |
947 | - nbc->backing_bdev->bd_contains->bd_holder, | |
948 | - nbc->backing_bdev->bd_holders); | |
949 | - retcode = ERR_BDCLAIM_DISK; | |
950 | - goto fail; | |
951 | - } | |
952 | - | |
953 | 947 | resync_lru = lc_create("resync", drbd_bm_ext_cache, |
954 | 948 | 61, sizeof(struct bm_extent), |
955 | 949 | offsetof(struct bm_extent, lce)); |
956 | 950 | if (!resync_lru) { |
957 | 951 | retcode = ERR_NOMEM; |
958 | - goto release_bdev_fail; | |
952 | + goto fail; | |
959 | 953 | } |
960 | 954 | |
961 | - /* meta_dev_idx >= 0: external fixed size, | |
962 | - * possibly multiple drbd sharing one meta device. | |
963 | - * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is | |
964 | - * not yet used by some other drbd minor! | |
965 | - * (if you use drbd.conf + drbdadm, | |
966 | - * that should check it for you already; but if you don't, or someone | |
967 | - * fooled it, we need to double check here) */ | |
968 | - nbc->md_bdev = inode2->i_bdev; | |
969 | - if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev | |
970 | - : (void *) drbd_m_holder)) { | |
971 | - retcode = ERR_BDCLAIM_MD_DISK; | |
972 | - goto release_bdev_fail; | |
973 | - } | |
974 | - | |
975 | - if ((nbc->backing_bdev == nbc->md_bdev) != | |
976 | - (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || | |
977 | - nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { | |
978 | - retcode = ERR_MD_IDX_INVALID; | |
979 | - goto release_bdev2_fail; | |
980 | - } | |
981 | - | |
982 | 955 | /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ |
983 | 956 | drbd_md_set_sector_offsets(mdev, nbc); |
984 | 957 | |
... | ... | @@ -987,7 +960,7 @@ |
987 | 960 | (unsigned long long) drbd_get_max_capacity(nbc), |
988 | 961 | (unsigned long long) nbc->dc.disk_size); |
989 | 962 | retcode = ERR_DISK_TO_SMALL; |
990 | - goto release_bdev2_fail; | |
963 | + goto fail; | |
991 | 964 | } |
992 | 965 | |
993 | 966 | if (nbc->dc.meta_dev_idx < 0) { |
... | ... | @@ -1004,7 +977,7 @@ |
1004 | 977 | dev_warn(DEV, "refusing attach: md-device too small, " |
1005 | 978 | "at least %llu sectors needed for this meta-disk type\n", |
1006 | 979 | (unsigned long long) min_md_device_sectors); |
1007 | - goto release_bdev2_fail; | |
980 | + goto fail; | |
1008 | 981 | } |
1009 | 982 | |
1010 | 983 | /* Make sure the new disk is big enough |
... | ... | @@ -1012,7 +985,7 @@ |
1012 | 985 | if (drbd_get_max_capacity(nbc) < |
1013 | 986 | drbd_get_capacity(mdev->this_bdev)) { |
1014 | 987 | retcode = ERR_DISK_TO_SMALL; |
1015 | - goto release_bdev2_fail; | |
988 | + goto fail; | |
1016 | 989 | } |
1017 | 990 | |
1018 | 991 | nbc->known_size = drbd_get_capacity(nbc->backing_bdev); |
... | ... | @@ -1035,7 +1008,7 @@ |
1035 | 1008 | retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); |
1036 | 1009 | drbd_resume_io(mdev); |
1037 | 1010 | if (retcode < SS_SUCCESS) |
1038 | - goto release_bdev2_fail; | |
1011 | + goto fail; | |
1039 | 1012 | |
1040 | 1013 | if (!get_ldev_if_state(mdev, D_ATTACHING)) |
1041 | 1014 | goto force_diskless; |
1042 | 1015 | |
... | ... | @@ -1269,18 +1242,14 @@ |
1269 | 1242 | force_diskless: |
1270 | 1243 | drbd_force_state(mdev, NS(disk, D_FAILED)); |
1271 | 1244 | drbd_md_sync(mdev); |
1272 | - release_bdev2_fail: | |
1273 | - if (nbc) | |
1274 | - bd_release(nbc->md_bdev); | |
1275 | - release_bdev_fail: | |
1276 | - if (nbc) | |
1277 | - bd_release(nbc->backing_bdev); | |
1278 | 1245 | fail: |
1279 | 1246 | if (nbc) { |
1280 | - if (nbc->lo_file) | |
1281 | - fput(nbc->lo_file); | |
1282 | - if (nbc->md_file) | |
1283 | - fput(nbc->md_file); | |
1247 | + if (nbc->backing_bdev) | |
1248 | + blkdev_put(nbc->backing_bdev, | |
1249 | + FMODE_READ | FMODE_WRITE | FMODE_EXCL); | |
1250 | + if (nbc->md_bdev) | |
1251 | + blkdev_put(nbc->md_bdev, | |
1252 | + FMODE_READ | FMODE_WRITE | FMODE_EXCL); | |
1284 | 1253 | kfree(nbc); |
1285 | 1254 | } |
1286 | 1255 | lc_destroy(resync_lru); |
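The replacement pattern drbd adopts here — blkdev_get_by_path() with FMODE_EXCL plus a holder cookie, torn down by blkdev_put() with the same mode — is the new idiom for exclusive opens that this series introduces in place of filp_open() + bd_claim(). A minimal sketch; the path and my_holder are placeholders:

	struct block_device *bdev;

	bdev = blkdev_get_by_path("/dev/example",	/* placeholder path */
				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
				  my_holder);		/* exclusive-claim cookie */
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	/* ... use the device ... */

	/* blkdev_put() must be called with the same mode used at open */
	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);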
drivers/block/loop.c
... | ... | @@ -395,11 +395,7 @@ |
395 | 395 | struct loop_device *lo = p->lo; |
396 | 396 | struct page *page = buf->page; |
397 | 397 | sector_t IV; |
398 | - int size, ret; | |
399 | - | |
400 | - ret = buf->ops->confirm(pipe, buf); | |
401 | - if (unlikely(ret)) | |
402 | - return ret; | |
398 | + int size; | |
403 | 399 | |
404 | 400 | IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + |
405 | 401 | (buf->offset >> 9); |
drivers/block/pktcdvd.c
... | ... | @@ -2296,15 +2296,12 @@ |
2296 | 2296 | * so bdget() can't fail. |
2297 | 2297 | */ |
2298 | 2298 | bdget(pd->bdev->bd_dev); |
2299 | - if ((ret = blkdev_get(pd->bdev, FMODE_READ))) | |
2299 | + if ((ret = blkdev_get(pd->bdev, FMODE_READ | FMODE_EXCL, pd))) | |
2300 | 2300 | goto out; |
2301 | 2301 | |
2302 | - if ((ret = bd_claim(pd->bdev, pd))) | |
2303 | - goto out_putdev; | |
2304 | - | |
2305 | 2302 | if ((ret = pkt_get_last_written(pd, &lba))) { |
2306 | 2303 | printk(DRIVER_NAME": pkt_get_last_written failed\n"); |
2307 | - goto out_unclaim; | |
2304 | + goto out_putdev; | |
2308 | 2305 | } |
2309 | 2306 | |
2310 | 2307 | set_capacity(pd->disk, lba << 2); |
... | ... | @@ -2314,7 +2311,7 @@ |
2314 | 2311 | q = bdev_get_queue(pd->bdev); |
2315 | 2312 | if (write) { |
2316 | 2313 | if ((ret = pkt_open_write(pd))) |
2317 | - goto out_unclaim; | |
2314 | + goto out_putdev; | |
2318 | 2315 | /* |
2319 | 2316 | * Some CDRW drives can not handle writes larger than one packet, |
2320 | 2317 | * even if the size is a multiple of the packet size. |
2321 | 2318 | |
2322 | 2319 | |
2323 | 2320 | |
... | ... | @@ -2329,23 +2326,21 @@ |
2329 | 2326 | } |
2330 | 2327 | |
2331 | 2328 | if ((ret = pkt_set_segment_merging(pd, q))) |
2332 | - goto out_unclaim; | |
2329 | + goto out_putdev; | |
2333 | 2330 | |
2334 | 2331 | if (write) { |
2335 | 2332 | if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) { |
2336 | 2333 | printk(DRIVER_NAME": not enough memory for buffers\n"); |
2337 | 2334 | ret = -ENOMEM; |
2338 | - goto out_unclaim; | |
2335 | + goto out_putdev; | |
2339 | 2336 | } |
2340 | 2337 | printk(DRIVER_NAME": %lukB available on disc\n", lba << 1); |
2341 | 2338 | } |
2342 | 2339 | |
2343 | 2340 | return 0; |
2344 | 2341 | |
2345 | -out_unclaim: | |
2346 | - bd_release(pd->bdev); | |
2347 | 2342 | out_putdev: |
2348 | - blkdev_put(pd->bdev, FMODE_READ); | |
2343 | + blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); | |
2349 | 2344 | out: |
2350 | 2345 | return ret; |
2351 | 2346 | } |
... | ... | @@ -2362,8 +2357,7 @@ |
2362 | 2357 | pkt_lock_door(pd, 0); |
2363 | 2358 | |
2364 | 2359 | pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); |
2365 | - bd_release(pd->bdev); | |
2366 | - blkdev_put(pd->bdev, FMODE_READ); | |
2360 | + blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); | |
2367 | 2361 | |
2368 | 2362 | pkt_shrink_pktlist(pd); |
2369 | 2363 | } |
... | ... | @@ -2733,7 +2727,7 @@ |
2733 | 2727 | bdev = bdget(dev); |
2734 | 2728 | if (!bdev) |
2735 | 2729 | return -ENOMEM; |
2736 | - ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY); | |
2730 | + ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL); | |
2737 | 2731 | if (ret) |
2738 | 2732 | return ret; |
2739 | 2733 |
drivers/cdrom/cdrom.c
... | ... | @@ -1348,7 +1348,10 @@ |
1348 | 1348 | if (!CDROM_CAN(CDC_SELECT_DISC)) |
1349 | 1349 | return -EDRIVE_CANT_DO_THIS; |
1350 | 1350 | |
1351 | - (void) cdi->ops->media_changed(cdi, slot); | |
1351 | + if (cdi->ops->check_events) | |
1352 | + cdi->ops->check_events(cdi, 0, slot); | |
1353 | + else | |
1354 | + cdi->ops->media_changed(cdi, slot); | |
1352 | 1355 | |
1353 | 1356 | if (slot == CDSL_NONE) { |
1354 | 1357 | /* set media changed bits, on both queues */ |
... | ... | @@ -1392,6 +1395,42 @@ |
1392 | 1395 | return slot; |
1393 | 1396 | } |
1394 | 1397 | |
1398 | +/* | |
1399 | + * As cdrom implements an extra ioctl consumer for the media-changed | 
1400 | + * event, it needs to buffer ->check_events() output so that an event | 
1401 | + * is lost on neither the usual VFS path nor the ioctl path. | 
1402 | + * cdi->{vfs|ioctl}_events are used to buffer pending events for each | |
1403 | + * path. | |
1404 | + * | |
1405 | + * XXX: Locking is non-existent. cdi->ops->check_events() can be | |
1406 | + * called in parallel and buffering fields are accessed without any | |
1407 | + * exclusion. The original media_changed code had the same problem. | |
1408 | + * It might be better to simply deprecate the CDROM_MEDIA_CHANGED ioctl | 
1409 | + * and remove this cruft altogether. It doesn't have much usefulness | |
1410 | + * at this point. | |
1411 | + */ | |
1412 | +static void cdrom_update_events(struct cdrom_device_info *cdi, | |
1413 | + unsigned int clearing) | |
1414 | +{ | |
1415 | + unsigned int events; | |
1416 | + | |
1417 | + events = cdi->ops->check_events(cdi, clearing, CDSL_CURRENT); | |
1418 | + cdi->vfs_events |= events; | |
1419 | + cdi->ioctl_events |= events; | |
1420 | +} | |
1421 | + | |
1422 | +unsigned int cdrom_check_events(struct cdrom_device_info *cdi, | |
1423 | + unsigned int clearing) | |
1424 | +{ | |
1425 | + unsigned int events; | |
1426 | + | |
1427 | + cdrom_update_events(cdi, clearing); | |
1428 | + events = cdi->vfs_events; | |
1429 | + cdi->vfs_events = 0; | |
1430 | + return events; | |
1431 | +} | |
1432 | +EXPORT_SYMBOL(cdrom_check_events); | |
1433 | + | |
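cdrom_check_events() drains only the VFS-side buffer; the legacy CDROM_MEDIA_CHANGED ioctl drains cdi->ioctl_events in media_changed() below. A sketch of the fan-out this buys (comments only, not kernel code):

	/*
	 * cdi->ops->check_events() --> cdrom_update_events()
	 *     |--> cdi->vfs_events    drained by cdrom_check_events()
	 *     `--> cdi->ioctl_events  drained by the ioctl path in
	 *                             media_changed() below
	 *
	 * Each hardware event is thus seen exactly once on each path.
	 */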
1395 | 1434 | /* We want to make media_changed accessible to the user through an |
1396 | 1435 | * ioctl. The main problem now is that we must double-buffer the |
1397 | 1436 | * low-level implementation, to assure that the VFS and the user both |
... | ... | @@ -1403,15 +1442,26 @@ |
1403 | 1442 | { |
1404 | 1443 | unsigned int mask = (1 << (queue & 1)); |
1405 | 1444 | int ret = !!(cdi->mc_flags & mask); |
1445 | + bool changed; | |
1406 | 1446 | |
1407 | 1447 | if (!CDROM_CAN(CDC_MEDIA_CHANGED)) |
1408 | - return ret; | |
1448 | + return ret; | |
1449 | + | |
1409 | 1450 | /* changed since last call? */ |
1410 | - if (cdi->ops->media_changed(cdi, CDSL_CURRENT)) { | |
1451 | + if (cdi->ops->check_events) { | |
1452 | + BUG_ON(!queue); /* shouldn't be called from VFS path */ | |
1453 | + cdrom_update_events(cdi, DISK_EVENT_MEDIA_CHANGE); | |
1454 | + changed = cdi->ioctl_events & DISK_EVENT_MEDIA_CHANGE; | |
1455 | + cdi->ioctl_events = 0; | |
1456 | + } else | |
1457 | + changed = cdi->ops->media_changed(cdi, CDSL_CURRENT); | |
1458 | + | |
1459 | + if (changed) { | |
1411 | 1460 | cdi->mc_flags = 0x3; /* set bit on both queues */ |
1412 | 1461 | ret |= 1; |
1413 | 1462 | cdi->media_written = 0; |
1414 | 1463 | } |
1464 | + | |
1415 | 1465 | cdi->mc_flags &= ~mask; /* clear bit */ |
1416 | 1466 | return ret; |
1417 | 1467 | } |
drivers/char/raw.c
... | ... | @@ -65,15 +65,12 @@ |
65 | 65 | if (!bdev) |
66 | 66 | goto out; |
67 | 67 | igrab(bdev->bd_inode); |
68 | - err = blkdev_get(bdev, filp->f_mode); | |
68 | + err = blkdev_get(bdev, filp->f_mode | FMODE_EXCL, raw_open); | |
69 | 69 | if (err) |
70 | 70 | goto out; |
71 | - err = bd_claim(bdev, raw_open); | |
72 | - if (err) | |
73 | - goto out1; | |
74 | 71 | err = set_blocksize(bdev, bdev_logical_block_size(bdev)); |
75 | 72 | if (err) |
76 | - goto out2; | |
73 | + goto out1; | |
77 | 74 | filp->f_flags |= O_DIRECT; |
78 | 75 | filp->f_mapping = bdev->bd_inode->i_mapping; |
79 | 76 | if (++raw_devices[minor].inuse == 1) |
80 | 77 | |
... | ... | @@ -83,10 +80,8 @@ |
83 | 80 | mutex_unlock(&raw_mutex); |
84 | 81 | return 0; |
85 | 82 | |
86 | -out2: | |
87 | - bd_release(bdev); | |
88 | 83 | out1: |
89 | - blkdev_put(bdev, filp->f_mode); | |
84 | + blkdev_put(bdev, filp->f_mode | FMODE_EXCL); | |
90 | 85 | out: |
91 | 86 | mutex_unlock(&raw_mutex); |
92 | 87 | return err; |
... | ... | @@ -110,8 +105,7 @@ |
110 | 105 | } |
111 | 106 | mutex_unlock(&raw_mutex); |
112 | 107 | |
113 | - bd_release(bdev); | |
114 | - blkdev_put(bdev, filp->f_mode); | |
108 | + blkdev_put(bdev, filp->f_mode | FMODE_EXCL); | |
115 | 109 | return 0; |
116 | 110 | } |
117 | 111 |
drivers/md/dm-table.c
... | ... | @@ -325,15 +325,18 @@ |
325 | 325 | |
326 | 326 | BUG_ON(d->dm_dev.bdev); |
327 | 327 | |
328 | - bdev = open_by_devnum(dev, d->dm_dev.mode); | |
328 | + bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr); | |
329 | 329 | if (IS_ERR(bdev)) |
330 | 330 | return PTR_ERR(bdev); |
331 | - r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md)); | |
332 | - if (r) | |
333 | - blkdev_put(bdev, d->dm_dev.mode); | |
334 | - else | |
335 | - d->dm_dev.bdev = bdev; | |
336 | - return r; | |
331 | + | |
332 | + r = bd_link_disk_holder(bdev, dm_disk(md)); | |
333 | + if (r) { | |
334 | + blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL); | |
335 | + return r; | |
336 | + } | |
337 | + | |
338 | + d->dm_dev.bdev = bdev; | |
339 | + return 0; | |
337 | 340 | } |
338 | 341 | |
339 | 342 | /* |
... | ... | @@ -344,8 +347,7 @@ |
344 | 347 | if (!d->dm_dev.bdev) |
345 | 348 | return; |
346 | 349 | |
347 | - bd_release_from_disk(d->dm_dev.bdev, dm_disk(md)); | |
348 | - blkdev_put(d->dm_dev.bdev, d->dm_dev.mode); | |
350 | + blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL); | |
349 | 351 | d->dm_dev.bdev = NULL; |
350 | 352 | } |
351 | 353 |
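Note the asymmetry in the dm conversion: holder symlinks are created explicitly with bd_link_disk_holder(), but close_dev() has no matching unlink call. The final exclusive blkdev_put() removes the links internally (see bd_unlink_disk_holder() in fs/block_dev.c below), so teardown stays a single call:

	/* sketch of dm's teardown after this change */
	blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL); /* also unlinks the holder */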
drivers/md/dm.c
... | ... | @@ -630,7 +630,7 @@ |
630 | 630 | queue_io(md, bio); |
631 | 631 | } else { |
632 | 632 | /* done with normal IO or empty flush */ |
633 | - trace_block_bio_complete(md->queue, bio); | |
633 | + trace_block_bio_complete(md->queue, bio, io_error); | |
634 | 634 | bio_endio(bio, io_error); |
635 | 635 | } |
636 | 636 | } |
... | ... | @@ -990,8 +990,8 @@ |
990 | 990 | if (r == DM_MAPIO_REMAPPED) { |
991 | 991 | /* the bio has been remapped so dispatch it */ |
992 | 992 | |
993 | - trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, | |
994 | - tio->io->bio->bi_bdev->bd_dev, sector); | |
993 | + trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone, | |
994 | + tio->io->bio->bi_bdev->bd_dev, sector); | |
995 | 995 | |
996 | 996 | generic_make_request(clone); |
997 | 997 | } else if (r < 0 || r == DM_MAPIO_REQUEUE) { |
drivers/md/md.c
... | ... | @@ -1879,7 +1879,7 @@ |
1879 | 1879 | rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); |
1880 | 1880 | |
1881 | 1881 | list_add_rcu(&rdev->same_set, &mddev->disks); |
1882 | - bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); | |
1882 | + bd_link_disk_holder(rdev->bdev, mddev->gendisk); | |
1883 | 1883 | |
1884 | 1884 | /* May as well allow recovery to be retried once */ |
1885 | 1885 | mddev->recovery_disabled = 0; |
... | ... | @@ -1906,7 +1906,6 @@ |
1906 | 1906 | MD_BUG(); |
1907 | 1907 | return; |
1908 | 1908 | } |
1909 | - bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); | |
1910 | 1909 | list_del_rcu(&rdev->same_set); |
1911 | 1910 | printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); |
1912 | 1911 | rdev->mddev = NULL; |
1913 | 1912 | |
... | ... | @@ -1934,19 +1933,13 @@ |
1934 | 1933 | struct block_device *bdev; |
1935 | 1934 | char b[BDEVNAME_SIZE]; |
1936 | 1935 | |
1937 | - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); | |
1936 | + bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, | |
1937 | + shared ? (mdk_rdev_t *)lock_rdev : rdev); | |
1938 | 1938 | if (IS_ERR(bdev)) { |
1939 | 1939 | printk(KERN_ERR "md: could not open %s.\n", |
1940 | 1940 | __bdevname(dev, b)); |
1941 | 1941 | return PTR_ERR(bdev); |
1942 | 1942 | } |
1943 | - err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev); | |
1944 | - if (err) { | |
1945 | - printk(KERN_ERR "md: could not bd_claim %s.\n", | |
1946 | - bdevname(bdev, b)); | |
1947 | - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); | |
1948 | - return err; | |
1949 | - } | |
1950 | 1943 | if (!shared) |
1951 | 1944 | set_bit(AllReserved, &rdev->flags); |
1952 | 1945 | rdev->bdev = bdev; |
... | ... | @@ -1959,8 +1952,7 @@ |
1959 | 1952 | rdev->bdev = NULL; |
1960 | 1953 | if (!bdev) |
1961 | 1954 | MD_BUG(); |
1962 | - bd_release(bdev); | |
1963 | - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); | |
1955 | + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | |
1964 | 1956 | } |
1965 | 1957 | |
1966 | 1958 | void md_autodetect_dev(dev_t dev); |
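md relies here on the holder-identity rule in bd_may_claim() (fs/block_dev.c): a claim succeeds when the device is unclaimed or already held by the same holder pointer. Using the address of lock_rdev as the cookie for every "shared" claim lets multiple arrays hold one device, while the per-rdev cookie keeps ordinary claims exclusive. The idea, condensed from the hunk above:

	/* common cookie => claims nest; unique cookie => claim is exclusive */
	void *holder = shared ? (void *)lock_rdev : rdev;

	bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
				 holder);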
drivers/mtd/devices/block2mtd.c
... | ... | @@ -224,7 +224,7 @@ |
224 | 224 | if (dev->blkdev) { |
225 | 225 | invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping, |
226 | 226 | 0, -1); |
227 | - close_bdev_exclusive(dev->blkdev, FMODE_READ|FMODE_WRITE); | |
227 | + blkdev_put(dev->blkdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | |
228 | 228 | } |
229 | 229 | |
230 | 230 | kfree(dev); |
... | ... | @@ -234,6 +234,7 @@ |
234 | 234 | /* FIXME: ensure that mtd->size % erase_size == 0 */ |
235 | 235 | static struct block2mtd_dev *add_device(char *devname, int erase_size) |
236 | 236 | { |
237 | + const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; | |
237 | 238 | struct block_device *bdev; |
238 | 239 | struct block2mtd_dev *dev; |
239 | 240 | char *name; |
... | ... | @@ -246,7 +247,7 @@ |
246 | 247 | return NULL; |
247 | 248 | |
248 | 249 | /* Get a handle on the device */ |
249 | - bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, NULL); | |
250 | + bdev = blkdev_get_by_path(devname, mode, dev); | |
250 | 251 | #ifndef MODULE |
251 | 252 | if (IS_ERR(bdev)) { |
252 | 253 | |
... | ... | @@ -254,9 +255,8 @@ |
254 | 255 | to resolve the device name by other means. */ |
255 | 256 | |
256 | 257 | dev_t devt = name_to_dev_t(devname); |
257 | - if (devt) { | |
258 | - bdev = open_by_devnum(devt, FMODE_WRITE | FMODE_READ); | |
259 | - } | |
258 | + if (devt) | |
259 | + bdev = blkdev_get_by_dev(devt, mode, dev); | |
260 | 260 | } |
261 | 261 | #endif |
262 | 262 |
drivers/s390/block/dasd_genhd.c
... | ... | @@ -103,7 +103,7 @@ |
103 | 103 | struct block_device *bdev; |
104 | 104 | |
105 | 105 | bdev = bdget_disk(block->gdp, 0); |
106 | - if (!bdev || blkdev_get(bdev, FMODE_READ) < 0) | |
106 | + if (!bdev || blkdev_get(bdev, FMODE_READ, NULL) < 0) | |
107 | 107 | return -ENODEV; |
108 | 108 | /* |
109 | 109 | * See fs/partition/check.c:register_disk,rescan_partitions |
drivers/scsi/scsi_lib.c
... | ... | @@ -1977,8 +1977,7 @@ |
1977 | 1977 | * in. |
1978 | 1978 | * |
1979 | 1979 | * Returns zero if unsuccessful or an error if TUR failed. For |
1980 | - * removable media, a return of NOT_READY or UNIT_ATTENTION is | |
1981 | - * translated to success, with the ->changed flag updated. | |
1980 | + * removable media, a UNIT ATTENTION result sets the ->changed flag. | 
1982 | 1981 | **/ |
1983 | 1982 | int |
1984 | 1983 | scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, |
... | ... | @@ -2005,16 +2004,6 @@ |
2005 | 2004 | } while (scsi_sense_valid(sshdr) && |
2006 | 2005 | sshdr->sense_key == UNIT_ATTENTION && --retries); |
2007 | 2006 | |
2008 | - if (!sshdr) | |
2009 | - /* could not allocate sense buffer, so can't process it */ | |
2010 | - return result; | |
2011 | - | |
2012 | - if (sdev->removable && scsi_sense_valid(sshdr) && | |
2013 | - (sshdr->sense_key == UNIT_ATTENTION || | |
2014 | - sshdr->sense_key == NOT_READY)) { | |
2015 | - sdev->changed = 1; | |
2016 | - result = 0; | |
2017 | - } | |
2018 | 2007 | if (!sshdr_external) |
2019 | 2008 | kfree(sshdr); |
2020 | 2009 | return result; |
drivers/scsi/sd.c
... | ... | @@ -1043,15 +1043,7 @@ |
1043 | 1043 | sshdr); |
1044 | 1044 | } |
1045 | 1045 | |
1046 | - /* | |
1047 | - * Unable to test, unit probably not ready. This usually | |
1048 | - * means there is no disc in the drive. Mark as changed, | |
1049 | - * and we will figure it out later once the drive is | |
1050 | - * available again. | |
1051 | - */ | |
1052 | - if (retval || (scsi_sense_valid(sshdr) && | |
1053 | - /* 0x3a is medium not present */ | |
1054 | - sshdr->asc == 0x3a)) { | |
1046 | + if (retval) { | |
1055 | 1047 | set_media_not_present(sdkp); |
1056 | 1048 | goto out; |
1057 | 1049 | } |
drivers/scsi/sr.c
... | ... | @@ -104,14 +104,15 @@ |
104 | 104 | static void get_sectorsize(struct scsi_cd *); |
105 | 105 | static void get_capabilities(struct scsi_cd *); |
106 | 106 | |
107 | -static int sr_media_change(struct cdrom_device_info *, int); | |
107 | +static unsigned int sr_check_events(struct cdrom_device_info *cdi, | |
108 | + unsigned int clearing, int slot); | |
108 | 109 | static int sr_packet(struct cdrom_device_info *, struct packet_command *); |
109 | 110 | |
110 | 111 | static struct cdrom_device_ops sr_dops = { |
111 | 112 | .open = sr_open, |
112 | 113 | .release = sr_release, |
113 | 114 | .drive_status = sr_drive_status, |
114 | - .media_changed = sr_media_change, | |
115 | + .check_events = sr_check_events, | |
115 | 116 | .tray_move = sr_tray_move, |
116 | 117 | .lock_door = sr_lock_door, |
117 | 118 | .select_speed = sr_select_speed, |
118 | 119 | |
119 | 120 | |
120 | 121 | |
121 | 122 | |
122 | 123 | |
123 | 124 | |
124 | 125 | |
125 | 126 | |
126 | 127 | |
127 | 128 | |
128 | 129 | |
129 | 130 | |
... | ... | @@ -165,90 +166,96 @@ |
165 | 166 | mutex_unlock(&sr_ref_mutex); |
166 | 167 | } |
167 | 168 | |
168 | -/* identical to scsi_test_unit_ready except that it doesn't | |
169 | - * eat the NOT_READY returns for removable media */ | |
170 | -int sr_test_unit_ready(struct scsi_device *sdev, struct scsi_sense_hdr *sshdr) | |
169 | +static unsigned int sr_get_events(struct scsi_device *sdev) | |
171 | 170 | { |
172 | - int retries = MAX_RETRIES; | |
173 | - int the_result; | |
174 | - u8 cmd[] = {TEST_UNIT_READY, 0, 0, 0, 0, 0 }; | |
171 | + u8 buf[8]; | |
172 | + u8 cmd[] = { GET_EVENT_STATUS_NOTIFICATION, | |
173 | + 1, /* polled */ | |
174 | + 0, 0, /* reserved */ | |
175 | + 1 << 4, /* notification class: media */ | |
176 | + 0, 0, /* reserved */ | |
177 | + 0, sizeof(buf), /* allocation length */ | |
178 | + 0, /* control */ | |
179 | + }; | |
180 | + struct event_header *eh = (void *)buf; | |
181 | + struct media_event_desc *med = (void *)(buf + 4); | |
182 | + struct scsi_sense_hdr sshdr; | |
183 | + int result; | |
175 | 184 | |
176 | - /* issue TEST_UNIT_READY until the initial startup UNIT_ATTENTION | |
177 | - * conditions are gone, or a timeout happens | |
178 | - */ | |
179 | - do { | |
180 | - the_result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, | |
181 | - 0, sshdr, SR_TIMEOUT, | |
182 | - retries--, NULL); | |
183 | - if (scsi_sense_valid(sshdr) && | |
184 | - sshdr->sense_key == UNIT_ATTENTION) | |
185 | - sdev->changed = 1; | |
185 | + result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, sizeof(buf), | |
186 | + &sshdr, SR_TIMEOUT, MAX_RETRIES, NULL); | |
187 | + if (scsi_sense_valid(&sshdr) && sshdr.sense_key == UNIT_ATTENTION) | |
188 | + return DISK_EVENT_MEDIA_CHANGE; | |
186 | 189 | |
187 | - } while (retries > 0 && | |
188 | - (!scsi_status_is_good(the_result) || | |
189 | - (scsi_sense_valid(sshdr) && | |
190 | - sshdr->sense_key == UNIT_ATTENTION))); | |
191 | - return the_result; | |
190 | + if (result || be16_to_cpu(eh->data_len) < sizeof(*med)) | |
191 | + return 0; | |
192 | + | |
193 | + if (eh->nea || eh->notification_class != 0x4) | |
194 | + return 0; | |
195 | + | |
196 | + if (med->media_event_code == 1) | |
197 | + return DISK_EVENT_EJECT_REQUEST; | |
198 | + else if (med->media_event_code == 2) | |
199 | + return DISK_EVENT_MEDIA_CHANGE; | |
200 | + return 0; | |
192 | 201 | } |
193 | 202 | |
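The bare 1 and 2 above are the MMC GET EVENT STATUS NOTIFICATION media-class event codes. Named constants (illustrative only; this patch keeps the literals) make the mapping explicit:

	enum {	/* MMC media event codes as consumed by sr_get_events() */
		MEC_NO_CHANGE       = 0,
		MEC_EJECT_REQUESTED = 1,	/* -> DISK_EVENT_EJECT_REQUEST */
		MEC_NEW_MEDIA       = 2,	/* -> DISK_EVENT_MEDIA_CHANGE */
	};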
194 | 203 | /* |
195 | - * This function checks to see if the media has been changed in the | |
196 | - * CDROM drive. It is possible that we have already sensed a change, | |
197 | - * or the drive may have sensed one and not yet reported it. We must | |
198 | - * be ready for either case. This function always reports the current | |
199 | - * value of the changed bit. If flag is 0, then the changed bit is reset. | |
200 | - * This function could be done as an ioctl, but we would need to have | |
201 | - * an inode for that to work, and we do not always have one. | |
204 | + * This function checks to see if the media has been changed or the | 
205 | + * eject button has been pressed. It is possible that we have already | 
206 | + * sensed a change, or the drive may have sensed one and not yet | |
207 | + * reported it. The past events are accumulated in sdev->changed and | |
208 | + * returned together with the current state. | |
202 | 209 | */ |
203 | - | |
204 | -static int sr_media_change(struct cdrom_device_info *cdi, int slot) | |
210 | +static unsigned int sr_check_events(struct cdrom_device_info *cdi, | |
211 | + unsigned int clearing, int slot) | |
205 | 212 | { |
206 | 213 | struct scsi_cd *cd = cdi->handle; |
207 | - int retval; | |
208 | - struct scsi_sense_hdr *sshdr; | |
214 | + bool last_present; | |
215 | + struct scsi_sense_hdr sshdr; | |
216 | + unsigned int events; | |
217 | + int ret; | |
209 | 218 | |
210 | - if (CDSL_CURRENT != slot) { | |
211 | - /* no changer support */ | |
212 | - return -EINVAL; | |
213 | - } | |
219 | + /* no changer support */ | |
220 | + if (CDSL_CURRENT != slot) | |
221 | + return 0; | |
214 | 222 | |
215 | - sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL); | |
216 | - retval = sr_test_unit_ready(cd->device, sshdr); | |
217 | - if (retval || (scsi_sense_valid(sshdr) && | |
218 | - /* 0x3a is medium not present */ | |
219 | - sshdr->asc == 0x3a)) { | |
220 | - /* Media not present or unable to test, unit probably not | |
221 | - * ready. This usually means there is no disc in the drive. | |
222 | - * Mark as changed, and we will figure it out later once | |
223 | - * the drive is available again. | |
224 | - */ | |
225 | - cd->device->changed = 1; | |
226 | - /* This will force a flush, if called from check_disk_change */ | |
227 | - retval = 1; | |
228 | - goto out; | |
229 | - }; | |
223 | + events = sr_get_events(cd->device); | |
224 | + /* | |
225 | + * GET_EVENT_STATUS_NOTIFICATION is enough unless MEDIA_CHANGE | |
226 | + * is being cleared. Note that there are devices which hang | |
227 | + * if asked to execute TUR repeatedly. | |
228 | + */ | |
229 | + if (!(clearing & DISK_EVENT_MEDIA_CHANGE)) | |
230 | + goto skip_tur; | |
230 | 231 | |
231 | - retval = cd->device->changed; | |
232 | - cd->device->changed = 0; | |
233 | - /* If the disk changed, the capacity will now be different, | |
234 | - * so we force a re-read of this information */ | |
235 | - if (retval) { | |
236 | - /* check multisession offset etc */ | |
237 | - sr_cd_check(cdi); | |
238 | - get_sectorsize(cd); | |
232 | + /* let's see whether the media is there with TUR */ | |
233 | + last_present = cd->media_present; | |
234 | + ret = scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr); | |
235 | + | |
236 | + /* | |
237 | + * Media is considered to be present if TUR succeeds or fails with | |
238 | + * sense data indicating something other than media-not-present | |
239 | + * (ASC 0x3a). | |
240 | + */ | |
241 | + cd->media_present = scsi_status_is_good(ret) || | |
242 | + (scsi_sense_valid(&sshdr) && sshdr.asc != 0x3a); | |
243 | + | |
244 | + if (last_present != cd->media_present) | |
245 | + events |= DISK_EVENT_MEDIA_CHANGE; | |
246 | +skip_tur: | |
247 | + if (cd->device->changed) { | |
248 | + events |= DISK_EVENT_MEDIA_CHANGE; | |
249 | + cd->device->changed = 0; | |
239 | 250 | } |
240 | 251 | |
241 | -out: | |
242 | - /* Notify userspace, that media has changed. */ | |
243 | - if (retval != cd->previous_state) | |
252 | + /* for backward compatibility */ | |
253 | + if (events & DISK_EVENT_MEDIA_CHANGE) | |
244 | 254 | sdev_evt_send_simple(cd->device, SDEV_EVT_MEDIA_CHANGE, |
245 | 255 | GFP_KERNEL); |
246 | - cd->previous_state = retval; | |
247 | - kfree(sshdr); | |
248 | - | |
249 | - return retval; | |
256 | + return events; | |
250 | 257 | } |
251 | - | |
258 | + | |
252 | 259 | /* |
253 | 260 | * sr_done is the interrupt routine for the device driver. |
254 | 261 | * |
... | ... | @@ -533,19 +540,35 @@ |
533 | 540 | return ret; |
534 | 541 | } |
535 | 542 | |
536 | -static int sr_block_media_changed(struct gendisk *disk) | |
543 | +static unsigned int sr_block_check_events(struct gendisk *disk, | |
544 | + unsigned int clearing) | |
537 | 545 | { |
538 | 546 | struct scsi_cd *cd = scsi_cd(disk); |
539 | - return cdrom_media_changed(&cd->cdi); | |
547 | + return cdrom_check_events(&cd->cdi, clearing); | |
540 | 548 | } |
541 | 549 | |
550 | +static int sr_block_revalidate_disk(struct gendisk *disk) | |
551 | +{ | |
552 | + struct scsi_cd *cd = scsi_cd(disk); | |
553 | + struct scsi_sense_hdr sshdr; | |
554 | + | |
555 | + /* if the unit is not ready, nothing more to do */ | |
556 | + if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr)) | |
557 | + return 0; | |
558 | + | |
559 | + sr_cd_check(&cd->cdi); | |
560 | + get_sectorsize(cd); | |
561 | + return 0; | |
562 | +} | |
563 | + | |
542 | 564 | static const struct block_device_operations sr_bdops = |
543 | 565 | { |
544 | 566 | .owner = THIS_MODULE, |
545 | 567 | .open = sr_block_open, |
546 | 568 | .release = sr_block_release, |
547 | 569 | .ioctl = sr_block_ioctl, |
548 | - .media_changed = sr_block_media_changed, | |
570 | + .check_events = sr_block_check_events, | |
571 | + .revalidate_disk = sr_block_revalidate_disk, | |
549 | 572 | /* |
550 | 573 | * No compat_ioctl for now because sr_block_ioctl never |
551 | 574 | * seems to pass arbitrary ioctls down to host drivers.
... | ... | @@ -618,6 +641,7 @@ |
618 | 641 | sprintf(disk->disk_name, "sr%d", minor); |
619 | 642 | disk->fops = &sr_bdops; |
620 | 643 | disk->flags = GENHD_FL_CD; |
644 | + disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST; | |
621 | 645 | |
622 | 646 | blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT); |
623 | 647 | |
... | ... | @@ -627,7 +651,7 @@ |
627 | 651 | cd->disk = disk; |
628 | 652 | cd->capacity = 0x1fffff; |
629 | 653 | cd->device->changed = 1; /* force recheck CD type */ |
630 | - cd->previous_state = 1; | |
654 | + cd->media_present = 1; | |
631 | 655 | cd->use = 1; |
632 | 656 | cd->readcd_known = 0; |
633 | 657 | cd->readcd_cdda = 0; |
... | ... | @@ -780,7 +804,7 @@ |
780 | 804 | } |
781 | 805 | |
782 | 806 | /* eat unit attentions */ |
783 | - sr_test_unit_ready(cd->device, &sshdr); | |
807 | + scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr); | |
784 | 808 | |
785 | 809 | /* ask for mode page 0x2a */ |
786 | 810 | rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, 128, |
drivers/scsi/sr.h
... | ... | @@ -40,7 +40,7 @@ |
40 | 40 | unsigned xa_flag:1; /* CD has XA sectors ? */ |
41 | 41 | unsigned readcd_known:1; /* drive supports READ_CD (0xbe) */ |
42 | 42 | unsigned readcd_cdda:1; /* reading audio data using READ_CD */ |
43 | - unsigned previous_state:1; /* media has changed */ | |
43 | + unsigned media_present:1; /* media is present */ | |
44 | 44 | struct cdrom_device_info cdi; |
45 | 45 | /* We hold gendisk and scsi_device references on probe and use |
46 | 46 | * the refs on this kref to decide when to release them */ |
... | ... | @@ -61,7 +61,6 @@ |
61 | 61 | int sr_audio_ioctl(struct cdrom_device_info *, unsigned int, void *); |
62 | 62 | |
63 | 63 | int sr_is_xa(Scsi_CD *); |
64 | -int sr_test_unit_ready(struct scsi_device *sdev, struct scsi_sense_hdr *sshdr); | |
65 | 64 | |
66 | 65 | /* sr_vendor.c */ |
67 | 66 | void sr_vendor_init(Scsi_CD *); |
drivers/scsi/sr_ioctl.c
... | ... | @@ -307,7 +307,7 @@ |
307 | 307 | /* we have no changer support */ |
308 | 308 | return -EINVAL; |
309 | 309 | } |
310 | - if (0 == sr_test_unit_ready(cd->device, &sshdr)) | |
310 | + if (!scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr)) | |
311 | 311 | return CDS_DISC_OK; |
312 | 312 | |
313 | 313 | /* SK/ASC/ASCQ of 2/4/1 means "unit is becoming ready" */ |
drivers/usb/gadget/storage_common.c
... | ... | @@ -543,7 +543,7 @@ |
543 | 543 | ro = curlun->initially_ro; |
544 | 544 | if (!ro) { |
545 | 545 | filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0); |
546 | - if (-EROFS == PTR_ERR(filp)) | |
546 | + if (PTR_ERR(filp) == -EROFS || PTR_ERR(filp) == -EACCES) | |
547 | 547 | ro = 1; |
548 | 548 | } |
549 | 549 | if (ro) |
... | ... | @@ -558,10 +558,7 @@ |
558 | 558 | |
559 | 559 | if (filp->f_path.dentry) |
560 | 560 | inode = filp->f_path.dentry->d_inode; |
561 | - if (inode && S_ISBLK(inode->i_mode)) { | |
562 | - if (bdev_read_only(inode->i_bdev)) | |
563 | - ro = 1; | |
564 | - } else if (!inode || !S_ISREG(inode->i_mode)) { | |
561 | + if (!inode || (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) { | |
565 | 562 | LINFO(curlun, "invalid file type: %s\n", filename); |
566 | 563 | goto out; |
567 | 564 | } |
fs/bio-integrity.c
... | ... | @@ -782,7 +782,12 @@ |
782 | 782 | { |
783 | 783 | unsigned int i; |
784 | 784 | |
785 | - kintegrityd_wq = create_workqueue("kintegrityd"); | |
785 | + /* | |
786 | + * kintegrityd won't block much but may burn a lot of CPU cycles. | |
787 | + * Make it highpri CPU intensive wq with max concurrency of 1. | |
788 | + */ | |
789 | + kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM | | |
790 | + WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1); | |
786 | 791 | if (!kintegrityd_wq) |
787 | 792 | panic("Failed to create kintegrityd\n"); |
788 | 793 |
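For reference, alloc_workqueue(name, flags, max_active) gives finer control than the create_workqueue() call it replaces: WQ_MEM_RECLAIM guarantees a rescuer thread so the queue keeps making progress under memory pressure, WQ_HIGHPRI queues work at elevated priority, WQ_CPU_INTENSIVE exempts work items from concurrency management, and max_active = 1 caps in-flight work at one item per CPU. A plainer call for comparison (sketch, hypothetical driver name):

	wq = alloc_workqueue("mydrv", WQ_MEM_RECLAIM, 1);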
fs/block_dev.c
... | ... | @@ -432,9 +432,6 @@ |
432 | 432 | mutex_init(&bdev->bd_mutex); |
433 | 433 | INIT_LIST_HEAD(&bdev->bd_inodes); |
434 | 434 | INIT_LIST_HEAD(&bdev->bd_list); |
435 | -#ifdef CONFIG_SYSFS | |
436 | - INIT_LIST_HEAD(&bdev->bd_holder_list); | |
437 | -#endif | |
438 | 435 | inode_init_once(&ei->vfs_inode); |
439 | 436 | /* Initialize mutex for freeze. */ |
440 | 437 | mutex_init(&bdev->bd_fsfreeze_mutex); |
... | ... | @@ -669,7 +666,7 @@ |
669 | 666 | else if (bdev->bd_contains == bdev) |
670 | 667 | return true; /* is a whole device which isn't held */ |
671 | 668 | |
672 | - else if (whole->bd_holder == bd_claim) | |
669 | + else if (whole->bd_holder == bd_may_claim) | |
673 | 670 | return true; /* is a partition of a device that is being partitioned */ |
674 | 671 | else if (whole->bd_holder != NULL) |
675 | 672 | return false; /* is a partition of a held device */ |
... | ... | @@ -781,440 +778,88 @@ |
781 | 778 | } |
782 | 779 | } |
783 | 780 | |
784 | -/* releases bdev_lock */ | |
785 | -static void __bd_abort_claiming(struct block_device *whole, void *holder) | |
786 | -{ | |
787 | - BUG_ON(whole->bd_claiming != holder); | |
788 | - whole->bd_claiming = NULL; | |
789 | - wake_up_bit(&whole->bd_claiming, 0); | |
790 | - | |
791 | - spin_unlock(&bdev_lock); | |
792 | - bdput(whole); | |
793 | -} | |
794 | - | |
795 | -/** | |
796 | - * bd_abort_claiming - abort claiming a block device | |
797 | - * @whole: whole block device returned by bd_start_claiming() | |
798 | - * @holder: holder trying to claim @bdev | |
799 | - * | |
800 | - * Abort a claiming block started by bd_start_claiming(). Note that | |
801 | - * @whole is not the block device to be claimed but the whole device | |
802 | - * returned by bd_start_claiming(). | |
803 | - * | |
804 | - * CONTEXT: | |
805 | - * Grabs and releases bdev_lock. | |
806 | - */ | |
807 | -static void bd_abort_claiming(struct block_device *whole, void *holder) | |
808 | -{ | |
809 | - spin_lock(&bdev_lock); | |
810 | - __bd_abort_claiming(whole, holder); /* releases bdev_lock */ | |
811 | -} | |
812 | - | |
813 | -/* increment holders when we have a legitimate claim. requires bdev_lock */ | |
814 | -static void __bd_claim(struct block_device *bdev, struct block_device *whole, | |
815 | - void *holder) | |
816 | -{ | |
817 | - /* note that for a whole device bd_holders | |
818 | - * will be incremented twice, and bd_holder will | |
819 | - * be set to bd_claim before being set to holder | |
820 | - */ | |
821 | - whole->bd_holders++; | |
822 | - whole->bd_holder = bd_claim; | |
823 | - bdev->bd_holders++; | |
824 | - bdev->bd_holder = holder; | |
825 | -} | |
826 | - | |
827 | -/** | |
828 | - * bd_finish_claiming - finish claiming a block device | |
829 | - * @bdev: block device of interest (passed to bd_start_claiming()) | |
830 | - * @whole: whole block device returned by bd_start_claiming() | |
831 | - * @holder: holder trying to claim @bdev | |
832 | - * | |
833 | - * Finish a claiming block started by bd_start_claiming(). | |
834 | - * | |
835 | - * CONTEXT: | |
836 | - * Grabs and releases bdev_lock. | |
837 | - */ | |
838 | -static void bd_finish_claiming(struct block_device *bdev, | |
839 | - struct block_device *whole, void *holder) | |
840 | -{ | |
841 | - spin_lock(&bdev_lock); | |
842 | - BUG_ON(!bd_may_claim(bdev, whole, holder)); | |
843 | - __bd_claim(bdev, whole, holder); | |
844 | - __bd_abort_claiming(whole, holder); /* not actually an abort */ | |
845 | -} | |
846 | - | |
847 | -/** | |
848 | - * bd_claim - claim a block device | |
849 | - * @bdev: block device to claim | |
850 | - * @holder: holder trying to claim @bdev | |
851 | - * | |
852 | - * Try to claim @bdev which must have been opened successfully. | |
853 | - * | |
854 | - * CONTEXT: | |
855 | - * Might sleep. | |
856 | - * | |
857 | - * RETURNS: | |
858 | - * 0 if successful, -EBUSY if @bdev is already claimed. | |
859 | - */ | |
860 | -int bd_claim(struct block_device *bdev, void *holder) | |
861 | -{ | |
862 | - struct block_device *whole = bdev->bd_contains; | |
863 | - int res; | |
864 | - | |
865 | - might_sleep(); | |
866 | - | |
867 | - spin_lock(&bdev_lock); | |
868 | - res = bd_prepare_to_claim(bdev, whole, holder); | |
869 | - if (res == 0) | |
870 | - __bd_claim(bdev, whole, holder); | |
871 | - spin_unlock(&bdev_lock); | |
872 | - | |
873 | - return res; | |
874 | -} | |
875 | -EXPORT_SYMBOL(bd_claim); | |
876 | - | |
877 | -void bd_release(struct block_device *bdev) | |
878 | -{ | |
879 | - spin_lock(&bdev_lock); | |
880 | - if (!--bdev->bd_contains->bd_holders) | |
881 | - bdev->bd_contains->bd_holder = NULL; | |
882 | - if (!--bdev->bd_holders) | |
883 | - bdev->bd_holder = NULL; | |
884 | - spin_unlock(&bdev_lock); | |
885 | -} | |
886 | - | |
887 | -EXPORT_SYMBOL(bd_release); | |
888 | - | |
889 | 781 | #ifdef CONFIG_SYSFS |
890 | -/* | |
891 | - * Functions for bd_claim_by_kobject / bd_release_from_kobject | |
892 | - * | |
893 | - * If a kobject is passed to bd_claim_by_kobject() | |
894 | - * and the kobject has a parent directory, | |
895 | - * following symlinks are created: | |
896 | - * o from the kobject to the claimed bdev | |
897 | - * o from "holders" directory of the bdev to the parent of the kobject | |
898 | - * bd_release_from_kobject() removes these symlinks. | |
899 | - * | |
900 | - * Example: | |
901 | - * If /dev/dm-0 maps to /dev/sda, kobject corresponding to | |
902 | - * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then: | |
903 | - * /sys/block/dm-0/slaves/sda --> /sys/block/sda | |
904 | - * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 | |
905 | - */ | |
906 | - | |
907 | 782 | static int add_symlink(struct kobject *from, struct kobject *to) |
908 | 783 | { |
909 | - if (!from || !to) | |
910 | - return 0; | |
911 | 784 | return sysfs_create_link(from, to, kobject_name(to)); |
912 | 785 | } |
913 | 786 | |
914 | 787 | static void del_symlink(struct kobject *from, struct kobject *to) |
915 | 788 | { |
916 | - if (!from || !to) | |
917 | - return; | |
918 | 789 | sysfs_remove_link(from, kobject_name(to)); |
919 | 790 | } |
920 | 791 | |
921 | -/* | |
922 | - * 'struct bd_holder' contains pointers to kobjects symlinked by | |
923 | - * bd_claim_by_kobject. | |
924 | - * It's connected to bd_holder_list which is protected by bdev->bd_sem. | |
925 | - */ | |
926 | -struct bd_holder { | |
927 | - struct list_head list; /* chain of holders of the bdev */ | |
928 | - int count; /* references from the holder */ | |
929 | - struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */ | |
930 | - struct kobject *hdev; /* e.g. "/block/dm-0" */ | |
931 | - struct kobject *hdir; /* e.g. "/block/sda/holders" */ | |
932 | - struct kobject *sdev; /* e.g. "/block/sda" */ | |
933 | -}; | |
934 | - | |
935 | -/* | |
936 | - * Get references of related kobjects at once. | |
937 | - * Returns 1 on success. 0 on failure. | |
938 | - * | |
939 | - * Should call bd_holder_release_dirs() after successful use. | |
940 | - */ | |
941 | -static int bd_holder_grab_dirs(struct block_device *bdev, | |
942 | - struct bd_holder *bo) | |
943 | -{ | |
944 | - if (!bdev || !bo) | |
945 | - return 0; | |
946 | - | |
947 | - bo->sdir = kobject_get(bo->sdir); | |
948 | - if (!bo->sdir) | |
949 | - return 0; | |
950 | - | |
951 | - bo->hdev = kobject_get(bo->sdir->parent); | |
952 | - if (!bo->hdev) | |
953 | - goto fail_put_sdir; | |
954 | - | |
955 | - bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); | |
956 | - if (!bo->sdev) | |
957 | - goto fail_put_hdev; | |
958 | - | |
959 | - bo->hdir = kobject_get(bdev->bd_part->holder_dir); | |
960 | - if (!bo->hdir) | |
961 | - goto fail_put_sdev; | |
962 | - | |
963 | - return 1; | |
964 | - | |
965 | -fail_put_sdev: | |
966 | - kobject_put(bo->sdev); | |
967 | -fail_put_hdev: | |
968 | - kobject_put(bo->hdev); | |
969 | -fail_put_sdir: | |
970 | - kobject_put(bo->sdir); | |
971 | - | |
972 | - return 0; | |
973 | -} | |
974 | - | |
975 | -/* Put references of related kobjects at once. */ | |
976 | -static void bd_holder_release_dirs(struct bd_holder *bo) | |
977 | -{ | |
978 | - kobject_put(bo->hdir); | |
979 | - kobject_put(bo->sdev); | |
980 | - kobject_put(bo->hdev); | |
981 | - kobject_put(bo->sdir); | |
982 | -} | |
983 | - | |
984 | -static struct bd_holder *alloc_bd_holder(struct kobject *kobj) | |
985 | -{ | |
986 | - struct bd_holder *bo; | |
987 | - | |
988 | - bo = kzalloc(sizeof(*bo), GFP_KERNEL); | |
989 | - if (!bo) | |
990 | - return NULL; | |
991 | - | |
992 | - bo->count = 1; | |
993 | - bo->sdir = kobj; | |
994 | - | |
995 | - return bo; | |
996 | -} | |
997 | - | |
998 | -static void free_bd_holder(struct bd_holder *bo) | |
999 | -{ | |
1000 | - kfree(bo); | |
1001 | -} | |
1002 | - | |
1003 | 792 | /** |
1004 | - * find_bd_holder - find matching struct bd_holder from the block device | |
793 | + * bd_link_disk_holder - create symlinks between holding disk and slave bdev | |
794 | + * @bdev: the claimed slave bdev | |
795 | + * @disk: the holding disk | |
1005 | 796 | * |
1006 | - * @bdev: struct block device to be searched | |
1007 | - * @bo: target struct bd_holder | |
797 | + * This function creates the following sysfs symlinks. | 
1008 | 798 | * |
1009 | - * Returns matching entry with @bo in @bdev->bd_holder_list. | |
1010 | - * If found, increment the reference count and return the pointer. | |
1011 | - * If not found, returns NULL. | |
1012 | - */ | |
1013 | -static struct bd_holder *find_bd_holder(struct block_device *bdev, | |
1014 | - struct bd_holder *bo) | |
1015 | -{ | |
1016 | - struct bd_holder *tmp; | |
1017 | - | |
1018 | - list_for_each_entry(tmp, &bdev->bd_holder_list, list) | |
1019 | - if (tmp->sdir == bo->sdir) { | |
1020 | - tmp->count++; | |
1021 | - return tmp; | |
1022 | - } | |
1023 | - | |
1024 | - return NULL; | |
1025 | -} | |
1026 | - | |
1027 | -/** | |
1028 | - * add_bd_holder - create sysfs symlinks for bd_claim() relationship | |
799 | + * - from "slaves" directory of the holder @disk to the claimed @bdev | |
800 | + * - from "holders" directory of the @bdev to the holder @disk | |
1029 | 801 | * |
1030 | - * @bdev: block device to be bd_claimed | |
1031 | - * @bo: preallocated and initialized by alloc_bd_holder() | |
802 | + * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is | |
803 | + * passed to bd_link_disk_holder(), then: | |
1032 | 804 | * |
1033 | - * Add @bo to @bdev->bd_holder_list, create symlinks. | |
805 | + * /sys/block/dm-0/slaves/sda --> /sys/block/sda | |
806 | + * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 | |
1034 | 807 | * |
1035 | - * Returns 0 if symlinks are created. | |
1036 | - * Returns -ve if something fails. | |
1037 | - */ | |
1038 | -static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) | |
1039 | -{ | |
1040 | - int err; | |
1041 | - | |
1042 | - if (!bo) | |
1043 | - return -EINVAL; | |
1044 | - | |
1045 | - if (!bd_holder_grab_dirs(bdev, bo)) | |
1046 | - return -EBUSY; | |
1047 | - | |
1048 | - err = add_symlink(bo->sdir, bo->sdev); | |
1049 | - if (err) | |
1050 | - return err; | |
1051 | - | |
1052 | - err = add_symlink(bo->hdir, bo->hdev); | |
1053 | - if (err) { | |
1054 | - del_symlink(bo->sdir, bo->sdev); | |
1055 | - return err; | |
1056 | - } | |
1057 | - | |
1058 | - list_add_tail(&bo->list, &bdev->bd_holder_list); | |
1059 | - return 0; | |
1060 | -} | |
1061 | - | |
1062 | -/** | |
1063 | - * del_bd_holder - delete sysfs symlinks for bd_claim() relationship | |
808 | + * The caller must have claimed @bdev before calling this function and | |
809 | + * ensure that both @bdev and @disk are valid during the creation and | |
810 | + * lifetime of these symlinks. | |
1064 | 811 | * |
1065 | - * @bdev: block device to be bd_claimed | |
1066 | - * @kobj: holder's kobject | |
812 | + * CONTEXT: | |
813 | + * Might sleep. | |
1067 | 814 | * |
1068 | - * If there is matching entry with @kobj in @bdev->bd_holder_list | |
1069 | - * and no other bd_claim() from the same kobject, | |
1070 | - * remove the struct bd_holder from the list, delete symlinks for it. | |
1071 | - * | |
1072 | - * Returns a pointer to the struct bd_holder when it's removed from the list | |
1073 | - * and ready to be freed. | |
1074 | - * Returns NULL if matching claim isn't found or there is other bd_claim() | |
1075 | - * by the same kobject. | |
815 | + * RETURNS: | |
816 | + * 0 on success, -errno on failure. | |
1076 | 817 | */ |
1077 | -static struct bd_holder *del_bd_holder(struct block_device *bdev, | |
1078 | - struct kobject *kobj) | |
818 | +int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) | |
1079 | 819 | { |
1080 | - struct bd_holder *bo; | |
820 | + int ret = 0; | |
1081 | 821 | |
1082 | - list_for_each_entry(bo, &bdev->bd_holder_list, list) { | |
1083 | - if (bo->sdir == kobj) { | |
1084 | - bo->count--; | |
1085 | - BUG_ON(bo->count < 0); | |
1086 | - if (!bo->count) { | |
1087 | - list_del(&bo->list); | |
1088 | - del_symlink(bo->sdir, bo->sdev); | |
1089 | - del_symlink(bo->hdir, bo->hdev); | |
1090 | - bd_holder_release_dirs(bo); | |
1091 | - return bo; | |
1092 | - } | |
1093 | - break; | |
1094 | - } | |
1095 | - } | |
822 | + mutex_lock(&bdev->bd_mutex); | |
1096 | 823 | |
1097 | - return NULL; | |
1098 | -} | |
824 | + WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk); | |
1099 | 825 | |
1100 | -/** | |
1101 | - * bd_claim_by_kobject - bd_claim() with additional kobject signature | |
1102 | - * | |
1103 | - * @bdev: block device to be claimed | |
1104 | - * @holder: holder's signature | |
1105 | - * @kobj: holder's kobject | |
1106 | - * | |
1107 | - * Do bd_claim() and if it succeeds, create sysfs symlinks between | |
1108 | - * the bdev and the holder's kobject. | |
1109 | - * Use bd_release_from_kobject() when releasing the claimed bdev. | 
1110 | - * | |
1111 | - * Returns 0 on success. (same as bd_claim()) | |
1112 | - * Returns errno on failure. | |
1113 | - */ | |
1114 | -static int bd_claim_by_kobject(struct block_device *bdev, void *holder, | |
1115 | - struct kobject *kobj) | |
1116 | -{ | |
1117 | - int err; | |
1118 | - struct bd_holder *bo, *found; | |
826 | + /* FIXME: remove the following once add_disk() handles errors */ | |
827 | + if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir)) | |
828 | + goto out_unlock; | |
1119 | 829 | |
1120 | - if (!kobj) | |
1121 | - return -EINVAL; | |
830 | + ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); | |
831 | + if (ret) | |
832 | + goto out_unlock; | |
1122 | 833 | |
1123 | - bo = alloc_bd_holder(kobj); | |
1124 | - if (!bo) | |
1125 | - return -ENOMEM; | |
834 | + ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); | |
835 | + if (ret) { | |
836 | + del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); | |
837 | + goto out_unlock; | |
838 | + } | |
1126 | 839 | |
1127 | - mutex_lock(&bdev->bd_mutex); | |
1128 | - | |
1129 | - err = bd_claim(bdev, holder); | |
1130 | - if (err) | |
1131 | - goto fail; | |
1132 | - | |
1133 | - found = find_bd_holder(bdev, bo); | |
1134 | - if (found) | |
1135 | - goto fail; | |
1136 | - | |
1137 | - err = add_bd_holder(bdev, bo); | |
1138 | - if (err) | |
1139 | - bd_release(bdev); | |
1140 | - else | |
1141 | - bo = NULL; | |
1142 | -fail: | |
840 | + bdev->bd_holder_disk = disk; | |
841 | +out_unlock: | |
1143 | 842 | mutex_unlock(&bdev->bd_mutex); |
1144 | - free_bd_holder(bo); | |
1145 | - return err; | |
843 | + return ret; | |
1146 | 844 | } |
845 | +EXPORT_SYMBOL_GPL(bd_link_disk_holder); | |
1147 | 846 | |
1148 | -/** | |
1149 | - * bd_release_from_kobject - bd_release() with additional kobject signature | |
1150 | - * | |
1151 | - * @bdev: block device to be released | |
1152 | - * @kobj: holder's kobject | |
1153 | - * | |
1154 | - * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject(). | |
1155 | - */ | |
1156 | -static void bd_release_from_kobject(struct block_device *bdev, | |
1157 | - struct kobject *kobj) | |
847 | +static void bd_unlink_disk_holder(struct block_device *bdev) | |
1158 | 848 | { |
1159 | - if (!kobj) | |
849 | + struct gendisk *disk = bdev->bd_holder_disk; | |
850 | + | |
851 | + bdev->bd_holder_disk = NULL; | |
852 | + if (!disk) | |
1160 | 853 | return; |
1161 | 854 | |
1162 | - mutex_lock(&bdev->bd_mutex); | |
1163 | - bd_release(bdev); | |
1164 | - free_bd_holder(del_bd_holder(bdev, kobj)); | |
1165 | - mutex_unlock(&bdev->bd_mutex); | |
855 | + del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); | |
856 | + del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); | |
1166 | 857 | } |
1167 | - | |
1168 | -/** | |
1169 | - * bd_claim_by_disk - wrapper function for bd_claim_by_kobject() | |
1170 | - * | |
1171 | - * @bdev: block device to be claimed | |
1172 | - * @holder: holder's signature | |
1173 | - * @disk: holder's gendisk | |
1174 | - * | |
1175 | - * Call bd_claim_by_kobject() with getting @disk->slave_dir. | |
1176 | - */ | |
1177 | -int bd_claim_by_disk(struct block_device *bdev, void *holder, | |
1178 | - struct gendisk *disk) | |
1179 | -{ | |
1180 | - return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir)); | |
1181 | -} | |
1182 | -EXPORT_SYMBOL_GPL(bd_claim_by_disk); | |
1183 | - | |
1184 | -/** | |
1185 | - * bd_release_from_disk - wrapper function for bd_release_from_kobject() | |
1186 | - * | |
1187 | - * @bdev: block device to be claimed | |
1188 | - * @disk: holder's gendisk | |
1189 | - * | |
1190 | - * Call bd_release_from_kobject() and put @disk->slave_dir. | |
1191 | - */ | |
1192 | -void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk) | |
1193 | -{ | |
1194 | - bd_release_from_kobject(bdev, disk->slave_dir); | |
1195 | - kobject_put(disk->slave_dir); | |
1196 | -} | |
1197 | -EXPORT_SYMBOL_GPL(bd_release_from_disk); | |
858 | +#else | |
859 | +static inline void bd_unlink_disk_holder(struct block_device *bdev) | |
860 | +{ } | |
1198 | 861 | #endif |
1199 | 862 | |
1200 | -/* | |
1201 | - * Tries to open block device by device number. Use it ONLY if you | |
1202 | - * really do not have anything better - i.e. when you are behind a | |
1203 | - * truly sucky interface and all you are given is a device number. _Never_ | |
1204 | - * to be used for internal purposes. If you ever need it - reconsider | |
1205 | - * your API. | |
1206 | - */ | |
1207 | -struct block_device *open_by_devnum(dev_t dev, fmode_t mode) | |
1208 | -{ | |
1209 | - struct block_device *bdev = bdget(dev); | |
1210 | - int err = -ENOMEM; | |
1211 | - if (bdev) | |
1212 | - err = blkdev_get(bdev, mode); | |
1213 | - return err ? ERR_PTR(err) : bdev; | |
1214 | -} | |
1215 | - | |
1216 | -EXPORT_SYMBOL(open_by_devnum); | |
1217 | - | |
1218 | 863 | /** |
1219 | 864 | * flush_disk - invalidates all buffer-cache entries on a disk |
... | ... | @@ -1309,11 +954,12 @@ |
1309 | 954 | { |
1310 | 955 | struct gendisk *disk = bdev->bd_disk; |
1311 | 956 | const struct block_device_operations *bdops = disk->fops; |
957 | + unsigned int events; | |
1312 | 958 | |
1313 | - if (!bdops->media_changed) | |
959 | + events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE | | |
960 | + DISK_EVENT_EJECT_REQUEST); | |
961 | + if (!(events & DISK_EVENT_MEDIA_CHANGE)) | |
1314 | 962 | return 0; |
1315 | - if (!bdops->media_changed(bdev->bd_disk)) | |
1316 | - return 0; | |
1317 | 963 | |
1318 | 964 | flush_disk(bdev); |
1319 | 965 | if (bdops->revalidate_disk) |
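check_disk_change() is now driven entirely by the buffered disk-event machinery: disk_clear_events() fetches and clears the pending events, and a media change triggers flush_disk() plus ->revalidate_disk(). A driver opts in the way sr does above (sketch; the my_* names are illustrative):

	static const struct block_device_operations my_fops = {
		.owner           = THIS_MODULE,
		.check_events    = my_check_events,	/* event poller */
		.revalidate_disk = my_revalidate_disk,
	};

	disk->fops   = &my_fops;
	disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;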
... | ... | @@ -1475,17 +1121,171 @@ |
1475 | 1121 | return ret; |
1476 | 1122 | } |
1477 | 1123 | |
1478 | -int blkdev_get(struct block_device *bdev, fmode_t mode) | |
1124 | +/** | |
1125 | + * blkdev_get - open a block device | |
1126 | + * @bdev: block_device to open | |
1127 | + * @mode: FMODE_* mask | |
1128 | + * @holder: exclusive holder identifier | |
1129 | + * | |
1130 | + * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is | |
1131 | + * open with exclusive access. Specifying %FMODE_EXCL with %NULL | |
1132 | + * @holder is invalid. Exclusive opens may nest for the same @holder. | |
1133 | + * | |
1134 | + * On success, the reference count of @bdev is unchanged. On failure, | |
1135 | + * @bdev is put. | |
1136 | + * | |
1137 | + * CONTEXT: | |
1138 | + * Might sleep. | |
1139 | + * | |
1140 | + * RETURNS: | |
1141 | + * 0 on success, -errno on failure. | |
1142 | + */ | |
1143 | +int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) | |
1479 | 1144 | { |
1480 | - return __blkdev_get(bdev, mode, 0); | |
1145 | + struct block_device *whole = NULL; | |
1146 | + int res; | |
1147 | + | |
1148 | + WARN_ON_ONCE((mode & FMODE_EXCL) && !holder); | |
1149 | + | |
1150 | + if ((mode & FMODE_EXCL) && holder) { | |
1151 | + whole = bd_start_claiming(bdev, holder); | |
1152 | + if (IS_ERR(whole)) { | |
1153 | + bdput(bdev); | |
1154 | + return PTR_ERR(whole); | |
1155 | + } | |
1156 | + } | |
1157 | + | |
1158 | + res = __blkdev_get(bdev, mode, 0); | |
1159 | + | |
1160 | + /* __blkdev_get() may alter read only status, check it afterwards */ | |
1161 | + if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) { | |
1162 | + __blkdev_put(bdev, mode, 0); | |
1163 | + res = -EACCES; | |
1164 | + } | |
1165 | + | |
1166 | + if (whole) { | |
1167 | + /* finish claiming */ | |
1168 | + mutex_lock(&bdev->bd_mutex); | |
1169 | + spin_lock(&bdev_lock); | |
1170 | + | |
1171 | + if (!res) { | |
1172 | + BUG_ON(!bd_may_claim(bdev, whole, holder)); | |
1173 | + /* | |
1174 | + * Note that for a whole device bd_holders | |
1175 | + * will be incremented twice, and bd_holder | |
1176 | + * will be set to bd_may_claim before being | |
1177 | + * set to holder | |
1178 | + */ | |
1179 | + whole->bd_holders++; | |
1180 | + whole->bd_holder = bd_may_claim; | |
1181 | + bdev->bd_holders++; | |
1182 | + bdev->bd_holder = holder; | |
1183 | + } | |
1184 | + | |
1185 | + /* tell others that we're done */ | |
1186 | + BUG_ON(whole->bd_claiming != holder); | |
1187 | + whole->bd_claiming = NULL; | |
1188 | + wake_up_bit(&whole->bd_claiming, 0); | |
1189 | + | |
1190 | + spin_unlock(&bdev_lock); | |
1191 | + | |
1192 | + /* | |
1193 | + * Block event polling for write claims. Any write | |
1194 | + * holder makes the write_holder state stick until all | |
1195 | + * are released. This is good enough and tracking | |
1196 | + * individual writeable references is too fragile given | 
1197 | + * the way @mode is used in blkdev_get/put(). | |
1198 | + */ | |
1199 | + if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { | |
1200 | + bdev->bd_write_holder = true; | |
1201 | + disk_block_events(bdev->bd_disk); | |
1202 | + } | |
1203 | + | |
1204 | + mutex_unlock(&bdev->bd_mutex); | |
1205 | + bdput(whole); | |
1206 | + } | |
1207 | + | |
1208 | + return res; | |
1481 | 1209 | } |
1482 | 1210 | EXPORT_SYMBOL(blkdev_get); |
1483 | 1211 | |
1212 | +/** | |
1213 | + * blkdev_get_by_path - open a block device by name | |
1214 | + * @path: path to the block device to open | |
1215 | + * @mode: FMODE_* mask | |
1216 | + * @holder: exclusive holder identifier | |
1217 | + * | |
1218 | + * Open the blockdevice described by the device file at @path. @mode | |
1219 | + * and @holder are identical to blkdev_get(). | |
1220 | + * | |
1221 | + * On success, the returned block_device has reference count of one. | |
1222 | + * | |
1223 | + * CONTEXT: | |
1224 | + * Might sleep. | |
1225 | + * | |
1226 | + * RETURNS: | |
1227 | + * Pointer to block_device on success, ERR_PTR(-errno) on failure. | |
1228 | + */ | |
1229 | +struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, | |
1230 | + void *holder) | |
1231 | +{ | |
1232 | + struct block_device *bdev; | |
1233 | + int err; | |
1234 | + | |
1235 | + bdev = lookup_bdev(path); | |
1236 | + if (IS_ERR(bdev)) | |
1237 | + return bdev; | |
1238 | + | |
1239 | + err = blkdev_get(bdev, mode, holder); | |
1240 | + if (err) | |
1241 | + return ERR_PTR(err); | |
1242 | + | |
1243 | + return bdev; | |
1244 | +} | |
1245 | +EXPORT_SYMBOL(blkdev_get_by_path); | |
1246 | + | |
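A typical filesystem caller, replacing the open_bdev_exclusive()/close_bdev_exclusive() pair removed at the end of this file (sketch; using the fs_type pointer as a convenient unique holder):

	bdev = blkdev_get_by_path(path,
				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, fs_type);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	/* ... fill the superblock ... */
	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);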
1247 | +/** | |
1248 | + * blkdev_get_by_dev - open a block device by device number | |
1249 | + * @dev: device number of block device to open | |
1250 | + * @mode: FMODE_* mask | |
1251 | + * @holder: exclusive holder identifier | |
1252 | + * | |
1253 | + * Open the blockdevice described by device number @dev. @mode and | |
1254 | + * @holder are identical to blkdev_get(). | |
1255 | + * | |
1256 | + * Use it ONLY if you really do not have anything better - i.e. when | |
1257 | + * you are behind a truly sucky interface and all you are given is a | |
1258 | + * device number. _Never_ to be used for internal purposes. If you | |
1259 | + * ever need it - reconsider your API. | |
1260 | + * | |
1261 | + * On success, the returned block_device has reference count of one. | |
1262 | + * | |
1263 | + * CONTEXT: | |
1264 | + * Might sleep. | |
1265 | + * | |
1266 | + * RETURNS: | |
1267 | + * Pointer to block_device on success, ERR_PTR(-errno) on failure. | |
1268 | + */ | |
1269 | +struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) | |
1270 | +{ | |
1271 | + struct block_device *bdev; | |
1272 | + int err; | |
1273 | + | |
1274 | + bdev = bdget(dev); | |
1275 | + if (!bdev) | |
1276 | + return ERR_PTR(-ENOMEM); | |
1277 | + | |
1278 | + err = blkdev_get(bdev, mode, holder); | |
1279 | + if (err) | |
1280 | + return ERR_PTR(err); | |
1281 | + | |
1282 | + return bdev; | |
1283 | +} | |
1284 | +EXPORT_SYMBOL(blkdev_get_by_dev); | |
1285 | + | |
1484 | 1286 | static int blkdev_open(struct inode * inode, struct file * filp) |
1485 | 1287 | { |
1486 | - struct block_device *whole = NULL; | |
1487 | 1288 | struct block_device *bdev; |
1488 | - int res; | |
1489 | 1289 | |
1490 | 1290 | /* |
1491 | 1291 | * Preserve backwards compatibility and allow large file access |
1492 | 1292 | |
... | ... | @@ -1506,26 +1306,9 @@ |
1506 | 1306 | if (bdev == NULL) |
1507 | 1307 | return -ENOMEM; |
1508 | 1308 | |
1509 | - if (filp->f_mode & FMODE_EXCL) { | |
1510 | - whole = bd_start_claiming(bdev, filp); | |
1511 | - if (IS_ERR(whole)) { | |
1512 | - bdput(bdev); | |
1513 | - return PTR_ERR(whole); | |
1514 | - } | |
1515 | - } | |
1516 | - | |
1517 | 1309 | filp->f_mapping = bdev->bd_inode->i_mapping; |
1518 | 1310 | |
1519 | - res = blkdev_get(bdev, filp->f_mode); | |
1520 | - | |
1521 | - if (whole) { | |
1522 | - if (res == 0) | |
1523 | - bd_finish_claiming(bdev, whole, filp); | |
1524 | - else | |
1525 | - bd_abort_claiming(whole, filp); | |
1526 | - } | |
1527 | - | |
1528 | - return res; | |
1311 | + return blkdev_get(bdev, filp->f_mode, filp); | |
1529 | 1312 | } |
1530 | 1313 | |
1531 | 1314 | static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) |
... | ... | @@ -1539,6 +1322,7 @@ |
1539 | 1322 | bdev->bd_part_count--; |
1540 | 1323 | |
1541 | 1324 | if (!--bdev->bd_openers) { |
1325 | + WARN_ON_ONCE(bdev->bd_holders); | |
1542 | 1326 | sync_blockdev(bdev); |
1543 | 1327 | kill_bdev(bdev); |
1544 | 1328 | } |
... | ... | @@ -1569,6 +1353,45 @@ |
1569 | 1353 | |
1570 | 1354 | int blkdev_put(struct block_device *bdev, fmode_t mode) |
1571 | 1355 | { |
1356 | + if (mode & FMODE_EXCL) { | |
1357 | + bool bdev_free; | |
1358 | + | |
1359 | + /* | |
1360 | + * Release a claim on the device. The holder fields | |
1361 | + * are protected with bdev_lock. bd_mutex is to | |
1362 | + * synchronize disk_holder unlinking. | |
1363 | + */ | |
1364 | + mutex_lock(&bdev->bd_mutex); | |
1365 | + spin_lock(&bdev_lock); | |
1366 | + | |
1367 | + WARN_ON_ONCE(--bdev->bd_holders < 0); | |
1368 | + WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0); | |
1369 | + | |
1370 | + /* bd_contains might point to self, check in a separate step */ | |
1371 | + if ((bdev_free = !bdev->bd_holders)) | |
1372 | + bdev->bd_holder = NULL; | |
1373 | + if (!bdev->bd_contains->bd_holders) | |
1374 | + bdev->bd_contains->bd_holder = NULL; | |
1375 | + | |
1376 | + spin_unlock(&bdev_lock); | |
1377 | + | |
1378 | + /* | |
1379 | + * If this was the last claim, remove holder link and | |
1380 | + * unblock event polling if it was a write holder. | 
1381 | + */ | |
1382 | + if (bdev_free) { | |
1383 | + bd_unlink_disk_holder(bdev); | |
1384 | + if (bdev->bd_write_holder) { | |
1385 | + disk_unblock_events(bdev->bd_disk); | |
1386 | + bdev->bd_write_holder = false; | |
1387 | + } else | |
1388 | + disk_check_events(bdev->bd_disk); | |
1389 | + } | |
1390 | + | |
1391 | + mutex_unlock(&bdev->bd_mutex); | |
1392 | + } else | |
1393 | + disk_check_events(bdev->bd_disk); | |
1394 | + | |
1572 | 1395 | return __blkdev_put(bdev, mode, 0); |
1573 | 1396 | } |
1574 | 1397 | EXPORT_SYMBOL(blkdev_put); |
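The exclusive-release path above nests its locks deliberately; condensed (comments mirror the code):

	/*
	 * mutex_lock(&bdev->bd_mutex);      may sleep; serializes holder
	 *                                   unlinking against new claims
	 *   spin_lock(&bdev_lock);          guards bd_holders/bd_holder only
	 *     ... decrement holder counts ...
	 *   spin_unlock(&bdev_lock);        dropped before sleeping work
	 *   bd_unlink_disk_holder() /       sysfs and event calls may sleep
	 *   disk_unblock_events();
	 * mutex_unlock(&bdev->bd_mutex);
	 */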
... | ... | @@ -1576,8 +1399,7 @@ |
1576 | 1399 | static int blkdev_close(struct inode * inode, struct file * filp) |
1577 | 1400 | { |
1578 | 1401 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); |
1579 | - if (bdev->bd_holder == filp) | |
1580 | - bd_release(bdev); | |
1402 | + | |
1581 | 1403 | return blkdev_put(bdev, filp->f_mode); |
1582 | 1404 | } |
1583 | 1405 | |
... | ... | @@ -1721,67 +1543,6 @@ |
1721 | 1543 | goto out; |
1722 | 1544 | } |
1723 | 1545 | EXPORT_SYMBOL(lookup_bdev); |
1724 | - | |
1725 | -/** | |
1726 | - * open_bdev_exclusive - open a block device by name and set it up for use | |
1727 | - * | |
1728 | - * @path: special file representing the block device | |
1729 | - * @mode: FMODE_... combination to be used | 
1730 | - * @holder: owner for exclusion | |
1731 | - * | |
1732 | - * Open the blockdevice described by the special file at @path, claim it | |
1733 | - * for the @holder. | |
1734 | - */ | |
1735 | -struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) | |
1736 | -{ | |
1737 | - struct block_device *bdev, *whole; | |
1738 | - int error; | |
1739 | - | |
1740 | - bdev = lookup_bdev(path); | |
1741 | - if (IS_ERR(bdev)) | |
1742 | - return bdev; | |
1743 | - | |
1744 | - whole = bd_start_claiming(bdev, holder); | |
1745 | - if (IS_ERR(whole)) { | |
1746 | - bdput(bdev); | |
1747 | - return whole; | |
1748 | - } | |
1749 | - | |
1750 | - error = blkdev_get(bdev, mode); | |
1751 | - if (error) | |
1752 | - goto out_abort_claiming; | |
1753 | - | |
1754 | - error = -EACCES; | |
1755 | - if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) | |
1756 | - goto out_blkdev_put; | |
1757 | - | |
1758 | - bd_finish_claiming(bdev, whole, holder); | |
1759 | - return bdev; | |
1760 | - | |
1761 | -out_blkdev_put: | |
1762 | - blkdev_put(bdev, mode); | |
1763 | -out_abort_claiming: | |
1764 | - bd_abort_claiming(whole, holder); | |
1765 | - return ERR_PTR(error); | |
1766 | -} | |
1767 | - | |
1768 | -EXPORT_SYMBOL(open_bdev_exclusive); | |
1769 | - | |
1770 | -/** | |
1771 | - * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() | |
1772 | - * | |
1773 | - * @bdev: blockdevice to close | |
1774 | - * @mode: mode, must match that used to open. | |
1775 | - * | |
1776 | - * This is the counterpart to open_bdev_exclusive(). | |
1777 | - */ | |
1778 | -void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) | |
1779 | -{ | |
1780 | - bd_release(bdev); | |
1781 | - blkdev_put(bdev, mode); | |
1782 | -} | |
1783 | - | |
1784 | -EXPORT_SYMBOL(close_bdev_exclusive); | |
1785 | 1546 | |
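Every caller conversion in the files that follow applies the same mechanical pattern; schematically, with path, mode and holder standing in for each caller's values:

	/* before: open_bdev_exclusive() implied the exclusive claim */
	bdev = open_bdev_exclusive(path, mode, holder);
	/* ... */
	close_bdev_exclusive(bdev, mode);

	/* after: the claim is requested by passing FMODE_EXCL explicitly,
	 * and the matching blkdev_put() must carry the same flag */
	bdev = blkdev_get_by_path(path, mode | FMODE_EXCL, holder);
	/* ... */
	blkdev_put(bdev, mode | FMODE_EXCL);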
1786 | 1547 | int __invalidate_device(struct block_device *bdev) |
1787 | 1548 | { |
fs/btrfs/volumes.c
... | ... | @@ -493,7 +493,7 @@ |
493 | 493 | continue; |
494 | 494 | |
495 | 495 | if (device->bdev) { |
496 | - close_bdev_exclusive(device->bdev, device->mode); | |
496 | + blkdev_put(device->bdev, device->mode); | |
497 | 497 | device->bdev = NULL; |
498 | 498 | fs_devices->open_devices--; |
499 | 499 | } |
... | ... | @@ -527,7 +527,7 @@ |
527 | 527 | |
528 | 528 | list_for_each_entry(device, &fs_devices->devices, dev_list) { |
529 | 529 | if (device->bdev) { |
530 | - close_bdev_exclusive(device->bdev, device->mode); | |
530 | + blkdev_put(device->bdev, device->mode); | |
531 | 531 | fs_devices->open_devices--; |
532 | 532 | } |
533 | 533 | if (device->writeable) { |
534 | 534 | |
... | ... | @@ -584,13 +584,15 @@ |
584 | 584 | int seeding = 1; |
585 | 585 | int ret = 0; |
586 | 586 | |
587 | + flags |= FMODE_EXCL; | |
588 | + | |
587 | 589 | list_for_each_entry(device, head, dev_list) { |
588 | 590 | if (device->bdev) |
589 | 591 | continue; |
590 | 592 | if (!device->name) |
591 | 593 | continue; |
592 | 594 | |
593 | - bdev = open_bdev_exclusive(device->name, flags, holder); | |
595 | + bdev = blkdev_get_by_path(device->name, flags, holder); | |
594 | 596 | if (IS_ERR(bdev)) { |
595 | 597 | printk(KERN_INFO "open %s failed\n", device->name); |
596 | 598 | goto error; |
... | ... | @@ -642,7 +644,7 @@ |
642 | 644 | error_brelse: |
643 | 645 | brelse(bh); |
644 | 646 | error_close: |
645 | - close_bdev_exclusive(bdev, FMODE_READ); | |
647 | + blkdev_put(bdev, flags); | |
646 | 648 | error: |
647 | 649 | continue; |
648 | 650 | } |
... | ... | @@ -688,7 +690,8 @@ |
688 | 690 | |
689 | 691 | mutex_lock(&uuid_mutex); |
690 | 692 | |
691 | - bdev = open_bdev_exclusive(path, flags, holder); | |
693 | + flags |= FMODE_EXCL; | |
694 | + bdev = blkdev_get_by_path(path, flags, holder); | |
692 | 695 | |
693 | 696 | if (IS_ERR(bdev)) { |
694 | 697 | ret = PTR_ERR(bdev); |
... | ... | @@ -720,7 +723,7 @@ |
720 | 723 | |
721 | 724 | brelse(bh); |
722 | 725 | error_close: |
723 | - close_bdev_exclusive(bdev, flags); | |
726 | + blkdev_put(bdev, flags); | |
724 | 727 | error: |
725 | 728 | mutex_unlock(&uuid_mutex); |
726 | 729 | return ret; |
... | ... | @@ -1183,8 +1186,8 @@ |
1183 | 1186 | goto out; |
1184 | 1187 | } |
1185 | 1188 | } else { |
1186 | - bdev = open_bdev_exclusive(device_path, FMODE_READ, | |
1187 | - root->fs_info->bdev_holder); | |
1189 | + bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL, | |
1190 | + root->fs_info->bdev_holder); | |
1188 | 1191 | if (IS_ERR(bdev)) { |
1189 | 1192 | ret = PTR_ERR(bdev); |
1190 | 1193 | goto out; |
... | ... | @@ -1251,7 +1254,7 @@ |
1251 | 1254 | root->fs_info->fs_devices->latest_bdev = next_device->bdev; |
1252 | 1255 | |
1253 | 1256 | if (device->bdev) { |
1254 | - close_bdev_exclusive(device->bdev, device->mode); | |
1257 | + blkdev_put(device->bdev, device->mode); | |
1255 | 1258 | device->bdev = NULL; |
1256 | 1259 | device->fs_devices->open_devices--; |
1257 | 1260 | } |
... | ... | @@ -1294,7 +1297,7 @@ |
1294 | 1297 | brelse(bh); |
1295 | 1298 | error_close: |
1296 | 1299 | if (bdev) |
1297 | - close_bdev_exclusive(bdev, FMODE_READ); | |
1300 | + blkdev_put(bdev, FMODE_READ | FMODE_EXCL); | |
1298 | 1301 | out: |
1299 | 1302 | mutex_unlock(&root->fs_info->volume_mutex); |
1300 | 1303 | mutex_unlock(&uuid_mutex); |
... | ... | @@ -1446,7 +1449,8 @@ |
1446 | 1449 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) |
1447 | 1450 | return -EINVAL; |
1448 | 1451 | |
1449 | - bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); | |
1452 | + bdev = blkdev_get_by_path(device_path, FMODE_EXCL, | |
1453 | + root->fs_info->bdev_holder); | |
1450 | 1454 | if (IS_ERR(bdev)) |
1451 | 1455 | return PTR_ERR(bdev); |
1452 | 1456 | |
... | ... | @@ -1572,7 +1576,7 @@ |
1572 | 1576 | mutex_unlock(&root->fs_info->volume_mutex); |
1573 | 1577 | return ret; |
1574 | 1578 | error: |
1575 | - close_bdev_exclusive(bdev, 0); | |
1579 | + blkdev_put(bdev, FMODE_EXCL); | |
1576 | 1580 | if (seeding_dev) { |
1577 | 1581 | mutex_unlock(&uuid_mutex); |
1578 | 1582 | up_write(&sb->s_umount); |
fs/btrfs/volumes.h
fs/char_dev.c
fs/ext3/super.c
... | ... | @@ -364,7 +364,7 @@ |
364 | 364 | struct block_device *bdev; |
365 | 365 | char b[BDEVNAME_SIZE]; |
366 | 366 | |
367 | - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); | |
367 | + bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); | |
368 | 368 | if (IS_ERR(bdev)) |
369 | 369 | goto fail; |
370 | 370 | return bdev; |
... | ... | @@ -381,8 +381,7 @@ |
381 | 381 | */ |
382 | 382 | static int ext3_blkdev_put(struct block_device *bdev) |
383 | 383 | { |
384 | - bd_release(bdev); | |
385 | - return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); | |
384 | + return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | |
386 | 385 | } |
387 | 386 | |
388 | 387 | static int ext3_blkdev_remove(struct ext3_sb_info *sbi) |
... | ... | @@ -2161,13 +2160,6 @@ |
2161 | 2160 | bdev = ext3_blkdev_get(j_dev, sb); |
2162 | 2161 | if (bdev == NULL) |
2163 | 2162 | return NULL; |
2164 | - | |
2165 | - if (bd_claim(bdev, sb)) { | |
2166 | - ext3_msg(sb, KERN_ERR, | |
2167 | - "error: failed to claim external journal device"); | |
2168 | - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); | |
2169 | - return NULL; | |
2170 | - } | |
2171 | 2163 | |
2172 | 2164 | blocksize = sb->s_blocksize; |
2173 | 2165 | hblock = bdev_logical_block_size(bdev); |
fs/ext4/super.c
... | ... | @@ -657,7 +657,7 @@ |
657 | 657 | struct block_device *bdev; |
658 | 658 | char b[BDEVNAME_SIZE]; |
659 | 659 | |
660 | - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); | |
660 | + bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); | |
661 | 661 | if (IS_ERR(bdev)) |
662 | 662 | goto fail; |
663 | 663 | return bdev; |
... | ... | @@ -673,8 +673,7 @@ |
673 | 673 | */ |
674 | 674 | static int ext4_blkdev_put(struct block_device *bdev) |
675 | 675 | { |
676 | - bd_release(bdev); | |
677 | - return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); | |
676 | + return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | |
678 | 677 | } |
679 | 678 | |
680 | 679 | static int ext4_blkdev_remove(struct ext4_sb_info *sbi) |
... | ... | @@ -3777,13 +3776,6 @@ |
3777 | 3776 | bdev = ext4_blkdev_get(j_dev, sb); |
3778 | 3777 | if (bdev == NULL) |
3779 | 3778 | return NULL; |
3780 | - | |
3781 | - if (bd_claim(bdev, sb)) { | |
3782 | - ext4_msg(sb, KERN_ERR, | |
3783 | - "failed to claim external journal device"); | |
3784 | - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); | |
3785 | - return NULL; | |
3786 | - } | |
3787 | 3779 | |
3788 | 3780 | blocksize = sb->s_blocksize; |
3789 | 3781 | hblock = bdev_logical_block_size(bdev); |
fs/gfs2/ops_fstype.c
... | ... | @@ -1268,7 +1268,7 @@ |
1268 | 1268 | { |
1269 | 1269 | struct block_device *bdev; |
1270 | 1270 | struct super_block *s; |
1271 | - fmode_t mode = FMODE_READ; | |
1271 | + fmode_t mode = FMODE_READ | FMODE_EXCL; | |
1272 | 1272 | int error; |
1273 | 1273 | struct gfs2_args args; |
1274 | 1274 | struct gfs2_sbd *sdp; |
... | ... | @@ -1276,7 +1276,7 @@ |
1276 | 1276 | if (!(flags & MS_RDONLY)) |
1277 | 1277 | mode |= FMODE_WRITE; |
1278 | 1278 | |
1279 | - bdev = open_bdev_exclusive(dev_name, mode, fs_type); | |
1279 | + bdev = blkdev_get_by_path(dev_name, mode, fs_type); | |
1280 | 1280 | if (IS_ERR(bdev)) |
1281 | 1281 | return ERR_CAST(bdev); |
1282 | 1282 | |
... | ... | @@ -1298,7 +1298,7 @@ |
1298 | 1298 | goto error_bdev; |
1299 | 1299 | |
1300 | 1300 | if (s->s_root) |
1301 | - close_bdev_exclusive(bdev, mode); | |
1301 | + blkdev_put(bdev, mode); | |
1302 | 1302 | |
1303 | 1303 | memset(&args, 0, sizeof(args)); |
1304 | 1304 | args.ar_quota = GFS2_QUOTA_DEFAULT; |
... | ... | @@ -1342,7 +1342,7 @@ |
1342 | 1342 | deactivate_locked_super(s); |
1343 | 1343 | return ERR_PTR(error); |
1344 | 1344 | error_bdev: |
1345 | - close_bdev_exclusive(bdev, mode); | |
1345 | + blkdev_put(bdev, mode); | |
1346 | 1346 | return ERR_PTR(error); |
1347 | 1347 | } |
1348 | 1348 |
fs/jfs/jfs_logmgr.c
... | ... | @@ -1120,16 +1120,13 @@ |
1120 | 1120 | * file systems to log may have n-to-1 relationship; |
1121 | 1121 | */ |
1122 | 1122 | |
1123 | - bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE); | |
1123 | + bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, | |
1124 | + log); | |
1124 | 1125 | if (IS_ERR(bdev)) { |
1125 | 1126 | rc = -PTR_ERR(bdev); |
1126 | 1127 | goto free; |
1127 | 1128 | } |
1128 | 1129 | |
1129 | - if ((rc = bd_claim(bdev, log))) { | |
1130 | - goto close; | |
1131 | - } | |
1132 | - | |
1133 | 1130 | log->bdev = bdev; |
1134 | 1131 | memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); |
1135 | 1132 | |
... | ... | @@ -1137,7 +1134,7 @@ |
1137 | 1134 | * initialize log: |
1138 | 1135 | */ |
1139 | 1136 | if ((rc = lmLogInit(log))) |
1140 | - goto unclaim; | |
1137 | + goto close; | |
1141 | 1138 | |
1142 | 1139 | list_add(&log->journal_list, &jfs_external_logs); |
1143 | 1140 | |
1144 | 1141 | |
... | ... | @@ -1163,11 +1160,8 @@ |
1163 | 1160 | list_del(&log->journal_list); |
1164 | 1161 | lbmLogShutdown(log); |
1165 | 1162 | |
1166 | - unclaim: | |
1167 | - bd_release(bdev); | |
1168 | - | |
1169 | 1163 | close: /* close external log device */ |
1170 | - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); | |
1164 | + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | |
1171 | 1165 | |
1172 | 1166 | free: /* free log descriptor */ |
1173 | 1167 | mutex_unlock(&jfs_log_mutex); |
... | ... | @@ -1512,8 +1506,7 @@ |
1512 | 1506 | bdev = log->bdev; |
1513 | 1507 | rc = lmLogShutdown(log); |
1514 | 1508 | |
1515 | - bd_release(bdev); | |
1516 | - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); | |
1509 | + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | |
1517 | 1510 | |
1518 | 1511 | kfree(log); |
1519 | 1512 |
fs/logfs/dev_bdev.c
... | ... | @@ -300,7 +300,7 @@ |
300 | 300 | |
301 | 301 | static void bdev_put_device(struct logfs_super *s) |
302 | 302 | { |
303 | - close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE); | |
303 | + blkdev_put(s->s_bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | |
304 | 304 | } |
305 | 305 | |
306 | 306 | static int bdev_can_write_buf(struct super_block *sb, u64 ofs) |
307 | 307 | |
... | ... | @@ -325,13 +325,14 @@ |
325 | 325 | { |
326 | 326 | struct block_device *bdev; |
327 | 327 | |
328 | - bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, type); | |
328 | + bdev = blkdev_get_by_path(devname, FMODE_READ|FMODE_WRITE|FMODE_EXCL, | |
329 | + type); | |
329 | 330 | if (IS_ERR(bdev)) |
330 | 331 | return PTR_ERR(bdev); |
331 | 332 | |
332 | 333 | if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) { |
333 | 334 | int mtdnr = MINOR(bdev->bd_dev); |
334 | - close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); | |
335 | + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | |
335 | 336 | return logfs_get_sb_mtd(p, mtdnr); |
336 | 337 | } |
337 | 338 |
fs/nfsd/vfs.c
... | ... | @@ -845,11 +845,6 @@ |
845 | 845 | struct page **pp = rqstp->rq_respages + rqstp->rq_resused; |
846 | 846 | struct page *page = buf->page; |
847 | 847 | size_t size; |
848 | - int ret; | |
849 | - | |
850 | - ret = buf->ops->confirm(pipe, buf); | |
851 | - if (unlikely(ret)) | |
852 | - return ret; | |
853 | 848 | |
854 | 849 | size = sd->len; |
855 | 850 |
fs/nilfs2/super.c
... | ... | @@ -1163,14 +1163,14 @@ |
1163 | 1163 | { |
1164 | 1164 | struct nilfs_super_data sd; |
1165 | 1165 | struct super_block *s; |
1166 | - fmode_t mode = FMODE_READ; | |
1166 | + fmode_t mode = FMODE_READ | FMODE_EXCL; | |
1167 | 1167 | struct dentry *root_dentry; |
1168 | 1168 | int err, s_new = false; |
1169 | 1169 | |
1170 | 1170 | if (!(flags & MS_RDONLY)) |
1171 | 1171 | mode |= FMODE_WRITE; |
1172 | 1172 | |
1173 | - sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type); | |
1173 | + sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type); | |
1174 | 1174 | if (IS_ERR(sd.bdev)) |
1175 | 1175 | return ERR_CAST(sd.bdev); |
1176 | 1176 | |
... | ... | @@ -1249,7 +1249,7 @@ |
1249 | 1249 | } |
1250 | 1250 | |
1251 | 1251 | if (!s_new) |
1252 | - close_bdev_exclusive(sd.bdev, mode); | |
1252 | + blkdev_put(sd.bdev, mode); | |
1253 | 1253 | |
1254 | 1254 | return root_dentry; |
1255 | 1255 | |
... | ... | @@ -1258,7 +1258,7 @@ |
1258 | 1258 | |
1259 | 1259 | failed: |
1260 | 1260 | if (!s_new) |
1261 | - close_bdev_exclusive(sd.bdev, mode); | |
1261 | + blkdev_put(sd.bdev, mode); | |
1262 | 1262 | return ERR_PTR(err); |
1263 | 1263 | } |
1264 | 1264 |
fs/ocfs2/cluster/heartbeat.c
... | ... | @@ -1729,7 +1729,7 @@ |
1729 | 1729 | goto out; |
1730 | 1730 | |
1731 | 1731 | reg->hr_bdev = I_BDEV(filp->f_mapping->host); |
1732 | - ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ); | |
1732 | + ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); | |
1733 | 1733 | if (ret) { |
1734 | 1734 | reg->hr_bdev = NULL; |
1735 | 1735 | goto out; |
fs/partitions/check.c
... | ... | @@ -237,6 +237,13 @@ |
237 | 237 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); |
238 | 238 | } |
239 | 239 | |
240 | +ssize_t part_ro_show(struct device *dev, | |
241 | + struct device_attribute *attr, char *buf) | |
242 | +{ | |
243 | + struct hd_struct *p = dev_to_part(dev); | |
244 | + return sprintf(buf, "%d\n", p->policy ? 1 : 0); | |
245 | +} | |
246 | + | |
240 | 247 | ssize_t part_alignment_offset_show(struct device *dev, |
241 | 248 | struct device_attribute *attr, char *buf) |
242 | 249 | { |
... | ... | @@ -312,6 +319,7 @@ |
312 | 319 | static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); |
313 | 320 | static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); |
314 | 321 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); |
322 | +static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL); | |
315 | 323 | static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); |
316 | 324 | static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show, |
317 | 325 | NULL); |
... | ... | @@ -326,6 +334,7 @@ |
326 | 334 | &dev_attr_partition.attr, |
327 | 335 | &dev_attr_start.attr, |
328 | 336 | &dev_attr_size.attr, |
337 | + &dev_attr_ro.attr, | |
329 | 338 | &dev_attr_alignment_offset.attr, |
330 | 339 | &dev_attr_discard_alignment.attr, |
331 | 340 | &dev_attr_stat.attr, |
... | ... | @@ -372,6 +381,11 @@ |
372 | 381 | put_device(part_to_dev(part)); |
373 | 382 | } |
374 | 383 | |
384 | +void __delete_partition(struct hd_struct *part) | |
385 | +{ | |
386 | + call_rcu(&part->rcu_head, delete_partition_rcu_cb); | |
387 | +} | |
388 | + | |
375 | 389 | void delete_partition(struct gendisk *disk, int partno) |
376 | 390 | { |
377 | 391 | struct disk_part_tbl *ptbl = disk->part_tbl; |
... | ... | @@ -390,7 +404,7 @@ |
390 | 404 | kobject_put(part->holder_dir); |
391 | 405 | device_del(part_to_dev(part)); |
392 | 406 | |
393 | - call_rcu(&part->rcu_head, delete_partition_rcu_cb); | |
407 | + hd_struct_put(part); | |
394 | 408 | } |
395 | 409 | |
396 | 410 | static ssize_t whole_disk_show(struct device *dev, |
... | ... | @@ -489,6 +503,7 @@ |
489 | 503 | if (!dev_get_uevent_suppress(ddev)) |
490 | 504 | kobject_uevent(&pdev->kobj, KOBJ_ADD); |
491 | 505 | |
506 | + hd_ref_init(p); | |
492 | 507 | return p; |
493 | 508 | |
494 | 509 | out_free_info: |
... | ... | @@ -507,65 +522,6 @@ |
507 | 522 | return ERR_PTR(err); |
508 | 523 | } |
509 | 524 | |
510 | -/* Not exported, helper to add_disk(). */ | |
511 | -void register_disk(struct gendisk *disk) | |
512 | -{ | |
513 | - struct device *ddev = disk_to_dev(disk); | |
514 | - struct block_device *bdev; | |
515 | - struct disk_part_iter piter; | |
516 | - struct hd_struct *part; | |
517 | - int err; | |
518 | - | |
519 | - ddev->parent = disk->driverfs_dev; | |
520 | - | |
521 | - dev_set_name(ddev, disk->disk_name); | |
522 | - | |
523 | - /* delay uevents, until we scanned partition table */ | |
524 | - dev_set_uevent_suppress(ddev, 1); | |
525 | - | |
526 | - if (device_add(ddev)) | |
527 | - return; | |
528 | - if (!sysfs_deprecated) { | |
529 | - err = sysfs_create_link(block_depr, &ddev->kobj, | |
530 | - kobject_name(&ddev->kobj)); | |
531 | - if (err) { | |
532 | - device_del(ddev); | |
533 | - return; | |
534 | - } | |
535 | - } | |
536 | - disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); | |
537 | - disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); | |
538 | - | |
539 | - /* No minors to use for partitions */ | |
540 | - if (!disk_partitionable(disk)) | |
541 | - goto exit; | |
542 | - | |
543 | - /* No such device (e.g., media were just removed) */ | |
544 | - if (!get_capacity(disk)) | |
545 | - goto exit; | |
546 | - | |
547 | - bdev = bdget_disk(disk, 0); | |
548 | - if (!bdev) | |
549 | - goto exit; | |
550 | - | |
551 | - bdev->bd_invalidated = 1; | |
552 | - err = blkdev_get(bdev, FMODE_READ); | |
553 | - if (err < 0) | |
554 | - goto exit; | |
555 | - blkdev_put(bdev, FMODE_READ); | |
556 | - | |
557 | -exit: | |
558 | - /* announce disk after possible partitions are created */ | |
559 | - dev_set_uevent_suppress(ddev, 0); | |
560 | - kobject_uevent(&ddev->kobj, KOBJ_ADD); | |
561 | - | |
562 | - /* announce possible partitions */ | |
563 | - disk_part_iter_init(&piter, disk, 0); | |
564 | - while ((part = disk_part_iter_next(&piter))) | |
565 | - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); | |
566 | - disk_part_iter_exit(&piter); | |
567 | -} | |
568 | - | |
569 | 525 | static bool disk_unlock_native_capacity(struct gendisk *disk) |
570 | 526 | { |
571 | 527 | const struct block_device_operations *bdops = disk->fops; |
... | ... | @@ -728,34 +684,4 @@ |
728 | 684 | } |
729 | 685 | |
730 | 686 | EXPORT_SYMBOL(read_dev_sector); |
731 | - | |
732 | -void del_gendisk(struct gendisk *disk) | |
733 | -{ | |
734 | - struct disk_part_iter piter; | |
735 | - struct hd_struct *part; | |
736 | - | |
737 | - /* invalidate stuff */ | |
738 | - disk_part_iter_init(&piter, disk, | |
739 | - DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); | |
740 | - while ((part = disk_part_iter_next(&piter))) { | |
741 | - invalidate_partition(disk, part->partno); | |
742 | - delete_partition(disk, part->partno); | |
743 | - } | |
744 | - disk_part_iter_exit(&piter); | |
745 | - | |
746 | - invalidate_partition(disk, 0); | |
747 | - blk_free_devt(disk_to_dev(disk)->devt); | |
748 | - set_capacity(disk, 0); | |
749 | - disk->flags &= ~GENHD_FL_UP; | |
750 | - unlink_gendisk(disk); | |
751 | - part_stat_set_all(&disk->part0, 0); | |
752 | - disk->part0.stamp = 0; | |
753 | - | |
754 | - kobject_put(disk->part0.holder_dir); | |
755 | - kobject_put(disk->slave_dir); | |
756 | - disk->driverfs_dev = NULL; | |
757 | - if (!sysfs_deprecated) | |
758 | - sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); | |
759 | - device_del(disk_to_dev(disk)); | |
760 | -} |
fs/reiserfs/journal.c
... | ... | @@ -2551,8 +2551,6 @@ |
2551 | 2551 | result = 0; |
2552 | 2552 | |
2553 | 2553 | if (journal->j_dev_bd != NULL) { |
2554 | - if (journal->j_dev_bd->bd_dev != super->s_dev) | |
2555 | - bd_release(journal->j_dev_bd); | |
2556 | 2554 | result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode); |
2557 | 2555 | journal->j_dev_bd = NULL; |
2558 | 2556 | } |
... | ... | @@ -2570,7 +2568,7 @@ |
2570 | 2568 | { |
2571 | 2569 | int result; |
2572 | 2570 | dev_t jdev; |
2573 | - fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE; | |
2571 | + fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; | |
2574 | 2572 | char b[BDEVNAME_SIZE]; |
2575 | 2573 | |
2576 | 2574 | result = 0; |
... | ... | @@ -2584,7 +2582,10 @@ |
2584 | 2582 | |
2585 | 2583 | /* there is no "jdev" option and journal is on separate device */ |
2586 | 2584 | if ((!jdev_name || !jdev_name[0])) { |
2587 | - journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); | |
2585 | + if (jdev == super->s_dev) | |
2586 | + blkdev_mode &= ~FMODE_EXCL; | |
2587 | + journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, | |
2588 | + journal); | |
2588 | 2589 | journal->j_dev_mode = blkdev_mode; |
2589 | 2590 | if (IS_ERR(journal->j_dev_bd)) { |
2590 | 2591 | result = PTR_ERR(journal->j_dev_bd); |
2591 | 2592 | |
2592 | 2593 | |
... | ... | @@ -2593,22 +2594,14 @@ |
2593 | 2594 | "cannot init journal device '%s': %i", |
2594 | 2595 | __bdevname(jdev, b), result); |
2595 | 2596 | return result; |
2596 | - } else if (jdev != super->s_dev) { | |
2597 | - result = bd_claim(journal->j_dev_bd, journal); | |
2598 | - if (result) { | |
2599 | - blkdev_put(journal->j_dev_bd, blkdev_mode); | |
2600 | - return result; | |
2601 | - } | |
2602 | - | |
2597 | + } else if (jdev != super->s_dev) | |
2603 | 2598 | set_blocksize(journal->j_dev_bd, super->s_blocksize); |
2604 | - } | |
2605 | 2599 | |
2606 | 2600 | return 0; |
2607 | 2601 | } |
2608 | 2602 | |
2609 | 2603 | journal->j_dev_mode = blkdev_mode; |
2610 | - journal->j_dev_bd = open_bdev_exclusive(jdev_name, | |
2611 | - blkdev_mode, journal); | |
2604 | + journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal); | |
2612 | 2605 | if (IS_ERR(journal->j_dev_bd)) { |
2613 | 2606 | result = PTR_ERR(journal->j_dev_bd); |
2614 | 2607 | journal->j_dev_bd = NULL; |
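One wrinkle in the reiserfs hunk above deserves a note: FMODE_EXCL is dropped when the journal shares the main device. Presumably this is because s_dev is already claimed exclusively by the mount path with a different holder, so a second exclusive claim from the journal code would be refused; schematically:

	if (jdev == super->s_dev)
		blkdev_mode &= ~FMODE_EXCL;	/* share the already-claimed device */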
fs/splice.c
... | ... | @@ -682,19 +682,14 @@ |
682 | 682 | { |
683 | 683 | struct file *file = sd->u.file; |
684 | 684 | loff_t pos = sd->pos; |
685 | - int ret, more; | |
685 | + int more; | |
686 | 686 | |
687 | - ret = buf->ops->confirm(pipe, buf); | |
688 | - if (!ret) { | |
689 | - more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; | |
690 | - if (file->f_op && file->f_op->sendpage) | |
691 | - ret = file->f_op->sendpage(file, buf->page, buf->offset, | |
692 | - sd->len, &pos, more); | |
693 | - else | |
694 | - ret = -EINVAL; | |
695 | - } | |
687 | + if (!likely(file->f_op && file->f_op->sendpage)) | |
688 | + return -EINVAL; | |
696 | 689 | |
697 | - return ret; | |
690 | + more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; | |
691 | + return file->f_op->sendpage(file, buf->page, buf->offset, | |
692 | + sd->len, &pos, more); | |
698 | 693 | } |
699 | 694 | |
700 | 695 | /* |
... | ... | @@ -727,13 +722,6 @@ |
727 | 722 | void *fsdata; |
728 | 723 | int ret; |
729 | 724 | |
730 | - /* | |
731 | - * make sure the data in this buffer is uptodate | |
732 | - */ | |
733 | - ret = buf->ops->confirm(pipe, buf); | |
734 | - if (unlikely(ret)) | |
735 | - return ret; | |
736 | - | |
737 | 725 | offset = sd->pos & ~PAGE_CACHE_MASK; |
738 | 726 | |
739 | 727 | this_len = sd->len; |
740 | 728 | |
... | ... | @@ -805,12 +793,17 @@ |
805 | 793 | if (sd->len > sd->total_len) |
806 | 794 | sd->len = sd->total_len; |
807 | 795 | |
808 | - ret = actor(pipe, buf, sd); | |
809 | - if (ret <= 0) { | |
796 | + ret = buf->ops->confirm(pipe, buf); | |
797 | + if (unlikely(ret)) { | |
810 | 798 | if (ret == -ENODATA) |
811 | 799 | ret = 0; |
812 | 800 | return ret; |
813 | 801 | } |
802 | + | |
803 | + ret = actor(pipe, buf, sd); | |
804 | + if (ret <= 0) | |
805 | + return ret; | |
806 | + | |
814 | 807 | buf->offset += ret; |
815 | 808 | buf->len -= ret; |
816 | 809 | |
... | ... | @@ -1044,10 +1037,6 @@ |
1044 | 1037 | int ret; |
1045 | 1038 | void *data; |
1046 | 1039 | |
1047 | - ret = buf->ops->confirm(pipe, buf); | |
1048 | - if (ret) | |
1049 | - return ret; | |
1050 | - | |
1051 | 1040 | data = buf->ops->map(pipe, buf, 0); |
1052 | 1041 | ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); |
1053 | 1042 | buf->ops->unmap(pipe, buf, data); |
... | ... | @@ -1494,10 +1483,6 @@ |
1494 | 1483 | { |
1495 | 1484 | char *src; |
1496 | 1485 | int ret; |
1497 | - | |
1498 | - ret = buf->ops->confirm(pipe, buf); | |
1499 | - if (unlikely(ret)) | |
1500 | - return ret; | |
1501 | 1486 | |
1502 | 1487 | /* |
1503 | 1488 | * See if we can use the atomic maps, by prefaulting in the |
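With confirm() hoisted into the generic splice_from_pipe machinery (the @@ -805 hunk above), actors no longer validate buffers themselves. A minimal illustrative actor under the new contract; example_pipe_to_discard() is invented:

	/*
	 * By the time an actor runs, the generic code has already called
	 * buf->ops->confirm(), so the page can be assumed uptodate.
	 */
	static int example_pipe_to_discard(struct pipe_inode_info *pipe,
					   struct pipe_buffer *buf,
					   struct splice_desc *sd)
	{
		return sd->len;		/* report the whole chunk as consumed */
	}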
fs/super.c
... | ... | @@ -767,13 +767,13 @@ |
767 | 767 | { |
768 | 768 | struct block_device *bdev; |
769 | 769 | struct super_block *s; |
770 | - fmode_t mode = FMODE_READ; | |
770 | + fmode_t mode = FMODE_READ | FMODE_EXCL; | |
771 | 771 | int error = 0; |
772 | 772 | |
773 | 773 | if (!(flags & MS_RDONLY)) |
774 | 774 | mode |= FMODE_WRITE; |
775 | 775 | |
776 | - bdev = open_bdev_exclusive(dev_name, mode, fs_type); | |
776 | + bdev = blkdev_get_by_path(dev_name, mode, fs_type); | |
777 | 777 | if (IS_ERR(bdev)) |
778 | 778 | return ERR_CAST(bdev); |
779 | 779 | |
780 | 780 | |
... | ... | @@ -802,13 +802,13 @@ |
802 | 802 | |
803 | 803 | /* |
804 | 804 | * s_umount nests inside bd_mutex during |
805 | - * __invalidate_device(). close_bdev_exclusive() | |
806 | - * acquires bd_mutex and can't be called under | |
807 | - * s_umount. Drop s_umount temporarily. This is safe | |
808 | - * as we're holding an active reference. | |
805 | + * __invalidate_device(). blkdev_put() acquires | |
806 | + * bd_mutex and can't be called under s_umount. Drop | |
807 | + * s_umount temporarily. This is safe as we're | |
808 | + * holding an active reference. | |
809 | 809 | */ |
810 | 810 | up_write(&s->s_umount); |
811 | - close_bdev_exclusive(bdev, mode); | |
811 | + blkdev_put(bdev, mode); | |
812 | 812 | down_write(&s->s_umount); |
813 | 813 | } else { |
814 | 814 | char b[BDEVNAME_SIZE]; |
... | ... | @@ -832,7 +832,7 @@ |
832 | 832 | error_s: |
833 | 833 | error = PTR_ERR(s); |
834 | 834 | error_bdev: |
835 | - close_bdev_exclusive(bdev, mode); | |
835 | + blkdev_put(bdev, mode); | |
836 | 836 | error: |
837 | 837 | return ERR_PTR(error); |
838 | 838 | } |
... | ... | @@ -863,7 +863,8 @@ |
863 | 863 | bdev->bd_super = NULL; |
864 | 864 | generic_shutdown_super(sb); |
865 | 865 | sync_blockdev(bdev); |
866 | - close_bdev_exclusive(bdev, mode); | |
866 | + WARN_ON_ONCE(!(mode & FMODE_EXCL)); | |
867 | + blkdev_put(bdev, mode | FMODE_EXCL); | |
867 | 868 | } |
868 | 869 | |
869 | 870 | EXPORT_SYMBOL(kill_block_super); |
fs/xfs/linux-2.6/xfs_super.c
... | ... | @@ -606,7 +606,8 @@ |
606 | 606 | { |
607 | 607 | int error = 0; |
608 | 608 | |
609 | - *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp); | |
609 | + *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, | |
610 | + mp); | |
610 | 611 | if (IS_ERR(*bdevp)) { |
611 | 612 | error = PTR_ERR(*bdevp); |
612 | 613 | printk("XFS: Invalid device [%s], error=%d\n", name, error); |
... | ... | @@ -620,7 +621,7 @@ |
620 | 621 | struct block_device *bdev) |
621 | 622 | { |
622 | 623 | if (bdev) |
623 | - close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); | |
624 | + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | |
624 | 625 | } |
625 | 626 | |
626 | 627 | /* |
include/linux/blkdev.h
... | ... | @@ -115,6 +115,7 @@ |
115 | 115 | void *elevator_private3; |
116 | 116 | |
117 | 117 | struct gendisk *rq_disk; |
118 | + struct hd_struct *part; | |
118 | 119 | unsigned long start_time; |
119 | 120 | #ifdef CONFIG_BLK_CGROUP |
120 | 121 | unsigned long long start_time_ns; |
... | ... | @@ -646,7 +647,6 @@ |
646 | 647 | |
647 | 648 | extern int blk_register_queue(struct gendisk *disk); |
648 | 649 | extern void blk_unregister_queue(struct gendisk *disk); |
649 | -extern void register_disk(struct gendisk *dev); | |
650 | 650 | extern void generic_make_request(struct bio *bio); |
651 | 651 | extern void blk_rq_init(struct request_queue *q, struct request *rq); |
652 | 652 | extern void blk_put_request(struct request *); |
... | ... | @@ -1256,6 +1256,9 @@ |
1256 | 1256 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
1257 | 1257 | int (*direct_access) (struct block_device *, sector_t, |
1258 | 1258 | void **, unsigned long *); |
1259 | + unsigned int (*check_events) (struct gendisk *disk, | |
1260 | + unsigned int clearing); | |
1261 | + /* ->media_changed() is DEPRECATED, use ->check_events() instead */ | |
1259 | 1262 | int (*media_changed) (struct gendisk *); |
1260 | 1263 | void (*unlock_native_capacity) (struct gendisk *); |
1261 | 1264 | int (*revalidate_disk) (struct gendisk *); |
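A sketch of the driver side of the new hook; mydev, mydev_media_changed() and mydev_eject_requested() are invented for illustration, and the DISK_EVENT_* bits are the ones introduced in include/linux/genhd.h further down:

	static unsigned int mydev_check_events(struct gendisk *disk,
					       unsigned int clearing)
	{
		struct mydev *md = disk->private_data;
		unsigned int events = 0;

		/* report only event bits the driver declared in disk->events */
		if (mydev_media_changed(md))
			events |= DISK_EVENT_MEDIA_CHANGE;
		if (mydev_eject_requested(md))
			events |= DISK_EVENT_EJECT_REQUEST;

		return events;
	}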
include/linux/cdrom.h
... | ... | @@ -946,6 +946,8 @@ |
946 | 946 | /* device-related storage */ |
947 | 947 | unsigned int options : 30; /* options flags */ |
948 | 948 | unsigned mc_flags : 2; /* media change buffer flags */ |
949 | + unsigned int vfs_events; /* cached events for vfs path */ | |
950 | + unsigned int ioctl_events; /* cached events for ioctl path */ | |
949 | 951 | int use_count; /* number of times device opened */ |
950 | 952 | char name[20]; /* name of the device type */ |
951 | 953 | /* per-device flags */ |
... | ... | @@ -965,6 +967,8 @@ |
965 | 967 | int (*open) (struct cdrom_device_info *, int); |
966 | 968 | void (*release) (struct cdrom_device_info *); |
967 | 969 | int (*drive_status) (struct cdrom_device_info *, int); |
970 | + unsigned int (*check_events) (struct cdrom_device_info *cdi, | |
971 | + unsigned int clearing, int slot); | |
968 | 972 | int (*media_changed) (struct cdrom_device_info *, int); |
969 | 973 | int (*tray_move) (struct cdrom_device_info *, int); |
970 | 974 | int (*lock_door) (struct cdrom_device_info *, int); |
... | ... | @@ -993,6 +997,8 @@ |
993 | 997 | extern void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode); |
994 | 998 | extern int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev, |
995 | 999 | fmode_t mode, unsigned int cmd, unsigned long arg); |
1000 | +extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi, | |
1001 | + unsigned int clearing); | |
996 | 1002 | extern int cdrom_media_changed(struct cdrom_device_info *); |
997 | 1003 | |
998 | 1004 | extern int register_cdrom(struct cdrom_device_info *cdi); |
include/linux/fs.h
... | ... | @@ -664,8 +664,9 @@ |
664 | 664 | void * bd_claiming; |
665 | 665 | void * bd_holder; |
666 | 666 | int bd_holders; |
667 | + bool bd_write_holder; | |
667 | 668 | #ifdef CONFIG_SYSFS |
668 | - struct list_head bd_holder_list; | |
669 | + struct gendisk * bd_holder_disk; /* for sysfs slave linkng */ | |
669 | 670 | #endif |
670 | 671 | struct block_device * bd_contains; |
671 | 672 | unsigned bd_block_size; |
... | ... | @@ -2019,7 +2020,6 @@ |
2019 | 2020 | extern void bd_set_size(struct block_device *, loff_t size); |
2020 | 2021 | extern void bd_forget(struct inode *inode); |
2021 | 2022 | extern void bdput(struct block_device *); |
2022 | -extern struct block_device *open_by_devnum(dev_t, fmode_t); | |
2023 | 2023 | extern void invalidate_bdev(struct block_device *); |
2024 | 2024 | extern int sync_blockdev(struct block_device *bdev); |
2025 | 2025 | extern struct super_block *freeze_bdev(struct block_device *); |
2026 | 2026 | |
2027 | 2027 | |
... | ... | @@ -2050,16 +2050,20 @@ |
2050 | 2050 | extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); |
2051 | 2051 | extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); |
2052 | 2052 | extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); |
2053 | -extern int blkdev_get(struct block_device *, fmode_t); | |
2054 | -extern int blkdev_put(struct block_device *, fmode_t); | |
2055 | -extern int bd_claim(struct block_device *, void *); | |
2056 | -extern void bd_release(struct block_device *); | |
2053 | +extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); | |
2054 | +extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, | |
2055 | + void *holder); | |
2056 | +extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, | |
2057 | + void *holder); | |
2058 | +extern int blkdev_put(struct block_device *bdev, fmode_t mode); | |
2057 | 2059 | #ifdef CONFIG_SYSFS |
2058 | -extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); | |
2059 | -extern void bd_release_from_disk(struct block_device *, struct gendisk *); | |
2060 | +extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); | |
2060 | 2061 | #else |
2061 | -#define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) | |
2062 | -#define bd_release_from_disk(bdev, disk) bd_release(bdev) | |
2062 | +static inline int bd_link_disk_holder(struct block_device *bdev, | |
2063 | + struct gendisk *disk) | |
2064 | +{ | |
2065 | + return 0; | |
2066 | +} | |
2063 | 2067 | #endif |
2064 | 2068 | #endif |
2065 | 2069 | |
... | ... | @@ -2095,8 +2099,6 @@ |
2095 | 2099 | extern const char *__bdevname(dev_t, char *buffer); |
2096 | 2100 | extern const char *bdevname(struct block_device *bdev, char *buffer); |
2097 | 2101 | extern struct block_device *lookup_bdev(const char *); |
2098 | -extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); | |
2099 | -extern void close_bdev_exclusive(struct block_device *, fmode_t); | |
2100 | 2102 | extern void blkdev_show(struct seq_file *,off_t); |
2101 | 2103 | |
2102 | 2104 | #else |
include/linux/genhd.h
... | ... | @@ -115,6 +115,7 @@ |
115 | 115 | #else |
116 | 116 | struct disk_stats dkstats; |
117 | 117 | #endif |
118 | + atomic_t ref; | |
118 | 119 | struct rcu_head rcu_head; |
119 | 120 | }; |
120 | 121 | |
... | ... | @@ -127,6 +128,11 @@ |
127 | 128 | #define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ |
128 | 129 | #define GENHD_FL_NATIVE_CAPACITY 128 |
129 | 130 | |
131 | +enum { | |
132 | + DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ | |
133 | + DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ | |
134 | +}; | |
135 | + | |
130 | 136 | #define BLK_SCSI_MAX_CMDS (256) |
131 | 137 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) |
132 | 138 | |
... | ... | @@ -143,6 +149,8 @@ |
143 | 149 | struct hd_struct __rcu *part[]; |
144 | 150 | }; |
145 | 151 | |
152 | +struct disk_events; | |
153 | + | |
146 | 154 | struct gendisk { |
147 | 155 | /* major, first_minor and minors are input parameters only, |
148 | 156 | * don't use directly. Use disk_devt() and disk_max_parts(). |
... | ... | @@ -154,6 +162,10 @@ |
154 | 162 | |
155 | 163 | char disk_name[DISK_NAME_LEN]; /* name of major driver */ |
156 | 164 | char *(*devnode)(struct gendisk *gd, mode_t *mode); |
165 | + | |
166 | + unsigned int events; /* supported events */ | |
167 | + unsigned int async_events; /* async events, subset of all */ | |
168 | + | |
157 | 169 | /* Array of pointers to partitions indexed by partno. |
158 | 170 | * Protected with matching bdev lock but stat and other |
159 | 171 | * non-critical accesses use RCU. Always access through |
160 | 172 | |
... | ... | @@ -171,9 +183,8 @@ |
171 | 183 | struct kobject *slave_dir; |
172 | 184 | |
173 | 185 | struct timer_rand_state *random; |
174 | - | |
175 | 186 | atomic_t sync_io; /* RAID */ |
176 | - struct work_struct async_notify; | |
187 | + struct disk_events *ev; | |
177 | 188 | #ifdef CONFIG_BLK_DEV_INTEGRITY |
178 | 189 | struct blk_integrity *integrity; |
179 | 190 | #endif |
... | ... | @@ -395,7 +406,6 @@ |
395 | 406 | /* block/genhd.c */ |
396 | 407 | extern void add_disk(struct gendisk *disk); |
397 | 408 | extern void del_gendisk(struct gendisk *gp); |
398 | -extern void unlink_gendisk(struct gendisk *gp); | |
399 | 409 | extern struct gendisk *get_gendisk(dev_t dev, int *partno); |
400 | 410 | extern struct block_device *bdget_disk(struct gendisk *disk, int partno); |
401 | 411 | |
... | ... | @@ -407,6 +417,11 @@ |
407 | 417 | return disk->part0.policy; |
408 | 418 | } |
409 | 419 | |
420 | +extern void disk_block_events(struct gendisk *disk); | |
421 | +extern void disk_unblock_events(struct gendisk *disk); | |
422 | +extern void disk_check_events(struct gendisk *disk); | |
423 | +extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask); | |
424 | + | |
410 | 425 | /* drivers/char/random.c */ |
411 | 426 | extern void add_disk_randomness(struct gendisk *disk); |
412 | 427 | extern void rand_initialize_disk(struct gendisk *disk); |
... | ... | @@ -583,6 +598,7 @@ |
583 | 598 | sector_t len, int flags, |
584 | 599 | struct partition_meta_info |
585 | 600 | *info); |
601 | +extern void __delete_partition(struct hd_struct *); | |
586 | 602 | extern void delete_partition(struct gendisk *, int); |
587 | 603 | extern void printk_all_partitions(void); |
588 | 604 | |
... | ... | @@ -610,6 +626,29 @@ |
610 | 626 | struct device_attribute *attr, |
611 | 627 | const char *buf, size_t count); |
612 | 628 | #endif /* CONFIG_FAIL_MAKE_REQUEST */ |
629 | + | |
630 | +static inline void hd_ref_init(struct hd_struct *part) | |
631 | +{ | |
632 | + atomic_set(&part->ref, 1); | |
633 | + smp_mb(); | |
634 | +} | |
635 | + | |
636 | +static inline void hd_struct_get(struct hd_struct *part) | |
637 | +{ | |
638 | + atomic_inc(&part->ref); | |
639 | + smp_mb__after_atomic_inc(); | |
640 | +} | |
641 | + | |
642 | +static inline int hd_struct_try_get(struct hd_struct *part) | |
643 | +{ | |
644 | + return atomic_inc_not_zero(&part->ref); | |
645 | +} | |
646 | + | |
647 | +static inline void hd_struct_put(struct hd_struct *part) | |
648 | +{ | |
649 | + if (atomic_dec_and_test(&part->ref)) | |
650 | + __delete_partition(part); | |
651 | +} | |
613 | 652 | |
614 | 653 | #else /* CONFIG_BLOCK */ |
615 | 654 |
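A usage sketch for the new hd_struct reference helpers; the lookup shown is only one plausible caller, and the error paths are simplified:

	struct hd_struct *part;

	rcu_read_lock();
	part = disk_map_sector_rcu(disk, sector);
	if (!hd_struct_try_get(part))
		part = NULL;		/* lost a race with delete_partition() */
	rcu_read_unlock();

	if (part) {
		/* ... account I/O against the pinned partition ... */
		hd_struct_put(part);	/* final put frees via __delete_partition() */
	}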
include/scsi/scsi.h
include/trace/events/block.h
... | ... | @@ -206,15 +206,16 @@ |
206 | 206 | * block_bio_complete - completed all work on the block operation |
207 | 207 | * @q: queue holding the block operation |
208 | 208 | * @bio: block operation completed |
209 | + * @error: io error value | |
209 | 210 | * |
210 | 211 | * This tracepoint indicates there is no further work to do on this |
211 | 212 | * block IO operation @bio. |
212 | 213 | */ |
213 | 214 | TRACE_EVENT(block_bio_complete, |
214 | 215 | |
215 | - TP_PROTO(struct request_queue *q, struct bio *bio), | |
216 | + TP_PROTO(struct request_queue *q, struct bio *bio, int error), | |
216 | 217 | |
217 | - TP_ARGS(q, bio), | |
218 | + TP_ARGS(q, bio, error), | |
218 | 219 | |
219 | 220 | TP_STRUCT__entry( |
220 | 221 | __field( dev_t, dev ) |
... | ... | @@ -228,6 +229,7 @@ |
228 | 229 | __entry->dev = bio->bi_bdev->bd_dev; |
229 | 230 | __entry->sector = bio->bi_sector; |
230 | 231 | __entry->nr_sector = bio->bi_size >> 9; |
232 | + __entry->error = error; | |
231 | 233 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); |
232 | 234 | ), |
233 | 235 | |
234 | 236 | |
235 | 237 | |
... | ... | @@ -486,16 +488,16 @@ |
486 | 488 | ); |
487 | 489 | |
488 | 490 | /** |
489 | - * block_remap - map request for a partition to the raw device | |
491 | + * block_bio_remap - map request for a logical device to the raw device | |
490 | 492 | * @q: queue holding the operation |
491 | 493 | * @bio: revised operation |
492 | 494 | * @dev: device for the operation |
493 | 495 | * @from: original sector for the operation |
494 | 496 | * |
495 | - * An operation for a partition on a block device has been mapped to the | |
497 | + * An operation for a logical device has been mapped to the | |
496 | 498 | * raw block device. |
497 | 499 | */ |
498 | -TRACE_EVENT(block_remap, | |
500 | +TRACE_EVENT(block_bio_remap, | |
499 | 501 | |
500 | 502 | TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, |
501 | 503 | sector_t from), |
kernel/power/swap.c
... | ... | @@ -224,7 +224,7 @@ |
224 | 224 | return res; |
225 | 225 | |
226 | 226 | root_swap = res; |
227 | - res = blkdev_get(hib_resume_bdev, FMODE_WRITE); | |
227 | + res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL); | |
228 | 228 | if (res) |
229 | 229 | return res; |
230 | 230 | |
... | ... | @@ -930,7 +930,8 @@ |
930 | 930 | { |
931 | 931 | int error; |
932 | 932 | |
933 | - hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); | |
933 | + hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, | |
934 | + FMODE_READ, NULL); | |
934 | 935 | if (!IS_ERR(hib_resume_bdev)) { |
935 | 936 | set_blocksize(hib_resume_bdev, PAGE_SIZE); |
936 | 937 | clear_page(swsusp_header); |
kernel/trace/blktrace.c
... | ... | @@ -758,53 +758,58 @@ |
758 | 758 | * @q: queue the io is for |
759 | 759 | * @bio: the source bio |
760 | 760 | * @what: the action |
761 | + * @error: error, if any | |
761 | 762 | * |
762 | 763 | * Description: |
763 | 764 | * Records an action against a bio. Will log the bio offset + size. |
764 | 765 | * |
765 | 766 | **/ |
766 | 767 | static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, |
767 | - u32 what) | |
768 | + u32 what, int error) | |
768 | 769 | { |
769 | 770 | struct blk_trace *bt = q->blk_trace; |
770 | 771 | |
771 | 772 | if (likely(!bt)) |
772 | 773 | return; |
773 | 774 | |
775 | + if (!error && !bio_flagged(bio, BIO_UPTODATE)) | |
776 | + error = EIO; | |
777 | + | |
774 | 778 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, |
775 | - !bio_flagged(bio, BIO_UPTODATE), 0, NULL); | |
779 | + error, 0, NULL); | |
776 | 780 | } |
777 | 781 | |
778 | 782 | static void blk_add_trace_bio_bounce(void *ignore, |
779 | 783 | struct request_queue *q, struct bio *bio) |
780 | 784 | { |
781 | - blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); | |
785 | + blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); | |
782 | 786 | } |
783 | 787 | |
784 | 788 | static void blk_add_trace_bio_complete(void *ignore, |
785 | - struct request_queue *q, struct bio *bio) | |
789 | + struct request_queue *q, struct bio *bio, | |
790 | + int error) | |
786 | 791 | { |
787 | - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); | |
792 | + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); | |
788 | 793 | } |
789 | 794 | |
790 | 795 | static void blk_add_trace_bio_backmerge(void *ignore, |
791 | 796 | struct request_queue *q, |
792 | 797 | struct bio *bio) |
793 | 798 | { |
794 | - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); | |
799 | + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); | |
795 | 800 | } |
796 | 801 | |
797 | 802 | static void blk_add_trace_bio_frontmerge(void *ignore, |
798 | 803 | struct request_queue *q, |
799 | 804 | struct bio *bio) |
800 | 805 | { |
801 | - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); | |
806 | + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); | |
802 | 807 | } |
803 | 808 | |
804 | 809 | static void blk_add_trace_bio_queue(void *ignore, |
805 | 810 | struct request_queue *q, struct bio *bio) |
806 | 811 | { |
807 | - blk_add_trace_bio(q, bio, BLK_TA_QUEUE); | |
812 | + blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); | |
808 | 813 | } |
809 | 814 | |
810 | 815 | static void blk_add_trace_getrq(void *ignore, |
... | ... | @@ -812,7 +817,7 @@ |
812 | 817 | struct bio *bio, int rw) |
813 | 818 | { |
814 | 819 | if (bio) |
815 | - blk_add_trace_bio(q, bio, BLK_TA_GETRQ); | |
820 | + blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); | |
816 | 821 | else { |
817 | 822 | struct blk_trace *bt = q->blk_trace; |
818 | 823 | |
... | ... | @@ -827,7 +832,7 @@ |
827 | 832 | struct bio *bio, int rw) |
828 | 833 | { |
829 | 834 | if (bio) |
830 | - blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); | |
835 | + blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); | |
831 | 836 | else { |
832 | 837 | struct blk_trace *bt = q->blk_trace; |
833 | 838 | |
... | ... | @@ -887,7 +892,7 @@ |
887 | 892 | } |
888 | 893 | |
889 | 894 | /** |
890 | - * blk_add_trace_remap - Add a trace for a remap operation | |
895 | + * blk_add_trace_bio_remap - Add a trace for a bio-remap operation | |
891 | 896 | * @ignore: trace callback data parameter (not used) |
892 | 897 | * @q: queue the io is for |
893 | 898 | * @bio: the source bio |
... | ... | @@ -899,9 +904,9 @@ |
899 | 904 | * it spans a stripe (or similar). Add a trace for that action. |
900 | 905 | * |
901 | 906 | **/ |
902 | -static void blk_add_trace_remap(void *ignore, | |
903 | - struct request_queue *q, struct bio *bio, | |
904 | - dev_t dev, sector_t from) | |
907 | +static void blk_add_trace_bio_remap(void *ignore, | |
908 | + struct request_queue *q, struct bio *bio, | |
909 | + dev_t dev, sector_t from) | |
905 | 910 | { |
906 | 911 | struct blk_trace *bt = q->blk_trace; |
907 | 912 | struct blk_io_trace_remap r; |
... | ... | @@ -1016,7 +1021,7 @@ |
1016 | 1021 | WARN_ON(ret); |
1017 | 1022 | ret = register_trace_block_split(blk_add_trace_split, NULL); |
1018 | 1023 | WARN_ON(ret); |
1019 | - ret = register_trace_block_remap(blk_add_trace_remap, NULL); | |
1024 | + ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); | |
1020 | 1025 | WARN_ON(ret); |
1021 | 1026 | ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); |
1022 | 1027 | WARN_ON(ret); |
... | ... | @@ -1025,7 +1030,7 @@ |
1025 | 1030 | static void blk_unregister_tracepoints(void) |
1026 | 1031 | { |
1027 | 1032 | unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); |
1028 | - unregister_trace_block_remap(blk_add_trace_remap, NULL); | |
1033 | + unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); | |
1029 | 1034 | unregister_trace_block_split(blk_add_trace_split, NULL); |
1030 | 1035 | unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); |
1031 | 1036 | unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); |
mm/swapfile.c
... | ... | @@ -1677,7 +1677,7 @@ |
1677 | 1677 | if (S_ISBLK(inode->i_mode)) { |
1678 | 1678 | struct block_device *bdev = I_BDEV(inode); |
1679 | 1679 | set_blocksize(bdev, p->old_block_size); |
1680 | - bd_release(bdev); | |
1680 | + blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); | |
1681 | 1681 | } else { |
1682 | 1682 | mutex_lock(&inode->i_mutex); |
1683 | 1683 | inode->i_flags &= ~S_SWAPFILE; |
... | ... | @@ -1939,7 +1939,8 @@ |
1939 | 1939 | error = -EINVAL; |
1940 | 1940 | if (S_ISBLK(inode->i_mode)) { |
1941 | 1941 | bdev = I_BDEV(inode); |
1942 | - error = bd_claim(bdev, sys_swapon); | |
1942 | + error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, | |
1943 | + sys_swapon); | |
1943 | 1944 | if (error < 0) { |
1944 | 1945 | bdev = NULL; |
1945 | 1946 | error = -EINVAL; |
... | ... | @@ -2136,7 +2137,7 @@ |
2136 | 2137 | bad_swap: |
2137 | 2138 | if (bdev) { |
2138 | 2139 | set_blocksize(bdev, p->old_block_size); |
2139 | - bd_release(bdev); | |
2140 | + blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); | |
2140 | 2141 | } |
2141 | 2142 | destroy_swap_extents(p); |
2142 | 2143 | swap_cgroup_swapoff(type); |