Commit bcce3de1be61e424deef35d1e86e86a35c4b6e65

Authored by Tejun Heo
Committed by Jens Axboe
1 parent c995905916

block: implement extended dev numbers

Implement extended device numbers.  A block driver can tell block
layer that it wants to use extended device numbers.  After the usual
minor space is used up, block layer automatically allocates devt's
from BLOCK_EXT_MAJOR.

Currently only one major number is allocated for this, but as the
allocation is strictly on-demand, the ~1mil minor space under it should
suffice unless the system actually has more than ~1mil partitions.  If
that ever happens, adding more majors to the extended devt area is
easy.

Due to internal implementation issues, the first partition can't be
allocated on the extended area.  In other words, genhd->minors should
be at least 1.  This limitation will be lifted by later changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

Showing 4 changed files with 135 additions and 9 deletions Side-by-side Diff

... ... @@ -16,6 +16,7 @@
16 16 #include <linux/kobj_map.h>
17 17 #include <linux/buffer_head.h>
18 18 #include <linux/mutex.h>
  19 +#include <linux/idr.h>
19 20  
20 21 #include "blk.h"
21 22  
... ... @@ -24,6 +25,15 @@
24 25 struct kobject *block_depr;
25 26 #endif
26 27  
  28 +/* for extended dynamic devt allocation, currently only one major is used */
  29 +#define MAX_EXT_DEVT (1 << MINORBITS)
  30 +
  31 +/* For extended devt allocation. ext_devt_mutex prevents look up
  32 + * results from going away underneath its user.
  33 + */
  34 +static DEFINE_MUTEX(ext_devt_mutex);
  35 +static DEFINE_IDR(ext_devt_idr);
  36 +
27 37 static struct device_type disk_type;
28 38  
29 39 /**
... ... @@ -288,6 +298,74 @@
288 298  
289 299 static struct kobj_map *bdev_map;
290 300  
  301 +/**
  302 + * blk_alloc_devt - allocate a dev_t for a partition
  303 + * @part: partition to allocate dev_t for
  304 + * @gfp_mask: memory allocation flag
  305 + * @devt: out parameter for resulting dev_t
  306 + *
  307 + * Allocate a dev_t for block device.
  308 + *
  309 + * RETURNS:
  310 + * 0 on success, allocated dev_t is returned in *@devt. -errno on
  311 + * failure.
  312 + *
  313 + * CONTEXT:
  314 + * Might sleep.
  315 + */
  316 +int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
  317 +{
  318 + struct gendisk *disk = part_to_disk(part);
  319 + int idx, rc;
  320 +
  321 + /* in consecutive minor range? */
  322 + if (part->partno < disk->minors) {
  323 + *devt = MKDEV(disk->major, disk->first_minor + part->partno);
  324 + return 0;
  325 + }
  326 +
  327 + /* allocate ext devt */
  328 + do {
  329 + if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
  330 + return -ENOMEM;
  331 + rc = idr_get_new(&ext_devt_idr, part, &idx);
  332 + } while (rc == -EAGAIN);
  333 +
  334 + if (rc)
  335 + return rc;
  336 +
  337 + if (idx > MAX_EXT_DEVT) {
  338 + idr_remove(&ext_devt_idr, idx);
  339 + return -EBUSY;
  340 + }
  341 +
  342 + *devt = MKDEV(BLOCK_EXT_MAJOR, idx);
  343 + return 0;
  344 +}
  345 +
  346 +/**
  347 + * blk_free_devt - free a dev_t
  348 + * @devt: dev_t to free
  349 + *
  350 + * Free @devt which was allocated using blk_alloc_devt().
  351 + *
  352 + * CONTEXT:
  353 + * Might sleep.
  354 + */
  355 +void blk_free_devt(dev_t devt)
  356 +{
  357 + might_sleep();
  358 +
  359 + if (devt == MKDEV(0, 0))
  360 + return;
  361 +
  362 + if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
  363 + mutex_lock(&ext_devt_mutex);
  364 + idr_remove(&ext_devt_idr, MINOR(devt));
  365 + mutex_unlock(&ext_devt_mutex);
  366 + }
  367 +}
  368 +
291 369 /*
292 370 * Register device numbers dev..(dev+range-1)
293 371 * range must be nonzero
294 372  
... ... @@ -371,10 +449,27 @@
371 449 */
372 450 struct gendisk *get_gendisk(dev_t devt, int *partno)
373 451 {
374   - struct kobject *kobj = kobj_lookup(bdev_map, devt, partno);
375   - struct device *dev = kobj_to_dev(kobj);
  452 + struct gendisk *disk = NULL;
376 453  
377   - return kobj ? dev_to_disk(dev) : NULL;
  454 + if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
  455 + struct kobject *kobj;
  456 +
  457 + kobj = kobj_lookup(bdev_map, devt, partno);
  458 + if (kobj)
  459 + disk = dev_to_disk(kobj_to_dev(kobj));
  460 + } else {
  461 + struct hd_struct *part;
  462 +
  463 + mutex_lock(&ext_devt_mutex);
  464 + part = idr_find(&ext_devt_idr, MINOR(devt));
  465 + if (part && get_disk(part_to_disk(part))) {
  466 + *partno = part->partno;
  467 + disk = part_to_disk(part);
  468 + }
  469 + mutex_unlock(&ext_devt_mutex);
  470 + }
  471 +
  472 + return disk;
378 473 }
379 474  
380 475 /**
381 476  
382 477  
... ... @@ -878,17 +973,29 @@
878 973  
/*
 * Allocate a gendisk with @minors consecutive minors and no extended
 * dynamic minors, passing @node_id through to alloc_disk_ext_node().
 */
struct gendisk *alloc_disk_node(int minors, int node_id)
{
	const int ext_minors = 0;	/* no extended dynamic minors */

	return alloc_disk_ext_node(minors, ext_minors, node_id);
}
  978 +
/*
 * Allocate a gendisk with @minors consecutive minors plus @ext_minors
 * extended dynamic minors, using a node id of -1.
 */
struct gendisk *alloc_disk_ext(int minors, int ext_minors)
{
	/* NOTE(review): -1 presumably means "no NUMA node preference" - confirm */
	const int node_id = -1;

	return alloc_disk_ext_node(minors, ext_minors, node_id);
}
  983 +
  984 +struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id)
  985 +{
881 986 struct gendisk *disk;
882 987  
883 988 disk = kmalloc_node(sizeof(struct gendisk),
884 989 GFP_KERNEL | __GFP_ZERO, node_id);
885 990 if (disk) {
  991 + int tot_minors = minors + ext_minors;
  992 +
886 993 if (!init_disk_stats(disk)) {
887 994 kfree(disk);
888 995 return NULL;
889 996 }
890   - if (minors > 1) {
891   - int size = (minors - 1) * sizeof(struct hd_struct *);
  997 + if (tot_minors > 1) {
  998 + int size = (tot_minors - 1) * sizeof(struct hd_struct *);
892 999 disk->__part = kmalloc_node(size,
893 1000 GFP_KERNEL | __GFP_ZERO, node_id);
894 1001 if (!disk->__part) {
... ... @@ -898,6 +1005,7 @@
898 1005 }
899 1006 }
900 1007 disk->minors = minors;
  1008 + disk->ext_minors = ext_minors;
901 1009 rand_initialize_disk(disk);
902 1010 disk->dev.class = &block_class;
903 1011 disk->dev.type = &disk_type;
... ... @@ -910,6 +1018,8 @@
910 1018  
911 1019 EXPORT_SYMBOL(alloc_disk);
912 1020 EXPORT_SYMBOL(alloc_disk_node);
  1021 +EXPORT_SYMBOL(alloc_disk_ext);
  1022 +EXPORT_SYMBOL(alloc_disk_ext_node);
913 1023  
914 1024 struct kobject *get_disk(struct gendisk *disk)
915 1025 {
fs/partitions/check.c
... ... @@ -333,6 +333,7 @@
333 333 if (!part)
334 334 return;
335 335  
  336 + blk_free_devt(part_devt(part));
336 337 rcu_assign_pointer(disk->__part[partno-1], NULL);
337 338 kobject_put(part->holder_dir);
338 339 device_del(&part->dev);
... ... @@ -352,6 +353,7 @@
352 353 sector_t start, sector_t len, int flags)
353 354 {
354 355 struct hd_struct *p;
  356 + dev_t devt = MKDEV(0, 0);
355 357 int err;
356 358  
357 359 if (disk->__part[partno - 1])
358 360  
... ... @@ -378,11 +380,15 @@
378 380 "%s%d", disk->dev.bus_id, partno);
379 381  
380 382 device_initialize(&p->dev);
381   - p->dev.devt = MKDEV(disk->major, disk->first_minor + partno);
382 383 p->dev.class = &block_class;
383 384 p->dev.type = &part_type;
384 385 p->dev.parent = &disk->dev;
385 386  
  387 + err = blk_alloc_devt(p, &devt);
  388 + if (err)
  389 + goto out_put;
  390 + p->dev.devt = devt;
  391 +
386 392 /* delay uevent until 'holders' subdir is created */
387 393 p->dev.uevent_suppress = 1;
388 394 err = device_add(&p->dev);
... ... @@ -419,6 +425,7 @@
419 425 device_del(&p->dev);
420 426 out_put:
421 427 put_device(&p->dev);
  428 + blk_free_devt(devt);
422 429 return err;
423 430 }
424 431  
include/linux/genhd.h
... ... @@ -113,13 +113,15 @@
113 113 #define GENHD_FL_FAIL 64
114 114  
115 115 struct gendisk {
116   - /* major, first_minor and minors are input parameters only,
117   - * don't use directly. Use disk_devt() and disk_max_parts().
  116 + /* major, first_minor, minors and ext_minors are input
  117 + * parameters only, don't use directly. Use disk_devt() and
  118 + * disk_max_parts().
118 119 */
119 120 int major; /* major number of driver */
120 121 int first_minor;
121 122 int minors; /* maximum number of minors, =1 for
122 123 * disks that can't be partitioned. */
  124 + int ext_minors; /* number of extended dynamic minors */
123 125  
124 126 char disk_name[32]; /* name of major driver */
125 127  
... ... @@ -167,7 +169,7 @@
167 169  
168 170 static inline int disk_max_parts(struct gendisk *disk)
169 171 {
170   - return disk->minors - 1;
  172 + return disk->minors + disk->ext_minors - 1;
171 173 }
172 174  
173 175 static inline dev_t disk_devt(struct gendisk *disk)
... ... @@ -554,6 +556,8 @@
554 556 #define ADDPART_FLAG_RAID 1
555 557 #define ADDPART_FLAG_WHOLEDISK 2
556 558  
  559 +extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
  560 +extern void blk_free_devt(dev_t devt);
557 561 extern dev_t blk_lookup_devt(const char *name, int partno);
558 562 extern char *disk_name (struct gendisk *hd, int partno, char *buf);
559 563  
... ... @@ -564,6 +568,9 @@
564 568  
565 569 extern struct gendisk *alloc_disk_node(int minors, int node_id);
566 570 extern struct gendisk *alloc_disk(int minors);
/* @ext_minors: number of extended dynamic minors (see struct gendisk) */
extern struct gendisk *alloc_disk_ext_node(int minors, int ext_minors,
					   int node_id);
  573 +extern struct gendisk *alloc_disk_ext(int minors, int ext_minors);
567 574 extern struct kobject *get_disk(struct gendisk *disk);
568 575 extern void put_disk(struct gendisk *disk);
569 576 extern void blk_register_region(dev_t devt, unsigned long range,
include/linux/major.h
... ... @@ -170,5 +170,7 @@
170 170  
171 171 #define VIOTAPE_MAJOR 230
172 172  
  173 +#define BLOCK_EXT_MAJOR 259
  174 +
173 175 #endif