The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/block/genhd.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  *  gendisk handling
    3  */
    4 
    5 #include <linux/module.h>
    6 #include <linux/fs.h>
    7 #include <linux/genhd.h>
    8 #include <linux/kdev_t.h>
    9 #include <linux/kernel.h>
   10 #include <linux/blkdev.h>
   11 #include <linux/init.h>
   12 #include <linux/spinlock.h>
   13 #include <linux/proc_fs.h>
   14 #include <linux/seq_file.h>
   15 #include <linux/slab.h>
   16 #include <linux/kmod.h>
   17 #include <linux/kobj_map.h>
   18 #include <linux/mutex.h>
   19 #include <linux/idr.h>
   20 #include <linux/log2.h>
   21 
   22 #include "blk.h"
   23 
/*
 * block_class_lock is taken around every walk or update of the
 * major_names hash table in this file and is also handed to
 * kobj_map_init() when bdev_map is created.
 */
static DEFINE_MUTEX(block_class_lock);
/*
 * Top-level "block" kobject, created in genhd_device_init() when
 * sysfs_deprecated is not set; register_disk()/del_gendisk() add and
 * remove per-disk links under it.
 */
struct kobject *block_depr;

/* for extended dynamic devt allocation, currently only one major is used */
#define MAX_EXT_DEVT            (1 << MINORBITS)

/* For extended devt allocation.  ext_devt_mutex prevents look up
 * results from going away underneath its user.
 */
static DEFINE_MUTEX(ext_devt_mutex);
/* Index -> partition map backing the extended (BLOCK_EXT_MAJOR) dev_t space. */
static DEFINE_IDR(ext_devt_idr);

/* Device type passed to class_dev_iter_init() to iterate over disks. */
static struct device_type disk_type;

/* Disk event helpers used by add_disk() and del_gendisk(); defined elsewhere in the file. */
static void disk_alloc_events(struct gendisk *disk);
static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);
   42 
   43 /**
   44  * disk_get_part - get partition
   45  * @disk: disk to look partition from
   46  * @partno: partition number
   47  *
   48  * Look for partition @partno from @disk.  If found, increment
   49  * reference count and return it.
   50  *
   51  * CONTEXT:
   52  * Don't care.
   53  *
   54  * RETURNS:
   55  * Pointer to the found partition on success, NULL if not found.
   56  */
   57 struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
   58 {
   59         struct hd_struct *part = NULL;
   60         struct disk_part_tbl *ptbl;
   61 
   62         if (unlikely(partno < 0))
   63                 return NULL;
   64 
   65         rcu_read_lock();
   66 
   67         ptbl = rcu_dereference(disk->part_tbl);
   68         if (likely(partno < ptbl->len)) {
   69                 part = rcu_dereference(ptbl->part[partno]);
   70                 if (part)
   71                         get_device(part_to_dev(part));
   72         }
   73 
   74         rcu_read_unlock();
   75 
   76         return part;
   77 }
   78 EXPORT_SYMBOL_GPL(disk_get_part);
   79 
   80 /**
   81  * disk_part_iter_init - initialize partition iterator
   82  * @piter: iterator to initialize
   83  * @disk: disk to iterate over
   84  * @flags: DISK_PITER_* flags
   85  *
   86  * Initialize @piter so that it iterates over partitions of @disk.
   87  *
   88  * CONTEXT:
   89  * Don't care.
   90  */
   91 void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
   92                           unsigned int flags)
   93 {
   94         struct disk_part_tbl *ptbl;
   95 
   96         rcu_read_lock();
   97         ptbl = rcu_dereference(disk->part_tbl);
   98 
   99         piter->disk = disk;
  100         piter->part = NULL;
  101 
  102         if (flags & DISK_PITER_REVERSE)
  103                 piter->idx = ptbl->len - 1;
  104         else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
  105                 piter->idx = 0;
  106         else
  107                 piter->idx = 1;
  108 
  109         piter->flags = flags;
  110 
  111         rcu_read_unlock();
  112 }
  113 EXPORT_SYMBOL_GPL(disk_part_iter_init);
  114 
  115 /**
  116  * disk_part_iter_next - proceed iterator to the next partition and return it
  117  * @piter: iterator of interest
  118  *
  119  * Proceed @piter to the next partition and return it.
  120  *
  121  * CONTEXT:
  122  * Don't care.
  123  */
  124 struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
  125 {
  126         struct disk_part_tbl *ptbl;
  127         int inc, end;
  128 
  129         /* put the last partition */
  130         disk_put_part(piter->part);
  131         piter->part = NULL;
  132 
  133         /* get part_tbl */
  134         rcu_read_lock();
  135         ptbl = rcu_dereference(piter->disk->part_tbl);
  136 
  137         /* determine iteration parameters */
  138         if (piter->flags & DISK_PITER_REVERSE) {
  139                 inc = -1;
  140                 if (piter->flags & (DISK_PITER_INCL_PART0 |
  141                                     DISK_PITER_INCL_EMPTY_PART0))
  142                         end = -1;
  143                 else
  144                         end = 0;
  145         } else {
  146                 inc = 1;
  147                 end = ptbl->len;
  148         }
  149 
  150         /* iterate to the next partition */
  151         for (; piter->idx != end; piter->idx += inc) {
  152                 struct hd_struct *part;
  153 
  154                 part = rcu_dereference(ptbl->part[piter->idx]);
  155                 if (!part)
  156                         continue;
  157                 if (!part_nr_sects_read(part) &&
  158                     !(piter->flags & DISK_PITER_INCL_EMPTY) &&
  159                     !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
  160                       piter->idx == 0))
  161                         continue;
  162 
  163                 get_device(part_to_dev(part));
  164                 piter->part = part;
  165                 piter->idx += inc;
  166                 break;
  167         }
  168 
  169         rcu_read_unlock();
  170 
  171         return piter->part;
  172 }
  173 EXPORT_SYMBOL_GPL(disk_part_iter_next);
  174 
  175 /**
  176  * disk_part_iter_exit - finish up partition iteration
  177  * @piter: iter of interest
  178  *
  179  * Called when iteration is over.  Cleans up @piter.
  180  *
  181  * CONTEXT:
  182  * Don't care.
  183  */
  184 void disk_part_iter_exit(struct disk_part_iter *piter)
  185 {
  186         disk_put_part(piter->part);
  187         piter->part = NULL;
  188 }
  189 EXPORT_SYMBOL_GPL(disk_part_iter_exit);
  190 
  191 static inline int sector_in_part(struct hd_struct *part, sector_t sector)
  192 {
  193         return part->start_sect <= sector &&
  194                 sector < part->start_sect + part_nr_sects_read(part);
  195 }
  196 
  197 /**
  198  * disk_map_sector_rcu - map sector to partition
  199  * @disk: gendisk of interest
  200  * @sector: sector to map
  201  *
  202  * Find out which partition @sector maps to on @disk.  This is
  203  * primarily used for stats accounting.
  204  *
  205  * CONTEXT:
  206  * RCU read locked.  The returned partition pointer is valid only
  207  * while preemption is disabled.
  208  *
  209  * RETURNS:
  210  * Found partition on success, part0 is returned if no partition matches
  211  */
  212 struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
  213 {
  214         struct disk_part_tbl *ptbl;
  215         struct hd_struct *part;
  216         int i;
  217 
  218         ptbl = rcu_dereference(disk->part_tbl);
  219 
  220         part = rcu_dereference(ptbl->last_lookup);
  221         if (part && sector_in_part(part, sector))
  222                 return part;
  223 
  224         for (i = 1; i < ptbl->len; i++) {
  225                 part = rcu_dereference(ptbl->part[i]);
  226 
  227                 if (part && sector_in_part(part, sector)) {
  228                         rcu_assign_pointer(ptbl->last_lookup, part);
  229                         return part;
  230                 }
  231         }
  232         return &disk->part0;
  233 }
  234 EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
  235 
/*
 * Can be deleted altogether. Later.
 *
 * Registered block majors, kept as singly linked chains hanging off
 * major_names[] and indexed through major_to_index().  Every access in
 * this file happens with block_class_lock held.
 */
static struct blk_major_name {
        struct blk_major_name *next;    /* next entry in the same bucket */
        int major;                      /* registered major number */
        char name[16];                  /* driver name, filled with strlcpy() */
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
  245 
  246 /* index in the above - for now: assume no multimajor ranges */
  247 static inline int major_to_index(unsigned major)
  248 {
  249         return major % BLKDEV_MAJOR_HASH_SIZE;
  250 }
  251 
  252 #ifdef CONFIG_PROC_FS
  253 void blkdev_show(struct seq_file *seqf, off_t offset)
  254 {
  255         struct blk_major_name *dp;
  256 
  257         if (offset < BLKDEV_MAJOR_HASH_SIZE) {
  258                 mutex_lock(&block_class_lock);
  259                 for (dp = major_names[offset]; dp; dp = dp->next)
  260                         seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
  261                 mutex_unlock(&block_class_lock);
  262         }
  263 }
  264 #endif /* CONFIG_PROC_FS */
  265 
  266 /**
  267  * register_blkdev - register a new block device
  268  *
  269  * @major: the requested major device number [1..255]. If @major=0, try to
  270  *         allocate any unused major number.
  271  * @name: the name of the new block device as a zero terminated string
  272  *
  273  * The @name must be unique within the system.
  274  *
  275  * The return value depends on the @major input parameter.
  276  *  - if a major device number was requested in range [1..255] then the
  277  *    function returns zero on success, or a negative error code
  278  *  - if any unused major number was requested with @major=0 parameter
  279  *    then the return value is the allocated major number in range
  280  *    [1..255] or a negative error code otherwise
  281  */
  282 int register_blkdev(unsigned int major, const char *name)
  283 {
  284         struct blk_major_name **n, *p;
  285         int index, ret = 0;
  286 
  287         mutex_lock(&block_class_lock);
  288 
  289         /* temporary */
  290         if (major == 0) {
  291                 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
  292                         if (major_names[index] == NULL)
  293                                 break;
  294                 }
  295 
  296                 if (index == 0) {
  297                         printk("register_blkdev: failed to get major for %s\n",
  298                                name);
  299                         ret = -EBUSY;
  300                         goto out;
  301                 }
  302                 major = index;
  303                 ret = major;
  304         }
  305 
  306         p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
  307         if (p == NULL) {
  308                 ret = -ENOMEM;
  309                 goto out;
  310         }
  311 
  312         p->major = major;
  313         strlcpy(p->name, name, sizeof(p->name));
  314         p->next = NULL;
  315         index = major_to_index(major);
  316 
  317         for (n = &major_names[index]; *n; n = &(*n)->next) {
  318                 if ((*n)->major == major)
  319                         break;
  320         }
  321         if (!*n)
  322                 *n = p;
  323         else
  324                 ret = -EBUSY;
  325 
  326         if (ret < 0) {
  327                 printk("register_blkdev: cannot get major %d for %s\n",
  328                        major, name);
  329                 kfree(p);
  330         }
  331 out:
  332         mutex_unlock(&block_class_lock);
  333         return ret;
  334 }
  335 
  336 EXPORT_SYMBOL(register_blkdev);
  337 
  338 void unregister_blkdev(unsigned int major, const char *name)
  339 {
  340         struct blk_major_name **n;
  341         struct blk_major_name *p = NULL;
  342         int index = major_to_index(major);
  343 
  344         mutex_lock(&block_class_lock);
  345         for (n = &major_names[index]; *n; n = &(*n)->next)
  346                 if ((*n)->major == major)
  347                         break;
  348         if (!*n || strcmp((*n)->name, name)) {
  349                 WARN_ON(1);
  350         } else {
  351                 p = *n;
  352                 *n = p->next;
  353         }
  354         mutex_unlock(&block_class_lock);
  355         kfree(p);
  356 }
  357 
  358 EXPORT_SYMBOL(unregister_blkdev);
  359 
/*
 * Device-number map for block devices.  Created in genhd_device_init()
 * and used by blk_register_region(), blk_unregister_region() and
 * get_gendisk().
 */
static struct kobj_map *bdev_map;
  361 
  362 /**
  363  * blk_mangle_minor - scatter minor numbers apart
  364  * @minor: minor number to mangle
  365  *
  366  * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
  367  * is enabled.  Mangling twice gives the original value.
  368  *
  369  * RETURNS:
  370  * Mangled value.
  371  *
  372  * CONTEXT:
  373  * Don't care.
  374  */
  375 static int blk_mangle_minor(int minor)
  376 {
  377 #ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
  378         int i;
  379 
  380         for (i = 0; i < MINORBITS / 2; i++) {
  381                 int low = minor & (1 << i);
  382                 int high = minor & (1 << (MINORBITS - 1 - i));
  383                 int distance = MINORBITS - 1 - 2 * i;
  384 
  385                 minor ^= low | high;    /* clear both bits */
  386                 low <<= distance;       /* swap the positions */
  387                 high >>= distance;
  388                 minor |= low | high;    /* and set */
  389         }
  390 #endif
  391         return minor;
  392 }
  393 
  394 /**
  395  * blk_alloc_devt - allocate a dev_t for a partition
  396  * @part: partition to allocate dev_t for
  397  * @devt: out parameter for resulting dev_t
  398  *
  399  * Allocate a dev_t for block device.
  400  *
  401  * RETURNS:
  402  * 0 on success, allocated dev_t is returned in *@devt.  -errno on
  403  * failure.
  404  *
  405  * CONTEXT:
  406  * Might sleep.
  407  */
  408 int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
  409 {
  410         struct gendisk *disk = part_to_disk(part);
  411         int idx, rc;
  412 
  413         /* in consecutive minor range? */
  414         if (part->partno < disk->minors) {
  415                 *devt = MKDEV(disk->major, disk->first_minor + part->partno);
  416                 return 0;
  417         }
  418 
  419         /* allocate ext devt */
  420         do {
  421                 if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
  422                         return -ENOMEM;
  423                 rc = idr_get_new(&ext_devt_idr, part, &idx);
  424         } while (rc == -EAGAIN);
  425 
  426         if (rc)
  427                 return rc;
  428 
  429         if (idx > MAX_EXT_DEVT) {
  430                 idr_remove(&ext_devt_idr, idx);
  431                 return -EBUSY;
  432         }
  433 
  434         *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
  435         return 0;
  436 }
  437 
  438 /**
  439  * blk_free_devt - free a dev_t
  440  * @devt: dev_t to free
  441  *
  442  * Free @devt which was allocated using blk_alloc_devt().
  443  *
  444  * CONTEXT:
  445  * Might sleep.
  446  */
  447 void blk_free_devt(dev_t devt)
  448 {
  449         might_sleep();
  450 
  451         if (devt == MKDEV(0, 0))
  452                 return;
  453 
  454         if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
  455                 mutex_lock(&ext_devt_mutex);
  456                 idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
  457                 mutex_unlock(&ext_devt_mutex);
  458         }
  459 }
  460 
  461 static char *bdevt_str(dev_t devt, char *buf)
  462 {
  463         if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
  464                 char tbuf[BDEVT_SIZE];
  465                 snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
  466                 snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
  467         } else
  468                 snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
  469 
  470         return buf;
  471 }
  472 
/*
 * Register device numbers dev..(dev+range-1)
 * range must be nonzero
 * The hash chain is sorted on range, so that subranges can override.
 *
 * All arguments are forwarded unchanged to kobj_map() on bdev_map.
 * The function returns void, so a failure reported by kobj_map(), if
 * any, is not propagated to the caller.
 */
void blk_register_region(dev_t devt, unsigned long range, struct module *module,
                         struct kobject *(*probe)(dev_t, int *, void *),
                         int (*lock)(dev_t, void *), void *data)
{
        kobj_map(bdev_map, devt, range, module, probe, lock, data);
}
  484 
  485 EXPORT_SYMBOL(blk_register_region);
  486 
/*
 * Undo blk_register_region(): drop the @range device numbers starting
 * at @devt from bdev_map via kobj_unmap().
 */
void blk_unregister_region(dev_t devt, unsigned long range)
{
        kobj_unmap(bdev_map, devt, range);
}
  491 
  492 EXPORT_SYMBOL(blk_unregister_region);
  493 
  494 static struct kobject *exact_match(dev_t devt, int *partno, void *data)
  495 {
  496         struct gendisk *p = data;
  497 
  498         return &disk_to_dev(p)->kobj;
  499 }
  500 
  501 static int exact_lock(dev_t devt, void *data)
  502 {
  503         struct gendisk *p = data;
  504 
  505         if (!get_disk(p))
  506                 return -1;
  507         return 0;
  508 }
  509 
  510 static void register_disk(struct gendisk *disk)
  511 {
  512         struct device *ddev = disk_to_dev(disk);
  513         struct block_device *bdev;
  514         struct disk_part_iter piter;
  515         struct hd_struct *part;
  516         int err;
  517 
  518         ddev->parent = disk->driverfs_dev;
  519 
  520         dev_set_name(ddev, disk->disk_name);
  521 
  522         /* delay uevents, until we scanned partition table */
  523         dev_set_uevent_suppress(ddev, 1);
  524 
  525         if (device_add(ddev))
  526                 return;
  527         if (!sysfs_deprecated) {
  528                 err = sysfs_create_link(block_depr, &ddev->kobj,
  529                                         kobject_name(&ddev->kobj));
  530                 if (err) {
  531                         device_del(ddev);
  532                         return;
  533                 }
  534         }
  535         disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
  536         disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
  537 
  538         /* No minors to use for partitions */
  539         if (!disk_part_scan_enabled(disk))
  540                 goto exit;
  541 
  542         /* No such device (e.g., media were just removed) */
  543         if (!get_capacity(disk))
  544                 goto exit;
  545 
  546         bdev = bdget_disk(disk, 0);
  547         if (!bdev)
  548                 goto exit;
  549 
  550         bdev->bd_invalidated = 1;
  551         err = blkdev_get(bdev, FMODE_READ, NULL);
  552         if (err < 0)
  553                 goto exit;
  554         blkdev_put(bdev, FMODE_READ);
  555 
  556 exit:
  557         /* announce disk after possible partitions are created */
  558         dev_set_uevent_suppress(ddev, 0);
  559         kobject_uevent(&ddev->kobj, KOBJ_ADD);
  560 
  561         /* announce possible partitions */
  562         disk_part_iter_init(&piter, disk, 0);
  563         while ((part = disk_part_iter_next(&piter)))
  564                 kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
  565         disk_part_iter_exit(&piter);
  566 }
  567 
  568 /**
  569  * add_disk - add partitioning information to kernel list
  570  * @disk: per-device partitioning information
  571  *
  572  * This function registers the partitioning information in @disk
  573  * with the kernel.
  574  *
  575  * FIXME: error handling
  576  */
  577 void add_disk(struct gendisk *disk)
  578 {
  579         struct backing_dev_info *bdi;
  580         dev_t devt;
  581         int retval;
  582 
  583         /* minors == 0 indicates to use ext devt from part0 and should
  584          * be accompanied with EXT_DEVT flag.  Make sure all
  585          * parameters make sense.
  586          */
  587         WARN_ON(disk->minors && !(disk->major || disk->first_minor));
  588         WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
  589 
  590         disk->flags |= GENHD_FL_UP;
  591 
  592         retval = blk_alloc_devt(&disk->part0, &devt);
  593         if (retval) {
  594                 WARN_ON(1);
  595                 return;
  596         }
  597         disk_to_dev(disk)->devt = devt;
  598 
  599         /* ->major and ->first_minor aren't supposed to be
  600          * dereferenced from here on, but set them just in case.
  601          */
  602         disk->major = MAJOR(devt);
  603         disk->first_minor = MINOR(devt);
  604 
  605         disk_alloc_events(disk);
  606 
  607         /* Register BDI before referencing it from bdev */
  608         bdi = &disk->queue->backing_dev_info;
  609         bdi_register_dev(bdi, disk_devt(disk));
  610 
  611         blk_register_region(disk_devt(disk), disk->minors, NULL,
  612                             exact_match, exact_lock, disk);
  613         register_disk(disk);
  614         blk_register_queue(disk);
  615 
  616         /*
  617          * Take an extra ref on queue which will be put on disk_release()
  618          * so that it sticks around as long as @disk is there.
  619          */
  620         WARN_ON_ONCE(!blk_get_queue(disk->queue));
  621 
  622         retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
  623                                    "bdi");
  624         WARN_ON(retval);
  625 
  626         disk_add_events(disk);
  627 }
  628 EXPORT_SYMBOL(add_disk);
  629 
  630 void del_gendisk(struct gendisk *disk)
  631 {
  632         struct disk_part_iter piter;
  633         struct hd_struct *part;
  634 
  635         disk_del_events(disk);
  636 
  637         /* invalidate stuff */
  638         disk_part_iter_init(&piter, disk,
  639                              DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
  640         while ((part = disk_part_iter_next(&piter))) {
  641                 invalidate_partition(disk, part->partno);
  642                 delete_partition(disk, part->partno);
  643         }
  644         disk_part_iter_exit(&piter);
  645 
  646         invalidate_partition(disk, 0);
  647         blk_free_devt(disk_to_dev(disk)->devt);
  648         set_capacity(disk, 0);
  649         disk->flags &= ~GENHD_FL_UP;
  650 
  651         sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
  652         bdi_unregister(&disk->queue->backing_dev_info);
  653         blk_unregister_queue(disk);
  654         blk_unregister_region(disk_devt(disk), disk->minors);
  655 
  656         part_stat_set_all(&disk->part0, 0);
  657         disk->part0.stamp = 0;
  658 
  659         kobject_put(disk->part0.holder_dir);
  660         kobject_put(disk->slave_dir);
  661         disk->driverfs_dev = NULL;
  662         if (!sysfs_deprecated)
  663                 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
  664         device_del(disk_to_dev(disk));
  665 }
  666 EXPORT_SYMBOL(del_gendisk);
  667 
  668 /**
  669  * get_gendisk - get partitioning information for a given device
  670  * @devt: device to get partitioning information for
  671  * @partno: returned partition index
  672  *
  673  * This function gets the structure containing partitioning
  674  * information for the given device @devt.
  675  */
  676 struct gendisk *get_gendisk(dev_t devt, int *partno)
  677 {
  678         struct gendisk *disk = NULL;
  679 
  680         if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
  681                 struct kobject *kobj;
  682 
  683                 kobj = kobj_lookup(bdev_map, devt, partno);
  684                 if (kobj)
  685                         disk = dev_to_disk(kobj_to_dev(kobj));
  686         } else {
  687                 struct hd_struct *part;
  688 
  689                 mutex_lock(&ext_devt_mutex);
  690                 part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
  691                 if (part && get_disk(part_to_disk(part))) {
  692                         *partno = part->partno;
  693                         disk = part_to_disk(part);
  694                 }
  695                 mutex_unlock(&ext_devt_mutex);
  696         }
  697 
  698         return disk;
  699 }
  700 EXPORT_SYMBOL(get_gendisk);
  701 
  702 /**
  703  * bdget_disk - do bdget() by gendisk and partition number
  704  * @disk: gendisk of interest
  705  * @partno: partition number
  706  *
  707  * Find partition @partno from @disk, do bdget() on it.
  708  *
  709  * CONTEXT:
  710  * Don't care.
  711  *
  712  * RETURNS:
  713  * Resulting block_device on success, NULL on failure.
  714  */
  715 struct block_device *bdget_disk(struct gendisk *disk, int partno)
  716 {
  717         struct hd_struct *part;
  718         struct block_device *bdev = NULL;
  719 
  720         part = disk_get_part(disk, partno);
  721         if (part)
  722                 bdev = bdget(part_devt(part));
  723         disk_put_part(part);
  724 
  725         return bdev;
  726 }
  727 EXPORT_SYMBOL(bdget_disk);
  728 
  729 /*
  730  * print a full list of all partitions - intended for places where the root
  731  * filesystem can't be mounted and thus to give the victim some idea of what
  732  * went wrong
  733  */
  734 void __init printk_all_partitions(void)
  735 {
  736         struct class_dev_iter iter;
  737         struct device *dev;
  738 
  739         class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
  740         while ((dev = class_dev_iter_next(&iter))) {
  741                 struct gendisk *disk = dev_to_disk(dev);
  742                 struct disk_part_iter piter;
  743                 struct hd_struct *part;
  744                 char name_buf[BDEVNAME_SIZE];
  745                 char devt_buf[BDEVT_SIZE];
  746 
  747                 /*
  748                  * Don't show empty devices or things that have been
  749                  * suppressed
  750                  */
  751                 if (get_capacity(disk) == 0 ||
  752                     (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
  753                         continue;
  754 
  755                 /*
  756                  * Note, unlike /proc/partitions, I am showing the
  757                  * numbers in hex - the same format as the root=
  758                  * option takes.
  759                  */
  760                 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
  761                 while ((part = disk_part_iter_next(&piter))) {
  762                         bool is_part0 = part == &disk->part0;
  763 
  764                         printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
  765                                bdevt_str(part_devt(part), devt_buf),
  766                                (unsigned long long)part_nr_sects_read(part) >> 1
  767                                , disk_name(disk, part->partno, name_buf),
  768                                part->info ? part->info->uuid : "");
  769                         if (is_part0) {
  770                                 if (disk->driverfs_dev != NULL &&
  771                                     disk->driverfs_dev->driver != NULL)
  772                                         printk(" driver: %s\n",
  773                                               disk->driverfs_dev->driver->name);
  774                                 else
  775                                         printk(" (driver?)\n");
  776                         } else
  777                                 printk("\n");
  778                 }
  779                 disk_part_iter_exit(&piter);
  780         }
  781         class_dev_iter_exit(&iter);
  782 }
  783 
  784 #ifdef CONFIG_PROC_FS
  785 /* iterator */
  786 static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
  787 {
  788         loff_t skip = *pos;
  789         struct class_dev_iter *iter;
  790         struct device *dev;
  791 
  792         iter = kmalloc(sizeof(*iter), GFP_KERNEL);
  793         if (!iter)
  794                 return ERR_PTR(-ENOMEM);
  795 
  796         seqf->private = iter;
  797         class_dev_iter_init(iter, &block_class, NULL, &disk_type);
  798         do {
  799                 dev = class_dev_iter_next(iter);
  800                 if (!dev)
  801                         return NULL;
  802         } while (skip--);
  803 
  804         return dev_to_disk(dev);
  805 }
  806 
  807 static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
  808 {
  809         struct device *dev;
  810 
  811         (*pos)++;
  812         dev = class_dev_iter_next(seqf->private);
  813         if (dev)
  814                 return dev_to_disk(dev);
  815 
  816         return NULL;
  817 }
  818 
  819 static void disk_seqf_stop(struct seq_file *seqf, void *v)
  820 {
  821         struct class_dev_iter *iter = seqf->private;
  822 
  823         /* stop is called even after start failed :-( */
  824         if (iter) {
  825                 class_dev_iter_exit(iter);
  826                 kfree(iter);
  827         }
  828 }
  829 
  830 static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
  831 {
  832         void *p;
  833 
  834         p = disk_seqf_start(seqf, pos);
  835         if (!IS_ERR_OR_NULL(p) && !*pos)
  836                 seq_puts(seqf, "major minor  #blocks  name\n\n");
  837         return p;
  838 }
  839 
  840 static int show_partition(struct seq_file *seqf, void *v)
  841 {
  842         struct gendisk *sgp = v;
  843         struct disk_part_iter piter;
  844         struct hd_struct *part;
  845         char buf[BDEVNAME_SIZE];
  846 
  847         /* Don't show non-partitionable removeable devices or empty devices */
  848         if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
  849                                    (sgp->flags & GENHD_FL_REMOVABLE)))
  850                 return 0;
  851         if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
  852                 return 0;
  853 
  854         /* show the full disk and all non-0 size partitions of it */
  855         disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
  856         while ((part = disk_part_iter_next(&piter)))
  857                 seq_printf(seqf, "%4d  %7d %10llu %s\n",
  858                            MAJOR(part_devt(part)), MINOR(part_devt(part)),
  859                            (unsigned long long)part_nr_sects_read(part) >> 1,
  860                            disk_name(sgp, part->partno, buf));
  861         disk_part_iter_exit(&piter);
  862 
  863         return 0;
  864 }
  865 
/*
 * seq_file operations for the partitions listing: the generic disk
 * iterator above, with a ->start that also emits the header line.
 */
static const struct seq_operations partitions_op = {
        .start  = show_partition_start,
        .next   = disk_seqf_next,
        .stop   = disk_seqf_stop,
        .show   = show_partition
};
  872 
/* ->open handler: attach partitions_op to @file through seq_open(). */
static int partitions_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &partitions_op);
}
  877 
/*
 * File operations built on partitions_open() and the stock seq_file
 * read/llseek/release helpers.
 */
static const struct file_operations proc_partitions_operations = {
        .open           = partitions_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
  884 #endif
  885 
  886 
  887 static struct kobject *base_probe(dev_t devt, int *partno, void *data)
  888 {
  889         if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
  890                 /* Make old-style 2.4 aliases work */
  891                 request_module("block-major-%d", MAJOR(devt));
  892         return NULL;
  893 }
  894 
  895 static int __init genhd_device_init(void)
  896 {
  897         int error;
  898 
  899         block_class.dev_kobj = sysfs_dev_block_kobj;
  900         error = class_register(&block_class);
  901         if (unlikely(error))
  902                 return error;
  903         bdev_map = kobj_map_init(base_probe, &block_class_lock);
  904         blk_dev_init();
  905 
  906         register_blkdev(BLOCK_EXT_MAJOR, "blkext");
  907 
  908         /* create top-level block dir */
  909         if (!sysfs_deprecated)
  910                 block_depr = kobject_create_and_add("block", NULL);
  911         return 0;
  912 }
  913 
  914 subsys_initcall(genhd_device_init);
  915 
  916 static ssize_t disk_range_show(struct device *dev,
  917                                struct device_attribute *attr, char *buf)
  918 {
  919         struct gendisk *disk = dev_to_disk(dev);
  920 
  921         return sprintf(buf, "%d\n", disk->minors);
  922 }
  923 
  924 static ssize_t disk_ext_range_show(struct device *dev,
  925                                    struct device_attribute *attr, char *buf)
  926 {
  927         struct gendisk *disk = dev_to_disk(dev);
  928 
  929         return sprintf(buf, "%d\n", disk_max_parts(disk));
  930 }
  931 
  932 static ssize_t disk_removable_show(struct device *dev,
  933                                    struct device_attribute *attr, char *buf)
  934 {
  935         struct gendisk *disk = dev_to_disk(dev);
  936 
  937         return sprintf(buf, "%d\n",
  938                        (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
  939 }
  940 
  941 static ssize_t disk_ro_show(struct device *dev,
  942                                    struct device_attribute *attr, char *buf)
  943 {
  944         struct gendisk *disk = dev_to_disk(dev);
  945 
  946         return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
  947 }
  948 
  949 static ssize_t disk_capability_show(struct device *dev,
  950                                     struct device_attribute *attr, char *buf)
  951 {
  952         struct gendisk *disk = dev_to_disk(dev);
  953 
  954         return sprintf(buf, "%x\n", disk->flags);
  955 }
  956 
  957 static ssize_t disk_alignment_offset_show(struct device *dev,
  958                                           struct device_attribute *attr,
  959                                           char *buf)
  960 {
  961         struct gendisk *disk = dev_to_disk(dev);
  962 
  963         return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
  964 }
  965 
  966 static ssize_t disk_discard_alignment_show(struct device *dev,
  967                                            struct device_attribute *attr,
  968                                            char *buf)
  969 {
  970         struct gendisk *disk = dev_to_disk(dev);
  971 
  972         return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
  973 }
  974 
  975 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
  976 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
  977 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
  978 static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
  979 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
  980 static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
  981 static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show,
  982                    NULL);
  983 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
  984 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
  985 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
  986 #ifdef CONFIG_FAIL_MAKE_REQUEST
  987 static struct device_attribute dev_attr_fail =
  988         __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
  989 #endif
  990 #ifdef CONFIG_FAIL_IO_TIMEOUT
  991 static struct device_attribute dev_attr_fail_timeout =
  992         __ATTR(io-timeout-fail,  S_IRUGO|S_IWUSR, part_timeout_show,
  993                 part_timeout_store);
  994 #endif
  995 
  996 static struct attribute *disk_attrs[] = {
  997         &dev_attr_range.attr,
  998         &dev_attr_ext_range.attr,
  999         &dev_attr_removable.attr,
 1000         &dev_attr_ro.attr,
 1001         &dev_attr_size.attr,
 1002         &dev_attr_alignment_offset.attr,
 1003         &dev_attr_discard_alignment.attr,
 1004         &dev_attr_capability.attr,
 1005         &dev_attr_stat.attr,
 1006         &dev_attr_inflight.attr,
 1007 #ifdef CONFIG_FAIL_MAKE_REQUEST
 1008         &dev_attr_fail.attr,
 1009 #endif
 1010 #ifdef CONFIG_FAIL_IO_TIMEOUT
 1011         &dev_attr_fail_timeout.attr,
 1012 #endif
 1013         NULL
 1014 };
 1015 
 1016 static struct attribute_group disk_attr_group = {
 1017         .attrs = disk_attrs,
 1018 };
 1019 
 1020 static const struct attribute_group *disk_attr_groups[] = {
 1021         &disk_attr_group,
 1022         NULL
 1023 };
 1024 
 1025 /**
 1026  * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
 1027  * @disk: disk to replace part_tbl for
 1028  * @new_ptbl: new part_tbl to install
 1029  *
 1030  * Replace disk->part_tbl with @new_ptbl in RCU-safe way.  The
 1031  * original ptbl is freed using RCU callback.
 1032  *
 1033  * LOCKING:
 1034  * Matching bd_mutx locked.
 1035  */
 1036 static void disk_replace_part_tbl(struct gendisk *disk,
 1037                                   struct disk_part_tbl *new_ptbl)
 1038 {
 1039         struct disk_part_tbl *old_ptbl = disk->part_tbl;
 1040 
 1041         rcu_assign_pointer(disk->part_tbl, new_ptbl);
 1042 
 1043         if (old_ptbl) {
 1044                 rcu_assign_pointer(old_ptbl->last_lookup, NULL);
 1045                 kfree_rcu(old_ptbl, rcu_head);
 1046         }
 1047 }
 1048 
 1049 /**
 1050  * disk_expand_part_tbl - expand disk->part_tbl
 1051  * @disk: disk to expand part_tbl for
 1052  * @partno: expand such that this partno can fit in
 1053  *
 1054  * Expand disk->part_tbl such that @partno can fit in.  disk->part_tbl
 1055  * uses RCU to allow unlocked dereferencing for stats and other stuff.
 1056  *
 1057  * LOCKING:
 1058  * Matching bd_mutex locked, might sleep.
 1059  *
 1060  * RETURNS:
 1061  * 0 on success, -errno on failure.
 1062  */
 1063 int disk_expand_part_tbl(struct gendisk *disk, int partno)
 1064 {
 1065         struct disk_part_tbl *old_ptbl = disk->part_tbl;
 1066         struct disk_part_tbl *new_ptbl;
 1067         int len = old_ptbl ? old_ptbl->len : 0;
 1068         int target = partno + 1;
 1069         size_t size;
 1070         int i;
 1071 
 1072         /* disk_max_parts() is zero during initialization, ignore if so */
 1073         if (disk_max_parts(disk) && target > disk_max_parts(disk))
 1074                 return -EINVAL;
 1075 
 1076         if (target <= len)
 1077                 return 0;
 1078 
 1079         size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
 1080         new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
 1081         if (!new_ptbl)
 1082                 return -ENOMEM;
 1083 
 1084         new_ptbl->len = target;
 1085 
 1086         for (i = 0; i < len; i++)
 1087                 rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
 1088 
 1089         disk_replace_part_tbl(disk, new_ptbl);
 1090         return 0;
 1091 }
 1092 
 1093 static void disk_release(struct device *dev)
 1094 {
 1095         struct gendisk *disk = dev_to_disk(dev);
 1096 
 1097         disk_release_events(disk);
 1098         kfree(disk->random);
 1099         disk_replace_part_tbl(disk, NULL);
 1100         free_part_stats(&disk->part0);
 1101         free_part_info(&disk->part0);
 1102         if (disk->queue)
 1103                 blk_put_queue(disk->queue);
 1104         kfree(disk);
 1105 }
 1106 struct class block_class = {
 1107         .name           = "block",
 1108 };
 1109 
 1110 static char *block_devnode(struct device *dev, umode_t *mode)
 1111 {
 1112         struct gendisk *disk = dev_to_disk(dev);
 1113 
 1114         if (disk->devnode)
 1115                 return disk->devnode(disk, mode);
 1116         return NULL;
 1117 }
 1118 
 1119 static struct device_type disk_type = {
 1120         .name           = "disk",
 1121         .groups         = disk_attr_groups,
 1122         .release        = disk_release,
 1123         .devnode        = block_devnode,
 1124 };
 1125 
 1126 #ifdef CONFIG_PROC_FS
 1127 /*
 1128  * aggregate disk stat collector.  Uses the same stats that the sysfs
 1129  * entries do, above, but makes them available through one seq_file.
 1130  *
 1131  * The output looks suspiciously like /proc/partitions with a bunch of
 1132  * extra fields.
 1133  */
 1134 static int diskstats_show(struct seq_file *seqf, void *v)
 1135 {
 1136         struct gendisk *gp = v;
 1137         struct disk_part_iter piter;
 1138         struct hd_struct *hd;
 1139         char buf[BDEVNAME_SIZE];
 1140         int cpu;
 1141 
 1142         /*
 1143         if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
 1144                 seq_puts(seqf,  "major minor name"
 1145                                 "     rio rmerge rsect ruse wio wmerge "
 1146                                 "wsect wuse running use aveq"
 1147                                 "\n\n");
 1148         */
 1149 
 1150         disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
 1151         while ((hd = disk_part_iter_next(&piter))) {
 1152                 cpu = part_stat_lock();
 1153                 part_round_stats(cpu, hd);
 1154                 part_stat_unlock();
 1155                 seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
 1156                            "%u %lu %lu %lu %u %u %u %u\n",
 1157                            MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 1158                            disk_name(gp, hd->partno, buf),
 1159                            part_stat_read(hd, ios[READ]),
 1160                            part_stat_read(hd, merges[READ]),
 1161                            part_stat_read(hd, sectors[READ]),
 1162                            jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
 1163                            part_stat_read(hd, ios[WRITE]),
 1164                            part_stat_read(hd, merges[WRITE]),
 1165                            part_stat_read(hd, sectors[WRITE]),
 1166                            jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
 1167                            part_in_flight(hd),
 1168                            jiffies_to_msecs(part_stat_read(hd, io_ticks)),
 1169                            jiffies_to_msecs(part_stat_read(hd, time_in_queue))
 1170                         );
 1171         }
 1172         disk_part_iter_exit(&piter);
 1173 
 1174         return 0;
 1175 }
 1176 
 1177 static const struct seq_operations diskstats_op = {
 1178         .start  = disk_seqf_start,
 1179         .next   = disk_seqf_next,
 1180         .stop   = disk_seqf_stop,
 1181         .show   = diskstats_show
 1182 };
 1183 
 1184 static int diskstats_open(struct inode *inode, struct file *file)
 1185 {
 1186         return seq_open(file, &diskstats_op);
 1187 }
 1188 
 1189 static const struct file_operations proc_diskstats_operations = {
 1190         .open           = diskstats_open,
 1191         .read           = seq_read,
 1192         .llseek         = seq_lseek,
 1193         .release        = seq_release,
 1194 };
 1195 
 1196 static int __init proc_genhd_init(void)
 1197 {
 1198         proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
 1199         proc_create("partitions", 0, NULL, &proc_partitions_operations);
 1200         return 0;
 1201 }
 1202 module_init(proc_genhd_init);
 1203 #endif /* CONFIG_PROC_FS */
 1204 
 1205 dev_t blk_lookup_devt(const char *name, int partno)
 1206 {
 1207         dev_t devt = MKDEV(0, 0);
 1208         struct class_dev_iter iter;
 1209         struct device *dev;
 1210 
 1211         class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
 1212         while ((dev = class_dev_iter_next(&iter))) {
 1213                 struct gendisk *disk = dev_to_disk(dev);
 1214                 struct hd_struct *part;
 1215 
 1216                 if (strcmp(dev_name(dev), name))
 1217                         continue;
 1218 
 1219                 if (partno < disk->minors) {
 1220                         /* We need to return the right devno, even
 1221                          * if the partition doesn't exist yet.
 1222                          */
 1223                         devt = MKDEV(MAJOR(dev->devt),
 1224                                      MINOR(dev->devt) + partno);
 1225                         break;
 1226                 }
 1227                 part = disk_get_part(disk, partno);
 1228                 if (part) {
 1229                         devt = part_devt(part);
 1230                         disk_put_part(part);
 1231                         break;
 1232                 }
 1233                 disk_put_part(part);
 1234         }
 1235         class_dev_iter_exit(&iter);
 1236         return devt;
 1237 }
 1238 EXPORT_SYMBOL(blk_lookup_devt);
 1239 
 1240 struct gendisk *alloc_disk(int minors)
 1241 {
 1242         return alloc_disk_node(minors, NUMA_NO_NODE);
 1243 }
 1244 EXPORT_SYMBOL(alloc_disk);
 1245 
 1246 struct gendisk *alloc_disk_node(int minors, int node_id)
 1247 {
 1248         struct gendisk *disk;
 1249 
 1250         disk = kmalloc_node(sizeof(struct gendisk),
 1251                                 GFP_KERNEL | __GFP_ZERO, node_id);
 1252         if (disk) {
 1253                 if (!init_part_stats(&disk->part0)) {
 1254                         kfree(disk);
 1255                         return NULL;
 1256                 }
 1257                 disk->node_id = node_id;
 1258                 if (disk_expand_part_tbl(disk, 0)) {
 1259                         free_part_stats(&disk->part0);
 1260                         kfree(disk);
 1261                         return NULL;
 1262                 }
 1263                 disk->part_tbl->part[0] = &disk->part0;
 1264 
 1265                 /*
 1266                  * set_capacity() and get_capacity() currently don't use
 1267                  * seqcounter to read/update the part0->nr_sects. Still init
 1268                  * the counter as we can read the sectors in IO submission
 1269                  * patch using seqence counters.
 1270                  *
 1271                  * TODO: Ideally set_capacity() and get_capacity() should be
 1272                  * converted to make use of bd_mutex and sequence counters.
 1273                  */
 1274                 seqcount_init(&disk->part0.nr_sects_seq);
 1275                 hd_ref_init(&disk->part0);
 1276 
 1277                 disk->minors = minors;
 1278                 rand_initialize_disk(disk);
 1279                 disk_to_dev(disk)->class = &block_class;
 1280                 disk_to_dev(disk)->type = &disk_type;
 1281                 device_initialize(disk_to_dev(disk));
 1282         }
 1283         return disk;
 1284 }
 1285 EXPORT_SYMBOL(alloc_disk_node);
 1286 
 1287 struct kobject *get_disk(struct gendisk *disk)
 1288 {
 1289         struct module *owner;
 1290         struct kobject *kobj;
 1291 
 1292         if (!disk->fops)
 1293                 return NULL;
 1294         owner = disk->fops->owner;
 1295         if (owner && !try_module_get(owner))
 1296                 return NULL;
 1297         kobj = kobject_get(&disk_to_dev(disk)->kobj);
 1298         if (kobj == NULL) {
 1299                 module_put(owner);
 1300                 return NULL;
 1301         }
 1302         return kobj;
 1303 
 1304 }
 1305 
 1306 EXPORT_SYMBOL(get_disk);
 1307 
 1308 void put_disk(struct gendisk *disk)
 1309 {
 1310         if (disk)
 1311                 kobject_put(&disk_to_dev(disk)->kobj);
 1312 }
 1313 
 1314 EXPORT_SYMBOL(put_disk);
 1315 
 1316 static void set_disk_ro_uevent(struct gendisk *gd, int ro)
 1317 {
 1318         char event[] = "DISK_RO=1";
 1319         char *envp[] = { event, NULL };
 1320 
 1321         if (!ro)
 1322                 event[8] = '';
 1323         kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
 1324 }
 1325 
 1326 void set_device_ro(struct block_device *bdev, int flag)
 1327 {
 1328         bdev->bd_part->policy = flag;
 1329 }
 1330 
 1331 EXPORT_SYMBOL(set_device_ro);
 1332 
 1333 void set_disk_ro(struct gendisk *disk, int flag)
 1334 {
 1335         struct disk_part_iter piter;
 1336         struct hd_struct *part;
 1337 
 1338         if (disk->part0.policy != flag) {
 1339                 set_disk_ro_uevent(disk, flag);
 1340                 disk->part0.policy = flag;
 1341         }
 1342 
 1343         disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
 1344         while ((part = disk_part_iter_next(&piter)))
 1345                 part->policy = flag;
 1346         disk_part_iter_exit(&piter);
 1347 }
 1348 
 1349 EXPORT_SYMBOL(set_disk_ro);
 1350 
 1351 int bdev_read_only(struct block_device *bdev)
 1352 {
 1353         if (!bdev)
 1354                 return 0;
 1355         return bdev->bd_part->policy;
 1356 }
 1357 
 1358 EXPORT_SYMBOL(bdev_read_only);
 1359 
 1360 int invalidate_partition(struct gendisk *disk, int partno)
 1361 {
 1362         int res = 0;
 1363         struct block_device *bdev = bdget_disk(disk, partno);
 1364         if (bdev) {
 1365                 fsync_bdev(bdev);
 1366                 res = __invalidate_device(bdev, true);
 1367                 bdput(bdev);
 1368         }
 1369         return res;
 1370 }
 1371 
 1372 EXPORT_SYMBOL(invalidate_partition);
 1373 
 1374 /*
 1375  * Disk events - monitor disk events like media change and eject request.
 1376  */
 1377 struct disk_events {
 1378         struct list_head        node;           /* all disk_event's */
 1379         struct gendisk          *disk;          /* the associated disk */
 1380         spinlock_t              lock;
 1381 
 1382         struct mutex            block_mutex;    /* protects blocking */
 1383         int                     block;          /* event blocking depth */
 1384         unsigned int            pending;        /* events already sent out */
 1385         unsigned int            clearing;       /* events being cleared */
 1386 
 1387         long                    poll_msecs;     /* interval, -1 for default */
 1388         struct delayed_work     dwork;
 1389 };
 1390 
 1391 static const char *disk_events_strs[] = {
 1392         [ilog2(DISK_EVENT_MEDIA_CHANGE)]        = "media_change",
 1393         [ilog2(DISK_EVENT_EJECT_REQUEST)]       = "eject_request",
 1394 };
 1395 
 1396 static char *disk_uevents[] = {
 1397         [ilog2(DISK_EVENT_MEDIA_CHANGE)]        = "DISK_MEDIA_CHANGE=1",
 1398         [ilog2(DISK_EVENT_EJECT_REQUEST)]       = "DISK_EJECT_REQUEST=1",
 1399 };
 1400 
 1401 /* list of all disk_events */
 1402 static DEFINE_MUTEX(disk_events_mutex);
 1403 static LIST_HEAD(disk_events);
 1404 
 1405 /* disable in-kernel polling by default */
 1406 static unsigned long disk_events_dfl_poll_msecs = 0;
 1407 
 1408 static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
 1409 {
 1410         struct disk_events *ev = disk->ev;
 1411         long intv_msecs = 0;
 1412 
 1413         /*
 1414          * If device-specific poll interval is set, always use it.  If
 1415          * the default is being used, poll iff there are events which
 1416          * can't be monitored asynchronously.
 1417          */
 1418         if (ev->poll_msecs >= 0)
 1419                 intv_msecs = ev->poll_msecs;
 1420         else if (disk->events & ~disk->async_events)
 1421                 intv_msecs = disk_events_dfl_poll_msecs;
 1422 
 1423         return msecs_to_jiffies(intv_msecs);
 1424 }
 1425 
 1426 /**
 1427  * disk_block_events - block and flush disk event checking
 1428  * @disk: disk to block events for
 1429  *
 1430  * On return from this function, it is guaranteed that event checking
 1431  * isn't in progress and won't happen until unblocked by
 1432  * disk_unblock_events().  Events blocking is counted and the actual
 1433  * unblocking happens after the matching number of unblocks are done.
 1434  *
 1435  * Note that this intentionally does not block event checking from
 1436  * disk_clear_events().
 1437  *
 1438  * CONTEXT:
 1439  * Might sleep.
 1440  */
 1441 void disk_block_events(struct gendisk *disk)
 1442 {
 1443         struct disk_events *ev = disk->ev;
 1444         unsigned long flags;
 1445         bool cancel;
 1446 
 1447         if (!ev)
 1448                 return;
 1449 
 1450         /*
 1451          * Outer mutex ensures that the first blocker completes canceling
 1452          * the event work before further blockers are allowed to finish.
 1453          */
 1454         mutex_lock(&ev->block_mutex);
 1455 
 1456         spin_lock_irqsave(&ev->lock, flags);
 1457         cancel = !ev->block++;
 1458         spin_unlock_irqrestore(&ev->lock, flags);
 1459 
 1460         if (cancel)
 1461                 cancel_delayed_work_sync(&disk->ev->dwork);
 1462 
 1463         mutex_unlock(&ev->block_mutex);
 1464 }
 1465 
 1466 static void __disk_unblock_events(struct gendisk *disk, bool check_now)
 1467 {
 1468         struct disk_events *ev = disk->ev;
 1469         unsigned long intv;
 1470         unsigned long flags;
 1471 
 1472         spin_lock_irqsave(&ev->lock, flags);
 1473 
 1474         if (WARN_ON_ONCE(ev->block <= 0))
 1475                 goto out_unlock;
 1476 
 1477         if (--ev->block)
 1478                 goto out_unlock;
 1479 
 1480         /*
 1481          * Not exactly a latency critical operation, set poll timer
 1482          * slack to 25% and kick event check.
 1483          */
 1484         intv = disk_events_poll_jiffies(disk);
 1485         set_timer_slack(&ev->dwork.timer, intv / 4);
 1486         if (check_now)
 1487                 queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
 1488         else if (intv)
 1489                 queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
 1490 out_unlock:
 1491         spin_unlock_irqrestore(&ev->lock, flags);
 1492 }
 1493 
 1494 /**
 1495  * disk_unblock_events - unblock disk event checking
 1496  * @disk: disk to unblock events for
 1497  *
 1498  * Undo disk_block_events().  When the block count reaches zero, it
 1499  * starts events polling if configured.
 1500  *
 1501  * CONTEXT:
 1502  * Don't care.  Safe to call from irq context.
 1503  */
 1504 void disk_unblock_events(struct gendisk *disk)
 1505 {
 1506         if (disk->ev)
 1507                 __disk_unblock_events(disk, false);
 1508 }
 1509 
 1510 /**
 1511  * disk_flush_events - schedule immediate event checking and flushing
 1512  * @disk: disk to check and flush events for
 1513  * @mask: events to flush
 1514  *
 1515  * Schedule immediate event checking on @disk if not blocked.  Events in
 1516  * @mask are scheduled to be cleared from the driver.  Note that this
 1517  * doesn't clear the events from @disk->ev.
 1518  *
 1519  * CONTEXT:
 1520  * If @mask is non-zero must be called with bdev->bd_mutex held.
 1521  */
 1522 void disk_flush_events(struct gendisk *disk, unsigned int mask)
 1523 {
 1524         struct disk_events *ev = disk->ev;
 1525 
 1526         if (!ev)
 1527                 return;
 1528 
 1529         spin_lock_irq(&ev->lock);
 1530         ev->clearing |= mask;
 1531         if (!ev->block)
 1532                 mod_delayed_work(system_freezable_wq, &ev->dwork, 0);
 1533         spin_unlock_irq(&ev->lock);
 1534 }
 1535 
 1536 /**
 1537  * disk_clear_events - synchronously check, clear and return pending events
 1538  * @disk: disk to fetch and clear events from
 1539  * @mask: mask of events to be fetched and clearted
 1540  *
 1541  * Disk events are synchronously checked and pending events in @mask
 1542  * are cleared and returned.  This ignores the block count.
 1543  *
 1544  * CONTEXT:
 1545  * Might sleep.
 1546  */
 1547 unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
 1548 {
 1549         const struct block_device_operations *bdops = disk->fops;
 1550         struct disk_events *ev = disk->ev;
 1551         unsigned int pending;
 1552 
 1553         if (!ev) {
 1554                 /* for drivers still using the old ->media_changed method */
 1555                 if ((mask & DISK_EVENT_MEDIA_CHANGE) &&
 1556                     bdops->media_changed && bdops->media_changed(disk))
 1557                         return DISK_EVENT_MEDIA_CHANGE;
 1558                 return 0;
 1559         }
 1560 
 1561         /* tell the workfn about the events being cleared */
 1562         spin_lock_irq(&ev->lock);
 1563         ev->clearing |= mask;
 1564         spin_unlock_irq(&ev->lock);
 1565 
 1566         /* uncondtionally schedule event check and wait for it to finish */
 1567         disk_block_events(disk);
 1568         queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
 1569         flush_delayed_work(&ev->dwork);
 1570         __disk_unblock_events(disk, false);
 1571 
 1572         /* then, fetch and clear pending events */
 1573         spin_lock_irq(&ev->lock);
 1574         WARN_ON_ONCE(ev->clearing & mask);      /* cleared by workfn */
 1575         pending = ev->pending & mask;
 1576         ev->pending &= ~mask;
 1577         spin_unlock_irq(&ev->lock);
 1578 
 1579         return pending;
 1580 }
 1581 
 1582 static void disk_events_workfn(struct work_struct *work)
 1583 {
 1584         struct delayed_work *dwork = to_delayed_work(work);
 1585         struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
 1586         struct gendisk *disk = ev->disk;
 1587         char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
 1588         unsigned int clearing = ev->clearing;
 1589         unsigned int events;
 1590         unsigned long intv;
 1591         int nr_events = 0, i;
 1592 
 1593         /* check events */
 1594         events = disk->fops->check_events(disk, clearing);
 1595 
 1596         /* accumulate pending events and schedule next poll if necessary */
 1597         spin_lock_irq(&ev->lock);
 1598 
 1599         events &= ~ev->pending;
 1600         ev->pending |= events;
 1601         ev->clearing &= ~clearing;
 1602 
 1603         intv = disk_events_poll_jiffies(disk);
 1604         if (!ev->block && intv)
 1605                 queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
 1606 
 1607         spin_unlock_irq(&ev->lock);
 1608 
 1609         /*
 1610          * Tell userland about new events.  Only the events listed in
 1611          * @disk->events are reported.  Unlisted events are processed the
 1612          * same internally but never get reported to userland.
 1613          */
 1614         for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
 1615                 if (events & disk->events & (1 << i))
 1616                         envp[nr_events++] = disk_uevents[i];
 1617 
 1618         if (nr_events)
 1619                 kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
 1620 }
 1621 
 1622 /*
 1623  * A disk events enabled device has the following sysfs nodes under
 1624  * its /sys/block/X/ directory.
 1625  *
 1626  * events               : list of all supported events
 1627  * events_async         : list of events which can be detected w/o polling
 1628  * events_poll_msecs    : polling interval, 0: disable, -1: system default
 1629  */
 1630 static ssize_t __disk_events_show(unsigned int events, char *buf)
 1631 {
 1632         const char *delim = "";
 1633         ssize_t pos = 0;
 1634         int i;
 1635 
 1636         for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
 1637                 if (events & (1 << i)) {
 1638                         pos += sprintf(buf + pos, "%s%s",
 1639                                        delim, disk_events_strs[i]);
 1640                         delim = " ";
 1641                 }
 1642         if (pos)
 1643                 pos += sprintf(buf + pos, "\n");
 1644         return pos;
 1645 }
 1646 
 1647 static ssize_t disk_events_show(struct device *dev,
 1648                                 struct device_attribute *attr, char *buf)
 1649 {
 1650         struct gendisk *disk = dev_to_disk(dev);
 1651 
 1652         return __disk_events_show(disk->events, buf);
 1653 }
 1654 
 1655 static ssize_t disk_events_async_show(struct device *dev,
 1656                                       struct device_attribute *attr, char *buf)
 1657 {
 1658         struct gendisk *disk = dev_to_disk(dev);
 1659 
 1660         return __disk_events_show(disk->async_events, buf);
 1661 }
 1662 
 1663 static ssize_t disk_events_poll_msecs_show(struct device *dev,
 1664                                            struct device_attribute *attr,
 1665                                            char *buf)
 1666 {
 1667         struct gendisk *disk = dev_to_disk(dev);
 1668 
 1669         return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
 1670 }
 1671 
 1672 static ssize_t disk_events_poll_msecs_store(struct device *dev,
 1673                                             struct device_attribute *attr,
 1674                                             const char *buf, size_t count)
 1675 {
 1676         struct gendisk *disk = dev_to_disk(dev);
 1677         long intv;
 1678 
 1679         if (!count || !sscanf(buf, "%ld", &intv))
 1680                 return -EINVAL;
 1681 
 1682         if (intv < 0 && intv != -1)
 1683                 return -EINVAL;
 1684 
 1685         disk_block_events(disk);
 1686         disk->ev->poll_msecs = intv;
 1687         __disk_unblock_events(disk, true);
 1688 
 1689         return count;
 1690 }
 1691 
 1692 static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
 1693 static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
 1694 static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
 1695                          disk_events_poll_msecs_show,
 1696                          disk_events_poll_msecs_store);
 1697 
 1698 static const struct attribute *disk_events_attrs[] = {
 1699         &dev_attr_events.attr,
 1700         &dev_attr_events_async.attr,
 1701         &dev_attr_events_poll_msecs.attr,
 1702         NULL,
 1703 };
 1704 
 1705 /*
 1706  * The default polling interval can be specified by the kernel
 1707  * parameter block.events_dfl_poll_msecs which defaults to 0
 1708  * (disable).  This can also be modified runtime by writing to
 1709  * /sys/module/block/events_dfl_poll_msecs.
 1710  */
 1711 static int disk_events_set_dfl_poll_msecs(const char *val,
 1712                                           const struct kernel_param *kp)
 1713 {
 1714         struct disk_events *ev;
 1715         int ret;
 1716 
 1717         ret = param_set_ulong(val, kp);
 1718         if (ret < 0)
 1719                 return ret;
 1720 
 1721         mutex_lock(&disk_events_mutex);
 1722 
 1723         list_for_each_entry(ev, &disk_events, node)
 1724                 disk_flush_events(ev->disk, 0);
 1725 
 1726         mutex_unlock(&disk_events_mutex);
 1727 
 1728         return 0;
 1729 }
 1730 
 1731 static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
 1732         .set    = disk_events_set_dfl_poll_msecs,
 1733         .get    = param_get_ulong,
 1734 };
 1735 
 1736 #undef MODULE_PARAM_PREFIX
 1737 #define MODULE_PARAM_PREFIX     "block."
 1738 
 1739 module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
 1740                 &disk_events_dfl_poll_msecs, 0644);
 1741 
 1742 /*
 1743  * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
 1744  */
 1745 static void disk_alloc_events(struct gendisk *disk)
 1746 {
 1747         struct disk_events *ev;
 1748 
 1749         if (!disk->fops->check_events)
 1750                 return;
 1751 
 1752         ev = kzalloc(sizeof(*ev), GFP_KERNEL);
 1753         if (!ev) {
 1754                 pr_warn("%s: failed to initialize events\n", disk->disk_name);
 1755                 return;
 1756         }
 1757 
 1758         INIT_LIST_HEAD(&ev->node);
 1759         ev->disk = disk;
 1760         spin_lock_init(&ev->lock);
 1761         mutex_init(&ev->block_mutex);
 1762         ev->block = 1;
 1763         ev->poll_msecs = -1;
 1764         INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
 1765 
 1766         disk->ev = ev;
 1767 }
 1768 
 1769 static void disk_add_events(struct gendisk *disk)
 1770 {
 1771         if (!disk->ev)
 1772                 return;
 1773 
 1774         /* FIXME: error handling */
 1775         if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0)
 1776                 pr_warn("%s: failed to create sysfs files for events\n",
 1777                         disk->disk_name);
 1778 
 1779         mutex_lock(&disk_events_mutex);
 1780         list_add_tail(&disk->ev->node, &disk_events);
 1781         mutex_unlock(&disk_events_mutex);
 1782 
 1783         /*
 1784          * Block count is initialized to 1 and the following initial
 1785          * unblock kicks it into action.
 1786          */
 1787         __disk_unblock_events(disk, true);
 1788 }
 1789 
 1790 static void disk_del_events(struct gendisk *disk)
 1791 {
 1792         if (!disk->ev)
 1793                 return;
 1794 
 1795         disk_block_events(disk);
 1796 
 1797         mutex_lock(&disk_events_mutex);
 1798         list_del_init(&disk->ev->node);
 1799         mutex_unlock(&disk_events_mutex);
 1800 
 1801         sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
 1802 }
 1803 
 1804 static void disk_release_events(struct gendisk *disk)
 1805 {
 1806         /* the block count should be 1 from disk_del_events() */
 1807         WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
 1808         kfree(disk->ev);
 1809 }

Cache object: 875e9939e33c25e0b43cbf7afd0ff7a3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.