The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/nvme/nvme_ns.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (C) 2012-2013 Intel Corporation
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD$");
   29 
   30 #include <sys/param.h>
   31 #include <sys/bio.h>
   32 #include <sys/bus.h>
   33 #include <sys/conf.h>
   34 #include <sys/disk.h>
   35 #include <sys/fcntl.h>
   36 #include <sys/ioccom.h>
   37 #include <sys/malloc.h>
   38 #include <sys/module.h>
   39 #include <sys/proc.h>
   40 #include <sys/systm.h>
   41 
   42 #include <dev/pci/pcivar.h>
   43 
   44 #include <geom/geom.h>
   45 
   46 #include "nvme_private.h"
   47 
   /*
    * Forward declarations of the file-local helpers used to split a bio into
    * child bios and to account for the completion of those children.
    */
   static void nvme_bio_child_inbed(struct bio *parent, int bio_error);
   static void nvme_bio_child_done(void *arg,
       const struct nvme_completion *cpl);
   static uint32_t nvme_get_num_segments(uint64_t addr, uint64_t size,
       uint32_t alignment);
   static void nvme_free_child_bios(int num_bios, struct bio **child_bios);
   static struct bio **nvme_allocate_child_bios(int num_bios);
   static struct bio **nvme_construct_child_bios(struct bio *bp,
       uint32_t alignment, int *num_bios);
   static int nvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp,
       uint32_t alignment);
   62 
   63 static int
   64 nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
   65     struct thread *td)
   66 {
   67         struct nvme_namespace                   *ns;
   68         struct nvme_controller                  *ctrlr;
   69         struct nvme_pt_command                  *pt;
   70 
   71         ns = cdev->si_drv1;
   72         ctrlr = ns->ctrlr;
   73 
   74         switch (cmd) {
   75         case NVME_IO_TEST:
   76         case NVME_BIO_TEST:
   77                 nvme_ns_test(ns, cmd, arg);
   78                 break;
   79         case NVME_PASSTHROUGH_CMD:
   80                 pt = (struct nvme_pt_command *)arg;
   81                 return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, ns->id, 
   82                     1 /* is_user_buffer */, 0 /* is_admin_cmd */));
   83         case NVME_GET_NSID:
   84         {
   85                 struct nvme_get_nsid *gnsid = (struct nvme_get_nsid *)arg;
   86                 strncpy(gnsid->cdev, device_get_nameunit(ctrlr->dev),
   87                     sizeof(gnsid->cdev));
   88                 gnsid->nsid = ns->id;
   89                 break;
   90         }
   91         case DIOCGMEDIASIZE:
   92                 *(off_t *)arg = (off_t)nvme_ns_get_size(ns);
   93                 break;
   94         case DIOCGSECTORSIZE:
   95                 *(u_int *)arg = nvme_ns_get_sector_size(ns);
   96                 break;
   97         default:
   98                 return (ENOTTY);
   99         }
  100 
  101         return (0);
  102 }
  103 
  104 static int
  105 nvme_ns_open(struct cdev *dev __unused, int flags, int fmt __unused,
  106     struct thread *td)
  107 {
  108         int error = 0;
  109 
  110         if (flags & FWRITE)
  111                 error = securelevel_gt(td->td_ucred, 0);
  112 
  113         return (error);
  114 }
  115 
  116 static int
  117 nvme_ns_close(struct cdev *dev __unused, int flags, int fmt __unused,
  118     struct thread *td)
  119 {
  120 
  121         return (0);
  122 }
  123 
  124 static void
  125 nvme_ns_strategy_done(void *arg, const struct nvme_completion *cpl)
  126 {
  127         struct bio *bp = arg;
  128 
  129         /*
  130          * TODO: add more extensive translation of NVMe status codes
  131          *  to different bio error codes (i.e. EIO, EINVAL, etc.)
  132          */
  133         if (nvme_completion_is_error(cpl)) {
  134                 bp->bio_error = EIO;
  135                 bp->bio_flags |= BIO_ERROR;
  136                 bp->bio_resid = bp->bio_bcount;
  137         } else
  138                 bp->bio_resid = 0;
  139 
  140         biodone(bp);
  141 }
  142 
  143 static void
  144 nvme_ns_strategy(struct bio *bp)
  145 {
  146         struct nvme_namespace   *ns;
  147         int                     err;
  148 
  149         ns = bp->bio_dev->si_drv1;
  150         err = nvme_ns_bio_process(ns, bp, nvme_ns_strategy_done);
  151 
  152         if (err) {
  153                 bp->bio_error = err;
  154                 bp->bio_flags |= BIO_ERROR;
  155                 bp->bio_resid = bp->bio_bcount;
  156                 biodone(bp);
  157         }
  158 
  159 }
  160 
  161 static struct cdevsw nvme_ns_cdevsw = {
  162         .d_version =    D_VERSION,
  163         .d_flags =      D_DISK,
  164         .d_read =       physread,
  165         .d_write =      physwrite,
  166         .d_open =       nvme_ns_open,
  167         .d_close =      nvme_ns_close,
  168         .d_strategy =   nvme_ns_strategy,
  169         .d_ioctl =      nvme_ns_ioctl
  170 };
  171 
  172 uint32_t
  173 nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns)
  174 {
  175         return ns->ctrlr->max_xfer_size;
  176 }
  177 
  178 uint32_t
  179 nvme_ns_get_sector_size(struct nvme_namespace *ns)
  180 {
  181         return (1 << ns->data.lbaf[ns->data.flbas.format].lbads);
  182 }
  183 
  184 uint64_t
  185 nvme_ns_get_num_sectors(struct nvme_namespace *ns)
  186 {
  187         return (ns->data.nsze);
  188 }
  189 
  /*
   * Return the product of the namespace's sector count and sector size.
   */
  uint64_t
  nvme_ns_get_size(struct nvme_namespace *ns)
  {
          uint64_t nsectors;
          uint32_t sector_size;

          nsectors = nvme_ns_get_num_sectors(ns);
          sector_size = nvme_ns_get_sector_size(ns);

          return (nsectors * sector_size);
  }
  195 
  196 uint32_t
  197 nvme_ns_get_flags(struct nvme_namespace *ns)
  198 {
  199         return (ns->flags);
  200 }
  201 
  202 const char *
  203 nvme_ns_get_serial_number(struct nvme_namespace *ns)
  204 {
  205         return ((const char *)ns->ctrlr->cdata.sn);
  206 }
  207 
  208 const char *
  209 nvme_ns_get_model_number(struct nvme_namespace *ns)
  210 {
  211         return ((const char *)ns->ctrlr->cdata.mn);
  212 }
  213 
  214 const struct nvme_namespace_data *
  215 nvme_ns_get_data(struct nvme_namespace *ns)
  216 {
  217 
  218         return (&ns->data);
  219 }
  220 
  221 uint32_t
  222 nvme_ns_get_stripesize(struct nvme_namespace *ns)
  223 {
  224 
  225         return (ns->stripesize);
  226 }
  227 
  228 static void
  229 nvme_ns_bio_done(void *arg, const struct nvme_completion *status)
  230 {
  231         struct bio      *bp = arg;
  232         nvme_cb_fn_t    bp_cb_fn;
  233 
  234         bp_cb_fn = bp->bio_driver1;
  235 
  236         if (bp->bio_driver2)
  237                 free(bp->bio_driver2, M_NVME);
  238 
  239         if (nvme_completion_is_error(status)) {
  240                 bp->bio_flags |= BIO_ERROR;
  241                 if (bp->bio_error == 0)
  242                         bp->bio_error = EIO;
  243         }
  244 
  245         if ((bp->bio_flags & BIO_ERROR) == 0)
  246                 bp->bio_resid = 0;
  247         else
  248                 bp->bio_resid = bp->bio_bcount;
  249 
  250         bp_cb_fn(bp, status);
  251 }
  252 
  253 static void
  254 nvme_bio_child_inbed(struct bio *parent, int bio_error)
  255 {
  256         struct nvme_completion  parent_cpl;
  257         int                     children, inbed;
  258 
  259         if (bio_error != 0) {
  260                 parent->bio_flags |= BIO_ERROR;
  261                 parent->bio_error = bio_error;
  262         }
  263 
  264         /*
  265          * atomic_fetchadd will return value before adding 1, so we still
  266          *  must add 1 to get the updated inbed number.  Save bio_children
  267          *  before incrementing to guard against race conditions when
  268          *  two children bios complete on different queues.
  269          */
  270         children = atomic_load_acq_int(&parent->bio_children);
  271         inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1;
  272         if (inbed == children) {
  273                 bzero(&parent_cpl, sizeof(parent_cpl));
  274                 if (parent->bio_flags & BIO_ERROR)
  275                         parent_cpl.status.sc = NVME_SC_DATA_TRANSFER_ERROR;
  276                 nvme_ns_bio_done(parent, &parent_cpl);
  277         }
  278 }
  279 
  280 static void
  281 nvme_bio_child_done(void *arg, const struct nvme_completion *cpl)
  282 {
  283         struct bio              *child = arg;
  284         struct bio              *parent;
  285         int                     bio_error;
  286 
  287         parent = child->bio_parent;
  288         g_destroy_bio(child);
  289         bio_error = nvme_completion_is_error(cpl) ? EIO : 0;
  290         nvme_bio_child_inbed(parent, bio_error);
  291 }
  292 
  /*
   * Count the align-sized, align-aligned segments touched by the byte
   * range [addr, addr + size).
   *
   * An align of 0 means "no boundary" and yields 1.  Otherwise align must
   * be a power of two (asserted with KASSERT).
   */
  static uint32_t
  nvme_get_num_segments(uint64_t addr, uint64_t size, uint32_t align)
  {
          uint32_t        count, head, tail;

          if (align == 0)
                  return (1);

          KASSERT((align & (align - 1)) == 0, ("alignment not power of 2\n"));

          /* Whole segments, plus the misaligned start and leftover length. */
          count = size / align;
          tail = size & (align - 1);
          head = addr & (align - 1);
          if (tail == 0 && head == 0)
                  return (count);

          count += 1 + (tail + head - 1) / align;
          return (count);
  }
  310 
  311 static void
  312 nvme_free_child_bios(int num_bios, struct bio **child_bios)
  313 {
  314         int i;
  315 
  316         for (i = 0; i < num_bios; i++) {
  317                 if (child_bios[i] != NULL)
  318                         g_destroy_bio(child_bios[i]);
  319         }
  320 
  321         free(child_bios, M_NVME);
  322 }
  323 
  324 static struct bio **
  325 nvme_allocate_child_bios(int num_bios)
  326 {
  327         struct bio **child_bios;
  328         int err = 0, i;
  329 
  330         child_bios = malloc(num_bios * sizeof(struct bio *), M_NVME, M_NOWAIT);
  331         if (child_bios == NULL)
  332                 return (NULL);
  333 
  334         for (i = 0; i < num_bios; i++) {
  335                 child_bios[i] = g_new_bio();
  336                 if (child_bios[i] == NULL)
  337                         err = ENOMEM;
  338         }
  339 
  340         if (err == ENOMEM) {
  341                 nvme_free_child_bios(num_bios, child_bios);
  342                 return (NULL);
  343         }
  344 
  345         return (child_bios);
  346 }
  347 
  348 static struct bio **
  349 nvme_construct_child_bios(struct bio *bp, uint32_t alignment, int *num_bios)
  350 {
  351         struct bio      **child_bios;
  352         struct bio      *child;
  353         uint64_t        cur_offset;
  354         caddr_t         data;
  355         uint32_t        rem_bcount;
  356         int             i;
  357         struct vm_page  **ma;
  358         uint32_t        ma_offset;
  359 
  360         *num_bios = nvme_get_num_segments(bp->bio_offset, bp->bio_bcount,
  361             alignment);
  362         child_bios = nvme_allocate_child_bios(*num_bios);
  363         if (child_bios == NULL)
  364                 return (NULL);
  365 
  366         bp->bio_children = *num_bios;
  367         bp->bio_inbed = 0;
  368         cur_offset = bp->bio_offset;
  369         rem_bcount = bp->bio_bcount;
  370         data = bp->bio_data;
  371         ma_offset = bp->bio_ma_offset;
  372         ma = bp->bio_ma;
  373 
  374         for (i = 0; i < *num_bios; i++) {
  375                 child = child_bios[i];
  376                 child->bio_parent = bp;
  377                 child->bio_cmd = bp->bio_cmd;
  378                 child->bio_offset = cur_offset;
  379                 child->bio_bcount = min(rem_bcount,
  380                     alignment - (cur_offset & (alignment - 1)));
  381                 child->bio_flags = bp->bio_flags;
  382                 if (bp->bio_flags & BIO_UNMAPPED) {
  383                         child->bio_ma_offset = ma_offset;
  384                         child->bio_ma = ma;
  385                         child->bio_ma_n =
  386                             nvme_get_num_segments(child->bio_ma_offset,
  387                                 child->bio_bcount, PAGE_SIZE);
  388                         ma_offset = (ma_offset + child->bio_bcount) &
  389                             PAGE_MASK;
  390                         ma += child->bio_ma_n;
  391                         if (ma_offset != 0)
  392                                 ma -= 1;
  393                 } else {
  394                         child->bio_data = data;
  395                         data += child->bio_bcount;
  396                 }
  397                 cur_offset += child->bio_bcount;
  398                 rem_bcount -= child->bio_bcount;
  399         }
  400 
  401         return (child_bios);
  402 }
  403 
  404 static int
  405 nvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp,
  406     uint32_t alignment)
  407 {
  408         struct bio      *child;
  409         struct bio      **child_bios;
  410         int             err, i, num_bios;
  411 
  412         child_bios = nvme_construct_child_bios(bp, alignment, &num_bios);
  413         if (child_bios == NULL)
  414                 return (ENOMEM);
  415 
  416         for (i = 0; i < num_bios; i++) {
  417                 child = child_bios[i];
  418                 err = nvme_ns_bio_process(ns, child, nvme_bio_child_done);
  419                 if (err != 0) {
  420                         nvme_bio_child_inbed(bp, err);
  421                         g_destroy_bio(child);
  422                 }
  423         }
  424 
  425         free(child_bios, M_NVME);
  426         return (0);
  427 }
  428 
  429 int
  430 nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
  431         nvme_cb_fn_t cb_fn)
  432 {
  433         struct nvme_dsm_range   *dsm_range;
  434         uint32_t                num_bios;
  435         int                     err;
  436 
  437         bp->bio_driver1 = cb_fn;
  438 
  439         if (ns->stripesize > 0 &&
  440             (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
  441                 num_bios = nvme_get_num_segments(bp->bio_offset,
  442                     bp->bio_bcount, ns->stripesize);
  443                 if (num_bios > 1)
  444                         return (nvme_ns_split_bio(ns, bp, ns->stripesize));
  445         }
  446 
  447         switch (bp->bio_cmd) {
  448         case BIO_READ:
  449                 err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp);
  450                 break;
  451         case BIO_WRITE:
  452                 err = nvme_ns_cmd_write_bio(ns, bp, nvme_ns_bio_done, bp);
  453                 break;
  454         case BIO_FLUSH:
  455                 err = nvme_ns_cmd_flush(ns, nvme_ns_bio_done, bp);
  456                 break;
  457         case BIO_DELETE:
  458                 dsm_range =
  459                     malloc(sizeof(struct nvme_dsm_range), M_NVME,
  460                     M_ZERO | M_WAITOK);
  461                 dsm_range->length =
  462                     bp->bio_bcount/nvme_ns_get_sector_size(ns);
  463                 dsm_range->starting_lba =
  464                     bp->bio_offset/nvme_ns_get_sector_size(ns);
  465                 bp->bio_driver2 = dsm_range;
  466                 err = nvme_ns_cmd_deallocate(ns, dsm_range, 1,
  467                         nvme_ns_bio_done, bp);
  468                 if (err != 0)
  469                         free(dsm_range, M_NVME);
  470                 break;
  471         default:
  472                 err = EIO;
  473                 break;
  474         }
  475 
  476         return (err);
  477 }
  478 
  479 int
  480 nvme_ns_ioctl_process(struct nvme_namespace *ns, u_long cmd, caddr_t arg,
  481     int flag, struct thread *td)
  482 {
  483         return (nvme_ns_ioctl(ns->cdev, cmd, arg, flag, td));
  484 }
  485 
  486 int
  487 nvme_ns_construct(struct nvme_namespace *ns, uint32_t id,
  488     struct nvme_controller *ctrlr)
  489 {
  490         struct make_dev_args                    md_args;
  491         struct nvme_completion_poll_status      status;
  492         int                                     res;
  493         int                                     unit;
  494 
  495         ns->ctrlr = ctrlr;
  496         ns->id = id;
  497         ns->stripesize = 0;
  498 
  499         /*
  500          * Older Intel devices advertise in vendor specific space an alignment
  501          * that improves performance.  If present use for the stripe size.  NVMe
  502          * 1.3 standardized this as NOIOB, and newer Intel drives use that.
  503          */
  504         switch (pci_get_devid(ctrlr->dev)) {
  505         case 0x09538086:                /* Intel DC PC3500 */
  506         case 0x0a538086:                /* Intel DC PC3520 */
  507         case 0x0a548086:                /* Intel DC PC4500 */
  508         case 0x0a558086:                /* Dell Intel P4600 */
  509                 if (ctrlr->cdata.vs[3] != 0)
  510                         ns->stripesize =
  511                             (1 << ctrlr->cdata.vs[3]) * ctrlr->min_page_size;
  512                 break;
  513         default:
  514                 break;
  515         }
  516 
  517         /*
  518          * Namespaces are reconstructed after a controller reset, so check
  519          *  to make sure we only call mtx_init once on each mtx.
  520          *
  521          * TODO: Move this somewhere where it gets called at controller
  522          *  construction time, which is not invoked as part of each
  523          *  controller reset.
  524          */
  525         if (!mtx_initialized(&ns->lock))
  526                 mtx_init(&ns->lock, "nvme ns lock", NULL, MTX_DEF);
  527 
  528         status.done = 0;
  529         nvme_ctrlr_cmd_identify_namespace(ctrlr, id, &ns->data,
  530             nvme_completion_poll_cb, &status);
  531         while (!atomic_load_acq_int(&status.done))
  532                 pause("nvme", 1);
  533         if (nvme_completion_is_error(&status.cpl)) {
  534                 nvme_printf(ctrlr, "nvme_identify_namespace failed\n");
  535                 return (ENXIO);
  536         }
  537 
  538         /*
  539          * If the size of is zero, chances are this isn't a valid
  540          * namespace (eg one that's not been configured yet). The
  541          * standard says the entire id will be zeros, so this is a
  542          * cheap way to test for that.
  543          */
  544         if (ns->data.nsze == 0)
  545                 return (ENXIO);
  546 
  547         /*
  548          * Note: format is a 0-based value, so > is appropriate here,
  549          *  not >=.
  550          */
  551         if (ns->data.flbas.format > ns->data.nlbaf) {
  552                 printf("lba format %d exceeds number supported (%d)\n",
  553                     ns->data.flbas.format, ns->data.nlbaf+1);
  554                 return (ENXIO);
  555         }
  556 
  557         if (ctrlr->cdata.oncs.dsm)
  558                 ns->flags |= NVME_NS_DEALLOCATE_SUPPORTED;
  559 
  560         if (ctrlr->cdata.vwc.present)
  561                 ns->flags |= NVME_NS_FLUSH_SUPPORTED;
  562 
  563         /*
  564          * cdev may have already been created, if we are reconstructing the
  565          *  namespace after a controller-level reset.
  566          */
  567         if (ns->cdev != NULL)
  568                 return (0);
  569 
  570         /*
  571          * Namespace IDs start at 1, so we need to subtract 1 to create a
  572          *  correct unit number.
  573          */
  574         unit = device_get_unit(ctrlr->dev) * NVME_MAX_NAMESPACES + ns->id - 1;
  575 
  576         make_dev_args_init(&md_args);
  577         md_args.mda_devsw = &nvme_ns_cdevsw;
  578         md_args.mda_unit = unit;
  579         md_args.mda_mode = 0600;
  580         md_args.mda_si_drv1 = ns;
  581         res = make_dev_s(&md_args, &ns->cdev, "nvme%dns%d",
  582             device_get_unit(ctrlr->dev), ns->id);
  583         if (res != 0)
  584                 return (ENXIO);
  585 
  586         ns->cdev->si_flags |= SI_UNMAPPED;
  587 
  588         return (0);
  589 }
  590 
  591 void nvme_ns_destruct(struct nvme_namespace *ns)
  592 {
  593 
  594         if (ns->cdev != NULL)
  595                 destroy_dev(ns->cdev);
  596 }

Cache object: d6ee9c2aee4d768147479a33b77f31d5


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.