FreeBSD/Linux Kernel Cross Reference
sys/cam/nvme/nvme_da.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause
    3  *
    4  * Copyright (c) 2015 Netflix, Inc.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  *
   27  * Derived from ata_da.c:
   28  * Copyright (c) 2009 Alexander Motin <mav@FreeBSD.org>
   29  */
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include <sys/param.h>
   35 
   36 #ifdef _KERNEL
   37 #include <sys/systm.h>
   38 #include <sys/kernel.h>
   39 #include <sys/bio.h>
   40 #include <sys/sysctl.h>
   41 #include <sys/taskqueue.h>
   42 #include <sys/lock.h>
   43 #include <sys/mutex.h>
   44 #include <sys/conf.h>
   45 #include <sys/devicestat.h>
   46 #include <sys/eventhandler.h>
   47 #include <sys/malloc.h>
   48 #include <sys/cons.h>
   49 #include <sys/proc.h>
   50 #include <sys/reboot.h>
   51 #include <sys/sbuf.h>
   52 #include <geom/geom.h>
   53 #include <geom/geom_disk.h>
   54 #endif /* _KERNEL */
   55 
   56 #ifndef _KERNEL
   57 #include <stdio.h>
   58 #include <string.h>
   59 #endif /* _KERNEL */
   60 
   61 #include <cam/cam.h>
   62 #include <cam/cam_ccb.h>
   63 #include <cam/cam_periph.h>
   64 #include <cam/cam_xpt_periph.h>
   65 #include <cam/cam_sim.h>
   66 #include <cam/cam_iosched.h>
   67 
   68 #include <cam/nvme/nvme_all.h>
   69 
   70 typedef enum {
   71         NDA_STATE_NORMAL
   72 } nda_state;
   73 
   74 typedef enum {
   75         NDA_FLAG_OPEN           = 0x0001,
   76         NDA_FLAG_DIRTY          = 0x0002,
   77         NDA_FLAG_SCTX_INIT      = 0x0004,
   78 } nda_flags;
   79 #define NDA_FLAG_STRING         \
   80         "\020"                  \
   81         "\001OPEN"              \
   82         "\002DIRTY"             \
   83         "\003SCTX_INIT"
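      /*
       * NDA_FLAG_STRING is a printf "%b" bit description: the leading
       * "\020" selects hexadecimal output and each "\00N" byte names bit N
       * (counting from 1).  As an illustration, a softc with OPEN and DIRTY
       * set (flags == 0x3) renders as "0x3<OPEN,DIRTY>" when formatted the
       * way ndaflagssysctl() below does.
       */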
   84 
   85 typedef enum {
   86         NDA_Q_4K   = 0x01,
   87         NDA_Q_NONE = 0x00,
   88 } nda_quirks;
   89 
   90 #define NDA_Q_BIT_STRING        \
   91         "\020"                  \
   92         "\001Bit 0"
   93 
   94 typedef enum {
   95         NDA_CCB_BUFFER_IO       = 0x01,
   96         NDA_CCB_DUMP            = 0x02,
   97         NDA_CCB_TRIM            = 0x03,
   98         NDA_CCB_PASS            = 0x04,
   99         NDA_CCB_TYPE_MASK       = 0x0F,
  100 } nda_ccb_state;
  101 
  102 /* Offsets into our private area for storing information */
  103 #define ccb_state       ccb_h.ppriv_field0
  104 #define ccb_bp          ccb_h.ppriv_ptr1        /* For NDA_CCB_BUFFER_IO */
  105 #define ccb_trim        ccb_h.ppriv_ptr1        /* For NDA_CCB_TRIM */
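      /*
       * These macros alias the peripheral-private fields in the common CCB
       * header, so every outstanding CCB carries its own nda bookkeeping.
       * ccb_bp and ccb_trim share ppriv_ptr1; that is safe because a CCB is
       * either a BUFFER_IO (tracking a single struct bio) or a TRIM
       * (tracking a struct nda_trim_request), never both at once.
       */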
  106 
  107 struct nda_softc {
  108         struct   cam_iosched_softc *cam_iosched;
  109         int                     outstanding_cmds;       /* Number of active commands */
  110         int                     refcount;               /* Active xpt_action() calls */
  111         nda_state               state;
  112         nda_flags               flags;
  113         nda_quirks              quirks;
  114         int                     unmappedio;
  115         quad_t                  deletes;
  116         uint32_t                nsid;                   /* Namespace ID for this nda device */
  117         struct disk             *disk;
  118         struct task             sysctl_task;
  119         struct sysctl_ctx_list  sysctl_ctx;
  120         struct sysctl_oid       *sysctl_tree;
  121         uint64_t                trim_count;
  122         uint64_t                trim_ranges;
  123         uint64_t                trim_lbas;
  124 #ifdef CAM_TEST_FAILURE
  125         int                     force_read_error;
  126         int                     force_write_error;
  127         int                     periodic_read_error;
  128         int                     periodic_read_count;
  129 #endif
  130 #ifdef CAM_IO_STATS
  131         struct sysctl_ctx_list  sysctl_stats_ctx;
  132         struct sysctl_oid       *sysctl_stats_tree;
  133         u_int                   timeouts;
  134         u_int                   errors;
  135         u_int                   invalidations;
  136 #endif
  137 };
  138 
  139 struct nda_trim_request {
  140         struct nvme_dsm_range   dsm[NVME_MAX_DSM_TRIM / sizeof(struct nvme_dsm_range)];
  141         TAILQ_HEAD(, bio) bps;
  142 };
  143 _Static_assert(NVME_MAX_DSM_TRIM % sizeof(struct nvme_dsm_range) == 0,
  144     "NVME_MAX_DSM_TRIM must be an integral number of ranges");
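      /*
       * Sizing sketch, assuming the stock <dev/nvme/nvme.h> definitions
       * (NVME_MAX_DSM_TRIM of 4096 bytes, 16-byte struct nvme_dsm_range):
       * dsm[] then holds 4096 / 16 = 256 ranges, so one trim request can
       * coalesce up to 256 BIO_DELETEs.  The assertion above only demands
       * that the payload be a whole number of ranges.
       */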
  145 
  146 /* Need quirk table */
  147 
  148 static  disk_ioctl_t    ndaioctl;
  149 static  disk_strategy_t ndastrategy;
  150 static  dumper_t        ndadump;
  151 static  periph_init_t   ndainit;
  152 static  void            ndaasync(void *callback_arg, u_int32_t code,
  153                                 struct cam_path *path, void *arg);
  154 static  void            ndasysctlinit(void *context, int pending);
  155 static  int             ndaflagssysctl(SYSCTL_HANDLER_ARGS);
  156 static  periph_ctor_t   ndaregister;
  157 static  periph_dtor_t   ndacleanup;
  158 static  periph_start_t  ndastart;
  159 static  periph_oninv_t  ndaoninvalidate;
  160 static  void            ndadone(struct cam_periph *periph,
  161                                union ccb *done_ccb);
  162 static  int             ndaerror(union ccb *ccb, u_int32_t cam_flags,
  163                                 u_int32_t sense_flags);
  164 static void             ndashutdown(void *arg, int howto);
  165 static void             ndasuspend(void *arg);
  166 
  167 #ifndef NDA_DEFAULT_SEND_ORDERED
  168 #define NDA_DEFAULT_SEND_ORDERED        1
  169 #endif
  170 #ifndef NDA_DEFAULT_TIMEOUT
  171 #define NDA_DEFAULT_TIMEOUT 30  /* Timeout in seconds */
  172 #endif
  173 #ifndef NDA_DEFAULT_RETRY
  174 #define NDA_DEFAULT_RETRY       4
  175 #endif
  176 #ifndef NDA_MAX_TRIM_ENTRIES
  177 #define NDA_MAX_TRIM_ENTRIES  (NVME_MAX_DSM_TRIM / sizeof(struct nvme_dsm_range))/* Number of DSM trims to use, max 256 */
  178 #endif
  179 
  180 static SYSCTL_NODE(_kern_cam, OID_AUTO, nda, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  181     "CAM Direct Access Disk driver");
  182 
  183 //static int nda_retry_count = NDA_DEFAULT_RETRY;
  184 static int nda_send_ordered = NDA_DEFAULT_SEND_ORDERED;
  185 static int nda_default_timeout = NDA_DEFAULT_TIMEOUT;
  186 static int nda_max_trim_entries = NDA_MAX_TRIM_ENTRIES;
  187 static int nda_enable_biospeedup = 1;
  188 static int nda_nvd_compat = 1;
  189 SYSCTL_INT(_kern_cam_nda, OID_AUTO, max_trim, CTLFLAG_RDTUN,
  190     &nda_max_trim_entries, NDA_MAX_TRIM_ENTRIES,
  191     "Maximum number of BIO_DELETE to send down as a DSM TRIM.");
  192 SYSCTL_INT(_kern_cam_nda, OID_AUTO, enable_biospeedup, CTLFLAG_RDTUN,
  193     &nda_enable_biospeedup, 0, "Enable BIO_SPEEDUP processing.");
  194 SYSCTL_INT(_kern_cam_nda, OID_AUTO, nvd_compat, CTLFLAG_RDTUN,
  195     &nda_nvd_compat, 1, "Enable creation of nvd aliases.");
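      /*
       * The CTLFLAG_RDTUN knobs above are read-only sysctls that may be
       * seeded as loader tunables at boot.  A hypothetical /boot/loader.conf
       * sketch:
       *
       *      kern.cam.nda.max_trim="128"
       *      kern.cam.nda.enable_biospeedup="0"
       *      kern.cam.nda.nvd_compat="0"
       */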
  196 
  197 /*
  198  * All NVMe media is non-rotational, so all nvme device instances
  199  * share this to implement the sysctl.
  200  */
  201 static int nda_rotating_media = 0;
  202 
  203 static struct periph_driver ndadriver =
  204 {
  205         ndainit, "nda",
  206         TAILQ_HEAD_INITIALIZER(ndadriver.units), /* generation */ 0
  207 };
  208 
  209 PERIPHDRIVER_DECLARE(nda, ndadriver);
  210 
  211 static MALLOC_DEFINE(M_NVMEDA, "nvme_da", "nvme_da buffers");
  212 
  213 /*
  214  * nice wrappers. Maybe these belong in nvme_all.c instead of
  215  * here, but this is the only place that uses these. Should
  216  * we ever grow another NVME periph, we should move them
  217  * all there wholesale.
  218  */
  219 
  220 static void
  221 nda_nvme_flush(struct nda_softc *softc, struct ccb_nvmeio *nvmeio)
  222 {
  223         cam_fill_nvmeio(nvmeio,
  224             0,                  /* retries */
  225             ndadone,            /* cbfcnp */
  226             CAM_DIR_NONE,       /* flags */
  227             NULL,               /* data_ptr */
  228             0,                  /* dxfer_len */
  229             nda_default_timeout * 1000); /* timeout 30s */
  230         nvme_ns_flush_cmd(&nvmeio->cmd, softc->nsid);
  231 }
  232 
  233 static void
  234 nda_nvme_trim(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
  235     void *payload, uint32_t num_ranges)
  236 {
  237         cam_fill_nvmeio(nvmeio,
  238             0,                  /* retries */
  239             ndadone,            /* cbfcnp */
  240             CAM_DIR_OUT,        /* flags */
  241             payload,            /* data_ptr */
  242             num_ranges * sizeof(struct nvme_dsm_range), /* dxfer_len */
  243             nda_default_timeout * 1000); /* timeout 30s */
  244         nvme_ns_trim_cmd(&nvmeio->cmd, softc->nsid, num_ranges);
  245 }
  246 
  247 static void
  248 nda_nvme_write(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
  249     void *payload, uint64_t lba, uint32_t len, uint32_t count)
  250 {
  251         cam_fill_nvmeio(nvmeio,
  252             0,                  /* retries */
  253             ndadone,            /* cbfcnp */
  254             CAM_DIR_OUT,        /* flags */
  255             payload,            /* data_ptr */
  256             len,                /* dxfer_len */
  257             nda_default_timeout * 1000); /* timeout 30s */
  258         nvme_ns_write_cmd(&nvmeio->cmd, softc->nsid, lba, count);
  259 }
  260 
  261 static void
  262 nda_nvme_rw_bio(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
  263     struct bio *bp, uint32_t rwcmd)
  264 {
  265         int flags = rwcmd == NVME_OPC_READ ? CAM_DIR_IN : CAM_DIR_OUT;
  266         void *payload;
  267         uint64_t lba;
  268         uint32_t count;
  269 
  270         if (bp->bio_flags & BIO_UNMAPPED) {
  271                 flags |= CAM_DATA_BIO;
  272                 payload = bp;
  273         } else {
  274                 payload = bp->bio_data;
  275         }
  276 
  277         lba = bp->bio_pblkno;
  278         count = bp->bio_bcount / softc->disk->d_sectorsize;
  279 
  280         cam_fill_nvmeio(nvmeio,
  281             0,                  /* retries */
  282             ndadone,            /* cbfcnp */
  283             flags,              /* flags */
  284             payload,            /* data_ptr */
  285             bp->bio_bcount,     /* dxfer_len */
  286             nda_default_timeout * 1000); /* timeout 30s */
  287         nvme_ns_rw_cmd(&nvmeio->cmd, rwcmd, softc->nsid, lba, count);
  288 }
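      /*
       * Worked example (illustrative): a mapped 128 KiB write to a
       * namespace with 512-byte sectors arrives with bio_bcount == 131072,
       * so count == 256 blocks starting at bio_pblkno.  The NVMe NLB field
       * is zero-based, so nvme_ns_rw_cmd() is expected to put 255 on the
       * wire.  Unmapped bios are instead handed over whole with
       * CAM_DATA_BIO so the transport can map the pages itself.
       */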
  289 
  290 static int
  291 ndaopen(struct disk *dp)
  292 {
  293         struct cam_periph *periph;
  294         struct nda_softc *softc;
  295         int error;
  296 
  297         periph = (struct cam_periph *)dp->d_drv1;
  298         if (cam_periph_acquire(periph) != 0) {
  299                 return(ENXIO);
  300         }
  301 
  302         cam_periph_lock(periph);
  303         if ((error = cam_periph_hold(periph, PRIBIO|PCATCH)) != 0) {
  304                 cam_periph_unlock(periph);
  305                 cam_periph_release(periph);
  306                 return (error);
  307         }
  308 
  309         CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH,
  310             ("ndaopen\n"));
  311 
  312         softc = (struct nda_softc *)periph->softc;
  313         softc->flags |= NDA_FLAG_OPEN;
  314 
  315         cam_periph_unhold(periph);
  316         cam_periph_unlock(periph);
  317         return (0);
  318 }
  319 
  320 static int
  321 ndaclose(struct disk *dp)
  322 {
  323         struct  cam_periph *periph;
  324         struct  nda_softc *softc;
  325         union ccb *ccb;
  326         int error;
  327 
  328         periph = (struct cam_periph *)dp->d_drv1;
  329         softc = (struct nda_softc *)periph->softc;
  330         cam_periph_lock(periph);
  331 
  332         CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH,
  333             ("ndaclose\n"));
  334 
  335         if ((softc->flags & NDA_FLAG_DIRTY) != 0 &&
  336             (periph->flags & CAM_PERIPH_INVALID) == 0 &&
  337             cam_periph_hold(periph, PRIBIO) == 0) {
  338                 ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL);
  339                 nda_nvme_flush(softc, &ccb->nvmeio);
  340                 error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0,
  341                     /*sense_flags*/0, softc->disk->d_devstat);
  342 
  343                 if (error != 0)
  344                         xpt_print(periph->path, "Synchronize cache failed\n");
  345                 else
  346                         softc->flags &= ~NDA_FLAG_DIRTY;
  347                 xpt_release_ccb(ccb);
  348                 cam_periph_unhold(periph);
  349         }
  350 
  351         softc->flags &= ~NDA_FLAG_OPEN;
  352 
  353         while (softc->refcount != 0)
  354                 cam_periph_sleep(periph, &softc->refcount, PRIBIO, "ndaclose", 1);
  355         KASSERT(softc->outstanding_cmds == 0,
  356             ("nda %d outstanding commands", softc->outstanding_cmds));
  357         cam_periph_unlock(periph);
  358         cam_periph_release(periph);
  359         return (0);     
  360 }
  361 
  362 static void
  363 ndaschedule(struct cam_periph *periph)
  364 {
  365         struct nda_softc *softc = (struct nda_softc *)periph->softc;
  366 
  367         if (softc->state != NDA_STATE_NORMAL)
  368                 return;
  369 
  370         cam_iosched_schedule(softc->cam_iosched, periph);
  371 }
  372 
  373 static int
  374 ndaioctl(struct disk *dp, u_long cmd, void *data, int fflag,
  375     struct thread *td)
  376 {
  377         struct cam_periph *periph;
  378 
  379         periph = (struct cam_periph *)dp->d_drv1;
  380 
  381         switch (cmd) {
  382         case NVME_IO_TEST:
  383         case NVME_BIO_TEST:
  384                 /*
  385                  * These don't map well to the underlying CCBs, so
  386                  * they are unsupported via CAM.
  387                  */
  388                 return (ENOTTY);
  389         case NVME_GET_NSID:
  390         {
  391                 struct nvme_get_nsid *gnsid = (struct nvme_get_nsid *)data;
  392                 struct ccb_pathinq cpi;
  393 
  394                 xpt_path_inq(&cpi, periph->path);
  395                 strncpy(gnsid->cdev, cpi.xport_specific.nvme.dev_name,
  396                     sizeof(gnsid->cdev));
  397                 gnsid->nsid = cpi.xport_specific.nvme.nsid;
  398                 return (0);
  399         }
  400         case NVME_PASSTHROUGH_CMD:
  401         {
  402                 struct nvme_pt_command *pt;
  403                 union ccb *ccb;
  404                 struct cam_periph_map_info mapinfo;
  405                 u_int maxmap = dp->d_maxsize;
  406                 int error;
  407 
  408                 /*
  409                  * Create a NVME_IO CCB to do the passthrough command.
  410                  */
  411                 pt = (struct nvme_pt_command *)data;
  412                 ccb = xpt_alloc_ccb();
  413                 xpt_setup_ccb(&ccb->ccb_h, periph->path, CAM_PRIORITY_NORMAL);
  414                 ccb->ccb_state = NDA_CCB_PASS;
  415                 cam_fill_nvmeio(&ccb->nvmeio,
  416                     0,                  /* Retries */
  417                     ndadone,
  418                     (pt->is_read ? CAM_DIR_IN : CAM_DIR_OUT) | CAM_DATA_VADDR,
  419                     pt->buf,
  420                     pt->len,
  421                     nda_default_timeout * 1000);
  422                 memcpy(&ccb->nvmeio.cmd, &pt->cmd, sizeof(pt->cmd));
  423 
  424                 /*
  425                  * Wire the user memory in this request for the I/O
  426                  */
  427                 memset(&mapinfo, 0, sizeof(mapinfo));
  428                 error = cam_periph_mapmem(ccb, &mapinfo, maxmap);
  429                 if (error)
  430                         goto out;
  431 
  432                 /*
  433                  * Lock the periph and run the command.
  434                  */
  435                 cam_periph_lock(periph);
  436                 cam_periph_runccb(ccb, NULL, CAM_RETRY_SELTO,
  437                     SF_RETRY_UA | SF_NO_PRINT, NULL);
  438 
  439                 /*
  440                  * Tear down mapping and return status.
  441                  */
  442                 cam_periph_unlock(periph);
  443                 cam_periph_unmapmem(ccb, &mapinfo);
  444                 error = (ccb->ccb_h.status == CAM_REQ_CMP) ? 0 : EIO;
  445 out:
  446                 cam_periph_lock(periph);
  447                 xpt_release_ccb(ccb);
  448                 cam_periph_unlock(periph);
  449                 return (error);
  450         }
  451         default:
  452                 break;
  453         }
  454         return (ENOTTY);
  455 }
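      /*
       * Usage sketch (userland, not part of this driver; assumes the
       * nvme_get_nsid layout from <dev/nvme/nvme.h> used above):
       *
       *      #include <sys/ioctl.h>
       *      #include <dev/nvme/nvme.h>
       *      #include <fcntl.h>
       *      #include <stdio.h>
       *
       *      int fd = open("/dev/nda0", O_RDWR);
       *      struct nvme_get_nsid gnsid;
       *
       *      if (fd >= 0 && ioctl(fd, NVME_GET_NSID, &gnsid) == 0)
       *              printf("controller dev %s, nsid %u\n",
       *                  gnsid.cdev, gnsid.nsid);
       *
       * NVME_PASSTHROUGH_CMD works similarly with a filled-in
       * struct nvme_pt_command (cmd, buf, len, is_read), as handled above.
       */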
  456 
  457 /*
  458  * Actually translate the requested transfer into one the physical driver
  459  * can understand.  The transfer is described by a buf and will include
  460  * only one physical transfer.
  461  */
  462 static void
  463 ndastrategy(struct bio *bp)
  464 {
  465         struct cam_periph *periph;
  466         struct nda_softc *softc;
  467 
  468         periph = (struct cam_periph *)bp->bio_disk->d_drv1;
  469         softc = (struct nda_softc *)periph->softc;
  470 
  471         cam_periph_lock(periph);
  472 
  473         CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastrategy(%p)\n", bp));
  474 
  475         /*
  476          * If the device has been made invalid, error out
  477          */
  478         if ((periph->flags & CAM_PERIPH_INVALID) != 0) {
  479                 cam_periph_unlock(periph);
  480                 biofinish(bp, NULL, ENXIO);
  481                 return;
  482         }
  483 
  484         if (bp->bio_cmd == BIO_DELETE)
  485                 softc->deletes++;
  486 
  487         /*
  488          * Place it in the queue of disk activities for this disk
  489          */
  490         cam_iosched_queue_work(softc->cam_iosched, bp);
  491 
  492         /*
  493          * Schedule ourselves for performing the work.
  494          */
  495         ndaschedule(periph);
  496         cam_periph_unlock(periph);
  497 
  498         return;
  499 }
  500 
  501 static int
  502 ndadump(void *arg, void *virtual, off_t offset, size_t length)
  503 {
  504         struct      cam_periph *periph;
  505         struct      nda_softc *softc;
  506         u_int       secsize;
  507         struct ccb_nvmeio nvmeio;
  508         struct      disk *dp;
  509         uint64_t    lba;
  510         uint32_t    count;
  511         int         error = 0;
  512 
  513         dp = arg;
  514         periph = dp->d_drv1;
  515         softc = (struct nda_softc *)periph->softc;
  516         secsize = softc->disk->d_sectorsize;
  517         lba = offset / secsize;
  518         count = length / secsize;
  519 
  520         if ((periph->flags & CAM_PERIPH_INVALID) != 0)
  521                 return (ENXIO);
  522 
  523         /* xpt_get_ccb returns a zero'd allocation for the ccb, mimic that here */
  524         memset(&nvmeio, 0, sizeof(nvmeio));
  525         if (length > 0) {
  526                 xpt_setup_ccb(&nvmeio.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
  527                 nvmeio.ccb_state = NDA_CCB_DUMP;
  528                 nda_nvme_write(softc, &nvmeio, virtual, lba, length, count);
  529                 error = cam_periph_runccb((union ccb *)&nvmeio, cam_periph_error,
  530                     0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
  531                 if (error != 0)
  532                         printf("Aborting dump due to I/O error %d.\n", error);
  533 
  534                 return (error);
  535         }
  536 
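              /*
               * A zero-length call only flushes the volatile write cache:
               * the dump framework is expected to issue one at the end of a
               * dump, and ndaflush() below calls ndadump(disk, NULL, 0, 0)
               * directly for the same purpose when the scheduler is stopped.
               */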
  537         /* Flush */
  538         xpt_setup_ccb(&nvmeio.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
  539 
  540         nvmeio.ccb_state = NDA_CCB_DUMP;
  541         nda_nvme_flush(softc, &nvmeio);
  542         error = cam_periph_runccb((union ccb *)&nvmeio, cam_periph_error,
  543             0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
  544         if (error != 0)
  545                 xpt_print(periph->path, "flush cmd failed\n");
  546         return (error);
  547 }
  548 
  549 static void
  550 ndainit(void)
  551 {
  552         cam_status status;
  553 
  554         /*
  555          * Install a global async callback.  This callback will
  556          * receive async callbacks like "new device found".
  557          */
  558         status = xpt_register_async(AC_FOUND_DEVICE, ndaasync, NULL, NULL);
  559 
  560         if (status != CAM_REQ_CMP) {
  561                 printf("nda: Failed to attach master async callback "
  562                        "due to status 0x%x!\n", status);
  563         } else if (nda_send_ordered) {
  564                 /* Register our event handlers */
  565                 if ((EVENTHANDLER_REGISTER(power_suspend, ndasuspend,
  566                                            NULL, EVENTHANDLER_PRI_LAST)) == NULL)
  567                     printf("ndainit: power event registration failed!\n");
  568                 if ((EVENTHANDLER_REGISTER(shutdown_post_sync, ndashutdown,
  569                                            NULL, SHUTDOWN_PRI_DEFAULT)) == NULL)
  570                     printf("ndainit: shutdown event registration failed!\n");
  571         }
  572 }
  573 
  574 /*
  575  * Callback from GEOM, called when it has finished cleaning up its
  576  * resources.
  577  */
  578 static void
  579 ndadiskgonecb(struct disk *dp)
  580 {
  581         struct cam_periph *periph;
  582 
  583         periph = (struct cam_periph *)dp->d_drv1;
  584 
  585         cam_periph_release(periph);
  586 }
  587 
  588 static void
  589 ndaoninvalidate(struct cam_periph *periph)
  590 {
  591         struct nda_softc *softc;
  592 
  593         softc = (struct nda_softc *)periph->softc;
  594 
  595         /*
  596          * De-register any async callbacks.
  597          */
  598         xpt_register_async(0, ndaasync, periph, periph->path);
  599 #ifdef CAM_IO_STATS
  600         softc->invalidations++;
  601 #endif
  602 
  603         /*
  604          * Return all queued I/O with ENXIO. Transactions may be queued up here
  605          * for retry (since we are called while there's other transactions
  606          * pending). Any requests in the hardware will drain before ndacleanup
  607          * is called.
  608          */
  609         cam_iosched_flush(softc->cam_iosched, NULL, ENXIO);
  610 
  611         /*
  612          * Tell GEOM that we've gone away, we'll get a callback when it is
  613          * done cleaning up its resources.
  614          */
  615         disk_gone(softc->disk);
  616 }
  617 
  618 static void
  619 ndacleanup(struct cam_periph *periph)
  620 {
  621         struct nda_softc *softc;
  622 
  623         softc = (struct nda_softc *)periph->softc;
  624 
  625         cam_periph_unlock(periph);
  626 
  627         cam_iosched_fini(softc->cam_iosched);
  628 
  629         /*
  630          * If we can't free the sysctl tree, oh well...
  631          */
  632         if ((softc->flags & NDA_FLAG_SCTX_INIT) != 0) {
  633 #ifdef CAM_IO_STATS
  634                 if (sysctl_ctx_free(&softc->sysctl_stats_ctx) != 0)
  635                         xpt_print(periph->path,
  636                             "can't remove sysctl stats context\n");
  637 #endif
  638                 if (sysctl_ctx_free(&softc->sysctl_ctx) != 0)
  639                         xpt_print(periph->path,
  640                             "can't remove sysctl context\n");
  641         }
  642 
  643         disk_destroy(softc->disk);
  644         free(softc, M_DEVBUF);
  645         cam_periph_lock(periph);
  646 }
  647 
  648 static void
  649 ndaasync(void *callback_arg, u_int32_t code,
  650         struct cam_path *path, void *arg)
  651 {
  652         struct cam_periph *periph;
  653 
  654         periph = (struct cam_periph *)callback_arg;
  655         switch (code) {
  656         case AC_FOUND_DEVICE:
  657         {
  658                 struct ccb_getdev *cgd;
  659                 cam_status status;
  660 
  661                 cgd = (struct ccb_getdev *)arg;
  662                 if (cgd == NULL)
  663                         break;
  664 
  665                 if (cgd->protocol != PROTO_NVME)
  666                         break;
  667 
  668                 /*
  669                  * Allocate a peripheral instance for
  670                  * this device and start the probe
  671                  * process.
  672                  */
  673                 status = cam_periph_alloc(ndaregister, ndaoninvalidate,
  674                                           ndacleanup, ndastart,
  675                                           "nda", CAM_PERIPH_BIO,
  676                                           path, ndaasync,
  677                                           AC_FOUND_DEVICE, cgd);
  678 
  679                 if (status != CAM_REQ_CMP
  680                  && status != CAM_REQ_INPROG)
  681                         printf("ndaasync: Unable to attach to new device "
  682                                 "due to status 0x%x\n", status);
  683                 break;
  684         }
  685         case AC_ADVINFO_CHANGED:
  686         {
  687                 uintptr_t buftype;
  688 
  689                 buftype = (uintptr_t)arg;
  690                 if (buftype == CDAI_TYPE_PHYS_PATH) {
  691                         struct nda_softc *softc;
  692 
  693                         softc = periph->softc;
  694                         disk_attr_changed(softc->disk, "GEOM::physpath",
  695                                           M_NOWAIT);
  696                 }
  697                 break;
  698         }
  699         case AC_LOST_DEVICE:
  700         default:
  701                 break;
  702         }
  703         cam_periph_async(periph, code, path, arg);
  704 }
  705 
  706 static void
  707 ndasysctlinit(void *context, int pending)
  708 {
  709         struct cam_periph *periph;
  710         struct nda_softc *softc;
  711         char tmpstr[32], tmpstr2[16];
  712 
  713         periph = (struct cam_periph *)context;
  714 
  715         /* periph was held for us when this task was enqueued */
  716         if ((periph->flags & CAM_PERIPH_INVALID) != 0) {
  717                 cam_periph_release(periph);
  718                 return;
  719         }
  720 
  721         softc = (struct nda_softc *)periph->softc;
  722         snprintf(tmpstr, sizeof(tmpstr), "CAM NDA unit %d", periph->unit_number);
  723         snprintf(tmpstr2, sizeof(tmpstr2), "%d", periph->unit_number);
  724 
  725         sysctl_ctx_init(&softc->sysctl_ctx);
  726         softc->flags |= NDA_FLAG_SCTX_INIT;
  727         softc->sysctl_tree = SYSCTL_ADD_NODE_WITH_LABEL(&softc->sysctl_ctx,
  728                 SYSCTL_STATIC_CHILDREN(_kern_cam_nda), OID_AUTO, tmpstr2,
  729                 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, tmpstr, "device_index");
  730         if (softc->sysctl_tree == NULL) {
  731                 printf("ndasysctlinit: unable to allocate sysctl tree\n");
  732                 cam_periph_release(periph);
  733                 return;
  734         }
  735 
  736         SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
  737             OID_AUTO, "unmapped_io", CTLFLAG_RD,
  738             &softc->unmappedio, 0, "Unmapped I/O leaf");
  739 
  740         SYSCTL_ADD_QUAD(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
  741             OID_AUTO, "deletes", CTLFLAG_RD,
  742             &softc->deletes, "Number of BIO_DELETE requests");
  743 
  744         SYSCTL_ADD_UQUAD(&softc->sysctl_ctx,
  745                 SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO,
  746                 "trim_count", CTLFLAG_RD, &softc->trim_count,
  747                 "Total number of unmap/dsm commands sent");
  748         SYSCTL_ADD_UQUAD(&softc->sysctl_ctx,
  749                 SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO,
  750                 "trim_ranges", CTLFLAG_RD, &softc->trim_ranges,
  751                 "Total number of ranges in unmap/dsm commands");
  752         SYSCTL_ADD_UQUAD(&softc->sysctl_ctx,
  753                 SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO,
  754                 "trim_lbas", CTLFLAG_RD, &softc->trim_lbas,
  755                 "Total lbas in the unmap/dsm commands sent");
  756 
  757         SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
  758             OID_AUTO, "rotating", CTLFLAG_RD, &nda_rotating_media, 1,
  759             "Rotating media");
  760 
  761         SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
  762             OID_AUTO, "flags", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
  763             softc, 0, ndaflagssysctl, "A",
  764             "Flags for drive");
  765 
  766 #ifdef CAM_IO_STATS
  767         softc->sysctl_stats_tree = SYSCTL_ADD_NODE(&softc->sysctl_stats_ctx,
  768                 SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "stats",
  769                 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Statistics");
  770         if (softc->sysctl_stats_tree == NULL) {
  771                 printf("ndasysctlinit: unable to allocate sysctl tree for stats\n");
  772                 cam_periph_release(periph);
  773                 return;
  774         }
  775         SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
  776                 SYSCTL_CHILDREN(softc->sysctl_stats_tree),
  777                 OID_AUTO, "timeouts", CTLFLAG_RD,
  778                 &softc->timeouts, 0,
  779                 "Device timeouts reported by the SIM");
  780         SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
  781                 SYSCTL_CHILDREN(softc->sysctl_stats_tree),
  782                 OID_AUTO, "errors", CTLFLAG_RD,
  783                 &softc->errors, 0,
  784                 "Transport errors reported by the SIM.");
  785         SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
  786                 SYSCTL_CHILDREN(softc->sysctl_stats_tree),
  787                 OID_AUTO, "pack_invalidations", CTLFLAG_RD,
  788                 &softc->invalidations, 0,
  789                 "Device pack invalidations.");
  790 #endif
  791 
  792 #ifdef CAM_TEST_FAILURE
  793         SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
  794                 OID_AUTO, "invalidate", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE,
  795                 periph, 0, cam_periph_invalidate_sysctl, "I",
  796                 "Write 1 to invalidate the drive immediately");
  797 #endif
  798 
  799         cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx,
  800             softc->sysctl_tree);
  801 
  802         cam_periph_release(periph);
  803 }
  804 
  805 static int
  806 ndaflagssysctl(SYSCTL_HANDLER_ARGS)
  807 {
  808         struct sbuf sbuf;
  809         struct nda_softc *softc = arg1;
  810         int error;
  811 
  812         sbuf_new_for_sysctl(&sbuf, NULL, 0, req);
  813         if (softc->flags != 0)
  814                 sbuf_printf(&sbuf, "0x%b", (unsigned)softc->flags, NDA_FLAG_STRING);
  815         else
  816                 sbuf_printf(&sbuf, "");
  817         error = sbuf_finish(&sbuf);
  818         sbuf_delete(&sbuf);
  819 
  820         return (error);
  821 }
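      /*
       * Example (illustrative): with the device open and no flush pending,
       * `sysctl kern.cam.nda.0.flags` reports something like "0x1<OPEN>",
       * decoded via NDA_FLAG_STRING above.
       */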
  822 
  823 static int
  824 ndagetattr(struct bio *bp)
  825 {
  826         int ret;
  827         struct cam_periph *periph;
  828 
  829         if (g_handleattr_int(bp, "GEOM::canspeedup", nda_enable_biospeedup))
  830                 return (EJUSTRETURN);
  831 
  832         periph = (struct cam_periph *)bp->bio_disk->d_drv1;
  833         cam_periph_lock(periph);
  834         ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute,
  835             periph->path);
  836         cam_periph_unlock(periph);
  837         if (ret == 0)
  838                 bp->bio_completed = bp->bio_length;
  839         return ret;
  840 }
  841 
  842 static cam_status
  843 ndaregister(struct cam_periph *periph, void *arg)
  844 {
  845         struct nda_softc *softc;
  846         struct disk *disk;
  847         struct ccb_pathinq cpi;
  848         const struct nvme_namespace_data *nsd;
  849         const struct nvme_controller_data *cd;
  850         char   announce_buf[80];
  851         uint8_t flbas_fmt, lbads, vwc_present;
  852         u_int maxio;
  853         int quirks;
  854 
  855         nsd = nvme_get_identify_ns(periph);
  856         cd = nvme_get_identify_cntrl(periph);
  857 
  858         softc = (struct nda_softc *)malloc(sizeof(*softc), M_DEVBUF,
  859             M_NOWAIT | M_ZERO);
  860 
  861         if (softc == NULL) {
  862                 printf("ndaregister: Unable to probe new device. "
  863                     "Unable to allocate softc\n");
  864                 return(CAM_REQ_CMP_ERR);
  865         }
  866 
  867         if (cam_iosched_init(&softc->cam_iosched, periph) != 0) {
  868                 printf("ndaregister: Unable to probe new device. "
  869                        "Unable to allocate iosched memory\n");
  870                 free(softc, M_DEVBUF);
  871                 return(CAM_REQ_CMP_ERR);
  872         }
  873 
  874         /* ident_data parsing */
  875 
  876         periph->softc = softc;
  877         softc->quirks = NDA_Q_NONE;
  878         xpt_path_inq(&cpi, periph->path);
  879         TASK_INIT(&softc->sysctl_task, 0, ndasysctlinit, periph);
  880 
  881         /*
  882          * The namespace ID is the LUN; save it for later I/O
  883          */
  884         softc->nsid = (uint32_t)xpt_path_lun_id(periph->path);
  885 
  886         /*
  887          * Register this media as a disk
  888          */
  889         (void)cam_periph_acquire(periph);
  890         cam_periph_unlock(periph);
  891         snprintf(announce_buf, sizeof(announce_buf),
  892             "kern.cam.nda.%d.quirks", periph->unit_number);
  893         quirks = softc->quirks;
  894         TUNABLE_INT_FETCH(announce_buf, &quirks);
  895         softc->quirks = quirks;
  896         cam_iosched_set_sort_queue(softc->cam_iosched, 0);
  897         softc->disk = disk = disk_alloc();
  898         disk->d_rotation_rate = DISK_RR_NON_ROTATING;
  899         disk->d_open = ndaopen;
  900         disk->d_close = ndaclose;
  901         disk->d_strategy = ndastrategy;
  902         disk->d_ioctl = ndaioctl;
  903         disk->d_getattr = ndagetattr;
  904         if (cam_sim_pollable(periph->sim))
  905                 disk->d_dump = ndadump;
  906         disk->d_gone = ndadiskgonecb;
  907         disk->d_name = "nda";
  908         disk->d_drv1 = periph;
  909         disk->d_unit = periph->unit_number;
  910         maxio = cpi.maxio;              /* Honor max I/O size of SIM */
  911         if (maxio == 0)
  912                 maxio = DFLTPHYS;       /* traditional default */
  913         else if (maxio > maxphys)
  914                 maxio = maxphys;        /* for safety */
  915         disk->d_maxsize = maxio;
  916         flbas_fmt = (nsd->flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) &
  917                 NVME_NS_DATA_FLBAS_FORMAT_MASK;
  918         lbads = (nsd->lbaf[flbas_fmt] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) &
  919                 NVME_NS_DATA_LBAF_LBADS_MASK;
  920         disk->d_sectorsize = 1 << lbads;
  921         disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze);
  922         disk->d_delmaxsize = disk->d_mediasize;
  923         disk->d_flags = DISKFLAG_DIRECT_COMPLETION;
  924         if (nvme_ctrlr_has_dataset_mgmt(cd))
  925                 disk->d_flags |= DISKFLAG_CANDELETE;
  926         vwc_present = (cd->vwc >> NVME_CTRLR_DATA_VWC_PRESENT_SHIFT) &
  927                 NVME_CTRLR_DATA_VWC_PRESENT_MASK;
  928         if (vwc_present)
  929                 disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
  930         if ((cpi.hba_misc & PIM_UNMAPPED) != 0) {
  931                 disk->d_flags |= DISKFLAG_UNMAPPED_BIO;
  932                 softc->unmappedio = 1;
  933         }
  934         /*
  935          * d_ident and d_descr are both far bigger than the length of either
  936          *  the serial or model number strings.
  937          */
  938         cam_strvis_flag(disk->d_descr, cd->mn, NVME_MODEL_NUMBER_LENGTH,
  939             sizeof(disk->d_descr), CAM_STRVIS_FLAG_NONASCII_SPC);
  940 
  941         cam_strvis_flag(disk->d_ident, cd->sn, NVME_SERIAL_NUMBER_LENGTH,
  942             sizeof(disk->d_ident), CAM_STRVIS_FLAG_NONASCII_SPC);
  943 
  944         disk->d_hba_vendor = cpi.hba_vendor;
  945         disk->d_hba_device = cpi.hba_device;
  946         disk->d_hba_subvendor = cpi.hba_subvendor;
  947         disk->d_hba_subdevice = cpi.hba_subdevice;
  948         snprintf(disk->d_attachment, sizeof(disk->d_attachment),
  949             "%s%d", cpi.dev_name, cpi.unit_number);
  950         if (((nsd->nsfeat >> NVME_NS_DATA_NSFEAT_NPVALID_SHIFT) &
  951             NVME_NS_DATA_NSFEAT_NPVALID_MASK) != 0 && nsd->npwg != 0)
  952                 disk->d_stripesize = ((nsd->npwg + 1) * disk->d_sectorsize);
  953         else
  954                 disk->d_stripesize = nsd->noiob * disk->d_sectorsize;
  955         disk->d_stripeoffset = 0;
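              /*
               * Stripe-size sketch: NPWG (preferred write granularity) is a
               * zero-based LBA count, hence the +1; e.g. npwg == 7 with
               * 512-byte sectors yields a 4 KiB d_stripesize.  Otherwise
               * NOIOB (optimal I/O boundary, also in LBAs) is used, which
               * may be zero when the drive reports nothing.
               */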
  956         disk->d_devstat = devstat_new_entry(periph->periph_name,
  957             periph->unit_number, disk->d_sectorsize,
  958             DEVSTAT_ALL_SUPPORTED,
  959             DEVSTAT_TYPE_DIRECT | XPORT_DEVSTAT_TYPE(cpi.transport),
  960             DEVSTAT_PRIORITY_DISK);
  961         /*
  962          * Add alias for older nvd drives to ease transition.
  963          */
  964         if (nda_nvd_compat)
  965                 disk_add_alias(disk, "nvd");
  966 
  967         cam_periph_lock(periph);
  968 
  969         snprintf(announce_buf, sizeof(announce_buf),
  970                 "%juMB (%ju %u byte sectors)",
  971             (uintmax_t)((uintmax_t)disk->d_mediasize / (1024*1024)),
  972                 (uintmax_t)disk->d_mediasize / disk->d_sectorsize,
  973                 disk->d_sectorsize);
  974         xpt_announce_periph(periph, announce_buf);
  975         xpt_announce_quirks(periph, softc->quirks, NDA_Q_BIT_STRING);
  976 
  977         /*
  978          * Create our sysctl variables, now that we know
  979          * we have successfully attached.
  980          */
  981         if (cam_periph_acquire(periph) == 0)
  982                 taskqueue_enqueue(taskqueue_thread, &softc->sysctl_task);
  983 
  984         /*
  985          * Register for device going away and info about the drive
  986          * changing (though with NVMe, it can't)
  987          */
  988         xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED,
  989             ndaasync, periph, periph->path);
  990 
  991         softc->state = NDA_STATE_NORMAL;
  992 
  993         /*
  994          * We'll release this reference once GEOM calls us back via
  995          * ndadiskgonecb(), telling us that our provider has been freed.
  996          */
  997         if (cam_periph_acquire(periph) == 0)
  998                 disk_create(softc->disk, DISK_VERSION);
  999 
 1000         cam_periph_release_locked(periph);
 1001         return(CAM_REQ_CMP);
 1002 }
 1003 
 1004 static void
 1005 ndastart(struct cam_periph *periph, union ccb *start_ccb)
 1006 {
 1007         struct nda_softc *softc = (struct nda_softc *)periph->softc;
 1008         struct ccb_nvmeio *nvmeio = &start_ccb->nvmeio;
 1009 
 1010         CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart\n"));
 1011 
 1012         switch (softc->state) {
 1013         case NDA_STATE_NORMAL:
 1014         {
 1015                 struct bio *bp;
 1016 
 1017                 bp = cam_iosched_next_bio(softc->cam_iosched);
 1018                 CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart: bio %p\n", bp));
 1019                 if (bp == NULL) {
 1020                         xpt_release_ccb(start_ccb);
 1021                         break;
 1022                 }
 1023 
 1024                 switch (bp->bio_cmd) {
 1025                 case BIO_WRITE:
 1026                         softc->flags |= NDA_FLAG_DIRTY;
 1027                         /* FALLTHROUGH */
 1028                 case BIO_READ:
 1029                 {
 1030 #ifdef CAM_TEST_FAILURE
 1031                         int fail = 0;
 1032 
 1033                         /*
 1034                          * Support the failure ioctls.  If the command is a
 1035                          * read, and there are pending forced read errors, or
 1036                          * if a write and pending write errors, then fail this
 1037                          * operation with EIO.  This is useful for testing
 1038                          * purposes.  Also, support having every Nth read fail.
 1039                          *
 1040                          * This is a rather blunt tool.
 1041                          */
 1042                         if (bp->bio_cmd == BIO_READ) {
 1043                                 if (softc->force_read_error) {
 1044                                         softc->force_read_error--;
 1045                                         fail = 1;
 1046                                 }
 1047                                 if (softc->periodic_read_error > 0) {
 1048                                         if (++softc->periodic_read_count >=
 1049                                             softc->periodic_read_error) {
 1050                                                 softc->periodic_read_count = 0;
 1051                                                 fail = 1;
 1052                                         }
 1053                                 }
 1054                         } else {
 1055                                 if (softc->force_write_error) {
 1056                                         softc->force_write_error--;
 1057                                         fail = 1;
 1058                                 }
 1059                         }
 1060                         if (fail) {
 1061                                 biofinish(bp, NULL, EIO);
 1062                                 xpt_release_ccb(start_ccb);
 1063                                 ndaschedule(periph);
 1064                                 return;
 1065                         }
 1066 #endif
 1067                         KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 ||
 1068                             round_page(bp->bio_bcount + bp->bio_ma_offset) /
 1069                             PAGE_SIZE == bp->bio_ma_n,
 1070                             ("Short bio %p", bp));
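                              /*
                               * Sketch of the check above: on a 4 KiB page
                               * size machine, an unmapped bio with
                               * bio_bcount == 16384 and bio_ma_offset == 512
                               * rounds to 20480 bytes, i.e. bio_ma_n must
                               * be 5 pages.
                               */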
 1071                         nda_nvme_rw_bio(softc, &start_ccb->nvmeio, bp, bp->bio_cmd == BIO_READ ?
 1072                             NVME_OPC_READ : NVME_OPC_WRITE);
 1073                         break;
 1074                 }
 1075                 case BIO_DELETE:
 1076                 {
 1077                         struct nvme_dsm_range *dsm_range, *dsm_end;
 1078                         struct nda_trim_request *trim;
 1079                         struct bio *bp1;
 1080                         int ents;
 1081                         uint32_t totalcount = 0, ranges = 0;
 1082 
 1083                         trim = malloc(sizeof(*trim), M_NVMEDA, M_ZERO | M_NOWAIT);
 1084                         if (trim == NULL) {
 1085                                 biofinish(bp, NULL, ENOMEM);
 1086                                 xpt_release_ccb(start_ccb);
 1087                                 ndaschedule(periph);
 1088                                 return;
 1089                         }
 1090                         TAILQ_INIT(&trim->bps);
 1091                         bp1 = bp;
 1092                         ents = min(nitems(trim->dsm), nda_max_trim_entries);
 1093                         ents = max(ents, 1);
 1094                         dsm_range = trim->dsm;
 1095                         dsm_end = dsm_range + ents;
 1096                         do {
 1097                                 TAILQ_INSERT_TAIL(&trim->bps, bp1, bio_queue);
 1098                                 dsm_range->length =
 1099                                     htole32(bp1->bio_bcount / softc->disk->d_sectorsize);
 1100                                 dsm_range->starting_lba =
 1101                                     htole64(bp1->bio_offset / softc->disk->d_sectorsize);
 1102                                 ranges++;
 1103                                 totalcount += dsm_range->length;
 1104                                 dsm_range++;
 1105                                 if (dsm_range >= dsm_end)
 1106                                         break;
 1107                                 bp1 = cam_iosched_next_trim(softc->cam_iosched);
 1108                                 /* XXX -- Could collapse adjacent ranges, but we don't for now */
 1109                                 /* XXX -- Could limit based on total payload size */
 1110                         } while (bp1 != NULL);
 1111                         start_ccb->ccb_trim = trim;
 1112                         nda_nvme_trim(softc, &start_ccb->nvmeio, trim->dsm,
 1113                             dsm_range - trim->dsm);
 1114                         start_ccb->ccb_state = NDA_CCB_TRIM;
 1115                         softc->trim_count++;
 1116                         softc->trim_ranges += ranges;
 1117                         softc->trim_lbas += totalcount;
 1118                         /*
 1119                          * Note: We can have multiple TRIMs in flight, so we don't call
 1120                          * cam_iosched_submit_trim(softc->cam_iosched);
 1121                          * since that forces the I/O scheduler to only schedule one at a time.
 1122                          * On NVMe drives, this is a performance disaster.
 1123                          */
 1124                         goto out;
 1125                 }
 1126                 case BIO_FLUSH:
 1127                         nda_nvme_flush(softc, nvmeio);
 1128                         break;
 1129                 default:
 1130                         biofinish(bp, NULL, EOPNOTSUPP);
 1131                         xpt_release_ccb(start_ccb);
 1132                         ndaschedule(periph);
 1133                         return;
 1134                 }
 1135                 start_ccb->ccb_state = NDA_CCB_BUFFER_IO;
 1136                 start_ccb->ccb_bp = bp;
 1137 out:
 1138                 start_ccb->ccb_h.flags |= CAM_UNLOCKED;
 1139                 softc->outstanding_cmds++;
 1140                 softc->refcount++;                      /* For submission only */
 1141                 cam_periph_unlock(periph);
 1142                 xpt_action(start_ccb);
 1143                 cam_periph_lock(periph);
 1144                 softc->refcount--;                      /* Submission done */
 1145 
 1146                 /* May have more work to do, so ensure we stay scheduled */
 1147                 ndaschedule(periph);
 1148                 break;
 1149                 }
 1150         }
 1151 }
 1152 
 1153 static void
 1154 ndadone(struct cam_periph *periph, union ccb *done_ccb)
 1155 {
 1156         struct nda_softc *softc;
 1157         struct ccb_nvmeio *nvmeio = &done_ccb->nvmeio;
 1158         struct cam_path *path;
 1159         int state;
 1160 
 1161         softc = (struct nda_softc *)periph->softc;
 1162         path = done_ccb->ccb_h.path;
 1163 
 1164         CAM_DEBUG(path, CAM_DEBUG_TRACE, ("ndadone\n"));
 1165 
 1166         state = nvmeio->ccb_state & NDA_CCB_TYPE_MASK;
 1167         switch (state) {
 1168         case NDA_CCB_BUFFER_IO:
 1169         case NDA_CCB_TRIM:
 1170         {
 1171                 int error;
 1172 
 1173                 cam_periph_lock(periph);
 1174                 if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
 1175                         error = ndaerror(done_ccb, 0, 0);
 1176                         if (error == ERESTART) {
 1177                                 /* A retry was scheduled, so just return. */
 1178                                 cam_periph_unlock(periph);
 1179                                 return;
 1180                         }
 1181                         if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
 1182                                 cam_release_devq(path,
 1183                                                  /*relsim_flags*/0,
 1184                                                  /*reduction*/0,
 1185                                                  /*timeout*/0,
 1186                                                  /*getcount_only*/0);
 1187                 } else {
 1188                         if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
 1189                                 panic("REQ_CMP with QFRZN");
 1190                         error = 0;
 1191                 }
 1192                 if (state == NDA_CCB_BUFFER_IO) {
 1193                         struct bio *bp;
 1194 
 1195                         bp = (struct bio *)done_ccb->ccb_bp;
 1196                         bp->bio_error = error;
 1197                         if (error != 0) {
 1198                                 bp->bio_resid = bp->bio_bcount;
 1199                                 bp->bio_flags |= BIO_ERROR;
 1200                         } else {
 1201                                 bp->bio_resid = 0;
 1202                         }
 1203                         softc->outstanding_cmds--;
 1204 
 1205                         /*
 1206                          * We need to call cam_iosched before we call biodone so that we
 1207                          * don't measure any activity that happens in the completion
 1208                          * routine, which in the case of sendfile can be quite
 1209                          * extensive.
 1210                          */
 1211                         cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb);
 1212                         xpt_release_ccb(done_ccb);
 1213                         ndaschedule(periph);
 1214                         cam_periph_unlock(periph);
 1215                         biodone(bp);
 1216                 } else { /* state == NDA_CCB_TRIM */
 1217                         struct nda_trim_request *trim;
 1218                         struct bio *bp1, *bp2;
 1219                         TAILQ_HEAD(, bio) queue;
 1220 
 1221                         trim = nvmeio->ccb_trim;
 1222                         TAILQ_INIT(&queue);
 1223                         TAILQ_CONCAT(&queue, &trim->bps, bio_queue);
 1224                         free(trim, M_NVMEDA);
 1225 
 1226                         /*
 1227                          * Since we can have multiple trims in flight, we don't
 1228                          * need to call this here.
 1229                          * cam_iosched_trim_done(softc->cam_iosched);
 1230                          */
 1231                         /*
 1232                          * Tell the I/O scheduler that we're finishing this I/O
 1233                          * so we can keep the books. The first one we pass in the CCB,
 1234                          * which has the timing information. The rest we pass in NULL
 1235                          * so we can keep proper counts.
 1236                          */
 1237                         bp1 = TAILQ_FIRST(&queue);
 1238                         cam_iosched_bio_complete(softc->cam_iosched, bp1, done_ccb);
 1239                         xpt_release_ccb(done_ccb);
 1240                         softc->outstanding_cmds--;
 1241                         ndaschedule(periph);
 1242                         cam_periph_unlock(periph);
 1243                         while ((bp2 = TAILQ_FIRST(&queue)) != NULL) {
 1244                                 TAILQ_REMOVE(&queue, bp2, bio_queue);
 1245                                 bp2->bio_error = error;
 1246                                 if (error != 0) {
 1247                                         bp2->bio_flags |= BIO_ERROR;
 1248                                         bp2->bio_resid = bp1->bio_bcount;
 1249                                 } else
 1250                                         bp2->bio_resid = 0;
 1251                                 if (bp1 != bp2)
 1252                                         cam_iosched_bio_complete(softc->cam_iosched, bp2, NULL);
 1253                                 biodone(bp2);
 1254                         }
 1255                 }
 1256                 return;
 1257         }
 1258         case NDA_CCB_DUMP:
 1259                 /* No-op.  We're polling */
 1260                 return;
 1261         case NDA_CCB_PASS:
 1262                 /* NVME_PASSTHROUGH_CMD runs this CCB and releases it */
 1263                 return;
 1264         default:
 1265                 break;
 1266         }
 1267         xpt_release_ccb(done_ccb);
 1268 }
 1269 
 1270 static int
 1271 ndaerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags)
 1272 {
 1273 #ifdef CAM_IO_STATS
 1274         struct nda_softc *softc;
 1275         struct cam_periph *periph;
 1276 
 1277         periph = xpt_path_periph(ccb->ccb_h.path);
 1278         softc = (struct nda_softc *)periph->softc;
 1279 #endif
 1280 
 1281         switch (ccb->ccb_h.status & CAM_STATUS_MASK) {
 1282         case CAM_CMD_TIMEOUT:
 1283 #ifdef CAM_IO_STATS
 1284                 softc->timeouts++;
 1285 #endif
 1286                 break;
 1287         case CAM_REQ_ABORTED:
 1288         case CAM_REQ_CMP_ERR:
 1289         case CAM_REQ_TERMIO:
 1290         case CAM_UNREC_HBA_ERROR:
 1291         case CAM_DATA_RUN_ERR:
 1292         case CAM_ATA_STATUS_ERROR:
 1293 #ifdef CAM_IO_STATS
 1294                 softc->errors++;
 1295 #endif
 1296                 break;
 1297         default:
 1298                 break;
 1299         }
 1300 
 1301         return(cam_periph_error(ccb, cam_flags, sense_flags));
 1302 }
 1303 
 1304 /*
 1305  * Step through all NDA peripheral drivers, and if the device is still open,
 1306  * sync the disk cache to physical media.
 1307  */
 1308 static void
 1309 ndaflush(void)
 1310 {
 1311         struct cam_periph *periph;
 1312         struct nda_softc *softc;
 1313         union ccb *ccb;
 1314         int error;
 1315 
 1316         CAM_PERIPH_FOREACH(periph, &ndadriver) {
 1317                 softc = (struct nda_softc *)periph->softc;
 1318 
 1319                 if (SCHEDULER_STOPPED()) {
 1320                         /*
 1321                          * If we panicked with the lock held or the periph is not
 1322                          * open, do not recurse.  Otherwise, call ndadump since
 1323                          * that avoids the sleep that cam_periph_getccb does if no
 1324                          * CCBs are available.
 1325                          */
 1326                         if (!cam_periph_owned(periph) &&
 1327                             (softc->flags & NDA_FLAG_OPEN)) {
 1328                                 ndadump(softc->disk, NULL, 0, 0);
 1329                         }
 1330                         continue;
 1331                 }
 1332 
 1333                 /*
 1334                  * We only sync the cache if the drive is still open
 1335                  */
 1336                 cam_periph_lock(periph);
 1337                 if ((softc->flags & NDA_FLAG_OPEN) == 0) {
 1338                         cam_periph_unlock(periph);
 1339                         continue;
 1340                 }
 1341 
 1342                 ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL);
 1343                 nda_nvme_flush(softc, &ccb->nvmeio);
 1344                 error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0,
 1345                     /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY,
 1346                     softc->disk->d_devstat);
 1347                 if (error != 0)
 1348                         xpt_print(periph->path, "Synchronize cache failed\n");
 1349                 xpt_release_ccb(ccb);
 1350                 cam_periph_unlock(periph);
 1351         }
 1352 }
 1353 
 1354 static void
 1355 ndashutdown(void *arg, int howto)
 1356 {
 1357 
 1358         if ((howto & RB_NOSYNC) != 0)
 1359                 return;
 1360 
 1361         ndaflush();
 1362 }
 1363 
 1364 static void
 1365 ndasuspend(void *arg)
 1366 {
 1367 
 1368         ndaflush();
 1369 }
